RPi ChatGPT
USB Mic and Audiojack
Run aplay -l to list the available audio output devices, and take note of the card number and device number.
Run arecord -l to list the available input devices, and note their card and device numbers as well.
Run sudo nano /home/pi/.asoundrc and enter the following, adjusting the card and device numbers (the "hw:card,device" values) accordingly:
pcm.!default {
type asym
capture.pcm "mic"
playback.pcm "speaker"
}
pcm.mic {
type plug
slave {
pcm "hw:2,0"
rate 48000
}
}
pcm.speaker {
type plug
slave {
pcm "hw:1,0"
}
}
Test by:
speaker-test -t wav
arecord --format=S16_LE --duration=5 --rate=16000 --file-type=raw out.raw
aplay --format=S16_LE --rate=16000 out.raw
Voice Chatbot (ChatGPT)
sudo apt install espeak
sudo apt install espeak-ng
sudo apt install flac
pip3 install soundfile
pip3 install pyttsx3
pip3 install sounddevice
pip3 install scipy
pip3 install openai
pip3 install SpeechRecognition
import time
import os
import openai
import sounddevice as sd
import soundfile as sf
import pyttsx3
import speech_recognition as sr
from scipy.io import wavfile
# Set up OpenAI API credentials.
# The key is read from the OPENAI_API_KEY environment variable so that the
# secret is never stored in (or shared along with) this file.
openai.api_key = os.environ.get("OPENAI_API_KEY")
if not openai.api_key:
    raise SystemExit("Set the OPENAI_API_KEY environment variable before running.")
# Define function to interact with ChatGPT and return its response
def ask_chatbot(prompt):
    """Send *prompt* to the OpenAI completion endpoint and return the reply.

    A ``Chatbot:`` cue line is appended to the prompt before it is sent. The
    text of the first returned choice is returned with surrounding whitespace
    removed.
    """
    request = {
        "engine": "text-davinci-002",
        "prompt": f"{prompt}\nChatbot:",
        "max_tokens": 1024,
        "temperature": 0.7,
    }
    completion = openai.Completion.create(**request)
    # Short pause after every request to stay clear of rate limiting.
    time.sleep(1)
    reply = completion.choices[0].text
    return reply.strip()
# Define function to convert text to speech
def speak_response(response):
    """Speak *response* aloud using a freshly initialised pyttsx3 engine."""
    tts = pyttsx3.init()
    tts.setProperty("rate", 150)  # Speech rate (words per minute); adjust to taste
    tts.say(response)
    tts.runAndWait()
# Define function to record audio from USB microphone
def record_audio(file_name, duration=5, sample_rate=16000):
    """Record mono audio and save it to *file_name*.

    Args:
        file_name: Path of the audio file to write.
        duration: Length of the recording in seconds.
        sample_rate: Sampling rate in Hz.
    """
    frame_count = int(duration * sample_rate)
    print("Recording audio...")
    # The dtype must be given with plain ASCII quotes; typographic quotes are
    # not valid string delimiters in Python.
    audio = sd.rec(frame_count, samplerate=sample_rate, channels=1, dtype="float32")
    sd.wait()  # Wait until recording is finished
    print("Finished recording.")
    # Save the recorded audio to a file
    sf.write(file_name, audio, sample_rate)
# Define function to transcribe audio to text
def transcribe_audio(file_name):
    """Return the text recognised in the audio file at *file_name*.

    The whole file is loaded and passed to the recogniser's
    ``recognize_google`` method.
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(file_name) as audio_file:
        recording = recognizer.record(audio_file)
    return recognizer.recognize_google(recording)
# Start voice chat with the Chatbot
while True:
    record_audio("user_input.wav")
    try:
        user_input_text = transcribe_audio("user_input.wav")
    except sr.UnknownValueError:
        # Nothing intelligible was captured (silence, noise); listen again
        # instead of letting the exception end the chat.
        print("Sorry, I could not understand that.")
        continue
    except sr.RequestError as err:
        # The recognition service could not be reached or rejected the request.
        print("Speech recognition request failed:", err)
        continue
    # Any of these words on its own ends the conversation.
    if user_input_text.strip().lower() in ("exit", "quit", "bye"):
        break
    bot_response = ask_chatbot(user_input_text)
    print("Chatbot:", bot_response)
    speak_response(bot_response)
Voice Chat with hot word and custom duration of question:
import time
import struct
import openai
import sounddevice as sd
import soundfile as sf
import pyaudio
import pyttsx3
import pvporcupine
import speech_recognition as sr
from pydub import AudioSegment
from pydub.silence import split_on_silence
import RPi.GPIO as GPIO
# Handles for the hot-word engine, the PyAudio instance and its input stream.
# They start out as None so the cleanup code can tell which ones were created.
porcupine = pa = audio_stream = None

# Status LED on pin 16 (BOARD numbering), switched off at start-up.
GPIO.setmode(GPIO.BOARD)
GPIO.setwarnings(False)
led = 16
GPIO.setup(led, GPIO.OUT)
GPIO.output(led, GPIO.LOW)
# Set up OpenAI API credentials.
# The key is read from the OPENAI_API_KEY environment variable so that the
# secret is never stored in (or shared along with) this file.
import os

openai.api_key = os.environ.get("OPENAI_API_KEY")
if not openai.api_key:
    raise SystemExit("Set the OPENAI_API_KEY environment variable before running.")
# Define function to interact with ChatGPT and return its response
def ask_chatbot(prompt):
    """Return the OpenAI completion for *prompt* as stripped text.

    The prompt is sent with a trailing ``Chatbot:`` cue line, and only the
    first choice of the response is used.
    """
    completion = openai.Completion.create(
        engine="text-davinci-002",
        prompt=f"{prompt}\nChatbot:",
        max_tokens=1024,
        temperature=0.7,
    )
    time.sleep(1)  # Wait a bit to avoid rate limiting
    first_choice = completion.choices[0]
    return first_choice.text.strip()
# Define function to convert text to speech
def speak_response(response):
    """Read *response* out loud with pyttsx3 at a rate of 150 words per minute."""
    speech_rate = 150  # words per minute; adjust to taste
    tts = pyttsx3.init()
    tts.setProperty("rate", speech_rate)
    tts.say(response)
    tts.runAndWait()
# Define function to record audio from USB microphone
def record_audio(file_name, duration=10, sample_rate=16000,
                 min_silence_length=1000, silence_threshold=-40):
    """Record audio, then trim the saved file down to its longest spoken section.

    The recording is written to *file_name*, split on silence, and the longest
    non-silent chunk is exported back to the same path. If no non-silent chunk
    is found, the full recording is left in place.

    Args:
        file_name: Path of the WAV file to write.
        duration: Length of the raw recording in seconds.
        sample_rate: Sampling rate in Hz.
        min_silence_length: Minimum silence length for end-of-sentence
            detection, in milliseconds.
        silence_threshold: Silence threshold for end-of-sentence detection,
            in dB.
    """
    print("Recording audio...")
    audio = sd.rec(int(duration * sample_rate), samplerate=sample_rate, channels=1, dtype="float32")
    sd.wait()  # Wait until recording is finished
    print("Finished recording.")

    # Save the recorded audio to a file, then load it back for silence analysis.
    sf.write(file_name, audio, sample_rate)
    audio_segment = AudioSegment.from_file(file_name, format="wav")

    # Split the audio based on silence (end-of-sentence detection).
    chunks = split_on_silence(
        audio_segment,
        min_silence_len=min_silence_length,
        silence_thresh=silence_threshold,
    )
    if not chunks:
        # An entirely silent recording yields no chunks; max() on an empty
        # list would raise ValueError, so keep the untrimmed file instead.
        return

    # Assume the longest chunk contains the main question and keep only that.
    main_chunk = max(chunks, key=len)
    main_chunk.export(file_name, format="wav")
# Define function to transcribe audio to text
def transcribe_audio(file_name):
    """Load the audio file at *file_name* and return the recognised text.

    Recognition is done by the recogniser's ``recognize_google`` method.
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(file_name) as audio_file:
        recording = recognizer.record(audio_file)
    transcript = recognizer.recognize_google(recording)
    return transcript
# Start voice chat with the Chatbot
import os  # used only to read the Picovoice key from the environment

try:
    # The access key comes from the environment so the secret is not kept in
    # this file.
    access_key = os.environ.get("PICOVOICE_ACCESS_KEY")
    if not access_key:
        raise SystemExit("Set the PICOVOICE_ACCESS_KEY environment variable before running.")

    porcupine = pvporcupine.create(access_key=access_key, keywords=["picovoice", "blueberry"])
    pa = pyaudio.PyAudio()
    audio_stream = pa.open(
        rate=porcupine.sample_rate,
        channels=1,
        format=pyaudio.paInt16,
        input=True,
        frames_per_buffer=porcupine.frame_length,
    )
    while True:
        # The stream is not read while a question is recorded and answered, so
        # its buffer can overflow; ignore that instead of raising on the next read.
        pcm = audio_stream.read(porcupine.frame_length, exception_on_overflow=False)
        pcm = struct.unpack_from("h" * porcupine.frame_length, pcm)
        keyword_index = porcupine.process(pcm)
        if keyword_index < 0:
            continue  # no hot word in this frame

        # LED is on for exactly as long as the question is being recorded.
        GPIO.output(led, GPIO.HIGH)
        print("Hotword Detected")
        try:
            record_audio("user_input.wav")
        finally:
            GPIO.output(led, GPIO.LOW)

        try:
            user_input_text = transcribe_audio("user_input.wav")
        except sr.UnknownValueError:
            # Nothing intelligible was captured; go back to waiting for the hot word.
            print("Sorry, I could not understand that.")
            continue
        except sr.RequestError as err:
            # The recognition service could not be reached or rejected the request.
            print("Speech recognition request failed:", err)
            continue

        # Any of these words on its own ends the conversation.
        if user_input_text.strip().lower() in ("exit", "quit", "bye"):
            break
        bot_response = ask_chatbot(user_input_text)
        print("Chatbot:", bot_response)
        speak_response(bot_response)
finally:
    # Release whatever was created, in the original order, then reset the GPIO pins.
    if porcupine is not None:
        porcupine.delete()
    if audio_stream is not None:
        audio_stream.close()
    if pa is not None:
        pa.terminate()
    GPIO.cleanup()
https://github.com/Picovoice/Porcupine#python-demos
https://picovoice.ai/docs/quick-start/porcupine-python/