RPi ChatGPT

USB Mic and Audio Jack

aplay -l to list the available audio output devices; take note of the card number and device number

arecord -l to list the available input devices; note the card and device numbers for these as well

sudo nano /home/pi/.asoundrc and fill in the card and device numbers noted above (hw:2,0 below means card 2, device 0):

pcm.!default {
    type asym
    capture.pcm "mic"
    playback.pcm "speaker"
}

pcm.mic {
    type plug
    slave {
        pcm "hw:2,0"
        rate 48000
    }
}

pcm.speaker {
    type plug
    slave {
        pcm "hw:1,0"
    }
}


Test by:

speaker-test -t wav

arecord --format=S16_LE --duration=5 --rate=16000 --file-type=raw out.raw

aplay --format=S16_LE --rate=16000 out.raw
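Once the Python packages from the next section are installed, it can also help to confirm that sounddevice sees the same input/output devices before running the chatbot script:

python3 -c "import sounddevice as sd; print(sd.query_devices())"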



Voice Chatbot (ChatGPT)

sudo apt install espeak

sudo apt install espeak-ng

sudo apt install flac

pip3 install soundfile

pip3 install pyttsx3

pip3 install sounddevice

pip3 install scipy

pip3 install openai

pip3 install SpeechRecognition
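The sounddevice module relies on the PortAudio library; if importing it later fails with a missing-library error, installing the system package is the usual fix (package name assumed for Raspberry Pi OS):

sudo apt install libportaudio2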

import time
import os
import openai
import sounddevice as sd
import soundfile as sf
import pyttsx3
import speech_recognition as sr
from scipy.io import wavfile

# Set up OpenAI API credentials
openai.api_key = "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"

# Define function to interact with ChatGPT and return its response
def ask_chatbot(prompt):
    # Set up parameters for the API request
    model_engine = "text-davinci-002"
    prompt = f"{prompt}\nChatbot:"
    max_tokens = 1024
    temperature = 0.7

    # Send the prompt to the API and wait for the response
    response = openai.Completion.create(
        engine=model_engine,
        prompt=prompt,
        max_tokens=max_tokens,
        temperature=temperature,
    )
    time.sleep(1)  # Wait a bit to avoid rate limiting
    return response.choices[0].text.strip()

# Define function to convert text to speech
def speak_response(response):
    engine = pyttsx3.init()
    engine.setProperty("rate", 150)  # You can adjust the speech rate (words per minute)
    engine.say(response)
    engine.runAndWait()

# Define function to record audio from USB microphone
def record_audio(file_name):
    sample_rate = 16000
    duration = 5  # Set the duration for recording (in seconds)
    print("Recording audio...")
    audio = sd.rec(int(duration * sample_rate), samplerate=sample_rate, channels=1, dtype="float32")
    sd.wait()  # Wait until recording is finished
    print("Finished recording.")
    # Save the recorded audio to a file
    sf.write(file_name, audio, sample_rate)

# Define function to transcribe audio to text
def transcribe_audio(file_name):
    r = sr.Recognizer()
    with sr.AudioFile(file_name) as source:
        audio_data = r.record(source)
        text = r.recognize_google(audio_data)
    return text

# Start voice chat with the Chatbot
while True:
    record_audio("user_input.wav")
    user_input_text = transcribe_audio("user_input.wav")
    if user_input_text.lower() in ["exit", "quit", "bye"]:
        break
    bot_response = ask_chatbot(user_input_text)
    print("Chatbot:", bot_response)
    speak_response(bot_response)
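The API key above is hardcoded for simplicity. An alternative sketch is to read it from an environment variable so the key does not live in the script file (assumes the key has already been exported, e.g. export OPENAI_API_KEY=... in ~/.bashrc):

import os
import openai

# Read the key from the environment instead of hardcoding it in the file
openai.api_key = os.environ["OPENAI_API_KEY"]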

Voice Chat with hot word and custom duration of question:
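This version also imports pyaudio, pvporcupine and pydub, which are not in the install list above; assuming the usual pip package names, they can be added with:

pip3 install pyaudio

pip3 install pvporcupine

pip3 install pydub

(On Raspberry Pi OS, pyaudio may be easier to install via sudo apt install python3-pyaudio.)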

import time
import struct
import openai
import sounddevice as sd
import soundfile as sf
import pyaudio
import pyttsx3
import pvporcupine
import speech_recognition as sr
from pydub import AudioSegment
from pydub.silence import split_on_silence
import RPi.GPIO as GPIO

GPIO.setmode(GPIO.BOARD)
GPIO.setwarnings(False)
led = 16
GPIO.setup(led, GPIO.OUT)
GPIO.output(led, GPIO.LOW)

porcupine = None
pa = None
audio_stream = None

# Set up OpenAI API credentials
openai.api_key = "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"

# Define function to interact with ChatGPT and return its response
def ask_chatbot(prompt):
    # Set up parameters for the API request
    model_engine = "text-davinci-002"
    prompt = f"{prompt}\nChatbot:"
    max_tokens = 1024
    temperature = 0.7

    # Send the prompt to the API and wait for the response
    response = openai.Completion.create(
        engine=model_engine,
        prompt=prompt,
        max_tokens=max_tokens,
        temperature=temperature,
    )
    time.sleep(1)  # Wait a bit to avoid rate limiting
    return response.choices[0].text.strip()

# Define function to convert text to speech
def speak_response(response):
    engine = pyttsx3.init()
    engine.setProperty("rate", 150)  # You can adjust the speech rate (words per minute)
    engine.say(response)
    engine.runAndWait()

# Define function to record audio from USB microphone
def record_audio(file_name):
    sample_rate = 16000
    duration = 10  # Set the initial duration for recording (in seconds)
    min_silence_length = 1000  # Minimum silence length for end-of-sentence detection (in milliseconds)
    silence_threshold = -40  # Silence threshold for end-of-sentence detection (in dB)
    print("Recording audio...")
    audio = sd.rec(int(duration * sample_rate), samplerate=sample_rate, channels=1, dtype="float32")
    sd.wait()  # Wait until recording is finished
    print("Finished recording.")

    # Save the recorded audio to a file
    sf.write(file_name, audio, sample_rate)

    # Load the recorded audio file
    audio_segment = AudioSegment.from_file(file_name, format="wav")

    # Split the audio based on silence (end-of-sentence detection)
    chunks = split_on_silence(
        audio_segment,
        min_silence_len=min_silence_length,
        silence_thresh=silence_threshold
    )

    # Determine the longest chunk (assuming it contains the main question)
    main_chunk = max(chunks, key=len)

    # Export the longest chunk as the final audio file
    main_chunk.export(file_name, format="wav")

# Define function to transcribe audio to text
def transcribe_audio(file_name):
    r = sr.Recognizer()
    with sr.AudioFile(file_name) as source:
        audio_data = r.record(source)
        text = r.recognize_google(audio_data)
    return text

# Start voice chat with the Chatbot
try:
    porcupine = pvporcupine.create(access_key="e6zujir/A8i0cDt+7q8uUkD3QQORoKM+lqK6v20vYf2lpmjJHn76Ag==", keywords=["picovoice", "blueberry"])
    pa = pyaudio.PyAudio()
    audio_stream = pa.open(
        rate=porcupine.sample_rate,
        channels=1,
        format=pyaudio.paInt16,
        input=True,
        frames_per_buffer=porcupine.frame_length)

    while True:
        pcm = audio_stream.read(porcupine.frame_length)
        pcm = struct.unpack_from("h" * porcupine.frame_length, pcm)
        keyword_index = porcupine.process(pcm)
        if keyword_index >= 0:
            GPIO.output(led, GPIO.HIGH)
            print("Hotword Detected")
            record_audio("user_input.wav")
            GPIO.output(led, GPIO.LOW)
            user_input_text = transcribe_audio("user_input.wav")
            if user_input_text.lower() in ["exit", "quit", "bye"]:
                break
            bot_response = ask_chatbot(user_input_text)
            print("Chatbot:", bot_response)
            speak_response(bot_response)
finally:
    if porcupine is not None:
        porcupine.delete()
    if audio_stream is not None:
        audio_stream.close()
    if pa is not None:
        pa.terminate()

https://github.com/Picovoice/Porcupine#python-demos

https://picovoice.ai/docs/quick-start/porcupine-python/
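The script above uses Porcupine's built-in keywords ("picovoice", "blueberry"). A custom wake word trained on the Picovoice Console can be loaded by passing its .ppn file via keyword_paths instead of keywords; a minimal sketch, with a hypothetical file path:

import pvporcupine

porcupine = pvporcupine.create(
    access_key="YOUR_PICOVOICE_ACCESS_KEY",  # AccessKey from the Picovoice Console
    keyword_paths=["/home/pi/hey-pi_raspberry-pi.ppn"]  # hypothetical path to a custom .ppn model
)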