Hi everyone,
I’m using a custom behaviour in my python server where I handle a text input from my client, then I make a request to another server (of my own) and this response I send it back to the client using AgentSession.say() to generate a video and audio response with LiveAvatar. My problem appear when I implement a way to use the say method with a local mp3 file (I use ffmpeg and pydub AudioSegment to convert from mp3 to PCM and rtc.AudioFrame), if I await by the next way it just never finish the task and block me from start a new task using say method (even when I implemented a AgentSession.interrupt before the say). This is my current implementation:
def mp3_to_audio_frame(path: str)-> tuple[bytes, int, int, int]:
#MP3 to mono, 16-bit, PCM, 48kHz
audio = AudioSegment.from_mp3(path)
audio = audio.set_channels(1)
audio = audio.set_sample_width(2)
audio = audio.set_frame_rate(48000)
raw_data = audio.raw_data
num_channels = 1
samples_per_channel = len(raw_data) // (2 * num_channels)
return raw_data, 48000, 1, samples_per_channel
sync def single_shot_audio(audio_path: str):
loop = asyncio.get_event_loop()
raw, sample_rate, num_channels, samples_per_channel = await loop.run_in_executor(None, mp3_to_audio_frame, audio_path)
try:
yield rtc.AudioFrame(
data=raw,
sample_rate=sample_rate,
num_channels=num_channels,
samples_per_channel=samples_per_channel
)
await asyncio.sleep(1)
except asyncio.CancelledError:
return
async def play_audio_once(session: AgentSession,audio_path):
path = os.path.join(main_audio_path, audio_path)
handle_speech = session.say(audio=single_shot_audio(path), text="¡Hola!, soy Sophia. ¿En qué puedo ayudarte?", allow_interruptions=True)
In RTC SESSION...
#after start the avatar and session
await play_audio_once(session,saludo_path)
Thank for your help in advance, I hope we can get a solution!