Here’s a small voice agent that collects an email address using the built-in GetEmailTask workflow:
import logging
from livekit.agents.beta.workflows import GetEmailTask
from livekit.agents import function_tool, RunContext
from dotenv import load_dotenv
from livekit.agents import (
Agent,
AgentServer,
AgentSession,
JobContext,
JobProcess,
cli,
inference,
room_io,
)
from livekit.plugins import ai_coustics, silero
from livekit.plugins.turn_detector.multilingual import MultilingualModel
# Module-level logger for this agent process.
logger = logging.getLogger("agent")
# Load environment variables (API keys, LiveKit credentials) from .env.local.
load_dotenv(".env.local")
# Inference model identifier used for the session's LLM below.
AGENT_MODEL = "openai/gpt-5.3-chat-latest"
class Assistant(Agent):
    """Voice assistant that captures a validated email address.

    Delegates the actual capture to the guided GetEmailTask workflow and
    surfaces it to the LLM as the ``collect_email`` tool.
    """

    def __init__(self) -> None:
        super().__init__(
            instructions="""You are a helpful voice AI assistant.
The user is on a voice call. Keep responses short, natural, and without markdown or symbols.
When you need the user's email, always call collect_email so the guided email workflow can capture it.
After collect_email returns, confirm the address with the user in plain language.""",
        )

    async def on_enter(self) -> None:
        # Kick off the conversation as soon as the agent joins: greet, then
        # steer the model straight into the email-capture tool.
        await self.session.generate_reply(
            instructions=(
                "Greet the user briefly, say you need their email address, and call the "
                "collect_email tool so the guided capture flow can run."
            ),
            tools=["collect_email"],
        )

    @function_tool
    async def collect_email(self, context: RunContext) -> str:
        """Collect the user's email using the built-in email capture workflow.
        Use this whenever you need a validated email address from the user.
        """
        # The task runs its own guided dialogue against the current chat context.
        result = await GetEmailTask(
            chat_ctx=self.chat_ctx,
            extra_instructions="",
        )
        address = result.email_address

        # Loud banner so the captured address is easy to spot in the logs.
        banner = "****************************************************************"
        logger.info(banner)
        logger.info("*** EMAIL COLLECTED: %s ***", address)
        logger.info(banner)

        return f"Collected email: {address}"
server = AgentServer()

def prewarm(proc: JobProcess):
    # Load the Silero VAD model once per worker process so every job can
    # reuse it (read back via ctx.proc.userdata["vad"] in the session setup).
    proc.userdata["vad"] = silero.VAD.load()

# Run prewarm when a worker process boots, before any jobs are assigned.
server.setup_fnc = prewarm
@server.rtc_session(agent_name="my-agent")
async def my_agent(ctx: JobContext):
    """Per-room entry point: assemble the voice pipeline and run the assistant."""
    # Tag every log line emitted by this job with the room it belongs to.
    ctx.log_context_fields = {
        "room": ctx.room.name,
    }

    # Build each pipeline stage up front so the session construction reads flat.
    # STT — the agent's ears. Models: https://docs.livekit.io/agents/models/stt/
    speech_to_text = inference.STT(model="deepgram/nova-3", language="multi")
    # LLM — the agent's brain. Models: https://docs.livekit.io/agents/models/llm/
    language_model = inference.LLM(model=AGENT_MODEL)
    # TTS — the agent's voice. Models/voices: https://docs.livekit.io/agents/models/tts/
    text_to_speech = inference.TTS(
        model="cartesia/sonic-3", voice="9626c31c-bec5-4cca-baa8-f8ba9e84c8bc"
    )

    session = AgentSession(
        stt=speech_to_text,
        llm=language_model,
        tts=text_to_speech,
        # VAD + turn detection decide when the user is speaking and when the
        # agent should reply: https://docs.livekit.io/agents/build/turns
        turn_detection=MultilingualModel(),
        vad=ctx.proc.userdata["vad"],
        # Let the LLM start drafting a reply before end-of-turn is confirmed:
        # https://docs.livekit.io/agents/build/audio/#preemptive-generation
        preemptive_generation=True,
    )

    # Start the session — initializes the pipeline, warms the models, and
    # enables mic noise suppression on the incoming audio.
    await session.start(
        agent=Assistant(),
        room=ctx.room,
        room_options=room_io.RoomOptions(
            audio_input=room_io.AudioInputOptions(
                noise_cancellation=ai_coustics.audio_enhancement(
                    model=ai_coustics.EnhancerModel.QUAIL_VF_L
                ),
            ),
        ),
    )

    # Finally join the room and connect to the user.
    await ctx.connect()
if __name__ == "__main__":
    # Launch the LiveKit agent worker via the CLI entry point.
    cli.run_app(server)