diff --git a/.env.example b/.env.example index 3a449b2..1bd975b 100644 --- a/.env.example +++ b/.env.example @@ -1,7 +1,3 @@ LIVEKIT_URL= LIVEKIT_API_KEY= LIVEKIT_API_SECRET= - -OPENAI_API_KEY= -DEEPGRAM_API_KEY= -CARTESIA_API_KEY= diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index ec10142..593ae14 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -17,7 +17,7 @@ jobs: strategy: matrix: - node-version: [18, 20, 22] + node-version: [22] steps: - name: Checkout code @@ -26,13 +26,12 @@ jobs: - name: Install pnpm uses: pnpm/action-setup@v4 with: - version: 9.15.9 + version: 10.15.0 - name: Setup Node.js ${{ matrix.node-version }} uses: actions/setup-node@v4 with: node-version: ${{ matrix.node-version }} - cache: 'pnpm' - name: Install dependencies run: pnpm install @@ -54,13 +53,12 @@ jobs: - name: Install pnpm uses: pnpm/action-setup@v4 with: - version: 9.15.9 + version: 10.15.0 - - name: Setup Node.js ${{ matrix.node-version }} + - name: Setup Node.js uses: actions/setup-node@v4 with: - node-version: ${{ matrix.node-version }} - cache: 'pnpm' + node-version: 22 - name: Install dependencies run: pnpm install diff --git a/README.md b/README.md index aa7e8a2..7e1b5bb 100644 --- a/README.md +++ b/README.md @@ -4,16 +4,17 @@ # LiveKit Agents Starter - Node.js -A complete starter project for building voice AI apps with [LiveKit Agents for Node.js](https://github.com/livekit/agents-js). +A complete starter project for building voice AI apps with [LiveKit Agents for Node.js](https://github.com/livekit/agents-js) and [LiveKit Cloud](https://cloud.livekit.io/). 
The starter project includes: -- A simple voice AI assistant based on the [AI Voice Assistant quickstart](https://docs.livekit.io/agents/start/voice-ai/) -- Voice AI pipeline based on [OpenAI](https://docs.livekit.io/agents/integrations/llm/openai/), [Cartesia](https://docs.livekit.io/agents/integrations/tts/cartesia/), and [Deepgram](https://docs.livekit.io/agents/integrations/stt/deepgram/) - - Easily integrate your preferred [LLM](https://docs.livekit.io/agents/integrations/llm/), [STT](https://docs.livekit.io/agents/integrations/stt/), and [TTS](https://docs.livekit.io/agents/integrations/tts/) instead, or swap to a realtime model like the [OpenAI Realtime API](https://docs.livekit.io/agents/integrations/realtime/openai) +- A simple voice AI assistant, ready for extension and customization +- A voice AI pipeline with [models](https://docs.livekit.io/agents/models) from OpenAI, Cartesia, and AssemblyAI served through LiveKit Cloud + - Easily integrate your preferred [LLM](https://docs.livekit.io/agents/models/llm/), [STT](https://docs.livekit.io/agents/models/stt/), and [TTS](https://docs.livekit.io/agents/models/tts/) instead, or swap to a realtime model like the [OpenAI Realtime API](https://docs.livekit.io/agents/models/realtime/openai) - [LiveKit Turn Detector](https://docs.livekit.io/agents/build/turns/turn-detector/) for contextually-aware speaker detection, with multilingual support -- [LiveKit Cloud enhanced noise cancellation](https://docs.livekit.io/home/cloud/noise-cancellation/) -- Integrated [metrics and logging](https://docs.livekit.io/agents/v0/build/metrics/) +- [Background voice cancellation](https://docs.livekit.io/home/cloud/noise-cancellation/) +- Integrated [metrics and logging](https://docs.livekit.io/agents/build/metrics/) +- A Dockerfile ready for [production deployment](https://docs.livekit.io/agents/ops/deployment/) This starter app is compatible with any [custom web/mobile frontend](https://docs.livekit.io/agents/start/frontend/) or 
[SIP-based telephony](https://docs.livekit.io/agents/start/telephony/). @@ -24,34 +25,32 @@ This project uses [pnpm](https://pnpm.io/) as the package manager. Clone the repository and install dependencies: ```console -cd agent-starter-nodejs +cd agent-starter-node pnpm install ``` -Set up the environment by copying `.env.example` to `.env.local` and filling in the required values: +Sign up for [LiveKit Cloud](https://cloud.livekit.io/) then set up the environment by copying `.env.example` to `.env.local` and filling in the required keys: -- `LIVEKIT_URL`: Use [LiveKit Cloud](https://cloud.livekit.io/) or [run your own](https://docs.livekit.io/home/self-hosting/) +- `LIVEKIT_URL` - `LIVEKIT_API_KEY` - `LIVEKIT_API_SECRET` -- `OPENAI_API_KEY`: [Get a key](https://platform.openai.com/api-keys) or use your [preferred LLM provider](https://docs.livekit.io/agents/integrations/llm/) -- `DEEPGRAM_API_KEY`: [Get a key](https://console.deepgram.com/) or use your [preferred STT provider](https://docs.livekit.io/agents/integrations/stt/) -- `CARTESIA_API_KEY`: [Get a key](https://play.cartesia.ai/keys) or use your [preferred TTS provider](https://docs.livekit.io/agents/integrations/tts/) You can load the LiveKit environment automatically using the [LiveKit CLI](https://docs.livekit.io/home/cli/cli-setup): ```bash -lk app env -w .env.local +lk cloud auth +lk app env -w -d .env.local ``` ## Run the agent -Before your first run, you must download certain models such as [Silero VAD](https://docs.livekit.io/agents/integrations/vad/) and the [LiveKit turn detector](https://docs.livekit.io/agents/build/turns/turn-detector/): +Before your first run, you must download certain models such as [Silero VAD](https://docs.livekit.io/agents/build/turns/vad/) and the [LiveKit turn detector](https://docs.livekit.io/agents/build/turns/turn-detector/): ```console pnpm run download-files ``` -To run the agent for use with a frontend or telephony, use the `dev` command: +To run the agent during 
development, use the `dev` command: ```console pnpm run dev @@ -75,7 +74,7 @@ Get started quickly with our pre-built frontend starter apps, or add telephony s | **React Native** | [`livekit-examples/voice-assistant-react-native`](https://github.com/livekit-examples/voice-assistant-react-native) | Native mobile app with React Native & Expo | | **Android** | [`livekit-examples/agent-starter-android`](https://github.com/livekit-examples/agent-starter-android) | Native Android app with Kotlin & Jetpack Compose | | **Web Embed** | [`livekit-examples/agent-starter-embed`](https://github.com/livekit-examples/agent-starter-embed) | Voice AI widget for any website | -| **Telephony** | [📚 Documentation](https://docs.livekit.io/agents/start/telephony/) | Add inbound or outbound calling to your agent | +| **Telephony** | [📚 Documentation](https://docs.livekit.io/agents/start/telephony/) | Add inbound or outbound calling to your agent | For advanced customization, see the [complete frontend guide](https://docs.livekit.io/agents/start/frontend/). @@ -91,6 +90,10 @@ Once you've started your own project based on this repo, you should: This project is production-ready and includes a working `Dockerfile`. To deploy it to LiveKit Cloud or another environment, see the [deploying to production](https://docs.livekit.io/agents/ops/deployment/) guide. +## Self-hosted LiveKit + +You can also self-host LiveKit instead of using LiveKit Cloud. See the [self-hosting](https://docs.livekit.io/home/self-hosting/) guide for more information. If you choose to self-host, you'll need to also use [model plugins](https://docs.livekit.io/agents/models/#plugins) instead of LiveKit Inference and will need to remove the [LiveKit Cloud noise cancellation](https://docs.livekit.io/home/cloud/noise-cancellation/) plugin. + ## License This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. 
diff --git a/eslint.config.ts b/eslint.config.ts index 2c01edd..31ec5f2 100644 --- a/eslint.config.ts +++ b/eslint.config.ts @@ -1,9 +1,14 @@ -import js from "@eslint/js"; -import globals from "globals"; -import tseslint from "typescript-eslint"; -import { defineConfig } from "eslint/config"; +import js from '@eslint/js'; +import { defineConfig } from 'eslint/config'; +import globals from 'globals'; +import tseslint from 'typescript-eslint'; export default defineConfig([ - { files: ["**/*.{js,mjs,cjs,ts,mts,cts}"], plugins: { js }, extends: ["js/recommended"], languageOptions: { globals: globals.node } }, + { + files: ['**/*.{js,mjs,cjs,ts,mts,cts}'], + plugins: { js }, + extends: ['js/recommended'], + languageOptions: { globals: globals.node }, + }, tseslint.configs.recommended, ]); diff --git a/package.json b/package.json index b5d1c2b..6cfd3af 100644 --- a/package.json +++ b/package.json @@ -33,12 +33,9 @@ "typescript-eslint": "^8.41.0" }, "dependencies": { - "@livekit/agents": "^1.0.0", - "@livekit/agents-plugin-cartesia": "^1.0.0", - "@livekit/agents-plugin-deepgram": "^1.0.0", - "@livekit/agents-plugin-livekit": "^1.0.0", - "@livekit/agents-plugin-openai": "^1.0.0", - "@livekit/agents-plugin-silero": "^1.0.0", + "@livekit/agents": "^1.0.7", + "@livekit/agents-plugin-livekit": "^1.0.7", + "@livekit/agents-plugin-silero": "^1.0.7", "@livekit/noise-cancellation-node": "^0.1.9", "dotenv": "^17.2.1", "zod": "^3.25.76" diff --git a/src/agent.ts b/src/agent.ts index 0b1b0c2..5f01d16 100644 --- a/src/agent.ts +++ b/src/agent.ts @@ -4,46 +4,45 @@ import { WorkerOptions, cli, defineAgent, - llm, metrics, voice, } from '@livekit/agents'; -import * as cartesia from '@livekit/agents-plugin-cartesia'; -import * as deepgram from '@livekit/agents-plugin-deepgram'; import * as livekit from '@livekit/agents-plugin-livekit'; -import * as openai from '@livekit/agents-plugin-openai'; import * as silero from '@livekit/agents-plugin-silero'; import { BackgroundVoiceCancellation } 
from '@livekit/noise-cancellation-node'; import dotenv from 'dotenv'; import { fileURLToPath } from 'node:url'; -import { z } from 'zod'; dotenv.config({ path: '.env.local' }); class Assistant extends voice.Agent { constructor() { super({ - instructions: `You are a helpful voice AI assistant. + instructions: `You are a helpful voice AI assistant. The user is interacting with you via voice, even if you perceive the conversation as text. You eagerly assist users with their questions by providing information from your extensive knowledge. Your responses are concise, to the point, and without any complex formatting or punctuation including emojis, asterisks, or other symbols. You are curious, friendly, and have a sense of humor.`, - tools: { - getWeather: llm.tool({ - description: `Use this tool to look up current weather information in the given location. - If the location is not supported by the weather service, the tool will indicate this. You must tell the user the location's weather is unavailable.`, - parameters: z.object({ - location: z - .string() - .describe('The location to look up weather information for (e.g. city name)'), - }), - execute: async ({ location }) => { - console.log(`Looking up weather for ${location}`); - - return 'sunny with a temperature of 70 degrees.'; - }, - }), - }, + // To add tools, specify `tools` in the constructor. + // Here's an example that adds a simple weather tool. + // You also have to add `import { llm } from '@livekit/agents'` and `import { z } from 'zod'` to the top of this file + // tools: { + // getWeather: llm.tool({ + // description: `Use this tool to look up current weather information in the given location. + // + // If the location is not supported by the weather service, the tool will indicate this. You must tell the user the location's weather is unavailable.`, + // parameters: z.object({ + // location: z + // .string() + // .describe('The location to look up weather information for (e.g. 
city name)'), + // }), + // execute: async ({ location }) => { + // console.log(`Looking up weather for ${location}`); + // + // return 'sunny with a temperature of 70 degrees.'; + // }, + // }), + // }, }); } } @@ -53,28 +52,33 @@ export default defineAgent({ proc.userData.vad = await silero.VAD.load(); }, entry: async (ctx: JobContext) => { - // Set up a voice AI pipeline using OpenAI, Cartesia, Deepgram, and the LiveKit turn detector + // Set up a voice AI pipeline using OpenAI, Cartesia, AssemblyAI, and the LiveKit turn detector const session = new voice.AgentSession({ - // A Large Language Model (LLM) is your agent's brain, processing user input and generating a response - // See all providers at https://docs.livekit.io/agents/integrations/llm/ - llm: new openai.LLM({ model: 'gpt-4o-mini' }), // Speech-to-text (STT) is your agent's ears, turning the user's speech into text that the LLM can understand - // See all providers at https://docs.livekit.io/agents/integrations/stt/ - stt: new deepgram.STT({ model: 'nova-3' }), + // See all available models at https://docs.livekit.io/agents/models/stt/ + stt: 'assemblyai/universal-streaming:en', + + // A Large Language Model (LLM) is your agent's brain, processing user input and generating a response + // See all providers at https://docs.livekit.io/agents/models/llm/ + llm: 'openai/gpt-4.1-mini', + // Text-to-speech (TTS) is your agent's voice, turning the LLM's text into speech that the user can hear - // See all providers at https://docs.livekit.io/agents/integrations/tts/ - tts: new cartesia.TTS({ - voice: '6f84f4b8-58a2-430c-8c79-688dad597532', - }), + // See all available models as well as voice selections at https://docs.livekit.io/agents/models/tts/ + tts: 'cartesia/sonic-2:9626c31c-bec5-4cca-baa8-f8ba9e84c8bc', + // VAD and turn detection are used to determine when the user is speaking and when the agent should respond // See more at https://docs.livekit.io/agents/build/turns turnDetection: new 
livekit.turnDetector.MultilingualModel(), vad: ctx.proc.userData.vad! as silero.VAD, }); - // To use a realtime model instead of a voice pipeline, use the following session setup instead: + // To use a realtime model instead of a voice pipeline, use the following session setup instead. + // (Note: This is for the OpenAI Realtime API. For other providers, see https://docs.livekit.io/agents/models/realtime/) + // 1. Install '@livekit/agents-plugin-openai' + // 2. Set OPENAI_API_KEY in .env.local + // 3. Add import `import * as openai from '@livekit/agents-plugin-openai'` to the top of this file + // 4. Use the following session setup instead of the version above // const session = new voice.AgentSession({ - // // See all providers at https://docs.livekit.io/agents/integrations/realtime/ // llm: new openai.realtime.RealtimeModel({ voice: 'marin' }), // });