diff --git a/.github/workflows/verify-litert-mcp.yml b/.github/workflows/verify-litert-mcp.yml
new file mode 100644
index 000000000..c04e39f5c
--- /dev/null
+++ b/.github/workflows/verify-litert-mcp.yml
@@ -0,0 +1,35 @@
+name: Verify LiteRT-LM MCP
+
+on:
+  push:
+    paths:
+      - 'mcp-servers/litert-mcp/**'
+      - '.github/workflows/verify-litert-mcp.yml'
+  pull_request:
+    paths:
+      - 'mcp-servers/litert-mcp/**'
+      - '.github/workflows/verify-litert-mcp.yml'
+
+jobs:
+  verify:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+
+      - name: Verify Server Startup (Initialize)
+        run: |
+          echo '{"jsonrpc": "2.0", "id": 1, "method": "initialize", "params": {}}' | python3 mcp-servers/litert-mcp/server.py
+
+      - name: Verify Tools List
+        run: |
+          OUTPUT=$(echo '{"jsonrpc": "2.0", "id": 2, "method": "tools/list", "params": {}}' | python3 mcp-servers/litert-mcp/server.py)
+          echo "$OUTPUT"
+          if [[ "$OUTPUT" != *"run_inference"* ]]; then
+            echo "Error: run_inference tool not found in output"
+            exit 1
+          fi
diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 000000000..00396f1b4
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,23 @@
+# AGENTS.md
+
+## Mission
+To enable autonomous AI agents to efficiently reason, plan, and execute tasks within this repository, specifically focusing on the integration and orchestration of Model Context Protocol (MCP) servers and Video Intelligence pipelines.
+
+## Scope of "Actionable Room"
+Agents are granted permission and encouraged to:
+1. **Modify and Expand `mcp-servers/`**: Create new MCP servers, update existing ones, and refactor code to improve modularity and performance.
+2. **Improve Automation**: Create and edit GitHub Actions workflows (`.github/workflows/`) to add robust testing and verification for new features.
+3. **Refactor for Clarity**: Improve documentation (READMEs) and code structure to facilitate better "Machine Readability" and "Human Understandability".
+
+## Protocols
+1. **Verify Before Submit**:
+   - Always run relevant verification scripts or tests before submitting changes.
+   - If no test exists for a new feature, **create one**.
+2. **CI/CD Alignment**:
+   - Ensure all changes pass existing CI checks.
+   - When adding a new component (like an MCP server), add a corresponding CI workflow to ensure it remains functional.
+3. **Cross-Platform Compatibility**:
+   - Write code that is compatible with Linux and Windows environments whenever possible (e.g., handling `asyncio` loops correctly).
+4. **Documentation**:
+   - Update `README.md` files when interface changes occur.
+   - Document limitations (e.g., "Text-only CLI wrapper") clearly.
diff --git a/mcp-servers/litert-mcp/README.md b/mcp-servers/litert-mcp/README.md
new file mode 100644
index 000000000..96987a527
--- /dev/null
+++ b/mcp-servers/litert-mcp/README.md
@@ -0,0 +1,84 @@
+# LiteRT-LM MCP Server
+
+This MCP server provides an interface to Google's **LiteRT-LM**, a high-performance runtime for Large Language Models (LLMs) on edge devices (Android, iOS, Linux, macOS, Windows).
+
+It allows you to run inference on local models (like Gemma, Phi, Qwen) directly from your MCP ecosystem.
+
+**Note:** This server currently wraps the `lit` CLI. Multimodal inputs (image/audio) are declared in the tool interface but require the C++ API or Python bindings; the CLI wrapper supports **text-only inference** until the multimodal CLI flags are verified.
+
+## Prerequisites
+
+1. **LiteRT-LM**: You must have LiteRT-LM installed or built.
+   * Official Repository: [google-ai-edge/LiteRT-LM](https://github.com/google-ai-edge/LiteRT-LM)
+   * Follow the "Build and Run" instructions in the official repo to build the `lit` CLI or `litert_lm_main` binary.
+   * Alternatively, download prebuilt binaries if available for your platform.
+
+2. **Models**: Download a supported `.litertlm` model.
+   * Models are available on Hugging Face: [LiteRT Community](https://huggingface.co/litert-community)
+
+## Configuration
+
+Set the following environment variables:
+
+* `LIT_BINARY_PATH`: Path to the `lit` CLI executable or `litert_lm_main` binary. Defaults to `lit` (assuming it is on your `PATH`).
+* `LIT_MODEL_PATH`: Default path to your `.litertlm` model file (optional; can also be passed per request).
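+
+For example (the paths below are placeholders for your own binary and model locations):
+
+```bash
+export LIT_BINARY_PATH=/path/to/litert_lm_main
+export LIT_MODEL_PATH=/path/to/model.litertlm
+```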
+
+## Usage
+
+### Tools
+
+#### `run_inference`
+
+Runs inference using the configured LiteRT-LM model.
+
+* **Arguments**:
+  * `prompt` (string, required): The input text prompt.
+  * `model_path` (string, optional): Path to the `.litertlm` model file. Overrides `LIT_MODEL_PATH` env var.
+  * `image_path` (string, optional): Path to an image file for multimodal inference.
+  * `audio_path` (string, optional): Path to an audio file for multimodal inference.
+  * `backend` (string, optional): Backend to use (`cpu`, `gpu`, `npu`). Defaults to `cpu`.
+
+### Example
+
+```json
+{
+  "name": "run_inference",
+  "arguments": {
+    "prompt": "What is the capital of France?",
+    "backend": "cpu"
+  }
+}
+```
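+
+On success, `run_inference` returns a JSON document embedded in the response's text content, shaped roughly like this (the values are placeholders):
+
+```json
+{
+  "status": "success",
+  "output": "<model response text>",
+  "debug_stderr": "<any runtime logging captured on stderr>"
+}
+```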
+
+## Setup for Development
+
+This server uses a manual JSON-RPC implementation to avoid external dependencies in the base environment. Just run:
+
+```bash
+python3 server.py
+```
+
+Ensure `LIT_BINARY_PATH` is set correctly.
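+
+To sanity-check the JSON-RPC plumbing without a model, pipe a request over stdio (the same check the CI workflow runs):
+
+```bash
+echo '{"jsonrpc": "2.0", "id": 2, "method": "tools/list", "params": {}}' | python3 server.py
+```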
diff --git a/mcp-servers/litert-mcp/server.py b/mcp-servers/litert-mcp/server.py
new file mode 100644
index 000000000..0e0b2bbb8
--- /dev/null
+++ b/mcp-servers/litert-mcp/server.py
@@ -0,0 +1,299 @@
+#!/usr/bin/env python3
+"""
+LiteRT-LM MCP Server
+====================
+
+Exposes Google's LiteRT-LM (Edge LLM Runtime) capabilities as an MCP Server.
+"""
+
+import asyncio
+import json
+import logging
+import sys
+import os
+import shutil
+from typing import Dict, Any, Optional
+
+# Configure logging
+logging.basicConfig(level=logging.INFO, stream=sys.stderr)
+LOGGER = logging.getLogger("litert-mcp-server")
+
+# Constants
+MCP_VERSION = "2024-11-05"
+
+class MCPServer:
+    def __init__(self):
+        self.default_model_path = os.environ.get("LIT_MODEL_PATH")
+        self.lit_binary = os.environ.get("LIT_BINARY_PATH", "lit")
+
+    async def handle_request(self, request_data: Dict[str, Any]) -> Optional[Dict[str, Any]]:
+        """Handle incoming JSON-RPC requests."""
+        request_id = request_data.get("id")
+        method = request_data.get("method")
+        params = request_data.get("params", {})
+
+        LOGGER.info(f"Handling request: {method} (ID: {request_id})")
+
+        try:
+            if method == "initialize":
+                return self._handle_initialize(request_id, params)
+            elif method == "tools/list":
+                return self._handle_tools_list(request_id)
+            elif method == "tools/call":
+                return await self._handle_tools_call(request_id, params)
+            elif method == "notifications/initialized":
+                return None  # No response needed
+            else:
+                # For unknown methods, return the proper JSON-RPC error code -32601
+                if request_id is not None:
+                    return {
+                        "jsonrpc": "2.0",
+                        "id": request_id,
+                        "error": {"code": -32601, "message": f"Method not found: {method}"},
+                    }
+                return None
+
+        except Exception as e:
+            LOGGER.error(f"Error handling request: {e}", exc_info=True)
+            if request_id is not None:
+                return {
+                    "jsonrpc": "2.0",
+                    "id": request_id,
+                    "error": {"code": -32000, "message": str(e)},
+                }
+            return None
+
+    def _handle_initialize(self, request_id, params):
+        return {
+            "jsonrpc": "2.0",
+            "id": request_id,
+            "result": {
+                "protocolVersion": MCP_VERSION,
+                "serverInfo": {
+                    "name": "LiteRT-LM MCP",
+                    "version": "1.0.0",
+                },
+                "capabilities": {
+                    "tools": {},
+                },
+            }
+        }
+
+    def _handle_tools_list(self, request_id):
+        return {
+            "jsonrpc": "2.0",
+            "id": request_id,
+            "result": {
+                "tools": [
+                    {
+                        "name": "run_inference",
+                        "description": "Run inference using a LiteRT-LM model. Supports text generation and optionally multimodal inputs if supported by the runtime.",
+                        "inputSchema": {
+                            "type": "object",
+                            "properties": {
+                                "prompt": {
+                                    "type": "string",
+                                    "description": "The input text prompt."
+                                },
+                                "model_path": {
+                                    "type": "string",
+                                    "description": "Path to the .litertlm model file. Overrides LIT_MODEL_PATH."
+                                },
+                                "image_path": {
+                                    "type": "string",
+                                    "description": "Path to an image file for multimodal inference."
+                                },
+                                "audio_path": {
+                                    "type": "string",
+                                    "description": "Path to an audio file for multimodal inference."
+                                },
+                                "backend": {
+                                    "type": "string",
+                                    "enum": ["cpu", "gpu", "npu"],
+                                    "default": "cpu",
+                                    "description": "Compute backend to use."
+                                }
+                            },
+                            "required": ["prompt"]
+                        }
+                    }
+                ]
+            }
+        }
+
+    async def _handle_tools_call(self, request_id, params):
+        tool_name = params.get("name")
+        arguments = params.get("arguments", {})
+
+        result = {}
+
+        if tool_name == "run_inference":
+            result = await self._run_inference(arguments)
+        else:
+            raise Exception(f"Unknown tool: {tool_name}")
+
+        return {
+            "jsonrpc": "2.0",
+            "id": request_id,
+            "result": {
+                "content": [
+                    {
+                        "type": "text",
+                        "text": json.dumps(result, indent=2)
+                    }
+                ]
+            }
+        }
+
+    async def _run_inference(self, args: Dict[str, Any]) -> Dict[str, Any]:
+        prompt = args.get("prompt")
+        model_path = args.get("model_path") or self.default_model_path
+        image_path = args.get("image_path")
+        audio_path = args.get("audio_path")
+        backend = args.get("backend", "cpu")
+
+        # Validate Prompt
+        if not prompt:
+            return {
+                "status": "error",
+                "message": "Prompt is required and cannot be empty."
+            }
+
+        # Validate Backend
+        valid_backends = {"cpu", "gpu", "npu"}
+        if backend not in valid_backends:
+            return {
+                "status": "error",
+                "message": f"Invalid backend '{backend}'. Must be one of {sorted(list(valid_backends))}."
+            }
+
+        if not model_path:
+            return {
+                "status": "error",
+                "message": "No model path provided. Set LIT_MODEL_PATH env var or pass model_path argument."
+            }
+
+        # Check if binary exists
+        binary_path = shutil.which(self.lit_binary)
+        # If it's a direct path (e.g. ./lit), shutil.which might return None if not in PATH, so check explicitly
+        if not binary_path and os.path.exists(self.lit_binary):
+            binary_path = self.lit_binary
+
+        if not binary_path:
+            return {
+                "status": "error",
+                "message": f"LiteRT binary '{self.lit_binary}' not found. Please set LIT_BINARY_PATH or install LiteRT-LM."
+            }
+
+        # Construct command
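+        # NOTE: The flag names used below (--backend, --model_path, --input_prompt) are
+        # assumed from upstream LiteRT-LM CLI examples; verify them against the --help
+        # output of your lit / litert_lm_main build and adjust if they differ. The final
+        # command has the form: <binary> --backend cpu --model_path <model> --input_prompt "<prompt>"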
+        cmd = [binary_path]
+        cmd.extend(["--backend", backend])
+        cmd.extend(["--model_path", model_path])
+
+        # Multimodal Handling
+        # The current 'lit' CLI wrapper does not support verified multimodal input flags.
+        # We restrict to text-only to avoid speculative errors.
+        if image_path or audio_path:
+            return {
+                "status": "error",
+                "message": "Multimodal input (image/audio) is not yet supported via the 'lit' CLI wrapper. Please use the LiteRT-LM C++ or Python API directly, or update this server implementation once CLI flags are verified."
+            }
+
+        cmd.extend(["--input_prompt", prompt])
+
+        LOGGER.info(f"Executing command: {' '.join(cmd)}")
+
+        try:
+            process = await asyncio.create_subprocess_exec(
+                *cmd,
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE
+            )
+            stdout, stderr = await process.communicate()
+
+            stdout_str = stdout.decode().strip()
+            stderr_str = stderr.decode().strip()
+
+            if process.returncode != 0:
+                return {
+                    "status": "error",
+                    "code": process.returncode,
+                    "stdout": stdout_str,
+                    "stderr": stderr_str,
+                    "message": "LiteRT-LM execution failed."
+                }
+
+            return {
+                "status": "success",
+                "output": stdout_str,
+                "debug_stderr": stderr_str
+            }
+
+        except Exception as e:
+            return {
+                "status": "error",
+                "message": str(e)
+            }
+
+async def main():
+    server = MCPServer()
+    LOGGER.info("LiteRT-LM MCP Server running on stdio...")
+
+    reader = asyncio.StreamReader()
+    protocol = asyncio.StreamReaderProtocol(reader)
+    await asyncio.get_running_loop().connect_read_pipe(lambda: protocol, sys.stdin)
+
+    writer = None
+    if sys.platform != "win32":
+        try:
+            w_transport, w_protocol = await asyncio.get_running_loop().connect_write_pipe(
+                asyncio.Protocol, sys.stdout
+            )
+            writer = asyncio.StreamWriter(w_transport, w_protocol, None, asyncio.get_running_loop())
+        except Exception as e:
+            LOGGER.warning(f"Could not connect write pipe to stdout: {e}. Falling back to print.")
+            writer = None
+    else:
+        # Windows fallback:
+        # On Windows, connecting a pipe to stdout using asyncio can be problematic with the default loop.
+        # We fall back to standard print() which works for basic JSON-RPC over stdio.
+        LOGGER.info("Windows detected: Using print() fallback for stdout.")
+        writer = None
+
+    while True:
+        try:
+            line = await reader.readline()
+            if not line:
+                break
+
+            try:
+                request = json.loads(line)
+                response = await server.handle_request(request)
+
+                if response:
+                    response_str = json.dumps(response) + "\n"
+                    if writer:
+                        writer.write(response_str.encode())
+                        try:
+                            await writer.drain()
+                        except (AttributeError, BrokenPipeError) as e:
+                            LOGGER.warning(f"Error draining writer: {e}. Switching to print fallback.")
+                            writer = None
+                            print(response_str, end="", flush=True)
+                    else:
+                        print(response_str, end="", flush=True)
+
+            except json.JSONDecodeError:
+                LOGGER.error(f"Invalid JSON received: {line}")
+        except Exception as e:
+            LOGGER.error(f"Loop error: {e}")
+            break
+
+if __name__ == "__main__":
+    if sys.platform == "win32":
+        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
+    asyncio.run(main())