diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index b41ecd2482..64ad93a3f0 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -50,3 +50,6 @@ /tests/dotnet/dotnet-aot-compat/ @agocke @dotnet/appmodel /plugins/dotnet/agents/optimizing-dotnet-performance.agent.md @dotnet/appmodel + +/plugins/dotnet-ai/ @luisquintanilla @JeremyLikness +/tests/dotnet-ai/ @luisquintanilla @JeremyLikness diff --git a/.github/plugin/marketplace.json b/.github/plugin/marketplace.json index 877b28ab55..00803e4071 100644 --- a/.github/plugin/marketplace.json +++ b/.github/plugin/marketplace.json @@ -13,6 +13,11 @@ "name": "dotnet-msbuild", "source": "./plugins/dotnet-msbuild", "description": "Comprehensive MSBuild and .NET build skills: failure diagnosis, performance optimization, code quality, and modernization." + }, + { + "name": "dotnet-ai", + "source": "./plugins/dotnet-ai", + "description": "AI and ML skills for .NET: technology selection, LLM integration, ML.NET, vector search, RAG, ONNX inference, and agentic workflows." 
} ] -} \ No newline at end of file +} diff --git a/plugins/dotnet-ai/agents/.gitkeep b/plugins/dotnet-ai/agents/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/plugins/dotnet-ai/plugin.json b/plugins/dotnet-ai/plugin.json new file mode 100644 index 0000000000..e4a7907e8a --- /dev/null +++ b/plugins/dotnet-ai/plugin.json @@ -0,0 +1,7 @@ +{ + "name": "dotnet-ai", + "version": "0.1.0", + "description": "AI and ML skills for .NET: technology selection, LLM integration, ML.NET, vector search, RAG, ONNX inference, and agentic workflows.", + "skills": "./skills/", + "agents": "./agents/" +} diff --git a/plugins/dotnet-ai/skills/meai-chat-integration/SKILL.md b/plugins/dotnet-ai/skills/meai-chat-integration/SKILL.md new file mode 100644 index 0000000000..3105250e9d --- /dev/null +++ b/plugins/dotnet-ai/skills/meai-chat-integration/SKILL.md @@ -0,0 +1,186 @@ +--- +name: meai-chat-integration +description: | + USE FOR: Adding chat/text generation, streaming responses, conversation history, + middleware (caching, telemetry, logging, function calling), DI registration, + provider switching between OpenAI/Azure OpenAI/Ollama. + DO NOT USE FOR: Classical ML tasks on structured data (use mlnet), running pre-trained ONNX + models (use onnx-runtime-inference), multi-step agent orchestration (use agentic-workflow). +--- + +# MEAI Chat Integration + +Add LLM chat capabilities to a .NET 10+ application using `Microsoft.Extensions.AI`. 
+ +## Inputs + +| Input | Required | Description | +|---|---|---| +| Task description | Yes | What the chat feature should do | +| LLM provider | No | OpenAI, Azure OpenAI, Azure AI Inference, Ollama (defaults to OpenAI) | +| Existing project | No | Current `.csproj`, target framework | + +## Workflow + +### Step 1 · Install Packages + +Always install the abstractions package: + +``` +dotnet add package Microsoft.Extensions.AI +``` + +Then install the provider package: + +| Provider | Package | +|---|---| +| OpenAI | `OpenAI` | +| Azure OpenAI | `Azure.AI.OpenAI` | +| Azure AI Inference / GitHub Models | `Azure.AI.Inference` | +| Ollama | `OllamaSharp` | + +### Step 2 · Register IChatClient in DI + +Use the `ChatClientBuilder` pipeline inside `AddChatClient` to compose middleware, then terminate with the concrete provider client. + +```csharp +builder.Services.AddChatClient(pipeline => pipeline + .UseDistributedCache() + .UseOpenTelemetry() + .UseLogging() + .Use(new OpenAIClient(builder.Configuration["OpenAI:Key"]) + .GetChatClient("gpt-4o") + .AsIChatClient())); +``` + +> ⚠️ **NEVER** hardcode API keys. Load them from `builder.Configuration["OpenAI:Key"]` or environment variables. 
+ +For other providers, replace the terminal client: + +```csharp +// Azure OpenAI +new AzureOpenAIClient(new Uri(endpoint), new ApiKeyCredential(key)) + .GetChatClient("gpt-4o").AsIChatClient() + +// Azure AI Inference / GitHub Models +new ChatCompletionsClient(new Uri(endpoint), new AzureKeyCredential(key)) + .AsIChatClient("model-name") + +// Ollama +new OllamaApiClient(new Uri("http://localhost:11434"), "llama3.1") +``` + +### Step 3 · Use IChatClient + +**Simple completion:** + +```csharp +var response = await chatClient.GetResponseAsync("prompt"); +Console.WriteLine(response.Text); +``` + +**Streaming (prefer for user-facing responses):** + +```csharp +await foreach (var update in chatClient.GetStreamingResponseAsync("prompt")) +{ + Console.Write(update.Text); +} +``` + +**Multi-turn conversation:** + +```csharp +List<ChatMessage> history = +[ + new(ChatRole.System, "You are a helpful assistant."), + new(ChatRole.User, "Hello!") +]; +var response = await chatClient.GetResponseAsync(history); +history.AddMessages(response); +``` + +**Structured output:** + +```csharp +var response = await chatClient.GetResponseAsync<MyResponse>("prompt"); // response.Result is the deserialized MyResponse +``` + +**Configure ChatOptions for production:** + +```csharp +var options = new ChatOptions +{ + Temperature = 0.7f, + MaxOutputTokens = 1024, + ModelId = "gpt-4o-2024-08-06" // Pin to dated version +}; +var response = await chatClient.GetResponseAsync("prompt", options); +``` + +Log token usage for cost monitoring — check `response.Usage.InputTokenCount` and `response.Usage.OutputTokenCount` after each call. + +### Step 4 · Add Middleware (conditional) + +Add middleware only when needed, in the `ChatClientBuilder` pipeline from Step 2. + +- **Caching** — `UseDistributedCache()`. Requires an `IDistributedCache` registration (e.g., `AddDistributedMemoryCache()` or Redis). +- **Telemetry** — `UseOpenTelemetry()`. Add `OpenTelemetry` packages and configure an exporter. +- **Logging** — `UseLogging()`. Uses the registered `ILoggerFactory`. 
+- **Function calling** — `UseFunctionInvocation()`. Annotate methods with `[Description]` and register via `AIFunctionFactory.Create()`: + +```csharp +var getWeather = AIFunctionFactory.Create( + [Description("Gets the weather for a city")] + (string city) => $"The weather in {city} is sunny."); + +var options = new ChatOptions { Tools = [getWeather] }; +var response = await chatClient.GetResponseAsync("What's the weather in Seattle?", options); +``` + +- **Retry / Resilience** — Use `.UseRetry()` middleware or integrate Polly for exponential backoff. Essential for handling HTTP 429 rate-limit responses and transient failures. + +### Step 5 · Context Window Management + +Use `Microsoft.ML.Tokenizers` to count tokens before sending large prompts. For the full tokenizer API (all 8 tokenizer types, encoding/decoding, factory patterns), see [references/tokenizers.md](references/tokenizers.md). + +``` +dotnet add package Microsoft.ML.Tokenizers +``` + +```csharp +var tokenizer = TiktokenTokenizer.CreateForModel("gpt-4o"); +int count = tokenizer.CountTokens(text); + +// Truncate to fit context window +var truncated = text.AsSpan()[..tokenizer.GetIndexByTokenCount(text, maxTokens)]; +``` + +### Step 6 · Verify + +- Build and run: `dotnet build && dotnet run`. +- Confirm the LLM returns a response. +- Confirm middleware is active (check cache hits, telemetry traces, log output). + +## Validation + +- IChatClient registered via DI (not `new`'d directly in consuming code). +- API keys loaded from configuration or environment variables (not hardcoded). +- Middleware pipeline configured in the `AddChatClient` builder. +- Streaming used for user-facing responses. +- Context window checked before sending large prompts. + +## Pitfalls + +- **Hardcoding API keys** — Use `builder.Configuration` or environment variables; never inline secrets. +- **Not using DI** — Creating a client per request instead of registering a singleton via `AddChatClient`. 
+- **Ignoring context window limits** — Count tokens with `Microsoft.ML.Tokenizers` before sending large prompts. +- **Catching exceptions too broadly** — Handle provider-specific exceptions (e.g., rate-limit 429 responses) instead of bare `catch`. +- **Not disposing streaming responses** — Always consume the `IAsyncEnumerable` fully or dispose the enumerator. +- **Not pinning model versions** — Use dated model versions (e.g., `gpt-4o-2024-08-06`) in production to prevent output drift when providers update models. +- **Not validating structured output** — LLMs may return malformed JSON or unexpected values. Always wrap structured output parsing in try/catch and implement fallback logic for production use. + +## More Information + +- <https://learn.microsoft.com/dotnet/ai/microsoft-extensions-ai> +- <https://www.nuget.org/packages/Microsoft.Extensions.AI> diff --git a/plugins/dotnet-ai/skills/meai-chat-integration/references/tokenizers.md b/plugins/dotnet-ai/skills/meai-chat-integration/references/tokenizers.md new file mode 100644 index 0000000000..5a847d947e --- /dev/null +++ b/plugins/dotnet-ai/skills/meai-chat-integration/references/tokenizers.md @@ -0,0 +1,113 @@ +# Microsoft.ML.Tokenizers — Token Counting and Text Encoding for .NET + +Read this reference when the user needs to count tokens for context window management, truncate text to a token budget, encode or decode text for LLM or ONNX model preprocessing, or size chunks for RAG ingestion. + +## What It Is + +`Microsoft.ML.Tokenizers` is a pure .NET tokenization library — no Python runtime, no native binaries. It converts text to token IDs and back, matching the exact tokenization behavior of popular LLMs. It is part of the ML.NET ecosystem but works standalone in any .NET application. 
+ +## Install + +``` +dotnet add package Microsoft.ML.Tokenizers +``` + +For model-specific tokenizer data (vocabulary files), install the relevant data package: + +``` +dotnet add package Microsoft.ML.Tokenizers.Data.O200kBase # GPT-4o +dotnet add package Microsoft.ML.Tokenizers.Data.Cl100kBase # GPT-4, GPT-3.5 +``` + +## Supported Tokenizers + +| Class | Algorithm | Models | Factory | +|---|---|---|---| +| `TiktokenTokenizer` | Tiktoken (BPE) | GPT-4o, GPT-4, GPT-3.5-turbo | `TiktokenTokenizer.CreateForModel("gpt-4o")` | +| `BpeTokenizer` | Byte-level BPE | GPT-2, RoBERTa, custom BPE | `BpeTokenizer.Create(vocabPath, mergesPath)` | +| `SentencePieceTokenizer` | SentencePiece (Unigram) | T5, XLNet, mBART | `SentencePieceTokenizer.Create(modelPath)` | +| `LlamaTokenizer` | SentencePiece (BPE) | Llama 2, Llama 3, Mistral | `LlamaTokenizer.Create(modelPath)` | +| `BertTokenizer` | WordPiece | BERT, DistilBERT, MiniLM | `BertTokenizer.Create(vocabPath)` | +| `WordPieceTokenizer` | WordPiece | Custom WordPiece models | `WordPieceTokenizer.Create(vocabPath)` | +| `CodeGenTokenizer` | BPE | CodeGen, Codex | `CodeGenTokenizer.Create(vocabPath, mergesPath)` | +| `Phi2Tokenizer` | BPE | Phi-2 | `Phi2Tokenizer.Create(vocabPath, mergesPath)` | + +All inherit from the abstract `Tokenizer` base class. + +## Core Operations + +### Count Tokens (Most Common) + +Use `CountTokens()` to check whether text fits a model's context window. This is the fast path — it avoids allocating the full token ID list. + +```csharp +using Microsoft.ML.Tokenizers; + +var tokenizer = TiktokenTokenizer.CreateForModel("gpt-4o"); +int count = tokenizer.CountTokens(text); + +if (count > maxContextTokens) +{ + // Truncate or summarize +} +``` + +> Always use `CountTokens()` instead of `EncodeToIds(text).Count` when you only need the count. + +### Truncate to a Token Budget + +`GetIndexByTokenCount` returns the character index where the token count reaches the limit. 
Use it to truncate without splitting mid-token. + +```csharp +var tokenizer = TiktokenTokenizer.CreateForModel("gpt-4o"); +int maxTokens = 4096; + +int charIndex = tokenizer.GetIndexByTokenCount(text, maxTokens); +var truncated = text.AsSpan()[..charIndex]; +``` + +### Encode Text to Token IDs + +Use when you need the actual token IDs — for example, when preparing inputs for ONNX models or inspecting tokenization behavior. + +```csharp +var tokenizer = TiktokenTokenizer.CreateForModel("gpt-4o"); + +// Token IDs only +IReadOnlyList<int> ids = tokenizer.EncodeToIds(text); + +// Full token details (string value, ID, character offsets) +IReadOnlyList<EncodedToken> tokens = tokenizer.EncodeToTokens(text); +foreach (var token in tokens) +{ + Console.WriteLine($"'{token.Value}' -> ID {token.Id} (offset {token.Offset})"); +} +``` + +### Decode Token IDs to Text + +```csharp +string decoded = tokenizer.Decode(ids); +``` + +## How This Library Is Used Across Skills + +| Skill | Use Case | Key Method | +|---|---|---| +| `meai-chat-integration` | Count tokens before sending prompts to stay within context window; truncate long inputs | `CountTokens()`, `GetIndexByTokenCount()` | +| `data-ingestion-pipeline` | Size chunks by token count (not character count) to match embedding model limits | `CountTokens()` | +| `onnx-runtime-inference` | Preprocess text inputs for ONNX models (BERT tokenization, vocabulary encoding) | `EncodeToIds()` via `BertTokenizer` | +| `rag-pipeline` | Token budget management for prompt assembly during retrieval-augmented generation | `CountTokens()`, `GetIndexByTokenCount()` | + +## Key Points + +- **Always use token counts, not character counts.** Characters do not map reliably to tokens. A 4-character word might be 1 token or 3 depending on the model's vocabulary. +- **Match the tokenizer to your model.** GPT-4o uses `o200k_base`, GPT-4/3.5 uses `cl100k_base`, Llama uses SentencePiece BPE. Using the wrong tokenizer gives inaccurate counts. 
+- **`CountTokens()` is the fast path.** Use it when you only need the count. It avoids allocating the token ID list. +- **Pure .NET, no dependencies.** Runs anywhere .NET runs — no Python runtime, no native binaries, no platform-specific code. +- **Thread-safe.** `Tokenizer` instances are immutable after creation. Create once, reuse across requests. + +## More Information + +- <https://www.nuget.org/packages/Microsoft.ML.Tokenizers> +- <https://github.com/dotnet/machinelearning> (source lives in the ML.NET repo under `src/Microsoft.ML.Tokenizers`) diff --git a/tests/dotnet-ai/meai-chat-integration/eval.yaml b/tests/dotnet-ai/meai-chat-integration/eval.yaml new file mode 100644 index 0000000000..1af147bab0 --- /dev/null +++ b/tests/dotnet-ai/meai-chat-integration/eval.yaml @@ -0,0 +1,65 @@ +scenarios: + - name: "Add chat to ASP.NET Core API" + prompt: "Add an LLM-powered chat endpoint to this .NET 10 web API using OpenAI. The endpoint should accept a user message and return the AI response." + setup: + files: + - path: "ChatApi/ChatApi.csproj" + content: | + <Project Sdk="Microsoft.NET.Sdk.Web"> + <PropertyGroup> + <TargetFramework>net10.0</TargetFramework> + <Nullable>enable</Nullable> + <ImplicitUsings>enable</ImplicitUsings> + </PropertyGroup> + </Project> + - path: "ChatApi/Program.cs" + content: | + var builder = WebApplication.CreateBuilder(args); + var app = builder.Build(); + app.Run(); + assertions: + - type: "output_contains" + value: "Microsoft.Extensions.AI" + - type: "output_contains" + value: "IChatClient" + - type: "exit_success" + rubric: + - "Uses Microsoft.Extensions.AI as the abstraction layer" + - "Registers IChatClient in DI using AddChatClient pattern" + - "Does not hardcode API keys — uses configuration or environment variables" + - "Creates a proper API endpoint that accepts user input" + timeout: 360 + + - name: "Add middleware pipeline" + prompt: "My .NET 10 app already uses IChatClient from Microsoft.Extensions.AI with OpenAI. Add caching and OpenTelemetry tracing to the chat client pipeline." 
+ setup: + files: + - path: "MyApp/MyApp.csproj" + content: | + + + net10.0 + + + + + + + - path: "MyApp/Program.cs" + content: | + using Microsoft.Extensions.AI; + var builder = WebApplication.CreateBuilder(args); + builder.Services.AddChatClient(new OpenAI.OpenAIClient(builder.Configuration["OpenAI:Key"]).GetChatClient("gpt-4o").AsIChatClient()); + var app = builder.Build(); + app.Run(); + assertions: + - type: "output_contains" + value: "Cache" + - type: "output_contains" + value: "Telemetry" + - type: "exit_success" + rubric: + - "Adds UseDistributedCache or similar caching middleware to the ChatClientBuilder pipeline" + - "Adds UseOpenTelemetry to the pipeline" + - "Configures the middleware in the correct order in AddChatClient" + timeout: 360