-
Notifications
You must be signed in to change notification settings - Fork 843
feat(cli): add --batch-size parameter to prevent context leaking #2055
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
f4c1edb
10e8cef
2243c1a
9c82c24
f55ddbb
1e991d8
15ee4d4
c8cc355
03356cb
7328326
6a251f2
fe7ef72
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| --- | ||
| "lingo.dev": minor | ||
| --- | ||
|
|
||
| feat: add `--batch-size` parameter to `run` and `i18n` commands to prevent context leaking |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -54,7 +54,12 @@ export default async function setup(input: CmdRunContext) { | |
| ctx.flags.pseudo || ctx.config?.dev?.usePseudotranslator; | ||
| const provider = isPseudo ? "pseudo" : ctx.config?.provider; | ||
| const engineId = ctx.config?.engineId; | ||
| ctx.localizer = createLocalizer(provider, engineId, ctx.flags.apiKey); | ||
| ctx.localizer = createLocalizer( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please use prettier to match the project formatting.
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I looked into this and tried reformatting it. I also cross-verified it (including with AI and Prettier), and it seems the line is being split due to the |
||
| provider, | ||
| engineId, | ||
| ctx.flags.apiKey, | ||
| ctx.flags.batchSize, | ||
| ); | ||
| if (!ctx.localizer) { | ||
| throw new Error( | ||
| "Could not create localization provider. Please check your i18n.json configuration.", | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,14 +6,16 @@ import { createMistral } from "@ai-sdk/mistral"; | |
| import { I18nConfig } from "@lingo.dev/_spec"; | ||
| import chalk from "chalk"; | ||
| import dedent from "dedent"; | ||
| import { ILocalizer, LocalizerData } from "./_types"; | ||
| import { ILocalizer, LocalizerData, LocalizerProgressFn } from "./_types"; | ||
| import { LanguageModel, ModelMessage, generateText } from "ai"; | ||
| import { colors } from "../constants"; | ||
| import { jsonrepair } from "jsonrepair"; | ||
| import { createOllama } from "ollama-ai-provider-v2"; | ||
|
|
||
| import _ from "lodash"; | ||
| import { extractPayloadChunks } from "../utils/chunk"; | ||
| export default function createExplicitLocalizer( | ||
| provider: NonNullable<I18nConfig["provider"]>, | ||
| batchSize?: number, | ||
| ): ILocalizer { | ||
| const settings = provider.settings || {}; | ||
|
|
||
|
|
@@ -26,10 +28,10 @@ export default function createExplicitLocalizer( | |
| To fix this issue: | ||
| 1. Switch to one of the supported providers, or | ||
| 2. Remove the ${chalk.italic( | ||
| "provider", | ||
| )} node from your i18n.json configuration to switch to ${chalk.hex( | ||
| colors.green, | ||
| )("Lingo.dev")} | ||
| "provider", | ||
| )} node from your i18n.json configuration to switch to ${chalk.hex( | ||
| colors.green, | ||
| )("Lingo.dev")} | ||
|
|
||
| ${chalk.hex(colors.blue)("Docs: https://lingo.dev/go/docs")} | ||
| `, | ||
|
|
@@ -42,6 +44,7 @@ export default function createExplicitLocalizer( | |
| apiKeyName: "OPENAI_API_KEY", | ||
| baseUrl: provider.baseUrl, | ||
| settings, | ||
| batchSize, | ||
| }); | ||
| case "anthropic": | ||
| return createAiSdkLocalizer({ | ||
|
|
@@ -52,6 +55,7 @@ export default function createExplicitLocalizer( | |
| apiKeyName: "ANTHROPIC_API_KEY", | ||
| baseUrl: provider.baseUrl, | ||
| settings, | ||
| batchSize, | ||
| }); | ||
| case "google": | ||
| return createAiSdkLocalizer({ | ||
|
|
@@ -62,6 +66,7 @@ export default function createExplicitLocalizer( | |
| apiKeyName: "GOOGLE_API_KEY", | ||
| baseUrl: provider.baseUrl, | ||
| settings, | ||
| batchSize, | ||
| }); | ||
| case "openrouter": | ||
| return createAiSdkLocalizer({ | ||
|
|
@@ -72,6 +77,7 @@ export default function createExplicitLocalizer( | |
| apiKeyName: "OPENROUTER_API_KEY", | ||
| baseUrl: provider.baseUrl, | ||
| settings, | ||
| batchSize, | ||
| }); | ||
| case "ollama": | ||
| return createAiSdkLocalizer({ | ||
|
|
@@ -80,6 +86,7 @@ export default function createExplicitLocalizer( | |
| prompt: provider.prompt, | ||
| skipAuth: true, | ||
| settings, | ||
| batchSize, | ||
| }); | ||
| case "mistral": | ||
| return createAiSdkLocalizer({ | ||
|
|
@@ -90,6 +97,7 @@ export default function createExplicitLocalizer( | |
| apiKeyName: "MISTRAL_API_KEY", | ||
| baseUrl: provider.baseUrl, | ||
| settings, | ||
| batchSize, | ||
| }); | ||
| } | ||
| } | ||
|
|
@@ -120,26 +128,29 @@ function createAiSdkLocalizer(params: { | |
| baseUrl?: string; | ||
| skipAuth?: boolean; | ||
| settings?: { temperature?: number }; | ||
| batchSize?: number; | ||
| }): ILocalizer { | ||
| const skipAuth = params.skipAuth === true; | ||
|
|
||
| const apiKey = process.env[params?.apiKeyName ?? ""]; | ||
| if (!skipAuth && (!apiKey || !params.apiKeyName)) { | ||
| throw new Error( | ||
| dedent` | ||
| You're trying to use raw ${chalk.dim(params.id)} API for translation. ${params.apiKeyName | ||
| ? `However, ${chalk.dim( | ||
| params.apiKeyName, | ||
| )} environment variable is not set.` | ||
| : "However, that provider is unavailable." | ||
| You're trying to use raw ${chalk.dim(params.id)} API for translation. ${ | ||
| params.apiKeyName | ||
| ? `However, ${chalk.dim( | ||
| params.apiKeyName, | ||
| )} environment variable is not set.` | ||
| : "However, that provider is unavailable." | ||
| } | ||
|
|
||
| To fix this issue: | ||
| 1. ${params.apiKeyName | ||
| ? `Set ${chalk.dim( | ||
| params.apiKeyName, | ||
| )} in your environment variables` | ||
| : "Set the environment variable for your provider (if required)" | ||
| 1. ${ | ||
| params.apiKeyName | ||
| ? `Set ${chalk.dim( | ||
| params.apiKeyName, | ||
| )} in your environment variables` | ||
| : "Set the environment variable for your provider (if required)" | ||
| }, or | ||
| 2. Remove the ${chalk.italic( | ||
| "provider", | ||
|
|
@@ -183,85 +194,132 @@ function createAiSdkLocalizer(params: { | |
| return { valid: false, error: errorMessage }; | ||
| } | ||
| }, | ||
| localize: async (input: LocalizerData) => { | ||
| const systemPrompt = params.prompt | ||
| .replaceAll("{source}", input.sourceLocale) | ||
| .replaceAll("{target}", input.targetLocale); | ||
| const shots = [ | ||
| [ | ||
| { | ||
| sourceLocale: "en", | ||
| targetLocale: "es", | ||
| data: { | ||
| message: "Hello, world!", | ||
| }, | ||
| }, | ||
| { | ||
| sourceLocale: "en", | ||
| targetLocale: "es", | ||
| data: { | ||
| message: "Hola, mundo!", | ||
| localize: async ( | ||
| input: LocalizerData, | ||
| onProgress?: LocalizerProgressFn, | ||
| ) => { | ||
| const chunks = extractPayloadChunks( | ||
| input.processableData, | ||
| params.batchSize, | ||
| ); | ||
coderabbitai[bot] marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| const subResults: Record<string, any>[] = []; | ||
|
|
||
| for (let i = 0; i < chunks.length; i++) { | ||
| const chunk = chunks[i]; | ||
|
|
||
| const systemPrompt = params.prompt | ||
| .replaceAll("{source}", input.sourceLocale) | ||
| .replaceAll("{target}", input.targetLocale); | ||
|
|
||
| const shots = [ | ||
| [ | ||
| { | ||
| sourceLocale: "en", | ||
| targetLocale: "es", | ||
| data: { | ||
| message: "Hello, world!", | ||
| }, | ||
| }, | ||
| }, | ||
| ], | ||
| [ | ||
| { | ||
| sourceLocale: "en", | ||
| targetLocale: "es", | ||
| data: { | ||
| spring: "Spring", | ||
| { | ||
| sourceLocale: "en", | ||
| targetLocale: "es", | ||
| data: { | ||
| message: "Hola, mundo!", | ||
| }, | ||
| }, | ||
| hints: { | ||
| spring: ["A source of water"], | ||
| ], | ||
| [ | ||
| { | ||
| sourceLocale: "en", | ||
| targetLocale: "es", | ||
| data: { | ||
| spring: "Spring", | ||
| }, | ||
| hints: { | ||
| spring: ["A source of water"], | ||
| }, | ||
| }, | ||
| }, | ||
| { | ||
| sourceLocale: "en", | ||
| targetLocale: "es", | ||
| data: { | ||
| spring: "Manantial", | ||
| { | ||
| sourceLocale: "en", | ||
| targetLocale: "es", | ||
| data: { | ||
| spring: "Manantial", | ||
| }, | ||
| }, | ||
| }, | ||
| ], | ||
| ]; | ||
| ], | ||
| ]; | ||
|
|
||
| const hasHints = input.hints && Object.keys(input.hints).length > 0; | ||
| const chunkHints = input.hints | ||
| ? _.pick(input.hints, Object.keys(chunk)) | ||
| : undefined; | ||
| const hasHints = chunkHints && Object.keys(chunkHints).length > 0; | ||
|
|
||
| const payload = { | ||
| sourceLocale: input.sourceLocale, | ||
| targetLocale: input.targetLocale, | ||
| data: input.processableData, | ||
| ...(hasHints && { hints: input.hints }), | ||
| }; | ||
| const payload = { | ||
| sourceLocale: input.sourceLocale, | ||
| targetLocale: input.targetLocale, | ||
| data: chunk, | ||
| ...(hasHints && { hints: chunkHints }), | ||
| }; | ||
|
|
||
| const response = await generateText({ | ||
| model, | ||
| ...params.settings, | ||
| messages: [ | ||
| { role: "system", content: systemPrompt }, | ||
| ...shots.flatMap( | ||
| ([userShot, assistantShot]) => | ||
| [ | ||
| { role: "user", content: JSON.stringify(userShot) }, | ||
| { role: "assistant", content: JSON.stringify(assistantShot) }, | ||
| ] as ModelMessage[], | ||
| ), | ||
| { role: "user", content: JSON.stringify(payload) }, | ||
| ], | ||
| }); | ||
| const response = await generateText({ | ||
| model, | ||
| ...params.settings, | ||
| messages: [ | ||
| { role: "system", content: systemPrompt }, | ||
| ...shots.flatMap( | ||
| ([userShot, assistantShot]) => | ||
| [ | ||
| { role: "user", content: JSON.stringify(userShot) }, | ||
| { role: "assistant", content: JSON.stringify(assistantShot) }, | ||
| ] as ModelMessage[], | ||
| ), | ||
| { role: "user", content: JSON.stringify(payload) }, | ||
| ], | ||
| }); | ||
|
|
||
| const result = parseModelResponse(response.text); | ||
| let result: any; | ||
| try { | ||
| result = parseModelResponse(response.text); | ||
| } catch (e2) { | ||
| const snippet = | ||
| response.text.length > 500 | ||
| ? `${response.text.slice(0, 500)}…` | ||
| : response.text; | ||
| console.error( | ||
| `Failed to parse response from ${params.id}. Response snippet: ${snippet}`, | ||
| ); | ||
| throw new Error( | ||
| `Failed to parse response from ${params.id}: ${e2} (Snippet: ${snippet})`, | ||
| ); | ||
Hellnight2005 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } | ||
| let finalResult: Record<string, any> = {}; | ||
|
|
||
| // Handle both object and string responses | ||
| if (typeof result.data === "object" && result.data !== null) { | ||
| return result.data; | ||
| // Handle both object and string responses | ||
| if (typeof result?.data === "object" && result.data !== null) { | ||
| finalResult = result.data; | ||
| } else if (typeof result?.data === "string") { | ||
| // Handle string responses where the model double-stringified the JSON | ||
| try { | ||
| const parsed = parseModelResponse(result.data); | ||
| finalResult = parsed.data || parsed || {}; | ||
| } catch (e) { | ||
| console.error( | ||
| `Failed to parse nested JSON response. Snippet: ${result.data.slice(0, 100)}...`, | ||
| ); | ||
| throw new Error( | ||
| `Failed to parse nested JSON response: ${e} (Snippet: ${result.data.slice(0, 100)}...)`, | ||
| ); | ||
| } | ||
| } | ||
Hellnight2005 marked this conversation as resolved.
Show resolved
Hide resolved
Comment on lines
+280
to
+313
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Handle top-level parsed payloads before assuming a This branch only succeeds when the parsed response is stored under Suggested change- let finalResult: Record<string, any> = {};
-
- // Handle both object and string responses
- if (typeof result?.data === "object" && result.data !== null) {
- finalResult = result.data;
- } else if (result?.data) {
+ let finalResult: Record<string, any> = {};
+ const rawData =
+ typeof result === "object" && result !== null && "data" in result
+ ? result.data
+ : result;
+
+ if (typeof rawData === "object" && rawData !== null) {
+ finalResult = rawData as Record<string, any>;
+ } else if (typeof rawData === "string" && rawData) {
// Handle string responses - extract and repair JSON
- const index = result.data.indexOf("{");
- const lastIndex = result.data.lastIndexOf("}");
+ const index = rawData.indexOf("{");
+ const lastIndex = rawData.lastIndexOf("}");
if (index !== -1 && lastIndex !== -1) {
try {
- const trimmed = result.data.slice(index, lastIndex + 1);
+ const trimmed = rawData.slice(index, lastIndex + 1);
const repaired = jsonrepair(trimmed);
const parsed = JSON.parse(repaired);
finalResult = parsed.data || parsed || {};
} catch (e) {
console.error(
- `Failed to parse nested JSON response. Snippet: ${result.data.slice(0, 100)}...`,
+ `Failed to parse nested JSON response. Snippet: ${rawData.slice(0, 100)}...`,
);
throw new Error(
- `Failed to parse nested JSON response: ${e} (Snippet: ${result.data.slice(0, 100)}...)`,
+ `Failed to parse nested JSON response: ${e} (Snippet: ${rawData.slice(0, 100)}...)`,
);
}
} else {
console.error(
- `Unexpected response format - no JSON object found. Snippet: ${String(result.data).slice(0, 100)}...`,
+ `Unexpected response format - no JSON object found. Snippet: ${String(rawData).slice(0, 100)}...`,
);
throw new Error(
`Unexpected response format from ${params.id} - no JSON object found in response`,
);
}🤖 Prompt for AI Agents |
||
|
|
||
| subResults.push(finalResult); | ||
| if (onProgress) { | ||
| onProgress(((i + 1) / chunks.length) * 100, chunk, finalResult); | ||
| } | ||
| } | ||
|
|
||
| // Handle string responses - extract and repair JSON | ||
| const index = result.data.indexOf("{"); | ||
| const lastIndex = result.data.lastIndexOf("}"); | ||
| const trimmed = result.data.slice(index, lastIndex + 1); | ||
| return JSON.parse(jsonrepair(trimmed)).data; | ||
| const finalMergedResult = _.merge({}, ...subResults); | ||
| return finalMergedResult; | ||
| }, | ||
| }; | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
🌐 Web query:
interactive-commander option parser callback signature: does the parser receive (value, previousValue), and is passing JavaScript parseInt directly considered unsafe because of its second-argument radix behavior?💡 Result:
No, the option parser (argParser) callback in interactive-commander receives only a single value parameter, not (value, previousValue). This is shown in official documentation examples where .argParser(Number) and .argParser((value) => { ... }) are used, with the lambda taking one argument. Yes, passing JavaScript parseInt directly is considered unsafe in Commander.js option processing (which interactive-commander extends and is compatible with). The parser receives (value, previousValue) for repeatable options, so parseInt(value, previousValue) treats previousValue as radix, often causing NaN (e.g., radix=1). Multiple GitHub issues confirm this: defaults become NaN (Issue
#523), and wrappers like v => parseInt(v) are recommended (Issue#1007). MDN notes parseInt requires explicit radix (10) for safety.Citations:
Use a dedicated numeric parser and add integer validation for
--batch-size.Line 96 passes
parseIntdirectly as the parser callback. When interactive-commander extends Commander.js behavior, repeatable options pass bothvalueandpreviousValueto the callback, causingparseInt(value, previousValue)to treat the previous value as a radix argument, often producing unexpected results. Use an explicit wrapper:(value) => Number.parseInt(value, 10).Additionally, since
batchSizerepresents a count and must be a whole number, add.int()validation to the schema at line 671.🔧 Proposed fix
.option( "--batch-size <number>", "Number of translations to process in a single batch", - parseInt, + (value: string) => Number.parseInt(value, 10), )Also applies to: 671-672
🤖 Prompt for AI Agents