From f4c1edb4372221c0b1b5aa50b20947a88c54ac08 Mon Sep 17 00:00:00 2001 From: Hellnight2005 Date: Thu, 5 Feb 2026 15:22:24 +0530 Subject: [PATCH 01/11] feat(react): export locale cookie helpers --- packages/react/src/client/index.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/react/src/client/index.ts b/packages/react/src/client/index.ts index ceb8ef160..ec985c936 100644 --- a/packages/react/src/client/index.ts +++ b/packages/react/src/client/index.ts @@ -5,3 +5,4 @@ export * from "./component"; export * from "./locale-switcher"; export * from "./attribute-component"; export * from "./locale"; +export { getLocaleFromCookies, setLocaleInCookies } from "./utils"; From 10e8cef181ed370d9886fc7a9fd7ec5ffd8f2854 Mon Sep 17 00:00:00 2001 From: Hellnight2005 Date: Thu, 5 Feb 2026 15:31:15 +0530 Subject: [PATCH 02/11] chore: add changeset for locale helper exports --- .changeset/export-locale-helpers.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/export-locale-helpers.md diff --git a/.changeset/export-locale-helpers.md b/.changeset/export-locale-helpers.md new file mode 100644 index 000000000..898322a19 --- /dev/null +++ b/.changeset/export-locale-helpers.md @@ -0,0 +1,5 @@ +--- +"@lingo.dev/_react": minor +--- + +Export `getLocaleFromCookies` and `setLocaleInCookies` helpers from client entrypoint. From 2243c1a258980abd85483fccfc78e0f8e7ccc1fa Mon Sep 17 00:00:00 2001 From: Hellnight2005 Date: Fri, 6 Feb 2026 14:26:52 +0530 Subject: [PATCH 03/11] feat: add batch size parameter to prevent context leaking (closes #1733) --- packages/cli/src/cli/cmd/i18n.ts | 15 +- packages/cli/src/cli/cmd/run/_types.ts | 1 + packages/cli/src/cli/cmd/run/index.ts | 5 + packages/cli/src/cli/cmd/run/setup.ts | 35 +-- packages/cli/src/cli/localizer/explicit.ts | 235 +++++++++++++------ packages/cli/src/cli/localizer/index.ts | 3 +- packages/cli/src/cli/localizer/pseudo.ts | 3 + packages/cli/src/cli/processor/basic.spec.ts | 131 +++++++++++ packages/cli/src/cli/processor/basic.ts | 9 +- packages/cli/src/cli/processor/index.ts | 33 +-- 10 files changed, 362 insertions(+), 108 deletions(-) create mode 100644 packages/cli/src/cli/processor/basic.spec.ts diff --git a/packages/cli/src/cli/cmd/i18n.ts b/packages/cli/src/cli/cmd/i18n.ts index 3f828ad54..6c0015b26 100644 --- a/packages/cli/src/cli/cmd/i18n.ts +++ b/packages/cli/src/cli/cmd/i18n.ts @@ -90,6 +90,11 @@ export default new Command() "--strict", "Stop immediately on first error instead of continuing to process remaining buckets and locales (fail-fast mode)", ) + .option( + "--batch-size ", + "Number of translations to process in a single batch", + parseInt, + ) .action(async function (options) { updateGitignore(); @@ -432,14 +437,14 @@ export default new Command() } bucketOra.start( - `[${sourceLocale} -> ${targetLocale}] [${ - Object.keys(processableData).length + `[${sourceLocale} -> ${targetLocale}] [${Object.keys(processableData).length } entries] (0%) AI localization in progress...`, ); let processPayload = createProcessor(i18nConfig!.provider, { apiKey: settings.auth.apiKey, apiUrl: settings.auth.apiUrl, engineId: i18nConfig!.engineId, + batchSize: flags.batchSize, }); processPayload = withExponentialBackoff( processPayload, @@ -457,9 +462,8 @@ export default new Command() targetData: flags.force ? {} : targetData, }, (progress, sourceChunk, processedChunk) => { - bucketOra.text = `[${sourceLocale} -> ${targetLocale}] [${ - Object.keys(processableData).length - } entries] (${progress}%) AI localization in progress...`; + bucketOra.text = `[${sourceLocale} -> ${targetLocale}] [${Object.keys(processableData).length + } entries] (${progress}%) AI localization in progress...`; }, ); @@ -662,6 +666,7 @@ function parseFlags(options: any) { file: Z.array(Z.string()).optional(), interactive: Z.boolean().prefault(false), debug: Z.boolean().prefault(false), + batchSize: Z.number().min(1).optional(), }).parse(options); } diff --git a/packages/cli/src/cli/cmd/run/_types.ts b/packages/cli/src/cli/cmd/run/_types.ts index 967d05deb..86a22b424 100644 --- a/packages/cli/src/cli/cmd/run/_types.ts +++ b/packages/cli/src/cli/cmd/run/_types.ts @@ -56,5 +56,6 @@ export const flagsSchema = z.object({ debounce: z.number().positive().prefault(5000), // 5 seconds default sound: z.boolean().optional(), pseudo: z.boolean().optional(), + batchSize: z.number().min(1).optional(), }); export type CmdRunFlags = z.infer; diff --git a/packages/cli/src/cli/cmd/run/index.ts b/packages/cli/src/cli/cmd/run/index.ts index 7b28ecd9f..6c9cc0196 100644 --- a/packages/cli/src/cli/cmd/run/index.ts +++ b/packages/cli/src/cli/cmd/run/index.ts @@ -123,6 +123,11 @@ export default new Command() "--pseudo", "Enable pseudo-localization mode: automatically pseudo-translates all extracted strings with accented characters and visual markers without calling any external API. Useful for testing UI internationalization readiness", ) + .option( + "--batch-size ", + "Number of translations to process in a single batch", + (val: string) => parseInt(val), + ) .action(async (args) => { let userIdentity: UserIdentity = null; try { diff --git a/packages/cli/src/cli/cmd/run/setup.ts b/packages/cli/src/cli/cmd/run/setup.ts index 4277bd6b9..f4b846574 100644 --- a/packages/cli/src/cli/cmd/run/setup.ts +++ b/packages/cli/src/cli/cmd/run/setup.ts @@ -54,7 +54,12 @@ export default async function setup(input: CmdRunContext) { ctx.flags.pseudo || ctx.config?.dev?.usePseudotranslator; const provider = isPseudo ? "pseudo" : ctx.config?.provider; const engineId = ctx.config?.engineId; - ctx.localizer = createLocalizer(provider, engineId, ctx.flags.apiKey); + ctx.localizer = createLocalizer( + provider, + engineId, + ctx.flags.apiKey, + ctx.flags.batchSize, + ); if (!ctx.localizer) { throw new Error( "Could not create localization provider. Please check your i18n.json configuration.", @@ -105,23 +110,23 @@ export default async function setup(input: CmdRunContext) { const subTasks = isLingoDotDev ? [ - "Brand voice enabled", - "Translation memory connected", - "Glossary enabled", - "Quality assurance enabled", - ].map((title) => ({ title, task: () => {} })) + "Brand voice enabled", + "Translation memory connected", + "Glossary enabled", + "Quality assurance enabled", + ].map((title) => ({ title, task: () => { } })) : isPseudo ? [ - "Pseudo-localization mode active", - "Character replacement configured", - "No external API calls", - ].map((title) => ({ title, task: () => {} })) + "Pseudo-localization mode active", + "Character replacement configured", + "No external API calls", + ].map((title) => ({ title, task: () => { } })) : [ - "Skipping brand voice", - "Skipping glossary", - "Skipping translation memory", - "Skipping quality assurance", - ].map((title) => ({ title, task: () => {}, skip: true })); + "Skipping brand voice", + "Skipping glossary", + "Skipping translation memory", + "Skipping quality assurance", + ].map((title) => ({ title, task: () => { }, skip: true })); return task.newListr(subTasks, { concurrent: true, diff --git a/packages/cli/src/cli/localizer/explicit.ts b/packages/cli/src/cli/localizer/explicit.ts index ec212d035..199d68132 100644 --- a/packages/cli/src/cli/localizer/explicit.ts +++ b/packages/cli/src/cli/localizer/explicit.ts @@ -6,14 +6,16 @@ import { createMistral } from "@ai-sdk/mistral"; import { I18nConfig } from "@lingo.dev/_spec"; import chalk from "chalk"; import dedent from "dedent"; -import { ILocalizer, LocalizerData } from "./_types"; +import { ILocalizer, LocalizerData, LocalizerProgressFn } from "./_types"; import { LanguageModel, ModelMessage, generateText } from "ai"; import { colors } from "../constants"; import { jsonrepair } from "jsonrepair"; import { createOllama } from "ollama-ai-provider-v2"; +import _ from "lodash"; export default function createExplicitLocalizer( provider: NonNullable, + batchSize?: number, ): ILocalizer { const settings = provider.settings || {}; @@ -42,6 +44,7 @@ export default function createExplicitLocalizer( apiKeyName: "OPENAI_API_KEY", baseUrl: provider.baseUrl, settings, + batchSize, }); case "anthropic": return createAiSdkLocalizer({ @@ -52,6 +55,7 @@ export default function createExplicitLocalizer( apiKeyName: "ANTHROPIC_API_KEY", baseUrl: provider.baseUrl, settings, + batchSize, }); case "google": return createAiSdkLocalizer({ @@ -62,6 +66,7 @@ export default function createExplicitLocalizer( apiKeyName: "GOOGLE_API_KEY", baseUrl: provider.baseUrl, settings, + batchSize, }); case "openrouter": return createAiSdkLocalizer({ @@ -72,6 +77,7 @@ export default function createExplicitLocalizer( apiKeyName: "OPENROUTER_API_KEY", baseUrl: provider.baseUrl, settings, + batchSize, }); case "ollama": return createAiSdkLocalizer({ @@ -80,6 +86,7 @@ export default function createExplicitLocalizer( prompt: provider.prompt, skipAuth: true, settings, + batchSize, }); case "mistral": return createAiSdkLocalizer({ @@ -90,6 +97,7 @@ export default function createExplicitLocalizer( apiKeyName: "MISTRAL_API_KEY", baseUrl: provider.baseUrl, settings, + batchSize, }); } } @@ -120,6 +128,7 @@ function createAiSdkLocalizer(params: { baseUrl?: string; skipAuth?: boolean; settings?: { temperature?: number }; + batchSize?: number; }): ILocalizer { const skipAuth = params.skipAuth === true; @@ -183,85 +192,173 @@ function createAiSdkLocalizer(params: { return { valid: false, error: errorMessage }; } }, - localize: async (input: LocalizerData) => { - const systemPrompt = params.prompt - .replaceAll("{source}", input.sourceLocale) - .replaceAll("{target}", input.targetLocale); - const shots = [ - [ - { - sourceLocale: "en", - targetLocale: "es", - data: { - message: "Hello, world!", - }, - }, - { - sourceLocale: "en", - targetLocale: "es", - data: { - message: "Hola, mundo!", + localize: async ( + input: LocalizerData, + onProgress?: LocalizerProgressFn, + ) => { + const chunks = extractPayloadChunks( + input.processableData, + params.batchSize, + ); + const subResults: Record[] = []; + + for (let i = 0; i < chunks.length; i++) { + const chunk = chunks[i]; + + const systemPrompt = params.prompt + .replaceAll("{source}", input.sourceLocale) + .replaceAll("{target}", input.targetLocale); + + const shots = [ + [ + { + sourceLocale: "en", + targetLocale: "es", + data: { + message: "Hello, world!", + }, }, - }, - ], - [ - { - sourceLocale: "en", - targetLocale: "es", - data: { - spring: "Spring", + { + sourceLocale: "en", + targetLocale: "es", + data: { + message: "Hola, mundo!", + }, }, - hints: { - spring: ["A source of water"], + ], + [ + { + sourceLocale: "en", + targetLocale: "es", + data: { + spring: "Spring", + }, + hints: { + spring: ["A source of water"], + }, }, - }, - { - sourceLocale: "en", - targetLocale: "es", - data: { - spring: "Manantial", + { + sourceLocale: "en", + targetLocale: "es", + data: { + spring: "Manantial", + }, }, - }, - ], - ]; + ], + ]; - const hasHints = input.hints && Object.keys(input.hints).length > 0; + const chunkHints = input.hints + ? _.pick(input.hints, Object.keys(chunk)) + : undefined; + const hasHints = chunkHints && Object.keys(chunkHints).length > 0; - const payload = { - sourceLocale: input.sourceLocale, - targetLocale: input.targetLocale, - data: input.processableData, - ...(hasHints && { hints: input.hints }), - }; + const payload = { + sourceLocale: input.sourceLocale, + targetLocale: input.targetLocale, + data: chunk, + ...(hasHints && { hints: chunkHints }), + }; - const response = await generateText({ - model, - ...params.settings, - messages: [ - { role: "system", content: systemPrompt }, - ...shots.flatMap( - ([userShot, assistantShot]) => - [ - { role: "user", content: JSON.stringify(userShot) }, - { role: "assistant", content: JSON.stringify(assistantShot) }, - ] as ModelMessage[], - ), - { role: "user", content: JSON.stringify(payload) }, - ], - }); + const response = await generateText({ + model, + ...params.settings, + messages: [ + { role: "system", content: systemPrompt }, + ...shots.flatMap( + ([userShot, assistantShot]) => + [ + { role: "user", content: JSON.stringify(userShot) }, + { role: "assistant", content: JSON.stringify(assistantShot) }, + ] as ModelMessage[], + ), + { role: "user", content: JSON.stringify(payload) }, + ], + }); - const result = parseModelResponse(response.text); + const result = parseModelResponse(response.text); + let finalResult: Record = {}; - // Handle both object and string responses - if (typeof result.data === "object" && result.data !== null) { - return result.data; + // Handle both object and string responses + if (typeof result.data === "object" && result.data !== null) { + finalResult = result.data; + } else if (result.data) { + // Handle string responses - extract and repair JSON + const index = result.data.indexOf("{"); + const lastIndex = result.data.lastIndexOf("}"); + if (index !== -1 && lastIndex !== -1) { + const trimmed = result.data.slice(index, lastIndex + 1); + const repaired = jsonrepair(trimmed); + const parsed = JSON.parse(repaired); + finalResult = parsed.data || {}; + } + } + + subResults.push(finalResult); + if (onProgress) { + onProgress((i / chunks.length) * 100, chunk, finalResult); + } } - // Handle string responses - extract and repair JSON - const index = result.data.indexOf("{"); - const lastIndex = result.data.lastIndexOf("}"); - const trimmed = result.data.slice(index, lastIndex + 1); - return JSON.parse(jsonrepair(trimmed)).data; + const finalMergedResult = _.merge({}, ...subResults); + return finalMergedResult; }, }; } + +/** + * Extract payload chunks based on the ideal chunk size + * @param payload - The payload to be chunked + * @param batchSize - Max number of keys per chunk (default: 25) + * @returns An array of payload chunks + */ +function extractPayloadChunks( + payload: Record, + batchSize: number = 25, +): Record[] { + const idealBatchItemSize = 250; + const result: Record[] = []; + let currentChunk: Record = {}; + let currentChunkItemCount = 0; + + const payloadEntries = Object.entries(payload); + for (let i = 0; i < payloadEntries.length; i++) { + const [key, value] = payloadEntries[i]; + currentChunk[key] = value; + currentChunkItemCount++; + + const currentChunkSize = countWordsInRecord(currentChunk); + if ( + currentChunkSize > idealBatchItemSize || + currentChunkItemCount >= batchSize || + i === payloadEntries.length - 1 + ) { + result.push(currentChunk); + currentChunk = {}; + currentChunkItemCount = 0; + } + } + + return result; +} + +/** + * Count words in a record or array + * @param payload - The payload to count words in + * @returns The total number of words + */ +function countWordsInRecord( + payload: any | Record | Array, +): number { + if (Array.isArray(payload)) { + return payload.reduce((acc, item) => acc + countWordsInRecord(item), 0); + } else if (typeof payload === "object" && payload !== null) { + return Object.values(payload).reduce( + (acc: number, item) => acc + countWordsInRecord(item), + 0, + ); + } else if (typeof payload === "string") { + return payload.trim().split(/\s+/).filter(Boolean).length; + } else { + return 0; + } +} diff --git a/packages/cli/src/cli/localizer/index.ts b/packages/cli/src/cli/localizer/index.ts index 6d20b192b..ea5f3b18d 100644 --- a/packages/cli/src/cli/localizer/index.ts +++ b/packages/cli/src/cli/localizer/index.ts @@ -9,6 +9,7 @@ export default function createLocalizer( provider: I18nConfig["provider"] | "pseudo" | null | undefined, engineId?: string, apiKey?: string, + batchSize?: number, ): ILocalizer { if (provider === "pseudo") { return createPseudoLocalizer(); @@ -17,6 +18,6 @@ export default function createLocalizer( if (!provider) { return createLingoDotDevLocalizer(apiKey, engineId); } else { - return createExplicitLocalizer(provider); + return createExplicitLocalizer(provider, batchSize); } } diff --git a/packages/cli/src/cli/localizer/pseudo.ts b/packages/cli/src/cli/localizer/pseudo.ts index d20a3e20d..4083f528f 100644 --- a/packages/cli/src/cli/localizer/pseudo.ts +++ b/packages/cli/src/cli/localizer/pseudo.ts @@ -14,6 +14,9 @@ export default function createPseudoLocalizer(): ILocalizer { authenticated: true, }; }, + validateSettings: async () => { + return { valid: true }; + }, localize: async (input: LocalizerData, onProgress) => { // Nothing to translate – return the input as-is. if (!Object.keys(input.processableData).length) { diff --git a/packages/cli/src/cli/processor/basic.spec.ts b/packages/cli/src/cli/processor/basic.spec.ts new file mode 100644 index 000000000..2e61d0495 --- /dev/null +++ b/packages/cli/src/cli/processor/basic.spec.ts @@ -0,0 +1,131 @@ +import { describe, it, expect, vi, beforeEach } from "vitest"; +import { createBasicTranslator } from "./basic"; +import { LanguageModel, generateText } from "ai"; + +// Mock the ai module +vi.mock("ai", async () => { + const actual = await vi.importActual("ai"); + return { + ...actual, + generateText: vi.fn(), + }; +}); + +describe("createBasicTranslator", () => { + const mockModel = {} as LanguageModel; + const mockSystemPrompt = "Translate from {source} to {target}"; + + beforeEach(() => { + vi.clearAllMocks(); + }); + + it("should process all keys in a single batch by default", async () => { + const input = { + sourceLocale: "en", + targetLocale: "fr", + processableData: { + key1: "value1", + key2: "value2", + key3: "value3", + }, + }; + + // Mock response + (generateText as any).mockResolvedValue({ + text: JSON.stringify({ + data: { + key1: "valeur1", + key2: "valeur2", + key3: "valeur3", + }, + }), + }); + + const onProgress = vi.fn(); + const translator = createBasicTranslator(mockModel, mockSystemPrompt); + + await translator(input, onProgress); + + expect(generateText).toHaveBeenCalledTimes(1); + expect(generateText).toHaveBeenCalledWith( + expect.objectContaining({ + messages: expect.arrayContaining([ + expect.objectContaining({ + role: "user", + content: expect.stringContaining("key1"), + }), + ]), + }) + ); + }); + + it("should respect batchSize parameter", async () => { + const input = { + sourceLocale: "en", + targetLocale: "fr", + processableData: { + key1: "value1", + key2: "value2", + key3: "value3", + }, + }; + + // Mock response + (generateText as any).mockResolvedValue({ + text: JSON.stringify({ + data: {}, + }), + }); + + const onProgress = vi.fn(); + // Set batchSize to 1 to force individual requests + const translator = createBasicTranslator(mockModel, mockSystemPrompt, { batchSize: 1 }); + + await translator(input, onProgress); + + expect(generateText).toHaveBeenCalledTimes(3); + + // allow calls to be in any order, but each should contain exactly one key + const calls = (generateText as any).mock.calls; + const keysProcessed = new Set(); + + calls.forEach((call: any) => { + const messages = call[0].messages; + const userMessage = messages[messages.length - 1]; + const content = JSON.parse(userMessage.content); + const keys = Object.keys(content.data); + expect(keys.length).toBe(1); + keysProcessed.add(keys[0]); + }); + + expect(keysProcessed.has("key1")).toBe(true); + expect(keysProcessed.has("key2")).toBe(true); + expect(keysProcessed.has("key3")).toBe(true); + }); + + it("should chunk requests correctly with batchSize > 1", async () => { + const input = { + sourceLocale: "en", + targetLocale: "fr", + processableData: { + key1: "value1", + key2: "value2", + key3: "value3", + key4: "value4", + key5: "value5", + }, + }; + + (generateText as any).mockResolvedValue({ + text: JSON.stringify({ data: {} }), + }); + + const onProgress = vi.fn(); + const translator = createBasicTranslator(mockModel, mockSystemPrompt, { batchSize: 2 }); + + await translator(input, onProgress); + + // 5 items with batchSize 2 -> 3 chunks (2, 2, 1) + expect(generateText).toHaveBeenCalledTimes(3); + }); +}); diff --git a/packages/cli/src/cli/processor/basic.ts b/packages/cli/src/cli/processor/basic.ts index ed962adf6..a96cee8aa 100644 --- a/packages/cli/src/cli/processor/basic.ts +++ b/packages/cli/src/cli/processor/basic.ts @@ -4,6 +4,7 @@ import _ from "lodash"; type ModelSettings = { temperature?: number; + batchSize?: number; }; export function createBasicTranslator( @@ -12,7 +13,10 @@ export function createBasicTranslator( settings: ModelSettings = {}, ) { return async (input: LocalizerInput, onProgress: LocalizerProgressFn) => { - const chunks = extractPayloadChunks(input.processableData); + const chunks = extractPayloadChunks( + input.processableData, + settings.batchSize, + ); const subResults: Record[] = []; for (let i = 0; i < chunks.length; i++) { @@ -88,13 +92,14 @@ export function createBasicTranslator( /** * Extract payload chunks based on the ideal chunk size * @param payload - The payload to be chunked + * @param batchSize - Max number of keys per chunk (default: 25) * @returns An array of payload chunks */ function extractPayloadChunks( payload: Record, + batchSize: number = 25, ): Record[] { const idealBatchItemSize = 250; - const batchSize = 25; const result: Record[] = []; let currentChunk: Record = {}; let currentChunkItemCount = 0; diff --git a/packages/cli/src/cli/processor/index.ts b/packages/cli/src/cli/processor/index.ts index 6b7db441a..92845fde6 100644 --- a/packages/cli/src/cli/processor/index.ts +++ b/packages/cli/src/cli/processor/index.ts @@ -14,7 +14,7 @@ import { createOllama } from "ollama-ai-provider-v2"; export default function createProcessor( provider: I18nConfig["provider"], - params: { apiKey?: string; apiUrl: string; engineId?: string }, + params: { apiKey?: string; apiUrl: string; engineId?: string; batchSize?: number }, ): LocalizerFn { if (!provider) { const result = createLingoLocalizer(params); @@ -22,7 +22,10 @@ export default function createProcessor( } else { const model = getPureModelProvider(provider); const settings = provider.settings || {}; - const result = createBasicTranslator(model, provider.prompt, settings); + const result = createBasicTranslator(model, provider.prompt, { + ...settings, + batchSize: params.batchSize, + }); return result; } } @@ -32,23 +35,21 @@ function getPureModelProvider(provider: I18nConfig["provider"]) { providerId: string, envVar?: string, ) => dedent` - You're trying to use raw ${chalk.dim(providerId)} API for translation. ${ - envVar + You're trying to use raw ${chalk.dim(providerId)} API for translation. ${envVar ? `However, ${chalk.dim(envVar)} environment variable is not set.` : "However, that provider is unavailable." - } + } To fix this issue: - 1. ${ - envVar + 1. ${envVar ? `Set ${chalk.dim(envVar)} in your environment variables` : "Set the environment variable for your provider (if required)" - }, or + }, or 2. Remove the ${chalk.italic( - "provider", - )} node from your i18n.json configuration to switch to ${chalk.hex( - colors.green, - )("Lingo.dev")} + "provider", + )} node from your i18n.json configuration to switch to ${chalk.hex( + colors.green, + )("Lingo.dev")} ${chalk.hex(colors.blue)("Docs: https://lingo.dev/go/docs")} `; @@ -60,10 +61,10 @@ function getPureModelProvider(provider: I18nConfig["provider"]) { To fix this issue: 1. Switch to one of the supported providers, or 2. Remove the ${chalk.italic( - "provider", - )} node from your i18n.json configuration to switch to ${chalk.hex( - colors.green, - )("Lingo.dev")} + "provider", + )} node from your i18n.json configuration to switch to ${chalk.hex( + colors.green, + )("Lingo.dev")} ${chalk.hex(colors.blue)("Docs: https://lingo.dev/go/docs")} `; From 9c82c2493357e5b299068572ef07e3add399d794 Mon Sep 17 00:00:00 2001 From: Hellnight2005 Date: Fri, 6 Feb 2026 14:36:29 +0530 Subject: [PATCH 04/11] fix: resolve progress calculation off-by-one error (review feedback) --- packages/cli/src/cli/localizer/explicit.ts | 2 +- packages/cli/src/cli/processor/basic.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/cli/src/cli/localizer/explicit.ts b/packages/cli/src/cli/localizer/explicit.ts index 199d68132..d0706c546 100644 --- a/packages/cli/src/cli/localizer/explicit.ts +++ b/packages/cli/src/cli/localizer/explicit.ts @@ -295,7 +295,7 @@ function createAiSdkLocalizer(params: { subResults.push(finalResult); if (onProgress) { - onProgress((i / chunks.length) * 100, chunk, finalResult); + onProgress(((i + 1) / chunks.length) * 100, chunk, finalResult); } } diff --git a/packages/cli/src/cli/processor/basic.ts b/packages/cli/src/cli/processor/basic.ts index a96cee8aa..f9f1b1545 100644 --- a/packages/cli/src/cli/processor/basic.ts +++ b/packages/cli/src/cli/processor/basic.ts @@ -26,7 +26,7 @@ export function createBasicTranslator( processableData: chunk, }); subResults.push(result); - onProgress((i / chunks.length) * 100, chunk, result); + onProgress(((i + 1) / chunks.length) * 100, chunk, result); } const result = _.merge({}, ...subResults); From f55ddbba041b010cdf78e6f7fc7fbf6431c95216 Mon Sep 17 00:00:00 2001 From: Hellnight2005 Date: Fri, 6 Feb 2026 14:46:02 +0530 Subject: [PATCH 05/11] fix: handle unhandled JSON.parse exception (review feedback) --- packages/cli/src/cli/localizer/explicit.ts | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/packages/cli/src/cli/localizer/explicit.ts b/packages/cli/src/cli/localizer/explicit.ts index d0706c546..b28c76536 100644 --- a/packages/cli/src/cli/localizer/explicit.ts +++ b/packages/cli/src/cli/localizer/explicit.ts @@ -275,13 +275,21 @@ function createAiSdkLocalizer(params: { ], }); - const result = parseModelResponse(response.text); + let result: any; + try { + result = parseModelResponse(response.text); + } catch (e2) { + console.error( + `Failed to parse response from Lingo.dev. Response: ${response.text}`, + ); + throw new Error(`Failed to parse response from Lingo.dev: ${e2}`); + } let finalResult: Record = {}; // Handle both object and string responses - if (typeof result.data === "object" && result.data !== null) { + if (typeof result?.data === "object" && result.data !== null) { finalResult = result.data; - } else if (result.data) { + } else if (result?.data) { // Handle string responses - extract and repair JSON const index = result.data.indexOf("{"); const lastIndex = result.data.lastIndexOf("}"); @@ -289,7 +297,7 @@ function createAiSdkLocalizer(params: { const trimmed = result.data.slice(index, lastIndex + 1); const repaired = jsonrepair(trimmed); const parsed = JSON.parse(repaired); - finalResult = parsed.data || {}; + finalResult = parsed.data || parsed || {}; } } From 1e991d8573494f7fa9928becd1e81a5b4ab3f3d9 Mon Sep 17 00:00:00 2001 From: Hellnight2005 Date: Fri, 6 Feb 2026 15:03:25 +0530 Subject: [PATCH 06/11] fix: correct changeset for batch size feature --- .changeset/add-batch-size.md | 5 +++++ .changeset/export-locale-helpers.md | 5 ----- 2 files changed, 5 insertions(+), 5 deletions(-) create mode 100644 .changeset/add-batch-size.md delete mode 100644 .changeset/export-locale-helpers.md diff --git a/.changeset/add-batch-size.md b/.changeset/add-batch-size.md new file mode 100644 index 000000000..e07ad8344 --- /dev/null +++ b/.changeset/add-batch-size.md @@ -0,0 +1,5 @@ +--- +"lingo.dev": minor +--- + +feat: add `--batch-size` parameter to `run` and `i18n` commands to prevent context leaking diff --git a/.changeset/export-locale-helpers.md b/.changeset/export-locale-helpers.md deleted file mode 100644 index 898322a19..000000000 --- a/.changeset/export-locale-helpers.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -"@lingo.dev/_react": minor ---- - -Export `getLocaleFromCookies` and `setLocaleInCookies` helpers from client entrypoint. From 15ee4d4d3b1d7ea6f44e882d9cac6a99f7fb495c Mon Sep 17 00:00:00 2001 From: Hellnight2005 Date: Fri, 6 Feb 2026 15:42:56 +0530 Subject: [PATCH 07/11] fix: address review feedback (docs update for batch-size flag and whitespace cleanup) --- packages/cli/src/cli/cmd/run/index.ts | 2 +- packages/cli/src/cli/localizer/explicit.ts | 16 +++++++++---- packages/cli/src/cli/processor/basic.spec.ts | 25 ++++++++++++++++++++ packages/cli/src/cli/processor/basic.ts | 6 +++-- 4 files changed, 42 insertions(+), 7 deletions(-) diff --git a/packages/cli/src/cli/cmd/run/index.ts b/packages/cli/src/cli/cmd/run/index.ts index 6c9cc0196..ed0b49d8e 100644 --- a/packages/cli/src/cli/cmd/run/index.ts +++ b/packages/cli/src/cli/cmd/run/index.ts @@ -125,7 +125,7 @@ export default new Command() ) .option( "--batch-size ", - "Number of translations to process in a single batch", + "Number of translations to process in a single batch (not applicable when using lingo.dev provider)", (val: string) => parseInt(val), ) .action(async (args) => { diff --git a/packages/cli/src/cli/localizer/explicit.ts b/packages/cli/src/cli/localizer/explicit.ts index b28c76536..2a705213f 100644 --- a/packages/cli/src/cli/localizer/explicit.ts +++ b/packages/cli/src/cli/localizer/explicit.ts @@ -279,10 +279,16 @@ function createAiSdkLocalizer(params: { try { result = parseModelResponse(response.text); } catch (e2) { + const snippet = + response.text.length > 500 + ? `${response.text.slice(0, 500)}…` + : response.text; console.error( - `Failed to parse response from Lingo.dev. Response: ${response.text}`, + `Failed to parse response from Lingo.dev. Response snippet: ${snippet}`, + ); + throw new Error( + `Failed to parse response from Lingo.dev: ${e2} (Snippet: ${snippet})`, ); - throw new Error(`Failed to parse response from Lingo.dev: ${e2}`); } let finalResult: Record = {}; @@ -321,7 +327,7 @@ function createAiSdkLocalizer(params: { */ function extractPayloadChunks( payload: Record, - batchSize: number = 25, + batchSize?: number, ): Record[] { const idealBatchItemSize = 250; const result: Record[] = []; @@ -335,9 +341,11 @@ function extractPayloadChunks( currentChunkItemCount++; const currentChunkSize = countWordsInRecord(currentChunk); + const effectiveBatchSize = + batchSize && batchSize > 0 ? batchSize : payloadEntries.length || 1; if ( currentChunkSize > idealBatchItemSize || - currentChunkItemCount >= batchSize || + currentChunkItemCount >= effectiveBatchSize || i === payloadEntries.length - 1 ) { result.push(currentChunk); diff --git a/packages/cli/src/cli/processor/basic.spec.ts b/packages/cli/src/cli/processor/basic.spec.ts index 2e61d0495..3eebf338c 100644 --- a/packages/cli/src/cli/processor/basic.spec.ts +++ b/packages/cli/src/cli/processor/basic.spec.ts @@ -59,6 +59,31 @@ describe("createBasicTranslator", () => { ); }); + it("should process >25 keys in a single batch by default (infinite batch size)", async () => { + const inputData: Record = {}; + for (let i = 0; i < 30; i++) { + inputData[`key${i}`] = `value${i}`; + } + + const input = { + sourceLocale: "en", + targetLocale: "fr", + processableData: inputData, + }; + + (generateText as any).mockResolvedValue({ + text: JSON.stringify({ data: {} }), + }); + + const onProgress = vi.fn(); + const translator = createBasicTranslator(mockModel, mockSystemPrompt); + + await translator(input, onProgress); + + // Should be 1 call, not 2 (which would happen if default was 25) + expect(generateText).toHaveBeenCalledTimes(1); + }); + it("should respect batchSize parameter", async () => { const input = { sourceLocale: "en", diff --git a/packages/cli/src/cli/processor/basic.ts b/packages/cli/src/cli/processor/basic.ts index f9f1b1545..0e723998c 100644 --- a/packages/cli/src/cli/processor/basic.ts +++ b/packages/cli/src/cli/processor/basic.ts @@ -97,7 +97,7 @@ export function createBasicTranslator( */ function extractPayloadChunks( payload: Record, - batchSize: number = 25, + batchSize?: number, ): Record[] { const idealBatchItemSize = 250; const result: Record[] = []; @@ -111,9 +111,11 @@ function extractPayloadChunks( currentChunkItemCount++; const currentChunkSize = countWordsInRecord(currentChunk); + const effectiveBatchSize = + batchSize && batchSize > 0 ? batchSize : payloadEntries.length || 1; if ( currentChunkSize > idealBatchItemSize || - currentChunkItemCount >= batchSize || + currentChunkItemCount >= effectiveBatchSize || i === payloadEntries.length - 1 ) { result.push(currentChunk); From c8cc35594c55104115a6874afab3cc2364fb4d94 Mon Sep 17 00:00:00 2001 From: Hellnight2005 Date: Fri, 6 Feb 2026 16:13:17 +0530 Subject: [PATCH 08/11] fix: add error handling for nested string-to-JSON parsing (CodeRabbit review) --- packages/cli/src/cli/localizer/explicit.ts | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/packages/cli/src/cli/localizer/explicit.ts b/packages/cli/src/cli/localizer/explicit.ts index 2a705213f..8ad376403 100644 --- a/packages/cli/src/cli/localizer/explicit.ts +++ b/packages/cli/src/cli/localizer/explicit.ts @@ -300,10 +300,17 @@ function createAiSdkLocalizer(params: { const index = result.data.indexOf("{"); const lastIndex = result.data.lastIndexOf("}"); if (index !== -1 && lastIndex !== -1) { - const trimmed = result.data.slice(index, lastIndex + 1); - const repaired = jsonrepair(trimmed); - const parsed = JSON.parse(repaired); - finalResult = parsed.data || parsed || {}; + try { + const trimmed = result.data.slice(index, lastIndex + 1); + const repaired = jsonrepair(trimmed); + const parsed = JSON.parse(repaired); + finalResult = parsed.data || parsed || {}; + } catch (e) { + console.error( + `Failed to parse nested JSON response. Snippet: ${result.data.slice(0, 100)}...`, + ); + // Fallback to empty object or continue + } } } From 03356cb2fdb0735e9b4b8d7cb37bf78eca06a03b Mon Sep 17 00:00:00 2001 From: Hellnight2005 Date: Fri, 6 Feb 2026 16:21:17 +0530 Subject: [PATCH 09/11] fix: throw error on nested JSON parse failure to prevent silent data loss (CodeRabbit review) --- packages/cli/src/cli/localizer/explicit.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/cli/src/cli/localizer/explicit.ts b/packages/cli/src/cli/localizer/explicit.ts index 8ad376403..13e1f6014 100644 --- a/packages/cli/src/cli/localizer/explicit.ts +++ b/packages/cli/src/cli/localizer/explicit.ts @@ -309,7 +309,9 @@ function createAiSdkLocalizer(params: { console.error( `Failed to parse nested JSON response. Snippet: ${result.data.slice(0, 100)}...`, ); - // Fallback to empty object or continue + throw new Error( + `Failed to parse nested JSON response: ${e} (Snippet: ${result.data.slice(0, 100)}...)`, + ); } } } From 73283261c1f268a306ce21a6b7fcda63cf6db5eb Mon Sep 17 00:00:00 2001 From: Hellnight2005 Date: Wed, 25 Mar 2026 01:24:56 +0530 Subject: [PATCH 10/11] fix: address CodeRabbit review feedback --- packages/cli/src/cli/localizer/explicit.ts | 19 +++++++++++++------ packages/cli/src/cli/processor/basic.ts | 10 +++++----- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/packages/cli/src/cli/localizer/explicit.ts b/packages/cli/src/cli/localizer/explicit.ts index 13e1f6014..7d3a215ca 100644 --- a/packages/cli/src/cli/localizer/explicit.ts +++ b/packages/cli/src/cli/localizer/explicit.ts @@ -284,10 +284,10 @@ function createAiSdkLocalizer(params: { ? `${response.text.slice(0, 500)}…` : response.text; console.error( - `Failed to parse response from Lingo.dev. Response snippet: ${snippet}`, + `Failed to parse response from ${params.id}. Response snippet: ${snippet}`, ); throw new Error( - `Failed to parse response from Lingo.dev: ${e2} (Snippet: ${snippet})`, + `Failed to parse response from ${params.id}: ${e2} (Snippet: ${snippet})`, ); } let finalResult: Record = {}; @@ -313,6 +313,13 @@ function createAiSdkLocalizer(params: { `Failed to parse nested JSON response: ${e} (Snippet: ${result.data.slice(0, 100)}...)`, ); } + } else { + console.error( + `Unexpected response format - no JSON object found. Snippet: ${String(result.data).slice(0, 100)}...`, + ); + throw new Error( + `Unexpected response format from ${params.id} - no JSON object found in response`, + ); } } @@ -335,12 +342,12 @@ function createAiSdkLocalizer(params: { * @returns An array of payload chunks */ function extractPayloadChunks( - payload: Record, + payload: Record, batchSize?: number, -): Record[] { +): Record[] { const idealBatchItemSize = 250; - const result: Record[] = []; - let currentChunk: Record = {}; + const result: Record[] = []; + let currentChunk: Record = {}; let currentChunkItemCount = 0; const payloadEntries = Object.entries(payload); diff --git a/packages/cli/src/cli/processor/basic.ts b/packages/cli/src/cli/processor/basic.ts index 0e723998c..f43901aa6 100644 --- a/packages/cli/src/cli/processor/basic.ts +++ b/packages/cli/src/cli/processor/basic.ts @@ -92,16 +92,16 @@ export function createBasicTranslator( /** * Extract payload chunks based on the ideal chunk size * @param payload - The payload to be chunked - * @param batchSize - Max number of keys per chunk (default: 25) + * @param batchSize - Max number of keys per chunk * @returns An array of payload chunks */ function extractPayloadChunks( - payload: Record, + payload: Record, batchSize?: number, -): Record[] { +): Record[] { const idealBatchItemSize = 250; - const result: Record[] = []; - let currentChunk: Record = {}; + const result: Record[] = []; + let currentChunk: Record = {}; let currentChunkItemCount = 0; const payloadEntries = Object.entries(payload); From 6a251f2e714e935dc008a9bdbf02f6437daaaced Mon Sep 17 00:00:00 2001 From: Hellnight2005 Date: Thu, 2 Apr 2026 11:09:22 +0530 Subject: [PATCH 11/11] chore: address PR review feedback on batch sizing and chunk extraction - Fallback to 25 instead of total keys for batch size - Add Zod max(250) validation for batchSize flag - Extract duplicate JSON chunking repair logic to utils - Revert unintended export from react/src/client/index.ts - Format modified files with Prettier --- packages/cli/src/cli/cmd/i18n.ts | 10 +- packages/cli/src/cli/cmd/run/_types.ts | 2 +- packages/cli/src/cli/cmd/run/setup.ts | 28 +- packages/cli/src/cli/localizer/explicit.ts | 121 ++------- packages/cli/src/cli/processor/basic.spec.ts | 266 ++++++++++--------- packages/cli/src/cli/processor/basic.ts | 61 +---- packages/cli/src/cli/utils/chunk.ts | 58 ++++ packages/react/src/client/index.ts | 1 - 8 files changed, 240 insertions(+), 307 deletions(-) create mode 100644 packages/cli/src/cli/utils/chunk.ts diff --git a/packages/cli/src/cli/cmd/i18n.ts b/packages/cli/src/cli/cmd/i18n.ts index 6c0015b26..a83aeb568 100644 --- a/packages/cli/src/cli/cmd/i18n.ts +++ b/packages/cli/src/cli/cmd/i18n.ts @@ -437,7 +437,8 @@ export default new Command() } bucketOra.start( - `[${sourceLocale} -> ${targetLocale}] [${Object.keys(processableData).length + `[${sourceLocale} -> ${targetLocale}] [${ + Object.keys(processableData).length } entries] (0%) AI localization in progress...`, ); let processPayload = createProcessor(i18nConfig!.provider, { @@ -462,8 +463,9 @@ export default new Command() targetData: flags.force ? {} : targetData, }, (progress, sourceChunk, processedChunk) => { - bucketOra.text = `[${sourceLocale} -> ${targetLocale}] [${Object.keys(processableData).length - } entries] (${progress}%) AI localization in progress...`; + bucketOra.text = `[${sourceLocale} -> ${targetLocale}] [${ + Object.keys(processableData).length + } entries] (${progress}%) AI localization in progress...`; }, ); @@ -666,7 +668,7 @@ function parseFlags(options: any) { file: Z.array(Z.string()).optional(), interactive: Z.boolean().prefault(false), debug: Z.boolean().prefault(false), - batchSize: Z.number().min(1).optional(), + batchSize: Z.number().min(1).max(250).optional(), }).parse(options); } diff --git a/packages/cli/src/cli/cmd/run/_types.ts b/packages/cli/src/cli/cmd/run/_types.ts index 86a22b424..fab0c274a 100644 --- a/packages/cli/src/cli/cmd/run/_types.ts +++ b/packages/cli/src/cli/cmd/run/_types.ts @@ -56,6 +56,6 @@ export const flagsSchema = z.object({ debounce: z.number().positive().prefault(5000), // 5 seconds default sound: z.boolean().optional(), pseudo: z.boolean().optional(), - batchSize: z.number().min(1).optional(), + batchSize: z.number().min(1).max(250).optional(), }); export type CmdRunFlags = z.infer; diff --git a/packages/cli/src/cli/cmd/run/setup.ts b/packages/cli/src/cli/cmd/run/setup.ts index f4b846574..19bfe83de 100644 --- a/packages/cli/src/cli/cmd/run/setup.ts +++ b/packages/cli/src/cli/cmd/run/setup.ts @@ -110,23 +110,23 @@ export default async function setup(input: CmdRunContext) { const subTasks = isLingoDotDev ? [ - "Brand voice enabled", - "Translation memory connected", - "Glossary enabled", - "Quality assurance enabled", - ].map((title) => ({ title, task: () => { } })) + "Brand voice enabled", + "Translation memory connected", + "Glossary enabled", + "Quality assurance enabled", + ].map((title) => ({ title, task: () => {} })) : isPseudo ? [ - "Pseudo-localization mode active", - "Character replacement configured", - "No external API calls", - ].map((title) => ({ title, task: () => { } })) + "Pseudo-localization mode active", + "Character replacement configured", + "No external API calls", + ].map((title) => ({ title, task: () => {} })) : [ - "Skipping brand voice", - "Skipping glossary", - "Skipping translation memory", - "Skipping quality assurance", - ].map((title) => ({ title, task: () => { }, skip: true })); + "Skipping brand voice", + "Skipping glossary", + "Skipping translation memory", + "Skipping quality assurance", + ].map((title) => ({ title, task: () => {}, skip: true })); return task.newListr(subTasks, { concurrent: true, diff --git a/packages/cli/src/cli/localizer/explicit.ts b/packages/cli/src/cli/localizer/explicit.ts index 7d3a215ca..e6a5746a4 100644 --- a/packages/cli/src/cli/localizer/explicit.ts +++ b/packages/cli/src/cli/localizer/explicit.ts @@ -12,7 +12,7 @@ import { colors } from "../constants"; import { jsonrepair } from "jsonrepair"; import { createOllama } from "ollama-ai-provider-v2"; import _ from "lodash"; - +import { extractPayloadChunks } from "../utils/chunk"; export default function createExplicitLocalizer( provider: NonNullable, batchSize?: number, @@ -28,10 +28,10 @@ export default function createExplicitLocalizer( To fix this issue: 1. Switch to one of the supported providers, or 2. Remove the ${chalk.italic( - "provider", - )} node from your i18n.json configuration to switch to ${chalk.hex( - colors.green, - )("Lingo.dev")} + "provider", + )} node from your i18n.json configuration to switch to ${chalk.hex( + colors.green, + )("Lingo.dev")} ${chalk.hex(colors.blue)("Docs: https://lingo.dev/go/docs")} `, @@ -136,19 +136,21 @@ function createAiSdkLocalizer(params: { if (!skipAuth && (!apiKey || !params.apiKeyName)) { throw new Error( dedent` - You're trying to use raw ${chalk.dim(params.id)} API for translation. ${params.apiKeyName - ? `However, ${chalk.dim( - params.apiKeyName, - )} environment variable is not set.` - : "However, that provider is unavailable." + You're trying to use raw ${chalk.dim(params.id)} API for translation. ${ + params.apiKeyName + ? `However, ${chalk.dim( + params.apiKeyName, + )} environment variable is not set.` + : "However, that provider is unavailable." } To fix this issue: - 1. ${params.apiKeyName - ? `Set ${chalk.dim( - params.apiKeyName, - )} in your environment variables` - : "Set the environment variable for your provider (if required)" + 1. ${ + params.apiKeyName + ? `Set ${chalk.dim( + params.apiKeyName, + )} in your environment variables` + : "Set the environment variable for your provider (if required)" }, or 2. Remove the ${chalk.italic( "provider", @@ -295,30 +297,17 @@ function createAiSdkLocalizer(params: { // Handle both object and string responses if (typeof result?.data === "object" && result.data !== null) { finalResult = result.data; - } else if (result?.data) { - // Handle string responses - extract and repair JSON - const index = result.data.indexOf("{"); - const lastIndex = result.data.lastIndexOf("}"); - if (index !== -1 && lastIndex !== -1) { - try { - const trimmed = result.data.slice(index, lastIndex + 1); - const repaired = jsonrepair(trimmed); - const parsed = JSON.parse(repaired); - finalResult = parsed.data || parsed || {}; - } catch (e) { - console.error( - `Failed to parse nested JSON response. Snippet: ${result.data.slice(0, 100)}...`, - ); - throw new Error( - `Failed to parse nested JSON response: ${e} (Snippet: ${result.data.slice(0, 100)}...)`, - ); - } - } else { + } else if (typeof result?.data === "string") { + // Handle string responses where the model double-stringified the JSON + try { + const parsed = parseModelResponse(result.data); + finalResult = parsed.data || parsed || {}; + } catch (e) { console.error( - `Unexpected response format - no JSON object found. Snippet: ${String(result.data).slice(0, 100)}...`, + `Failed to parse nested JSON response. Snippet: ${result.data.slice(0, 100)}...`, ); throw new Error( - `Unexpected response format from ${params.id} - no JSON object found in response`, + `Failed to parse nested JSON response: ${e} (Snippet: ${result.data.slice(0, 100)}...)`, ); } } @@ -334,63 +323,3 @@ function createAiSdkLocalizer(params: { }, }; } - -/** - * Extract payload chunks based on the ideal chunk size - * @param payload - The payload to be chunked - * @param batchSize - Max number of keys per chunk (default: 25) - * @returns An array of payload chunks - */ -function extractPayloadChunks( - payload: Record, - batchSize?: number, -): Record[] { - const idealBatchItemSize = 250; - const result: Record[] = []; - let currentChunk: Record = {}; - let currentChunkItemCount = 0; - - const payloadEntries = Object.entries(payload); - for (let i = 0; i < payloadEntries.length; i++) { - const [key, value] = payloadEntries[i]; - currentChunk[key] = value; - currentChunkItemCount++; - - const currentChunkSize = countWordsInRecord(currentChunk); - const effectiveBatchSize = - batchSize && batchSize > 0 ? batchSize : payloadEntries.length || 1; - if ( - currentChunkSize > idealBatchItemSize || - currentChunkItemCount >= effectiveBatchSize || - i === payloadEntries.length - 1 - ) { - result.push(currentChunk); - currentChunk = {}; - currentChunkItemCount = 0; - } - } - - return result; -} - -/** - * Count words in a record or array - * @param payload - The payload to count words in - * @returns The total number of words - */ -function countWordsInRecord( - payload: any | Record | Array, -): number { - if (Array.isArray(payload)) { - return payload.reduce((acc, item) => acc + countWordsInRecord(item), 0); - } else if (typeof payload === "object" && payload !== null) { - return Object.values(payload).reduce( - (acc: number, item) => acc + countWordsInRecord(item), - 0, - ); - } else if (typeof payload === "string") { - return payload.trim().split(/\s+/).filter(Boolean).length; - } else { - return 0; - } -} diff --git a/packages/cli/src/cli/processor/basic.spec.ts b/packages/cli/src/cli/processor/basic.spec.ts index 3eebf338c..53e1d425f 100644 --- a/packages/cli/src/cli/processor/basic.spec.ts +++ b/packages/cli/src/cli/processor/basic.spec.ts @@ -4,153 +4,157 @@ import { LanguageModel, generateText } from "ai"; // Mock the ai module vi.mock("ai", async () => { - const actual = await vi.importActual("ai"); - return { - ...actual, - generateText: vi.fn(), - }; + const actual = await vi.importActual("ai"); + return { + ...actual, + generateText: vi.fn(), + }; }); describe("createBasicTranslator", () => { - const mockModel = {} as LanguageModel; - const mockSystemPrompt = "Translate from {source} to {target}"; + const mockModel = {} as LanguageModel; + const mockSystemPrompt = "Translate from {source} to {target}"; + + beforeEach(() => { + vi.clearAllMocks(); + }); + + it("should process all keys in a single batch by default", async () => { + const input = { + sourceLocale: "en", + targetLocale: "fr", + processableData: { + key1: "value1", + key2: "value2", + key3: "value3", + }, + }; - beforeEach(() => { - vi.clearAllMocks(); + // Mock response + (generateText as any).mockResolvedValue({ + text: JSON.stringify({ + data: { + key1: "valeur1", + key2: "valeur2", + key3: "valeur3", + }, + }), }); - it("should process all keys in a single batch by default", async () => { - const input = { - sourceLocale: "en", - targetLocale: "fr", - processableData: { - key1: "value1", - key2: "value2", - key3: "value3", - }, - }; - - // Mock response - (generateText as any).mockResolvedValue({ - text: JSON.stringify({ - data: { - key1: "valeur1", - key2: "valeur2", - key3: "valeur3", - }, - }), - }); - - const onProgress = vi.fn(); - const translator = createBasicTranslator(mockModel, mockSystemPrompt); - - await translator(input, onProgress); - - expect(generateText).toHaveBeenCalledTimes(1); - expect(generateText).toHaveBeenCalledWith( - expect.objectContaining({ - messages: expect.arrayContaining([ - expect.objectContaining({ - role: "user", - content: expect.stringContaining("key1"), - }), - ]), - }) - ); + const onProgress = vi.fn(); + const translator = createBasicTranslator(mockModel, mockSystemPrompt); + + await translator(input, onProgress); + + expect(generateText).toHaveBeenCalledTimes(1); + expect(generateText).toHaveBeenCalledWith( + expect.objectContaining({ + messages: expect.arrayContaining([ + expect.objectContaining({ + role: "user", + content: expect.stringContaining("key1"), + }), + ]), + }), + ); + }); + + it("should process >25 keys in multiple batches by default (fallback batch size 25)", async () => { + const inputData: Record = {}; + for (let i = 0; i < 30; i++) { + inputData[`key${i}`] = `value${i}`; + } + + const input = { + sourceLocale: "en", + targetLocale: "fr", + processableData: inputData, + }; + + (generateText as any).mockResolvedValue({ + text: JSON.stringify({ data: {} }), }); - it("should process >25 keys in a single batch by default (infinite batch size)", async () => { - const inputData: Record = {}; - for (let i = 0; i < 30; i++) { - inputData[`key${i}`] = `value${i}`; - } + const onProgress = vi.fn(); + const translator = createBasicTranslator(mockModel, mockSystemPrompt); - const input = { - sourceLocale: "en", - targetLocale: "fr", - processableData: inputData, - }; + await translator(input, onProgress); - (generateText as any).mockResolvedValue({ - text: JSON.stringify({ data: {} }), - }); + // Should be 2 calls, since default fallback is 25 and we have 30 keys + expect(generateText).toHaveBeenCalledTimes(2); + }); - const onProgress = vi.fn(); - const translator = createBasicTranslator(mockModel, mockSystemPrompt); + it("should respect batchSize parameter", async () => { + const input = { + sourceLocale: "en", + targetLocale: "fr", + processableData: { + key1: "value1", + key2: "value2", + key3: "value3", + }, + }; - await translator(input, onProgress); + // Mock response + (generateText as any).mockResolvedValue({ + text: JSON.stringify({ + data: {}, + }), + }); - // Should be 1 call, not 2 (which would happen if default was 25) - expect(generateText).toHaveBeenCalledTimes(1); + const onProgress = vi.fn(); + // Set batchSize to 1 to force individual requests + const translator = createBasicTranslator(mockModel, mockSystemPrompt, { + batchSize: 1, }); - it("should respect batchSize parameter", async () => { - const input = { - sourceLocale: "en", - targetLocale: "fr", - processableData: { - key1: "value1", - key2: "value2", - key3: "value3", - }, - }; - - // Mock response - (generateText as any).mockResolvedValue({ - text: JSON.stringify({ - data: {}, - }), - }); - - const onProgress = vi.fn(); - // Set batchSize to 1 to force individual requests - const translator = createBasicTranslator(mockModel, mockSystemPrompt, { batchSize: 1 }); - - await translator(input, onProgress); - - expect(generateText).toHaveBeenCalledTimes(3); - - // allow calls to be in any order, but each should contain exactly one key - const calls = (generateText as any).mock.calls; - const keysProcessed = new Set(); - - calls.forEach((call: any) => { - const messages = call[0].messages; - const userMessage = messages[messages.length - 1]; - const content = JSON.parse(userMessage.content); - const keys = Object.keys(content.data); - expect(keys.length).toBe(1); - keysProcessed.add(keys[0]); - }); - - expect(keysProcessed.has("key1")).toBe(true); - expect(keysProcessed.has("key2")).toBe(true); - expect(keysProcessed.has("key3")).toBe(true); + await translator(input, onProgress); + + expect(generateText).toHaveBeenCalledTimes(3); + + // allow calls to be in any order, but each should contain exactly one key + const calls = (generateText as any).mock.calls; + const keysProcessed = new Set(); + + calls.forEach((call: any) => { + const messages = call[0].messages; + const userMessage = messages[messages.length - 1]; + const content = JSON.parse(userMessage.content); + const keys = Object.keys(content.data); + expect(keys.length).toBe(1); + keysProcessed.add(keys[0]); }); - it("should chunk requests correctly with batchSize > 1", async () => { - const input = { - sourceLocale: "en", - targetLocale: "fr", - processableData: { - key1: "value1", - key2: "value2", - key3: "value3", - key4: "value4", - key5: "value5", - }, - }; - - (generateText as any).mockResolvedValue({ - text: JSON.stringify({ data: {} }), - }); - - const onProgress = vi.fn(); - const translator = createBasicTranslator(mockModel, mockSystemPrompt, { batchSize: 2 }); - - await translator(input, onProgress); - - // 5 items with batchSize 2 -> 3 chunks (2, 2, 1) - expect(generateText).toHaveBeenCalledTimes(3); + expect(keysProcessed.has("key1")).toBe(true); + expect(keysProcessed.has("key2")).toBe(true); + expect(keysProcessed.has("key3")).toBe(true); + }); + + it("should chunk requests correctly with batchSize > 1", async () => { + const input = { + sourceLocale: "en", + targetLocale: "fr", + processableData: { + key1: "value1", + key2: "value2", + key3: "value3", + key4: "value4", + key5: "value5", + }, + }; + + (generateText as any).mockResolvedValue({ + text: JSON.stringify({ data: {} }), }); + + const onProgress = vi.fn(); + const translator = createBasicTranslator(mockModel, mockSystemPrompt, { + batchSize: 2, + }); + + await translator(input, onProgress); + + // 5 items with batchSize 2 -> 3 chunks (2, 2, 1) + expect(generateText).toHaveBeenCalledTimes(3); + }); }); diff --git a/packages/cli/src/cli/processor/basic.ts b/packages/cli/src/cli/processor/basic.ts index f43901aa6..5a3596833 100644 --- a/packages/cli/src/cli/processor/basic.ts +++ b/packages/cli/src/cli/processor/basic.ts @@ -2,6 +2,7 @@ import { generateText, LanguageModel } from "ai"; import { LocalizerInput, LocalizerProgressFn } from "./_base"; import _ from "lodash"; +import { extractPayloadChunks } from "../utils/chunk"; type ModelSettings = { temperature?: number; batchSize?: number; @@ -88,63 +89,3 @@ export function createBasicTranslator( return result?.data || {}; } } - -/** - * Extract payload chunks based on the ideal chunk size - * @param payload - The payload to be chunked - * @param batchSize - Max number of keys per chunk - * @returns An array of payload chunks - */ -function extractPayloadChunks( - payload: Record, - batchSize?: number, -): Record[] { - const idealBatchItemSize = 250; - const result: Record[] = []; - let currentChunk: Record = {}; - let currentChunkItemCount = 0; - - const payloadEntries = Object.entries(payload); - for (let i = 0; i < payloadEntries.length; i++) { - const [key, value] = payloadEntries[i]; - currentChunk[key] = value; - currentChunkItemCount++; - - const currentChunkSize = countWordsInRecord(currentChunk); - const effectiveBatchSize = - batchSize && batchSize > 0 ? batchSize : payloadEntries.length || 1; - if ( - currentChunkSize > idealBatchItemSize || - currentChunkItemCount >= effectiveBatchSize || - i === payloadEntries.length - 1 - ) { - result.push(currentChunk); - currentChunk = {}; - currentChunkItemCount = 0; - } - } - - return result; -} - -/** - * Count words in a record or array - * @param payload - The payload to count words in - * @returns The total number of words - */ -function countWordsInRecord( - payload: any | Record | Array, -): number { - if (Array.isArray(payload)) { - return payload.reduce((acc, item) => acc + countWordsInRecord(item), 0); - } else if (typeof payload === "object" && payload !== null) { - return Object.values(payload).reduce( - (acc: number, item) => acc + countWordsInRecord(item), - 0, - ); - } else if (typeof payload === "string") { - return payload.trim().split(/\s+/).filter(Boolean).length; - } else { - return 0; - } -} diff --git a/packages/cli/src/cli/utils/chunk.ts b/packages/cli/src/cli/utils/chunk.ts new file mode 100644 index 000000000..dfe0d959d --- /dev/null +++ b/packages/cli/src/cli/utils/chunk.ts @@ -0,0 +1,58 @@ +/** + * Extract payload chunks based on the ideal chunk size + * @param payload - The payload to be chunked + * @param batchSize - Max number of keys per chunk + * @returns An array of payload chunks + */ +export function extractPayloadChunks( + payload: Record, + batchSize?: number, +): Record[] { + const idealBatchItemSize = 250; + const result: Record[] = []; + let currentChunk: Record = {}; + let currentChunkItemCount = 0; + + const payloadEntries = Object.entries(payload); + for (let i = 0; i < payloadEntries.length; i++) { + const [key, value] = payloadEntries[i]; + currentChunk[key] = value; + currentChunkItemCount++; + + const currentChunkSize = countWordsInRecord(currentChunk); + const effectiveBatchSize = batchSize && batchSize > 0 ? batchSize : 25; + if ( + currentChunkSize > idealBatchItemSize || + currentChunkItemCount >= effectiveBatchSize || + i === payloadEntries.length - 1 + ) { + result.push(currentChunk); + currentChunk = {}; + currentChunkItemCount = 0; + } + } + + return result; +} + +/** + * Count words in a record or array + * @param payload - The payload to count words in + * @returns The total number of words + */ +export function countWordsInRecord( + payload: any | Record | Array, +): number { + if (Array.isArray(payload)) { + return payload.reduce((acc, item) => acc + countWordsInRecord(item), 0); + } else if (typeof payload === "object" && payload !== null) { + return Object.values(payload).reduce( + (acc: number, item) => acc + countWordsInRecord(item), + 0, + ); + } else if (typeof payload === "string") { + return payload.trim().split(/\s+/).filter(Boolean).length; + } else { + return 0; + } +} diff --git a/packages/react/src/client/index.ts b/packages/react/src/client/index.ts index ec985c936..ceb8ef160 100644 --- a/packages/react/src/client/index.ts +++ b/packages/react/src/client/index.ts @@ -5,4 +5,3 @@ export * from "./component"; export * from "./locale-switcher"; export * from "./attribute-component"; export * from "./locale"; -export { getLocaleFromCookies, setLocaleInCookies } from "./utils";