From 251b301622fd2fedbc41f100449ba23de79f645c Mon Sep 17 00:00:00 2001 From: Luke-Bilhorn Date: Thu, 4 Jun 2026 15:19:55 -0500 Subject: [PATCH 01/12] Add OmniASR language data + resolver/labeller utilities MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds four new files in sharedUtils/, all importable from both the extension host and the webview bundles: - omniAsrSupportedLangs.ts: 1672 supported {iso639_3}_{Script} codes, snapshotted from the live GET /languages endpoint. - omniAsrDefaultScripts.ts: per-base "best guess" script for the 19 multi-script bases (urd→Arab, cmn→Hans, uig→Arab, yue→Hant, ...). All others have exactly one supported script so no entry needed. - omniAsrFriendlyNames.ts: 1650 base→Ref_Name map, for rendering the transcription badge. - asrLanguageUtils.ts: pure helpers — resolveOmniAsrCode(meta, scriptPref) and labelForTranscriptionLanguage(serverLang, sentCode, projectLanguageName). Nothing is wired up yet; that comes in subsequent commits. Each file has a header explaining how to regenerate after a model/endpoint change. --- sharedUtils/asrLanguageUtils.ts | 268 ++++ sharedUtils/omniAsrDefaultScripts.ts | 77 ++ sharedUtils/omniAsrFriendlyNames.ts | 1680 ++++++++++++++++++++++++++ sharedUtils/omniAsrSupportedLangs.ts | 315 +++++ 4 files changed, 2340 insertions(+) create mode 100644 sharedUtils/asrLanguageUtils.ts create mode 100644 sharedUtils/omniAsrDefaultScripts.ts create mode 100644 sharedUtils/omniAsrFriendlyNames.ts create mode 100644 sharedUtils/omniAsrSupportedLangs.ts diff --git a/sharedUtils/asrLanguageUtils.ts b/sharedUtils/asrLanguageUtils.ts new file mode 100644 index 000000000..b050abb20 --- /dev/null +++ b/sharedUtils/asrLanguageUtils.ts @@ -0,0 +1,268 @@ +/** + * ASR language-utility functions + * ------------------------------ + * + * Pure helpers (no `vscode` imports → unit-testable, usable from both the + * extension host and the webviews) that: + * + * 1. 
**Resolve** a project's language metadata into an OmniASR-compatible + * `{iso639_3}_{Script}` code (or decide we should send no code, letting + * the server transcribe without language conditioning). + * 2. **Label** an OmniASR code with a friendly display name suitable for the + * post-transcription badge (e.g. `swh_Latn` → "Swahili"). + * + * Why this lives in `sharedUtils/` + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * Both the extension host (`src/providers/...`) and the webviews + * (`webviews/.../CodexCellEditor`) need it: the host builds the `asrConfig` + * payload from project settings, and the webview renders the badge after a + * transcription completes. + */ + +import { + OMNI_ASR_SUPPORTED_LANGS, + OMNI_ASR_SUPPORTED_LANG_SET, +} from "./omniAsrSupportedLangs"; +import { OMNI_ASR_DEFAULT_SCRIPTS } from "./omniAsrDefaultScripts"; +import { OMNI_ASR_FRIENDLY_NAMES } from "./omniAsrFriendlyNames"; + +/** + * Minimal shape of the project's language metadata that we consume here. + * Matches `codex-types`'s `LanguageMetadata` but we restate it so this file + * doesn't pull `codex-types` (and its transitive deps) into the webview + * bundle. + */ +export type AsrLanguageMetaInput = { + tag?: string; + iso1?: string; + iso2t?: string; + iso2b?: string; + refName?: string; +}; + +/** + * Macrolanguage → individual-language remaps used when the project's tag + * names a macrolanguage that OmniASR doesn't serve directly. Each pair maps + * a macro ISO 639-3 to the individual ISO 639-3 that OmniASR actually + * supports for the most widely-spoken variety. Sources: + * - SIL macrolanguage mappings (iso-639-3-macrolanguages.tab) + * - cross-checked against `OMNI_ASR_SUPPORTED_LANGS` + * + * Add only when (a) the macro is genuinely not in OmniASR's set and (b) the + * "right" individual is unambiguous. 
/**
 * Macrolanguage → individual-language remaps, applied when a project's tag
 * names a macrolanguage rather than the individual language OmniASR serves.
 * Each entry maps a macro ISO 639-3 code to the individual ISO 639-3 code of
 * the most widely-spoken variety. Sources:
 *   - SIL macrolanguage mappings (iso-639-3-macrolanguages.tab)
 *   - cross-checked against `OMNI_ASR_SUPPORTED_LANGS`
 *
 * Add an entry only when (a) the macro is genuinely not in OmniASR's set and
 * (b) the "right" individual is unambiguous.
 *
 * NOTE(review): `kur` is also a key of `OMNI_ASR_FRIENDLY_NAMES`, which is
 * documented as covering OmniASR-supported bases — confirm the unconditional
 * `kur` → `kmr` remap is still intended under rule (a).
 */
const MACRO_TO_INDIVIDUAL: Readonly<Record<string, string>> = {
    swa: "swh", // Swahili → Coastal Swahili (Kenya/Tanzania majority)
    ara: "arb", // Arabic → Modern Standard Arabic
    msa: "zsm", // Malay → Standard Malay
    zho: "cmn", // Chinese → Mandarin
    ori: "ory", // Oriya → Odia
    est: "ekk", // Estonian → Standard Estonian
    sqi: "als", // Albanian → Tosk Albanian
    kur: "kmr", // Kurdish → Northern Kurdish (largest speaker base)
    nor: "nob", // Norwegian → Bokmål
    oji: "ojb", // Ojibwa → Northwestern Ojibwa
};

/**
 * ISO 639-1 (2-letter) → ISO 639-3 (3-letter). Common languages only; the
 * project usually carries `iso2t` directly so this is just a fallback.
 *
 * NOTE(review): `fas` (Persian) is a key of `OMNI_ASR_FRIENDLY_NAMES`; verify
 * that `pes` is served as well, otherwise `fa` should map to `fas`.
 */
const ISO1_TO_ISO3: Readonly<Record<string, string>> = {
    en: "eng", fr: "fra", es: "spa", de: "deu", pt: "por", it: "ita",
    nl: "nld", ru: "rus", zh: "cmn", ja: "jpn", ko: "kor", ar: "arb",
    sw: "swh", ur: "urd", hi: "hin", bn: "ben", id: "ind", tr: "tur",
    th: "tha", vi: "vie", uk: "ukr", pl: "pol", fa: "pes", he: "heb",
};

/** Primary language subtag that already is a 3-letter (ISO 639-3 style) code. */
const ISO3_SUBTAG = /^[a-z]{3}$/;

/** Primary language subtag that is a 2-letter (ISO 639-1 style) code. */
const ISO1_SUBTAG = /^[a-z]{2}$/;

/**
 * Script subtag: exactly four ASCII letters (Latn, Arab, Cyrl, ...). A plain
 * length check is not enough because variant subtags such as `1901` or
 * `1994` are also four characters long.
 */
const SCRIPT_SUBTAG = /^[A-Za-z]{4}$/;

/** Normalize a script subtag to title case: "arab" / "ARAB" → "Arab". */
function toScriptCase(script: string): string {
    return script.charAt(0).toUpperCase() + script.slice(1).toLowerCase();
}

/**
 * Pull the ISO 639-3 base + optional Script subtag out of a project's
 * language metadata, normalizing macrolanguages to OmniASR-served
 * individuals.
 *
 * Lookup order for the base: primary subtag of `meta.tag` (3-letter as-is,
 * 2-letter via `ISO1_TO_ISO3`), then `meta.iso2t`, `meta.iso2b`, and finally
 * `meta.iso1` via `ISO1_TO_ISO3`. The script is only ever read from
 * `meta.tag`.
 *
 * @param meta - Project language metadata; may be `undefined`.
 * @returns The base (after macrolanguage remapping) and, when the tag carries
 *   one, the title-cased script; `undefined` if no base can be recovered.
 */
function extractBaseAndScript(
    meta: AsrLanguageMetaInput | undefined
): { base: string; explicitScript?: string; } | undefined {
    if (!meta) return undefined;

    let base = "";
    let explicitScript: string | undefined;

    // BCP-47-ish tag is the richest source: e.g. "swh", "ur-Arab", "zh-Hans".
    // Underscore separators ("zh_Hans") are tolerated as well.
    const tag = (meta.tag ?? "").trim();
    if (tag) {
        const [primary = "", ...subtags] = tag.split(/[-_]/);
        const lowered = primary.toLowerCase();
        if (ISO3_SUBTAG.test(lowered)) {
            base = lowered;
        } else if (ISO1_SUBTAG.test(lowered)) {
            base = ISO1_TO_ISO3[lowered] ?? "";
        }
        // Only a four-letter alphabetic subtag is a script; digit-bearing
        // variants ("1901") must not be mistaken for one.
        const script = subtags.find((subtag) => SCRIPT_SUBTAG.test(subtag));
        if (script) {
            explicitScript = toScriptCase(script);
        }
    }

    if (!base) {
        // First non-blank of iso2t / iso2b, normalized.
        base = [meta.iso2t, meta.iso2b]
            .map((value) => (value ?? "").trim().toLowerCase())
            .find((value) => value !== "") ?? "";
    }
    if (!base) {
        const iso1 = (meta.iso1 ?? "").trim().toLowerCase();
        base = ISO1_TO_ISO3[iso1] ?? "";
    }
    if (!base) return undefined;

    return { base: MACRO_TO_INDIVIDUAL[base] ?? base, explicitScript };
}

/**
 * `scriptPref` is what the user picked in the Script advanced setting.
 *
 *   - `"auto"`  → "best guess" (our default). Pick the script using
 *                 `OMNI_ASR_DEFAULT_SCRIPTS`, falling back to Latin then
 *                 the sole supported script.
 *   - `"latin"` → force Latin script when supported, otherwise fall back
 *                 to auto behaviour.
 *   - any 4-letter string (`"Arab"`, `"Cyrl"`, ...) → use that script.
 *
 * The `string & Record<never, never>` member accepts every string while
 * keeping the two keyword literals visible to tooling (a bare `| string`
 * would collapse the whole union to `string`).
 */
export type AsrScriptPref = "auto" | "latin" | (string & Record<never, never>);

/**
 * Resolve a project's language metadata to an OmniASR-compatible
 * `{iso639_3}_{Script}` code, or return `undefined` when we can't safely pick
 * one (the caller should then omit the `lang` query param so the server
 * transcribes without language conditioning).
 *
 * Selection priority:
 *   1. Explicit `scriptPref` (4-letter ISO 15924 tag) → use as-is when
 *      `{base}_{Script}` is a supported code.
 *   2. Script encoded in the project tag (e.g. `swa-Cyrl`) → ditto.
 *   3. `scriptPref === "latin"` → Latin if supported.
 *   4. `OMNI_ASR_DEFAULT_SCRIPTS[base]` (our hand-curated "best guess").
 *   5. Latin if supported.
 *   6. Sole supported script for this base.
 *   7. `undefined` (genuinely ambiguous → let the server pick).
 *
 * `scriptPref` is trimmed and its keywords / script tag are matched
 * case-insensitively, so `"Latin"`, `" arab "` and `"ARAB"` all work.
 *
 * Future work: a per-cell script override could short-circuit step 1.
 *
 * @param meta - Project language metadata; `undefined` yields `undefined`.
 * @param scriptPref - User's script preference, `"auto"` by default.
 * @returns A supported OmniASR code, or `undefined` when none can be chosen.
 */
export function resolveOmniAsrCode(
    meta: AsrLanguageMetaInput | undefined,
    scriptPref: AsrScriptPref = "auto"
): string | undefined {
    const extracted = extractBaseAndScript(meta);
    if (!extracted) return undefined;
    const { base, explicitScript } = extracted;

    // Every script OmniASR supports for this base.
    const prefix = `${base}_`;
    const supportedScripts = OMNI_ASR_SUPPORTED_LANGS
        .filter((code) => code.startsWith(prefix))
        .map((code) => code.slice(prefix.length));
    if (supportedScripts.length === 0) return undefined;

    const pref = (scriptPref ?? "auto").trim();
    const keyword = pref.toLowerCase();
    const wantsLatin = keyword === "latin";
    // "auto" is itself four letters, so it has to be excluded explicitly.
    const customScript =
        keyword !== "auto" && !wantsLatin && SCRIPT_SUBTAG.test(pref)
            ? toScriptCase(pref)
            : undefined;

    // Steps 1-5, in priority order; the first supported candidate wins.
    const candidates: ReadonlyArray<string | undefined> = [
        customScript,                    // 1. explicit user-chosen script
        explicitScript,                  // 2. script encoded in the project tag
        wantsLatin ? "Latn" : undefined, // 3. scriptPref === "latin"
        OMNI_ASR_DEFAULT_SCRIPTS[base],  // 4. curated default for this base
        "Latn",                          // 5. Latin if supported
    ];
    for (const script of candidates) {
        if (!script) continue;
        const code = `${base}_${script}`;
        if (OMNI_ASR_SUPPORTED_LANG_SET.has(code)) return code;
    }

    // 6. Sole supported script; 7. otherwise genuinely ambiguous.
    const [onlyScript, ...otherScripts] = supportedScripts;
    return onlyScript !== undefined && otherScripts.length === 0
        ? `${base}_${onlyScript}`
        : undefined;
}

/** Shape of an OmniASR code: 2-3 lowercase letters, `_`, title-cased script. */
const OMNI_ASR_CODE = /^([a-z]{2,3})_([A-Z][a-z]{3})$/;

/**
 * Split an OmniASR code like "swh_Latn" into base + script.
 *
 * @param code - Candidate code; `undefined`, `null` and `""` are accepted.
 * @returns `{ base, script }`, or `null` when `code` is empty or malformed.
 */
export function splitOmniAsrCode(code: string | undefined | null): { base: string; script: string; } | null {
    if (!code) return null;
    const match = OMNI_ASR_CODE.exec(code);
    if (!match) return null;
    const [, base = "", script = ""] = match;
    return { base, script };
}

/** lowercase letter (plus combining marks) directly followed by an uppercase letter. */
const LOWER_THEN_UPPER = /(\p{Ll}\p{M}*)(\p{Lu})/gu;

/** Uppercase letter followed by a capitalized word (end of an acronym run). */
const UPPER_THEN_WORD = /(\p{Lu}\p{M}*)(\p{Lu}\p{M}*\p{Ll})/gu;

/** "(" glued to the preceding non-space character, e.g. "Aja(Benin)". */
const GLUED_OPEN_PAREN = /(\S)\(/g;

/**
 * SIL `Ref_Name` values are CamelCased with no spaces (e.g. "MinNanChinese").
 * Split on case changes for natural-looking display: "Min Nan Chinese".
 *
 * Letter classes are Unicode-aware because the names keep their diacritics:
 * an ASCII-only `[a-z]` never matches the "ë" in "ArbëreshëAlbanian", which
 * would leave the name unsplit. A parenthesised qualifier is also separated
 * from the name ("Aja(Benin)" → "Aja (Benin)").
 *
 * @param name - Raw reference name; already-spaced names pass through as-is.
 * @returns The display form, trimmed.
 */
function prettifyRefName(name: string): string {
    return name
        // Insert a space before any uppercase letter that follows a lowercase one.
        .replace(LOWER_THEN_UPPER, "$1 $2")
        // And before an uppercase letter that's followed by a lowercase one
        // (handles runs of acronyms like "USA").
        .replace(UPPER_THEN_WORD, "$1 $2")
        // Detach a qualifier that was glued on when the spaces were removed.
        .replace(GLUED_OPEN_PAREN, "$1 (")
        .trim();
}

/**
 * Friendly display name for a transcription's language badge.
 *
 * Inputs:
 *   - `serverLang`          — the code OmniASR echoed back in its response
 *                             (when we sent one). The primary source of truth.
 *   - `sentCode`            — what we asked the server to use, in case it
 *                             didn't echo (today the server only echoes when
 *                             given a code).
 *   - `projectLanguageName` — `refName` of the project's target language, as
 *                             a last-ditch fallback when we know we sent the
 *                             project's code but the server omitted the echo.
 *
 * Returns `null` to mean "render nothing" (we have no honest label). The
 * caller renders "Auto Detect" itself when in auto-detect mode and we have no
 * detected-language info, so we never lie about it here. A blank or
 * whitespace-only `projectLanguageName` also yields `null`, never `""`.
 */
export function labelForTranscriptionLanguage(
    serverLang: string | undefined | null,
    sentCode: string | undefined | null,
    projectLanguageName: string | undefined | null
): string | null {
    const friendly = (code: string | null | undefined): string | null => {
        const parts = splitOmniAsrCode(code);
        if (!parts) return null;
        const refName = OMNI_ASR_FRIENDLY_NAMES[parts.base];
        return refName ? prettifyRefName(refName) : null;
    };

    // 1. Server's echo is always the most truthful signal.
    // 2. If we sent a code but the server didn't echo, the server still used
    //    what we sent — show that.
    const fromCodes = friendly(serverLang) || friendly(sentCode);
    if (fromCodes) return fromCodes;

    // 3. Last-ditch fallback: project language name, if any.
    const fallback = projectLanguageName ? prettifyRefName(projectLanguageName) : "";
    return fallback || null;
}

// ---------------------------------------------------------------------------
// File boundary in the patch: sharedUtils/omniAsrDefaultScripts.ts (new file)
// ---------------------------------------------------------------------------

/**
 * OmniASR multi-script default-script table
 * -----------------------------------------
 *
 * For each OmniASR language with **multiple supported scripts**, the script
 * we should pick by default when the user has not specified one.
 *
 * Background
 * ~~~~~~~~~~
 * OmniASR codes are `{iso639_3}_{Script}` (e.g. `urd_Arab`). Almost every
 * supported base language (1631 of 1650 unique bases) supports exactly one
 * script, so the script choice is trivial. This file only lists the 19
 * multi-script bases that need a real tiebreaker.
 *
 * Selection priority used by the resolver (`resolveOmniAsrCode` in
 * `asrLanguageUtils.ts`):
 *   1. Explicit 4-letter script the user typed in the advanced setting
 *   2. Script encoded in the project's language tag (e.g. `swa-Cyrl`)
 *   3. Latin, when the user's script preference is `"latin"`
 *   4. **This table** (the "best guess")
 *   5. Latin, if the language supports Latin
 *   6. Sole supported script (if only one)
 *   7. Omit `lang` (server runs without language conditioning)
 *
 * Source / rationale per entry
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 * Picked using Unicode CLDR `likelySubtags.xml` (the official "if a user gives
 * me a language tag with no script, what script should I assume?" table)
 * cross-checked against modern majority usage. Macrolanguage → individual
 * remaps (e.g. swa→swh, ara→arb, zho→cmn, kur→kmr) are handled in the
 * resolver *before* lookup, so this table keys on the individual codes
 * OmniASR actually serves.
 *
 * If you adjust an entry, leave a `// ←` note explaining why.
 *
 * Multi-script bases not listed here intentionally fall through to "Latin if
 * supported, else omit `lang`" (a multi-script base has no sole script to
 * fall back on). Add an entry here only when CLDR or modern majority usage
 * clearly disagrees with that default.
 *
 * Regenerating
 * ~~~~~~~~~~~~
 * To rediscover which bases need entries (after a model update changes the
 * supported set):
 *
 *   curl -s "https://genesis-ai-dev--codex-asr-serve.modal.run/languages" \
 *     | python3 -c "
 *   import json, sys
 *   d = json.load(sys.stdin)
 *   bases = {}
 *   for l in d['languages']:
 *       b, s = l.split('_')
 *       bases.setdefault(b, set()).add(s)
 *   for b, ss in sorted(bases.items()):
 *       if len(ss) > 1:
 *           print(b, sorted(ss))
 *   "
 */

export const OMNI_ASR_DEFAULT_SCRIPTS: Readonly<Record<string, string>> = {
    aze: "Latn", // Azerbaijani — modern standard (Republic of Azerbaijan) is Latin
    bcc: "Arab", // Southern Balochi — written in Arabic script
    cmn: "Hans", // Mandarin Chinese — Simplified is the more common default
    cmo: "Khmr", // Central Mnong — Khmer-script orthography (community standard)
    crk: "Cans", // Plains Cree — Canadian Aboriginal Syllabics is the traditional script
    ell: "Grek", // Greek — only one substantive script; entry exists for completeness
    gag: "Latn", // Gagauz — modern orthography is Latin
    kmr: "Latn", // Northern Kurdish — Latin (Hawar) is the predominant modern script
    lld: "Latn", // Ladin — only Latin; entry exists for completeness
    ojb: "Latn", // Northwestern Ojibwa — Latin (double-vowel) is most common in print
    rif: "Latn", // Tarifit Berber — Latin in modern publications (Tifinagh not in OmniASR)
    rmc: "Latn", // Carpathian Romani — Latin in modern orthographies
    rmy: "Latn", // Vlax Romani — Latin in modern orthographies
    tuk: "Latn", // Turkmen — modern standard (Turkmenistan) is Latin
    uig: "Arab", // Uyghur — Arabic-script (Uyghur Ereb Yëziqi) is the predominant script
    urd: "Arab", // Urdu — Arabic-script (Nastaliq) is the canonical script
    uzb: "Latn", // Uzbek — modern standard (Uzbekistan) is Latin
    wal: "Ethi", // Wolaytta — Ethiopic (Geʽez) script in modern orthographies
    yue: "Hant", // Cantonese — Traditional Chinese (Hong Kong / Guangzhou default)
};

// ---------------------------------------------------------------------------
// File boundary in the patch: sharedUtils/omniAsrFriendlyNames.ts (new file)
// ---------------------------------------------------------------------------

/**
 * OmniASR friendly-name lookup
 * ----------------------------
 *
 * Maps each OmniASR-supported ISO 639-3 base (1650 entries) to its English
 * "reference name" from the SIL ISO 639-3 registry. Used to render the
 * language badge after a transcription completes (e.g. `swh_Latn` → "Swahili").
 *
 * Notes
 * ~~~~~
 * - Keyed on the **base** (ISO 639-3), not the full OmniASR code, because the
 *   friendly name is the same regardless of script. Callers should strip the
 *   `_{Script}` suffix before lookup; `labelForTranscriptionLanguage()` in
 *   `asrLanguageUtils.ts` does that via `splitOmniAsrCode()`.
 * - Names are SIL `Ref_Name` values CamelCased with the spaces removed. They
 *   are NOT ASCII-only: diacritics are kept (e.g. "ArbëreshëAlbanian",
 *   "Baoulé"), so any code that splits or matches them must be
 *   Unicode-aware. The helper `prettifyRefName()` in `asrLanguageUtils.ts`
 *   splits these on case changes so they read naturally in the UI.
 * - The 'nan' entry is added by hand (Min Nan Chinese) — SIL leaves Ref_Name
 *   blank for that code in the version we parsed.
 *
 * Regenerating
 * ~~~~~~~~~~~~
 * If OmniASR's supported set changes, regenerate from the SIL data already
 * bundled in `src/utils/languageUtils.ts` using the snippet in
 * `omniAsrSupportedLangs.ts`'s header (look up each base's `Ref_Name`).
 */
+ */ + +export const OMNI_ASR_FRIENDLY_NAMES: Readonly> = { + aae: "ArbëreshëAlbanian", + aal: "Afade", + abb: "Bankon", + abi: "Abidji", + abk: "Abkhazian", + abn: "Abua", + abp: "AbellenAyta", + abr: "Abron", + abs: "AmboneseMalay", + aca: "Achagua", + acd: "Gikyode", + ace: "Achinese", + acf: "SaintLucianCreoleFrench", + ach: "Acoli", + acm: "MesopotamianArabic", + acn: "Achang", + acr: "Achi", + acu: "Achuar-Shiwiar", + acw: "HijaziArabic", + ade: "Adele", + adh: "Adhola", + adj: "Adioukrou", + adx: "AmdoTibetan", + ady: "Adyghe", + aeb: "TunisianArabic", + aec: "SaidiArabic", + aeu: "Akeu", + afb: "GulfArabic", + afo: "Eloyi", + afr: "Afrikaans", + agd: "Agarabi", + agg: "Angor", + agn: "Agutaynen", + agr: "Aguaruna", + agu: "Aguacateco", + agx: "Aghul", + aha: "Ahanta", + ahk: "Akha", + ahl: "Igo", + ahs: "Ashe", + aia: "Arosi", + ajg: "Aja(Benin)", + aka: "Akan", + akb: "BatakAngkola", + ake: "Akawaio", + akp: "Siwu", + ala: "Alago", + alj: "Alangan", + aln: "GhegAlbanian", + alo: "Larike-Wakasihu", + alp: "Alune", + als: "ToskAlbanian", + alt: "SouthernAltai", + alz: "Alur", + ame: "Yanesha'", + amf: "Hamer-Banna", + amh: "Amharic", + ami: "Amis", + amk: "Ambai", + amu: "GuerreroAmuzgo", + anc: "Ngas", + ank: "Goemai", + ann: "Obolo", + anp: "Angika", + anw: "Anaang", + any: "Anyin", + aom: "Ömie", + aoz: "UabMeto", + apb: "Sa'a", + apc: "LevantineArabic", + apd: "SudaneseArabic", + apr: "Arop-Lokep", + arb: "StandardArabic", + arg: "Aragonese", + arl: "Arabela", + arq: "AlgerianArabic", + ars: "NajdiArabic", + ary: "MoroccanArabic", + arz: "EgyptianArabic", + asa: "Asu(Tanzania)", + asg: "Cishingini", + asm: "Assamese", + ast: "Asturian", + ata: "Pele-Ata", + atb: "Zaiwa", + atg: "IvbieNorth-Okpela-Arhe", + ati: "Attié", + atq: "Aralle-Tabulahan", + ava: "Avaric", + avn: "Avatime", + avu: "Avokaya", + awa: "Awadhi", + awb: "Awa(PapuaNewGuinea)", + awo: "Awak", + ayl: "LibyanArabic", + ayo: "Ayoreo", + ayp: "NorthMesopotamianArabic", + ayr: "CentralAymara", 
+ ayz: "MaiBrat", + aze: "Azerbaijani", + azg: "SanPedroAmuzgosAmuzgo", + azz: "HighlandPueblaNahuatl", + bag: "Tuki", + bak: "Bashkir", + bam: "Bambara", + ban: "Balinese", + bao: "Waimaha", + bas: "Basa(Cameroon)", + bav: "Vengo", + bax: "Bamun", + bba: "Baatonum", + bbb: "Barai", + bbc: "BatakToba", + bbj: "Ghomálá'", + bbl: "Bats", + bbo: "NorthernBoboMadaré", + bbu: "Kulung(Nigeria)", + bcc: "SouthernBalochi", + bce: "Bamenyam", + bci: "Baoulé", + bcl: "CentralBikol", + bcs: "Kohumono", + bcw: "Bana", + bcy: "Bacama", + bcz: "Bainouk-Gunyaamolo", + bda: "Bayot", + bde: "Bade", + bdg: "Bonggi", + bdh: "Baka(SouthSudan)", + bdm: "Buduma", + bdq: "Bahnar", + bdu: "Oroko", + beb: "Bebele", + beh: "Biali", + bel: "Belarusian", + bem: "Bemba(Zambia)", + ben: "Bengali", + bep: "Besoa", + bew: "Betawi", + bex: "JurModo", + bfa: "Bari", + bfd: "Bafut", + bfo: "MalbaBirifor", + bft: "Balti", + bfy: "Bagheli", + bfz: "MahasuPahari", + bgc: "Haryanvi", + bgp: "EasternBalochi", + bgq: "Bagri", + bgr: "BawmChin", + bgt: "Bughotu", + bgw: "Bhatri", + bha: "Bharia", + bhb: "Bhili", + bhh: "Bukharic", + bho: "Bhojpuri", + bhp: "Bima", + bht: "Bhattiyali", + bhz: "Bada(Indonesia)", + bib: "Bissa", + bim: "Bimoba", + bis: "Bislama", + biv: "SouthernBirifor", + bjj: "Kanauji", + bjk: "Barok", + bjn: "Banjar", + bjr: "Binumarien", + bjt: "Balanta-Ganja", + bjv: "Bedjond", + bjw: "Bakwé", + bjz: "Baruga", + bkd: "Binukid", + bkh: "Bakoko", + bkm: "Kom(Cameroon)", + bkv: "Bekwarra", + bky: "Bokyi", + ble: "Balanta-Kentohe", + blh: "Kuwaa", + blt: "TaiDam", + blx: "Mag-IndiAyta", + blz: "Balantak", + bmm: "NorthernBetsimisarakaMalagasy", + bmq: "Bomu", + bmr: "Muinane", + bmu: "Somba-Siawari", + bmv: "Bum", + bng: "Benga", + bnm: "Batanga", + bnn: "Bunun", + bno: "Bantoanon", + bnp: "Bola", + bns: "Bundeli", + boa: "Bora", + bod: "Tibetan", + boj: "Anjam", + bom: "Berom", + bor: "Borôro", + bos: "Bosnian", + bou: "Bondei", + bov: "Tuwuli", + box: "Buamu", + bpr: "KoronadalBlaan", + 
bps: "SaranganiBlaan", + bqc: "Boko(Benin)", + bqg: "Bago-Kusuntu", + bqi: "Bakhtiari", + bqj: "Bandial", + bqp: "Busa", + bra: "Braj", + bre: "Breton", + brh: "Brahui", + bri: "Mokpwe", + bru: "EasternBru", + brx: "Bodo(India)", + bsc: "Bassari", + bsh: "Kati", + bsj: "Bangwinji", + bsk: "Burushaski", + bsq: "Bassa", + bss: "Akoose", + bsy: "SabahBisaya", + btd: "BatakDairi", + btm: "BatakMandailing", + bts: "BatakSimalungun", + btt: "Bete-Bendi", + btv: "Bateri", + btx: "BatakKaro", + bud: "Ntcham", + bug: "Buginese", + bul: "Bulgarian", + bum: "Bulu(Cameroon)", + buo: "Terei", + bus: "Bokobaru", + bux: "Boghom", + bvb: "Bube", + bvc: "Baelelea", + bvz: "Bauzi", + bwq: "SouthernBoboMadaré", + bwr: "Bura-Pabir", + bwu: "Buli(Ghana)", + bxf: "Bilur", + bxk: "Bukusu", + byc: "Ubaghara", + byr: "Baruya", + bys: "Burak", + byv: "Medumba", + byx: "Qaqet", + bzh: "MaposBuang", + bzi: "Bisu", + bzj: "BelizeKriolEnglish", + bzw: "Basa(Nigeria)", + caa: "Chortí", + cab: "Garifuna", + cac: "Chuj", + cak: "Kaqchikel", + cap: "Chipaya", + car: "GalibiCarib", + cas: "Tsimané", + cat: "Catalan", + cax: "Chiquitano", + cbc: "Carapana", + cbi: "Chachi", + cbr: "Cashibo-Cacataibo", + cbs: "Cashinahua", + cbt: "Chayahuita", + cbu: "Candoshi-Shapra", + cbv: "Cacua", + cce: "Chopi", + ccg: "SambaDaka", + cco: "ComaltepecChinantec", + cdj: "Churahi", + cdo: "MinDongChinese", + ceb: "Cebuano", + ceg: "Chamacoco", + cek: "EasternKhumiChin", + cen: "Cen", + ces: "Czech", + cfa: "Dijim-Bwilim", + cfm: "FalamChin", + cgc: "Kagayanen", + cgg: "Chiga", + che: "Chechen", + chf: "TabascoChontal", + chq: "QuiotepecChinantec", + chv: "Chuvash", + chz: "OzumacínChinantec", + cjk: "Chokwe", + cjo: "AshéninkaPajonal", + cjp: "Cabécar", + cjs: "Shor", + ckb: "CentralKurdish", + ckl: "Cibak", + cko: "Anufo", + ckr: "Kairak", + ckt: "Chukot", + cky: "Cakfem-Mushere", + cla: "Ron", + cle: "LealaoChinantec", + cly: "EasternHighlandChatino", + cme: "Cerma", + cmn: "MandarinChinese", + cmo: 
"CentralMnong", + cmr: "Mro-KhimiChin", + cnh: "HakhaChin", + cni: "Asháninka", + cnl: "LalanaChinantec", + cnt: "TepetotutlaChinantec", + coe: "Koreguaje", + cof: "Colorado", + cok: "SantaTeresaCora", + con: "Cofán", + cor: "Cornish", + cot: "Caquinte", + cou: "Wamey", + cpa: "PalantlaChinantec", + cpb: "Ucayali-YurúaAshéninka", + cpu: "PichisAshéninka", + cpx: "Pu-XianChinese", + cpy: "SouthUcayaliAshéninka", + crh: "CrimeanTatar", + crk: "PlainsCree", + crn: "ElNayarCora", + crq: "Iyo'wujwaChorote", + crs: "SeselwaCreoleFrench", + crt: "Iyojwa'jaChorote", + csk: "Jola-Kasa", + cso: "SochiapamChinantec", + ctd: "TedimChin", + cte: "TepinapaChinantec", + ctg: "Chittagonian", + ctl: "TlacoatzintepecChinantec", + cto: "Emberá-Catío", + ctu: "Chol", + cuc: "UsilaChinantec", + cui: "Cuiba", + cuk: "SanBlasKuna", + cul: "Culina", + cut: "TeutilaCuicatec", + cux: "TepeuxilaCuicatec", + cwa: "Kabwa", + cwe: "Kwere", + cwt: "Kuwaataay", + cya: "NopalaChatino", + cym: "Welsh", + daa: "Dangaléat", + dag: "Dagbani", + dah: "Gwahatike", + dan: "Danish", + dar: "Dargwa", + dav: "Taita", + dbd: "Dadiya", + dbj: "Ida'an", + dbq: "Daba", + dcc: "Deccan", + ddn: "Dendi(Benin)", + ded: "Dedua", + deg: "Degema", + des: "Desano", + deu: "German", + dga: "SouthernDagaare", + dgh: "Dghwede", + dgi: "NorthernDagara", + dgk: "Dagba", + dgo: "Dogri(individuallanguage)", + dgr: "Dogrib", + dhi: "Dhimal", + did: "Didinga", + dig: "Digo", + dik: "SouthwesternDinka", + dip: "NortheasternDinka", + div: "Dhivehi", + dje: "Zarma", + djk: "EasternMaroonCreole", + dmk: "Domaaki", + dml: "Dameli", + dnj: "Dan", + dnt: "MidGrandValleyDani", + dnw: "WesternDani", + dop: "Lukpa", + dos: "Dogosé", + dru: "Rukai", + dsb: "LowerSorbian", + dsh: "Daasanach", + dtp: "KadazanDusun", + dts: "ToroSoDogon", + dty: "Dotyali", + dua: "Duala", + dug: "Duruma", + dwr: "Dawro", + dyi: "DjiminiSenoufo", + dyo: "Jola-Fonyi", + dyu: "Dyula", + dzg: "Dazaga", + dzo: "Dzongkha", + ebu: "Embu", + ego: "Eggon", + eip: 
"Eipomek", + eiv: "Askopan", + eka: "Ekajuk", + ekk: "StandardEstonian", + eko: "Koti", + ekr: "Yace", + ell: "ModernGreek(1453-)", + elm: "Eleme", + emp: "NorthernEmberá", + enb: "Markweeta", + eng: "English", + enx: "Enxet", + epo: "Esperanto", + ese: "EseEjja", + ess: "CentralSiberianYupik", + esu: "CentralYupik", + eto: "Eton(Cameroon)", + ets: "Yekhee", + etu: "Ejagham", + eus: "Basque", + evn: "Evenki", + ewe: "Ewe", + ewo: "Ewondo", + eyo: "Keiyo", + eza: "Ezaa", + fal: "SouthFali", + fan: "Fang(EquatorialGuinea)", + fao: "Faroese", + far: "Fataleka", + fas: "Persian", + fat: "Fanti", + fia: "Nobiin", + fij: "Fijian", + fil: "Filipino", + fin: "Finnish", + fip: "Fipa", + fkk: "Kirya-Konzəl", + flr: "Fuliiru", + fmp: "Fe'fe'", + fmu: "FarWesternMuria", + fon: "Fon", + fra: "French", + frd: "Fordata", + fry: "WesternFrisian", + fub: "AdamawaFulfulde", + fuc: "Pulaar", + fue: "BorguFulfulde", + ful: "Fulah", + fuq: "Central-EasternNigerFulfulde", + fuv: "NigerianFulfulde", + gag: "Gagauz", + gai: "Borei", + gam: "Kandawo", + gau: "MudhiliGadaba", + gbi: "Galela", + gbk: "Gaddi", + gbm: "Garhwali", + gbo: "NorthernGrebo", + gbr: "Gbagyi", + gby: "Gbari", + gcc: "Mali", + gde: "Gude", + gdf: "Guduf-Gava", + geb: "Kire", + gej: "Gen", + ges: "Geser-Gorom", + ggg: "Gurgula", + gid: "Gidar", + gig: "Goaria", + gil: "Gilbertese", + giz: "SouthGiziga", + gjk: "KachiKoli", + gjn: "Gonja", + gju: "Gujari", + gkn: "Gokana", + gld: "Nanai", + gle: "Irish", + glg: "Galician", + glk: "Gilaki", + glv: "Manx", + glw: "Glavda", + gmv: "Gamo", + gna: "Kaansa", + gnd: "Zulgo-Gemzek", + gng: "Ngangam", + gof: "Gofa", + gog: "Gogo", + gol: "Gola", + gom: "GoanKonkani", + gor: "Gorontalo", + gqr: "Gor", + grc: "AncientGreek(to1453)", + gri: "Ghari", + grn: "Guarani", + grt: "Garo", + gsl: "Gusilay", + gso: "SouthwestGbaya", + gub: "Guajajára", + guc: "Wayuu", + gud: "YocobouéDida", + gug: "ParaguayanGuaraní", + guh: "Guahibo", + gui: "EasternBolivianGuaraní", + guj: "Gujarati", + 
guk: "Gumuz", + gum: "Guambiano", + guo: "Guayabero", + guq: "Aché", + gur: "Farefare", + guu: "Yanomamö", + gux: "Gourmanchéma", + guz: "Gusii", + gvc: "Guanano", + gvl: "Gulay", + gwc: "Gawri", + gwe: "Gweno", + gwi: "Gwichʼin", + gwr: "Gwere", + gwt: "Gawar-Bati", + gym: "Ngäbere", + gyr: "Guarayu", + gyz: "Geji", + had: "Hatam", + hag: "Hanga", + hah: "Hahon", + hak: "HakkaChinese", + hao: "Hakö", + hap: "Hupla", + hat: "Haitian", + hau: "Hausa", + haw: "Hawaiian", + hay: "Haya", + hbb: "Huba", + hch: "Huichol", + heb: "Hebrew", + heh: "Hehe", + her: "Herero", + hia: "Lamang", + hif: "FijiHindi", + hig: "Kamwe", + hil: "Hiligaynon", + hin: "Hindi", + hkk: "Hunjara-KainaKe", + hla: "Halia", + hlb: "Halbi", + hlt: "MatuChin", + hne: "Chhattisgarhi", + hnn: "Hanunoo", + hno: "NorthernHindko", + hns: "CaribbeanHindustani", + hoc: "Ho", + hrv: "Croatian", + hsb: "UpperSorbian", + hto: "MinicaHuitoto", + hub: "Huambisa", + hue: "SanFranciscoDelMarHuave", + hui: "Huli", + hul: "Hula", + hun: "Hungarian", + hus: "Huastec", + huu: "MuruiHuitoto", + huv: "SanMateoDelMarHuave", + hux: "NüpodeHuitoto", + hvn: "Sabu", + hwc: "Hawai'iCreoleEnglish", + hwo: "Hwana", + hye: "Armenian", + hyw: "WesternArmenian", + iba: "Iban", + ibb: "Ibibio", + ibo: "Igbo", + icr: "IslanderCreoleEnglish", + ida: "Idakho-Isukha-Tiriki", + idd: "EdeIdaca", + idu: "Idoma", + ifa: "AmganadIfugao", + ifb: "BatadIfugao", + ife: "Ifè", + ifk: "TuwaliIfugao", + ifu: "MayoyaoIfugao", + ify: "Keley-IKallahan", + igl: "Igala", + ign: "Ignaciano", + ijc: "Izon", + ijn: "Kalabari", + ikk: "Ika", + ikw: "Ikwere", + ilb: "Ila", + ilo: "Iloko", + imo: "Imbongu", + ina: "Interlingua(InternationalAuxiliaryLanguageAssociation)", + inb: "Inga", + ind: "Indonesian", + iou: "Tuma-Irumu", + ipi: "Ipili", + ipk: "Inupiaq", + iqw: "Ikwo", + iri: "Rigwe", + irk: "Iraqw", + ish: "Esan", + isl: "Icelandic", + iso: "Isoko", + ita: "Italian", + itl: "Itelmen", + its: "Isekiri", + itv: "Itawit", + itw: "Ito", + itz: "Itzá", 
+ ixl: "Ixil", + izr: "Izere", + izz: "Izii", + jac: "Popti'", + jal: "Yalahatan", + jam: "JamaicanCreoleEnglish", + jav: "Javanese", + jax: "JambiMalay", + jbu: "JukunTakum", + jen: "Dza", + jic: "Tol", + jiv: "Shuar", + jmc: "Machame", + jmd: "Yamdena", + jmx: "WesternJuxtlahuacaMixtec", + jpn: "Japanese", + jqr: "Jaqaru", + juk: "Wapan", + jun: "Juang", + juo: "Jiba", + jvn: "CaribbeanJavanese", + kaa: "Kara-Kalpak", + kab: "Kabyle", + kac: "Kachin", + kai: "Karekare", + kaj: "Jju", + kak: "Kalanguya", + kam: "Kamba(Kenya)", + kan: "Kannada", + kao: "Xaasongaxango", + kaq: "Capanahua", + kas: "Kashmiri", + kat: "Georgian", + kay: "Kamayurá", + kaz: "Kazakh", + kbd: "Kabardian", + kbl: "Kanembu", + kbo: "Keliko", + kbp: "Kabiyè", + kbq: "Kamano", + kbr: "Kafa", + kbt: "Abadi", + kby: "MangaKanuri", + kca: "Khanty", + kcg: "Tyap", + kcn: "Nubi", + kcq: "Kamo", + kdc: "Kutu", + kde: "Makonde", + kdh: "Tem", + kdi: "Kumam", + kdj: "Karamojong", + kdl: "Tsikimba", + kdn: "Kunda", + kdt: "Kuy", + kea: "Kabuverdianu", + kek: "Kekchí", + ken: "Kenyang", + keo: "Kakwa", + ker: "Kera", + keu: "Akebu", + key: "Kupia", + kez: "Kukele", + kfb: "NorthwesternKolami", + kff: "Koya", + kfk: "Kinnauri", + kfq: "Korku", + kfr: "Kachhi", + kfw: "KharamNaga", + kfx: "KulluPahari", + kha: "Khasi", + khg: "KhamsTibetan", + khk: "HalhMongolian", + khm: "Khmer", + khq: "KoyraChiiniSonghay", + khw: "Khowar", + kia: "Kim", + kij: "Kilivila", + kik: "Kikuyu", + kin: "Kinyarwanda", + kir: "Kirghiz", + kix: "KhiamniunganNaga", + kjb: "Q'anjob'al", + kjc: "CoastalKonjo", + kje: "Kisar", + kjg: "Khmu", + kjh: "Khakas", + kjk: "HighlandKonjo", + kki: "Kagulu", + kkj: "Kako", + kle: "Kulung(Nepal)", + kln: "Kalenjin", + kls: "Kalasha", + klu: "Klao", + klv: "Maskelynes", + klw: "Tado", + kma: "Konni", + kmd: "MajukayangKalinga", + kml: "TanudanKalinga", + kmr: "NorthernKurdish", + kmu: "Kanite", + kmy: "Koma", + kna: "Dera(Nigeria)", + knb: "LubuaganKalinga", + knc: "CentralKanuri", + kne: 
"Kankanaey", + knf: "Mankanya", + knj: "WesternKanjobal", + knk: "Kuranko", + knn: "Konkani(individuallanguage)", + kno: "Kono(SierraLeone)", + kog: "Cogui", + kol: "Kol(PapuaNewGuinea)", + koo: "Konzo", + kor: "Korean", + kpo: "Ikposo", + kpq: "Korupun-Sela", + kps: "Tehit", + kpv: "Komi-Zyrian", + kpy: "Koryak", + kpz: "Kupsabiny", + kqe: "Kalagan", + kqo: "EasternKrahn", + kqp: "Kimré", + kqr: "Kimaragang", + kqy: "Koorete", + krc: "Karachay-Balkar", + kri: "Krio", + krj: "Kinaray-A", + krl: "Karelian", + krr: "Krung", + krs: "Gbaya(Sudan)", + kru: "Kurukh", + krx: "Karon", + ksb: "Shambala", + ksd: "Kuanua", + ksf: "Bafia", + ksr: "Borong", + kss: "SouthernKisi", + ksz: "Kodaku", + ktb: "Kambaata", + ktj: "PlapoKrumen", + kto: "Kuot", + kua: "Kuanyama", + kub: "Kutep", + kue: "Kuman(PapuaNewGuinea)", + kuh: "Kushi", + kum: "Kumyk", + kur: "Kurdish", + kus: "Kusaal", + kvn: "BorderKuna", + kvw: "Wersing", + kvx: "ParkariKoli", + kwd: "Kwaio", + kwf: "Kwara'ae", + kwi: "Awa-Cuaiquer", + kwm: "Kwambi", + kxc: "Konso", + kxf: "ManumanawKaren", + kxm: "NorthernKhmer", + kxp: "WadiyaraKoli", + kyb: "ButbutKalinga", + kyc: "Kyaka", + kyf: "Kouya", + kyg: "Keyagana", + kyo: "Kelon", + kyq: "Kenga", + kyu: "WesternKayah", + kyx: "Rapoisi", + kyz: "Kayabí", + kzf: "Da'aKaili", + kzi: "Kelabit", + lac: "Lacandon", + lag: "Rangi", + laj: "Lango(Uganda)", + lam: "Lamba", + lao: "Lao", + las: "Lama(Togo)", + lat: "Latin", + lav: "Latvian", + law: "Lauje", + lbj: "Ladakhi", + lbw: "Tolaki", + lcm: "Tungag", + lcp: "WesternLawa", + ldb: "Dũya", + led: "Lendu", + lee: "Lyélé", + lef: "Lelemi", + lem: "Nomaande", + lew: "LedoKaili", + lex: "Luang", + lgg: "Lugbara", + lgl: "Wala", + lhu: "Lahu", + lia: "West-CentralLimba", + lid: "Nyindrou", + lif: "Limbu", + lij: "Ligurian", + lin: "Lingala", + lip: "Sekpele", + lir: "LiberianEnglish", + lis: "Lisu", + lit: "Lithuanian", + lje: "Rampi", + ljp: "LampungApi", + lkb: "Kabras", + lke: "Kenyi", + lla: "Lala-Roba", + lld: "Ladin", + 
llg: "Lole", + lln: "Lele(Chad)", + lme: "Pévé", + lnd: "Lundayeh", + lns: "Lamnso'", + lnu: "Longuda", + loa: "Loloda", + lob: "Lobi", + lok: "Loko", + lom: "Loma(Liberia)", + lon: "MalawiLomwe", + loq: "Lobala", + lrk: "Loarki", + lsi: "Lashi", + lsm: "Saamia", + lss: "Lasi", + ltg: "Latgalian", + lth: "Thur", + lto: "Tsotso", + ltz: "Luxembourgish", + lua: "Luba-Lulua", + luc: "Aringa", + lug: "Ganda", + luo: "Luo(KenyaandTanzania)", + lus: "Lushai", + lwg: "Wanga", + lwo: "Luwo", + lww: "Lewo", + lzz: "Laz", + maa: "SanJerónimoTecóatlMazatec", + mab: "YutanduchiMixtec", + mad: "Madurese", + maf: "Mafa", + mag: "Magahi", + mah: "Marshallese", + mai: "Maithili", + maj: "JalapaDeDíazMazatec", + mak: "Makasar", + mal: "Malayalam", + mam: "Mam", + maq: "ChiquihuitlánMazatec", + mar: "Marathi", + mau: "HuautlaMazatec", + maw: "Mampruli", + max: "NorthMoluccanMalay", + maz: "CentralMazahua", + mbb: "WesternBukidnonManobo", + mbc: "Macushi", + mbh: "Mangseng", + mbj: "Nadëb", + mbt: "MatigsalugManobo", + mbu: "Mbula-Bwazza", + mca: "Maca", + mcb: "Machiguenga", + mcd: "Sharanahua", + mcf: "Matsés", + mco: "CoatlánMixe", + mcp: "Makaa", + mcq: "Ese", + mcu: "CameroonMambila", + mcx: "Mpiemo", + mda: "Mada(Nigeria)", + mdd: "Mbum", + mdv: "SantaLucíaMonteverdeMixtec", + mdy: "Male(Ethiopia)", + med: "Melpa", + mee: "Mengen", + meh: "SouthwesternTlaxiacoMixtec", + mej: "Meyah", + mek: "Mekeo", + mel: "CentralMelanau", + men: "Mende(SierraLeone)", + meq: "Merey", + mer: "Meru", + met: "Mato", + meu: "Motu", + mev: "Mano", + mfe: "Morisyen", + mfh: "Matal", + mfi: "Wandala", + mfk: "NorthMofu", + mfm: "MarghiSouth", + mfn: "CrossRiverMbembe", + mfo: "Mbe", + mfq: "Moba", + mfv: "Mandjak", + mfy: "Mayo", + mfz: "Mabaan", + mgd: "Moru", + mge: "Mango", + mgg: "Mpumpong", + mgh: "Makhuwa-Meetto", + mgi: "Lijili", + mgo: "Meta'", + mhi: "Ma'di", + mhk: "Mungaka", + mhr: "EasternMari", + mhu: "Digaro-Mishmi", + mhx: "Maru", + mhy: "Ma'anyan", + mib: "AtatláhucaMixtec", + mie: 
"OcotepecMixtec", + mif: "Mofu-Gudur", + mig: "SanMiguelElGrandeMixtec", + mih: "ChayucoMixtec", + mil: "PeñolesMixtec", + mim: "AlacatlatzalaMixtec", + min: "Minangkabau", + mio: "PinotepaNacionalMixtec", + mip: "Apasco-ApoalaMixtec", + miq: "Mískito", + mit: "SouthernPueblaMixtec", + miu: "CacaloxtepecMixtec", + miy: "AyutlaMixtec", + miz: "CoatzospanMixtec", + mjl: "Mandeali", + mjv: "Mannan", + mkd: "Macedonian", + mkf: "Miya", + mki: "Dhatki", + mkl: "Mokole", + mkn: "KupangMalay", + mlg: "Malagasy", + mlq: "WesternManinkakan", + mlt: "Maltese", + mmc: "MichoacánMazahua", + mmg: "NorthAmbrym", + mnb: "Muna", + mne: "Naba", + mnf: "Mundani", + mni: "Manipuri", + mnk: "Mandinka", + mnw: "Mon", + mnx: "Manikion", + moa: "Mwan", + mog: "Mongondow", + mon: "Mongolian", + mop: "MopánMaya", + mor: "Moro", + mos: "Mossi", + mox: "Molima", + moz: "Mukulu", + mpg: "Marba", + mpm: "YosondúaMixtec", + mpp: "Migabac", + mpx: "Misima-Panaeati", + mqb: "Mbuko", + mqf: "Momuna", + mqj: "Mamasa", + mqn: "Moronene", + mqy: "Manggarai", + mri: "Maori", + mrj: "WesternMari", + mrr: "Maria(India)", + mrt: "MarghiCentral", + mrw: "Maranao", + msh: "MasikoroMalagasy", + msi: "SabahMalay", + msw: "Mansoanka", + msy: "Aruamu", + mtd: "Mualang", + mtj: "Moskona", + mto: "TotontepecMixe", + mtr: "Mewari", + mtu: "TututepecMixtec", + mtx: "TidaáMixtec", + mua: "Mundang", + mug: "Musgu", + muh: "Mündü", + mui: "Musi", + mup: "Malvi", + mur: "Murle", + muv: "Muthuvan", + muy: "Muyang", + mve: "Marwari(Pakistan)", + mvp: "Duri", + mvy: "IndusKohistani", + mwq: "MünChin", + mwv: "Mentawai", + mxb: "TezoatlánMixtec", + mxq: "JuquilaMixe", + mxs: "HuitepecMixtec", + mxt: "JamiltepecMixtec", + mxu: "Mada(Cameroon)", + mxv: "MetlatónocMixtec", + mxy: "SoutheasternNochixtlánMixtec", + mya: "Burmese", + myb: "Mbay", + myk: "MamaraSenoufo", + myv: "Erzya", + myx: "Masaaba", + myy: "Macuna", + mza: "SantaMaríaZacatepecMixtec", + mzi: "IxcatlánMazatec", + mzj: "Manya", + mzk: "NigeriaMambila", + mzl: 
"MazatlánMixe", + mzm: "Mumuye", + mzw: "Deg", + nab: "SouthernNambikuára", + nag: "NagaPidgin", + nal: "Nalik", + nan: "Min Nan Chinese", + nap: "Neapolitan", + nas: "Naasioi", + naw: "Nawuri", + nbh: "Ngamo", + nca: "Iyo", + ncf: "Notsi", + nch: "CentralHuastecaNahuatl", + ncj: "NorthernPueblaNahuatl", + ncl: "MichoacánNahuatl", + nco: "Sibe", + ncu: "Chumburung", + ncx: "CentralPueblaNahuatl", + ndi: "SambaLeko", + ndj: "Ndamba", + ndo: "Ndonga", + ndp: "Ndo", + ndv: "Ndut", + ndy: "Lutos", + ndz: "Ndogo", + neb: "Toura(Côted'Ivoire)", + nep: "Nepali(macrolanguage)", + new: "Newari", + nfa: "Dhao", + nfr: "Nafaanra", + nga: "Ngbaka", + ngi: "Ngizim", + ngl: "Lomwe", + ngp: "Ngulu", + ngu: "GuerreroNahuatl", + nhe: "EasternHuastecaNahuatl", + nhg: "TetelcingoNahuatl", + nhi: "Zacatlán-Ahuacatlán-TepetzintlaNahuatl", + nhn: "CentralNahuatl", + nhq: "HuaxcalecaNahuatl", + nhu: "Noone", + nhw: "WesternHuastecaNahuatl", + nhx: "Isthmus-MecayapanNahuatl", + nhy: "NorthernOaxacaNahuatl", + nia: "Nias", + nij: "Ngaju", + nim: "Nilamba", + nin: "Ninzo", + nja: "Nzanyi", + nko: "Nkonya", + nla: "Ngombale", + nlc: "Nalca", + nld: "Dutch", + nlg: "Gela", + nlk: "NiniaYali", + nlv: "OrizabaNahuatl", + nmg: "Kwasio", + nmz: "Nawdm", + nnb: "Nande", + nnh: "Ngiemboon", + nnq: "Ngindo", + nnw: "SouthernNuni", + noa: "WounMeu", + nob: "NorwegianBokmål", + nod: "NorthernThai", + noe: "Nimadi", + nog: "Nogai", + not: "Nomatsiguenga", + npl: "SoutheasternPueblaNahuatl", + npy: "Napu", + nso: "Pedi", + nst: "TaseNaga", + nsu: "SierraNegraNahuatl", + ntm: "Nateni", + ntr: "Delo", + nuj: "Nyole", + nup: "Nupe-Nupe-Tako", + nus: "Nuer", + nuz: "TlamacazapaNahuatl", + nwb: "Nyabwa", + nxq: "Naxi", + nya: "Nyanja", + nyf: "Giryama", + nyn: "Nyankole", + nyo: "Nyoro", + nyu: "Nyungwe", + nyy: "Nyakyusa-Ngonde", + nzi: "Nzima", + obo: "OboManobo", + oci: "Occitan(post1500)", + odk: "Od", + odu: "Odual", + ogo: "Khana", + ojb: "NorthwesternOjibwa", + oku: "Oku", + old: "Mochi", + omw: 
"SouthTairora", + onb: "Lingao", + ood: "TohonoO'odham", + orc: "Orma", + orm: "Oromo", + oru: "Ormuri", + ory: "Odia", + oss: "Ossetian", + ote: "MezquitalOtomi", + otq: "QuerétaroOtomi", + ozm: "Koonzime", + pab: "Parecís", + pad: "Paumarí", + pag: "Pangasinan", + pam: "Pampanga", + pan: "Panjabi", + pao: "NorthernPaiute", + pap: "Papiamento", + pau: "Palauan", + pbb: "Páez", + pbc: "Patamona", + pbi: "Parkwa", + pbs: "CentralPame", + pbt: "SouthernPashto", + pbu: "NorthernPashto", + pce: "RuchingPalaung", + pcm: "NigerianPidgin", + pex: "Petats", + pez: "EasternPenan", + phl: "Phalura", + phr: "Pahari-Potwari", + pib: "Yine", + pil: "Yom", + pip: "Pero", + pir: "Piratapuyo", + pis: "Pijin", + piy: "Piya-Kwonci", + pjt: "Pitjantjatjara", + pkb: "Pokomo", + pko: "Pökoot", + plk: "KohistaniShina", + pls: "SanMarcosTlacoyalcoPopoloca", + plt: "PlateauMalagasy", + plw: "Brooke'sPointPalawano", + pmf: "Pamona", + pmq: "NorthernPame", + pms: "Piemontese", + pmy: "PapuanMalay", + pnb: "WesternPanjabi", + pne: "WesternPenan", + pny: "Pinyin", + poc: "Poqomam", + poe: "SanJuanAtzingoPopoloca", + poh: "Poqomchi'", + poi: "HighlandPopoluca", + pol: "Polish", + por: "Portuguese", + pov: "UpperGuineaCrioulo", + pow: "SanFelipeOtlaltepecPopoloca", + poy: "Pogolo", + ppk: "Uma", + pps: "SanLuísTemalacayucaPopoloca", + prf: "Paranan", + prk: "Parauk", + prq: "AshéninkaPerené", + prt: "Phai", + pse: "CentralMalay", + pss: "Kaulong", + pst: "CentralPashto", + ptu: "Bambam", + pua: "WesternHighlandPurepecha", + pui: "Puinave", + pus: "Pushto", + pwg: "Gapapaiwa", + pwn: "Paiwan", + pww: "PwoNorthernKaren", + pxm: "QuetzaltepecMixe", + qub: "HuallagaHuánucoQuechua", + quc: "K'iche'", + quf: "LambayequeQuechua", + qug: "ChimborazoHighlandQuichua", + quh: "SouthBolivianQuechua", + qul: "NorthBolivianQuechua", + qum: "Sipacapense", + qup: "SouthernPastazaQuechua", + qur: "YanahuancaPascoQuechua", + qus: "SantiagodelEsteroQuichua", + quv: "Sacapulteco", + quw: "TenaLowlandQuichua", + 
qux: "YauyosQuechua", + quy: "AyacuchoQuechua", + quz: "CuscoQuechua", + qva: "Ambo-PascoQuechua", + qvc: "CajamarcaQuechua", + qve: "EasternApurímacQuechua", + qvh: "Huamalíes-DosdeMayoHuánucoQuechua", + qvi: "ImbaburaHighlandQuichua", + qvj: "LojaHighlandQuichua", + qvl: "CajatamboNorthLimaQuechua", + qvm: "Margos-Yarowilca-LauricochaQuechua", + qvn: "NorthJunínQuechua", + qvo: "NapoLowlandQuechua", + qvs: "SanMartínQuechua", + qvw: "HuayllaWancaQuechua", + qvz: "NorthernPastazaQuichua", + qwa: "CorongoAncashQuechua", + qwh: "HuaylasAncashQuechua", + qws: "SihuasAncashQuechua", + qxa: "ChiquiánAncashQuechua", + qxh: "PanaoHuánucoQuechua", + qxl: "SalasacaHighlandQuichua", + qxn: "NorthernConchucosAncashQuechua", + qxo: "SouthernConchucosAncashQuechua", + qxp: "PunoQuechua", + qxr: "CañarHighlandQuichua", + qxt: "SantaAnadeTusiPascoQuechua", + qxu: "Arequipa-LaUniónQuechua", + qxw: "JaujaWancaQuechua", + rag: "Logooli", + rah: "Rabha", + rai: "Ramoaaina", + rap: "Rapanui", + rav: "Sampang", + raw: "Rawang", + rej: "Rejang", + rel: "Rendille", + rgu: "Ringgou", + rhg: "Rohingya", + rif: "Tarifit", + rim: "Nyaturu", + rjs: "Rajbanshi", + rkt: "Rangpuri", + rmc: "CarpathianRomani", + rmo: "SinteRomani", + rmy: "VlaxRomani", + rng: "Ronga", + rnl: "Ranglong", + rob: "Tae'", + rof: "Rombo", + roh: "Romansh", + rol: "Romblomanon", + ron: "Romanian", + roo: "Rotokas", + rop: "Kriol", + rro: "Waima", + rth: "Ratahan", + rub: "Gungu", + ruc: "Ruuli", + ruf: "Luguru", + rug: "Roviana", + run: "Rundi", + rus: "Russian", + rwm: "Amba(Uganda)", + rwr: "Marwari(India)", + sab: "Buglere", + sag: "Sango", + sah: "Yakut", + saj: "Sahu", + saq: "Samburu", + sas: "Sasak", + sau: "Saleman", + say: "Saya", + sba: "Ngambay", + sbd: "SouthernSamo", + sbl: "BotolanSambal", + sbn: "SindhiBhil", + sbp: "Sangu(Tanzania)", + sch: "Sakachep", + sck: "Sadri", + scl: "Shina", + scn: "Sicilian", + sco: "Scots", + sda: "Toraja-Sa'dan", + sdo: "Bukar-SadungBidayuh", + sea: "Semai", + seh: "Sena", 
+ sei: "Seri", + ses: "KoyraboroSenniSonghai", + sey: "Secoya", + sgb: "Mag-antsiAyta", + sgj: "Surgujia", + sgw: "SebatBetGurage", + shi: "Tachelhit", + shk: "Shilluk", + shn: "Shan", + sho: "Shanga", + shp: "Shipibo-Conibo", + sid: "Sidamo", + sig: "Paasaal", + sil: "TumulungSisaala", + sin: "Sinhala", + sip: "Sikkimese", + siw: "Siwai", + sja: "Epena", + sjm: "Mapun", + sjp: "Surjapuri", + sjr: "Siar-Lak", + skg: "SakalavaMalagasy", + skr: "Saraiki", + sld: "Sissala", + slk: "Slovak", + slu: "Selaru", + slv: "Slovenian", + sml: "CentralSama", + smo: "Samoan", + sna: "Shona", + snc: "Sinaugoro", + snd: "Sindhi", + sne: "BauBidayuh", + snk: "Soninke", + snn: "Siona", + snp: "Siane", + snv: "Sa'ban", + snw: "Selee", + sol: "Solos", + som: "Somali", + soy: "Miyobe", + spa: "Spanish", + spp: "SupyireSenoufo", + sps: "Saposa", + spy: "Sabaot", + src: "LogudoreseSardinian", + srd: "Sardinian", + sri: "Siriano", + srm: "Saramaccan", + srn: "SrananTongo", + sro: "CampidaneseSardinian", + srp: "Serbian", + srr: "Serer", + srx: "Sirmauri", + ssi: "Sansi", + ste: "Liana-Seti", + stn: "Owa", + stp: "SoutheasternTepehuan", + sua: "Sulka", + suc: "WesternSubanon", + suk: "Sukuma", + sun: "Sundanese", + sur: "Mwaghavul", + sus: "Susu", + suv: "Puroik", + suz: "Sunwar", + sva: "Svan", + swe: "Swedish", + swh: "Swahili(individuallanguage)", + swv: "Shekhawati", + sxb: "Suba", + sxn: "Sangir", + sya: "Siang", + syl: "Sylheti", + sza: "Semelai", + szy: "Sakizaya", + tac: "LowlandTarahumara", + taj: "EasternTamang", + tam: "Tamil", + tan: "Tangale", + tao: "Yami", + tap: "Taabwa", + taq: "Tamasheq", + tar: "CentralTarahumara", + tat: "Tatar", + tav: "Tatuyo", + tay: "Atayal", + tbc: "Takia", + tbf: "Mandara", + tbg: "NorthTairora", + tbk: "CalamianTagbanwa", + tbl: "Tboli", + tby: "Tabaru", + tbz: "Ditammari", + tca: "Ticuna", + tcc: "Datooga", + tcf: "MalinaltepecMe'phaa", + tcy: "Tulu", + tcz: "ThadoChin", + tdj: "Tajio", + tdn: "Tondano", + tdx: "Tandroy-MahafalyMalagasy", + ted: 
"TepoKrumen", + tee: "HuehuetlaTepehua", + tel: "Telugu", + tem: "Timne", + teo: "Teso", + ter: "Tereno", + tew: "Tewa(USA)", + tex: "Tennet", + tfr: "Teribe", + tgc: "Tigak", + tgj: "Tagin", + tgk: "Tajik", + tgl: "Tagalog", + tgo: "Sudest", + tgp: "Tangoa", + tha: "Thai", + the: "ChitwaniaTharu", + thk: "Tharaka", + thl: "DangauraTharu", + thq: "KochilaTharu", + thr: "RanaTharu", + thv: "TahaggartTamahaq", + tig: "Tigre", + tih: "TimugonMurut", + tik: "Tikar", + tio: "Teop", + tir: "Tigrinya", + tkg: "TesakaMalagasy", + tkr: "Tsakhur", + tkt: "KathoriyaTharu", + tlb: "Tobelo", + tli: "Tlingit", + tlj: "Talinga-Bwisi", + tlp: "FilomenaMata-CoahuitlánTotonac", + tly: "Talysh", + tmc: "Tumak", + tmf: "Toba-Maskoy", + tna: "Tacana", + tng: "Tobanga", + tnk: "Kwamera", + tnn: "NorthTanna", + tnp: "Whitesands", + tnr: "Ménik", + tnt: "Tontemboan", + tob: "Toba", + toc: "CoyutlaTotonac", + toh: "Gitonga", + tok: "TokiPona", + tom: "Tombulu", + top: "PapantlaTotonac", + tos: "HighlandTotonac", + tpi: "TokPisin", + tpl: "TlacoapaMe'phaa", + tpm: "Tampulma", + tpp: "PisafloresTepehua", + tpt: "TlachichilcoTepehua", + tpz: "Tinputz", + tqp: "Tomoip", + trc: "CopalaTriqui", + tri: "Trió", + trn: "Trinitario", + trp: "KokBorok", + trq: "SanMartínItunyosoTriqui", + trs: "ChicahuaxtlaTriqui", + trv: "Sediq", + trw: "Torwali", + tsn: "Tswana", + tso: "Tsonga", + tsz: "Purepecha", + ttc: "Tektiteko", + tte: "Bwanabwana", + ttj: "Tooro", + ttq: "TawallammatTamajaq", + ttr: "Tera", + ttu: "Torau", + tue: "Tuyuca", + tuf: "CentralTunebo", + tui: "Tupuri", + tuk: "Turkmen", + tul: "Tula", + tuo: "Tucano", + tuq: "Tedaga", + tur: "Turkish", + tuv: "Turkana", + tuy: "Tugen", + tvo: "Tidore", + tvu: "Tunen", + tvw: "Sedoa", + twb: "WesternTawbuid", + twe: "Tewa(Indonesia)", + twu: "Termanu", + txa: "Tombonuo", + txq: "Tii", + txs: "Tonsea", + txu: "Kayapó", + txy: "TanosyMalagasy", + tye: "Kyanga", + tzh: "Tzeltal", + tzj: "Tz'utujil", + tzo: "Tzotzil", + ubl: "Buhi'nonBikol", + ubu: 
"Umbu-Ungu", + udl: "Wuzlam", + udm: "Udmurt", + udu: "Uduk", + uig: "Uighur", + uki: "Kui(India)", + ukr: "Ukrainian", + ukv: "Kuku", + umb: "Umbundu", + upv: "Uripiv-Wala-Rano-Atchin", + ura: "Urarina", + urb: "Urubú-Kaapor", + urd: "Urdu", + urh: "Urhobo", + urk: "UrakLawoi'", + urt: "Urat", + ury: "Orya", + ush: "Ushojo", + usp: "Uspanteco", + uzb: "Uzbek", + uzn: "NorthernUzbek", + vag: "Vagla", + vah: "Varhadi-Nagpuri", + vai: "Vai", + var: "Huarijio", + ver: "MomJango", + vid: "Vidunda", + vie: "Vietnamese", + vif: "Vili", + vmc: "JuxtlahuacaMixtec", + vmj: "IxtayutlaMixtec", + vmm: "MitlatongoMixtec", + vmp: "SoyaltepecMazatec", + vmw: "Makhuwa", + vmy: "AyautlaMazatec", + vmz: "MazatlánMazatec", + vro: "Võro", + vun: "Vunjo", + vut: "Vute", + wal: "Wolaytta", + wap: "Wapishana", + war: "Waray(Philippines)", + waw: "Waiwai", + way: "Wayana", + wba: "Warao", + wbl: "Wakhi", + wbr: "Wagdi", + wci: "WaciGbe", + weo: "Wemale", + wes: "CameroonPidgin", + wja: "Waja", + wji: "Warji", + wlo: "Wolio", + wlx: "Wali(Ghana)", + wmw: "Mwani", + wob: "WèNorthern", + wof: "GambianWolof", + wol: "Wolof", + wsg: "AdilabadGondi", + wwa: "Waama", + xal: "Kalmyk", + xdy: "MalayicDayak", + xed: "Hdi", + xer: "Xerénte", + xhe: "Khetrani", + xho: "Xhosa", + xka: "Kalkoti", + xkl: "MainstreamKenyah", + xmf: "Mingrelian", + xmm: "ManadoMalay", + xmv: "AntankaranaMalagasy", + xnj: "Ngoni(Tanzania)", + xnr: "Kangri", + xog: "Soga", + xon: "Konkomba", + xpe: "LiberiaKpelle", + xrb: "EasternKaraboro", + xsb: "Sambal", + xsm: "Kasem", + xsr: "Sherpa", + xsu: "Sanumá", + xta: "AlcozaucaMixtec", + xtd: "Diuxi-TilantongoMixtec", + xte: "Ketengban", + xti: "SinicahuaMixtec", + xtm: "MagdalenaPeñascoMixtec", + xtn: "NorthernTlaxiacoMixtec", + xtu: "CuyamecalcoMixtec", + xua: "AluKurumba", + xuo: "Kuo", + yaa: "Yaminahua", + yad: "Yagua", + yal: "Yalunka", + yam: "Yamba", + yao: "Yao", + yaq: "Yaqui", + yas: "Nugunu(Cameroon)", + yat: "Yambeta", + yav: "Yangben", + yay: "Agwagwune", + yaz: 
"Lokaa", + yba: "Yala", + ybb: "Yemba", + ycl: "Lolopo", + ycn: "Yucuna", + ydd: "EasternYiddish", + ydg: "Yidgha", + yea: "Ravula", + yer: "Tarok", + yes: "Nyankpa", + yka: "Yakan", + yli: "AnggurukYali", + yor: "Yoruba", + yre: "Yaouré", + yua: "Yucateco", + yue: "YueChinese", + yuz: "Yuracare", + yva: "Yawa", + zaa: "SierradeJuárezZapotec", + zab: "WesternTlacolulaValleyZapotec", + zac: "OcotlánZapotec", + zad: "CajonosZapotec", + zae: "YareniZapotec", + zai: "IsthmusZapotec", + zam: "MiahuatlánZapotec", + zao: "OzolotepecZapotec", + zaq: "AloápamZapotec", + zar: "RincónZapotec", + zas: "SantoDomingoAlbarradasZapotec", + zav: "YatzachiZapotec", + zaw: "MitlaZapotec", + zca: "CoatecasAltasZapotec", + zga: "Kinga", + zim: "Mesme", + ziw: "Zigula", + zmz: "Mbandja", + zne: "Zande(individuallanguage)", + zoc: "CopainaláZoque", + zoh: "ChimalapaZoque", + zor: "RayónZoque", + zos: "FranciscoLeónZoque", + zpc: "ChoapanZapotec", + zpg: "GueveaDeHumboldtZapotec", + zpi: "SantaMaríaQuiegolaniZapotec", + zpl: "LachixíoZapotec", + zpm: "MixtepecZapotec", + zpo: "AmatlánZapotec", + zpt: "SanVicenteCoatlánZapotec", + zpu: "YalálagZapotec", + zpv: "ChichicapanZapotec", + zpy: "MazaltepecZapotec", + zpz: "TexmelucanZapotec", + zsm: "StandardMalay", + ztg: "XanaguíaZapotec", + ztn: "SantaCatarinaAlbarradasZapotec", + ztp: "LoxichaZapotec", + ztq: "Quioquitani-QuieríZapotec", + zts: "TilquiapanZapotec", + ztu: "GüiláZapotec", + zty: "YateeZapotec", + zul: "Zulu", + zyb: "YongbeiZhuang", + zyp: "ZypheChin", + zza: "Zaza", +}; \ No newline at end of file diff --git a/sharedUtils/omniAsrSupportedLangs.ts b/sharedUtils/omniAsrSupportedLangs.ts new file mode 100644 index 000000000..bafb5995a --- /dev/null +++ b/sharedUtils/omniAsrSupportedLangs.ts @@ -0,0 +1,315 @@ +/** + * OmniASR supported-language snapshot + * ----------------------------------- + * + * Static snapshot of the language codes supported by the OmniASR transcription + * service (Meta Omnilingual ASR — 
`omniASR_LLM_1B_v2`). Each entry is in + * `{iso639_3}_{Script}` form, e.g. `eng_Latn`, `swh_Latn`, `urd_Arab`. + * + * We bundle this list so the extension can validate / resolve language codes + * offline, with no runtime network dependency. + * + * Regenerating + * ~~~~~~~~~~~~ + * If we change ASR providers or the underlying model, regenerate this file from + * the live `/languages` endpoint: + * + * curl -s "https://genesis-ai-dev--codex-asr-serve.modal.run/languages" \ + * | python3 -c " + * import json, sys + * d = json.load(sys.stdin) + * langs = sorted(set(d['languages'])) + * print('export const OMNI_ASR_SUPPORTED_LANGS: readonly string[] = [') + * for i in range(0, len(langs), 6): + * print(' ' + ', '.join(f'\"{c}\"' for c in langs[i:i+6]) + ',') + * print('];') + * " + * + * (Pre-rename, the host was `genesis-ai-dev--mms-zeroshot-asr-serve.modal.run`.) + * + * Snapshot taken: 2026-06-04. Server reported 1672 languages. + */ + +export const OMNI_ASR_SUPPORTED_LANGS: readonly string[] = [ + "aae_Latn", "aal_Latn", "abb_Latn", "abi_Latn", "abk_Cyrl", "abn_Latn", + "abp_Latn", "abr_Latn", "abs_Latn", "aca_Latn", "acd_Latn", "ace_Latn", + "acf_Latn", "ach_Latn", "acm_Arab", "acn_Latn", "acr_Latn", "acu_Latn", + "acw_Arab", "ade_Latn", "adh_Latn", "adj_Latn", "adx_Tibt", "ady_Cyrl", + "aeb_Arab", "aec_Arab", "aeu_Latn", "afb_Arab", "afo_Latn", "afr_Latn", + "agd_Latn", "agg_Latn", "agn_Latn", "agr_Latn", "agu_Latn", "agx_Cyrl", + "aha_Latn", "ahk_Latn", "ahl_Latn", "ahs_Latn", "aia_Latn", "ajg_Latn", + "aka_Latn", "akb_Latn", "ake_Latn", "akp_Latn", "ala_Latn", "alj_Latn", + "aln_Latn", "alo_Latn", "alp_Latn", "als_Latn", "alt_Cyrl", "alz_Latn", + "ame_Latn", "amf_Latn", "amh_Ethi", "ami_Latn", "amk_Latn", "amu_Latn", + "anc_Latn", "ank_Latn", "ann_Latn", "anp_Deva", "anw_Latn", "any_Latn", + "aom_Latn", "aoz_Latn", "apb_Latn", "apc_Arab", "apd_Arab", "apr_Latn", + "arb_Arab", "arg_Latn", "arl_Latn", "arq_Arab", "ars_Arab", "ary_Arab", + "arz_Arab", 
"asa_Latn", "asg_Latn", "asm_Beng", "ast_Latn", "ata_Latn", + "atb_Latn", "atg_Latn", "ati_Latn", "atq_Latn", "ava_Cyrl", "avn_Latn", + "avu_Latn", "awa_Deva", "awb_Latn", "awo_Latn", "ayl_Arab", "ayo_Latn", + "ayp_Arab", "ayr_Latn", "ayz_Latn", "aze_Arab", "aze_Cyrl", "aze_Latn", + "azg_Latn", "azz_Latn", "bag_Latn", "bak_Cyrl", "bam_Latn", "ban_Latn", + "bao_Latn", "bas_Latn", "bav_Latn", "bax_Latn", "bba_Latn", "bbb_Latn", + "bbc_Latn", "bbj_Latn", "bbl_Geor", "bbo_Latn", "bbu_Latn", "bcc_Arab", + "bcc_Latn", "bce_Latn", "bci_Latn", "bcl_Latn", "bcs_Latn", "bcw_Latn", + "bcy_Latn", "bcz_Latn", "bda_Latn", "bde_Latn", "bdg_Latn", "bdh_Latn", + "bdm_Latn", "bdq_Latn", "bdu_Latn", "beb_Latn", "beh_Latn", "bel_Cyrl", + "bem_Latn", "ben_Beng", "bep_Latn", "bew_Latn", "bex_Latn", "bfa_Latn", + "bfd_Latn", "bfo_Latn", "bft_Arab", "bfy_Deva", "bfz_Deva", "bgc_Deva", + "bgp_Arab", "bgq_Deva", "bgr_Latn", "bgt_Latn", "bgw_Deva", "bha_Deva", + "bhb_Deva", "bhh_Cyrl", "bho_Deva", "bhp_Latn", "bht_Deva", "bhz_Latn", + "bib_Latn", "bim_Latn", "bis_Latn", "biv_Latn", "bjj_Deva", "bjk_Latn", + "bjn_Latn", "bjr_Latn", "bjt_Latn", "bjv_Latn", "bjw_Latn", "bjz_Latn", + "bkd_Latn", "bkh_Latn", "bkm_Latn", "bkv_Latn", "bky_Latn", "ble_Latn", + "blh_Latn", "blt_Latn", "blx_Latn", "blz_Latn", "bmm_Latn", "bmq_Latn", + "bmr_Latn", "bmu_Latn", "bmv_Latn", "bng_Beng", "bnm_Latn", "bnn_Latn", + "bno_Latn", "bnp_Latn", "bns_Deva", "boa_Latn", "bod_Tibt", "boj_Latn", + "bom_Latn", "bor_Latn", "bos_Latn", "bou_Latn", "bov_Latn", "box_Latn", + "bpr_Latn", "bps_Latn", "bqc_Latn", "bqg_Latn", "bqi_Arab", "bqj_Latn", + "bqp_Latn", "bra_Deva", "bre_Latn", "brh_Arab", "bri_Latn", "bru_Latn", + "brx_Deva", "bsc_Latn", "bsh_Arab", "bsj_Latn", "bsk_Latn", "bsq_Latn", + "bss_Latn", "bsy_Latn", "btd_Latn", "btm_Latn", "bts_Latn", "btt_Latn", + "btv_Arab", "btx_Latn", "bud_Latn", "bug_Latn", "bul_Cyrl", "bum_Latn", + "buo_Latn", "bus_Latn", "bux_Latn", "bvb_Latn", "bvc_Latn", "bvz_Latn", + "bwq_Latn", 
"bwr_Latn", "bwu_Latn", "bxf_Latn", "bxk_Latn", "byc_Latn", + "byr_Latn", "bys_Latn", "byv_Latn", "byx_Latn", "bzh_Latn", "bzi_Thai", + "bzj_Latn", "bzw_Latn", "caa_Latn", "cab_Latn", "cac_Latn", "cak_Latn", + "cap_Latn", "car_Latn", "cas_Latn", "cat_Latn", "cax_Latn", "cbc_Latn", + "cbi_Latn", "cbr_Latn", "cbs_Latn", "cbt_Latn", "cbu_Latn", "cbv_Latn", + "cce_Latn", "ccg_Latn", "cco_Latn", "cdj_Deva", "cdo_Hans", "ceb_Latn", + "ceg_Latn", "cek_Latn", "cen_Latn", "ces_Latn", "cfa_Latn", "cfm_Latn", + "cgc_Latn", "cgg_Latn", "che_Cyrl", "chf_Latn", "chq_Latn", "chv_Cyrl", + "chz_Latn", "cjk_Latn", "cjo_Latn", "cjp_Latn", "cjs_Cyrl", "ckb_Arab", + "ckl_Latn", "cko_Latn", "ckr_Latn", "ckt_Cyrl", "cky_Latn", "cla_Latn", + "cle_Latn", "cly_Latn", "cme_Latn", "cmn_Hans", "cmn_Hant", "cmo_Khmr", + "cmo_Latn", "cmr_Latn", "cnh_Latn", "cni_Latn", "cnl_Latn", "cnt_Latn", + "coe_Latn", "cof_Latn", "cok_Latn", "con_Latn", "cor_Latn", "cot_Latn", + "cou_Latn", "cpa_Latn", "cpb_Latn", "cpu_Latn", "cpx_Hans", "cpy_Latn", + "crh_Cyrl", "crk_Cans", "crk_Latn", "crn_Latn", "crq_Latn", "crs_Latn", + "crt_Latn", "csk_Latn", "cso_Latn", "ctd_Latn", "cte_Latn", "ctg_Beng", + "ctl_Latn", "cto_Latn", "ctu_Latn", "cuc_Latn", "cui_Latn", "cuk_Latn", + "cul_Latn", "cut_Latn", "cux_Latn", "cwa_Latn", "cwe_Latn", "cwt_Latn", + "cya_Latn", "cym_Latn", "daa_Latn", "dag_Latn", "dah_Latn", "dan_Latn", + "dar_Cyrl", "dav_Latn", "dbd_Latn", "dbj_Latn", "dbq_Latn", "dcc_Arab", + "ddn_Latn", "ded_Latn", "deg_Latn", "des_Latn", "deu_Latn", "dga_Latn", + "dgh_Latn", "dgi_Latn", "dgk_Latn", "dgo_Deva", "dgr_Latn", "dhi_Deva", + "did_Latn", "dig_Latn", "dik_Latn", "dip_Latn", "div_Thaa", "dje_Latn", + "djk_Latn", "dmk_Arab", "dml_Arab", "dnj_Latn", "dnt_Latn", "dnw_Latn", + "dop_Latn", "dos_Latn", "dru_Latn", "dsb_Latn", "dsh_Latn", "dtp_Latn", + "dts_Latn", "dty_Deva", "dua_Latn", "dug_Latn", "dwr_Latn", "dyi_Latn", + "dyo_Latn", "dyu_Latn", "dzg_Latn", "dzo_Tibt", "ebu_Latn", "ego_Latn", + "eip_Latn", 
"eiv_Latn", "eka_Latn", "ekk_Latn", "eko_Latn", "ekr_Latn", + "ell_Grek", "ell_Grek_cypr1249", "elm_Latn", "emp_Latn", "enb_Latn", "eng_Latn", + "enx_Latn", "epo_Latn", "ese_Latn", "ess_Latn", "esu_Latn", "eto_Latn", + "ets_Latn", "etu_Latn", "eus_Latn", "evn_Cyrl", "ewe_Latn", "ewo_Latn", + "eyo_Latn", "eza_Latn", "fal_Latn", "fan_Latn", "fao_Latn", "far_Latn", + "fas_Arab", "fat_Latn", "fia_Latn", "fij_Latn", "fil_Latn", "fin_Latn", + "fip_Latn", "fkk_Latn", "flr_Latn", "fmp_Latn", "fmu_Deva", "fon_Latn", + "fra_Latn", "frd_Latn", "fry_Latn", "fub_Latn", "fuc_Latn", "fue_Latn", + "ful_Latn", "fuq_Latn", "fuv_Latn", "gag_Cyrl", "gag_Latn", "gai_Latn", + "gam_Latn", "gau_Telu", "gbi_Latn", "gbk_Deva", "gbm_Deva", "gbo_Latn", + "gbr_Latn", "gby_Latn", "gcc_Latn", "gde_Latn", "gdf_Latn", "geb_Latn", + "gej_Latn", "ges_Latn", "ggg_Arab", "gid_Latn", "gig_Arab", "gil_Latn", + "giz_Latn", "gjk_Arab", "gjn_Latn", "gju_Arab", "gkn_Latn", "gld_Cyrl", + "gle_Latn", "glg_Latn", "glk_Arab", "glv_Latn", "glw_Latn", "gmv_Latn", + "gna_Latn", "gnd_Latn", "gng_Latn", "gof_Latn", "gog_Latn", "gol_Latn", + "gom_Deva", "gor_Latn", "gqr_Latn", "grc_Grek", "gri_Latn", "grn_Latn", + "grt_Beng", "gsl_Latn", "gso_Latn", "gub_Latn", "guc_Latn", "gud_Latn", + "gug_Latn", "guh_Latn", "gui_Latn", "guj_Gujr", "guk_Ethi", "gum_Latn", + "guo_Latn", "guq_Latn", "gur_Latn", "guu_Latn", "gux_Latn", "guz_Latn", + "gvc_Latn", "gvl_Latn", "gwc_Arab", "gwe_Latn", "gwi_Latn", "gwr_Latn", + "gwt_Arab", "gym_Latn", "gyr_Latn", "gyz_Latn", "had_Latn", "hag_Latn", + "hah_Latn", "hak_Latn", "hao_Latn", "hap_Latn", "hat_Latn", "hau_Latn", + "haw_Latn", "hay_Latn", "hbb_Latn", "hch_Latn", "heb_Hebr", "heh_Latn", + "her_Latn", "hia_Latn", "hif_Latn", "hig_Latn", "hil_Latn", "hin_Deva", + "hkk_Latn", "hla_Latn", "hlb_Deva", "hlt_Latn", "hne_Deva", "hnn_Latn", + "hno_Arab", "hns_Latn", "hoc_Orya", "hrv_Latn", "hsb_Latn", "hto_Latn", + "hub_Latn", "hue_Latn", "hui_Latn", "hul_Latn", "hun_Latn", "hus_Latn", + 
"huu_Latn", "huv_Latn", "hux_Latn", "hvn_Latn", "hwc_Latn", "hwo_Latn", + "hye_Armn", "hyw_Armn", "iba_Latn", "ibb_Latn", "ibo_Latn", "icr_Latn", + "ida_Latn", "idd_Latn", "idu_Latn", "ifa_Latn", "ifb_Latn", "ife_Latn", + "ifk_Latn", "ifu_Latn", "ify_Latn", "igl_Latn", "ign_Latn", "ijc_Latn", + "ijn_Latn", "ikk_Latn", "ikw_Latn", "ilb_Latn", "ilo_Latn", "imo_Latn", + "ina_Latn", "inb_Latn", "ind_Latn", "iou_Latn", "ipi_Latn", "ipk_Latn", + "iqw_Latn", "iri_Latn", "irk_Latn", "ish_Latn", "isl_Latn", "iso_Latn", + "ita_Latn", "itl_Cyrl", "its_Latn", "itv_Latn", "itw_Latn", "itz_Latn", + "ixl_Latn", "izr_Latn", "izz_Latn", "jac_Latn", "jal_Latn", "jam_Latn", + "jav_Latn", "jax_Latn", "jbu_Latn", "jen_Latn", "jic_Latn", "jiv_Latn", + "jmc_Latn", "jmd_Latn", "jmx_Latn", "jpn_Jpan", "jqr_Latn", "juk_Latn", + "jun_Orya", "juo_Latn", "jvn_Latn", "kaa_Cyrl", "kab_Latn", "kac_Latn", + "kai_Latn", "kaj_Latn", "kak_Latn", "kam_Latn", "kan_Knda", "kao_Latn", + "kaq_Latn", "kas_Arab", "kat_Geor", "kay_Latn", "kaz_Cyrl", "kbd_Cyrl", + "kbl_Latn", "kbo_Latn", "kbp_Latn", "kbq_Latn", "kbr_Latn", "kbt_Latn", + "kby_Latn", "kca_Cyrl", "kcg_Latn", "kcn_Latn", "kcq_Latn", "kdc_Latn", + "kde_Latn", "kdh_Latn", "kdi_Latn", "kdj_Latn", "kdl_Latn", "kdn_Latn", + "kdt_Khmr", "kea_Latn", "kek_Latn", "ken_Latn", "keo_Latn", "ker_Latn", + "keu_Latn", "key_Telu", "kez_Latn", "kfb_Deva", "kff_Telu", "kfk_Deva", + "kfq_Deva", "kfr_Gujr", "kfw_Latn", "kfx_Deva", "kha_Latn", "khg_Tibt", + "khk_Cyrl", "khm_Khmr", "khq_Latn", "khw_Arab", "kia_Latn", "kij_Latn", + "kik_Latn", "kin_Latn", "kir_Cyrl", "kix_Latn", "kjb_Latn", "kjc_Latn", + "kje_Latn", "kjg_Latn", "kjh_Cyrl", "kjk_Latn", "kki_Latn", "kkj_Latn", + "kle_Deva", "kln_Latn", "kls_Latn", "klu_Latn", "klv_Latn", "klw_Latn", + "kma_Latn", "kmd_Latn", "kml_Latn", "kmr_Arab", "kmr_Cyrl", "kmr_Latn", + "kmu_Latn", "kmy_Latn", "kna_Latn", "knb_Latn", "knc_Latn", "kne_Latn", + "knf_Latn", "knj_Latn", "knk_Latn", "knn_Deva", "kno_Latn", "kog_Latn", + 
"kol_Latn", "koo_Latn", "kor_Hang", "kpo_Latn", "kpq_Latn", "kps_Latn", + "kpv_Cyrl", "kpy_Cyrl", "kpz_Latn", "kqe_Latn", "kqo_Latn", "kqp_Latn", + "kqr_Latn", "kqy_Ethi", "krc_Cyrl", "kri_Latn", "krj_Latn", "krl_Latn", + "krr_Khmr", "krs_Latn", "kru_Deva", "krx_Latn", "ksb_Latn", "ksd_Latn", + "ksf_Latn", "ksr_Latn", "kss_Latn", "ksz_Deva", "ktb_Ethi", "ktj_Latn", + "kto_Latn", "kua_Latn", "kub_Latn", "kue_Latn", "kuh_Latn", "kum_Cyrl", + "kur_Arab", "kus_Latn", "kvn_Latn", "kvw_Latn", "kvx_Arab", "kwd_Latn", + "kwf_Latn", "kwi_Latn", "kwm_Latn", "kxc_Ethi", "kxf_Latn", "kxm_Thai", + "kxp_Arab", "kyb_Latn", "kyc_Latn", "kyf_Latn", "kyg_Latn", "kyo_Latn", + "kyq_Latn", "kyu_Kali", "kyx_Latn", "kyz_Latn", "kzf_Latn", "kzi_Latn", + "lac_Latn", "lag_Latn", "laj_Latn", "lam_Latn", "lao_Laoo", "las_Latn", + "lat_Latn", "lav_Latn", "law_Latn", "lbj_Tibt", "lbw_Latn", "lcm_Latn", + "lcp_Thai", "ldb_Latn", "led_Latn", "lee_Latn", "lef_Latn", "lem_Latn", + "lew_Latn", "lex_Latn", "lgg_Latn", "lgl_Latn", "lhu_Latn", "lia_Latn", + "lid_Latn", "lif_Deva", "lij_Latn", "lin_Latn", "lip_Latn", "lir_Latn", + "lis_Lisu", "lit_Latn", "lje_Latn", "ljp_Latn", "lkb_Latn", "lke_Latn", + "lla_Latn", "lld_Latn_gherd", "lld_Latn_valbadia", "llg_Latn", "lln_Latn", "lme_Latn", + "lnd_Latn", "lns_Latn", "lnu_Latn", "loa_Latn", "lob_Latn", "lok_Latn", + "lom_Latn", "lon_Latn", "loq_Latn", "lrk_Arab", "lsi_Latn", "lsm_Latn", + "lss_Arab", "ltg_Latn", "lth_Latn", "lto_Latn", "ltz_Latn", "lua_Latn", + "luc_Latn", "lug_Latn", "luo_Latn", "lus_Latn", "lwg_Latn", "lwo_Latn", + "lww_Latn", "lzz_Latn", "maa_Latn", "mab_Latn", "mad_Latn", "maf_Latn", + "mag_Deva", "mah_Latn", "mai_Deva", "maj_Latn", "mak_Latn", "mal_Mlym", + "mam_Latn", "maq_Latn", "mar_Deva", "mau_Latn", "maw_Latn", "max_Latn", + "maz_Latn", "mbb_Latn", "mbc_Latn", "mbh_Latn", "mbj_Latn", "mbt_Latn", + "mbu_Latn", "mca_Latn", "mcb_Latn", "mcd_Latn", "mcf_Latn", "mco_Latn", + "mcp_Latn", "mcq_Latn", "mcu_Latn", "mcx_Latn", "mda_Latn", 
"mdd_Latn", + "mdv_Latn", "mdy_Ethi", "med_Latn", "mee_Latn", "meh_Latn", "mej_Latn", + "mek_Latn", "mel_Latn", "men_Latn", "meq_Latn", "mer_Latn", "met_Latn", + "meu_Latn", "mev_Latn", "mfe_Latn", "mfh_Latn", "mfi_Latn", "mfk_Latn", + "mfm_Latn", "mfn_Latn", "mfo_Latn", "mfq_Latn", "mfv_Latn", "mfy_Latn", + "mfz_Latn", "mgd_Latn", "mge_Latn", "mgg_Latn", "mgh_Latn", "mgi_Latn", + "mgo_Latn", "mhi_Latn", "mhk_Latn", "mhr_Cyrl", "mhu_Latn", "mhx_Latn", + "mhy_Latn", "mib_Latn", "mie_Latn", "mif_Latn", "mig_Latn", "mih_Latn", + "mil_Latn", "mim_Latn", "min_Latn", "mio_Latn", "mip_Latn", "miq_Latn", + "mit_Latn", "miu_Latn", "miy_Latn", "miz_Latn", "mjl_Deva", "mjv_Mlym", + "mkd_Cyrl", "mkf_Latn", "mki_Arab", "mkl_Latn", "mkn_Latn", "mlg_Latn", + "mlq_Latn", "mlt_Latn", "mmc_Latn", "mmg_Latn", "mnb_Latn", "mne_Latn", + "mnf_Latn", "mni_Beng", "mnk_Latn", "mnw_Mymr", "mnx_Latn", "moa_Latn", + "mog_Latn", "mon_Cyrl", "mop_Latn", "mor_Latn", "mos_Latn", "mox_Latn", + "moz_Latn", "mpg_Latn", "mpm_Latn", "mpp_Latn", "mpx_Latn", "mqb_Latn", + "mqf_Latn", "mqj_Latn", "mqn_Latn", "mqy_Latn", "mri_Latn", "mrj_Cyrl", + "mrr_Deva", "mrt_Latn", "mrw_Latn", "msh_Latn", "msi_Latn", "msw_Latn", + "msy_Latn", "mtd_Latn", "mtj_Latn", "mto_Latn", "mtr_Deva", "mtu_Latn", + "mtx_Latn", "mua_Latn", "mug_Latn", "muh_Latn", "mui_Latn", "mup_Deva", + "mur_Latn", "muv_Mlym", "muy_Latn", "mve_Arab", "mvp_Latn", "mvy_Arab", + "mwq_Latn", "mwv_Latn", "mxb_Latn", "mxq_Latn", "mxs_Latn", "mxt_Latn", + "mxu_Latn", "mxv_Latn", "mxy_Latn", "mya_Mymr", "myb_Latn", "myk_Latn", + "myv_Cyrl", "myx_Latn", "myy_Latn", "mza_Latn", "mzi_Latn", "mzj_Latn", + "mzk_Latn", "mzl_Latn", "mzm_Latn", "mzw_Latn", "nab_Latn", "nag_Latn", + "nal_Latn", "nan_Latn", "nap_Latn", "nas_Latn", "naw_Latn", "nbh_Latn", + "nca_Latn", "ncf_Latn", "nch_Latn", "ncj_Latn", "ncl_Latn", "nco_Latn", + "ncu_Latn", "ncx_Latn", "ndi_Latn", "ndj_Latn", "ndo_Latn", "ndp_Latn", + "ndv_Latn", "ndy_Latn", "ndz_Latn", "neb_Latn", "nep_Deva", 
"new_Deva", + "nfa_Latn", "nfr_Latn", "nga_Latn", "ngi_Latn", "ngl_Latn", "ngp_Latn", + "ngu_Latn", "nhe_Latn", "nhg_Latn", "nhi_Latn", "nhn_Latn", "nhq_Latn", + "nhu_Latn", "nhw_Latn", "nhx_Latn", "nhy_Latn", "nia_Latn", "nij_Latn", + "nim_Latn", "nin_Latn", "nja_Latn", "nko_Latn", "nla_Latn", "nlc_Latn", + "nld_Latn", "nlg_Latn", "nlk_Latn", "nlv_Latn", "nmg_Latn", "nmz_Latn", + "nnb_Latn", "nnh_Latn", "nnq_Latn", "nnw_Latn", "noa_Latn", "nob_Latn", + "nod_Thai", "noe_Deva", "nog_Cyrl", "not_Latn", "npl_Latn", "npy_Latn", + "nso_Latn", "nst_Latn", "nsu_Latn", "ntm_Latn", "ntr_Latn", "nuj_Latn", + "nup_Latn", "nus_Latn", "nuz_Latn", "nwb_Latn", "nxq_Latn", "nya_Latn", + "nyf_Latn", "nyn_Latn", "nyo_Latn", "nyu_Latn", "nyy_Latn", "nzi_Latn", + "obo_Latn", "oci_Latn", "odk_Arab", "odu_Latn", "ogo_Latn", "ojb_Cans", + "ojb_Latn", "oku_Latn", "old_Latn", "omw_Latn", "onb_Latn", "ood_Latn", + "orc_Latn", "orm_Latn", "oru_Arab", "ory_Orya", "oss_Cyrl", "ote_Latn", + "otq_Latn", "ozm_Latn", "pab_Latn", "pad_Latn", "pag_Latn", "pam_Latn", + "pan_Guru", "pao_Latn", "pap_Latn", "pau_Latn", "pbb_Latn", "pbc_Latn", + "pbi_Latn", "pbs_Latn", "pbt_Arab", "pbu_Arab", "pce_Thai", "pcm_Latn", + "pex_Latn", "pez_Latn", "phl_Arab", "phr_Arab", "pib_Latn", "pil_Latn", + "pip_Latn", "pir_Latn", "pis_Latn", "piy_Latn", "pjt_Latn", "pkb_Latn", + "pko_Latn", "plk_Arab", "pls_Latn", "plt_Latn", "plw_Latn", "pmf_Latn", + "pmq_Latn", "pms_Latn", "pmy_Latn", "pnb_Arab", "pne_Latn", "pny_Latn", + "poc_Latn", "poe_Latn", "poh_Latn", "poi_Latn", "pol_Latn", "por_Latn", + "pov_Latn", "pow_Latn", "poy_Latn", "ppk_Latn", "pps_Latn", "prf_Latn", + "prk_Latn", "prq_Latn", "prt_Thai", "pse_Latn", "pss_Latn", "pst_Arab", + "ptu_Latn", "pua_Latn", "pui_Latn", "pus_Arab", "pwg_Latn", "pwn_Latn", + "pww_Thai", "pxm_Latn", "qub_Latn", "quc_Latn", "quf_Latn", "qug_Latn", + "quh_Latn", "qul_Latn", "qum_Latn", "qup_Latn", "qur_Latn", "qus_Latn", + "quv_Latn", "quw_Latn", "qux_Latn", "quy_Latn", "quz_Latn", 
"qva_Latn", + "qvc_Latn", "qve_Latn", "qvh_Latn", "qvi_Latn", "qvj_Latn", "qvl_Latn", + "qvm_Latn", "qvn_Latn", "qvo_Latn", "qvs_Latn", "qvw_Latn", "qvz_Latn", + "qwa_Latn", "qwh_Latn", "qws_Latn", "qxa_Latn", "qxh_Latn", "qxl_Latn", + "qxn_Latn", "qxo_Latn", "qxp_Latn", "qxr_Latn", "qxt_Latn", "qxu_Latn", + "qxw_Latn", "rag_Latn", "rah_Beng", "rai_Latn", "rap_Latn", "rav_Deva", + "raw_Latn", "rej_Latn", "rel_Latn", "rgu_Latn", "rhg_Latn", "rif_Arab", + "rif_Latn", "rim_Latn", "rjs_Deva", "rkt_Beng", "rmc_Cyrl", "rmc_Latn", + "rmo_Latn", "rmy_Cyrl", "rmy_Latn", "rng_Latn", "rnl_Latn", "rob_Latn", + "rof_Latn", "roh_Latn_surs1244", "rol_Latn", "ron_Latn", "roo_Latn", "rop_Latn", + "rro_Latn", "rth_Latn", "rub_Latn", "ruc_Latn", "ruf_Latn", "rug_Latn", + "run_Latn", "rus_Cyrl", "rwm_Latn", "rwr_Deva", "sab_Latn", "sag_Latn", + "sah_Cyrl", "saj_Latn", "saq_Latn", "sas_Latn", "sau_Latn", "say_Latn", + "sba_Latn", "sbd_Latn", "sbl_Latn", "sbn_Arab", "sbp_Latn", "sch_Latn", + "sck_Deva", "scl_Arab", "scn_Latn", "sco_Latn", "sda_Latn", "sdo_Latn", + "sea_Latn", "seh_Latn", "sei_Latn", "ses_Latn", "sey_Latn", "sgb_Latn", + "sgj_Deva", "sgw_Ethi", "shi_Latn", "shk_Latn", "shn_Mymr", "sho_Latn", + "shp_Latn", "sid_Latn", "sig_Latn", "sil_Latn", "sin_Sinh", "sip_Tibt", + "siw_Latn", "sja_Latn", "sjm_Latn", "sjp_Deva", "sjr_Latn", "skg_Latn", + "skr_Arab", "sld_Latn", "slk_Latn", "slu_Latn", "slv_Latn", "sml_Latn", + "smo_Latn", "sna_Latn", "snc_Latn", "snd_Arab", "sne_Latn", "snk_Latn", + "snn_Latn", "snp_Latn", "snv_Latn", "snw_Latn", "sol_Latn", "som_Latn", + "soy_Latn", "spa_Latn", "spp_Latn", "sps_Latn", "spy_Latn", "src_Latn", + "srd_Latn", "sri_Latn", "srm_Latn", "srn_Latn", "sro_Latn", "srp_Cyrl", + "srr_Latn", "srx_Deva", "ssi_Arab", "ste_Latn", "stn_Latn", "stp_Latn", + "sua_Latn", "suc_Latn", "suk_Latn", "sun_Latn", "sur_Latn", "sus_Latn", + "suv_Latn", "suz_Deva", "sva_Geor", "swe_Latn", "swh_Latn", "swv_Deva", + "sxb_Latn", "sxn_Latn", "sya_Latn", "syl_Latn", 
"sza_Latn", "szy_Latn", + "tac_Latn", "taj_Deva", "tam_Taml", "tan_Latn", "tao_Latn", "tap_Latn", + "taq_Latn", "tar_Latn", "tat_Cyrl", "tav_Latn", "tay_Latn", "tbc_Latn", + "tbf_Latn", "tbg_Latn", "tbk_Latn", "tbl_Latn", "tby_Latn", "tbz_Latn", + "tca_Latn", "tcc_Latn", "tcf_Latn", "tcy_Mlym", "tcz_Latn", "tdj_Latn", + "tdn_Latn", "tdx_Latn", "ted_Latn", "tee_Latn", "tel_Telu", "tem_Latn", + "teo_Latn", "ter_Latn", "tew_Latn", "tex_Latn", "tfr_Latn", "tgc_Latn", + "tgj_Latn", "tgk_Cyrl", "tgl_Latn", "tgo_Latn", "tgp_Latn", "tha_Thai", + "the_Deva", "thk_Latn", "thl_Deva", "thq_Deva", "thr_Deva", "thv_Tfng", + "tig_Ethi", "tih_Latn", "tik_Latn", "tio_Latn", "tir_Ethi", "tkg_Latn", + "tkr_Latn", "tkt_Deva", "tlb_Latn", "tli_Latn", "tlj_Latn", "tlp_Latn", + "tly_Latn", "tmc_Latn", "tmf_Latn", "tna_Latn", "tng_Latn", "tnk_Latn", + "tnn_Latn", "tnp_Latn", "tnr_Latn", "tnt_Latn", "tob_Latn", "toc_Latn", + "toh_Latn", "tok_Latn", "tom_Latn", "top_Latn", "tos_Latn", "tpi_Latn", + "tpl_Latn", "tpm_Latn", "tpp_Latn", "tpt_Latn", "tpz_Latn", "tqp_Latn", + "trc_Latn", "tri_Latn", "trn_Latn", "trp_Latn", "trq_Latn", "trs_Latn", + "trv_Latn", "trw_Arab", "tsn_Latn", "tso_Latn", "tsz_Latn", "ttc_Latn", + "tte_Latn", "ttj_Latn", "ttq_Tfng", "ttr_Latn", "ttu_Latn", "tue_Latn", + "tuf_Latn", "tui_Latn", "tuk_Arab", "tuk_Latn", "tul_Latn", "tuo_Latn", + "tuq_Latn", "tur_Latn", "tuv_Latn", "tuy_Latn", "tvo_Latn", "tvu_Latn", + "tvw_Latn", "twb_Latn", "twe_Latn", "twu_Latn", "txa_Latn", "txq_Latn", + "txs_Latn", "txu_Latn", "txy_Latn", "tye_Latn", "tzh_Latn", "tzj_Latn", + "tzo_Latn", "ubl_Latn", "ubu_Latn", "udl_Latn", "udm_Cyrl", "udu_Latn", + "uig_Arab", "uig_Cyrl", "uki_Orya", "ukr_Cyrl", "ukv_Latn", "umb_Latn", + "upv_Latn", "ura_Latn", "urb_Latn", "urd_Arab", "urd_Deva", "urd_Latn", + "urh_Latn", "urk_Thai", "urt_Latn", "ury_Latn", "ush_Arab", "usp_Latn", + "uzb_Cyrl", "uzb_Latn", "uzn_Latn", "vag_Latn", "vah_Deva", "vai_Latn", + "var_Latn", "ver_Latn", "vid_Latn", "vie_Latn", 
"vif_Latn", "vmc_Latn", + "vmj_Latn", "vmm_Latn", "vmp_Latn", "vmw_Latn", "vmy_Latn", "vmz_Latn", + "vro_Latn", "vun_Latn", "vut_Latn", "wal_Ethi", "wal_Latn", "wap_Latn", + "war_Latn", "waw_Latn", "way_Latn", "wba_Latn", "wbl_Latn", "wbr_Deva", + "wci_Latn", "weo_Latn", "wes_Latn", "wja_Latn", "wji_Latn", "wlo_Latn", + "wlx_Latn", "wmw_Latn", "wob_Latn", "wof_Latn", "wol_Latn", "wsg_Telu", + "wwa_Latn", "xal_Cyrl", "xdy_Latn", "xed_Latn", "xer_Latn", "xhe_Arab", + "xho_Latn", "xka_Arab", "xkl_Latn", "xmf_Geor", "xmm_Latn", "xmv_Latn", + "xnj_Latn", "xnr_Deva", "xog_Latn", "xon_Latn", "xpe_Latn", "xrb_Latn", + "xsb_Latn", "xsm_Latn", "xsr_Deva", "xsu_Latn", "xta_Latn", "xtd_Latn", + "xte_Latn", "xti_Latn", "xtm_Latn", "xtn_Latn", "xtu_Latn", "xua_Taml", + "xuo_Latn", "yaa_Latn", "yad_Latn", "yal_Latn", "yam_Latn", "yao_Latn", + "yaq_Latn", "yas_Latn", "yat_Latn", "yav_Latn", "yay_Latn", "yaz_Latn", + "yba_Latn", "ybb_Latn", "ycl_Latn", "ycn_Latn", "ydd_Hebr", "ydg_Arab", + "yea_Mlym", "yer_Latn", "yes_Latn", "yka_Latn", "yli_Latn", "yor_Latn", + "yre_Latn", "yua_Latn", "yue_Hans", "yue_Hant", "yuz_Latn", "yva_Latn", + "zaa_Latn", "zab_Latn", "zac_Latn", "zad_Latn", "zae_Latn", "zai_Latn", + "zam_Latn", "zao_Latn", "zaq_Latn", "zar_Latn", "zas_Latn", "zav_Latn", + "zaw_Latn", "zca_Latn", "zga_Latn", "zim_Latn", "ziw_Latn", "zmz_Latn", + "zne_Latn", "zoc_Latn", "zoh_Latn", "zor_Latn", "zos_Latn", "zpc_Latn", + "zpg_Latn", "zpi_Latn", "zpl_Latn", "zpm_Latn", "zpo_Latn", "zpt_Latn", + "zpu_Latn", "zpv_Latn", "zpy_Latn", "zpz_Latn", "zsm_Latn", "ztg_Latn", + "ztn_Latn", "ztp_Latn", "ztq_Latn", "zts_Latn", "ztu_Latn", "zty_Latn", + "zul_Latn", "zyb_Latn", "zyp_Latn", "zza_Latn", +]; + +export const OMNI_ASR_SUPPORTED_LANG_SET: ReadonlySet = new Set(OMNI_ASR_SUPPORTED_LANGS); From ed5cc7a3862b7b5e93e8911c94e0ba30c3cd13a6 Mon Sep 17 00:00:00 2001 From: Luke-Bilhorn Date: Thu, 4 Jun 2026 15:21:51 -0500 Subject: [PATCH 02/12] Remove dead phonetic/IPA ASR plumbing 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OmniASR doesn't support IPA output (only its now-deprecated MMS predecessor's ESPeak companion did), so the phonetic flag was never doing anything. Removes: - `codex-editor-extension.asrPhonetic` workspace setting - `phonetic` field from the asrConfig payload and the inline types in TextCellEditor + CodexCellEditor's batch transcription path - `phonetic` read/write in the Copilot + MainMenu settings panels (settings panel UI itself is unchanged — the field just stops being wired) Also nudges the stale default endpoints / provider / model strings toward OmniASR-correct values (the endpoint default is unused in production — the live endpoint comes from getAsrEndpoint() — but the old default leaked the deprecated WebSocket URL). --- package.json | 6 ------ src/copilotSettings/copilotSettings.ts | 8 +++----- .../codexCellEditorMessagehandling.ts | 6 +----- src/providers/mainMenu/mainMenuProvider.ts | 10 ++++------ .../src/CodexCellEditor/CodexCellEditor.tsx | 7 +++---- .../src/CodexCellEditor/TextCellEditor.tsx | 10 ++++++---- 6 files changed, 17 insertions(+), 30 deletions(-) diff --git a/package.json b/package.json index e2da8ab79..148a20168 100644 --- a/package.json +++ b/package.json @@ -900,12 +900,6 @@ "default": "eng", "description": "Language code for transcription. MMS requires ISO-639-3 (e.g., eng, fra, spa). 2-letter codes will be mapped where possible." }, - "codex-editor-extension.asrPhonetic": { - "title": "Return Phonetic (IPA)", - "type": "boolean", - "default": false, - "description": "If enabled and supported by provider, also return phonetic (IPA) transcription." 
- }, "codex-editor-extension.sourceBookWhitelist": { "title": "Source Book Whitelist", "type": "string", diff --git a/src/copilotSettings/copilotSettings.ts b/src/copilotSettings/copilotSettings.ts index fbe927f47..073fd4953 100644 --- a/src/copilotSettings/copilotSettings.ts +++ b/src/copilotSettings/copilotSettings.ts @@ -122,11 +122,10 @@ export async function openSystemMessageEditor() { try { const config = vscode.workspace.getConfiguration("codex-editor-extension"); const settings = { - endpoint: config.get("asrEndpoint", "wss://ryderwishart--asr-websocket-transcription-fastapi-asgi.modal.run/ws/transcribe"), - provider: config.get("asrProvider", "mms"), - model: config.get("asrModel", "facebook/mms-1b-all"), + endpoint: config.get("asrEndpoint", "https://genesis-ai-dev--codex-asr-serve.modal.run/transcribe"), + provider: config.get("asrProvider", "omniasr"), + model: config.get("asrModel", "omniASR_LLM_1B_v2"), language: config.get("asrLanguage", "eng"), - phonetic: config.get("asrPhonetic", false), }; panel.webview.postMessage({ command: "asrSettings", data: settings }); } catch (error) { @@ -143,7 +142,6 @@ export async function openSystemMessageEditor() { await config.update("asrProvider", message.data?.provider, target); await config.update("asrModel", message.data?.model, target); await config.update("asrLanguage", message.data?.language, target); - await config.update("asrPhonetic", !!message.data?.phonetic, target); panel.webview.postMessage({ command: "asrSettingsSaved" }); } catch (error) { console.error("[CopilotSettings] Failed to save ASR settings:", error); diff --git a/src/providers/codexCellEditorProvider/codexCellEditorMessagehandling.ts b/src/providers/codexCellEditorProvider/codexCellEditorMessagehandling.ts index 39a560b99..2f9ffe3c4 100644 --- a/src/providers/codexCellEditorProvider/codexCellEditorMessagehandling.ts +++ b/src/providers/codexCellEditorProvider/codexCellEditorMessagehandling.ts @@ -549,11 +549,7 @@ const messageHandlers: 
Record Promise("asrEndpoint", "wss://ryderwishart--asr-websocket-transcription-fastapi-asgi.modal.run/ws/transcribe"); + let endpoint = config.get("asrEndpoint", "https://genesis-ai-dev--codex-asr-serve.modal.run/transcribe"); let authToken: string | undefined; // Try to get authenticated endpoint from FrontierAPI @@ -745,7 +745,7 @@ export class MainMenuProvider extends BaseWebviewProvider { new URL(endpoint); } catch (urlError) { console.error("Invalid ASR endpoint configuration:", endpoint, urlError); - endpoint = "wss://ryderwishart--asr-websocket-transcription-fastapi-asgi.modal.run/ws/transcribe"; + endpoint = "https://genesis-ai-dev--codex-asr-serve.modal.run/transcribe"; } // Warn if using authenticated endpoint without token @@ -756,10 +756,9 @@ export class MainMenuProvider extends BaseWebviewProvider { const settings = { endpoint, - provider: config.get("asrProvider", "mms"), - model: config.get("asrModel", "facebook/mms-1b-all"), + provider: config.get("asrProvider", "omniasr"), + model: config.get("asrModel", "omniASR_LLM_1B_v2"), language: config.get("asrLanguage", "eng"), - phonetic: config.get("asrPhonetic", false), authToken, }; if (this._view) { @@ -774,7 +773,6 @@ export class MainMenuProvider extends BaseWebviewProvider { await config.update("asrProvider", (message as any).data?.provider, target); await config.update("asrModel", (message as any).data?.model, target); await config.update("asrLanguage", (message as any).data?.language, target); - await config.update("asrPhonetic", !!(message as any).data?.phonetic, target); if (this._view) { safePostMessageToView(this._view, { command: "asrSettingsSaved" }, "MainMenu"); } diff --git a/webviews/codex-webviews/src/CodexCellEditor/CodexCellEditor.tsx b/webviews/codex-webviews/src/CodexCellEditor/CodexCellEditor.tsx index 3f5b452f7..d468104fd 100755 --- a/webviews/codex-webviews/src/CodexCellEditor/CodexCellEditor.tsx +++ b/webviews/codex-webviews/src/CodexCellEditor/CodexCellEditor.tsx @@ -458,11 
+458,10 @@ const CodexCellEditor: React.FC = () => { // Fetch ASR config const asrConfig = await new Promise<{ endpoint: string; - provider: string; - model: string; - language: string; - phonetic: boolean; authToken?: string; + lang?: string; + languageMode?: "auto" | "project"; + projectLanguageName?: string; }>((resolve, reject) => { let resolved = false; const onMsg = (ev: MessageEvent) => { diff --git a/webviews/codex-webviews/src/CodexCellEditor/TextCellEditor.tsx b/webviews/codex-webviews/src/CodexCellEditor/TextCellEditor.tsx index df4389851..9536c0073 100644 --- a/webviews/codex-webviews/src/CodexCellEditor/TextCellEditor.tsx +++ b/webviews/codex-webviews/src/CodexCellEditor/TextCellEditor.tsx @@ -497,11 +497,13 @@ const CellEditor: React.FC = ({ const transcriptionClientRef = useRef(null); const [asrConfig, setAsrConfig] = useState<{ endpoint: string; - provider: string; - model: string; - language: string; // ISO-639-3 expected by MMS; may be ISO-639-1 and mapped - phonetic: boolean; authToken?: string; + /** OmniASR code (e.g. `swh_Latn`) to send as `?lang=...`. Omitted in auto-detect mode. */ + lang?: string; + /** What the user picked in the gear menu: "project" (default) or "auto". */ + languageMode?: "auto" | "project"; + /** Project's target-language refName, used as fallback when the server doesn't echo `lang`. */ + projectLanguageName?: string; } | null>(null); // Helper to smoothly center the editor. Coalesces multiple calls and From 820886d5d144bd4bbb948679fa1c81801218aea6 Mon Sep 17 00:00:00 2001 From: Luke-Bilhorn Date: Thu, 4 Jun 2026 15:24:26 -0500 Subject: [PATCH 03/12] Wire OmniASR language settings into asrConfig MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extension-host side. 
The webview now receives `lang` (resolved OmniASR code), `languageMode` ("project" | "auto"), and `projectLanguageName` in the `asrConfig` message; updateCellAfterTranscription stops defaulting `language` to "unknown" — the field now carries the actual OmniASR code the server echoed (or `null` when auto-detect gave us nothing to report). New workspace settings persisting user choices from the gear menu: - `codex-editor-extension.asrLanguageMode` ("project" | "auto") - `codex-editor-extension.asrScriptPref` ("auto" | "latin" | 4-letter ISO 15924 tag) New message commands the webview calls when the user toggles these: - `setAsrLanguageMode` - `setAsrScriptPref` Both rebroadcast `asrConfig` so the live webview state stays in sync without a reload. --- package.json | 18 ++++- .../codexCellEditorMessagehandling.ts | 72 ++++++++++++++++++- types/index.d.ts | 33 ++++++++- 3 files changed, 117 insertions(+), 6 deletions(-) diff --git a/package.json b/package.json index 148a20168..8fe329d44 100644 --- a/package.json +++ b/package.json @@ -898,7 +898,23 @@ "title": "ASR Language (ISO-639-3)", "type": "string", "default": "eng", - "description": "Language code for transcription. MMS requires ISO-639-3 (e.g., eng, fra, spa). 2-letter codes will be mapped where possible." + "description": "Legacy: ISO 639-3 hint for ASR providers. OmniASR uses the project's target language by default; configure via the gear menu on the Transcribe button." + }, + "codex-editor-extension.asrLanguageMode": { + "title": "ASR Language Mode", + "type": "string", + "enum": [ + "project", + "auto" + ], + "default": "project", + "description": "Whether to send the project's target language as a hint to the ASR service (\"project\"), or let the model transcribe without language conditioning (\"auto\")." + }, + "codex-editor-extension.asrScriptPref": { + "title": "ASR Script Preference", + "type": "string", + "default": "auto", + "description": "Script subtag to pair with the ASR language code. 
\"auto\" picks the best-guess script per language; \"latin\" forces Latin where supported; any 4-letter ISO 15924 tag (e.g. \"Arab\", \"Cyrl\") overrides per-language." }, "codex-editor-extension.sourceBookWhitelist": { "title": "Source Book Whitelist", diff --git a/src/providers/codexCellEditorProvider/codexCellEditorMessagehandling.ts b/src/providers/codexCellEditorProvider/codexCellEditorMessagehandling.ts index 2f9ffe3c4..8c5ca19a0 100644 --- a/src/providers/codexCellEditorProvider/codexCellEditorMessagehandling.ts +++ b/src/providers/codexCellEditorProvider/codexCellEditorMessagehandling.ts @@ -484,6 +484,25 @@ const messageHandlers: Record Promise("asrEndpoint", "http://localhost:8000/api/v1/asr/transcribe"); + // ASR language plumbing — see sharedUtils/asrLanguageUtils.ts for the resolver + // contract. The webview drives "auto-detect" vs "use project language" via the + // gear menu on the Transcribe button; that picker is persisted to the workspace + // setting `asrLanguageMode`. + const { resolveOmniAsrCode } = await import("../../../sharedUtils/asrLanguageUtils"); + const projectConfig = vscode.workspace.getConfiguration("codex-project-manager"); + const targetLanguage = projectConfig.get("targetLanguage") as + | { tag?: string; refName?: string; iso1?: string; iso2t?: string; iso2b?: string; } + | undefined; + const languageMode = (config.get("asrLanguageMode", "project") === "auto" + ? "auto" + : "project") as "auto" | "project"; + const scriptPref = config.get("asrScriptPref", "auto"); + const resolvedCode = + languageMode === "auto" + ? undefined + : resolveOmniAsrCode(targetLanguage, scriptPref); + const projectLanguageName = targetLanguage?.refName; + let authToken: string | undefined; // Try to get authenticated endpoint from FrontierAPI @@ -536,10 +555,16 @@ const messageHandlers: Record Promise Promise { + const typedEvent = event as Extract; + const mode = typedEvent.content?.mode === "auto" ? 
"auto" : "project"; + try { + await vscode.workspace + .getConfiguration("codex-editor-extension") + .update("asrLanguageMode", mode, vscode.ConfigurationTarget.Workspace); + } catch (err) { + console.warn("Failed to update asrLanguageMode", err); + } + // Rebroadcast so the webview can refresh its local asrConfig snapshot. + await messageHandlers.getAsrConfig({ webviewPanel } as any); + }, + + setAsrScriptPref: async ({ event, webviewPanel }) => { + const typedEvent = event as Extract; + const rawPref = typedEvent.content?.scriptPref; + // Accept "auto", "latin", or any 4-letter ISO 15924 tag. Anything else falls back to "auto". + const isFourLetter = typeof rawPref === "string" && /^[A-Za-z]{4}$/.test(rawPref); + const normalized = + rawPref === "auto" || rawPref === "latin" + ? rawPref + : isFourLetter + ? rawPref!.charAt(0).toUpperCase() + rawPref!.slice(1).toLowerCase() + : "auto"; + try { + await vscode.workspace + .getConfiguration("codex-editor-extension") + .update("asrScriptPref", normalized, vscode.ConfigurationTarget.Workspace); + } catch (err) { + console.warn("Failed to update asrScriptPref", err); + } + await messageHandlers.getAsrConfig({ webviewPanel } as any); + }, + updateCellAfterTranscription: async ({ event, document, webviewPanel, provider }) => { const typedEvent = event as Extract; const { cellId, transcribedText, language } = typedEvent.content; @@ -570,7 +631,12 @@ const messageHandlers: Record Promise Date: Thu, 4 Jun 2026 15:30:47 -0500 Subject: [PATCH 04/12] Wire ASR client to OmniASR lang plumbing; add Re-transcribe + gear menu MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit WhisperTranscriptionClient - transcribe() now takes { lang?, timeoutMs? 
} and forwards lang as ?lang= when provided - parses back result.lang (or result.language, for back-compat with the Frontier proxy's earlier field name) and returns it alongside the text CodexCellEditor / TextCellEditor - both transcription paths (per-cell button + batch run) now send the resolved OmniASR code in project mode, omit it in auto-detect mode, and persist whatever the server echoes (or what they sent) via updateCellAfterTranscription - the badge label is now computed via labelForTranscriptionLanguage: server echo → sent code → project name → "Auto Detect" (only when that's the user's chosen mode); falls through to nothing when in project mode and we have no signal — never lies about the language - deletes the dead toIso3() lookup table; the resolver handles macrolang/ISO-1→3 mapping now AudioWaveformWithTranscription - Transcribe button is always visible (no longer hidden once a transcription exists); flips label to "Re-transcribe" and stays disabled while transcribing — mirrors the Re-record button - new gear-icon popover next to it surfaces two advanced settings: Language: Project (default) / Auto-detect Script: Best guess (default) / Latin / Custom (ISO 15924 tag) Hidden on source editors where transcription policy isn't user- driven. Selections post back to the host (setAsrLanguageMode / setAsrScriptPref) which persists them to workspace settings and rebroadcasts asrConfig so the live state stays in sync. 
Types - asrConfig content gains lang, languageMode, scriptPref, projectLanguageName - updateCellAfterTranscription.content.language is now `string | null` (was always the hardcoded "unknown") - new EditorPostMessages: setAsrLanguageMode, setAsrScriptPref --- .../codexCellEditorMessagehandling.ts | 3 +- types/index.d.ts | 3 + .../AudioWaveformWithTranscription.tsx | 175 ++++++++++++++++-- .../src/CodexCellEditor/CodexCellEditor.tsx | 28 +-- .../src/CodexCellEditor/TextCellEditor.tsx | 64 ++++++- .../WhisperTranscriptionClient.ts | 34 +++- 6 files changed, 262 insertions(+), 45 deletions(-) diff --git a/src/providers/codexCellEditorProvider/codexCellEditorMessagehandling.ts b/src/providers/codexCellEditorProvider/codexCellEditorMessagehandling.ts index 8c5ca19a0..bb8d18069 100644 --- a/src/providers/codexCellEditorProvider/codexCellEditorMessagehandling.ts +++ b/src/providers/codexCellEditorProvider/codexCellEditorMessagehandling.ts @@ -555,7 +555,7 @@ const messageHandlers: Record Promise Promise void; @@ -31,6 +48,17 @@ interface AudioWaveformWithTranscriptionProps { targetDuration?: number | null; // Target duration (in seconds) derived from cell timestamps. /** Total number of audio recordings for the cell (including soft-deleted). When > 0, a count badge is rendered on the History button. */ historyCount?: number; + // Advanced ASR settings (gear menu, next to the Transcribe button). + /** Whether to display the gear menu. Hide on source-text editors where the user can't drive transcription policy. */ + showAdvancedAsrMenu?: boolean; + /** Current language mode. Determines the chevron position in the gear menu. */ + asrLanguageMode?: "auto" | "project"; + /** Current script preference: "auto", "latin", or a 4-letter ISO 15924 tag (e.g. "Arab"). */ + asrScriptPref?: string; + /** Friendly project-language label for the "Project language" radio (e.g. "Swahili"). 
*/ + projectLanguageName?: string; + onChangeAsrLanguageMode?: (mode: "auto" | "project") => void; + onChangeAsrScriptPref?: (pref: string) => void; } const AudioWaveformWithTranscription: React.FC = ({ @@ -52,10 +80,28 @@ const AudioWaveformWithTranscription: React.FC { const [audioSrc, setAudioSrc] = useState(""); const [audioDuration, setAudioDuration] = useState(null); + // The Script picker offers three "preset" choices plus a free-form 4-letter input for + // power users (e.g. someone wants `swa_Cyrl` even though the resolver would never pick + // it). We surface "Custom" only when the current value isn't one of the presets. + const isPresetScript = asrScriptPref === "auto" || asrScriptPref === "latin"; + const [scriptCustomDraft, setScriptCustomDraft] = useState( + isPresetScript ? "" : asrScriptPref + ); + useEffect(() => { + if (!isPresetScript) setScriptCustomDraft(asrScriptPref); + }, [asrScriptPref, isPresetScript]); + // Prefer the provided URL (can be blob: or data:). Fall back to creating an object URL from the blob. useEffect(() => { if (audioUrl) { @@ -142,9 +188,9 @@ const AudioWaveformWithTranscription: React.FC {transcription.content}

- {transcription.language && ( + {transcriptionLanguageLabel && ( - {transcription.language} + {transcriptionLanguageLabel} )} @@ -224,17 +270,118 @@ const AudioWaveformWithTranscription: React.FC - {!transcription && !isTranscribing && ( - + {/* Transcribe / Re-transcribe button — always visible (mirrors Re-record), + grey-out while a transcription is in flight. The label flips to + "Re-transcribe" once we have a saved transcription so the user can + re-run with different ASR settings (e.g. flip to auto-detect). */} + + {/* Gear menu — Language (auto-detect vs project) + Script (auto/Latin/custom). + Hidden on source-text editors where transcription policy isn't user-driven. */} + {showAdvancedAsrMenu && ( + + + + + +
+
Language
+ +

+ "Project" sends the language code to OmniASR for better accuracy. + "Auto-detect" omits it — OmniASR transcribes without language conditioning. +

+
+
+
Script
+ + {/* Free-form 4-letter input shown only when "Custom" is selected. + Validation happens on commit so users can type. */} + {!isPresetScript ? ( +
+ setScriptCustomDraft(e.target.value)} + placeholder="e.g. Arab, Cyrl, Hans" + maxLength={4} + className="h-7 text-xs" + /> + +
+ ) : null} +

+ Script subtag paired with the language. Best guess covers Urdu→Arabic, + Mandarin→Simplified, Cantonese→Traditional, etc. +

+
+
+
)} - {/* Gear menu — Language (auto-detect vs project) + Script (auto/Latin/custom). - Hidden on source-text editors where transcription policy isn't user-driven. */} - {showAdvancedAsrMenu && ( - - + {/* Transcribe / Re-transcribe split-button. The gear is glued to the right + edge of the main button (shared border, no gap) so it visually belongs + to the transcribe control. The label flips to "Re-transcribe" once a + saved transcription exists so the user can re-run with different ASR + settings (e.g. flip to auto-detect). Grey-out the whole group while a + transcription is in flight. */} + {(() => { + const sharedBtnClass = + "h-8 text-xs text-[var(--vscode-button-background)] border-[var(--vscode-button-background)]/20 hover:bg-[var(--vscode-button-background)]/10"; + const transcribeDisabled = + disabled || isTranscribing || (!audioUrl && !audioBlob); + return ( +
- - -
-
Language
- -

- "Project" sends the language code to OmniASR for better accuracy. - "Auto-detect" omits it — OmniASR transcribes without language conditioning. -

-
-
-
Script
- - {/* Free-form 4-letter input shown only when "Custom" is selected. - Validation happens on commit so users can type. */} - {!isPresetScript ? ( -
- setScriptCustomDraft(e.target.value)} - placeholder="e.g. Arab, Cyrl, Hans" - maxLength={4} - className="h-7 text-xs" - /> + {showAdvancedAsrMenu && ( + + -
- ) : null} -

- Script subtag paired with the language. Best guess covers Urdu→Arabic, - Mandarin→Simplified, Cantonese→Traditional, etc. -

-
-
- - )} + + +
+
Language
+ +
+
+
Script
+ + {scriptSelection === "custom" && ( +
+ + setScriptCustomDraft(e.target.value) + } + onKeyDown={(e) => { + if (e.key === "Enter") { + e.preventDefault(); + commitCustomScript(); + } + }} + placeholder="e.g. Arab, Cyrl, Hans" + maxLength={4} + className="h-7 text-xs" + /> + +
+ )} +
+
+ + )} +
+ ); + })()}