diff --git a/public/data/google/gtranslate.tsv b/public/data/google/gtranslate.tsv new file mode 100644 index 000000000..670534fdc --- /dev/null +++ b/public/data/google/gtranslate.tsv @@ -0,0 +1,257 @@ +### Metadata ### +#topic Availability of language in Google Translate +#url https://translate.google.com/ +#dateAccessed 2026-05-15 +#author Google LLC +#notes Scraped from the website + +Language Code Language Locale Writing System +abk Abkhaz +ace Acehnese +ach Acholi +aar Afar +afr Afrikaans +sqi Albanian +alz Alur +amh Amharic +ara Arabic +hye Armenian +asm Assamese +ava Avar +awa Awadhi +aym Aymara +aze Azerbaijani +ban Balinese +bal Baluchi +man/bam Bambara +bci Baoulé +bak Bashkir +eus Basque +btx Batak Karo +bts Batak Simalungun +bbc Batak Toba +bel Belarusian +bem Bemba +ben Bengali +msa/bew Betawi +bho Bhojpuri +bik Bikol +hbs/bos Bosnian +bre Breton +bul Bulgarian +bua Buryat +zho/yue Cantonese +cat Catalan +ceb Cebuano +cha Chamorro +che Chechen +nya Chichewa +zho Chinese (Simplified) +zho Chinese (Traditional) +chk Chuukese +chv Chuvash +cos Corsican +crh Crimean Tatar (Cyrillic) Cyrillic +crh Crimean Tatar (Latin) Latin +hbs/hrv Croatian +ces Czech +dan Danish +fas/prs Dari +div Dhivehi +din Dinka +doi Dogri +dov Dombe +nld Dutch +man/dyu Dyula +dzo Dzongkha +eng English +epo Esperanto +est Estonian +ewe Ewe +fao Faroese +fij Fijian +fil Filipino +fin Finnish +fon Fon +fra French +fra French (Canada) CA +fry Frisian +fur Friulian +ful Fulani +gaa Ga +glg Galician +kat Georgian +deu German +ell Greek +grn Guarani +guj Gujarati +hat Haitian Creole +cnh Hakha Chin +hau Hausa +haw Hawaiian +heb Hebrew +hil Hiligaynon +hin Hindi +hmn Hmong +hun Hungarian +hrx Hunsrik +msa/iba Iban +isl Icelandic +ibo Igbo +ilo Ilocano +msa/ind Indonesian +iku Inuktut (Latin) Latin +iku Inuktut (Syllabics) Syllabics +gle Irish +ita Italian +jam Jamaican Patois +jpn Japanese +jav Javanese +kac Jingpo +iku/kal Kalaallisut +kan Kannada +kau Kanuri +pam Kapampangan +kaz Kazakh 
+kha Khasi +khm Khmer +nyn Kiga +kon Kikongo +kin Kinyarwanda +kon/mkw Kituba +trp Kokborok +kom Komi +kok Konkani +kor Korean +kri Krio +kmr Kurdish (Kurmanji) +ckb Kurdish (Sorani) +kir Kyrgyz +lao Lao +lav/ltg Latgalian +lat Latin +lav Latvian +lij Ligurian +lim Limburgish +lin Lingala +lit Lithuanian +lmo Lombard +lug Luganda +luw Luo +ltz Luxembourgish +mkd Macedonian +mad Madurese +mai Maithili +msa/mfp Makassar +mlg Malagasy +msa/zlm Malay +zlm Malay (Jawi) Jawi +mal Malayalam +mlt Maltese +mam Mam +glv Manx +mri Maori +mar Marathi +mah Marshallese +mwr Marwadi +mfe Mauritian Creole +mhr Meadow Mari +mni Meiteilon (Manipuri) +xrg Minang +lus Mizo +mon Mongolian +mya Myanmar (Burmese) +nqo NKo +nah Nahuatl (Eastern Huasteca) +ndc Ndau +nbl Ndebele (South) +new Nepalbhasa (Newari) +nep Nepali +nor Norwegian +nus Nuer +oci Occitan +ori Odia (Oriya) +orm Oromo +oss Ossetian +pag Pangasinan +pap Papiamento +pus Pashto +fas Persian +pol Polish +por Portuguese (Brazil) BR +por Portuguese (Portugal) PT +pan Punjabi (Gurmukhi) Gurmukhi +pan Punjabi (Shahmukhi) Shahmukhi +que Quechua +kek Qʼeqchiʼ +rom Romani +ron Romanian +run Rundi +rus Russian +sme Sami (North) +smo Samoan +sag Sango +san Sanskrit +sat Santali (Latin) Latin +sat Santali (Ol Chiki) Ol Chiki +gla Scots Gaelic +nso Sepedi +hbs/srp Serbian +sot Sesotho +crs Seychellois Creole +shn Shan +sna Shona +scn Sicilian +szl Silesian +snd Sindhi +sin Sinhala +slk Slovak +slv Slovenian +som Somali +spa Spanish +sun Sundanese +sus Susu +swa Swahili +ssw Swati +swe Swedish +tah Tahitian +tgk Tajik +zgh Tamazight +zgh Tamazight (Tifinagh) Tifinagh +tam Tamil +tat Tatar +tel Telugu +tet Tetum +tha Thai +bod Tibetan +tir Tigrinya +tiv Tiv +tpi Tok Pisin +ton Tongan +lua Tshiluba +tso Tsonga +tsn Tswana +tcy Tulu +tum Tumbuka +tur Turkish +tuk Turkmen +tyv Tuvan +aka/twi Twi +udm Udmurt +ukr Ukrainian +urd Urdu +uig Uyghur +uzb Uzbek +ven Venda +vec Venetian +vie Vietnamese +war Waray +cym Welsh +wol Wolof +xho Xhosa 
+sah Yakut +yid Yiddish +yor Yoruba +yua Yucatec Maya +zap Zapotec +zul Zulu \ No newline at end of file diff --git a/src/entities/language/LanguageTypes.ts b/src/entities/language/LanguageTypes.ts index 7ed220fbe..57622adb9 100644 --- a/src/entities/language/LanguageTypes.ts +++ b/src/entities/language/LanguageTypes.ts @@ -15,6 +15,7 @@ import { ScriptCode, WritingSystemData } from '@entities/writingsystem/WritingSy import { CLDRCoverageData, CLDRLanguageMatchData } from '../types/CLDRTypes'; import { + GoogleTranslateData, ObjectBase, UniversalDeclarationOfHumanRightsData, WikipediaData, @@ -119,6 +120,7 @@ export interface LanguageData extends ObjectBase { warnings: Partial>; wikipedia?: WikipediaData; udhr?: UniversalDeclarationOfHumanRightsData[]; + googleTranslate?: GoogleTranslateData[]; latitude?: number; longitude?: number; diff --git a/src/entities/types/DataTypes.tsx b/src/entities/types/DataTypes.tsx index 47d48100f..cfe35d451 100644 --- a/src/entities/types/DataTypes.tsx +++ b/src/entities/types/DataTypes.tsx @@ -60,3 +60,10 @@ export type UniversalDeclarationOfHumanRightsData = { variant: string; // e.g. "Latn", "Cyrl", or "" for undifferentiated documentURL: string; // URL to the UDHR translation document -- maybe just the final path segment, like "af-marka" in "https://www.ohchr.org/en/human-rights/universal-declaration/translations/af-marka" }; + +export type GoogleTranslateData = { + languageCodePath: string; // e.g. 
"man/bam" when Google lists grouped or alternate code paths + name: string; + locale?: string; + writingSystem?: string; +}; diff --git a/src/entities/ui/GoogleTranslateSupportStatus.tsx b/src/entities/ui/GoogleTranslateSupportStatus.tsx new file mode 100644 index 000000000..59c83d6b7 --- /dev/null +++ b/src/entities/ui/GoogleTranslateSupportStatus.tsx @@ -0,0 +1,27 @@ +import { CheckCircle2Icon, XCircleIcon } from 'lucide-react'; +import React from 'react'; + +import Hoverable from '@features/layers/hovercard/Hoverable'; + +import { LanguageData } from '@entities/language/LanguageTypes'; + +const GoogleTranslateSupportStatus: React.FC<{ lang: LanguageData }> = ({ lang }) => { + if (!lang.googleTranslate || lang.googleTranslate.length === 0) { + return ( + + ); + } + + const hoverContent = lang.googleTranslate.map((entry) => entry.name).join(', '); + + return ( + + + + ); +}; + +export default GoogleTranslateSupportStatus; diff --git a/src/features/data/load/SupplementalData.tsx b/src/features/data/load/SupplementalData.tsx index b9e2d71b3..608953813 100644 --- a/src/features/data/load/SupplementalData.tsx +++ b/src/features/data/load/SupplementalData.tsx @@ -9,6 +9,7 @@ import { loadCensusData } from './extra_entities/loadCensusData'; import { loadEthnologue2012Data } from './extra_entities/SILData'; import { loadCountryCoordinates } from './supplemental/loadCountryCoordinates'; import { loadECRML } from './supplemental/loadECRML'; +import { loadGoogleTranslate } from './supplemental/loadGoogleTranslate'; import { loadIndigeneity } from './supplemental/loadIndigeneity'; import { loadLandArea } from './supplemental/loadLandArea'; import { loadLanguageNamesFrench } from './supplemental/loadLanguageNamesFrench'; @@ -40,6 +41,7 @@ export async function loadSupplementalData(dataContext: DataContextType): Promis loadEthnologue2012Data(dataContext.getLanguage), loadIndigeneity(dataContext.getLanguage), loadECRML(dataContext.getLanguage), + 
/** Shape of one Google Translate entry, derived from LanguageData so this file needs no extra import. */
type GoogleTranslateEntry = NonNullable<LanguageData['googleTranslate']>[number];

/**
 * Parse a single tab-separated row of gtranslate.tsv.
 *
 * @param line - One raw line of the file (comment and blank lines are filtered by the caller).
 * @returns The distinct, trimmed language codes found in the first column together with the
 *   entry to attach, or `undefined` when the row has fewer than two columns, has an empty
 *   code column, or is the `Language Code` header row.
 */
function parseGoogleTranslateRow(
  line: string,
): { codes: string[]; entry: GoogleTranslateEntry } | undefined {
  const parts = line.split('\t');
  if (parts.length < 2) return undefined;

  const languageCodePath = (parts[0] ?? '').trim();
  if (languageCodePath === '' || languageCodePath === 'Language Code') return undefined;

  // A path such as "man/bam" names several codes. Trim each segment, drop empty ones
  // (e.g. from a trailing slash) and de-duplicate so one row is never attached twice
  // to the same code.
  const codes = Array.from(
    new Set(
      languageCodePath
        .split('/')
        .map((code) => code.trim())
        .filter((code) => code !== ''),
    ),
  );

  return {
    codes,
    entry: {
      languageCodePath,
      name: (parts[1] ?? '').trim(),
      // `||` is intentional: an empty column must become `undefined`, not ''.
      locale: (parts[2] ?? '').trim() || undefined,
      writingSystem: (parts[3] ?? '').trim() || undefined,
    },
  };
}

/**
 * Load Google Translate language availability data and attach it to the matching languages.
 *
 * File format (tab-separated, `#`-prefixed lines are metadata/comments):
 *   Language Code\tLanguage\tLocale\tWriting System
 *
 * The first column may hold several codes separated by `/` (e.g. "man/bam"); the entry is
 * appended to `googleTranslate` of every language that `getLanguage` resolves for those
 * codes, skipping codes rejected by `isIgnoredLanguageCode`.
 *
 * Failures (network error, non-2xx response) are logged and swallowed so that one missing
 * supplemental file does not abort the rest of the data load.
 *
 * @param getLanguage - Lookup from a language code to its LanguageData, if known.
 */
export async function loadGoogleTranslate(
  getLanguage: (id: string) => LanguageData | undefined,
): Promise<void> {
  try {
    const res = await fetch('data/google/gtranslate.tsv');
    // fetch() only rejects on network failure; without this check an error page
    // returned with a 4xx/5xx status would be parsed as if it were TSV data.
    if (!res.ok) {
      throw new Error(`HTTP ${res.status} ${res.statusText}`);
    }
    const text = await res.text();

    for (const line of text.split(/\r?\n/)) {
      if (line.trim() === '' || line.startsWith('#')) continue;

      const row = parseGoogleTranslateRow(line);
      if (!row) continue;

      // Guards against two different codes resolving to the same language object.
      const attached = new Set<LanguageData>();
      for (const code of row.codes) {
        if (isIgnoredLanguageCode(code)) continue;

        const language = getLanguage(code);
        if (!language || attached.has(language)) continue;
        attached.add(language);

        if (!language.googleTranslate) language.googleTranslate = [];
        // Copy so that languages never share one mutable entry object.
        language.googleTranslate.push({ ...row.entry });
      }
    }
  } catch (err: unknown) {
    console.error('Error loading Google Translate data:', err);
  }
}