-
Notifications
You must be signed in to change notification settings - Fork 13
Data: Add Google Translate Support Indicator #689
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,257 @@ | ||
| ### Metadata ### | ||
| #topic Availability of language in Google Translate | ||
| #url https://translate.google.com/ | ||
| #dateAccessed 2026-05-15 | ||
| #author Google LLC | ||
| #notes Scraped from the website | ||
|
|
||
| Language Code Language Locale Writing System | ||
| abk Abkhaz | ||
| ace Acehnese | ||
| ach Acholi | ||
| aar Afar | ||
| afr Afrikaans | ||
| sqi Albanian | ||
| alz Alur | ||
| amh Amharic | ||
| ara Arabic | ||
| hye Armenian | ||
| asm Assamese | ||
| ava Avar | ||
| awa Awadhi | ||
| aym Aymara | ||
| aze Azerbaijani | ||
| ban Balinese | ||
| bal Baluchi | ||
| man/bam Bambara | ||
| bci Baoulé | ||
| bak Bashkir | ||
| eus Basque | ||
| btx Batak Karo | ||
| bts Batak Simalungun | ||
| bbc Batak Toba | ||
| bel Belarusian | ||
| bem Bemba | ||
| ben Bengali | ||
| msa/bew Betawi | ||
| bho Bhojpuri | ||
| bik Bikol | ||
| hbs/bos Bosnian | ||
| bre Breton | ||
| bul Bulgarian | ||
| bua Buryat | ||
| zho/yue Cantonese | ||
| cat Catalan | ||
| ceb Cebuano | ||
| cha Chamorro | ||
| che Chechen | ||
| nya Chichewa | ||
| zho Chinese (Simplified) | ||
| zho Chinese (Traditional) | ||
| chk Chuukese | ||
| chv Chuvash | ||
| cos Corsican | ||
| crh Crimean Tatar (Cyrillic) Cyrillic | ||
| crh Crimean Tatar (Latin) Latin | ||
| hbs/hrv Croatian | ||
| ces Czech | ||
| dan Danish | ||
| fas/prs Dari | ||
| div Dhivehi | ||
| din Dinka | ||
| doi Dogri | ||
| dov Dombe | ||
| nld Dutch | ||
| man/dyu Dyula | ||
| dzo Dzongkha | ||
| eng English | ||
| epo Esperanto | ||
| est Estonian | ||
| ewe Ewe | ||
| fao Faroese | ||
| fij Fijian | ||
| fil Filipino | ||
| fin Finnish | ||
| fon Fon | ||
| fra French | ||
| fra French (Canada) CA | ||
| fry Frisian | ||
| fur Friulian | ||
| ful Fulani | ||
| gaa Ga | ||
| glg Galician | ||
| kat Georgian | ||
| deu German | ||
| ell Greek | ||
| grn Guarani | ||
| guj Gujarati | ||
| hat Haitian Creole | ||
| cnh Hakha Chin | ||
| hau Hausa | ||
| haw Hawaiian | ||
| heb Hebrew | ||
| hil Hiligaynon | ||
| hin Hindi | ||
| hmn Hmong | ||
| hun Hungarian | ||
| hrx Hunsrik | ||
| msa/iba Iban | ||
| isl Icelandic | ||
| ibo Igbo | ||
| ilo Ilocano | ||
| msa/ind Indonesian | ||
| iku Inuktut (Latin) Latin | ||
| iku Inuktut (Syllabics) Syllabics | ||
| gle Irish | ||
| ita Italian | ||
| jam Jamaican Patois | ||
| jpn Japanese | ||
| jav Javanese | ||
| kac Jingpo | ||
| iku/kal Kalaallisut | ||
| kan Kannada | ||
| kau Kanuri | ||
| pam Kapampangan | ||
| kaz Kazakh | ||
| kha Khasi | ||
| khm Khmer | ||
| nyn Kiga | ||
| kon Kikongo | ||
| kin Kinyarwanda | ||
| kon/mkw Kituba | ||
| trp Kokborok | ||
| kom Komi | ||
| kok Konkani | ||
| kor Korean | ||
| kri Krio | ||
| kmr Kurdish (Kurmanji) | ||
| ckb Kurdish (Sorani) | ||
| kir Kyrgyz | ||
|
Comment on lines
+127
to
+129
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Use https://translation-commons.github.io/lang-nav/data?objectType=Census&view=Reports&reportID=2 to mark macrolanguages. For example, these two are |
||
| lao Lao | ||
| lav/ltg Latgalian | ||
| lat Latin | ||
| lav Latvian | ||
| lij Ligurian | ||
| lim Limburgish | ||
| lin Lingala | ||
| lit Lithuanian | ||
| lmo Lombard | ||
| lug Luganda | ||
| luo Luo | ||
| ltz Luxembourgish | ||
| mkd Macedonian | ||
| mad Madurese | ||
| mai Maithili | ||
| msa/mfp Makassar | ||
| mlg Malagasy | ||
| msa/zlm Malay | ||
| zlm Malay (Jawi) Jawi | ||
| mal Malayalam | ||
| mlt Maltese | ||
| mam Mam | ||
| glv Manx | ||
| mri Maori | ||
| mar Marathi | ||
| mah Marshallese | ||
| mwr Marwadi | ||
| mfe Mauritian Creole | ||
| mhr Meadow Mari | ||
| mni Meiteilon (Manipuri) | ||
| msa/min Minang | ||
| lus Mizo | ||
| mon Mongolian | ||
| mya Myanmar (Burmese) | ||
| nqo NKo | ||
| nah Nahuatl (Eastern Huasteca) | ||
| ndc Ndau | ||
| nbl Ndebele (South) | ||
| new Nepalbhasa (Newari) | ||
| nep Nepali | ||
| nor Norwegian | ||
| nus Nuer | ||
| oci Occitan | ||
| ori Odia (Oriya) | ||
| orm Oromo | ||
| oss Ossetian | ||
| pag Pangasinan | ||
| pap Papiamento | ||
| pus Pashto | ||
| fas Persian | ||
| pol Polish | ||
| por Portuguese (Brazil) BR | ||
| por Portuguese (Portugal) PT | ||
| pan Punjabi (Gurmukhi) Gurmukhi | ||
| pan Punjabi (Shahmukhi) Shahmukhi | ||
| que Quechua | ||
| kek Qʼeqchiʼ | ||
| rom Romani | ||
| ron Romanian | ||
| run Rundi | ||
| rus Russian | ||
| sme Sami (North) | ||
| smo Samoan | ||
| sag Sango | ||
| san Sanskrit | ||
| sat Santali (Latin) Latin | ||
| sat Santali (Ol Chiki) Ol Chiki | ||
| gla Scots Gaelic | ||
| nso Sepedi | ||
| hbs/srp Serbian | ||
| sot Sesotho | ||
| crs Seychellois Creole | ||
| shn Shan | ||
| sna Shona | ||
| scn Sicilian | ||
| szl Silesian | ||
| snd Sindhi | ||
| sin Sinhala | ||
| slk Slovak | ||
| slv Slovenian | ||
| som Somali | ||
| spa Spanish | ||
| sun Sundanese | ||
| sus Susu | ||
| swa Swahili | ||
| ssw Swati | ||
| swe Swedish | ||
| tah Tahitian | ||
| tgk Tajik | ||
| zgh Tamazight | ||
| zgh Tamazight (Tifinagh) Tifinagh | ||
| tam Tamil | ||
| tat Tatar | ||
| tel Telugu | ||
| tet Tetum | ||
| tha Thai | ||
| bod Tibetan | ||
| tir Tigrinya | ||
| tiv Tiv | ||
| tpi Tok Pisin | ||
| ton Tongan | ||
| lua Tshiluba | ||
| tso Tsonga | ||
| tsn Tswana | ||
| tcy Tulu | ||
| tum Tumbuka | ||
| tur Turkish | ||
| tuk Turkmen | ||
| tyv Tuvan | ||
| aka/twi Twi | ||
| udm Udmurt | ||
| ukr Ukrainian | ||
| urd Urdu | ||
| uig Uyghur | ||
| uzb Uzbek | ||
| ven Venda | ||
| vec Venetian | ||
| vie Vietnamese | ||
| war Waray | ||
| cym Welsh | ||
| wol Wolof | ||
| xho Xhosa | ||
| sah Yakut | ||
| yid Yiddish | ||
| yor Yoruba | ||
| yua Yucatec Maya | ||
| zap Zapotec | ||
| zul Zulu | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,27 @@ | ||
| import { CheckCircle2Icon, XCircleIcon } from 'lucide-react'; | ||
| import React from 'react'; | ||
|
|
||
| import Hoverable from '@features/layers/hovercard/Hoverable'; | ||
|
|
||
| import { LanguageData } from '@entities/language/LanguageTypes'; | ||
|
|
||
/**
 * Inline icon showing whether a language has any Google Translate entries.
 *
 * When `lang.googleTranslate` holds at least one entry, a green check is
 * rendered inside a `Hoverable` whose hover content is the comma-separated
 * list of entry names. Otherwise a bare red X is rendered.
 */
const GoogleTranslateSupportStatus: React.FC<{ lang: LanguageData }> = ({ lang }) => {
  const entries = lang.googleTranslate;

  if (entries && entries.length > 0) {
    const supportedNames = entries.map(({ name }) => name).join(', ');

    return (
      <Hoverable
        hoverContent={supportedNames}
        style={{ display: 'inline-flex', alignItems: 'center' }}
      >
        <CheckCircle2Icon
          size="1em"
          style={{ color: 'var(--color-green)', verticalAlign: 'middle' }}
        />
      </Hoverable>
    );
  }

  // Review note: this bare icon is not inside an inline-flex container, unlike
  // the Hoverable-wrapped check above, so the two can lay out slightly
  // differently. Standardizing this via a shared hoverable icon component is
  // tracked in #690.
  return (
    <XCircleIcon size="1em" style={{ color: 'var(--color-red)', verticalAlign: 'middle' }} />
  );
};
|
|
||
| export default GoogleTranslateSupportStatus; | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,40 @@ | ||
| import { isIgnoredLanguageCode } from '@entities/census/parseCensusLanguageRow'; | ||
| import { LanguageData } from '@entities/language/LanguageTypes'; | ||
|
|
||
/**
 * Load Google Translate language availability data and attach it to the
 * matching languages.
 *
 * File format (tab-separated, one language variant per row):
 *   Language Code\tLanguage\tLocale\tWriting System
 *
 * Blank lines, lines starting with `#`, and the `Language Code` header row
 * are skipped. The first column may hold several codes separated by `/`
 * (for example `msa/ind`); the row is appended to the `googleTranslate` list
 * of every code that `getLanguage` resolves and that is not an ignored code.
 *
 * Loading is best-effort: a network failure or a non-2xx response is logged
 * with `console.error` and the returned promise still resolves.
 *
 * @param getLanguage Looks up a language by code; returns `undefined` when
 *   the code is unknown.
 * @returns A promise that resolves once the file has been processed.
 */
export async function loadGoogleTranslate(
  getLanguage: (id: string) => LanguageData | undefined,
): Promise<void> {
  try {
    const res = await fetch('data/google/gtranslate.tsv');
    // fetch() only rejects on network errors; without this check the body of
    // an HTTP error page would be parsed as if it were the TSV file.
    if (!res.ok) {
      throw new Error(`Unexpected response: ${res.status} ${res.statusText}`);
    }
    const text = await res.text();

    for (const rawLine of text.split(/\r?\n/)) {
      // Only use the trimmed form to classify the line; columns are split
      // from the raw line so an empty leading column is still detected.
      const trimmed = rawLine.trim();
      if (trimmed === '' || trimmed.startsWith('#')) continue;

      const parts = rawLine.split('\t');
      if (parts.length < 2) continue;

      const [codeColumn = '', nameColumn = '', localeColumn = '', scriptColumn = ''] = parts;

      const languageCodePath = codeColumn.trim();
      if (languageCodePath === '' || languageCodePath === 'Language Code') continue;

      const name = nameColumn.trim();
      // Empty optional columns are stored as undefined rather than ''.
      const locale = localeColumn.trim() || undefined;
      const writingSystem = scriptColumn.trim() || undefined;

      for (const rawCode of languageCodePath.split('/')) {
        const code = rawCode.trim();
        if (code === '' || isIgnoredLanguageCode(code)) continue;

        const language = getLanguage(code);
        if (!language) continue;

        if (!language.googleTranslate) language.googleTranslate = [];
        language.googleTranslate.push({ languageCodePath, name, locale, writingSystem });
      }
    }
  } catch (err: unknown) {
    console.error('Error loading Google Translate data:', err);
  }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
mark it as zho/cmn for both