Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
257 changes: 257 additions & 0 deletions public/data/google/gtranslate.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,257 @@
### Metadata ###
#topic Availability of language in Google Translate
#url https://translate.google.com/
#dateAccessed 2026-05-15
#author Google LLC
#notes Scraped from the website

Language Code Language Locale Writing System
abk Abkhaz
ace Acehnese
ach Acholi
aar Afar
afr Afrikaans
sqi Albanian
alz Alur
amh Amharic
ara Arabic
hye Armenian
asm Assamese
ava Avar
awa Awadhi
aym Aymara
aze Azerbaijani
ban Balinese
bal Baluchi
man/bam Bambara
bci Baoulé
bak Bashkir
eus Basque
btx Batak Karo
bts Batak Simalungun
bbc Batak Toba
bel Belarusian
bem Bemba
ben Bengali
msa/bew Betawi
bho Bhojpuri
bik Bikol
hbs/bos Bosnian
bre Breton
bul Bulgarian
bua Buryat
zho/yue Cantonese
cat Catalan
ceb Cebuano
cha Chamorro
che Chechen
nya Chichewa
zho Chinese (Simplified)
zho Chinese (Traditional)
Comment on lines +49 to +50

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mark it as zho/cmn for both

chk Chuukese
chv Chuvash
cos Corsican
crh Crimean Tatar (Cyrillic) Cyrillic
crh Crimean Tatar (Latin) Latin
hbs/hrv Croatian
ces Czech
dan Danish
fas/prs Dari
div Dhivehi
din Dinka
doi Dogri
dov Dombe
nld Dutch
man/dyu Dyula
dzo Dzongkha
eng English
epo Esperanto
est Estonian
ewe Ewe
fao Faroese
fij Fijian
fil Filipino
fin Finnish
fon Fon
fra French
fra French (Canada) CA
fry Frisian
fur Friulian
ful Fulani
gaa Ga
glg Galician
kat Georgian
deu German
ell Greek
grn Guarani
guj Gujarati
hat Haitian Creole
cnh Hakha Chin
hau Hausa
haw Hawaiian
heb Hebrew
hil Hiligaynon
hin Hindi
hmn Hmong
hun Hungarian
hrx Hunsrik
msa/iba Iban
isl Icelandic
ibo Igbo
ilo Ilocano
msa/ind Indonesian
iku Inuktut (Latin) Latin
iku Inuktut (Syllabics) Syllabics
gle Irish
ita Italian
jam Jamaican Patois
jpn Japanese
jav Javanese
kac Jingpo
iku/kal Kalaallisut
kan Kannada
kau Kanuri
pam Kapampangan
kaz Kazakh
kha Khasi
khm Khmer
nyn Kiga
kon Kikongo
kin Kinyarwanda
kon/mkw Kituba
trp Kokborok
kom Komi
kok Konkani
kor Korean
kri Krio
kmr Kurdish (Kurmanji)
ckb Kurdish (Sorani)
kir Kyrgyz
Comment on lines +127 to +129

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use https://translation-commons.github.io/lang-nav/data?objectType=Census&view=Reports&reportID=2 to mark macrolanguages. For example, these two are kur/kmr and kur/ckb.

lao Lao
lav/ltg Latgalian
lat Latin
lav Latvian
lij Ligurian
lim Limburgish
lin Lingala
lit Lithuanian
lmo Lombard
lug Luganda
luo Luo
ltz Luxembourgish
mkd Macedonian
mad Madurese
mai Maithili
msa/mfp Makassar
mlg Malagasy
msa/zlm Malay
zlm Malay (Jawi) Jawi
mal Malayalam
mlt Maltese
mam Mam
glv Manx
mri Maori
mar Marathi
mah Marshallese
mwr Marwadi
mfe Mauritian Creole
mhr Meadow Mari
mni Meiteilon (Manipuri)
msa/min Minang
lus Mizo
mon Mongolian
mya Myanmar (Burmese)
nqo NKo
nah Nahuatl (Eastern Huasteca)
ndc Ndau
nbl Ndebele (South)
new Nepalbhasa (Newari)
nep Nepali
nor Norwegian
nus Nuer
oci Occitan
ori Odia (Oriya)
orm Oromo
oss Ossetian
pag Pangasinan
pap Papiamento
pus Pashto
fas Persian
pol Polish
por Portuguese (Brazil) BR
por Portuguese (Portugal) PT
pan Punjabi (Gurmukhi) Gurmukhi
pan Punjabi (Shahmukhi) Shahmukhi
que Quechua
kek Qʼeqchiʼ
rom Romani
ron Romanian
run Rundi
rus Russian
sme Sami (North)
smo Samoan
sag Sango
san Sanskrit
sat Santali (Latin) Latin
sat Santali (Ol Chiki) Ol Chiki
gla Scots Gaelic
nso Sepedi
hbs/srp Serbian
sot Sesotho
crs Seychellois Creole
shn Shan
sna Shona
scn Sicilian
szl Silesian
snd Sindhi
sin Sinhala
slk Slovak
slv Slovenian
som Somali
spa Spanish
sun Sundanese
sus Susu
swa Swahili
ssw Swati
swe Swedish
tah Tahitian
tgk Tajik
zgh Tamazight
zgh Tamazight (Tifinagh) Tifinagh
tam Tamil
tat Tatar
tel Telugu
tet Tetum
tha Thai
bod Tibetan
tir Tigrinya
tiv Tiv
tpi Tok Pisin
ton Tongan
lua Tshiluba
tso Tsonga
tsn Tswana
tcy Tulu
tum Tumbuka
tur Turkish
tuk Turkmen
tyv Tuvan
aka/twi Twi
udm Udmurt
ukr Ukrainian
urd Urdu
uig Uyghur
uzb Uzbek
ven Venda
vec Venetian
vie Vietnamese
war Waray
cym Welsh
wol Wolof
xho Xhosa
sah Yakut
yid Yiddish
yor Yoruba
yua Yucatec Maya
zap Zapotec
zul Zulu
2 changes: 2 additions & 0 deletions src/entities/language/LanguageTypes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import { ScriptCode, WritingSystemData } from '@entities/writingsystem/WritingSy

import { CLDRCoverageData, CLDRLanguageMatchData } from '../types/CLDRTypes';
import {
GoogleTranslateData,
ObjectBase,
UniversalDeclarationOfHumanRightsData,
WikipediaData,
Expand Down Expand Up @@ -119,6 +120,7 @@ export interface LanguageData extends ObjectBase {
warnings: Partial<Record<LanguageField, string>>;
wikipedia?: WikipediaData;
udhr?: UniversalDeclarationOfHumanRightsData[];
googleTranslate?: GoogleTranslateData[];

latitude?: number;
longitude?: number;
Expand Down
7 changes: 7 additions & 0 deletions src/entities/types/DataTypes.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,10 @@ export type UniversalDeclarationOfHumanRightsData = {
variant: string; // e.g. "Latn", "Cyrl", or "" for undifferentiated
documentURL: string; // URL to the UDHR translation document -- maybe just the final path segment, like "af-marka" in "https://www.ohchr.org/en/human-rights/universal-declaration/translations/af-marka"
};

/**
 * One row of the Google Translate availability table
 * (columns: Language Code, Language, Locale, Writing System).
 */
export type GoogleTranslateData = {
// Raw value of the "Language Code" column; may hold several codes joined by "/".
languageCodePath: string; // e.g. "man/bam" when Google lists grouped or alternate code paths
// Value of the "Language" column, e.g. "Bambara" or "French (Canada)".
name: string;
// Value of the "Locale" column (e.g. "CA", "BR"); left undefined when the column is blank.
locale?: string;
// Value of the "Writing System" column (e.g. "Cyrillic", "Latin"); left undefined when blank.
writingSystem?: string;
};
27 changes: 27 additions & 0 deletions src/entities/ui/GoogleTranslateSupportStatus.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import { CheckCircle2Icon, XCircleIcon } from 'lucide-react';
import React from 'react';

import Hoverable from '@features/layers/hovercard/Hoverable';

import { LanguageData } from '@entities/language/LanguageTypes';

const GoogleTranslateSupportStatus: React.FC<{ lang: LanguageData }> = ({ lang }) => {
if (!lang.googleTranslate || lang.googleTranslate.length === 0) {
return (
<XCircleIcon style={{ color: 'var(--color-red)', verticalAlign: 'middle' }} size={'1em'} />

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There seem to be some layout differences between this and the one below -- that's because the one below is part of an inline-flex component and this one is not.

However I wouldn't worry about this, I filed #690 to make a hoverable icon component (or something) to standardize this better.

);
}

const hoverContent = lang.googleTranslate.map((entry) => entry.name).join(', ');

return (
<Hoverable hoverContent={hoverContent} style={{ display: 'inline-flex', alignItems: 'center' }}>
<CheckCircle2Icon
style={{ color: 'var(--color-green)', verticalAlign: 'middle' }}
size={'1em'}
/>
</Hoverable>
);
};

export default GoogleTranslateSupportStatus;
2 changes: 2 additions & 0 deletions src/features/data/load/SupplementalData.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import { loadCensusData } from './extra_entities/loadCensusData';
import { loadEthnologue2012Data } from './extra_entities/SILData';
import { loadCountryCoordinates } from './supplemental/loadCountryCoordinates';
import { loadECRML } from './supplemental/loadECRML';
import { loadGoogleTranslate } from './supplemental/loadGoogleTranslate';
import { loadIndigeneity } from './supplemental/loadIndigeneity';
import { loadLandArea } from './supplemental/loadLandArea';
import { loadLanguageNamesFrench } from './supplemental/loadLanguageNamesFrench';
Expand Down Expand Up @@ -40,6 +41,7 @@ export async function loadSupplementalData(dataContext: DataContextType): Promis
loadEthnologue2012Data(dataContext.getLanguage),
loadIndigeneity(dataContext.getLanguage),
loadECRML(dataContext.getLanguage),
loadGoogleTranslate(dataContext.getLanguage),
loadUDHR(dataContext.getLanguage),
loadVariantAnnotations(dataContext.getVariant, dataContext.getLanguage),
]);
Expand Down
40 changes: 40 additions & 0 deletions src/features/data/load/supplemental/loadGoogleTranslate.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import { isIgnoredLanguageCode } from '@entities/census/parseCensusLanguageRow';
import { LanguageData } from '@entities/language/LanguageTypes';

/**
 * Load Google Translate language availability data and attach each entry to the
 * languages it refers to.
 *
 * File format (tab-separated, one entry per row):
 *   Language Code\tLanguage\tLocale\tWriting System
 *
 * Blank lines, lines starting with `#` (the metadata header) and the column-header
 * row are skipped. The first column may contain several codes joined with `/`
 * (e.g. "man/bam"); the entry is appended to `googleTranslate` on every one of
 * those codes that `getLanguage` resolves and that is not an ignored code.
 *
 * @param getLanguage - Looks up a language by code; returns undefined when unknown.
 * @returns A promise that resolves once the file has been processed. Network and
 *   HTTP failures are logged to the console; the promise does not reject.
 */
export async function loadGoogleTranslate(
  getLanguage: (id: string) => LanguageData | undefined,
): Promise<void> {
  try {
    const res = await fetch('data/google/gtranslate.tsv');
    // Without this check an error body (e.g. a 404 page) would be parsed as if it were data.
    if (!res.ok) {
      throw new Error(`HTTP ${res.status} while fetching data/google/gtranslate.tsv`);
    }
    const text = await res.text();

    for (const line of text.split('\n')) {
      if (line.trim() === '' || line.startsWith('#')) continue;

      const parts = line.split('\t');
      if (parts.length < 2) continue;

      const languageCodePath = (parts[0] ?? '').trim();
      if (languageCodePath === '' || languageCodePath === 'Language Code') continue;

      const name = (parts[1] ?? '').trim();
      // Blank optional columns are stored as undefined rather than ''.
      const locale = (parts[2] ?? '').trim() || undefined;
      const writingSystem = (parts[3] ?? '').trim() || undefined;

      for (const rawCode of languageCodePath.split('/')) {
        // Tolerate stray whitespace around the separator ("man / bam") and empty segments.
        const code = rawCode.trim();
        if (code === '' || isIgnoredLanguageCode(code)) continue;

        const language = getLanguage(code);
        if (!language) continue;

        if (!language.googleTranslate) language.googleTranslate = [];
        language.googleTranslate.push({ languageCodePath, name, locale, writingSystem });
      }
    }
  } catch (err: unknown) {
    console.error('Error loading Google Translate data:', err);
  }
}
10 changes: 10 additions & 0 deletions src/widgets/tables/columns/LanguageDigitalSupportColumns.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import LanguageUDHRInfo, {
import { ObjectCLDRCoverageLevel, ObjectCLDRLocaleCount } from '@entities/ui/CLDRCoverageInfo';
import { CoverageLevelsExplanation } from '@entities/ui/CLDRCoverageLevels';
import CLDRWarningNotes from '@entities/ui/CLDRWarningNotes';
import GoogleTranslateSupportStatus from '@entities/ui/GoogleTranslateSupportStatus';
import ICUSupportStatus from '@entities/ui/ICUSupportStatus';
import {
WikipediaActiveUsers,
Expand Down Expand Up @@ -73,6 +74,15 @@ const columns: TableColumn<LanguageData>[] = [
render: (lang) => <HoverableEnumeration items={lang.keyboards?.map((kb) => kb.nameDisplay)} />,
field: Field.CountOfKeyboards,
},
{
key: 'Google Translate',
description: 'Language entries available in Google Translate.',
render: (lang) => <GoogleTranslateSupportStatus lang={lang} />,
exportValue: (lang) => {
if (!lang.googleTranslate || lang.googleTranslate.length === 0) return 'n/a';
return lang.googleTranslate.map((entry) => entry.name).join('; ');
},
},
{
key: 'Wikipedia Status',
render: (object) => (
Expand Down
Loading