From 971c2dfddd8027ed027765c4a18d5120fd28a27f Mon Sep 17 00:00:00 2001 From: Jeremy Massel <1123407+jkmassel@users.noreply.github.com> Date: Tue, 5 May 2026 14:16:04 -0600 Subject: [PATCH 1/2] feat(js): emit shipped-locales manifest and tighten the loader to it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a Vite build plugin that scans `src/translations/` and emits `dist/supported-locales.json` — a sorted array of the locale tags we actually ship. The native iOS and Android sides consume this so the "what do we ship?" answer has exactly one source of truth. Also switches `loadTranslations` from a dynamic `import()` (which threw on a missing locale and fell back to English from the catch) to an `import.meta.glob` lookup that returns early with a warning when the tag isn't in the static map. Same exact-match-or-English behaviour, but the loader map is enumerable at build time so the failure mode is explicit rather than catch-driven. The native side now resolves consumer-supplied locales to a shipped tag before the value reaches JS, so the JS load path doesn't need its own resolver — anything not in the static glob is a bug upstream and falls back to English with a warn(). --- src/utils/localization.js | 22 +++++++++++++---- vite.config.js | 50 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 67 insertions(+), 5 deletions(-) diff --git a/src/utils/localization.js b/src/utils/localization.js index 842f8eed..9595c4b2 100644 --- a/src/utils/localization.js +++ b/src/utils/localization.js @@ -11,6 +11,10 @@ import { warn, debug } from './logger'; const DEFAULT_LOCALE = 'en'; +// Vite statically enumerates the translation bundles at build time, so the +// loader map below is always in sync with what we actually ship. +const TRANSLATION_MODULES = import.meta.glob( '../translations/*.json' ); + /** * Initializes i18n support for the editor. 
* @@ -22,7 +26,11 @@ export async function configureLocale() { } /** - * Loads translations for the specified locale from the downloaded files. + * Loads translations for the specified locale from the bundled files. + * + * The native side is responsible for resolving consumer-supplied locales to a + * shipped tag before the value reaches JS. Anything that doesn't match a + * bundled translation falls back to English. * * @param {string} locale The locale to load translations for. * @@ -33,11 +41,17 @@ async function loadTranslations( locale ) { return; } + const loader = TRANSLATION_MODULES[ `../translations/${ locale }.json` ]; + if ( ! loader ) { + warn( + `Translations unavailable for locale "${ locale }". Falling back to English.` + ); + return; + } + try { debug( 'Loading translations for', locale ); - const { default: translations } = await import( - `../translations/${ locale }.json` - ); + const { default: translations } = await loader(); setLocaleData( translations ); } catch ( err ) { warn( diff --git a/vite.config.js b/vite.config.js index 970be3b6..c855406f 100644 --- a/vite.config.js +++ b/vite.config.js @@ -1,6 +1,9 @@ /** * External dependencies */ +import fs from 'fs'; +import path from 'path'; +import { fileURLToPath } from 'url'; import { defineConfig } from 'vite'; import react from '@vitejs/plugin-react'; import MagicString from 'magic-string'; @@ -30,7 +33,12 @@ export default defineConfig( { 'source-map-js': nodeModuleStub, }, }, - plugins: [ react(), wordPressExternals(), reactDevTools() ], + plugins: [ + react(), + wordPressExternals(), + reactDevTools(), + emitSupportedLocalesManifest(), + ], root: 'src', css: { preprocessorOptions: { @@ -199,6 +207,46 @@ function wordPressExternals() { }; } +/** + * Emit `supported-locales.json` to the build output. + * + * Scans `src/translations/` for `.json` files at build time and emits + * a single manifest listing every shipped locale tag. 
The native iOS and + * Android sides read this manifest so the + * "what do we actually ship?" answer has exactly one source of truth. + * + * @return {Object} Vite plugin configuration. + */ +function emitSupportedLocalesManifest() { + const translationsDir = path.resolve( + path.dirname( fileURLToPath( import.meta.url ) ), + 'src/translations' + ); + + function readSupportedLocales() { + if ( ! fs.existsSync( translationsDir ) ) { + return []; + } + return fs + .readdirSync( translationsDir ) + .filter( ( f ) => f.endsWith( '.json' ) ) + .map( ( f ) => f.replace( /\.json$/, '' ) ) + .sort(); + } + + return { + name: 'emit-supported-locales', + apply: 'build', + generateBundle() { + this.emitFile( { + type: 'asset', + fileName: 'supported-locales.json', + source: JSON.stringify( readSupportedLocales() ), + } ); + }, + }; +} + /** * Inject React Developer Tools connection script during development. * Only active when running the dev server, not in production builds. From 5d2d4ac3fee090c2e581ee33fa6d8390f5ff3d97 Mon Sep 17 00:00:00 2001 From: Jeremy Massel <1123407+jkmassel@users.noreply.github.com> Date: Tue, 5 May 2026 14:16:32 -0600 Subject: [PATCH 2/2] feat(android): resolve consumer locales against shipped translation bundles MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds an Android `LocaleResolver` and a new `EditorConfiguration.Builder.setLocale(locale: Locale)` that resolves against a compile-time-generated set of shipped locales before storing the tag for serialization. The previously-public `setLocale(String?)` overload is removed; an `internal` `@JvmSynthetic setLocaleTag(String?)` is reserved for `toBuilder` round-trip and tests. The set of shipped locales is generated into a Kotlin `internal object SupportedLocales` at build time from the JS-side manifest, so a missing manifest fails the gradle build instead of silently falling through to English at runtime. 
The Gradle task uses `JsonSlurper` to parse the manifest and validates each entry is a string; non-string entries fail the task with a clear message. ## Resolution chain For an input locale, normalised to lowercase with `_` → `-`: 1. Full tag (`xx-yy`) — match if shipped 2. Script-implied region for macrolanguages we ship disjoint regional bundles for (e.g. `zh-Hant-HK` → `zh-tw`, `zh-Hans` → `zh-cn`) 3. Language-only tag (`xx`) — match if shipped 4. Fall back to `en` Inputs are parsed as BCP-47 via `Locale.forLanguageTag`, so script- tagged inputs like `zh-Hans-CN` collapse to `zh-cn` rather than falling through to English. Variant and Unicode-extension subtags (e.g. `de-DE-u-ca-gregory`) are ignored — the editor doesn't vary translations by calendar or numbering system. Legacy ISO 639-1 codes that Android's `Locale` class still emits (`iw` → `he`, `in` → `id`, `no` → `nb`) are aliased to canonical bundle names before lookup, so Hebrew/Indonesian/Norwegian users on devices reporting the legacy codes don't silently land on English. ## Why a Locale and not a String A locale string is a lossy encoding of what the system actually knows. Android hands the consumer a `Locale` — language, region, script, variant, extensions — and any boundary that flattens that to a string before the library decodes it throws data away. Taking a `Locale` at the boundary keeps signal the string would have dropped: the script subtag lets `zh-Hant-HK` resolve to `zh-tw` instead of English, and Android's legacy ISO 639-1 codes get aliased to the canonical bundle names before lookup. ## Tests - Curated `LocaleResolverTest` covers the resolution chain (full-tag → script-implied region → language-only → `en` fallback, normalisation of `pt_BR` / `EN_GB` / etc., script subtags, legacy alias mapping). - A parameterised test asserts that every locale in the generated `SupportedLocales.ALL` resolves to itself — catches regressions where a locale gets added but the resolver mishandles it. 
Reads from the generated constant directly, so the test can never drift from what the resolver actually uses in production. - Builder-level integration test exercises `setLocale(Locale)` through to `config.locale` against the shipped manifest. `make test-android` now depends on `make build` so the manifest is populated before the exhaustive test runs. Refs #490. --- .github/workflows/codeql.yml | 15 ++ .gitignore | 1 + Makefile | 10 +- android/Gutenberg/build.gradle.kts | 92 +++++++++++ .../gutenberg/model/EditorConfiguration.kt | 35 ++++- .../gutenberg/model/LocaleResolver.kt | 114 ++++++++++++++ .../model/EditorConfigurationTest.kt | 41 ++++- .../gutenberg/model/GBKitGlobalTest.kt | 2 +- .../gutenberg/model/LocaleResolverTest.kt | 145 ++++++++++++++++++ 9 files changed, 440 insertions(+), 15 deletions(-) create mode 100644 android/Gutenberg/src/main/java/org/wordpress/gutenberg/model/LocaleResolver.kt create mode 100644 android/Gutenberg/src/test/java/org/wordpress/gutenberg/model/LocaleResolverTest.kt diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 68d2c692..07ab5aa0 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -33,6 +33,21 @@ jobs: with: languages: ${{ matrix.language }} + # The :Gutenberg Gradle build generates `SupportedLocales.kt` from + # `src/main/assets/supported-locales.json`, which is emitted by the + # JS build's Vite plugin. Without it, Autobuild fails before any + # Kotlin sources are extracted. 
+ - name: Set up Node.js + if: matrix.language == 'java-kotlin' + uses: actions/setup-node@v4 + with: + node-version-file: '.nvmrc' + cache: 'npm' + + - name: Populate Android assets via JS build + if: matrix.language == 'java-kotlin' + run: make build + - name: Autobuild uses: github/codeql-action/autobuild@v3 diff --git a/.gitignore b/.gitignore index f876ce31..e6f70c7b 100644 --- a/.gitignore +++ b/.gitignore @@ -194,6 +194,7 @@ local.properties wp_com_oauth_credentials.json ## Production Build Products +/android/Gutenberg/src/main/assets/supported-locales.json /android/Gutenberg/src/main/assets/assets /android/Gutenberg/src/main/assets/index.html diff --git a/Makefile b/Makefile index a5144684..38ea2ab3 100644 --- a/Makefile +++ b/Makefile @@ -42,10 +42,10 @@ npm-dependencies: ## Install npm dependencies .PHONY: prep-translations prep-translations: ## Fetch and cache locale string files # Skip unless... -# - src/translations doesn't exist +# - src/translations doesn't contain any fetched bundles (only `.gitkeep` is committed) # - REFRESH_L10N is set to true or 1 # - prep-translations was invoked directly - @if [ ! -d "src/translations" ] || [ "$(REFRESH_L10N)" = "true" ] || [ "$(REFRESH_L10N)" = "1" ] || echo "$(MAKECMDGOALS)" | grep -q "^prep-translations$$"; then \ + @if [ -z "$$(find src/translations -maxdepth 1 -name '*.json' -print -quit 2>/dev/null)" ] || [ "$(REFRESH_L10N)" = "true" ] || [ "$(REFRESH_L10N)" = "1" ] || echo "$(MAKECMDGOALS)" | grep -q "^prep-translations$$"; then \ echo "--- :npm: Preparing Translations"; \ if ! npm run prep-translations -- --force; then \ if [ "$(STRICT_L10N)" = "true" ] || [ "$(STRICT_L10N)" = "1" ]; then \ @@ -56,7 +56,7 @@ prep-translations: ## Fetch and cache locale string files fi; \ fi; \ else \ - echo "--- :white_check_mark: Skipping translations fetch (src/translations already exists). 
Use REFRESH_L10N=1 to force refresh."; \ + echo "--- :white_check_mark: Skipping translations fetch (bundles already present in src/translations). Use REFRESH_L10N=1 to force refresh."; \ fi .PHONY: e2e-dependencies @@ -284,7 +284,7 @@ test-ios-e2e-dev: ## Run iOS E2E tests against the Vite dev server (must be runn | xcbeautify .PHONY: test-android -test-android: ## Run Android tests +test-android: build ## Run Android tests @echo "--- :android: Running Android Tests" ./android/gradlew -p ./android :gutenberg:test @@ -345,7 +345,7 @@ test-android-e2e-dev: ## Run Android E2E tests against the Vite dev server (must ./android/gradlew -p ./android :app:connectedDebugAndroidTest .PHONY: test-android-library-e2e -test-android-library-e2e: ## Run instrumented tests for the Gutenberg Android library module +test-android-library-e2e: build ## Run instrumented tests for the Gutenberg Android library module $(ENSURE_ANDROID_DEVICE) @echo "--- :android: Running Android Library Instrumented Tests" @mkdir -p android/Gutenberg/build/outputs/buildkite-logs diff --git a/android/Gutenberg/build.gradle.kts b/android/Gutenberg/build.gradle.kts index d8f839fb..d6157d30 100644 --- a/android/Gutenberg/build.gradle.kts +++ b/android/Gutenberg/build.gradle.kts @@ -1,3 +1,5 @@ +import groovy.json.JsonSlurper + plugins { alias(libs.plugins.android.library) alias(libs.plugins.jetbrains.kotlin.android) @@ -6,6 +8,82 @@ plugins { id("kotlin-parcelize") } +// Generates `SupportedLocales.kt` from the JS-build manifest so the set of +// shipped locales is checked at compile time, not at runtime. Run `make +// build` from the repo root to populate `src/main/assets/supported-locales.json` +// before assembling the library. +// +// Registered above `android { ... }` so the `main` source set can reference +// `generatedLocalesDir` directly. 
AGP's source-set DSL only accepts +// path-shaped notations (String / File / Path / Directory) for `srcDir`, +// so the task→consumer dependency is wired explicitly below for every +// consumer of the source set (compile + source jars for publishing). +val supportedLocalesManifest = layout.projectDirectory.file("src/main/assets/supported-locales.json") +val generatedLocalesDir = layout.buildDirectory.dir("generated/source/locales/main") + +val generateSupportedLocales = tasks.register("generateSupportedLocales") { + description = "Generates SupportedLocales.kt from the shipped translation manifest." + group = "build" + + // Use `inputs.files(...)` (plural) instead of `inputs.file(...)` so a + // missing manifest doesn't trip Gradle's strict input validation before + // our own error message can surface. + inputs.files(supportedLocalesManifest) + .withPropertyName("manifest") + .withPathSensitivity(PathSensitivity.RELATIVE) + outputs.dir(generatedLocalesDir) + + doFirst { + if (!supportedLocalesManifest.asFile.exists()) { + throw GradleException( + "supported-locales.json is missing from src/main/assets/. " + + "Run `make build` from the repo root to populate translation " + + "assets before assembling the :Gutenberg library." + ) + } + } + + doLast { + val manifest = supportedLocalesManifest.asFile + + val parsed = JsonSlurper().parse(manifest) as? List<*> + ?: throw GradleException( + "supported-locales.json is not a JSON array. Re-run `make build`." + ) + val locales = parsed.map { + it as? String ?: throw GradleException( + "supported-locales.json contains a non-string entry: $it. Re-run `make build`." + ) + }.sorted() + + if (locales.isEmpty()) { + // An empty manifest typically means `make prep-translations` was + // skipped or failed silently — `src/translations/` only had + // `.gitkeep` when Vite scanned it. Without this guard the + // library ships with a runtime-empty `SupportedLocales`, which + // resolves every locale to English. 
+ throw GradleException( + "supported-locales.json is empty. Run `make prep-translations REFRESH_L10N=1` " + + "from the repo root, then `make build`, before assembling the :Gutenberg library." + ) + } + + val outDir = generatedLocalesDir.get().asFile + .resolve("org/wordpress/gutenberg/model") + .also { it.mkdirs() } + outDir.resolve("SupportedLocales.kt").writeText(buildString { + appendLine("// Generated by :Gutenberg:generateSupportedLocales — do not edit.") + appendLine("package org.wordpress.gutenberg.model") + appendLine() + appendLine("internal object SupportedLocales {") + appendLine(" val ALL: Set = setOf(") + locales.forEach { appendLine(" \"$it\",") } + appendLine(" )") + appendLine("}") + }) + } +} + android { namespace = "org.wordpress.gutenberg" compileSdk = 34 @@ -50,6 +128,9 @@ android { } sourceSets { + getByName("main") { + java.srcDir(generatedLocalesDir) + } getByName("androidTest") { // Make shared test fixtures available as assets for instrumented tests. assets.srcDir(rootProject.file("../test-fixtures")) @@ -108,3 +189,14 @@ project.afterEvaluate { } } } + +// Wire the generator into every task that reads the `main` source set's +// sources: Kotlin compilation and the source-jar tasks AGP creates for the +// maven publication. AGP's source-set DSL only accepts a path string for +// `srcDir`, so the dependency can't be inferred from the source set itself. 
+tasks.matching { + val name = it.name + (name.startsWith("compile") && name.endsWith("Kotlin")) || + (name.startsWith("source") && name.endsWith("Jar")) +}.configureEach { dependsOn(generateSupportedLocales) } + diff --git a/android/Gutenberg/src/main/java/org/wordpress/gutenberg/model/EditorConfiguration.kt b/android/Gutenberg/src/main/java/org/wordpress/gutenberg/model/EditorConfiguration.kt index acf34390..37326d48 100644 --- a/android/Gutenberg/src/main/java/org/wordpress/gutenberg/model/EditorConfiguration.kt +++ b/android/Gutenberg/src/main/java/org/wordpress/gutenberg/model/EditorConfiguration.kt @@ -4,6 +4,7 @@ import android.os.Parcelable import kotlinx.parcelize.IgnoredOnParcel import kotlinx.parcelize.Parcelize import java.net.URI +import java.util.Locale import java.util.UUID @Parcelize @@ -96,7 +97,37 @@ data class EditorConfiguration( fun setNamespaceExcludedPaths(namespaceExcludedPaths: Array) = apply { this.namespaceExcludedPaths = namespaceExcludedPaths } fun setAuthHeader(authHeader: String) = apply { this.authHeader = authHeader } fun setEditorSettings(editorSettings: String?) = apply { this.editorSettings = editorSettings } - fun setLocale(locale: String?) = apply { this.locale = locale } + /** + * Stores [locale] verbatim without running the resolver. Reserved for + * `toBuilder` round-trip and tests — consumers should always go + * through [setLocale] with a [Locale]. + */ + @JvmSynthetic + internal fun setLocaleTag(locale: String?) = apply { this.locale = locale } + + /** + * Resolves [locale] against the bundled translations and stores the + * resulting tag for serialization. + * + * The resolution chain tries, in order: + * 1. exact `language-region` (e.g. `pt-BR` → `pt-br`) + * 2. `language-<script-implied-region>` for macrolanguages we ship + * disjoint regional bundles for (e.g. `zh-Hant-HK` → `zh-tw`) + * 3. `language` only (e.g. `fr-CA` → `fr`) + * 4. 
`en` + * + * Legacy ISO 639-1 codes that Android's `Locale` class still emits + * (`iw` for Hebrew, `in` for Indonesian, `no` for Norwegian Bokmål) + * are mapped to canonical bundle names before lookup. + * + * Languages for which no bundle ships at all silently resolve to + * `en`. The resolver does not log or signal the fallback — consumers + * expecting coverage for a specific language should verify the build + * manifest includes it. + */ + fun setLocale(locale: Locale) = apply { + this.locale = LocaleResolver.Default.resolve(locale) + } fun setCookies(cookies: Map) = apply { this.cookies = cookies } fun setEnableAssetCaching(enableAssetCaching: Boolean) = apply { this.enableAssetCaching = enableAssetCaching } fun setCachedAssetHosts(cachedAssetHosts: Set) = apply { this.cachedAssetHosts = cachedAssetHosts } @@ -150,7 +181,7 @@ data class EditorConfiguration( .setNamespaceExcludedPaths(namespaceExcludedPaths) .setAuthHeader(authHeader) .setEditorSettings(editorSettings) - .setLocale(locale) + .setLocaleTag(locale) .setCookies(cookies) .setEnableAssetCaching(enableAssetCaching) .setCachedAssetHosts(cachedAssetHosts) diff --git a/android/Gutenberg/src/main/java/org/wordpress/gutenberg/model/LocaleResolver.kt b/android/Gutenberg/src/main/java/org/wordpress/gutenberg/model/LocaleResolver.kt new file mode 100644 index 00000000..7185ba49 --- /dev/null +++ b/android/Gutenberg/src/main/java/org/wordpress/gutenberg/model/LocaleResolver.kt @@ -0,0 +1,114 @@ +package org.wordpress.gutenberg.model + +import java.util.Locale + +/** + * Resolves an arbitrary locale tag to one of the bundles GutenbergKit + * actually ships translations for. + * + * Consumers historically hand [EditorConfiguration] an opaque locale string + * — on Android, often the output of [Locale.getLanguage], which strips the + * region. The editor then silently falls back to English whenever the tag + * doesn't match a shipped `translations/<locale>.json` file exactly. 
The + * resolver moves that decision into the library, so a device configured for + * `pt_BR` ends up with the Brazilian Portuguese bundle — and a tag like + * `nl-BE`, for which we don't ship a regional bundle, falls back to `nl` + * instead of all the way to English. + * + * Resolution chain for an input locale: + * 1. `language-region` + * 2. `language-<script-implied-region>` (e.g. `zh-Hant-HK` → `zh-tw`) + * 3. `language` + * 4. `en` + * + * Inputs are parsed as BCP-47 via [Locale.forLanguageTag], so script-tagged + * inputs like `zh-Hans-CN` collapse to `zh-cn` rather than falling through + * to English. Underscore-separated identifiers (`pt_BR`, `EN_GB`) are + * pre-normalised to dashes before parsing. Legacy ISO 639-1 codes that + * Android's `Locale` class still emits (`iw` for Hebrew, `in` for + * Indonesian, `no` for Norwegian Bokmål) are mapped to their canonical + * equivalents before lookup. Variant and Unicode-extension subtags (e.g. + * `de-DE-u-ca-gregory`) are ignored — the editor doesn't vary translations + * by calendar or numbering system. + * + * The supported set is generated at build time from the JS build manifest + * (see `:Gutenberg:generateSupportedLocales`), so the resolver and the + * shipped bundles cannot drift. + */ +internal class LocaleResolver(supportedLocales: Collection) { + private val supportedLocales: Set = + supportedLocales.map { normalize(it) }.toSet() + + constructor() : this(SupportedLocales.ALL) + + /** + * Resolves a string locale tag against the shipped translation bundles. + * + * Accepts BCP-47 tags (`pt-BR`, `zh-Hant-HK`) and the underscore-separated + * variant Android's platform APIs emit (`pt_BR`). Inputs that aren't valid + * BCP-47 — POSIX locales like `pt_BR.UTF-8`, anything `Locale.forLanguageTag` + * can't parse to a non-empty language — fall back to `en`. 
+ */ + fun resolve(tag: String?): String { + if (tag.isNullOrEmpty()) return DEFAULT_LOCALE + // Java's BCP-47 parser uses '-'; pre-normalise '_' so platform-native + // identifiers like `pt_BR` parse cleanly. + return resolve(Locale.forLanguageTag(tag.replace('_', '-'))) + } + + /** Resolves a [Locale] against the shipped translation bundles. */ + fun resolve(locale: Locale): String { + val rawLanguage = locale.language.lowercase(Locale.ROOT) + if (rawLanguage.isEmpty()) return DEFAULT_LOCALE + val language = LANGUAGE_ALIASES[rawLanguage] ?: rawLanguage + + val region = locale.country.lowercase(Locale.ROOT) + if (region.isNotEmpty()) { + val full = "$language-$region" + if (supportedLocales.contains(full)) return full + } + + // For macrolanguages where we ship disjoint regional bundles only + // (e.g. `zh-cn`/`zh-tw` with no language-only `zh`), fall back to a + // script-implied region before the language-only step. Without this, + // `zh-Hant-HK` and bare `zh-Hans` end up at English even though the + // script subtag clearly indicates Traditional/Simplified intent. + val script = locale.script.lowercase(Locale.ROOT) + if (script.isNotEmpty()) { + val implied = scriptImpliedTag(language, script) + if (implied != null && supportedLocales.contains(implied)) return implied + } + + if (supportedLocales.contains(language)) return language + + return DEFAULT_LOCALE + } + + companion object { + // Reused by `EditorConfiguration.Builder.setLocale` so the + // supported-set HashSet isn't rebuilt on every call. + val Default: LocaleResolver = LocaleResolver() + + private const val DEFAULT_LOCALE = "en" + + // Android's `Locale` class still emits the legacy ISO 639-1 codes for + // Hebrew (`iw`) and Indonesian (`in`) for backward compat, and the + // deprecated `no` macrolanguage tag survives in some configurations + // for the Bokmål bundle we ship as `nb`. Translate before lookup so + // users on those devices don't silently land on English. 
+ private val LANGUAGE_ALIASES = mapOf( + "iw" to "he", + "in" to "id", + "no" to "nb", + ) + + private fun normalize(tag: String): String = + tag.lowercase(Locale.ROOT).replace('_', '-') + + private fun scriptImpliedTag(language: String, script: String): String? = when { + language == "zh" && script == "hans" -> "zh-cn" + language == "zh" && script == "hant" -> "zh-tw" + else -> null + } + } +} diff --git a/android/Gutenberg/src/test/java/org/wordpress/gutenberg/model/EditorConfigurationTest.kt b/android/Gutenberg/src/test/java/org/wordpress/gutenberg/model/EditorConfigurationTest.kt index 28d1be8b..8bea6263 100644 --- a/android/Gutenberg/src/test/java/org/wordpress/gutenberg/model/EditorConfigurationTest.kt +++ b/android/Gutenberg/src/test/java/org/wordpress/gutenberg/model/EditorConfigurationTest.kt @@ -203,14 +203,41 @@ class EditorConfigurationBuilderTest { } @Test - fun `setLocale updates locale`() { + fun `setLocaleTag stores tag verbatim`() { val config = builder() - .setLocale("fr_FR") + .setLocaleTag("fr_FR") .build() assertEquals("fr_FR", config.locale) } + @Test + fun `setLocale runs the resolver against the shipped manifest`() { + // Direct match — `pt-br` is shipped. + assertEquals( + "pt-br", + builder().setLocale(java.util.Locale("pt", "BR")).build().locale + ) + + // Language-only fallback — `fr` ships, `fr-ca` does not. + assertEquals( + "fr", + builder().setLocale(java.util.Locale("fr", "CA")).build().locale + ) + + // English fallback — `xx` is not a real language and isn't shipped. + assertEquals( + "en", + builder().setLocale(java.util.Locale("xx")).build().locale + ) + + // Script subtags strip cleanly: `zh-Hans-CN` → `zh-cn`. 
+ assertEquals( + "zh-cn", + builder().setLocale(java.util.Locale.forLanguageTag("zh-Hans-CN")).build().locale + ) + } + @Test fun `setCookies updates cookies`() { val cookies = mapOf("session" to "abc123") @@ -287,7 +314,7 @@ class EditorConfigurationBuilderTest { .setPostId(456u) .setPlugins(true) .setThemeStyles(true) - .setLocale("de_DE") + .setLocaleTag("de_DE") .setEnableNetworkLogging(true) .build() @@ -331,7 +358,7 @@ class EditorConfigurationBuilderTest { .setNamespaceExcludedPaths(arrayOf("/excluded")) .setAuthHeader("Bearer roundtrip") .setEditorSettings("""{"roundtrip":true}""") - .setLocale("es_ES") + .setLocaleTag("es_ES") .setCookies(mapOf("roundtrip" to "cookie")) .setEnableAssetCaching(true) .setCachedAssetHosts(setOf("cdn.example.com")) @@ -717,11 +744,11 @@ class EditorConfigurationTest { @Test fun `Configurations with different locale are not equal`() { val config1 = builder() - .setLocale("en_US") + .setLocaleTag("en_US") .build() val config2 = builder() - .setLocale("fr_FR") + .setLocaleTag("fr_FR") .build() assertNotEquals(config1, config2) @@ -913,7 +940,7 @@ class EditorConfigurationTest { .setNamespaceExcludedPaths(arrayOf("users")) .setAuthHeader("Bearer token") .setEditorSettings("""{"foo":"bar"}""") - .setLocale("fr") + .setLocaleTag("fr") .setCookies(mapOf("session" to "abc123")) .setEnableAssetCaching(true) .setCachedAssetHosts(setOf("example.com", "cdn.example.com")) diff --git a/android/Gutenberg/src/test/java/org/wordpress/gutenberg/model/GBKitGlobalTest.kt b/android/Gutenberg/src/test/java/org/wordpress/gutenberg/model/GBKitGlobalTest.kt index f49ec165..c1f4b50e 100644 --- a/android/Gutenberg/src/test/java/org/wordpress/gutenberg/model/GBKitGlobalTest.kt +++ b/android/Gutenberg/src/test/java/org/wordpress/gutenberg/model/GBKitGlobalTest.kt @@ -61,7 +61,7 @@ class GBKitGlobalTest { .setPlugins(shouldUsePlugins) .setThemeStyles(shouldUseThemeStyles) .setHideTitle(hideTitle) - .setLocale(locale) + .setLocaleTag(locale) 
.setAuthHeader(authHeader) .setSiteApiNamespace(siteApiNamespace) .setNamespaceExcludedPaths(namespaceExcludedPaths) diff --git a/android/Gutenberg/src/test/java/org/wordpress/gutenberg/model/LocaleResolverTest.kt b/android/Gutenberg/src/test/java/org/wordpress/gutenberg/model/LocaleResolverTest.kt new file mode 100644 index 00000000..664d821b --- /dev/null +++ b/android/Gutenberg/src/test/java/org/wordpress/gutenberg/model/LocaleResolverTest.kt @@ -0,0 +1,145 @@ +package org.wordpress.gutenberg.model + +import org.junit.Assert.assertEquals +import org.junit.Assert.assertTrue +import org.junit.Test +import java.util.Locale + +class LocaleResolverTest { + + // Stand-in for the manifest emitted at build time. Mirrors the real + // supported set closely enough to exercise both fallback steps. + private val resolver = LocaleResolver( + listOf( + "de", "en-gb", "es", "es-ar", "fr", "nl", "nl-be", + "pt", "pt-br", "zh-cn", "zh-tw" + ) + ) + + @Test + fun `null and empty input fall back to English`() { + assertEquals("en", resolver.resolve(null)) + assertEquals("en", resolver.resolve("")) + } + + @Test + fun `full normalized tag is returned when shipped`() { + assertEquals("pt-br", resolver.resolve("pt-br")) + assertEquals("pt-br", resolver.resolve("pt-BR")) + assertEquals("pt-br", resolver.resolve("pt_BR")) + assertEquals("en-gb", resolver.resolve("EN_GB")) + assertEquals("zh-cn", resolver.resolve("zh-CN")) + } + + @Test + fun `falls back to language-only tag when the regional bundle is absent`() { + // `fr-CA` not shipped, but `fr` is. + assertEquals("fr", resolver.resolve("fr-CA")) + // `de-AT` not shipped, but `de` is. + assertEquals("de", resolver.resolve("de-AT")) + } + + @Test + fun `falls back to English when neither full nor language match`() { + // We ship `zh-cn`/`zh-tw` but no language-only `zh`. 
This is the + // real-world footgun the Brazilian/Chinese examples in issue 490 + // describe — `Locale#getLanguage` returns just `zh`, which has + // historically dropped users into the English bundle. + assertEquals("en", resolver.resolve("zh")) + assertEquals("en", resolver.resolve("xx-yy")) + } + + @Test + fun `resolves Locale values via language and region`() { + assertEquals("pt-br", resolver.resolve(Locale("pt", "BR"))) + assertEquals("fr", resolver.resolve(Locale("fr", "CA"))) + assertEquals("zh-cn", resolver.resolve(Locale.SIMPLIFIED_CHINESE)) + // The footgun this issue fixes: WP-Android historically passed + // `Locale.getLanguage()` (just `zh`), which dropped Chinese users + // into English. The resolver still falls back to `en` for that + // bare tag because we ship no language-only `zh` bundle — but + // consumers who pass the full `Locale` now get `zh-cn`. + assertEquals("en", resolver.resolve(Locale("zh"))) + } + + @Test + fun `script subtags are stripped before matching`() { + // `LocaleListCompat` and `Locale.forLanguageTag` callers can produce + // script-tagged inputs. Without explicit handling these lowercase to + // `zh-hans-cn`, miss the supported set, and fall through to English + // despite a `zh-cn` bundle being available. + assertEquals("zh-cn", resolver.resolve("zh-Hans-CN")) + assertEquals("zh-tw", resolver.resolve("zh-Hant-TW")) + assertEquals("zh-cn", resolver.resolve(Locale.forLanguageTag("zh-Hans-CN"))) + assertEquals("zh-tw", resolver.resolve(Locale.forLanguageTag("zh-Hant-TW"))) + } + + @Test + fun `script subtag implies region when language-region and language are absent`() { + // We ship `zh-cn` and `zh-tw` but no language-only `zh`. Without a + // script-aware fallback, Hong Kong and Macau Traditional Chinese + // users (`zh-Hant-HK` / `zh-Hant-MO`) silently land on English even + // though `Hant` clearly indicates Traditional Chinese. 
+ assertEquals("zh-tw", resolver.resolve("zh-Hant-HK")) + assertEquals("zh-tw", resolver.resolve("zh-Hant-MO")) + assertEquals("zh-tw", resolver.resolve(Locale.forLanguageTag("zh-Hant-HK"))) + assertEquals("zh-tw", resolver.resolve(Locale.forLanguageTag("zh-Hant-MO"))) + + // Bare `zh-Hans` / `zh-Hant` with no region still implies a bundle. + assertEquals("zh-cn", resolver.resolve("zh-Hans")) + assertEquals("zh-tw", resolver.resolve("zh-Hant")) + assertEquals("zh-cn", resolver.resolve(Locale.forLanguageTag("zh-Hans"))) + assertEquals("zh-tw", resolver.resolve(Locale.forLanguageTag("zh-Hant"))) + } + + @Test + fun `legacy ISO 639-1 codes are aliased to canonical bundles`() { + // Android's `Locale` class emits the legacy codes for Hebrew (`iw`) + // and Indonesian (`in`) — both for `Locale(String)` and for tags + // round-tripped through `Locale.forLanguageTag`. Without the alias + // map, every Hebrew or Indonesian device that hits this resolver + // via the system Locale falls back to English despite shipping the + // bundles. + val aliasResolver = LocaleResolver(listOf("he", "id", "nb")) + + assertEquals("he", aliasResolver.resolve("iw")) + assertEquals("he", aliasResolver.resolve("iw-IL")) + assertEquals("he", aliasResolver.resolve(Locale("iw", "IL"))) + + assertEquals("id", aliasResolver.resolve("in")) + assertEquals("id", aliasResolver.resolve("in-ID")) + assertEquals("id", aliasResolver.resolve(Locale("in", "ID"))) + + // Norwegian macrolanguage `no` falls through to the Bokmål bundle. + assertEquals("nb", aliasResolver.resolve("no")) + assertEquals("nb", aliasResolver.resolve(Locale("no"))) + } + + @Test + fun `variant and extension subtags are ignored`() { + // Calendar and other Unicode extensions shouldn't influence which + // bundle ships — the editor doesn't vary translations by calendar. 
+ assertEquals("de", resolver.resolve("de-DE-u-ca-gregory")) + assertEquals("pt-br", resolver.resolve("pt-BR-u-nu-latn")) + } + + // Exhaustive coverage of the shipped manifest. Each tag must resolve to + // itself — no normalisation tricks, no accidental fallbacks. The set is + // generated from the JS build manifest at compile time, so a missing + // manifest fails the build long before we get here. + @Test + fun `every shipped locale resolves to itself`() { + assertTrue( + "SupportedLocales.ALL is empty — generator should have failed the build", + SupportedLocales.ALL.isNotEmpty() + ) + + SupportedLocales.ALL.forEach { locale -> + assertEquals( + "Shipped locale '$locale' should resolve to itself", + locale, + LocaleResolver.Default.resolve(locale) + ) + } + } +}