Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -376,20 +376,20 @@ describe("getLanguageBySubtag", () => {
it("should find languages by valid languageSubtag field", () => {
expect(getLanguageBySubtag("aaa")?.exonym).toEqual("Ghotuo");
expect(getLanguageBySubtag("ab")?.exonym).toEqual("Abkhaz");
expect(getLanguageBySubtag("uz")?.exonym).toEqual("Uzbek");
expect(getLanguageBySubtag("mg")?.iso639_3_code).toEqual("plt");
expect(getLanguageBySubtag("uz")?.iso639_3_code).toEqual("uzb");
expect(getLanguageBySubtag("mg")?.iso639_3_code).toEqual("mlg");
expect(getLanguageBySubtag("zh")?.exonym).toEqual("Chinese");
expect(getLanguageBySubtag("za")?.exonym).toEqual("Zhuang");
expect(getLanguageBySubtag("bnc")?.iso639_3_code).toEqual("lbk");
expect(getLanguageBySubtag("bnc")?.iso639_3_code).toEqual("bnc");
expect(getLanguageBySubtag("no")?.exonym).toEqual("Norwegian");
expect(getLanguageBySubtag("sh")?.iso639_3_code).toEqual("hbs");
expect(getLanguageBySubtag("hbs")?.iso639_3_code).toEqual("hbs");
expect(getLanguageBySubtag("sa")?.exonym).toEqual("Sanskrit");
expect(getLanguageBySubtag("zap")?.exonym).toEqual("Zapotec");
expect(getLanguageBySubtag("ik")?.iso639_3_code).toEqual("esk");
expect(getLanguageBySubtag("ik")?.iso639_3_code).toEqual("ipk");
expect(getLanguageBySubtag("id")?.exonym).toEqual("Indonesian");
expect(getLanguageBySubtag("ja")?.exonym).toEqual("Japanese");
expect(getLanguageBySubtag("yi")?.autonym).toEqual("יידיש");
expect(getLanguageBySubtag("luy")?.iso639_3_code).toEqual("bxk");
expect(getLanguageBySubtag("luy")?.iso639_3_code).toEqual("luy");
});
it("should find languages using the defaultSearchResultModifier", () => {
expect(
Expand All @@ -401,46 +401,75 @@ describe("getLanguageBySubtag", () => {
// The exonym for uz gets demarcated as [Uz]bek, so have to check different field
expect(
getLanguageBySubtag("uz", defaultSearchResultModifier)?.iso639_3_code
).toEqual("uzn");
).toEqual("uzb");
expect(
getLanguageBySubtag("mg", defaultSearchResultModifier)?.iso639_3_code
).toEqual("plt");
).toEqual("mlg");
const chineseResult = getLanguageBySubtag(
"zh",
defaultSearchResultModifier
);
expect(chineseResult?.exonym).toEqual("Chinese");
expect(chineseResult?.names.length).toBeGreaterThan(3);
expect(
getLanguageBySubtag("zh", defaultSearchResultModifier)?.exonym
).toEqual("Chinese");
getLanguageBySubtag("es", defaultSearchResultModifier)?.exonym
).toEqual("Spanish");
expect(
getLanguageBySubtag("za", defaultSearchResultModifier)?.exonym
).toEqual("Zhuang");
expect(
getLanguageBySubtag("bnc", defaultSearchResultModifier)?.iso639_3_code
).toEqual("lbk");
).toEqual("bnc");
expect(
getLanguageBySubtag("no", defaultSearchResultModifier)?.exonym
).toEqual("Norwegian");
expect(
getLanguageBySubtag("sh", defaultSearchResultModifier)?.iso639_3_code
).toEqual("hbs");
expect(
getLanguageBySubtag("sa", defaultSearchResultModifier)?.exonym
).toEqual("Sanskrit");
getLanguageBySubtag("hbs", defaultSearchResultModifier)?.iso639_3_code
).toEqual("hbs");
expect(
getLanguageBySubtag("zap", defaultSearchResultModifier)?.exonym
).toEqual("Zapotec");
expect(
getLanguageBySubtag("ik", defaultSearchResultModifier)?.iso639_3_code
).toEqual("esk");
).toEqual("ipk");
expect(
getLanguageBySubtag("id", defaultSearchResultModifier)?.exonym
).toEqual("Indonesian");
expect(
getLanguageBySubtag("ja", defaultSearchResultModifier)?.exonym
).toEqual("Japanese");
expect(
getLanguageBySubtag("yi", defaultSearchResultModifier)?.autonym
getLanguageBySubtag("ydd", defaultSearchResultModifier)?.autonym
).toEqual("יידיש");
expect(
getLanguageBySubtag("yi", defaultSearchResultModifier)?.exonym
).toEqual("Yiddish");
expect(
getLanguageBySubtag("luy", defaultSearchResultModifier)?.iso639_3_code
).toEqual("bxk");
).toEqual("luy");
expect(
getLanguageBySubtag("ak", defaultSearchResultModifier)?.exonym
).toEqual("Akan");
expect(
getLanguageBySubtag("sa", defaultSearchResultModifier)?.exonym
).toEqual("Sanskrit");

// Because we only demarcate one part of the card, it just so happens that only in these couple cases do we find
// demarcation on the field we are testing.
// Enhance: strip demarcation in all the tests in this group
expect(
stripDemarcation(
getLanguageBySubtag("aka", defaultSearchResultModifier)?.exonym
)
).toEqual("Akan");
expect(
stripDemarcation(
getLanguageBySubtag("san", defaultSearchResultModifier)?.exonym
)
).toEqual("Sanskrit");
});
it("should use searchResultModifier if provided", () => {
const foobar = "foobar";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -414,6 +414,19 @@ describe("Tag parsing", () => {
expect(uznResult?.language?.iso639_3_code).toEqual("uzn");
expect(uznResult?.script?.name).toEqual("Latin");
});

it("should handle normal tags for Chinese correctly", () => {
  // Each entry pairs an input tag with the script code expected for it.
  // The second tag is deliberately mixed-case: parsing should be case insensitive.
  const tagsAndScripts: [string, string][] = [
    ["zh-CN", "Hans"],
    ["zH-TW", "Hant"],
  ];

  for (const [tag, expectedScriptCode] of tagsAndScripts) {
    const parsed = parseLangtagFromLangChooser(tag);
    const language = parsed?.language;

    // Both tags should resolve to the Chinese language entry, and that entry
    // should carry more than three names.
    expect(language?.exonym).toEqual("Chinese");
    expect(language?.names.length).toBeGreaterThan(3);
    expect(parsed?.script?.code).toEqual(expectedScriptCode);
  }
});
});

describe("defaultRegionForLangTag", () => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,53 +98,23 @@ export function getLanguageBySubtag(
searchString: string
) => ILanguage[]
): ILanguage | undefined {
const languages = rawLanguages as ILanguage[];
/* If the code is used for both a macrolanguage and the representative language (macrolanguageNotes.md),
return the representative language by default (BL-14824). */
const macrolanguageRepFuse = new Fuse(languages as ILanguage[], {
keys: [
"parentMacrolanguage.languageSubtag",
"parentMacrolanguage.iso639_3_code",
"isRepresentativeForMacrolanguage",
],
// For the Chinese special case, we actually want the card with iso639_3_code cmn (the individual language card)
// because it has all the data on it. At this stage it doesn't have the tag zh, which is the one we actually want;
// that will be handled by the searchResultModifier.
const correctedCode = code.toLowerCase() === "zh" ? "cmn" : code;
const fuse = new Fuse(rawLanguages as ILanguage[], {
keys: ["languageSubtag", "iso639_3_code"],
threshold: 0, // exact matches only
useExtendedSearch: true,
});
let rawResults = macrolanguageRepFuse.search({
$and: [
{
$or: [
{ "parentMacrolanguage.languageSubtag": "=" + code },
{ "parentMacrolanguage.iso639_3_code": "=" + code },
],
},
{ isRepresentativeForMacrolanguage: "=true" },
],
});

if (rawResults.length > 1)
console.error(
"Unexpectedly found multiple representative languages for " +
code +
": " +
rawResults.map((r) => r.item.iso639_3_code).join(", ")
);

/* If search for code didn't find exactly one representative language for a macrolanguage,
do normal language search instead */
if (rawResults.length !== 1) {
const fuse = new Fuse(languages as ILanguage[], {
keys: ["languageSubtag", "iso639_3_code"],
threshold: 0, // exact matches only
useExtendedSearch: true,
});
rawResults = fuse.search("=" + code); // exact match
}

const result = rawResults[0]?.item;
const rawResults = fuse.search(`="${correctedCode}"`);
return searchResultModifier
? searchResultModifier([result], code)[0]
: result;
? searchResultModifier(
rawResults.map((r) => r.item),
code
)[0]
: rawResults[0]?.item;
}

// This is not a comprehensive language tag parser. It's just built to parse the
Expand Down
Loading