From 7ae6ee97945837c572cd8331e4c98d0ee87f0153 Mon Sep 17 00:00:00 2001
From: Atalyk Akash
Date: Thu, 29 Jan 2026 14:14:11 +0000
Subject: [PATCH] Add ADDRESS_HOME_ZIP_AND_CITY field to FR model

---
 model/countries/FR/FR-formatting-rules.yaml |  9 ++-
 model/countries/FR/FR-model.yaml            | 15 +++-
 model/countries/FR/FR-parsing-rules.yaml    | 78 ++++++++++++++++++++-
 3 files changed, 97 insertions(+), 5 deletions(-)

diff --git a/model/countries/FR/FR-formatting-rules.yaml b/model/countries/FR/FR-formatting-rules.yaml
index ad486ce..d0820b4 100644
--- a/model/countries/FR/FR-formatting-rules.yaml
+++ b/model/countries/FR/FR-formatting-rules.yaml
@@ -4,9 +4,7 @@ formatting-rules:
     - separator: "\n"
     - locality2
     - separator: "\n"
-    - postal-code
-    - separator: " "
-    - locality1
+    - postal-code-and-city  # replaces the inlined postal-code / " " / locality1 sequence removed above
     - separator: "\n"
     - admin-area1
     - separator: "\n"
@@ -24,6 +22,11 @@ formatting-rules:
     - separator: " "
     - street
 
+  postal-code-and-city:  # same pieces as the removed inline sequence: postal-code, one space, locality1
+    - postal-code
+    - separator: " "
+    - locality1
+
 examples:
   - id: name
     comment: |
diff --git a/model/countries/FR/FR-model.yaml b/model/countries/FR/FR-model.yaml
index d5359c5..746ea68 100644
--- a/model/countries/FR/FR-model.yaml
+++ b/model/countries/FR/FR-model.yaml
@@ -13,6 +13,19 @@ cut-off-tokens:
   - locality4
 
 extra-definitions:
+  address:  # NOTE(review): presumably overrides the default child list of the address node for FR - confirm against the model schema
+    - street-address
+    - street-address-alternative-1
+    - locality2
+    - postal-code-and-city
+    - admin-area1
+    - country
+    - country-name
+
   street-address-alternative-1:
     - building-location
-    - address-overflow
\ No newline at end of file
+    - address-overflow
+
+  postal-code-and-city:  # compound node made of postal-code and locality1
+    - postal-code
+    - locality1
\ No newline at end of file
diff --git a/model/countries/FR/FR-parsing-rules.yaml b/model/countries/FR/FR-parsing-rules.yaml
index c4b4387..b732f11 100644
--- a/model/countries/FR/FR-parsing-rules.yaml
+++ b/model/countries/FR/FR-parsing-rules.yaml
@@ -20,6 +20,9 @@ regex_definitions:
   kCommaOrNewlineSeparator:
     regex_fragment: '(?:, |\n|\r|,)+'
 
+  kZipValueRe:  # exactly five digits delimited by word boundaries
+    regex_fragment: '\b\d{5}\b'
+
capture_definitions: ParseBuildingLocation: capture: @@ -47,6 +50,30 @@ capture_definitions: parts: [ {regex_fragment: '(?:[^\r\n]+)'} ] quantifier: MATCH_OPTIONAL + ParsePostalCodeThenCity: + capture: + output: postal-code-and-city + parts: + - capture: + output: postal-code + parts: [ {regex_reference: kZipValueRe} ] + - separator: {regex_reference: kCommaOrWhitespaceSeparator} + - capture: + output: locality1 + parts: [ {regex_reference: kMultipleWordsRe} ] + + ParseCityThenPostalCode: + capture: + output: postal-code-and-city + parts: + - capture: + output: locality1 + parts: [ {regex_reference: kMultipleWordsRe} ] + - separator: {regex_reference: kCommaOrWhitespaceSeparator} + - capture: + output: postal-code + parts: [ {regex_reference: kZipValueRe} ] + parsing_definitions: building-location: decomposition: @@ -54,6 +81,13 @@ parsing_definitions: street-address-alternative-1: decomposition: capture_reference: StreetAddressDecomposition + postal-code-and-city: + decomposition_cascade: + alternatives: + - decomposition: + capture_reference: ParsePostalCodeThenCity + - decomposition: + capture_reference: ParseCityThenPostalCode test_parsing_definitions: - id: "Test 1" @@ -155,4 +189,46 @@ test_parsing_definitions: building-location: "1661 Place Charles de Gaulle" street: "Place Charles de Gaulle" building: "1661" - address-overflow: "Floor 5, Apartment 2" \ No newline at end of file + address-overflow: "Floor 5, Apartment 2" +- id: "Test 13" + type: postal-code-and-city + input: "59491 Villeneuve-d'Ascq" + output: + postal-code-and-city: "59491 Villeneuve-d'Ascq" + postal-code: "59491" + locality1: "Villeneuve-d'Ascq" +- id: "Test 14" + type: postal-code-and-city + input: "Paris 75002" + output: + postal-code-and-city: "75002 Paris" + locality1: "Paris" + postal-code: "75002" +- id: "Test 15" + type: postal-code-and-city + input: "69120 Vaulx-en-Velin" + output: + postal-code-and-city: "69120 Vaulx-en-Velin" + postal-code: "69120" + locality1: "Vaulx-en-Velin" 
+- id: "Test 16"
+  type: postal-code-and-city # Matches the key in parsing_definitions
+  input: "Aix-en-Provence 13100" # city-first order, hyphenated city name
+  output:
+    postal-code-and-city: "Aix-en-Provence 13100" # compound value is expected unchanged from the input
+    locality1: "Aix-en-Provence"
+    postal-code: "13100"
+- id: "Test 17"
+  type: postal-code-and-city # Matches the key in parsing_definitions
+  input: "Val de Moder 67350" # city-first order, city name made of several space-separated words
+  output:
+    postal-code-and-city: "Val de Moder 67350" # compound value is expected unchanged from the input
+    locality1: "Val de Moder"
+    postal-code: "67350"
+- id: "Test 18"
+  type: postal-code-and-city # Matches the key in parsing_definitions
+  input: "42000 St. Étienne" # postal-code-first order, city with an abbreviation dot and a non-ASCII letter
+  output:
+    postal-code-and-city: "42000 St. Étienne" # compound value is expected unchanged from the input
+    locality1: "St. Étienne"
+    postal-code: "42000"
\ No newline at end of file