diff --git a/docs/model/PL.html b/docs/model/PL.html
index 3c5f1db..9044bfa 100644
--- a/docs/model/PL.html
+++ b/docs/model/PL.html
@@ -709,6 +709,23 @@
Overview of concepts
+
+
+
+
+
+
address-overflow
+ - An overflow field for information that is not captured differently in a form
+
+
+
+
+
+
+
+
+
+
@@ -1388,6 +1405,8 @@ Example addresses
+
+
@@ -1479,6 +1498,12 @@ Example addresses
+
+ | address-overflow | Nějaké doplňující informace |
+
+
+
+
| locality1 | Warsaw |
@@ -1556,6 +1581,7 @@ Example addresses
Output for "address":
ul. Warsaw 9/10
+Nějaké doplňující informace
01-001 Warsaw
Polska
@@ -1636,6 +1662,12 @@ Example addresses
+
+ | address-overflow | floor 5 apt 7 |
+
+
+
+
| locality1 | Warsaw |
@@ -1713,6 +1745,7 @@ Example addresses
Output for "address":
ul. Warsaw 9
+floor 5 apt 7
01-001 Warsaw
Polska
@@ -3715,7 +3748,7 @@ Formatting:
Flattened formatting:
address =
-street␣building/unit-typeunit-name
postal-code␣locality1
country-name
+street␣building/unit-typeunit-name
address-overflow
postal-code␣locality1
country-name
@@ -3980,12 +4013,26 @@ Parsing:
+
+ Decomposition Cascade
+
+
+
+ Cascade:
+
+
+ -
+
+
Decomposition
+ (ParseStreetAddressWithOverflow)
+
Anchor beginning: True
+ Capture Reference: ParseStreetAddressWithOverflow
@@ -4324,6 +4371,46 @@ Parsing:
+
+
+
+
+
+ -
+
+
+Regex Fragment:
\n
+
+
+
+
+ -
+
+
+
+
+
+ Capture address-overflow
+
+ (MATCH_REQUIRED)
+
+
+
+ Parts:
+
+
+ -
+
+
+Regex Fragment:
[\s\S]+
+
+
+
+
+
+
+
+
@@ -4342,6 +4429,382 @@ Parsing:
+
+
+
+
+
+
+ Decomposition
+
+ (ParseStreetAddressWithoutOverflow)
+
+
+ Anchor beginning: True
+
+ Capture Reference: ParseStreetAddressWithoutOverflow
+
+
+
+
+
+
+ Capture street-address-alternative-1
+
+ (MATCH_REQUIRED)
+
+
+
+ Parts:
+
+
+ -
+
+
+
+ Capture Reference: ParseBuildingLocation
+
+
+
+
+
+
+
+ Capture building-location
+
+ (MATCH_REQUIRED)
+
+
+
+ Parts:
+
+
+ -
+
+
+
+
+
+ Capture street
+
+ (MATCH_REQUIRED)
+
+
+ Prefix:
+
+
+
+
+ Regex Reference: kStreetOptionalPrefixRe =>
+ (?:(?:ulica|ul\.?|aleja|al\.?|plac|pl\.?|skwer|rondo|osiedle|boczna|bulwar|droga|rynek|szosa|zaulek)\s*)?
+
+
+
+
+
+
+ Parts:
+
+
+ -
+
+
+
+ Regex Reference:
kMultipleLazyWordsRe =>
+ (?:[^\s,]+(?:[^\S\r\n]+[^\s,]+)*?)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ -
+
+
+
+Separator: Regex Reference
kWhitespaceSeparator =>
+ (?:^|\s+)
+
+
+
+
+
+ -
+
+
+
+
+
+ Capture building-and-unit
+
+ (MATCH_REQUIRED)
+
+
+
+ Parts:
+
+
+ -
+
+
+
+
+
+ Capture building
+
+ (MATCH_REQUIRED)
+
+
+
+ Parts:
+
+
+ -
+
+
+
+
+ Regex Reference: kBuildingValueRe
+
+
+
+ Regex concatenation
+
+
+ -
+
+
+Regex Fragment:
\d+
+
+
+
+
+ -
+
+
+Regex Fragment:
(?:
+
+
+
+
+ -
+
+
+Regex Fragment:
\s*[[:alpha:]]\b
+
+
+
+
+ -
+
+
+Regex Fragment:
)?
+
+
+
+
+
+ Wrap as non-capture group: False
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ -
+
+
+
+Separator: Regex Reference
kHouseNumberAndUnitSeparator =>
+ (?:^|[/\s]+)
+
+
+
+
+
+ -
+
+
+
+ Capture Reference: ParseUnitWithOptionalPrefix
+
+
+
+
+
+
+
+ Capture unit
+
+ (MATCH_OPTIONAL)
+
+
+
+ Parts:
+
+
+ -
+
+
+
+
+
+ Capture unit-type
+
+ (MATCH_OPTIONAL)
+
+
+
+ Parts:
+
+
+ -
+
+
+
+ Regex Reference:
kUnitTypeLiteralRe =>
+ (?:mieszkanie|m\.?|lokal|lok\.?|apartment|apt\.?)?
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ -
+
+
+
+Separator:
+
+
+Regex Fragment:
\s*
+
+
+
+
+
+
+
+
+ -
+
+
+
+
+
+ Capture unit-name
+
+ (MATCH_REQUIRED)
+
+
+
+ Parts:
+
+
+ -
+
+
+
+ Regex Reference:
kUnitNameValueRe =>
+ (?:\d+\w?\b|\w\b)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Anchor end: True
+
+
+
+
+
+
+
+
+
@@ -4355,6 +4818,11 @@ Children:
building-location
+
+
+ address-overflow
+
+
@@ -4367,13 +4835,13 @@ Children:
Formatting:
street-address-alternative-1 =
-building-location
+building-location⏎address-overflow
Flattened formatting:
street-address-alternative-1 =
-street␣building/unit-typeunit-name
+street␣building/unit-typeunit-name
address-overflow
@@ -5446,6 +5914,42 @@ Flattened formatting:
+
+
+
+
+
+
+
+
+
+ #
+
+ address-overflow
+
+
+
+An overflow field for information that is not captured differently in a form
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/model/countries/PL/PL-formatting-rules.yaml b/model/countries/PL/PL-formatting-rules.yaml
index 8d276c4..7c289f7 100644
--- a/model/countries/PL/PL-formatting-rules.yaml
+++ b/model/countries/PL/PL-formatting-rules.yaml
@@ -13,6 +13,8 @@ formatting-rules:
street-address-alternative-1:
- building-location
+ - separator: "\n"
+ - address-overflow
building-location:
- street
@@ -44,6 +46,7 @@ examples:
building: 9
unit: 10
building-and-unit: 9/10
+ address-overflow: Nějaké doplňující informace
locality1: Warsaw
postal-code: 01-001
country: PL
@@ -54,6 +57,7 @@ examples:
show: true
text: |
ul. Warsaw 9/10
+ Nějaké doplňující informace
01-001 Warsaw
Polska
@@ -64,6 +68,7 @@ examples:
street: ul. Warsaw
building: 9
building-and-unit: 9
+ address-overflow: floor 5 apt 7
locality1: Warsaw
postal-code: 01-001
country: PL
@@ -74,5 +79,6 @@ examples:
show: true
text: |
ul. Warsaw 9
+ floor 5 apt 7
01-001 Warsaw
Polska
diff --git a/model/countries/PL/PL-model.yaml b/model/countries/PL/PL-model.yaml
index 6648a0e..962f991 100644
--- a/model/countries/PL/PL-model.yaml
+++ b/model/countries/PL/PL-model.yaml
@@ -21,4 +21,5 @@ extra-definitions:
- street
- building-and-unit
street-address-alternative-1:
- - building-location
\ No newline at end of file
+ - building-location
+ - address-overflow
\ No newline at end of file
diff --git a/model/countries/PL/PL-parsing-rules.yaml b/model/countries/PL/PL-parsing-rules.yaml
index da58b1b..7c618a8 100644
--- a/model/countries/PL/PL-parsing-rules.yaml
+++ b/model/countries/PL/PL-parsing-rules.yaml
@@ -26,6 +26,25 @@ regex_definitions:
regex_fragment: (?:^|[/\s]+)
capture_definitions:
+ ParseStreetAddressWithOverflow:
+ capture:
+ output: street-address-alternative-1
+ parts:
+ - capture_reference: ParseBuildingLocation
+ - regex_fragment: '\n'
+ - capture:
+ output: address-overflow
+ # Matches any non-empty string, including new-line characters.
+ parts: [ {regex_fragment: '[\s\S]+'} ]
+ quantifier: MATCH_REQUIRED
+
+ ParseStreetAddressWithoutOverflow:
+ capture:
+ output: street-address-alternative-1
+ parts:
+ - capture_reference: ParseBuildingLocation
+
+
ParseBuildingLocation:
capture:
output: building-location
@@ -44,6 +63,7 @@ capture_definitions:
- separator: {regex_reference: kHouseNumberAndUnitSeparator}
- capture_reference: ParseUnitWithOptionalPrefix
+
ParseUnitWithOptionalPrefix:
capture:
output: unit
@@ -65,11 +85,10 @@ parsing_definitions:
capture_reference: ParseBuildingLocation
street-address-alternative-1:
- decomposition:
- capture:
- output: street-address-alternative-1
- parts:
- - capture_reference: ParseBuildingLocation
+ decomposition_cascade:
+ alternatives:
+ - decomposition: {capture_reference: ParseStreetAddressWithOverflow}
+ - decomposition: {capture_reference: ParseStreetAddressWithoutOverflow}
building-and-unit:
decomposition:
@@ -222,4 +241,36 @@ test_parsing_definitions:
building: "9A"
unit: "m.10"
unit-type: "m."
- unit-name: "10"
\ No newline at end of file
+ unit-name: "10"
+- id: "Test 15"
+ type: street-address-alternative-1
+ input: "ul. Warsaw 9A\nthird entrance of building"
+ output:
+ street-address-alternative-1: "ul. Warsaw 9A\nthird entrance of building"
+ building-location: "ul. Warsaw 9A"
+ street: "Warsaw"
+ building-and-unit: "9A"
+ building: "9A"
+ address-overflow: "third entrance of building"
+- id: "Test 16"
+ type: street-address-alternative-1
+ input: "ul. Warsaw 9\nthird entrance of building"
+ output:
+ street-address-alternative-1: "ul. Warsaw 9\nthird entrance of building"
+ building-location: "ul. Warsaw 9"
+ street: "Warsaw"
+ building-and-unit: "9"
+ building: "9"
+ address-overflow: "third entrance of building"
+- id: "Test 17"
+ type: street-address-alternative-1
+ input: "ul. Warsaw 9/10\nNějaké doplňující informace"
+ output:
+ street-address-alternative-1: "ul. Warsaw 9/10\nNějaké doplňující informace"
+ building-location: "ul. Warsaw 9/10"
+ street: "Warsaw"
+ building-and-unit: "9/10"
+ building: "9"
+ unit: "10"
+ unit-name: "10"
+ address-overflow: "Nějaké doplňující informace"
\ No newline at end of file