From b95457f6a00f32c819c226c2644388c4be9ab351 Mon Sep 17 00:00:00 2001 From: Gaby Pancu Date: Tue, 1 Oct 2024 12:45:30 -0400 Subject: [PATCH] add regex fallback to splitAddress1 function, release v0.7.0 of TS package --- CHANGELOG.md | 4 +- db/data/regions/BE.yml | 3 + db/data/regions/BR.yml | 2 + db/data/regions/CL.yml | 2 + db/data/regions/DE.yml | 3 + db/data/regions/ES.yml | 2 + db/data/regions/IL.yml | 3 + db/data/regions/MX.yml | 2 + db/data/regions/NL.yml | 2 + lang/typescript/.eslintrc | 2 +- lang/typescript/CHANGELOG.md | 26 ++ lang/typescript/package.json | 2 +- .../rollup-plugin-regions-yaml/utils.ts | 7 +- .../extended-address/splitAddress1.test.ts | 367 +++++++++++++++++- .../src/extended-address/splitAddress1.ts | 48 ++- .../src/types/region-yaml-config.ts | 2 + .../src/utils/address-fields.test.ts | 91 +++++ lang/typescript/src/utils/address-fields.ts | 23 ++ lang/typescript/src/utils/regions.ts | 12 + lib/worldwide/region.rb | 5 + lib/worldwide/regions_loader.rb | 1 + .../worldwide/region_data_consistency_test.rb | 18 +- test/worldwide/region_test.rb | 10 + test/worldwide/region_yml_consistency_test.rb | 15 + 24 files changed, 618 insertions(+), 34 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index abc5b93ad..1617df98a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,7 +26,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Security in case of vulnerabilities. ## [Unreleased] -- Nil. 
+- Add address1_regex to regions [#281](https://github.com/Shopify/worldwide/pull/281) +- Add address1_regex for BE, CL, MX, ES, IL [#282](https://github.com/Shopify/worldwide/pull/282) +- Add address1_regex for DE [#286](https://github.com/Shopify/worldwide/pull/286) --- diff --git a/db/data/regions/BE.yml b/db/data/regions/BE.yml index 32fcd3338..d09103492 100644 --- a/db/data/regions/BE.yml +++ b/db/data/regions/BE.yml @@ -31,6 +31,9 @@ format: show: "{firstName} {lastName}_{company}_{address1}_{address2}_{zip} {city}_{country}_{phone}" format_extended: edit: "{country}_{firstName}{lastName}_{company}_{streetName}{streetNumber}_{address2}_{zip}{city}_{phone}" +address1_regex: + - "^(?[^\\d,]+),? (?\\d+(?: ?[a-z])?)$" + - "^(?\\d+(?: ?[a-z])?),? (?[^\\d,]+)$" additional_address_fields: - name: streetName required: true diff --git a/db/data/regions/BR.yml b/db/data/regions/BR.yml index 9c42e6ec4..a0797f289 100644 --- a/db/data/regions/BR.yml +++ b/db/data/regions/BR.yml @@ -28,6 +28,8 @@ format: show: "{firstName} {lastName}_{company}_{address1}_{address2}_{zip} {city} {province}_{country}_{phone}" format_extended: edit: "{country}_{firstName}{lastName}_{company}_{zip}_{streetName}{streetNumber}_{line2}{neighborhood}_{city}{province}_{phone}" +address1_regex: + - "^(?(?!.*\\bnúmero\\b)[^\\d,]+(?\\d+(?: ?[a-z])?)$" additional_address_fields: - name: streetName required: true diff --git a/db/data/regions/CL.yml b/db/data/regions/CL.yml index 9a3690180..5ba266871 100644 --- a/db/data/regions/CL.yml +++ b/db/data/regions/CL.yml @@ -16,6 +16,8 @@ format: show: "{firstName} {lastName}_{company}_{address1}_{address2}_{zip} {city}_{province}_{country}_{phone}" format_extended: edit: "{country}_{firstName}{lastName}_{company}_{streetName}{streetNumber}_{line2}{neighborhood}_{zip}{city}_{province}_{phone}" +address1_regex: + - "^(?[^\\d,]+?),? (?(?:n|n\\.|nº|número|no\\.|no|#)? 
?\\d+(?: ?[a-z])?)$" additional_address_fields: - name: streetName required: true diff --git a/db/data/regions/DE.yml b/db/data/regions/DE.yml index 8a60be330..07e8bb8d6 100644 --- a/db/data/regions/DE.yml +++ b/db/data/regions/DE.yml @@ -29,6 +29,9 @@ format: show: "{firstName} {lastName}_{company}_{address1}_{address2}_{zip} {city}_{country}_{phone}" format_extended: edit: "{country}_{firstName}{lastName}_{company}_{streetName}{streetNumber}_{address2}_{zip}{city}_{phone}" +address1_regex: +- "^(?[^\\d,]+?\\.?)[, ]{1,2}(?\\d+(?: ?[a-z])?)$" +- "^(?[^\\d,]+\\.)(?\\d+(?: ?[a-z])?)$" additional_address_fields: - name: streetName required: true diff --git a/db/data/regions/ES.yml b/db/data/regions/ES.yml index f32f87dbb..73262ca1a 100644 --- a/db/data/regions/ES.yml +++ b/db/data/regions/ES.yml @@ -29,6 +29,8 @@ format: show: "{firstName} {lastName}_{company}_{address1}_{address2}_{zip} {city}_{province}_{country}_{phone}" format_extended: edit: "{country}_{firstName}{lastName}_{company}_{streetName}{streetNumber}_{address2}_{zip}{city}{province}_{phone}" +address1_regex: + - "^(?[^\\d,]+?),? (?(?:n|n\\.|nº|número|no\\.|no|#)? ?\\d+(?: ?[a-z])?)$" additional_address_fields: - name: streetName required: true diff --git a/db/data/regions/IL.yml b/db/data/regions/IL.yml index 51bb03f05..94ac14520 100644 --- a/db/data/regions/IL.yml +++ b/db/data/regions/IL.yml @@ -18,6 +18,9 @@ format: show: "{firstName} {lastName}_{company}_{address1}_{address2}_{zip} {city}_{country}_{phone}" format_extended: edit: "{country}_{firstName}{lastName}_{company}_{streetName}{streetNumber}_{address2}_{zip}{city}_{phone}" +address1_regex: + - "^(?[^\\d,]+),? (?\\d+(?:\/\\d+)?)$" + - "^(?\\d+(?:\/\\d+)?),? 
(?[^\\d,]+)$" additional_address_fields: - name: streetName required: true diff --git a/db/data/regions/MX.yml b/db/data/regions/MX.yml index fd9df402e..58ff928be 100644 --- a/db/data/regions/MX.yml +++ b/db/data/regions/MX.yml @@ -17,6 +17,8 @@ format: show: "{firstName} {lastName}_{company}_{address1}_{address2}_{zip} {city} {province}_{country}_{phone}" format_extended: edit: "{country}_{firstName}{lastName}_{company}_{streetName}{streetNumber}_{line2}{neighborhood}_{zip}{city}{province}_{phone}" +address1_regex: + - "^(?[^\\d,]+?),? (?(?:n|n\\.|nº|número|no\\.|no|#)? ?\\d+(?: ?[a-z])?)$" additional_address_fields: - name: streetName required: true diff --git a/db/data/regions/NL.yml b/db/data/regions/NL.yml index c2e1eb332..df9ee477e 100644 --- a/db/data/regions/NL.yml +++ b/db/data/regions/NL.yml @@ -30,6 +30,8 @@ format: show: "{firstName} {lastName}_{company}_{address1}_{address2}_{zip} {city}_{country}_{phone}" format_extended: edit: "{country}_{firstName}{lastName}_{company}_{streetName}{streetNumber}_{address2}_{zip}{city}_{phone}" +address1_regex: + - "^(?[^\\d]+) (?\\d+(?: ?[a-z])?)$" additional_address_fields: - name: streetName required: true diff --git a/lang/typescript/.eslintrc b/lang/typescript/.eslintrc index b73621909..c92cbb439 100644 --- a/lang/typescript/.eslintrc +++ b/lang/typescript/.eslintrc @@ -17,7 +17,7 @@ "selector": "property", "format": ["strictCamelCase"], "filter": { - "regex": "^(combined_address_format)$", + "regex": "^(combined_address_format|address1_regex)$", "match": false } } diff --git a/lang/typescript/CHANGELOG.md b/lang/typescript/CHANGELOG.md index ad2ccd695..12e7e2e6e 100644 --- a/lang/typescript/CHANGELOG.md +++ b/lang/typescript/CHANGELOG.md @@ -1,5 +1,31 @@ # @shopify/worldwide +## 0.7.0 + +### Minor Changes + +- bbacde1: Add address1 regex for BE, BR, CL, ES, IL, MX +- b6e1c7f: Add address1 regex for DE +- ee7b7a9: Add optional tryRegexFallback param to splitAddress1 function to attempt splitting address lines 
that do not contain the reserved delimiter + +## 0.7.0-next.2 + +### Minor Changes + +- 3bfb56a: Add address1 regex for DE + +## 0.7.0-next.1 + +### Minor Changes + +- Add address1 regex for BE, BR, CL, ES, IL, MX + +## 0.7.0-next.0 + +### Minor Changes + +- 1f5d405: Add optional tryRegexFallback param to splitAddress1 function to attempt splitting address lines that do not contain the reserved delimiter + ## 0.6.0 ### Minor Changes diff --git a/lang/typescript/package.json b/lang/typescript/package.json index 820b84c9c..55230736d 100644 --- a/lang/typescript/package.json +++ b/lang/typescript/package.json @@ -1,7 +1,7 @@ { "name": "@shopify/worldwide", "description": "Utilities for parsing and formatting address fields", - "version": "0.6.0", + "version": "0.7.0", "repository": "git@github.com:Shopify/worldwide.git", "author": "Shopify Inc.", "homepage": "https://github.com/Shopify/worldwide/tree/main/lang/typescript#readme", diff --git a/lang/typescript/rollup-plugin-regions-yaml/utils.ts b/lang/typescript/rollup-plugin-regions-yaml/utils.ts index 216f4d6d1..67d2383dc 100644 --- a/lang/typescript/rollup-plugin-regions-yaml/utils.ts +++ b/lang/typescript/rollup-plugin-regions-yaml/utils.ts @@ -63,6 +63,7 @@ const regionYamlSchema = z.object({ }) .strict() .optional(), + address1_regex: z.optional(z.array(z.string())), }); export type RegionYaml = z.infer; @@ -106,7 +107,10 @@ export function validateRegionYaml( return regionYaml; } -export type MinimalRegionYaml = Pick; +export type MinimalRegionYaml = Pick< + RegionYaml, + 'combined_address_format' | 'address1_regex' +>; /** * Strip the YAML data down to only what we need to keep the resulting JS @@ -115,5 +119,6 @@ export type MinimalRegionYaml = Pick; export function transformRegionYaml(regionYaml: RegionYaml): MinimalRegionYaml { return { combined_address_format: regionYaml.combined_address_format, + address1_regex: regionYaml.address1_regex, }; } diff --git 
a/lang/typescript/src/extended-address/splitAddress1.test.ts b/lang/typescript/src/extended-address/splitAddress1.test.ts index 852d3fc41..9b99f2c01 100644 --- a/lang/typescript/src/extended-address/splitAddress1.test.ts +++ b/lang/typescript/src/extended-address/splitAddress1.test.ts @@ -2,35 +2,382 @@ import {splitAddress1} from './splitAddress1'; describe('splitAddress1', () => { test('returns null when extended address is not defined for region', () => { - expect(splitAddress1('US', '123 Main')).toBeNull(); + expect(splitAddress1('US', '123 Main', false)).toBeNull(); + expect(splitAddress1('US', '', false)).toBeNull(); + expect(splitAddress1('US', '123 Main', true)).toBeNull(); + expect(splitAddress1('US', '', true)).toBeNull(); }); test('returns empty object when extended address string is empty', () => { - expect(splitAddress1('CL', '')).toEqual({}); - expect(splitAddress1('BR', '')).toEqual({}); + expect(splitAddress1('CL', '', false)).toEqual({}); + expect(splitAddress1('BR', '', false)).toEqual({}); + expect(splitAddress1('CL', '', true)).toEqual({}); + expect(splitAddress1('BR', '', true)).toEqual({}); }); - test('returns address1 as street name when no delimiter is present', () => { - expect(splitAddress1('CL', '123 Main')).toEqual({streetName: '123 Main'}); - expect(splitAddress1('BR', '123 Main')).toEqual({streetName: '123 Main'}); + test('returns address1 as street name when no delimiter is present and tryRegexFallback is false', () => { + expect(splitAddress1('CL', '123 Main', false)).toEqual({ + streetName: '123 Main', + }); + expect(splitAddress1('BR', '123 Main', false)).toEqual({ + streetName: '123 Main', + }); }); + test('returns address1 as street name when no delimiter is present, tryRegexFallback is true, and regex is not defined', () => { + expect(splitAddress1('CL', '123 Main', true)).toEqual({ + streetName: '123 Main', + }); + expect(splitAddress1('BR', '123 Main', true)).toEqual({ + streetName: '123 Main', + }); + }); + + test.each([ + { 
+ country: 'NL', + address: 'Kempenaar 25 11', + expected: {streetName: 'Kempenaar 25 11'}, + }, + { + country: 'NL', + address: '40 Baandersstraat', + expected: {streetName: '40 Baandersstraat'}, + }, + { + country: 'BR', + address: 'Main, 123, Apt 2', + expected: {streetName: 'Main, 123, Apt 2'}, + }, + ])( + 'returns address1 as street name when no delimiter is present, tryRegexFallback is true, and address does not match regex', + ({country, address, expected}) => { + expect(splitAddress1(country, address, true)).toEqual(expected); + }, + ); + test('returns street number if string before delimiter is empty', () => { - expect(splitAddress1('CL', '\u2060123')).toEqual({streetNumber: '123'}); - expect(splitAddress1('BR', '\u2060123')).toEqual({streetNumber: '123'}); + expect(splitAddress1('CL', '\u2060123', false)).toEqual({ + streetNumber: '123', + }); + expect(splitAddress1('BR', '\u2060123', false)).toEqual({ + streetNumber: '123', + }); + expect(splitAddress1('CL', '\u2060123', true)).toEqual({ + streetNumber: '123', + }); + expect(splitAddress1('BR', '\u2060123', true)).toEqual({ + streetNumber: '123', + }); }); test('returns full address object when separated by delimiter', () => { - expect(splitAddress1('CL', 'Main \u2060123')).toEqual({ + expect(splitAddress1('CL', 'Main \u2060123', false)).toEqual({ + streetName: 'Main', + streetNumber: '123', + }); + expect(splitAddress1('CL', 'Main \u2060123', true)).toEqual({ streetName: 'Main', streetNumber: '123', }); }); test('returns full address object when separated by delimiter and decorator', () => { - expect(splitAddress1('BR', 'Main, \u2060123')).toEqual({ + expect(splitAddress1('BR', 'Main, \u2060123', false)).toEqual({ + streetName: 'Main', + streetNumber: '123', + }); + expect(splitAddress1('BR', 'Main, \u2060123', true)).toEqual({ streetName: 'Main', streetNumber: '123', }); }); + + test.each([ + { + address: 'Mercuriusstraat 26', + expected: {streetName: 'Mercuriusstraat', streetNumber: '26'}, + }, + { + 
address: 'Bloemgracht 41B', + expected: {streetName: 'Bloemgracht', streetNumber: '41B'}, + }, + { + address: 'Bloemgracht 41b', + expected: {streetName: 'Bloemgracht', streetNumber: '41b'}, + }, + { + address: 'Meester Arendstraat 48 B', + expected: {streetName: 'Meester Arendstraat', streetNumber: '48 B'}, + }, + ])( + 'returns full address object when not separated by delimiter, tryRegexFallback is true and address matches regex for NL', + ({address, expected}) => { + expect(splitAddress1('NL', address, true)).toEqual(expected); + }, + ); + + test.each([ + { + address: 'Ziegeleiweg 3', + expected: {streetName: 'Ziegeleiweg', streetNumber: '3'}, + }, + { + address: 'Sexauerstraße 3a', + expected: {streetName: 'Sexauerstraße', streetNumber: '3a'}, + }, + { + address: 'Straße des Friedens 6 A', + expected: {streetName: 'Straße des Friedens', streetNumber: '6 A'}, + }, + { + address: 'Ladenspelderstr. 52', + expected: {streetName: 'Ladenspelderstr.', streetNumber: '52'}, + }, + { + address: 'Marktstr.32', + expected: {streetName: 'Marktstr.', streetNumber: '32'}, + }, + { + address: 'Ringstraße, 16', + expected: {streetName: 'Ringstraße', streetNumber: '16'}, + }, + { + address: 'Ringstraße,16', + expected: {streetName: 'Ringstraße', streetNumber: '16'}, + }, + { + address: 'Lorbeerstr., 25', + expected: {streetName: 'Lorbeerstr.', streetNumber: '25'}, + }, + ])( + 'returns full address object when not separated by delimiter, tryRegexFallback is true and address matches regex for DE', + ({address, expected}) => { + expect(splitAddress1('DE', address, true)).toEqual(expected); + }, + ); + + test.each([ + { + address: 'Doornbergstraat 30', + expected: {streetName: 'Doornbergstraat', streetNumber: '30'}, + }, + { + address: 'Moeskouterlaan, 29', + expected: {streetName: 'Moeskouterlaan', streetNumber: '29'}, + }, + { + address: 'Rue le Marais 6A', + expected: {streetName: 'Rue le Marais', streetNumber: '6A'}, + }, + { + address: 'Kiezelstraat 4a', + expected: 
{streetName: 'Kiezelstraat', streetNumber: '4a'}, + }, + { + address: 'Rue Grand Peine 12 C', + expected: {streetName: 'Rue Grand Peine', streetNumber: '12 C'}, + }, + { + address: '85 Rue des Floralies', + expected: {streetName: 'Rue des Floralies', streetNumber: '85'}, + }, + { + address: '39, rue de Grass', + expected: {streetName: 'rue de Grass', streetNumber: '39'}, + }, + { + address: '84 a Rue du merlo', + expected: {streetName: 'Rue du merlo', streetNumber: '84 a'}, + }, + { + address: '84A Rue du merlo', + expected: {streetName: 'Rue du merlo', streetNumber: '84A'}, + }, + ])( + 'returns full address object when not separated by delimiter, tryRegexFallback is true and address matches regex for BE', + ({address, expected}) => { + expect(splitAddress1('BE', address, true)).toEqual(expected); + }, + ); + + test.each([ + { + address: 'Alberto Risopatrón 2714', + expected: {streetName: 'Alberto Risopatrón', streetNumber: '2714'}, + }, + { + address: 'avenida nelson pereira, 1741', + expected: {streetName: 'avenida nelson pereira', streetNumber: '1741'}, + }, + { + address: 'Rancho las Cabras 9A', + expected: {streetName: 'Rancho las Cabras', streetNumber: '9A'}, + }, + { + address: 'Callejón Torreblanca 355 B', + expected: {streetName: 'Callejón Torreblanca', streetNumber: '355 B'}, + }, + { + address: 'Quebrada de Vitor #1234', + expected: {streetName: 'Quebrada de Vitor', streetNumber: '#1234'}, + }, + { + address: 'Barros Arana # 1298', + expected: {streetName: 'Barros Arana', streetNumber: '# 1298'}, + }, + { + address: 'Calle Amalia Errazuriz Nº956', + expected: {streetName: 'Calle Amalia Errazuriz', streetNumber: 'Nº956'}, + }, + { + address: 'Calle Amalia Errazuriz Nº 956', + expected: {streetName: 'Calle Amalia Errazuriz', streetNumber: 'Nº 956'}, + }, + { + address: 'Calle Amalia Errazuriz nº956', + expected: {streetName: 'Calle Amalia Errazuriz', streetNumber: 'nº956'}, + }, + { + address: 'Calle Amalia Errazuriz nº 956', + expected: {streetName: 
'Calle Amalia Errazuriz', streetNumber: 'nº 956'}, + }, + { + address: 'Calle Amalia Errazuriz no956', + expected: {streetName: 'Calle Amalia Errazuriz', streetNumber: 'no956'}, + }, + { + address: 'Calle Amalia Errazuriz no 956', + expected: {streetName: 'Calle Amalia Errazuriz', streetNumber: 'no 956'}, + }, + { + address: 'Calle Amalia Errazuriz No956', + expected: {streetName: 'Calle Amalia Errazuriz', streetNumber: 'No956'}, + }, + { + address: 'Calle Amalia Errazuriz No 956', + expected: {streetName: 'Calle Amalia Errazuriz', streetNumber: 'No 956'}, + }, + { + address: 'Calle Amalia Errazuriz no.956', + expected: {streetName: 'Calle Amalia Errazuriz', streetNumber: 'no.956'}, + }, + { + address: 'Calle Amalia Errazuriz no. 956', + expected: {streetName: 'Calle Amalia Errazuriz', streetNumber: 'no. 956'}, + }, + { + address: 'Calle Amalia Errazuriz No.956', + expected: {streetName: 'Calle Amalia Errazuriz', streetNumber: 'No.956'}, + }, + { + address: 'Calle Amalia Errazuriz No. 956', + expected: {streetName: 'Calle Amalia Errazuriz', streetNumber: 'No. 
956'}, + }, + { + address: 'Calle Amalia Errazuriz número 956', + expected: { + streetName: 'Calle Amalia Errazuriz', + streetNumber: 'número 956', + }, + }, + { + address: 'Calle Amalia Errazuriz Número 956', + expected: { + streetName: 'Calle Amalia Errazuriz', + streetNumber: 'Número 956', + }, + }, + ])( + 'returns full address object when not separated by delimiter, tryRegexFallback is true and address matches regex for CL, MX, ES', + ({address, expected}) => { + expect(splitAddress1('CL', address, true)).toEqual(expected); + expect(splitAddress1('MX', address, true)).toEqual(expected); + expect(splitAddress1('ES', address, true)).toEqual(expected); + }, + ); + + test.each([ + { + address: 'פטישן 22', + expected: {streetName: 'פטישן', streetNumber: '22'}, + }, + { + address: 'שבזי שלום 9', + expected: {streetName: 'שבזי שלום', streetNumber: '9'}, + }, + { + address: 'חרצית, 5', + expected: {streetName: 'חרצית', streetNumber: '5'}, + }, + { + address: 'חרצית, 500', + expected: {streetName: 'חרצית', streetNumber: '500'}, + }, + { + address: '21, הדקל', + expected: {streetName: 'הדקל', streetNumber: '21'}, + }, + { + address: '1, קיבוץ גבים', + expected: {streetName: 'קיבוץ גבים', streetNumber: '1'}, + }, + { + address: '47/2, המגינים', + expected: {streetName: 'המגינים', streetNumber: '47/2'}, + }, + { + address: 'רבי יהודה הנשיא 30/16', + expected: {streetName: 'רבי יהודה הנשיא', streetNumber: '30/16'}, + }, + { + address: 'St. Ben Ami 24', + expected: {streetName: 'St. 
Ben Ami', streetNumber: '24'}, + }, + { + address: 'Shevet Zvulun, 2', + expected: {streetName: 'Shevet Zvulun', streetNumber: '2'}, + }, + ])( + 'returns full address object when not separated by delimiter, tryRegexFallback is true and address matches regex for IL', + ({address, expected}) => { + expect(splitAddress1('IL', address, true)).toEqual(expected); + }, + ); + + test.each([ + { + address: 'Rua Santo Antônio 722', + expected: {streetName: 'Rua Santo Antônio', streetNumber: '722'}, + }, + { + address: 'Rua Santo Antônio, 722', + expected: {streetName: 'Rua Santo Antônio', streetNumber: '722'}, + }, + { + address: 'Rua Santo Antônio,722', + expected: {streetName: 'Rua Santo Antônio', streetNumber: '722'}, + }, + { + address: 'Rua Corumbá 47 A', + expected: {streetName: 'Rua Corumbá', streetNumber: '47 A'}, + }, + { + address: 'Rua Corumbá 47A', + expected: {streetName: 'Rua Corumbá', streetNumber: '47A'}, + }, + { + address: 'Rua Corumbá 47A', + expected: {streetName: 'Rua Corumbá', streetNumber: '47A'}, + }, + { + address: 'Rua Nair Costa Baldoino - número: 449', + expected: {streetName: 'Rua Nair Costa Baldoino - número: 449'}, + }, + ])( + 'returns full address object when not separated by delimiter, tryRegexFallback is true and address matches regex for BR', + ({address, expected}) => { + expect(splitAddress1('BR', address, true)).toEqual(expected); + }, + ); }); diff --git a/lang/typescript/src/extended-address/splitAddress1.ts b/lang/typescript/src/extended-address/splitAddress1.ts index 07bf0f92e..9d622fa07 100644 --- a/lang/typescript/src/extended-address/splitAddress1.ts +++ b/lang/typescript/src/extended-address/splitAddress1.ts @@ -1,26 +1,54 @@ import type {Address} from '../types/address'; -import {splitAddressField} from '../utils/address-fields'; -import {getRegionConfig, getConcatenationRules} from '../utils/regions'; +import { + RESERVED_DELIMITER, + splitAddressField, + regexSplitAddressField, +} from '../utils/address-fields'; +import { + 
getRegionConfig, + getConcatenationRules, + getAddress1Regex, +} from '../utils/regions'; /** - * Parse a concatenated address1 string based on the region specified by - * country code + * Splits an address string into sub-fields using a reserved delimiter. + * Optionally provides a fallback mechanism to parse the address using + * a regex when the delimiter is absent, which should be used with caution + * as it may not provide accurate results. + * * @param countryCode 2-letter country code string - * @param concatenatedAddress Combined address1 string + * @param address Combined address1 string + * @param tryRegexFallback Flag to attempt regex parsing as a fallback mechanism * @returns Partial Address object containing parsed address fields or null if * the region does not define an extended address format */ export function splitAddress1( countryCode: string, - concatenatedAddress: string, + address: string, + tryRegexFallback = false, ): Partial
| null { const config = getRegionConfig(countryCode); const fieldConcatenationRules = config - ? getConcatenationRules(config, concatenatedAddress, 'address1') + ? getConcatenationRules(config, address, 'address1') : undefined; - if (fieldConcatenationRules) { - return splitAddressField(fieldConcatenationRules, concatenatedAddress); + const address1Regex = config ? getAddress1Regex(config) : undefined; + if (!fieldConcatenationRules) { + return null; } - return null; + if (address === '') { + return {}; + } + + if (address.includes(RESERVED_DELIMITER)) { + return splitAddressField(fieldConcatenationRules, address); + } + if (tryRegexFallback && address1Regex) { + return regexSplitAddressField( + fieldConcatenationRules, + address1Regex, + address, + ); + } + return {[fieldConcatenationRules[0].key]: address}; } diff --git a/lang/typescript/src/types/region-yaml-config.ts b/lang/typescript/src/types/region-yaml-config.ts index c01a49d67..30ff96d87 100644 --- a/lang/typescript/src/types/region-yaml-config.ts +++ b/lang/typescript/src/types/region-yaml-config.ts @@ -15,4 +15,6 @@ export type FieldDefinitions = Record< export type RegionYamlConfig = Record & { /** Format definition for an extended address */ combined_address_format?: CombinedAddressFormat; + /** Regex patterns for standard address1 */ + address1_regex?: string[]; }; diff --git a/lang/typescript/src/utils/address-fields.test.ts b/lang/typescript/src/utils/address-fields.test.ts index a4b8d7065..68cdc4151 100644 --- a/lang/typescript/src/utils/address-fields.test.ts +++ b/lang/typescript/src/utils/address-fields.test.ts @@ -3,6 +3,7 @@ import type {FieldConcatenationRule} from 'src/types/region-yaml-config'; import { RESERVED_DELIMITER, concatAddressField, + regexSplitAddressField, splitAddressField, } from './address-fields'; @@ -214,4 +215,94 @@ describe('splitAddressField', () => { }); }); }); + + describe('regexSplitAddressField', () => { + test('creates an address object from string matching one of 
the defined regexes', () => { + const fieldDefinition: FieldConcatenationRule[] = [ + {key: 'streetName'}, + {key: 'streetNumber'}, + ]; + const regexPatterns = [ + new RegExp('^(?\\d+) (?[^\\d]+)$'), + new RegExp('^(?[^\\d]+) (?\\d+)$'), + ]; + const address = 'Main 123'; + + expect( + regexSplitAddressField(fieldDefinition, regexPatterns, address), + ).toEqual({ + streetName: 'Main', + streetNumber: '123', + }); + }); + + test('creates an address object from string matching multiple of the defined regexes', () => { + const fieldDefinition: FieldConcatenationRule[] = [ + {key: 'streetName'}, + {key: 'streetNumber'}, + ]; + const regexPatterns = [ + new RegExp('^(?[^\\d]+) (?\\d+)$'), + new RegExp('^(?[^\\d]+) (?\\d+)$'), + ]; + const address = 'Main 123'; + + expect( + regexSplitAddressField(fieldDefinition, regexPatterns, address), + ).toEqual({ + streetName: 'Main', + streetNumber: '123', + }); + }); + + test('creates a partial address object from string that does not match one of the defined regexes', () => { + const fieldDefinition: FieldConcatenationRule[] = [ + {key: 'streetName'}, + {key: 'streetNumber'}, + ]; + const regexPatterns = [ + new RegExp('^(?\\d+) (?[^\\d]+)$'), + ]; + const address = 'Main 123'; + + expect( + regexSplitAddressField(fieldDefinition, regexPatterns, address), + ).toEqual({ + streetName: 'Main 123', + }); + }); + + test('field definition order matters', () => { + const fieldDefinitionNumberFirst: FieldConcatenationRule[] = [ + {key: 'streetNumber'}, + {key: 'streetName'}, + ]; + const fieldDefinitionNameFirst: FieldConcatenationRule[] = [ + {key: 'streetName'}, + {key: 'streetNumber'}, + ]; + const regexPatterns = [ + new RegExp('^(?[^\\d]+) (?\\d+)$'), + ]; + const address = 'Main'; + expect( + regexSplitAddressField( + fieldDefinitionNumberFirst, + regexPatterns, + address, + ), + ).toEqual({ + streetNumber: 'Main', + }); + expect( + regexSplitAddressField( + fieldDefinitionNameFirst, + regexPatterns, + address, + ), + ).toEqual({ 
+ streetName: 'Main', + }); + }); + }); }); diff --git a/lang/typescript/src/utils/address-fields.ts b/lang/typescript/src/utils/address-fields.ts index e1ff4af5d..bf7eebdf2 100644 --- a/lang/typescript/src/utils/address-fields.ts +++ b/lang/typescript/src/utils/address-fields.ts @@ -71,3 +71,26 @@ export function splitAddressField( return parsedAddressObject; } + +/** + * Utility function that attempts to parse an address string based on a regex + * + * @param fieldDefinition Array of definitions of address sub-fields + * @param regexPatterns Regex patterns for parsing sub-fields from an address string + * @param address Address string to parse + * @returns Partial Address object of fields parsed from string + */ +export function regexSplitAddressField( + fieldDefinition: FieldConcatenationRule[], + regexPatterns: RegExp[], + address: string, +): Partial
{ + for (const regex of regexPatterns) { + const match = address.match(regex); + if (match?.groups) { + // Return the first group that matches + return match.groups; + } + } + return {[fieldDefinition[0].key]: address}; +} diff --git a/lang/typescript/src/utils/regions.ts b/lang/typescript/src/utils/regions.ts index 7100b6bba..d252c5cfe 100644 --- a/lang/typescript/src/utils/regions.ts +++ b/lang/typescript/src/utils/regions.ts @@ -100,3 +100,15 @@ export function getConcatenationRules( } return concatenationRules; } + +/** + * The regex patterns to use for splitting address1 strings that do not + * contain a reserved delimiter + */ +export function getAddress1Regex(config: RegionYamlConfig): RegExp[] { + if (config.address1_regex === undefined) { + return []; + } + + return config.address1_regex.map((pattern) => new RegExp(pattern, 'i')); +} diff --git a/lib/worldwide/region.rb b/lib/worldwide/region.rb index 1560c4b86..5c244bf05 100644 --- a/lib/worldwide/region.rb +++ b/lib/worldwide/region.rb @@ -36,6 +36,7 @@ class Region :zip_requirement, :additional_address_fields, :combined_address_format, + :address1_regex, ] # A region may have more than one parent. 
@@ -208,6 +209,9 @@ class Region # A hash of the rules for concatening the additional address fields into the standard fields attr_accessor :combined_address_format + # An array of regex patterns of the address1 field, capturing the supported additional address fields + attr_accessor :address1_regex + def initialize( alpha_three: nil, continent: false, @@ -245,6 +249,7 @@ def initialize( @additional_address_fields = [] @combined_address_format = {} + @address1_regex = [] @building_number_required = false @building_number_may_be_in_address2 = false @currency = nil diff --git a/lib/worldwide/regions_loader.rb b/lib/worldwide/regions_loader.rb index 113024c10..033f44997 100644 --- a/lib/worldwide/regions_loader.rb +++ b/lib/worldwide/regions_loader.rb @@ -97,6 +97,7 @@ def apply_hierarchy(parent:, code:, children:) def apply_territory_attributes(region, spec) region.additional_address_fields = spec["additional_address_fields"] || [] region.combined_address_format = spec["combined_address_format"] || {} + region.address1_regex = spec["address1_regex"] || [] region.building_number_required = spec["building_number_required"] || false region.building_number_may_be_in_address2 = spec["building_number_may_be_in_address2"] || false currency_code = spec["currency"] diff --git a/test/worldwide/region_data_consistency_test.rb b/test/worldwide/region_data_consistency_test.rb index 6cf382e32..100bf8a8e 100644 --- a/test/worldwide/region_data_consistency_test.rb +++ b/test/worldwide/region_data_consistency_test.rb @@ -53,27 +53,25 @@ class RegionDataConsistencyTest < ActiveSupport::TestCase Regions.all.select(&:province?).each do |province| next if province.zip_prefixes.nil? - # rubocop:disable Performance/RedundantEqualityComparisonBlock - assert province.zip_prefixes.all? 
{ |prefix| prefix == prefix.upcase } - # rubocop:enable Performance/RedundantEqualityComparisonBlock + assert(province.zip_prefixes.all? { |prefix| prefix == prefix.upcase }, "zip_prefixes for #{province.iso_code} must be upper case") end end test "country codes match expected formats" do Regions.all.select(&:country?).each do |country| - assert country.iso_code.match?(/^([A-Z]{2})$/), "alpha2 for #{country.legacy_name}" - assert country.alpha_three.match?(/^([A-Z]{3})$/), "alpha3 for #{country.legacy_name}" + assert_match(/^([A-Z]{2})$/, country.iso_code, "alpha2 for #{country.legacy_name}") + assert_match(/^([A-Z]{3})$/, country.alpha_three, "alpha3 for #{country.legacy_name}") unless country.numeric_three.nil? - assert country.numeric_three.match?(/^([0-9]{3})$/), "numeric3 for #{country.legacy_name}" + assert_match(/^([0-9]{3})$/, country.numeric_three, "numeric3 for #{country.legacy_name}") end end end test "province codes match expected formats" do Regions.all.select(&:province?).each do |province| - assert(province.iso_code.match?(/^[A-Z0-9-]+$/), "Unexpected iso_code for #{province.legacy_name}") - assert(province.legacy_code.match?(/^[A-Z0-9 -]+$/), "Unexpected legacy for #{province.legacy_name}") + assert_match(/^[A-Z0-9-]+$/, province.iso_code, "Unexpected iso_code for #{province.legacy_name}") + assert_match(/^[A-Z0-9 -]+$/, province.legacy_code, "Unexpected legacy for #{province.legacy_name}") end end @@ -115,7 +113,7 @@ class RegionDataConsistencyTest < ActiveSupport::TestCase Regions.all.select(&:province?).each do |province| next if RegionDataTestHelper::ES_AND_US_DUAL_STATUS_PROVINCES.include?(province.iso_code) - assert province.iso_code.match?(/^[A-Z][A-Z]-[A-Z0-9]{1,3}$/), "#{province.iso_code} unexpected" + assert_match(/^[A-Z][A-Z]-[A-Z0-9]{1,3}$/, province.iso_code, "#{province.iso_code} unexpected") end end @@ -200,7 +198,7 @@ class RegionDataConsistencyTest < ActiveSupport::TestCase next if zip.blank? 
- assert zip.is_a?(String), "zip #{zip.inspect} for #{country.iso_code} is not a String" + assert_kind_of String, zip, "zip #{zip.inspect} for #{country.iso_code} is not a String" assert country.valid_zip?(zip), "zip #{zip.inspect} invalid for #{country.iso_code}" next if country.zones.none?(&:province?) diff --git a/test/worldwide/region_test.rb b/test/worldwide/region_test.rb index d82630765..b3df988c4 100644 --- a/test/worldwide/region_test.rb +++ b/test/worldwide/region_test.rb @@ -485,5 +485,15 @@ class RegionTest < ActiveSupport::TestCase assert_empty region.parents assert_empty region.zones end + + test "address1_regex returns values as expected" do + [ + [:us, []], + [:nl, ["^(?[^\\d]+) (?\\d+(?: ?[a-z])?)$"]], + [:be, ["^(?[^\\d,]+),? (?\\d+(?: ?[a-z])?)$", "^(?\\d+(?: ?[a-z])?),? (?[^\\d,]+)$"]], + ].each do |region_code, expected_value| + assert_equal expected_value, Worldwide.region(code: region_code).address1_regex + end + end end end diff --git a/test/worldwide/region_yml_consistency_test.rb b/test/worldwide/region_yml_consistency_test.rb index 35d3dce51..447cd164c 100644 --- a/test/worldwide/region_yml_consistency_test.rb +++ b/test/worldwide/region_yml_consistency_test.rb @@ -192,6 +192,21 @@ class RegionYmlConsistencyTest < ActiveSupport::TestCase end end + test "address1_regex capture groups must belong to a limited set of allowed names" do + Regions.all.select(&:country?).each do |country| + next if country.address1_regex.empty? + + allowed_names = country.combined_address_format["default"]["address1"].map { |field| field["key"] } + country.address1_regex.each do |regex| + address1_regex = Regexp.new(regex) + + address1_regex.names.each do |capture_group| + assert_includes allowed_names, capture_group, "#{country.iso_code} regex capture group #{capture_group} is not a supported additional address field" + end + end + end + end + test "example_address contains the word joiner when additional_address_fields are present" do word_joiner = "\u2060"