From bc71a0d348fc6672ee512495e1125a6c8b685c2f Mon Sep 17 00:00:00 2001 From: Geoffrey Aldebert Date: Fri, 11 Feb 2022 15:12:47 +0100 Subject: [PATCH 1/3] Add item for code_departement label detection --- .../detect_labels/FR/geo/code_departement/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/csv_detective/detect_labels/FR/geo/code_departement/__init__.py b/csv_detective/detect_labels/FR/geo/code_departement/__init__.py index 055b802..458e39b 100644 --- a/csv_detective/detect_labels/FR/geo/code_departement/__init__.py +++ b/csv_detective/detect_labels/FR/geo/code_departement/__init__.py @@ -6,10 +6,10 @@ def _is(header): '''Returns 1 if the (processed) header matches one of the expected words combination, else 0''' - words_combinations_list = ['code departement', 'dep'] #'dep': Possible confusion with dep name? + words_combinations_list = ['code departement', 'dep', 'departement'] #'dep': Possible confusion with dep name? processed_header = _process_text(header) header_matches_words_combination = float(any([words_combination == processed_header for words_combination in words_combinations_list])) words_combination_in_header = 0.5*float(any([full_word_strictly_inside_string(words_combination, processed_header) for words_combination in words_combinations_list])) - return max(header_matches_words_combination, words_combination_in_header) \ No newline at end of file + return max(header_matches_words_combination, words_combination_in_header) From 2f9ffc72ff1be19965e2f8fd5f93e17f618b1124 Mon Sep 17 00:00:00 2001 From: Geoffrey Aldebert Date: Fri, 11 Feb 2022 15:13:17 +0100 Subject: [PATCH 2/3] Add item for code_region label detection --- csv_detective/detect_labels/FR/geo/code_region/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/csv_detective/detect_labels/FR/geo/code_region/__init__.py b/csv_detective/detect_labels/FR/geo/code_region/__init__.py index 4c090f2..08bc97d 100644 --- a/csv_detective/detect_labels/FR/geo/code_region/__init__.py +++ b/csv_detective/detect_labels/FR/geo/code_region/__init__.py @@ -6,10 +6,10 @@ def _is(header): '''Returns 1 if the (processed) header matches one of the expected words combination, else 0''' - words_combinations_list = ['code region', 'reg', 'code insee region'] #'reg' : possible confusion with region name? + words_combinations_list = ['code region', 'reg', 'code insee region', 'region'] #'reg' : possible confusion with region name? processed_header = _process_text(header) header_matches_words_combination = float(any([words_combination == processed_header for words_combination in words_combinations_list])) words_combination_in_header = 0.5*float(any([full_word_strictly_inside_string(words_combination, processed_header) for words_combination in words_combinations_list])) - return max(header_matches_words_combination, words_combination_in_header) \ No newline at end of file + return max(header_matches_words_combination, words_combination_in_header) From 7c9c918531a512bfe0ab0dc353b8f3e649d350e5 Mon Sep 17 00:00:00 2001 From: Geoffrey Aldebert Date: Fri, 11 Feb 2022 19:25:43 +0100 Subject: [PATCH 3/3] Add item for code_commune_insee label detection --- .../detect_labels/FR/geo/code_commune_insee/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/csv_detective/detect_labels/FR/geo/code_commune_insee/__init__.py b/csv_detective/detect_labels/FR/geo/code_commune_insee/__init__.py index c4f7ddb..9638547 100644 --- a/csv_detective/detect_labels/FR/geo/code_commune_insee/__init__.py +++ b/csv_detective/detect_labels/FR/geo/code_commune_insee/__init__.py @@ -6,10 +6,10 @@ def _is(header): '''Returns 1 if the (processed) header matches one of the expected words combination, else 0''' - words_combinations_list = ['code commune insee', 'code insee', 'codes insee'] + words_combinations_list = ['code commune insee', 'code insee', 'codes insee', 'code commune'] processed_header = _process_text(header) header_matches_words_combination = float(any([words_combination == processed_header for words_combination in words_combinations_list])) words_combination_in_header = 0.5*float(any([full_word_strictly_inside_string(words_combination, processed_header) for words_combination in words_combinations_list])) - return max(header_matches_words_combination, words_combination_in_header) \ No newline at end of file + return max(header_matches_words_combination, words_combination_in_header)