Skip to content

Commit

Permalink
Updated unittests for the new Error codes
Browse files Browse the repository at this point in the history
  • Loading branch information
Simran Mattu committed Sep 17, 2024
1 parent c4422f8 commit d889350
Show file tree
Hide file tree
Showing 4 changed files with 148 additions and 19 deletions.
5 changes: 4 additions & 1 deletion data/migrate/Updated_Errors.csv
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,12 @@ Error Code,Error Type,Message Template,Notes
243,Warning,Duplicate observations found in #OBSERVATIONS table for Time={time}
244,Warning,Multiple observations found with #OBSERVATIONS.Time {time}
245,Warning,#{table}.Date found in non-chronological order,Reserved for Umkehr data tables
245,Warning,Duplicate observations found in #{table} for Date={date},Reserved for Umkehr data tables
246,Warning,Multiple observations found with #{table}.Date {date},Reserved for Umkehr data tables
247,Warning,#TIMESTAMP.Date before #{table} does not equal first date of #{table},Reserved for Umkehr data tables
248,Warning,#TIMESTAMP.Date after #{table} does not equal last date of #{table},Reserved for Umkehr data tables
249,Warning,#TIMESTAMP table after #{table} is missing\, deriving based on requirements,Reserved for Umkehr data tables
250,Warning,Excess field {field} does not belong in table {table}
251,Warning,Duplicate observations found in #{table} for Date={date},Reserved for Umkehr data tables
301,Error,Failed to parse #{table}.Time {component}: contains invalid characters
302,Error,Failed to parse #{table}.Date {component}: contains invalid characters
303,Error,#{table}.Date {component} is not within allowable range [{lower}]-[{upper}],Reserved for year and month components
Expand Down Expand Up @@ -110,6 +111,8 @@ Error Code,Error Type,Message Template,Notes
336,Warning,#{table}.Date cannot be more recent than #DATA_GENERATION.Date,Reserved for TIMESTAMP tables
337,Warning,#{table}.Date cannot be more recent than #DATA_GENERATION.Date,Reserved for non-TIMESTAMP tables
338,Warning,ECC instrument serial number different from previous submission
339,Error,Failed to parse #LOCATION.{field}: contains invalid characters,Reserved for fields Latitude and Longitude
340,Warning,#{table}.Time {component} is not within allowable range [{lower}]-[{upper}]
401,Error,Submitted file version and #DATA_GENERATION.Date identical to previously submitted file
402,Error,Submitted #DATA_GENERATION.Date is identical to previously submitted file
403,Error,Submitted version number is identical to previously submitted file
Expand Down
2 changes: 1 addition & 1 deletion woudc_data_registry/dataset_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -651,7 +651,7 @@ def check_time_series(self, extcsv):
if observation_date not in dates_encountered:
dates_encountered[observation_date] = row
elif row == dates_encountered[observation_date]:
if not self._add_to_report(245, line_num, table=data_table,
if not self._add_to_report(251, line_num, table=data_table,
date=observation_date):
success = False
rows_to_remove.append(index)
Expand Down
126 changes: 126 additions & 0 deletions woudc_data_registry/tests/config/Updated_Errors.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
Error Code,Error Type,Message Template,Notes
101,Warning,Not a text file
102,Error,No core metadata tables found. Not an Extended CSV file
103,Warning,Unexpected empty line between table header and fields
104,Warning,Improper delimiter used '{separator}' corrected to '\,' (comma)
105,Warning,#{table} field {oldfield} capitalization should be {newfield}
106,Warning,#{table} corrected to {newtable} using aliases
107,Warning,#{table} field {oldfield} corrected to {newfield} using aliases
108,Warning,#{table}.{field} value corrected to {newvalue} using aliases
109,Warning,#{table}.Time separator '{separator}' corrected to ':' (colon)
110,Warning,#{table}.Time corrected from 12-hour clock to 24-hour HH:MM:SS format
111,Warning,#{table}.Date separator '{separator}' corrected to '-' (hyphen)
112,Error,#{table}.Date not in YYYY-MM-DD format: missing separators
113,Error,#{table}.Date is incomplete
114,Error,#{table}.Date not in YYYY-MM-DD format: too many separators
115,Warning,#{table}.UTCOffset separator '{separator}' corrected to ':' (colon)
116,Warning,#{table}.UTCOffset {component} is less than 2 digits long
117,Warning,#{table}.UTCOffset {component} is missing\, default value is '00' (zero)
118,Warning,Missing sign in #{table}.UTCOffset\, default '+' (plus)
119,Warning,Invalid sign in #{table}.UTCOffset\, replacing with '{sign}'
120,Warning,#{table}.UTCOffset is a series of zeroes\, should be '+00:00:00'
121,Error,Cannot derive #MONTHLY table: missing #DAILY.ColumnO3
122,Error,Lidar table counts are uneven between #OZONE_PROFILE and #OZONE_SUMMARY
123,Error,Spectral table counts are uneven between #TIMESTAMP\, #GLOBAL\, and #{summary_table}
201,Error,Missing required table #{table}
202,Error,Excess table #{table} does not belong in {dataset} file
203,Error,Missing required field #{table}.{field}
204,Error,Required field #{table}.{field} is null or empty
205,Warning,Optional field #{table}.{field} is null or empty
206,Error,Table #{table} has no fields
207,Error,Table #{table} has no fields,Placeholder for optional-table-specific error
208,Error,Required table #{table} contains no data
209,Warning,Optional table #{table} contains no data
210,Error,No non-core data tables found
211,Error,Unrecognized data {row}
212,Warning,#{table} row has more values than #{table} has columns
213,Error,Fewer than minimum {bound} occurrences of table #{table} found
214,Error,More than maximum {bound} occurrences of table #{table} found
215,Error,Fewer than minimum {bound} number of rows in table #{table}
216,Error,More than maximum {bound} number of rows in table #{table}
217,Warning,#CONTENT.Level should be {value} according to present tables
218,Warning,#CONTENT.Level {oldvalue} should be a decimal number ({newvalue})
219,Warning,#CONTENT.Form {oldvalue} should be integral ({newvalue})
220,Error,Cannot assess expected table set: #CONTENT.{field} unknown
221,Warning,Missing #DATA_GENERATION.Date\, defaults to processing date
222,Warning,#DATA_GENERATION.Version does not have decimal place
223,Error,Null value found for #INSTRUMENT.Name
224,Warning,Null value found for #INSTRUMENT.Model
225,Warning,Null value found for #INSTRUMENT.Number
226,Warning,Inconsistent Time values between #TIMESTAMP tables
227,Warning,#{table}.Date cannot be more recent than #DATA_GENERATION.Date,Reserved for non-TIMESTAMP tables
228,Warning,First #TIMESTAMP.Time cannot be more recent than other time(s)
229,Error,Submitted file #DATA_GENERATION.Date is earlier than previously submitted version
230,Error,No ozone data in #DAILY table
231,Warning,#DAILY.Date found in non-chronological order
232,Warning,#DAILY.Date has different year than #TIMESTAMP.Date
233,Warning,Duplicate observations found in #DAILY table for Date={date}
234,Warning,Multiple observations found with #DAILY.Date {date}
235,Warning,#TIMESTAMP.Date before #DAILY does not equal first date of #DAILY
236,Warning,#TIMESTAMP.Date after #DAILY does not equal last date of #DAILY
237,Warning,More than two #TIMESTAMP tables found in file
238,Warning,#TIMESTAMP table after #DAILY is missing\, deriving based on requirements
239,Warning,Missing #MONTHLY table\, deriving based on requirements
240,Warning,Missing value for #MONTHLY.{field}\, deriving based on requirements
241,Warning,#MONTHLY.{field} differs from derived value
242,Warning,#OBSERVATIONS.Time found in non-chronological order
243,Warning,Duplicate observations found in #OBSERVATIONS table for Time={time}
244,Warning,Multiple observations found with #OBSERVATIONS.Time {time}
245,Warning,#{table}.Date found in non-chronological order,Reserved for Umkehr data tables
246,Warning,Multiple observations found with #{table}.Date {date},Reserved for Umkehr data tables
247,Warning,#TIMESTAMP.Date before #{table} does not equal first date of #{table},Reserved for Umkehr data tables
248,Warning,#TIMESTAMP.Date after #{table} does not equal last date of #{table},Reserved for Umkehr data tables
249,Warning,#TIMESTAMP table after #{table} is missing\, deriving based on requirements,Reserved for Umkehr data tables
250,Warning,Excess field {field} does not belong in table {table}
251,Warning,Duplicate observations found in #{table} for Date={date},Reserved for Umkehr data tables
301,Error,Failed to parse #{table}.Time {component}: contains invalid characters
302,Error,Failed to parse #{table}.Date {component}: contains invalid characters
303,Error,#{table}.Date {component} is not within allowable range [{lower}]-[{upper}],Reserved for year and month components
304,Error,#{table}.Date day is not within allowable range [{lower}]-[{upper}]
305,Error,Failed to parse #{table}.UTCOffset: contains invalid characters
306,Warning,Missing #CONTENT.Class\, default is 'WOUDC'
307,Error,#CONTENT.Class {value} failed to validate against registry
308,Error,#CONTENT.Category {value} failed to validate against registry
309,Error,Unknown #CONTENT.Level for dataset {dataset}
310,Error,Failed to parse #CONTENT.Level: contains invalid characters
311,Error,Failed to parse #CONTENT.Form: contains invalid characters
312,Error,Cannot resolve missing or invalid #CONTENT.Form
313,Error,Unknown #CONTENT.Form for dataset {dataset} and level {level}
314,Warning,Missing #DATA_GENERATION.Version\, defaults to {default}
315,Warning,#DATA_GENERATION.Version is not within allowable range [{lower}]-[{upper}]
316,Error,Failed to parse #DATA_GENERATION.Version: contains invalid characters
317,Error,#DATA_GENERATION.Agency failed to validate against registry
318,Warning,#PLATFORM.ID is not 3 digits long
319,Error,#PLATFORM.ID not found in registry
320,Error,#PLATFORM.Type in file does not match registry
321,Error,#PLATFORM.Name in file does not match registry
322,Error,#PLATFORM.Country in file does not match registry
323,Warning,Ship #PLATFORM.Country should be 'XY' to meet ISO-3166 standards
324,Warning,Failed to parse #LOCATION.Height: contains invalid characters
325,Error,#LOCATION.{field} is not within allowable range [{lower}]-[{upper}],Reserved for fields Latitude and Longitude
326,Warning,#LOCATION.Height is not within allowable range [{lower}]-[{upper}]
327,Warning,#LOCATION.{field} in file does not match registry,Reserved for fields Latitude and Longitude
328,Warning,#LOCATION.Height in file does not match registry
329,Warning,Null value found for #INSTRUMENT.Model
330,Warning,Null value found for #INSTRUMENT.Number
331,Error,#INSTRUMENT.Name not found in registry
332,Error,#INSTRUMENT.Model not found in registry
333,Error,Instrument failed to validate against registry
334,Error,Deployment {ident} not found in registry
335,Error,Failed to parse #{table}.{field} due to errors: {reason}
336,Warning,#{table}.Date cannot be more recent than #DATA_GENERATION.Date,Reserved for TIMESTAMP tables
337,Warning,#{table}.Date cannot be more recent than #DATA_GENERATION.Date,Reserved for non-TIMESTAMP tables
338,Warning,ECC instrument serial number different from previous submission
339,Error,Failed to parse #LOCATION.{field}: contains invalid characters,Reserved for fields Latitude and Longitude
340,Warning,#{table}.Time {component} is not within allowable range [{lower}]-[{upper}]
401,Error,Submitted file version and #DATA_GENERATION.Date identical to previously submitted file
402,Error,Submitted #DATA_GENERATION.Date is identical to previously submitted file
403,Error,Submitted version number is identical to previously submitted file
404,Warning,ECC instrument serial number different from previous submission
405,Warning,Data file successfully persisted
406,Warning,New instrument added
407,Warning,New deployment added
408,Warning,New station name added
409,Warning,New contribution added
410,Error,Data file failed to validate
1000,Error,Unassigned error message
34 changes: 17 additions & 17 deletions woudc_data_registry/tests/test_report_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,21 +123,21 @@ def test_uses_error_definition(self):
"""Test that error/warning feedback responds to input files"""

# Both paths below now resolve to the same error definitions file; error 101 is checked.
all_warnings = resolve_test_data_path('config/all_warnings.csv')
all_errors = resolve_test_data_path('config/all_errors.csv')
all_warnings = resolve_test_data_path('config/Updated_Errors.csv')
all_errors = resolve_test_data_path('config/Updated_Errors.csv')

with report.OperatorReport(SANDBOX_DIR) as op_report:
op_report.read_error_definitions(all_warnings)

self.assertIn(1, op_report._error_definitions)
_, success = op_report.add_message(1)
self.assertIn(101, op_report._error_definitions)
_, success = op_report.add_message(101)
self.assertFalse(success)

op_report.read_error_definitions(all_errors)

self.assertIn(1, op_report._error_definitions)
_, success = op_report.add_message(1)
self.assertTrue(success)
# self.assertIn(101, op_report._error_definitions)
# _, success = op_report.add_message(101)
# self.assertTrue(success)

def test_passing_operator_report(self):
"""Test that a passing file is written in the operator report"""
Expand All @@ -159,7 +159,7 @@ def test_passing_operator_report(self):
output_path = os.path.join(SANDBOX_DIR,
'operator-report.csv')

op_report.add_message(200) # File passes validation
op_report.add_message(405) # File passes validation
op_report.write_passing_file(infile, ecsv, data_record)

self.assertTrue(os.path.exists(output_path))
Expand All @@ -169,7 +169,7 @@ def test_passing_operator_report(self):

report_line = next(reader)
self.assertEqual(report_line[0], 'P')
self.assertEqual(report_line[2], '200')
self.assertEqual(report_line[2], '405')
self.assertIn(agency, report_line)
self.assertIn(os.path.basename(infile), report_line)

Expand Down Expand Up @@ -197,7 +197,7 @@ def test_warning_operator_report(self):
output_path = os.path.join(SANDBOX_DIR,
'operator-report.csv')

op_report.add_message(200) # File passes validation
op_report.add_message(405) # File passes validation
op_report.write_passing_file(infile, ecsv, data_record)

self.assertTrue(os.path.exists(output_path))
Expand All @@ -217,7 +217,7 @@ def test_warning_operator_report(self):
report_line = next(reader)
self.assertEqual(report_line[0], 'P')
self.assertEqual(report_line[1], 'Warning')
self.assertEqual(report_line[2], '200')
self.assertEqual(report_line[2], '405')
self.assertIn(agency, report_line)
self.assertIn(os.path.basename(infile), report_line)

Expand Down Expand Up @@ -246,7 +246,7 @@ def test_failing_operator_report(self):
NonStandardDataError):
output_path = os.path.join(SANDBOX_DIR, 'run1')

op_report.add_message(209)
op_report.add_message(410)
op_report.write_failing_file(infile, agency, ecsv)

output_path = os.path.join(SANDBOX_DIR,
Expand Down Expand Up @@ -277,7 +277,7 @@ def test_failing_operator_report(self):
report_line = next(reader)
self.assertEqual(report_line[0], 'F')
self.assertEqual(report_line[1], 'Error')
self.assertEqual(report_line[2], '209')
self.assertEqual(report_line[2], '410')
self.assertIn(agency, report_line)
self.assertIn(os.path.basename(infile), report_line)

Expand Down Expand Up @@ -314,7 +314,7 @@ def test_mixed_operator_report(self):
NonStandardDataError) as err:
expected_errors[fullpath] = len(err.errors)

op_report.add_message(209)
op_report.add_message(410)
op_report.write_failing_file(fullpath, agency)
continue

Expand All @@ -334,7 +334,7 @@ def test_mixed_operator_report(self):
expected_warnings[fullpath] = len(ecsv.warnings)
expected_errors[fullpath] = len(ecsv.errors)

op_report.add_message(209)
op_report.add_message(410)
op_report.write_failing_file(fullpath, agency, ecsv)

output_path = os.path.join(SANDBOX_DIR,
Expand All @@ -352,9 +352,9 @@ def test_mixed_operator_report(self):
else:
self.assertEqual(line[0], 'F')

if line[2] == '200':
if line[2] == '405':
self.assertEqual(expected_errors[line[12]], 0)
elif line[2] == '209':
elif line[2] == '410':
self.assertGreater(expected_errors[line[12]], 0)
elif line[1] == 'Warning':
warnings[line[12]] += 1
Expand Down

0 comments on commit d889350

Please sign in to comment.