diff --git a/tests/inputs/test_sample_info.yaml b/tests/inputs/test_sample_info.yaml
index b089e0e..1ef8357 100644
--- a/tests/inputs/test_sample_info.yaml
+++ b/tests/inputs/test_sample_info.yaml
@@ -39,4 +39,47 @@ tests:
       id: TEST:captest1
       text: bill clinton
     output:
-      text: Bill Clinton
\ No newline at end of file
+      text: Bill Clinton
+  - description: missing space 1
+    sample:
+      id: TEST:missing_space1
+      text: 400g/L
+    output:
+      text: 400 g/L
+  - description: missing space 2
+    sample:
+      id: TEST:missing_space2
+      text: 1%
+    output:
+      text: 1 %
+  - description: categorical vs numerical
+    sample:
+      id: TEST:cat_vs_num
+      text: Halophile
+    output:
+      text: NaN
+  - description: capital units
+    sample:
+      id: TEST:cap_units
+      text: 0.2 psu
+    output:
+      text: 0.2 PSU
+  - description: capital units and space
+    sample:
+      id: TEST:cap_units_and_space
+      text: 0.2psu
+    output:
+      text: 0.2 PSU
+  - description: plus minus error
+    sample:
+      id: TEST:plus_min_error
+      text: 17.3 +/- 2.0 PPT
+    output:
+      text: NaN
+  - description: with unit description
+    sample:
+      id: TEST:with_unit_descr
+      text: 40 PSU (practical salinity units)
+    output:
+      text: 40 PSU
+
diff --git a/tests/test_salinity.py b/tests/test_salinity.py
new file mode 100644
index 0000000..d96b35a
--- /dev/null
+++ b/tests/test_salinity.py
@@ -0,0 +1,46 @@
+# -*- coding: utf-8 -*-
+"""Salinity fixture test, currently driven through the capitalizer.
+
+Run as follows to see test-time printouts:
+
+    python -m pytest -sv tests/test_salinity.py
+"""
+import os
+import unittest
+
+import yaml
+
+from sample_annotator import capitalizer
+# MODEL_DIR, INPUT_DIR, OUTPUT_DIR
+from tests import INPUT_DIR
+
+# INPUT_DIR comes from __init__.py
+PWD = os.path.dirname(os.path.realpath(__file__))
+TEST_DATA = os.path.join(INPUT_DIR, 'test_sample_info.yaml')
+
+
+def _load_case(description):
+    """Return the fixture entry in TEST_DATA whose description matches.
+
+    Raises LookupError when no entry matches, so that a renamed or deleted
+    fixture fails the test instead of letting it pass without asserting.
+    """
+    with open(TEST_DATA) as stream:
+        # Fixtures are plain mappings/lists/scalars, so safe_load suffices.
+        fixtures = yaml.safe_load(stream)
+    for case in fixtures.get('tests', []):
+        if case.get('description') == description:
+            return case
+    raise LookupError('no test case described as {!r}'.format(description))
+
+
+class TestSalinity(unittest.TestCase):
+    """Salinity unit tests."""
+
+    def test_missing_space1(self):
+        """'400g/L' is expected to come back as '400 g/L'."""
+        case = _load_case('missing space 1')
+        # NOTE(review): this calls the capitalizer on a salinity fixture;
+        # confirm that is the intended function under test.
+        processed_input = capitalizer.capitalizer(case['sample']['text'])
+        self.assertEqual(processed_input, case['output']['text'])
diff --git a/tests/test_salinity_annotate.py b/tests/test_salinity_annotate.py
new file mode 100644
index 0000000..d15cbfb
--- /dev/null
+++ b/tests/test_salinity_annotate.py
@@ -0,0 +1,51 @@
+# -*- coding: utf-8 -*-
+"""Test MeasurementEngine.repair against a salinity fixture.
+
+Run as follows to see test-time printouts:
+
+    python -m pytest -sv tests/test_salinity_annotate.py
+"""
+import os
+import yaml
+import unittest
+from sample_annotator.measurements.measurements import MeasurementEngine
+from sample_annotator.report_model import AnnotationReport
+
+# MODEL_DIR, INPUT_DIR, OUTPUT_DIR
+from tests import INPUT_DIR
+
+# INPUT_DIR comes from __init__.py
+PWD = os.path.dirname(os.path.realpath(__file__))
+TEST_DATA = os.path.join(INPUT_DIR, 'test_sample_info.yaml')
+
+
+def _load_case(description):
+    """Return the fixture entry in TEST_DATA whose description matches.
+
+    Raises LookupError when no entry matches, so that a renamed or deleted
+    fixture fails the test instead of letting it pass without asserting.
+    """
+    with open(TEST_DATA) as stream:
+        # Fixtures are plain mappings/lists/scalars, so safe_load suffices.
+        fixtures = yaml.safe_load(stream)
+    for case in fixtures.get('tests', []):
+        if case.get('description') == description:
+            return case
+    raise LookupError('no test case described as {!r}'.format(description))
+
+
+class TestSalinityAnnotate(unittest.TestCase):
+    """Salinity unit tests."""
+
+    def test_missing_space1(self):
+        """'400g/L' is expected to be repaired to '400 g/L'."""
+        # Built inside the test so that importing this module for
+        # collection has no side effects.
+        report = AnnotationReport(messages=[])
+        engine = MeasurementEngine()
+        case = _load_case('missing space 1')
+
+        processed_input = engine.repair(case['sample']['text'], report=report)
+        print(processed_input)
+
+        self.assertEqual(processed_input, case['output']['text'])