diff --git a/tests/test_ml_material_parser.py b/tests/test_ml_material_parser.py index 2f460f3..498d57b 100644 --- a/tests/test_ml_material_parser.py +++ b/tests/test_ml_material_parser.py @@ -3,50 +3,35 @@ expand_formula, resolve_variables, generate_permutations, cluster_by_label -def test(): - model = MaterialParserML(MaterialParserFormulas()) - result = model.process( - ["j9f9j209 underdoped LaFeBO7", "La Fe B 8-x with x = 0.1", "underdoped single crystal LaFeB09 (TLL222)"]) - print(result) +# def test(): +# model = MaterialParserML(MaterialParserFormulas()) +# result = model.process( +# ["j9f9j209 underdoped LaFeBO7", "La Fe B 8-x with x = 0.1", "underdoped single crystal LaFeB09 (TLL222)"]) +# print(result) def test_extract_results(): model = MaterialParserML(MaterialParserFormulas(), model_path=None) - output = { - 'software': 'DeLFT', - 'date': '2023-12-19T19:15:48.334995', - 'model': 'material-BidLSTM_CRF', - 'texts': [ - { - 'text': 'powderss underdoped LaFeB07', - 'entities': [ - {'text': 'powderss', 'class': '', 'score': 1.0, 'beginOffset': 0, 'endOffset': 7}, - {'text': 'underdoped', 'class': '', 'score': 1.0, 'beginOffset': 9, 'endOffset': 18}, - {'text': 'LaFeBO7', 'class': '', 'score': 1.0, 'beginOffset': 20, 'endOffset': 26} - ] - }, - { - 'text': 'La Fe B 8-x with x = 0.1, 0.2', - 'entities': [ - {'text': 'La Fe B 8-x', 'class': '', 'score': 1.0, 'beginOffset': 0, 'endOffset': 10}, - {'text': 'x', 'class': '', 'score': 1.0, 'beginOffset': 17, 'endOffset': 17}, - {'text': '0.1', 'class': '', 'score': 1.0, 'beginOffset': 21, 'endOffset': 23}, - {'text': '0.2', 'class': '', 'score': 1.0, 'beginOffset': 21, 'endOffset': 23} - ] - }, - { - 'text': 'underdoped single crystal LaFeB09 (TLL222)', - 'entities': [ - {'text': 'underdoped', 'class': '', 'score': 1.0, 'beginOffset': 0, 'endOffset': 9}, - {'text': 'single crystal', 'class': '', 'score': 1.0, 'beginOffset': 11, - 'endOffset': 24}, - {'text': 'LaFeB09', 'class': '', 'score': 1.0, 'beginOffset': 26, 'endOffset': 32}, - {'text': 'TLL222', 'class': '', 'score': 1.0, 'beginOffset': 35, 'endOffset': 40} - ] - } + output = [ + [ + {'text': 'powderss', 'class': '', 'score': 1.0, 'beginOffset': 0, 'endOffset': 7}, + {'text': 'underdoped', 'class': '', 'score': 1.0, 'beginOffset': 9, 'endOffset': 18}, + {'text': 'LaFeBO7', 'class': '', 'score': 1.0, 'beginOffset': 20, 'endOffset': 26} ], - 'runtime': 3.248 - } + [ + {'text': 'La Fe B 8-x', 'class': '', 'score': 1.0, 'beginOffset': 0, 'endOffset': 10}, + {'text': 'x', 'class': '', 'score': 1.0, 'beginOffset': 17, 'endOffset': 17}, + {'text': '0.1', 'class': '', 'score': 1.0, 'beginOffset': 21, 'endOffset': 23}, + {'text': '0.2', 'class': '', 'score': 1.0, 'beginOffset': 21, 'endOffset': 23} + ], + [ + {'text': 'underdoped', 'class': '', 'score': 1.0, 'beginOffset': 0, 'endOffset': 9}, + {'text': 'single crystal', 'class': '', 'score': 1.0, 'beginOffset': 11, + 'endOffset': 24}, + {'text': 'LaFeB09', 'class': '', 'score': 1.0, 'beginOffset': 26, 'endOffset': 32}, + {'text': 'TLL222', 'class': '', 'score': 1.0, 'beginOffset': 35, 'endOffset': 40} + ] + ] entities = model.extract_results(output) @@ -54,7 +39,7 @@ def test_extract_results(): assert entities[0]['shape'] == "powderss" assert entities[0]['doping'] == "underdoped" - assert entities[0]['formula']['raw_value'] == "LaFeB07" + assert entities[0]['formula']['raw_value'] == "LaFeBO7" assert entities[1]['formula']['raw_value'] == "La Fe B 8-x" assert entities[1]['variables'] == {'x': ['0.1', '0.2']} @@ -280,13 +265,42 @@ def test_cluster_1(): def test_cluster_2(): - results = [[('under', 'B-'), ('-', 'I-'), ('doped', 'I-'), (' ', 'I-'), - ('La', 'B-'), (' ', 'I-'), ('x', 'I-'), (' ', 'I-'), - ('Fe', 'I-'), (' ', 'I-'), ('8', 'I-'), (' ', 'I-'), - ('O', 'I-'), ('7', 'I-'), (' ', 'I-'), ('single', 'B-'), - (' ', 'I-'), ('crystals', 'I-')], [('MgB', 'B-'), ('2', 'I-')], - [('Oxygen', 'B-')], [('Hydrogen', 'B-')]] + results = [ + [ + ('under', 'B-'), + ('-', 'I-'), + ('doped', 'I-'), + (' ', 'I-'), + ('La', 'B-'), + (' ', 'I-'), + ('x', 'I-'), + (' ', 'I-'), + ('Fe', 'I-'), + (' ', 'I-'), + ('8', 'I-'), + (' ', 'I-'), + ('O', 'I-'), + ('7', 'I-'), + (' ', 'I-'), + ('single', 'B-'), + (' ', 'I-'), + ('crystals', 'I-') + ], [ + ('MgB', 'B-'), + ('2', 'I-') + ], + [ + ('Oxygen', 'B-') + ], + [ + ('Hydrogen', 'B-') + ] + ] clusters = cluster_by_label(results) - print(clusters) + assert len(clusters) == 4 + assert len(clusters[0]) == 3 + assert len(clusters[1]) == 1 + assert len(clusters[2]) == 1 + assert len(clusters[3]) == 1