From 6e2d6ea8370edc220c67b6c6bde524e32a695d17 Mon Sep 17 00:00:00 2001 From: Hector Date: Thu, 19 Jan 2023 14:56:40 -0500 Subject: [PATCH] Update default serialization to the new `json` method. (#918) --- docs/conf.py | 5 +- .../audio/speaker_segmentation_pipeline.py | 4 +- .../ecosystem_script_only.py | 14 +- .../classification/amazon_review_sentiment.py | 4 +- .../classification/bank_customer_intent.py | 4 +- .../clinical_pipeline/utterance_searcher.py | 4 +- .../model/config_model_clean.py | 2 +- examples/content_rewriter/rewriter.py | 4 +- examples/serialization/README.md | 20 +- examples/wiki_parser/wiki_dump_parse.py | 10 +- forte/data/base_reader.py | 6 +- forte/data/readers/deserialize_reader.py | 18 +- forte/processors/base/writers.py | 14 +- forte/processors/writers.py | 4 +- setup.py | 15 +- tests/forte/data/data_pack_test.py | 27 ++- .../data/data_store_serialization_test.py | 197 ++++++++++------- tests/forte/data/data_store_test.py | 142 +++++------- .../forte/data/entry_data_structures_test.py | 15 +- tests/forte/data/multi_pack_test.py | 45 ++-- .../data/ontology/ndarray_attribute_test.py | 42 ++-- .../ontology/ontology_code_generator_test.py | 105 +++++---- .../readers/classification_reader_test.py | 67 +++--- tests/forte/data/readers/html_reader_test.py | 4 +- .../dbpedia/dbpedia_datasets_test.py | 4 +- tests/forte/grid_test.py | 27 ++- tests/forte/image_annotation_test.py | 10 +- tests/forte/notebooks/audio_tutorial_test.py | 3 +- tests/forte/notebooks/ocr_test.py | 207 +++++++++++++++++- .../notebooks/tutorial_MT_with_forte_test.py | 5 +- .../algorithms/data_augmentation_op_test.py | 154 ++++++------- ...mbedding_similarity_replacement_op_test.py | 6 +- tests/forte/utils/payload_decorator_test.py | 36 ++- 33 files changed, 713 insertions(+), 511 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 592b51626..cea38d122 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -407,7 +407,4 @@ def setup(sphinx): # Enable hypothesis.is in comments # https://sphinx-comments.readthedocs.io/en/latest/hypothesis.html#activate-hypothes-is -comments_config = { - "hypothesis": True -} - +comments_config = {"hypothesis": True} diff --git a/examples/audio/speaker_segmentation_pipeline.py b/examples/audio/speaker_segmentation_pipeline.py index a8be68c66..2afb827d5 100644 --- a/examples/audio/speaker_segmentation_pipeline.py +++ b/examples/audio/speaker_segmentation_pipeline.py @@ -50,7 +50,7 @@ def _process(self, input_pack: DataPack): audio_utter: AudioUtterance = AudioUtterance( pack=input_pack, begin=int(turn.start * input_pack.sample_rate), - end=int(turn.end * input_pack.sample_rate) + end=int(turn.end * input_pack.sample_rate), ) audio_utter.speaker = speaker @@ -97,7 +97,7 @@ def _process(self, input_pack: DataPack): text_utter: Utterance = Utterance( pack=input_pack, begin=len(input_pack.text) - len(transcription[0]), - end=len(input_pack.text) + end=len(input_pack.text), ) text_utter.speaker = audio_utter.speaker Link(pack=input_pack, parent=audio_utter, child=text_utter) diff --git a/examples/blog_post_examples/ecosystem_script_only.py b/examples/blog_post_examples/ecosystem_script_only.py index aaaff7e61..551e476ec 100644 --- a/examples/blog_post_examples/ecosystem_script_only.py +++ b/examples/blog_post_examples/ecosystem_script_only.py @@ -20,16 +20,12 @@ from forte.processors.stave import StaveProcessor from fortex.spacy import SpacyProcessor -Pipeline[DataPack]( -).set_reader( - HTMLReader() -).add( - SpacyProcessor(), config={ 
+Pipeline[DataPack]().set_reader(HTMLReader()).add( + SpacyProcessor(), + config={ "processors": ["sentence", "tokenize", "pos", "ner", "dep", "umls_link"] - } -).add( - StaveProcessor() -).run( + }, +).add(StaveProcessor()).run( "<body><p>" "she does not have SVS syndrome from an axillary vein thrombosis." "</p></body>
" diff --git a/examples/classification/amazon_review_sentiment.py b/examples/classification/amazon_review_sentiment.py index c8ec29e4d..a342bab8c 100644 --- a/examples/classification/amazon_review_sentiment.py +++ b/examples/classification/amazon_review_sentiment.py @@ -37,7 +37,9 @@ for pack in pl.process_dataset(csv_path): for sent in pack.get(Sentence): if ( - input("Type n for the next documentation and its prediction: ").lower() + input( + "Type n for the next documentation and its prediction: " + ).lower() == "n" ): sent_text = sent.text diff --git a/examples/classification/bank_customer_intent.py b/examples/classification/bank_customer_intent.py index e95afc1a1..2c8f9e694 100644 --- a/examples/classification/bank_customer_intent.py +++ b/examples/classification/bank_customer_intent.py @@ -113,9 +113,7 @@ "label", ], "index2class": index2class, - "text_fields": [ - "ft.onto.base_ontology.Body" - ], + "text_fields": ["ft.onto.base_ontology.Body"], "digit_label": False, "one_based_index_label": False, } diff --git a/examples/clinical_pipeline/utterance_searcher.py b/examples/clinical_pipeline/utterance_searcher.py index b235f73fc..5e8dbecc0 100644 --- a/examples/clinical_pipeline/utterance_searcher.py +++ b/examples/clinical_pipeline/utterance_searcher.py @@ -94,8 +94,8 @@ def _process(self, input_pack: DataPack): else: links: List[str] = create_links(self.configs.url_stub, answers) response_text: str = ( - "I found the following results:
-- " - + "
-- ".join(links) + "I found the following results:
-- " + + "
-- ".join(links) ) print(response_text) diff --git a/examples/content_rewriter/model/config_model_clean.py b/examples/content_rewriter/model/config_model_clean.py index 7d252c9da..ec23cf854 100644 --- a/examples/content_rewriter/model/config_model_clean.py +++ b/examples/content_rewriter/model/config_model_clean.py @@ -10,7 +10,7 @@ def get_embedder_hparams(dimension, name): "type": "random_normal_initializer", "kwargs": { "mean": 0.0, - "stddev": dimension ** -0.5, + "stddev": dimension**-0.5, }, }, } diff --git a/examples/content_rewriter/rewriter.py b/examples/content_rewriter/rewriter.py index 14637d73c..8081c4f8a 100644 --- a/examples/content_rewriter/rewriter.py +++ b/examples/content_rewriter/rewriter.py @@ -121,6 +121,4 @@ def prepare_data(self, context: UtteranceContext, utterance: Utterance): @classmethod def default_configs(cls) -> Dict[str, Any]: - return { - "model_dir": "content_rewriter/model" - } + return {"model_dir": "content_rewriter/model"} diff --git a/examples/serialization/README.md b/examples/serialization/README.md index 06e3625f9..5edcd2b28 100644 --- a/examples/serialization/README.md +++ b/examples/serialization/README.md @@ -1,6 +1,22 @@ This is a very simple serialization demo that use the built-in JSON serializer. -Just run the following command in this directory: + +First, let's install some simple processors via: + +` +pip install forte.nltk +` + +To ensure you are using the current version of Forte, go to Forte root and install from source: + +` +cd +pip install . +` + +Then just run the following command from this example directory: ` python serialize_example.py "../../data_samples/ontonotes/00/" -` \ No newline at end of file +` + +You should be able to see the progress and the serialized content. \ No newline at end of file diff --git a/examples/wiki_parser/wiki_dump_parse.py b/examples/wiki_parser/wiki_dump_parse.py index f10416e9c..507f6dfad 100644 --- a/examples/wiki_parser/wiki_dump_parse.py +++ b/examples/wiki_parser/wiki_dump_parse.py @@ -94,12 +94,12 @@ def add_wiki_info( if resume_from_last: if not os.path.exists(out_index_path): - raise ValueError(f"Configured to do resume but path " - f"{out_index_path} does not exists.") + raise ValueError( + f"Configured to do resume but path " + f"{out_index_path} does not exists." + ) - print_progress( - f"\nWill resume from last from {out_index_path}", "\n" - ) + print_progress(f"\nWill resume from last from {out_index_path}", "\n") pl.set_reader( reader, config={ diff --git a/forte/data/base_reader.py b/forte/data/base_reader.py index e342464b8..fa58e4771 100644 --- a/forte/data/base_reader.py +++ b/forte/data/base_reader.py @@ -107,11 +107,11 @@ def default_configs(cls): False. - serialize_method: The method used to serialize the data. Current - available options are `jsonpickle` and `pickle`. Default is - `jsonpickle`. + available options are `json`, `jsonpickle` and `pickle`. Default is + `json`. """ - return {"zip_pack": False, "serialize_method": "jsonpickle"} + return {"zip_pack": False, "serialize_method": "json"} @staticmethod def pack_type(): diff --git a/forte/data/readers/deserialize_reader.py b/forte/data/readers/deserialize_reader.py index 26b15432a..52ffcb4e8 100644 --- a/forte/data/readers/deserialize_reader.py +++ b/forte/data/readers/deserialize_reader.py @@ -74,8 +74,8 @@ def default_configs(cls): default value is None. - serialize_method: The method used to serialize the data. Current - available options are `jsonpickle` and `pickle`. Default is - `jsonpickle`. 
+ available options are `json`, `jsonpickle` and `pickle`. Default is + `json`. Returns: The default configuration of this writer. @@ -83,7 +83,7 @@ def default_configs(cls): return { "zip_pack": False, "indent": None, - "serialize_method": "jsonpickle", + "serialize_method": "json", } @@ -262,13 +262,13 @@ def default_configs(cls): Here: - serialize_method: The method used to serialize the data. Current - available options are `jsonpickle` and `pickle`. Default is - `jsonpickle`. + available options are `json`, `jsonpickle` and `pickle`. Default is + `json`. Returns: The default configuration of this writer. """ return { - "serialize_method": "jsonpickle", + "serialize_method": "json", } @@ -326,8 +326,8 @@ def default_configs(cls): - serialize_method (str): The method used to serialize the data, this should be the same as how serialization is done. The current - options are `jsonpickle` and `pickle`. The default method - is `jsonpickle`. + options are `json`, `jsonpickle` and `pickle`. The default method + is `json`. - zip_pack (bool): whether to zip the data pack. The default value is False. @@ -338,7 +338,7 @@ def default_configs(cls): "multi_pack_dir": None, "data_pack_dir": None, "suffix": ".json", - "serialize_method": "jsonpickle", + "serialize_method": "json", "zip_pack": False, } diff --git a/forte/processors/base/writers.py b/forte/processors/base/writers.py index ef2f011ab..506ea6d3f 100644 --- a/forte/processors/base/writers.py +++ b/forte/processors/base/writers.py @@ -49,7 +49,7 @@ def write_pack( zip_pack: bool = False, overwrite: bool = False, drop_record: bool = False, - serialize_method: str = "jsonpickle", + serialize_method: str = "json", ) -> str: """ Write a pack to a path. @@ -63,8 +63,8 @@ def write_pack( overwrite: Whether to overwrite the file if already exists. drop_record: Whether to drop the creation records in the serialization. serialize_method: The method used to serialize the data. Current - available options are `jsonpickle` and `pickle`. - Default is `jsonpickle`. + available options are `json`, `jsonpickle` and `pickle`. + Default is `json`. Returns: If successfully written, will return the path of the output file. @@ -144,8 +144,8 @@ def default_configs(cls): the default value is False. - serialize_method: The method used to serialize the data. Current - available options are `jsonpickle` and `pickle`. Default is - "jsonpickle". + available options are `json`, `jsonpickle` and `pickle`. Default is + "json". Returns: The default configuration of this writer. """ @@ -154,7 +154,7 @@ def default_configs(cls): "zip_pack": False, "indent": None, "drop_record": False, - "serialize_method": "jsonpickle", + "serialize_method": "json", } def _process(self, input_pack: DataPack): @@ -260,5 +260,5 @@ def default_configs(cls) -> Dict[str, Any]: "zip_pack": False, "indent": None, "drop_record": False, - "serialize_method": "jsonpickle", + "serialize_method": "json", } diff --git a/forte/processors/writers.py b/forte/processors/writers.py index 7802db4a1..e7e95729d 100644 --- a/forte/processors/writers.py +++ b/forte/processors/writers.py @@ -22,7 +22,7 @@ class PackIdJsonPackWriter(PackWriter): """ A writer implementation that writes data pack to disk. The default - serialization uses jsonpickle (readable). The file name of each data pack + serialization uses json. The file name of each data pack is the auto generated pack id of each pack. 
""" @@ -51,7 +51,7 @@ def default_configs(cls): class PackNameJsonPackWriter(PackWriter): """ A writer implementation that writes data pack to disk. The default - serialization uses jsonpickle (readable). The file name of + serialization uses json. The file name of each data pack is the assigned name of each pack. """ diff --git a/setup.py b/setup.py index 73d75d628..3e9aa37a0 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ VERSION_VAR = "VERSION" version = {} with open( - os.path.join(os.path.dirname(os.path.abspath(__file__)), "forte/version.py") + os.path.join(os.path.dirname(os.path.abspath(__file__)), "forte/version.py") ) as fp: exec(fp.read(), version) if VERSION_VAR not in version or not version[VERSION_VAR]: @@ -26,7 +26,7 @@ version=version[VERSION_VAR], url="https://github.com/asyml/forte", description="Forte is extensible framework for building composable and " - "modularized NLP workflows.", + "modularized NLP workflows.", long_description=long_description, long_description_content_type="text/markdown", license="Apache License Version 2.0", @@ -60,7 +60,12 @@ "requests", ], "ir": ["texar-pytorch>=0.1.4", "tensorflow>=1.15.0"], - "remote": ["fastapi>=0.65.2, <=0.75.2", "pydantic<=1.9.2", "uvicorn>=0.14.0", "requests"], + "remote": [ + "fastapi>=0.65.2, <=0.75.2", + "pydantic<=1.9.2", + "uvicorn>=0.14.0", + "requests", + ], "audio_ext": ["soundfile>=0.10.3"], "stave": ["stave>=0.0.1.dev12"], "models": [ @@ -90,9 +95,9 @@ "soundfile>=0.10.3", "Pillow", "requests", - "urlpath>=1.2.0" + "urlpath>=1.2.0", ], - "ocr_tutorial": ["Pillow", "requests", "pytesseract"] + "ocr_tutorial": ["Pillow", "requests", "pytesseract"], }, entry_points={ "console_scripts": [ diff --git a/tests/forte/data/data_pack_test.py b/tests/forte/data/data_pack_test.py index 9cf0dd119..4514a0d3f 100644 --- a/tests/forte/data/data_pack_test.py +++ b/tests/forte/data/data_pack_test.py @@ -318,25 +318,25 @@ def test_get_entries(self): with self.assertRaises(ValueError): for doc in self.data_pack.get("forte.data.data_pack.DataPack"): print(doc) - + # Test get raw entries # fetching documents - primitive_documents = list(self.data_pack.get(Document, get_raw = True)) + primitive_documents = list(self.data_pack.get(Document, get_raw=True)) object_documents = list(self.data_pack.get(Document)) self.assertEqual( primitive_documents[0], { - 'begin': 0, - 'end': 228, - 'payload_idx': 0, - 'document_class': [], - 'sentiment': {}, - 'classifications': {}, - 'tid': object_documents[0].tid, - 'type': 'ft.onto.base_ontology.Document' - } + "begin": 0, + "end": 228, + "payload_idx": 0, + "document_class": [], + "sentiment": {}, + "classifications": {}, + "tid": object_documents[0].tid, + "type": "ft.onto.base_ontology.Document", + }, ) # fetching groups @@ -353,12 +353,11 @@ def test_get_entries(self): em_object = self.data_pack.get_entry(em) members.append(em_object.text) group_members.append(sorted(members)) - + self.assertEqual( group_members, - [["He", "The Indonesian billionaire James Riady", "he"]] + [["He", "The Indonesian billionaire James Riady", "he"]], ) - def test_delete_entry(self): # test delete entry diff --git a/tests/forte/data/data_store_serialization_test.py b/tests/forte/data/data_store_serialization_test.py index 0db86f554..6a9219b56 100644 --- a/tests/forte/data/data_store_serialization_test.py +++ b/tests/forte/data/data_store_serialization_test.py @@ -30,8 +30,6 @@ class DataStoreTest(unittest.TestCase): - - def setUp(self) -> None: self.data_store = DataStore() # This test setup changes the order 
of fields and delete one field of @@ -75,17 +73,17 @@ def setUp(self) -> None: }, "forte.data.ontology.top.Group": { "attributes": { - 'members': {'type': (list, (int,)), 'index': 2}, - 'member_type': {'type': (type(None), (str,)), 'index': 3} + "members": {"type": (list, (int,)), "index": 2}, + "member_type": {"type": (type(None), (str,)), "index": 3}, }, "parent_entry": "forte.data.ontology.core.BaseGroup", }, "forte.data.ontology.top.Link": { "attributes": { - 'parent_type': {'type': (type(None), (str,)), 'index': 2}, - 'child_type': {'type': (type(None), (str,)), 'index': 3}, - 'parent': {'type': (Union, (int, type(None))), 'index': 4}, - 'child': {'type': (Union, (int, type(None))), 'index': 5} + "parent_type": {"type": (type(None), (str,)), "index": 2}, + "child_type": {"type": (type(None), (str,)), "index": 3}, + "parent": {"type": (Union, (int, type(None))), "index": 4}, + "child": {"type": (Union, (int, type(None))), "index": 5}, }, "parent_entry": "forte.data.ontology.core.BaseLink", }, @@ -147,7 +145,8 @@ def setUp(self) -> None: "Class E", ], ], - key=self.sorting_fn), + key=self.sorting_fn, + ), "ft.onto.base_ontology.Sentence": SortedList( [ [ @@ -203,19 +202,20 @@ def setUp(self) -> None: "good", ], ], - key=self.sorting_fn), + key=self.sorting_fn, + ), "forte.data.ontology.top.Group": [ [ 10123, "forte.data.ontology.top.Group", [9999, 1234567], - "ft.onto.base_ontology.Sentence" + "ft.onto.base_ontology.Sentence", ], [ 23456, "forte.data.ontology.top.Group", [1234, 3456], - "ft.onto.base_ontology.Document" + "ft.onto.base_ontology.Document", ], ], "forte.data.ontology.top.Link": [ @@ -226,7 +226,6 @@ def setUp(self) -> None: "ft.onto.base_ontology.Document", 9999, 1234, - ], ], } @@ -284,7 +283,10 @@ def test_save_attribute_pickle(self): "attributes": { "begin": {"index": 2, "type": (type(None), (int,))}, "end": {"index": 3, "type": (type(None), (int,))}, - "payload_idx": {"index": 4, "type": (type(None), (int,))}, + "payload_idx": { + "index": 4, + "type": (type(None), (int,)), + }, "document_class": {"index": 5, "type": (list, (str,))}, "sentiment": {"index": 6, "type": (dict, (str, float))}, "classifications": { @@ -296,17 +298,11 @@ def test_save_attribute_pickle(self): }, "ft.onto.base_ontology.Sentence": { "attributes": { - "begin": { - "index": 2, - "type": (type(None), (int,)) - }, - "end": { - "index": 3, - "type": (type(None), (int,)) - }, + "begin": {"index": 2, "type": (type(None), (int,))}, + "end": {"index": 3, "type": (type(None), (int,))}, "payload_idx": { "index": 4, - "type": (type(None), (int,)) + "type": (type(None), (int,)), }, "speaker": { "index": 5, @@ -316,10 +312,7 @@ def test_save_attribute_pickle(self): "index": 6, "type": (Union, (int, type(None))), }, - "sentiment": { - "index": 7, - "type": (dict, (str, float)) - }, + "sentiment": {"index": 7, "type": (dict, (str, float))}, "classification": { "index": 8, "type": (dict, (str, float)), @@ -333,17 +326,32 @@ def test_save_attribute_pickle(self): }, "forte.data.ontology.top.Group": { "attributes": { - 'members': {'type': (list, (int,)), 'index': 2}, - 'member_type': {'type': (type(None), (str,)), 'index': 3} + "members": {"type": (list, (int,)), "index": 2}, + "member_type": { + "type": (type(None), (str,)), + "index": 3, + }, }, "parent_entry": "forte.data.ontology.core.BaseGroup", }, "forte.data.ontology.top.Link": { "attributes": { - 'parent_type': {'type': (type(None), (str,)), 'index': 2}, - 'child_type': {'type': (type(None), (str,)), 'index': 3}, - 'parent': {'type': (Union, (int, 
type(None))), 'index': 4}, - 'child': {'type': (Union, (int, type(None))), 'index': 5} + "parent_type": { + "type": (type(None), (str,)), + "index": 2, + }, + "child_type": { + "type": (type(None), (str,)), + "index": 3, + }, + "parent": { + "type": (Union, (int, type(None))), + "index": 4, + }, + "child": { + "type": (Union, (int, type(None))), + "index": 5, + }, }, "parent_entry": "forte.data.ontology.core.BaseLink", }, @@ -416,7 +424,8 @@ def test_save_attribute_pickle(self): "Class E", ], ], - key=self.sorting_fn), + key=self.sorting_fn, + ), "ft.onto.base_ontology.Sentence": SortedList( [ [ @@ -468,19 +477,20 @@ def test_save_attribute_pickle(self): "class2", ], ], - key=self.sorting_fn), + key=self.sorting_fn, + ), "forte.data.ontology.top.Group": [ [ 10123, "forte.data.ontology.top.Group", [9999, 1234567], - "ft.onto.base_ontology.Sentence" + "ft.onto.base_ontology.Sentence", ], [ 23456, "forte.data.ontology.top.Group", [1234, 3456], - "ft.onto.base_ontology.Document" + "ft.onto.base_ontology.Document", ], ], "forte.data.ontology.top.Link": [ @@ -491,10 +501,8 @@ def test_save_attribute_pickle(self): "ft.onto.base_ontology.Document", 9999, 1234, - ], ], - }, ) self.assertEqual( @@ -598,9 +606,10 @@ def test_save_attribute_pickle(self): 0, "Very Positive", "Class E", - ] + ], ], - key=self.sorting_fn), + key=self.sorting_fn, + ), "ft.onto.base_ontology.Sentence": SortedList( [ [ @@ -628,7 +637,6 @@ def test_save_attribute_pickle(self): "Class C", "Class D", "abc", - ], [ 100, @@ -657,20 +665,20 @@ def test_save_attribute_pickle(self): "good", ], ], - key=self.sorting_fn + key=self.sorting_fn, ), "forte.data.ontology.top.Group": [ [ 10123, "forte.data.ontology.top.Group", [9999, 1234567], - "ft.onto.base_ontology.Sentence" + "ft.onto.base_ontology.Sentence", ], [ 23456, "forte.data.ontology.top.Group", [1234, 3456], - "ft.onto.base_ontology.Document" + "ft.onto.base_ontology.Document", ], ], "forte.data.ontology.top.Link": [ @@ -681,7 +689,6 @@ def test_save_attribute_pickle(self): "ft.onto.base_ontology.Document", 9999, 1234, - ], ], }, @@ -754,7 +761,10 @@ def test_fast_pickle(self): "attributes": { "begin": {"index": 2, "type": (type(None), (int,))}, "end": {"index": 3, "type": (type(None), (int,))}, - "payload_idx": {"index": 4, "type": (type(None), (int,))}, + "payload_idx": { + "index": 4, + "type": (type(None), (int,)), + }, "document_class": {"index": 5, "type": (list, (str,))}, "sentiment": {"index": 6, "type": (dict, (str, float))}, "classifications": { @@ -766,17 +776,11 @@ def test_fast_pickle(self): }, "ft.onto.base_ontology.Sentence": { "attributes": { - "begin": { - "index": 2, - "type": (type(None), (int,)) - }, - "end": { - "index": 3, - "type": (type(None), (int,)) - }, + "begin": {"index": 2, "type": (type(None), (int,))}, + "end": {"index": 3, "type": (type(None), (int,))}, "payload_idx": { "index": 4, - "type": (type(None), (int,)) + "type": (type(None), (int,)), }, "speaker": { "index": 5, @@ -800,17 +804,32 @@ def test_fast_pickle(self): }, "forte.data.ontology.top.Group": { "attributes": { - 'members': {'type': (list, (int,)), 'index': 2}, - 'member_type': {'type': (type(None), (str,)), 'index': 3} + "members": {"type": (list, (int,)), "index": 2}, + "member_type": { + "type": (type(None), (str,)), + "index": 3, + }, }, "parent_entry": "forte.data.ontology.core.BaseGroup", }, "forte.data.ontology.top.Link": { "attributes": { - 'parent_type': {'type': (type(None), (str,)), 'index': 2}, - 'child_type': {'type': (type(None), (str,)), 'index': 3}, - 'parent': 
{'type': (Union, (int, type(None))), 'index': 4}, - 'child': {'type': (Union, (int, type(None))), 'index': 5} + "parent_type": { + "type": (type(None), (str,)), + "index": 2, + }, + "child_type": { + "type": (type(None), (str,)), + "index": 3, + }, + "parent": { + "type": (Union, (int, type(None))), + "index": 4, + }, + "child": { + "type": (Union, (int, type(None))), + "index": 5, + }, }, "parent_entry": "forte.data.ontology.core.BaseLink", }, @@ -877,7 +896,10 @@ def test_delete_serialize(self): "attributes": { "begin": {"index": 2, "type": (type(None), (int,))}, "end": {"index": 3, "type": (type(None), (int,))}, - "payload_idx": {"index": 4, "type": (type(None), (int,))}, + "payload_idx": { + "index": 4, + "type": (type(None), (int,)), + }, "document_class": {"index": 5, "type": (list, (str,))}, "sentiment": {"index": 6, "type": (dict, (str, float))}, "classifications": { @@ -889,17 +911,11 @@ def test_delete_serialize(self): }, "ft.onto.base_ontology.Sentence": { "attributes": { - "begin": { - "index": 2, - "type": (type(None), (int,)) - }, - "end": { - "index": 3, - "type": (type(None), (int,)) - }, + "begin": {"index": 2, "type": (type(None), (int,))}, + "end": {"index": 3, "type": (type(None), (int,))}, "payload_idx": { "index": 4, - "type": (type(None), (int,)) + "type": (type(None), (int,)), }, "speaker": { "index": 5, @@ -923,17 +939,32 @@ def test_delete_serialize(self): }, "forte.data.ontology.top.Group": { "attributes": { - 'members': {'type': (list, (int,)), 'index': 2}, - 'member_type': {'type': (type(None), (str,)), 'index': 3} + "members": {"type": (list, (int,)), "index": 2}, + "member_type": { + "type": (type(None), (str,)), + "index": 3, + }, }, "parent_entry": "forte.data.ontology.core.BaseGroup", }, "forte.data.ontology.top.Link": { "attributes": { - 'parent_type': {'type': (type(None), (str,)), 'index': 2}, - 'child_type': {'type': (type(None), (str,)), 'index': 3}, - 'parent': {'type': (Union, (int, type(None))), 'index': 4}, - 'child': {'type': (Union, (int, type(None))), 'index': 5} + "parent_type": { + "type": (type(None), (str,)), + "index": 2, + }, + "child_type": { + "type": (type(None), (str,)), + "index": 3, + }, + "parent": { + "type": (Union, (int, type(None))), + "index": 4, + }, + "child": { + "type": (Union, (int, type(None))), + "index": 5, + }, }, "parent_entry": "forte.data.ontology.core.BaseLink", }, @@ -994,7 +1025,8 @@ def test_delete_serialize(self): "Class E", ], ], - key=self.sorting_fn), + key=self.sorting_fn, + ), "ft.onto.base_ontology.Sentence": SortedList( [ [ @@ -1044,16 +1076,16 @@ def test_delete_serialize(self): "Positive", None, "class2", - ], ], - key=self.sorting_fn), + key=self.sorting_fn, + ), "forte.data.ontology.top.Group": [ [ 23456, "forte.data.ontology.top.Group", [1234, 3456], - "ft.onto.base_ontology.Document" + "ft.onto.base_ontology.Document", ], ], "forte.data.ontology.top.Link": [ @@ -1064,7 +1096,6 @@ def test_delete_serialize(self): "ft.onto.base_ontology.Document", 9999, 1234, - ], ], }, diff --git a/tests/forte/data/data_store_test.py b/tests/forte/data/data_store_test.py index b6338190e..33fc1fc93 100644 --- a/tests/forte/data/data_store_test.py +++ b/tests/forte/data/data_store_test.py @@ -139,18 +139,9 @@ def setUp(self) -> None: }, "ft.onto.base_ontology.Sentence": { "attributes": { - "begin": { - "index": 2, - "type": (type(None), (int,)) - }, - "end": { - "index": 3, - "type": (type(None), (int,)) - }, - "payload_idx": { - "index": 4, - "type": (type(None), (int,)) - }, + "begin": {"index": 2, "type": 
(type(None), (int,))}, + "end": {"index": 3, "type": (type(None), (int,))}, + "payload_idx": {"index": 4, "type": (type(None), (int,))}, "speaker": { "index": 5, "type": (Union, (str, type(None))), @@ -173,18 +164,9 @@ def setUp(self) -> None: }, "forte.data.ontology.top.Annotation": { "attributes": { - "begin": { - "index": 2, - "type": (type(None), (int,)) - }, - "end": { - "index": 3, - "type": (type(None), (int,)) - }, - "payload_idx": { - "index": 4, - "type": (type(None), (int,)) - } + "begin": {"index": 2, "type": (type(None), (int,))}, + "end": {"index": 3, "type": (type(None), (int,))}, + "payload_idx": {"index": 4, "type": (type(None), (int,))}, }, "parent_class": {"Entry"}, }, @@ -194,56 +176,47 @@ def setUp(self) -> None: "forte.data.ontology.top.Generics": {"parent_class": {"Entry"}}, "forte.data.ontology.top.Link": { "attributes": { - 'parent_type': {'type': (type(None), (str,)), 'index': 2}, - 'child_type': {'type': (type(None), (str,)), 'index': 3}, - 'parent': {'type': (Union, (int, type(None))), 'index': 4}, - 'child': {'type': (Union, (int, type(None))), 'index': 5} + "parent_type": {"type": (type(None), (str,)), "index": 2}, + "child_type": {"type": (type(None), (str,)), "index": 3}, + "parent": {"type": (Union, (int, type(None))), "index": 4}, + "child": {"type": (Union, (int, type(None))), "index": 5}, }, - "parent_class": {"BaseLink"} + "parent_class": {"BaseLink"}, }, "forte.data.ontology.top.Group": { "attributes": { - 'members': {'type': (list, (int,)), 'index': 2}, - 'member_type': {'type': (type(None), (str,)), 'index': 3} + "members": {"type": (list, (int,)), "index": 2}, + "member_type": {"type": (type(None), (str,)), "index": 3}, }, - "parent_class": {"Entry", "BaseGroup"} + "parent_class": {"Entry", "BaseGroup"}, }, "forte.data.ontology.top.MultiPackGeneric": { "parent_class": {"Entry", "MultiEntry"} }, "forte.data.ontology.top.MultiPackLink": { "attributes": { - 'parent_type': {'type': (type(None), (str,)), 'index': 2}, - 'child_type': {'type': (type(None), (str,)), 'index': 3}, - 'parent': {'type': (tuple, (Tuple,)), 'index': 4}, - 'child': {'type': (tuple, (Tuple,)), 'index': 5} + "parent_type": {"type": (type(None), (str,)), "index": 2}, + "child_type": {"type": (type(None), (str,)), "index": 3}, + "parent": {"type": (tuple, (Tuple,)), "index": 4}, + "child": {"type": (tuple, (Tuple,)), "index": 5}, }, - "parent_class": {"MultiEntry", "BaseLink"} + "parent_class": {"MultiEntry", "BaseLink"}, }, "forte.data.ontology.top.MultiPackGroup": { "attributes": { - 'members': {'type': (list, (Tuple, int, int)), 'index': 2}, - 'member_type': {'type': (type(None), (str,)), 'index': 3} + "members": {"type": (list, (Tuple, int, int)), "index": 2}, + "member_type": {"type": (type(None), (str,)), "index": 3}, }, - "parent_class": {"Entry", "MultiEntry", "BaseGroup"} + "parent_class": {"Entry", "MultiEntry", "BaseGroup"}, }, "forte.data.ontology.top.Query": {"parent_class": {"Generics"}}, "forte.data.ontology.top.AudioAnnotation": { "attributes": { - "begin": { - "index": 2, - "type": (type(None), (int,)) - }, - "end": { - "index": 3, - "type": (type(None), (int,)) - }, - "payload_idx": { - "index": 4, - "type": (type(None), (int,)) - } + "begin": {"index": 2, "type": (type(None), (int,))}, + "end": {"index": 3, "type": (type(None), (int,))}, + "payload_idx": {"index": 4, "type": (type(None), (int,))}, }, - "parent_class": {"Entry"} + "parent_class": {"Entry"}, }, } @@ -264,18 +237,9 @@ def setUp(self) -> None: DataStore._type_attributes["ft.onto.base_ontology.Sentence"] 
= { "attributes": { - "begin": { - "index": 2, - "type": (type(None), (int,)) - }, - "end": { - "index": 3, - "type": (type(None), (int,)) - }, - "payload_idx": { - "index": 4, - "type": (type(None), (int,)) - }, + "begin": {"index": 2, "type": (type(None), (int,))}, + "end": {"index": 3, "type": (type(None), (int,))}, + "payload_idx": {"index": 4, "type": (type(None), (int,))}, "speaker": { "index": 5, "type": (Union, (str, type(None))), @@ -299,18 +263,9 @@ def setUp(self) -> None: DataStore._type_attributes["forte.data.ontology.top.Annotation"] = { "attributes": { - "begin": { - "index": 2, - "type": (type(None), (int,)) - }, - "end": { - "index": 3, - "type": (type(None), (int,)) - }, - "payload_idx": { - "index": 4, - "type": (type(None), (int,)) - } + "begin": {"index": 2, "type": (type(None), (int,))}, + "end": {"index": 3, "type": (type(None), (int,))}, + "payload_idx": {"index": 4, "type": (type(None), (int,))}, }, "parent_class": {"Entry"}, } @@ -748,13 +703,13 @@ def test_add_annotation_raw(self): # test add Document entry tid_doc: int = self.data_store.add_entry_raw( type_name="ft.onto.base_ontology.Document", - attribute_data=[1,5], + attribute_data=[1, 5], ) # test add Sentence entry tid_sent: int = self.data_store.add_entry_raw( type_name="ft.onto.base_ontology.Sentence", - attribute_data=[5,8], + attribute_data=[5, 8], ) num_doc = self.data_store.get_length("ft.onto.base_ontology.Document") @@ -786,7 +741,7 @@ def test_add_annotation_raw(self): # test add new annotation type tid_em: int = self.data_store.add_entry_raw( type_name="ft.onto.base_ontology.EntityMention", - attribute_data=[10,12], + attribute_data=[10, 12], ) num_phrase = self.data_store.get_length( "ft.onto.base_ontology.EntityMention" @@ -803,7 +758,7 @@ def test_add_annotation_raw(self): tid_sent_duplicate: int = self.data_store.add_entry_raw( type_name="ft.onto.base_ontology.Sentence", allow_duplicate=False, - attribute_data=[5,8], + attribute_data=[5, 8], ) self.assertEqual( len( @@ -817,7 +772,7 @@ def test_add_annotation_raw(self): self.data_store.add_entry_raw( type_name="ft.onto.base_ontology.Sentence", allow_duplicate=False, - attribute_data=[5,9], + attribute_data=[5, 9], ) self.assertEqual( len( @@ -833,7 +788,7 @@ def test_add_annotation_raw(self): self.data_store.add_entry_raw( type_name="ft.onto.base_ontology.Sentence", tid=tid, - attribute_data=[0,1], + attribute_data=[0, 1], ) self.assertEqual( self.data_store.get_entry(tid=77)[0], @@ -855,16 +810,16 @@ def test_add_audio_annotation_raw(self): # test add Document entry tid_recording: int = self.data_store.add_entry_raw( type_name="ft.onto.base_ontology.Recording", - attribute_data=[1,5], + attribute_data=[1, 5], ) # test add Sentence entry tid_audio_utterance: int = self.data_store.add_entry_raw( type_name="ft.onto.base_ontology.AudioUtterance", - attribute_data=[5,8], + attribute_data=[5, 8], ) tid_utterance: int = self.data_store.add_entry_raw( type_name="ft.onto.base_ontology.Utterance", - attribute_data=[5,8], + attribute_data=[5, 8], ) # check number of Recording self.assertEqual( @@ -897,7 +852,7 @@ def test_add_audio_annotation_raw(self): self.data_store.add_entry_raw( type_name="ft.onto.base_ontology.Recording", tid=tid, - attribute_data=[0,1], + attribute_data=[0, 1], ) self.assertEqual( self.data_store.get_entry(tid=77)[0], @@ -933,8 +888,8 @@ def test_add_link_raw(self): type_name="forte.data.ontology.top.Link", attribute_data=[ "ft.onto.base_ontology.Sentence", - "ft.onto.base_ontology.Document" - ] + 
"ft.onto.base_ontology.Document", + ], ) # check number of Link self.assertEqual( @@ -952,7 +907,7 @@ def test_add_link_raw(self): type_name="forte.data.ontology.top.Link", attribute_data=[ "ft.onto.base_ontology.Sentence", - "ft.onto.base_ontology.Document" + "ft.onto.base_ontology.Document", ], tid=tid, ) @@ -1030,7 +985,10 @@ def test_add_multientry_raw(self): self.data_store.add_entry_raw( type_name="forte.data.ontology.top.MultiPackLink", - attribute_data=["ft.onto.base_ontology.Sentence", "ft.onto.base_ontology.Document"] + attribute_data=[ + "ft.onto.base_ontology.Sentence", + "ft.onto.base_ontology.Document", + ], ) # check number of MultiPackGeneric self.assertEqual( @@ -1048,7 +1006,7 @@ def test_add_multientry_raw(self): type_name="forte.data.ontology.top.MultiPackLink", attribute_data=[ "ft.onto.base_ontology.Sentence", - "ft.onto.base_ontology.Document" + "ft.onto.base_ontology.Document", ], tid=tid, ) diff --git a/tests/forte/data/entry_data_structures_test.py b/tests/forte/data/entry_data_structures_test.py index 4790d2179..c57df9726 100644 --- a/tests/forte/data/entry_data_structures_test.py +++ b/tests/forte/data/entry_data_structures_test.py @@ -48,6 +48,7 @@ def __init__(self, pack: MultiPack): super().__init__(pack) self.entries = FList[ExampleEntry](self) + @dataclass class EntryWithDict(Generics): """ @@ -182,7 +183,10 @@ def test_mp_pointer_serialization_schema(self): recovered_mp = MultiPack.from_string(serialized_mp) - s_packs: List[str] = ["""{"_json_class": "forte.data.data_pack.DataPack", "_json_state": {"pack_version": "0.0.2", "_meta": {"_json_class": "forte.data.data_pack.Meta", "_json_state": {"pack_name": null, "_pack_id": 143667891708823045988051900980860741991, "record": {}, "language": "eng", "span_unit": "character", "sample_rate": null, "info": {}}}, "_data_store": {"_json_class": "forte.data.data_store.DataStore", "_json_state": {"_onto_file_path": null, "_dynamically_add_type": true, "fields": {"forte.data.ontology.top.Generics": {}, "forte.data.ontology.top.Annotation": {}, "forte.data.ontology.top.Link": {}, "forte.data.ontology.top.Group": {}, "forte.data.ontology.top.MultiPackGeneric": {}, "forte.data.ontology.top.MultiPackLink": {}, "forte.data.ontology.top.MultiPackGroup": {}, "forte.data.ontology.top.Query": {}, "forte.data.ontology.top.AudioAnnotation": {}, "forte.data.ontology.top.ImageAnnotation": {}, "forte.data.ontology.top.Region": {}, "forte.data.ontology.top.Box": {}, "forte.data.ontology.top.Payload": {}, "entry_data_structures_test.EntryWithList": {"attributes": {"entries": {"index": 2}}}, "entry_data_structures_test.ExampleEntry": {"attributes": {"secret_number": {"index": 2}}}, "entry_data_structures_test.EntryWithDict": {"attributes": {"entries": {"index": 2}}}, "entry_data_structures_test.EntryAsAttribute": {"attributes": {}}, "entry_data_structures_test.ExampleMPEntry": {"attributes": {"refer_entry": {"index": 2}}}}, "entries": {"entry_data_structures_test.ExampleEntry": [[264470699675371126293844966884005648796, "entry_data_structures_test.ExampleEntry", 1]]}}}, "text_payloads": [], "audio_payloads": [], "image_payloads": [], "_creation_records": {}, "_field_records": {}}}""", """{"_json_class": "forte.data.data_pack.DataPack", "_json_state": {"pack_version": "0.0.2", "_meta": {"_json_class": "forte.data.data_pack.Meta", "_json_state": {"pack_name": null, "_pack_id": 55501035046014418523039697578490139411, "record": {}, "language": "eng", "span_unit": "character", "sample_rate": null, "info": {}}}, "_data_store": 
{"_json_class": "forte.data.data_store.DataStore", "_json_state": {"_onto_file_path": null, "_dynamically_add_type": true, "fields": {"forte.data.ontology.top.Generics": {}, "forte.data.ontology.top.Annotation": {}, "forte.data.ontology.top.Link": {}, "forte.data.ontology.top.Group": {}, "forte.data.ontology.top.MultiPackGeneric": {}, "forte.data.ontology.top.MultiPackLink": {}, "forte.data.ontology.top.MultiPackGroup": {}, "forte.data.ontology.top.Query": {}, "forte.data.ontology.top.AudioAnnotation": {}, "forte.data.ontology.top.ImageAnnotation": {}, "forte.data.ontology.top.Region": {}, "forte.data.ontology.top.Box": {}, "forte.data.ontology.top.Payload": {}, "entry_data_structures_test.EntryWithList": {"attributes": {"entries": {"index": 2}}}, "entry_data_structures_test.ExampleEntry": {"attributes": {"secret_number": {"index": 2}}}, "entry_data_structures_test.EntryWithDict": {"attributes": {"entries": {"index": 2}}}, "entry_data_structures_test.EntryAsAttribute": {"attributes": {}}, "entry_data_structures_test.ExampleMPEntry": {"attributes": {"refer_entry": {"index": 2}}}}, "entries": {"entry_data_structures_test.ExampleEntry": [[211693204381909586800582877426087499308, "entry_data_structures_test.ExampleEntry", null]]}}}, "text_payloads": [], "audio_payloads": [], "image_payloads": [], "_creation_records": {}, "_field_records": {}}}"""] + s_packs: List[str] = [ + """{"_json_class": "forte.data.data_pack.DataPack", "_json_state": {"pack_version": "0.0.2", "_meta": {"_json_class": "forte.data.data_pack.Meta", "_json_state": {"pack_name": null, "_pack_id": 143667891708823045988051900980860741991, "record": {}, "language": "eng", "span_unit": "character", "sample_rate": null, "info": {}}}, "_data_store": {"_json_class": "forte.data.data_store.DataStore", "_json_state": {"_onto_file_path": null, "_dynamically_add_type": true, "fields": {"forte.data.ontology.top.Generics": {}, "forte.data.ontology.top.Annotation": {}, "forte.data.ontology.top.Link": {}, "forte.data.ontology.top.Group": {}, "forte.data.ontology.top.MultiPackGeneric": {}, "forte.data.ontology.top.MultiPackLink": {}, "forte.data.ontology.top.MultiPackGroup": {}, "forte.data.ontology.top.Query": {}, "forte.data.ontology.top.AudioAnnotation": {}, "forte.data.ontology.top.ImageAnnotation": {}, "forte.data.ontology.top.Region": {}, "forte.data.ontology.top.Box": {}, "forte.data.ontology.top.Payload": {}, "entry_data_structures_test.EntryWithList": {"attributes": {"entries": {"index": 2}}}, "entry_data_structures_test.ExampleEntry": {"attributes": {"secret_number": {"index": 2}}}, "entry_data_structures_test.EntryWithDict": {"attributes": {"entries": {"index": 2}}}, "entry_data_structures_test.EntryAsAttribute": {"attributes": {}}, "entry_data_structures_test.ExampleMPEntry": {"attributes": {"refer_entry": {"index": 2}}}}, "entries": {"entry_data_structures_test.ExampleEntry": [[264470699675371126293844966884005648796, "entry_data_structures_test.ExampleEntry", 1]]}}}, "text_payloads": [], "audio_payloads": [], "image_payloads": [], "_creation_records": {}, "_field_records": {}}}""", + """{"_json_class": "forte.data.data_pack.DataPack", "_json_state": {"pack_version": "0.0.2", "_meta": {"_json_class": "forte.data.data_pack.Meta", "_json_state": {"pack_name": null, "_pack_id": 55501035046014418523039697578490139411, "record": {}, "language": "eng", "span_unit": "character", "sample_rate": null, "info": {}}}, "_data_store": {"_json_class": "forte.data.data_store.DataStore", "_json_state": {"_onto_file_path": null, 
"_dynamically_add_type": true, "fields": {"forte.data.ontology.top.Generics": {}, "forte.data.ontology.top.Annotation": {}, "forte.data.ontology.top.Link": {}, "forte.data.ontology.top.Group": {}, "forte.data.ontology.top.MultiPackGeneric": {}, "forte.data.ontology.top.MultiPackLink": {}, "forte.data.ontology.top.MultiPackGroup": {}, "forte.data.ontology.top.Query": {}, "forte.data.ontology.top.AudioAnnotation": {}, "forte.data.ontology.top.ImageAnnotation": {}, "forte.data.ontology.top.Region": {}, "forte.data.ontology.top.Box": {}, "forte.data.ontology.top.Payload": {}, "entry_data_structures_test.EntryWithList": {"attributes": {"entries": {"index": 2}}}, "entry_data_structures_test.ExampleEntry": {"attributes": {"secret_number": {"index": 2}}}, "entry_data_structures_test.EntryWithDict": {"attributes": {"entries": {"index": 2}}}, "entry_data_structures_test.EntryAsAttribute": {"attributes": {}}, "entry_data_structures_test.ExampleMPEntry": {"attributes": {"refer_entry": {"index": 2}}}}, "entries": {"entry_data_structures_test.ExampleEntry": [[211693204381909586800582877426087499308, "entry_data_structures_test.ExampleEntry", null]]}}}, "text_payloads": [], "audio_payloads": [], "image_payloads": [], "_creation_records": {}, "_field_records": {}}}""", + ] recovered_packs = [DataPack.from_string(s) for s in s_packs] @@ -285,10 +289,10 @@ def test_entry_dict(self): def test_entry_key_memories(self): pack = ( Pipeline[MultiPack]() - .set_reader(EmptyReader()) - .add(ChildEntryAnnotator()) - .initialize() - .process(["pack1", "pack2"]) + .set_reader(EmptyReader()) + .add(ChildEntryAnnotator()) + .initialize() + .process(["pack1", "pack2"]) ) DataPack.from_string(pack.to_string(True)) @@ -337,5 +341,6 @@ def test_not_hashable(self): with self.assertRaises(TypeError): hash(anno1) + if __name__ == "__main__": unittest.main() diff --git a/tests/forte/data/multi_pack_test.py b/tests/forte/data/multi_pack_test.py index 338b2dfe9..996fd82b0 100644 --- a/tests/forte/data/multi_pack_test.py +++ b/tests/forte/data/multi_pack_test.py @@ -56,7 +56,7 @@ def test_serialization(self): for pack_name in mp.pack_names: self.assertEqual( recovered_mp.get_pack(pack_name).pack_id, - mp.get_pack(pack_name).pack_id + mp.get_pack(pack_name).pack_id, ) def test_add_pack(self): @@ -127,14 +127,12 @@ def test_multipack_groups(self): for grp in self.multi_pack.get(MultiPackGroup, get_raw=True): temp_list = [] # Note here that grp represents a dictionary and not an object - for pack, mem in grp['members']: + for pack, mem in grp["members"]: mem_obj = self.multi_pack.get_subentry(pack, mem) temp_list.append(mem_obj.text) group_content.append(tuple(temp_list)) self.assertListEqual(expected_content, group_content) - - def test_multipack_entries(self): """ @@ -223,7 +221,8 @@ def test_multipack_entries(self): right_tokens_recovered = [t.text for t in recovered_packs[1].get(Token)] self.assertListEqual( - left_tokens_recovered, ["This", "pack", "contains", "some", "sample", "data."] + left_tokens_recovered, + ["This", "pack", "contains", "some", "sample", "data."], ) self.assertListEqual( right_tokens_recovered, @@ -319,7 +318,7 @@ def test_remove_pack(self): # Add MultiPackLink to data_pack11 and data_pack12 & Add MultiPackGroup to data_pack11, data_pack12 # and data_pack13 # Add tokens to each pack. - for pack in self.multi_pack.packs[ref_id11: ref_id12 + 1]: + for pack in self.multi_pack.packs[ref_id11 : ref_id12 + 1]: _space_token(pack) # Create some group. 
@@ -369,13 +368,9 @@ def test_remove_pack(self): # print('check_list_name_all:', self.check_list_name) ## Preparation for remaining pack ID list check ## - expected_id_list_1 = list( - set(check_list_id) - set([ref_id10]) - ) + expected_id_list_1 = list(set(check_list_id) - set([ref_id10])) expected_id_list_2 = list( - set(check_list_id) - - set([ref_id10]) - - set([ref_id11]) + set(check_list_id) - set([ref_id10]) - set([ref_id11]) ) expected_id_list_3 = list( set(check_list_id) @@ -427,9 +422,7 @@ def test_remove_pack(self): self.assertListEqual(expected_id_list_1, remaining_id_1) ## remaining pack name alignment check self.assertNotIn(["remove pack 10"], self.multi_pack.pack_names) - self.assertListEqual( - self.multi_pack.pack_names, expected_name_list_1 - ) + self.assertListEqual(self.multi_pack.pack_names, expected_name_list_1) # Test to remove the added pack from multi_pack with MultiPackGroup self.multi_pack.remove_pack(ref_id11, True) @@ -443,9 +436,7 @@ def test_remove_pack(self): self.assertListEqual(expected_id_list_2, remaining_id_2) ## remaining pack name alignment check self.assertNotIn(["remove pack 11"], self.multi_pack.pack_names) - self.assertListEqual( - self.multi_pack.pack_names, expected_name_list_2 - ) + self.assertListEqual(self.multi_pack.pack_names, expected_name_list_2) # Test to remove the added pack from multi_pack with MultiPackGroup and MultiPackLink self.multi_pack.remove_pack(ref_id12, True) @@ -454,9 +445,7 @@ def test_remove_pack(self): self.assertListEqual(expected_id_list_3, remaining_id_3) ## remaining pack name alignment check self.assertNotIn(["remove pack 12"], self.multi_pack.pack_names) - self.assertListEqual( - self.multi_pack.pack_names, expected_name_list_3 - ) + self.assertListEqual(self.multi_pack.pack_names, expected_name_list_3) self.multi_pack.purge_deleted_packs() self.assertListEqual( @@ -509,7 +498,7 @@ def test_remove_pack_auto_purge(self): # Add MultiPackLink to data_pack11 and data_pack12 & Add MultiPackGroup to data_pack11, data_pack12 # and data_pack13 # Add tokens to each pack. - for pack in self.multi_pack.packs[ref_id11: ref_id12 + 1]: + for pack in self.multi_pack.packs[ref_id11 : ref_id12 + 1]: _space_token(pack) # Create some group. 
@@ -559,13 +548,9 @@ def test_remove_pack_auto_purge(self): # print('check_list_name_all:', self.check_list_name) ## Preparation for remaining pack ID list check ## - expected_id_list_1 = list( - set(check_list_id) - set([ref_id10]) - ) + expected_id_list_1 = list(set(check_list_id) - set([ref_id10])) expected_id_list_2 = list( - set(check_list_id) - - set([ref_id10]) - - set([ref_id11]) + set(check_list_id) - set([ref_id10]) - set([ref_id11]) ) expected_id_list_3 = list( set(check_list_id) @@ -602,9 +587,7 @@ def test_remove_pack_auto_purge(self): self.assertListEqual(expected_id_list_1, remaining_id_1) ## remaining pack name alignment check self.assertNotIn(["remove pack 10"], self.multi_pack.pack_names) - self.assertListEqual( - self.multi_pack.pack_names, expected_name_list_1 - ) + self.assertListEqual(self.multi_pack.pack_names, expected_name_list_1) # Test to remove the added pack from multi_pack with MultiPackGroup self.multi_pack.remove_pack(ref_id11, True) diff --git a/tests/forte/data/ontology/ndarray_attribute_test.py b/tests/forte/data/ontology/ndarray_attribute_test.py index 7e962bb74..a869439e9 100644 --- a/tests/forte/data/ontology/ndarray_attribute_test.py +++ b/tests/forte/data/ontology/ndarray_attribute_test.py @@ -10,7 +10,9 @@ # import the NdEntry classes manually module_name = "ft.onto.sample_ndarray" -module_path = os.path.join(os.path.dirname(__file__), "test_outputs/ft/onto/sample_ndarray.py") +module_path = os.path.join( + os.path.dirname(__file__), "test_outputs/ft/onto/sample_ndarray.py" +) spec = importlib.util.spec_from_file_location(module_name, module_path) module = importlib.util.module_from_spec(spec) sys.modules[module_name] = module @@ -20,11 +22,13 @@ NdEntry2 = module.NdEntry2 NdEntry3 = module.NdEntry3 -globals().update({ - "NdEntry1": module.NdEntry1, - "NdEntry2": module.NdEntry2, - "NdEntry3": module.NdEntry3, -}) +globals().update( + { + "NdEntry1": module.NdEntry1, + "NdEntry2": module.NdEntry2, + "NdEntry3": module.NdEntry3, + } +) """ NdEntry1, NdEntry2, and NdEntry3 are sample Entry containing NdArray attributes @@ -36,11 +40,7 @@ @ddt class SerializationTest(unittest.TestCase): - @data( - NdEntry1, - NdEntry2, - NdEntry3 - ) + @data(NdEntry1, NdEntry2, NdEntry3) def test_serialization(self, TestEntry): data_pack = DataPack() nd_entry = TestEntry(data_pack) @@ -54,11 +54,17 @@ def test_serialization(self, TestEntry): nd_entry_deseri = datapack_deseri.get_single(TestEntry) if nd_entry.value.dtype: - self.assertEqual(nd_entry.value.dtype, nd_entry_deseri.value.dtype) + self.assertEqual( + nd_entry.value.dtype, nd_entry_deseri.value.dtype + ) if nd_entry.value.shape: - self.assertEqual(nd_entry.value.shape, nd_entry_deseri.value.shape) + self.assertEqual( + nd_entry.value.shape, nd_entry_deseri.value.shape + ) if nd_entry.value.data is not None: - self.assertEqual(np.sum(nd_entry.value.data - nd_entry_deseri.value.data), 0) + self.assertEqual( + np.sum(nd_entry.value.data - nd_entry_deseri.value.data), 0 + ) @ddt @@ -119,13 +125,13 @@ def test_invalid_input(self, input_data): @data( (NdEntry1, [[1, 1], [1, 1]]), - (NdEntry1, [[1., 1.], [1., 1.]]), + (NdEntry1, [[1.0, 1.0], [1.0, 1.0]]), (NdEntry2, [[1, 1], [1, 1]]), (NdEntry2, [1]), - (NdEntry2, [1.]), - (NdEntry2, [[1., 1.], [1., 1.]]), + (NdEntry2, [1.0]), + (NdEntry2, [[1.0, 1.0], [1.0, 1.0]]), (NdEntry3, [[1, 1], [1, 1]]), - (NdEntry3, [[1., 1.], [1., 1.]]), + (NdEntry3, [[1.0, 1.0], [1.0, 1.0]]), ) def test_valid_py_list(self, input_data): TestEntry, input_list = input_data diff --git 
a/tests/forte/data/ontology/ontology_code_generator_test.py b/tests/forte/data/ontology/ontology_code_generator_test.py index f1af8a497..369f114c1 100644 --- a/tests/forte/data/ontology/ontology_code_generator_test.py +++ b/tests/forte/data/ontology/ontology_code_generator_test.py @@ -77,7 +77,6 @@ def assert_generation_equal(self, file_a, file_b): continue self.assertEqual(la, lb) - @data( ( "example_ontology", @@ -90,7 +89,7 @@ def assert_generation_equal(self, file_a, file_b): ), ("race_qa_onto", ["ft/onto/race_qa_ontology"]), ("test_top_attribute", ["ft/onto/sample_top_attribute"]), - ("test_ndarray_attribute", ["ft/onto/sample_ndarray"]) + ("test_ndarray_attribute", ["ft/onto/sample_ndarray"]), ) def test_generated_code(self, value): input_file_name, file_paths = value @@ -163,28 +162,45 @@ def test_namespace_depth(self, namespace_depth): ] exp_file_path = exp_file_path_all[namespace_depth:] exp_files = sorted( - [ - f"{os.path.join(folder_path, file)}" - for file in exp_file_path - ] + [f"{os.path.join(folder_path, file)}" for file in exp_file_path] ) self.assertEqual(gen_files, exp_files) @data( (True, "test_duplicate_entry.json", DuplicateEntriesWarning, True), - (True, "test_duplicate_attr_name.json", DuplicatedAttributesWarning, True), + ( + True, + "test_duplicate_attr_name.json", + DuplicatedAttributesWarning, + True, + ), (True, "test_ndarray_dtype_only.json", UserWarning, True), (True, "test_ndarray_shape_only.json", UserWarning, True), (True, "test_self_reference.json", UserWarning, False), (False, "example_ontology.json", OntologySourceNotFoundException, True), - (False, "test_invalid_parent.json", ParentEntryNotSupportedException, True), + ( + False, + "test_invalid_parent.json", + ParentEntryNotSupportedException, + True, + ), (False, "test_invalid_attribute.json", TypeNotDeclaredException, True), (False, "test_nested_item_type.json", UnsupportedTypeException, True), (False, "test_no_item_type.json", TypeNotDeclaredException, True), (False, "test_unknown_item_type.json", TypeNotDeclaredException, True), - (False, "test_invalid_entry_name.json", InvalidIdentifierException, True), - (False, "test_invalid_attr_name.json", InvalidIdentifierException, True), + ( + False, + "test_invalid_entry_name.json", + InvalidIdentifierException, + True, + ), + ( + False, + "test_invalid_attr_name.json", + InvalidIdentifierException, + True, + ), (False, "test_non_string_keys.json", CodeGenerationException, True), ) def test_warnings_errors(self, value): @@ -289,41 +305,43 @@ def test_invalid_json(self, value): @data( [1], - [3, ], + [ + 3, + ], [2, 2], - [[1, 2], [3, 4]] + [[1, 2], [3, 4]], ) def test_ndarray_valid_shape(self, shape): - mapping = { - "dtype": '"int"', - "shape": f"{shape}" - } - template_file = os.path.join(self.spec_dir, "test_ndarray_template.json") + mapping = {"dtype": '"int"', "shape": f"{shape}"} + template_file = os.path.join( + self.spec_dir, "test_ndarray_template.json" + ) with tempfile.TemporaryDirectory() as temp_dir: temp_filename = _get_temp_filename(template_file, temp_dir) _modify_test_template( template_file=temp_filename, mapping=mapping, - output_path=temp_filename) + output_path=temp_filename, + ) utils.validate_json_schema(temp_filename) - @data( - (False, 3), - (True, [2, 2]) - ) + @data((False, 3), (True, [2, 2])) def test_ndarray_invalid_shape(self, value): is_string, shape = value mapping = { "dtype": '"int"', - "shape": '"' + f"{shape}" + '"' if is_string else f"{shape}" + "shape": '"' + f"{shape}" + '"' if is_string else f"{shape}", } - 
template_file = "./tests/forte/data/ontology/test_specs/test_ndarray_template.json" + template_file = ( + "./tests/forte/data/ontology/test_specs/test_ndarray_template.json" + ) with tempfile.TemporaryDirectory() as temp_dir: temp_filename = _get_temp_filename(template_file, temp_dir) _modify_test_template( template_file=temp_filename, mapping=mapping, - output_path=temp_filename) + output_path=temp_filename, + ) with self.assertRaises(ValidationError): utils.validate_json_schema(temp_filename) @@ -345,38 +363,35 @@ def test_ndarray_invalid_shape(self, value): "complex", "complex128", "complex192", - "complex256" + "complex256", ) def test_ndarray_valid_dtype(self, dtype): - mapping = { - "dtype": '"' + f"{dtype}" + '"', - "shape": [2, 2] - } - template_file = "./tests/forte/data/ontology/test_specs/test_ndarray_template.json" + mapping = {"dtype": '"' + f"{dtype}" + '"', "shape": [2, 2]} + template_file = ( + "./tests/forte/data/ontology/test_specs/test_ndarray_template.json" + ) with tempfile.TemporaryDirectory() as temp_dir: temp_filename = _get_temp_filename(template_file, temp_dir) _modify_test_template( template_file=temp_filename, mapping=mapping, - output_path=temp_filename) + output_path=temp_filename, + ) utils.validate_json_schema(temp_filename) - @data( - "xint", - "undefined_dtype" - ) + @data("xint", "undefined_dtype") def test_ndarray_invalid_dtype(self, dtype): - mapping = { - "dtype": '"' + f"{dtype}" + '"', - "shape": [2, 2] - } - template_file = "./tests/forte/data/ontology/test_specs/test_ndarray_template.json" + mapping = {"dtype": '"' + f"{dtype}" + '"', "shape": [2, 2]} + template_file = ( + "./tests/forte/data/ontology/test_specs/test_ndarray_template.json" + ) with tempfile.TemporaryDirectory() as temp_dir: temp_filename = _get_temp_filename(template_file, temp_dir) _modify_test_template( template_file=temp_filename, mapping=mapping, - output_path=temp_filename) + output_path=temp_filename, + ) with self.assertRaises(ValidationError): utils.validate_json_schema(temp_filename) @@ -410,11 +425,9 @@ def _modify_test_template(template_file, mapping, output_path): mapping (dict): mapping to substitute key words. output_path (str): output path of the generated file. 
""" - with open(template_file, 'r') \ - as template_file: + with open(template_file, "r") as template_file: data = template_file.read() data = Template(data) data = data.substitute(mapping) - with open(output_path, 'w') \ - as output_json: + with open(output_path, "w") as output_json: output_json.write(data) diff --git a/tests/forte/data/readers/classification_reader_test.py b/tests/forte/data/readers/classification_reader_test.py index cf13941f0..f73d3f437 100644 --- a/tests/forte/data/readers/classification_reader_test.py +++ b/tests/forte/data/readers/classification_reader_test.py @@ -24,6 +24,7 @@ from forte.data.readers import ClassificationDatasetReader from forte.data.data_pack import DataPack + class ClassificationDatasetReaderTest(unittest.TestCase): def setUp(self): self.sample_file1: str = os.path.abspath( @@ -51,45 +52,53 @@ def setUp(self): def test_classification_dataset_reader(self): # test incompatible forte data field `ft.onto.base_ontology.Document` - + with self.assertRaises(ProcessorConfigError): self.pipeline = Pipeline() - self.pipeline.set_reader(ClassificationDatasetReader(), - config={"index2class": self.index2class1, - "skip_k_starting_lines": 0, - "forte_data_fields": - [ - "label", - "ft.onto.base_ontology.Title", - "ft.onto.base_ontology.Document", - ]}) + self.pipeline.set_reader( + ClassificationDatasetReader(), + config={ + "index2class": self.index2class1, + "skip_k_starting_lines": 0, + "forte_data_fields": [ + "label", + "ft.onto.base_ontology.Title", + "ft.onto.base_ontology.Document", + ], + }, + ) self.pipeline.initialize() # test wrong length of forte_data_fields with self.assertRaises(ProcessorConfigError): self.pipeline = Pipeline() - self.pipeline.set_reader(ClassificationDatasetReader(), - config={"index2class": self.index2class1, - "skip_k_starting_lines": 0, - "forte_data_fields": - [ - "label", - "ft.onto.base_ontology.Body", - ]}) + self.pipeline.set_reader( + ClassificationDatasetReader(), + config={ + "index2class": self.index2class1, + "skip_k_starting_lines": 0, + "forte_data_fields": [ + "label", + "ft.onto.base_ontology.Body", + ], + }, + ) self.pipeline.initialize() # length check happens while processing data for data_pack in self.pipeline.process_dataset(self.sample_file1): continue self.pipeline = Pipeline() - self.pipeline.set_reader(ClassificationDatasetReader(), - config={ - "forte_data_fields": - [ - "label", - "ft.onto.base_ontology.Title", - "ft.onto.base_ontology.Body", - ], - "index2class": self.index2class1, - "skip_k_starting_lines": 0}) + self.pipeline.set_reader( + ClassificationDatasetReader(), + config={ + "forte_data_fields": [ + "label", + "ft.onto.base_ontology.Title", + "ft.onto.base_ontology.Body", + ], + "index2class": self.index2class1, + "skip_k_starting_lines": 0, + }, + ) self.pipeline.initialize() for data_pack in self.pipeline.process_dataset(self.sample_file1): ( @@ -114,7 +123,7 @@ def test_classification_dataset_reader(self): self.assertTrue(len(doc_class) == 1) # print(class_idx_to_name, expected_class_id) self.assertEqual( - doc_class[0], self.class_idx_to_name[expected_class_id] + doc_class[0], self.class_idx_to_name[expected_class_id] ) # Test Title title_entries = list(data_pack.get(Title)) diff --git a/tests/forte/data/readers/html_reader_test.py b/tests/forte/data/readers/html_reader_test.py index 41c10dfb4..fb75d84b8 100644 --- a/tests/forte/data/readers/html_reader_test.py +++ b/tests/forte/data/readers/html_reader_test.py @@ -57,8 +57,8 @@ def tearDown(self): "Page TitleThis is a paragraph", ), 
( - "This example has a broken end tagThis example has a broken end tag diff --git a/tests/forte/datasets/wikipedia/dbpedia/dbpedia_datasets_test.py b/tests/forte/datasets/wikipedia/dbpedia/dbpedia_datasets_test.py index 9b75a5f9d..e5813f952 100644 --- a/tests/forte/datasets/wikipedia/dbpedia/dbpedia_datasets_test.py +++ b/tests/forte/datasets/wikipedia/dbpedia/dbpedia_datasets_test.py @@ -123,7 +123,9 @@ def test_anchor(self): self.num_indexed(output, 1) pack = DataPack.deserialize( - glob.glob(output + "/**/*.json.gz")[0], serialize_method="jsonpickle", zip_pack=True + glob.glob(output + "/**/*.json.gz")[0], + serialize_method="json", + zip_pack=True, ) self.assertEqual(len(list(pack.get("ft.onto.wikipedia.WikiAnchor"))), 4) diff --git a/tests/forte/grid_test.py b/tests/forte/grid_test.py index 58b51ca47..6621d3507 100644 --- a/tests/forte/grid_test.py +++ b/tests/forte/grid_test.py @@ -25,7 +25,6 @@ from forte.data.ontology.top import ImagePayload - class GridTest(unittest.TestCase): """ Test Grid related ontologies and operations. @@ -72,19 +71,16 @@ def test_get_grid_cell_value_error(self): def fn1(): self.grid._get_image_within_grid_cell(self.line, 2, 0) - self.assertRaises(ValueError, fn1) def fn2(): self.grid._get_image_within_grid_cell(self.line, 0, 3) - self.assertRaises(ValueError, fn2) def fn3(): self.grid._get_image_within_grid_cell(self.line, -1, 0) - self.assertRaises(ValueError, fn3) def fn4(): @@ -92,15 +88,18 @@ def fn4(): self.assertRaises(ValueError, fn4) - - def test_get_overlapped_grid_cell_indices(self): - self.assertEqual(self.grid.get_overlapped_grid_cell_indices(self.line), [(0,0), (1,1)]) - - line = np.zeros((4, 6)) - line[0,0]= 1 - line[2,0]=1 - line[0,2]=1 - line[0,4]=1 - self.assertEqual(self.grid.get_overlapped_grid_cell_indices(line), [(0, 0), (0, 1), (0, 2), (1, 0)]) + self.assertEqual( + self.grid.get_overlapped_grid_cell_indices(self.line), + [(0, 0), (1, 1)], + ) + line = np.zeros((4, 6)) + line[0, 0] = 1 + line[2, 0] = 1 + line[0, 2] = 1 + line[0, 4] = 1 + self.assertEqual( + self.grid.get_overlapped_grid_cell_indices(line), + [(0, 0), (0, 1), (0, 2), (1, 0)], + ) diff --git a/tests/forte/image_annotation_test.py b/tests/forte/image_annotation_test.py index 700f845cb..fad2c0778 100644 --- a/tests/forte/image_annotation_test.py +++ b/tests/forte/image_annotation_test.py @@ -47,14 +47,17 @@ def test_datapack_image_operation(self): datapack = DataPack("image2") datapack.set_image(self.line, 0) self.assertTrue(np.array_equal(datapack.image, self.datapack.image)) + def fn(): # invalid image index datapack.set_image(self.line, 2) + self.assertRaises(ProcessExecutionException, fn) def fn(): # invalid image index datapack.get_image(1) + self.assertRaises(ProcessExecutionException, fn) datapack.add_image(self.line) @@ -66,29 +69,28 @@ def fn(): ImageAnnotation(self.datapack) self.datapack.add_all_remaining_entries() - def test_datapack_image_operation(self): datapack = DataPack("image2") datapack.set_image(self.line, 0) self.assertTrue(np.array_equal(datapack.image, self.datapack.image)) + def fn(): # invalid image index datapack.set_image(self.line, 2) + self.assertRaises(ProcessExecutionException, fn) def fn(): # invalid image index datapack.get_image(1) + self.assertRaises(ProcessExecutionException, fn) datapack.add_image(self.line) self.assertTrue(np.array_equal(datapack.get_image(1), self.line)) self.datapack.add_all_remaining_entries() - - - def test_image_annotation(self): self.assertEqual( self.datapack.get_single(ImageAnnotation).image_payload_idx, 0 
diff --git a/tests/forte/notebooks/audio_tutorial_test.py b/tests/forte/notebooks/audio_tutorial_test.py index 16da086d8..eca1ffee5 100644 --- a/tests/forte/notebooks/audio_tutorial_test.py +++ b/tests/forte/notebooks/audio_tutorial_test.py @@ -3,8 +3,7 @@ @testbook( - "docs/notebook_tutorial/Automatic_Speech_Recognition.ipynb", - execute=False + "docs/notebook_tutorial/Automatic_Speech_Recognition.ipynb", execute=False ) def test_Automatic_Speech_Recognition(tb): if sys.version_info[0] > 3.7: diff --git a/tests/forte/notebooks/ocr_test.py b/tests/forte/notebooks/ocr_test.py index f1d5456cb..6baf31ad8 100644 --- a/tests/forte/notebooks/ocr_test.py +++ b/tests/forte/notebooks/ocr_test.py @@ -1,9 +1,8 @@ from testbook import testbook import os -@testbook( - "docs/notebook_tutorial/ocr.ipynb", execute=False -) + +@testbook("docs/notebook_tutorial/ocr.ipynb", execute=False) def test_wrap_MT_inference_pipeline(tb): # if we just want to run through the notebook tb.execute_cell("ocr_reader") @@ -12,12 +11,202 @@ def test_wrap_MT_inference_pipeline(tb): tb.execute_cell("get_image") tb.execute_cell("pipeline") tb.execute_cell("recognize_char") - recognized_chars = ['T', 'e', 'x', 't', 'M', 'e', 's', 's', 'a', 'g', 'e', 'T', 'o', 'd', 'a', 'y', '1', '5', ':', '4', '6', 'I', 't', 's', 'E', 'm', 'm', 'a', '.', '|', 't', 'r', 'i', 'e', 'd', 't', 'o', 'c', 'a', 'l', 'l', 'y', 'o', 'u', 'b', 'u', 't', 's', 'i', 'g', 'n', 'a', 'l', 'b', 'a', 'd', '.', '|', 'b', 'e', 'e', 'n', 't', 'a', 'k', 'e', 'n', 't', 'o', 'h', 'o', 's', 'p', 'i', 't', 'a', 'l', 'a', 'f', 't', 'e', 'r', 'h', 'a', 'v', 'i', 'n', 'g', 'a', 'f', 'a', 'l', 'l', 't', 'h', 'i', 's', 'm', 'o', 'r', 'n', 'i', 'n', 'g', '.', 'I', 'f', 'p', 'o', 's', 's', 'i', 'b', 'l', 'e', 'c', 'a', 'n', 'y', 'o', 'u', 'd', 'o', 'm', 'e', 'a', 'q', 'u', 'i', 'c', 'k', 'f', 'a', 'v', 'o', 'u', 'r', 'a', 'n', 'd', 't', 'e', 'x', 't', 'm', 'e', 'x'] - recognized_chars_output = "Recognized characters: \n" + " " + str(recognized_chars) + recognized_chars = [ + "T", + "e", + "x", + "t", + "M", + "e", + "s", + "s", + "a", + "g", + "e", + "T", + "o", + "d", + "a", + "y", + "1", + "5", + ":", + "4", + "6", + "I", + "t", + "s", + "E", + "m", + "m", + "a", + ".", + "|", + "t", + "r", + "i", + "e", + "d", + "t", + "o", + "c", + "a", + "l", + "l", + "y", + "o", + "u", + "b", + "u", + "t", + "s", + "i", + "g", + "n", + "a", + "l", + "b", + "a", + "d", + ".", + "|", + "b", + "e", + "e", + "n", + "t", + "a", + "k", + "e", + "n", + "t", + "o", + "h", + "o", + "s", + "p", + "i", + "t", + "a", + "l", + "a", + "f", + "t", + "e", + "r", + "h", + "a", + "v", + "i", + "n", + "g", + "a", + "f", + "a", + "l", + "l", + "t", + "h", + "i", + "s", + "m", + "o", + "r", + "n", + "i", + "n", + "g", + ".", + "I", + "f", + "p", + "o", + "s", + "s", + "i", + "b", + "l", + "e", + "c", + "a", + "n", + "y", + "o", + "u", + "d", + "o", + "m", + "e", + "a", + "q", + "u", + "i", + "c", + "k", + "f", + "a", + "v", + "o", + "u", + "r", + "a", + "n", + "d", + "t", + "e", + "x", + "t", + "m", + "e", + "x", + ] + recognized_chars_output = ( + "Recognized characters: \n" + " " + str(recognized_chars) + ) assert tb.cell_output_text("recognize_char") == recognized_chars_output - + tb.execute_cell("recognize_token") - recognized_tokens = ['Text', 'Message', 'Today', '15:46', 'Its', 'Emma.', '|', 'tried', 'to', 'call', 'you', 'but', 'signal', 'bad.', '|', 'been', 'taken', 'to', 'hospital', 'after', 'having', 'a', 'fall', 'this', 'morning.', 'If', 'possible', 'can', 'you', 'do', 'me', 'a', 'quick', 'favour', 
'and', 'text', 'me', 'x'] - recognize_token_output = "Recognized tokens: \n" + " " + str(recognized_tokens) + recognized_tokens = [ + "Text", + "Message", + "Today", + "15:46", + "Its", + "Emma.", + "|", + "tried", + "to", + "call", + "you", + "but", + "signal", + "bad.", + "|", + "been", + "taken", + "to", + "hospital", + "after", + "having", + "a", + "fall", + "this", + "morning.", + "If", + "possible", + "can", + "you", + "do", + "me", + "a", + "quick", + "favour", + "and", + "text", + "me", + "x", + ] + recognize_token_output = ( + "Recognized tokens: \n" + " " + str(recognized_tokens) + ) assert tb.cell_output_text("recognize_token") == recognize_token_output - diff --git a/tests/forte/notebooks/tutorial_MT_with_forte_test.py b/tests/forte/notebooks/tutorial_MT_with_forte_test.py index 02d18d87b..da3349c5d 100644 --- a/tests/forte/notebooks/tutorial_MT_with_forte_test.py +++ b/tests/forte/notebooks/tutorial_MT_with_forte_test.py @@ -1,9 +1,8 @@ from testbook import testbook import os -@testbook( - "docs/notebook_tutorial/tutorial_MT_with_forte.ipynb", execute=False -) + +@testbook("docs/notebook_tutorial/tutorial_MT_with_forte.ipynb", execute=False) def test_wrap_MT_inference_pipeline(tb): # if we just want to run through the notebook tb.execute_cell("pip_install") diff --git a/tests/forte/processors/data_augment/algorithms/data_augmentation_op_test.py b/tests/forte/processors/data_augment/algorithms/data_augmentation_op_test.py index 8d96b3556..5ce375ba9 100644 --- a/tests/forte/processors/data_augment/algorithms/data_augmentation_op_test.py +++ b/tests/forte/processors/data_augment/algorithms/data_augmentation_op_test.py @@ -172,26 +172,26 @@ def test_operations(self) -> None: (1, [[0, 1], [1, 1], [1, 3]], [[0, 2], [2, 5], [5, 8]], False, True, 5), (1, [[0, 1], [1, 1], [2, 3]], [[0, 2], [2, 5], [6, 8]], False, True, 5), ( - 1, - [[0, 1], [1, 1], [1, 3]], - [[0, 2], [2, 5], [5, 8]], - False, - False, - 2, + 1, + [[0, 1], [1, 1], [1, 3]], + [[0, 2], [2, 5], [5, 8]], + False, + False, + 2, ), ( - 1, - [[0, 1], [1, 1], [2, 3]], - [[0, 2], [2, 5], [6, 8]], - False, - False, - 2, + 1, + [[0, 1], [1, 1], [2, 3]], + [[0, 2], [2, 5], [6, 8]], + False, + False, + 2, ), (0, [[1, 2], [2, 3]], [[1, 4], [4, 5]], True, True, 0), ) @unpack def test_modify_index( - self, index, old_spans, new_spans, is_begin, is_inclusive, aligned_index + self, index, old_spans, new_spans, is_begin, is_inclusive, aligned_index ): old_spans = [Span(span[0], span[1]) for span in old_spans] new_spans = [Span(span[0], span[1]) for span in new_spans] @@ -221,35 +221,35 @@ def test_multi_pack_copy_link_or_group(self): @data( ( - [ - "Mary and Samantha arrived at the bus station early but waited \ + [ + "Mary and Samantha arrived at the bus station early but waited \ until noon for the bus ." 
- ], - [ - "MaTherery Avoidand arrived at the bus station early but waited \ + ], + [ + "MaTherery Avoidand arrived at the bus station early but waited \ until noon for the bus Last" - ], + ], + [ [ - [ - "MaTherery", - "There", - "and", - "arrived", - "at", - "the", - "bus", - "station", - "early", - "but", - "waited", - "until", - "noon", - "for", - "the", - "bus", - "Last", - ] - ], + "MaTherery", + "There", + "and", + "arrived", + "at", + "the", + "bus", + "station", + "early", + "but", + "waited", + "until", + "noon", + "for", + "the", + "bus", + "Last", + ] + ], ) ) @unpack @@ -259,7 +259,7 @@ def test_pipeline(self, texts, expected_outputs, expected_tokens): boxer_config = {"pack_name": "input"} replacer_op = ( - DummyAugmenter.__module__ + "." + DummyAugmenter.__qualname__ + DummyAugmenter.__module__ + "." + DummyAugmenter.__qualname__ ) processor_config = { @@ -289,49 +289,49 @@ def test_pipeline(self, texts, expected_outputs, expected_tokens): @data( ( - [ - "Mary and Samantha arrived at the bus station early but waited \ + [ + "Mary and Samantha arrived at the bus station early but waited \ until noon for the bus ." - ], - [ - " NLP Ma NLP ry Samantha NLP arrived at the bus station early but waited \ + ], + [ + " NLP Ma NLP ry Samantha NLP arrived at the bus station early but waited \ until noon for the bus NLP . NLP" - ], - [ - "Ma NLP ry Samantha NLP arrived at the bus station early but waited \ + ], + [ + "Ma NLP ry Samantha NLP arrived at the bus station early but waited \ until noon for the bus NLP ." - ], + ], + [ [ - [ - "Ma NLP ry", - "Samantha", - "arrived", - "at", - "the", - "bus", - "station", - "early", - "but", - "waited", - "until", - "noon", - "for", - "the", - "bus", - ".", - ], + "Ma NLP ry", + "Samantha", + "arrived", + "at", + "the", + "bus", + "station", + "early", + "but", + "waited", + "until", + "noon", + "for", + "the", + "bus", + ".", ], - [["til", "noon", "for", "the", "bus", "."]], + ], + [["til", "noon", "for", "the", "bus", "."]], ) ) @unpack def test_replace_token( - self, - texts, - expected_outputs, - expected_sentences, - expected_tokens, - expected_links, + self, + texts, + expected_outputs, + expected_sentences, + expected_tokens, + expected_links, ): for idx, text in enumerate(texts): file_path = os.path.join(self.test_dir, f"{idx + 1}.txt") @@ -348,9 +348,9 @@ def test_replace_token( nlp.add(component=WhiteSpaceTokenizer(), selector=AllPackSelector()) replacer_op = ( - ReplacementAugmentTest.__module__ - + "." - + ReplacementAugmentTest.__qualname__ + ReplacementAugmentTest.__module__ + + "." 
+            + ReplacementAugmentTest.__qualname__
         )
 
         processor_config = {
diff --git a/tests/forte/processors/data_augment/algorithms/embedding_similarity_replacement_op_test.py b/tests/forte/processors/data_augment/algorithms/embedding_similarity_replacement_op_test.py
index be7361aea..ed81ba75a 100644
--- a/tests/forte/processors/data_augment/algorithms/embedding_similarity_replacement_op_test.py
+++ b/tests/forte/processors/data_augment/algorithms/embedding_similarity_replacement_op_test.py
@@ -78,7 +78,7 @@ def test_replace(self):
         )[0]
         self.assertIn(
             augmented_token.text,
-            ["yahoo", "aol", "microsoft", "web", "internet"]
+            ["yahoo", "aol", "microsoft", "web", "internet"],
         )
 
     @data(
@@ -113,9 +113,7 @@ def test_pipeline(self, texts, expected_outputs):
             },
             "augment_pack_names": {"input": "augmented_input"},
         }
-        nlp.add(
-            component=(DataAugProcessor()), config=processor_config
-        )
+        nlp.add(component=(DataAugProcessor()), config=processor_config)
 
         nlp.initialize()
         for idx, m_pack in enumerate(nlp.process_dataset(texts)):
diff --git a/tests/forte/utils/payload_decorator_test.py b/tests/forte/utils/payload_decorator_test.py
index 626683bbe..243e4a2e9 100644
--- a/tests/forte/utils/payload_decorator_test.py
+++ b/tests/forte/utils/payload_decorator_test.py
@@ -21,10 +21,7 @@
 from forte.data.data_pack import DataPack
 from forte.data.ontology.top import load_func, AudioPayload
 
-from ft.onto.payload_ontology import (
-    JpegPayload,
-    SoundFilePayload
-)
+from ft.onto.payload_ontology import JpegPayload, SoundFilePayload
 
 
 class PillowJpegPayload(JpegPayload):
@@ -96,7 +93,7 @@ def load(payload: SoundFilePayload):
             ) from e
 
         def get_first(
-            seq,
+            seq,
         ):  # takes the first item as soundfile returns a tuple of (data, samplerate)
             return seq[0]
@@ -138,6 +135,7 @@ def test_local_image_payload(self):
         uri = "https://raw.githubusercontent.com/asyml/forte/assets/ocr_tutorial/ocr.jpg"
         local_path = "ocr.jpg"
         import urllib.request
+
         urllib.request.urlretrieve(uri, local_path)
 
         payload = PillowJpegPayload(datapack)
@@ -150,10 +148,10 @@ def test_local_image_payload(self):
 
     def test_load_from_parent(self):
         """
-        In this test we try to call the load function of the parent classes. 
+        In this test we try to call the load function of the parent classes.
 
-        For example, we registered PillowJpegPayload, its parent is JpegPayload. The 
-        behavior is that we will invoke the function registering at the right level. 
+        For example, we registered PillowJpegPayload, whose parent is JpegPayload. The
+        behavior is that we will invoke the function registered at the right level.
""" datapack = DataPack("load_from_parent") @@ -165,7 +163,7 @@ def test_load_from_parent(self): datapack.add_entry(jpeg_payload) self.assertEqual( jpeg_payload.cache, - f"unimplemented parent JpegPayload with {jpeg_uri}" + f"unimplemented parent JpegPayload with {jpeg_uri}", ) # Add a `AudioPayload`, which is the parent of `SoundFilePayload` @@ -176,23 +174,23 @@ def test_load_from_parent(self): datapack.add_entry(audio_payload) self.assertEqual( audio_payload.cache, - f"unimplemented parent AudioPayload with {audio_uri}" + f"unimplemented parent AudioPayload with {audio_uri}", ) def test_audio_payload(self): datapack = DataPack("audio") payload = SoundFilePayload(datapack) payload.uri = ( - os.path.abspath( - os.path.join( - os.path.dirname(os.path.abspath(__file__)), - os.pardir, - os.pardir, - os.pardir, - "data_samples/audio_reader_test", - ) + os.path.abspath( + os.path.join( + os.path.dirname(os.path.abspath(__file__)), + os.pardir, + os.pardir, + os.pardir, + "data_samples/audio_reader_test", ) - + "/test_audio_0.flac" + ) + + "/test_audio_0.flac" ) payload.load()