From b7a2898c19bc043a4fa3db7940056843e6c5ec04 Mon Sep 17 00:00:00 2001 From: JamesX Date: Tue, 13 Dec 2022 16:37:46 +0400 Subject: [PATCH 01/12] Initial commit for the performance change relating to class loadind in new code after underline changes related to data pack. After discussed with Hector using standard LRU_Cache instead of custom cache. --- forte/utils/utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/forte/utils/utils.py b/forte/utils/utils.py index 74813db4a..f462bdbd6 100644 --- a/forte/utils/utils.py +++ b/forte/utils/utils.py @@ -23,6 +23,8 @@ from sortedcontainers import SortedList from typing_inspect import is_union_type, get_origin +from functools import lru_cache + __all__ = [ "get_full_module_name", "get_class_name", @@ -77,6 +79,7 @@ def get_class_name(o, lower: bool = False) -> str: return o.__name__ +@lru_cache def get_class(class_name: str, module_paths: Optional[List[str]] = None): r"""Returns the class based on class name. From 30d1e236d9d2f2f90fa24c228dd144b18b2d5257 Mon Sep 17 00:00:00 2001 From: JamesX Date: Wed, 1 Feb 2023 17:27:21 +0400 Subject: [PATCH 02/12] Option 2 for considering dynamic type list as mentioned in #765: created an non-cached version of get_class (get_class_nc) and used in get_class to avoid dynamic list issue as mentioned in comments of #765 as pointed out my Hector. --- forte/data/data_store.py | 4 ++-- forte/utils/utils.py | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/forte/data/data_store.py b/forte/data/data_store.py index f46ae34f2..edc271b0c 100644 --- a/forte/data/data_store.py +++ b/forte/data/data_store.py @@ -22,7 +22,7 @@ from sortedcontainers import SortedList from typing_inspect import get_origin, get_args, is_generic_type -from forte.utils import get_class +from forte.utils import get_class, get_class_nc from forte.utils.utils import get_full_module_name from forte.data.ontology.code_generation_objects import EntryTree from forte.data.ontology.ontology_code_generator import OntologyCodeGenerator @@ -879,7 +879,7 @@ def _is_subclass( if cls_qualified_name in type_name_parent_class: return True else: - entry_class = get_class(type_name) + entry_class = get_class_nc(type_name) if issubclass(entry_class, cls): type_name_parent_class.add(cls_qualified_name) return True diff --git a/forte/utils/utils.py b/forte/utils/utils.py index f462bdbd6..cbc675bc8 100644 --- a/forte/utils/utils.py +++ b/forte/utils/utils.py @@ -29,6 +29,7 @@ "get_full_module_name", "get_class_name", "get_class", + "get_class_nc", "get_qual_name", "create_class_with_kwargs", "check_type", @@ -81,6 +82,12 @@ def get_class_name(o, lower: bool = False) -> str: @lru_cache def get_class(class_name: str, module_paths: Optional[List[str]] = None): + r"""This is the cached version of get_class_nc. + """ + return get_class_nc(str, module_paths) + + +def get_class_nc(class_name: str, module_paths: Optional[List[str]] = None): r"""Returns the class based on class name. Args: From daa9cc90ff8ef3b7c55a67bf4727279985fec5ac Mon Sep 17 00:00:00 2001 From: JamesX Date: Thu, 2 Feb 2023 16:49:24 +0400 Subject: [PATCH 03/12] Fixed related method sequence issue and added doc --- forte/utils/utils.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/forte/utils/utils.py b/forte/utils/utils.py index cbc675bc8..b402c27a5 100644 --- a/forte/utils/utils.py +++ b/forte/utils/utils.py @@ -80,13 +80,6 @@ def get_class_name(o, lower: bool = False) -> str: return o.__name__ -@lru_cache -def get_class(class_name: str, module_paths: Optional[List[str]] = None): - r"""This is the cached version of get_class_nc. - """ - return get_class_nc(str, module_paths) - - def get_class_nc(class_name: str, module_paths: Optional[List[str]] = None): r"""Returns the class based on class name. @@ -122,6 +115,24 @@ def get_class_nc(class_name: str, module_paths: Optional[List[str]] = None): return class_ +@lru_cache +def get_class(class_name: str, module_paths: Optional[List[str]] = None): + r"""This is the cached version of get_class_nc. + + Args: + class_name (str): Name or full path to the class. + module_paths (list): Paths to candidate modules to search for the + class. This is used if the class cannot be located solely based on + ``class_name``. The first module in the list that contains the class + is used. + + Returns: + The target class. + + """ + return get_class_nc(class_name, module_paths) + + def get_qual_name(o: object, lower: bool = False) -> str: r"""Returns the qualified name of an object ``o``. From 684e10ba1c75c190a356945e8018f63bf0bfbeab Mon Sep 17 00:00:00 2001 From: JamesX Date: Mon, 6 Feb 2023 11:45:00 +0400 Subject: [PATCH 04/12] made lru_cache compatible with Python 3.7 --- forte/utils/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/forte/utils/utils.py b/forte/utils/utils.py index b402c27a5..07cc35f63 100644 --- a/forte/utils/utils.py +++ b/forte/utils/utils.py @@ -115,7 +115,7 @@ def get_class_nc(class_name: str, module_paths: Optional[List[str]] = None): return class_ -@lru_cache +@lru_cache() def get_class(class_name: str, module_paths: Optional[List[str]] = None): r"""This is the cached version of get_class_nc. From 3a180e3c84fc0bd24da7d00e83ad3f51e2da266d Mon Sep 17 00:00:00 2001 From: JamesX Date: Mon, 6 Feb 2023 12:11:44 +0400 Subject: [PATCH 05/12] using full_class_name instead of class_name in get_class_nc --- forte/utils/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/forte/utils/utils.py b/forte/utils/utils.py index e4b4a146b..08a74679f 100644 --- a/forte/utils/utils.py +++ b/forte/utils/utils.py @@ -81,7 +81,7 @@ def get_class_name(o, lower: bool = False) -> str: return o.__name__ -def get_class_nc(class_name: str, module_paths: Optional[List[str]] = None): +def get_class_nc(full_class_name: str, module_paths: Optional[List[str]] = None): r"""Returns the class based on class name. Args: From 4868917860864ee5d51794111afa44d5f16b7489 Mon Sep 17 00:00:00 2001 From: JamesX Date: Tue, 7 Feb 2023 14:19:09 +0400 Subject: [PATCH 06/12] fixed black format issue --- forte/utils/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/forte/utils/utils.py b/forte/utils/utils.py index 08a74679f..1a57d9630 100644 --- a/forte/utils/utils.py +++ b/forte/utils/utils.py @@ -81,7 +81,9 @@ def get_class_name(o, lower: bool = False) -> str: return o.__name__ -def get_class_nc(full_class_name: str, module_paths: Optional[List[str]] = None): +def get_class_nc( + full_class_name: str, module_paths: Optional[List[str]] = None +): r"""Returns the class based on class name. Args: From 357be3f5d7b3dca976e168fac86f23a72bbbac9a Mon Sep 17 00:00:00 2001 From: JamesX Date: Tue, 14 Feb 2023 10:32:09 +0400 Subject: [PATCH 07/12] fixed lint format issue (import sequence) --- forte/utils/utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/forte/utils/utils.py b/forte/utils/utils.py index 1a57d9630..9c1b4b658 100644 --- a/forte/utils/utils.py +++ b/forte/utils/utils.py @@ -21,11 +21,10 @@ from pydoc import locate from typing import Dict, List, Optional, get_type_hints, Tuple +from functools import lru_cache from sortedcontainers import SortedList from typing_inspect import is_union_type, get_origin -from functools import lru_cache - __all__ = [ "get_full_module_name", "get_class_name", From 104a5eba607ddd340ff87557528855e027af588f Mon Sep 17 00:00:00 2001 From: JamesX Date: Tue, 14 Feb 2023 10:55:00 +0400 Subject: [PATCH 08/12] fixed for lint issue (import grouping) --- forte/utils/utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/forte/utils/utils.py b/forte/utils/utils.py index 9c1b4b658..4f1d1c395 100644 --- a/forte/utils/utils.py +++ b/forte/utils/utils.py @@ -16,12 +16,11 @@ """ import sys import difflib -from functools import wraps +from functools import wraps, lru_cache from inspect import getfullargspec from pydoc import locate from typing import Dict, List, Optional, get_type_hints, Tuple -from functools import lru_cache from sortedcontainers import SortedList from typing_inspect import is_union_type, get_origin From e334d0088f74f448d67813cf751f0378b6d09f34 Mon Sep 17 00:00:00 2001 From: JamesX Date: Wed, 22 Mar 2023 15:52:46 +0400 Subject: [PATCH 09/12] Fix CI warning (in ontology) --- forte/data/ontology/ontology_code_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/forte/data/ontology/ontology_code_generator.py b/forte/data/ontology/ontology_code_generator.py index c822f103f..28ecd6e0f 100644 --- a/forte/data/ontology/ontology_code_generator.py +++ b/forte/data/ontology/ontology_code_generator.py @@ -41,7 +41,7 @@ except ImportError: # Try backported to PY<39 `importlib_resources`. import importlib_resources as resources # type: ignore - from importlib_resources.abc import Traversable # type: ignore + from importlib_resources.abc import Traversable from forte.data.ontology import top, utils from forte.data.ontology.code_generation_exceptions import ( From 762d0d9a6ed0b6bb5ed3f662ca871a9efe6c97e0 Mon Sep 17 00:00:00 2001 From: JamesX Date: Thu, 30 Mar 2023 12:54:16 +0400 Subject: [PATCH 10/12] Fix "unhashable key type issue" when using LRU cache for get_class that have optional list as parameter -- change to cache lower level result from "locate" method called in this method, and also provide a non-cached version (same as before) for resolving #765. --- forte/utils/utils.py | 41 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/forte/utils/utils.py b/forte/utils/utils.py index 4f1d1c395..23092119f 100644 --- a/forte/utils/utils.py +++ b/forte/utils/utils.py @@ -82,7 +82,7 @@ def get_class_name(o, lower: bool = False) -> str: def get_class_nc( full_class_name: str, module_paths: Optional[List[str]] = None ): - r"""Returns the class based on class name. + r"""Returns the class based on class name, not cached. Args: full_class_name (str): Name or full path to the class. @@ -131,8 +131,12 @@ def get_class_nc( @lru_cache() -def get_class(class_name: str, module_paths: Optional[List[str]] = None): - r"""This is the cached version of get_class_nc. +def cached_locate(name_to_locate_class): + return locate(name_to_locate_class) + + +def get_class(full_class_name: str, module_paths: Optional[List[str]] = None): + r"""Returns the class based on class name, with cache to improve speed. Args: class_name (str): Name or full path to the class. @@ -145,7 +149,36 @@ def get_class(class_name: str, module_paths: Optional[List[str]] = None): The target class. """ - return get_class_nc(class_name, module_paths) + class_ = cached_locate(full_class_name) + if (class_ is None) and (module_paths is not None): + for module_path in module_paths: + class_ = cached_locate(".".join([module_path, full_class_name])) + if class_ is not None: + break + + # Try to find classes that are dynamically loaded, class_ will still be None if failed. + if class_ is None: + try: + module_name, class_name = full_class_name.rsplit(".", 1) + try: + class_ = getattr(sys.modules[module_name], class_name) + except (AttributeError, KeyError): + # ignore when cannot find the module in sys.modules or cannot find the class + # in the module. + pass + except ValueError: + # ignoring when the full class name doesn't have multiple parts. + pass + + if class_ is None: + if module_paths: + raise ValueError( + f"Class not found in {module_paths}: {full_class_name}" + ) + else: + raise ValueError(f"Class not found in {full_class_name}") + + return class_ def get_qual_name(o: object, lower: bool = False) -> str: From ad63fcab364dda3b3f59ed772eee9430b5887789 Mon Sep 17 00:00:00 2001 From: JamesX Date: Tue, 4 Apr 2023 12:15:05 +0400 Subject: [PATCH 11/12] Fix comments on duplicate code of "get_class" and "get_class_nc" by re-factoring into one method with "cached_lookup" optional flag -- by default this is true but in _get_sub_class this flag needs to set to "false" based on comments in #765 (for dynamic class loading scenario). --- forte/data/data_store.py | 11 +++--- forte/utils/utils.py | 73 ++++++++++++---------------------------- 2 files changed, 26 insertions(+), 58 deletions(-) diff --git a/forte/data/data_store.py b/forte/data/data_store.py index 6a154c3d3..7048e5c7c 100644 --- a/forte/data/data_store.py +++ b/forte/data/data_store.py @@ -22,7 +22,7 @@ from sortedcontainers import SortedList from typing_inspect import get_origin, get_args, is_generic_type -from forte.utils import get_class, get_class_nc +from forte.utils import get_class from forte.utils.utils import get_full_module_name from forte.data.ontology.code_generation_objects import EntryTree from forte.data.ontology.ontology_code_generator import OntologyCodeGenerator @@ -806,7 +806,6 @@ def fetch_entry_type_data( else: attr_fields: Dict = self._get_entry_attributes_by_class(type_name) for attr_name, attr_info in attr_fields.items(): - attr_class = get_origin(attr_info.type) # Since we store the class specified by get_origin, # if the output it None, we store the class for it, @@ -896,7 +895,7 @@ def _is_subclass( if cls_qualified_name in type_name_parent_class: return True else: - entry_class = get_class_nc(type_name) + entry_class = get_class(type_name, cached_lookup=False) if issubclass(entry_class, cls): type_name_parent_class.add(cls_qualified_name) return True @@ -1047,7 +1046,6 @@ def _add_entry_raw( self._is_subclass(type_name, cls) for cls in (list(SinglePackEntries) + list(MultiPackEntries)) ): - try: self.__elements[type_name].append(entry) except KeyError: @@ -1246,7 +1244,6 @@ def add_entry_raw( allow_duplicate: bool = True, attribute_data: Optional[List] = None, ) -> int: - r""" This function provides a general implementation to add all types of entries to the data store. It can add namely @@ -1870,7 +1867,9 @@ def co_iterator_annotation_like( self.get_datastore_attr_idx(tn, constants.BEGIN_ATTR_NAME), self.get_datastore_attr_idx(tn, constants.END_ATTR_NAME), ) - except IndexError as e: # all_entries_range[tn][0] will be caught here. + except ( + IndexError + ) as e: # all_entries_range[tn][0] will be caught here. raise ValueError( f"Entry list of type name, {tn} which is" " one list item of input argument `type_names`," diff --git a/forte/utils/utils.py b/forte/utils/utils.py index 23092119f..e8e302aff 100644 --- a/forte/utils/utils.py +++ b/forte/utils/utils.py @@ -79,80 +79,49 @@ def get_class_name(o, lower: bool = False) -> str: return o.__name__ -def get_class_nc( - full_class_name: str, module_paths: Optional[List[str]] = None -): - r"""Returns the class based on class name, not cached. - +@lru_cache() +def cached_locate(name_to_locate_class): + r"""Wrapped version of locate for loading class based on + ``name_to_locate_class``, cached by lru_cache. Args: - full_class_name (str): Name or full path to the class. - module_paths (list): Paths to candidate modules to search for the - class. This is used if the class cannot be located solely based on - ``class_name``. The first module in the list that contains the class - is used. + name_to_locate_class (str): Name or full path to the class. Returns: The target class. - Raises: - ValueError: If class is not found based on :attr:`class_name` and - :attr:`module_paths`. """ - class_ = locate(full_class_name) - if (class_ is None) and (module_paths is not None): - for module_path in module_paths: - class_ = locate(".".join([module_path, full_class_name])) - if class_ is not None: - break - - # Try to find classes that are dynamically loaded, class_ will still be None if failed. - if class_ is None: - try: - module_name, class_name = full_class_name.rsplit(".", 1) - try: - class_ = getattr(sys.modules[module_name], class_name) - except (AttributeError, KeyError): - # ignore when cannot find the module in sys.modules or cannot find the class - # in the module. - pass - except ValueError: - # ignoring when the full class name doesn't have multiple parts. - pass - - if class_ is None: - if module_paths: - raise ValueError( - f"Class not found in {module_paths}: {full_class_name}" - ) - else: - raise ValueError(f"Class not found in {full_class_name}") - - return class_ - - -@lru_cache() -def cached_locate(name_to_locate_class): return locate(name_to_locate_class) -def get_class(full_class_name: str, module_paths: Optional[List[str]] = None): - r"""Returns the class based on class name, with cache to improve speed. +def get_class( + full_class_name: str, + module_paths: Optional[List[str]] = None, + cached_lookup: Optional[bool] = True, +): + r"""Returns the class based on class name, with cached lookup option. Args: - class_name (str): Name or full path to the class. + full_class_name (str): Name or full path to the class. module_paths (list): Paths to candidate modules to search for the class. This is used if the class cannot be located solely based on ``class_name``. The first module in the list that contains the class is used. + cached_lookup (bool): Flag to use "cached_locate" for class loading Returns: The target class. """ - class_ = cached_locate(full_class_name) + if cached_lookup: + class_ = cached_locate(full_class_name) + else: + class_ = locate(full_class_name) if (class_ is None) and (module_paths is not None): for module_path in module_paths: - class_ = cached_locate(".".join([module_path, full_class_name])) + if cached_lookup: + class_ = cached_locate(".".join([module_path, full_class_name])) + else: + class_ = locate(".".join([module_path, full_class_name])) if class_ is not None: break From e2939a11cf2573c2bd670351349abb16468fb2b8 Mon Sep 17 00:00:00 2001 From: JamesX Date: Tue, 4 Apr 2023 12:30:22 +0400 Subject: [PATCH 12/12] Need to remove the old "get_class_nc" signature removed in refactoring --- forte/utils/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/forte/utils/utils.py b/forte/utils/utils.py index e8e302aff..b17f9a4d3 100644 --- a/forte/utils/utils.py +++ b/forte/utils/utils.py @@ -28,7 +28,6 @@ "get_full_module_name", "get_class_name", "get_class", - "get_class_nc", "get_qual_name", "create_class_with_kwargs", "check_type",