From 35c1cedd9ad1c35225cd8626a457031b08348d57 Mon Sep 17 00:00:00 2001 From: J007X <92790555+J007X@users.noreply.github.com> Date: Wed, 5 Apr 2023 08:35:04 +0400 Subject: [PATCH] Improving class loading efficiency and considering dynamic type list (#919) * Initial commit for the performance change relating to class loadind in new code after underline changes related to data pack. After discussed with Hector using standard LRU_Cache instead of custom cache. * Option 2 for considering dynamic type list as mentioned in #765: created an non-cached version of get_class (get_class_nc) and used in get_class to avoid dynamic list issue as mentioned in comments of #765 as pointed out my Hector. * Fixed related method sequence issue and added doc * made lru_cache compatible with Python 3.7 * using full_class_name instead of class_name in get_class_nc * fixed black format issue * fixed lint format issue (import sequence) * fixed for lint issue (import grouping) * Fix CI warning (in ontology) * Fix "unhashable key type issue" when using LRU cache for get_class that have optional list as parameter -- change to cache lower level result from "locate" method called in this method, and also provide a non-cached version (same as before) for resolving #765. * Fix comments on duplicate code of "get_class" and "get_class_nc" by re-factoring into one method with "cached_lookup" optional flag -- by default this is true but in _get_sub_class this flag needs to set to "false" based on comments in #765 (for dynamic class loading scenario). * Need to remove the old "get_class_nc" signature removed in refactoring --------- Co-authored-by: Hector --- forte/data/data_store.py | 2 +- forte/utils/utils.py | 38 ++++++++++++++++++++++++++++++-------- 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/forte/data/data_store.py b/forte/data/data_store.py index 016ed2991..aab05364b 100644 --- a/forte/data/data_store.py +++ b/forte/data/data_store.py @@ -895,7 +895,7 @@ def _is_subclass( if cls_qualified_name in type_name_parent_class: return True else: - entry_class = get_class(type_name) + entry_class = get_class(type_name, cached_lookup=False) if issubclass(entry_class, cls): type_name_parent_class.add(cls_qualified_name) return True diff --git a/forte/utils/utils.py b/forte/utils/utils.py index 4035509e9..b17f9a4d3 100644 --- a/forte/utils/utils.py +++ b/forte/utils/utils.py @@ -16,7 +16,7 @@ """ import sys import difflib -from functools import wraps +from functools import wraps, lru_cache from inspect import getfullargspec from pydoc import locate from typing import Dict, List, Optional, get_type_hints, Tuple @@ -78,8 +78,26 @@ def get_class_name(o, lower: bool = False) -> str: return o.__name__ -def get_class(full_class_name: str, module_paths: Optional[List[str]] = None): - r"""Returns the class based on class name. +@lru_cache() +def cached_locate(name_to_locate_class): + r"""Wrapped version of locate for loading class based on + ``name_to_locate_class``, cached by lru_cache. + Args: + name_to_locate_class (str): Name or full path to the class. + + Returns: + The target class. + + """ + return locate(name_to_locate_class) + + +def get_class( + full_class_name: str, + module_paths: Optional[List[str]] = None, + cached_lookup: Optional[bool] = True, +): + r"""Returns the class based on class name, with cached lookup option. Args: full_class_name (str): Name or full path to the class. @@ -87,18 +105,22 @@ def get_class(full_class_name: str, module_paths: Optional[List[str]] = None): class. This is used if the class cannot be located solely based on ``class_name``. The first module in the list that contains the class is used. + cached_lookup (bool): Flag to use "cached_locate" for class loading Returns: The target class. - Raises: - ValueError: If class is not found based on :attr:`class_name` and - :attr:`module_paths`. """ - class_ = locate(full_class_name) + if cached_lookup: + class_ = cached_locate(full_class_name) + else: + class_ = locate(full_class_name) if (class_ is None) and (module_paths is not None): for module_path in module_paths: - class_ = locate(".".join([module_path, full_class_name])) + if cached_lookup: + class_ = cached_locate(".".join([module_path, full_class_name])) + else: + class_ = locate(".".join([module_path, full_class_name])) if class_ is not None: break