diff --git a/mindmeld/app.py b/mindmeld/app.py index 481ffd1ab..71616cf60 100644 --- a/mindmeld/app.py +++ b/mindmeld/app.py @@ -245,7 +245,7 @@ def register_func(self, name=None): def _decorator(func): func_name = name or func.__name__ if not callable(func): - raise TypeError("Invalid function type %s.", func_name) # pylint: disable=W0715 + raise TypeError(f"Invalid function type {func_name}.") self.registry.functions_registry[func_name] = func return _decorator diff --git a/mindmeld/cli.py b/mindmeld/cli.py index eb08043ca..98a26f0c8 100644 --- a/mindmeld/cli.py +++ b/mindmeld/cli.py @@ -780,7 +780,7 @@ def num_parser(ctx, start, port): DUCKLING_VERSION, os.path.basename(exec_path), ] - url = os.path.join(*url_components) + url = "/".join(url_components) logger.info( "Could not find %s binary file, downloading from %s", exec_path, url ) diff --git a/mindmeld/constants.py b/mindmeld/constants.py index 9cd072703..4e22d1fbd 100644 --- a/mindmeld/constants.py +++ b/mindmeld/constants.py @@ -21,6 +21,15 @@ BINARIES_URL = "https://binaries.mindmeld.com" DUCKLING_VERSION = "20211005" +EMBEDDINGS_VERSION = "6B" +EMBEDDINGS_FILE = f"glove.{EMBEDDINGS_VERSION}.zip" +EMBEDDINGS_URL = '/'.join([ + BINARIES_URL, + "glove", + EMBEDDINGS_VERSION, + EMBEDDINGS_FILE, +]) + # ACTIVE LEARNING CONSTANTS class TuneLevel(Enum): diff --git a/mindmeld/models/containers.py b/mindmeld/models/containers.py index 9574dd922..1675861a9 100644 --- a/mindmeld/models/containers.py +++ b/mindmeld/models/containers.py @@ -21,12 +21,10 @@ from tqdm import tqdm from ._util import _is_module_available, _get_module_or_attr as _getattr +from ..constants import EMBEDDINGS_URL from ..core import Bunch from ..exceptions import EmbeddingDownloadError -from ..path import ( - EMBEDDINGS_FILE_PATH, - EMBEDDINGS_FOLDER_PATH, -) +from ..path import EMBEDDINGS_FILE_PATH, EMBEDDINGS_FOLDER_PATH from ..resource_loader import Hasher logger = logging.getLogger(__name__) @@ -61,7 +59,6 @@ class GloVeEmbeddingsContainer: """ CONTAINER_LOOKUP = {} - GLOVE_DOWNLOAD_LINK = "http://nlp.stanford.edu/data/glove.6B.zip" EMBEDDING_FILE_PATH_TEMPLATE = "glove.6B.{}d.txt" ALLOWED_WORD_EMBEDDING_DIMENSIONS = [50, 100, 200, 300] @@ -96,34 +93,24 @@ def get_pretrained_word_to_embeddings_dict(self): def _download_embeddings_and_return_zip_handle(self): - logger.info("Downloading embedding from %s", GloVeEmbeddingsContainer.GLOVE_DOWNLOAD_LINK) + logger.info("Downloading embedding from %s", EMBEDDINGS_URL) # Make the folder that will contain the embeddings if not os.path.exists(EMBEDDINGS_FOLDER_PATH): os.makedirs(EMBEDDINGS_FOLDER_PATH) - with TqdmUpTo( - unit="B", unit_scale=True, miniters=1, desc=GloVeEmbeddingsContainer.GLOVE_DOWNLOAD_LINK - ) as t: + with TqdmUpTo(unit="B", unit_scale=True, miniters=1, desc=EMBEDDINGS_URL) as t: try: - urlretrieve( - GloVeEmbeddingsContainer.GLOVE_DOWNLOAD_LINK, EMBEDDINGS_FILE_PATH, - reporthook=t.update_to - ) + urlretrieve(EMBEDDINGS_URL, EMBEDDINGS_FILE_PATH, reporthook=t.update_to) except ConnectionError as e: - logger.error( - "There was an issue downloading from this " - "link %s with the following error: " - "%s", - GloVeEmbeddingsContainer.GLOVE_DOWNLOAD_LINK, - e, - ) + logger.error("Error downloading from %s: %s", EMBEDDINGS_URL, e) return file_name = GloVeEmbeddingsContainer.EMBEDDING_FILE_PATH_TEMPLATE.format( - self.token_dimension) + self.token_dimension + ) zip_file_object = zipfile.ZipFile(EMBEDDINGS_FILE_PATH, "r") if file_name not in zip_file_object.namelist(): diff --git a/mindmeld/models/taggers/embeddings.py b/mindmeld/models/taggers/embeddings.py index ed7e6a959..8c8116a1f 100644 --- a/mindmeld/models/taggers/embeddings.py +++ b/mindmeld/models/taggers/embeddings.py @@ -24,7 +24,6 @@ logger = logging.getLogger(__name__) -GLOVE_DOWNLOAD_LINK = "http://nlp.stanford.edu/data/glove.6B.zip" EMBEDDING_FILE_PATH_TEMPLATE = "glove.6B.{}d.txt" ALLOWED_WORD_EMBEDDING_DIMENSIONS = [50, 100, 200, 300] diff --git a/mindmeld/path.py b/mindmeld/path.py index fc0f39200..7addd6be7 100644 --- a/mindmeld/path.py +++ b/mindmeld/path.py @@ -22,6 +22,7 @@ from functools import wraps from importlib.machinery import SourceFileLoader +from .constants import EMBEDDINGS_FILE from .exceptions import MindMeldImportError MINDMELD_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) @@ -129,7 +130,7 @@ } EMBEDDINGS_FOLDER_PATH = os.path.join(MINDMELD_ROOT, "data") -EMBEDDINGS_FILE_PATH = os.path.join(EMBEDDINGS_FOLDER_PATH, "glove.6B.zip") +EMBEDDINGS_FILE_PATH = os.path.join(EMBEDDINGS_FOLDER_PATH, EMBEDDINGS_FILE) PREVIOUSLY_USED_CHAR_EMBEDDINGS_FILE_PATH = os.path.join( EMBEDDINGS_FOLDER_PATH, "previously_used_char_embeddings.pkl" )