From 45b4ffcf3a0b6f9a65d782afdf697e4f7de4ea45 Mon Sep 17 00:00:00 2001 From: Andrea Vallebueno Date: Fri, 8 Nov 2024 16:35:43 -0800 Subject: [PATCH] Documentation and gensim int --- .pre-commit-config.yaml | 8 - README.md | 71 ++- glove_v/__init__.py | 13 +- glove_v/data.py | 78 +-- glove_v/docs/MOC_vs_DM.ipynb | 920 +++++++++++++++++----------------- glove_v/docs/tutorial.ipynb | 233 +++++++-- glove_v/gensim_integration.py | 88 +++- glove_v/propagate.py | 20 +- glove_v/variance.py | 60 ++- glove_v/vector.py | 49 +- pyproject.toml | 20 +- requirements.txt | 10 - setup.py | 7 - uv.lock | 403 +++++++++------ 14 files changed, 1150 insertions(+), 830 deletions(-) delete mode 100644 requirements.txt delete mode 100644 setup.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ca8dbdf..562e205 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,14 +16,6 @@ repos: - id: check-yaml - id: end-of-file-fixer - id: trailing-whitespace - - repo: https://github.com/roy-ht/pre-commit-jupyter - rev: v1.2.1 - hooks: - - id: jupyter-notebook-cleanup - args: - # - --remove-kernel-metadata - - --pin-patterns - - "[pin];[donotremove]" - repo: https://github.com/pre-commit/mirrors-prettier rev: v3.1.0 hooks: diff --git a/README.md b/README.md index fc93c64..da0eedc 100644 --- a/README.md +++ b/README.md @@ -5,43 +5,46 @@ This is the code repository for the paper "Statistical Uncertainty in Word Embed **We introduce a method to obtain approximate, easy-to-use, and scalable uncertainty estimates for the GloVe word embeddings and demonstrate its usefulness in natural language tasks and computational social science analysis. 
This code repository contains code to download pre-computed GloVe embeddings and GloVe-V variances for several corpora from our HuggingFace repository, to interact with these data products and propagate uncertainty to downstream tasks.** - ![GloVe-V](figures/glove_diagram.jpg) -## HuggingFace Repository -We store our data products on HuggingFace. You can find them [here](https://huggingface.co/datasets/reglab/glove-v). +## Available Corpora We provide embeddings and variances for the following corpora: -- **Toy Corpus (300-dim)**: a subset of 11 words from the Corpus of Historical American English (1900-1999) -- **Corpus of Historical American English (COHA) (1900-1999) (300-dim)** +- **Toy Corpus (300-dim)**: a subset of 11 words from the Corpus of Historical American English (1900-1999). Downloadable as `Toy-Embeddings` +- **Corpus of Historical American English (COHA) (1900-1999) (300-dim)**: Downloadable as `COHA_1900-1999_300d` +- More to come! + +## HuggingFace Repository +We store our data products on HuggingFace. You can find them [here](https://huggingface.co/datasets/reglab/glove-v). -Each dataset contains the following files: +Each dataset contains the following files (see the **Storage of GloVe-V Variances** section below for more details on the differences between the complete and approximated variances): - `vocab.txt`: a list of the words in the corpus with associated frequencies - `vectors.safetensors`: a safetensors file containing the embeddings for each word in the corpus - `complete_chunk_{i}.safetensors`: a set of safetensors file containing the complete variances for each word in the corpus. These variances are size $D \times D$, where $D$ is the embedding dimensionality, and thus are very storage-intensive. 
-- `approx_info.txt`: a text file containing information on the approximation used to the full variance of each word (diagonal approximation, or SVD approximation) +- `approx_info.txt`: a text file containing information on the approximation used to approximate the full variance of each word (diagonal approximation, or SVD approximation) - `ApproximationVariances.safetensors`: a safetensors file containing the approximation variances for each word in the corpus. These approximations require storing much fewer floating point numbers than the full variances. If a word has been approximated by a diagonal approximation, then this file will contain only $D$ floating point numbers for each word. Alternatively, if a word has been approximated by an SVD approximation of rank $k$, then this file will contain $k(2D + 1)$ floating point numbers for each word. -If using the approximated variances, the `glove_v.variance.load_variance` function automaticallyhandles the reconstruction of the variances from these files. +If using the approximated variances, the `glove_v.variance.load_variance` function automatically handles the reconstruction of the variances from these files. ## Storage of GloVe-V Variances Let $V$ be the size of the vocabulary and $D$ be the embedding dimension. While GloVe embeddings only require storing $V \times D$ floating point numbers, the GloVe-V variances require storing $V \times (D x D)$ floating point numbers. For this reason, we offer two download options: 1. **Approximation Variances**: These are approximations to the full GloVe-V variances that can use either a diagonal approximation to the full variance, or a low-rank Singular Value Decomposition (SVD) approximation. We optimize this approximation at the level of each word to guarantee at least 90% reconstruction of the original variance. These approximations require storing much fewer floating point numbers than the full variances. -2. 
**Complete Variances**: These are the full GloVe-V variances, which require storing $V \times (D x D)$ floating point numbers. For example, in the case of the 300-dimensional embeddings for the COHA (1900-1999) corpus, this would be approximately 6.4 billion floating point numbers! +2. **Complete Variances**: These are the full GloVe-V variances, which require storing $V \times (D \times D)$ floating point numbers. For example, in the case of the 300-dimensional embeddings for the COHA (1900-1999) corpus, this would be approximately 6.4 billion floating point numbers! +Our [tutorial](https://github.com/reglab/glove-v/blob/main/glove_v/docs/tutorial.ipynb) compares results using the approximated and complete variances with an illustration from the paper. ## Setup First, clone this repo: ```bash -git clone https://github.com/reglab/glove-v.git myword +git clone https://github.com/reglab/glove-v.git glove_v ``` -Next, install uv if you haven't already: +Next, install uv: ```bash curl -LsSf https://astral.sh/uv/install.sh | sh @@ -50,7 +53,7 @@ curl -LsSf https://astral.sh/uv/install.sh | sh Then, create a virtual environment: ```bash -cd myword +cd glove_v uv venv # optionally add --python 3.11 or another version ``` @@ -58,8 +61,50 @@ To activate the virtual environment: ```bash source .venv/bin/activate # If using fish shell, use `source .venv/bin/activate.fish` instead + +uv sync ``` ## Usage -Our tutorial notebook is available [here](https://github.com/reglab/glove-v/blob/main/glove_v/docs/tutorial.ipynb) and walks through the process of downloading and interacting with the GloVe-V data products. +Our tutorial notebook is available [here](https://github.com/reglab/glove-v/blob/main/glove_v/docs/tutorial.ipynb) and offers a more detailed walkthrough of the process of downloading and interacting with the GloVe-V data products. 
+ +Here is a quick example of how to download the embeddings and approximated variances for the Toy Corpus: + +```python +glove_v.data.download_embeddings( + embedding_name='Toy-Embeddings', + approximation=True, +) +``` + +We can easily load the vocabulary and embeddings for the Toy Corpus in several formats (dictionary, numpy arrays, gensim KeyedVectors): +```python +vocab, ivocab = glove_v.vector.load_vocab( + embedding_name='Toy-Embeddings', +) +vectors = glove_v.vector.load_vectors( + embedding_name='Toy-Embeddings', + format='dictionary' +) +``` + +Next, we load the approximated variances for the Toy Corpus. The `glove_v.variance.load_variance` function automatically handles the reconstruction of the variances from the approximated files, such that the variances in the `approx_variances` dictionary are of size $D \times D$. + +```python +approx_variances = {} +for word in list(vocab.keys()): + approx_variances[word] = glove_v.variance.load_variance( + embedding_name='Toy-Embeddings', + approximation=True, + word_idx=vocab[word], + ) +``` + +We also offer a Gensim integration for working with GloVe-V embeddings using Gensim's KeyedVectors. + +```python +gensim_glovev_kv = glove_v.GloVeVKeyedVectors( + embedding_name='Toy-Embeddings', +) +``` diff --git a/glove_v/__init__.py b/glove_v/__init__.py index cf33b66..d49e907 100644 --- a/glove_v/__init__.py +++ b/glove_v/__init__.py @@ -1,4 +1,11 @@ -from . import variance +from . import propagate, variance, vector from .data import download_embeddings -from . import vector -from . import propagate +from .gensim_integration import GloVeVKeyedVectors + +__all__ = [ + "propagate", + "variance", + "vector", + "download_embeddings", + "GloVeVKeyedVectors", +] diff --git a/glove_v/data.py b/glove_v/data.py index 93fabe3..a270313 100644 --- a/glove_v/data.py +++ b/glove_v/data.py @@ -4,28 +4,29 @@ more lightweight, and guarantee 90% reconstruction of the original variance for each word. 
""" -import os -from huggingface_hub import hf_hub_download +from pathlib import Path + import numpy as np +from huggingface_hub import hf_hub_download from safetensors import safe_open -from safetensors.numpy import save_file\ +from safetensors.numpy import save_file import glove_v.utils.file as file_utils - AVAILABLE_EMBEDDINGS = [ "Toy-Embeddings", - 'COHA_1900-1999_300d', + "COHA_1900-1999_300d", ] + def download_embeddings( embedding_name: str, approximation: bool = True, - download_dir: str = f'{file_utils.get_data_path()}/glove-v' + download_dir: str = f"{file_utils.get_data_path()}/glove-v", ) -> None: """ Downloads the vectors and variances for a selected corpus. - + Args: embedding_name: (str) The specific embedding to download. This should match one of the keys in the AVAILABLE_EMBEDDINGS dictionary approximation: (bool) Whether to download the approximate or complete GloVe-V variances. The GloVe embeddings @@ -33,51 +34,54 @@ def download_embeddings( download_dir: (str) path where GloVe-V files should be saved """ if embedding_name not in AVAILABLE_EMBEDDINGS: - raise ValueError(f"[ERROR] Embeddings should be one of the following: {AVAILABLE_EMBEDDINGS}") + raise ValueError( + f"[ERROR] Embeddings should be one of the following: {AVAILABLE_EMBEDDINGS}" + ) - final_download_dir = os.path.join(download_dir, f"{embedding_name}") - os.makedirs(final_download_dir, exist_ok=True) + final_download_dir = Path(download_dir) / embedding_name + final_download_dir.mkdir(parents=True, exist_ok=True) # Download vocabulary and embeddings - for file in ['vocab.txt', 'vectors.safetensors']: - if not os.path.exists(os.path.join(final_download_dir, file)): - file_path = hf_hub_download( + for file in ["vocab.txt", "vectors.safetensors"]: + file_path = final_download_dir / file + if not file_path.exists(): + downloaded_path = hf_hub_download( repo_id="reglab/glove-v", filename=f"{embedding_name}/{file}", local_dir=download_dir, repo_type="dataset", ) - print(f"[INFO] 
Downloaded {file}: {file_path}") + print(f"[INFO] Downloaded {file}: {downloaded_path}") else: print(f"[INFO] {file} already exists in {final_download_dir}") # Download variances if approximation: - print('[INFO] Downloading file containing approximated variances.') - for file in ['ApproximationVariances.safetensors', 'approx_info.txt']: - if not os.path.exists(os.path.join(final_download_dir, file)): - file_path = hf_hub_download( + print("[INFO] Downloading file containing approximated variances.") + for file in ["ApproximationVariances.safetensors", "approx_info.txt"]: + file_path = final_download_dir / file + if not file_path.exists(): + downloaded_path = hf_hub_download( repo_id="reglab/glove-v", filename=f"{embedding_name}/{file}", local_dir=download_dir, repo_type="dataset", ) - print(f"[INFO] Downloaded {file}: {file_path}") + print(f"[INFO] Downloaded {file}: {downloaded_path}") else: print(f"[INFO] {file} already exists in {final_download_dir}") else: - print('[INFO] Downloading file containing complete variances.') - - output_path = os.path.join(final_download_dir, "CompleteVariances.safetensors") - if not os.path.exists(output_path): + print("[INFO] Downloading file containing complete variances.") + + output_path = final_download_dir / "CompleteVariances.safetensors" + if not output_path.exists(): download_and_reconstruct_complete_safetensor( embedding_name=embedding_name, download_dir=download_dir, - output_path=output_path, + output_path=str(output_path), ) else: - print(f"[INFO] Complete.safetensors already exists in {final_download_dir}") - + print(f"[INFO] Complete.safetensors already exists in {final_download_dir}") def download_and_reconstruct_complete_safetensor( @@ -88,7 +92,7 @@ def download_and_reconstruct_complete_safetensor( """ Downloads chunked safetensor files from HuggingFace and reconstructs the complete safetensor containing the original variances. 
- + Args: embedding_name: Name of the corpus on HuggingFace download_dir: Path where to save the downloaded chunks @@ -96,7 +100,7 @@ def download_and_reconstruct_complete_safetensor( """ chunk_idx = 0 all_variances = [] - + while True: try: # Download chunk @@ -104,25 +108,23 @@ def download_and_reconstruct_complete_safetensor( repo_id="reglab/glove-v", filename=f"{embedding_name}/complete_chunk_{chunk_idx}.safetensors", local_dir=download_dir, - repo_type="dataset" + repo_type="dataset", ) - + # Load chunk data with safe_open(chunk_path, framework="numpy") as f: # Get variances from chunk variances_chunk = f.get_tensor("variances") all_variances.append(variances_chunk) - + chunk_idx += 1 - - except Exception as e: + + except Exception: # No more chunks to download break - + # Concatenate all variance chunks complete_variances = np.concatenate(all_variances, axis=0) - + # Save reconstructed complete safetensor - save_file({ - "variances": complete_variances - }, output_path) + save_file({"variances": complete_variances}, output_path) diff --git a/glove_v/docs/MOC_vs_DM.ipynb b/glove_v/docs/MOC_vs_DM.ipynb index 4d97346..18a8e44 100644 --- a/glove_v/docs/MOC_vs_DM.ipynb +++ b/glove_v/docs/MOC_vs_DM.ipynb @@ -1,470 +1,472 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "tlg83ca6HWp6" - }, - "source": [ - "\n", - "# Tutorial: Using the Delta Method and the Method of Composition for uncertainty propagation\n", - "\n", - "In this tutorial, we illustrate how to use the **Delta Method** and the **Method of Composition** approaches to propagate uncertainty to downstream tasks using GloVe-V, our word-level variance estimates for GloVe. As an example, we compute uncertainty intervals for the cosine similarity of the words `doctor` and `surgeon` using both approaches. This computation was performed using the Method of Composition in Figure 5 of our paper." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "L1AdgsDEHYcE" - }, - "source": [ - "\n", - "\n", - "## Background\n", - "\n", - "Our GloVe-V framework computes the following Normal distribution for word $i$:\n", - "\n", - "$$ w_i \\sim N(\\mu_i, \\Sigma_i),$$\n", - "\n", - "where $\\mu_i$ is the $d$-dimensional GloVe-trained word embedding for word $i$ and $\\Sigma_i$ is the $d \\times d$ GloVe-V covariance matrix, as given by Equation 6 in the paper." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Delta Method**\n", - "\n", - "The Delta Method states that if $\\sqrt{n}(W - \\hat{W})$ converges to $N(0, \\Sigma)$, then\n", - "\n", - "$$ \\sqrt{n}(\\phi(W) - \\phi(\\hat{W})) \\rightarrow N(0, \\phi^{\\prime}(W)^T\\Sigma\\phi^{\\prime}(W)) ,$$ \n", - "\n", - "where $\\phi(\\cdot)$ is a differentiable function of $W$ and $\\phi^{\\prime}(\\cdot)$ is its gradient with respect to $W$. \n", - "\n", - "In our example, $\\phi(\\cdot)$ is the cosine similarity of the point estimates of the words $j = $ `doctor` and $k=$ `surgeon`:\n", - "\n", - "$$\\phi(w_j, w_k) = \\frac{w_j^T w_k}{\\|w_j\\| \\|w_k\\|} $$\n", - "\n", - "We now compute $\\frac{\\partial \\cos(w_j, w_k)}{\\partial w_j}$, the derivative of the cosine similarity with respect to one of the vectors, which is symmetrical for $w_j$ and $w_k$.\n", - "\n", - "$$d_j := \\frac{\\partial \\cos(w_j, w_k)}{\\partial w_j} = \\frac{w_k}{\\|w_k\\| \\|w_j\\|} - \\cos(w_j, w_k) \\cdot\\frac{w_j}{\\|\n", - " w_j\\|^2} $$\n", - "\n", - "Then, the variance of $\\phi(W)$ is given by:\n", - "$$ \\text{var}(\\phi(W)) = \\phi^{\\prime}(W)^T\\Sigma\\phi^{\\prime}(W) = \\sum_{i \\in \\{j, k\\}} d_i^T \\Sigma_i d_i$$\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Method of Composition (Tanner, 1996)**\n", - "\n", - "The Method of Composition propagates the uncertainty from a set of input variables to an output variable $Y$, generating independent and 
identically distributed samples of the output variable. In our example, $Y = \\cos(w_j, w_k)$.\n", - "\n", - "Let $K$ be the number of iterations. In the $k$th iteration, we draw one sample from each of the input variables $x_j \\sim N(\\mu_j, \\Sigma_j)$ and $x_k \\sim N(\\mu_k, \\Sigma_k)$, and compute $Y^{(k)} = \\cos(x_j, x_k)$. Then, ($Y^{(1)}, ..., Y^{(K)}$) are i.i.d. from the marginal distribution of $Y$, and we can compute an estimate of the mean and variance of $Y$ as follows:\n", - "\n", - "$$\\hat{Y} = \\frac{1}{K} \\sum_k Y^{(k)}$$\n", - "\n", - "$$ \\text{var}(\\hat{Y}) = \\frac{1}{K-1} \\sum_k (Y^{(k)} - \\hat{Y})^2 $$" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "id": "dYjIJ7paHUOS" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/avaimar/Documents/Projects/Legal-NLP/glove-v/.venv/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] - } - ], - "source": [ - "# Set up environment\n", - "import glove_v\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "import os\n", - "import pandas as pd\n", - "import seaborn as sns" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "A1LTwygwHlYD" - }, - "source": [ - "### Download COHA (1900-1999) vectors and pre-computed variances\n", - "\n", - "We start by downloading the pre-computed variances for the COHA (1900-1999) corpus. In this example, we download only a small subset which includes the vectors and variances for the words `doctor` and `surgeon`, which we make available in the `Toy-Embeddings` folder. 
\n", - "\n", - "To obtain the vectors and variances for the full vocabulary of the 1900-1999 COHA corpus, you can use `COHA_1900-1999_300d` as the `embedding_name` argument in the `download_embeddings` function." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "glove_v.data.download_embeddings(\n", - " embedding_name='Toy-Embeddings',\n", - " approximation=False,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "0LVvMY84N84_" - }, - "source": [ - "### Load the vocabulary, vectors and variances" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "id": "EGyrmAKIIDDO" - }, - "outputs": [], - "source": [ - "# Vocabulary and inverse vocabulary\n", - "vocab, ivocab = glove_v.vector.load_vocab(\n", - " embedding_name='Toy-Embeddings',\n", - ")\n", - "# Vectors and variances \n", - "vectors = glove_v.vector.load_vectors(\n", - " embedding_name='Toy-Embeddings',\n", - " format='dictionary'\n", - ")\n", - "variances = {}\n", - "for word in list(vocab.keys()):\n", - " variances[word] = glove_v.variance.load_variance(\n", - " embedding_name='Toy-Embeddings',\n", - " approximation=False,\n", - " word_idx=vocab[word],\n", - " )" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ZeZV6aU3PHwn" - }, - "source": [ - "We can see that the dictionaries containing the vectors and pre-computed variances include the keys `doctor` and `surgeon`, as well as other occupations used in the generation of Figure 5 in the paper." 
- ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "jTmmTTvYPGmv", - "outputId": "ca822af6-39de-4e8d-c221-01015d41e72b" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Keys in vectors dictionary: dict_keys(['doctor', 'surgeon', 'dentist', 'psychiatrist', 'therapist', 'veterinarian', 'obstetrician', 'pediatrician', 'pharmacist', 'neurologist', 'gynecologist'])\n", - "Keys in variances dictionary: dict_keys(['doctor', 'surgeon', 'dentist', 'psychiatrist', 'therapist', 'veterinarian', 'obstetrician', 'pediatrician', 'pharmacist', 'neurologist', 'gynecologist'])\n" - ] - } - ], - "source": [ - "print(f'Keys in vectors dictionary: {vectors.keys()}')\n", - "print(f'Keys in variances dictionary: {variances.keys()}')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ck6zkYFlIKp7" - }, - "source": [ - "## Cosine similarity point estimate\n", - "We now compute the point estimate for the cosine similarity between `doctor` and `surgeon` using the GloVe-trained vectors." - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "id": "djj_heViIDIf" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Cosine similarity between \"doctor\" and \"surgeon\": 0.443281888961792\n" - ] - } - ], - "source": [ - "cs_pe = np.dot(vectors['doctor'], vectors['surgeon'])\n", - "cs_pe /= (np.linalg.norm(vectors['doctor']) * np.linalg.norm(vectors['surgeon']))\n", - "print(f'Cosine similarity between \"doctor\" and \"surgeon\": {cs_pe}')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "JhtFLFfaINFG" - }, - "source": [ - "## Delta Method approach" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "aGcl9d-NQnVl" - }, - "source": [ - "We'll start by building a dictionary of derivatives for each word. 
We use the `cosine_derivative` function in `glove_v.propagate`, which implements the following computation for the derivative of the cosine similarity with respect to one of the vectors:\n", - "\n", - "$$d_j := \\frac{\\partial \\cos(w_j, w_k)}{\\partial w_j} = \\frac{w_k}{\\|w_k\\| \\|w_j\\|} - \\cos(w_j, w_k) \\cdot\\frac{w_j}{\\|w_j\\|^2} $$" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "id": "3GiUjvcYP9j5" - }, - "outputs": [], - "source": [ - "deriv_dict = {}\n", - "for w in ['doctor', 'surgeon']:\n", - " w_vec = vectors[w]\n", - " other_w = 'doctor' if w == 'surgeon' else 'surgeon'\n", - " c_vec = vectors[other_w]\n", - " w_der = glove_v.propagate.cosine_derivative(u=w_vec, v=c_vec)\n", - " deriv_dict[w] = w_der.reshape(1, -1)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "tKedczE4RZPa" - }, - "source": [ - "Next, we compute the variance of the cosine similarity, $\\text{var}(\\phi(W))$, as follows, using the `delta_method_variance` function in `glove_v.propagate`:\n", - "\n", - "$$ \\text{var}(\\phi(W)) = \\sum_{i \\in \\{j, k\\}} d_i^T \\Sigma_i d_i$$" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "id": "T5W-mgFlRD5g" - }, - "outputs": [], - "source": [ - "cs_variance = glove_v.propagate.delta_method_variance(\n", - " deriv_dict=deriv_dict,\n", - " variance_dict=variances,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "id": "cFfFUZxkIR_R" - }, - "outputs": [], - "source": [ - "DM_dict = {\n", - " 'Method': ['Delta Method'],\n", - " 'Mean': [cs_pe], 'Standard Deviation': [np.sqrt(cs_variance)]\n", - "}" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "tQZx9Jq2SH6y" - }, - "source": [ - "The **Delta Method** gives us a standard deviation of 0.010045 for the cosine similarity." 
- ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "WEIxOYzMIR8k", - "outputId": "54b1b40e-1be7-451a-811e-874bc1e20acf" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'Method': ['Delta Method'], 'Mean': [0.4432819], 'Standard Deviation': [0.010045475617181882]}\n" - ] - } - ], - "source": [ - "print(DM_dict)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "wfs2oLdwIU1o" - }, - "source": [ - "## Method of Composition approach\n", - "\n", - "In this approach, we obtain $K = 100,000$ samples of the cosine similarity of these two words, using random draws from the Normal distributions of each word. We then compute an estimate for the cosine similarity and its standard deviation by looking at the mean and standard deviation over the computed samples." - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "id": "dtAO-Y21IR5m" - }, - "outputs": [], - "source": [ - "K = 100_000\n", - "\n", - "sample_matrix_doctor = glove_v.propagate.sample_vector(\n", - " variance=variances['doctor'],\n", - " vector=vectors['doctor'],\n", - " n=K,\n", - ")\n", - "\n", - "sample_matrix_surgeon = glove_v.propagate.sample_vector(\n", - " variance=variances['surgeon'],\n", - " vector=vectors['surgeon'],\n", - " n=K,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ltWQu4iiWsle" - }, - "source": [ - "We now compute ($Y^{(1)}, ..., Y^{(K)}$), the i.i.d. samples of the cosine similarity." 
- ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "id": "oZmg0umYIZCB" - }, - "outputs": [], - "source": [ - "moc_cs = np.sum(sample_matrix_doctor * sample_matrix_surgeon, axis=1)\n", - "moc_cs = moc_cs / (np.linalg.norm(sample_matrix_doctor, axis=1) * np.linalg.norm(sample_matrix_surgeon, axis=1))" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "id": "EPkOSksHIctf" - }, - "outputs": [], - "source": [ - "MOC_dict = {\n", - " 'Method': ['Method of Composition'],\n", - " 'Mean': [np.mean(moc_cs)],\n", - " 'Standard Deviation': [np.sqrt(np.var(moc_cs))]\n", - "}" - ] - }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "tlg83ca6HWp6" + }, + "source": [ + "\n", + "# Tutorial: Using the Delta Method and the Method of Composition for uncertainty propagation\n", + "\n", + "In this tutorial, we illustrate how to use the **Delta Method** and the **Method of Composition** approaches to propagate uncertainty to downstream tasks using GloVe-V, our word-level variance estimates for GloVe. As an example, we compute uncertainty intervals for the cosine similarity of the words `doctor` and `surgeon` using both approaches. This computation was performed using the Method of Composition in Figure 5 of our paper." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "L1AdgsDEHYcE" + }, + "source": [ + "\n", + "\n", + "## Background\n", + "\n", + "Our GloVe-V framework computes the following Normal distribution for word $i$:\n", + "\n", + "$$ w_i \\sim N(\\mu_i, \\Sigma_i),$$\n", + "\n", + "where $\\mu_i$ is the $d$-dimensional GloVe-trained word embedding for word $i$ and $\\Sigma_i$ is the $d \\times d$ GloVe-V covariance matrix, as given by Equation 6 in the paper." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Delta Method**\n", + "\n", + "The Delta Method states that if $\\sqrt{n}(W - \\hat{W})$ converges to $N(0, \\Sigma)$, then\n", + "\n", + "$$ \\sqrt{n}(\\phi(W) - \\phi(\\hat{W})) \\rightarrow N(0, \\phi^{\\prime}(W)^T\\Sigma\\phi^{\\prime}(W)) ,$$ \n", + "\n", + "where $\\phi(\\cdot)$ is a differentiable function of $W$ and $\\phi^{\\prime}(\\cdot)$ is its gradient with respect to $W$. \n", + "\n", + "In our example, $\\phi(\\cdot)$ is the cosine similarity of the point estimates of the words $j = $ `doctor` and $k=$ `surgeon`:\n", + "\n", + "$$\\phi(w_j, w_k) = \\frac{w_j^T w_k}{\\|w_j\\| \\|w_k\\|} $$\n", + "\n", + "We now compute $\\frac{\\partial \\cos(w_j, w_k)}{\\partial w_j}$, the derivative of the cosine similarity with respect to one of the vectors, which is symmetrical for $w_j$ and $w_k$.\n", + "\n", + "$$d_j := \\frac{\\partial \\cos(w_j, w_k)}{\\partial w_j} = \\frac{w_k}{\\|w_k\\| \\|w_j\\|} - \\cos(w_j, w_k) \\cdot\\frac{w_j}{\\|\n", + " w_j\\|^2} $$\n", + "\n", + "Then, the variance of $\\phi(W)$ is given by:\n", + "$$ \\text{var}(\\phi(W)) = \\phi^{\\prime}(W)^T\\Sigma\\phi^{\\prime}(W) = \\sum_{i \\in \\{j, k\\}} d_i^T \\Sigma_i d_i$$\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Method of Composition (Tanner, 1996)**\n", + "\n", + "The Method of Composition propagates the uncertainty from a set of input variables to an output variable $Y$, generating independent and identically distributed samples of the output variable. In our example, $Y = \\cos(w_j, w_k)$.\n", + "\n", + "Let $K$ be the number of iterations. In the $s$th iteration ($s = 1, ..., K$), we draw one sample from each of the input variables $x_j^{(s)} \\sim N(\\mu_j, \\Sigma_j)$ and $x_k^{(s)} \\sim N(\\mu_k, \\Sigma_k)$, and compute $Y^{(s)} = \\cos(x_j^{(s)}, x_k^{(s)})$. Then, ($Y^{(1)}, ..., Y^{(K)}$) are i.i.d.
draws from the marginal distribution of $Y$, and we can compute an estimate of the mean and variance of $Y$ as follows:\n", + "\n", + "$$\\hat{Y} = \\frac{1}{K} \\sum_{s=1}^{K} Y^{(s)}$$\n", + "\n", + "$$ \\widehat{\\text{var}}(Y) = \\frac{1}{K-1} \\sum_{s=1}^{K} (Y^{(s)} - \\hat{Y})^2 $$" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "dYjIJ7paHUOS" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "Oba2H1V_Ic8b" - }, - "source": [ - "## Comparison: Delta Method vs. Method of Composition\n", - "\n", - "We can now compare the results from the **Delta Method** and the **Method of Composition**. We see that both approaches give very similar results, with the **Delta Method** centered around the cosine similarity of the point estimates of the words and the **Method of Composition** centered around the mean of the cosine similarity samples." - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/avaimar/Documents/Projects/Legal-NLP/glove-v/.venv/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "# Set up environment\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "import glove_v" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "A1LTwygwHlYD" + }, + "source": [ + "### Download COHA (1900-1999) vectors and pre-computed variances\n", + "\n", + "We start by downloading the pre-computed variances for the COHA (1900-1999) corpus. In this example, we download only a small subset which includes the vectors and variances for the words `doctor` and `surgeon`, which we make available in the `Toy-Embeddings` folder. 
\n", + "\n", + "To obtain the vectors and variances for the full vocabulary of the 1900-1999 COHA corpus, you can use `COHA_1900-1999_300d` as the `embedding_name` argument in the `download_embeddings` function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "glove_v.data.download_embeddings(\n", + " embedding_name=\"Toy-Embeddings\",\n", + " approximation=False,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0LVvMY84N84_" + }, + "source": [ + "### Load the vocabulary, vectors and variances" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "EGyrmAKIIDDO" + }, + "outputs": [], + "source": [ + "# Vocabulary and inverse vocabulary\n", + "vocab, ivocab = glove_v.vector.load_vocab(\n", + " embedding_name=\"Toy-Embeddings\",\n", + ")\n", + "# Vectors and variances\n", + "vectors = glove_v.vector.load_vectors(\n", + " embedding_name=\"Toy-Embeddings\", format=\"dictionary\"\n", + ")\n", + "variances = {}\n", + "for word in list(vocab.keys()):\n", + " variances[word] = glove_v.variance.load_variance(\n", + " embedding_name=\"Toy-Embeddings\",\n", + " approximation=False,\n", + " word_idx=vocab[word],\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZeZV6aU3PHwn" + }, + "source": [ + "We can see that the dictionaries containing the vectors and pre-computed variances include the keys `doctor` and `surgeon`, as well as other occupations used in the generation of Figure 5 in the paper." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "jTmmTTvYPGmv", + "outputId": "ca822af6-39de-4e8d-c221-01015d41e72b" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "id": "FtpYXoaIIgPt" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Method Mean Standard Deviation\n", - "0 Delta Method 0.443282 0.010045\n", - "0 Method of Composition 0.431803 0.009874\n" - ] - } - ], - "source": [ - "df = pd.DataFrame.from_dict(DM_dict)\n", - "df = pd.concat([df, pd.DataFrame.from_dict(MOC_dict)])\n", - "\n", - "print(df)" - ] - }, + "name": "stdout", + "output_type": "stream", + "text": [ + "Keys in vectors dictionary: dict_keys(['doctor', 'surgeon', 'dentist', 'psychiatrist', 'therapist', 'veterinarian', 'obstetrician', 'pediatrician', 'pharmacist', 'neurologist', 'gynecologist'])\n", + "Keys in variances dictionary: dict_keys(['doctor', 'surgeon', 'dentist', 'psychiatrist', 'therapist', 'veterinarian', 'obstetrician', 'pediatrician', 'pharmacist', 'neurologist', 'gynecologist'])\n" + ] + } + ], + "source": [ + "print(f\"Keys in vectors dictionary: {vectors.keys()}\")\n", + "print(f\"Keys in variances dictionary: {variances.keys()}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ck6zkYFlIKp7" + }, + "source": [ + "## Cosine similarity point estimate\n", + "We now compute the point estimate for the cosine similarity between `doctor` and `surgeon` using the GloVe-trained vectors." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "djj_heViIDIf" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "-_CXHh7aIhvJ" - }, - "source": [ - "## References\n", - "M. A. Tanner, *Tools for Statistical Inference: Methods for the Exploration of Posterior Distributions and Likelihood Functions*, Springer Series in Statistics (Springer New York, 1996)." 
- ] + "name": "stdout", + "output_type": "stream", + "text": [ + "Cosine similarity between \"doctor\" and \"surgeon\": 0.443281888961792\n" + ] } - ], - "metadata": { + ], + "source": [ + "cs_pe = np.dot(vectors[\"doctor\"], vectors[\"surgeon\"])\n", + "cs_pe /= np.linalg.norm(vectors[\"doctor\"]) * np.linalg.norm(vectors[\"surgeon\"])\n", + "print(f'Cosine similarity between \"doctor\" and \"surgeon\": {cs_pe}')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JhtFLFfaINFG" + }, + "source": [ + "## Delta Method approach" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aGcl9d-NQnVl" + }, + "source": [ + "We'll start by building a dictionary of derivatives for each word. We use the `cosine_derivative` function in `glove_v.propagate`, which implements the following computation for the derivative of the cosine similarity with respect to one of the vectors:\n", + "\n", + "$$d_j := \\frac{\\partial \\cos(w_j, w_k)}{\\partial w_j} = \\frac{w_k}{\\|w_k\\| \\|w_j\\|} - \\cos(w_j, w_k) \\cdot\\frac{w_j}{\\|w_j\\|^2} $$" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "3GiUjvcYP9j5" + }, + "outputs": [], + "source": [ + "deriv_dict = {}\n", + "for w in [\"doctor\", \"surgeon\"]:\n", + " w_vec = vectors[w]\n", + " other_w = \"doctor\" if w == \"surgeon\" else \"surgeon\"\n", + " c_vec = vectors[other_w]\n", + " w_der = glove_v.propagate.cosine_derivative(u=w_vec, v=c_vec)\n", + " deriv_dict[w] = w_der.reshape(1, -1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tKedczE4RZPa" + }, + "source": [ + "Next, we compute the variance of the cosine similarity, $\\text{var}(\\phi(W))$, as follows, using the `delta_method_variance` function in `glove_v.propagate`:\n", + "\n", + "$$ \\text{var}(\\phi(W)) = \\sum_{i \\in \\{j, k\\}} d_i^T \\Sigma_i d_i$$" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "T5W-mgFlRD5g" + }, + "outputs": [], + "source": [ + 
"cs_variance = glove_v.propagate.delta_method_variance(\n", + " deriv_dict=deriv_dict,\n", + " variance_dict=variances,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "cFfFUZxkIR_R" + }, + "outputs": [], + "source": [ + "DM_dict = {\n", + " \"Method\": [\"Delta Method\"],\n", + " \"Mean\": [cs_pe],\n", + " \"Standard Deviation\": [np.sqrt(cs_variance)],\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tQZx9Jq2SH6y" + }, + "source": [ + "The **Delta Method** gives us a standard deviation of 0.010045 for the cosine similarity." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { "colab": { - "provenance": [] + "base_uri": "https://localhost:8080/" }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.10" + "id": "WEIxOYzMIR8k", + "outputId": "54b1b40e-1be7-451a-811e-874bc1e20acf" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'Method': ['Delta Method'], 'Mean': [0.4432819], 'Standard Deviation': [0.010045475617181882]}\n" + ] } + ], + "source": [ + "print(DM_dict)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wfs2oLdwIU1o" + }, + "source": [ + "## Method of Composition approach\n", + "\n", + "In this approach, we obtain $K = 100,000$ samples of the cosine similarity of these two words, using random draws from the Normal distributions of each word. We then compute an estimate for the cosine similarity and its standard deviation by looking at the mean and standard deviation over the computed samples." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "id": "dtAO-Y21IR5m" + }, + "outputs": [], + "source": [ + "K = 100_000\n", + "\n", + "sample_matrix_doctor = glove_v.propagate.sample_vector(\n", + " variance=variances[\"doctor\"],\n", + " vector=vectors[\"doctor\"],\n", + " n=K,\n", + ")\n", + "\n", + "sample_matrix_surgeon = glove_v.propagate.sample_vector(\n", + " variance=variances[\"surgeon\"],\n", + " vector=vectors[\"surgeon\"],\n", + " n=K,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ltWQu4iiWsle" + }, + "source": [ + "We now compute ($Y^{(1)}, ..., Y^{(K)}$), the i.i.d. samples of the cosine similarity." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "id": "oZmg0umYIZCB" + }, + "outputs": [], + "source": [ + "moc_cs = np.sum(sample_matrix_doctor * sample_matrix_surgeon, axis=1)\n", + "moc_cs = moc_cs / (\n", + " np.linalg.norm(sample_matrix_doctor, axis=1)\n", + " * np.linalg.norm(sample_matrix_surgeon, axis=1)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "EPkOSksHIctf" + }, + "outputs": [], + "source": [ + "MOC_dict = {\n", + " \"Method\": [\"Method of Composition\"],\n", + " \"Mean\": [np.mean(moc_cs)],\n", + " \"Standard Deviation\": [np.sqrt(np.var(moc_cs))],\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Oba2H1V_Ic8b" + }, + "source": [ + "## Comparison: Delta Method vs. Method of Composition\n", + "\n", + "We can now compare the results from the **Delta Method** and the **Method of Composition**. We see that both approaches give very similar results, with the **Delta Method** centered around the cosine similarity of the point estimates of the words and the **Method of Composition** centered around the mean of the cosine similarity samples." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "id": "FtpYXoaIIgPt" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Method Mean Standard Deviation\n", + "0 Delta Method 0.443282 0.010045\n", + "0 Method of Composition 0.431803 0.009874\n" + ] + } + ], + "source": [ + "df = pd.DataFrame.from_dict(DM_dict)\n", + "df = pd.concat([df, pd.DataFrame.from_dict(MOC_dict)])\n", + "\n", + "print(df)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-_CXHh7aIhvJ" + }, + "source": [ + "## References\n", + "M. A. Tanner, *Tools for Statistical Inference: Methods for the Exploration of Posterior Distributions and Likelihood Functions*, Springer Series in Statistics (Springer New York, 1996)." + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" }, - "nbformat": 4, - "nbformat_minor": 0 + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.10" + } + }, + "nbformat": 4, + "nbformat_minor": 0 } diff --git a/glove_v/docs/tutorial.ipynb b/glove_v/docs/tutorial.ipynb index a8b528f..e04fbf5 100644 --- a/glove_v/docs/tutorial.ipynb +++ b/glove_v/docs/tutorial.ipynb @@ -25,13 +25,15 @@ ], "source": [ "# Set up environment\n", - "import glove_v\n", - "import numpy as np\n", + "from pathlib import Path\n", + "\n", "import matplotlib.pyplot as plt\n", - "from IPython.display import Image\n", - "import os\n", + "import numpy as np\n", "import pandas as pd\n", - "import seaborn as sns" + "import seaborn as sns\n", + "from IPython.display import Image\n", + "\n", + "import glove_v" ] }, { @@ -74,7 +76,7 @@ ], "source": [ "glove_v.data.download_embeddings(\n", - " embedding_name='Toy-Embeddings',\n", + " 
embedding_name=\"Toy-Embeddings\",\n", " approximation=True,\n", ")" ] @@ -96,7 +98,7 @@ ], "source": [ "glove_v.data.download_embeddings(\n", - " embedding_name='Toy-Embeddings',\n", + " embedding_name=\"Toy-Embeddings\",\n", " approximation=False,\n", ")" ] @@ -112,24 +114,23 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "vocab, ivocab = glove_v.vector.load_vocab(\n", - " embedding_name='Toy-Embeddings',\n", + " embedding_name=\"Toy-Embeddings\",\n", ")" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "vectors = glove_v.vector.load_vectors(\n", - " embedding_name='Toy-Embeddings',\n", - " format='dictionary'\n", + " embedding_name=\"Toy-Embeddings\", format=\"dictionary\"\n", ")" ] }, @@ -142,17 +143,17 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "approx_variances = {}\n", "for word in list(vocab.keys()):\n", " approx_variances[word] = glove_v.variance.load_variance(\n", - " embedding_name='Toy-Embeddings',\n", + " embedding_name=\"Toy-Embeddings\",\n", " approximation=True,\n", " word_idx=vocab[word],\n", - " )\n" + " )" ] }, { @@ -164,7 +165,7 @@ "variances = {}\n", "for word in list(vocab.keys()):\n", " variances[word] = glove_v.variance.load_variance(\n", - " embedding_name='Toy-Embeddings',\n", + " embedding_name=\"Toy-Embeddings\",\n", " approximation=False,\n", " word_idx=vocab[word],\n", " )" @@ -203,17 +204,22 @@ ], "source": [ "print('\\nComplete Variance for the word \"doctor\":')\n", - "print(variances['doctor'][:3, :3])\n", + "print(variances[\"doctor\"][:3, :3])\n", "\n", "print('\\nApproximation Variance for the word \"doctor\":')\n", - "print(approx_variances['doctor'][:3, :3])\n", + "print(approx_variances[\"doctor\"][:3, :3])\n", + "\n", "\n", + "def frob_norm_prop(a, b):\n", + " return np.linalg.norm(a, \"fro\") / 
np.linalg.norm(b, \"fro\")\n", "\n", - "def frob_norm_prop(A, B):\n", - " return np.linalg.norm(A, 'fro') / np.linalg.norm(B, 'fro')\n", "\n", - "prop_reconstruction = frob_norm_prop(approx_variances['doctor'], variances['doctor']) * 100\n", - "print(f'\\nApproximation variance recovers {prop_reconstruction:.2f}% of the complete variance.')" + "prop_reconstruction = (\n", + " frob_norm_prop(approx_variances[\"doctor\"], variances[\"doctor\"]) * 100\n", + ")\n", + "print(\n", + " f\"\\nApproximation variance recovers {prop_reconstruction:.2f}% of the complete variance.\"\n", + ")" ] }, { @@ -247,8 +253,8 @@ } ], "source": [ - "file_path = os.path.join(glove_v.utils.file.get_data_path(), '..', 'figures', 'figure5.jpg')\n", - "Image(file_path, width=300, alt='Figure 5 from the GloVe-V paper')" + "file_path = Path(glove_v.utils.file.get_data_path()) / \"..\" / \"figures\" / \"figure5.jpg\"\n", + "Image(file_path, width=300, alt=\"Figure 5 from the GloVe-V paper\")" ] }, { @@ -264,21 +270,25 @@ "metadata": {}, "outputs": [], "source": [ - "occupations = [w for w in vocab.keys() if w != 'doctor']\n", + "occupations = [w for w in vocab.keys() if w != \"doctor\"]\n", "\n", "results = pd.DataFrame()\n", - "for variance_dict, variance_name in zip([approx_variances, variances], ['Approximation', 'Complete'], strict=True):\n", + "for variance_dict, variance_name in zip(\n", + " [approx_variances, variances], [\"Approximation\", \"Complete\"], strict=True\n", + "):\n", " for occupation in occupations:\n", " # Point estimate\n", - " w_vec = vectors['doctor']\n", + " w_vec = vectors[\"doctor\"]\n", " c_vec = vectors[occupation]\n", - " cs_estimate = np.matmul(w_vec, c_vec.T) / (np.linalg.norm(w_vec) * np.linalg.norm(c_vec))\n", + " cs_estimate = np.matmul(w_vec, c_vec.T) / (\n", + " np.linalg.norm(w_vec) * np.linalg.norm(c_vec)\n", + " )\n", "\n", " # Dictionary of derivatives\n", " deriv_dict = {}\n", - " for w in ['doctor', occupation]:\n", + " for w in [\"doctor\", 
occupation]:\n", " w_vec = vectors[w]\n", - " other_w = 'doctor' if w == occupation else occupation\n", + " other_w = \"doctor\" if w == occupation else occupation\n", " c_vec = vectors[other_w]\n", " w_der = glove_v.propagate.cosine_derivative(u=w_vec, v=c_vec)\n", " deriv_dict[w] = w_der.reshape(1, -1)\n", @@ -291,19 +301,19 @@ "\n", " # Compute 95% confidence intervals for a Normal distribution\n", " cs_lower, cs_upper = glove_v.propagate.compute_normal_confint(\n", - " point_estimate=cs_estimate,\n", - " variance=cs_variance,\n", - " alpha=0.05\n", + " point_estimate=cs_estimate, variance=cs_variance, alpha=0.05\n", " )\n", "\n", - " w_results = pd.DataFrame.from_dict({\n", - " 'Occupation': [occupation],\n", - " 'Variance Type': [variance_name],\n", - " 'Estimate': [cs_estimate],\n", - " 'Variance': [cs_variance],\n", - " 'Lower': [cs_lower],\n", - " 'Upper': [cs_upper],\n", - " })\n", + " w_results = pd.DataFrame.from_dict(\n", + " {\n", + " \"Occupation\": [occupation],\n", + " \"Variance Type\": [variance_name],\n", + " \"Estimate\": [cs_estimate],\n", + " \"Variance\": [cs_variance],\n", + " \"Lower\": [cs_lower],\n", + " \"Upper\": [cs_upper],\n", + " }\n", + " )\n", " results = pd.concat([results, w_results], ignore_index=True)" ] }, @@ -333,22 +343,31 @@ "source": [ "fig, axs = plt.subplots(figsize=(8, 2.8), nrows=1, ncols=2)\n", "\n", - "for ax, variance_type in zip(axs, ['Approximation', 'Complete'], strict=True):\n", - " ax.set_title(f'Variance: {variance_type}')\n", - " sub_results = results[results['Variance Type'] == variance_type].copy()\n", + "for ax, variance_type in zip(axs, [\"Approximation\", \"Complete\"], strict=True):\n", + " ax.set_title(f\"Variance: {variance_type}\")\n", + " sub_results = results[results[\"Variance Type\"] == variance_type].copy()\n", " ax.errorbar(\n", - " ls='none',\n", - " y=sub_results['Occupation'], x=sub_results['Estimate'],\n", + " ls=\"none\",\n", + " y=sub_results[\"Occupation\"],\n", + " 
x=sub_results[\"Estimate\"],\n", " xerr=(\n", - " np.abs(sub_results['Lower'] - sub_results['Estimate']),\n", - " np.abs(sub_results['Upper'] - sub_results['Estimate'])), zorder=1, linewidth=2)\n", + " np.abs(sub_results[\"Lower\"] - sub_results[\"Estimate\"]),\n", + " np.abs(sub_results[\"Upper\"] - sub_results[\"Estimate\"]),\n", + " ),\n", + " zorder=1,\n", + " linewidth=2,\n", + " )\n", " sns.scatterplot(\n", " ax=ax,\n", - " data=sub_results, y='Occupation', x='Estimate', \n", - " zorder=5, size=1, legend=False\n", - " )\n", + " data=sub_results,\n", + " y=\"Occupation\",\n", + " x=\"Estimate\",\n", + " zorder=5,\n", + " size=1,\n", + " legend=False,\n", + " )\n", "\n", - " ax.set_ylabel('Occupation', fontsize=8)\n", + " ax.set_ylabel(\"Occupation\", fontsize=8)\n", " ax.set_xlabel('Cosime similarity with \"doctor\"', fontsize=8)\n", "\n", " ax.set_xlim((0.05, 0.6))\n", @@ -361,13 +380,119 @@ "source": [ "## 4. GloVe-V integration with Gensim\n", "\n", - "The Gensim library is a popular Python library for working with word embeddings. We provide a wrapper for the Gensim API that allows you to easily download and use GloVe-V embeddings and variances with Gensim." + "The Gensim library is a popular Python library for working with word embeddings. We provide a wrapper for the Gensim API that allows you to easily download and use GloVe-V embeddings and variances with Gensim. In this way, you can use gensim functionality (e.g. `most_similar`) and compute uncertainty estimates on these downstream tasks." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can download the vectors directly in Gensim format:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "gensim_vectors = glove_v.vector.load_vectors(\n", + " embedding_name=\"Toy-Embeddings\", format=\"gensim\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[('surgeon', 0.443281888961792),\n", + " ('dentist', 0.4145265519618988),\n", + " ('psychiatrist', 0.4063537120819092),\n", + " ('therapist', 0.22661727666854858),\n", + " ('veterinarian', 0.22298000752925873),\n", + " ('obstetrician', 0.19687847793102264),\n", + " ('pediatrician', 0.18491394817829132),\n", + " ('pharmacist', 0.17255671322345734),\n", + " ('neurologist', 0.1698257327079773),\n", + " ('gynecologist', 0.16823916137218475)]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gensim_vectors.most_similar(\"doctor\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Alternatively, if we are using the associated variances to compute uncertainty estimates, we can load them into a `GloVeVKeyedVectors` object:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "gensim_glovev_kv = glove_v.GloVeVKeyedVectors(\n", + " embedding_name=\"Toy-Embeddings\",\n", + ")" ] }, { "cell_type": "markdown", "metadata": {}, - "source": [] + "source": [ + "And compute uncertainty on downstream tasks performed with Gensim as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading variances: 100%|██████████| 11/11 [00:00<00:00, 342.52it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mean similarity: 
0.4312233890766127\n", + "95% CI: [0.41453011 0.45168073]\n" + ] + } + ], + "source": [ + "K = 100\n", + "sampled_vectors = gensim_glovev_kv.sample_vectors(\n", + " approximation=False,\n", + " verbose=True,\n", + " store=True,\n", + " sample_size=K,\n", + ")\n", + "similarities = []\n", + "for sampled_vector in sampled_vectors:\n", + " # Use Gensim's similarity function\n", + " similarities.append(sampled_vector.similarity(\"doctor\", \"surgeon\"))\n", + "\n", + "\n", + "print(f\"Mean similarity: {np.mean(similarities)}\")\n", + "print(f\"95% CI: {np.percentile(similarities, [2.5, 97.5])}\")" + ] } ], "metadata": { diff --git a/glove_v/gensim_integration.py b/glove_v/gensim_integration.py index b1202f7..bd451ed 100644 --- a/glove_v/gensim_integration.py +++ b/glove_v/gensim_integration.py @@ -1,18 +1,86 @@ import numpy as np from gensim.models import KeyedVectors +from tqdm import tqdm -from .data import get_variances +import glove_v.utils.file as file_utils +from .propagate import sample_vector +from .variance import load_variance +from .vector import load_vectors, load_vocab -class GloVeVEmbeddings(KeyedVectors): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.variances = get_variances() # Load your variances here - def get_vector_with_variance(self, word): - return self[word], self.variances.get(word, np.zeros((300, 300))) +class GloVeVKeyedVectors: + """ + A class for working with GloVe-V embeddings using Gensim. 
+ """ + def __init__( + self, + embedding_name: str, + download_dir: str = f"{file_utils.get_data_path()}/glove-v", + ): + vocab, _ = load_vocab(embedding_name=embedding_name) -def load_variance_embeddings(path): - # Load and return GloVeVEmbeddings instance - pass + self.embedding_name = embedding_name + self.V = len(vocab) + self.vocab = vocab + self.vectors = load_vectors(embedding_name, download_dir, "gensim") + self.d = self.vectors.vector_size + + def sample_vectors( + self, + approximation: bool, + sample_size: int = 1, + verbose: bool = False, + store: bool = False, + ): + """ + Sample vectors from the GloVe-V embedding model. If store is True, the variances are stored in memory to avoid reloading + at every single sample. + + Args: + approximation (bool): Whether to use the approximate variance or the exact variance. + sample_size (int): The number of samples to draw. + verbose (bool): Whether to print progress. + store (bool): Whether to store the variances in memory. + """ + + variances = None + if store: + variances = {} + for word_idx in tqdm( + self.vocab.values(), desc="Loading variances", disable=not verbose + ): + word_var = load_variance( + embedding_name=self.embedding_name, + approximation=approximation, + word_idx=word_idx, + ) + variances[word_idx] = word_var + + sampled_matrix = np.zeros((sample_size, self.V, self.d)) + for word, word_idx in self.vocab.items(): + if store: + word_var = variances[word_idx] + else: + word_var = load_variance( + embedding_name=self.embedding_name, + approximation=approximation, + word_idx=word_idx, + ) + sample_matrix_word = sample_vector( + variance=word_var, + vector=self.vectors[word], + n=sample_size, + ) + sampled_matrix[:, word_idx, :] = sample_matrix_word + + sampled_vectors = [] + for k in range(sample_size): + kv = KeyedVectors(self.d) + kv.vectors = sampled_matrix[k] + kv.index_to_key = list(self.vocab.keys()) + kv.key_to_index = self.vocab + sampled_vectors.append(kv) + + return sampled_vectors diff 
--git a/glove_v/propagate.py b/glove_v/propagate.py index 535a9cb..8d382f4 100644 --- a/glove_v/propagate.py +++ b/glove_v/propagate.py @@ -1,11 +1,12 @@ -import numpy as np import statistics +import numpy as np + def sample_vector( - variance: np.array, - vector: np.array, - n: int, + variance: np.array, + vector: np.array, + n: int, ) -> np.array: """ Returns a matrix of n samples for a word from the Normal distribution given by @@ -21,8 +22,8 @@ def sample_vector( def delta_method_variance( - deriv_dict: dict[str, np.array], - variance_dict: dict[str, np.array], + deriv_dict: dict[str, np.array], + variance_dict: dict[str, np.array], ) -> float: """ Computes the variance of of a test statistic using the Delta Method, given a dictionary of word-level derivatives and a dictionary of word-level variances. @@ -44,6 +45,7 @@ def cosine_derivative(u: np.array, v: np.array) -> np.array: u: Vector u v: Vector v """ + def cossim(u, v): u_re = u.reshape(-1, 1) v_re = v.reshape(-1, 1) @@ -56,10 +58,12 @@ def cossim(u, v): u_norm = np.linalg.norm(u_re) v_norm = np.linalg.norm(v_re) - return v_re / (u_norm * v_norm) - cossim(u, v) * u_re / (u_norm ** 2) + return v_re / (u_norm * v_norm) - cossim(u, v) * u_re / (u_norm**2) -def compute_normal_confint(point_estimate: float, variance: float, alpha: float = 0.05) -> tuple[float, float]: +def compute_normal_confint( + point_estimate: float, variance: float, alpha: float = 0.05 +) -> tuple[float, float]: """ Computes the 100(1-alpha)% two-sided confidence intervals for a Normal distribution. 
diff --git a/glove_v/variance.py b/glove_v/variance.py index 47ab024..cc889da 100644 --- a/glove_v/variance.py +++ b/glove_v/variance.py @@ -1,63 +1,69 @@ -from safetensors import safe_open +from pathlib import Path + import numpy as np -import os +from safetensors import safe_open from glove_v.utils import file as file_utils def load_variance( - embedding_name: str, - word_idx: int, - approximation: bool = True, - download_dir: str = f'{file_utils.get_data_path()}/glove-v', + embedding_name: str, + word_idx: int, + approximation: bool = True, + download_dir: str = f"{file_utils.get_data_path()}/glove-v", ) -> np.ndarray: """ Reconstruct the approximated variance matrix for word at index i from safetensor file. - + Args: embedding_name: Name of the embedding to load word_idx: Index of the word in the vocabulary download_dir: Path to the directory where the embedding is saved - + Returns: np.ndarray: Reconstructed GloVe-V variance matrix """ if approximation: - approximation_path = f"{download_dir}/{embedding_name}/ApproximationVariances.safetensors" - if not os.path.exists(approximation_path): - raise FileNotFoundError(file_utils.file_loading_error_message( - 'ApproximationVariances.safetensors', - download_dir, - embedding_name - )) + approximation_path = ( + Path(download_dir) / embedding_name / "ApproximationVariances.safetensors" + ) + if not approximation_path.exists(): + raise FileNotFoundError( + file_utils.file_loading_error_message( + "ApproximationVariances.safetensors", download_dir, embedding_name + ) + ) with safe_open(approximation_path, framework="numpy") as f: # Check if this is a diagonal approximation if f"diag_{word_idx}" in f.keys(): diagonal = f.get_tensor(f"diag_{word_idx}") return np.diag(diagonal) - + # Otherwise, it must be an SVD approximation elif f"U_{word_idx}" in f.keys(): U = f.get_tensor(f"U_{word_idx}") s = f.get_tensor(f"s_{word_idx}") Vt = f.get_tensor(f"Vt_{word_idx}") - + # Reconstruct using SVD components: U * diag(s) * Vt 
return U @ np.diag(s) @ Vt - + else: - raise KeyError(f"[ERROR No approximation found for word index {word_idx}") + raise KeyError( + f"[ERROR No approximation found for word index {word_idx}" + ) else: - complete_path = f"{download_dir}/{embedding_name}/CompleteVariances.safetensors" - if not os.path.exists(complete_path): - raise FileNotFoundError(file_utils.file_loading_error_message( - 'CompleteVariances.safetensors', - download_dir, - embedding_name - )) - + complete_path = ( + Path(download_dir) / embedding_name / "CompleteVariances.safetensors" + ) + if not complete_path.exists(): + raise FileNotFoundError( + file_utils.file_loading_error_message( + "CompleteVariances.safetensors", download_dir, embedding_name + ) + ) with safe_open(complete_path, framework="numpy") as f: word_var = f.get_tensor("variances")[word_idx] return word_var diff --git a/glove_v/vector.py b/glove_v/vector.py index 90dd708..dda857e 100644 --- a/glove_v/vector.py +++ b/glove_v/vector.py @@ -1,14 +1,14 @@ -from typing import Union +from pathlib import Path + import numpy as np -from safetensors import safe_open from gensim.models import KeyedVectors +from safetensors import safe_open from glove_v.utils import file as file_utils def load_vocab( - embedding_name: str, - download_dir: str = f'{file_utils.get_data_path()}/glove-v' + embedding_name: str, download_dir: str = f"{file_utils.get_data_path()}/glove-v" ) -> tuple[dict[str, int], dict[int, str]]: """ Loads dictionaries of word-to-index and index-to-word vocabulary conversions. 
@@ -18,38 +18,47 @@ def load_vocab( download_dir: (str) Path to the directory where the embedding is saved """ try: - with open(f'{download_dir}/{embedding_name}/vocab.txt', 'r') as f: - words = [x.rstrip().split(' ')[0] for x in f.readlines()] + vocab_path = Path(download_dir) / embedding_name / "vocab.txt" + with vocab_path.open() as f: + words = [x.rstrip().split(" ")[0] for x in f.readlines()] except FileNotFoundError as err: - raise file_utils.file_loading_error_message('vocab.txt', download_dir, embedding_name) from err + raise file_utils.file_loading_error_message( + "vocab.txt", download_dir, embedding_name + ) from err vocab = {w: idx for idx, w in enumerate(words)} - ivocab = {idx: w for idx, w in enumerate(words)} + ivocab = dict(enumerate(words)) return vocab, ivocab def load_vectors( embedding_name: str, - download_dir: str = f'{file_utils.get_data_path()}/glove-v', - format: str = 'numpy' -) -> Union[dict[str, np.ndarray], np.ndarray, KeyedVectors]: - - possible_formats = ['numpy', 'gensim', 'dictionary'] - assert format in possible_formats, f"Format should be one of the following: {possible_formats}" + download_dir: str = f"{file_utils.get_data_path()}/glove-v", + format: str = "numpy", +) -> dict[str, np.ndarray] | np.ndarray | KeyedVectors: + possible_formats = ["numpy", "gensim", "dictionary"] + assert ( + format in possible_formats + ), f"Format should be one of the following: {possible_formats}" try: - with safe_open(f'{download_dir}/{embedding_name}/vectors.safetensors', framework="numpy") as f: + with safe_open( + f"{download_dir}/{embedding_name}/vectors.safetensors", framework="numpy" + ) as f: vectors = f.get_tensor("center_vectors") except FileNotFoundError as err: - raise file_utils.file_loading_error_message('vectors.safetensors', download_dir, embedding_name) from err - + raise file_utils.file_loading_error_message( + "vectors.safetensors", download_dir, embedding_name + ) from err + vocab, ivocab = load_vocab(embedding_name, 
download_dir) - if format == 'numpy': + if format == "numpy": return vectors - elif format == 'dictionary': + elif format == "dictionary": return {ivocab[idx]: vectors[idx] for idx in range(vectors.shape[0])} - elif format == 'gensim': + elif format == "gensim": kv = KeyedVectors(vectors.shape[1]) kv.vectors = vectors kv.index_to_key = list(vocab.keys()) + kv.key_to_index = vocab return kv diff --git a/pyproject.toml b/pyproject.toml index 9e3ab30..64e43b5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,9 @@ name = "glove-v" version = "0.1.0" authors = [ { name = "Andrea Vallebueno", email = "avaimar@law.stanford.edu" }, - { name = "Cassandra Handan-Nader", email = "XX@stanford.edu" }, + { name = "Cassandra Handan-Nader", email = "cassandra.handan-nader@nyu.edu" }, + { name = "Christopher D. Manning", email = "manning@cs.stanford.edu" }, + { name = "Daniel E. Ho", email = "deho@stanford.edu" }, ] description = "A Python package for downloading and using GloVe vectors and GloVe-V variances." 
readme = "README.md" @@ -15,15 +17,15 @@ classifiers = [ ] requires-python = ">=3.10" dependencies = [ - "diskcache", - "fire", - "gdown", - "pandas>=2.2.3", - "pdf2image", - "pillow", - "pytesseract", "seaborn>=0.13.2", "tqdm", + "huggingface-hub==0.26.2", + "gensim==4.3.3", + "numpy==1.26.4", + "scipy==1.13.1", + "smart-open==7.0.4", + "wrapt==1.16.0", + "safetensors", ] [tool.uv] @@ -47,5 +49,5 @@ packages = ["glove_v"] line-length = 88 [tool.ruff.lint] -extend-select = ["B", "E", "I", "N", "PTH", "COM", "C4", "UP"] +extend-select = ["B", "E", "I", "PTH", "COM", "C4", "UP"] extend-ignore = ["COM812", "E712", "E501"] diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 5d23d90..0000000 --- a/requirements.txt +++ /dev/null @@ -1,10 +0,0 @@ -huggingface-hub==0.26.2 -gensim==4.3.3 -numpy==1.26.4 -scipy==1.13.1 -smart-open==7.0.4 -wrapt==1.16.0 -safetensors -pandas -seaborn - diff --git a/setup.py b/setup.py deleted file mode 100644 index d5461ad..0000000 --- a/setup.py +++ /dev/null @@ -1,7 +0,0 @@ -from setuptools import setup, find_packages - -setup( - name="glove_v", - packages=find_packages(), - version="0.1.0", -) \ No newline at end of file diff --git a/uv.lock b/uv.lock index 26fec21..d397657 100644 --- a/uv.lock +++ b/uv.lock @@ -6,18 +6,6 @@ resolution-markers = [ "python_full_version >= '3.12'", ] -[[package]] -name = "beautifulsoup4" -version = "4.12.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "soupsieve" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/b3/ca/824b1195773ce6166d388573fc106ce56d4a805bd7427b624e063596ec58/beautifulsoup4-4.12.3.tar.gz", hash = "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051", size = 581181 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b1/fe/e8c672695b37eecc5cbf43e1d0638d88d66ba3a44c4d321c796f4e59167f/beautifulsoup4-4.12.3-py3-none-any.whl", hash = 
"sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed", size = 147925 }, -] - [[package]] name = "certifi" version = "2024.8.30" @@ -185,15 +173,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e7/05/c19819d5e3d95294a6f5947fb9b9629efb316b96de511b418c53d245aae6/cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30", size = 8321 }, ] -[[package]] -name = "diskcache" -version = "5.6.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/3f/21/1c1ffc1a039ddcc459db43cc108658f32c57d271d7289a2794e401d0fdb6/diskcache-5.6.3.tar.gz", hash = "sha256:2c3a3fa2743d8535d832ec61c2054a1641f41775aa7c556758a109941e33e4fc", size = 67916 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/3f/27/4570e78fc0bf5ea0ca45eb1de3818a23787af9b390c0b0a0033a1b8236f9/diskcache-5.6.3-py3-none-any.whl", hash = "sha256:5e31b2d5fbad117cc363ebaf6b689474db18a1f6438bc82358b024abd4c2ca19", size = 45550 }, -] - [[package]] name = "distlib" version = "0.3.9" @@ -212,15 +191,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b9/f8/feced7779d755758a52d1f6635d990b8d98dc0a29fa568bbe0625f18fdf3/filelock-3.16.1-py3-none-any.whl", hash = "sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0", size = 16163 }, ] -[[package]] -name = "fire" -version = "0.7.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "termcolor" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/6b/b6/82c7e601d6d3c3278c40b7bd35e17e82aa227f050aa9f66cb7b7fce29471/fire-0.7.0.tar.gz", hash = "sha256:961550f07936eaf65ad1dc8360f2b2bf8408fad46abbfa4d2a3794f8d2a95cdf", size = 87189 } - [[package]] name = "fonttools" version = "4.54.1" @@ -261,18 +231,40 @@ wheels = [ ] [[package]] -name = "gdown" -version = "5.2.0" +name = "fsspec" +version = "2024.10.0" +source = { registry = "https://pypi.org/simple" } +sdist = { 
url = "https://files.pythonhosted.org/packages/a0/52/f16a068ebadae42526484c31f4398e62962504e5724a8ba5dc3409483df2/fsspec-2024.10.0.tar.gz", hash = "sha256:eda2d8a4116d4f2429db8550f2457da57279247dd930bb12f821b58391359493", size = 286853 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c6/b2/454d6e7f0158951d8a78c2e1eb4f69ae81beb8dca5fee9809c6c99e9d0d0/fsspec-2024.10.0-py3-none-any.whl", hash = "sha256:03b9a6785766a4de40368b88906366755e2819e758b83705c88cd7cb5fe81871", size = 179641 }, +] + +[[package]] +name = "gensim" +version = "4.3.3" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "beautifulsoup4" }, - { name = "filelock" }, - { name = "requests", extra = ["socks"] }, - { name = "tqdm" }, + { name = "numpy" }, + { name = "scipy" }, + { name = "smart-open" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/09/6a/37e6b70c5bda3161e40265861e63b64a86bfc6ca6a8f1c35328a675c84fd/gdown-5.2.0.tar.gz", hash = "sha256:2145165062d85520a3cd98b356c9ed522c5e7984d408535409fd46f94defc787", size = 284647 } +sdist = { url = "https://files.pythonhosted.org/packages/ec/bc/36ce4d510085cf150f17d79bb5e88cde942aeba2a894aed5893812ea1e6d/gensim-4.3.3.tar.gz", hash = "sha256:84852076a6a3d88d7dac5be245e24c21c3b819b565e14c1b61fa3e5ee76dcf57", size = 23258708 } wheels = [ - { url = "https://files.pythonhosted.org/packages/54/70/e07c381e6488a77094f04c85c9caf1c8008cdc30778f7019bc52e5285ef0/gdown-5.2.0-py3-none-any.whl", hash = "sha256:33083832d82b1101bdd0e9df3edd0fbc0e1c5f14c9d8c38d2a35bf1683b526d6", size = 18235 }, + { url = "https://files.pythonhosted.org/packages/27/12/047dc8b6bed7c4833bcdfbafc10af0f96dc3847ce37be63b14bd6e6c7767/gensim-4.3.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4e72840adfbea35c5804fd559bc0cb6bc9f439926220a37d852b7ce76eb325c1", size = 24086876 }, + { url = 
"https://files.pythonhosted.org/packages/ff/6e/7c6d7dda41924b83c4b1eb096942b68b85ba305df7f0963ad0642ac0d73f/gensim-4.3.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4019263c9d9afae7c669f880c17e09461e77a71afce04ed4d79cf71a4cad2848", size = 24041730 }, + { url = "https://files.pythonhosted.org/packages/73/f4/376290613da44ea9d11bdce3a1705ba7cc25f971edb2b460dc192092068c/gensim-4.3.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dea62d3e2ada547687bde6cbba37efa50b534db77e9d44fd5802676bb072c9d9", size = 26398007 }, + { url = "https://files.pythonhosted.org/packages/de/63/776ee55c773f55fa9d4fc1596f2e5e15de109921a6727dfe29cc4f0baeb7/gensim-4.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6fac93ef5e44982defef9d3c1e4cd00245506b8a29cec19ec5e00f0221b8144c", size = 26506925 }, + { url = "https://files.pythonhosted.org/packages/cd/4a/f07e2f255aedd6bb4bd0ae420a465f228a4a91bc78ac359216ea20557be6/gensim-4.3.3-cp310-cp310-win_amd64.whl", hash = "sha256:7c3409f755fb8d62da99cea65e7a40a99d21f8fd86443a3aaf2d90eb68995021", size = 24012924 }, + { url = "https://files.pythonhosted.org/packages/7b/f4/f43fd909aa29fd92f0e6d703d90c0e6507a7c6be3d686a025b1e192afa3a/gensim-4.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:99e7b70352aecc6c1674dde82b75f453e7a5d1cc71ac1cfbc460bf1fe20501b7", size = 24082968 }, + { url = "https://files.pythonhosted.org/packages/2a/15/aca2fc3b9e97bd0e28be4a4302793c43757b04b828223c6d103c72132f19/gensim-4.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:32a4cac3f3c38af2069eab9524609fc92ebaeb2692b7280cfda365a3517a280a", size = 24036231 }, + { url = "https://files.pythonhosted.org/packages/ef/84/e46049a16fa7daa26ac9e83e41b3bc3b30867da832a5d7cb0779da893255/gensim-4.3.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c071b4329ed1be02446eb7ef637b94c68cf0080c15c57fbcde667fce2e49c3fe", size = 26558362 }, + { url = 
"https://files.pythonhosted.org/packages/78/4f/f6045d5d5f8e7838c42572607ce440f95dbf4de5da41ae664198c2839c05/gensim-4.3.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d662bf96e3d741b6ab61a54be842a7cbf5e45193008b2f4225c758cafd7f9cdc", size = 26662669 }, + { url = "https://files.pythonhosted.org/packages/f5/57/f2e6568dbf464a4b270954e5fa3dee4a4054d163a41c0e7bf0a34eb40f0f/gensim-4.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:a54bd53a0e6f991abb837f126663353657270e75be53287e8a568ada0b35b1b0", size = 24010102 }, + { url = "https://files.pythonhosted.org/packages/40/f1/3231b3fd6f7424f28d7d673679c843da0c61659538262a234f9f43ed5b10/gensim-4.3.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:9a65ed1a8c1fc83890b4eb2a45ae2b32e82a0209c970c8c74694d0374c2415cb", size = 24079041 }, + { url = "https://files.pythonhosted.org/packages/1f/76/616bc781bc19ee76b387a101211f73e00cf59368fcc221e77f88ea907d04/gensim-4.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4db485e08a0287e0fd6a029d89b90913d1df38f1dcd34cd2ab758873ba9255f3", size = 24035496 }, + { url = "https://files.pythonhosted.org/packages/e0/b7/a316ba52548ca405413c23967c1c6c77d00f82cf6b0cb63d268001e023aa/gensim-4.3.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7198987116373ab99f034b292a04ac841531d12b56345851c98b40a3fcd93a85", size = 26487104 }, + { url = "https://files.pythonhosted.org/packages/1a/07/7a0d5e6cab4da2769c8018f2472690ccb8cab191bf2fe46342dfd627486b/gensim-4.3.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6237a50de4da7a037b19b2b6c430b6537243dcdedebf94afeb089e951953e601", size = 26606101 }, + { url = "https://files.pythonhosted.org/packages/79/7b/747fcb06280764cf20353361162eff68c6b0a3be34c43ead5ae393d3b18e/gensim-4.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:c910c2d5a71f532273166a3a82762959973f0513b221a495fa5a2a07652ee66d", size = 24009244 }, ] [[package]] @@ -280,15 +272,15 @@ name = "glove-v" version = "0.1.0" 
source = { editable = "." } dependencies = [ - { name = "diskcache" }, - { name = "fire" }, - { name = "gdown" }, - { name = "pandas" }, - { name = "pdf2image" }, - { name = "pillow" }, - { name = "pytesseract" }, + { name = "gensim" }, + { name = "huggingface-hub" }, + { name = "numpy" }, + { name = "safetensors" }, + { name = "scipy" }, { name = "seaborn" }, + { name = "smart-open" }, { name = "tqdm" }, + { name = "wrapt" }, ] [package.dev-dependencies] @@ -300,15 +292,15 @@ dev = [ [package.metadata] requires-dist = [ - { name = "diskcache" }, - { name = "fire" }, - { name = "gdown" }, - { name = "pandas", specifier = ">=2.2.3" }, - { name = "pdf2image" }, - { name = "pillow" }, - { name = "pytesseract" }, + { name = "gensim", specifier = "==4.3.3" }, + { name = "huggingface-hub", specifier = "==0.26.2" }, + { name = "numpy", specifier = "==1.26.4" }, + { name = "safetensors" }, + { name = "scipy", specifier = "==1.13.1" }, { name = "seaborn", specifier = ">=0.13.2" }, + { name = "smart-open", specifier = "==7.0.4" }, { name = "tqdm" }, + { name = "wrapt", specifier = "==1.16.0" }, ] [package.metadata.requires-dev] @@ -318,6 +310,24 @@ dev = [ { name = "snakeviz" }, ] +[[package]] +name = "huggingface-hub" +version = "0.26.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "filelock" }, + { name = "fsspec" }, + { name = "packaging" }, + { name = "pyyaml" }, + { name = "requests" }, + { name = "tqdm" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d5/a8/882ae5d1cfa7c9c5be32feee4cee56d9873078913953423e47a756da110d/huggingface_hub-0.26.2.tar.gz", hash = "sha256:b100d853465d965733964d123939ba287da60a547087783ddff8a323f340332b", size = 375621 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/60/bf/cea0b9720c32fa01b0c4ec4b16b9f4ae34ca106b202ebbae9f03ab98cd8f/huggingface_hub-0.26.2-py3-none-any.whl", hash = 
"sha256:98c2a5a8e786c7b2cb6fdeb2740893cba4d53e312572ed3d8afafda65b128c46", size = 447536 }, +] + [[package]] name = "identify" version = "2.6.1" @@ -473,62 +483,34 @@ wheels = [ [[package]] name = "numpy" -version = "2.1.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/4b/d1/8a730ea07f4a37d94f9172f4ce1d81064b7a64766b460378be278952de75/numpy-2.1.2.tar.gz", hash = "sha256:13532a088217fa624c99b843eeb54640de23b3414b14aa66d023805eb731066c", size = 18878063 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/1c/a2/40a76d357f168e9f9f06d6cc2c8e22dd5fb2bfbe63fe2c433057258c145a/numpy-2.1.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:30d53720b726ec36a7f88dc873f0eec8447fbc93d93a8f079dfac2629598d6ee", size = 21150947 }, - { url = "https://files.pythonhosted.org/packages/b5/d0/ba271ea9108d7278d3889a7eb38d77370a88713fb94339964e71ac184d4a/numpy-2.1.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e8d3ca0a72dd8846eb6f7dfe8f19088060fcb76931ed592d29128e0219652884", size = 13758184 }, - { url = "https://files.pythonhosted.org/packages/7c/b9/5c6507439cd756201010f7937bf90712c2469052ae094584af14557dd64f/numpy-2.1.2-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:fc44e3c68ff00fd991b59092a54350e6e4911152682b4782f68070985aa9e648", size = 5354091 }, - { url = "https://files.pythonhosted.org/packages/60/21/7938cf724d9e84e45fb886f3fc794ab431d71facfebc261e3e9f19f3233a/numpy-2.1.2-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:7c1c60328bd964b53f8b835df69ae8198659e2b9302ff9ebb7de4e5a5994db3d", size = 6887169 }, - { url = "https://files.pythonhosted.org/packages/09/8d/42a124657f5d31902fca73921b25a0d022cead2b32ce7e6975762cd2995a/numpy-2.1.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6cdb606a7478f9ad91c6283e238544451e3a95f30fb5467fbf715964341a8a86", size = 13888165 }, - { url = 
"https://files.pythonhosted.org/packages/fb/25/ba023652a39a2c127200e85aed975fc6119b421e2c348e5d0171e2046edb/numpy-2.1.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d666cb72687559689e9906197e3bec7b736764df6a2e58ee265e360663e9baf7", size = 16326954 }, - { url = "https://files.pythonhosted.org/packages/34/58/23e6b07fad492b7c47cf09cd8bad6983658f0f925b6c535fd008e3e86274/numpy-2.1.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c6eef7a2dbd0abfb0d9eaf78b73017dbfd0b54051102ff4e6a7b2980d5ac1a03", size = 16702916 }, - { url = "https://files.pythonhosted.org/packages/91/24/37b5cf2dc7d385ac97f7b7fe50cba312abb70a2a5eac74c23af028811f73/numpy-2.1.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:12edb90831ff481f7ef5f6bc6431a9d74dc0e5ff401559a71e5e4611d4f2d466", size = 14384372 }, - { url = "https://files.pythonhosted.org/packages/ea/ec/0f6d471058a01d1a05a50d2793898de1549280fa715a8537987ee866b5d9/numpy-2.1.2-cp310-cp310-win32.whl", hash = "sha256:a65acfdb9c6ebb8368490dbafe83c03c7e277b37e6857f0caeadbbc56e12f4fb", size = 6535361 }, - { url = "https://files.pythonhosted.org/packages/c2/3d/293cc5927f916a7bc6bf74da8f6defab63d1b13f0959d7e21878ad8a20d8/numpy-2.1.2-cp310-cp310-win_amd64.whl", hash = "sha256:860ec6e63e2c5c2ee5e9121808145c7bf86c96cca9ad396c0bd3e0f2798ccbe2", size = 12865501 }, - { url = "https://files.pythonhosted.org/packages/aa/9c/9a6ec3ae89cd0648d419781284308f2956d2a61d932b5ac9682c956a171b/numpy-2.1.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b42a1a511c81cc78cbc4539675713bbcf9d9c3913386243ceff0e9429ca892fe", size = 21154845 }, - { url = "https://files.pythonhosted.org/packages/02/69/9f05c4ecc75fabf297b17743996371b4c3dfc4d92e15c5c38d8bb3db8d74/numpy-2.1.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:faa88bc527d0f097abdc2c663cddf37c05a1c2f113716601555249805cf573f1", size = 13789409 }, - { url = 
"https://files.pythonhosted.org/packages/34/4e/f95c99217bf77bbfaaf660d693c10bd0dc03b6032d19316d316088c9e479/numpy-2.1.2-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:c82af4b2ddd2ee72d1fc0c6695048d457e00b3582ccde72d8a1c991b808bb20f", size = 5352097 }, - { url = "https://files.pythonhosted.org/packages/06/13/f5d87a497c16658e9af8920449b0b5692b469586b8231340c672962071c5/numpy-2.1.2-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:13602b3174432a35b16c4cfb5de9a12d229727c3dd47a6ce35111f2ebdf66ff4", size = 6891195 }, - { url = "https://files.pythonhosted.org/packages/6c/89/691ac07429ac061b344d5e37fa8e94be51a6017734aea15f2d9d7c6d119a/numpy-2.1.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1ebec5fd716c5a5b3d8dfcc439be82a8407b7b24b230d0ad28a81b61c2f4659a", size = 13895153 }, - { url = "https://files.pythonhosted.org/packages/23/69/538317f0d925095537745f12aced33be1570bbdc4acde49b33748669af96/numpy-2.1.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2b49c3c0804e8ecb05d59af8386ec2f74877f7ca8fd9c1e00be2672e4d399b1", size = 16338306 }, - { url = "https://files.pythonhosted.org/packages/af/03/863fe7062c2106d3c151f7df9353f2ae2237c1dd6900f127a3eb1f24cb1b/numpy-2.1.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:2cbba4b30bf31ddbe97f1c7205ef976909a93a66bb1583e983adbd155ba72ac2", size = 16710893 }, - { url = "https://files.pythonhosted.org/packages/70/77/0ad9efe25482009873f9660d29a40a8c41a6f0e8b541195e3c95c70684c5/numpy-2.1.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8e00ea6fc82e8a804433d3e9cedaa1051a1422cb6e443011590c14d2dea59146", size = 14398048 }, - { url = "https://files.pythonhosted.org/packages/3e/0f/e785fe75544db9f2b0bb1c181e13ceff349ce49753d807fd9672916aa06d/numpy-2.1.2-cp311-cp311-win32.whl", hash = "sha256:5006b13a06e0b38d561fab5ccc37581f23c9511879be7693bd33c7cd15ca227c", size = 6533458 }, - { url = 
"https://files.pythonhosted.org/packages/d4/96/450054662295125af861d48d2c4bc081dadcf1974a879b2104613157aa62/numpy-2.1.2-cp311-cp311-win_amd64.whl", hash = "sha256:f1eb068ead09f4994dec71c24b2844f1e4e4e013b9629f812f292f04bd1510d9", size = 12870896 }, - { url = "https://files.pythonhosted.org/packages/a0/7d/554a6838f37f3ada5a55f25173c619d556ae98092a6e01afb6e710501d70/numpy-2.1.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d7bf0a4f9f15b32b5ba53147369e94296f5fffb783db5aacc1be15b4bf72f43b", size = 20848077 }, - { url = "https://files.pythonhosted.org/packages/b0/29/cb48a402ea879e645b16218718f3f7d9588a77d674a9dcf22e4c43487636/numpy-2.1.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b1d0fcae4f0949f215d4632be684a539859b295e2d0cb14f78ec231915d644db", size = 13493242 }, - { url = "https://files.pythonhosted.org/packages/56/44/f899b0581766c230da42f751b7b8896d096640b19b312164c267e48d36cb/numpy-2.1.2-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:f751ed0a2f250541e19dfca9f1eafa31a392c71c832b6bb9e113b10d050cb0f1", size = 5089219 }, - { url = "https://files.pythonhosted.org/packages/79/8f/b987070d45161a7a4504afc67ed38544ed2c0ed5576263599a0402204a9c/numpy-2.1.2-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:bd33f82e95ba7ad632bc57837ee99dba3d7e006536200c4e9124089e1bf42426", size = 6620167 }, - { url = "https://files.pythonhosted.org/packages/c4/a7/af3329fda3c3ec31d9b650e42bbcd3422fc62a765cbb1405fde4177a0996/numpy-2.1.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b8cde4f11f0a975d1fd59373b32e2f5a562ade7cde4f85b7137f3de8fbb29a0", size = 13604905 }, - { url = "https://files.pythonhosted.org/packages/9b/b4/e3c7e6fab0f77fff6194afa173d1f2342073d91b1d3b4b30b17c3fb4407a/numpy-2.1.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d95f286b8244b3649b477ac066c6906fbb2905f8ac19b170e2175d3d799f4df", size = 16041825 }, - { url = 
"https://files.pythonhosted.org/packages/e9/50/6828e66a78aa03147c111f84d55f33ce2dde547cb578d6744a3b06a0124b/numpy-2.1.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ab4754d432e3ac42d33a269c8567413bdb541689b02d93788af4131018cbf366", size = 16409541 }, - { url = "https://files.pythonhosted.org/packages/bf/72/66af7916d9c3c6dbfbc8acdd4930c65461e1953374a2bc43d00f948f004a/numpy-2.1.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e585c8ae871fd38ac50598f4763d73ec5497b0de9a0ab4ef5b69f01c6a046142", size = 14081134 }, - { url = "https://files.pythonhosted.org/packages/dc/5a/59a67d84f33fe00ae74f0b5b69dd4f93a586a4aba7f7e19b54b2133db038/numpy-2.1.2-cp312-cp312-win32.whl", hash = "sha256:9c6c754df29ce6a89ed23afb25550d1c2d5fdb9901d9c67a16e0b16eaf7e2550", size = 6237784 }, - { url = "https://files.pythonhosted.org/packages/4c/79/73735a6a5dad6059c085f240a4e74c9270feccd2bc66e4d31b5ca01d329c/numpy-2.1.2-cp312-cp312-win_amd64.whl", hash = "sha256:456e3b11cb79ac9946c822a56346ec80275eaf2950314b249b512896c0d2505e", size = 12568254 }, - { url = "https://files.pythonhosted.org/packages/16/72/716fa1dbe92395a9a623d5049203ff8ddb0cfce65b9df9117c3696ccc011/numpy-2.1.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a84498e0d0a1174f2b3ed769b67b656aa5460c92c9554039e11f20a05650f00d", size = 20834690 }, - { url = "https://files.pythonhosted.org/packages/1e/fb/3e85a39511586053b5c6a59a643879e376fae22230ebfef9cfabb0e032e2/numpy-2.1.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4d6ec0d4222e8ffdab1744da2560f07856421b367928026fb540e1945f2eeeaf", size = 13507474 }, - { url = "https://files.pythonhosted.org/packages/35/eb/5677556d9ba13436dab51e129f98d4829d95cd1b6bd0e199c14485a4bdb9/numpy-2.1.2-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:259ec80d54999cc34cd1eb8ded513cb053c3bf4829152a2e00de2371bd406f5e", size = 5074742 }, - { url = 
"https://files.pythonhosted.org/packages/3e/c5/6c5ef5ba41b65a7e51bed50dbf3e1483eb578055633dd013e811a28e96a1/numpy-2.1.2-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:675c741d4739af2dc20cd6c6a5c4b7355c728167845e3c6b0e824e4e5d36a6c3", size = 6606787 }, - { url = "https://files.pythonhosted.org/packages/08/ac/f2f29dd4fd325b379c7dc932a0ebab22f0e031dbe80b2f6019b291a3a544/numpy-2.1.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05b2d4e667895cc55e3ff2b56077e4c8a5604361fc21a042845ea3ad67465aa8", size = 13601333 }, - { url = "https://files.pythonhosted.org/packages/44/26/63f5f4e5089654dfb858f4892215ed968cd1a68e6f4a83f9961f84f855cb/numpy-2.1.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:43cca367bf94a14aca50b89e9bc2061683116cfe864e56740e083392f533ce7a", size = 16038090 }, - { url = "https://files.pythonhosted.org/packages/1d/21/015e0594de9c3a8d5edd24943d2bd23f102ec71aec026083f822f86497e2/numpy-2.1.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:76322dcdb16fccf2ac56f99048af32259dcc488d9b7e25b51e5eca5147a3fb98", size = 16410865 }, - { url = "https://files.pythonhosted.org/packages/df/01/c1bcf9e6025d79077fbf3f3ee503b50aa7bfabfcd8f4b54f5829f4c00f3f/numpy-2.1.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:32e16a03138cabe0cb28e1007ee82264296ac0983714094380b408097a418cfe", size = 14078077 }, - { url = "https://files.pythonhosted.org/packages/ba/06/db9d127d63bd11591770ba9f3d960f8041e0f895184b9351d4b1b5b56983/numpy-2.1.2-cp313-cp313-win32.whl", hash = "sha256:242b39d00e4944431a3cd2db2f5377e15b5785920421993770cddb89992c3f3a", size = 6234904 }, - { url = "https://files.pythonhosted.org/packages/a9/96/9f61f8f95b6e0ea0aa08633b704c75d1882bdcb331bdf8bfd63263b25b00/numpy-2.1.2-cp313-cp313-win_amd64.whl", hash = "sha256:f2ded8d9b6f68cc26f8425eda5d3877b47343e68ca23d0d0846f4d312ecaa445", size = 12561910 }, - { url = 
"https://files.pythonhosted.org/packages/36/b8/033f627821784a48e8f75c218033471eebbaacdd933f8979c79637a1b44b/numpy-2.1.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:2ffef621c14ebb0188a8633348504a35c13680d6da93ab5cb86f4e54b7e922b5", size = 20857719 }, - { url = "https://files.pythonhosted.org/packages/96/46/af5726fde5b74ed83f2f17a73386d399319b7ed4d51279fb23b721d0816d/numpy-2.1.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:ad369ed238b1959dfbade9018a740fb9392c5ac4f9b5173f420bd4f37ba1f7a0", size = 13518826 }, - { url = "https://files.pythonhosted.org/packages/db/6e/8ce677edf36da1c4dae80afe5529f47690697eb55b4864673af260ccea7b/numpy-2.1.2-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:d82075752f40c0ddf57e6e02673a17f6cb0f8eb3f587f63ca1eaab5594da5b17", size = 5115036 }, - { url = "https://files.pythonhosted.org/packages/6a/ba/3cce44fb1b8438042c11847048812a776f75ee0e7070179c22e4cfbf420c/numpy-2.1.2-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:1600068c262af1ca9580a527d43dc9d959b0b1d8e56f8a05d830eea39b7c8af6", size = 6628641 }, - { url = "https://files.pythonhosted.org/packages/59/c8/e722998720ccbd35ffbcf1d1b8ed0aa2304af88d3f1c38e06ebf983599b3/numpy-2.1.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a26ae94658d3ba3781d5e103ac07a876b3e9b29db53f68ed7df432fd033358a8", size = 13574803 }, - { url = "https://files.pythonhosted.org/packages/7c/8e/fc1fdd83a55476765329ac2913321c4aed5b082a7915095628c4ca30ea72/numpy-2.1.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13311c2db4c5f7609b462bc0f43d3c465424d25c626d95040f073e30f7570e35", size = 16021174 }, - { url = "https://files.pythonhosted.org/packages/2a/b6/a790742aa88067adb4bd6c89a946778c1417d4deaeafce3ca928f26d4c52/numpy-2.1.2-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:2abbf905a0b568706391ec6fa15161fad0fb5d8b68d73c461b3c1bab6064dd62", size = 16400117 }, - { url = 
"https://files.pythonhosted.org/packages/48/6f/129e3c17e3befe7fefdeaa6890f4c4df3f3cf0831aa053802c3862da67aa/numpy-2.1.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:ef444c57d664d35cac4e18c298c47d7b504c66b17c2ea91312e979fcfbdfb08a", size = 14066202 }, - { url = "https://files.pythonhosted.org/packages/73/c9/3e1d6bbe6d3d2e2c5a9483b24b2f29a229b323f62054278a3bba7fee11e5/numpy-2.1.2-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:bdd407c40483463898b84490770199d5714dcc9dd9b792f6c6caccc523c00952", size = 20981945 }, - { url = "https://files.pythonhosted.org/packages/6e/62/989c4988bde1a8e08117fccc3bab73d2886421fb98cde597168714f3c54e/numpy-2.1.2-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:da65fb46d4cbb75cb417cddf6ba5e7582eb7bb0b47db4b99c9fe5787ce5d91f5", size = 6750558 }, - { url = "https://files.pythonhosted.org/packages/53/b1/00ef9f30975f1312a53257f68e57b4513d14d537e03d507e2773a684b1e8/numpy-2.1.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c193d0b0238638e6fc5f10f1b074a6993cb13b0b431f64079a509d63d3aa8b7", size = 16141552 }, - { url = "https://files.pythonhosted.org/packages/c0/ec/0c04903b48dfea6be1d7b47ba70f98709fb7198fd970784a1400c391d522/numpy-2.1.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:a7d80b2e904faa63068ead63107189164ca443b42dd1930299e0d1cb041cec2e", size = 12789924 }, +version = "1.26.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/65/6e/09db70a523a96d25e115e71cc56a6f9031e7b8cd166c1ac8438307c14058/numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010", size = 15786129 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a7/94/ace0fdea5241a27d13543ee117cbc65868e82213fb31a8eb7fe9ff23f313/numpy-1.26.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0", size = 20631468 }, + { url = 
"https://files.pythonhosted.org/packages/20/f7/b24208eba89f9d1b58c1668bc6c8c4fd472b20c45573cb767f59d49fb0f6/numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a", size = 13966411 }, + { url = "https://files.pythonhosted.org/packages/fc/a5/4beee6488160798683eed5bdb7eead455892c3b4e1f78d79d8d3f3b084ac/numpy-1.26.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d209d8969599b27ad20994c8e41936ee0964e6da07478d6c35016bc386b66ad4", size = 14219016 }, + { url = "https://files.pythonhosted.org/packages/4b/d7/ecf66c1cd12dc28b4040b15ab4d17b773b87fa9d29ca16125de01adb36cd/numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f", size = 18240889 }, + { url = "https://files.pythonhosted.org/packages/24/03/6f229fe3187546435c4f6f89f6d26c129d4f5bed40552899fcf1f0bf9e50/numpy-1.26.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:62b8e4b1e28009ef2846b4c7852046736bab361f7aeadeb6a5b89ebec3c7055a", size = 13876746 }, + { url = "https://files.pythonhosted.org/packages/39/fe/39ada9b094f01f5a35486577c848fe274e374bbf8d8f472e1423a0bbd26d/numpy-1.26.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a4abb4f9001ad2858e7ac189089c42178fcce737e4169dc61321660f1a96c7d2", size = 18078620 }, + { url = "https://files.pythonhosted.org/packages/d5/ef/6ad11d51197aad206a9ad2286dc1aac6a378059e06e8cf22cd08ed4f20dc/numpy-1.26.4-cp310-cp310-win32.whl", hash = "sha256:bfe25acf8b437eb2a8b2d49d443800a5f18508cd811fea3181723922a8a82b07", size = 5972659 }, + { url = "https://files.pythonhosted.org/packages/19/77/538f202862b9183f54108557bfda67e17603fc560c384559e769321c9d92/numpy-1.26.4-cp310-cp310-win_amd64.whl", hash = "sha256:b97fe8060236edf3662adfc2c633f56a08ae30560c56310562cb4f95500022d5", size = 15808905 }, + { url = 
"https://files.pythonhosted.org/packages/11/57/baae43d14fe163fa0e4c47f307b6b2511ab8d7d30177c491960504252053/numpy-1.26.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4c66707fabe114439db9068ee468c26bbdf909cac0fb58686a42a24de1760c71", size = 20630554 }, + { url = "https://files.pythonhosted.org/packages/1a/2e/151484f49fd03944c4a3ad9c418ed193cfd02724e138ac8a9505d056c582/numpy-1.26.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef", size = 13997127 }, + { url = "https://files.pythonhosted.org/packages/79/ae/7e5b85136806f9dadf4878bf73cf223fe5c2636818ba3ab1c585d0403164/numpy-1.26.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ab55401287bfec946ced39700c053796e7cc0e3acbef09993a9ad2adba6ca6e", size = 14222994 }, + { url = "https://files.pythonhosted.org/packages/3a/d0/edc009c27b406c4f9cbc79274d6e46d634d139075492ad055e3d68445925/numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:666dbfb6ec68962c033a450943ded891bed2d54e6755e35e5835d63f4f6931d5", size = 18252005 }, + { url = "https://files.pythonhosted.org/packages/09/bf/2b1aaf8f525f2923ff6cfcf134ae5e750e279ac65ebf386c75a0cf6da06a/numpy-1.26.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:96ff0b2ad353d8f990b63294c8986f1ec3cb19d749234014f4e7eb0112ceba5a", size = 13885297 }, + { url = "https://files.pythonhosted.org/packages/df/a0/4e0f14d847cfc2a633a1c8621d00724f3206cfeddeb66d35698c4e2cf3d2/numpy-1.26.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:60dedbb91afcbfdc9bc0b1f3f402804070deed7392c23eb7a7f07fa857868e8a", size = 18093567 }, + { url = "https://files.pythonhosted.org/packages/d2/b7/a734c733286e10a7f1a8ad1ae8c90f2d33bf604a96548e0a4a3a6739b468/numpy-1.26.4-cp311-cp311-win32.whl", hash = "sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20", size = 5968812 }, + { url = 
"https://files.pythonhosted.org/packages/3f/6b/5610004206cf7f8e7ad91c5a85a8c71b2f2f8051a0c0c4d5916b76d6cbb2/numpy-1.26.4-cp311-cp311-win_amd64.whl", hash = "sha256:cd25bcecc4974d09257ffcd1f098ee778f7834c3ad767fe5db785be9a4aa9cb2", size = 15811913 }, + { url = "https://files.pythonhosted.org/packages/95/12/8f2020a8e8b8383ac0177dc9570aad031a3beb12e38847f7129bacd96228/numpy-1.26.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b3ce300f3644fb06443ee2222c2201dd3a89ea6040541412b8fa189341847218", size = 20335901 }, + { url = "https://files.pythonhosted.org/packages/75/5b/ca6c8bd14007e5ca171c7c03102d17b4f4e0ceb53957e8c44343a9546dcc/numpy-1.26.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b", size = 13685868 }, + { url = "https://files.pythonhosted.org/packages/79/f8/97f10e6755e2a7d027ca783f63044d5b1bc1ae7acb12afe6a9b4286eac17/numpy-1.26.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9fad7dcb1aac3c7f0584a5a8133e3a43eeb2fe127f47e3632d43d677c66c102b", size = 13925109 }, + { url = "https://files.pythonhosted.org/packages/0f/50/de23fde84e45f5c4fda2488c759b69990fd4512387a8632860f3ac9cd225/numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:675d61ffbfa78604709862923189bad94014bef562cc35cf61d3a07bba02a7ed", size = 17950613 }, + { url = "https://files.pythonhosted.org/packages/4c/0c/9c603826b6465e82591e05ca230dfc13376da512b25ccd0894709b054ed0/numpy-1.26.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ab47dbe5cc8210f55aa58e4805fe224dac469cde56b9f731a4c098b91917159a", size = 13572172 }, + { url = "https://files.pythonhosted.org/packages/76/8c/2ba3902e1a0fc1c74962ea9bb33a534bb05984ad7ff9515bf8d07527cadd/numpy-1.26.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1dda2e7b4ec9dd512f84935c5f126c8bd8b9f2fc001e9f54af255e8c5f16b0e0", size = 17786643 }, + { url = 
"https://files.pythonhosted.org/packages/28/4a/46d9e65106879492374999e76eb85f87b15328e06bd1550668f79f7b18c6/numpy-1.26.4-cp312-cp312-win32.whl", hash = "sha256:50193e430acfc1346175fcbdaa28ffec49947a06918b7b92130744e81e640110", size = 5677803 }, + { url = "https://files.pythonhosted.org/packages/16/2e/86f24451c2d530c88daf997cb8d6ac622c1d40d19f5a031ed68a4b73a374/numpy-1.26.4-cp312-cp312-win_amd64.whl", hash = "sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818", size = 15517754 }, ] [[package]] @@ -588,18 +570,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ab/5f/b38085618b950b79d2d9164a711c52b10aefc0ae6833b96f626b7021b2ed/pandas-2.2.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ad5b65698ab28ed8d7f18790a0dc58005c7629f227be9ecc1072aa74c0c1d43a", size = 13098436 }, ] -[[package]] -name = "pdf2image" -version = "1.17.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pillow" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/00/d8/b280f01045555dc257b8153c00dee3bc75830f91a744cd5f84ef3a0a64b1/pdf2image-1.17.0.tar.gz", hash = "sha256:eaa959bc116b420dd7ec415fcae49b98100dda3dd18cd2fdfa86d09f112f6d57", size = 12811 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/62/33/61766ae033518957f877ab246f87ca30a85b778ebaad65b7f74fa7e52988/pdf2image-1.17.0-py3-none-any.whl", hash = "sha256:ecdd58d7afb810dffe21ef2b1bbc057ef434dabbac6c33778a38a3f7744a27e2", size = 11618 }, -] - [[package]] name = "pillow" version = "11.0.0" @@ -701,28 +671,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/be/ec/2eb3cd785efd67806c46c13a17339708ddc346cbb684eade7a6e6f79536a/pyparsing-3.2.0-py3-none-any.whl", hash = "sha256:93d9577b88da0bbea8cc8334ee8b918ed014968fd2ec383e868fb8afb1ccef84", size = 106921 }, ] -[[package]] -name = "pysocks" -version = "1.7.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/bd/11/293dd436aea955d45fc4e8a35b6ae7270f5b8e00b53cf6c024c83b657a11/PySocks-1.7.1.tar.gz", hash = "sha256:3f8804571ebe159c380ac6de37643bb4685970655d3bba243530d6558b799aa0", size = 284429 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8d/59/b4572118e098ac8e46e399a1dd0f2d85403ce8bbaad9ec79373ed6badaf9/PySocks-1.7.1-py3-none-any.whl", hash = "sha256:2725bd0a9925919b9b51739eea5f9e2bae91e83288108a9ad338b2e3a4435ee5", size = 16725 }, -] - -[[package]] -name = "pytesseract" -version = "0.3.13" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "packaging" }, - { name = "pillow" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/9f/a6/7d679b83c285974a7cb94d739b461fa7e7a9b17a3abfd7bf6cbc5c2394b0/pytesseract-0.3.13.tar.gz", hash = "sha256:4bf5f880c99406f52a3cfc2633e42d9dc67615e69d8a509d74867d3baddb5db9", size = 17689 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7a/33/8312d7ce74670c9d39a532b2c246a853861120486be9443eebf048043637/pytesseract-0.3.13-py3-none-any.whl", hash = "sha256:7a99c6c2ac598360693d83a416e36e0b33a67638bb9d77fdcac094a3589d4b34", size = 14705 }, -] - [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -803,11 +751,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 64928 }, ] -[package.optional-dependencies] -socks = [ - { name = "pysocks" }, -] - [[package]] name = "ruff" version = "0.7.1" @@ -833,6 +776,96 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/79/7b/884553415e9f0a9bf358ed52fb68b934e67ef6c5a62397ace924a1afdf9a/ruff-0.7.1-py3-none-win_arm64.whl", hash = "sha256:19aa200ec824c0f36d0c9114c8ec0087082021732979a359d6f3c390a6ff2a37", size = 8717402 }, ] +[[package]] +name = "safetensors" +version = "0.4.5" 
+source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cb/46/a1c56ed856c6ac3b1a8b37abe5be0cac53219367af1331e721b04d122577/safetensors-0.4.5.tar.gz", hash = "sha256:d73de19682deabb02524b3d5d1f8b3aaba94c72f1bbfc7911b9b9d5d391c0310", size = 65702 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/38/10/0798ec2c8704c2d172620d8a3725bed92cdd75516357b1a3e64d4229ea4e/safetensors-0.4.5-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:a63eaccd22243c67e4f2b1c3e258b257effc4acd78f3b9d397edc8cf8f1298a7", size = 392312 }, + { url = "https://files.pythonhosted.org/packages/2b/9e/9648d8dbb485c40a4a0212b7537626ae440b48156cc74601ca0b7a7615e0/safetensors-0.4.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:23fc9b4ec7b602915cbb4ec1a7c1ad96d2743c322f20ab709e2c35d1b66dad27", size = 381858 }, + { url = "https://files.pythonhosted.org/packages/8b/67/49556aeacc00df353767ed31d68b492fecf38c3f664c52692e4d92aa0032/safetensors-0.4.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6885016f34bef80ea1085b7e99b3c1f92cb1be78a49839203060f67b40aee761", size = 441382 }, + { url = "https://files.pythonhosted.org/packages/5d/ce/e9f4869a37bb11229e6cdb4e73a6ef23b4f360eee9dca5f7e40982779704/safetensors-0.4.5-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:133620f443450429322f238fda74d512c4008621227fccf2f8cf4a76206fea7c", size = 439001 }, + { url = "https://files.pythonhosted.org/packages/a0/27/aee8cf031b89c34caf83194ec6b7f2eed28d053fff8b6da6d00c85c56035/safetensors-0.4.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4fb3e0609ec12d2a77e882f07cced530b8262027f64b75d399f1504ffec0ba56", size = 478026 }, + { url = "https://files.pythonhosted.org/packages/da/33/1d9fc4805c623636e7d460f28eec92ebd1856f7a552df8eb78398a1ef4de/safetensors-0.4.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:d0f1dd769f064adc33831f5e97ad07babbd728427f98e3e1db6902e369122737", size = 495545 }, + { url = "https://files.pythonhosted.org/packages/b9/df/6f766b56690709d22e83836e4067a1109a7d84ea152a6deb5692743a2805/safetensors-0.4.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c6d156bdb26732feada84f9388a9f135528c1ef5b05fae153da365ad4319c4c5", size = 435016 }, + { url = "https://files.pythonhosted.org/packages/90/fa/7bc3f18086201b1e55a42c88b822ae197d0158e12c54cd45c887305f1b7e/safetensors-0.4.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9e347d77e2c77eb7624400ccd09bed69d35c0332f417ce8c048d404a096c593b", size = 456273 }, + { url = "https://files.pythonhosted.org/packages/3e/59/2ae50150d37a65c1c5f01aec74dc737707b8bbecdc76307e5a1a12c8a376/safetensors-0.4.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:9f556eea3aec1d3d955403159fe2123ddd68e880f83954ee9b4a3f2e15e716b6", size = 619669 }, + { url = "https://files.pythonhosted.org/packages/fe/43/10f0bb597aef62c9c154152e265057089f3c729bdd980e6c32c3ec2407a4/safetensors-0.4.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:9483f42be3b6bc8ff77dd67302de8ae411c4db39f7224dec66b0eb95822e4163", size = 605212 }, + { url = "https://files.pythonhosted.org/packages/7c/75/ede6887ea0ceaba55730988bfc7668dc147a8758f907fa6db26fbb681b8e/safetensors-0.4.5-cp310-none-win32.whl", hash = "sha256:7389129c03fadd1ccc37fd1ebbc773f2b031483b04700923c3511d2a939252cc", size = 272652 }, + { url = "https://files.pythonhosted.org/packages/ba/f0/919c72a9eef843781e652d0650f2819039943e69b69d5af2d0451a23edc3/safetensors-0.4.5-cp310-none-win_amd64.whl", hash = "sha256:e98ef5524f8b6620c8cdef97220c0b6a5c1cef69852fcd2f174bb96c2bb316b1", size = 285879 }, + { url = "https://files.pythonhosted.org/packages/9a/a5/25bcf75e373412daf1fd88045ab3aa8140a0d804ef0e70712c4f2c5b94d8/safetensors-0.4.5-cp311-cp311-macosx_10_12_x86_64.whl", hash = 
"sha256:21f848d7aebd5954f92538552d6d75f7c1b4500f51664078b5b49720d180e47c", size = 392256 }, + { url = "https://files.pythonhosted.org/packages/08/8c/ece3bf8756506a890bd980eca02f47f9d98dfbf5ce16eda1368f53560f67/safetensors-0.4.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bb07000b19d41e35eecef9a454f31a8b4718a185293f0d0b1c4b61d6e4487971", size = 381490 }, + { url = "https://files.pythonhosted.org/packages/39/83/c4a7ce01d626e46ea2b45887f2e59b16441408031e2ce2f9fe01860c6946/safetensors-0.4.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09dedf7c2fda934ee68143202acff6e9e8eb0ddeeb4cfc24182bef999efa9f42", size = 441093 }, + { url = "https://files.pythonhosted.org/packages/47/26/cc52de647e71bd9a0b0d78ead0d31d9c462b35550a817aa9e0cab51d6db4/safetensors-0.4.5-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:59b77e4b7a708988d84f26de3ebead61ef1659c73dcbc9946c18f3b1786d2688", size = 438960 }, + { url = "https://files.pythonhosted.org/packages/06/78/332538546775ee97e749867df2d58f2282d9c48a1681e4891eed8b94ec94/safetensors-0.4.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5d3bc83e14d67adc2e9387e511097f254bd1b43c3020440e708858c684cbac68", size = 478031 }, + { url = "https://files.pythonhosted.org/packages/d9/03/a3c8663f1ddda54e624ecf43fce651659b49e8e1603c52c3e464b442acfa/safetensors-0.4.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:39371fc551c1072976073ab258c3119395294cf49cdc1f8476794627de3130df", size = 494754 }, + { url = "https://files.pythonhosted.org/packages/e6/ee/69e498a892f208bd1da4104d4b9be887f8611bf4942144718b6738482250/safetensors-0.4.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a6c19feda32b931cae0acd42748a670bdf56bee6476a046af20181ad3fee4090", size = 435013 }, + { url = 
"https://files.pythonhosted.org/packages/a2/61/f0cfce984515b86d1260f556ba3b782158e2855e6a318446ac2613786fa9/safetensors-0.4.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a659467495de201e2f282063808a41170448c78bada1e62707b07a27b05e6943", size = 455984 }, + { url = "https://files.pythonhosted.org/packages/e7/a9/3e3b48fcaade3eb4e347d39ebf0bd44291db21a3e4507854b42a7cb910ac/safetensors-0.4.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:bad5e4b2476949bcd638a89f71b6916fa9a5cae5c1ae7eede337aca2100435c0", size = 619513 }, + { url = "https://files.pythonhosted.org/packages/80/23/2a7a1be24258c0e44c1d356896fd63dc0545a98d2d0184925fa09cd3ec76/safetensors-0.4.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a3a315a6d0054bc6889a17f5668a73f94f7fe55121ff59e0a199e3519c08565f", size = 604841 }, + { url = "https://files.pythonhosted.org/packages/b4/5c/34d082ff1fffffd8545fb22cbae3285ab4236f1f0cfc64b7e58261c2363b/safetensors-0.4.5-cp311-none-win32.whl", hash = "sha256:a01e232e6d3d5cf8b1667bc3b657a77bdab73f0743c26c1d3c5dd7ce86bd3a92", size = 272602 }, + { url = "https://files.pythonhosted.org/packages/6d/41/948c96c8a7e9fef57c2e051f1871c108a6dbbc6d285598bdb1d89b98617c/safetensors-0.4.5-cp311-none-win_amd64.whl", hash = "sha256:cbd39cae1ad3e3ef6f63a6f07296b080c951f24cec60188378e43d3713000c04", size = 285973 }, + { url = "https://files.pythonhosted.org/packages/bf/ac/5a63082f931e99200db95fd46fb6734f050bb6e96bf02521904c6518b7aa/safetensors-0.4.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:473300314e026bd1043cef391bb16a8689453363381561b8a3e443870937cc1e", size = 392015 }, + { url = "https://files.pythonhosted.org/packages/73/95/ab32aa6e9bdc832ff87784cdf9da26192b93de3ef82b8d1ada8f345c5044/safetensors-0.4.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:801183a0f76dc647f51a2d9141ad341f9665602a7899a693207a82fb102cc53e", size = 381774 }, + { url = 
"https://files.pythonhosted.org/packages/d6/6c/7e04b7626809fc63f3698f4c50e43aff2864b40089aa4506c918a75b8eed/safetensors-0.4.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1524b54246e422ad6fb6aea1ac71edeeb77666efa67230e1faf6999df9b2e27f", size = 441134 }, + { url = "https://files.pythonhosted.org/packages/58/2b/ffe7c86a277e6c1595fbdf415cfe2903f253f574a5405e93fda8baaa582c/safetensors-0.4.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b3139098e3e8b2ad7afbca96d30ad29157b50c90861084e69fcb80dec7430461", size = 438467 }, + { url = "https://files.pythonhosted.org/packages/67/9c/f271bd804e08c7fda954d17b70ff281228a88077337a9e70feace4f4cc93/safetensors-0.4.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65573dc35be9059770808e276b017256fa30058802c29e1038eb1c00028502ea", size = 476566 }, + { url = "https://files.pythonhosted.org/packages/4c/ad/4cf76a3e430a8a26108407fa6cb93e6f80d996a5cb75d9540c8fe3862990/safetensors-0.4.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fd33da8e9407559f8779c82a0448e2133737f922d71f884da27184549416bfed", size = 492253 }, + { url = "https://files.pythonhosted.org/packages/d9/40/a6f75ea449a9647423ec8b6f72c16998d35aa4b43cb38536ac060c5c7bf5/safetensors-0.4.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3685ce7ed036f916316b567152482b7e959dc754fcc4a8342333d222e05f407c", size = 434769 }, + { url = "https://files.pythonhosted.org/packages/52/47/d4b49b1231abf3131f7bb0bc60ebb94b27ee33e0a1f9569da05f8ac65dee/safetensors-0.4.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:dde2bf390d25f67908278d6f5d59e46211ef98e44108727084d4637ee70ab4f1", size = 457166 }, + { url = "https://files.pythonhosted.org/packages/c3/cd/006468b03b0fa42ff82d795d47c4193e99001e96c3f08bd62ef1b5cab586/safetensors-0.4.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = 
"sha256:7469d70d3de970b1698d47c11ebbf296a308702cbaae7fcb993944751cf985f4", size = 619280 }, + { url = "https://files.pythonhosted.org/packages/22/4d/b6208d918e83daa84b424c0ac3191ae61b44b3191613a3a5a7b38f94b8ad/safetensors-0.4.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:3a6ba28118636a130ccbb968bc33d4684c48678695dba2590169d5ab03a45646", size = 605390 }, + { url = "https://files.pythonhosted.org/packages/e8/20/bf0e01825dc01ed75538021a98b9a046e60ead63c6c6700764c821a8c873/safetensors-0.4.5-cp312-none-win32.whl", hash = "sha256:c859c7ed90b0047f58ee27751c8e56951452ed36a67afee1b0a87847d065eec6", size = 273250 }, + { url = "https://files.pythonhosted.org/packages/f1/5f/ab6b6cec85b40789801f35b7d2fb579ae242d8193929974a106d5ff5c835/safetensors-0.4.5-cp312-none-win_amd64.whl", hash = "sha256:b5a8810ad6a6f933fff6c276eae92c1da217b39b4d8b1bc1c0b8af2d270dc532", size = 286307 }, + { url = "https://files.pythonhosted.org/packages/90/61/0e27b1403e311cba0be20026bee4ee822d90eda7dad372179e7f18bb99f3/safetensors-0.4.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:25e5f8e2e92a74f05b4ca55686234c32aac19927903792b30ee6d7bd5653d54e", size = 392062 }, + { url = "https://files.pythonhosted.org/packages/b1/9f/cc31fafc9f5d79da10a83a820ca37f069bab0717895ad8cbcacf629dd1c5/safetensors-0.4.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:81efb124b58af39fcd684254c645e35692fea81c51627259cdf6d67ff4458916", size = 382517 }, + { url = "https://files.pythonhosted.org/packages/a4/c7/4fda8a0ebb96662550433378f4a74c677fa5fc4d0a43a7ec287d1df254a9/safetensors-0.4.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:585f1703a518b437f5103aa9cf70e9bd437cb78eea9c51024329e4fb8a3e3679", size = 441378 }, + { url = "https://files.pythonhosted.org/packages/14/31/9abb431f6209de9c80dab83e1112ebd769f1e32e7ab7ab228a02424a4693/safetensors-0.4.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:4b99fbf72e3faf0b2f5f16e5e3458b93b7d0a83984fe8d5364c60aa169f2da89", size = 438831 }, + { url = "https://files.pythonhosted.org/packages/37/37/99bfb195578a808b8d045159ee9264f8da58d017ac0701853dcacda14d4e/safetensors-0.4.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b17b299ca9966ca983ecda1c0791a3f07f9ca6ab5ded8ef3d283fff45f6bcd5f", size = 477112 }, + { url = "https://files.pythonhosted.org/packages/7d/05/fac3ef107e60d2a78532bed171a91669d4bb259e1236f5ea8c67a6976c75/safetensors-0.4.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:76ded72f69209c9780fdb23ea89e56d35c54ae6abcdec67ccb22af8e696e449a", size = 493373 }, + { url = "https://files.pythonhosted.org/packages/cf/7a/825800ee8c68214b4fd3506d5e19209338c69b41e01c6e14dd13969cc8b9/safetensors-0.4.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2783956926303dcfeb1de91a4d1204cd4089ab441e622e7caee0642281109db3", size = 435422 }, + { url = "https://files.pythonhosted.org/packages/5e/6c/7a3233c08bde558d6c33a41219119866cb596139a4673cc6c24024710ffd/safetensors-0.4.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d94581aab8c6b204def4d7320f07534d6ee34cd4855688004a4354e63b639a35", size = 457382 }, + { url = "https://files.pythonhosted.org/packages/a0/58/0b7bcba3788ff503990cf9278d611b56c029400612ba93e772c987b5aa03/safetensors-0.4.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:67e1e7cb8678bb1b37ac48ec0df04faf689e2f4e9e81e566b5c63d9f23748523", size = 619301 }, + { url = "https://files.pythonhosted.org/packages/82/cc/9c2cf58611daf1c83ce5d37f9de66353e23fcda36008b13fd3409a760aa3/safetensors-0.4.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:dbd280b07e6054ea68b0cb4b16ad9703e7d63cd6890f577cb98acc5354780142", size = 605580 }, + { url = "https://files.pythonhosted.org/packages/cf/ff/037ae4c0ee32db496669365e66079b6329906c6814722b159aa700e67208/safetensors-0.4.5-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", 
hash = "sha256:fdadf66b5a22ceb645d5435a0be7a0292ce59648ca1d46b352f13cff3ea80410", size = 392951 }, + { url = "https://files.pythonhosted.org/packages/f1/d6/6621e16b35bf83ae099eaab07338f04991a26c9aa43879d05f19f35e149c/safetensors-0.4.5-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:d42ffd4c2259f31832cb17ff866c111684c87bd930892a1ba53fed28370c918c", size = 383417 }, + { url = "https://files.pythonhosted.org/packages/ae/88/3068e1bb16f5e9f9068901de3cf7b3db270b9bfe6e7d51d4b55c1da0425d/safetensors-0.4.5-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dd8a1f6d2063a92cd04145c7fd9e31a1c7d85fbec20113a14b487563fdbc0597", size = 442311 }, + { url = "https://files.pythonhosted.org/packages/f7/15/a2bb77ebbaa76b61ec2e9f731fe4db7f9473fd855d881957c51b3a168892/safetensors-0.4.5-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:951d2fcf1817f4fb0ef0b48f6696688a4e852a95922a042b3f96aaa67eedc920", size = 436678 }, + { url = "https://files.pythonhosted.org/packages/ec/79/9608c4546cdbfe3860dd7aa59e3562c9289113398b1a0bd89b68ce0a9d41/safetensors-0.4.5-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6ac85d9a8c1af0e3132371d9f2d134695a06a96993c2e2f0bbe25debb9e3f67a", size = 457316 }, + { url = "https://files.pythonhosted.org/packages/0f/23/b17b483f2857835962ad33e38014efd4911791187e177bc23b057d35bee8/safetensors-0.4.5-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:e3cec4a29eb7fe8da0b1c7988bc3828183080439dd559f720414450de076fcab", size = 620565 }, + { url = "https://files.pythonhosted.org/packages/19/46/5d11dc300feaad285c2f1bd784ff3f689f5e0ab6be49aaf568f3a77019eb/safetensors-0.4.5-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:21742b391b859e67b26c0b2ac37f52c9c0944a879a25ad2f9f9f3cd61e7fda8f", size = 606660 }, +] + +[[package]] +name = "scipy" +version = "1.13.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, +] +sdist = 
{ url = "https://files.pythonhosted.org/packages/ae/00/48c2f661e2816ccf2ecd77982f6605b2950afe60f60a52b4cbbc2504aa8f/scipy-1.13.1.tar.gz", hash = "sha256:095a87a0312b08dfd6a6155cbbd310a8c51800fc931b8c0b84003014b874ed3c", size = 57210720 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/33/59/41b2529908c002ade869623b87eecff3e11e3ce62e996d0bdcb536984187/scipy-1.13.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:20335853b85e9a49ff7572ab453794298bcf0354d8068c5f6775a0eabf350aca", size = 39328076 }, + { url = "https://files.pythonhosted.org/packages/d5/33/f1307601f492f764062ce7dd471a14750f3360e33cd0f8c614dae208492c/scipy-1.13.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:d605e9c23906d1994f55ace80e0125c587f96c020037ea6aa98d01b4bd2e222f", size = 30306232 }, + { url = "https://files.pythonhosted.org/packages/c0/66/9cd4f501dd5ea03e4a4572ecd874936d0da296bd04d1c45ae1a4a75d9c3a/scipy-1.13.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cfa31f1def5c819b19ecc3a8b52d28ffdcc7ed52bb20c9a7589669dd3c250989", size = 33743202 }, + { url = "https://files.pythonhosted.org/packages/a3/ba/7255e5dc82a65adbe83771c72f384d99c43063648456796436c9a5585ec3/scipy-1.13.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f26264b282b9da0952a024ae34710c2aff7d27480ee91a2e82b7b7073c24722f", size = 38577335 }, + { url = "https://files.pythonhosted.org/packages/49/a5/bb9ded8326e9f0cdfdc412eeda1054b914dfea952bda2097d174f8832cc0/scipy-1.13.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:eccfa1906eacc02de42d70ef4aecea45415f5be17e72b61bafcfd329bdc52e94", size = 38820728 }, + { url = "https://files.pythonhosted.org/packages/12/30/df7a8fcc08f9b4a83f5f27cfaaa7d43f9a2d2ad0b6562cced433e5b04e31/scipy-1.13.1-cp310-cp310-win_amd64.whl", hash = "sha256:2831f0dc9c5ea9edd6e51e6e769b655f08ec6db6e2e10f86ef39bd32eb11da54", size = 46210588 }, + { url = 
"https://files.pythonhosted.org/packages/b4/15/4a4bb1b15bbd2cd2786c4f46e76b871b28799b67891f23f455323a0cdcfb/scipy-1.13.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:27e52b09c0d3a1d5b63e1105f24177e544a222b43611aaf5bc44d4a0979e32f9", size = 39333805 }, + { url = "https://files.pythonhosted.org/packages/ba/92/42476de1af309c27710004f5cdebc27bec62c204db42e05b23a302cb0c9a/scipy-1.13.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:54f430b00f0133e2224c3ba42b805bfd0086fe488835effa33fa291561932326", size = 30317687 }, + { url = "https://files.pythonhosted.org/packages/80/ba/8be64fe225360a4beb6840f3cbee494c107c0887f33350d0a47d55400b01/scipy-1.13.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e89369d27f9e7b0884ae559a3a956e77c02114cc60a6058b4e5011572eea9299", size = 33694638 }, + { url = "https://files.pythonhosted.org/packages/36/07/035d22ff9795129c5a847c64cb43c1fa9188826b59344fee28a3ab02e283/scipy-1.13.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a78b4b3345f1b6f68a763c6e25c0c9a23a9fd0f39f5f3d200efe8feda560a5fa", size = 38569931 }, + { url = "https://files.pythonhosted.org/packages/d9/10/f9b43de37e5ed91facc0cfff31d45ed0104f359e4f9a68416cbf4e790241/scipy-1.13.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:45484bee6d65633752c490404513b9ef02475b4284c4cfab0ef946def50b3f59", size = 38838145 }, + { url = "https://files.pythonhosted.org/packages/4a/48/4513a1a5623a23e95f94abd675ed91cfb19989c58e9f6f7d03990f6caf3d/scipy-1.13.1-cp311-cp311-win_amd64.whl", hash = "sha256:5713f62f781eebd8d597eb3f88b8bf9274e79eeabf63afb4a737abc6c84ad37b", size = 46196227 }, + { url = "https://files.pythonhosted.org/packages/f2/7b/fb6b46fbee30fc7051913068758414f2721003a89dd9a707ad49174e3843/scipy-1.13.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5d72782f39716b2b3509cd7c33cdc08c96f2f4d2b06d51e52fb45a19ca0c86a1", size = 39357301 }, + { url = 
"https://files.pythonhosted.org/packages/dc/5a/2043a3bde1443d94014aaa41e0b50c39d046dda8360abd3b2a1d3f79907d/scipy-1.13.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:017367484ce5498445aade74b1d5ab377acdc65e27095155e448c88497755a5d", size = 30363348 }, + { url = "https://files.pythonhosted.org/packages/e7/cb/26e4a47364bbfdb3b7fb3363be6d8a1c543bcd70a7753ab397350f5f189a/scipy-1.13.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:949ae67db5fa78a86e8fa644b9a6b07252f449dcf74247108c50e1d20d2b4627", size = 33406062 }, + { url = "https://files.pythonhosted.org/packages/88/ab/6ecdc526d509d33814835447bbbeedbebdec7cca46ef495a61b00a35b4bf/scipy-1.13.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de3ade0e53bc1f21358aa74ff4830235d716211d7d077e340c7349bc3542e884", size = 38218311 }, + { url = "https://files.pythonhosted.org/packages/0b/00/9f54554f0f8318100a71515122d8f4f503b1a2c4b4cfab3b4b68c0eb08fa/scipy-1.13.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2ac65fb503dad64218c228e2dc2d0a0193f7904747db43014645ae139c8fad16", size = 38442493 }, + { url = "https://files.pythonhosted.org/packages/3e/df/963384e90733e08eac978cd103c34df181d1fec424de383cdc443f418dd4/scipy-1.13.1-cp312-cp312-win_amd64.whl", hash = "sha256:cdd7dacfb95fea358916410ec61bbc20440f7860333aee6d882bb8046264e949", size = 45910955 }, +] + [[package]] name = "seaborn" version = "0.13.2" @@ -857,33 +890,27 @@ wheels = [ ] [[package]] -name = "snakeviz" -version = "2.2.0" +name = "smart-open" +version = "7.0.4" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "tornado" }, + { name = "wrapt" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/64/9b/3983c41e913676d55e4b3de869aa0561e053ad3505f1fd35181670244b70/snakeviz-2.2.0.tar.gz", hash = "sha256:7bfd00be7ae147eb4a170a471578e1cd3f41f803238958b6b8efcf2c698a6aa9", size = 181033 } +sdist = { url = 
"https://files.pythonhosted.org/packages/06/84/c6e6276a72a78996f11118b8bc1d9e9b619aa78201f408210f4a584bd377/smart_open-7.0.4.tar.gz", hash = "sha256:62b65852bdd1d1d516839fcb1f6bc50cd0f16e05b4ec44b52f43d38bcb838524", size = 71149 } wheels = [ - { url = "https://files.pythonhosted.org/packages/64/f6/d83a7003a1d104a08fc4623c0ac92ed45c394c18e79a5011a4ed87c67501/snakeviz-2.2.0-py2.py3-none-any.whl", hash = "sha256:569e2d71c47f80a886aa6e70d6405cb6d30aa3520969ad956b06f824c5f02b8e", size = 283662 }, + { url = "https://files.pythonhosted.org/packages/65/12/cc24847b4b0b124501a33cd8f7963f79f6f6584bc7f2f4fc16bbbaa54c8f/smart_open-7.0.4-py3-none-any.whl", hash = "sha256:4e98489932b3372595cddc075e6033194775165702887216b65eba760dfd8d47", size = 61215 }, ] [[package]] -name = "soupsieve" -version = "2.6" +name = "snakeviz" +version = "2.2.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d7/ce/fbaeed4f9fb8b2daa961f90591662df6a86c1abf25c548329a86920aedfb/soupsieve-2.6.tar.gz", hash = "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb", size = 101569 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d1/c2/fe97d779f3ef3b15f05c94a2f1e3d21732574ed441687474db9d342a7315/soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9", size = 36186 }, +dependencies = [ + { name = "tornado" }, ] - -[[package]] -name = "termcolor" -version = "2.5.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/37/72/88311445fd44c455c7d553e61f95412cf89054308a1aa2434ab835075fc5/termcolor-2.5.0.tar.gz", hash = "sha256:998d8d27da6d48442e8e1f016119076b690d962507531df4890fcd2db2ef8a6f", size = 13057 } +sdist = { url = "https://files.pythonhosted.org/packages/64/9b/3983c41e913676d55e4b3de869aa0561e053ad3505f1fd35181670244b70/snakeviz-2.2.0.tar.gz", hash = 
"sha256:7bfd00be7ae147eb4a170a471578e1cd3f41f803238958b6b8efcf2c698a6aa9", size = 181033 } wheels = [ - { url = "https://files.pythonhosted.org/packages/7f/be/df630c387a0a054815d60be6a97eb4e8f17385d5d6fe660e1c02750062b4/termcolor-2.5.0-py3-none-any.whl", hash = "sha256:37b17b5fc1e604945c2642c872a3764b5d547a48009871aea3edd3afa180afb8", size = 7755 }, + { url = "https://files.pythonhosted.org/packages/64/f6/d83a7003a1d104a08fc4623c0ac92ed45c394c18e79a5011a4ed87c67501/snakeviz-2.2.0-py2.py3-none-any.whl", hash = "sha256:569e2d71c47f80a886aa6e70d6405cb6d30aa3520969ad956b06f824c5f02b8e", size = 283662 }, ] [[package]] @@ -916,6 +943,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/41/73/02342de9c2d20922115f787e101527b831c0cffd2105c946c4a4826bcfd4/tqdm-4.66.6-py3-none-any.whl", hash = "sha256:223e8b5359c2efc4b30555531f09e9f2f3589bcd7fdd389271191031b49b7a63", size = 78326 }, ] +[[package]] +name = "typing-extensions" +version = "4.12.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/df/db/f35a00659bc03fec321ba8bce9420de607a1d37f8342eee1863174c69557/typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8", size = 85321 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/26/9f/ad63fc0248c5379346306f8668cda6e2e2e9c95e01216d2b8ffd9ff037d0/typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d", size = 37438 }, +] + [[package]] name = "tzdata" version = "2024.2" @@ -947,3 +983,42 @@ sdist = { url = "https://files.pythonhosted.org/packages/8c/b3/7b6a79c5c8cf6d90e wheels = [ { url = "https://files.pythonhosted.org/packages/ae/92/78324ff89391e00c8f4cf6b8526c41c6ef36b4ea2d2c132250b1a6fc2b8d/virtualenv-20.27.1-py3-none-any.whl", hash = "sha256:f11f1b8a29525562925f745563bfd48b189450f61fb34c4f9cc79dd5aa32a1f4", size = 3117838 }, ] + +[[package]] +name = "wrapt" 
+version = "1.16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/95/4c/063a912e20bcef7124e0df97282a8af3ff3e4b603ce84c481d6d7346be0a/wrapt-1.16.0.tar.gz", hash = "sha256:5f370f952971e7d17c7d1ead40e49f32345a7f7a5373571ef44d800d06b1899d", size = 53972 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a8/c6/5375258add3777494671d8cec27cdf5402abd91016dee24aa2972c61fedf/wrapt-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ffa565331890b90056c01db69c0fe634a776f8019c143a5ae265f9c6bc4bd6d4", size = 37315 }, + { url = "https://files.pythonhosted.org/packages/32/12/e11adfde33444986135d8881b401e4de6cbb4cced046edc6b464e6ad7547/wrapt-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e4fdb9275308292e880dcbeb12546df7f3e0f96c6b41197e0cf37d2826359020", size = 38160 }, + { url = "https://files.pythonhosted.org/packages/70/7d/3dcc4a7e96f8d3e398450ec7703db384413f79bd6c0196e0e139055ce00f/wrapt-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb2dee3874a500de01c93d5c71415fcaef1d858370d405824783e7a8ef5db440", size = 80419 }, + { url = "https://files.pythonhosted.org/packages/d1/c4/8dfdc3c2f0b38be85c8d9fdf0011ebad2f54e40897f9549a356bebb63a97/wrapt-1.16.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2a88e6010048489cda82b1326889ec075a8c856c2e6a256072b28eaee3ccf487", size = 72669 }, + { url = "https://files.pythonhosted.org/packages/49/83/b40bc1ad04a868b5b5bcec86349f06c1ee1ea7afe51dc3e46131e4f39308/wrapt-1.16.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac83a914ebaf589b69f7d0a1277602ff494e21f4c2f743313414378f8f50a4cf", size = 80271 }, + { url = "https://files.pythonhosted.org/packages/19/d4/cd33d3a82df73a064c9b6401d14f346e1d2fb372885f0295516ec08ed2ee/wrapt-1.16.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = 
"sha256:73aa7d98215d39b8455f103de64391cb79dfcad601701a3aa0dddacf74911d72", size = 84748 }, + { url = "https://files.pythonhosted.org/packages/ef/58/2fde309415b5fa98fd8f5f4a11886cbf276824c4c64d45a39da342fff6fe/wrapt-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:807cc8543a477ab7422f1120a217054f958a66ef7314f76dd9e77d3f02cdccd0", size = 77522 }, + { url = "https://files.pythonhosted.org/packages/07/44/359e4724a92369b88dbf09878a7cde7393cf3da885567ea898e5904049a3/wrapt-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bf5703fdeb350e36885f2875d853ce13172ae281c56e509f4e6eca049bdfb136", size = 84780 }, + { url = "https://files.pythonhosted.org/packages/88/8f/706f2fee019360cc1da652353330350c76aa5746b4e191082e45d6838faf/wrapt-1.16.0-cp310-cp310-win32.whl", hash = "sha256:f6b2d0c6703c988d334f297aa5df18c45e97b0af3679bb75059e0e0bd8b1069d", size = 35335 }, + { url = "https://files.pythonhosted.org/packages/19/2b/548d23362e3002ebbfaefe649b833fa43f6ca37ac3e95472130c4b69e0b4/wrapt-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:decbfa2f618fa8ed81c95ee18a387ff973143c656ef800c9f24fb7e9c16054e2", size = 37528 }, + { url = "https://files.pythonhosted.org/packages/fd/03/c188ac517f402775b90d6f312955a5e53b866c964b32119f2ed76315697e/wrapt-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1a5db485fe2de4403f13fafdc231b0dbae5eca4359232d2efc79025527375b09", size = 37313 }, + { url = "https://files.pythonhosted.org/packages/0f/16/ea627d7817394db04518f62934a5de59874b587b792300991b3c347ff5e0/wrapt-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:75ea7d0ee2a15733684badb16de6794894ed9c55aa5e9903260922f0482e687d", size = 38164 }, + { url = "https://files.pythonhosted.org/packages/7f/a7/f1212ba098f3de0fd244e2de0f8791ad2539c03bef6c05a9fcb03e45b089/wrapt-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a452f9ca3e3267cd4d0fcf2edd0d035b1934ac2bd7e0e57ac91ad6b95c0c6389", size = 80890 }, + { url = 
"https://files.pythonhosted.org/packages/b7/96/bb5e08b3d6db003c9ab219c487714c13a237ee7dcc572a555eaf1ce7dc82/wrapt-1.16.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:43aa59eadec7890d9958748db829df269f0368521ba6dc68cc172d5d03ed8060", size = 73118 }, + { url = "https://files.pythonhosted.org/packages/6e/52/2da48b35193e39ac53cfb141467d9f259851522d0e8c87153f0ba4205fb1/wrapt-1.16.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:72554a23c78a8e7aa02abbd699d129eead8b147a23c56e08d08dfc29cfdddca1", size = 80746 }, + { url = "https://files.pythonhosted.org/packages/11/fb/18ec40265ab81c0e82a934de04596b6ce972c27ba2592c8b53d5585e6bcd/wrapt-1.16.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d2efee35b4b0a347e0d99d28e884dfd82797852d62fcd7ebdeee26f3ceb72cf3", size = 85668 }, + { url = "https://files.pythonhosted.org/packages/0f/ef/0ecb1fa23145560431b970418dce575cfaec555ab08617d82eb92afc7ccf/wrapt-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:6dcfcffe73710be01d90cae08c3e548d90932d37b39ef83969ae135d36ef3956", size = 78556 }, + { url = "https://files.pythonhosted.org/packages/25/62/cd284b2b747f175b5a96cbd8092b32e7369edab0644c45784871528eb852/wrapt-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:eb6e651000a19c96f452c85132811d25e9264d836951022d6e81df2fff38337d", size = 85712 }, + { url = "https://files.pythonhosted.org/packages/e5/a7/47b7ff74fbadf81b696872d5ba504966591a3468f1bc86bca2f407baef68/wrapt-1.16.0-cp311-cp311-win32.whl", hash = "sha256:66027d667efe95cc4fa945af59f92c5a02c6f5bb6012bff9e60542c74c75c362", size = 35327 }, + { url = "https://files.pythonhosted.org/packages/cf/c3/0084351951d9579ae83a3d9e38c140371e4c6b038136909235079f2e6e78/wrapt-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:aefbc4cb0a54f91af643660a0a150ce2c090d3652cf4052a5397fb2de549cd89", size = 37523 }, + { url = 
"https://files.pythonhosted.org/packages/92/17/224132494c1e23521868cdd57cd1e903f3b6a7ba6996b7b8f077ff8ac7fe/wrapt-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5eb404d89131ec9b4f748fa5cfb5346802e5ee8836f57d516576e61f304f3b7b", size = 37614 }, + { url = "https://files.pythonhosted.org/packages/6a/d7/cfcd73e8f4858079ac59d9db1ec5a1349bc486ae8e9ba55698cc1f4a1dff/wrapt-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9090c9e676d5236a6948330e83cb89969f433b1943a558968f659ead07cb3b36", size = 38316 }, + { url = "https://files.pythonhosted.org/packages/7e/79/5ff0a5c54bda5aec75b36453d06be4f83d5cd4932cc84b7cb2b52cee23e2/wrapt-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94265b00870aa407bd0cbcfd536f17ecde43b94fb8d228560a1e9d3041462d73", size = 86322 }, + { url = "https://files.pythonhosted.org/packages/c4/81/e799bf5d419f422d8712108837c1d9bf6ebe3cb2a81ad94413449543a923/wrapt-1.16.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f2058f813d4f2b5e3a9eb2eb3faf8f1d99b81c3e51aeda4b168406443e8ba809", size = 79055 }, + { url = "https://files.pythonhosted.org/packages/62/62/30ca2405de6a20448ee557ab2cd61ab9c5900be7cbd18a2639db595f0b98/wrapt-1.16.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98b5e1f498a8ca1858a1cdbffb023bfd954da4e3fa2c0cb5853d40014557248b", size = 87291 }, + { url = "https://files.pythonhosted.org/packages/49/4e/5d2f6d7b57fc9956bf06e944eb00463551f7d52fc73ca35cfc4c2cdb7aed/wrapt-1.16.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:14d7dc606219cdd7405133c713f2c218d4252f2a469003f8c46bb92d5d095d81", size = 90374 }, + { url = "https://files.pythonhosted.org/packages/a6/9b/c2c21b44ff5b9bf14a83252a8b973fb84923764ff63db3e6dfc3895cf2e0/wrapt-1.16.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:49aac49dc4782cb04f58986e81ea0b4768e4ff197b57324dcbd7699c5dfb40b9", size = 83896 }, + 
{ url = "https://files.pythonhosted.org/packages/14/26/93a9fa02c6f257df54d7570dfe8011995138118d11939a4ecd82cb849613/wrapt-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:418abb18146475c310d7a6dc71143d6f7adec5b004ac9ce08dc7a34e2babdc5c", size = 91738 }, + { url = "https://files.pythonhosted.org/packages/a2/5b/4660897233eb2c8c4de3dc7cefed114c61bacb3c28327e64150dc44ee2f6/wrapt-1.16.0-cp312-cp312-win32.whl", hash = "sha256:685f568fa5e627e93f3b52fda002c7ed2fa1800b50ce51f6ed1d572d8ab3e7fc", size = 35568 }, + { url = "https://files.pythonhosted.org/packages/5c/cc/8297f9658506b224aa4bd71906447dea6bb0ba629861a758c28f67428b91/wrapt-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:dcdba5c86e368442528f7060039eda390cc4091bfd1dca41e8046af7c910dda8", size = 37653 }, + { url = "https://files.pythonhosted.org/packages/ff/21/abdedb4cdf6ff41ebf01a74087740a709e2edb146490e4d9beea054b0b7a/wrapt-1.16.0-py3-none-any.whl", hash = "sha256:6906c4100a8fcbf2fa735f6059214bb13b97f75b1a61777fcf6432121ef12ef1", size = 23362 }, +]