Fix pre-commit and re-run linters/fixers
WarmCyan committed Aug 30, 2022
1 parent f1d5e88 commit 656f481
Showing 9 changed files with 197 additions and 23 deletions.
2 changes: 1 addition & 1 deletion .gitignore
@@ -11,4 +11,4 @@ sphinx/.buildinfo
tx2.egg-info
dist/*
build/*
data/*
data/*
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -7,6 +7,6 @@ repos:
- id: trailing-whitespace
- id: flake8
- repo: https://github.com/psf/black
rev: 19.3b0
rev: 22.6.0
hooks:
- id: black
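Bumping black from 19.3b0 to 22.6.0 and re-running the hooks (for example with `pre-commit run --all-files`) is presumably what produced the mechanical reformatting in the Python files below.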
2 changes: 1 addition & 1 deletion CHANGELOG.md
@@ -17,7 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Updating example jupyter notebooks to use new versions of packages.
- Datasources in jupyter example notebooks.

### Fixed
### Fixed
- Updated to patched numpy version 1.22.
- Potential issue in calc.frequent_words_in_cluster() where clusters of empty
string values would stop computation.
2 changes: 1 addition & 1 deletion tests/test_wrapper.py
@@ -48,7 +48,7 @@ def test_wrapper_prepare_no_crash(

wrapper.prepare(umap_args=dict(n_neighbors=2))


def test_wrapper_np_prepare_no_crash(
dummy_np_data, dummy_encodings, dummy_model, clear_files_teardown
):
183 changes: 182 additions & 1 deletion tx2/__init__.py
@@ -1,3 +1,184 @@
__version__ = "1.1.0"

STOPWORDS = ['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 's', 't', 'can', 'will', 'just', 'don', "don't", 'should', "should've", 'now', 'd', 'll', 'm', 'o', 're', 've', 'y', 'ain', 'aren', "aren't", 'couldn', "couldn't", 'didn', "didn't", 'doesn', "doesn't", 'hadn', "hadn't", 'hasn', "hasn't", 'haven', "haven't", 'isn', "isn't", 'ma', 'mightn', "mightn't", 'mustn', "mustn't", 'needn', "needn't", 'shan', "shan't", 'shouldn', "shouldn't", 'wasn', "wasn't", 'weren', "weren't", 'won', "won't", 'wouldn', "wouldn't", "via"]
STOPWORDS = [
"i",
"me",
"my",
"myself",
"we",
"our",
"ours",
"ourselves",
"you",
"you're",
"you've",
"you'll",
"you'd",
"your",
"yours",
"yourself",
"yourselves",
"he",
"him",
"his",
"himself",
"she",
"she's",
"her",
"hers",
"herself",
"it",
"it's",
"its",
"itself",
"they",
"them",
"their",
"theirs",
"themselves",
"what",
"which",
"who",
"whom",
"this",
"that",
"that'll",
"these",
"those",
"am",
"is",
"are",
"was",
"were",
"be",
"been",
"being",
"have",
"has",
"had",
"having",
"do",
"does",
"did",
"doing",
"a",
"an",
"the",
"and",
"but",
"if",
"or",
"because",
"as",
"until",
"while",
"of",
"at",
"by",
"for",
"with",
"about",
"against",
"between",
"into",
"through",
"during",
"before",
"after",
"above",
"below",
"to",
"from",
"up",
"down",
"in",
"out",
"on",
"off",
"over",
"under",
"again",
"further",
"then",
"once",
"here",
"there",
"when",
"where",
"why",
"how",
"all",
"any",
"both",
"each",
"few",
"more",
"most",
"other",
"some",
"such",
"no",
"nor",
"not",
"only",
"own",
"same",
"so",
"than",
"too",
"very",
"s",
"t",
"can",
"will",
"just",
"don",
"don't",
"should",
"should've",
"now",
"d",
"ll",
"m",
"o",
"re",
"ve",
"y",
"ain",
"aren",
"aren't",
"couldn",
"couldn't",
"didn",
"didn't",
"doesn",
"doesn't",
"hadn",
"hadn't",
"hasn",
"hasn't",
"haven",
"haven't",
"isn",
"isn't",
"ma",
"mightn",
"mightn't",
"mustn",
"mustn't",
"needn",
"needn't",
"shan",
"shan't",
"shouldn",
"shouldn't",
"wasn",
"wasn't",
"weren",
"weren't",
"won",
"won't",
"wouldn",
"wouldn't",
"via",
]
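A minimal usage sketch for this list, mirroring how tx2.calc feeds it to scikit-learn's CountVectorizer in the hunks below (the sample texts here are made up for illustration):

from sklearn.feature_extraction.text import CountVectorizer

from tx2 import STOPWORDS

# Count non-stopword tokens across a tiny illustrative corpus.
counter = CountVectorizer(stop_words=STOPWORDS)
counts = counter.fit_transform(["the cat sat on the mat", "a dog chased the cat"])
print(list(zip(counter.get_feature_names_out(), counts.toarray().sum(axis=0))))
# [('cat', 2), ('chased', 1), ('dog', 1), ('mat', 1), ('sat', 1)]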
12 changes: 6 additions & 6 deletions tx2/calc.py
@@ -137,7 +137,7 @@ def frequent_words_in_cluster(
:return: A list of tuples, each tuple containing the word and the number of times
it appears in that cluster.
"""
freq_words = [("",0)]
freq_words = [("", 0)]
try:
counter = CountVectorizer(stop_words=STOPWORDS)
cv_fit = counter.fit_transform(texts)
@@ -146,8 +146,10 @@
key=lambda x: x[1],
reverse=True,
)
except ValueError as e:
logging.warning(f"ValueError in frequent_words_in_cluster. Could be caused by cluster of empty text.")
except ValueError:
logging.warning(
"ValueError in frequent_words_in_cluster. Could be caused by cluster of empty text."
)
finally:
return freq_words
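The except branch matters because scikit-learn raises a ValueError when every text in a cluster is empty or consists only of stop words, which is the failure the CHANGELOG entry above describes. A quick illustration using standard scikit-learn behavior:

from sklearn.feature_extraction.text import CountVectorizer

try:
    # A cluster made entirely of empty strings leaves nothing to vectorize.
    CountVectorizer().fit_transform(["", ""])
except ValueError as err:
    print(err)  # "empty vocabulary; perhaps the documents only contain stop words"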

@@ -186,9 +188,7 @@ def frequent_words_by_class_in_cluster(
# iterate through each classification and get the number of entries with that word in it
for classification in encodings.values():
local_df = working_df[working_df.target == classification]
counter = CountVectorizer(
stop_words=STOPWORDS, vocabulary=vocab
)
counter = CountVectorizer(stop_words=STOPWORDS, vocabulary=vocab)
cv_fit = counter.fit_transform(local_df.text.values)
class_freq_words = list(
zip(counter.get_feature_names_out(), cv_fit.toarray().sum(axis=0))
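The vocabulary= argument in the call above is what keeps per-class counts aligned with the cluster-wide word list; a small sketch of that pattern (the vocab and texts are hypothetical):

from sklearn.feature_extraction.text import CountVectorizer

from tx2 import STOPWORDS

vocab = ["cat", "dog", "mat"]  # hypothetical cluster-wide vocabulary
counter = CountVectorizer(stop_words=STOPWORDS, vocabulary=vocab)
counts = counter.fit_transform(["the cat sat on the mat", "dog dog"])
# Columns follow the fixed vocabulary, so counts computed per class stay comparable.
print(dict(zip(counter.get_feature_names_out(), counts.toarray().sum(axis=0))))
# {'cat': 1, 'dog': 2, 'mat': 1}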
1 change: 0 additions & 1 deletion tx2/utils.py
@@ -1,5 +1,4 @@
import asyncio
import logging
import numpy as np
from torch import cuda, has_mps
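These torch imports back the device fallback documented in the wrapper.py docstring below. A plausible sketch of that check (the helper name is invented here; this is not necessarily the wrapper's exact code):

from torch import cuda, has_mps

def pick_device() -> str:
    # Documented fallback order: "cuda" if a GPU is found, then "mps", else "cpu".
    if cuda.is_available():
        return "cuda"
    if has_mps:
        return "mps"
    return "cpu"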

12 changes: 3 additions & 9 deletions tx2/visualization.py
@@ -3,13 +3,11 @@
import itertools
import math
from typing import Dict, List, Union
import logging

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from IPython.core.interactiveshell import InteractiveShell
from IPython.display import clear_output, display
from matplotlib.lines import Line2D
from sklearn.metrics import confusion_matrix
from wordcloud import WordCloud
@@ -384,8 +382,8 @@ def plot_embedding_projections(text, dashboard, prediction=None):

# if text differs from the selected index, render new point
if (
dashboard.prior_reference_point is None or
dashboard.prior_reference_text != text
dashboard.prior_reference_point is None
or dashboard.prior_reference_text != text
):
text_projection = dashboard.transformer_wrapper.project([text])[0]
if prediction is None:
@@ -471,11 +469,7 @@ def plot_embedding_projections(text, dashboard, prediction=None):
fmt = InteractiveShell.instance().display_formatter.format
data, metadata = fmt(fig)
dashboard.out_projection_scatter.outputs = (
{
'output_type': 'display_data',
'data': data,
'metadata': metadata
},
{"output_type": "display_data", "data": data, "metadata": metadata},
)

dashboard.html_graph_status.value = (
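The reformatted dict in the last hunk is the raw display-message payload that an ipywidgets Output widget accepts. A self-contained sketch of the pattern (the figure contents are illustrative; assigning outputs directly is presumably why the clear_output/display imports above were dropped):

import ipywidgets
import matplotlib.pyplot as plt
from IPython.core.interactiveshell import InteractiveShell

out = ipywidgets.Output()
fig, ax = plt.subplots()
ax.plot([0, 1], [0, 1])

# Format the figure into MIME data/metadata, then set the widget's outputs
# directly rather than clearing and re-displaying inside a context manager.
fmt = InteractiveShell.instance().display_formatter.format
data, metadata = fmt(fig)
out.outputs = ({"output_type": "display_data", "data": data, "metadata": metadata},)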
4 changes: 2 additions & 2 deletions tx2/wrapper.py
@@ -61,8 +61,8 @@ class is being used and has a layer representing the output of just the language
<https://huggingface.co/transformers/main_classes/tokenizer.html>`_. Note that **this
argument is not required**, if the user intends to manually specify encode and
classification functions.
:param device: Set the device for pytorch to place tensors on, pass either "cpu",
"cuda", or "mps". This variable is used by the default embedding function.
:param device: Set the device for pytorch to place tensors on, pass either "cpu",
"cuda", or "mps". This variable is used by the default embedding function.
If unspecified, "cuda" or "mps" will be used if GPU is found, otherwise it defaults to "cpu".
:param cache_path: The directory path to cache intermediate outputs from the
:meth:`tx2.wrapper.Wrapper.prepare` function. This allows the wrapper to precompute
