FIX: remove imblearn (#665)
* FIX: remove imblearn

* FIX: remove config option

* FIX: remove oversample test
benleetownsend authored Nov 18, 2021
1 parent fbf8310 commit 31fe2ef
Showing 4 changed files with 0 additions and 29 deletions.
2 changes: 0 additions & 2 deletions finetune/config.py
@@ -116,7 +116,6 @@ class Settings(dict):
:param num_layers_trained: How many layers to finetune. Specifying a value less than model's number of layers will train layers starting from model output. Defaults to `12`.
:param train_embeddings: Should embedding layer be finetuned? Defaults to `True`.
:param class_weights: One of 'log', 'linear', or 'sqrt'. Auto-scales gradient updates based on class frequency. Can also be a dictionary that maps from true class name to loss coefficient. Defaults to `None`.
- :param oversample: Should rare classes be oversampled? Defaults to `False`.
:param eval_acc: if True, calculates accuracy and writes it to the tensorboard summary files for validation runs.
:param save_dtype: specifies what precision to save model weights with. Defaults to `np.float32`.
:param regression_loss: the loss to use for regression models. One of `L1` or `L2`, defaults to `L2`.
@@ -244,7 +243,6 @@ def get_default_config():
#
# Class Imbalance
class_weights=None,
- oversample=False,
#
# Optimization Params
optimizer="AdamW",
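With `oversample` removed, the `class_weights` option documented above remains the built-in way to handle class imbalance. A minimal usage sketch, illustrative only and not part of this commit (it assumes the `Classifier` API used in the tests shown later in this diff):

    from finetune import Classifier

    # Auto-scale gradient updates by class frequency; "log", "linear", and
    # "sqrt" are the documented modes.
    model = Classifier(class_weights="log")
    model.fit(["great", "awful", "awful"], ["positive", "negative", "negative"])

    # Or pass explicit per-class loss coefficients (values are illustrative).
    model = Classifier(class_weights={"negative": 1.0, "positive": 3.0})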
16 changes: 0 additions & 16 deletions finetune/target_models/classifier.py
@@ -4,7 +4,6 @@
import tensorflow as tf
import tensorflow_addons as tfa
import numpy as np
- from imblearn.over_sampling import RandomOverSampler
from sklearn.utils import shuffle

from finetune.base import BaseModel
@@ -16,21 +15,6 @@


class ClassificationPipeline(BasePipeline):
-    def resampling(self, Xs, Y, context=None):
-        if context is not None:
-            if self.config.oversample:
-                idxs, Ys, contexts = shuffle(
-                    *RandomOverSampler().fit_sample([[i] for i in range(len(Xs))], Y, context)
-                )
-                return [Xs[i[0]] for i in idxs], Ys, contexts
-            return Xs, Y, context
-        else:
-            if self.config.oversample:
-                idxs, Ys = shuffle(
-                    *RandomOverSampler().fit_sample([[i] for i in range(len(Xs))], Y)
-                )
-                return [Xs[i[0]] for i in idxs], Ys, None
-            return Xs, Y, None

    def _target_encoder(self):
        return OneHotLabelEncoder()
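For reference, the removed method duplicated minority-class examples at random (via imblearn's RandomOverSampler) and then shuffled the result. A rough, dependency-free equivalent is sketched below purely for illustration; it is not part of this commit and the function name is hypothetical:

    import numpy as np

    def random_oversample(Xs, Y, seed=None):
        # Duplicate minority-class examples at random until every class
        # matches the majority class count, then shuffle the indices.
        rng = np.random.default_rng(seed)
        Y = np.asarray(Y)
        classes, counts = np.unique(Y, return_counts=True)
        majority = counts.max()
        idxs = []
        for cls, count in zip(classes, counts):
            cls_idxs = np.flatnonzero(Y == cls)
            idxs.extend(cls_idxs.tolist())
            # Sample with replacement to top the class up to the majority count.
            idxs.extend(rng.choice(cls_idxs, size=majority - count, replace=True).tolist())
        idxs = np.array(idxs)
        rng.shuffle(idxs)
        return [Xs[i] for i in idxs], [Y[i] for i in idxs]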
1 change: 0 additions & 1 deletion requirements.txt
@@ -10,7 +10,6 @@ pytest>=3.6.3
h5py>=2.8.0
joblib>=0.12.0
bs4>=0.0.1
- imbalanced-learn>=0.6.0,<0.7.0
nltk>=3.2.4
regex>=2019.03.12
lxml>=4.3.3
10 changes: 0 additions & 10 deletions tests/test_classifier.py
@@ -214,16 +214,6 @@ def test_fit_predict_low_memory(self):
        for proba in probabilities:
            self.assertIsInstance(proba, dict)

-    def test_oversample(self):
-        """
-        Ensure model training does not error out when oversampling is set to True
-        """
-
-        model = Classifier(**self.default_config())
-        model.config.oversample = True
-        train_sample = self.dataset.sample(n=self.n_sample)
-        model.fit(train_sample.Text.values, train_sample.Target.values)
-
    def test_class_weights(self):
        # testing class weights
        train_sample = self.dataset.sample(n=self.n_sample * 3)
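The removed test only checked that a plain fit call survives config.oversample = True. A hypothetical equivalent using the retained class_weights option would follow the same pattern; it is shown only as an illustration, is not part of this commit, and the existing test_class_weights above already covers this path:

    def test_class_weights_sqrt(self):
        # Hypothetical: ensure training does not error out with auto-scaled
        # class weights; mirrors the fixtures used by the surrounding tests.
        model = Classifier(**self.default_config())
        model.config.class_weights = "sqrt"
        train_sample = self.dataset.sample(n=self.n_sample)
        model.fit(train_sample.Text.values, train_sample.Target.values)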

