LookupError when trying to augment data with SynonymAug #332

moonman239 · 2023-03-08T01:12:43Z

When I try to run this code:

#augment data
import importlib
import os
import nltk
os.environ["MODEL_DIR"] = '../model'
import nlpaug.augmenter.char as nac
import nlpaug.augmenter.word as naw
import nlpaug.augmenter.sentence as nas
import nlpaug.flow as nafc

from nlpaug.util import Action

aug = naw.SynonymAug(aug_src='wordnet', lang='spa')
print(aug.augment("Hola Mundo"))

I get this error (with traceback):

---------------------------------------------------------------------------
LookupError                               Traceback (most recent call last)
/opt/conda/lib/python3.7/site-packages/nltk/corpus/util.py in __load(self)
     79             except LookupError as e:
---> 80                 try: root = nltk.data.find('{}/{}'.format(self.subdir, zip_name))
     81                 except LookupError: raise e

/opt/conda/lib/python3.7/site-packages/nltk/data.py in find(resource_name, paths)
    652     resource_not_found = '\n%s\n%s\n%s' % (sep, msg, sep)
--> 653     raise LookupError(resource_not_found)
    654 

LookupError: 
**********************************************************************
  Resource 'corpora/wordnet.zip/wordnet/.zip/' not found.  Please
  use the NLTK Downloader to obtain the resource:  >>>
  nltk.download()
  Searched in:
    - '/root/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/local/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/local/lib/nltk_data'
**********************************************************************

During handling of the above exception, another exception occurred:

LookupError                               Traceback (most recent call last)
/tmp/ipykernel_27/2829682906.py in <module>
     12 
     13 aug = naw.SynonymAug(aug_src='wordnet', lang='spa')
---> 14 print(aug.augment("Hola Mundo"))
     15 print("augmenting")
     16 augmented_titles = df["title"].apply(aug.augment)

/opt/conda/lib/python3.7/site-packages/nlpaug/base_augmenter.py in augment(self, data, n, num_thread)
    117                 # Single Thread
    118                 if num_thread == 1:
--> 119                     augmented_results = [action_fx(clean_data) for _ in range(n)]
    120 
    121                 # Multi Thread

/opt/conda/lib/python3.7/site-packages/nlpaug/base_augmenter.py in <listcomp>(.0)
    117                 # Single Thread
    118                 if num_thread == 1:
--> 119                     augmented_results = [action_fx(clean_data) for _ in range(n)]
    120 
    121                 # Multi Thread

/opt/conda/lib/python3.7/site-packages/nlpaug/augmenter/word/synonym.py in substitute(self, data)
    137             else:
    138                 for word_pos in word_poses:
--> 139                     candidates.extend(self.model.predict(pos[aug_idx][0], pos=word_pos))
    140 
    141             candidates = [c for c in candidates if c.lower() != original_token.lower()]

/opt/conda/lib/python3.7/site-packages/nlpaug/model/word_dict/wordnet.py in predict(self, word, pos)
     44     def predict(self, word, pos=None):
     45         results = []
---> 46         for synonym in self.model.synsets(word, pos=pos, lang=self.lang):
     47             for lemma in synonym.lemmas(lang=self.lang):
     48                 if self.is_synonym:

/opt/conda/lib/python3.7/site-packages/nltk/corpus/util.py in __getattr__(self, attr)
    114             raise AttributeError("LazyCorpusLoader object has no attribute '__bases__'")
    115 
--> 116         self.__load()
    117         # This looks circular, but its not, since __load() changes our
    118         # __class__ to something new:

/opt/conda/lib/python3.7/site-packages/nltk/corpus/util.py in __load(self)
     79             except LookupError as e:
     80                 try: root = nltk.data.find('{}/{}'.format(self.subdir, zip_name))
---> 81                 except LookupError: raise e
     82 
     83         # Load the corpus.

/opt/conda/lib/python3.7/site-packages/nltk/corpus/util.py in __load(self)
     76         else:
     77             try:
---> 78                 root = nltk.data.find('{}/{}'.format(self.subdir, self.__name))
     79             except LookupError as e:
     80                 try: root = nltk.data.find('{}/{}'.format(self.subdir, zip_name))

/opt/conda/lib/python3.7/site-packages/nltk/data.py in find(resource_name, paths)
    651     sep = '*' * 70
    652     resource_not_found = '\n%s\n%s\n%s' % (sep, msg, sep)
--> 653     raise LookupError(resource_not_found)
    654 
    655 

LookupError: 
**********************************************************************
  Resource 'corpora/wordnet' not found.  Please use the NLTK
  Downloader to obtain the resource:  >>> nltk.download()
  Searched in:
    - '/root/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/local/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/local/lib/nltk_data'
**********************************************************************

My installed nltk version is >= 3.4.5.

The text was updated successfully, but these errors were encountered:

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

LookupError when trying to augment data with SynonymAug #332

LookupError when trying to augment data with SynonymAug #332

moonman239 commented Mar 8, 2023

LookupError when trying to augment data with SynonymAug #332

LookupError when trying to augment data with SynonymAug #332

Comments

moonman239 commented Mar 8, 2023