Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LookupError when trying to augment data with SynonymAug #332

Open
moonman239 opened this issue Mar 8, 2023 · 0 comments
Open

LookupError when trying to augment data with SynonymAug #332

moonman239 opened this issue Mar 8, 2023 · 0 comments

Comments

@moonman239
Copy link

When I try to run this code:

#augment data
import importlib
import os
import nltk
os.environ["MODEL_DIR"] = '../model'
import nlpaug.augmenter.char as nac
import nlpaug.augmenter.word as naw
import nlpaug.augmenter.sentence as nas
import nlpaug.flow as nafc

from nlpaug.util import Action

aug = naw.SynonymAug(aug_src='wordnet', lang='spa')
print(aug.augment("Hola Mundo"))

I get this error (with traceback):

---------------------------------------------------------------------------
LookupError                               Traceback (most recent call last)
/opt/conda/lib/python3.7/site-packages/nltk/corpus/util.py in __load(self)
     79             except LookupError as e:
---> 80                 try: root = nltk.data.find('{}/{}'.format(self.subdir, zip_name))
     81                 except LookupError: raise e

/opt/conda/lib/python3.7/site-packages/nltk/data.py in find(resource_name, paths)
    652     resource_not_found = '\n%s\n%s\n%s' % (sep, msg, sep)
--> 653     raise LookupError(resource_not_found)
    654 

LookupError: 
**********************************************************************
  Resource 'corpora/wordnet.zip/wordnet/.zip/' not found.  Please
  use the NLTK Downloader to obtain the resource:  >>>
  nltk.download()
  Searched in:
    - '/root/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/local/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/local/lib/nltk_data'
**********************************************************************

During handling of the above exception, another exception occurred:

LookupError                               Traceback (most recent call last)
/tmp/ipykernel_27/2829682906.py in <module>
     12 
     13 aug = naw.SynonymAug(aug_src='wordnet', lang='spa')
---> 14 print(aug.augment("Hola Mundo"))
     15 print("augmenting")
     16 augmented_titles = df["title"].apply(aug.augment)

/opt/conda/lib/python3.7/site-packages/nlpaug/base_augmenter.py in augment(self, data, n, num_thread)
    117                 # Single Thread
    118                 if num_thread == 1:
--> 119                     augmented_results = [action_fx(clean_data) for _ in range(n)]
    120 
    121                 # Multi Thread

/opt/conda/lib/python3.7/site-packages/nlpaug/base_augmenter.py in <listcomp>(.0)
    117                 # Single Thread
    118                 if num_thread == 1:
--> 119                     augmented_results = [action_fx(clean_data) for _ in range(n)]
    120 
    121                 # Multi Thread

/opt/conda/lib/python3.7/site-packages/nlpaug/augmenter/word/synonym.py in substitute(self, data)
    137             else:
    138                 for word_pos in word_poses:
--> 139                     candidates.extend(self.model.predict(pos[aug_idx][0], pos=word_pos))
    140 
    141             candidates = [c for c in candidates if c.lower() != original_token.lower()]

/opt/conda/lib/python3.7/site-packages/nlpaug/model/word_dict/wordnet.py in predict(self, word, pos)
     44     def predict(self, word, pos=None):
     45         results = []
---> 46         for synonym in self.model.synsets(word, pos=pos, lang=self.lang):
     47             for lemma in synonym.lemmas(lang=self.lang):
     48                 if self.is_synonym:

/opt/conda/lib/python3.7/site-packages/nltk/corpus/util.py in __getattr__(self, attr)
    114             raise AttributeError("LazyCorpusLoader object has no attribute '__bases__'")
    115 
--> 116         self.__load()
    117         # This looks circular, but its not, since __load() changes our
    118         # __class__ to something new:

/opt/conda/lib/python3.7/site-packages/nltk/corpus/util.py in __load(self)
     79             except LookupError as e:
     80                 try: root = nltk.data.find('{}/{}'.format(self.subdir, zip_name))
---> 81                 except LookupError: raise e
     82 
     83         # Load the corpus.

/opt/conda/lib/python3.7/site-packages/nltk/corpus/util.py in __load(self)
     76         else:
     77             try:
---> 78                 root = nltk.data.find('{}/{}'.format(self.subdir, self.__name))
     79             except LookupError as e:
     80                 try: root = nltk.data.find('{}/{}'.format(self.subdir, zip_name))

/opt/conda/lib/python3.7/site-packages/nltk/data.py in find(resource_name, paths)
    651     sep = '*' * 70
    652     resource_not_found = '\n%s\n%s\n%s' % (sep, msg, sep)
--> 653     raise LookupError(resource_not_found)
    654 
    655 

LookupError: 
**********************************************************************
  Resource 'corpora/wordnet' not found.  Please use the NLTK
  Downloader to obtain the resource:  >>> nltk.download()
  Searched in:
    - '/root/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/local/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/local/lib/nltk_data'
**********************************************************************

The NLTK version is >= 3.4.5.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant