Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unable to run quick_start_demo #64

Open
nikogamulin opened this issue Dec 24, 2023 · 0 comments
Open

Unable to run quick_start_demo #64

nikogamulin opened this issue Dec 24, 2023 · 0 comments

Comments

@nikogamulin
Copy link

Hi,

When I run the second cell, I get the following error:

`---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
Cell In[2], line 5
2 from papermage.recipes import CoreRecipe
3 fixture_path = pathlib.Path(pwd).parent / "tests/fixtures"
----> 5 recipe = CoreRecipe()
6 doc = recipe.run(fixture_path / "papermage.pdf")

File ~/workspace/papermage/papermage/recipes/core_recipe.py:94, in CoreRecipe.__init__(self, ivila_predictor_path, bio_roberta_predictor_path, svm_word_predictor_path, dpi)
92 with warnings.catch_warnings():
93 warnings.simplefilter("ignore")
---> 94 self.word_predictor = SVMWordPredictor.from_path(svm_word_predictor_path)
96 self.publaynet_block_predictor = LPEffDetPubLayNetBlockPredictor.from_pretrained()
97 self.ivila_predictor = IVILATokenClassificationPredictor.from_pretrained(ivila_predictor_path)

File ~/workspace/papermage/papermage/predictors/word_predictors.py:227, in SVMWordPredictor.from_path(cls, tar_path)
225 @classmethod
226 def from_path(cls, tar_path: str):
--> 227 classifier = SVMClassifier.from_path(tar_path=tar_path)
228 predictor = SVMWordPredictor(classifier=classifier)
229 return predictor

File ~/workspace/papermage/papermage/predictors/word_predictors.py:107, in SVMClassifier.from_path(cls, tar_path)
105 with tarfile.open(tar_path, "r:gz") as tar:
106 tar.extractall(path=tmp_dir)
--> 107 return cls.from_directory(tmp_dir)

File ~/workspace/papermage/papermage/predictors/word_predictors.py:111, in SVMClassifier.from_directory(cls, dir)
109 @classmethod
110 def from_directory(cls, dir: str):
--> 111 classifier = SVMClassifier.from_paths(
112 ohe_encoder_path=os.path.join(dir, "svm_word_predictor/ohencoder.joblib"),
113 scaler_path=os.path.join(dir, "svm_word_predictor/scaler.joblib"),
114 estimator_path=os.path.join(dir, "svm_word_predictor/hyphen_clf.joblib"),
115 unigram_probs_path=os.path.join(dir, "svm_word_predictor/unigram_probs.pkl"),
116 )
117 return classifier

File ~/workspace/papermage/papermage/predictors/word_predictors.py:128, in SVMClassifier.from_paths(cls, ohe_encoder_path, scaler_path, estimator_path, unigram_probs_path)
119 @classmethod
120 def from_paths(
121 cls,
(...)
125 unigram_probs_path: str,
126 ):
127 ohe_encoder = load(ohe_encoder_path)
--> 128 scaler = load(scaler_path)
129 estimator = load(estimator_path)
130 unigram_probs = load(unigram_probs_path)

File ~/anaconda3/envs/papermage/lib/python3.11/site-packages/joblib/numpy_pickle.py:587, in load(filename, mmap_mode)
581 if isinstance(fobj, str):
582 # if the returned file object is a string, this means we
583 # try to load a pickle file generated with an version of
584 # Joblib so we load it with joblib compatibility function.
585 return load_compatibility(fobj)
--> 587 obj = _unpickle(fobj, filename, mmap_mode)
588 return obj

File ~/anaconda3/envs/papermage/lib/python3.11/site-packages/joblib/numpy_pickle.py:506, in _unpickle(fobj, filename, mmap_mode)
504 obj = None
505 try:
--> 506 obj = unpickler.load()
507 if unpickler.compat_mode:
508 warnings.warn("The file '%s' has been generated with a "
509 "joblib version less than 0.10. "
510 "Please regenerate this pickle file."
511 % filename,
512 DeprecationWarning, stacklevel=3)

File ~/anaconda3/envs/papermage/lib/python3.11/pickle.py:1213, in _Unpickler.load(self)
1211 raise EOFError
1212 assert isinstance(key, bytes_types)
-> 1213 dispatch[key[0]](self)
1214 except _Stop as stopinst:
1215 return stopinst.value

KeyError: 173`

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant