Skip to content

Commit

Permalink
warning update
Browse files Browse the repository at this point in the history
Signed-off-by: Praateek <praateekm@gmail.com>
  • Loading branch information
praateekmahajan committed Nov 18, 2024
1 parent 76d7b4d commit 75eca85
Showing 1 changed file with 8 additions and 4 deletions.
12 changes: 8 additions & 4 deletions nemo_curator/modules/fuzzy_dedup.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,9 @@ def minhash32(

if not MINHASH_PERMUTED_AVAILABLE:
warnings.warn(
"Using an outdated minhash implementation, please update to cuDF version 24.12"
" or later for improved performance",
"Using an outdated minhash implementation, please update to cuDF version 24.12 "
"or later for improved performance. "
"Install the latest version of cuDF using `pip install curator[cuda12x_nightly]`",
category=FutureWarning,
)
seeds = cudf.Series(seeds, dtype="uint32")
Expand All @@ -198,8 +199,11 @@ def minhash64(
if not isinstance(ser, cudf.Series):
raise TypeError("Expected data of type cudf.Series")
if not MINHASH_PERMUTED_AVAILABLE:
self._logger.warning(
"Using an older implementation of minhash, update to cudf >= 24.12"
warnings.warn(
"Using an outdated minhash implementation, please update to cuDF version 24.12 "
"or later for improved performance. "
"Install the latest version of cuDF using `pip install curator[cuda12x_nightly]`",
category=FutureWarning,
)
seeds = cudf.Series(seeds, dtype="uint64")
return ser.str.minhash64(seeds=seeds, width=char_ngram)
Expand Down

0 comments on commit 75eca85

Please sign in to comment.