
Commit

Cross-encoder: remove some unused and tidy up some comments
Signed-off-by: Mark Sturdevant <mark.sturdevant@ibm.com>
markstur committed Sep 12, 2024
1 parent 2cb6183 commit 8fa67cc
Showing 1 changed file with 4 additions and 6 deletions.
caikit_nlp/modules/text_embedding/crossencoder.py (10 changes: 4 additions & 6 deletions)
@@ -514,19 +514,17 @@ def _truncation_needed(self, encoding, texts):
 
         # Find the last offset by counting attn masks
         # and keeping the last non-zero offset end.
-        token_count = 0
         index = 0  # index of longest
         type_id = 0  # track type_id of longest
 
         for n, attn in enumerate(attn_mask):
             if attn == 1:
-                token_count += 1
                 end = offsets[n][1]  # Index to end character from offset
                 if end > index:  # Grab last non-zero end index (ensures increasing too)
                     type_id = type_ids[n]
                     index = end
-        end_index = index  # longest
-        end_typeid = type_id  # longest
+        end_index = index  # longest last char index
+        end_typeid = type_id  # longest type (query or text)
 
         # If last token offset is before the last char, then it was truncated
         return end_index < len(texts[end_typeid].strip())
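
The logic kept above walks the attention mask, records the end-character offset of the last real token, and notes whether that token came from the query or the passage. Below is a minimal standalone sketch of the same idea, assuming a Hugging Face fast tokenizer; the checkpoint name, sample strings, and max_length are illustrative placeholders, not values from this module.

# Hedged sketch (not the module's code): detect whether a (query, text) pair
# was truncated by keeping the last non-zero offset end among attended tokens.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("cross-encoder/ms-marco-MiniLM-L-6-v2")

query = "what is a cross-encoder?"
text = "A long passage that may not fit within the maximum sequence length. " * 10

enc = tokenizer(
    query,
    text,
    truncation=True,
    max_length=32,                 # deliberately small so the passage is truncated
    return_offsets_mapping=True,   # requires a fast tokenizer
    return_token_type_ids=True,
)

index = 0    # end-character offset of the last real token seen so far
type_id = 0  # whether that token came from the query (0) or the passage (1)
for n, attn in enumerate(enc["attention_mask"]):
    if attn == 1:
        end = enc["offset_mapping"][n][1]  # end character of this token
        if end > index:                    # keep the last non-zero offset end
            type_id = enc["token_type_ids"][n]
            index = end

texts = (query, text)
# If the last token ends before the last character, the input was truncated.
print(index < len(texts[type_id].strip()))  # True with max_length=32

Because offsets restart at zero for the second sequence of a pair, the tracked type id tells us which input string to compare the final offset against.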
@@ -629,8 +627,8 @@ def predict(
         if truncation_needed_indexes:
             self.raise_truncation_error(max_len, truncation_needed_indexes)
 
-        # # We cannot send offset_mapping to the model with features,
-        # # but we needed offset_mapping for other uses.
+        # We cannot send offset_mapping to the model with features,
+        # but we needed offset_mapping for other uses.
         if "offset_mapping" in features:
             del features["offset_mapping"]
 
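The comment fixed in the second hunk notes that offset_mapping cannot be sent to the model along with the other features. A short sketch of that step, assuming the transformers API; the checkpoint and strings are placeholders.

# Hedged sketch (not the module's code): drop offset_mapping before scoring,
# since the model's forward() does not take that key.
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

model_id = "cross-encoder/ms-marco-MiniLM-L-6-v2"  # illustrative checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSequenceClassification.from_pretrained(model_id)

features = tokenizer(
    "what is a cross-encoder?",
    "A passage to score against the query.",
    return_offsets_mapping=True,  # useful for the truncation check, not for the model
    return_tensors="pt",
)

if "offset_mapping" in features:
    del features["offset_mapping"]

with torch.no_grad():
    score = model(**features).logits
print(score)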
