Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
* fix

* fix mistral
  • Loading branch information
zucchini-nlp authored Oct 29, 2024
1 parent 808d6c5 commit 63ca6d9
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 1 deletion.
2 changes: 2 additions & 0 deletions src/transformers/generation/flax_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,8 @@ def generate(
"(https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)"
)
generation_config.max_length = generation_config.max_new_tokens + input_ids_seq_length
else: # no max_new_tokens given: treat max_length as the number of new tokens on top of the prompt length
generation_config.max_length = generation_config.max_length + input_ids_seq_length

if generation_config.min_length is not None and generation_config.min_length > generation_config.max_length:
raise ValueError(
Expand Down
4 changes: 4 additions & 0 deletions tests/generation/test_flax_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,10 @@ def test_greedy_generate_pt_fx(self):
pt_model = pt_model_class(config).eval()
pt_model = load_flax_weights_in_pytorch_model(pt_model, flax_model.params)

# Generate at most 5 tokens; with more, numerical errors seem to accumulate
pt_model.generation_config.max_length = 5
flax_model.generation_config.max_length = 5

flax_generation_outputs = flax_model.generate(input_ids).sequences
pt_generation_outputs = pt_model.generate(torch.tensor(input_ids, dtype=torch.long))

Expand Down
2 changes: 1 addition & 1 deletion tests/test_modeling_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -3002,7 +3002,7 @@ def test_inputs_embeds_matches_input_ids(self):

def test_inputs_embeds_matches_input_ids_with_generate(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
- for model_class in self.all_model_classes:
+ for model_class in self.all_generative_model_classes:
if model_class.__name__ not in [
*get_values(MODEL_FOR_CAUSAL_LM_MAPPING_NAMES),
*get_values(MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES),
Expand Down

0 comments on commit 63ca6d9

Please sign in to comment.