RuntimeError: probability tensor contains either inf, nan or element < 0 #9

yisampi opened this issue Jul 24, 2023 · 6 comments

@yisampi commented Jul 24, 2023

$ python scripts/inference/inference.py --visualcla_model visualcla --image_file pics/examples/food.jpg --load_in_8bit
[INFO|tokenization_utils_base.py:1837] 2023-07-24 16:05:27,669 >> loading file tokenizer.model
[INFO|tokenization_utils_base.py:1837] 2023-07-24 16:05:27,669 >> loading file added_tokens.json
[INFO|tokenization_utils_base.py:1837] 2023-07-24 16:05:27,669 >> loading file special_tokens_map.json
[INFO|tokenization_utils_base.py:1837] 2023-07-24 16:05:27,669 >> loading file tokenizer_config.json
[WARNING|logging.py:295] 2023-07-24 16:05:27,670 >> You are using the legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. This means that tokens that come after special tokens will not be properly handled. We recommend you to read the related pull request available at huggingface/transformers#24565
[INFO|tokenization_utils.py:426] 2023-07-24 16:05:27,697 >> Adding to the vocabulary
[INFO|tokenization_utils.py:426] 2023-07-24 16:05:27,697 >> Adding to the vocabulary
[INFO|tokenization_utils.py:426] 2023-07-24 16:05:27,697 >> Adding to the vocabulary
[INFO|tokenization_utils.py:426] 2023-07-24 16:05:27,697 >> Adding <img_token> to the vocabulary
2023-07-24 16:05:27,698 - INFO - visualcla.modeling_utils - Init VisualCLA model from pretrained
[INFO|configuration_utils.py:710] 2023-07-24 16:05:27,698 >> loading configuration file visualcla/config.json
[INFO|configuration_utils.py:768] 2023-07-24 16:05:27,699 >> Model config VisualCLAConfig {
"image_size": 224,
"initializer_range": 0.02,
"layer_norm_eps": 1e-12,
"model_type": "visualcla",
"text_config": {
"_name_or_path": "",
"add_cross_attention": false,
"architectures": [
"LlamaForCausalLM"
],
"bad_words_ids": null,
"begin_suppress_tokens": null,
"bos_token_id": 1,
"chunk_size_feed_forward": 0,
"cross_attention_hidden_size": null,
"decoder_start_token_id": null,
"diversity_penalty": 0.0,
"do_sample": false,
"early_stopping": false,
"encoder_no_repeat_ngram_size": 0,
"eos_token_id": 2,
"exponential_decay_length_penalty": null,
"finetuning_task": null,
"forced_bos_token_id": null,
"forced_eos_token_id": null,
"hidden_act": "silu",
"hidden_size": 4096,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1"
},
"initializer_range": 0.02,
"intermediate_size": 11008,
"is_decoder": false,
"is_encoder_decoder": false,
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1
},
"length_penalty": 1.0,
"max_length": 20,
"max_position_embeddings": 2048,
"min_length": 0,
"model_type": "llama",
"no_repeat_ngram_size": 0,
"num_attention_heads": 32,
"num_beam_groups": 1,
"num_beams": 1,
"num_hidden_layers": 32,
"num_key_value_heads": 32,
"num_return_sequences": 1,
"output_attentions": false,
"output_hidden_states": false,
"output_scores": false,
"pad_token_id": 0,
"prefix": null,
"pretraining_tp": 1,
"problem_type": null,
"pruned_heads": {},
"remove_invalid_values": false,
"repetition_penalty": 1.0,
"return_dict": true,
"return_dict_in_generate": false,
"rms_norm_eps": 1e-06,
"rope_scaling": null,
"sep_token_id": null,
"suppress_tokens": null,
"task_specific_params": null,
"temperature": 1.0,
"tf_legacy_loss": false,
"tie_encoder_decoder": false,
"tie_word_embeddings": false,
"tokenizer_class": null,
"top_k": 50,
"top_p": 1.0,
"torch_dtype": "float16",
"torchscript": false,
"transformers_version": "4.31.0",
"typical_p": 1.0,
"use_bfloat16": false,
"use_cache": true,
"vocab_size": 49954
},
"tie_word_embeddings": false,
"transformers_version": "4.31.0",
"use_visual_resampler": true,
"vision_config": {
"_name_or_path": "",
"add_cross_attention": false,
"architectures": null,
"attention_dropout": 0.0,
"bad_words_ids": null,
"begin_suppress_tokens": null,
"bos_token_id": null,
"chunk_size_feed_forward": 0,
"cross_attention_hidden_size": null,
"decoder_start_token_id": null,
"diversity_penalty": 0.0,
"do_sample": false,
"dropout": 0.0,
"early_stopping": false,
"encoder_no_repeat_ngram_size": 0,
"eos_token_id": null,
"exponential_decay_length_penalty": null,
"finetuning_task": null,
"forced_bos_token_id": null,
"forced_eos_token_id": null,
"hidden_act": "quick_gelu",
"hidden_size": 1024,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1"
},
"image_size": 224,
"initializer_factor": 1.0,
"initializer_range": 0.02,
"intermediate_size": 4096,
"is_decoder": false,
"is_encoder_decoder": false,
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1
},
"layer_norm_eps": 1e-05,
"length_penalty": 1.0,
"max_length": 20,
"min_length": 0,
"model_type": "clip_vision_model",
"no_repeat_ngram_size": 0,
"num_attention_heads": 16,
"num_beam_groups": 1,
"num_beams": 1,
"num_channels": 3,
"num_hidden_layers": 24,
"num_return_sequences": 1,
"output_attentions": false,
"output_hidden_states": false,
"output_scores": false,
"pad_token_id": null,
"patch_size": 14,
"prefix": null,
"problem_type": null,
"projection_dim": 768,
"pruned_heads": {},
"remove_invalid_values": false,
"repetition_penalty": 1.0,
"return_dict": true,
"return_dict_in_generate": false,
"sep_token_id": null,
"suppress_tokens": null,
"task_specific_params": null,
"temperature": 1.0,
"tf_legacy_loss": false,
"tie_encoder_decoder": false,
"tie_word_embeddings": true,
"tokenizer_class": null,
"top_k": 50,
"top_p": 1.0,
"torch_dtype": null,
"torchscript": false,
"transformers_version": "4.31.0",
"typical_p": 1.0,
"use_bfloat16": false
},
"visual_resampler_config": {
"hidden_size": 1024,
"intermediate_size": 4096,
"num_attention_heads": 16,
"num_hidden_layers": 6,
"num_query_tokens": 64
},
"vocab_size": 49958
}

[INFO|configuration_utils.py:710] 2023-07-24 16:05:27,844 >> loading configuration file visualcla/text_encoder/config.json
[INFO|configuration_utils.py:768] 2023-07-24 16:05:27,845 >> Model config LlamaConfig {
"_name_or_path": "chinese-alpaca-plus-7b",
"architectures": [
"LlamaForCausalLM"
],
"bos_token_id": 1,
"eos_token_id": 2,
"hidden_act": "silu",
"hidden_size": 4096,
"initializer_range": 0.02,
"intermediate_size": 11008,
"max_position_embeddings": 2048,
"model_type": "llama",
"num_attention_heads": 32,
"num_hidden_layers": 32,
"num_key_value_heads": 32,
"pad_token_id": 0,
"pretraining_tp": 1,
"rms_norm_eps": 1e-06,
"rope_scaling": null,
"tie_word_embeddings": false,
"torch_dtype": "float16",
"transformers_version": "4.31.0",
"use_cache": true,
"vocab_size": 49954
}

[INFO|modeling_utils.py:2600] 2023-07-24 16:05:27,845 >> loading weights file visualcla/text_encoder/pytorch_model.bin.index.json
[INFO|modeling_utils.py:1172] 2023-07-24 16:05:27,845 >> Instantiating LlamaForCausalLM model under default dtype torch.float16.
[INFO|configuration_utils.py:599] 2023-07-24 16:05:27,846 >> Generate config GenerationConfig {
"_from_model_config": true,
"bos_token_id": 1,
"eos_token_id": 2,
"pad_token_id": 0,
"transformers_version": "4.31.0"
}

[INFO|modeling_utils.py:2715] 2023-07-24 16:05:28,053 >> Detected 8-bit loading: activating 8-bit loading for this model
Loading checkpoint shards: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:08<00:00, 4.24s/it]
[INFO|modeling_utils.py:3329] 2023-07-24 16:05:44,119 >> All model checkpoint weights were used when initializing LlamaForCausalLM.

[INFO|modeling_utils.py:3337] 2023-07-24 16:05:44,119 >> All the weights of LlamaForCausalLM were initialized from the model checkpoint at visualcla/text_encoder.
If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training.
[INFO|configuration_utils.py:559] 2023-07-24 16:05:44,122 >> loading configuration file visualcla/text_encoder/generation_config.json
[INFO|configuration_utils.py:599] 2023-07-24 16:05:44,122 >> Generate config GenerationConfig {
"_from_model_config": true,
"bos_token_id": 1,
"eos_token_id": 2,
"pad_token_id": 0,
"transformers_version": "4.31.0"
}

[INFO|configuration_utils.py:710] 2023-07-24 16:05:44,188 >> loading configuration file visualcla/vision_encoder/config.json
[INFO|configuration_utils.py:768] 2023-07-24 16:05:44,188 >> Model config CLIPVisionConfig {
"_name_or_path": "clip-vit-large-patch14",
"architectures": [
"CLIPVisionModel"
],
"attention_dropout": 0.0,
"dropout": 0.0,
"hidden_act": "quick_gelu",
"hidden_size": 1024,
"image_size": 224,
"initializer_factor": 1.0,
"initializer_range": 0.02,
"intermediate_size": 4096,
"layer_norm_eps": 1e-05,
"model_type": "clip_vision_model",
"num_attention_heads": 16,
"num_channels": 3,
"num_hidden_layers": 24,
"patch_size": 14,
"projection_dim": 768,
"torch_dtype": "float16",
"transformers_version": "4.31.0"
}

[INFO|modeling_utils.py:2600] 2023-07-24 16:05:44,188 >> loading weights file visualcla/vision_encoder/pytorch_model.bin
[INFO|modeling_utils.py:1172] 2023-07-24 16:05:44,483 >> Instantiating CLIPVisionModel model under default dtype torch.float16.
[INFO|modeling_utils.py:3329] 2023-07-24 16:05:45,066 >> All model checkpoint weights were used when initializing CLIPVisionModel.

[INFO|modeling_utils.py:3337] 2023-07-24 16:05:45,066 >> All the weights of CLIPVisionModel were initialized from the model checkpoint at visualcla/vision_encoder.
If your task is similar to the task the model of the checkpoint was trained on, you can already use CLIPVisionModel for predictions without further training.
[INFO|image_processing_utils.py:337] 2023-07-24 16:05:46,059 >> loading configuration file visualcla/preprocessor_config.json
[INFO|image_processing_utils.py:389] 2023-07-24 16:05:46,059 >> Image processor CLIPImageProcessor {
"crop_size": {
"height": 224,
"width": 224
},
"do_center_crop": true,
"do_convert_rgb": true,
"do_normalize": true,
"do_rescale": true,
"do_resize": true,
"feature_extractor_type": "CLIPFeatureExtractor",
"image_mean": [
0.48145466,
0.4578275,
0.40821073
],
"image_processor_type": "CLIPImageProcessor",
"image_std": [
0.26862954,
0.26130258,
0.27577711
],
"resample": 3,
"rescale_factor": 0.00392156862745098,
"size": {
"shortest_edge": 224
}
}

2023-07-24 16:05:46,062 - INFO - main - *** Start Inference ***

========== Usage ==========

Start Inference with instruction mode.
You can enter instruction or special control commands after '>'. Below are the usage of the control commands

change image:[image_path]   load the image from [image_path]
clear                       Clear chat history. This command will not change the image.
exit                        Exit Inference

Image: pics/examples/food.jpg

图片中有哪些食物 (What foods are in the picture?)
Traceback (most recent call last):
  File "/home/yibo/Visual-Chinese-LLaMA-Alpaca/scripts/inference/inference.py", line 119, in <module>
    main()
  File "/home/yibo/Visual-Chinese-LLaMA-Alpaca/scripts/inference/inference.py", line 110, in main
    response, history = visualcla.chat(model, image=image_path, text=text, history=history)
  File "/home/yibo/Visual-Chinese-LLaMA-Alpaca/env/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "/home/yibo/Visual-Chinese-LLaMA-Alpaca/models/visualcla/modeling_utils.py", line 167, in chat
    outputs = model.generate(
  File "/home/yibo/Visual-Chinese-LLaMA-Alpaca/env/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "/home/yibo/Visual-Chinese-LLaMA-Alpaca/models/visualcla/modeling_visualcla.py", line 382, in generate
    outputs = self.text_model.generate(
  File "/home/yibo/Visual-Chinese-LLaMA-Alpaca/env/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "/home/yibo/Visual-Chinese-LLaMA-Alpaca/env/lib/python3.10/site-packages/transformers/generation/utils.py", line 1588, in generate
    return self.sample(
  File "/home/yibo/Visual-Chinese-LLaMA-Alpaca/env/lib/python3.10/site-packages/transformers/generation/utils.py", line 2678, in sample
    next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)
RuntimeError: probability tensor contains either inf, nan or element < 0
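
The failing call is `torch.multinomial` over the softmax of the next-token logits, so the message means the logits themselves contain inf/nan values at sampling time. A minimal diagnostic sketch (not from this repo; the tensor below is synthetic and `model`/`inputs` would be placeholders) for checking logits before sampling:

```python
# Minimal diagnostic sketch (not part of Visual-Chinese-LLaMA-Alpaca):
# verify that next-token logits are finite before torch.multinomial is called.
import torch

def check_logits(logits: torch.Tensor) -> None:
    bad = ~torch.isfinite(logits)
    if bad.any():
        print(f"{bad.sum().item()} non-finite logits; "
              f"min={logits.min().item()}, max={logits.max().item()}")
    else:
        print("logits are finite; sampling should work")

# Synthetic example; with the real model you would pass the last-step logits,
# e.g. model(**inputs).logits[:, -1, :].
check_logits(torch.tensor([[1.0, float("nan"), -2.0]]))
```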

@airaria (Owner) commented Jul 24, 2023

Running the Colab notebook works fine for me.
Could this be a transformers version issue? Try installing 4.30.2.
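
A trivial sketch (assuming a single virtualenv) to confirm the interpreter actually picks up the pinned version after reinstalling:

```python
# Run after `pip install transformers==4.30.2` to confirm which version
# the active environment resolves to.
import transformers

print("transformers", transformers.__version__)
assert transformers.__version__ == "4.30.2", transformers.__version__
```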

@yisampi (Author) commented Jul 25, 2023

Still getting the error; the installed transformers package is 4.30.2:
========== Usage ==========

Start Inference with instruction mode.
You can enter instruction or special control commands after '>'. Below are the usage of the control commands

change image:[image_path]   load the image from [image_path]
clear                       Clear chat history. This command will not change the image.
exit                        Exit Inference

Image: pics/examples/food.jpg

图片中有哪些食物 (What foods are in the picture?)
Traceback (most recent call last):
  File "/home/yibo/Visual-Chinese-LLaMA-Alpaca/scripts/inference/inference.py", line 119, in <module>
    main()
  File "/home/yibo/Visual-Chinese-LLaMA-Alpaca/scripts/inference/inference.py", line 110, in main
    response, history = visualcla.chat(model, image=image_path, text=text, history=history)
  File "/home/yibo/Visual-Chinese-LLaMA-Alpaca/env/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "/home/yibo/Visual-Chinese-LLaMA-Alpaca/models/visualcla/modeling_utils.py", line 167, in chat
    outputs = model.generate(
  File "/home/yibo/Visual-Chinese-LLaMA-Alpaca/env/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "/home/yibo/Visual-Chinese-LLaMA-Alpaca/models/visualcla/modeling_visualcla.py", line 382, in generate
    outputs = self.text_model.generate(
  File "/home/yibo/Visual-Chinese-LLaMA-Alpaca/env/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "/home/yibo/Visual-Chinese-LLaMA-Alpaca/env/lib/python3.10/site-packages/transformers/generation/utils.py", line 1572, in generate
    return self.sample(
  File "/home/yibo/Visual-Chinese-LLaMA-Alpaca/env/lib/python3.10/site-packages/transformers/generation/utils.py", line 2655, in sample
    next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)
RuntimeError: probability tensor contains either inf, nan or element < 0

$ pip list|grep trans
transformers 4.30.2
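
If the goal is only to keep sampling from crashing while debugging, transformers' `generate()` accepts `remove_invalid_values=True` (the flag appears as `remove_invalid_values` in the config dump above), which applies `InfNanRemoveLogitsProcessor`. Whether the repo's `visualcla.chat()` wrapper forwards that kwarg is not confirmed in this thread; the standalone sketch below only shows what the processor does. It masks the symptom rather than fixing the underlying numerical issue:

```python
# Sketch of the cleanup enabled by generate(..., remove_invalid_values=True):
# InfNanRemoveLogitsProcessor replaces nan with 0 and +inf with the largest
# finite value so torch.multinomial no longer sees invalid probabilities.
import torch
from transformers import InfNanRemoveLogitsProcessor

processor = InfNanRemoveLogitsProcessor()
scores = torch.tensor([[1.0, float("nan"), float("inf"), -2.0]])
cleaned = processor(None, scores)  # input_ids are unused by this processor
probs = torch.softmax(cleaned, dim=-1)
print(torch.multinomial(probs, num_samples=1))  # samples without raising
```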

@GoGoJoestar (Collaborator)

Was the model merged under transformers 4.30.2? You can check whether the SHA256 values of the model files match the ones below.

Model file                                           SHA256
vcla/text_encoder/pytorch_model-00001-of-00002.bin   49b85640d6a7018f232480e9a456cb608b20cd8f8a57a3f0d012024b3e2f01ef
vcla/text_encoder/pytorch_model-00002-of-00002.bin   b2acf3114832cb44ee364a2c5cd7cf79bc23e169027b0eecbbc7e9e8fbf5f16f
vcla/vision_encoder/pytorch_model.bin                0bda0cfbf762fadecabf497b2868e622ce2ab715fe0857bce753b182abf58efb
vcla/pytorch_model.bin                               ddf47efa9d28513f1b21d5aa0527277094c0bad3d9d099d9b11841a5b5605c6b
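
A small helper sketch for producing the same hashes (equivalent to running `sha256sum` on each path; the paths are taken from the table above):

```python
# Compute SHA256 for the checkpoint files listed in the table above.
import hashlib
from pathlib import Path

FILES = [
    "vcla/text_encoder/pytorch_model-00001-of-00002.bin",
    "vcla/text_encoder/pytorch_model-00002-of-00002.bin",
    "vcla/vision_encoder/pytorch_model.bin",
    "vcla/pytorch_model.bin",
]

def sha256sum(path, chunk_size=1 << 20):
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

for name in FILES:
    print(name, sha256sum(name) if Path(name).exists() else "missing")
```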

@yisampi (Author) commented Jul 26, 2023

$ sha256sum pytorch_model-00001-of-00002.bin
49b85640d6a7018f232480e9a456cb608b20cd8f8a57a3f0d012024b3e2f01ef pytorch_model-00001-of-00002.bin
$ sha256sum pytorch_model-00002-of-00002.bin
b2acf3114832cb44ee364a2c5cd7cf79bc23e169027b0eecbbc7e9e8fbf5f16f pytorch_model-00002-of-00002.bin
$ sha256sum ../vision_encoder/pytorch_model.bin
0bda0cfbf762fadecabf497b2868e622ce2ab715fe0857bce753b182abf58efb ../vision_encoder/pytorch_model.bin
$ cd ..
$ sha256sum pytorch_model.bin
ddf47efa9d28513f1b21d5aa0527277094c0bad3d9d099d9b11841a5b5605c6b pytorch_model.bin

The model was merged under transformers 4.30.2, and the SHA256 values match those listed above.

@GoGoJoestar (Collaborator) commented Jul 26, 2023

Try running without load_in_8bit and see whether it works normally.
Some of the approaches in this issue may also be worth trying.
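
A minimal sketch of that comparison on the text encoder alone. The `visualcla/text_encoder` path comes from the logs above; the code uses the standard transformers loader rather than the repo's own `--load_in_8bit` handling, so treat the arguments as assumptions:

```python
# Load the text encoder in plain fp16 to rule out an 8-bit/bitsandbytes issue.
# Requires enough GPU memory for the fp16 weights and the accelerate package
# for device_map="auto".
import torch
from transformers import LlamaForCausalLM

# 8-bit variant (what --load_in_8bit ultimately relies on, via bitsandbytes):
# model = LlamaForCausalLM.from_pretrained(
#     "visualcla/text_encoder", load_in_8bit=True, device_map="auto"
# )

# fp16 fallback for comparison:
model = LlamaForCausalLM.from_pretrained(
    "visualcla/text_encoder", torch_dtype=torch.float16, device_map="auto"
)
print(next(model.parameters()).dtype)  # expect torch.float16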

@yisampi (Author) commented Jul 26, 2023

Not sure why, but after switching to a different machine and going through the setup again, it works now.
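
Since the fix turned out to be environmental, a generic environment dump like the following (nothing repo-specific) can help compare the old and new machines:

```python
# Print the parts of the environment most likely to differ between machines
# when 8-bit sampling produces inf/nan logits.
import torch
import transformers

print("torch:", torch.__version__, "| CUDA:", torch.version.cuda)
print("transformers:", transformers.__version__)
if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0),
          "| compute capability:", torch.cuda.get_device_capability(0))
try:
    import bitsandbytes as bnb
    print("bitsandbytes:", bnb.__version__)
except ImportError:
    print("bitsandbytes: not installed")
```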
