From c66716bcfce2270de3bd50f0b7d05590a96b6fc1 Mon Sep 17 00:00:00 2001
From: cranechu <1340390339@qq.com>
Date: Thu, 12 Sep 2024 16:17:45 +0800
Subject: [PATCH 1/2] fix: add npu to ppl

---
 .../benchmark/harness/lm-evaluation-harness  |  1 +
 .../dev/benchmark/perplexity/run_wikitext.py | 20 +++++++++++++++----
 2 files changed, 17 insertions(+), 4 deletions(-)
 create mode 160000 python/llm/dev/benchmark/harness/lm-evaluation-harness

diff --git a/python/llm/dev/benchmark/harness/lm-evaluation-harness b/python/llm/dev/benchmark/harness/lm-evaluation-harness
new file mode 160000
index 00000000000..b281b0921b6
--- /dev/null
+++ b/python/llm/dev/benchmark/harness/lm-evaluation-harness
@@ -0,0 +1 @@
+Subproject commit b281b0921b636bc36ad05c0b0b0763bd6dd43463
diff --git a/python/llm/dev/benchmark/perplexity/run_wikitext.py b/python/llm/dev/benchmark/perplexity/run_wikitext.py
index 061c87babb6..245bc037d35 100644
--- a/python/llm/dev/benchmark/perplexity/run_wikitext.py
+++ b/python/llm/dev/benchmark/perplexity/run_wikitext.py
@@ -36,7 +36,18 @@
 parser.add_argument("--mixed_precision", action="store_true")
 args = parser.parse_args()
 
-if args.precision == "fp16":  # ipex fp16
+if args.device == "npu":
+    from ipex_llm.transformers.npu_model import AutoModelForCausalLM
+    model = AutoModelForCausalLM.from_pretrained(
+        args.model_path,
+        trust_remote_code=True,
+        torch_dtype=torch.float16,
+        max_output_len=4096,
+        max_prompt_len=4096,
+        load_in_low_bit=args.precision,
+        attn_implementation="eager"
+    )
+elif args.precision == "fp16":  # ipex fp16
     from transformers import AutoModelForCausalLM
     model = AutoModelForCausalLM.from_pretrained(args.model_path,
                                                  use_cache=args.use_cache,
@@ -57,7 +68,7 @@
                                                  trust_remote_code=True,
                                                  mixed_precision=args.mixed_precision)
     model = model.half()
-model = model.to(args.device)
+
 model = model.eval()
 
 from transformers import AutoTokenizer
@@ -98,7 +109,7 @@ def parse_kwargs(kwstr):
     else:
         end_loc = begin_loc + stride
         trg_len = -stride//2
-    input_ids = encodings.input_ids[:, begin_loc:end_loc].to(args.device)
+    input_ids = encodings.input_ids[:, begin_loc:end_loc]
     if args.stride == 0: input_ids[:, 0] = tokenizer.bos_token_id
     target_ids = input_ids.clone()
     target_ids[:, :-trg_len] = -100
@@ -110,6 +121,7 @@ def parse_kwargs(kwstr):
         # N.B. the model only calculates loss over trg_len - 1 labels, because it internally shifts the labels
         # to the left by 1.
         neg_log_likelihood = outputs.loss
+        print(neg_log_likelihood)
     nlls.append(neg_log_likelihood)
     if "xpu" in args.device:
         torch.xpu.synchronize()
@@ -118,6 +130,6 @@ def parse_kwargs(kwstr):
     prev_end_loc = end_loc
     if end_loc == seq_len:
         break
-
+print(neg_log_likelihood)
 ppl = torch.exp(torch.stack(nlls).mean())
 print("Final ppl estimate: {}".format(ppl.item()))

From 70ee2e70191e87d84d9560448ed19ac41e31b5bf Mon Sep 17 00:00:00 2001
From: cranechu <1340390339@qq.com>
Date: Thu, 12 Sep 2024 16:51:27 +0800
Subject: [PATCH 2/2] fix: remove lm_eval

---
 python/llm/dev/benchmark/harness/lm-evaluation-harness | 1 -
 1 file changed, 1 deletion(-)
 delete mode 160000 python/llm/dev/benchmark/harness/lm-evaluation-harness

diff --git a/python/llm/dev/benchmark/harness/lm-evaluation-harness b/python/llm/dev/benchmark/harness/lm-evaluation-harness
deleted file mode 160000
index b281b0921b6..00000000000
--- a/python/llm/dev/benchmark/harness/lm-evaluation-harness
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit b281b0921b636bc36ad05c0b0b0763bd6dd43463
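
For anyone trying the NPU path from PATCH 1/2 in isolation: the new branch loads the model through ipex_llm's NPU-specific AutoModelForCausalLM instead of moving it with .to(args.device), which is why the patch also drops the device-move calls. A minimal standalone sketch of that path, using only the keyword arguments that appear in the diff; the model path and low-bit precision below are illustrative assumptions, not values from the patch:

```python
import torch
# NPU loading goes through ipex_llm's NPU-specific class, mirroring the
# branch added in PATCH 1/2; the model is handled by the NPU runtime, so
# no explicit model.to(device) call follows.
from ipex_llm.transformers.npu_model import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-chat-hf",  # illustrative model path (assumption)
    trust_remote_code=True,
    torch_dtype=torch.float16,
    max_output_len=4096,              # NPU sequence limits, as in the patch
    max_prompt_len=4096,
    load_in_low_bit="sym_int4",       # illustrative precision (assumption)
    attn_implementation="eager",
)
model = model.eval()
```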
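As background on what run_wikitext.py ultimately reports: the loop collects one negative log-likelihood (outputs.loss) per sliding window over the tokenized corpus, and the final perplexity is the exponential of their mean. A toy sketch of that closing reduction, with made-up loss values standing in for the per-window outputs.loss:

```python
import torch

# Each entry stands in for one window's outputs.loss (mean NLL per token);
# these values are made up purely for illustration.
nlls = [torch.tensor(2.31), torch.tensor(2.27), torch.tensor(2.40)]

# Final reduction, exactly as in run_wikitext.py: perplexity is the
# exponential of the average negative log-likelihood over all windows.
ppl = torch.exp(torch.stack(nlls).mean())
print("Final ppl estimate: {}".format(ppl.item()))  # ~10.2 for these values
```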