From 6500c3c7118331b2bc076fa226b65f857040c1de Mon Sep 17 00:00:00 2001 From: zhumakhan Date: Sat, 3 Aug 2024 02:11:11 +0500 Subject: [PATCH] workaround for gpt-j (#395) Some models initialize tensors during the first forward pass and reuse it for next iterations. This causes model to recompile . One temporary solution is to run torch model once before compilation. Related issue is here: https://github.com/CentML/hidet/issues/291 Co-authored-by: Zhumakhan --- tests/benchmarks/bench_transformer.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/benchmarks/bench_transformer.py b/tests/benchmarks/bench_transformer.py index 0e2d5340e..4839c03e9 100644 --- a/tests/benchmarks/bench_transformer.py +++ b/tests/benchmarks/bench_transformer.py @@ -59,6 +59,11 @@ def bench_causal_lm(model_name, bs, genlen, dtype, backend, mode): inputs = tokenizer(input_string_batch, return_tensors='pt')['input_ids'].cuda() with torch.no_grad(), torch.autocast("cuda"): + # Temporary workaround for gpt-j + # gpt-j initializes tensors during the first forwasd pass + # which causes recompilation during the second forward pass + if model_name == 'EleutherAI/gpt-j-6B': + model(inputs) model = comp_backend.compile(model) latency = bench_gen_model(model, tokenizer, inputs, bs=bs, genlen=genlen) del model