From f5fcd09724fb29ca4536398c9e100d81aced33fd Mon Sep 17 00:00:00 2001 From: Yaroslav Chelombitko Date: Thu, 14 Sep 2023 16:04:11 +0300 Subject: [PATCH 01/10] Created some models --- models/huggingface_models.py | 60 ++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 models/huggingface_models.py diff --git a/models/huggingface_models.py b/models/huggingface_models.py new file mode 100644 index 0000000..d8ef7f3 --- /dev/null +++ b/models/huggingface_models.py @@ -0,0 +1,60 @@ +from transformers import LlamaForCausalLM, LlamaTokenizer +from transformers import AutoTokenizer, T5ForConditionalGeneration +from transformers import AutoModelForSeq2SeqLM, AutoTokenizer +from transformers import AutoModel +from transformers import RobertaTokenizer, T5ForConditionalGeneration +import torch + +# Does not work locally because of the size of the models. + +''' +def llama_model(text: str) -> str: + tokenizer = AutoTokenizer.from_pretrained("daryl149/llama-2-7b-chat-hf") + model = AutoModel.from_pretrained("daryl149/llama-2-7b-chat-hf") + + + # tokenizer = LlamaTokenizer.from_pretrained("/output/path") + # model = LlamaForCausalLM.from_pretrained("/output/path") + tokenized = tokenizer.encode([text]) + output = model.generate(**tokenized) + return output + + +def codet5_model(text: str) -> str: + checkpoint = "Salesforce/instructcodet5p-16b" + device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") + tokenizer = AutoTokenizer.from_pretrained(checkpoint) + model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint, + torch_dtype=torch.float16, + low_cpu_mem_usage=True, + trust_remote_code=True).to(device) + + encoding = tokenizer("def print_hello_world():", return_tensors="pt").to(device) + encoding['decoder_input_ids'] = encoding['input_ids'].clone() + outputs = model.generate(**encoding, max_length=15) + print(tokenizer.decode(outputs[0], skip_special_tokens=True)) +''' + + +def codet5_base_model(text: str): + tokenizer = RobertaTokenizer.from_pretrained('Salesforce/codet5-base') + model = T5ForConditionalGeneration.from_pretrained('Salesforce/codet5-base') + input_ids = tokenizer(text, return_tensors="pt").input_ids + generated_ids = model.generate(input_ids, max_length=8) + print(tokenizer.decode(generated_ids[0], skip_special_tokens=True)) + + +def codet5_small_model(text: str): + tokenizer = RobertaTokenizer.from_pretrained('Salesforce/codet5-small') + model = T5ForConditionalGeneration.from_pretrained('Salesforce/codet5-small') + input_ids = tokenizer(text, return_tensors="pt").input_ids + generated_ids = model.generate(input_ids, max_length=8) + print(tokenizer.decode(generated_ids[0], skip_special_tokens=True)) + +''' +if __name__ == "__main__": + # codet5_model("123") + # llama_model("def print_hello_world():") + codet5_base_model("def print_hello_world():") + codet5_small_model("def print_hello_world():") +''' \ No newline at end of file From e39fdf8b38fa1a35e083a5458e8ced5da1719150 Mon Sep 17 00:00:00 2001 From: Yaroslav Chelombitko Date: Thu, 14 Sep 2023 17:28:01 +0300 Subject: [PATCH 02/10] Starcoder + llama --- models/huggingface_models.py | 45 ++++++++++++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 4 deletions(-) diff --git a/models/huggingface_models.py b/models/huggingface_models.py index d8ef7f3..52343a4 100644 --- a/models/huggingface_models.py +++ b/models/huggingface_models.py @@ -3,6 +3,8 @@ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer from transformers import AutoModel from 
transformers import RobertaTokenizer, T5ForConditionalGeneration +from transformers import AutoModelForCausalLM +import transformers import torch # Does not work locally because of the size of the models. @@ -51,10 +53,45 @@ def codet5_small_model(text: str): generated_ids = model.generate(input_ids, max_length=8) print(tokenizer.decode(generated_ids[0], skip_special_tokens=True)) -''' + +def starcoder_model(text: str): + checkpoint = "bigcode/starcoderbase-1b" + device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") + tokenizer = AutoTokenizer.from_pretrained(checkpoint) + model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device) + inputs = tokenizer.encode(text, return_tensors="pt").to(device) + outputs = model.generate(inputs) + print(tokenizer.decode(outputs[0])) + + +def llama_model(text: str): + model = "codellama/CodeLlama-7b-hf" + tokenizer = AutoTokenizer.from_pretrained(model) + pipeline = transformers.pipeline( + "text-generation", + model=model, + torch_dtype=torch.float16, + device_map="auto", + ) + + sequences = pipeline( + text, + do_sample=True, + top_k=10, + temperature=0.1, + top_p=0.95, + num_return_sequences=1, + eos_token_id=tokenizer.eos_token_id, + max_length=200, + ) + for seq in sequences: + print(f"Result: {seq['generated_text']}") + + if __name__ == "__main__": # codet5_model("123") # llama_model("def print_hello_world():") - codet5_base_model("def print_hello_world():") - codet5_small_model("def print_hello_world():") -''' \ No newline at end of file + # codet5_base_model("def print_hello_world():") + # codet5_small_model("def print_hello_world():") + # starcoder_model("def print_hello_world():") + llama_model("def print_hello_world():") From 1c2d1b12eab39e1e92290a12a758083b7a213690 Mon Sep 17 00:00:00 2001 From: Yaroslav Chelombitko Date: Thu, 14 Sep 2023 17:31:34 +0300 Subject: [PATCH 03/10] Moved models folder to server folder --- {models => server/models}/huggingface_models.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {models => server/models}/huggingface_models.py (100%) diff --git a/models/huggingface_models.py b/server/models/huggingface_models.py similarity index 100% rename from models/huggingface_models.py rename to server/models/huggingface_models.py From 0a5a16d61f45efd5f328bf2385abda9b435c039c Mon Sep 17 00:00:00 2001 From: Yaroslav Chelombitko Date: Thu, 14 Sep 2023 19:04:17 +0300 Subject: [PATCH 04/10] Moved models to GPU --- server/models/huggingface_models.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/server/models/huggingface_models.py b/server/models/huggingface_models.py index 52343a4..47997ca 100644 --- a/server/models/huggingface_models.py +++ b/server/models/huggingface_models.py @@ -13,10 +13,6 @@ def llama_model(text: str) -> str: tokenizer = AutoTokenizer.from_pretrained("daryl149/llama-2-7b-chat-hf") model = AutoModel.from_pretrained("daryl149/llama-2-7b-chat-hf") - - - # tokenizer = LlamaTokenizer.from_pretrained("/output/path") - # model = LlamaForCausalLM.from_pretrained("/output/path") tokenized = tokenizer.encode([text]) output = model.generate(**tokenized) return output @@ -33,22 +29,24 @@ def codet5_model(text: str) -> str: encoding = tokenizer("def print_hello_world():", return_tensors="pt").to(device) encoding['decoder_input_ids'] = encoding['input_ids'].clone() - outputs = model.generate(**encoding, max_length=15) + outputs = model.generate(**encoding, max_length=8) print(tokenizer.decode(outputs[0], skip_special_tokens=True)) 
''' def codet5_base_model(text: str): + device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") tokenizer = RobertaTokenizer.from_pretrained('Salesforce/codet5-base') - model = T5ForConditionalGeneration.from_pretrained('Salesforce/codet5-base') + model = T5ForConditionalGeneration.from_pretrained('Salesforce/codet5-base').to(device) input_ids = tokenizer(text, return_tensors="pt").input_ids generated_ids = model.generate(input_ids, max_length=8) print(tokenizer.decode(generated_ids[0], skip_special_tokens=True)) def codet5_small_model(text: str): + device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") tokenizer = RobertaTokenizer.from_pretrained('Salesforce/codet5-small') - model = T5ForConditionalGeneration.from_pretrained('Salesforce/codet5-small') + model = T5ForConditionalGeneration.from_pretrained('Salesforce/codet5-small').to(device) input_ids = tokenizer(text, return_tensors="pt").input_ids generated_ids = model.generate(input_ids, max_length=8) print(tokenizer.decode(generated_ids[0], skip_special_tokens=True)) @@ -60,13 +58,13 @@ def starcoder_model(text: str): tokenizer = AutoTokenizer.from_pretrained(checkpoint) model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device) inputs = tokenizer.encode(text, return_tensors="pt").to(device) - outputs = model.generate(inputs) + outputs = model.generate(inputs, max_length=8) print(tokenizer.decode(outputs[0])) def llama_model(text: str): model = "codellama/CodeLlama-7b-hf" - tokenizer = AutoTokenizer.from_pretrained(model) + tokenizer = AutoTokenizer.from_pretrained(model, use_fast=True) pipeline = transformers.pipeline( "text-generation", model=model, From d6279b0f74c0d040221cec2641530b7e7cc8943d Mon Sep 17 00:00:00 2001 From: Yaroslav Chelombitko Date: Fri, 15 Sep 2023 02:04:27 +0300 Subject: [PATCH 05/10] Untested version with the requirements The reason I cannot test this locally is that I am on Apple's Metal architecture. The most common error is that int4 is not usable on Metal, but it is used in the model implementation, so I simply cannot exercise this part of the code. --- server/models/huggingface_models.py | 89 ++++++++++++++++++----------- 1 file changed, 55 insertions(+), 34 deletions(-) diff --git a/server/models/huggingface_models.py b/server/models/huggingface_models.py index 47997ca..4584fc6 100644 --- a/server/models/huggingface_models.py +++ b/server/models/huggingface_models.py @@ -1,52 +1,39 @@ -from transformers import LlamaForCausalLM, LlamaTokenizer +from transformers import LlamaForCausalLM, LlamaTokenizer, LlamaConfig from transformers import AutoTokenizer, T5ForConditionalGeneration from transformers import AutoModelForSeq2SeqLM, AutoTokenizer from transformers import AutoModel from transformers import RobertaTokenizer, T5ForConditionalGeneration from transformers import AutoModelForCausalLM +from transformers import BitsAndBytesConfig +import argparse import transformers import torch -# Does not work locally because of the size of the models.
- -''' -def llama_model(text: str) -> str: - tokenizer = AutoTokenizer.from_pretrained("daryl149/llama-2-7b-chat-hf") - model = AutoModel.from_pretrained("daryl149/llama-2-7b-chat-hf") - tokenized = tokenizer.encode([text]) - output = model.generate(**tokenized) - return output +nf4_config = BitsAndBytesConfig( + load_in_4bit=True, + bnb_4bit_quant_type="nf4", + bnb_4bit_use_double_quant=True, + bnb_4bit_compute_dtype=torch.bfloat16 +) -def codet5_model(text: str) -> str: - checkpoint = "Salesforce/instructcodet5p-16b" - device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") - tokenizer = AutoTokenizer.from_pretrained(checkpoint) - model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint, - torch_dtype=torch.float16, - low_cpu_mem_usage=True, - trust_remote_code=True).to(device) - - encoding = tokenizer("def print_hello_world():", return_tensors="pt").to(device) - encoding['decoder_input_ids'] = encoding['input_ids'].clone() - outputs = model.generate(**encoding, max_length=8) - print(tokenizer.decode(outputs[0], skip_special_tokens=True)) -''' +def read_from_file(path: str) -> str: + opened_file = open(path, "r") + data = opened_file.read() + return data def codet5_base_model(text: str): - device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") tokenizer = RobertaTokenizer.from_pretrained('Salesforce/codet5-base') - model = T5ForConditionalGeneration.from_pretrained('Salesforce/codet5-base').to(device) + model = T5ForConditionalGeneration.from_pretrained('Salesforce/codet5-base') input_ids = tokenizer(text, return_tensors="pt").input_ids generated_ids = model.generate(input_ids, max_length=8) print(tokenizer.decode(generated_ids[0], skip_special_tokens=True)) def codet5_small_model(text: str): - device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") tokenizer = RobertaTokenizer.from_pretrained('Salesforce/codet5-small') - model = T5ForConditionalGeneration.from_pretrained('Salesforce/codet5-small').to(device) + model = T5ForConditionalGeneration.from_pretrained('Salesforce/codet5-small') input_ids = tokenizer(text, return_tensors="pt").input_ids generated_ids = model.generate(input_ids, max_length=8) print(tokenizer.decode(generated_ids[0], skip_special_tokens=True)) @@ -54,21 +41,52 @@ def codet5_small_model(text: str): def starcoder_model(text: str): checkpoint = "bigcode/starcoderbase-1b" - device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") tokenizer = AutoTokenizer.from_pretrained(checkpoint) - model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device) - inputs = tokenizer.encode(text, return_tensors="pt").to(device) + model = AutoModelForCausalLM.from_pretrained(checkpoint) + inputs = tokenizer.encode(text, return_tensors="pt") outputs = model.generate(inputs, max_length=8) print(tokenizer.decode(outputs[0])) +def healing(tokenizer, model, prefix, outputs): + pass + + +def llama_model(text: str): + model_name = "codellama/CodeLlama-7b-hf" + tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True) + model = LlamaForCausalLM.from_pretrained(model_name, quantization_config=nf4_config) + + pipeline = transformers.pipeline( + "text-generation", + model=model, + tokenizer=tokenizer, + torch_dtype=torch.float32, + device_map="auto", + ) + + sequences = pipeline( + text, + do_sample=True, + top_k=10, + temperature=0.1, + top_p=0.95, + num_return_sequences=1, + eos_token_id=tokenizer.eos_token_id, + max_length=200, + ) + for seq in sequences: + 
print(f"Result: {seq['generated_text']}") + + +''' def llama_model(text: str): model = "codellama/CodeLlama-7b-hf" tokenizer = AutoTokenizer.from_pretrained(model, use_fast=True) pipeline = transformers.pipeline( "text-generation", model=model, - torch_dtype=torch.float16, + torch_dtype=torch.float32, device_map="auto", ) @@ -84,7 +102,7 @@ def llama_model(text: str): ) for seq in sequences: print(f"Result: {seq['generated_text']}") - +''' if __name__ == "__main__": # codet5_model("123") @@ -92,4 +110,7 @@ def llama_model(text: str): # codet5_base_model("def print_hello_world():") # codet5_small_model("def print_hello_world():") # starcoder_model("def print_hello_world():") - llama_model("def print_hello_world():") + # In idea, it might be run using command like this: python huggingface_models.py --model codellama/CodeLlama-7b-hf + parser = argparse.ArgumentParser() + parser.add_argument("--model", type=str, default="codellama/CodeLlama-7b-hf") + args = parser.parse_args() From 7334eab6b649efabe00e72b1d42b2a6ae7fef975 Mon Sep 17 00:00:00 2001 From: Yaroslav Chelombitko Date: Fri, 15 Sep 2023 09:49:01 +0300 Subject: [PATCH 06/10] Update huggingface_models.py BitsAndBytes, without local testing --- server/models/huggingface_models.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/server/models/huggingface_models.py b/server/models/huggingface_models.py index 4584fc6..7afd120 100644 --- a/server/models/huggingface_models.py +++ b/server/models/huggingface_models.py @@ -56,6 +56,7 @@ def llama_model(text: str): model_name = "codellama/CodeLlama-7b-hf" tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True) model = LlamaForCausalLM.from_pretrained(model_name, quantization_config=nf4_config) + # model = LlamaForCausalLM.from_pretrained(model_name) pipeline = transformers.pipeline( "text-generation", @@ -114,3 +115,4 @@ def llama_model(text: str): parser = argparse.ArgumentParser() parser.add_argument("--model", type=str, default="codellama/CodeLlama-7b-hf") args = parser.parse_args() + From 35041164c74d52fbbbf839da649278596e506e84 Mon Sep 17 00:00:00 2001 From: Yaroslav Chelombitko Date: Fri, 15 Sep 2023 12:02:03 +0300 Subject: [PATCH 07/10] Starcoder + codet5 --- server/models/huggingface_models.py | 60 +++++++---------------------- 1 file changed, 13 insertions(+), 47 deletions(-) diff --git a/server/models/huggingface_models.py b/server/models/huggingface_models.py index 7afd120..6eaaa0e 100644 --- a/server/models/huggingface_models.py +++ b/server/models/huggingface_models.py @@ -4,17 +4,17 @@ from transformers import AutoModel from transformers import RobertaTokenizer, T5ForConditionalGeneration from transformers import AutoModelForCausalLM -from transformers import BitsAndBytesConfig +# from transformers import BitsAndBytesConfig import argparse import transformers import torch -nf4_config = BitsAndBytesConfig( - load_in_4bit=True, - bnb_4bit_quant_type="nf4", - bnb_4bit_use_double_quant=True, - bnb_4bit_compute_dtype=torch.bfloat16 -) +# nf4_config = BitsAndBytesConfig( +# load_in_4bit=True, +# bnb_4bit_quant_type="nf4", +# bnb_4bit_use_double_quant=True, +# bnb_4bit_compute_dtype=torch.bfloat16 +# ) def read_from_file(path: str) -> str: @@ -23,28 +23,20 @@ def read_from_file(path: str) -> str: return data -def codet5_base_model(text: str): +def codet5_base_model(text: str, max_len: int): tokenizer = RobertaTokenizer.from_pretrained('Salesforce/codet5-base') model = T5ForConditionalGeneration.from_pretrained('Salesforce/codet5-base') input_ids = tokenizer(text, 
return_tensors="pt").input_ids - generated_ids = model.generate(input_ids, max_length=8) + generated_ids = model.generate(input_ids, max_length=max_len, do_sample=True, num_return_sequences=5) print(tokenizer.decode(generated_ids[0], skip_special_tokens=True)) -def codet5_small_model(text: str): - tokenizer = RobertaTokenizer.from_pretrained('Salesforce/codet5-small') - model = T5ForConditionalGeneration.from_pretrained('Salesforce/codet5-small') - input_ids = tokenizer(text, return_tensors="pt").input_ids - generated_ids = model.generate(input_ids, max_length=8) - print(tokenizer.decode(generated_ids[0], skip_special_tokens=True)) - - -def starcoder_model(text: str): +def starcoder_model(text: str, max_len: int): checkpoint = "bigcode/starcoderbase-1b" tokenizer = AutoTokenizer.from_pretrained(checkpoint) model = AutoModelForCausalLM.from_pretrained(checkpoint) inputs = tokenizer.encode(text, return_tensors="pt") - outputs = model.generate(inputs, max_length=8) + outputs = model.generate(inputs, max_length=max_len, do_sample=True, num_return_sequences=5) print(tokenizer.decode(outputs[0])) @@ -55,8 +47,8 @@ def healing(tokenizer, model, prefix, outputs): def llama_model(text: str): model_name = "codellama/CodeLlama-7b-hf" tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True) - model = LlamaForCausalLM.from_pretrained(model_name, quantization_config=nf4_config) - # model = LlamaForCausalLM.from_pretrained(model_name) + # model = LlamaForCausalLM.from_pretrained(model_name, quantization_config=nf4_config) + model = LlamaForCausalLM.from_pretrained(model_name) pipeline = transformers.pipeline( "text-generation", @@ -80,31 +72,6 @@ def llama_model(text: str): print(f"Result: {seq['generated_text']}") -''' -def llama_model(text: str): - model = "codellama/CodeLlama-7b-hf" - tokenizer = AutoTokenizer.from_pretrained(model, use_fast=True) - pipeline = transformers.pipeline( - "text-generation", - model=model, - torch_dtype=torch.float32, - device_map="auto", - ) - - sequences = pipeline( - text, - do_sample=True, - top_k=10, - temperature=0.1, - top_p=0.95, - num_return_sequences=1, - eos_token_id=tokenizer.eos_token_id, - max_length=200, - ) - for seq in sequences: - print(f"Result: {seq['generated_text']}") -''' - if __name__ == "__main__": # codet5_model("123") # llama_model("def print_hello_world():") @@ -115,4 +82,3 @@ def llama_model(text: str): parser = argparse.ArgumentParser() parser.add_argument("--model", type=str, default="codellama/CodeLlama-7b-hf") args = parser.parse_args() - From db6c9d0a58184f1dae56db37980071be3fd4e13d Mon Sep 17 00:00:00 2001 From: F47-503 Date: Fri, 15 Sep 2023 12:26:10 +0300 Subject: [PATCH 08/10] server, models: compatibility with models ensured --- server/models/huggingface_models.py | 8 ++++---- server/server.py | 22 +++++++++++++++++++--- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/server/models/huggingface_models.py b/server/models/huggingface_models.py index 6eaaa0e..9f6cf67 100644 --- a/server/models/huggingface_models.py +++ b/server/models/huggingface_models.py @@ -28,16 +28,16 @@ def codet5_base_model(text: str, max_len: int): model = T5ForConditionalGeneration.from_pretrained('Salesforce/codet5-base') input_ids = tokenizer(text, return_tensors="pt").input_ids generated_ids = model.generate(input_ids, max_length=max_len, do_sample=True, num_return_sequences=5) - print(tokenizer.decode(generated_ids[0], skip_special_tokens=True)) + return map(lambda prompt_ans: tokenizer.decode(prompt_ans, skip_special_tokens=True), 
generated_ids) def starcoder_model(text: str, max_len: int): checkpoint = "bigcode/starcoderbase-1b" tokenizer = AutoTokenizer.from_pretrained(checkpoint) model = AutoModelForCausalLM.from_pretrained(checkpoint) - inputs = tokenizer.encode(text, return_tensors="pt") - outputs = model.generate(inputs, max_length=max_len, do_sample=True, num_return_sequences=5) - print(tokenizer.decode(outputs[0])) + input_ids = tokenizer(text, return_tensors="pt").input_ids + generated_ids = model.generate(input_ids, max_length=max_len, do_sample=True, num_return_sequences=5) + return map(lambda prompt_ans: tokenizer.decode(prompt_ans, skip_special_tokens=True), generated_ids) def healing(tokenizer, model, prefix, outputs): diff --git a/server/server.py b/server/server.py index 1edde2d..bc38a94 100644 --- a/server/server.py +++ b/server/server.py @@ -1,6 +1,13 @@ from http.server import HTTPServer, BaseHTTPRequestHandler import argparse import json +from models.huggingface_models import codet5_base_model, starcoder_model + +models = [{"model_name": "StarCoder"}, {"model_name": "codeT5-base"}] +maper = { + "codeT5-base": codet5_base_model, + "StarCoder": starcoder_model +} class RequestHandler(BaseHTTPRequestHandler): @@ -13,15 +20,24 @@ def do_POST(self): self._set_headers() content_len = int(self.headers.get("Content-Length", 0)) post_body = self.rfile.read(content_len) - text_received = post_body.decode("utf-8") - json_bytes = json.dumps({"text": text_received}).encode("utf-8") + json_data = json.loads(post_body) + text_received = json_data["prompt"] + model = maper(json_data["model"]) + processed_texts = model(text_received, json_data["max_new_tokens"]) + json_bytes = json.dumps( + {"results" : list(map(lambda x: {"text": x}, processed_texts))} + ).encode("utf-8") self.wfile.write(json_bytes) + def do_GET(self): + self._set_headers() + models_json = json.dumps({"models": models}) + self.wfile.write(models_json.encode("utf-8")) + def run(port, addr): server_address = (addr, port) httpd = HTTPServer(server_address, RequestHandler) - print(f"Starting httpd server on {addr}:{port}") httpd.serve_forever() From d2357e0a6f01e2f63ed6db48dec896d1d1bf5fde Mon Sep 17 00:00:00 2001 From: F47-503 Date: Sun, 17 Sep 2023 06:21:57 +0300 Subject: [PATCH 09/10] Revert "server, models: compatibility with models ensured" This reverts commit db6c9d0a58184f1dae56db37980071be3fd4e13d. 
--- server/models/huggingface_models.py | 8 ++++---- server/server.py | 22 +++------------------- 2 files changed, 7 insertions(+), 23 deletions(-) diff --git a/server/models/huggingface_models.py b/server/models/huggingface_models.py index 9f6cf67..6eaaa0e 100644 --- a/server/models/huggingface_models.py +++ b/server/models/huggingface_models.py @@ -28,16 +28,16 @@ def codet5_base_model(text: str, max_len: int): model = T5ForConditionalGeneration.from_pretrained('Salesforce/codet5-base') input_ids = tokenizer(text, return_tensors="pt").input_ids generated_ids = model.generate(input_ids, max_length=max_len, do_sample=True, num_return_sequences=5) - return map(lambda prompt_ans: tokenizer.decode(prompt_ans, skip_special_tokens=True), generated_ids) + print(tokenizer.decode(generated_ids[0], skip_special_tokens=True)) def starcoder_model(text: str, max_len: int): checkpoint = "bigcode/starcoderbase-1b" tokenizer = AutoTokenizer.from_pretrained(checkpoint) model = AutoModelForCausalLM.from_pretrained(checkpoint) - input_ids = tokenizer(text, return_tensors="pt").input_ids - generated_ids = model.generate(input_ids, max_length=max_len, do_sample=True, num_return_sequences=5) - return map(lambda prompt_ans: tokenizer.decode(prompt_ans, skip_special_tokens=True), generated_ids) + inputs = tokenizer.encode(text, return_tensors="pt") + outputs = model.generate(inputs, max_length=max_len, do_sample=True, num_return_sequences=5) + print(tokenizer.decode(outputs[0])) def healing(tokenizer, model, prefix, outputs): diff --git a/server/server.py b/server/server.py index bc38a94..1edde2d 100644 --- a/server/server.py +++ b/server/server.py @@ -1,13 +1,6 @@ from http.server import HTTPServer, BaseHTTPRequestHandler import argparse import json -from models.huggingface_models import codet5_base_model, starcoder_model - -models = [{"model_name": "StarCoder"}, {"model_name": "codeT5-base"}] -maper = { - "codeT5-base": codet5_base_model, - "StarCoder": starcoder_model -} class RequestHandler(BaseHTTPRequestHandler): @@ -20,24 +13,15 @@ def do_POST(self): self._set_headers() content_len = int(self.headers.get("Content-Length", 0)) post_body = self.rfile.read(content_len) - json_data = json.loads(post_body) - text_received = json_data["prompt"] - model = maper(json_data["model"]) - processed_texts = model(text_received, json_data["max_new_tokens"]) - json_bytes = json.dumps( - {"results" : list(map(lambda x: {"text": x}, processed_texts))} - ).encode("utf-8") + text_received = post_body.decode("utf-8") + json_bytes = json.dumps({"text": text_received}).encode("utf-8") self.wfile.write(json_bytes) - def do_GET(self): - self._set_headers() - models_json = json.dumps({"models": models}) - self.wfile.write(models_json.encode("utf-8")) - def run(port, addr): server_address = (addr, port) httpd = HTTPServer(server_address, RequestHandler) + print(f"Starting httpd server on {addr}:{port}") httpd.serve_forever() From ce05e305f96d6d457da4d4f6006fa946e8502af9 Mon Sep 17 00:00:00 2001 From: F47-503 Date: Sun, 17 Sep 2023 06:29:52 +0300 Subject: [PATCH 10/10] server, models: server with models compatibility ensured --- server/models/huggingface_models.py | 23 ++++++++++++++--------- server/server.py | 18 ++++++++++++------ 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/server/models/huggingface_models.py b/server/models/huggingface_models.py index 6eaaa0e..b442610 100644 --- a/server/models/huggingface_models.py +++ b/server/models/huggingface_models.py @@ -8,7 +8,7 @@ import argparse import transformers 
import torch - +device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") # nf4_config = BitsAndBytesConfig( # load_in_4bit=True, # bnb_4bit_quant_type="nf4", @@ -24,20 +24,25 @@ def read_from_file(path: str) -> str: def codet5_base_model(text: str, max_len: int): + global device tokenizer = RobertaTokenizer.from_pretrained('Salesforce/codet5-base') - model = T5ForConditionalGeneration.from_pretrained('Salesforce/codet5-base') - input_ids = tokenizer(text, return_tensors="pt").input_ids - generated_ids = model.generate(input_ids, max_length=max_len, do_sample=True, num_return_sequences=5) - print(tokenizer.decode(generated_ids[0], skip_special_tokens=True)) + model = T5ForConditionalGeneration.from_pretrained('Salesforce/codet5-base').to(device) + input_ids = tokenizer(text, return_tensors="pt").input_ids.to(device) + generated_ids = model.generate(input_ids, max_length=max_len, do_sample=True, num_return_sequences=5).cpu() + print(generated_ids) + return map(lambda prompt_ans: tokenizer.decode(prompt_ans, skip_special_tokens=True), generated_ids) def starcoder_model(text: str, max_len: int): + global device checkpoint = "bigcode/starcoderbase-1b" tokenizer = AutoTokenizer.from_pretrained(checkpoint) - model = AutoModelForCausalLM.from_pretrained(checkpoint) - inputs = tokenizer.encode(text, return_tensors="pt") - outputs = model.generate(inputs, max_length=max_len, do_sample=True, num_return_sequences=5) - print(tokenizer.decode(outputs[0])) + model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device) + inputs = tokenizer.encode(text, return_tensors="pt").to(device) + outputs = model.generate(inputs, max_length=max_len, do_sample=True, num_return_sequences=5).cpu() + print(outputs) + print("sent") + return map(lambda prompt_ans: tokenizer.decode(prompt_ans, skip_special_tokens=True), outputs) def healing(tokenizer, model, prefix, outputs): diff --git a/server/server.py b/server/server.py index 84e7619..2ae9480 100644 --- a/server/server.py +++ b/server/server.py @@ -1,9 +1,13 @@ from http.server import HTTPServer, BaseHTTPRequestHandler import argparse import json -from models.huggingface_models import codet5_small_model +from models.huggingface_models import codet5_base_model, starcoder_model -models = [{"model_name": "StarCoder"}, {"model_name": "codellama"}, {"model_name": "codet5"}] +models = [{"model_name": "StarCoder"}, {"model_name": "codeT5-base"}] +maper = { + "StarCoder": starcoder_model, + "codeT5-base": codet5_base_model +} class RequestHandler(BaseHTTPRequestHandler): @@ -18,10 +22,13 @@ def do_POST(self): post_body = self.rfile.read(content_len) json_data = json.loads(post_body) text_received = json_data["prompt"] - processed_texts = codet5_small_model(text_received, json_data["max_new_tokens"]) + model = maper[json_data["model"]] + processed_texts = model(text_received, json_data["max_new_tokens"]) + start_index = len(text_received) if json_data["model"] == "StarCoder" else 0 json_bytes = json.dumps( - {"results" : [{"text": text_received}, {"text": processed_texts}]} - ).encode("utf-8") + {"results" : list(map(lambda x: {"text": x[start_index:]}, processed_texts))} + ).encode("utf-8") + print(json_bytes) self.wfile.write(json_bytes) def do_GET(self): @@ -33,7 +40,6 @@ def do_GET(self): def run(port, addr): server_address = (addr, port) httpd = HTTPServer(server_address, RequestHandler) - print(f"Starting httpd server on {addr}:{port}") httpd.serve_forever()
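For reference, here is a minimal client sketch against the HTTP API as it stands after PATCH 10/10 (do_GET advertises the available models, do_POST runs a completion). It is an illustration only: the localhost address and port 8000 are assumptions, since the argparse defaults passed to run() are not shown in these patches; the JSON keys ("prompt", "model", "max_new_tokens", "models", "results") and the model names ("StarCoder", "codeT5-base") come from server/server.py.

import json
import urllib.request

BASE_URL = "http://localhost:8000"  # assumed address/port; match whatever is passed to run()

# GET: the handler answers with {"models": [{"model_name": ...}, ...]}
with urllib.request.urlopen(BASE_URL) as resp:
    print(json.load(resp)["models"])

# POST: the handler reads "prompt", "model" and "max_new_tokens" from the JSON body
# and answers with {"results": [{"text": ...}, ...]}
payload = json.dumps({
    "prompt": "def print_hello_world():",
    "model": "codeT5-base",  # or "StarCoder", per the maper dict in server.py
    "max_new_tokens": 8,
}).encode("utf-8")
request = urllib.request.Request(BASE_URL, data=payload, headers={"Content-Type": "application/json"})
with urllib.request.urlopen(request) as resp:
    for result in json.load(resp)["results"]:
        print(result["text"])

Note that the server forwards "max_new_tokens" to generate() as max_length, so it caps the total decoded length rather than only the newly generated tokens; for the decoder-only StarCoder path that total includes the prompt, which is why do_POST strips the first len(prompt) characters via start_index before returning the results.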