# QWen_api.py
# HTTP service exposing the Qwen-14B-Chat-Int4 chat model through a single
# aiohttp POST endpoint.
from modelscope import AutoTokenizer, AutoModelForCausalLM, snapshot_download
import copy
import os
import platform
import time
import readline
import json
from shutil import copyfile
from aiohttp import web
# Resolve the pinned model revision; the returned directory is what both
# loaders below read from.
model_dir = snapshot_download(
    "qwen/Qwen-14B-Chat-Int4",
    revision='v1.0.5',
)

# Note: The default behavior now has injection attack prevention off.
tokenizer = AutoTokenizer.from_pretrained(
    model_dir,
    trust_remote_code=True,
)

# Load the weights with automatic device placement, then switch the module
# to evaluation mode; the server only ever runs inference.
model = AutoModelForCausalLM.from_pretrained(
    model_dir,
    device_map="auto",
    trust_remote_code=True,
)
model = model.eval()
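# Example: a one-off direct call to the model (no HTTP server), kept for
# reference only.
# query = "今天天气好像很好呢"  # "The weather seems really nice today"
# history = None
# prompt_path = 'prompt_wrap.txt'
# prompt = open(prompt_path).read() + "\n" + query
# T1 = time.time()
# response, history = model.chat(tokenizer, prompt, history=history)
# T2 = time.time()
# print('程序运行时间:%s毫秒' % ((T2 - T1) * 1000))  # "Program run time: %s ms"
# print(response)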
# Network endpoint the HTTP server binds to.
# '0.0.0.0' listens on every interface of the host.
address = '0.0.0.0'
# aiohttp documents the listening port as an int; the previous string value
# ('48283') is not a supported type for web.run_app(port=...).
port = 48283
async def handle(request):
    """Answer one chat request with the loaded Qwen model.

    The request body must be JSON with these fields:

    * ``prompt``  -- path of a text file on the server; its content is sent
      to the model as the first turn of a fresh conversation.
    * ``query``   -- the user's message. When it is missing, ``None`` or
      ``""``, the reply to the prompt alone is returned.
    * ``history`` -- optional flat list that is cut into consecutive
      two-element chunks and appended after the prompt turn (presumably
      alternating user/assistant messages -- confirm with the client).

    Returns:
        A ``web.Response`` whose text is the model's last reply.

    Raises:
        web.HTTPBadRequest: if the body has no ``prompt`` field.
    """
    print("\n\n\n\n\n\n\n")
    data = await request.json()

    try:
        prompt_path = data['prompt']
    except KeyError:
        raise web.HTTPBadRequest(text="request body has no 'prompt' field") from None
    print("\n\n\n\n\n\n\n this is prompt_path:{}\n\n\n\n\n\n".format(prompt_path) )

    # SECURITY(review): prompt_path comes straight from the request body, so
    # a client can make the server read any file this process can open.
    # Consider restricting it to a known prompt directory.
    with open(prompt_path) as prompt_file:  # close the handle deterministically
        prompt = prompt_file.read()
    print(prompt)

    # A missing "query" is treated like an explicit null.
    query = data.get('query')
    print(query)
    print(type(query))

    # NOTE(review): model.chat is a synchronous call inside a coroutine, so
    # the event loop serves no other request while generation is running.
    # Start the clock before inference so the reported time covers the model
    # calls (it previously wrapped only the print statements below).
    started = time.perf_counter()

    # The prompt is always sent first; its reply seeds the history.
    response, history = model.chat(tokenizer, prompt, history=None)

    if query is not None and query != "":
        print(history)
        print("\n\n\n\n\n\n")
        # Optional field: absent and null both mean "no earlier turns".
        history_list = data.get('history')
        if history_list is not None:
            # Flat [a, b, c, d, ...] -> [[a, b], [c, d], ...]
            history = history + [
                history_list[i:i + 2] for i in range(0, len(history_list), 2)
            ]
        print(history)
        print("\n\n\n\n\n\n")
        response, history = model.chat(tokenizer, query, history=history)

    elapsed_ms = (time.perf_counter() - started) * 1000

    print("this is response:")
    print(type(response))
    print(response)
    print('\n\n\n\n程序运行时间:%s毫秒' % elapsed_ms)
    return web.Response(text=response)
# Build the aiohttp application with a single POST endpoint at "/" served by
# `handle`, then run it (blocking) on the configured address and port.
app = web.Application()
app.router.add_post('/', handle)
web.run_app(
    app,
    host=address,
    port=port,
)