Skip to content

Commit

Permalink
stable 0.2.0 (#188)
Browse files Browse the repository at this point in the history
1.  修复 minio 未配置时导致的 bug
2.  修改默认配置中的注释
  • Loading branch information
yaojin3616 authored Dec 7, 2023
2 parents f8356ba + 538702b commit c5b5bf7
Show file tree
Hide file tree
Showing 7 changed files with 41 additions and 24 deletions.
4 changes: 2 additions & 2 deletions src/backend/bisheng/api/v1/knowledge.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ async def upload_file(*, file: UploadFile = File(...)):
# 缓存本地
file_path = save_uploaded_file(file.file, 'bisheng', file_name)
if not isinstance(file_path, str):
file_path = str(file_path) + '_' + file_name
file_path = str(file_path)
return UploadFileResponse(file_path=file_path)
except Exception as exc:
logger.error(f'Error saving file: {exc}')
Expand Down Expand Up @@ -116,7 +116,7 @@ async def process_knowledge(*,
result = []
for path in file_path:
filepath, file_name = file_download(path)
md5_ = filepath.rsplit('/', 1)[1].split('.')[0]
md5_ = filepath.rsplit('/', 1)[1].split('.')[0].split('_')[0]
# 是否包含重复文件
repeat = session.exec(select(KnowledgeFile)
.where(KnowledgeFile.md5 == md5_,
Expand Down
5 changes: 2 additions & 3 deletions src/backend/bisheng/cache/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,8 +183,7 @@ def save_uploaded_file(file, folder_name, file_name):
minio_client.upload_tmp(file_name, file_byte)
file_path = minio_client.get_share_link(file_name, tmp_bucket)
else:
file_type = md5_name.split('.')[-1]
file_path = folder_path / f'{md5_name}.{file_type}'
file_path = folder_path / f'{md5_name}_{file_name}'
with open(file_path, 'wb') as new_file:
while chunk := file.read(8192):
new_file.write(chunk)
Expand Down Expand Up @@ -248,7 +247,7 @@ def file_download(file_path: str):
return file_path, filename
elif not os.path.isfile(file_path):
raise ValueError('File path %s is not a valid file or url' % file_path)
return file_path, ''
return file_path, file_path.split('_', 1)[1] if '_' in file_path else ''


def _is_valid_url(url: str):
Expand Down
2 changes: 1 addition & 1 deletion src/backend/bisheng/chat/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ def refresh_graph_data(self, graph_data: dict, node_data: List[dict]):
if url_path.netloc:
file_name = unquote(url_path.path.split('/')[-1])
else:
file_path, file_name = file_path.split('_', 1)
file_name = file_path.split('_', 1)[1] if '_' in file_path else ''
nd['value'] = file_name
tweak[nd.get('id')] = {'file_path': file_path, 'value': file_name}
elif 'VariableNode' in nd.get('id'):
Expand Down
22 changes: 11 additions & 11 deletions src/backend/bisheng/initdb_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,18 @@ knowledges: # 知识库相关配置
vectorstores:
# Milvus 最低要求cpu 4C 8G 推荐4C 16G
Milvus: # 如果需要切换其他vectordb,确保其他服务已经启动,然后配置对应参数
connection_args: {'host': 'milvus', 'port': '19530', 'user': '', 'password': '', 'secure': False}
connection_args: {'host': '110.16.193.170', 'port': '50032', 'user': '', 'password': '', 'secure': False}
# 可选配置,有些类型的场景使用ES可以提高召回效果
ElasticKeywordsSearch:
elasticsearch_url: 'http://elasticsearch:9200'
ssl_verify: "{'basic_auth': ('elastic', 'password')}"
minio: # 如果要支持溯源功能,由于溯源会展示源文件,必须配置 oss 存储
SCHEMA: true
CERT_CHECK: false
MINIO_ENDPOINT: "milvus:9001"
MINIO_SHAREPOIN: "milvus:9001"
MINIO_ACCESS_KEY: "minioadmin"
MINIO_SECRET_KEY: "minioadmin"
# ElasticKeywordsSearch:
# elasticsearch_url: 'http://elasticsearch:9200'
# ssl_verify: "{'basic_auth': ('elastic', 'password')}"
# minio: # 如果要支持溯源功能,由于溯源会展示源文件,必须配置 oss 存储
# SCHEMA: false # 是否支持 https
# CERT_CHECK: false # 是否校验 https 证书
# MINIO_ENDPOINT: "milvus:9001" # 这个地址用来写请求
# MINIO_SHAREPOIN: "milvus:9001" # 为保证外网和内网隔离,浏览器获取文件链接时使用这个域名
# MINIO_ACCESS_KEY: "minioadmin"
# MINIO_SECRET_KEY: "minioadmin"
#

# 全局配置大模型
Expand Down
8 changes: 6 additions & 2 deletions src/backend/bisheng/interface/chains/custom.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,12 @@ def initialize(cls,
document_prompt: BasePromptTemplate = None,
token_max: str = -1):
if chain_type == 'stuff':
return load_qa_chain(llm=llm, chain_type=chain_type, prompt=prompt,
token_max=token_max, document_prompt=document_prompt)
if document_prompt:
return load_qa_chain(llm=llm, chain_type=chain_type, prompt=prompt,
token_max=token_max, document_prompt=document_prompt)
else:
return load_qa_chain(llm=llm, chain_type=chain_type, prompt=prompt,
token_max=token_max)
else:
return load_qa_chain(llm=llm, chain_type=chain_type)

Expand Down
16 changes: 14 additions & 2 deletions src/backend/bisheng/interface/initialize/loading.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def instantiate_based_on_type(class_object, base_type, node_type, params, param_
if base_type == 'agents':
return instantiate_agent(node_type, class_object, params)
elif base_type == 'prompts':
return instantiate_prompt(node_type, class_object, params)
return instantiate_prompt(node_type, class_object, params, param_id_dict)
elif base_type == 'tools':
tool = instantiate_tool(node_type, class_object, params)
if hasattr(tool, 'name') and isinstance(tool, BaseTool):
Expand Down Expand Up @@ -320,7 +320,7 @@ def instantiate_agent(node_type, class_object: Type[agent_module.Agent], params:
return load_agent_executor(class_object, params)


def instantiate_prompt(node_type, class_object, params: Dict):
def instantiate_prompt(node_type, class_object, params: Dict, param_id_dict: Dict):

if node_type == 'ZeroShotPrompt':
if 'tools' not in params:
Expand All @@ -339,6 +339,10 @@ def instantiate_prompt(node_type, class_object, params: Dict):
else:
prompt = class_object(**params)

no_human_input = set(param_id_dict.keys())
human_input = set(prompt.input_variables).difference(no_human_input)
order_input = list(human_input) + list(set(prompt.input_variables) & no_human_input)
prompt.input_variables = order_input
format_kwargs: Dict[str, Any] = {}
for input_variable in prompt.input_variables:
if input_variable in params:
Expand All @@ -348,6 +352,14 @@ def instantiate_prompt(node_type, class_object, params: Dict):
elif isinstance(variable, BaseOutputParser) and hasattr(variable,
'get_format_instructions'):
format_kwargs[input_variable] = variable.get_format_instructions()
elif isinstance(variable, dict):
# variable node
if len(variable) == 0:
format_kwargs[input_variable] = ''
continue
elif len(variable) != 1:
raise ValueError(f'VariableNode contains multi-key {variable.keys()}')
format_kwargs[input_variable] = list(variable.values())[0]
elif isinstance(variable, List) and all(
isinstance(item, Document) for item in variable):
# Format document to contain page_content and metadata
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,11 +195,13 @@ def _completion_with_retry(**kwargs: Any) -> Any:
'functions': kwargs.get('functions', [])
}
response = self.client.post(self.elemai_base_url, json=params)
if response.status_code != 200:
raise
return response.json()

return _completion_with_retry(**kwargs)
rsp_dict = _completion_with_retry(**kwargs)
if 200 != rsp_dict.get('status_code'):
logger.error(f'proxy_llm_error resp={rsp_dict}')
raise Exception(rsp_dict)
return rsp_dict

def _combine_llm_outputs(self, llm_outputs: List[Optional[dict]]) -> dict:
overall_token_usage: dict = {}
Expand Down

0 comments on commit c5b5bf7

Please sign in to comment.