Commit
[AddVersion] update version 1.0.0 to 1.1.0
leofang3 committed Aug 11, 2023
1 parent 6ed8ce3 commit 020ee12
Showing 131 changed files with 4,978 additions and 1,902 deletions.
2 changes: 1 addition & 1 deletion setup.py
@@ -2,7 +2,7 @@

setup(
name="pythinker",
version="1.0.0",
version="1.1.0",
description="A DeepLearning inference framework for venus",
author="listenai",
author_email="lingerthinker@listenai.com",
44 changes: 32 additions & 12 deletions thinker/combine.py
@@ -1,6 +1,7 @@
# Copyright (C) 2022 listenai Co.Ltd
# All rights reserved.
# Created by leifang on 2022.09.31
# modified by leifang on 2023.08.11

import numpy as np
from typing import List, Dict, Tuple
@@ -10,6 +11,19 @@
from .enum_defines import MemType, DevType, Platform
from .resource_packer.memory import memory_plan, get_memory_size

def parse_memory(s: str) -> Dict[str, str]:
    dynamic_memory = dict()
    if s is None:
        return dynamic_memory
    entries = s.split(",")
    for entry in entries:
        parts = entry.split(":")
        if len(parts) == 1:
            # a bare memory type applies to all constant parameters
            assert parts[0].lower() in ['psram', 'share-memory', 'flash']
            return {'params': parts[0]}
        assert parts[1].lower() in ['psram', 'share-memory', 'flash']
        dynamic_memory[parts[0]] = parts[1]
    return dynamic_memory
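
For reference, a brief illustration of the two accepted forms of the memory string (the tensor names below are made up for the example; only the memory-type keywords come from the code above):

    parse_memory("psram")                     # -> {'params': 'psram'}: place all constant parameters in PSRAM
    parse_memory("input0:psram,cache:flash")  # -> {'input0': 'psram', 'cache': 'flash'}: per-tensor placement
    parse_memory(None)                        # -> {}: keep the default placement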

def _get_next_id(threshold: List[int]) -> int:
begin = len(threshold)
@@ -25,7 +39,7 @@ def _get_next_id(threshold: List[int]) -> int:


def _judge_end(threshold: List[int]) -> int:
for i in range(len(threshold) - 2):
for i in range(len(threshold) - 1):
if threshold[i] == 1 and threshold[i + 1] == -1:
return i
return None
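
A quick check of the off-by-one fix above (illustrative; the mask uses the same convention as graph_adapter below, 1 = plan fits, -1 = plan exceeds the budget):

    mask = [1, -1]
    # the new bound range(len(mask) - 1) inspects index 0 and finds the boundary;
    # the old bound range(len(mask) - 2) was empty, so None was always returned
    assert _judge_end(mask) == 0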
@@ -45,6 +59,7 @@ def _get_memory_plan(

def _graph_bind_device(graph: Graph, platform: Platform, memory: str) -> Graph:
cpus = Platform.get_cpu_list(platform)
dy_memory = parse_memory(memory)
if len(cpus) > 1:
cpus = cpus[::-1]

@@ -61,14 +76,19 @@ def _graph_bind_device(graph: Graph, platform: Platform, memory: str) -> Graph:

for i in range(len(node.inputs)):
if node.inputs[i].is_constant():
node.inputs[i].tensor.mem_type = MemType.from_str(memory)
elif node.inputs[i] in graph.inputs:
node.inputs[i].tensor.mem_type = MemType.SHARE_MEM
if 'params' in dy_memory:
node.inputs[i].tensor.mem_type = MemType.from_str(dy_memory['params'])
else:
node.inputs[i].tensor.mem_type = MemType.PSRAM
elif node.inputs[i].name in dy_memory:
node.inputs[i].tensor.mem_type = MemType.from_str(dy_memory[node.inputs[i].name])
elif node.inputs[i].tensor.mem_type == None:
node.inputs[i].tensor.mem_type = MemType.SHARE_MEM

for i in range(len(node.outputs)):
if node.outputs[i].tensor.mem_type == None:
if node.outputs[i].name in dy_memory:
node.outputs[i].tensor.mem_type = MemType.from_str(dy_memory[node.outputs[i].name])
elif node.outputs[i].tensor.mem_type == None:
node.outputs[i].tensor.mem_type = MemType.SHARE_MEM

return graph
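
In plain terms, the binding above applies the following precedence to tensors whose memory type is not already set. This is a standalone sketch with a hypothetical helper name, returning plain strings instead of MemType values:

    def choose_mem_type(tensor_name, is_constant, dy_memory):
        if is_constant:
            return dy_memory.get('params', 'psram')   # constants follow the 'params' entry, else PSRAM
        if tensor_name in dy_memory:
            return dy_memory[tensor_name]             # explicit per-tensor placement
        return 'share-memory'                         # default placement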
@@ -77,10 +97,10 @@ def _graph_bind_device(graph: Graph, platform: Platform, memory: str) -> Graph:
def graph_adapter(
graph: Graph, platform: Platform, local_mem: str, is_dump: bool
) -> Tuple[Graph, Dict[int, List[int]]]:
MIN_THRESHOLD = 32 * 1024
MIN_THRESHOLD = 16 * 1024
MAX_THRESHOLD = 640 * 1024
STEP_THRESHOLD = 10 * 1024
threshold = np.arange(MIN_THRESHOLD, MAX_THRESHOLD, STEP_THRESHOLD)
STEP_THRESHOLD = 16 * 1024
threshold = np.arange(MIN_THRESHOLD, MAX_THRESHOLD + 1, STEP_THRESHOLD)
threshold_mask = np.zeros(len(threshold))
print("set linearint threshold and analyze memory begin")
print("try threshold:{}".format(threshold[-1]))
@@ -89,7 +109,7 @@ def graph_adapter(
graph, platform, local_mem, threshold[-1], is_dump
)
memory_tobe_allocated = get_memory_size(memory_planer, MemType.SHARE_MEM)

if memory_tobe_allocated > MAX_THRESHOLD:
threshold_mask[-1] = -1
print("try threshold:{}".format(threshold[0]))
@@ -100,8 +120,8 @@ def graph_adapter(

if memory_tobe_allocated > MAX_THRESHOLD:
print(
"WARNING:SHARE-MEM to be allocated was {}, exceed 640KB".format(
memory_tobe_allocated
"WARNING:SHARE-MEM to be allocated was {}, exceed {}".format(
memory_tobe_allocated, MAX_THRESHOLD
)
)
else:
@@ -121,7 +141,7 @@ def graph_adapter(
else:
threshold_mask[id] = 1
final_id = _judge_end(threshold_mask)
if final_id:
if final_id != None:
print("the best threshold:{}".format(threshold[final_id]))
new_graph, memory_planer, is_linearint = _get_memory_plan(
graph, platform, local_mem, threshold[final_id], is_dump
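
The threshold search above can be summarized as follows. This is a simplified, self-contained sketch of the idea, not the project's implementation: walk a fixed grid of candidate thresholds and keep the largest one whose memory plan still fits the share-memory budget, assuming the footprint grows with the threshold. The helper name and the feasibility check are illustrative only:

    import numpy as np

    def pick_threshold(grid, fits):
        """Return the largest value in grid for which fits(value) is True."""
        lo, hi = 0, len(grid) - 1
        best = None
        while lo <= hi:
            mid = (lo + hi) // 2
            if fits(grid[mid]):
                best = grid[mid]  # feasible: try a larger threshold
                lo = mid + 1
            else:
                hi = mid - 1      # infeasible: try a smaller threshold
        return best

    grid = np.arange(16 * 1024, 640 * 1024 + 1, 16 * 1024)
    best = pick_threshold(grid, lambda t: t <= 320 * 1024)  # toy feasibility check
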
6 changes: 6 additions & 0 deletions thinker/enum_defines.py
@@ -70,13 +70,15 @@ def get_support_op(platform: str) -> List[str]:
"iqSigmoid",
"iqTanh",
"iqCat",
"iqPad",
"Transpose",
"Reshape",
"Squeeze",
"Unsqueeze",
"Flatten",
"Slice",
"iqSum",
"iqSub",
"iqAdd",
"iqMul",
"iqDiv",
@@ -93,6 +95,10 @@ def get_support_op(platform: str) -> List[str]:
"Requant",
"LayerNormInt",
"iqVar",
"Cast",
"Expand",
"Tile",
"LogSoftmax",
]


112 changes: 92 additions & 20 deletions thinker/executor/c_api/thinker_api.c
@@ -62,7 +62,7 @@ typedef struct _t_Instance_ {
tModel *model_;
double *shape_scalars_;
tDMA_List *dma_list_;

uint32_t force_stop_flag;
int32_t reserved_args[8];
} tExecInst;

@@ -86,15 +86,15 @@ tStatus tGetMemoryPlan(tMemory *memory_list, int32_t *num_memory,
return T_ERR_RES_INCOMPLETE;
}
// check CRC
if (res_hdr->crc32_ != 0) {
uint8_t *res_model_ptr = (uint8_t *)res + ALIGN16(sizeof(tModelHeader));
int32_t res_model_size = size - ALIGN16(sizeof(tModelHeader));
int32_t crc_check = crc24(0, res_model_ptr, res_model_size);
if (res_hdr->crc32_ != crc_check) {
printf("%d,%d", res_hdr->crc32_, crc_check);
return T_ERR_RES_CRC_CHECK;
}
}
if (res_hdr->crc32_ != 0) {
uint8_t *res_model_ptr = (uint8_t *)res + ALIGN16(sizeof(tModelHeader));
int32_t res_model_size = size - ALIGN16(sizeof(tModelHeader));
int32_t crc_check = crc24(0, res_model_ptr, res_model_size);
if (res_hdr->crc32_ != crc_check) {
printf("%d,%d", res_hdr->crc32_, crc_check);
return T_ERR_RES_CRC_CHECK;
}
}
// model_inst_size
int32_t model_inst_size = 0;
model_inst_size += ALIGN16(sizeof(tModel));
@@ -166,6 +166,9 @@ tStatus tGetMemoryPlan(tMemory *memory_list, int32_t *num_memory,
return T_SUCCESS;
}

#if !(defined(WIN32) || defined(linux))
#pragma clang optimize off
#endif
tStatus tModelInit(tModelHandle *hdl, const int8_t *res, const uint64_t size,
const tMemory *memory_list, const int32_t num_memory) {
tModelHeader *res_hdr = (tModelHeader *)res;
@@ -179,14 +182,14 @@ tStatus tModelInit(tModelHandle *hdl, const int8_t *res, const uint64_t size,
return T_ERR_RES_INCOMPLETE;
}
// check CRC
if (res_hdr->crc32_ != 0) {
uint8_t *res_model_ptr = (uint8_t *)res + ALIGN16(sizeof(tModelHeader));
int32_t res_model_size = size - ALIGN16(sizeof(tModelHeader));
int32_t crc_check = crc24(0, res_model_ptr, res_model_size);
if (res_hdr->crc32_ != crc_check) {
return T_ERR_RES_CRC_CHECK;
}
}
if (res_hdr->crc32_ != 0) {
uint8_t *res_model_ptr = (uint8_t *)res + ALIGN16(sizeof(tModelHeader));
int32_t res_model_size = size - ALIGN16(sizeof(tModelHeader));
int32_t crc_check = crc24(0, res_model_ptr, res_model_size);
if (res_hdr->crc32_ != crc_check) {
return T_ERR_RES_CRC_CHECK;
}
}

int32_t inst_size = 0;
inst_size += ALIGN16(sizeof(tModel));
@@ -315,7 +318,7 @@ tStatus tModelInit(tModelHandle *hdl, const int8_t *res, const uint64_t size,
tensor->dptr_ = memory->dptr_ + offset;
}

inst->debug_info = (tDebugList *)ptr;
inst->debug_info = (tDebugList *)ptr;
inst->debug_info->tensor_name_count_ = debug_hdr.tensor_name_count_;
inst->debug_info->tensor_name_list_ =
(void *)(res + res_hdr->debug_offset_ + sizeof(tDebugList));
@@ -334,6 +337,9 @@ tStatus tModelInit(tModelHandle *hdl, const int8_t *res, const uint64_t size,
*hdl = ~((tModelHandle)inst);
return T_SUCCESS;
}
#if !(defined(WIN32) || defined(linux))
#pragma clang optimize on
#endif

tStatus tModelFini(tModelHandle hdl) {
#if !THINKER_USE_ACL
@@ -352,6 +358,34 @@ int32_t tGetInputCount(const tModelHandle hdl) {
return model->num_input_;
}

tStatus tGetInputInfo(const tExecHandle hdl, const int32_t idx,
tData *input) {
tExecInst *inst = (tExecInst *)~hdl;
if (inst == NULL || inst->flag_ != THINKER_INST_FLAG) {
return T_ERR_INVALID_INST;
}
tModel *model = inst->model_;

if (idx < 0 || idx >= model->num_input_) {
return T_ERR_INDEX_OF_BOUND;
}

if (input == NULL) {
return T_ERR_INVALID_DATA;
}

{
tTensor *tensor = inst->tensor_ + model->io_tensors_[idx];
input->dev_type_ = tensor->mem_.type_;
input->dtype_ = tensor->dtype_;
input->scale_ = tensor->scale_;
input->shape_ = tensor->shape_;
input->zero_ = tensor->zero_;
input->dptr_ = (void *)tensor->dptr_;
}
return T_SUCCESS;
}

const char *tGetInputName(const tModelHandle hdl, const int32_t idx) {
tModel *model = (tModel *)~hdl;
return model->io_names_ + idx * model->io_name_len_;
@@ -398,6 +432,9 @@ tShape tGetOutputShape(const tModelHandle hdl, const int32_t idx) {
return tensor->shape_;
}

#if !(defined(WIN32) || defined(linux))
#pragma clang optimize off
#endif
tStatus tCreateExecutor(const tModelHandle model_hdl, tExecHandle *hdl,
const tMemory *memory_list, const int32_t num_memory) {
tModel *model = (tModel *)~model_hdl;
@@ -502,6 +539,9 @@ tStatus tCreateExecutor(const tModelHandle model_hdl, tExecHandle *hdl,
*hdl = ~((tModelHandle)inst);
return T_SUCCESS;
}
#if !(defined(WIN32) || defined(linux))
#pragma clang optimize on
#endif

tStatus tReleaseExecutor(tExecHandle hdl) {
tExecInst *inst = (tExecInst *)~hdl;
@@ -573,7 +613,8 @@ tStatus tSetInput(const tExecHandle hdl, const int32_t idx,
tensor->shape_ = input->shape_;
tensor->scale_ = input->scale_;
uint64_t bytes = getShapeSize(&tensor->shape_) * (dtype & 0xFF);
memcpy((void *)tensor->dptr_, input->dptr_, bytes);
if ((uint64_t)tensor->dptr_ != (uint64_t)input->dptr_)
memcpy((void *)tensor->dptr_, input->dptr_, bytes);
}
return T_SUCCESS;
}
@@ -679,6 +720,11 @@ tStatus tForward(const tExecHandle hdl) {
uint32_t num_tensor = op->num_input_ + op->num_output_ + op->num_temp_;
tOperatorAPI *op_api = model->op_api_[op->op_id_];

if (T_FORCE_STOP_VALUE == inst->force_stop_flag) // user requested a stop
{
return T_FORCE_STOP_VALUE;
}

for (ii = 0; ii < num_tensor; ++ii) {
local_tensor[ii] = inst->tensor_ + tensor_ids[ii];
}
@@ -706,6 +752,28 @@ tStatus tForward(const tExecHandle hdl) {
return T_SUCCESS;
}

tStatus tExecutorStart(tExecHandle hdl)
{
tExecInst *inst = (tExecInst *)~hdl;
if (inst == NULL || inst->flag_ != THINKER_INST_FLAG)
{
return T_ERR_INVALID_INST;
}
inst->force_stop_flag = 0;
return T_SUCCESS;
}

tStatus tExecutorStop(tExecHandle hdl)
{
tExecInst *inst = (tExecInst *)~hdl;
if (inst == NULL || inst->flag_ != THINKER_INST_FLAG)
{
return T_ERR_INVALID_INST;
}
inst->force_stop_flag = T_FORCE_STOP_VALUE;
return T_SUCCESS;
}

static thinkerApi g_api;
const thinkerApi *thinkerGetApi() {
g_api.tInitialize = tInitialize;
@@ -717,6 +785,7 @@ const thinkerApi *thinkerGetApi() {
g_api.tModelFini = tModelFini;

g_api.tGetInputCount = tGetInputCount;
g_api.tGetInputInfo = tGetInputInfo;
g_api.tGetInputName = tGetInputName;
g_api.tGetOutputCount = tGetOutputCount;
g_api.tGetOutputName = tGetOutputName;
Expand All @@ -734,5 +803,8 @@ const thinkerApi *thinkerGetApi() {
g_api.tGetOutputByName = tGetOutputByName;
g_api.tForward = tForward;

g_api.tExecutorStart = tExecutorStart;
g_api.tExecutorStop = tExecutorStop;

return &g_api;
}