Commit
[AddVersion] update version 1.0.0 to 1.1.0
leofang3 committed Aug 11, 2023
1 parent 6ed8ce3 commit 020ee12
Showing 131 changed files with 4,978 additions and 1,902 deletions.
2 changes: 1 addition & 1 deletion setup.py
@@ -2,7 +2,7 @@

setup(
name="pythinker",
version="1.0.0",
version="1.1.0",
description="A DeepLearning inference framework for venus",
author="listenai",
author_email="lingerthinker@listenai.com",
44 changes: 32 additions & 12 deletions thinker/combine.py
@@ -1,6 +1,7 @@
# Copyright (C) 2022 listenai Co.Ltd
# All rights reserved.
# Created by leifang on 2022.09.31
# modified by leifang on 2023.08.11

import numpy as np
from typing import List, Dict, Tuple
@@ -10,6 +11,19 @@
from .enum_defines import MemType, DevType, Platform
from .resource_packer.memory import memory_plan, get_memory_size

def parse_memory(s: str) -> Dict[str, str]:
    dynamic_memory = dict()
    if s is None:
        return dynamic_memory
    entries = s.split(",")
    for entry in entries:
        parts = entry.split(":")
        if len(parts) == 1:
            # a bare memory type applies to all constant parameters
            assert parts[0].lower() in ['psram', 'share-memory', 'flash']
            return {'params': parts[0]}
        assert parts[1].lower() in ['psram', 'share-memory', 'flash']
        dynamic_memory[parts[0]] = parts[1]
    return dynamic_memory
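
For reference, a brief illustration of the two accepted forms of the memory string (the tensor names below are made up for the example; only the memory-type keywords come from the code above):

    parse_memory("psram")                     # -> {'params': 'psram'}: place all constant parameters in PSRAM
    parse_memory("input0:psram,cache:flash")  # -> {'input0': 'psram', 'cache': 'flash'}: per-tensor placement
    parse_memory(None)                        # -> {}: keep the default placement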

def _get_next_id(threshold: List[int]) -> int:
begin = len(threshold)
@@ -25,7 +39,7 @@ def _get_next_id(threshold: List[int]) -> int:


def _judge_end(threshold: List[int]) -> int:
for i in range(len(threshold) - 2):
for i in range(len(threshold) - 1):
if threshold[i] == 1 and threshold[i + 1] == -1:
return i
return None
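
A quick check of the off-by-one fix above (illustrative; the mask uses the same convention as graph_adapter below, 1 = plan fits, -1 = plan exceeds the budget):

    mask = [1, -1]
    # the new bound range(len(mask) - 1) inspects index 0 and finds the boundary;
    # the old bound range(len(mask) - 2) was empty, so None was always returned
    assert _judge_end(mask) == 0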
@@ -45,6 +59,7 @@ def _get_memory_plan(

def _graph_bind_device(graph: Graph, platform: Platform, memory: str) -> Graph:
cpus = Platform.get_cpu_list(platform)
dy_memory = parse_memory(memory)
if len(cpus) > 1:
cpus = cpus[::-1]

@@ -61,14 +76,19 @@ def _graph_bind_device(graph: Graph, platform: Platform, memory: str) -> Graph:

for i in range(len(node.inputs)):
if node.inputs[i].is_constant():
node.inputs[i].tensor.mem_type = MemType.from_str(memory)
elif node.inputs[i] in graph.inputs:
node.inputs[i].tensor.mem_type = MemType.SHARE_MEM
if 'params' in dy_memory:
node.inputs[i].tensor.mem_type = MemType.from_str(dy_memory['params'])
else:
node.inputs[i].tensor.mem_type = MemType.PSRAM
elif node.inputs[i].name in dy_memory:
node.inputs[i].tensor.mem_type = MemType.from_str(dy_memory[node.inputs[i].name])
elif node.inputs[i].tensor.mem_type == None:
node.inputs[i].tensor.mem_type = MemType.SHARE_MEM

for i in range(len(node.outputs)):
if node.outputs[i].tensor.mem_type == None:
if node.outputs[i].name in dy_memory:
node.outputs[i].tensor.mem_type = MemType.from_str(dy_memory[node.outputs[i].name])
elif node.outputs[i].tensor.mem_type == None:
node.outputs[i].tensor.mem_type = MemType.SHARE_MEM

return graph
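
In plain terms, the binding above applies the following precedence to tensors whose memory type is not already set. This is a standalone sketch with a hypothetical helper name, returning plain strings instead of MemType values:

    def choose_mem_type(tensor_name, is_constant, dy_memory):
        if is_constant:
            return dy_memory.get('params', 'psram')   # constants follow the 'params' entry, else PSRAM
        if tensor_name in dy_memory:
            return dy_memory[tensor_name]             # explicit per-tensor placement
        return 'share-memory'                         # default placement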
@@ -77,10 +97,10 @@ def _graph_bind_device(graph: Graph, platform: Platform, memory: str) -> Graph:
def graph_adapter(
graph: Graph, platform: Platform, local_mem: str, is_dump: bool
) -> Tuple[Graph, Dict[int, List[int]]]:
MIN_THRESHOLD = 32 * 1024
MIN_THRESHOLD = 16 * 1024
MAX_THRESHOLD = 640 * 1024
STEP_THRESHOLD = 10 * 1024
threshold = np.arange(MIN_THRESHOLD, MAX_THRESHOLD, STEP_THRESHOLD)
STEP_THRESHOLD = 16 * 1024
threshold = np.arange(MIN_THRESHOLD, MAX_THRESHOLD + 1, STEP_THRESHOLD)
threshold_mask = np.zeros(len(threshold))
print("set linearint threshold and analyze memory begin")
print("try threshold:{}".format(threshold[-1]))
@@ -89,7 +109,7 @@ def graph_adapter(
graph, platform, local_mem, threshold[-1], is_dump
)
memory_tobe_allocated = get_memory_size(memory_planer, MemType.SHARE_MEM)

if memory_tobe_allocated > MAX_THRESHOLD:
threshold_mask[-1] = -1
print("try threshold:{}".format(threshold[0]))
@@ -100,8 +120,8 @@ def graph_adapter(

if memory_tobe_allocated > MAX_THRESHOLD:
print(
"WARNING:SHARE-MEM to be allocated was {}, exceed 640KB".format(
memory_tobe_allocated
"WARNING:SHARE-MEM to be allocated was {}, exceed {}".format(
memory_tobe_allocated, MAX_THRESHOLD
)
)
else:
@@ -121,7 +141,7 @@ def graph_adapter(
else:
threshold_mask[id] = 1
final_id = _judge_end(threshold_mask)
if final_id:
if final_id != None:
print("the best threshold:{}".format(threshold[final_id]))
new_graph, memory_planer, is_linearint = _get_memory_plan(
graph, platform, local_mem, threshold[final_id], is_dump
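
The threshold search above can be summarized as follows. This is a simplified, self-contained sketch of the idea, not the project's implementation: walk a fixed grid of candidate thresholds and keep the largest one whose memory plan still fits the share-memory budget, assuming the footprint grows with the threshold. The helper name and the feasibility check are illustrative only:

    import numpy as np

    def pick_threshold(grid, fits):
        """Return the largest value in grid for which fits(value) is True."""
        lo, hi = 0, len(grid) - 1
        best = None
        while lo <= hi:
            mid = (lo + hi) // 2
            if fits(grid[mid]):
                best = grid[mid]  # feasible: try a larger threshold
                lo = mid + 1
            else:
                hi = mid - 1      # infeasible: try a smaller threshold
        return best

    grid = np.arange(16 * 1024, 640 * 1024 + 1, 16 * 1024)
    best = pick_threshold(grid, lambda t: t <= 320 * 1024)  # toy feasibility check
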
6 changes: 6 additions & 0 deletions thinker/enum_defines.py
@@ -70,13 +70,15 @@ def get_support_op(platform: str) -> List[str]:
"iqSigmoid",
"iqTanh",
"iqCat",
"iqPad",
"Transpose",
"Reshape",
"Squeeze",
"Unsqueeze",
"Flatten",
"Slice",
"iqSum",
"iqSub",
"iqAdd",
"iqMul",
"iqDiv",
@@ -93,6 +95,10 @@ def get_support_op(platform: str) -> List[str]:
"Requant",
"LayerNormInt",
"iqVar",
"Cast",
"Expand",
"Tile",
"LogSoftmax",
]


112 changes: 92 additions & 20 deletions thinker/executor/c_api/thinker_api.c
@@ -62,7 +62,7 @@ typedef struct _t_Instance_ {
tModel *model_;
double *shape_scalars_;
tDMA_List *dma_list_;

uint32_t force_stop_flag;
int32_t reserved_args[8];
} tExecInst;

@@ -86,15 +86,15 @@ tStatus tGetMemoryPlan(tMemory *memory_list, int32_t *num_memory,
return T_ERR_RES_INCOMPLETE;
}
// check CRC
if (res_hdr->crc32_ != 0) {
uint8_t *res_model_ptr = (uint8_t *)res + ALIGN16(sizeof(tModelHeader));
int32_t res_model_size = size - ALIGN16(sizeof(tModelHeader));
int32_t crc_check = crc24(0, res_model_ptr, res_model_size);
if (res_hdr->crc32_ != crc_check) {
printf("%d,%d", res_hdr->crc32_, crc_check);
return T_ERR_RES_CRC_CHECK;
}
}
if (res_hdr->crc32_ != 0) {
uint8_t *res_model_ptr = (uint8_t *)res + ALIGN16(sizeof(tModelHeader));
int32_t res_model_size = size - ALIGN16(sizeof(tModelHeader));
int32_t crc_check = crc24(0, res_model_ptr, res_model_size);
if (res_hdr->crc32_ != crc_check) {
printf("%d,%d", res_hdr->crc32_, crc_check);
return T_ERR_RES_CRC_CHECK;
}
}
// model_inst_size
int32_t model_inst_size = 0;
model_inst_size += ALIGN16(sizeof(tModel));
@@ -166,6 +166,9 @@ tStatus tGetMemoryPlan(tMemory *memory_list, int32_t *num_memory,
return T_SUCCESS;
}

#if !(defined(WIN32) || defined(linux))
#pragma clang optimize off
#endif
tStatus tModelInit(tModelHandle *hdl, const int8_t *res, const uint64_t size,
const tMemory *memory_list, const int32_t num_memory) {
tModelHeader *res_hdr = (tModelHeader *)res;
@@ -179,14 +182,14 @@ tStatus tModelInit(tModelHandle *hdl, const int8_t *res, const uint64_t size,
return T_ERR_RES_INCOMPLETE;
}
// check CRC
if (res_hdr->crc32_ != 0) {
uint8_t *res_model_ptr = (uint8_t *)res + ALIGN16(sizeof(tModelHeader));
int32_t res_model_size = size - ALIGN16(sizeof(tModelHeader));
int32_t crc_check = crc24(0, res_model_ptr, res_model_size);
if (res_hdr->crc32_ != crc_check) {
return T_ERR_RES_CRC_CHECK;
}
}
if (res_hdr->crc32_ != 0) {
uint8_t *res_model_ptr = (uint8_t *)res + ALIGN16(sizeof(tModelHeader));
int32_t res_model_size = size - ALIGN16(sizeof(tModelHeader));
int32_t crc_check = crc24(0, res_model_ptr, res_model_size);
if (res_hdr->crc32_ != crc_check) {
return T_ERR_RES_CRC_CHECK;
}
}

int32_t inst_size = 0;
inst_size += ALIGN16(sizeof(tModel));
@@ -315,7 +318,7 @@ tStatus tModelInit(tModelHandle *hdl, const int8_t *res, const uint64_t size,
tensor->dptr_ = memory->dptr_ + offset;
}

inst->debug_info = (tDebugList *)ptr;
inst->debug_info = (tDebugList *)ptr;
inst->debug_info->tensor_name_count_ = debug_hdr.tensor_name_count_;
inst->debug_info->tensor_name_list_ =
(void *)(res + res_hdr->debug_offset_ + sizeof(tDebugList));
@@ -334,6 +337,9 @@ tStatus tModelInit(tModelHandle *hdl, const int8_t *res, const uint64_t size,
*hdl = ~((tModelHandle)inst);
return T_SUCCESS;
}
#if !(defined(WIN32) || defined(linux))
#pragma clang optimize on
#endif

tStatus tModelFini(tModelHandle hdl) {
#if !THINKER_USE_ACL
@@ -352,6 +358,34 @@ int32_t tGetInputCount(const tModelHandle hdl) {
return model->num_input_;
}

tStatus tGetInputInfo(const tExecHandle hdl, const int32_t idx,
tData *input) {
tExecInst *inst = (tExecInst *)~hdl;
if (inst == NULL || inst->flag_ != THINKER_INST_FLAG) {
return T_ERR_INVALID_INST;
}
tModel *model = inst->model_;

if (idx < 0 || idx >= model->num_input_) {
return T_ERR_INDEX_OF_BOUND;
}

if (input == NULL) {
return T_ERR_INVALID_DATA;
}

{
tTensor *tensor = inst->tensor_ + model->io_tensors_[idx];
input->dev_type_ = tensor->mem_.type_;
input->dtype_ = tensor->dtype_;
input->scale_ = tensor->scale_;
input->shape_ = tensor->shape_;
input->zero_ = tensor->zero_;
input->dptr_ = (void *)tensor->dptr_;
}
return T_SUCCESS;
}

const char *tGetInputName(const tModelHandle hdl, const int32_t idx) {
tModel *model = (tModel *)~hdl;
return model->io_names_ + idx * model->io_name_len_;
@@ -398,6 +432,9 @@ tShape tGetOutputShape(const tModelHandle hdl, const int32_t idx) {
return tensor->shape_;
}

#if !(defined(WIN32) || defined(linux))
#pragma clang optimize off
#endif
tStatus tCreateExecutor(const tModelHandle model_hdl, tExecHandle *hdl,
const tMemory *memory_list, const int32_t num_memory) {
tModel *model = (tModel *)~model_hdl;
@@ -502,6 +539,9 @@ tStatus tCreateExecutor(const tModelHandle model_hdl, tExecHandle *hdl,
*hdl = ~((tModelHandle)inst);
return T_SUCCESS;
}
#if !(defined(WIN32) || defined(linux))
#pragma clang optimize on
#endif

tStatus tReleaseExecutor(tExecHandle hdl) {
tExecInst *inst = (tExecInst *)~hdl;
@@ -573,7 +613,8 @@ tStatus tSetInput(const tExecHandle hdl, const int32_t idx,
tensor->shape_ = input->shape_;
tensor->scale_ = input->scale_;
uint64_t bytes = getShapeSize(&tensor->shape_) * (dtype & 0xFF);
memcpy((void *)tensor->dptr_, input->dptr_, bytes);
if ((uint64_t)tensor->dptr_ != (uint64_t)input->dptr_)
memcpy((void *)tensor->dptr_, input->dptr_, bytes);
}
return T_SUCCESS;
}
@@ -679,6 +720,11 @@ tStatus tForward(const tExecHandle hdl) {
uint32_t num_tensor = op->num_input_ + op->num_output_ + op->num_temp_;
tOperatorAPI *op_api = model->op_api_[op->op_id_];

if (T_FORCE_STOP_VALUE == inst->force_stop_flag) // user requested a stop
{
return T_FORCE_STOP_VALUE;
}

for (ii = 0; ii < num_tensor; ++ii) {
local_tensor[ii] = inst->tensor_ + tensor_ids[ii];
}
@@ -706,6 +752,28 @@ tStatus tForward(const tExecHandle hdl) {
return T_SUCCESS;
}

tStatus tExecutorStart(tExecHandle hdl)
{
tExecInst *inst = (tExecInst *)~hdl;
if (inst == NULL || inst->flag_ != THINKER_INST_FLAG)
{
return T_ERR_INVALID_INST;
}
inst->force_stop_flag = 0;
return T_SUCCESS;
}

tStatus tExecutorStop(tExecHandle hdl)
{
tExecInst *inst = (tExecInst *)~hdl;
if (inst == NULL || inst->flag_ != THINKER_INST_FLAG)
{
return T_ERR_INVALID_INST;
}
inst->force_stop_flag = T_FORCE_STOP_VALUE;
return T_SUCCESS;
}

static thinkerApi g_api;
const thinkerApi *thinkerGetApi() {
g_api.tInitialize = tInitialize;
@@ -717,6 +785,7 @@ const thinkerApi *thinkerGetApi() {
g_api.tModelFini = tModelFini;

g_api.tGetInputCount = tGetInputCount;
g_api.tGetInputInfo = tGetInputInfo;
g_api.tGetInputName = tGetInputName;
g_api.tGetOutputCount = tGetOutputCount;
g_api.tGetOutputName = tGetOutputName;
Expand All @@ -734,5 +803,8 @@ const thinkerApi *thinkerGetApi() {
g_api.tGetOutputByName = tGetOutputByName;
g_api.tForward = tForward;

g_api.tExecutorStart = tExecutorStart;
g_api.tExecutorStop = tExecutorStop;

return &g_api;
}