add auto_padding

36a14c81 · HexToString · df1729b5 · 36a14c81 · 36a14c81 · 36a14c81
5 changed files
--- a/core/predictor/framework/bsf-inl.h
+++ b/core/predictor/framework/bsf-inl.h
@@ -86,14 +86,15 @@ bool Task<InItemT, OutItemT>::task_fetch_create(BatchTasks<TaskT>& batchTask) {
        // 此时 lod 为空。
        tensor_out.lod = batchTask._batch_out[fetchvar_index].lod;
        // resize all batch memory at one time
        size_t databuf_size = fetchvar_batch * fetchvar_bytesize_index;
-        void* databuf_data = MempoolWrapper::instance().malloc(databuf_size,memoryPtr);
+        void* databuf_data =
+            MempoolWrapper::instance().malloc(databuf_size, memoryPtr);
        paddle::PaddleBuf paddleBuf(databuf_data, databuf_size);
        tensor_out.data = paddleBuf;
-        //tensor_out.data.Resize(databuf_size);
+        // tensor_out.data.Resize(databuf_size);
      } else {
        // 当taskmeta_num = 1时，由于同时只有一个taskMeta操作task
        // 不涉及线程安全问题，所以此时可以直接由taskMeta->task->resize->copy
@@ -213,7 +214,8 @@ void TaskExecutor<TaskT>::stop() {
 template <typename TaskT>
 TaskHandler<TaskT> TaskExecutor<TaskT>::schedule(
    const void* inVectorT_ptr,
-    void* outVectorT_ptr, MempoolRegion* memoryPtr) {  // NOLINT
+    void* outVectorT_ptr,
+    MempoolRegion* memoryPtr) {  // NOLINT
  TaskT* task = butil::get_object<TaskT>();
  if (!task) {
    LOG(ERROR) << "Failed get TaskT from object pool";
@@ -240,7 +242,7 @@ TaskHandler<TaskT> TaskExecutor<TaskT>::schedule(
  task->write_fd = fds[1];
  task->owner_tid = ::syscall(SYS_gettid);
  task->memoryPtr = memoryPtr;
-  //task->_bspec_key = _bspec_key;
+  // task->_bspec_key = _bspec_key;
  task->inVectorT_ptr = (const InVectorT*)inVectorT_ptr;
  task->outVectorT_ptr = (OutVectorT*)outVectorT_ptr;
  if (!task->task_init()) {
@@ -309,7 +311,7 @@ bool TaskExecutor<TaskT>::move_task_to_batch(
    }
    // combine_task_valid负责判断是否能够合并
-    // 除最外层的shape外，内层shape应一致才能合并。
+    // 除最外层的shape外，内层shape应一致或者允许Padding才能合并。
    // 否则跳出循环,放入下一个batchTask中。
    // 以此保证batch.append_task(task)中的task的内层shape相同。
@@ -317,12 +319,15 @@ bool TaskExecutor<TaskT>::move_task_to_batch(
    // 所以要求该feedvar必须相等，才能合并。
    // 否则跳出循环,放入下一个batchTask中。
    // 目前没有PaddleTensor和PaddleBuff没有重载==，所以只能比较内存.
-    // TODO(HexToString): 可以考虑后期支持AutoPadding.
    if (previous_task != nullptr) {
-      if (!task->combine_task_valid(previous_task)) {
+      if (task->combine_task_valid(previous_task) == 0) {
        break;
      }
    }
+    if (batchTask.padding(task) != 2) {
+      break;
+    }
    size_t rem = batchTask.append_task(task);
    previous_task = task;
    if (task->rem <= 0) {
@@ -407,10 +412,11 @@ int TaskExecutor<TaskT>::work(ThreadContext<TaskT>* context) {
 }
 template <typename InItemT, typename OutItemT>
-bool TaskManager<InItemT, OutItemT>::schedule(const void* in,
+bool TaskManager<InItemT, OutItemT>::schedule(
-                                              void* out, MempoolRegion* memoryPtr) {  // NOLINT
+    const void* in, void* out, MempoolRegion* memoryPtr) {  // NOLINT
  TaskHandler<TaskT> handler =
-      TaskExecutorVector<TaskT>::instance()[_model_index].schedule(in, out, memoryPtr);
+      TaskExecutorVector<TaskT>::instance()[_model_index].schedule(
+          in, out, memoryPtr);
  if (handler.valid()) {
    _task_owned = handler;

--- a/core/predictor/framework/bsf.h
+++ b/core/predictor/framework/bsf.h
--- a/doc/C++_Serving/2+_model.md
+++ b/doc/C++_Serving/2+_model.md
@@ -220,4 +220,4 @@ python3 自定义.py ocr_det_client ocr_rec_client
 #ocr_det_client为第一个模型的Client端proto文件夹的相对路径
 #ocr_rec_client为第二个模型的Client端proto文件夹的相对路径
 ```
-此时，对于Server端而言，输入的数据的格式与`第一个模型的Client端proto格式`定义的一致，输出的数据格式与`最后一个模型的Client端proto`文件一致。一般情况下您无须关注此事，当您需要了解详细的[proto的定义，请参考此处](../Serving_Configure_CN.md)。
+此时，对于Server端而言，输入的数据的格式与`第一个模型的Client端proto格式`定义的一致，输出的数据格式与`最后一个模型的Client端proto`文件一致。一般情况下您无须关注此事，当您需要了解详细的[proto的定义，请参考此处](./Serving_Configure_CN.md)。
--- a/doc/C++_Serving/DAG_CN.md
+++ b/doc/C++_Serving/DAG_CN.md
@@ -40,7 +40,7 @@ op_seq_maker.add_op(general_infer_op)
 op_seq_maker.add_op(general_response_op)
 ```
-如果使用`命令行 + 配置文件的方式启动C++Server`只需[修改配置文件](../Serving_Configure_CN.md)即可,无须修改👆的代码。
+如果使用`命令行 + 配置文件的方式启动C++Server`只需[修改配置文件](./Serving_Configure_CN.md)即可,无须修改👆的代码。
 对于简单的串联逻辑，我们将其简化为`Sequence`，使用`OpSeqMaker`进行构建。用户可以不指定每个节点的前继，默认按加入`OpSeqMaker`的顺序来确定前继。

--- a/doc/C++_Serving/DAG_EN.md
+++ b/doc/C++_Serving/DAG_EN.md
@@ -39,7 +39,7 @@ op_seq_maker.add_op(general_infer_op)
 op_seq_maker.add_op(general_response_op)
 ```
-If you use `the command line + configuration file method to start C++ server`, you only need to modify [the configuration file](../Serving_Configure_CN.md), don`t need to change any line of 👆 code.
+If you use `the command line + configuration file method to start C++ server`, you only need to modify [the configuration file](./Serving_Configure_CN.md); you don't need to change any line of the 👆 code.
 For simple series logic, we simplify it and build it with `OpSeqMaker`. You can determine the successor by default according to the order of joining `OpSeqMaker` without specifying the successor of each node.