Commit 410064ac authored by HexToString

add 2 lod fix

Parent cda5dc7c
@@ -332,6 +332,9 @@ struct Task {
// feature_vector[0] is a vector holding the shape0_index range of this batch: two elements, the minimum and the maximum value.
// feature_vector[1] is a vector holding the lod of the specified batch.
// feature_vector[2] is a single-element vector; the value 1 means this is a nobatch feedvar.
// feature_vector[3] is a vector holding the 2-level lod of the specified batch.
// The 2-level lod goes into feature_vector[3] to stay compatible with the existing code and keep the change as small as possible.
// The if branch below handles the nobatch feedvar case.
// The else if branch handles feedvars that carry lod.
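// A minimal illustration of the resulting layout, assuming a hypothetical
// 2-level-lod feedvar with lod = {{0, 2, 5}, {0, 3, 4, 6, 9, 11}} and a
// taskmeta covering start_batch = 0, end_batch = 2:
//   feature_vector[0] = {0, 11}           (shape0 range, taken from lod[1])
//   feature_vector[1] = {2, 5}            (level-1 lod, relative to the start)
//   feature_vector[2] = {}                (not a nobatch feedvar)
//   feature_vector[3] = {3, 4, 6, 9, 11}  (level-2 lod, relative to the start)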
@@ -343,6 +346,7 @@ struct Task {
} else if (set_feed_lod_index.size() > 0 &&
set_feed_lod_index.find(feedvar_index) !=
set_feed_lod_index.end()) {
int lod_size = (*inVectorT_ptr)[feedvar_index].lod.size();
std::vector<size_t> feed_lod_vector(end_batch - start_batch);
for (size_t lod_index = start_batch + 1, vector_index = 0;
lod_index < end_batch + 1;
@@ -351,9 +355,35 @@ struct Task {
(*inVectorT_ptr)[feedvar_index].lod[0][lod_index] -
(*inVectorT_ptr)[feedvar_index].lod[0][start_batch];
}
if (lod_size == 1) {
size_t shape0_start =
(*inVectorT_ptr)[feedvar_index].lod[0][start_batch];
size_t shape0_end = (*inVectorT_ptr)[feedvar_index].lod[0][end_batch];
feature_vector = {{shape0_start, shape0_end}, feed_lod_vector};
} else if (lod_size == 2) {
size_t level2_lod_start_index =
(*inVectorT_ptr)[feedvar_index].lod[0][start_batch];
size_t level2_lod_end_index =
(*inVectorT_ptr)[feedvar_index].lod[0][end_batch];
int level2_lod_size = level2_lod_end_index - level2_lod_start_index;
std::vector<size_t> feed_2level_lod_vector(level2_lod_size);
for (size_t lod2_index = level2_lod_start_index + 1, vector_index = 0;
lod2_index < level2_lod_end_index + 1;
++vector_index, ++lod2_index) {
feed_2level_lod_vector[vector_index] =
(*inVectorT_ptr)[feedvar_index].lod[1][lod2_index] -
(*inVectorT_ptr)[feedvar_index].lod[1][level2_lod_start_index];
}
size_t shape0_start =
(*inVectorT_ptr)[feedvar_index].lod[1][level2_lod_start_index];
size_t shape0_end =
(*inVectorT_ptr)[feedvar_index].lod[1][level2_lod_end_index];
feature_vector = {{shape0_start, shape0_end},
feed_lod_vector,
{},
feed_2level_lod_vector};
}
// feature_vector.push_back(feed_lod_vector);
} else {
feature_vector = {{start_batch, end_batch}};
@@ -368,15 +398,23 @@ struct Task {
for (size_t index = 0; index < vector_fetch_lod_index.size(); ++index) {
size_t data_length = 0;
size_t lod_length = 0;
size_t lod2_length = 0;
size_t total_shape0 = 0;
int lod_size = 1;
size_t feedvar_index = vector_fetch_lod_index[index];
// Because PaddleTensor's resize implementation clears the tensor every time, the total length has to be counted first.
for (size_t taskmeta_index = 0; taskmeta_index < total_taskmeta_num;
++taskmeta_index) {
lod_size = outLodTensorVector[taskmeta_index][index].lod.size();
data_length +=
outLodTensorVector[taskmeta_index][index].data.length();
lod_length += outLodTensorVector[taskmeta_index][index].lod[0].size();
total_shape0 += outLodTensorVector[taskmeta_index][index].shape[0];
if (lod_size == 2) {
// The last value of this taskmeta's level-1 lod gives the number of its level-2 lod entries.
lod2_length +=
outLodTensorVector[taskmeta_index][index].lod[0].back();
}
}
// Grow data and lod of the PaddleTensor in a single pass.
paddle::PaddleTensor& fetchVarTensor = (*outVectorT_ptr)[feedvar_index];
@@ -394,12 +432,22 @@ struct Task {
fetchVarTensor.lod[0].push_back(0);
}
fetchVarTensor.lod[0].resize(lod_length + 1, 0);
if (lod_size == 2) {
if (fetchVarTensor.lod.size() <= 1) {
fetchVarTensor.lod.push_back({0});
} else if (fetchVarTensor.lod[1].size() <= 0) {
fetchVarTensor.lod[1].push_back(0);
}
fetchVarTensor.lod[1].resize(lod2_length + 1, 0);
}
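// For illustration, assuming the counting loop above produced lod_length = 5
// and lod2_length = 11 across all taskmetas: after these resize calls lod[0]
// holds 6 entries and lod[1] holds 12 entries, each starting with a 0 that the
// copy loops below use as the base offset.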
//
size_t data_length_offset = 0;
size_t lod_length_offset = 0;
size_t lod2_length_offset = 0;
size_t once_data_length = 0;
size_t once_lod_length = 0;
size_t once_2lod_length = 0;
for (size_t taskmeta_index = 0; taskmeta_index < total_taskmeta_num;
++taskmeta_index) {
// process data
@@ -421,6 +469,18 @@ struct Task {
outLodTensorVector[taskmeta_index][index].lod[0][once_index];
lod_length_offset++;
}
if (lod_size == 2) {
size_t last_2lod_value = fetchVarTensor.lod[1][lod2_length_offset];
once_2lod_length =
outLodTensorVector[taskmeta_index][index].lod[1].size();
for (size_t once_index = 0; once_index < once_2lod_length;
++once_index) {
fetchVarTensor.lod[1][lod2_length_offset + 1] =
last_2lod_value +
outLodTensorVector[taskmeta_index][index].lod[1][once_index];
lod2_length_offset++;
}
}
}
}
}
@@ -467,10 +527,16 @@ struct TaskMeta {
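// feature.size() encodes the feedvar kind described above: 1 = plain feedvar,
// 2 = 1-level lod, 3 = nobatch, 4 = 2-level lod (feature[3] carries the
// level-2 lod).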
feedvar_type.push_back(feature.size());
if (feature.size() == 1) {
feed_lod_vector.push_back({});
feed_2level_lod_vector.push_back({});
} else if (feature.size() == 2) {
feed_lod_vector.push_back(feature[1]);
feed_2level_lod_vector.push_back({});
} else if (feature.size() == 3) {
feed_lod_vector.push_back({});
feed_2level_lod_vector.push_back({});
} else if (feature.size() == 4) {
feed_lod_vector.push_back(feature[1]);
feed_2level_lod_vector.push_back(feature[3]);
}
}
}
@@ -482,6 +548,7 @@ struct TaskMeta {
size_t taskmeta_index;
std::vector<std::vector<size_t>> feed_shape0_range;
std::vector<std::vector<size_t>> feed_lod_vector;
std::vector<std::vector<size_t>> feed_2level_lod_vector;
std::vector<size_t> feedvar_type;
};
@@ -647,8 +714,15 @@ class BatchTasks {
_total_shape0_batch_in[feedvar_index] +=
tm.feed_shape0_range[feedvar_index][1] -
tm.feed_shape0_range[feedvar_index][0];
} else if (tm.feedvar_type[feedvar_index] == 3) {
// tm.feedvar_type[feedvar_index] == 3
// nobatch feedvar: do not accumulate here, the value should stay 1.
_total_shape0_batch_in[feedvar_index] =
tm.feed_shape0_range[feedvar_index][1] -
tm.feed_shape0_range[feedvar_index][0];
} else {
// lod feedvar: it may carry a 1-level lod or a 2-level lod.
// Accumulate shape0 here, to prepare for initializing the PaddleTensor later.
_total_shape0_batch_in[feedvar_index] +=
tm.feed_shape0_range[feedvar_index][1] -
tm.feed_shape0_range[feedvar_index][0];
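// Illustrative numbers: if two taskmetas of the same batch report
// feed_shape0_range values of {0, 5} and {5, 9} (assumed), the lod branch
// accumulates _total_shape0_batch_in[feedvar_index] to (5 - 0) + (9 - 5) = 9,
// while the nobatch branch simply assigns the single range, which should be 1.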
@@ -667,13 +741,23 @@ class BatchTasks {
_batch_in_lod[feedvar_index][0].push_back(
last_lod_value + tm.feed_lod_vector[feedvar_index][lod_index]);
}
// A 2-level lod needs its level-2 lod information handled separately.
if (tm.feedvar_type[feedvar_index] == 4) {
if (_batch_in_lod[feedvar_index].size() <= 1) {
_batch_in_lod[feedvar_index].push_back({0});
} else if (_batch_in_lod[feedvar_index][1].size() <= 0) {
_batch_in_lod[feedvar_index][1].push_back(0);
}
size_t last_lod_value = _batch_in_lod[feedvar_index][1].back();
for (size_t lod_index = 0;
lod_index < tm.feed_2level_lod_vector[feedvar_index].size();
++lod_index) {
_batch_in_lod[feedvar_index][1].push_back(
last_lod_value +
tm.feed_2level_lod_vector[feedvar_index][lod_index]);
}
}
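// Illustrative merge, assuming two taskmetas contribute feed_2level_lod_vector
// values {3, 4, 6} and {2, 5}: starting from {0},
// _batch_in_lod[feedvar_index][1] grows to {0, 3, 4, 6} and then, with
// last_lod_value = 6, to {0, 3, 4, 6, 8, 11}.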
}
}
return _rem_size;
@@ -1165,6 +1249,14 @@ class BatchTasks {
size_t shape0_index_end =
_batch_out[fetchvar_index].lod[0][last_batch + add];
size_t shape0_length = shape0_index_end - shape0_index_start;
size_t lod_size = _batch_out[fetchvar_index].lod.size();
if (lod_size == 2) {
shape0_index_start =
_batch_out[fetchvar_index].lod[1][shape0_index_start];
shape0_index_end =
_batch_out[fetchvar_index].lod[1][shape0_index_end];
shape0_length = shape0_index_end - shape0_index_start;
}
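// Illustrative: with an assumed _batch_out lod of
// {{0, 2, 5}, {0, 3, 4, 6, 9, 11}} and a taskmeta covering batches [0, 2),
// lod[0] first maps the range to [0, 5), and the lod_size == 2 branch then
// remaps it through lod[1] to [0, 11), so shape0_length becomes 11.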
// When a task has been split into multiple taskmetas, the result cannot be copied straight into task->outVectorT_ptr.
// Instead it is first copied into task->outLodTensorVector[taskmeta_index],
// and once every taskmeta of the task has finished, the pieces are copied back into task->outVectorT_ptr in order.
@@ -1198,6 +1290,24 @@ class BatchTasks {
(_batch_out[fetchvar_index].lod[0][lod_index] -
last_lod_value);
}
if (lod_size == 2) {
if (fetchVarTensor.lod.size() <= 1) {
fetchVarTensor.lod.push_back({});
}
size_t last_lod0_value =
_batch_out[fetchvar_index].lod[0][last_batch];
size_t end_lod0_value =
_batch_out[fetchvar_index].lod[0][last_batch + add];
size_t lod1_size = end_lod0_value - last_lod0_value;
fetchVarTensor.lod[1].resize(lod1_size, 0);
for (size_t lod_index = last_lod0_value + 1, my_index = 0;
lod_index < end_lod0_value + 1;
++lod_index, ++my_index) {
fetchVarTensor.lod[1][my_index] =
_batch_out[fetchvar_index].lod[1][lod_index] -
_batch_out[fetchvar_index].lod[1][last_lod0_value];
}
}
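// Illustrative, reusing the lod assumed above: last_lod0_value = 0 and
// end_lod0_value = 5, so lod1_size = 5 and this taskmeta's lod[1] is filled
// with the relative values {3, 4, 6, 9, 11} (no leading 0 in this branch).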
} else {
// When the task is not split into multiple taskmetas, only the taskmeta of a single thread touches the task, so there is no multi-thread contention.
// In that case, after the resize the data can be written directly into task->outVectorT_ptr.
@@ -1239,6 +1349,27 @@ class BatchTasks {
(_batch_out[fetchvar_index].lod[0][lod_index] -
last_lod_value);
}
if (lod_size == 2) {
if (fetchVarTensor.lod.size() <= 1) {
fetchVarTensor.lod.push_back({});
} else if (fetchVarTensor.lod[1].size() <= 0) {
fetchVarTensor.lod[1].push_back(0);
}
size_t last_lod0_value =
_batch_out[fetchvar_index].lod[0][last_batch];
size_t end_lod0_value =
_batch_out[fetchvar_index].lod[0][last_batch + add];
size_t lod1_size = end_lod0_value - last_lod0_value;
fetchVarTensor.lod[1].resize(lod1_size + 1, 0);
for (size_t lod_index = last_lod0_value + 1, my_index = 1;
lod_index < end_lod0_value + 1;
++lod_index, ++my_index) {
fetchVarTensor.lod[1][my_index] =
_batch_out[fetchvar_index].lod[1][lod_index] -
_batch_out[fetchvar_index].lod[1][last_lod0_value];
}
}
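// Illustrative: with the same assumed lod and the whole task handled by one
// taskmeta, this branch writes {0, 3, 4, 6, 9, 11} into fetchVarTensor.lod[1],
// keeping the leading 0 because the result goes straight into
// task->outVectorT_ptr.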
}
} else {
// Ordinary fetchvar case; here the task's total fetchvar_batch =
...