add tensor mutable and lod

1df40eed · eclipsess · abc1eaf3 · 1df40eed · 1df40eed
隐藏空白更改
内联并排

Showing 2 changed files with 18 additions and 9 deletions

src/operators/kernel/arm/im2sequence_kernel.cpp +13 -4

test/net/test_eng.cpp +5 -5

未找到文件。
--- a/src/operators/kernel/arm/im2sequence_kernel.cpp
+++ b/src/operators/kernel/arm/im2sequence_kernel.cpp
@@ -35,7 +35,7 @@ template <>
 void Im2SequenceKernel<CPU, float>::Compute(
    const Im2SequenceParam<CPU> &param) const {
  const Tensor *in_x = param.Input();
-  Tensor *out = param.Output();
+  framework::LoDTensor *out = param.Output();
  out->mutable_data<float>();
  std::vector<int> kernels = param.Kernels();
@@ -52,22 +52,31 @@ void Im2SequenceKernel<CPU, float>::Compute(
                                       paddings[2], strides[0]);
  int output_width = Im2SeqOutputSize(img_width, kernels[1], paddings[1],
                                      paddings[3], strides[1]);
-  const std::vector<int> dilations({1, 1});
+  out->mutable_data<float>({batch_size * output_height * output_width,
+                            img_channels * kernels[0] * kernels[1]});
+  const std::vector<int> dilations({1, 1});
  // TODO: verify
  auto out_dims = out->dims();
  out->Resize({batch_size, out->numel() / batch_size});
  for (int i = 0; i < batch_size; i++) {
    const Tensor src =
        in_x->Slice(i, i + 1).Resize({img_channels, img_height, img_width});
    Tensor dst = out->Slice(i, i + 1).Resize(
        {output_height, output_width, img_channels, kernels[0], kernels[1]});
    math::Im2ColFunctor<math::ColFormat::kOCF, CPU, float> f;
    f(src, dilations, strides, paddings, &dst);
  }
  out->Resize(out_dims);
+  framework::LoD lod(1);
+  lod[0].reserve(batch_size + 1);
+  int offset = 0;
+  lod[0].push_back(offset);
+  for (int i = 0; i < batch_size; ++i) {
+    offset += output_height * output_width;
+    lod[0].push_back(offset);
+  }
+  out->set_lod(lod);
 }
 template class Im2SequenceKernel<CPU, float>;

--- a/test/net/test_eng.cpp
+++ b/test/net/test_eng.cpp
@@ -27,9 +27,9 @@ int main() {
                         true)) {
    auto time2 = time();
    std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;
-    std::vector<int64_t> dims{1, 1, 48, 48};
+    std::vector<int64_t> dims{1, 1, 48, 512};
    LoDTensor input_tensor;
-    SetupTensor<float>(&input_tensor, {1, 1, 48, 48}, static_cast<float>(0),
+    SetupTensor<float>(&input_tensor, {1, 1, 48, 512}, static_cast<float>(0),
                       static_cast<float>(1));
    std::vector<float> input(input_tensor.data<float>(),
@@ -39,9 +39,9 @@ int main() {
      paddle_mobile.PredictLod(input_tensor);
    }
    auto time3 = time();
-    //        for (int i = 0; i < 10; ++i) {
+    for (int i = 0; i < 1; ++i) {
-    //            paddle_mobile.Predict(input, dims);
+      paddle_mobile.PredictLod(input_tensor);
-    //        }
+    }
    auto time4 = time();
    std::cout << "predict cost :" << time_diff(time3, time4) << "ms"
              << std::endl;