[LITE][BM] release demo, test=develop (#3320)

* [LITE][BM] add fill_const assign_value ops, test=develop

[LITE][BM] release demo, test=develop (#3320)
* [LITE][BM] add fill_const assign_value ops, test=develop
5bd83188 · Santa An · GitHub · 84cc0685 · 5bd83188 · 5bd83188
13 changed files
--- a/lite/api/CMakeLists.txt
+++ b/lite/api/CMakeLists.txt
@@ -9,7 +9,7 @@ if (LITE_ON_TINY_PUBLISH)
    set(CMAKE_C_FLAGS_RELEASE "-Os -DNDEBUG")
 endif()
 set(light_lib_DEPS light_api paddle_api paddle_api_light optimizer)
-if ((NOT LITE_ON_TINY_PUBLISH) AND (LITE_WITH_CUDA OR LITE_WITH_X86 OR ARM_TARGET_OS STREQUAL "android" OR ARM_TARGET_OS STREQUAL "armlinux"))
+if ((NOT LITE_ON_TINY_PUBLISH) AND (LITE_WITH_CUDA OR LITE_WITH_X86 OR LITE_WITH_BM OR ARM_TARGET_OS STREQUAL "android" OR ARM_TARGET_OS STREQUAL "armlinux"))
    #full api dynamic library
    add_library(paddle_full_api_shared SHARED "")
    target_sources(paddle_full_api_shared PUBLIC ${__lite_cc_files} paddle_api.cc light_api.cc cxx_api.cc cxx_api_impl.cc light_api_impl.cc)
@@ -262,7 +262,8 @@ if (NOT LITE_ON_TINY_PUBLISH)
        CV_DEPS paddle_cv_arm
        NPU_DEPS ${npu_kernels}
        CL_DEPS ${opencl_kernels}
-        FPGA_DEPS ${fpga_kernels})
+        FPGA_DEPS ${fpga_kernels}
+        BM_DEPS ${bm_kernels})
    # The final inference library for just MobileConfig.
    bundle_static_library(paddle_api_full paddle_api_full_bundled bundle_full_api)
    target_link_libraries(paddle_api_full ${cuda_deps})

--- a/lite/api/test_classify_lite_bm.cc
+++ b/lite/api/test_classify_lite_bm.cc
@@ -36,7 +36,8 @@ void TestModel(const std::vector<Place>& valid_places) {
  predictor.Build(FLAGS_model_dir, "", "", valid_places, passes);
  auto* input_tensor = predictor.GetInput(0);
-  input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 224, 224})));
+  input_tensor->Resize(DDim(
+      std::vector<DDim::value_type>({1, 3, FLAGS_im_height, FLAGS_im_width})));
  auto* data = input_tensor->mutable_data<float>();
  auto item_size = input_tensor->dims().production();
  if (FLAGS_input_img_txt_path.empty()) {
@@ -67,15 +68,13 @@ void TestModel(const std::vector<Place>& valid_places) {
            << ", spend " << (GetCurrentUS() - start) / FLAGS_repeats / 1000.0
            << " ms in average.";
-  auto* out = predictor.GetOutput(0);
+  auto out = predictor.GetOutputs();
-  ASSERT_EQ(out->dims().size(), 2);
-  ASSERT_EQ(out->dims()[0], 1);
-  ASSERT_EQ(out->dims()[1], 1000);
-  auto* out_data = out->data<float>();
  FILE* fp = fopen("result.txt", "wb");
-  for (int i = 0; i < out->numel(); i++) {
+  for (int i = 0; i < out.size(); i++) {
-    fprintf(fp, "%f\n", out_data[i]);
+    auto* out_data = out[i]->data<float>();
+    for (int j = 0; j < out[i]->numel(); j++) {
+      fprintf(fp, "%f\n", out_data[j]);
+    }
  }
  fclose(fp);
 }

--- a/lite/api/transform_test.cc
+++ b/lite/api/transform_test.cc
@@ -13,7 +13,9 @@
 // limitations under the License.
 #include <gflags/gflags.h>
+#ifdef PADDLE_WITH_TESTING
 #include <gtest/gtest.h>
+#endif
 #include <string>
 #include <vector>
 #include "lite/api/cxx_api.h"

--- a/lite/kernels/bm/bridges/CMakeLists.txt
+++ b/lite/kernels/bm/bridges/CMakeLists.txt
@@ -30,6 +30,8 @@ lite_cc_library(subgraph_bridge_conv_transpose_op_bm SRCS conv_transpose_op.cc D
 lite_cc_library(subgraph_bridge_reduce_full_op_bm SRCS reduce_full_op.cc DEPS ${bm_subgraph_bridge_deps})
 lite_cc_library(subgraph_bridge_squeeze_op_bm SRCS squeeze_op.cc DEPS ${bm_subgraph_bridge_deps})
 lite_cc_library(subgraph_bridge_cast_op_bm SRCS cast_op.cc DEPS ${bm_subgraph_bridge_deps})
+lite_cc_library(subgraph_bridge_fill_constant_op_bm SRCS fill_constant_op.cc DEPS ${bm_subgraph_bridge_deps})
+lite_cc_library(subgraph_bridge_assign_value_op_bm SRCS assign_value_op.cc DEPS ${bm_subgraph_bridge_deps})
 set(bm_subgraph_bridges
        subgraph_bridge_registry
@@ -58,4 +60,6 @@ set(bm_subgraph_bridges
        subgraph_bridge_reduce_full_op_bm
        subgraph_bridge_squeeze_op_bm
        subgraph_bridge_cast_op_bm
+        subgraph_bridge_fill_constant_op_bm
+        subgraph_bridge_assign_value_op_bm
        CACHE INTERNAL "bm_subgraph_bridges")
--- a/lite/kernels/bm/bridges/act_op.cc
+++ b/lite/kernels/bm/bridges/act_op.cc
@@ -13,6 +13,7 @@
 // limitations under the License.
 #include <bmcompiler_if.h>
+#include <bmcompiler_if_lite.h>
 #include <bmcompiler_op_code.h>
 #include "lite/kernels/bm/bridges/graph.h"
 #include "lite/kernels/npu/bridges/registry.h"
@@ -35,16 +36,14 @@ int ActConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  auto output_var_name = op_info->Output("Out").front();
  auto output = scope->FindVar(output_var_name)->GetMutable<lite::Tensor>();
  auto output_dims = output->dims();
-  const int64_t* x_shape_data = const_cast<const int64_t*>(&x_dims.data()[0]);
+  bool x_is_const = !graph->HasNode(x_var_name);
-  const int64_t* output_shape_data =
-      const_cast<const int64_t*>(&output_dims.data()[0]);
  std::vector<int32_t> i_x_shape_data(x_dims.size());
  std::vector<int32_t> i_output_shape_data(output_dims.size());
  for (size_t i = 0; i < x_dims.size(); i++) {
-    i_x_shape_data[i] = static_cast<int>(x_shape_data[i]);
+    i_x_shape_data[i] = x_dims[i];
  }
  for (size_t i = 0; i < output_dims.size(); i++) {
-    i_output_shape_data[i] = static_cast<int>(output_shape_data[i]);
+    i_output_shape_data[i] = output_dims[i];
  }
  float alpha = 0.f;
  int active_type_id = 0;
@@ -59,6 +58,15 @@ int ActConverter(void* ctx, OpLite* op, KernelBase* kernel) {
    LOG(FATAL) << "[BM] unsupport act type";
    return FAILED;
  }
+  const float* x_data = const_cast<const float*>(x->mutable_data<float>());
+  if (x_is_const) {
+    bm_add_const_tensor(graph->GetCompilerHandle(),
+                        static_cast<const char*>(x_var_name.c_str()),
+                        const_cast<const int*>(&i_x_shape_data[0]),
+                        x_dims.size(),
+                        static_cast<bm_data_type_t>(DTYPE_FP32),
+                        static_cast<const void*>(x_data));
+  }
  if (op_type == "relu" || op_type == "leaky_relu") {
    add_relu_layer(graph->GetCompilerHandle(),
                   const_cast<const int*>(&i_x_shape_data[0]),

--- a/lite/kernels/bm/bridges/assign_value_op.cc
+++ b/lite/kernels/bm/bridges/assign_value_op.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <bmcompiler_defs.h>
+#include <bmcompiler_if.h>
+#include <bmcompiler_if_lite.h>
+#include "lite/kernels/bm/bridges/graph.h"
+#include "lite/kernels/bm/bridges/utility.h"
+#include "lite/kernels/npu/bridges/registry.h"
+namespace paddle {
+namespace lite {
+namespace subgraph {
+namespace bm {
+// Converts an `assign_value` op into a BM compiler constant tensor.
+//
+// Reads the output variable's dims from the op's scope, copies the op's
+// "fp32_values" attribute into a freshly allocated float buffer, registers
+// that buffer under the output variable's name with bm_add_const_tensor and
+// finally calls graph->AddNode for the output.
+//
+// ctx    - must point to a subgraph::bm::Graph; checked non-null.
+// op     - the op being converted; checked non-null.
+// kernel - unused.
+// Returns SUCCESS; any inconsistency aborts through CHECK.
+//
+// NOTE(review): only the "fp32_values" attribute is read; other value
+// attributes the op may carry are not handled here.
+int AssignValueConverter(void* ctx, OpLite* op, KernelBase* kernel) {
+  CHECK(ctx != nullptr);
+  CHECK(op != nullptr);
+  auto graph = static_cast<Graph*>(ctx);
+  auto scope = op->scope();
+  auto op_info = op->op_info();
+  auto output_var_name = op_info->Output("Out").front();
+  auto output = scope->FindVar(output_var_name)->GetMutable<lite::Tensor>();
+  auto output_dims = output->dims();
+  // The BM compiler takes the shape as 32-bit ints; buffer_size accumulates
+  // the total element count of the output tensor.
+  std::vector<int32_t> i_output_shape_data(output_dims.size());
+  int buffer_size = 1;
+  for (size_t i = 0; i < output_dims.size(); i++) {
+    i_output_shape_data[i] = static_cast<int>(output_dims[i]);
+    buffer_size *= i_output_shape_data[i];
+  }
+  auto fp32_values = op_info->GetAttr<std::vector<float>>("fp32_values");
+  // Validate before allocating: the attribute must supply exactly one value
+  // per output element.
+  CHECK_EQ(static_cast<size_t>(buffer_size), fp32_values.size());
+  float* assign_data =
+      reinterpret_cast<float*>(malloc(buffer_size * sizeof(float)));
+  CHECK(assign_data != nullptr);
+  // Fill the buffer with the attribute values; without this copy the
+  // constant tensor would be registered with uninitialized memory.
+  for (size_t i = 0; i < fp32_values.size(); i++) {
+    assign_data[i] = fp32_values[i];
+  }
+  // NOTE(review): assign_data is deliberately not freed here. Whether
+  // bm_add_const_tensor copies the data or keeps the pointer is not visible
+  // from this file - confirm ownership before adding a free().
+  bm_add_const_tensor(graph->GetCompilerHandle(),
+                      static_cast<const char*>(output_var_name.c_str()),
+                      const_cast<const int*>(i_output_shape_data.data()),
+                      output_dims.size(),
+                      static_cast<bm_data_type_t>(DTYPE_FP32),
+                      reinterpret_cast<const void*>(assign_data));
+  graph->AddNode(output_var_name);
+  return SUCCESS;
+}
+}  // namespace bm
+}  // namespace subgraph
+}  // namespace lite
+}  // namespace paddle
+REGISTER_SUBGRAPH_BRIDGE(assign_value,
+                         kBM,
+                         paddle::lite::subgraph::bm::AssignValueConverter);
--- a/lite/kernels/bm/bridges/conv_op.cc
+++ b/lite/kernels/bm/bridges/conv_op.cc
@@ -39,6 +39,7 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  auto filter_var_name = op_info->Input("Filter").front();
  auto filter = scope->FindVar(filter_var_name)->GetMutable<lite::Tensor>();
  auto filter_dims = filter->dims();
  CHECK_EQ(input_dims.size(), 4);
  CHECK_EQ(output_dims.size(), 4);
  CHECK_EQ(filter_dims.size(), 4);
@@ -90,6 +91,7 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
                 dilations[1],
                 static_cast<int>(has_bias));
  graph->AddNode(output_var_name);
+  LOG(INFO) << output_var_name << input_dims << " " << output_dims;
  return SUCCESS;
 }

--- a/lite/kernels/bm/bridges/elementwise_ops.cc
+++ b/lite/kernels/bm/bridges/elementwise_ops.cc
@@ -65,6 +65,7 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  auto output_dims = output->dims();
  const int64_t* output_shape_data =
      const_cast<const int64_t*>(&output_dims.data()[0]);
+  LOG(INFO) << x_dims << " " << output_dims;
  std::vector<int32_t> i_output_shape_data(output_dims.size());
  for (size_t i = 0; i < output_dims.size(); i++) {
    i_output_shape_data[i] = static_cast<int>(output_shape_data[i]);

--- a/lite/kernels/bm/bridges/fill_constant_op.cc
+++ b/lite/kernels/bm/bridges/fill_constant_op.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <bmcompiler_defs.h>
+#include <bmcompiler_if.h>
+#include <bmcompiler_if_lite.h>
+#include "lite/kernels/bm/bridges/graph.h"
+#include "lite/kernels/bm/bridges/utility.h"
+#include "lite/kernels/npu/bridges/registry.h"
+namespace paddle {
+namespace lite {
+namespace subgraph {
+namespace bm {
+// Converts a `fill_constant` op into a BM compiler constant tensor.
+//
+// Reads the output variable's dims from the op's scope, allocates a float
+// buffer with one element per output element, fills it with the op's "value"
+// attribute, registers it under the output variable's name with
+// bm_add_const_tensor and finally calls graph->AddNode for the output.
+//
+// ctx    - must point to a subgraph::bm::Graph; checked non-null.
+// op     - the op being converted; checked non-null.
+// kernel - unused.
+// Returns SUCCESS; any inconsistency aborts through CHECK.
+//
+// NOTE(review): the tensor is always registered as DTYPE_FP32 and "value" is
+// read as float; any dtype attribute on the op is not consulted here.
+int FillConstantConverter(void* ctx, OpLite* op, KernelBase* kernel) {
+  CHECK(ctx != nullptr);
+  CHECK(op != nullptr);
+  auto graph = static_cast<Graph*>(ctx);
+  auto scope = op->scope();
+  auto op_info = op->op_info();
+  auto output_var_name = op_info->Output("Out").front();
+  auto output = scope->FindVar(output_var_name)->GetMutable<lite::Tensor>();
+  auto output_dims = output->dims();
+  // The BM compiler takes the shape as 32-bit ints; buffer_size accumulates
+  // the total element count so the data buffer matches the declared shape.
+  std::vector<int32_t> i_output_shape_data(output_dims.size());
+  int buffer_size = 1;
+  for (size_t i = 0; i < output_dims.size(); i++) {
+    i_output_shape_data[i] = static_cast<int>(output_dims[i]);
+    buffer_size *= i_output_shape_data[i];
+  }
+  float* const_data =
+      reinterpret_cast<float*>(malloc(buffer_size * sizeof(float)));
+  CHECK(const_data != nullptr);
+  auto value = op_info->GetAttr<float>("value");
+  for (int i = 0; i < buffer_size; i++) {
+    const_data[i] = value;
+  }
+  // NOTE(review): const_data is deliberately not freed here. Whether
+  // bm_add_const_tensor copies the data or keeps the pointer is not visible
+  // from this file - confirm ownership before adding a free().
+  bm_add_const_tensor(graph->GetCompilerHandle(),
+                      static_cast<const char*>(output_var_name.c_str()),
+                      const_cast<const int*>(i_output_shape_data.data()),
+                      output_dims.size(),
+                      static_cast<bm_data_type_t>(DTYPE_FP32),
+                      reinterpret_cast<const void*>(const_data));
+  graph->AddNode(output_var_name);
+  return SUCCESS;
+}
+}  // namespace bm
+}  // namespace subgraph
+}  // namespace lite
+}  // namespace paddle
+REGISTER_SUBGRAPH_BRIDGE(fill_constant,
+                         kBM,
+                         paddle::lite::subgraph::bm::FillConstantConverter);
--- a/lite/kernels/bm/bridges/mul_op.cc
+++ b/lite/kernels/bm/bridges/mul_op.cc
@@ -29,7 +29,6 @@ int MulConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  auto op_info = op->op_info();
  auto op_type = op_info->Type();
  auto unique_op_name = lite::subgraph::bm::UniqueName(op_type);
-  // only support y is const
  // input
  auto x_var_name = op_info->Input("X").front();
  auto x = scope->FindVar(x_var_name)->GetMutable<lite::Tensor>();
@@ -61,6 +60,12 @@ int MulConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  auto y_var_name = op_info->Input("Y").front();
  auto y = scope->FindVar(y_var_name)->GetMutable<lite::Tensor>();
  auto y_dims = y->dims();
+  bool y_is_const = !graph->HasNode(y_var_name);
+  CHECK_EQ(y_dims.size(), 2);
+  int i_y_shape_data[2];
+  for (size_t i = 0; i < 2; i++) {
+    i_y_shape_data[i] = y_dims[i];
+  }
  // output
  auto output_var_name = op_info->Output("Out").front();
  auto output = scope->FindVar(output_var_name)->GetMutable<lite::Tensor>();
@@ -71,20 +76,39 @@ int MulConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  for (size_t i = 0; i < output_dims.size(); i++) {
    i_output_shape_data[i] = static_cast<int>(output_shape_data[i]);
  }
-  add_fc_layer(graph->GetCompilerHandle(),
+  if (y_is_const) {
-               const_cast<const int*>(&i_x_reshape_shape_data[0]),
+    add_fc_layer(graph->GetCompilerHandle(),
-               2,
+                 const_cast<const int*>(&i_x_reshape_shape_data[0]),
-               static_cast<const char*>(unique_op_reshape_name.c_str()),
+                 2,
-               const_cast<const int*>(&i_output_shape_data[0]),
+                 static_cast<const char*>(unique_op_reshape_name.c_str()),
-               output_dims.size(),
+                 const_cast<const int*>(&i_output_shape_data[0]),
-               static_cast<const char*>(output_var_name.c_str()),
+                 output_dims.size(),
-               static_cast<const char*>(unique_op_name.c_str()),
+                 static_cast<const char*>(output_var_name.c_str()),
-               i_x_reshape_shape_data[1],
+                 static_cast<const char*>(unique_op_name.c_str()),
-               i_output_shape_data[1],
+                 i_x_reshape_shape_data[1],
-               static_cast<const float*>(y->mutable_data<float>()),
+                 i_output_shape_data[1],
-               nullptr,
+                 static_cast<const float*>(y->mutable_data<float>()),
-               0,
+                 nullptr,
-               0);
+                 0,
+                 0);
+  } else {
+    add_fc_weight_layer(
+        graph->GetCompilerHandle(),
+        const_cast<const int*>(&i_x_reshape_shape_data[0]),
+        2,
+        static_cast<const char*>(unique_op_reshape_name.c_str()),
+        const_cast<const int*>(&i_output_shape_data[0]),
+        output_dims.size(),
+        static_cast<const char*>(output_var_name.c_str()),
+        static_cast<const char*>(unique_op_name.c_str()),
+        const_cast<const int*>(&i_y_shape_data[0]),
+        2,
+        static_cast<const char*>(y_var_name.c_str()),
+        i_x_reshape_shape_data[1],
+        nullptr,
+        0,
+        0);
+  }
  graph->AddNode(output_var_name);
  return SUCCESS;
 }

--- a/lite/kernels/bm/bridges/paddle_use_bridges.h
+++ b/lite/kernels/bm/bridges/paddle_use_bridges.h
@@ -51,3 +51,5 @@ USE_SUBGRAPH_BRIDGE(reduce_mean, kBM);
 USE_SUBGRAPH_BRIDGE(squeeze, kBM);
 USE_SUBGRAPH_BRIDGE(squeeze2, kBM);
 USE_SUBGRAPH_BRIDGE(cast, kBM);
+USE_SUBGRAPH_BRIDGE(fill_constant, kBM);
+USE_SUBGRAPH_BRIDGE(assign_value, kBM);
--- a/lite/kernels/bm/subgraph_compute.cc
+++ b/lite/kernels/bm/subgraph_compute.cc
@@ -35,7 +35,7 @@ int SubgraphEngine::BuildDeviceProgram() {
  graph.CreateCompilerHandle();
  auto& ctx = this->ctx_->template As<BMContext>();
  for (auto& inst : origin_program_) {
-    auto op = inst.op();
+    auto op = const_cast<OpLite*>(inst.op());
    CHECK(op);
    op->CheckShape();
    op->InferShape();

--- a/lite/tools/build_bm.sh
+++ b/lite/tools/build_bm.sh
@@ -5,7 +5,7 @@ set -ex
 BM_SDK_ROOT="$(pwd)/third-party/bmlibs/bm_sc3_libs"     # BM SDK
 TARGET_NAME="BM1682"     # default target
 BUILD_EXTRA=OFF                     # ON(with sequence ops)/OFF
-WITH_TESTING=ON                    # ON/OFF
+WITH_TESTING=OFF                    # ON/OFF
 function print_usage {
    echo -e "\nUSAGE:"