!3906 fix allocator for gpu

Merge pull request !3906 from wandongdong/master

!3906 fix allocator for gpu
Merge pull request !3906 from wandongdong/master
935e365c · mindspore-ci-bot · Gitee · b0b4fa08 · cfb58a2c · 935e365c
8 changed files
--- a/build.sh
+++ b/build.sh
@@ -527,7 +527,7 @@ build_lite()
 {
    echo "start build mindspore lite project"

-    if [[ "${ENABLE_GPU}" == "on" ]]; then
+    if [ "${ENABLE_GPU}" == "on" ] || [ "${LITE_PLATFORM}" == "arm64" ]; then
      echo "start build opencl"
      build_opencl
    fi
@@ -554,7 +554,7 @@ build_lite()
              -DANDROID_NDK="${ANDROID_NDK}" -DANDROID_ABI="arm64-v8a" -DANDROID_TOOLCHAIN_NAME="aarch64-linux-android-clang"  \
              -DANDROID_STL="c++_shared" -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DSUPPORT_TRAIN=${SUPPORT_TRAIN}                     \
              -DBUILD_DEVICE=on -DPLATFORM_ARM64=on -DBUILD_CONVERTER=off -DENABLE_NEON=on -DENABLE_FP16="off"      \
-              -DSUPPORT_GPU=${ENABLE_GPU} -DOFFLINE_COMPILE=${OPENCL_OFFLINE_COMPILE} "${BASEPATH}/mindspore/lite"
+              -DSUPPORT_GPU=on -DOFFLINE_COMPILE=${OPENCL_OFFLINE_COMPILE} "${BASEPATH}/mindspore/lite"
    elif [[ "${LITE_PLATFORM}" == "arm32" ]]; then
        checkndk
        cmake -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" -DANDROID_NATIVE_API_LEVEL="19"      \

--- a/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc
@@ -50,12 +50,12 @@ int SubGraphOpenCLKernel::UnInit() {
  }
  for (const auto tensor : inputs_) {
    if (tensor != nullptr) {
-      tensor->FreeData(allocator_);
+      tensor->FreeData();
    }
  }
  for (const auto tensor : outputs_) {
    if (tensor != nullptr) {
-      tensor->FreeData(allocator_);
+      tensor->FreeData();
    }
  }
  return 0;

--- a/mindspore/lite/src/runtime/opencl/opencl_executor.cc
+++ b/mindspore/lite/src/runtime/opencl/opencl_executor.cc
@@ -44,7 +44,7 @@ int OpenCLExecutor::Run(std::vector<tensor::Tensor *> &inputs, std::vector<tenso
    auto &outputs = kernel->GetOutputs();
    for (auto *output : outputs) {
      MS_ASSERT(nullptr != output);
-      output->MallocData(allocator_);
+      output->MallocData();
    }
    kernel::CallBackParam callbackParam;
    callbackParam.name_callback_aram = kernel->Name();
@@ -67,7 +67,7 @@ int OpenCLExecutor::Run(std::vector<tensor::Tensor *> &inputs, std::vector<tenso
    }
    for (auto input_kernel : kernel->GetInKernels()) {
      MS_EXCEPTION_IF_NULL(input_kernel);
-      ret = input_kernel->DecOutTensorRefCount(allocator_);
+      ret = input_kernel->DecOutTensorRefCount();
      if (0 != ret) {
        MS_LOG(WARNING) << "DecOutTensorRefCount for kernel" << kernel->Name() << " failed";
      }

--- a/mindspore/lite/test/st/benchmark_test.cc
+++ b/mindspore/lite/test/st/benchmark_test.cc
@@ -41,6 +41,14 @@ TEST_F(BenchmarkTest, TestOCR_02) {
  ASSERT_EQ(status, RET_OK);
 }

+TEST_F(BenchmarkTest, TestOCR_02_GPU) {  // runs the OCR_02 model with --device=GPU (GPU device type)
+  const char *argv[] = {"./benchmark", "--modelPath=./hiai/hiai_cv_focusShootOCRMOdel_02.ms",
+                        "--inDataPath=./hiai/hiai_cv_focusShootOCRMOdel_02.bin",
+                        "--calibDataPath=./hiai/hiai_cv_focusShootOCRMOdel_02.txt", "--device=GPU"};
+  auto status = RunBenchmark(5, argv);  // argc must equal the number of comma-separated argv entries
+  ASSERT_EQ(status, RET_OK);
+}
+
 TEST_F(BenchmarkTest, TestHebing) {
  const char *argv[] = {"./benchmark", "--modelPath=./hiai/model_hebing_3branch.ms"
                                       "--inDataPath=./hiai/model_hebing_3branch.bin"

--- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/matmul_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/matmul_tests.cc
@@ -52,7 +52,7 @@ TEST_F(TestMatMulOpenCL, MatMulFp32) {
  auto *arith_kernel = new kernel::MatMulOpenCLKernel(nullptr, inputs, outputs, false);
  arith_kernel->Init();

-  std::vector<LiteKernel *> kernels{arith_kernel};
+  std::vector<kernel::LiteKernel *> kernels{arith_kernel};
  auto *pGraph = new kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels);
  pGraph->Init();


--- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/softmax_cl_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/softmax_cl_tests.cc
@@ -51,7 +51,7 @@ TEST_F(TestSoftmaxOpenCL, SoftmaxFp32) {
  MS_LOG(INFO) << "create OpenCL Kernel";
  auto *Softmax_kernel = new kernel::SoftmaxOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs);
  Softmax_kernel->Init();
-  std::vector<LiteKernel *> kernels{Softmax_kernel};
+  std::vector<kernel::LiteKernel *> kernels{Softmax_kernel};

  MS_LOG(INFO) << "create SubGraphOpenCLKernel";
  auto *pGraph = new kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels);

--- a/mindspore/lite/tools/benchmark/benchmark.cc
+++ b/mindspore/lite/tools/benchmark/benchmark.cc
@@ -351,6 +351,8 @@ int Benchmark::RunBenchmark(const std::string &deviceType) {
  auto context = new lite::Context;
  if (_flags->device == "CPU") {
    context->device_ctx_.type = lite::DT_CPU;
+  } else if (_flags->device == "GPU") {
+      context->device_ctx_.type = lite::DT_GPU;
  } else {
    context->device_ctx_.type = lite::DT_NPU;
  }

--- a/mindspore/lite/tools/benchmark/benchmark.h
+++ b/mindspore/lite/tools/benchmark/benchmark.h
@@ -57,7 +57,7 @@ class MS_API BenchmarkFlags : public virtual FlagParser {
    AddFlag(&BenchmarkFlags::inDataPath, "inDataPath", "Input data path, if not set, use random input", "");
    AddFlag(&BenchmarkFlags::inDataTypeIn, "inDataType", "Input data type. img | bin", "bin");
    AddFlag(&BenchmarkFlags::omModelPath, "omModelPath", "OM model path, only required when device is NPU", "");
-    AddFlag(&BenchmarkFlags::device, "device", "CPU | NPU", "CPU");
+    AddFlag(&BenchmarkFlags::device, "device", "CPU | NPU | GPU", "CPU");
    AddFlag(&BenchmarkFlags::cpuBindMode, "cpuBindMode",
            "Input -1 for MID_CPU, 1 for HIGHER_CPU, 0 for NO_BIND, defalut value: 1", 1);
    // MarkPerformance