提交 935e365c 编写于 作者: M mindspore-ci-bot 提交者: Gitee

!3906 fix allocator for gpu

Merge pull request !3906 from wandongdong/master
......@@ -527,7 +527,7 @@ build_lite()
{
echo "start build mindspore lite project"
if [[ "${ENABLE_GPU}" == "on" ]]; then
if [ "${ENABLE_GPU}" == "on" ] || [ "${LITE_PLATFORM}" == "arm64" ]; then
echo "start build opencl"
build_opencl
fi
......@@ -554,7 +554,7 @@ build_lite()
-DANDROID_NDK="${ANDROID_NDK}" -DANDROID_ABI="arm64-v8a" -DANDROID_TOOLCHAIN_NAME="aarch64-linux-android-clang" \
-DANDROID_STL="c++_shared" -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DSUPPORT_TRAIN=${SUPPORT_TRAIN} \
-DBUILD_DEVICE=on -DPLATFORM_ARM64=on -DBUILD_CONVERTER=off -DENABLE_NEON=on -DENABLE_FP16="off" \
-DSUPPORT_GPU=${ENABLE_GPU} -DOFFLINE_COMPILE=${OPENCL_OFFLINE_COMPILE} "${BASEPATH}/mindspore/lite"
-DSUPPORT_GPU=on -DOFFLINE_COMPILE=${OPENCL_OFFLINE_COMPILE} "${BASEPATH}/mindspore/lite"
elif [[ "${LITE_PLATFORM}" == "arm32" ]]; then
checkndk
cmake -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" -DANDROID_NATIVE_API_LEVEL="19" \
......
......@@ -50,12 +50,12 @@ int SubGraphOpenCLKernel::UnInit() {
}
for (const auto tensor : inputs_) {
if (tensor != nullptr) {
tensor->FreeData(allocator_);
tensor->FreeData();
}
}
for (const auto tensor : outputs_) {
if (tensor != nullptr) {
tensor->FreeData(allocator_);
tensor->FreeData();
}
}
return 0;
......
......@@ -44,7 +44,7 @@ int OpenCLExecutor::Run(std::vector<tensor::Tensor *> &inputs, std::vector<tenso
auto &outputs = kernel->GetOutputs();
for (auto *output : outputs) {
MS_ASSERT(nullptr != output);
output->MallocData(allocator_);
output->MallocData();
}
kernel::CallBackParam callbackParam;
callbackParam.name_callback_aram = kernel->Name();
......@@ -67,7 +67,7 @@ int OpenCLExecutor::Run(std::vector<tensor::Tensor *> &inputs, std::vector<tenso
}
for (auto input_kernel : kernel->GetInKernels()) {
MS_EXCEPTION_IF_NULL(input_kernel);
ret = input_kernel->DecOutTensorRefCount(allocator_);
ret = input_kernel->DecOutTensorRefCount();
if (0 != ret) {
MS_LOG(WARNING) << "DecOutTensorRefCount for kernel" << kernel->Name() << " failed";
}
......
......@@ -41,6 +41,14 @@ TEST_F(BenchmarkTest, TestOCR_02) {
ASSERT_EQ(status, RET_OK);
}
// Runs the hiai OCR_02 model through the benchmark entry point with the GPU device selected and
// expects RET_OK.
TEST_F(BenchmarkTest, TestOCR_02_GPU) {
  // Every flag has to be a separate array element. Adjacent string literals with no comma between
  // them are concatenated by the compiler, which would fold all paths into one --modelPath value.
  // --device=GPU is passed explicitly because the "device" flag defaults to "CPU".
  const char *argv[] = {"./benchmark", "--modelPath=./hiai/hiai_cv_focusShootOCRMOdel_02.ms",
                        "--inDataPath=./hiai/hiai_cv_focusShootOCRMOdel_02.bin",
                        "--calibDataPath=./hiai/hiai_cv_focusShootOCRMOdel_02.txt", "--device=GPU"};
  // Derive argc from the array so the count cannot drift from the argument list.
  const int argc = static_cast<int>(sizeof(argv) / sizeof(argv[0]));
  auto status = RunBenchmark(argc, argv);
  ASSERT_EQ(status, RET_OK);
}
TEST_F(BenchmarkTest, TestHebing) {
const char *argv[] = {"./benchmark", "--modelPath=./hiai/model_hebing_3branch.ms"
"--inDataPath=./hiai/model_hebing_3branch.bin"
......
......@@ -52,7 +52,7 @@ TEST_F(TestMatMulOpenCL, MatMulFp32) {
auto *arith_kernel = new kernel::MatMulOpenCLKernel(nullptr, inputs, outputs, false);
arith_kernel->Init();
std::vector<LiteKernel *> kernels{arith_kernel};
std::vector<kernel::LiteKernel *> kernels{arith_kernel};
auto *pGraph = new kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels);
pGraph->Init();
......
......@@ -51,7 +51,7 @@ TEST_F(TestSoftmaxOpenCL, SoftmaxFp32) {
MS_LOG(INFO) << "create OpenCL Kernel";
auto *Softmax_kernel = new kernel::SoftmaxOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs);
Softmax_kernel->Init();
std::vector<LiteKernel *> kernels{Softmax_kernel};
std::vector<kernel::LiteKernel *> kernels{Softmax_kernel};
MS_LOG(INFO) << "create SubGraphOpenCLKernel";
auto *pGraph = new kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels);
......
......@@ -351,6 +351,8 @@ int Benchmark::RunBenchmark(const std::string &deviceType) {
auto context = new lite::Context;
if (_flags->device == "CPU") {
context->device_ctx_.type = lite::DT_CPU;
} else if (_flags->device == "GPU") {
context->device_ctx_.type = lite::DT_GPU;
} else {
context->device_ctx_.type = lite::DT_NPU;
}
......
......@@ -57,7 +57,7 @@ class MS_API BenchmarkFlags : public virtual FlagParser {
AddFlag(&BenchmarkFlags::inDataPath, "inDataPath", "Input data path, if not set, use random input", "");
AddFlag(&BenchmarkFlags::inDataTypeIn, "inDataType", "Input data type. img | bin", "bin");
AddFlag(&BenchmarkFlags::omModelPath, "omModelPath", "OM model path, only required when device is NPU", "");
AddFlag(&BenchmarkFlags::device, "device", "CPU | NPU", "CPU");
AddFlag(&BenchmarkFlags::device, "device", "CPU | NPU | GPU", "CPU");
AddFlag(&BenchmarkFlags::cpuBindMode, "cpuBindMode",
"Input -1 for MID_CPU, 1 for HIGHER_CPU, 0 for NO_BIND, defalut value: 1", 1);
// MarkPerformance
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册