提交 990c645c 编写于 作者: C chenzomi

code clean for opencl

上级 f20e68a8
......@@ -112,7 +112,6 @@ int PoolingOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->name() << " Running!";
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
// attribute
int slices = UP_DIV(out_tensors_[0]->Channel(), C4NUM);
cl_int4 input_shape = {in_tensors_[0]->Height(), in_tensors_[0]->Width(), in_tensors_[0]->Channel(), slices};
cl_int4 output_shape = {out_tensors_[0]->Height(), out_tensors_[0]->Width(), out_tensors_[0]->Channel(), slices};
......@@ -120,7 +119,6 @@ int PoolingOpenCLKernel::Run() {
cl_int2 kernel_size = {parameter_->window_h_, parameter_->window_w_};
cl_int2 padding = {parameter_->pad_u_, parameter_->pad_l_};
// binding parameters
int arg_idx = 0;
ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->Data());
ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->Data());
......@@ -130,14 +128,12 @@ int PoolingOpenCLKernel::Run() {
ocl_runtime->SetKernelArg(kernel_, arg_idx++, kernel_size);
ocl_runtime->SetKernelArg(kernel_, arg_idx++, padding);
// set work group size
std::vector<size_t> local_size;
std::vector<size_t> global_size = InitGlobalSize();
int max_work_group_size = ocl_runtime->GetKernelMaxWorkGroupSize(kernel_(), (*ocl_runtime->Device())());
local_size = GetCommonLocalSize(global_size, max_work_group_size);
global_size = GetCommonGlobalSize(local_size, global_size);
// run OpenCL kernel
ocl_runtime->RunKernel(kernel_, global_size, local_size, nullptr);
return RET_OK;
}
......
......@@ -162,10 +162,13 @@ kernel::LiteKernel *OpenCLSoftMaxKernelCreator(const std::vector<lite::tensor::T
auto *kernel = new (std::nothrow) SoftmaxOpenCLKernel(reinterpret_cast<OpParameter *>(opParameter), inputs, outputs);
if (kernel == nullptr) {
MS_LOG(ERROR) << "kernel " << opParameter->name_ << "is nullptr.";
delete kernel;
return nullptr;
}
if (inputs[0]->shape()[0] > 1) {
MS_LOG(ERROR) << "Init `Softmax` kernel failed: Unsupported multi-batch.";
delete kernel;
return nullptr;
}
auto ret = kernel->Init();
if (0 != ret) {
......
......@@ -58,7 +58,7 @@ TEST_F(TestAvgPoolingOpenCL, AvgPoolFp32) {
ocl_runtime->Init();
MS_LOG(INFO) << "create PoolingParameter";
auto param = new PoolingParameter();
auto param = new (std::nothrow) PoolingParameter();
InitAvgPoolingParam(param);
MS_LOG(INFO) << "create Tensors";
......@@ -76,18 +76,37 @@ TEST_F(TestAvgPoolingOpenCL, AvgPoolFp32) {
};
auto data_type = kNumberTypeFloat32;
auto tensorType = schema::NodeType_ValueNode;
lite::tensor::Tensor *tensor_in = new lite::tensor::Tensor(data_type, shape_in, schema::Format_NHWC, tensorType);
lite::tensor::Tensor *tensor_out = new lite::tensor::Tensor(data_type, shape_out, schema::Format_NHWC, tensorType);
lite::tensor::Tensor *tensor_in =
new (std::nothrow) lite::tensor::Tensor(data_type, shape_in, schema::Format_NHWC, tensorType);
lite::tensor::Tensor *tensor_out =
new (std::nothrow) lite::tensor::Tensor(data_type, shape_out, schema::Format_NHWC, tensorType);
if (tensor_in == nullptr) {
MS_LOG(ERROR) << "tensor_in null";
return;
}
if (tensor_out == nullptr) {
MS_LOG(ERROR) << "tensor_out null";
return;
}
std::vector<lite::tensor::Tensor *> inputs{tensor_in};
std::vector<lite::tensor::Tensor *> outputs{tensor_out};
MS_LOG(INFO) << "create OpenCL Kernel";
auto *pooling_kernel = new kernel::PoolingOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs);
auto *pooling_kernel =
new (std::nothrow) kernel::PoolingOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs);
if (pooling_kernel == nullptr) {
MS_LOG(ERROR) << "pooling_kernel null";
return;
}
pooling_kernel->Init();
std::vector<kernel::LiteKernel *> kernels{pooling_kernel};
MS_LOG(INFO) << "create SubGraphOpenCLKernel";
auto *pGraph = new kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels);
auto *pGraph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels);
if (pGraph == nullptr) {
MS_LOG(ERROR) << "pGraph null";
return;
}
pGraph->Init();
MS_LOG(INFO) << "initialize data";
......
......@@ -46,7 +46,7 @@ TEST_F(TestMaxPoolingOpenCL, MaxPool_1_32_512_96) {
auto allocator = ocl_runtime->GetAllocator();
MS_LOG(INFO) << "PoolingParameter";
auto param = new PoolingParameter;
auto param = new (std::nothrow) PoolingParameter;
InitParameter(param);
// define tensor
......@@ -56,21 +56,39 @@ TEST_F(TestMaxPoolingOpenCL, MaxPool_1_32_512_96) {
auto data_type = kNumberTypeFloat32;
auto tensorType = schema::NodeType_ValueNode;
MS_LOG(INFO) << "define tensor2";
auto input_tensor = new lite::tensor::Tensor(data_type, input_shape, schema::Format_NHWC4, tensorType);
auto output_tensor = new lite::tensor::Tensor(data_type, output_shape, schema::Format_NHWC4, tensorType);
auto input_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, schema::Format_NHWC4, tensorType);
auto output_tensor =
new (std::nothrow) lite::tensor::Tensor(data_type, output_shape, schema::Format_NHWC4, tensorType);
if (input_tensor == nullptr) {
MS_LOG(ERROR) << "input_tensor null";
return;
}
if (output_tensor == nullptr) {
MS_LOG(ERROR) << "output_tensor null";
return;
}
MS_LOG(INFO) << "define input";
std::vector<lite::tensor::Tensor *> inputs{input_tensor};
std::vector<lite::tensor::Tensor *> outputs{output_tensor};
// run
MS_LOG(INFO) << "pooling_kernel";
auto *pooling_kernel = new kernel::PoolingOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs);
auto *pooling_kernel =
new (std::nothrow) kernel::PoolingOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs);
if (pooling_kernel == nullptr) {
MS_LOG(ERROR) << "pooling_kernel null";
return;
}
MS_LOG(INFO) << "pooling_kernel init";
pooling_kernel->Init();
std::vector<kernel::LiteKernel *> kernels{pooling_kernel};
inputs[0]->MallocData(allocator);
auto *pGraph = new kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels);
auto *pGraph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels);
if (pGraph == nullptr) {
MS_LOG(ERROR) << "pGraph null";
return;
}
MS_LOG(INFO) << "pGraph init";
pGraph->Init();
......
......@@ -28,41 +28,49 @@ class TestSoftmaxOpenCL : public mindspore::CommonTest {};
void RunTestCase(std::vector<int> input_shape, std::vector<int> output_shape, std::string input_file,
std::string expect_file, SoftmaxParameter *param, schema::Format format) {
std::cout << "runtime" << std::endl;
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
ocl_runtime->Init();
auto allocator = ocl_runtime->GetAllocator();
// define tensor
MS_LOG(INFO) << "defineTensor";
std::cout << "defineTensor" << std::endl;
auto data_type = kNumberTypeFloat32;
auto tensorType = schema::NodeType_ValueNode;
auto input_tensor = new lite::tensor::Tensor(data_type, input_shape, format, tensorType);
auto output_tensor = new lite::tensor::Tensor(data_type, output_shape, format, tensorType);
auto input_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, input_shape, format, tensorType);
auto output_tensor = new (std::nothrow) lite::tensor::Tensor(data_type, output_shape, format, tensorType);
if (input_tensor == nullptr) {
MS_LOG(ERROR) << "input tensor null";
return;
}
if (output_tensor == nullptr) {
MS_LOG(ERROR) << "output tensor null";
return;
}
std::vector<lite::tensor::Tensor *> inputs{input_tensor};
std::vector<lite::tensor::Tensor *> outputs{output_tensor};
// run
MS_LOG(INFO) << "NewOpenCLKernel";
std::cout << "NewOpenCLKernel" << std::endl;
auto *kernel = new kernel::SoftmaxOpenCLKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs);
if (kernel == nullptr) {
MS_LOG(ERROR) << "kernel null";
return;
}
MS_LOG(INFO) << "KernelInit";
std::cout << "KernelInit" << std::endl;
kernel->Init();
std::cout << "LiteKernel" << std::endl;
std::vector<kernel::LiteKernel *> kernels{kernel};
inputs[0]->MallocData(allocator);
std::cout << "SubGraphOpenCLKernel" << std::endl;
auto *pGraph = new kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels);
auto *pGraph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels);
if (pGraph == nullptr) {
MS_LOG(ERROR) << "pGraph null";
return;
}
MS_LOG(INFO) << "pGraphinit";
pGraph->Init();
// load data
MS_LOG(INFO) << "load data1";
LoadTestData(input_tensor->Data(), input_tensor->Size(), input_file);
auto *input_data = reinterpret_cast<float *>(input_tensor->Data());
printf("\ninput[0:10]:");
......@@ -75,7 +83,6 @@ void RunTestCase(std::vector<int> input_shape, std::vector<int> output_shape, st
pGraph->Run();
MS_LOG(INFO) << "compare result";
std::cout << "compare result" << std::endl;
CompareOutput(output_tensor, expect_file);
}
......@@ -84,23 +91,11 @@ TEST_F(TestSoftmaxOpenCL, Softmax_1) {
std::vector<int> output_shape = {1, 2, 2, 8};
std::string input_file = "softmax_in.bin";
std::string expect_file = "softmax_out.bin";
auto param = new SoftmaxParameter;
auto param = new (std::nothrow) SoftmaxParameter;
param->axis_ = 3;
schema::Format format = schema::Format_NHWC4;
RunTestCase(input_shape, output_shape, input_file, expect_file, param, format);
}
// TEST_F(TestSoftmaxOpenCL, Softmax_1x1) {
// std::vector<int> input_shape = {1, 100};
// std::vector<int> output_shape = {1, 100};
// std::string input_file = "softmax1x1_in.bin";
// std::string expect_file = "softmax1x1_out.bin";
// auto param = new SoftmaxParameter;
// param->axis_ = 1;
// schema::Format format = schema::Format_NHWC4;
//
// RunTestCase(input_shape, output_shape, input_file, expect_file, param, format);
//}
} // namespace mindspore
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册
新手
引导
客服 返回
顶部