From 1a8af5ef68d758c8db854647fdc4b123d3d2a247 Mon Sep 17 00:00:00 2001
From: yangfei
Date: Fri, 12 Oct 2018 11:00:45 +0800
Subject: [PATCH] implement some functions

---
 src/framework/cl/cl_image.cpp                 | 123 ++++++++++++++++++
 src/framework/cl/cl_image.h                   |  13 +-
 src/framework/cl/cl_scope.h                   |   4 +-
 src/framework/executor.cpp                    |   7 +-
 src/framework/operator.cpp                    |  30 +++++
 src/operators/feed_op.cpp                     |  16 ++-
 src/operators/feed_op.h                       |  35 +++--
 .../kernel/cl/cl_kernel/feed_kernel.cl        |  15 +++
 src/operators/kernel/cl/conv_kernel.cpp       |  95 +++++++-------
 src/operators/kernel/cl/feed_kernel.cpp       |  53 ++++++++
 src/operators/kernel/feed_kernel.h            |  32 +++++
 src/operators/op_param.h                      |  10 +-
 12 files changed, 355 insertions(+), 78 deletions(-)
 create mode 100644 src/framework/cl/cl_image.cpp
 create mode 100644 src/operators/kernel/cl/cl_kernel/feed_kernel.cl
 create mode 100644 src/operators/kernel/cl/feed_kernel.cpp
 create mode 100644 src/operators/kernel/feed_kernel.h

diff --git a/src/framework/cl/cl_image.cpp b/src/framework/cl/cl_image.cpp
new file mode 100644
index 0000000000..94d5bb8602
--- /dev/null
+++ b/src/framework/cl/cl_image.cpp
@@ -0,0 +1,123 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "cl_image.h"
+
+namespace paddle_mobile {
+namespace framework {
+
+void CLImageToTensor(CLImage *cl_image, Tensor *tensor,
+                     cl_command_queue commandQueue) {
+  DDim ddim = cl_image->dims();
+  // Default every dimension to 1 so an unsupported rank never leaves them
+  // uninitialized.
+  size_t N = 1, C = 1, H = 1, W = 1;
+  if (ddim.size() == 4) {
+    // A negative batch dimension means "unknown"; fall back to 1.
+    N = ddim[0] < 0 ? 1 : ddim[0];
+    C = ddim[1];
+    H = ddim[2];
+    W = ddim[3];
+  } else if (ddim.size() == 1) {
+    C = ddim[0];
+  }
+
+  size_t width = W * ((C + 3) / 4);
+  size_t height = H * N;
+
+  float *p = tensor->data<float>();
+  half imageData[width * height * 4];
+  cl_mem image = cl_image->GetCLImage();
+  size_t origin[3] = {0, 0, 0};
+  size_t region[3] = {width, height, 1};
+  cl_int err = clEnqueueReadImage(commandQueue, image, CL_TRUE, origin, region,
+                                  0, 0, imageData, 0, NULL, NULL);
+  if (err != CL_SUCCESS) {
+    // TODO: error handling
+  }
+
+  size_t i0 = 0;
+  for (size_t n = 0; n < N; n++) {
+    for (size_t c = 0; c < C; c++) {
+      size_t i1 = i0;
+      for (size_t h = 0; h < H; h++) {
+        size_t i2 = (i1 << 2) + c % 4;
+        for (size_t w = 0; w < W; w++) {
+          *p = half2float(imageData[i2]);
+          i2 += 4;
+          p++;
+        }
+        i1 += width;
+      }
+    }
+    i0 += width * H;
+  }
+}
+
+void TensorToCLImage(const Tensor *tensor, CLImage *cl_image,
+                     cl_command_queue commandQueue) {
+  DDim ddim = cl_image->dims();
+  size_t N = 1, C = 1, H = 1, W = 1;
+  if (ddim.size() == 4) {
+    N = ddim[0] < 0 ? 1 : ddim[0];
+    C = ddim[1];
+    H = ddim[2];
+    W = ddim[3];
+  } else if (ddim.size() == 1) {
+    C = ddim[0];
+  }
+
+  size_t width = W * ((C + 3) / 4);
+  size_t height = H * N;
+
+  const float *p = tensor->data<float>();
+  half imageData[width * height * 4];
+  cl_mem image = cl_image->GetCLImage();
+  size_t origin[3] = {0, 0, 0};
+  size_t region[3] = {width, height, 1};
+  // Read the image first so the padding lanes keep their current values,
+  // then overwrite the payload below.
+  cl_int err = clEnqueueReadImage(commandQueue, image, CL_TRUE, origin, region,
+                                  0, 0, imageData, 0, NULL, NULL);
+  if
(err != CL_SUCCESS) {
+    // TODO: error handling
+  }
+
+  size_t i0 = 0;
+  for (size_t n = 0; n < N; n++) {
+    for (size_t c = 0; c < C; c++) {
+      size_t i1 = i0;
+      for (size_t h = 0; h < H; h++) {
+        size_t i2 = (i1 << 2) + c % 4;
+        for (size_t w = 0; w < W; w++) {
+          imageData[i2] = float2half(*p);
+          i2 += 4;
+          p++;
+        }
+        i1 += width;
+      }
+    }
+    i0 += width * H;
+  }
+
+  // Write the converted data back; without this the tensor never reaches the
+  // OpenCL image.
+  err = clEnqueueWriteImage(commandQueue, image, CL_TRUE, origin, region, 0, 0,
+                            imageData, 0, NULL, NULL);
+  if (err != CL_SUCCESS) {
+    // TODO: error handling
+  }
+}
+
+}  // namespace framework
+}  // namespace paddle_mobile

diff --git a/src/framework/cl/cl_image.h b/src/framework/cl/cl_image.h
index 58c8ea6bc4..ad86713f90 100644
--- a/src/framework/cl/cl_image.h
+++ b/src/framework/cl/cl_image.h
@@ -57,8 +57,6 @@ class CLImage {
       height_of_one_block_ = H;
     }
 
-    DLOG << "-------InitMemory-------";
-
     size_t width = W * ((C + 3) / 4);
     size_t height = H * N;
 
@@ -95,7 +93,6 @@ class CLImage {
         i0 += width * H;
       }
     }
-    DLOG << "-------InitMemory-------";
     cl_int err;
     cl_image_ = clCreateImage2D(
         context,  // cl_context context
@@ -156,13 +153,9 @@ class CLImage {
   cl_context context_;
 };
 
-// void TensorToCLImage(Tensor *tensor, CLImage *image) {
-//
-//}
-//
-// void CLImageToTensor(CLImage *image, Tensor *tensor) {
-//
-//}
+void TensorToCLImage(const Tensor *tensor, CLImage *cl_image, cl_command_queue commandQueue);
+
+void CLImageToTensor(CLImage *cl_image, Tensor *tensor, cl_command_queue commandQueue);
 
 }  // namespace framework
 }  // namespace paddle_mobile

diff --git a/src/framework/cl/cl_scope.h b/src/framework/cl/cl_scope.h
index 61082808b5..5fb400ca9a 100644
--- a/src/framework/cl/cl_scope.h
+++ b/src/framework/cl/cl_scope.h
@@ -53,8 +53,8 @@ class CLScope {
       return it->second.get();
     }
 
-    auto program =
-        CLEngine::Instance()->CreateProgramWith(context_.get(), "./cl_kernel/" + file_name);
+    auto program = CLEngine::Instance()->CreateProgramWith(
+        context_.get(), "./cl_kernel/" + file_name);
 
     programs_[file_name] = std::move(program);
 
     status_ = clBuildProgram(program.get(), 0, 0, 0, 0, 0);

diff --git a/src/framework/executor.cpp b/src/framework/executor.cpp
index 35532103be..c8ef6763a2 100644
--- a/src/framework/executor.cpp
+++ b/src/framework/executor.cpp
@@ -938,9 +938,9 @@ void Executor<Dtype, P>::InitMemory() {
       cl_context context = program_.scope->GetCLScpoe()->Context();
       const framework::TensorDesc &desc = var_desc->Tensor_desc();
-      framework::DDim ddim = framework::make_ddim(desc.Dims());
+      // framework::DDim ddim = framework::make_ddim(desc.Dims());
+      framework::DDim ddim = cl_image->dims();
       DLOG << var_desc->Name();
-
       cl_image->Init(context, ddim);
     }
   }
@@ -989,7 +989,8 @@ void Executor<Dtype, P>::InitCombineMemory() {
       cl_context context = program_.scope->GetCLScpoe()->Context();
       const framework::TensorDesc &desc = var_desc->Tensor_desc();
-      framework::DDim ddim = framework::make_ddim(desc.Dims());
+      framework::DDim ddim = cl_image->dims();
+      // framework::DDim ddim = framework::make_ddim(desc.Dims());
       cl_image->Init(context, ddim);
     }

diff --git a/src/framework/operator.cpp b/src/framework/operator.cpp
index c8d83c9599..fa04ac2e46 100644
--- a/src/framework/operator.cpp
+++ b/src/framework/operator.cpp
@@ -66,8 +66,26 @@ void OperatorBase<Dtype>::Run() {
   for (int i = 0; i < var_vec_in.size(); ++i) {
     auto vari = scope_->FindVar(var_vec_in[i]);
     if (vari->IsInitialized()) {
+#ifdef PADDLE_MOBILE_CL
+      if (type_ == "feed") {
+        Tensor *tensor = vari->template GetMutable<framework::LoDTensor>();
+        if (tensor) DLOG << type_ << " input- " << key << "=" << *tensor;
+      } else {
+        CLImage *cl_image = vari->template GetMutable<framework::CLImage>();
+        // cl_command_queue commandQueue =
+        //     scope_->GetCLScpoe()->CommandQueue();
+        // Tensor *tmp;
+        // CLImageToTensor(cl_image, tmp, commandQueue);
+        // tmp->Resize(cl_image->dims());
+        if (cl_image) {
+          // DLOG << *cl_image;
+        }
+      }
+#endif

diff --git a/src/operators/feed_op.cpp b/src/operators/feed_op.cpp
+#ifdef PADDLE_MOBILE_CL
+template <typename DeviceType, typename T>
+void FeedOp<DeviceType, T>::InferShape() const {
+  auto out_dims =
 this->param_.Out()->dims();
+  out_dims[0] = this->param_.BatchSize();
+  this->param_.Out()->Resize(out_dims);
+}
+
+template <typename DeviceType, typename T>
+void FeedOp<DeviceType, T>::RunImpl() {
+  this->kernel_.Compute(this->param_);
+}
+#endif
+}
 }  // namespace paddle_mobile
 
 namespace ops = paddle_mobile::operators;

diff --git a/src/operators/feed_op.h b/src/operators/feed_op.h
index fe444b206b..0890a45c9e 100644
--- a/src/operators/feed_op.h
+++ b/src/operators/feed_op.h
@@ -16,11 +16,32 @@ limitations under the License. */
 
 #include <string>
 #include "framework/operator.h"
+#include "operators/kernel/feed_kernel.h"
 #include "operators/op_param.h"
 
 namespace paddle_mobile {
 namespace operators {
 using std::string;
+
+#ifdef PADDLE_MOBILE_CL
+template <typename DeviceType, typename T>
+class FeedOp
+    : public framework::OperatorWithKernel<DeviceType, FeedParam<DeviceType>,
+                                           FeedKernel<DeviceType, T>> {
+ public:
+  FeedOp(const string &type, const VariableNameMap &inputs,
+         const VariableNameMap &outputs, const framework::AttributeMap attrs,
+         std::shared_ptr<framework::Scope> scope)
+      : framework::OperatorWithKernel<DeviceType, FeedParam<DeviceType>,
+                                      FeedKernel<DeviceType, T>>(
+            type, inputs, outputs, attrs, scope) {}
+
+  void InferShape() const override;
+
+  void RunImpl() override;
+
+ protected:
+};
+#else
 template <typename DeviceType, typename T>
 class FeedOp : public framework::OperatorBase<DeviceType> {
  public:
@@ -74,22 +95,20 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
   }
 
 #else
-#ifdef PADDLE_MOBILE_CL
-  void Init() {}
-  void RunImpl() {}
-#else
+  void Init() {}
   void RunImpl() {
-      param_.Out()->ShareDataWith(*param_.InputX());
-      param_.Out()->set_lod(param_.InputX()->lod());
+    param_.Out()->ShareDataWith(*param_.InputX());
+    param_.Out()->set_lod(param_.InputX()->lod());
   }
-#endif
-#endif
 
  protected:
   FeedParam<DeviceType> param_;
 };
+#endif
+#endif
 
 }  // namespace operators
 }  // namespace paddle_mobile

diff --git a/src/operators/kernel/cl/cl_kernel/feed_kernel.cl b/src/operators/kernel/cl/cl_kernel/feed_kernel.cl
new file mode 100644
index 0000000000..d52f2fadaf
--- /dev/null
+++ b/src/operators/kernel/cl/cl_kernel/feed_kernel.cl
@@ -0,0 +1,15 @@
+__kernel void feed(__global float *in, __write_only image2d_t outputImage,
+                   int h, int w) {
+  int j = get_global_id(0);
+  int i = get_global_id(1);
+  float4 pixel;
+  // The input is laid out NCHW with N == 1; exactly three channels are read
+  // and packed into one RGBA texel.
+  pixel.x = in[i * w + j];
+  pixel.y = in[h * w + (i * w + j)];
+  pixel.z = in[2 * h * w + (i * w + j)];
+  pixel.w = 0.0f;
+  int2 coords;
+  coords.x = j;
+  coords.y = i;
+  write_imagef(outputImage, coords, pixel);
+}
diff --git a/src/operators/kernel/cl/conv_kernel.cpp b/src/operators/kernel/cl/conv_kernel.cpp
index ec265b7992..ee7b56629a 100644
--- a/src/operators/kernel/cl/conv_kernel.cpp
+++ b/src/operators/kernel/cl/conv_kernel.cpp
@@ -21,61 +21,58 @@ namespace operators {
 
 template <>
 bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
-  PADDLE_MOBILE_ENFORCE(
-      param->Filter()->dims()[2] == param->Filter()->dims()[3] &&
-          param->Paddings()[0] == param->Paddings()[1],
-      "need equal");
-  int offset = static_cast<int>(param->Filter()->dims()[2]) / 2 -
-               static_cast<int>(param->Paddings()[1]);
-  param->SetOffset(offset);
-
-  if (param->Filter()->WidthOfOneBlock() == 1 &&
-      param->Filter()->HeightOfOneBlock() == 1) {
-    this->cl_helper_.AddKernel("conv_1x1", "conv_add_bn_relu_kernel.cl");
-  } else if (param->Filter()->dims()[1] == 1) {
-    this->cl_helper_.AddKernel("depth_conv_3x3", "conv_add_bn_relu_kernel.cl");
-  } else if (param->Filter()->WidthOfOneBlock() == 3 &&
-             param->Filter()->HeightOfOneBlock() == 3) {
-    this->cl_helper_.AddKernel("conv_3x3", "conv_add_bn_relu_kernel.cl");
-  } else {
-    PADDLE_MOBILE_THROW_EXCEPTION(" not support ");
-  }
+  // PADDLE_MOBILE_ENFORCE(
+  //     param->Filter()->dims()[2] == param->Filter()->dims()[3] &&
+  //         param->Paddings()[0] == param->Paddings()[1],
+  //     "need equal");
+  // int offset = static_cast<int>(param->Filter()->dims()[2]) / 2 -
+  //              static_cast<int>(param->Paddings()[1]);
+  // param->SetOffset(offset);
+  //
+  // if (param->Filter()->WidthOfOneBlock() == 1 &&
+  //     param->Filter()->HeightOfOneBlock() == 1) {
+  //   this->cl_helper_.AddKernel("conv_1x1", "conv_add_bn_relu_kernel.cl");
+  // } else if (param->Filter()->dims()[1] == 1) {
+  //   this->cl_helper_.AddKernel("depth_conv_3x3",
+  //                              "conv_add_bn_relu_kernel.cl");
+  // } else if (param->Filter()->WidthOfOneBlock() == 3 &&
+  //            param->Filter()->HeightOfOneBlock() == 3) {
+  //   this->cl_helper_.AddKernel("conv_3x3", "conv_add_bn_relu_kernel.cl");
+  // } else {
+  //   PADDLE_MOBILE_THROW_EXCEPTION(" not support ");
+  // }
 
   return true;
 }
 
 template <>
 void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) {
-  auto kernel = this->cl_helper_.KernelAt(0);
-  auto default_work_size = this->cl_helper_.DefaultWorkSize(*param.Output());
-  int c_block = default_work_size[0];
-  int w = default_work_size[1];
-  int nh = default_work_size[2];
-  auto input = param.Input()->GetCLImage();
-  auto filter = param.Filter()->GetCLImage();
-  auto output = param.Output();
-  int stride = param.Strides()[0];
-  int offset = param.Offset();
-  int input_c = param.Input()->CBlock();
-  int dilation = param.Dilations()[0];
-  int input_width = param.Input()->WidthOfOneBlock();
-  int input_height = param.Input()->HeightOfOneBlock();
-
-  clSetKernelArg(kernel, 0, sizeof(int), &c_block);
-  clSetKernelArg(kernel, 1, sizeof(int), &w);
-  clSetKernelArg(kernel, 2, sizeof(int), &nh);
-  clSetKernelArg(kernel, 3, sizeof(cl_mem), &input);
-  clSetKernelArg(kernel, 4, sizeof(cl_mem), &filter);
-  clSetKernelArg(kernel, 5, sizeof(cl_mem), &output);
-  clSetKernelArg(kernel, 6, sizeof(int), &stride);
-  clSetKernelArg(kernel, 7, sizeof(int), &offset);
-  clSetKernelArg(kernel, 8, sizeof(int), &input_c);
-  clSetKernelArg(kernel, 9, sizeof(int), &dilation);
-  clSetKernelArg(kernel, 10, sizeof(int), &input_width);
-  clSetKernelArg(kernel, 11, sizeof(int), &input_height);
-
-  clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
-                         default_work_size.data(), NULL, 0, NULL, NULL);
+  // auto kernel = this->cl_helper_.KernelAt(0);
+  // auto default_work_size = this->cl_helper_.DefaultWorkSize(*param.Output());
+  // int c_block = default_work_size[0];
+  // int w = default_work_size[1];
+  // int nh = default_work_size[2];
+  // auto input = param.Input()->GetCLImage();
+  // auto filter = param.Filter()->GetCLImage();
+  // auto output = param.Output();
+  // int stride = param.Strides()[0];
+  // int offset = param.Offset();
+  // int input_c = param.Input()->CBlock();
+  // int dilation = param.Dilations()[0];
+  // int input_width = param.Input()->WidthOfOneBlock();
+  // int input_height = param.Input()->HeightOfOneBlock();
+  //
+  // clSetKernelArg(kernel, 0, sizeof(int), &c_block);
+  // clSetKernelArg(kernel, 1, sizeof(int), &w);
+  // clSetKernelArg(kernel, 2, sizeof(int), &nh);
+  // clSetKernelArg(kernel, 3, sizeof(cl_mem), &input);
+  // clSetKernelArg(kernel, 4, sizeof(cl_mem), &filter);
+  // clSetKernelArg(kernel, 5, sizeof(cl_mem), &output);
+  // clSetKernelArg(kernel, 6, sizeof(int), &stride);
+  // clSetKernelArg(kernel, 7, sizeof(int), &offset);
+  // clSetKernelArg(kernel, 8, sizeof(int), &input_c);
+  // clSetKernelArg(kernel, 9, sizeof(int), &dilation);
+  // clSetKernelArg(kernel, 10, sizeof(int), &input_width);
+  // clSetKernelArg(kernel, 11, sizeof(int), &input_height);
+  //
+  // clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
+  //                        default_work_size.data(), NULL, 0, NULL, NULL);
 
   // auto kernel = this->cl_helper_.KernelAt(0);
   // size_t global_work_size[3] = {1, 2, 3};

diff --git a/src/operators/kernel/cl/feed_kernel.cpp b/src/operators/kernel/cl/feed_kernel.cpp
new file mode 100644
index 0000000000..8703518322
--- /dev/null
+++ b/src/operators/kernel/cl/feed_kernel.cpp
@@ -0,0 +1,53 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "operators/kernel/feed_kernel.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+template <>
+bool FeedKernel<GPU_CL, float>::Init(FeedParam<GPU_CL> *param) {
+  DLOG << "Init feed";
+  this->cl_helper_.AddKernel("feed", "feed_kernel.cl");
+  return true;
+}
+
+template <>
+void FeedKernel<GPU_CL, float>::Compute(const FeedParam<GPU_CL> &param) {
+  DLOG << "feed_kernel";
+  auto kernel = this->cl_helper_.KernelAt(0);
+  cl_int status;
+  auto output = param.Out();
+  auto input = param.InputX();
+  const float *input_data = input->data<float>();
+  cl_mem cl_image = output->GetCLImage();
+  int height = output->dims()[2];
+  int width = output->dims()[3];
+  // Arguments must follow the signature of the "feed" kernel:
+  // (__global float *in, __write_only image2d_t outputImage, int h, int w).
+  // NOTE: input_data is assumed to refer to device-accessible memory; a plain
+  // host pointer would first have to be wrapped in a cl_mem buffer.
+  status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input_data);
+  status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &cl_image);
+  status = clSetKernelArg(kernel, 2, sizeof(int), &height);
+  status = clSetKernelArg(kernel, 3, sizeof(int), &width);
+
+  // get_global_id(0) indexes the width, get_global_id(1) the height.
+  size_t global_work_size[2] = {static_cast<size_t>(width),
+                                static_cast<size_t>(height)};
+  clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2, NULL,
+                         global_work_size, NULL, 0, NULL, NULL);
+}
+
+template class FeedKernel<GPU_CL, float>;
+
+}  // namespace operators
+}  // namespace paddle_mobile

diff --git a/src/operators/kernel/feed_kernel.h b/src/operators/kernel/feed_kernel.h
new file mode 100644
index 0000000000..ed287221ab
--- /dev/null
+++ b/src/operators/kernel/feed_kernel.h
@@ -0,0 +1,32 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
*/
+
+#pragma once
+
+#include "framework/operator.h"
+#include "operators/op_param.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+using framework::OpKernelBase;
+
+template <typename DeviceType, typename T>
+class FeedKernel : public OpKernelBase<DeviceType, FeedParam<DeviceType>> {
+ public:
+  void Compute(const FeedParam<DeviceType> &param);
+  bool Init(FeedParam<DeviceType> *param);
+};
+
+}  // namespace operators
+}  // namespace paddle_mobile

diff --git a/src/operators/op_param.h b/src/operators/op_param.h
index 0fafc19152..1a1f910d11 100644
--- a/src/operators/op_param.h
+++ b/src/operators/op_param.h
@@ -911,13 +911,13 @@ class FeedParam : public OpParam {
  public:
   FeedParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
-            const AttributeMap &attrs, Scope *scope) {
-    input_x_ = InputXFrom(inputs, *scope);
-    out_ = OutFrom(outputs, *scope);
-    auto var = scope->Var("batch_size");
+            const AttributeMap &attrs, const Scope &scope) {
+    input_x_ = InputXFrom(inputs, scope);
+    out_ = OutFrom(outputs, scope);
+    auto var = scope.FindVar("batch_size");
     batch_size = var->GetValue<int>();
   }
-  const GType *InputX() const { return input_x_; }
+  const LoDTensor *InputX() const { return input_x_; }
   GType *Out() const { return out_; }
   const int BatchSize() const { return batch_size; }
-- 
GitLab
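
Note on the "// TODO: error handling" placeholders in cl_image.cpp and the unchecked clSetKernelArg / clEnqueueNDRangeKernel calls in feed_kernel.cpp: a small checking macro keeps OpenCL failures from passing silently. The sketch below is not part of the patch; the CL_CHECK name and the fprintf-based logging are illustrative assumptions, and it relies only on the standard OpenCL C API.

/* cl_check_sketch.cpp - minimal, self-contained sketch (assumes OpenCL headers). */
#include <CL/cl.h>

#include <cstdio>

// Evaluate an OpenCL call and, on failure, log the error code with file/line.
// Hypothetical helper, not an existing paddle-mobile API.
#define CL_CHECK(call)                                                     \
  do {                                                                     \
    cl_int cl_check_err_ = (call);                                         \
    if (cl_check_err_ != CL_SUCCESS) {                                     \
      std::fprintf(stderr, "OpenCL error %d at %s:%d\n",                   \
                   static_cast<int>(cl_check_err_), __FILE__, __LINE__);   \
    }                                                                      \
  } while (0)

int main() {
  // Stand-alone usage example; in the patch the same pattern would wrap the
  // image transfers, e.g.
  //   CL_CHECK(clEnqueueReadImage(commandQueue, image, CL_TRUE, origin,
  //                               region, 0, 0, imageData, 0, NULL, NULL));
  cl_uint num_platforms = 0;
  CL_CHECK(clGetPlatformIDs(0, NULL, &num_platforms));
  std::printf("OpenCL platforms found: %u\n", num_platforms);
  return 0;
}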