Commit 819523a8 authored by yangfei

Implement some functions

Parent fe7f8763
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "cl_image.h"
namespace paddle_mobile {
namespace framework {
void CLImageToTensor(CLImage *cl_image, Tensor *tensor,cl_command_queue commandQueue){
DDim ddim = cl_image->dims();
size_t N,C,H,W;
if(ddim.size()==4){
N = ddim[0];
if(N<0){
N = 1;
}
C = ddim[1];
H = ddim[2];
W = ddim[3];
}else if(ddim.size()==1){
N = 1;
C = ddim[0];
H = 1;
W = 1;
}
size_t width = W * ((C + 3) / 4);
size_t height = H * N;
float *p = tensor->data<float>();
half imageData[width * height * 4];
cl_int err;
cl_mem image = cl_image->GetCLImage();
size_t origin[3] = {0,0,0};
size_t region[3] = {width,height,1};
err = clEnqueueReadImage(commandQueue,image,CL_TRUE,origin,region,0,0,imageData,0,NULL,NULL);
size_t i0 = 0;
for (int n = 0; n < N; n++) {
for (int c = 0; c < C; c++) {
size_t i1 = i0;
for (int h = 0; h < H; h++) {
size_t i2 = (i1<<2) + c % 4;
for (int w = 0; w < W; w++) {
*p = half2float(imageData[i2]);
i2 += 4;
p++;
}
i1 += width;
}
}
i0 += width * H;
}
if (err != CL_SUCCESS) {
// TODO: error handling
}
}
void TensorToCLImage(const Tensor *tensor, CLImage *cl_image,
                     cl_command_queue commandQueue) {
  DDim ddim = cl_image->dims();
  size_t N = 1, C = 1, H = 1, W = 1;
  if (ddim.size() == 4) {
    N = ddim[0] < 0 ? 1 : ddim[0];
    C = ddim[1];
    H = ddim[2];
    W = ddim[3];
  } else if (ddim.size() == 1) {
    C = ddim[0];
  }
  size_t width = W * ((C + 3) / 4);
  size_t height = H * N;
  const float *p = tensor->data<float>();
  half imageData[width * height * 4];
  cl_mem image = cl_image->GetCLImage();
  size_t origin[3] = {0, 0, 0};
  size_t region[3] = {width, height, 1};
  // Read the current contents first so padding texels (channel slots past C)
  // keep whatever values they already hold.
  cl_int err = clEnqueueReadImage(commandQueue, image, CL_TRUE, origin, region,
                                  0, 0, imageData, 0, NULL, NULL);
  if (err != CL_SUCCESS) {
    // TODO: error handling
    return;
  }
  size_t i0 = 0;
  for (size_t n = 0; n < N; n++) {
    for (size_t c = 0; c < C; c++) {
      size_t i1 = i0;
      for (size_t h = 0; h < H; h++) {
        size_t i2 = (i1 << 2) + c % 4;
        for (size_t w = 0; w < W; w++) {
          imageData[i2] = float2half(*p);
          i2 += 4;
          p++;
        }
        i1 += width;
      }
    }
    i0 += width * H;
  }
  // Write the converted data back to the device image; the original version
  // filled imageData but never copied it back, leaving the image unchanged.
  err = clEnqueueWriteImage(commandQueue, image, CL_TRUE, origin, region, 0, 0,
                            imageData, 0, NULL, NULL);
  if (err != CL_SUCCESS) {
    // TODO: error handling
  }
}
}  // namespace framework
}  // namespace paddle_mobile
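A minimal round-trip sketch of the two helpers above (not part of the commit; it assumes an initialized CLImage `image`, a valid cl_command_queue `queue`, and the Resize/mutable_data Tensor interface used elsewhere in paddle-mobile):

  framework::Tensor host_tensor;
  host_tensor.Resize(image.dims());   // tensor shape must match the image dims
  host_tensor.mutable_data<float>();  // allocate storage behind data<float>()
  // Device image -> host tensor (blocking read inside).
  framework::CLImageToTensor(&image, &host_tensor, queue);
  // ... inspect or modify host_tensor on the CPU ...
  // Host tensor -> device image (relies on the write-back added above).
  framework::TensorToCLImage(&host_tensor, &image, queue);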
...
@@ -57,8 +57,6 @@ class CLImage {
       height_of_one_block_ = H;
     }
-    DLOG << "-------InitMemory-------";
     size_t width = W * ((C + 3) / 4);
     size_t height = H * N;
...
@@ -95,7 +93,6 @@ class CLImage {
         i0 += width * H;
       }
     }
-    DLOG << "-------InitMemory-------";
     cl_int err;
     cl_image_ = clCreateImage2D(
         context,  // cl_context context
...
@@ -156,13 +153,9 @@ class CLImage {
   cl_context context_;
 };

-// void TensorToCLImage(Tensor *tensor, CLImage *image) {
-//
-//}
-//
-// void CLImageToTensor(CLImage *image, Tensor *tensor) {
-//
-//}
+void TensorToCLImage(const Tensor *tensor, CLImage *cl_image,
+                     cl_command_queue commandQueue);
+
+void CLImageToTensor(CLImage *cl_image, Tensor *tensor,
+                     cl_command_queue commandQueue);

 }  // namespace framework
 }  // namespace paddle_mobile
...
@@ -53,8 +53,8 @@ class CLScope {
       return it->second.get();
     }

-    auto program =
-        CLEngine::Instance()->CreateProgramWith(context_.get(), "./cl_kernel/" + file_name);
+    auto program = CLEngine::Instance()->CreateProgramWith(
+        context_.get(), "./cl_kernel/" + file_name);
     programs_[file_name] = std::move(program);
     status_ = clBuildProgram(program.get(), 0, 0, 0, 0, 0);
......
...
@@ -938,9 +938,9 @@ void Executor<GPU_CL, Precision::FP32>::InitMemory() {
           cl_context context = program_.scope->GetCLScpoe()->Context();
           const framework::TensorDesc &desc = var_desc->Tensor_desc();
-          framework::DDim ddim = framework::make_ddim(desc.Dims());
+          // framework::DDim ddim = framework::make_ddim(desc.Dims());
+          framework::DDim ddim = cl_image->dims();
           DLOG << var_desc->Name();
           cl_image->Init(context, ddim);
         }
       }
...
@@ -989,7 +989,8 @@ void Executor<GPU_CL, Precision::FP32>::InitCombineMemory() {
       cl_context context = program_.scope->GetCLScpoe()->Context();
       const framework::TensorDesc &desc = var_desc->Tensor_desc();
-      framework::DDim ddim = framework::make_ddim(desc.Dims());
+      framework::DDim ddim = cl_image->dims();
+      // framework::DDim ddim = framework::make_ddim(desc.Dims());
       cl_image->Init(context, ddim);
     }
......
...
@@ -66,8 +66,26 @@ void OperatorBase<Dtype>::Run() {
     for (int i = 0; i < var_vec_in.size(); ++i) {
       auto vari = scope_->FindVar(var_vec_in[i]);
       if (vari->IsInitialized()) {
+#ifdef PADDLE_MOBILE_CL
+        if (type_ == "feed") {
+          Tensor *tensor = vari->template GetMutable<framework::LoDTensor>();
+          if (tensor) DLOG << type_ << " input- " << key << "=" << *tensor;
+        } else {
+          CLImage *cl_image = vari->template GetMutable<framework::CLImage>();
+          // cl_command_queue commandQueue = scope_->GetCLScpoe()->CommandQueue();
+          // Tensor *tmp;
+          // CLImageToTensor(cl_image, tmp, commandQueue);
+          // tmp->Resize(cl_image->dims());
+          if (cl_image) {
+            // DLOG << type_ << " input- " << key << "=" << *tmp;
+            DLOG << type_ << " input- " << key << "=" << cl_image->dims();
+          }
+        }
+#else
         Tensor *tensor = vari->template GetMutable<framework::LoDTensor>();
         if (tensor) DLOG << type_ << " input- " << key << "=" << *tensor;
+#endif
       }
     }
   }
...
@@ -76,8 +94,20 @@ void OperatorBase<Dtype>::Run() {
     for (int i = 0; i < var_vec_out.size(); ++i) {
       auto vari = scope_->FindVar(var_vec_out[i]);
       if (vari->IsInitialized()) {
+#ifdef PADDLE_MOBILE_CL
+        CLImage *cl_image = vari->template GetMutable<framework::CLImage>();
+        // cl_command_queue commandQueue = scope_->GetCLScpoe()->CommandQueue();
+        // Tensor *tmp;
+        // CLImageToTensor(cl_image, tmp, commandQueue);
+        // tmp->Resize(cl_image->dims());
+        if (cl_image) {
+          // DLOG << type_ << " output- " << key << "=" << *tmp;
+          DLOG << type_ << " output- " << key << "=" << cl_image->dims();
+        }
+#else
         Tensor *tensor = vari->template GetMutable<framework::LoDTensor>();
         if (tensor) DLOG << type_ << " output- " << key << "=" << *tensor;
+#endif
       }
     }
   }
......
...
@@ -14,7 +14,21 @@ limitations under the License. */

 #include "feed_op.h"

 namespace paddle_mobile {
-namespace operators {}
+namespace operators {
+
+#ifdef PADDLE_MOBILE_CL
+template <typename DeviceType, typename T>
+void FeedOp<DeviceType, T>::InferShape() const {
+  auto out_dims = this->param_.Out()->dims();
+  out_dims[0] = this->param_.BatchSize();
+  this->param_.Out()->Resize(out_dims);
+}
+
+template <typename DeviceType, typename T>
+void FeedOp<DeviceType, T>::RunImpl() {
+  this->kernel_.Compute(this->param_);
+}
+#endif
+
+}
 }  // namespace paddle_mobile

 namespace ops = paddle_mobile::operators;
......
...
@@ -16,11 +16,32 @@ limitations under the License. */

 #include <string>
 #include "framework/operator.h"
+#include "operators/kernel/feed_kernel.h"
 #include "operators/op_param.h"

 namespace paddle_mobile {
 namespace operators {
 using std::string;
+
+#ifdef PADDLE_MOBILE_CL
+template <typename DeviceType, typename T>
+class FeedOp
+    : public framework::OperatorWithKernel<DeviceType, FeedParam<DeviceType>,
+                                           FeedKernel<DeviceType, T>> {
+ public:
+  FeedOp(const string &type, const VariableNameMap &inputs,
+         const VariableNameMap &outputs, const framework::AttributeMap attrs,
+         std::shared_ptr<framework::Scope> scope)
+      : framework::OperatorWithKernel<DeviceType, FeedParam<DeviceType>,
+                                      FeedKernel<DeviceType, T>>(
+            type, inputs, outputs, attrs, scope) {}
+
+  void InferShape() const override;
+  void RunImpl() override;
+
+ protected:
+};
+#else
 template <typename DeviceType, typename T>
 class FeedOp : public framework::OperatorBase<DeviceType> {
  public:
...
@@ -74,22 +95,20 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
   }
 #else
-#ifdef PADDLE_MOBILE_CL
-  void Init() {}
-  void RunImpl() {}
-#else
   void Init() {}
   void RunImpl() {
     param_.Out()->ShareDataWith(*param_.InputX());
     param_.Out()->set_lod(param_.InputX()->lod());
   }
 #endif
-#endif

  protected:
   FeedParam<DeviceType> param_;
 };
+#endif

 }  // namespace operators
 }  // namespace paddle_mobile
......
__kernel void feed(__global float *in, __write_only image2d_t outputImage,
                   int h, int w) {
  // get_global_id(0) walks the width, get_global_id(1) the height.
  int j = get_global_id(0);
  int i = get_global_id(1);
  float4 pixel;
  // Pack the first three NCHW channels into one RGBA texel; the kernel
  // therefore assumes a three-channel (e.g. RGB) input laid out as C x H x W.
  pixel.x = in[i * w + j];
  pixel.y = in[h * w + (i * w + j)];
  pixel.z = in[2 * h * w + (i * w + j)];
  pixel.w = 0;
  int2 coords;
  coords.x = j;
  coords.y = i;
  write_imagef(outputImage, coords, pixel);
}
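Host code launching this kernel must hand dimension 0 the width and dimension 1 the height, matching the indexing above. A hedged sketch (`queue`, `kernel`, `w`, and `h` are assumed to already be in scope):

  // Dimension 0 spans the width (j), dimension 1 the height (i).
  size_t global_work_size[2] = {static_cast<size_t>(w),
                                static_cast<size_t>(h)};
  cl_int err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, global_work_size,
                                      NULL, 0, NULL, NULL);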
...
@@ -21,61 +21,58 @@ namespace operators {

 template <>
 bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
-  PADDLE_MOBILE_ENFORCE(
-      param->Filter()->dims()[2] == param->Filter()->dims()[3] &&
-          param->Paddings()[0] == param->Paddings()[1],
-      "need equal");
-  int offset = static_cast<int>(param->Filter()->dims()[2]) / 2 -
-               static_cast<int>(param->Paddings()[1]);
-  param->SetOffset(offset);
-
-  if (param->Filter()->WidthOfOneBlock() == 1 &&
-      param->Filter()->HeightOfOneBlock() == 1) {
-    this->cl_helper_.AddKernel("conv_1x1", "conv_add_bn_relu_kernel.cl");
-  } else if (param->Filter()->dims()[1] == 1) {
-    this->cl_helper_.AddKernel("depth_conv_3x3", "conv_add_bn_relu_kernel.cl");
-  } else if (param->Filter()->WidthOfOneBlock() == 3 &&
-             param->Filter()->HeightOfOneBlock() == 3) {
-    this->cl_helper_.AddKernel("conv_3x3", "conv_add_bn_relu_kernel.cl");
-  } else {
-    PADDLE_MOBILE_THROW_EXCEPTION(" not support ");
-  }
+  // PADDLE_MOBILE_ENFORCE(
+  //     param->Filter()->dims()[2] == param->Filter()->dims()[3] &&
+  //         param->Paddings()[0] == param->Paddings()[1],
+  //     "need equal");
+  // int offset = static_cast<int>(param->Filter()->dims()[2]) / 2 -
+  //              static_cast<int>(param->Paddings()[1]);
+  // param->SetOffset(offset);
+  //
+  // if (param->Filter()->WidthOfOneBlock() == 1 &&
+  //     param->Filter()->HeightOfOneBlock() == 1) {
+  //   this->cl_helper_.AddKernel("conv_1x1", "conv_add_bn_relu_kernel.cl");
+  // } else if (param->Filter()->dims()[1] == 1) {
+  //   this->cl_helper_.AddKernel("depth_conv_3x3",
+  //                              "conv_add_bn_relu_kernel.cl");
+  // } else if (param->Filter()->WidthOfOneBlock() == 3 &&
+  //            param->Filter()->HeightOfOneBlock() == 3) {
+  //   this->cl_helper_.AddKernel("conv_3x3", "conv_add_bn_relu_kernel.cl");
+  // } else {
+  //   PADDLE_MOBILE_THROW_EXCEPTION(" not support ");
+  // }
   return true;
 }

 template <>
 void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) {
-  auto kernel = this->cl_helper_.KernelAt(0);
-  auto default_work_size = this->cl_helper_.DefaultWorkSize(*param.Output());
-  int c_block = default_work_size[0];
-  int w = default_work_size[1];
-  int nh = default_work_size[2];
-  auto input = param.Input()->GetCLImage();
-  auto filter = param.Filter()->GetCLImage();
-  auto output = param.Output();
-  int stride = param.Strides()[0];
-  int offset = param.Offset();
-  int input_c = param.Input()->CBlock();
-  int dilation = param.Dilations()[0];
-  int input_width = param.Input()->WidthOfOneBlock();
-  int input_height = param.Input()->HeightOfOneBlock();
-
-  clSetKernelArg(kernel, 0, sizeof(int), &c_block);
-  clSetKernelArg(kernel, 1, sizeof(int), &w);
-  clSetKernelArg(kernel, 2, sizeof(int), &nh);
-  clSetKernelArg(kernel, 3, sizeof(cl_mem), &input);
-  clSetKernelArg(kernel, 4, sizeof(cl_mem), &filter);
-  clSetKernelArg(kernel, 5, sizeof(cl_mem), &output);
-  clSetKernelArg(kernel, 6, sizeof(int), &stride);
-  clSetKernelArg(kernel, 7, sizeof(int), &offset);
-  clSetKernelArg(kernel, 8, sizeof(int), &input_c);
-  clSetKernelArg(kernel, 9, sizeof(int), &dilation);
-  clSetKernelArg(kernel, 10, sizeof(int), &input_width);
-  clSetKernelArg(kernel, 11, sizeof(int), &input_height);
-
-  clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
-                         default_work_size.data(), NULL, 0, NULL, NULL);
+  // auto kernel = this->cl_helper_.KernelAt(0);
+  // auto default_work_size = this->cl_helper_.DefaultWorkSize(*param.Output());
+  // int c_block = default_work_size[0];
+  // int w = default_work_size[1];
+  // int nh = default_work_size[2];
+  // auto input = param.Input()->GetCLImage();
+  // auto filter = param.Filter()->GetCLImage();
+  // auto output = param.Output();
+  // int stride = param.Strides()[0];
+  // int offset = param.Offset();
+  // int input_c = param.Input()->CBlock();
+  // int dilation = param.Dilations()[0];
+  // int input_width = param.Input()->WidthOfOneBlock();
+  // int input_height = param.Input()->HeightOfOneBlock();
+  //
+  // clSetKernelArg(kernel, 0, sizeof(int), &c_block);
+  // clSetKernelArg(kernel, 1, sizeof(int), &w);
+  // clSetKernelArg(kernel, 2, sizeof(int), &nh);
+  // clSetKernelArg(kernel, 3, sizeof(cl_mem), &input);
+  // clSetKernelArg(kernel, 4, sizeof(cl_mem), &filter);
+  // clSetKernelArg(kernel, 5, sizeof(cl_mem), &output);
+  // clSetKernelArg(kernel, 6, sizeof(int), &stride);
+  // clSetKernelArg(kernel, 7, sizeof(int), &offset);
+  // clSetKernelArg(kernel, 8, sizeof(int), &input_c);
+  // clSetKernelArg(kernel, 9, sizeof(int), &dilation);
+  // clSetKernelArg(kernel, 10, sizeof(int), &input_width);
+  // clSetKernelArg(kernel, 11, sizeof(int), &input_height);
+  //
+  // clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
+  //                        default_work_size.data(), NULL, 0, NULL, NULL);

   // auto kernel = this->cl_helper_.KernelAt(0);
   // size_t global_work_size[3] = {1, 2, 3};
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "operators/kernel/feed_kernel.h"
namespace paddle_mobile {
namespace operators {
template <>
bool FeedKernel<GPU_CL, float>::Init(FeedParam<GPU_CL> *param) {
DLOG<<"Init feed";
this->cl_helper_.AddKernel("feed", "feed_kernel.cl");
return true;
}
template <>
void FeedKernel<GPU_CL, float>::Compute(const FeedParam<GPU_CL> &param) {
DLOG<<"feed_kernel";
auto kernel = this->cl_helper_.KernelAt(0);
cl_int status;
auto output = param.Out();
auto input = param.InputX();
const float *input_data = input->data<float>();
cl_mem cl_image = output->GetCLImage();
int height = output->dims()[2];
int width = output->dims()[3];
status = clSetKernelArg(kernel,0, sizeof(cl_mem),&input_data);
status = clSetKernelArg(kernel,0, sizeof(cl_mem),&cl_image);
status = clSetKernelArg(kernel,0, sizeof(cl_mem),&width);
status = clSetKernelArg(kernel,0, sizeof(cl_mem),&height);
size_t global_work_size[2] = {height,width};
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
}
template class FeedKernel<GPU_CL, float>;
} // namespace operators
} // namespace paddle_mobile
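Because feed_kernel.cl declares `in` as `__global float*`, the raw host pointer in Compute above cannot be handed to clSetKernelArg directly; it has to be staged into a device buffer first. A minimal sketch, not part of this commit, assuming a cl_context `context` is available (e.g. from the scope that owns the command queue) and `numel` is the number of input elements:

  // Wrap the host data in a cl_mem buffer and pass that as argument 0.
  cl_int err = CL_SUCCESS;
  cl_mem input_buf =
      clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                     sizeof(float) * numel,
                     const_cast<float *>(input_data), &err);
  if (err == CL_SUCCESS) {
    clSetKernelArg(kernel, 0, sizeof(cl_mem), &input_buf);
  }
  // ... enqueue the kernel, then drop the host reference; the runtime keeps
  // the buffer alive until enqueued commands that use it have completed.
  clReleaseMemObject(input_buf);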
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once

#include "framework/operator.h"
#include "operators/op_param.h"

namespace paddle_mobile {
namespace operators {

using namespace framework;

template <typename DeviceType, typename T>
class FeedKernel
    : public framework::OpKernelBase<DeviceType, FeedParam<DeviceType>> {
 public:
  void Compute(const FeedParam<DeviceType> &param);
  bool Init(FeedParam<DeviceType> *param);
};

}  // namespace operators
}  // namespace paddle_mobile
...
@@ -911,13 +911,13 @@ class FeedParam : public OpParam {
  public:
   FeedParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
-            const AttributeMap &attrs, Scope *scope) {
-    input_x_ = InputXFrom<LoDTensor>(inputs, *scope);
-    out_ = OutFrom<GType>(outputs, *scope);
-    auto var = scope->Var("batch_size");
+            const AttributeMap &attrs, const Scope &scope) {
+    input_x_ = InputXFrom<LoDTensor>(inputs, scope);
+    out_ = OutFrom<GType>(outputs, scope);
+    auto var = scope.FindVar("batch_size");
     batch_size = var->GetValue<int>();
   }
-  const GType *InputX() const { return input_x_; }
+  const LoDTensor *InputX() const { return input_x_; }
   GType *Out() const { return out_; }
   const int BatchSize() const { return batch_size; }
......