提交 84e1d25f 编写于 作者: J Jiaying Zhao 提交者: GitHub

[MOBILE][OPENCL]Add elementwise_sub op; Fix instancenorm op; (#2622)

上级 a39f92ea
...@@ -134,6 +134,8 @@ const char *G_OP_TYPE_FILL_CONSTAN_BATCH_SIZE_LIKE = ...@@ -134,6 +134,8 @@ const char *G_OP_TYPE_FILL_CONSTAN_BATCH_SIZE_LIKE =
"fill_constant_batch_size_like"; "fill_constant_batch_size_like";
const char *G_OP_TYPE_FUSION_INSTANCENORM_RELU = "fusion_instancenorm_relu"; const char *G_OP_TYPE_FUSION_INSTANCENORM_RELU = "fusion_instancenorm_relu";
const char *G_OP_TYPE_PIXEL_SHUFFLE = "pixel_shuffle"; const char *G_OP_TYPE_PIXEL_SHUFFLE = "pixel_shuffle";
const char *G_OP_TYPE_EXPAND = "expand";
const char *G_OP_TYPE_GRID_SAMPLER = "grid_sampler";
std::unordered_map< std::unordered_map<
std::string, std::pair<std::vector<std::string>, std::vector<std::string>>> std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
...@@ -156,7 +158,7 @@ std::unordered_map< ...@@ -156,7 +158,7 @@ std::unordered_map<
{G_OP_TYPE_ELEMENTWISE_MUL, {{"X", "Y"}, {"Out"}}}, {G_OP_TYPE_ELEMENTWISE_MUL, {{"X", "Y"}, {"Out"}}},
{G_OP_TYPE_POOL2D, {{"X"}, {"Out"}}}, {G_OP_TYPE_POOL2D, {{"X"}, {"Out"}}},
{G_OP_TYPE_BATCHNORM, {{"X"}, {"Y"}}}, {G_OP_TYPE_BATCHNORM, {{"X"}, {"Y"}}},
{G_OP_TYPE_INSTANCENORM, {{"X"}, {"Out"}}}, {G_OP_TYPE_INSTANCENORM, {{"X"}, {"Y"}}},
{G_OP_TYPE_FUSION_INSTANCENORM_RELU, {{"X"}, {"Out"}}}, {G_OP_TYPE_FUSION_INSTANCENORM_RELU, {{"X"}, {"Out"}}},
{G_OP_TYPE_LRN, {{"X"}, {"Out"}}}, {G_OP_TYPE_LRN, {{"X"}, {"Out"}}},
{G_OP_TYPE_CONCAT, {{"X"}, {"Out"}}}, {G_OP_TYPE_CONCAT, {{"X"}, {"Out"}}},
...@@ -258,5 +260,7 @@ std::unordered_map< ...@@ -258,5 +260,7 @@ std::unordered_map<
{{"Ids", "Scores"}, {"SentenceIds", "SentenceScores"}}}, {{"Ids", "Scores"}, {"SentenceIds", "SentenceScores"}}},
{G_OP_TYPE_FILL_CONSTAN_BATCH_SIZE_LIKE, {{"Input"}, {"Out"}}}, {G_OP_TYPE_FILL_CONSTAN_BATCH_SIZE_LIKE, {{"Input"}, {"Out"}}},
{G_OP_TYPE_PAD2D, {{"X"}, {"Out"}}}, {G_OP_TYPE_PAD2D, {{"X"}, {"Out"}}},
{G_OP_TYPE_PIXEL_SHUFFLE, {{"X"}, {"Out"}}}}; {G_OP_TYPE_PIXEL_SHUFFLE, {{"X"}, {"Out"}}},
{G_OP_TYPE_EXPAND, {{"X"}, {"Out"}}},
{G_OP_TYPE_GRID_SAMPLER, {{"X", "Grid"}, {"Output"}}}};
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -265,6 +265,8 @@ extern const char *G_OP_TYPE_FUSION_DECONV_ADD_BN; ...@@ -265,6 +265,8 @@ extern const char *G_OP_TYPE_FUSION_DECONV_ADD_BN;
extern const char *G_OP_TYPE_FUSION_DECONV_BN_RELU; extern const char *G_OP_TYPE_FUSION_DECONV_BN_RELU;
extern const char *G_OP_TYPE_FUSION_INSTANCENORM_RELU; extern const char *G_OP_TYPE_FUSION_INSTANCENORM_RELU;
extern const char *G_OP_TYPE_PIXEL_SHUFFLE; extern const char *G_OP_TYPE_PIXEL_SHUFFLE;
extern const char *G_OP_TYPE_EXPAND;
extern const char *G_OP_TYPE_GRID_SAMPLER;
extern std::unordered_map< extern std::unordered_map<
std::string, std::pair<std::vector<std::string>, std::vector<std::string>>> std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
......
...@@ -246,7 +246,7 @@ LOAD_OP2(fusion_conv_bn, CPU, FPGA); ...@@ -246,7 +246,7 @@ LOAD_OP2(fusion_conv_bn, CPU, FPGA);
LOAD_FUSION_MATCHER(fusion_conv_bn); LOAD_FUSION_MATCHER(fusion_conv_bn);
#endif #endif
#ifdef ELEMENTWISESUB_OP #ifdef ELEMENTWISESUB_OP
LOAD_OP1(elementwise_sub, CPU) LOAD_OP2(elementwise_sub, CPU, GPU_CL)
#endif #endif
#ifdef TOP_K_OP #ifdef TOP_K_OP
LOAD_OP1(top_k, CPU) LOAD_OP1(top_k, CPU)
...@@ -380,3 +380,9 @@ LOAD_OP1(reduce_prod, CPU); ...@@ -380,3 +380,9 @@ LOAD_OP1(reduce_prod, CPU);
#ifdef PIXEL_SHUFFLE_OP #ifdef PIXEL_SHUFFLE_OP
LOAD_OP1(pixel_shuffle, GPU_CL); LOAD_OP1(pixel_shuffle, GPU_CL);
#endif #endif
#ifdef EXPAND_OP
LOAD_OP1(expand, GPU_CL);
#endif
#ifdef GRID_SAMPLER_OP
LOAD_OP1(grid_sampler, GPU_CL);
#endif
...@@ -32,6 +32,9 @@ namespace ops = paddle_mobile::operators; ...@@ -32,6 +32,9 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU #ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(elementwise_sub, ops::ElementwiseSubOp); REGISTER_OPERATOR_CPU(elementwise_sub, ops::ElementwiseSubOp);
#endif #endif
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(elementwise_sub, ops::ElementwiseSubOp);
#endif
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
#endif #endif
......
...@@ -45,7 +45,7 @@ class FusionInstanceNormReluMatcher : public framework::FusionOpMatcher { ...@@ -45,7 +45,7 @@ class FusionInstanceNormReluMatcher : public framework::FusionOpMatcher {
template <typename DeviceType, typename T> template <typename DeviceType, typename T>
class FusionInstanceNormReluOp class FusionInstanceNormReluOp
: public framework::OperatorWithKernel< : public framework::OperatorWithKernel<
DeviceType, InstanceNormParam<DeviceType>, DeviceType, FusionInstanceNormReluParam<DeviceType>,
operators::InstanceNormReluKernel<DeviceType, T>> { operators::InstanceNormReluKernel<DeviceType, T>> {
public: public:
FusionInstanceNormReluOp(const string &type, const VariableNameMap &inputs, FusionInstanceNormReluOp(const string &type, const VariableNameMap &inputs,
...@@ -53,7 +53,7 @@ class FusionInstanceNormReluOp ...@@ -53,7 +53,7 @@ class FusionInstanceNormReluOp
const framework::AttributeMap &attrs, const framework::AttributeMap &attrs,
framework::Scope *scope) framework::Scope *scope)
: framework::OperatorWithKernel< : framework::OperatorWithKernel<
DeviceType, InstanceNormParam<DeviceType>, DeviceType, FusionInstanceNormReluParam<DeviceType>,
operators::InstanceNormReluKernel<DeviceType, T>>( operators::InstanceNormReluKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
......
...@@ -24,7 +24,7 @@ namespace operators { ...@@ -24,7 +24,7 @@ namespace operators {
template <typename Dtype, typename T> template <typename Dtype, typename T>
void InstanceNormOp<Dtype, T>::InferShape() const { void InstanceNormOp<Dtype, T>::InferShape() const {
auto x_dims = this->param_.InputX()->dims(); auto x_dims = this->param_.InputX()->dims();
this->param_.Out()->Resize(x_dims); this->param_.OutputY()->Resize(x_dims);
} }
} // namespace operators } // namespace operators
......
...@@ -17,17 +17,17 @@ limitations under the License. */ ...@@ -17,17 +17,17 @@ limitations under the License. */
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
void InstanceNorm(framework::CLHelper *cl_helper, void InstanceNorm(framework::CLHelper *cl_helper,
const InstanceNormParam<GPU_CL> &param) { const framework::CLImage *input, framework::CLImage *output,
float epsilon) {
auto kernel = cl_helper->KernelAt(0); auto kernel = cl_helper->KernelAt(0);
auto &dims = param.Out()->dims(); auto &dims = output->dims();
const int n = dims[0]; const int n = dims[0];
const int c_group = (dims[1] + 3) / 4; const int c_group = (dims[1] + 3) / 4;
const int h = dims[2]; const int h = dims[2];
const int w = dims[3]; const int w = dims[3];
auto epsilon = param.Epsilon(); auto input_image = input->GetCLImage();
auto input = param.InputX()->GetCLImage(); auto out_image = output->GetCLImage();
auto out = param.Out()->GetCLImage();
// DLOG << "Epsilon: " << epsilon; // DLOG << "Epsilon: " << epsilon;
...@@ -66,9 +66,9 @@ void InstanceNorm(framework::CLHelper *cl_helper, ...@@ -66,9 +66,9 @@ void InstanceNorm(framework::CLHelper *cl_helper,
CL_CHECK_ERRORS(status); CL_CHECK_ERRORS(status);
clSetKernelArg(kernel, 5, sizeof(cl_float), &epsilon); clSetKernelArg(kernel, 5, sizeof(cl_float), &epsilon);
CL_CHECK_ERRORS(status); CL_CHECK_ERRORS(status);
clSetKernelArg(kernel, 6, sizeof(cl_mem), &input); clSetKernelArg(kernel, 6, sizeof(cl_mem), &input_image);
CL_CHECK_ERRORS(status); CL_CHECK_ERRORS(status);
clSetKernelArg(kernel, 7, sizeof(cl_mem), &out); clSetKernelArg(kernel, 7, sizeof(cl_mem), &out_image);
CL_CHECK_ERRORS(status); CL_CHECK_ERRORS(status);
clEnqueueNDRangeKernel(cl_helper->CLCommandQueue(), kernel, 3, NULL, clEnqueueNDRangeKernel(cl_helper->CLCommandQueue(), kernel, 3, NULL,
work_size, local_work_size, 0, NULL, NULL); work_size, local_work_size, 0, NULL, NULL);
......
...@@ -21,7 +21,8 @@ limitations under the License. */ ...@@ -21,7 +21,8 @@ limitations under the License. */
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
void InstanceNorm(framework::CLHelper *cl_helper, void InstanceNorm(framework::CLHelper *cl_helper,
const InstanceNormParam<GPU_CL> &param); const framework::CLImage *input, framework::CLImage *output,
float epsilon);
} }
} // namespace paddle_mobile } // namespace paddle_mobile
#endif #endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
__kernel void elementwise_sub(__global image2d_t inputImage, __global image2d_t bias, __write_only image2d_t outputImage) {
int x = get_global_id(0);
int y = get_global_id(1);
const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
int2 coords;
coords.x = x;
coords.y = y;
half4 input = read_imageh(inputImage, sampler, coords);
half4 biase = read_imageh(bias, sampler, coords);
half4 output = input - biase;
write_imageh(outputImage, coords, output);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ELEMENTWISESUB_OP
#include "operators/kernel/elementwise_sub_kernel.h"
namespace paddle_mobile {
namespace operators {
template <>
bool ElementwiseSubKernel<GPU_CL, float>::Init(
ElementwiseSubParam<GPU_CL> *param) {
framework::CLImage *bias = reinterpret_cast<framework::CLImage *>(
const_cast<framework::CLImage *>(param->InputY()));
if (bias->dims().size() == 4) {
if (!bias->isInit()) {
bias->InitNormalCLImage(cl_helper_.CLContext(),
this->cl_helper_.CLCommandQueue());
}
DLOG << " bias: " << *bias;
this->cl_helper_.AddKernel("elementwise_sub", "elementwise_sub_kernel.cl");
} else {
DLOG << "error:bias dims not support";
}
return true;
}
template <>
void ElementwiseSubKernel<GPU_CL, float>::Compute(
const ElementwiseSubParam<GPU_CL> &param) {
auto input = param.InputX();
auto bias = param.InputY();
auto output = param.Out();
cl_int status;
auto kernel = this->cl_helper_.KernelAt(0);
if (bias->dims().size() == 4) {
cl_mem input_image = input->GetCLImage();
cl_mem bias_image = bias->GetCLImage();
cl_mem output_image = output->GetCLImage();
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input_image);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bias_image);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 2, sizeof(cl_mem), &output_image);
CL_CHECK_ERRORS(status);
auto width = input->ImageWidth();
auto height = input->ImageHeight();
size_t global_work_size[2] = {width, height};
status =
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2,
NULL, global_work_size, NULL, 0, NULL, NULL);
CL_CHECK_ERRORS(status);
} else {
DLOG << "error:bias dims not support";
}
}
template class ElementwiseSubKernel<GPU_CL, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
...@@ -23,7 +23,7 @@ namespace operators { ...@@ -23,7 +23,7 @@ namespace operators {
template <> template <>
bool InstanceNormKernel<GPU_CL, float>::Init(InstanceNormParam<GPU_CL> *param) { bool InstanceNormKernel<GPU_CL, float>::Init(InstanceNormParam<GPU_CL> *param) {
auto &dims = param->Out()->dims(); auto &dims = param->OutputY()->dims();
const int h = dims[2]; const int h = dims[2];
std::string build_options = ""; std::string build_options = "";
if (h == 128) { if (h == 128) {
...@@ -41,7 +41,8 @@ bool InstanceNormKernel<GPU_CL, float>::Init(InstanceNormParam<GPU_CL> *param) { ...@@ -41,7 +41,8 @@ bool InstanceNormKernel<GPU_CL, float>::Init(InstanceNormParam<GPU_CL> *param) {
template <> template <>
void InstanceNormKernel<GPU_CL, float>::Compute( void InstanceNormKernel<GPU_CL, float>::Compute(
const InstanceNormParam<GPU_CL> &param) { const InstanceNormParam<GPU_CL> &param) {
InstanceNorm(&this->cl_helper_, param); InstanceNorm(&this->cl_helper_, param.InputX(), param.OutputY(),
param.Epsilon());
} }
template class InstanceNormKernel<GPU_CL, float>; template class InstanceNormKernel<GPU_CL, float>;
......
...@@ -23,7 +23,7 @@ namespace operators { ...@@ -23,7 +23,7 @@ namespace operators {
template <> template <>
bool InstanceNormReluKernel<GPU_CL, float>::Init( bool InstanceNormReluKernel<GPU_CL, float>::Init(
InstanceNormParam<GPU_CL> *param) { FusionInstanceNormReluParam<GPU_CL> *param) {
auto &dims = param->Out()->dims(); auto &dims = param->Out()->dims();
const int h = dims[2]; const int h = dims[2];
std::string build_options = "-DRELU"; std::string build_options = "-DRELU";
...@@ -41,8 +41,8 @@ bool InstanceNormReluKernel<GPU_CL, float>::Init( ...@@ -41,8 +41,8 @@ bool InstanceNormReluKernel<GPU_CL, float>::Init(
template <> template <>
void InstanceNormReluKernel<GPU_CL, float>::Compute( void InstanceNormReluKernel<GPU_CL, float>::Compute(
const InstanceNormParam<GPU_CL> &param) { const FusionInstanceNormReluParam<GPU_CL> &param) {
InstanceNorm(&this->cl_helper_, param); InstanceNorm(&this->cl_helper_, param.InputX(), param.Out(), param.Epsilon());
} }
template class InstanceNormReluKernel<GPU_CL, float>; template class InstanceNormReluKernel<GPU_CL, float>;
......
...@@ -30,10 +30,10 @@ using framework::OpKernelBase; ...@@ -30,10 +30,10 @@ using framework::OpKernelBase;
template <typename DeviceType, typename T> template <typename DeviceType, typename T>
class InstanceNormReluKernel class InstanceNormReluKernel
: public OpKernelBase<DeviceType, InstanceNormParam<DeviceType>> { : public OpKernelBase<DeviceType, FusionInstanceNormReluParam<DeviceType>> {
public: public:
void Compute(const InstanceNormParam<DeviceType> &param); void Compute(const FusionInstanceNormReluParam<DeviceType> &param);
bool Init(InstanceNormParam<DeviceType> *param); bool Init(FusionInstanceNormReluParam<DeviceType> *param);
}; };
} // namespace operators } // namespace operators
......
...@@ -927,6 +927,35 @@ class InstanceNormParam : public OpParam { ...@@ -927,6 +927,35 @@ class InstanceNormParam : public OpParam {
Scope *scope) Scope *scope)
: OpParam(inputs, outputs, attrs, scope) { : OpParam(inputs, outputs, attrs, scope) {
input_x_ = InputXFrom<GType>(inputs, *scope); input_x_ = InputXFrom<GType>(inputs, *scope);
output_y_ = OutputYFrom<GType>(outputs, *scope);
epsilon_ = GetAttr<float>("epsilon", attrs);
}
const GType *InputX() const { return input_x_; }
GType *OutputY() const { return output_y_; }
const float &Epsilon() const { return epsilon_; }
private:
GType *input_x_;
GType *output_y_;
float epsilon_;
};
#endif
#ifdef FUSION_INSTANCENORM_RELU_OP
template <typename Dtype>
class FusionInstanceNormReluParam : public OpParam {
typedef typename DtypeTensorTrait<Dtype>::gtype GType;
typedef typename DtypeTensorTrait<Dtype>::rtype RType;
public:
FusionInstanceNormReluParam(const VariableNameMap &inputs,
const VariableNameMap &outputs,
const AttributeMap &attrs, Scope *scope)
: OpParam(inputs, outputs, attrs, scope) {
input_x_ = InputXFrom<GType>(inputs, *scope);
out_ = OutFrom<GType>(outputs, *scope); out_ = OutFrom<GType>(outputs, *scope);
epsilon_ = GetAttr<float>("epsilon", attrs); epsilon_ = GetAttr<float>("epsilon", attrs);
} }
...@@ -3658,5 +3687,56 @@ class PixelShuffleParam : public OpParam { ...@@ -3658,5 +3687,56 @@ class PixelShuffleParam : public OpParam {
}; };
#endif #endif
#ifdef EXPAND_OP
template <typename Dtype>
class ExpandParam : public OpParam {
typedef typename DtypeTensorTrait<Dtype>::gtype GType;
typedef typename DtypeTensorTrait<Dtype>::rtype RType;
public:
ExpandParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
const AttributeMap &attrs, Scope *scope)
: OpParam(inputs, outputs, attrs, scope) {
input_x_ = InputXFrom<GType>(inputs, *scope);
out_ = OutFrom<GType>(outputs, *scope);
expand_times_ = GetAttr<std::vector<int>>("expand_times", attrs);
}
const GType *InputX() const { return input_x_; }
GType *Out() const { return out_; }
private:
GType *input_x_;
GType *out_;
std::vector<int> expand_times_;
};
#endif
#ifdef GRID_SAMPLER_OP
template <typename Dtype>
class GridSamplerParam : public OpParam {
typedef typename DtypeTensorTrait<Dtype>::gtype GType;
typedef typename DtypeTensorTrait<Dtype>::rtype RType;
public:
GridSamplerParam(const VariableNameMap &inputs,
const VariableNameMap &outputs, const AttributeMap &attrs,
Scope *scope)
: OpParam(inputs, outputs, attrs, scope) {
input_x_ = InputXFrom<GType>(inputs, *scope);
output_ = OutputFrom<GType>(outputs, *scope);
}
const GType *InputX() const { return input_x_; }
GType *Output() const { return output_; }
private:
GType *input_x_;
GType *output_;
};
#endif
} // namespace operators } // namespace operators
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -379,6 +379,8 @@ if(NOT FOUND_MATCH) ...@@ -379,6 +379,8 @@ if(NOT FOUND_MATCH)
set(REDUCE_PROD_OP ON) set(REDUCE_PROD_OP ON)
set(FUSION_INSTANCENORM_RELU_OP ON) set(FUSION_INSTANCENORM_RELU_OP ON)
set(PIXEL_SHUFFLE_OP ON) set(PIXEL_SHUFFLE_OP ON)
set(EXPAND_OP ON)
set(GRID_SAMPLER_OP ON)
endif() endif()
# option(BATCHNORM_OP "" ON) # option(BATCHNORM_OP "" ON)
...@@ -755,3 +757,10 @@ endif() ...@@ -755,3 +757,10 @@ endif()
if (PIXEL_SHUFFLE_OP) if (PIXEL_SHUFFLE_OP)
add_definitions(-DPIXEL_SHUFFLE_OP) add_definitions(-DPIXEL_SHUFFLE_OP)
endif() endif()
if (EXPAND_OP)
add_definitions(-DEXPAND_OP)
endif()
if (GRID_SAMPLER_OP)
add_definitions(-DGRID_SAMPLER_OP)
endif()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册