未验证 提交 77811367 编写于 作者: Y Yanzhan Yang 提交者: GitHub

1. fix group logic for convolution op. 2. add pixel shuffle op for OpenCL. (#2178)

上级 7931104f
...@@ -133,6 +133,7 @@ const char *G_OP_TYPE_BEAM_SEARCH_DECODE = "beam_search_decode"; ...@@ -133,6 +133,7 @@ const char *G_OP_TYPE_BEAM_SEARCH_DECODE = "beam_search_decode";
const char *G_OP_TYPE_FILL_CONSTAN_BATCH_SIZE_LIKE = const char *G_OP_TYPE_FILL_CONSTAN_BATCH_SIZE_LIKE =
"fill_constant_batch_size_like"; "fill_constant_batch_size_like";
const char *G_OP_TYPE_FUSION_INSTANCENORM_RELU = "fusion_instancenorm_relu"; const char *G_OP_TYPE_FUSION_INSTANCENORM_RELU = "fusion_instancenorm_relu";
const char *G_OP_TYPE_PIXEL_SHUFFLE = "pixel_shuffle";
std::unordered_map< std::unordered_map<
std::string, std::pair<std::vector<std::string>, std::vector<std::string>>> std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
...@@ -256,5 +257,6 @@ std::unordered_map< ...@@ -256,5 +257,6 @@ std::unordered_map<
{G_OP_TYPE_BEAM_SEARCH_DECODE, {G_OP_TYPE_BEAM_SEARCH_DECODE,
{{"Ids", "Scores"}, {"SentenceIds", "SentenceScores"}}}, {{"Ids", "Scores"}, {"SentenceIds", "SentenceScores"}}},
{G_OP_TYPE_FILL_CONSTAN_BATCH_SIZE_LIKE, {{"Input"}, {"Out"}}}, {G_OP_TYPE_FILL_CONSTAN_BATCH_SIZE_LIKE, {{"Input"}, {"Out"}}},
{G_OP_TYPE_PAD2D, {{"X"}, {"Out"}}}}; {G_OP_TYPE_PAD2D, {{"X"}, {"Out"}}},
{G_OP_TYPE_PIXEL_SHUFFLE, {{"X"}, {"Out"}}}};
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -264,6 +264,7 @@ extern const char *G_OP_TYPE_FUSION_DECONV_ADD_BN_RELU; ...@@ -264,6 +264,7 @@ extern const char *G_OP_TYPE_FUSION_DECONV_ADD_BN_RELU;
extern const char *G_OP_TYPE_FUSION_DECONV_ADD_BN; extern const char *G_OP_TYPE_FUSION_DECONV_ADD_BN;
extern const char *G_OP_TYPE_FUSION_DECONV_BN_RELU; extern const char *G_OP_TYPE_FUSION_DECONV_BN_RELU;
extern const char *G_OP_TYPE_FUSION_INSTANCENORM_RELU; extern const char *G_OP_TYPE_FUSION_INSTANCENORM_RELU;
extern const char *G_OP_TYPE_PIXEL_SHUFFLE;
extern std::unordered_map< extern std::unordered_map<
std::string, std::pair<std::vector<std::string>, std::vector<std::string>>> std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
......
...@@ -377,3 +377,6 @@ LOAD_OP1(range, CPU); ...@@ -377,3 +377,6 @@ LOAD_OP1(range, CPU);
#ifdef REDUCE_PROD_OP #ifdef REDUCE_PROD_OP
LOAD_OP1(reduce_prod, CPU); LOAD_OP1(reduce_prod, CPU);
#endif #endif
#ifdef PIXEL_SHUFFLE_OP
LOAD_OP1(pixel_shuffle, GPU_CL);
#endif
...@@ -59,6 +59,7 @@ void ConvAddBnReluPt1x2(framework::CLHelper *cl_helper, ...@@ -59,6 +59,7 @@ void ConvAddBnReluPt1x2(framework::CLHelper *cl_helper,
int input_height = param.Input()->dims()[2]; int input_height = param.Input()->dims()[2];
int output_width = param.Output()->dims()[3]; int output_width = param.Output()->dims()[3];
int output_height = param.Output()->dims()[2]; int output_height = param.Output()->dims()[2];
int output_c = param.Output()->dims()[1];
int filter_channel = param.Filter()->dims()[1]; int filter_channel = param.Filter()->dims()[1];
int input_channel = param.Input()->dims()[1]; int input_channel = param.Input()->dims()[1];
// //
...@@ -216,6 +217,7 @@ void ConvAddBnRelu(framework::CLHelper *cl_helper, ...@@ -216,6 +217,7 @@ void ConvAddBnRelu(framework::CLHelper *cl_helper,
int input_height = param.Input()->dims()[2]; int input_height = param.Input()->dims()[2];
int output_width = param.Output()->dims()[3]; int output_width = param.Output()->dims()[3];
int output_height = param.Output()->dims()[2]; int output_height = param.Output()->dims()[2];
int output_c = param.Output()->dims()[1];
int filter_channel = param.Filter()->dims()[1]; int filter_channel = param.Filter()->dims()[1];
int input_channel = param.Input()->dims()[1]; int input_channel = param.Input()->dims()[1];
...@@ -397,21 +399,21 @@ void ConvAddBnRelu(framework::CLHelper *cl_helper, ...@@ -397,21 +399,21 @@ void ConvAddBnRelu(framework::CLHelper *cl_helper,
status = clSetKernelArg(kernel, index++, sizeof(int), &output_height); status = clSetKernelArg(kernel, index++, sizeof(int), &output_height);
CL_CHECK_ERRORS(status); CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, index++, sizeof(int), &output_c);
CL_CHECK_ERRORS(status);
if (param.Filter()->dims()[2] == 3 && param.Filter()->dims()[3] == 3) { if (param.Filter()->dims()[2] == 3 && param.Filter()->dims()[3] == 3) {
if (filter_channel != input_channel) { if (filter_channel != input_channel) {
if (filter_channel != 1) { status = clSetKernelArg(kernel, index++, sizeof(int), &filter_channel);
status = CL_CHECK_ERRORS(status);
clSetKernelArg(kernel, index++, sizeof(int), &filter_channel); int group = input_channel / filter_channel;
CL_CHECK_ERRORS(status); status = clSetKernelArg(kernel, index++, sizeof(int), &group);
int has_group = 1; CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, index++, sizeof(int), &has_group);
CL_CHECK_ERRORS(status);
}
} else { } else {
status = clSetKernelArg(kernel, index++, sizeof(int), &filter_channel); status = clSetKernelArg(kernel, index++, sizeof(int), &filter_channel);
CL_CHECK_ERRORS(status); CL_CHECK_ERRORS(status);
int has_group = 0; int group = 1;
status = clSetKernelArg(kernel, index++, sizeof(int), &has_group); status = clSetKernelArg(kernel, index++, sizeof(int), &group);
CL_CHECK_ERRORS(status); CL_CHECK_ERRORS(status);
} }
} }
......
...@@ -48,8 +48,9 @@ __kernel void conv_3x3(__private const int global_size_dim0, ...@@ -48,8 +48,9 @@ __kernel void conv_3x3(__private const int global_size_dim0,
__private const int input_height,/* of one block */ __private const int input_height,/* of one block */
__private const int output_width, __private const int output_width,
__private const int output_height, __private const int output_height,
__private const int output_c,
__private const int filter_channel, __private const int filter_channel,
__private const int has_group) { __private const int group) {
const int out_c = get_global_id(0); const int out_c = get_global_id(0);
const int out_w = get_global_id(1); const int out_w = get_global_id(1);
...@@ -90,7 +91,7 @@ __kernel void conv_3x3(__private const int global_size_dim0, ...@@ -90,7 +91,7 @@ __kernel void conv_3x3(__private const int global_size_dim0,
#endif #endif
half4 input[9]; half4 input[9];
if (has_group == 0) { if (group == 1) {
for (int i = 0; i < input_c; ++i) { for (int i = 0; i < input_c; ++i) {
int2 pos_in = (int2)(i * input_width + in_pos_in_one_block.x, in_pos_in_one_block.y); int2 pos_in = (int2)(i * input_width + in_pos_in_one_block.x, in_pos_in_one_block.y);
input[0] = select(read_imageh(input_image, sampler, input[0] = select(read_imageh(input_image, sampler,
...@@ -326,7 +327,7 @@ __kernel void conv_3x3(__private const int global_size_dim0, ...@@ -326,7 +327,7 @@ __kernel void conv_3x3(__private const int global_size_dim0,
} }
} else { } else {
for (int i = 0; i < 4; i++) { for (int i = 0; i < 4; i++) {
int used_input_channel_num = (out_c * 4 + i) * filter_channel; int used_input_channel_num = (out_c * 4 + i) / (output_c / group) * filter_channel;
for (int f_c = 0; f_c < filter_channel; ++f_c) { for (int f_c = 0; f_c < filter_channel; ++f_c) {
int input_c = used_input_channel_num + f_c; int input_c = used_input_channel_num + f_c;
int input_block = input_c / 4; int input_block = input_c / 4;
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
__kernel void pixel_shuffle(__read_only image2d_t input_image,
__write_only image2d_t output_image,
__private const int in_N,
__private const int in_C,
__private const int in_H,
__private const int in_W,
__private const int out_N,
__private const int out_C,
__private const int out_H,
__private const int out_W,
__private const int upscale_factor) {
const int out_c4 = get_global_id(0);
const int out_w = get_global_id(1);
const int out_nh = get_global_id(2);
int out_h = out_nh % out_H;
int out_n = out_nh / out_H;
const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE |
CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST;
int in_h = out_h / upscale_factor;
int in_w = out_w / upscale_factor;
int in_nh = out_n * in_H + in_h;
half4 res;
int out_c;
int in_c;
half4 in;
int2 in_pos;
out_c = out_c4 * 4 + 0;
in_c = out_c * upscale_factor * upscale_factor + (out_h % upscale_factor) * upscale_factor + (out_w % upscale_factor);
in_pos.x = (in_c / 4) * in_W + in_w;
in_pos.y = in_nh;
in = read_imageh(input_image, sampler, in_pos);
if (in_c % 4 == 0) {
res.x = in.x;
} else if (in_c % 4 == 1) {
res.x = in.y;
} else if (in_c % 4 == 2) {
res.x = in.z;
} else if (in_c % 4 == 3) {
res.x = in.w;
}
out_c = out_c4 * 4 + 1;
in_c = out_c * upscale_factor * upscale_factor + (out_h % upscale_factor) * upscale_factor + (out_w % upscale_factor);
in_pos.x = (in_c / 4) * in_W + in_w;
in_pos.y = in_nh;
in = read_imageh(input_image, sampler, in_pos);
if (in_c % 4 == 0) {
res.y = in.x;
} else if (in_c % 4 == 1) {
res.y = in.y;
} else if (in_c % 4 == 2) {
res.y = in.z;
} else if (in_c % 4 == 3) {
res.y = in.w;
}
out_c = out_c4 * 4 + 2;
in_c = out_c * upscale_factor * upscale_factor + (out_h % upscale_factor) * upscale_factor + (out_w % upscale_factor);
in_pos.x = (in_c / 4) * in_W + in_w;
in_pos.y = in_nh;
in = read_imageh(input_image, sampler, in_pos);
if (in_c % 4 == 0) {
res.z = in.x;
} else if (in_c % 4 == 1) {
res.z = in.y;
} else if (in_c % 4 == 2) {
res.z = in.z;
} else if (in_c % 4 == 3) {
res.z = in.w;
}
out_c = out_c4 * 4 + 3;
in_c = out_c * upscale_factor * upscale_factor + (out_h % upscale_factor) * upscale_factor + (out_w % upscale_factor);
in_pos.x = (in_c / 4) * in_W + in_w;
in_pos.y = in_nh;
in = read_imageh(input_image, sampler, in_pos);
if (in_c % 4 == 0) {
res.w = in.x;
} else if (in_c % 4 == 1) {
res.w = in.y;
} else if (in_c % 4 == 2) {
res.w = in.z;
} else if (in_c % 4 == 3) {
res.w = in.w;
}
int2 out_pos;
out_pos.x = out_c4 * out_W + out_w;
out_pos.y = out_nh;
write_imageh(output_image, out_pos, res);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PIXEL_SHUFFLE_OP
#include "operators/kernel/pixel_shuffle_kernel.h"
namespace paddle_mobile {
namespace operators {
template <>
bool PixelShuffleKernel<GPU_CL, float>::Init(PixelShuffleParam<GPU_CL> *param) {
this->cl_helper_.AddKernel("pixel_shuffle", "pixel_shuffle_kernel.cl");
return true;
}
template <>
void PixelShuffleKernel<GPU_CL, float>::Compute(
const PixelShuffleParam<GPU_CL> &param) {
auto kernel = this->cl_helper_.KernelAt(0);
auto default_work_size = this->cl_helper_.DefaultWorkSize(*param.Out());
auto input_image = param.InputX()->GetCLImage();
auto output_image = param.Out()->GetCLImage();
auto upscale_factor = param.upscale_factor();
int input_n = param.InputX()->dims()[0];
int input_c = param.InputX()->dims()[1];
int input_h = param.InputX()->dims()[2];
int input_w = param.InputX()->dims()[3];
int output_n = param.Out()->dims()[0];
int output_c = param.Out()->dims()[1];
int output_h = param.Out()->dims()[2];
int output_w = param.Out()->dims()[3];
cl_int status;
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input_image);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &output_image);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 2, sizeof(int), &input_n);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 3, sizeof(int), &input_c);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 4, sizeof(int), &input_h);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 5, sizeof(int), &input_w);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 6, sizeof(int), &output_n);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 7, sizeof(int), &output_c);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 8, sizeof(int), &output_h);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 9, sizeof(int), &output_w);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 10, sizeof(int), &upscale_factor);
CL_CHECK_ERRORS(status);
status = clEnqueueNDRangeKernel(
this->cl_helper_.CLCommandQueue(), kernel, default_work_size.size(), NULL,
default_work_size.data(), NULL, 0, NULL, NULL);
CL_CHECK_ERRORS(status);
}
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef LRN_OP
#include <cmath>
#ifdef _OPENMP
#include <omp.h>
#endif
#ifdef __ARM_NEON
#include <arm_neon.h>
#include "operators/math/math.h"
#endif
#include "framework/operator.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
template <typename DeviceType, typename T>
class PixelShuffleKernel
: public framework::OpKernelBase<DeviceType,
PixelShuffleParam<DeviceType>> {
public:
void Compute(const PixelShuffleParam<DeviceType> &param);
bool Init(PixelShuffleParam<DeviceType> *param);
};
} // namespace operators
} // namespace paddle_mobile
#endif
...@@ -3628,5 +3628,35 @@ class EXPParam : public OpParam { ...@@ -3628,5 +3628,35 @@ class EXPParam : public OpParam {
GType *out_; GType *out_;
}; };
#endif #endif
#ifdef PIXEL_SHUFFLE_OP
template <typename Dtype>
class PixelShuffleParam : public OpParam {
typedef typename DtypeTensorTrait<Dtype>::gtype GType;
typedef typename DtypeTensorTrait<Dtype>::rtype RType;
public:
PixelShuffleParam(const VariableNameMap &inputs,
const VariableNameMap &outputs, const AttributeMap &attrs,
Scope *scope)
: OpParam(inputs, outputs, attrs, scope) {
input_x_ = InputXFrom<GType>(inputs, *scope);
out_ = OutFrom<GType>(outputs, *scope);
upscale_factor_ = GetAttr<int>("upscale_factor", attrs);
}
const GType *InputX() const { return input_x_; }
GType *Out() const { return out_; }
const int &upscale_factor() const { return upscale_factor_; }
private:
GType *input_x_;
GType *out_;
int upscale_factor_;
};
#endif
} // namespace operators } // namespace operators
} // namespace paddle_mobile } // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PIXEL_SHUFFLE_OP
#include "operators/pixel_shuffle_op.h"
namespace paddle_mobile {
namespace operators {
template <typename Dtype, typename T>
void PixelShuffleOp<Dtype, T>::InferShape() const {
auto x_dims = this->param_.InputX()->dims();
int n = x_dims[0];
int c = x_dims[1];
int h = x_dims[2];
int w = x_dims[3];
int upscale_factor = this->param_.upscale_factor();
this->param_.Out()->Resize(
framework::make_ddim({n, c / (upscale_factor * upscale_factor),
h * upscale_factor, w * upscale_factor}));
}
} // namespace operators
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(pixel_shuffle, ops::PixelShuffleOp);
#endif
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PIXEL_SHUFFLE_OP
#pragma once
#include <string>
#include "framework/operator.h"
#include "operators/kernel/pixel_shuffle_kernel.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
using std::string;
template <typename DeviceType, typename T>
class PixelShuffleOp : public framework::OperatorWithKernel<
DeviceType, PixelShuffleParam<DeviceType>,
operators::PixelShuffleKernel<DeviceType, T>> {
public:
PixelShuffleOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs,
const framework::AttributeMap &attrs, framework::Scope *scope)
: framework::OperatorWithKernel<
DeviceType, PixelShuffleParam<DeviceType>,
operators::PixelShuffleKernel<DeviceType, T>>(type, inputs, outputs,
attrs, scope) {}
void InferShape() const override;
protected:
};
} // namespace operators
} // namespace paddle_mobile
#endif
...@@ -378,6 +378,7 @@ if(NOT FOUND_MATCH) ...@@ -378,6 +378,7 @@ if(NOT FOUND_MATCH)
set(RANGE_OP ON) set(RANGE_OP ON)
set(REDUCE_PROD_OP ON) set(REDUCE_PROD_OP ON)
set(FUSION_INSTANCENORM_RELU_OP ON) set(FUSION_INSTANCENORM_RELU_OP ON)
set(PIXEL_SHUFFLE_OP ON)
endif() endif()
# option(BATCHNORM_OP "" ON) # option(BATCHNORM_OP "" ON)
...@@ -751,3 +752,6 @@ endif() ...@@ -751,3 +752,6 @@ endif()
if (REDUCE_PROD_OP) if (REDUCE_PROD_OP)
add_definitions(-DREDUCE_PROD_OP) add_definitions(-DREDUCE_PROD_OP)
endif() endif()
if (PIXEL_SHUFFLE_OP)
add_definitions(-DPIXEL_SHUFFLE_OP)
endif()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册