提交 452900fd 编写于 作者: J Jiaying Zhao 提交者: GitHub

add opencl Scale op (#1645)

* fix memcpy size in opencl fetch kernel

* add opencl Scale op
上级 6b32f9ad
......@@ -46,6 +46,7 @@ const char *G_OP_TYPE_RELU6 = "relu6";
const char *G_OP_TYPE_LEAKY_RELU = "leaky_relu";
const char *G_OP_TYPE_RESHAPE = "reshape";
const char *G_OP_TYPE_RESHAPE2 = "reshape2";
const char *G_OP_TYPE_SCALE = "scale";
const char *G_OP_TYPE_SIGMOID = "sigmoid";
const char *G_OP_TYPE_SOFTMAX = "softmax";
const char *G_OP_TYPE_TRANSPOSE = "transpose";
......@@ -128,6 +129,7 @@ std::unordered_map<
{G_OP_TYPE_RELU, {{"X"}, {"Out"}}},
{G_OP_TYPE_RELU6, {{"X"}, {"Out"}}},
{G_OP_TYPE_LEAKY_RELU, {{"X"}, {"Out"}}},
{G_OP_TYPE_SCALE, {{"X"}, {"Out"}}},
{G_OP_TYPE_SOFTMAX, {{"X"}, {"Out"}}},
{G_OP_TYPE_SIGMOID, {{"X"}, {"Out"}}},
{G_OP_TYPE_MUL, {{"X"}, {"Out"}}},
......
......@@ -144,6 +144,7 @@ extern const char *G_OP_TYPE_RELU;
extern const char *G_OP_TYPE_RELU6;
extern const char *G_OP_TYPE_LEAKY_RELU;
extern const char *G_OP_TYPE_RESHAPE;
extern const char *G_OP_TYPE_SCALE;
extern const char *G_OP_TYPE_SIGMOID;
extern const char *G_OP_TYPE_SOFTMAX;
extern const char *G_OP_TYPE_TRANSPOSE;
......
......@@ -142,7 +142,7 @@ LOAD_OP1(depthwise_conv2d, CPU);
LOAD_OP1(conv2d_transpose, CPU);
#endif
#ifdef SCALE_OP
LOAD_OP1(scale, CPU);
LOAD_OP2(scale, CPU, GPU_CL);
#endif
#ifdef ELEMENTWISEADD_OP
LOAD_OP2(elementwise_add, CPU, GPU_CL);
......
......@@ -22,6 +22,11 @@ limitations under the License. */
namespace paddle_mobile {
namespace operators {
// CPU scale kernel requires no one-time setup; all work happens in Compute.
template <>
bool ScaleKernel<CPU, float>::Init(ScaleParam<CPU> *param) {
  return true;  // nothing to initialize
}
template <>
void ScaleKernel<CPU, float>::Compute(const ScaleParam<CPU> &param) {
const auto input = param.InputX();
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
// Element-wise affine transform: out = scale * in + bias.
// Each work-item processes one half4 pixel; the global work size comes from
// CLHelper::DefaultWorkSize on the output image (presumably
// (channel_blocks, width, batch*height) — confirm against CLHelper).
__kernel void scale(__read_only image2d_t input,
                    __write_only image2d_t output,
                    __private float scale,
                    __private float bias,
                    // int, not float: the host sets this argument with
                    // clSetKernelArg(..., sizeof(int), &out_width), and
                    // mad24 below requires integer operands.
                    __private int out_width) {
  const int out_c = get_global_id(0);   // channel-block index
  const int out_w = get_global_id(1);   // width index
  const int out_nh = get_global_id(2);  // fused batch*height index

  // Integer-coordinate reads require unnormalized coordinates per the
  // OpenCL spec (read_imageh with int2 + CLK_NORMALIZED_COORDS_TRUE is
  // undefined), so use CLK_NORMALIZED_COORDS_FALSE.
  const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |
                            CLK_ADDRESS_CLAMP |
                            CLK_FILTER_NEAREST;

  // x pixel position within the image layout: out_c * out_width + out_w.
  const int pos_x = mad24(out_c, out_width, out_w);

  half4 in = read_imageh(input, sampler, (int2)(pos_x, out_nh));
  // Explicit half casts keep the arithmetic in half precision, matching
  // the original implicit float->half conversion.
  in = (half)scale * in + (half)bias;
  write_imageh(output, (int2)(pos_x, out_nh), in);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef SCALE_OP
#include "operators/kernel/scale_kernel.h"
namespace paddle_mobile {
namespace operators {
// Compiles/caches the "scale" OpenCL kernel so Compute can retrieve it via
// KernelAt(0). Always reports success — build failures are presumably
// surfaced inside CLHelper::AddKernel; TODO confirm.
template <>
bool ScaleKernel<GPU_CL, float>::Init(ScaleParam<GPU_CL>* param) {
  this->cl_helper_.AddKernel("scale", "scale_kernel.cl");
  return true;
}
// Launches the "scale" OpenCL kernel, computing out = scale * in + bias
// element-wise over the image-backed input/output tensors.
template <>
void ScaleKernel<GPU_CL, float>::Compute(const ScaleParam<GPU_CL>& param) {
  auto kernel = this->cl_helper_.KernelAt(0);  // registered in Init
  const auto* input = param.InputX();
  auto* output = param.Out();
  const float scale = param.Scale();
  const float bias = param.Bias();
  // Global work size derived from the output image dimensions.
  auto default_work_size = this->cl_helper_.DefaultWorkSize(*output);
  auto inputImage = input->GetCLImage();
  auto outputImage = output->GetCLImage();
  // dims()[3] — presumably NCHW, so this is the tensor width; the kernel
  // uses it to compute the x pixel position (c * out_width + w). Confirm.
  int out_width = output->dims()[3];
  clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputImage);
  clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputImage);
  clSetKernelArg(kernel, 2, sizeof(float), &scale);
  clSetKernelArg(kernel, 3, sizeof(float), &bias);
  // Set with sizeof(int): the kernel-side parameter must be declared int
  // for this to match.
  clSetKernelArg(kernel, 4, sizeof(int), &out_width);
  // NOTE(review): the cl_int return codes of clSetKernelArg and
  // clEnqueueNDRangeKernel are ignored here — consider checking them so
  // launch failures are not silent.
  clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel,
                         default_work_size.size(), NULL,
                         default_work_size.data(), NULL, 0, NULL, NULL);
}
template class ScaleKernel<GPU_CL, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef SCALE_OP
#include "framework/operator.h"
#include "operators/op_param.h"
......@@ -25,6 +27,9 @@ class ScaleKernel
: public framework::OpKernelBase<DeviceType, ScaleParam<DeviceType>> {
public:
void Compute(const ScaleParam<DeviceType>& param);
bool Init(ScaleParam<DeviceType>* param);
};
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -32,5 +32,7 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(scale, ops::ScaleOp);
#endif
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(scale, ops::ScaleOp);
#endif
#endif
......@@ -43,6 +43,7 @@ static const char *g_inceptionv3 =
"../models/InceptionV3_Spatial_Attention_Model";
static const char *g_nlp = "../models/nlp";
static const char *g_super = "../models/superresoltion";
static const char *g_superv2 = "../models/superv2";
static const char *g_resnet_50 = "../models/resnet_50";
static const char *g_resnet = "../models/resnet";
static const char *g_googlenet_combine = "../models/googlenet_combine";
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册