Commit 3ea21620 authored by NazgulLee, committed by Yanzhan Yang

fusion instancenorm and relu. test=develop (#1986)

Parent 9796c57d
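
This commit adds a fusion pass that folds an instancenorm op whose output feeds a relu into a single fusion_instancenorm_relu op, so the OpenCL backend launches one kernel instead of two. An illustrative sketch of the graph rewrite, not code from this patch:

// before:  X -> instancenorm -> tmp -> relu -> Out
// after:   X -> fusion_instancenorm_relu -> Out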
@@ -132,6 +132,7 @@ const char *G_OP_TYPE_WHILE = "while";
const char *G_OP_TYPE_BEAM_SEARCH_DECODE = "beam_search_decode";
const char *G_OP_TYPE_FILL_CONSTAN_BATCH_SIZE_LIKE =
"fill_constant_batch_size_like";
const char *G_OP_TYPE_FUSION_INSTANCENORM_RELU = "fusion_instancenorm_relu";
std::unordered_map<
std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
@@ -155,6 +156,7 @@ std::unordered_map<
{G_OP_TYPE_POOL2D, {{"X"}, {"Out"}}},
{G_OP_TYPE_BATCHNORM, {{"X"}, {"Y"}}},
{G_OP_TYPE_INSTANCENORM, {{"X"}, {"Out"}}},
{G_OP_TYPE_FUSION_INSTANCENORM_RELU, {{"X"}, {"Out"}}},
{G_OP_TYPE_LRN, {{"X"}, {"Out"}}},
{G_OP_TYPE_CONCAT, {{"X"}, {"Out"}}},
{G_OP_TYPE_SPLIT, {{"X"}, {"Out"}}},
......
@@ -257,8 +257,7 @@ extern const char *G_OP_TYPE_PAD2D;
extern const char *G_OP_TYPE_FUSION_DECONV_ADD_BN_RELU;
extern const char *G_OP_TYPE_FUSION_DECONV_ADD_BN;
extern const char *G_OP_TYPE_FUSION_DECONV_BN_RELU;
extern const char *G_OP_TYPE_PAD2D;
extern const char *G_OP_TYPE_FUSION_INSTANCENORM_RELU;
extern std::unordered_map<
std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_INSTANCENORM_RELU_OP
#include "operators/fusion_instancenorm_relu_op.h"
namespace paddle_mobile {
namespace operators {
template <typename Dtype, typename T>
void FusionInstanceNormReluOp<Dtype, T>::InferShape() const {
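  // Both instancenorm and relu preserve shape, so Out takes X's dims.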
auto x_dims = this->param_.InputX()->dims();
this->param_.Out()->Resize(x_dims);
}
} // namespace operators
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
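// Register the pattern matcher with the program optimizer. Only an OpenCL
// operator is registered below, so the fused op is available on the GPU_CL
// backend only.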
REGISTER_FUSION_MATCHER(fusion_instancenorm_relu,
ops::FusionInstanceNormReluMatcher);
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(fusion_instancenorm_relu, ops::FusionInstanceNormReluOp);
#endif
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_INSTANCENORM_RELU_OP
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "framework/operator.h"
#include "framework/program/program-optimize/fusion_op_register.h"
#include "operators/kernel/instancenorm_relu_kernel.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
class FusionInstanceNormReluMatcher : public framework::FusionOpMatcher {
public:
FusionInstanceNormReluMatcher() {
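    // Pattern to match: an instancenorm node followed by a relu node
    // (the overloaded > operator appends relu as the pattern's successor).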
node_ = framework::Node(G_OP_TYPE_INSTANCENORM);
node_ > std::make_shared<framework::Node>(G_OP_TYPE_RELU);
}
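  // Collapse a matched instancenorm -> relu subgraph into a single node of
  // Type(); the folded relu node is returned through removed_nodes.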
void FolderNodes(
framework::Node *node,
std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
node->Folder(node_.Depth(), Type(), {}, removed_nodes);
}
std::string Type() { return G_OP_TYPE_FUSION_INSTANCENORM_RELU; }
};
template <typename DeviceType, typename T>
class FusionInstanceNormReluOp
: public framework::OperatorWithKernel<
DeviceType, InstanceNormParam<DeviceType>,
operators::InstanceNormReluKernel<DeviceType, T>> {
public:
FusionInstanceNormReluOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
framework::Scope *scope)
: framework::OperatorWithKernel<
DeviceType, InstanceNormParam<DeviceType>,
operators::InstanceNormReluKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {}
  void InferShape() const override;
};
} // namespace operators
} // namespace paddle_mobile
#endif
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
#include "cl_common.h"
__kernel void instancenorm(__private const int in_width,
__private const int in_height,
@@ -109,7 +109,11 @@ __kernel void instancenorm(__private const int in_width,
for (int yIndex = h; yIndex < in_height; yIndex += local_work_size_y) {
int2 intout_pos = (int2)(mad24(c, in_width, xIndex), mad24(n, in_height, yIndex));
float4 in_val = read_imagef(input, sampler, intout_pos);
write_imageh(output, intout_pos, convert_half4((in_val - mean_val) * s));
half4 out_val = convert_half4((in_val - mean_val) * s);
#ifdef RELU
out_val = activation(out_val);
#endif
write_imageh(output, intout_pos, out_val);
}
}
}
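
The activation helper resolves in cl_common.h, which this diff includes but does not show. Under -DRELU it presumably clamps negative lanes to zero; a minimal sketch of that assumption (the real header may differ):

// Hypothetical helper, for illustration only.
inline half4 activation(half4 in) {
#ifdef RELU
  return fmax(in, (half4)(0.0h));  // ReLU: max(x, 0) per lane
#else
  return in;  // identity when RELU is not defined
#endif
}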
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_INSTANCENORM_RELU_OP
#include "operators/kernel/instancenorm_relu_kernel.h"
#include <cmath>
namespace paddle_mobile {
namespace operators {
template <>
bool InstanceNormReluKernel<GPU_CL, float>::Init(
InstanceNormParam<GPU_CL> *param) {
const std::string build_options = "-DRELU";
this->cl_helper_.AddKernel("instancenorm", "instancenorm_kernel.cl",
build_options);
return true;
}
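
For comparison, the non-fused instancenorm kernel would compile the same source without the define, so the #ifdef RELU epilogue compiles out. A minimal sketch under that assumption; the plain kernel is not part of this diff, and this assumes AddKernel's build-options argument is optional:

template <>
bool InstanceNormKernel<GPU_CL, float>::Init(InstanceNormParam<GPU_CL> *param) {
  // No -DRELU, so the activation branch in instancenorm_kernel.cl is skipped.
  this->cl_helper_.AddKernel("instancenorm", "instancenorm_kernel.cl");
  return true;
}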
template <>
void InstanceNormReluKernel<GPU_CL, float>::Compute(
const InstanceNormParam<GPU_CL> &param) {
auto kernel = this->cl_helper_.KernelAt(0);
auto &dims = param.Out()->dims();
const int n = dims[0];
const int c_group = (dims[1] + 3) / 4;
const int h = dims[2];
const int w = dims[3];
auto epsilon = param.Epsilon();
auto input = param.InputX()->GetCLImage();
auto out = param.Out()->GetCLImage();
DLOG << "Epsilon: " << epsilon;
auto local_work_size_info = this->cl_helper_.LocalWorkSizeInfo();
DLOG << local_work_size_info.max_work_group_size;
DLOG << local_work_size_info.max_work_item_size0;
DLOG << local_work_size_info.max_work_item_size1;
DLOG << local_work_size_info.max_work_item_size2;
const int max_work_group_size =
std::min(256, static_cast<int>(local_work_size_info.max_work_group_size));
int local_work_size1 = 1;
int local_work_size2 = 1;
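  // Exhaustively search for the largest local work-group shape (1, i, j):
  // i is bounded by the dim-1 work-item limit and by w, j by the dim-2 limit
  // and by h, and the area i * j by the work-group cap (at most 256).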
for (int i = 1; i <= local_work_size_info.max_work_item_size1 && i <= w;
i++) {
for (int j = 1; j <= local_work_size_info.max_work_item_size2 && j <= h;
j++) {
if (i * j <= max_work_group_size) {
if (i * j > local_work_size1 * local_work_size2) {
local_work_size1 = i;
local_work_size2 = j;
}
}
}
}
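  // Example (assuming per-dimension limits of at least 64): for w = h = 100
  // the search first reaches the 256-item cap at i = 4, j = 64 and keeps it.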
const size_t work_size[3] = {(size_t)(n * c_group), (size_t)local_work_size1,
(size_t)local_work_size2};
const size_t local_work_size[3] = {(size_t)1, (size_t)local_work_size1,
(size_t)local_work_size2};
DLOG << "work_size" << work_size[0] << " " << work_size[1] << " "
<< work_size[2];
DLOG << "local_work_size" << local_work_size[0] << " " << local_work_size[1]
<< " " << local_work_size[2];
  cl_int status;
  // Capture each OpenCL return code so CL_CHECK_ERRORS inspects a real status.
  status = clSetKernelArg(kernel, 0, sizeof(cl_int), &w);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 1, sizeof(cl_int), &h);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 2, sizeof(cl_int), &c_group);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 3, sizeof(cl_int), &local_work_size1);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 4, sizeof(cl_int), &local_work_size2);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 5, sizeof(cl_float), &epsilon);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 6, sizeof(cl_mem), &input);
  CL_CHECK_ERRORS(status);
  status = clSetKernelArg(kernel, 7, sizeof(cl_mem), &out);
  CL_CHECK_ERRORS(status);
  status = clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3,
                                  NULL, work_size, local_work_size, 0, NULL,
                                  NULL);
  CL_CHECK_ERRORS(status);
}
template class InstanceNormReluKernel<GPU_CL, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef FUSION_INSTANCENORM_RELU_OP
#include <vector>
#include "framework/operator.h"
#include "operators/math/im2col.h"
#include "operators/math/math_function.h"
#include "operators/math/vol2col.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
using framework::OpKernelBase;
template <typename DeviceType, typename T>
class InstanceNormReluKernel
: public OpKernelBase<DeviceType, InstanceNormParam<DeviceType>> {
public:
void Compute(const InstanceNormParam<DeviceType> &param);
bool Init(InstanceNormParam<DeviceType> *param);
};
} // namespace operators
} // namespace paddle_mobile
#endif
@@ -377,6 +377,7 @@ if(NOT FOUND_MATCH)
set(FILL_CONSTANT_BATCH_SIZE_LIKE_OP ON)
set(RANGE_OP ON)
set(REDUCE_PROD_OP ON)
set(FUSION_INSTANCENORM_RELU_OP ON)
endif()
# option(BATCHNORM_OP "" ON)
@@ -413,6 +414,9 @@ endif()
if (INSTANCENORM_OP)
add_definitions(-DINSTANCENORM_OP)
endif()
if (FUSION_INSTANCENORM_RELU_OP)
add_definitions(-DFUSION_INSTANCENORM_RELU_OP)
endif()
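# Every new source in this commit is wrapped in an
# #ifdef FUSION_INSTANCENORM_RELU_OP guard, so the define above is what pulls
# the fused op into the build; with the option OFF it compiles out.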
if (BOXCODER_OP)
add_definitions(-DBOXCODER_OP)
endif()
......