Commit 76c60710 authored by qnqinan, committed by GitHub

Merge pull request #1303 from zhangyang0701/develop

add kernels for V1 for FPGA track
@@ -12,49 +12,31 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#ifdef FUSION_CONVADDBNRELU_OP
-#include "operators/kernel/conv_add_bn_relu_kernel.h"
+#ifdef FUSION_CONVADD_OP
+#include "operators/kernel/conv_add_kernel.h"
 namespace paddle_mobile {
 namespace operators {
 template <>
-bool ConvAddBNReluKernel<FPGA, float>::Init(
-    FusionConvAddBNReluParam<FPGA> *param) {
-  bool relu_enabled = true;
+bool ConvAddKernel<FPGA, float>::Init(FusionConvAddParam<FPGA> *param) {
+  bool relu_enabled = false;
   auto input = const_cast<Tensor *>(param->Input());
   const Tensor *bias = param->Bias();
   auto bias_ptr = bias->data<float>();
   auto filter = const_cast<Tensor *>(param->Filter());
   auto out = param->Output();
-  auto bn_mean_ptr = param->InputMean()->data<float>();
-  auto bn_var_ptr = param->InputVariance()->data<float>();
-  auto bn_scale_ptr = param->InputScale()->data<float>();
-  auto bn_bias_ptr = param->InputBias()->data<float>();
-  const float epsilon = param->Epsilon();
-  PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0] &&
-                            bias->dims()[0] == param->InputBias()->dims()[0],
-                        "Output channel should be equal to bias number");
-  const int channel = out->dims()[1];
+  PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
+                        "Output channel should be equal to bias number");
+  int channel = out->dims()[1];
   auto bs_ptr =
       (float *)fpga::fpga_malloc(2 * channel * sizeof(float));  // NOLINT
-  auto new_scale = new Tensor();
-  auto new_bias = new Tensor();
-  auto new_scale_ptr = new_scale->mutable_data<float>({channel});
-  auto new_bias_ptr = new_bias->mutable_data<float>({channel});
   for (int i = 0; i < channel; i++) {
-    new_scale_ptr[i] = bn_scale_ptr[i] /
-                       static_cast<float>(pow((bn_var_ptr[i] + epsilon), 0.5));
-    new_bias_ptr[i] =
-        bn_bias_ptr[i] + (bias_ptr[i] - bn_mean_ptr[i]) * new_scale_ptr[i];
-    bs_ptr[i + 2] = new_scale_ptr[i];
-    bs_ptr[i] = new_bias_ptr[i];
+    bs_ptr[i + channel] = 1;
+    bs_ptr[i] = bias_ptr[i];
   }
-  param->SetNewScale(new_scale);
-  param->SetNewBias(new_bias);
   float max_value = fpga::filter_find_max(filter);
   fpga::format_filter(filter, max_value, param->Groups());
@@ -75,8 +57,8 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
 }
 template <>
-void ConvAddBNReluKernel<FPGA, float>::Compute(
-    const FusionConvAddBNReluParam<FPGA> &param) {
+void ConvAddKernel<FPGA, float>::Compute(
+    const FusionConvAddParam<FPGA> &param) {
   fpga::ComputeFpgaConv(param.FpgaArgs());
 }
...
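The substance of this change is in the loop above: the batch-norm folding math (new_scale/new_bias) is gone, so the scale half of the bias/scale buffer becomes the identity. A minimal sketch of the packing convention implied by the diff (first channel floats carry the bias, the next channel floats the scale; PackBiasScale is a hypothetical helper for illustration, not part of the tree):

#include <vector>

// Pack a 2 * channel float buffer as [bias[0..channel) | scale[0..channel)],
// mirroring the loop in ConvAddKernel<FPGA, float>::Init above.
std::vector<float> PackBiasScale(const float *bias_ptr, int channel) {
  std::vector<float> bs(2 * channel);
  for (int i = 0; i < channel; i++) {
    bs[i] = bias_ptr[i];     // bias taken verbatim from the Bias tensor
    bs[i + channel] = 1.0f;  // scale is identity once BN folding is removed
  }
  return bs;
}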
@@ -11,55 +11,26 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#ifdef FUSION_FCRELU_OP
-#include "operators/kernel/fc_relu_kernel.h"
+#ifdef FUSION_DECONVADD_OP
+#include "operators/kernel/deconv_add_kernel.h"
+#include "framework/operator.h"
+#include "operators/op_param.h"
 namespace paddle_mobile {
 namespace operators {
 template <>
-bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) {
-  bool relu_enabled = true;
-  auto input_x = const_cast<LoDTensor *>(param->InputX());
-  auto filter = const_cast<Tensor *>(param->InputY());
-  auto input_z = param->InputZ();
-  auto input_z_ptr = input_z->data<float>();
-  auto out = param->Out();
-  PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == filter->dims()[0],
-                        "Image channel should be equal to weight number");
-  int channel = (uint32_t)out->dims()[1];
-  auto bs_ptr =
-      (float *)fpga::fpga_malloc(2 * channel * sizeof(float));  // NOLINT
-  for (int i = 0; i < channel; i++) {
-    bs_ptr[i + channel] = 1;
-    bs_ptr[i] = input_z_ptr[i];
-  }
-  int num = (uint32_t)filter->dims()[1];
-  int chw = (uint32_t)filter->dims()[0];
-  PADDLE_MOBILE_ENFORCE(
-      chw == input_x->numel(),
-      "Filter element num should be equal to IFM element num");
-  int height = (uint32_t)input_x->dims()[2];
-  int width = (uint32_t)input_x->dims()[3];
-  int filter_channel = chw / height / width;
-  out->Resize(framework::make_ddim({1, channel, 1, 1}));
-  filter->Resize(framework::make_ddim({num, filter_channel, height, width}));
-  fpga::format_fc_data(filter, out, bs_ptr);
-  fpga::SplitConvArgs conv_arg = {0};
-  fpga::fill_split_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1,
-                       0, 0, bs_ptr);
-  param->SetFpgaArgs(conv_arg);
+bool DeconvAddKernel<FPGA, float>::Init(FusionDeconvAddParam<FPGA> *param) {
   return true;
 }
 template <>
-void FusionFcReluKernel<FPGA, float>::Compute(
-    const FusionFcReluParam<FPGA> &param) {
-  fpga::ComputeFpgaConv(param.FpgaArgs());
-}
+void DeconvAddKernel<FPGA, float>::Compute(
+    const FusionDeconvAddParam<FPGA> &param) {}
 }  // namespace operators
 }  // namespace paddle_mobile
 #endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_DECONVADDRELU_OP
#include "operators/kernel/deconv_add_relu_kernel.h"
#include "framework/operator.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
template <>
bool DeconvAddReluKernel<FPGA, float>::Init(
FusionDeconvAddReluParam<FPGA> *param) {
return true;
}
template <>
void DeconvAddReluKernel<FPGA, float>::Compute(
const FusionDeconvAddReluParam<FPGA> &param) {}
} // namespace operators
} // namespace paddle_mobile
#endif
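Note that both deconv kernels added here are placeholders on the FPGA V1 track for now: Init() only returns true and Compute() has an empty body. The split, tanh, and transpose2 kernels added below follow the same stub pattern.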
@@ -11,60 +11,54 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#ifdef FUSION_FCRELU_OP
-#include "operators/kernel/fc_relu_kernel.h"
+#ifdef ELEMENTWISEADD_OP
+#include "operators/kernel/elementwise_add_kernel.h"
 namespace paddle_mobile {
 namespace operators {
 template <>
-bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) {
-  bool relu_enabled = true;
-  auto input_x = const_cast<LoDTensor *>(param->InputX());
-  auto filter = const_cast<Tensor *>(param->InputY());
-  auto input_z = param->InputZ();
-  auto input_z_ptr = input_z->data<float>();
-  auto out = param->Out();
-  PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == filter->dims()[0],
-                        "Image channel should be equal to weight number");
-  int channel = (uint32_t)out->dims()[1];
-  auto bs_ptr =
-      (float *)fpga::fpga_malloc(2 * channel * sizeof(float));  // NOLINT
-  for (int i = 0; i < channel; i++) {
-    bs_ptr[i + channel] = 1;
-    bs_ptr[i] = input_z_ptr[i];
-  }
-  int num = (uint32_t)filter->dims()[1];
-  int chw = (uint32_t)filter->dims()[0];
-  PADDLE_MOBILE_ENFORCE(
-      chw == input_x->numel(),
-      "Filter element num should be equal to IFM element num");
-  int height = (uint32_t)input_x->dims()[2];
-  int width = (uint32_t)input_x->dims()[3];
-  int filter_channel = chw / height / width;
-  out->Resize(framework::make_ddim({1, channel, 1, 1}));
-  filter->Resize(framework::make_ddim({num, filter_channel, height, width}));
-  float max_value = fpga::filter_find_max(filter);
-  fpga::format_fc_filter(filter, max_value);
-  int element_num_per_div = fpga::get_filter_num_per_div(filter, 1);
-  fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
+bool ElementwiseAddKernel<FPGA, float>::Init(ElementwiseAddParam<FPGA> *param) {
+  bool relu_enabled = false;
+  auto *input_x = const_cast<LoDTensor *>(param->InputX());
+  auto *input_y = const_cast<LoDTensor *>(param->InputY());
+  auto *out = param->Out();
+  auto input_x_ptr = input_x->data<float>();
+  auto input_y_ptr = input_y->data<float>();
   fpga::format_fp16_ofm(out);
-  fpga::SplitConvArgs conv_arg = {0};
-  fpga::fill_split_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1,
-                       0, 0, bs_ptr);
-  param->SetFpgaArgs(conv_arg);
+  auto out_ptr = out->mutable_data<float>();
+  fpga::EWAddArgs ewaddArgs = {0};
+  ewaddArgs.relu_enabled = relu_enabled;
+  ewaddArgs.const0 = 0x3c00;  // =1
+  ewaddArgs.const1 = 0x3c00;  // =1
+  ewaddArgs.image0.address = input_x_ptr;
+  ewaddArgs.image0.channels = (uint32_t)input_x->dims()[1];
+  ewaddArgs.image0.scale_address = input_x->scale;
+  ewaddArgs.image0.height = (uint32_t)input_x->dims()[2];
+  ewaddArgs.image0.width = (uint32_t)input_x->dims()[3];
+  ewaddArgs.image0.pad_height = 0;
+  ewaddArgs.image0.pad_width = 0;
+  ewaddArgs.image1.address = input_y_ptr;
+  ewaddArgs.image1.channels = (uint32_t)input_y->dims()[1];
+  ewaddArgs.image1.scale_address = input_y->scale;
+  ewaddArgs.image1.height = (uint32_t)input_y->dims()[2];
+  ewaddArgs.image1.width = (uint32_t)input_y->dims()[3];
+  ewaddArgs.image1.pad_height = 0;
+  ewaddArgs.image1.pad_width = 0;
+  ewaddArgs.output.scale_address = out->scale;
+  ewaddArgs.output.address = out_ptr;
+  param->SetFpgaArgs(ewaddArgs);
   return true;
 }
 template <>
-void FusionFcReluKernel<FPGA, float>::Compute(
-    const FusionFcReluParam<FPGA> &param) {
-  fpga::ComputeFpgaConv(param.FpgaArgs());
+void ElementwiseAddKernel<FPGA, float>::Compute(
+    const ElementwiseAddParam<FPGA> &param) {
+  fpga::ComputeFpgaEWAdd(param.FpgaArgs());
 }
 }  // namespace operators
 }  // namespace paddle_mobile
 #endif
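The const0/const1 values above are IEEE 754 half-precision bit patterns: 0x3c00 encodes 1.0, so the elementwise engine computes 1*x + 1*y before the (here disabled) ReLU. A sketch of why 1.0f maps to 0x3c00, assuming only positive normal values need encoding (FloatToHalfBits is illustrative, not an API from this tree):

#include <cmath>
#include <cstdint>

// Encode a positive normal float as IEEE 754 binary16 bits.
uint16_t FloatToHalfBits(float v) {
  int e2 = 0;
  float frac = std::frexp(v, &e2);  // v = frac * 2^e2, frac in [0.5, 1)
  uint16_t exponent = (uint16_t)(e2 - 1 + 15);  // re-bias for binary16
  uint16_t mantissa = (uint16_t)((frac * 2.0f - 1.0f) * 1024.0f);  // 10 bits
  return (uint16_t)((exponent << 10) | mantissa);
}
// FloatToHalfBits(1.0f) == 0x3c00, matching const0/const1 above.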
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef SPLIT_OP
#include "operators/kernel/split_kernel.h"
namespace paddle_mobile {
namespace operators {
template <>
bool SplitKernel<FPGA, float>::Init(SplitParam<FPGA>* param) {
return true;
}
template <>
void SplitKernel<FPGA, float>::Compute(const SplitParam<FPGA>& param) {}
} // namespace operators
} // namespace paddle_mobile
#endif
@@ -12,21 +12,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#ifdef CONV_TRANSPOSE_OP
-#include "operators/kernel/conv_transpose_kernel.h"
+#ifdef TANH_OP
+#include "operators/kernel/tanh_kernel.h"
 namespace paddle_mobile {
 namespace operators {
 template <>
-bool ConvTransposeKernel<FPGA, float>::Init(ConvTransposeParam<FPGA> *param) {
+bool TanhKernel<FPGA, float>::Init(TanhParam<FPGA> *param) {
   return true;
 }
 template <>
-void ConvTransposeKernel<FPGA, float>::Compute(
-    const ConvTransposeParam<FPGA> &param) {}
+void TanhKernel<FPGA, float>::Compute(const TanhParam<FPGA> &param) {}
 }  // namespace operators
 }  // namespace paddle_mobile
...
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef TRANSPOSE2_OP
#include "operators/kernel/transpose2_kernel.h"
#include "operators/kernel/central-arm-func/transpose2_arm_func.h"
namespace paddle_mobile {
namespace operators {
template <>
bool Transpose2Kernel<FPGA, float>::Init(Transpose2Param<FPGA> *param) {
return true;
}
template <>
void Transpose2Kernel<FPGA, float>::Compute(
const Transpose2Param<FPGA> &param) {
// Transpose2Compute<float>(param);
}
} // namespace operators
} // namespace paddle_mobile
#endif
@@ -2211,7 +2211,6 @@ class DropoutParam : public OpParam {
 };
 #endif
-#ifdef CONV_TRANSPOSE_OP
 template <typename Dtype>
 class ConvTransposeParam : public OpParam {
   typedef typename DtypeTensorTrait<Dtype>::gtype GType;
@@ -2266,7 +2265,7 @@ class ConvTransposeParam : public OpParam {
   void SetFpgaArgs(const fpga::DeconvArgs &args) { fpga_conv_args = args; }
 #endif
 };
-#endif
 #ifdef FUSION_DECONVADD_OP
 template <typename Dtype>
 class FusionDeconvAddParam : public ConvTransposeParam<Dtype> {
...
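The removed guard matters because of the inheritance shown just above: FusionDeconvAddParam derives from ConvTransposeParam, and the V1 op list below turns on FUSION_DECONVADD_OP without CONV_TRANSPOSE_OP, so the base class must now compile unconditionally. The dependency, stripped to a sketch (class bodies elided):

// ConvTransposeParam is now unconditional: it is the base of the deconv
// fusion params, which FPGA_NET_V1 enables without CONV_TRANSPOSE_OP.
template <typename Dtype>
class ConvTransposeParam { /* ... */ };

#ifdef FUSION_DECONVADD_OP
template <typename Dtype>
class FusionDeconvAddParam : public ConvTransposeParam<Dtype> { /* ... */ };
#endif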
@@ -109,16 +109,19 @@ list(FIND NET "FPGA_NET_V1" CON)
 if (CON GREATER -1)
   message("FPGA_NET_V1 enabled")
   set(FUSION_CONVADDRELU_OP ON)
-  set(FUSION_CONVADDBNRELU_OP ON)
-  set(FUSION_CONVADDBN_OP ON)
   set(FUSION_ELEMENTWISEADDRELU_OP ON)
   set(FUSION_FC_OP ON)
-  set(FUSION_FCRELU_OP ON)
   set(POOL_OP ON)
-  set(CONCAT_OP ON)
   set(SOFTMAX_OP ON)
   set(FUSION_CONVBNRELU_OP ON)
   set(FUSION_CONVBN_OP ON)
+  set(TANH_OP ON)
+  set(ELEMENTWISEADD_OP ON)
+  set(TRANSPOSE2_OP ON)
+  set(FUSION_CONVADD_OP ON)
+  set(SPLIT_OP ON)
+  set(FUSION_DECONVADD_OP ON)
+  set(FUSION_DECONVADDRELU_OP ON)
   set(FOUND_MATCH ON)
 endif()
@@ -132,7 +135,6 @@ if (CON GREATER -1)
   set(SOFTMAX_OP ON)
   set(FUSION_CONVBNRELU_OP ON)
   set(FUSION_CONVBN_OP ON)
-  set(CONV_TRANSPOSE_OP ON)
   set(TANH_OP ON)
   set(ELEMENTWISEADD_OP ON)
   set(TRANSPOSE2_OP ON)
@@ -140,8 +142,6 @@ if (CON GREATER -1)
   set(SPLIT_OP ON)
   set(FUSION_DECONVADD_OP ON)
   set(FUSION_DECONVADDRELU_OP ON)
   set(FOUND_MATCH ON)
 endif()
...
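Each set(<OP_NAME> ON) above is assumed, per the usual convention in this build, to be turned into a matching -D<OP_NAME> compile definition elsewhere in the CMake tree; that define is what the guard at the top of every kernel file in this diff keys off, so only the ops listed for a net configuration are compiled in. The consumer side, as seen throughout the diff:

#ifdef FUSION_CONVADD_OP
// ConvAddKernel<FPGA, float>::Init / ::Compute are compiled only when the
// CMake flag above is ON; otherwise this file is an empty translation unit.
#endif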