提交 c72044e0 编写于 作者: qnqinan's avatar qnqinan

update some files related with static quantization in FPGA V2 track

上级 842596ec
......@@ -32,6 +32,7 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
auto bias_ptr = bias->data<float>();
auto filter = const_cast<LoDTensor *>(param->Filter());
auto out = param->Output();
const int groups = param->Groups();
float Si = input->scale[0];
float So = out->scale[0];
float Sf = fpga::filter_find_max(filter);
......@@ -63,9 +64,12 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
// bs_ptr[i] = new_bias_ptr[i];
bs_ptr[i + channel] = new_scale_ptr[i] * Si / So * Sf / 127.0;
bs_ptr[i] = new_bias_ptr[i] * 127.0 / So;
if (groups == channel) {
new_scale_ptr[i] = new_scale_ptr[i] * Si / So;
new_bias_ptr[i] = new_bias_ptr[i] * 127.0f / So;
}
}
const int groups = param->Groups();
if (groups == channel) {
fpga::format_dwconv_data(filter, out, new_scale_ptr, &new_bias_ptr);
fpga::DWconvArgs dwconv_arg = {0};
......
......@@ -32,6 +32,9 @@ bool ConvTransposeKernel<FPGA, float>::Init(ConvTransposeParam<FPGA> *param) {
// auto bias_ptr = bias->data<float>();
auto filter = const_cast<LoDTensor *>(param->Filter());
auto out = param->Output();
float Si = input->scale[0];
float So = out->scale[0];
float Sf = fpga::filter_find_max(filter);
// PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
// "Output channel should be equal to bias number");
......@@ -53,6 +56,10 @@ bool ConvTransposeKernel<FPGA, float>::Init(ConvTransposeParam<FPGA> *param) {
PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0),
"filter axis should be the multiple of stride axis ");
if (param->Groups() == channel) {
for (int i = 0; i < channel * sub_conv_n; i++) {
bs_ptr[i + sub_conv_n * channel] = Si / So;
bs_ptr[i] = 0; // bias_ptr[i % (channel)];
}
fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
sub_conv_n);
fpga::DWDeconvArgs DWDeconv_arg = {0};
......@@ -62,6 +69,10 @@ bool ConvTransposeKernel<FPGA, float>::Init(ConvTransposeParam<FPGA> *param) {
param->Paddings()[0], param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(DWDeconv_arg);
} else {
for (int i = 0; i < channel * sub_conv_n; i++) {
bs_ptr[i + sub_conv_n * channel] = Si / So * Sf / 127.0f;
bs_ptr[i] = 0; // bias_ptr[i % (channel)];
}
fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
fpga::DeconvArgs deconv_arg = {0};
fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
......
......@@ -32,7 +32,9 @@ bool DeconvAddBNKernel<FPGA, float>::Init(FusionDeconvAddBNParam<FPGA> *param) {
auto bias_ptr = bias->data<float>();
auto filter = const_cast<LoDTensor *>(param->Filter());
auto out = param->Output();
float Si = input->scale[0];
float So = out->scale[0];
float Sf = fpga::filter_find_max(filter);
PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
"Output channel should be equal to bias number");
int channel = out->dims()[1];
......@@ -53,6 +55,10 @@ bool DeconvAddBNKernel<FPGA, float>::Init(FusionDeconvAddBNParam<FPGA> *param) {
PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0),
"filter axis should be the multiple of stride axis ");
if (param->Groups() == channel) {
for (int i = 0; i < channel * sub_conv_n; i++) {
bs_ptr[i + sub_conv_n * channel] = Si / So;
bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So;
}
fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
sub_conv_n);
fpga::DWDeconvArgs DWDeconv_arg = {0};
......@@ -62,6 +68,10 @@ bool DeconvAddBNKernel<FPGA, float>::Init(FusionDeconvAddBNParam<FPGA> *param) {
param->Paddings()[0], param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(DWDeconv_arg);
} else {
for (int i = 0; i < channel * sub_conv_n; i++) {
bs_ptr[i + sub_conv_n * channel] = Si / So * Sf / 127.0f;
bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So;
}
fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
fpga::DeconvArgs deconv_arg = {0};
fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
......
......@@ -33,7 +33,9 @@ bool DeconvAddBNReluKernel<FPGA, float>::Init(
auto bias_ptr = bias->data<float>();
auto filter = const_cast<LoDTensor *>(param->Filter());
auto out = param->Output();
float Si = input->scale[0];
float So = out->scale[0];
float Sf = fpga::filter_find_max(filter);
PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
"Output channel should be equal to bias number");
int channel = out->dims()[1];
......@@ -54,6 +56,10 @@ bool DeconvAddBNReluKernel<FPGA, float>::Init(
PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0),
"filter axis should be the multiple of stride axis ");
if (param->Groups() == channel) {
for (int i = 0; i < channel * sub_conv_n; i++) {
bs_ptr[i + sub_conv_n * channel] = Si / So;
bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So;
}
fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
sub_conv_n);
fpga::DWDeconvArgs DWDeconv_arg = {0};
......@@ -63,6 +69,10 @@ bool DeconvAddBNReluKernel<FPGA, float>::Init(
param->Paddings()[0], param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(DWDeconv_arg);
} else {
for (int i = 0; i < channel * sub_conv_n; i++) {
bs_ptr[i + sub_conv_n * channel] = Si / So * Sf / 127.0f;
bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So;
}
fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
fpga::DeconvArgs deconv_arg = {0};
fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
......
......@@ -32,7 +32,9 @@ bool DeconvAddKernel<FPGA, float>::Init(FusionDeconvAddParam<FPGA> *param) {
auto bias_ptr = bias->data<float>();
auto filter = const_cast<LoDTensor *>(param->Filter());
auto out = param->Output();
float Si = input->scale[0];
float So = out->scale[0];
float Sf = fpga::filter_find_max(filter);
PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
"Output channel should be equal to bias number");
int channel = out->dims()[1];
......@@ -53,6 +55,10 @@ bool DeconvAddKernel<FPGA, float>::Init(FusionDeconvAddParam<FPGA> *param) {
PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0),
"filter axis should be the multiple of stride axis ");
if (param->Groups() == channel) {
for (int i = 0; i < channel * sub_conv_n; i++) {
bs_ptr[i + sub_conv_n * channel] = Si / So;
bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So;
}
fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
sub_conv_n);
fpga::DWDeconvArgs DWDeconv_arg = {0};
......@@ -62,6 +68,10 @@ bool DeconvAddKernel<FPGA, float>::Init(FusionDeconvAddParam<FPGA> *param) {
param->Paddings()[0], param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(DWDeconv_arg);
} else {
for (int i = 0; i < channel * sub_conv_n; i++) {
bs_ptr[i + sub_conv_n * channel] = Si / So * Sf / 127.0f;
bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So;
}
fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
fpga::DeconvArgs deconv_arg = {0};
fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
......
......@@ -33,7 +33,9 @@ bool DeconvAddReluKernel<FPGA, float>::Init(
auto bias_ptr = bias->data<float>();
auto filter = const_cast<LoDTensor *>(param->Filter());
auto out = param->Output();
float Si = input->scale[0];
float So = out->scale[0];
float Sf = fpga::filter_find_max(filter);
PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
"Output channel should be equal to bias number");
int channel = out->dims()[1];
......@@ -54,6 +56,10 @@ bool DeconvAddReluKernel<FPGA, float>::Init(
PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0),
"filter axis should be the multiple of stride axis ");
if (param->Groups() == channel) {
for (int i = 0; i < channel * sub_conv_n; i++) {
bs_ptr[i + sub_conv_n * channel] = Si / So;
bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So;
}
fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
sub_conv_n);
fpga::DWDeconvArgs DWDeconv_arg = {0};
......@@ -63,6 +69,10 @@ bool DeconvAddReluKernel<FPGA, float>::Init(
param->Paddings()[0], param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(DWDeconv_arg);
} else {
for (int i = 0; i < channel * sub_conv_n; i++) {
bs_ptr[i + sub_conv_n * channel] = Si / So * Sf / 127.0f;
bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So;
}
fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
fpga::DeconvArgs deconv_arg = {0};
fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
......
......@@ -34,6 +34,9 @@ bool DeconvBNReluKernel<FPGA, float>::Init(
auto bias_ptr = bias->data<float>();
auto filter = const_cast<LoDTensor *>(param->Filter());
auto out = param->Output();
float Si = input->scale[0];
float So = out->scale[0];
float Sf = fpga::filter_find_max(filter);
auto bn_mean_ptr = param->InputMean()->data<float>();
auto bn_var_ptr = param->InputVariance()->data<float>();
auto bn_scale_ptr = param->InputScale()->data<float>();
......@@ -56,12 +59,22 @@ bool DeconvBNReluKernel<FPGA, float>::Init(
int sub_conv_n = param->Strides()[0];
auto bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sub_conv_n * // NOLINT
sizeof(float)); // NOLINT
for (int i = 0; i < channel * sub_conv_n; i++) {
bs_ptr[i + sub_conv_n * channel] = new_scale_ptr[i % channel];
bs_ptr[i] = new_bias_ptr[i % (channel)];
// for (int i = 0; i < channel * sub_conv_n; i++) {
// bs_ptr[i + sub_conv_n * channel] = new_scale_ptr[i % channel];
// bs_ptr[i] = new_bias_ptr[i % (channel)];
// }
if (param->Groups() == channel) {
for (int i = 0; i < channel * sub_conv_n; i++) {
bs_ptr[i + sub_conv_n * channel] = new_scale_ptr[i % channel] * Si / So;
bs_ptr[i] = new_bias_ptr[i % (channel)] * 127.0f / So;
}
} else {
for (int i = 0; i < channel * sub_conv_n; i++) {
bs_ptr[i + sub_conv_n * channel] =
new_scale_ptr[i % channel] * Si / So * Sf / 127.0f;
bs_ptr[i] = new_bias_ptr[i % (channel)] * 127.0f / So;
}
}
PADDLE_MOBILE_ENFORCE(param->Strides()[1] == param->Strides()[0],
"stride_width should be equal to stride_height ");
PADDLE_MOBILE_ENFORCE(filter->dims()[2] == filter->dims()[3],
......
......@@ -34,7 +34,11 @@ bool ElementwiseAddKernel<FPGA, float>::Init(ElementwiseAddParam<FPGA> *param) {
auto input_y_ptr = input_y->data<half>();
fpga::format_fp16_ofm(out);
auto out_ptr = out->mutable_data<half>();
float Si_1 = input_x->scale[0];
float Si_2 = input_y->scale[0];
float So = out->scale[0];
float C1 = Si_1 / So;
float C2 = Si_2 / So;
fpga::EWAddArgs ewaddArgs = {0};
// ewaddArgs.relu_enabled = relu_enabled;
ewaddArgs.output.activation.activation_type = activation_enable;
......
......@@ -32,7 +32,11 @@ bool ElementwiseAddReluKernel<FPGA, float>::Init(
auto input_y_ptr = input_y->data<half>();
fpga::format_fp16_ofm(out);
auto out_ptr = out->mutable_data<half>();
float Si_1 = input_x->scale[0];
float Si_2 = input_y->scale[0];
float So = out->scale[0];
float C1 = Si_1 / So;
float C2 = Si_2 / So;
fpga::EWAddArgs ewaddArgs = {0};
// ewaddArgs.relu_enabled = relu_enabled;
ewaddArgs.output.activation.activation_type = activation_enable;
......
......@@ -29,6 +29,9 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
const Tensor *input_z = param->InputZ();
auto input_z_ptr = input_z->data<float>();
auto out = param->Out();
float Si = input_x->scale[0];
float Sf = filter->scale[0];
float So = out->scale[0];
// PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == filter->dims()[0],
// "Image channel should be equal to weight number");
......@@ -36,8 +39,10 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
auto bs_ptr =
(float *)fpga::fpga_malloc(2 * channel * sizeof(float)); // NOLINT
for (int i = 0; i < channel; i++) {
bs_ptr[i + channel] = 1;
bs_ptr[i] = input_z_ptr[i];
// bs_ptr[i + channel] = 1;
// bs_ptr[i] = input_z_ptr[i];
bs_ptr[i + channel] = Si / So * Sf / 127.0f;
bs_ptr[i] = input_z_ptr[i] * 127.0f / So;
}
int num = (uint32_t)filter->dims()[1];
int chw = (uint32_t)filter->dims()[0];
......
......@@ -29,6 +29,9 @@ bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) {
const Tensor *input_z = param->InputZ();
auto input_z_ptr = input_z->data<float>();
auto out = param->Out();
float Si = input_x->scale[0];
float Sf = filter->scale[0];
float So = out->scale[0];
// PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == filter->dims()[0],
// "Image channel should be equal to weight number");
......@@ -36,8 +39,10 @@ bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) {
auto bs_ptr =
(float *)fpga::fpga_malloc(2 * channel * sizeof(float)); // NOLINT
for (int i = 0; i < channel; i++) {
bs_ptr[i + channel] = 1;
bs_ptr[i] = input_z_ptr[i];
// bs_ptr[i + channel] = 1;
// bs_ptr[i] = input_z_ptr[i];
bs_ptr[i + channel] = Si / So * Sf / 127.0f;
bs_ptr[i] = input_z_ptr[i] * 127.0f / So;
}
int num = (uint32_t)filter->dims()[1];
int chw = (uint32_t)filter->dims()[0];
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PAD2D_OP
#include "operators/kernel/pad2d_kernel.h"
namespace paddle_mobile {
namespace operators {
template <>
bool Pad2DKernel<FPGA, float>::Init(Pad2DParam<FPGA> *param) {
Tensor *output = param->output_;
fpga::format_fp16_ofm(output);
return true;
}
void pad2dFunc(const framework::Tensor *input, framework::Tensor *output) {
auto input_data = (input->data<half>());
auto output_data = (output->data<half>());
auto input_c = input->dims()[1];
auto input_h = input->dims()[2];
auto input_w = input->dims()[3];
auto output_c = output->dims()[1];
auto output_w = output->dims()[3];
auto copysize = input_c * input_w;
for (int h = 0; h < input_h; ++h) {
auto input_offset = h * input_c * input_w;
auto output_offset = h * paddle_mobile::fpga::align_to_x(
output_c * output_w, IMAGE_ALIGNMENT);
memcpy((output_data + output_offset), (input_data + input_offset),
copysize * sizeof(half));
}
}
template <>
void Pad2DKernel<FPGA, float>::Compute(const Pad2DParam<FPGA> &param) {
auto in_x = param.input_;
auto out = param.output_;
fpga::fpga_invalidate((void *)in_x->data<half>(), // NOLINT
in_x->numel() * sizeof(half));
pad2dFunc(in_x, out);
(out->scale)[0] = (in_x->scale)[0];
(out->scale)[1] = (in_x->scale)[1];
DLOG << (out->scale)[0];
DLOG << (out->scale)[1];
size_t outputSize =
out->dims()[2] *
paddle_mobile::fpga::align_to_x((out->dims()[1]) * (out->dims()[3]),
IMAGE_ALIGNMENT) *
sizeof(half);
fpga::fpga_flush(out->data<half>(), outputSize);
}
} // namespace operators
} // namespace paddle_mobile
#endif // PAD2D_OP
......@@ -44,11 +44,13 @@ bool PoolKernel<FPGA, float>::Init(PoolParam<FPGA> *param) {
auto input_ptr = input->data<half>();
fpga::format_fp16_ofm(output);
auto output_ptr = output->mutable_data<half>();
float Si = input->scale[0];
float So = output->scale[0];
fpga::PoolingArgs poolArgs = {0};
poolArgs.mode = pooling_type == "max" ? 0 : 1; // max:0, avg:1
poolArgs.kernel_reciprocal =
fpga::fp32_2_fp16(float(1.0 / (ksize[0] * ksize[1]))); // NOLINT
poolArgs.kernel_reciprocal = fpga::fp32_2_fp16(
float(1.0 / (ksize[0] * ksize[1]) * Si / So)); // NOLINT
poolArgs.image.address = input_ptr;
poolArgs.image.channels = (uint32_t)input->dims()[1];
poolArgs.image.height = (uint32_t)input->dims()[2];
......
......@@ -163,6 +163,26 @@ if (CON GREATER -1)
set(SPLIT_OP ON)
set(FUSION_DECONVADD_OP ON)
set(FUSION_DECONVADDRELU_OP ON)
set(RESHAPE_OP ON)
set(FUSION_CONVADDBNRELU_OP ON)
set(FUSION_CONVADDBN_OP ON)
set(RESHAPE2_OP ON)
set(PSROI_POOL_OP ON)
set(ROIALIGN_POOL_OP ON)
set(PROPOSAL_OP ON)
set(ANCHOR_GENERATOR_OP ON)
set(SLICE_OP ON)
set(SIGMOID_OP ON)
set(CONCAT_OP ON)
set(CONV_TRANSPOSE_OP ON)
set(FUSION_DECONVADDBNRELU_OP ON)
set(FUSION_DECONVADDBN_OP ON)
set(FUSION_DECONVBNRELU_OP ON)
set(CONV_OP ON)
set(ELEMENTWISEMUL_OP ON)
set(FUSION_FCRELU_OP ON)
set(RELU_OP ON)
set(FOUND_MATCH ON)
endif()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册