Unverified commit 769c8083, authored by jameswu2014, committed by GitHub

Merge pull request #1571 from qnqinan/develop

Update files related to static quantization on the FPGA V2 track. Fixes #1570.
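The change folds the static (symmetric int8) quantization scales into the per-channel scale/bias buffer (bs_ptr) consumed by the FPGA: Si and So are the input/output tensor ranges read from tensor->scale[0], and Sf is the filter range from fpga::filter_find_max. Below is a minimal standalone sketch of the folding used by the conv/FC hunks in this PR; fold_quant_scale_bias is an illustrative name, not a repo API.

#include <cstddef>

// Sketch: fold float scale/bias into quantized-domain coefficients.
// bs[channel..2*channel) holds the multiplier applied to the accumulator,
// bs[0..channel) the additive term, both in the output's int8 domain.
void fold_quant_scale_bias(const float *scale, const float *bias, float Si,
                           float Sf, float So, float *bs,
                           std::size_t channel) {
  for (std::size_t i = 0; i < channel; i++) {
    bs[i + channel] = scale[i] * Si / So * Sf / 127.0f;  // multiplicative
    bs[i] = bias[i] * 127.0f / So;                       // additive
  }
}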
@@ -33,6 +33,9 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
   auto filter = const_cast<LoDTensor *>(param->Filter());
   auto out = param->Output();
+  float Si = input->scale[0];
+  float So = out->scale[0];
+  float Sf = fpga::filter_find_max(filter);
   auto bn_mean_ptr = param->InputMean()->data<float>();
   auto bn_var_ptr = param->InputVariance()->data<float>();
@@ -56,8 +59,10 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
         static_cast<float>(pow((bn_var_ptr[i] + epsilon), 0.5));
     new_bias_ptr[i] =
         bn_bias_ptr[i] + (bias_ptr[i] - bn_mean_ptr[i]) * new_scale_ptr[i];
-    bs_ptr[i + channel] = new_scale_ptr[i];
-    bs_ptr[i] = new_bias_ptr[i];
+    // bs_ptr[i + channel] = new_scale_ptr[i];
+    // bs_ptr[i] = new_bias_ptr[i];
+    bs_ptr[i + channel] = new_scale_ptr[i] * Si / So * Sf / 127.0;
+    bs_ptr[i] = new_bias_ptr[i] * 127.0 / So;
   }
   fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
...
@@ -32,7 +32,10 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
   auto bias_ptr = bias->data<float>();
   auto filter = const_cast<LoDTensor *>(param->Filter());
   auto out = param->Output();
+  const int groups = param->Groups();
+  float Si = input->scale[0];
+  float So = out->scale[0];
+  float Sf = fpga::filter_find_max(filter);
   vector<int> paddings = param->Paddings();
   vector<int> strides = param->Strides();
   auto bn_mean_ptr = param->InputMean()->data<float>();
@@ -57,11 +60,16 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
         static_cast<float>(pow((bn_var_ptr[i] + epsilon), 0.5));
     new_bias_ptr[i] =
         bn_bias_ptr[i] + (bias_ptr[i] - bn_mean_ptr[i]) * new_scale_ptr[i];
-    bs_ptr[i + channel] = new_scale_ptr[i];
-    bs_ptr[i] = new_bias_ptr[i];
+    // bs_ptr[i + channel] = new_scale_ptr[i];
+    // bs_ptr[i] = new_bias_ptr[i];
+    bs_ptr[i + channel] = new_scale_ptr[i] * Si / So * Sf / 127.0;
+    bs_ptr[i] = new_bias_ptr[i] * 127.0 / So;
+    if (groups == channel) {
+      new_scale_ptr[i] = new_scale_ptr[i] * Si / So;
+      new_bias_ptr[i] = new_bias_ptr[i] * 127.0f / So;
+    }
   }
-  const int groups = param->Groups();
   if (groups == channel) {
     fpga::format_dwconv_data(filter, out, new_scale_ptr, &new_bias_ptr);
     fpga::DWconvArgs dwconv_arg = {0};
...
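Note the depthwise special case above: when groups == channel the kernel hands new_scale_ptr/new_bias_ptr to fpga::format_dwconv_data instead of bs_ptr, and the scale term drops the Sf / 127 factor, suggesting the DW filter is not requantized by its range on that path. A hedged sketch of the two coefficient forms (fold_scale and fold_bias are illustrative names):

// Assumption: the standard path multiplies int8 input by int8 filter, so
// both the Si and Sf ranges must be compensated; the DW path appears to
// keep the filter's own range, leaving only the Si/So ratio.
float fold_scale(float s, float Si, float Sf, float So, bool depthwise) {
  return depthwise ? s * Si / So : s * Si / So * Sf / 127.0f;
}
float fold_bias(float b, float So) { return b * 127.0f / So; }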
@@ -30,6 +30,9 @@ bool ConvAddKernel<FPGA, float>::Init(FusionConvAddParam<FPGA> *param) {
   auto bias_ptr = bias->data<float>();
   auto filter = const_cast<LoDTensor *>(param->Filter());
   auto out = param->Output();
+  float Si = input->scale[0];
+  float So = out->scale[0];
+  float Sf = fpga::filter_find_max(filter);
   PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
                         "Output channel should be equal to bias number");
@@ -37,8 +40,10 @@ bool ConvAddKernel<FPGA, float>::Init(FusionConvAddParam<FPGA> *param) {
   auto bs_ptr =
       (float *)fpga::fpga_malloc(2 * channel * sizeof(float));  // NOLINT
   for (int i = 0; i < channel; i++) {
-    bs_ptr[i + channel] = 1;
-    bs_ptr[i] = bias_ptr[i];
+    // bs_ptr[i + channel] = 1;
+    // bs_ptr[i] = bias_ptr[i];
+    bs_ptr[i + channel] = Si / So * Sf / 127.0;
+    bs_ptr[i] = bias_ptr[i] * 127.0 / So;
   }
   fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
...
@@ -30,6 +30,9 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
   auto bias_ptr = bias->data<float>();
   auto filter = const_cast<LoDTensor *>(param->Filter());
   auto out = param->Output();
+  float Si = input->scale[0];
+  float So = out->scale[0];
+  float Sf = fpga::filter_find_max(filter);
   PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
                         "Output channel should be equal to bias number");
@@ -37,8 +40,10 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
   auto bs_ptr =
       (float *)fpga::fpga_malloc(2 * channel * sizeof(float));  // NOLINT
   for (int i = 0; i < channel; i++) {
-    bs_ptr[i + channel] = 1;
-    bs_ptr[i] = bias_ptr[i];
+    // bs_ptr[i + channel] = 1;
+    // bs_ptr[i] = bias_ptr[i];
+    bs_ptr[i + channel] = Si / So * Sf / 127.0;
+    bs_ptr[i] = bias_ptr[i] * 127.0 / So;
   }
   fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
...
@@ -32,6 +32,9 @@ bool ConvTransposeKernel<FPGA, float>::Init(ConvTransposeParam<FPGA> *param) {
   // auto bias_ptr = bias->data<float>();
   auto filter = const_cast<LoDTensor *>(param->Filter());
   auto out = param->Output();
+  float Si = input->scale[0];
+  float So = out->scale[0];
+  float Sf = fpga::filter_find_max(filter);
   // PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
   //                       "Output channel should be equal to bias number");
@@ -53,6 +56,10 @@ bool ConvTransposeKernel<FPGA, float>::Init(ConvTransposeParam<FPGA> *param) {
   PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0),
                         "filter axis should be the multiple of stride axis ");
   if (param->Groups() == channel) {
+    for (int i = 0; i < channel * sub_conv_n; i++) {
+      bs_ptr[i + sub_conv_n * channel] = Si / So;
+      bs_ptr[i] = 0;  // bias_ptr[i % (channel)];
+    }
     fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
                                sub_conv_n);
     fpga::DWDeconvArgs DWDeconv_arg = {0};
@@ -62,6 +69,10 @@ bool ConvTransposeKernel<FPGA, float>::Init(ConvTransposeParam<FPGA> *param) {
                           param->Paddings()[0], param->Paddings()[1], bs_ptr);
     param->SetFpgaArgs(DWDeconv_arg);
   } else {
+    for (int i = 0; i < channel * sub_conv_n; i++) {
+      bs_ptr[i + sub_conv_n * channel] = Si / So * Sf / 127.0f;
+      bs_ptr[i] = 0;  // bias_ptr[i % (channel)];
+    }
     fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
     fpga::DeconvArgs deconv_arg = {0};
     fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
...
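The ConvTranspose hunk above and the fusion deconv kernels below share one pattern: the transposed convolution is decomposed into sub_conv_n = stride sub-convolutions, so each half of the scale/bias buffer holds channel * sub_conv_n entries, and ConvTranspose itself carries no bias (bs_ptr[i] = 0). A sketch of the shared fill loop, where fill_deconv_bs is an illustrative name and bias == nullptr models the bias-free ConvTranspose case:

void fill_deconv_bs(float *bs, int channel, int sub_conv_n, float Si,
                    float Sf, float So, const float *bias, bool depthwise) {
  for (int i = 0; i < channel * sub_conv_n; i++) {
    // Depthwise deconv omits the filter-range factor, as in the conv case.
    bs[i + sub_conv_n * channel] =
        depthwise ? Si / So : Si / So * Sf / 127.0f;
    bs[i] = (bias != nullptr) ? bias[i % channel] * 127.0f / So : 0.0f;
  }
}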
@@ -32,7 +32,9 @@ bool DeconvAddBNKernel<FPGA, float>::Init(FusionDeconvAddBNParam<FPGA> *param) {
   auto bias_ptr = bias->data<float>();
   auto filter = const_cast<LoDTensor *>(param->Filter());
   auto out = param->Output();
+  float Si = input->scale[0];
+  float So = out->scale[0];
+  float Sf = fpga::filter_find_max(filter);
   PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
                         "Output channel should be equal to bias number");
   int channel = out->dims()[1];
@@ -53,6 +55,10 @@ bool DeconvAddBNKernel<FPGA, float>::Init(FusionDeconvAddBNParam<FPGA> *param) {
   PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0),
                         "filter axis should be the multiple of stride axis ");
   if (param->Groups() == channel) {
+    for (int i = 0; i < channel * sub_conv_n; i++) {
+      bs_ptr[i + sub_conv_n * channel] = Si / So;
+      bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So;
+    }
     fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
                                sub_conv_n);
     fpga::DWDeconvArgs DWDeconv_arg = {0};
@@ -62,6 +68,10 @@ bool DeconvAddBNKernel<FPGA, float>::Init(FusionDeconvAddBNParam<FPGA> *param) {
                           param->Paddings()[0], param->Paddings()[1], bs_ptr);
     param->SetFpgaArgs(DWDeconv_arg);
   } else {
+    for (int i = 0; i < channel * sub_conv_n; i++) {
+      bs_ptr[i + sub_conv_n * channel] = Si / So * Sf / 127.0f;
+      bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So;
+    }
     fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
     fpga::DeconvArgs deconv_arg = {0};
     fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
...
@@ -33,7 +33,9 @@ bool DeconvAddBNReluKernel<FPGA, float>::Init(
   auto bias_ptr = bias->data<float>();
   auto filter = const_cast<LoDTensor *>(param->Filter());
   auto out = param->Output();
+  float Si = input->scale[0];
+  float So = out->scale[0];
+  float Sf = fpga::filter_find_max(filter);
   PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
                         "Output channel should be equal to bias number");
   int channel = out->dims()[1];
@@ -54,6 +56,10 @@ bool DeconvAddBNReluKernel<FPGA, float>::Init(
   PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0),
                         "filter axis should be the multiple of stride axis ");
   if (param->Groups() == channel) {
+    for (int i = 0; i < channel * sub_conv_n; i++) {
+      bs_ptr[i + sub_conv_n * channel] = Si / So;
+      bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So;
+    }
     fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
                                sub_conv_n);
     fpga::DWDeconvArgs DWDeconv_arg = {0};
@@ -63,6 +69,10 @@ bool DeconvAddBNReluKernel<FPGA, float>::Init(
                           param->Paddings()[0], param->Paddings()[1], bs_ptr);
     param->SetFpgaArgs(DWDeconv_arg);
   } else {
+    for (int i = 0; i < channel * sub_conv_n; i++) {
+      bs_ptr[i + sub_conv_n * channel] = Si / So * Sf / 127.0f;
+      bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So;
+    }
     fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
     fpga::DeconvArgs deconv_arg = {0};
     fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
...
@@ -32,7 +32,9 @@ bool DeconvAddKernel<FPGA, float>::Init(FusionDeconvAddParam<FPGA> *param) {
   auto bias_ptr = bias->data<float>();
   auto filter = const_cast<LoDTensor *>(param->Filter());
   auto out = param->Output();
+  float Si = input->scale[0];
+  float So = out->scale[0];
+  float Sf = fpga::filter_find_max(filter);
   PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
                         "Output channel should be equal to bias number");
   int channel = out->dims()[1];
@@ -53,6 +55,10 @@ bool DeconvAddKernel<FPGA, float>::Init(FusionDeconvAddParam<FPGA> *param) {
   PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0),
                         "filter axis should be the multiple of stride axis ");
   if (param->Groups() == channel) {
+    for (int i = 0; i < channel * sub_conv_n; i++) {
+      bs_ptr[i + sub_conv_n * channel] = Si / So;
+      bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So;
+    }
     fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
                                sub_conv_n);
     fpga::DWDeconvArgs DWDeconv_arg = {0};
@@ -62,6 +68,10 @@ bool DeconvAddKernel<FPGA, float>::Init(FusionDeconvAddParam<FPGA> *param) {
                           param->Paddings()[0], param->Paddings()[1], bs_ptr);
     param->SetFpgaArgs(DWDeconv_arg);
   } else {
+    for (int i = 0; i < channel * sub_conv_n; i++) {
+      bs_ptr[i + sub_conv_n * channel] = Si / So * Sf / 127.0f;
+      bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So;
+    }
     fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
     fpga::DeconvArgs deconv_arg = {0};
     fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
...
@@ -33,7 +33,9 @@ bool DeconvAddReluKernel<FPGA, float>::Init(
   auto bias_ptr = bias->data<float>();
   auto filter = const_cast<LoDTensor *>(param->Filter());
   auto out = param->Output();
+  float Si = input->scale[0];
+  float So = out->scale[0];
+  float Sf = fpga::filter_find_max(filter);
   PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
                         "Output channel should be equal to bias number");
   int channel = out->dims()[1];
@@ -54,6 +56,10 @@ bool DeconvAddReluKernel<FPGA, float>::Init(
   PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0),
                         "filter axis should be the multiple of stride axis ");
   if (param->Groups() == channel) {
+    for (int i = 0; i < channel * sub_conv_n; i++) {
+      bs_ptr[i + sub_conv_n * channel] = Si / So;
+      bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So;
+    }
     fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
                                sub_conv_n);
     fpga::DWDeconvArgs DWDeconv_arg = {0};
@@ -63,6 +69,10 @@ bool DeconvAddReluKernel<FPGA, float>::Init(
                           param->Paddings()[0], param->Paddings()[1], bs_ptr);
     param->SetFpgaArgs(DWDeconv_arg);
   } else {
+    for (int i = 0; i < channel * sub_conv_n; i++) {
+      bs_ptr[i + sub_conv_n * channel] = Si / So * Sf / 127.0f;
+      bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So;
+    }
     fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
     fpga::DeconvArgs deconv_arg = {0};
     fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
...
@@ -34,6 +34,9 @@ bool DeconvBNReluKernel<FPGA, float>::Init(
   auto bias_ptr = bias->data<float>();
   auto filter = const_cast<LoDTensor *>(param->Filter());
   auto out = param->Output();
+  float Si = input->scale[0];
+  float So = out->scale[0];
+  float Sf = fpga::filter_find_max(filter);
   auto bn_mean_ptr = param->InputMean()->data<float>();
   auto bn_var_ptr = param->InputVariance()->data<float>();
   auto bn_scale_ptr = param->InputScale()->data<float>();
@@ -56,12 +59,22 @@ bool DeconvBNReluKernel<FPGA, float>::Init(
   int sub_conv_n = param->Strides()[0];
   auto bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sub_conv_n *  // NOLINT
                                            sizeof(float));  // NOLINT
-  for (int i = 0; i < channel * sub_conv_n; i++) {
-    bs_ptr[i + sub_conv_n * channel] = new_scale_ptr[i % channel];
-    bs_ptr[i] = new_bias_ptr[i % (channel)];
+  // for (int i = 0; i < channel * sub_conv_n; i++) {
+  //   bs_ptr[i + sub_conv_n * channel] = new_scale_ptr[i % channel];
+  //   bs_ptr[i] = new_bias_ptr[i % (channel)];
+  // }
+  if (param->Groups() == channel) {
+    for (int i = 0; i < channel * sub_conv_n; i++) {
+      bs_ptr[i + sub_conv_n * channel] = new_scale_ptr[i % channel] * Si / So;
+      bs_ptr[i] = new_bias_ptr[i % (channel)] * 127.0f / So;
+    }
+  } else {
+    for (int i = 0; i < channel * sub_conv_n; i++) {
+      bs_ptr[i + sub_conv_n * channel] =
+          new_scale_ptr[i % channel] * Si / So * Sf / 127.0f;
+      bs_ptr[i] = new_bias_ptr[i % (channel)] * 127.0f / So;
+    }
   }
   PADDLE_MOBILE_ENFORCE(param->Strides()[1] == param->Strides()[0],
                         "stride_width should be equal to stride_height ");
   PADDLE_MOBILE_ENFORCE(filter->dims()[2] == filter->dims()[3],
...
@@ -16,7 +16,7 @@ limitations under the License. */
 #include "operators/kernel/elementwise_add_kernel.h"
 #include <string>
-#include "fpga/V1/api.h"
+#include "fpga/V2/api.h"
 namespace paddle_mobile {
 namespace operators {
@@ -34,7 +34,11 @@ bool ElementwiseAddKernel<FPGA, float>::Init(ElementwiseAddParam<FPGA> *param) {
   auto input_y_ptr = input_y->data<half>();
   fpga::format_fp16_ofm(out);
   auto out_ptr = out->mutable_data<half>();
+  float Si_1 = input_x->scale[0];
+  float Si_2 = input_y->scale[0];
+  float So = out->scale[0];
+  float C1 = Si_1 / So;
+  float C2 = Si_2 / So;
   fpga::EWAddArgs ewaddArgs = {0};
   // ewaddArgs.relu_enabled = relu_enabled;
   ewaddArgs.output.activation.activation_type = activation_enable;
...
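C1 and C2 above are the per-input requantization coefficients for elementwise add; presumably they are written into ewaddArgs further down the hunk. The arithmetic, assuming symmetric quantization q = 127 * x / S:

// x = Si_1 * x_q / 127 and y = Si_2 * y_q / 127, so
// out_q = 127 * (x + y) / So = (Si_1 / So) * x_q + (Si_2 / So) * y_q.
float ew_add_out_q(float C1, float C2, float x_q, float y_q) {
  return C1 * x_q + C2 * y_q;
}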
@@ -32,7 +32,11 @@ bool ElementwiseAddReluKernel<FPGA, float>::Init(
   auto input_y_ptr = input_y->data<half>();
   fpga::format_fp16_ofm(out);
   auto out_ptr = out->mutable_data<half>();
+  float Si_1 = input_x->scale[0];
+  float Si_2 = input_y->scale[0];
+  float So = out->scale[0];
+  float C1 = Si_1 / So;
+  float C2 = Si_2 / So;
   fpga::EWAddArgs ewaddArgs = {0};
   // ewaddArgs.relu_enabled = relu_enabled;
   ewaddArgs.output.activation.activation_type = activation_enable;
...
@@ -29,6 +29,9 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
   const Tensor *input_z = param->InputZ();
   auto input_z_ptr = input_z->data<float>();
   auto out = param->Out();
+  float Si = input_x->scale[0];
+  float Sf = filter->scale[0];
+  float So = out->scale[0];
   // PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == filter->dims()[0],
   //                       "Image channel should be equal to weight number");
@@ -36,8 +39,10 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
   auto bs_ptr =
       (float *)fpga::fpga_malloc(2 * channel * sizeof(float));  // NOLINT
   for (int i = 0; i < channel; i++) {
-    bs_ptr[i + channel] = 1;
-    bs_ptr[i] = input_z_ptr[i];
+    // bs_ptr[i + channel] = 1;
+    // bs_ptr[i] = input_z_ptr[i];
+    bs_ptr[i + channel] = Si / So * Sf / 127.0f;
+    bs_ptr[i] = input_z_ptr[i] * 127.0f / So;
   }
   int num = (uint32_t)filter->dims()[1];
   int chw = (uint32_t)filter->dims()[0];
...
@@ -29,6 +29,9 @@ bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) {
   const Tensor *input_z = param->InputZ();
   auto input_z_ptr = input_z->data<float>();
   auto out = param->Out();
+  float Si = input_x->scale[0];
+  float Sf = filter->scale[0];
+  float So = out->scale[0];
   // PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == filter->dims()[0],
   //                       "Image channel should be equal to weight number");
@@ -36,8 +39,10 @@ bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) {
   auto bs_ptr =
       (float *)fpga::fpga_malloc(2 * channel * sizeof(float));  // NOLINT
   for (int i = 0; i < channel; i++) {
-    bs_ptr[i + channel] = 1;
-    bs_ptr[i] = input_z_ptr[i];
+    // bs_ptr[i + channel] = 1;
+    // bs_ptr[i] = input_z_ptr[i];
+    bs_ptr[i + channel] = Si / So * Sf / 127.0f;
+    bs_ptr[i] = input_z_ptr[i] * 127.0f / So;
   }
   int num = (uint32_t)filter->dims()[1];
   int chw = (uint32_t)filter->dims()[0];
...
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PAD2D_OP
#include "operators/kernel/pad2d_kernel.h"
namespace paddle_mobile {
namespace operators {
template <>
bool Pad2DKernel<FPGA, float>::Init(Pad2DParam<FPGA> *param) {
Tensor *output = param->output_;
fpga::format_fp16_ofm(output);
return true;
}
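// Copies each input row (input_c * input_w fp16 values) into the start of
// the corresponding output row; output rows are strided by
// align_to_x(output_c * output_w, IMAGE_ALIGNMENT).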
void pad2dFunc(const framework::Tensor *input, framework::Tensor *output) {
auto input_data = (input->data<half>());
auto output_data = (output->data<half>());
auto input_c = input->dims()[1];
auto input_h = input->dims()[2];
auto input_w = input->dims()[3];
auto output_c = output->dims()[1];
auto output_w = output->dims()[3];
auto copysize = input_c * input_w;
for (int h = 0; h < input_h; ++h) {
auto input_offset = h * input_c * input_w;
auto output_offset = h * paddle_mobile::fpga::align_to_x(
output_c * output_w, IMAGE_ALIGNMENT);
memcpy((output_data + output_offset), (input_data + input_offset),
copysize * sizeof(half));
}
}
template <>
void Pad2DKernel<FPGA, float>::Compute(const Pad2DParam<FPGA> &param) {
auto in_x = param.input_;
auto out = param.output_;
fpga::fpga_invalidate((void *)in_x->data<half>(), // NOLINT
in_x->numel() * sizeof(half));
pad2dFunc(in_x, out);
(out->scale)[0] = (in_x->scale)[0];
(out->scale)[1] = (in_x->scale)[1];
DLOG << (out->scale)[0];
DLOG << (out->scale)[1];
size_t outputSize =
out->dims()[2] *
paddle_mobile::fpga::align_to_x((out->dims()[1]) * (out->dims()[3]),
IMAGE_ALIGNMENT) *
sizeof(half);
fpga::fpga_flush(out->data<half>(), outputSize);
}
} // namespace operators
} // namespace paddle_mobile
#endif // PAD2D_OP
@@ -44,11 +44,13 @@ bool PoolKernel<FPGA, float>::Init(PoolParam<FPGA> *param) {
   auto input_ptr = input->data<half>();
   fpga::format_fp16_ofm(output);
   auto output_ptr = output->mutable_data<half>();
+  float Si = input->scale[0];
+  float So = output->scale[0];
   fpga::PoolingArgs poolArgs = {0};
   poolArgs.mode = pooling_type == "max" ? 0 : 1;  // max:0, avg:1
-  poolArgs.kernel_reciprocal =
-      fpga::fp32_2_fp16(float(1.0 / (ksize[0] * ksize[1])));  // NOLINT
+  poolArgs.kernel_reciprocal = fpga::fp32_2_fp16(
+      float(1.0 / (ksize[0] * ksize[1]) * Si / So));  // NOLINT
   poolArgs.image.address = input_ptr;
   poolArgs.image.channels = (uint32_t)input->dims()[1];
   poolArgs.image.height = (uint32_t)input->dims()[2];
...
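For average pooling the hardware scales the window sum by kernel_reciprocal, so the Si / So requantization ratio can be folded straight into that one fp16 coefficient. A sketch of the folded value (avg_pool_coeff is an illustrative name):

// 1/(kh*kw) averages the window; Si/So rescales from the input's int8
// domain to the output's, matching the fp32_2_fp16 argument above.
float avg_pool_coeff(int kh, int kw, float Si, float So) {
  return 1.0f / (kh * kw) * Si / So;
}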
@@ -18,8 +18,8 @@ limitations under the License. */
 #include <vector>
 #include "operators/kernel/detection_kernel.h"
-#include "fpga/V1/api.h"
-#include "fpga/V1/image.h"
+#include "fpga/V2/api.h"
+#include "fpga/V2/image.h"
 namespace paddle_mobile {
 namespace operators {
...
@@ -18,8 +18,8 @@ limitations under the License. */
 #include <vector>
 #include "operators/kernel/detection_kernel.h"
-#include "fpga/V1/api.h"
-#include "fpga/V1/image.h"
+#include "fpga/V2/api.h"
+#include "fpga/V2/image.h"
 namespace paddle_mobile {
 namespace operators {
...
@@ -163,6 +163,26 @@ if (CON GREATER -1)
   set(SPLIT_OP ON)
   set(FUSION_DECONVADD_OP ON)
   set(FUSION_DECONVADDRELU_OP ON)
+  set(RESHAPE_OP ON)
+  set(FUSION_CONVADDBNRELU_OP ON)
+  set(FUSION_CONVADDBN_OP ON)
+  set(RESHAPE2_OP ON)
+  set(PSROI_POOL_OP ON)
+  set(ROIALIGN_POOL_OP ON)
+  set(PROPOSAL_OP ON)
+  set(ANCHOR_GENERATOR_OP ON)
+  set(SLICE_OP ON)
+  set(SIGMOID_OP ON)
+  set(CONCAT_OP ON)
+  set(CONV_TRANSPOSE_OP ON)
+  set(FUSION_DECONVADDBNRELU_OP ON)
+  set(FUSION_DECONVADDBN_OP ON)
+  set(FUSION_DECONVBNRELU_OP ON)
+  set(CONV_OP ON)
+  set(ELEMENTWISEMUL_OP ON)
+  set(FUSION_FCRELU_OP ON)
+  set(RELU_OP ON)
   set(FOUND_MATCH ON)
 endif()
...