Commit 048cb9a7 authored by qnqinan

update some files in FPGA V2

Parent 7d999238
@@ -33,6 +33,9 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
   auto filter = const_cast<LoDTensor *>(param->Filter());
   auto out = param->Output();
+  float Si = input->scale[0];
+  float So = out->scale[0];
+  float Sf = fpga::filter_find_max(filter);
   auto bn_mean_ptr = param->InputMean()->data<float>();
   auto bn_var_ptr = param->InputVariance()->data<float>();
@@ -56,8 +59,10 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
         static_cast<float>(pow((bn_var_ptr[i] + epsilon), 0.5));
     new_bias_ptr[i] =
         bn_bias_ptr[i] + (bias_ptr[i] - bn_mean_ptr[i]) * new_scale_ptr[i];
-    bs_ptr[i + channel] = new_scale_ptr[i];
-    bs_ptr[i] = new_bias_ptr[i];
+    // bs_ptr[i + channel] = new_scale_ptr[i];
+    // bs_ptr[i] = new_bias_ptr[i];
+    bs_ptr[i + channel] = new_scale_ptr[i] * Si / So * Sf / 127.0;
+    bs_ptr[i] = new_bias_ptr[i] * 127.0 / So;
   }
   fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
......
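The same pattern repeats in each conv kernel below: the batch-norm statistics are first folded into a per-channel affine pair (new_scale, new_bias), and the new code then rescales that pair by the int8 quantization factors Si / So * Sf / 127 and 127 / So. The following is a minimal standalone sketch of that arithmetic, not the paddle-mobile implementation; all inputs are placeholders for values the real kernel reads from the fusion param:

```cpp
#include <cmath>
#include <vector>

// Standalone sketch of the per-channel scale/bias preparation done in
// ConvAddBNKernel::Init. Names mirror the diff; inputs are placeholders.
std::vector<float> make_bs(const std::vector<float> &bn_scale,
                           const std::vector<float> &bn_bias,
                           const std::vector<float> &bn_mean,
                           const std::vector<float> &bn_var,
                           const std::vector<float> &conv_bias,
                           float Si, float So, float Sf,
                           float epsilon = 1e-5f) {
  const size_t channel = bn_scale.size();
  std::vector<float> bs(2 * channel);
  for (size_t i = 0; i < channel; i++) {
    // Fold BN into an affine transform: y = s * x + b.
    float s = bn_scale[i] / std::sqrt(bn_var[i] + epsilon);
    float b = bn_bias[i] + (conv_bias[i] - bn_mean[i]) * s;
    // Requantize for the int8 pipeline: the accumulator is in
    // (input x filter) integer units, the output must be in So units.
    bs[i + channel] = s * Si / So * Sf / 127.0f;  // scale slot
    bs[i] = b * 127.0f / So;                      // bias slot
  }
  return bs;
}
```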
@@ -32,7 +32,9 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
   auto bias_ptr = bias->data<float>();
   auto filter = const_cast<LoDTensor *>(param->Filter());
   auto out = param->Output();
+  float Si = input->scale[0];
+  float So = out->scale[0];
+  float Sf = fpga::filter_find_max(filter);
   vector<int> paddings = param->Paddings();
   vector<int> strides = param->Strides();
   auto bn_mean_ptr = param->InputMean()->data<float>();
@@ -57,8 +59,10 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
         static_cast<float>(pow((bn_var_ptr[i] + epsilon), 0.5));
     new_bias_ptr[i] =
         bn_bias_ptr[i] + (bias_ptr[i] - bn_mean_ptr[i]) * new_scale_ptr[i];
-    bs_ptr[i + channel] = new_scale_ptr[i];
-    bs_ptr[i] = new_bias_ptr[i];
+    // bs_ptr[i + channel] = new_scale_ptr[i];
+    // bs_ptr[i] = new_bias_ptr[i];
+    bs_ptr[i + channel] = new_scale_ptr[i] * Si / So * Sf / 127.0;
+    bs_ptr[i] = new_bias_ptr[i] * 127.0 / So;
   }
   const int groups = param->Groups();
......
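All four kernels obtain the three scales the same way: Si and So come from the input and output tensors' scale[0] fields, while Sf comes from fpga::filter_find_max. That helper's V2 implementation is not part of this diff; as a rough mental model only, it amounts to a max-absolute-value scan over the float filter weights, along the lines of this hypothetical sketch:

```cpp
#include <algorithm>
#include <cmath>
#include <cstddef>

// Hypothetical stand-in for fpga::filter_find_max: the largest absolute
// weight, i.e. the symmetric quantization range of the filter tensor.
// The actual V2 helper may differ; this only illustrates the idea.
float filter_find_max_sketch(const float *data, size_t count) {
  float max_abs = 0.0f;
  for (size_t i = 0; i < count; i++) {
    max_abs = std::max(max_abs, std::fabs(data[i]));
  }
  return max_abs;
}
```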
@@ -30,6 +30,9 @@ bool ConvAddKernel<FPGA, float>::Init(FusionConvAddParam<FPGA> *param) {
   auto bias_ptr = bias->data<float>();
   auto filter = const_cast<LoDTensor *>(param->Filter());
   auto out = param->Output();
+  float Si = input->scale[0];
+  float So = out->scale[0];
+  float Sf = fpga::filter_find_max(filter);
   PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
                         "Output channel should be equal to bias number");
@@ -37,8 +40,10 @@ bool ConvAddKernel<FPGA, float>::Init(FusionConvAddParam<FPGA> *param) {
   auto bs_ptr =
       (float *)fpga::fpga_malloc(2 * channel * sizeof(float));  // NOLINT
   for (int i = 0; i < channel; i++) {
-    bs_ptr[i + channel] = 1;
-    bs_ptr[i] = bias_ptr[i];
+    // bs_ptr[i + channel] = 1;
+    // bs_ptr[i] = bias_ptr[i];
+    bs_ptr[i + channel] = Si / So * Sf / 127.0;
+    bs_ptr[i] = bias_ptr[i] * 127.0 / So;
   }
   fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
......
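Why these particular factors? Assuming the symmetric int8 convention x_q = round(127 * x / S) for a tensor with scale S, the convolution accumulator holds acc ≈ (127/Si)(127/Sf) * sum(x*w), so producing the output at scale So requires y_q = acc * Si*Sf/(127*So) + bias * 127/So, which is exactly the per-channel scale and bias the loop writes into bs_ptr. A quick numeric self-check under that assumption, with made-up values:

```cpp
#include <cassert>
#include <cmath>
#include <cstdio>

// Numeric self-check of the requantization factors, assuming the symmetric
// convention x_q = round(127 * x / S). All values here are made up.
int main() {
  const float Si = 4.0f, Sf = 2.0f, So = 8.0f;
  const float x = 1.5f, w = 0.5f, bias = 0.25f;
  // Quantize input and filter (values chosen so quantization is exact).
  float x_q = 127.0f * x / Si;  // 47.625 (real hardware would round)
  float w_q = 127.0f * w / Sf;  // 31.75
  float acc = x_q * w_q;        // integer-domain accumulator
  // Requantize with the factors from the diff.
  float y_q = acc * (Si / So * Sf / 127.0f) + bias * 127.0f / So;
  // Reference: quantize the float result directly.
  float y_ref = 127.0f * (x * w + bias) / So;
  printf("y_q=%f y_ref=%f\n", y_q, y_ref);  // both 15.875
  assert(std::fabs(y_q - y_ref) < 1e-4f);
  return 0;
}
```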
@@ -30,6 +30,9 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
   auto bias_ptr = bias->data<float>();
   auto filter = const_cast<LoDTensor *>(param->Filter());
   auto out = param->Output();
+  float Si = input->scale[0];
+  float So = out->scale[0];
+  float Sf = fpga::filter_find_max(filter);
   PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
                         "Output channel should be equal to bias number");
@@ -37,8 +40,10 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
   auto bs_ptr =
       (float *)fpga::fpga_malloc(2 * channel * sizeof(float));  // NOLINT
   for (int i = 0; i < channel; i++) {
-    bs_ptr[i + channel] = 1;
-    bs_ptr[i] = bias_ptr[i];
+    // bs_ptr[i + channel] = 1;
+    // bs_ptr[i] = bias_ptr[i];
+    bs_ptr[i + channel] = Si / So * Sf / 127.0;
+    bs_ptr[i] = bias_ptr[i] * 127.0 / So;
  }
   fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
......
@@ -16,7 +16,7 @@ limitations under the License. */
 #include "operators/kernel/elementwise_add_kernel.h"
 #include <string>
-#include "fpga/V1/api.h"
+#include "fpga/V2/api.h"
 namespace paddle_mobile {
 namespace operators {
......
@@ -18,8 +18,8 @@ limitations under the License. */
 #include <vector>
 #include "operators/kernel/detection_kernel.h"
-#include "fpga/V1/api.h"
-#include "fpga/V1/image.h"
+#include "fpga/V2/api.h"
+#include "fpga/V2/image.h"
 namespace paddle_mobile {
 namespace operators {
......
@@ -18,8 +18,8 @@ limitations under the License. */
 #include <vector>
 #include "operators/kernel/detection_kernel.h"
-#include "fpga/V1/api.h"
-#include "fpga/V1/image.h"
+#include "fpga/V2/api.h"
+#include "fpga/V2/image.h"
 namespace paddle_mobile {
 namespace operators {
......