From 4286caa26cf862e20e50ffe537a00c085f472278 Mon Sep 17 00:00:00 2001 From: qnqinan Date: Mon, 22 Apr 2019 20:37:21 +0800 Subject: [PATCH] update some files related with static quantization in FPGA V2 track --- .../fpga/V2/conv_add_bn_relu_kernel.cpp | 6 +- .../kernel/fpga/V2/conv_transpose_kernel.cpp | 11 ++++ .../kernel/fpga/V2/deconv_add_bn_kernel.cpp | 12 +++- .../fpga/V2/deconv_add_bn_relu_kernel.cpp | 12 +++- .../kernel/fpga/V2/deconv_add_kernel.cpp | 12 +++- .../kernel/fpga/V2/deconv_add_relu_kernel.cpp | 12 +++- .../kernel/fpga/V2/deconv_bn_relu_kernel.cpp | 23 +++++-- .../kernel/fpga/V2/elementwise_add_kernel.cpp | 6 +- .../fpga/V2/elementwise_add_relu_kernel.cpp | 6 +- .../kernel/fpga/V2/fusion_fc_kernel.cpp | 9 ++- .../kernel/fpga/V2/fusion_fc_relu_kernel.cpp | 9 ++- src/operators/kernel/fpga/V2/pad2d_kernel.cpp | 61 ------------------- src/operators/kernel/fpga/V2/pool_kernel.cpp | 6 +- tools/op.cmake | 20 ++++++ 14 files changed, 126 insertions(+), 79 deletions(-) delete mode 100644 src/operators/kernel/fpga/V2/pad2d_kernel.cpp diff --git a/src/operators/kernel/fpga/V2/conv_add_bn_relu_kernel.cpp b/src/operators/kernel/fpga/V2/conv_add_bn_relu_kernel.cpp index ded6654081..d16ec56d70 100644 --- a/src/operators/kernel/fpga/V2/conv_add_bn_relu_kernel.cpp +++ b/src/operators/kernel/fpga/V2/conv_add_bn_relu_kernel.cpp @@ -32,6 +32,7 @@ bool ConvAddBNReluKernel::Init( auto bias_ptr = bias->data(); auto filter = const_cast(param->Filter()); auto out = param->Output(); + const int groups = param->Groups(); float Si = input->scale[0]; float So = out->scale[0]; float Sf = fpga::filter_find_max(filter); @@ -63,9 +64,12 @@ bool ConvAddBNReluKernel::Init( // bs_ptr[i] = new_bias_ptr[i]; bs_ptr[i + channel] = new_scale_ptr[i] * Si / So * Sf / 127.0; bs_ptr[i] = new_bias_ptr[i] * 127.0 / So; + if (groups == channel) { + new_scale_ptr[i] = new_scale_ptr[i] * Si / So; + new_bias_ptr[i] = new_bias_ptr[i] * 127.0f / So; + } } - const int groups = param->Groups(); if 
(groups == channel) { fpga::format_dwconv_data(filter, out, new_scale_ptr, &new_bias_ptr); fpga::DWconvArgs dwconv_arg = {0}; diff --git a/src/operators/kernel/fpga/V2/conv_transpose_kernel.cpp b/src/operators/kernel/fpga/V2/conv_transpose_kernel.cpp index 1597885e43..76889b0dd9 100644 --- a/src/operators/kernel/fpga/V2/conv_transpose_kernel.cpp +++ b/src/operators/kernel/fpga/V2/conv_transpose_kernel.cpp @@ -32,6 +32,9 @@ bool ConvTransposeKernel::Init(ConvTransposeParam *param) { // auto bias_ptr = bias->data(); auto filter = const_cast(param->Filter()); auto out = param->Output(); + float Si = input->scale[0]; + float So = out->scale[0]; + float Sf = fpga::filter_find_max(filter); // PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0], // "Output channel should be equal to bias number"); @@ -53,6 +56,10 @@ bool ConvTransposeKernel::Init(ConvTransposeParam *param) { PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0), "filter axis should be the multiple of stride axis "); if (param->Groups() == channel) { + for (int i = 0; i < channel * sub_conv_n; i++) { + bs_ptr[i + sub_conv_n * channel] = Si / So; + bs_ptr[i] = 0; // bias_ptr[i % (channel)]; + } fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n); fpga::DWDeconvArgs DWDeconv_arg = {0}; @@ -62,6 +69,10 @@ bool ConvTransposeKernel::Init(ConvTransposeParam *param) { param->Paddings()[0], param->Paddings()[1], bs_ptr); param->SetFpgaArgs(DWDeconv_arg); } else { + for (int i = 0; i < channel * sub_conv_n; i++) { + bs_ptr[i + sub_conv_n * channel] = Si / So * Sf / 127.0f; + bs_ptr[i] = 0; // bias_ptr[i % (channel)]; + } fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n); fpga::DeconvArgs deconv_arg = {0}; fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable, diff --git a/src/operators/kernel/fpga/V2/deconv_add_bn_kernel.cpp b/src/operators/kernel/fpga/V2/deconv_add_bn_kernel.cpp index a8205df3c9..5e3417f8c6 100644 --- 
a/src/operators/kernel/fpga/V2/deconv_add_bn_kernel.cpp +++ b/src/operators/kernel/fpga/V2/deconv_add_bn_kernel.cpp @@ -32,7 +32,9 @@ bool DeconvAddBNKernel::Init(FusionDeconvAddBNParam *param) { auto bias_ptr = bias->data(); auto filter = const_cast(param->Filter()); auto out = param->Output(); - + float Si = input->scale[0]; + float So = out->scale[0]; + float Sf = fpga::filter_find_max(filter); PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0], "Output channel should be equal to bias number"); int channel = out->dims()[1]; @@ -53,6 +55,10 @@ bool DeconvAddBNKernel::Init(FusionDeconvAddBNParam *param) { PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0), "filter axis should be the multiple of stride axis "); if (param->Groups() == channel) { + for (int i = 0; i < channel * sub_conv_n; i++) { + bs_ptr[i + sub_conv_n * channel] = Si / So; + bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So; + } fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n); fpga::DWDeconvArgs DWDeconv_arg = {0}; @@ -62,6 +68,10 @@ bool DeconvAddBNKernel::Init(FusionDeconvAddBNParam *param) { param->Paddings()[0], param->Paddings()[1], bs_ptr); param->SetFpgaArgs(DWDeconv_arg); } else { + for (int i = 0; i < channel * sub_conv_n; i++) { + bs_ptr[i + sub_conv_n * channel] = Si / So * Sf / 127.0f; + bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So; + } fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n); fpga::DeconvArgs deconv_arg = {0}; fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable, diff --git a/src/operators/kernel/fpga/V2/deconv_add_bn_relu_kernel.cpp b/src/operators/kernel/fpga/V2/deconv_add_bn_relu_kernel.cpp index b27f5cf870..2913a628dd 100644 --- a/src/operators/kernel/fpga/V2/deconv_add_bn_relu_kernel.cpp +++ b/src/operators/kernel/fpga/V2/deconv_add_bn_relu_kernel.cpp @@ -33,7 +33,9 @@ bool DeconvAddBNReluKernel::Init( auto bias_ptr = bias->data(); auto filter = 
const_cast(param->Filter()); auto out = param->Output(); - + float Si = input->scale[0]; + float So = out->scale[0]; + float Sf = fpga::filter_find_max(filter); PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0], "Output channel should be equal to bias number"); int channel = out->dims()[1]; @@ -54,6 +56,10 @@ bool DeconvAddBNReluKernel::Init( PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0), "filter axis should be the multiple of stride axis "); if (param->Groups() == channel) { + for (int i = 0; i < channel * sub_conv_n; i++) { + bs_ptr[i + sub_conv_n * channel] = Si / So; + bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So; + } fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n); fpga::DWDeconvArgs DWDeconv_arg = {0}; @@ -63,6 +69,10 @@ bool DeconvAddBNReluKernel::Init( param->Paddings()[0], param->Paddings()[1], bs_ptr); param->SetFpgaArgs(DWDeconv_arg); } else { + for (int i = 0; i < channel * sub_conv_n; i++) { + bs_ptr[i + sub_conv_n * channel] = Si / So * Sf / 127.0f; + bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So; + } fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n); fpga::DeconvArgs deconv_arg = {0}; fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable, diff --git a/src/operators/kernel/fpga/V2/deconv_add_kernel.cpp b/src/operators/kernel/fpga/V2/deconv_add_kernel.cpp index 41844d008b..dcafcbea9c 100644 --- a/src/operators/kernel/fpga/V2/deconv_add_kernel.cpp +++ b/src/operators/kernel/fpga/V2/deconv_add_kernel.cpp @@ -32,7 +32,9 @@ bool DeconvAddKernel::Init(FusionDeconvAddParam *param) { auto bias_ptr = bias->data(); auto filter = const_cast(param->Filter()); auto out = param->Output(); - + float Si = input->scale[0]; + float So = out->scale[0]; + float Sf = fpga::filter_find_max(filter); PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0], "Output channel should be equal to bias number"); int channel = out->dims()[1]; @@ -53,6 +55,10 @@ bool 
DeconvAddKernel::Init(FusionDeconvAddParam *param) { PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0), "filter axis should be the multiple of stride axis "); if (param->Groups() == channel) { + for (int i = 0; i < channel * sub_conv_n; i++) { + bs_ptr[i + sub_conv_n * channel] = Si / So; + bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So; + } fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n); fpga::DWDeconvArgs DWDeconv_arg = {0}; @@ -62,6 +68,10 @@ bool DeconvAddKernel::Init(FusionDeconvAddParam *param) { param->Paddings()[0], param->Paddings()[1], bs_ptr); param->SetFpgaArgs(DWDeconv_arg); } else { + for (int i = 0; i < channel * sub_conv_n; i++) { + bs_ptr[i + sub_conv_n * channel] = Si / So * Sf / 127.0f; + bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So; + } fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n); fpga::DeconvArgs deconv_arg = {0}; fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable, diff --git a/src/operators/kernel/fpga/V2/deconv_add_relu_kernel.cpp b/src/operators/kernel/fpga/V2/deconv_add_relu_kernel.cpp index c6fc9d1955..1364b4b5aa 100644 --- a/src/operators/kernel/fpga/V2/deconv_add_relu_kernel.cpp +++ b/src/operators/kernel/fpga/V2/deconv_add_relu_kernel.cpp @@ -33,7 +33,9 @@ bool DeconvAddReluKernel::Init( auto bias_ptr = bias->data(); auto filter = const_cast(param->Filter()); auto out = param->Output(); - + float Si = input->scale[0]; + float So = out->scale[0]; + float Sf = fpga::filter_find_max(filter); PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0], "Output channel should be equal to bias number"); int channel = out->dims()[1]; @@ -54,6 +56,10 @@ bool DeconvAddReluKernel::Init( PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0), "filter axis should be the multiple of stride axis "); if (param->Groups() == channel) { + for (int i = 0; i < channel * sub_conv_n; i++) { + bs_ptr[i + sub_conv_n * channel] = Si / 
So; + bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So; + } fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n); fpga::DWDeconvArgs DWDeconv_arg = {0}; @@ -63,6 +69,10 @@ bool DeconvAddReluKernel::Init( param->Paddings()[0], param->Paddings()[1], bs_ptr); param->SetFpgaArgs(DWDeconv_arg); } else { + for (int i = 0; i < channel * sub_conv_n; i++) { + bs_ptr[i + sub_conv_n * channel] = Si / So * Sf / 127.0f; + bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So; + } fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n); fpga::DeconvArgs deconv_arg = {0}; fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable, diff --git a/src/operators/kernel/fpga/V2/deconv_bn_relu_kernel.cpp b/src/operators/kernel/fpga/V2/deconv_bn_relu_kernel.cpp index 75597f0ecd..6aae1ea729 100644 --- a/src/operators/kernel/fpga/V2/deconv_bn_relu_kernel.cpp +++ b/src/operators/kernel/fpga/V2/deconv_bn_relu_kernel.cpp @@ -34,6 +34,9 @@ bool DeconvBNReluKernel::Init( auto bias_ptr = bias->data(); auto filter = const_cast(param->Filter()); auto out = param->Output(); + float Si = input->scale[0]; + float So = out->scale[0]; + float Sf = fpga::filter_find_max(filter); auto bn_mean_ptr = param->InputMean()->data(); auto bn_var_ptr = param->InputVariance()->data(); auto bn_scale_ptr = param->InputScale()->data(); @@ -56,12 +59,22 @@ bool DeconvBNReluKernel::Init( int sub_conv_n = param->Strides()[0]; auto bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sub_conv_n * // NOLINT sizeof(float)); // NOLINT - - for (int i = 0; i < channel * sub_conv_n; i++) { - bs_ptr[i + sub_conv_n * channel] = new_scale_ptr[i % channel]; - bs_ptr[i] = new_bias_ptr[i % (channel)]; + // for (int i = 0; i < channel * sub_conv_n; i++) { + // bs_ptr[i + sub_conv_n * channel] = new_scale_ptr[i % channel]; + // bs_ptr[i] = new_bias_ptr[i % (channel)]; + // } + if (param->Groups() == channel) { + for (int i = 0; i < channel * sub_conv_n; i++) { + bs_ptr[i + 
sub_conv_n * channel] = new_scale_ptr[i % channel] * Si / So; + bs_ptr[i] = new_bias_ptr[i % (channel)] * 127.0f / So; + } + } else { + for (int i = 0; i < channel * sub_conv_n; i++) { + bs_ptr[i + sub_conv_n * channel] = + new_scale_ptr[i % channel] * Si / So * Sf / 127.0f; + bs_ptr[i] = new_bias_ptr[i % (channel)] * 127.0f / So; + } } - PADDLE_MOBILE_ENFORCE(param->Strides()[1] == param->Strides()[0], "stride_width should be equal to stride_height "); PADDLE_MOBILE_ENFORCE(filter->dims()[2] == filter->dims()[3], diff --git a/src/operators/kernel/fpga/V2/elementwise_add_kernel.cpp b/src/operators/kernel/fpga/V2/elementwise_add_kernel.cpp index eec058edc4..145d7851f0 100644 --- a/src/operators/kernel/fpga/V2/elementwise_add_kernel.cpp +++ b/src/operators/kernel/fpga/V2/elementwise_add_kernel.cpp @@ -34,7 +34,11 @@ bool ElementwiseAddKernel::Init(ElementwiseAddParam *param) { auto input_y_ptr = input_y->data(); fpga::format_fp16_ofm(out); auto out_ptr = out->mutable_data(); - + float Si_1 = input_x->scale[0]; + float Si_2 = input_y->scale[0]; + float So = out->scale[0]; + float C1 = Si_1 / So; + float C2 = Si_2 / So; fpga::EWAddArgs ewaddArgs = {0}; // ewaddArgs.relu_enabled = relu_enabled; ewaddArgs.output.activation.activation_type = activation_enable; diff --git a/src/operators/kernel/fpga/V2/elementwise_add_relu_kernel.cpp b/src/operators/kernel/fpga/V2/elementwise_add_relu_kernel.cpp index f36206a8a1..44266049a2 100644 --- a/src/operators/kernel/fpga/V2/elementwise_add_relu_kernel.cpp +++ b/src/operators/kernel/fpga/V2/elementwise_add_relu_kernel.cpp @@ -32,7 +32,11 @@ bool ElementwiseAddReluKernel::Init( auto input_y_ptr = input_y->data(); fpga::format_fp16_ofm(out); auto out_ptr = out->mutable_data(); - + float Si_1 = input_x->scale[0]; + float Si_2 = input_y->scale[0]; + float So = out->scale[0]; + float C1 = Si_1 / So; + float C2 = Si_2 / So; fpga::EWAddArgs ewaddArgs = {0}; // ewaddArgs.relu_enabled = relu_enabled; 
ewaddArgs.output.activation.activation_type = activation_enable; diff --git a/src/operators/kernel/fpga/V2/fusion_fc_kernel.cpp b/src/operators/kernel/fpga/V2/fusion_fc_kernel.cpp index 3a29104d0f..1f85beb532 100644 --- a/src/operators/kernel/fpga/V2/fusion_fc_kernel.cpp +++ b/src/operators/kernel/fpga/V2/fusion_fc_kernel.cpp @@ -29,6 +29,9 @@ bool FusionFcKernel::Init(FusionFcParam *param) { const Tensor *input_z = param->InputZ(); auto input_z_ptr = input_z->data(); auto out = param->Out(); + float Si = input_x->scale[0]; + float Sf = filter->scale[0]; + float So = out->scale[0]; // PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == filter->dims()[0], // "Image channel should be equal to weight number"); @@ -36,8 +39,10 @@ bool FusionFcKernel::Init(FusionFcParam *param) { auto bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float)); // NOLINT for (int i = 0; i < channel; i++) { - bs_ptr[i + channel] = 1; - bs_ptr[i] = input_z_ptr[i]; + // bs_ptr[i + channel] = 1; + // bs_ptr[i] = input_z_ptr[i]; + bs_ptr[i + channel] = Si / So * Sf / 127.0f; + bs_ptr[i] = input_z_ptr[i] * 127.0f / So; } int num = (uint32_t)filter->dims()[1]; int chw = (uint32_t)filter->dims()[0]; diff --git a/src/operators/kernel/fpga/V2/fusion_fc_relu_kernel.cpp b/src/operators/kernel/fpga/V2/fusion_fc_relu_kernel.cpp index fef370515e..0ccec45195 100644 --- a/src/operators/kernel/fpga/V2/fusion_fc_relu_kernel.cpp +++ b/src/operators/kernel/fpga/V2/fusion_fc_relu_kernel.cpp @@ -29,6 +29,9 @@ bool FusionFcReluKernel::Init(FusionFcReluParam *param) { const Tensor *input_z = param->InputZ(); auto input_z_ptr = input_z->data(); auto out = param->Out(); + float Si = input_x->scale[0]; + float Sf = filter->scale[0]; + float So = out->scale[0]; // PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == filter->dims()[0], // "Image channel should be equal to weight number"); @@ -36,8 +39,10 @@ bool FusionFcReluKernel::Init(FusionFcReluParam *param) { auto bs_ptr = (float *)fpga::fpga_malloc(2 * channel * 
sizeof(float)); // NOLINT for (int i = 0; i < channel; i++) { - bs_ptr[i + channel] = 1; - bs_ptr[i] = input_z_ptr[i]; + // bs_ptr[i + channel] = 1; + // bs_ptr[i] = input_z_ptr[i]; + bs_ptr[i + channel] = Si / So * Sf / 127.0f; + bs_ptr[i] = input_z_ptr[i] * 127.0f / So; } int num = (uint32_t)filter->dims()[1]; int chw = (uint32_t)filter->dims()[0]; diff --git a/src/operators/kernel/fpga/V2/pad2d_kernel.cpp b/src/operators/kernel/fpga/V2/pad2d_kernel.cpp deleted file mode 100644 index e5328dc319..0000000000 --- a/src/operators/kernel/fpga/V2/pad2d_kernel.cpp +++ /dev/null @@ -1,61 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ -#ifdef PAD2D_OP -#include "operators/kernel/pad2d_kernel.h" -namespace paddle_mobile { -namespace operators { -template <> -bool Pad2DKernel::Init(Pad2DParam *param) { - Tensor *output = param->output_; - fpga::format_fp16_ofm(output); - return true; -} -void pad2dFunc(const framework::Tensor *input, framework::Tensor *output) { - auto input_data = (input->data()); - auto output_data = (output->data()); - auto input_c = input->dims()[1]; - auto input_h = input->dims()[2]; - auto input_w = input->dims()[3]; - auto output_c = output->dims()[1]; - auto output_w = output->dims()[3]; - auto copysize = input_c * input_w; - for (int h = 0; h < input_h; ++h) { - auto input_offset = h * input_c * input_w; - auto output_offset = h * paddle_mobile::fpga::align_to_x( - output_c * output_w, IMAGE_ALIGNMENT); - memcpy((output_data + output_offset), (input_data + input_offset), - copysize * sizeof(half)); - } -} -template <> -void Pad2DKernel::Compute(const Pad2DParam &param) { - auto in_x = param.input_; - auto out = param.output_; - fpga::fpga_invalidate((void *)in_x->data(), // NOLINT - in_x->numel() * sizeof(half)); - pad2dFunc(in_x, out); - (out->scale)[0] = (in_x->scale)[0]; - (out->scale)[1] = (in_x->scale)[1]; - DLOG << (out->scale)[0]; - DLOG << (out->scale)[1]; - size_t outputSize = - out->dims()[2] * - paddle_mobile::fpga::align_to_x((out->dims()[1]) * (out->dims()[3]), - IMAGE_ALIGNMENT) * - sizeof(half); - fpga::fpga_flush(out->data(), outputSize); -} -} // namespace operators -} // namespace paddle_mobile -#endif // PAD2D_OP diff --git a/src/operators/kernel/fpga/V2/pool_kernel.cpp b/src/operators/kernel/fpga/V2/pool_kernel.cpp index 7c8dba1696..60bd3786aa 100644 --- a/src/operators/kernel/fpga/V2/pool_kernel.cpp +++ b/src/operators/kernel/fpga/V2/pool_kernel.cpp @@ -44,11 +44,13 @@ bool PoolKernel::Init(PoolParam *param) { auto input_ptr = input->data(); fpga::format_fp16_ofm(output); auto output_ptr = output->mutable_data(); + float Si = input->scale[0]; + float 
So = output->scale[0]; fpga::PoolingArgs poolArgs = {0}; poolArgs.mode = pooling_type == "max" ? 0 : 1; // max:0, avg:1 - poolArgs.kernel_reciprocal = - fpga::fp32_2_fp16(float(1.0 / (ksize[0] * ksize[1]))); // NOLINT + poolArgs.kernel_reciprocal = fpga::fp32_2_fp16( + float(1.0 / (ksize[0] * ksize[1]) * Si / So)); // NOLINT poolArgs.image.address = input_ptr; poolArgs.image.channels = (uint32_t)input->dims()[1]; poolArgs.image.height = (uint32_t)input->dims()[2]; diff --git a/tools/op.cmake b/tools/op.cmake index 5847c60e94..eb6501de22 100755 --- a/tools/op.cmake +++ b/tools/op.cmake @@ -163,6 +163,26 @@ if (CON GREATER -1) set(SPLIT_OP ON) set(FUSION_DECONVADD_OP ON) set(FUSION_DECONVADDRELU_OP ON) + + set(RESHAPE_OP ON) + set(FUSION_CONVADDBNRELU_OP ON) + set(FUSION_CONVADDBN_OP ON) + set(RESHAPE2_OP ON) + set(PSROI_POOL_OP ON) + set(ROIALIGN_POOL_OP ON) + set(PROPOSAL_OP ON) + set(ANCHOR_GENERATOR_OP ON) + set(SLICE_OP ON) + set(SIGMOID_OP ON) + set(CONCAT_OP ON) + set(CONV_TRANSPOSE_OP ON) + set(FUSION_DECONVADDBNRELU_OP ON) + set(FUSION_DECONVADDBN_OP ON) + set(FUSION_DECONVBNRELU_OP ON) + set(CONV_OP ON) + set(ELEMENTWISEMUL_OP ON) + set(FUSION_FCRELU_OP ON) + set(RELU_OP ON) set(FOUND_MATCH ON) endif() -- GitLab