diff --git a/src/operators/kernel/fpga/V2/conv_add_bn_relu_kernel.cpp b/src/operators/kernel/fpga/V2/conv_add_bn_relu_kernel.cpp
index ded66540814e23d0fe5fdafed0458b7ff072f5c7..d16ec56d701ae5a861bdca7f1a334da80073c78f 100644
--- a/src/operators/kernel/fpga/V2/conv_add_bn_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/V2/conv_add_bn_relu_kernel.cpp
@@ -32,6 +32,7 @@ bool ConvAddBNReluKernel::Init(
   auto bias_ptr = bias->data();
   auto filter = const_cast(param->Filter());
   auto out = param->Output();
+  const int groups = param->Groups();
   float Si = input->scale[0];
   float So = out->scale[0];
   float Sf = fpga::filter_find_max(filter);
@@ -63,9 +64,12 @@ bool ConvAddBNReluKernel::Init(
     // bs_ptr[i] = new_bias_ptr[i];
     bs_ptr[i + channel] = new_scale_ptr[i] * Si / So * Sf / 127.0;
     bs_ptr[i] = new_bias_ptr[i] * 127.0 / So;
+    if (groups == channel) {
+      new_scale_ptr[i] = new_scale_ptr[i] * Si / So;
+      new_bias_ptr[i] = new_bias_ptr[i] * 127.0f / So;
+    }
   }
-  const int groups = param->Groups();
   if (groups == channel) {
     fpga::format_dwconv_data(filter, out, new_scale_ptr, &new_bias_ptr);
     fpga::DWconvArgs dwconv_arg = {0};
diff --git a/src/operators/kernel/fpga/V2/conv_transpose_kernel.cpp b/src/operators/kernel/fpga/V2/conv_transpose_kernel.cpp
index 1597885e43e01895b6acd425031341af70d5eaf7..76889b0dd914c7e61a91c1de9c35faf9baf42648 100644
--- a/src/operators/kernel/fpga/V2/conv_transpose_kernel.cpp
+++ b/src/operators/kernel/fpga/V2/conv_transpose_kernel.cpp
@@ -32,6 +32,9 @@ bool ConvTransposeKernel::Init(ConvTransposeParam *param) {
   // auto bias_ptr = bias->data();
   auto filter = const_cast(param->Filter());
   auto out = param->Output();
+  float Si = input->scale[0];
+  float So = out->scale[0];
+  float Sf = fpga::filter_find_max(filter);
   // PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
   //                       "Output channel should be equal to bias number");
@@ -53,6 +56,10 @@ bool ConvTransposeKernel::Init(ConvTransposeParam *param) {
   PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0),
                         "filter axis should be the multiple of stride axis ");
   if (param->Groups() == channel) {
+    for (int i = 0; i < channel * sub_conv_n; i++) {
+      bs_ptr[i + sub_conv_n * channel] = Si / So;
+      bs_ptr[i] = 0;  // bias_ptr[i % (channel)];
+    }
     fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
                                sub_conv_n);
     fpga::DWDeconvArgs DWDeconv_arg = {0};
@@ -62,6 +69,10 @@ bool ConvTransposeKernel::Init(ConvTransposeParam *param) {
                           param->Paddings()[0], param->Paddings()[1], bs_ptr);
     param->SetFpgaArgs(DWDeconv_arg);
   } else {
+    for (int i = 0; i < channel * sub_conv_n; i++) {
+      bs_ptr[i + sub_conv_n * channel] = Si / So * Sf / 127.0f;
+      bs_ptr[i] = 0;  // bias_ptr[i % (channel)];
+    }
     fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
     fpga::DeconvArgs deconv_arg = {0};
     fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
diff --git a/src/operators/kernel/fpga/V2/deconv_add_bn_kernel.cpp b/src/operators/kernel/fpga/V2/deconv_add_bn_kernel.cpp
index a8205df3c9c1052055ba15ca58fd215f1d49ba0e..5e3417f8c69ea901d9d671981d543c6ce3e50340 100644
--- a/src/operators/kernel/fpga/V2/deconv_add_bn_kernel.cpp
+++ b/src/operators/kernel/fpga/V2/deconv_add_bn_kernel.cpp
@@ -32,7 +32,9 @@ bool DeconvAddBNKernel::Init(FusionDeconvAddBNParam *param) {
   auto bias_ptr = bias->data();
   auto filter = const_cast(param->Filter());
   auto out = param->Output();
-
+  float Si = input->scale[0];
+  float So = out->scale[0];
+  float Sf = fpga::filter_find_max(filter);
   PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
                         "Output channel should be equal to bias number");
   int channel = out->dims()[1];
@@ -53,6 +55,10 @@ bool DeconvAddBNKernel::Init(FusionDeconvAddBNParam *param) {
   PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0),
                         "filter axis should be the multiple of stride axis ");
   if (param->Groups() == channel) {
+    for (int i = 0; i < channel * sub_conv_n; i++) {
+      bs_ptr[i + sub_conv_n * channel] = Si / So;
+      bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So;
+    }
     fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
                                sub_conv_n);
     fpga::DWDeconvArgs DWDeconv_arg = {0};
@@ -62,6 +68,10 @@ bool DeconvAddBNKernel::Init(FusionDeconvAddBNParam *param) {
                           param->Paddings()[0], param->Paddings()[1], bs_ptr);
     param->SetFpgaArgs(DWDeconv_arg);
   } else {
+    for (int i = 0; i < channel * sub_conv_n; i++) {
+      bs_ptr[i + sub_conv_n * channel] = Si / So * Sf / 127.0f;
+      bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So;
+    }
     fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
     fpga::DeconvArgs deconv_arg = {0};
     fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
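The `Si / So * Sf / 127.0f` factor that appears in every `bs_ptr[i + channel]` above is the dequantize, scale, requantize chain folded into one per-channel multiplier. Below is a minimal sketch of that arithmetic, assuming the symmetric int8 model `q ≈ x * 127 / S` (with `S` a tensor's max-abs value, which is what `fpga::filter_find_max` appears to return); the helper names are illustrative, not the kernels' own.

```cpp
// Int8 conv accumulator: acc = sum(q_in * q_w), where q_in ~= x * 127 / Si
// and q_w ~= w * 127 / Sf, so conv_fp32 ~= acc * Si * Sf / (127 * 127).
// Requantizing y = scale * conv_fp32 + bias into q_out = y * 127 / So gives
//   q_out = acc * (scale * Si / So * Sf / 127) + bias * 127 / So,
// which is what these hunks write into the two halves of bs_ptr.
float fold_scale(float scale, float Si, float So, float Sf) {
  return scale * Si / So * Sf / 127.0f;  // bs_ptr[i + channel]
}
float fold_bias(float bias, float So) {
  return bias * 127.0f / So;  // bs_ptr[i]
}
```

The depthwise branches (`groups == channel`) drop the `Sf / 127` term, which is consistent with the DW engine consuming filters that are not int8-requantized.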
diff --git a/src/operators/kernel/fpga/V2/deconv_add_bn_relu_kernel.cpp b/src/operators/kernel/fpga/V2/deconv_add_bn_relu_kernel.cpp
index b27f5cf870d2e3220bec31ee63bb27361cb2c8cf..2913a628dddb9e89ce5dd327cb500f7c3e2d4390 100644
--- a/src/operators/kernel/fpga/V2/deconv_add_bn_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/V2/deconv_add_bn_relu_kernel.cpp
@@ -33,7 +33,9 @@ bool DeconvAddBNReluKernel::Init(
   auto bias_ptr = bias->data();
   auto filter = const_cast(param->Filter());
   auto out = param->Output();
-
+  float Si = input->scale[0];
+  float So = out->scale[0];
+  float Sf = fpga::filter_find_max(filter);
   PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
                         "Output channel should be equal to bias number");
   int channel = out->dims()[1];
@@ -54,6 +56,10 @@ bool DeconvAddBNReluKernel::Init(
   PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0),
                         "filter axis should be the multiple of stride axis ");
   if (param->Groups() == channel) {
+    for (int i = 0; i < channel * sub_conv_n; i++) {
+      bs_ptr[i + sub_conv_n * channel] = Si / So;
+      bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So;
+    }
     fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
                                sub_conv_n);
     fpga::DWDeconvArgs DWDeconv_arg = {0};
@@ -63,6 +69,10 @@ bool DeconvAddBNReluKernel::Init(
                           param->Paddings()[0], param->Paddings()[1], bs_ptr);
     param->SetFpgaArgs(DWDeconv_arg);
   } else {
+    for (int i = 0; i < channel * sub_conv_n; i++) {
+      bs_ptr[i + sub_conv_n * channel] = Si / So * Sf / 127.0f;
+      bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So;
+    }
     fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
     fpga::DeconvArgs deconv_arg = {0};
     fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
diff --git a/src/operators/kernel/fpga/V2/deconv_add_kernel.cpp b/src/operators/kernel/fpga/V2/deconv_add_kernel.cpp
index 41844d008b2c8313fc8f1ac75a00d9864b5a20a5..dcafcbea9c5f995af1a129ca0fd16d96aea8f79c 100644
--- a/src/operators/kernel/fpga/V2/deconv_add_kernel.cpp
+++ b/src/operators/kernel/fpga/V2/deconv_add_kernel.cpp
@@ -32,7 +32,9 @@ bool DeconvAddKernel::Init(FusionDeconvAddParam *param) {
   auto bias_ptr = bias->data();
   auto filter = const_cast(param->Filter());
   auto out = param->Output();
-
+  float Si = input->scale[0];
+  float So = out->scale[0];
+  float Sf = fpga::filter_find_max(filter);
   PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
                         "Output channel should be equal to bias number");
   int channel = out->dims()[1];
@@ -53,6 +55,10 @@ bool DeconvAddKernel::Init(FusionDeconvAddParam *param) {
   PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0),
                         "filter axis should be the multiple of stride axis ");
   if (param->Groups() == channel) {
+    for (int i = 0; i < channel * sub_conv_n; i++) {
+      bs_ptr[i + sub_conv_n * channel] = Si / So;
+      bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So;
+    }
     fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
                                sub_conv_n);
     fpga::DWDeconvArgs DWDeconv_arg = {0};
@@ -62,6 +68,10 @@ bool DeconvAddKernel::Init(FusionDeconvAddParam *param) {
                           param->Paddings()[0], param->Paddings()[1], bs_ptr);
     param->SetFpgaArgs(DWDeconv_arg);
   } else {
+    for (int i = 0; i < channel * sub_conv_n; i++) {
+      bs_ptr[i + sub_conv_n * channel] = Si / So * Sf / 127.0f;
+      bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So;
+    }
     fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
     fpga::DeconvArgs deconv_arg = {0};
     fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
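The same pair of fill loops now appears verbatim in `ConvTransposeKernel`, `DeconvAddBNKernel`, `DeconvAddBNReluKernel`, `DeconvAddKernel`, and (below) `DeconvAddReluKernel`. A shared helper would keep the scale handling in one place; a possible shape, with a hypothetical name and a `nullptr` bias covering the bias-free `ConvTransposeKernel` case:

```cpp
// Hypothetical consolidation of the repeated bs_ptr fill; not in this patch.
static void FillDeconvBsPtr(float *bs_ptr, const float *bias_ptr, int channel,
                            int sub_conv_n, float Si, float So, float Sf,
                            bool depthwise) {
  const int n = channel * sub_conv_n;
  for (int i = 0; i < n; i++) {
    // The depthwise engine takes unquantized filters, so Sf / 127 is omitted.
    bs_ptr[i + n] = depthwise ? Si / So : Si / So * Sf / 127.0f;
    bs_ptr[i] =
        bias_ptr != nullptr ? bias_ptr[i % channel] * 127.0f / So : 0.0f;
  }
}
```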
"Output channel should be equal to bias number"); int channel = out->dims()[1]; @@ -53,6 +55,10 @@ bool DeconvAddKernel::Init(FusionDeconvAddParam *param) { PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0), "filter axis should be the multiple of stride axis "); if (param->Groups() == channel) { + for (int i = 0; i < channel * sub_conv_n; i++) { + bs_ptr[i + sub_conv_n * channel] = Si / So; + bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So; + } fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n); fpga::DWDeconvArgs DWDeconv_arg = {0}; @@ -62,6 +68,10 @@ bool DeconvAddKernel::Init(FusionDeconvAddParam *param) { param->Paddings()[0], param->Paddings()[1], bs_ptr); param->SetFpgaArgs(DWDeconv_arg); } else { + for (int i = 0; i < channel * sub_conv_n; i++) { + bs_ptr[i + sub_conv_n * channel] = Si / So * Sf / 127.0f; + bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So; + } fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n); fpga::DeconvArgs deconv_arg = {0}; fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable, diff --git a/src/operators/kernel/fpga/V2/deconv_add_relu_kernel.cpp b/src/operators/kernel/fpga/V2/deconv_add_relu_kernel.cpp index c6fc9d195511ae3218450fa58393ba420444eb92..1364b4b5aa8b00728ab2dc63d2507449f7e96200 100644 --- a/src/operators/kernel/fpga/V2/deconv_add_relu_kernel.cpp +++ b/src/operators/kernel/fpga/V2/deconv_add_relu_kernel.cpp @@ -33,7 +33,9 @@ bool DeconvAddReluKernel::Init( auto bias_ptr = bias->data(); auto filter = const_cast(param->Filter()); auto out = param->Output(); - + float Si = input->scale[0]; + float So = out->scale[0]; + float Sf = fpga::filter_find_max(filter); PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0], "Output channel should be equal to bias number"); int channel = out->dims()[1]; @@ -54,6 +56,10 @@ bool DeconvAddReluKernel::Init( PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0), "filter axis should be the multiple of stride axis "); if (param->Groups() == channel) { + for (int i = 0; i < channel * sub_conv_n; i++) { + bs_ptr[i + sub_conv_n * channel] = Si / So; + bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So; + } fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n); fpga::DWDeconvArgs DWDeconv_arg = {0}; @@ -63,6 +69,10 @@ bool DeconvAddReluKernel::Init( param->Paddings()[0], param->Paddings()[1], bs_ptr); param->SetFpgaArgs(DWDeconv_arg); } else { + for (int i = 0; i < channel * sub_conv_n; i++) { + bs_ptr[i + sub_conv_n * channel] = Si / So * Sf / 127.0f; + bs_ptr[i] = bias_ptr[i % (channel)] * 127.0f / So; + } fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n); fpga::DeconvArgs deconv_arg = {0}; fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable, diff --git a/src/operators/kernel/fpga/V2/deconv_bn_relu_kernel.cpp b/src/operators/kernel/fpga/V2/deconv_bn_relu_kernel.cpp index 75597f0ecd570b6b21894a2f9a0ff0ad91a54ea4..6aae1ea7298e7d927f2db0dd89399ca4817a38b7 100644 --- a/src/operators/kernel/fpga/V2/deconv_bn_relu_kernel.cpp +++ b/src/operators/kernel/fpga/V2/deconv_bn_relu_kernel.cpp @@ -34,6 +34,9 @@ bool DeconvBNReluKernel::Init( auto bias_ptr = bias->data(); auto filter = const_cast(param->Filter()); auto out = param->Output(); + float Si = input->scale[0]; + float So = out->scale[0]; + float Sf = fpga::filter_find_max(filter); auto bn_mean_ptr = param->InputMean()->data(); auto bn_var_ptr = param->InputVariance()->data(); auto bn_scale_ptr = 
@@ -56,12 +59,22 @@ bool DeconvBNReluKernel::Init(
   int sub_conv_n = param->Strides()[0];
   auto bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sub_conv_n *  // NOLINT
                                            sizeof(float));             // NOLINT
-
-  for (int i = 0; i < channel * sub_conv_n; i++) {
-    bs_ptr[i + sub_conv_n * channel] = new_scale_ptr[i % channel];
-    bs_ptr[i] = new_bias_ptr[i % (channel)];
+  // for (int i = 0; i < channel * sub_conv_n; i++) {
+  //   bs_ptr[i + sub_conv_n * channel] = new_scale_ptr[i % channel];
+  //   bs_ptr[i] = new_bias_ptr[i % (channel)];
+  // }
+  if (param->Groups() == channel) {
+    for (int i = 0; i < channel * sub_conv_n; i++) {
+      bs_ptr[i + sub_conv_n * channel] = new_scale_ptr[i % channel] * Si / So;
+      bs_ptr[i] = new_bias_ptr[i % (channel)] * 127.0f / So;
+    }
+  } else {
+    for (int i = 0; i < channel * sub_conv_n; i++) {
+      bs_ptr[i + sub_conv_n * channel] =
+          new_scale_ptr[i % channel] * Si / So * Sf / 127.0f;
+      bs_ptr[i] = new_bias_ptr[i % (channel)] * 127.0f / So;
+    }
   }
-
   PADDLE_MOBILE_ENFORCE(param->Strides()[1] == param->Strides()[0],
                         "stride_width should be equal to stride_height ");
   PADDLE_MOBILE_ENFORCE(filter->dims()[2] == filter->dims()[3],
diff --git a/src/operators/kernel/fpga/V2/elementwise_add_kernel.cpp b/src/operators/kernel/fpga/V2/elementwise_add_kernel.cpp
index eec058edc40f23dbbd98e12b084d6c232eaf08e4..145d7851f0bba14ac3fcba3d858e91a826683871 100644
--- a/src/operators/kernel/fpga/V2/elementwise_add_kernel.cpp
+++ b/src/operators/kernel/fpga/V2/elementwise_add_kernel.cpp
@@ -34,7 +34,11 @@ bool ElementwiseAddKernel::Init(ElementwiseAddParam *param) {
   auto input_y_ptr = input_y->data();
   fpga::format_fp16_ofm(out);
   auto out_ptr = out->mutable_data();
-
+  float Si_1 = input_x->scale[0];
+  float Si_2 = input_y->scale[0];
+  float So = out->scale[0];
+  float C1 = Si_1 / So;
+  float C2 = Si_2 / So;
   fpga::EWAddArgs ewaddArgs = {0};
   // ewaddArgs.relu_enabled = relu_enabled;
   ewaddArgs.output.activation.activation_type = activation_enable;
diff --git a/src/operators/kernel/fpga/V2/elementwise_add_relu_kernel.cpp b/src/operators/kernel/fpga/V2/elementwise_add_relu_kernel.cpp
index f36206a8a15451144a00a16aad176ca67c4a4114..44266049a27c6110346a328e81713486942f4110 100644
--- a/src/operators/kernel/fpga/V2/elementwise_add_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/V2/elementwise_add_relu_kernel.cpp
@@ -32,7 +32,11 @@ bool ElementwiseAddReluKernel::Init(
   auto input_y_ptr = input_y->data();
   fpga::format_fp16_ofm(out);
   auto out_ptr = out->mutable_data();
-
+  float Si_1 = input_x->scale[0];
+  float Si_2 = input_y->scale[0];
+  float So = out->scale[0];
+  float C1 = Si_1 / So;
+  float C2 = Si_2 / So;
   fpga::EWAddArgs ewaddArgs = {0};
   // ewaddArgs.relu_enabled = relu_enabled;
   ewaddArgs.output.activation.activation_type = activation_enable;
diff --git a/src/operators/kernel/fpga/V2/fusion_fc_kernel.cpp b/src/operators/kernel/fpga/V2/fusion_fc_kernel.cpp
index 3a29104d0fe0e3c69c9369fb1137b2c94ef04e43..1f85beb532a805e69418bf4edffe66c178764769 100644
--- a/src/operators/kernel/fpga/V2/fusion_fc_kernel.cpp
+++ b/src/operators/kernel/fpga/V2/fusion_fc_kernel.cpp
@@ -29,6 +29,9 @@ bool FusionFcKernel::Init(FusionFcParam *param) {
   const Tensor *input_z = param->InputZ();
   auto input_z_ptr = input_z->data();
   auto out = param->Out();
+  float Si = input_x->scale[0];
+  float Sf = filter->scale[0];
+  float So = out->scale[0];
   // PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == filter->dims()[0],
   //                       "Image channel should be equal to weight number");
@@ -36,8 +39,10 @@ bool FusionFcKernel::Init(FusionFcParam *param) {
   auto bs_ptr =
       (float *)fpga::fpga_malloc(2 * channel * sizeof(float));  // NOLINT
   for (int i = 0; i < channel; i++) {
-    bs_ptr[i + channel] = 1;
-    bs_ptr[i] = input_z_ptr[i];
+    // bs_ptr[i + channel] = 1;
+    // bs_ptr[i] = input_z_ptr[i];
+    bs_ptr[i + channel] = Si / So * Sf / 127.0f;
+    bs_ptr[i] = input_z_ptr[i] * 127.0f / So;
   }
   int num = (uint32_t)filter->dims()[1];
   int chw = (uint32_t)filter->dims()[0];
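In the two elementwise-add kernels above, `C1 = Si_1 / So` and `C2 = Si_2 / So` are the coefficients of a requantized add: with `q_x ≈ x * 127 / Si_1` and `q_y ≈ y * 127 / Si_2`, the sum expressed in the output scale is `q_out = q_x * C1 + q_y * C2`. The hunks only compute the constants, so they are presumably wired into `ewaddArgs` fields outside the visible context. A scalar model of the intended arithmetic (saturating rounding assumed):

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>

// q_out = (x + y) * 127 / So = q_x * (Si_1 / So) + q_y * (Si_2 / So)
int8_t EwAddRequant(int8_t q_x, int8_t q_y, float C1, float C2) {
  float q = std::round(q_x * C1 + q_y * C2);
  return static_cast<int8_t>(std::min(127.0f, std::max(-127.0f, q)));
}
```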
diff --git a/src/operators/kernel/fpga/V2/fusion_fc_relu_kernel.cpp b/src/operators/kernel/fpga/V2/fusion_fc_relu_kernel.cpp
index fef370515e9e9ffa1d90c184e62919235533b8a5..0ccec45195ec2efa90ae097d57547373a3253843 100644
--- a/src/operators/kernel/fpga/V2/fusion_fc_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/V2/fusion_fc_relu_kernel.cpp
@@ -29,6 +29,9 @@ bool FusionFcReluKernel::Init(FusionFcReluParam *param) {
   const Tensor *input_z = param->InputZ();
   auto input_z_ptr = input_z->data();
   auto out = param->Out();
+  float Si = input_x->scale[0];
+  float Sf = filter->scale[0];
+  float So = out->scale[0];
   // PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == filter->dims()[0],
   //                       "Image channel should be equal to weight number");
@@ -36,8 +39,10 @@ bool FusionFcReluKernel::Init(FusionFcReluParam *param) {
   auto bs_ptr =
       (float *)fpga::fpga_malloc(2 * channel * sizeof(float));  // NOLINT
   for (int i = 0; i < channel; i++) {
-    bs_ptr[i + channel] = 1;
-    bs_ptr[i] = input_z_ptr[i];
+    // bs_ptr[i + channel] = 1;
+    // bs_ptr[i] = input_z_ptr[i];
+    bs_ptr[i + channel] = Si / So * Sf / 127.0f;
+    bs_ptr[i] = input_z_ptr[i] * 127.0f / So;
   }
   int num = (uint32_t)filter->dims()[1];
   int chw = (uint32_t)filter->dims()[0];
diff --git a/src/operators/kernel/fpga/V2/pad2d_kernel.cpp b/src/operators/kernel/fpga/V2/pad2d_kernel.cpp
deleted file mode 100644
index e5328dc31978ad4da6ba2872881be52c0975e692..0000000000000000000000000000000000000000
--- a/src/operators/kernel/fpga/V2/pad2d_kernel.cpp
+++ /dev/null
@@ -1,61 +0,0 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
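The two FC kernels above are the conv fold with no BN multiplier: the previous `bs_ptr[i + channel] = 1; bs_ptr[i] = input_z_ptr[i];` wrote fp32-domain values, while the new entries express both in output quanta. Note also that `Sf` is read from `filter->scale[0]` here rather than `fpga::filter_find_max`, presumably because FC weights already carry their scale. A sketch with hypothetical helper names, mirroring the first sketch with `scale == 1`:

```cpp
// FC fold: fold_scale(1.0f, Si, So, Sf) and fold_bias(input_z[i], So)
// from the earlier sketch, written out inline.
float FcFoldScale(float Si, float So, float Sf) {
  return Si / So * Sf / 127.0f;  // replaces the hard-coded 1
}
float FcFoldBias(float z, float So) {
  return z * 127.0f / So;  // replaces the raw fp32 bias input_z_ptr[i]
}
```

`pad2d_kernel.cpp` below is removed outright, and `PAD2D_OP` is not among the ops re-enabled in the `op.cmake` hunk at the end of this patch.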
-#ifdef PAD2D_OP
-#include "operators/kernel/pad2d_kernel.h"
-namespace paddle_mobile {
-namespace operators {
-template <>
-bool Pad2DKernel::Init(Pad2DParam *param) {
-  Tensor *output = param->output_;
-  fpga::format_fp16_ofm(output);
-  return true;
-}
-void pad2dFunc(const framework::Tensor *input, framework::Tensor *output) {
-  auto input_data = (input->data());
-  auto output_data = (output->data());
-  auto input_c = input->dims()[1];
-  auto input_h = input->dims()[2];
-  auto input_w = input->dims()[3];
-  auto output_c = output->dims()[1];
-  auto output_w = output->dims()[3];
-  auto copysize = input_c * input_w;
-  for (int h = 0; h < input_h; ++h) {
-    auto input_offset = h * input_c * input_w;
-    auto output_offset = h * paddle_mobile::fpga::align_to_x(
-                                 output_c * output_w, IMAGE_ALIGNMENT);
-    memcpy((output_data + output_offset), (input_data + input_offset),
-           copysize * sizeof(half));
-  }
-}
-template <>
-void Pad2DKernel::Compute(const Pad2DParam &param) {
-  auto in_x = param.input_;
-  auto out = param.output_;
-  fpga::fpga_invalidate((void *)in_x->data(),  // NOLINT
-                        in_x->numel() * sizeof(half));
-  pad2dFunc(in_x, out);
-  (out->scale)[0] = (in_x->scale)[0];
-  (out->scale)[1] = (in_x->scale)[1];
-  DLOG << (out->scale)[0];
-  DLOG << (out->scale)[1];
-  size_t outputSize =
-      out->dims()[2] *
-      paddle_mobile::fpga::align_to_x((out->dims()[1]) * (out->dims()[3]),
-                                      IMAGE_ALIGNMENT) *
-      sizeof(half);
-  fpga::fpga_flush(out->data(), outputSize);
-}
-}  // namespace operators
-}  // namespace paddle_mobile
-#endif  // PAD2D_OP
diff --git a/src/operators/kernel/fpga/V2/pool_kernel.cpp b/src/operators/kernel/fpga/V2/pool_kernel.cpp
index 7c8dba1696ecc15ba9748aabf1973445d23de95c..60bd3786aa589eb300a7453325be55d839500754 100644
--- a/src/operators/kernel/fpga/V2/pool_kernel.cpp
+++ b/src/operators/kernel/fpga/V2/pool_kernel.cpp
@@ -44,11 +44,13 @@ bool PoolKernel::Init(PoolParam *param) {
   auto input_ptr = input->data();
   fpga::format_fp16_ofm(output);
   auto output_ptr = output->mutable_data();
+  float Si = input->scale[0];
+  float So = output->scale[0];
   fpga::PoolingArgs poolArgs = {0};
   poolArgs.mode = pooling_type == "max" ? 0 : 1;  // max:0, avg:1
-  poolArgs.kernel_reciprocal =
-      fpga::fp32_2_fp16(float(1.0 / (ksize[0] * ksize[1])));  // NOLINT
+  poolArgs.kernel_reciprocal = fpga::fp32_2_fp16(
+      float(1.0 / (ksize[0] * ksize[1]) * Si / So));  // NOLINT
   poolArgs.image.address = input_ptr;
   poolArgs.image.channels = (uint32_t)input->dims()[1];
   poolArgs.image.height = (uint32_t)input->dims()[2];
diff --git a/tools/op.cmake b/tools/op.cmake
index 5847c60e9471d74dbaf4bdb415518af2a4de797c..eb6501de2232ae160acded85931c31122055ab4e 100755
--- a/tools/op.cmake
+++ b/tools/op.cmake
@@ -163,6 +163,26 @@ if (CON GREATER -1)
     set(SPLIT_OP ON)
     set(FUSION_DECONVADD_OP ON)
     set(FUSION_DECONVADDRELU_OP ON)
+
+    set(RESHAPE_OP ON)
+    set(FUSION_CONVADDBNRELU_OP ON)
+    set(FUSION_CONVADDBN_OP ON)
+    set(RESHAPE2_OP ON)
+    set(PSROI_POOL_OP ON)
+    set(ROIALIGN_POOL_OP ON)
+    set(PROPOSAL_OP ON)
+    set(ANCHOR_GENERATOR_OP ON)
+    set(SLICE_OP ON)
+    set(SIGMOID_OP ON)
+    set(CONCAT_OP ON)
+    set(CONV_TRANSPOSE_OP ON)
+    set(FUSION_DECONVADDBNRELU_OP ON)
+    set(FUSION_DECONVADDBN_OP ON)
+    set(FUSION_DECONVBNRELU_OP ON)
+    set(CONV_OP ON)
+    set(ELEMENTWISEMUL_OP ON)
+    set(FUSION_FCRELU_OP ON)
+    set(RELU_OP ON)
     set(FOUND_MATCH ON)
 endif()
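On the pooling change above: `kernel_reciprocal` is the precomputed constant that average pooling multiplies each window sum by, so folding `Si / So` into it performs the requantization in the same multiply. A scalar model of the new coefficient (illustrative name only):

```cpp
// Folded avg-pool coefficient: for a 3x3 window with Si = 8 and So = 4
// this is (1 / 9) * 2 ~= 0.222, sent to the hardware as fp16.
float AvgPoolCoeff(int kh, int kw, float Si, float So) {
  return 1.0f / static_cast<float>(kh * kw) * Si / So;
}
```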