diff --git a/CMakeLists.txt b/CMakeLists.txt index 9268c9a2d1ab3791805c539eb408560bc3aaff26..a98d815943cf4d4bb3d632ccfcb83fc7818e047d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,9 +2,9 @@ cmake_minimum_required(VERSION 3.0) project(paddle-mobile) # select the platform to build -option(CPU "armv7 with neon support" ON) +option(CPU "armv7 with neon support" OFF) option(MALI_GPU "mali gpu support" OFF) -option(FPGA "fpga support" OFF) +option(FPGA "fpga support" ON) option(USE_OPENMP "openmp support" OFF) option(DEBUGING "enable debug mode" ON) @@ -29,7 +29,10 @@ if(DEBUGING) message(STATUS "debugging mode") add_definitions(-DPADDLE_MOBILE_DEBUG) else() - add_definitions(-fvisibility=hidden -fvisibility-inlines-hidden) + if(FPGA) + else() + add_definitions(-fvisibility=hidden -fvisibility-inlines-hidden) + endif() endif() if(USE_EXCEPTION) @@ -93,8 +96,7 @@ else() endif() if(FPGA) - set(DEBUGING ON) - add_definitions(-DPADDLE_MOBILE_DEBUG) + message("FPGA mode enabled") add_definitions(-DPADDLE_MOBILE_FPGA) else() file(GLOB_RECURSE _tmp_list src/operators/kernel/fpga/*.cpp src/operators/kernel/fpga/*.cc) @@ -177,6 +179,10 @@ if(DEBUGING) else() add_subdirectory(test) endif() +elseif(FPGA) + add_subdirectory(test) endif() + + diff --git a/src/common/variant.h b/src/common/variant.h index ca2fcc090769bc49603176dc361d5f8c8e22890c..2d81160a840668e26ab052afbdd05367cde5189a 100644 --- a/src/common/variant.h +++ b/src/common/variant.h @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include - +#include #include "common/enforce.h" #include "common/log.h" diff --git a/src/fpga/api.cpp b/src/fpga/api.cpp index 725895ae6a3da161af545646c2a74bda16be532f..97746d0b203523b9337af17346b623d96dbf5a88 100644 --- a/src/fpga/api.cpp +++ b/src/fpga/api.cpp @@ -22,7 +22,7 @@ limitations under the License. */ #include "fpga/filter.h" #include "fpga/image.h" #define FPGA_TEST_MODE -//#define PADDLE_MOBILE_OS_LINUX +// #define PADDLE_MOBILE_OS_LINUX namespace paddle_mobile { namespace fpga { @@ -149,7 +149,7 @@ int ComputeBasicConv(const struct ConvArgs &args) { return do_ioctl(IOCTL_CONFIG_CONV, &args); } -int ComputeFpgaConv(const struct WrapperConvArgs &args) { +int ComputeFpgaConv(const struct SplitConvArgs &args) { #ifdef FPGA_TEST_MODE DLOG << "=============ComputeFPGAConv==========="; DLOG << " filter_num:" << args.filter_num @@ -194,8 +194,8 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) { #ifdef FPGA_TEST_MODE DLOG << "=============ComputeFpgaEWAdd==========="; DLOG << " relu_enabled:" << args.relu_enabled - << " const0:" << fp16_2_fp32(short(args.const0)) - << " const1:" << fp16_2_fp32(short(args.const1)); + << " const0:" << fp16_2_fp32(int16_t(args.const0)) + << " const1:" << fp16_2_fp32(int16_t(args.const1)); DLOG << " image0_address:" << args.image0.address << " image0_scale_address:" << args.image0.scale_address << " image0_channels:" << args.image0.channels @@ -383,10 +383,10 @@ void format_concat_output(framework::Tensor *out, int height, int width, out->reset_data_ptr(data_ptr); } -void fill_conv_arg(struct WrapperConvArgs *arg, framework::Tensor *input, - framework::Tensor *out, framework::Tensor *filter, - bool relu_enabled, int group_num, int stride_h, int stride_w, - int padding_h, int padding_w, float *bs_ptr) { +void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input, + framework::Tensor *out, framework::Tensor *filter, + bool relu_enabled, int group_num, int stride_h, + int stride_w, int padding_h, int padding_w, float *bs_ptr) { auto input_ptr = input->data(); auto filter_ptr = filter->data(); auto out_ptr = out->data(); diff --git a/src/fpga/api.h b/src/fpga/api.h index a4f71e119c83de40771f321abfc8bb2821e4523a..f535975a35ecc3c454bbac597b31d8c3670cbf91 100644 --- a/src/fpga/api.h +++ b/src/fpga/api.h @@ -89,7 +89,7 @@ struct ConcatArgs { uint32_t width; }; -struct WrapperConvArgs { +struct SplitConvArgs { uint32_t split_num; uint32_t group_num; uint32_t filter_num; @@ -98,6 +98,14 @@ struct WrapperConvArgs { struct ConcatArgs concat_arg; }; +struct GroupConvArgs { + uint32_t group_num; + uint32_t filter_num; + struct ImageOutputArgs output; + struct SplitConvArgs* conv_args; + struct ConcatArgs concat_arg; +}; + struct PoolingArgs { int16_t mode; // mode: 0:max, 1:avg half kernel_reciprocal; @@ -159,30 +167,6 @@ struct MemoryCacheArgs { #define IOCTL_FPGA_REG_READ _IOW(IOCTL_FPGA_MAGIC, 28, struct FpgaRegReadArgs) #define IOCTL_FPGA_REG_WRITE _IOW(IOCTL_FPGA_MAGIC, 29, struct FpgaRegWriteArgs) -enum FPGA_ERR_TYPE { - ERR_IOCTL_CMD = -1, - ERR_TIMEOUT = -2, - ERR_COMPLETION_TIMEOUT = -3, - ERR_INVALID_FPGA_ADDR = -4, - ERR_NOMEM = -5, - ERR_NO_RESERVE_MEM = -6, - ERR_COPY_FROM_USER = -7, - ERR_COPY_TO_USER = -8, - ERR_DEL_TIMER = -9, - ERR_ENABLE_MSI = -10, - ERR_REGISTER_IRQ = -11, - ERR_PCIE_REGISTER = -12, - ERR_PCIE_PROBE = -13, - ERR_REGISTER_BLOCK = -14, - ERR_ALLOC_GENDISK = -15, - ERR_INIT_QUEUE = -16, - ERR_WAIT = -17, - ERR_ECC_ERROR = -31, - ERR_FPGA_FAIL_STOP = -64, - ERR_FPGA_DEBUG_STOP = -113, - DEV_TMP_UNAVAILABLE = -128 -}; - //============================== API ============================= int open_device(); @@ -195,7 +179,7 @@ int fpga_flush(void* address, size_t size); int fpga_invalidate(void* address, size_t size); int PerformBypass(const struct BypassArgs& args); -int ComputeFpgaConv(const struct WrapperConvArgs& args); +int ComputeFpgaConv(const struct SplitConvArgs& args); int ComputeFpgaPool(const struct PoolingArgs& args); int ComputeFpgaEWAdd(const struct EWAddArgs& args); int ComputeFPGAConcat(const struct ConcatArgs& args); @@ -220,10 +204,10 @@ void format_bias_scale_array(float** bias_scale_array, void format_concat_output(framework::Tensor* out, int height, int width, int image_num, uint32_t* channel_num); -void fill_conv_arg(struct WrapperConvArgs* arg, framework::Tensor* input, - framework::Tensor* out, framework::Tensor* filter, - bool relu_enabled, int group_num, int stride_h, int stride_w, - int padding_h, int padding_w, float* bs_ptr); +void fill_split_arg(struct SplitConvArgs* arg, framework::Tensor* input, + framework::Tensor* out, framework::Tensor* filter, + bool relu_enabled, int group_num, int stride_h, + int stride_w, int padding_h, int padding_w, float* bs_ptr); half fp32_2_fp16(float fp32_num); float fp16_2_fp32(half fp16_num); diff --git a/src/fpga/filter.cpp b/src/fpga/filter.cpp index c824b446ce3a4c3f13ad788780997a3920a1484c..db851b926bbbd549205ee5d75bc46a6c04888098 100644 --- a/src/fpga/filter.cpp +++ b/src/fpga/filter.cpp @@ -21,7 +21,10 @@ namespace paddle_mobile { namespace fpga { namespace filter { -int calc_division_capacity(int chw) { return 2048 / ((chw + 15) / 16) * 32; } +int calc_division_capacity(int chw) { + int n = 2048 / ((chw + 15) / 16) * 32; + return n < 2048 ? n : 2048; +} int calc_split_num(int num, int division_capacity) { return (num + division_capacity - 1) / division_capacity; diff --git a/src/operators/kernel/fpga/conv_add_bn_kernel.cpp b/src/operators/kernel/fpga/conv_add_bn_kernel.cpp index 671df76967b4537d111695cdbe091b9c7de2c5a2..9b3944fc9a9ab308d9fe8b791a34e09651b87e6e 100644 --- a/src/operators/kernel/fpga/conv_add_bn_kernel.cpp +++ b/src/operators/kernel/fpga/conv_add_bn_kernel.cpp @@ -66,10 +66,11 @@ bool ConvAddBNKernel::Init(FusionConvAddBNParam *param) { fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel); fpga::format_fp16_ofm(out); - fpga::WrapperConvArgs conv_arg = {0}; - fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled, - param->Groups(), param->Strides()[0], param->Strides()[1], - param->Paddings()[0], param->Paddings()[1], bs_ptr); + fpga::SplitConvArgs conv_arg = {0}; + fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled, + param->Groups(), param->Strides()[0], + param->Strides()[1], param->Paddings()[0], + param->Paddings()[1], bs_ptr); param->SetFpgaArgs(conv_arg); return true; diff --git a/src/operators/kernel/fpga/conv_add_bn_relu_kernel.cpp b/src/operators/kernel/fpga/conv_add_bn_relu_kernel.cpp index ea01245f1207739d4234ea3509451a2de1d321f4..83f74e97d04eda29f3aaa6a0cc16ed7d194321d8 100644 --- a/src/operators/kernel/fpga/conv_add_bn_relu_kernel.cpp +++ b/src/operators/kernel/fpga/conv_add_bn_relu_kernel.cpp @@ -65,10 +65,11 @@ bool ConvAddBNReluKernel::Init( fpga::format_fp16_ofm(out); - fpga::WrapperConvArgs conv_arg = {0}; - fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled, - param->Groups(), param->Strides()[0], param->Strides()[1], - param->Paddings()[0], param->Paddings()[1], bs_ptr); + fpga::SplitConvArgs conv_arg = {0}; + fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled, + param->Groups(), param->Strides()[0], + param->Strides()[1], param->Paddings()[0], + param->Paddings()[1], bs_ptr); param->SetFpgaArgs(conv_arg); return true; } diff --git a/src/operators/kernel/fpga/conv_add_relu_kernel.cpp b/src/operators/kernel/fpga/conv_add_relu_kernel.cpp index 928b73e4d30144cdf1128a018628b6208fcfd5f0..4975f2a905dcd76c5b7f013eafaa376dd2bb1646 100644 --- a/src/operators/kernel/fpga/conv_add_relu_kernel.cpp +++ b/src/operators/kernel/fpga/conv_add_relu_kernel.cpp @@ -47,10 +47,11 @@ bool ConvAddReluKernel::Init(FusionConvAddReluParam *param) { fpga::format_fp16_ofm(out); - fpga::WrapperConvArgs conv_arg = {0}; - fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled, - param->Groups(), param->Strides()[0], param->Strides()[1], - param->Paddings()[0], param->Paddings()[1], bs_ptr); + fpga::SplitConvArgs conv_arg = {0}; + fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled, + param->Groups(), param->Strides()[0], + param->Strides()[1], param->Paddings()[0], + param->Paddings()[1], bs_ptr); param->SetFpgaArgs(conv_arg); return true; } diff --git a/src/operators/kernel/fpga/conv_bn_kernel.cpp b/src/operators/kernel/fpga/conv_bn_kernel.cpp index fea211af74b634fc0dd8dcee1db7c2c004145561..276e71b6a44e9a7beba0d5db2f51472a9927d8da 100644 --- a/src/operators/kernel/fpga/conv_bn_kernel.cpp +++ b/src/operators/kernel/fpga/conv_bn_kernel.cpp @@ -59,10 +59,11 @@ bool ConvBNKernel::Init(FusionConvBNParam *param) { fpga::format_fp16_ofm(out); - fpga::WrapperConvArgs conv_arg = {0}; - fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled, - param->Groups(), param->Strides()[0], param->Strides()[1], - param->Paddings()[0], param->Paddings()[1], bs_ptr); + fpga::SplitConvArgs conv_arg = {0}; + fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled, + param->Groups(), param->Strides()[0], + param->Strides()[1], param->Paddings()[0], + param->Paddings()[1], bs_ptr); param->SetFpgaArgs(conv_arg); return true; } diff --git a/src/operators/kernel/fpga/conv_bn_relu_kernel.cpp b/src/operators/kernel/fpga/conv_bn_relu_kernel.cpp index 87fe12664e75717c78d79ec50821a9bb6201c5a0..f519a37cb57378a603969adae255f88ae8a5df2a 100644 --- a/src/operators/kernel/fpga/conv_bn_relu_kernel.cpp +++ b/src/operators/kernel/fpga/conv_bn_relu_kernel.cpp @@ -59,10 +59,11 @@ bool ConvBNReluKernel::Init(FusionConvBNReluParam *param) { fpga::format_fp16_ofm(out); - fpga::WrapperConvArgs conv_arg = {0}; - fpga::fill_conv_arg(&conv_arg, input, out, filter, relu_enabled, - param->Groups(), param->Strides()[0], param->Strides()[1], - param->Paddings()[0], param->Paddings()[1], bs_ptr); + fpga::SplitConvArgs conv_arg = {0}; + fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled, + param->Groups(), param->Strides()[0], + param->Strides()[1], param->Paddings()[0], + param->Paddings()[1], bs_ptr); param->SetFpgaArgs(conv_arg); return true; } diff --git a/src/operators/kernel/fpga/fc_relu_kernel.cpp b/src/operators/kernel/fpga/fc_relu_kernel.cpp index 7c7bceaaee82617122da9c0fd2a5fa6b688f1153..52d7c0a4e69080e11f86d1507829e7e779a69228 100644 --- a/src/operators/kernel/fpga/fc_relu_kernel.cpp +++ b/src/operators/kernel/fpga/fc_relu_kernel.cpp @@ -53,9 +53,9 @@ bool FusionFcReluKernel::Init(FusionFcReluParam *param) { fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel); fpga::format_fp16_ofm(out); - fpga::WrapperConvArgs conv_arg = {0}; - fpga::fill_conv_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1, 0, - 0, bs_ptr); + fpga::SplitConvArgs conv_arg = {0}; + fpga::fill_split_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1, + 0, 0, bs_ptr); param->SetFpgaArgs(conv_arg); return true; } diff --git a/src/operators/kernel/fpga/fusion_fc_kernel.cpp b/src/operators/kernel/fpga/fusion_fc_kernel.cpp index d543e1ea46bea09ee7331d03760633ee240454d5..407e14238d542604e876ced624d5a0db698a6101 100644 --- a/src/operators/kernel/fpga/fusion_fc_kernel.cpp +++ b/src/operators/kernel/fpga/fusion_fc_kernel.cpp @@ -54,9 +54,9 @@ bool FusionFcKernel::Init(FusionFcParam *param) { fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel); fpga::format_fp16_ofm(out); - fpga::WrapperConvArgs conv_arg = {0}; - fpga::fill_conv_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1, 0, - 0, bs_ptr); + fpga::SplitConvArgs conv_arg = {0}; + fpga::fill_split_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1, + 0, 0, bs_ptr); param->SetFpgaArgs(conv_arg); return true; } diff --git a/src/operators/kernel/fpga/mul_kernel.cpp b/src/operators/kernel/fpga/mul_kernel.cpp deleted file mode 100644 index 9e282bd27b744cb48fccdc8e4602ae2fc9a1ad79..0000000000000000000000000000000000000000 --- a/src/operators/kernel/fpga/mul_kernel.cpp +++ /dev/null @@ -1,71 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifdef MUL_OP - -#include "operators/kernel/mul_kernel.h" - -namespace paddle_mobile { -namespace operators { - -template <> -bool MulKernel::Init(MulParam *param) { - bool relu_enabled = false; - auto input_x = const_cast(param->InputX()); - auto filter = const_cast(param->InputY()); - auto out = param->Out(); - - PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == filter->dims()[0], - "Image channel should be equal to weight number"); - int channel = (uint32_t)out->dims()[1]; - auto bs_ptr = - (float *)fpga::fpga_malloc(2 * channel * sizeof(float)); // NOLINT - for (int i = 0; i < channel; i++) { - bs_ptr[i + channel] = 1; - bs_ptr[i] = 0; - } - int num = (uint32_t)filter->dims()[1]; - int chw = (uint32_t)filter->dims()[0]; - PADDLE_MOBILE_ENFORCE( - chw == input_x->numel(), - "Filter element num should be equal to IFM element num"); - int height = (uint32_t)input_x->dims()[2]; - int width = (uint32_t)input_x->dims()[3]; - int filter_channel = chw / height / width; - - out->Resize(framework::make_ddim({1, channel, 1, 1})); - filter->Resize(framework::make_ddim({num, filter_channel, height, width})); - float max_value = fpga::filter_find_max(filter); - fpga::format_fc_filter(filter, max_value); - - int element_num_per_div = fpga::get_filter_num_per_div(filter, 1); - fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel); - fpga::format_fp16_ofm(out); - - fpga::WrapperConvArgs conv_arg = {0}; - fpga::fill_conv_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1, 0, - 0, bs_ptr); - param->SetFpgaArgs(conv_arg); - return true; -} - -template <> -void MulKernel::Compute(const MulParam ¶m) const { - fpga::ComputeFpgaConv(param.FpgaArgs()); -} - -} // namespace operators -} // namespace paddle_mobile - -#endif diff --git a/src/operators/math/gemm_int8.cpp b/src/operators/math/gemm_int8.cpp index bd5286dbcb5c871d5d327875b836ad9777c270bf..aacf24d05802fcf01fffacc37290d8329df614e8 100644 --- a/src/operators/math/gemm_int8.cpp +++ b/src/operators/math/gemm_int8.cpp @@ -652,7 +652,7 @@ void Gemm::WriteBasic(int32_t mc, int32_t nc, int32_t *c, int32_t *C, int32_t *C0, *c0; c_ptr = c; C_ptr = C; - if (nc1 > 0) { + /*if (nc1 > 0) { asm volatile( "subs %[mc], %[mc], #1 \n\t" "blt end_mc_%= \n\t" @@ -684,7 +684,7 @@ void Gemm::WriteBasic(int32_t mc, int32_t nc, int32_t *c, int32_t *C, : [C_ptr] "r"(C_ptr), [c_ptr] "r"(c_ptr), [mc] "r"(m), [nc1] "r"(nc1), [step] "r"(step), [step1] "r"(step1) : "memory", "r5", "r6", "q0", "q1", "q2", "q3"); - } + }*/ if (_nc1 != 0) { for (int32_t i = 0; i < mc; i++) { diff --git a/src/operators/op_param.h b/src/operators/op_param.h index 70562da8f8961daed9c0057f3ebc8e1a1a6e340e..fb4368d4bd5896e8c3c30c7b0cbc2d365f04e663 100644 --- a/src/operators/op_param.h +++ b/src/operators/op_param.h @@ -546,11 +546,11 @@ class MulParam : OpParam { #ifdef PADDLE_MOBILE_FPGA private: - fpga::WrapperConvArgs fpga_conv_args; + fpga::SplitConvArgs fpga_conv_args; public: - const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; } - void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; } + const fpga::SplitConvArgs &FpgaArgs() const { return fpga_conv_args; } + void SetFpgaArgs(const fpga::SplitConvArgs &args) { fpga_conv_args = args; } #endif }; #endif @@ -1401,11 +1401,11 @@ class FusionFcParam : public OpParam { #ifdef PADDLE_MOBILE_FPGA private: - fpga::WrapperConvArgs fpga_conv_args; + fpga::SplitConvArgs fpga_conv_args; public: - const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; } - void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; } + const fpga::SplitConvArgs &FpgaArgs() const { return fpga_conv_args; } + void SetFpgaArgs(const fpga::SplitConvArgs &args) { fpga_conv_args = args; } #endif }; @@ -1441,11 +1441,11 @@ class FusionConvAddParam : public ConvParam { #ifdef PADDLE_MOBILE_FPGA private: - fpga::WrapperConvArgs fpga_conv_args; + fpga::SplitConvArgs fpga_conv_args; public: - const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; } - void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; } + const fpga::SplitConvArgs &FpgaArgs() const { return fpga_conv_args; } + void SetFpgaArgs(const fpga::SplitConvArgs &args) { fpga_conv_args = args; } #endif }; @@ -1496,11 +1496,11 @@ class FusionConvAddPReluParam : public ConvParam { #ifdef PADDLE_MOBILE_FPGA private: - fpga::WrapperConvArgs fpga_conv_args; + fpga::SplitConvArgs fpga_conv_args; public: - const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; } - void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; } + const fpga::SplitConvArgs &FpgaArgs() const { return fpga_conv_args; } + void SetFpgaArgs(const fpga::SplitConvArgs &args) { fpga_conv_args = args; } #endif }; #endif @@ -1554,11 +1554,11 @@ class FusionConvAddAddPReluParam : public ConvParam { #ifdef PADDLE_MOBILE_FPGA private: - fpga::WrapperConvArgs fpga_conv_args; + fpga::SplitConvArgs fpga_conv_args; public: - const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; } - void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; } + const fpga::SplitConvArgs &FpgaArgs() const { return fpga_conv_args; } + void SetFpgaArgs(const fpga::SplitConvArgs &args) { fpga_conv_args = args; } #endif }; #endif @@ -1629,11 +1629,11 @@ class FusionConvAddBNReluParam : public ConvParam { #ifdef PADDLE_MOBILE_FPGA private: - fpga::WrapperConvArgs fpga_conv_args; + fpga::SplitConvArgs fpga_conv_args; public: - const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; } - void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; } + const fpga::SplitConvArgs &FpgaArgs() const { return fpga_conv_args; } + void SetFpgaArgs(const fpga::SplitConvArgs &args) { fpga_conv_args = args; } #endif }; #endif @@ -1715,11 +1715,11 @@ class FusionConvBNAddReluParam : public ConvParam { #ifdef PADDLE_MOBILE_FPGA private: - fpga::WrapperConvArgs fpga_conv_args; + fpga::SplitConvArgs fpga_conv_args; public: - const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; } - void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; } + const fpga::SplitConvArgs &FpgaArgs() const { return fpga_conv_args; } + void SetFpgaArgs(const fpga::SplitConvArgs &args) { fpga_conv_args = args; } #endif }; #endif @@ -1782,11 +1782,11 @@ class FusionConvBNParam : public ConvParam { #ifdef PADDLE_MOBILE_FPGA private: - fpga::WrapperConvArgs fpga_conv_args; + fpga::SplitConvArgs fpga_conv_args; public: - const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; } - void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; } + const fpga::SplitConvArgs &FpgaArgs() const { return fpga_conv_args; } + void SetFpgaArgs(const fpga::SplitConvArgs &args) { fpga_conv_args = args; } #endif }; #endif @@ -1857,11 +1857,11 @@ class FusionConvAddBNParam : public ConvParam { #ifdef PADDLE_MOBILE_FPGA private: - fpga::WrapperConvArgs fpga_conv_args; + fpga::SplitConvArgs fpga_conv_args; public: - const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; } - void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; } + const fpga::SplitConvArgs &FpgaArgs() const { return fpga_conv_args; } + void SetFpgaArgs(const fpga::SplitConvArgs &args) { fpga_conv_args = args; } #endif }; #endif @@ -1983,11 +1983,11 @@ class FusionConvBNReluParam : public ConvParam { #ifdef PADDLE_MOBILE_FPGA private: - fpga::WrapperConvArgs fpga_conv_args; + fpga::SplitConvArgs fpga_conv_args; public: - const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; } - void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; } + const fpga::SplitConvArgs &FpgaArgs() const { return fpga_conv_args; } + void SetFpgaArgs(const fpga::SplitConvArgs &args) { fpga_conv_args = args; } #endif }; #endif diff --git a/src/operators/sum_op.cpp b/src/operators/sum_op.cpp index 8c0638c63ca7cab01047b757476549cf3832bf8a..f821364b92f74534b76ea6069e94a8233ee0a769 100644 --- a/src/operators/sum_op.cpp +++ b/src/operators/sum_op.cpp @@ -65,7 +65,6 @@ REGISTER_OPERATOR_CPU(sum, ops::SumOp); REGISTER_OPERATOR_MALI_GPU(sum, ops::ConcatOp); #endif #ifdef PADDLE_MOBILE_FPGA -REGISTER_OPERATOR_FPGA(sum, ops::ConcatOp); #endif #endif diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index a4191954a82928b7e6cd7ea79073cc2f0142f256..7ecae142e495f46be8e1bf4078478eafe195c581 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -61,38 +61,11 @@ endif () list(FIND NET "FPGAnets" CON) if (CON GREATER -1) - ADD_EXECUTABLE(test-resnet net/test_resnet.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-resnet paddle-mobile) - ADD_EXECUTABLE(test-resnet50 fpga/test_resnet50.cpp test_helper.h test_include.h executor_for_test.h) target_link_libraries(test-resnet50 paddle-mobile) - ADD_EXECUTABLE(test-fpga-EW fpga/test_fpga_EW.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-fpga-EW paddle-mobile) - - ADD_EXECUTABLE(test-fpga-conv fpga/test_fpga_conv.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-fpga-conv paddle-mobile) - - ADD_EXECUTABLE(test-fpga-pooling fpga/test_fpga_pooling.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-fpga-pooling paddle-mobile) - - ADD_EXECUTABLE(test-fpga-bypass fpga/test_fpga_bypass.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-fpga-bypass paddle-mobile) - - ADD_EXECUTABLE(test-fpga-softmax fpga/test_fpga_softmax.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-fpga-softmax paddle-mobile) - - ADD_EXECUTABLE(test-fpga-concat fpga/test_fpga_concat.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-fpga-concat paddle-mobile) - - ADD_EXECUTABLE(test-tensor-quant fpga/test_tensor_quant.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-tensor-quant paddle-mobile) - - ADD_EXECUTABLE(test-fpga-concat-op fpga/test_concat_op.cpp test_helper.h test_include.h) - target_link_libraries(test-fpga-concat-op paddle-mobile) - - ADD_EXECUTABLE(test-format-data fpga/test_format_data.cpp test_helper.h test_include.h) - target_link_libraries(test-format-data paddle-mobile) +# ADD_EXECUTABLE(test-resnet net/test_resnet.cpp test_helper.h test_include.h executor_for_test.h) +# target_link_libraries(test-resnet paddle-mobile) set(FOUND_MATCH ON) endif () diff --git a/test/fpga/test_resnet50.cpp b/test/fpga/test_resnet50.cpp index f850eb3e5ea3a03fe90d82c1eca2af6c9f8e9106..6754a51fa55b0744b94ee70209da1a3fe88f2f32 100644 --- a/test/fpga/test_resnet50.cpp +++ b/test/fpga/test_resnet50.cpp @@ -30,7 +30,11 @@ int main() { input_tensor.data() + input_tensor.numel()); paddle_mobile.FeedData(input_tensor); - paddle_mobile.Predict_To(-1); + for (int i = 0; i < 1000; i++) { + paddle_mobile.Predict_To(-1); + if (i % 100 == 0) std::cout << i << std::endl; + } + // paddle_mobile.Predict_From(73); // paddle_mobile.Predict_From_To(72, 73); diff --git a/tools/op.cmake b/tools/op.cmake index 3abe18bb7c74362bda4d564cea61ba31d61404bd..6e89fa4f66073c13ae216583d48d10327e6631ce 100644 --- a/tools/op.cmake +++ b/tools/op.cmake @@ -118,12 +118,9 @@ if (CON GREATER -1) set(POOL_OP ON) set(CONCAT_OP ON) set(SOFTMAX_OP ON) - set(DROPOUT_OP ON) set(FUSION_CONVBNRELU_OP ON) set(FUSION_CONVBN_OP ON) set(FUSION_CONVADD_OP ON) - set(MUL_OP ON) - set(FOUND_MATCH ON) endif()