diff --git a/src/fpga/common/fpga_common.cpp b/src/fpga/common/fpga_common.cpp index 52ad3565b35fff5420ad5bd8252bd361aa73787d..2c589b3ef6250275acd82d4a04d38620ac410ba4 100644 --- a/src/fpga/common/fpga_common.cpp +++ b/src/fpga/common/fpga_common.cpp @@ -97,7 +97,7 @@ float fp16_2_fp32(int16_t fp16_num) { } else if (se_fp16 < 63) { e_fp32 = 0x80000000 + ((se_fp16 - 32) << 23); offset = 1024; - } else { + } else { // se_fp16 == 63 e_fp32 = 0xC7800000; offset = 1024; } diff --git a/src/fpga/common/fpga_common.h b/src/fpga/common/fpga_common.h index 95b725b64155487d5de4898a7771d74b9b223d5e..a798d54459b86f67a28c158dc30c82131ea48626 100644 --- a/src/fpga/common/fpga_common.h +++ b/src/fpga/common/fpga_common.h @@ -34,8 +34,10 @@ limitations under the License. */ #define BS_NUM_ALIGNMENT (8) #define BIAS_SCALE_DMA_NUM (4) #define RESULT_ALIGNMENT (32) + #define PE_COLUMN (8) #define ROW_PARALLEL_NUM (2) + #define BIAS_NUM_ALIGNMENT (16) #endif @@ -92,13 +94,14 @@ struct ImageOutputArgs { activation; // To select activation and specify (Leaky)Relu parameter. }; +// #ifdef PADDLE_MOBILE_FPGA_V1 struct ConvDriverParam { uint64_t filter_per_group; uint64_t channel_per_group; - uint64_t image_one_pad_per_row; uint64_t deconv_param; + // new uint64_t col_padding_up; uint64_t col_padding_down; uint64_t row_padding_up; @@ -108,39 +111,49 @@ struct ConvDriverParam { uint64_t filter_pad_width_mul_channel; uint64_t image_win_cnt; uint64_t image_win_cnt_last; + uint64_t filter_row; uint64_t filter_width; uint64_t filter_height; uint64_t skip_window; uint64_t stride_h; + uint64_t filter_amount_all; uint64_t prog_full_cnt; uint64_t filter_align; uint64_t filter_num; + uint64_t output_width; uint64_t output_amount_per_row; uint64_t res_row_data_align4_pad; uint64_t cal_res_num; uint64_t last_cal_res_row_num; uint64_t post_prog_full_cnt; + uint64_t deconv_skip_row; // paralvl*deconv_group uint64_t deconv_res_skip_row; // deconv_group * result_amount_per_row uint64_t deconv_ena; uint64_t deconv_dump; + uint64_t output_address_phy; uint64_t output_height; uint64_t result_amount_per_row_multi_para; + uint64_t sb_address_phy; uint64_t fpga_bias_scale_len; uint64_t filter_amount_whole; + uint64_t filter_address_phy; uint64_t filters_amount_whole; + uint64_t image_address_phy; uint64_t image_hight; uint64_t image_amount_per_row; + uint64_t image_amount_per_row_multi_win_first; uint64_t image_amount_per_row_multi_win; uint64_t filter_pad_hight; + uint64_t image_block_num; uint64_t image_block_len; uint64_t image_block_len_last; @@ -178,6 +191,7 @@ struct ConvArgs { struct ImageInputArgs image; // input image; struct ImageOutputArgs output; + // #ifdef PADDLE_MOBILE_FPGA_V1 struct DeconvTxParm deconv_tx_param; struct ConvDriverParam driver; }; @@ -242,6 +256,7 @@ struct EWAddArgs { struct ImageInputArgs image0; struct ImageInputArgs image1; struct ImageOutputArgs output; + // #ifdef PADDLE_MOBILE_FPGA_V1 struct EWAddDriverParam driver; }; @@ -287,6 +302,8 @@ struct DWDeconvArgs { std::vector> vector_dw_conv_space; }; +// static inline int align_to_x(int num, int x) { return (num + x - 1) / x * x; +// } static inline uint32_t align_to_x(int64_t num, int64_t x) { return ((uint32_t)(num + x) - 1) / (uint32_t)x * (uint32_t)x; } diff --git a/src/io/api_paddle_mobile.cc b/src/io/api_paddle_mobile.cc index d19120739e5a28d25cb3fa515006390f96d82b80..1c1bb11d6ef65a06622c6e6aacdcfe94881a20fc 100644 --- a/src/io/api_paddle_mobile.cc +++ b/src/io/api_paddle_mobile.cc @@ -13,7 +13,9 @@ // limitations under the License. #include "io/api_paddle_mobile.h" +#include #include +#include #include #include "common/enforce.h" #include "framework/tensor.h" @@ -172,6 +174,14 @@ void PaddleMobilePredictor::FetchPaddleTensors( } } +template +void PaddleMobilePredictor::FetchPaddleTensors(PaddleTensor *output, + int id) { + std::shared_ptr tensor_ptr = + paddle_mobile_->FetchResult(id); + ConvertTensors(*(tensor_ptr.get()), output); + return; +} template void PaddleMobilePredictor::GetPaddleTensor(const std::string &name, PaddleTensor *output) { diff --git a/src/io/api_paddle_mobile.h b/src/io/api_paddle_mobile.h index 38af541a9262ea1f4c9ea0f8e4229316c54a4a18..11c993b3f879455eb1ae5268e3d9c2fcbcfc0bc1 100644 --- a/src/io/api_paddle_mobile.h +++ b/src/io/api_paddle_mobile.h @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once - +#include #include #include #include "common/types.h" @@ -36,6 +36,7 @@ class PaddleMobilePredictor : public PaddlePredictor { void Predict_From_To(int start, int end) override; void FeedPaddleTensors(const std::vector& inputs) override; void FetchPaddleTensors(std::vector* outputs) override; + void FetchPaddleTensors(PaddleTensor* outputs, int id) override; void GetPaddleTensor(const std::string& name, PaddleTensor* output) override; #endif diff --git a/src/io/paddle_inference_api.h b/src/io/paddle_inference_api.h index e01b5abb782a32366c7adad6284c3ed3a5f81e79..9a0ed823b19ad1ec07c2ecef928b1018c56ee62c 100644 --- a/src/io/paddle_inference_api.h +++ b/src/io/paddle_inference_api.h @@ -137,6 +137,7 @@ class PaddlePredictor { virtual void Predict_From_To(int start, int end) = 0; virtual void FeedPaddleTensors(const std::vector& inputs) = 0; virtual void FetchPaddleTensors(std::vector* outputs) = 0; + virtual void FetchPaddleTensors(PaddleTensor* outputs, int id) = 0; virtual void GetPaddleTensor(const std::string& name, PaddleTensor* output) = 0; #endif diff --git a/src/operators/kernel/fpga/V1/anchor_generator_kernel.cpp b/src/operators/kernel/fpga/V1/anchor_generator_kernel.cpp index 6046b3d2f0a4a1d273d31aac079244ce3ec3703a..31872411f7a0862209c0017cf4cf98e7826abc03 100644 --- a/src/operators/kernel/fpga/V1/anchor_generator_kernel.cpp +++ b/src/operators/kernel/fpga/V1/anchor_generator_kernel.cpp @@ -13,9 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #ifdef ANCHOR_GENERATOR_OP - #include #include +#include #include #include #include "operators/kernel/detection_kernel.h" @@ -39,9 +39,10 @@ bool AnchorGeneratorKernel::Init( 79, 69, -96, -77, 112, 93, -137, -118, 153, 134, -204, -188, 220, 204, -281, -395, 296, 441}; - int anchors_offset2[] = {0, 0, 51, 77, 0, 0, 30, 35, 0, 0, 81, 103, - 0, 0, 20, 21, 0, 0, 36, 44, 0, 0, 43, 58, - 0, 0, 34, 68, 0, 0, 24, 28, 0, 0, 19, 46}; + int anchors_offset2[] = {-18, -31, 34, 47, -22, -22, 38, 38, -33, + -44, 49, 60, -2, -2, 18, 18, -10, -14, + 26, 30, -14, -22, 30, 38, -9, -26, 25, + 42, -92, -92, 108, 108, -2, -15, 18, 31}; if (offset > 0.6) { memcpy(anchors_offset, anchors_offset2, sizeof(anchors_offset));