提交 c00ec992 编写于 作者: J jameswu2014 提交者: qnqinan

marker1-anchor update (#1652)

上级 b63e4ddd
...@@ -97,7 +97,7 @@ float fp16_2_fp32(int16_t fp16_num) { ...@@ -97,7 +97,7 @@ float fp16_2_fp32(int16_t fp16_num) {
} else if (se_fp16 < 63) { } else if (se_fp16 < 63) {
e_fp32 = 0x80000000 + ((se_fp16 - 32) << 23); e_fp32 = 0x80000000 + ((se_fp16 - 32) << 23);
offset = 1024; offset = 1024;
} else { } else { // se_fp16 == 63
e_fp32 = 0xC7800000; e_fp32 = 0xC7800000;
offset = 1024; offset = 1024;
} }
......
...@@ -34,8 +34,10 @@ limitations under the License. */ ...@@ -34,8 +34,10 @@ limitations under the License. */
#define BS_NUM_ALIGNMENT (8) #define BS_NUM_ALIGNMENT (8)
#define BIAS_SCALE_DMA_NUM (4) #define BIAS_SCALE_DMA_NUM (4)
#define RESULT_ALIGNMENT (32) #define RESULT_ALIGNMENT (32)
#define PE_COLUMN (8) #define PE_COLUMN (8)
#define ROW_PARALLEL_NUM (2) #define ROW_PARALLEL_NUM (2)
#define BIAS_NUM_ALIGNMENT (16) #define BIAS_NUM_ALIGNMENT (16)
#endif #endif
...@@ -92,13 +94,14 @@ struct ImageOutputArgs { ...@@ -92,13 +94,14 @@ struct ImageOutputArgs {
activation; // To select activation and specify (Leaky)Relu parameter. activation; // To select activation and specify (Leaky)Relu parameter.
}; };
// #ifdef PADDLE_MOBILE_FPGA_V1
struct ConvDriverParam { struct ConvDriverParam {
uint64_t filter_per_group; uint64_t filter_per_group;
uint64_t channel_per_group; uint64_t channel_per_group;
uint64_t image_one_pad_per_row; uint64_t image_one_pad_per_row;
uint64_t deconv_param; uint64_t deconv_param;
// new
uint64_t col_padding_up; uint64_t col_padding_up;
uint64_t col_padding_down; uint64_t col_padding_down;
uint64_t row_padding_up; uint64_t row_padding_up;
...@@ -108,39 +111,49 @@ struct ConvDriverParam { ...@@ -108,39 +111,49 @@ struct ConvDriverParam {
uint64_t filter_pad_width_mul_channel; uint64_t filter_pad_width_mul_channel;
uint64_t image_win_cnt; uint64_t image_win_cnt;
uint64_t image_win_cnt_last; uint64_t image_win_cnt_last;
uint64_t filter_row; uint64_t filter_row;
uint64_t filter_width; uint64_t filter_width;
uint64_t filter_height; uint64_t filter_height;
uint64_t skip_window; uint64_t skip_window;
uint64_t stride_h; uint64_t stride_h;
uint64_t filter_amount_all; uint64_t filter_amount_all;
uint64_t prog_full_cnt; uint64_t prog_full_cnt;
uint64_t filter_align; uint64_t filter_align;
uint64_t filter_num; uint64_t filter_num;
uint64_t output_width; uint64_t output_width;
uint64_t output_amount_per_row; uint64_t output_amount_per_row;
uint64_t res_row_data_align4_pad; uint64_t res_row_data_align4_pad;
uint64_t cal_res_num; uint64_t cal_res_num;
uint64_t last_cal_res_row_num; uint64_t last_cal_res_row_num;
uint64_t post_prog_full_cnt; uint64_t post_prog_full_cnt;
uint64_t deconv_skip_row; // paralvl*deconv_group uint64_t deconv_skip_row; // paralvl*deconv_group
uint64_t deconv_res_skip_row; // deconv_group * result_amount_per_row uint64_t deconv_res_skip_row; // deconv_group * result_amount_per_row
uint64_t deconv_ena; uint64_t deconv_ena;
uint64_t deconv_dump; uint64_t deconv_dump;
uint64_t output_address_phy; uint64_t output_address_phy;
uint64_t output_height; uint64_t output_height;
uint64_t result_amount_per_row_multi_para; uint64_t result_amount_per_row_multi_para;
uint64_t sb_address_phy; uint64_t sb_address_phy;
uint64_t fpga_bias_scale_len; uint64_t fpga_bias_scale_len;
uint64_t filter_amount_whole; uint64_t filter_amount_whole;
uint64_t filter_address_phy; uint64_t filter_address_phy;
uint64_t filters_amount_whole; uint64_t filters_amount_whole;
uint64_t image_address_phy; uint64_t image_address_phy;
uint64_t image_hight; uint64_t image_hight;
uint64_t image_amount_per_row; uint64_t image_amount_per_row;
uint64_t image_amount_per_row_multi_win_first; uint64_t image_amount_per_row_multi_win_first;
uint64_t image_amount_per_row_multi_win; uint64_t image_amount_per_row_multi_win;
uint64_t filter_pad_hight; uint64_t filter_pad_hight;
uint64_t image_block_num; uint64_t image_block_num;
uint64_t image_block_len; uint64_t image_block_len;
uint64_t image_block_len_last; uint64_t image_block_len_last;
...@@ -178,6 +191,7 @@ struct ConvArgs { ...@@ -178,6 +191,7 @@ struct ConvArgs {
struct ImageInputArgs image; // input image; struct ImageInputArgs image; // input image;
struct ImageOutputArgs output; struct ImageOutputArgs output;
// #ifdef PADDLE_MOBILE_FPGA_V1
struct DeconvTxParm deconv_tx_param; struct DeconvTxParm deconv_tx_param;
struct ConvDriverParam driver; struct ConvDriverParam driver;
}; };
...@@ -242,6 +256,7 @@ struct EWAddArgs { ...@@ -242,6 +256,7 @@ struct EWAddArgs {
struct ImageInputArgs image0; struct ImageInputArgs image0;
struct ImageInputArgs image1; struct ImageInputArgs image1;
struct ImageOutputArgs output; struct ImageOutputArgs output;
// #ifdef PADDLE_MOBILE_FPGA_V1
struct EWAddDriverParam driver; struct EWAddDriverParam driver;
}; };
...@@ -287,6 +302,8 @@ struct DWDeconvArgs { ...@@ -287,6 +302,8 @@ struct DWDeconvArgs {
std::vector<std::shared_ptr<char>> vector_dw_conv_space; std::vector<std::shared_ptr<char>> vector_dw_conv_space;
}; };
// static inline int align_to_x(int num, int x) { return (num + x - 1) / x * x;
// }
// Rounds `num` up to a multiple of `x` using 32-bit unsigned arithmetic:
// the sum (num + x) and `x` are each narrowed to uint32_t before the
// subtract / divide / multiply, and the result is returned as uint32_t.
// NOTE(review): x == 0 would divide by zero, and inputs that do not fit in
// 32 bits are truncated by the casts — presumably callers only pass small
// positive alignments; confirm.
static inline uint32_t align_to_x(int64_t num, int64_t x) { static inline uint32_t align_to_x(int64_t num, int64_t x) {
return ((uint32_t)(num + x) - 1) / (uint32_t)x * (uint32_t)x; return ((uint32_t)(num + x) - 1) / (uint32_t)x * (uint32_t)x;
} }
......
...@@ -13,7 +13,9 @@ ...@@ -13,7 +13,9 @@
// limitations under the License. // limitations under the License.
#include "io/api_paddle_mobile.h" #include "io/api_paddle_mobile.h"
#include <memory>
#include <string> #include <string>
#include <utility>
#include <vector> #include <vector>
#include "common/enforce.h" #include "common/enforce.h"
#include "framework/tensor.h" #include "framework/tensor.h"
...@@ -172,6 +174,14 @@ void PaddleMobilePredictor<Device, T>::FetchPaddleTensors( ...@@ -172,6 +174,14 @@ void PaddleMobilePredictor<Device, T>::FetchPaddleTensors(
} }
} }
// Single-output overload: asks the wrapped paddle_mobile_ engine for the
// result associated with `id` and passes that tensor, together with `output`,
// to ConvertTensors.
template <typename Device, typename T>
void PaddleMobilePredictor<Device, T>::FetchPaddleTensors(PaddleTensor *output,
                                                          int id) {
  std::shared_ptr<framework::Tensor> fetched = paddle_mobile_->FetchResult(id);
  // Dereference the shared_ptr directly; ConvertTensors receives the tensor
  // by reference exactly as before.
  ConvertTensors(*fetched, output);
}
template <typename Device, typename T> template <typename Device, typename T>
void PaddleMobilePredictor<Device, T>::GetPaddleTensor(const std::string &name, void PaddleMobilePredictor<Device, T>::GetPaddleTensor(const std::string &name,
PaddleTensor *output) { PaddleTensor *output) {
......
...@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and ...@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include <memory>
#include <string> #include <string>
#include <vector> #include <vector>
#include "common/types.h" #include "common/types.h"
...@@ -36,6 +36,7 @@ class PaddleMobilePredictor : public PaddlePredictor { ...@@ -36,6 +36,7 @@ class PaddleMobilePredictor : public PaddlePredictor {
void Predict_From_To(int start, int end) override; void Predict_From_To(int start, int end) override;
void FeedPaddleTensors(const std::vector<PaddleTensor>& inputs) override; void FeedPaddleTensors(const std::vector<PaddleTensor>& inputs) override;
void FetchPaddleTensors(std::vector<PaddleTensor>* outputs) override; void FetchPaddleTensors(std::vector<PaddleTensor>* outputs) override;
void FetchPaddleTensors(PaddleTensor* outputs, int id) override;
void GetPaddleTensor(const std::string& name, PaddleTensor* output) override; void GetPaddleTensor(const std::string& name, PaddleTensor* output) override;
#endif #endif
......
...@@ -137,6 +137,7 @@ class PaddlePredictor { ...@@ -137,6 +137,7 @@ class PaddlePredictor {
virtual void Predict_From_To(int start, int end) = 0; virtual void Predict_From_To(int start, int end) = 0;
virtual void FeedPaddleTensors(const std::vector<PaddleTensor>& inputs) = 0; virtual void FeedPaddleTensors(const std::vector<PaddleTensor>& inputs) = 0;
virtual void FetchPaddleTensors(std::vector<PaddleTensor>* outputs) = 0; virtual void FetchPaddleTensors(std::vector<PaddleTensor>* outputs) = 0;
virtual void FetchPaddleTensors(PaddleTensor* outputs, int id) = 0;
virtual void GetPaddleTensor(const std::string& name, virtual void GetPaddleTensor(const std::string& name,
PaddleTensor* output) = 0; PaddleTensor* output) = 0;
#endif #endif
......
...@@ -13,9 +13,9 @@ See the License for the specific language governing permissions and ...@@ -13,9 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifdef ANCHOR_GENERATOR_OP #ifdef ANCHOR_GENERATOR_OP
#include <string.h> #include <string.h>
#include <iostream> #include <iostream>
#include <memory>
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "operators/kernel/detection_kernel.h" #include "operators/kernel/detection_kernel.h"
...@@ -39,9 +39,10 @@ bool AnchorGeneratorKernel<FPGA, float>::Init( ...@@ -39,9 +39,10 @@ bool AnchorGeneratorKernel<FPGA, float>::Init(
79, 69, -96, -77, 112, 93, -137, -118, 153, 79, 69, -96, -77, 112, 93, -137, -118, 153,
134, -204, -188, 220, 204, -281, -395, 296, 441}; 134, -204, -188, 220, 204, -281, -395, 296, 441};
int anchors_offset2[] = {0, 0, 51, 77, 0, 0, 30, 35, 0, 0, 81, 103, int anchors_offset2[] = {-18, -31, 34, 47, -22, -22, 38, 38, -33,
0, 0, 20, 21, 0, 0, 36, 44, 0, 0, 43, 58, -44, 49, 60, -2, -2, 18, 18, -10, -14,
0, 0, 34, 68, 0, 0, 24, 28, 0, 0, 19, 46}; 26, 30, -14, -22, 30, 38, -9, -26, 25,
42, -92, -92, 108, 108, -2, -15, 18, 31};
if (offset > 0.6) { if (offset > 0.6) {
memcpy(anchors_offset, anchors_offset2, sizeof(anchors_offset)); memcpy(anchors_offset, anchors_offset2, sizeof(anchors_offset));
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册