Commit 7d8589c8 authored by jameswu2014, committed by qnqinan

marker1-anchor update (#1652)

Parent 111ca78b
......@@ -97,7 +97,7 @@ float fp16_2_fp32(int16_t fp16_num) {
} else if (se_fp16 < 63) {
e_fp32 = 0x80000000 + ((se_fp16 - 32) << 23);
offset = 1024;
} else {
} else { // se_fp16 == 63
e_fp32 = 0xC7800000;
offset = 1024;
}
......
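Note on the hunk above: the branch keys on se_fp16, which appears to be the sign bit and the 5 exponent bits packed into the top 6 bits of the half-precision word, so se_fp16 < 32 means the sign bit is clear, 32..62 is a negative value with a non-maximal exponent, and 63 is the negative all-ones-exponent case that the new comment labels. A minimal sketch of that decomposition, assuming an IEEE-754-style 1-5-10 half-precision layout (illustrative only, not part of the patch):

```cpp
#include <cstdint>
#include <cstdio>

// Illustrative sketch, not part of the patch: split a half-precision word into
// the fields the branch above discriminates on, assuming a 1-bit sign /
// 5-bit exponent / 10-bit mantissa layout.
int main() {
  uint16_t bits = 0xFC00;        // example pattern: sign = 1, exponent = 31
  int se_fp16 = bits >> 10;      // top 6 bits: sign + exponent -> 63
  int mantissa = bits & 0x3FF;   // low 10 bits -> 0
  std::printf("se_fp16=%d mantissa=%d\n", se_fp16, mantissa);
  return 0;
}
```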
......@@ -34,8 +34,10 @@ limitations under the License. */
#define BS_NUM_ALIGNMENT (8)
#define BIAS_SCALE_DMA_NUM (4)
#define RESULT_ALIGNMENT (32)
#define PE_COLUMN (8)
#define ROW_PARALLEL_NUM (2)
#define BIAS_NUM_ALIGNMENT (16)
#endif
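The new constants extend the existing alignment/parallelism knobs; BIAS_NUM_ALIGNMENT in particular reads like the other *_ALIGNMENT macros, i.e. a multiple to which a count is rounded up before the data is handed to the FPGA. A hypothetical illustration of that rounding (the helper and the numbers below are assumptions, not code from the patch):

```cpp
#include <cassert>
#include <cstdint>

#define BIAS_NUM_ALIGNMENT (16)

// Round num up to the next multiple of x (same formula as align_to_x further down).
static inline int64_t round_up_to(int64_t num, int64_t x) {
  return (num + x - 1) / x * x;
}

int main() {
  // e.g. 100 bias/scale entries would be padded to 112 to satisfy the alignment.
  assert(round_up_to(100, BIAS_NUM_ALIGNMENT) == 112);
  assert(round_up_to(32, BIAS_NUM_ALIGNMENT) == 32);   // already aligned
  return 0;
}
```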
......@@ -92,13 +94,14 @@ struct ImageOutputArgs {
activation; // To select activation and specify (Leaky)Relu parameter.
};
// #ifdef PADDLE_MOBILE_FPGA_V1
struct ConvDriverParam {
uint64_t filter_per_group;
uint64_t channel_per_group;
uint64_t image_one_pad_per_row;
uint64_t deconv_param;
// new
uint64_t col_padding_up;
uint64_t col_padding_down;
uint64_t row_padding_up;
......@@ -108,39 +111,49 @@ struct ConvDriverParam {
uint64_t filter_pad_width_mul_channel;
uint64_t image_win_cnt;
uint64_t image_win_cnt_last;
uint64_t filter_row;
uint64_t filter_width;
uint64_t filter_height;
uint64_t skip_window;
uint64_t stride_h;
uint64_t filter_amount_all;
uint64_t prog_full_cnt;
uint64_t filter_align;
uint64_t filter_num;
uint64_t output_width;
uint64_t output_amount_per_row;
uint64_t res_row_data_align4_pad;
uint64_t cal_res_num;
uint64_t last_cal_res_row_num;
uint64_t post_prog_full_cnt;
uint64_t deconv_skip_row; // paralvl*deconv_group
uint64_t deconv_res_skip_row; // deconv_group * result_amount_per_row
uint64_t deconv_ena;
uint64_t deconv_dump;
uint64_t output_address_phy;
uint64_t output_height;
uint64_t result_amount_per_row_multi_para;
uint64_t sb_address_phy;
uint64_t fpga_bias_scale_len;
uint64_t filter_amount_whole;
uint64_t filter_address_phy;
uint64_t filters_amount_whole;
uint64_t image_address_phy;
uint64_t image_hight;
uint64_t image_amount_per_row;
uint64_t image_amount_per_row_multi_win_first;
uint64_t image_amount_per_row_multi_win;
uint64_t filter_pad_hight;
uint64_t image_block_num;
uint64_t image_block_len;
uint64_t image_block_len_last;
......@@ -178,6 +191,7 @@ struct ConvArgs {
struct ImageInputArgs image; // input image;
struct ImageOutputArgs output;
// #ifdef PADDLE_MOBILE_FPGA_V1
struct DeconvTxParm deconv_tx_param;
struct ConvDriverParam driver;
};
......@@ -242,6 +256,7 @@ struct EWAddArgs {
struct ImageInputArgs image0;
struct ImageInputArgs image1;
struct ImageOutputArgs output;
// #ifdef PADDLE_MOBILE_FPGA_V1
struct EWAddDriverParam driver;
};
......@@ -287,6 +302,8 @@ struct DWDeconvArgs {
std::vector<std::shared_ptr<char>> vector_dw_conv_space;
};
// static inline int align_to_x(int num, int x) { return (num + x - 1) / x * x;
// }
static inline uint32_t align_to_x(int64_t num, int64_t x) {
return ((uint32_t)(num + x) - 1) / (uint32_t)x * (uint32_t)x;
}
......
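On align_to_x above: the new body is the usual round-up-to-a-multiple formula, with the intermediate arithmetic forced to uint32_t; for the positive sizes this code handles (and as long as num + x fits in 32 bits) it returns the same value as the commented-out int version. A small, self-contained usage sketch (the call sites shown are hypothetical):

```cpp
#include <cassert>
#include <cstdint>

static inline uint32_t align_to_x(int64_t num, int64_t x) {
  return ((uint32_t)(num + x) - 1) / (uint32_t)x * (uint32_t)x;
}

int main() {
  // Hypothetical call sites: pad a row of 300 elements up to a 32-wide boundary,
  // and leave an already-aligned size unchanged.
  assert(align_to_x(300, 32) == 320);
  assert(align_to_x(64, 32) == 64);
  return 0;
}
```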
......@@ -13,7 +13,9 @@
// limitations under the License.
#include "io/api_paddle_mobile.h"
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "common/enforce.h"
#include "framework/tensor.h"
......@@ -172,6 +174,14 @@ void PaddleMobilePredictor<Device, T>::FetchPaddleTensors(
}
}
template <typename Device, typename T>
void PaddleMobilePredictor<Device, T>::FetchPaddleTensors(PaddleTensor *output,
int id) {
std::shared_ptr<framework::Tensor> tensor_ptr =
paddle_mobile_->FetchResult(id);
ConvertTensors(*(tensor_ptr.get()), output);
return;
}
template <typename Device, typename T>
void PaddleMobilePredictor<Device, T>::GetPaddleTensor(const std::string &name,
PaddleTensor *output) {
......
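The new FetchPaddleTensors(PaddleTensor*, int) overload fetches a single result tensor by id instead of materialising every output. A hypothetical caller (only the overload itself comes from this commit; the surrounding function and the header path are assumptions):

```cpp
#include "io/paddle_inference_api.h"  // assumed location of PaddlePredictor / PaddleTensor

// Illustrative caller of the new overload; everything except the overload's
// signature is an assumption for the sketch.
void FetchOnlyFirstOutput(PaddlePredictor* predictor) {
  PaddleTensor output;
  // Pull just the result tensor with id 0 after prediction has run.
  predictor->FetchPaddleTensors(&output, /*id=*/0);
  // `output` now holds the converted result for that id.
}
```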
......@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "common/types.h"
......@@ -36,6 +36,7 @@ class PaddleMobilePredictor : public PaddlePredictor {
void Predict_From_To(int start, int end) override;
void FeedPaddleTensors(const std::vector<PaddleTensor>& inputs) override;
void FetchPaddleTensors(std::vector<PaddleTensor>* outputs) override;
void FetchPaddleTensors(PaddleTensor* outputs, int id) override;
void GetPaddleTensor(const std::string& name, PaddleTensor* output) override;
#endif
......
......@@ -137,6 +137,7 @@ class PaddlePredictor {
virtual void Predict_From_To(int start, int end) = 0;
virtual void FeedPaddleTensors(const std::vector<PaddleTensor>& inputs) = 0;
virtual void FetchPaddleTensors(std::vector<PaddleTensor>* outputs) = 0;
virtual void FetchPaddleTensors(PaddleTensor* outputs, int id) = 0;
virtual void GetPaddleTensor(const std::string& name,
PaddleTensor* output) = 0;
#endif
......
......@@ -13,9 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ANCHOR_GENERATOR_OP
#include <string.h>
#include <iostream>
#include <memory>
#include <utility>
#include <vector>
#include "operators/kernel/detection_kernel.h"
......@@ -39,9 +39,10 @@ bool AnchorGeneratorKernel<FPGA, float>::Init(
79, 69, -96, -77, 112, 93, -137, -118, 153,
134, -204, -188, 220, 204, -281, -395, 296, 441};
int anchors_offset2[] = {0, 0, 51, 77, 0, 0, 30, 35, 0, 0, 81, 103,
0, 0, 20, 21, 0, 0, 36, 44, 0, 0, 43, 58,
0, 0, 34, 68, 0, 0, 24, 28, 0, 0, 19, 46};
int anchors_offset2[] = {-18, -31, 34, 47, -22, -22, 38, 38, -33,
-44, 49, 60, -2, -2, 18, 18, -10, -14,
26, 30, -14, -22, 30, 38, -9, -26, 25,
42, -92, -92, 108, 108, -2, -15, 18, 31};
if (offset > 0.6) {
memcpy(anchors_offset, anchors_offset2, sizeof(anchors_offset));
......
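The hunk above swaps the zero-based anchors_offset2 table for a centred one; each group of four entries reads like an (xmin, ymin, xmax, ymax) offset around the anchor point. How such a table would expand into boxes at one feature-map cell is sketched below (the stride, the cell position, and the field interpretation are assumptions for illustration, not taken from the kernel):

```cpp
#include <cstdio>

// Hypothetical decoding of the offset table; field meanings are an assumption.
// Each group of four entries is read as (xmin, ymin, xmax, ymax) relative to
// an anchor point on the feature map.
int main() {
  int anchors_offset2[] = {-18, -31, 34, 47, -22, -22, 38, 38, -33,
                           -44, 49, 60, -2, -2, 18, 18, -10, -14,
                           26, 30, -14, -22, 30, 38, -9, -26, 25,
                           42, -92, -92, 108, 108, -2, -15, 18, 31};
  const int stride = 16;     // assumed feature-map stride in input pixels
  const int cx = 5, cy = 3;  // an example cell on the feature map
  const int num_anchors = sizeof(anchors_offset2) / sizeof(int) / 4;
  for (int i = 0; i < num_anchors; ++i) {
    int xmin = cx * stride + anchors_offset2[4 * i + 0];
    int ymin = cy * stride + anchors_offset2[4 * i + 1];
    int xmax = cx * stride + anchors_offset2[4 * i + 2];
    int ymax = cy * stride + anchors_offset2[4 * i + 3];
    std::printf("anchor %d: (%d, %d, %d, %d)\n", i, xmin, ymin, xmax, ymax);
  }
  return 0;
}
```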