Unverified commit 004c7a94, authored by S smilejames, committed by GitHub

Merge branch 'develop' into develop

@@ -46,8 +46,8 @@ static Dtype find_max(Dtype* data, int num) {
   return max;
 }
 
-template <typename Dtype>
-framework::Tensor* quantilize_filter(framework::Tensor* filter) {
+// template <typename Dtype>
+framework::Tensor* quantify_filter(framework::Tensor* filter) {
   float scale = 0;
   float fix_range = static_cast<float>((1 << (8 - 1)) - 1);
@@ -57,7 +57,7 @@ framework::Tensor* quantilize_filter(framework::Tensor* filter) {
   const int width = filter->dims()[3];
 
   int8_t* int_data = nullptr;
-  int8_t* tmp_data = new int[filter->numel()];
+  int8_t* tmp_data = new int8_t[filter->numel()];
   // 32bit filter -> 8bit filter;
   if (filter->type() == typeid(float)) {
...
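Most of the body of quantify_filter is collapsed in this view, but the visible lines (find_max, scale, fix_range = 127, and the "32bit filter -> 8bit filter" comment) point to a max-abs int8 quantization scheme. Below is a minimal sketch of that scheme, assuming scale = 127 / max(|w|); quantize_to_int8 is an illustrative helper, not the function from this commit, which operates on a framework::Tensor.

#include <cstdint>

// Illustrative max-abs int8 quantization (assumed scheme, not the
// commit's implementation). Maps the largest float magnitude to 127.
static int8_t* quantize_to_int8(const float* data, int num, float* scale_out) {
  const float fix_range = static_cast<float>((1 << (8 - 1)) - 1);  // 127
  float max = 0;
  for (int i = 0; i < num; ++i) {
    const float v = data[i] < 0 ? -data[i] : data[i];
    if (v > max) max = v;
  }
  const float scale = fix_range / max;  // float -> int8 multiplier
  int8_t* out = new int8_t[num];        // caller owns the buffer
  for (int i = 0; i < num; ++i) {
    out[i] = static_cast<int8_t>(data[i] * scale);
  }
  *scale_out = scale;  // kept so results can be rescaled downstream
  return out;
}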
@@ -24,7 +24,7 @@ template <typename Dtype>
 static void chw_to_hwc(Dtype* data_in, Dtype* data_out, int num, int channel,
                        int height, int width);
 
-template <typename Dtype>
-framework::Tensor* quantilize_filter(framework::Tensor* filter);
+// template <typename Dtype>
+framework::Tensor* quantify_filter(framework::Tensor* filter);
 
 }  // namespace fpga
 }  // namespace paddle_mobile
@@ -28,7 +28,7 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam *param) {
   auto input_ptr = input->data<half>();
   const Tensor *bias = param->Bias();
   auto bias_ptr = bias->data<float>();
-  const Tensor *filter = param->Filter();
+  Tensor *filter = param->Filter();
   Tensor *out = param->Output();
   auto out_ptr = out->mutable_data<half>();
@@ -60,7 +60,7 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam *param) {
   param->SetNewScale(new_scale);
   param->SetNewBias(new_bias);
 
-  const Tensor *quant_filter = quantilize_filter(filter);
+  Tensor *quant_filter = fpga::quantify_filter(filter);
 
   // delete original filter?
   filter = quant_filter;
@@ -68,22 +68,22 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam *param) {
   auto filter_ptr = filter->data<float>();
 
   fpga::ConvArgs convArgs;
   convArgs.relu_enabled = relu_enabled;
-  convArgs.filter_address = reinterpret_cast<void *> filter_ptr;
+  convArgs.filter_address = (void *)filter_ptr;
   convArgs.filter_num = filter->dims()[0];
   convArgs.group_num = param->Groups();
-  convArgs.sb_address = reinterpret_cast<void *> bs_ptr;
+  convArgs.sb_address = (void *)bs_ptr;
   convArgs.kernel.stride_h = param->Strides()[0];
   convArgs.kernel.stride_w = param->Strides()[1];
   convArgs.kernel.height = filter->dims()[2];
   convArgs.kernel.width = filter->dims()[3];
-  convArgs.image.address = reinterpret_cast<void *> input_ptr;
+  convArgs.image.address = (void *)input_ptr;
   convArgs.image.channels = input->dims()[1];
   convArgs.image.height = input->dims()[2];
   convArgs.image.width = input->dims()[3];
   convArgs.image.pad_height = param->Paddings()[0];
   convArgs.image.pad_width = param->Paddings()[1];
   convArgs.image.scale_address = input->fpga_args().scale_pointer();
-  convArgs.output.address = reinterpret_cast<void *> out_ptr;
+  convArgs.output.address = (void *)out_ptr;
   convArgs.output.scale_address = out->fpga_args().scale_pointer();
   param->SetFpgaArgs(convArgs);
...
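One note on the cast changes in this hunk: reinterpret_cast<void *> filter_ptr, without parentheses around the operand, is a syntax error in C++, so the old lines could not have compiled as written; the commit replaces them with C-style casts. An equivalent fix that keeps reinterpret_cast (same identifiers as the diff above) would be:

  convArgs.filter_address = reinterpret_cast<void *>(filter_ptr);
  convArgs.sb_address = reinterpret_cast<void *>(bs_ptr);
  convArgs.image.address = reinterpret_cast<void *>(input_ptr);
  convArgs.output.address = reinterpret_cast<void *>(out_ptr);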
@@ -920,7 +920,11 @@ class FusionFcParam : public OpParam {
   }
 
   const Tensor *InputX() const { return input_x_; }
+#ifdef PADDLE_MOBILE_FPGA
+  Tensor *InputY() const { return input_y_; }
+#else
   const Tensor *InputY() const { return input_y_; }
+#endif
   const Tensor *InputZ() const { return input_z_; }
@@ -976,7 +980,11 @@ class FusionConvAddParam : public OpParam {
   const Tensor *Input() const { return input_; }
+#ifdef PADDLE_MOBILE_FPGA
+  Tensor *Filter() const { return filter_; }
+#else
   const Tensor *Filter() const { return filter_; }
+#endif
   Tensor *Output() const { return output_; }
@@ -1050,7 +1058,11 @@ class FusionConvAddBNReluParam : public OpParam {
   const Tensor *Input() const { return input_; }
+#ifdef PADDLE_MOBILE_FPGA
+  Tensor *Filter() const { return filter_; }
+#else
   const Tensor *Filter() const { return filter_; }
+#endif
   Tensor *Output() const { return output_; }
@@ -1144,8 +1156,11 @@ class FusionConvAddBNParam : public OpParam {
   const Tensor *Input() const { return input_; }
+#ifdef PADDLE_MOBILE_FPGA
+  Tensor *Filter() const { return filter_; }
+#else
   const Tensor *Filter() const { return filter_; }
+#endif
   Tensor *Output() const { return output_y_; }
   const vector<int> &Strides() const { return strides_; }
...
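The repeated #ifdef PADDLE_MOBILE_FPGA blocks all make the same trade: on FPGA builds the accessors drop const so the kernel init path can hand the tensor to fpga::quantify_filter, which takes a mutable Tensor*. An illustrative contrast, mirroring the kernel diff above:

  // What the FPGA path needs (non-const accessor):
  Tensor *filter = param->Filter();
  Tensor *quant_filter = fpga::quantify_filter(filter);

  // With the original const accessor this fails to compile:
  // const Tensor *filter = param->Filter();
  // fpga::quantify_filter(filter);  // error: invalid conversion from
  //                                 // 'const Tensor*' to 'Tensor*'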