diff --git a/src/fpga/fpga_quantilization.cpp b/src/fpga/fpga_quantilization.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5bbf4f254d465f4c45124e7512b64662f155478d
--- /dev/null
+++ b/src/fpga/fpga_quantilization.cpp
@@ -0,0 +1,113 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "fpga/fpga_quantilization.h"
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+#include <typeinfo>
+#include <vector>
+
+namespace paddle_mobile {
+namespace fpga {
+
+// Reorders `num` images from channel-major (C, H, W) to channel-minor
+// (H, W, C) layout. `data_in` is read sequentially; `data_out` must not alias
+// it and must hold num * channel * height * width elements.
+template <typename Dtype>
+static void chw_to_hwc(Dtype* data_in, Dtype* data_out, int num, int channel,
+                       int height, int width) {
+  const int amount_per_row = width * channel;
+  const int amount_per_image = height * amount_per_row;
+
+  for (int n = 0; n < num; n++) {
+    for (int c = 0; c < channel; c++) {
+      for (int h = 0; h < height; h++) {
+        const int offset_height = h * amount_per_row;
+        for (int w = 0; w < width; w++) {
+          *(data_out + offset_height + w * channel + c) = *(data_in++);
+        }
+      }
+    }
+    // Step to the next image: one image is channel * height * width elements.
+    data_out += amount_per_image;
+  }
+}
+
+// Converts a 4-D float filter into a newly allocated int8 tensor whose data
+// is laid out NHWC; a filter of any other type is returned unchanged.
+//
+// The quantization step is scale = max(|w|) / 127, and each weight is stored
+// as w / scale, truncated toward zero and clamped to [-127, 127].
+//
+// A converted tensor is allocated with `new` and is owned by the caller.
+// `Dtype` is not used by the body; it is kept so existing call sites compile.
+template <typename Dtype>
+framework::Tensor* quantilize_filter(framework::Tensor* filter) {
+  if (filter->type() != typeid(float)) {
+    // model is already quantilized, or holds a type that is not converted
+    return filter;
+  }
+
+  const int batch_size = filter->dims()[0];
+  const int channel = filter->dims()[1];
+  const int height = filter->dims()[2];
+  const int width = filter->dims()[3];
+
+  // 32bit filter -> 8bit filter;
+  const float* float_data = filter->data<float>();
+  const int64_t count = filter->numel();
+
+  // Use the largest magnitude, not the largest value, so that negative
+  // weights also fit into the symmetric int8 range.
+  float max = 0.0f;
+  for (int64_t i = 0; i < count; ++i) {
+    max = std::max(max, std::fabs(float_data[i]));
+  }
+
+  const float fix_range = static_cast<float>((1 << (8 - 1)) - 1);
+  const float float_range = max;
+  const float scale = float_range / fix_range;
+
+  // Scratch buffer for the NCHW int8 values; released automatically on return.
+  std::vector<int8_t> temp(static_cast<size_t>(count));
+  for (int64_t i = 0; i < count; ++i) {
+    // An all-zero filter gives scale == 0; keep zeros instead of dividing.
+    const float scaled = (scale > 0.0f) ? float_data[i] / scale : 0.0f;
+    const float clamped = std::min(fix_range, std::max(-fix_range, scaled));
+    temp[i] = static_cast<int8_t>(clamped);
+  }
+
+  framework::Tensor* quant_filter = new framework::Tensor();
+  // The dims stay those of the source filter; only the data order changes.
+  // NOTE(review): assumes mutable_data can take the dims to allocate -
+  // confirm against framework/tensor.h.
+  int8_t* int_data = quant_filter->mutable_data<int8_t>(filter->dims());
+  // NOTE(review): assumes Tensor exposes a public `scale` member - confirm.
+  quant_filter->scale = scale;
+  // NCHW -> NHWC;
+  chw_to_hwc<int8_t>(temp.data(), int_data, batch_size, channel, height,
+                     width);
+  return quant_filter;
+}
+
+// The template is defined only in this translation unit, so instantiate
+// explicitly what other files may link against.
+template framework::Tensor* quantilize_filter<float>(
+    framework::Tensor* filter);
+template framework::Tensor* quantilize_filter<int8_t>(
+    framework::Tensor* filter);
+
+}  // namespace fpga
+}  // namespace paddle_mobile
diff --git a/src/fpga/fpga_quantilization.h b/src/fpga/fpga_quantilization.h
index 7a1df04732580c7225423cedeb277beca3edc154..8dacd20abdc85da05a451ec763fd01f03f8f4516 100644
--- a/src/fpga/fpga_quantilization.h
+++ b/src/fpga/fpga_quantilization.h
@@ -18,35 +18,13 @@ limitations under the License.
*/ #include "framework/tensor.h" namespace paddle_mobile { +namespace fpga { template -framework::Tensor* quantilize_filter(framework::Tensor* filter) { - float scale = 0; - // 32bit filter -> 8bit filter; - float min = 0f; - float max = 0f; - if (filter->type() == typeid(float)) { - float* floatData = originalFilter->data(); - for (int i = 0; i < filter->numel(); ++i) { - min = std::min(min, floatData[i]); - max = std::max(max, floatData[i]); - } - - float fix_range = (float)((1 << (8 - 1)) - 1); - float float_range = max; - scale = (float_range / fix_range); - - framework::Tensor* originalFilter = filter; - framework::Tensor* quantFilter = new framework::Tensor(); - int8_t* intData = quantFilter->mutable_data(); - for (int i = 0; i < filter->numel(); ++i) { - intData[i] = (int8_t)floatData[i] * scale; - } - quantFilter.scale = scale; - // NCHW -> NHWC; - return quantFilter; - } - return filter; -} +static void chw_to_hwc(Dtype* data_in, Dtype* data_out, int num, int channel, + int height, int width); +template +framework::Tensor* quantilize_filter(framework::Tensor* filter); +} // namespace fpga } // namespace paddle_mobile diff --git a/src/operators/kernel/fpga/conv_add_bn_kernel.cpp b/src/operators/kernel/fpga/conv_add_bn_kernel.cpp index 6719db3a80cb3c3a2ee603096b2659fa5489497d..3240a8d6b9604d0876691b641c072bc596312dbd 100644 --- a/src/operators/kernel/fpga/conv_add_bn_kernel.cpp +++ b/src/operators/kernel/fpga/conv_add_bn_kernel.cpp @@ -16,6 +16,7 @@ limitations under the License. 
*/ #include "operators/kernel/conv_add_bn_kernel.h" #include "fpga/api/fpga_api.h" +#include "fpga/quantilization.h" namespace paddle_mobile { namespace operators { @@ -28,7 +29,7 @@ bool ConvAddBNKernel::Init(FusionConvAddBNParam *param) { const Tensor *bias = param->Bias(); auto bias_ptr = bias->data(); const Tensor *filter = param->Filter(); - auto filter_ptr = filter->data(); + Tensor *out = param->Output(); auto out_ptr = out->mutable_data(); auto bn_mean_ptr = param->InputMean()->data(); @@ -41,7 +42,8 @@ bool ConvAddBNKernel::Init(FusionConvAddBNParam *param) { "Image channel should be equal to bias number"); const int channel = input->dims()[1]; - float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float)); + float *bs_ptr = + reinterpret_cast(fpga::fpga_malloc(2 * channel * sizeof(float))); Tensor *new_scale = new Tensor(); Tensor *new_bias = new Tensor(); auto new_scale_ptr = new_scale->mutable_data({channel}); @@ -58,26 +60,33 @@ bool ConvAddBNKernel::Init(FusionConvAddBNParam *param) { param->SetNewScale(new_scale); param->SetNewBias(new_bias); + const Tensor *quant_filter = quantilize_filter(filter); + + // delete original filter? 
+ filter = quant_filter; + + auto filter_ptr = filter->data(); fpga::ConvArgs convArgs; convArgs.relu_enabled = relu_enabled; - convArgs.filter_address = (void *)filter_ptr; + convArgs.filter_address = reinterpret_cast filter_ptr; convArgs.filter_num = filter->dims()[0]; convArgs.group_num = param->Groups(); - convArgs.sb_address = (void *)bs_ptr; + convArgs.sb_address = reinterpret_cast bs_ptr; convArgs.kernel.stride_h = param->Strides()[0]; convArgs.kernel.stride_w = param->Strides()[1]; convArgs.kernel.height = filter->dims()[2]; convArgs.kernel.width = filter->dims()[3]; - convArgs.image.address = (void *)input_ptr; + convArgs.image.address = reinterpret_cast input_ptr; convArgs.image.channels = input->dims()[1]; convArgs.image.height = input->dims()[2]; convArgs.image.width = input->dims()[3]; convArgs.image.pad_height = param->Paddings()[0]; convArgs.image.pad_width = param->Paddings()[1]; convArgs.image.scale_address = input->fpga_args().scale_pointer(); - convArgs.output.address = (void *)out_ptr; + convArgs.output.address = reinterpret_cast out_ptr; convArgs.output.scale_address = out->fpga_args().scale_pointer(); param->SetFpgaArgs(convArgs); + return true; }