diff --git a/src/fpga/fpga_quantilization.cpp b/src/fpga/fpga_quantilization.cpp
index 5bbf4f254d465f4c45124e7512b64662f155478d..34033a60a683183695a79bfafbaf14223e2eebf2 100644
--- a/src/fpga/fpga_quantilization.cpp
+++ b/src/fpga/fpga_quantilization.cpp
@@ -37,45 +37,57 @@ static void chw_to_hwc(Dtype* data_in, Dtype* data_out, int num, int channel,
   }
 }
 
+template <typename Dtype>
+static Dtype find_max(Dtype* data, int num) {
+  Dtype max = 0;
+  for (int i = 0; i < num; ++i) {
+    max = std::max(max, data[i]);
+  }
+  return max;
+}
+
 template <typename Dtype>
 framework::Tensor* quantilize_filter(framework::Tensor* filter) {
   float scale = 0;
-  float max = 0f;
+  float fix_range = static_cast<float>((1 << (8 - 1)) - 1);
 
   const int batch_size = filter->dims()[0];
   const int channel = filter->dims()[1];
   const int height = filter->dims()[2];
   const int width = filter->dims()[3];
 
+  int8_t* int_data = nullptr;
+  int8_t* tmp_data = new int8_t[filter->numel()];
+
   // 32bit filter -> 8bit filter;
   if (filter->type() == typeid(float)) {
     float* float_data = filter->data<float>();
-    for (int i = 0; i < filter->numel(); ++i) {
-      max = std::max(max, float_data[i]);
-    }
+    float max = find_max<float>(float_data, filter->numel());
 
-    float fix_range = static_cast<float>((1 << (8 - 1)) - 1);
-    float float_range = max;
-    scale = (float_range / fix_range);
+    scale = (max / fix_range);
 
-    framework::Tensor* filter = filter;
     framework::Tensor* quant_filter = new framework::Tensor();
-    int8_t* temp = new int8_t[filter->numel()];
-    int8_t* int_data = quant_filter->mutable_data<int8_t>();
+
+    int_data = quant_filter->mutable_data<int8_t>();
     for (int i = 0; i < filter->numel(); ++i) {
-      temp[i] = (int8_t)float_data[i] * scale;
+      tmp_data[i] = static_cast<int8_t>(float_data[i] * scale);
     }
-    quant_filter.scale = scale;
-    // NCHW -> NHWC;
-    chw_to_hwc(temp, int_data, in_batch_size, channel, height, width);
-    return quantFilter;
-  } else if (filter->type() == typeid(int8_t)) {
-    // model is already quantilized
-    int8_t* int_data = filter->data<int8_t>();
+    filter = quant_filter;
+  } else {
+    int8_t max = find_max<int8_t>(filter->data<int8_t>(), filter->numel());
+    scale = (max / fix_range);
+
+    int_data = filter->data<int8_t>();
     for (int i = 0; i < filter->numel(); ++i) {
-      max = std::max(max, int_data[i]);
+      tmp_data[i] = int_data[i];
     }
+    int_data = filter->mutable_data<int8_t>();
   }
+  // NCHW -> NHWC;
+  chw_to_hwc<int8_t>(tmp_data, int_data, batch_size, channel, height, width);
+  delete[] tmp_data;
+  *(filter->fpga_args().scale_pointer()) = scale;
   return filter;
 }
diff --git a/src/operators/kernel/fpga/concat_kernel.cpp b/src/operators/kernel/fpga/concat_kernel.cpp
index ae1270b146373587287140116114970963dcca7c..c6e04787a58bc437bf0738cf67072426f1cbaa57 100644
--- a/src/operators/kernel/fpga/concat_kernel.cpp
+++ b/src/operators/kernel/fpga/concat_kernel.cpp
@@ -36,18 +36,18 @@ void ConcatKernel<FPGA, float>::Compute(const ConcatParam &param) const {
   auto out_channel = out_dim[3];
 
   auto out_offset = 0;
-
   for (int i = 0; i < inputs.size(); ++i) {
     auto input = inputs[i];
     auto channels = input->dims()[3];
     out_offset += channels;
     auto src = input->data<half>();
     for (int j = 0; j < pixels; ++j) {
-      auto dst = out->data<half>() + out_offset;
+      auto dst = out->mutable_data<half>() + out_offset;
       memory::Copy(dst, src, sizeof(half));
     }
   }
 }
+template class ConcatKernel<FPGA, float>;
 
 }  // namespace operators
 }  // namespace paddle_mobile
diff --git a/src/operators/kernel/fpga/conv_kernel.cpp b/src/operators/kernel/fpga/conv_kernel.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..91d0f393fcc1018bacd507c5f7975f7b3a2a56ca
--- /dev/null
+++ b/src/operators/kernel/fpga/conv_kernel.cpp
@@ -0,0 +1,38 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef CONV_OP
+
+#include "operators/kernel/conv_kernel.h"
+#include "operators/kernel/central-arm-func/conv_arm_func.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+template <>
+bool ConvKernel<FPGA, float>::Init(ConvParam *param) {
+  return true;
+}
+
+template <>
+void ConvKernel<FPGA, float>::Compute(const ConvParam &param) const {
+  // ConvCompute<float>(param);
+}
+
+template class ConvKernel<FPGA, float>;
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index ee1d12bfd6be13d67fd8360be2ab5c8d7f86e662..f4a14f1bc4197051594a0f8609b4662ad4c7cefb 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -160,4 +160,12 @@ else ()
 
     #add_library(test-lib-size SHARED common/test_lib_size.h common/test_lib_size.cpp)
+
+
 endif()
+
+if(FPGA)
+    ADD_EXECUTABLE(test-tensor-quant fpga/test_tensor_quant.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-tensor-quant paddle-mobile)
+endif()
diff --git a/test/fpga/test_tensor_quant.cpp b/test/fpga/test_tensor_quant.cpp
index 1e30b9be551c608c5200460ebb80526270da5aed..3835c395a4764c3c978b6bba9c1af48305be1d58 100644
--- a/test/fpga/test_tensor_quant.cpp
+++ b/test/fpga/test_tensor_quant.cpp
@@ -20,7 +20,7 @@ int main() {
   paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
   bool optimize = false;
   if (paddle_mobile.Load(g_googlenet, optimize)) {
-    auto time2 = time();
+    auto time1 = time();
     DLOG << "load cost: " << time_diff(time1, time1) << "ms";
     std::vector<float> input;
     std::vector<int64_t> dims{1, 3, 224, 224};