diff --git a/src/common/types.cpp b/src/common/types.cpp
index b6387503856f438acd74b8d147da13a2b009f2a1..41bbfa5256a1d55ac9c8ebe3ba695c4a6f1be720 100644
--- a/src/common/types.cpp
+++ b/src/common/types.cpp
@@ -50,6 +50,7 @@ const char *G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU =
     "fusion_elementwise_add_relu";
 const char *G_OP_TYPE_FUSION_FC_RELU = "fusion_fc_relu";
 const char *G_OP_TYPE_REGION = "region";
+const char *G_OP_TYPE_FUSION_CONV_BN = "fusion_conv_bn";
 
 std::unordered_map<
     std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
@@ -85,6 +86,7 @@
         {G_OP_TYPE_FUSION_POOL_BN, {{"X"}, {"Y"}}},
         {G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU, {{"X", "Y"}, {"Out"}}},
         {G_OP_TYPE_FUSION_FC_RELU, {{"X", "Y", "Z"}, {"Out"}}},
-        {G_OP_TYPE_REGION, {{"X"}, {"Out"}}}};
+        {G_OP_TYPE_REGION, {{"X"}, {"Out"}}},
+        {G_OP_TYPE_FUSION_CONV_BN, {{"Input"}, {"Y"}}}};
 
 }  // namespace paddle_mobile
diff --git a/src/common/types.h b/src/common/types.h
index 6066879305d5ea7d1b6dcb0bb618c234338cc171..78c96f327a5f483ebee9d56bf338a4415542fbde 100644
--- a/src/common/types.h
+++ b/src/common/types.h
@@ -113,6 +113,7 @@ extern const char *G_OP_TYPE_FUSION_POOL_BN;
 extern const char *G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU;
 extern const char *G_OP_TYPE_FUSION_FC_RELU;
 extern const char *G_OP_TYPE_REGION;
+extern const char *G_OP_TYPE_FUSION_CONV_BN;
 
 extern std::unordered_map<
     std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
diff --git a/src/fpga/fpga_quantilization.cpp b/src/fpga/fpga_quantilization.cpp
index dee3d3abc19e020304ff9e658d40797b6681c43b..8b351f1a81e0a92f0e2f12a3f61dd2a7d3948c85 100644
--- a/src/fpga/fpga_quantilization.cpp
+++ b/src/fpga/fpga_quantilization.cpp
@@ -47,7 +47,9 @@ static Dtype find_max(Dtype* data, int num) {
 }
 
 // template <typename Dtype>
-framework::Tensor* quantify_filter(framework::Tensor* filter) {
+void quantify_filter(framework::Tensor* filter) {
+  DLOG << "quantilize_filter........";
+
   float scale = 0;
   float fix_range = static_cast<float>((1 << (8 - 1)) - 1);
 
@@ -62,25 +64,20 @@ framework::Tensor* quantify_filter(framework::Tensor* filter) {
   // 32bit filter -> 8bit filter;
   if (filter->type() == typeid(float)) {
     float* float_data = filter->data<float>();
-    float max = find_max(float_data, filter->numel());
+    float max = find_max<float>(float_data, filter->numel());
     scale = (max / fix_range);
 
-    framework::Tensor* filter = filter;
-    framework::Tensor* quant_filter = new framework::Tensor();
-
-    int_data = quant_filter->mutable_data<int8_t>();
     for (int i = 0; i < filter->numel(); ++i) {
       tmp_data[i] = (int8_t)float_data[i] * scale;
     }
-    filter = quant_filter;
+    int_data = filter->mutable_data<int8_t>();
   } else {
-    int8_t max = find_max(filter->data<int8_t>(), filter->numel());
+    int8_t max = find_max<int8_t>(filter->data<int8_t>(), filter->numel());
     scale = (max / fix_range);
 
-    int_data = filter->data<int8_t>();
     for (int i = 0; i < filter->numel(); ++i) {
-      tmp_data[i] = int_data[i];
+      tmp_data[i] = filter->data<int8_t>()[i];
     }
     int_data = filter->mutable_data<int8_t>();
   }
@@ -88,7 +85,6 @@ framework::Tensor* quantify_filter(framework::Tensor* filter) {
   chw_to_hwc<int8_t>(tmp_data, int_data, batch_size, channel, height, width);
   delete tmp_data;
   *(filter->fpga_args().scale_pointer()) = scale;
-  return filter;
 }
 
 }  // namespace fpga
diff --git a/src/fpga/fpga_quantilization.h b/src/fpga/fpga_quantilization.h
index 56e14f89ac0e7d21e7bbb704df838374be84fbcd..4f1f6ad402a3ff4df773ecbd2121820f4c7dc265 100644
--- a/src/fpga/fpga_quantilization.h
+++ b/src/fpga/fpga_quantilization.h
@@ -25,6 +25,7 @@
 static void chw_to_hwc(Dtype* data_in, Dtype* data_out, int num, int channel,
                        int height, int width);
 
 // template <typename Dtype>
-framework::Tensor* quantify_filter(framework::Tensor* filter);
+void quantify_filter(framework::Tensor* filter);
+
 }  // namespace fpga
 }  // namespace paddle_mobile
diff --git a/src/io/executor.cpp b/src/io/executor.cpp
index d6434b64aa752fd62bc637a882298228d59880b8..73e6c9d6f170fc4eebb6af2f8b7a67c847961950 100644
--- a/src/io/executor.cpp
+++ b/src/io/executor.cpp
@@ -89,7 +89,6 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
   } else {
     InitMemory();
   }
-
   std::shared_ptr<framework::BlockDesc> to_predict_block =
       to_predict_program_->Block(0);
   auto &ops = ops_of_block_[*to_predict_block.get()];
diff --git a/src/memory/t_malloc.cpp b/src/memory/t_malloc.cpp
index 42b8c4551871c58955251d94845ca13576d7735b..8902543347b2db7caee7126b2a28fa460ca741db 100644
--- a/src/memory/t_malloc.cpp
+++ b/src/memory/t_malloc.cpp
@@ -26,7 +26,7 @@ namespace paddle_mobile {
 namespace memory {
 const int MALLOC_ALIGN = 64;
 
-#ifdef PADDLE_MOBILE_FPGA
+#ifdef PADDLE_MOBILE_FPGA__VV
 namespace fpga = paddle_mobile::fpga;
 
 void Copy(void *dst, const void *src, size_t num) {
diff --git a/src/operators/feed_op.h b/src/operators/feed_op.h
index 4766d56d9ae0b86cc28c476a17547acfd53ab02b..7a58e29cea635e62e64806a0c40956baf684d76e 100644
--- a/src/operators/feed_op.h
+++ b/src/operators/feed_op.h
@@ -41,7 +41,7 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
   void RunImpl() const { fpga::PerformBypass(param_.FpgaArgs()); }
   void Init() {
     const Tensor *input = param_.InputX();
-    auto input_ptr = input->data<float>();
+    auto input_ptr = input->mutable_data<float>();
     Tensor *output = param_.Out();
     auto output_ptr = output->mutable_data<half>();
     fpga::BypassArgs args;
diff --git a/src/operators/fusion_elementwise_add_relu_op.h b/src/operators/fusion_elementwise_add_relu_op.h
index b7e1f244732f9b4c463b6dd0f1ba81e7baf04bfd..b9d662cae559781789130e0483bccef06e1ac9b0 100644
--- a/src/operators/fusion_elementwise_add_relu_op.h
+++ b/src/operators/fusion_elementwise_add_relu_op.h
@@ -28,7 +28,7 @@ using std::vector;
 class FusioneElementwiseAddReluMatcher : public framework::FusionOpMatcher {
  public:
  FusioneElementwiseAddReluMatcher() {
-    node_ = framework::Node(G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU);
+    node_ = framework::Node(G_OP_TYPE_ELEMENTWISE_ADD);
     node_ > std::make_shared<framework::Node>(G_OP_TYPE_RELU);
   }
diff --git a/src/operators/kernel/fpga/conv_add_bn_kernel.cpp b/src/operators/kernel/fpga/conv_add_bn_kernel.cpp
index 095ae4a6d0c8d642aa1e8225bb69f27fb63091b0..91553a8aa3289030ee06bee2def09cb672665e83 100644
--- a/src/operators/kernel/fpga/conv_add_bn_kernel.cpp
+++ b/src/operators/kernel/fpga/conv_add_bn_kernel.cpp
@@ -60,10 +60,7 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam *param) {
   param->SetNewScale(new_scale);
   param->SetNewBias(new_bias);
 
-  Tensor *quant_filter = fpga::quantify_filter(filter);
-
-  // delete original filter?
-  filter = quant_filter;
+  fpga::quantify_filter(filter);
 
   auto filter_ptr = filter->data<int8_t>();
   fpga::ConvArgs convArgs;
diff --git a/src/operators/kernel/fpga/conv_kernel.cpp b/src/operators/kernel/fpga/conv_kernel.cpp
deleted file mode 100644
index 91d0f393fcc1018bacd507c5f7975f7b3a2a56ca..0000000000000000000000000000000000000000
--- a/src/operators/kernel/fpga/conv_kernel.cpp
+++ /dev/null
@@ -1,38 +0,0 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#ifdef CONV_OP
-
-#include "operators/kernel/conv_kernel.h"
-#include "operators/kernel/central-arm-func/conv_arm_func.h"
-
-namespace paddle_mobile {
-namespace operators {
-
-template <>
-bool ConvKernel<FPGA, float>::Init(ConvParam *param) {
-  return true;
-}
-
-template <>
-void ConvKernel<FPGA, float>::Compute(const ConvParam &param) const {
-  // ConvCompute<float>(param);
-}
-
-template class ConvKernel<FPGA, float>;
-
-}  // namespace operators
-}  // namespace paddle_mobile
-
-#endif
diff --git a/src/operators/op_param.h b/src/operators/op_param.h
index 4d1d5af29b81b044ca6d89b4a48a078f73dcabc9..a139714b2c71ce6ef2c79343af0e918e577114fb 100644
--- a/src/operators/op_param.h
+++ b/src/operators/op_param.h
@@ -210,7 +210,7 @@ class ConvParam : OpParam {
   const Tensor *Input() const { return input_; }
 
-  const Tensor *Filter() const { return filter_; }
+  Tensor *Filter() const { return filter_; }
 
   Tensor *Output() const { return output_; }
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 468cbd4ed6d579f7b39f8628a3e052e90ae26644..1033cfa180ac6928b2edf6b0cef2885dd0e72a8c 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -27,6 +27,11 @@ elseif("resnet" IN_LIST NET)
     ADD_EXECUTABLE(test-resnet net/test_resnet.cpp test_helper.h test_include.h executor_for_test.h)
     target_link_libraries(test-resnet paddle-mobile)
 elseif("FPGAnets" IN_LIST NET)
+    # ADD_EXECUTABLE(test-resnet net/test_resnet.cpp test_helper.h test_include.h executor_for_test.h)
+    # target_link_libraries(test-resnet paddle-mobile)
+    ADD_EXECUTABLE(test-tensor-quant fpga/test_tensor_quant.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-tensor-quant paddle-mobile)
+
 else ()
     # gen test
@@ -173,8 +178,7 @@ else ()
 endif()
 
-if(FPGA)
-    ADD_EXECUTABLE(test-tensor-quant fpga/test_tensor_quant.cpp test_helper.h test_include.h executor_for_test.h)
-    target_link_libraries(test-tensor-quant paddle-mobile)
-
-endif()
+# if(FPGA)
+#     ADD_EXECUTABLE(test-tensor-quant fpga/test_tensor_quant.cpp test_helper.h test_include.h executor_for_test.h)
+#     target_link_libraries(test-tensor-quant paddle-mobile)
+# endif()
diff --git a/test/fpga/test_tensor_quant.cpp b/test/fpga/test_tensor_quant.cpp
index 3835c395a4764c3c978b6bba9c1af48305be1d58..6cfc27e91ced109e41bf5420649dbb762ee94d66 100644
--- a/test/fpga/test_tensor_quant.cpp
+++ b/test/fpga/test_tensor_quant.cpp
@@ -12,23 +12,34 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include <fstream>
+#include <iostream>
 #include "../test_helper.h"
 #include "../test_include.h"
 
 int main() {
-  paddle_mobile::PaddleMobile<paddle_mobile::FPGA> paddle_mobile;
-  bool optimize = false;
-  if (paddle_mobile.Load(g_googlenet, optimize)) {
-    auto time1 = time();
-    DLOG << "load cost: " << time_diff(time1, time1) << "ms";
-    std::vector<float> input;
-    std::vector<int64_t> dims{1, 3, 224, 224};
-    GetInput<float>(g_test_image_1x3x224x224, &input, dims);
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+  paddle_mobile.SetThreadNum(4);
+  auto time1 = time();
+  if (paddle_mobile.Load(g_resnet, true)) {
+    auto time2 = time();
+    std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl;
+    std::vector<int64_t> dims{1, 3, 32, 32};
+    Tensor input_tensor;
+    SetupTensor<float>(&input_tensor, {1, 3, 32, 32}, static_cast<float>(0),
+                       static_cast<float>(1));
+
+    std::vector<float> input(input_tensor.data<float>(),
+                             input_tensor.data<float>() + input_tensor.numel());
+    // warm up with one run before timing
+    paddle_mobile.Predict(input, dims);
     auto time3 = time();
-    auto vec_result = paddle_mobile.Predict(input, dims);
+    for (int i = 0; i < 10; ++i) {
+      paddle_mobile.Predict(input, dims);
+    }
     auto time4 = time();
-    DLOG << "predict cost :" << time_diff(time3, time4) << "ms";
+    std::cout << "predict cost :" << time_diff(time3, time4) << "ms"
+              << std::endl;
   }
+  return 0;
 }
diff --git a/tools/op.cmake b/tools/op.cmake
index 0eab67267032d3956a52b80ab7494c6572df7074..af246f1d48e7c687812c454af163f12d5f804571 100644
--- a/tools/op.cmake
+++ b/tools/op.cmake
@@ -82,6 +82,7 @@ if ("FPGAnets" IN_LIST NET)
     set(CONCAT_OP ON)
     set(SOFTMAX_OP ON)
    set(DROPOUT_OP ON)
+    # set(CONV_OP ON)
     set(FOUND_MATCH ON)
 endif()
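
For reference, below is a minimal standalone sketch of the filter conversion this patch routes through `fpga::quantify_filter`: symmetric per-tensor int8 quantization with `fix_range = (1 << 7) - 1 = 127`, followed by a CHW-to-HWC reorder before the data pointer and scale are handed to the FPGA. The struct and function names are hypothetical, and the use of the absolute maximum and round-to-nearest (so that `scale * q ≈ f`) is an assumption about the intended arithmetic, not a copy of the kernel.

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <vector>

// Hypothetical sketch, not the paddle-mobile API.
struct QuantizedFilter {
  std::vector<int8_t> data;  // int8 weights, HWC-ordered per filter
  float scale;  // what quantify_filter stores via fpga_args().scale_pointer()
};

QuantizedFilter QuantizeFilterSketch(const std::vector<float>& chw, int num,
                                     int channel, int height, int width) {
  // Assumption: "max" means the largest absolute weight in the tensor.
  float max_abs = 0.f;
  for (float v : chw) max_abs = std::max(max_abs, std::fabs(v));
  const float fix_range = static_cast<float>((1 << (8 - 1)) - 1);  // 127
  const float scale = max_abs > 0.f ? max_abs / fix_range : 1.f;

  // Quantize so the largest weight maps to +/-127 and scale * q ~= f.
  std::vector<int8_t> q(chw.size());
  for (size_t i = 0; i < chw.size(); ++i) {
    q[i] = static_cast<int8_t>(std::round(chw[i] / scale));
  }

  // Reorder each of the `num` filters from CHW to HWC, assuming chw_to_hwc
  // computes dst[(h*width + w)*channel + c] = src[(c*height + h)*width + w].
  QuantizedFilter out{std::vector<int8_t>(q.size()), scale};
  for (int n = 0; n < num; ++n) {
    const int base = n * channel * height * width;
    for (int c = 0; c < channel; ++c) {
      for (int h = 0; h < height; ++h) {
        for (int w = 0; w < width; ++w) {
          out.data[base + (h * width + w) * channel + c] =
              q[base + (c * height + h) * width + w];
        }
      }
    }
  }
  return out;
}
```

Dequantization on the way back is then `f ≈ scale * q`, which is why the kernel keeps `scale` alongside the int8 weights in the tensor's FPGA args rather than returning a new tensor.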