diff --git a/src/fpga/fpga_quantilization.cpp b/src/fpga/fpga_quantilization.cpp
index 34033a60a683183695a79bfafbaf14223e2eebf2..dee3d3abc19e020304ff9e658d40797b6681c43b 100644
--- a/src/fpga/fpga_quantilization.cpp
+++ b/src/fpga/fpga_quantilization.cpp
@@ -46,8 +46,8 @@ static Dtype find_max(Dtype* data, int num) {
   return max;
 }
 
-template <typename Dtype>
-framework::Tensor* quantilize_filter(framework::Tensor* filter) {
+// template <typename Dtype>
+framework::Tensor* quantify_filter(framework::Tensor* filter) {
   float scale = 0;
   float fix_range = static_cast<float>((1 << (8 - 1)) - 1);
@@ -57,7 +57,7 @@ framework::Tensor* quantilize_filter(framework::Tensor* filter) {
   const int width = filter->dims()[3];
 
   int8_t* int_data = nullptr;
-  int8_t* tmp_data = new int[filter->numel()];
+  int8_t* tmp_data = new int8_t[filter->numel()];
 
   // 32bit filter -> 8bit filter;
   if (filter->type() == typeid(float)) {
diff --git a/src/fpga/fpga_quantilization.h b/src/fpga/fpga_quantilization.h
index 8dacd20abdc85da05a451ec763fd01f03f8f4516..56e14f89ac0e7d21e7bbb704df838374be84fbcd 100644
--- a/src/fpga/fpga_quantilization.h
+++ b/src/fpga/fpga_quantilization.h
@@ -24,7 +24,7 @@ template <typename Dtype>
 static void chw_to_hwc(Dtype* data_in, Dtype* data_out, int num, int channel,
                        int height, int width);
 
-template <typename Dtype>
-framework::Tensor* quantilize_filter(framework::Tensor* filter);
+// template <typename Dtype>
+framework::Tensor* quantify_filter(framework::Tensor* filter);
 
 }  // namespace fpga
 }  // namespace paddle_mobile
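The renamed quantify_filter() above, together with find_max() and fix_range = (1 << (8 - 1)) - 1 = 127, implies symmetric per-tensor int8 quantization. A minimal sketch of that scheme, assuming find_max() scans absolute values as symmetric quantization requires; the names below are illustrative, not the repository's implementation:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <vector>

    // Symmetric int8 quantization: scale = 127 / max(|w|), then round.
    static std::vector<int8_t> quantize_symmetric(const float* data, int num,
                                                  float* scale_out) {
      float max = 0;
      for (int i = 0; i < num; ++i) {
        max = std::max(max, std::abs(data[i]));  // find_max over |w|
      }
      const float fix_range = 127.0f;       // (1 << (8 - 1)) - 1
      const float scale = fix_range / max;  // float -> int8 multiplier
      std::vector<int8_t> out(num);
      for (int i = 0; i < num; ++i) {
        out[i] = static_cast<int8_t>(std::round(data[i] * scale));
      }
      *scale_out = scale;  // kept so accumulator results can be rescaled
      return out;
    }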
diff --git a/src/operators/kernel/fpga/conv_add_bn_kernel.cpp b/src/operators/kernel/fpga/conv_add_bn_kernel.cpp
index b519d6da01467fb248e21dc3af41cfab7fd4b67e..095ae4a6d0c8d642aa1e8225bb69f27fb63091b0 100644
--- a/src/operators/kernel/fpga/conv_add_bn_kernel.cpp
+++ b/src/operators/kernel/fpga/conv_add_bn_kernel.cpp
@@ -28,7 +28,7 @@ bool ConvAddBNKernel::Init(FusionConvAddBNParam *param) {
   auto input_ptr = input->data<float>();
   const Tensor *bias = param->Bias();
   auto bias_ptr = bias->data<float>();
-  const Tensor *filter = param->Filter();
+  Tensor *filter = param->Filter();
   Tensor *out = param->Output();
   auto out_ptr = out->mutable_data<float>();
 
@@ -60,7 +60,7 @@ bool ConvAddBNKernel::Init(FusionConvAddBNParam *param) {
   param->SetNewScale(new_scale);
   param->SetNewBias(new_bias);
 
-  const Tensor *quant_filter = quantilize_filter(filter);
+  Tensor *quant_filter = fpga::quantify_filter(filter);  // delete original filter?
   filter = quant_filter;
 
@@ -68,22 +68,22 @@ bool ConvAddBNKernel::Init(FusionConvAddBNParam *param) {
   auto filter_ptr = filter->data<float>();
   fpga::ConvArgs convArgs;
   convArgs.relu_enabled = relu_enabled;
-  convArgs.filter_address = reinterpret_cast<void *>(filter_ptr);
+  convArgs.filter_address = (void *)filter_ptr;
   convArgs.filter_num = filter->dims()[0];
   convArgs.group_num = param->Groups();
-  convArgs.sb_address = reinterpret_cast<void *>(bs_ptr);
+  convArgs.sb_address = (void *)bs_ptr;
   convArgs.kernel.stride_h = param->Strides()[0];
   convArgs.kernel.stride_w = param->Strides()[1];
   convArgs.kernel.height = filter->dims()[2];
   convArgs.kernel.width = filter->dims()[3];
-  convArgs.image.address = reinterpret_cast<void *>(input_ptr);
+  convArgs.image.address = (void *)input_ptr;
   convArgs.image.channels = input->dims()[1];
   convArgs.image.height = input->dims()[2];
   convArgs.image.width = input->dims()[3];
   convArgs.image.pad_height = param->Paddings()[0];
   convArgs.image.pad_width = param->Paddings()[1];
   convArgs.image.scale_address = input->fpga_args().scale_pointer();
-  convArgs.output.address = reinterpret_cast<void *>(out_ptr);
+  convArgs.output.address = (void *)out_ptr;
   convArgs.output.scale_address = out->fpga_args().scale_pointer();
 
   param->SetFpgaArgs(convArgs);
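The new_scale/new_bias tensors passed to SetNewScale()/SetNewBias() just before the changed line are presumably the usual batch-norm folding, which rewrites y = gamma * (x - mean) / sqrt(var + eps) + beta as y = new_scale * x + new_bias. A hedged sketch of that per-channel computation; the kernel's exact handling of the elementwise-add bias is not shown in this hunk:

    #include <cmath>
    #include <cstddef>
    #include <vector>

    struct FoldedBN {
      std::vector<float> scale, bias;
    };

    // Fold BN parameters into a per-channel affine transform.
    FoldedBN fold_batch_norm(const std::vector<float>& gamma,
                             const std::vector<float>& beta,
                             const std::vector<float>& mean,
                             const std::vector<float>& variance,
                             float epsilon) {
      FoldedBN out;
      const std::size_t c = gamma.size();
      out.scale.resize(c);
      out.bias.resize(c);
      for (std::size_t i = 0; i < c; ++i) {
        // y = gamma * (x - mean) / sqrt(var + eps) + beta
        //   = new_scale * x + new_bias
        out.scale[i] = gamma[i] / std::sqrt(variance[i] + epsilon);
        out.bias[i] = beta[i] - mean[i] * out.scale[i];
      }
      return out;
    }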
diff --git a/src/operators/op_param.h b/src/operators/op_param.h
index 237b8ea5ba58f5705eec9d0a8c3a8f0c781a4ada..4d1d5af29b81b044ca6d89b4a48a078f73dcabc9 100644
--- a/src/operators/op_param.h
+++ b/src/operators/op_param.h
@@ -920,7 +920,11 @@ class FusionFcParam : public OpParam {
   }
 
   const Tensor *InputX() const { return input_x_; }
 
+#ifdef PADDLE_MOBILE_FPGA
+  Tensor *InputY() const { return input_y_; }
+#else
   const Tensor *InputY() const { return input_y_; }
+#endif
 
   const Tensor *InputZ() const { return input_z_; }
@@ -976,7 +980,11 @@ class FusionConvAddParam : public OpParam {
 
   const Tensor *Input() const { return input_; }
 
+#ifdef PADDLE_MOBILE_FPGA
+  Tensor *Filter() const { return filter_; }
+#else
   const Tensor *Filter() const { return filter_; }
+#endif
 
   Tensor *Output() const { return output_; }
@@ -1050,7 +1058,11 @@ class FusionConvAddBNReluParam : public OpParam {
 
   const Tensor *Input() const { return input_; }
 
+#ifdef PADDLE_MOBILE_FPGA
+  Tensor *Filter() const { return filter_; }
+#else
   const Tensor *Filter() const { return filter_; }
+#endif
 
   Tensor *Output() const { return output_; }
@@ -1144,8 +1156,11 @@ class FusionConvAddBNParam : public OpParam {
 
   const Tensor *Input() const { return input_; }
 
+#ifdef PADDLE_MOBILE_FPGA
+  Tensor *Filter() const { return filter_; }
+#else
   const Tensor *Filter() const { return filter_; }
-
+#endif
   Tensor *Output() const { return output_y_; }
 
   const vector<int> &Strides() const { return strides_; }
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index f4a14f1bc4197051594a0f8609b4662ad4c7cefb..468cbd4ed6d579f7b39f8628a3e052e90ae26644 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -9,6 +9,11 @@ elseif ("mobilenet" IN_LIST NET)
     # gen test
     ADD_EXECUTABLE(test-mobilenet net/test_mobilenet.cpp test_helper.h test_include.h executor_for_test.h)
     target_link_libraries(test-mobilenet paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-mobilenet-combine net/test_mobilenet_combine.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-mobilenet-combine paddle-mobile)
+
 elseif ("yolo" IN_LIST NET)
     # gen test
     ADD_EXECUTABLE(test-yolo net/test_yolo.cpp test_helper.h test_include.h executor_for_test.h)
@@ -138,6 +143,10 @@ else ()
     ADD_EXECUTABLE(test-mobilenetssd net/test_mobilenet+ssd.cpp
             test_helper.h test_include.h executor_for_test.h)
     target_link_libraries(test-mobilenetssd paddle-mobile)
 
+    # gen test
+    ADD_EXECUTABLE(test-mobilenet-combine net/test_mobilenet_combine.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-mobilenet-combine paddle-mobile)
+
     # gen test
     ADD_EXECUTABLE(test-sigmoid operators/test_sigmoid_op.cpp test_include.h)
     target_link_libraries(test-sigmoid paddle-mobile)
diff --git a/test/net/test_mobilenet.cpp b/test/net/test_mobilenet.cpp
index d7793f729866024e2560ad13ac5613172eecc4dd..5a3cc43a552ccec34817af2409af98e8db0ec9e5 100644
--- a/test/net/test_mobilenet.cpp
+++ b/test/net/test_mobilenet.cpp
@@ -44,5 +44,8 @@ int main() {
               << std::endl;
   }
 
+  std::cout << "If the result is NaN, check whether "
+               "test/images/test_image_1x3x224x224_float exists."
+            << std::endl;
   return 0;
 }
diff --git a/test/net/test_mobilenet_combine.cpp b/test/net/test_mobilenet_combine.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..af93d105ea0c290b1dd3a80310a39e0f52c8abaa
--- /dev/null
+++ b/test/net/test_mobilenet_combine.cpp
@@ -0,0 +1,51 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <iostream>
+#include "../test_helper.h"
+#include "../test_include.h"
+
+int main() {
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+  paddle_mobile.SetThreadNum(4);
+  auto time1 = time();
+  if (paddle_mobile.Load(std::string(g_mobilenet_combined) + "/model",
+                         std::string(g_mobilenet_combined) + "/params", true)) {
+    auto time2 = time();
+    std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl;
+
+    std::vector<float> input;
+    std::vector<int64_t> dims{1, 3, 224, 224};
+    GetInput<float>(g_test_image_1x3x224x224_banana, &input, dims);
+
+    // warm up once
+    auto vec_result = paddle_mobile.Predict(input, dims);
+    std::vector<float>::iterator biggest =
+        std::max_element(std::begin(vec_result), std::end(vec_result));
+    std::cout << " Max element is " << *biggest << " at position "
+              << std::distance(std::begin(vec_result), biggest) << std::endl;
+
+    auto time3 = time();
+    for (int i = 0; i < 10; ++i) {
+      auto vec_result = paddle_mobile.Predict(input, dims);
+    }
+    auto time4 = time();
+    std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
+              << std::endl;
+  }
+  std::cout << "If the result is NaN, check whether "
+               "test/images/test_image_1x3x224x224_float exists."
+            << std::endl;
+  return 0;
+}
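The new test exercises the combined-model overload of Load(): one "model" file plus one "params" file, instead of a model directory with one file per parameter. A hedged usage comparison, assuming the PaddleMobile API of this commit and the directory-form call used by other tests of this era:

    paddle_mobile::PaddleMobile<paddle_mobile::CPU> pm;

    // Separate-files form: a model directory holding the program description
    // plus one file per parameter (as test_mobilenet.cpp presumably uses).
    pm.Load(std::string(g_mobilenet), /*optimize=*/true);

    // Combined form: a single "model" file plus a single "params" file
    // (as used by test_mobilenet_combine.cpp above).
    pm.Load(std::string(g_mobilenet_combined) + "/model",
            std::string(g_mobilenet_combined) + "/params", /*optimize=*/true);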
diff --git a/test/test_helper.h b/test/test_helper.h
index 658af447d6cfcd85c68ff350b104c2468d442e40..f6ad597ab122f4abda2ed255f0ec957c56d3cb46 100644
--- a/test/test_helper.h
+++ b/test/test_helper.h
@@ -27,6 +27,7 @@ limitations under the License. */
 static const char *g_ocr = "../models/ocr";
 static const char *g_mobilenet_ssd = "../models/mobilenet+ssd";
 static const char *g_mobilenet_ssd_gesture = "../models/mobilenet+ssd_gesture";
+static const char *g_mobilenet_combined = "../models/mobilenet_combine";
 static const char *g_squeezenet = "../models/squeezenet";
 static const char *g_googlenet = "../models/googlenet";
 static const char *g_mobilenet = "../models/mobilenet";
diff --git a/tools/op.cmake b/tools/op.cmake
index 6b6cb13dbc49b2a6cd672ea4e637f6650d60f8d2..0eab67267032d3956a52b80ab7494c6572df7074 100644
--- a/tools/op.cmake
+++ b/tools/op.cmake
@@ -21,7 +21,7 @@ if ("mobilenet" IN_LIST NET)
     set(ELEMENTWISEADD_OP ON)
     set(RELU_OP ON)
     set(SOFTMAX_OP ON)
-    set(SOFTMAX_OP ON)
+    set(MUL_OP ON)
     set(DEPTHWISECONV_OP ON)
     set(BATCHNORM_OP ON)
     set(POOL_OP ON)
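For reference, fpga_quantilization.h (at the top of this diff) also declares chw_to_hwc. A sketch of the CHW-to-HWC layout conversion such a helper would presumably perform, using standard row-major index math; the actual body is not part of this diff:

    // Convert NCHW-laid-out data to NHWC. For each element:
    //   source index (NCHW): ((n*C + c)*H + h)*W + w
    //   dest   index (NHWC): ((n*H + h)*W + w)*C + c
    template <typename Dtype>
    static void chw_to_hwc(Dtype* data_in, Dtype* data_out, int num, int channel,
                           int height, int width) {
      for (int n = 0; n < num; ++n) {
        for (int c = 0; c < channel; ++c) {
          for (int h = 0; h < height; ++h) {
            for (int w = 0; w < width; ++w) {
              data_out[((n * height + h) * width + w) * channel + c] =
                  data_in[((n * channel + c) * height + h) * width + w];
            }
          }
        }
      }
    }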