Commit 9c0c0df1 authored by S smilejames, committed by GitHub

Merge branch 'develop' into develop

@@ -46,8 +46,8 @@ static Dtype find_max(Dtype* data, int num) {
   return max;
 }
-template <typename Dtype>
-framework::Tensor* quantilize_filter(framework::Tensor* filter) {
+// template <typename Dtype>
+framework::Tensor* quantify_filter(framework::Tensor* filter) {
   float scale = 0;
   float fix_range = static_cast<float>((1 << (8 - 1)) - 1);
@@ -57,7 +57,7 @@ framework::Tensor* quantilize_filter(framework::Tensor* filter) {
   const int width = filter->dims()[3];
   int8_t* int_data = nullptr;
-  int8_t* tmp_data = new int[filter->numel()];
+  int8_t* tmp_data = new int8_t[filter->numel()];
   // 32bit filter -> 8bit filter;
   if (filter->type() == typeid(float)) {
......
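For context on what this function does: quantify_filter derives a per-tensor scale from the filter's largest absolute value and maps 32-bit floats into the signed 8-bit range bounded by fix_range = (1 << (8 - 1)) - 1 = 127. A minimal standalone sketch of that idea (simplified stand-ins, not the paddle-mobile implementation; the Tensor plumbing and the chw_to_hwc reorder are omitted):

```cpp
#include <cmath>
#include <cstdint>
#include <vector>

// Simplified stand-in for the diff's find_max: largest absolute value.
static float find_abs_max(const float* data, int num) {
  float max = 0.f;
  for (int i = 0; i < num; ++i) {
    max = std::fmax(max, std::fabs(data[i]));
  }
  return max;
}

// Per-tensor symmetric quantization into int8, mirroring the
// fix_range = (1 << (8 - 1)) - 1 = 127 constant from the diff.
static std::vector<int8_t> quantize_to_int8(const float* data, int num,
                                            float* scale_out) {
  const float fix_range = static_cast<float>((1 << (8 - 1)) - 1);  // 127
  const float max = find_abs_max(data, num);
  const float scale = (max == 0.f) ? 1.f : fix_range / max;
  std::vector<int8_t> out(num);
  for (int i = 0; i < num; ++i) {
    out[i] = static_cast<int8_t>(std::round(data[i] * scale));
  }
  *scale_out = scale;  // kept so results can be de-scaled downstream
  return out;
}
```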
@@ -24,7 +24,7 @@ template <typename Dtype>
 static void chw_to_hwc(Dtype* data_in, Dtype* data_out, int num, int channel,
                        int height, int width);
-template <typename Dtype>
-framework::Tensor* quantilize_filter(framework::Tensor* filter);
+// template <typename Dtype>
+framework::Tensor* quantify_filter(framework::Tensor* filter);
 }  // namespace fpga
 }  // namespace paddle_mobile
@@ -28,7 +28,7 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam *param) {
   auto input_ptr = input->data<half>();
   const Tensor *bias = param->Bias();
   auto bias_ptr = bias->data<float>();
-  const Tensor *filter = param->Filter();
+  Tensor *filter = param->Filter();
   Tensor *out = param->Output();
   auto out_ptr = out->mutable_data<half>();
@@ -60,7 +60,7 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam *param) {
   param->SetNewScale(new_scale);
   param->SetNewBias(new_bias);
-  const Tensor *quant_filter = quantilize_filter(filter);
+  Tensor *quant_filter = fpga::quantify_filter(filter);
   // delete original filter?
   filter = quant_filter;
@@ -68,22 +68,22 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam *param) {
   auto filter_ptr = filter->data<float>();
   fpga::ConvArgs convArgs;
   convArgs.relu_enabled = relu_enabled;
-  convArgs.filter_address = reinterpret_cast<void *> filter_ptr;
+  convArgs.filter_address = (void *)filter_ptr;
   convArgs.filter_num = filter->dims()[0];
   convArgs.group_num = param->Groups();
-  convArgs.sb_address = reinterpret_cast<void *> bs_ptr;
+  convArgs.sb_address = (void *)bs_ptr;
   convArgs.kernel.stride_h = param->Strides()[0];
   convArgs.kernel.stride_w = param->Strides()[1];
   convArgs.kernel.height = filter->dims()[2];
   convArgs.kernel.width = filter->dims()[3];
-  convArgs.image.address = reinterpret_cast<void *> input_ptr;
+  convArgs.image.address = (void *)input_ptr;
   convArgs.image.channels = input->dims()[1];
   convArgs.image.height = input->dims()[2];
   convArgs.image.width = input->dims()[3];
   convArgs.image.pad_height = param->Paddings()[0];
   convArgs.image.pad_width = param->Paddings()[1];
   convArgs.image.scale_address = input->fpga_args().scale_pointer();
-  convArgs.output.address = reinterpret_cast<void *> out_ptr;
+  convArgs.output.address = (void *)out_ptr;
   convArgs.output.scale_address = out->fpga_args().scale_pointer();
   param->SetFpgaArgs(convArgs);
......
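One detail worth flagging in this hunk: the removed lines of the form reinterpret_cast<void *> filter_ptr are ill-formed C++ (the cast operand must be parenthesized), which likely motivated the switch to C-style casts. A tiny sketch of the variants, using a hypothetical filter_ptr:

```cpp
int main() {
  float value = 1.f;
  float* filter_ptr = &value;

  // Ill-formed, which is why the removed lines could not compile:
  // void* a = reinterpret_cast<void *> filter_ptr;

  // What this commit switches to (legal, but a C-style cast):
  void* b = (void*)filter_ptr;

  // Idiomatic C++: the same conversion with the operand parenthesized.
  void* c = reinterpret_cast<void*>(filter_ptr);
  return (b == c) ? 0 : 1;
}
```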
@@ -920,7 +920,11 @@ class FusionFcParam : public OpParam {
   }
   const Tensor *InputX() const { return input_x_; }
+#ifdef PADDLE_MOBILE_FPGA
+  Tensor *InputY() const { return input_y_; }
+#else
   const Tensor *InputY() const { return input_y_; }
+#endif
   const Tensor *InputZ() const { return input_z_; }
@@ -976,7 +980,11 @@ class FusionConvAddParam : public OpParam {
   const Tensor *Input() const { return input_; }
+#ifdef PADDLE_MOBILE_FPGA
+  Tensor *Filter() const { return filter_; }
+#else
   const Tensor *Filter() const { return filter_; }
+#endif
   Tensor *Output() const { return output_; }
@@ -1050,7 +1058,11 @@ class FusionConvAddBNReluParam : public OpParam {
   const Tensor *Input() const { return input_; }
+#ifdef PADDLE_MOBILE_FPGA
+  Tensor *Filter() const { return filter_; }
+#else
   const Tensor *Filter() const { return filter_; }
+#endif
   Tensor *Output() const { return output_; }
@@ -1144,8 +1156,11 @@ class FusionConvAddBNParam : public OpParam {
   const Tensor *Input() const { return input_; }
+#ifdef PADDLE_MOBILE_FPGA
+  Tensor *Filter() const { return filter_; }
+#else
   const Tensor *Filter() const { return filter_; }
+#endif
   Tensor *Output() const { return output_y_; }
   const vector<int> &Strides() const { return strides_; }
......
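The recurring #ifdef PADDLE_MOBILE_FPGA pattern above drops the const qualifier on the returned tensor only in the FPGA build, so quantify_filter can rewrite the filter in place. A minimal sketch of the same pattern with a hypothetical Param class:

```cpp
// Hypothetical illustration of the getter pattern used above: the FPGA
// build returns a mutable pointer so the filter can be quantized in place,
// while other builds keep the const-correct signature.
class Tensor;  // forward-declaration stand-in

class Param {
 public:
#ifdef PADDLE_MOBILE_FPGA
  Tensor *Filter() const { return filter_; }
#else
  const Tensor *Filter() const { return filter_; }
#endif

 private:
  Tensor *filter_ = nullptr;
};
```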
@@ -9,6 +9,11 @@ elseif ("mobilenet" IN_LIST NET)
   # gen test
   ADD_EXECUTABLE(test-mobilenet net/test_mobilenet.cpp test_helper.h test_include.h executor_for_test.h)
   target_link_libraries(test-mobilenet paddle-mobile)
+  # gen test
+  ADD_EXECUTABLE(test-mobilenet-combine net/test_mobilenet_combine.cpp test_helper.h test_include.h executor_for_test.h)
+  target_link_libraries(test-mobilenet-combine paddle-mobile)
 elseif ("yolo" IN_LIST NET)
   # gen test
   ADD_EXECUTABLE(test-yolo net/test_yolo.cpp test_helper.h test_include.h executor_for_test.h)
@@ -138,6 +143,10 @@ else ()
   ADD_EXECUTABLE(test-mobilenetssd net/test_mobilenet+ssd.cpp test_helper.h test_include.h executor_for_test.h)
   target_link_libraries(test-mobilenetssd paddle-mobile)
+  # gen test
+  ADD_EXECUTABLE(test-mobilenet-combine net/test_mobilenet_combine.cpp test_helper.h test_include.h executor_for_test.h)
+  target_link_libraries(test-mobilenet-combine paddle-mobile)
   # gen test
   ADD_EXECUTABLE(test-sigmoid operators/test_sigmoid_op.cpp test_include.h)
   target_link_libraries(test-sigmoid paddle-mobile)
......
@@ -44,5 +44,8 @@ int main() {
               << std::endl;
   }
+  std::cout
+      << "If the result is NaN, check whether test/images/test_image_1x3x224x224_float exists."
+      << std::endl;
   return 0;
 }
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <iostream>
#include "../test_helper.h"
#include "../test_include.h"
int main() {
  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
  paddle_mobile.SetThreadNum(4);
  auto time1 = time();
  if (paddle_mobile.Load(std::string(g_mobilenet_combined) + "/model",
                         std::string(g_mobilenet_combined) + "/params", true)) {
    auto time2 = time();
    // note: measures the interval between the two timestamps (the original
    // passed time1 twice, which always printed 0)
    std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl;

    std::vector<float> input;
    std::vector<int64_t> dims{1, 3, 224, 224};
    GetInput<float>(g_test_image_1x3x224x224_banana, &input, dims);

    // warm up once
    auto vec_result = paddle_mobile.Predict(input, dims);
    std::vector<float>::iterator biggest =
        std::max_element(std::begin(vec_result), std::end(vec_result));
    std::cout << " Max element is " << *biggest << " at position "
              << std::distance(std::begin(vec_result), biggest) << std::endl;

    auto time3 = time();
    for (int i = 0; i < 10; ++i) {
      auto vec_result = paddle_mobile.Predict(input, dims);
    }
    auto time4 = time();
    std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
              << std::endl;
  }
  std::cout
      << "If the result is NaN, check whether test/images/test_image_1x3x224x224_float exists."
      << std::endl;
  return 0;
}
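The load/predict cost figures in this test come from the time() and time_diff() helpers declared in test_helper.h. Their exact definitions are not shown in this diff; a rough std::chrono-based equivalent of what such helpers typically look like (an assumption, not the actual paddle-mobile code):

```cpp
#include <chrono>

// Assumed shape of the time()/time_diff() helpers used by the test above;
// the real definitions live in test_helper.h and may differ.
namespace sketch {
using TimePoint = std::chrono::steady_clock::time_point;

inline TimePoint time() { return std::chrono::steady_clock::now(); }

// Milliseconds between two time points, matching the "ms" printouts.
inline double time_diff(TimePoint start, TimePoint end) {
  return std::chrono::duration<double, std::milli>(end - start).count();
}
}  // namespace sketch
```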
@@ -27,6 +27,7 @@ limitations under the License. */
 static const char *g_ocr = "../models/ocr";
 static const char *g_mobilenet_ssd = "../models/mobilenet+ssd";
 static const char *g_mobilenet_ssd_gesture = "../models/mobilenet+ssd_gesture";
+static const char *g_mobilenet_combined = "../models/mobilenet_combine";
 static const char *g_squeezenet = "../models/squeezenet";
 static const char *g_googlenet = "../models/googlenet";
 static const char *g_mobilenet = "../models/mobilenet";
......
@@ -21,7 +21,7 @@ if ("mobilenet" IN_LIST NET)
   set(ELEMENTWISEADD_OP ON)
   set(RELU_OP ON)
   set(SOFTMAX_OP ON)
-  set(SOFTMAX_OP ON)
+  set(MUL_OP ON)
   set(DEPTHWISECONV_OP ON)
   set(BATCHNORM_OP ON)
   set(POOL_OP ON)
......