Commit 5737bb9c authored by qnqinan

Merge remote-tracking branch 'origin/develop' into develop

@@ -51,6 +51,8 @@ const char *G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU =
 const char *G_OP_TYPE_FUSION_FC_RELU = "fusion_fc_relu";
 const char *G_OP_TYPE_REGION = "region";
 const char *G_OP_TYPE_FUSION_CONV_BN = "fusion_conv_bn";
+const char *G_OP_TYPE_CONV_TRANSPOSE = "conv2d_transpose";
+const char *G_OP_TYPE_PRELU = "prelu";
 std::unordered_map<
     std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
@@ -58,6 +60,7 @@ std::unordered_map<
     {G_OP_TYPE_CONV, {{"Input"}, {"Output"}}},
     {G_OP_TYPE_FUSION_DWCONV_BN_RELU, {{"Input"}, {"Out"}}},
     {G_OP_TYPE_FUSION_CONV_BN_RELU, {{"Input"}, {"Out"}}},
+    {G_OP_TYPE_PRELU, {{"X", "Alpha"}, {"Out"}}},
     {G_OP_TYPE_FUSION_CONV_ADD, {{"Input"}, {"Out"}}},
     {G_OP_TYPE_RELU, {{"X"}, {"Out"}}},
     {G_OP_TYPE_SOFTMAX, {{"X"}, {"Out"}}},
@@ -87,6 +90,7 @@ std::unordered_map<
     {G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU, {{"X", "Y"}, {"Out"}}},
     {G_OP_TYPE_FUSION_FC_RELU, {{"X", "Y", "Z"}, {"Out"}}},
     {G_OP_TYPE_REGION, {{"X"}, {"Out"}}},
-    {G_OP_TYPE_FUSION_CONV_BN, {{"Input"}, {"Y"}}}};
+    {G_OP_TYPE_FUSION_CONV_BN, {{"Input"}, {"Y"}}},
+    {G_OP_TYPE_CONV_TRANSPOSE, {{"Input"}, {"Output"}}}};
 }  // namespace paddle_mobile
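
The table above maps each op type to its input and output variable keys; the two new entries expose prelu's X/Alpha inputs and conv2d_transpose's Input/Output. A standalone sketch (illustrative, not part of the commit) of building and querying a registry of this shape:

// Illustrative sketch (not from the commit): a registry shaped like the
// map above, pairing an op type with its input/output variable keys.
#include <iostream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

int main() {
  std::unordered_map<
      std::string,
      std::pair<std::vector<std::string>, std::vector<std::string>>>
      op_io = {{"prelu", {{"X", "Alpha"}, {"Out"}}},
               {"conv2d_transpose", {{"Input"}, {"Output"}}}};
  const auto &io = op_io.at("prelu");
  for (const auto &key : io.first) {
    std::cout << "prelu input key: " << key << "\n";  // prints X, then Alpha
  }
  for (const auto &key : io.second) {
    std::cout << "prelu output key: " << key << "\n";  // prints Out
  }
  return 0;
}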
@@ -114,6 +114,8 @@ extern const char *G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU;
 extern const char *G_OP_TYPE_FUSION_FC_RELU;
 extern const char *G_OP_TYPE_REGION;
 extern const char *G_OP_TYPE_FUSION_CONV_BN;
+extern const char *G_OP_TYPE_CONV_TRANSPOSE;
+extern const char *G_OP_TYPE_PRELU;
 extern std::unordered_map<
     std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
...
@@ -20,4 +20,13 @@ namespace paddle_mobile {
 namespace operators {}
 }  // namespace paddle_mobile
+
+namespace ops = paddle_mobile::operators;
+#ifdef PADDLE_MOBILE_CPU
+REGISTER_OPERATOR_CPU(conv2d_transpose, ops::ConvOpTranspose);
+#endif
+#ifdef PADDLE_MOBILE_MALI_GPU
+#endif
+#ifdef PADDLE_MOBILE_FPGA
+#endif
 #endif
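
REGISTER_OPERATOR_CPU is paddle-mobile's own macro and its definition is not shown in this diff. As a hedged illustration only, registration macros like this commonly expand to a static registrar whose constructor inserts a factory into a global table; every name in the sketch below is hypothetical:

// Generic sketch of the static-registrar pattern; OpBase, OpRegistry,
// Registrar, and DEMO_REGISTER_OPERATOR_CPU are illustrative names, not
// the paddle-mobile implementation.
#include <functional>
#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>

struct OpBase {
  virtual ~OpBase() = default;
};

using OpFactory = std::function<std::unique_ptr<OpBase>()>;

// Function-local static so the registry is initialized before first use.
std::unordered_map<std::string, OpFactory> &OpRegistry() {
  static std::unordered_map<std::string, OpFactory> registry;
  return registry;
}

struct Registrar {
  Registrar(const std::string &type, OpFactory factory) {
    OpRegistry()[type] = std::move(factory);
  }
};

#define DEMO_REGISTER_OPERATOR_CPU(op_type, OpClass)       \
  static Registrar demo_registrar_##op_type(#op_type, [] { \
    return std::unique_ptr<OpBase>(new OpClass());         \
  })

struct ConvTransposeOp : OpBase {};
DEMO_REGISTER_OPERATOR_CPU(conv2d_transpose, ConvTransposeOp);

int main() {
  // The static registrar runs before main(), so the lookup succeeds.
  std::cout << OpRegistry().count("conv2d_transpose") << std::endl;  // 1
  return 0;
}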
@@ -88,4 +88,14 @@ class ConvOpTranspose : public framework::OperatorWithKernel<
 }  // namespace operators
 }  // namespace paddle_mobile
+
+#ifdef PADDLE_MOBILE_CPU
+USE_OP_CPU(conv2d_transpose);
+#endif
+#ifdef PADDLE_MOBILE_MALI_GPU
+USE_OP_MALI_GPU(conv2d_transpose);
+#endif
+#ifdef PADDLE_MOBILE_FPGA
+USE_OP_FPGA(conv2d_transpose);
+#endif
 #endif
@@ -33,77 +33,34 @@ struct PReluFunctor {
  * */
 template <>
 void PReluKernel<CPU, float>::Compute(const PReluParam &param) const {
-  const auto *input_x = param.InputX();
-  auto *input_x_ptr = input_x->data<float>();
-  auto *out = param.Out();
-  auto *out_ptr = out->mutable_data<float>();
-
-  if (param.Slopes().size() == 1) {
-    PReluFunctor<float> func_(param.Slopes()[0]);
-    math::Transform trans;
-    trans(input_x_ptr, input_x_ptr + input_x->numel(), out_ptr, func_);
-  } else if (param.Slopes().size() > 1) {
-    const int dim_size = input_x->dims().size();
-    switch (dim_size) {
-      case 0:
-        break;
-      case 1: {
-        const int input_width = input_x->dims()[0];
-        math::Transform trans;
-#pragma omp parallel for
-        for (int w = 0; w < input_width; ++w) {
-          out_ptr[w] = input_x_ptr[w] * param.Slopes()[w];
-        }
-      } break;
-      case 2: {
-        const int input_height = input_x->dims()[0];
-        const int input_width = input_x->dims()[1];
-        math::Transform trans;
-#pragma omp parallel for
-        for (int h = 0; h < input_height; ++h) {
-          PReluFunctor<float> func_(param.Slopes()[h]);
-          const float *ptr = input_x_ptr + h * input_width;
-          float *optr = out_ptr + h * input_width;
-          trans(ptr, ptr + input_width, optr, func_);
-        }
-      } break;
-      case 3: {
-        const int chan_size = input_x->dims()[0];
-        const int input_height = input_x->dims()[1];
-        const int input_width = input_x->dims()[2];
-        math::Transform trans;
-#pragma omp parallel for
-        for (int c = 0; c < chan_size; ++c) {
-          PReluFunctor<float> func_(param.Slopes()[c]);
-          int size = input_height * input_width;
-          const float *ptr = input_x_ptr + c * size;
-          float *optr = out_ptr + c * size;
-          trans(ptr, ptr + size, optr, func_);
-        }
-      } break;
-      case 4:
-      default: {
-        const int batch_size = input_x->dims()[0];
-        const int chan_size = input_x->dims()[1];
-        const int input_height = input_x->dims()[2];
-        const int input_width = input_x->dims()[3];
-        math::Transform trans;
-#pragma omp parallel for
-        for (int b = 0; b < batch_size; ++b) {
-          for (int c = 0; c < chan_size; ++c) {
-            PReluFunctor<float> func_(param.Slopes()[c]);
-            int size = input_height * input_width;
-            const float *ptr = input_x_ptr + b * c * size;
-            float *optr = out_ptr + b * c * size;
-            trans(ptr, ptr + size, optr, func_);
-          }
-        }
-      }  // case 3,default
-        break;
-    }
-  }
-}
+  auto *x = param.InputX();
+  auto *alpha = param.InputAlpha();
+  auto *out = param.Out();
+  std::string mode = param.Mode();
+  const auto *x_ptr = x->data<float>();
+  auto *o_ptr = out->mutable_data<float>();
+  const auto *alpha_ptr = alpha->data<float>();
+  int numel = x->numel();
+  auto dim = x->dims();
+  int index = 0;
+  int i = 0;
+  int temp = 0;
+  if (mode == "channel") {
+    temp = numel / (dim[0] * dim[1]);
+#pragma omp parallel for
+    for (i = 0; i < numel; i++) {
+      index = (i / temp) % dim[1];
+      o_ptr[i] = x_ptr[i] > 0 ? x_ptr[i] : alpha_ptr[index] * x_ptr[i];
+    }
+  } else if (mode == "element") {
+#pragma omp parallel for
+    for (i = 0; i < numel; i++) {
+      o_ptr[i] = x_ptr[i] > 0 ? x_ptr[i] : alpha_ptr[i] * x_ptr[i];
+    }
+  } else {
+#pragma omp parallel for
+    for (i = 0; i < numel; i++) {
+      o_ptr[i] = x_ptr[i] > 0 ? x_ptr[i] : alpha_ptr[0] * x_ptr[i];
+    }
+  }
+}
...
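
In the new kernel, "channel" mode sets temp = numel / (dim[0] * dim[1]), which is H * W for an NCHW input, so index = (i / temp) % dim[1] recovers each element's channel and alpha_ptr[index] supplies the per-channel slope (PReLU: f(x) = x for x > 0, otherwise a * x). A standalone sketch (assuming NCHW layout, which the use of dim[1] as the channel count implies) that checks this index arithmetic:

// Standalone check of the "channel"-mode indexing above, assuming NCHW.
#include <cassert>

int main() {
  const int N = 2, C = 3, H = 4, W = 5;
  const int numel = N * C * H * W;
  const int temp = numel / (N * C);  // = H * W, as in the kernel
  for (int n = 0; n < N; ++n)
    for (int c = 0; c < C; ++c)
      for (int h = 0; h < H; ++h)
        for (int w = 0; w < W; ++w) {
          int i = ((n * C + c) * H + h) * W + w;  // flattened NCHW index
          int index = (i / temp) % C;
          assert(index == c);  // alpha_ptr[index] is the slope of channel c
        }
  return 0;
}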
@@ -39,6 +39,11 @@ using std::vector;
 class OpParam {
  protected:
+  template <typename T>
+  static T *InputAlphaFrom(const VariableNameMap &inputs, const Scope &scope) {
+    return GetVarValue<T>("Alpha", inputs, scope);
+  }
+
   template <typename T>
   static T *InputFrom(const VariableNameMap &inputs, const Scope &scope) {
     return GetVarValue<T>("Input", inputs, scope);
@@ -895,19 +900,24 @@ class PReluParam : public OpParam {
  public:
   PReluParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
              const AttributeMap &attrs, const Scope &scope) {
+    DLOG << "PReluParam inputs before";
     input_x_ = InputXFrom<LoDTensor>(inputs, scope);
+    alpha_ = InputAlphaFrom<LoDTensor>(inputs, scope);
+    framework::DDim dims = alpha_->dims();
     out_ = OutFrom<LoDTensor>(outputs, scope);
-    slopes_ = GetAttr<vector<float>>("slopes", attrs);
+    mode_ = GetAttr<std::string>("mode", attrs);
+    DLOG << "PReluParam mode after" << mode_;
   }
   const Tensor *InputX() const { return input_x_; }
+  const Tensor *InputAlpha() const { return alpha_; }
   Tensor *Out() const { return out_; }
-  const vector<float> &Slopes() const { return slopes_; }
+  const std::string &Mode() const { return mode_; }

  private:
   Tensor *input_x_;
   Tensor *out_;
-  vector<float> slopes_;
+  Tensor *alpha_;
+  std::string mode_;
 };
 #endif
...
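
PReluParam now carries an Alpha tensor and a string mode in place of the old slopes vector. The Alpha size each mode expects is implied by how the kernel indexes alpha_ptr; a small sketch making that explicit (inferred from the kernel, not stated in the commit):

// Expected Alpha element count per mode, inferred from the kernel's use of
// alpha_ptr[index] ("channel"), alpha_ptr[i] ("element"), and alpha_ptr[0]
// (any other mode). ExpectedAlphaNumel is an illustrative helper name.
#include <cassert>
#include <cstdint>
#include <string>

int64_t ExpectedAlphaNumel(const std::string &mode, int64_t n, int64_t c,
                           int64_t h, int64_t w) {
  if (mode == "channel") return c;              // one slope per channel
  if (mode == "element") return n * c * h * w;  // one slope per element
  return 1;                                     // single shared slope
}

int main() {
  assert(ExpectedAlphaNumel("channel", 1, 3, 128, 128) == 3);
  assert(ExpectedAlphaNumel("element", 1, 3, 128, 128) == 3 * 128 * 128);
  assert(ExpectedAlphaNumel("all", 1, 3, 128, 128) == 1);
  return 0;
}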
@@ -159,6 +159,10 @@ else ()
     ADD_EXECUTABLE(test-mobilenet-combine net/test_mobilenet_combine.cpp test_helper.h test_include.h executor_for_test.h)
     target_link_libraries(test-mobilenet-combine paddle-mobile)
+    # gen test
+    ADD_EXECUTABLE(test-genet net/test_genet_combine.cpp test_helper.h test_include.h executor_for_test.h)
+    target_link_libraries(test-genet paddle-mobile)
+
     # gen test
     ADD_EXECUTABLE(test-sigmoid operators/test_sigmoid_op.cpp test_include.h)
     target_link_libraries(test-sigmoid paddle-mobile)
...
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <iostream>
#include "../test_helper.h"
#include "../test_include.h"
int main() {
paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
paddle_mobile.SetThreadNum(4);
auto time1 = time();
if (paddle_mobile.Load(std::string(g_genet_combine) + "/model",
std::string(g_genet_combine) + "/params", true)) {
auto time2 = time();
std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;
std::vector<float> input;
std::vector<int64_t> dims{1, 3, 128, 128};
GetInput<float>(g_test_image_1x3x224x224_banana, &input, dims);
    // warm up once
auto vec_result = paddle_mobile.Predict(input, dims);
std::vector<float>::iterator biggest =
std::max_element(std::begin(vec_result), std::end(vec_result));
std::cout << " Max element is " << *biggest << " at position "
<< std::distance(std::begin(vec_result), biggest) << std::endl;
auto time3 = time();
for (int i = 0; i < 10; ++i) {
auto vec_result = paddle_mobile.Predict(input, dims);
}
auto time4 = time();
std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
<< std::endl;
}
  std::cout << "If the result is NaN, check whether "
               "test/images/test_image_1x3x224x224_float exists."
            << std::endl;
return 0;
}
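
The load-cost and predict-cost figures come from the test_helper time() and time_diff() utilities, whose implementation is not shown in this diff. A std::chrono stand-in (all names hypothetical) for the same average-over-10-runs measurement:

// std::chrono stand-in for the test's timing; work() substitutes for
// paddle_mobile.Predict(input, dims).
#include <chrono>
#include <iostream>

int main() {
  using clock = std::chrono::steady_clock;
  auto work = [] {
    double acc = 0;
    for (int i = 0; i < 1000000; ++i) acc = acc + i * 0.5;
    return acc;
  };
  const int runs = 10;
  auto t0 = clock::now();
  for (int i = 0; i < runs; ++i) work();
  auto t1 = clock::now();
  double ms = std::chrono::duration<double, std::milli>(t1 - t0).count();
  std::cout << "predict cost :" << ms / runs << "ms" << std::endl;
  return 0;
}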
@@ -26,6 +26,7 @@ limitations under the License. */
 static const char *g_ocr = "../models/ocr";
 static const char *g_mobilenet_ssd = "../models/mobilenet+ssd";
+static const char *g_genet_combine = "../models/enet";
 static const char *g_mobilenet_ssd_gesture = "../models/mobilenet+ssd_gesture";
 static const char *g_mobilenet_combined = "../models/mobilenet_combine";
 static const char *g_mobilenet_detect = "../models/mobilenet-detect";
...