Commit 38a6fbed authored by Ruilong Liu, committed by GitHub

Merge pull request #800 from xiebaiyuan/develop

add prelu op support for genet && add unit test, close #799
......@@ -51,6 +51,8 @@ const char *G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU =
const char *G_OP_TYPE_FUSION_FC_RELU = "fusion_fc_relu";
const char *G_OP_TYPE_REGION = "region";
const char *G_OP_TYPE_FUSION_CONV_BN = "fusion_conv_bn";
const char *G_OP_TYPE_CONV_TRANSPOSE = "conv2d_transpose";
const char *G_OP_TYPE_PRELU = "prelu";
std::unordered_map<
std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
......@@ -58,6 +60,7 @@ std::unordered_map<
{G_OP_TYPE_CONV, {{"Input"}, {"Output"}}},
{G_OP_TYPE_FUSION_DWCONV_BN_RELU, {{"Input"}, {"Out"}}},
{G_OP_TYPE_FUSION_CONV_BN_RELU, {{"Input"}, {"Out"}}},
{G_OP_TYPE_PRELU, {{"X", "Alpha"}, {"Out"}}},
{G_OP_TYPE_FUSION_CONV_ADD, {{"Input"}, {"Out"}}},
{G_OP_TYPE_RELU, {{"X"}, {"Out"}}},
{G_OP_TYPE_SOFTMAX, {{"X"}, {"Out"}}},
......@@ -87,6 +90,7 @@ std::unordered_map<
{G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU, {{"X", "Y"}, {"Out"}}},
{G_OP_TYPE_FUSION_FC_RELU, {{"X", "Y", "Z"}, {"Out"}}},
{G_OP_TYPE_REGION, {{"X"}, {"Out"}}},
{G_OP_TYPE_FUSION_CONV_BN, {{"Input"}, {"Y"}}}};
{G_OP_TYPE_FUSION_CONV_BN, {{"Input"}, {"Y"}}},
{G_OP_TYPE_CONV_TRANSPOSE, {{"Input"}, {"Output"}}}};
} // namespace paddle_mobile
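Note on the table above: it is what the framework consults to wire each op's variable names. A minimal standalone sketch of the lookup — the map's real identifier is truncated in this hunk, so the name below is an assumption that simply mirrors the entries shown:

#include <iostream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

// Mirror of two entries added in this commit; "op_input_output_key" is an
// assumed name, since the real identifier is cut off in the hunk above.
static std::unordered_map<
    std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
    op_input_output_key = {
        {"prelu", {{"X", "Alpha"}, {"Out"}}},
        {"conv2d_transpose", {{"Input"}, {"Output"}}},
};

int main() {
  const auto &io = op_input_output_key.at("prelu");
  for (const auto &k : io.first) std::cout << k << ' ';  // prints: X Alpha
  std::cout << "-> " << io.second[0] << std::endl;       // prints: -> Out
  return 0;
}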
......@@ -114,6 +114,8 @@ extern const char *G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU;
extern const char *G_OP_TYPE_FUSION_FC_RELU;
extern const char *G_OP_TYPE_REGION;
extern const char *G_OP_TYPE_FUSION_CONV_BN;
extern const char *G_OP_TYPE_CONV_TRANSPOSE;
extern const char *G_OP_TYPE_PRELU;
extern std::unordered_map<
std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
......
......@@ -20,4 +20,13 @@ namespace paddle_mobile {
namespace operators {}
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(conv2d_transpose, ops::ConvOpTranspose);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#endif
......@@ -88,4 +88,14 @@ class ConvOpTranspose : public framework::OperatorWithKernel<
} // namespace operators
} // namespace paddle_mobile
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU(conv2d_transpose);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
USE_OP_MALI_GPU(conv2d_transpose);
#endif
#ifdef PADDLE_MOBILE_FPGA
USE_OP_FPGA(conv2d_transpose);
#endif
#endif
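A brief note on the macro pairing above: REGISTER_OPERATOR_CPU in the .cpp defines the op for the CPU build, while USE_OP_CPU in the header forces that registration to be linked into the final binary. A minimal standalone sketch of the static-registration idiom these macros conventionally follow (all names below are illustrative, not paddle-mobile's actual internals):

#include <iostream>
#include <map>
#include <string>

// Global op table keyed by the op's string type.
struct Registry {
  static std::map<std::string, void (*)()> &ops() {
    static std::map<std::string, void (*)()> m;
    return m;
  }
};

// A static Registrar's constructor runs before main() and inserts the op;
// this is what a REGISTER_OPERATOR_* style macro conventionally expands to.
struct Registrar {
  Registrar(const std::string &name, void (*fn)()) {
    Registry::ops()[name] = fn;
  }
};

static void RunConvTranspose() { std::cout << "conv2d_transpose kernel\n"; }
static Registrar conv2d_transpose_registrar("conv2d_transpose",
                                            RunConvTranspose);

int main() {
  // USE_OP_* style lookup: resolve the op by name at runtime. In the real
  // library the USE macro also forces the registrar's object file to link.
  Registry::ops().at("conv2d_transpose")();
  return 0;
}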
......@@ -33,77 +33,36 @@ struct PReluFunctor {
* */
template <>
void PReluKernel<CPU, float>::Compute(const PReluParam &param) const {
const auto *input_x = param.InputX();
auto *input_x_ptr = input_x->data<float>();
auto *out = param.Out();
auto *out_ptr = out->mutable_data<float>();
if (param.Slopes().size() == 1) {
PReluFunctor<float> func_(param.Slopes()[0]);
math::Transform trans;
trans(input_x_ptr, input_x_ptr + input_x->numel(), out_ptr, func_);
} else if (param.Slopes().size() > 1) {
const int dim_size = input_x->dims().size();
switch (dim_size) {
case 0:
break;
case 1: {
const int input_width = input_x->dims()[0];
math::Transform trans;
#pragma omp parallel for
for (int w = 0; w < input_width; ++w) {
out_ptr[w] = input_x_ptr[w] * param.Slopes()[w];
}
} break;
case 2: {
const int input_height = input_x->dims()[0];
const int input_width = input_x->dims()[1];
math::Transform trans;
#pragma omp parallel for
for (int h = 0; h < input_height; ++h) {
PReluFunctor<float> func_(param.Slopes()[h]);
const float *ptr = input_x_ptr + h * input_width;
float *optr = out_ptr + h * input_width;
trans(ptr, ptr + input_width, optr, func_);
}
} break;
case 3: {
const int chan_size = input_x->dims()[0];
const int input_height = input_x->dims()[1];
const int input_width = input_x->dims()[2];
DLOG << "PReluKernel :Compute";
math::Transform trans;
#pragma omp parallel for
for (int c = 0; c < chan_size; ++c) {
PReluFunctor<float> func_(param.Slopes()[c]);
int size = input_height * input_width;
const float *ptr = input_x_ptr + c * size;
float *optr = out_ptr + c * size;
trans(ptr, ptr + size, optr, func_);
}
} break;
case 4:
default: {
const int batch_size = input_x->dims()[0];
const int chan_size = input_x->dims()[1];
const int input_height = input_x->dims()[2];
const int input_width = input_x->dims()[3];
math::Transform trans;
#pragma omp parallel for
for (int b = 0; b < batch_size; ++b) {
for (int c = 0; c < chan_size; ++c) {
PReluFunctor<float> func_(param.Slopes()[c]);
int size = input_height * input_width;
const float *ptr = input_x_ptr + (b * chan_size + c) * size;
float *optr = out_ptr + (b * chan_size + c) * size;
trans(ptr, ptr + size, optr, func_);
}
}
} // case 3,default
break;
auto *x = param.InputX();
auto *alpha = param.InputAlpha();
auto *out = param.Out();
std::string mode = param.Mode();
const auto *x_ptr = x->data<float>();
auto *o_ptr = out->mutable_data<float>();
const auto *alpha_ptr = alpha->data<float>();
int numel = x->numel();
auto dim = x->dims();
  if (mode == "channel") {
    // One slope per channel: hoist the loop-invariant plane size (H * W) and
    // keep the index loop-local so OpenMP threads share no mutable state.
    const int temp = numel / (dim[0] * dim[1]);
#pragma omp parallel for
    for (int i = 0; i < numel; i++) {
      const int index = (i / temp) % dim[1];
      o_ptr[i] = x_ptr[i] > 0 ? x_ptr[i] : alpha_ptr[index] * x_ptr[i];
    }
  } else if (mode == "element") {
    // One slope per element.
#pragma omp parallel for
    for (int i = 0; i < numel; i++) {
      o_ptr[i] = x_ptr[i] > 0 ? x_ptr[i] : alpha_ptr[i] * x_ptr[i];
    }
  } else {
    // Single shared slope ("all" mode).
#pragma omp parallel for
    for (int i = 0; i < numel; i++) {
      o_ptr[i] = x_ptr[i] > 0 ? x_ptr[i] : alpha_ptr[0] * x_ptr[i];
    }
  }
}
......
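To make the channel-mode indexing in the kernel above concrete: for an NCHW tensor, numel / (dim[0] * dim[1]) is the per-channel plane size H * W, and (i / plane) % C recovers the channel that owns flat index i. A toy standalone check (values invented for illustration, not the library's code):

#include <cassert>
#include <vector>

int main() {
  // Toy NCHW tensor: N=1, C=2, H=2, W=2 -> numel = 8.
  const int N = 1, C = 2, numel = 8;
  std::vector<float> x = {-1, 2, -3, 4, -5, 6, -7, 8};
  std::vector<float> alpha = {0.1f, 0.5f};  // one slope per channel
  std::vector<float> out(numel);

  const int plane = numel / (N * C);  // H * W = 4; the "temp" in the kernel
  for (int i = 0; i < numel; ++i) {
    const int c = (i / plane) % C;  // channel owning flat index i
    out[i] = x[i] > 0 ? x[i] : alpha[c] * x[i];
  }
  assert(out[0] == -0.1f);  // channel 0, slope 0.1
  assert(out[4] == -2.5f);  // channel 1, slope 0.5
  return 0;
}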
......@@ -39,6 +39,11 @@ using std::vector;
class OpParam {
protected:
template <typename T>
static T *InputAlphaFrom(const VariableNameMap &inputs, const Scope &scope) {
return GetVarValue<T>("Alpha", inputs, scope);
}
template <typename T>
static T *InputFrom(const VariableNameMap &inputs, const Scope &scope) {
return GetVarValue<T>("Input", inputs, scope);
......@@ -895,19 +900,24 @@ class PReluParam : public OpParam {
public:
PReluParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
const AttributeMap &attrs, const Scope &scope) {
DLOG << "PReluParam: parsing inputs";
input_x_ = InputXFrom<LoDTensor>(inputs, scope);
alpha_ = InputAlphaFrom<LoDTensor>(inputs, scope);
framework::DDim dims = alpha_->dims();
out_ = OutFrom<LoDTensor>(outputs, scope);
slopes_ = GetAttr<vector<float>>("slopes", attrs);
mode_ = GetAttr<std::string>("mode", attrs);
DLOG << "PReluParam mode: " << mode_;
}
const Tensor *InputX() const { return input_x_; }
const Tensor *InputAlpha() const { return alpha_; }
Tensor *Out() const { return out_; }
const vector<float> &Slopes() const { return slopes_; }
const std::string &Mode() const { return mode_; }
private:
Tensor *input_x_;
Tensor *out_;
vector<float> slopes_;
Tensor *alpha_;
std::string mode_;
};
#endif
......
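For anyone tracing the new Alpha input: given the kernel above, the expected Alpha sizes are one slope per channel (dim[1] elements) for mode "channel", one per element (numel) for mode "element", and a single shared slope otherwise. Nothing in PReluParam validates this, so a shape check here might be worth considering.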
......@@ -159,6 +159,10 @@ else ()
ADD_EXECUTABLE(test-mobilenet-combine net/test_mobilenet_combine.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-mobilenet-combine paddle-mobile)
# gen test
ADD_EXECUTABLE(test-genet net/test_genet_combine.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-genet paddle-mobile)
# gen test
ADD_EXECUTABLE(test-sigmoid operators/test_sigmoid_op.cpp test_include.h)
target_link_libraries(test-sigmoid paddle-mobile)
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <iostream>
#include "../test_helper.h"
#include "../test_include.h"
int main() {
paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
paddle_mobile.SetThreadNum(4);
auto time1 = time();
if (paddle_mobile.Load(std::string(g_genet_combine) + "/model",
std::string(g_genet_combine) + "/params", true)) {
auto time2 = time();
std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;
std::vector<float> input;
std::vector<int64_t> dims{1, 3, 128, 128};
GetInput<float>(g_test_image_1x3x224x224_banana, &input, dims);
// warm up once
auto vec_result = paddle_mobile.Predict(input, dims);
std::vector<float>::iterator biggest =
std::max_element(std::begin(vec_result), std::end(vec_result));
std::cout << " Max element is " << *biggest << " at position "
<< std::distance(std::begin(vec_result), biggest) << std::endl;
auto time3 = time();
for (int i = 0; i < 10; ++i) {
auto vec_result = paddle_mobile.Predict(input, dims);
}
auto time4 = time();
std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
<< std::endl;
}
std::cout << "If the result is NaN, check whether "
             "test/images/test_image_1x3x224x224_float exists."
          << std::endl;
return 0;
}
......@@ -26,6 +26,7 @@ limitations under the License. */
static const char *g_ocr = "../models/ocr";
static const char *g_mobilenet_ssd = "../models/mobilenet+ssd";
static const char *g_genet_combine = "../models/enet";
static const char *g_mobilenet_ssd_gesture = "../models/mobilenet+ssd_gesture";
static const char *g_mobilenet_combined = "../models/mobilenet_combine";
static const char *g_mobilenet_detect = "../models/mobilenet-detect";
......
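One note for running the new test: per the hunk above, g_genet_combine resolves to ../models/enet, so the combined model and params files need to be placed under that directory (the "enet" path may be deliberate reuse rather than a typo; worth confirming). Also, the test feeds the 1x3x224x224 banana image file into an input of dims {1, 3, 128, 128}, which presumably consumes only the first 1*3*128*128 floats of the file.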