Commit ddfa00ac authored by: W wangliu

move arm op kernels to central_arm_func

Parent 6d5281de
......@@ -3,7 +3,6 @@
#### The following is the execution flow chart of the paddle-mobile code:
![Execution flow chart](http://otkwwi4x8.bkt.clouddn.com/2018-07-02-15305189473720.png)
......@@ -15,7 +14,6 @@
Let's first look at the model. Models come in two formats:
In the first format the parameter files are separate, as shown below: the red box marks the protobuf file describing the model structure, and the remaining files hold the parameters.
![Model description](http://otkwwi4x8.bkt.clouddn.com/2018-07-02-15305190629577.png)
......@@ -23,6 +21,7 @@
![Model description (combined)](http://otkwwi4x8.bkt.clouddn.com/2018-07-02-15305191057130.png)
The loader module loads the model structure information, i.e. the protobuf file in the red box, into memory and optimizes the model structure (for example, fusing several fine-grained ops into one coarse-grained op, such as fusing conv, add, batchnorm, and relu into conv\_add\_batchnorm\_relu),
which makes later algorithm-level optimization easier.
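
The idea behind this fusion pass can be pictured as scanning the op sequence for a known pattern and replacing it with a single fused op. Below is a minimal, illustrative C++ sketch; the names (`FuseConvAddBNRelu`, the flat op list) are hypothetical, and the real optimizer works on the protobuf graph description rather than a flat list of op names:

```cpp
#include <algorithm>
#include <string>
#include <vector>

// Replace every occurrence of the sequence {conv, add, batchnorm, relu} in a
// linear op list with the single fused op conv_add_batchnorm_relu.
std::vector<std::string> FuseConvAddBNRelu(const std::vector<std::string> &ops) {
  static const std::vector<std::string> kPattern = {"conv", "add", "batchnorm",
                                                    "relu"};
  std::vector<std::string> fused;
  for (std::size_t i = 0; i < ops.size();) {
    if (i + kPattern.size() <= ops.size() &&
        std::equal(kPattern.begin(), kPattern.end(), ops.begin() + i)) {
      fused.push_back("conv_add_batchnorm_relu");
      i += kPattern.size();
    } else {
      fused.push_back(ops[i]);
      ++i;
    }
  }
  return fused;
}
```

Running one fused kernel instead of four separate ops avoids writing the intermediate tensors back to memory, which is where most of the speedup comes from.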
......
......@@ -23,7 +23,17 @@ namespace framework {
class Scope {
public:
Scope() = default;
~Scope() = default;
~Scope() {
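// The scope owns its variables and child scopes, so release both on destruction.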
for (auto &var : vars_) {
delete var.second;
}
vars_.clear();
for (auto kid : kids_) {
delete kid;
}
kids_.clear();
}
Scope &NewScope() const;
......
......@@ -54,13 +54,14 @@ string jstring2cppstring(JNIEnv *env, jstring jstr) {
JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_PML_load(JNIEnv *env,
jclass thiz,
jstring modelPath) {
ANDROIDLOGI("load invoked");
bool optimize = true;
return getPaddleMobileInstance()->Load(jstring2cppstring(env, modelPath),
optimize);
}
JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictImage(
JNIEnv *env, jclass thiz, jfloatArray buf) {
JNIEXPORT jfloatArray JNICALL
Java_com_baidu_paddle_PML_predict(JNIEnv *env, jclass thiz, jfloatArray buf) {
jfloatArray result = NULL;
int count = 0;
float *dataPointer = nullptr;
......@@ -78,6 +79,7 @@ JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictImage(
count = output->numel();
result = env->NewFloatArray(count);
env->SetFloatArrayRegion(result, 0, count, output->data<float>());
ANDROIDLOGI("predict finished");
return result;
}
......
......@@ -31,8 +31,8 @@ JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_PML_load(JNIEnv *env,
/**
* object detection for android
*/
JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictImage(
JNIEnv *env, jclass thiz, jfloatArray buf);
JNIEXPORT jfloatArray JNICALL
Java_com_baidu_paddle_PML_predict(JNIEnv *env, jclass thiz, jfloatArray buf);
/**
* clear data of the net when destroy for android
......
......@@ -14,6 +14,7 @@ limitations under the License. */
#ifdef FUSION_CONVADD_OP
#include "operators/kernel/conv_add_kernel.h"
#include "../central-arm-func/conv_add_arm_func.h"
namespace paddle_mobile {
namespace operators {
......@@ -23,111 +24,9 @@ bool ConvAddKernel<CPU, float>::Init(FusionConvAddParam *param) {
return true;
}
void ConvAddBasic(const FusionConvAddParam &param) {
const Tensor *input = param.Input();
Tensor filter = *param.Filter();
Tensor bias = *param.Bias();
int axis = param.Axis();
Tensor *output = param.Output();
math::expand_bias(bias, axis, output->dims());
output->ShareDataWith(bias);
int groups = param.Groups();
std::vector<int> strides = param.Strides();
std::vector<int> paddings = param.Paddings();
std::vector<int> dilations = param.Dilations();
const int batch_size = static_cast<int>(input->dims()[0]);
std::vector<int64_t> filter_shape_vec(framework::vectorize(filter.dims()));
std::vector<int64_t> output_shape_vec(framework::vectorize(output->dims()));
size_t data_dim = filter_shape_vec.size() - 2;
std::vector<int64_t> col_shape_vec(1 + 2 * data_dim);
col_shape_vec[0] = input->dims()[1] / groups;
for (size_t j = 0; j < data_dim; ++j) {
col_shape_vec[j + 1] = filter_shape_vec[j + 2];
col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2];
}
framework::DDim col_shape(framework::make_ddim(col_shape_vec));
framework::DDim col_matrix_shape =
framework::flatten_to_2d(col_shape, data_dim + 1);
bool is_expand =
math::IsExpand(filter_shape_vec, strides, paddings, dilations);
Tensor col;
Tensor col_matrix;
if (is_expand) {
col.mutable_data<float>(col_shape);
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
}
framework::DDim input_shape = framework::slice_ddim(
input->dims(), 1, static_cast<int>(input->dims().size()));
framework::DDim filter_matrix_shape = {filter.dims()[0],
filter.numel() / filter.dims()[0]};
filter.Resize(filter_matrix_shape);
framework::DDim output_matrix_shape = {
output->dims()[1],
output->numel() / (output->dims()[0] * output->dims()[1])};
// convolution operator: im2col(or vol2col) + gemm
int in_step = static_cast<int>(input->dims()[1]) / groups;
int out_step = static_cast<int>(output->dims()[1]) / groups;
math::Vol2ColFunctor<CPU, float> vol2col;
math::Im2ColFunctor<math::ColFormat::kCFO, CPU, float> im2col;
for (int i = 0; i < batch_size; i++) {
Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape);
for (int g = 0; g < groups; g++) {
Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
if (!is_expand) {
col.ShareDataWith(in_slice);
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
} else if (data_dim == 2U) {
// im2col
im2col(in_slice, dilations, strides,
std::vector<int>{paddings[0], paddings[1], paddings[0],
paddings[1]},
&col);
} else if (data_dim == 3U) {
// vol2col
vol2col(in_slice, dilations, strides, paddings, &col);
}
// gemm
Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
math::matmul<float>(filter_slice, false, col_matrix, false,
static_cast<float>(1), &out_slice,
static_cast<float>(1));
}
}
}
template <>
void ConvAddKernel<CPU, float>::Compute(const FusionConvAddParam &param) const {
if (param.Groups() == param.Input()->dims()[1] &&
param.Input()->dims()[1] == param.Output()->dims()[1] &&
param.Filter()->dims()[2] == param.Filter()->dims()[3] &&
param.Filter()->dims()[2] == 3 && param.Strides()[0] == 1) {
math::DepthwiseConv3x3s1p1(param.Input(), param.Filter(), param.Output(),
param.Bias(), true);
} else if (param.Groups() == param.Input()->dims()[1] &&
param.Input()->dims()[1] == param.Output()->dims()[1] &&
param.Filter()->dims()[2] == param.Filter()->dims()[3] &&
param.Filter()->dims()[2] == 3) {
math::DepthwiseConv3x3(param.Input(), param.Strides(), param.Paddings(),
param.Filter(), param.Bias(), param.Output(), true);
} else {
ConvAddBasic(param);
}
ConvAddCompute<float>(param);
}
template class ConvAddKernel<CPU, float>;
......
......@@ -14,27 +14,11 @@ limitations under the License. */
#ifdef POOL_OP
#include <operators/kernel/pool_kernel.h>
#include "common/log.h"
#include "operators/kernel/pool_kernel.h"
#include "../central-arm-func/pool_arm_func.h"
namespace paddle_mobile {
namespace operators {
inline void PoolBasic(std::string pooling_type, std::vector<int> ksize,
std::vector<int> strides, std::vector<int> paddings,
const Tensor *in_x, Tensor *out) {
if (pooling_type == "max") {
math::PoolFunctor<CPU, math::MaxPool<float>, float> pool2d_forward;
math::MaxPool<float> pool_process;
pool2d_forward(*in_x, ksize, strides, paddings, pool_process, out);
} else if (pooling_type == "avg") {
math::PoolFunctor<CPU, math::AvgPool<float>, float> pool2d_forward;
math::AvgPool<float> pool_process;
pool2d_forward(*in_x, ksize, strides, paddings, pool_process, out);
}
}
template <>
bool PoolKernel<CPU, float>::Init(PoolParam *param) {
return true;
......@@ -42,54 +26,7 @@ bool PoolKernel<CPU, float>::Init(PoolParam *param) {
template <>
void PoolKernel<CPU, float>::Compute(const PoolParam &param) const {
const Tensor *in_x = param.Input();
Tensor *out = param.Output();
std::string pooling_type = param.PoolingType();
std::vector<int> ksize = param.Ksize();
std::vector<int> strides = param.Strides();
std::vector<int> paddings = param.Paddings();
if (ksize.size() != 2) {
LOG(paddle_mobile::LogLevel::kLOG_ERROR)
<< "Pool op only supports 2D and 3D input.";
}
if (param.isGlobalPooling()) {
for (size_t i = 0; i < ksize.size(); ++i) {
paddings[i] = 0;
ksize[i] = static_cast<int>(in_x->dims()[i + 2]);
}
} else if (ksize[0] == 3 && ksize[0] == ksize[1]) {
if (pooling_type == "max") {
if (strides[0] == strides[1] && strides[0] == 1 &&
paddings[0] == paddings[1] && paddings[1] == 1) {
math::Pool3x3Maxs1p1(in_x, out);
} else {
math::Pool3x3Max(strides, paddings, in_x, out);
}
math::Pool3x3Max(strides, paddings, in_x, out);
} else if (pooling_type == "avg") {
if (strides[0] == strides[1] && strides[0] == 1 &&
paddings[0] == paddings[1] && paddings[1] == 1) {
math::Pool3x3Avgs1p1(in_x, out);
} else {
math::Pool3x3Avg(strides, paddings, in_x, out);
}
math::Pool3x3Avg(strides, paddings, in_x, out);
}
} else if (ksize[0] == 2 && ksize[0] == ksize[1]) {
if (pooling_type == "max") {
math::Pool2x2Max(strides, paddings, in_x, out);
} else if (pooling_type == "avg") {
math::Pool2x2Avg(strides, paddings, in_x, out);
}
} else {
PoolBasic(pooling_type, ksize, strides, paddings, in_x, out);
}
PoolCompute<float>(param);
}
} // namespace operators
} // namespace paddle_mobile
......
......@@ -15,6 +15,7 @@ limitations under the License. */
#ifdef SIGMOID_OP
#include "../sigmoid_kernel.h"
#include "../central-arm-func/sigmoid_arm_func.h"
#if __ARM_NEON
#include "../../math/math_func_neon.h"
#endif
......@@ -25,52 +26,6 @@ namespace operators {
using framework::DDim;
using framework::Tensor;
void sigmoid(const Tensor *X, Tensor *Y) {
#if __ARM_NEON
const float *input = X->data<float>();
float *output = Y->mutable_data<float>();
const DDim &dDim = X->dims();
int axis_index = 1;
if (dDim.size() < 4) {
axis_index = 0;
}
DDim outer_ddim =
paddle_mobile::framework::slice_ddim(dDim, 0, axis_index + 1);
DDim inner_ddim =
paddle_mobile::framework::slice_ddim(dDim, axis_index + 1, dDim.size());
int out_size = paddle_mobile::framework::product(outer_ddim);
int inner_size = paddle_mobile::framework::product(inner_ddim);
DLOG << "outsize=" << out_size;
DLOG << "innersize=" << inner_size;
#pragma omp parallel for
for (int i = 0; i < out_size; ++i) {
const float *input_outer_ptr = input + i * inner_size;
float *output_outer_ptr = output + i * inner_size;
int nn = inner_size >> 2;
int remain = inner_size - (nn << 2);
float32x4_t _one = vdupq_n_f32(1.f);
for (; nn > 0; nn--) {
float32x4_t data = vld1q_f32(input_outer_ptr);
data = vnegq_f32(data);
data = exp_ps(data);
data = vaddq_f32(data, _one);
float32x4_t out_data = vrecpeq_f32(data);
out_data = vmulq_f32(vrecpsq_f32(data, out_data), out_data);
vst1q_f32(output_outer_ptr, out_data);
input_outer_ptr += 4;
output_outer_ptr += 4;
}
for (; remain > 0; remain--) {
*output_outer_ptr = 1.f / (1.f + exp(-*input_outer_ptr));
output_outer_ptr++;
input_outer_ptr++;
}
}
#endif
}
template <>
bool SigmoidKernel<CPU, float>::Init(SigmoidParam *param) {
return true;
......@@ -78,11 +33,7 @@ bool SigmoidKernel<CPU, float>::Init(SigmoidParam *param) {
template <>
void SigmoidKernel<CPU, float>::Compute(const SigmoidParam &param) const {
const Tensor *in_x = param.InputX();
Tensor *out = param.Out();
auto x_dims = in_x->dims();
out->Resize(x_dims);
sigmoid(in_x, out);
SigmoidCompute<float>(param);
}
template class SigmoidKernel<CPU, float>;
......
......@@ -15,7 +15,8 @@ limitations under the License. */
#ifdef SOFTMAX_OP
#include "../softmax_kernel.h"
#include "../../math/softmax.h"
#include "../central-arm-func/softmax_arm_func.h"
#include "operators/math/softmax.h"
namespace paddle_mobile {
namespace operators {
......@@ -26,11 +27,7 @@ bool SoftmaxKernel<CPU, float>::Init(SoftmaxParam *param) {
template <>
void SoftmaxKernel<CPU, float>::Compute(const SoftmaxParam &param) const {
const Tensor *in_x = param.InputX();
Tensor *out = param.Out();
auto x_dims = in_x->dims();
out->Resize(x_dims);
math::SoftmaxFuntor<CPU, float>()(in_x, out);
SoftmaxCompute<float>(param);
}
template class SoftmaxKernel<CPU, float>;
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVADD_OP
#pragma once
#include <vector>
#include "operators/math/conv_func.h"
#include "operators/math/depthwise_conv_3x3.h"
#include "operators/math/im2col.h"
#include "operators/math/math_function.h"
#include "operators/math/vol2col.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
void ConvAddBasic(const FusionConvAddParam &param) {
const Tensor *input = param.Input();
Tensor filter = *param.Filter();
Tensor bias = *param.Bias();
int axis = param.Axis();
Tensor *output = param.Output();
math::expand_bias(bias, axis, output->dims());
output->ShareDataWith(bias);
int groups = param.Groups();
std::vector<int> strides = param.Strides();
std::vector<int> paddings = param.Paddings();
std::vector<int> dilations = param.Dilations();
const int batch_size = static_cast<int>(input->dims()[0]);
std::vector<int64_t> filter_shape_vec(framework::vectorize(filter.dims()));
std::vector<int64_t> output_shape_vec(framework::vectorize(output->dims()));
size_t data_dim = filter_shape_vec.size() - 2;
std::vector<int64_t> col_shape_vec(1 + 2 * data_dim);
col_shape_vec[0] = input->dims()[1] / groups;
for (size_t j = 0; j < data_dim; ++j) {
col_shape_vec[j + 1] = filter_shape_vec[j + 2];
col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2];
}
framework::DDim col_shape(framework::make_ddim(col_shape_vec));
framework::DDim col_matrix_shape =
framework::flatten_to_2d(col_shape, data_dim + 1);
bool is_expand =
math::IsExpand(filter_shape_vec, strides, paddings, dilations);
Tensor col;
Tensor col_matrix;
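// col / col_matrix are only allocated when im2col is actually required, i.e.
// when the filter is not 1x1 with unit stride, zero padding and unit dilation;
// otherwise the input slice itself is fed to the gemm below.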
if (is_expand) {
col.mutable_data<float>(col_shape);
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
}
framework::DDim input_shape = framework::slice_ddim(
input->dims(), 1, static_cast<int>(input->dims().size()));
framework::DDim filter_matrix_shape = {filter.dims()[0],
filter.numel() / filter.dims()[0]};
filter.Resize(filter_matrix_shape);
framework::DDim output_matrix_shape = {
output->dims()[1],
output->numel() / (output->dims()[0] * output->dims()[1])};
// convolution operator: im2col(or vol2col) + gemm
int in_step = static_cast<int>(input->dims()[1]) / groups;
int out_step = static_cast<int>(output->dims()[1]) / groups;
math::Vol2ColFunctor<CPU, float> vol2col;
math::Im2ColFunctor<math::ColFormat::kCFO, CPU, float> im2col;
for (int i = 0; i < batch_size; i++) {
Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape);
for (int g = 0; g < groups; g++) {
Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
if (!is_expand) {
col.ShareDataWith(in_slice);
col_matrix.ShareDataWith(col);
col_matrix.Resize(col_matrix_shape);
} else if (data_dim == 2U) {
// im2col
im2col(in_slice, dilations, strides,
std::vector<int>{paddings[0], paddings[1], paddings[0],
paddings[1]},
&col);
} else if (data_dim == 3U) {
// vol2col
vol2col(in_slice, dilations, strides, paddings, &col);
}
// gemm
Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
math::matmul<float>(filter_slice, false, col_matrix, false,
static_cast<float>(1), &out_slice,
static_cast<float>(1));
}
}
}
template <typename P>
void ConvAddCompute(const FusionConvAddParam &param) {
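// Dispatch: depthwise 3x3 convolutions (groups == input channels == output
// channels, 3x3 filter) use hand-written NEON kernels, with a dedicated
// stride-1 variant; every other case falls back to ConvAddBasic (im2col + gemm).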
if (param.Groups() == param.Input()->dims()[1] &&
param.Input()->dims()[1] == param.Output()->dims()[1] &&
param.Filter()->dims()[2] == param.Filter()->dims()[3] &&
param.Filter()->dims()[2] == 3 && param.Strides()[0] == 1) {
math::DepthwiseConv3x3s1p1(param.Input(), param.Filter(), param.Output(),
param.Bias(), true);
} else if (param.Groups() == param.Input()->dims()[1] &&
param.Input()->dims()[1] == param.Output()->dims()[1] &&
param.Filter()->dims()[2] == param.Filter()->dims()[3] &&
param.Filter()->dims()[2] == 3) {
math::DepthwiseConv3x3(param.Input(), param.Strides(), param.Paddings(),
param.Filter(), param.Bias(), param.Output(), true);
} else {
ConvAddBasic(param);
}
}
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -15,19 +15,21 @@ limitations under the License. */
#ifdef CONV_OP
#pragma once
#include <operators/math/depthwise_conv_3x3.h>
#include <vector>
#include "operators/math/conv_func.h"
#include "operators/math/depthwise_conv_3x3.h"
#include "operators/math/im2col.h"
#include "operators/math/math_function.h"
#include "operators/math/vol2col.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
inline void ConvBasic(const ConvParam &param) {
const Tensor *input = param.Input();
Tensor filter = *param.Filter();
Tensor *output = param.Output();
output->mutable_data<float>();
int groups = param.Groups();
std::vector<int> strides = param.Strides();
std::vector<int> paddings = param.Paddings();
......@@ -111,20 +113,18 @@ inline void ConvBasic(const ConvParam &param) {
template <typename P>
void ConvCompute(const ConvParam &param) {
Tensor Bias;
Bias.mutable_data<float>({param.Groups()});
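// Dispatch mirrors ConvAddCompute: depthwise 3x3 filters use the NEON kernels,
// everything else falls back to ConvBasic (im2col + gemm).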
if (param.Groups() == param.Input()->dims()[1] &&
param.Input()->dims()[1] == param.Output()->dims()[1] &&
param.Filter()->dims()[2] == param.Filter()->dims()[3] &&
param.Filter()->dims()[2] == 3 && param.Strides()[0] == 1) {
math::DepthwiseConv3x3s1p1(param.Input(), param.Filter(), param.Output(),
&Bias, false);
nullptr, false);
} else if (param.Groups() == param.Input()->dims()[1] &&
param.Input()->dims()[1] == param.Output()->dims()[1] &&
param.Filter()->dims()[2] == param.Filter()->dims()[3] &&
param.Filter()->dims()[2] == 3 && param.Strides()[0] == 2) {
param.Filter()->dims()[2] == 3) {
math::DepthwiseConv3x3(param.Input(), param.Strides(), param.Paddings(),
param.Filter(), &Bias, param.Output(), false);
param.Filter(), nullptr, param.Output(), false);
} else {
ConvBasic(param);
}
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef POOL_OP
#pragma once
#include <string>
#include <vector>
#include "operators/math/pooling.h"
namespace paddle_mobile {
namespace operators {
using framework::Tensor;
inline void PoolBasic(std::string pooling_type, std::vector<int> ksize,
std::vector<int> strides, std::vector<int> paddings,
const Tensor *in_x, Tensor *out) {
if (pooling_type == "max") {
math::PoolFunctor<CPU, math::MaxPool<float>, float> pool2d_forward;
math::MaxPool<float> pool_process;
pool2d_forward(*in_x, ksize, strides, paddings, pool_process, out);
} else if (pooling_type == "avg") {
math::PoolFunctor<CPU, math::AvgPool<float>, float> pool2d_forward;
math::AvgPool<float> pool_process;
pool2d_forward(*in_x, ksize, strides, paddings, pool_process, out);
}
}
template <typename P>
void PoolCompute(const PoolParam &param) {
const Tensor *in_x = param.Input();
Tensor *out = param.Output();
std::string pooling_type = param.PoolingType();
std::vector<int> ksize = param.Ksize();
std::vector<int> strides = param.Strides();
std::vector<int> paddings = param.Paddings();
if (ksize.size() != 2) {
LOG(paddle_mobile::LogLevel::kLOG_ERROR)
<< "Pool op only supports 2D and 3D input.";
}
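// Window-size dispatch: global pooling rewrites ksize/paddings to cover the
// whole feature map; 3x3 and 2x2 windows use hand-written NEON kernels (3x3
// has a dedicated stride-1, padding-1 variant); other sizes fall back to the
// generic PoolFunctor.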
if (param.isGlobalPooling()) {
for (size_t i = 0; i < ksize.size(); ++i) {
paddings[i] = 0;
ksize[i] = static_cast<int>(in_x->dims()[i + 2]);
}
} else if (ksize[0] == 3 && ksize[0] == ksize[1]) {
if (pooling_type == "max") {
if (strides[0] == strides[1] && strides[0] == 1 &&
paddings[0] == paddings[1] && paddings[1] == 1) {
math::Pool3x3Maxs1p1(in_x, out);
} else {
math::Pool3x3Max(strides, paddings, in_x, out);
}
} else if (pooling_type == "avg") {
if (strides[0] == strides[1] && strides[0] == 1 &&
paddings[0] == paddings[1] && paddings[1] == 1) {
math::Pool3x3Avgs1p1(in_x, out);
} else {
math::Pool3x3Avg(strides, paddings, in_x, out);
}
}
} else if (ksize[0] == 2 && ksize[0] == ksize[1]) {
if (pooling_type == "max") {
math::Pool2x2Max(strides, paddings, in_x, out);
} else if (pooling_type == "avg") {
math::Pool2x2Avg(strides, paddings, in_x, out);
}
} else {
PoolBasic(pooling_type, ksize, strides, paddings, in_x, out);
}
}
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef SIGMOID_OP
#pragma once
#include "operators/op_param.h"
#if __ARM_NEON
#include <arm_neon.h>
#include "operators/math/math_func_neon.h"
#endif
namespace paddle_mobile {
namespace operators {
using framework::DDim;
void sigmoid(const Tensor *X, Tensor *Y) {
#if __ARM_NEON
const float *input = X->data<float>();
float *output = Y->mutable_data<float>();
const DDim &dDim = X->dims();
int axis_index = 1;
if (dDim.size() < 4) {
axis_index = 0;
}
DDim outer_ddim =
paddle_mobile::framework::slice_ddim(dDim, 0, axis_index + 1);
DDim inner_ddim =
paddle_mobile::framework::slice_ddim(dDim, axis_index + 1, dDim.size());
int out_size = paddle_mobile::framework::product(outer_ddim);
int inner_size = paddle_mobile::framework::product(inner_ddim);
DLOG << "outsize=" << out_size;
DLOG << "innersize=" << inner_size;
#pragma omp parallel for
for (int i = 0; i < out_size; ++i) {
const float *input_outer_ptr = input + i * inner_size;
float *output_outer_ptr = output + i * inner_size;
int nn = inner_size >> 2;
int remain = inner_size - (nn << 2);
float32x4_t _one = vdupq_n_f32(1.f);
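// Vector loop: four lanes per iteration. sigmoid(x) = 1 / (1 + exp(-x)); the
// reciprocal comes from vrecpeq_f32 refined by one Newton-Raphson step
// (vrecpsq_f32). The scalar loop below handles the remaining elements.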
for (; nn > 0; nn--) {
float32x4_t data = vld1q_f32(input_outer_ptr);
data = vnegq_f32(data);
data = exp_ps(data);
data = vaddq_f32(data, _one);
float32x4_t out_data = vrecpeq_f32(data);
out_data = vmulq_f32(vrecpsq_f32(data, out_data), out_data);
vst1q_f32(output_outer_ptr, out_data);
input_outer_ptr += 4;
output_outer_ptr += 4;
}
for (; remain > 0; remain--) {
*output_outer_ptr = 1.f / (1.f + exp(-*input_outer_ptr));
output_outer_ptr++;
input_outer_ptr++;
}
}
#endif
}
template <typename P>
void SigmoidCompute(const SigmoidParam &param) {
const Tensor *in_x = param.InputX();
Tensor *out = param.Out();
auto x_dims = in_x->dims();
out->Resize(x_dims);
sigmoid(in_x, out);
}
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef SOFTMAX_OP
#pragma once
#include "../../math/softmax.h"
namespace paddle_mobile {
namespace operators {
template <typename P>
void SoftmaxCompute(const SoftmaxParam &param) {
const Tensor *in_x = param.InputX();
Tensor *out = param.Out();
auto x_dims = in_x->dims();
out->Resize(x_dims);
math::SoftmaxFuntor<CPU, float>()(in_x, out);
}
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -17,7 +17,6 @@ limitations under the License. */
#pragma once
#include "framework/operator.h"
#include "operators/math/pooling.h"
#include "operators/op_param.h"
namespace paddle_mobile {
......
......@@ -23,8 +23,6 @@ namespace paddle_mobile {
namespace operators {
using framework::OpKernelBase;
void simoid(Tensor *X, Tensor *Y);
template <typename DeviceType, typename T>
class SoftmaxKernel : public OpKernelBase<DeviceType, SoftmaxParam> {
public:
......
......@@ -245,7 +245,10 @@ void DepthwiseConv3x3s1p1(const Tensor *input, const Tensor *filter,
const float *input_data = input->data<float>();
const float *filter_data = filter->data<float>();
float *output_data = output->data<float>();
const float *bias_data = bias->data<float>();
const float *bias_data = nullptr;
if (if_bias) {
bias_data = bias->data<float>();
}
const int h = static_cast<int>(input->dims()[2]);
const int w = static_cast<int>(input->dims()[3]);
......
......@@ -13,9 +13,12 @@ See the License for the specific language governing permissions and
limitations under the License. */
#ifdef POOL_OP
#include "operators/math/pool_3x3.h"
#include <climits>
#include "pool_3x3.h"
#include "framework/tensor.h"
#if __ARM_NEON
#include <arm_neon.h>
#endif // __ARM_NEON
#include <climits>
namespace paddle_mobile {
namespace operators {
namespace math {
......
......@@ -195,8 +195,7 @@ class OpParam {
class ConvParam : OpParam {
public:
ConvParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
const framework::Scope &scope) {
const AttributeMap &attrs, const Scope &scope) {
filter_ = FilterFrom<LoDTensor>(inputs, scope);
input_ = InputFrom<LoDTensor>(inputs, scope);
output_ = OutputFrom<LoDTensor>(outputs, scope);
......@@ -237,12 +236,11 @@ Print &operator<<(Print &printer, const ConvParam &conv_param);
class ElementwiseAddParam : OpParam {
public:
ElementwiseAddParam(const VariableNameMap &inputs,
const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
const framework::Scope &scope) {
input_x_ = InputXFrom<framework::LoDTensor>(inputs, scope);
input_y_ = InputYFrom<framework::LoDTensor>(inputs, scope);
out_ = OutFrom<framework::LoDTensor>(outputs, scope);
const VariableNameMap &outputs, const AttributeMap &attrs,
const Scope &scope) {
input_x_ = InputXFrom<LoDTensor>(inputs, scope);
input_y_ = InputYFrom<LoDTensor>(inputs, scope);
out_ = OutFrom<LoDTensor>(outputs, scope);
axis_ = GetAttr<int>("axis", attrs);
}
......@@ -267,11 +265,10 @@ class ElementwiseAddParam : OpParam {
class MulParam : OpParam {
public:
MulParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
const framework::Scope &scope) {
input_x_ = InputXFrom<framework::LoDTensor>(inputs, scope);
input_y_ = InputYFrom<framework::LoDTensor>(inputs, scope);
out_ = OutFrom<framework::LoDTensor>(outputs, scope);
const AttributeMap &attrs, const Scope &scope) {
input_x_ = InputXFrom<LoDTensor>(inputs, scope);
input_y_ = InputYFrom<LoDTensor>(inputs, scope);
out_ = OutFrom<LoDTensor>(outputs, scope);
x_num_col_dims_ = GetAttr<int>("x_num_col_dims", attrs);
y_num_col_dims_ = GetAttr<int>("y_num_col_dims", attrs);
}
......@@ -299,10 +296,9 @@ class MulParam : OpParam {
class ConcatParam : public OpParam {
public:
ConcatParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
const framework::Scope &scope) {
const AttributeMap &attrs, const Scope &scope) {
inputs_ = InputMultiFrom<LoDTensor>(inputs, scope);
out_ = OutFrom<framework::LoDTensor>(outputs, scope);
out_ = OutFrom<LoDTensor>(outputs, scope);
axis_ = GetAttr<int>("axis", attrs);
}
......@@ -323,11 +319,10 @@ class ConcatParam : public OpParam {
class LrnParam : public OpParam {
public:
LrnParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
const framework::Scope &scope) {
input_x_ = InputXFrom<framework::LoDTensor>(inputs, scope);
out_ = OutFrom<framework::LoDTensor>(outputs, scope);
mid_out_ = MidOutFrom<framework::LoDTensor>(outputs, scope);
const AttributeMap &attrs, const Scope &scope) {
input_x_ = InputXFrom<LoDTensor>(inputs, scope);
out_ = OutFrom<LoDTensor>(outputs, scope);
mid_out_ = MidOutFrom<LoDTensor>(outputs, scope);
n_ = GetAttr<int>("n", attrs);
alpha_ = GetAttr<float>("alpha", attrs);
beta_ = GetAttr<float>("beta", attrs);
......@@ -367,14 +362,13 @@ class LrnParam : public OpParam {
class BatchNormParam : OpParam {
public:
BatchNormParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
const framework::Scope &scope) {
input_x_ = InputXFrom<framework::LoDTensor>(inputs, scope);
output_y_ = OutputYFrom<framework::LoDTensor>(outputs, scope);
input_bias_ = InputBiasFrom<framework::LoDTensor>(inputs, scope);
input_mean_ = InputMeanFrom<framework::LoDTensor>(inputs, scope);
input_scale_ = InputScaleFrom<framework::LoDTensor>(inputs, scope);
input_variance_ = InputVarianceFrom<framework::LoDTensor>(inputs, scope);
const AttributeMap &attrs, const Scope &scope) {
input_x_ = InputXFrom<LoDTensor>(inputs, scope);
output_y_ = OutputYFrom<LoDTensor>(outputs, scope);
input_bias_ = InputBiasFrom<LoDTensor>(inputs, scope);
input_mean_ = InputMeanFrom<LoDTensor>(inputs, scope);
input_scale_ = InputScaleFrom<LoDTensor>(inputs, scope);
input_variance_ = InputVarianceFrom<LoDTensor>(inputs, scope);
epsilon_ = GetAttr<float>("epsilon", attrs);
momentum_ = GetAttr<float>("momentum", attrs);
is_test_ = GetAttr<bool>("is_test", attrs);
......@@ -418,11 +412,10 @@ class BatchNormParam : OpParam {
class PoolParam : public OpParam {
public:
PoolParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
const framework::Scope &scope) {
input_ = InputXFrom<framework::LoDTensor>(inputs, scope);
const AttributeMap &attrs, const Scope &scope) {
input_ = InputXFrom<LoDTensor>(inputs, scope);
output_ = OutFrom<framework::LoDTensor>(outputs, scope);
output_ = OutFrom<LoDTensor>(outputs, scope);
pooling_type_ = GetAttr<string>("pooling_type", attrs);
ksize_ = GetAttr<vector<int>>("ksize", attrs);
strides_ = GetAttr<vector<int>>("strides", attrs);
......@@ -464,13 +457,11 @@ class PoolParam : public OpParam {
class PriorBoxParam : public OpParam {
public:
PriorBoxParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
const framework::Scope &scope) {
input_ = InputFrom<framework::LoDTensor>(inputs, scope);
input_image_ = InputImageFrom<framework::LoDTensor>(inputs, scope);
output_boxes_ = OutputBoxesFrom<framework::LoDTensor>(outputs, scope);
output_variances_ =
OutputVariancesFrom<framework::LoDTensor>(outputs, scope);
const AttributeMap &attrs, const Scope &scope) {
input_ = InputFrom<LoDTensor>(inputs, scope);
input_image_ = InputImageFrom<LoDTensor>(inputs, scope);
output_boxes_ = OutputBoxesFrom<LoDTensor>(outputs, scope);
output_variances_ = OutputVariancesFrom<LoDTensor>(outputs, scope);
min_sizes_ = GetAttr<vector<float>>("min_sizes", attrs);
max_sizes_ = GetAttr<vector<float>>("max_sizes", attrs);
aspect_ratios_ = GetAttr<vector<float>>("aspect_ratios", attrs);
......@@ -528,13 +519,11 @@ class PriorBoxParam : public OpParam {
class BoxCoderParam : public OpParam {
public:
BoxCoderParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
const framework::Scope &scope) {
input_priorbox_ = InputPriorBoxFrom<framework::LoDTensor>(inputs, scope);
input_priorboxvar_ =
InputPriorBoxVarFrom<framework::LoDTensor>(inputs, scope);
input_targetbox_ = InputTargetBoxFrom<framework::LoDTensor>(inputs, scope);
output_box_ = OutputBoxFrom<framework::LoDTensor>(outputs, scope);
const AttributeMap &attrs, const Scope &scope) {
input_priorbox_ = InputPriorBoxFrom<LoDTensor>(inputs, scope);
input_priorboxvar_ = InputPriorBoxVarFrom<LoDTensor>(inputs, scope);
input_targetbox_ = InputTargetBoxFrom<LoDTensor>(inputs, scope);
output_box_ = OutputBoxFrom<LoDTensor>(outputs, scope);
code_type_ = GetAttr<std::string>("code_type", attrs);
}
const Tensor *InputPriorBox() const { return input_priorbox_; }
......@@ -560,10 +549,9 @@ class BoxCoderParam : public OpParam {
class SoftmaxParam : public OpParam {
public:
SoftmaxParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
const framework::Scope &scope) {
input_x_ = InputXFrom<framework::LoDTensor>(inputs, scope);
out_ = OutFrom<framework::LoDTensor>(outputs, scope);
const AttributeMap &attrs, const Scope &scope) {
input_x_ = InputXFrom<LoDTensor>(inputs, scope);
out_ = OutFrom<LoDTensor>(outputs, scope);
}
const Tensor *InputX() const { return input_x_; }
Tensor *Out() const { return out_; }
......@@ -578,10 +566,9 @@ class SoftmaxParam : public OpParam {
class SigmoidParam : public OpParam {
public:
SigmoidParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
const framework::Scope &scope) {
input_x_ = InputXFrom<framework::LoDTensor>(inputs, scope);
out_ = OutFrom<framework::LoDTensor>(outputs, scope);
const AttributeMap &attrs, const Scope &scope) {
input_x_ = InputXFrom<LoDTensor>(inputs, scope);
out_ = OutFrom<LoDTensor>(outputs, scope);
}
const Tensor *InputX() const { return input_x_; }
Tensor *Out() const { return out_; }
......@@ -643,9 +630,9 @@ class MultiClassNMSParam : public OpParam {
class FeedParam : public OpParam {
public:
FeedParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs, framework::Scope &scope) {
input_x_ = InputXFrom<framework::LoDTensor>(inputs, scope);
out_ = OutFrom<framework::LoDTensor>(outputs, scope);
const AttributeMap &attrs, Scope &scope) {
input_x_ = InputXFrom<LoDTensor>(inputs, scope);
out_ = OutFrom<LoDTensor>(outputs, scope);
auto var = scope.Var("batch_size");
batch_size = var->GetValue<int>();
}
......@@ -662,10 +649,9 @@ class FeedParam : public OpParam {
class FetchParam : public OpParam {
public:
FetchParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
const framework::Scope &scope) {
input_x_ = InputXFrom<framework::LoDTensor>(inputs, scope);
out_ = OutFrom<framework::LoDTensor>(outputs, scope);
const AttributeMap &attrs, const Scope &scope) {
input_x_ = InputXFrom<LoDTensor>(inputs, scope);
out_ = OutFrom<LoDTensor>(outputs, scope);
}
const Tensor *InputX() const { return input_x_; }
Tensor *Out() const { return out_; }
......@@ -863,10 +849,10 @@ class FusionConvAddBNReluParam : public OpParam {
paddings_ = GetAttr<vector<int>>("paddings", attrs);
dilations_ = GetAttr<vector<int>>("dilations", attrs);
groups = GetAttr<int>("groups", attrs);
input_bias_ = InputBiasFrom<framework::LoDTensor>(inputs, scope);
input_mean_ = InputMeanFrom<framework::LoDTensor>(inputs, scope);
input_scale_ = InputScaleFrom<framework::LoDTensor>(inputs, scope);
input_variance_ = InputVarianceFrom<framework::LoDTensor>(inputs, scope);
input_bias_ = InputBiasFrom<LoDTensor>(inputs, scope);
input_mean_ = InputMeanFrom<LoDTensor>(inputs, scope);
input_scale_ = InputScaleFrom<LoDTensor>(inputs, scope);
input_variance_ = InputVarianceFrom<LoDTensor>(inputs, scope);
epsilon_ = GetAttr<float>("epsilon", attrs);
momentum_ = GetAttr<float>("momentum", attrs);
is_test_ = GetAttr<bool>("is_test", attrs);
......
......@@ -17,25 +17,25 @@ limitations under the License. */
#include "../test_include.h"
int main() {
paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
paddle_mobile::Loader<paddle_mobile::CPU> loader;
bool optimize = true;
auto time1 = time();
// auto program = loader.Load(g_googlenet, optimize);
if (paddle_mobile.Load(g_googlenet_combine + "/model",
g_googlenet_combine + "/params", optimize)) {
auto program = loader.Load(g_googlenet_combine + "/model",
g_googlenet_combine + "/params", optimize);
auto time2 = time();
DLOG << "load cost :" << time_diff(time1, time2) << "ms\n";
paddle_mobile::Executor<paddle_mobile::CPU> executor(program, 1, optimize);
std::vector<float> input;
std::vector<int64_t> dims{1, 3, 224, 224};
GetInput<float>(g_test_image_1x3x224x224, &input, dims);
auto time3 = time();
for (int i = 0; i < 10; ++i) {
paddle_mobile.Predict(input, dims);
executor.Predict(input, dims);
}
auto time4 = time();
DLOG << "predict cost :" << time_diff(time3, time4) << "ms\n";
}
return 0;
}
......@@ -32,8 +32,8 @@ build_for_mac() {
build_for_android() {
#rm -rf "../build"
if [ -z "${ANDROID_NDK}" ]; then
echo "ANDROID_NDK not found!"
if [ -z "${NDK_ROOT}" ]; then
echo "NDK_ROOT not found!"
exit -1
fi
......