提交 8162ba9a 编写于 作者: E eclipsess

code style

上级 e89a45d7
......@@ -65,17 +65,17 @@ void BatchNormKernel<CPU, float>::Compute(const BatchNormParam &param) const {
/// ((x - est_mean) * (inv_var) * scale + bias equal to
/// (x * inv_var * scale) + (bias - est_mean * inv_var * scale)
for (int i = 0; i < C; i++) {
new_scale_ptr[i] = inv_std_ptr[i] * scale_ptr[i];
new_bias_ptr[i] = bias_ptr[i] - mean_ptr[i] * inv_std_ptr[i] * scale_ptr[i];
{
for (int n = 0; n < N; n++) {
for (int h = 0; h < H; h++) {
int tmp_index = n * stride0 + i * stride1 + h * stride2;
for (int w = 0; w < W; w++) {
int index = tmp_index + w;
out_ptr[index] =
input_x_ptr[index] * new_scale_ptr[i] + new_bias_ptr[i];
for (int i = 0; i < C; i++) {
new_scale_ptr[i] = inv_std_ptr[i] * scale_ptr[i];
new_bias_ptr[i] = bias_ptr[i] - mean_ptr[i] * inv_std_ptr[i] * scale_ptr[i];
{
for (int n = 0; n < N; n++) {
for (int h = 0; h < H; h++) {
int tmp_index = n * stride0 + i * stride1 + h * stride2;
for (int w = 0; w < W; w++) {
int index = tmp_index + w;
out_ptr[index] =
input_x_ptr[index] * new_scale_ptr[i] + new_bias_ptr[i];
}
}
}
......
......@@ -84,36 +84,36 @@ void StridedNumelCopyWithAxis(int64_t axis, T *dst,
}
}
/// CPU float specialization of the concat kernel.
///
/// Writes the inputs one after another into the output tensor. When
/// `axis == 0` and there are fewer than 10 inputs, each input is copied
/// directly to a running offset in the output buffer; in every other case the
/// work is delegated to `ConcatFunctor<float>`.
///
/// NOTE(review): this listing shows the same statements twice because the
/// pre-format and post-format copies of the function are interleaved.
template <>
void ConcatKernel<CPU, float>::Compute(const ConcatParam &param) const {
auto inputs = param.Inputs();
auto *out = param.Out();
int64_t axis = param.Axis();
out->mutable_data<float>();
template <>
void ConcatKernel<CPU, float>::Compute(const ConcatParam &param) const {
auto inputs = param.Inputs();
auto *out = param.Out();
int64_t axis = param.Axis();
// Request the output's float buffer up front; the returned pointer is not
// used here.
out->mutable_data<float>();
/// A direct copy is sometimes faster; the `< 10` cut-off is a heuristic that
/// may need deeper analysis.
if (axis == 0 && inputs.size() < 10) {
size_t output_offset = 0;
for (auto *in : inputs) {
auto in_stride = framework::stride_numel(in->dims());
auto out_stride = framework::stride_numel(out->dims());
auto dst = out->data<float>() + output_offset;
auto src = in->data<float>();
PADDLE_MOBILE_ENFORCE(
in_stride.size() == out_stride.size(),
"src and dst tensor should have the same dims size.");
memory::Copy(dst, src, sizeof(float) * in_stride[0]);
output_offset += in_stride[0];
}
} else {
std::vector<framework::Tensor> inputs_concat(inputs.size());
for (int j = 0; j < inputs.size(); ++j) {
inputs_concat[j] = *inputs[j];
}
ConcatFunctor<float> concat_functor;
concat_functor(inputs_concat, static_cast<int>(axis), out);
}
/// A direct copy is sometimes faster; the `< 10` cut-off is a heuristic that
/// may need deeper analysis.
if (axis == 0 && inputs.size() < 10) {
// Offset, in floats, of the next free position in the output buffer.
size_t output_offset = 0;
for (auto *in : inputs) {
auto in_stride = framework::stride_numel(in->dims());
auto out_stride = framework::stride_numel(out->dims());
auto dst = out->data<float>() + output_offset;
auto src = in->data<float>();
PADDLE_MOBILE_ENFORCE(
in_stride.size() == out_stride.size(),
"src and dst tensor should have the same dims size.");
// in_stride[0] is used as the number of floats to copy for this input.
memory::Copy(dst, src, sizeof(float) * in_stride[0]);
output_offset += in_stride[0];
}
} else {
// General path: dereference the input pointers into a vector of tensors and
// hand them to the functor.
std::vector<framework::Tensor> inputs_concat(inputs.size());
for (int j = 0; j < inputs.size(); ++j) {
inputs_concat[j] = *inputs[j];
}
ConcatFunctor<float> concat_functor;
concat_functor(inputs_concat, static_cast<int>(axis), out);
}
}
} // namespace operators
} // namespace paddle_mobile
......@@ -14,30 +14,31 @@ limitations under the License. */
#pragma once
#include <operators/math/transform.h>
#include "operators/kernel/relu_kernel.h"
#include <operators/math/transform.h>
namespace paddle_mobile {
namespace operators {
/// Scalar ReLU: yields `in` when it is strictly greater than zero and zero
/// otherwise (a NaN compares false and therefore also maps to zero).
template <typename T>
struct ReluFunctor {
  inline T operator()(T in) const {
    if (in > 0) {
      return in;
    }
    return 0;
  }
};
/// CPU float specialization of the ReLU kernel.
///
/// Takes the input tensor from `param.InputX()`, obtains a writable float
/// buffer from `param.Out()`, and passes the input range, the output pointer
/// and a `ReluFunctor<float>` to `math::Transform`. Judging by the plain loop
/// this call replaced, the effect is `out[i] = in[i] > 0 ? in[i] : 0` for each
/// of the input's `numel()` elements.
template <>
void ReluKernel<CPU, float>::Compute(const ReluParam &param) const {
  const auto *x_tensor = param.InputX();
  auto *src = x_tensor->data<float>();

  auto *y_tensor = param.Out();
  auto *dst = y_tensor->mutable_data<float>();

  ReluFunctor<float> relu;
  math::Transform apply;
  const auto element_count = x_tensor->numel();
  apply(src, src + element_count, dst, relu);
}
/// Scalar ReLU: yields `in` when it is strictly greater than zero and zero
/// otherwise (a NaN compares false and therefore also maps to zero).
template <typename T>
struct ReluFunctor {
  inline T operator()(T in) const {
    if (in > 0) {
      return in;
    }
    return 0;
  }
};
/// CPU float specialization of the ReLU kernel.
///
/// Takes the input tensor from `param.InputX()`, obtains a writable float
/// buffer from `param.Out()`, and passes the input range, the output pointer
/// and a `ReluFunctor<float>` to `math::Transform`. Judging by the plain loop
/// this call replaced, the effect is `out[i] = in[i] > 0 ? in[i] : 0` for each
/// of the input's `numel()` elements.
template <>
void ReluKernel<CPU, float>::Compute(const ReluParam &param) const {
  const auto *x_tensor = param.InputX();
  auto *src = x_tensor->data<float>();

  auto *y_tensor = param.Out();
  auto *dst = y_tensor->mutable_data<float>();

  ReluFunctor<float> relu;
  math::Transform apply;
  const auto element_count = x_tensor->numel();
  apply(src, src + element_count, dst, relu);
}
} // namespace operators
} // namespace paddle_mobile
\ No newline at end of file
} // namespace paddle_mobile
......@@ -18,7 +18,7 @@ limitations under the License. */
// Branch hint: evaluates `condition` as a bool and tells the compiler, via
// __builtin_expect with an expected value of 0, that it is usually false.
#define UNLIKELY(condition) __builtin_expect(static_cast<bool>(condition), 0)
namespace paddle_mobile {
namespace operators {
namespace operators {
/*
* Out = X ⊙ Y
......@@ -31,148 +31,148 @@ namespace paddle_mobile {
* pre=2*3, n=4*5, post=1
* x.shape(6, 20, 1) * y.shape(1, 20, 1).broadcast(6, 20, 1)
*/
/// Splits the shape of `x` into three extents around the span that `y` is
/// aligned with.
///
/// Example: x_dims = (2,3,4,5), y_dims = (4,5), axis = 2 gives
/// pre = 2*3, n = 4*5, post = 1.
///
/// @param x_dims shape of the larger operand.
/// @param y_dims shape of the operand aligned at `axis`; each entry is
///               compared with x_dims[i + axis].
/// @param axis   index in x_dims at which y_dims starts.
/// @param pre    out: product of x_dims[0 .. axis).
/// @param n      out: product of every entry of y_dims.
/// @param post   out: product of x_dims[axis + y_dims.size() .. end).
///
/// A shape mismatch is detected only through `assert`, so it goes unnoticed
/// in builds that disable assertions.
inline void get_mid_dims(const framework::DDim &x_dims,
                         const framework::DDim &y_dims, const int axis,
                         int *pre, int *n, int *post) {
  *pre = 1;
  *n = 1;
  *post = 1;

  const auto x_rank = x_dims.size();
  const auto y_rank = y_dims.size();

  // Leading extent: everything in x in front of the aligned span.
  for (int lead = 0; lead < axis; ++lead) {
    *pre *= x_dims[lead];
  }

  // Aligned span: must agree with y dimension by dimension.
  for (int i = 0; i < y_rank; ++i) {
    assert(x_dims[i + axis] == y_dims[i]);
    /// "Broadcast dimension mismatch.");
    *n *= y_dims[i];
  }

  // Trailing extent: everything in x behind the aligned span.
  for (int tail = axis + y_rank; tail < x_rank; ++tail) {
    *post *= x_dims[tail];
  }
}
/// Splits the shape of `x` into three extents around the span that `y` is
/// aligned with.
///
/// Example: x_dims = (2,3,4,5), y_dims = (4,5), axis = 2 gives
/// pre = 2*3, n = 4*5, post = 1.
///
/// @param x_dims shape of the larger operand.
/// @param y_dims shape of the operand aligned at `axis`; each entry is
///               compared with x_dims[i + axis].
/// @param axis   index in x_dims at which y_dims starts.
/// @param pre    out: product of x_dims[0 .. axis).
/// @param n      out: product of every entry of y_dims.
/// @param post   out: product of x_dims[axis + y_dims.size() .. end).
///
/// A shape mismatch is detected only through `assert`, so it goes unnoticed
/// in builds that disable assertions.
inline void get_mid_dims(const framework::DDim &x_dims,
                         const framework::DDim &y_dims, const int axis,
                         int *pre, int *n, int *post) {
  *pre = 1;
  *n = 1;
  *post = 1;

  const auto x_rank = x_dims.size();
  const auto y_rank = y_dims.size();

  // Leading extent: everything in x in front of the aligned span.
  for (int lead = 0; lead < axis; ++lead) {
    *pre *= x_dims[lead];
  }

  // Aligned span: must agree with y dimension by dimension.
  for (int i = 0; i < y_rank; ++i) {
    assert(x_dims[i + axis] == y_dims[i]);
    /// "Broadcast dimension mismatch.");
    *n *= y_dims[i];
  }

  // Trailing extent: everything in x behind the aligned span.
  for (int tail = axis + y_rank; tail < x_rank; ++tail) {
    *post *= x_dims[tail];
  }
}
/// Drops every trailing dimension of size 1 from `*dims`, in place.
/// For example (4,20,1,1) becomes (4,20); a shape made only of 1s ends up
/// with no dimensions at all. `*dims` is left untouched when its last
/// dimension is not 1.
inline void trim_trailing_singular_dims(framework::DDim *dims) {
  const auto full_rank = dims->size();

  // Walk backwards past the run of trailing 1s.
  auto kept_rank = full_rank;
  while (kept_rank != 0 && (*dims)[kept_rank - 1] == 1) {
    --kept_rank;
  }

  if (kept_rank == full_rank) {
    return;  // nothing to trim
  }

  auto shape = framework::vectorize(*dims);
  shape.resize(kept_rank);
  *dims = framework::make_ddim(shape);
}
/// Drops every trailing dimension of size 1 from `*dims`, in place.
/// For example (4,20,1,1) becomes (4,20); a shape made only of 1s ends up
/// with no dimensions at all. `*dims` is left untouched when its last
/// dimension is not 1.
inline void trim_trailing_singular_dims(framework::DDim *dims) {
  const auto full_rank = dims->size();

  // Walk backwards past the run of trailing 1s.
  auto kept_rank = full_rank;
  while (kept_rank != 0 && (*dims)[kept_rank - 1] == 1) {
    --kept_rank;
  }

  if (kept_rank == full_rank) {
    return;  // nothing to trim
  }

  auto shape = framework::vectorize(*dims);
  shape.resize(kept_rank);
  *dims = framework::make_ddim(shape);
}
/// Read-only iterator that walks a small operand `y` in step with a larger
/// operand so that `y` appears broadcast.
///
/// `y` is treated as `n` values. Each value is yielded `post` times in a row
/// before the iterator moves to the next one, and after the n-th value the
/// sequence starts again from the first. Example: for (4,20,2) + (20,), the
/// (20,) operand behaves like (20,1); with n = 20 and post = 2 the iterator
/// yields y[0], y[0], y[1], y[1], ..., y[19], y[19], y[0], ...
template <typename T>
class MidWiseTransformIterator {
 public:
  /// @param ptr  first element of the operand being broadcast.
  /// @param n    number of elements to cycle through.
  /// @param post number of consecutive steps each element is held for.
  MidWiseTransformIterator(const T *ptr, int n, int post)
      : ptr_(ptr), i_(0), j_(0), n_(n), post_(post) {}

  /// Advances by one step: bumps the repeat counter and, once an element has
  /// been held for `post` steps, moves to the next element, wrapping to the
  /// first one after the n-th. With post == 1 the repeat counter is skipped.
  MidWiseTransformIterator<T> &operator++() {
    if (post_ != 1) {
      ++j_;
      if (UNLIKELY(j_ == post_)) {
        ++i_;
        j_ = 0;
        if (UNLIKELY(i_ == n_)) {
          i_ = 0;
        }
      }
      return *this;
    } else {
      ++i_;
      if (UNLIKELY(i_ == n_)) {
        i_ = 0;
      }
      return *this;
    }
  }

  /// Two iterators compare equal when they currently refer to the same
  /// element address; the repeat counter is not part of the comparison.
  bool operator==(const MidWiseTransformIterator<T> &rhs) const {
    return (ptr_ + i_) == &(*rhs);
  }

  bool operator!=(const MidWiseTransformIterator<T> &rhs) const {
    return (ptr_ + i_) != &(*rhs);
  }

  /// Current element. Const-qualified so that it can be called on the const
  /// right-hand side of the comparison operators above.
  const T &operator*() const { return ptr_[i_]; }

 private:
  const T *ptr_;
  int64_t i_;     // index of the current element, in [0, n_)
  int64_t j_;     // how many steps the current element has been held
  int64_t n_;     // number of elements to cycle through
  int64_t post_;  // steps to hold each element
};
/// Bundles the raw buffers of two input tensors and one output tensor with a
/// binary functor, and runs them through `math::Transform`.
///
/// The constructor captures `x->data<T>()`, `y->data<T>()`,
/// `z->mutable_data<OutType>()` and `x->numel()`; the number of elements
/// processed is therefore always taken from `x`.
template <typename Functor, typename T, typename OutType = T>
class TransformFunctor {
public:
TransformFunctor(const framework::Tensor *x, const framework::Tensor *y,
framework::Tensor *z, Functor func)
: x_(x->data<T>()),
y_(y->data<T>()),
z_(z->mutable_data<OutType>()),
nx_(x->numel()),
func_(func) {}
/// Same-shape case: `y_` is read through a plain pointer, one element per
/// element of `x_`.
inline void Run() const {
math::Transform trans;
// Apply func to each (x_, y_) pair and store the result in z_.
trans(x_, x_ + nx_, y_, z_, func_);
}
/// Broadcast case: `y_` is read through a MidWiseTransformIterator built from
/// `n` and `post`. `pre` is accepted but not used here.
inline void RunMidWise(int n, int pre, int post) const {
math::Transform trans;
trans(x_, x_ + nx_, MidWiseTransformIterator<T>(y_, n, post), z_, func_);
}
private:
const T *x_;
const T *y_;
OutType *z_;
int64_t nx_;
Functor func_;
};
/// Element-wise binary operation over `x` and `y`, written to `z`, where `y`
/// may have fewer dimensions than `x`.
///
/// If both shapes are equal the functor is run directly. Otherwise `axis`
/// selects where `y` lines up inside the shape of `x`, trailing size-1
/// dimensions of `y` are dropped, and the broadcast path is taken.
///
/// @param x    first operand; its rank must be >= the rank of `y`.
/// @param y    second operand.
/// @param axis position in x's shape where y's shape starts, or -1.
/// @param func binary functor handed to TransformFunctor.
/// @param z    output tensor.
template <typename Functor, typename T, typename OutType = T>
void ElementwiseComputeEx(const framework::Tensor *x,
const framework::Tensor *y, int axis, Functor func,
framework::Tensor *z) {
TransformFunctor<Functor, T, OutType> functor(x, y, z, func);
auto x_dims = x->dims();
auto y_dims = y->dims();
PADDLE_MOBILE_ENFORCE(x_dims.size() >= y_dims.size(),
"Rank of first input must >= rank of second input.");
// Identical shapes need no broadcasting.
if (x_dims == y_dims) {
functor.Run();
return;
}
/// axis == -1 aligns y with the trailing dimensions of x.
axis = (axis == -1 ? x_dims.size() - y_dims.size() : axis);
PADDLE_MOBILE_ENFORCE(axis >= 0 && axis < x_dims.size(),
"Axis should be in range [0, x_dims)");
trim_trailing_singular_dims(&y_dims);
// If trimming left y without any dimension, move axis to the rank of x.
axis = (y_dims.size() == 0) ? x_dims.size() : axis;
int pre, n, post;
get_mid_dims(x_dims, y_dims, axis, &pre, &n, &post);
functor.RunMidWise(n, pre, post);
/// Read-only iterator that walks a small operand `y` in step with a larger
/// operand so that `y` appears broadcast.
///
/// `y` is treated as `n` values. Each value is yielded `post` times in a row
/// before the iterator moves to the next one, and after the n-th value the
/// sequence starts again from the first. Example: for (4,20,2) + (20,), the
/// (20,) operand behaves like (20,1); with n = 20 and post = 2 the iterator
/// yields y[0], y[0], y[1], y[1], ..., y[19], y[19], y[0], ...
template <typename T>
class MidWiseTransformIterator {
 public:
  /// @param ptr  first element of the operand being broadcast.
  /// @param n    number of elements to cycle through.
  /// @param post number of consecutive steps each element is held for.
  MidWiseTransformIterator(const T *ptr, int n, int post)
      : ptr_(ptr), i_(0), j_(0), n_(n), post_(post) {}

  /// Advances by one step: bumps the repeat counter and, once an element has
  /// been held for `post` steps, moves to the next element, wrapping to the
  /// first one after the n-th. With post == 1 the repeat counter is skipped.
  MidWiseTransformIterator<T> &operator++() {
    if (post_ != 1) {
      ++j_;
      if (UNLIKELY(j_ == post_)) {
        ++i_;
        j_ = 0;
        if (UNLIKELY(i_ == n_)) {
          i_ = 0;
        }
      }
      return *this;
    } else {
      ++i_;
      if (UNLIKELY(i_ == n_)) {
        i_ = 0;
      }
      return *this;
    }
  }

  /// Two iterators compare equal when they currently refer to the same
  /// element address; the repeat counter is not part of the comparison.
  bool operator==(const MidWiseTransformIterator<T> &rhs) const {
    return (ptr_ + i_) == &(*rhs);
  }

  bool operator!=(const MidWiseTransformIterator<T> &rhs) const {
    return (ptr_ + i_) != &(*rhs);
  }

  /// Current element. Const-qualified so that it can be called on the const
  /// right-hand side of the comparison operators above.
  const T &operator*() const { return ptr_[i_]; }

 private:
  const T *ptr_;
  int64_t i_;     // index of the current element, in [0, n_)
  int64_t j_;     // how many steps the current element has been held
  int64_t n_;     // number of elements to cycle through
  int64_t post_;  // steps to hold each element
};
/// Bundles the raw buffers of two input tensors and one output tensor with a
/// binary functor, and runs them through `math::Transform`.
///
/// The constructor captures `x->data<T>()`, `y->data<T>()`,
/// `z->mutable_data<OutType>()` and `x->numel()`; the number of elements
/// processed is therefore always taken from `x`.
template <typename Functor, typename T, typename OutType = T>
class TransformFunctor {
public:
TransformFunctor(const framework::Tensor *x, const framework::Tensor *y,
framework::Tensor *z, Functor func)
: x_(x->data<T>()),
y_(y->data<T>()),
z_(z->mutable_data<OutType>()),
nx_(x->numel()),
func_(func) {}
/// Same-shape case: `y_` is read through a plain pointer, one element per
/// element of `x_`.
inline void Run() const {
math::Transform trans;
// Apply func to each (x_, y_) pair and store the result in z_.
trans(x_, x_ + nx_, y_, z_, func_);
}
/// Broadcast case: `y_` is read through a MidWiseTransformIterator built from
/// `n` and `post`. `pre` is accepted but not used here.
inline void RunMidWise(int n, int pre, int post) const {
math::Transform trans;
trans(x_, x_ + nx_, MidWiseTransformIterator<T>(y_, n, post), z_, func_);
}
private:
const T *x_;
const T *y_;
OutType *z_;
int64_t nx_;
Functor func_;
};
/// Element-wise binary operation over `x` and `y`, written to `z`, where `y`
/// may have fewer dimensions than `x`.
///
/// If both shapes are equal the functor is run directly. Otherwise `axis`
/// selects where `y` lines up inside the shape of `x`, trailing size-1
/// dimensions of `y` are dropped, and the broadcast path is taken.
///
/// @param x    first operand; its rank must be >= the rank of `y`.
/// @param y    second operand.
/// @param axis position in x's shape where y's shape starts, or -1 to align
///             y with the trailing dimensions of x.
/// @param func binary functor handed to TransformFunctor.
/// @param z    output tensor.
template <typename Functor, typename T, typename OutType = T>
void ElementwiseComputeEx(const framework::Tensor *x,
                          const framework::Tensor *y, int axis, Functor func,
                          framework::Tensor *z) {
  TransformFunctor<Functor, T, OutType> runner(x, y, z, func);

  auto x_dims = x->dims();
  auto y_dims = y->dims();
  PADDLE_MOBILE_ENFORCE(x_dims.size() >= y_dims.size(),
                        "Rank of first input must >= rank of second input.");

  // Identical shapes need no broadcasting.
  if (x_dims == y_dims) {
    runner.Run();
    return;
  }

  // axis == -1 aligns y with the trailing dimensions of x.
  if (axis == -1) {
    axis = x_dims.size() - y_dims.size();
  }
  PADDLE_MOBILE_ENFORCE(axis >= 0 && axis < x_dims.size(),
                        "Axis should be in range [0, x_dims)");

  trim_trailing_singular_dims(&y_dims);
  // If trimming left y without any dimension, move axis to the rank of x.
  if (y_dims.size() == 0) {
    axis = x_dims.size();
  }

  // All three extents are overwritten by get_mid_dims.
  int pre = 1;
  int n = 1;
  int post = 1;
  get_mid_dims(x_dims, y_dims, axis, &pre, &n, &post);
  runner.RunMidWise(n, pre, post);
}
} // namespace operators
} // namespace paddle_mobile
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册