diff --git a/src/operators/kernel/arm/batchnorm_kernel.cpp b/src/operators/kernel/arm/batchnorm_kernel.cpp index 32b2ba66e3f29fa844d2900850a4cf53de2c2294..e28bdd7147f300cb181ffc5e0aeebec412ec45e7 100644 --- a/src/operators/kernel/arm/batchnorm_kernel.cpp +++ b/src/operators/kernel/arm/batchnorm_kernel.cpp @@ -71,8 +71,9 @@ void BatchNormKernel::Compute(const BatchNormParam ¶m) const { { for (int n = 0; n < N; n++) { for (int h = 0; h < H; h++) { + int tmp_index = n * stride0 + i * stride1 + h * stride2; for (int w = 0; w < W; w++) { - int index = n * stride0 + i * stride1 + h * stride2 + w; + int index = tmp_index + w; out_ptr[index] = input_x_ptr[index] * new_scale_ptr[i] + new_bias_ptr[i]; } diff --git a/src/operators/kernel/arm/concat_kernel.cpp b/src/operators/kernel/arm/concat_kernel.cpp index 803234e61b05f69fa1a0be10cec1965017327754..705b698dbe9e9768713417f85ae2879df66acf9e 100644 --- a/src/operators/kernel/arm/concat_kernel.cpp +++ b/src/operators/kernel/arm/concat_kernel.cpp @@ -51,38 +51,6 @@ class ConcatFunctor { } } }; -template -void StridedNumelCopyWithAxis(int64_t axis, T *dst, - const framework::DDim &dst_stride_numel, - const T *src, - const framework::DDim &src_stride_numel, - int64_t size) { - int64_t before = dst_stride_numel[0] / dst_stride_numel[axis]; - int64_t src_after = src_stride_numel[axis]; - int64_t dst_after = dst_stride_numel[axis]; - - /// "src and dst tensor should have the same dims size." - assert(src_stride_numel.size() == dst_stride_numel.size()); - - for (int64_t i = 0; i < axis; ++i) { - if (i < axis) { - /// src and dst should have the same elements - /// except the specified axis. - assert(src_stride_numel[i] / src_stride_numel[axis] == - dst_stride_numel[i] / dst_stride_numel[axis]); - - } else if (i == axis) { - continue; - } else { - /// "src and dst should have the same elements " - /// "except the specified axis." - assert(src_stride_numel[i] == dst_stride_numel[i]); - } - } - for (int64_t i = 0; i < before; ++i) { - memory::Copy(dst + i * dst_after, src + i * src_after, sizeof(T) * size); - } -} template <> void ConcatKernel::Compute(const ConcatParam ¶m) const { @@ -97,10 +65,13 @@ void ConcatKernel::Compute(const ConcatParam ¶m) const { for (auto *in : inputs) { auto in_stride = framework::stride_numel(in->dims()); auto out_stride = framework::stride_numel(out->dims()); - StridedNumelCopyWithAxis(axis, out->data() + output_offset, - out_stride, in->data(), in_stride, - in_stride[axis]); - output_offset += in_stride[axis]; + auto dst = out->data() + output_offset; + auto src = in->data(); + PADDLE_MOBILE_ENFORCE( + in_stride.size() == out_stride.size(), + "src and dst tensor should have the same dims size."); + memory::Copy(dst, src, sizeof(float) * in_stride[0]); + output_offset += in_stride[0]; } } else { std::vector inputs_concat(inputs.size()); diff --git a/src/operators/kernel/arm/relu_kernel.cpp b/src/operators/kernel/arm/relu_kernel.cpp index 2441d453b9fa4e5423fd7087c14f7fce6cbaa825..e0badea51e7da4f3119c9303b259259ba8b48e80 100644 --- a/src/operators/kernel/arm/relu_kernel.cpp +++ b/src/operators/kernel/arm/relu_kernel.cpp @@ -15,19 +15,30 @@ limitations under the License. */ #pragma once #include "operators/kernel/relu_kernel.h" +#include namespace paddle_mobile { namespace operators { +template +struct ReluFunctor { + inline T operator()(T in) const { return in > 0 ? in : 0; } +}; + template <> void ReluKernel::Compute(const ReluParam ¶m) const { const auto *input_x = param.InputX(); auto *input_x_ptr = input_x->data(); auto *out = param.Out(); auto *out_ptr = out->mutable_data(); - for (int i = 0; i < input_x->numel(); i++) { - out_ptr[i] = input_x_ptr[i] > 0 ? input_x_ptr[i] : 0; - } + + ReluFunctor func_; + math::Transform trans; + trans(input_x_ptr, input_x_ptr + input_x->numel(), out_ptr, func_); + + // for (int i = 0; i < input_x->numel(); i++) { + // out_ptr[i] = input_x_ptr[i] > 0 ? input_x_ptr[i] : 0; + // } } } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/kernel/lrn_kernel.h b/src/operators/kernel/lrn_kernel.h index 74e7a29104a981c6b0cfa2dc01e7c64210699f1a..f5fd8313482a92aad0c01d3e0acc9dcfcc83f2d8 100644 --- a/src/operators/kernel/lrn_kernel.h +++ b/src/operators/kernel/lrn_kernel.h @@ -42,12 +42,13 @@ struct LRNFunctor { for (int index = start; index < end; index++) { int channel = b + index; if (channel >= 0 && channel < C) { + int tmp_u = a * stride0 + b * stride1; + int tmp_i = a * stride0 + channel * stride1; for (int c = 0; c < H; c++) { for (int d = 0; d < W; d++) { - int u = a * stride0 + b * stride1 + c * stride2 + d; - - int i = a * stride0 + channel * stride1 + c * stride2 + d; - + int tmp = c * stride2 + d; + int u = tmp_u + tmp; + int i = tmp_i + tmp; sqr_buffer_ptr[u] += alpha * input_ptr[i] * input_ptr[i]; } } diff --git a/src/operators/math/elementwise_op_function.h b/src/operators/math/elementwise_op_function.h index e26f5225471b7ad639f19556e0c68a00230c65ec..95fd037988b1401597d17a58f12fc4c460045a33 100644 --- a/src/operators/math/elementwise_op_function.h +++ b/src/operators/math/elementwise_op_function.h @@ -67,35 +67,6 @@ inline void trim_trailing_singular_dims(framework::DDim *dims) { } } -template -class RowwiseTransformIterator { - public: - RowwiseTransformIterator(const T *ptr, int n) : ptr_(ptr), i_(0), n_(n) {} - - RowwiseTransformIterator &operator++() { - ++i_; - if (UNLIKELY(i_ == n_)) { - i_ = 0; - } - return *this; - } - - bool operator==(const RowwiseTransformIterator &rhs) const { - return (ptr_ + i_) == &(*rhs); - } - - bool operator!=(const RowwiseTransformIterator &rhs) const { - return (ptr_ + i_) != &(*rhs); - } - - const T &operator*() { return ptr_[i_]; } - - private: - const T *ptr_; - int i_; - int64_t n_; -}; - /// (4,20,2)+(20,): (20,) just as (20,1), when move 2 strides in last /// dimension /// in (4,20,2) is 2 , @@ -107,15 +78,23 @@ class MidWiseTransformIterator { : ptr_(ptr), i_(0), j_(0), n_(n), post_(post) {} MidWiseTransformIterator &operator++() { - ++j_; - if (UNLIKELY(j_ == post_)) { + if (post_ != 1) { + ++j_; + if (UNLIKELY(j_ == post_)) { + ++i_; + j_ = 0; + if (UNLIKELY(i_ == n_)) { + i_ = 0; + } + } + return *this; + } else { ++i_; - j_ = 0; if (UNLIKELY(i_ == n_)) { i_ = 0; } + return *this; } - return *this; } bool operator==(const MidWiseTransformIterator &rhs) const { @@ -153,11 +132,6 @@ class TransformFunctor { trans(x_, x_ + nx_, y_, z_, func_); } - inline void RunRowWise(int n, int pre) const { - math::Transform trans; - trans(x_, x_ + nx_, RowwiseTransformIterator(y_, n), z_, func_); - } - inline void RunMidWise(int n, int pre, int post) const { math::Transform trans; trans(x_, x_ + nx_, MidWiseTransformIterator(y_, n, post), z_, func_); @@ -179,31 +153,25 @@ void ElementwiseComputeEx(const framework::Tensor *x, auto x_dims = x->dims(); auto y_dims = y->dims(); - // PADDLE_ENFORCE_GE(x_dims.size(), y_dims.size(), - // "Rank of first input must >= rank of second - // input."); + PADDLE_MOBILE_ENFORCE(x_dims.size() >= y_dims.size(), + "Rank of first input must >= rank of second input."); if (x_dims == y_dims) { functor.Run(); return; } - /// axis = -1 represent the last dimension. + /// axis = -1 represent the last dimensions. axis = (axis == -1 ? x_dims.size() - y_dims.size() : axis); - // PADDLE_ENFORCE(axis >= 0 && axis < x_dims.size(), - // "Axis should be in range [0, x_dims)"); + PADDLE_MOBILE_ENFORCE(axis >= 0 && axis < x_dims.size(), + "Axis should be in range [0, x_dims)"); trim_trailing_singular_dims(&y_dims); axis = (y_dims.size() == 0) ? x_dims.size() : axis; int pre, n, post; get_mid_dims(x_dims, y_dims, axis, &pre, &n, &post); - if (post == 1) { - functor.RunRowWise(n, pre); - return; - } else { - functor.RunMidWise(n, pre, post); - return; - } + + functor.RunMidWise(n, pre, post); } } // namespace operators