提交 234cebcb 编写于 作者: E eclipsycn 提交者: GitHub

Merge pull request #419 from dolphin8/develop

#418 a faster transpose implementation
...@@ -11,29 +11,28 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef TRANSPOSE_OP
#include "operators/kernel/transpose_kernel.h"
namespace paddle_mobile {
namespace operators {
template <typename T> // vector<int> pos;
void TransposeFunc(const int numel, const T* input, const vector<int> axis, // template <typename T>
const vector<int> old_strides, const vector<int> new_strides, // void TransposeFunc(const int numel, const T* input, const vector<int> axis,
T* output) { // const vector<int> old_strides, const vector<int>
for (int i = 0; i < numel; ++i) { // new_strides, T* output) {
int old_idx = 0; // for (int i = 0; i < numel; ++i) {
int idx = i; // int old_idx = 0;
for (int j = 0; j < axis.size(); ++j) { // int idx = i;
int order = axis[j]; // for (int j = 0; j < axis.size(); ++j) {
old_idx += (idx / new_strides[j]) * old_strides[order]; // int order = axis[j];
idx %= new_strides[j]; // old_idx += (idx / new_strides[j]) * old_strides[order];
} // idx %= new_strides[j];
output[i] = input[old_idx]; // }
} // output[i] = input[old_idx];
} // }
// }
template <> template <>
void TransposeKernel<CPU, float>::Compute(const TransposeParam& param) const { void TransposeKernel<CPU, float>::Compute(const TransposeParam& param) const {
...@@ -44,28 +43,38 @@ void TransposeKernel<CPU, float>::Compute(const TransposeParam& param) const { ...@@ -44,28 +43,38 @@ void TransposeKernel<CPU, float>::Compute(const TransposeParam& param) const {
const auto* input_x_data = input_x->data<float>(); const auto* input_x_data = input_x->data<float>();
auto* out_data = out->mutable_data<float>(); auto* out_data = out->mutable_data<float>();
size_t axis_size = axis.size(); size_t ndim = axis.size();
std::vector<int> new_dims; std::vector<int> xdim(ndim);
new_dims.reserve(axis_size); std::vector<int> xstride(ndim);
for (auto c : axis) { std::vector<int> xout(ndim);
new_dims.push_back(input_x_dims[c]); for (int i = 0; i < ndim; i++) {
int j = ndim - 1 - i;
xdim[j] = input_x_dims[axis[i]];
xstride[j] = 1;
for (int k = axis[i] + 1; k < ndim; k++) {
xstride[j] *= input_x_dims[k];
}
xout[j] = xstride[j] * xdim[j];
} }
std::vector<int> old_strides; auto numel = input_x->numel();
std::vector<int> new_strides; size_t pind = 0;
for (int i = 0; i < axis.size(); i++) { std::vector<int> ind(ndim);
int temp_old = 1; for (int i = 0; i < numel; i++) {
int temp_new = 1; out_data[i] = input_x_data[pind];
for (int j = i + 1; j < axis.size(); j++) { ind[0]++;
temp_old *= input_x_dims[j]; pind += xstride[0];
temp_new *= new_dims[j]; for (int j = 0; j < ndim - 1; j++) {
if (ind[j] == xdim[j]) {
ind[j + 1]++;
ind[j] = 0;
pind += xstride[j + 1];
pind -= xout[j];
} else {
break;
}
} }
old_strides.push_back(temp_old);
new_strides.push_back(temp_new);
} }
TransposeFunc<float>(input_x->numel(), input_x_data, axis, old_strides,
new_strides, out_data);
} }
} // namespace operators } // namespace operators
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册