未验证 提交 6b3c341f 编写于 作者: H HappyAngel 提交者: GitHub

add NHWC NCHW transform, test=develop (#2381)

* add nhwc to nchw

* add layout in funcs

* change layout as extra, test=develop

* change make, test=develop

* use template class method to update layout NCHW and NHWC transform, test=develop

* fix cmake error, set layout to extra, test=develop

* fix test_layout_compute_arm test, its extra

* layout is extra, test=develop

* fix error in kernels/arm/layout_comput.cc when register kernel, DataLayout must be NCHW, test=develop

* delete extra note, test=develop

* delete extra test

* delete layout_test, test=develop

, its in tests/math/layout_comput_test

* delete extrat test, test=develop
上级 66d2ae25
...@@ -57,8 +57,8 @@ endif() ...@@ -57,8 +57,8 @@ endif()
if (NOT HAS_ARM_MATH_LIB_DIR) if (NOT HAS_ARM_MATH_LIB_DIR)
# TODO(xxx): seperate them and do not deps proto, eigen3 # TODO(xxx): seperate them and do not deps proto, eigen3
cc_library(math_arm SRCS cc_library(math_arm SRCS
funcs.cc funcs.cc
packed_sgemm.cc packed_sgemm.cc
packed_sgemm_c4.cc packed_sgemm_c4.cc
sgemm.cc sgemm.cc
...@@ -68,8 +68,10 @@ if (NOT HAS_ARM_MATH_LIB_DIR) ...@@ -68,8 +68,10 @@ if (NOT HAS_ARM_MATH_LIB_DIR)
gemv_arm_int8.cc gemv_arm_int8.cc
conv3x3s1_direct_fp32.cc conv3x3s1_direct_fp32.cc
conv3x3s2_direct_fp32.cc conv3x3s2_direct_fp32.cc
conv3x3s1_depthwise_fp32.cc conv3x3s1p01_depthwise_fp32.cc
conv3x3s2_depthwise_fp32.cc conv3x3s2p01_depthwise_fp32.cc
conv3x3s1px_depthwise_fp32.cc
conv3x3s2px_depthwise_fp32.cc
conv3x3s1_direct_int8.cc conv3x3s1_direct_int8.cc
conv3x3s2_direct_int8.cc conv3x3s2_direct_int8.cc
conv3x3s1_depthwise_int8.cc conv3x3s1_depthwise_int8.cc
...@@ -77,16 +79,13 @@ if (NOT HAS_ARM_MATH_LIB_DIR) ...@@ -77,16 +79,13 @@ if (NOT HAS_ARM_MATH_LIB_DIR)
conv5x5s1_depthwise_int8.cc conv5x5s1_depthwise_int8.cc
conv5x5s1_depthwise_fp32.cc conv5x5s1_depthwise_fp32.cc
conv5x5s2_depthwise_fp32.cc conv5x5s2_depthwise_fp32.cc
conv_depthwise_3x3p0.cc
conv_depthwise_3x3p1.cc
conv_depthwise_3x3s1.cc
conv_depthwise_3x3s2.cc
conv_winograd_3x3.cc conv_winograd_3x3.cc
conv_impl.cc conv_impl.cc
softmax.cc softmax.cc
scale.cc scale.cc
pooling.cc pooling.cc
elementwise.cc elementwise.cc
layout.cc
lrn.cc lrn.cc
decode_bboxes.cc decode_bboxes.cc
concat.cc concat.cc
...@@ -122,4 +121,3 @@ if (NOT HAS_ARM_MATH_LIB_DIR) ...@@ -122,4 +121,3 @@ if (NOT HAS_ARM_MATH_LIB_DIR)
anchor_generator.cc anchor_generator.cc
DEPS ${lite_kernel_deps} context tensor) DEPS ${lite_kernel_deps} context tensor)
endif() endif()
...@@ -12,8 +12,8 @@ ...@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "lite/backends/arm/math/conv_depthwise.h"
#include <arm_neon.h> #include <arm_neon.h>
#include "lite/backends/arm/math/conv_depthwise.h"
namespace paddle { namespace paddle {
namespace lite { namespace lite {
......
...@@ -12,8 +12,8 @@ ...@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "lite/backends/arm/math/conv_depthwise.h"
#include <arm_neon.h> #include <arm_neon.h>
#include "lite/backends/arm/math/conv_depthwise.h"
namespace paddle { namespace paddle {
namespace lite { namespace lite {
......
...@@ -361,7 +361,6 @@ void conv_im2col_gemm(const float* i_data, ...@@ -361,7 +361,6 @@ void conv_im2col_gemm(const float* i_data,
float* tmp_work_space = float* tmp_work_space =
ctx->workspace_data<float>() + ctx->llc_size() / sizeof(float); ctx->workspace_data<float>() + ctx->llc_size() / sizeof(float);
//! use gemv when the output channel size = 1 //! use gemv when the output channel size = 1
for (int b = 0; b < num; ++b) { for (int b = 0; b < num; ++b) {
// dC // dC
......
...@@ -39,6 +39,7 @@ ...@@ -39,6 +39,7 @@
#include "lite/backends/arm/math/im2sequence.h" #include "lite/backends/arm/math/im2sequence.h"
#include "lite/backends/arm/math/increment.h" #include "lite/backends/arm/math/increment.h"
#include "lite/backends/arm/math/interpolate.h" #include "lite/backends/arm/math/interpolate.h"
#include "lite/backends/arm/math/layout.h"
#include "lite/backends/arm/math/lrn.h" #include "lite/backends/arm/math/lrn.h"
#include "lite/backends/arm/math/negative.h" #include "lite/backends/arm/math/negative.h"
#include "lite/backends/arm/math/norm.h" #include "lite/backends/arm/math/norm.h"
......
此差异已折叠。
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

namespace paddle {
namespace lite {
namespace arm {
namespace math {

// Layout-transform primitives for 4-D tensors (declarations only; the
// implementation lives in the corresponding layout.cc added by this commit).
//
// Naming suggests these transpose between channel-first (NCHW) and
// channel-last (NHWC) memory layouts — NOTE(review): inferred from the
// function names; confirm against the definitions in layout.cc.
//
// Parameters (both functions):
//   N   - batch size
//   C   - number of channels
//   HxW - product of the spatial dimensions (H * W), flattened
//   X   - input buffer, N * C * HxW contiguous elements
//   Y   - output buffer, same element count as X

// Presumably reorders X from NCHW order into Y in NHWC order.
template <typename T>
void NCHW2NHWC(int N, int C, int HxW, const T* X, T* Y);

// Presumably reorders X from NHWC order into Y in NCHW order.
template <typename T>
void NHWC2NCHW(int N, int C, int HxW, const T* X, T* Y);

}  // namespace math
}  // namespace arm
}  // namespace lite
}  // namespace paddle
...@@ -145,6 +145,12 @@ class KernelRegistry final { ...@@ -145,6 +145,12 @@ class KernelRegistry final {
KernelRegistryForTarget<TARGET(kARM), KernelRegistryForTarget<TARGET(kARM),
PRECISION(kInt8), PRECISION(kInt8),
DATALAYOUT(kNCHW)> *, // DATALAYOUT(kNCHW)> *, //
KernelRegistryForTarget<TARGET(kARM),
PRECISION(kFloat),
DATALAYOUT(kNHWC)> *, //
KernelRegistryForTarget<TARGET(kARM),
PRECISION(kInt8),
DATALAYOUT(kNHWC)> *, //
KernelRegistryForTarget<TARGET(kOpenCL), KernelRegistryForTarget<TARGET(kOpenCL),
PRECISION(kFloat), PRECISION(kFloat),
......
...@@ -46,6 +46,7 @@ add_kernel(reduce_max_compute_arm ARM basic SRCS reduce_max_compute.cc DEPS ${li ...@@ -46,6 +46,7 @@ add_kernel(reduce_max_compute_arm ARM basic SRCS reduce_max_compute.cc DEPS ${li
add_kernel(sequence_expand_compute_arm ARM basic SRCS sequence_expand_compute.cc DEPS ${lite_kernel_deps} math_arm) add_kernel(sequence_expand_compute_arm ARM basic SRCS sequence_expand_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(im2sequence_compute_arm ARM basic SRCS im2sequence_compute.cc DEPS ${lite_kernel_deps} math_arm) add_kernel(im2sequence_compute_arm ARM basic SRCS im2sequence_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(sequence_pool_compute_arm ARM basic SRCS sequence_pool_compute.cc DEPS ${lite_kernel_deps} math_arm) add_kernel(sequence_pool_compute_arm ARM basic SRCS sequence_pool_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(layout_compute_arm ARM extra SRCS layout_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(layer_norm_compute_arm ARM extra SRCS layer_norm_compute.cc DEPS ${lite_kernel_deps} math_arm) add_kernel(layer_norm_compute_arm ARM extra SRCS layer_norm_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(gather_compute_arm ARM extra SRCS gather_compute.cc DEPS ${lite_kernel_deps} math_arm) add_kernel(gather_compute_arm ARM extra SRCS gather_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(reduce_mean_compute_arm ARM extra SRCS reduce_mean_compute.cc DEPS ${lite_kernel_deps} math_arm) add_kernel(reduce_mean_compute_arm ARM extra SRCS reduce_mean_compute.cc DEPS ${lite_kernel_deps} math_arm)
...@@ -101,7 +102,6 @@ lite_cc_test(test_transpose_compute_arm SRCS transpose_compute_test.cc DEPS tran ...@@ -101,7 +102,6 @@ lite_cc_test(test_transpose_compute_arm SRCS transpose_compute_test.cc DEPS tran
lite_cc_test(test_argmax_compute_arm SRCS argmax_compute_test.cc DEPS argmax_compute_arm) lite_cc_test(test_argmax_compute_arm SRCS argmax_compute_test.cc DEPS argmax_compute_arm)
lite_cc_test(test_axpy_compute_arm SRCS axpy_compute_test.cc DEPS axpy_compute_arm) lite_cc_test(test_axpy_compute_arm SRCS axpy_compute_test.cc DEPS axpy_compute_arm)
lite_cc_test(test_conv_transpose_compute_arm SRCS conv_transpose_compute_test.cc DEPS conv_transpose_compute_arm) lite_cc_test(test_conv_transpose_compute_arm SRCS conv_transpose_compute_test.cc DEPS conv_transpose_compute_arm)
if(LITE_BUILD_EXTRA) if(LITE_BUILD_EXTRA)
lite_cc_test(test_layer_norm_compute_arm SRCS layer_norm_compute_test.cc DEPS layer_norm_compute_arm) lite_cc_test(test_layer_norm_compute_arm SRCS layer_norm_compute_test.cc DEPS layer_norm_compute_arm)
lite_cc_test(test_lookup_table_compute_arm SRCS lookup_table_compute_test.cc DEPS lookup_table_compute_arm) lite_cc_test(test_lookup_table_compute_arm SRCS lookup_table_compute_test.cc DEPS lookup_table_compute_arm)
......
此差异已折叠。
此差异已折叠。
...@@ -8,4 +8,10 @@ if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA) AND (LITE_WITH_X86 OR LITE_WITH ...@@ -8,4 +8,10 @@ if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA) AND (LITE_WITH_X86 OR LITE_WITH
lite_cc_test(conv_transpose_compute_test SRCS conv_transpose_compute_test.cc DEPS arena_framework ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(conv_transpose_compute_test SRCS conv_transpose_compute_test.cc DEPS arena_framework ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(conv_int8_compute_test SRCS conv_int8_compute_test.cc DEPS arena_framework ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(conv_int8_compute_test SRCS conv_int8_compute_test.cc DEPS arena_framework ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(pool_compute_test SRCS pool_compute_test.cc DEPS arena_framework ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(pool_compute_test SRCS pool_compute_test.cc DEPS arena_framework ${arm_kernels} ${lite_ops} ${host_kernels})
if(LITE_BUILD_EXTRA)
lite_cc_test(layout_compute_test SRCS layout_compute_test.cc DEPS arena_framework ${arm_kernels} ${lite_ops} ${host_kernels})
endif()
endif() endif()
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册