提交 b1e4f4fd 编写于 作者: H HappyAngel 提交者: GitHub

add NHWC NCHW transform, test=develop (#2381)

* add nhwc to nchw

* add layout in funcs

* change layout as extra, test=develop

* change make, test=develop

* use template class method to update layout NCHW and NHWC transform, test=develop

* fix cmake error, set layout to extra, test=develop

* fix test_layout_compute_arm test, it's extra

* layout is extra, test=develop

* fix error in kernels/arm/layout_compute.cc when register kernel, DataLayout must be NCHW, test=develop

* delete extra note, test=develop

* delete extra test

* delete layout_test, test=develop

, it's in tests/math/layout_compute_test

* delete extra test, test=develop
上级 2f3336c1
......@@ -68,8 +68,10 @@ if (NOT HAS_ARM_MATH_LIB_DIR)
gemv_arm_int8.cc
conv3x3s1_direct_fp32.cc
conv3x3s2_direct_fp32.cc
conv3x3s1_depthwise_fp32.cc
conv3x3s2_depthwise_fp32.cc
conv3x3s1p01_depthwise_fp32.cc
conv3x3s2p01_depthwise_fp32.cc
conv3x3s1px_depthwise_fp32.cc
conv3x3s2px_depthwise_fp32.cc
conv3x3s1_direct_int8.cc
conv3x3s2_direct_int8.cc
conv3x3s1_depthwise_int8.cc
......@@ -77,16 +79,13 @@ if (NOT HAS_ARM_MATH_LIB_DIR)
conv5x5s1_depthwise_int8.cc
conv5x5s1_depthwise_fp32.cc
conv5x5s2_depthwise_fp32.cc
conv_depthwise_3x3p0.cc
conv_depthwise_3x3p1.cc
conv_depthwise_3x3s1.cc
conv_depthwise_3x3s2.cc
conv_winograd_3x3.cc
conv_impl.cc
softmax.cc
scale.cc
pooling.cc
elementwise.cc
layout.cc
lrn.cc
decode_bboxes.cc
concat.cc
......@@ -122,4 +121,3 @@ if (NOT HAS_ARM_MATH_LIB_DIR)
anchor_generator.cc
DEPS ${lite_kernel_deps} context tensor)
endif()
......@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/backends/arm/math/conv_depthwise.h"
#include <arm_neon.h>
#include "lite/backends/arm/math/conv_depthwise.h"
namespace paddle {
namespace lite {
......
......@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/backends/arm/math/conv_depthwise.h"
#include <arm_neon.h>
#include "lite/backends/arm/math/conv_depthwise.h"
namespace paddle {
namespace lite {
......
......@@ -361,7 +361,6 @@ void conv_im2col_gemm(const float* i_data,
float* tmp_work_space =
ctx->workspace_data<float>() + ctx->llc_size() / sizeof(float);
//! use gemv when the output channel size = 1
for (int b = 0; b < num; ++b) {
// dC
......
......@@ -39,6 +39,7 @@
#include "lite/backends/arm/math/im2sequence.h"
#include "lite/backends/arm/math/increment.h"
#include "lite/backends/arm/math/interpolate.h"
#include "lite/backends/arm/math/layout.h"
#include "lite/backends/arm/math/lrn.h"
#include "lite/backends/arm/math/negative.h"
#include "lite/backends/arm/math/norm.h"
......
此差异已折叠。
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
namespace paddle {
namespace lite {
namespace arm {
namespace math {
// Tensor data-layout conversion routines (declarations only; definitions
// live in the corresponding layout.cc in this directory).
//
// Converts a tensor stored in NCHW order into NHWC order.
//   N   - batch size
//   C   - number of channels
//   HxW - product of the spatial dimensions (height * width); the two
//         spatial axes are treated as one flattened axis
//   X   - input buffer (NCHW); Y - output buffer (NHWC)
// NOTE(review): X and Y are presumably non-overlapping buffers of
// N*C*HxW elements each — confirm against the implementation.
template <typename T>
void NCHW2NHWC(int N, int C, int HxW, const T* X, T* Y);
// Inverse transform: converts a tensor stored in NHWC order into NCHW
// order. Parameters mirror NCHW2NHWC above.
template <typename T>
void NHWC2NCHW(int N, int C, int HxW, const T* X, T* Y);
} // namespace math
} // namespace arm
} // namespace lite
} // namespace paddle
......@@ -145,6 +145,12 @@ class KernelRegistry final {
KernelRegistryForTarget<TARGET(kARM),
PRECISION(kInt8),
DATALAYOUT(kNCHW)> *, //
KernelRegistryForTarget<TARGET(kARM),
PRECISION(kFloat),
DATALAYOUT(kNHWC)> *, //
KernelRegistryForTarget<TARGET(kARM),
PRECISION(kInt8),
DATALAYOUT(kNHWC)> *, //
KernelRegistryForTarget<TARGET(kOpenCL),
PRECISION(kFloat),
......
......@@ -46,6 +46,7 @@ add_kernel(reduce_max_compute_arm ARM basic SRCS reduce_max_compute.cc DEPS ${li
add_kernel(sequence_expand_compute_arm ARM basic SRCS sequence_expand_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(im2sequence_compute_arm ARM basic SRCS im2sequence_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(sequence_pool_compute_arm ARM basic SRCS sequence_pool_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(layout_compute_arm ARM extra SRCS layout_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(layer_norm_compute_arm ARM extra SRCS layer_norm_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(gather_compute_arm ARM extra SRCS gather_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(reduce_mean_compute_arm ARM extra SRCS reduce_mean_compute.cc DEPS ${lite_kernel_deps} math_arm)
......@@ -101,7 +102,6 @@ lite_cc_test(test_transpose_compute_arm SRCS transpose_compute_test.cc DEPS tran
lite_cc_test(test_argmax_compute_arm SRCS argmax_compute_test.cc DEPS argmax_compute_arm)
lite_cc_test(test_axpy_compute_arm SRCS axpy_compute_test.cc DEPS axpy_compute_arm)
lite_cc_test(test_conv_transpose_compute_arm SRCS conv_transpose_compute_test.cc DEPS conv_transpose_compute_arm)
if(LITE_BUILD_EXTRA)
lite_cc_test(test_layer_norm_compute_arm SRCS layer_norm_compute_test.cc DEPS layer_norm_compute_arm)
lite_cc_test(test_lookup_table_compute_arm SRCS lookup_table_compute_test.cc DEPS lookup_table_compute_arm)
......
此差异已折叠。
此差异已折叠。
......@@ -8,4 +8,10 @@ if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA) AND (LITE_WITH_X86 OR LITE_WITH
lite_cc_test(conv_transpose_compute_test SRCS conv_transpose_compute_test.cc DEPS arena_framework ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(conv_int8_compute_test SRCS conv_int8_compute_test.cc DEPS arena_framework ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(pool_compute_test SRCS pool_compute_test.cc DEPS arena_framework ${arm_kernels} ${lite_ops} ${host_kernels})
if(LITE_BUILD_EXTRA)
lite_cc_test(layout_compute_test SRCS layout_compute_test.cc DEPS arena_framework ${arm_kernels} ${lite_ops} ${host_kernels})
endif()
endif()
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册