From b55942a94df02c59b1bdb7c6ec8c09baa98c9c1a Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Mon, 6 Jun 2022 20:56:30 +0800 Subject: [PATCH] feat(dnn/naive/norm,-dnn/cuda/norm,-dnn/test/norm): add norm dnn opr, fwd only GitOrigin-RevId: 989474168d45c55ab9a45983b93e54cd3526e191 --- dnn/include/megdnn/oprs/general.h | 29 +++ dnn/scripts/opr_param_defs.py | 8 + dnn/src/common/handle_impl.h | 3 +- dnn/src/common/norm.cpp | 43 +++++ dnn/src/common/opr_trait.h | 1 + dnn/src/cuda/handle_create.cpp | 2 + dnn/src/cuda/norm/helper.cu | 28 +++ dnn/src/cuda/norm/helper.h | 226 +++++++++++++++++++++++ dnn/src/cuda/norm/opr_impl.cpp | 180 ++++++++++++++++++ dnn/src/cuda/norm/opr_impl.h | 25 +++ dnn/src/naive/handle.cpp | 1 + dnn/src/naive/norm/helper.h | 152 ++++++++++++++++ dnn/src/naive/norm/opr_impl.cpp | 197 ++++++++++++++++++++ dnn/src/naive/norm/opr_impl.h | 23 +++ dnn/test/common/norm.h | 19 ++ dnn/test/cuda/norm.cpp | 291 ++++++++++++++++++++++++++++++ dnn/test/naive/norm.cpp | 237 ++++++++++++++++++++++++ 17 files changed, 1464 insertions(+), 1 deletion(-) create mode 100644 dnn/src/common/norm.cpp create mode 100644 dnn/src/cuda/norm/helper.cu create mode 100644 dnn/src/cuda/norm/helper.h create mode 100644 dnn/src/cuda/norm/opr_impl.cpp create mode 100644 dnn/src/cuda/norm/opr_impl.h create mode 100644 dnn/src/naive/norm/helper.h create mode 100644 dnn/src/naive/norm/opr_impl.cpp create mode 100644 dnn/src/naive/norm/opr_impl.h create mode 100644 dnn/test/common/norm.h create mode 100644 dnn/test/cuda/norm.cpp create mode 100644 dnn/test/naive/norm.cpp diff --git a/dnn/include/megdnn/oprs/general.h b/dnn/include/megdnn/oprs/general.h index ec6354017..081ebeab7 100644 --- a/dnn/include/megdnn/oprs/general.h +++ b/dnn/include/megdnn/oprs/general.h @@ -1475,6 +1475,35 @@ protected: using LAMB = LAMBUpdate; +class NormBase : public OperatorBase { + DEF_OPR_PARAM(Norm); // package norm params in Norm keyword from py declaration + DEF_OPR_IMPL(NormBase, 
OperatorBase, 1, 1); // constructor and static members + +public: + virtual void deduce_layout(const TensorLayout& src, TensorLayout& dst) = 0; + virtual size_t get_workspace_in_bytes( + const TensorLayout& src, const TensorLayout& dst) = 0; + +protected: + void check_exec( + const TensorLayout& src, const TensorLayout& dst, + size_t workspace_in_bytes); +}; + +class NormForward : public NormBase { + DEF_OPR_IMPL(NormForward, NormBase, 1, 1); + using Mode = Param::Mode; + +public: + virtual void exec( + _megdnn_tensor_in src, _megdnn_tensor_out dst, + _megdnn_workspace workspace) = 0; + virtual void deduce_layout(const TensorLayout& src, TensorLayout& dst); + virtual size_t get_workspace_in_bytes( + const TensorLayout& src, const TensorLayout& dst) = 0; +}; +using Norm = NormForward; + } // namespace megdnn #include "megdnn/internal/opr_header_epilogue.h" diff --git a/dnn/scripts/opr_param_defs.py b/dnn/scripts/opr_param_defs.py index 22055d0ac..824c9cc9a 100755 --- a/dnn/scripts/opr_param_defs.py +++ b/dnn/scripts/opr_param_defs.py @@ -1277,3 +1277,11 @@ PADDING_MODES = [Doc('REPLICATE = 0', 'aaaaaa|abcdefgh|hhhhhhh'), add_fields('bool', Doc('bias_correction', 'whether correct bias'), 'true'). add_fields('bool', Doc('always_adapt', 'apply adaptive lr to 0.0'), 'false') ) +(pdef("Norm"). + add_enum('Mode', + Doc('P_NORM=0', 'calculate p-norm, parameter p would be ignored in other mode'), + Doc('INF_NORM=1', 'infinite norm'), + Doc('NEG_INF_NORM=2', 'negative infinite norm'), name_field="mode"). + add_fields('float32', Doc('p', 'the order of norm'), '2'). 
+ add_fields('int32', Doc('dim', 'which dim the norm performed along'), '-1'), + ) diff --git a/dnn/src/common/handle_impl.h b/dnn/src/common/handle_impl.h index 709199f00..a63bd0784 100644 --- a/dnn/src/common/handle_impl.h +++ b/dnn/src/common/handle_impl.h @@ -212,7 +212,8 @@ private: cb(LAMBUpdate) \ cb(LSTMBackward) \ cb(SoftmaxForward) \ - cb(SoftmaxBackward) + cb(SoftmaxBackward) \ + cb(NormForward) // clang-format on /*! diff --git a/dnn/src/common/norm.cpp b/dnn/src/common/norm.cpp new file mode 100644 index 000000000..2c6538fcf --- /dev/null +++ b/dnn/src/common/norm.cpp @@ -0,0 +1,43 @@ +#include "megdnn/oprs.h" +#include "src/common/utils.h" + +namespace megdnn { +void NormForward::deduce_layout(const TensorLayout& src, TensorLayout& dst) { + megdnn_assert( + param().dim > -1 && param().dim < static_cast(src.ndim), + "dim params must be passed and cannot be -1."); + + SmallVector shapeList; + for (size_t i = 0; i < src.ndim; ++i) { + if (static_cast(i) != param().dim) { + shapeList.append(1, static_cast(src.shape[i])); + } else { + shapeList.append(1, static_cast(1)); + } + } + dst = TensorLayout{TensorShape(shapeList), src.dtype}; + return; +} + +void NormBase::check_exec( + const TensorLayout& src, const TensorLayout& dst, size_t workspace_in_bytes) { + megdnn_assert_eq_dtype(src, dst); + +#if !MEGDNN_DISABLE_FLOAT16 + megdnn_assert( + src.dtype.enumv() == DTypeEnum::Float16 || + src.dtype.enumv() == DTypeEnum::Float32, + "Float16 or Float32 is only supported."); +#else + megdnn_assert( + src.dtype.enumv() == DTypeEnum::Float32, "Float32 is only supported."); +#endif + + TensorLayout dst_expected; + deduce_layout(src, dst_expected); + megdnn_assert_eq_layout(dst_expected, dst); + + auto required_workspace_in_bytes = get_workspace_in_bytes(src, dst); + megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes); +} +} // namespace megdnn diff --git a/dnn/src/common/opr_trait.h b/dnn/src/common/opr_trait.h index 1d875a0c1..89bfad73b 100644 --- 
a/dnn/src/common/opr_trait.h +++ b/dnn/src/common/opr_trait.h @@ -16,6 +16,7 @@ struct OprTrait {}; static const bool can_deduce_layout = CanDeduceLayout; \ } +DEF(Norm, 2, true, true); DEF(Padding, 2, false, true); DEF(PaddingBackward, 2, false, false); DEF(ConvolutionForward, 3, true, true); diff --git a/dnn/src/cuda/handle_create.cpp b/dnn/src/cuda/handle_create.cpp index 4cc6b0855..fee747c06 100644 --- a/dnn/src/cuda/handle_create.cpp +++ b/dnn/src/cuda/handle_create.cpp @@ -47,6 +47,7 @@ #include "src/cuda/matrix_mul/opr_impl.h" #include "src/cuda/max_tensor_diff/opr_impl.h" #include "src/cuda/mesh_indexing/opr_impl.h" +#include "src/cuda/norm/opr_impl.h" #include "src/cuda/padding/opr_impl.h" #include "src/cuda/param_pack/opr_impl.h" #include "src/cuda/pooling/opr_impl.h" @@ -216,6 +217,7 @@ MEGDNN_SPECIALIZE_CREATE_OPERATOR(DropoutForward); MEGDNN_SPECIALIZE_CREATE_OPERATOR(DropoutBackward); MEGDNN_SPECIALIZE_CREATE_OPERATOR(SoftmaxForward); MEGDNN_SPECIALIZE_CREATE_OPERATOR(SoftmaxBackward); +MEGDNN_SPECIALIZE_CREATE_OPERATOR(NormForward); template std::unique_ptr HandleImpl::create_operator() { diff --git a/dnn/src/cuda/norm/helper.cu b/dnn/src/cuda/norm/helper.cu new file mode 100644 index 000000000..c83fbb7c0 --- /dev/null +++ b/dnn/src/cuda/norm/helper.cu @@ -0,0 +1,28 @@ + + +#include "helper.h" +#include "megdnn/dtype.h" +#include "src/cuda/reduce_helper.cuh" + +namespace megdnn { +namespace cuda { + +using namespace device_reduce; +#define COMMA , + +INST_REDUCE(NormOp, false); +INST_REDUCE(NormOp, false); + +INST_REDUCE(NormZeroOp, false); +INST_REDUCE(NormZeroOp, false); + +INST_REDUCE(NormOneOp, false); +INST_REDUCE(NormOneOp, false); + +INST_REDUCE(NormTwoOp, false); +INST_REDUCE(NormTwoOp, false); + +#undef COMMA + +} // namespace cuda +} // namespace megdnn \ No newline at end of file diff --git a/dnn/src/cuda/norm/helper.h b/dnn/src/cuda/norm/helper.h new file mode 100644 index 000000000..d5553ac03 --- /dev/null +++ 
b/dnn/src/cuda/norm/helper.h @@ -0,0 +1,226 @@ +#pragma once +#include "megdnn/dtype.h" + +#if MEGDNN_CC_HOST +#include "megdnn/basic_types.h" +#endif + +namespace megdnn { +namespace device_reduce { + +template +struct NormOp; + +template <> +struct NormOp { + typedef dt_float32 wtype; + typedef dt_float32 src_ctype; + typedef dt_float32 dst_ctype; + typedef wtype p_type; + const wtype INIT; + + src_ctype* src; + dst_ctype* dst; + const size_t B; + const p_type p; + + MEGDNN_HOST MEGDNN_DEVICE wtype read(uint32_t idx) { + return powf(fabsf(src[idx]), p); + } + MEGDNN_HOST MEGDNN_DEVICE void write(uint32_t idx, wtype val) { + dst[idx] = powf(val, 1.f / p); + } + static MEGDNN_HOST MEGDNN_DEVICE wtype apply(wtype lhs, wtype rhs) { + return lhs + rhs; + } + MEGDNN_HOST MEGDNN_DEVICE NormOp(src_ctype* src, dst_ctype* dst, size_t B, p_type p) + : INIT(wtype(0)), src(src), dst(dst), B(B), p(static_cast(p)) {} +}; + +#if !MEGDNN_DISABLE_FLOAT16 +template <> +struct NormOp { + typedef dt_float16 wtype; + typedef dt_float16 src_ctype; + typedef dt_float16 dst_ctype; + const wtype INIT; + + src_ctype* src; + dst_ctype* dst; + const size_t B; + const wtype p; + + // HALF_FLOAT API has dispatch host and device. 
+ MEGDNN_HOST MEGDNN_DEVICE wtype read(uint32_t idx) { + return half_float::detail::pow(half_float::detail::abs(src[idx]), p); + } + MEGDNN_HOST MEGDNN_DEVICE void write(uint32_t idx, wtype val) { + dst[idx] = half_float::detail::pow(val, static_cast(1.f) / p); + } + static MEGDNN_HOST MEGDNN_DEVICE wtype apply(wtype lhs, wtype rhs) { + return lhs + rhs; + } + MEGDNN_HOST MEGDNN_DEVICE + NormOp(src_ctype* src, dst_ctype* dst, size_t B, dt_float32 p) + : INIT(wtype(0)), src(src), dst(dst), B(B), p(static_cast(p)) {} +}; +#endif + +// TODO: 0Norm impl need understand reduceop +template +struct NormZeroOp; + +template <> +struct NormZeroOp { + typedef dt_float32 wtype; + typedef dt_float32 src_ctype; + typedef dt_float32 dst_ctype; + const wtype INIT; + + src_ctype* src; + dst_ctype* dst; + const size_t B; + const wtype epsilon = 0.00001f; + + MEGDNN_HOST MEGDNN_DEVICE wtype read(uint32_t idx) { + return fabsf(src[idx] - 0.0f) <= epsilon ? 0.0f : 1.0f; + } + MEGDNN_HOST MEGDNN_DEVICE void write(uint32_t idx, wtype val) { dst[idx] = val; } + + static MEGDNN_HOST MEGDNN_DEVICE wtype apply(wtype lhs, wtype rhs) { + return lhs + rhs; + } + MEGDNN_HOST MEGDNN_DEVICE NormZeroOp(src_ctype* src, dst_ctype* dst, size_t B) + : INIT(wtype(0)), src(src), dst(dst), B(B) {} +}; + +#if !MEGDNN_DISABLE_FLOAT16 +template <> +struct NormZeroOp { + typedef dt_float16 wtype; + typedef dt_float16 src_ctype; + typedef dt_float16 dst_ctype; + const wtype INIT; + + src_ctype* src; + dst_ctype* dst; + const size_t B; + const wtype epsilon = half_float::half(0.00001f); + + MEGDNN_HOST MEGDNN_DEVICE wtype read(uint32_t idx) { + return half_float::detail::fabs(src[idx] - half_float::half()) <= epsilon + ? 
half_float::half(0.0f) + : half_float::half(1.0f); + } + MEGDNN_HOST MEGDNN_DEVICE void write(uint32_t idx, wtype val) { dst[idx] = val; } + + static MEGDNN_HOST MEGDNN_DEVICE wtype apply(wtype lhs, wtype rhs) { + return lhs + rhs; + } + MEGDNN_HOST MEGDNN_DEVICE NormZeroOp(src_ctype* src, dst_ctype* dst, size_t B) + : INIT(wtype(0)), src(src), dst(dst), B(B) {} +}; +#endif + +template +struct NormOneOp; + +template <> +struct NormOneOp { + typedef dt_float32 wtype; + typedef dt_float32 src_ctype; + typedef dt_float32 dst_ctype; + const wtype INIT; + + src_ctype* src; + dst_ctype* dst; + const size_t B; + + MEGDNN_HOST MEGDNN_DEVICE wtype read(uint32_t idx) { return fabsf(src[idx]); } + MEGDNN_HOST MEGDNN_DEVICE void write(uint32_t idx, wtype val) { dst[idx] = val; } + + static MEGDNN_HOST MEGDNN_DEVICE wtype apply(wtype lhs, wtype rhs) { + return lhs + rhs; + } + MEGDNN_HOST MEGDNN_DEVICE NormOneOp(src_ctype* src, dst_ctype* dst, size_t B) + : INIT(wtype(0)), src(src), dst(dst), B(B) {} +}; + +#if !MEGDNN_DISABLE_FLOAT16 +template <> +struct NormOneOp { + typedef dt_float16 wtype; + typedef dt_float16 src_ctype; + typedef dt_float16 dst_ctype; + const wtype INIT; + + src_ctype* src; + dst_ctype* dst; + const size_t B; + + MEGDNN_HOST MEGDNN_DEVICE wtype read(uint32_t idx) { + return half_float::detail::abs(src[idx]); + } + MEGDNN_HOST MEGDNN_DEVICE void write(uint32_t idx, wtype val) { dst[idx] = val; } + + static MEGDNN_HOST MEGDNN_DEVICE wtype apply(wtype lhs, wtype rhs) { + return lhs + rhs; + } + MEGDNN_HOST MEGDNN_DEVICE NormOneOp(src_ctype* src, dst_ctype* dst, size_t B) + : INIT(wtype(0)), src(src), dst(dst), B(B) {} +}; +#endif + +template +struct NormTwoOp; + +template <> +struct NormTwoOp { + typedef dt_float32 wtype; + typedef dt_float32 src_ctype; + typedef dt_float32 dst_ctype; + const wtype INIT; + + src_ctype* src; + dst_ctype* dst; + const size_t B; + + MEGDNN_HOST MEGDNN_DEVICE wtype read(uint32_t idx) { return src[idx] * src[idx]; } + MEGDNN_HOST 
MEGDNN_DEVICE void write(uint32_t idx, wtype val) { + dst[idx] = sqrtf(val); + } + + static MEGDNN_HOST MEGDNN_DEVICE wtype apply(wtype lhs, wtype rhs) { + return lhs + rhs; + } + MEGDNN_HOST MEGDNN_DEVICE NormTwoOp(src_ctype* src, dst_ctype* dst, size_t B) + : INIT(wtype(0)), src(src), dst(dst), B(B) {} +}; + +#if !MEGDNN_DISABLE_FLOAT16 +template <> +struct NormTwoOp { + typedef dt_float16 wtype; + typedef dt_float16 src_ctype; + typedef dt_float16 dst_ctype; + const wtype INIT; + + src_ctype* src; + dst_ctype* dst; + const size_t B; + + MEGDNN_HOST MEGDNN_DEVICE wtype read(uint32_t idx) { return src[idx] * src[idx]; } + MEGDNN_HOST MEGDNN_DEVICE void write(uint32_t idx, wtype val) { + dst[idx] = half_float::detail::sqrt(val); + } + + static MEGDNN_HOST MEGDNN_DEVICE wtype apply(wtype lhs, wtype rhs) { + return lhs + rhs; + } + MEGDNN_HOST MEGDNN_DEVICE NormTwoOp(src_ctype* src, dst_ctype* dst, size_t B) + : INIT(wtype(0)), src(src), dst(dst), B(B) {} +}; +#endif + +} // namespace device_reduce +} // namespace megdnn diff --git a/dnn/src/cuda/norm/opr_impl.cpp b/dnn/src/cuda/norm/opr_impl.cpp new file mode 100644 index 000000000..0b2f62157 --- /dev/null +++ b/dnn/src/cuda/norm/opr_impl.cpp @@ -0,0 +1,180 @@ +#include "src/cuda/norm/opr_impl.h" +#include "helper.h" +#include "src/common/reduce_helper_device.h" +#include "src/common/utils.h" +#include "src/cuda/handle.h" +#include "src/cuda/reduce_helper.cuh" +#include "src/cuda/utils.h" + +namespace megdnn { +namespace cuda { + +using namespace device_reduce; +using Mode = Norm::Mode; + +template <> +void NormForwardImpl::dispatch_mode( + _megdnn_tensor_inout src, _megdnn_tensor_inout dst, _megdnn_workspace workspace, + size_t A, size_t B, size_t C, cudaStream_t stream) { +#define CASE(dt) \ + case DTypeTrait
::enumv: { \ + using ctype = DTypeTrait
::ctype; \ + auto reduceOp = \ + MinOp(src.ptr(), dst.ptr(), B); \ + run_reduce, false>( \ + workspace.ptr(), A, B, C, stream, reduceOp); \ + break; \ + }; + switch (src.layout.dtype.enumv()) { + CASE(::megdnn::dtype::Float32) +#if !MEGDNN_DISABLE_FLOAT16 + CASE(::megdnn::dtype::Float16) +#endif + default: + megdnn_assert_internal(false); + } +#undef CASE +} + +template <> +void NormForwardImpl::dispatch_mode( + _megdnn_tensor_inout src, _megdnn_tensor_inout dst, _megdnn_workspace workspace, + size_t A, size_t B, size_t C, cudaStream_t stream) { +#define CASE(dt) \ + case DTypeTrait
::enumv: { \ + using ctype = DTypeTrait
::ctype; \ + auto reduceOp = \ + MaxOp(src.ptr(), dst.ptr(), B); \ + run_reduce, false>( \ + workspace.ptr(), A, B, C, stream, reduceOp); \ + break; \ + }; + switch (src.layout.dtype.enumv()) { + CASE(::megdnn::dtype::Float32) +#if !MEGDNN_DISABLE_FLOAT16 + CASE(::megdnn::dtype::Float16) +#endif + default: + megdnn_assert_internal(false); + } +#undef CASE +} + +template <> +void NormForwardImpl::dispatch_mode( + _megdnn_tensor_inout src, _megdnn_tensor_inout dst, _megdnn_workspace workspace, + size_t A, size_t B, size_t C, cudaStream_t stream) { + typedef dt_float32 p_type; + +#define CASE(dt) \ + case DTypeTrait
::enumv: { \ + using ctype = DTypeTrait
::ctype; \ + p_type epsilon = 0.000001f; \ + if (fabs(param().p - 0.0f) < epsilon) { \ + run_reduce, false>( \ + workspace.ptr(), A, B, C, stream, \ + NormZeroOp( \ + src.ptr(), dst.ptr(), B)); \ + } else if (fabs(param().p - 1.0f) < epsilon) { \ + run_reduce, false>( \ + workspace.ptr(), A, B, C, stream, \ + NormOneOp( \ + src.ptr(), dst.ptr(), B)); \ + } else if (fabs(param().p - 2.0f) < epsilon) { \ + run_reduce, false>( \ + workspace.ptr(), A, B, C, stream, \ + NormTwoOp( \ + src.ptr(), dst.ptr(), B)); \ + } else { \ + run_reduce, false>( \ + workspace.ptr(), A, B, C, stream, \ + NormOp( \ + src.ptr(), dst.ptr(), B, param().p)); \ + } \ + break; \ + }; + + switch (src.layout.dtype.enumv()) { + CASE(::megdnn::dtype::Float32) +#if !MEGDNN_DISABLE_FLOAT16 + CASE(::megdnn::dtype::Float16) +#endif + default: + megdnn_assert_internal(false); + } +#undef CASE +} + +} // namespace cuda + +namespace cuda { +void NormForwardImpl::exec( + _megdnn_tensor_in src, _megdnn_tensor_out dst, _megdnn_workspace workspace) { + check_exec(src.layout, dst.layout, workspace.size); + size_t A, B, C; + reduce::get_ABC(src.layout, A, B, C, param().dim); + auto stream = cuda_stream(this->handle()); + +#define CASE(mode) \ + case mode: { \ + dispatch_mode(src, dst, workspace, A, B, C, stream); \ + break; \ + }; + + switch (param().mode) { + CASE(Mode::P_NORM) + CASE(Mode::INF_NORM) + CASE(Mode::NEG_INF_NORM) + default: + megdnn_assert_internal(false); + } +#undef CASE + + return; +} + +size_t NormForwardImpl::get_workspace_in_bytes( + const TensorLayout& src, const TensorLayout& dst) { + using namespace device_reduce; + size_t A, B, C; + reduce::get_ABC(src, A, B, C, param().dim); + +#define cb(dt, op) \ + case DTypeTrait
::enumv: { \ + using ctype = DTypeTrait
::ctype; \ + return get_reduce_workspace_in_bytes>(A, B, C); \ + break; \ + }; + +#if !MEGDNN_DISABLE_FLOAT16 +#define CASE(mode, op) \ + case mode: { \ + switch (src.dtype.enumv()) { \ + cb(::megdnn::dtype::Float32, op) cb(::megdnn::dtype::Float16, op) default \ + : megdnn_assert_internal(false); \ + } \ + }; +#else +#define CASE(mode, op) \ + case mode: { \ + switch (src.dtype.enumv()) { \ + cb(::megdnn::dtype::Float32, op) default : megdnn_assert_internal(false); \ + } \ + }; +#endif + + // XXX: 0/1 norm dispathed to different Op, but workspace size same as + // NormOp + switch (param().mode) { + CASE(Mode::INF_NORM, MaxOp) + CASE(Mode::NEG_INF_NORM, MinOp) + CASE(Mode::P_NORM, NormOp) + default: + megdnn_assert_internal(false); + } +#undef CASE +#undef cb +} + +} // namespace cuda +} // namespace megdnn diff --git a/dnn/src/cuda/norm/opr_impl.h b/dnn/src/cuda/norm/opr_impl.h new file mode 100644 index 000000000..9d1d85d7c --- /dev/null +++ b/dnn/src/cuda/norm/opr_impl.h @@ -0,0 +1,25 @@ +#pragma once +#include "megdnn/oprs.h" +#include "src/cuda/utils.h" + +namespace megdnn { +namespace cuda { +class NormForwardImpl : public NormForward { + using Norm::Norm; + +public: + void exec( + _megdnn_tensor_in src, _megdnn_tensor_out dst, + _megdnn_workspace workspace) override; + size_t get_workspace_in_bytes( + const TensorLayout& src, const TensorLayout& dst) override; + +protected: + template + void dispatch_mode( + _megdnn_tensor_inout src, _megdnn_tensor_inout dst, + _megdnn_workspace workspace, size_t A, size_t B, size_t C, + cudaStream_t stream); +}; +} // namespace cuda +} // namespace megdnn diff --git a/dnn/src/naive/handle.cpp b/dnn/src/naive/handle.cpp index 5bedcd3a9..21d23ad63 100644 --- a/dnn/src/naive/handle.cpp +++ b/dnn/src/naive/handle.cpp @@ -51,6 +51,7 @@ #include "src/naive/matrix_mul/opr_impl.h" #include "src/naive/max_tensor_diff/opr_impl.h" #include "src/naive/mesh_indexing/opr_impl.h" +#include "src/naive/norm/opr_impl.h" #include 
"src/naive/padding/opr_impl.h" #include "src/naive/param_pack/opr_impl.h" #include "src/naive/pooling/opr_impl.h" diff --git a/dnn/src/naive/norm/helper.h b/dnn/src/naive/norm/helper.h new file mode 100644 index 000000000..4d1470cc4 --- /dev/null +++ b/dnn/src/naive/norm/helper.h @@ -0,0 +1,152 @@ +#pragma once +#include +#include +#include "megdnn/basic_types.h" +#include "megdnn/dtype.h" +#include "src/common/utils.h" + +using namespace megdnn; + +/* anonymous namespace */ +namespace { +using Mode = Reduce::Mode; + +/* Reduce Trait */ +template +struct Trait; + +template +struct Trait { + static const ctype INIT; + + static ctype apply(ctype x, ctype y) { return x + y; } + static ctype visit(ctype x) { return x; } + static ctype write(ctype x, size_t) { return x; } +}; +template +const ctype Trait::INIT = ctype(0); + +template +struct Trait { + static const ctype INIT; + + static ctype apply(ctype x, ctype y) { return x + y; } + static ctype visit(ctype x) { return x; } + static ctype write(ctype x, size_t B) { return x / (ctype)B; } +}; +template +const ctype Trait::INIT = ctype(0); + +template +struct Trait { + static const ctype INIT; + + static ctype apply(ctype x, ctype y) { return x + y; } + static ctype visit(ctype x) { return x * x; } + static ctype write(ctype x, size_t) { return x; } +}; +template +const ctype Trait::INIT = ctype(0); + +template +struct Trait { + static const ctype INIT; + + static ctype apply(ctype x, ctype y) { return x * y; } + static ctype visit(ctype x) { return x; } + static ctype write(ctype x, size_t) { return x; } +}; +template +const ctype Trait::INIT = ctype(1); + +template +struct Trait { + static ctype apply(ctype x, ctype y) { return x < y ? x : y; } + static ctype visit(ctype x) { return x; } + static ctype write(ctype x, size_t) { return x; } +}; + +template <> +struct Trait { + using ctype = dt_float32; + + static ctype apply(ctype x, ctype y) { return (std::isnan(x) || x < y) ? 
x : y; } + static ctype visit(ctype x) { return x; } + static ctype write(ctype x, size_t) { return x; } +}; + +template +struct Trait { + static ctype apply(ctype x, ctype y) { return x > y ? x : y; } + static ctype visit(ctype x) { return x; } + static ctype write(ctype x, size_t) { return x; } +}; + +template <> +struct Trait { + using ctype = dt_float32; + + static ctype apply(ctype x, ctype y) { return (std::isnan(x) || x > y) ? x : y; } + static ctype visit(ctype x) { return x; } + static ctype write(ctype x, size_t) { return x; } +}; + +/* NormOp */ +template +struct NormOp; + +template <> +struct NormOp { + typedef dt_float32 ctype; + static const ctype INIT; + + static ctype apply(ctype x, ctype y) { return x + y; } + static ctype visit(ctype x, dt_float32 p) { return powf(fabs(x), p); } + static ctype write(ctype x, size_t, dt_float32 p) { return powf(x, 1.f / p); } +}; + +#if !MEGDNN_DISABLE_FLOAT16 +template <> +struct NormOp { + typedef dt_float16 ctype; + static const ctype INIT; + + static ctype apply(ctype x, ctype y) { return x + y; } + static ctype visit(ctype x, dt_float32 p) { + return half_float::pow(half_float::abs(x), half_float::half(p)); + } + static ctype write(ctype x, size_t, dt_float32 p) { + return half_float::pow(x, half_float::half(1.f / p)); + } +}; +#endif + +template +struct NormZeroOp; + +template <> +struct NormZeroOp { + typedef dt_float32 ctype; + static const ctype INIT; + + static ctype apply(ctype x, ctype y) { return x + y; } + static ctype visit(ctype x) { return x - 0.f < 0.00001f ? 0.f : 1.f; } + static ctype write(ctype x, size_t) { return x; } +}; + +#if !MEGDNN_DISABLE_FLOAT16 +template <> +struct NormZeroOp { + typedef dt_float16 ctype; + static const ctype INIT; + + static ctype apply(ctype x, ctype y) { return x + y; } + static ctype visit(ctype x) { + return x - half_float::half(0.f) < half_float::half(0.00001f) + ? 
half_float::half(0.f) + : half_float::half(1.f); + } + static ctype write(ctype x, size_t) { return x; } +}; +#endif +} // namespace diff --git a/dnn/src/naive/norm/opr_impl.cpp b/dnn/src/naive/norm/opr_impl.cpp new file mode 100644 index 000000000..f13ce4172 --- /dev/null +++ b/dnn/src/naive/norm/opr_impl.cpp @@ -0,0 +1,197 @@ +#include "src/naive/norm/opr_impl.h" + +#include "helper.h" +#include "src/common/utils.h" +#include "src/naive/handle.h" + +namespace megdnn { +namespace naive { +using Mode = Norm::Mode; + +template <> +void NormForwardImpl::dispatch_mode( + _megdnn_tensor_in src, _megdnn_tensor_out dst, size_t A, size_t B, size_t C) { +#define CASE(dt) \ + case DTypeTrait
::enumv: { \ + using ctype = DTypeTrait
::ctype; \ + const ctype* __restrict sptr = src.ptr(); \ + ctype* __restrict dptr = dst.ptr(); \ + std::function func; \ + func = [&](size_t a, size_t c, size_t bl, size_t br) -> ctype { \ + if (bl + 1 < br) { \ + size_t mid = bl + (br - bl) / 2; \ + return Trait::apply( \ + func(a, c, bl, mid), func(a, c, mid, br)); \ + } else { \ + return Trait::visit( \ + sptr[a * B * C + bl * C + c]); \ + } \ + }; \ + for (size_t a = 0; a < A; ++a) \ + for (size_t c = 0; c < C; ++c) { \ + dptr[a * C + c] = Trait::write( \ + func(a, c, 0, B), B); \ + } \ + break; \ + }; + + switch (src.layout.dtype.enumv()) { + CASE(::megdnn::dtype::Float32) +#if !MEGDNN_DISABLE_FLOAT16 + CASE(::megdnn::dtype::Float16) +#endif + default: + megdnn_assert_internal(false); + } +#undef CASE +} + +template <> +void NormForwardImpl::dispatch_mode( + _megdnn_tensor_in src, _megdnn_tensor_out dst, size_t A, size_t B, size_t C) { +#define CASE(dt) \ + case DTypeTrait
::enumv: { \ + using ctype = DTypeTrait
::ctype; \ + const ctype* __restrict sptr = src.ptr(); \ + ctype* __restrict dptr = dst.ptr(); \ + std::function func; \ + func = [&](size_t a, size_t c, size_t bl, size_t br) -> ctype { \ + if (bl + 1 < br) { \ + size_t mid = bl + (br - bl) / 2; \ + return Trait::apply( \ + func(a, c, bl, mid), func(a, c, mid, br)); \ + } else { \ + return Trait::visit( \ + sptr[a * B * C + bl * C + c]); \ + } \ + }; \ + for (size_t a = 0; a < A; ++a) \ + for (size_t c = 0; c < C; ++c) { \ + dptr[a * C + c] = Trait::write( \ + func(a, c, 0, B), B); \ + } \ + break; \ + }; + + switch (src.layout.dtype.enumv()) { + CASE(::megdnn::dtype::Float32) +#if !MEGDNN_DISABLE_FLOAT16 + CASE(::megdnn::dtype::Float16) +#endif + default: + megdnn_assert_internal(false); + } +#undef CASE +} + +template <> +void NormForwardImpl::dispatch_mode( + _megdnn_tensor_in src, _megdnn_tensor_out dst, size_t A, size_t B, size_t C) { +#define CASE(dt) \ + case DTypeTrait
::enumv: { \ + using ctype = DTypeTrait
::ctype; \ + const ctype* __restrict sptr = src.ptr(); \ + ctype* __restrict dptr = dst.ptr(); \ + std::function func; \ + if (std::fabs(param().p) < 0.00001f) { /* only p ~= 0 counts nonzeros; negative p stays on the p-norm path */ \ + func = [&](size_t a, size_t c, size_t bl, size_t br) -> ctype { \ + if (bl + 1 < br) { \ + size_t mid = bl + (br - bl) / 2; \ + return NormZeroOp::apply( \ + func(a, c, bl, mid), func(a, c, mid, br)); \ + } else { \ + return NormZeroOp::visit(sptr[a * B * C + bl * C + c]); \ + } \ + }; \ + for (size_t a = 0; a < A; ++a) { \ + for (size_t c = 0; c < C; ++c) { \ + dptr[a * C + c] = NormZeroOp::write(func(a, c, 0, B), B); \ + } \ + } \ + } else { \ + func = [&](size_t a, size_t c, size_t bl, size_t br) -> ctype { \ + if (bl + 1 < br) { \ + size_t mid = bl + (br - bl) / 2; \ + return NormOp::apply( \ + func(a, c, bl, mid), func(a, c, mid, br)); \ + } else { \ + return NormOp::visit( \ + sptr[a * B * C + bl * C + c], param().p); \ + } \ + }; \ + for (size_t a = 0; a < A; ++a) { \ + for (size_t c = 0; c < C; ++c) { \ + dptr[a * C + c] = \ + NormOp::write(func(a, c, 0, B), B, param().p); \ + } \ + } \ + } \ + break; \ + }; + + switch (src.layout.dtype.enumv()) { + CASE(::megdnn::dtype::Float32) +#if !MEGDNN_DISABLE_FLOAT16 + CASE(::megdnn::dtype::Float16) +#endif + default: + megdnn_assert_internal(false); + } +#undef CASE +} + +void NormForwardImpl::exec( + _megdnn_tensor_in src, _megdnn_tensor_out dst, _megdnn_workspace workspace) { + check_exec(src.layout, dst.layout, workspace.size); + using namespace reduce; + size_t A, B, C; + reduce::get_ABC(src.layout, A, B, C, param().dim); + auto make_tensor = [&](DType comp_dtype, _megdnn_tensor_inout tensor, + dt_byte*& workspace_ptr) { + if (comp_dtype == tensor.layout.dtype) + return tensor; + auto layout = TensorLayout(tensor.layout, comp_dtype); + TensorND new_tensor{workspace_ptr, layout}; + workspace_ptr += layout.span().dist_byte(); + return new_tensor; + }; + auto typecvt = handle()->create_operator(); + + auto copy_to = [&typecvt](const TensorND& from, const 
TensorND& to) { + if (from.raw_ptr() != to.raw_ptr()) + typecvt->exec(from, to); + }; + + auto workspace_ptr = workspace.ptr(); + + auto new_src = make_tensor(src.layout.dtype, src, workspace_ptr); + auto new_dst = make_tensor(dst.layout.dtype, dst, workspace_ptr); + +#define CASE(mode) \ + case mode: { \ + copy_to(src, new_src); \ + ::megdnn::naive::HandleImpl* handlePtr = static_cast(handle()); \ + MEGDNN_DISPATCH_CPU_KERN( \ + handlePtr, dispatch_mode(new_src, new_dst, A, B, C)); \ + copy_to(new_dst, dst); \ + break; \ + }; + switch (param().mode) { + CASE(Mode::P_NORM) + CASE(Mode::INF_NORM) + CASE(Mode::NEG_INF_NORM) + default: + megdnn_assert_internal(false); + } +#undef CASE +} + +size_t NormForwardImpl::get_workspace_in_bytes( + const TensorLayout& src, const TensorLayout& dst) { + MEGDNN_MARK_USED_VAR(src); + MEGDNN_MARK_USED_VAR(dst); + return 0; +} + +} // namespace naive +} // namespace megdnn diff --git a/dnn/src/naive/norm/opr_impl.h b/dnn/src/naive/norm/opr_impl.h new file mode 100644 index 000000000..23116ac00 --- /dev/null +++ b/dnn/src/naive/norm/opr_impl.h @@ -0,0 +1,23 @@ +#pragma once +#include "megdnn/oprs.h" +#include "src/common/reduce_helper.h" +#include "src/naive/reduce/opr_impl.h" + +namespace megdnn { +namespace naive { +class NormForwardImpl : public Norm { +public: + using Norm::Norm; + void exec( + _megdnn_tensor_in src, _megdnn_tensor_out dst, + _megdnn_workspace workspace) override; + size_t get_workspace_in_bytes( + const TensorLayout& src, const TensorLayout& dst) override; + +protected: + template + void dispatch_mode( + _megdnn_tensor_in src, _megdnn_tensor_out dst, size_t, size_t, size_t); +}; +} // namespace naive +} // namespace megdnn diff --git a/dnn/test/common/norm.h b/dnn/test/common/norm.h new file mode 100644 index 000000000..feed79958 --- /dev/null +++ b/dnn/test/common/norm.h @@ -0,0 +1,19 @@ + +#pragma once +#include +#include "megdnn/basic_types.h" +#include "megdnn/opr_param_defs.h" + +namespace megdnn { 
+namespace test {
+namespace norm {
+
+struct TestArg {  // one Norm test argument: operator param plus the source shape
+    param::Norm param;
+    TensorShape src;
+    TestArg(param::Norm param, TensorShape src) : param(param), src(src) {}
+};
+
+}  // namespace norm
+}  // namespace test
+}  // namespace megdnn
diff --git a/dnn/test/cuda/norm.cpp b/dnn/test/cuda/norm.cpp
new file mode 100644
index 000000000..a778bae17
--- /dev/null
+++ b/dnn/test/cuda/norm.cpp
@@ -0,0 +1,291 @@
+#include "test/common/norm.h"
+#include "megdnn/dtype.h"
+#include "megdnn/oprs.h"
+#include "test/common/checker.h"
+// #include "test/naive/fixture.h"
+// #include "test/common/benchmarker.h"
+#include <cstdio>  // printf; NOTE(review): original header name was lost, confirm
+#include "test/cuda/benchmark.h"
+#include "test/cuda/fixture.h"
+#include "test/cuda/utils.h"
+
+namespace megdnn {
+namespace test {
+// Correctness tests: each feeds the values 0..23 as a {1, 2, 3, 4} tensor and
+// compares against hand-written expected output. First: p = 2, Float32.
+TEST_F(CUDA, L2NORM_FP32_DIM0) {  // dim 0 has size 1: expected output == input
+    Checker<Norm> checker(handle_cuda());
+    Norm::Param param;
+    param.p = 2;
+    param.dim = 0;
+    checker.set_param(param);
+    checker.exect(
+            Testcase{
+                    TensorValue(
+                            {1, 2, 3, 4}, dtype::Float32(),
+                            {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                             12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
+                    {}},
+            Testcase{
+                    {},
+                    TensorValue(
+                            {1, 2, 3, 4}, dtype::Float32(),
+                            {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                             12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
+            });
+}
+TEST_F(CUDA, L2NORM_FP32_DIM1) {  // pairs (i, i + 12), e.g. sqrt(0 + 144) = 12
+    Checker<Norm> checker(handle_cuda());
+    Norm::Param param;
+    param.p = 2;
+    param.dim = 1;
+    checker.set_param(param);
+    checker.exect(
+            Testcase{
+                    TensorValue(
+                            {1, 2, 3, 4}, dtype::Float32(),
+                            {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                             12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
+                    {}},
+            Testcase{
+                    {},
+                    TensorValue(
+                            {1, 1, 3, 4}, dtype::Float32(),
+                            {12.000, 13.0384, 14.1421, 15.2971, 16.4924, 17.7200,
+                             18.9737, 20.2485, 21.5407, 22.8473, 24.1661, 25.4951}),
+            });
+}
+TEST_F(CUDA, L2NORM_FP32_DIM3) {  // rows of 4, e.g. sqrt(0 + 1 + 4 + 9) = 3.7417
+    Checker<Norm> checker(handle_cuda());
+    Norm::Param param;
+    param.p = 2;
+    param.dim = 3;
+    checker.set_param(param).exect(
+            Testcase{
+                    TensorValue(
+                            {1, 2, 3, 4}, dtype::Float32(),
+                            {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                             12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
+                    {}},
+            Testcase{
+                    {},
+                    TensorValue(
+                            {1, 2, 3, 1}, dtype::Float32(),
+                            {3.7417, 11.2250, 19.1311, 27.0924, 35.0714, 43.0581})});
+}
+// TODO: support -1 dim param, or test for assert
+// p = 2, Float16: same rows as above, expected values written at half precision.
+TEST_F(CUDA, L2NORM_FP16_DIM3) {
+    Checker<Norm> checker(handle_cuda());
+    Norm::Param param;
+    param.p = 2;
+    param.dim = 3;
+    checker.set_param(param).exect(
+            Testcase{
+                    TensorValue(
+                            {1, 2, 3, 4}, dtype::Float16(),
+                            {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                             12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
+                    {}},
+            Testcase{
+                    {},
+                    TensorValue(
+                            {1, 2, 3, 1}, dtype::Float16(),
+                            {3.7422, 11.2266, 19.1250, 27.0938, 35.0625, 43.0625})});
+}
+// p = 1, Float32 and Float16: expected values are the per-row sums.
+TEST_F(CUDA, L1NORM_FP32_DIM3) {
+    Checker<Norm> checker(handle_cuda());
+    Norm::Param param;
+    param.p = 1;
+    param.dim = 3;
+    checker.set_param(param).exect(
+            Testcase{
+                    TensorValue(
+                            {1, 2, 3, 4}, dtype::Float32(),
+                            {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                             12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
+                    {}},
+            Testcase{
+                    {},
+                    TensorValue(
+                            {1, 2, 3, 1}, dtype::Float32(), {6, 22, 38, 54, 70, 86}),
+            });
+}
+TEST_F(CUDA, L1NORM_FP16_DIM3) {
+    Checker<Norm> checker(handle_cuda());
+    Norm::Param param;
+    param.p = 1;
+    param.dim = 3;
+    checker.set_param(param).exect(
+            Testcase{
+                    TensorValue(
+                            {1, 2, 3, 4}, dtype::Float16(),
+                            {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                             12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
+                    {}},
+            Testcase{
+                    {},
+                    TensorValue(
+                            {1, 2, 3, 1}, dtype::Float16(), {6, 22, 38, 54, 70, 86}),
+            });
+}
+// p = 0, Float32 and Float16: expected values count the non-zero entries per row.
+TEST_F(CUDA, L0NORM_FP32_DIM3) {
+    Checker<Norm> checker(handle_cuda());
+    Norm::Param param;
+    param.p = 0;
+    param.dim = 3;
+    checker.set_param(param).exect(
+            Testcase{
+                    TensorValue(
+                            {1, 2, 3, 4}, dtype::Float32(),
+                            {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                             12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
+                    {}},
+            Testcase{
+                    {},
+                    TensorValue({1, 2, 3, 1}, dtype::Float32(), {3, 4, 4, 4, 4, 4}),
+            });
+}
+TEST_F(CUDA, L0NORM_FP16_DIM3) {
+    Checker<Norm> checker(handle_cuda());
+    Norm::Param param;
+    param.p = 0;
+    param.dim = 3;
+    checker.set_param(param).exect(
+            Testcase{
+                    TensorValue(
+                            {1, 2, 3, 4}, dtype::Float16(),
+                            {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                             12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
+                    {}},
+            Testcase{
+                    {},
+                    TensorValue({1, 2, 3, 1}, dtype::Float16(), {3, 4, 4, 4, 4, 4}),
+            });
+}
+// INF_NORM mode (p left at its default): expected values are the per-row maxima.
+TEST_F(CUDA, INF_NORM_FP32_DIM3) {
+    Checker<Norm> checker(handle_cuda());
+    Norm::Param param;
+    using Mode = Norm::Param::Mode;
+
+    param.dim = 3;
+    param.mode = Mode::INF_NORM;
+    checker.set_param(param).exect(
+            Testcase{
+                    TensorValue(
+                            {1, 2, 3, 4}, dtype::Float32(),
+                            {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                             12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
+                    {}},
+            Testcase{
+                    {},
+                    TensorValue({1, 2, 3, 1}, dtype::Float32(), {3, 7, 11, 15, 19, 23}),
+            });
+}
+TEST_F(CUDA, INF_NORM_FP16_DIM3) {
+    Checker<Norm> checker(handle_cuda());
+    Norm::Param param;
+    using Mode = Norm::Param::Mode;
+
+    param.dim = 3;
+    param.mode = Mode::INF_NORM;
+    checker.set_param(param).exect(
+            Testcase{
+                    TensorValue(
+                            {1, 2, 3, 4}, dtype::Float16(),
+                            {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                             12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
+                    {}},
+            Testcase{
+                    {},
+                    TensorValue({1, 2, 3, 1}, dtype::Float16(), {3, 7, 11, 15, 19, 23}),
+            });
+}
+// NEG_INF_NORM mode: expected values are the per-row minima.
+TEST_F(CUDA, NEG_INF_NORM_FP32_DIM3) {
+    Checker<Norm> checker(handle_cuda());
+    Norm::Param param;
+    param.mode = Norm::Param::Mode::NEG_INF_NORM;
+    param.dim = 3;
+    checker.set_param(param).exect(
+            Testcase{
+                    TensorValue(
+                            {1, 2, 3, 4}, dtype::Float32(),
+                            {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                             12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
+                    {}},
+            Testcase{
+                    {},
+                    TensorValue({1, 2, 3, 1}, dtype::Float32(), {0, 4, 8, 12, 16, 20}),
+            });
+}
+TEST_F(CUDA, NEG_INF_NORM_FP16_DIM3) {
+    Checker<Norm> checker(handle_cuda());
+    Norm::Param param;
+    param.mode = Norm::Param::Mode::NEG_INF_NORM;
+    param.dim = 3;
+    checker.set_param(param).exect(
+            Testcase{
+                    TensorValue(
+                            {1, 2, 3, 4}, dtype::Float16(),
+                            {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                             12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
+                    {}},
+            Testcase{
+                    {},
+                    TensorValue({1, 2, 3, 1}, dtype::Float16(), {0, 4, 8, 12, 16, 20}),
+            });
+}
+
+// Timing tests: run the operator on a {4194304} Float32 input and print the time.
+TEST_F(CUDA, L2NORM_SPEED_FP32) {
+    // Average the value returned by Benchmarker::exec over kIters runs.
+    constexpr int kIters = 10;
+    auto benchmarker = Benchmarker<Norm>(handle_cuda());
+    benchmarker.set_dtype(0, dtype::Float32());
+    benchmarker.set_dtype(1, dtype::Float32());
+    Norm::Param param;
+    param.mode = Norm::Param::Mode::P_NORM;
+    param.dim = 0;
+    param.p = 2;
+    SmallVector<TensorShape> shapes{{4194304}, {}};
+    NormalRNG rng(0, 1);
+    float totalTime = 0.f;
+    for (int i = 0; i < kIters; ++i) {
+        const float eachTime =
+                benchmarker.set_param(param).set_rng(0, &rng).exec(shapes);
+        // printf("PNORM_SPEED_FP32 cuda time: %.6fms\n", eachTime);
+        totalTime += eachTime;
+    }
+    totalTime /= kIters;
+    printf("PNORM_SPEED_FP32 AVG TIME: %.6fms\n", totalTime);
+}
+TEST_F(CUDA, INFNORM_SPEED_FP32) {  // single run, INF_NORM mode
+    auto benchmarker = Benchmarker<Norm>(handle_cuda());
+    benchmarker.set_dtype(0, dtype::Float32());
+    benchmarker.set_dtype(1, dtype::Float32());
+    Norm::Param param;
+    param.mode = Norm::Param::Mode::INF_NORM;
+    param.dim = 0;
+    SmallVector<TensorShape> shapes{{4194304}, {}};
+    NormalRNG rng(0, 1);
+    float time_fp32 = benchmarker.set_param(param).set_rng(0, &rng).exec(shapes);
+    printf("INF_SPEED_FP32 cuda time: float=%.6fms\n", time_fp32);
+}
+TEST_F(CUDA, NEG_INFNORM_SPEED_FP32) {  // single run, NEG_INF_NORM mode
+    auto benchmarker = Benchmarker<Norm>(handle_cuda());
+    benchmarker.set_dtype(0, dtype::Float32());
+    benchmarker.set_dtype(1, dtype::Float32());
+    Norm::Param param;
+    param.mode = Norm::Param::Mode::NEG_INF_NORM;
+    param.dim = 0;
+    SmallVector<TensorShape> shapes{{4194304}, {}};
+    NormalRNG rng(0, 1);
+    float time_fp32 = benchmarker.set_param(param).set_rng(0, &rng).exec(shapes);
+    printf("NEG_INF_SPEED_FP32 cuda time: float=%.6fms\n", time_fp32);
+}
+}  // namespace test
+}  // namespace megdnn
diff --git a/dnn/test/naive/norm.cpp b/dnn/test/naive/norm.cpp
new file mode 100644
index 000000000..1b03fea42
--- /dev/null
+++ b/dnn/test/naive/norm.cpp
@@ -0,0 +1,237 @@
+#include "test/common/norm.h"
+#include "megdnn/dtype.h"
+#include "megdnn/oprs.h"
+#include "test/common/benchmarker.h"
+#include "test/common/checker.h"
+#include "test/naive/fixture.h"
+
+namespace megdnn {
+namespace test {  // each test feeds 0..23 as a {1, 2, 3, 4} tensor to Norm
+TEST_F(NAIVE, L2NORM_FP32_DIM0) {  // dim 0 has size 1: expected output == input
+    Checker<Norm> checker(handle(), false);  // NOTE(review): only test passing false
+    Norm::Param param;
+    param.p = 2;
+    param.dim = 0;
+    checker.set_param(param);
+    checker.exect(
+            Testcase{
+                    TensorValue(
+                            {1, 2, 3, 4}, dtype::Float32(),
+                            {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                             12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
+                    {}},
+            Testcase{
+                    {},
+                    TensorValue(
+                            {1, 2, 3, 4}, dtype::Float32(),
+                            {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                             12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
+            });
+}
+TEST_F(NAIVE, L2NORM_FP32_DIM1) {  // pairs (i, i + 12), e.g. sqrt(0 + 144) = 12
+    Checker<Norm> checker(handle());
+    Norm::Param param;
+    param.p = 2;
+    param.dim = 1;
+    checker.set_param(param);
+    checker.exect(
+            Testcase{
+                    TensorValue(
+                            {1, 2, 3, 4}, dtype::Float32(),
+                            {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                             12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
+                    {}},
+            Testcase{
+                    {},
+                    TensorValue(
+                            {1, 1, 3, 4}, dtype::Float32(),
+                            {12.000, 13.0384, 14.1421, 15.2971, 16.4924, 17.7200,
+                             18.9737, 20.2485, 21.5407, 22.8473, 24.1661, 25.4951}),
+            });
+}
+TEST_F(NAIVE, L2NORM_FP32_DIM3) {  // rows of 4, e.g. sqrt(0 + 1 + 4 + 9) = 3.7417
+    Checker<Norm> checker(handle());
+    Norm::Param param;
+    param.p = 2;
+    param.dim = 3;
+    checker.set_param(param).exect(
+            Testcase{
+                    TensorValue(
+                            {1, 2, 3, 4}, dtype::Float32(),
+                            {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                             12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
+                    {}},
+            Testcase{
+                    {},
+                    TensorValue(
+                            {1, 2, 3, 1}, dtype::Float32(),
+                            {3.7417, 11.2250, 19.1311, 27.0924, 35.0714, 43.0581})});
+}
+// p = 2, Float16: same rows as above, expected values written at half precision.
+TEST_F(NAIVE, L2NORM_FP16_DIM3) {
+    Checker<Norm> checker(handle());
+    Norm::Param param;
+    param.p = 2;
+    param.dim = 3;
+    checker.set_param(param).exect(
+            Testcase{
+                    TensorValue(
+                            {1, 2, 3, 4}, dtype::Float16(),
+                            {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                             12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
+                    {}},
+            Testcase{
+                    {},
+                    TensorValue(
+                            {1, 2, 3, 1}, dtype::Float16(),
+                            {3.7422, 11.2266, 19.1250, 27.0938, 35.0625, 43.0625})});
+}
+// p = 1, Float32 and Float16: expected values are the per-row sums.
+TEST_F(NAIVE, L1NORM_FP32_DIM3) {
+    Checker<Norm> checker(handle());
+    Norm::Param param;
+    param.p = 1;
+    param.dim = 3;
+    checker.set_param(param).exect(
+            Testcase{
+                    TensorValue(
+                            {1, 2, 3, 4}, dtype::Float32(),
+                            {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                             12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
+                    {}},
+            Testcase{
+                    {},
+                    TensorValue(
+                            {1, 2, 3, 1}, dtype::Float32(), {6, 22, 38, 54, 70, 86}),
+            });
+}
+TEST_F(NAIVE, L1NORM_FP16_DIM3) {
+    Checker<Norm> checker(handle());
+    Norm::Param param;
+    param.p = 1;
+    param.dim = 3;
+    checker.set_param(param).exect(
+            Testcase{
+                    TensorValue(
+                            {1, 2, 3, 4}, dtype::Float16(),
+                            {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                             12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
+                    {}},
+            Testcase{
+                    {},
+                    TensorValue(
+                            {1, 2, 3, 1}, dtype::Float16(), {6, 22, 38, 54, 70, 86}),
+            });
+}
+// p = 0, Float32 and Float16: expected values count the non-zero entries per row.
+TEST_F(NAIVE, L0NORM_FP32_DIM3) {
+    Checker<Norm> checker(handle());
+    Norm::Param param;
+    param.p = 0;
+    param.dim = 3;
+    checker.set_param(param).exect(
+            Testcase{
+                    TensorValue(
+                            {1, 2, 3, 4}, dtype::Float32(),
+                            {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                             12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
+                    {}},
+            Testcase{
+                    {},
+                    TensorValue({1, 2, 3, 1}, dtype::Float32(), {3, 4, 4, 4, 4, 4}),
+            });
+}
+TEST_F(NAIVE, L0NORM_FP16_DIM3) {
+    Checker<Norm> checker(handle());
+    Norm::Param param;
+    param.p = 0;
+    param.dim = 3;
+    checker.set_param(param).exect(
+            Testcase{
+                    TensorValue(
+                            {1, 2, 3, 4}, dtype::Float16(),
+                            {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                             12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
+                    {}},
+            Testcase{
+                    {},
+                    TensorValue({1, 2, 3, 1}, dtype::Float16(), {3, 4, 4, 4, 4, 4}),
+            });
+}
+// INF_NORM mode (p left at its default): expected values are the per-row maxima.
+TEST_F(NAIVE, INF_NORM_FP32_DIM3) {
+    Checker<Norm> checker(handle());
+    Norm::Param param;
+    using Mode = Norm::Param::Mode;
+
+    param.dim = 3;
+    param.mode = Mode::INF_NORM;
+    checker.set_param(param).exect(
+            Testcase{
+                    TensorValue(
+                            {1, 2, 3, 4}, dtype::Float32(),
+                            {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                             12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
+                    {}},
+            Testcase{
+                    {},
+                    TensorValue({1, 2, 3, 1}, dtype::Float32(), {3, 7, 11, 15, 19, 23}),
+            });
+}
+TEST_F(NAIVE, INF_NORM_FP16_DIM3) {
+    Checker<Norm> checker(handle());
+    Norm::Param param;
+    using Mode = Norm::Param::Mode;
+
+    param.dim = 3;
+    param.mode = Mode::INF_NORM;
+    checker.set_param(param).exect(
+            Testcase{
+                    TensorValue(
+                            {1, 2, 3, 4}, dtype::Float16(),
+                            {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                             12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
+                    {}},
+            Testcase{
+                    {},
+                    TensorValue({1, 2, 3, 1}, dtype::Float16(), {3, 7, 11, 15, 19, 23}),
+            });
+}
+// NEG_INF_NORM mode: expected values are the per-row minima.
+TEST_F(NAIVE, NEG_INF_NORM_FP32_DIM3) {
+    Checker<Norm> checker(handle());
+    Norm::Param param;
+    param.mode = Norm::Param::Mode::NEG_INF_NORM;
+    param.dim = 3;
+    checker.set_param(param).exect(
+            Testcase{
+                    TensorValue(
+                            {1, 2, 3, 4}, dtype::Float32(),
+                            {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                             12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
+                    {}},
+            Testcase{
+                    {},
+                    TensorValue({1, 2, 3, 1}, dtype::Float32(), {0, 4, 8, 12, 16, 20}),
+            });
+}
+TEST_F(NAIVE, NEG_INF_NORM_FP16_DIM3) {
+    Checker<Norm> checker(handle());
+    Norm::Param param;
+    param.mode = Norm::Param::Mode::NEG_INF_NORM;
+    param.dim = 3;
+    checker.set_param(param).exect(
+            Testcase{
+                    TensorValue(
+                            {1, 2, 3, 4}, dtype::Float16(),
+                            {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                             12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}),
+                    {}},
+            Testcase{
+                    {},
+                    TensorValue({1, 2, 3, 1}, dtype::Float16(), {0, 4, 8, 12, 16, 20}),
+            });
+}
+
+}  // namespace test
+}  // namespace megdnn
-- 
GitLab