Commit 1e6ef377 authored by Megvii Engine Team

feat(mgb/dnn): add accuracy shake checker

GitOrigin-RevId: 0bb52078a1e338f2ff3a0c1887121dc3a0a7c698
Parent a5a29826
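This commit introduces an "accuracy shake" checker: each candidate algorithm is run on a single-batch input and again on a larger input built by replicating that batch, and the per-batch outputs must match bitwise. Algorithms whose results legitimately change with batch size are tagged with the new AlgoAttribute::ACCURACY_DEPEND_ON_BATCH and skipped. A minimal usage sketch, mirroring the tests added at the bottom of this diff (the test name and shapes are illustrative):

    // Shake-check a float32 matrix multiply on CUDA: {50,100} x {100,60}.
    TEST_F(CUDA, SHAKE_MATRIX_MUL_EXAMPLE) {
        AccuracyShakeChecker<MatrixMul> checker(handle_cuda());
        checker.set_dtype(0, dtype::Float32())   // A
                .set_dtype(1, dtype::Float32())  // B
                .set_dtype(2, dtype::Float32())  // C
                .exec({{50, 100}, {100, 60}, {}});  // {} lets C be deduced
    }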
......@@ -18,4 +18,39 @@
#include "megdnn/oprs/utils.h"
#include "megdnn/oprs/linalg.h"
template <typename Opr>
struct OprArityTrait;
template <typename Opr, int _arity_in, int _arity_out>
struct OprArityTraitTmpl {
static constexpr int arity_in = _arity_in;
static constexpr int arity_out = _arity_out;
static constexpr int arity = arity_in + arity_out;
};
#define INST_ARITY(_Opr, _in, _out) \
template <> \
struct OprArityTrait<_Opr> : public OprArityTraitTmpl<_Opr, _in, _out> {};
INST_ARITY(megdnn::ConvolutionBackwardData, 2, 1);
INST_ARITY(megdnn::ConvolutionBackwardFilter, 2, 1);
INST_ARITY(megdnn::Convolution3DForward, 2, 1);
INST_ARITY(megdnn::Convolution3DBackwardData, 2, 1);
INST_ARITY(megdnn::Convolution3DBackwardFilter, 2, 1);
INST_ARITY(megdnn::LocalShareForward, 2, 1);
INST_ARITY(megdnn::LocalShareBackwardData, 2, 1);
INST_ARITY(megdnn::LocalShareBackwardFilter, 2, 1);
INST_ARITY(megdnn::Convolution, 2, 1);
INST_ARITY(megdnn::DeformableConvForward, 4, 1);
INST_ARITY(megdnn::DeformableConvBackwardFilter, 4, 1);
INST_ARITY(megdnn::BatchConvBiasForward, 4, 1);
INST_ARITY(megdnn::ConvBias, 4, 1);
INST_ARITY(megdnn::DeformableConvBackwardData, 5, 3);
INST_ARITY(megdnn::MatrixMul, 2, 1);
INST_ARITY(megdnn::BatchedMatrixMul, 2, 1);
#undef INST_ARITY
// vim: syntax=cpp.doxygen
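OprArityTrait (moved into dnn from the graph-level header removed at the end of this diff) records each operator's tensor counts at compile time. An illustrative consumer, using only values instantiated above:

    // Hypothetical static checks against the INST_ARITY instantiations.
    static_assert(OprArityTrait<megdnn::MatrixMul>::arity_in == 2,
                  "matmul reads A and B");
    static_assert(OprArityTrait<megdnn::ConvBias>::arity == 5,
                  "conv_bias: src/filter/bias/z in, dst out");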
......@@ -47,6 +47,9 @@ namespace megdnn {
return algo_pack().all_algos_map().at(desc); \
}
#define MEGDNN_FOREACH_ALGO_ATTRIBUTE_INHERITABLE(cb) \
cb(AlgoAttribute::ACCURACY_DEPEND_ON_BATCH)
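// Illustrative expansion: instantiated with the cb used in the algo
// headers below, MEGDNN_FOREACH_ALGO_ATTRIBUTE_INHERITABLE(cb) currently
// expands to a single check:
//     if (m_impl->contain_attribute_all(
//                 AlgoAttribute::ACCURACY_DEPEND_ON_BATCH)) {
//         ret |= AlgoAttribute::ACCURACY_DEPEND_ON_BATCH;
//     }
// so wrapper algorithms inherit the batch-accuracy flag automatically.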
/**
* \brief construct algo from AlgorithmDesc
*/
......
......@@ -323,6 +323,34 @@ void handle_bias_and_nonlinear(Handle* handle, param::ConvBias args,
}
}
bool check_bias_share_in_channel(const TensorLayout& bias,
const param::ConvBias::Format format) {
bool share_in_channel = false;
if (format == param::ConvBias::Format::NCHW ||
format == param::ConvBias::Format::NCHW4_NCHW) {
share_in_channel = (bias.ndim == 4 && bias[0] == 1 && bias[2] == 1 &&
bias[3] == 1);
} else if (format == param::ConvBias::Format::NHWC) {
share_in_channel = (bias.ndim == 4 && bias[0] == 1 && bias[1] == 1 &&
bias[2] == 1);
} else if (format == param::ConvBias::Format::NCHW4 ||
format == param::ConvBias::Format::NCHW8 ||
format == param::ConvBias::Format::NCHW32 ||
format == param::ConvBias::Format::NCHW4_NCHW32 ||
format == param::ConvBias::Format::NCHW32_NCHW4) {
share_in_channel = (bias.ndim == 5 && bias[0] == 1 && bias[2] == 1 &&
bias[3] == 1);
} else if (format == param::ConvBias::Format::NHWCD4) {
share_in_channel = (bias.ndim == 5 && bias[0] == 1 && bias[1] == 1 &&
bias[3] == 1);
} else {
megdnn_assert(format == param::ConvBias::Format::CHWN4);
share_in_channel = (bias.ndim == 5 && bias[1] == 1 && bias[2] == 1 &&
bias[3] == 1);
}
return share_in_channel;
}
} // namespace megdnn
// vim: syntax=cpp.doxygen
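check_bias_share_in_channel, now shared in src/common, reports whether a bias broadcasts along every dimension except the channel. A quick sketch of the NCHW case, with hypothetical shapes:

    // Per-channel NCHW bias: ndim == 4 with N == H == W == 1.
    TensorLayout per_channel({1, 64, 1, 1}, dtype::Float32());
    TensorLayout full_bias({8, 64, 28, 28}, dtype::Float32());
    megdnn_assert(check_bias_share_in_channel(
            per_channel, param::ConvBias::Format::NCHW));   // true
    megdnn_assert(!check_bias_share_in_channel(
            full_bias, param::ConvBias::Format::NCHW));     // false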
......@@ -21,6 +21,9 @@ void handle_bias_and_nonlinear(Handle* handle, param::ConvBias args,
const TensorND* conv_dst_tensor,
const TensorND* dst_tensor,
const TensorND* bias_tensor);
bool check_bias_share_in_channel(const TensorLayout& bias,
const param::ConvBias::Format format);
} // namespace megdnn
// vim: syntax=cpp.doxygen
......@@ -9,7 +9,7 @@
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "src/common/utils.h"
#include "src/common/conv_bias.h"
#include "src/cuda/batch_conv_bias/algo.h"
#include "src/cuda/batch_conv_bias/batch_conv_bias.cuh"
#include "src/cuda/batch_conv_bias/opr_impl.h"
......@@ -106,7 +106,7 @@ bool BatchConvBiasForwardImpl::AlgoInt8NCHW4DotProdGemm::is_available(
using Mode = Param::Mode;
bool available = true;
auto&& param = args.opr->param();
if (!conv_bias::check_bias_share_in_channel(args.bias_layout, param.format))
if (!check_bias_share_in_channel(args.bias_layout, param.format))
return false;
if (param.format != Format::NCHW4)
return false;
......
......@@ -10,7 +10,7 @@
*/
#include "megdnn/oprs/general.h"
#include "src/common/utils.h"
#include "src/common/conv_bias.h"
#include "src/cuda/batch_conv_bias/algo.h"
#include "src/cuda/batch_conv_bias/batch_conv_bias.cuh"
#include "src/cuda/batch_conv_bias/opr_impl.h"
......@@ -86,7 +86,7 @@ bool BatchConvBiasForwardImpl::AlgoInt8NCHW4DotProdImplicitGemmPrecomp::
using Mode = Param::Mode;
bool available = true;
auto&& param = args.opr->param();
if (!conv_bias::check_bias_share_in_channel(args.bias_layout, param.format))
if (!check_bias_share_in_channel(args.bias_layout, param.format))
return false;
if (param.format != Format::NCHW4)
return false;
......
......@@ -115,7 +115,8 @@ public:
size_t get_workspace_in_bytes(const SizeArgs& /*args*/) const override;
void exec(const ExecArgs& args) const final;
AlgoAttribute attribute() const override {
return AlgoAttribute::REPRODUCIBLE;
return AlgoAttribute::REPRODUCIBLE |
AlgoAttribute::ACCURACY_DEPEND_ON_BATCH;
}
const char* name() const override { return "CUBLAS"; }
MEGDNN_DECL_ALGO_TYPE(CUDA_CUBLAS)
......@@ -128,7 +129,8 @@ public:
size_t get_workspace_in_bytes(const SizeArgs& /*args*/) const override;
void exec(const ExecArgs& args) const final;
AlgoAttribute attribute() const override {
return AlgoAttribute::REPRODUCIBLE;
return AlgoAttribute::REPRODUCIBLE |
AlgoAttribute::ACCURACY_DEPEND_ON_BATCH;
}
const char* name() const override { return "CUBLAS_LT"; }
MEGDNN_DECL_ALGO_TYPE(CUDA_CUBLASLT)
......
......@@ -173,6 +173,9 @@ public:
if (m_attr.is_reproducible) {
ret |= AlgoAttribute::REPRODUCIBLE;
}
if (m_attr.accuracy_depend_on_batch) {
ret |= AlgoAttribute::ACCURACY_DEPEND_ON_BATCH;
}
return ret;
}
......@@ -280,6 +283,9 @@ public:
if (m_attr.is_reproducible) {
ret |= AlgoAttribute::REPRODUCIBLE;
}
if (m_attr.accuracy_depend_on_batch) {
ret |= AlgoAttribute::ACCURACY_DEPEND_ON_BATCH;
}
return ret;
}
......@@ -352,7 +358,8 @@ public:
const OperatorBase* opr) const override;
MEGDNN_DECL_ALGO_TYPE(CUDA_MATMUL)
AlgoAttribute attribute() const override {
return AlgoAttribute::REPRODUCIBLE;
return AlgoAttribute::REPRODUCIBLE |
AlgoAttribute::ACCURACY_DEPEND_ON_BATCH;
}
private:
......@@ -406,7 +413,8 @@ public:
const OperatorBase* opr) const override;
AlgoAttribute attribute() const override {
return AlgoAttribute::REPRODUCIBLE;
return AlgoAttribute::REPRODUCIBLE |
AlgoAttribute::ACCURACY_DEPEND_ON_BATCH;
}
MEGDNN_DECL_ALGO_TYPE(CUDA_BATCHED_MATMUL)
......@@ -428,7 +436,14 @@ public:
const char* name() const override { return m_name.c_str(); }
AlgoAttribute attribute() const override {
auto ret = static_cast<AlgoAttribute>(0);
auto ret = AlgoAttribute::DEFAULT;
#define cb(attr) \
if (m_impl->contain_attribute_all(attr)) { \
ret |= attr; \
}
MEGDNN_FOREACH_ALGO_ATTRIBUTE_INHERITABLE(cb)
#undef cb
if (m_impl->contain_attribute_all(AlgoAttribute::REPRODUCIBLE)) {
ret |= AlgoAttribute::REPRODUCIBLE;
}
......
......@@ -16,6 +16,7 @@
#include "src/cuda/conv_bias/helper.h"
#include "src/cuda/cudnn_wrapper.h"
#include "src/cuda/utils.h"
#include "src/common/conv_bias.h"
using namespace megdnn;
using namespace cuda;
......@@ -29,7 +30,7 @@ bool ConvBiasForwardImpl::AlgoCUDNNConvBiasActivation::is_available(
}
if (args.bias_layout->ndim == 0 ||
!conv_bias::check_bias_share_in_channel(*(args.bias_layout),
!check_bias_share_in_channel(*(args.bias_layout),
args.opr->param().format)) {
return false;
}
......
......@@ -168,34 +168,6 @@ bool is_cudnn_supported(const BiasForwardSizeArgs& args) {
return supported;
}
bool check_bias_share_in_channel(const TensorLayout& bias,
const param::ConvBias::Format format) {
bool share_in_channel = false;
if (format == param::ConvBias::Format::NCHW ||
format == param::ConvBias::Format::NCHW4_NCHW) {
share_in_channel = (bias.ndim == 4 && bias[0] == 1 && bias[2] == 1 &&
bias[3] == 1);
} else if (format == param::ConvBias::Format::NHWC) {
share_in_channel = (bias.ndim == 4 && bias[0] == 1 && bias[1] == 1 &&
bias[2] == 1);
} else if (format == param::ConvBias::Format::NCHW4 ||
format == param::ConvBias::Format::NCHW8 ||
format == param::ConvBias::Format::NCHW32 ||
format == param::ConvBias::Format::NCHW4_NCHW32 ||
format == param::ConvBias::Format::NCHW32_NCHW4) {
share_in_channel = (bias.ndim == 5 && bias[0] == 1 && bias[2] == 1 &&
bias[3] == 1);
} else if (format == param::ConvBias::Format::NHWCD4) {
share_in_channel = (bias.ndim == 5 && bias[0] == 1 && bias[1] == 1 &&
bias[3] == 1);
} else {
megdnn_assert(format == param::ConvBias::Format::CHWN4);
share_in_channel = (bias.ndim == 5 && bias[1] == 1 && bias[2] == 1 &&
bias[3] == 1);
}
return share_in_channel;
}
SmallVector<size_t> matmul_get_workspace_bundle(
const BiasForwardSizeArgs& args) {
auto dtype = args.src_layout->dtype;
......
......@@ -126,9 +126,6 @@ namespace conv_bias {
}
};
bool check_bias_share_in_channel(const TensorLayout& bias,
const param::ConvBias::Format format);
} // namespace conv_bias
} // namespace cuda
} // namespace megdnn
......
......@@ -15,6 +15,7 @@
#include "src/cuda/convolution_helper/layout.cuh"
#include "src/cuda/convolution_helper/parameter.cuh"
#include "src/cuda/utils.h"
#include "src/common/conv_bias.h"
using namespace megdnn;
using namespace cuda;
......@@ -83,7 +84,7 @@ bool ConvBiasForwardImpl::AlgoInt8CHWN4DotProdImplicitGemm::is_available(
bool available = true;
auto&& param = args.opr->param();
auto&& fm = args.filter_meta;
if (!conv_bias::check_bias_share_in_channel(*(args.bias_layout),
if (!check_bias_share_in_channel(*(args.bias_layout),
param.format))
return false;
if (param.format != Format::CHWN4)
......
......@@ -15,6 +15,7 @@
#include "src/cuda/convolution_helper/layout.cuh"
#include "src/cuda/convolution_helper/parameter.cuh"
#include "src/cuda/utils.h"
#include "src/common/conv_bias.h"
using namespace megdnn;
using namespace cuda;
......@@ -71,7 +72,7 @@ bool ConvBiasForwardImpl::AlgoInt8CHWN4IMMAImplicitGemm::is_available(
bool available = true;
auto&& param = args.opr->param();
auto&& fm = args.filter_meta;
if (!conv_bias::check_bias_share_in_channel(*(args.bias_layout),
if (!check_bias_share_in_channel(*(args.bias_layout),
param.format))
return false;
if (param.format != Format::CHWN4)
......
......@@ -15,6 +15,7 @@
#include "src/cuda/convolution_helper/layout.cuh"
#include "src/cuda/convolution_helper/parameter.cuh"
#include "src/cuda/utils.h"
#include "src/common/conv_bias.h"
using namespace megdnn;
using namespace cuda;
......@@ -118,7 +119,7 @@ bool ConvBiasForwardImpl::AlgoInt8CHWN4IMMAImplicitGemmReorderFilter::
bool available = true;
auto&& param = args.opr->param();
auto&& fm = args.filter_meta;
if (!conv_bias::check_bias_share_in_channel(*(args.bias_layout),
if (!check_bias_share_in_channel(*(args.bias_layout),
param.format))
return false;
if (param.format != Format::CHWN4)
......
......@@ -15,6 +15,7 @@
#include "src/cuda/convolution_helper/layout.cuh"
#include "src/cuda/convolution_helper/parameter.cuh"
#include "src/cuda/utils.h"
#include "src/common/conv_bias.h"
using namespace megdnn;
using namespace cuda;
......@@ -118,7 +119,7 @@ bool ConvBiasForwardImpl::AlgoInt8CHWN4IMMAImplicitGemmUnrollWidth::
bool available = true;
auto&& param = args.opr->param();
auto&& fm = args.filter_meta;
if (!conv_bias::check_bias_share_in_channel(*(args.bias_layout),
if (!check_bias_share_in_channel(*(args.bias_layout),
param.format))
return false;
if (param.format != Format::CHWN4)
......
......@@ -14,6 +14,7 @@
#include "src/cuda/conv_bias/cutlass_convolution_wrapper.cuh"
#include "src/cuda/convolution_helper/parameter.cuh"
#include "src/cuda/utils.h"
#include "src/common/conv_bias.h"
using namespace megdnn;
using namespace cuda;
......@@ -32,7 +33,7 @@ bool ConvBiasForwardImpl::AlgoInt8NCHW32IMMAImplicitGemm::is_available(
bool available = true;
auto&& param = args.opr->param();
auto&& fm = args.filter_meta;
if (!conv_bias::check_bias_share_in_channel(*(args.bias_layout),
if (!check_bias_share_in_channel(*(args.bias_layout),
param.format))
return false;
if (param.format != Format::NCHW32 && param.format != Format::NCHW32_NCHW4)
......
......@@ -13,6 +13,7 @@
#include "src/cuda/utils.h"
#include "src/cuda/convolution_helper/parameter.cuh"
#include "src/cuda/conv_bias/cutlass_convolution_wrapper.cuh"
#include "src/common/conv_bias.h"
using namespace megdnn;
using namespace cuda;
......@@ -29,7 +30,7 @@ bool ConvBiasForwardImpl::AlgoInt8NCHW4DotProdImplicitGemm::is_available(
bool available = true;
auto&& param = args.opr->param();
auto&& fm = args.filter_meta;
if (!conv_bias::check_bias_share_in_channel(*(args.bias_layout),
if (!check_bias_share_in_channel(*(args.bias_layout),
param.format))
return false;
if (param.format == Format::NCHW4_NCHW32) {
......
......@@ -12,6 +12,7 @@
#include "./algo.h"
#include "src/cuda/utils.h"
#include "src/cuda/convolution_helper/bias_visitor.cuh"
#include "src/common/conv_bias.h"
using namespace megdnn;
using namespace cuda;
......@@ -29,7 +30,7 @@ bool ConvBiasForwardImpl::AlgoInt8NCHW4IMMAImplicitGemm::is_available(
bool available = true;
auto&& param = args.opr->param();
auto&& fm = args.filter_meta;
if (!conv_bias::check_bias_share_in_channel(*(args.bias_layout),
if (!check_bias_share_in_channel(*(args.bias_layout),
param.format))
return false;
if (param.format != Format::NCHW4)
......
......@@ -127,6 +127,9 @@ public:
if (m_attr.is_reproducible) {
ret |= AlgoAttribute::REPRODUCIBLE;
}
if (m_attr.accuracy_depend_on_batch) {
ret |= AlgoAttribute::ACCURACY_DEPEND_ON_BATCH;
}
return ret;
}
cudnnConvolutionBwdDataAlgo_t cudnn_enum() const { return m_cudnn_enum; }
......@@ -158,7 +161,8 @@ public:
const char* name() const override { return "MATMUL"; }
MEGDNN_DECL_ALGO_TYPE(CUDA_MATMUL)
AlgoAttribute attribute() const override {
return AlgoAttribute::REPRODUCIBLE;
return AlgoAttribute::REPRODUCIBLE |
AlgoAttribute::ACCURACY_DEPEND_ON_BATCH;
}
};
......
......@@ -123,6 +123,9 @@ public:
if (m_attr.is_reproducible) {
ret |= AlgoAttribute::REPRODUCIBLE;
}
if (m_attr.accuracy_depend_on_batch) {
ret |= AlgoAttribute::ACCURACY_DEPEND_ON_BATCH;
}
return ret;
}
......@@ -155,7 +158,8 @@ public:
const char* name() const override { return "MATMUL"; }
MEGDNN_DECL_ALGO_TYPE(CUDA_MATMUL)
AlgoAttribute attribute() const override {
return AlgoAttribute::REPRODUCIBLE;
return AlgoAttribute::REPRODUCIBLE |
AlgoAttribute::ACCURACY_DEPEND_ON_BATCH;
}
};
......
......@@ -119,6 +119,9 @@ public:
if (m_attr.is_reproducible) {
ret |= AlgoAttribute::REPRODUCIBLE;
}
if (m_attr.accuracy_depend_on_batch) {
ret |= AlgoAttribute::ACCURACY_DEPEND_ON_BATCH;
}
return ret;
}
......
......@@ -112,6 +112,9 @@ public:
if (m_attr.is_reproducible) {
ret |= AlgoAttribute::REPRODUCIBLE;
}
if (m_attr.accuracy_depend_on_batch) {
ret |= AlgoAttribute::ACCURACY_DEPEND_ON_BATCH;
}
return ret;
}
......
......@@ -106,7 +106,8 @@ public:
const char* name() const override { return "1x1x1"; }
AlgoAttribute attribute() const override {
return AlgoAttribute::REPRODUCIBLE;
return AlgoAttribute::REPRODUCIBLE |
AlgoAttribute::ACCURACY_DEPEND_ON_BATCH;
}
MEGDNN_DECL_ALGO_TYPE(CUDA_1X1X1)
};
......@@ -126,10 +127,17 @@ public:
const char* name() const override { return m_name.c_str(); }
AlgoAttribute attribute() const override {
auto ret = static_cast<AlgoAttribute>(0);
auto ret = AlgoAttribute::DEFAULT;
if (m_impl->contain_attribute_all(AlgoAttribute::REPRODUCIBLE)) {
ret |= AlgoAttribute::REPRODUCIBLE;
}
#define cb(attr) \
if (m_impl->contain_attribute_all(attr)) { \
ret |= attr; \
}
MEGDNN_FOREACH_ALGO_ATTRIBUTE_INHERITABLE(cb)
#undef cb
return ret;
}
static void modify_size_args(SizeArgs& args, TensorLayout& src_pg,
......@@ -157,6 +165,9 @@ public:
if (m_attr.is_reproducible) {
ret |= AlgoAttribute::REPRODUCIBLE;
}
if (m_attr.accuracy_depend_on_batch) {
ret |= AlgoAttribute::ACCURACY_DEPEND_ON_BATCH;
}
return ret;
}
......
......@@ -470,9 +470,9 @@ void Conv3DDesc::set(const param::Convolution3D& param, const size_t nr_group) {
#define V(v) V1(v)
#define DEF_NAME(NAME) \
#NAME "v" V(CUDNN_MAJOR) "." V(CUDNN_MINOR) "." V(CUDNN_PATCHLEVEL)
#define DEF_ALGO(NAME, PROD) \
{ \
NAME, { DEF_NAME(NAME), PROD } \
#define DEF_ALGO(NAME, PROD1, PROD2) \
{ \
NAME, { DEF_NAME(NAME), PROD1, PROD2 } \
}
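// Illustrative expansion with the extra flag (PROD1 = is_reproducible,
// PROD2 = accuracy_depend_on_batch); the version suffix depends on the
// local cuDNN, e.g. assuming cuDNN 8.0.4:
//     DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT, true, true)
// expands to
//     { CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT,
//       { "CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFTv8.0.4", true, true } }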
#if !(CUDNN_MAJOR >= 6 || CUDNN_MINOR >= 1)
......@@ -483,19 +483,18 @@ const std::unordered_map<cudnnConvolutionBwdDataAlgo_t, CudnnAlgoPack::Attr>
CudnnAlgoPack::conv_bwd_data_algos() {
static const std::unordered_map<cudnnConvolutionBwdDataAlgo_t,
CudnnAlgoPack::Attr>
algos = {
DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_0, false),
DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_1, true),
DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT, true),
DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING, true),
algos =
{ DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_0, false, false),
DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_1, true, false),
DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT, true, true),
DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING, true, true),
#if CUDNN_MAJOR >= 5
DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD, true),
DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD, true, false),
#if CUDNN_MAJOR >= 6 || CUDNN_MINOR >= 1
DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED,
true),
DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED, true, false),
#endif
#endif
};
};
return algos;
}
......@@ -505,15 +504,16 @@ CudnnAlgoPack::conv_bwd_flt_algos() {
static const std::unordered_map<cudnnConvolutionBwdFilterAlgo_t,
CudnnAlgoPack::Attr>
algos = {
DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0, false),
DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1, true),
DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT, true),
DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3, false),
DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0, false, false),
DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1, true, false),
DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT, true, true),
DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3, false, false),
#if CUDNN_MAJOR >= 6 || (CUDNN_MAJOR >= 5 && CUDNN_MINOR >= 1)
DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED,
true),
true, false),
#if CUDNN_MAJOR >= 6
DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING, true),
DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING, true,
true),
#endif
#endif
......@@ -522,28 +522,30 @@ CudnnAlgoPack::conv_bwd_flt_algos() {
return algos;
}
const std::unordered_map<cudnnConvolutionFwdAlgo_t, CudnnAlgoPack::Attr>
CudnnAlgoPack::conv_fwd_algos() {
static const std::unordered_map<cudnnConvolutionFwdAlgo_t,
CudnnAlgoPack::Attr>
algos = {
DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM, true),
DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM,
true),
DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_GEMM, true),
DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_DIRECT, true),
DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_FFT, true),
DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING, true),
algos =
{ DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM, true, false),
#if CUDNN_VERSION == 8004
DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM, true, true),
#else
DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM, true, false),
#endif
DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_GEMM, true, false),
DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_DIRECT, true, false),
DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_FFT, true, true),
DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING, true, true),
#if CUDNN_MAJOR >= 5
DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD, true),
DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD, true, false),
#if CUDNN_MAJOR >= 6 || CUDNN_MINOR >= 1
DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED, true),
DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED, true, false),
#endif
#endif
};
};
return algos;
}
......@@ -553,9 +555,10 @@ CudnnAlgoPack::conv3d_bwd_data_algos() {
static const std::unordered_map<cudnnConvolutionBwdDataAlgo_t,
CudnnAlgoPack::Attr>
algos = {
DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_0, false),
DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_1, true),
DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING, true),
DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_0, false, false),
DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_1, true, false),
DEF_ALGO(CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING, true,
true),
};
return algos;
......@@ -568,9 +571,9 @@ CudnnAlgoPack::conv3d_bwd_flt_algos() {
static const std::unordered_map<cudnnConvolutionBwdFilterAlgo_t,
CudnnAlgoPack::Attr>
algos = {
DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0, false),
DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1, true),
DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3, false),
DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0, false, false),
DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1, true, false),
DEF_ALGO(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3, false, false),
};
return algos;
......@@ -581,10 +584,15 @@ CudnnAlgoPack::conv3d_fwd_algos() {
static const std::unordered_map<cudnnConvolutionFwdAlgo_t,
CudnnAlgoPack::Attr>
algos = {
DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM, true),
DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM,
true),
DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING, true),
DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM, true, false),
#if CUDNN_VERSION == 8004
DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM, true,
true),
#else
DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM, true,
false),
#endif
DEF_ALGO(CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING, true, true),
};
return algos;
......
......@@ -112,6 +112,7 @@ public:
struct Attr {
std::string name;
bool is_reproducible;
bool accuracy_depend_on_batch;
};
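// Each cuDNN-backed attribute() forwards the new flag (a pattern repeated
// throughout this diff):
//     if (m_attr.accuracy_depend_on_batch) {
//         ret |= AlgoAttribute::ACCURACY_DEPEND_ON_BATCH;
//     }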
static const std::unordered_map<cudnnConvolutionBwdDataAlgo_t, Attr>
......
......@@ -115,7 +115,8 @@ public:
MEGDNN_DECL_ALGO_TYPE(CUDA_CUBLAS)
AlgoAttribute attribute() const override {
return AlgoAttribute::REPRODUCIBLE |
AlgoAttribute::USABLE_DEPEND_ON_SHAPE;
AlgoAttribute::USABLE_DEPEND_ON_SHAPE |
AlgoAttribute::ACCURACY_DEPEND_ON_BATCH;
}
};
......@@ -142,7 +143,8 @@ public:
void exec(const ExecArgs& args) const override;
MEGDNN_DECL_ALGO_TYPE(CUDA_CUBLASLT)
AlgoAttribute attribute() const override {
return AlgoAttribute::REPRODUCIBLE;
return AlgoAttribute::REPRODUCIBLE |
AlgoAttribute::ACCURACY_DEPEND_ON_BATCH;
}
};
#endif
......
......@@ -25,7 +25,8 @@ public:
size_t get_workspace(const KernSizeParam&) const override { return 0; }
kern_t get_kern(const KernSizeParam&) const override;
AlgoAttribute attribute() const override {
return AlgoAttribute::REPRODUCIBLE;
return AlgoAttribute::REPRODUCIBLE |
AlgoAttribute::ACCURACY_DEPEND_ON_BATCH;
}
PackMode packmode() const override { return PackMode::NO_PACK; }
MEGDNN_OVERRIDE_MATMUL_DESC(8, 16, 1, 4, AlgoDataType::FLOAT32, DEFAULT)
......@@ -36,7 +37,8 @@ public:
class MatrixMulImpl::AlgoF32MKLPackA : public AlgoBase {
public:
AlgoAttribute attribute() const override {
return AlgoAttribute::REPRODUCIBLE;
return AlgoAttribute::REPRODUCIBLE |
AlgoAttribute::ACCURACY_DEPEND_ON_BATCH;
}
const char* name() const override { return "X86_F32_MKL_PACKA"; }
bool usable(const KernSizeParam&) const override;
......
/**
* \file dnn/test/common/accuracy_shake_checker.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "test/common/accuracy_shake_checker.h"
using namespace megdnn;
using namespace test;
namespace {
template <typename ctype>
::testing::AssertionResult assert_tensor_binary_eq(
const char* expr0, const char* expr1, const char* /*expr2*/,
const TensorND& v0, const TensorND& v1, const std::string& algo_name) {
ctype* it0_orig = v0.ptr<ctype>();
ctype* it1 = v1.ptr<ctype>();
ctype* it0 = it0_orig;
auto nr_elem = v1.layout.total_nr_elems();
auto nr_elem_single_batch = v0.layout.total_nr_elems();
for (size_t i = 0; i < nr_elem; ++i) {
if (i % nr_elem_single_batch == 0) {
it0 = it0_orig;
}
ctype iv0 = *it0, iv1 = *it1;
if (!good_float(iv0) || !good_float(iv1) ||
memcmp(it0, it1, sizeof(ctype))) {
Index index(v1.layout, i);
return ::testing::AssertionFailure()
<< "Unequal value\n"
<< "Value of: " << expr1 << "\n"
<< " Actual: " << (iv1 + 0) << "\n"
<< "Expected: " << expr0 << "\n"
<< "Which is: " << (iv0 + 0) << "\n"
<< "At index: " << index.to_string() << "/"
<< v1.layout.TensorShape::to_string() << "\n"
<< " DType: " << v1.layout.dtype.name() << "\n"
<< "algo: " << algo_name;
}
++it0;
++it1;
}
return ::testing::AssertionSuccess();
}
} // namespace
::testing::AssertionResult test::__assert_tensor_binary_eq(
const char* expr0, const char* expr1, const char* expr2,
const TensorND& v0, const TensorND& v1,
const Algorithm::Info::Desc& algo) {
bool shape_match = v0.layout[0] == 1;
for (size_t i = 1; i < v0.layout.ndim; ++i) {
shape_match &= v0.layout[i] == v1.layout[i];
}
if (!shape_match) {
return ::testing::AssertionFailure()
<< "Shape mismatch\n"
<< "Value of: " << expr1 << "\n"
<< " Actual: " << v1.layout.TensorShape::to_string() << "\n"
<< "Expected: " << expr0 << "\n"
<< "Which is: " << v0.layout.TensorShape::to_string() << "\n"
<< "algo: " << algo.name << "\n";
}
if (!v0.layout.is_physical_contiguous() ||
!v1.layout.is_physical_contiguous()) {
return ::testing::AssertionFailure()
<< "layout should be physical contiguous\n"
<< "Value of: " << expr1 << "\n"
<< " Actual: " << v1.layout.is_physical_contiguous() << "\n"
<< "Expected: " << expr0 << "\n"
<< "Which is: " << v0.layout.is_physical_contiguous() << "\n"
<< "algo: " << algo.name << "\n";
}
auto dtype = v0.layout.dtype;
if (dtype != v1.layout.dtype) {
return ::testing::AssertionFailure()
<< "Data type should match\n"
<< "Value of: " << expr1 << "\n"
<< " Actual: " << v1.layout.dtype.name() << "\n"
<< "Expected: " << expr0 << "\n"
<< "Which is: " << v0.layout.dtype.name() << "\n"
<< "algo: " << algo.name << "\n";
}
switch (dtype.enumv()) {
#define cb(_dt) \
case DTypeTrait<_dt>::enumv: \
return assert_tensor_binary_eq<DTypeTrait<_dt>::ctype>( \
expr0, expr1, expr2, v0, v1, algo.name);
MEGDNN_FOREACH_COMPUTING_DTYPE(cb)
MEGDNN_FOREACH_QUANTIZED_DTYPE(cb)
#undef cb
default : megdnn_trap();
}
}
// vim: syntax=cpp.doxygen
/**
* \file dnn/test/common/accuracy_shake_checker.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#pragma once
#include <vector>
#include "megdnn/oprs.h"
#include "src/common/conv_bias.h"
#include "src/common/utils.h"
#include "test/common/checker.h"
#include "test/common/index.h"
namespace megdnn {
namespace test {
namespace {
template <class Opr>
struct BatchTrait {
//! index of the batch dimension in the tensor layout, e.g. 3 for CHWN4
static size_t index_of_batch(const typename Opr::Param&) { return 0; }
//! indices of the inputs/outputs whose layouts contain a batch dimension, e.g. src(0) and dst(2) for conv
static std::vector<size_t> indices_contain_batch;
static std::vector<size_t> indices_contain_batch_broadcast;
};
template <class Opr>
std::vector<size_t> BatchTrait<Opr>::indices_contain_batch = {};
template <class Opr>
std::vector<size_t> BatchTrait<Opr>::indices_contain_batch_broadcast = {};
#define DEFAULT_INDEX_OF_BATCH(opr) \
static size_t index_of_batch(const opr::Param&) { return 0; }
#define CONV_INDEX_OF_BATCH(opr) \
static size_t index_of_batch(const opr::Param& p) { \
if (p.format == opr::Param::Format::CHWN4) { \
return 3; \
} \
return 0; \
}
#define OPR_WITHOUT_INPUT_BROADCAST(INDEX_OF_BATCH, opr, idxs, idxs_brdcst) \
template <> \
struct BatchTrait<opr> { \
INDEX_OF_BATCH(opr) \
static std::vector<size_t> indices_contain_batch; \
static std::vector<size_t> indices_contain_batch_broadcast; \
}; \
std::vector<size_t> BatchTrait<opr>::indices_contain_batch = idxs; \
std::vector<size_t> BatchTrait<opr>::indices_contain_batch_broadcast = \
idxs_brdcst;
OPR_WITHOUT_INPUT_BROADCAST(DEFAULT_INDEX_OF_BATCH,
megdnn::Convolution3DForward,
(std::initializer_list<size_t>{0, 2}), {})
OPR_WITHOUT_INPUT_BROADCAST(DEFAULT_INDEX_OF_BATCH,
megdnn::Convolution3DBackwardData,
(std::initializer_list<size_t>{1, 2}), {})
OPR_WITHOUT_INPUT_BROADCAST(DEFAULT_INDEX_OF_BATCH,
megdnn::Convolution3DBackwardFilter,
(std::initializer_list<size_t>{0, 1}), {})
OPR_WITHOUT_INPUT_BROADCAST(DEFAULT_INDEX_OF_BATCH, megdnn::BatchedMatrixMul,
(std::initializer_list<size_t>{0, 1, 2}), {})
OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH, megdnn::ConvolutionForward,
(std::initializer_list<size_t>{0, 2}), {})
OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH,
megdnn::ConvolutionBackwardData,
(std::initializer_list<size_t>{1, 2}), {})
OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH,
megdnn::ConvolutionBackwardFilter,
(std::initializer_list<size_t>{0, 1}), {})
OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH, megdnn::LocalShareForward,
(std::initializer_list<size_t>{0, 2}), {})
OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH, megdnn::LocalShareBackwardData,
(std::initializer_list<size_t>{1, 2}), {})
OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH,
megdnn::LocalShareBackwardFilter,
(std::initializer_list<size_t>{0, 1}), {})
OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH, megdnn::DeformableConvForward,
(std::initializer_list<size_t>{0, 2, 3, 4}), {})
OPR_WITHOUT_INPUT_BROADCAST(
CONV_INDEX_OF_BATCH, megdnn::DeformableConvBackwardData,
(std::initializer_list<size_t>{0, 2, 3, 4, 5, 6, 7}), {})
OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH,
megdnn::DeformableConvBackwardFilter,
(std::initializer_list<size_t>{0, 1, 2, 3}), {})
OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH, megdnn::BatchConvBiasForward,
(std::initializer_list<size_t>{0, 1, 2, 3, 4}), {})
OPR_WITHOUT_INPUT_BROADCAST(CONV_INDEX_OF_BATCH, megdnn::ConvBiasForward,
(std::initializer_list<size_t>{0, 3, 4}), {2})
#undef OPR_WITHOUT_INPUT_BROADCAST
#undef DEFAULT_INDEX_OF_BATCH
#undef CONV_INDEX_OF_BATCH
template <class Opr>
struct LayoutsModifier {
static void on(TensorLayoutArray& layouts, const typename Opr::Param& p,
size_t new_batch_size) {
size_t batch_index = BatchTrait<Opr>::index_of_batch(p);
for (size_t index : BatchTrait<Opr>::indices_contain_batch) {
layouts.at(index)[batch_index] = new_batch_size;
}
for (size_t index : BatchTrait<Opr>::indices_contain_batch_broadcast) {
if (!check_bias_share_in_channel(layouts.at(index), p.format)) {
layouts.at(index)[batch_index] = new_batch_size;
}
}
}
};
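// Sketch of the effect (hypothetical ConvolutionForward layouts, batch
// index 0; indices_contain_batch == {0, 2}):
//     TensorLayoutArray layouts = {{64, 16, 32, 32},   // src
//                                  {64, 16, 3, 3},     // filter, untouched
//                                  {64, 64, 30, 30}};  // dst
//     LayoutsModifier<megdnn::ConvolutionForward>::on(layouts, param, 1);
//     // -> src becomes {1, 16, 32, 32}, dst becomes {1, 64, 30, 30}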
#define OPR_NO_BIAS(opr) \
template <> \
struct LayoutsModifier<opr> { \
static void on(TensorLayoutArray& layouts, \
const typename opr::Param& p, size_t new_batch_size) { \
size_t batch_index = BatchTrait<opr>::index_of_batch(p); \
for (size_t index : BatchTrait<opr>::indices_contain_batch) { \
layouts.at(index)[batch_index] = new_batch_size; \
} \
} \
};
OPR_NO_BIAS(megdnn::Convolution3D)
OPR_NO_BIAS(megdnn::BatchedMatrixMul)
#undef OPR_NO_BIAS
template <>
struct LayoutsModifier<megdnn::MatrixMul> {
public:
static void on(TensorLayoutArray& layouts,
const megdnn::MatrixMul::Param& p,
size_t new_batch_size) {
assert(!p.transposeA && !p.transposeB);
MEGDNN_MARK_USED_VAR(p);
layouts.at(0)[0] = new_batch_size;
layouts.at(2)[0] = new_batch_size;
}
};
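// For MatrixMul the M dimension acts as the batch: only A ({M, K}, layout
// 0) and C ({M, N}, layout 2) are resized; B has no batch dimension, and
// transposed layouts are ruled out by the assert above.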
template <class Opr, typename OprAlgoProxy = OprAlgoProxy<Opr>>
class AlgoGenerator {
public:
AlgoGenerator(ExecutionPolicyAlgoName name)
: m_policy_name{name} {}
std::vector<Algorithm::Info::Desc> operator()(
Opr* opr, const CheckerHelper::TensorValueArray& arr) {
TensorLayoutArray layouts;
for (auto&& val : arr) {
layouts.push_back(val.layout);
}
std::vector<Algorithm::Info::Desc> ret;
megdnn_assert(layouts.size() == OprTrait<Opr>::arity);
for (auto algo_info :
AlgoProxy<Opr, OprTrait<Opr>::arity>::get_all_algorithms_info(
opr, layouts)) {
if (!(algo_info.attribute &
AlgoAttribute::ACCURACY_DEPEND_ON_BATCH) &&
std::regex_match(
algo_info.desc.name,
std::regex("(.*)(" + m_policy_name.name + ")(.*)"))) {
ret.push_back(algo_info.desc);
} else {
continue;
}
}
return ret;
}
private:
ExecutionPolicyAlgoName m_policy_name;
};
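// AlgoGenerator keeps only algorithms that do NOT carry
// ACCURACY_DEPEND_ON_BATCH and whose name matches the given substring,
// e.g. (as in the x86 tests below):
//     checker.set_before_exec_callback(AlgoGenerator<ConvBiasForward>("X86"));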
} // namespace
::testing::AssertionResult __assert_tensor_binary_eq(
const char* expr0, const char* expr1, const char* expr2,
const TensorND& v0, const TensorND& v1,
const Algorithm::Info::Desc& algo);
template <typename Opr, typename Proxy = OprProxy<Opr>>
class AccuracyShakeChecker : public CheckerHelper {
public:
static constexpr int arity_in = OprArityTrait<Opr>::arity_in;
using Param = typename Opr::Param;
using BeforeExecCallback = std::function<std::vector<Algorithm::Info::Desc>(
Opr*, const TensorValueArray&)>;
AccuracyShakeChecker(Handle* handle, bool check_dispatch = false)
: CheckerHelper(handle, check_dispatch),
m_before_exec_callback{AlgoGenerator<Opr>("")},
m_param(Param()) {}
TensorLayoutArray make_layouts(const TensorShapeArray& shapes) {
TensorLayoutArray layouts(shapes.size());
for (size_t i = 0; i < shapes.size(); ++i) {
DType dt = (m_dtype.find(i) != m_dtype.end() ? m_dtype[i]
: dtype::Float32());
TensorFormat fmt =
(m_fmt.find(i) != m_fmt.end() ? m_fmt[i] : TensorFormat{});
layouts[i] = TensorLayout(shapes[i], dt, fmt);
}
return layouts;
}
/*!
* \brief execute opr on current param/dtype/rng config
* \param shapes input/output shapes, which would be passed as
* arguments to Opr::deduce_layout
*
* Checker would construct TensorLayout vectors from shapes and dtypes,
* and call exec(TensorLayoutArray &).
*/
AccuracyShakeChecker& exec(const TensorShapeArray& shapes) {
exec(make_layouts(shapes));
return *this;
}
void exec(TensorLayoutArray layouts);
AccuracyShakeChecker& set_param(Param p) {
m_param = p;
opr()->param() = p;
return *this;
}
AccuracyShakeChecker& set_dtype(size_t idx, DType dtype) {
m_dtype[idx] = dtype;
return *this;
}
AccuracyShakeChecker& set_rng(size_t idx, RNG* rng) {
m_rng[idx] = rng;
return *this;
}
//! set a callback to be invoked before executing the operator
AccuracyShakeChecker& set_before_exec_callback(
const BeforeExecCallback& cb) {
m_before_exec_callback = cb;
return *this;
}
AccuracyShakeChecker& reset_before_exec_callback() {
m_before_exec_callback = nullptr;
return *this;
}
//! get the opr impl so that settings other than param() can be modified
Opr* opr() {
if (!m_opr_cur) {
m_opr_cur = m_handle_cur->create_operator<Opr>();
}
return m_opr_cur.get();
}
private:
BeforeExecCallback m_before_exec_callback;
Param m_param;
Proxy m_proxy;
std::unique_ptr<Opr> m_opr_cur;
std::shared_ptr<TensorValueArray> m_tensors_cur_host,
m_tensors_single_batch_host;
void init_host_values();
void check_tensors_ignore_batch(
const TensorValueArray& tensors_single_batch,
const TensorValueArray& tensors, const Algorithm::Info::Desc& desc);
};
template <typename Opr, typename Proxy>
void AccuracyShakeChecker<Opr, Proxy>::exec(TensorLayoutArray layouts) {
auto opr_cur = this->opr();
opr_cur->param() = m_param;
m_proxy.deduce_layout(opr_cur, layouts);
TensorLayoutArray layouts_single_batch = layouts;
for (size_t i=0; i<layouts_single_batch.size(); ++i) {
ASSERT_TRUE(layouts[i].is_physical_contiguous())
<< "layouts should be physical contiguous "
<< layouts[i].to_string();
}
ASSERT_TRUE(0 == BatchTrait<Opr>::index_of_batch(opr_cur->param()))
<< "index of batch should be 0 ";
LayoutsModifier<Opr>::on(layouts_single_batch, opr_cur->param(), 1);
// allocate input
auto tensors_single_batch_storage =
alloc_tensors(m_handle_cur, layouts_single_batch, 0);
m_tensors_single_batch_host =
alloc_tensors(m_handle_naive.get(), layouts_single_batch, 0);
auto tensors_cur_storage = alloc_tensors(m_handle_cur, layouts, 0);
m_tensors_cur_host =
alloc_tensors(m_handle_naive.get(), layouts, 0);
auto &&tensors_single_batch = *tensors_single_batch_storage;
auto &&tensors_single_batch_host = *m_tensors_single_batch_host;
auto &&tensors_cur = *tensors_cur_storage;
auto &&tensors_cur_host = *m_tensors_cur_host;
// allocate output
auto tensors_single_batch_storage_out =
alloc_tensors(m_handle_naive.get(), layouts_single_batch, 0);
auto tensors_cur_storage_out =
alloc_tensors(m_handle_naive.get(), layouts, 0);
auto &&tensors_single_batch_out = *tensors_single_batch_storage_out;
auto &&tensors_cur_out = *tensors_cur_storage_out;
init_host_values();
copy_tensors_to_device(tensors_cur, tensors_cur_host);
copy_tensors_to_device(tensors_single_batch, tensors_single_batch_host);
std::vector<Algorithm::Info::Desc> algo_desc;
if (m_before_exec_callback) {
algo_desc = m_before_exec_callback(opr_cur, tensors_cur);
} else {
algo_desc.push_back({});
}
for (size_t i = 0; i < algo_desc.size(); ++i) {
opr_cur->execution_policy().algo = algo_desc[i];
m_proxy.exec(opr_cur, tensors_cur);
m_proxy.exec(opr_cur, tensors_single_batch);
copy_tensors_from_device(tensors_cur_out, tensors_cur);
copy_tensors_from_device(tensors_single_batch_out,
tensors_single_batch);
check_tensors_ignore_batch(tensors_single_batch_out, tensors_cur_out,
algo_desc[i]);
}
}
template <typename Opr, typename Proxy>
void AccuracyShakeChecker<Opr, Proxy>::init_host_values() {
size_t index_of_batch = 0;
auto &&tensors_single_batch = *m_tensors_single_batch_host;
auto &&tensors_cur = *m_tensors_cur_host;
for (size_t i = 0; i < arity_in; ++i) {
auto &&tensor_single_batch = tensors_single_batch[i];
auto &&tensor_cur = tensors_cur[i];
auto rng = m_rng[i];
if (!rng)
rng = m_default_rng.get();
rng->gen(tensor_single_batch);
dt_byte* raw_storage_cur = static_cast<dt_byte*>(tensor_cur.raw_ptr) +
tensor_cur.layout.span().low_byte;
dt_byte* raw_storage_single_batch =
static_cast<dt_byte*>(tensor_single_batch.raw_ptr) +
tensor_single_batch.layout.span().low_byte;
const size_t step = tensor_single_batch.layout.span().dist_byte();
if (tensor_cur.layout.eq_shape(tensor_single_batch.layout)) {
memcpy(raw_storage_cur, raw_storage_single_batch, step);
} else {
ASSERT_TRUE(1 == tensor_single_batch.layout[index_of_batch])
<< "bad batch size "
<< tensor_single_batch.layout[index_of_batch];
for (size_t b=0; b<tensor_cur.layout[index_of_batch]; ++b) {
memcpy(raw_storage_cur, raw_storage_single_batch, step);
raw_storage_cur += step;
}
}
}
}
template <typename Opr, typename Proxy>
void AccuracyShakeChecker<Opr, Proxy>::check_tensors_ignore_batch(
const TensorValueArray& tensors_single_batch,
const TensorValueArray& tensors, const Algorithm::Info::Desc& algo) {
for (size_t i = 0; i < tensors_single_batch.size(); ++i) {
if (tensors_single_batch[i].layout.ndim == 0 ||
tensors_single_batch[i].layout.eq_shape(tensors[i].layout))
continue;
ASSERT_PRED_FORMAT3(::megdnn::test::__assert_tensor_binary_eq,
tensors_single_batch[i], tensors[i], algo);
}
}
} // namespace test
} // namespace megdnn
// vim: syntax=cpp.doxygen
......@@ -19,50 +19,6 @@ using namespace megdnn;
using namespace test;
namespace {
bool good_float(float val) {
return std::isfinite(val);
}
bool good_float(int) {
return true;
}
bool good_float(dt_qint8) {
return true;
}
bool good_float(dt_qint16) {
return true;
}
bool good_float(dt_quint8) {
return true;
}
bool good_float(dt_qint32) {
return true;
}
// A hack for the (x+0) promote to int trick on dt_quint8.
int operator +(dt_quint8 lhs, int rhs) {
megdnn_assert(rhs == 0, "unexpected rhs");
return lhs.as_uint8();
}
int operator +(dt_qint32 lhs, int rhs) {
megdnn_assert(rhs == 0, "unexpected rhs");
return lhs.as_int32();
}
int operator +(dt_qint8 lhs, int rhs) {
megdnn_assert(rhs == 0, "unexpected rhs");
return int8_t(lhs);
}
int operator +(dt_qint16 lhs, int rhs) {
megdnn_assert(rhs == 0, "unexpected rhs");
return lhs.as_int16();
}
template<typename ctype, class Iter>
::testing::AssertionResult assert_tensor_eq_with_iter(
......
......@@ -86,6 +86,7 @@ protected:
size_t m_offset = 0;
CheckerHelper(Handle* handle, bool check_dispatch = true);
~CheckerHelper() noexcept;
using OprExec = std::function<void(const TensorValueArray&)>;
......@@ -100,14 +101,15 @@ protected:
void enable_contig_naive() { m_enable_contig_naive = true; }
private:
std::shared_ptr<TensorValueArray> m_tensors_naive;
void init_naive_values();
void copy_tensors_to_device(const TensorValueArray& dest,
const TensorValueArray& src);
void copy_tensors_from_device(const TensorValueArray& dest,
const TensorValueArray& src);
private:
std::shared_ptr<TensorValueArray> m_tensors_naive;
void init_naive_values();
void check_tensors(const TensorValueArray& expected,
const TensorValueArray& computed);
};
......
......@@ -311,6 +311,51 @@ public:
size_t get_cpu_count();
static inline bool good_float(float val) {
return std::isfinite(val);
}
static inline bool good_float(int) {
return true;
}
static inline bool good_float(dt_qint8) {
return true;
}
static inline bool good_float(dt_qint16) {
return true;
}
static inline bool good_float(dt_quint8) {
return true;
}
static inline bool good_float(dt_qint32) {
return true;
}
// A hack for the (x+0) promote to int trick on dt_quint8.
static inline int operator+(dt_quint8 lhs, int rhs) {
megdnn_assert(rhs == 0, "unexpected rhs");
return lhs.as_uint8();
}
static inline int operator+(dt_qint32 lhs, int rhs) {
megdnn_assert(rhs == 0, "unexpected rhs");
return lhs.as_int32();
}
static inline int operator+(dt_qint8 lhs, int rhs) {
megdnn_assert(rhs == 0, "unexpected rhs");
return int8_t(lhs);
}
static inline int operator+(dt_qint16 lhs, int rhs) {
megdnn_assert(rhs == 0, "unexpected rhs");
return lhs.as_int16();
}
} // namespace test
static inline bool operator==(const TensorLayout& a, const TensorLayout& b) {
......
/**
* \file dnn/test/cuda/accuracy_shake.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "megdnn/dtype.h"
#include "megdnn/oprs.h"
#include "megdnn/opr_param_defs.h"
#include "test/cuda/fixture.h"
#include "test/cuda/utils.h"
#include "test/common/rng.h"
#include "test/common/accuracy_shake_checker.h"
namespace megdnn {
namespace test {
TEST_F(CUDA, SHAKE_CONV_BIAS_FORWARD) {
require_compute_capability(6, 1);
AccuracyShakeChecker<ConvBiasForward> checker(handle_cuda());
NormalRNG default_rng;
checker.set_dtype(0, dtype::Float32())
.set_dtype(1, dtype::Float32())
.set_dtype(2, dtype::Float32())
.set_rng(0, &default_rng)
.set_rng(1, &default_rng);
// convolution
checker.exec({{64, 16, 32, 32}, {64, 16, 3, 3}, {}, {}, {}});
// convbias without z
checker.exec({{64, 16, 32, 32}, {64, 16, 3, 3}, {1, 64, 1, 1}, {}, {}});
// convbias with z
checker.exec({{64, 16, 32, 32},
{64, 16, 3, 3},
{1, 64, 1, 1},
{64, 64, 30, 30},
{}});
ConvBias::Param param;
// group
param.sparse = ConvBias::Param::Sparse::GROUP;
checker.set_param(param);
checker.exec({{64, 16, 32, 32}, {2, 32, 8, 3, 3}, {}, {}, {}});
checker.exec({{64, 16, 32, 32}, {2, 32, 8, 3, 3}, {1, 64, 1, 1}, {}, {}});
checker.exec({{64, 16, 32, 32},
{2, 32, 8, 3, 3},
{1, 64, 1, 1},
{64, 64, 30, 30},
{}});
}
TEST_F(CUDA, SHAKE_CONV_BIAS_FORWARD_QS8_NCHW) {
require_compute_capability(6, 1);
AccuracyShakeChecker<ConvBiasForward> checker(handle_cuda());
UniformIntRNG int_rng{-128, 127};
checker.set_dtype(0, dtype::QuantizedS8(2.5f))
.set_dtype(1, dtype::QuantizedS8(2.5f))
.set_dtype(2, dtype::QuantizedS32(6.25f))
.set_dtype(3, dtype::QuantizedS8(0.25f))
.set_dtype(4, dtype::QuantizedS8(0.25f))
.set_rng(0, &int_rng)
.set_rng(1, &int_rng)
.set_rng(2, &int_rng)
.set_rng(3, &int_rng);
// convolution
checker.exec({{64, 16, 32, 32}, {64, 16, 3, 3}, {}, {}, {}});
// convbias without z
checker.exec({{64, 16, 32, 32}, {64, 16, 3, 3}, {1, 64, 1, 1}, {}, {}});
// convbias with z
checker.exec({{64, 16, 32, 32},
{64, 16, 3, 3},
{1, 64, 1, 1},
{64, 64, 30, 30},
{}});
// group
ConvBias::Param param;
param.sparse = ConvBias::Param::Sparse::GROUP;
checker.set_param(param);
checker.exec({{64, 16, 32, 32}, {2, 32, 8, 3, 3}, {}, {}, {}});
checker.exec({{64, 16, 32, 32}, {2, 32, 8, 3, 3}, {1, 64, 1, 1}, {}, {}});
checker.exec({{64, 16, 32, 32},
{2, 32, 8, 3, 3},
{1, 64, 1, 1},
{64, 64, 30, 30},
{}});
}
TEST_F(CUDA, SHAKE_CONV_BIAS_FORWARD_QS8_NHWC) {
require_compute_capability(6, 1);
UniformIntRNG int_rng{-50, 50};
AccuracyShakeChecker<ConvBiasForward> checker(handle_cuda());
ConvBias::Param param;
param.format = ConvBias::Param::Format::NHWC;
checker.set_dtype(0, dtype::QuantizedS8(2.5f))
.set_dtype(1, dtype::QuantizedS8(2.5f))
.set_dtype(2, dtype::QuantizedS32(6.25f))
.set_dtype(4, dtype::QuantizedS8(60.25f))
.set_rng(0, &int_rng)
.set_rng(1, &int_rng)
.set_rng(2, &int_rng)
.set_param(param);
checker.exec({{20, 32, 32, 4}, {24, 1, 1, 4}, {1, 1, 1, 24}, {}, {}});
param.sparse = ConvBias::Param::Sparse::GROUP;
checker.set_param(param).exec(
{{20, 32, 32, 16}, {4, 4, 1, 1, 4}, {1, 1, 1, 16}, {}, {}});
}
TEST_F(CUDA, SHAKE_CONV_BIAS_FORWARD_QS8_NCHWX) {
using Format = ConvBias::Param::Format;
require_compute_capability(6, 1);
AccuracyShakeChecker<ConvBiasForward> checker(handle_cuda());
UniformIntRNG int_rng{-5, 5};
UniformFloatRNG float_rng{-50, 50};
checker.set_dtype(0, dtype::QuantizedS8(1.2f))
.set_dtype(1, dtype::QuantizedS8(1.3f))
.set_dtype(2, dtype::QuantizedS32(1.2 * 1.3f))
.set_dtype(3, dtype::QuantizedS8(1.3f))
.set_dtype(4, dtype::QuantizedS8(1.3f))
.set_rng(0, &int_rng)
.set_rng(1, &int_rng)
.set_rng(2, &int_rng)
.set_rng(3, &int_rng);
auto run = [&](const TensorShapeArray& shapes, const Format& format) {
ConvBias::Param param;
param.format = format;
checker.set_param(param).exec(
{shapes[0], shapes[1], shapes[2], {}, {}});
};
run({{20, 2, 24, 24, 4}, {24, 2, 3, 3, 4}, {1, 6, 1, 1, 4}}, Format::NCHW4);
run({{20, 1, 24, 24, 32}, {64, 1, 3, 3, 32}, {1, 2, 1, 1, 32}},
Format::NCHW32);
run({{16, 4, 23, 40, 4},
{32, 4, 3, 3, 4},
{1, 1, 1, 1, 32}}, Format::NCHW4_NCHW32);
checker.set_dtype(0, dtype::QuantizedS8(1.9980618f))
.set_dtype(1, dtype::QuantizedS8(1.9980927f))
.set_dtype(2, dtype::Float32())
.set_dtype(3, dtype::Float32())
.set_dtype(4, dtype::Float32())
.set_rng(0, &int_rng)
.set_rng(1, &int_rng)
.set_rng(2, &float_rng)
.set_rng(3, &float_rng);
run({{16, 4, 92, 160, 4}, {20, 4, 3, 3, 4}, {1, 20, 1, 1}},
Format::NCHW4_NCHW);
}
TEST_F(CUDA, SHAKE_MATRIX_MUL_FORWARD) {
AccuracyShakeChecker<MatrixMul> checker(handle_cuda());
checker.set_dtype(0, dtype::Float32())
.set_dtype(1, dtype::Float32())
.set_dtype(2, dtype::Float32())
.exec({{50, 100}, {100, 60}, {}});
}
TEST_F(CUDA, SHAKE_BATCH_CONV_BIAS_QS8) {
require_compute_capability(6, 1);
AccuracyShakeChecker<BatchConvBiasForward> checker(handle_cuda());
UniformIntRNG const_rng{1, 1};
UniformIntRNG rng{-5, 5};
UniformIntRNG bias_rng{-50, 50};
checker.set_rng(0, &rng)
.set_rng(1, &rng)
.set_rng(2, &rng)
.set_rng(3, &rng)
.set_dtype(0, dtype::QuantizedS8{1.2f})
.set_dtype(1, dtype::QuantizedS8{1.3f})
.set_dtype(2, dtype::QuantizedS32{1.2f * 1.3f})
.set_dtype(3, dtype::QuantizedS8{1.1f})
.set_dtype(4, dtype::QuantizedS8{1.1f});
param::BatchConvBias param;
param.pad_h = 2, param.pad_w = 1;
param.stride_h = 1, param.stride_w = 2;
param.format = param::BatchConvBias::Format::NCHW4;
checker.set_param(param).exec({{32, 4, 24, 24, 4},
{32, 32, 4, 1, 1, 4},
{1, 8, 1, 1, 4},
{},
{}});
}
TEST_F(CUDA, SHAKE_BATCHED_MATRIX_MUL) {
AccuracyShakeChecker<BatchedMatrixMul> checker(handle_cuda());
UniformIntRNG int_rng{-127, 127};
NormalRNG default_rng;
checker.set_dtype(0, dtype::QuantizedS8(1.2f))
.set_dtype(1, dtype::QuantizedS8(1.3f))
.set_dtype(2, {})
.set_rng(0, &int_rng)
.set_rng(1, &int_rng);
checker.exec({{20, 424, 368}, {20, 368, 256}, {20, 424, 256}});
checker.set_dtype(0, dtype::Float32())
.set_dtype(1, dtype::Float32())
.set_dtype(2, dtype::Float32())
.set_rng(0, &default_rng)
.set_rng(1, &default_rng);
checker.exec({{20, 424, 368}, {20, 368, 256}, {20, 424, 256}});
}
TEST_F(CUDA, SHAKE_CONVOLUTION3D_FORWARD) {
AccuracyShakeChecker<Convolution3DForward> checker(handle_cuda());
NormalRNG default_rng;
float scale = 1.0f / sqrt(5);
UniformFloatRNG rng(scale, 2 * scale);
param::Convolution3D param;
param.mode = param::Convolution3D::Mode::CROSS_CORRELATION;
param.stride_d = param.stride_h = param.stride_w = 2;
param.pad_d = param.pad_h = param.pad_w = 0;
param.dilate_d = param.dilate_h = param.dilate_w = 1;
checker.set_dtype(0, dtype::Float32())
.set_dtype(1, dtype::Float32())
.set_rng(0, &default_rng)
.set_rng(1, &default_rng)
.set_param(param)
.exec({{20, 5, 12, 12, 16}, {5, 5, 3, 3, 3}, {}});
}
TEST_F(CUDA, SHAKE_LOCAL_SHARE) {
AccuracyShakeChecker<LocalShare> checker(handle_cuda());
using Param = LocalShare::Param;
Param param;
param.spatial_groups_h = param.spatial_groups_w = 3;
checker.set_param(param);
checker.exec({{20, 16, 32, 32}, {3, 3, 16, 3, 3, 64}, {}});
}
} // namespace test
} // namespace megdnn
// vim: syntax=cpp.doxygen
......@@ -20,6 +20,7 @@
#include "test/common/rng.h"
#include "test/cuda/benchmark.h"
#include "src/cuda/utils.h"
#include "test/common/accuracy_shake_checker.h"
#define V1(x) #x
#define V(x) V1(x)
......
/**
* \file dnn/test/x86/accuracy_shake.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "test/x86/fixture.h"
#include "megdnn/opr_param_defs.h"
#include "megdnn/oprs.h"
#include "test/common/accuracy_shake_checker.h"
#include "test/common/convolution.h"
#include "test/common/rng.h"
#include "test/common/tensor.h"
#include "test/common/workspace_wrapper.h"
namespace megdnn {
namespace test {
TEST_F(X86, SHAKE_CONV_BIAS_FORWARD) {
AccuracyShakeChecker<ConvBiasForward> checker(handle());
NormalRNG default_rng;
checker.set_dtype(0, dtype::Float32())
.set_dtype(1, dtype::Float32())
.set_dtype(2, dtype::Float32())
.set_rng(0, &default_rng)
.set_rng(1, &default_rng);
checker.set_before_exec_callback(AlgoGenerator<ConvBiasForward>("X86"));
// convolution
checker.exec({{6, 16, 32, 32}, {64, 16, 3, 3}, {}, {}, {}});
// convbias without z
checker.exec({{6, 16, 32, 32}, {64, 16, 3, 3}, {1, 64, 1, 1}, {}, {}});
// convbias with z
checker.exec({{6, 16, 32, 32},
{64, 16, 3, 3},
{1, 64, 1, 1},
{6, 64, 30, 30},
{}});
// group
ConvBias::Param param;
param.sparse = ConvBias::Param::Sparse::GROUP;
checker.set_param(param);
checker.exec({{6, 16, 32, 32}, {2, 32, 8, 3, 3}, {}, {}, {}});
checker.exec({{6, 16, 32, 32}, {2, 32, 8, 3, 3}, {1, 64, 1, 1}, {}, {}});
checker.exec({{6, 16, 32, 32},
{2, 32, 8, 3, 3},
{1, 64, 1, 1},
{6, 64, 30, 30},
{}});
}
TEST_F(X86, SHAKE_CONV_BIAS_FORWARD_INT8) {
AccuracyShakeChecker<ConvBiasForward> checker(handle());
UniformIntRNG rng{-50, 50};
checker.set_dtype(0, dtype::QuantizedS8(2.5f))
.set_dtype(1, dtype::QuantizedS8(2.5f))
.set_dtype(2, dtype::QuantizedS32(6.25f))
.set_dtype(3, dtype::QuantizedS32(6.25f))
.set_dtype(4, {})
.set_rng(0, &rng)
.set_rng(1, &rng)
.set_rng(2, &rng);
checker.set_before_exec_callback(AlgoGenerator<ConvBiasForward>("X86"));
// convolution
checker.exec({{6, 16, 32, 32}, {64, 16, 3, 3}, {}, {}, {}});
// convbias without z
checker.exec({{6, 16, 32, 32}, {64, 16, 3, 3}, {1, 64, 1, 1}, {}, {}});
// convbias with z
checker.exec({{6, 16, 32, 32},
{64, 16, 3, 3},
{1, 64, 1, 1},
{6, 64, 30, 30},
{}});
// group
ConvBias::Param param;
param.sparse = ConvBias::Param::Sparse::GROUP;
checker.set_param(param);
checker.exec({{6, 16, 32, 32}, {2, 32, 8, 3, 3}, {}, {}, {}});
checker.exec({{6, 16, 32, 32}, {2, 32, 8, 3, 3}, {1, 64, 1, 1}, {}, {}});
checker.exec({{6, 16, 32, 32},
{2, 32, 8, 3, 3},
{1, 64, 1, 1},
{6, 64, 30, 30},
{}});
}
TEST_F(X86, SHAKE_MATRIX_MUL_FORWARD) {
AccuracyShakeChecker<MatrixMul> checker(handle());
checker.set_dtype(0, dtype::Float32())
.set_dtype(1, dtype::Float32())
.set_dtype(2, dtype::Float32())
.exec({{20, 100}, {100, 60}, {}});
}
} // namespace test
} // namespace megdnn
// vim: syntax=cpp.doxygen
......@@ -15,6 +15,7 @@
#include "megdnn/oprs.h"
#include "test/common/benchmarker.h"
#include "test/common/checker.h"
#include "test/common/accuracy_shake_checker.h"
#include "test/common/convolution.h"
#include "test/common/rng.h"
#include "test/common/tensor.h"
......
......@@ -18,9 +18,7 @@
#include "megbrain/comp_node.h"
#include "megdnn/basic_types.h"
#include "megdnn/oprs/base.h"
#include "megdnn/oprs/linalg.h"
#include "megdnn/oprs/nn.h"
#include "megdnn/oprs.h"
namespace mgb {
namespace opr {
......@@ -45,39 +43,6 @@ namespace opr {
cb(BatchedMatrixMul)
// clang-format on
template <typename Opr>
struct OprArityTrait;
template <typename Opr, int _arity_in, int _arity_out>
struct OprArityTraitTmpl {
static constexpr int arity_in = _arity_in;
static constexpr int arity_out = _arity_out;
static constexpr int arity = arity_in + arity_out;
};
#define INST_ARITY(_Opr, _in, _out) \
template <> \
struct OprArityTrait<_Opr> : public OprArityTraitTmpl<_Opr, _in, _out> {};
INST_ARITY(megdnn::ConvolutionBackwardData, 2, 1);
INST_ARITY(megdnn::ConvolutionBackwardFilter, 2, 1);
INST_ARITY(megdnn::Convolution3DForward, 2, 1);
INST_ARITY(megdnn::Convolution3DBackwardData, 2, 1);
INST_ARITY(megdnn::Convolution3DBackwardFilter, 2, 1);
INST_ARITY(megdnn::LocalShareForward, 2, 1);
INST_ARITY(megdnn::LocalShareBackwardData, 2, 1);
INST_ARITY(megdnn::LocalShareBackwardFilter, 2, 1);
INST_ARITY(megdnn::Convolution, 2, 1);
INST_ARITY(megdnn::DeformableConvForward, 4, 1);
INST_ARITY(megdnn::DeformableConvBackwardFilter, 4, 1);
INST_ARITY(megdnn::BatchConvBiasForward, 4, 1);
INST_ARITY(megdnn::ConvBias, 4, 1);
INST_ARITY(megdnn::DeformableConvBackwardData, 5, 3);
INST_ARITY(megdnn::MatrixMul, 2, 1);
INST_ARITY(megdnn::BatchedMatrixMul, 2, 1);
#undef INST_ARITY
template <typename Opr>
constexpr bool opr_supports_preprocess() {
return std::is_same<Opr, megdnn::ConvolutionForward>::value ||
......