diff --git a/dnn/include/megdnn/oprs/nn.h b/dnn/include/megdnn/oprs/nn.h
index 3b1a5caaa6c4d9322f9d57a7da5190f541e48e94..d871976731ef7427b9948ff026c05a5c8e58896a 100644
--- a/dnn/include/megdnn/oprs/nn.h
+++ b/dnn/include/megdnn/oprs/nn.h
@@ -60,7 +60,7 @@ struct PreprocessedFilter {
     TensorNDArray tensors;
 };
 
-} // namespace intl
+} // namespace detail
 
 /**
  * \brief base class for convolution operation
@@ -1562,6 +1562,58 @@ protected:
 };
 using BatchConvBias = BatchConvBiasForward;
 
+class FakeQuantBase : public OperatorBase {
+    DEF_OPR_IMPL_CTOR(FakeQuantBase, OperatorBase);
+    DEF_OPR_PARAM(FakeQuant);
+
+protected:
+    void deduce_layout_fwd(const TensorLayout& input, TensorLayout& output);
+    void check_layout_fwd(const TensorLayout& input, const TensorLayout& scale,
+                          const TensorLayout& zero_point,
+                          const TensorLayout& output);
+};
+
+class FakeQuantForward : public FakeQuantBase {
+    DEF_OPR_IMPL(FakeQuantForward, FakeQuantBase, 3, 1);
+
+public:
+    virtual void exec(_megdnn_tensor_in input, _megdnn_tensor_in scale,
+                      _megdnn_tensor_in zero_point, _megdnn_tensor_out output,
+                      _megdnn_workspace workspace) = 0;
+    void deduce_layout(const TensorLayout& input, const TensorLayout& scale,
+                       const TensorLayout& zero_point, TensorLayout& output);
+    virtual size_t get_workspace_in_bytes(const TensorLayout& input,
+                                          const TensorLayout& scale,
+                                          const TensorLayout& zero_point,
+                                          const TensorLayout& output) = 0;
+
+protected:
+    void check_exec(const TensorLayout& input, const TensorLayout& scale,
+                    const TensorLayout& zero_point, const TensorLayout& output,
+                    size_t workspace_in_bytes);
+};
+
+using FakeQuant = FakeQuantForward;
+
+class FakeQuantBackward : public FakeQuantBase {
+    DEF_OPR_IMPL(FakeQuantBackward, FakeQuantBase, 4, 1);
+
+public:
+    virtual void exec(_megdnn_tensor_in diff, _megdnn_tensor_in input,
+                      _megdnn_tensor_in scale, _megdnn_tensor_in zero_point,
+                      _megdnn_tensor_out grad, _megdnn_workspace workspace) = 0;
+    virtual size_t get_workspace_in_bytes(const TensorLayout& diff,
+                                          const TensorLayout& input,
+                                          const TensorLayout& scale,
+                                          const TensorLayout& zero_point,
+                                          const TensorLayout& grad) = 0;
+
+protected:
+    void check_exec(const TensorLayout& diff, const TensorLayout& input,
+                    const TensorLayout& scale, const TensorLayout& zero_point,
+                    const TensorLayout& grad, size_t workspace_in_bytes);
+};
+
 } // namespace megdnn
 
 #include "megdnn/internal/opr_header_epilogue.h"
diff --git a/dnn/scripts/opr_param_defs.py b/dnn/scripts/opr_param_defs.py
index 6379eba1626891917ee898c600bc2369e822487c..c1fc3d1ecaf14a7ed5ac2b4a93cb12a36dd0fc67 100755
--- a/dnn/scripts/opr_param_defs.py
+++ b/dnn/scripts/opr_param_defs.py
@@ -943,5 +943,9 @@ when the ``I`` suffix is present.
  add_enum_alias('Format', 'ConvolutionV0').
  add_enum_alias('ComputeMode', 'Convolution', name_field="compute_mode")
  )
 
+(pdef('FakeQuant').
+ add_fields('int32','qmin','-2147483648').
+ add_fields('int32','qmax','2147483647')
+ )
 
diff --git a/dnn/src/common/fake_quant.cpp b/dnn/src/common/fake_quant.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..96700a6fc2e0a213c299e1dd53d916d4ba1bced0
--- /dev/null
+++ b/dnn/src/common/fake_quant.cpp
@@ -0,0 +1,66 @@
+/**
+ * \file dnn/src/common/fake_quant.cpp
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ */
+
+#include "megdnn/oprs.h"
+#include "src/common/utils.h"
+
+namespace megdnn {
+
+void FakeQuantBase::deduce_layout_fwd(const TensorLayout& input,
+                                      TensorLayout& output) {
+    output = TensorLayout(input, input.dtype);
+}
+
+void FakeQuantBase::check_layout_fwd(const TensorLayout& input,
+                                     const TensorLayout& scale,
+                                     const TensorLayout& zero_point,
+                                     const TensorLayout& output) {
+    megdnn_assert(input.dtype == dtype::Float32());
+    megdnn_assert(scale.dtype == dtype::Float32());
+    megdnn_assert(zero_point.dtype == dtype::Float32());
+    TensorLayout expected;
+    deduce_layout_fwd(input, expected);
+    megdnn_assert_eq_layout(expected, output);
+}
+
+void FakeQuantForward::deduce_layout(const TensorLayout& input,
+                                     const TensorLayout& /*scale*/,
+                                     const TensorLayout& /*zero_point*/,
+                                     TensorLayout& output) {
+    deduce_layout_fwd(input, output);
+}
+
+void FakeQuantForward::check_exec(const TensorLayout& input,
+                                  const TensorLayout& scale,
+                                  const TensorLayout& zero_point,
+                                  const TensorLayout& output,
+                                  size_t workspace_in_bytes) {
+    check_layout_fwd(input, scale, zero_point, output);
+    auto required_workspace_space =
+            get_workspace_in_bytes(input, scale, zero_point, output);
+    megdnn_assert(workspace_in_bytes >= required_workspace_space);
+}
+
+void FakeQuantBackward::check_exec(const TensorLayout& diff,
+                                   const TensorLayout& input,
+                                   const TensorLayout& scale,
+                                   const TensorLayout& zero_point,
+                                   const TensorLayout& grad,
+                                   size_t workspace_in_bytes) {
+    megdnn_assert_eq_shape(input, diff);
+    megdnn_assert_eq_shape(input, grad);
+    auto required_workspace_space =
+            get_workspace_in_bytes(diff, input, scale, zero_point, grad);
+    megdnn_assert(workspace_in_bytes >= required_workspace_space);
+}
+
+} // namespace megdnn
\ No newline at end of file
diff --git a/dnn/src/common/handle_impl.h b/dnn/src/common/handle_impl.h
index 84df453d74c127ba9fbdd35e9f304b2929d86cae..17d217c8da9a9024610149107d356f6228b97c11 100644
--- a/dnn/src/common/handle_impl.h
+++ b/dnn/src/common/handle_impl.h
@@ -201,7 +201,9 @@ private:
     cb(RemapBackwardMat) \
     cb(AdaptivePoolingForward) \
     cb(AdaptivePoolingBackward) \
-    cb(DctChannelSelectForward)
+    cb(DctChannelSelectForward) \
+    cb(FakeQuantForward) \
+    cb(FakeQuantBackward)
 
 /*!
  * \brief specialize HandleImpl::create_operator for a single opr type;
diff --git a/dnn/src/cuda/elemwise_helper.cuh b/dnn/src/cuda/elemwise_helper.cuh
index db548bc682d206abe97d943d2e7ea8dfa02c464b..d360ae5d670e08297cd29a3a93e8cb58ad411a91 100644
--- a/dnn/src/cuda/elemwise_helper.cuh
+++ b/dnn/src/cuda/elemwise_helper.cuh
@@ -13,9 +13,9 @@
 #pragma once
 
 #include "src/common/elemwise_helper.cuh"
-#include "src/cuda/utils.cuh"
 #include "src/cuda/int_fastdiv.cuh"
 #include "src/cuda/query_blocksize.cuh"
+#include "src/cuda/utils.cuh"
 
 /*
  * please note that all arithmetics on GPU are 32-bit for best performance; this
@@ -649,6 +649,102 @@ struct OpCallerUniform {
     }
 };
 
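+//! the arity == 4 and arity == 5 specializations below follow the existing
+//! lower-arity pattern; they are used by the FakeQuant non-contiguous
+//! kernels, which take 4 and 5 parameter visitors respectively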
+//! specialization for arity == 4
+template <class Op, class PVis>
+struct OpCallerUniform<Op, 4, PVis> {
+    Op op;
+    PVis par[4];
+    static const uint32_t packed_size = PVis::packed_size;
+
+    devfunc void thread_init(uint32_t idx) {
+        idx = idx * packed_size;
+        par[0].thread_init(idx);
+        par[1].thread_init(idx);
+        par[2].thread_init(idx);
+        par[3].thread_init(idx);
+    }
+
+    devfunc void on(uint32_t idx) {
+        idx = idx * packed_size;
+        op(idx, par[0].at(idx), par[1].at(idx), par[2].at(idx), par[3].at(idx));
+    }
+
+    devfunc void on(uint32_t idx, uint32_t remain) {
+        idx = idx * packed_size;
+        if (remain >= packed_size) {
+            op(idx, par[0].at(idx), par[1].at(idx), par[2].at(idx),
+               par[3].at(idx));
+        } else {
+            auto ptr0 = par[0].ptr();
+            auto ptr1 = par[1].ptr();
+            auto ptr2 = par[2].ptr();
+            auto ptr3 = par[3].ptr();
+            for (int i = 0; i < remain; i++) {
+                op(idx + i, ptr0[par[0].offset(idx + i)],
+                   ptr1[par[1].offset(idx + i)], ptr2[par[2].offset(idx + i)],
+                   ptr3[par[3].offset(idx + i)]);
+            }
+        }
+    }
+
+    devfunc void next() {
+        par[0].next();
+        par[1].next();
+        par[2].next();
+        par[3].next();
+    }
+};
+
+//! specialization for arity == 5
+template <class Op, class PVis>
+struct OpCallerUniform<Op, 5, PVis> {
+    Op op;
+    PVis par[5];
+    static const uint32_t packed_size = PVis::packed_size;
+
+    devfunc void thread_init(uint32_t idx) {
+        idx = idx * packed_size;
+        par[0].thread_init(idx);
+        par[1].thread_init(idx);
+        par[2].thread_init(idx);
+        par[3].thread_init(idx);
+        par[4].thread_init(idx);
+    }
+
+    devfunc void on(uint32_t idx) {
+        idx = idx * packed_size;
+        op(idx, par[0].at(idx), par[1].at(idx), par[2].at(idx), par[3].at(idx),
+           par[4].at(idx));
+    }
+
+    devfunc void on(uint32_t idx, uint32_t remain) {
+        idx = idx * packed_size;
+        if (remain >= packed_size) {
+            op(idx, par[0].at(idx), par[1].at(idx), par[2].at(idx),
+               par[3].at(idx), par[4].at(idx));
+        } else {
+            auto ptr0 = par[0].ptr();
+            auto ptr1 = par[1].ptr();
+            auto ptr2 = par[2].ptr();
+            auto ptr3 = par[3].ptr();
+            auto ptr4 = par[4].ptr();
+            for (int i = 0; i < remain; i++) {
+                op(idx + i, ptr0[par[0].offset(idx + i)],
+                   ptr1[par[1].offset(idx + i)], ptr2[par[2].offset(idx + i)],
+                   ptr3[par[3].offset(idx + i)], ptr4[par[4].offset(idx + i)]);
+            }
+        }
+    }
+
+    devfunc void next() {
+        par[0].next();
+        par[1].next();
+        par[2].next();
+        par[3].next();
+        par[4].next();
+    }
+};
+
 /*!
  * \brief call binary (i.e. arity == 2) operator with different param
  * visitors
diff --git a/dnn/src/cuda/fake_quant/kern.cu b/dnn/src/cuda/fake_quant/kern.cu
new file mode 100644
index 0000000000000000000000000000000000000000..d781e37798949254a2c6643dd73e824a2f91eb05
--- /dev/null
+++ b/dnn/src/cuda/fake_quant/kern.cu
@@ -0,0 +1,30 @@
+/**
+ * \file dnn/src/cuda/fake_quant/kern.cu
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ */
+
+#include "./kern.cuh"
+
+namespace megdnn {
+namespace cuda {
+
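+// explicitly instantiate the elemwise runner for each FakeQuant kernel;
+// only Float32 is instantiated for now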
+#define cb(_dtype)                                                            \
+    INST_RUN_ELEMWISE(FakeQuantKernOp<DTypeTrait<_dtype>::ctype>,             \
+                      DTypeTrait<_dtype>::ctype, 2);                          \
+    INST_RUN_ELEMWISE(FakeQuantBwdKernOp<DTypeTrait<_dtype>::ctype>,          \
+                      DTypeTrait<_dtype>::ctype, 2);                          \
+    INST_RUN_ELEMWISE(FakeQuantKernOpNonContig<DTypeTrait<_dtype>::ctype>,    \
+                      DTypeTrait<_dtype>::ctype, 4);                          \
+    INST_RUN_ELEMWISE(FakeQuantBwdKernOpNonContig<DTypeTrait<_dtype>::ctype>, \
+                      DTypeTrait<_dtype>::ctype, 5);
+cb(megdnn::dtype::Float32)
+
+} // namespace cuda
+} // namespace megdnn
\ No newline at end of file
diff --git a/dnn/src/cuda/fake_quant/kern.cuh b/dnn/src/cuda/fake_quant/kern.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..5cc2ed74b115bef5523da30984febef70cbce8d1
--- /dev/null
+++ b/dnn/src/cuda/fake_quant/kern.cuh
@@ -0,0 +1,106 @@
+/**
+ * \file dnn/src/cuda/fake_quant/kern.cuh
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ */
+
+#pragma once
+
+#include "src/cuda/elemwise_helper.cuh"
+#include "src/cuda/utils.cuh"
+
+#if MEGDNN_CC_HOST
+#include "megdnn/oprs.h"
+#endif
+
+namespace megdnn {
+namespace cuda {
+
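+//! each op computes fake quantization elementwise: quantize onto the integer
+//! grid, clamp, then dequantize, staying in floating point:
+//!   q = clamp(round(x / scale) + zero_point, qmin, qmax)
+//!   y = (q - zero_point) * scale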
+template <typename ctype>
+struct FakeQuantKernOp {
+    ctype* input;
+    ctype* output;
+    ctype qmin, qmax;
+
+    __device__ void operator()(uint32_t idx, ctype scale, ctype zero_point) {
+        ctype x = round(input[idx] / scale) + zero_point;
+        x = fmaxf(fminf(x, qmax), qmin);
+        output[idx] = (x - zero_point) * scale;
+    }
+
+#if MEGDNN_CC_HOST
+    FakeQuantKernOp(const TensorND& input, const TensorND& output,
+                    const FakeQuant::Param& param)
+            : input{input.ptr<ctype>()},
+              output{output.ptr<ctype>()},
+              qmin(param.qmin),
+              qmax(param.qmax) {}
+#endif
+};
+
+template <typename ctype>
+struct FakeQuantBwdKernOp {
+    ctype* diff;
+    ctype* input;
+    ctype* grad;
+    ctype qmin, qmax;
+
+    __device__ void operator()(uint32_t idx, ctype scale, ctype zero_point) {
+        ctype x = round(input[idx] / scale) + zero_point;
+        grad[idx] = x <= qmax && x >= qmin ? diff[idx] : 0.0;
+    }
+
+#if MEGDNN_CC_HOST
+    FakeQuantBwdKernOp(const TensorND& diff, const TensorND& input,
+                       const TensorND& grad, const FakeQuant::Param& param)
+            : diff{diff.ptr<ctype>()},
+              input{input.ptr<ctype>()},
+              grad{grad.ptr<ctype>()},
+              qmin(param.qmin),
+              qmax(param.qmax) {}
+#endif
+};
+
+template <typename ctype>
+struct FakeQuantKernOpNonContig {
+    ctype qmin;
+    ctype qmax;
+
+    __device__ void operator()(uint32_t, ctype& output, ctype input,
+                               ctype scale, ctype zero_point) {
+        ctype x = round(input / scale) + zero_point;
+        x = fmaxf(fminf(x, qmax), qmin);
+        output = (x - zero_point) * scale;
+    }
+
+#if MEGDNN_CC_HOST
+    FakeQuantKernOpNonContig(const FakeQuant::Param& param)
+            : qmin(param.qmin), qmax(param.qmax) {}
+#endif
+};
+
+template <typename ctype>
+struct FakeQuantBwdKernOpNonContig {
+    ctype qmin;
+    ctype qmax;
+
+    __device__ void operator()(uint32_t, ctype& grad, ctype diff, ctype input,
+                               ctype scale, ctype zero_point) {
+        ctype x = round(input / scale) + zero_point;
+        grad = x <= qmax && x >= qmin ? diff : 0.0;
+    }
+
+#if MEGDNN_CC_HOST
+    FakeQuantBwdKernOpNonContig(const FakeQuant::Param& param)
+            : qmin(param.qmin), qmax(param.qmax) {}
+#endif
+};
+
+} // namespace cuda
+} // namespace megdnn
diff --git a/dnn/src/cuda/fake_quant/opr_impl.cpp b/dnn/src/cuda/fake_quant/opr_impl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..04ee2480398f0ad23a72727258feed04a6c42279
--- /dev/null
+++ b/dnn/src/cuda/fake_quant/opr_impl.cpp
@@ -0,0 +1,134 @@
+/**
+ * \file dnn/src/cuda/fake_quant/opr_impl.cpp
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ */
+
+#include "./opr_impl.h"
+#include "./kern.cuh"
+#include "src/common/utils.h"
+namespace megdnn {
+namespace cuda {
+
+void FakeQuantForwardImpl::exec(_megdnn_tensor_in input,
+                                _megdnn_tensor_in scale,
+                                _megdnn_tensor_in zero_point,
+                                _megdnn_tensor_out output,
+                                _megdnn_workspace workspace) {
+    check_exec(input.layout, scale.layout, zero_point.layout, output.layout,
+               workspace.size);
+
+    if (!input.layout.is_contiguous() || !output.layout.is_contiguous()) {
+        return exec_noncontig(input, scale, zero_point, output);
+    }
+    ElemwiseOpParamN<2> ele_param;
+    ele_param[0] = scale;
+    ele_param[0].layout = ele_param[0].layout.broadcast(input.layout);
+    ele_param[1] = zero_point;
+    ele_param[1].layout = ele_param[1].layout.broadcast(input.layout);
+    ele_param.init_from_given_tensor();
+    auto m_param = param();
+    auto stream = cuda_stream(handle());
+
+#define cb(DType)                                                         \
+    if (input.layout.dtype == DType()) {                                  \
+        using T = typename DTypeTrait<DType>::ctype;                      \
+        run_elemwise<FakeQuantKernOp<T>, T, 2>(ele_param, stream,         \
+                                               {input, output, m_param}); \
+        return;                                                           \
+    }
+    cb(megdnn::dtype::Float32)
+#undef cb
+}
+
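+//! non-contiguous layouts cannot use the raw-pointer fast path above, so
+//! input/output are passed through parameter visitors as well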
+void FakeQuantForwardImpl::exec_noncontig(_megdnn_tensor_in input,
+                                          _megdnn_tensor_in scale,
+                                          _megdnn_tensor_in zero_point,
+                                          _megdnn_tensor_out output) {
+    ElemwiseOpParamN<4> ele_param;
+    ele_param[0] = output;
+    ele_param[1] = input;
+    ele_param[2] = scale;
+    ele_param[2].layout = ele_param[2].layout.broadcast(input.layout);
+    ele_param[3] = zero_point;
+    ele_param[3].layout = ele_param[3].layout.broadcast(input.layout);
+    ele_param.init_from_given_tensor();
+    auto m_param = param();
+    auto stream = cuda_stream(handle());
+
+#define cb(DType)                                                       \
+    if (input.layout.dtype == DType()) {                                \
+        using T = typename DTypeTrait<DType>::ctype;                    \
+        run_elemwise<FakeQuantKernOpNonContig<T>, T, 4>(ele_param,      \
+                                                        stream,         \
+                                                        {m_param});     \
+        return;                                                         \
+    }
+    cb(megdnn::dtype::Float32)
+#undef cb
+}
+
+void FakeQuantBackwardImpl::exec(_megdnn_tensor_in diff,
+                                 _megdnn_tensor_in input,
+                                 _megdnn_tensor_in scale,
+                                 _megdnn_tensor_in zero_point,
+                                 _megdnn_tensor_out grad,
+                                 _megdnn_workspace workspace) {
+    check_exec(diff.layout, input.layout, scale.layout, zero_point.layout,
+               grad.layout, workspace.size);
+
+    if (!input.layout.is_contiguous() || !diff.layout.is_contiguous() ||
+        !grad.layout.is_contiguous()) {
+        return exec_noncontig(diff, input, scale, zero_point, grad);
+    }
+    ElemwiseOpParamN<2> ele_param;
+    ele_param[0] = scale;
+    ele_param[0].layout = ele_param[0].layout.broadcast(input.layout);
+    ele_param[1] = zero_point;
+    ele_param[1].layout = ele_param[1].layout.broadcast(input.layout);
+    ele_param.init_from_given_tensor();
+    auto m_param = param();
+    auto stream = cuda_stream(handle());
+#define cb(DType)                                                  \
+    if (grad.layout.dtype == DType()) {                            \
+        using T = typename DTypeTrait<DType>::ctype;               \
+        run_elemwise<FakeQuantBwdKernOp<T>, T, 2>(                 \
+                ele_param, stream, {diff, input, grad, m_param});  \
+        return;                                                    \
+    }
+    cb(megdnn::dtype::Float32)
+#undef cb
+}
+
+void FakeQuantBackwardImpl::exec_noncontig(_megdnn_tensor_in diff,
+                                           _megdnn_tensor_in input,
+                                           _megdnn_tensor_in scale,
+                                           _megdnn_tensor_in zero_point,
+                                           _megdnn_tensor_out grad) {
+    ElemwiseOpParamN<5> ele_param;
+    ele_param[0] = grad;
+    ele_param[1] = diff;
+    ele_param[2] = input;
+    ele_param[3] = scale;
+    ele_param[3].layout = ele_param[3].layout.broadcast(input.layout);
+    ele_param[4] = zero_point;
+    ele_param[4].layout = ele_param[4].layout.broadcast(input.layout);
+    ele_param.init_from_given_tensor();
+    auto m_param = param();
+    auto stream = cuda_stream(handle());
+#define cb(DType)                                                          \
+    if (grad.layout.dtype == DType()) {                                    \
+        using T = typename DTypeTrait<DType>::ctype;                       \
+        run_elemwise<FakeQuantBwdKernOpNonContig<T>, T, 5>(ele_param,      \
+                                                           stream,         \
+                                                           {m_param});     \
+        return;                                                            \
+    }
+    cb(megdnn::dtype::Float32)
+#undef cb
+}
+
+} // namespace cuda
+} // namespace megdnn
diff --git a/dnn/src/cuda/fake_quant/opr_impl.h b/dnn/src/cuda/fake_quant/opr_impl.h
new file mode 100644
index 0000000000000000000000000000000000000000..b4d0b5dab7731ea001c3354139c2c888c79307e3
--- /dev/null
+++ b/dnn/src/cuda/fake_quant/opr_impl.h
@@ -0,0 +1,55 @@
+/**
+ * \file dnn/src/cuda/fake_quant/opr_impl.h
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ */
+
+#pragma once
+#include "megdnn/oprs.h"
+#include "src/cuda/utils.h"
+namespace megdnn {
+namespace cuda {
+class FakeQuantForwardImpl : public FakeQuantForward {
+public:
+    using FakeQuantForward::FakeQuantForward;
+    void exec(_megdnn_tensor_in input, _megdnn_tensor_in scale,
+              _megdnn_tensor_in zero_point, _megdnn_tensor_out output,
+              _megdnn_workspace workspace) override;
+    size_t get_workspace_in_bytes(const TensorLayout&, const TensorLayout&,
+                                  const TensorLayout&,
+                                  const TensorLayout&) override {
+        return 0;
+    }
+
+private:
+    void exec_noncontig(_megdnn_tensor_in input, _megdnn_tensor_in scale,
+                        _megdnn_tensor_in zero_point,
+                        _megdnn_tensor_out output);
+};
+
+class FakeQuantBackwardImpl : public FakeQuantBackward {
+public:
+    using FakeQuantBackward::FakeQuantBackward;
+    void exec(_megdnn_tensor_in diff, _megdnn_tensor_in input,
+              _megdnn_tensor_in scale, _megdnn_tensor_in zero_point,
+              _megdnn_tensor_out grad, _megdnn_workspace workspace) override;
+    size_t get_workspace_in_bytes(const TensorLayout&, const TensorLayout&,
+                                  const TensorLayout&, const TensorLayout&,
+                                  const TensorLayout&) override {
+        return 0;
+    }
+
+private:
+    void exec_noncontig(_megdnn_tensor_in diff, _megdnn_tensor_in input,
+                        _megdnn_tensor_in scale, _megdnn_tensor_in zero_point,
+                        _megdnn_tensor_out grad);
+};
+
+} // namespace cuda
+} // namespace megdnn
diff --git a/dnn/src/cuda/handle_create.cpp b/dnn/src/cuda/handle_create.cpp
index e3b8ccbe6cef1a83ad9c9221ca7f9d7f9000076a..1dc4015dfac34864fe236dd4d6fbd1c9d23ec3d9 100644
--- a/dnn/src/cuda/handle_create.cpp
+++ b/dnn/src/cuda/handle_create.cpp
@@ -77,6 +77,7 @@
 #include "src/cuda/roi_align/opr_impl.h"
 #include "src/cuda/batch_conv_bias/opr_impl.h"
 #include "src/cuda/remap/opr_impl.h"
+#include "src/cuda/fake_quant/opr_impl.h"
 
 namespace megdnn {
 namespace cuda {
diff --git a/dnn/src/naive/fake_quant/opr_impl.cpp b/dnn/src/naive/fake_quant/opr_impl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..466365888f091f1cdf2b060f5a6dfad3c1b7a760
--- /dev/null
+++ b/dnn/src/naive/fake_quant/opr_impl.cpp
@@ -0,0 +1,118 @@
+/**
+ * \file dnn/src/naive/fake_quant/opr_impl.cpp
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ */
+
+#include "src/naive/fake_quant/opr_impl.h"
+#include <algorithm>
+#include <cmath>
+#include "megdnn/tensor_iter.h"
+#include "src/common/elemwise_helper.cuh"
+#include "src/common/utils.h"
+#include "src/naive/handle.h"
+namespace {
+using namespace megdnn;
+
+template <typename T>
+void forward_impl(const ElemwiseOpParamN<4> src, float qmin, float qmax) {
+    auto inp = tensor_iter_valonly<T>(src[0]).begin();
+    auto out = tensor_iter_valonly<T>(src[1]).begin();
+    auto scale = tensor_iter_valonly<T>(src[2]).begin();
+    auto zero_point = tensor_iter_valonly<T>(src[3]).begin();
+    size_t total = src[0].layout.total_nr_elems();
+    for (size_t i = 0; i < total; ++i) {
+        T x = round(*inp / (*scale)) + *zero_point;
+        x = x <= qmin ? qmin : x;
+        x = x >= qmax ? qmax : x;
+        *out = (x - *zero_point) * *scale;
+        ++inp;
+        ++out;
+        ++scale;
+        ++zero_point;
+    }
+}
+
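+//! straight-through estimator: the incoming gradient is passed through
+//! where the quantized value falls inside [qmin, qmax] and zeroed elsewhere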
+template <typename T>
+void backward_impl(const ElemwiseOpParamN<5> src, float qmin, float qmax) {
+    auto diff = tensor_iter_valonly<T>(src[0]).begin();
+    auto input = tensor_iter_valonly<T>(src[1]).begin();
+    auto scale = tensor_iter_valonly<T>(src[2]).begin();
+    auto zero_point = tensor_iter_valonly<T>(src[3]).begin();
+    auto grad = tensor_iter_valonly<T>(src[4]).begin();
+    size_t total = src[0].layout.total_nr_elems();
+    for (size_t i = 0; i < total; ++i) {
+        T x = round(*input / (*scale)) + *zero_point;
+        *grad = (x >= qmin && x <= qmax) ? *diff : 0.0;
+        ++diff;
+        ++input;
+        ++scale;
+        ++zero_point;
+        ++grad;
+    }
+}
+
+} // namespace
+namespace megdnn {
+namespace naive {
+
+void FakeQuantForwardImpl::exec(_megdnn_tensor_in input,
+                                _megdnn_tensor_in scale,
+                                _megdnn_tensor_in zero_point,
+                                _megdnn_tensor_out output,
+                                _megdnn_workspace workspace) {
+    check_exec(input.layout, scale.layout, zero_point.layout, output.layout,
+               workspace.size);
+    ElemwiseOpParamN<4> src;
+    src[0] = input;
+    src[1] = output;
+    src[2] = scale;
+    src[2].layout = src[2].layout.broadcast(input.layout);
+    src[3] = zero_point;
+    src[3].layout = src[3].layout.broadcast(input.layout);
+#define cb(DType)                                                   \
+    if (input.layout.dtype == DType()) {                            \
+        using T = typename DTypeTrait<DType>::ctype;                \
+        MEGDNN_DISPATCH_CPU_KERN_OPR(                               \
+                forward_impl<T>(src, param().qmin, param().qmax));  \
+        return;                                                     \
+    }
+    cb(dtype::Float32)
+#undef cb
+}
+
+void FakeQuantBackwardImpl::exec(_megdnn_tensor_in diff,
+                                 _megdnn_tensor_in input,
+                                 _megdnn_tensor_in scale,
+                                 _megdnn_tensor_in zero_point,
+                                 _megdnn_tensor_out grad,
+                                 _megdnn_workspace workspace) {
+    check_exec(diff.layout, input.layout, scale.layout, zero_point.layout,
+               grad.layout, workspace.size);
+    ElemwiseOpParamN<5> src;
+    src[0] = diff;
+    src[1] = input;
+    src[2] = scale;
+    src[2].layout = src[2].layout.broadcast(input.layout);
+    src[3] = zero_point;
+    src[3].layout = src[3].layout.broadcast(input.layout);
+    src[4] = grad;
+#define cb(DType)                                                        \
+    if (diff.layout.dtype == DType() && grad.layout.dtype == DType() &&  \
+        input.layout.dtype == DType()) {                                 \
+        using T = typename DTypeTrait<DType>::ctype;                     \
+        MEGDNN_DISPATCH_CPU_KERN_OPR(                                    \
+                backward_impl<T>(src, param().qmin, param().qmax));      \
+        return;                                                          \
+    }
+    cb(dtype::Float32)
+#undef cb
+}
+} // namespace naive
+} // namespace megdnn
diff --git a/dnn/src/naive/fake_quant/opr_impl.h b/dnn/src/naive/fake_quant/opr_impl.h
new file mode 100644
index 0000000000000000000000000000000000000000..8353edd37346413d718b8c7199ecd06b0b5be388
--- /dev/null
+++ b/dnn/src/naive/fake_quant/opr_impl.h
@@ -0,0 +1,45 @@
+/**
+ * \file dnn/src/naive/fake_quant/opr_impl.h
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ */
+
+#pragma once
+#include "megdnn/oprs.h"
+
+namespace megdnn {
+namespace naive {
+class FakeQuantForwardImpl : public FakeQuantForward {
+public:
+    using FakeQuantForward::FakeQuantForward;
+    void exec(_megdnn_tensor_in input, _megdnn_tensor_in scale,
+              _megdnn_tensor_in zero_point, _megdnn_tensor_out output,
+              _megdnn_workspace workspace) override;
+    size_t get_workspace_in_bytes(const TensorLayout&, const TensorLayout&,
+                                  const TensorLayout&,
+                                  const TensorLayout&) override {
+        return 0;
+    }
+};
+
+class FakeQuantBackwardImpl : public FakeQuantBackward {
+public:
+    using FakeQuantBackward::FakeQuantBackward;
+    void exec(_megdnn_tensor_in diff, _megdnn_tensor_in input,
+              _megdnn_tensor_in scale, _megdnn_tensor_in zero_point,
+              _megdnn_tensor_out grad, _megdnn_workspace workspace) override;
+    size_t get_workspace_in_bytes(const TensorLayout&, const TensorLayout&,
+                                  const TensorLayout&, const TensorLayout&,
+                                  const TensorLayout&) override {
+        return 0;
+    }
+};
+
+} // namespace naive
+} // namespace megdnn
diff --git a/dnn/src/naive/handle.cpp b/dnn/src/naive/handle.cpp
index 77f70009cef34ef5077d6ed3c1687eb9823f0ab0..2f235ef885f1180babfb0444abeace762bced696 100644
--- a/dnn/src/naive/handle.cpp
+++ b/dnn/src/naive/handle.cpp
@@ -79,6 +79,8 @@
 #include "src/naive/warp_affine/opr_impl.h"
 #include "src/naive/warp_perspective/opr_impl.h"
 #include "src/naive/winograd_filter_preprocess/opr_impl.h"
+#include "src/naive/remap/opr_impl.h"
+#include "src/naive/fake_quant/opr_impl.h"
 
 static size_t g_image2d_pitch_alignment = 1;
 
diff --git a/dnn/test/common/fake_quant.h b/dnn/test/common/fake_quant.h
new file mode 100644
index 0000000000000000000000000000000000000000..f4657fd46112adb2b076a88676fe2fe044904c0e
--- /dev/null
+++ b/dnn/test/common/fake_quant.h
@@ -0,0 +1,60 @@
+/**
+ * \file dnn/test/common/fake_quant.h
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ */
+
+#pragma once
+#include "megdnn/basic_types.h"
+#include "megdnn/opr_param_defs.h"
+
+namespace megdnn {
+namespace test {
+namespace fake_quant {
+
+struct TestArg {
+    param::FakeQuant param;
+    TensorShape ishape;
+    TensorShape scale_shape;
+    TensorShape zeropoint_shape;
+    TestArg(param::FakeQuant param, TensorShape ishape, TensorShape scale_shape,
+            TensorShape zeropoint_shape)
+            : param(param),
+              ishape(ishape),
+              scale_shape(scale_shape),
+              zeropoint_shape(zeropoint_shape) {}
+};
+
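+//! two groups of cases: scalar scale/zero_point over a range of input
+//! sizes, then scale/zero_point shapes that broadcast against the input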
+inline std::vector<TestArg> get_args() {
+    std::vector<TestArg> args;
+    param::FakeQuant cur_param;
+
+    cur_param.qmin = -128;
+    cur_param.qmax = 128;
+
+    for (size_t i = 10; i < 40; i += 2) {
+        args.emplace_back(cur_param, TensorShape{10, 64, i, i}, TensorShape{1},
+                          TensorShape{1});
+    }
+
+    for (size_t m : {1, 10})
+        for (size_t n : {1, 10})
+            for (size_t j : {1, 10})
+                for (size_t k : {1, 10}) {
+                    args.emplace_back(cur_param, TensorShape{10, 64, 10, 10},
+                                      TensorShape{10, 64, m, n},
+                                      TensorShape{10, 64, j, k});
+                }
+    return args;
+}
+
+} // namespace fake_quant
+} // namespace test
+
+} // namespace megdnn
\ No newline at end of file
diff --git a/dnn/test/common/opr_trait.h b/dnn/test/common/opr_trait.h
index 0d4d66422bc9879261209a9709f8601726f1637b..cad36421db38b01cd5443dacc51724f2db96029c 100644
--- a/dnn/test/common/opr_trait.h
+++ b/dnn/test/common/opr_trait.h
@@ -111,6 +111,8 @@ DEF(Remap, 3, true, true);
 DEF(RemapBackwardData, 3, true, false);
 DEF(RemapBackwardMat, 4, true, false);
 DEF(DctChannelSelectForward, 4, true, true);
+DEF(FakeQuantForward, 4, true, true);
+DEF(FakeQuantBackward, 5, true, false);
 
 } // namespace test
 } // namespace megdnn
diff --git a/dnn/test/cuda/fake_quant.cpp b/dnn/test/cuda/fake_quant.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..bb0c554767368c05cc88f1e7f4bb9dbed25d31d0
--- /dev/null
+++ b/dnn/test/cuda/fake_quant.cpp
@@ -0,0 +1,99 @@
+/**
+ * \file dnn/test/cuda/fake_quant.cpp
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ */
+
+#include "test/common/fake_quant.h"
+#include "megdnn/oprs.h"
+#include "test/common/checker.h"
+#include "test/cuda/fixture.h"
+
+namespace megdnn {
+namespace test {
+using namespace fake_quant;
+TEST_F(CUDA, FAKE_QUANT) {
+    std::vector<TestArg> args = get_args();
+    auto dtype = dtype::Float32();
+    std::unique_ptr<RNG> rng;
+
+    for (auto&& arg : args) {
+        auto param = arg.param;
+        auto ishape = arg.ishape;
+        auto scale_shape = arg.scale_shape;
+        auto zeropoint_shape = arg.zeropoint_shape;
+        Checker<FakeQuantForward> checker(handle_cuda());
+        checker.set_param(param)
+                .set_dtype(0, dtype)
+                .set_dtype(1, dtype)
+                .set_dtype(2, dtype)
+                .set_dtype(3, dtype)
+                .execs(TensorShapeArray{ishape, scale_shape, zeropoint_shape,
+                                        ishape});
+    }
+    // test noncontiguous layout
+    for (auto&& arg : args) {
+        auto param = arg.param;
+        auto ishape = arg.ishape;
+        auto scale_shape = arg.scale_shape;
+        auto zeropoint_shape = arg.zeropoint_shape;
+        Checker<FakeQuantForward> checker(handle_cuda());
+        TensorLayout ilayout(
+                ishape,
+                {(long int)(ishape[1] * ishape[2] * ishape[3] * 2),
+                 (long int)(ishape[2] * ishape[3]), (long int)ishape[3], 1},
+                dtype::Float32());
+        checker.set_param(param).execl({ilayout,
+                                        {scale_shape, dtype::Float32()},
+                                        {zeropoint_shape, dtype::Float32()},
+                                        ilayout});
+    }
+}
+
+TEST_F(CUDA, FAKE_QUANT_BACKWARD) {
+    std::vector<TestArg> args = get_args();
+    auto dtype = dtype::Float32();
+
+    for (auto&& arg : args) {
+        auto param = arg.param;
+        auto ishape = arg.ishape;
+        auto scale_shape = arg.scale_shape;
+        auto zeropoint_shape = arg.zeropoint_shape;
+        Checker<FakeQuantBackward> checker(handle_cuda());
+        checker.set_param(param)
+                .set_dtype(0, dtype)
+                .set_dtype(1, dtype)
+                .set_dtype(2, dtype)
+                .set_dtype(3, dtype)
+                .set_dtype(4, dtype)
+                .execs(TensorShapeArray{ishape, ishape, scale_shape,
+                                        zeropoint_shape, ishape});
+    }
+    // test noncontiguous layout
+    for (auto&& arg : args) {
+        auto param = arg.param;
+        auto ishape = arg.ishape;
+        auto scale_shape = arg.scale_shape;
+        auto zeropoint_shape = arg.zeropoint_shape;
+        Checker<FakeQuantBackward> checker(handle_cuda());
+        TensorLayout ilayout(
+                ishape,
+                {(long int)(ishape[1] * ishape[2] * ishape[3] * 2),
+                 (long int)(ishape[2] * ishape[3]), (long int)ishape[3], 1},
+                dtype::Float32());
+        checker.set_param(param).execl({ilayout,
+                                        ilayout,
+                                        {scale_shape, dtype::Float32()},
+                                        {zeropoint_shape, dtype::Float32()},
+                                        ilayout});
+    }
+}
+
+} // namespace test
+} // namespace megdnn
\ No newline at end of file