Commit a85531dd authored by Megvii Engine Team

feat(mgb/opr): add tqt opr

GitOrigin-RevId: 49c62cd5327f55ceade849918cce13a1fef2ab39
Parent 44742e32
......@@ -224,8 +224,8 @@ public:
const PreprocessedFilter* preprocessed_filter,
_megdnn_workspace workspace) = 0;
/**
* \brief execute weight preprocessing, read weights form filter and write to
* preprocessed_filter after preprocessed.
* \brief execute weight preprocessing, read weights from filter and write
* to preprocessed_filter after preprocessing.
*
* \param[in] workspace the temporary workspace needed by exec_preprocess
*/
......@@ -1684,6 +1684,55 @@ protected:
const TensorLayout& grad, size_t workspace_in_bytes);
};
class TQTBase : public OperatorBase {
DEF_OPR_IMPL_CTOR(TQTBase, OperatorBase);
DEF_OPR_PARAM(TQT);
protected:
void deduce_layout_fwd(const TensorLayout& input, TensorLayout& output);
void check_layout_fwd(const TensorLayout& input, const TensorLayout& scale,
const TensorLayout& output);
};
class TQTForward : public TQTBase {
DEF_OPR_IMPL(TQTForward, TQTBase, 2, 1);
public:
virtual void exec(_megdnn_tensor_in input, _megdnn_tensor_in scale,
_megdnn_tensor_out output,
_megdnn_workspace workspace) = 0;
void deduce_layout(const TensorLayout& input, const TensorLayout& scale,
TensorLayout& output);
virtual size_t get_workspace_in_bytes(const TensorLayout& input,
const TensorLayout& scale,
const TensorLayout& output) = 0;
protected:
void check_exec(const TensorLayout& input, const TensorLayout& scale,
const TensorLayout& output, size_t workspace_in_bytes);
};
using TQT = TQTForward;
class TQTBackward : public TQTBase {
DEF_OPR_IMPL(TQTBackward, TQTBase, 3, 2);
public:
virtual void exec(_megdnn_tensor_in diff, _megdnn_tensor_in input,
_megdnn_tensor_in scale, _megdnn_tensor_out grad_x,
_megdnn_tensor_out grad_s,
_megdnn_workspace workspace) = 0;
virtual size_t get_workspace_in_bytes(const TensorLayout& diff,
const TensorLayout& input,
const TensorLayout& scale,
const TensorLayout& grad_x,
const TensorLayout& grad_s) = 0;
protected:
void check_exec(const TensorLayout& diff, const TensorLayout& input,
const TensorLayout& scale, const TensorLayout& grad_x,
const TensorLayout& grad_s, size_t workspace_in_bytes);
};
} // namespace megdnn
#include "megdnn/internal/opr_header_epilogue.h"
......
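For context, the TQT forward transform declared above quantizes with a trainable log2-domain scale. A minimal scalar sketch (an illustrative reconstruction, not part of this diff; it mirrors the CUDA and naive kernels below):

#include <algorithm>
#include <cmath>

// TQT forward for one element; `scale` holds log2 of the quantization step.
float tqt_forward_scalar(float x, float scale, float qmin, float qmax) {
    float t = std::pow(2.0f, scale);        // step size t = 2^scale
    float q = std::round(x / t);            // snap to the integer grid
    q = std::max(qmin, std::min(q, qmax));  // clamp to [qmin, qmax]
    return q * t;                           // dequantize
}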
......@@ -948,5 +948,7 @@ when the ``I`` suffix is present.
add_fields('int32','qmin','-2147483648').
add_fields('int32','qmax','2147483647')
)
(pdef('TQT').
add_fields('int32', 'qmin', '-2147483648').
add_fields('int32', 'qmax', '2147483647')
)
......@@ -202,7 +202,9 @@ private:
cb(AdaptivePoolingBackward) \
cb(DctChannelSelectForward) \
cb(FakeQuantForward) \
cb(FakeQuantBackward)
cb(FakeQuantBackward) \
cb(TQTForward) \
cb(TQTBackward)
/*!
* \brief specialize HandleImpl::create_operator for a single opr type;
......
/**
* \file dnn/src/common/tqt.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "megdnn/oprs.h"
#include "src/common/utils.h"
namespace megdnn {
void TQTBase::deduce_layout_fwd(const TensorLayout& input,
TensorLayout& output) {
output = TensorLayout(input, input.dtype);
}
void TQTBase::check_layout_fwd(const TensorLayout& input,
const TensorLayout& scale,
const TensorLayout& output) {
megdnn_assert(input.dtype == dtype::Float32());
megdnn_assert(scale.dtype == dtype::Float32());
TensorLayout expected;
deduce_layout_fwd(input, expected);
megdnn_assert_eq_layout(expected, output);
}
void TQTForward::deduce_layout(const TensorLayout& input,
const TensorLayout& /* scale */,
TensorLayout& output) {
deduce_layout_fwd(input, output);
}
void TQTForward::check_exec(const TensorLayout& input,
const TensorLayout& scale,
const TensorLayout& output,
size_t workspace_in_bytes) {
check_layout_fwd(input, scale, output);
auto required_workspace_space =
get_workspace_in_bytes(input, scale, output);
megdnn_assert(workspace_in_bytes >= required_workspace_space);
}
void TQTBackward::check_exec(const TensorLayout& diff,
const TensorLayout& input,
const TensorLayout& scale,
const TensorLayout& grad_x,
const TensorLayout& grad_s,
size_t workspace_in_bytes) {
megdnn_assert_eq_shape(diff, input);
megdnn_assert_eq_shape(grad_x, input);
auto required_workspace_space =
get_workspace_in_bytes(diff, input, scale, grad_x, grad_s);
megdnn_assert(workspace_in_bytes >= required_workspace_space);
}
} // namespace megdnn
......@@ -77,6 +77,7 @@
#include "src/cuda/batch_conv_bias/opr_impl.h"
#include "src/cuda/remap/opr_impl.h"
#include "src/cuda/fake_quant/opr_impl.h"
#include "src/cuda/tqt/opr_impl.h"
namespace megdnn {
namespace cuda {
......
/**
* \file dnn/src/cuda/tqt/kern.cu
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "./kern.cuh"
namespace megdnn {
namespace cuda {
#define cb(_dtype) \
INST_RUN_ELEMWISE(TQTKernOp<DTypeTrait<_dtype>::ctype>, \
DTypeTrait<_dtype>::ctype, 1); \
INST_RUN_ELEMWISE(TQTBwdKernOp<DTypeTrait<_dtype>::ctype>, \
DTypeTrait<_dtype>::ctype, 1); \
INST_RUN_ELEMWISE(TQTKernOpNonContig<DTypeTrait<_dtype>::ctype>, \
DTypeTrait<_dtype>::ctype, 3); \
INST_RUN_ELEMWISE(TQTBwdKernOpNonContig<DTypeTrait<_dtype>::ctype>, \
DTypeTrait<_dtype>::ctype, 5);
cb(megdnn::dtype::Float32)
} // namespace cuda
} // namespace megdnn
/**
* \file dnn/src/cuda/tqt/kern.cuh
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#pragma once
#include "src/cuda/elemwise_helper.cuh"
#include "src/cuda/utils.cuh"
#if MEGDNN_CC_HOST
#include "megdnn/oprs.h"
#endif
namespace megdnn {
namespace cuda {
template <typename ctype>
struct TQTKernOp {
ctype* input;
ctype* output;
ctype qmin, qmax;
__device__ void operator()(uint32_t idx, ctype scale) {
ctype t = powf(2, scale);
ctype x = round(input[idx] / t);
x = fmaxf(fminf(x, qmax), qmin);
output[idx] = x * t;
}
#if MEGDNN_CC_HOST
TQTKernOp(const TensorND& input, const TensorND& output,
const TQT::Param& param)
: input{input.ptr<ctype>()},
output{output.ptr<ctype>()},
qmin(param.qmin),
qmax(param.qmax) {}
#endif
};
template <typename ctype>
struct TQTBwdKernOp {
ctype* diff;
ctype* input;
ctype* grad_x;
ctype* grad_s;
ctype qmin, qmax;
__device__ void operator()(uint32_t idx, ctype scale) {
ctype t = powf(2, scale);
ctype scaled = input[idx] / t;
ctype rounded = round(scaled);
rounded = fmaxf(fminf(rounded, qmax), qmin);
// clipped region: scaled value falls outside [qmin - 0.5, qmax + 0.5]
bool mask_clip = scaled < -0.5 + qmin || scaled > 0.5 + qmax;
bool mask_quant = !mask_clip;
grad_x[idx] = diff[idx] * mask_quant;
ctype grad_quant =
diff[idx] * mask_quant * (rounded - scaled) * t * log(2.0);
ctype grad_clip = diff[idx] * mask_clip * rounded * t * log(2.0);
grad_s[idx] = grad_quant + grad_clip;
}
#if MEGDNN_CC_HOST
TQTBwdKernOp(const TensorND& diff, const TensorND& input,
const TensorND& grad_x, const TensorND& grad_s,
const TQT::Param& param)
: diff{diff.ptr<ctype>()},
input{input.ptr<ctype>()},
grad_x{grad_x.ptr<ctype>()},
grad_s{grad_s.ptr<ctype>()},
qmin(param.qmin),
qmax(param.qmax) {}
#endif
};
template <typename ctype>
struct TQTKernOpNonContig {
ctype qmin;
ctype qmax;
__device__ void operator()(uint32_t, ctype& input, ctype& scale,
ctype& output) {
ctype t = powf(2, scale);
ctype x = round(input / t);
x = fmaxf(fminf(x, qmax), qmin);
output = x * t;
}
#if MEGDNN_CC_HOST
TQTKernOpNonContig(const TQT::Param& param)
: qmin(param.qmin), qmax(param.qmax) {}
#endif
};
template <typename ctype>
struct TQTBwdKernOpNonContig {
ctype qmin;
ctype qmax;
__device__ void operator()(uint32_t, ctype& diff, ctype& input,
ctype& scale, ctype& grad_x, ctype& grad_s) {
ctype t = powf(2, scale);
ctype scaled = input / t;
ctype rounded = round(scaled);
rounded = fmaxf(fminf(rounded, qmax), qmin);
// clipped region: scaled value falls outside [qmin - 0.5, qmax + 0.5]
bool mask_clip = scaled < -0.5 + qmin || scaled > 0.5 + qmax;
bool mask_quant = !mask_clip;
grad_x = diff * mask_quant;
ctype grad_quant =
diff * mask_quant * (rounded - scaled) * t * log(2.0);
ctype grad_clip = diff * mask_clip * rounded * t * log(2.0);
grad_s = grad_quant + grad_clip;
}
#if MEGDNN_CC_HOST
TQTBwdKernOpNonContig(const TQT::Param& param)
: qmin(param.qmin), qmax(param.qmax) {}
#endif
};
} // namespace cuda
} // namespace megdnn
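For reference, the gradients these backward kernels compute follow the TQT paper (arXiv:1903.08066). With \(t = 2^{s}\) and \(\hat{x} = \mathrm{clamp}(\mathrm{round}(x/t),\, q_{\min},\, q_{\max})\), so that \(y = t\,\hat{x}\):

\[
\frac{\partial y}{\partial x} =
\begin{cases}
1, & q_{\min} - \tfrac12 \le x/t \le q_{\max} + \tfrac12 \\
0, & \text{otherwise (clipped region)}
\end{cases}
\qquad
\frac{\partial y}{\partial s} = t \ln 2 \cdot
\begin{cases}
\mathrm{round}(x/t) - x/t, & \text{quantized region} \\
\hat{x}, & \text{clipped region}
\end{cases}
\]

The \(t \ln 2\) factor comes from \(\partial t / \partial s = t \ln 2\); it corresponds to the `* t * log(2.0)` in `grad_quant` and `grad_clip` above.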
/**
* \file dnn/src/cuda/tqt/opr_impl.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "./opr_impl.h"
#include "./kern.cuh"
#include "src/common/utils.h"
namespace megdnn {
namespace cuda {
void TQTForwardImpl::exec(_megdnn_tensor_in input, _megdnn_tensor_in scale,
_megdnn_tensor_out output,
_megdnn_workspace workspace) {
check_exec(input.layout, scale.layout, output.layout, workspace.size);
if (!input.layout.is_contiguous() || !output.layout.is_contiguous())
return exec_noncontig(input, scale, output);
ElemwiseOpParamN<1> ele_param;
ele_param[0] = scale;
ele_param[0].layout = ele_param[0].layout.broadcast(input.layout);
ele_param.init_from_given_tensor();
auto m_param = param();
auto stream = cuda_stream(handle());
#define cb(DType) \
if (input.layout.dtype == DType()) { \
using T = typename DTypeTrait<DType>::ctype; \
run_elemwise<TQTKernOp<T>, T, 1>(ele_param, stream, \
{input, output, m_param}); \
return; \
}
cb(megdnn::dtype::Float32)
#undef cb
}
void TQTForwardImpl::exec_noncontig(_megdnn_tensor_in input,
_megdnn_tensor_in scale,
_megdnn_tensor_out output) {
ElemwiseOpParamN<3> ele_param;
ele_param[0] = input;
ele_param[1] = scale;
ele_param[1].layout = ele_param[1].layout.broadcast(input.layout);
ele_param[2] = output;
ele_param.init_from_given_tensor();
auto m_param = param();
auto stream = cuda_stream(handle());
#define cb(DType) \
if (input.layout.dtype == DType()) { \
using T = typename DTypeTrait<DType>::ctype; \
run_elemwise<TQTKernOpNonContig<T>, T, 3>(ele_param, stream, \
{m_param}); \
return; \
}
cb(megdnn::dtype::Float32)
#undef cb
}
void TQTBackwardImpl::exec(_megdnn_tensor_in diff, _megdnn_tensor_in input,
_megdnn_tensor_in scale, _megdnn_tensor_out grad_x,
_megdnn_tensor_out grad_s,
_megdnn_workspace workspace) {
check_exec(diff.layout, input.layout, scale.layout, grad_x.layout,
grad_s.layout, workspace.size);
if (!input.layout.is_contiguous() || !diff.layout.is_contiguous() ||
!grad_x.layout.is_contiguous() || !grad_s.layout.is_contiguous())
return exec_noncontig(diff, input, scale, grad_x, grad_s);
ElemwiseOpParamN<1> ele_param;
ele_param[0] = scale;
ele_param[0].layout = ele_param[0].layout.broadcast(input.layout);
ele_param.init_from_given_tensor();
auto m_param = param();
auto stream = cuda_stream(handle());
#define cb(DType) \
if (grad_x.layout.dtype == DType()) { \
using T = typename DTypeTrait<DType>::ctype; \
run_elemwise<TQTBwdKernOp<T>, T, 1>( \
ele_param, stream, {diff, input, grad_x, grad_s, m_param}); \
return; \
}
cb(megdnn::dtype::Float32)
#undef cb
}
void TQTBackwardImpl::exec_noncontig(_megdnn_tensor_in diff,
_megdnn_tensor_in input,
_megdnn_tensor_in scale,
_megdnn_tensor_out grad_x,
_megdnn_tensor_out grad_s) {
ElemwiseOpParamN<5> ele_param;
ele_param[0] = diff;
ele_param[1] = input;
ele_param[2] = scale;
ele_param[2].layout = ele_param[2].layout.broadcast(input.layout);
ele_param[3] = grad_x;
ele_param[4] = grad_s;
ele_param.init_from_given_tensor();
auto m_param = param();
auto stream = cuda_stream(handle());
#define cb(DType) \
if (input.layout.dtype == DType()) { \
using T = typename DTypeTrait<DType>::ctype; \
run_elemwise<TQTBwdKernOpNonContig<T>, T, 5>(ele_param, stream, \
{m_param}); \
return; \
}
cb(megdnn::dtype::Float32)
#undef cb
}
} // namespace cuda
} // namespace megdnn
\ No newline at end of file
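A note on the dispatch above: on the contiguous fast path only the broadcast scale is walked by the element-wise helper (ElemwiseOpParamN<1>), while input, output and the gradient tensors are addressed directly by linear index inside the kernel. The exec_noncontig variants instead hand every tensor to the helper (arity 3 forward, 5 backward), which supports arbitrary strides at the cost of per-element address computation.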
/**
* \file dnn/src/cuda/tqt/opr_impl.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#pragma once
#include "megdnn/oprs.h"
#include "src/cuda/utils.h"
namespace megdnn {
namespace cuda {
class TQTForwardImpl final : public TQTForward {
public:
using TQTForward::TQTForward;
void exec(_megdnn_tensor_in input, _megdnn_tensor_in scale,
_megdnn_tensor_out output, _megdnn_workspace workspace) override;
size_t get_workspace_in_bytes(const TensorLayout&, /* input */
const TensorLayout&, /* scale */
const TensorLayout& /* output */) override {
return 0;
}
private:
void exec_noncontig(_megdnn_tensor_in input, _megdnn_tensor_in scale,
_megdnn_tensor_out output);
};
class TQTBackwardImpl final : public TQTBackward {
public:
using TQTBackward::TQTBackward;
void exec(_megdnn_tensor_in diff, _megdnn_tensor_in input,
_megdnn_tensor_in scale, _megdnn_tensor_out grad_x,
_megdnn_tensor_out grad_s, _megdnn_workspace workspace) override;
size_t get_workspace_in_bytes(const TensorLayout& /* diff */,
const TensorLayout& /* input */,
const TensorLayout& /* scale */,
const TensorLayout& /* grad_x */,
const TensorLayout& /* grad_s */) override {
return 0;
}
private:
void exec_noncontig(_megdnn_tensor_in diff, _megdnn_tensor_in input,
_megdnn_tensor_in scale, _megdnn_tensor_out grad_x,
_megdnn_tensor_out grad_s);
};
} // namespace cuda
} // namespace megdnn
......@@ -12,7 +12,6 @@
#include "src/naive/fake_quant/opr_impl.h"
#include <cmath>
#include <iostream>
#include "megdnn/tensor_iter.h"
#include "src/common/elemwise_helper.cuh"
#include "src/common/utils.h"
......
......@@ -80,6 +80,7 @@
#include "src/naive/warp_perspective/opr_impl.h"
#include "src/naive/remap/opr_impl.h"
#include "src/naive/fake_quant/opr_impl.h"
#include "src/naive/tqt/opr_impl.h"
static size_t g_image2d_pitch_alignment = 1;
......
/**
* \file dnn/src/naive/tqt/opr_impl.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/naive/tqt/opr_impl.h"
#include <cmath>
#include "megdnn/tensor_iter.h"
#include "src/common/elemwise_helper.cuh"
#include "src/common/utils.h"
#include "src/naive/handle.h"
namespace {
using namespace megdnn;
template <typename T>
void forward_impl(const ElemwiseOpParamN<3> src, float qmin, float qmax) {
auto inp = tensor_iter_valonly<T>(src[0]).begin();
auto out = tensor_iter_valonly<T>(src[1]).begin();
auto scale = tensor_iter_valonly<T>(src[2]).begin();
size_t total = src[0].layout.total_nr_elems();
for (size_t i = 0; i < total; ++i) {
T t = pow(2, *scale);
T x = round(*inp / t);
x = x <= qmin ? qmin : x;
x = x >= qmax ? qmax : x;
*out = x * t;
++inp;
++out;
++scale;
}
}
template <typename T>
void backward_impl(const ElemwiseOpParamN<5> src, float qmin, float qmax) {
auto diff = tensor_iter_valonly<T>(src[0]).begin();
auto input = tensor_iter_valonly<T>(src[1]).begin();
auto scale = tensor_iter_valonly<T>(src[2]).begin();
auto grad_x = tensor_iter_valonly<T>(src[3]).begin();
auto grad_s = tensor_iter_valonly<T>(src[4]).begin();
size_t total = src[0].layout.total_nr_elems();
for (size_t i = 0; i < total; ++i) {
T t = pow(2, *scale);
T scaled = *input / t;
T rounded = round(scaled);
rounded = rounded <= qmin ? qmin : rounded;
rounded = rounded >= qmax ? qmax : rounded;
// clipped region: scaled value falls outside [qmin - 0.5, qmax + 0.5]
bool mask_clip = scaled < -0.5 + qmin || scaled > 0.5 + qmax;
bool mask_quant = !mask_clip;
*grad_x = *diff * mask_quant;
T grad_quant = *diff * mask_quant * (rounded - scaled) * t * log(2.0);
T grad_clip = *diff * mask_clip * rounded * t * log(2.0);
*grad_s = grad_quant + grad_clip;
++input;
++diff;
++scale;
++grad_x;
++grad_s;
}
}
} // namespace
namespace megdnn {
namespace naive {
void TQTForwardImpl::exec(_megdnn_tensor_in input, _megdnn_tensor_in scale,
_megdnn_tensor_out output,
_megdnn_workspace workspace) {
check_exec(input.layout, scale.layout, output.layout, workspace.size);
ElemwiseOpParamN<3> src;
src[0] = input;
src[1] = output;
src[2] = scale;
src[2].layout = src[2].layout.broadcast(input.layout);
#define cb(DType) \
if (input.layout.dtype == DType()) { \
using T = typename DTypeTrait<DType>::ctype; \
MEGDNN_DISPATCH_CPU_KERN_OPR( \
forward_impl<T>(src, param().qmin, param().qmax)); \
return; \
}
cb(dtype::Float32)
#undef cb
}
void TQTBackwardImpl::exec(_megdnn_tensor_in diff, _megdnn_tensor_in input,
_megdnn_tensor_in scale, _megdnn_tensor_out grad_x,
_megdnn_tensor_out grad_s,
_megdnn_workspace workspace) {
check_exec(diff.layout, input.layout, scale.layout, grad_x.layout,
grad_s.layout, workspace.size);
ElemwiseOpParamN<5> src;
src[0] = diff;
src[1] = input;
src[2] = scale;
src[2].layout = src[2].layout.broadcast(input.layout);
src[3] = grad_x;
src[4] = grad_s;
#define cb(DType) \
if (diff.layout.dtype == DType() && grad_x.layout.dtype == DType() && \
input.layout.dtype == DType()) { \
using T = typename DTypeTrait<DType>::ctype; \
MEGDNN_DISPATCH_CPU_KERN_OPR( \
backward_impl<T>(src, param().qmin, param().qmax)); \
return; \
}
cb(dtype::Float32)
#undef cb
}
} // namespace naive
} // namespace megdnn
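As a quick sanity check of forward_impl (an illustrative example, not taken from the tests): with qmin = -127, qmax = 127 and scale = -2, the step is t = 2^-2 = 0.25; an input of 1.3 gives 1.3 / 0.25 = 5.2, which rounds to 5, stays inside the clamp range, and yields 5 * 0.25 = 1.25.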
/**
* \file dnn/src/naive/tqt/opr_impl.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#pragma once
#include "megdnn/oprs.h"
namespace megdnn {
namespace naive {
class TQTForwardImpl final : public TQTForward {
public:
using TQTForward::TQTForward;
void exec(_megdnn_tensor_in input, _megdnn_tensor_in scale,
_megdnn_tensor_out output, _megdnn_workspace workspace) override;
size_t get_workspace_in_bytes(const TensorLayout& /* input */,
const TensorLayout& /* scale */,
const TensorLayout& /* output */) override {
return 0;
}
};
class TQTBackwardImpl final : public TQTBackward {
public:
using TQTBackward::TQTBackward;
void exec(_megdnn_tensor_in diff, _megdnn_tensor_in input,
_megdnn_tensor_in scale, _megdnn_tensor_out grad_x,
_megdnn_tensor_out grad_s, _megdnn_workspace workspace) override;
size_t get_workspace_in_bytes(const TensorLayout& /* diff */,
const TensorLayout& /* input */,
const TensorLayout& /* scale */,
const TensorLayout& /* grad_x */,
const TensorLayout& /* grad_s */) override {
return 0;
}
};
} // namespace naive
} // namespace megdnn
......@@ -58,6 +58,16 @@ struct DeduceLayoutProxy<Opr, 5, true> {
}
};
template <typename Opr>
struct DeduceLayoutProxy<Opr, 5, false> {
static void deduce_layout(Opr*, TensorLayoutArray&) {}
};
template <typename Opr>
struct DeduceLayoutProxy<Opr, 6, false> {
static void deduce_layout(Opr*, TensorLayoutArray&) {}
};
template <typename Opr>
struct DeduceLayoutProxy<Opr, 7, false> {
static void deduce_layout(Opr*, TensorLayoutArray&) {}
......
......@@ -37,6 +37,22 @@ struct ExecProxy<Opr, 8, true> {
tensors[5], tensors[6], tensors[7], W.workspace());
}
};
template <typename Opr>
struct ExecProxy<Opr, 6, true> {
WorkspaceWrapper W;
void exec(Opr* opr, const TensorNDArray& tensors) {
if (!W.valid()) {
W = WorkspaceWrapper(opr->handle(), 0);
}
W.update(opr->get_workspace_in_bytes(
tensors[0].layout, tensors[1].layout, tensors[2].layout,
tensors[3].layout, tensors[4].layout, tensors[5].layout));
opr->exec(tensors[0], tensors[1], tensors[2], tensors[3], tensors[4],
tensors[5], W.workspace());
}
};
template <typename Opr>
struct ExecProxy<Opr, 5, true> {
WorkspaceWrapper W;
......
......@@ -112,6 +112,8 @@ DEF(RemapBackwardMat, 4, true, false);
DEF(DctChannelSelectForward, 4, true, true);
DEF(FakeQuantForward, 4, true, true);
DEF(FakeQuantBackward, 5, true, false);
DEF(TQTForward, 3, true, true);
DEF(TQTBackward, 5, true, false);
} // namespace test
} // namespace megdnn
......
/**
* \file dnn/test/common/tqt.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#pragma once
#include "megdnn/basic_types.h"
#include "megdnn/opr_param_defs.h"
namespace megdnn {
namespace test {
namespace tqt {
struct TestArg {
param::TQT param;
TensorShape ishape;
TensorShape scale_shape;
TestArg(param::TQT param, TensorShape ishape, TensorShape scale_shape)
: param(param), ishape(ishape), scale_shape(scale_shape) {}
};
inline std::vector<TestArg> get_args() {
std::vector<TestArg> args;
param::TQT cur_param;
cur_param.qmin = -127;
cur_param.qmax = 127;
for (size_t i = 10; i < 30; i += 2) {
args.emplace_back(cur_param, TensorShape{10, 64, i, i}, TensorShape{1});
}
return args;
}
} // namespace tqt
} // namespace test
} // namespace megdnn
/**
* \file dnn/test/cuda/tqt.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "test/common/tqt.h"
#include "megdnn/oprs.h"
#include "test/common/checker.h"
#include "test/cuda/fixture.h"
namespace megdnn {
namespace test {
using namespace tqt;
TEST_F(CUDA, TQT) {
std::vector<TestArg> args = get_args();
auto dtype = dtype::Float32();
for (auto&& arg : args) {
auto param = arg.param;
auto ishape = arg.ishape;
auto scale_shape = arg.scale_shape;
Checker<TQTForward> checker(handle_cuda());
checker.set_param(param)
.set_dtype(0, dtype)
.set_dtype(1, dtype)
.set_dtype(2, dtype)
.execs({ishape, scale_shape, ishape});
}
// test noncontiguous layout
for (auto&& arg : args) {
auto param = arg.param;
auto ishape = arg.ishape;
auto sshape = arg.scale_shape;
Checker<TQTForward> checker(handle_cuda());
TensorLayout ilayout(
ishape,
{(long int)(ishape[1] * ishape[2] * ishape[3] * 2),
(long int)(ishape[2] * ishape[3]), (long int)ishape[3], 1},
dtype::Float32());
checker.set_param(param).execl(
{ilayout, {sshape, dtype::Float32()}, ilayout});
}
}
TEST_F(CUDA, TQT_BACKWARD) {
std::vector<TestArg> args = get_args();
auto dtype = dtype::Float32();
for (auto&& arg : args) {
auto param = arg.param;
auto ishape = arg.ishape;
auto scale_shape = arg.scale_shape;
Checker<TQTBackward> checker(handle_cuda());
checker.set_param(param)
.set_dtype(0, dtype)
.set_dtype(1, dtype)
.set_dtype(2, dtype)
.set_dtype(3, dtype)
.set_dtype(4, dtype)
.execs({ishape, ishape, scale_shape, ishape, ishape});
}
// test noncontiguous layout
for (auto&& arg : args) {
auto param = arg.param;
auto ishape = arg.ishape;
auto sshape = arg.scale_shape;
Checker<TQTBackward> checker(handle_cuda());
TensorLayout ilayout(
ishape,
{(long int)(ishape[1] * ishape[2] * ishape[3] * 2),
(long int)(ishape[2] * ishape[3]), (long int)ishape[3], 1},
dtype::Float32());
checker.set_param(param).execl({ilayout,
ilayout,
{sshape, dtype::Float32()},
ilayout,
ilayout});
}
}
} // namespace test
} // namespace megdnn
\ No newline at end of file
......@@ -15,7 +15,7 @@ from ..core.autodiff.grad import Function
from ..core.tensor.dtype import _metadata_dict, get_quantized_dtype
from ..module import Module
from ..tensor import Parameter, Tensor
from .utils import QuantMode, fake_quant_tensor, get_qparam_dict
from .utils import QuantMode, fake_quant_tensor, get_qparam_dict, tqt_forward
class _FakeQuantize(Module):
......@@ -65,51 +65,6 @@ class _FakeQuantize(Module):
return self.normal_foward(inp, q_dict=q_dict)
class TQT_Function(Function):
def __init__(self, lowerbound, upperbound):
super().__init__()
self.lowerbound = lowerbound
self.upperbound = upperbound
self.saved_tensors = ()
def save_for_backward(self, *tensors: Iterable[Tensor]):
"""
Saves tensors needed for gradient computation. This method should be called only
once in :meth:`~.function.Function.forward`, additional calls will replace values saved previously.
The saved tensors can be accessed through the ``saved_tensors`` attribute.
"""
self.saved_tensors = tensors
def forward(self, inp, scale):
t = 2 ** scale
# t = F.maximum(t, 1e-4)
inp_scaled = inp / t
inp_clipped = F.maximum(F.minimum(inp_scaled, self.upperbound), self.lowerbound)
inp_rounded = F.round(inp_clipped)
inp_flq = inp_rounded * t
self.save_for_backward(inp_scaled, inp_rounded, t)
return inp_flq
def backward(self, grad_inp_flq):
(inp_scaled, inp_rounded, t) = self.saved_tensors
mask_clip = F.logical_and(
inp_scaled < -0.5 + self.lowerbound, inp_scaled > self.upperbound + 0.5
) # mask for accumulating the gradients of |data_scaled|>L
mask_quant = F.logical_not(mask_clip)
grad_quant = (
grad_inp_flq * mask_quant * (inp_rounded - inp_scaled)
) # gradient within |data_scaled|<=L
grad_clip = (
grad_inp_flq * mask_clip * inp_rounded
) # gradient with | data_scaled|>L
grad_s = grad_clip.sum() + grad_quant.sum()
# dL/ds = dL/dt * t * ln(2)
grad_s = grad_s * t * math.log(2)
grad_inp = grad_inp_flq * mask_quant
return grad_inp, grad_s
class TQT(_FakeQuantize):
r"""
TQT: https://arxiv.org/abs/1903.08066 Trained Quantization Thresholds
......@@ -130,11 +85,11 @@ class TQT(_FakeQuantize):
), "only symmetric quantization is supported by TQT"
if "scale" not in q_dict or q_dict["scale"] is None:
raise AssertionError("Can not get an initialized scale")
self.scale = F.log(q_dict["scale"]) / math.log(2)
self.scale = Tensor(F.log(q_dict["scale"]) / math.log(2))
def fake_quant_forward(self, inp, q_dict=None):
# when enable, TQT will do fakequant forward, finetune the scale
return TQT_Function(self.qmin, self.qmax)(inp, self.scale)
return tqt_forward(self.qmin, self.qmax, inp, self.scale)
def get_qparams(self):
q_dict = get_qparam_dict(QuantMode.SYMMERTIC)
......
......@@ -33,6 +33,12 @@ class Round(Function):
return output_grads
def tqt_forward(qmin, qmax, inp, scale):
op = builtin.TQT(qmin=qmin, qmax=qmax)
(output,) = apply(op, inp, scale)
return output
def register_method_to_class(cls):
def decorator(func):
@wraps(func)
......
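A usage sketch for the helper above (hypothetical values, assuming an int8-style range): out = tqt_forward(-127, 127, inp, scale), where scale is a tensor holding log2 of the quantization step, which is exactly how TQT.fake_quant_forward calls it.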
......@@ -13,12 +13,11 @@ import megengine as mge
from megengine import tensor
from megengine.core.autodiff.grad import Function, Grad
from megengine.core.tensor.utils import make_shape_tuple
from megengine.quantization.fake_quant import TQT_Function
from megengine.quantization.internal_fake_quant import *
from megengine.quantization.utils import QuantMode, fake_quant_tensor
from megengine.quantization.utils import QuantMode, fake_quant_tensor, tqt_forward
class numpy_TQT_Function:
class TQT_numpy:
def __init__(self, lowerbound, upperbound):
super().__init__()
self.lowerbound = lowerbound
......@@ -57,27 +56,32 @@ class numpy_TQT_Function:
return grad_inp, grad_s
def test_TQT():
f = TQT_Function(-127, 127)
nf = numpy_TQT_Function(-127, 127)
def test_tqt():
def check_inp(a, b, c, a_np, b_np, c_np):
np.testing.assert_allclose(
f.forward(a, b).numpy(),
nf.forward(a_np, b_np).astype("float32"),
rtol=1e-6,
atol=1e-6,
)
c1, c2 = f.backward(c)
c1_np, c2_np = nf.backward(c_np)
np.testing.assert_allclose(c1.numpy(), c1_np.astype("float32"), rtol=1e-6)
np.testing.assert_allclose(c2.numpy(), c2_np.astype("float32"), rtol=5e-5)
a_np = np.random.random((4, 3)).astype("float32")
b_np = np.random.random((1)).astype("float32")
a = tensor(a_np)
b = tensor(b_np)
check_inp(a, b, b, a_np, b_np, b_np)
g = []
def cb(grad):
g.append(grad)
x = np.random.normal(size=(1, 2, 3, 4))
s = np.random.rand(1) + 1
g_y = np.ones(shape=(1, 2, 3, 4), dtype="float32")
n = TQT_numpy(-127, 127)
y_np = n.forward(x, s)
g_x_np, g_s_np = n.backward(g_y)
x = mge.tensor(x, dtype="float32")
s = mge.tensor(s, dtype="float32")
g_y = mge.tensor(g_y, dtype="float32")
grad = Grad().wrt(x, s, callback=cb)
y = tqt_forward(-127, 127, x, s)
grad(y, g_y)
g_x, g_s = g
np.testing.assert_allclose(y.numpy(), y_np, atol=1e-6)
np.testing.assert_allclose(g_x.numpy(), g_x_np, atol=1e-6)
np.testing.assert_allclose(g_s.numpy(), g_s_np, atol=1e-6)
......
......@@ -15,6 +15,7 @@
#include "megbrain/opr/dnn/convolution.h"
#include "megbrain/opr/dnn/adaptive_pooling.h"
#include "megbrain/opr/dnn/fake_quant.h"
#include "megbrain/opr/dnn/tqt.h"
#include "megbrain/opr/dnn/pooling.h"
#include "megbrain/opr/dnn/local.h"
#include "megbrain/opr/dnn/roi_align.h"
......@@ -625,6 +626,18 @@ OP_TRAIT_REG(FakeQuant, FakeQuant)
.apply_on_var_node(apply_on_var_node)
.fallback();
}} // fake_quant
namespace { namespace tqt {
auto apply_on_var_node(
const OpDef& def,
const VarNodeArray& inputs) {
auto&& op = static_cast<const TQT&>(def);
mgb_assert(inputs.size() == 2);
return opr::TQT::make(inputs[0], inputs[1], op.param());
}
OP_TRAIT_REG(TQT, TQT)
.apply_on_var_node(apply_on_var_node)
.fallback();
}} // tqt
namespace { namespace elemwise_multi_type {
auto apply_on_var_node(
const OpDef& def,
......@@ -636,7 +649,7 @@ auto apply_on_var_node(
OP_TRAIT_REG(ElemwiseMultiType, ElemwiseMultiType)
.apply_on_var_node(apply_on_var_node)
.fallback();
}} // fake_quant
}} // elemwise_multi_type
namespace { namespace svd {
auto apply_on_var_node(
......
......@@ -232,6 +232,7 @@ def BatchedSetMeshIndexing: FancyIndexingBase<"BatchedSetMeshIndexing">;
def FakeQuant: MgbHashableOp<"FakeQuant", [FakeQuantParam]>;
def AssertEqual: MgbHashableOp<"AssertEqual",[AssertEqualParam]>;
def TQT: MgbHashableOp<"TQT", [TQTParam]>;
def ElemwiseMultiType: MgbHashableOp<"ElemwiseMultiType", [ElemwiseMultiTypeParam]> {
let extraArguments = (ins
MgbDTypeAttr:$dtype
......
......@@ -319,5 +319,8 @@ decl_opr('FakeQuant',
inputs=[Doc('src','input tensor'),Doc('scale','scale tensor'),Doc('zero_point','zero point tensor')],
params='FakeQuant')
decl_opr('TQT',
inputs=[Doc('src','input tensor'),Doc('scale','scale tensor')],
params='TQT')
# vim: ft=python
......@@ -19,6 +19,7 @@
#include "megbrain/opr/dnn/local.h"
#include "megbrain/opr/dnn/lrn.h"
#include "megbrain/opr/dnn/fake_quant.h"
#include "megbrain/opr/dnn/tqt.h"
#include "megbrain/serialization/sereg.h"
......@@ -238,6 +239,19 @@ namespace serialization {
}
};
template <>
struct OprMaker<opr::TQTBackward, 3> {
using Param = opr::TQTBackward::Param;
static cg::OperatorNodeBase* make(const Param& param,
const cg::VarNodeArray& i, ComputingGraph& graph,
const OperatorNodeConfig& config) {
MGB_MARK_USED_VAR(graph);
return opr::TQTBackward::make(i[0], i[1], i[2], param, config)[0]
.node()
->owner_opr();
}
};
template<class MegDNNConv = megdnn::LocalShare>
struct MakeLocalShareCaller2 {
template<typename Opr>
......@@ -426,6 +440,8 @@ namespace opr {
MGB_SEREG_OPR(BatchConvBiasForward, 0);
MGB_SEREG_OPR(FakeQuant, 3);
MGB_SEREG_OPR(FakeQuantBackward, 4);
MGB_SEREG_OPR(TQT, 2);
MGB_SEREG_OPR(TQTBackward, 3);
} // namespace opr
......
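The OprMaker<opr::TQTBackward, 3> specialization above appears to be needed because TQTBackward has two outputs (grad_x, grad_s), so the default single-output maker does not apply; it reconstructs the operator from the first output's owner. The second argument to MGB_SEREG_OPR (2 for TQT, 3 for TQTBackward) is the input arity used to pick the maker during deserialization.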
/**
* \file src/opr/impl/dnn/tqt.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "megbrain/opr/dnn/tqt.h"
#include "../internal/megdnn_opr_wrapper.inl"
#include "megbrain/graph/grad_impl.h"
#include "megbrain/opr/basic_arith_wrapper.h"
#include "megbrain/opr/internal/out_shape_by_sym_var.h"
#include "megbrain/opr/tensor_manip.h"
#include "megbrain/opr/utility.h"
using namespace mgb;
using namespace opr;
MGB_DYN_TYPE_OBJ_FINAL_IMPL(TQTForward);
MEGDNN_OPR_INIT2(TQTForward, "tqt_fwd");
#ifdef MGB_ENABLE_GRAD
MGB_IMPL_OPR_GRAD(TQTForward) {
SymbolVarArray grad = TQTBackward::make(out_grad[0], opr.input(0),
opr.input(1), opr.param());
if (wrt_idx == 0) {
return grad[0].node();
} else if (wrt_idx == 1) {
return reduce_sum(grad[1], GetVarShape::make(opr.input(wrt_idx)))
.node();
} else {
return nullptr;
}
}
#endif
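In the gradient rule above, grad[1] (the scale gradient returned by TQTBackward) still has the input's full shape because scale is broadcast across the input; the reduce_sum against GetVarShape of the scale input collapses it back to the scale's own shape, mirroring the grad_s.sum() in the Python implementation this commit removes.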
MGB_DYN_TYPE_OBJ_FINAL_IMPL(TQTBackward);
TQTBackward::TQTBackward(VarNode* y_grad, VarNode* x, VarNode* scale,
const Param& param, const OperatorNodeConfig& config)
: Super({x->owner_graph(), config, "tqt_bwd", {y_grad, x, scale}}, 1,
true) {
init_megdnn_opr(*this, param);
add_input({y_grad, x, scale});
}
SymbolVarArray TQTBackward::make(SymbolVar y_grad, SymbolVar x, SymbolVar scale,
const Param& param,
const OperatorNodeConfig& config) {
auto&& out = x.node()->owner_graph()
->insert_opr(std::make_unique<TQTBackward>(
y_grad.node(), x.node(), scale.node(), param,
config))
->output();
SymbolVarArray ret(out.size());
for (size_t i = 0; i < ret.size(); ++i) {
ret[i] = out[i];
}
return ret;
}
void TQTBackward::init_output_static_infer_desc() {
using namespace cg::static_infer;
auto&& mgr = owner_graph()->static_infer_manager();
mgr.register_shape_infer(output(0),
ShapeInferDesc::make_identity(input(1)));
mgr.register_shape_infer(output(1),
ShapeInferDesc::make_identity(input(1)));
this->init_output_static_infer_desc_workspace(
intl::AutoAddWorkspaceNeedLimitGetter<megdnn::TQTBackward>::val);
}
void TQTBackward::init_output_dtype() {
output(0)->dtype(input(1)->dtype());
output(1)->dtype(input(2)->dtype());
}
......@@ -163,6 +163,11 @@ namespace {
#define _FOREACH_IO(_i, _o) _i(0), _i(1), _i(2), _o(0), _o(1)
#include "./megdnn_opr_wrapper_megdnn_opr_meth_invoker_impl.inl"
#define _NR_INPUTS 3
#define _NR_OUTPUTS 3
#define _FOREACH_IO(_i, _o) _i(0), _i(1), _i(2), _o(0), _o(1), _o(2)
#include "./megdnn_opr_wrapper_megdnn_opr_meth_invoker_impl.inl"
#define _NR_INPUTS 4
#define _NR_OUTPUTS 1
#define _FOREACH_IO(_i, _o) _i(0), _i(1), _i(2), _i(3), _o(0)
......
/**
* \file src/opr/include/megbrain/opr/dnn/tqt.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#pragma once
#include "megbrain/opr/internal/megdnn_opr_wrapper.h"
#include "megdnn/oprs.h"
namespace mgb {
namespace opr {
MGB_DEFINE_OPR_CLASS(TQTForward,
intl::MegDNNOprWrapperFwd<megdnn::TQTForward>) // {
public:
TQTForward(VarNode* src, VarNode* scale, const Param& param,
const OperatorNodeConfig& config);
static SymbolVar make(SymbolVar src, SymbolVar scale, const Param& param = {},
const OperatorNodeConfig& config = {});
};
using TQT = TQTForward;
MGB_DEFINE_OPR_CLASS(TQTBackward,
intl::MegDNNOprWrapperBwd<megdnn::TQTBackward>) // {
public:
TQTBackward(VarNode* y_grad, VarNode* x, VarNode* scale, const Param& param,
const OperatorNodeConfig& config);
static SymbolVarArray make(SymbolVar y_grad, SymbolVar x, SymbolVar scale,
const Param& param = {},
const OperatorNodeConfig& config = {});
private:
void init_output_static_infer_desc() override;
void init_output_dtype() override;
};
} // namespace opr
} // namespace mgb
/**
* \file src/opr/test/dnn/tqt.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "megbrain/opr/dnn/tqt.h"
#include "megbrain/comp_node_env.h"
#include "megbrain/test/autocheck.h"
using namespace std;
using namespace mgb;
namespace {
void run() {
using Checker = AutoOprChecker<2, 1>;
auto make_graph =
[&](const Checker::SymInpArray& inputs) -> Checker::SymOutArray {
auto o0 = opr::TQTForward::make(inputs[0], inputs[1]);
return {o0};
};
auto fwd = [&](Checker::NumOutArray& dest, Checker::NumInpArray inp) {
auto opr = MegDNNHandle::get(
CompNodeEnv::from_comp_node(CompNode::default_cpu()))
->create_operator<megdnn::TQTForward>();
dest[0].dtype(dtype::Float32())
.comp_node(inp[0]->comp_node())
.resize(inp[0]->shape());
opr->exec(inp[0]->as_megdnn(), inp[1]->as_megdnn(), dest[0].as_megdnn(),
{});
};
auto gen = [&](HostTensorND& src) {
HostTensorGenerator<dtype::Float32, RandomDistribution::GAUSSIAN>
src_gen(10.f);
src = *src_gen(src.shape(), src.comp_node());
};
Checker::RunOptions opt;
opt.numdiff_max_err = 1e-5;
Checker checker{make_graph, fwd};
checker.set_input_generator(0, gen)
.set_input_generator(1, gen)
.set_input_allow_grad(0, false)
.set_input_allow_grad(1, false)
.set_output_allow_grad(0, false);
checker.run({TensorShape{1, 2, 3, 4}, TensorShape{1}}, opt)
.run({TensorShape{2, 3, 8, 8}, TensorShape{1}}, opt)
.run({TensorShape{1, 3, 4, 4}, TensorShape{1}}, opt);
}
} // anonymous namespace
TEST(TestOprDNN, TQTForward) {
REQUIRE_GPU(1);
run();
}
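Note that the checker above disables gradient checks on both inputs and the output; a plausible reason is that numeric differentiation is unreliable for TQT, whose forward is piecewise constant in x because of round(), so only the forward results are verified here.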
......@@ -105,6 +105,7 @@ union OperatorParam {
param.NvOf = 71,
param.DctChannelSelect = 72,
param.FakeQuant = 73,
param.TQT = 74,
}
table Operator {
......