Commit 659217ac authored by Megvii Engine Team

refactor(megdnn): refactor bfloat16 convbias to recursive interface

GitOrigin-RevId: 378194fb7f5482f72eb95eaf23610ceec9c9c554
Parent 4a1d52c9
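In short: the old design built one BFLOAT16:&lt;impl&gt; wrapper per concrete algorithm and kept each alive in bfloat16_refhold, with the wrapped m_impl fixed at construction. The new design keeps a single CONVBIAS_BFLOAT16 algorithm that declares the float32 conv_bias sub-operator it needs via get_subopr_list(), and receives the concrete choice for that sub-operator through execution_policy().sub_policy. Below is a minimal, self-contained sketch of this recursive pattern; the Policy, SearchItem, and Algo types are simplified stand-ins for illustration, not MegDNN's real interfaces.

// Illustrative sketch only: Policy/SearchItem are simplified stand-ins for
// megdnn's ExecutionPolicy/Algorithm::SearchItem.
#include <cassert>
#include <cstdio>
#include <string>
#include <vector>

struct Policy {
    std::string algo;                // chosen algorithm name; empty = unset
    std::vector<Policy> sub_policy;  // one entry per sub-operator
};

struct SearchItem {
    std::string opr_type;  // which operator type to search recursively
    std::string param;     // serialized param that keys that search
};

struct Algo {
    virtual ~Algo() = default;
    virtual const char* name() const = 0;
    // The recursive hook: declare the sub-operators this algorithm is built
    // on, so a search can recurse into them instead of enumerating one
    // wrapper copy per concrete implementation.
    virtual std::vector<SearchItem> get_subopr_list() const { return {}; }
};

struct BFloat16Wrapper final : Algo {
    const char* name() const override { return "CONVBIAS_BFLOAT16"; }
    std::vector<SearchItem> get_subopr_list() const override {
        // One float32 conv_bias sub-operator (param string is made up here).
        return {{"CONVBIAS_FORWARD", "compute_mode=DEFAULT;dtype=f32"}};
    }
};

// Mirrors the exec() hunk below: a valid top-level policy carries exactly
// one sub-policy, which is forwarded to the internally created sub-operator.
void run(const Algo& algo, const Policy& policy) {
    if (!policy.algo.empty() && !algo.get_subopr_list().empty()) {
        assert(policy.sub_policy.size() == 1);
        std::printf("%s -> sub-operator algo: %s\n", algo.name(),
                    policy.sub_policy[0].algo.c_str());
    } else {
        std::printf("%s -> heuristic sub-operator choice\n", algo.name());
    }
}

int main() {
    BFloat16Wrapper bf16;  // a single instance, as in the new AlgoPack
    run(bf16, Policy{"CONVBIAS_BFLOAT16", {Policy{"MATMUL", {}}}});
}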
......
@@ -63,12 +63,8 @@ ConvBiasForwardImpl::AlgoPack::AlgoPack() {
     non_cudnn_algos.push_back(all_algos.rbegin()[1]);  // group batched_matmul
     non_cudnn_algos.push_back(all_algos.rbegin()[0]);  // group 1x1
 
-    size_t algo_size = all_algos.size();
-    for (size_t i = 0; i < algo_size; ++i) {
-        bfloat16_refhold.emplace_back(new AlgoBFloat16(all_algos[i]));
-        all_algos.push_back(bfloat16_refhold.back().get());
-        bfloat16_algos.push_back(bfloat16_refhold.back().get());
-    }
+    all_algos.push_back(&bfloat16);
+    bfloat16_algos.push_back(&bfloat16);
 
     size_t all_algo_size = all_algos.size();
 #if CUDA_VERSION >= 10000
......
@@ -702,32 +702,20 @@ private:
 class ConvBiasForwardImpl::AlgoBFloat16 final : public AlgoBase {
 public:
-    AlgoBFloat16(AlgoBase* impl);
     bool is_available(const SizeArgs& args) const override;
     size_t get_workspace_in_bytes(const SizeArgs& args) const override;
     void exec(const ExecArgs& args) const override;
-    const char* name() const override { return m_name.c_str(); }
-    bool is_reproducible() const override { return m_impl->is_reproducible(); }
+    std::vector<SearchItem> get_subopr_list(
+            const TensorLayoutArray& layouts,
+            const OperatorBase* opr) const override;
+    const char* name() const override { return "CONVBIAS_BFLOAT16"; }
+    bool is_reproducible() const override { return true; }
 
     MEGDNN_DECL_ALGO_TYPE(CUDA_BFLOAT16)
 
-    std::string param() const override {
-        std::string ret;
-        serialize_write_pod(m_impl, ret);
-        return ret;
-    }
-
 private:
-    SizeArgs float_args(const SizeArgs& args, ConvBiasForwardImpl* opr,
-                        TensorLayout& fsrc, TensorLayout& ffilter,
-                        TensorLayout& fbias, TensorLayout& fz,
-                        TensorLayout& fdst) const;
     WorkspaceBundle get_workspace_bundle(void* ptr, const SizeArgs& args) const;
-    AlgoBase* m_impl;
-    std::string m_name;
 };
......
@@ -766,7 +754,7 @@ public:
     std::vector<AlgoInt8NCHW32IMMAImplicitGemm> int8_nchw32_imma;
 #endif
     std::vector<std::unique_ptr<AlgoGroupConvGeneral>> gconv_refhold;
-    std::vector<std::unique_ptr<AlgoBFloat16>> bfloat16_refhold;
+    AlgoBFloat16 bfloat16;
     std::unordered_map<AlgoBase*, AlgoGroupConvGeneral*> algo2gconv;
     AlgoBase* cudnn_conv_bias_act_from_enum(cudnnConvolutionFwdAlgo_t algo);
......
@@ -6,7 +6,8 @@
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
  */
 
 #include "src/cuda/conv_bias/algo.h"
......
@@ -18,58 +19,70 @@ using namespace megdnn;
 using namespace cuda;
 using namespace conv_bias;
 
-ConvBiasForwardImpl::AlgoBFloat16::AlgoBFloat16(
-        ConvBiasForwardImpl::AlgoBase* algorithm)
-        : m_impl(algorithm) {
-    megdnn_assert_internal(algorithm);
-    m_name = ssprintf("BFLOAT16:%s", m_impl->name());
-}
-
-ConvBiasForwardImpl::AlgoBase::SizeArgs
-ConvBiasForwardImpl::AlgoBFloat16::float_args(
-        const SizeArgs& args, ConvBiasForwardImpl* opr, TensorLayout& fsrc,
-        TensorLayout& ffilter, TensorLayout& fbias, TensorLayout& fz,
-        TensorLayout& fdst) const {
-    fsrc = *args.src_layout;
-    ffilter = *args.filter_layout;
-    fbias = *args.bias_layout;
-    fz = *args.z_layout;
-    fdst = *args.dst_layout;
+namespace {
+std::pair<TensorLayoutArray, ConvBiasForwardImpl::Param> sub_opr_config(
+        const TensorLayoutArray& layouts, const ConvBiasForwardImpl* opr) {
+    megdnn_assert(layouts.size() >= 3);
+    std::pair<TensorLayoutArray, ConvBiasForwardImpl::Param> ret;
+    ret.first = layouts;
     auto change_dtype = [](TensorLayout& layout) {
         if (layout.dtype == dtype::BFloat16()) {
             layout.dtype = dtype::Float32();
         }
     };
-    change_dtype(fsrc);
-    change_dtype(ffilter);
-    change_dtype(fbias);
-    change_dtype(fz);
-    change_dtype(fdst);
-    opr->param() = args.opr->param();
-    opr->param().compute_mode = Param::ComputeMode::DEFAULT;
-    opr->execution_policy() = {m_impl->desc(), {}};
-    return SizeArgs(opr, fsrc, ffilter, fbias, fz, fdst);
+    change_dtype(ret.first[0]);
+    change_dtype(ret.first[1]);
+    change_dtype(ret.first[2]);
+    change_dtype(ret.first[3]);
+    change_dtype(ret.first[4]);
+
+    ret.second = opr->param();
+    ret.second.compute_mode = ConvBiasForwardImpl::Param::ComputeMode::DEFAULT;
+    return ret;
+}
+}  // namespace
+
+std::vector<Algorithm::SearchItem>
+ConvBiasForwardImpl::AlgoBFloat16::get_subopr_list(
+        const TensorLayoutArray& layouts, const OperatorBase* opr) const {
+    auto&& config = sub_opr_config(
+            layouts, static_cast<const ConvBiasForwardImpl*>(opr));
+
+    std::string param_str;
+    Algorithm::serialize_write_pod(config.second, param_str);
+    return {{Algorithm::OprType::CONVBIAS_FORWARD, param_str, config.first}};
 }
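The search items returned above are keyed by the serialized sub-operator param. Judging from its name and how it is used here, serialize_write_pod presumably appends the raw bytes of a trivially copyable value to the key string; a hypothetical equivalent, assuming exactly that and nothing more:

#include <string>
#include <type_traits>

// Hypothetical equivalent of serialize_write_pod (an assumption, not a copy
// of megdnn's implementation): append the object representation of a POD
// param to the string that keys the sub-operator search.
template <typename T>
void serialize_write_pod_sketch(const T& val, std::string& out) {
    static_assert(std::is_trivially_copyable<T>::value,
                  "only bitwise-copyable params can be serialized this way");
    out.append(reinterpret_cast<const char*>(&val), sizeof(T));
}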
 
 bool ConvBiasForwardImpl::AlgoBFloat16::is_available(
         const SizeArgs& args) const {
-    TensorLayout fsrc, ffilter, fbias, fz, fdst;
     auto convbias_opr = args.handle->create_operator<ConvBias>();
-    SizeArgs fargs = float_args(
-            args, static_cast<ConvBiasForwardImpl*>(convbias_opr.get()), fsrc,
-            ffilter, fbias, fz, fdst);
+    auto&& config = sub_opr_config(
+            {*args.src_layout, *args.filter_layout, *args.bias_layout,
+             *args.z_layout, *args.dst_layout},
+            args.opr);
+    convbias_opr->param() = config.second;
+
     return args.src_layout->dtype == args.filter_layout->dtype &&
            args.src_layout->dtype == dtype::BFloat16() &&
-           m_impl->is_available(fargs);
+           get_algorithm(static_cast<ConvBiasForwardImpl*>(convbias_opr.get()),
+                         config.first[0], config.first[1], config.first[2],
+                         config.first[3], config.first[4]);
 }
 
 WorkspaceBundle ConvBiasForwardImpl::AlgoBFloat16::get_workspace_bundle(
         void* ptr, const SizeArgs& args) const {
-    TensorLayout fsrc, ffilter, fbias, fz, fdst;
     auto convbias_opr = args.handle->create_operator<ConvBias>();
-    SizeArgs fargs = float_args(
-            args, static_cast<ConvBiasForwardImpl*>(convbias_opr.get()), fsrc,
-            ffilter, fbias, fz, fdst);
+    if (args.opr->execution_policy().algo.valid()) {
+        megdnn_assert(args.opr->execution_policy().sub_policy.size() == 1);
+        convbias_opr->execution_policy() =
+                args.opr->execution_policy().sub_policy[0];
+    }
+    auto&& config = sub_opr_config(
+            {*args.src_layout, *args.filter_layout, *args.bias_layout,
+             *args.z_layout, *args.dst_layout},
+            args.opr);
+    convbias_opr->param() = config.second;
+
     SmallVector<size_t> sizes;
     auto get_workspace = [&sizes](const TensorLayout& src,
                                   const TensorLayout& dst) {
......
@@ -77,12 +90,15 @@ WorkspaceBundle ConvBiasForwardImpl::AlgoBFloat16::get_workspace_bundle(
             sizes.push_back(dst.span().dist_byte());
         }
     };
-    get_workspace(*args.src_layout, fsrc);
-    get_workspace(*args.filter_layout, ffilter);
-    get_workspace(*args.bias_layout, fbias);
-    get_workspace(*args.z_layout, fz);
-    get_workspace(*args.dst_layout, fdst);
-    sizes.push_back(m_impl->get_workspace_in_bytes(fargs));
+    get_workspace(*args.src_layout, config.first[0]);
+    get_workspace(*args.filter_layout, config.first[1]);
+    get_workspace(*args.bias_layout, config.first[2]);
+    get_workspace(*args.z_layout, config.first[3]);
+    get_workspace(*args.dst_layout, config.first[4]);
+    sizes.push_back(convbias_opr->get_workspace_in_bytes(
+            config.first[0], config.first[1], config.first[2], config.first[3],
+            config.first[4], nullptr));
     return {ptr, std::move(sizes)};
 }
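For orientation: the bundle assembled above reserves one float32 staging buffer per tensor that actually needs a dtype conversion, then appends the sub-operator's own workspace as the last chunk. A simplified model of such a bundle follows (WorkspaceBundleSketch is illustrative; the real WorkspaceBundle also aligns each chunk, which this sketch omits):

#include <cstddef>
#include <vector>

// Simplified model of a workspace bundle: contiguous memory at `base`,
// partitioned into chunks of the recorded sizes.
struct WorkspaceBundleSketch {
    void* base;
    std::vector<std::size_t> sizes;

    // Chunk i starts right after the chunks before it (alignment omitted).
    std::size_t offset(std::size_t i) const {
        std::size_t off = 0;
        for (std::size_t j = 0; j < i; ++j)
            off += sizes[j];
        return off;
    }
    void* get(std::size_t i) const {
        return static_cast<char*>(base) + offset(i);
    }
};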
......
@@ -110,7 +126,12 @@ void ConvBiasForwardImpl::AlgoBFloat16::exec(const ExecArgs& args) const {
     auto convbias_opr = args.handle->create_operator<ConvBias>();
     convbias_opr->param() = args.opr->param();
     convbias_opr->param().compute_mode = Param::ComputeMode::DEFAULT;
-    convbias_opr->execution_policy() = {m_impl->desc(), {}};
+    if (args.opr->execution_policy().algo.valid()) {
+        megdnn_assert(args.opr->execution_policy().sub_policy.size() == 1);
+        convbias_opr->execution_policy() =
+                args.opr->execution_policy().sub_policy[0];
+    }
     convbias_opr->exec(fsrc_tensor, ffilter_tensor, fbias_tensor, fz_tensor,
                        fdst_tensor, nullptr, cvter.workspace());
 }
......
@@ -214,6 +214,9 @@ void ConvBiasForwardImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in filter,
         DISPATCH_RAW(Float16, Float16, Float16, FLOAT32,
                      (convolution::forward_bias<dt_float16, dt_float16,
                                                 dt_float16, dt_float32>))
+        DISPATCH_RAW(BFloat16, BFloat16, BFloat16, FLOAT32,
+                     (convolution::forward_bias<dt_bfloat16, dt_bfloat16,
+                                                dt_bfloat16, dt_float32>))
 #endif
     else {
         megdnn_throw(ssprintf(
......
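The dispatch added above stores src, filter, bias, and dst as bfloat16 while accumulating in float32 (comp_type dt_float32). That split matters because bfloat16 keeps float32's 8-bit exponent but only 8 significand bits. Below is a standalone sketch of the storage format and of the convert, compute in float32, convert back round trip; it uses truncation rounding for brevity, whereas production conversions typically round to nearest even:

#include <cstdint>
#include <cstdio>
#include <cstring>

// bfloat16 is the top 16 bits of an IEEE-754 float32 (sign, 8-bit exponent,
// 7 explicit mantissa bits), so conversion is a shift plus a memcpy.
uint16_t f32_to_bf16(float f) {
    uint32_t bits;
    std::memcpy(&bits, &f, sizeof(bits));
    return static_cast<uint16_t>(bits >> 16);  // truncate low mantissa bits
}

float bf16_to_f32(uint16_t h) {
    uint32_t bits = static_cast<uint32_t>(h) << 16;  // zero-fill low mantissa
    float f;
    std::memcpy(&f, &bits, sizeof(f));
    return f;
}

int main() {
    uint16_t a = f32_to_bf16(1.5f), b = f32_to_bf16(2.25f);
    // Compute in float32 (the DEFAULT compute mode of the float32
    // sub-operator), then store the result back as bfloat16.
    uint16_t c = f32_to_bf16(bf16_to_f32(a) * bf16_to_f32(b));
    std::printf("%g\n", bf16_to_f32(c));  // 3.375, exactly representable
}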
@@ -8,6 +8,7 @@
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  */
+#include "megdnn/dtype.h"
 #include "test/cuda/fixture.h"
 
 #include "megdnn/opr_param_defs.h"
......
@@ -108,6 +109,32 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_F32) {
     }
 }
 
+TEST_F(CUDA, CONV_BIAS_FORWARD_BF16) {
+    using namespace conv_bias;
+    std::vector<TestArg> args = get_args();
+    Checker<ConvBiasForward> checker(handle_cuda());
+
+    checker.set_before_exec_callback(
+            AlgoChecker<ConvBiasForward>(ExecutionPolicyAlgoName{
+                    "CONVBIAS_BFLOAT16", {{"MATMUL", {}}}}));
+    NormalRNG default_rng;
+    for (auto&& arg : args) {
+        arg.param.compute_mode = param::Convolution::ComputeMode::FLOAT32;
+        checker.set_dtype(0, dtype::BFloat16())
+                .set_dtype(1, dtype::BFloat16())
+                .set_dtype(2, dtype::BFloat16())
+                .set_dtype(3, dtype::BFloat16())
+                .set_dtype(4, dtype::BFloat16())
+                .set_rng(0, &default_rng)
+                .set_rng(1, &default_rng)
+                .set_rng(2, &default_rng)
+                .set_epsilon(2e-2)
+                .set_param(arg.param)
+                .execs({arg.src, arg.filter, arg.bias, {}, {}});
+    }
+}
+
 TEST_F(CUDA, CONV_BIAS_FORWARD_QS8) {
     require_compute_capability(6, 1);
......
@@ -80,7 +80,8 @@ TEST_F(CUDA, CONVOLUTION_FORWARD)
     Checker<ConvolutionForward> checker(handle_cuda());
     NormalRNG default_rng;
     for (auto &&arg: args) {
-        float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3]);
+        float scale =
+                1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3]);
         UniformFloatRNG rng(scale, 2 * scale);
         checker.
             set_dtype(0, dtype::Float32()).
......
@@ -115,7 +116,6 @@ TEST_F(CUDA, CONVOLUTION_FORWARD)
             .set_epsilon(1e-1)
             .set_param(arg.param)
             .execs({arg.src, arg.filter, {}});
-
     }
 }
......