提交 659217ac 编写于 作者: M Megvii Engine Team

refactor(megdnn): refactor bfloat16 convbias to recursive interface

GitOrigin-RevId: 378194fb7f5482f72eb95eaf23610ceec9c9c554
上级 4a1d52c9
......@@ -63,12 +63,8 @@ ConvBiasForwardImpl::AlgoPack::AlgoPack() {
non_cudnn_algos.push_back(all_algos.rbegin()[1]); // group batched_matmul
non_cudnn_algos.push_back(all_algos.rbegin()[0]); // group 1x1
algo_size = all_algos.size();
for (size_t i = 0; i < algo_size; ++i) {
bfloat16_refhold.emplace_back(new AlgoBFloat16(all_algos[i]));
all_algos.push_back(bfloat16_refhold.back().get());
bfloat16_algos.push_back(bfloat16_refhold.back().get());
}
all_algos.push_back(&bfloat16);
bfloat16_algos.push_back(&bfloat16);
size_t all_algo_size = all_algos.size();
#if CUDA_VERSION >= 10000
......
......@@ -702,32 +702,20 @@ private:
// NOTE(review): this span is captured from a unified diff with the +/- markers
// stripped, so REMOVED and ADDED declarations are interleaved below (e.g. two
// name() overrides and two is_reproducible() overrides). It is not compilable
// as shown; the pre-/post-refactor class bodies must be recovered from the
// original commit.
//
// Purpose (visible from the declarations): AlgoBFloat16 adapts a float32
// conv-bias algorithm to BFloat16 inputs. The old version wrapped a concrete
// AlgoBase* (m_impl) chosen at construction; the new version instead resolves
// its float32 sub-operator recursively via get_subopr_list().
class ConvBiasForwardImpl::AlgoBFloat16 final : public AlgoBase {
public:
// Old constructor: takes the wrapped float32 implementation (removed by
// this commit — the new class is default-constructed).
AlgoBFloat16(AlgoBase* impl);
bool is_available(const SizeArgs& args) const override;
size_t get_workspace_in_bytes(const SizeArgs& args) const override;
void exec(const ExecArgs& args) const override;
// Old name: derived from the wrapped impl ("BFLOAT16:<impl>").
const char* name() const override { return m_name.c_str(); }
// New: enumerates the float32 conv-bias sub-operator this algo delegates to.
std::vector<SearchItem> get_subopr_list(
const TensorLayoutArray& layouts,
const OperatorBase* opr) const override;
// Old: reproducibility forwarded from the wrapped impl.
bool is_reproducible() const override { return m_impl->is_reproducible(); }
// New name: a single fixed identifier, since there is no wrapped impl.
const char* name() const override { return "CONVBIAS_BFLOAT16"; }
// New: always reproducible (delegation is resolved per-search instead).
bool is_reproducible() const override { return true; }
MEGDNN_DECL_ALGO_TYPE(CUDA_BFLOAT16)
// Old param(): serializes the wrapped impl pointer (removed with m_impl).
std::string param() const override {
std::string ret;
serialize_write_pod(m_impl, ret);
return ret;
}
private:
// Old private helpers/state, all removed by this commit: float_args()
// converted a BFloat16 SizeArgs to a float32 one, and m_impl/m_name held
// the wrapped algorithm and derived name.
SizeArgs float_args(const SizeArgs& args, ConvBiasForwardImpl* opr,
TensorLayout& fsrc, TensorLayout& ffilter,
TensorLayout& fbias, TensorLayout& fz,
TensorLayout& fdst) const;
WorkspaceBundle get_workspace_bundle(void* ptr, const SizeArgs& args) const;
AlgoBase* m_impl;
std::string m_name;
};
......@@ -766,7 +754,7 @@ public:
std::vector<AlgoInt8NCHW32IMMAImplicitGemm> int8_nchw32_imma;
#endif
std::vector<std::unique_ptr<AlgoGroupConvGeneral>> gconv_refhold;
std::vector<std::unique_ptr<AlgoBFloat16>> bfloat16_refhold;
AlgoBFloat16 bfloat16;
std::unordered_map<AlgoBase*, AlgoGroupConvGeneral*> algo2gconv;
AlgoBase* cudnn_conv_bias_act_from_enum(cudnnConvolutionFwdAlgo_t algo);
......
......@@ -6,7 +6,8 @@
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/cuda/conv_bias/algo.h"
......@@ -18,58 +19,70 @@ using namespace megdnn;
using namespace cuda;
using namespace conv_bias;
// (Pre-refactor constructor, removed by this commit.)
// Wraps a concrete float32 conv-bias algorithm so it can serve BFloat16
// tensors. Keeps a non-owning pointer to the wrapped algorithm and derives
// the wrapper's display name from it.
ConvBiasForwardImpl::AlgoBFloat16::AlgoBFloat16(
ConvBiasForwardImpl::AlgoBase* algorithm)
: m_impl(algorithm) {
// The wrapped algorithm must never be null.
megdnn_assert_internal(algorithm);
// Name format "BFLOAT16:<impl name>" identifies the underlying algo.
m_name = ssprintf("BFLOAT16:%s", m_impl->name());
}
// NOTE(review): diff capture with +/- markers stripped — the REMOVED member
// function float_args() (lines starting at the signature below, plus the
// change_dtype(fsrc..fdst) / opr->param() / return SizeArgs(...) statements)
// is interleaved with the ADDED free function sub_opr_config() in an
// anonymous namespace. Not compilable as shown.
//
// Old float_args(): copied the five BFloat16 layouts into out-params,
// retargeted them to Float32, configured `opr`, and built a float32 SizeArgs.
ConvBiasForwardImpl::AlgoBase::SizeArgs
ConvBiasForwardImpl::AlgoBFloat16::float_args(
const SizeArgs& args, ConvBiasForwardImpl* opr, TensorLayout& fsrc,
TensorLayout& ffilter, TensorLayout& fbias, TensorLayout& fz,
TensorLayout& fdst) const {
fsrc = *args.src_layout;
ffilter = *args.filter_layout;
fbias = *args.bias_layout;
fz = *args.z_layout;
fdst = *args.dst_layout;
// New sub_opr_config(): given the (src, filter, bias[, z, dst]) layouts and
// the conv-bias operator, returns the layouts with BFloat16 rewritten to
// Float32 plus a param whose compute_mode is forced to DEFAULT — i.e. the
// configuration of the float32 sub-operator this algo delegates to.
namespace {
std::pair<TensorLayoutArray, ConvBiasForwardImpl::Param> sub_opr_config(
const TensorLayoutArray& layouts, const ConvBiasForwardImpl* opr) {
// At least src/filter/dst must be present.
megdnn_assert(layouts.size() >= 3);
std::pair<TensorLayoutArray, ConvBiasForwardImpl::Param> ret;
ret.first = layouts;
// Only BFloat16 layouts are retargeted; other dtypes pass through.
auto change_dtype = [](TensorLayout& layout) {
if (layout.dtype == dtype::BFloat16()) {
layout.dtype = dtype::Float32();
}
};
// (Old float_args body, removed:)
change_dtype(fsrc);
change_dtype(ffilter);
change_dtype(fbias);
change_dtype(fz);
change_dtype(fdst);
opr->param() = args.opr->param();
opr->param().compute_mode = Param::ComputeMode::DEFAULT;
opr->execution_policy() = {m_impl->desc(), {}};
return SizeArgs(opr, fsrc, ffilter, fbias, fz, fdst);
// (New sub_opr_config body, added:)
change_dtype(ret.first[0]);
change_dtype(ret.first[1]);
change_dtype(ret.first[2]);
change_dtype(ret.first[3]);
change_dtype(ret.first[4]);
ret.second = opr->param();
// The sub-operator computes in plain float32 mode.
ret.second.compute_mode = ConvBiasForwardImpl::Param::ComputeMode::DEFAULT;
return ret;
}
} // namespace
// Part of the new recursive-interface scheme: reports the single float32
// CONVBIAS_FORWARD sub-operator that this BFloat16 algo delegates to, so the
// algorithm search can recurse into it. The sub-operator's layouts/param come
// from sub_opr_config() (BFloat16 layouts rewritten to Float32, compute_mode
// forced to DEFAULT); the param is serialized into the SearchItem.
std::vector<Algorithm::SearchItem>
ConvBiasForwardImpl::AlgoBFloat16::get_subopr_list(
const TensorLayoutArray& layouts, const OperatorBase* opr) const {
auto&& config = sub_opr_config(
layouts, static_cast<const ConvBiasForwardImpl*>(opr));
std::string param_str;
Algorithm::serialize_write_pod(config.second, param_str);
return {{Algorithm::OprType::CONVBIAS_FORWARD, param_str, config.first}};
}
// NOTE(review): diff capture with +/- markers stripped — REMOVED lines (the
// fsrc..fdst locals, the float_args() call, and the m_impl->is_available(fargs)
// conjunct) are interleaved with ADDED lines (the sub_opr_config() call and the
// trailing get_algorithm(...) — which in the real post-commit code is part of
// the return expression, not dead code after it). Not compilable as shown.
//
// Intent (both versions): the algo is available iff src/filter dtypes match,
// are BFloat16, and the delegated float32 conv-bias can run on the converted
// layouts.
bool ConvBiasForwardImpl::AlgoBFloat16::is_available(
const SizeArgs& args) const {
TensorLayout fsrc, ffilter, fbias, fz, fdst;
auto convbias_opr = args.handle->create_operator<ConvBias>();
// (Old path, removed:) build a float32 SizeArgs for the wrapped impl.
SizeArgs fargs = float_args(
args, static_cast<ConvBiasForwardImpl*>(convbias_opr.get()), fsrc,
ffilter, fbias, fz, fdst);
// (New path, added:) derive float32 layouts/param via sub_opr_config.
auto&& config = sub_opr_config(
{*args.src_layout, *args.filter_layout, *args.bias_layout,
*args.z_layout, *args.dst_layout},
args.opr);
convbias_opr->param() = config.second;
return args.src_layout->dtype == args.filter_layout->dtype &&
args.src_layout->dtype == dtype::BFloat16() &&
m_impl->is_available(fargs);
get_algorithm(static_cast<ConvBiasForwardImpl*>(convbias_opr.get()),
config.first[0], config.first[1], config.first[2],
config.first[3], config.first[4]);
}
WorkspaceBundle ConvBiasForwardImpl::AlgoBFloat16::get_workspace_bundle(
void* ptr, const SizeArgs& args) const {
TensorLayout fsrc, ffilter, fbias, fz, fdst;
auto convbias_opr = args.handle->create_operator<ConvBias>();
SizeArgs fargs = float_args(
args, static_cast<ConvBiasForwardImpl*>(convbias_opr.get()), fsrc,
ffilter, fbias, fz, fdst);
if (args.opr->execution_policy().algo.valid()) {
megdnn_assert(args.opr->execution_policy().sub_policy.size() == 1);
convbias_opr->execution_policy() =
args.opr->execution_policy().sub_policy[0];
}
auto&& config = sub_opr_config(
{*args.src_layout, *args.filter_layout, *args.bias_layout,
*args.z_layout, *args.dst_layout},
args.opr);
convbias_opr->param() = config.second;
SmallVector<size_t> sizes;
auto get_workspace = [&sizes](const TensorLayout& src,
const TensorLayout& dst) {
......@@ -77,12 +90,15 @@ WorkspaceBundle ConvBiasForwardImpl::AlgoBFloat16::get_workspace_bundle(
sizes.push_back(dst.span().dist_byte());
}
};
get_workspace(*args.src_layout, fsrc);
get_workspace(*args.filter_layout, ffilter);
get_workspace(*args.bias_layout, fbias);
get_workspace(*args.z_layout, fz);
get_workspace(*args.dst_layout, fdst);
sizes.push_back(m_impl->get_workspace_in_bytes(fargs));
get_workspace(*args.src_layout, config.first[0]);
get_workspace(*args.filter_layout, config.first[1]);
get_workspace(*args.bias_layout, config.first[2]);
get_workspace(*args.z_layout, config.first[3]);
get_workspace(*args.dst_layout, config.first[4]);
sizes.push_back(convbias_opr->get_workspace_in_bytes(
config.first[0], config.first[1], config.first[2], config.first[3],
config.first[4], nullptr));
return {ptr, std::move(sizes)};
}
......@@ -110,7 +126,12 @@ void ConvBiasForwardImpl::AlgoBFloat16::exec(const ExecArgs& args) const {
auto convbias_opr = args.handle->create_operator<ConvBias>();
convbias_opr->param() = args.opr->param();
convbias_opr->param().compute_mode = Param::ComputeMode::DEFAULT;
convbias_opr->execution_policy() = {m_impl->desc(), {}};
if (args.opr->execution_policy().algo.valid()) {
megdnn_assert(args.opr->execution_policy().sub_policy.size() == 1);
convbias_opr->execution_policy() =
args.opr->execution_policy().sub_policy[0];
}
convbias_opr->exec(fsrc_tensor, ffilter_tensor, fbias_tensor, fz_tensor,
fdst_tensor, nullptr, cvter.workspace());
}
......
......@@ -214,6 +214,9 @@ void ConvBiasForwardImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in filter,
DISPATCH_RAW(Float16, Float16, Float16, FLOAT32,
(convolution::forward_bias<dt_float16, dt_float16,
dt_float16, dt_float32>))
DISPATCH_RAW(BFloat16, BFloat16, BFloat16, FLOAT32,
(convolution::forward_bias<dt_bfloat16, dt_bfloat16,
dt_bfloat16, dt_float32>))
#endif
else {
megdnn_throw(ssprintf(
......
......@@ -8,6 +8,7 @@
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "megdnn/dtype.h"
#include "test/cuda/fixture.h"
#include "megdnn/opr_param_defs.h"
......@@ -108,6 +109,32 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_F32) {
}
}
// New test added by this commit: runs the standard conv-bias arg set with all
// five tensors (src, filter, bias, z, dst) in BFloat16 and pins the execution
// policy to the new "CONVBIAS_BFLOAT16" algo with a "MATMUL" sub-policy,
// exercising the recursive sub-operator dispatch end to end.
TEST_F(CUDA, CONV_BIAS_FORWARD_BF16) {
using namespace conv_bias;
std::vector<TestArg> args = get_args();
Checker<ConvBiasForward> checker(handle_cuda());
// Force the BFloat16 wrapper + its matmul sub-algorithm so the test fails
// rather than silently falling back to another algo.
checker.set_before_exec_callback(
AlgoChecker<ConvBiasForward>(ExecutionPolicyAlgoName{
"CONVBIAS_BFLOAT16", {{"MATMUL", {}}}}));
NormalRNG default_rng;
for (auto&& arg : args) {
// Accumulate in float32 even though tensors are BFloat16.
arg.param.compute_mode = param::Convolution::ComputeMode::FLOAT32;
checker.set_dtype(0, dtype::BFloat16())
.set_dtype(1, dtype::BFloat16())
.set_dtype(2, dtype::BFloat16())
.set_dtype(3, dtype::BFloat16())
.set_dtype(4, dtype::BFloat16())
.set_rng(0, &default_rng)
.set_rng(1, &default_rng)
.set_rng(2, &default_rng)
// Loose epsilon: BFloat16 has only ~8 bits of mantissa.
.set_epsilon(2e-2)
.set_param(arg.param)
.execs({arg.src, arg.filter, arg.bias, {}, {}});
}
}
TEST_F(CUDA, CONV_BIAS_FORWARD_QS8) {
require_compute_capability(6, 1);
......
......@@ -80,7 +80,8 @@ TEST_F(CUDA, CONVOLUTION_FORWARD)
Checker<ConvolutionForward> checker(handle_cuda());
NormalRNG default_rng;
for (auto &&arg: args) {
float scale = 1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3]);
float scale =
1.0f / sqrt(arg.filter[1] * arg.filter[2] * arg.filter[3]);
UniformFloatRNG rng(scale, 2 * scale);
checker.
set_dtype(0, dtype::Float32()).
......@@ -115,7 +116,6 @@ TEST_F(CUDA, CONVOLUTION_FORWARD)
.set_epsilon(1e-1)
.set_param(arg.param)
.execs({arg.src, arg.filter, {}});
}
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册