Commit b8febaf9 authored by Megvii Engine Team

refactor(megdnn): refactor bfloat16 ConvolutionBackwardFilter to recursive interface

GitOrigin-RevId: 37c08a5b8b2484df300acf71c651640eca041144
Parent f14e0c17
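
The change removes the old wrapper that held a concrete inner algorithm (m_algorithm) and moves AlgoBFloat16 to the recursive sub-operator interface: the algorithm describes its fp32 sub-problem through get_subopr_list(), and at execution time it forwards the sub-policy that the dispatcher attached to its own execution policy. Below is a minimal, self-contained C++ sketch of that pattern only; Policy, SearchItem and BFloat16Wrapper are illustrative stand-ins, not MegDNN's real ExecutionPolicy, Algorithm::SearchItem or AlgoBFloat16 classes.

// Sketch of the recursive sub-operator interface (illustrative types only).
#include <cstdio>
#include <string>
#include <vector>

// Stand-in for an execution policy: the algorithm chosen for an operator plus
// the policies chosen for its sub-operators (filled in by a recursive search).
struct Policy {
    std::string algo;
    std::vector<Policy> sub;
};

// Stand-in for a search item: which operator the sub-problem runs on and a
// serialized parameter describing it.
struct SearchItem {
    std::string opr_type;
    std::string param;
};

// Stand-in for the bfloat16 wrapper algorithm: it no longer stores a concrete
// inner algorithm.
struct BFloat16Wrapper {
    // Analogue of get_subopr_list(): report the fp32 sub-problem so the
    // dispatcher can recursively pick an algorithm for it.
    std::vector<SearchItem> sub_opr_list() const {
        return {{"CONVOLUTION_BACKWARD_FILTER",
                 "compute_mode=DEFAULT, dtype=Float32"}};
    }

    // Analogue of exec(): convert bf16 -> fp32, then run the sub-operator with
    // the sub-policy attached to this operator's own policy (if any).
    void exec(const Policy& self) const {
        const char* inner =
                self.sub.empty() ? "<heuristic>" : self.sub[0].algo.c_str();
        std::printf("convert to fp32, run inner algo %s, convert back\n", inner);
    }
};

int main() {
    BFloat16Wrapper algo;
    for (auto&& item : algo.sub_opr_list())
        std::printf("sub-problem on %s (%s)\n", item.opr_type.c_str(),
                    item.param.c_str());
    Policy p{"CONVOLUTION_BACKWARD_FILTER_BFLOAT16", {{"MATMUL", {}}}};
    algo.exec(p);
    return 0;
}

The diff below applies this shape to ConvolutionBackwardFilterImpl::AlgoBFloat16 in MegDNN's CUDA backend.
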
@@ -60,7 +60,7 @@ bool ConvolutionBackwardDataImpl::AlgoBFloat16::is_available(
     auto&& config = sub_opr_config(
             {*args.filter_layout, *args.diff_layout, *args.grad_layout},
             args.opr);
     conv_back_data_opr->param() = config.second;
     return args.diff_layout->dtype == args.filter_layout->dtype &&
            args.diff_layout->dtype == dtype::BFloat16() &&
            get_algorithm(static_cast<ConvolutionBackwardDataImpl*>(
@@ -80,7 +80,7 @@ WorkspaceBundle ConvolutionBackwardDataImpl::AlgoBFloat16::get_workspace_bundle(
     auto&& config = sub_opr_config(
             {*args.filter_layout, *args.diff_layout, *args.grad_layout},
             args.opr);
     conv_back_data_opr->param() = config.second;
     SmallVector<size_t> sizes;
     auto get_workspace = [&sizes](const TensorLayout& src,
                                   const TensorLayout& dst) {
......
@@ -43,12 +43,8 @@ ConvolutionBackwardFilterImpl::AlgoPack::AlgoPack() {
     megdnn_assert(all_algos_data == all_algos.data());
     non_cudnn_algos.push_back(all_algos.rbegin()[0]);  // group matmul
-    size_t algo_size = all_algos.size();
-    for (size_t i=0; i<algo_size; ++i) {
-        bfloat16_refhold.emplace_back(new AlgoBFloat16(all_algos[i]));
-        all_algos.push_back(bfloat16_refhold.back().get());
-        bfloat16_algos.push_back(bfloat16_refhold.back().get());
-    }
+    all_algos.push_back(&bfloat16);
+    bfloat16_algos.push_back(&bfloat16);
     for (auto&& algo : all_algos) {
         m_all_algos_map.emplace(algo->info().desc, algo);
......
@@ -158,27 +158,21 @@ public:
 class ConvolutionBackwardFilterImpl::AlgoBFloat16 final : public AlgoBase {
 public:
-    AlgoBFloat16(ConvolutionBackwardFilterImpl::AlgoBase*);
     bool is_available(const SizeArgs& args) const override;
     size_t get_workspace_in_bytes(const SizeArgs& args) const override;
     void exec(const ExecArgs& args) const override;
-    const char* name() const override { return m_name.c_str(); }
-    bool is_reproducible() const override { return true; }
-    MEGDNN_DECL_ALGO_TYPE(CUDA_BFLOAT16)
-    std::string param() const override {
-        std::string ret;
-        serialize_write_pod(m_algorithm, ret);
-        return ret;
+    std::vector<SearchItem> get_subopr_list(
+            const TensorLayoutArray& layouts,
+            const OperatorBase* opr) const override;
+    const char* name() const override {
+        return "CONVOLUTION_BACKWARD_FILTER_BFLOAT16";
     }
+    bool is_reproducible() const override { return true; }
+    MEGDNN_DECL_ALGO_TYPE(CUDA_BFLOAT16)
 private:
-    std::string m_name;
-    ConvolutionBackwardFilterImpl::AlgoBase* m_algorithm = nullptr;
-    SizeArgs float_args(const SizeArgs& args,
-                        ConvolutionBackwardFilterImpl* opr, TensorLayout& fsrc,
-                        TensorLayout& ffilter, TensorLayout& fdst) const;
     WorkspaceBundle get_workspace_bundle(void* ptr, const SizeArgs& args) const;
 };
@@ -225,7 +219,7 @@ public:
     AlgoChanwise chanwise;
     std::vector<AlgoGroupConvGeneral> gconv;
     std::unordered_map<AlgoBase*, AlgoGroupConvGeneral*> algo2gconv;
-    std::vector<std::unique_ptr<AlgoBFloat16>> bfloat16_refhold;
+    AlgoBFloat16 bfloat16;
     std::vector<AlgoBase*>
             //! all algorithms
......
@@ -6,7 +6,8 @@
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
  */
 #include "./algo.h"
@@ -17,33 +18,39 @@
 using namespace megdnn;
 using namespace cuda;
 using namespace convolution;
-ConvolutionBackwardFilterImpl::AlgoBFloat16::AlgoBFloat16(
-        ConvolutionBackwardFilterImpl::AlgoBase* algorithm)
-        : m_algorithm(algorithm) {
-    megdnn_assert_internal(algorithm);
-    m_name = ssprintf("CONVOLUTION_BACKWARD_Filter_BFLOAT16:%s",
-                      m_algorithm->name());
-}
-ConvolutionBackwardFilterImpl::AlgoBase::SizeArgs
-ConvolutionBackwardFilterImpl::AlgoBFloat16::float_args(
-        const SizeArgs& args, ConvolutionBackwardFilterImpl* opr,
-        TensorLayout& fsrc, TensorLayout& fdiff, TensorLayout& fgrad) const {
-    fsrc = *args.src_layout;
-    fdiff = *args.diff_layout;
-    fgrad = *args.grad_layout;
-    auto change_dtype = [](TensorLayout& layout) {
-        if (layout.dtype == dtype::BFloat16()) {
-            layout.dtype = dtype::Float32();
-        }
-    };
-    change_dtype(fsrc);
-    change_dtype(fdiff);
-    change_dtype(fgrad);
-    opr->param() = args.opr->param();
-    opr->param().compute_mode = Param::ComputeMode::DEFAULT;
-    opr->execution_policy() = {m_algorithm->desc(), {}};
-    return SizeArgs(opr, fsrc, fdiff, fgrad);
-}
+namespace {
+std::pair<TensorLayoutArray, ConvolutionBackwardFilterImpl::Param>
+sub_opr_config(const TensorLayoutArray& layouts,
+               const ConvolutionBackwardFilterImpl* opr) {
+    megdnn_assert(layouts.size() >= 3);
+    std::pair<TensorLayoutArray, ConvolutionBackwardFilterImpl::Param> ret;
+    ret.first = layouts;
+    auto change_dtype = [](TensorLayout& layout) {
+        if (layout.dtype == dtype::BFloat16()) {
+            layout.dtype = dtype::Float32();
+        }
+    };
+    change_dtype(ret.first[0]);
+    change_dtype(ret.first[1]);
+    change_dtype(ret.first[2]);
+    ret.second = opr->param();
+    ret.second.compute_mode =
+            ConvolutionBackwardFilter::Param::ComputeMode::DEFAULT;
+    return ret;
+}
+}  // namespace
+std::vector<Algorithm::SearchItem>
+ConvolutionBackwardFilterImpl::AlgoBFloat16::get_subopr_list(
+        const TensorLayoutArray& layouts, const OperatorBase* opr) const {
+    auto&& config = sub_opr_config(
+            layouts, static_cast<const ConvolutionBackwardFilterImpl*>(opr));
+    std::string param_str;
+    Algorithm::serialize_write_pod(config.second, param_str);
+    return {{Algorithm::OprType::CONVOLUTION_BACKWARD_FILTER, param_str,
+             config.first}};
+}
 bool ConvolutionBackwardFilterImpl::AlgoBFloat16::is_available(
@@ -51,25 +58,33 @@ bool ConvolutionBackwardFilterImpl::AlgoBFloat16::is_available(
-    TensorLayout fsrc, fdiff, fgrad;
     auto conv_back_filter_opr =
             args.handle->create_operator<ConvolutionBackwardFilter>();
-    SizeArgs fargs = float_args(args,
-                                static_cast<ConvolutionBackwardFilterImpl*>(
-                                        conv_back_filter_opr.get()),
-                                fsrc, fdiff, fgrad);
+    auto&& config = sub_opr_config(
+            {*args.src_layout, *args.diff_layout, *args.grad_layout},
+            args.opr);
+    conv_back_filter_opr->param() = config.second;
     return args.src_layout->dtype == args.diff_layout->dtype &&
            args.src_layout->dtype == dtype::BFloat16() &&
-           m_algorithm->is_available(fargs);
+           get_algorithm(static_cast<ConvolutionBackwardFilterImpl*>(
+                                 conv_back_filter_opr.get()),
+                         config.first[0], config.first[1], config.first[2]);
 }
 WorkspaceBundle
 ConvolutionBackwardFilterImpl::AlgoBFloat16::get_workspace_bundle(
         void* ptr, const SizeArgs& args) const {
-    TensorLayout fsrc, fdiff, fgrad;
     auto conv_back_filter_opr =
             args.handle->create_operator<ConvolutionBackwardFilter>();
-    SizeArgs fargs = float_args(args,
-                                static_cast<ConvolutionBackwardFilterImpl*>(
-                                        conv_back_filter_opr.get()),
-                                fsrc, fdiff, fgrad);
+    if (args.opr->execution_policy().algo.valid()) {
+        megdnn_assert(args.opr->execution_policy().sub_policy.size() == 1);
+        conv_back_filter_opr->execution_policy() =
+                args.opr->execution_policy().sub_policy[0];
+    }
+    auto&& config = sub_opr_config(
+            {*args.src_layout, *args.diff_layout, *args.grad_layout},
+            args.opr);
+    conv_back_filter_opr->param() = config.second;
     SmallVector<size_t> sizes;
     auto get_workspace = [&sizes](const TensorLayout& src,
                                   const TensorLayout& dst) {
@@ -77,11 +92,14 @@ ConvolutionBackwardFilterImpl::AlgoBFloat16::get_workspace_bundle(
             sizes.push_back(dst.span().dist_byte());
         }
     };
-    get_workspace(*args.src_layout, fsrc);
-    get_workspace(*args.diff_layout, fdiff);
-    get_workspace(*args.grad_layout, fgrad);
-    sizes.push_back(m_algorithm->get_workspace_in_bytes(fargs));
-    return {ptr, std::move(sizes)};
+    get_workspace(*args.src_layout, config.first[0]);
+    get_workspace(*args.diff_layout, config.first[1]);
+    get_workspace(*args.grad_layout, config.first[2]);
+    sizes.push_back(conv_back_filter_opr->get_workspace_in_bytes(
+            config.first[0], config.first[1], config.first[2]));
+    auto ret = WorkspaceBundle{ptr, std::move(sizes)};
+    return ret;
 }
 size_t ConvolutionBackwardFilterImpl::AlgoBFloat16::get_workspace_in_bytes(
@@ -107,7 +125,12 @@ void ConvolutionBackwardFilterImpl::AlgoBFloat16::exec(
         conv_back_filter_opr->param() = args.opr->param();
         conv_back_filter_opr->param().compute_mode =
                 Param::ComputeMode::DEFAULT;
-        conv_back_filter_opr->execution_policy() = {m_algorithm->desc(), {}};
+        if (args.opr->execution_policy().algo.valid()) {
+            megdnn_assert(args.opr->execution_policy().sub_policy.size() == 1);
+            conv_back_filter_opr->execution_policy() =
+                    args.opr->execution_policy().sub_policy[0];
+        }
         conv_back_filter_opr->exec(fsrc_tensor, fdiff_tensor, fgrad_tensor,
                                    cvter.workspace());
     }
......
@@ -152,6 +152,17 @@ public:
                 ->info();
     }
+    AlgorithmInfo get_algorithm_info_heuristic(const TensorLayout& filter,
+                                               const TensorLayout& diff,
+                                               const TensorLayout& grad,
+                                               size_t workspace_limit_in_bytes,
+                                               bool reproducible) {
+        return get_algorithm_heuristic(filter, diff, grad,
+                                       workspace_limit_in_bytes, reproducible)
+                ->info();
+    }
     const char* get_algorithm_set_name() const override;
     class AlgoBase;
......
@@ -328,12 +328,18 @@ TEST_F(CUDA, CONVOLUTION_BACKWARD_FILTER)
                 .set_epsilon(1e-1)
                 .set_param(arg.param)
                 .exec(TensorLayoutArray{src, dst, filter});
+        checker.set_before_exec_callback(AlgoChecker<ConvolutionBackwardFilter>(
+                ExecutionPolicyAlgoName{"CONVOLUTION_BACKWARD_FILTER_BFLOAT16",
+                                        {{"MATMUL", {}}}}));
         src.dtype = dst.dtype = filter.dtype = dtype::BFloat16();
         checker.set_rng(0, &rng)
                 .set_rng(1, &rng)
                 .set_epsilon(1e-1)
                 .set_param(arg.param)
                 .exec(TensorLayoutArray{src, dst, filter});
+        checker.reset_before_exec_callback();
+        checker.opr()->execution_policy() = {};
     }
 }
......