提交 e33e2fc1 编写于 作者: L lizhenyu

add bn fusion pass

上级 98565d8b
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/optimizer/gpu/batch_norm_add_relu_fusion.h"
#include <memory>
#include <vector>
#include <string>
#include "backend/session/anf_runtime_algorithm.h"
#include "ir/primitive.h"
#include "utils/utils.h"
#include "backend/optimizer/common/helper.h"
namespace mindspore {
namespace opt {
// Describes the subgraph this pass searches for, built from the innermost call outwards:
//   Relu(TensorAdd(TupleGetItem(FusedBatchNormEx(x, scale, bias, mean, var), index), z))
// The Relu call is the root of the returned pattern.
const BaseRef BatchNormAddReluFusion::DefinePattern() const {
  auto bn_call = VectorRef({prim::kPrimFusedBatchNormEx, x_, scale_, bias_, mean_, var_});
  auto bn_output = VectorRef({prim::kPrimTupleGetItem, bn_call, index_});
  auto residual_sum = VectorRef({prim::kPrimTensorAdd, bn_output, z_});
  auto activation = VectorRef({prim::kPrimRelu, residual_sum});
  return activation;
}
// Collapses Relu(TensorAdd(TupleGetItem(FusedBatchNormEx(...), index), z)) into a single
// kFusedBatchNormExWithAddAndActivation node.
//
// graph: graph that owns the matched nodes; it must have a manager.
// node:  the matched Relu node (root of the pattern).
// equiv: pattern bindings; unused, the operands are read back from the graph instead.
//
// Returns the TupleGetItem node, which stands in for `node` and reads from the fused node once the
// FusedBatchNormEx has been replaced. Returns nullptr when the fusion is not applied: output 0 of the
// batch norm is not float16, or one of the intermediate nodes has another consumer.
const AnfNodePtr BatchNormAddReluFusion::Process(const FuncGraphPtr &graph, const AnfNodePtr &node,
                                                 const EquivPtr &equiv) const {
  MS_EXCEPTION_IF_NULL(graph);
  MS_EXCEPTION_IF_NULL(node);

  // Walk from the Relu root down to the batch norm node.
  auto tensor_add = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(node), 0);
  MS_EXCEPTION_IF_NULL(tensor_add);
  const auto add_cnode = utils::cast<CNodePtr>(tensor_add);
  auto tuple_get_item = AnfAlgo::GetInputNode(add_cnode, 0);
  MS_EXCEPTION_IF_NULL(tuple_get_item);
  auto batch_norm_ex = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(tuple_get_item), 0);
  MS_EXCEPTION_IF_NULL(batch_norm_ex);

  // Only float16 batch norms are fused.
  if (AnfAlgo::GetOutputInferDataType(batch_norm_ex, 0) != kNumberTypeFloat16) {
    return nullptr;
  }

  // After the rewrite the TupleGetItem yields the post-add, post-ReLU value and the TensorAdd is expected
  // to become dead. If either node feeds anything besides this chain, that consumer would silently see a
  // different value (and if `z` is this same TupleGetItem the fused node would depend on its own output),
  // so skip the fusion. This mirrors the single-consumer check in BatchNormReluGradFusion.
  // NOTE(review): index_ is unconstrained in the pattern; presumably only output 0 should fuse - confirm.
  const auto add_users = GetRealNodeUsedList(graph, tensor_add);
  MS_EXCEPTION_IF_NULL(add_users);
  const auto item_users = GetRealNodeUsedList(graph, tuple_get_item);
  MS_EXCEPTION_IF_NULL(item_users);
  if (add_users->size() >= 2 || item_users->size() >= 2) {
    return nullptr;
  }

  // Operands of the fused node: the five batch norm inputs followed by the addend.
  const auto bn_cnode = utils::cast<CNodePtr>(batch_norm_ex);
  auto x = AnfAlgo::GetInputNode(bn_cnode, 0);
  auto scale = AnfAlgo::GetInputNode(bn_cnode, 1);
  auto bias = AnfAlgo::GetInputNode(bn_cnode, 2);
  auto mean = AnfAlgo::GetInputNode(bn_cnode, 3);
  auto var = AnfAlgo::GetInputNode(bn_cnode, 4);
  auto z = AnfAlgo::GetInputNode(add_cnode, 1);
  MS_EXCEPTION_IF_NULL(x);
  MS_EXCEPTION_IF_NULL(scale);
  MS_EXCEPTION_IF_NULL(bias);
  MS_EXCEPTION_IF_NULL(mean);
  MS_EXCEPTION_IF_NULL(var);
  MS_EXCEPTION_IF_NULL(z);

  auto prim = std::make_shared<Primitive>(kFusedBatchNormExWithAddAndActivation);
  MS_EXCEPTION_IF_NULL(prim);
  std::vector<AnfNodePtr> inputs = {NewValueNode(prim), x, scale, bias, mean, var, z};
  auto fused_batch_norm_with_add_relu = graph->NewCNode(inputs);
  MS_EXCEPTION_IF_NULL(fused_batch_norm_with_add_relu);

  // The fused node exposes the same outputs (types and shapes) and attributes as the original batch norm.
  const auto output_num = AnfAlgo::GetOutputTensorNum(batch_norm_ex);
  std::vector<TypeId> outputs_type;
  std::vector<std::vector<size_t>> outputs_shape;
  outputs_type.reserve(output_num);
  outputs_shape.reserve(output_num);
  for (size_t i = 0; i < output_num; i++) {
    outputs_type.push_back(AnfAlgo::GetOutputInferDataType(batch_norm_ex, i));
    outputs_shape.push_back(AnfAlgo::GetOutputInferShape(batch_norm_ex, i));
  }
  AnfAlgo::SetOutputInferTypeAndShape(outputs_type, outputs_shape, fused_batch_norm_with_add_relu.get());
  AnfAlgo::CopyNodeAttrs(batch_norm_ex, fused_batch_norm_with_add_relu);

  // Redirect every use of the batch norm to the fused node, then hand back the TupleGetItem as the
  // replacement for the matched Relu.
  auto manager = graph->manager();
  MS_EXCEPTION_IF_NULL(manager);
  manager->Replace(batch_norm_ex, fused_batch_norm_with_add_relu);
  return tuple_get_item;
}
} // namespace opt
} // namespace mindspore
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_BATCH_NORM_ADD_RELU_FUSION_H_
#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_BATCH_NORM_ADD_RELU_FUSION_H_
#include <memory>
#include "backend/optimizer/common/optimizer.h"
namespace mindspore {
namespace opt {
class BatchNormAddReluFusion : public PatternProcessPass {
public:
explicit BatchNormAddReluFusion(bool multigraph = true)
: PatternProcessPass("batch_norm_add_relu_fusion", multigraph) {
x_ = std::make_shared<Var>();
scale_ = std::make_shared<Var>();
bias_ = std::make_shared<Var>();
mean_ = std::make_shared<Var>();
var_ = std::make_shared<Var>();
index_ = std::make_shared<Var>();
z_ = std::make_shared<Var>();
}
~BatchNormAddReluFusion() override = default;
const BaseRef DefinePattern() const override;
const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override;
private:
VarPtr x_;
VarPtr scale_;
VarPtr bias_;
VarPtr mean_;
VarPtr var_;
VarPtr index_;
VarPtr z_;
};
} // namespace opt
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_BATCH_NORM_ADD_RELU_FUSION_H_
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/optimizer/gpu/batch_norm_relu_fusion.h"
#include <memory>
#include <vector>
#include <string>
#include "backend/session/anf_runtime_algorithm.h"
#include "ir/primitive.h"
#include "utils/utils.h"
#include "backend/optimizer/common/helper.h"
namespace mindspore {
namespace opt {
// Describes the subgraph this pass searches for, built from the innermost call outwards:
//   Relu(TupleGetItem(FusedBatchNormEx(x, scale, bias, mean, var), index))
// The Relu call is the root of the returned pattern.
const BaseRef BatchNormReluFusion::DefinePattern() const {
  auto bn_call = VectorRef({prim::kPrimFusedBatchNormEx, x_, scale_, bias_, mean_, var_});
  auto bn_output = VectorRef({prim::kPrimTupleGetItem, bn_call, index_});
  auto activation = VectorRef({prim::kPrimRelu, bn_output});
  return activation;
}
// Collapses Relu(TupleGetItem(FusedBatchNormEx(...), index)) into a single
// kFusedBatchNormExWithActivation node.
//
// graph: graph that owns the matched nodes; it must have a manager.
// node:  the matched Relu node (root of the pattern).
// equiv: pattern bindings; unused, the operands are read back from the graph instead.
//
// Returns the TupleGetItem node, which stands in for `node` and reads from the fused node once the
// FusedBatchNormEx has been replaced. Returns nullptr when the fusion is not applied: output 0 of the
// batch norm is not float16, or the TupleGetItem has another consumer.
const AnfNodePtr BatchNormReluFusion::Process(const FuncGraphPtr &graph, const AnfNodePtr &node,
                                              const EquivPtr &equiv) const {
  MS_EXCEPTION_IF_NULL(graph);
  MS_EXCEPTION_IF_NULL(node);

  // Walk from the Relu root down to the batch norm node.
  auto tuple_get_item = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(node), 0);
  MS_EXCEPTION_IF_NULL(tuple_get_item);
  auto batch_norm_ex = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(tuple_get_item), 0);
  MS_EXCEPTION_IF_NULL(batch_norm_ex);

  // Only float16 batch norms are fused.
  if (AnfAlgo::GetOutputInferDataType(batch_norm_ex, 0) != kNumberTypeFloat16) {
    return nullptr;
  }

  // After the rewrite the TupleGetItem yields the post-ReLU value. Any consumer other than the matched
  // Relu would silently receive activated data instead of the raw batch norm output, so skip the fusion
  // in that case. This mirrors the single-consumer check in BatchNormReluGradFusion.
  // NOTE(review): index_ is unconstrained in the pattern; presumably only output 0 should fuse - confirm.
  const auto item_users = GetRealNodeUsedList(graph, tuple_get_item);
  MS_EXCEPTION_IF_NULL(item_users);
  if (item_users->size() >= 2) {
    return nullptr;
  }

  // Operands of the fused node: the five batch norm inputs, in the same order.
  const auto bn_cnode = utils::cast<CNodePtr>(batch_norm_ex);
  auto x = AnfAlgo::GetInputNode(bn_cnode, 0);
  auto scale = AnfAlgo::GetInputNode(bn_cnode, 1);
  auto bias = AnfAlgo::GetInputNode(bn_cnode, 2);
  auto mean = AnfAlgo::GetInputNode(bn_cnode, 3);
  auto var = AnfAlgo::GetInputNode(bn_cnode, 4);
  MS_EXCEPTION_IF_NULL(x);
  MS_EXCEPTION_IF_NULL(scale);
  MS_EXCEPTION_IF_NULL(bias);
  MS_EXCEPTION_IF_NULL(mean);
  MS_EXCEPTION_IF_NULL(var);

  auto prim = std::make_shared<Primitive>(kFusedBatchNormExWithActivation);
  MS_EXCEPTION_IF_NULL(prim);
  std::vector<AnfNodePtr> inputs = {NewValueNode(prim), x, scale, bias, mean, var};
  auto fused_batch_norm_with_relu = graph->NewCNode(inputs);
  MS_EXCEPTION_IF_NULL(fused_batch_norm_with_relu);

  // The fused node exposes the same outputs (types and shapes) and attributes as the original batch norm.
  const auto output_num = AnfAlgo::GetOutputTensorNum(batch_norm_ex);
  std::vector<TypeId> outputs_type;
  std::vector<std::vector<size_t>> outputs_shape;
  outputs_type.reserve(output_num);
  outputs_shape.reserve(output_num);
  for (size_t i = 0; i < output_num; i++) {
    outputs_type.push_back(AnfAlgo::GetOutputInferDataType(batch_norm_ex, i));
    outputs_shape.push_back(AnfAlgo::GetOutputInferShape(batch_norm_ex, i));
  }
  AnfAlgo::SetOutputInferTypeAndShape(outputs_type, outputs_shape, fused_batch_norm_with_relu.get());
  AnfAlgo::CopyNodeAttrs(batch_norm_ex, fused_batch_norm_with_relu);

  // Redirect every use of the batch norm to the fused node, then hand back the TupleGetItem as the
  // replacement for the matched Relu.
  auto manager = graph->manager();
  MS_EXCEPTION_IF_NULL(manager);
  manager->Replace(batch_norm_ex, fused_batch_norm_with_relu);
  return tuple_get_item;
}
} // namespace opt
} // namespace mindspore
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_BATCH_NORM_RELU_FUSION_H_
#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_BATCH_NORM_RELU_FUSION_H_
#include <memory>
#include "backend/optimizer/common/optimizer.h"
namespace mindspore {
namespace opt {
class BatchNormReluFusion : public PatternProcessPass {
public:
explicit BatchNormReluFusion(bool multigraph = true) : PatternProcessPass("batch_norm_relu_fusion", multigraph) {
x_ = std::make_shared<Var>();
scale_ = std::make_shared<Var>();
bias_ = std::make_shared<Var>();
mean_ = std::make_shared<Var>();
var_ = std::make_shared<Var>();
index_ = std::make_shared<Var>();
}
~BatchNormReluFusion() override = default;
const BaseRef DefinePattern() const override;
const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override;
private:
VarPtr x_;
VarPtr scale_;
VarPtr bias_;
VarPtr mean_;
VarPtr var_;
VarPtr index_;
};
} // namespace opt
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_BATCH_NORM_RELU_FUSION_H_
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/optimizer/gpu/batch_norm_relu_grad_fusion.h"
#include <memory>
#include <vector>
#include <string>
#include "backend/session/anf_runtime_algorithm.h"
#include "ir/primitive.h"
#include "utils/utils.h"
#include "backend/optimizer/common/helper.h"
namespace mindspore {
namespace opt {
// Describes the subgraph this pass searches for:
//   FusedBatchNormGradEx(ReluGrad(dy, y), x, scale, save_mean, save_var, reserve)
// The FusedBatchNormGradEx call is the root of the returned pattern.
const BaseRef BatchNormReluGradFusion::DefinePattern() const {
  auto activation_grad = VectorRef({prim::kPrimReluGrad, dy_, y_});
  auto bn_grad_call =
    VectorRef({prim::kPrimFusedBatchNormGradEx, activation_grad, x_, scale_, save_mean_, save_var_, reserve_});
  return bn_grad_call;
}
// Folds FusedBatchNormGradEx(ReluGrad(dy, y), x, scale, save_mean, save_var, reserve) into one
// kFusedBatchNormGradExWithActivation node.
//
// graph: graph that owns the matched nodes.
// node:  the matched FusedBatchNormGradEx node (root of the pattern).
// equiv: pattern bindings; unused, the operands are read back from the graph instead.
//
// Returns the new fused node, or nullptr when output 0 of `node` is not float16 or the ReluGrad result
// has two or more real consumers.
const AnfNodePtr BatchNormReluGradFusion::Process(const FuncGraphPtr &graph, const AnfNodePtr &node,
                                                  const EquivPtr &equiv) const {
  MS_EXCEPTION_IF_NULL(graph);
  MS_EXCEPTION_IF_NULL(node);
  if (AnfAlgo::GetOutputInferDataType(node, 0) != kNumberTypeFloat16) {
    return nullptr;
  }

  const auto bn_grad_cnode = utils::cast<CNodePtr>(node);
  auto relu_grad = AnfAlgo::GetInputNode(bn_grad_cnode, 0);
  MS_EXCEPTION_IF_NULL(relu_grad);
  // The ReluGrad is absorbed into the fused node, so it must not feed anything else.
  auto relu_grad_users = GetRealNodeUsedList(graph, relu_grad);
  if (relu_grad_users->size() > 1) {
    return nullptr;
  }

  // Operands of the ReluGrad.
  const auto relu_grad_cnode = utils::cast<CNodePtr>(relu_grad);
  auto dy = AnfAlgo::GetInputNode(relu_grad_cnode, 0);
  MS_EXCEPTION_IF_NULL(dy);
  auto y = AnfAlgo::GetInputNode(relu_grad_cnode, 1);
  MS_EXCEPTION_IF_NULL(y);

  // Remaining operands of the batch norm gradient.
  auto x = AnfAlgo::GetInputNode(bn_grad_cnode, 1);
  MS_EXCEPTION_IF_NULL(x);
  auto scale = AnfAlgo::GetInputNode(bn_grad_cnode, 2);
  MS_EXCEPTION_IF_NULL(scale);
  auto save_mean = AnfAlgo::GetInputNode(bn_grad_cnode, 3);
  MS_EXCEPTION_IF_NULL(save_mean);
  auto save_var = AnfAlgo::GetInputNode(bn_grad_cnode, 4);
  MS_EXCEPTION_IF_NULL(save_var);
  auto reserve = AnfAlgo::GetInputNode(bn_grad_cnode, 5);
  MS_EXCEPTION_IF_NULL(reserve);

  // bias is taken from input 2 of the node that produces save_mean.
  // NOTE(review): presumably that producer is the forward batch norm - confirm.
  auto batch_norm = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(save_mean), 0);
  MS_EXCEPTION_IF_NULL(batch_norm);
  auto bias = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(batch_norm), 2);
  MS_EXCEPTION_IF_NULL(bias);

  auto fused_prim = std::make_shared<Primitive>(kFusedBatchNormGradExWithActivation);
  MS_EXCEPTION_IF_NULL(fused_prim);
  std::vector<AnfNodePtr> fused_inputs = {NewValueNode(fused_prim), dy, x, scale, save_mean, save_var, reserve, bias,
                                          y};
  auto fused_batch_norm_grad_with_relu = graph->NewCNode(fused_inputs);
  MS_EXCEPTION_IF_NULL(fused_batch_norm_grad_with_relu);

  // The fused node takes over the output types, output shapes and attributes of the matched node.
  std::vector<TypeId> inferred_types;
  std::vector<std::vector<size_t>> inferred_shapes;
  const auto output_count = AnfAlgo::GetOutputTensorNum(node);
  for (size_t out_idx = 0; out_idx < output_count; ++out_idx) {
    inferred_types.emplace_back(AnfAlgo::GetOutputInferDataType(node, out_idx));
    inferred_shapes.emplace_back(AnfAlgo::GetOutputInferShape(node, out_idx));
  }
  AnfAlgo::SetOutputInferTypeAndShape(inferred_types, inferred_shapes, fused_batch_norm_grad_with_relu.get());
  AnfAlgo::CopyNodeAttrs(node, fused_batch_norm_grad_with_relu);
  return fused_batch_norm_grad_with_relu;
}
} // namespace opt
} // namespace mindspore
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_BATCH_NORM_RELU_GRAD_FUSION_H_
#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_BATCH_NORM_RELU_GRAD_FUSION_H_
#include <memory>
#include "backend/optimizer/common/optimizer.h"
namespace mindspore {
namespace opt {
class BatchNormReluGradFusion : public PatternProcessPass {
public:
explicit BatchNormReluGradFusion(bool multigraph = true)
: PatternProcessPass("batch_norm_relu_grad_fusion", multigraph) {
dy_ = std::make_shared<Var>();
y_ = std::make_shared<Var>();
x_ = std::make_shared<Var>();
scale_ = std::make_shared<Var>();
save_mean_ = std::make_shared<Var>();
save_var_ = std::make_shared<Var>();
reserve_ = std::make_shared<Var>();
}
~BatchNormReluGradFusion() override = default;
const BaseRef DefinePattern() const override;
const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override;
private:
VarPtr dy_;
VarPtr y_;
VarPtr x_;
VarPtr scale_;
VarPtr save_mean_;
VarPtr save_var_;
VarPtr reserve_;
};
} // namespace opt
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_BATCH_NORM_RELU_GRAD_FUSION_H_
......@@ -117,8 +117,10 @@ inline const PrimitivePtr kPrimApplyCenteredRMSProp = std::make_shared<Primitive
// Shared Primitive handles; each one is constructed from the operator's name string.
// kPrimFusedBatchNormEx and kPrimFusedBatchNormGradEx are the handles that the GPU batch norm
// fusion patterns (batch_norm_*_fusion) match on.
inline const PrimitivePtr kPrimAvgPoolGrad = std::make_shared<Primitive>("AvgPoolGrad");
inline const PrimitivePtr kPrimAvgPoolGradVm = std::make_shared<Primitive>("AvgPoolGradVm");
inline const PrimitivePtr kPrimFusedBatchNorm = std::make_shared<Primitive>("FusedBatchNorm");
inline const PrimitivePtr kPrimFusedBatchNormEx = std::make_shared<Primitive>("FusedBatchNormEx");
inline const PrimitivePtr kPrimConv2D = std::make_shared<Primitive>("Conv2D");
inline const PrimitivePtr kPrimFusedBatchNormGrad = std::make_shared<Primitive>("FusedBatchNormGrad");
inline const PrimitivePtr kPrimFusedBatchNormGradEx = std::make_shared<Primitive>("FusedBatchNormGradEx");
inline const PrimitivePtr kPrimBatchNorm = std::make_shared<Primitive>("BatchNorm");
inline const PrimitivePtr kPrimBatchNormGrad = std::make_shared<Primitive>("BatchNormGrad");
inline const PrimitivePtr kPrimReluGrad = std::make_shared<Primitive>("ReluGrad");
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册