add bn fusion pass

e33e2fc1 · lizhenyu · 98565d8b · e33e2fc1 · e33e2fc1 · e33e2fc1
7 changed file
--- a/mindspore/ccsrc/backend/optimizer/gpu/batch_norm_add_relu_fusion.cc
+++ b/mindspore/ccsrc/backend/optimizer/gpu/batch_norm_add_relu_fusion.cc
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "backend/optimizer/gpu/batch_norm_add_relu_fusion.h"
+
+#include <memory>
+#include <vector>
+#include <string>
+
+#include "backend/session/anf_runtime_algorithm.h"
+#include "ir/primitive.h"
+#include "utils/utils.h"
+#include "backend/optimizer/common/helper.h"
+
+namespace mindspore {
+namespace opt {
+const BaseRef BatchNormAddReluFusion::DefinePattern() const {
+  VectorRef batch_norm_ex = VectorRef({prim::kPrimFusedBatchNormEx, x_, scale_, bias_, mean_, var_});
+  VectorRef tuple_get_item = VectorRef({prim::kPrimTupleGetItem, batch_norm_ex, index_});
+  VectorRef tensor_add = VectorRef({prim::kPrimTensorAdd, tuple_get_item, z_});
+  VectorRef relu = VectorRef({prim::kPrimRelu, tensor_add});
+  return relu;
+}
+
+const AnfNodePtr BatchNormAddReluFusion::Process(const FuncGraphPtr &graph, const AnfNodePtr &node,
+                                                 const EquivPtr &equiv) const {
+  MS_EXCEPTION_IF_NULL(graph);
+  MS_EXCEPTION_IF_NULL(node);
+
+  auto tensor_add = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(node), 0);
+  MS_EXCEPTION_IF_NULL(tensor_add);
+  auto tuple_get_item = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(tensor_add), 0);
+  MS_EXCEPTION_IF_NULL(tuple_get_item);
+  auto batch_norm_ex = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(tuple_get_item), 0);
+  MS_EXCEPTION_IF_NULL(batch_norm_ex);
+
+  if (AnfAlgo::GetOutputInferDataType(batch_norm_ex, 0) != kNumberTypeFloat16) {
+    return nullptr;
+  }
+
+  auto x = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(batch_norm_ex), 0);
+  auto scale = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(batch_norm_ex), 1);
+  auto bias = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(batch_norm_ex), 2);
+  auto mean = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(batch_norm_ex), 3);
+  auto var = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(batch_norm_ex), 4);
+  auto z = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(tensor_add), 1);
+
+  MS_EXCEPTION_IF_NULL(x);
+  MS_EXCEPTION_IF_NULL(scale);
+  MS_EXCEPTION_IF_NULL(bias);
+  MS_EXCEPTION_IF_NULL(mean);
+  MS_EXCEPTION_IF_NULL(var);
+  MS_EXCEPTION_IF_NULL(z);
+
+  auto prim = std::make_shared<Primitive>(kFusedBatchNormExWithAddAndActivation);
+  MS_EXCEPTION_IF_NULL(prim);
+  std::vector<AnfNodePtr> inputs = {NewValueNode(prim), x, scale, bias, mean, var, z};
+  auto fused_batch_norm_with_add_relu = graph->NewCNode(inputs);
+  MS_EXCEPTION_IF_NULL(fused_batch_norm_with_add_relu);
+
+  std::vector<TypeId> outputs_type;
+  std::vector<std::vector<size_t>> outputs_shape;
+  auto output_num = AnfAlgo::GetOutputTensorNum(batch_norm_ex);
+  for (size_t i = 0; i < output_num; i++) {
+    outputs_type.push_back(AnfAlgo::GetOutputInferDataType(batch_norm_ex, i));
+    outputs_shape.push_back(AnfAlgo::GetOutputInferShape(batch_norm_ex, i));
+  }
+  AnfAlgo::SetOutputInferTypeAndShape(outputs_type, outputs_shape, fused_batch_norm_with_add_relu.get());
+  AnfAlgo::CopyNodeAttrs(batch_norm_ex, fused_batch_norm_with_add_relu);
+
+  auto manager = graph->manager();
+  MS_EXCEPTION_IF_NULL(manager);
+  manager->Replace(batch_norm_ex, fused_batch_norm_with_add_relu);
+  return tuple_get_item;
+}
+}  // namespace opt
+}  // namespace mindspore
--- a/mindspore/ccsrc/backend/optimizer/gpu/batch_norm_add_relu_fusion.h
+++ b/mindspore/ccsrc/backend/optimizer/gpu/batch_norm_add_relu_fusion.h
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_BATCH_NORM_ADD_RELU_FUSION_H_
+#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_BATCH_NORM_ADD_RELU_FUSION_H_
+
+#include <memory>
+#include "backend/optimizer/common/optimizer.h"
+
+namespace mindspore {
+namespace opt {
+class BatchNormAddReluFusion : public PatternProcessPass {
+ public:
+  explicit BatchNormAddReluFusion(bool multigraph = true)
+      : PatternProcessPass("batch_norm_add_relu_fusion", multigraph) {
+    x_ = std::make_shared<Var>();
+    scale_ = std::make_shared<Var>();
+    bias_ = std::make_shared<Var>();
+    mean_ = std::make_shared<Var>();
+    var_ = std::make_shared<Var>();
+    index_ = std::make_shared<Var>();
+    z_ = std::make_shared<Var>();
+  }
+  ~BatchNormAddReluFusion() override = default;
+  const BaseRef DefinePattern() const override;
+  const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override;
+
+ private:
+  VarPtr x_;
+  VarPtr scale_;
+  VarPtr bias_;
+  VarPtr mean_;
+  VarPtr var_;
+  VarPtr index_;
+  VarPtr z_;
+};
+}  // namespace opt
+}  // namespace mindspore
+#endif  // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_BATCH_NORM_ADD_RELU_FUSION_H_
--- a/mindspore/ccsrc/backend/optimizer/gpu/batch_norm_relu_fusion.cc
+++ b/mindspore/ccsrc/backend/optimizer/gpu/batch_norm_relu_fusion.cc
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "backend/optimizer/gpu/batch_norm_relu_fusion.h"
+
+#include <memory>
+#include <vector>
+#include <string>
+
+#include "backend/session/anf_runtime_algorithm.h"
+#include "ir/primitive.h"
+#include "utils/utils.h"
+#include "backend/optimizer/common/helper.h"
+
+namespace mindspore {
+namespace opt {
+const BaseRef BatchNormReluFusion::DefinePattern() const {
+  VectorRef batch_norm_ex = VectorRef({prim::kPrimFusedBatchNormEx, x_, scale_, bias_, mean_, var_});
+  VectorRef tuple_get = VectorRef({prim::kPrimTupleGetItem, batch_norm_ex, index_});
+  VectorRef relu = VectorRef({prim::kPrimRelu, tuple_get});
+  return relu;
+}
+
+const AnfNodePtr BatchNormReluFusion::Process(const FuncGraphPtr &graph, const AnfNodePtr &node,
+                                              const EquivPtr &equiv) const {
+  MS_EXCEPTION_IF_NULL(graph);
+  MS_EXCEPTION_IF_NULL(node);
+
+  auto tuple_get_item = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(node), 0);
+  MS_EXCEPTION_IF_NULL(tuple_get_item);
+  auto batch_norm_ex = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(tuple_get_item), 0);
+  MS_EXCEPTION_IF_NULL(batch_norm_ex);
+
+  if (AnfAlgo::GetOutputInferDataType(batch_norm_ex, 0) != kNumberTypeFloat16) {
+    return nullptr;
+  }
+
+  auto x = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(batch_norm_ex), 0);
+  auto scale = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(batch_norm_ex), 1);
+  auto bias = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(batch_norm_ex), 2);
+  auto mean = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(batch_norm_ex), 3);
+  auto var = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(batch_norm_ex), 4);
+
+  MS_EXCEPTION_IF_NULL(x);
+  MS_EXCEPTION_IF_NULL(scale);
+  MS_EXCEPTION_IF_NULL(bias);
+  MS_EXCEPTION_IF_NULL(mean);
+  MS_EXCEPTION_IF_NULL(var);
+
+  auto prim = std::make_shared<Primitive>(kFusedBatchNormExWithActivation);
+  MS_EXCEPTION_IF_NULL(prim);
+  std::vector<AnfNodePtr> inputs = {NewValueNode(prim), x, scale, bias, mean, var};
+  auto fused_batch_norm_with_relu = graph->NewCNode(inputs);
+  MS_EXCEPTION_IF_NULL(fused_batch_norm_with_relu);
+
+  std::vector<TypeId> outputs_type;
+  std::vector<std::vector<size_t>> outputs_shape;
+  auto output_num = AnfAlgo::GetOutputTensorNum(batch_norm_ex);
+  for (size_t i = 0; i < output_num; i++) {
+    outputs_type.push_back(AnfAlgo::GetOutputInferDataType(batch_norm_ex, i));
+    outputs_shape.push_back(AnfAlgo::GetOutputInferShape(batch_norm_ex, i));
+  }
+  AnfAlgo::SetOutputInferTypeAndShape(outputs_type, outputs_shape, fused_batch_norm_with_relu.get());
+  AnfAlgo::CopyNodeAttrs(batch_norm_ex, fused_batch_norm_with_relu);
+
+  auto manager = graph->manager();
+  MS_EXCEPTION_IF_NULL(manager);
+  manager->Replace(batch_norm_ex, fused_batch_norm_with_relu);
+  return tuple_get_item;
+}
+}  // namespace opt
+}  // namespace mindspore
--- a/mindspore/ccsrc/backend/optimizer/gpu/batch_norm_relu_fusion.h
+++ b/mindspore/ccsrc/backend/optimizer/gpu/batch_norm_relu_fusion.h
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_BATCH_NORM_RELU_FUSION_H_
+#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_BATCH_NORM_RELU_FUSION_H_
+
+#include <memory>
+#include "backend/optimizer/common/optimizer.h"
+
+namespace mindspore {
+namespace opt {
+class BatchNormReluFusion : public PatternProcessPass {
+ public:
+  explicit BatchNormReluFusion(bool multigraph = true) : PatternProcessPass("batch_norm_relu_fusion", multigraph) {
+    x_ = std::make_shared<Var>();
+    scale_ = std::make_shared<Var>();
+    bias_ = std::make_shared<Var>();
+    mean_ = std::make_shared<Var>();
+    var_ = std::make_shared<Var>();
+    index_ = std::make_shared<Var>();
+  }
+  ~BatchNormReluFusion() override = default;
+  const BaseRef DefinePattern() const override;
+  const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override;
+
+ private:
+  VarPtr x_;
+  VarPtr scale_;
+  VarPtr bias_;
+  VarPtr mean_;
+  VarPtr var_;
+  VarPtr index_;
+};
+}  // namespace opt
+}  // namespace mindspore
+#endif  // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_BATCH_NORM_RELU_FUSION_H_
--- a/mindspore/ccsrc/backend/optimizer/gpu/batch_norm_relu_grad_fusion.cc
+++ b/mindspore/ccsrc/backend/optimizer/gpu/batch_norm_relu_grad_fusion.cc
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "backend/optimizer/gpu/batch_norm_relu_grad_fusion.h"
+
+#include <memory>
+#include <vector>
+#include <string>
+
+#include "backend/session/anf_runtime_algorithm.h"
+#include "ir/primitive.h"
+#include "utils/utils.h"
+#include "backend/optimizer/common/helper.h"
+
+namespace mindspore {
+namespace opt {
+const BaseRef BatchNormReluGradFusion::DefinePattern() const {
+  VectorRef relu_grad = VectorRef({prim::kPrimReluGrad, dy_, y_});
+  VectorRef batch_norm_grad =
+    VectorRef({prim::kPrimFusedBatchNormGradEx, relu_grad, x_, scale_, save_mean_, save_var_, reserve_});
+  return batch_norm_grad;
+}
+
+const AnfNodePtr BatchNormReluGradFusion::Process(const FuncGraphPtr &graph, const AnfNodePtr &node,
+                                                  const EquivPtr &equiv) const {
+  MS_EXCEPTION_IF_NULL(graph);
+  MS_EXCEPTION_IF_NULL(node);
+
+  if (AnfAlgo::GetOutputInferDataType(node, 0) != kNumberTypeFloat16) {
+    return nullptr;
+  }
+
+  auto relu_grad = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(node), 0);
+  MS_EXCEPTION_IF_NULL(relu_grad);
+
+  auto outlist = GetRealNodeUsedList(graph, relu_grad);
+  if (outlist->size() >= 2) {
+    return nullptr;
+  }
+
+  auto dy = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(relu_grad), 0);
+  MS_EXCEPTION_IF_NULL(dy);
+  auto y = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(relu_grad), 1);
+  MS_EXCEPTION_IF_NULL(y);
+  auto x = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(node), 1);
+  MS_EXCEPTION_IF_NULL(x);
+  auto scale = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(node), 2);
+  MS_EXCEPTION_IF_NULL(scale);
+  auto save_mean = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(node), 3);
+  MS_EXCEPTION_IF_NULL(save_mean);
+  auto save_var = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(node), 4);
+  MS_EXCEPTION_IF_NULL(save_var);
+  auto reserve = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(node), 5);
+  MS_EXCEPTION_IF_NULL(reserve);
+  auto batch_norm = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(save_mean), 0);
+  MS_EXCEPTION_IF_NULL(batch_norm);
+  auto bias = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(batch_norm), 2);
+  MS_EXCEPTION_IF_NULL(bias);
+
+  auto prim = std::make_shared<Primitive>(kFusedBatchNormGradExWithActivation);
+  MS_EXCEPTION_IF_NULL(prim);
+  std::vector<AnfNodePtr> inputs = {NewValueNode(prim), dy, x, scale, save_mean, save_var, reserve, bias, y};
+  auto fused_batch_norm_grad_with_relu = graph->NewCNode(inputs);
+  MS_EXCEPTION_IF_NULL(fused_batch_norm_grad_with_relu);
+
+  std::vector<TypeId> outputs_type;
+  std::vector<std::vector<size_t>> outputs_shape;
+  auto output_num = AnfAlgo::GetOutputTensorNum(node);
+  for (size_t i = 0; i < output_num; i++) {
+    outputs_type.push_back(AnfAlgo::GetOutputInferDataType(node, i));
+    outputs_shape.push_back(AnfAlgo::GetOutputInferShape(node, i));
+  }
+  AnfAlgo::SetOutputInferTypeAndShape(outputs_type, outputs_shape, fused_batch_norm_grad_with_relu.get());
+  AnfAlgo::CopyNodeAttrs(node, fused_batch_norm_grad_with_relu);
+  return fused_batch_norm_grad_with_relu;
+}
+}  // namespace opt
+}  // namespace mindspore
--- a/mindspore/ccsrc/backend/optimizer/gpu/batch_norm_relu_grad_fusion.h
+++ b/mindspore/ccsrc/backend/optimizer/gpu/batch_norm_relu_grad_fusion.h
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_BATCH_NORM_RELU_GRAD_FUSION_H_
+#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_BATCH_NORM_RELU_GRAD_FUSION_H_
+
+#include <memory>
+#include "backend/optimizer/common/optimizer.h"
+
+namespace mindspore {
+namespace opt {
+class BatchNormReluGradFusion : public PatternProcessPass {
+ public:
+  explicit BatchNormReluGradFusion(bool multigraph = true)
+      : PatternProcessPass("batch_norm_relu_grad_fusion", multigraph) {
+    dy_ = std::make_shared<Var>();
+    y_ = std::make_shared<Var>();
+    x_ = std::make_shared<Var>();
+    scale_ = std::make_shared<Var>();
+    save_mean_ = std::make_shared<Var>();
+    save_var_ = std::make_shared<Var>();
+    reserve_ = std::make_shared<Var>();
+  }
+  ~BatchNormReluGradFusion() override = default;
+  const BaseRef DefinePattern() const override;
+  const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override;
+
+ private:
+  VarPtr dy_;
+  VarPtr y_;
+  VarPtr x_;
+  VarPtr scale_;
+  VarPtr save_mean_;
+  VarPtr save_var_;
+  VarPtr reserve_;
+};
+}  // namespace opt
+}  // namespace mindspore
+#endif  // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_BATCH_NORM_RELU_GRAD_FUSION_H_
--- a/mindspore/core/base/core_ops.h
+++ b/mindspore/core/base/core_ops.h
@@ -117,8 +117,10 @@ inline const PrimitivePtr kPrimApplyCenteredRMSProp = std::make_shared<Primitive
 inline const PrimitivePtr kPrimAvgPoolGrad = std::make_shared<Primitive>("AvgPoolGrad");
 inline const PrimitivePtr kPrimAvgPoolGradVm = std::make_shared<Primitive>("AvgPoolGradVm");
 inline const PrimitivePtr kPrimFusedBatchNorm = std::make_shared<Primitive>("FusedBatchNorm");
+inline const PrimitivePtr kPrimFusedBatchNormEx = std::make_shared<Primitive>("FusedBatchNormEx");
 inline const PrimitivePtr kPrimConv2D = std::make_shared<Primitive>("Conv2D");
 inline const PrimitivePtr kPrimFusedBatchNormGrad = std::make_shared<Primitive>("FusedBatchNormGrad");
+inline const PrimitivePtr kPrimFusedBatchNormGradEx = std::make_shared<Primitive>("FusedBatchNormGradEx");
 inline const PrimitivePtr kPrimBatchNorm = std::make_shared<Primitive>("BatchNorm");
 inline const PrimitivePtr kPrimBatchNormGrad = std::make_shared<Primitive>("BatchNormGrad");
 inline const PrimitivePtr kPrimReluGrad = std::make_shared<Primitive>("ReluGrad");