Commit 0c781152 authored by mindspore-ci-bot, committed by Gitee

!3090 GPU add fusion

Merge pull request !3090 from VectorSL/batchnorm-cast
@@ -13,8 +13,8 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_GPU_IR_FUSION_ADAM_FUSION_H_
-#define MINDSPORE_CCSRC_PRE_ACTIVATE_GPU_IR_FUSION_ADAM_FUSION_H_
+#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_ADAM_FUSION_H_
+#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_ADAM_FUSION_H_
 #include <memory>
 #include "backend/optimizer/common/optimizer.h"
@@ -53,4 +53,4 @@ class AdamFusion : public PatternProcessPass {
 };
 } // namespace opt
 } // namespace mindspore
-#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_GPU_IR_FUSION_ADAM_FUSION_H_
+#endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_ADAM_FUSION_H_
@@ -13,8 +13,8 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_GPU_IR_FUSION_ADAM_WEIGHT_DECAY_FUSION_H_
-#define MINDSPORE_CCSRC_PRE_ACTIVATE_GPU_IR_FUSION_ADAM_WEIGHT_DECAY_FUSION_H_
+#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_ADAM_WEIGHT_DECAY_FUSION_H_
+#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_ADAM_WEIGHT_DECAY_FUSION_H_
 #include <memory>
 #include "backend/optimizer/common/optimizer.h"
@@ -55,4 +55,4 @@ class AdamWeightDecayFusion : public PatternProcessPass {
 };
 } // namespace opt
 } // namespace mindspore
-#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_GPU_IR_FUSION_ADAM_WEIGHT_DECAY_FUSION_H_
+#endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_ADAM_WEIGHT_DECAY_FUSION_H_
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "backend/optimizer/gpu/replace_addn_fusion.h"
#include <memory>
#include <vector>
#include <string>
#include "backend/session/anf_runtime_algorithm.h"
#include "ir/primitive.h"
#include "utils/utils.h"
#include "backend/optimizer/common/helper.h"
namespace mindspore {
namespace opt {
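// Pattern: AddN(A, B).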
const BaseRef ReplaceAddNFusion::DefinePattern() const {
  VectorRef addn = VectorRef({prim::kPrimAddN, A, B});
  return addn;
}
const AnfNodePtr ReplaceAddNFusion::Process(const FuncGraphPtr &graph, const AnfNodePtr &node,
                                            const EquivPtr &equiv) const {
  MS_EXCEPTION_IF_NULL(graph);
  MS_EXCEPTION_IF_NULL(node);
  MS_EXCEPTION_IF_NULL(equiv);
  auto A = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(node), 0);
  auto B = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(node), 1);
  MS_EXCEPTION_IF_NULL(A);
  MS_EXCEPTION_IF_NULL(B);
  int num_input = AnfAlgo::GetNodeAttr<int>(node, "n");
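  // Only an AddN with exactly two inputs can be rewritten into TensorAdd; anything larger is left untouched.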
  if (num_input == 2) {
    auto prim = std::make_shared<Primitive>(prim::kPrimTensorAdd->name());
    MS_EXCEPTION_IF_NULL(prim);
    std::vector<AnfNodePtr> inputs = {NewValueNode(prim), A, B};
    auto add_new = graph->NewCNode(inputs);
    std::vector<TypeId> outputs_type;
    std::vector<std::vector<size_t>> outputs_shape;
    outputs_type.push_back(AnfAlgo::GetOutputInferDataType(A, 0));
    outputs_shape.push_back(AnfAlgo::GetOutputInferShape(A, 0));
    AnfAlgo::SetOutputInferTypeAndShape(outputs_type, outputs_shape, add_new.get());
    auto manager = graph->manager();
    MS_EXCEPTION_IF_NULL(manager);
    manager->Replace(utils::cast<CNodePtr>(node), utils::cast<CNodePtr>(add_new));
    return add_new;
  } else {
    return nullptr;
  }
}
} // namespace opt
} // namespace mindspore
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_REPLACE_ADDN_FUSION_H_
#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_REPLACE_ADDN_FUSION_H_
#include <memory>
#include "backend/optimizer/common/optimizer.h"
namespace mindspore {
namespace opt {
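// Rewrites a two-input AddN into a TensorAdd node on the GPU backend.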
class ReplaceAddNFusion : public PatternProcessPass {
 public:
  explicit ReplaceAddNFusion(bool multigraph = true) : PatternProcessPass("replace_addn", multigraph) {
    A = std::make_shared<Var>();
    B = std::make_shared<Var>();
  }
  ~ReplaceAddNFusion() override = default;
  const BaseRef DefinePattern() const override;
  const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override;

 private:
  VarPtr A;
  VarPtr B;
};
} // namespace opt
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_REPLACE_ADDN_FUSION_H_
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "backend/optimizer/gpu/replace_bn_cast_fusion.h"
#include <memory>
#include <vector>
#include <string>
#include "backend/session/anf_runtime_algorithm.h"
#include "ir/primitive.h"
#include "utils/utils.h"
#include "backend/optimizer/common/helper.h"
namespace mindspore {
namespace opt {
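// Pattern: Cast(x) -> FusedBatchNorm -> TupleGetItem -> Cast, i.e. a batchnorm wrapped in mixed-precision casts.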
const BaseRef ReplaceBNCastFusion::DefinePattern() const {
  VectorRef in_cast = VectorRef({prim::kPrimCast, x_});
  VectorRef fbn2 = VectorRef({prim::kPrimFusedBatchNorm, in_cast, scale_, bias_, mean_, var_});
  VectorRef tupleget = VectorRef({prim::kPrimTupleGetItem, fbn2, index_});
  VectorRef out_cast = VectorRef({prim::kPrimCast, tupleget});
  return out_cast;
}
const AnfNodePtr ReplaceBNCastFusion::Process(const FuncGraphPtr &graph, const AnfNodePtr &node,
                                              const EquivPtr &equiv) const {
  MS_EXCEPTION_IF_NULL(graph);
  MS_EXCEPTION_IF_NULL(node);
  MS_EXCEPTION_IF_NULL(equiv);
  auto tuple = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(node), 0);
  auto index_node = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(tuple), 1);
  MS_EXCEPTION_IF_NULL(index_node);
  auto value_node = index_node->cast<ValueNodePtr>();
  MS_EXCEPTION_IF_NULL(value_node);
  int item_idx = GetValue<int>(value_node->value());
  auto fbn2 = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(tuple), 0);
  auto x_after = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(fbn2), 0);
  auto x_before = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(x_after), 0);
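  // Only fuse when the TupleGetItem reads output 0 (the normalized result) of FusedBatchNorm.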
  if (item_idx != 0) {
    return nullptr;
  }
  auto scale = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(fbn2), 1);
  auto bias = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(fbn2), 2);
  auto mean = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(fbn2), 3);
  auto var = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(fbn2), 4);
  MS_EXCEPTION_IF_NULL(fbn2);
  MS_EXCEPTION_IF_NULL(x_after);
  MS_EXCEPTION_IF_NULL(x_before);
  MS_EXCEPTION_IF_NULL(scale);
  MS_EXCEPTION_IF_NULL(bias);
  MS_EXCEPTION_IF_NULL(mean);
  MS_EXCEPTION_IF_NULL(var);
  auto manager = graph->manager();
  MS_EXCEPTION_IF_NULL(manager);
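  // Bypass both casts: feed the original float16 tensor straight into FusedBatchNorm and let users consume the TupleGetItem directly.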
  manager->Replace(utils::cast<CNodePtr>(x_after), utils::cast<CNodePtr>(x_before));
  manager->Replace(utils::cast<CNodePtr>(node), utils::cast<CNodePtr>(tuple));
  std::vector<TypeId> outputs_type;
  std::vector<std::vector<size_t>> outputs_shape;
  auto output_num = AnfAlgo::GetOutputTensorNum(fbn2);
  for (size_t i = 0; i < output_num; i++) {
    outputs_type.push_back(AnfAlgo::GetOutputInferDataType(fbn2, i));
    outputs_shape.push_back(AnfAlgo::GetOutputInferShape(fbn2, i));
  }
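  // Output 0 of FusedBatchNorm (and the TupleGetItem reading it) now carries float16; the other outputs keep their inferred types.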
  outputs_type[0] = kNumberTypeFloat16;
  AnfAlgo::SetOutputInferTypeAndShape(outputs_type, outputs_shape, fbn2.get());
  outputs_type.clear();
  outputs_shape.clear();
  outputs_type.push_back(kNumberTypeFloat16);
  outputs_shape.push_back(AnfAlgo::GetOutputInferShape(tuple, 0));
  AnfAlgo::SetOutputInferTypeAndShape(outputs_type, outputs_shape, tuple.get());
  return tuple;
}
} // namespace opt
} // namespace mindspore
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_REPLACE_BN_CAST_FUSION_H_
#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_REPLACE_BN_CAST_FUSION_H_
#include <memory>
#include "backend/optimizer/common/optimizer.h"
namespace mindspore {
namespace opt {
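// Removes the float32 Cast ops wrapped around FusedBatchNorm so the fused kernel consumes and produces float16 directly.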
class ReplaceBNCastFusion : public PatternProcessPass {
 public:
  explicit ReplaceBNCastFusion(bool multigraph = true) : PatternProcessPass("replace_bn_cast", multigraph) {
    x_ = std::make_shared<Var>();
    scale_ = std::make_shared<Var>();
    bias_ = std::make_shared<Var>();
    mean_ = std::make_shared<Var>();
    var_ = std::make_shared<Var>();
    y_ = std::make_shared<Var>();
    running_mean_ = std::make_shared<Var>();
    running_var_ = std::make_shared<Var>();
    save_mean_ = std::make_shared<Var>();
    save_var_ = std::make_shared<Var>();
    index_ = std::make_shared<Var>();
  }
  ~ReplaceBNCastFusion() override = default;
  const BaseRef DefinePattern() const override;
  const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override;

 private:
  VarPtr x_;
  VarPtr scale_;
  VarPtr bias_;
  VarPtr mean_;
  VarPtr var_;
  VarPtr y_;
  VarPtr running_mean_;
  VarPtr running_var_;
  VarPtr save_mean_;
  VarPtr save_var_;
  VarPtr index_;
};
} // namespace opt
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_GPU_REPLACE_BN_CAST_FUSION_H_