From a42f48bd5d4da5010dff57dca2456604a5cfa4b3 Mon Sep 17 00:00:00 2001
From: yaozhixin <zhixiny@graphcore.ai>
Date: Mon, 4 Jul 2022 11:21:35 +0800
Subject: [PATCH] update paddle inference fp16 mode (#44014)

---
 paddle/fluid/framework/ir/CMakeLists.txt      |   1 +
 .../ir/ipu/inference_dtype_transfer_pass.cc   | 104 ++++++++++++++++++
 .../ir/ipu/inference_dtype_transfer_pass.h    |  30 +++++
 .../ir/ipu/inference_process_pass.cc          |   6 +-
 .../fluid/platform/device/ipu/ipu_compiler.cc |  10 +-
 .../fluid/platform/device/ipu/ipu_executor.cc |   2 +
 .../fluid/platform/device/ipu/ipu_strategy.cc |   1 +
 .../fluid/platform/device/ipu/ipu_strategy.h  |   5 +-
 8 files changed, 155 insertions(+), 4 deletions(-)
 create mode 100644 paddle/fluid/framework/ir/ipu/inference_dtype_transfer_pass.cc
 create mode 100644 paddle/fluid/framework/ir/ipu/inference_dtype_transfer_pass.h
diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt
index d19b163817e..8569a3bb615 100755
--- a/paddle/fluid/framework/ir/CMakeLists.txt
+++ b/paddle/fluid/framework/ir/CMakeLists.txt
@@ -240,6 +240,7 @@ if(WITH_IPU)
   pass_library(infer_shape_pass base DIR ipu)
   pass_library(delete_scale_op_pass base DIR ipu)
   pass_library(avg_shard_pass base DIR ipu)
+  pass_library(inference_dtype_transfer_pass base DIR ipu)
 endif()
 
 cc_library(
diff --git a/paddle/fluid/framework/ir/ipu/inference_dtype_transfer_pass.cc b/paddle/fluid/framework/ir/ipu/inference_dtype_transfer_pass.cc
new file mode 100644
index 00000000000..f06f05e9f02
--- /dev/null
+++ b/paddle/fluid/framework/ir/ipu/inference_dtype_transfer_pass.cc
@@ -0,0 +1,104 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/framework/ir/ipu/inference_dtype_transfer_pass.h"
+
+#include "paddle/fluid/platform/device/ipu/ipu_backend.h"
+
+#include "paddle/fluid/framework/data_type.h"
+#include "paddle/fluid/framework/ir/graph_helper.h"
+#include "paddle/fluid/framework/ir/pass_tester_helper.h"
+#include "paddle/phi/common/place.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+// If IpuStrategy::enable_fp16 is set, switches the graph to fp16: FP32 vars
+// (and persistable weights in the IPU scope) become FP16, and popart_cast /
+// popart_constant ops that produce float are retargeted to float16.
+void InferenceDtypeTransferPass::ApplyImpl(ir::Graph* graph) const {
+  VLOG(10) << "enter InferenceDtypeTransferPass::ApplyImpl";
+  VLOG(10) << "Raw Graph: ";
+  VLOG(10) << DebugString(graph);
+
+  auto* ipu_backend = platform::ipu::IpuBackend::GetInstance();
+  auto enable_fp16 = ipu_backend->GetIpuStrategy()->enable_fp16;
+
+  if (enable_fp16) {
+    VLOG(10) << "Transfer var to fp16...";
+    auto* scope = ipu_backend->GetScope();
+
+    std::unordered_set<std::string> used_var_names;
+    for (auto* node : graph->Nodes()) {
+      // Skip var nodes with no VarDesc (e.g. control vars): nothing to retype
+      if (node->IsVar() && !node->IsCtrlVar() && node->Var()) {
+        auto var_desc = node->Var();
+        if (var_desc->GetDataType() == proto::VarType::FP32) {
+          // Transfer the dtypes of var_desc
+          var_desc->SetDataType(proto::VarType::FP16);
+          VLOG(10) << "Transfer the VarDesc of " << var_desc->Name() << " to "
+                   << var_desc->GetDataType();
+
+          if (node->inputs.empty() && node->Var()->Persistable() &&
+              scope->FindVar(var_desc->Name()) &&
+              used_var_names.find(var_desc->Name()) == used_var_names.end()) {
+            // Convert the weight in place (fp16 data fits in the fp32 buffer)
+            auto* tensor = scope->FindVar(var_desc->Name())
+                               ->GetMutable<framework::LoDTensor>();
+            auto* data_ptr = tensor->data<float>();
+            auto num_elem = tensor->numel();
+
+            std::vector<float16> fp16_data(data_ptr, data_ptr + num_elem);
+            memcpy(reinterpret_cast<void*>(data_ptr),
+                   fp16_data.data(),
+                   num_elem * sizeof(float16));
+            tensor->set_type(
+                framework::TransToPhiDataType(proto::VarType::FP16));
+          }
+        }
+        used_var_names.insert(var_desc->Name());
+      }
+      if (node->IsOp()) {
+        auto* op_desc = node->Op();
+        if (op_desc->Type() == "popart_cast") {
+          // Transfer the target dtype of cast Op
+          if (BOOST_GET_CONST(std::string, op_desc->GetAttr("to")) == "FLOAT") {
+            op_desc->SetAttr("to", std::string("FLOAT16"));
+            op_desc->Flush();
+          }
+        }
+        if (op_desc->Type() == "popart_constant") {
+          // dtype 1 -> 10: presumably ONNX FLOAT -> FLOAT16 (TODO confirm)
+          if (op_desc->GetAttrIfExists<int>("dtype") == 1) {
+            op_desc->SetAttr("dtype", 10);
+            op_desc->Flush();
+          }
+        }
+      }
+    }
+    VLOG(10) << "Transfer var to fp16...Done";
+  }
+
+  VLOG(10) << "Post Graph: ";
+  VLOG(10) << DebugString(graph);
+  VLOG(10) << "leave InferenceDtypeTransferPass::ApplyImpl";
+}
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
+
+REGISTER_PASS(inference_dtype_transfer_pass,
+              paddle::framework::ir::InferenceDtypeTransferPass);
diff --git a/paddle/fluid/framework/ir/ipu/inference_dtype_transfer_pass.h b/paddle/fluid/framework/ir/ipu/inference_dtype_transfer_pass.h
new file mode 100644
index 00000000000..3111968ea2b
--- /dev/null
+++ b/paddle/fluid/framework/ir/ipu/inference_dtype_transfer_pass.h
@@ -0,0 +1,30 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/fluid/framework/ir/pass.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+// IPU pass: retypes FP32 vars and ops to FP16 when enable_fp16 is set.
+class InferenceDtypeTransferPass : public Pass {
+ protected:
+  void ApplyImpl(ir::Graph* graph) const override;
+};
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/fluid/framework/ir/ipu/inference_process_pass.cc b/paddle/fluid/framework/ir/ipu/inference_process_pass.cc
index 8357ec05c24..1ef03b1bd9c 100644
--- a/paddle/fluid/framework/ir/ipu/inference_process_pass.cc
+++ b/paddle/fluid/framework/ir/ipu/inference_process_pass.cc
@@ -90,6 +90,9 @@ void InferenceProcessPass::ApplyImpl(ir::Graph* graph) const {
   ipu_strategy_instance_->available_memory_proportion =
       graph->Get<float>("available_memory_proportion");
 
+  // Set tiles_per_ipu for IPUMODEL
+  ipu_strategy_instance_->tiles_per_ipu = 128;
+
   ipu_backend->SetIpuStrategy(*(ipu_strategy_instance_.get()));
 
   // Get feed_list and fetch list
@@ -124,7 +127,8 @@ void InferenceProcessPass::ApplyImpl(ir::Graph* graph) const {
   std::vector<std::string> graph_pass = {"forward_graph_extract_pass",
                                          "infer_shape_pass",
                                          "avg_shard_pass",
-                                         "popart_canonicalization_pass"};
+                                         "popart_canonicalization_pass",
+                                         "inference_dtype_transfer_pass"};
   std::vector<std::string> compile_pass = {"ipu_inplace_pass",
                                            "ipu_graph_builder_pass",
                                            "ipu_runtime_replacer_pass",
diff --git a/paddle/fluid/platform/device/ipu/ipu_compiler.cc b/paddle/fluid/platform/device/ipu/ipu_compiler.cc
index 330ddef577e..74b216f4e0f 100644
--- a/paddle/fluid/platform/device/ipu/ipu_compiler.cc
+++ b/paddle/fluid/platform/device/ipu/ipu_compiler.cc
@@ -111,7 +111,13 @@ struct ConstantOpAttrVisitor {
     framework::TensorFromVector<int64_t>(vec, tensor_);
   }
   void operator()(const std::vector<double>& vec) const {
-    framework::TensorFromVector<double>(vec, tensor_);
+    // popart does not support float64 constants, so narrow to float32 first
+    std::vector<float> vec_fp32;
+    std::transform(vec.begin(),
+                   vec.end(),
+                   std::back_inserter(vec_fp32),
+                   [](double f) -> float { return static_cast<float>(f); });
+    framework::TensorFromVector<float>(vec_fp32, tensor_);
   }
 #define RAISE_ERROR \
   PADDLE_THROW(     \
@@ -416,7 +422,7 @@ void Compiler::LowerWeights(const Scope* scope) {
     auto* node = graph_helper_->nodes_id_map[id];
     // Weights are var node and Persistable
     if (node->IsVar() && !node->IsCtrlVar() && node->Var() &&
-        node->Var()->Persistable()) {
+        node->Var()->Persistable() && node->inputs.empty()) {
       // Weights are Parameter in training mode
       if (ipu_strategy_->is_training && !node->Var()->IsParameter()) {
         continue;
diff --git a/paddle/fluid/platform/device/ipu/ipu_executor.cc b/paddle/fluid/platform/device/ipu/ipu_executor.cc
index 4db25e880f3..3cd4a12b378 100644
--- a/paddle/fluid/platform/device/ipu/ipu_executor.cc
+++ b/paddle/fluid/platform/device/ipu/ipu_executor.cc
@@ -257,6 +257,7 @@ void Executor::AcquireDevice() {
             "numIPUs",
             std::to_string(ipu_strategy_->num_ipus),
         },
+        {"tilesPerIPU", std::to_string(ipu_strategy_->tiles_per_ipu)},
         {"ipuVersion", "ipu2"},
     };
     device_ = popart::DeviceManager::createDeviceManager().createIpuModelDevice(
@@ -269,6 +270,7 @@ void Executor::AcquireDevice() {
             "numIPUs",
             std::to_string(ipu_strategy_->num_ipus),
         },
+        {"tilesPerIPU", std::to_string(ipu_strategy_->tiles_per_ipu)},
         {"ipuVersion", "ipu2"},
     };
     device_ =
diff --git a/paddle/fluid/platform/device/ipu/ipu_strategy.cc b/paddle/fluid/platform/device/ipu/ipu_strategy.cc
index eeffd0a36e0..e7d53c751f2 100644
--- a/paddle/fluid/platform/device/ipu/ipu_strategy.cc
+++ b/paddle/fluid/platform/device/ipu/ipu_strategy.cc
@@ -91,6 +91,7 @@ IpuStrategy::IpuStrategy() {
   ADD_UINT64_OPTION(batches_per_step);
   ADD_UINT64_OPTION(micro_batch_size);
   ADD_UINT64_OPTION(random_seed);
+  ADD_UINT64_OPTION(tiles_per_ipu);
   ADD_DOUBLE_OPTION(available_memory_proportion);
   ADD_DOUBLE_OPTION(loss_scaling);
   ADD_DOUBLE_OPTION(max_weight_norm);
diff --git a/paddle/fluid/platform/device/ipu/ipu_strategy.h b/paddle/fluid/platform/device/ipu/ipu_strategy.h
index 1fdde59cf85..9ae54108ac5 100644
--- a/paddle/fluid/platform/device/ipu/ipu_strategy.h
+++ b/paddle/fluid/platform/device/ipu/ipu_strategy.h
@@ -41,7 +41,7 @@ class IpuStrategy {
   // Average sharding, debugging used
   bool need_avg_shard = false;
 
-  // Flag for fp16, true for pure fp16
+  // Flag for fp16, true for inference with pure fp16
   bool enable_fp16 = false;
 
   // The mode of Adam/Lamb optimizer
@@ -64,6 +64,9 @@ class IpuStrategy {
   // Micro batch-size
   int micro_batch_size = 1;
 
+  // The number of virtual tiles for IPUMODEL
+  int tiles_per_ipu = 4;
+
   // Random seed
   std::uint64_t random_seed = std::numeric_limits<std::uint64_t>::max();
 
-- 
GitLab