update paddle inference fp16 mode (#44014)

a42f48bd · yaozhixin · GitHub · 01fedf4f · a42f48bd · a42f48bd
8 changed files
--- a/paddle/fluid/framework/ir/CMakeLists.txt
+++ b/paddle/fluid/framework/ir/CMakeLists.txt
@@ -240,6 +240,7 @@ if(WITH_IPU)
  pass_library(infer_shape_pass base DIR ipu)
  pass_library(delete_scale_op_pass base DIR ipu)
  pass_library(avg_shard_pass base DIR ipu)
+  pass_library(inference_dtype_transfer_pass base DIR ipu)
 endif()

 cc_library(

--- a/paddle/fluid/framework/ir/ipu/inference_dtype_transfer_pass.cc
+++ b/paddle/fluid/framework/ir/ipu/inference_dtype_transfer_pass.cc
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/framework/ir/ipu/inference_dtype_transfer_pass.h"
+
+#include "paddle/fluid/platform/device/ipu/ipu_backend.h"
+
+#include "paddle/fluid/framework/data_type.h"
+#include "paddle/fluid/framework/ir/graph_helper.h"
+#include "paddle/fluid/framework/ir/pass_tester_helper.h"
+#include "paddle/phi/common/place.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+// Rewrites an IPU inference graph so that it runs in fp16.
+//
+// Nothing is modified unless IpuStrategy::enable_fp16 is set. When it is:
+//   1. every VarDesc whose data type is FP32 is switched to FP16;
+//   2. for persistable var nodes without inputs (weights) that exist in the
+//      backend scope, the tensor payload is converted from float to float16
+//      in place and the tensor type is updated;
+//   3. popart_cast ops targeting "FLOAT" are retargeted to "FLOAT16", and
+//      popart_constant ops with dtype 1 are switched to dtype 10.
+// The graph is dumped at VLOG level 10 before and after the rewrite.
+void InferenceDtypeTransferPass::ApplyImpl(ir::Graph* graph) const {
+  VLOG(10) << "enter InferenceDtypeTransferPass::ApplyImpl";
+  VLOG(10) << "Raw Graph: ";
+  VLOG(10) << DebugString(graph);
+
+  auto* ipu_backend = platform::ipu::IpuBackend::GetInstance();
+  const bool enable_fp16 = ipu_backend->GetIpuStrategy()->enable_fp16;
+
+  if (enable_fp16) {
+    VLOG(10) << "Transfer var to fp16...";
+    auto* scope = ipu_backend->GetScope();
+
+    // Values of the "dtype" attribute on popart_constant ops.
+    // NOTE(review): presumably ONNX TensorProto codes (1 = FLOAT,
+    // 10 = FLOAT16) - confirm against the popart_constant lowering.
+    constexpr int kConstantDtypeFp32 = 1;
+    constexpr int kConstantDtypeFp16 = 10;
+
+    // Names of the var nodes visited so far. A weight tensor is converted
+    // only the first time its name is seen, so it is never converted twice.
+    std::unordered_set<std::string> used_var_names;
+    for (auto* node : graph->Nodes()) {
+      // Skip control vars and var nodes without a VarDesc, mirroring the
+      // guard used by Compiler::LowerWeights; the VarDesc is dereferenced
+      // below, so a null one must not get this far.
+      if (node->IsVar() && !node->IsCtrlVar() && node->Var()) {
+        auto* var_desc = node->Var();
+        const auto& var_name = var_desc->Name();
+
+        if (var_desc->GetDataType() == proto::VarType::FP32) {
+          // Transfer the dtype of the VarDesc
+          var_desc->SetDataType(proto::VarType::FP16);
+          VLOG(10) << "Transfer the VarDesc of " << var_name << " to "
+                   << var_desc->GetDataType();
+
+          // Weights are persistable var nodes that no op writes to.
+          const bool is_unseen_weight =
+              node->inputs.empty() && var_desc->Persistable() &&
+              used_var_names.find(var_name) == used_var_names.end();
+          // Look the variable up once; it may be absent from the scope.
+          auto* scope_var =
+              is_unseen_weight ? scope->FindVar(var_name) : nullptr;
+          if (scope_var != nullptr) {
+            // Transfer the dtype of the weight tensor
+            auto* tensor = scope_var->GetMutable<framework::LoDTensor>();
+            auto* data_ptr = tensor->data<float>();
+            const auto num_elem = tensor->numel();
+
+            // Stage the converted values in a separate buffer first, so no
+            // float is overwritten before it has been read.
+            std::vector<float16> fp16_data;
+            fp16_data.reserve(static_cast<std::size_t>(num_elem));
+            std::transform(data_ptr,
+                           data_ptr + num_elem,
+                           std::back_inserter(fp16_data),
+                           [](float elem) { return float16(elem); });
+            // Copy the staged float16 values over the start of the buffer
+            // that held the floats, then relabel the tensor as FP16.
+            memcpy(reinterpret_cast<void*>(data_ptr),
+                   fp16_data.data(),
+                   num_elem * sizeof(float16));
+            tensor->set_type(
+                framework::TransToPhiDataType(proto::VarType::FP16));
+          }
+        }
+        used_var_names.insert(var_name);
+      }
+      if (node->IsOp()) {
+        auto* op_desc = node->Op();
+        const auto& op_type = op_desc->Type();
+        if (op_type == "popart_cast") {
+          // Transfer the target dtype of cast Op
+          if (BOOST_GET_CONST(std::string, op_desc->GetAttr("to")) == "FLOAT") {
+            op_desc->SetAttr("to", std::string("FLOAT16"));
+            op_desc->Flush();
+          }
+        } else if (op_type == "popart_constant") {
+          // Transfer the dtype of fill_constant Op
+          if (op_desc->GetAttrIfExists<int>("dtype") == kConstantDtypeFp32) {
+            op_desc->SetAttr("dtype", kConstantDtypeFp16);
+            op_desc->Flush();
+          }
+        }
+      }
+    }
+    VLOG(10) << "Transfer var to fp16...Done";
+  }
+
+  VLOG(10) << "Post Graph: ";
+  VLOG(10) << DebugString(graph);
+  VLOG(10) << "leave InferenceDtypeTransferPass::ApplyImpl";
+}
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
+
+// Registers the pass under the name inference_dtype_transfer_pass, the same
+// string that InferenceProcessPass lists in its graph_pass sequence.
+REGISTER_PASS(inference_dtype_transfer_pass,
+              paddle::framework::ir::InferenceDtypeTransferPass);
--- a/paddle/fluid/framework/ir/ipu/inference_dtype_transfer_pass.h
+++ b/paddle/fluid/framework/ir/ipu/inference_dtype_transfer_pass.h
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/fluid/framework/ir/pass.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+// IPU inference pass that switches the graph from fp32 to fp16.
+// The work is done in ApplyImpl (see inference_dtype_transfer_pass.cc) and
+// only takes effect when IpuStrategy::enable_fp16 is set.
+class InferenceDtypeTransferPass : public Pass {
+ protected:
+  // Applies the fp32 -> fp16 rewrite to `graph`, modifying it in place.
+  void ApplyImpl(ir::Graph* graph) const override;
+};
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
--- a/paddle/fluid/framework/ir/ipu/inference_process_pass.cc
+++ b/paddle/fluid/framework/ir/ipu/inference_process_pass.cc
@@ -90,6 +90,9 @@ void InferenceProcessPass::ApplyImpl(ir::Graph* graph) const {
  ipu_strategy_instance_->available_memory_proportion =
      graph->Get<float>("available_memory_proportion");

+  // Set tiles_per_ipu for IPUMODEL
+  ipu_strategy_instance_->tiles_per_ipu = 128;
+
  ipu_backend->SetIpuStrategy(*(ipu_strategy_instance_.get()));

  // Get feed_list and fetch list
@@ -124,7 +127,8 @@ void InferenceProcessPass::ApplyImpl(ir::Graph* graph) const {
  std::vector<std::string> graph_pass = {"forward_graph_extract_pass",
                                         "infer_shape_pass",
                                         "avg_shard_pass",
-                                         "popart_canonicalization_pass"};
+                                         "popart_canonicalization_pass",
+                                         "inference_dtype_transfer_pass"};
  std::vector<std::string> compile_pass = {"ipu_inplace_pass",
                                           "ipu_graph_builder_pass",
                                           "ipu_runtime_replacer_pass",

--- a/paddle/fluid/platform/device/ipu/ipu_compiler.cc
+++ b/paddle/fluid/platform/device/ipu/ipu_compiler.cc
@@ -111,7 +111,13 @@ struct ConstantOpAttrVisitor {
    framework::TensorFromVector<int64_t>(vec, tensor_);
  }
  void operator()(const std::vector<double>& vec) const {
-    framework::TensorFromVector<double>(vec, tensor_);
+    // PopART does not support float64 constants; narrow them to float32.
+    std::vector<float> vec_fp32;
+    std::transform(vec.begin(),
+                   vec.end(),
+                   std::back_inserter(vec_fp32),
+                   [](double f) -> float { return static_cast<float>(f); });
+    framework::TensorFromVector<float>(vec_fp32, tensor_);
  }
 #define RAISE_ERROR \
  PADDLE_THROW(     \
@@ -416,7 +422,7 @@ void Compiler::LowerWeights(const Scope* scope) {
    auto* node = graph_helper_->nodes_id_map[id];
    // Weights are var node and Persistable
    if (node->IsVar() && !node->IsCtrlVar() && node->Var() &&
-        node->Var()->Persistable()) {
+        node->Var()->Persistable() && node->inputs.empty()) {
      // Weights are Parameter in training mode
      if (ipu_strategy_->is_training && !node->Var()->IsParameter()) {
        continue;

--- a/paddle/fluid/platform/device/ipu/ipu_executor.cc
+++ b/paddle/fluid/platform/device/ipu/ipu_executor.cc
@@ -257,6 +257,7 @@ void Executor::AcquireDevice() {
            "numIPUs",
            std::to_string(ipu_strategy_->num_ipus),
        },
+        {"tilesPerIPU", std::to_string(ipu_strategy_->tiles_per_ipu)},
        {"ipuVersion", "ipu2"},
    };
    device_ = popart::DeviceManager::createDeviceManager().createIpuModelDevice(
@@ -269,6 +270,7 @@ void Executor::AcquireDevice() {
            "numIPUs",
            std::to_string(ipu_strategy_->num_ipus),
        },
+        {"tilesPerIPU", std::to_string(ipu_strategy_->tiles_per_ipu)},
        {"ipuVersion", "ipu2"},
    };
    device_ =

--- a/paddle/fluid/platform/device/ipu/ipu_strategy.cc
+++ b/paddle/fluid/platform/device/ipu/ipu_strategy.cc
@@ -91,6 +91,7 @@ IpuStrategy::IpuStrategy() {
  ADD_UINT64_OPTION(batches_per_step);
  ADD_UINT64_OPTION(micro_batch_size);
  ADD_UINT64_OPTION(random_seed);
+  ADD_UINT64_OPTION(tiles_per_ipu);
  ADD_DOUBLE_OPTION(available_memory_proportion);
  ADD_DOUBLE_OPTION(loss_scaling);
  ADD_DOUBLE_OPTION(max_weight_norm);

--- a/paddle/fluid/platform/device/ipu/ipu_strategy.h
+++ b/paddle/fluid/platform/device/ipu/ipu_strategy.h
@@ -41,7 +41,7 @@ class IpuStrategy {
  // Average sharding, debugging used
  bool need_avg_shard = false;

-  // Flag for fp16, true for pure fp16
+  // Flag for fp16, true for inference with pure fp16
  bool enable_fp16 = false;

  // The mode of Adam/Lamb optimizer
@@ -64,6 +64,9 @@ class IpuStrategy {
  // Micro batch-size
  int micro_batch_size = 1;

+  // The number of virtual tiles for IPUMODEL
+  int tiles_per_ipu = 4;
+
  // Random seed
  std::uint64_t random_seed = std::numeric_limits<std::uint64_t>::max();