diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt
index d19b163817e41e9b2fad42916d84778192edcb08..8569a3bb6151f71c57a016892cef4881ff01308b 100755
--- a/paddle/fluid/framework/ir/CMakeLists.txt
+++ b/paddle/fluid/framework/ir/CMakeLists.txt
@@ -240,6 +240,7 @@ if(WITH_IPU)
   pass_library(infer_shape_pass base DIR ipu)
   pass_library(delete_scale_op_pass base DIR ipu)
   pass_library(avg_shard_pass base DIR ipu)
+  pass_library(inference_dtype_transfer_pass base DIR ipu)
 endif()

 cc_library(
diff --git a/paddle/fluid/framework/ir/ipu/inference_dtype_transfer_pass.cc b/paddle/fluid/framework/ir/ipu/inference_dtype_transfer_pass.cc
new file mode 100644
index 0000000000000000000000000000000000000000..f06f05e9f0242cd541b9a653c5e4d3ed69699aca
--- /dev/null
+++ b/paddle/fluid/framework/ir/ipu/inference_dtype_transfer_pass.cc
@@ -0,0 +1,104 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/framework/ir/ipu/inference_dtype_transfer_pass.h"
+
+#include "paddle/fluid/platform/device/ipu/ipu_backend.h"
+
+#include "paddle/fluid/framework/data_type.h"
+#include "paddle/fluid/framework/ir/graph_helper.h"
+#include "paddle/fluid/framework/ir/pass_tester_helper.h"
+#include "paddle/phi/common/place.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+void InferenceDtypeTransferPass::ApplyImpl(ir::Graph* graph) const {
+  VLOG(10) << "enter InferenceDtypeTransferPass::ApplyImpl";
+  VLOG(10) << "Raw Graph: ";
+  VLOG(10) << DebugString(graph);
+
+  auto* ipu_backend = platform::ipu::IpuBackend::GetInstance();
+  auto enable_fp16 = ipu_backend->GetIpuStrategy()->enable_fp16;
+
+  if (enable_fp16) {
+    VLOG(10) << "Transfer var to fp16...";
+    auto* scope = ipu_backend->GetScope();
+
+    std::unordered_set<std::string> used_var_names;
+    for (auto* node : graph->Nodes()) {
+      if (node->IsVar()) {
+        auto var_desc = node->Var();
+        if (var_desc->GetDataType() == proto::VarType::FP32) {
+          // Transfer the dtype of the var_desc
+          var_desc->SetDataType(proto::VarType::FP16);
+          VLOG(10) << "Transfer the VarDesc of " << var_desc->Name() << " to "
+                   << var_desc->GetDataType();
+
+          if (node->inputs.empty() && node->Var()->Persistable() &&
+              scope->FindVar(var_desc->Name()) &&
+              used_var_names.find(var_desc->Name()) == used_var_names.end()) {
+            // Transfer the dtypes of weight tensors
+            std::vector<float16> fp16_data;
+            auto* tensor = scope->FindVar(var_desc->Name())
+                               ->GetMutable<LoDTensor>();
+            auto* data_ptr = tensor->data<float>();
+            auto num_elem = tensor->numel();
+
+            std::transform(data_ptr,
+                           data_ptr + num_elem,
+                           std::back_inserter(fp16_data),
+                           [&](float elem) { return float16(elem); });
+            memcpy(reinterpret_cast<void*>(data_ptr),
+                   fp16_data.data(),
+                   num_elem * sizeof(float16));
+            tensor->set_type(
+                framework::TransToPhiDataType(proto::VarType::FP16));
+          }
+        }
+        used_var_names.insert(var_desc->Name());
+      }
+      if (node->IsOp()) {
+        auto* op_desc = node->Op();
+        if (op_desc->Type() == "popart_cast") {
+          // Transfer the target dtype of cast Op
+          if (BOOST_GET_CONST(std::string, op_desc->GetAttr("to")) == "FLOAT") {
+            op_desc->SetAttr("to", std::string("FLOAT16"));
+            op_desc->Flush();
+          }
+        }
+        if (op_desc->Type() == "popart_constant") {
+          // Transfer the dtype of fill_constant Op
+          // (ONNX dtype codes: 1 = FLOAT, 10 = FLOAT16)
+          if (op_desc->GetAttrIfExists<int>("dtype") == 1) {
+            op_desc->SetAttr("dtype", 10);
+            op_desc->Flush();
+          }
+        }
+      }
+    }
+    VLOG(10) << "Transfer var to fp16...Done";
+  }
+
+  VLOG(10) << "Post Graph: ";
+  VLOG(10) << DebugString(graph);
+  VLOG(10) << "leave InferenceDtypeTransferPass::ApplyImpl";
+}
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
+
+REGISTER_PASS(inference_dtype_transfer_pass,
+              paddle::framework::ir::InferenceDtypeTransferPass);
diff --git a/paddle/fluid/framework/ir/ipu/inference_dtype_transfer_pass.h b/paddle/fluid/framework/ir/ipu/inference_dtype_transfer_pass.h
new file mode 100644
index 0000000000000000000000000000000000000000..3111968ea2bba20723f242db2898406d242dbbb5
--- /dev/null
+++ b/paddle/fluid/framework/ir/ipu/inference_dtype_transfer_pass.h
@@ -0,0 +1,30 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/fluid/framework/ir/pass.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+class InferenceDtypeTransferPass : public Pass {
+ protected:
+  void ApplyImpl(ir::Graph* graph) const override;
+};
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/fluid/framework/ir/ipu/inference_process_pass.cc b/paddle/fluid/framework/ir/ipu/inference_process_pass.cc
index 8357ec05c24f60196ea5308dd72c3e7cf5188d0c..1ef03b1bd9cfbdcdcdacb09a28e9f0bd36a458f4 100644
--- a/paddle/fluid/framework/ir/ipu/inference_process_pass.cc
+++ b/paddle/fluid/framework/ir/ipu/inference_process_pass.cc
@@ -90,6 +90,9 @@ void InferenceProcessPass::ApplyImpl(ir::Graph* graph) const {
   ipu_strategy_instance_->available_memory_proportion =
       graph->Get<float>("available_memory_proportion");

+  // Set tiles_per_ipu for IPUMODEL
+  ipu_strategy_instance_->tiles_per_ipu = 128;
+
   ipu_backend->SetIpuStrategy(*(ipu_strategy_instance_.get()));

   // Get feed_list and fetch list
@@ -124,7 +127,8 @@ void InferenceProcessPass::ApplyImpl(ir::Graph* graph) const {
   std::vector<std::string> graph_pass = {"forward_graph_extract_pass",
                                          "infer_shape_pass",
                                          "avg_shard_pass",
-                                         "popart_canonicalization_pass"};
+                                         "popart_canonicalization_pass",
+                                         "inference_dtype_transfer_pass"};
   std::vector<std::string> compile_pass = {"ipu_inplace_pass",
                                            "ipu_graph_builder_pass",
                                            "ipu_runtime_replacer_pass",
diff --git a/paddle/fluid/platform/device/ipu/ipu_compiler.cc b/paddle/fluid/platform/device/ipu/ipu_compiler.cc
index 330ddef577ef20f85805311d2640fe5ada1d0145..74b216f4e0f5864a2b27ba5af97b2af2a3f49cbd 100644
--- a/paddle/fluid/platform/device/ipu/ipu_compiler.cc
+++ b/paddle/fluid/platform/device/ipu/ipu_compiler.cc
@@ -111,7 +111,13 @@ struct ConstantOpAttrVisitor {
     framework::TensorFromVector(vec, tensor_);
   }
   void operator()(const std::vector<double>& vec) const {
-    framework::TensorFromVector(vec, tensor_);
+    // PopART does not support float64 constants
+    std::vector<float> vec_fp32;
+    std::transform(vec.begin(),
+                   vec.end(),
+                   std::back_inserter(vec_fp32),
+                   [](double f) -> float { return static_cast<float>(f); });
+    framework::TensorFromVector(vec_fp32, tensor_);
   }
 #define RAISE_ERROR \
   PADDLE_THROW( \
@@ -416,7 +422,7 @@ void Compiler::LowerWeights(const Scope* scope) {
     auto* node = graph_helper_->nodes_id_map[id];
     // Weights are var node and Persistable
     if (node->IsVar() && !node->IsCtrlVar() && node->Var() &&
-        node->Var()->Persistable()) {
+        node->Var()->Persistable() && node->inputs.empty()) {
       // Weights are Parameter in training mode
       if (ipu_strategy_->is_training && !node->Var()->IsParameter()) {
         continue;
diff --git a/paddle/fluid/platform/device/ipu/ipu_executor.cc b/paddle/fluid/platform/device/ipu/ipu_executor.cc
index 4db25e880f3a92b878a0a4de7cfa86e314b3f703..3cd4a12b378a383dc30894d556548b6b630d4b00 100644
--- a/paddle/fluid/platform/device/ipu/ipu_executor.cc
+++ b/paddle/fluid/platform/device/ipu/ipu_executor.cc
@@ -257,6 +257,7 @@ void Executor::AcquireDevice() {
         {
             "numIPUs",
            std::to_string(ipu_strategy_->num_ipus),
         },
+        {"tilesPerIPU", std::to_string(ipu_strategy_->tiles_per_ipu)},
         {"ipuVersion", "ipu2"},
     };
     device_ = popart::DeviceManager::createDeviceManager().createIpuModelDevice(
@@ -269,6 +270,7 @@ void Executor::AcquireDevice() {
         {
            "numIPUs",
            std::to_string(ipu_strategy_->num_ipus),
         },
+        {"tilesPerIPU", std::to_string(ipu_strategy_->tiles_per_ipu)},
         {"ipuVersion", "ipu2"},
     };
     device_ =
diff --git a/paddle/fluid/platform/device/ipu/ipu_strategy.cc b/paddle/fluid/platform/device/ipu/ipu_strategy.cc
index eeffd0a36e015347d04880be0632428c2fc54cf0..e7d53c751f2b920e0602ebb2423af55c6825b32b 100644
--- a/paddle/fluid/platform/device/ipu/ipu_strategy.cc
+++ b/paddle/fluid/platform/device/ipu/ipu_strategy.cc
@@ -91,6 +91,7 @@ IpuStrategy::IpuStrategy() {
   ADD_UINT64_OPTION(batches_per_step);
   ADD_UINT64_OPTION(micro_batch_size);
   ADD_UINT64_OPTION(random_seed);
+  ADD_UINT64_OPTION(tiles_per_ipu);
   ADD_DOUBLE_OPTION(available_memory_proportion);
   ADD_DOUBLE_OPTION(loss_scaling);
   ADD_DOUBLE_OPTION(max_weight_norm);
diff --git a/paddle/fluid/platform/device/ipu/ipu_strategy.h b/paddle/fluid/platform/device/ipu/ipu_strategy.h
index 1fdde59cf856c53dfa49d84df312c64ccc1a41f6..9ae54108ac5282dfaf838c830f1229af51d92775 100644
--- a/paddle/fluid/platform/device/ipu/ipu_strategy.h
+++ b/paddle/fluid/platform/device/ipu/ipu_strategy.h
@@ -41,7 +41,7 @@ class IpuStrategy {
   // Average sharding, debugging used
   bool need_avg_shard = false;

-  // Flag for fp16, true for pure fp16
+  // Flag for fp16, true for inference with pure fp16
   bool enable_fp16 = false;

   // The mode of Adam/Lamb optimizer
@@ -64,6 +64,9 @@ class IpuStrategy {
   // Micro batch-size
   int micro_batch_size = 1;

+  // The number of virtual tiles for IPUMODEL
+  int tiles_per_ipu = 4;
+
   // Random seed
   std::uint64_t random_seed = std::numeric_limits<std::uint64_t>::max();
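
For reference: the weight conversion in inference_dtype_transfer_pass.cc uses a stage-then-overwrite pattern, narrowing the payload into a temporary vector and then copying it back over the original allocation. Below is a minimal standalone sketch of that pattern, not Paddle code, shown with double -> float since portable C++ has no built-in half type; the pass itself does float -> float16 and additionally updates the tensor's dtype metadata via set_type().

```cpp
// Standalone sketch of the in-place narrowing pattern used by the pass.
#include <algorithm>
#include <cstring>
#include <iostream>
#include <iterator>
#include <vector>

int main() {
  std::vector<double> buffer = {1.5, -2.25, 3.0};

  // Stage the narrowed values first; converting element-by-element directly
  // into the source buffer while still reading wider elements from it would
  // clobber data that has not been converted yet.
  std::vector<float> narrowed;
  std::transform(buffer.begin(), buffer.end(), std::back_inserter(narrowed),
                 [](double v) { return static_cast<float>(v); });

  // Reuse the original allocation: the narrowed payload is half the size, so
  // it always fits. Real code must also update the dtype metadata (as the
  // pass does with tensor->set_type) so readers reinterpret the bytes.
  std::memcpy(buffer.data(), narrowed.data(), narrowed.size() * sizeof(float));

  // Read back through the narrowed view, mirroring the pass's reinterpret_cast.
  const float* view = reinterpret_cast<const float*>(buffer.data());
  for (std::size_t i = 0; i < narrowed.size(); ++i) {
    std::cout << view[i] << ' ';  // prints: 1.5 -2.25 3
  }
  std::cout << '\n';
  return 0;
}
```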
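
For reference: the "tilesPerIPU" option added in ipu_executor.cc is forwarded to PopART's device manager when a simulated IPU Model device is created, controlling how many virtual tiles the simulated IPU exposes. A hedged sketch of that call, assuming the Poplar SDK's PopART headers are available; the option keys mirror the ones in the diff above.

```cpp
#include <map>
#include <memory>
#include <string>

#include <popart/devicemanager.hpp>

int main() {
  std::map<std::string, std::string> opts = {
      {"numIPUs", "1"},
      // inference_process_pass.cc forces 128 tiles for the IPU Model,
      // keeping simulated compilation cheap; physical IPUs have many more.
      {"tilesPerIPU", "128"},
      {"ipuVersion", "ipu2"},
  };
  std::shared_ptr<popart::DeviceInfo> device =
      popart::DeviceManager::createDeviceManager().createIpuModelDevice(opts);
  return device != nullptr ? 0 : 1;
}
```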