未验证 提交 a42f48bd 编写于 作者: Y yaozhixin 提交者: GitHub

update paddle inference fp16 mode (#44014)

上级 01fedf4f
......@@ -240,6 +240,7 @@ if(WITH_IPU)
pass_library(infer_shape_pass base DIR ipu)
pass_library(delete_scale_op_pass base DIR ipu)
pass_library(avg_shard_pass base DIR ipu)
pass_library(inference_dtype_transfer_pass base DIR ipu)
endif()
cc_library(
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/ipu/inference_dtype_transfer_pass.h"
#include "paddle/fluid/platform/device/ipu/ipu_backend.h"
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/ir/pass_tester_helper.h"
#include "paddle/phi/common/place.h"
namespace paddle {
namespace framework {
namespace ir {
// Rewrites an FP32 IPU inference graph so that it uses FP16, in place.
//
// The pass only logs the graph unless the IpuStrategy held by the IpuBackend
// singleton has `enable_fp16` set. When it is set, for every node in `graph`:
//   * a variable node whose VarDesc dtype is FP32 is switched to FP16;
//   * if that variable is persistable, has no producer in the graph and is
//     present in the backend scope, its tensor payload is converted from
//     float to float16 in place (at most once per variable name);
//   * a `popart_cast` op whose `to` attribute is "FLOAT" is retargeted to
//     "FLOAT16";
//   * a `popart_constant` op whose `dtype` attribute is 1 gets it set to 10.
//
// @param graph  Graph to transform; its VarDescs and OpDescs are mutated.
void InferenceDtypeTransferPass::ApplyImpl(ir::Graph* graph) const {
  VLOG(10) << "enter InferenceDtypeTransferPass::ApplyImpl";
  VLOG(10) << "Raw Graph: ";
  VLOG(10) << DebugString(graph);

  auto* ipu_backend = platform::ipu::IpuBackend::GetInstance();
  auto enable_fp16 = ipu_backend->GetIpuStrategy()->enable_fp16;
  if (enable_fp16) {
    VLOG(10) << "Transfer var to fp16...";
    auto* scope = ipu_backend->GetScope();
    // Names of the variables already visited. Several graph nodes can share
    // one name; the shared tensor must be converted at most once, otherwise
    // already-converted fp16 bytes would be reinterpreted as floats.
    std::unordered_set<std::string> used_var_names;
    for (auto* node : graph->Nodes()) {
      if (node->IsVar()) {
        auto* var_desc = node->Var();
        // A variable node may carry no VarDesc (Compiler::LowerWeights checks
        // node->Var() for the same reason); such a node has no dtype to
        // rewrite, so skip it instead of dereferencing a null pointer.
        if (var_desc == nullptr) {
          continue;
        }
        const std::string var_name = var_desc->Name();
        if (var_desc->GetDataType() == proto::VarType::FP32) {
          // Transfer the dtype recorded in the VarDesc.
          var_desc->SetDataType(proto::VarType::FP16);
          VLOG(10) << "Transfer the VarDesc of " << var_desc->Name() << " to "
                   << var_desc->GetDataType();

          // Weights are persistable variables that no op in the graph
          // produces.
          const bool is_weight =
              node->inputs.empty() && var_desc->Persistable();
          const bool first_visit =
              used_var_names.find(var_name) == used_var_names.end();
          if (is_weight && first_visit) {
            auto* var = scope->FindVar(var_name);
            if (var != nullptr) {
              // Transfer the dtype of the weight tensor itself.
              auto* tensor = var->GetMutable<framework::LoDTensor>();
              auto* data_ptr = tensor->data<float>();
              auto num_elem = tensor->numel();

              std::vector<float16> fp16_data;
              fp16_data.reserve(static_cast<size_t>(num_elem));
              std::transform(data_ptr,
                             data_ptr + num_elem,
                             std::back_inserter(fp16_data),
                             [](float elem) { return float16(elem); });

              // The fp16 payload is half the size of the fp32 one, so it
              // fits in the front of the tensor's existing buffer.
              memcpy(reinterpret_cast<void*>(data_ptr),
                     fp16_data.data(),
                     num_elem * sizeof(float16));
              tensor->set_type(
                  framework::TransToPhiDataType(proto::VarType::FP16));
            }
          }
        }
        used_var_names.insert(var_name);
      }
      if (node->IsOp()) {
        auto* op_desc = node->Op();
        if (op_desc->Type() == "popart_cast") {
          // Transfer the target dtype of the cast op.
          if (BOOST_GET_CONST(std::string, op_desc->GetAttr("to")) == "FLOAT") {
            op_desc->SetAttr("to", std::string("FLOAT16"));
            op_desc->Flush();
          }
        }
        if (op_desc->Type() == "popart_constant") {
          // Transfer the dtype of the constant op.
          // NOTE(review): 1 and 10 are presumably the ONNX TensorProto codes
          // for FLOAT and FLOAT16 -- confirm and replace with named constants.
          if (op_desc->GetAttrIfExists<int>("dtype") == 1) {
            op_desc->SetAttr("dtype", 10);
            op_desc->Flush();
          }
        }
      }
    }
    VLOG(10) << "Transfer var to fp16...Done";
  }

  VLOG(10) << "Post Graph: ";
  VLOG(10) << DebugString(graph);
  VLOG(10) << "leave InferenceDtypeTransferPass::ApplyImpl";
}
} // namespace ir
} // namespace framework
} // namespace paddle
// Registers InferenceDtypeTransferPass under the name
// "inference_dtype_transfer_pass", the string by which other code (for
// example a pass-name list) refers to it.
REGISTER_PASS(inference_dtype_transfer_pass,
paddle::framework::ir::InferenceDtypeTransferPass);
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/framework/ir/pass.h"
namespace paddle {
namespace framework {
namespace ir {
// Graph pass for IPU inference that switches FP32 variables, weight tensors,
// and the dtype attributes of popart cast/constant ops to FP16 when the IPU
// strategy enables fp16. See the ApplyImpl definition for the exact rules.
class InferenceDtypeTransferPass : public Pass {
protected:
// Overrides Pass::ApplyImpl; mutates `graph` in place.
void ApplyImpl(ir::Graph* graph) const override;
};
} // namespace ir
} // namespace framework
} // namespace paddle
......@@ -90,6 +90,9 @@ void InferenceProcessPass::ApplyImpl(ir::Graph* graph) const {
ipu_strategy_instance_->available_memory_proportion =
graph->Get<float>("available_memory_proportion");
// Set tiles_per_ipu for IPUMODEL
ipu_strategy_instance_->tiles_per_ipu = 128;
ipu_backend->SetIpuStrategy(*(ipu_strategy_instance_.get()));
// Get feed_list and fetch list
......@@ -124,7 +127,8 @@ void InferenceProcessPass::ApplyImpl(ir::Graph* graph) const {
std::vector<std::string> graph_pass = {"forward_graph_extract_pass",
"infer_shape_pass",
"avg_shard_pass",
"popart_canonicalization_pass"};
"popart_canonicalization_pass",
"inference_dtype_transfer_pass"};
std::vector<std::string> compile_pass = {"ipu_inplace_pass",
"ipu_graph_builder_pass",
"ipu_runtime_replacer_pass",
......
......@@ -111,7 +111,13 @@ struct ConstantOpAttrVisitor {
framework::TensorFromVector<int64_t>(vec, tensor_);
}
// Fills tensor_ from a double-valued attribute vector.
// The values are narrowed to float32 before being copied, so tensor_ ends up
// holding float data rather than double data.
void operator()(const std::vector<double>& vec) const {
  // popart does not support float64 constants, so convert to float32 first.
  std::vector<float> vec_fp32;
  vec_fp32.reserve(vec.size());
  std::transform(vec.begin(),
                 vec.end(),
                 std::back_inserter(vec_fp32),
                 [](double f) -> float { return static_cast<float>(f); });
  framework::TensorFromVector<float>(vec_fp32, tensor_);
}
#define RAISE_ERROR \
PADDLE_THROW( \
......@@ -416,7 +422,7 @@ void Compiler::LowerWeights(const Scope* scope) {
auto* node = graph_helper_->nodes_id_map[id];
// Weights are var node and Persistable
if (node->IsVar() && !node->IsCtrlVar() && node->Var() &&
node->Var()->Persistable()) {
node->Var()->Persistable() && node->inputs.empty()) {
// Weights are Parameter in training mode
if (ipu_strategy_->is_training && !node->Var()->IsParameter()) {
continue;
......
......@@ -257,6 +257,7 @@ void Executor::AcquireDevice() {
"numIPUs",
std::to_string(ipu_strategy_->num_ipus),
},
{"tilesPerIPU", std::to_string(ipu_strategy_->tiles_per_ipu)},
{"ipuVersion", "ipu2"},
};
device_ = popart::DeviceManager::createDeviceManager().createIpuModelDevice(
......@@ -269,6 +270,7 @@ void Executor::AcquireDevice() {
"numIPUs",
std::to_string(ipu_strategy_->num_ipus),
},
{"tilesPerIPU", std::to_string(ipu_strategy_->tiles_per_ipu)},
{"ipuVersion", "ipu2"},
};
device_ =
......
......@@ -91,6 +91,7 @@ IpuStrategy::IpuStrategy() {
ADD_UINT64_OPTION(batches_per_step);
ADD_UINT64_OPTION(micro_batch_size);
ADD_UINT64_OPTION(random_seed);
ADD_UINT64_OPTION(tiles_per_ipu);
ADD_DOUBLE_OPTION(available_memory_proportion);
ADD_DOUBLE_OPTION(loss_scaling);
ADD_DOUBLE_OPTION(max_weight_norm);
......
......@@ -41,7 +41,7 @@ class IpuStrategy {
// Average sharding, debugging used
bool need_avg_shard = false;
// Flag for fp16, true for pure fp16
// Flag for fp16, true for inference with pure fp16
bool enable_fp16 = false;
// The mode of Adam/Lamb optimizer
......@@ -64,6 +64,9 @@ class IpuStrategy {
// Micro batch-size
int micro_batch_size = 1;
// The number of virtual tiles for IPUMODEL
int tiles_per_ipu = 4;
// Random seed
std::uint64_t random_seed = std::numeric_limits<std::uint64_t>::max();
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册