Unverified commit b850acb2, authored by: H hong, committed by: GitHub

[NewIR]support new ir load combine (#56101)

* support new ir load combine

* update

* polish code

* remove print

* polish code

* fix bug

* polish code

* fix compile bug
Parent a2fe1e24
......@@ -21,7 +21,6 @@
#include "paddle/fluid/framework/new_executor/new_executor_defs.h"
#include "paddle/fluid/platform/event.h"
#include "paddle/ir/core/value.h"
namespace ir {
class Value;
......
......@@ -12,13 +12,13 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/new_executor/instruction/instruction_util.h"
#include <map>
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/framework/new_executor/instruction/instruction_util.h"
#include "paddle/fluid/framework/new_executor/new_executor_defs.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/event.h"
......@@ -42,7 +42,7 @@ std::vector<int> GetValueIds(
const std::unordered_map<const paddle::framework::Variable*, std::string>&
variable_2_var_name) {
std::vector<int> ids;
std::string var_name = value_2_var_name.at(value);
auto& var_name = value_2_var_name.at(value);
ids.push_back(var_name_2_id.at(var_name));
// NOTE(zhangbo): Value maybe a VariableRefArray
auto var = inner_scope->FindVar(var_name);
......@@ -61,7 +61,7 @@ platform::DeviceContext* ParseDeviceContext(
const platform::Place& place,
const std::string& execution_stream,
const int stream_priority) {
auto op_attributes = op->attributes();
auto& op_attributes = op->attributes();
auto op_name =
op_attributes.at("op_name").dyn_cast<::ir::StrAttribute>().AsString();
interpreter::ContextManager& ctx_manager =
......@@ -149,7 +149,7 @@ OpFuncType AnalyseOpFuncType(::ir::Operation* op,
// computing. They execute serially in device thread and block CUDA kernel
// launching in other GPU OPs. To improve performance, set them as kGpuSync
// and so that they would be dispatched to host thread.
auto op_attributes = op->attributes();
auto& op_attributes = op->attributes();
auto op_name =
op_attributes.at("op_name").dyn_cast<::ir::StrAttribute>().AsString();
if (op_name == kCoalesceTensor &&
......
......@@ -43,7 +43,7 @@ LegacyKernelInstruction::LegacyKernelInstruction(
const std::unordered_map<const paddle::framework::Variable*, std::string>&
variable_2_var_name)
: InstructionBase(id, place) {
auto op_attributes = op->attributes();
auto& op_attributes = op->attributes();
auto op_name =
op_attributes.at("op_name").dyn_cast<::ir::StrAttribute>().AsString();
ir::OpInfo op_info = ir::IrContext::Instance()->GetRegisteredOpInfo(op_name);
......@@ -97,18 +97,20 @@ LegacyKernelInstruction::LegacyKernelInstruction(
yaml_interface->get_op_info_());
VLOG(6) << "finish process yaml_info_parser";
::ir::BuildPhiContext<
phi::InferMetaContext,
phi::MetaTensor,
phi::MetaTensor,
paddle::small_vector<phi::MetaTensor, phi::kInputSmallVectorSize>,
paddle::small_vector<phi::MetaTensor, phi::kInputSmallVectorSize>,
false>(op,
value_2_var_name,
scope,
local_scope,
yaml_info_parser,
&infer_meta_context_);
if (infer_meta_interface_) {
::ir::BuildPhiContext<
phi::InferMetaContext,
phi::MetaTensor,
phi::MetaTensor,
paddle::small_vector<phi::MetaTensor, phi::kInputSmallVectorSize>,
paddle::small_vector<phi::MetaTensor, phi::kInputSmallVectorSize>,
false>(op,
value_2_var_name,
scope,
local_scope,
yaml_info_parser,
&infer_meta_context_);
}
VLOG(6) << "finish process infer meta context";
auto kernel_name =
......@@ -123,8 +125,10 @@ LegacyKernelInstruction::LegacyKernelInstruction(
phi_kernel_->IsValid(), true, "not found kernel for [%s]", kernel_name);
VLOG(6) << "finish process select kernel";
operator_base_ =
ir::BuildOperatorBase(op, value_2_var_name, yaml_info_parser);
Scope* inner_scope = local_scope == nullptr ? scope : local_scope;
operator_base_ = ir::BuildOperatorBase(
op, value_2_var_name, yaml_info_parser, variable_2_var_name, inner_scope);
paddle::framework::VariableValueMap in_map;
paddle::framework::VariableValueMap out_map;
auto dev_ctx = phi::DeviceContextPool::Instance().Get(
......@@ -151,7 +155,6 @@ LegacyKernelInstruction::LegacyKernelInstruction(
GetStreamPriority()));
VLOG(6) << "finish process device context";
Scope* inner_scope = local_scope == nullptr ? scope : local_scope;
InitInputsOutputsIds(
op, inner_scope, value_2_var_name, var_name_2_id, variable_2_var_name);
VLOG(6) << "finish process inputs outputs index";
......@@ -169,10 +172,16 @@ LegacyKernelInstruction::~LegacyKernelInstruction() {
if (kernel_context_ != nullptr) {
delete kernel_context_;
}
if (phi_kernel_ != nullptr) {
delete phi_kernel_;
}
}
void LegacyKernelInstruction::Run() {
infer_meta_interface_->infer_meta_(&(infer_meta_context_));
if (infer_meta_interface_) {
infer_meta_interface_->infer_meta_(&(infer_meta_context_));
}
VLOG(6) << "Run op " << legacy_op_name_ << " infer meta.";
(*(phi_kernel_))((kernel_context_));
VLOG(6) << "Run op " << legacy_op_name_ << " kernel.";
......
......@@ -1076,36 +1076,17 @@ void BuildOpFuncList(
"not found kernel for [%s]",
kernel_name);
if (kernel_name == "fused_softmax_mask_upper_triangle" ||
kernel_name == "fused_softmax_mask_upper_triangle_grad") {
// builder operator
op_func_node.operator_base_ =
ir::BuildOperatorBase(op, value_2_name_map, op_yaml_info_parser);
paddle::framework::VariableValueMap in_map;
paddle::framework::VariableValueMap out_map;
op_func_node.runtime_ctx_ =
std::make_shared<paddle::framework::RuntimeContext>(
paddle::framework::RuntimeContext(in_map, out_map));
ir::BuildRuntimeContext(op,
value_2_name_map,
scope,
local_scope,
op_yaml_info_parser,
op_func_node.runtime_ctx_.get());
op_func_node.fluid_op = true;
} else {
::ir::BuildPhiContext<phi::KernelContext,
const phi::TensorBase*,
phi::TensorBase*,
paddle::small_vector<const phi::TensorBase*>,
paddle::small_vector<phi::TensorBase*>,
true>(op,
value_2_name_map,
scope,
local_scope,
op_yaml_info_parser,
&(op_func_node.kernel_context_));
}
::ir::BuildPhiContext<phi::KernelContext,
const phi::TensorBase*,
phi::TensorBase*,
paddle::small_vector<const phi::TensorBase*>,
paddle::small_vector<phi::TensorBase*>,
true>(op,
value_2_name_map,
scope,
local_scope,
op_yaml_info_parser,
&(op_func_node.kernel_context_));
VLOG(6) << "finish process kernel context";
op_func_node.kernel_context_.SetDeviceContext(
......
......@@ -38,6 +38,7 @@
#include "paddle/fluid/framework/new_executor/instruction/legacy_kernel_instruction.h"
#include "paddle/fluid/framework/new_executor/instruction/phi_kernel_instruction.h"
#include "paddle/fluid/ir/dialect/utils.h"
#include "paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.h"
#include "paddle/ir/core/builtin_attribute.h"
......@@ -428,8 +429,7 @@ void NewIRInterpreter::BuildInstruction() {
}
VLOG(6) << "process " << op_name;
if (op_name == "pd.fused_softmax_mask_upper_triangle" ||
op_name == "pd.fused_softmax_mask_upper_triangle_grad") {
if (dialect::IsLegacyOp(op_name)) {
vec_instruction_base_.emplace_back(
std::make_unique<LegacyKernelInstruction>(op_idx++,
place_,
......
......@@ -103,7 +103,7 @@
- {typename: bool, name: load_as_fp16}
- {typename: bool, name: model_from_memory}
outputs:
- {typename: 'Tensor[]', name: out, optional: true, intermediate: false}
- {typename: 'Tensor[]', name: Out, optional: true, intermediate: false}
no_need_buffer: null
data_transform: null
kernel:
......
......@@ -17,6 +17,11 @@
namespace paddle {
namespace dialect {
const std::unordered_set<std::string> LegacyOpList = {
"pd.fused_softmax_mask_upper_triangle",
"pd.fused_softmax_mask_upper_triangle_grad",
"pd.load_combine"};
enum class AttrType {
UNDEFINED = 0,
BOOL,
......@@ -167,5 +172,7 @@ VariantType GetAttributeData(const ir::Attribute& attr) {
return kAttrCastMap[attr_type](attr);
}
bool IsLegacyOp(const std::string& name) { return LegacyOpList.count(name); }
} // namespace dialect
} // namespace paddle
......@@ -147,5 +147,7 @@ static inline ir::Attribute TransToIrAttribute(phi::Scalar scalar,
VariantType GetAttributeData(const ir::Attribute& attr);
bool IsLegacyOp(const std::string& name);
} // namespace dialect
} // namespace paddle
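For orientation (not part of the diff itself): the two hunks above add a small predicate in paddle::dialect that the other hunks in this commit branch on. A minimal consolidated sketch, assuming only the names visible in the diff:
#include <string>
#include <unordered_set>
namespace paddle {
namespace dialect {
// Ops that still go through the legacy fluid OperatorBase/RuntimeContext path;
// this commit adds "pd.load_combine" to the list.
const std::unordered_set<std::string> LegacyOpList = {
    "pd.fused_softmax_mask_upper_triangle",
    "pd.fused_softmax_mask_upper_triangle_grad",
    "pd.load_combine"};
// Replaces the hard-coded op-name comparisons used before this commit.
bool IsLegacyOp(const std::string& name) { return LegacyOpList.count(name); }
}  // namespace dialect
}  // namespace paddle
// Callers in this commit (see the interpreter and lowering-pass hunks below)
// test the predicate instead of comparing op names directly, e.g.
//   if (dialect::IsLegacyOp(op_name)) { /* build a LegacyKernelInstruction */ }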
......@@ -598,17 +598,39 @@ void BuildRuntimeContext(
PADDLE_ENFORCE_NOT_NULL(inner_scope->FindVar(in_var_name),
phi::errors::PreconditionNotMet(
"can not find var[%s] in scope", in_var_name));
auto var = inner_scope->FindVar(in_var_name);
std::vector<paddle::framework::Variable*> vec_tmp = {var};
auto legacy_attr_name = op_normalizer.GetLegacyArgName(fluid_op_name, name);
runtime_ctx->outputs[legacy_attr_name] = vec_tmp;
auto type = ptr.type();
auto legacy_arg_name = op_normalizer.GetLegacyArgName(fluid_op_name, name);
if (type.isa<paddle::dialect::AllocatedDenseTensorType>() ||
type.isa<paddle::dialect::AllocatedSelectedRowsType>()) {
std::vector<paddle::framework::Variable*> vec_tmp = {var};
runtime_ctx->outputs[legacy_arg_name] = vec_tmp;
} else if (type.isa<ir::VectorType>()) {
auto var_ref = var->Get<paddle::framework::VariableRefArray>();
std::vector<paddle::framework::Variable*> vec_tmp;
vec_tmp.reserve(var_ref.size());
for (size_t k = 0; k < var_ref.size(); ++k) {
vec_tmp.push_back(const_cast<paddle::framework::Variable*>(var_ref[k]));
}
runtime_ctx->outputs[legacy_arg_name] = vec_tmp;
} else {
PADDLE_THROW(phi::errors::Unimplemented(
"only support AllocatedDenseTensor, AllocatedSelectedRowsType and "
"ir::vector type"));
}
}
}
std::shared_ptr<paddle::framework::OperatorBase> BuildOperatorBase(
ir::Operation* op,
const std::unordered_map<ir::Value, std::string>& name_map,
const paddle::dialect::OpYamlInfoParser& op_yaml_info) {
const paddle::dialect::OpYamlInfoParser& op_yaml_info,
const std::unordered_map<const paddle::framework::Variable*, std::string>&
variable_2_var_name,
const paddle::framework::Scope* scope) {
paddle::framework::VariableNameMap in_name_map;
paddle::framework::VariableNameMap out_name_map;
paddle::framework::AttributeMap attr_map;
......@@ -637,6 +659,30 @@ std::shared_ptr<paddle::framework::OperatorBase> BuildOperatorBase(
}
// build attribute
auto& op_attr_map = op->attributes();
auto attr_name_list = op_yaml_info.AttrParams(true);
for (auto& name : attr_name_list) {
auto& val = op_attr_map.at(name);
if (val.isa<ir::StrAttribute>()) {
attr_map[name] = val.dyn_cast<ir::StrAttribute>().AsString();
} else if (val.isa<ir::Int32Attribute>()) {
attr_map[name] = val.dyn_cast<ir::Int32Attribute>().data();
} else if (val.isa<ir::BoolAttribute>()) {
attr_map[name] = val.dyn_cast<ir::BoolAttribute>().data();
} else if (val.isa<ir::FloatAttribute>()) {
attr_map[name] = val.dyn_cast<ir::FloatAttribute>().data();
} else if (val.isa<ir::DoubleAttribute>()) {
attr_map[name] = val.dyn_cast<ir::DoubleAttribute>().data();
} else if (val.isa<ir::Int64Attribute>()) {
attr_map[name] = val.dyn_cast<ir::Int64Attribute>().data();
} else {
std::stringstream ss;
val.Print(ss);
VLOG(1) << "type not support " << ss.str() << std::endl;
PADDLE_THROW("Type[%s] in attribute map not support yet", ss.str());
}
}
auto& output_name_list = op_yaml_info.OutputNames();
for (size_t i = 0; i < output_name_list.size(); ++i) {
......@@ -644,8 +690,26 @@ std::shared_ptr<paddle::framework::OperatorBase> BuildOperatorBase(
ir::Value ptr = op->result(i);
auto out_var_name = name_map.at(ptr);
auto legacy_attr_name = op_normalizer.GetLegacyArgName(fluid_op_name, name);
out_name_map[legacy_attr_name].push_back(out_var_name);
auto type = ptr.type();
auto legacy_arg_name = op_normalizer.GetLegacyArgName(fluid_op_name, name);
if (type.isa<paddle::dialect::AllocatedDenseTensorType>() ||
type.isa<paddle::dialect::AllocatedSelectedRowsType>()) {
out_name_map[legacy_arg_name].push_back(out_var_name);
} else if (type.isa<ir::VectorType>()) {
auto var = scope->FindVar(out_var_name);
auto var_ref = var->Get<paddle::framework::VariableRefArray>();
for (size_t k = 0; k < var_ref.size(); ++k) {
PADDLE_ENFORCE(variable_2_var_name.count(var_ref[k]),
"Variable MUST in variable_2_var_name map");
out_name_map[legacy_arg_name].push_back(
variable_2_var_name.at(var_ref[k]));
}
} else {
PADDLE_THROW(phi::errors::Unimplemented(
"only support AllocatedDenseTensor, AllocatedSelectedRowsType and "
"ir::vector type"));
}
}
auto& op_info = paddle::framework::OpInfoMap::Instance().Get(fluid_op_name);
......
......@@ -62,7 +62,10 @@ void BuildRuntimeContext(
std::shared_ptr<paddle::framework::OperatorBase> BuildOperatorBase(
ir::Operation* op,
const std::unordered_map<ir::Value, std::string>& name_map,
const paddle::dialect::OpYamlInfoParser& op_yaml_info);
const paddle::dialect::OpYamlInfoParser& op_yaml_info,
const std::unordered_map<const paddle::framework::Variable*, std::string>&
variable_2_var_name,
const paddle::framework::Scope* scope);
template <typename Context,
typename InType,
......
......@@ -59,10 +59,6 @@ const std::unordered_set<std::string> UnchangeOutputOps = {
"builtin.get_parameter",
"pd.shadow_output"};
const std::unordered_set<std::string> LegacyOpList = {
"pd.fused_softmax_mask_upper_triangle",
"pd.fused_softmax_mask_upper_triangle_grad"};
bool NeedFallBackCpu(const ir::Operation* op,
const std::string& kernel_fn_name,
const phi::KernelKey& kernel_key) {
......@@ -553,6 +549,9 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
GetKernelKey(op_item, place, map_value_pair, op_info_parser.get());
VLOG(6) << "kernel type " << kernel_key;
if (op_item->name() == "pd.load_combine") {
kernel_key.set_dtype(phi::DataType::FLOAT32);
}
if (NeedFallBackCpu((op_item), kernel_fn_str, kernel_key)) {
kernel_key.set_backend(phi::Backend::CPU);
}
......@@ -571,7 +570,7 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
auto args_def = phi_kernel.args_def();
auto output_defs = args_def.output_defs();
if (!UnchangeOutputOps.count(op_item->name()) &&
!LegacyOpList.count(op_item->name())) {
!IsLegacyOp(op_item->name())) {
PADDLE_ENFORCE_EQ(
op_item->num_results(),
output_defs.size(),
......@@ -583,7 +582,7 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
for (size_t i = 0; i < op_item->num_results(); ++i) {
phi::Place out_place;
if ((!UnchangeOutputOps.count(op_item->name())) &&
(!LegacyOpList.count(op_item->name())) && phi_kernel.IsValid()) {
(!IsLegacyOp(op_item->name())) && phi_kernel.IsValid()) {
out_place = phi::TransToPhiPlace(output_defs[i].backend);
} else {
out_place = phi::TransToPhiPlace(kernel_key.backend());
......
......@@ -12,7 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import tempfile
import unittest
import numpy as np
......@@ -287,11 +288,18 @@ class TestNewIrPrint(unittest.TestCase):
class TestJitSaveOp(unittest.TestCase):
def setUp(self):
self.temp_dir = tempfile.TemporaryDirectory()
self.model_path = os.path.join(self.temp_dir.name, "new_ir_save_load")
def tearDown(self):
self.temp_dir.cleanup()
def test_with_new_ir(self):
paddle.disable_static()
linear = paddle.nn.Linear(10, 10)
path = "example_model/linear"
path = os.path.join(self.model_path, "linear")
paddle.jit.save(
linear,
......@@ -299,6 +307,26 @@ class TestJitSaveOp(unittest.TestCase):
input_spec=[paddle.static.InputSpec([10, 10], 'float32', 'x')],
)
paddle.enable_static()
place = (
paddle.CUDAPlace(0)
if paddle.is_compiled_with_cuda()
else paddle.CPUPlace()
)
exe = paddle.static.Executor(place)
[
inference_program,
feed_target_names,
fetch_targets,
] = paddle.static.io.load_inference_model(
self.model_path,
executor=exe,
model_filename="linear.pdmodel",
params_filename="linear.pdiparams",
)
if __name__ == "__main__":
paddle.enable_static()
......