Unverified · Commit 1e323137 · authored by H hong, committed by GitHub

Support feed op new ir (#54840)

* add fetch kernel

* support fetch var in new ir

* fix bug

* polish code

* change array equal to np.testing

* support feed in new ir

* fix bug

* try to hack combine op

* add scope guard

* revert atan2 op

* polish code
Parent 5d9af9db
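For context, a minimal sketch of the user-facing path this commit enables, modeled on the TestFeedOp case added at the bottom of the diff. It is not part of the commit: the FLAGS_enable_new_ir_in_executor flag is real (see the StandaloneExecutor hunk below), but exposing it through paddle.set_flags is an assumption; setting the FLAGS_enable_new_ir_in_executor=1 environment variable before launch would be the fallback.

# Sketch only, assuming the flag is settable via paddle.set_flags.
import numpy as np
import paddle

paddle.enable_static()
paddle.set_flags({"FLAGS_enable_new_ir_in_executor": True})  # assumption

place = paddle.CPUPlace()
exe = paddle.static.Executor(place)

main_program = paddle.static.Program()
with paddle.static.scope_guard(paddle.static.Scope()):
    with paddle.static.program_guard(main_program):
        x = paddle.static.data("x", [2, 2], dtype="float32")
        y = paddle.static.data("y", [2, 2], dtype="float32")
        z = x + y

        np_a = np.random.rand(2, 2).astype("float32")
        np_b = np.random.rand(2, 2).astype("float32")
        # The feed dict is what exercises the new pd.feed lowering: each
        # fed name becomes a pd.feed op whose "col" attribute indexes the
        # scope's FeedList variable (see the BuildScope hunk below).
        out = exe.run(
            main_program,
            feed={"x": np_a, "y": np_b},
            fetch_list=[z.name],
        )

np.testing.assert_array_equal(out[0], np_a + np_b)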
@@ -952,8 +952,8 @@ void BuildOpFuncList(
         auto op_name = attr_map.at("op_name").dyn_cast<::ir::StrAttribute>().data();
-        if (op_name == "builtin.combine") {
-          VLOG(6) << "skip process pd.fetch op";
+        if (op_name == "builtin.combine" || op_name == "pd.feed") {
+          VLOG(6) << "skip process " << op_name;
           continue;
         }
......
@@ -192,7 +192,7 @@ FetchList NewIRInterpreter::Run(const std::vector<std::string>& feed_names,
       local_scope_,
       value_2_var_name_map_,
       execution_config_);
-  SetFeedVarsInplaceSkip(feed_names);
+  // SetFeedVarsInplaceSkip(feed_names);
   // convert vec func_list to graph
   Convert(&op_func_nodes);
   UpdateSyncOpNum();
......
@@ -69,7 +69,6 @@ StandaloneExecutor::StandaloneExecutor(const platform::Place& place,
   if (FLAGS_enable_new_ir_in_executor) {
     VLOG(6) << "begin to translate" << std::endl;
     auto base_program = paddle::TranslateLegacyProgramToProgram(*program);
-
     auto kernel_program =
         paddle::dialect::PdOpLowerToKernelPass(base_program.get());
     interpretercores_.emplace_back(std::make_shared<InterpreterCore>(
......
@@ -2,6 +2,7 @@
 inputs: []
 attrs:
 - {typename: str, name: name}
+- {typename: int, name: col}
 outputs:
 - {typename: Tensor, name: out, optional: false, intermediate: false}
 no_need_buffer: null
......
@@ -35,6 +35,9 @@ phi::KernelKey GetKernelKey(
     ir::Operation* op,
     const phi::Place& place,
     const std::unordered_map<ir::Value, ir::OpResult>& map_value_pair) {
+  if (op->name() == "pd.feed") {
+    return {phi::Backend::CPU, phi::DataLayout::ANY, phi::DataType::FLOAT32};
+  }
   phi::Backend kernel_backend = phi::Backend::UNDEFINED;
   phi::DataLayout kernel_layout = phi::DataLayout::UNDEFINED;
   phi::DataType kernel_data_type = phi::DataType::UNDEFINED;
@@ -110,7 +113,9 @@ phi::KernelKey GetKernelKey(
       continue;
     }
     auto input_tmp = op->operand(i).source();
+
     auto new_input_tmp = map_value_pair.at(input_tmp);
+
     auto input_type = new_input_tmp.type();
     dialect::AllocatedDenseTensorType type;
     if (input_type.isa<dialect::AllocatedDenseTensorType>()) {
@@ -181,7 +186,8 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog) {
     std::vector<ir::Type> op_output_types;
     if ((*it)->num_results() > 0) {
-      auto result_type = (*it)->result(0).type();
+      for (size_t i = 0; i < (*it)->num_results(); ++i) {
+        auto result_type = (*it)->result(i).type();
       if (result_type.isa<dialect::DenseTensorType>()) {
         auto allocated_dense_tensor_dtype =
             paddle::dialect::AllocatedDenseTensorType::get(
@@ -209,6 +215,7 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog) {
         op_output_types.push_back(t1);
       }
     }
+    }

     // constuct input
     std::vector<ir::OpResult> vec_inputs;
@@ -249,7 +256,9 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog) {
     // only deal with single output
     if ((*it)->num_results() > 0) {
-      map_value_pair[(*it)->result(0)] = op1->result(0);
+      for (size_t i = 0; i < (*it)->num_results(); ++i) {
+        map_value_pair[(*it)->result(i)] = op1->result(i);
+      }
     }

     program->block()->push_back(op1);
......
@@ -66,6 +66,27 @@ void BuildScope(ir::Block* block,
       continue;
     }

+    if (op_name == "pd.feed") {
+      auto ptr = (*it)->result(0);
+
+      std::string name = "inner_var_" + std::to_string(count++);
+      name_map->emplace(ptr, name);
+      auto var = scope->Var(name);
+      // TODO(phlrain): need to update here, support StringTensor
+      auto out_tensor = var->GetMutable<phi::DenseTensor>();
+      name_map->emplace(ptr, name);
+
+      auto feed_var = scope->Var("feed");
+      int index =
+          (*it)->attributes().at("col").dyn_cast<ir::Int32Attribute>().data();
+      auto feed_list = feed_var->Get<paddle::framework::FeedList>();
+      auto& in_tensor = (PADDLE_GET(phi::DenseTensor, feed_list.at(index)));
+
+      out_tensor->ShareDataWith(in_tensor);
+
+      continue;
+    }
+
     if (op_name == "builtin.combine") {
       auto out_value = (*it)->result(0);
@@ -162,12 +183,12 @@ void BuildInferMetaContext(
   auto runtime_info = std::get<3>(op_yaml_info);

   // int input_index = 0;
   std::vector<std::string> vec_param_list = runtime_info.infer_meta_param;

   for (size_t input_index = 0; input_index < vec_param_list.size();
        input_index++) {
     auto& t = vec_param_list[input_index];
     if (input_index_map.count(t)) {
       // get information from input
       ir::Value ptr = op->operand(input_index_map[t]).source();
@@ -197,7 +218,7 @@ void BuildInferMetaContext(
       if (var->IsType<phi::DenseTensor>()) {
         const phi::TensorBase* tensor_in = &(var->Get<phi::DenseTensor>());
         ctx->EmplaceBackInput(const_cast<phi::TensorBase*>(tensor_in));
-      } else {
+      } else if (var->IsType<paddle::framework::TensorRefArray>()) {
         paddle::small_vector<phi::MetaTensor, phi::kInputSmallVectorSize>
             inputs;
         auto& tensor_array = var->Get<paddle::framework::TensorRefArray>();
@@ -206,6 +227,9 @@ void BuildInferMetaContext(
         }
         ctx->EmplaceBackInputs(std::move(inputs));
+      } else {
+        PADDLE_THROW(phi::errors::Unimplemented("Not support var type [%d] ",
+                                                var->Type()));
       }
     }
   }
@@ -238,8 +262,7 @@ void BuildInferMetaContext(
     }
   }

-  // update here, support fetch list for now
-  // [todo update here]
+  // TODO(phlrain): use var type instead of op name
   if (op->attributes().count("op_name") &&
       (op->attributes().at("op_name").dyn_cast<ir::StrAttribute>().data() ==
        "pd.fetch")) {
@@ -249,10 +272,12 @@ void BuildInferMetaContext(
     auto* out_tensor = &(PADDLE_GET(phi::DenseTensor, fetch_list->at(0)));
     ctx->EmplaceBackOutput(out_tensor);
   } else {
-    ir::Value out_ptr = op->result(0);
+    for (size_t i = 0; i < op->num_results(); ++i) {
+      ir::Value out_ptr = op->result(i);
     auto name = name_map.at(out_ptr);
     ctx->EmplaceBackOutput(scope->Var(name)->Get<phi::DenseTensor>());
   }
+  }
 }

 void BuildPhiKernelContext(
@@ -293,10 +318,14 @@ void BuildPhiKernelContext(
       // get information from input
       ir::Value ptr = op->operand(input_index_map[t]).source();
       auto in_var_name = name_map.at(ptr);

       if (input_map != nullptr) {
         // only deal with single input for now, [todo] need support multi input
         // like concat
+        // TODO(phlrain): OpFuncNode need input_index and output_index,
+        // construct input_index and output_here, should remove input_index and
+        // output_index from OpFuncNode Each in_var_name named "inner_var_" +
+        // index, len("inner_var_") = 10
         size_t tmp_id = std::atol(in_var_name.substr(4, 100).c_str());
         (*input_map)[std::to_string(input_index_map.at(t))].push_back(tmp_id);
       }
@@ -331,7 +360,7 @@ void BuildPhiKernelContext(
       if (var->IsType<phi::DenseTensor>()) {
         const phi::TensorBase* tensor_in = &(var->Get<phi::DenseTensor>());
         ctx->EmplaceBackInput(tensor_in);
-      } else {
+      } else if (var->IsType<paddle::framework::TensorRefArray>()) {
         paddle::small_vector<const phi::TensorBase*> inputs;
         auto& tensor_array = var->Get<paddle::framework::TensorRefArray>();
         for (size_t i = 0; i < tensor_array.size(); ++i) {
@@ -339,6 +368,13 @@ void BuildPhiKernelContext(
         }
         ctx->EmplaceBackInputs(std::move(inputs));
+      } else if (var->IsType<paddle::framework::FeedList>()) {
+        auto feed_list = var->Get<paddle::framework::FeedList>();
+        auto* in_tensor = &(PADDLE_GET(phi::DenseTensor, feed_list.at(0)));
+        ctx->EmplaceBackOutput(in_tensor);
+      } else {
+        PADDLE_THROW(phi::errors::Unimplemented("Not support var type [%d] ",
+                                                var->Type()));
       }
     }
   }
@@ -371,6 +407,7 @@ void BuildPhiKernelContext(
     }
   }

+  // TODO(phlrain): use var type instead of op name
   if (op->attributes().count("op_name") &&
       (op->attributes().at("op_name").dyn_cast<ir::StrAttribute>().data() ==
        "pd.fetch")) {
@@ -380,7 +417,8 @@ void BuildPhiKernelContext(
     auto* out_tensor = &(PADDLE_GET(phi::DenseTensor, fetch_list->at(0)));
     ctx->EmplaceBackOutput(out_tensor);
   } else {
-    ir::Value out_ptr = op->result(0);
+    for (size_t i = 0; i < op->num_results(); ++i) {
+      ir::Value out_ptr = op->result(i);
     auto name = name_map.at(out_ptr);
     ctx->EmplaceBackOutput(const_cast<phi::DenseTensor*>(
         &(scope->Var(name)->Get<phi::DenseTensor>())));
@@ -388,10 +426,16 @@ void BuildPhiKernelContext(
     if (output_map != nullptr) {
       // only deal with single input for now, [todo] need support multi input
       // like concat
+      // TODO(phlrain): OpFuncNode need input_index and output_index,
+      // construct input_index and output_here, should remove input_index and
+      // output_index from OpFuncNode Each in_var_name named "inner_var_" +
+      // index, len("inner_var_") = 10
       size_t tmp_id = std::atol(name.substr(4, 100).c_str());
       (*output_map)["out"].push_back(tmp_id);
     }
   }
+  }
 }

 }  // namespace ir
@@ -540,6 +540,8 @@ ir::Operation* FeedOpHandler(ir::IrContext* ctx,
       GenerateOperationOutput(ctx, op_desc, output_infos);
   ir::AttributeMap attribute_map = {
       {"name", ir::StrAttribute::get(ctx, op_desc.OutputArgumentNames()[0])},
+      {"col",
+       ir::Int32Attribute::get(ctx, op_desc.GetAttrIfExists<int>("col"))},
   };
   ir::Operation* operation =
......
@@ -297,6 +297,7 @@
     out : Out

 - op : atan2
+  backward : atan2_grad
   inputs :
     {x : X1, y : X2}
   outputs :
......
@@ -1635,6 +1635,7 @@ class Executor:
         )

         self._feed_data(program, feed, feed_var_name, scope)
+
         if hasattr(program, 'lr_scheduler'):
             from paddle.optimizer.lr import LRScheduler
......
@@ -27,13 +27,15 @@ class TestNewIr(unittest.TestCase):
         place = paddle.CPUPlace()
         exe = paddle.static.Executor(place)

-        x = paddle.ones([2, 2], dtype="float32")
-        y = paddle.ones([2, 2], dtype="float32")
-        z = x + y
-        out = exe.run(
-            paddle.static.default_main_program(), {}, fetch_list=[z.name]
-        )
+        main_program = paddle.static.Program()
+        new_scope = paddle.static.Scope()
+        with paddle.static.scope_guard(new_scope):
+            with paddle.static.program_guard(main_program):
+                x = paddle.ones([2, 2], dtype="float32")
+                y = paddle.ones([2, 2], dtype="float32")
+                z = x + y
+                out = exe.run(main_program, {}, fetch_list=[z.name])

         gold_res = np.ones([2, 2], dtype="float32") * 2
@@ -45,15 +47,44 @@ class TestCombineOp(unittest.TestCase):
         place = paddle.CPUPlace()
         exe = paddle.static.Executor(place)

-        x = paddle.ones([2, 2], dtype="float32")
-        y = paddle.ones([2, 2], dtype="float32")
-        z = paddle.linalg.multi_dot([x, y])
-        out = exe.run(
-            paddle.static.default_main_program(), {}, fetch_list=[z.name]
-        )
-
-        gold_res = np.ones([2, 2], dtype="float32") * 2
-        np.testing.assert_array_equal(out[0], gold_res)
+        main_program = paddle.static.Program()
+        new_scope = paddle.static.Scope()
+        with paddle.static.scope_guard(new_scope):
+            with paddle.static.program_guard(main_program):
+                x = paddle.ones([2, 2], dtype="float32")
+                y = paddle.ones([2, 2], dtype="float32")
+                z = paddle.linalg.multi_dot([x, y])
+                out = exe.run(main_program, {}, fetch_list=[z.name])
+
+        gold_res = np.ones([2, 2], dtype="float32") * 2
+        np.testing.assert_array_equal(out[0], gold_res)
+
+
+class TestFeedOp(unittest.TestCase):
+    def test_with_new_ir(self):
+        place = paddle.CPUPlace()
+        exe = paddle.static.Executor(place)
+
+        main_program = paddle.static.Program()
+        new_scope = paddle.static.Scope()
+        with paddle.static.scope_guard(new_scope):
+            with paddle.static.program_guard(main_program):
+                x = paddle.static.data("x", [2, 2], dtype="float32")
+                y = paddle.static.data("y", [2, 2], dtype="float32")
+                z = x + y
+
+                np_a = np.random.rand(2, 2).astype("float32")
+                np_b = np.random.rand(2, 2).astype("float32")
+                out = exe.run(
+                    main_program,
+                    feed={"x": np_a, "y": np_b},
+                    fetch_list=[z.name],
+                )
+
+        gold_res = np_a + np_b
+        np.testing.assert_array_equal(out[0], gold_res)
......