Unverified commit 75517841, authored by hong, committed via GitHub

[NewIR]Change feed list to variable list && support GPU (#55401)

* add feed with place op

* remove useless unittest

* update mkldnn

* update

* new ir support builtin slice op

* fix phi kernel adaptor bug

* add enable_static

* remove useless test case

* change feed list to single variable

* support gpu

* fix bug

* remove template

* add more data type

* fix compile bug
Parent 7eeff7b1
......@@ -18,6 +18,8 @@ limitations under the License. */
#include "glog/logging.h"
PHI_DECLARE_bool(enable_new_ir_in_executor);
namespace phi {
class DenseTensor;
} // namespace phi
......@@ -34,6 +36,19 @@ void SetFeedVariable(Scope* scope,
// If var_name Variable is not found in GlobalScope, a new variable will
// be created.
VLOG(3) << "SetFeedVariable name=" << var_name << " index=" << index;
if (FLAGS_enable_new_ir_in_executor) {
// share data with the input tensor
auto inner_var_name = var_name + "_" + std::to_string(index);
auto feed_ele = scope->Var(inner_var_name);
if (!feed_ele->IsType<phi::DenseTensor>()) {
VLOG(3) << "Reset " << inner_var_name << " to phi::DenseTensor";
feed_ele->Clear();
}
auto val = feed_ele->GetMutable<phi::DenseTensor>();
val->ShareDataWith(input);
// set lod
val->set_lod(input.lod());
} else {
Variable* g_feed_value = scope->Var(var_name);
auto& feed_inputs = *(g_feed_value->GetMutable<FeedList>());
if (index >= feed_inputs.size()) {
......@@ -44,6 +59,7 @@ void SetFeedVariable(Scope* scope,
val.ShareDataWith(input);
// set lod
val.set_lod(input.lod());
}
}
void SetFeedVariable(Scope* scope,
......
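A rough illustration of the branch added in the hunk above: with `FLAGS_enable_new_ir_in_executor` on, each fed tensor gets its own scope variable named `<var_name>_<index>` whose buffer is shared with the input, instead of occupying a slot inside the single `FeedList` variable. The sketch below is a minimal standalone stand-in (ToyTensor, ToyScope are hypothetical toy types, not the real phi::DenseTensor or executor Scope API):

```cpp
// Toy sketch of the per-index feed naming scheme used in the new-IR branch.
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <vector>

// Stand-ins for phi::DenseTensor and the executor Scope.
struct ToyTensor {
  std::shared_ptr<std::vector<float>> data;  // shared buffer, like ShareDataWith
};

struct ToyScope {
  std::map<std::string, ToyTensor> vars;
};

// Mirrors SetFeedVariable's new-IR branch: build the inner name and share the
// input buffer with the scope variable rather than copying it.
void SetFeed(ToyScope* scope, const ToyTensor& input,
             const std::string& var_name, size_t index) {
  const std::string inner_var_name = var_name + "_" + std::to_string(index);
  scope->vars[inner_var_name].data = input.data;  // share, do not copy
}

int main() {
  ToyScope scope;
  ToyTensor in{std::make_shared<std::vector<float>>(std::vector<float>{1.f, 2.f})};
  SetFeed(&scope, in, "feed", 0);
  std::cout << "feed_0 shares buffer: "
            << (scope.vars["feed_0"].data == in.data) << "\n";
  return 0;
}
```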
......@@ -62,11 +62,11 @@ StandaloneExecutor::StandaloneExecutor(const platform::Place& place,
execution_config.skip_gc_vars = job->SkipGcVars();
// TODO(phlrain) we only support cpu for now
if (FLAGS_enable_new_ir_in_executor && platform::is_cpu_place(place)) {
if (FLAGS_enable_new_ir_in_executor) {
VLOG(6) << "begin to translate" << std::endl;
auto base_program = paddle::TranslateLegacyProgramToProgram(*program);
auto kernel_program =
paddle::dialect::PdOpLowerToKernelPass(base_program.get());
paddle::dialect::PdOpLowerToKernelPass(base_program.get(), place);
interpretercores_.emplace_back(std::make_shared<InterpreterCore>(
place_, std::move(kernel_program), scope_, execution_config));
} else {
......
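The executor change above drops the `is_cpu_place(place)` guard and forwards the executor's place into the lowering pass; as the header diff further down shows, `PdOpLowerToKernelPass` gains a `phi::Place` parameter defaulting to `phi::CPUPlace()`, so existing CPU-only call sites stay source-compatible. A minimal standalone sketch of that call-site shape, with toy types in place of `ir::Program` and `phi::Place`:

```cpp
// Toy sketch of a lowering entry point that takes a target place with a CPU
// default (not the real Paddle API).
#include <iostream>
#include <memory>

enum class Place { kCPU, kGPU };

struct Program {
  Place place;  // toy: remember which backend the program was lowered for
};

// Mirrors the shape of PdOpLowerToKernelPass(prog, place = CPUPlace()).
std::unique_ptr<Program> LowerToKernel(const Program& /*prog*/,
                                       Place place = Place::kCPU) {
  return std::make_unique<Program>(Program{place});
}

int main() {
  Program base{Place::kCPU};
  auto cpu_lowered = LowerToKernel(base);               // old call sites: default CPU
  auto gpu_lowered = LowerToKernel(base, Place::kGPU);  // executor forwards its place
  std::cout << (gpu_lowered->place == Place::kGPU ? "GPU" : "CPU") << "\n";
  return 0;
}
```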
......@@ -227,3 +227,30 @@
inplace: null
view: null
backward: null
- name: shaddow_feed
inputs:
- typename: Tensor
name: x
optional: false
no_need_buffer: false
data_transform: {}
attrs: []
outputs:
- {typename: Tensor, name: out, optional: false, intermediate: false}
no_need_buffer: null
data_transform: null
infer_meta:
func: UnchangedInferMeta
param: [x]
kernel:
func: [shaddow_feed]
param: [x]
backend: null
layout: null
data_type: null
dispatch: {fetch: null}
force_backend: null
inplace: null
backward: null
......@@ -193,26 +193,13 @@ void HandleForSpecialOp(
if (op_name == "pd.feed") {
auto value = op->result(0);
auto var = CreateVar(value,
inner_scope,
var_name_prefix,
false,
value_2_var_name,
variable_2_var_name,
var_name_2_id,
variable_list);
// TODO(phlrain): need to update here, support StringTensor
auto out_tensor = var->GetMutable<phi::DenseTensor>();
VLOG(6) << "link feed output to feed in variable" << inner_scope;
auto feed_var =
const_cast<paddle::framework::Scope*>(inner_scope->root())->Var("feed");
VLOG(6) << "Create var: feed in scope " << inner_scope->root();
int index =
op->attributes().at("col").dyn_cast<ir::Int32Attribute>().data();
auto feed_list = feed_var->Get<paddle::framework::FeedList>();
auto& in_tensor = (PADDLE_GET(phi::DenseTensor, feed_list.at(index)));
out_tensor->ShareDataWith(in_tensor);
out_tensor->set_lod(in_tensor.lod());
auto feed_var_name = "feed_" + std::to_string(index);
value_2_var_name->emplace(value, feed_var_name);
}
if (op_name == "builtin.combine") {
......
......@@ -53,7 +53,7 @@ phi::KernelKey GetKernelKey(
ir::Operation* op,
const phi::Place& place,
const std::unordered_map<ir::Value, ir::OpResult>& map_value_pair,
const dialect::OpYamlInfoParser* op_info_parser = nullptr) {
std::unique_ptr<dialect::OpYamlInfoParser> op_info_parser = nullptr) {
if (op->name() == "pd.feed") {
// NOTE: for now the feed op doesn't need a kernel, so the data type comes
// from the Op Result; the next op uses the base program's datatype
......@@ -223,11 +223,11 @@ phi::KernelKey GetKernelKey(
return res;
}
std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog) {
std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
phi::Place place) {
auto program = std::make_unique<ir::Program>(ir::IrContext::Instance());
auto block = prog->block();
phi::Place cpu_place(phi::AllocationType::CPU);
ir::IrContext* ctx = ir::IrContext::Instance();
ctx->GetOrRegisterDialect<paddle::dialect::PaddleDialect>();
......@@ -244,14 +244,19 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog) {
VLOG(6) << "op name " << (*it)->name();
paddle::dialect::OpYamlInfoInterface op_info_interface =
(*it)->dyn_cast<paddle::dialect::OpYamlInfoInterface>();
OpYamlInfoParser* op_info_parser = nullptr;
std::unique_ptr<OpYamlInfoParser> op_info_parser;
if (op_info_interface) {
op_info_parser = new OpYamlInfoParser(op_info_interface.GetOpInfo());
op_info_parser.reset(new OpYamlInfoParser(op_info_interface.GetOpInfo()));
}
std::string kernel_fn_str;
if (op_info_parser != nullptr) {
kernel_fn_str = op_info_parser->OpRuntimeInfo().kernel_func[0];
}
auto kernel_key =
GetKernelKey(*it, cpu_place, map_value_pair, op_info_parser);
GetKernelKey(*it, place, map_value_pair, std::move(op_info_parser));
VLOG(6) << "kernel type " << kernel_key;
// create new Op
// only for single output
// need to update new kernel key layout and data type
......@@ -305,11 +310,6 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog) {
// construct input
std::vector<ir::OpResult> vec_inputs;
std::string kernel_fn_str;
if (op_info_parser != nullptr) {
kernel_fn_str = op_info_parser->OpRuntimeInfo().kernel_func[0];
}
if ((*it)->num_operands() > 0) {
for (size_t i = 0; i < (*it)->num_operands(); ++i) {
auto cur_in = (*it)->operand(i);
......@@ -404,6 +404,35 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog) {
}
program->block()->push_back(op);
if ((*it)->name() == "pd.feed" && platform::is_gpu_place(place)) {
// add shaddow feed op
phi::KernelKey shaddow_key{
phi::Backend::GPU,
phi::DataLayout::ANY,
TransToPhiDataType(
(*it)->result(0).type().dyn_cast<DenseTensorType>().dtype())};
std::unordered_map<std::string, ir::Attribute> attr_map{
{"op_name", ir::StrAttribute::get(ctx, "pd.shaddow_feed")},
{"kernel_name", ir::StrAttribute::get(ctx, "shaddow_feed")},
{"kernel_key", dialect::KernelAttribute::get(ctx, shaddow_key)}};
auto out_type = paddle::dialect::AllocatedDenseTensorType::get(
ctx,
phi::TransToPhiPlace(shaddow_key.backend()),
(*it)->result(0).type().dyn_cast<dialect::DenseTensorType>());
ir::Operation* shaddow_op =
ir::Operation::Create({op->result(0)}, attr_map, {out_type}, op_info);
map_op_pair[*it] = shaddow_op;
program->block()->push_back(shaddow_op);
if ((*it)->num_results() > 0) {
for (size_t i = 0; i < shaddow_op->num_results(); ++i) {
map_value_pair[(*it)->result(i)] = shaddow_op->result(i);
}
}
}
}
return program;
......
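The block added to the pass above appends a `pd.shaddow_feed` op after every `pd.feed` when lowering for a GPU place, so host-fed data is moved onto the device before the first compute op runs. A self-contained sketch of that insertion pattern, using a toy op list instead of the real `ir::Operation`/`ir::Program` machinery (op names other than `pd.feed`/`pd.shaddow_feed` are hypothetical):

```cpp
// Toy sketch: when targeting GPU, follow every feed op with a shaddow_feed op.
#include <iostream>
#include <string>
#include <vector>

enum class Place { kCPU, kGPU };

struct ToyOp {
  std::string name;
};

// Mirrors the loop in PdOpLowerToKernelPass: push the lowered op, then, if we
// are targeting GPU and the op is a feed, push a shaddow_feed right after it.
std::vector<ToyOp> LowerOps(const std::vector<ToyOp>& ops, Place place) {
  std::vector<ToyOp> lowered;
  for (const auto& op : ops) {
    lowered.push_back(op);
    if (op.name == "pd.feed" && place == Place::kGPU) {
      lowered.push_back(ToyOp{"pd.shaddow_feed"});
    }
  }
  return lowered;
}

int main() {
  std::vector<ToyOp> ops = {{"pd.feed"}, {"pd.matmul"}, {"pd.fetch"}};
  for (const auto& op : LowerOps(ops, Place::kGPU)) {
    std::cout << op.name << "\n";  // pd.feed, pd.shaddow_feed, pd.matmul, pd.fetch
  }
  return 0;
}
```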
......@@ -14,11 +14,13 @@
#pragma once
#include "paddle/ir/core/program.h"
#include "paddle/phi/common/place.h"
namespace paddle {
namespace dialect {
std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog);
std::unique_ptr<ir::Program> PdOpLowerToKernelPass(
ir::Program* prog, phi::Place place = phi::CPUPlace());
} // namespace dialect
} // namespace paddle
......@@ -16,6 +16,7 @@
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/feed_with_place_impl.h"
namespace phi {
......@@ -26,11 +27,20 @@ void FeedWithPlaceKernel(const Context& ctx,
DenseTensor* out) {}
} // namespace phi
PD_REGISTER_KERNEL(feed_with_place,
PD_REGISTER_KERNEL(
feed_with_place, CPU, ALL_LAYOUT, phi::FeedWithPlaceKernel, float) {}
PD_REGISTER_KERNEL(shaddow_feed,
CPU,
ALL_LAYOUT,
phi::FeedWithPlaceKernel,
phi::ShaddowFeedKernel,
bool,
float,
int32_t,
int64_t,
double) {}
double,
phi::float16,
phi::bfloat16,
phi::complex64,
phi::complex128) {}
......@@ -16,17 +16,8 @@
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/fetch_impl.h"
namespace phi {
template <typename T, typename Context>
void FetchKernel(const Context& dev_ctx,
const DenseTensor& x,
DenseTensor* out) {
phi::Copy(dev_ctx, x, phi::CPUPlace(), true, out);
out->set_lod(x.lod());
}
} // namespace phi
PD_REGISTER_KERNEL(fetch,
CPU,
ALL_LAYOUT,
......
......@@ -24,4 +24,9 @@ void FeedWithPlaceKernel(const Context& ctx,
phi::DataType data_type,
DenseTensor* out);
template <typename T, typename Context>
void ShaddowFeedKernel(const Context& ctx,
const DenseTensor& x,
DenseTensor* out);
} // namespace phi
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/feed_with_place_kernel.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/feed_with_place_impl.h"
PD_REGISTER_KERNEL(shaddow_feed,
GPU,
ALL_LAYOUT,
phi::ShaddowFeedKernel,
bool,
float,
int32_t,
int64_t,
double,
phi::float16,
phi::bfloat16,
phi::complex64,
phi::complex128) {}
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/fetch_kernel.h"
#include "paddle/phi/kernels/impl/fetch_impl.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
PD_REGISTER_KERNEL(fetch,
GPU,
ALL_LAYOUT,
phi::FetchKernel,
float,
double,
int,
int64_t,
uint8_t,
int8_t,
int16_t,
phi::float16,
phi::bfloat16,
phi::dtype::complex<float>,
phi::dtype::complex<double>,
bool) {}
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/tensor_utils.h"
namespace phi {
template <typename T, typename Context>
void ShaddowFeedKernel(const Context& ctx,
const DenseTensor& x,
DenseTensor* out) {
ctx.template Alloc<T>(out);
if (x.place() == out->place()) {
out->ShareDataWith(x);
out->set_lod(x.lod());
} else {
phi::Copy<Context>(ctx, x, ctx.GetPlace(), true, out);
}
}
} // namespace phi
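The `ShaddowFeedKernel` implementation above either aliases the fed tensor (when it already lives on the output's place) or performs a blocking copy to the context's place. A toy, self-contained sketch of that decision, not using the real phi types:

```cpp
// Toy sketch of the share-vs-copy decision in ShaddowFeedKernel.
#include <iostream>
#include <memory>
#include <vector>

enum class Place { kCPU, kGPU };

struct ToyTensor {
  Place place = Place::kCPU;
  std::shared_ptr<std::vector<float>> data;
};

void ShadowFeed(const ToyTensor& x, Place target, ToyTensor* out) {
  out->place = target;
  if (x.place == target) {
    out->data = x.data;  // same place: alias the buffer (like ShareDataWith)
  } else {
    // different place: copy the payload (stands in for phi::Copy)
    out->data = std::make_shared<std::vector<float>>(*x.data);
  }
}

int main() {
  ToyTensor host{Place::kCPU,
                 std::make_shared<std::vector<float>>(std::vector<float>{3.f})};
  ToyTensor on_cpu, on_gpu;
  ShadowFeed(host, Place::kCPU, &on_cpu);  // aliases the host buffer
  ShadowFeed(host, Place::kGPU, &on_gpu);  // would copy to the device
  std::cout << "shared: " << (on_cpu.data == host.data)
            << " copied: " << (on_gpu.data != host.data) << "\n";
  return 0;
}
```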
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/tensor_utils.h"
namespace phi {
template <typename T, typename Context>
void FetchKernel(const Context& ctx, const DenseTensor& x, DenseTensor* out) {
phi::Copy(ctx, x, phi::CPUPlace(), true, out);
}
} // namespace phi
......@@ -24,7 +24,11 @@ paddle.enable_static()
class TestNewIr(unittest.TestCase):
def test_with_new_ir(self):
place = paddle.CPUPlace()
place = (
paddle.CUDAPlace(0)
if paddle.is_compiled_with_cuda()
else paddle.CPUPlace()
)
exe = paddle.static.Executor(place)
main_program = paddle.static.Program()
......@@ -44,7 +48,11 @@ class TestNewIr(unittest.TestCase):
class TestCombineOp(unittest.TestCase):
def test_with_new_ir(self):
place = paddle.CPUPlace()
place = (
paddle.CUDAPlace(0)
if paddle.is_compiled_with_cuda()
else paddle.CPUPlace()
)
exe = paddle.static.Executor(place)
main_program = paddle.static.Program()
......@@ -64,7 +72,11 @@ class TestCombineOp(unittest.TestCase):
class TestFeedOp(unittest.TestCase):
def test_with_new_ir(self):
place = paddle.CPUPlace()
place = (
paddle.CUDAPlace(0)
if paddle.is_compiled_with_cuda()
else paddle.CPUPlace()
)
exe = paddle.static.Executor(place)
main_program = paddle.static.Program()
......@@ -91,6 +103,8 @@ class TestFeedOp(unittest.TestCase):
class TestSelectedRows(unittest.TestCase):
def test_with_new_ir(self):
# TODO(phlrain): support selected rows in GPU
# place = paddle.CUDAPlace(0) if paddle.is_compiled_with_cuda() else paddle.CPUPlace()
place = paddle.CPUPlace()
exe = paddle.static.Executor(place)
......@@ -113,7 +127,11 @@ class TestSelectedRows(unittest.TestCase):
class TestAddGradOp(unittest.TestCase):
def test_with_new_ir(self):
place = paddle.CPUPlace()
place = (
paddle.CUDAPlace(0)
if paddle.is_compiled_with_cuda()
else paddle.CPUPlace()
)
exe = paddle.static.Executor(place)
main_program = paddle.static.Program()
......@@ -143,7 +161,11 @@ class TestAddGradOp(unittest.TestCase):
class TestSplitOp(unittest.TestCase):
def test_with_new_ir(self):
place = paddle.CPUPlace()
place = (
paddle.CUDAPlace(0)
if paddle.is_compiled_with_cuda()
else paddle.CPUPlace()
)
exe = paddle.static.Executor(place)
main_program = paddle.static.Program()
......