Unverified commit 4f652ac2, authored by Y YuanRisheng, committed by GitHub

[New IR]Support build New IR model in python (#56315)

Parent d128a695
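The change below wires the new IR Program through the standalone executor and exposes it to Python. As a quick orientation, here is a minimal, hedged sketch of the Python-side usage it enables; it mirrors the TestBuildModule test added near the end of this diff and assumes FLAGS_enable_new_ir_api is turned on so that paddle.static.Program resolves to the new IR Program.
import numpy as np
import paddle

paddle.enable_static()

main_program = paddle.static.Program()  # the new IR Program when the flag is on
with paddle.static.program_guard(main_program):
    x = paddle.static.data('x', [4, 4], dtype='float32')  # lowered to a pd.data op
    y = paddle.static.data('y', [4, 4], dtype='float32')
    out = paddle.sum(paddle.divide(x, y))

exe = paddle.static.Executor()
(result,) = exe.run(
    feed={'x': np.full([4, 4], 10.0, np.float32),
          'y': np.full([4, 4], 2.0, np.float32)},
    fetch_list=[out],  # a pd.fetch op is appended by _add_new_ir_fetch_ops
)
print(result)  # 10 / 2 summed over 4 x 4 elements = 80.0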
......@@ -378,7 +378,7 @@ std::unique_ptr<::ir::Program> ConstructFowardIrProgram(
auto op_desc = block->PrependOp();
op_desc->SetType("data");
op_desc->SetAttr("index", 0);
op_desc->SetAttr("shape", std::vector<int64_t>());
// TODO(phlrain) : using tensor dtype
op_desc->SetAttr("dtype", 0);
op_desc->SetAttr("place", static_cast<int>(place));
......@@ -393,7 +393,7 @@ std::unique_ptr<::ir::Program> ConstructFowardIrProgram(
auto op_desc = local_program.MutableBlock(0)->PrependOp();
op_desc->SetType("data");
op_desc->SetAttr("index", 0);
op_desc->SetAttr("shape", std::vector<int64_t>());
// TODO(phlrain) : using tensor dtype
op_desc->SetAttr("dtype", 0);
op_desc->SetAttr("place", static_cast<int>(place));
......@@ -479,7 +479,7 @@ std::unique_ptr<::ir::Program> ConstructBackwardIrProgram(
}
auto op_desc = local_program.MutableBlock(0)->PrependOp();
op_desc->SetType("data");
op_desc->SetAttr("index", 0);
op_desc->SetAttr("shape", std::vector<int64_t>());
// TODO(phlrain) : using tensor dtype
op_desc->SetAttr("dtype", 0);
op_desc->SetAttr("place", static_cast<int>(place));
......
......@@ -19,6 +19,7 @@ limitations under the License. */
#include "glog/logging.h"
PHI_DECLARE_bool(enable_new_ir_in_executor);
PHI_DECLARE_bool(enable_new_ir_api);
namespace phi {
class DenseTensor;
......
......@@ -39,6 +39,28 @@ Plan::Plan(const std::vector<std::shared_ptr<Job>>& job_list,
}
}
Plan::Plan(
const std::vector<std::shared_ptr<Job>>& job_list,
const std::unordered_map<std::string, std::shared_ptr<::ir::Program>>&
type_to_ir_program)
: job_list_(job_list),
type_to_ir_program_(type_to_ir_program),
micro_batch_num_(1) {
for (size_t i = 0; i < job_list_.size(); ++i) {
const auto& job = job_list_[i];
PADDLE_ENFORCE(
type_to_ir_program_.find(job->Type()) != type_to_ir_program_.end(),
phi::errors::InvalidArgument(
"The %d-th job (type:%s, micro_batch_id:%d) has no "
"corresponding Program.",
i,
job->Type(),
job->MicroBatchId()));
micro_batch_num_ = std::max(micro_batch_num_, job->MicroBatchId() + 1);
}
}
const std::vector<std::shared_ptr<Job>>& Plan::JobList() const {
return job_list_;
}
......@@ -47,6 +69,11 @@ const ProgramDesc* Plan::Program(const std::string& job_type) const {
return type_to_program_.at(job_type);
}
std::shared_ptr<::ir::Program> Plan::IrProgram(
const std::string& job_type) const {
return type_to_ir_program_.at(job_type);
}
int64_t Plan::MicroBatchNum() const { return micro_batch_num_; }
} // namespace interpreter
......
......@@ -21,6 +21,7 @@
#include "paddle/fluid/framework/new_executor/interpreter/job.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/ir/core/program.h"
#include "paddle/phi/core/macros.h"
namespace paddle {
......@@ -31,17 +32,24 @@ class Plan final {
public:
Plan(const std::vector<std::shared_ptr<Job>>& job_list,
const std::unordered_map<std::string, ProgramDesc*>& type_to_program);
Plan(const std::vector<std::shared_ptr<Job>>& job_list,
const std::unordered_map<std::string, std::shared_ptr<::ir::Program>>&
type_to_ir_program);
~Plan() = default;
const std::vector<std::shared_ptr<Job>>& JobList() const;
const ProgramDesc* Program(const std::string& job_type) const;
std::shared_ptr<::ir::Program> IrProgram(const std::string& job_type) const;
int64_t MicroBatchNum() const;
private:
const std::vector<std::shared_ptr<Job>> job_list_;
const std::unordered_map<std::string, ProgramDesc*> type_to_program_;
const std::unordered_map<std::string, std::shared_ptr<::ir::Program>>
type_to_ir_program_;
int64_t micro_batch_num_;
};
......
......@@ -21,8 +21,10 @@
#include "paddle/fluid/ir/transforms/pd_op_to_kernel_pass.h"
#include "paddle/fluid/ir_adaptor/translator/translate.h"
#include "paddle/ir/core/program.h"
PHI_DECLARE_bool(enable_new_ir_in_executor);
PHI_DECLARE_bool(enable_new_ir_api);
namespace paddle {
namespace framework {
......@@ -34,7 +36,6 @@ StandaloneExecutor::StandaloneExecutor(const platform::Place& place,
for (int64_t i = 0; i < micro_batch_num; ++i) {
micro_batch_scopes_.emplace_back(&scope->NewScope());
}
std::stringstream ss;
ss << "Create " << micro_batch_num << " micro_batch_scopes for scope "
<< scope_ << " : ";
......@@ -46,9 +47,14 @@ StandaloneExecutor::StandaloneExecutor(const platform::Place& place,
const auto& jobs = plan_.JobList();
for (const auto& job : jobs) {
const std::string& job_type = job->Type();
std::shared_ptr<ProgramDesc> program =
std::make_shared<ProgramDesc>(*(plan_.Program(job_type)));
SetColAttrForFetchOps(*job, program);
std::shared_ptr<ProgramDesc> program = nullptr;
std::shared_ptr<::ir::Program> ir_program = nullptr;
if (FLAGS_enable_new_ir_api) {
ir_program = plan_.IrProgram(job_type);
} else {
program = std::make_shared<ProgramDesc>(*(plan_.Program(job_type)));
SetColAttrForFetchOps(*job, program);
}
int64_t micro_batch_id = job->MicroBatchId();
PADDLE_ENFORCE(
......@@ -64,9 +70,11 @@ StandaloneExecutor::StandaloneExecutor(const platform::Place& place,
// TODO(phlrain) we only support cpu for now
if (FLAGS_enable_new_ir_in_executor) {
VLOG(6) << "begin to translate" << std::endl;
auto base_program = paddle::TranslateLegacyProgramToProgram(*program);
std::shared_ptr<::ir::Program> base_program = ir_program;
if (!FLAGS_enable_new_ir_api) {
VLOG(6) << "begin to translate" << std::endl;
base_program = paddle::TranslateLegacyProgramToProgram(*program);
}
auto block = base_program->block();
for (auto it = block->begin(); it != block->end(); ++it) {
if ((*it)->name() == "pd.fetch") {
......@@ -88,7 +96,6 @@ StandaloneExecutor::StandaloneExecutor(const platform::Place& place,
"@fetch";
}
}
auto kernel_program =
paddle::dialect::PdOpLowerToKernelPass(base_program.get(), place);
interpretercores_.emplace_back(
......
......@@ -22,28 +22,12 @@
#include "paddle/ir/core/builtin_type.h"
#include "paddle/phi/common/int_array.h"
#include "paddle/phi/common/scalar.h"
#include "paddle/phi/core/attribute.h"
namespace paddle {
namespace dialect {
using VariantType = paddle::variant<bool,
int,
int64_t,
float,
double,
std::string,
std::vector<bool>,
std::vector<int>,
std::vector<int64_t>,
std::vector<float>,
std::vector<double>,
std::vector<std::string>,
phi::Scalar,
std::vector<phi::Scalar>,
phi::IntArray,
phi::DataType,
phi::DataLayout,
phi::Place>;
using VariantType = phi::Attribute;
// TODO(zhangbo): The builtin type needs to cover all data types of
// phi::DataType.
......
......@@ -226,10 +226,10 @@ phi::KernelKey GetKernelKey(
if (op->name() == "pd.data") {
// NOTE: for now the feed op doesn't need a kernel, so the data type comes from
// the OpResult and the next op uses the base program's data type.
auto t =
auto data_place =
op->attributes().at("place").dyn_cast<dialect::PlaceAttribute>().data();
auto backend = paddle::experimental::ParseBackend(t);
auto backend = paddle::experimental::ParseBackend(data_place);
return {backend,
phi::DataLayout::ANY,
......@@ -240,6 +240,7 @@ phi::KernelKey GetKernelKey(
phi::Backend kernel_backend = phi::Backend::UNDEFINED;
phi::DataLayout kernel_layout = phi::DataLayout::UNDEFINED;
phi::DataType kernel_data_type = phi::DataType::UNDEFINED;
if (op_info_parser != nullptr) {
// only support non-vector inputs for now
int tensor_input_number = op_info_parser->InputTensorNumber();
......@@ -355,6 +356,27 @@ phi::KernelKey GetKernelKey(
for (auto& fake_tensor : fake_tensors) {
kernel_key_parser.AssignKernelKeySet(fake_tensor);
}
// Because the place cannot be determined when the data op is built,
// the output place of the data op is undefined. That means we don't
// know how to select the kernel for the next op that uses the data
// op's output as input, so we need to set the kernel backend
// manually.
if (op->operand_source(i).GetDefiningOp()->name() == "pd.data") {
auto data_op = op->operand_source(i).GetDefiningOp();
auto data_place = data_op->attributes()
.at("place")
.dyn_cast<dialect::PlaceAttribute>()
.data();
auto data_op_backend = paddle::experimental::ParseBackend(data_place);
if (data_op_backend == phi::Backend::UNDEFINED) {
data_op_backend = paddle::experimental::ParseBackend(place);
}
kernel_key_parser.key_set.backend_set =
kernel_key_parser.key_set.backend_set |
paddle::experimental::BackendSet(data_op_backend);
}
}
auto kernel_key_set = kernel_key_parser.key_set;
......@@ -652,7 +674,6 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
if (op_info_parser != nullptr) {
kernel_fn_str = op_info_parser->OpRuntimeInfo().kernel_func[0];
}
auto kernel_key =
GetKernelKey(op_item, place, map_value_pair, op_info_parser.get());
VLOG(6) << "kernel type " << kernel_key;
......
......@@ -965,11 +965,14 @@ struct DataOpTranscriber : public FeedOpTranscriber {
const OpAttributeInfoList& op_attr_infos,
const OpDesc& op_desc) override {
int allocate_type = paddle::get<int>(op_desc.GetAttr("place"));
auto& attribute_translator = AttributeTranslator::instance();
ir::Attribute shape = attribute_translator(
"paddle::dialect::IntArrayAttribute", op_desc.GetAttr("shape"));
ir::AttributeMap attribute_map = {
{"name",
ir::StrAttribute::get(ctx,
op_desc.GetAttrIfExists<std::string>("name"))},
{"index", ir::Int64Attribute::get(ctx, 0)},
{"shape", shape},
{"dtype",
paddle::dialect::DataTypeAttribute::get(ctx, phi::DataType::FLOAT32)},
{"place",
......
......@@ -24,6 +24,7 @@
#include "paddle/fluid/pybind/pybind_variant_caster.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/ir/dialect/paddle_dialect/interface/op_yaml_info.h"
#include "paddle/fluid/ir/dialect/paddle_dialect/ir/api_builder.h"
#include "paddle/fluid/ir/dialect/paddle_dialect/ir/pd_dialect.h"
......@@ -58,7 +59,7 @@ PyTypeObject *g_ir_opresult_pytype = nullptr;
void BindOpsAPI(pybind11::module *module);
void BindProgram(py::module *m) {
py::class_<Program> program(*m, "Program", R"DOC(
py::class_<Program, std::shared_ptr<Program>> program(*m, "Program", R"DOC(
Create Python Program. Program is an abstraction of model structure, divided into
computational graphs and weights. The Program has a main block that stores the computational
graphs.
......@@ -103,18 +104,23 @@ void BindProgram(py::module *m) {
"__init__",
[](Program &self) { new (&self) Program(ir::IrContext::Instance()); })
.def("__str__",
[](Program &self) {
[](const std::shared_ptr<Program> &self) {
std::ostringstream print_stream;
self.Print(print_stream);
self->Print(print_stream);
return print_stream.str();
})
.def("parameters_num", &Program::parameters_num)
.def("block",
py::overload_cast<>(&Program::block),
return_value_policy::reference)
.def("block",
py::overload_cast<>(&Program::block, py::const_),
return_value_policy::reference);
.def("parameters_num",
[](const std::shared_ptr<Program> &self) {
return self->parameters_num();
})
.def(
"block",
[](std::shared_ptr<Program> self) { return self->block(); },
return_value_policy::reference)
.def(
"block",
[](const std::shared_ptr<Program> &self) { return self->block(); },
return_value_policy::reference);
}
void BindBlock(py::module *m) {
......@@ -395,7 +401,14 @@ void BindUtils(pybind11::module *m) {
[]() { APIBuilder::Instance().ResetInsertionPointToStart(); });
m->def("reset_insertion_point_to_end",
[]() { APIBuilder::Instance().ResetInsertionPointToEnd(); });
m->def("translate_to_new_ir", &paddle::TranslateLegacyProgramToProgram, R"DOC(
m->def(
"translate_to_new_ir",
[](const ::paddle::framework::ProgramDesc &legacy_program) {
std::shared_ptr<Program> ret =
std::move(paddle::TranslateLegacyProgramToProgram(legacy_program));
return ret;
},
R"DOC(
Convert Fluid Program to New IR Program.
Args:
......
......@@ -40,6 +40,14 @@ static PyObject *divide(PyObject *self, PyObject *args, PyObject *kwargs) {
return static_api_divide(self, args, kwargs);
}
static PyObject *data(PyObject *self, PyObject *args, PyObject *kwargs) {
return static_api_data(self, args, kwargs);
}
static PyObject *fetch(PyObject *self, PyObject *args, PyObject *kwargs) {
return static_api_fetch(self, args, kwargs);
}
static PyObject *concat(PyObject *self, PyObject *args, PyObject *kwargs) {
return static_api_concat(self, args, kwargs);
}
......@@ -68,6 +76,14 @@ static PyMethodDef OpsAPI[] = {{"add_n",
(PyCFunction)(void (*)(void))full,
METH_VARARGS | METH_KEYWORDS,
"C++ interface function for full."},
{"data",
(PyCFunction)(void (*)(void))data,
METH_VARARGS | METH_KEYWORDS,
"C++ interface function for data."},
{"fetch",
(PyCFunction)(void (*)(void))fetch,
METH_VARARGS | METH_KEYWORDS,
"C++ interface function for fetch."},
{nullptr, nullptr, 0, nullptr}};
void BindOpsAPI(pybind11::module *module) {
......
......@@ -199,6 +199,7 @@ limitations under the License. */
#include "paddle/fluid/prim/utils/eager/eager_tensor_operants.h"
#include "paddle/fluid/prim/utils/static/static_tensor_operants.h"
#include "paddle/fluid/pybind/eager_utils.h"
#include "paddle/ir/core/program.h"
#include "paddle/phi/api/ext/op_meta_info.h"
#include "paddle/phi/api/include/operants_manager.h"
#include "paddle/phi/api/include/tensor_operants.h"
......@@ -1979,6 +1980,13 @@ All parameter, weight, gradient are variables in Paddle.
&>(),
py::arg("job_list"),
py::arg("type_to_program"))
.def(
py::init<
const std::vector<std::shared_ptr<framework::interpreter::Job>> &,
const std::unordered_map<std::string,
std::shared_ptr<::ir::Program>> &>(),
py::arg("job_list"),
py::arg("type_to_ir_program"))
.def("job_list", &framework::interpreter::Plan::JobList)
.def("micro_batch_num", &framework::interpreter::Plan::MicroBatchNum)
.def("program", &framework::interpreter::Plan::Program);
......
......@@ -46,19 +46,16 @@ namespace pybind {
namespace py = pybind11;
namespace reader = operators::reader;
// Check whether the tensor shape matches the VarDesc shape
// Return the different shape if exists
static paddle::optional<std::vector<int64_t>> DiffTensorShapeWithVarDesc(
static paddle::optional<std::vector<int64_t>> DiffTensorShape(
const phi::DenseTensor &tensor,
const framework::VarDesc &var_desc,
const std::vector<int64_t> &target_shape,
size_t num_places) {
auto tensor_shape = tensor.dims();
auto desc_shape = var_desc.GetShape();
int64_t rank = tensor_shape.size();
if (UNLIKELY(rank == 0)) {
if (!desc_shape.empty()) { // Tensor rank = 0 but desc does not match
if (!target_shape.empty()) { // Tensor rank = 0 but desc does not match
return phi::vectorize<int64_t>(tensor_shape);
} else {
return paddle::none;
......@@ -76,8 +73,8 @@ static paddle::optional<std::vector<int64_t>> DiffTensorShapeWithVarDesc(
int64_t split_size = (tensor_shape[0] + num_places - 1) / num_places;
int64_t remainder = (split_size == 0 ? 0 : tensor_shape[0] % split_size);
tensor_shape[0] = split_size;
if (desc_shape[0] >= 0) { // need check dim 0
if (tensor_shape[0] != desc_shape[0]) {
if (target_shape[0] >= 0) { // need check dim 0
if (tensor_shape[0] != target_shape[0]) {
return phi::vectorize<int64_t>(tensor_shape);
}
......@@ -94,7 +91,7 @@ static paddle::optional<std::vector<int64_t>> DiffTensorShapeWithVarDesc(
0,
platform::errors::InvalidArgument(
"Tensor shape at dim %d must not be less than 0", idx));
if (desc_shape[idx] >= 0 && tensor_shape[idx] != desc_shape[idx]) {
if (target_shape[idx] >= 0 && tensor_shape[idx] != target_shape[idx]) {
return phi::vectorize<int64_t>(tensor_shape);
}
}
......@@ -102,6 +99,16 @@ static paddle::optional<std::vector<int64_t>> DiffTensorShapeWithVarDesc(
return paddle::none;
}
// Check whether the tensor shape matches the VarDesc shape
// Return the different shape if exists
static paddle::optional<std::vector<int64_t>> DiffTensorShapeWithVarDesc(
const phi::DenseTensor &tensor,
const framework::VarDesc &var_desc,
size_t num_places) {
auto desc_shape = var_desc.GetShape();
return DiffTensorShape(tensor, desc_shape, num_places);
}
static const std::shared_ptr<reader::LoDTensorBlockingQueue> &GetQueue(
const std::shared_ptr<reader::LoDTensorBlockingQueue> &queue, size_t idx) {
return queue;
......@@ -399,6 +406,18 @@ void BindReader(py::module *module) {
}
});
m.def("diff_tensor_shape",
[](const phi::DenseTensor &tensor,
const std::vector<int64_t> &target_shape,
size_t num_places) -> py::object {
auto diff = DiffTensorShape(tensor, target_shape, num_places);
if (diff) {
return py::cast(std::move(diff.get()));
} else {
return py::cast(nullptr);
}
});
m.def(
"init_lod_tensor_blocking_queue",
[](framework::Variable &var,
......
......@@ -155,5 +155,55 @@ PyObject *static_api_full(PyObject *self, PyObject *args, PyObject *kwargs) {
}
}
PyObject *static_api_data(PyObject *self, PyObject *args, PyObject *kwargs) {
try {
VLOG(6) << "Add data op into program";
VLOG(8) << "args count: " << (PyTuple_Size(args) / 2);
// Parse Attributes if needed
PyObject *name_obj = PyTuple_GET_ITEM(args, 0);
std::string name = CastPyArg2String(name_obj, "data", 0);
PyObject *shape_obj = PyTuple_GET_ITEM(args, 1);
paddle::experimental::IntArray shape =
CastPyArg2IntArray(shape_obj, "data", 1);
PyObject *dtype_obj = PyTuple_GET_ITEM(args, 2);
phi::DataType dtype = CastPyArg2DataTypeDirectly(dtype_obj, "data", 2);
PyObject *place_obj = PyTuple_GET_ITEM(args, 3);
paddle::Place place = CastPyArg2Place(place_obj, "data", 3);
// Call ir static api
auto out = paddle::dialect::data(name, shape.GetData(), dtype, place);
return ToPyObject(out);
} catch (...) {
ThrowExceptionToPython(std::current_exception());
return nullptr;
}
}
PyObject *static_api_fetch(PyObject *self, PyObject *args, PyObject *kwargs) {
try {
VLOG(6) << "Add fetch op into program";
VLOG(8) << "args count: " << (PyTuple_Size(args) / 2);
// Get OpResult from args
PyObject *x_obj = PyTuple_GET_ITEM(args, 0);
auto x = CastPyArg2OpResult("fetch", x_obj, 0);
// Parse Attributes if needed
PyObject *name_obj = PyTuple_GET_ITEM(args, 1);
std::string name = CastPyArg2String(name_obj, "fetch", 1);
PyObject *col_obj = PyTuple_GET_ITEM(args, 2);
int col = CastPyArg2Int(col_obj, "fetch", 2);
// Call ir static api
auto out = paddle::dialect::fetch(x, name, col);
return ToPyObject(out);
} catch (...) {
ThrowExceptionToPython(std::current_exception());
return nullptr;
}
}
} // namespace pybind
} // namespace paddle
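These entry points are exposed on the Python side as paddle._ir_ops.data and paddle._ir_ops.fetch through the OpsAPI table earlier in this diff. A hedged calling sketch, assuming FLAGS_enable_new_ir_api is enabled so program_guard builds a new IR Program:
import paddle
from paddle.fluid import core
from paddle.fluid.libpaddle import DataType

paddle.enable_static()
with paddle.static.program_guard(paddle.static.Program()):
    # argument order follows static_api_data: (name, shape, dtype, place)
    x = paddle._ir_ops.data("x", [4, 4], DataType.FLOAT32, core.Place())
    # argument order follows static_api_fetch: (x, name, col)
    paddle._ir_ops.fetch(x, "fetch_x", 0)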
......@@ -30,6 +30,8 @@ PyObject *static_api_sum(PyObject *self, PyObject *args, PyObject *kwargs);
PyObject *static_api_divide(PyObject *self, PyObject *args, PyObject *kwargs);
PyObject *static_api_concat(PyObject *self, PyObject *args, PyObject *kwargs);
PyObject *static_api_full(PyObject *self, PyObject *args, PyObject *kwargs);
PyObject *static_api_data(PyObject *self, PyObject *args, PyObject *kwargs);
PyObject *static_api_fetch(PyObject *self, PyObject *args, PyObject *kwargs);
} // namespace pybind
} // namespace paddle
......@@ -631,14 +631,14 @@
backward : cumsum_grad
- op : data
args : (int64_t index, DataType dtype, str name, Place place)
args : (str name, IntArray shape, DataType dtype, Place place)
output : Tensor(out)
infer_meta :
func : FeedWithPlaceInferMeta
param : [index, dtype]
func : DataInferMeta
param : [name, shape, dtype]
kernel:
func : data
param : [index, dtype]
param : [name, shape, dtype]
data_type : dtype
backend : place
......
......@@ -60,6 +60,15 @@ void CreateInferMetaBase(const std::vector<int64_t>& shape,
out->set_layout(layout);
}
void DataInferMeta(const std::string& name,
const phi::IntArray& shape,
phi::DataType data_type,
MetaTensor* out) {
auto out_dims = phi::make_ddim(shape.GetData());
out->set_dims(out_dims);
out->set_dtype(data_type);
}
void EyeInferMeta(const Scalar& num_rows,
const Scalar& num_columns,
DataType dtype,
......@@ -82,10 +91,6 @@ void EyeInferMeta(const Scalar& num_rows,
out->set_dtype(dtype);
}
void FeedWithPlaceInferMeta(int64_t index,
phi::DataType data_type,
MetaTensor* out) {}
void GaussianInferMeta(const IntArray& shape,
float mean,
float std,
......
......@@ -46,16 +46,17 @@ void CreateInferMetaBase(const std::vector<int64_t>& shape,
DataLayout layout,
MetaTensor* out);
void DataInferMeta(const std::string& name,
const phi::IntArray& shape,
phi::DataType data_type,
MetaTensor* out);
void EyeInferMeta(const Scalar& num_rows,
const Scalar& num_columns,
DataType dtype,
MetaTensor* out,
MetaConfig config = MetaConfig());
void FeedWithPlaceInferMeta(int64_t index,
phi::DataType data_type,
MetaTensor* out);
void GaussianInferMeta(const IntArray& shape,
float mean,
float std,
......
......@@ -24,7 +24,8 @@ namespace phi {
template <typename T, typename Context>
void DataKernel(const Context& ctx,
int64_t index,
const std::string& name,
const phi::IntArray& shape,
phi::DataType data_type,
DenseTensor* out) {}
......
......@@ -14,15 +14,16 @@
#pragma once
#include "paddle/phi/common/int_array.h"
#include "paddle/phi/core/dense_tensor.h"
namespace phi {
template <typename T, typename Context>
void DataKernel(const Context& ctx,
int64_t index,
const std::string& name,
const phi::IntArray& shape,
phi::DataType data_type,
// std::string name,
DenseTensor* out);
template <typename T, typename Context>
......
......@@ -449,6 +449,11 @@ if is_compiled_with_cinn():
os.environ.setdefault('runtime_include_dir', runtime_include_dir)
disable_static()
from .new_ir_utils import _switch_to_new_ir # noqa: F401
_switch_to_new_ir()
__all__ = [ # noqa
'iinfo',
'finfo',
......
......@@ -18,15 +18,25 @@ import multiprocessing
import sys
import warnings
import numpy as np
from . import set_flags, get_flags
from .framework import Program, default_main_program
from ..ir import core as ir_core
from ..ir import OpResult
from .wrapped_decorator import signature_safe_contextmanager
from .data_feeder import convert_dtype
from .framework import Program, default_main_program, Variable, Operator
from .framework import convert_np_dtype_to_dtype_, _apply_pass
from .framework import Variable, Operator
from .framework import (
convert_np_dtype_to_dtype_,
_apply_pass,
paddle_type_to_proto_type,
)
from . import core
from . import unique_name
from . import compiler
from . import set_flags, get_flags
from .trainer_factory import TrainerFactory
from .trainer_factory import FetchHandlerMonitor
import copy
......@@ -260,6 +270,55 @@ def check_feed_shape_type(var, feed, num_places=1):
return True
def new_ir_check_feed_shape_type(feed, name, target_shape, dtype, num_places=1):
"""
Returns True if the fed value is compatible with the target shape and has
the same dtype as expected.
A dimension is compatible with the other if:
1. The lengths of the dimensions are the same.
2. Each non-negative number in the two dimensions is the same.
3. A negative number or 'None' in a dimension means unknown, so it
is compatible with any number.
Args:
feed (LoDTensor): the fed value, which must be a LoDTensor
name (str): name of the variable
target_shape (list): the shape that will be compared with feed
dtype (core.VarDesc.VarType): the dtype that will be compared with feed
num_places: an integer value indicating the number of places.
ParallelExecutor will divide data into devices (CPU/GPU) evenly.
Returns:
True if the shape and dtype of variable is compatible with the feed value
Raises:
ValueError: if the shape or dtype of the variable is not compatible with
the feed value
"""
diff_shape = core.diff_tensor_shape(feed, target_shape, num_places)
if diff_shape is not None:
raise ValueError(
'The fed Variable %r should have dimensions = %d, shape = '
'%r, but received fed shape %r on each device'
% (name, len(target_shape), target_shape, diff_shape)
)
if not dtype_is_compatible_with(feed._dtype(), dtype):
var_dtype_format = (
convert_dtype(dtype)
if isinstance(dtype, core.VarDesc.VarType)
else dtype
)
feed_dtype_format = (
convert_dtype(feed._dtype())
if isinstance(feed._dtype(), core.VarDesc.VarType)
else feed._dtype()
)
raise ValueError(
'The data type of fed Variable %r must be %r, but received %r'
% (name, var_dtype_format, feed_dtype_format)
)
return True
def has_feed_operators(block, feed_targets, feed_holder_name):
"""Check whether the block already has feed operators.
......@@ -349,6 +408,43 @@ def has_fetch_operators(
return fetch_count > 0
def has_fetch_operations(
block, fetch_targets, fetch_holder_name, fetch_op='pd.fetch'
):
"""Check whether the block already has fetch operation.
Return false if the block does not have any fetch operation.
If fetch operations have been appended to the block, check that
the info contained in these fetch operations matches fetch_targets.
Raise an exception when any mismatch is found.
Return true when the block has fetch operations with matching info.
Args:
block: a block instance (typically global block of a program)
fetch_targets: a list of fetch_target_data
fetch_op: the operator name of fetch
Return:
A boolean value that indicates whether the block has fetch operations
that match the info contained in fetch_targets.
"""
fetch_count = 0
for op in block.ops:
if op.name() == fetch_op:
fetch_count += 1
if op.operand_source() not in fetch_targets:
raise Exception(
"There is a fetch op in Program which will fetch variable that is not belong to fetch_targets."
)
if fetch_count > 0 and fetch_count != len(fetch_targets):
raise Exception(
"Fetch operations in program do not match 'fetch_targets'"
)
return fetch_count > 0
def _add_feed_fetch_ops(
program, feed, fetch_list, feed_var_name, fetch_var_name, use_fetch_v2=False
):
......@@ -414,6 +510,21 @@ def _add_feed_fetch_ops(
return tmp_program
def _add_new_ir_fetch_ops(program, fetch_list, fetch_var_name):
import paddle
global_block = program.block()
fetch_op = "pd.fetch"
if not has_fetch_operations(
global_block, fetch_list, fetch_var_name, fetch_op
):
for i, fetch_input in enumerate(fetch_list):
assert isinstance(
fetch_input, OpResult
), "Wrong type for fetch_list[%s]: %s" % (i, type(fetch_input))
paddle._ir_ops.fetch(fetch_input, fetch_var_name + str(i), i)
def _set_micro_batch_fetch(plan):
if plan.micro_batch_num() <= 1:
return
......@@ -636,6 +747,28 @@ def _as_lodtensor(data, place, dtype=None):
return tensor
def _can_use_interpreter_core(program, place):
compiled = isinstance(program, compiler.CompiledProgram) or isinstance(
program._graph, compiler.CompiledProgram
)
if compiled:
compiled_program = (
program
if isinstance(program, compiler.CompiledProgram)
else program._graph
)
# Unsupported case 1: inference
if compiled_program._is_inference:
warnings.warn(
"Standalone executor is not used for inference",
UserWarning,
)
return False
return True
class FetchHandler:
def __init__(self, var_dict=None, period_secs=60):
assert var_dict is not None
......@@ -899,6 +1032,29 @@ class _ExecutorCache:
new_exe = _StandaloneExecutor(place, plan, scope)
return new_program, new_exe
def get_new_ir_program_and_executor(
self,
program,
feed,
fetch_list,
feed_var_name,
fetch_var_name,
place,
scope,
):
_add_new_ir_fetch_ops(
program, fetch_list=fetch_list, fetch_var_name=fetch_var_name
)
default_job = core.Job("default")
type_to_program = {"default": program}
plan = core.Plan([default_job], type_to_program)
_set_micro_batch_fetch(plan)
new_exe = _StandaloneExecutor(place, plan, scope)
return program, new_exe
class Executor:
"""
......@@ -1081,6 +1237,25 @@ class Executor:
else:
break
def _new_ir_feed_data(self, program, feed, scope):
# feed var to framework
global_block = program.block()
for op in global_block.ops:
if op.name() == 'pd.data':
feed_target_name = op.attrs()["name"]
var_type = paddle_type_to_proto_type[op.attrs()["dtype"]]
var_shape = op.attrs()["shape"]
cur_feed = feed[feed_target_name]
if not isinstance(cur_feed, core.LoDTensor):
cur_feed = _as_lodtensor(cur_feed, self.place, var_type)
new_ir_check_feed_shape_type(
cur_feed, feed_target_name, var_shape, var_type
)
# The last arg of set_feed_variable has no effect in the new IR; we pass 0 by default.
core.set_feed_variable(scope, cur_feed, feed_target_name, 0)
else:
break
def _fetch_data(self, fetch_list, fetch_var_name, scope):
outs = [
core.get_fetch_variable(scope, fetch_var_name, i)
......@@ -1438,19 +1613,29 @@ class Executor:
'true',
]
self._log_force_set_program_cache(use_program_cache)
res = self._run_impl(
program=program,
feed=feed,
fetch_list=fetch_list,
feed_var_name=feed_var_name,
fetch_var_name=fetch_var_name,
scope=scope,
return_numpy=return_numpy,
use_program_cache=use_program_cache,
use_prune=use_prune,
)
core.update_autotune_status()
if ir_core._use_new_ir_api():
res = self._run_new_ir_impl(
program=program,
feed=feed,
fetch_list=fetch_list,
feed_var_name=feed_var_name,
fetch_var_name=fetch_var_name,
scope=scope,
return_numpy=return_numpy,
)
else:
res = self._run_impl(
program=program,
feed=feed,
fetch_list=fetch_list,
feed_var_name=feed_var_name,
fetch_var_name=fetch_var_name,
scope=scope,
return_numpy=return_numpy,
use_program_cache=use_program_cache,
use_prune=use_prune,
)
core.update_autotune_status()
return res
def _run_impl(
......@@ -1580,27 +1765,6 @@ class Executor:
feed = self._update_feed(pruned_program, feed)
program = pruned_program
def _can_use_interpreter_core(program, place):
compiled = isinstance(
program, compiler.CompiledProgram
) or isinstance(program._graph, compiler.CompiledProgram)
if compiled:
compiled_program = (
program
if isinstance(program, compiler.CompiledProgram)
else program._graph
)
# Unsupported case 1: inference
if compiled_program._is_inference:
warnings.warn(
"Standalone executor is not used for inference",
UserWarning,
)
return False
return True
if _can_use_interpreter_core(program, self.place):
if feed is None:
feed = {}
......@@ -1708,11 +1872,80 @@ class Executor:
), f"Program must have _is_inference = True, but get {program._is_inference}"
return self._run_inference(program._executor, feed)
def _run_new_ir_impl(
self,
program,
feed,
fetch_list,
feed_var_name,
fetch_var_name,
scope,
return_numpy,
):
import paddle
Program = paddle.ir.Program
default_main_program = paddle.ir.core.default_main_program
if self._closed:
raise RuntimeError("Attempted to use a closed Executor")
use_default_main_program = program is None
if use_default_main_program:
program = default_main_program()
fetch_list = self._check_fetch_list(fetch_list)
if isinstance(program, Program) and len(program.block().ops) == 0:
if use_default_main_program:
error_info = (
"Now you are using default_main_program, "
"but there are no operators in the program to be executed. "
"Please ensure you create model correctly or you can pass "
"the Program or the CompiledProgram manually."
)
else:
error_info = (
"There are no operators in the program to be executed. "
"If you pass Program manually, please use fluid.program_guard "
"to ensure the current Program is being used."
)
warnings.warn(error_info)
if scope is None:
scope = global_scope()
if feed is None:
feed = {}
elif isinstance(feed, (list, tuple)):
assert len(feed) == 1, "Not compiled with data parallel"
feed = feed[0]
if not isinstance(feed, dict):
raise TypeError(
"feed requires dict as its Parameter. But you passed in %s"
% (type(feed))
)
program, new_exe = self._executor_cache.get_new_ir_program_and_executor(
program,
feed,
fetch_list,
feed_var_name,
fetch_var_name,
self.place,
scope,
)
self._new_ir_feed_data(program, feed, scope)
ret = new_exe.run(list(feed.keys()), return_numpy)
return ret
def _run_inference(self, exe, feed):
return exe.run(feed)
def _check_fetch_list(self, fetch_list):
is_fetch_var = lambda var: isinstance(var, (Variable, str))
is_fetch_var = lambda var: isinstance(var, (Variable, str, OpResult))
is_tuple_list = lambda var: isinstance(var, (tuple, list))
if fetch_list is None:
......
......@@ -35,6 +35,7 @@ from .proto import framework_pb2, data_feed_pb2
from . import core
from . import unique_name
from .. import ir
from paddle.fluid.libpaddle import DataType
import paddle.version as fluid_version
import warnings
import functools
......@@ -157,6 +158,22 @@ extra_op_attrs = {
"unique": ["is_sorted"],
}
paddle_type_to_proto_type = {
DataType.BOOL: core.VarDesc.VarType.BOOL,
DataType.FLOAT16: core.VarDesc.VarType.FP16,
DataType.UINT16: core.VarDesc.VarType.BF16,
DataType.FLOAT32: core.VarDesc.VarType.FP32,
DataType.FLOAT64: core.VarDesc.VarType.FP64,
DataType.INT8: core.VarDesc.VarType.INT8,
DataType.INT16: core.VarDesc.VarType.INT16,
DataType.INT32: core.VarDesc.VarType.INT32,
DataType.INT64: core.VarDesc.VarType.INT64,
DataType.UINT8: core.VarDesc.VarType.UINT8,
DataType.COMPLEX64: core.VarDesc.VarType.COMPLEX64,
DataType.COMPLEX128: core.VarDesc.VarType.COMPLEX128,
}
# FIXME(dev): We haven't fully verified eager mode on XPU et.al but
# only GPU/CPU. Remove this after we improve this feature.
_is_first_import_ = True
......@@ -1015,40 +1032,34 @@ def convert_np_dtype_to_dtype_(np_dtype):
else:
dtype = np.dtype(np_dtype)
if ir.core._use_new_ir_api():
if dtype in ir.core.np_type_to_paddle_type.keys():
return ir.core.np_type_to_paddle_type[dtype]
else:
raise ValueError("Not supported numpy dtype %s" % dtype)
if dtype == np.float32:
return core.VarDesc.VarType.FP32
elif dtype == np.float64:
return core.VarDesc.VarType.FP64
elif dtype == np.float16:
return core.VarDesc.VarType.FP16
elif dtype == np.int32:
return core.VarDesc.VarType.INT32
elif dtype == np.int16:
return core.VarDesc.VarType.INT16
elif dtype == np.int64:
return core.VarDesc.VarType.INT64
elif dtype == np.bool_:
return core.VarDesc.VarType.BOOL
elif dtype == np.uint16:
# since there is still no support for bfloat16 in NumPy,
# uint16 is used for casting bfloat16
return core.VarDesc.VarType.BF16
elif dtype == np.uint8:
return core.VarDesc.VarType.UINT8
elif dtype == np.int8:
return core.VarDesc.VarType.INT8
elif dtype == np.complex64:
return core.VarDesc.VarType.COMPLEX64
elif dtype == np.complex128:
return core.VarDesc.VarType.COMPLEX128
else:
if dtype == np.float32:
return core.VarDesc.VarType.FP32
elif dtype == np.float64:
return core.VarDesc.VarType.FP64
elif dtype == np.float16:
return core.VarDesc.VarType.FP16
elif dtype == np.int32:
return core.VarDesc.VarType.INT32
elif dtype == np.int16:
return core.VarDesc.VarType.INT16
elif dtype == np.int64:
return core.VarDesc.VarType.INT64
elif dtype == np.bool_:
return core.VarDesc.VarType.BOOL
elif dtype == np.uint16:
# since there is still no support for bfloat16 in NumPy,
# uint16 is used for casting bfloat16
return core.VarDesc.VarType.BF16
elif dtype == np.uint8:
return core.VarDesc.VarType.UINT8
elif dtype == np.int8:
return core.VarDesc.VarType.INT8
elif dtype == np.complex64:
return core.VarDesc.VarType.COMPLEX64
elif dtype == np.complex128:
return core.VarDesc.VarType.COMPLEX128
else:
raise ValueError("Not supported numpy dtype %s" % dtype)
raise ValueError("Not supported numpy dtype %s" % dtype)
def dtype_is_floating(dtype):
......
......@@ -15,7 +15,6 @@
import numpy as np
import paddle
from paddle.fluid.libpaddle import DataType
from paddle.fluid.libpaddle.ir import Program, set_global_program
......@@ -37,6 +36,30 @@ np_type_to_paddle_type = {
}
def convert_np_dtype_to_dtype_(np_dtype):
"""
Convert the data type in numpy to the data type in Paddle.
Args:
np_dtype (np.dtype|str): The data type in numpy or valid data type
string.
Returns:
core.DataType : The data type in Paddle.
"""
# Convert the data type string to numpy data type.
if isinstance(np_dtype, str) and np_dtype == "bfloat16":
dtype = np.uint16
else:
dtype = np.dtype(np_dtype)
if dtype in np_type_to_paddle_type.keys():
return np_type_to_paddle_type[dtype]
else:
raise ValueError("Not supported numpy dtype %s" % dtype)
def _use_new_ir_api():
"""
This API checks whether paddle uses the new IR API.
......@@ -45,6 +68,9 @@ def _use_new_ir_api():
bool: Whether paddle uses the new IR API.
"""
# TODO(YuanRisheng): need move import to the top of this file after break import circle
import paddle
if paddle.framework.get_flags("FLAGS_enable_new_ir_api")[
'FLAGS_enable_new_ir_api'
]:
......
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
def _switch_to_new_ir():
if paddle.ir.core._use_new_ir_api():
paddle.framework.set_flags({"FLAGS_enable_new_ir_in_executor": True})
paddle.static.Program = paddle.ir.Program
paddle.fluid.Program = paddle.ir.Program
paddle.fluid.program_guard = paddle.ir.core.program_guard
paddle.static.program_guard = paddle.ir.core.program_guard
paddle.framework.default_main_program = (
paddle.ir.core.default_main_program
)
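Since _switch_to_new_ir() runs at paddle import time via the import-time call added earlier in this diff, FLAGS_enable_new_ir_api has to be set before the import. A hedged sketch; reading the flag from the environment is an assumption, though the new CMake target below does pass FLAGS_enable_new_ir_api=true as an environment variable:
import os
# Assumption: Paddle picks the flag up from the environment at import time.
os.environ["FLAGS_enable_new_ir_api"] = "true"
import paddle
# After _switch_to_new_ir(), the static Program/program_guard symbols point at
# the new IR implementations.
assert paddle.static.Program is paddle.ir.Program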
......@@ -76,14 +76,8 @@ from .nn.metric import auc # noqa: F401
from .nn.metric import accuracy # noqa: F401
from .nn.metric import ctr_metric_bundle # noqa: F401
import paddle
if paddle.ir.core._use_new_ir_api():
from ..ir import Program # noqa: F401
from ..ir.core import program_guard # noqa: F401
else:
from ..fluid.framework import program_guard # noqa: F401
from ..fluid.framework import Program # noqa: F401
from ..fluid.framework import program_guard # noqa: F401
from ..fluid.framework import Program # noqa: F401
__all__ = [ # noqa
'append_backward',
......
......@@ -112,6 +112,7 @@ def data(name, shape, dtype=None, lod_level=0):
is_data=True,
need_check_feed=True,
)
else:
out = helper.create_global_variable(
name=name,
......@@ -123,22 +124,29 @@ def data(name, shape, dtype=None, lod_level=0):
is_data=True,
need_check_feed=True,
)
dtype = paddle.get_default_dtype()
is_new_ir_mode = os.environ.get("FLAGS_enable_new_ir_in_executor", None)
if evaluate_flag(is_new_ir_mode):
helper = LayerHelper('data', **locals())
helper.append_op(
type='data',
inputs={},
outputs={'out': out},
attrs={
'index': 0,
'dtype': 0,
'place': 0,
'name': name,
},
)
return out
if paddle.ir.core._use_new_ir_api():
ir_dtype = paddle.ir.core.convert_np_dtype_to_dtype_(dtype)
return paddle._ir_ops.data(name, shape, ir_dtype, core.Place())
else:
is_new_ir_mode = os.environ.get("FLAGS_enable_new_ir_in_executor", None)
if evaluate_flag(is_new_ir_mode):
helper = LayerHelper('data', **locals())
if not isinstance(dtype, core.VarDesc.VarType):
dtype = convert_np_dtype_to_dtype_(dtype)
helper.append_op(
type='data',
inputs={},
outputs={'out': out},
attrs={
'shape': shape,
'dtype': dtype,
'place': 0,
'name': name,
},
)
return out
class InputSpec:
......
......@@ -897,7 +897,7 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None, name=None):
shape = paddle.utils.convert_shape_to_list(shape)
if not isinstance(dtype, core.DataType):
dtype = convert_np_dtype_to_dtype_(dtype)
dtype = paddle.ir.core.convert_np_dtype_to_dtype_(dtype)
if out is None:
out = paddle._ir_ops.full(shape, float(value), dtype, place)
......
......@@ -4,7 +4,15 @@ file(
"test_*.py")
string(REPLACE ".py" "" TEST_INTERP_CASES "${TEST_INTERP_CASES}")
set(TEST_IR_SYSTEM_CASES test_build_model)
list(REMOVE_ITEM TEST_INTERP_CASES ${TEST_IR_SYSTEM_CASES})
foreach(target ${TEST_INTERP_CASES})
py_test_modules(${target} MODULES ${target} ENVS GLOG_v=1
FLAGS_enable_new_ir_in_executor=true)
endforeach()
foreach(target ${TEST_IR_SYSTEM_CASES})
py_test_modules(${target} MODULES ${target} ENVS GLOG_v=1
FLAGS_enable_new_ir_api=true)
endforeach()
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
import paddle
paddle.enable_static()
class TestBuildModule(unittest.TestCase):
def test_basic_network(self):
main_program = paddle.static.Program()
with paddle.static.program_guard(main_program):
x = paddle.static.data('x', [4, 4], dtype='float32')
y = paddle.static.data('y', [4, 4], dtype='float32')
divide_out = paddle.divide(x, y)
sum_out = paddle.sum(divide_out)
exe = paddle.static.Executor()
x_feed = np.ones([4, 4], dtype=np.float32) * 10
y_feed = np.ones([4, 4], dtype=np.float32) * 2
(sum_value,) = exe.run(
feed={'x': x_feed, 'y': y_feed}, fetch_list=[sum_out]
)
self.assertEqual(sum_value, 5 * 4 * 4)
if __name__ == "__main__":
unittest.main()
......@@ -27,7 +27,7 @@ def data():
inputs={},
outputs={'out': out},
attrs={
'index': 0,
'shape': [1, 1],
'dtype': 0,
'place': 0,
'name': "x",
......