From 2bbc0d7d60bbe8bfc1aef18fe8b7e4490db1b3a7 Mon Sep 17 00:00:00 2001 From: flame Date: Wed, 25 Dec 2019 08:58:19 +0800 Subject: [PATCH] python zero copy inference, delete pass (#21897) * python zero copy inference * support delete inference pass --- paddle/fluid/pybind/inference_api.cc | 181 ++++++++++++++++++++++++--- 1 file changed, 165 insertions(+), 16 deletions(-) diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc index 1b334f3350..57547f03d6 100644 --- a/paddle/fluid/pybind/inference_api.cc +++ b/paddle/fluid/pybind/inference_api.cc @@ -16,28 +16,35 @@ #include #include #include +#include #include +#include #include #include #include +#include #include #include #include #include "paddle/fluid/inference/api/analysis_predictor.h" #include "paddle/fluid/inference/api/paddle_inference_api.h" +#include "paddle/fluid/inference/api/paddle_pass_builder.h" namespace py = pybind11; namespace paddle { namespace pybind { -using paddle::PaddleDType; +using paddle::AnalysisPredictor; +using paddle::NativeConfig; +using paddle::NativePaddlePredictor; using paddle::PaddleBuf; -using paddle::PaddleTensor; +using paddle::PaddleDType; +using paddle::PaddlePassBuilder; using paddle::PaddlePlace; using paddle::PaddlePredictor; -using paddle::NativeConfig; -using paddle::NativePaddlePredictor; -using paddle::AnalysisPredictor; +using paddle::PaddleTensor; +using paddle::PassStrategy; +using paddle::ZeroCopyTensor; namespace { void BindPaddleDType(py::module *m); @@ -49,6 +56,8 @@ void BindNativeConfig(py::module *m); void BindNativePredictor(py::module *m); void BindAnalysisConfig(py::module *m); void BindAnalysisPredictor(py::module *m); +void BindZeroCopyTensor(py::module *m); +void BindPaddlePassBuilder(py::module *m); #ifdef PADDLE_WITH_MKLDNN void BindMkldnnQuantizerConfig(py::module *m); @@ -57,7 +66,7 @@ void BindMkldnnQuantizerConfig(py::module *m); template PaddleBuf PaddleBufCreate(py::array_t data) { PaddleBuf buf(data.size() * sizeof(T)); - std::copy_n(static_cast(data.mutable_data()), data.size(), + std::copy_n(static_cast(data.data()), data.size(), static_cast(buf.data())); return buf; } @@ -65,25 +74,25 @@ PaddleBuf PaddleBufCreate(py::array_t data) { template void PaddleBufReset(PaddleBuf &buf, py::array_t data) { // NOLINT buf.Resize(data.size() * sizeof(T)); - std::copy_n(static_cast(data.mutable_data()), data.size(), + std::copy_n(static_cast(data.data()), data.size(), static_cast(buf.data())); } template -PaddleDType PaddleTensorGetDType(); +constexpr PaddleDType PaddleTensorGetDType(); template <> -PaddleDType PaddleTensorGetDType() { +constexpr PaddleDType PaddleTensorGetDType() { return PaddleDType::INT32; } template <> -PaddleDType PaddleTensorGetDType() { +constexpr PaddleDType PaddleTensorGetDType() { return PaddleDType::INT64; } template <> -PaddleDType PaddleTensorGetDType() { +constexpr PaddleDType PaddleTensorGetDType() { return PaddleDType::FLOAT32; } @@ -95,7 +104,7 @@ PaddleTensor PaddleTensorCreate( if (copy) { PaddleBuf buf(data.size() * sizeof(T)); - std::copy_n(static_cast(data.mutable_data()), data.size(), + std::copy_n(static_cast(data.data()), data.size(), static_cast(buf.data())); tensor.data = std::move(buf); } else { @@ -111,9 +120,9 @@ PaddleTensor PaddleTensorCreate( return tensor; } -py::array PaddleTensorGetData(PaddleTensor &tensor) { // NOLINT +py::dtype PaddleDTypeToNumpyDType(PaddleDType dtype) { py::dtype dt; - switch (tensor.dtype) { + switch (dtype) { case PaddleDType::INT32: dt = py::dtype::of(); break; @@ -126,7 +135,62 @@ py::array PaddleTensorGetData(PaddleTensor &tensor) { // NOLINT default: LOG(FATAL) << "unsupported dtype"; } - return py::array(dt, {tensor.shape}, tensor.data.data()); + + return dt; +} + +py::array PaddleTensorGetData(PaddleTensor &tensor) { // NOLINT + py::dtype dt = PaddleDTypeToNumpyDType(tensor.dtype); + return py::array(std::move(dt), {tensor.shape}, tensor.data.data()); +} + +template +void ZeroCopyTensorCreate(ZeroCopyTensor &tensor, // NOLINT + py::array_t data) { + std::vector shape; + std::copy_n(data.shape(), data.ndim(), std::back_inserter(shape)); + tensor.Reshape(std::move(shape)); + tensor.copy_from_cpu(static_cast(data.data())); +} + +size_t PaddleGetDTypeSize(PaddleDType dt) { + size_t size{0}; + switch (dt) { + case PaddleDType::INT32: + size = sizeof(int32_t); + break; + case PaddleDType::INT64: + size = sizeof(int64_t); + break; + case PaddleDType::FLOAT32: + size = sizeof(float); + break; + default: + LOG(FATAL) << "unsupported dtype"; + } + return size; +} + +py::array ZeroCopyTensorToNumpy(ZeroCopyTensor &tensor) { // NOLINT + py::dtype dt = PaddleDTypeToNumpyDType(tensor.type()); + auto tensor_shape = tensor.shape(); + py::array::ShapeContainer shape(tensor_shape.begin(), tensor_shape.end()); + py::array array(dt, std::move(shape)); + + switch (tensor.type()) { + case PaddleDType::INT32: + tensor.copy_to_cpu(static_cast(array.mutable_data())); + break; + case PaddleDType::INT64: + tensor.copy_to_cpu(static_cast(array.mutable_data())); + break; + case PaddleDType::FLOAT32: + tensor.copy_to_cpu(static_cast(array.mutable_data())); + break; + default: + LOG(FATAL) << "unsupported dtype"; + } + return array; } } // namespace @@ -140,6 +204,8 @@ void BindInferenceApi(py::module *m) { BindNativePredictor(m); BindAnalysisConfig(m); BindAnalysisPredictor(m); + BindZeroCopyTensor(m); + BindPaddlePassBuilder(m); #ifdef PADDLE_WITH_MKLDNN BindMkldnnQuantizerConfig(m); #endif @@ -258,8 +324,11 @@ void BindPaddlePredictor(py::module *m) { }) .def("get_input_tensor", &PaddlePredictor::GetInputTensor) .def("get_output_tensor", &PaddlePredictor::GetOutputTensor) + .def("get_input_names", &PaddlePredictor::GetInputNames) + .def("get_output_names", &PaddlePredictor::GetOutputNames) .def("zero_copy_run", &PaddlePredictor::ZeroCopyRun) - .def("clone", &PaddlePredictor::Clone); + .def("clone", &PaddlePredictor::Clone) + .def("get_serialized_program", &PaddlePredictor::GetSerializedProgram); auto config = py::class_(paddle_predictor, "Config"); config.def(py::init<>()) @@ -339,6 +408,7 @@ void BindAnalysisConfig(py::module *m) { .def("enable_memory_optim", &AnalysisConfig::EnableMemoryOptim) .def("enable_profile", &AnalysisConfig::EnableProfile) .def("disable_glog_info", &AnalysisConfig::DisableGlogInfo) + .def("glog_info_disabled", &AnalysisConfig::glog_info_disabled) .def("set_optim_cache_dir", &AnalysisConfig::SetOptimCacheDir) .def("switch_use_feed_fetch_ops", &AnalysisConfig::SwitchUseFeedFetchOps, py::arg("x") = true) @@ -379,6 +449,10 @@ void BindAnalysisConfig(py::module *m) { .def("set_mkldnn_op", &AnalysisConfig::SetMKLDNNOp) .def("set_model_buffer", &AnalysisConfig::SetModelBuffer) .def("model_from_memory", &AnalysisConfig::model_from_memory) + .def("delete_pass", + [](AnalysisConfig &self, const std::string &pass) { + self.pass_builder()->DeletePass(pass); + }) .def("pass_builder", &AnalysisConfig::pass_builder, py::return_value_policy::reference); } @@ -418,13 +492,88 @@ void BindAnalysisPredictor(py::module *m) { }) .def("get_input_tensor", &AnalysisPredictor::GetInputTensor) .def("get_output_tensor", &AnalysisPredictor::GetOutputTensor) + .def("get_input_names", &AnalysisPredictor::GetInputNames) + .def("get_output_names", &AnalysisPredictor::GetOutputNames) + .def("get_input_tensor_shape", &AnalysisPredictor::GetInputTensorShape) .def("zero_copy_run", &AnalysisPredictor::ZeroCopyRun) + .def("create_feed_fetch_var", &AnalysisPredictor::CreateFeedFetchVar) + .def("prepare_feed_fetch", &AnalysisPredictor::PrepareFeedFetch) + .def("prepare_argument", &AnalysisPredictor::PrepareArgument) + .def("optimize_inference_program", + &AnalysisPredictor::OptimizeInferenceProgram) + .def("analysis_argument", &AnalysisPredictor::analysis_argument, + py::return_value_policy::reference) .def("clone", &AnalysisPredictor::Clone) .def("scope", &AnalysisPredictor::scope, py::return_value_policy::reference) + .def("program", &AnalysisPredictor::program, + py::return_value_policy::reference) + .def("get_serialized_program", &AnalysisPredictor::GetSerializedProgram) + .def("mkldnn_quantize", &AnalysisPredictor::MkldnnQuantize) .def("SaveOptimModel", &AnalysisPredictor::SaveOptimModel, py::arg("dir")); } + +void BindZeroCopyTensor(py::module *m) { + py::class_(*m, "ZeroCopyTensor") + .def("reshape", &ZeroCopyTensor::Reshape) + .def("copy_from_cpu", &ZeroCopyTensorCreate) + .def("copy_from_cpu", &ZeroCopyTensorCreate) + .def("copy_from_cpu", &ZeroCopyTensorCreate) + .def("copy_to_cpu", &ZeroCopyTensorToNumpy) + .def("shape", &ZeroCopyTensor::shape) + .def("set_lod", &ZeroCopyTensor::SetLoD) + .def("lod", &ZeroCopyTensor::lod) + .def("type", &ZeroCopyTensor::type); +} + +void BindPaddlePassBuilder(py::module *m) { + py::class_(*m, "PaddlePassBuilder") + .def(py::init &>()) + .def("set_passes", + [](PaddlePassBuilder &self, const std::vector &passes) { + self.ClearPasses(); + for (auto pass : passes) { + self.AppendPass(std::move(pass)); + } + }) + .def("append_pass", &PaddlePassBuilder::AppendPass) + .def("insert_pass", &PaddlePassBuilder::InsertPass) + .def("delete_pass", + [](PaddlePassBuilder &self, const std::string &pass_type) { + self.DeletePass(pass_type); + }) + .def("append_analysis_pass", &PaddlePassBuilder::AppendAnalysisPass) + .def("turn_on_debug", &PaddlePassBuilder::TurnOnDebug) + .def("debug_string", &PaddlePassBuilder::DebugString) + .def("all_passes", &PaddlePassBuilder::AllPasses, + py::return_value_policy::reference) + .def("analysis_passes", &PaddlePassBuilder::AnalysisPasses); + + py::class_(*m, "PassStrategy") + .def(py::init &>()) + .def("enable_cudnn", &PassStrategy::EnableCUDNN) + .def("enable_mkldnn", &PassStrategy::EnableMKLDNN) + .def("enable_ngraph", &PassStrategy::EnableNgraph) + .def("enable_mkldnn_quantizer", &PassStrategy::EnableMkldnnQuantizer) + .def("use_gpu", &PassStrategy::use_gpu); + + py::class_(*m, "CpuPassStrategy") + .def(py::init<>()) + .def(py::init()) + .def("enable_cudnn", &CpuPassStrategy::EnableCUDNN) + .def("enable_mkldnn", &CpuPassStrategy::EnableMKLDNN) + .def("enable_ngraph", &CpuPassStrategy::EnableNgraph) + .def("enable_mkldnn_quantizer", &CpuPassStrategy::EnableMkldnnQuantizer); + + py::class_(*m, "GpuPassStrategy") + .def(py::init<>()) + .def(py::init()) + .def("enable_cudnn", &GpuPassStrategy::EnableCUDNN) + .def("enable_mkldnn", &GpuPassStrategy::EnableMKLDNN) + .def("enable_ngraph", &GpuPassStrategy::EnableNgraph) + .def("enable_mkldnn_quantizer", &GpuPassStrategy::EnableMkldnnQuantizer); +} } // namespace } // namespace pybind } // namespace paddle -- GitLab