Unverified commit 10fd4a95 authored by Yuanle Liu, committed by GitHub

[Paddle Inference] Predictor support paddle::Tensor (#50445)

Parent 259b0aad
......@@ -17,24 +17,21 @@ set(PADDLE_INFERENCE_INSTALL_DIR
function(phi_header_path_compat TARGET_PATH)
message(STATUS "phi header path compat processing: ${TARGET_PATH}")
string(FIND ${TARGET_PATH} "experimental" pos)
if(pos GREATER 1)
file(GLOB HEADERS "${TARGET_PATH}/*" "*.h")
foreach(header ${HEADERS})
if(${header} MATCHES ".*.h$")
file(READ ${header} HEADER_CONTENT)
string(REPLACE "paddle/phi/" "paddle/include/experimental/phi/"
HEADER_CONTENT "${HEADER_CONTENT}")
string(REPLACE "paddle/fluid/platform/"
"paddle/include/experimental/phi/" HEADER_CONTENT
"${HEADER_CONTENT}")
string(REPLACE "paddle/utils/" "paddle/include/experimental/utils/"
HEADER_CONTENT "${HEADER_CONTENT}")
file(WRITE ${header} "${HEADER_CONTENT}")
message(STATUS "phi header path compat processing complete: ${header}")
endif()
endforeach()
endif()
file(GLOB HEADERS "${TARGET_PATH}/*" "*.h")
foreach(header ${HEADERS})
if(${header} MATCHES ".*.h$")
file(READ ${header} HEADER_CONTENT)
string(REPLACE "paddle/phi/" "paddle/include/experimental/phi/"
HEADER_CONTENT "${HEADER_CONTENT}")
string(REPLACE "paddle/fluid/platform/"
"paddle/include/experimental/phi/" HEADER_CONTENT
"${HEADER_CONTENT}")
string(REPLACE "paddle/utils/" "paddle/include/experimental/utils/"
HEADER_CONTENT "${HEADER_CONTENT}")
file(WRITE ${header} "${HEADER_CONTENT}")
message(STATUS "phi header path compat processing complete: ${header}")
endif()
endforeach()
endfunction()
phi_header_path_compat(
......@@ -51,6 +48,7 @@ phi_header_path_compat(
${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/common)
phi_header_path_compat(
${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/core)
phi_header_path_compat(${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/)
# In order to be compatible with the original behavior, the header file name needs to be changed
file(RENAME
......
......@@ -95,7 +95,7 @@ phi::DenseTensor& GetVariableTensor(const Scope& scope,
PADDLE_ENFORCE_EQ(var->IsType<phi::DenseTensor>(),
true,
platform::errors::InvalidArgument(
"Only support lod tensor in GetVariableTensor now."));
"Only support DenseTensor in GetVariableTensor now."));
return *var->GetMutable<phi::DenseTensor>();
}
......
......@@ -155,11 +155,10 @@ phi::Backend ConvertBackend(paddle_infer::PlaceType backend) {
return phi::Backend::CPU;
}
}
} // namespace
bool PaddleTensorToLoDTensor(const PaddleTensor &pt,
phi::DenseTensor *t,
const platform::Place &place) {
bool PaddleTensorToDenseTensor(const PaddleTensor &pt,
phi::DenseTensor *t,
const platform::Place &place) {
framework::DDim ddim = phi::make_ddim(pt.shape);
void *input_ptr;
if (pt.dtype == PaddleDType::INT64) {
......@@ -270,6 +269,7 @@ bool PaddleTensorToLoDTensor(const PaddleTensor &pt,
t->set_lod(lod);
return true;
}
} // namespace
bool AnalysisPredictor::Init(
const std::shared_ptr<framework::Scope> &parent_scope,
......@@ -919,6 +919,17 @@ void AnalysisPredictor::MkldnnPreSet(const std::vector<PaddleTensor> &inputs) {
#endif
}
void AnalysisPredictor::MkldnnPreSet(
const std::vector<paddle::Tensor> &inputs) {
#ifdef PADDLE_WITH_MKLDNN
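  // Collect every input's shape and delegate to the shape-based MkldnnPreSet overload.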
std::vector<std::vector<int>> inputs_shape;
for (size_t i = 0; i < inputs.size(); ++i) {
inputs_shape.emplace_back(phi::vectorize<int>(inputs[i].dims()));
}
MkldnnPreSet(inputs_shape);
#endif
}
void AnalysisPredictor::MkldnnPreSet(
const std::vector<std::vector<int>> &inputs_shape) {
#ifdef PADDLE_WITH_MKLDNN
......@@ -1033,6 +1044,70 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
return true;
}
bool AnalysisPredictor::Run(const std::vector<paddle::Tensor> &inputs,
std::vector<paddle::Tensor> *outputs) {
inference::DisplayMemoryInfo(place_, "before run");
paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
#ifdef PADDLE_WITH_MKLDNN
if (config_.use_mkldnn_) MkldnnPreSet(inputs);
#endif
VLOG(3) << "predict start";
// set feed variable
framework::Scope *scope = sub_scope_ ? sub_scope_ : scope_.get();
PADDLE_ENFORCE_NOT_NULL(
scope,
platform::errors::PreconditionNotMet("The scope should not be nullptr."));
if (!SetFeed(inputs, scope)) {
LOG(ERROR) << "fail to set feed";
return false;
}
#ifdef PADDLE_WITH_TENSORRT
if (config_.tensorrt_engine_enabled()) {
inference::tensorrt::TensorRTEngine::predictor_id_per_thread =
predictor_id_;
VLOG(3) << "thread_local var predictor_id in TensorRTEngine is set to: "
<< inference::tensorrt::TensorRTEngine::predictor_id_per_thread;
}
#endif
// Run the inference program
// if share variables, we need not create variables
executor_->Run();
inference::DisplayMemoryInfo(place_, "after run");
// get fetch variable
if (!GetFetch(outputs, scope)) {
LOG(ERROR) << "fail to get fetches";
return false;
}
// All the containers in the scope will be hold in inference, but the
// operators assume that the container will be reset after each batch.
// Here is a bugfix, collect all the container variables, and reset them to a
// bool; the next time, the operator will call MutableData and construct a new
// container again, so that the container will be empty for each batch.
if (sub_scope_) {
tensor_array_batch_cleaner_.CollectNoTensorVars(sub_scope_);
}
tensor_array_batch_cleaner_.ResetNoTensorVars();
// recover the cpu_math_library_num_threads to 1, in order to avoid thread
// conflict when integrating it into deployment service.
paddle::platform::SetNumThreads(1);
#ifdef PADDLE_WITH_MKLDNN
if (config_.use_mkldnn_) MkldnnPostReset();
#endif
#if defined(PADDLE_WITH_MKLML)
// Frees unused memory allocated by the Intel® MKL Memory Allocator to
// avoid memory leak. See:
// https://software.intel.com/en-us/mkl-developer-reference-c-mkl-free-buffers
platform::dynload::MKL_Free_Buffers();
#endif
return true;
}
bool AnalysisPredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
framework::Scope *scope) {
VLOG(3) << "Predictor::set_feed";
......@@ -1047,7 +1122,7 @@ bool AnalysisPredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
for (size_t i = 0; i < inputs.size(); ++i) {
phi::DenseTensor *input = &feed_tensors_[i];
if (!PaddleTensorToLoDTensor(inputs[i], input, place_)) {
if (!PaddleTensorToDenseTensor(inputs[i], input, place_)) {
return false;
}
int idx = -1;
......@@ -1061,7 +1136,41 @@ bool AnalysisPredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
} else {
idx = PADDLE_GET_CONST(int, feeds_[i]->GetAttr("col"));
}
framework::SetFeedVariable(scope, *input, "feed", idx);
framework::SetFeedVariable(scope, *input, framework::kFeedOpType, idx);
}
return true;
}
bool AnalysisPredictor::SetFeed(const std::vector<paddle::Tensor> &inputs,
framework::Scope *scope) {
VLOG(3) << "Predictor::set_feed";
PADDLE_ENFORCE_EQ(inputs.size(),
feeds_.size(),
platform::errors::InvalidArgument(
"wrong feed input size, need %d but get %d.",
feeds_.size(),
inputs.size()));
for (size_t i = 0; i < inputs.size(); ++i) {
PADDLE_ENFORCE_EQ(inputs[i].initialized(),
true,
paddle::platform::errors::InvalidArgument(
"The input Tensor expected to be initialized."));
}
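  // If every input carries a non-empty name that matches a recorded feed
  // variable, feed by name; otherwise fall back to the feed order (idx2feeds_).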
if (std::all_of(inputs.cbegin(), inputs.cend(), [&](const paddle::Tensor &t) {
return !t.name().empty() && feed_names_.count(t.name());
})) {
for (size_t i = 0; i < inputs.size(); ++i) {
auto &t = framework::GetVariableTensor(*scope, inputs[i].name());
t.ShareDataWith(
*std::dynamic_pointer_cast<phi::DenseTensor>(inputs[i].impl()));
}
} else {
for (size_t i = 0; i < inputs.size(); ++i) {
auto &t = framework::GetVariableTensor(*scope, idx2feeds_[i]);
t.ShareDataWith(
*std::dynamic_pointer_cast<phi::DenseTensor>(inputs[i].impl()));
}
}
return true;
}
......@@ -1100,7 +1209,7 @@ bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs,
idx,
i));
framework::FetchType &fetch_var =
framework::GetFetchVariable(*scope, "fetch", idx);
framework::GetFetchVariable(*scope, framework::kFetchOpType, idx);
auto &fetch = PADDLE_GET(phi::DenseTensor, fetch_var);
auto type = framework::TransToProtoVarType(fetch.dtype());
auto output = &(outputs->at(i));
......@@ -1125,6 +1234,19 @@ bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs,
return true;
}
bool AnalysisPredictor::GetFetch(std::vector<paddle::Tensor> *outputs,
framework::Scope *scope) {
VLOG(3) << "Predictor::get_fetch";
outputs->resize(fetches_.size());
for (size_t i = 0; i < fetches_.size(); ++i) {
auto const &name = idx2fetches_[i];
auto &t = framework::GetVariableTensor(*scope, name);
(*outputs)[i] =
std::move(paddle::Tensor(std::make_shared<phi::DenseTensor>(t), name));
}
return true;
}
void AnalysisPredictor::PrepareArgument() {
VLOG(3) << "AnalysisPredictor::PrepareArgument";
// Init std::unique_ptr argument_.
......@@ -1579,7 +1701,7 @@ void AnalysisPredictor::PrepareFeedFetch() {
"The sub_scope should not be nullptr."));
CreateFeedFetchVar(sub_scope_);
for (auto *op : inference_program_->Block(0).AllOps()) {
if (op->Type() == "feed") {
if (op->Type() == framework::kFeedOpType) {
int idx = PADDLE_GET_CONST(int, op->GetAttr("col"));
if (feeds_.size() <= static_cast<size_t>(idx)) {
feeds_.resize(idx + 1);
......@@ -1587,7 +1709,7 @@ void AnalysisPredictor::PrepareFeedFetch() {
feeds_[idx] = op;
feed_names_[op->Output("Out")[0]] = idx;
idx2feeds_[idx] = op->Output("Out")[0];
} else if (op->Type() == "fetch") {
} else if (op->Type() == framework::kFetchOpType) {
int idx = PADDLE_GET_CONST(int, op->GetAttr("col"));
if (fetches_.size() <= static_cast<size_t>(idx)) {
fetches_.resize(idx + 1);
......@@ -1602,9 +1724,9 @@ void AnalysisPredictor::CreateFeedFetchVar(framework::Scope *scope) {
PADDLE_ENFORCE_NOT_NULL(
scope,
platform::errors::InvalidArgument("The scope should not be nullptr."));
auto *var = scope->Var("feed");
auto *var = scope->Var(framework::kFeedOpType);
var->GetMutable<framework::FeedList>();
var = scope->Var("fetch");
var = scope->Var(framework::kFetchOpType);
var->GetMutable<framework::FetchList>();
}
......@@ -2186,7 +2308,7 @@ void AnalysisPredictor::ClearIntermediateTensor() {
const std::string name = var->Name();
auto *variable = executor_->GetScope()->FindVar(name);
if (variable != nullptr && variable->IsType<phi::DenseTensor>() &&
name != "feed" && name != "fetch") {
name != framework::kFeedOpType && name != framework::kFetchOpType) {
VLOG(3) << "Clear Intermediate Tensor: " << name;
auto *t = variable->GetMutable<phi::DenseTensor>();
t->clear();
......@@ -2653,6 +2775,11 @@ std::map<std::string, DataType> Predictor::GetOutputTypes() {
bool Predictor::Run() { return predictor_->ZeroCopyRun(); }
bool Predictor::Run(const std::vector<paddle::Tensor> &inputs,
std::vector<paddle::Tensor> *outputs) {
return predictor_->Run(inputs, outputs);
}
std::unique_ptr<Predictor> Predictor::Clone(void *stream) {
auto analysis_pred = predictor_->Clone(stream);
std::unique_ptr<Predictor> pred(new Predictor(std::move(analysis_pred)));
......
......@@ -31,15 +31,14 @@
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/resource_manager.h"
#include "paddle/fluid/platform/device/gpu/gpu_types.h"
#include "paddle/fluid/platform/float16.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/phi/core/dense_tensor.h"
#ifdef PADDLE_WITH_TESTING
#include <gtest/gtest.h>
#include <gtest/gtest_prod.h>
#endif
namespace paddle_infer {
using float16 = paddle::platform::float16;
namespace experimental {
class InternalUtils;
};
......@@ -150,6 +149,16 @@ class AnalysisPredictor : public PaddlePredictor {
std::vector<PaddleTensor> *output_data,
int batch_size = -1) override;
///
/// \brief Run the prediction engine (Recommended).
///
/// \param[in] inputs input tensors
/// \param[out] outputs output tensors
/// \return Whether the function executed successfully
///
bool Run(const std::vector<paddle::Tensor> &inputs,
std::vector<paddle::Tensor> *outputs) override;
///
/// \brief Get the input names
///
......@@ -378,6 +387,17 @@ class AnalysisPredictor : public PaddlePredictor {
///
bool SetFeed(const std::vector<PaddleTensor> &input_datas,
framework::Scope *scope);
///
/// \brief Prepare input data, only used in Run()
///
/// \param[in] inputs input tensors
/// \param[in] scope the scope used by predictor
/// \return Whether the function executed successfully
///
bool SetFeed(const std::vector<paddle::Tensor> &inputs,
framework::Scope *scope);
///
/// \brief Get the output data, only used in Run()
///
......@@ -387,6 +407,16 @@ class AnalysisPredictor : public PaddlePredictor {
///
bool GetFetch(std::vector<PaddleTensor> *output_data,
framework::Scope *scope);
///
/// \brief Get the output data, only used in Run()
///
/// \param[out] outputs output tensors
/// \param[in] scope the scope used by predictor
/// \return Whether the function executed successfully
///
bool GetFetch(std::vector<paddle::Tensor> *outputs, framework::Scope *scope);
///
/// \brief Get the output data, only used in GetFetch()
///
......@@ -404,6 +434,14 @@ class AnalysisPredictor : public PaddlePredictor {
/// \param[in] inputs tensors
///
void MkldnnPreSet(const std::vector<PaddleTensor> &inputs);
///
/// \brief PreSet for Mkldnn multi-thread and dynamic shape input.
///
/// Used in AnalysisPredictor::Run().
///
/// \param[in] inputs tensors
///
void MkldnnPreSet(const std::vector<paddle::Tensor> &inputs);
///
/// \brief PreSet for Mkldnn multi-thread and dynamic shape input.
......
......@@ -83,7 +83,7 @@ else()
if(WITH_MKL)
set(FLAG_OPENMP "-fopenmp")
endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 ${FLAG_OPENMP}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 ${FLAG_OPENMP}")
endif()
if(WITH_GPU)
......
......@@ -221,6 +221,16 @@ class PD_INFER_DECL PaddlePredictor {
std::vector<PaddleTensor>* output_data,
int batch_size = -1) = 0;
/// \brief This interface takes input and runs the network (Recommended).
/// \param[in] inputs A list of Tensor as the input to the network.
/// \param[out] outputs Pointer to the tensor list, which holds the output
/// Tensor
/// \return Whether the run is successful
virtual bool Run(const std::vector<paddle::Tensor>& inputs,
std::vector<paddle::Tensor>* outputs) {
return false;
}
/// \brief Used to get the name of the network input.
/// Be inherited by AnalysisPredictor, Only used in ZeroCopy scenarios.
/// \return Input tensor names.
......
......@@ -128,6 +128,17 @@ class PD_INFER_DECL Predictor {
///
bool Run();
///
/// \brief Run the prediction engine (Recommended)
///
/// \param[in] inputs A list of Tensor as the input to the network.
/// \param[out] outputs Pointer to the tensor list, which holds the output
/// Tensor
///
/// \return Whether the run is successful
bool Run(const std::vector<paddle::Tensor>& inputs,
std::vector<paddle::Tensor>* outputs);
///
/// \brief Get the output names
///
......
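For reference, a minimal C++ sketch of how the new overload could be driven end to end. The model paths, the feed name "input0", and the use of paddle::experimental::full() to build the input are illustrative assumptions, not part of this diff; include paths may differ in the installed inference package.

#include <vector>

#include "paddle/phi/api/include/api.h"  // paddle::experimental::full (path may differ in the install dir)
#include "paddle_inference_api.h"        // paddle_infer::Config / CreatePredictor; now also exposes paddle::Tensor

int main() {
  paddle_infer::Config config("inference.pdmodel", "inference.pdiparams");
  auto predictor = paddle_infer::CreatePredictor(config);

  // Build one float32 input on CPU. Naming it after a feed variable lets
  // SetFeed match it by name; unnamed tensors are matched by position.
  paddle::Tensor x = paddle::experimental::full(
      {2, 4}, 1.0f, phi::DataType::FLOAT32, phi::CPUPlace());
  x.set_name("input0");  // hypothetical feed name

  std::vector<paddle::Tensor> inputs{x};
  std::vector<paddle::Tensor> outputs;
  bool ok = predictor->Run(inputs, &outputs);  // overload added in this PR
  return ok ? 0 : 1;
}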
......@@ -21,6 +21,8 @@
#include "paddle_infer_declare.h" // NOLINT
#include "paddle/phi/api/include/tensor.h" // expose paddle::Tensor
#ifdef PADDLE_WITH_ONNXRUNTIME
#include "onnxruntime_c_api.h" // NOLINT
#include "onnxruntime_cxx_api.h" // NOLINT
......
......@@ -22,11 +22,6 @@
namespace paddle {
namespace jit {
static PaddleTensor DenseTensorToPaddleTensor(DenseTensor *t);
static bool PaddleTensorToDenseTensor(const PaddleTensor &pt,
DenseTensor *t,
const platform::Place &place);
PredictorEngine::PredictorEngine(
const std::shared_ptr<FunctionInfo> &info,
const std::shared_ptr<VariableMap> &params_dict,
......@@ -52,6 +47,7 @@ PredictorEngine::PredictorEngine(
config.SetSkipLoadParams(true);
config.SetApplyOptim(true);
config.SwitchIrOptim(true);
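  // The paddle::Tensor-based Run() sets feed/fetch variables directly
  // (see AnalysisPredictor::SetFeed/GetFetch above), so feed/fetch ops are disabled here.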
config.SwitchUseFeedFetchOps(false);
predictor_.reset(new AnalysisPredictor(config));
......@@ -78,135 +74,15 @@ std::unique_ptr<BaseEngine> PredictorEngine::Clone(void *stream) {
std::vector<Tensor> PredictorEngine::operator()(
const std::vector<Tensor> &inputs) {
auto dense_tensors = utils::ToDenseTensors(inputs);
return utils::ToTensors(this->operator()(dense_tensors));
}
std::vector<DenseTensor> PredictorEngine::operator()(
const std::vector<DenseTensor> &inputs) {
std::vector<PaddleTensor> pt_inputs;
std::vector<PaddleTensor> pt_outputs;
for (auto &t : inputs) {
auto non_const_t = const_cast<DenseTensor *>(&t);
pt_inputs.emplace_back(DenseTensorToPaddleTensor(non_const_t));
}
predictor_->Run(pt_inputs, &pt_outputs);
std::vector<DenseTensor> outputs;
for (auto &pt : pt_outputs) {
DenseTensor t;
PaddleTensorToDenseTensor(pt, &t, place_);
outputs.emplace_back(t);
}
std::vector<Tensor> outputs;
predictor_->Run(inputs, &outputs);
return outputs;
}
static PaddleTensor DenseTensorToPaddleTensor(DenseTensor *t) {
PaddleTensor pt;
switch (framework::TransToProtoVarType(t->dtype())) {
case framework::proto::VarType::INT32: {
pt.data.Reset(t->data(), t->numel() * sizeof(int32_t));
pt.dtype = PaddleDType::INT32;
} break;
case framework::proto::VarType::INT64: {
pt.data.Reset(t->data(), t->numel() * sizeof(int64_t));
pt.dtype = PaddleDType::INT64;
} break;
case framework::proto::VarType::FP32: {
pt.data.Reset(t->data(), t->numel() * sizeof(float));
pt.dtype = PaddleDType::FLOAT32;
} break;
default:
PADDLE_THROW(
platform::errors::Unimplemented("Unsupported tensor date type. Now "
"only supports INT64, FP32, INT32."));
}
pt.shape = phi::vectorize<int>(t->dims());
return pt;
}
static bool PaddleTensorToDenseTensor(const PaddleTensor &pt,
DenseTensor *t,
const platform::Place &place) {
framework::DDim ddim = phi::make_ddim(pt.shape);
void *input_ptr;
switch (pt.dtype) {
case PaddleDType::INT64:
input_ptr = t->mutable_data<int64_t>(ddim, place);
break;
case PaddleDType::FLOAT32:
input_ptr = t->mutable_data<float>(ddim, place);
break;
case PaddleDType::INT32:
input_ptr = t->mutable_data<int32_t>(ddim, place);
break;
case PaddleDType::FLOAT16:
input_ptr = t->mutable_data<float16>(ddim, place);
break;
default:
LOG(ERROR) << "unsupported feed type " << pt.dtype;
return false;
}
PADDLE_ENFORCE_NOT_NULL(
input_ptr,
paddle::platform::errors::Fatal(
"Cannot convert to LoDTensor because LoDTensor creation failed."));
PADDLE_ENFORCE_NOT_NULL(
pt.data.data(),
paddle::platform::errors::InvalidArgument(
"The data contained in the input PaddleTensor is illegal."));
if (platform::is_cpu_place(place)) {
// TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
std::memcpy(
static_cast<void *>(input_ptr), pt.data.data(), pt.data.length());
} else if (platform::is_ipu_place(place)) {
#ifdef PADDLE_WITH_IPU
std::memcpy(
static_cast<void *>(input_ptr), pt.data.data(), pt.data.length());
#else
PADDLE_THROW(paddle::platform::errors::Fatal(
"Not compile with WITH_IPU, should not reach here."));
#endif
} else if (platform::is_gpu_place(place)) {
PADDLE_ENFORCE_EQ(platform::is_xpu_place(place),
false,
platform::errors::InvalidArgument(
"Only one choice can be made between CPU and XPU."));
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
auto *dev_ctx = static_cast<const phi::GPUContext *>(pool.Get(place));
auto dst_gpu_place = place;
memory::Copy(dst_gpu_place,
static_cast<void *>(input_ptr),
platform::CPUPlace(),
pt.data.data(),
pt.data.length(),
dev_ctx->stream());
#else
PADDLE_THROW(paddle::platform::errors::Fatal(
"Not compile with CUDA, should not reach here."));
#endif
} else if (platform::is_xpu_place(place)) {
#ifdef PADDLE_WITH_XPU
auto dst_xpu_place = place;
memory::Copy(dst_xpu_place,
static_cast<void *>(input_ptr),
platform::CPUPlace(),
pt.data.data(),
pt.data.length());
#else
PADDLE_THROW(paddle::platform::errors::Fatal(
"Not compile with XPU, should not reach here."));
#endif
} else {
PADDLE_THROW(paddle::platform::errors::InvalidArgument(
"The analysis predictor supports CPU, GPU and XPU now."));
}
return true;
std::vector<DenseTensor> PredictorEngine::operator()(
const std::vector<DenseTensor> &inputs) {
return utils::ToDenseTensors(this->operator()(utils::ToTensors(inputs)));
}
} // namespace jit
......
......@@ -770,7 +770,11 @@ PyObject* ToPyObject(const std::vector<std::vector<size_t>>& value) {
PyObject* ToPyObject(const std::vector<paddle::Tensor>& value,
bool return_py_none_if_not_initialize) {
// NOTE(liuyuanle): I encountered a bug(access violation) in windows. ref to
// https://stackoverflow.com/questions/55598839/how-to-fix-access-violation-error-when-returning-pyobject-from-c-function-usin
PyGILState_STATE gstate = PyGILState_Ensure();
PyObject* result = PyList_New((Py_ssize_t)value.size());
PyGILState_Release(gstate);
for (size_t i = 0; i < value.size(); i++) {
if (!value[i].initialized() && return_py_none_if_not_initialize) {
......
......@@ -65,7 +65,7 @@ constexpr int NPY_UINT16_ = 4;
// paddle::platform::float16 as numpy.float16.
// Ref: https://github.com/pybind/pybind11/issues/1776
template <>
struct npy_format_descriptor<paddle_infer::float16> {
struct npy_format_descriptor<phi::dtype::float16> {
static py::dtype dtype() {
handle ptr = npy_api::get().PyArray_DescrFromType_(NPY_FLOAT16_);
return reinterpret_borrow<py::dtype>(ptr);
......@@ -180,7 +180,7 @@ py::dtype PaddleDTypeToNumpyDType(PaddleDType dtype) {
dt = py::dtype::of<float>();
break;
case PaddleDType::FLOAT16:
dt = py::dtype::of<paddle_infer::float16>();
dt = py::dtype::of<phi::dtype::float16>();
break;
case PaddleDType::UINT8:
dt = py::dtype::of<uint8_t>();
......@@ -264,7 +264,7 @@ void PaddleInferShareExternalData(paddle_infer::Tensor &tensor, // NOLINT
ToPaddleInferPlace(input_tensor.place().GetType()));
} else if (input_tensor.dtype() == phi::DataType::FLOAT16) {
tensor.ShareExternalData(
static_cast<paddle::platform::float16 *>(input_tensor.data()),
static_cast<phi::dtype::float16 *>(input_tensor.data()),
shape,
ToPaddleInferPlace(input_tensor.place().GetType()));
} else if (input_tensor.dtype() == phi::DataType::INT32) {
......@@ -353,7 +353,7 @@ size_t PaddleGetDTypeSize(PaddleDType dt) {
size = sizeof(float);
break;
case PaddleDType::FLOAT16:
size = sizeof(paddle_infer::float16);
size = sizeof(phi::dtype::float16);
break;
case PaddleDType::INT8:
size = sizeof(int8_t);
......@@ -392,8 +392,8 @@ py::array ZeroCopyTensorToNumpy(ZeroCopyTensor &tensor) { // NOLINT
tensor.copy_to_cpu<float>(static_cast<float *>(array.mutable_data()));
break;
case PaddleDType::FLOAT16:
tensor.copy_to_cpu<paddle::platform::float16>(
static_cast<paddle::platform::float16 *>(array.mutable_data()));
tensor.copy_to_cpu<phi::dtype::float16>(
static_cast<phi::dtype::float16 *>(array.mutable_data()));
break;
case PaddleDType::UINT8:
tensor.copy_to_cpu<uint8_t>(static_cast<uint8_t *>(array.mutable_data()));
......@@ -432,8 +432,8 @@ py::array PaddleInferTensorToNumpy(paddle_infer::Tensor &tensor) { // NOLINT
tensor.CopyToCpu<float>(static_cast<float *>(array.mutable_data()));
break;
case PaddleDType::FLOAT16:
tensor.CopyToCpu<paddle::platform::float16>(
static_cast<paddle::platform::float16 *>(array.mutable_data()));
tensor.CopyToCpu<phi::dtype::float16>(
static_cast<phi::dtype::float16 *>(array.mutable_data()));
break;
case PaddleDType::UINT8:
tensor.CopyToCpu(static_cast<uint8_t *>(array.mutable_data()));
......@@ -1062,6 +1062,16 @@ void BindPaddleInferPredictor(py::module *m) {
.def("get_output_names", &paddle_infer::Predictor::GetOutputNames)
.def("get_input_handle", &paddle_infer::Predictor::GetInputHandle)
.def("get_output_handle", &paddle_infer::Predictor::GetOutputHandle)
.def(
"run",
[](paddle_infer::Predictor &self, py::handle py_in_tensor_list) {
auto in_tensor_list =
CastPyArg2VectorOfTensor(py_in_tensor_list.ptr(), 0);
std::vector<paddle::Tensor> outputs;
self.Run(in_tensor_list, &outputs);
return py::handle(ToPyObject(outputs));
},
py::arg("inputs"))
.def("run", [](paddle_infer::Predictor &self) { self.Run(); })
.def("clone",
[](paddle_infer::Predictor &self) { return self.Clone(nullptr); })
......@@ -1091,9 +1101,9 @@ void BindZeroCopyTensor(py::module *m) {
.def("copy_from_cpu", &ZeroCopyTensorCreate<int32_t>)
.def("copy_from_cpu", &ZeroCopyTensorCreate<int64_t>)
.def("copy_from_cpu", &ZeroCopyTensorCreate<float>)
.def("copy_from_cpu", &ZeroCopyTensorCreate<phi::dtype::float16>)
// NOTE(liuyuanle): double must be bound after float.
.def("copy_from_cpu", &ZeroCopyTensorCreate<double>)
.def("copy_from_cpu", &ZeroCopyTensorCreate<paddle_infer::float16>)
.def("copy_from_cpu", &ZeroCopyTensorCreate<bool>)
.def("copy_from_cpu", &ZeroCopyStringTensorCreate)
.def("copy_to_cpu", &ZeroCopyTensorToNumpy)
......@@ -1116,10 +1126,9 @@ void BindPaddleInferTensor(py::module *m) {
.def("_copy_from_cpu_bind", &PaddleInferTensorCreate<int32_t>)
.def("_copy_from_cpu_bind", &PaddleInferTensorCreate<int64_t>)
.def("_copy_from_cpu_bind", &PaddleInferTensorCreate<float>)
.def("_copy_from_cpu_bind", &PaddleInferTensorCreate<phi::dtype::float16>)
// NOTE(liuyuanle): double must be bound after float.
.def("_copy_from_cpu_bind", &PaddleInferTensorCreate<double>)
.def("_copy_from_cpu_bind",
&PaddleInferTensorCreate<paddle_infer::float16>)
.def("_copy_from_cpu_bind", &PaddleInferTensorCreate<bool>)
.def("_copy_from_cpu_bind", &PaddleInferStringTensorCreate)
.def("_share_external_data_bind", &PaddleInferShareExternalData)
......
......@@ -416,7 +416,7 @@ class PADDLE_API Tensor final {
/**
* @brief Return the name of Tensor.
* @note Used to adapt original execution mechanism and debug analysis
* in the development of new dygraph. It may be removed in the future.
* in the development of new dygraph.
*
* @return const std::string&
*/
......@@ -425,7 +425,7 @@ class PADDLE_API Tensor final {
/**
* @brief Set name of Tensor.
* @note Used to adapt original execution mechanism and debug analysis
* in the development of new dygraph. It may be removed in the future.
* in the development of new dygraph.
*
* @param const std::string& name
*/
......@@ -657,7 +657,7 @@ class PADDLE_API Tensor final {
/**
* Tensor name: used to adapt original execution mechanism and debug analysis
* in the development of new dygraph. It may be removed in the future.
* in the development of new dygraph.
*/
std::string name_{""};
......
......@@ -136,6 +136,7 @@ Tensor add_n_impl(const std::vector<Tensor>& x) {
Tensor copy_to_impl(const Tensor& x, Place place, bool blocking) {
Tensor out;
copy(x, place, blocking, &out);
out.set_name(x.name());
return out;
}
......
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import tempfile
import unittest
import numpy as np
import paddle
from paddle.inference import Config, create_predictor
class TestNet(paddle.nn.Layer):
def __init__(self):
super().__init__()
self.fc1 = paddle.nn.Linear(4, 4)
self.fc2 = paddle.nn.Linear(4, 4)
def forward(self, x1, x2):
y1 = self.fc1(x1)
y2 = self.fc2(x2)
return y1 + y2
@unittest.skipIf(
not paddle.is_compiled_with_cuda(), 'should compile with cuda.'
)
class TestPredictorRunWithTensor(unittest.TestCase):
def setUp(self):
self.temp_dir = tempfile.TemporaryDirectory()
net = TestNet()
model = paddle.jit.to_static(
net,
input_spec=[
paddle.static.InputSpec(
shape=[None, 4], dtype='float32', name='input0'
),
paddle.static.InputSpec(
shape=[None, 4], dtype='float32', name='input1'
),
],
)
paddle.jit.save(
model,
os.path.join(
self.temp_dir.name, 'test_predictor_run_model/inference'
),
)
def tearDown(self):
self.temp_dir.cleanup()
def init_predictor(self):
config = Config(
os.path.join(
self.temp_dir.name,
'test_predictor_run_model/inference.pdmodel',
),
os.path.join(
self.temp_dir.name,
'test_predictor_run_model/inference.pdiparams',
),
)
config.enable_use_gpu(256, 0)
config.enable_memory_optim()
predictor = create_predictor(config)
return predictor
def get_inputs(self):
input0 = np.array([[1, 2, 3, 4], [2, 3, 4, 5]]).astype(np.float32)
input1 = np.array([[0.1, 0.2, 0.3, 0.4], [1.2, 1.3, 1.4, 1.5]]).astype(
np.float32
)
input0_tensor = paddle.to_tensor(input0)
input1_tensor = paddle.to_tensor(input1)
return [input0_tensor, input1_tensor]
def get_disorder_output(self):
predictor = self.init_predictor()
[input0_tensor, input1_tensor] = self.get_inputs()
input_names = predictor.get_input_names()
input0_tensor.name = input_names[0]
input1_tensor.name = input_names[1]
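        # Feeding the named tensors out of order should still work, because the
        # predictor matches inputs to feed variables by tensor name.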
# disorder
inputs = [input1_tensor, input0_tensor]
outputs = predictor.run(inputs)
return outputs[0]
def get_inorder_output(self):
predictor = self.init_predictor()
[input0_tensor, input1_tensor] = self.get_inputs()
# inorder
inputs = [input0_tensor, input1_tensor]
outputs = predictor.run(inputs)
return outputs[0]
def test_output(self):
inorder_output = self.get_inorder_output()
disorder_output = self.get_disorder_output()
assert np.allclose(
inorder_output.numpy().flatten(), disorder_output.numpy().flatten()
)
if __name__ == '__main__':
unittest.main()