Unverified commit e3f39833, authored by heliqi, committed by GitHub

[Paddle Inference] Support PaddlePaddle Backend on Triton (#49758)

* support PaddlePaddle Backend on Triton

* fix test cases

* fix Codestyle

* add test case

* add test case
Parent a1b2e1e2
@@ -1609,6 +1609,51 @@ std::vector<std::string> AnalysisPredictor::GetOutputNames() {
  return output_names;
}
std::map<std::string, std::vector<int64_t>>
AnalysisPredictor::GetOutputTensorShape() {
  std::map<std::string, std::vector<int64_t>> output_shapes;
  std::vector<std::string> names = GetOutputNames();
  for (const std::string &name : names) {
    auto *var = inference_program_->Block(0).FindVar(name);
    PADDLE_ENFORCE_NOT_NULL(var,
                            platform::errors::PreconditionNotMet(
                                "Output %s does not exist.", name));
    output_shapes[name] = var->GetShape();
  }
  return output_shapes;
}
std::map<std::string, paddle_infer::DataType>
AnalysisPredictor::GetOutputTypes() {
  std::map<std::string, paddle_infer::DataType> output_type;
  std::vector<std::string> names = GetOutputNames();
  for (const auto &name : names) {
    auto *var = inference_program_->Block(0).FindVar(name);
    PADDLE_ENFORCE_NOT_NULL(
        var,
        platform::errors::PreconditionNotMet(
            "Output %s does not exist in inference_program_.", name));
    auto dtype = var->GetDataType();
    if (dtype == paddle::framework::proto::VarType::FP32) {
      output_type[name] = paddle_infer::DataType::FLOAT32;
    } else if (dtype == paddle::framework::proto::VarType::FP16) {
      output_type[name] = paddle_infer::DataType::FLOAT16;
    } else if (dtype == paddle::framework::proto::VarType::INT64) {
      output_type[name] = paddle_infer::DataType::INT64;
    } else if (dtype == paddle::framework::proto::VarType::INT32) {
      output_type[name] = paddle_infer::DataType::INT32;
    } else if (dtype == paddle::framework::proto::VarType::UINT8) {
      output_type[name] = paddle_infer::DataType::UINT8;
    } else if (dtype == paddle::framework::proto::VarType::INT8) {
      output_type[name] = paddle_infer::DataType::INT8;
    } else {
      PADDLE_THROW(paddle::platform::errors::Unimplemented(
          "Unsupported data type `%s` when getting output dtype.", dtype));
    }
  }
  return output_type;
}
std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetInputTensor(
    const std::string &name) {
  framework::Scope *scope;
@@ -2477,6 +2522,10 @@ std::vector<std::string> Predictor::GetInputNames() {
  return predictor_->GetInputNames();
}
std::map<std::string, std::vector<int64_t>> Predictor::GetInputTensorShape() {
  return predictor_->GetInputTensorShape();
}
std::map<std::string, DataType> Predictor::GetInputTypes() {
  return predictor_->GetInputTypes();
}
@@ -2493,6 +2542,14 @@ std::unique_ptr<Tensor> Predictor::GetOutputHandle(const std::string &name) {
  return predictor_->GetOutputTensor(name);
}
std::map<std::string, std::vector<int64_t>> Predictor::GetOutputTensorShape() {
  return predictor_->GetOutputTensorShape();
}

std::map<std::string, DataType> Predictor::GetOutputTypes() {
  return predictor_->GetOutputTypes();
}
bool Predictor::Run() { return predictor_->ZeroCopyRun(); }

std::unique_ptr<Predictor> Predictor::Clone(void *stream) {
...
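With shapes and dtypes now queryable on the output side as well, a serving integration (such as the Triton Paddle backend this PR targets) can discover a model's full I/O signature before the first Run(). A minimal C++ sketch of the caller side; the model paths are placeholders and the header path varies by install layout:

#include <cstdint>
#include <iostream>
#include <map>
#include <string>
#include <vector>

#include "paddle_inference_api.h"  // public Paddle Inference header

int main() {
  // Placeholder paths; any exported Paddle inference model works here.
  paddle_infer::Config config("./model.pdmodel", "./model.pdiparams");
  auto predictor = paddle_infer::CreatePredictor(config);

  // Query the output signature without running the model.
  std::map<std::string, std::vector<int64_t>> shapes =
      predictor->GetOutputTensorShape();
  std::map<std::string, paddle_infer::DataType> types =
      predictor->GetOutputTypes();
  for (const auto &kv : shapes) {
    std::cout << kv.first << ": rank " << kv.second.size() << ", dtype "
              << static_cast<int>(types[kv.first]) << std::endl;
  }
  return 0;
}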
@@ -191,6 +191,18 @@ class AnalysisPredictor : public PaddlePredictor {
  /// \return the map of input names and types
  ///
  std::map<std::string, paddle_infer::DataType> GetInputTypes() override;
  ///
  /// \brief Get all output names and their corresponding shapes
  ///
  /// \return the map of output names and shapes
  ///
  std::map<std::string, std::vector<int64_t>> GetOutputTensorShape() override;

  ///
  /// \brief Get all output names and their corresponding types
  ///
  /// \return the map of output names and types
  ///
  std::map<std::string, paddle_infer::DataType> GetOutputTypes() override;
  ///
  /// \brief Run the prediction engine
...
@@ -106,6 +106,8 @@ TEST(AnalysisPredictor, analysis_on) {
  ASSERT_EQ(predictor->scope_->parent(), nullptr);
  ASSERT_EQ(predictor->sub_scope_->parent(), predictor->scope_.get());
  ASSERT_EQ(predictor->GetInputTypes().size(), 4UL);
  ASSERT_EQ(predictor->GetOutputTypes().size(), 1UL);
  ASSERT_EQ(predictor->GetOutputTensorShape().size(), 1UL);
  // 2. Dummy Input Data
  int64_t data[4] = {1, 2, 3, 4};
  PaddleTensor tensor;
@@ -430,6 +432,8 @@ TEST(Predictor, Run) {
  auto predictor = CreatePredictor(config);
  ASSERT_EQ(predictor->GetInputTypes().size(), 4UL);
  ASSERT_EQ(predictor->GetOutputTypes().size(), 1UL);
  ASSERT_EQ(predictor->GetOutputTensorShape().size(), 1UL);
  auto w0 = predictor->GetInputHandle("firstw");
  auto w1 = predictor->GetInputHandle("secondw");
...
@@ -243,6 +243,19 @@ class PD_INFER_DECL PaddlePredictor {
  /// \return Output tensor names.
  virtual std::vector<std::string> GetOutputNames() { return {}; }
  /// \brief Get the output shapes of the model.
  /// \return A map containing all the output names and shapes defined in the
  /// model.
  virtual std::map<std::string, std::vector<int64_t>> GetOutputTensorShape() {
    return {};
  }

  /// \brief Get the output types of the model.
  /// \return A map containing all the output names and types defined in the
  /// model.
  virtual std::map<std::string, paddle_infer::DataType> GetOutputTypes() {
    return {};
  }
  /// \brief Get the input ZeroCopyTensor by name.
  /// Be inherited by AnalysisPredictor; only used in ZeroCopy scenarios.
  /// The name is obtained from the GetInputNames() interface.
...
@@ -92,6 +92,13 @@ class PD_INFER_DECL Predictor {
  ///
  explicit Predictor(const Config& config);
  ///
  /// \brief Get all input names and their corresponding shapes
  ///
  /// \return the map of input names and shapes
  ///
  std::map<std::string, std::vector<int64_t>> GetInputTensorShape();
  ///
  /// \brief Get all input names and their corresponding types
  ///

@@ -136,6 +143,20 @@ class PD_INFER_DECL Predictor {
  ///
  std::unique_ptr<Tensor> GetOutputHandle(const std::string& name);
  ///
  /// \brief Get all output names and their corresponding shapes
  ///
  /// \return the map of output names and shapes
  ///
  std::map<std::string, std::vector<int64_t>> GetOutputTensorShape();

  ///
  /// \brief Get all output names and their corresponding types
  ///
  /// \return the map of output names and types
  ///
  std::map<std::string, DataType> GetOutputTypes();
  ///
  /// \brief Clone to get a new predictor. Thread safe.
  ///
...
@@ -55,8 +55,9 @@ __pd_give PD_Config* PD_ConfigCreate() {
}

void PD_ConfigDestroy(__pd_take PD_Config* pd_config) {
-  CHECK_AND_CONVERT_PD_CONFIG;
-  delete reinterpret_cast<Config*>(config);
+  if (pd_config != NULL) {
+    delete reinterpret_cast<Config*>(pd_config);
+  }
}
void PD_ConfigSetModel(__pd_keep PD_Config* pd_config,

@@ -116,9 +117,12 @@ PD_Bool PD_ConfigUseFcPadding(__pd_keep PD_Config* pd_config) {
void PD_ConfigEnableUseGpu(__pd_keep PD_Config* pd_config,
                           uint64_t memory_pool_init_size_mb,
-                          int32_t device_id) {
+                          int32_t device_id,
+                          PD_PrecisionType precision_mode) {
  CHECK_AND_CONVERT_PD_CONFIG;
-  config->EnableUseGpu(memory_pool_init_size_mb, device_id);
+  config->EnableUseGpu(memory_pool_init_size_mb,
+                       device_id,
+                       ConvertToCxxPrecisionType(precision_mode));
}
void PD_ConfigDisableGpu(__pd_keep PD_Config* pd_config) {
  CHECK_AND_CONVERT_PD_CONFIG;
@@ -427,6 +431,14 @@ void PD_ConfigSetBfloat16Op(__pd_keep PD_Config* pd_config,
  }
  config->SetBfloat16Op(std::move(op_names));
}
void PD_ConfigEnableMkldnnInt8(__pd_keep PD_Config* pd_config) {
  CHECK_AND_CONVERT_PD_CONFIG;
  config->EnableMkldnnInt8();
}

PD_Bool PD_ConfigMkldnnInt8Enabled(__pd_keep PD_Config* pd_config) {
  CHECK_AND_CONVERT_PD_CONFIG;
  return config->mkldnn_int8_enabled();
}
PD_Bool PD_ConfigThreadLocalStreamEnabled(__pd_keep PD_Config* pd_config) {
  CHECK_AND_CONVERT_PD_CONFIG;
  return config->thread_local_stream_enabled();
@@ -484,6 +496,10 @@ void PD_ConfigEnableGpuMultiStream(__pd_keep PD_Config* pd_config) {
  CHECK_AND_CONVERT_PD_CONFIG;
  config->EnableGpuMultiStream();
}
void PD_ConfigSetExecStream(__pd_keep PD_Config* pd_config, void* stream) {
  CHECK_AND_CONVERT_PD_CONFIG;
  config->SetExecStream(stream);
}
void PD_ConfigPartiallyRelease(__pd_keep PD_Config* pd_config) {
  CHECK_AND_CONVERT_PD_CONFIG;
  config->PartiallyRelease();
...
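Two behavior changes in this file are easy to miss: PD_ConfigDestroy no longer goes through CHECK_AND_CONVERT_PD_CONFIG, so passing NULL is now a safe no-op (free()-style), and MKLDNN int8 quantization becomes reachable from the C API. A small sketch under those assumptions; the header path follows the C-API tests in this PR, and the enabled-query only reports TRUE on builds compiled with MKLDNN:

#include "paddle/fluid/inference/capi_exp/pd_inference_api.h"

int main() {
  PD_Config* config = PD_ConfigCreate();

  // New in this change: request MKLDNN int8 quantized inference from C.
  PD_ConfigEnableMkldnnInt8(config);
  PD_Bool int8_on = PD_ConfigMkldnnInt8Enabled(config);
  (void)int8_on;  // TRUE only on MKLDNN-enabled builds

  PD_ConfigDestroy(config);
  PD_ConfigDestroy(NULL);  // now a safe no-op instead of a failed check
  return 0;
}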
@@ -132,11 +132,13 @@ PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigUseFcPadding(
/// \param[in] memory_pool_init_size_mb initial size of the GPU memory pool in
/// MB.
/// \param[in] device_id the GPU card to use.
/// \param[in] precision_mode the precision used in Paddle-GPU inference.
///
PADDLE_CAPI_EXPORT extern void PD_ConfigEnableUseGpu(
    __pd_keep PD_Config* pd_config,
    uint64_t memory_pool_init_size_mb,
-    int32_t device_id);
+    int32_t device_id,
+    PD_PrecisionType precision_mode);
///
/// \brief Turn off GPU.
///
@@ -607,6 +609,22 @@ PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigMkldnnBfloat16Enabled(
PADDLE_CAPI_EXPORT extern void PD_ConfigSetBfloat16Op(
    __pd_keep PD_Config* pd_config, size_t ops_num, const char** op_list);

///
/// \brief Turn on MKLDNN int8.
///
/// \param[in] pd_config config
///
PADDLE_CAPI_EXPORT extern void PD_ConfigEnableMkldnnInt8(
    __pd_keep PD_Config* pd_config);

///
/// \brief A boolean state telling whether to use MKLDNN int8.
///
/// \param[in] pd_config config
/// \return Whether MKLDNN int8 is enabled.
///
PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigMkldnnInt8Enabled(
    __pd_keep PD_Config* pd_config);

///
/// \brief Enable the GPU multi-computing stream feature.
/// NOTE: The current behavior of this interface is to bind the computation
/// stream to the thread, and this behavior may be changed in the future.
@@ -625,6 +643,12 @@ PADDLE_CAPI_EXPORT extern void PD_ConfigEnableGpuMultiStream(
PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigThreadLocalStreamEnabled(
    __pd_keep PD_Config* pd_config);

///
/// \brief Set the execution stream. If not set, a stream will be created
/// internally.
///
/// \param[in] pd_config config
/// \param[in] stream the execution stream.
///
PADDLE_CAPI_EXPORT extern void PD_ConfigSetExecStream(
    __pd_keep PD_Config* pd_config, void* stream);

///
/// \brief Specify the memory buffer of program and parameter.
/// Used when model and params are loaded directly from memory.
///
...
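The two new entry points compose: the fourth argument of PD_ConfigEnableUseGpu selects the inference precision (0, i.e. PD_PRECISION_FLOAT32, reproduces the old two-argument behavior, which is exactly what the Go wrapper below passes), and PD_ConfigSetExecStream hands Paddle a caller-owned stream. A hedged sketch for CUDA builds:

#include <cuda_runtime.h>

#include "paddle/fluid/inference/capi_exp/pd_inference_api.h"

int main() {
  PD_Config* config = PD_ConfigCreate();

  // The 4th argument is new; PD_PRECISION_FLOAT32 keeps the old behavior.
  PD_ConfigEnableUseGpu(config, /*memory_pool_init_size_mb=*/100,
                        /*device_id=*/0, PD_PRECISION_FLOAT32);

  // Execute on a caller-owned CUDA stream instead of an internal one.
  cudaStream_t stream;
  cudaStreamCreate(&stream);
  PD_ConfigSetExecStream(config, stream);

  PD_ConfigDestroy(config);
  cudaStreamDestroy(stream);
  return 0;
}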
@@ -15,6 +15,7 @@
#include "paddle/fluid/inference/capi_exp/pd_predictor.h"

#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/capi_exp/pd_config.h"
#include "paddle/fluid/inference/capi_exp/pd_types.h"
#include "paddle/fluid/inference/capi_exp/pd_utils.h"
#include "paddle/fluid/inference/capi_exp/types_internal.h"
@@ -38,7 +39,6 @@ __pd_give PD_Predictor* PD_PredictorCreate(__pd_take PD_Config* pd_config) {
  paddle_infer::Config* config =
      reinterpret_cast<paddle_infer::Config*>(pd_config);
  pd_predictor->predictor = paddle_infer::CreatePredictor(*config);
-  delete config;
  return pd_predictor;
}
@@ -57,6 +57,30 @@ __pd_give PD_OneDimArrayCstr* PD_PredictorGetInputNames(
  return paddle_infer::CvtVecToOneDimArrayCstr(names);
}
__pd_give PD_IOInfos* PD_PredictorGetInputInfos(
    __pd_keep PD_Predictor* pd_predictor) {
  CHECK_AND_CONVERT_PD_PREDICTOR;
  std::vector<std::string> names = predictor->GetInputNames();
  std::map<std::string, std::vector<int64_t>> input_shapes =
      predictor->GetInputTensorShape();
  std::map<std::string, paddle_infer::DataType> input_dtypes =
      predictor->GetInputTypes();

  PD_IOInfos* input_infos = new PD_IOInfos;
  input_infos->size = names.size();
  input_infos->io_info = names.empty() ? NULL : new PD_IOInfo*[names.size()];
  for (size_t i = 0; i < names.size(); i++) {
    const std::string& name = names[i];
    input_infos->io_info[i] = new PD_IOInfo;
    input_infos->io_info[i]->name = paddle_infer::CvtStrToCstr(name);
    input_infos->io_info[i]->shape =
        paddle_infer::CvtVecToOneDimArrayInt64(input_shapes[name]);
    input_infos->io_info[i]->dtype =
        paddle_infer::CvtFromCxxDatatype(input_dtypes[name]);
  }
  return input_infos;
}
__pd_give PD_OneDimArrayCstr* PD_PredictorGetOutputNames(
    __pd_keep PD_Predictor* pd_predictor) {
  CHECK_AND_CONVERT_PD_PREDICTOR;

@@ -64,6 +88,30 @@ __pd_give PD_OneDimArrayCstr* PD_PredictorGetOutputNames(
  return paddle_infer::CvtVecToOneDimArrayCstr(names);
}
__pd_give PD_IOInfos* PD_PredictorGetOutputInfos(
    __pd_keep PD_Predictor* pd_predictor) {
  CHECK_AND_CONVERT_PD_PREDICTOR;
  std::vector<std::string> names = predictor->GetOutputNames();
  std::map<std::string, std::vector<int64_t>> output_shapes =
      predictor->GetOutputTensorShape();
  std::map<std::string, paddle_infer::DataType> output_dtypes =
      predictor->GetOutputTypes();

  PD_IOInfos* output_infos = new PD_IOInfos;
  output_infos->size = names.size();
  output_infos->io_info = names.empty() ? NULL : new PD_IOInfo*[names.size()];
  for (size_t i = 0; i < names.size(); i++) {
    const std::string& name = names[i];
    output_infos->io_info[i] = new PD_IOInfo;
    output_infos->io_info[i]->name = paddle_infer::CvtStrToCstr(name);
    output_infos->io_info[i]->shape =
        paddle_infer::CvtVecToOneDimArrayInt64(output_shapes[name]);
    output_infos->io_info[i]->dtype =
        paddle_infer::CvtFromCxxDatatype(output_dtypes[name]);
  }
  return output_infos;
}
size_t PD_PredictorGetInputNum(__pd_keep PD_Predictor* pd_predictor) {
  CHECK_AND_CONVERT_PD_PREDICTOR;
  return predictor->GetInputNames().size();
...
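Dropping the delete in PD_PredictorCreate changes config ownership: since the function no longer frees its argument and PD_ConfigDestroy is now NULL-safe, the reading that fits is that the caller releases the config once the predictor exists. A sketch under that assumption (model paths are placeholders):

#include "paddle/fluid/inference/capi_exp/pd_inference_api.h"

int main() {
  PD_Config* config = PD_ConfigCreate();
  PD_ConfigSetModel(config, "./model.pdmodel", "./model.pdiparams");

  // After this change PD_PredictorCreate no longer deletes the config,
  // so the caller disposes of it once the predictor has been created.
  PD_Predictor* predictor = PD_PredictorCreate(config);
  PD_ConfigDestroy(config);

  // ... run inference ...
  PD_PredictorDestroy(predictor);
  return 0;
}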
@@ -30,6 +30,7 @@ typedef struct PD_Predictor PD_Predictor;
typedef struct PD_Config PD_Config;
typedef struct PD_Tensor PD_Tensor;
typedef struct PD_OneDimArrayCstr PD_OneDimArrayCstr;
typedef struct PD_IOInfos PD_IOInfos;
#ifdef __cplusplus
extern "C" {

@@ -60,6 +61,14 @@ PADDLE_CAPI_EXPORT extern __pd_give PD_Predictor* PD_PredictorClone(
PADDLE_CAPI_EXPORT extern __pd_give PD_OneDimArrayCstr*
PD_PredictorGetInputNames(__pd_keep PD_Predictor* pd_predictor);

///
/// \brief Get the input infos (name/shape/dtype)
///
/// \param[in] pd_predictor predictor
/// \return input infos (name/shape/dtype)
///
PADDLE_CAPI_EXPORT extern __pd_give PD_IOInfos* PD_PredictorGetInputInfos(
    __pd_keep PD_Predictor* pd_predictor);

///
/// \brief Get the output names
///
/// \param[in] pd_predictor predictor

@@ -67,7 +76,14 @@ PD_PredictorGetInputNames(__pd_keep PD_Predictor* pd_predictor);
///
PADDLE_CAPI_EXPORT extern __pd_give PD_OneDimArrayCstr*
PD_PredictorGetOutputNames(__pd_keep PD_Predictor* pd_predictor);
///
/// \brief Get the output infos (name/shape/dtype)
///
/// \param[in] pd_predictor predictor
/// \return output infos (name/shape/dtype)
///
PADDLE_CAPI_EXPORT extern __pd_give PD_IOInfos* PD_PredictorGetOutputInfos(
    __pd_keep PD_Predictor* pd_predictor);
///
/// \brief Get the input number
///
...
@@ -29,6 +29,11 @@ typedef struct PD_OneDimArraySize {
  size_t* data;
} PD_OneDimArraySize;  // std::vector<size_t>
typedef struct PD_OneDimArrayInt64 {
  size_t size;
  int64_t* data;
} PD_OneDimArrayInt64;  // std::vector<int64_t>
typedef struct PD_OneDimArrayCstr {
  size_t size;
  char** data;

@@ -43,3 +48,14 @@ typedef struct PD_TwoDimArraySize {
  size_t size;
  PD_OneDimArraySize** data;
} PD_TwoDimArraySize;  // std::vector<std::vector<size_t>>
typedef struct PD_IOInfo {
  PD_Cstr* name;
  PD_OneDimArrayInt64* shape;
  PD_DataType dtype;
} PD_IOInfo;  // input or output info

typedef struct PD_IOInfos {
  size_t size;
  PD_IOInfo** io_info;
} PD_IOInfos;  // inputs or outputs info
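PD_IOInfo mirrors one entry of the C++ metadata maps (name, shape, dtype), and PD_IOInfos is a counted array of pointers to them. A minimal sketch of walking the structure for a predictor's outputs; dimensions unknown at compile time typically surface as -1:

#include <stdio.h>

#include "paddle/fluid/inference/capi_exp/pd_inference_api.h"

// Print the output signature of an already-created predictor.
void print_output_infos(PD_Predictor* predictor) {
  PD_IOInfos* out_infos = PD_PredictorGetOutputInfos(predictor);
  for (size_t i = 0; i < out_infos->size; ++i) {
    const PD_IOInfo* info = out_infos->io_info[i];
    printf("output %s: dtype=%d, rank=%zu\n", info->name->data,
           (int)info->dtype, info->shape->size);
    for (size_t d = 0; d < info->shape->size; ++d) {
      printf("  dim[%zu] = %lld\n", d, (long long)info->shape->data[d]);
    }
  }
  // The caller owns the returned tree; free it with the paired destructor.
  PD_IOInfosDestroy(out_infos);
}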
@@ -11,12 +11,10 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

+#include "paddle/fluid/inference/capi_exp/pd_utils.h"
#include <string>

#include "paddle/fluid/inference/api/paddle_inference_api.h"
-#include "paddle/fluid/inference/capi_exp/pd_utils.h"
#include "paddle/fluid/inference/capi_exp/utils_internal.h"
#include "paddle/fluid/platform/enforce.h"
@@ -62,6 +60,7 @@
ONE_DIM_ARRAY_UTILS_FUNC_IMPL(int32_t, Int32, int)
ONE_DIM_ARRAY_UTILS_FUNC_IMPL(size_t, Size, size_t)
ONE_DIM_ARRAY_UTILS_FUNC_IMPL(int64_t, Int64, int64_t)

#undef ONE_DIM_ARRAY_UTILS_FUNC_IMPL
#undef CONVERT_ONE_DIM_ARRAY_TO_VEC
@@ -178,6 +177,38 @@ TWO_DIM_ARRAY_UTILS_FUNC_IMPL(size_t, Size, size_t)
#undef CONVERT_VEC_TO_TWO_DIM_ARRAY
#undef DESTROY_TWO_DIM_ARRAY
#ifdef __cplusplus
extern "C" {
#endif

void PD_IOInfoDestroy(__pd_take PD_IOInfo* io_info) {
  if (io_info != NULL) {
    PD_CstrDestroy(io_info->name);
    io_info->name = NULL;
    PD_OneDimArrayInt64Destroy(io_info->shape);
    io_info->shape = NULL;
    delete io_info;
  }
}

void PD_IOInfosDestroy(__pd_take PD_IOInfos* io_infos) {
  if (io_infos != NULL) {
    if (io_infos->size != 0) {
      for (size_t index = 0; index < io_infos->size; ++index) {
        PD_IOInfoDestroy(io_infos->io_info[index]);
      }
      io_infos->size = 0;
    }
    delete[] io_infos->io_info;
    io_infos->io_info = NULL;
    delete io_infos;
  }
}

#ifdef __cplusplus
}  // extern "C"
#endif
namespace paddle_infer {

PlaceType CvtToCxxPlaceType(PD_PlaceType place_type) {
...
@@ -41,6 +41,14 @@ extern "C" {
PADDLE_CAPI_EXPORT extern void PD_OneDimArrayInt32Destroy(
    __pd_take PD_OneDimArrayInt32* array);
///
/// \brief Destroy the PD_OneDimArrayInt64 object pointed to by the pointer.
///
/// \param[in] array pointer to the PD_OneDimArrayInt64 object.
///
PADDLE_CAPI_EXPORT extern void PD_OneDimArrayInt64Destroy(
    __pd_take PD_OneDimArrayInt64* array);
///
/// \brief Destroy the PD_OneDimArrayCstr object pointed to by the pointer.
///
@@ -74,6 +82,21 @@ PADDLE_CAPI_EXPORT extern void PD_TwoDimArraySizeDestroy(
///
PADDLE_CAPI_EXPORT extern void PD_CstrDestroy(__pd_take PD_Cstr* cstr);
///
/// \brief Destroy the PD_IOInfo object pointed to by the pointer.
///
/// \param[in] io_info pointer to the PD_IOInfo object.
///
PADDLE_CAPI_EXPORT extern void PD_IOInfoDestroy(__pd_take PD_IOInfo* io_info);

///
/// \brief Destroy the PD_IOInfos object pointed to by the pointer.
///
/// \param[in] io_infos pointer to the PD_IOInfos object.
///
PADDLE_CAPI_EXPORT extern void PD_IOInfosDestroy(
    __pd_take PD_IOInfos* io_infos);
#ifdef __cplusplus
}  // extern "C"
#endif
@@ -44,6 +44,16 @@ namespace paddle_infer {
__pd_give PD_OneDimArrayInt32* CvtVecToOneDimArrayInt32(
    const std::vector<int>& vec);
///
/// \brief Convert the 'std::vector<int64_t>' object to a 'PD_OneDimArrayInt64'
/// object.
///
/// \param[in] vec source object.
/// \return target object.
///
__pd_give PD_OneDimArrayInt64* CvtVecToOneDimArrayInt64(
    const std::vector<int64_t>& vec);
///
/// \brief Convert the 'PD_OneDimArrayInt32' object to a 'std::vector<int>'
/// object.

@@ -54,6 +64,16 @@ __pd_give PD_OneDimArrayInt32* CvtVecToOneDimArrayInt32(
std::vector<int> CvtOneDimArrayToVecInt32(
    __pd_keep const PD_OneDimArrayInt32* array);
///
/// \brief Convert the 'PD_OneDimArrayInt64' object to a 'std::vector<int64_t>'
/// object.
///
/// \param[in] array source object.
/// \return target object.
///
std::vector<int64_t> CvtOneDimArrayToVecInt64(
    __pd_keep const PD_OneDimArrayInt64* array);
///
/// \brief Convert the 'std::vector<size_t>' object to a 'PD_OneDimArraySize'
/// object.
...
@@ -157,7 +157,7 @@ func (config *Config) UseFcPadding() bool {
/// \param deviceId the GPU card to use.
///
func (config *Config) EnableUseGpu(memorySize uint64, deviceId int32) {
-	C.PD_ConfigEnableUseGpu(config.c, C.uint64_t(memorySize), C.int32_t(deviceId))
+	C.PD_ConfigEnableUseGpu(config.c, C.uint64_t(memorySize), C.int32_t(deviceId), 0)
}
///
...
@@ -19,6 +19,10 @@ limitations under the License. */
#include <string>
#include <vector>
#if defined(PADDLE_WITH_CUDA)
#include <cuda_runtime.h>
#endif
#include "paddle/fluid/inference/capi_exp/pd_inference_api.h" #include "paddle/fluid/inference/capi_exp/pd_inference_api.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h" #include "paddle/fluid/inference/tests/api/tester_helper.h"
@@ -37,7 +41,7 @@ TEST(PD_Config, gpu_interface) {
  PD_ConfigSetModel(config, prog_file.c_str(), param_file.c_str());
  PD_ConfigSetOptimCacheDir(config, opt_cache_dir.c_str());
-  PD_ConfigEnableUseGpu(config, 100, 0);
+  PD_ConfigEnableUseGpu(config, 100, 0, 0);
  bool use_gpu = PD_ConfigUseGpu(config);
  EXPECT_TRUE(use_gpu);
  int init_size = PD_ConfigMemoryPoolInitSizeMb(config);
@@ -84,6 +88,14 @@ TEST(PD_Config, gpu_interface) {
  bool thread_local_thread = PD_ConfigThreadLocalStreamEnabled(config);
  EXPECT_TRUE(thread_local_thread);
#if defined(PADDLE_WITH_CUDA)
  {
    cudaStream_t external_stream;
    cudaStreamCreate(&external_stream);
    PD_ConfigSetExecStream(config, external_stream);
  }
#endif
  PD_ConfigDisableGpu(config);
  PD_ConfigDestroy(config);
}
@@ -104,7 +116,7 @@ TEST(PD_Config, use_gpu) {
  const char* model_dir_ = PD_ConfigGetModelDir(config);
  LOG(INFO) << model_dir_;
-  PD_ConfigEnableUseGpu(config, 100, 0);
+  PD_ConfigEnableUseGpu(config, 100, 0, 0);
  bool use_gpu = PD_ConfigUseGpu(config);
  EXPECT_TRUE(use_gpu);
  int device_id = PD_ConfigGpuDeviceId(config);
@@ -142,7 +154,7 @@
TEST(PD_Config, trt_int8) {
  std::string model_dir = FLAGS_infer_model + "/mobilenet";
  PD_Config* config = PD_ConfigCreate();
-  PD_ConfigEnableUseGpu(config, 100, 0);
+  PD_ConfigEnableUseGpu(config, 100, 0, 0);
  PD_ConfigEnableTensorRtEngine(
      config, 1 << 20, 1, 3, PD_PRECISION_INT8, FALSE, TRUE);
  bool trt_enable = PD_ConfigTensorRtEngineEnabled(config);
@@ -153,7 +165,7 @@
TEST(PD_Config, trt_fp16) {
  std::string model_dir = FLAGS_infer_model + "/mobilenet";
  PD_Config* config = PD_ConfigCreate();
-  PD_ConfigEnableUseGpu(config, 100, 0);
+  PD_ConfigEnableUseGpu(config, 100, 0, 0);
  PD_ConfigEnableTensorRtEngine(
      config, 1 << 20, 1, 3, PD_PRECISION_HALF, FALSE, FALSE);
  bool trt_enable = PD_ConfigTensorRtEngineEnabled(config);
...
@@ -37,6 +37,9 @@ void predictor_run() {
  PD_OneDimArrayCstr* input_names = PD_PredictorGetInputNames(predictor);
  LOG(INFO) << "The inputs' size is: " << input_names->size;
  EXPECT_EQ(input_names->size, 2u);
  PD_IOInfos* in_infos = PD_PredictorGetInputInfos(predictor);
  EXPECT_EQ(in_infos->size, 2u);
  PD_IOInfos* out_infos = PD_PredictorGetOutputInfos(predictor);
  int32_t shape_0[4] = {1, 3, 224, 224};
  float data_0[1 * 3 * 224 * 224] = {0};

@@ -79,6 +82,8 @@ void predictor_run() {
  PD_TensorDestroy(input_1);
  PD_TensorDestroy(input_0);
  PD_OneDimArrayCstrDestroy(input_names);
  PD_IOInfosDestroy(in_infos);
  PD_IOInfosDestroy(out_infos);
  PD_PredictorDestroy(predictor);
}
...
@@ -85,6 +85,10 @@ TEST(PD_Config, interface) {
  PD_ConfigEnableMkldnnBfloat16(config);
  PD_ConfigSetBfloat16Op(config, 1, &ops_name);
  PD_ConfigEnableMkldnnInt8(config);
  bool mkldnn_int8_enabled = PD_ConfigMkldnnInt8Enabled(config);
  EXPECT_TRUE(mkldnn_int8_enabled);
#endif

  PD_ConfigEnableONNXRuntime(config);
...