Unverified commit e3f39833, authored by H heliqi, committed by GitHub

[Paddle Inference]Support PaddlePaddle Backend on Triton (#49758)

* support PaddlePaddle Backend on Triton

* fix test cases

* fix Codestyle

* add test case

* add test case
Parent a1b2e1e2
......@@ -1609,6 +1609,51 @@ std::vector<std::string> AnalysisPredictor::GetOutputNames() {
return output_names;
}
std::map<std::string, std::vector<int64_t>>
AnalysisPredictor::GetOutputTensorShape() {
std::map<std::string, std::vector<int64_t>> output_shapes;
std::vector<std::string> names = GetOutputNames();
for (const auto &name : names) {
auto *var = inference_program_->Block(0).FindVar(name);
PADDLE_ENFORCE_NOT_NULL(var,
platform::errors::PreconditionNotMet(
"Output %s does not exist.", name));
output_shapes[name] = var->GetShape();
}
return output_shapes;
}
std::map<std::string, paddle_infer::DataType>
AnalysisPredictor::GetOutputTypes() {
std::map<std::string, paddle_infer::DataType> output_type;
std::vector<std::string> names = GetOutputNames();
for (const auto &name : names) {
auto *var = inference_program_->Block(0).FindVar(name);
PADDLE_ENFORCE_NOT_NULL(
var,
platform::errors::PreconditionNotMet(
"Output %s does not exist inference_program_.", name));
auto dtype = var->GetDataType();
if (dtype == paddle::framework::proto::VarType::FP32) {
output_type[name] = paddle_infer::DataType::FLOAT32;
} else if (dtype == paddle::framework::proto::VarType::FP16) {
output_type[name] = paddle_infer::DataType::FLOAT16;
} else if (dtype == paddle::framework::proto::VarType::INT64) {
output_type[name] = paddle_infer::DataType::INT64;
} else if (dtype == paddle::framework::proto::VarType::INT32) {
output_type[name] = paddle_infer::DataType::INT32;
} else if (dtype == paddle::framework::proto::VarType::UINT8) {
output_type[name] = paddle_infer::DataType::UINT8;
} else if (dtype == paddle::framework::proto::VarType::INT8) {
output_type[name] = paddle_infer::DataType::INT8;
} else {
PADDLE_THROW(paddle::platform::errors::Unimplemented(
"Unsupported data type `%s` when get output dtype ", dtype));
}
}
return output_type;
}
std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetInputTensor(
const std::string &name) {
framework::Scope *scope;
......@@ -2477,6 +2522,10 @@ std::vector<std::string> Predictor::GetInputNames() {
return predictor_->GetInputNames();
}
std::map<std::string, std::vector<int64_t>> Predictor::GetInputTensorShape() {
return predictor_->GetInputTensorShape();
}
std::map<std::string, DataType> Predictor::GetInputTypes() {
return predictor_->GetInputTypes();
}
......@@ -2493,6 +2542,14 @@ std::unique_ptr<Tensor> Predictor::GetOutputHandle(const std::string &name) {
return predictor_->GetOutputTensor(name);
}
std::map<std::string, std::vector<int64_t>> Predictor::GetOutputTensorShape() {
return predictor_->GetOutputTensorShape();
}
std::map<std::string, DataType> Predictor::GetOutputTypes() {
return predictor_->GetOutputTypes();
}
bool Predictor::Run() { return predictor_->ZeroCopyRun(); }
std::unique_ptr<Predictor> Predictor::Clone(void *stream) {
......
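For context, a minimal sketch (not part of this diff) of how the new query interfaces can be used from the C++ API; the header name and model paths below are illustrative placeholders:

#include <iostream>
#include "paddle_inference_api.h"

int main() {
  paddle_infer::Config config;
  config.SetModel("./model.pdmodel", "./model.pdiparams");  // placeholder paths
  auto predictor = paddle_infer::CreatePredictor(config);

  // New in this change: query output shapes/dtypes without running the model.
  auto output_shapes = predictor->GetOutputTensorShape();
  auto output_types = predictor->GetOutputTypes();
  // The matching input-side interfaces are also exposed on Predictor.
  auto input_shapes = predictor->GetInputTensorShape();
  auto input_types = predictor->GetInputTypes();

  std::cout << input_shapes.size() << " inputs, " << output_shapes.size()
            << " outputs" << std::endl;
  for (const auto &kv : output_shapes) {
    std::cout << "output " << kv.first << ": rank " << kv.second.size()
              << ", dtype " << static_cast<int>(output_types[kv.first]) << std::endl;
  }
  return 0;
}

This is the kind of metadata a serving framework such as Triton needs in order to declare the model's input/output signature before any request is run.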
......@@ -191,6 +191,18 @@ class AnalysisPredictor : public PaddlePredictor {
/// \return the map of input names and type
///
std::map<std::string, paddle_infer::DataType> GetInputTypes() override;
///
/// \brief Get all output names and their corresponding shapes
///
/// \return the map of output names and shapes
///
std::map<std::string, std::vector<int64_t>> GetOutputTensorShape() override;
///
/// \brief Get all output names and their corresponding types
///
/// \return the map of output names and types
///
std::map<std::string, paddle_infer::DataType> GetOutputTypes() override;
///
/// \brief Run the prediction engine
......
......@@ -106,6 +106,8 @@ TEST(AnalysisPredictor, analysis_on) {
ASSERT_EQ(predictor->scope_->parent(), nullptr);
ASSERT_EQ(predictor->sub_scope_->parent(), predictor->scope_.get());
ASSERT_EQ(predictor->GetInputTypes().size(), 4UL);
ASSERT_EQ(predictor->GetOutputTypes().size(), 1UL);
ASSERT_EQ(predictor->GetOutputTensorShape().size(), 1UL);
// 2. Dummy Input Data
int64_t data[4] = {1, 2, 3, 4};
PaddleTensor tensor;
......@@ -430,6 +432,8 @@ TEST(Predictor, Run) {
auto predictor = CreatePredictor(config);
ASSERT_EQ(predictor->GetInputTypes().size(), 4UL);
ASSERT_EQ(predictor->GetOutputTypes().size(), 1UL);
ASSERT_EQ(predictor->GetOutputTensorShape().size(), 1UL);
auto w0 = predictor->GetInputHandle("firstw");
auto w1 = predictor->GetInputHandle("secondw");
......
......@@ -243,6 +243,19 @@ class PD_INFER_DECL PaddlePredictor {
/// \return Output tensor names.
virtual std::vector<std::string> GetOutputNames() { return {}; }
/// \brief Get the output shapes of the model.
/// \return A map containing all the output names and shapes defined in the
/// model.
virtual std::map<std::string, std::vector<int64_t>> GetOutputTensorShape() {
return {};
}
/// \brief Get the output types of the model.
/// \return A map containing all the output names and types defined in the model.
virtual std::map<std::string, paddle_infer::DataType> GetOutputTypes() {
return {};
}
/// \brief Get the input ZeroCopyTensor by name.
/// Be inherited by AnalysisPredictor, Only used in ZeroCopy scenarios.
/// The name is obtained from the GetInputNames() interface.
......
......@@ -92,6 +92,13 @@ class PD_INFER_DECL Predictor {
///
explicit Predictor(const Config& config);
///
/// \brief Get all input names and their corresponding shapes
///
/// \return the map of input names and shapes
///
std::map<std::string, std::vector<int64_t>> GetInputTensorShape();
///
/// \brief Get all input names and their corresponding type
///
......@@ -136,6 +143,20 @@ class PD_INFER_DECL Predictor {
///
std::unique_ptr<Tensor> GetOutputHandle(const std::string& name);
///
/// \brief Get all output names and their corresponding shapes
///
/// \return the map of output names and shapes
///
std::map<std::string, std::vector<int64_t>> GetOutputTensorShape();
///
/// \brief Get all output names and their corresponding types
///
/// \return the map of output names and types
///
std::map<std::string, DataType> GetOutputTypes();
///
/// \brief Clone to get the new predictor. thread safe.
///
......
......@@ -55,8 +55,9 @@ __pd_give PD_Config* PD_ConfigCreate() {
}
void PD_ConfigDestroy(__pd_take PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
delete reinterpret_cast<Config*>(config);
if (pd_config != NULL) {
delete reinterpret_cast<Config*>(pd_config);
}
}
void PD_ConfigSetModel(__pd_keep PD_Config* pd_config,
......@@ -116,9 +117,12 @@ PD_Bool PD_ConfigUseFcPadding(__pd_keep PD_Config* pd_config) {
void PD_ConfigEnableUseGpu(__pd_keep PD_Config* pd_config,
uint64_t memory_pool_init_size_mb,
int32_t device_id) {
int32_t device_id,
PD_PrecisionType precision_mode) {
CHECK_AND_CONVERT_PD_CONFIG;
config->EnableUseGpu(memory_pool_init_size_mb, device_id);
config->EnableUseGpu(memory_pool_init_size_mb,
device_id,
ConvertToCxxPrecisionType(precision_mode));
}
void PD_ConfigDisableGpu(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
......@@ -427,6 +431,14 @@ void PD_ConfigSetBfloat16Op(__pd_keep PD_Config* pd_config,
}
config->SetBfloat16Op(std::move(op_names));
}
void PD_ConfigEnableMkldnnInt8(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
config->EnableMkldnnInt8();
}
PD_Bool PD_ConfigMkldnnInt8Enabled(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
return config->mkldnn_int8_enabled();
}
PD_Bool PD_ConfigThreadLocalStreamEnabled(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
return config->thread_local_stream_enabled();
......@@ -484,6 +496,10 @@ void PD_ConfigEnableGpuMultiStream(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
config->EnableGpuMultiStream();
}
void PD_ConfigSetExecStream(__pd_keep PD_Config* pd_config, void* stream) {
CHECK_AND_CONVERT_PD_CONFIG;
return config->SetExecStream(stream);
}
void PD_ConfigPartiallyRelease(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
config->PartiallyRelease();
......
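A minimal C-API sketch of the extended GPU setup, not part of this diff; it assumes PD_PRECISION_FLOAT32 is the FP32 member of PD_PrecisionType and that the build has CUDA available for the external stream:

#include <cuda_runtime.h>
#include "pd_inference_api.h"  // illustrative umbrella header for the capi_exp interface

void ConfigureGpu(PD_Config* config) {
  // The extended signature takes the inference precision as a fourth argument.
  PD_ConfigEnableUseGpu(config,
                        /*memory_pool_init_size_mb=*/100,
                        /*device_id=*/0,
                        PD_PRECISION_FLOAT32);

  // Optional: hand an externally created CUDA stream to the predictor.
  // If PD_ConfigSetExecStream is never called, a stream is created internally.
  cudaStream_t stream;
  cudaStreamCreate(&stream);
  PD_ConfigSetExecStream(config, stream);
}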
......@@ -132,11 +132,13 @@ PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigUseFcPadding(
/// \param[in] memory_pool_init_size_mb initial size of the GPU memory pool in
/// MB.
/// \param[in] device_id device_id the GPU card to use.
/// \param[in] precision_mode the precision used in Paddle-GPU inference.
///
PADDLE_CAPI_EXPORT extern void PD_ConfigEnableUseGpu(
__pd_keep PD_Config* pd_config,
uint64_t memory_pool_init_size_mb,
int32_t device_id);
int32_t device_id,
PD_PrecisionType precision_mode);
///
/// \brief Turn off GPU.
///
......@@ -607,6 +609,22 @@ PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigMkldnnBfloat16Enabled(
PADDLE_CAPI_EXPORT extern void PD_ConfigSetBfloat16Op(
__pd_keep PD_Config* pd_config, size_t ops_num, const char** op_list);
///
/// \brief Turn on MKLDNN int8.
///
/// \param[in] pd_config config
///
PADDLE_CAPI_EXPORT extern void PD_ConfigEnableMkldnnInt8(
__pd_keep PD_Config* pd_config);
///
/// \brief A boolean state telling whether to use the MKLDNN int8.
///
/// \param[in] pd_config config
/// \return Whether to use the MKLDNN int8.
///
PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigMkldnnInt8Enabled(
__pd_keep PD_Config* pd_config);
///
/// \brief Enable the GPU multi-computing stream feature.
/// NOTE: The current behavior of this interface is to bind the computation
/// stream to the thread, and this behavior may be changed in the future.
......@@ -625,6 +643,12 @@ PADDLE_CAPI_EXPORT extern void PD_ConfigEnableGpuMultiStream(
PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigThreadLocalStreamEnabled(
__pd_keep PD_Config* pd_config);
///
/// \brief Set the execution stream. If not set, a stream will be created
/// internally.
///
/// \param[in] pd_config config
/// \param[in] stream the execution stream.
///
PADDLE_CAPI_EXPORT extern void PD_ConfigSetExecStream(
__pd_keep PD_Config* pd_config, void* stream);
///
/// \brief Specify the memory buffer of program and parameter.
/// Used when model and params are loaded directly from memory.
///
......
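On the CPU side, the new int8 switch mirrors the existing MKLDNN bfloat16 toggles. A short sketch (not part of this diff), using only calls that appear in this change:

#include <stdio.h>
#include "pd_inference_api.h"  // illustrative umbrella header

void ConfigureCpuInt8(PD_Config* config) {
  PD_ConfigDisableGpu(config);
  PD_ConfigEnableMkldnnInt8(config);
  if (!PD_ConfigMkldnnInt8Enabled(config)) {
    // e.g. Paddle was built without MKLDNN support; default kernels are used.
    printf("MKLDNN int8 not available.\n");
  }
}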
......@@ -15,6 +15,7 @@
#include "paddle/fluid/inference/capi_exp/pd_predictor.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/capi_exp/pd_config.h"
#include "paddle/fluid/inference/capi_exp/pd_types.h"
#include "paddle/fluid/inference/capi_exp/pd_utils.h"
#include "paddle/fluid/inference/capi_exp/types_internal.h"
......@@ -38,7 +39,6 @@ __pd_give PD_Predictor* PD_PredictorCreate(__pd_take PD_Config* pd_config) {
paddle_infer::Config* config =
reinterpret_cast<paddle_infer::Config*>(pd_config);
pd_predictor->predictor = paddle_infer::CreatePredictor(*config);
delete config;
return pd_predictor;
}
......@@ -57,6 +57,30 @@ __pd_give PD_OneDimArrayCstr* PD_PredictorGetInputNames(
return paddle_infer::CvtVecToOneDimArrayCstr(names);
}
__pd_give PD_IOInfos* PD_PredictorGetInputInfos(
__pd_keep PD_Predictor* pd_predictor) {
CHECK_AND_CONVERT_PD_PREDICTOR;
std::vector<std::string> names = predictor->GetInputNames();
std::map<std::string, std::vector<int64_t>> input_shapes =
predictor->GetInputTensorShape();
std::map<std::string, paddle_infer::DataType> input_dtypes =
predictor->GetInputTypes();
PD_IOInfos* input_infos = new PD_IOInfos;
input_infos->size = names.size();
input_infos->io_info = names.empty() ? NULL : new PD_IOInfo*[names.size()];
for (size_t i = 0; i < names.size(); i++) {
const std::string& name = names[i];
input_infos->io_info[i] = new PD_IOInfo;
input_infos->io_info[i]->name = paddle_infer::CvtStrToCstr(name);
input_infos->io_info[i]->shape =
paddle_infer::CvtVecToOneDimArrayInt64(input_shapes[name]);
input_infos->io_info[i]->dtype =
paddle_infer::CvtFromCxxDatatype(input_dtypes[name]);
}
return input_infos;
}
__pd_give PD_OneDimArrayCstr* PD_PredictorGetOutputNames(
__pd_keep PD_Predictor* pd_predictor) {
CHECK_AND_CONVERT_PD_PREDICTOR;
......@@ -64,6 +88,30 @@ __pd_give PD_OneDimArrayCstr* PD_PredictorGetOutputNames(
return paddle_infer::CvtVecToOneDimArrayCstr(names);
}
__pd_give PD_IOInfos* PD_PredictorGetOutputInfos(
__pd_keep PD_Predictor* pd_predictor) {
CHECK_AND_CONVERT_PD_PREDICTOR;
std::vector<std::string> names = predictor->GetOutputNames();
std::map<std::string, std::vector<int64_t>> output_shapes =
predictor->GetOutputTensorShape();
std::map<std::string, paddle_infer::DataType> output_dtypes =
predictor->GetOutputTypes();
PD_IOInfos* output_infos = new PD_IOInfos;
output_infos->size = names.size();
output_infos->io_info = names.empty() ? NULL : new PD_IOInfo*[names.size()];
for (size_t i = 0; i < names.size(); i++) {
const std::string& name = names[i];
output_infos->io_info[i] = new PD_IOInfo;
output_infos->io_info[i]->name = paddle_infer::CvtStrToCstr(name);
output_infos->io_info[i]->shape =
paddle_infer::CvtVecToOneDimArrayInt64(output_shapes[name]);
output_infos->io_info[i]->dtype =
paddle_infer::CvtFromCxxDatatype(output_dtypes[name]);
}
return output_infos;
}
size_t PD_PredictorGetInputNum(__pd_keep PD_Predictor* pd_predictor) {
CHECK_AND_CONVERT_PD_PREDICTOR;
return predictor->GetInputNames().size();
......
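A sketch of retrieving and releasing the new I/O metadata through the C API (not part of this diff; the model paths are placeholders). Note that with this change PD_PredictorCreate no longer frees the config internally, so the caller is expected to release it, and PD_ConfigDestroy is now NULL-safe:

#include <stdio.h>
#include "pd_inference_api.h"  // illustrative umbrella header

void DumpIOInfos(void) {
  PD_Config* config = PD_ConfigCreate();
  PD_ConfigSetModel(config, "model.pdmodel", "model.pdiparams");  // placeholder paths
  PD_Predictor* predictor = PD_PredictorCreate(config);
  PD_ConfigDestroy(config);  // no longer deleted inside PD_PredictorCreate

  PD_IOInfos* inputs = PD_PredictorGetInputInfos(predictor);
  PD_IOInfos* outputs = PD_PredictorGetOutputInfos(predictor);

  for (size_t i = 0; i < outputs->size; ++i) {
    PD_IOInfo* info = outputs->io_info[i];
    printf("output %s: rank %zu, dtype %d\n",
           info->name->data, info->shape->size, (int)info->dtype);
  }

  // The returned structs are __pd_give: the caller owns and must destroy them.
  PD_IOInfosDestroy(inputs);
  PD_IOInfosDestroy(outputs);
  PD_PredictorDestroy(predictor);
}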
......@@ -30,6 +30,7 @@ typedef struct PD_Predictor PD_Predictor;
typedef struct PD_Config PD_Config;
typedef struct PD_Tensor PD_Tensor;
typedef struct PD_OneDimArrayCstr PD_OneDimArrayCstr;
typedef struct PD_IOInfos PD_IOInfos;
#ifdef __cplusplus
extern "C" {
......@@ -60,6 +61,14 @@ PADDLE_CAPI_EXPORT extern __pd_give PD_Predictor* PD_PredictorClone(
PADDLE_CAPI_EXPORT extern __pd_give PD_OneDimArrayCstr*
PD_PredictorGetInputNames(__pd_keep PD_Predictor* pd_predictor);
///
/// \brief Get the input infos (name/shape/dtype)
///
/// \param[in] pd_predictor predictor
/// \return input infos (name/shape/dtype)
///
PADDLE_CAPI_EXPORT extern __pd_give PD_IOInfos* PD_PredictorGetInputInfos(
__pd_keep PD_Predictor* pd_predictor);
///
/// \brief Get the output names
///
/// \param[in] pd_predictor predictor
......@@ -67,7 +76,14 @@ PD_PredictorGetInputNames(__pd_keep PD_Predictor* pd_predictor);
///
PADDLE_CAPI_EXPORT extern __pd_give PD_OneDimArrayCstr*
PD_PredictorGetOutputNames(__pd_keep PD_Predictor* pd_predictor);
///
/// \brief Get the output infos (name/shape/dtype)
///
/// \param[in] pd_predictor predictor
/// \return output infos (name/shape/dtype)
///
PADDLE_CAPI_EXPORT extern __pd_give PD_IOInfos* PD_PredictorGetOutputInfos(
__pd_keep PD_Predictor* pd_predictor);
///
/// \brief Get the input number
///
......
......@@ -29,6 +29,11 @@ typedef struct PD_OneDimArraySize {
size_t* data;
} PD_OneDimArraySize; // std::vector<size_t>
typedef struct PD_OneDimArrayInt64 {
size_t size;
int64_t* data;
} PD_OneDimArrayInt64; // std::vector<int64_t>
typedef struct PD_OneDimArrayCstr {
size_t size;
char** data;
......@@ -43,3 +48,14 @@ typedef struct PD_TwoDimArraySize {
size_t size;
PD_OneDimArraySize** data;
} PD_TwoDimArraySize; // std::vector<std::vector<size_t>>
typedef struct PD_IOInfo {
PD_Cstr* name;
PD_OneDimArrayInt64* shape;
PD_DataType dtype;
} PD_IOInfo; // input or output info
typedef struct PD_IOInfos {
size_t size;
PD_IOInfo** io_info;
} PD_IOInfos; // inputs or outputs info
......@@ -11,12 +11,10 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/capi_exp/pd_utils.h"
#include <string>
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/capi_exp/pd_utils.h"
#include "paddle/fluid/inference/capi_exp/utils_internal.h"
#include "paddle/fluid/platform/enforce.h"
......@@ -62,6 +60,7 @@
ONE_DIM_ARRAY_UTILS_FUNC_IMPL(int32_t, Int32, int)
ONE_DIM_ARRAY_UTILS_FUNC_IMPL(size_t, Size, size_t)
ONE_DIM_ARRAY_UTILS_FUNC_IMPL(int64_t, Int64, int64_t)
#undef ONE_DIM_ARRAY_UTILS_FUNC_IMPL
#undef CONVERT_ONE_DIM_ARRAY_TO_VEC
......@@ -178,6 +177,38 @@ TWO_DIM_ARRAY_UTILS_FUNC_IMPL(size_t, Size, size_t)
#undef CONVERT_VEC_TO_TWO_DIM_ARRAY
#undef DESTROY_TWO_DIM_ARRAY
#ifdef __cplusplus
extern "C" {
#endif
void PD_IOInfoDestroy(__pd_take PD_IOInfo* io_info) {
if (io_info != NULL) {
PD_CstrDestroy(io_info->name);
io_info->name = NULL;
PD_OneDimArrayInt64Destroy(io_info->shape);
io_info->shape = NULL;
delete io_info;
}
}
void PD_IOInfosDestroy(__pd_take PD_IOInfos* io_infos) {
if (io_infos != NULL) {
if (io_infos->size != 0) {
for (size_t index = 0; index < io_infos->size; ++index) {
PD_IOInfoDestroy(io_infos->io_info[index]);
}
io_infos->size = 0;
}
delete[] io_infos->io_info;
io_infos->io_info = NULL;
delete io_infos;
}
}
#ifdef __cplusplus
} // extern "C"
#endif
namespace paddle_infer {
PlaceType CvtToCxxPlaceType(PD_PlaceType place_type) {
......
......@@ -41,6 +41,14 @@ extern "C" {
PADDLE_CAPI_EXPORT extern void PD_OneDimArrayInt32Destroy(
__pd_take PD_OneDimArrayInt32* array);
///
/// \brief Destroy the PD_OneDimArrayInt64 object pointed to by the pointer.
///
/// \param[in] array pointer to the PD_OneDimArrayInt64 object.
///
PADDLE_CAPI_EXPORT extern void PD_OneDimArrayInt64Destroy(
__pd_take PD_OneDimArrayInt64* array);
///
/// \brief Destroy the PD_OneDimArrayCstr object pointed to by the pointer.
///
......@@ -74,6 +82,21 @@ PADDLE_CAPI_EXPORT extern void PD_TwoDimArraySizeDestroy(
///
PADDLE_CAPI_EXPORT extern void PD_CstrDestroy(__pd_take PD_Cstr* cstr);
///
/// \brief Destroy the PD_IOInfo object pointed to by the pointer.
///
/// \param[in] io_info pointer to the PD_IOInfo object.
///
PADDLE_CAPI_EXPORT extern void PD_IOInfoDestroy(__pd_take PD_IOInfo* io_info);
///
/// \brief Destroy the PD_IOInfos object pointed to by the pointer.
///
/// \param[in] io_infos pointer to the PD_IOInfos object.
///
PADDLE_CAPI_EXPORT extern void PD_IOInfosDestroy(
__pd_take PD_IOInfos* io_infos);
#ifdef __cplusplus
} // extern "C"
#endif
......@@ -44,6 +44,16 @@ namespace paddle_infer {
__pd_give PD_OneDimArrayInt32* CvtVecToOneDimArrayInt32(
const std::vector<int>& vec);
///
/// \brief Convert the 'std::vector<int64_t>' object to a 'PD_OneDimArrayInt64'
/// object.
///
/// \param[in] vec source object.
/// \return target object.
///
__pd_give PD_OneDimArrayInt64* CvtVecToOneDimArrayInt64(
const std::vector<int64_t>& vec);
///
/// \brief Convert the 'PD_OneDimArrayInt32' object to a 'std::vector<int>'
/// object.
......@@ -54,6 +64,16 @@ __pd_give PD_OneDimArrayInt32* CvtVecToOneDimArrayInt32(
std::vector<int> CvtOneDimArrayToVecInt32(
__pd_keep const PD_OneDimArrayInt32* array);
///
/// \brief Convert the 'PD_OneDimArrayInt64' object to a 'std::vector<int64_t>'
/// object.
///
/// \param[in] array source object.
/// \return target object.
///
std::vector<int64_t> CvtOneDimArrayToVecInt64(
__pd_keep const PD_OneDimArrayInt64* array);
///
/// \brief Convert the 'std::vector<size_t>' object to a 'PD_OneDimArraySize'
/// object.
......
......@@ -157,7 +157,7 @@ func (config *Config) UseFcPadding() bool {
/// \param deviceId the GPU card to use.
///
func (config *Config) EnableUseGpu(memorySize uint64, deviceId int32) {
C.PD_ConfigEnableUseGpu(config.c, C.uint64_t(memorySize), C.int32_t(deviceId))
C.PD_ConfigEnableUseGpu(config.c, C.uint64_t(memorySize), C.int32_t(deviceId), 0)
}
///
......
......@@ -19,6 +19,10 @@ limitations under the License. */
#include <string>
#include <vector>
#if defined(PADDLE_WITH_CUDA)
#include <cuda_runtime.h>
#endif
#include "paddle/fluid/inference/capi_exp/pd_inference_api.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h"
......@@ -37,7 +41,7 @@ TEST(PD_Config, gpu_interface) {
PD_ConfigSetModel(config, prog_file.c_str(), param_file.c_str());
PD_ConfigSetOptimCacheDir(config, opt_cache_dir.c_str());
PD_ConfigEnableUseGpu(config, 100, 0);
PD_ConfigEnableUseGpu(config, 100, 0, 0);
bool use_gpu = PD_ConfigUseGpu(config);
EXPECT_TRUE(use_gpu);
int init_size = PD_ConfigMemoryPoolInitSizeMb(config);
......@@ -84,6 +88,14 @@ TEST(PD_Config, gpu_interface) {
bool thread_local_thread = PD_ConfigThreadLocalStreamEnabled(config);
EXPECT_TRUE(thread_local_thread);
#if defined(PADDLE_WITH_CUDA)
{
cudaStream_t external_stream;
cudaStreamCreate(&external_stream);
PD_ConfigSetExecStream(config, external_stream);
}
#endif
PD_ConfigDisableGpu(config);
PD_ConfigDestroy(config);
}
......@@ -104,7 +116,7 @@ TEST(PD_Config, use_gpu) {
const char* model_dir_ = PD_ConfigGetModelDir(config);
LOG(INFO) << model_dir_;
PD_ConfigEnableUseGpu(config, 100, 0);
PD_ConfigEnableUseGpu(config, 100, 0, 0);
bool use_gpu = PD_ConfigUseGpu(config);
EXPECT_TRUE(use_gpu);
int device_id = PD_ConfigGpuDeviceId(config);
......@@ -142,7 +154,7 @@ TEST(PD_Config, use_gpu) {
TEST(PD_Config, trt_int8) {
std::string model_dir = FLAGS_infer_model + "/mobilenet";
PD_Config* config = PD_ConfigCreate();
PD_ConfigEnableUseGpu(config, 100, 0);
PD_ConfigEnableUseGpu(config, 100, 0, 0);
PD_ConfigEnableTensorRtEngine(
config, 1 << 20, 1, 3, PD_PRECISION_INT8, FALSE, TRUE);
bool trt_enable = PD_ConfigTensorRtEngineEnabled(config);
......@@ -153,7 +165,7 @@ TEST(PD_Config, trt_int8) {
TEST(PD_Config, trt_fp16) {
std::string model_dir = FLAGS_infer_model + "/mobilenet";
PD_Config* config = PD_ConfigCreate();
PD_ConfigEnableUseGpu(config, 100, 0);
PD_ConfigEnableUseGpu(config, 100, 0, 0);
PD_ConfigEnableTensorRtEngine(
config, 1 << 20, 1, 3, PD_PRECISION_HALF, FALSE, FALSE);
bool trt_enable = PD_ConfigTensorRtEngineEnabled(config);
......
......@@ -37,6 +37,9 @@ void predictor_run() {
PD_OneDimArrayCstr* input_names = PD_PredictorGetInputNames(predictor);
LOG(INFO) << "The inputs' size is: " << input_names->size;
EXPECT_EQ(input_names->size, 2u);
PD_IOInfos* in_infos = PD_PredictorGetInputInfos(predictor);
EXPECT_EQ(in_infos->size, 2u);
PD_IOInfos* out_infos = PD_PredictorGetOutputInfos(predictor);
int32_t shape_0[4] = {1, 3, 224, 224};
float data_0[1 * 3 * 224 * 224] = {0};
......@@ -79,6 +82,8 @@ void predictor_run() {
PD_TensorDestroy(input_1);
PD_TensorDestroy(input_0);
PD_OneDimArrayCstrDestroy(input_names);
PD_IOInfosDestroy(in_infos);
PD_IOInfosDestroy(out_infos);
PD_PredictorDestroy(predictor);
}
......
......@@ -85,6 +85,10 @@ TEST(PD_Config, interface) {
PD_ConfigEnableMkldnnBfloat16(config);
PD_ConfigSetBfloat16Op(config, 1, &ops_name);
PD_ConfigEnableMkldnnInt8(config);
bool mkldnn_int8_enabled = PD_ConfigMkldnnInt8Enabled(config);
EXPECT_TRUE(mkldnn_int8_enabled);
#endif
PD_ConfigEnableONNXRuntime(config);
......