未验证 提交 c7e3c918 编写于 作者: W Wilber 提交者: GitHub

[Inference] Update go inference api based on new capi. (#33113)

上级 ab0272eb
# Paddle 预测golang API
## 安装
首先cmake编译时打开`-DON_INFER=ON`,在编译目录下得到``paddle_inference_c_install_dir``,将该目录移动到当前目录中并重命名为`paddle_c`
## 在Go中使用Paddle预测
首先创建预测配置
``` go
config := paddle.NewAnalysisConfig()
config.SetModel(model_file, params_file)
config.SwitchUseFeedFetchOps(false)
config.SwitchSpecifyInputNames(true)
```
创建predictor
``` go
predictor := paddle.NewPredictor(config)
```
获取输入Tensor和输出Tensor
``` go
inputs := predictor.GetInputTensors()
outputs := predictor.GetOutputTensors()
```
设置输入数据(假设只有一个输入)
``` go
input := inputs[0]
input.SetValue(data)
input.Reshape([]int32{1, 3, 300, 300})
```
运行预测
``` go
predictor.ZeroCopyRun()
```
获取输出Tensor的真实值
``` go
output := outputs[0]
predictor.GetZeroCopyOutput(output)
value := reflect.ValueOf(output.Value())
shape, dtype := paddle.ShapeAndTypeOf(value)
output_data := value.Interface().([][]float32)
```
## 示例
源码见[mobilenet](./demo/mobilenet.go)
下载[数据](https://paddle-inference-dist.cdn.bcebos.com/mobilenet-test-model-data.tar.gz)并解压到当前目录
运行
```bash
go mod init github.com/paddlepaddle
export LD_LIBRARY_PATH=`pwd`/paddle_c/paddle/lib:$LD_LIBRARY_PATH
go run ./demo/mobilenet.go
```
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import "github.com/paddlepaddle/paddle"
import "strings"
import "io/ioutil"
import "strconv"
import "reflect"
// main loads a mobilenet model through the legacy (capi-based) Go API,
// runs one inference on data read from data/data.txt and prints a short
// summary of the first output values.
func main() {
    config := paddle.NewAnalysisConfig()
    config.SetModel("data/model/__model__", "data/model/__params__")
    config.DisableGlogInfo()
    // Zero-copy mode: feed/fetch ops are bypassed, so input names must be
    // given explicitly.
    config.SwitchUseFeedFetchOps(false)
    config.SwitchSpecifyInputNames(true)
    predictor := paddle.NewPredictor(config)
    println("============== paddle inference ==============")
    println("input num: ", predictor.GetInputNum())
    println("input name: ", predictor.GetInputNames()[0])
    println("output num: ", predictor.GetOutputNum())
    // Fix: this previously printed an *input* name via GetInputNames().
    println("output name: ", predictor.GetOutputNames()[0])
    println("============== run inference =================")
    input := predictor.GetInputTensors()[0]
    output := predictor.GetOutputTensors()[0]
    filename := "data/data.txt"
    data := ReadData(filename)
    // The model expects a single 1x3x300x300 float image.
    input.SetValue(data[:1*3*300*300])
    input.Reshape([]int32{1, 3, 300, 300})
    predictor.SetZeroCopyInput(input)
    predictor.ZeroCopyRun()
    predictor.GetZeroCopyOutput(output)
    println("============= parse output ===================")
    output_val := output.Value()
    value := reflect.ValueOf(output_val)
    shape, dtype := paddle.ShapeAndTypeOf(value)
    // Print the first two elements for whichever dtype the model produced.
    switch dtype {
    case paddle.PaddleDType(paddle.FLOAT32):
        v := value.Interface().([][]float32)
        println("v: ", v[0][0], v[0][1], "...")
    case paddle.PaddleDType(paddle.UINT8):
        v := value.Interface().([][]uint8)
        println("v: ", v[0][0], v[0][1], "...")
    case paddle.PaddleDType(paddle.INT32):
        v := value.Interface().([][]int32)
        println("v: ", v[0][0], v[0][1], "...")
    case paddle.PaddleDType(paddle.INT64):
        v := value.Interface().([][]int64)
        println("v: ", v[0][0], v[0][1], "...")
    }
    println(shape[0], shape[1])
    println(output.Shape()[0])
}
// ReadData reads whitespace-separated float values from filename.
// An unreadable file yields an empty result, and tokens that fail to
// parse are skipped instead of being silently recorded as zero (the
// previous version ignored both errors and only split on single spaces,
// so newline-separated values parsed as 0).
func ReadData(filename string) []float32 {
    file_bytes, err := ioutil.ReadFile(filename)
    if err != nil {
        return nil
    }
    var result []float32
    // strings.Fields splits on any run of whitespace (spaces, tabs,
    // newlines), which is more robust than strings.Split(s, " ").
    for _, tok := range strings.Fields(string(file_bytes)) {
        r, err := strconv.ParseFloat(tok, 32)
        if err != nil {
            continue
        }
        result = append(result, float32(r))
    }
    return result
}
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <paddle_c_api.h>
#include <stdio.h>
#include <stdlib.h>
void SetConfig(PD_AnalysisConfig *);
void ReadData(float *data, int size);
// Demo for the legacy Paddle inference C API: configure a predictor, run
// one inference on a 1x3x300x300 float input and release all resources.
int main(int argc, char *argv[]) {
  PD_AnalysisConfig *config = PD_NewAnalysisConfig();
  SetConfig(config);
  PD_Predictor *predictor = PD_NewPredictor(config);
  int input_num = PD_GetInputNum(predictor);
  printf("Input num: %d\n", input_num);
  int output_num = PD_GetOutputNum(predictor);
  printf("Output num: %d\n", output_num);
  PD_ZeroCopyTensor input;
  PD_InitZeroCopyTensor(&input);
  input.name = const_cast<char *>(PD_GetInputName(predictor, 0));  // NOLINT
  input.data.capacity = sizeof(float) * 1 * 3 * 300 * 300;
  input.data.length = input.data.capacity;
  input.data.data = malloc(input.data.capacity);
  int shape[] = {1, 3, 300, 300};
  // The shape buffer points at a stack array, so `input` must NOT be passed
  // to PD_DestroyZeroCopyTensor (it would try to free `shape`).
  input.shape.data = static_cast<int *>(shape);
  input.shape.capacity = sizeof(shape);
  input.shape.length = sizeof(shape);
  input.dtype = PD_FLOAT32;
  ReadData((float *)input.data.data, 1 * 3 * 300 * 300);  // NOLINT
  PD_SetZeroCopyInput(predictor, &input);
  PD_ZeroCopyRun(predictor);
  PD_ZeroCopyTensor output;
  PD_InitZeroCopyTensor(&output);
  output.name = const_cast<char *>(PD_GetOutputName(predictor, 0));  // NOLINT
  PD_GetZeroCopyOutput(predictor, &output);
  // Clean up: the output tensor's buffers are owned by the C API; the input
  // data buffer was malloc'ed above and was previously leaked.
  PD_DestroyZeroCopyTensor(&output);
  free(input.data.data);
  PD_DeleteAnalysisConfig(config);
  PD_DeletePredictor(predictor);
  return 0;
}
// SetConfig fills the analysis config used by main(): model/params paths,
// zero-copy mode (no feed/fetch ops, explicit input names) and quiet logs.
void SetConfig(PD_AnalysisConfig *config) {
  PD_SetModel(config, "data/model/__model__", "data/model/__params__");
  PD_SwitchUseFeedFetchOps(config, false);
  PD_SwitchSpecifyInputNames(config, true);
  PD_DisableGlogInfo(config);
  // Left for experimentation: disable IR optimization passes.
  // PD_SwitchIrOptim(config, false);
}
// ReadData reads n whitespace-separated floats from data/data.txt into
// `data`. A missing file or a short read aborts the demo with a message
// instead of dereferencing a NULL FILE* or leaving the buffer
// uninitialized (both errors were previously ignored).
void ReadData(float *data, int n) {
  FILE *fp = fopen("data/data.txt", "r");
  if (fp == NULL) {
    fprintf(stderr, "failed to open data/data.txt\n");
    exit(1);
  }
  for (int i = 0; i < n; i++) {
    if (fscanf(fp, "%f", &data[i]) != 1) {
      fprintf(stderr, "data/data.txt: expected %d floats, got %d\n", n, i);
      fclose(fp);
      exit(1);
    }
  }
  fclose(fp);
}
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <pd_inference_api.h>
#include <stdio.h>
#include <stdlib.h>
void ReadData(float* data, int size);
// Demo for the new (capi_exp) Paddle inference C API: build a config, run
// one inference on a 1x3x300x300 float input, and destroy every object in
// reverse order of creation.
int main(int argc, char* argv[]) {
  PD_Config* config = PD_ConfigCreate();
  PD_ConfigSetModel(config, "data/model/__model__", "data/model/__params__");
  PD_ConfigDisableGlogInfo(config);
  PD_Predictor* predictor = PD_PredictorCreate(config);
  // The config is consumed (and destroyed) by PD_PredictorCreate.
  config = NULL;
  int input_num = PD_PredictorGetInputNum(predictor);
  printf("Input num: %d\n", input_num);
  int output_num = PD_PredictorGetOutputNum(predictor);
  printf("Output num: %d\n", output_num);
  PD_OneDimArrayCstr* input_names = PD_PredictorGetInputNames(predictor);
  PD_Tensor* input_tensor =
      PD_PredictorGetInputHandle(predictor, input_names->data[0]);
  PD_OneDimArrayCstrDestroy(input_names);
  input_names = NULL;
  int32_t shape[] = {1, 3, 300, 300};
  float* data = (float*)malloc(sizeof(float) * 1 * 3 * 300 * 300);  // NOLINT
  ReadData(data, 1 * 3 * 300 * 300);  // NOLINT
  PD_TensorReshape(input_tensor, 4, shape);
  // CopyFromCpu copies the buffer into the tensor, so it can be freed now.
  PD_TensorCopyFromCpuFloat(input_tensor, data);
  free(data);
  data = NULL;
  PD_PredictorRun(predictor);
  PD_OneDimArrayCstr* output_names = PD_PredictorGetOutputNames(predictor);
  PD_Tensor* output_tensor =
      PD_PredictorGetOutputHandle(predictor, output_names->data[0]);
  PD_OneDimArrayCstrDestroy(output_names);
  output_names = NULL;  // was `nullptr`; the file otherwise uses C-style NULL
  // Compute the flat output size from the output shape.
  PD_OneDimArrayInt32* out_shape = PD_TensorGetShape(output_tensor);
  int32_t size = 1;
  for (size_t index = 0; index < out_shape->size; ++index) {
    size = size * out_shape->data[index];
  }
  PD_OneDimArrayInt32Destroy(out_shape);
  out_shape = NULL;
  data = (float*)malloc(sizeof(float) * size);  // NOLINT
  PD_TensorCopyToCpuFloat(output_tensor, data);
  free(data);
  data = NULL;
  PD_TensorDestroy(output_tensor);
  output_tensor = NULL;
  PD_TensorDestroy(input_tensor);
  input_tensor = NULL;
  PD_PredictorDestroy(predictor);
  predictor = NULL;
  return 0;
}
// ReadData reads n whitespace-separated floats from data/data.txt into
// `data`. A missing file or a short read aborts the demo with a message
// instead of dereferencing a NULL FILE* or leaving the buffer
// uninitialized (both errors were previously ignored).
void ReadData(float* data, int n) {
  FILE* fp = fopen("data/data.txt", "r");
  if (fp == NULL) {
    fprintf(stderr, "failed to open data/data.txt\n");
    exit(1);
  }
  for (int i = 0; i < n; i++) {
    if (fscanf(fp, "%f", &data[i]) != 1) {
      fprintf(stderr, "data/data.txt: expected %d floats, got %d\n", n, i);
      fclose(fp);
      exit(1);
    }
  }
  fclose(fp);
}
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package paddle
// #cgo CFLAGS: -I${SRCDIR}/../paddle_c/paddle/include
// #cgo LDFLAGS: -L${SRCDIR}/../paddle_c/paddle/lib -lpaddle_inference_c
// #include <stdbool.h>
// #include <stdlib.h>
// #include <paddle_c_api.h>
import "C"
import "runtime"
import "unsafe"
// Precision mirrors the C enum `Precision` and selects the numeric
// precision used by the TensorRT engine (see EnableTensorRtEngine).
type Precision C.Precision

const (
    Precision_FLOAT32 Precision = C.kFloat32 // 32-bit floating point
    Precision_INT8    Precision = C.kInt8    // 8-bit integer quantization
    Precision_HALF    Precision = C.kHalf    // 16-bit floating point
)
// AnalysisConfig wraps the C `PD_AnalysisConfig` and carries every setting
// used to build a Predictor.
type AnalysisConfig struct {
    c *C.PD_AnalysisConfig
}

// NewAnalysisConfig allocates a config on the C side; the C object is
// released by a finalizer once the Go wrapper is garbage collected.
func NewAnalysisConfig() *AnalysisConfig {
    c_config := C.PD_NewAnalysisConfig()
    config := &AnalysisConfig{c: c_config}
    runtime.SetFinalizer(config, (*AnalysisConfig).finalize)
    return config
}

// finalize frees the underlying C config.
func (config *AnalysisConfig) finalize() {
    C.PD_DeleteAnalysisConfig(config.c)
}
// SetModel sets the paths of the model file and the parameter file.
// An empty params string is forwarded to C as NULL (i.e. a model without a
// separate combined-params file).
func (config *AnalysisConfig) SetModel(model, params string) {
    c_model := C.CString(model)
    defer C.free(unsafe.Pointer(c_model))
    var c_params *C.char
    if params == "" {
        c_params = nil
    } else {
        c_params = C.CString(params)
        defer C.free(unsafe.Pointer(c_params))
    }
    C.PD_SetModel(config.c, c_model, c_params)
}
// ModelDir returns the directory of the (uncombined) model.
func (config *AnalysisConfig) ModelDir() string {
    return C.GoString(C.PD_ModelDir(config.c))
}

// ProgFile returns the path of the program (model) file.
func (config *AnalysisConfig) ProgFile() string {
    return C.GoString(C.PD_ProgFile(config.c))
}

// ParamsFile returns the path of the combined parameters file.
func (config *AnalysisConfig) ParamsFile() string {
    return C.GoString(C.PD_ParamsFile(config.c))
}

// EnableUseGpu turns on GPU execution with an initial memory pool of
// memory_pool_init_size_mb MB on the given device.
func (config *AnalysisConfig) EnableUseGpu(memory_pool_init_size_mb int, device_id int) {
    C.PD_EnableUseGpu(config.c, C.int(memory_pool_init_size_mb), C.int(device_id))
}

// DisableGpu turns GPU execution off.
func (config *AnalysisConfig) DisableGpu() {
    C.PD_DisableGpu(config.c)
}

// UseGpu reports whether GPU execution is enabled.
func (config *AnalysisConfig) UseGpu() bool {
    return ConvertCBooleanToGo(C.PD_UseGpu(config.c))
}

// GpuDeviceId returns the selected GPU device id.
func (config *AnalysisConfig) GpuDeviceId() int {
    return int(C.PD_GpuDeviceId(config.c))
}

// MemoryPoolInitSizeMb returns the initial GPU memory pool size in MB.
func (config *AnalysisConfig) MemoryPoolInitSizeMb() int {
    return int(C.PD_MemoryPoolInitSizeMb(config.c))
}

// FractionOfGpuMemoryForPool returns the fraction of total GPU memory
// reserved for the memory pool.
func (config *AnalysisConfig) FractionOfGpuMemoryForPool() float32 {
    return float32(C.PD_FractionOfGpuMemoryForPool(config.c))
}

// EnableCudnn turns on cuDNN acceleration.
func (config *AnalysisConfig) EnableCudnn() {
    C.PD_EnableCUDNN(config.c)
}

// CudnnEnabled reports whether cuDNN acceleration is enabled.
func (config *AnalysisConfig) CudnnEnabled() bool {
    return ConvertCBooleanToGo(C.PD_CudnnEnabled(config.c))
}
// SwitchIrOptim enables or disables IR graph optimization passes.
func (config *AnalysisConfig) SwitchIrOptim(x bool) {
    C.PD_SwitchIrOptim(config.c, C.bool(x))
}

// IrOptim reports whether IR optimization is enabled.
func (config *AnalysisConfig) IrOptim() bool {
    return ConvertCBooleanToGo(C.PD_IrOptim(config.c))
}

// SwitchUseFeedFetchOps enables or disables feed/fetch ops; zero-copy
// tensors require this to be off.
func (config *AnalysisConfig) SwitchUseFeedFetchOps(x bool) {
    C.PD_SwitchUseFeedFetchOps(config.c, C.bool(x))
}

// UseFeedFetchOpsEnabled reports whether feed/fetch ops are enabled.
func (config *AnalysisConfig) UseFeedFetchOpsEnabled() bool {
    return ConvertCBooleanToGo(C.PD_UseFeedFetchOpsEnabled(config.c))
}

// SwitchSpecifyInputNames controls whether inputs are identified by name.
func (config *AnalysisConfig) SwitchSpecifyInputNames(x bool) {
    C.PD_SwitchSpecifyInputNames(config.c, C.bool(x))
}

// SpecifyInputName reports whether inputs are identified by name.
func (config *AnalysisConfig) SpecifyInputName() bool {
    return ConvertCBooleanToGo(C.PD_SpecifyInputName(config.c))
}

// EnableTensorRtEngine turns on the TensorRT subgraph engine with the given
// workspace size, batch limit, minimum subgraph size and precision.
func (config *AnalysisConfig) EnableTensorRtEngine(workspace_size int, max_batch_size int, min_subgraph_size int, precision Precision, use_static bool, use_calib_mode bool) {
    C.PD_EnableTensorRtEngine(config.c, C.int(workspace_size), C.int(max_batch_size), C.int(min_subgraph_size), C.Precision(precision), C.bool(use_static), C.bool(use_calib_mode))
}

// TensorrtEngineEnabled reports whether the TensorRT engine is enabled.
func (config *AnalysisConfig) TensorrtEngineEnabled() bool {
    return ConvertCBooleanToGo(C.PD_TensorrtEngineEnabled(config.c))
}

// SwitchIrDebug enables or disables dumping of the IR graph for debugging.
func (config *AnalysisConfig) SwitchIrDebug(x bool) {
    C.PD_SwitchIrDebug(config.c, C.bool(x))
}
// EnableMkldnn turns on oneDNN (MKL-DNN) CPU acceleration.
func (config *AnalysisConfig) EnableMkldnn() {
    C.PD_EnableMKLDNN(config.c)
}

// MkldnnEnabled reports whether MKL-DNN acceleration is enabled.
func (config *AnalysisConfig) MkldnnEnabled() bool {
    return ConvertCBooleanToGo(C.PD_MkldnnEnabled(config.c))
}

// SetCpuMathLibraryNumThreads sets the number of CPU math-library threads.
func (config *AnalysisConfig) SetCpuMathLibraryNumThreads(n int) {
    C.PD_SetCpuMathLibraryNumThreads(config.c, C.int(n))
}

// CpuMathLibraryNumThreads returns the number of CPU math-library threads.
func (config *AnalysisConfig) CpuMathLibraryNumThreads() int {
    return int(C.PD_CpuMathLibraryNumThreads(config.c))
}

// EnableMkldnnQuantizer turns on the MKL-DNN INT8 quantizer.
func (config *AnalysisConfig) EnableMkldnnQuantizer() {
    C.PD_EnableMkldnnQuantizer(config.c)
}

// EnableMkldnnBfloat16 turns on MKL-DNN bfloat16 execution.
func (config *AnalysisConfig) EnableMkldnnBfloat16() {
    C.PD_EnableMkldnnBfloat16(config.c)
}

// MkldnnQuantizerEnabled reports whether the MKL-DNN quantizer is enabled.
func (config *AnalysisConfig) MkldnnQuantizerEnabled() bool {
    return ConvertCBooleanToGo(C.PD_MkldnnQuantizerEnabled(config.c))
}

// MkldnnBfloat16Enabled reports whether MKL-DNN bfloat16 is enabled.
func (config *AnalysisConfig) MkldnnBfloat16Enabled() bool {
    return ConvertCBooleanToGo(C.PD_MkldnnBfloat16Enabled(config.c))
}
// TODO: SetModelBuffer and ModelFromMemory are not wrapped yet.

// EnableMemoryOptim turns on the memory-reuse optimization.
func (config *AnalysisConfig) EnableMemoryOptim() {
    C.PD_EnableMemoryOptim(config.c)
}

// MemoryOptimEnabled reports whether memory optimization is enabled.
func (config *AnalysisConfig) MemoryOptimEnabled() bool {
    return ConvertCBooleanToGo(C.PD_MemoryOptimEnabled(config.c))
}

// EnableProfile turns on the built-in profiler.
func (config *AnalysisConfig) EnableProfile() {
    C.PD_EnableProfile(config.c)
}

// ProfileEnabled reports whether profiling is enabled.
func (config *AnalysisConfig) ProfileEnabled() bool {
    return ConvertCBooleanToGo(C.PD_ProfileEnabled(config.c))
}

// DisableGlogInfo suppresses glog INFO-level logging.
func (config *AnalysisConfig) DisableGlogInfo() {
    C.PD_DisableGlogInfo(config.c)
}

// DeletePass removes the named optimization pass from the pass pipeline.
func (config *AnalysisConfig) DeletePass(pass string) {
    c_pass := C.CString(pass)
    defer C.free(unsafe.Pointer(c_pass))
    C.PD_DeletePass(config.c, c_pass)
}

// SetInValid marks the config as invalid.
func (config *AnalysisConfig) SetInValid() {
    C.PD_SetInValid(config.c)
}

// IsValid reports whether the config is still valid.
func (config *AnalysisConfig) IsValid() bool {
    return ConvertCBooleanToGo(C.PD_IsValid(config.c))
}
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package paddle
// #cgo CFLAGS: -I${SRCDIR}/../paddle_c/paddle/include
// #cgo LDFLAGS: -L${SRCDIR}/../paddle_c/paddle/lib -lpaddle_inference_c
// #include <stdbool.h>
// #include "paddle_c_api.h"
import "C"
import "reflect"
import "runtime"
import "unsafe"
// Predictor wraps the C `PD_Predictor`.
type Predictor struct {
    c *C.PD_Predictor
}

// NewPredictor builds a predictor from config; the C object is released by
// a finalizer once the Go wrapper is garbage collected.
func NewPredictor(config *AnalysisConfig) *Predictor {
    c_predictor := C.PD_NewPredictor((*config).c)
    predictor := &Predictor{c: c_predictor}
    runtime.SetFinalizer(predictor, (*Predictor).finalize)
    return predictor
}

// finalize frees the underlying C predictor.
func (predictor *Predictor) finalize() {
    C.PD_DeletePredictor(predictor.c)
}

// DeletePredictor releases the C predictor immediately instead of waiting
// for the finalizer.
// NOTE(review): the finalizer set in NewPredictor may delete the same C
// object again later — confirm PD_DeletePredictor tolerates a double call,
// or clear the finalizer here.
func DeletePredictor(predictor *Predictor) {
    C.PD_DeletePredictor(predictor.c)
}
// GetInputNum returns the number of model inputs.
func (predictor *Predictor) GetInputNum() int {
    return int(C.PD_GetInputNum(predictor.c))
}

// GetOutputNum returns the number of model outputs.
func (predictor *Predictor) GetOutputNum() int {
    return int(C.PD_GetOutputNum(predictor.c))
}

// GetInputName returns the name of the n-th model input.
func (predictor *Predictor) GetInputName(n int) string {
    return C.GoString(C.PD_GetInputName(predictor.c, C.int(n)))
}

// GetOutputName returns the name of the n-th model output.
func (predictor *Predictor) GetOutputName(n int) string {
    return C.GoString(C.PD_GetOutputName(predictor.c, C.int(n)))
}
// GetInputTensors allocates one ZeroCopyTensor per model input, each tagged
// on the C side with the corresponding input name.
func (predictor *Predictor) GetInputTensors() [](*ZeroCopyTensor) {
    var tensors [](*ZeroCopyTensor)
    count := predictor.GetInputNum()
    for idx := 0; idx < count; idx++ {
        t := NewZeroCopyTensor()
        t.c.name = C.PD_GetInputName(predictor.c, C.int(idx))
        tensors = append(tensors, t)
    }
    return tensors
}
// GetOutputTensors allocates one ZeroCopyTensor per model output, each
// tagged on the C side with the corresponding output name.
func (predictor *Predictor) GetOutputTensors() [](*ZeroCopyTensor) {
    var tensors [](*ZeroCopyTensor)
    count := predictor.GetOutputNum()
    for idx := 0; idx < count; idx++ {
        t := NewZeroCopyTensor()
        t.c.name = C.PD_GetOutputName(predictor.c, C.int(idx))
        tensors = append(tensors, t)
    }
    return tensors
}
// GetInputNames returns the names of all model inputs, in order.
func (predictor *Predictor) GetInputNames() []string {
    count := predictor.GetInputNum()
    names := make([]string, count)
    for idx := range names {
        names[idx] = predictor.GetInputName(idx)
    }
    return names
}
// GetOutputNames returns the names of all model outputs, in order.
func (predictor *Predictor) GetOutputNames() []string {
    count := predictor.GetOutputNum()
    names := make([]string, count)
    for idx := range names {
        names[idx] = predictor.GetOutputName(idx)
    }
    return names
}
// SetZeroCopyInput binds tensor as an input of the predictor.
func (predictor *Predictor) SetZeroCopyInput(tensor *ZeroCopyTensor) {
    C.PD_SetZeroCopyInput(predictor.c, tensor.c)
}

// GetZeroCopyOutput fetches an output into tensor and refreshes the
// Go-side name and shape caches from the C struct. The C shape buffer is
// viewed in place through a slice header (no copy); Reshape then makes a
// Go-owned copy of it.
func (predictor *Predictor) GetZeroCopyOutput(tensor *ZeroCopyTensor) {
    C.PD_GetZeroCopyOutput(predictor.c, tensor.c)
    tensor.name = C.GoString(tensor.c.name)
    var shape []int32
    shape_hdr := (*reflect.SliceHeader)(unsafe.Pointer(&shape))
    shape_hdr.Data = uintptr(unsafe.Pointer(tensor.c.shape.data))
    shape_hdr.Len = int(tensor.c.shape.length / C.sizeof_int)
    shape_hdr.Cap = int(tensor.c.shape.length / C.sizeof_int)
    tensor.Reshape(shape)
}

// ZeroCopyRun executes one inference.
func (predictor *Predictor) ZeroCopyRun() {
    C.PD_ZeroCopyRun(predictor.c)
}
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package paddle
// #cgo CFLAGS: -I${SRCDIR}/../paddle_c/paddle/include
// #cgo LDFLAGS: -L${SRCDIR}/../paddle_c/paddle/lib -lpaddle_inference_c
// #include <stdbool.h>
// #include <stdlib.h>
// #include <string.h>
// #include <paddle_c_api.h>
import "C"
import "runtime"
import "reflect"
import "unsafe"
import (
"bytes"
"encoding/binary"
)
// PaddleDType mirrors the C enum `PD_DataType` and identifies the element
// type stored in a tensor.
type PaddleDType C.PD_DataType

const (
    FLOAT32  PaddleDType = C.PD_FLOAT32
    INT32    PaddleDType = C.PD_INT32
    INT64    PaddleDType = C.PD_INT64
    UINT8    PaddleDType = C.PD_UINT8
    UNKDTYPE PaddleDType = C.PD_UNKDTYPE // unknown / unsupported
)

// types maps each supported Go element type to its PaddleDType; it is used
// for both directions of the Go <-> Paddle type conversion.
var types = []struct {
    gotype reflect.Type
    dtype  PaddleDType
}{
    {reflect.TypeOf(float32(0)), FLOAT32},
    {reflect.TypeOf(int32(0)), INT32},
    {reflect.TypeOf(int64(0)), INT64},
    {reflect.TypeOf(uint8(0)), UINT8},
}
// TypeOfShape returns the Go type of a tensor with the given dtype and
// rank: the element type wrapped in one slice level per shape dimension.
// It panics on an unsupported dtype.
func TypeOfShape(dtype PaddleDType, shape []int32) reflect.Type {
    var elem reflect.Type
    for _, entry := range types {
        if PaddleDType(entry.dtype) == dtype {
            elem = entry.gotype
            break
        }
    }
    if elem == nil {
        panic(bug("Data %v type is not support", dtype))
    }
    for i := 0; i < len(shape); i++ {
        elem = reflect.SliceOf(elem)
    }
    return elem
}
// ZeroCopyTensor wraps the C `PD_ZeroCopyTensor` together with cached
// Go-side copies of its name and shape.
type ZeroCopyTensor struct {
    c     *C.PD_ZeroCopyTensor
    name  string
    shape []int32
}

// NewZeroCopyTensor allocates a tensor on the C side; the C object is
// released by a finalizer once the Go wrapper is garbage collected.
func NewZeroCopyTensor() *ZeroCopyTensor {
    c_tensor := C.PD_NewZeroCopyTensor()
    tensor := &ZeroCopyTensor{c: c_tensor}
    runtime.SetFinalizer(tensor, (*ZeroCopyTensor).finalize)
    return tensor
}

// finalize frees the underlying C tensor.
func (tensor *ZeroCopyTensor) finalize() {
    C.PD_DeleteZeroCopyTensor(tensor.c)
}

// Shape returns the cached Go-side shape (set by Reshape or by
// Predictor.GetZeroCopyOutput), not a fresh read of the C struct.
func (tensor *ZeroCopyTensor) Shape() []int32 {
    return tensor.shape
}

// Name returns the tensor name as stored on the C struct.
func (tensor *ZeroCopyTensor) Name() string {
    return C.GoString(tensor.c.name)
}
// Rename changes the tensor's name on both the Go wrapper and the C
// struct. The previous implementation only cast tensor.c.name to its own
// type (a no-op), so the C-side name was never actually updated; the
// commented-out C.CString call showed the intent.
func (tensor *ZeroCopyTensor) Rename(name string) {
    tensor.name = name
    // NOTE(review): the previous C name buffer may be owned by the C
    // library or by an earlier CString; freeing it here is unsafe without
    // knowing the owner, so the old buffer is intentionally leaked.
    tensor.c.name = C.CString(tensor.name)
}
// Reshape records shape on the Go side and copies it into the C struct,
// growing the C shape buffer only when the current capacity is too small.
// NOTE(review): an empty shape would panic at &shape[0]; callers in this
// package always pass a non-empty shape — confirm before exposing further.
func (tensor *ZeroCopyTensor) Reshape(shape []int32) {
    tensor.shape = make([]int32, len(shape))
    copy(tensor.shape, shape)
    length := C.sizeof_int * C.size_t(len(shape))
    if tensor.c.shape.capacity < C.size_t(length) {
        // Free any previous buffer before allocating a larger one.
        if tensor.c.shape.capacity != C.size_t(0) {
            C.free(tensor.c.shape.data)
        }
        tensor.c.shape.data = C.malloc(length)
        tensor.c.shape.capacity = length
    }
    tensor.c.shape.length = length
    C.memcpy(tensor.c.shape.data, unsafe.Pointer(&shape[0]), length)
}
// DataType returns the element type recorded on the C struct.
func (tensor *ZeroCopyTensor) DataType() PaddleDType {
    return PaddleDType(tensor.c.dtype)
}
// SetValue copies a Go slice into the tensor: it derives shape and dtype
// from value, (re)allocates the C data buffer when the current capacity is
// too small, and memcpy's the element data.
// The type assertions below accept flat 1-D slices only ([]uint8, []int32,
// []int64, []float32); a nested slice would fail the assertion and panic.
func (tensor *ZeroCopyTensor) SetValue(value interface{}) {
    val := reflect.ValueOf(value)
    shape, dtype := ShapeAndTypeOf(val)
    tensor.Reshape(shape)
    num := numel(shape)
    length := C.size_t(SizeofDataType(dtype) * num)
    if tensor.c.data.capacity < length {
        // Free any previous buffer before allocating a larger one.
        if tensor.c.data.capacity != C.size_t(0) {
            C.free(tensor.c.data.data)
        }
        tensor.c.data.data = C.malloc(length)
        tensor.c.data.capacity = length
    }
    tensor.c.data.length = length
    switch dtype {
    case PaddleDType(UINT8):
        data := val.Interface().([]uint8)
        C.memcpy(tensor.c.data.data, unsafe.Pointer(&data[0]), length)
    case PaddleDType(INT32):
        data := val.Interface().([]int32)
        C.memcpy(tensor.c.data.data, unsafe.Pointer(&data[0]), length)
    case PaddleDType(INT64):
        data := val.Interface().([]int64)
        C.memcpy(tensor.c.data.data, unsafe.Pointer(&data[0]), length)
    case PaddleDType(FLOAT32):
        data := val.Interface().([]float32)
        C.memcpy(tensor.c.data.data, unsafe.Pointer(&data[0]), length)
    }
    tensor.c.dtype = C.PD_DataType(dtype)
}
// TypeOf returns the Go type of a tensor with the given dtype and rank:
// the element type wrapped in one slice level per shape dimension.
// Unlike before, an unsupported dtype now panics with a clear message
// (matching TypeOfShape) instead of letting reflect.SliceOf(nil) panic
// later with a confusing one.
func TypeOf(dtype PaddleDType, shape []int32) reflect.Type {
    var ret reflect.Type
    for _, t := range types {
        if t.dtype == dtype {
            ret = t.gotype
            break
        }
    }
    if ret == nil {
        panic("TypeOf: unsupported Paddle data type")
    }
    for range shape {
        ret = reflect.SliceOf(ret)
    }
    return ret
}
// Value converts the tensor's C data buffer into a nested Go slice whose
// element type and nesting depth follow the tensor's dtype and cached
// shape. The raw bytes are viewed in place through an unsafe array cast
// (no copy) and then decoded by DecodeTensor.
func (tensor *ZeroCopyTensor) Value() interface{} {
    t := TypeOf(PaddleDType(tensor.c.dtype), tensor.shape)
    value := reflect.New(t)
    c_bytes := tensor.c.data.data
    length := tensor.c.data.length
    var slice []byte
    // Pick the max-array-size trick per pointer width: 64-bit platforms can
    // view up to 2^50-1 bytes, 32-bit ones up to 2^30.
    if unsafe.Sizeof(unsafe.Pointer(nil)) == 8 {
        slice = (*[1<<50 - 1]byte)(unsafe.Pointer(c_bytes))[:length:length]
    } else {
        slice = (*[1 << 30]byte)(unsafe.Pointer(c_bytes))[:length:length]
    }
    r := bytes.NewReader(slice)
    DecodeTensor(r, tensor.Shape(), t, value)
    return reflect.Indirect(value).Interface()
}
// Endian reports the native byte order of the host by writing a known
// 16-bit pattern into memory and inspecting how its bytes are laid out.
func Endian() binary.ByteOrder {
    probe := [2]byte{}
    *(*uint16)(unsafe.Pointer(&probe[0])) = uint16(0xABCD)
    if probe[0] == 0xCD && probe[1] == 0xAB {
        return binary.LittleEndian
    }
    if probe[0] == 0xAB && probe[1] == 0xCD {
        return binary.BigEndian
    }
    panic("Could not determine native endianness.")
}
// DecodeTensor recursively fills *ptr (of type t) from the byte stream r,
// allocating one slice level per remaining shape dimension. The innermost
// 1-D slice of a scalar kind is read in a single binary.Read call; deeper
// levels recurse element by element.
func DecodeTensor(r *bytes.Reader, shape []int32, t reflect.Type, ptr reflect.Value) {
    switch t.Kind() {
    case reflect.Uint8, reflect.Int32, reflect.Int64, reflect.Float32:
        // Scalar leaf: read one value directly into the pointee.
        binary.Read(r, Endian(), ptr.Interface())
    case reflect.Slice:
        value := reflect.Indirect(ptr)
        value.Set(reflect.MakeSlice(t, int(shape[0]), int(shape[0])))
        // Fast path: a flat slice of scalars is read in one call.
        if len(shape) == 1 && value.Len() > 0 {
            switch value.Index(0).Kind() {
            case reflect.Uint8, reflect.Int32, reflect.Int64, reflect.Float32:
                binary.Read(r, Endian(), value.Interface())
                return
            }
        }
        for i := 0; i < value.Len(); i++ {
            DecodeTensor(r, shape[1:], t.Elem(), value.Index(i).Addr())
        }
    }
}
// SizeofDataType returns the size in bytes of one element of the given
// dtype, or -1 for an unknown dtype.
func SizeofDataType(dtype PaddleDType) int32 {
    if dtype == UINT8 {
        return int32(C.sizeof_uchar)
    }
    if dtype == INT32 {
        return int32(C.sizeof_int)
    }
    if dtype == INT64 {
        return int32(C.sizeof_longlong)
    }
    if dtype == FLOAT32 {
        return int32(C.sizeof_float)
    }
    return -1
}
// ShapeAndTypeOf walks nested slices/arrays in val, collecting the length
// at each level as the shape, and maps the innermost element kind to a
// PaddleDType. dt stays at its zero value when the kind is unsupported.
func ShapeAndTypeOf(val reflect.Value) (shape []int32, dt PaddleDType) {
    elem := val.Type()
    for elem.Kind() == reflect.Array || elem.Kind() == reflect.Slice {
        shape = append(shape, int32(val.Len()))
        if val.Len() > 0 {
            val = val.Index(0)
        }
        elem = elem.Elem()
    }
    for _, entry := range types {
        if elem.Kind() == entry.gotype.Kind() {
            return shape, PaddleDType(entry.dtype)
        }
    }
    return shape, dt
}
......@@ -104,6 +104,8 @@ DataType Tensor::type() const {
return DataType::INT32;
} else if (type == paddle::framework::proto::VarType::UINT8) {
return DataType::UINT8;
} else if (type == paddle::framework::proto::VarType::INT8) {
return DataType::INT8;
}
return DataType::FLOAT32;
}
......
......@@ -31,6 +31,7 @@
#include <unordered_set>
#include <utility>
#include <vector>
#include "paddle_infer_declare.h" // NOLINT
/*! \file */
......@@ -177,6 +178,26 @@ struct PD_INFER_DECL AnalysisConfig {
///
void DisableGpu();
///
/// \brief Turn on XPU.
///
/// \param l3_workspace_size The size of the video memory allocated by the l3
/// cache, the maximum is 16M.
/// \param locked Whether the allocated L3 cache can be locked. If false,
/// it means that the L3 cache is not locked, and the allocated L3
/// cache can be shared by multiple models, and multiple models
/// sharing the L3 cache will be executed sequentially on the card.
/// \param autotune Whether to autotune the conv operator in the model. If
/// true, when the conv operator of a certain dimension is executed
/// for the first time, it will automatically search for a better
/// algorithm to improve the performance of subsequent conv operators
/// of the same dimension.
/// \param autotune_file Specify the path of the autotune file. If
/// autotune_file is specified, the algorithm specified in the
/// file will be used and autotune will not be performed again.
/// \param precision Calculation accuracy of multi_encoder
/// \param adaptive_seqlen Is the input of multi_encoder variable length
///
void EnableXpu(int l3_workspace_size = 0xfffc00, bool locked = false,
bool autotune = true, const std::string& autotune_file = "",
const std::string& precision = "int16",
......
......@@ -71,5 +71,5 @@ PD_ENUM(PD_PlaceType){PD_PLACE_UNK = -1, PD_PLACE_CPU, PD_PLACE_GPU,
PD_ENUM(PD_DataType){
PD_DATA_UNK = -1, PD_DATA_FLOAT32, PD_DATA_INT32,
PD_DATA_INT64, PD_DATA_UINT8,
PD_DATA_INT64, PD_DATA_UINT8, PD_DATA_INT8,
};
......@@ -14,6 +14,8 @@
#include "paddle/fluid/inference/capi_exp/pd_config.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/capi_exp/pd_types.h"
#include "paddle/fluid/inference/capi_exp/utils_internal.h"
#include "paddle/fluid/platform/enforce.h"
#define CHECK_NULL_POINTER_PARM(param) \
......@@ -125,10 +127,14 @@ PD_Bool PD_ConfigUseGpu(__pd_keep PD_Config* pd_config) {
}
void PD_ConfigEnableXpu(__pd_keep PD_Config* pd_config,
int32_t l3_workspace_size) {
int32_t l3_workspace_size, PD_Bool locked,
PD_Bool autotune, const char* autotune_file,
const char* precision, PD_Bool adaptive_seqlen) {
CHECK_AND_CONVERT_PD_CONFIG;
config->EnableXpu(l3_workspace_size);
config->EnableXpu(l3_workspace_size, locked, autotune, autotune_file,
precision, adaptive_seqlen);
}
PD_Bool PD_ConfigUseXpu(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
return config->use_xpu();
......@@ -378,5 +384,24 @@ void PD_ConfigPartiallyRelease(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
config->PartiallyRelease();
}
void PD_ConfigDeletePass(__pd_keep PD_Config* pd_config, const char* pass) {
CHECK_AND_CONVERT_PD_CONFIG;
config->pass_builder()->DeletePass(pass);
}
void PD_ConfigInsertPass(__pd_keep PD_Config* pd_config, size_t idx,
const char* pass) {
CHECK_AND_CONVERT_PD_CONFIG;
config->pass_builder()->InsertPass(idx, pass);
}
void PD_ConfigAppendPass(__pd_keep PD_Config* pd_config, const char* pass) {
CHECK_AND_CONVERT_PD_CONFIG;
config->pass_builder()->AppendPass(pass);
}
__pd_give PD_OneDimArrayCstr* PD_ConfigAllPasses(
__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
std::vector<std::string> passes = config->pass_builder()->AllPasses();
return paddle_infer::CvtVecToOneDimArrayCstr(passes);
}
} // extern "C"
......@@ -25,6 +25,7 @@
#pragma once
#include "pd_common.h" // NOLINT
#include "pd_types.h" // NOLINT
typedef struct PD_Config PD_Config;
......@@ -154,10 +155,27 @@ PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigUseGpu(
/// \brief Turn on XPU.
///
/// \param[in] pd_config config
/// \param[in] l3_workspace_size l3 workspace size.
/// \param l3_workspace_size The size of the video memory allocated by the l3
/// cache, the maximum is 16M.
/// \param locked Whether the allocated L3 cache can be locked. If false,
/// it means that the L3 cache is not locked, and the allocated L3
/// cache can be shared by multiple models, and multiple models
/// sharing the L3 cache will be executed sequentially on the card.
/// \param autotune Whether to autotune the conv operator in the model. If
/// true, when the conv operator of a certain dimension is executed
/// for the first time, it will automatically search for a better
/// algorithm to improve the performance of subsequent conv operators
/// of the same dimension.
/// \param autotune_file Specify the path of the autotune file. If
/// autotune_file is specified, the algorithm specified in the
/// file will be used and autotune will not be performed again.
/// \param precision Calculation accuracy of multi_encoder
/// \param adaptive_seqlen Is the input of multi_encoder variable length
///
PADDLE_CAPI_EXPORT extern void PD_ConfigEnableXpu(
__pd_keep PD_Config* pd_config, int32_t l3_workspace_size);
__pd_keep PD_Config* pd_config, int32_t l3_workspace_size, PD_Bool locked,
PD_Bool autotune, const char* autotune_file, const char* precision,
PD_Bool adaptive_seqlen);
///
/// \brief A boolean state telling whether the XPU is turned on.
///
......@@ -565,6 +583,35 @@ PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigIsValid(
///
PADDLE_CAPI_EXPORT extern void PD_ConfigPartiallyRelease(
__pd_keep PD_Config* pd_config);
///
/// \brief Delete all passes that has a certain type 'pass'.
///
/// \param[in] pass the certain pass type to be deleted.
///
PADDLE_CAPI_EXPORT extern void PD_ConfigDeletePass(
__pd_keep PD_Config* pd_config, const char* pass);
///
/// \brief Insert a pass to a specific position
///
/// \param[in] idx the position to insert.
/// \param[in] pass the new pass.
///
PADDLE_CAPI_EXPORT extern void PD_ConfigInsertPass(
__pd_keep PD_Config* pd_config, size_t idx, const char* pass);
///
/// \brief Append a pass to the end of the passes
///
/// \param[in] pass the new pass.
///
PADDLE_CAPI_EXPORT extern void PD_ConfigAppendPass(
__pd_keep PD_Config* pd_config, const char* pass);
///
/// \brief Get information of passes.
///
/// \return Return list of the passes.
///
PADDLE_CAPI_EXPORT extern __pd_give PD_OneDimArrayCstr* PD_ConfigAllPasses(
__pd_keep PD_Config* pd_config);
#ifdef __cplusplus
} // extern "C"
......
......@@ -106,4 +106,9 @@ void PD_PredictorDestroy(__pd_take PD_Predictor* pd_predictor) {
delete pd_predictor;
}
const char* PD_GetVersion() {
static std::string version = paddle_infer::GetVersion();
return version.c_str();
}
} // extern "C"
......@@ -143,6 +143,13 @@ PADDLE_CAPI_EXPORT extern uint64_t PD_PredictorTryShrinkMemory(
PADDLE_CAPI_EXPORT extern void PD_PredictorDestroy(
__pd_take PD_Predictor* pd_predictor);
///
/// \brief Get version info.
///
/// \return version
///
PADDLE_CAPI_EXPORT extern const char* PD_GetVersion();
#ifdef __cplusplus
} // extern "C"
#endif
......@@ -196,6 +196,8 @@ DataType CvtToCxxDatatype(PD_DataType data_type) {
return DataType::INT32;
case PD_DATA_UINT8:
return DataType::UINT8;
case PD_DATA_INT8:
return DataType::INT8;
default:
PADDLE_THROW(paddle::platform::errors::InvalidArgument(
"Unsupport paddle data type %d.", data_type));
......
# Paddle Inference golang API
Paddle Inference golang API 基于 [capi](../capi_exp) 和 cgo 实现,需要您提前准备好C预测库。
## 安装
1. 确认使用Paddle的CommitId
您可以通过`git log -1`的方式,确认您使用的Paddle版本的CommitId
2. 使用`go get`获取golang paddle api
```
# 此处使用上一步记录的CommitId,假设为76e5724
COMMITID=76e5724
go get -d -v github.com/paddlepaddle/paddle/paddle/fluid/inference/goapi@${COMMITID}
```
3. 下载C预测库
您可以选择直接下载[paddle_inference_c](https://github.com/PaddlePaddle/Paddle-Inference-Demo/blob/master/docs/user_guides/download_lib.md)预测库,或通过源码编译的方式安装,源码编译方式参考官网文档,注意这里cmake编译时打开`-DON_INFER=ON`,在编译目录下得到`paddle_inference_c_install_dir`
4. 软链
go1.15新增了`GOMODCACHE`环境变量,`go get`默认会将代码下载到`GOMODCACHE`目录下,您可以通过`go env | grep GOMODCACHE`的方式,查看该路径,在官网发布的docker镜像中该路径一般默认为`/root/gopath/pkg/mod`,进入到golang api代码路径建立软链接,将c预测库命名为`paddle_inference_c`
```bash
eval $(go env | grep GOMODCACHE)
# 按需修改最后的goapi版本号
cd ${GOMODCACHE}/github.com/paddlepaddle/paddle/paddle/fluid/inference/goapi\@v0.0.0-20210517084506-76e5724c16a5/
ln -s ${PADDLE_C_DOWNLOAD_DIR}/paddle_inference_c_install_dir paddle_inference_c
```
5. 运行单测,验证
```
bash test.sh
```
## 在Go中使用Paddle预测
首先创建预测配置
```go
config := paddle.NewConfig()
config.SetModel(model_file, params_file)
```
创建predictor
```go
predictor := paddle.NewPredictor(config)
```
获取输入Tensor和输出Tensor
```go
inNames := predictor.GetInputNames()
inHandle = predictor.GetInputHandle(inNames[0])
outNames := predictor.GetOutputNames()
outHandle := predictor.GetOutputHandle(outNames[0])
```
设置输入数据(假设只有一个输入)
```go
data := make([]float32, 1*3*224*224)
for i := 0; i < len(data); i++ {
data[i] = float32(i%255) * 0.1
}
inHandle.Reshape([]int32{1, 3, 224, 224})
inHandle.CopyFromCpu(data)
```
设置Lod
```go
lod := make([][]uint, 2)
for i:=0; i < len(lod); i++ {
lod[i] = make([]uint, 2)
// 设置输入...
    lod[i][0] = 0
    lod[i][1] = 10
}
inHandle.SetLod(lod)
```
运行预测
```go
predictor.Run()
```
获取输出Tensor的真实值
```go
func numElements(shape []int32) int32 {
n := int32(1)
for _, v := range shape {
n *= v
}
return n
}
outData := make([]float32, numElements(outHandle.Shape()))
outHandle.CopyToCpu(outData)
fmt.Println(outHandle.Lod())
```
## 示例
Demo示例见[Paddle-Inference-Demo](https://github.com/PaddlePaddle/Paddle-Inference-Demo/tree/master/go)
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package paddle
// #include "pd_config.h"
// #include "pd_common.h"
// #include "pd_types.h"
// #include "pd_utils.h"
// #include <stdlib.h>
// #include <string.h>
import "C"
import (
"unsafe"
)
// Precision is the inference precision, mirroring the C enum
// PD_PrecisionType (used by TensorRT / Lite engine configuration).
type Precision C.PD_PrecisionType

const (
	PrecisionFloat32 Precision = C.PD_PRECISION_FLOAT32 // 32-bit float
	PrecisionInt8    Precision = C.PD_PRECISION_INT8    // 8-bit integer
	PrecisionHalf    Precision = C.PD_PRECISION_HALF    // 16-bit float
)
// Config wraps the C PD_Config handle that holds all prediction settings.
type Config struct {
	c *C.PD_Config // owned C handle; ownership passes to the predictor on creation
}
///
/// \brief Create a new config.
///
func NewConfig() *Config {
	return &Config{c: C.PD_ConfigCreate()}
}
///
/// \brief Set the combined model with two specific pathes for program and
/// parameters.
///
/// \param model model file path of the combined model.
/// \param params params file path of the combined model.
///
func (config *Config) SetModel(model, params string) {
	cModel := C.CString(model)
	defer C.free(unsafe.Pointer(cModel))
	cParams := C.CString(params)
	defer C.free(unsafe.Pointer(cParams))
	C.PD_ConfigSetModel(config.c, cModel, cParams)
}
///
/// \brief Set the no-combined model dir path.
///
/// \param modelDir model dir path.
///
func (config *Config) SetModelDir(modelDir string) {
	cDir := C.CString(modelDir)
	defer C.free(unsafe.Pointer(cDir))
	C.PD_ConfigSetModelDir(config.c, cDir)
}
///
/// \brief Set the model file path of a combined model.
///
/// \param model model file path.
///
func (config *Config) SetProgFile(model string) {
	cModel := C.CString(model)
	defer C.free(unsafe.Pointer(cModel))
	C.PD_ConfigSetProgFile(config.c, cModel)
}
///
/// \brief Set the params file path of a combined model.
///
/// \param params params file path.
///
func (config *Config) SetParamsFile(params string) {
	cParams := C.CString(params)
	defer C.free(unsafe.Pointer(cParams))
	C.PD_ConfigSetParamsFile(config.c, cParams)
}
///
/// \brief Set the path of optimization cache directory.
///
/// \param cacheDir the path of optimization cache directory.
///
func (config *Config) SetOptimCacheDir(cacheDir string) {
	cDir := C.CString(cacheDir)
	defer C.free(unsafe.Pointer(cDir))
	C.PD_ConfigSetOptimCacheDir(config.c, cDir)
}
///
/// \brief Get the model directory path.
///
/// \return string The model directory path.
///
func (config *Config) ModelDir() string {
	cDir := C.PD_ConfigGetModelDir(config.c)
	return C.GoString(cDir)
}
///
/// \brief Get the program file path.
///
/// \return string The program file path.
///
func (config *Config) ProgFile() string {
	cProg := C.PD_ConfigGetProgFile(config.c)
	return C.GoString(cProg)
}
///
/// \brief Get the combined parameters file path.
///
/// \return string The combined parameters file path.
///
func (config *Config) ParamsFile() string {
	cParams := C.PD_ConfigGetParamsFile(config.c)
	return C.GoString(cParams)
}
///
/// \brief Turn off FC Padding.
/// Thin wrapper over PD_ConfigDisableFCPadding; affects subsequent
/// predictor creation only.
///
func (config *Config) DisableFCPadding() {
	C.PD_ConfigDisableFCPadding(config.c)
}
///
/// \brief A boolean state telling whether fc padding is used.
///
/// \return bool Whether fc padding is used.
///
func (config *Config) UseFcPadding() bool {
	enabled := C.PD_ConfigUseFcPadding(config.c)
	return cvtPDBoolToGo(enabled)
}
///
/// \brief Turn on GPU.
///
/// \param memorySize initial size of the GPU memory pool in MB.
/// \param deviceId the GPU card to use.
///
func (config *Config) EnableUseGpu(memorySize uint64, deviceId int32) {
	cSize := C.uint64_t(memorySize)
	cDevice := C.int32_t(deviceId)
	C.PD_ConfigEnableUseGpu(config.c, cSize, cDevice)
}
///
/// \brief Turn on XPU.
///
/// \param l3WorkspaceSize size of the L3-cache workspace in bytes (max 16M).
/// \param locked whether the allocated L3 cache can be locked; if false the
/// L3 cache is shared between models, which then execute sequentially.
/// \param autotune whether to autotune conv operators: on first execution of
/// a conv of a given shape a better algorithm is searched for and reused.
/// \param autotuneFile path of the autotune file; when set, the algorithms
/// recorded there are used and no new autotuning is performed.
/// \param precision calculation precision of multi_encoder.
/// \param adaptiveSeqlen whether the input of multi_encoder is variable length.
///
func (config *Config) EnableXpu(l3WorkspaceSize int32, locked bool, autotune bool, autotuneFile string, precision string, adaptiveSeqlen bool) {
	cAutotuneFile := C.CString(autotuneFile)
	defer C.free(unsafe.Pointer(cAutotuneFile))
	cPrecision := C.CString(precision)
	defer C.free(unsafe.Pointer(cPrecision))
	C.PD_ConfigEnableXpu(config.c, C.int32_t(l3WorkspaceSize),
		cvtGoBoolToPD(locked), cvtGoBoolToPD(autotune),
		cAutotuneFile, cPrecision, cvtGoBoolToPD(adaptiveSeqlen))
}
///
/// \brief A boolean state telling whether the GPU is turned on.
///
/// \return bool Whether the GPU is turned on.
///
func (config *Config) UseGpu() bool {
	enabled := C.PD_ConfigUseGpu(config.c)
	return cvtPDBoolToGo(enabled)
}
///
/// \brief A boolean state telling whether the XPU is turned on.
///
/// \return bool Whether the XPU is turned on.
///
func (config *Config) UseXpu() bool {
	enabled := C.PD_ConfigUseXpu(config.c)
	return cvtPDBoolToGo(enabled)
}
///
/// \brief Get the GPU device id.
///
/// \return int32 The GPU device id.
///
func (config *Config) GpuDeviceId() int32 {
	id := C.PD_ConfigGpuDeviceId(config.c)
	return int32(id)
}
///
/// \brief Get the XPU device id.
///
/// \return int32 The XPU device id.
///
func (config *Config) XpuDeviceId() int32 {
	id := C.PD_ConfigXpuDeviceId(config.c)
	return int32(id)
}
///
/// \brief Get the initial size in MB of the GPU memory pool.
///
/// \return int32 The initial size in MB of the GPU memory pool.
///
func (config *Config) MemoryPoolInitSizeMb() int32 {
	sizeMb := C.PD_ConfigMemoryPoolInitSizeMb(config.c)
	return int32(sizeMb)
}
///
/// \brief Get the proportion of the initial memory pool size compared to the
/// device.
///
/// \return float32 The proportion of the initial memory pool size.
///
func (config *Config) FractionOfGpuMemoryForPool() float32 {
	fraction := C.PD_ConfigFractionOfGpuMemoryForPool(config.c)
	return float32(fraction)
}
///
/// \brief Control whether to perform IR graph optimization.
/// If turned off, the AnalysisConfig will act just like a NativeConfig.
///
/// \param x Whether the ir graph optimization is activated.
///
func (config *Config) SwitchIrOptim(x bool) {
	enable := cvtGoBoolToPD(x)
	C.PD_ConfigSwitchIrOptim(config.c, enable)
}
///
/// \brief A boolean state telling whether the ir graph optimization is
/// activated.
///
/// \return bool Whether to use ir graph optimization.
///
func (config *Config) IrOptim() bool {
	return cvtPDBoolToGo(C.PD_ConfigIrOptim(config.c))
}
///
/// \brief Turn on the TensorRT engine.
/// The TensorRT engine will accelerate some subgraphes in the original Fluid
/// computation graph. In some models such as resnet50, GoogleNet and so on,
/// it gains significant performance acceleration.
///
/// \param workspaceSize The memory size(in byte) used for TensorRT workspace.
/// \param maxBatchSize The maximum batch size of this prediction task,
/// better set as small as possible for less performance loss.
/// \param minSubgraphSize The minimum TensorRT subgraph size needed; a
/// subgraph smaller than this will not be transferred to the TensorRT engine.
/// \param precision The precision used in TensorRT.
/// \param useStatic Serialize optimization information to disk for reusing.
/// \param useCalibMode Use TRT int8 calibration (post training quantization).
///
func (config *Config) EnableTensorRtEngine(workspaceSize int32, maxBatchSize int32, minSubgraphSize int32,
	precision Precision, useStatic bool, useCalibMode bool) {
	C.PD_ConfigEnableTensorRtEngine(config.c,
		C.int32_t(workspaceSize),
		C.int32_t(maxBatchSize),
		C.int32_t(minSubgraphSize),
		C.int32_t(precision),
		cvtGoBoolToPD(useStatic),
		cvtGoBoolToPD(useCalibMode))
}
///
/// \brief A boolean state telling whether the TensorRT engine is used.
///
/// \return bool Whether the TensorRT engine is used.
///
func (config *Config) TensorRtEngineEnabled() bool {
	enabled := C.PD_ConfigTensorRtEngineEnabled(config.c)
	return cvtPDBoolToGo(enabled)
}
///
/// \brief Set min, max, opt shape for TensorRT Dynamic shape mode.
/// All three maps must contain the same tensor names; the shape lists for a
/// given name must have the same length across the three maps.
/// NOTE(review): assumes minInputShape is non-empty — &names[0] would panic
/// on an empty map; confirm callers always pass at least one tensor.
///
/// \param minInputShape The min input shape of the subgraph input.
/// \param maxInputShape The max input shape of the subgraph input.
/// \param optimInputShape The opt input shape of the subgraph input.
/// \param disableTrtPluginFp16 Setting this parameter to true means that
/// TRT plugin will not run fp16.
///
func (config *Config) SetTRTDynamicShapeInfo(minInputShape map[string][]int32, maxInputShape map[string][]int32,
	optimInputShape map[string][]int32, disableTrtPluginFp16 bool) {
	tensorNum := uint(len(minInputShape))
	// C-side tensor names, plus a Go-side copy fixing a deterministic
	// iteration order for the three shape arrays below.
	names := make([](*C.char), tensorNum)
	goNames := make([]string, tensorNum)
	var shapeNum []uint
	idx := 0
	for n := range minInputShape {
		char := C.CString(n)
		defer C.free(unsafe.Pointer(char))
		names[idx] = (*C.char)(unsafe.Pointer(char))
		goNames[idx] = n
		shapeNum = append(shapeNum, uint(len(minInputShape[n])))
		idx++
	}
	// Per-tensor C int32 arrays for the min/max/opt shapes, allocated with
	// C.malloc so cgo pointer-passing rules are satisfied, freed on return.
	cMinInputShape := make([]*C.int32_t, len(goNames))
	cMaxInputShape := make([]*C.int32_t, len(goNames))
	cOptInputShape := make([]*C.int32_t, len(goNames))
	for i, n := range goNames {
		pMin := (*C.int32_t)(C.malloc(C.size_t(C.sizeof_int32_t * len(minInputShape[n]))))
		cMinInputShape[i] = pMin
		// A []C.int32_t slice backed by C memory.
		// See: https://github.com/golang/go/wiki/cgo#turning-c-arrays-into-go-slices
		// Using [1<<27] instead of [1<<30] so it works on 32-bit architecture
		pMinData := (*[1 << 27]C.int32_t)(unsafe.Pointer(pMin))
		for j, v := range minInputShape[n] {
			(*pMinData)[j] = C.int32_t(v)
		}
		defer C.free(unsafe.Pointer(pMin))
		pMax := (*C.int32_t)(C.malloc(C.size_t(C.sizeof_int32_t * len(maxInputShape[n]))))
		cMaxInputShape[i] = pMax
		pMaxData := (*[1 << 27]C.int32_t)(unsafe.Pointer(pMax))
		for j, v := range maxInputShape[n] {
			(*pMaxData)[j] = C.int32_t(v)
		}
		defer C.free(unsafe.Pointer(pMax))
		pOpt := (*C.int32_t)(C.malloc(C.size_t(C.sizeof_int32_t * len(optimInputShape[n]))))
		cOptInputShape[i] = pOpt
		pOptData := (*[1 << 27]C.int32_t)(unsafe.Pointer(pOpt))
		for j, v := range optimInputShape[n] {
			(*pOptData)[j] = C.int32_t(v)
		}
		defer C.free(unsafe.Pointer(pOpt))
	}
	C.PD_ConfigSetTrtDynamicShapeInfo(config.c, C.size_t(tensorNum), (**C.char)(unsafe.Pointer(&names[0])),
		(*C.size_t)(unsafe.Pointer(&shapeNum[0])),
		(**C.int32_t)(unsafe.Pointer(&cMinInputShape[0])),
		(**C.int32_t)(unsafe.Pointer(&cMaxInputShape[0])),
		(**C.int32_t)(unsafe.Pointer(&cOptInputShape[0])),
		cvtGoBoolToPD(disableTrtPluginFp16))
}
///
/// \brief Prevent the listed operator types from running in Paddle-TRT.
/// NOTE: just experimental, not an official stable API, easy to be broken.
///
/// \param ops The operator type names to keep out of TensorRT.
///
func (config *Config) DisableTensorRtOPs(ops []string) {
	num := uint(len(ops))
	// num+1 entries so &buf[0] stays valid even when ops is empty.
	buf := make([]*C.char, num+1)
	// `for i := range` instead of `for i, _ := range` (gofmt/vet idiom);
	// C.CString already yields *C.char, so no unsafe.Pointer round-trip.
	for i := range ops {
		cOp := C.CString(ops[i])
		defer C.free(unsafe.Pointer(cOp))
		buf[i] = cOp
	}
	C.PD_ConfigDisableTensorRtOPs(config.c, C.size_t(num), (**C.char)(unsafe.Pointer(&buf[0])))
}
///
/// \brief Replace some TensorRT plugins to TensorRT OSS(
/// https://github.com/NVIDIA/TensorRT), with which some models's inference
/// may be more high-performance. Libnvinfer_plugin.so greater than
/// V7.2.1 is needed.
/// Thin wrapper over PD_ConfigEnableTensorRtOSS.
///
func (config *Config) EnableTensorRtOSS() {
	C.PD_ConfigEnableTensorRtOSS(config.c)
}
///
/// \brief A boolean state telling whether to use the TensorRT OSS.
///
/// \return bool Whether to use the TensorRT OSS.
///
func (config *Config) TensorrtOssEnabled() bool {
	enabled := C.PD_ConfigTensorRtOssEnabled(config.c)
	return cvtPDBoolToGo(enabled)
}
///
/// \brief Enable TensorRT DLA.
///
/// \param dlaCore ID of DLACore, which should be 0, 1,
/// ..., IBuilder.getNbDLACores() - 1
///
func (config *Config) EnableTensorRtDLA(dlaCore int32) {
	cCore := C.int32_t(dlaCore)
	C.PD_ConfigEnableTensorRtDla(config.c, cCore)
}
///
/// \brief A boolean state telling whether to use the TensorRT DLA.
///
/// \return bool Whether to use the TensorRT DLA.
///
func (config *Config) TensorrtDlaEnabled() bool {
	enabled := C.PD_ConfigTensorRtDlaEnabled(config.c)
	return cvtPDBoolToGo(enabled)
}
///
/// \brief Turn on the usage of Lite sub-graph engine.
///
/// \param precision Precision used in Lite sub-graph engine.
/// \param zeroCopy Set the zero copy mode.
/// \param passesFilter Set the passes used in Lite sub-graph engine.
/// \param opsFilter Operators not supported by Lite.
///
func (config *Config) EnableLiteEngine(precision Precision, zeroCopy bool, passesFilter []string, opsFilter []string) {
	passesFilterNum := uint(len(passesFilter))
	// One extra entry keeps &passesFilterBuf[0] valid when the slice is empty.
	passesFilterBuf := make([]*C.char, passesFilterNum+1)
	for i := range passesFilter {
		cPass := C.CString(passesFilter[i])
		defer C.free(unsafe.Pointer(cPass))
		passesFilterBuf[i] = cPass
	}

	opsFilterNum := uint(len(opsFilter))
	// BUG FIX: this buffer was sized with passesFilterNum+1; an opsFilter
	// longer than passesFilter caused an index-out-of-range panic below.
	opsFilterBuf := make([]*C.char, opsFilterNum+1)
	for i := range opsFilter {
		cOp := C.CString(opsFilter[i])
		defer C.free(unsafe.Pointer(cOp))
		opsFilterBuf[i] = cOp
	}

	C.PD_ConfigEnableLiteEngine(config.c, C.int32_t(precision), cvtGoBoolToPD(zeroCopy),
		C.size_t(passesFilterNum), (**C.char)(unsafe.Pointer(&passesFilterBuf[0])),
		C.size_t(opsFilterNum), (**C.char)(unsafe.Pointer(&opsFilterBuf[0])))
}
///
/// \brief A boolean state indicating whether the Lite sub-graph engine is
/// used.
///
/// \return bool whether the Lite sub-graph engine is used.
///
func (config *Config) LiteEngineEnabled() bool {
	enabled := C.PD_ConfigLiteEngineEnabled(config.c)
	return cvtPDBoolToGo(enabled)
}
///
/// \brief Control whether to debug IR graph analysis phase.
/// This will generate DOT files for visualizing the computation graph after
/// each analysis pass applied.
///
/// \param x whether to debug IR graph analysis phase.
///
func (config *Config) SwitchIrDebug(x bool) {
	enable := cvtGoBoolToPD(x)
	C.PD_ConfigSwitchIrDebug(config.c, enable)
}
///
/// \brief Turn on MKLDNN.
/// Thin wrapper over PD_ConfigEnableMKLDNN.
///
func (config *Config) EnableMKLDNN() {
	C.PD_ConfigEnableMKLDNN(config.c)
}
///
/// \brief Set the cache capacity of different input shapes for MKLDNN.
/// Default value 0 means not caching any shape.
/// Please see MKL-DNN Data Caching Design Document:
/// https://github.com/PaddlePaddle/FluidDoc/blob/develop/doc/fluid/design/mkldnn/caching/caching.md
///
/// \param capacity The cache capacity.
///
func (config *Config) SetMkldnnCacheCapacity(capacity int32) {
	cCapacity := C.int32_t(capacity)
	C.PD_ConfigSetMkldnnCacheCapacity(config.c, cCapacity)
}
///
/// \brief A boolean state telling whether to use the MKLDNN.
///
/// \return bool Whether to use the MKLDNN.
///
func (config *Config) MkldnnEnabled() bool {
	enabled := C.PD_ConfigMkldnnEnabled(config.c)
	return cvtPDBoolToGo(enabled)
}
///
/// \brief Set the number of cpu math library threads.
///
/// \param mathThreadsNum The number of cpu math library threads
/// (narrowed to int32 for the C API).
///
func (config *Config) SetCpuMathLibraryNumThreads(mathThreadsNum int) {
	cNum := C.int32_t(mathThreadsNum)
	C.PD_ConfigSetCpuMathLibraryNumThreads(config.c, cNum)
}
///
/// \brief An int state telling how many threads are used in the CPU math
/// library.
///
/// \return int32 The number of threads used in the CPU math library.
///
func (config *Config) CpuMathLibraryNumThreads() int32 {
	num := C.PD_ConfigGetCpuMathLibraryNumThreads(config.c)
	return int32(num)
}
///
/// \brief Transform the AnalysisConfig to NativeConfig.
///
/// \return NativeConfig The NativeConfig transformed.
///
// NativeConfig ToNativeConfig() const;
///
/// \brief Specify the operator type list to use MKLDNN acceleration.
///
/// \param opList The operator type list.
///
func (config *Config) SetMKLDNNOp(opList []string) {
	num := uint(len(opList))
	// num+1 entries so &buf[0] stays valid even when opList is empty.
	buf := make([]*C.char, num+1)
	// `for i := range` instead of `for i, _ := range` (gofmt/vet idiom);
	// C.CString already yields *C.char, so no unsafe.Pointer round-trip.
	for i := range opList {
		cOp := C.CString(opList[i])
		defer C.free(unsafe.Pointer(cOp))
		buf[i] = cOp
	}
	C.PD_ConfigSetMkldnnOp(config.c, C.size_t(num), (**C.char)(unsafe.Pointer(&buf[0])))
}
///
/// \brief Turn on MKLDNN quantization.
/// Thin wrapper over PD_ConfigEnableMkldnnQuantizer.
///
func (config *Config) EnableMkldnnQuantizer() {
	C.PD_ConfigEnableMkldnnQuantizer(config.c)
}
///
/// \brief Turn on MKLDNN bfloat16.
/// Thin wrapper over PD_ConfigEnableMkldnnBfloat16.
///
func (config *Config) EnableMkldnnBfloat16() {
	C.PD_ConfigEnableMkldnnBfloat16(config.c)
}
///
/// \brief A boolean state telling whether to use the MKLDNN Bfloat16.
///
/// \return bool Whether to use the MKLDNN Bfloat16.
///
func (config *Config) MkldnnBfloat16Enabled() bool {
	enabled := C.PD_ConfigMkldnnBfloat16Enabled(config.c)
	return cvtPDBoolToGo(enabled)
}
///
/// \brief Specify the operator type list to use Bfloat16 acceleration.
///
/// \param opList The operator type list.
///
func (config *Config) SetBfloat16Op(opList []string) {
	num := uint(len(opList))
	// num+1 entries so &buf[0] stays valid even when opList is empty.
	buf := make([]*C.char, num+1)
	// `for i := range` instead of `for i, _ := range` (gofmt/vet idiom);
	// C.CString already yields *C.char, so no unsafe.Pointer round-trip.
	for i := range opList {
		cOp := C.CString(opList[i])
		defer C.free(unsafe.Pointer(cOp))
		buf[i] = cOp
	}
	C.PD_ConfigSetBfloat16Op(config.c, C.size_t(num), (**C.char)(unsafe.Pointer(&buf[0])))
}
///
/// \brief A boolean state telling whether the thread local CUDA stream is
/// enabled.
///
/// \return bool Whether the thread local CUDA stream is enabled.
///
func (config *Config) ThreadLocalStreamEnabled() bool {
	enabled := C.PD_ConfigThreadLocalStreamEnabled(config.c)
	return cvtPDBoolToGo(enabled)
}
///
/// \brief A boolean state telling whether the MKLDNN quantization is enabled.
///
/// \return bool Whether the MKLDNN quantization is enabled.
///
func (config *Config) MkldnnQuantizerEnabled() bool {
	enabled := C.PD_ConfigMkldnnQuantizerEnabled(config.c)
	return cvtPDBoolToGo(enabled)
}
///
/// \brief Specify the memory buffer of program and parameter.
/// Used when model and params are loaded directly from memory.
///
/// \param prog The memory buffer of program.
/// \param params The memory buffer of the combined parameters file.
///
func (config *Config) SetModelBuffer(prog, params string) {
	cProg := C.CString(prog)
	defer C.free(unsafe.Pointer(cProg))
	cParams := C.CString(params)
	defer C.free(unsafe.Pointer(cParams))
	C.PD_ConfigSetModelBuffer(config.c, cProg, C.size_t(len(prog)), cParams, C.size_t(len(params)))
}
///
/// \brief A boolean state telling whether the model is set from the CPU
/// memory.
///
/// \return bool Whether model and params are loaded directly from memory.
///
func (config *Config) ModelFromMemory() bool {
	fromMemory := C.PD_ConfigModelFromMemory(config.c)
	return cvtPDBoolToGo(fromMemory)
}
///
/// \brief Turn on memory optimize
/// NOTE still in development.
/// Thin wrapper over PD_ConfigEnableMemoryOptim.
///
func (config *Config) EnableMemoryOptim() {
	C.PD_ConfigEnableMemoryOptim(config.c)
}
///
/// \brief A boolean state telling whether the memory optimization is
/// activated.
///
/// \return bool Whether the memory optimization is activated.
///
func (config *Config) MemoryOptimEnabled() bool {
	enabled := C.PD_ConfigMemoryOptimEnabled(config.c)
	return cvtPDBoolToGo(enabled)
}
///
/// \brief Turn on profiling report.
/// If not turned on, no profiling report will be generated.
/// Thin wrapper over PD_ConfigEnableProfile.
///
func (config *Config) EnableProfile() {
	C.PD_ConfigEnableProfile(config.c)
}
///
/// \brief A boolean state telling whether the profiler is activated.
///
/// \return bool Whether the profiler is activated.
///
func (config *Config) ProfileEnabled() bool {
	enabled := C.PD_ConfigProfileEnabled(config.c)
	return cvtPDBoolToGo(enabled)
}
///
/// \brief Mute all logs in Paddle inference.
/// Thin wrapper over PD_ConfigDisableGlogInfo.
///
func (config *Config) DisableGlogInfo() {
	C.PD_ConfigDisableGlogInfo(config.c)
}
///
/// \brief A boolean state telling whether logs in Paddle inference are muted.
///
/// \return bool Whether logs in Paddle inference are muted.
///
func (config *Config) GlogInfoDisabled() bool {
	disabled := C.PD_ConfigGlogInfoDisabled(config.c)
	return cvtPDBoolToGo(disabled)
}
///
/// \brief A boolean state telling whether the AnalysisConfig is valid.
///
/// \return bool Whether the AnalysisConfig is valid.
///
func (config *Config) IsValid() bool {
	valid := C.PD_ConfigIsValid(config.c)
	return cvtPDBoolToGo(valid)
}
///
/// \brief Enable the GPU multi-computing stream feature.
/// NOTE: The current behavior of this interface is to bind the computation
/// stream to the thread, and this behavior may be changed in the future.
/// Thin wrapper over PD_ConfigEnableGpuMultiStream.
///
func (config *Config) EnableGpuMultiStream() {
	C.PD_ConfigEnableGpuMultiStream(config.c)
}
///
/// \brief Delete all passes that has a certain type 'pass'.
///
/// \param pass the certain pass type to be deleted.
///
func (config *Config) DeletePass(pass string) {
	cPass := C.CString(pass)
	defer C.free(unsafe.Pointer(cPass))
	C.PD_ConfigDeletePass(config.c, cPass)
}
///
/// \brief Append a pass to the end of the passes.
///
/// \param pass the new pass.
///
func (config *Config) AppendPass(pass string) {
	cPass := C.CString(pass)
	defer C.free(unsafe.Pointer(cPass))
	C.PD_ConfigAppendPass(config.c, cPass)
}
///
/// \brief Insert a pass to a specific position.
///
/// \param idx the position to insert.
/// \param pass the new pass.
///
func (config *Config) InsertPass(idx uint64, pass string) {
	cPass := C.CString(pass)
	defer C.free(unsafe.Pointer(cPass))
	C.PD_ConfigInsertPass(config.c, C.size_t(idx), cPass)
}
///
/// \brief Get information of passes.
///
/// \return []string The list of the passes.
///
func (config *Config) AllPasses() []string {
	cPasses := C.PD_ConfigAllPasses(config.c)
	defer C.PD_OneDimArrayCstrDestroy(cPasses)
	return cvtToGoSliceString(int(cPasses.size), cPasses.data)
}
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package paddle
import "testing"
// TestNewConfig exercises the Config setters/getters end to end.
// NOTE: requires a GPU build of the C library (EnableUseGpu / TensorRT).
func TestNewConfig(t *testing.T) {
	config := NewConfig()
	config.SetProgFile("model")
	config.SetParamsFile("params")
	config.SetOptimCacheDir("cache")
	config.DisableFCPadding()
	t.Logf("UseFcPadding:%+v", config.UseFcPadding())
	// It will break when we have no xpu env.
	// config.EnableXpu(100, false, true, "", "int16", false)
	// t.Logf("EnableXpu, UseXpu:%+v ", config.UseXpu())
	config.SwitchIrOptim(true)
	t.Logf("IrOptim:%+v", config.IrOptim())
	config.EnableUseGpu(100, 0)
	t.Logf("use_gpu:%+v, gpu_id:%+v", config.UseGpu(), config.GpuDeviceId())
	t.Logf("MemoryPoolInitSizeMb:%+v, FractionOfGpuMemoryForPool:%+v", config.MemoryPoolInitSizeMb(), config.FractionOfGpuMemoryForPool())
	config.EnableTensorRtEngine(1024, 16, 3, PrecisionFloat32, false, false)
	t.Logf("TensorRtEngineEnabled:%+v", config.TensorRtEngineEnabled())
	// gofmt -s: the element type of map values is implied by the map type.
	minInputShape := map[string][]int32{
		"image": {-1, 3, 100, 100},
		"shape": {-1, 2},
	}
	maxInputShape := map[string][]int32{
		"image": {-1, 3, 608, 608},
		"shape": {-1, 2},
	}
	optInputShape := map[string][]int32{
		"image": {-1, 3, 406, 406},
		"shape": {-1, 2},
	}
	config.SetTRTDynamicShapeInfo(minInputShape, maxInputShape, optInputShape, false)
	config.EnableTensorRtOSS()
	t.Logf("TensorrtOssEnabled:%+v", config.TensorrtOssEnabled())
	config.EnableTensorRtDLA(0)
	t.Logf("TensorrtDlaEnabled:%+v", config.TensorrtDlaEnabled())
	config.DisableTensorRtOPs([]string{"mul", "fc"})
	config.EnableGpuMultiStream()
	t.Logf("ThreadLocalStreamEnabled:%+v", config.ThreadLocalStreamEnabled())
	config.SwitchIrDebug(false)
	config.EnableMKLDNN()
	config.EnableMemoryOptim()
	t.Logf("MemoryOptimEnabled:%+v", config.MemoryOptimEnabled())
	config.EnableProfile()
	t.Logf("ProfileEnabled:%+v", config.ProfileEnabled())
	config.DisableGlogInfo()
	t.Logf("GlogInfoDisabled:%+v", config.GlogInfoDisabled())
	t.Logf("IsValid:%+v", config.IsValid())
	config.AppendPass("test_pass")
	t.Logf("After AppendPass, AllPasses:%+v", config.AllPasses())
	config.DeletePass("test_pass")
	t.Logf("After DeletePass, AllPasses:%+v", config.AllPasses())
}
// TestLite checks the combined-model setters and the Lite engine toggle.
func TestLite(t *testing.T) {
	cfg := NewConfig()
	cfg.SetModel("model", "params")
	t.Log(cfg.ProgFile())
	t.Log(cfg.ParamsFile())
	cfg.EnableLiteEngine(PrecisionFloat32, true, []string{}, []string{})
	t.Logf("LiteEngineEnabled:%+v", cfg.LiteEngineEnabled())
}
// TestMkldnn checks the MKLDNN-related configuration surface.
func TestMkldnn(t *testing.T) {
	cfg := NewConfig()
	cfg.SetModelDir("modelDir")
	t.Log(cfg.ModelDir())
	cfg.EnableMKLDNN()
	t.Logf("MkldnnEnabled:%+v", cfg.MkldnnEnabled())
	cfg.SetMkldnnCacheCapacity(4)
	cfg.SetCpuMathLibraryNumThreads(4)
	t.Logf("CpuMathLibraryNumThreads:%+v", cfg.CpuMathLibraryNumThreads())
	cfg.SetMKLDNNOp([]string{"fc", "conv"})
	cfg.EnableMkldnnQuantizer()
	t.Logf("MkldnnQuantizerEnabled:%+v", cfg.MkldnnQuantizerEnabled())
	cfg.EnableMkldnnBfloat16()
	t.Logf("MkldnnBfloat16Enabled:%+v", cfg.MkldnnBfloat16Enabled())
	cfg.SetBfloat16Op([]string{"fc", "mul"})
}
module github.com/paddlepaddle/paddle/paddle/fluid/inference/goapi
go 1.15
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package paddle
// #cgo CFLAGS: -I${SRCDIR}/paddle_inference_c/paddle/include
// #cgo LDFLAGS: -L${SRCDIR}/paddle_inference_c/paddle/lib -lpaddle_inference_c
import "C"
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package paddle
// #include "pd_predictor.h"
// #include "pd_tensor.h"
// #include "pd_common.h"
// #include "pd_types.h"
// #include "pd_utils.h"
// #include <stdlib.h>
// #include <string.h>
import "C"
import (
"runtime"
"unsafe"
)
// Predictor wraps the C PD_Predictor handle and drives inference.
type Predictor struct {
	c *C.PD_Predictor // owned C handle, released by a finalizer
}
///
/// \brief Create a new Predictor.
/// The underlying C handle is released by a runtime finalizer when the Go
/// value becomes unreachable.
///
/// \param config prediction config.
/// \return new predictor.
///
func NewPredictor(config *Config) *Predictor {
	p := &Predictor{c: C.PD_PredictorCreate(config.c)}
	runtime.SetFinalizer(p, func(p *Predictor) {
		C.PD_PredictorDestroy(p.c)
	})
	return p
}
///
/// \brief Clone a new Predictor.
/// The clone's C handle is released by a runtime finalizer, like the one
/// installed by NewPredictor.
///
/// \return new predictor.
///
func (p *Predictor) Clone() *Predictor {
	clone := &Predictor{c: C.PD_PredictorClone(p.c)}
	runtime.SetFinalizer(clone, func(clone *Predictor) {
		C.PD_PredictorDestroy(clone.c)
	})
	return clone
}
///
/// \brief Get the input number.
///
/// \return input number.
///
func (p *Predictor) GetInputNum() uint {
	num := C.PD_PredictorGetInputNum(p.c)
	return uint(num)
}
///
/// \brief Get the output number.
///
/// \return output number.
///
func (p *Predictor) GetOutputNum() uint {
	num := C.PD_PredictorGetOutputNum(p.c)
	return uint(num)
}
///
/// \brief Get the input names.
///
/// \return input names.
///
func (p *Predictor) GetInputNames() []string {
	cNames := C.PD_PredictorGetInputNames(p.c)
	defer C.PD_OneDimArrayCstrDestroy(cNames)
	return cvtToGoSliceString(int(cNames.size), cNames.data)
}
///
/// \brief Get the output names.
///
/// \return output names.
///
func (p *Predictor) GetOutputNames() []string {
	cNames := C.PD_PredictorGetOutputNames(p.c)
	defer C.PD_OneDimArrayCstrDestroy(cNames)
	return cvtToGoSliceString(int(cNames.size), cNames.data)
}
///
/// \brief Get the Input Tensor object.
/// The tensor's C handle is released by a runtime finalizer.
///
/// \param name input name.
/// \return input tensor.
///
func (p *Predictor) GetInputHandle(name string) *Tensor {
	cName := C.CString(name)
	defer C.free(unsafe.Pointer(cName))
	t := &Tensor{c: C.PD_PredictorGetInputHandle(p.c, cName)}
	runtime.SetFinalizer(t, func(t *Tensor) {
		C.PD_TensorDestroy(t.c)
	})
	return t
}
///
/// \brief Get the Output Tensor object.
/// The tensor's C handle is released by a runtime finalizer.
///
/// \param name output name.
/// \return output tensor.
///
func (p *Predictor) GetOutputHandle(name string) *Tensor {
	cName := C.CString(name)
	defer C.free(unsafe.Pointer(cName))
	t := &Tensor{c: C.PD_PredictorGetOutputHandle(p.c, cName)}
	runtime.SetFinalizer(t, func(t *Tensor) {
		C.PD_TensorDestroy(t.c)
	})
	return t
}
///
/// \brief Run the prediction engine
///
// Executes one inference pass; inputs must have been written to the
// input tensor handles beforehand, and results are read back through
// the output tensor handles.
func (p *Predictor) Run() {
	C.PD_PredictorRun(p.c)
}
///
/// \brief Clear the intermediate tensors of the predictor
///
// Frees scratch tensors held between runs to reduce memory footprint.
func (p *Predictor) ClearIntermediateTensor() {
	C.PD_PredictorClearIntermediateTensor(p.c)
}
///
/// \brief Release all tmp tensor to compress the size of the memory pool.
/// The memory pool is considered to be composed of a list of chunks, if
/// the chunk is not occupied, it can be released.
///
/// NOTE(review): the original doc promised "Number of bytes released",
/// but this wrapper returns nothing — any such value from the C side is
/// discarded here. Confirm against PD_PredictorTryShrinkMemory whether
/// exposing it is worthwhile; do not rely on a return value.
///
func (p *Predictor) TryShrinkMemory() {
	C.PD_PredictorTryShrinkMemory(p.c)
}
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package paddle
import (
"io/ioutil"
"os"
"testing"
)
// TestNewPredictor exercises the full predictor life cycle on GPU:
// configuration, input/output introspection, lod round-trip, a forward
// pass, output read-back, and cloning.
func TestNewPredictor(t *testing.T) {
	t.Logf("Version:\n%+v", Version())
	config := NewConfig()
	config.SetModel("./mobilenetv1/inference.pdmodel", "./mobilenetv1/inference.pdiparams")
	config.EnableUseGpu(100, 0)
	predictor := NewPredictor(config)
	inNames := predictor.GetInputNames()
	t.Logf("InputNames:%+v", inNames)
	outNames := predictor.GetOutputNames()
	t.Logf("OutputNames:%+v", outNames)
	inHandle := predictor.GetInputHandle(inNames[0])
	inHandle.Reshape([]int32{1, 3, 224, 224})
	t.Logf("inHandle name:%+v, shape:%+v", inHandle.Name(), inHandle.Shape())
	var lod [][]uint
	lod = append(lod, []uint{0, 1, 2})
	lod = append(lod, []uint{1, 2, 3, 4})
	inHandle.SetLod(lod)
	t.Logf("inHandle Lod:%+v", inHandle.Lod())
	// Fill the input with a deterministic ramp pattern.
	data := make([]float32, numElements([]int32{1, 3, 224, 224}))
	for i := 0; i < len(data); i++ {
		data[i] = float32(i%255) * 0.1
	}
	inHandle.CopyFromCpu(data)
	t.Logf("inHandle Type:%+v", inHandle.Type())
	predictor.Run()
	outHandle := predictor.GetOutputHandle(outNames[0])
	t.Logf("outHandle name:%+v", outHandle.Name())
	outShape := outHandle.Shape()
	t.Logf("outHandle Shape:%+v", outShape)
	outData := make([]float32, numElements(outShape))
	outHandle.CopyToCpu(outData)
	t.Log(outData)
	cloned := predictor.Clone()
	t.Logf("InputNum:%+v", cloned.GetInputNum())
	// Fixed: the original logged GetInputNum under the "OutputNum" label.
	t.Logf("OutputNum:%+v", cloned.GetOutputNum())
	cloned.ClearIntermediateTensor()
}
// TestFromBuffer verifies that a predictor can be created from
// in-memory model/params buffers (SetModelBuffer) rather than from
// file paths, and that inference still produces output.
func TestFromBuffer(t *testing.T) {
	modelFile, err := os.Open("./mobilenetv1/inference.pdmodel")
	if err != nil {
		t.Fatal(err)
	}
	// Register Close immediately after a successful Open. The original
	// deferred both closes only after the second Open, so a failure
	// opening the params file leaked the model file handle.
	defer modelFile.Close()
	paramsFile, err := os.Open("./mobilenetv1/inference.pdiparams")
	if err != nil {
		t.Fatal(err)
	}
	defer paramsFile.Close()
	model, err := ioutil.ReadAll(modelFile)
	if err != nil {
		t.Fatal(err)
	}
	params, err := ioutil.ReadAll(paramsFile)
	if err != nil {
		t.Fatal(err)
	}
	config := NewConfig()
	config.SetModelBuffer(string(model), string(params))
	predictor := NewPredictor(config)
	inNames := predictor.GetInputNames()
	outNames := predictor.GetOutputNames()
	inHandle := predictor.GetInputHandle(inNames[0])
	inHandle.Reshape([]int32{1, 3, 224, 224})
	// Deterministic ramp input, same pattern as TestNewPredictor.
	data := make([]float32, numElements([]int32{1, 3, 224, 224}))
	for i := 0; i < len(data); i++ {
		data[i] = float32(i%255) * 0.1
	}
	inHandle.CopyFromCpu(data)
	predictor.Run()
	outHandle := predictor.GetOutputHandle(outNames[0])
	outShape := outHandle.Shape()
	t.Logf("outHandle Shape:%+v", outShape)
	outData := make([]float32, numElements(outShape))
	outHandle.CopyToCpu(outData)
	t.Log(outData)
}
// numElements returns the product of all dimensions in shape, i.e.
// the total element count of a tensor with that shape. An empty shape
// yields 1.
func numElements(shape []int32) int32 {
	total := int32(1)
	for i := range shape {
		total *= shape[i]
	}
	return total
}
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package paddle
// #include "pd_tensor.h"
// #include "pd_utils.h"
// #include "pd_types.h"
// #include "pd_common.h"
// #include "stdlib.h"
import "C"
import (
"fmt"
"reflect"
"unsafe"
)
// DataType mirrors the C enum PD_DataType and identifies a tensor's
// element type.
type DataType C.PD_DataType

// Supported tensor element types, mapped one-to-one onto the C enum.
const (
	Unk DataType = C.PD_DATA_UNK
	Float32 DataType = C.PD_DATA_FLOAT32
	Int32 DataType = C.PD_DATA_INT32
	Int64 DataType = C.PD_DATA_INT64
	Uint8 DataType = C.PD_DATA_UINT8
	Int8 DataType = C.PD_DATA_INT8
)

// PlaceType mirrors the C enum PD_PlaceType and identifies the device
// on which a tensor lives.
type PlaceType C.PD_PlaceType

// Supported tensor placements, mapped one-to-one onto the C enum.
const (
	UnkPlace PlaceType = C.PD_PLACE_UNK
	CpuPlace PlaceType = C.PD_PLACE_CPU
	GpuPlace PlaceType = C.PD_PLACE_GPU
	XpuPlace PlaceType = C.PD_PLACE_XPU
)

// Tensor wraps a C PD_Tensor handle obtained from a Predictor.
type Tensor struct {
	c *C.PD_Tensor
}
///
/// \brief Reset the shape of the tensor.
/// Generally it's only used for the input tensor.
///
/// \param[in] shape The shape to set.
///
func (t *Tensor) Reshape(shape []int32) {
C.PD_TensorReshape(t.c, C.size_t(len(shape)), (*C.int32_t)(unsafe.Pointer(&shape[0])))
}
///
/// \brief Get the tensor shape
///
/// \return The tensor shape.
///
func (t *Tensor) Shape() []int32 {
cData := C.PD_TensorGetShape(t.c)
length := int(cData.size)
defer C.PD_OneDimArrayInt32Destroy(cData)
return cvtToGoSliceInt32(length, cData.data)
}
///
/// \brief Set the tensor lod information
/// \param[in] pd_tensor tensor.
/// \param[in] lod lod information.
///
// Marshals the Go [][]uint level-of-detail structure into the C
// PD_TwoDimArraySize layout expected by PD_TensorSetLod, then frees
// the C-side scratch allocations.
func (t *Tensor) SetLod(lod [][]uint) {
	cLod := (*C.struct_PD_TwoDimArraySize)(C.malloc(C.size_t(C.sizeof_struct_PD_TwoDimArraySize)))
	length := len(lod)
	cLod.size = C.size_t(uint(length))
	// length+1 keeps lodList non-empty so &lodList[0] below is valid
	// even when lod itself is empty.
	var lodList = make([]*C.struct_PD_OneDimArraySize, length+1)
	for i, v := range lod {
		// One C array per lod level; the defers free them only after
		// the PD_TensorSetLod call below has returned.
		oneDimArray := (*C.struct_PD_OneDimArraySize)(C.malloc(C.size_t(C.sizeof_struct_PD_OneDimArraySize)))
		defer C.free(unsafe.Pointer(oneDimArray))
		tmpLength := len(v)
		oneDimArray.size = C.size_t(uint(tmpLength))
		tmpC := (*C.size_t)(C.malloc(C.size_t(C.sizeof_size_t * tmpLength)))
		defer C.free(unsafe.Pointer(tmpC))
		tmpSlice := (*[1 << 27]C.size_t)(unsafe.Pointer(tmpC))[:tmpLength:tmpLength]
		for j, w := range v {
			tmpSlice[j] = C.size_t(w)
		}
		oneDimArray.data = tmpC
		lodList[i] = oneDimArray
	}
	cLod.data = (**C.struct_PD_OneDimArraySize)(unsafe.Pointer(&lodList[0]))
	C.PD_TensorSetLod(t.c, cLod)
	// Only the outer struct is freed here; the inner arrays go through
	// the defers above. NOTE(review): PD_TwoDimArraySizeDestroy is kept
	// disabled, presumably because cLod.data points at Go-managed
	// memory (lodList) that the C destroyer must not free — confirm.
	C.free(unsafe.Pointer(cLod))
	// C.PD_TwoDimArraySizeDestroy(cLod)
}
///
/// \brief Get the tensor lod information
///
/// \return the lod information.
///
func (t *Tensor) Lod() [][]uint {
cLod := C.PD_TensorGetLod(t.c)
length := int(cLod.size)
res := make([][]uint, length)
if length == 0 {
return res
}
cLodSlice := (*[1 << 27]*C.struct_PD_OneDimArraySize)(unsafe.Pointer(cLod.data))[:length:length]
for i := 0; i < length; i++ {
size := uint(cLodSlice[i].size)
lod := make([]uint, size)
tmpSlice := (*[1 << 27]C.size_t)(unsafe.Pointer(cLodSlice[i].data))[:size:size]
for j, v := range tmpSlice {
lod[j] = uint(v)
}
res[i] = lod
}
C.PD_TwoDimArraySizeDestroy(cLod)
return res
}
///
/// \brief Get the tensor data type
/// \param[in] pd_tensor tensor.
/// \return the tensor data type.
///
func (t *Tensor) Type() DataType {
cDtype := C.PD_TensorGetDataType(t.c)
return DataType(cDtype)
}
///
/// \brief Get the tensor name
///
/// \return the tensor name.
///
// C.GoString copies the bytes, so the result stays valid after the
// underlying tensor is destroyed.
func (t *Tensor) Name() string {
	return C.GoString(C.PD_TensorGetName(t.c))
}
///
/// \brief Copy the host memory to tensor data.
/// It's usually used to set the input tensor data.
///
/// \param[in] value a flat slice of float32, int32, int64, uint8 or
/// int8; the tensor must already be Reshape'd so the slice length
/// matches the element count.
///
// NOTE(review): the error from dataTypeOf is discarded and there is no
// default case, so an unsupported element type is silently ignored.
// An empty slice would panic on &data[0]; callers must size the slice.
func (t *Tensor) CopyFromCpu(value interface{}) {
	val := reflect.ValueOf(value)
	dtype, _ := dataTypeOf(val)
	switch dtype {
	case Float32:
		data := val.Interface().([]float32)
		C.PD_TensorCopyFromCpuFloat(t.c, (*C.float)(unsafe.Pointer(&data[0])))
	case Int32:
		data := val.Interface().([]int32)
		C.PD_TensorCopyFromCpuInt32(t.c, (*C.int32_t)(unsafe.Pointer(&data[0])))
	case Int64:
		data := val.Interface().([]int64)
		C.PD_TensorCopyFromCpuInt64(t.c, (*C.int64_t)(unsafe.Pointer(&data[0])))
	case Uint8:
		data := val.Interface().([]uint8)
		C.PD_TensorCopyFromCpuUint8(t.c, (*C.uint8_t)(unsafe.Pointer(&data[0])))
	case Int8:
		data := val.Interface().([]int8)
		C.PD_TensorCopyFromCpuInt8(t.c, (*C.int8_t)(unsafe.Pointer(&data[0])))
	}
}
///
/// \brief Copy the tensor data to the host memory.
/// It's usually used to get the output tensor data.
///
/// \param[value] data The tensor will copy the data to the address.
/// The slice must be pre-sized (e.g. via numElements of Shape()).
///
// NOTE(review): mirrors CopyFromCpu — the dataTypeOf error is
// discarded, unsupported element types are silently ignored, and an
// empty slice would panic on &data[0].
func (t *Tensor) CopyToCpu(value interface{}) {
	val := reflect.ValueOf(value)
	dtype, _ := dataTypeOf(val)
	switch dtype {
	case Float32:
		data := val.Interface().([]float32)
		C.PD_TensorCopyToCpuFloat(t.c, (*C.float)(unsafe.Pointer(&data[0])))
	case Int32:
		data := val.Interface().([]int32)
		C.PD_TensorCopyToCpuInt32(t.c, (*C.int32_t)(unsafe.Pointer(&data[0])))
	case Int64:
		data := val.Interface().([]int64)
		C.PD_TensorCopyToCpuInt64(t.c, (*C.int64_t)(unsafe.Pointer(&data[0])))
	case Uint8:
		data := val.Interface().([]uint8)
		C.PD_TensorCopyToCpuUint8(t.c, (*C.uint8_t)(unsafe.Pointer(&data[0])))
	case Int8:
		data := val.Interface().([]int8)
		C.PD_TensorCopyToCpuInt8(t.c, (*C.int8_t)(unsafe.Pointer(&data[0])))
	}
}
// types maps Go scalar kinds to their Paddle C data-type tags; it is
// the lookup table consulted by dataTypeOf.
var types = []struct {
	typ reflect.Type
	dataType C.PD_DataType
}{
	{reflect.TypeOf(float32(0)), C.PD_DATA_FLOAT32},
	{reflect.TypeOf(int32(0)), C.PD_DATA_INT32},
	{reflect.TypeOf(int64(0)), C.PD_DATA_INT64},
	{reflect.TypeOf(uint8(0)), C.PD_DATA_UINT8},
	{reflect.TypeOf(int8(0)), C.PD_DATA_INT8},
}
// dataTypeOf maps the element type of val — descending through any
// nesting of slices/arrays — to the matching Paddle DataType.
// It returns an error if the scalar kind is unsupported.
func dataTypeOf(val reflect.Value) (dt DataType, err error) {
	typ := val.Type()
	// Only the static element type matters; the original also stepped
	// val itself down via val.Index(0), but that value was never read
	// again (dead code), so the walk is on the type alone.
	for typ.Kind() == reflect.Array || typ.Kind() == reflect.Slice {
		typ = typ.Elem()
	}
	for _, t := range types {
		if typ.Kind() == t.typ.Kind() {
			return DataType(t.dataType), nil
		}
	}
	return dt, fmt.Errorf("unsupported type %v", typ)
}
#!/bin/bash
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# 1. download the mobilenetv1 model to test config and predictor
if [ ! -d mobilenetv1 ]; then
wget https://paddle-inference-dist.bj.bcebos.com/Paddle-Inference-Demo/mobilenetv1.tgz
tar xzf mobilenetv1.tgz
fi
# 2. set LD_LIBRARY_PATH
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$PWD/paddle_inference_c/paddle/lib
# 3. go test
go test -v ./...
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -11,37 +11,51 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <paddle_inference_api.h>
#include <fstream>
#include <iostream>
void SetConfig(paddle::AnalysisConfig *);
package paddle
int main(int argc, char *argv[]) {
paddle::AnalysisConfig config;
SetConfig(&config);
auto predictor = paddle::CreatePaddlePredictor(config);
auto input_name = predictor->GetInputNames()[0];
auto input = predictor->GetInputTensor(input_name);
std::cout << predictor->GetOutputNames()[0] << std::endl;
std::vector<int> shape{1, 3, 300, 300};
input->Reshape(std::move(shape));
std::vector<float> data(1 * 300 * 300 * 3);
std::ifstream fin("data/data.txt");
for (int i = 0; i < data.size(); i++) {
fin >> data[i];
}
// #include <stdint.h>
// #include <stdlib.h>
import "C"
import (
"unsafe"
)
input->copy_from_cpu(data.data());
predictor->ZeroCopyRun();
auto output_name = predictor->GetOutputNames()[0];
auto output = predictor->GetOutputTensor(output_name);
return 0;
func cvtPDBoolToGo(b C.int8_t) bool {
var cFalse C.int8_t
if b != cFalse {
return true
}
return false
}
void SetConfig(paddle::AnalysisConfig *config) {
config->SetModel("data/model/__model__", "data/model/__params__");
config->SwitchUseFeedFetchOps(false);
config->SwitchSpecifyInputNames(true);
config->SwitchIrOptim(false);
func cvtGoBoolToPD(b bool) C.int8_t {
if b == false {
return 0
}
return 1
}
func cvtToGoSliceString(length int, str **C.char) []string {
if str == nil {
return nil
}
tmpSlice := (*[1 << 27]*C.char)(unsafe.Pointer(str))[:length:length]
goStrings := make([]string, length)
for i, s := range tmpSlice {
goStrings[i] = C.GoString(s)
}
return goStrings
}
func cvtToGoSliceInt32(length int, data *C.int32_t) []int32 {
if data == nil {
return nil
}
tmpSlice := (*[1 << 27]C.int32_t)(unsafe.Pointer(data))[:length:length]
res := make([]int32, length)
for i, s := range tmpSlice {
res[i] = int32(s)
}
return res
}
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -14,29 +14,13 @@
package paddle
// #cgo CFLAGS: -I${SRCDIR}/../paddle_c/paddle/include
// #cgo LDFLAGS: -L${SRCDIR}/../paddle_c/paddle/lib -lpaddle_inference_c
// #include <stdbool.h>
// #include <paddle_c_api.h>
// #include "pd_common.h"
// #include "pd_predictor.h"
// #include "pd_types.h"
// #include "pd_utils.h"
import "C"
import "fmt"
func ConvertCBooleanToGo(b C.bool) bool {
var c_false C.bool
if b != c_false {
return true
}
return false
}
// numel returns the total number of elements implied by shape (the
// product of all dimensions; 1 for an empty shape).
func numel(shape []int32) int32 {
	total := int32(1)
	for i := range shape {
		total *= shape[i]
	}
	return total
}
func bug(format string, args ...interface{}) error {
return fmt.Errorf("Bug %v", fmt.Sprintf(format, args...))
// Version returns the version string reported by the underlying
// Paddle inference C library.
func Version() string {
	cVersion := C.PD_GetVersion()
	return C.GoString(cVersion)
}
......@@ -1985,6 +1985,26 @@ EOF
fi
}
# Run the Go inference API test suite against the freshly built
# paddle_inference_c library; exits the whole script with code 8 on
# test failure.
function test_go_inference_api() {
    cat <<EOF
    ========================================
    Testing go inference api ...
    ========================================
EOF
# ln paddle_inference_c lib
cd ${PADDLE_ROOT}/build
ln -s ${PADDLE_ROOT}/build/paddle_inference_c_install_dir/ ${PADDLE_ROOT}/paddle/fluid/inference/goapi/paddle_inference_c
# run go test
cd ${PADDLE_ROOT}/paddle/fluid/inference/goapi
bash test.sh
# capture the test.sh exit status and fail the build if non-zero
EXIT_CODE=$?
if [[ "$EXIT_CODE" != "0" ]]; then
exit 8;
fi
}
function test_fluid_lib_train() {
cat <<EOF
========================================
......@@ -2226,6 +2246,8 @@ function main() {
gen_fluid_lib ${parallel_number}
test_fluid_lib
#test_fluid_lib_train
#go inference test
test_go_inference_api
;;
test_train)
gen_fluid_lib ${parallel_number}
......
......@@ -123,7 +123,7 @@ RUN rm Python-$version.tgz setuptools-40.6.2.zip setuptools-50.3.2.zip pip-20.0.
# Install Go and glide
WORKDIR /home
RUN wget -qO- https://paddle-ci.cdn.bcebos.com/go1.8.1.linux-amd64.tar.gz | \
RUN wget -qO- https://paddle-ci.gz.bcebos.com/go1.15.12.linux-amd64.tar.gz | \
tar -xz -C /usr/local && \
mkdir /root/gopath && \
mkdir /root/gopath/bin && \
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册