Unverified commit c7e3c918 authored by W Wilber, committed by GitHub

[Inference] Update go inference api based on new capi. (#33113)

Parent ab0272eb
# Paddle Inference golang API
## Installation
First, build with `-DON_INFER=ON` enabled in the cmake step; the build directory will then contain `paddle_inference_c_install_dir`. Move that directory into the current directory and rename it to `paddle_c`.
## Using Paddle Inference in Go
First, create the inference config:
``` go
config := paddle.NewAnalysisConfig()
config.SetModel(model_file, params_file)
config.SwitchUseFeedFetchOps(false)
config.SwitchSpecifyInputNames(true)
```
Create the predictor:
``` go
predictor := paddle.NewPredictor(config)
```
Get the input and output tensors:
``` go
inputs := predictor.GetInputTensors()
outputs := predictor.GetOutputTensors()
```
Set the input data (assuming a single input):
``` go
input := inputs[0]
input.SetValue(data)
input.Reshape([]int32{1, 3, 300, 300})
```
Bind the input and run inference:
``` go
predictor.SetZeroCopyInput(input)
predictor.ZeroCopyRun()
```
Get the output tensor's value (the concrete slice type must match the tensor's dtype; see the dtype switch in the demo below):
``` go
output := outputs[0]
predictor.GetZeroCopyOutput(output)
value := reflect.ValueOf(output.Value())
shape, dtype := paddle.ShapeAndTypeOf(value)
output_data := value.Interface().([][]float32)
```
## Example
See [mobilenet](./demo/mobilenet.go) for the full source.
Download the [test data](https://paddle-inference-dist.cdn.bcebos.com/mobilenet-test-model-data.tar.gz) and extract it into the current directory.
Run:
```bash
go mod init github.com/paddlepaddle
export LD_LIBRARY_PATH=`pwd`/paddle_c/paddle/lib:$LD_LIBRARY_PATH
go run ./demo/mobilenet.go
```
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import "github.com/paddlepaddle/paddle"
import "strings"
import "io/ioutil"
import "strconv"
import "reflect"
func main() {
config := paddle.NewAnalysisConfig()
config.SetModel("data/model/__model__", "data/model/__params__")
config.DisableGlogInfo()
config.SwitchUseFeedFetchOps(false)
config.SwitchSpecifyInputNames(true)
predictor := paddle.NewPredictor(config)
println("============== paddle inference ==============")
println("input num: ", predictor.GetInputNum())
println("input name: ", predictor.GetInputNames()[0])
println("output num: ", predictor.GetOutputNum())
println("output name: ", predictor.GetInputNames()[0])
println("============== run inference =================")
input := predictor.GetInputTensors()[0]
output := predictor.GetOutputTensors()[0]
filename := "data/data.txt"
data := ReadData(filename)
input.SetValue(data[:1 * 3 * 300 * 300])
input.Reshape([]int32{1, 3, 300, 300})
predictor.SetZeroCopyInput(input)
predictor.ZeroCopyRun()
predictor.GetZeroCopyOutput(output)
println("============= parse output ===================")
output_val := output.Value()
value := reflect.ValueOf(output_val)
shape, dtype := paddle.ShapeAndTypeOf(value)
switch dtype {
case paddle.PaddleDType(paddle.FLOAT32):
v := value.Interface().([][]float32)
println("v: ", v[0][0], v[0][1], "...")
case paddle.PaddleDType(paddle.UINT8):
v := value.Interface().([][]uint8)
println("v: ", v[0][0], v[0][1], "...")
case paddle.PaddleDType(paddle.INT32):
v := value.Interface().([][]int32)
println("v: ", v[0][0], v[0][1], "...")
case paddle.PaddleDType(paddle.INT64):
v := value.Interface().([][]int64)
println("v: ", v[0][0], v[0][1], "...")
}
println(shape[0], shape[1])
println(output.Shape()[0])
}
func ReadData(filename string) []float32 {
file_bytes, _ := ioutil.ReadFile(filename)
data_slice := strings.Split(string(file_bytes), " ")
var result []float32
for _, n := range data_slice {
r, _ := strconv.ParseFloat(n, 32)
result = append(result, float32(r))
}
return result
}
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <paddle_c_api.h>
#include <stdio.h>
#include <stdlib.h>
void SetConfig(PD_AnalysisConfig *);
void ReadData(float *data, int size);
int main(int argc, char *argv[]) {
PD_AnalysisConfig *config = PD_NewAnalysisConfig();
SetConfig(config);
PD_Predictor *predictor = PD_NewPredictor(config);
int input_num = PD_GetInputNum(predictor);
printf("Input num: %d\n", input_num);
int output_num = PD_GetOutputNum(predictor);
printf("Output num: %d\n", output_num);
PD_ZeroCopyTensor input;
PD_InitZeroCopyTensor(&input);
input.name = const_cast<char *>(PD_GetInputName(predictor, 0)); // NOLINT
input.data.capacity = sizeof(float) * 1 * 3 * 300 * 300;
input.data.length = input.data.capacity;
input.data.data = malloc(input.data.capacity);
int shape[] = {1, 3, 300, 300};
input.shape.data = static_cast<int *>(shape);
input.shape.capacity = sizeof(shape);
input.shape.length = sizeof(shape);
input.dtype = PD_FLOAT32;
ReadData((float *)input.data.data, 1 * 3 * 300 * 300); // NOLINT
float *data = (float *)input.data.data; // NOLINT
PD_SetZeroCopyInput(predictor, &input);
int *shape_ptr = (int *)input.shape.data; // NOLINT
PD_ZeroCopyRun(predictor);
PD_ZeroCopyTensor output;
PD_InitZeroCopyTensor(&output);
output.name = const_cast<char *>(PD_GetOutputName(predictor, 0)); // NOLINT
PD_GetZeroCopyOutput(predictor, &output);
PD_DestroyZeroCopyTensor(&output);
PD_DeleteAnalysisConfig(config);
PD_DeletePredictor(predictor);
return 0;
}
void SetConfig(PD_AnalysisConfig *config) {
PD_SetModel(config, "data/model/__model__", "data/model/__params__");
PD_SwitchUseFeedFetchOps(config, false);
PD_SwitchSpecifyInputNames(config, true);
PD_DisableGlogInfo(config);
// PD_SwitchIrOptim(config, false);
}
void ReadData(float *data, int n) {
FILE *fp = fopen("data/data.txt", "r");
for (int i = 0; i < n; i++) {
fscanf(fp, "%f", &data[i]);
}
fclose(fp);
}
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <pd_inference_api.h>
#include <stdio.h>
#include <stdlib.h>
void ReadData(float* data, int size);
int main(int argc, char* argv[]) {
PD_Config* config = PD_ConfigCreate();
PD_ConfigSetModel(config, "data/model/__model__", "data/model/__params__");
PD_ConfigDisableGlogInfo(config);
PD_Predictor* predictor = PD_PredictorCreate(config);
// config has been destroyed by PD_PredictorCreate
config = NULL;
int input_num = PD_PredictorGetInputNum(predictor);
printf("Input num: %d\n", input_num);
int output_num = PD_PredictorGetOutputNum(predictor);
printf("Output num: %d\n", output_num);
PD_OneDimArrayCstr* input_names = PD_PredictorGetInputNames(predictor);
PD_Tensor* input_tensor =
PD_PredictorGetInputHandle(predictor, input_names->data[0]);
PD_OneDimArrayCstrDestroy(input_names);
input_names = NULL;
int32_t shape[] = {1, 3, 300, 300};
float* data = (float*)malloc(sizeof(float) * 1 * 3 * 300 * 300); // NOLINT
ReadData(data, 1 * 3 * 300 * 300); // NOLINT
PD_TensorReshape(input_tensor, 4, shape);
PD_TensorCopyFromCpuFloat(input_tensor, data);
free(data);
data = NULL;
PD_PredictorRun(predictor);
PD_OneDimArrayCstr* output_names = PD_PredictorGetOutputNames(predictor);
PD_Tensor* output_tensor =
PD_PredictorGetOutputHandle(predictor, output_names->data[0]);
PD_OneDimArrayCstrDestroy(output_names);
output_names = NULL;
PD_OneDimArrayInt32* out_shape = PD_TensorGetShape(output_tensor);
int32_t size = 1;
for (size_t index = 0; index < out_shape->size; ++index) {
size = size * out_shape->data[index];
}
PD_OneDimArrayInt32Destroy(out_shape);
out_shape = NULL;
data = (float*)malloc(sizeof(float) * size); // NOLINT
PD_TensorCopyToCpuFloat(output_tensor, data);
free(data);
data = NULL;
PD_TensorDestroy(output_tensor);
output_tensor = NULL;
PD_TensorDestroy(input_tensor);
input_tensor = NULL;
PD_PredictorDestroy(predictor);
predictor = NULL;
return 0;
}
void ReadData(float* data, int n) {
FILE* fp = fopen("data/data.txt", "r");
for (int i = 0; i < n; i++) {
fscanf(fp, "%f", &data[i]);
}
fclose(fp);
}
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package paddle
// #cgo CFLAGS: -I${SRCDIR}/../paddle_c/paddle/include
// #cgo LDFLAGS: -L${SRCDIR}/../paddle_c/paddle/lib -lpaddle_inference_c
// #include <stdbool.h>
// #include <stdlib.h>
// #include <paddle_c_api.h>
import "C"
import "runtime"
import "unsafe"
type Precision C.Precision
const (
Precision_FLOAT32 Precision = C.kFloat32
Precision_INT8 Precision = C.kInt8
Precision_HALF Precision = C.kHalf
)
type AnalysisConfig struct {
c *C.PD_AnalysisConfig
}
func NewAnalysisConfig() *AnalysisConfig {
c_config := C.PD_NewAnalysisConfig()
config := &AnalysisConfig{c: c_config}
runtime.SetFinalizer(config, (*AnalysisConfig).finalize)
return config
}
func (config *AnalysisConfig) finalize() {
C.PD_DeleteAnalysisConfig(config.c)
}
func (config *AnalysisConfig) SetModel(model, params string) {
//C.printString((*C.char)(unsafe.Pointer(&s[0])))
c_model := C.CString(model)
defer C.free(unsafe.Pointer(c_model))
var c_params *C.char
if params == "" {
c_params = nil
} else {
c_params = C.CString(params)
defer C.free(unsafe.Pointer(c_params))
}
C.PD_SetModel(config.c, c_model, c_params)
}
func (config *AnalysisConfig) ModelDir() string {
return C.GoString(C.PD_ModelDir(config.c))
}
func (config *AnalysisConfig) ProgFile() string {
return C.GoString(C.PD_ProgFile(config.c))
}
func (config *AnalysisConfig) ParamsFile() string {
return C.GoString(C.PD_ParamsFile(config.c))
}
func (config *AnalysisConfig) EnableUseGpu(memory_pool_init_size_mb int, device_id int) {
C.PD_EnableUseGpu(config.c, C.int(memory_pool_init_size_mb), C.int(device_id))
}
func (config *AnalysisConfig) DisableGpu() {
C.PD_DisableGpu(config.c)
}
func (config *AnalysisConfig) UseGpu() bool {
return ConvertCBooleanToGo(C.PD_UseGpu(config.c))
}
func (config *AnalysisConfig) GpuDeviceId() int {
return int(C.PD_GpuDeviceId(config.c))
}
func (config *AnalysisConfig) MemoryPoolInitSizeMb() int {
return int(C.PD_MemoryPoolInitSizeMb(config.c))
}
func (config *AnalysisConfig) FractionOfGpuMemoryForPool() float32 {
return float32(C.PD_FractionOfGpuMemoryForPool(config.c))
}
func (config *AnalysisConfig) EnableCudnn() {
C.PD_EnableCUDNN(config.c)
}
func (config *AnalysisConfig) CudnnEnabled() bool {
return ConvertCBooleanToGo(C.PD_CudnnEnabled(config.c))
}
func (config *AnalysisConfig) SwitchIrOptim(x bool) {
C.PD_SwitchIrOptim(config.c, C.bool(x))
}
func (config *AnalysisConfig) IrOptim() bool {
return ConvertCBooleanToGo(C.PD_IrOptim(config.c))
}
func (config *AnalysisConfig) SwitchUseFeedFetchOps(x bool) {
C.PD_SwitchUseFeedFetchOps(config.c, C.bool(x))
}
func (config *AnalysisConfig) UseFeedFetchOpsEnabled() bool {
return ConvertCBooleanToGo(C.PD_UseFeedFetchOpsEnabled(config.c))
}
func (config *AnalysisConfig) SwitchSpecifyInputNames(x bool) {
C.PD_SwitchSpecifyInputNames(config.c, C.bool(x))
}
func (config *AnalysisConfig) SpecifyInputName() bool {
return ConvertCBooleanToGo(C.PD_SpecifyInputName(config.c))
}
func (config *AnalysisConfig) EnableTensorRtEngine(workspace_size int, max_batch_size int, min_subgraph_size int, precision Precision, use_static bool, use_calib_mode bool) {
C.PD_EnableTensorRtEngine(config.c, C.int(workspace_size), C.int(max_batch_size), C.int(min_subgraph_size), C.Precision(precision), C.bool(use_static), C.bool(use_calib_mode))
}
func (config *AnalysisConfig) TensorrtEngineEnabled() bool {
return ConvertCBooleanToGo(C.PD_TensorrtEngineEnabled(config.c))
}
func (config *AnalysisConfig) SwitchIrDebug(x bool) {
C.PD_SwitchIrDebug(config.c, C.bool(x))
}
func (config *AnalysisConfig) EnableMkldnn() {
C.PD_EnableMKLDNN(config.c)
}
func (config *AnalysisConfig) MkldnnEnabled() bool {
return ConvertCBooleanToGo(C.PD_MkldnnEnabled(config.c))
}
func (config *AnalysisConfig) SetCpuMathLibraryNumThreads(n int) {
C.PD_SetCpuMathLibraryNumThreads(config.c, C.int(n))
}
func (config *AnalysisConfig) CpuMathLibraryNumThreads() int {
return int(C.PD_CpuMathLibraryNumThreads(config.c))
}
func (config *AnalysisConfig) EnableMkldnnQuantizer() {
C.PD_EnableMkldnnQuantizer(config.c)
}
func (config *AnalysisConfig) EnableMkldnnBfloat16() {
C.PD_EnableMkldnnBfloat16(config.c)
}
func (config *AnalysisConfig) MkldnnQuantizerEnabled() bool {
return ConvertCBooleanToGo(C.PD_MkldnnQuantizerEnabled(config.c))
}
func (config *AnalysisConfig) MkldnnBfloat16Enabled() bool {
return ConvertCBooleanToGo(C.PD_MkldnnBfloat16Enabled(config.c))
}
// SetModelBuffer
// ModelFromMemory
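// A binding for loading the model from memory is still missing here. Below is
// a hedged sketch of what it could look like; it assumes the old C API exposes
// PD_SetModelBuffer(config, prog_buf, prog_size, params_buf, params_size) in
// paddle_c_api.h (signature not verified here), so it is left commented out.
//
// func (config *AnalysisConfig) SetModelBuffer(prog, params string) {
// 	cProg := C.CString(prog)
// 	defer C.free(unsafe.Pointer(cProg))
// 	cParams := C.CString(params)
// 	defer C.free(unsafe.Pointer(cParams))
// 	C.PD_SetModelBuffer(config.c, cProg, C.size_t(len(prog)),
// 		cParams, C.size_t(len(params)))
// }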
func (config *AnalysisConfig) EnableMemoryOptim() {
C.PD_EnableMemoryOptim(config.c)
}
func (config *AnalysisConfig) MemoryOptimEnabled() bool {
return ConvertCBooleanToGo(C.PD_MemoryOptimEnabled(config.c))
}
func (config *AnalysisConfig) EnableProfile() {
C.PD_EnableProfile(config.c)
}
func (config *AnalysisConfig) ProfileEnabled() bool {
return ConvertCBooleanToGo(C.PD_ProfileEnabled(config.c))
}
func (config *AnalysisConfig) DisableGlogInfo() {
C.PD_DisableGlogInfo(config.c)
}
func (config *AnalysisConfig) DeletePass(pass string) {
c_pass := C.CString(pass)
defer C.free(unsafe.Pointer(c_pass))
C.PD_DeletePass(config.c, c_pass)
}
func (config *AnalysisConfig) SetInValid() {
C.PD_SetInValid(config.c)
}
func (config *AnalysisConfig) IsValid() bool {
return ConvertCBooleanToGo(C.PD_IsValid(config.c))
}
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package paddle
// #cgo CFLAGS: -I${SRCDIR}/../paddle_c/paddle/include
// #cgo LDFLAGS: -L${SRCDIR}/../paddle_c/paddle/lib -lpaddle_inference_c
// #include <stdbool.h>
// #include "paddle_c_api.h"
import "C"
import "reflect"
import "runtime"
import "unsafe"
type Predictor struct {
c *C.PD_Predictor
}
func NewPredictor(config *AnalysisConfig) *Predictor {
c_predictor := C.PD_NewPredictor((*config).c)
predictor := &Predictor{c: c_predictor}
runtime.SetFinalizer(predictor, (*Predictor).finalize)
return predictor
}
func (predictor *Predictor) finalize() {
C.PD_DeletePredictor(predictor.c)
}
func DeletePredictor(predictor *Predictor) {
C.PD_DeletePredictor(predictor.c)
}
func (predictor *Predictor) GetInputNum() int {
return int(C.PD_GetInputNum(predictor.c))
}
func (predictor *Predictor) GetOutputNum() int {
return int(C.PD_GetOutputNum(predictor.c))
}
func (predictor *Predictor) GetInputName(n int) string {
return C.GoString(C.PD_GetInputName(predictor.c, C.int(n)))
}
func (predictor *Predictor) GetOutputName(n int) string {
return C.GoString(C.PD_GetOutputName(predictor.c, C.int(n)))
}
func (predictor *Predictor) GetInputTensors() [](*ZeroCopyTensor) {
var result [](*ZeroCopyTensor)
for i := 0; i < predictor.GetInputNum(); i++ {
tensor := NewZeroCopyTensor()
tensor.c.name = C.PD_GetInputName(predictor.c, C.int(i))
result = append(result, tensor)
}
return result
}
func (predictor *Predictor) GetOutputTensors() [](*ZeroCopyTensor) {
var result [](*ZeroCopyTensor)
for i := 0; i < predictor.GetOutputNum(); i++ {
tensor := NewZeroCopyTensor()
tensor.c.name = C.PD_GetOutputName(predictor.c, C.int(i))
result = append(result, tensor)
}
return result
}
func (predictor *Predictor) GetInputNames() []string {
names := make([]string, predictor.GetInputNum())
for i := 0; i < len(names); i++ {
names[i] = predictor.GetInputName(i)
}
return names
}
func (predictor *Predictor) GetOutputNames() []string {
names := make([]string, predictor.GetOutputNum())
for i := 0; i < len(names); i++ {
names[i] = predictor.GetOutputName(i)
}
return names
}
func (predictor *Predictor) SetZeroCopyInput(tensor *ZeroCopyTensor) {
C.PD_SetZeroCopyInput(predictor.c, tensor.c)
}
func (predictor *Predictor) GetZeroCopyOutput(tensor *ZeroCopyTensor) {
C.PD_GetZeroCopyOutput(predictor.c, tensor.c)
tensor.name = C.GoString(tensor.c.name)
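// Build a []int32 view over the C-owned shape buffer via reflect.SliceHeader
// (no copy); Reshape below then stores its own Go-side copy of the shape.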
var shape []int32
shape_hdr := (*reflect.SliceHeader)(unsafe.Pointer(&shape))
shape_hdr.Data = uintptr(unsafe.Pointer(tensor.c.shape.data))
shape_hdr.Len = int(tensor.c.shape.length / C.sizeof_int)
shape_hdr.Cap = int(tensor.c.shape.length / C.sizeof_int)
tensor.Reshape(shape)
}
func (predictor *Predictor) ZeroCopyRun() {
C.PD_ZeroCopyRun(predictor.c)
}
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package paddle
// #cgo CFLAGS: -I${SRCDIR}/../paddle_c/paddle/include
// #cgo LDFLAGS: -L${SRCDIR}/../paddle_c/paddle/lib -lpaddle_inference_c
// #include <stdbool.h>
// #include <stdlib.h>
// #include <string.h>
// #include <paddle_c_api.h>
import "C"
import "runtime"
import "reflect"
import "unsafe"
import (
"bytes"
"encoding/binary"
)
type PaddleDType C.PD_DataType
const (
FLOAT32 PaddleDType = C.PD_FLOAT32
INT32 PaddleDType = C.PD_INT32
INT64 PaddleDType = C.PD_INT64
UINT8 PaddleDType = C.PD_UINT8
UNKDTYPE PaddleDType = C.PD_UNKDTYPE
)
var types = []struct {
gotype reflect.Type
dtype PaddleDType
}{
{reflect.TypeOf(float32(0)), FLOAT32},
{reflect.TypeOf(int32(0)), INT32},
{reflect.TypeOf(int64(0)), INT64},
{reflect.TypeOf(uint8(0)), UINT8},
}
func TypeOfShape(dtype PaddleDType, shape []int32) reflect.Type {
var ret reflect.Type
for _, t := range types {
if dtype == PaddleDType(t.dtype) {
ret = t.gotype
break
}
}
if ret == nil {
panic(bug("Data %v type is not support", dtype))
}
for range shape {
ret = reflect.SliceOf(ret)
}
return ret
}
type ZeroCopyTensor struct {
c *C.PD_ZeroCopyTensor
name string
shape []int32
}
func NewZeroCopyTensor() *ZeroCopyTensor {
c_tensor := C.PD_NewZeroCopyTensor()
tensor := &ZeroCopyTensor{c: c_tensor}
runtime.SetFinalizer(tensor, (*ZeroCopyTensor).finalize)
return tensor
}
func (tensor *ZeroCopyTensor) finalize() {
C.PD_DeleteZeroCopyTensor(tensor.c)
}
func (tensor *ZeroCopyTensor) Shape() []int32 {
return tensor.shape
}
func (tensor *ZeroCopyTensor) Name() string {
return C.GoString(tensor.c.name)
}
func (tensor *ZeroCopyTensor) Rename(name string) {
tensor.name = name
tensor.c.name = (*C.char)(unsafe.Pointer(tensor.c.name))
//tensor.c.name = C.CString(tensor.name)
//defer C.free(unsafe.Pointer(tensor.c.name))
}
func (tensor *ZeroCopyTensor) Reshape(shape []int32) {
tensor.shape = make([]int32, len(shape))
copy(tensor.shape, shape)
length := C.sizeof_int * C.size_t(len(shape))
if tensor.c.shape.capacity < C.size_t(length) {
if tensor.c.shape.capacity != C.size_t(0) {
C.free(tensor.c.shape.data)
}
tensor.c.shape.data = C.malloc(length)
tensor.c.shape.capacity = length
}
tensor.c.shape.length = length
C.memcpy(tensor.c.shape.data, unsafe.Pointer(&shape[0]), length)
}
func (tensor *ZeroCopyTensor) DataType() PaddleDType {
return PaddleDType(tensor.c.dtype)
}
func (tensor *ZeroCopyTensor) SetValue(value interface{}) {
val := reflect.ValueOf(value)
shape, dtype := ShapeAndTypeOf(val)
tensor.Reshape(shape)
num := numel(shape)
length := C.size_t(SizeofDataType(dtype) * num)
if tensor.c.data.capacity < length {
if tensor.c.data.capacity != C.size_t(0) {
C.free(tensor.c.data.data)
}
tensor.c.data.data = C.malloc(length)
tensor.c.data.capacity = length
}
tensor.c.data.length = length
switch dtype {
case PaddleDType(UINT8):
data := val.Interface().([]uint8)
C.memcpy(tensor.c.data.data, unsafe.Pointer(&data[0]), length)
case PaddleDType(INT32):
data := val.Interface().([]int32)
C.memcpy(tensor.c.data.data, unsafe.Pointer(&data[0]), length)
case PaddleDType(INT64):
data := val.Interface().([]int64)
C.memcpy(tensor.c.data.data, unsafe.Pointer(&data[0]), length)
case PaddleDType(FLOAT32):
data := val.Interface().([]float32)
C.memcpy(tensor.c.data.data, unsafe.Pointer(&data[0]), length)
}
tensor.c.dtype = C.PD_DataType(dtype)
}
func TypeOf(dtype PaddleDType, shape []int32) reflect.Type {
var ret reflect.Type
for _, t := range types {
if t.dtype == dtype {
ret = t.gotype
break
}
}
for range shape {
ret = reflect.SliceOf(ret)
}
return ret
}
func (tensor *ZeroCopyTensor) Value() interface{} {
t := TypeOf(PaddleDType(tensor.c.dtype), tensor.shape)
value := reflect.New(t)
c_bytes := tensor.c.data.data
length := tensor.c.data.length
var slice []byte
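// View the C-owned buffer as a []byte without copying: cast to a pointer to a
// huge fixed-size array, then slice it down to the real length (a common cgo
// idiom; 1<<50 - 1 covers 64-bit platforms, 1<<30 covers 32-bit ones).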
if unsafe.Sizeof(unsafe.Pointer(nil)) == 8 {
slice = (*[1<<50 - 1]byte)(unsafe.Pointer(c_bytes))[:length:length]
} else {
slice = (*[1 << 30]byte)(unsafe.Pointer(c_bytes))[:length:length]
}
r := bytes.NewReader(slice)
DecodeTensor(r, tensor.Shape(), t, value)
return reflect.Indirect(value).Interface()
}
func Endian() binary.ByteOrder {
buf := [2]byte{}
*(*uint16)(unsafe.Pointer(&buf[0])) = uint16(0xABCD)
var endian binary.ByteOrder
switch buf {
case [2]byte{0xCD, 0xAB}:
endian = binary.LittleEndian
case [2]byte{0xAB, 0xCD}:
endian = binary.BigEndian
default:
panic("Could not determine native endianness.")
}
return endian
}
func DecodeTensor(r *bytes.Reader, shape []int32, t reflect.Type, ptr reflect.Value) {
switch t.Kind() {
case reflect.Uint8, reflect.Int32, reflect.Int64, reflect.Float32:
binary.Read(r, Endian(), ptr.Interface())
case reflect.Slice:
value := reflect.Indirect(ptr)
value.Set(reflect.MakeSlice(t, int(shape[0]), int(shape[0])))
if len(shape) == 1 && value.Len() > 0 {
switch value.Index(0).Kind() {
case reflect.Uint8, reflect.Int32, reflect.Int64, reflect.Float32:
binary.Read(r, Endian(), value.Interface())
return
}
}
for i := 0; i < value.Len(); i++ {
DecodeTensor(r, shape[1:], t.Elem(), value.Index(i).Addr())
}
}
}
func SizeofDataType(dtype PaddleDType) int32 {
switch dtype {
case UINT8:
return int32(C.sizeof_uchar)
case INT32:
return int32(C.sizeof_int)
case INT64:
return int32(C.sizeof_longlong)
case FLOAT32:
return int32(C.sizeof_float)
}
return -1
}
func ShapeAndTypeOf(val reflect.Value) (shape []int32, dt PaddleDType) {
gotype := val.Type()
for gotype.Kind() == reflect.Array || gotype.Kind() == reflect.Slice {
shape = append(shape, int32(val.Len()))
if val.Len() > 0 {
val = val.Index(0)
}
gotype = gotype.Elem()
}
for _, t := range types {
if gotype.Kind() == t.gotype.Kind() {
return shape, PaddleDType(t.dtype)
}
}
return shape, dt
}
......@@ -104,6 +104,8 @@ DataType Tensor::type() const {
return DataType::INT32;
} else if (type == paddle::framework::proto::VarType::UINT8) {
return DataType::UINT8;
} else if (type == paddle::framework::proto::VarType::INT8) {
return DataType::INT8;
}
return DataType::FLOAT32;
}
......
......@@ -31,6 +31,7 @@
#include <unordered_set>
#include <utility>
#include <vector>
#include "paddle_infer_declare.h" // NOLINT
/*! \file */
......@@ -177,6 +178,26 @@ struct PD_INFER_DECL AnalysisConfig {
///
void DisableGpu();
///
/// \brief Turn on XPU.
///
/// \param l3_workspace_size The size of the video memory allocated by the l3
/// cache, the maximum is 16M.
/// \param locked Whether the allocated L3 cache can be locked. If false,
/// it means that the L3 cache is not locked, and the allocated L3
/// cache can be shared by multiple models, and multiple models
/// sharing the L3 cache will be executed sequentially on the card.
/// \param autotune Whether to autotune the conv operator in the model. If
/// true, when the conv operator of a certain dimension is executed
/// for the first time, it will automatically search for a better
/// algorithm to improve the performance of subsequent conv operators
/// of the same dimension.
/// \param autotune_file Specify the path of the autotune file. If
/// autotune_file is specified, the algorithm specified in the
/// file will be used and autotune will not be performed again.
/// \param precision Calculation accuracy of multi_encoder
/// \param adaptive_seqlen Is the input of multi_encoder variable length
///
void EnableXpu(int l3_workspace_size = 0xfffc00, bool locked = false,
bool autotune = true, const std::string& autotune_file = "",
const std::string& precision = "int16",
......
......@@ -71,5 +71,5 @@ PD_ENUM(PD_PlaceType){PD_PLACE_UNK = -1, PD_PLACE_CPU, PD_PLACE_GPU,
PD_ENUM(PD_DataType){
PD_DATA_UNK = -1, PD_DATA_FLOAT32, PD_DATA_INT32,
PD_DATA_INT64, PD_DATA_UINT8,
PD_DATA_INT64, PD_DATA_UINT8, PD_DATA_INT8,
};
......@@ -14,6 +14,8 @@
#include "paddle/fluid/inference/capi_exp/pd_config.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/capi_exp/pd_types.h"
#include "paddle/fluid/inference/capi_exp/utils_internal.h"
#include "paddle/fluid/platform/enforce.h"
#define CHECK_NULL_POINTER_PARM(param) \
......@@ -125,10 +127,14 @@ PD_Bool PD_ConfigUseGpu(__pd_keep PD_Config* pd_config) {
}
void PD_ConfigEnableXpu(__pd_keep PD_Config* pd_config,
int32_t l3_workspace_size) {
int32_t l3_workspace_size, PD_Bool locked,
PD_Bool autotune, const char* autotune_file,
const char* precision, PD_Bool adaptive_seqlen) {
CHECK_AND_CONVERT_PD_CONFIG;
config->EnableXpu(l3_workspace_size);
config->EnableXpu(l3_workspace_size, locked, autotune, autotune_file,
precision, adaptive_seqlen);
}
PD_Bool PD_ConfigUseXpu(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
return config->use_xpu();
......@@ -378,5 +384,24 @@ void PD_ConfigPartiallyRelease(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
config->PartiallyRelease();
}
void PD_ConfigDeletePass(__pd_keep PD_Config* pd_config, const char* pass) {
CHECK_AND_CONVERT_PD_CONFIG;
config->pass_builder()->DeletePass(pass);
}
void PD_ConfigInsertPass(__pd_keep PD_Config* pd_config, size_t idx,
const char* pass) {
CHECK_AND_CONVERT_PD_CONFIG;
config->pass_builder()->InsertPass(idx, pass);
}
void PD_ConfigAppendPass(__pd_keep PD_Config* pd_config, const char* pass) {
CHECK_AND_CONVERT_PD_CONFIG;
config->pass_builder()->AppendPass(pass);
}
__pd_give PD_OneDimArrayCstr* PD_ConfigAllPasses(
__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
std::vector<std::string> passes = config->pass_builder()->AllPasses();
return paddle_infer::CvtVecToOneDimArrayCstr(passes);
}
} // extern "C"
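On the Go side these pass-management entry points surface as methods on `Config` (they are exercised in `config_test.go` below). A minimal usage sketch, with a placeholder pass name:
```go
config := paddle.NewConfig()
config.AppendPass("test_pass")  // append a pass to the optimization pipeline
fmt.Println(config.AllPasses()) // list the current passes
config.DeletePass("test_pass")  // delete all passes of this type
```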
......@@ -25,6 +25,7 @@
#pragma once
#include "pd_common.h" // NOLINT
#include "pd_types.h" // NOLINT
typedef struct PD_Config PD_Config;
......@@ -154,10 +155,27 @@ PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigUseGpu(
/// \brief Turn on XPU.
///
/// \param[in] pd_onfig config
/// \param[in] l3_workspace_size l3 workspace size.
/// \param l3_workspace_size The size of the video memory allocated by the l3
/// cache, the maximum is 16M.
/// \param locked Whether the allocated L3 cache can be locked. If false,
/// it means that the L3 cache is not locked, and the allocated L3
/// cache can be shared by multiple models, and multiple models
/// sharing the L3 cache will be executed sequentially on the card.
/// \param autotune Whether to autotune the conv operator in the model. If
/// true, when the conv operator of a certain dimension is executed
/// for the first time, it will automatically search for a better
/// algorithm to improve the performance of subsequent conv operators
/// of the same dimension.
/// \param autotune_file Specify the path of the autotune file. If
/// autotune_file is specified, the algorithm specified in the
/// file will be used and autotune will not be performed again.
/// \param precision Calculation accuracy of multi_encoder
/// \param adaptive_seqlen Is the input of multi_encoder variable length
///
PADDLE_CAPI_EXPORT extern void PD_ConfigEnableXpu(
__pd_keep PD_Config* pd_config, int32_t l3_workspace_size);
__pd_keep PD_Config* pd_config, int32_t l3_workspace_size, PD_Bool locked,
PD_Bool autotune, const char* autotune_file, const char* precision,
PD_Bool adaptive_seqlen);
///
/// \brief A boolean state telling whether the XPU is turned on.
///
......@@ -565,6 +583,35 @@ PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigIsValid(
///
PADDLE_CAPI_EXPORT extern void PD_ConfigPartiallyRelease(
__pd_keep PD_Config* pd_config);
///
/// \brief Delete all passes that has a certain type 'pass'.
///
/// \param[in] pass the certain pass type to be deleted.
///
PADDLE_CAPI_EXPORT extern void PD_ConfigDeletePass(
__pd_keep PD_Config* pd_config, const char* pass);
///
/// \brief Insert a pass to a specific position
///
/// \param[in] idx the position to insert.
/// \param[in] pass the new pass.
///
PADDLE_CAPI_EXPORT extern void PD_ConfigInsertPass(
__pd_keep PD_Config* pd_config, size_t idx, const char* pass);
///
/// \brief Append a pass to the end of the passes
///
/// \param[in] pass the new pass.
///
PADDLE_CAPI_EXPORT extern void PD_ConfigAppendPass(
__pd_keep PD_Config* pd_config, const char* pass);
///
/// \brief Get information of passes.
///
/// \return Return list of the passes.
///
PADDLE_CAPI_EXPORT extern __pd_give PD_OneDimArrayCstr* PD_ConfigAllPasses(
__pd_keep PD_Config* pd_config);
#ifdef __cplusplus
} // extern "C"
......
......@@ -106,4 +106,9 @@ void PD_PredictorDestroy(__pd_take PD_Predictor* pd_predictor) {
delete pd_predictor;
}
const char* PD_GetVersion() {
static std::string version = paddle_infer::GetVersion();
return version.c_str();
}
} // extern "C"
......@@ -143,6 +143,13 @@ PADDLE_CAPI_EXPORT extern uint64_t PD_PredictorTryShrinkMemory(
PADDLE_CAPI_EXPORT extern void PD_PredictorDestroy(
__pd_take PD_Predictor* pd_predictor);
///
/// \brief Get version info.
///
/// \return version
///
PADDLE_CAPI_EXPORT extern const char* PD_GetVersion();
#ifdef __cplusplus
} // extern "C"
#endif
......@@ -196,6 +196,8 @@ DataType CvtToCxxDatatype(PD_DataType data_type) {
return DataType::INT32;
case PD_DATA_UINT8:
return DataType::UINT8;
case PD_DATA_INT8:
return DataType::INT8;
default:
PADDLE_THROW(paddle::platform::errors::InvalidArgument(
"Unsupport paddle data type %d.", data_type));
......
# Paddle Inference golang API
The Paddle Inference golang API is built on the [capi](../capi_exp) via cgo, so you need to prepare the C inference library in advance.
## Installation
1. Confirm the CommitId of the Paddle you are using
You can confirm the CommitId of your Paddle checkout with `git log -1`.
2. Fetch the golang paddle api with `go get`
```
# Use the CommitId recorded in the previous step, assumed here to be 76e5724
COMMITID=76e5724
go get -d -v github.com/paddlepaddle/paddle/paddle/fluid/inference/goapi@${COMMITID}
```
3. Download the C inference library
You can either download the prebuilt [paddle_inference_c](https://github.com/PaddlePaddle/Paddle-Inference-Demo/blob/master/docs/user_guides/download_lib.md) library, or build it from source as described in the official docs. When building from source, enable `-DON_INFER=ON` in the cmake step; the build directory will then contain `paddle_inference_c_install_dir`.
4. Create a symlink
go1.15 introduced the `GOMODCACHE` environment variable, and `go get` downloads code into the `GOMODCACHE` directory by default. You can check the path with `go env | grep GOMODCACHE`; in the officially released docker images it usually defaults to `/root/gopath/pkg/mod`. Enter the golang api code path and create a symlink that names the C inference library `paddle_inference_c`:
```bash
eval $(go env | grep GOMODCACHE)
# Adjust the trailing goapi version as needed
cd ${GOMODCACHE}/github.com/paddlepaddle/paddle/paddle/fluid/inference/goapi\@v0.0.0-20210517084506-76e5724c16a5/
ln -s ${PADDLE_C_DOWNLOAD_DIR}/paddle_inference_c_install_dir paddle_inference_c
```
5. Run the unit tests to verify
```
bash test.sh
```
## Using Paddle Inference in Go
First, create the inference config:
```go
config := paddle.NewConfig()
config.SetModel(model_file, params_file)
```
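Common options can be toggled on the config before the predictor is created; for example (the GPU call assumes a GPU build of the C library):
```go
config.DisableGlogInfo()
config.SwitchIrOptim(true)
// Requires a GPU build of the C library: 100 MB initial pool on card 0.
config.EnableUseGpu(100, 0)
```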
Create the predictor:
```go
predictor := paddle.NewPredictor(config)
```
Get the input and output tensors:
```go
inNames := predictor.GetInputNames()
inHandle := predictor.GetInputHandle(inNames[0])
outNames := predictor.GetOutputNames()
outHandle := predictor.GetOutputHandle(outNames[0])
```
Set the input data (assuming a single input):
```go
data := make([]float32, 1*3*224*224)
for i := 0; i < len(data); i++ {
data[i] = float32(i%255) * 0.1
}
inHandle.Reshape([]int32{1, 3, 224, 224})
inHandle.CopyFromCpu(data)
```
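`CopyFromCpu` dispatches on the element type of the slice, so other dtypes are set the same way; for instance, a hypothetical int64 token-id input:
```go
ids := make([]int64, 1*128)
inHandle.Reshape([]int32{1, 128})
inHandle.CopyFromCpu(ids)
```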
Set the LoD, if the model requires it:
```go
lod := make([][]uint, 2)
for i:=0; i < len(lod); i++ {
lod[i] = make([]uint, 2)
// set the lod offsets (illustrative values)
lod[i][0] = 0
lod[i][1] = 10
}
inHandle.SetLod(lod)
```
Run inference:
```go
predictor.Run()
```
Get the output tensor's value:
```go
func numElements(shape []int32) int32 {
n := int32(1)
for _, v := range shape {
n *= v
}
return n
}
outData := make([]float32, numElements(outHandle.Shape()))
outHandle.CopyToCpu(outData)
fmt.Println(outHandle.Lod())
```
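When the output dtype is not float32, check the handle's type first and allocate a matching slice; a sketch:
```go
if outHandle.Type() == paddle.Int64 {
    out := make([]int64, numElements(outHandle.Shape()))
    outHandle.CopyToCpu(out)
}
```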
## Example
See [Paddle-Inference-Demo](https://github.com/PaddlePaddle/Paddle-Inference-Demo/tree/master/go) for complete demos.
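For a quick end-to-end reference, the snippets above assemble into a program along the following lines (the import path comes from the install step; the model path, shapes, and dummy data are illustrative):
```go
package main

import (
	"fmt"

	paddle "github.com/paddlepaddle/paddle/paddle/fluid/inference/goapi"
)

func numElements(shape []int32) int32 {
	n := int32(1)
	for _, v := range shape {
		n *= v
	}
	return n
}

func main() {
	// Illustrative model files; replace with your own.
	config := paddle.NewConfig()
	config.SetModel("./mobilenetv1/inference.pdmodel", "./mobilenetv1/inference.pdiparams")

	predictor := paddle.NewPredictor(config)

	// Fill the (single) input with dummy data.
	inNames := predictor.GetInputNames()
	inHandle := predictor.GetInputHandle(inNames[0])
	inHandle.Reshape([]int32{1, 3, 224, 224})
	data := make([]float32, numElements([]int32{1, 3, 224, 224}))
	for i := range data {
		data[i] = float32(i%255) * 0.1
	}
	inHandle.CopyFromCpu(data)

	predictor.Run()

	// Copy the (single) output back to host memory and print a few values.
	outNames := predictor.GetOutputNames()
	outHandle := predictor.GetOutputHandle(outNames[0])
	outData := make([]float32, numElements(outHandle.Shape()))
	outHandle.CopyToCpu(outData)
	fmt.Println(outData[:10])
}
```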
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package paddle
import "testing"
func TestNewConfig(t *testing.T) {
config := NewConfig()
config.SetProgFile("model")
config.SetParamsFile("params")
config.SetOptimCacheDir("cache")
config.DisableFCPadding()
t.Logf("UseFcPadding:%+v", config.UseFcPadding())
// It will break when we have no xpu env.
// config.EnableXpu(100)
// t.Logf("EnableXpu, UseXpu:%+v ", config.UseXpu())
config.SwitchIrOptim(true)
t.Logf("IrOptim:%+v", config.IrOptim())
config.EnableUseGpu(100, 0)
t.Logf("use_gpu:%+v, gpu_id:%+v", config.UseGpu(), config.GpuDeviceId())
t.Logf("MemoryPoolInitSizeMb:%+v, FractionOfGpuMemoryForPool:%+v", config.MemoryPoolInitSizeMb(), config.FractionOfGpuMemoryForPool())
config.EnableTensorRtEngine(1024, 16, 3, PrecisionFloat32, false, false)
t.Logf("TensorRtEngineEnabled:%+v", config.TensorRtEngineEnabled())
minInputShape := map[string][]int32{
"image": []int32{-1, 3, 100, 100},
"shape": []int32{-1, 2},
}
maxInputShape := map[string][]int32{
"image": []int32{-1, 3, 608, 608},
"shape": []int32{-1, 2},
}
optInputShape := map[string][]int32{
"image": []int32{-1, 3, 406, 406},
"shape": []int32{-1, 2},
}
config.SetTRTDynamicShapeInfo(minInputShape, maxInputShape, optInputShape, false)
config.EnableTensorRtOSS()
t.Logf("TensorrtOssEnabled:%+v", config.TensorrtOssEnabled())
config.EnableTensorRtDLA(0)
t.Logf("TensorrtDlaEnabled:%+v", config.TensorrtDlaEnabled())
config.DisableTensorRtOPs([]string{"mul", "fc"})
config.EnableGpuMultiStream()
t.Logf("ThreadLocalStreamEnabled:%+v", config.ThreadLocalStreamEnabled())
config.SwitchIrDebug(false)
config.EnableMKLDNN()
config.EnableMemoryOptim()
t.Logf("MemoryOptimEnabled:%+v", config.MemoryOptimEnabled())
config.EnableProfile()
t.Logf("ProfileEnabled:%+v", config.ProfileEnabled())
config.DisableGlogInfo()
t.Logf("GlogInfoDisabled:%+v", config.GlogInfoDisabled())
t.Logf("IsValid:%+v", config.IsValid())
config.AppendPass("test_pass")
t.Logf("After AppendPass, AllPasses:%+v", config.AllPasses())
config.DeletePass("test_pass")
t.Logf("After DeletePass, AllPasses:%+v", config.AllPasses())
}
func TestLite(t *testing.T) {
config := NewConfig()
config.SetModel("model", "params")
t.Log(config.ProgFile())
t.Log(config.ParamsFile())
config.EnableLiteEngine(PrecisionFloat32, true, []string{}, []string{})
t.Logf("LiteEngineEnabled:%+v", config.LiteEngineEnabled())
}
func TestMkldnn(t *testing.T) {
config := NewConfig()
config.SetModelDir("modelDir")
t.Log(config.ModelDir())
config.EnableMKLDNN()
t.Logf("MkldnnEnabled:%+v", config.MkldnnEnabled())
config.SetMkldnnCacheCapacity(4)
config.SetCpuMathLibraryNumThreads(4)
t.Logf("CpuMathLibraryNumThreads:%+v", config.CpuMathLibraryNumThreads())
config.SetMKLDNNOp([]string{"fc", "conv"})
config.EnableMkldnnQuantizer()
t.Logf("MkldnnQuantizerEnabled:%+v", config.MkldnnQuantizerEnabled())
config.EnableMkldnnBfloat16()
t.Logf("MkldnnBfloat16Enabled:%+v", config.MkldnnBfloat16Enabled())
config.SetBfloat16Op([]string{"fc", "mul"})
}
module github.com/jiweibo/paddle/paddle/fluid/inference/goapi
go 1.15
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package paddle
// #cgo CFLAGS: -I${SRCDIR}/paddle_inference_c/paddle/include
// #cgo LDFLAGS: -L${SRCDIR}/paddle_inference_c/paddle/lib -lpaddle_inference_c
import "C"
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package paddle
// #include "pd_predictor.h"
// #include "pd_tensor.h"
// #include "pd_common.h"
// #include "pd_types.h"
// #include "pd_utils.h"
// #include <stdlib.h>
// #include <string.h>
import "C"
import (
"runtime"
"unsafe"
)
type Predictor struct {
c *C.PD_Predictor
}
///
/// \brief Create a new Predictor
///
/// \param[in] Config config
/// \return new predictor.
///
func NewPredictor(config *Config) *Predictor {
cPredictor := C.PD_PredictorCreate(config.c)
predictor := &Predictor{c: cPredictor}
runtime.SetFinalizer(predictor, func(predictor *Predictor) {
C.PD_PredictorDestroy(predictor.c)
})
return predictor
}
///
/// \brief Clone a new Predictor
///
/// \return new predictor.
///
func (p *Predictor) Clone() *Predictor {
cPredictor := C.PD_PredictorClone(p.c)
predictor := &Predictor{c: cPredictor}
runtime.SetFinalizer(predictor, func(predictor *Predictor) {
C.PD_PredictorDestroy(predictor.c)
})
return predictor
}
///
/// \brief Get the input number
///
/// \return input number
///
func (p *Predictor) GetInputNum() uint {
return uint(C.PD_PredictorGetInputNum(p.c))
}
///
/// \brief Get the output number
///
/// \return output number
///
func (p *Predictor) GetOutputNum() uint {
return uint(C.PD_PredictorGetOutputNum(p.c))
}
///
/// \brief Get the input names
///
/// \return input names
///
func (p *Predictor) GetInputNames() []string {
cNames := C.PD_PredictorGetInputNames(p.c)
numNames := int(cNames.size)
names := cvtToGoSliceString(numNames, cNames.data)
C.PD_OneDimArrayCstrDestroy(cNames)
return names
}
///
/// \brief Get the output names
///
/// \return output names
///
func (p *Predictor) GetOutputNames() []string {
cNames := C.PD_PredictorGetOutputNames(p.c)
numNames := int(cNames.size)
names := cvtToGoSliceString(numNames, cNames.data)
C.PD_OneDimArrayCstrDestroy(cNames)
return names
}
///
/// \brief Get the Input Tensor object
///
/// \param[in] name input name
/// \return input tensor
///
func (p *Predictor) GetInputHandle(name string) *Tensor {
cName := C.CString(name)
cHandle := C.PD_PredictorGetInputHandle(p.c, cName)
C.free(unsafe.Pointer(cName))
handle := &Tensor{c: cHandle}
runtime.SetFinalizer(handle, func(handle *Tensor) {
C.PD_TensorDestroy(handle.c)
})
return handle
}
///
/// \brief Get the Output Tensor object
///
/// \param[in] name output name
/// \return output tensor
///
func (p *Predictor) GetOutputHandle(name string) *Tensor {
cName := C.CString(name)
cHandle := C.PD_PredictorGetOutputHandle(p.c, cName)
C.free(unsafe.Pointer(cName))
handle := &Tensor{c: cHandle}
runtime.SetFinalizer(handle, func(handle *Tensor) {
C.PD_TensorDestroy(handle.c)
})
return handle
}
///
/// \brief Run the prediction engine
///
func (p *Predictor) Run() {
C.PD_PredictorRun(p.c)
}
///
/// \brief Clear the intermediate tensors of the predictor
///
func (p *Predictor) ClearIntermediateTensor() {
C.PD_PredictorClearIntermediateTensor(p.c)
}
///
/// \brief Release all temporary tensors to compress the size of the memory
/// pool. The memory pool is considered to be composed of a list of chunks;
/// chunks that are not occupied can be released.
///
/// \return Number of bytes released. It may be smaller than the actual
/// released memory, because part of the memory is not managed by the
/// MemoryPool.
///
func (p *Predictor) TryShrinkMemory() uint64 {
return uint64(C.PD_PredictorTryShrinkMemory(p.c))
}
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package paddle
import (
"io/ioutil"
"os"
"testing"
)
func TestNewPredictor(t *testing.T) {
t.Logf("Version:\n%+v", Version())
config := NewConfig()
config.SetModel("./mobilenetv1/inference.pdmodel", "./mobilenetv1/inference.pdiparams")
config.EnableUseGpu(100, 0)
predictor := NewPredictor(config)
inNames := predictor.GetInputNames()
t.Logf("InputNames:%+v", inNames)
outNames := predictor.GetOutputNames()
t.Logf("OutputNames:%+v", outNames)
inHandle := predictor.GetInputHandle(inNames[0])
inHandle.Reshape([]int32{1, 3, 224, 224})
t.Logf("inHandle name:%+v, shape:%+v", inHandle.Name(), inHandle.Shape())
var lod [][]uint
lod = append(lod, []uint{0, 1, 2})
lod = append(lod, []uint{1, 2, 3, 4})
inHandle.SetLod(lod)
t.Logf("inHandle Lod:%+v", inHandle.Lod())
data := make([]float32, numElements([]int32{1, 3, 224, 224}))
for i := 0; i < int(numElements([]int32{1, 3, 224, 224})); i++ {
data[i] = float32(i%255) * 0.1
}
inHandle.CopyFromCpu(data)
t.Logf("inHandle Type:%+v", inHandle.Type())
predictor.Run()
outHandle := predictor.GetOutputHandle(outNames[0])
t.Logf("outHandle name:%+v", outHandle.Name())
outShape := outHandle.Shape()
t.Logf("outHandle Shape:%+v", outShape)
outData := make([]float32, numElements(outShape))
outHandle.CopyToCpu(outData)
t.Log(outData)
cloned := predictor.Clone()
t.Logf("InputNum:%+v", cloned.GetInputNum())
t.Logf("OutputNum:%+v", cloned.GetInputNum())
cloned.ClearIntermediateTensor()
}
func TestFromBuffer(t *testing.T) {
modelFile, err := os.Open("./mobilenetv1/inference.pdmodel")
if err != nil {
t.Fatal(err)
}
paramsFile, err := os.Open("./mobilenetv1/inference.pdiparams")
if err != nil {
t.Fatal(err)
}
defer modelFile.Close()
defer paramsFile.Close()
model, err := ioutil.ReadAll(modelFile)
if err != nil {
t.Fatal(err)
}
params, err := ioutil.ReadAll(paramsFile)
if err != nil {
t.Fatal(err)
}
config := NewConfig()
config.SetModelBuffer(string(model), string(params))
predictor := NewPredictor(config)
inNames := predictor.GetInputNames()
outNames := predictor.GetOutputNames()
inHandle := predictor.GetInputHandle(inNames[0])
inHandle.Reshape([]int32{1, 3, 224, 224})
data := make([]float32, numElements([]int32{1, 3, 224, 224}))
for i := 0; i < int(numElements([]int32{1, 3, 224, 224})); i++ {
data[i] = float32(i%255) * 0.1
}
inHandle.CopyFromCpu(data)
predictor.Run()
outHandle := predictor.GetOutputHandle(outNames[0])
outShape := outHandle.Shape()
t.Logf("outHandle Shape:%+v", outShape)
outData := make([]float32, numElements(outShape))
outHandle.CopyToCpu(outData)
t.Log(outData)
}
func numElements(shape []int32) int32 {
n := int32(1)
for _, v := range shape {
n *= v
}
return n
}
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package paddle
// #include "pd_tensor.h"
// #include "pd_utils.h"
// #include "pd_types.h"
// #include "pd_common.h"
// #include "stdlib.h"
import "C"
import (
"fmt"
"reflect"
"unsafe"
)
type DataType C.PD_DataType
const (
Unk DataType = C.PD_DATA_UNK
Float32 DataType = C.PD_DATA_FLOAT32
Int32 DataType = C.PD_DATA_INT32
Int64 DataType = C.PD_DATA_INT64
Uint8 DataType = C.PD_DATA_UINT8
Int8 DataType = C.PD_DATA_INT8
)
type PlaceType C.PD_PlaceType
const (
UnkPlace PlaceType = C.PD_PLACE_UNK
CpuPlace PlaceType = C.PD_PLACE_CPU
GpuPlace PlaceType = C.PD_PLACE_GPU
XpuPlace PlaceType = C.PD_PLACE_XPU
)
type Tensor struct {
c *C.PD_Tensor
}
///
/// \brief Reset the shape of the tensor.
/// Generally it's only used for the input tensor.
///
/// \param[in] shape The shape to set.
///
func (t *Tensor) Reshape(shape []int32) {
C.PD_TensorReshape(t.c, C.size_t(len(shape)), (*C.int32_t)(unsafe.Pointer(&shape[0])))
}
///
/// \brief Get the tensor shape
///
/// \return The tensor shape.
///
func (t *Tensor) Shape() []int32 {
cData := C.PD_TensorGetShape(t.c)
length := int(cData.size)
defer C.PD_OneDimArrayInt32Destroy(cData)
return cvtToGoSliceInt32(length, cData.data)
}
///
/// \brief Set the tensor lod information
/// \param[in] pd_tensor tensor.
/// \param[in] lod lod information.
///
func (t *Tensor) SetLod(lod [][]uint) {
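// Marshal the Go [][]uint into the C-side PD_TwoDimArraySize layout.
// PD_TensorSetLod is assumed to copy the lod internally, so every temporary
// buffer allocated here is freed before returning (directly or via defer).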
cLod := (*C.struct_PD_TwoDimArraySize)(C.malloc(C.size_t(C.sizeof_struct_PD_TwoDimArraySize)))
length := len(lod)
cLod.size = C.size_t(uint(length))
var lodList = make([]*C.struct_PD_OneDimArraySize, length+1)
for i, v := range lod {
oneDimArray := (*C.struct_PD_OneDimArraySize)(C.malloc(C.size_t(C.sizeof_struct_PD_OneDimArraySize)))
defer C.free(unsafe.Pointer(oneDimArray))
tmpLength := len(v)
oneDimArray.size = C.size_t(uint(tmpLength))
tmpC := (*C.size_t)(C.malloc(C.size_t(C.sizeof_size_t * tmpLength)))
defer C.free(unsafe.Pointer(tmpC))
tmpSlice := (*[1 << 27]C.size_t)(unsafe.Pointer(tmpC))[:tmpLength:tmpLength]
for j, w := range v {
tmpSlice[j] = C.size_t(w)
}
oneDimArray.data = tmpC
lodList[i] = oneDimArray
}
cLod.data = (**C.struct_PD_OneDimArraySize)(unsafe.Pointer(&lodList[0]))
C.PD_TensorSetLod(t.c, cLod)
C.free(unsafe.Pointer(cLod))
// C.PD_TwoDimArraySizeDestroy(cLod)
}
///
/// \brief Get the tensor lod information
///
/// \return the lod information.
///
func (t *Tensor) Lod() [][]uint {
cLod := C.PD_TensorGetLod(t.c)
length := int(cLod.size)
res := make([][]uint, length)
if length == 0 {
return res
}
cLodSlice := (*[1 << 27]*C.struct_PD_OneDimArraySize)(unsafe.Pointer(cLod.data))[:length:length]
for i := 0; i < length; i++ {
size := uint(cLodSlice[i].size)
lod := make([]uint, size)
tmpSlice := (*[1 << 27]C.size_t)(unsafe.Pointer(cLodSlice[i].data))[:size:size]
for j, v := range tmpSlice {
lod[j] = uint(v)
}
res[i] = lod
}
C.PD_TwoDimArraySizeDestroy(cLod)
return res
}
///
/// \brief Get the tensor data type
/// \param[in] pd_tensor tensor.
/// \return the tensor data type.
///
func (t *Tensor) Type() DataType {
cDtype := C.PD_TensorGetDataType(t.c)
return DataType(cDtype)
}
///
/// \brief Get the tensor name
///
/// \return the tensor name.
///
func (t *Tensor) Name() string {
return C.GoString(C.PD_TensorGetName(t.c))
}
///
/// \brief Copy the host memory to tensor data.
/// It's usually used to set the input tensor data.
///
/// \param[in] value
///
func (t *Tensor) CopyFromCpu(value interface{}) {
val := reflect.ValueOf(value)
dtype, _ := dataTypeOf(val)
switch dtype {
case Float32:
data := val.Interface().([]float32)
C.PD_TensorCopyFromCpuFloat(t.c, (*C.float)(unsafe.Pointer(&data[0])))
case Int32:
data := val.Interface().([]int32)
C.PD_TensorCopyFromCpuInt32(t.c, (*C.int32_t)(unsafe.Pointer(&data[0])))
case Int64:
data := val.Interface().([]int64)
C.PD_TensorCopyFromCpuInt64(t.c, (*C.int64_t)(unsafe.Pointer(&data[0])))
case Uint8:
data := val.Interface().([]uint8)
C.PD_TensorCopyFromCpuUint8(t.c, (*C.uint8_t)(unsafe.Pointer(&data[0])))
case Int8:
data := val.Interface().([]int8)
C.PD_TensorCopyFromCpuInt8(t.c, (*C.int8_t)(unsafe.Pointer(&data[0])))
}
}
///
/// \brief Copy the tensor data to the host memory.
/// It's usually used to get the output tensor data.
///
/// \param[in] value The tensor will copy its data to this address.
///
func (t *Tensor) CopyToCpu(value interface{}) {
val := reflect.ValueOf(value)
dtype, _ := dataTypeOf(val)
switch dtype {
case Float32:
data := val.Interface().([]float32)
C.PD_TensorCopyToCpuFloat(t.c, (*C.float)(unsafe.Pointer(&data[0])))
case Int32:
data := val.Interface().([]int32)
C.PD_TensorCopyToCpuInt32(t.c, (*C.int32_t)(unsafe.Pointer(&data[0])))
case Int64:
data := val.Interface().([]int64)
C.PD_TensorCopyToCpuInt64(t.c, (*C.int64_t)(unsafe.Pointer(&data[0])))
case Uint8:
data := val.Interface().([]uint8)
C.PD_TensorCopyToCpuUint8(t.c, (*C.uint8_t)(unsafe.Pointer(&data[0])))
case Int8:
data := val.Interface().([]int8)
C.PD_TensorCopyToCpuInt8(t.c, (*C.int8_t)(unsafe.Pointer(&data[0])))
}
}
var types = []struct {
typ reflect.Type
dataType C.PD_DataType
}{
{reflect.TypeOf(float32(0)), C.PD_DATA_FLOAT32},
{reflect.TypeOf(int32(0)), C.PD_DATA_INT32},
{reflect.TypeOf(int64(0)), C.PD_DATA_INT64},
{reflect.TypeOf(uint8(0)), C.PD_DATA_UINT8},
{reflect.TypeOf(int8(0)), C.PD_DATA_INT8},
}
func dataTypeOf(val reflect.Value) (dt DataType, err error) {
typ := val.Type()
for typ.Kind() == reflect.Array || typ.Kind() == reflect.Slice {
if val.Len() > 0 {
val = val.Index(0)
}
typ = typ.Elem()
}
for _, t := range types {
if typ.Kind() == t.typ.Kind() {
return DataType(t.dataType), nil
}
}
return dt, fmt.Errorf("unsupported type %v", typ)
}
#!/bin/bash
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# 1. download the mobilenetv1 model to test config and predictor
if [ ! -d mobilenetv1 ]; then
wget https://paddle-inference-dist.bj.bcebos.com/Paddle-Inference-Demo/mobilenetv1.tgz
tar xzf mobilenetv1.tgz
fi
# 2. set LD_LIBRARY_PATH
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$PWD/paddle_inference_c/paddle/lib
# 3. go test
go test -v ./...
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -11,37 +11,51 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <paddle_inference_api.h>
#include <fstream>
#include <iostream>
void SetConfig(paddle::AnalysisConfig *);
int main(int argc, char *argv[]) {
paddle::AnalysisConfig config;
SetConfig(&config);
auto predictor = paddle::CreatePaddlePredictor(config);
auto input_name = predictor->GetInputNames()[0];
auto input = predictor->GetInputTensor(input_name);
std::cout << predictor->GetOutputNames()[0] << std::endl;
std::vector<int> shape{1, 3, 300, 300};
input->Reshape(std::move(shape));
std::vector<float> data(1 * 300 * 300 * 3);
std::ifstream fin("data/data.txt");
for (int i = 0; i < data.size(); i++) {
fin >> data[i];
}
input->copy_from_cpu(data.data());
predictor->ZeroCopyRun();
auto output_name = predictor->GetOutputNames()[0];
auto output = predictor->GetOutputTensor(output_name);
return 0;
}
void SetConfig(paddle::AnalysisConfig *config) {
config->SetModel("data/model/__model__", "data/model/__params__");
config->SwitchUseFeedFetchOps(false);
config->SwitchSpecifyInputNames(true);
config->SwitchIrOptim(false);
}
package paddle
// #include <stdint.h>
// #include <stdlib.h>
import "C"
import (
"unsafe"
)
func cvtPDBoolToGo(b C.int8_t) bool {
var cFalse C.int8_t
if b != cFalse {
return true
}
return false
}
func cvtGoBoolToPD(b bool) C.int8_t {
if b == false {
return 0
}
return 1
}
func cvtToGoSliceString(length int, str **C.char) []string {
if str == nil {
return nil
}
tmpSlice := (*[1 << 27]*C.char)(unsafe.Pointer(str))[:length:length]
goStrings := make([]string, length)
for i, s := range tmpSlice {
goStrings[i] = C.GoString(s)
}
return goStrings
}
func cvtToGoSliceInt32(length int, data *C.int32_t) []int32 {
if data == nil {
return nil
}
tmpSlice := (*[1 << 27]C.int32_t)(unsafe.Pointer(data))[:length:length]
res := make([]int32, length)
for i, s := range tmpSlice {
res[i] = int32(s)
}
return res
}
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -14,29 +14,13 @@
package paddle
// #cgo CFLAGS: -I${SRCDIR}/../paddle_c/paddle/include
// #cgo LDFLAGS: -L${SRCDIR}/../paddle_c/paddle/lib -lpaddle_inference_c
// #include <stdbool.h>
// #include <paddle_c_api.h>
import "C"
import "fmt"
func ConvertCBooleanToGo(b C.bool) bool {
var c_false C.bool
if b != c_false {
return true
}
return false
}
func numel(shape []int32) int32 {
n := int32(1)
for _, d := range shape {
n *= d
}
return n
}
func bug(format string, args ...interface{}) error {
return fmt.Errorf("Bug %v", fmt.Sprintf(format, args...))
}
// #include "pd_common.h"
// #include "pd_predictor.h"
// #include "pd_types.h"
// #include "pd_utils.h"
import "C"
func Version() string {
cVersion := C.PD_GetVersion()
return C.GoString(cVersion)
}
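On the Go side `PD_GetVersion` is wrapped by `Version()` above; a caller can log it up front, as the predictor test does:
```go
fmt.Printf("Paddle Inference version:\n%s\n", paddle.Version())
```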
......@@ -1985,6 +1985,26 @@ EOF
fi
}
function test_go_inference_api() {
cat <<EOF
========================================
Testing go inference api ...
========================================
EOF
# ln paddle_inference_c lib
cd ${PADDLE_ROOT}/build
ln -s ${PADDLE_ROOT}/build/paddle_inference_c_install_dir/ ${PADDLE_ROOT}/paddle/fluid/inference/goapi/paddle_inference_c
# run go test
cd ${PADDLE_ROOT}/paddle/fluid/inference/goapi
bash test.sh
EXIT_CODE=$?
if [[ "$EXIT_CODE" != "0" ]]; then
exit 8;
fi
}
function test_fluid_lib_train() {
cat <<EOF
========================================
......@@ -2226,6 +2246,8 @@ function main() {
gen_fluid_lib ${parallel_number}
test_fluid_lib
#test_fluid_lib_train
#go inference test
test_go_inference_api
;;
test_train)
gen_fluid_lib ${parallel_number}
......
......@@ -123,7 +123,7 @@ RUN rm Python-$version.tgz setuptools-40.6.2.zip setuptools-50.3.2.zip pip-20.0.
# Install Go and glide
WORKDIR /home
RUN wget -qO- https://paddle-ci.cdn.bcebos.com/go1.8.1.linux-amd64.tar.gz | \
RUN wget -qO- https://paddle-ci.gz.bcebos.com/go1.15.12.linux-amd64.tar.gz | \
tar -xz -C /usr/local && \
mkdir /root/gopath && \
mkdir /root/gopath/bin && \
......