Transformer C++ inference (GPU) runs into a memory problem
Created by: yxzero
- Version / environment info: 1) PaddlePaddle version: 1.4.1  2) GPU: K40
- Inference info: 1) C++ inference, bcloud BCLOUD build file:
WORKROOT('../../../')
PLATFORM('centos6u3')
COMPILER('gcc482')
CXXFLAGS('-std=c++11 -Os -g -pipe -W -Wall -fPIC')
LDFLAGS('-lpthread -lcrypto -lrt -lm -lcrypto -lcrypt -ldl -lz')
INCPATHS('../../paddlepaddle/paddle/third_party/install/gflags/include')
INCPATHS('../../paddlepaddle/paddle/third_party/install/glog/include')
CONFIGS("baidu/paddlepaddle/paddle@paddle_prebuilt_cuda-1-0-0-4_PD_BL@git_tag", NeedPreBuild())
LIBS(GLOB('../../paddlepaddle/paddle/third_party/install/mklml/lib/*.so'))
LIBS(GLOB('../../paddlepaddle/paddle/third_party/install/tensorrt/lib/*.a'))
LDFLAGS('-L/opt/compiler/cuda-8.0/lib64 -lcudart')
LIBS('../../paddlepaddle/paddle/lib/libpaddle_fluid.a')
Libs('../../paddlepaddle/paddle/lib/libpaddle_fluid.so')
Libs(GLOB('../../paddlepaddle/paddle/third_party/install/*/lib/*.a'))
Libs(GLOB('../../paddlepaddle/paddle/third_party/install/mklml/lib/*.so'))
LIBS(GLOB('../../paddlepaddle/paddle/third_party/install/*/lib/*.a'))
UTApplication('transformer_e2e', Sources('./tests/transformer_e2e.cc'))
Inference code:
void Main1(int batch_size) {
    AnalysisConfig config;
    config.SetModel(FLAGS_modeldir + "/__model__", FLAGS_modeldir + "/__params__");
    // gpu
    config.EnableUseGpu(10 /*initial size of the GPU memory pool in MB*/, 3 /*gpu_id*/);
    //config.device = 3;
    //config.eager_delete_tensor_gb = 0.0;
    //config.fast_eager_deletion_mode = 1;
    //config.EnableMemoryOptim();
    // gpu end
    // cpu
    //config.DisableGpu();
    //config.EnableMKLDNN();
    //config.SetCpuMathLibraryNumThreads(10);
    // cpu end
    config.SwitchUseFeedFetchOps(false);  // inputs/outputs go through the ZeroCopy API
    config.SwitchSpecifyInputNames(true);
    //config.SwitchIrDebug(true);
    LOG(INFO) << "create predictor";
    std::unique_ptr<paddle::PaddlePredictor> predictor = CreatePaddlePredictor(config);
    //std::vector<PaddleTensor> input_slots;
    LOG(INFO) << "open data";
    DataReader reader(FLAGS_datapath);
    reader.get_word_dict();
    //std::vector<PaddleTensor> outputs;
    LOG(INFO) << "run all the test data";
    double whole_time = 0;
    Timer timer;
    int num_batches = 0;
    std::vector<std::string> source_query_vec;
    std::vector<float> print_f;
    std::vector<int64_t> print_int;
    while (reader.NextBatch(predictor, FLAGS_batch_size, source_query_vec)) {
        timer.tic();
        CHECK(predictor->ZeroCopyRun());
        auto output_names = predictor->GetOutputNames();
        //LOG(INFO) << source_query_vec[0] + "\n";
        //LOG(INFO) << output_names[0] << " " << output_names[1];
        //LOG(INFO) << DescribeTensor(predictor->GetOutputTensor(output_names[0]), print_int);
        //LOG(INFO) << DescribeTensor(predictor->GetOutputTensor(output_names[1]), print_f);
        std::vector<DataResult> dataresultvec;
        // Decode the two output tensors (ids and scores) into readable results.
        get_result_tensor(predictor->GetOutputTensor(output_names[0]),
                          predictor->GetOutputTensor(output_names[1]),
                          dataresultvec,
                          reader.num2word_dict);
        for (size_t sour_idx = 0; sour_idx < source_query_vec.size(); sour_idx++) {
            std::string out_str = source_query_vec[sour_idx];
            for (int i = 0; i < FLAGS_beam_search; ++i) {
                out_str += "\t" + dataresultvec[i].reslult_q + "\001" +
                           std::to_string(dataresultvec[i].score);
            }
            LOG(INFO) << out_str << "\n";
        }
        whole_time += timer.toc();
        num_batches++;
        source_query_vec.clear();
        //break;
    }
    LOG(INFO) << "total number of samples: " << num_batches * FLAGS_batch_size;
    LOG(INFO) << "batch_size: " << batch_size << ", time: " << whole_time;
    LOG(INFO) << "average latency of each sample: "
              << whole_time / num_batches / FLAGS_batch_size;
    //for (auto &out : outputs) {
    //    size_t size = std::accumulate(out.shape.begin(), out.shape.end(), 1,
    //                                  [](int a, int b) { return a * b; });
    //    int64_t *data = static_cast<int64_t *>(out.data.data());
    //    for (size_t i = 0; i < size; i++) {
    //        VLOG(3) << data[i];
    //    }
    //}
}
} // namespace inference
} // namespace paddle
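
Since `SwitchUseFeedFetchOps(false)` is set, `DataReader::NextBatch` has to stage every input through the ZeroCopy API before `ZeroCopyRun()` is called; that code is not shown above. Below is a minimal sketch of what that feed/fetch path typically looks like with the Paddle 1.4 C++ API — the helper name, input shape, and dtype are assumptions for illustration (check `predictor->GetInputNames()` for the real names), not the reporter's actual `DataReader`:

#include <cstdint>
#include <vector>

// Hypothetical feed/fetch helper (not from the issue): pushes one batch of
// token ids into the first input tensor and copies the first output back.
void FeedAndFetchOnce(paddle::PaddlePredictor* predictor,
                      const std::vector<int64_t>& token_ids,  // flattened [batch, seq_len]
                      int batch, int seq_len) {
    auto input_names = predictor->GetInputNames();
    auto input_t = predictor->GetInputTensor(input_names[0]);
    input_t->Reshape({batch, seq_len, 1});      // assumed shape for a transformer word input
    input_t->copy_from_cpu(token_ids.data());   // host -> device copy

    CHECK(predictor->ZeroCopyRun());

    auto output_names = predictor->GetOutputNames();
    auto output_t = predictor->GetOutputTensor(output_names[0]);
    std::vector<int> shape = output_t->shape();
    int numel = 1;
    for (int d : shape) numel *= d;
    std::vector<int64_t> result(numel);
    output_t->copy_to_cpu(result.data());       // device -> host copy
}

On the memory side: the commented-out `config.eager_delete_tensor_gb` / `config.fast_eager_deletion_mode` lines are not `AnalysisConfig` members in 1.4 — as far as I know they correspond to global gflags (`--eager_delete_tensor_gb=0.0`, `--fast_eager_deletion_mode=1`) that have to be passed on the command line or set through gflags, while `config.EnableMemoryOptim()` is the supported `AnalysisConfig` switch and would be the first thing to try here.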