Transformer C++ inference (GPU) runs into a memory problem
Created by: yxzero
- Version / environment info: 1) PaddlePaddle version: 1.4.1  2) GPU: K40
- Inference info: 1) C++ inference, bcloud BCLOUD build file:
WORKROOT('../../../')
PLATFORM('centos6u3')
COMPILER('gcc482')
CXXFLAGS('-std=c++11 -Os -g -pipe -W -Wall -fPIC')
LDFLAGS('-lpthread -lcrypto -lrt -lm -lcrypto -lcrypt -ldl -lz')
INCPATHS('../../paddlepaddle/paddle/third_party/install/gflags/include')
INCPATHS('../../paddlepaddle/paddle/third_party/install/glog/include')
CONFIGS("baidu/paddlepaddle/paddle@paddle_prebuilt_cuda-1-0-0-4_PD_BL@git_tag", NeedPreBuild())
LIBS(GLOB('../../paddlepaddle/paddle/third_party/install/mklml/lib/*.so'))
LIBS(GLOB('../../paddlepaddle/paddle/third_party/install/tensorrt/lib/*.a'))
LDFLAGS('-L/opt/compiler/cuda-8.0/lib64 -lcudart')
LIBS('../../paddlepaddle/paddle/lib/libpaddle_fluid.a')
Libs('../../paddlepaddle/paddle/lib/libpaddle_fluid.so')
Libs(GLOB('../../paddlepaddle/paddle/third_party/install/*/lib/*.a'))
Libs(GLOB('../../paddlepaddle/paddle/third_party/install/mklml/lib/*.so'))
LIBS(GLOB('../../paddlepaddle/paddle/third_party/install/*/lib/*.a'))
UTApplication('transformer_e2e', Sources('./tests/transformer_e2e.cc'))
Inference code:
void Main1(int batch_size) {
    AnalysisConfig config;
    config.SetModel(FLAGS_modeldir + "/__model__", FLAGS_modeldir + "/__params__");
    // gpu
    config.EnableUseGpu(10 /*initial size of the GPU memory pool in MB*/, 3 /*gpu_id*/);
    //config.device = 3;
    //config.eager_delete_tensor_gb = 0.0;
    //config.fast_eager_deletion_mode = 1;
    //config.EnableMemoryOptim();
    // gpu end
    // cpu
    //config.DisableGpu();
    //config.EnableMKLDNN();
    //config.SetCpuMathLibraryNumThreads(10);
    // cpu end
    config.SwitchUseFeedFetchOps(false);  // inputs/outputs go through the ZeroCopy API
    config.SwitchSpecifyInputNames(true);
    //config.SwitchIrDebug(true);
    LOG(INFO) << "create predictor";
    std::unique_ptr<paddle::PaddlePredictor> predictor = CreatePaddlePredictor(config);
    //std::vector<PaddleTensor> input_slots;
    LOG(INFO) << "open data";
    DataReader reader(FLAGS_datapath);
    reader.get_word_dict();
    //std::vector<PaddleTensor> outputs;
    LOG(INFO) << "run all the test data";
    double whole_time = 0;
    Timer timer;
    int num_batches = 0;
    std::vector<std::string> source_query_vec;
    std::vector<float> print_f;
    std::vector<int64_t> print_int;
    while (reader.NextBatch(predictor, FLAGS_batch_size, source_query_vec)) {
        timer.tic();
        CHECK(predictor->ZeroCopyRun());
        auto output_names = predictor->GetOutputNames();
        //LOG(INFO) << source_query_vec[0] + "\n";
        //LOG(INFO) << output_names[0] << " " << output_names[1];
        //LOG(INFO) << DescribeTensor(predictor->GetOutputTensor(output_names[0]), print_int);
        //LOG(INFO) << DescribeTensor(predictor->GetOutputTensor(output_names[1]), print_f);
        std::vector<DataResult> dataresultvec;
        // Decode the two output tensors (ids and scores) into readable results.
        get_result_tensor(predictor->GetOutputTensor(output_names[0]),
                          predictor->GetOutputTensor(output_names[1]),
                          dataresultvec,
                          reader.num2word_dict);
        for (size_t sour_idx = 0; sour_idx < source_query_vec.size(); sour_idx++) {
            std::string out_str = source_query_vec[sour_idx];
            for (int i = 0; i < FLAGS_beam_search; ++i) {
                out_str += "\t" + dataresultvec[i].reslult_q + "\001" +
                           std::to_string(dataresultvec[i].score);
            }
            LOG(INFO) << out_str << "\n";
        }
        whole_time += timer.toc();
        num_batches++;
        source_query_vec.clear();
        //break;
    }
    LOG(INFO) << "total number of samples: " << num_batches * FLAGS_batch_size;
    LOG(INFO) << "batch_size: " << batch_size << ", time: " << whole_time;
    LOG(INFO) << "average latency of each sample: "
              << whole_time / num_batches / FLAGS_batch_size;
    //for (auto &out : outputs) {
    //    size_t size = std::accumulate(out.shape.begin(), out.shape.end(), 1,
    //                                  [](int a, int b) { return a * b; });
    //    int64_t *data = static_cast<int64_t *>(out.data.data());
    //    for (size_t i = 0; i < size; i++) {
    //        VLOG(3) << data[i];
    //    }
    //}
}
} // namespace inference
} // namespace paddle
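
Since `SwitchUseFeedFetchOps(false)` is set, `DataReader::NextBatch` has to stage every input through the ZeroCopy API before `ZeroCopyRun()` is called; that code is not shown above. Below is a minimal sketch of what that feed/fetch path typically looks like with the Paddle 1.4 C++ API — the helper name, input shape, and dtype are assumptions for illustration (check `predictor->GetInputNames()` for the real names), not the reporter's actual `DataReader`:

#include <cstdint>
#include <vector>

// Hypothetical feed/fetch helper (not from the issue): pushes one batch of
// token ids into the first input tensor and copies the first output back.
void FeedAndFetchOnce(paddle::PaddlePredictor* predictor,
                      const std::vector<int64_t>& token_ids,  // flattened [batch, seq_len]
                      int batch, int seq_len) {
    auto input_names = predictor->GetInputNames();
    auto input_t = predictor->GetInputTensor(input_names[0]);
    input_t->Reshape({batch, seq_len, 1});      // assumed shape for a transformer word input
    input_t->copy_from_cpu(token_ids.data());   // host -> device copy

    CHECK(predictor->ZeroCopyRun());

    auto output_names = predictor->GetOutputNames();
    auto output_t = predictor->GetOutputTensor(output_names[0]);
    std::vector<int> shape = output_t->shape();
    int numel = 1;
    for (int d : shape) numel *= d;
    std::vector<int64_t> result(numel);
    output_t->copy_to_cpu(result.data());       // device -> host copy
}

On the memory side: the commented-out `config.eager_delete_tensor_gb` / `config.fast_eager_deletion_mode` lines are not `AnalysisConfig` members in 1.4 — as far as I know they correspond to global gflags (`--eager_delete_tensor_gb=0.0`, `--fast_eager_deletion_mode=1`) that have to be passed on the command line or set through gflags, while `config.EnableMemoryOptim()` is the supported `AnalysisConfig` switch and would be the first thing to try here.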