diff --git a/paddle/fluid/inference/analysis/CMakeLists.txt b/paddle/fluid/inference/analysis/CMakeLists.txt index 779ede5e460d0ceb6fd404c4a32374f9f9d92088..d43ecc722ea3c78541835fb3f5efc9a3529fbf11 100644 --- a/paddle/fluid/inference/analysis/CMakeLists.txt +++ b/paddle/fluid/inference/analysis/CMakeLists.txt @@ -40,23 +40,20 @@ function (inference_analysis_test TARGET) endif(WITH_TESTING) endfunction(inference_analysis_test) -set(DITU_RNN_MODEL_URL "http://paddle-inference-dist.bj.bcebos.com/ditu_rnn_fluid%2Fmodel.tar.gz") -set(DITU_RNN_DATA_URL "http://paddle-inference-dist.bj.bcebos.com/ditu_rnn_fluid%2Fdata.txt.tar.gz") -set(DITU_INSTALL_DIR "${THIRD_PARTY_PATH}/install/ditu_rnn" CACHE PATH "Ditu RNN model and data root." FORCE) -set(DITU_RNN_MODEL ${DITU_INSTALL_DIR}/model) -set(DITU_RNN_DATA ${DITU_INSTALL_DIR}/data.txt) - -function (inference_download_and_uncompress target url gz_filename) +function (inference_download_and_uncompress install_dir url gz_filename) message(STATUS "Download inference test stuff ${gz_filename} from ${url}") - execute_process(COMMAND bash -c "mkdir -p ${DITU_INSTALL_DIR}") - execute_process(COMMAND bash -c "cd ${DITU_INSTALL_DIR} && wget -q ${url}") - execute_process(COMMAND bash -c "cd ${DITU_INSTALL_DIR} && tar xzf ${gz_filename}") + execute_process(COMMAND bash -c "mkdir -p ${install_dir}") + execute_process(COMMAND bash -c "cd ${install_dir} && wget -q ${url}") + execute_process(COMMAND bash -c "cd ${install_dir} && tar xzf ${gz_filename}") message(STATUS "finish downloading ${gz_filename}") endfunction(inference_download_and_uncompress) +set(DITU_RNN_MODEL_URL "http://paddle-inference-dist.bj.bcebos.com/ditu_rnn_fluid%2Fmodel.tar.gz") +set(DITU_RNN_DATA_URL "http://paddle-inference-dist.bj.bcebos.com/ditu_rnn_fluid%2Fdata.txt.tar.gz") +set(DITU_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo/ditu_rnn" CACHE PATH "Ditu RNN model and data root." 
FORCE) if (NOT EXISTS ${DITU_INSTALL_DIR}) - inference_download_and_uncompress(ditu_rnn_model ${DITU_RNN_MODEL_URL} "ditu_rnn_fluid%2Fmodel.tar.gz") - inference_download_and_uncompress(ditu_rnn_data ${DITU_RNN_DATA_URL} "ditu_rnn_fluid%2Fdata.txt.tar.gz") + inference_download_and_uncompress(${DITU_INSTALL_DIR} ${DITU_RNN_MODEL_URL} "ditu_rnn_fluid%2Fmodel.tar.gz") + inference_download_and_uncompress(${DITU_INSTALL_DIR} ${DITU_RNN_DATA_URL} "ditu_rnn_fluid%2Fdata.txt.tar.gz") endif() inference_analysis_test(test_analyzer SRCS analyzer_tester.cc @@ -87,3 +84,17 @@ inference_analysis_test(test_tensorrt_subgraph_pass SRCS tensorrt_subgraph_pass_ inference_analysis_test(test_pass_manager SRCS pass_manager_tester.cc) inference_analysis_test(test_tensorrt_subgraph_node_mark_pass SRCS tensorrt_subgraph_node_mark_pass_tester.cc) inference_analysis_test(test_model_store_pass SRCS model_store_pass_tester.cc) + +set(CHINESE_NER_MODEL_URL "http://paddle-inference-dist.bj.bcebos.com/chinese_ner_model.tar.gz") +set(CHINESE_NER_DATA_URL "http://paddle-inference-dist.bj.bcebos.com/chinese_ner-data.txt.tar.gz") +set(CHINESE_NER_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo/chinese_ner" CACHE PATH "Chinese ner model and data root." 
FORCE) +if (NOT EXISTS ${CHINESE_NER_INSTALL_DIR}) + inference_download_and_uncompress(${CHINESE_NER_INSTALL_DIR} ${CHINESE_NER_MODEL_URL} "chinese_ner_model.tar.gz") + inference_download_and_uncompress(${CHINESE_NER_INSTALL_DIR} ${CHINESE_NER_DATA_URL} "chinese_ner-data.txt.tar.gz") +endif() + +inference_analysis_test(test_chinese_ner SRCS chinese_ner_tester.cc + EXTRA_DEPS paddle_inference_api paddle_fluid_api + ARGS --inference_model_dir=${PYTHON_TESTS_DIR}/book/word2vec.inference.model + --infer_model=${CHINESE_NER_INSTALL_DIR}/model + --infer_data=${CHINESE_NER_INSTALL_DIR}/data.txt) diff --git a/paddle/fluid/inference/analysis/analyzer_tester.cc b/paddle/fluid/inference/analysis/analyzer_tester.cc index 263fbb044902e886c357835ab298b4f646c7a3ed..cccd6b55ad493f9cb0eeedeab02c1a3970a55fb5 100644 --- a/paddle/fluid/inference/analysis/analyzer_tester.cc +++ b/paddle/fluid/inference/analysis/analyzer_tester.cc @@ -196,13 +196,13 @@ void PrepareInputs(std::vector *input_slots, DataRecord *data, minute_tensor.lod.assign({one_batch.lod3}); // clang-format on // assign data - TensorAssignData(&lod_attention_tensor, - std::vector>({{0, 0}})); + TensorAssignData(&lod_attention_tensor, + std::vector>({{0, 0}})); std::vector tmp_zeros(batch_size * 15, 0.); - TensorAssignData(&init_zero_tensor, {tmp_zeros}); - TensorAssignData(&lod_tensor_tensor, one_batch.rnn_link_data); - TensorAssignData(&week_tensor, one_batch.rnn_week_datas); - TensorAssignData(&minute_tensor, one_batch.rnn_minute_datas); + TensorAssignData(&init_zero_tensor, {tmp_zeros}); + TensorAssignData(&lod_tensor_tensor, one_batch.rnn_link_data); + TensorAssignData(&week_tensor, one_batch.rnn_week_datas); + TensorAssignData(&minute_tensor, one_batch.rnn_minute_datas); // Set inputs. 
auto init_zero_tensor1 = init_zero_tensor; init_zero_tensor1.name = "hidden_init"; diff --git a/paddle/fluid/inference/analysis/chinese_ner_tester.cc b/paddle/fluid/inference/analysis/chinese_ner_tester.cc new file mode 100644 index 0000000000000000000000000000000000000000..9088a29d504309bc2c7b96fd49a0bf44e7cf0da9 --- /dev/null +++ b/paddle/fluid/inference/analysis/chinese_ner_tester.cc @@ -0,0 +1,154 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include "paddle/fluid/framework/ir/pass.h" +#include "paddle/fluid/inference/analysis/analyzer.h" +#include "paddle/fluid/inference/analysis/ut_helper.h" +#include "paddle/fluid/inference/api/helper.h" +#include "paddle/fluid/inference/api/paddle_inference_api.h" +#include "paddle/fluid/platform/profiler.h" + +DEFINE_string(infer_model, "", "model path"); +DEFINE_string(infer_data, "", "data path"); +DEFINE_int32(batch_size, 10, "batch size."); +DEFINE_int32(repeat, 1, "Running the inference program repeat times."); + +namespace paddle { +namespace inference { + +struct DataRecord { + std::vector> word_data_all, mention_data_all; + std::vector> rnn_word_datas, rnn_mention_datas; + std::vector lod; // two inputs have the same lod info. 
+ size_t batch_iter{0}; + size_t batch_size{1}; + DataRecord() = default; + explicit DataRecord(const std::string &path, int batch_size = 1) + : batch_size(batch_size) { + Load(path); + } + DataRecord NextBatch() { + DataRecord data; + size_t batch_end = batch_iter + batch_size; + // NOTE: skip the final batch if not enough data is left to fill it. + if (batch_end <= word_data_all.size()) { + data.word_data_all.assign(word_data_all.begin() + batch_iter, + word_data_all.begin() + batch_end); + data.mention_data_all.assign(mention_data_all.begin() + batch_iter, + mention_data_all.begin() + batch_end); + // Prepare LoDs + data.lod.push_back(0); + CHECK(!data.word_data_all.empty()); + CHECK(!data.mention_data_all.empty()); + CHECK_EQ(data.word_data_all.size(), data.mention_data_all.size()); + for (size_t j = 0; j < data.word_data_all.size(); j++) { + data.rnn_word_datas.push_back(data.word_data_all[j]); + data.rnn_mention_datas.push_back(data.mention_data_all[j]); + // append the running token offset to the LoD + data.lod.push_back(data.lod.back() + data.word_data_all[j].size()); + } + } + batch_iter += batch_size; + return data; + } + void Load(const std::string &path) { + std::ifstream file(path); + std::string line; + int num_lines = 0; + while (std::getline(file, line)) { + num_lines++; + std::vector data; + split(line, ';', &data); + // load word data (field 1 of the ';'-separated line) + std::vector word_data; + split_to_int64(data[1], ' ', &word_data); + // load mention data (field 3 of the ';'-separated line) + std::vector mention_data; + split_to_int64(data[3], ' ', &mention_data); + word_data_all.push_back(std::move(word_data)); + mention_data_all.push_back(std::move(mention_data)); + } + } +}; + +void PrepareInputs(std::vector *input_slots, DataRecord *data, + int batch_size) { + PaddleTensor lod_word_tensor, lod_mention_tensor; + lod_word_tensor.name = "word"; + lod_mention_tensor.name = "mention"; + auto one_batch = data->NextBatch(); + int size = one_batch.lod[one_batch.lod.size() - 1]; // token batch size + lod_word_tensor.shape.assign({size, 1}); + 
lod_word_tensor.lod.assign({one_batch.lod}); + lod_mention_tensor.shape.assign({size, 1}); + lod_mention_tensor.lod.assign({one_batch.lod}); + // assign data + TensorAssignData(&lod_word_tensor, one_batch.rnn_word_datas); + TensorAssignData(&lod_mention_tensor, one_batch.rnn_mention_datas); + // Set inputs. + input_slots->assign({lod_word_tensor, lod_mention_tensor}); + for (auto &tensor : *input_slots) { + tensor.dtype = PaddleDType::INT64; + } +} + +// Expected leading values of the first inference result (outputs[0]). +const int chinese_ner_result_data[] = {30, 45, 41, 48, 17, 26, + 48, 39, 38, 16, 25}; + +void TestChineseNERPrediction() { + NativeConfig config; + config.prog_file = FLAGS_infer_model + "/__model__"; + config.param_file = FLAGS_infer_model + "/param"; + config.use_gpu = false; + config.device = 0; + config.specify_input_name = true; + + auto predictor = + CreatePaddlePredictor(config); + std::vector input_slots; + DataRecord data(FLAGS_infer_data, FLAGS_batch_size); + // Prepare inputs. + PrepareInputs(&input_slots, &data, FLAGS_batch_size); + std::vector outputs; + + Timer timer; + timer.tic(); + for (int i = 0; i < FLAGS_repeat; i++) { + predictor->Run(input_slots, &outputs); + } + LOG(INFO) << "===========profile result==========="; + LOG(INFO) << "batch_size: " << FLAGS_batch_size + << ", repeat: " << FLAGS_repeat + << ", latency: " << timer.toc() / FLAGS_repeat << "ms"; + LOG(INFO) << "====================================="; + + PADDLE_ENFORCE_EQ(outputs.size(), 1UL); + auto &out = outputs[0]; + size_t size = std::accumulate(out.shape.begin(), out.shape.end(), 1, + [](int a, int b) { return a * b; }); + PADDLE_ENFORCE_GT(size, 0); + int64_t *result = static_cast(out.data.data()); + for (size_t i = 0; i < std::min(11UL, size); i++) { + PADDLE_ENFORCE_EQ(result[i], chinese_ner_result_data[i]); + } +} + +// Directly infer with the original model. 
+TEST(Analyzer, Chinese_ner) { TestChineseNERPrediction(); } + +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/api/api_impl.cc b/paddle/fluid/inference/api/api_impl.cc index 7d7a14ed08a9d04c15ece9fc81050d7cb901e02d..530274f0c9262b6ed0e43766606585c8459eabb9 100644 --- a/paddle/fluid/inference/api/api_impl.cc +++ b/paddle/fluid/inference/api/api_impl.cc @@ -62,14 +62,14 @@ void NativePaddlePredictor::PrepareFeedFetch() { for (auto *op : inference_program_->Block(0).AllOps()) { if (op->Type() == "feed") { int idx = boost::get(op->GetAttr("col")); - if (feeds_.size() <= idx) { + if (feeds_.size() <= (size_t)idx) { feeds_.resize(idx + 1); } feeds_[idx] = op; feed_names_[op->Output("Out")[0]] = idx; } else if (op->Type() == "fetch") { int idx = boost::get(op->GetAttr("col")); - if (fetchs_.size() <= idx) { + if (fetchs_.size() <= (size_t)idx) { fetchs_.resize(idx + 1); } fetchs_[idx] = op; @@ -222,6 +222,62 @@ bool NativePaddlePredictor::SetFeed(const std::vector &inputs, } return true; } +template +void NativePaddlePredictor::GetFetchOne(const framework::LoDTensor &fetch, + PaddleTensor *output) { + std::vector shape; + auto dims_i = fetch.dims(); + auto lod = fetch.lod(); + const T *output_ptr = fetch.data(); + auto num = fetch.numel(); + std::vector data; + if (0 == lod.size()) { + std::copy(output_ptr, output_ptr + num, std::back_inserter(data)); + for (int j = 0; j < dims_i.size(); ++j) { + shape.push_back(dims_i[j]); + } + } else { + // for batch detection + // image[0] -> output[0] shape {145, 6} + // image[1] -> output[1] shape {176, 6} + // then, + // the batch output shape {321, 6} + // the lod {{0, 145, 321}} + // so we should append output[0] to {176, 6} + size_t max_dim = 0; + for (size_t j = 1; j < lod[0].size(); j++) { + max_dim = std::max(max_dim, lod[0][j] - lod[0][j - 1]); + } + size_t common_dim = lod[0].back() == 0 ? 
0 : num / lod[0].back(); + if (max_dim > 0) { + data.resize((lod[0].size() - 1) * max_dim * common_dim, 0); + } + for (size_t j = 1; j < lod[0].size(); j++) { + size_t start = lod[0][j - 1] * common_dim; + size_t end = lod[0][j] * common_dim; + if (end > start) { + std::copy(output_ptr + start, output_ptr + end, + data.begin() + (j - 1) * max_dim * common_dim); + } + } + shape.push_back(lod[0].size() - 1); + shape.push_back(max_dim); + for (int j = 1; j < dims_i.size(); ++j) { + shape.push_back(dims_i[j]); + } + } + + output->shape = shape; + auto &buffer = output->data; + if (buffer.empty() || buffer.length() < sizeof(T) * data.size()) { + buffer.Resize(sizeof(T) * data.size()); + } + std::memcpy(buffer.data(), data.data(), sizeof(T) * data.size()); + // copy LoD + for (const auto &level : fetch.lod()) { + output->lod.emplace_back(level); + } +} bool NativePaddlePredictor::GetFetch(std::vector *outputs, framework::Scope *scope) { @@ -229,70 +285,21 @@ bool NativePaddlePredictor::GetFetch(std::vector *outputs, outputs->resize(fetchs_.size()); for (size_t i = 0; i < fetchs_.size(); ++i) { int idx = boost::get(fetchs_[i]->GetAttr("col")); - PADDLE_ENFORCE(idx == i); - framework::LoDTensor &output = + PADDLE_ENFORCE((size_t)idx == i); + framework::LoDTensor &fetch = framework::GetFetchVariable(*scope, "fetch", idx); - // TODO(panyx0718): Support fetch of other types. 
- if (output.type() != typeid(float)) { - LOG(ERROR) << "only support fetching float now."; - return false; - } - - std::vector shape; - auto dims_i = output.dims(); - auto lod = output.lod(); - const float *output_ptr = output.data(); - // const int64_t* output_ptr = fetchs[i].data(); - auto num = output.numel(); - std::vector data; - if (0 == lod.size()) { - std::copy(output_ptr, output_ptr + num, std::back_inserter(data)); - for (int j = 0; j < dims_i.size(); ++j) { - shape.push_back(dims_i[j]); - } + auto type = fetch.type(); + auto output = &(outputs->at(i)); + if (type == typeid(float)) { + GetFetchOne(fetch, output); + output->dtype = PaddleDType::FLOAT32; + } else if (type == typeid(int64_t)) { + GetFetchOne(fetch, output); + output->dtype = PaddleDType::INT64; } else { - // for batch detection - // image[0] -> output[0] shape {145, 6} - // image[1] -> output[1] shape {176, 6} - // then, - // the batch output shape {321, 6} - // the lod {{0, 145, 321}} - // so we should append output[0] to {176, 6} - size_t max_dim = 0; - for (size_t j = 1; j < lod[0].size(); j++) { - max_dim = std::max(max_dim, lod[0][j] - lod[0][j - 1]); - } - size_t common_dim = lod[0].back() == 0 ? 
0 : num / lod[0].back(); - if (max_dim > 0) { - data.resize((lod[0].size() - 1) * max_dim * common_dim, 0); - } - for (size_t j = 1; j < lod[0].size(); j++) { - size_t start = lod[0][j - 1] * common_dim; - size_t end = lod[0][j] * common_dim; - if (end > start) { - std::copy(output_ptr + start, output_ptr + end, - data.begin() + (j - 1) * max_dim * common_dim); - } - } - shape.push_back(lod[0].size() - 1); - shape.push_back(max_dim); - for (int j = 1; j < dims_i.size(); ++j) { - shape.push_back(dims_i[j]); - } - } - - outputs->at(i).shape = shape; - auto &buffer = outputs->at(i).data; - if (buffer.empty() || buffer.length() < sizeof(float) * data.size()) { - buffer.Resize(sizeof(float) * data.size()); - } - std::memcpy(buffer.data(), data.data(), buffer.length()); - // copy LoD - for (const auto &level : output.lod()) { - outputs->at(i).lod.emplace_back(level); + LOG(ERROR) << "unknown type, only support float32 and int64 now."; + return false; } - outputs->at(i).dtype = PaddleDType::FLOAT32; - // TODO(panyx0718): support other types? fill tensor name? avoid a copy. 
} return true; } diff --git a/paddle/fluid/inference/api/api_impl.h b/paddle/fluid/inference/api/api_impl.h index 4eff9204eba987aed11e4066fa7b6f6cc610a763..ec801c58857e716241d28404510530e551ed25aa 100644 --- a/paddle/fluid/inference/api/api_impl.h +++ b/paddle/fluid/inference/api/api_impl.h @@ -51,7 +51,9 @@ class NativePaddlePredictor : public PaddlePredictor { framework::Scope *scope); bool GetFetch(std::vector *output_data, framework::Scope *scope); - + template + void GetFetchOne(const framework::LoDTensor &fetchs, + PaddleTensor *output_data); void PrepareFeedFetch(); NativeConfig config_; diff --git a/paddle/fluid/inference/api/helper.h b/paddle/fluid/inference/api/helper.h index e44b1b74bc385c015fa6efcebac05359a810cbc1..8eac449a1081330877eac6d4f40c064533b0acab 100644 --- a/paddle/fluid/inference/api/helper.h +++ b/paddle/fluid/inference/api/helper.h @@ -68,6 +68,13 @@ static void split_to_float(const std::string &str, char sep, std::transform(pieces.begin(), pieces.end(), std::back_inserter(*fs), [](const std::string &v) { return std::stof(v); }); } +static void split_to_int64(const std::string &str, char sep, + std::vector *is) { + std::vector pieces; + split(str, sep, &pieces); + std::transform(pieces.begin(), pieces.end(), std::back_inserter(*is), + [](const std::string &v) { return std::stoll(v); }); +} template std::string to_string(const std::vector &vec) { std::stringstream ss; @@ -84,14 +91,18 @@ template <> std::string to_string>>( const std::vector>> &vec); -// clang-format off -static void TensorAssignData(PaddleTensor *tensor, const std::vector> &data) { +template +static void TensorAssignData(PaddleTensor *tensor, + const std::vector> &data) { // Assign buffer - int dim = std::accumulate(tensor->shape.begin(), tensor->shape.end(), 1, [](int a, int b) { return a * b; }); - tensor->data.Resize(sizeof(float) * dim); + int dim = std::accumulate(tensor->shape.begin(), tensor->shape.end(), 1, + [](int a, int b) { return a * b; }); + 
tensor->data.Resize(sizeof(T) * dim); int c = 0; for (const auto &f : data) { - for (float v : f) { static_cast(tensor->data.data())[c++] = v; } + for (T v : f) { + static_cast(tensor->data.data())[c++] = v; + } } }