Commit 9c7fde45 authored by luotao1

enhance test_analyzer to profile ditu inference demo

Parent decda738
@@ -163,8 +163,8 @@ TEST(GraphPatternDetecter, MultiSubgraph) {
   // 3. Detect op2 -> var2 -> op4
   // 4. Detect op2 -> var3 -> op5
   // But 2 and 3 and 4 overlapped, so keep 2, so the final choices are 1 and 2
-  ASSERT_GE(count, 1UL);
-  ASSERT_LE(count, 2UL);
+  ASSERT_GE(count, 1);
+  ASSERT_LE(count, 2);
 }
 }  // namespace ir
......
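Note on the hunk above: dropping the UL suffix keeps both sides of the gtest comparison the same signedness, which avoids a -Wsign-compare warning when count is a signed integer. A minimal, hypothetical illustration (the variable type here is an assumption, not taken from the test):

    // Illustrative only: a signed counter compared against literals.
    int count = 2;
    ASSERT_GE(count, 1);       // both operands are int, no signedness mismatch
    ASSERT_LE(count, 2);
    // ASSERT_GE(count, 1UL);  // int vs. unsigned long would warn under -Wsign-compare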
@@ -139,7 +139,7 @@ int64_t SelectedRows::AutoGrownIndex(int64_t key, bool auto_grown) {
   }
   auto write_iter = id_to_index_.find(key);
   if (write_iter == id_to_index_.end()) {
-    size_t row_num = rows_.size();
+    int row_num = rows_.size();
     if (row_num == value_->dims()[0]) {
       rwlock_->UNLock();
       PADDLE_THROW("selected rows is full, then length exceed %d", row_num);
@@ -182,7 +182,7 @@ void SelectedRows::Get(const framework::Tensor& ids, framework::Tensor* value,
   PADDLE_ENFORCE_EQ(value_width, value->numel() / value->dims()[0],
                     "output tensor should have the same shape with table "
                     "except the dims[0].");
-  for (size_t i = 0; i < ids.numel(); ++i) {
+  for (int i = 0; i < ids.numel(); ++i) {
     int64_t index = AutoGrownIndex(ids.data<int64_t>()[i], auto_grown);
     framework::VisitDataType(
         framework::ToDataType(value_->type()),
......
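Both SelectedRows changes follow the same idea: the counters are compared against signed quantities (value_->dims()[0] and ids.numel() are signed in the framework), so a size_t index mixes signed and unsigned operands. A hedged sketch of the pattern (names are placeholders, not the framework API):

    // Illustrative only: match the index type to the signedness of its bound.
    int64_t bound = tensor_numel();        // stand-in for ids.numel()
    for (int64_t i = 0; i < bound; ++i) {  // signed vs. signed, no -Wsign-compare
      // ... process element i ...
    }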
@@ -23,6 +23,8 @@
 DEFINE_string(infer_ditu_rnn_model, "", "model path for ditu RNN");
 DEFINE_string(infer_ditu_rnn_data, "", "data path for ditu RNN");
+DEFINE_int32(batch_size, 10, "batch size.");
+DEFINE_int32(repeat, 1, "Running the inference program repeat times.");
 
 namespace paddle {
 namespace inference {
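The two new flags use the same gflags mechanism as the existing DEFINE_string entries: DEFINE_int32(name, default, help) creates a FLAGS_name variable that gflags fills in from the command line. A standalone sketch of the mechanism (this is not the test file itself):

    #include <gflags/gflags.h>
    #include <iostream>

    DEFINE_int32(batch_size, 10, "batch size.");
    DEFINE_int32(repeat, 1, "Running the inference program repeat times.");

    int main(int argc, char** argv) {
      // Fills FLAGS_batch_size / FLAGS_repeat from --batch_size=... / --repeat=...
      google::ParseCommandLineFlags(&argc, &argv, true);  // or gflags:: on newer gflags
      std::cout << "batch_size=" << FLAGS_batch_size
                << ", repeat=" << FLAGS_repeat << std::endl;
      return 0;
    }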
@@ -92,7 +94,7 @@ struct DataRecord {
   size_t batch_iter{0};
   size_t batch_size{1};
   DataRecord() = default;
-  DataRecord(const std::string &path, int batch_size = 1)
+  explicit DataRecord(const std::string &path, int batch_size = 1)
       : batch_size(batch_size) {
     Load(path);
   }
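Marking the one-argument DataRecord constructor explicit stops implicit conversions from std::string (the usual cpplint fix). A small, self-contained illustration of what explicit rules out (Record and Consume are hypothetical, not from the patch):

    #include <string>

    struct Record {
      explicit Record(const std::string &path, int batch_size = 1) {}
    };

    void Consume(const Record &r) {}   // hypothetical consumer

    void Demo() {
      Consume(Record("./data.txt"));          // fine: constructed explicitly
      // Consume(std::string("./data.txt"));  // rejected thanks to `explicit`;
      //                                      // it would compile without it
    }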
@@ -165,7 +167,6 @@ struct DataRecord {
 };
 void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
                    int batch_size) {
-  // DataRecord data(FLAGS_datapath, batch_size);
   PaddleTensor lod_attention_tensor, init_zero_tensor, lod_tensor_tensor,
       week_tensor, minute_tensor;
   lod_attention_tensor.name = "data_lod_attention";
@@ -174,28 +175,33 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
   week_tensor.name = "week";
   minute_tensor.name = "minute";
   auto one_batch = data->NextBatch();
-  // clang-format off
-  std::vector<int> rnn_link_data_shape
-      ({static_cast<int>(one_batch.rnn_link_data.size()), static_cast<int>(one_batch.rnn_link_data.front().size())});
+  std::vector<int> rnn_link_data_shape(
+      {static_cast<int>(one_batch.rnn_link_data.size()),
+       static_cast<int>(one_batch.rnn_link_data.front().size())});
   lod_attention_tensor.shape.assign({1, 2});
   lod_attention_tensor.lod.assign({one_batch.lod1, one_batch.lod2});
   init_zero_tensor.shape.assign({batch_size, 15});
   init_zero_tensor.lod.assign({one_batch.lod3});
   lod_tensor_tensor.shape = rnn_link_data_shape;
   lod_tensor_tensor.lod.assign({one_batch.lod1});
-  week_tensor.shape.assign({(int) one_batch.rnn_week_datas.size(), (int) one_batch.rnn_week_datas.front().size()});
+  // clang-format off
+  week_tensor.shape.assign(
+      {static_cast<int>(one_batch.rnn_week_datas.size()),
+       static_cast<int>(one_batch.rnn_week_datas.front().size())});
   week_tensor.lod.assign({one_batch.lod3});
-  minute_tensor.shape.assign({(int) one_batch.rnn_minute_datas.size(),
-                              (int) one_batch.rnn_minute_datas.front().size()});
+  minute_tensor.shape.assign(
+      {static_cast<int>(one_batch.rnn_minute_datas.size()),
+       static_cast<int>(one_batch.rnn_minute_datas.front().size())});
   minute_tensor.lod.assign({one_batch.lod3});
+  // clang-format on
   // assign data
-  TensorAssignData(&lod_attention_tensor, std::vector<std::vector<float>>({{0, 0}}));
+  TensorAssignData(&lod_attention_tensor,
+                   std::vector<std::vector<float>>({{0, 0}}));
   std::vector<float> tmp_zeros(batch_size * 15, 0.);
   TensorAssignData(&init_zero_tensor, {tmp_zeros});
   TensorAssignData(&lod_tensor_tensor, one_batch.rnn_link_data);
   TensorAssignData(&week_tensor, one_batch.rnn_week_datas);
   TensorAssignData(&minute_tensor, one_batch.rnn_minute_datas);
-  // clang-format on
   // Set inputs.
   auto init_zero_tensor1 = init_zero_tensor;
   init_zero_tensor1.name = "hidden_init";
@@ -231,12 +237,9 @@ std::string DescribeTensor(const PaddleTensor &tensor) {
   os << "\n";
   os << " - data: ";
-  // clang-format off
-  int dim = std::accumulate(tensor.shape.begin(),
-                            tensor.shape.end(),
-                            1,
-                            [](int a, int b) { return a * b; });  // clang-format on
-  for (size_t i = 0; i < dim; i++) {
+  int dim = std::accumulate(tensor.shape.begin(), tensor.shape.end(), 1,
+                            [](int a, int b) { return a * b; });
+  for (int i = 0; i < dim; i++) {
     os << static_cast<float *>(tensor.data.data())[i] << " ";
   }
   os << '\n';
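The reformatted block keeps the same logic: std::accumulate over the shape with a multiplying functor gives the element count that bounds the print loop. A brief standalone sketch of the idiom (example values only):

    #include <iostream>
    #include <numeric>
    #include <vector>

    int main() {
      std::vector<int> shape = {2, 3, 4};  // e.g. a [2, 3, 4] tensor
      // Product of all dimensions = number of elements.
      int dim = std::accumulate(shape.begin(), shape.end(), 1,
                                [](int a, int b) { return a * b; });
      std::cout << dim << std::endl;       // prints 24
      return 0;
    }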
@@ -300,13 +303,16 @@ void TestDituRNNPrediction(const std::string &model_path,
   for (int i = 0; i < num_times; i++) {
     predictor->Run(input_slots, &outputs);
   }
-  LOG(INFO) << "time/batch: " << timer.toc() / num_times;
+  LOG(INFO) << "===========profile result===========";
+  LOG(INFO) << "batch_size: " << batch_size << ", repeat: " << num_times
+            << ", latency: " << timer.toc() / num_times << "ms";
+  LOG(INFO) << "=====================================";
   for (auto &out : outputs) {
     size_t size = std::accumulate(out.shape.begin(), out.shape.end(), 1,
                                   [](int a, int b) { return a * b; });
     float *data = static_cast<float *>(out.data.data());
-    for (int i = 0;
+    for (size_t i = 0;
          i < std::min(sizeof(ditu_rnn_target_data) / sizeof(float), size);
          i++) {
       EXPECT_NEAR(data[i], ditu_rnn_target_data[i], 1e-3);
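The new profile lines report average latency as total elapsed time divided by the repeat count. The Timer type is not part of this hunk, so the sketch below uses std::chrono to show the same averaging; treat it as an assumption about the measurement, not the actual Timer implementation:

    #include <chrono>

    void RunOnce() { /* stand-in for predictor->Run(input_slots, &outputs) */ }

    double AverageLatencyMs(int num_times) {
      auto start = std::chrono::steady_clock::now();
      for (int i = 0; i < num_times; i++) {
        RunOnce();
      }
      auto end = std::chrono::steady_clock::now();
      double total_ms =
          std::chrono::duration<double, std::milli>(end - start).count();
      return total_ms / num_times;  // mirrors "timer.toc() / num_times"
    }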
@@ -336,7 +342,7 @@ TEST(Analyzer, SupportIRPass) {
 // Directly infer with the original model.
 TEST(Analyzer, DituRNN_without_analysis) {
   TestDituRNNPrediction(FLAGS_infer_ditu_rnn_model, FLAGS_infer_ditu_rnn_data,
-                        10, false, false);
+                        FLAGS_batch_size, false, false, FLAGS_repeat);
 }
 
 // Inference with the original model with the analysis turned on, the analysis
@@ -344,14 +350,14 @@ TEST(Analyzer, DituRNN_without_analysis) {
 TEST(Analyzer, DituRNN_with_analysis) {
   LOG(INFO) << "ditu rnn with analysis";
   TestDituRNNPrediction(FLAGS_infer_ditu_rnn_model, FLAGS_infer_ditu_rnn_data,
-                        10, true, false, 1);
+                        FLAGS_batch_size, true, false, FLAGS_repeat);
 }
 
 // Inference with analysis and IR. The IR module will fuse some large kernels.
 TEST(Analyzer, DituRNN_with_analysis_with_IR) {
   LOG(INFO) << "ditu rnn with analysis and IR fuse";
   TestDituRNNPrediction(FLAGS_infer_ditu_rnn_model, FLAGS_infer_ditu_rnn_data,
-                        10, true, true, 1);
+                        FLAGS_batch_size, true, true, FLAGS_repeat);
 }
 }  // namespace analysis
......
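With these changes the batch size and repeat count are controlled at run time instead of being hard-coded to 10 and 1. Assuming the usual gflags syntax and the test_analyzer binary named in the commit title, an invocation could look like ./test_analyzer --infer_ditu_rnn_model=<model_dir> --infer_ditu_rnn_data=<data_file> --batch_size=32 --repeat=10, where the paths and values are placeholders.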
@@ -54,7 +54,7 @@ class SamplingIdKernel : public framework::OpKernel<T> {
         static_cast<T>(context.Attr<float>("max")));
 
     std::vector<T> ids(batch_size);
-    for (size_t i = 0; i < batch_size; ++i) {
+    for (int i = 0; i < batch_size; ++i) {
       T r = dist(engine);
       int idx = width - 1;
       for (int j = 0; j < width; ++j) {
......
@@ -116,7 +116,6 @@ function cmake_gen() {
         -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
         -DWITH_CONTRIB=${WITH_CONTRIB:-ON}
         -DWITH_ANAKIN=${WITH_ANAKIN:-OFF}
-        -DWITH_INFERENCE_DEMO=${WITH_INFERENCE_DEMO:-ON}
         -DPY_VERSION=${PY_VERSION:-2.7}
     ========================================
 EOF
@@ -146,7 +145,6 @@ EOF
         -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
         -DWITH_CONTRIB=${WITH_CONTRIB:-ON} \
         -DWITH_ANAKIN=${WITH_ANAKIN:-OFF} \
-        -DWITH_INFERENCE_DEMO=${WITH_INFERENCE_DEMO:-ON} \
         -DPY_VERSION=${PY_VERSION:-2.7}
 }
......