diff --git a/benchmark/fluid/args.py b/benchmark/fluid/args.py
index 0d5c9652de6b814627e54018366137e214726619..9540900b112f54594bbfdbc8d7cd3b6e1f5269dd 100644
--- a/benchmark/fluid/args.py
+++ b/benchmark/fluid/args.py
@@ -136,10 +136,6 @@ def parse_args():
         '--no_random',
         action='store_true',
         help='If set, keep the random seed and do not shuffle the data.')
-    parser.add_argument(
-        '--use_lars',
-        action='store_true',
-        help='If set, use lars for optimizers, ONLY support resnet module.')
     parser.add_argument(
         '--reduce_strategy',
         type=str,
diff --git a/benchmark/fluid/models/resnet.py b/benchmark/fluid/models/resnet.py
index 1b3bfe659c7d97b58dc4121387d4db22266381c5..f692e7722a1c9a54a4509ce7c78cc68e1f28da74 100644
--- a/benchmark/fluid/models/resnet.py
+++ b/benchmark/fluid/models/resnet.py
@@ -200,11 +200,6 @@ def get_model(args, is_train, main_prog, startup_prog):
     # configure optimize
     optimizer = None
     if is_train:
-        if args.use_lars:
-            lars_decay = 1.0
-        else:
-            lars_decay = 0.0
-
         total_images = 1281167 / trainer_count
 
         step = int(total_images / (args.batch_size * args.gpus) + 1)
diff --git a/benchmark/fluid/models/resnet_with_preprocess.py b/benchmark/fluid/models/resnet_with_preprocess.py
index e8d661d847516a15e4e28796960815935b82ae6f..e996c9a704531757891354c7c75a9d7915195ee0 100644
--- a/benchmark/fluid/models/resnet_with_preprocess.py
+++ b/benchmark/fluid/models/resnet_with_preprocess.py
@@ -224,11 +224,6 @@ def get_model(args, is_train, main_prog, startup_prog):
     # configure optimize
     optimizer = None
     if is_train:
-        if args.use_lars:
-            lars_decay = 1.0
-        else:
-            lars_decay = 0.0
-
         total_images = 1281167 / trainer_count
 
         step = int(total_images / args.batch_size + 1)
diff --git a/benchmark/fluid/models/se_resnext.py b/benchmark/fluid/models/se_resnext.py
index 9f887fb324dc86a30b708b9ef04068282a3e6c3e..7fbb83c2ec1bab29731ae4e432dda202007b2e2c 100644
--- a/benchmark/fluid/models/se_resnext.py
+++ b/benchmark/fluid/models/se_resnext.py
@@ -244,11 +244,6 @@ def get_model(args, is_train, main_prog, startup_prog):
 
     optimizer = None
     if is_train:
-        if args.use_lars:
-            lars_decay = 1.0
-        else:
-            lars_decay = 0.0
-
         total_images = 1281167 / trainer_count
 
         step = int(total_images / args.batch_size + 1)
@@ -262,8 +257,7 @@ def get_model(args, is_train, main_prog, startup_prog):
             learning_rate=fluid.layers.piecewise_decay(
                 boundaries=bd, values=lr),
             momentum=0.9,
-            regularization=fluid.regularizer.L2Decay(1e-4),
-            LARS_weight_decay=lars_decay)
+            regularization=fluid.regularizer.L2Decay(1e-4))
         optimizer.minimize(avg_cost)
 
     if args.memory_optimize:
diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec
index f61d1254fd1419490c483532ff15257e5d8f4507..1ce2cf83e8dcbcc3eb18ea3b7099dfb257fc8158 100644
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -350,25 +350,25 @@ paddle.fluid.nets.simple_img_conv_pool ArgSpec(args=['input', 'num_filters', 'fi
 paddle.fluid.nets.sequence_conv_pool ArgSpec(args=['input', 'num_filters', 'filter_size', 'param_attr', 'act', 'pool_type'], varargs=None, keywords=None, defaults=(None, 'sigmoid', 'max'))
 paddle.fluid.nets.glu ArgSpec(args=['input', 'dim'], varargs=None, keywords=None, defaults=(-1,))
 paddle.fluid.nets.scaled_dot_product_attention ArgSpec(args=['queries', 'keys', 'values', 'num_heads', 'dropout_rate'], varargs=None, keywords=None, defaults=(1, 0.0))
-paddle.fluid.optimizer.SGDOptimizer.__init__ ArgSpec(args=['self', 'learning_rate'], varargs=None, keywords='kwargs', defaults=None)
+paddle.fluid.optimizer.SGDOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'regularization', 'name'], varargs=None, keywords=None, defaults=(None, None))
 paddle.fluid.optimizer.SGDOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
-paddle.fluid.optimizer.MomentumOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'momentum', 'use_nesterov'], varargs=None, keywords='kwargs', defaults=(False,))
+paddle.fluid.optimizer.MomentumOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'momentum', 'use_nesterov', 'regularization', 'name'], varargs=None, keywords=None, defaults=(False, None, None))
 paddle.fluid.optimizer.MomentumOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
-paddle.fluid.optimizer.AdagradOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'epsilon'], varargs=None, keywords='kwargs', defaults=(1e-06,))
+paddle.fluid.optimizer.AdagradOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'epsilon', 'regularization', 'name'], varargs=None, keywords=None, defaults=(1e-06, None, None))
 paddle.fluid.optimizer.AdagradOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
-paddle.fluid.optimizer.AdamOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'beta1', 'beta2', 'epsilon'], varargs=None, keywords='kwargs', defaults=(0.001, 0.9, 0.999, 1e-08))
+paddle.fluid.optimizer.AdamOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'beta1', 'beta2', 'epsilon', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.001, 0.9, 0.999, 1e-08, None, None))
 paddle.fluid.optimizer.AdamOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
-paddle.fluid.optimizer.AdamaxOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'beta1', 'beta2', 'epsilon'], varargs=None, keywords='kwargs', defaults=(0.001, 0.9, 0.999, 1e-08))
+paddle.fluid.optimizer.AdamaxOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'beta1', 'beta2', 'epsilon', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.001, 0.9, 0.999, 1e-08, None, None))
 paddle.fluid.optimizer.AdamaxOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
-paddle.fluid.optimizer.DecayedAdagradOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'decay', 'epsilon'], varargs=None, keywords='kwargs', defaults=(0.95, 1e-06))
+paddle.fluid.optimizer.DecayedAdagradOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'decay', 'epsilon', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.95, 1e-06, None, None))
 paddle.fluid.optimizer.DecayedAdagradOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
-paddle.fluid.optimizer.FtrlOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'l1', 'l2', 'lr_power'], varargs=None, keywords='kwargs', defaults=(0.0, 0.0, -0.5))
+paddle.fluid.optimizer.FtrlOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'l1', 'l2', 'lr_power', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.0, 0.0, -0.5, None, None))
 paddle.fluid.optimizer.FtrlOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
-paddle.fluid.optimizer.RMSPropOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'rho', 'epsilon', 'momentum', 'centered'], varargs=None, keywords='kwargs', defaults=(0.95, 1e-06, 0.0, False))
+paddle.fluid.optimizer.RMSPropOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'rho', 'epsilon', 'momentum', 'centered', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.95, 1e-06, 0.0, False, None, None))
 paddle.fluid.optimizer.RMSPropOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
-paddle.fluid.optimizer.AdadeltaOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'epsilon', 'rho'], varargs=None, keywords='kwargs', defaults=(1e-06, 0.95))
+paddle.fluid.optimizer.AdadeltaOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'epsilon', 'rho', 'regularization', 'name'], varargs=None, keywords=None, defaults=(1e-06, 0.95, None, None))
 paddle.fluid.optimizer.AdadeltaOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
-paddle.fluid.optimizer.ModelAverage.__init__ ArgSpec(args=['self', 'average_window_rate', 'min_average_window', 'max_average_window'], varargs=None, keywords='kwargs', defaults=(10000, 10000))
+paddle.fluid.optimizer.ModelAverage.__init__ ArgSpec(args=['self', 'average_window_rate', 'min_average_window', 'max_average_window', 'regularization', 'name'], varargs=None, keywords=None, defaults=(10000, 10000, None, None))
 paddle.fluid.optimizer.ModelAverage.apply ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
 paddle.fluid.optimizer.ModelAverage.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
 paddle.fluid.optimizer.ModelAverage.restore ArgSpec(args=['self', 'executor'], varargs=None, keywords=None, defaults=None)
diff --git a/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc b/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
index bf893e32569f4b50a583ab6f43cb214ec3620e09..36bbec473114cfd2e68c97a53264957477ade3fb 100644
--- a/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
@@ -103,108 +103,74 @@ void GetOneBatch(std::vector<PaddleTensor> *input_slots, DataRecord *data,
   input_slots->assign({input_tensor});
 }
 
-const int64_t lac_ref_data[] = {24, 25, 25, 25, 38, 30, 31, 14, 15, 44, 24, 25,
-                                25, 25, 25, 25, 44, 24, 25, 25, 25, 36, 42, 43,
-                                44, 14, 15, 44, 14, 15, 44, 14, 15, 44, 38, 39,
-                                14, 15, 44, 22, 23, 23, 23, 23, 23, 23, 23};
-
-void TestLACPrediction(const std::string &model_path,
-                       const std::string &data_file, const int batch_size,
-                       const int repeat, bool use_analysis = false) {
-  AnalysisConfig cfg;
-  cfg.model_dir = model_path;
-  cfg.use_gpu = false;
-  cfg.device = 0;
-  cfg.specify_input_name = true;
-  cfg.enable_ir_optim = true;
+void SetConfig(AnalysisConfig *cfg) {
+  cfg->model_dir = FLAGS_infer_model;
+  cfg->use_gpu = false;
+  cfg->device = 0;
+  cfg->specify_input_name = true;
+  cfg->enable_ir_optim = true;
+}
 
-  std::vector<PaddleTensor> input_slots, outputs_slots;
-  DataRecord data(data_file, batch_size);
-  GetOneBatch(&input_slots, &data, batch_size);
-
-  std::unique_ptr<PaddlePredictor> predictor;
-  if (use_analysis) {
-    predictor =
-        CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(cfg);
-  } else {
-    predictor =
-        CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(cfg);
-  }
-  for (int i = 0; i < FLAGS_burning; i++) {
-    predictor->Run(input_slots, &outputs_slots);
+void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
+  DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
+  std::vector<PaddleTensor> input_slots;
+  int epoch = FLAGS_test_all_data ? data.batched_datas.size() : 1;
+  LOG(INFO) << "number of samples: " << epoch;
+  for (int bid = 0; bid < epoch; ++bid) {
+    GetOneBatch(&input_slots, &data, FLAGS_batch_size);
+    (*inputs).emplace_back(input_slots);
   }
-  Timer timer;
-  if (FLAGS_test_all_data) {
-    LOG(INFO) << "test all data";
-    std::vector<std::vector<PaddleTensor>> input_slots_all;
-    for (size_t bid = 0; bid < data.batched_datas.size(); ++bid) {
-      GetOneBatch(&input_slots, &data, batch_size);
-      input_slots_all.emplace_back(input_slots);
-    }
-    LOG(INFO) << "total number of samples: " << data.datasets.size();
-    TestPrediction(cfg, input_slots_all, &outputs_slots, FLAGS_num_threads);
-    return;
-  }
-  timer.tic();
-  for (int i = 0; i < repeat; i++) {
-    predictor->Run(input_slots, &outputs_slots);
-  }
-  PrintTime(batch_size, repeat, 1, 0, timer.toc() / repeat);
+}
 
-  // check result
-  EXPECT_EQ(outputs_slots.size(), 1UL);
-  auto &out = outputs_slots[0];
-  size_t size = std::accumulate(out.shape.begin(), out.shape.end(), 1,
-                                [](int a, int b) { return a * b; });
-  size_t batch1_size = sizeof(lac_ref_data) / sizeof(int64_t);
-  PADDLE_ENFORCE_GT(size, 0);
-  EXPECT_GE(size, batch1_size);
-  int64_t *pdata = static_cast<int64_t *>(out.data.data());
-  for (size_t i = 0; i < batch1_size; ++i) {
-    EXPECT_EQ(pdata[i], lac_ref_data[i]);
-  }
+// Easy for profiling independently.
+TEST(Analyzer_LAC, profile) {
+  AnalysisConfig cfg;
+  SetConfig(&cfg);
+  std::vector<PaddleTensor> outputs;
 
-  if (use_analysis) {
-    // run once for comparion as reference
-    auto ref_predictor =
-        CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(cfg);
-    std::vector<PaddleTensor> ref_outputs_slots;
-    ref_predictor->Run(input_slots, &ref_outputs_slots);
-    CompareResult(ref_outputs_slots, outputs_slots);
+  std::vector<std::vector<PaddleTensor>> input_slots_all;
+  SetInput(&input_slots_all);
+  TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);
 
-    AnalysisPredictor *analysis_predictor =
-        dynamic_cast<AnalysisPredictor *>(predictor.get());
-    auto &fuse_statis = analysis_predictor->analysis_argument()
-                            .Get<std::unordered_map<std::string, int>>(
-                                framework::ir::kFuseStatisAttr);
-    for (auto &item : fuse_statis) {
-      LOG(INFO) << "fused " << item.first << " " << item.second;
-    }
-    int num_ops = 0;
-    for (auto &node :
-         analysis_predictor->analysis_argument().main_dfg->nodes.nodes()) {
-      if (node->IsFunction()) {
-        ++num_ops;
-      }
+  if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
+    // the first inference result
+    const int64_t lac_ref_data[] = {
+        24, 25, 25, 25, 38, 30, 31, 14, 15, 44, 24, 25, 25, 25, 25, 25,
+        44, 24, 25, 25, 25, 36, 42, 43, 44, 14, 15, 44, 14, 15, 44, 14,
+        15, 44, 38, 39, 14, 15, 44, 22, 23, 23, 23, 23, 23, 23, 23};
+    PADDLE_ENFORCE_EQ(outputs.size(), 1UL);
+    size_t size = GetSize(outputs[0]);
+    size_t batch1_size = sizeof(lac_ref_data) / sizeof(int64_t);
+    PADDLE_ENFORCE_GE(size, batch1_size);
+    int64_t *pdata = static_cast<int64_t *>(outputs[0].data.data());
+    for (size_t i = 0; i < batch1_size; ++i) {
+      EXPECT_EQ(pdata[i], lac_ref_data[i]);
     }
-    LOG(INFO) << "has num ops: " << num_ops;
-    ASSERT_TRUE(fuse_statis.count("fc_fuse"));
-    ASSERT_TRUE(fuse_statis.count("fc_gru_fuse"));
-    EXPECT_EQ(fuse_statis.at("fc_fuse"), 1);
-    EXPECT_EQ(fuse_statis.at("fc_gru_fuse"), 4);
-    EXPECT_EQ(num_ops, 11);
   }
 }
 
-TEST(Analyzer_LAC, native) {
-  LOG(INFO) << "LAC with native";
-  TestLACPrediction(FLAGS_infer_model, FLAGS_infer_data, FLAGS_batch_size,
-                    FLAGS_repeat);
+// Check the fuse status
+TEST(Analyzer_LAC, fuse_statis) {
+  AnalysisConfig cfg;
+  SetConfig(&cfg);
+
+  int num_ops;
+  auto fuse_statis = GetFuseStatis(cfg, &num_ops);
+  ASSERT_TRUE(fuse_statis.count("fc_fuse"));
+  ASSERT_TRUE(fuse_statis.count("fc_gru_fuse"));
+  EXPECT_EQ(fuse_statis.at("fc_fuse"), 1);
+  EXPECT_EQ(fuse_statis.at("fc_gru_fuse"), 4);
+  EXPECT_EQ(num_ops, 11);
 }
 
-TEST(Analyzer_LAC, analysis) {
-  LOG(INFO) << "LAC with analysis";
-  TestLACPrediction(FLAGS_infer_model, FLAGS_infer_data, FLAGS_batch_size,
-                    FLAGS_repeat, true);
+// Compare result of NativeConfig and AnalysisConfig
+TEST(Analyzer_LAC, compare) {
+  AnalysisConfig cfg;
+  SetConfig(&cfg);
+
+  std::vector<std::vector<PaddleTensor>> input_slots_all;
+  SetInput(&input_slots_all);
+  CompareNativeAndAnalysis(cfg, input_slots_all);
 }
 
 }  // namespace analysis
diff --git a/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc b/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
index f8c651e32f7e2ce1d8ced0e6774ffd555d351167..8cf230a51d05c3a141f7cfd4e30bf30f064f0989 100644
--- a/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
@@ -95,97 +95,73 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
   }
 }
 
-// the first inference result
-const int chinese_ner_result_data[] = {30, 45, 41, 48, 17, 26,
-                                       48, 39, 38, 16, 25};
-
-void TestChineseNERPrediction(bool use_analysis) {
-  AnalysisConfig cfg;
-  cfg.prog_file = FLAGS_infer_model + "/__model__";
-  cfg.param_file = FLAGS_infer_model + "/param";
-  cfg.use_gpu = false;
-  cfg.device = 0;
-  cfg.specify_input_name = true;
-  cfg.enable_ir_optim = true;
-
-  std::vector<PaddleTensor> input_slots, outputs;
-  std::unique_ptr<PaddlePredictor> predictor;
-  Timer timer;
-  if (use_analysis) {
-    predictor =
-        CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(cfg);
-  } else {
-    predictor =
-        CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(cfg);
-  }
+void SetConfig(AnalysisConfig *cfg) {
+  cfg->prog_file = FLAGS_infer_model + "/__model__";
+  cfg->param_file = FLAGS_infer_model + "/param";
+  cfg->use_gpu = false;
+  cfg->device = 0;
+  cfg->specify_input_name = true;
+  cfg->enable_ir_optim = true;
+}
 
-  if (FLAGS_test_all_data) {
-    LOG(INFO) << "test all data";
-    DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
-    std::vector<std::vector<PaddleTensor>> input_slots_all;
-    for (size_t bid = 0; bid < data.num_samples / FLAGS_batch_size; ++bid) {
-      PrepareInputs(&input_slots, &data, FLAGS_batch_size);
-      input_slots_all.emplace_back(input_slots);
-    }
-    LOG(INFO) << "total number of samples: " << data.num_samples;
-    TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);
-    return;
-  }
-  // Prepare inputs.
+void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
   DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
-  PrepareInputs(&input_slots, &data, FLAGS_batch_size);
-
-  timer.tic();
-  for (int i = 0; i < FLAGS_repeat; i++) {
-    predictor->Run(input_slots, &outputs);
+  std::vector<PaddleTensor> input_slots;
+  int epoch = FLAGS_test_all_data ? data.num_samples / FLAGS_batch_size : 1;
+  LOG(INFO) << "number of samples: " << epoch * FLAGS_batch_size;
+  for (int bid = 0; bid < epoch; ++bid) {
+    PrepareInputs(&input_slots, &data, FLAGS_batch_size);
+    (*inputs).emplace_back(input_slots);
   }
-  PrintTime(FLAGS_batch_size, FLAGS_repeat, 1, 0, timer.toc() / FLAGS_repeat);
+}
 
-  PADDLE_ENFORCE(outputs.size(), 1UL);
-  auto &out = outputs[0];
-  size_t size = std::accumulate(out.shape.begin(), out.shape.end(), 1,
-                                [](int a, int b) { return a * b; });
-  PADDLE_ENFORCE_GT(size, 0);
-  int64_t *result = static_cast<int64_t *>(out.data.data());
-  for (size_t i = 0; i < std::min(11UL, size); i++) {
-    PADDLE_ENFORCE(result[i], chinese_ner_result_data[i]);
-  }
+// Easy for profiling independently.
+TEST(Analyzer_Chinese_ner, profile) {
+  AnalysisConfig cfg;
+  SetConfig(&cfg);
+  std::vector<PaddleTensor> outputs;
 
-  if (use_analysis) {
-    // run once for comparion as reference
-    auto ref_predictor =
-        CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(cfg);
-    std::vector<PaddleTensor> ref_outputs_slots;
-    ref_predictor->Run(input_slots, &ref_outputs_slots);
-    CompareResult(ref_outputs_slots, outputs);
+  std::vector<std::vector<PaddleTensor>> input_slots_all;
+  SetInput(&input_slots_all);
+  TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);
 
-    AnalysisPredictor *analysis_predictor =
-        dynamic_cast<AnalysisPredictor *>(predictor.get());
-    auto &fuse_statis = analysis_predictor->analysis_argument()
-                            .Get<std::unordered_map<std::string, int>>(
-                                framework::ir::kFuseStatisAttr);
-    for (auto &item : fuse_statis) {
-      LOG(INFO) << "fused " << item.first << " " << item.second;
-    }
-    int num_ops = 0;
-    for (auto &node :
-         analysis_predictor->analysis_argument().main_dfg->nodes.nodes()) {
-      if (node->IsFunction()) {
-        ++num_ops;
-      }
+  if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
+    // the first inference result
+    const int chinese_ner_result_data[] = {30, 45, 41, 48, 17, 26,
+                                           48, 39, 38, 16, 25};
+    PADDLE_ENFORCE_EQ(outputs.size(), 1UL);
+    size_t size = GetSize(outputs[0]);
+    PADDLE_ENFORCE_GT(size, 0);
+    int64_t *result = static_cast<int64_t *>(outputs[0].data.data());
+    for (size_t i = 0; i < std::min(11UL, size); i++) {
+      EXPECT_EQ(result[i], chinese_ner_result_data[i]);
     }
-    LOG(INFO) << "has num ops: " << num_ops;
-    ASSERT_TRUE(fuse_statis.count("fc_fuse"));
-    ASSERT_TRUE(fuse_statis.count("fc_gru_fuse"));
-    EXPECT_EQ(fuse_statis.at("fc_fuse"), 1);
-    EXPECT_EQ(fuse_statis.at("fc_gru_fuse"), 2);
-    EXPECT_EQ(num_ops, 14);
   }
 }
 
-TEST(Analyzer_Chinese_ner, native) { TestChineseNERPrediction(false); }
+// Check the fuse status
+TEST(Analyzer_Chinese_ner, fuse_statis) {
+  AnalysisConfig cfg;
+  SetConfig(&cfg);
 
-TEST(Analyzer_Chinese_ner, analysis) { TestChineseNERPrediction(true); }
+  int num_ops;
+  auto fuse_statis = GetFuseStatis(cfg, &num_ops);
+  ASSERT_TRUE(fuse_statis.count("fc_fuse"));
+  ASSERT_TRUE(fuse_statis.count("fc_gru_fuse"));
+  EXPECT_EQ(fuse_statis.at("fc_fuse"), 1);
+  EXPECT_EQ(fuse_statis.at("fc_gru_fuse"), 2);
+  EXPECT_EQ(num_ops, 14);
+}
+
+// Compare result of NativeConfig and AnalysisConfig
+TEST(Analyzer_Chinese_ner, compare) {
+  AnalysisConfig cfg;
+  SetConfig(&cfg);
+
+  std::vector<std::vector<PaddleTensor>> input_slots_all;
+  SetInput(&input_slots_all);
+  CompareNativeAndAnalysis(cfg, input_slots_all);
+}
 
 }  // namespace inference
 }  // namespace paddle
diff --git a/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc b/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
index df96be544eaf51c52aa5592966f499fad91aab82..14bdf76efc71b326bd130858ea246be81c9bd45c 100644
--- a/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
@@ -25,6 +25,7 @@ struct DataRecord {
   std::vector<size_t> lod1, lod2, lod3;
   std::vector<std::vector<float>> rnn_link_data, rnn_week_datas,
       rnn_minute_datas;
+  size_t num_samples;  // total number of samples
   size_t batch_iter{0};
   size_t batch_size{1};
   DataRecord() = default;
@@ -97,6 +98,7 @@ struct DataRecord {
       week_data_all.push_back(std::move(week_data));
       minute_data_all.push_back(std::move(minute_data));
     }
+    num_samples = num_lines;
   }
 };
 void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
@@ -147,89 +149,72 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
   }
 }
 
-// Test with a really complicate model.
-void TestRNN1Prediction(bool use_analysis, bool activate_ir, int num_threads) {
-  AnalysisConfig config;
-  config.prog_file = FLAGS_infer_model + "/__model__";
-  config.param_file = FLAGS_infer_model + "/param";
-  config.use_gpu = false;
-  config.device = 0;
-  config.specify_input_name = true;
-  config.enable_ir_optim = activate_ir;
-  PADDLE_ENFORCE(config.ir_mode ==
-                 AnalysisConfig::IrPassMode::kExclude);  // default
-  config.ir_passes.clear();  // Do not exclude any pass.
-
-  int batch_size = FLAGS_batch_size;
+void SetConfig(AnalysisConfig *cfg) {
+  cfg->prog_file = FLAGS_infer_model + "/__model__";
+  cfg->param_file = FLAGS_infer_model + "/param";
+  cfg->use_gpu = false;
+  cfg->device = 0;
+  cfg->specify_input_name = true;
+  cfg->enable_ir_optim = true;
+  cfg->ir_passes.clear();  // Do not exclude any pass.
+}
 
-  auto base_predictor =
-      CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
-  auto predictor =
-      CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
-          config);
+void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
+  DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
   std::vector<PaddleTensor> input_slots;
-  DataRecord data(FLAGS_infer_data, batch_size);
-  // Prepare inputs.
-  PrepareInputs(&input_slots, &data, batch_size);
-  std::vector<PaddleTensor> outputs, base_outputs;
+  int epoch = FLAGS_test_all_data ? data.num_samples / FLAGS_batch_size : 1;
+  LOG(INFO) << "number of samples: " << epoch * FLAGS_batch_size;
+  for (int bid = 0; bid < epoch; ++bid) {
+    PrepareInputs(&input_slots, &data, FLAGS_batch_size);
+    (*inputs).emplace_back(input_slots);
+  }
+}
 
-  base_predictor->Run(input_slots, &base_outputs);
+// Easy for profiling independently.
+TEST(Analyzer_rnn1, profile) {
+  AnalysisConfig cfg;
+  SetConfig(&cfg);
+  std::vector<PaddleTensor> outputs;
 
   std::vector<std::vector<PaddleTensor>> input_slots_all;
-  input_slots_all.emplace_back(input_slots);
-  if (num_threads == 1) {
-    TestOneThreadPrediction(config, input_slots_all, &outputs);
-    CompareResult(outputs, base_outputs);
-  } else {
-    // only return the output of first thread
-    TestMultiThreadPrediction(config, input_slots_all, &outputs, num_threads);
-  }
+  SetInput(&input_slots_all);
+  TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);
+}
 
-  if (use_analysis && activate_ir) {
-    AnalysisPredictor *analysis_predictor =
-        dynamic_cast<AnalysisPredictor *>(predictor.get());
-    auto &fuse_statis = analysis_predictor->analysis_argument()
-                            .Get<std::unordered_map<std::string, int>>(
-                                framework::ir::kFuseStatisAttr);
-    for (auto &item : fuse_statis) {
-      LOG(INFO) << "fused " << item.first << " " << item.second;
-    }
+// Check the fuse status
+TEST(Analyzer_rnn1, fuse_statis) {
+  AnalysisConfig cfg;
+  SetConfig(&cfg);
 
-    int num_ops = 0;
-    for (auto &node :
-         analysis_predictor->analysis_argument().main_dfg->nodes.nodes()) {
-      if (node->IsFunction()) {
-        ++num_ops;
-      }
-    }
-    LOG(INFO) << "has num ops: " << num_ops;
+  int num_ops;
+  auto fuse_statis = GetFuseStatis(cfg, &num_ops);
+  ASSERT_TRUE(fuse_statis.count("fc_fuse"));
+  EXPECT_EQ(fuse_statis.at("fc_fuse"), 1);
+  EXPECT_EQ(fuse_statis.at("fc_nobias_lstm_fuse"), 2);  // bi-directional LSTM
+  EXPECT_EQ(fuse_statis.at("seq_concat_fc_fuse"), 1);
+  EXPECT_EQ(num_ops,
+            13);  // After graph optimization, only 13 operators exists.
+}
 
-    ASSERT_TRUE(fuse_statis.count("fc_fuse"));
-    EXPECT_EQ(fuse_statis.at("fc_fuse"), 1);
-    EXPECT_EQ(fuse_statis.at("fc_nobias_lstm_fuse"), 2);  // bi-directional LSTM
-    EXPECT_EQ(fuse_statis.at("seq_concat_fc_fuse"), 1);
-    EXPECT_EQ(num_ops,
-              13);  // After graph optimization, only 13 operators exists.
-  }
+// Compare result of NativeConfig and AnalysisConfig
+TEST(Analyzer_rnn1, compare) {
+  AnalysisConfig cfg;
+  SetConfig(&cfg);
+
+  std::vector<std::vector<PaddleTensor>> input_slots_all;
+  SetInput(&input_slots_all);
+  CompareNativeAndAnalysis(cfg, input_slots_all);
 }
 
-// Inference with analysis and IR, easy for profiling independently.
-TEST(Analyzer, rnn1) { TestRNN1Prediction(true, true, FLAGS_num_threads); }
+// Test Multi-Thread.
+TEST(Analyzer_rnn1, multi_thread) {
+  AnalysisConfig cfg;
+  SetConfig(&cfg);
+  std::vector<PaddleTensor> outputs;
 
-// Other unit-tests of RNN1, test different options of use_analysis,
-// activate_ir and multi-threads.
-TEST(Analyzer, RNN_tests) {
-  int num_threads[2] = {1, 4};
-  for (auto i : num_threads) {
-    // Directly infer with the original model.
-    TestRNN1Prediction(false, false, i);
-    // Inference with the original model with the analysis turned on, the
-    // analysis module will transform the program to a data flow graph.
-    TestRNN1Prediction(true, false, i);
-    // Inference with analysis and IR. The IR module will fuse some large
-    // kernels.
-    TestRNN1Prediction(true, true, i);
-  }
+  std::vector<std::vector<PaddleTensor>> input_slots_all;
+  SetInput(&input_slots_all);
+  TestPrediction(cfg, input_slots_all, &outputs, 4 /* num_threads */);
 }
 
 }  // namespace inference
diff --git a/paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc b/paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc
index c40ea58eea9c10a85acf84108f1d081a779f526d..ba04d030b94c0924311dcff5c6a34270a764f877 100644
--- a/paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc
@@ -12,24 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/fluid/inference/analysis/analyzer.h"
-
-#include <google/protobuf/text_format.h>
-#include <gtest/gtest.h>
-#include <thread>  // NOLINT
-#include "paddle/fluid/framework/ir/fuse_pass_base.h"
-#include "paddle/fluid/framework/ir/pass.h"
-#include "paddle/fluid/inference/analysis/ut_helper.h"
-#include "paddle/fluid/inference/api/analysis_predictor.h"
-#include "paddle/fluid/inference/api/helper.h"
-#include "paddle/fluid/inference/api/paddle_inference_api.h"
-#include "paddle/fluid/inference/api/paddle_inference_pass.h"
-
-DEFINE_string(infer_model, "", "model path");
-DEFINE_string(infer_data, "", "data path");
-DEFINE_int32(batch_size, 1, "batch size.");
-DEFINE_int32(repeat, 1, "Running the inference program repeat times.");
-DEFINE_int32(num_threads, 1, "Running the inference program in multi-threads.");
+#include "paddle/fluid/inference/tests/api/tester_helper.h"
 
 namespace paddle {
 namespace inference {
@@ -41,6 +24,7 @@ struct DataRecord {
   std::vector<size_t> lod;
   std::vector<std::vector<float>> rnn_link_data;
   std::vector<float> result_data;
+  size_t num_samples;  // total number of samples
   size_t batch_iter{0};
   size_t batch_size{1};
   DataRecord() = default;
@@ -100,6 +84,7 @@ struct DataRecord {
       result_data.insert(result_data.end(), tmp.begin(), tmp.end());
     }
   }
+    num_samples = num_lines / 2;
  }
 };
 void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
@@ -118,64 +103,58 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
   input_slots->assign({feed_tensor});
 }
 
-void CompareResult(const std::vector<PaddleTensor> &outputs,
-                   const std::vector<float> &base_result) {
-  PADDLE_ENFORCE_GT(outputs.size(), 0);
-  for (size_t i = 0; i < outputs.size(); i++) {
-    auto &out = outputs[i];
-    size_t size = std::accumulate(out.shape.begin(), out.shape.end(), 1,
-                                  [](int a, int b) { return a * b; });
-    PADDLE_ENFORCE_GT(size, 0);
-    float *data = static_cast<float *>(out.data.data());
-    for (size_t i = 0; i < size; i++) {
-      EXPECT_NEAR(data[i], base_result[i], 1e-3);
-    }
+void SetConfig(AnalysisConfig *cfg) {
+  cfg->prog_file = FLAGS_infer_model + "/__model__";
+  cfg->param_file = FLAGS_infer_model + "/param";
+  cfg->use_gpu = false;
+  cfg->device = 0;
+  cfg->specify_input_name = true;
+  cfg->enable_ir_optim = true;
+}
+
+void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
+  DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
+  std::vector<PaddleTensor> input_slots;
+  int epoch = FLAGS_test_all_data ? data.num_samples / FLAGS_batch_size : 1;
+  LOG(INFO) << "number of samples: " << epoch * FLAGS_batch_size;
+  for (int bid = 0; bid < epoch; ++bid) {
+    PrepareInputs(&input_slots, &data, FLAGS_batch_size);
+    (*inputs).emplace_back(input_slots);
   }
 }
 
-// Test with a really complicate model.
-void TestRNN2Prediction() {
-  AnalysisConfig config;
-  config.prog_file = FLAGS_infer_model + "/__model__";
-  config.param_file = FLAGS_infer_model + "/param";
-  config.use_gpu = false;
-  config.device = 0;
-  config.specify_input_name = true;
-  config.enable_ir_optim = true;
-  PADDLE_ENFORCE(config.ir_mode ==
-                 AnalysisConfig::IrPassMode::kExclude);  // default
-  int batch_size = FLAGS_batch_size;
-  int num_times = FLAGS_repeat;
+// Easy for profiling independently.
+TEST(Analyzer_rnn2, profile) {
+  AnalysisConfig cfg;
+  SetConfig(&cfg);
+  std::vector<PaddleTensor> outputs;
 
-  auto base_predictor =
-      CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
-  auto predictor =
-      CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
-          config);
-  std::vector<PaddleTensor> input_slots;
-  DataRecord data(FLAGS_infer_data, batch_size);
-  PrepareInputs(&input_slots, &data, batch_size);
-  std::vector<PaddleTensor> outputs, base_outputs;
+  std::vector<std::vector<PaddleTensor>> input_slots_all;
+  SetInput(&input_slots_all);
+  TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);
 
-  Timer timer1;
-  timer1.tic();
-  for (int i = 0; i < num_times; i++) {
-    base_predictor->Run(input_slots, &base_outputs);
+  if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
+    // the first inference result
+    DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
+    PADDLE_ENFORCE_GT(outputs.size(), 0);
+    size_t size = GetSize(outputs[0]);
+    PADDLE_ENFORCE_GT(size, 0);
+    float *result = static_cast<float *>(outputs[0].data.data());
+    for (size_t i = 0; i < size; i++) {
+      EXPECT_NEAR(result[i], data.result_data[i], 1e-3);
+    }
   }
-  PrintTime(batch_size, num_times, 1, 0, timer1.toc() / num_times);
+}
 
-  Timer timer2;
-  timer2.tic();
-  for (int i = 0; i < num_times; i++) {
-    predictor->Run(input_slots, &outputs);
-  }
-  PrintTime(batch_size, num_times, 1, 0, timer2.toc() / num_times);
+// Compare result of NativeConfig and AnalysisConfig
+TEST(Analyzer_rnn2, compare) {
+  AnalysisConfig cfg;
+  SetConfig(&cfg);
 
-  CompareResult(base_outputs, data.result_data);
-  CompareResult(outputs, data.result_data);
+  std::vector<std::vector<PaddleTensor>> input_slots_all;
+  SetInput(&input_slots_all);
+  CompareNativeAndAnalysis(cfg, input_slots_all);
 }
 
-TEST(Analyzer, rnn2) { TestRNN2Prediction(); }
-
 }  // namespace inference
 }  // namespace paddle
diff --git a/paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc b/paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc
index 1472c475e4a3061ffcad96925ea215a41a7e63eb..340ef152f0b1a15a451f840b36ae845ef4984740 100644
--- a/paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc
@@ -46,54 +46,63 @@ struct DataReader {
   std::unique_ptr<std::ifstream> file;
 };
 
-void Main(int batch_size) {
-  // shape --
-  // Create Predictor --
-  AnalysisConfig config;
-  config.model_dir = FLAGS_infer_model;
-  config.use_gpu = false;
-  config.enable_ir_optim = true;
+void SetConfig(AnalysisConfig *cfg) {
+  cfg->model_dir = FLAGS_infer_model;
+  cfg->use_gpu = false;
+  cfg->device = 0;
+  cfg->specify_input_name = true;
+  cfg->enable_ir_optim = true;
+}
 
-  std::vector<PaddleTensor> input_slots, output_slots;
+void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
+  std::vector<PaddleTensor> input_slots;
   DataReader reader(FLAGS_infer_data);
-  std::vector<std::vector<PaddleTensor>> input_slots_all;
-
-  if (FLAGS_test_all_data) {
-    LOG(INFO) << "test all data";
-    int num_batches = 0;
-    while (reader.NextBatch(&input_slots, FLAGS_batch_size)) {
-      input_slots_all.emplace_back(input_slots);
-      ++num_batches;
-    }
-    LOG(INFO) << "total number of samples: " << num_batches * FLAGS_batch_size;
-    TestPrediction(config, input_slots_all, &output_slots, FLAGS_num_threads);
-    return;
+  int num_batches = 0;
+  while (reader.NextBatch(&input_slots, FLAGS_batch_size)) {
+    (*inputs).emplace_back(input_slots);
+    ++num_batches;
+    if (!FLAGS_test_all_data) return;
   }
+  LOG(INFO) << "total number of samples: " << num_batches * FLAGS_batch_size;
+}
 
-  // one batch starts
-  // data --
-  reader.NextBatch(&input_slots, FLAGS_batch_size);
-  input_slots_all.emplace_back(input_slots);
-  TestPrediction(config, input_slots_all, &output_slots, FLAGS_num_threads);
+// Easy for profiling independently.
+TEST(Analyzer_Text_Classification, profile) {
+  AnalysisConfig cfg;
+  SetConfig(&cfg);
+  std::vector<PaddleTensor> outputs;
 
-  // Get output
-  LOG(INFO) << "get outputs " << output_slots.size();
+  std::vector<std::vector<PaddleTensor>> input_slots_all;
+  SetInput(&input_slots_all);
+  TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);
 
-  for (auto &output : output_slots) {
-    LOG(INFO) << "output.shape: " << to_string(output.shape);
-    // no lod ?
-    CHECK_EQ(output.lod.size(), 0UL);
-    LOG(INFO) << "output.dtype: " << output.dtype;
-    std::stringstream ss;
-    for (int i = 0; i < 5; i++) {
-      ss << static_cast<float *>(output.data.data())[i] << " ";
+  if (FLAGS_num_threads == 1) {
+    // Get output
+    LOG(INFO) << "get outputs " << outputs.size();
+    for (auto &output : outputs) {
+      LOG(INFO) << "output.shape: " << to_string(output.shape);
+      // no lod ?
+      CHECK_EQ(output.lod.size(), 0UL);
+      LOG(INFO) << "output.dtype: " << output.dtype;
+      std::stringstream ss;
+      for (int i = 0; i < 5; i++) {
+        ss << static_cast<float *>(output.data.data())[i] << " ";
+      }
+      LOG(INFO) << "output.data summary: " << ss.str();
+      // one batch ends
     }
-    LOG(INFO) << "output.data summary: " << ss.str();
-    // one batch ends
   }
 }
 
-TEST(text_classification, basic) { Main(FLAGS_batch_size); }
+// Compare result of NativeConfig and AnalysisConfig
+TEST(Analyzer_Text_Classification, compare) {
+  AnalysisConfig cfg;
+  SetConfig(&cfg);
+
+  std::vector<std::vector<PaddleTensor>> input_slots_all;
+  SetInput(&input_slots_all);
+  CompareNativeAndAnalysis(cfg, input_slots_all);
+}
 
 }  // namespace inference
 }  // namespace paddle
diff --git a/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc b/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
index a207c41b7140c806b4c1fdc7f24a317b165c9aef..483ae66c5b24f6147b1b07da86494a914f80c34c 100644
--- a/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
@@ -49,84 +49,83 @@ Record ProcessALine(const std::string &line) {
   return record;
 }
 
-/*
- * Use the native and analysis fluid engine to inference the demo.
- * ocr, mobilenet and se_resnext50
- */
-void TestVisualPrediction(bool use_mkldnn) {
-  std::unique_ptr<PaddlePredictor> predictor;
-  AnalysisConfig cfg;
-  cfg.param_file = FLAGS_infer_model + "/__params__";
-  cfg.prog_file = FLAGS_infer_model + "/__model__";
-  cfg.use_gpu = false;
-  cfg._use_mkldnn = use_mkldnn;
-  cfg.device = 0;
-  cfg.enable_ir_optim = true;
+void SetConfig(AnalysisConfig *cfg) {
+  cfg->param_file = FLAGS_infer_model + "/__params__";
+  cfg->prog_file = FLAGS_infer_model + "/__model__";
+  cfg->use_gpu = false;
+  cfg->device = 0;
+  cfg->enable_ir_optim = true;
+  cfg->specify_input_name = true;
   // TODO(TJ): fix fusion gru
-  cfg.ir_passes.push_back("fc_gru_fuse_pass");
+  cfg->ir_passes.push_back("fc_gru_fuse_pass");
 #ifdef PADDLE_WITH_MKLDNN
+  cfg->_use_mkldnn = true;
   // disable mkldnn fuse since it should have some bugs
-  cfg.ir_passes.push_back("conv_relu_mkldnn_fuse_pass");
+  cfg->ir_passes.push_back("conv_relu_mkldnn_fuse_pass");
 #endif
-  predictor =
-      CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(cfg);
+}
 
-  // Only have single batch of data.
+void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
+  PADDLE_ENFORCE_EQ(FLAGS_test_all_data, 0, "Only have single batch of data.");
   std::string line;
   std::ifstream file(FLAGS_infer_data);
   std::getline(file, line);
   auto record = ProcessALine(line);
-  file.close();
 
-  // Inference.
   PaddleTensor input;
   input.shape = record.shape;
-  input.data =
-      PaddleBuf(record.data.data(), record.data.size() * sizeof(float));
   input.dtype = PaddleDType::FLOAT32;
+  size_t input_size = record.data.size() * sizeof(float);
+  input.data.Resize(input_size);
+  memcpy(input.data.data(), record.data.data(), input_size);
+  std::vector<PaddleTensor> input_slots;
+  input_slots.assign({input});
+  (*inputs).emplace_back(input_slots);
+}
 
-  std::vector<PaddleTensor> outputs_slots;
-  Timer timer;
-  timer.tic();
-  for (int i = 0; i < FLAGS_repeat; i++) {
-    predictor->Run({input}, &outputs_slots);
-  }
-  PrintTime(/*batch size*/ 1, FLAGS_repeat, /*num threads*/ 1, /*thread id*/ 0,
-            timer.toc() / FLAGS_repeat);
-
-  VLOG(3) << "output.size " << outputs_slots.size();
-
-  // run native as reference
-  auto ref_predictor =
-      CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(cfg);
-  std::vector<PaddleTensor> ref_outputs_slots;
-  ref_predictor->Run({input}, &ref_outputs_slots);
-  CompareResult(outputs_slots, ref_outputs_slots);
-  // print what are fused
-  AnalysisPredictor *analysis_predictor =
-      dynamic_cast<AnalysisPredictor *>(predictor.get());
-  auto &fuse_statis = analysis_predictor->analysis_argument()
-                          .Get<std::unordered_map<std::string, int>>(
-                              framework::ir::kFuseStatisAttr);
-  for (auto &item : fuse_statis) {
-    LOG(INFO) << "fused " << item.first << " " << item.second;
-  }
-  int num_ops = 0;
-  for (auto &node :
-       analysis_predictor->analysis_argument().main_dfg->nodes.nodes()) {
-    if (node->IsFunction()) {
-      ++num_ops;
+// Easy for profiling independently.
+// ocr, mobilenet and se_resnext50
+TEST(Analyzer_vis, profile) {
+  AnalysisConfig cfg;
+  SetConfig(&cfg);
+  std::vector<PaddleTensor> outputs;
+
+  std::vector<std::vector<PaddleTensor>> input_slots_all;
+  SetInput(&input_slots_all);
+  TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);
+
+  if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
+    const float ocr_result_data[] = {
+        5.273636460856323538e-08, 3.296741795111302054e-07,
+        1.873261190610264748e-08, 3.403730275408634043e-08,
+        3.383312474625199684e-08};
+    PADDLE_ENFORCE_EQ(outputs.size(), 1UL);
+    size_t size = GetSize(outputs[0]);
+    PADDLE_ENFORCE_GT(size, 0);
+    float *result = static_cast<float *>(outputs[0].data.data());
+    for (size_t i = 0; i < std::min(5UL, size); i++) {
+      EXPECT_NEAR(result[i], ocr_result_data[i], 1e-3);
     }
   }
-  LOG(INFO) << "has num ops: " << num_ops;
 }
 
-TEST(Analyzer_vis, analysis) { TestVisualPrediction(/*use_mkldnn*/ false); }
-#ifdef PADDLE_WITH_MKLDNN
-TEST(Analyzer_vis, analysis_mkldnn) {
-  TestVisualPrediction(/*use_mkldnn*/ true);
+// Check the fuse status
+TEST(Analyzer_vis, fuse_statis) {
+  AnalysisConfig cfg;
+  SetConfig(&cfg);
+  int num_ops;
+  GetFuseStatis(cfg, &num_ops);
+}
+
+// Compare result of NativeConfig and AnalysisConfig
+TEST(Analyzer_vis, compare) {
+  AnalysisConfig cfg;
+  SetConfig(&cfg);
+
+  std::vector<std::vector<PaddleTensor>> input_slots_all;
+  SetInput(&input_slots_all);
+  CompareNativeAndAnalysis(cfg, input_slots_all);
 }
-#endif
 
 }  // namespace analysis
 }  // namespace inference
diff --git a/paddle/fluid/inference/tests/api/tester_helper.h b/paddle/fluid/inference/tests/api/tester_helper.h
index 43e97614e3ad9c14c8deee9f340757f373eb593e..384a40a3f992d1a9734e3189b422be0ce6adb938 100644
--- a/paddle/fluid/inference/tests/api/tester_helper.h
+++ b/paddle/fluid/inference/tests/api/tester_helper.h
@@ -15,6 +15,7 @@
 #pragma once
 
 #include <gtest/gtest.h>
+#include <string>
 #include <thread>  // NOLINT
 #include <vector>
 #include "paddle/fluid/framework/ir/fuse_pass_base.h"
@@ -28,17 +29,18 @@
 DEFINE_string(infer_model, "", "model path");
 DEFINE_string(infer_data, "", "data file");
 DEFINE_int32(batch_size, 1, "batch size.");
-DEFINE_int32(burning, 0, "Burning before repeat.");
 DEFINE_int32(repeat, 1, "Running the inference program repeat times.");
 DEFINE_bool(test_all_data, false, "Test the all dataset in data file.");
 DEFINE_int32(num_threads, 1, "Running the inference program in multi-threads.");
+DEFINE_bool(use_analysis, true,
+            "Running the inference program in analysis mode.");
 
 namespace paddle {
 namespace inference {
 
 void CompareResult(const std::vector<PaddleTensor> &outputs,
                    const std::vector<PaddleTensor> &ref_outputs) {
-  EXPECT_GT(outputs.size(), 0);
+  EXPECT_GT(outputs.size(), 0UL);
   EXPECT_EQ(outputs.size(), ref_outputs.size());
   for (size_t i = 0; i < outputs.size(); i++) {
     auto &out = outputs[i];
@@ -72,14 +74,50 @@ void CompareResult(const std::vector<PaddleTensor> &outputs,
   }
 }
 
+std::unique_ptr<PaddlePredictor> GetPrediction(AnalysisConfig config,
+                                               bool use_analysis = true) {
+  if (use_analysis) {
+    return CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
+        config);
+  } else {
+    return CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(
+        config);
+  }
+}
+
+size_t GetSize(const PaddleTensor &out) {
+  return std::accumulate(out.shape.begin(), out.shape.end(), 1,
+                         [](int a, int b) { return a * b; });
+}
+
+std::unordered_map<std::string, int> GetFuseStatis(AnalysisConfig config,
+                                                   int *num_ops) {
+  auto predictor = GetPrediction(config);
+  AnalysisPredictor *analysis_predictor =
+      dynamic_cast<AnalysisPredictor *>(predictor.get());
+  auto &fuse_statis = analysis_predictor->analysis_argument()
+                          .Get<std::unordered_map<std::string, int>>(
+                              framework::ir::kFuseStatisAttr);
+  for (auto &item : fuse_statis) {
+    LOG(INFO) << "fused " << item.first << " " << item.second;
+  }
+  int num = 0;
+  for (auto &node :
+       analysis_predictor->analysis_argument().main_dfg->nodes.nodes()) {
+    if (node->IsFunction()) {
+      ++num;
+    }
+  }
+  *num_ops = num;
+  return fuse_statis;
+}
+
 void TestOneThreadPrediction(
     AnalysisConfig config, const std::vector<std::vector<PaddleTensor>> inputs,
-    std::vector<PaddleTensor> *outputs) {
+    std::vector<PaddleTensor> *outputs, bool use_analysis = true) {
   int batch_size = FLAGS_batch_size;
   int num_times = FLAGS_repeat;
-  auto predictor =
-      CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
-          config);
+  auto predictor = GetPrediction(config, use_analysis);
   Timer timer;
   timer.tic();
   for (int i = 0; i < num_times; i++) {
@@ -93,7 +131,8 @@ void TestOneThreadPrediction(
 
 void TestMultiThreadPrediction(
     AnalysisConfig config, const std::vector<std::vector<PaddleTensor>> inputs,
-    std::vector<PaddleTensor> *outputs, int num_threads) {
+    std::vector<PaddleTensor> *outputs, int num_threads,
+    bool use_analysis = true) {
   int batch_size = FLAGS_batch_size;
   int num_times = FLAGS_repeat;
   std::vector<std::thread> threads;
@@ -101,9 +140,7 @@ void TestMultiThreadPrediction(
   // TODO(yanchunwei): Bug here, the analyzer phase can't be parallelled
   // because AttentionLSTM's hard code nodeid will be damanged.
   for (int tid = 0; tid < num_threads; ++tid) {
-    predictors.emplace_back(
-        CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
-            config));
+    predictors.emplace_back(GetPrediction(config, use_analysis));
   }
   for (int tid = 0; tid < num_threads; ++tid) {
     threads.emplace_back([&, tid]() {
@@ -129,13 +166,25 @@ void TestMultiThreadPrediction(
 
 void TestPrediction(AnalysisConfig config,
                     const std::vector<std::vector<PaddleTensor>> inputs,
-                    std::vector<PaddleTensor> *outputs, int num_threads) {
+                    std::vector<PaddleTensor> *outputs, int num_threads,
+                    bool use_analysis = FLAGS_use_analysis) {
+  LOG(INFO) << "use_analysis: " << use_analysis;
   if (num_threads == 1) {
-    TestOneThreadPrediction(config, inputs, outputs);
+    TestOneThreadPrediction(config, inputs, outputs, use_analysis);
   } else {
-    TestMultiThreadPrediction(config, inputs, outputs, num_threads);
+    TestMultiThreadPrediction(config, inputs, outputs, num_threads,
+                              use_analysis);
   }
 }
 
+void CompareNativeAndAnalysis(
+    AnalysisConfig config,
+    const std::vector<std::vector<PaddleTensor>> inputs) {
+  std::vector<PaddleTensor> native_outputs, analysis_outputs;
+  TestOneThreadPrediction(config, inputs, &native_outputs, false);
+  TestOneThreadPrediction(config, inputs, &analysis_outputs, true);
+  CompareResult(analysis_outputs, native_outputs);
+}
+
 }  // namespace inference
 }  // namespace paddle
diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py
index ef7b16a19e10a28bd1cc34496fb908580c5d7330..ad09005d866b10146e6fcd7cf108c51f34322607 100644
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -43,11 +43,7 @@ class Optimizer(object):
     but need to use one of it's implementation.
""" - def __init__(self, - learning_rate, - regularization=None, - LARS_weight_decay=0.0, - name=None): + def __init__(self, learning_rate, regularization=None, name=None): if not isinstance(learning_rate, float) and \ not isinstance(learning_rate, framework.Variable): raise TypeError("learning rate should be float or Variable") @@ -68,7 +64,6 @@ class Optimizer(object): # {accum_name : { paramter_name : accumulator_for_parameter, ...}, ...} self._accumulators = defaultdict(lambda: dict()) self.helper = None - self._LARS_weight_decay = LARS_weight_decay def _create_global_learning_rate(self): lr = self._global_learning_rate() @@ -109,7 +104,6 @@ class Optimizer(object): param = param_and_grad[0] param_lr = param.optimize_attr['learning_rate'] if type(param_lr) == Variable: - # param learning rate has been updated (LARS) print("returns updated param lr ", param_lr) return param_lr else: @@ -227,10 +221,6 @@ class Optimizer(object): self._create_accumulators(loss.block, [p[0] for p in parameters_and_grads]) self._create_global_learning_rate() - if self._LARS_weight_decay > 0.0: - layers.append_LARS(parameters_and_grads, - self._global_learning_rate(), - self._LARS_weight_decay) optimize_ops = [] for param_and_grad in parameters_and_grads: @@ -287,6 +277,9 @@ class SGDOptimizer(Optimizer): Args: learning_rate (float|Variable): the learning rate used to update parameters. \ Can be a float value or a Variable with one float value as data element. + regularization: A Regularizer, such as + fluid.regularizer.L2DecayRegularizer. + name: A optional name prefix. Examples: .. code-block:: python @@ -295,10 +288,12 @@ class SGDOptimizer(Optimizer): sgd_optimizer.minimize(cost) """ - def __init__(self, learning_rate, **kwargs): + def __init__(self, learning_rate, regularization=None, name=None): assert learning_rate is not None super(SGDOptimizer, self).__init__( - learning_rate=learning_rate, **kwargs) + learning_rate=learning_rate, + regularization=regularization, + name=name) self.type = "sgd" def _append_optimize_op(self, block, param_and_grad): @@ -343,6 +338,9 @@ class MomentumOptimizer(Optimizer): Can be a float value or a Variable with one float value as data element. momentum (float): momentum factor use_nesterov (bool): enables Nesterov momentum + regularization: A Regularizer, such as + fluid.regularizer.L2DecayRegularizer. + name: A optional name prefix. Examples: .. code-block:: python @@ -352,11 +350,18 @@ class MomentumOptimizer(Optimizer): """ _velocity_acc_str = "velocity" - def __init__(self, learning_rate, momentum, use_nesterov=False, **kwargs): + def __init__(self, + learning_rate, + momentum, + use_nesterov=False, + regularization=None, + name=None): assert learning_rate is not None assert momentum is not None super(MomentumOptimizer, self).__init__( - learning_rate=learning_rate, **kwargs) + learning_rate=learning_rate, + regularization=regularization, + name=name) self.type = "momentum" self._momentum = momentum self._use_nesterov = bool(use_nesterov) @@ -412,6 +417,9 @@ class AdagradOptimizer(Optimizer): learning_rate (float|Variable): the learning rate used to update parameters. \ Can be a float value or a Variable with one float value as data element. epsilon (float): a small float value for numerical stability. + regularization: A Regularizer, such as + fluid.regularizer.L2DecayRegularizer. + name: A optional name prefix. Examples: .. 
         .. code-block:: python
@@ -421,11 +429,17 @@
     """
     _moment_acc_str = "moment"
 
-    def __init__(self, learning_rate, epsilon=1.0e-6, **kwargs):
+    def __init__(self,
+                 learning_rate,
+                 epsilon=1.0e-6,
+                 regularization=None,
+                 name=None):
         assert learning_rate is not None
         assert epsilon is not None
         super(AdagradOptimizer, self).__init__(
-            learning_rate=learning_rate, **kwargs)
+            learning_rate=learning_rate,
+            regularization=regularization,
+            name=name)
         self.type = "adagrad"
         self._epsilon = epsilon
 
@@ -485,6 +499,9 @@ class AdamOptimizer(Optimizer):
         beta1 (float): The exponential decay rate for the 1st moment estimates.
         beta2 (float): The exponential decay rate for the 2nd moment estimates.
         epsilon (float): a small float value for numerical stability.
+        regularization: A Regularizer, such as
+                        fluid.regularizer.L2DecayRegularizer.
+        name: A optional name prefix.
 
     Examples:
         .. code-block:: python
@@ -503,13 +520,16 @@
                  beta1=0.9,
                  beta2=0.999,
                  epsilon=1e-8,
-                 **kwargs):
+                 regularization=None,
+                 name=None):
         assert learning_rate is not None
         assert beta1 is not None
         assert beta2 is not None
         assert epsilon is not None
         super(AdamOptimizer, self).__init__(
-            learning_rate=learning_rate, **kwargs)
+            learning_rate=learning_rate,
+            regularization=regularization,
+            name=name)
         self.type = "adam"
         self._beta1 = beta1
         self._beta2 = beta2
@@ -629,6 +649,9 @@ class AdamaxOptimizer(Optimizer):
         beta1 (float): The exponential decay rate for the 1st moment estimates.
         beta2 (float): The exponential decay rate for the 2nd moment estimates.
         epsilon (float): a small float value for numerical stability.
+        regularization: A Regularizer, such as
+                        fluid.regularizer.L2DecayRegularizer.
+        name: A optional name prefix.
 
     Examples:
         .. code-block:: python
@@ -645,13 +668,16 @@
                  beta1=0.9,
                  beta2=0.999,
                  epsilon=1e-8,
-                 **kwargs):
+                 regularization=None,
+                 name=None):
         assert learning_rate is not None
         assert beta1 is not None
         assert beta2 is not None
         assert epsilon is not None
         super(AdamaxOptimizer, self).__init__(
-            learning_rate=learning_rate, **kwargs)
+            learning_rate=learning_rate,
+            regularization=regularization,
+            name=name)
         self.type = "adamax"
         self._beta1 = beta1
         self._beta2 = beta2
@@ -742,6 +768,9 @@ class DecayedAdagradOptimizer(Optimizer):
         Can be a float value or a Variable with one float value as data element.
         decay (float): decay rate.
         epsilon (float): a small float value for numerical stability.
+        regularization: A Regularizer, such as
+                        fluid.regularizer.L2DecayRegularizer.
+        name: A optional name prefix.
 
     Examples:
         .. code-block:: python
@@ -751,13 +780,20 @@
     """
     _moment_acc_str = "moment"
 
-    def __init__(self, learning_rate, decay=0.95, epsilon=1.0e-6, **kwargs):
+    def __init__(self,
+                 learning_rate,
+                 decay=0.95,
+                 epsilon=1.0e-6,
+                 regularization=None,
+                 name=None):
         assert learning_rate is not None
         assert decay is not None
         assert epsilon is not None
         super(DecayedAdagradOptimizer, self).__init__(
-            learning_rate=learning_rate, **kwargs)
+            learning_rate=learning_rate,
+            regularization=regularization,
+            name=name)
         self.type = "decayed_adagrad"
         self._decay = decay
         self._epsilon = epsilon
 
@@ -811,6 +847,9 @@ class AdadeltaOptimizer(Optimizer):
         learning_rate(float): global learning rate
         rho(float): rho in equation
         epsilon(float): epsilon in equation
+        regularization: A Regularizer, such as
+                        fluid.regularizer.L2DecayRegularizer.
+        name: A optional name prefix.
 
     Examples:
         .. code-block:: python
@@ -823,7 +862,12 @@
     _avg_squared_grad_acc_str = "_avg_squared_grad"
     _avg_squared_update_acc_str = "_avg_squared_update"
 
-    def __init__(self, learning_rate, epsilon=1.0e-6, rho=0.95, **kwargs):
+    def __init__(self,
+                 learning_rate,
+                 epsilon=1.0e-6,
+                 rho=0.95,
+                 regularization=None,
+                 name=None):
         if learning_rate is None:
             raise ValueError("learning_rate is not set.")
         if epsilon is None:
@@ -831,7 +875,9 @@
         if rho is None:
             raise ValueError("rho is not set.")
         super(AdadeltaOptimizer, self).__init__(
-            learning_rate=learning_rate, **kwargs)
+            learning_rate=learning_rate,
+            regularization=regularization,
+            name=name)
         self.type = "adadelta"
         self._epsilon = epsilon
         self._rho = rho
@@ -932,6 +978,9 @@ class RMSPropOptimizer(Optimizer):
             the gradient; if False, by the uncentered second moment. Setting this to
             True may help with training, but is slightly more expensive in terms of
            computation and memory. Defaults to False.
+        regularization: A Regularizer, such as
+                        fluid.regularizer.L2DecayRegularizer.
+        name: A optional name prefix.
 
     Raises:
         ValueError: If learning_rate, rho, epsilon, momentum are None.
@@ -953,9 +1002,12 @@
                  epsilon=1.0e-6,
                  momentum=0.0,
                  centered=False,
-                 **kwargs):
+                 regularization=None,
+                 name=None):
         super(RMSPropOptimizer, self).__init__(
-            learning_rate=learning_rate, **kwargs)
+            learning_rate=learning_rate,
+            regularization=regularization,
+            name=name)
         if learning_rate is None:
             raise ValueError("learning_rate is not set.")
         if rho is None:
@@ -1061,6 +1113,9 @@ class FtrlOptimizer(Optimizer):
         l1 (float):
         l2 (float):
         lr_power (float):
+        regularization: A Regularizer, such as
+                        fluid.regularizer.L2DecayRegularizer.
+        name: A optional name prefix.
 
     Raises:
         ValueError: If learning_rate, rho, epsilon, momentum are None.
@@ -1075,9 +1130,17 @@
     _squared_acc_str = "squared"
     _linear_acc_str = "linear"
 
-    def __init__(self, learning_rate, l1=0.0, l2=0.0, lr_power=-0.5, **kwargs):
+    def __init__(self,
+                 learning_rate,
+                 l1=0.0,
+                 l2=0.0,
+                 lr_power=-0.5,
+                 regularization=None,
+                 name=None):
         super(FtrlOptimizer, self).__init__(
-            learning_rate=learning_rate, **kwargs)
+            learning_rate=learning_rate,
+            regularization=regularization,
+            name=name)
 
         if learning_rate is None:
             raise ValueError("learning_rate is not set.")
@@ -1155,7 +1218,9 @@ class ModelAverage(Optimizer):
         average_window_rate: The rate of average window.
         min_average_window: The minimum size of average window.
         max_average_window: The maximum size of average window.
-
+        regularization: A Regularizer, such as
+                        fluid.regularizer.L2DecayRegularizer.
+        name: A optional name prefix.
     Examples:
         .. code-block:: python
@@ -1178,8 +1243,10 @@
                  average_window_rate,
                  min_average_window=10000,
                  max_average_window=10000,
-                 **kwargs):
-        super(ModelAverage, self).__init__(0.0, **kwargs)
+                 regularization=None,
+                 name=None):
+        super(ModelAverage, self).__init__(
+            0.0, regularization=regularization, name=name)
         self.average_window = average_window_rate
         self.min_average_window = min_average_window
         self.max_average_window = max_average_window
diff --git a/python/paddle/fluid/regularizer.py b/python/paddle/fluid/regularizer.py
index 8f4678649f2146c84150ad2659e497bbf0365d03..a4336e955f21b0b09bf3dadbd437855c06745860 100644
--- a/python/paddle/fluid/regularizer.py
+++ b/python/paddle/fluid/regularizer.py
@@ -190,14 +190,11 @@ class L1DecayRegularizer(WeightDecayRegularizer):
     Examples:
         .. code-block:: python
 
-            program = fluid.framework.Program()
-            block = program.global_block()
-            mul_x = block.create_parameter(
-                dtype="float32",
-                shape=[5, 10],
-                lod_level=0,
-                name="mul.x",
-                regularizer=fluid.regularizer.L1DecayRegularizer(0.5))
+            optimizer = fluid.optimizer.Adagrad(
+                learning_rate=1e-4,
+                regularization=fluid.regularizer.L1DecayRegularizer(
+                    regularization_coeff=0.1))
+            optimizer.minimize(avg_cost)
     """
 
     def __init__(self, regularization_coeff=0.0):
diff --git a/python/paddle/fluid/tests/book/test_recognize_digits.py b/python/paddle/fluid/tests/book/test_recognize_digits.py
index 135f11d24c8fde35995cfe577859874ca8428cd4..4b4f3e403776625fb5ca2f9b03d14ee7efe23d53 100644
--- a/python/paddle/fluid/tests/book/test_recognize_digits.py
+++ b/python/paddle/fluid/tests/book/test_recognize_digits.py
@@ -99,7 +99,7 @@ def train(nn_type,
 
     test_program = fluid.default_main_program().clone(for_test=True)
 
-    optimizer = fluid.optimizer.Adam(learning_rate=0.001, LARS_weight_decay=0.3)
+    optimizer = fluid.optimizer.Adam(learning_rate=0.001)
     optimizer.minimize(avg_loss)
 
     place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt
index 88d36fe639c7437b478efb2ee292d792677403d9..f53fe6d69d0855c8ba88eac8059708b690d2475b 100644
--- a/python/paddle/fluid/tests/unittests/CMakeLists.txt
+++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt
@@ -34,12 +34,13 @@ if(APPLE)
         list(REMOVE_ITEM TEST_OPS test_desc_clone)
         list(REMOVE_ITEM TEST_OPS test_program_code)
     endif(NOT WITH_DISTRIBUTE)
-    message(WARNING "These tests has been disabled in OSX before being fixed: \n test_detection_map_op \n test_dist_se_resnext")
+    message(WARNING "These tests has been disabled in OSX before being fixed: \n test_fuse_elewise_add_act_pass \n test_detection_map_op \n test_dist_se_resnext")
     # this op is not support on mac
     list(REMOVE_ITEM TEST_OPS test_fusion_seqexpand_concat_fc_op)
     # TODO: add the unitest back when it fixed
     list(REMOVE_ITEM TEST_OPS test_detection_map_op)
     list(REMOVE_ITEM TEST_OPS test_dist_se_resnext)
+    list(REMOVE_ITEM TEST_OPS test_fuse_elewise_add_act_pass)
 endif()
 
 function(py_test_modules TARGET_NAME)