diff --git a/deploy/cpp_infer/src/main.cpp b/deploy/cpp_infer/src/main.cpp
index 01397ef929607c1f5dedd39aa2b19c17e7996f16..a54308383f21251fbc649ed9941e3b781c6d1594 100644
--- a/deploy/cpp_infer/src/main.cpp
+++ b/deploy/cpp_infer/src/main.cpp
@@ -18,6 +18,7 @@
 #include <chrono>
 #include <iomanip>
 #include <iostream>
+#include <opencv2/core/utils/filesystem.hpp>
 #include <ostream>
 #include <vector>
 
@@ -43,26 +44,48 @@ int main(int argc, char **argv) {
   config.PrintConfigInfo();
 
-  std::string img_path(argv[2]);
+  std::string path(argv[2]);
 
-  cv::Mat srcimg = cv::imread(img_path, cv::IMREAD_COLOR);
-  cv::cvtColor(srcimg, srcimg, cv::COLOR_BGR2RGB);
+  std::vector<std::string> img_files_list;
+  if (cv::utils::fs::isDirectory(path)) {
+    std::vector<cv::String> filenames;
+    cv::glob(path, filenames);
+    for (auto f : filenames) {
+      img_files_list.push_back(f);
+    }
+  } else {
+    img_files_list.push_back(path);
+  }
+
+  std::cout << "img_file_list length: " << img_files_list.size() << std::endl;
 
   Classifier classifier(config.cls_model_dir, config.use_gpu, config.gpu_id,
                         config.gpu_mem, config.cpu_math_library_num_threads,
                         config.use_mkldnn, config.use_zero_copy_run,
                         config.resize_short_size, config.crop_size);
 
-  auto start = std::chrono::system_clock::now();
-  classifier.Run(srcimg);
-  auto end = std::chrono::system_clock::now();
-  auto duration =
-      std::chrono::duration_cast<std::chrono::microseconds>(end - start);
-  std::cout << "Cost "
-            << double(duration.count()) *
-                   std::chrono::microseconds::period::num /
-                   std::chrono::microseconds::period::den
-            << " s" << std::endl;
+  double elapsed_time = 0.0;
+  int warmup_iter = img_files_list.size() > 5 ? 5 : 0;
+  for (int idx = 0; idx < img_files_list.size(); ++idx) {
+    std::string img_path = img_files_list[idx];
+    cv::Mat srcimg = cv::imread(img_path, cv::IMREAD_COLOR);
+    cv::cvtColor(srcimg, srcimg, cv::COLOR_BGR2RGB);
+
+    auto start = std::chrono::system_clock::now();
+    classifier.Run(srcimg);
+    auto end = std::chrono::system_clock::now();
+    auto duration =
+        std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+    double curr_time = double(duration.count()) *
+                       std::chrono::microseconds::period::num /
+                       std::chrono::microseconds::period::den;
+    if (idx >= warmup_iter) {
+      elapsed_time += curr_time;
+      std::cout << "Current time cost: " << curr_time << " s, "
+                << "average time cost in all: "
+                << elapsed_time / (idx + 1 - warmup_iter) << " s."
+                << std::endl;
+    } else {
+      std::cout << "Current time cost: " << curr_time << " s." << std::endl;
+    }
+  }
 
   return 0;
 }
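For readers porting this pattern elsewhere, here is a self-contained sketch of the warmup-aware timing loop introduced above. `BenchmarkAverage` and `run_once` are illustrative names, not part of the PaddleClas API; the classifier call is abstracted behind a callback.

```cpp
#include <chrono>
#include <functional>
#include <string>
#include <vector>

// Warmup-aware benchmarking: the first `warmup_iter` runs are executed but
// excluded from the average, so one-time costs (model load, CUDA context,
// cache warm-up) do not skew the per-image figure.
double BenchmarkAverage(const std::vector<std::string> &inputs,
                        const std::function<void(const std::string &)> &run_once,
                        int warmup_iter) {
  double elapsed = 0.0;
  for (size_t i = 0; i < inputs.size(); ++i) {
    auto start = std::chrono::steady_clock::now();
    run_once(inputs[i]);
    auto end = std::chrono::steady_clock::now();
    if (static_cast<int>(i) >= warmup_iter) {
      elapsed += std::chrono::duration<double>(end - start).count();
    }
  }
  int timed = static_cast<int>(inputs.size()) - warmup_iter;
  return timed > 0 ? elapsed / timed : 0.0;  // average seconds per input
}
```

Excluding the warmup runs is why the patch sets `warmup_iter` to 5 only when more than 5 images are available: with a single input there is nothing left to average after warmup.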
diff --git a/tools/program.py b/tools/program.py
index a55bacefd86fc505dd5395978f00dc12a895d018..1154f8e620249515176a9959d31c02fedfe289de 100644
--- a/tools/program.py
+++ b/tools/program.py
@@ -295,16 +295,11 @@ def run(dataloader,
         feeds = create_feeds(batch, use_mix)
         fetchs = create_fetchs(feeds, net, config, mode)
         if mode == 'train':
-            if config["use_data_parallel"]:
-                avg_loss = net.scale_loss(fetchs['loss'])
-                avg_loss.backward()
-                net.apply_collective_grads()
-            else:
-                avg_loss = fetchs['loss']
-                avg_loss.backward()
+            avg_loss = fetchs['loss']
+            avg_loss.backward()
 
-            optimizer.minimize(avg_loss)
-            net.clear_gradients()
+            optimizer.step()
+            optimizer.clear_grad()
 
         metric_list['lr'].update(
             optimizer._global_learning_rate().numpy()[0], batch_size)
diff --git a/tools/train.py b/tools/train.py
index 61612b4dd99408c47ead7718445570b8de7f68dc..db6235819c23482c31ba18614e98513357860f9f 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -63,13 +63,15 @@ def main(args):
     use_data_parallel = int(os.getenv("PADDLE_TRAINERS_NUM", 1)) != 1
     config["use_data_parallel"] = use_data_parallel
 
+    if config["use_data_parallel"]:
+        strategy = paddle.distributed.init_parallel_env()
+
     net = program.create_model(config.ARCHITECTURE, config.classes_num)
 
     optimizer, lr_scheduler = program.create_optimizer(
         config, parameter_list=net.parameters())
 
     if config["use_data_parallel"]:
-        strategy = paddle.distributed.init_parallel_env()
         net = paddle.DataParallel(net, strategy)
 
     # load model from checkpoint or pretrained model
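The `tools/program.py` change adopts the Paddle 2.0 dygraph update cycle: `optimizer.step()` and `optimizer.clear_grad()` replace `optimizer.minimize()` and `net.clear_gradients()`, and the manual `scale_loss` / `apply_collective_grads` branch is dropped because the 2.0 `DataParallel` synchronizes gradients during `backward()`. A minimal sketch of the new cycle, with a toy layer and random batch standing in for the PaddleClas model and dataloader:

```python
import paddle

# Toy stand-ins: a linear layer for the model, random tensors for one batch.
model = paddle.nn.Linear(10, 2)
opt = paddle.optimizer.SGD(learning_rate=0.01, parameters=model.parameters())

x = paddle.randn([4, 10])
y = paddle.randint(0, 2, [4])

loss = paddle.nn.functional.cross_entropy(model(x), y)
loss.backward()    # accumulate gradients on the parameters
opt.step()         # apply the update (replaces optimizer.minimize(avg_loss))
opt.clear_grad()   # reset gradients (replaces net.clear_gradients())
```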
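The `tools/train.py` change moves `init_parallel_env()` ahead of model construction, so the parallel environment exists before any parameters are created and `DataParallel` wraps a fully built network. A hedged sketch of the resulting ordering, following the API signatures used at this revision and meant to be launched with `python -m paddle.distributed.launch` (the toy layer stands in for `program.create_model`):

```python
import paddle

# 1. Set up the multi-process environment before any parameters exist.
strategy = paddle.distributed.init_parallel_env()

# 2. Build the model and optimizer inside the initialized environment.
net = paddle.nn.Linear(10, 2)  # stand-in for program.create_model(...)
opt = paddle.optimizer.SGD(learning_rate=0.01, parameters=net.parameters())

# 3. Wrap last, so DataParallel broadcasts fully constructed parameters
#    and hooks gradient all-reduce into backward().
net = paddle.DataParallel(net, strategy)
```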