#include "seg_predictor.h" #include #undef min namespace PaddleSolution { int Predictor::init(const std::string& conf) { if (!_model_config.load_config(conf)) { LOG(FATAL) << "Fail to load config file: [" << conf << "]"; return -1; } _preprocessor = PaddleSolution::create_processor(conf); if (_preprocessor == nullptr) { LOG(FATAL) << "Failed to create_processor"; return -1; } _mask.resize(_model_config._resize[0] * _model_config._resize[1]); _scoremap.resize(_model_config._resize[0] * _model_config._resize[1]); bool use_gpu = _model_config._use_gpu; const auto& model_dir = _model_config._model_path; const auto& model_filename = _model_config._model_file_name; const auto& params_filename = _model_config._param_file_name; // load paddle model file if (_model_config._predictor_mode == "NATIVE") { paddle::NativeConfig config; auto prog_file = utils::path_join(model_dir, model_filename); auto param_file = utils::path_join(model_dir, params_filename); config.prog_file = prog_file; config.param_file = param_file; config.fraction_of_gpu_memory = 0; config.use_gpu = use_gpu; config.device = 0; _main_predictor = paddle::CreatePaddlePredictor(config); } else if (_model_config._predictor_mode == "ANALYSIS") { paddle::AnalysisConfig config; if (use_gpu) { config.EnableUseGpu(100, 0); } auto prog_file = utils::path_join(model_dir, model_filename); auto param_file = utils::path_join(model_dir, params_filename); config.SetModel(prog_file, param_file); config.SwitchUseFeedFetchOps(false); config.SwitchSpecifyInputNames(true); config.EnableMemoryOptim(); _main_predictor = paddle::CreatePaddlePredictor(config); } else { return -1; } return 0; } int Predictor::predict(const std::vector& imgs) { if (_model_config._predictor_mode == "NATIVE") { return native_predict(imgs); } else if (_model_config._predictor_mode == "ANALYSIS") { return analysis_predict(imgs); } return -1; } int Predictor::output_mask(const std::string& fname, float* p_out, int length, int* height, int* width) { int eval_width = _model_config._resize[0]; int eval_height = _model_config._resize[1]; int eval_num_class = _model_config._class_num; int blob_out_len = length; int seg_out_len = eval_height * eval_width * eval_num_class; if (blob_out_len != seg_out_len) { LOG(ERROR) << " [FATAL] unequal: input vs output [" << seg_out_len << "|" << blob_out_len << "]" << std::endl; return -1; } //post process _mask.clear(); _scoremap.clear(); std::vector out_shape{eval_num_class, eval_height, eval_width}; utils::argmax(p_out, out_shape, _mask, _scoremap); cv::Mat mask_png = cv::Mat(eval_height, eval_width, CV_8UC1); mask_png.data = _mask.data(); std::string nname(fname); auto pos = fname.find("."); nname[pos] = '_'; std::string mask_save_name = nname + ".png"; cv::imwrite(mask_save_name, mask_png); cv::Mat scoremap_png = cv::Mat(eval_height, eval_width, CV_8UC1); scoremap_png.data = _scoremap.data(); std::string scoremap_save_name = nname + std::string("_scoremap.png"); cv::imwrite(scoremap_save_name, scoremap_png); std::cout << "save mask of [" << fname << "] done" << std::endl; if (height && width) { int recover_height = *height; int recover_width = *width; cv::Mat recover_png = cv::Mat(recover_height, recover_width, CV_8UC1); cv::resize(scoremap_png, recover_png, cv::Size(recover_width, recover_height), 0, 0, cv::INTER_CUBIC); std::string recover_name = nname + std::string("_recover.png"); cv::imwrite(recover_name, recover_png); } return 0; } int Predictor::native_predict(const std::vector& imgs) { if (imgs.size() == 0) { LOG(ERROR) << "No image found"; return -1; } int config_batch_size = _model_config._batch_size; int channels = _model_config._channels; int eval_width = _model_config._resize[0]; int eval_height = _model_config._resize[1]; std::size_t total_size = imgs.size(); int default_batch_size = std::min(config_batch_size, (int)total_size); int batch = total_size / default_batch_size + ((total_size % default_batch_size) != 0); int batch_buffer_size = default_batch_size * channels * eval_width * eval_height; auto& input_buffer = _buffer; auto& org_width = _org_width; auto& org_height = _org_height; auto& imgs_batch = _imgs_batch; input_buffer.resize(batch_buffer_size); org_width.resize(default_batch_size); org_height.resize(default_batch_size); for (int u = 0; u < batch; ++u) { int batch_size = default_batch_size; if (u == (batch - 1) && (total_size % default_batch_size)) { batch_size = total_size % default_batch_size; } int real_buffer_size = batch_size * channels * eval_width * eval_height; std::vector feeds; input_buffer.resize(real_buffer_size); org_height.resize(batch_size); org_width.resize(batch_size); for (int i = 0; i < batch_size; ++i) { org_width[i] = org_height[i] = 0; } imgs_batch.clear(); for (int i = 0; i < batch_size; ++i) { int idx = u * default_batch_size + i; imgs_batch.push_back(imgs[idx]); } if (!_preprocessor->batch_process(imgs_batch, input_buffer.data(), org_width.data(), org_height.data())) { return -1; } paddle::PaddleTensor im_tensor; im_tensor.name = "image"; im_tensor.shape = std::vector({ batch_size, channels, eval_height, eval_width }); im_tensor.data.Reset(input_buffer.data(), real_buffer_size * sizeof(float)); im_tensor.dtype = paddle::PaddleDType::FLOAT32; feeds.push_back(im_tensor); _outputs.clear(); auto t1 = std::chrono::high_resolution_clock::now(); if (!_main_predictor->Run(feeds, &_outputs, batch_size)) { LOG(ERROR) << "Failed: NativePredictor->Run() return false at batch: " << u; continue; } auto t2 = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(t2 - t1).count(); std::cout << "runtime = " << duration << std::endl; int out_num = 1; // print shape of first output tensor for debugging std::cout << "size of outputs[" << 0 << "]: ("; for (int j = 0; j < _outputs[0].shape.size(); ++j) { out_num *= _outputs[0].shape[j]; std::cout << _outputs[0].shape[j] << ","; } std::cout << ")" << std::endl; const size_t nums = _outputs.front().data.length() / sizeof(float); if (out_num % batch_size != 0 || out_num != nums) { LOG(ERROR) << "outputs data size mismatch with shape size."; return -1; } for (int i = 0; i < batch_size; ++i) { float* output_addr = (float*)(_outputs[0].data.data()) + i * (out_num / batch_size); output_mask(imgs_batch[i], output_addr, out_num / batch_size, &org_height[i], &org_width[i]); } } return 0; } int Predictor::analysis_predict(const std::vector& imgs) { if (imgs.size() == 0) { LOG(ERROR) << "No image found"; return -1; } int config_batch_size = _model_config._batch_size; int channels = _model_config._channels; int eval_width = _model_config._resize[0]; int eval_height = _model_config._resize[1]; auto total_size = imgs.size(); int default_batch_size = std::min(config_batch_size, (int)total_size); int batch = total_size / default_batch_size + ((total_size % default_batch_size) != 0); int batch_buffer_size = default_batch_size * channels * eval_width * eval_height; auto& input_buffer = _buffer; auto& org_width = _org_width; auto& org_height = _org_height; auto& imgs_batch = _imgs_batch; input_buffer.resize(batch_buffer_size); org_width.resize(default_batch_size); org_height.resize(default_batch_size); for (int u = 0; u < batch; ++u) { int batch_size = default_batch_size; if (u == (batch - 1) && (total_size % default_batch_size)) { batch_size = total_size % default_batch_size; } int real_buffer_size = batch_size * channels * eval_width * eval_height; std::vector feeds; input_buffer.resize(real_buffer_size); org_height.resize(batch_size); org_width.resize(batch_size); for (int i = 0; i < batch_size; ++i) { org_width[i] = org_height[i] = 0; } imgs_batch.clear(); for (int i = 0; i < batch_size; ++i) { int idx = u * default_batch_size + i; imgs_batch.push_back(imgs[idx]); } if (!_preprocessor->batch_process(imgs_batch, input_buffer.data(), org_width.data(), org_height.data())) { return -1; } auto im_tensor = _main_predictor->GetInputTensor("image"); im_tensor->Reshape({ batch_size, channels, eval_height, eval_width }); im_tensor->copy_from_cpu(input_buffer.data()); auto t1 = std::chrono::high_resolution_clock::now(); _main_predictor->ZeroCopyRun(); auto t2 = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(t2 - t1).count(); std::cout << "runtime = " << duration << std::endl; auto output_names = _main_predictor->GetOutputNames(); auto output_t = _main_predictor->GetOutputTensor(output_names[0]); std::vector out_data; std::vector output_shape = output_t->shape(); int out_num = 1; std::cout << "size of outputs[" << 0 << "]: ("; for (int j = 0; j < output_shape.size(); ++j) { out_num *= output_shape[j]; std::cout << output_shape[j] << ","; } std::cout << ")" << std::endl; out_data.resize(out_num); output_t->copy_to_cpu(out_data.data()); for (int i = 0; i < batch_size; ++i) { float* out_addr = out_data.data() + (out_num / batch_size) * i; output_mask(imgs_batch[i], out_addr, out_num / batch_size, &org_height[i], &org_width[i]); } } return 0; } }