// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "paddle_api.h" // NOLINT #include #include #include #include #include #include #include #include #include "AutoLog/auto_log/lite_autolog.h" using namespace paddle::lite_api; // NOLINT using namespace std; struct RESULT { std::string class_name; int class_id; float score; }; std::vector PostProcess(const float *output_data, int output_size, const std::vector &word_labels, cv::Mat &output_image) { const int TOPK = 5; int max_indices[TOPK]; double max_scores[TOPK]; for (int i = 0; i < TOPK; i++) { max_indices[i] = 0; max_scores[i] = 0; } for (int i = 0; i < output_size; i++) { float score = output_data[i]; int index = i; for (int j = 0; j < TOPK; j++) { if (score > max_scores[j]) { index += max_indices[j]; max_indices[j] = index - max_indices[j]; index -= max_indices[j]; score += max_scores[j]; max_scores[j] = score - max_scores[j]; score -= max_scores[j]; } } } std::vector results(TOPK); for (int i = 0; i < results.size(); i++) { results[i].class_name = "Unknown"; if (max_indices[i] >= 0 && max_indices[i] < word_labels.size()) { results[i].class_name = word_labels[max_indices[i]]; } results[i].score = max_scores[i]; results[i].class_id = max_indices[i]; cv::putText(output_image, "Top" + std::to_string(i + 1) + "." + results[i].class_name + ":" + std::to_string(results[i].score), cv::Point2d(5, i * 18 + 20), cv::FONT_HERSHEY_PLAIN, 1, cv::Scalar(51, 255, 255)); } return results; } // fill tensor with mean and scale and trans layout: nhwc -> nchw, neon speed up void NeonMeanScale(const float *din, float *dout, int size, const std::vector mean, const std::vector scale) { if (mean.size() != 3 || scale.size() != 3) { std::cerr << "[ERROR] mean or scale size must equal to 3\n"; exit(1); } float32x4_t vmean0 = vdupq_n_f32(mean[0]); float32x4_t vmean1 = vdupq_n_f32(mean[1]); float32x4_t vmean2 = vdupq_n_f32(mean[2]); float32x4_t vscale0 = vdupq_n_f32(scale[0]); float32x4_t vscale1 = vdupq_n_f32(scale[1]); float32x4_t vscale2 = vdupq_n_f32(scale[2]); float *dout_c0 = dout; float *dout_c1 = dout + size; float *dout_c2 = dout + size * 2; int i = 0; for (; i < size - 3; i += 4) { float32x4x3_t vin3 = vld3q_f32(din); float32x4_t vsub0 = vsubq_f32(vin3.val[0], vmean0); float32x4_t vsub1 = vsubq_f32(vin3.val[1], vmean1); float32x4_t vsub2 = vsubq_f32(vin3.val[2], vmean2); float32x4_t vs0 = vmulq_f32(vsub0, vscale0); float32x4_t vs1 = vmulq_f32(vsub1, vscale1); float32x4_t vs2 = vmulq_f32(vsub2, vscale2); vst1q_f32(dout_c0, vs0); vst1q_f32(dout_c1, vs1); vst1q_f32(dout_c2, vs2); din += 12; dout_c0 += 4; dout_c1 += 4; dout_c2 += 4; } for (; i < size; i++) { *(dout_c0++) = (*(din++) - mean[0]) * scale[0]; *(dout_c1++) = (*(din++) - mean[1]) * scale[1]; *(dout_c2++) = (*(din++) - mean[2]) * scale[2]; } } cv::Mat ResizeImage(const cv::Mat &img, const int &resize_short_size) { int w = img.cols; int h = img.rows; cv::Mat resize_img; float ratio = 1.f; if (h < w) { ratio = float(resize_short_size) / float(h); } else { ratio = float(resize_short_size) / float(w); } int resize_h = round(float(h) * ratio); int resize_w = round(float(w) * ratio); cv::resize(img, resize_img, cv::Size(resize_w, resize_h)); return resize_img; } cv::Mat CenterCropImg(const cv::Mat &img, const int &crop_size) { int resize_w = img.cols; int resize_h = img.rows; int w_start = int((resize_w - crop_size) / 2); int h_start = int((resize_h - crop_size) / 2); cv::Rect rect(w_start, h_start, crop_size, crop_size); cv::Mat crop_img = img(rect); return crop_img; } std::vector RunClasModel(std::shared_ptr predictor, const cv::Mat &img, const std::map &config, const std::vector &word_labels, double &cost_time, std::vector *time_info) { // Read img auto preprocess_start = std::chrono::steady_clock::now(); int resize_short_size = stoi(config.at("resize_short_size")); int crop_size = stoi(config.at("crop_size")); int visualize = stoi(config.at("visualize")); cv::Mat resize_image = ResizeImage(img, resize_short_size); cv::Mat crop_image = CenterCropImg(resize_image, crop_size); cv::Mat img_fp; double e = 1.0 / 255.0; crop_image.convertTo(img_fp, CV_32FC3, e); // Prepare input data from image std::unique_ptr input_tensor(std::move(predictor->GetInput(0))); input_tensor->Resize({1, 3, img_fp.rows, img_fp.cols}); auto *data0 = input_tensor->mutable_data(); std::vector mean = {0.485f, 0.456f, 0.406f}; std::vector scale = {1 / 0.229f, 1 / 0.224f, 1 / 0.225f}; const float *dimg = reinterpret_cast(img_fp.data); NeonMeanScale(dimg, data0, img_fp.rows * img_fp.cols, mean, scale); auto preprocess_end = std::chrono::steady_clock::now(); auto inference_start = std::chrono::system_clock::now(); // Run predictor predictor->Run(); // Get output and post process std::unique_ptr output_tensor( std::move(predictor->GetOutput(0))); auto *output_data = output_tensor->data(); auto inference_end = std::chrono::system_clock::now(); auto postprocess_start = std::chrono::system_clock::now(); auto duration = std::chrono::duration_cast(inference_end - inference_start); cost_time = double(duration.count()) * std::chrono::microseconds::period::num / std::chrono::microseconds::period::den; int output_size = 1; for (auto dim : output_tensor->shape()) { output_size *= dim; } cv::Mat output_image; auto results = PostProcess(output_data, output_size, word_labels, output_image); auto postprocess_end = std::chrono::system_clock::now(); std::chrono::duration preprocess_diff = preprocess_end - preprocess_start; time_info->push_back(double(preprocess_diff.count() * 1000)); std::chrono::duration inference_diff = inference_end - inference_start; time_info->push_back(double(inference_diff.count() * 1000)); std::chrono::duration postprocess_diff = postprocess_end - postprocess_start; time_info->push_back(double(postprocess_diff.count() * 1000)); if (visualize) { std::string output_image_path = "./clas_result.png"; cv::imwrite(output_image_path, output_image); std::cout << "save output image into " << output_image_path << std::endl; } return results; } std::shared_ptr LoadModel(std::string model_file) { MobileConfig config; config.set_model_from_file(model_file); std::shared_ptr predictor = CreatePaddlePredictor(config); return predictor; } std::vector split(const std::string &str, const std::string &delim) { std::vector res; if ("" == str) return res; char *strs = new char[str.length() + 1]; std::strcpy(strs, str.c_str()); char *d = new char[delim.length() + 1]; std::strcpy(d, delim.c_str()); char *p = std::strtok(strs, d); while (p) { string s = p; res.push_back(s); p = std::strtok(NULL, d); } return res; } std::vector ReadDict(std::string path) { std::ifstream in(path); std::string filename; std::string line; std::vector m_vec; if (in) { while (getline(in, line)) { m_vec.push_back(line); } } else { std::cout << "no such file" << std::endl; } return m_vec; } std::map LoadConfigTxt(std::string config_path) { auto config = ReadDict(config_path); std::map dict; for (int i = 0; i < config.size(); i++) { std::vector res = split(config[i], " "); dict[res[0]] = res[1]; } return dict; } void PrintConfig(const std::map &config) { std::cout << "=======PaddleClas lite demo config======" << std::endl; for (auto iter = config.begin(); iter != config.end(); iter++) { std::cout << iter->first << " : " << iter->second << std::endl; } std::cout << "=======End of PaddleClas lite demo config======" << std::endl; } std::vector LoadLabels(const std::string &path) { std::ifstream file; std::vector labels; file.open(path); while (file) { std::string line; std::getline(file, line); std::string::size_type pos = line.find(" "); if (pos != std::string::npos) { line = line.substr(pos); } labels.push_back(line); } file.clear(); file.close(); return labels; } int main(int argc, char **argv) { if (argc < 3) { std::cerr << "[ERROR] usage: " << argv[0] << " config_path img_path\n"; exit(1); } std::string config_path = argv[1]; std::string img_path = argv[2]; // load config auto config = LoadConfigTxt(config_path); PrintConfig(config); double elapsed_time = 0.0; int warmup_iter = 10; bool enable_benchmark = bool(stoi(config.at("enable_benchmark"))); int total_cnt = enable_benchmark ? 1000 : 1; std::string clas_model_file = config.at("clas_model_file"); std::string label_path = config.at("label_path"); std::string crop_size = config.at("crop_size"); int num_threads = stoi(config.at("num_threads")); int batch_size = stoi(config.at("batch_size")); std::string precision = config.at("precision"); std::string runtime_device = config.at("runtime_device"); bool tipc_benchmark = bool(stoi(config.at("tipc_benchmark"))); // Load Labels std::vector word_labels = LoadLabels(label_path); auto clas_predictor = LoadModel(clas_model_file); for (int j = 0; j < total_cnt; ++j) { cv::Mat srcimg = cv::imread(img_path, cv::IMREAD_COLOR); cv::cvtColor(srcimg, srcimg, cv::COLOR_BGR2RGB); double run_time = 0; std::vector time_info; std::vector results = RunClasModel(clas_predictor, srcimg, config, word_labels, run_time, &time_info); std::cout << "===clas result for image: " << img_path << "===" << std::endl; for (int i = 0; i < results.size(); i++) { std::cout << "\t" << "Top-" << i + 1 << ", class_id: " << results[i].class_id << ", class_name: " << results[i].class_name << ", score: " << results[i].score << std::endl; } if (j >= warmup_iter) { elapsed_time += run_time; std::cout << "Current image path: " << img_path << std::endl; std::cout << "Current time cost: " << run_time << " s, " << "average time cost in all: " << elapsed_time / (j + 1 - warmup_iter) << " s." << std::endl; } else { std::cout << "Current time cost: " << run_time << " s." << std::endl; } if (tipc_benchmark) { AutoLogger autolog(clas_model_file, runtime_device, num_threads, batch_size, crop_size, precision, time_info, 1); std::cout << "=======================TIPC Lite Information=======================" << std::endl; autolog.report(); } } return 0; }