diff --git a/configs/keypoint/hrnet/hrnet_w32_256x192.yml b/configs/keypoint/hrnet/hrnet_w32_256x192.yml
index 03318026f2b2a2971b0f9fa5a830fa1ec1aca273..37782b7483430a75dc3f54582ccab66dd8d4655b 100644
--- a/configs/keypoint/hrnet/hrnet_w32_256x192.yml
+++ b/configs/keypoint/hrnet/hrnet_w32_256x192.yml
@@ -139,3 +139,4 @@ TestReader:
       is_scale: true
   - Permute: {}
   batch_size: 1
+  fuse_normalize: false # whether to fuse the normalize layer into the model when exporting it
diff --git a/deploy/cpp/CMakeLists.txt b/deploy/cpp/CMakeLists.txt
index 0bc0be9aa949dfb89f726555bac16066127502fb..27a8093bb7e56a2563ec92d20f2d7a5a02f2ade6 100644
--- a/deploy/cpp/CMakeLists.txt
+++ b/deploy/cpp/CMakeLists.txt
@@ -5,6 +5,7 @@ option(WITH_MKL "Compile demo with MKL/OpenBlas support, default use MKL." ON)
 option(WITH_GPU        "Compile demo with GPU/CPU, default use CPU."   ON)
 option(WITH_TENSORRT   "Compile demo with TensorRT."                   OFF)
+option(WITH_KEYPOINT   "Whether to compile the keypoint detector"      ON)

 SET(PADDLE_DIR "" CACHE PATH "Location of libraries")
 SET(PADDLE_LIB_NAME "" CACHE STRING "libpaddle_inference")
@@ -20,6 +21,12 @@ include_directories("${CMAKE_SOURCE_DIR}/")
 include_directories("${CMAKE_CURRENT_BINARY_DIR}/ext/yaml-cpp/src/ext-yaml-cpp/include")
 link_directories("${CMAKE_CURRENT_BINARY_DIR}/ext/yaml-cpp/lib")

+if (WITH_KEYPOINT)
+  set(SRCS src/main_keypoint.cc src/preprocess_op.cc src/object_detector.cc src/keypoint_detector.cc src/keypoint_postprocess.cc)
+else ()
+  set(SRCS src/main.cc src/preprocess_op.cc src/object_detector.cc)
+endif()
+
 macro(safe_set_static_flag)
     foreach(flag_var
         CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
@@ -37,7 +44,7 @@ endif()
 if (NOT DEFINED PADDLE_DIR OR ${PADDLE_DIR} STREQUAL "")
     message(FATAL_ERROR "please set PADDLE_DIR with -DPADDLE_DIR=/path/paddle_inference_dir")
 endif()
-message("PADDLE_DIR IS:"${PADDLE_DIR})
+message("PADDLE_DIR IS:" ${PADDLE_DIR})

 if (NOT DEFINED OPENCV_DIR OR ${OPENCV_DIR} STREQUAL "")
     message(FATAL_ERROR "please set OPENCV_DIR with -DOPENCV_DIR=/path/opencv")
@@ -217,7 +224,7 @@ endif()
 set(DEPS ${DEPS} ${OpenCV_LIBS})
-add_executable(main src/main.cc src/preprocess_op.cc src/object_detector.cc)
+add_executable(main ${SRCS})
 ADD_DEPENDENCIES(main ext-yaml-cpp)
 message("DEPS:" $DEPS)
 target_link_libraries(main ${DEPS})
diff --git a/deploy/cpp/include/keypoint_detector.h b/deploy/cpp/include/keypoint_detector.h
new file mode 100644
index 0000000000000000000000000000000000000000..84924276e5abe9ef6c8505efaeac7dd5d9cf71b5
--- /dev/null
+++ b/deploy/cpp/include/keypoint_detector.h
@@ -0,0 +1,132 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <ctime>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <opencv2/core/core.hpp>
+#include <opencv2/highgui/highgui.hpp>
+#include <opencv2/imgproc/imgproc.hpp>
+
+#include "paddle_inference_api.h"  // NOLINT
+
+#include "include/config_parser.h"
+#include "include/keypoint_postprocess.h"
+#include "include/preprocess_op.h"
+
+using namespace paddle_infer;
+
+namespace PaddleDetection {
+// Object KeyPoint Result
+struct KeyPointResult {
+  // Keypoints: shape(N x 3); N: number of joints; 3: conf, x, y
+  std::vector<float> keypoints;
+  int num_joints = -1;
+};
+
+// Visualization of KeyPoint results
+cv::Mat VisualizeKptsResult(const cv::Mat& img,
+                            const std::vector<KeyPointResult>& results,
+                            const std::vector<int>& colormap);
+
+class KeyPointDetector {
+ public:
+  explicit KeyPointDetector(const std::string& model_dir,
+                            const std::string& device = "CPU",
+                            bool use_mkldnn = false,
+                            int cpu_threads = 1,
+                            const std::string& run_mode = "fluid",
+                            const int batch_size = 1,
+                            const int gpu_id = 0,
+                            const int trt_min_shape = 1,
+                            const int trt_max_shape = 1280,
+                            const int trt_opt_shape = 640,
+                            bool trt_calib_mode = false,
+                            bool use_dark = true) {
+    this->device_ = device;
+    this->gpu_id_ = gpu_id;
+    this->cpu_math_library_num_threads_ = cpu_threads;
+    this->use_mkldnn_ = use_mkldnn;
+    this->use_dark = use_dark;
+
+    this->trt_min_shape_ = trt_min_shape;
+    this->trt_max_shape_ = trt_max_shape;
+    this->trt_opt_shape_ = trt_opt_shape;
+    this->trt_calib_mode_ = trt_calib_mode;
+    config_.load_config(model_dir);
+    this->use_dynamic_shape_ = config_.use_dynamic_shape_;
+    this->min_subgraph_size_ = config_.min_subgraph_size_;
+    threshold_ = config_.draw_threshold_;
+    preprocessor_.Init(config_.preprocess_info_);
+    LoadModel(model_dir, batch_size, run_mode);
+  }
+
+  // Load Paddle inference model
+  void LoadModel(const std::string& model_dir,
+                 const int batch_size = 1,
+                 const std::string& run_mode = "fluid");
+
+  // Run predictor
+  void Predict(const std::vector<cv::Mat> imgs,
+               std::vector<std::vector<float>>& center,
+               std::vector<std::vector<float>>& scale,
+               const double threshold = 0.5,
+               const int warmup = 0,
+               const int repeats = 1,
+               std::vector<KeyPointResult>* result = nullptr,
+               std::vector<double>* times = nullptr);
+
+  // Get Model Label list
+  const std::vector<std::string>& GetLabelList() const {
+    return config_.label_list_;
+  }
+
+ private:
+  std::string device_ = "CPU";
+  int gpu_id_ = 0;
+  int cpu_math_library_num_threads_ = 1;
+  bool use_dark = true;
+  bool use_mkldnn_ = false;
+  int min_subgraph_size_ = 3;
+  bool use_dynamic_shape_ = false;
+  int trt_min_shape_ = 1;
+  int trt_max_shape_ = 1280;
+  int trt_opt_shape_ = 640;
+  bool trt_calib_mode_ = false;
+  // Preprocess image and copy data to input buffer
+  void Preprocess(const cv::Mat& image_mat);
+  // Postprocess result
+  void Postprocess(std::vector<float>& output,
+                   std::vector<int> output_shape,
+                   std::vector<int64_t>& idxout,
+                   std::vector<int> idx_shape,
+                   std::vector<KeyPointResult>* result,
+                   std::vector<std::vector<float>>& center,
+                   std::vector<std::vector<float>>& scale);
+
+  std::shared_ptr<Predictor> predictor_;
+  Preprocessor preprocessor_;
+  ImageBlob inputs_;
+  std::vector<float> output_data_;
+  std::vector<int64_t> idx_data_;
+  float threshold_;
+  ConfigPaser config_;
+};
+
+}  // namespace PaddleDetection
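A minimal usage sketch of the API above (a sketch only: the model path, input crop, and center/scale values are placeholders, and error handling is omitted; center/scale would normally come from `CropImg`):

```cpp
#include <cstdio>
#include <opencv2/opencv.hpp>
#include "include/keypoint_detector.h"

int main() {
  // Assumes an exported HRNet model under ./keypoint_model
  // (model.pdmodel + model.pdiparams + infer_cfg.yml), as LoadModel expects.
  PaddleDetection::KeyPointDetector kpt("./keypoint_model", "CPU");

  cv::Mat crop = cv::imread("person_crop.jpg");  // one detected person
  std::vector<std::vector<float>> center = {{96.f, 128.f}};   // crop center
  std::vector<std::vector<float>> scale = {{192.f, 256.f}};   // crop w, h
  std::vector<PaddleDetection::KeyPointResult> results;
  std::vector<double> times;
  kpt.Predict({crop}, center, scale, 0.5, 0, 1, &results, &times);

  // keypoints are stored as (conf, x, y) triples per joint
  for (const auto& r : results)
    for (int i = 0; i < r.num_joints; ++i)
      std::printf("joint %d: conf=%.2f x=%.1f y=%.1f\n", i,
                  r.keypoints[i * 3], r.keypoints[i * 3 + 1],
                  r.keypoints[i * 3 + 2]);
  return 0;
}
```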
diff --git a/deploy/cpp/include/keypoint_postprocess.h b/deploy/cpp/include/keypoint_postprocess.h
new file mode 100644
index 0000000000000000000000000000000000000000..4239cdf7369bdb76fa3f715c755d601b40285c5b
--- /dev/null
+++ b/deploy/cpp/include/keypoint_postprocess.h
@@ -0,0 +1,63 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <opencv2/core/core.hpp>
+#include <opencv2/highgui/highgui.hpp>
+#include <opencv2/imgproc/imgproc.hpp>
+#include <vector>
+
+cv::Point2f get_3rd_point(cv::Point2f& a, cv::Point2f& b);
+
+std::vector<float> get_dir(float src_point_x, float src_point_y, float rot_rad);
+
+void affine_tranform(
+    float pt_x, float pt_y, cv::Mat& trans, std::vector<float>& preds, int p);
+
+void get_affine_transform(std::vector<float>& center,
+                          std::vector<float>& scale,
+                          float rot,
+                          std::vector<int>& output_size,
+                          cv::Mat& trans,
+                          int inv);
+
+void transform_preds(std::vector<float>& coords,
+                     std::vector<float>& center,
+                     std::vector<float>& scale,
+                     std::vector<int>& output_size,
+                     std::vector<int>& dim,
+                     std::vector<float>& target_coords);
+
+void box_to_center_scale(std::vector<int>& box,
+                         int width,
+                         int height,
+                         std::vector<float>& center,
+                         std::vector<float>& scale);
+
+void get_max_preds(float* heatmap,
+                   std::vector<int>& dim,
+                   std::vector<float>& preds,
+                   float* maxvals,
+                   int batchid,
+                   int joint_idx);
+
+void get_final_preds(std::vector<float>& heatmap,
+                     std::vector<int>& dim,
+                     std::vector<int64_t>& idxout,
+                     std::vector<int>& idxdim,
+                     std::vector<float>& center,
+                     std::vector<float> scale,
+                     std::vector<float>& preds,
+                     int batchid,
+                     bool DARK = true);
diff --git a/deploy/cpp/include/preprocess_op.h b/deploy/cpp/include/preprocess_op.h
index aad528d675be87be541e16ca20ae54f68b6d75cf..f34bab45ff61a7d1a4f33b03d0a01012d39505be 100644
--- a/deploy/cpp/include/preprocess_op.h
+++ b/deploy/cpp/include/preprocess_op.h
@@ -116,6 +116,21 @@ class PadStride : public PreprocessOp {
   int stride_;
 };

+class TopDownEvalAffine : public PreprocessOp {
+ public:
+  virtual void Init(const YAML::Node& item) {
+    trainsize_ = item["trainsize"].as<std::vector<int>>();
+  }
+
+  virtual void Run(cv::Mat* im, ImageBlob* data);
+
+ private:
+  int interp_ = 1;
+  std::vector<int> trainsize_;
+};
+
+void CropImg(cv::Mat& img, cv::Mat& crop_img, std::vector<int>& area,
+             std::vector<float>& center, std::vector<float>& scale,
+             float expandratio = 0.15);
+
 class Preprocessor {
  public:
   void Init(const YAML::Node& config_node) {
@@ -139,6 +154,8 @@ class Preprocessor {
     } else if (name == "PadStride") {
       // use PadStride instead of PadBatch
       return std::make_shared<PadStride>();
+    } else if (name == "TopDownEvalAffine") {
+      return std::make_shared<TopDownEvalAffine>();
     }
     std::cerr << "can not find function of OP: " << name
               << " and return: nullptr" << std::endl;
     return nullptr;
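To make the new op's configuration concrete: `TopDownEvalAffine::Init` reads a `trainsize` entry ([width, height]) from the exported `infer_cfg.yml`. A standalone sketch of that wiring, with an assumed config layout and assuming the op lives in the `PaddleDetection` namespace like the other preprocess ops:

```cpp
#include <yaml-cpp/yaml.h>
#include "include/preprocess_op.h"

int main() {
  // Hypothetical fragment of infer_cfg.yml for the 256x192 HRNet model:
  // width 192, height 256, i.e. trainsize: [192, 256].
  YAML::Node item = YAML::Load("{trainsize: [192, 256]}");
  PaddleDetection::TopDownEvalAffine op;
  op.Init(item);  // stores trainsize_ = {192, 256}
  // Preprocessor::CreateOp("TopDownEvalAffine") performs the same wiring
  // when it walks the preprocess list of infer_cfg.yml.
  return 0;
}
```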
diff --git a/deploy/cpp/src/keypoint_detector.cc b/deploy/cpp/src/keypoint_detector.cc
new file mode 100644
index 0000000000000000000000000000000000000000..788855e2e954924ad9d1359f94eb34bed47299cb
--- /dev/null
+++ b/deploy/cpp/src/keypoint_detector.cc
@@ -0,0 +1,313 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <sstream>
+// for setprecision
+#include <chrono>
+#include <iomanip>
+#include "include/keypoint_detector.h"
+
+using namespace paddle_infer;
+
+namespace PaddleDetection {
+
+// Load Model and create model predictor
+void KeyPointDetector::LoadModel(const std::string& model_dir,
+                                 const int batch_size,
+                                 const std::string& run_mode) {
+  paddle_infer::Config config;
+  std::string prog_file = model_dir + OS_PATH_SEP + "model.pdmodel";
+  std::string params_file = model_dir + OS_PATH_SEP + "model.pdiparams";
+  config.SetModel(prog_file, params_file);
+  if (this->device_ == "GPU") {
+    config.EnableUseGpu(200, this->gpu_id_);
+    config.SwitchIrOptim(true);
+    // use tensorrt
+    if (run_mode != "fluid") {
+      auto precision = paddle_infer::Config::Precision::kFloat32;
+      if (run_mode == "trt_fp32") {
+        precision = paddle_infer::Config::Precision::kFloat32;
+      } else if (run_mode == "trt_fp16") {
+        precision = paddle_infer::Config::Precision::kHalf;
+      } else if (run_mode == "trt_int8") {
+        precision = paddle_infer::Config::Precision::kInt8;
+      } else {
+        printf(
+            "run_mode should be 'fluid', 'trt_fp32', 'trt_fp16' or 'trt_int8'");
+      }
+      // set tensorrt
+      config.EnableTensorRtEngine(1 << 30,
+                                  batch_size,
+                                  this->min_subgraph_size_,
+                                  precision,
+                                  false,
+                                  this->trt_calib_mode_);
+
+      // set use dynamic shape
+      if (this->use_dynamic_shape_) {
+        // set DynamicShape for image tensor
+        const std::vector<int> min_input_shape = {
+            1, 3, this->trt_min_shape_, this->trt_min_shape_};
+        const std::vector<int> max_input_shape = {
+            1, 3, this->trt_max_shape_, this->trt_max_shape_};
+        const std::vector<int> opt_input_shape = {
+            1, 3, this->trt_opt_shape_, this->trt_opt_shape_};
+        const std::map<std::string, std::vector<int>> map_min_input_shape = {
+            {"image", min_input_shape}};
+        const std::map<std::string, std::vector<int>> map_max_input_shape = {
+            {"image", max_input_shape}};
+        const std::map<std::string, std::vector<int>> map_opt_input_shape = {
+            {"image", opt_input_shape}};
+
+        config.SetTRTDynamicShapeInfo(
+            map_min_input_shape, map_max_input_shape, map_opt_input_shape);
+        std::cout << "TensorRT dynamic shape enabled" << std::endl;
+      }
+    }
+
+  } else if (this->device_ == "XPU") {
+    config.EnableXpu(10 * 1024 * 1024);
+  } else {
+    config.DisableGpu();
+    if (this->use_mkldnn_) {
+      config.EnableMKLDNN();
+      // cache 10 different shapes for mkldnn to avoid memory leak
+      config.SetMkldnnCacheCapacity(10);
+    }
+    config.SetCpuMathLibraryNumThreads(this->cpu_math_library_num_threads_);
+  }
+  config.SwitchUseFeedFetchOps(false);
+  config.SwitchIrOptim(true);
+  config.DisableGlogInfo();
+  // Memory optimization
+  config.EnableMemoryOptim();
+  predictor_ = std::move(CreatePredictor(config));
+}
+
+// Visualization of keypoint results
+cv::Mat VisualizeKptsResult(const cv::Mat& img,
+                            const std::vector<KeyPointResult>& results,
+                            const std::vector<int>& colormap) {
+  const int edge[][2] = {{0, 1},
+                         {0, 2},
+                         {1, 3},
+                         {2, 4},
+                         {3, 5},
+                         {4, 6},
+                         {5, 7},
+                         {6, 8},
+                         {7, 9},
+                         {8, 10},
+                         {5, 11},
+                         {6, 12},
+                         {11, 13},
+                         {12, 14},
+                         {13, 15},
+                         {14, 16},
+                         {11, 12}};
+  cv::Mat vis_img = img.clone();
+  for (int batchid = 0; batchid < results.size(); batchid++) {
+    for (int i = 0; i < results[batchid].num_joints; i++) {
+      if (results[batchid].keypoints[i * 3] > 0.5) {
+        int x_coord = int(results[batchid].keypoints[i * 3 + 1]);
+        int y_coord = int(results[batchid].keypoints[i * 3 + 2]);
+        cv::circle(vis_img,
+                   cv::Point2d(x_coord, y_coord),
+                   1,
+                   cv::Scalar(0, 0, 255),
+                   2);
+      }
+    }
+    for (int i = 0; i < results[batchid].num_joints; i++) {
+      int x_start = int(results[batchid].keypoints[edge[i][0] * 3 + 1]);
+      int y_start = int(results[batchid].keypoints[edge[i][0] * 3 + 2]);
+      int x_end = int(results[batchid].keypoints[edge[i][1] * 3 + 1]);
+      int y_end = int(results[batchid].keypoints[edge[i][1] * 3 + 2]);
+      cv::line(vis_img,
+               cv::Point2d(x_start, y_start),
+               cv::Point2d(x_end, y_end),
+               colormap[i],
+               1);
+    }
+  }
+  return vis_img;
+}
+
+void KeyPointDetector::Preprocess(const cv::Mat& ori_im) {
+  // Clone the image : keep the original mat for postprocess
+  cv::Mat im = ori_im.clone();
+  cv::cvtColor(im, im, cv::COLOR_BGR2RGB);
+  preprocessor_.Run(&im, &inputs_);
+}
+
+void KeyPointDetector::Postprocess(std::vector<float>& output,
+                                   std::vector<int> output_shape,
+                                   std::vector<int64_t>& idxout,
+                                   std::vector<int> idx_shape,
+                                   std::vector<KeyPointResult>* result,
+                                   std::vector<std::vector<float>>& center_bs,
+                                   std::vector<std::vector<float>>& scale_bs) {
+  std::vector<float> preds(output_shape[1] * 3, 0);
+
+  for (int batchid = 0; batchid < output_shape[0]; batchid++) {
+    get_final_preds(output,
+                    output_shape,
+                    idxout,
+                    idx_shape,
+                    center_bs[batchid],
+                    scale_bs[batchid],
+                    preds,
+                    batchid,
+                    this->use_dark);
+    KeyPointResult result_item;
+    result_item.num_joints = output_shape[1];
+    result_item.keypoints.clear();
+    for (int i = 0; i < output_shape[1]; i++) {
+      result_item.keypoints.emplace_back(preds[i * 3]);
+      result_item.keypoints.emplace_back(preds[i * 3 + 1]);
+      result_item.keypoints.emplace_back(preds[i * 3 + 2]);
+    }
+    result->push_back(result_item);
+  }
+}
+
+void KeyPointDetector::Predict(const std::vector<cv::Mat> imgs,
+                               std::vector<std::vector<float>>& center_bs,
+                               std::vector<std::vector<float>>& scale_bs,
+                               const double threshold,
+                               const int warmup,
+                               const int repeats,
+                               std::vector<KeyPointResult>* result,
+                               std::vector<double>* times) {
+  auto preprocess_start = std::chrono::steady_clock::now();
+  int batch_size = imgs.size();
+
+  // in_data_batch
+  std::vector<float> in_data_all;
+  std::vector<float> im_shape_all(batch_size * 2);
+  std::vector<float> scale_factor_all(batch_size * 2);
+
+  // Preprocess image
+  for (int bs_idx = 0; bs_idx < batch_size; bs_idx++) {
+    cv::Mat im = imgs.at(bs_idx);
+    Preprocess(im);
+    im_shape_all[bs_idx * 2] = inputs_.im_shape_[0];
+    im_shape_all[bs_idx * 2 + 1] = inputs_.im_shape_[1];
+
+    scale_factor_all[bs_idx * 2] = inputs_.scale_factor_[0];
+    scale_factor_all[bs_idx * 2 + 1] = inputs_.scale_factor_[1];
+
+    // TODO: reduce cost time
+    in_data_all.insert(
+        in_data_all.end(), inputs_.im_data_.begin(), inputs_.im_data_.end());
+  }
+
+  // Prepare input tensor
+  auto input_names = predictor_->GetInputNames();
+  for (const auto& tensor_name : input_names) {
+    auto in_tensor = predictor_->GetInputHandle(tensor_name);
+    if (tensor_name == "image") {
+      int rh = inputs_.in_net_shape_[0];
+      int rw = inputs_.in_net_shape_[1];
+      in_tensor->Reshape({batch_size, 3, rh, rw});
+      in_tensor->CopyFromCpu(in_data_all.data());
+    } else if (tensor_name == "im_shape") {
+      in_tensor->Reshape({batch_size, 2});
+      in_tensor->CopyFromCpu(im_shape_all.data());
+    } else if (tensor_name == "scale_factor") {
+      in_tensor->Reshape({batch_size, 2});
+      in_tensor->CopyFromCpu(scale_factor_all.data());
+    }
+  }
+
+  auto preprocess_end = std::chrono::steady_clock::now();
+  std::vector<int> output_shape, idx_shape;
+  // Run predictor
+  // warmup
+  for (int i = 0; i < warmup; i++) {
+    predictor_->Run();
+    // Get output tensor
+    auto output_names = predictor_->GetOutputNames();
+    auto out_tensor = predictor_->GetOutputHandle(output_names[0]);
+    output_shape = out_tensor->shape();
+    // Calculate output length
+    int output_size = 1;
+    for (int j = 0; j < output_shape.size(); ++j) {
+      output_size *= output_shape[j];
+    }
+    output_data_.resize(output_size);
+    out_tensor->CopyToCpu(output_data_.data());
+
+    auto idx_tensor = predictor_->GetOutputHandle(output_names[1]);
+    idx_shape = idx_tensor->shape();
+    // Calculate output length
+    output_size = 1;
+    for (int j = 0; j < idx_shape.size(); ++j) {
+      output_size *= idx_shape[j];
+    }
+    idx_data_.resize(output_size);
+    idx_tensor->CopyToCpu(idx_data_.data());
+  }
+
+  auto inference_start = std::chrono::steady_clock::now();
+  for (int i = 0; i < repeats; i++) {
+    predictor_->Run();
+    // Get output tensor
+    auto output_names = predictor_->GetOutputNames();
+    auto out_tensor = predictor_->GetOutputHandle(output_names[0]);
+    output_shape = out_tensor->shape();
+    // Calculate output length
+    int output_size = 1;
+    for (int j = 0; j < output_shape.size(); ++j) {
+      output_size *= output_shape[j];
+    }
+    if (output_size < 6) {
+      std::cerr << "[WARNING] No object detected." << std::endl;
+    }
+    output_data_.resize(output_size);
+    out_tensor->CopyToCpu(output_data_.data());
+
+    auto idx_tensor = predictor_->GetOutputHandle(output_names[1]);
+    idx_shape = idx_tensor->shape();
+    // Calculate output length
+    output_size = 1;
+    for (int j = 0; j < idx_shape.size(); ++j) {
+      output_size *= idx_shape[j];
+    }
+    idx_data_.resize(output_size);
+    idx_tensor->CopyToCpu(idx_data_.data());
+  }
+  auto inference_end = std::chrono::steady_clock::now();
+  auto postprocess_start = std::chrono::steady_clock::now();
+  // Postprocessing result
+  Postprocess(output_data_,
+              output_shape,
+              idx_data_,
+              idx_shape,
+              result,
+              center_bs,
+              scale_bs);
+  auto postprocess_end = std::chrono::steady_clock::now();
+
+  std::chrono::duration<float> preprocess_diff =
+      preprocess_end - preprocess_start;
+  times->push_back(double(preprocess_diff.count() * 1000));
+  std::chrono::duration<float> inference_diff = inference_end - inference_start;
+  times->push_back(double(inference_diff.count() / repeats * 1000));
+  std::chrono::duration<float> postprocess_diff =
+      postprocess_end - postprocess_start;
+  times->push_back(double(postprocess_diff.count() * 1000));
+}
+
+}  // namespace PaddleDetection
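To make the two deploy outputs concrete: the exported model returns the raw heatmaps (`[N, K, H, W]`) plus a per-joint flat argmax index (`[N, K]`), and `get_final_preds` decodes each flat index back to grid coordinates with `idx % W` and `idx / W`. A standalone sketch with made-up shapes and values:

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  // Assumed shapes for one person: K joints over an H x W heatmap grid.
  const int K = 17, H = 64, W = 48;
  std::vector<int64_t> idx(K, 1234);  // stand-in argmax values from the model
  for (int j = 0; j < K; ++j) {
    int x = static_cast<int>(idx[j] % W);  // column in the heatmap grid
    int y = static_cast<int>(idx[j] / W);  // row in the heatmap grid
    std::printf("joint %d -> (%d, %d)\n", j, x, y);
  }
  return 0;
}
```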
diff --git a/deploy/cpp/src/keypoint_postprocess.cc b/deploy/cpp/src/keypoint_postprocess.cc
new file mode 100644
index 0000000000000000000000000000000000000000..7a14684efbef125d4b2535c8a6180b59ad9dc34d
--- /dev/null
+++ b/deploy/cpp/src/keypoint_postprocess.cc
@@ -0,0 +1,214 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <math.h>
+#include "include/keypoint_postprocess.h"
+#define PI 3.1415926535
+#define HALF_CIRCLE_DEGREE 180
+
+cv::Point2f get_3rd_point(cv::Point2f& a, cv::Point2f& b) {
+  cv::Point2f direct{a.x - b.x, a.y - b.y};
+  return cv::Point2f(a.x - direct.y, a.y + direct.x);
+}
+
+std::vector<float> get_dir(float src_point_x,
+                           float src_point_y,
+                           float rot_rad) {
+  float sn = sin(rot_rad);
+  float cs = cos(rot_rad);
+  std::vector<float> src_result{0.0, 0.0};
+  src_result[0] = src_point_x * cs - src_point_y * sn;
+  src_result[1] = src_point_x * sn + src_point_y * cs;
+  return src_result;
+}
+
+void affine_tranform(
+    float pt_x, float pt_y, cv::Mat& trans, std::vector<float>& preds, int p) {
+  double new1[3] = {pt_x, pt_y, 1.0};
+  cv::Mat new_pt(3, 1, trans.type(), new1);
+  cv::Mat w = trans * new_pt;
+  preds[p * 3 + 1] = static_cast<float>(w.at<double>(0, 0));
+  preds[p * 3 + 2] = static_cast<float>(w.at<double>(1, 0));
+}
+
+void get_affine_transform(std::vector<float>& center,
+                          std::vector<float>& scale,
+                          float rot,
+                          std::vector<int>& output_size,
+                          cv::Mat& trans,
+                          int inv) {
+  float src_w = scale[0];
+  float dst_w = static_cast<float>(output_size[0]);
+  float dst_h = static_cast<float>(output_size[1]);
+  float rot_rad = rot * PI / HALF_CIRCLE_DEGREE;
+  std::vector<float> src_dir = get_dir(-0.5 * src_w, 0, rot_rad);
+  std::vector<float> dst_dir{-0.5f * dst_w, 0.0f};
+  cv::Point2f srcPoint2f[3], dstPoint2f[3];
+  srcPoint2f[0] = cv::Point2f(center[0], center[1]);
+  srcPoint2f[1] = cv::Point2f(center[0] + src_dir[0], center[1] + src_dir[1]);
+  srcPoint2f[2] = get_3rd_point(srcPoint2f[0], srcPoint2f[1]);
+
+  dstPoint2f[0] = cv::Point2f(dst_w * 0.5, dst_h * 0.5);
+  dstPoint2f[1] =
+      cv::Point2f(dst_w * 0.5 + dst_dir[0], dst_h * 0.5 + dst_dir[1]);
+  dstPoint2f[2] = get_3rd_point(dstPoint2f[0], dstPoint2f[1]);
+  if (inv == 0) {
+    trans = cv::getAffineTransform(srcPoint2f, dstPoint2f);
+  } else {
+    trans = cv::getAffineTransform(dstPoint2f, srcPoint2f);
+  }
+}
+
+void transform_preds(std::vector<float>& coords,
+                     std::vector<float>& center,
+                     std::vector<float>& scale,
+                     std::vector<int>& output_size,
+                     std::vector<int>& dim,
+                     std::vector<float>& target_coords) {
+  cv::Mat trans(2, 3, CV_64FC1);
+  get_affine_transform(center, scale, 0, output_size, trans, 1);
+  for (int p = 0; p < dim[1]; ++p) {
+    affine_tranform(coords[p * 2], coords[p * 2 + 1], trans, target_coords, p);
+  }
+}
+
+// only for batchsize == 1
+void get_max_preds(float* heatmap,
+                   std::vector<int>& dim,
+                   std::vector<float>& preds,
+                   float* maxvals,
+                   int batchid,
+                   int joint_idx) {
+  int num_joints = dim[1];
+  int width = dim[3];
+  std::vector<int> idx;
+  idx.resize(num_joints * 2);
+
+  for (int j = 0; j < dim[1]; j++) {
+    float* index = &(
+        heatmap[batchid * num_joints * dim[2] * dim[3] + j * dim[2] * dim[3]]);
+    float* end = index + dim[2] * dim[3];
+    float* max_dis = std::max_element(index, end);
+    auto max_id = std::distance(index, max_dis);
+    maxvals[j] = *max_dis;
+    if (*max_dis > 0) {
+      preds[j * 2] = static_cast<float>(max_id % width);
+      preds[j * 2 + 1] = static_cast<float>(max_id / width);
+    }
+  }
+}
+
+void dark_parse(std::vector<float>& heatmap,
+                std::vector<int>& dim,
+                std::vector<float>& coords,
+                int px,
+                int py,
+                int index,
+                int ch) {
+  /* DARK postprocessing, Zhang et al. Distribution-Aware Coordinate
+  Representation for Human Pose Estimation (CVPR 2020).
+  1) offset = - hessian.inv() * derivative
+  2) dx = (heatmap[x+1] - heatmap[x-1]) / 2.
+  3) dxx = (dx[x+1] - dx[x-1]) / 2.
+  4) derivative = Mat([dx, dy])
+  5) hessian = Mat([[dxx, dxy], [dxy, dyy]])
+  */
+  std::vector<float>::const_iterator first1 = heatmap.begin() + index;
+  std::vector<float>::const_iterator last1 =
+      heatmap.begin() + index + dim[2] * dim[3];
+  std::vector<float> heatmap_ch(first1, last1);
+  cv::Mat heatmap_mat{heatmap_ch};
+  heatmap_mat = heatmap_mat.reshape(1, dim[2]);
+  cv::GaussianBlur(heatmap_mat, heatmap_mat, cv::Size(3, 3), 0, 0);
+  heatmap_ch.assign((const float*)heatmap_mat.datastart,
+                    (const float*)heatmap_mat.dataend);
+
+  float epsilon = 1e-10;
+  // sample the heatmap to get values around the target location
+  float xy = log(fmax(heatmap_ch[py * dim[3] + px], epsilon));
+  float xr = log(fmax(heatmap_ch[py * dim[3] + px + 1], epsilon));
+  float xl = log(fmax(heatmap_ch[py * dim[3] + px - 1], epsilon));
+
+  float xr2 = log(fmax(heatmap_ch[py * dim[3] + px + 2], epsilon));
+  float xl2 = log(fmax(heatmap_ch[py * dim[3] + px - 2], epsilon));
+  float yu = log(fmax(heatmap_ch[(py + 1) * dim[3] + px], epsilon));
+  float yd = log(fmax(heatmap_ch[(py - 1) * dim[3] + px], epsilon));
+  float yu2 = log(fmax(heatmap_ch[(py + 2) * dim[3] + px], epsilon));
+  float yd2 = log(fmax(heatmap_ch[(py - 2) * dim[3] + px], epsilon));
+  float xryu = log(fmax(heatmap_ch[(py + 1) * dim[3] + px + 1], epsilon));
+  float xryd = log(fmax(heatmap_ch[(py - 1) * dim[3] + px + 1], epsilon));
+  float xlyu = log(fmax(heatmap_ch[(py + 1) * dim[3] + px - 1], epsilon));
+  float xlyd = log(fmax(heatmap_ch[(py - 1) * dim[3] + px - 1], epsilon));
+
+  // compute dx/dy and dxx/dyy with the sampled values
+  float dx = 0.5 * (xr - xl);
+  float dy = 0.5 * (yu - yd);
+  float dxx = 0.25 * (xr2 - 2 * xy + xl2);
+  float dxy = 0.25 * (xryu - xryd - xlyu + xlyd);
+  float dyy = 0.25 * (yu2 - 2 * xy + yd2);
+
+  // finally get the offset from the derivative and hessian (dx/dy, dxx/dyy)
+  if (dxx * dyy - dxy * dxy != 0) {
+    float M[2][2] = {{dxx, dxy}, {dxy, dyy}};
+    float D[2] = {dx, dy};
+    cv::Mat hassian(2, 2, CV_32F, M);
+    cv::Mat derivative(2, 1, CV_32F, D);
+    cv::Mat offset = -hassian.inv() * derivative;
+    coords[ch * 2] += offset.at<float>(0, 0);
+    coords[ch * 2 + 1] += offset.at<float>(1, 0);
+  }
+}
+
+void get_final_preds(std::vector<float>& heatmap,
+                     std::vector<int>& dim,
+                     std::vector<int64_t>& idxout,
+                     std::vector<int>& idxdim,
+                     std::vector<float>& center,
+                     std::vector<float> scale,
+                     std::vector<float>& preds,
+                     int batchid,
+                     bool DARK) {
+  std::vector<float> coords;
+  coords.resize(dim[1] * 2);
+  int heatmap_height = dim[2];
+  int heatmap_width = dim[3];
+
+  for (int j = 0; j < dim[1]; ++j) {
+    int index = (batchid * dim[1] + j) * dim[2] * dim[3];
+
+    int idx = idxout[batchid * dim[1] + j];
+    preds[j * 3] = heatmap[index + idx];
+    coords[j * 2] = idx % heatmap_width;
+    coords[j * 2 + 1] = idx / heatmap_width;
+
+    int px = int(coords[j * 2] + 0.5);
+    int py = int(coords[j * 2 + 1] + 0.5);
+
+    if (DARK && px > 1 && px < heatmap_width - 2 && py > 1 &&
+        py < heatmap_height - 2) {
+      dark_parse(heatmap, dim, coords, px, py, index, j);
+    } else {
+      if (px > 0 && px < heatmap_width - 1) {
+        float diff_x = heatmap[index + py * dim[3] + px + 1] -
+                       heatmap[index + py * dim[3] + px - 1];
+        coords[j * 2] += diff_x > 0 ? 0.25 : -0.25;
+      }
+      if (py > 0 && py < heatmap_height - 1) {
+        float diff_y = heatmap[index + (py + 1) * dim[3] + px] -
+                       heatmap[index + (py - 1) * dim[3] + px];
+        coords[j * 2 + 1] += diff_y > 0 ? 0.25 : -0.25;
+      }
+    }
+  }
+
+  std::vector<int> img_size{heatmap_width, heatmap_height};
+  transform_preds(coords, center, scale, img_size, dim, preds);
+}
diff --git a/deploy/cpp/src/main_keypoint.cc b/deploy/cpp/src/main_keypoint.cc
new file mode 100644
index 0000000000000000000000000000000000000000..9bd074159ebc668bf121f2f81d1e694e70e58f9e
--- /dev/null
+++ b/deploy/cpp/src/main_keypoint.cc
@@ -0,0 +1,479 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <glog/logging.h>
+
+#include <math.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <string>
+#include <vector>
+
+#ifdef _WIN32
+#include <direct.h>
+#include <io.h>
+#elif LINUX
+#include <stdarg.h>
+#include <sys/stat.h>
+#endif
+
+#include "include/object_detector.h"
+#include "include/keypoint_detector.h"
+#include "include/preprocess_op.h"
+#include <gflags/gflags.h>
+
+
+DEFINE_string(model_dir, "", "Path of object detector inference model");
+DEFINE_string(model_dir_keypoint, "", "Path of keypoint detector inference model");
+DEFINE_string(image_file, "", "Path of input image");
+DEFINE_string(image_dir, "", "Dir of input image, `image_file` has a higher priority.");
+DEFINE_int32(batch_size, 1, "batch_size of object detector");
+DEFINE_int32(batch_size_keypoint, 8, "batch_size of keypoint detector");
+DEFINE_string(video_file, "", "Path of input video, `video_file` or `camera_id` has the highest priority.");
+DEFINE_int32(camera_id, -1, "Device id of camera to predict");
+DEFINE_bool(use_gpu, false, "Deprecated, please use `--device` to set the device you want to run.");
+DEFINE_string(device, "CPU", "Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU.");
+DEFINE_double(threshold, 0.5, "Threshold of score.");
+DEFINE_double(threshold_keypoint, 0.5, "Threshold of score.");
+DEFINE_string(output_dir, "output", "Directory of output visualization files.");
+DEFINE_string(run_mode, "fluid", "Mode of running(fluid/trt_fp32/trt_fp16/trt_int8)");
+DEFINE_int32(gpu_id, 0, "Device id of GPU to execute");
+DEFINE_bool(run_benchmark, false, "Whether to predict an image_file repeatedly for benchmark");
+DEFINE_bool(use_mkldnn, false, "Whether to use MKLDNN with CPU");
+DEFINE_int32(cpu_threads, 1, "Num of threads with CPU");
+DEFINE_int32(trt_min_shape, 1, "Min shape of TRT DynamicShape");
+DEFINE_int32(trt_max_shape, 1280, "Max shape of TRT DynamicShape");
+DEFINE_int32(trt_opt_shape, 640, "Opt shape of TRT DynamicShape");
+DEFINE_bool(trt_calib_mode, false, "If the model is produced by TRT offline quantitative calibration, trt_calib_mode needs to be set to True");
+DEFINE_bool(use_dark, true, "Whether to use DARK decode in keypoint postprocess");
+
+void PrintBenchmarkLog(std::vector<double> det_time, int img_num) {
+  LOG(INFO) << "----------------------- Config info -----------------------";
+  LOG(INFO) << "runtime_device: " << FLAGS_device;
+  LOG(INFO) << "ir_optim: " << "True";
+  LOG(INFO) << "enable_memory_optim: " << "True";
+  int has_trt = FLAGS_run_mode.find("trt");
+  if (has_trt >= 0) {
+    LOG(INFO) << "enable_tensorrt: " << "True";
+    std::string precision = FLAGS_run_mode.substr(4, 8);
+    LOG(INFO) << "precision: " << precision;
+  } else {
+    LOG(INFO) << "enable_tensorrt: " << "False";
+    LOG(INFO) << "precision: " << "fp32";
+  }
+  LOG(INFO) << "enable_mkldnn: " << (FLAGS_use_mkldnn ? "True" : "False");
+  LOG(INFO) << "cpu_math_library_num_threads: " << FLAGS_cpu_threads;
+  LOG(INFO) << "----------------------- Data info -----------------------";
+  LOG(INFO) << "batch_size: " << FLAGS_batch_size;
+  LOG(INFO) << "batch_size_keypoint: " << FLAGS_batch_size_keypoint;
+  LOG(INFO) << "input_shape: " << "dynamic shape";
+  LOG(INFO) << "----------------------- Model info -----------------------";
+  FLAGS_model_dir.erase(FLAGS_model_dir.find_last_not_of("/") + 1);
+  LOG(INFO) << "model_name: " << FLAGS_model_dir.substr(FLAGS_model_dir.find_last_of('/') + 1);
+  FLAGS_model_dir_keypoint.erase(FLAGS_model_dir_keypoint.find_last_not_of("/") + 1);
+  LOG(INFO) << "model_name: " << FLAGS_model_dir_keypoint.substr(FLAGS_model_dir_keypoint.find_last_of('/') + 1);
+  LOG(INFO) << "----------------------- Perf info ------------------------";
+  LOG(INFO) << "Total number of predicted data: " << img_num
+            << " and total time spent(ms): "
+            << std::accumulate(det_time.begin(), det_time.end(), 0.0);
+  LOG(INFO) << "preprocess_time(ms): " << det_time[0] / img_num
+            << ", inference_time(ms): " << det_time[1] / img_num
+            << ", postprocess_time(ms): " << det_time[2] / img_num;
+}
+
+static std::string DirName(const std::string& filepath) {
+  auto pos = filepath.rfind(OS_PATH_SEP);
+  if (pos == std::string::npos) {
+    return "";
+  }
+  return filepath.substr(0, pos);
+}
+
+static bool PathExists(const std::string& path) {
+#ifdef _WIN32
+  struct _stat buffer;
+  return (_stat(path.c_str(), &buffer) == 0);
+#else
+  struct stat buffer;
+  return (stat(path.c_str(), &buffer) == 0);
+#endif  // !_WIN32
+}
+
+static void MkDir(const std::string& path) {
+  if (PathExists(path)) return;
+  int ret = 0;
+#ifdef _WIN32
+  ret = _mkdir(path.c_str());
+#else
+  ret = mkdir(path.c_str(), 0755);
+#endif  // !_WIN32
+  if (ret != 0) {
+    std::string path_error(path);
+    path_error += " mkdir failed!";
+    throw std::runtime_error(path_error);
+  }
+}
+
+static void MkDirs(const std::string& path) {
+  if (path.empty()) return;
+  if (PathExists(path)) return;
+
+  MkDirs(DirName(path));
+  MkDir(path);
+}
+
+void PredictVideo(const std::string& video_path,
+                  PaddleDetection::ObjectDetector* det,
+                  PaddleDetection::KeyPointDetector* keypoint) {
+  // Open video
+  cv::VideoCapture capture;
+  if (FLAGS_camera_id != -1) {
+    capture.open(FLAGS_camera_id);
+  } else {
+    capture.open(video_path.c_str());
+  }
+  if (!capture.isOpened()) {
+    printf("cannot open video : %s\n", video_path.c_str());
+    return;
+  }
+
+  // Get Video info : resolution, fps
+  int video_width = static_cast<int>(capture.get(CV_CAP_PROP_FRAME_WIDTH));
+  int video_height = static_cast<int>(capture.get(CV_CAP_PROP_FRAME_HEIGHT));
+  int video_fps = static_cast<int>(capture.get(CV_CAP_PROP_FPS));
+
+  // Create VideoWriter for output
+  cv::VideoWriter video_out;
+  std::string video_out_path = "output.mp4";
+  video_out.open(video_out_path.c_str(),
+                 0x00000021,
+                 video_fps,
+                 cv::Size(video_width, video_height),
+                 true);
+  if (!video_out.isOpened()) {
+    printf("create video writer failed!\n");
+    return;
+  }
+
+  std::vector<PaddleDetection::ObjectResult> result;
+  std::vector<int> bbox_num;
+  std::vector<double> det_times;
+  auto labels = det->GetLabelList();
+  auto colormap = PaddleDetection::GenerateColorMap(labels.size());
+
+  // Store keypoint results
+  std::vector<PaddleDetection::KeyPointResult> result_kpts;
+  std::vector<cv::Mat> imgs_kpts;
+  std::vector<std::vector<float>> center_bs;
+  std::vector<std::vector<float>> scale_bs;
+  std::vector<int> colormap_kpts = PaddleDetection::GenerateColorMap(20);
+  // Capture all frames and do inference
+  cv::Mat frame;
+  int frame_id = 0;
+  bool is_rbox = false;
+  while (capture.read(frame)) {
+    if (frame.empty()) {
+      break;
+    }
+    std::vector<cv::Mat> imgs;
+    imgs.push_back(frame);
+    det->Predict(imgs, 0.5, 0, 1, &result, &bbox_num, &det_times);
+    for (const auto& item : result) {
+      if (item.rect.size() > 6) {
+        is_rbox = true;
+        printf("class=%d confidence=%.4f rect=[%d %d %d %d %d %d %d %d]\n",
+               item.class_id,
+               item.confidence,
+               item.rect[0],
+               item.rect[1],
+               item.rect[2],
+               item.rect[3],
+               item.rect[4],
+               item.rect[5],
+               item.rect[6],
+               item.rect[7]);
+      } else {
+        printf("class=%d confidence=%.4f rect=[%d %d %d %d]\n",
+               item.class_id,
+               item.confidence,
+               item.rect[0],
+               item.rect[1],
+               item.rect[2],
+               item.rect[3]);
+      }
+    }
+
+    if (keypoint) {
+      int imsize = result.size();
+      for (int i = 0; i < imsize; i++) {
+        auto item = result[i];
+        cv::Mat crop_img;
+        std::vector<double> keypoint_times;
+        std::vector<int> rect = {item.rect[0], item.rect[1], item.rect[2], item.rect[3]};
+        std::vector<float> center;
+        std::vector<float> scale;
+        if (item.class_id == 0) {
+          PaddleDetection::CropImg(frame, crop_img, rect, center, scale);
+          center_bs.emplace_back(center);
+          scale_bs.emplace_back(scale);
+          imgs_kpts.emplace_back(crop_img);
+        }
+
+        if (imgs_kpts.size() == FLAGS_batch_size_keypoint ||
+            ((i == imsize - 1) && !imgs_kpts.empty())) {
+          keypoint->Predict(imgs_kpts, center_bs, scale_bs, 0.5, 0, 1, &result_kpts, &keypoint_times);
+          imgs_kpts.clear();
+          center_bs.clear();
+          scale_bs.clear();
+        }
+      }
+      cv::Mat out_im = VisualizeKptsResult(frame, result_kpts, colormap_kpts);
+      video_out.write(out_im);
+    } else {
+      // Visualization result
+      cv::Mat out_im = PaddleDetection::VisualizeResult(
+          frame, result, labels, colormap, is_rbox);
+      video_out.write(out_im);
+    }
+
+    frame_id += 1;
+  }
+  capture.release();
+  video_out.release();
+}
+
+void PredictImage(const std::vector<std::string> all_img_paths,
+                  const int batch_size,
+                  const double threshold,
+                  const bool run_benchmark,
+                  PaddleDetection::ObjectDetector* det,
+                  PaddleDetection::KeyPointDetector* keypoint,
+                  const std::string& output_dir = "output") {
+  std::vector<double> det_t = {0, 0, 0};
+  int steps = ceil(float(all_img_paths.size()) / batch_size);
+  int kpts_imgs = 0;
+  std::vector<double> keypoint_t = {0, 0, 0};
+  printf("total images = %d, batch_size = %d, total steps = %d\n",
+         static_cast<int>(all_img_paths.size()), batch_size, steps);
+  for (int idx = 0; idx < steps; idx++) {
+    std::vector<cv::Mat> batch_imgs;
+    int left_image_cnt = all_img_paths.size() - idx * batch_size;
+    if (left_image_cnt > batch_size) {
+      left_image_cnt = batch_size;
+    }
+    for (int bs = 0; bs < left_image_cnt; bs++) {
+      std::string image_file_path = all_img_paths.at(idx * batch_size + bs);
+      cv::Mat im = cv::imread(image_file_path, 1);
+      batch_imgs.insert(batch_imgs.end(), im);
+    }
+
+    // Store all detected result
+    std::vector<PaddleDetection::ObjectResult> result;
+    std::vector<int> bbox_num;
+    std::vector<double> det_times;
+
+    // Store keypoint results
+    std::vector<PaddleDetection::KeyPointResult> result_kpts;
+    std::vector<cv::Mat> imgs_kpts;
+    std::vector<std::vector<float>> center_bs;
+    std::vector<std::vector<float>> scale_bs;
+    std::vector<int> colormap_kpts = PaddleDetection::GenerateColorMap(20);
+
+    bool is_rbox = false;
+    if (run_benchmark) {
+      det->Predict(batch_imgs, threshold, 10, 10, &result, &bbox_num, &det_times);
+    } else {
+      det->Predict(batch_imgs, 0.5, 10, 10, &result, &bbox_num, &det_times);
+    }
+    // get labels and colormap
+    auto labels = det->GetLabelList();
+    auto colormap = PaddleDetection::GenerateColorMap(labels.size());
+    int item_start_idx = 0;
+    for (int i = 0; i < left_image_cnt; i++) {
+      cv::Mat im = batch_imgs[i];
+      std::vector<PaddleDetection::ObjectResult> im_result;
+      int detect_num = 0;
+      for (int j = 0; j < bbox_num[i]; j++) {
+        PaddleDetection::ObjectResult item = result[item_start_idx + j];
+        if (item.confidence < threshold || item.class_id == -1) {
+          continue;
+        }
+        detect_num += 1;
+        im_result.push_back(item);
+        if (item.rect.size() > 6) {
+          is_rbox = true;
+          printf("class=%d confidence=%.4f rect=[%d %d %d %d %d %d %d %d]\n",
+                 item.class_id,
+                 item.confidence,
+                 item.rect[0],
+                 item.rect[1],
+                 item.rect[2],
+                 item.rect[3],
+                 item.rect[4],
+                 item.rect[5],
+                 item.rect[6],
+                 item.rect[7]);
+        } else {
+          printf("class=%d confidence=%.4f rect=[%d %d %d %d]\n",
+                 item.class_id,
+                 item.confidence,
+                 item.rect[0],
+                 item.rect[1],
+                 item.rect[2],
+                 item.rect[3]);
+        }
+      }
+      std::cout << all_img_paths.at(idx * batch_size + i)
+                << " The number of detected boxes: " << detect_num << std::endl;
+      item_start_idx = item_start_idx + bbox_num[i];
+
+      std::vector<int> compression_params;
+      compression_params.push_back(CV_IMWRITE_JPEG_QUALITY);
+      compression_params.push_back(95);
+      std::string output_path(output_dir);
+      if (output_dir.rfind(OS_PATH_SEP) != output_dir.size() - 1) {
+        output_path += OS_PATH_SEP;
+      }
+      std::string image_file_path = all_img_paths.at(idx * batch_size + i);
+      if (keypoint) {
+        int imsize = im_result.size();
+        for (int i = 0; i < imsize; i++) {
+          auto item = im_result[i];
+          cv::Mat crop_img;
+          std::vector<double> keypoint_times;
+          std::vector<int> rect = {item.rect[0], item.rect[1], item.rect[2], item.rect[3]};
+          std::vector<float> center;
+          std::vector<float> scale;
+          if (item.class_id == 0) {
+            PaddleDetection::CropImg(im, crop_img, rect, center, scale);
+            center_bs.emplace_back(center);
+            scale_bs.emplace_back(scale);
+            imgs_kpts.emplace_back(crop_img);
+            kpts_imgs += 1;
+          }
+
+          if (imgs_kpts.size() == FLAGS_batch_size_keypoint ||
+              ((i == imsize - 1) && !imgs_kpts.empty())) {
+            if (run_benchmark) {
+              keypoint->Predict(imgs_kpts, center_bs, scale_bs, 0.5, 10, 10, &result_kpts, &keypoint_times);
+            } else {
+              keypoint->Predict(imgs_kpts, center_bs, scale_bs, 0.5, 0, 1, &result_kpts, &keypoint_times);
+            }
+            imgs_kpts.clear();
+            center_bs.clear();
+            scale_bs.clear();
+            keypoint_t[0] += keypoint_times[0];
+            keypoint_t[1] += keypoint_times[1];
+            keypoint_t[2] += keypoint_times[2];
+          }
+        }
+        std::string kpts_savepath = output_path + "keypoint_" + image_file_path.substr(image_file_path.find_last_of('/') + 1);
+        cv::Mat kpts_vis_img = VisualizeKptsResult(im, result_kpts, colormap_kpts);
+        cv::imwrite(kpts_savepath, kpts_vis_img, compression_params);
+        printf("Visualized output saved as %s\n", kpts_savepath.c_str());
+      } else {
+        // Visualization result
+        cv::Mat vis_img = PaddleDetection::VisualizeResult(
+            im, im_result, labels, colormap, is_rbox);
+        std::string det_savepath = output_path + image_file_path.substr(image_file_path.find_last_of('/') + 1);
+        cv::imwrite(det_savepath, vis_img, compression_params);
+        printf("Visualized output saved as %s\n", det_savepath.c_str());
+      }
+    }
+
+    det_t[0] += det_times[0];
+    det_t[1] += det_times[1];
+    det_t[2] += det_times[2];
+  }
+  PrintBenchmarkLog(det_t, all_img_paths.size());
+  PrintBenchmarkLog(keypoint_t, kpts_imgs);
+}
+
+int main(int argc, char** argv) {
+  // Parsing command-line
+  google::ParseCommandLineFlags(&argc, &argv, true);
+  if (FLAGS_model_dir.empty()
+      || (FLAGS_image_file.empty() && FLAGS_image_dir.empty() && FLAGS_video_file.empty())) {
+    std::cout << "Usage: ./main --model_dir=/PATH/TO/INFERENCE_MODEL/ (--model_dir_keypoint=/PATH/TO/INFERENCE_MODEL/) "
+              << "--image_file=/PATH/TO/INPUT/IMAGE/" << std::endl;
+    return -1;
+  }
+  if (!(FLAGS_run_mode == "fluid" || FLAGS_run_mode == "trt_fp32"
+        || FLAGS_run_mode == "trt_fp16" || FLAGS_run_mode == "trt_int8")) {
+    std::cout << "run_mode should be 'fluid', 'trt_fp32', 'trt_fp16' or 'trt_int8'.";
+    return -1;
+  }
+  std::transform(FLAGS_device.begin(), FLAGS_device.end(), FLAGS_device.begin(), ::toupper);
+  if (!(FLAGS_device == "CPU" || FLAGS_device == "GPU" || FLAGS_device == "XPU")) {
+    std::cout << "device should be 'CPU', 'GPU' or 'XPU'.";
+    return -1;
+  }
+  if (FLAGS_use_gpu) {
+    std::cout << "Deprecated, please use `--device` to set the device you want to run.";
+    return -1;
+  }
+  // Load model and create an object detector
+  PaddleDetection::ObjectDetector det(FLAGS_model_dir, FLAGS_device, FLAGS_use_mkldnn,
+                                      FLAGS_cpu_threads, FLAGS_run_mode, FLAGS_batch_size, FLAGS_gpu_id,
+                                      FLAGS_trt_min_shape, FLAGS_trt_max_shape, FLAGS_trt_opt_shape,
+                                      FLAGS_trt_calib_mode);
+
+  PaddleDetection::KeyPointDetector* keypoint = nullptr;
+  if (!FLAGS_model_dir_keypoint.empty()) {
+    keypoint = new PaddleDetection::KeyPointDetector(FLAGS_model_dir_keypoint, FLAGS_device, FLAGS_use_mkldnn,
+                                                     FLAGS_cpu_threads, FLAGS_run_mode, FLAGS_batch_size_keypoint, FLAGS_gpu_id,
+                                                     FLAGS_trt_min_shape, FLAGS_trt_max_shape, FLAGS_trt_opt_shape,
+                                                     FLAGS_trt_calib_mode, FLAGS_use_dark);
+  }
+  // Do inference on input video or image
+  if (!FLAGS_video_file.empty() || FLAGS_camera_id != -1) {
+    PredictVideo(FLAGS_video_file, &det, keypoint);
+  } else if (!FLAGS_image_file.empty() || !FLAGS_image_dir.empty()) {
+    if (!PathExists(FLAGS_output_dir)) {
+      MkDirs(FLAGS_output_dir);
+    }
+    std::vector<std::string> all_img_paths;
+    std::vector<cv::String> cv_all_img_paths;
+    if (!FLAGS_image_file.empty()) {
+      all_img_paths.push_back(FLAGS_image_file);
+      if (FLAGS_batch_size > 1) {
+        std::cout << "batch_size should be 1, when set `image_file`." << std::endl;
+        return -1;
+      }
+    } else {
+      cv::glob(FLAGS_image_dir, cv_all_img_paths);
+      for (const auto& img_path : cv_all_img_paths) {
+        all_img_paths.push_back(img_path);
+      }
+    }
+    PredictImage(all_img_paths, FLAGS_batch_size, FLAGS_threshold,
+                 FLAGS_run_benchmark, &det, keypoint, FLAGS_output_dir);
+  }
+  delete keypoint;
+  keypoint = nullptr;
+  return 0;
+}
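The crop-and-flush batching used by both `PredictVideo` and `PredictImage` above, distilled into a standalone sketch (names hypothetical; `run` stands in for `keypoint->Predict`): accumulate crops until the buffer reaches the keypoint batch size or the last detection is reached, then run and clear.

```cpp
#include <functional>
#include <vector>

// Mirrors the imgs_kpts logic: flush when the buffer is full or at the
// final element, so no trailing partial batch is dropped.
template <typename T>
void BatchedRun(const std::vector<T>& items, size_t batch_size,
                const std::function<void(std::vector<T>&)>& run) {
  std::vector<T> buffer;
  for (size_t i = 0; i < items.size(); ++i) {
    buffer.push_back(items[i]);
    if (buffer.size() == batch_size ||
        (i == items.size() - 1 && !buffer.empty())) {
      run(buffer);  // e.g. keypoint->Predict(buffer, ...)
      buffer.clear();
    }
  }
}
```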
diff --git a/deploy/cpp/src/object_detector.cc b/deploy/cpp/src/object_detector.cc
index b83cdd4dc8d88a087f4918c23136cdd33200dcfb..a5750d363a5953809fc8bdb620d715be20ae6448 100644
--- a/deploy/cpp/src/object_detector.cc
+++ b/deploy/cpp/src/object_detector.cc
@@ -256,9 +256,9 @@ void ObjectDetector::Predict(const std::vector<cv::Mat> imgs,
     // TODO: reduce cost time
     in_data_all.insert(in_data_all.end(), inputs_.im_data_.begin(), inputs_.im_data_.end());
   }
-
+  auto preprocess_end = std::chrono::steady_clock::now();
   // Prepare input tensor
-
+
   auto input_names = predictor_->GetInputNames();
   for (const auto& tensor_name : input_names) {
     auto in_tensor = predictor_->GetInputHandle(tensor_name);
@@ -276,7 +276,6 @@ void ObjectDetector::Predict(const std::vector<cv::Mat> imgs,
     }
   }
-  auto preprocess_end = std::chrono::steady_clock::now();
   // Run predictor
   // warmup
   for (int i = 0; i < warmup; i++)
diff --git a/deploy/cpp/src/preprocess_op.cc b/deploy/cpp/src/preprocess_op.cc
index 8c10ad98e79c7e7c44947762d5d022f0ab8dce94..070935d734a5ded81fddc550a66509a55d96c09d 100644
--- a/deploy/cpp/src/preprocess_op.cc
+++ b/deploy/cpp/src/preprocess_op.cc
@@ -14,6 +14,7 @@
 #include <string>
 #include <vector>
+#include <algorithm>

 #include "include/preprocess_op.h"

@@ -50,6 +51,7 @@ void NormalizeImage::Run(cv::Mat* im, ImageBlob* data) {
 }

 void Permute::Run(cv::Mat* im, ImageBlob* data) {
+  (*im).convertTo(*im, CV_32FC3);
   int rh = im->rows;
   int rw = im->cols;
   int rc = im->channels();
@@ -131,10 +133,19 @@ void PadStride::Run(cv::Mat* im, ImageBlob* data) {
   };
 }

+void TopDownEvalAffine::Run(cv::Mat* im, ImageBlob* data) {
+  cv::resize(
+      *im, *im, cv::Size(trainsize_[0], trainsize_[1]), 0, 0, interp_);
+  // todo: Simd::ResizeBilinear();
+  data->in_net_shape_ = {
+      static_cast<float>(trainsize_[1]),
+      static_cast<float>(trainsize_[0]),
+  };
+}
+
 // Preprocessor op running order
 const std::vector<std::string> Preprocessor::RUN_ORDER = {
-  "InitInfo", "Resize", "NormalizeImage", "PadStride", "Permute"
+  "InitInfo", "TopDownEvalAffine", "Resize", "NormalizeImage", "PadStride", "Permute"
 };

 void Preprocessor::Run(cv::Mat* im, ImageBlob* data) {
@@ -145,4 +156,37 @@ void Preprocessor::Run(cv::Mat* im, ImageBlob* data) {
   }
 }

+void CropImg(cv::Mat& img, cv::Mat& crop_img, std::vector<int>& area,
+             std::vector<float>& center, std::vector<float>& scale,
+             float expandratio) {
+  int crop_x1 = std::max(0, area[0]);
+  int crop_y1 = std::max(0, area[1]);
+  int crop_x2 = std::min(img.cols - 1, area[2]);
+  int crop_y2 = std::min(img.rows - 1, area[3]);
+  int center_x = (crop_x1 + crop_x2) / 2.;
+  int center_y = (crop_y1 + crop_y2) / 2.;
+  int half_h = (crop_y2 - crop_y1) / 2.;
+  int half_w = (crop_x2 - crop_x1) / 2.;
+
+  // adjust h or w to keep the image ratio, expand the shorter edge
+  if (half_h * 3 > half_w * 4) {
+    half_w = static_cast<int>(half_h * 0.75);
+  } else {
+    half_h = static_cast<int>(half_w * 4 / 3);
+  }
+
+  crop_x1 = std::max(0, center_x - static_cast<int>(half_w * (1 + expandratio)));
+  crop_y1 = std::max(0, center_y - static_cast<int>(half_h * (1 + expandratio)));
+  crop_x2 = std::min(img.cols - 1, static_cast<int>(center_x + half_w * (1 + expandratio)));
+  crop_y2 = std::min(img.rows - 1, static_cast<int>(center_y + half_h * (1 + expandratio)));
+  crop_img = img(cv::Range(crop_y1, crop_y2 + 1), cv::Range(crop_x1, crop_x2 + 1));
+
+  center.clear();
+  center.emplace_back((crop_x1 + crop_x2) / 2);
+  center.emplace_back((crop_y1 + crop_y2) / 2);
+
+  scale.clear();
+  scale.emplace_back((crop_x2 - crop_x1));
+  scale.emplace_back((crop_y2 - crop_y1));
+}
+
 }  // namespace PaddleDetection
diff --git a/ppdet/engine/export_utils.py b/ppdet/engine/export_utils.py
index a46927f56924296b9b3564b3459fc703698a2e2e..35e22eec75de60f9073d096a6bf60304e7282cf5 100644
--- a/ppdet/engine/export_utils.py
+++ b/ppdet/engine/export_utils.py
@@ -59,6 +59,7 @@ def _parse_reader(reader_cfg, dataset_cfg, metric, arch, image_shape):
     label_list = [str(cat) for cat in catid2name.values()]

+    fuse_normalize = reader_cfg.get('fuse_normalize', False)
     sample_transforms = reader_cfg['sample_transforms']
     for st in sample_transforms[1:]:
         for key, value in st.items():
@@ -66,6 +67,8 @@ def _parse_reader(reader_cfg, dataset_cfg, metric, arch, image_shape):
             if key == 'Resize':
                 if int(image_shape[1]) != -1:
                     value['target_size'] = image_shape[1:]
+            if fuse_normalize and key == 'NormalizeImage':
+                continue
             p.update(value)
             preprocess_list.append(p)
     batch_transforms = reader_cfg.get('batch_transforms', None)
diff --git a/ppdet/engine/trainer.py b/ppdet/engine/trainer.py
index a4a50bca5b6934deae6fa790de29c57fb4e1a21e..85490ae4b0dbf39e485c31550e8692a6656f0942 100644
--- a/ppdet/engine/trainer.py
+++ b/ppdet/engine/trainer.py
@@ -88,6 +88,9 @@ class Trainer(object):
             self.model = self.cfg.model
             self.is_loaded_weights = True

+        # normalize params for deploy
+        self.model.load_meanstd(cfg['TestReader']['sample_transforms'])
+
         self.use_ema = ('use_ema' in cfg and cfg['use_ema'])
         if self.use_ema:
             ema_decay = self.cfg.get('ema_decay', 0.9998)
@@ -552,7 +555,11 @@ class Trainer(object):
         if image_shape is None:
             image_shape = [3, -1, -1]

-        if hasattr(self.model, 'deploy'): self.model.deploy = True
+        if hasattr(self.model, 'deploy'):
+            self.model.deploy = True
+        if hasattr(self.model, 'fuse_norm'):
+            self.model.fuse_norm = self.cfg['TestReader'].get('fuse_normalize',
+                                                              False)
         if hasattr(self.cfg, 'lite_deploy'):
             self.model.lite_deploy = self.cfg.lite_deploy
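What these Python changes fuse, in one formula: with `fuse_normalize` enabled, `NormalizeImage` is dropped from the exported preprocess list and `BaseArch.forward` applies the equivalent affine map inside the graph (a sketch; s, mu, sigma are the values read by `load_meanstd`):

```latex
y = \frac{s\,x - \mu}{\sigma},
\qquad
s = \begin{cases} 1/255 & \text{if NormalizeImage.is\_scale} \\ 1 & \text{otherwise,} \end{cases}
```

so deploy-side preprocessing reduces to resize plus permute, which is what the `fuse_normalize: false` default in the TestReader toggles.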
diff --git a/ppdet/modeling/architectures/keypoint_hrnet.py b/ppdet/modeling/architectures/keypoint_hrnet.py
index 64c269a7a5b6ad28f52d0cb5566aba1ead17cc3c..914bd043ce81c59616d427206b09478f299033d0 100644
--- a/ppdet/modeling/architectures/keypoint_hrnet.py
+++ b/ppdet/modeling/architectures/keypoint_hrnet.py
@@ -76,7 +76,12 @@ class TopDownHRNet(BaseArch):
         if self.training:
             return self.loss(hrnet_outputs, self.inputs)
         elif self.deploy:
-            return hrnet_outputs
+            outshape = hrnet_outputs.shape
+            max_idx = paddle.argmax(
+                hrnet_outputs.reshape(
+                    (outshape[0], outshape[1], outshape[2] * outshape[3])),
+                axis=-1)
+            return hrnet_outputs, max_idx
         else:
             if self.flip:
                 self.inputs['image'] = self.inputs['image'].flip([3])
@@ -199,6 +204,10 @@ class HRNetPostProcess(object):
         return coord

     def dark_postprocess(self, hm, coords, kernelsize):
+        '''DARK postprocessing, Zhang et al. Distribution-Aware Coordinate
+        Representation for Human Pose Estimation (CVPR 2020).
+        '''
+
         hm = self.gaussian_blur(hm, kernelsize)
         hm = np.maximum(hm, 1e-10)
         hm = np.log(hm)
diff --git a/ppdet/modeling/architectures/meta_arch.py b/ppdet/modeling/architectures/meta_arch.py
index 00085428d75bfc8d99533a22a08060354713e637..6ae925b7eb94d8b9cf7bf20d4f1c8714a38634f0 100644
--- a/ppdet/modeling/architectures/meta_arch.py
+++ b/ppdet/modeling/architectures/meta_arch.py
@@ -14,12 +14,40 @@ class BaseArch(nn.Layer):
     def __init__(self, data_format='NCHW'):
         super(BaseArch, self).__init__()
         self.data_format = data_format
+        self.inputs = {}
+        self.fuse_norm = False
+
+    def load_meanstd(self, cfg_transform):
+        self.scale = 1.
+        self.mean = paddle.to_tensor([0.485, 0.456, 0.406]).reshape(
+            (1, 3, 1, 1))
+        self.std = paddle.to_tensor([0.229, 0.224, 0.225]).reshape((1, 3, 1, 1))
+        for item in cfg_transform:
+            if 'NormalizeImage' in item:
+                self.mean = paddle.to_tensor(item['NormalizeImage'][
+                    'mean']).reshape((1, 3, 1, 1))
+                self.std = paddle.to_tensor(item['NormalizeImage'][
+                    'std']).reshape((1, 3, 1, 1))
+                if item['NormalizeImage']['is_scale']:
+                    self.scale = 1. / 255.
+                break
+        if self.data_format == 'NHWC':
+            self.mean = self.mean.reshape(1, 1, 1, 3)
+            self.std = self.std.reshape(1, 1, 1, 3)

     def forward(self, inputs):
         if self.data_format == 'NHWC':
             image = inputs['image']
             inputs['image'] = paddle.transpose(image, [0, 2, 3, 1])
-        self.inputs = inputs
+
+        if self.fuse_norm:
+            image = inputs['image']
+            self.inputs['image'] = (image * self.scale - self.mean) / self.std
+            self.inputs['im_shape'] = inputs['im_shape']
+            self.inputs['scale_factor'] = inputs['scale_factor']
+        else:
+            self.inputs = inputs
+
         self.model_arch()

         if self.training: