From d35733193865a4e2b173aa5550cf553e5c769000 Mon Sep 17 00:00:00 2001
From: FlyingQianMM <245467267@qq.com>
Date: Wed, 16 Sep 2020 08:31:11 +0000
Subject: [PATCH] support multi-channel transforms

---
 deploy/cpp/include/paddlex/paddlex.h    |  2 +
 deploy/cpp/include/paddlex/transforms.h |  4 +-
 deploy/cpp/src/paddlex.cpp              | 29 ++++++++------
 deploy/cpp/src/transforms.cpp           | 53 ++++++++++++++-----------
 4 files changed, 51 insertions(+), 37 deletions(-)
diff --git a/deploy/cpp/include/paddlex/paddlex.h b/deploy/cpp/include/paddlex/paddlex.h
index b85cb09..e4d34ba 100644
--- a/deploy/cpp/include/paddlex/paddlex.h
+++ b/deploy/cpp/include/paddlex/paddlex.h
@@ -232,5 +232,7 @@ class Model {
   std::vector<float> outputs_;
   // a predictor which run the model predicting
   std::unique_ptr<paddle::PaddlePredictor> predictor_;
+  // number of channels of the model's input tensor (3 if the config omits it)
+  int input_channel;
 };
 }  // namespace PaddleX
diff --git a/deploy/cpp/include/paddlex/transforms.h b/deploy/cpp/include/paddlex/transforms.h
index 8b26581..46d0768 100644
--- a/deploy/cpp/include/paddlex/transforms.h
+++ b/deploy/cpp/include/paddlex/transforms.h
@@ -85,12 +85,12 @@ class Normalize : public Transform {
     if (item["min_val"].IsDefined()) {
       min_val_ = item["min_val"].as<std::vector<float>>();
     } else {
-      min_val_ = std::vector<float>(0., mean_.size());
+      min_val_ = std::vector<float>(mean_.size(), 0.);
     }
     if (item["max_val"].IsDefined()) {
       max_val_ = item["max_val"].as<std::vector<float>>();
     } else {
-      max_val_ = std::vector<float>(255., mean_.size());
+      max_val_ = std::vector<float>(mean_.size(), 255.);
     }
   }
 
diff --git a/deploy/cpp/src/paddlex.cpp b/deploy/cpp/src/paddlex.cpp
index e1c023d..a018ffd 100644
--- a/deploy/cpp/src/paddlex.cpp
+++ b/deploy/cpp/src/paddlex.cpp
@@ -134,6 +134,11 @@ bool Model::load_config(const std::string& yaml_input) {
     int index = labels.size();
     labels[index] = item.as<std::string>();
   }
+  if (config["_init_params"]["input_channel"].IsDefined()) {
+    input_channel = config["_init_params"]["input_channel"].as<int>();
+  } else {
+    input_channel = 3;
+  }
   return true;
 }
 
@@ -179,7 +184,7 @@ bool Model::predict(const cv::Mat& im, ClsResult* result) {
   auto in_tensor = predictor_->GetInputTensor("image");
   int h = inputs_.new_im_size_[0];
   int w = inputs_.new_im_size_[1];
-  in_tensor->Reshape({1, 3, h, w});
+  in_tensor->Reshape({1, input_channel, h, w});
   in_tensor->copy_from_cpu(inputs_.im_data_.data());
   predictor_->ZeroCopyRun();
   // get result
@@ -226,12 +231,12 @@ bool Model::predict(const std::vector<cv::Mat>& im_batch,
   auto in_tensor = predictor_->GetInputTensor("image");
   int h = inputs_batch_[0].new_im_size_[0];
   int w = inputs_batch_[0].new_im_size_[1];
-  in_tensor->Reshape({batch_size, 3, h, w});
-  std::vector<float> inputs_data(batch_size * 3 * h * w);
+  in_tensor->Reshape({batch_size, input_channel, h, w});
+  std::vector<float> inputs_data(batch_size * input_channel * h * w);
   for (int i = 0; i < batch_size; ++i) {
     std::copy(inputs_batch_[i].im_data_.begin(),
               inputs_batch_[i].im_data_.end(),
-              inputs_data.begin() + i * 3 * h * w);
+              inputs_data.begin() + i * input_channel * h * w);
   }
   in_tensor->copy_from_cpu(inputs_data.data());
   // in_tensor->copy_from_cpu(inputs_.im_data_.data());
@@ -285,7 +290,7 @@ bool Model::predict(const cv::Mat& im, DetResult* result) {
   int h = inputs_.new_im_size_[0];
   int w = inputs_.new_im_size_[1];
   auto im_tensor = predictor_->GetInputTensor("image");
-  im_tensor->Reshape({1, 3, h, w});
+  im_tensor->Reshape({1, input_channel, h, w});
   im_tensor->copy_from_cpu(inputs_.im_data_.data());
 
   if (name == "YOLOv3" || name == "PPYOLO") {
@@ -439,12 +444,12 @@ bool Model::predict(const std::vector<cv::Mat>& im_batch,
   int h = inputs_batch_[0].new_im_size_[0];
   int w = inputs_batch_[0].new_im_size_[1];
   auto im_tensor = predictor_->GetInputTensor("image");
-  im_tensor->Reshape({batch_size, 3, h, w});
-  std::vector<float> inputs_data(batch_size * 3 * h * w);
+  im_tensor->Reshape({batch_size, input_channel, h, w});
+  std::vector<float> inputs_data(batch_size * input_channel * h * w);
   for (int i = 0; i < batch_size; ++i) {
     std::copy(inputs_batch_[i].im_data_.begin(),
               inputs_batch_[i].im_data_.end(),
-              inputs_data.begin() + i * 3 * h * w);
+              inputs_data.begin() + i * input_channel * h * w);
   }
   im_tensor->copy_from_cpu(inputs_data.data());
   if (name == "YOLOv3" || name == "PPYOLO") {
@@ -584,7 +589,7 @@ bool Model::predict(const cv::Mat& im, SegResult* result) {
   int h = inputs_.new_im_size_[0];
   int w = inputs_.new_im_size_[1];
   auto im_tensor = predictor_->GetInputTensor("image");
-  im_tensor->Reshape({1, 3, h, w});
+  im_tensor->Reshape({1, input_channel, h, w});
   im_tensor->copy_from_cpu(inputs_.im_data_.data());
 
   // predict
@@ -698,12 +703,12 @@ bool Model::predict(const std::vector<cv::Mat>& im_batch,
   int h = inputs_batch_[0].new_im_size_[0];
   int w = inputs_batch_[0].new_im_size_[1];
   auto im_tensor = predictor_->GetInputTensor("image");
-  im_tensor->Reshape({batch_size, 3, h, w});
-  std::vector<float> inputs_data(batch_size * 3 * h * w);
+  im_tensor->Reshape({batch_size, input_channel, h, w});
+  std::vector<float> inputs_data(batch_size * input_channel * h * w);
   for (int i = 0; i < batch_size; ++i) {
     std::copy(inputs_batch_[i].im_data_.begin(),
               inputs_batch_[i].im_data_.end(),
-              inputs_data.begin() + i * 3 * h * w);
+              inputs_data.begin() + i * input_channel * h * w);
   }
   im_tensor->copy_from_cpu(inputs_data.data());
   // im_tensor->copy_from_cpu(inputs_.im_data_.data());
diff --git a/deploy/cpp/src/transforms.cpp b/deploy/cpp/src/transforms.cpp
index 76dafd5..bf4fbb7 100644
--- a/deploy/cpp/src/transforms.cpp
+++ b/deploy/cpp/src/transforms.cpp
@@ -20,7 +20,6 @@
 #include <string>
 #include <vector>
 
-
 namespace PaddleX {
 
 std::map<std::string, int> interpolations = {{"LINEAR", cv::INTER_LINEAR},
@@ -30,16 +29,20 @@ std::map<std::string, int> interpolations = {{"LINEAR", cv::INTER_LINEAR},
                                              {"LANCZOS4", cv::INTER_LANCZOS4}};
 
 bool Normalize::Run(cv::Mat* im, ImageBlob* data) {
+  // Normalizes every channel c of *im in place:
+  //   ((x - min_val_[c]) / (max_val_[c] - min_val_[c]) - mean_[c]) / std_[c]
+  std::vector<cv::Mat> planes;
+  cv::split(*im, planes);
   for (int c = 0; c < im->channels(); c++) {
-    float range_val = max_val_[c] - min_val_[c];
-    for (int h = 0; h < im->rows; h++) {
-      for (int w = 0; w < im->cols; w++) {
-        im->at<cv::Vec3f>(h, w)[c] =
-            ((im->at<cv::Vec3f>(h, w)[c] - min_val_[c]) / range_val -
-            mean_[c]) / std_[c];
-      }
-    }
+    // Same four element-wise steps, applied to one single-channel plane.
+    const float span = max_val_[c] - min_val_[c];
+    cv::subtract(planes[c], cv::Scalar(min_val_[c]), planes[c]);
+    cv::divide(planes[c], cv::Scalar(span), planes[c]);
+    cv::subtract(planes[c], cv::Scalar(mean_[c]), planes[c]);
+    cv::divide(planes[c], cv::Scalar(std_[c]), planes[c]);
   }
+  // Reassemble the planes into the multi-channel image.
+  cv::merge(planes, *im);
   return true;
 }
 
@@ -115,7 +118,10 @@ bool Padding::Run(cv::Mat* im, ImageBlob* data) {
   }
   std::vector<cv::Mat> padded_im_per_channel;
   for (size_t i = 0; i < im->channels(); i++) {
-    const cv::Mat per_channel = cv::Mat(im->size(), CV_32FC1, im_value_[i]);
+    const cv::Mat per_channel = cv::Mat(im->rows + padding_h,
+                                        im->cols + padding_w,
+                                        CV_32FC1,
+                                        cv::Scalar(im_value_[i]));
     padded_im_per_channel.push_back(per_channel);
   }
   cv::Mat padded_im;
@@ -125,6 +131,7 @@ bool Padding::Run(cv::Mat* im, ImageBlob* data) {
   *im = padded_im;
   data->new_im_size_[0] = im->rows;
   data->new_im_size_[1] = im->cols;
+
   return true;
 }
 
@@ -171,18 +178,17 @@ bool Resize::Run(cv::Mat* im, ImageBlob* data) {
 }
 
 bool Clip::Run(cv::Mat* im, ImageBlob* data) {
-  for (int h = 0; h < im->rows; h++) {
-    for (int w = 0; w < im->cols; w++) {
-      for (int c = 0; c < im->channels(); c++) {
-        if (im->at<cv::Vec3f>(h, w)[c] < min_val_[c]) {
-          im->at<cv::Vec3f>(h, w)[c] = min_val_[c];
-        }
-        if (im->at<cv::Vec3f>(h, w)[c] > max_val_[c]) {
-          im->at<cv::Vec3f>(h, w)[c] = max_val_[c];
-        }
-      }
-    }
+  // Clamp each channel to [min_val_[c], max_val_[c]]; TRUNC ignores maxval.
+  std::vector<cv::Mat> split_im;
+  cv::split(*im, split_im);
+  for (int c = 0; c < im->channels(); c++) {
+    cv::threshold(split_im[c], split_im[c], max_val_[c], 0, cv::THRESH_TRUNC);
+    // TRUNC only caps from above: negate, cap at -min_val_[c], negate back.
+    cv::subtract(cv::Scalar(0), split_im[c], split_im[c]);
+    cv::threshold(split_im[c], split_im[c], -min_val_[c], 0, cv::THRESH_TRUNC);
+    cv::divide(split_im[c], cv::Scalar(-1), split_im[c]);
   }
+  cv::merge(split_im, *im);
   return true;
 }
 
@@ -191,7 +197,6 @@ void Transforms::Init(const YAML::Node& transforms_node, bool to_rgb) {
   to_rgb_ = to_rgb;
   for (const auto& item : transforms_node) {
     std::string name = item.begin()->first.as<std::string>();
-    std::cout << "trans name: " << name << std::endl;
     std::shared_ptr<Transform> transform = CreateTransform(name);
     transform->Init(item.begin()->second);
     transforms_.push_back(transform);
@@ -212,6 +217,8 @@ std::shared_ptr<Transform> Transforms::CreateTransform(
     return std::make_shared<Padding>();
   } else if (transform_name == "ResizeByLong") {
     return std::make_shared<ResizeByLong>();
+  } else if (transform_name == "Clip") {
+    return std::make_shared<Clip>();
   } else {
     std::cerr << "There's unexpected transform(name='" << transform_name
               << "')." << std::endl;
@@ -224,7 +231,7 @@ bool Transforms::Run(cv::Mat* im, ImageBlob* data) {
   if (to_rgb_) {
     cv::cvtColor(*im, *im, cv::COLOR_BGR2RGB);
   }
-  (*im).convertTo(*im, CV_32FC3);
+  (*im).convertTo(*im, CV_32FC(im->channels()));
   data->ori_im_size_[0] = im->rows;
   data->ori_im_size_[1] = im->cols;
   data->new_im_size_[0] = im->rows;
-- 
GitLab