diff --git a/deploy/cpp/include/paddlex/paddlex.h b/deploy/cpp/include/paddlex/paddlex.h index b85cb09de49c42e4182103f0239aec2222cb2349..327058e4bd3251f41be82309f154b41eae11027c 100644 --- a/deploy/cpp/include/paddlex/paddlex.h +++ b/deploy/cpp/include/paddlex/paddlex.h @@ -232,5 +232,7 @@ class Model { std::vector outputs_; // a predictor which run the model predicting std::unique_ptr predictor_; + // input channel + int input_channel_; }; } // namespace PaddleX diff --git a/deploy/cpp/include/paddlex/transforms.h b/deploy/cpp/include/paddlex/transforms.h index b99abf991de0503c71127c9713df6a234f530fc3..46d0768b1bc6bcb2f2d70b541dd29314653873ac 100644 --- a/deploy/cpp/include/paddlex/transforms.h +++ b/deploy/cpp/include/paddlex/transforms.h @@ -82,6 +82,16 @@ class Normalize : public Transform { virtual void Init(const YAML::Node& item) { mean_ = item["mean"].as>(); std_ = item["std"].as>(); + if (item["min_val"].IsDefined()) { + min_val_ = item["min_val"].as>(); + } else { + min_val_ = std::vector(mean_.size(), 0.); + } + if (item["max_val"].IsDefined()) { + max_val_ = item["max_val"].as>(); + } else { + max_val_ = std::vector(mean_.size(), 255.); + } } virtual bool Run(cv::Mat* im, ImageBlob* data); @@ -89,6 +99,8 @@ class Normalize : public Transform { private: std::vector mean_; std::vector std_; + std::vector min_val_; + std::vector max_val_; }; /* @@ -229,6 +241,25 @@ class Padding : public Transform { int height_ = 0; std::vector im_value_; }; + +/* + * @brief + * This class execute clip operation on image matrix + * */ +class Clip : public Transform { + public: + virtual void Init(const YAML::Node& item) { + min_val_ = item["min_val"].as>(); + max_val_ = item["max_val"].as>(); + } + + virtual bool Run(cv::Mat* im, ImageBlob* data); + + private: + std::vector min_val_; + std::vector max_val_; +}; + /* * @brief * This class is transform operations manager. It stores all neccessary diff --git a/deploy/cpp/src/paddlex.cpp b/deploy/cpp/src/paddlex.cpp index e1c023d8c9618ce87c7a1b178d8f380dc0f9569e..5d33ae0e60285f41a2c24cca0ce96f51b54478bf 100644 --- a/deploy/cpp/src/paddlex.cpp +++ b/deploy/cpp/src/paddlex.cpp @@ -134,6 +134,11 @@ bool Model::load_config(const std::string& yaml_input) { int index = labels.size(); labels[index] = item.as(); } + if (config["_init_params"]["input_channel"].IsDefined()) { + input_channel_ = config["_init_params"]["input_channel"].as(); + } else { + input_channel_ = 3; + } return true; } @@ -179,7 +184,7 @@ bool Model::predict(const cv::Mat& im, ClsResult* result) { auto in_tensor = predictor_->GetInputTensor("image"); int h = inputs_.new_im_size_[0]; int w = inputs_.new_im_size_[1]; - in_tensor->Reshape({1, 3, h, w}); + in_tensor->Reshape({1, input_channel_, h, w}); in_tensor->copy_from_cpu(inputs_.im_data_.data()); predictor_->ZeroCopyRun(); // get result @@ -226,12 +231,12 @@ bool Model::predict(const std::vector& im_batch, auto in_tensor = predictor_->GetInputTensor("image"); int h = inputs_batch_[0].new_im_size_[0]; int w = inputs_batch_[0].new_im_size_[1]; - in_tensor->Reshape({batch_size, 3, h, w}); - std::vector inputs_data(batch_size * 3 * h * w); + in_tensor->Reshape({batch_size, input_channel_, h, w}); + std::vector inputs_data(batch_size * input_channel_ * h * w); for (int i = 0; i < batch_size; ++i) { std::copy(inputs_batch_[i].im_data_.begin(), inputs_batch_[i].im_data_.end(), - inputs_data.begin() + i * 3 * h * w); + inputs_data.begin() + i * input_channel_ * h * w); } in_tensor->copy_from_cpu(inputs_data.data()); // in_tensor->copy_from_cpu(inputs_.im_data_.data()); @@ -285,7 +290,7 @@ bool Model::predict(const cv::Mat& im, DetResult* result) { int h = inputs_.new_im_size_[0]; int w = inputs_.new_im_size_[1]; auto im_tensor = predictor_->GetInputTensor("image"); - im_tensor->Reshape({1, 3, h, w}); + im_tensor->Reshape({1, input_channel_, h, w}); im_tensor->copy_from_cpu(inputs_.im_data_.data()); if (name == "YOLOv3" || name == "PPYOLO") { @@ -439,12 +444,12 @@ bool Model::predict(const std::vector& im_batch, int h = inputs_batch_[0].new_im_size_[0]; int w = inputs_batch_[0].new_im_size_[1]; auto im_tensor = predictor_->GetInputTensor("image"); - im_tensor->Reshape({batch_size, 3, h, w}); - std::vector inputs_data(batch_size * 3 * h * w); + im_tensor->Reshape({batch_size, input_channel_, h, w}); + std::vector inputs_data(batch_size * input_channel_ * h * w); for (int i = 0; i < batch_size; ++i) { std::copy(inputs_batch_[i].im_data_.begin(), inputs_batch_[i].im_data_.end(), - inputs_data.begin() + i * 3 * h * w); + inputs_data.begin() + i * input_channel_ * h * w); } im_tensor->copy_from_cpu(inputs_data.data()); if (name == "YOLOv3" || name == "PPYOLO") { @@ -584,7 +589,7 @@ bool Model::predict(const cv::Mat& im, SegResult* result) { int h = inputs_.new_im_size_[0]; int w = inputs_.new_im_size_[1]; auto im_tensor = predictor_->GetInputTensor("image"); - im_tensor->Reshape({1, 3, h, w}); + im_tensor->Reshape({1, input_channel_, h, w}); im_tensor->copy_from_cpu(inputs_.im_data_.data()); // predict @@ -698,12 +703,12 @@ bool Model::predict(const std::vector& im_batch, int h = inputs_batch_[0].new_im_size_[0]; int w = inputs_batch_[0].new_im_size_[1]; auto im_tensor = predictor_->GetInputTensor("image"); - im_tensor->Reshape({batch_size, 3, h, w}); - std::vector inputs_data(batch_size * 3 * h * w); + im_tensor->Reshape({batch_size, input_channel_, h, w}); + std::vector inputs_data(batch_size * input_channel_ * h * w); for (int i = 0; i < batch_size; ++i) { std::copy(inputs_batch_[i].im_data_.begin(), inputs_batch_[i].im_data_.end(), - inputs_data.begin() + i * 3 * h * w); + inputs_data.begin() + i * input_channel_ * h * w); } im_tensor->copy_from_cpu(inputs_data.data()); // im_tensor->copy_from_cpu(inputs_.im_data_.data()); diff --git a/deploy/cpp/src/transforms.cpp b/deploy/cpp/src/transforms.cpp index dfbe6d9154e397cec4337dbfdc9a053c31ea151e..bf4fbb70a11c00b7a259824ed2544afef43e3631 100644 --- a/deploy/cpp/src/transforms.cpp +++ b/deploy/cpp/src/transforms.cpp @@ -20,7 +20,6 @@ #include #include - namespace PaddleX { std::map interpolations = {{"LINEAR", cv::INTER_LINEAR}, @@ -30,16 +29,20 @@ std::map interpolations = {{"LINEAR", cv::INTER_LINEAR}, {"LANCZOS4", cv::INTER_LANCZOS4}}; bool Normalize::Run(cv::Mat* im, ImageBlob* data) { - for (int h = 0; h < im->rows; h++) { - for (int w = 0; w < im->cols; w++) { - im->at(h, w)[0] = - (im->at(h, w)[0] / 255.0 - mean_[0]) / std_[0]; - im->at(h, w)[1] = - (im->at(h, w)[1] / 255.0 - mean_[1]) / std_[1]; - im->at(h, w)[2] = - (im->at(h, w)[2] / 255.0 - mean_[2]) / std_[2]; - } + std::vector range_val; + for (int c = 0; c < im->channels(); c++) { + range_val.push_back(max_val_[c] - min_val_[c]); } + + std::vector split_im; + cv::split(*im, split_im); + for (int c = 0; c < im->channels(); c++) { + cv::subtract(split_im[c], cv::Scalar(min_val_[c]), split_im[c]); + cv::divide(split_im[c], cv::Scalar(range_val[c]), split_im[c]); + cv::subtract(split_im[c], cv::Scalar(mean_[c]), split_im[c]); + cv::divide(split_im[c], cv::Scalar(std_[c]), split_im[c]); + } + cv::merge(split_im, *im); return true; } @@ -113,11 +116,22 @@ bool Padding::Run(cv::Mat* im, ImageBlob* data) { << ", but they should be greater than 0." << std::endl; return false; } - cv::Scalar value = cv::Scalar(im_value_[0], im_value_[1], im_value_[2]); - cv::copyMakeBorder( - *im, *im, 0, padding_h, 0, padding_w, cv::BORDER_CONSTANT, value); + std::vector padded_im_per_channel; + for (size_t i = 0; i < im->channels(); i++) { + const cv::Mat per_channel = cv::Mat(im->rows + padding_h, + im->cols + padding_w, + CV_32FC1, + cv::Scalar(im_value_[i])); + padded_im_per_channel.push_back(per_channel); + } + cv::Mat padded_im; + cv::merge(padded_im_per_channel, padded_im); + cv::Rect im_roi = cv::Rect(0, 0, im->cols, im->rows); + im->copyTo(padded_im(im_roi)); + *im = padded_im; data->new_im_size_[0] = im->rows; data->new_im_size_[1] = im->cols; + return true; } @@ -163,12 +177,26 @@ bool Resize::Run(cv::Mat* im, ImageBlob* data) { return true; } +bool Clip::Run(cv::Mat* im, ImageBlob* data) { + std::vector split_im; + cv::split(*im, split_im); + for (int c = 0; c < im->channels(); c++) { + cv::threshold(split_im[c], split_im[c], max_val_[c], max_val_[c], + cv::THRESH_TRUNC); + cv::subtract(cv::Scalar(0), split_im[c], split_im[c]); + cv::threshold(split_im[c], split_im[c], min_val_[c], min_val_[c], + cv::THRESH_TRUNC); + cv::divide(split_im[c], cv::Scalar(-1), split_im[c]); + } + cv::merge(split_im, *im); + return true; +} + void Transforms::Init(const YAML::Node& transforms_node, bool to_rgb) { transforms_.clear(); to_rgb_ = to_rgb; for (const auto& item : transforms_node) { std::string name = item.begin()->first.as(); - std::cout << "trans name: " << name << std::endl; std::shared_ptr transform = CreateTransform(name); transform->Init(item.begin()->second); transforms_.push_back(transform); @@ -189,6 +217,8 @@ std::shared_ptr Transforms::CreateTransform( return std::make_shared(); } else if (transform_name == "ResizeByLong") { return std::make_shared(); + } else if (transform_name == "Clip") { + return std::make_shared(); } else { std::cerr << "There's unexpected transform(name='" << transform_name << "')." << std::endl; @@ -201,7 +231,7 @@ bool Transforms::Run(cv::Mat* im, ImageBlob* data) { if (to_rgb_) { cv::cvtColor(*im, *im, cv::COLOR_BGR2RGB); } - (*im).convertTo(*im, CV_32FC3); + (*im).convertTo(*im, CV_32FC(im->channels())); data->ori_im_size_[0] = im->rows; data->ori_im_size_[1] = im->cols; data->new_im_size_[0] = im->rows; diff --git a/deploy/cpp/src/visualize.cpp b/deploy/cpp/src/visualize.cpp index 3443848c1c56ecea071c70491efecea7d35d06fe..d6efc7f9f5c19c436d9bc32a7a7330a0749b9dd5 100644 --- a/deploy/cpp/src/visualize.cpp +++ b/deploy/cpp/src/visualize.cpp @@ -89,7 +89,7 @@ cv::Mat Visualize(const cv::Mat& img, cv::Mat bin_mask(boxes[i].mask.shape[1], boxes[i].mask.shape[0], CV_32FC1, - boxes[i].mask.data.data()); + mask_data.data()); cv::Mat full_mask = cv::Mat::zeros(vis_img.size(), CV_8UC1); bin_mask.copyTo(full_mask(roi)); cv::Mat mask_ch[3]; diff --git a/docs/apis/analysis.md b/docs/apis/analysis.md index aef04aa0ac6b42de6b52dd958f600b7088ce4260..81b1c87cf5d78f104d2ce5e1fccae64a40c888ec 100644 --- a/docs/apis/analysis.md +++ b/docs/apis/analysis.md @@ -27,7 +27,7 @@ Seg分析器的分析接口,完成以下信息的分析统计: > * 图像各通道归一化后的均值和方差 > * 标注图中各类别的数量及比重 -[代码示例](https://github.com/PaddlePaddle/PaddleX/examples/multi-channel_remote_sensing/tools/analysis.py) +[代码示例](https://github.com/PaddlePaddle/PaddleX/blob/develop/examples/multi-channel_remote_sensing/tools/analysis.py) [统计信息示例](../../examples/multi-channel_remote_sensing/analysis.html#id2) @@ -43,6 +43,6 @@ Seg分析器用于计算图像截断后的均值和方差的接口。 > > * **clip_max_value** (list): 截断的上限,大于max_val的数值均设为max_val。 > > * **data_info_file** (str): 在analysis()接口中保存的分析结果文件(名为`train_information.pkl`)的路径。 -[代码示例](https://github.com/PaddlePaddle/PaddleX/examples/multi-channel_remote_sensing/tools/cal_clipped_mean_std.py) +[代码示例](https://github.com/PaddlePaddle/PaddleX/blob/develop/examples/multi-channel_remote_sensing/tools/cal_clipped_mean_std.py) [计算结果示例](../../examples/multi-channel_remote_sensing/analysis.html#id4) diff --git a/paddlex/cv/nets/detection/yolo_v3.py b/paddlex/cv/nets/detection/yolo_v3.py index aace152fab591a505db8027858a4d4c48959a017..f0cdba1ece7138859418222bee4c4388b30c81c4 100644 --- a/paddlex/cv/nets/detection/yolo_v3.py +++ b/paddlex/cv/nets/detection/yolo_v3.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import numpy as np + import paddle from paddle import fluid from paddle.fluid.param_attr import ParamAttr