未验证 提交 d919f669 编写于 作者: S SunAhong1993 提交者: GitHub

Merge pull request #13 from PaddlePaddle/develop

00
......@@ -6,6 +6,7 @@
[![Version](https://img.shields.io/github/release/PaddlePaddle/PaddleX.svg)](https://github.com/PaddlePaddle/PaddleX/releases)
![python version](https://img.shields.io/badge/python-3.6+-orange.svg)
![support os](https://img.shields.io/badge/os-linux%2C%20win%2C%20mac-yellow.svg)
![QQGroup](https://img.shields.io/badge/QQ_Group-1045148026-52B6EF?style=social&logo=tencent-qq&logoColor=000&logoWidth=20)
PaddleX是基于飞桨核心框架、开发套件和工具组件的深度学习全流程开发工具。具备**全流程打通****融合产业实践****易用易集成**三大特点。
......
......@@ -73,7 +73,11 @@ endif()
if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/include")
include_directories("${PADDLE_DIR}/third_party/install/snappystream/include")
endif()
include_directories("${PADDLE_DIR}/third_party/install/zlib/include")
# zlib does not exist in 1.8.1
if (EXISTS "${PADDLE_DIR}/third_party/install/zlib/include")
include_directories("${PADDLE_DIR}/third_party/install/zlib/include")
endif()
include_directories("${PADDLE_DIR}/third_party/boost")
include_directories("${PADDLE_DIR}/third_party/eigen3")
......@@ -84,7 +88,10 @@ if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib")
link_directories("${PADDLE_DIR}/third_party/install/snappystream/lib")
endif()
link_directories("${PADDLE_DIR}/third_party/install/zlib/lib")
if (EXISTS "${PADDLE_DIR}/third_party/install/zlib/lib")
link_directories("${PADDLE_DIR}/third_party/install/zlib/lib")
endif()
link_directories("${PADDLE_DIR}/third_party/install/protobuf/lib")
link_directories("${PADDLE_DIR}/third_party/install/glog/lib")
link_directories("${PADDLE_DIR}/third_party/install/gflags/lib")
......@@ -107,6 +114,14 @@ include_directories(${OpenCV_INCLUDE_DIRS})
if (WIN32)
add_definitions("/DGOOGLE_GLOG_DLL_DECL=")
find_package(OpenMP REQUIRED)
if (OPENMP_FOUND)
message("OPENMP FOUND")
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} ${OpenMP_C_FLAGS}")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} ${OpenMP_C_FLAGS}")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${OpenMP_CXX_FLAGS}")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${OpenMP_CXX_FLAGS}")
endif()
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj /MTd")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd")
......@@ -186,8 +201,13 @@ if(WITH_STATIC_LIB)
set(DEPS
${PADDLE_DIR}/paddle/lib/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX})
else()
if (NOT WIN32)
set(DEPS
${PADDLE_DIR}/paddle/lib/libpaddle_fluid${CMAKE_SHARED_LIBRARY_SUFFIX})
else()
set(DEPS
${PADDLE_DIR}/paddle/lib/paddle_fluid${CMAKE_SHARED_LIBRARY_SUFFIX})
endif()
endif()
if (NOT WIN32)
......@@ -204,13 +224,16 @@ if (NOT WIN32)
else()
set(DEPS ${DEPS}
${MATH_LIB} ${MKLDNN_LIB}
glog gflags_static libprotobuf zlibstatic xxhash libyaml-cppmt)
glog gflags_static libprotobuf xxhash libyaml-cppmt)
if (EXISTS "${PADDLE_DIR}/third_party/install/zlib/lib")
set(DEPS ${DEPS} zlibstatic)
endif()
set(DEPS ${DEPS} libcmt shlwapi)
if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/lib")
set(DEPS ${DEPS} snappy)
endif()
if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib")
if (EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib")
set(DEPS ${DEPS} snappystream)
endif()
endif(NOT WIN32)
......@@ -236,7 +259,9 @@ if(WITH_ENCRYPTION)
link_directories("${ENCRYPTION_DIR}/lib")
set(DEPS ${DEPS} ${ENCRYPTION_DIR}/lib/libpmodel-decrypt${CMAKE_SHARED_LIBRARY_SUFFIX})
else()
message(FATAL_ERROR "Encryption Tool don't support WINDOWS")
include_directories("${ENCRYPTION_DIR}/include")
link_directories("${ENCRYPTION_DIR}/lib")
set(DEPS ${DEPS} ${ENCRYPTION_DIR}/lib/pmodel-decrypt${CMAKE_STATIC_LIBRARY_SUFFIX})
endif()
endif()
......@@ -284,10 +309,23 @@ if (WIN32 AND WITH_MKL)
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./mkldnn.dll
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./release/mklml.dll
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./release/libiomp5md.dll
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./release/mkldnn.dll
)
# for encryption
if (EXISTS "${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll")
add_custom_command(TARGET classifier POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll ./pmodel-decrypt.dll
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll ./release/pmodel-decrypt.dll
)
add_custom_command(TARGET detector POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll ./pmodel-decrypt.dll
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll ./release/pmodel-decrypt.dll
)
add_custom_command(TARGET segmenter POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll ./pmodel-decrypt.dll
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll ./release/pmodel-decrypt.dll
)
endif()
endif()
file(COPY "${CMAKE_SOURCE_DIR}/include/paddlex/visualize.h"
......
......@@ -22,9 +22,9 @@
"type": "PATH"
},
{
"name": "CMAKE_BUILD_TYPE",
"value": "Release",
"type": "STRING"
"name": "CUDA_LIB",
"value": "",
"type": "PATH"
},
{
"name": "WITH_STATIC_LIB",
......@@ -40,6 +40,16 @@
"name": "WITH_GPU",
"value": "False",
"type": "BOOL"
},
{
"name": "WITH_ENCRYPTION",
"value": "False",
"type": "BOOL"
},
{
"name": "ENCRYPTION_DIR",
"value": "",
"type": "PATH"
}
]
}
......
find_package(Git REQUIRED)
include(ExternalProject)
message("${CMAKE_BUILD_TYPE}")
......
......@@ -13,14 +13,19 @@
// limitations under the License.
#include <glog/logging.h>
#include <omp.h>
#include <algorithm>
#include <chrono> // NOLINT
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <utility>
#include "include/paddlex/paddlex.h"
using namespace std::chrono; // NOLINT
DEFINE_string(model_dir, "", "Path of inference model");
DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
DEFINE_bool(use_trt, false, "Infering with TensorRT");
......@@ -28,6 +33,10 @@ DEFINE_int32(gpu_id, 0, "GPU card id");
DEFINE_string(key, "", "key of encryption");
DEFINE_string(image, "", "Path of test image file");
DEFINE_string(image_list, "", "Path of test image list file");
DEFINE_int32(batch_size, 1, "Batch size of infering");
DEFINE_int32(thread_num,
omp_get_num_procs(),
"Number of preprocessing threads");
int main(int argc, char** argv) {
// Parsing command-line
......@@ -44,32 +53,80 @@ int main(int argc, char** argv) {
// 加载模型
PaddleX::Model model;
model.Init(FLAGS_model_dir, FLAGS_use_gpu, FLAGS_use_trt, FLAGS_gpu_id, FLAGS_key);
model.Init(FLAGS_model_dir,
FLAGS_use_gpu,
FLAGS_use_trt,
FLAGS_gpu_id,
FLAGS_key);
// 进行预测
double total_running_time_s = 0.0;
double total_imread_time_s = 0.0;
int imgs = 1;
if (FLAGS_image_list != "") {
std::ifstream inf(FLAGS_image_list);
if (!inf) {
std::cerr << "Fail to open file " << FLAGS_image_list << std::endl;
return -1;
}
// 多batch预测
std::string image_path;
std::vector<std::string> image_paths;
while (getline(inf, image_path)) {
PaddleX::ClsResult result;
cv::Mat im = cv::imread(image_path, 1);
model.predict(im, &result);
std::cout << "Predict label: " << result.category
<< ", label_id:" << result.category_id
<< ", score: " << result.score << std::endl;
image_paths.push_back(image_path);
}
imgs = image_paths.size();
for (int i = 0; i < image_paths.size(); i += FLAGS_batch_size) {
auto start = system_clock::now();
// 读图像
int im_vec_size =
std::min(static_cast<int>(image_paths.size()), i + FLAGS_batch_size);
std::vector<cv::Mat> im_vec(im_vec_size - i);
std::vector<PaddleX::ClsResult> results(im_vec_size - i,
PaddleX::ClsResult());
int thread_num = std::min(FLAGS_thread_num, im_vec_size - i);
#pragma omp parallel for num_threads(thread_num)
for (int j = i; j < im_vec_size; ++j) {
im_vec[j - i] = std::move(cv::imread(image_paths[j], 1));
}
auto imread_end = system_clock::now();
model.predict(im_vec, &results, thread_num);
auto imread_duration = duration_cast<microseconds>(imread_end - start);
total_imread_time_s += static_cast<double>(imread_duration.count()) *
microseconds::period::num /
microseconds::period::den;
auto end = system_clock::now();
auto duration = duration_cast<microseconds>(end - start);
total_running_time_s += static_cast<double>(duration.count()) *
microseconds::period::num /
microseconds::period::den;
for (int j = i; j < im_vec_size; ++j) {
std::cout << "Path:" << image_paths[j]
<< ", predict label: " << results[j - i].category
<< ", label_id:" << results[j - i].category_id
<< ", score: " << results[j - i].score << std::endl;
}
}
} else {
auto start = system_clock::now();
PaddleX::ClsResult result;
cv::Mat im = cv::imread(FLAGS_image, 1);
model.predict(im, &result);
auto end = system_clock::now();
auto duration = duration_cast<microseconds>(end - start);
total_running_time_s += static_cast<double>(duration.count()) *
microseconds::period::num /
microseconds::period::den;
std::cout << "Predict label: " << result.category
<< ", label_id:" << result.category_id
<< ", score: " << result.score << std::endl;
}
std::cout << "Total running time: " << total_running_time_s
<< " s, average running time: " << total_running_time_s / imgs
<< " s/img, total read img time: " << total_imread_time_s
<< " s, average read time: " << total_imread_time_s / imgs
<< " s/img, batch_size = " << FLAGS_batch_size << std::endl;
return 0;
}
......@@ -13,15 +13,21 @@
// limitations under the License.
#include <glog/logging.h>
#include <omp.h>
#include <algorithm>
#include <chrono> // NOLINT
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <utility>
#include "include/paddlex/paddlex.h"
#include "include/paddlex/visualize.h"
using namespace std::chrono; // NOLINT
DEFINE_string(model_dir, "", "Path of inference model");
DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
DEFINE_bool(use_trt, false, "Infering with TensorRT");
......@@ -30,6 +36,13 @@ DEFINE_string(key, "", "key of encryption");
DEFINE_string(image, "", "Path of test image file");
DEFINE_string(image_list, "", "Path of test image list file");
DEFINE_string(save_dir, "output", "Path to save visualized image");
DEFINE_int32(batch_size, 1, "Batch size of infering");
DEFINE_double(threshold,
0.5,
"The minimum scores of target boxes which are shown");
DEFINE_int32(thread_num,
omp_get_num_procs(),
"Number of preprocessing threads");
int main(int argc, char** argv) {
// 解析命令行参数
......@@ -43,11 +56,17 @@ int main(int argc, char** argv) {
std::cerr << "--image or --image_list need to be defined" << std::endl;
return -1;
}
// 加载模型
PaddleX::Model model;
model.Init(FLAGS_model_dir, FLAGS_use_gpu, FLAGS_use_trt, FLAGS_gpu_id, FLAGS_key);
model.Init(FLAGS_model_dir,
FLAGS_use_gpu,
FLAGS_use_trt,
FLAGS_gpu_id,
FLAGS_key);
double total_running_time_s = 0.0;
double total_imread_time_s = 0.0;
int imgs = 1;
auto colormap = PaddleX::GenerateColorMap(model.labels.size());
std::string save_dir = "output";
// 进行预测
......@@ -58,47 +77,83 @@ int main(int argc, char** argv) {
return -1;
}
std::string image_path;
std::vector<std::string> image_paths;
while (getline(inf, image_path)) {
PaddleX::DetResult result;
cv::Mat im = cv::imread(image_path, 1);
model.predict(im, &result);
for (int i = 0; i < result.boxes.size(); ++i) {
std::cout << "image file: " << image_path
<< ", predict label: " << result.boxes[i].category
<< ", label_id:" << result.boxes[i].category_id
<< ", score: " << result.boxes[i].score << ", box(xmin, ymin, w, h):("
<< result.boxes[i].coordinate[0] << ", "
<< result.boxes[i].coordinate[1] << ", "
<< result.boxes[i].coordinate[2] << ", "
<< result.boxes[i].coordinate[3] << ")" << std::endl;
image_paths.push_back(image_path);
}
imgs = image_paths.size();
for (int i = 0; i < image_paths.size(); i += FLAGS_batch_size) {
auto start = system_clock::now();
int im_vec_size =
std::min(static_cast<int>(image_paths.size()), i + FLAGS_batch_size);
std::vector<cv::Mat> im_vec(im_vec_size - i);
std::vector<PaddleX::DetResult> results(im_vec_size - i,
PaddleX::DetResult());
int thread_num = std::min(FLAGS_thread_num, im_vec_size - i);
#pragma omp parallel for num_threads(thread_num)
for (int j = i; j < im_vec_size; ++j) {
im_vec[j - i] = std::move(cv::imread(image_paths[j], 1));
}
auto imread_end = system_clock::now();
model.predict(im_vec, &results, thread_num);
auto imread_duration = duration_cast<microseconds>(imread_end - start);
total_imread_time_s += static_cast<double>(imread_duration.count()) *
microseconds::period::num /
microseconds::period::den;
auto end = system_clock::now();
auto duration = duration_cast<microseconds>(end - start);
total_running_time_s += static_cast<double>(duration.count()) *
microseconds::period::num /
microseconds::period::den;
// 输出结果目标框
for (int j = 0; j < im_vec_size - i; ++j) {
for (int k = 0; k < results[j].boxes.size(); ++k) {
std::cout << "image file: " << image_paths[i + j] << ", ";
std::cout << "predict label: " << results[j].boxes[k].category
<< ", label_id:" << results[j].boxes[k].category_id
<< ", score: " << results[j].boxes[k].score
<< ", box(xmin, ymin, w, h):("
<< results[j].boxes[k].coordinate[0] << ", "
<< results[j].boxes[k].coordinate[1] << ", "
<< results[j].boxes[k].coordinate[2] << ", "
<< results[j].boxes[k].coordinate[3] << ")" << std::endl;
}
}
// 可视化
cv::Mat vis_img =
PaddleX::Visualize(im, result, model.labels, colormap, 0.5);
for (int j = 0; j < im_vec_size - i; ++j) {
cv::Mat vis_img = PaddleX::Visualize(
im_vec[j], results[j], model.labels, colormap, FLAGS_threshold);
std::string save_path =
PaddleX::generate_save_path(FLAGS_save_dir, image_path);
PaddleX::generate_save_path(FLAGS_save_dir, image_paths[i + j]);
cv::imwrite(save_path, vis_img);
result.clear();
std::cout << "Visualized output saved as " << save_path << std::endl;
}
}
} else {
auto start = system_clock::now();
PaddleX::DetResult result;
cv::Mat im = cv::imread(FLAGS_image, 1);
model.predict(im, &result);
auto end = system_clock::now();
auto duration = duration_cast<microseconds>(end - start);
total_running_time_s += static_cast<double>(duration.count()) *
microseconds::period::num /
microseconds::period::den;
// 输出结果目标框
for (int i = 0; i < result.boxes.size(); ++i) {
std::cout << "image file: " << FLAGS_image << std::endl;
std::cout << ", predict label: " << result.boxes[i].category
<< ", label_id:" << result.boxes[i].category_id
<< ", score: " << result.boxes[i].score << ", box(xmin, ymin, w, h):("
<< result.boxes[i].coordinate[0] << ", "
<< result.boxes[i].coordinate[1] << ", "
<< ", score: " << result.boxes[i].score
<< ", box(xmin, ymin, w, h):(" << result.boxes[i].coordinate[0]
<< ", " << result.boxes[i].coordinate[1] << ", "
<< result.boxes[i].coordinate[2] << ", "
<< result.boxes[i].coordinate[3] << ")" << std::endl;
}
// 可视化
cv::Mat vis_img =
PaddleX::Visualize(im, result, model.labels, colormap, 0.5);
PaddleX::Visualize(im, result, model.labels, colormap, FLAGS_threshold);
std::string save_path =
PaddleX::generate_save_path(FLAGS_save_dir, FLAGS_image);
cv::imwrite(save_path, vis_img);
......@@ -106,5 +161,11 @@ int main(int argc, char** argv) {
std::cout << "Visualized output saved as " << save_path << std::endl;
}
std::cout << "Total running time: " << total_running_time_s
<< " s, average running time: " << total_running_time_s / imgs
<< " s/img, total read img time: " << total_imread_time_s
<< " s, average read img time: " << total_imread_time_s / imgs
<< " s, batch_size = " << FLAGS_batch_size << std::endl;
return 0;
}
......@@ -13,15 +13,20 @@
// limitations under the License.
#include <glog/logging.h>
#include <omp.h>
#include <algorithm>
#include <chrono> // NOLINT
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <utility>
#include "include/paddlex/paddlex.h"
#include "include/paddlex/visualize.h"
using namespace std::chrono; // NOLINT
DEFINE_string(model_dir, "", "Path of inference model");
DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
DEFINE_bool(use_trt, false, "Infering with TensorRT");
......@@ -30,6 +35,10 @@ DEFINE_string(key, "", "key of encryption");
DEFINE_string(image, "", "Path of test image file");
DEFINE_string(image_list, "", "Path of test image list file");
DEFINE_string(save_dir, "output", "Path to save visualized image");
DEFINE_int32(batch_size, 1, "Batch size of infering");
DEFINE_int32(thread_num,
omp_get_num_procs(),
"Number of preprocessing threads");
int main(int argc, char** argv) {
// 解析命令行参数
......@@ -46,8 +55,15 @@ int main(int argc, char** argv) {
// 加载模型
PaddleX::Model model;
model.Init(FLAGS_model_dir, FLAGS_use_gpu, FLAGS_use_trt, FLAGS_gpu_id, FLAGS_key);
model.Init(FLAGS_model_dir,
FLAGS_use_gpu,
FLAGS_use_trt,
FLAGS_gpu_id,
FLAGS_key);
double total_running_time_s = 0.0;
double total_imread_time_s = 0.0;
int imgs = 1;
auto colormap = PaddleX::GenerateColorMap(model.labels.size());
// 进行预测
if (FLAGS_image_list != "") {
......@@ -57,23 +73,54 @@ int main(int argc, char** argv) {
return -1;
}
std::string image_path;
std::vector<std::string> image_paths;
while (getline(inf, image_path)) {
PaddleX::SegResult result;
cv::Mat im = cv::imread(image_path, 1);
model.predict(im, &result);
image_paths.push_back(image_path);
}
imgs = image_paths.size();
for (int i = 0; i < image_paths.size(); i += FLAGS_batch_size) {
auto start = system_clock::now();
int im_vec_size =
std::min(static_cast<int>(image_paths.size()), i + FLAGS_batch_size);
std::vector<cv::Mat> im_vec(im_vec_size - i);
std::vector<PaddleX::SegResult> results(im_vec_size - i,
PaddleX::SegResult());
int thread_num = std::min(FLAGS_thread_num, im_vec_size - i);
#pragma omp parallel for num_threads(thread_num)
for (int j = i; j < im_vec_size; ++j) {
im_vec[j - i] = std::move(cv::imread(image_paths[j], 1));
}
auto imread_end = system_clock::now();
model.predict(im_vec, &results, thread_num);
auto imread_duration = duration_cast<microseconds>(imread_end - start);
total_imread_time_s += static_cast<double>(imread_duration.count()) *
microseconds::period::num /
microseconds::period::den;
auto end = system_clock::now();
auto duration = duration_cast<microseconds>(end - start);
total_running_time_s += static_cast<double>(duration.count()) *
microseconds::period::num /
microseconds::period::den;
// 可视化
for (int j = 0; j < im_vec_size - i; ++j) {
cv::Mat vis_img =
PaddleX::Visualize(im, result, model.labels, colormap);
PaddleX::Visualize(im_vec[j], results[j], model.labels, colormap);
std::string save_path =
PaddleX::generate_save_path(FLAGS_save_dir, image_path);
PaddleX::generate_save_path(FLAGS_save_dir, image_paths[i + j]);
cv::imwrite(save_path, vis_img);
result.clear();
std::cout << "Visualized output saved as " << save_path << std::endl;
}
}
} else {
auto start = system_clock::now();
PaddleX::SegResult result;
cv::Mat im = cv::imread(FLAGS_image, 1);
model.predict(im, &result);
auto end = system_clock::now();
auto duration = duration_cast<microseconds>(end - start);
total_running_time_s += static_cast<double>(duration.count()) *
microseconds::period::num /
microseconds::period::den;
// 可视化
cv::Mat vis_img = PaddleX::Visualize(im, result, model.labels, colormap);
std::string save_path =
......@@ -82,6 +129,11 @@ int main(int argc, char** argv) {
result.clear();
std::cout << "Visualized output saved as " << save_path << std::endl;
}
std::cout << "Total running time: " << total_running_time_s
<< " s, average running time: " << total_running_time_s / imgs
<< " s/img, total read img time: " << total_imread_time_s
<< " s, average read img time: " << total_imread_time_s / imgs
<< " s, batch_size = " << FLAGS_batch_size << std::endl;
return 0;
}
......@@ -54,4 +54,4 @@ class ConfigPaser {
YAML::Node Transforms_;
};
} // namespace PaddleDetection
} // namespace PaddleX
......@@ -16,8 +16,11 @@
#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <numeric>
#include <string>
#include <vector>
#include "yaml-cpp/yaml.h"
#ifdef _WIN32
......@@ -28,19 +31,48 @@
#include "paddle_inference_api.h" // NOLINT
#include "config_parser.h"
#include "results.h"
#include "transforms.h"
#include "config_parser.h" // NOLINT
#include "results.h" // NOLINT
#include "transforms.h" // NOLINT
#ifdef WITH_ENCRYPTION
#include "paddle_model_decrypt.h"
#include "model_code.h"
#include "paddle_model_decrypt.h" // NOLINT
#include "model_code.h" // NOLINT
#endif
namespace PaddleX {
/*
* @brief
* This class encapsulates all necessary proccess steps of model infering, which
* include image matrix preprocessing, model predicting and results postprocessing.
* The entire process of model infering can be simplified as below:
* 1. preprocess image matrix (resize, padding, ......)
* 2. model infer
* 3. postprocess the results which generated from model infering
*
* @example
* PaddleX::Model cls_model;
* // initialize model configuration
* cls_model.Init(cls_model_dir, use_gpu, use_trt, gpu_id, encryption_key);
* // define a Classification result object
* PaddleX::ClsResult cls_result;
* // get image matrix from image file
* cv::Mat im = cv::imread(image_file_path, 1);
* cls_model.predict(im, &cls_result);
* */
class Model {
public:
/*
* @brief
* This method aims to initialize the model configuration
*
* @param model_dir: the directory which contains model.yml
* @param use_gpu: use gpu or not when infering
* @param use_trt: use Tensor RT or not when infering
* @param gpu_id: the id of gpu when infering with using gpu
* @param key: the key of encryption when using encrypted model
* */
void Init(const std::string& model_dir,
bool use_gpu = false,
bool use_trt = false,
......@@ -55,26 +87,134 @@ class Model {
int gpu_id = 0,
std::string key = "");
bool load_config(const std::string& model_dir);
/*
* @brief
* This method aims to load model configurations which include
* transform steps and label list
*
* @param yaml_input: model configuration string
* @return true if load configuration successfully
* */
bool load_config(const std::string& yaml_input);
/*
* @brief
* This method aims to transform single image matrix, the result will be
* returned at second parameter.
*
* @param input_im: single image matrix to be transformed
* @param blob: the raw data of single image matrix after transformed
* @return true if preprocess image matrix successfully
* */
bool preprocess(const cv::Mat& input_im, ImageBlob* blob);
/*
* @brief
* This method aims to transform mutiple image matrixs, the result will be
* returned at second parameter.
*
* @param input_im_batch: a batch of image matrixs to be transformed
* @param blob_blob: raw data of a batch of image matrixs after transformed
* @param thread_num: the number of preprocessing threads,
* each thread run preprocess on single image matrix
* @return true if preprocess a batch of image matrixs successfully
* */
bool preprocess(const std::vector<cv::Mat> &input_im_batch,
std::vector<ImageBlob> *blob_batch,
int thread_num = 1);
/*
* @brief
* This method aims to execute classification model prediction on single image matrix,
* the result will be returned at second parameter.
*
* @param im: single image matrix to be predicted
* @param result: classification prediction result data after postprocessed
* @return true if predict successfully
* */
bool predict(const cv::Mat& im, ClsResult* result);
/*
* @brief
* This method aims to execute classification model prediction on a batch of image matrixs,
* the result will be returned at second parameter.
*
* @param im: a batch of image matrixs to be predicted
* @param results: a batch of classification prediction result data after postprocessed
* @param thread_num: the number of predicting threads, each thread run prediction
* on single image matrix
* @return true if predict successfully
* */
bool predict(const std::vector<cv::Mat> &im_batch,
std::vector<ClsResult> *results,
int thread_num = 1);
/*
* @brief
* This method aims to execute detection or instance segmentation model prediction
* on single image matrix, the result will be returned at second parameter.
*
* @param im: single image matrix to be predicted
* @param result: detection or instance segmentation prediction result data after postprocessed
* @return true if predict successfully
* */
bool predict(const cv::Mat& im, DetResult* result);
/*
* @brief
* This method aims to execute detection or instance segmentation model prediction
* on a batch of image matrixs, the result will be returned at second parameter.
*
* @param im: a batch of image matrix to be predicted
* @param result: detection or instance segmentation prediction result data after postprocessed
* @param thread_num: the number of predicting threads, each thread run prediction
* on single image matrix
* @return true if predict successfully
* */
bool predict(const std::vector<cv::Mat> &im_batch,
std::vector<DetResult> *result,
int thread_num = 1);
/*
* @brief
* This method aims to execute segmentation model prediction on single image matrix,
* the result will be returned at second parameter.
*
* @param im: single image matrix to be predicted
* @param result: segmentation prediction result data after postprocessed
* @return true if predict successfully
* */
bool predict(const cv::Mat& im, SegResult* result);
bool postprocess(SegResult* result);
bool postprocess(DetResult* result);
/*
* @brief
* This method aims to execute segmentation model prediction on a batch of image matrix,
* the result will be returned at second parameter.
*
* @param im: a batch of image matrix to be predicted
* @param result: segmentation prediction result data after postprocessed
* @param thread_num: the number of predicting threads, each thread run prediction
* on single image matrix
* @return true if predict successfully
* */
bool predict(const std::vector<cv::Mat> &im_batch,
std::vector<SegResult> *result,
int thread_num = 1);
// model type, include 3 type: classifier, detector, segmenter
std::string type;
// model name, such as FasterRCNN, YOLOV3 and so on.
std::string name;
std::map<int, std::string> labels;
// transform(preprocessing) pipeline manager
Transforms transforms_;
// single input preprocessed data
ImageBlob inputs_;
// batch input preprocessed data
std::vector<ImageBlob> inputs_batch_;
// raw data of predicting results
std::vector<float> outputs_;
// a predictor which run the model predicting
std::unique_ptr<paddle::PaddlePredictor> predictor_;
};
} // namespce of PaddleX
} // namespace PaddleX
......@@ -20,9 +20,15 @@
namespace PaddleX {
/*
* @brief
* This class represents mask in instance segmentation tasks.
* */
template <class T>
struct Mask {
// raw data of mask
std::vector<T> data;
// the shape of mask
std::vector<int> shape;
void clear() {
data.clear();
......@@ -30,19 +36,34 @@ struct Mask {
}
};
/*
* @brief
* This class represents target box in detection or instance segmentation tasks.
* */
struct Box {
int category_id;
// category label this box belongs to
std::string category;
// confidence score
float score;
std::vector<float> coordinate;
Mask<float> mask;
};
/*
* @brief
* This class is prediction result based class.
* */
class BaseResult {
public:
// model type
std::string type = "base";
};
/*
* @brief
* This class represent classification result.
* */
class ClsResult : public BaseResult {
public:
int category_id;
......@@ -51,17 +72,28 @@ class ClsResult : public BaseResult {
std::string type = "cls";
};
/*
* @brief
* This class represent detection or instance segmentation result.
* */
class DetResult : public BaseResult {
public:
// target boxes
std::vector<Box> boxes;
int mask_resolution;
std::string type = "det";
void clear() { boxes.clear(); }
};
/*
* @brief
* This class represent segmentation result.
* */
class SegResult : public BaseResult {
public:
// represent label of each pixel on image matrix
Mask<int64_t> label_map;
// represent score of each pixel on image matrix
Mask<float> score_map;
std::string type = "seg";
void clear() {
......
......@@ -28,7 +28,10 @@
namespace PaddleX {
// Object for storing all preprocessed data
/*
* @brief
* This class represents object for storing all preprocessed data
* */
class ImageBlob {
public:
// Original image height and width
......@@ -45,21 +48,34 @@ class ImageBlob {
std::vector<float> im_data_;
void clear() {
ori_im_size_.clear();
new_im_size_.clear();
im_size_before_resize_.clear();
reshape_order_.clear();
im_data_.clear();
}
};
// Abstraction of preprocessing opration class
/*
* @brief
* Abstraction of preprocessing operation class
* */
class Transform {
public:
virtual void Init(const YAML::Node& item) = 0;
/*
* @brief
* This method executes preprocessing operation on image matrix,
* result will be returned at second parameter.
* @param im: single image matrix to be preprocessed
* @param data: the raw data of single image matrix after preprocessed
* @return true if transform successfully
* */
virtual bool Run(cv::Mat* im, ImageBlob* data) = 0;
};
/*
* @brief
* This class execute normalization operation on image matrix
* */
class Normalize : public Transform {
public:
virtual void Init(const YAML::Node& item) {
......@@ -74,6 +90,14 @@ class Normalize : public Transform {
std::vector<float> std_;
};
/*
* @brief
* This class execute resize by short operation on image matrix. At first, it resizes
* the short side of image matrix to specified length. Accordingly, the long side
* will be resized in the same proportion. If new length of long side exceeds max
* size, the long size will be resized to max size, and the short size will be
* resized in the same proportion
* */
class ResizeByShort : public Transform {
public:
virtual void Init(const YAML::Node& item) {
......@@ -92,6 +116,12 @@ class ResizeByShort : public Transform {
int max_size_;
};
/*
* @brief
* This class execute resize by long operation on image matrix. At first, it resizes
* the long side of image matrix to specified length. Accordingly, the short side
* will be resized in the same proportion.
* */
class ResizeByLong : public Transform {
public:
virtual void Init(const YAML::Node& item) {
......@@ -103,13 +133,20 @@ class ResizeByLong : public Transform {
int long_size_;
};
/*
* @brief
* This class execute resize operation on image matrix. It resizes width and height
* to specified length.
* */
class Resize : public Transform {
public:
virtual void Init(const YAML::Node& item) {
if (item["interp"].IsDefined()) {
interp_ = item["interp"].as<std::string>();
}
if (item["target_size"].IsScalar()) {
height_ = item["target_size"].as<int>();
width_ = item["target_size"].as<int>();
interp_ = item["interp"].as<std::string>();
} else if (item["target_size"].IsSequence()) {
std::vector<int> target_size = item["target_size"].as<std::vector<int>>();
width_ = target_size[0];
......@@ -128,6 +165,11 @@ class Resize : public Transform {
std::string interp_;
};
/*
* @brief
* This class execute center crop operation on image matrix. It crops the center
* of image matrix accroding to specified size.
* */
class CenterCrop : public Transform {
public:
virtual void Init(const YAML::Node& item) {
......@@ -147,6 +189,11 @@ class CenterCrop : public Transform {
int width_;
};
/*
* @brief
* This class execute padding operation on image matrix. It makes border on edge
* of image matrix.
* */
class Padding : public Transform {
public:
virtual void Init(const YAML::Node& item) {
......@@ -175,7 +222,11 @@ class Padding : public Transform {
int width_ = 0;
int height_ = 0;
};
/*
* @brief
* This class is transform operations manager. It stores all neccessary
* transform operations and run them in correct order.
* */
class Transforms {
public:
void Init(const YAML::Node& node, bool to_rgb = true);
......
......@@ -43,20 +43,55 @@
namespace PaddleX {
// Generate visualization colormap for each class
/*
* @brief
* Generate visualization colormap for each class
*
* @param number of class
* @return color map, the size of vector is 3 * num_class
* */
std::vector<int> GenerateColorMap(int num_class);
/*
* @brief
* Visualize the detection result
*
* @param img: initial image matrix
* @param results: the detection result
* @param labels: label map
* @param colormap: visualization color map
* @return visualized image matrix
* */
cv::Mat Visualize(const cv::Mat& img,
const DetResult& results,
const std::map<int, std::string>& labels,
const std::vector<int>& colormap,
float threshold = 0.5);
/*
* @brief
* Visualize the segmentation result
*
* @param img: initial image matrix
* @param results: the detection result
* @param labels: label map
* @param colormap: visualization color map
* @return visualized image matrix
* */
cv::Mat Visualize(const cv::Mat& img,
const SegResult& result,
const std::map<int, std::string>& labels,
const std::vector<int>& colormap);
/*
* @brief
* generate save path for visualized image matrix
*
* @param save_dir: directory for saving visualized image matrix
* @param file_path: sourcen image file path
* @return path of saving visualized result
* */
std::string generate_save_path(const std::string& save_dir,
const std::string& file_path);
} // namespce of PaddleX
} // namespace PaddleX
......@@ -4,10 +4,10 @@ WITH_GPU=OFF
WITH_MKL=ON
# 是否集成 TensorRT(仅WITH_GPU=ON 有效)
WITH_TENSORRT=OFF
# TensorRT 的路径
TENSORRT_DIR=/path/to/TensorRT/
# Paddle 预测库路径
PADDLE_DIR=/docker/jiangjiajun/PaddleDetection/deploy/cpp/fluid_inference
# TensorRT 的路径,如果需要集成TensorRT,需修改为您实际安装的TensorRT路径
TENSORRT_DIR=/root/projects/TensorRT/
# Paddle 预测库路径, 请修改为您实际安装的预测库路径
PADDLE_DIR=/root/projects/fluid_inference
# Paddle 的预测库是否使用静态库来编译
# 使用TensorRT时,Paddle的预测库通常为动态库
WITH_STATIC_LIB=OFF
......
......@@ -11,9 +11,11 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <omp.h>
#include <algorithm>
#include <fstream>
#include <cstring>
#include "include/paddlex/paddlex.h"
namespace PaddleX {
void Model::create_predictor(const std::string& model_dir,
......@@ -21,22 +23,37 @@ void Model::create_predictor(const std::string& model_dir,
bool use_trt,
int gpu_id,
std::string key) {
// 读取配置文件
if (!load_config(model_dir)) {
std::cerr << "Parse file 'model.yml' failed!" << std::endl;
exit(-1);
}
paddle::AnalysisConfig config;
std::string model_file = model_dir + OS_PATH_SEP + "__model__";
std::string params_file = model_dir + OS_PATH_SEP + "__params__";
std::string yaml_file = model_dir + OS_PATH_SEP + "model.yml";
std::string yaml_input = "";
#ifdef WITH_ENCRYPTION
if (key != ""){
if (key != "") {
model_file = model_dir + OS_PATH_SEP + "__model__.encrypted";
params_file = model_dir + OS_PATH_SEP + "__params__.encrypted";
paddle_security_load_model(&config, key.c_str(), model_file.c_str(), params_file.c_str());
yaml_file = model_dir + OS_PATH_SEP + "model.yml.encrypted";
paddle_security_load_model(
&config, key.c_str(), model_file.c_str(), params_file.c_str());
yaml_input = decrypt_file(yaml_file.c_str(), key.c_str());
}
#endif
if (key == ""){
if (yaml_input == "") {
// 读取配置文件
std::ifstream yaml_fin(yaml_file);
yaml_fin.seekg(0, std::ios::end);
size_t yaml_file_size = yaml_fin.tellg();
yaml_input.assign(yaml_file_size, ' ');
yaml_fin.seekg(0);
yaml_fin.read(&yaml_input[0], yaml_file_size);
}
// 读取配置文件内容
if (!load_config(yaml_input)) {
std::cerr << "Parse file 'model.yml' failed!" << std::endl;
exit(-1);
}
if (key == "") {
config.SetModel(model_file, params_file);
}
if (use_gpu) {
......@@ -60,18 +77,17 @@ void Model::create_predictor(const std::string& model_dir,
predictor_ = std::move(CreatePaddlePredictor(config));
}
bool Model::load_config(const std::string& model_dir) {
std::string yaml_file = model_dir + OS_PATH_SEP + "model.yml";
YAML::Node config = YAML::LoadFile(yaml_file);
bool Model::load_config(const std::string& yaml_input) {
YAML::Node config = YAML::Load(yaml_input);
type = config["_Attributes"]["model_type"].as<std::string>();
name = config["Model"].as<std::string>();
std::string version = config["version"].as<std::string>();
if (version[0] == '0') {
std::cerr << "[Init] Version of the loaded model is lower than 1.0.0, deployment "
<< "cannot be done, please refer to "
<< "https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/tutorials/deploy/upgrade_version.md "
<< "to transfer version."
<< std::endl;
std::cerr << "[Init] Version of the loaded model is lower than 1.0.0, "
<< "deployment cannot be done, please refer to "
<< "https://github.com/PaddlePaddle/PaddleX/blob/develop/docs"
<< "/tutorials/deploy/upgrade_version.md "
<< "to transfer version." << std::endl;
return false;
}
bool to_rgb = true;
......@@ -104,17 +120,29 @@ bool Model::preprocess(const cv::Mat& input_im, ImageBlob* blob) {
return true;
}
// use openmp
bool Model::preprocess(const std::vector<cv::Mat>& input_im_batch,
std::vector<ImageBlob>* blob_batch,
int thread_num) {
int batch_size = input_im_batch.size();
bool success = true;
thread_num = std::min(thread_num, batch_size);
#pragma omp parallel for num_threads(thread_num)
for (int i = 0; i < input_im_batch.size(); ++i) {
cv::Mat im = input_im_batch[i].clone();
if (!transforms_.Run(&im, &(*blob_batch)[i])) {
success = false;
}
}
return success;
}
bool Model::predict(const cv::Mat& im, ClsResult* result) {
inputs_.clear();
if (type == "detector") {
std::cerr << "Loading model is a 'detector', DetResult should be passed to "
"function predict()!"
<< std::endl;
return false;
} else if (type == "segmenter") {
std::cerr << "Loading model is a 'segmenter', SegResult should be passed "
"to function predict()!"
<< std::endl;
"to function predict()!" << std::endl;
return false;
}
// 处理输入图像
......@@ -144,20 +172,80 @@ bool Model::predict(const cv::Mat& im, ClsResult* result) {
result->category_id = std::distance(std::begin(outputs_), ptr);
result->score = *ptr;
result->category = labels[result->category_id];
return true;
}
bool Model::predict(const std::vector<cv::Mat>& im_batch,
std::vector<ClsResult>* results,
int thread_num) {
for (auto& inputs : inputs_batch_) {
inputs.clear();
}
if (type == "detector") {
std::cerr << "Loading model is a 'detector', DetResult should be passed to "
"function predict()!" << std::endl;
return false;
} else if (type == "segmenter") {
std::cerr << "Loading model is a 'segmenter', SegResult should be passed "
"to function predict()!" << std::endl;
return false;
}
inputs_batch_.assign(im_batch.size(), ImageBlob());
// 处理输入图像
if (!preprocess(im_batch, &inputs_batch_, thread_num)) {
std::cerr << "Preprocess failed!" << std::endl;
return false;
}
// 使用加载的模型进行预测
int batch_size = im_batch.size();
auto in_tensor = predictor_->GetInputTensor("image");
int h = inputs_batch_[0].new_im_size_[0];
int w = inputs_batch_[0].new_im_size_[1];
in_tensor->Reshape({batch_size, 3, h, w});
std::vector<float> inputs_data(batch_size * 3 * h * w);
for (int i = 0; i < batch_size; ++i) {
std::copy(inputs_batch_[i].im_data_.begin(),
inputs_batch_[i].im_data_.end(),
inputs_data.begin() + i * 3 * h * w);
}
in_tensor->copy_from_cpu(inputs_data.data());
// in_tensor->copy_from_cpu(inputs_.im_data_.data());
predictor_->ZeroCopyRun();
// 取出模型的输出结果
auto output_names = predictor_->GetOutputNames();
auto output_tensor = predictor_->GetOutputTensor(output_names[0]);
std::vector<int> output_shape = output_tensor->shape();
int size = 1;
for (const auto& i : output_shape) {
size *= i;
}
outputs_.resize(size);
output_tensor->copy_to_cpu(outputs_.data());
// 对模型输出结果进行后处理
int single_batch_size = size / batch_size;
for (int i = 0; i < batch_size; ++i) {
auto start_ptr = std::begin(outputs_);
auto end_ptr = std::begin(outputs_);
std::advance(start_ptr, i * single_batch_size);
std::advance(end_ptr, (i + 1) * single_batch_size);
auto ptr = std::max_element(start_ptr, end_ptr);
(*results)[i].category_id = std::distance(start_ptr, ptr);
(*results)[i].score = *ptr;
(*results)[i].category = labels[(*results)[i].category_id];
}
return true;
}
bool Model::predict(const cv::Mat& im, DetResult* result) {
result->clear();
inputs_.clear();
result->clear();
if (type == "classifier") {
std::cerr << "Loading model is a 'classifier', ClsResult should be passed "
"to function predict()!"
<< std::endl;
"to function predict()!" << std::endl;
return false;
} else if (type == "segmenter") {
std::cerr << "Loading model is a 'segmenter', SegResult should be passed "
"to function predict()!"
<< std::endl;
"to function predict()!" << std::endl;
return false;
}
......@@ -172,6 +260,7 @@ bool Model::predict(const cv::Mat& im, DetResult* result) {
auto im_tensor = predictor_->GetInputTensor("image");
im_tensor->Reshape({1, 3, h, w});
im_tensor->copy_from_cpu(inputs_.im_data_.data());
if (name == "YOLOv3") {
auto im_size_tensor = predictor_->GetInputTensor("im_size");
im_size_tensor->Reshape({1, 2});
......@@ -247,6 +336,181 @@ bool Model::predict(const cv::Mat& im, DetResult* result) {
static_cast<int>(box->coordinate[3])};
}
}
return true;
}
bool Model::predict(const std::vector<cv::Mat>& im_batch,
std::vector<DetResult>* result,
int thread_num) {
for (auto& inputs : inputs_batch_) {
inputs.clear();
}
if (type == "classifier") {
std::cerr << "Loading model is a 'classifier', ClsResult should be passed "
"to function predict()!" << std::endl;
return false;
} else if (type == "segmenter") {
std::cerr << "Loading model is a 'segmenter', SegResult should be passed "
"to function predict()!" << std::endl;
return false;
}
inputs_batch_.assign(im_batch.size(), ImageBlob());
int batch_size = im_batch.size();
// 处理输入图像
if (!preprocess(im_batch, &inputs_batch_, thread_num)) {
std::cerr << "Preprocess failed!" << std::endl;
return false;
}
// 对RCNN类模型做批量padding
if (batch_size > 1) {
if (name == "FasterRCNN" || name == "MaskRCNN") {
int max_h = -1;
int max_w = -1;
for (int i = 0; i < batch_size; ++i) {
max_h = std::max(max_h, inputs_batch_[i].new_im_size_[0]);
max_w = std::max(max_w, inputs_batch_[i].new_im_size_[1]);
// std::cout << "(" << inputs_batch_[i].new_im_size_[0]
// << ", " << inputs_batch_[i].new_im_size_[1]
// << ")" << std::endl;
}
thread_num = std::min(thread_num, batch_size);
#pragma omp parallel for num_threads(thread_num)
for (int i = 0; i < batch_size; ++i) {
int h = inputs_batch_[i].new_im_size_[0];
int w = inputs_batch_[i].new_im_size_[1];
int c = im_batch[i].channels();
if (max_h != h || max_w != w) {
std::vector<float> temp_buffer(c * max_h * max_w);
float* temp_ptr = temp_buffer.data();
float* ptr = inputs_batch_[i].im_data_.data();
for (int cur_channel = c - 1; cur_channel >= 0; --cur_channel) {
int ori_pos = cur_channel * h * w + (h - 1) * w;
int des_pos = cur_channel * max_h * max_w + (h - 1) * max_w;
int last_pos = cur_channel * h * w;
for (; ori_pos >= last_pos; ori_pos -= w, des_pos -= max_w) {
memcpy(temp_ptr + des_pos, ptr + ori_pos, w * sizeof(float));
}
}
inputs_batch_[i].im_data_.swap(temp_buffer);
inputs_batch_[i].new_im_size_[0] = max_h;
inputs_batch_[i].new_im_size_[1] = max_w;
}
}
}
}
int h = inputs_batch_[0].new_im_size_[0];
int w = inputs_batch_[0].new_im_size_[1];
auto im_tensor = predictor_->GetInputTensor("image");
im_tensor->Reshape({batch_size, 3, h, w});
std::vector<float> inputs_data(batch_size * 3 * h * w);
for (int i = 0; i < batch_size; ++i) {
std::copy(inputs_batch_[i].im_data_.begin(),
inputs_batch_[i].im_data_.end(),
inputs_data.begin() + i * 3 * h * w);
}
im_tensor->copy_from_cpu(inputs_data.data());
if (name == "YOLOv3") {
auto im_size_tensor = predictor_->GetInputTensor("im_size");
im_size_tensor->Reshape({batch_size, 2});
std::vector<int> inputs_data_size(batch_size * 2);
for (int i = 0; i < batch_size; ++i) {
std::copy(inputs_batch_[i].ori_im_size_.begin(),
inputs_batch_[i].ori_im_size_.end(),
inputs_data_size.begin() + 2 * i);
}
im_size_tensor->copy_from_cpu(inputs_data_size.data());
} else if (name == "FasterRCNN" || name == "MaskRCNN") {
auto im_info_tensor = predictor_->GetInputTensor("im_info");
auto im_shape_tensor = predictor_->GetInputTensor("im_shape");
im_info_tensor->Reshape({batch_size, 3});
im_shape_tensor->Reshape({batch_size, 3});
std::vector<float> im_info(3 * batch_size);
std::vector<float> im_shape(3 * batch_size);
for (int i = 0; i < batch_size; ++i) {
float ori_h = static_cast<float>(inputs_batch_[i].ori_im_size_[0]);
float ori_w = static_cast<float>(inputs_batch_[i].ori_im_size_[1]);
float new_h = static_cast<float>(inputs_batch_[i].new_im_size_[0]);
float new_w = static_cast<float>(inputs_batch_[i].new_im_size_[1]);
im_info[i * 3] = new_h;
im_info[i * 3 + 1] = new_w;
im_info[i * 3 + 2] = inputs_batch_[i].scale;
im_shape[i * 3] = ori_h;
im_shape[i * 3 + 1] = ori_w;
im_shape[i * 3 + 2] = 1.0;
}
im_info_tensor->copy_from_cpu(im_info.data());
im_shape_tensor->copy_from_cpu(im_shape.data());
}
// 使用加载的模型进行预测
predictor_->ZeroCopyRun();
// 读取所有box
std::vector<float> output_box;
auto output_names = predictor_->GetOutputNames();
auto output_box_tensor = predictor_->GetOutputTensor(output_names[0]);
std::vector<int> output_box_shape = output_box_tensor->shape();
int size = 1;
for (const auto& i : output_box_shape) {
size *= i;
}
output_box.resize(size);
output_box_tensor->copy_to_cpu(output_box.data());
if (size < 6) {
std::cerr << "[WARNING] There's no object detected." << std::endl;
return true;
}
auto lod_vector = output_box_tensor->lod();
int num_boxes = size / 6;
// 解析预测框box
for (int i = 0; i < lod_vector[0].size() - 1; ++i) {
for (int j = lod_vector[0][i]; j < lod_vector[0][i + 1]; ++j) {
Box box;
box.category_id = static_cast<int>(round(output_box[j * 6]));
box.category = labels[box.category_id];
box.score = output_box[j * 6 + 1];
float xmin = output_box[j * 6 + 2];
float ymin = output_box[j * 6 + 3];
float xmax = output_box[j * 6 + 4];
float ymax = output_box[j * 6 + 5];
float w = xmax - xmin + 1;
float h = ymax - ymin + 1;
box.coordinate = {xmin, ymin, w, h};
(*result)[i].boxes.push_back(std::move(box));
}
}
// 实例分割需解析mask
if (name == "MaskRCNN") {
std::vector<float> output_mask;
auto output_mask_tensor = predictor_->GetOutputTensor(output_names[1]);
std::vector<int> output_mask_shape = output_mask_tensor->shape();
int masks_size = 1;
for (const auto& i : output_mask_shape) {
masks_size *= i;
}
int mask_pixels = output_mask_shape[2] * output_mask_shape[3];
int classes = output_mask_shape[1];
output_mask.resize(masks_size);
output_mask_tensor->copy_to_cpu(output_mask.data());
int mask_idx = 0;
for (int i = 0; i < lod_vector[0].size() - 1; ++i) {
(*result)[i].mask_resolution = output_mask_shape[2];
for (int j = 0; j < (*result)[i].boxes.size(); ++j) {
Box* box = &(*result)[i].boxes[j];
int category_id = box->category_id;
auto begin_mask = output_mask.begin() +
(mask_idx * classes + category_id) * mask_pixels;
auto end_mask = begin_mask + mask_pixels;
box->mask.data.assign(begin_mask, end_mask);
box->mask.shape = {static_cast<int>(box->coordinate[2]),
static_cast<int>(box->coordinate[3])};
mask_idx++;
}
}
}
return true;
}
bool Model::predict(const cv::Mat& im, SegResult* result) {
......@@ -254,13 +518,11 @@ bool Model::predict(const cv::Mat& im, SegResult* result) {
inputs_.clear();
if (type == "classifier") {
std::cerr << "Loading model is a 'classifier', ClsResult should be passed "
"to function predict()!"
<< std::endl;
"to function predict()!" << std::endl;
return false;
} else if (type == "detector") {
std::cerr << "Loading model is a 'detector', DetResult should be passed to "
"function predict()!"
<< std::endl;
"function predict()!" << std::endl;
return false;
}
......@@ -288,6 +550,7 @@ bool Model::predict(const cv::Mat& im, SegResult* result) {
size *= i;
result->label_map.shape.push_back(i);
}
result->label_map.data.resize(size);
output_label_tensor->copy_to_cpu(result->label_map.data.data());
......@@ -299,6 +562,7 @@ bool Model::predict(const cv::Mat& im, SegResult* result) {
size *= i;
result->score_map.shape.push_back(i);
}
result->score_map.data.resize(size);
output_score_tensor->copy_to_cpu(result->score_map.data.data());
......@@ -325,8 +589,8 @@ bool Model::predict(const cv::Mat& im, SegResult* result) {
inputs_.im_size_before_resize_.pop_back();
auto padding_w = before_shape[0];
auto padding_h = before_shape[1];
mask_label = mask_label(cv::Rect(0, 0, padding_w, padding_h));
mask_score = mask_score(cv::Rect(0, 0, padding_w, padding_h));
mask_label = mask_label(cv::Rect(0, 0, padding_h, padding_w));
mask_score = mask_score(cv::Rect(0, 0, padding_h, padding_w));
} else if (*iter == "resize") {
auto before_shape = inputs_.im_size_before_resize_[len_postprocess - idx];
inputs_.im_size_before_resize_.pop_back();
......@@ -343,7 +607,7 @@ bool Model::predict(const cv::Mat& im, SegResult* result) {
cv::Size(resize_h, resize_w),
0,
0,
cv::INTER_NEAREST);
cv::INTER_LINEAR);
}
++idx;
}
......@@ -353,6 +617,156 @@ bool Model::predict(const cv::Mat& im, SegResult* result) {
result->score_map.data.assign(mask_score.begin<float>(),
mask_score.end<float>());
result->score_map.shape = {mask_score.rows, mask_score.cols};
return true;
}
bool Model::predict(const std::vector<cv::Mat>& im_batch,
std::vector<SegResult>* result,
int thread_num) {
for (auto& inputs : inputs_batch_) {
inputs.clear();
}
if (type == "classifier") {
std::cerr << "Loading model is a 'classifier', ClsResult should be passed "
"to function predict()!" << std::endl;
return false;
} else if (type == "detector") {
std::cerr << "Loading model is a 'detector', DetResult should be passed to "
"function predict()!" << std::endl;
return false;
}
// 处理输入图像
inputs_batch_.assign(im_batch.size(), ImageBlob());
if (!preprocess(im_batch, &inputs_batch_, thread_num)) {
std::cerr << "Preprocess failed!" << std::endl;
return false;
}
int batch_size = im_batch.size();
(*result).clear();
(*result).resize(batch_size);
int h = inputs_batch_[0].new_im_size_[0];
int w = inputs_batch_[0].new_im_size_[1];
auto im_tensor = predictor_->GetInputTensor("image");
im_tensor->Reshape({batch_size, 3, h, w});
std::vector<float> inputs_data(batch_size * 3 * h * w);
for (int i = 0; i < batch_size; ++i) {
std::copy(inputs_batch_[i].im_data_.begin(),
inputs_batch_[i].im_data_.end(),
inputs_data.begin() + i * 3 * h * w);
}
im_tensor->copy_from_cpu(inputs_data.data());
// im_tensor->copy_from_cpu(inputs_.im_data_.data());
// 使用加载的模型进行预测
predictor_->ZeroCopyRun();
// 获取预测置信度,经过argmax后的labelmap
auto output_names = predictor_->GetOutputNames();
auto output_label_tensor = predictor_->GetOutputTensor(output_names[0]);
std::vector<int> output_label_shape = output_label_tensor->shape();
int size = 1;
for (const auto& i : output_label_shape) {
size *= i;
}
std::vector<int64_t> output_labels(size, 0);
output_label_tensor->copy_to_cpu(output_labels.data());
auto output_labels_iter = output_labels.begin();
int single_batch_size = size / batch_size;
for (int i = 0; i < batch_size; ++i) {
(*result)[i].label_map.data.resize(single_batch_size);
(*result)[i].label_map.shape.push_back(1);
for (int j = 1; j < output_label_shape.size(); ++j) {
(*result)[i].label_map.shape.push_back(output_label_shape[j]);
}
std::copy(output_labels_iter + i * single_batch_size,
output_labels_iter + (i + 1) * single_batch_size,
(*result)[i].label_map.data.data());
}
// 获取预测置信度scoremap
auto output_score_tensor = predictor_->GetOutputTensor(output_names[1]);
std::vector<int> output_score_shape = output_score_tensor->shape();
size = 1;
for (const auto& i : output_score_shape) {
size *= i;
}
std::vector<float> output_scores(size, 0);
output_score_tensor->copy_to_cpu(output_scores.data());
auto output_scores_iter = output_scores.begin();
int single_batch_score_size = size / batch_size;
for (int i = 0; i < batch_size; ++i) {
(*result)[i].score_map.data.resize(single_batch_score_size);
(*result)[i].score_map.shape.push_back(1);
for (int j = 1; j < output_score_shape.size(); ++j) {
(*result)[i].score_map.shape.push_back(output_score_shape[j]);
}
std::copy(output_scores_iter + i * single_batch_score_size,
output_scores_iter + (i + 1) * single_batch_score_size,
(*result)[i].score_map.data.data());
}
// 解析输出结果到原图大小
for (int i = 0; i < batch_size; ++i) {
std::vector<uint8_t> label_map((*result)[i].label_map.data.begin(),
(*result)[i].label_map.data.end());
cv::Mat mask_label((*result)[i].label_map.shape[1],
(*result)[i].label_map.shape[2],
CV_8UC1,
label_map.data());
cv::Mat mask_score((*result)[i].score_map.shape[2],
(*result)[i].score_map.shape[3],
CV_32FC1,
(*result)[i].score_map.data.data());
int idx = 1;
int len_postprocess = inputs_batch_[i].im_size_before_resize_.size();
for (std::vector<std::string>::reverse_iterator iter =
inputs_batch_[i].reshape_order_.rbegin();
iter != inputs_batch_[i].reshape_order_.rend();
++iter) {
if (*iter == "padding") {
auto before_shape =
inputs_batch_[i].im_size_before_resize_[len_postprocess - idx];
inputs_batch_[i].im_size_before_resize_.pop_back();
auto padding_w = before_shape[0];
auto padding_h = before_shape[1];
mask_label = mask_label(cv::Rect(0, 0, padding_h, padding_w));
mask_score = mask_score(cv::Rect(0, 0, padding_h, padding_w));
} else if (*iter == "resize") {
auto before_shape =
inputs_batch_[i].im_size_before_resize_[len_postprocess - idx];
inputs_batch_[i].im_size_before_resize_.pop_back();
auto resize_w = before_shape[0];
auto resize_h = before_shape[1];
cv::resize(mask_label,
mask_label,
cv::Size(resize_h, resize_w),
0,
0,
cv::INTER_NEAREST);
cv::resize(mask_score,
mask_score,
cv::Size(resize_h, resize_w),
0,
0,
cv::INTER_LINEAR);
}
++idx;
}
(*result)[i].label_map.data.assign(mask_label.begin<uint8_t>(),
mask_label.end<uint8_t>());
(*result)[i].label_map.shape = {mask_label.rows, mask_label.cols};
(*result)[i].score_map.data.assign(mask_score.begin<float>(),
mask_score.end<float>());
(*result)[i].score_map.shape = {mask_score.rows, mask_score.cols};
}
return true;
}
} // namespce of PaddleX
} // namespace PaddleX
......@@ -95,11 +95,13 @@ bool Padding::Run(cv::Mat* im, ImageBlob* data) {
if (width_ > 1 & height_ > 1) {
padding_w = width_ - im->cols;
padding_h = height_ - im->rows;
} else if (coarsest_stride_ > 1) {
} else if (coarsest_stride_ >= 1) {
int h = im->rows;
int w = im->cols;
padding_h =
ceil(im->rows * 1.0 / coarsest_stride_) * coarsest_stride_ - im->rows;
ceil(h * 1.0 / coarsest_stride_) * coarsest_stride_ - im->rows;
padding_w =
ceil(im->cols * 1.0 / coarsest_stride_) * coarsest_stride_ - im->cols;
ceil(w * 1.0 / coarsest_stride_) * coarsest_stride_ - im->cols;
}
if (padding_h < 0 || padding_w < 0) {
......@@ -219,4 +221,5 @@ bool Transforms::Run(cv::Mat* im, ImageBlob* data) {
}
return true;
}
} // namespace PaddleX
......@@ -145,4 +145,4 @@ std::string generate_save_path(const std::string& save_dir,
std::string image_name(file_path.substr(pos + 1));
return save_dir + OS_PATH_SEP + image_name;
}
} // namespace of PaddleX
} // namespace PaddleX
......@@ -13,7 +13,7 @@
> 可以使用模型裁剪,参考文档[模型裁剪使用教程](slim/prune.md),通过调整裁剪参数,可以控制模型裁剪后的大小,在实际实验中,如VOC检测数据,使用yolov3-mobilenet,原模型大小为XXM,裁剪后为XX M,精度基本保持不变
## 4. 如何配置训练时GPU的卡数
> 通过在终端export环境变量,或在Python代码中设置,可参考文档[CPU/多卡GPU训练](gpu_configure.md)
> 通过在终端export环境变量,或在Python代码中设置,可参考文档[CPU/多卡GPU训练](appendix/gpu_configure.md)
## 5. 想将之前训练的模型参数上继续训练
> 在训练调用`train`接口时,将`pretrain_weights`设为之前的模型保存路径即可
......@@ -52,7 +52,7 @@
> 1. 用户自行训练时,如不确定迭代的轮数,可以将轮数设高一些,同时注意设置`save_interval_epochs`,这样模型迭代每间隔相应轮数就会在验证集上进行评估和保存,可以根据不同轮数模型在验证集上的评估指标,判断模型是否已经收敛,若模型已收敛,可以自行结束训练进程
>
## 9. 只有CPU,没有GPU,如何提升训练速度
> 当没有GPU时,可以根据自己的CPU配置,选择是否使用多CPU进行训练,具体配置方式可以参考文档[多卡CPU/GPU训练](gpu_configure.md)
> 当没有GPU时,可以根据自己的CPU配置,选择是否使用多CPU进行训练,具体配置方式可以参考文档[多卡CPU/GPU训练](appendix/gpu_configure.md)
>
## 10. 电脑不能联网,训练时因为下载预训练模型失败,如何解决
> 可以预先通过其它方式准备好预训练模型,然后训练时自定义`pretrain_weights`即可,可参考文档[无联网模型训练](how_to_offline_run.md)
......@@ -61,7 +61,7 @@
> 1.可以按照9的方式来解决这个问题
> 2.每次训练前都设定`paddlex.pretrain_dir`路径,如设定`paddlex.pretrain_dir='/usrname/paddlex`,如此下载完的预训练模型会存放至`/usrname/paddlex`目录下,而已经下载在该目录的模型也不会再次重复下载
## 12. 程序启动时提示"Failed to execute script PaddleX",如何解决?
## 12. PaddleX GUI启动时提示"Failed to execute script PaddleX",如何解决?
> 1. 请检查目标机器上PaddleX程序所在路径是否包含中文。目前暂不支持中文路径,请尝试将程序移动到英文目录。
> 2. 如果您的系统是Windows 7或者Windows Server 2012时,原因是缺少MFPlat.DLL/MF.dll/MFReadWrite.dll等OpenCV依赖的DLL,请按如下方式安装桌面体验:通过“我的电脑”-->“属性”-->"管理"打开服务器管理器,点击右上角“管理”选择“添加角色和功能”。点击“服务器选择”-->“功能”,拖动滚动条到最下端,点开“用户界面和基础结构”,勾选“桌面体验”后点击“安装”,等安装完成尝试再次运行PaddleX。
> 3. 请检查目标机器上是否有其他的PaddleX程序或者进程在运行中,如有请退出或者重启机器看是否解决
......
......@@ -8,7 +8,7 @@ paddlex.datasets.VOCDetection(data_dir, file_list, label_list, transforms=None,
> 仅用于**目标检测**。读取PascalVOC格式的检测数据集,并对样本进行相应的处理。PascalVOC数据集格式的介绍可查看文档:[数据集格式说明](../datasets.md)
> 示例:[代码文件](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/detection/yolov3_mobilenetv1.py#L29)
> 示例:[代码文件](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/detection/yolov3_darknet53.py#L29)
> **参数**
......@@ -21,6 +21,16 @@ paddlex.datasets.VOCDetection(data_dir, file_list, label_list, transforms=None,
> > * **parallel_method** (str): 数据集中样本在预处理过程中并行处理的方式,支持'thread'线程和'process'进程两种方式。默认为'process'(Windows和Mac下会强制使用thread,该参数无效)。
> > * **shuffle** (bool): 是否需要对数据集中样本打乱顺序。默认为False。
> 【可选】支持在训练过程中加入无目标真值的背景图片来减少背景误检,定义VOCDetection类后调用其成员函数`add_negative_samples`添加背景图片即可:
> ```
> add_negative_samples(image_dir)
> ```
> > 示例:[代码](../../tuning_strategy/detection/negatives_training.html#id4)
> > **参数**
> > > * **image_dir** (str): 背景图片所在的目录路径。
## CocoDetection类
```
......@@ -41,6 +51,16 @@ paddlex.datasets.CocoDetection(data_dir, ann_file, transforms=None, num_workers=
> > * **parallel_method** (str): 数据集中样本在预处理过程中并行处理的方式,支持'thread'线程和'process'进程两种方式。默认为'process'(Windows和Mac下会强制使用thread,该参数无效)。
> > * **shuffle** (bool): 是否需要对数据集中样本打乱顺序。默认为False。
> 【可选】支持在训练过程中加入无目标真值的背景图片来减少背景误检,定义CocoDetection类后调用其成员函数`add_negative_samples`添加背景图片即可:
> ```
> add_negative_samples(image_dir)
> ```
> > 示例:[代码](../../tuning_strategy/detection/negatives_training.html#id4)
> > **参数**
> > > * **image_dir** (str): 背景图片所在的目录路径。
## EasyDataDet类
```
......@@ -61,3 +81,13 @@ paddlex.datasets.EasyDataDet(data_dir, file_list, label_list, transforms=None, n
> > * **parallel_method** (str): 数据集中样本在预处理过程中并行处理的方式,支持'thread'线程和'process'进程两种方式。默认为'process'(Windows和Mac下会强制使用thread,该参数无效)。
> > * **shuffle** (bool): 是否需要对数据集中样本打乱顺序。默认为False。
> 【可选】支持在训练过程中加入无目标真值的背景图片来减少背景误检,定义EasyDataDet类后调用其成员函数`add_negative_samples`添加背景图片即可:
> ```
> add_negative_samples(image_dir)
> ```
> > 示例:[代码](../../tuning_strategy/detection/negatives_training.html#id4)
> > **参数**
> > > * **image_dir** (str): 背景图片所在的目录路径。
......@@ -80,7 +80,7 @@ predict(self, img_file, transforms=None, topk=5)
## 其它分类器类
PaddleX提供了共计22种分类器,所有分类器均提供同`ResNet50`相同的训练`train`,评估`evaluate`和预测`predict`接口,各模型效果可参考[模型库](../appendix/model_zoo.md)
PaddleX提供了共计22种分类器,所有分类器均提供同`ResNet50`相同的训练`train`,评估`evaluate`和预测`predict`接口,各模型效果可参考[模型库](https://paddlex.readthedocs.io/zh_CN/latest/appendix/model_zoo.html)
### ResNet18
```python
......
......@@ -42,7 +42,7 @@ train(self, num_epochs, train_dataset, train_batch_size=8, eval_dataset=None, sa
> > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为20。
> > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。
> > - **save_dir** (str): 模型保存路径。默认值为'output'。
> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为None。
> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为字符串'COCO',则自动下载在COCO数据集上预训练的模型权重;若为None,则不使用预训练模型。默认为None。
> > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器:fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。
> > - **learning_rate** (float): 默认优化器的学习率。默认为1.0/8000。
> > - **warmup_steps** (int): 默认优化器进行warmup过程的步数。默认为1000。
......@@ -129,7 +129,7 @@ train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, sa
> > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为1。
> > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。
> > - **save_dir** (str): 模型保存路径。默认值为'output'。
> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为None。
> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为字符串'COCO',则自动下载在COCO数据集上预训练的模型权重(注意:暂未提供ResNet18的COCO预训练模型);为None,则不使用预训练模型。默认为None。
> > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器:fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。
> > - **learning_rate** (float): 默认优化器的初始学习率。默认为0.0025。
> > - **warmup_steps** (int): 默认优化器进行warmup过程的步数。默认为500。
......
......@@ -34,7 +34,7 @@ train(self, num_epochs, train_dataset, train_batch_size=1, eval_dataset=None, sa
> > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为1。
> > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。
> > - **save_dir** (str): 模型保存路径。默认值为'output'。
> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为None。
> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为字符串'COCO',则自动下载在COCO数据集上预训练的模型权重(注意:暂未提供ResNet18和HRNet_W18的COCO预训练模型);若为None,则不使用预训练模型。默认为None。
> > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器:fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。
> > - **learning_rate** (float): 默认优化器的初始学习率。默认为0.00125。
> > - **warmup_steps** (int): 默认优化器进行warmup过程的步数。默认为500。
......
......@@ -12,7 +12,7 @@ paddlex.seg.DeepLabv3p(num_classes=2, backbone='MobileNetV2_x1.0', output_stride
> **参数**
> > - **num_classes** (int): 类别数。
> > - **backbone** (str): DeepLabv3+的backbone网络,实现特征图的计算,取值范围为['Xception65', 'Xception41', 'MobileNetV2_x0.25', 'MobileNetV2_x0.5', 'MobileNetV2_x1.0', 'MobileNetV2_x1.5', 'MobileNetV2_x2.0'],'MobileNetV2_x1.0'。
> > - **backbone** (str): DeepLabv3+的backbone网络,实现特征图的计算,取值范围为['Xception65', 'Xception41', 'MobileNetV2_x0.25', 'MobileNetV2_x0.5', 'MobileNetV2_x1.0', 'MobileNetV2_x1.5', 'MobileNetV2_x2.0'],默认值为'MobileNetV2_x1.0'。
> > - **output_stride** (int): backbone 输出特征图相对于输入的下采样倍数,一般取值为8或16。默认16。
> > - **aspp_with_sep_conv** (bool): decoder模块是否采用separable convolutions。默认True。
> > - **decoder_use_sep_conv** (bool): decoder模块是否采用separable convolutions。默认True。
......@@ -40,12 +40,12 @@ train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, ev
> > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为1。
> > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。
> > - **save_dir** (str): 模型保存路径。默认'output'
> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认'IMAGENET'。
> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为字符串'COCO',则自动下载在COCO数据集上预训练的模型权重(注意:暂未提供Xception41、MobileNetV2_x0.25、MobileNetV2_x0.5、MobileNetV2_x1.5、MobileNetV2_x2.0的COCO预训练模型);若为字符串'CITYSCAPES',则自动下载在CITYSCAPES数据集上预训练的模型权重(注意:暂未提供Xception41、MobileNetV2_x0.25、MobileNetV2_x0.5、MobileNetV2_x1.5、MobileNetV2_x2.0的CITYSCAPES预训练模型);若为None,则不使用预训练模型。默认'IMAGENET'。
> > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认的优化器:使用fluid.optimizer.Momentum优化方法,polynomial的学习率衰减策略。
> > - **learning_rate** (float): 默认优化器的初始学习率。默认0.01。
> > - **lr_decay_power** (float): 默认优化器学习率衰减指数。默认0.9。
> > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认False。
> > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
> > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
> > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。
> > - **early_stop** (bool): 是否使用提前终止训练策略。默认值为False。
> > - **early_stop_patience** (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内连续下降或持平,则终止训练。默认值为5。
......@@ -129,7 +129,7 @@ train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, ev
> > - **learning_rate** (float): 默认优化器的初始学习率。默认0.01。
> > - **lr_decay_power** (float): 默认优化器学习率衰减指数。默认0.9。
> > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认False。
> > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
> > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
> > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。
> > - **early_stop** (float): 是否使用提前终止训练策略。默认值为False。
> > - **early_stop_patience** (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内连续下降或持平,则终止训练。默认值为5。
......@@ -186,10 +186,10 @@ paddlex.seg.HRNet(num_classes=2, width=18, use_bce_loss=False, use_dice_loss=Fal
> **参数**
> > - **num_classes** (int): 类别数。
> > - **width** (int): 高分辨率分支中特征层的通道数量。默认值为18。可选择取值为[18, 30, 32, 40, 44, 48, 60, 64]
> > - **width** (int|str): 高分辨率分支中特征层的通道数量。默认值为18。可选择取值为[18, 30, 32, 40, 44, 48, 60, 64, '18_small_v1']。'18_small_v1'是18的轻量级版本
> > - **use_bce_loss** (bool): 是否使用bce loss作为网络的损失函数,只能用于两类分割。可与dice loss同时使用。默认False。
> > - **use_dice_loss** (bool): 是否使用dice loss作为网络的损失函数,只能用于两类分割,可与bce loss同时使用。当use_bce_loss和use_dice_loss都为False时,使用交叉熵损失函数。默认False。
> > - **class_weight** (list/str): 交叉熵损失函数各类损失的权重。当`class_weight`为list的时候,长度应为`num_classes`。当`class_weight`为str时, weight.lower()应为'dynamic',这时会根据每一轮各类像素的比重自行计算相应的权重,每一类的权重为:每类的比例 * num_classes。class_weight取默认值None是,各类的权重1,即平时使用的交叉熵损失函数。
> > - **class_weight** (list|str): 交叉熵损失函数各类损失的权重。当`class_weight`为list的时候,长度应为`num_classes`。当`class_weight`为str时, weight.lower()应为'dynamic',这时会根据每一轮各类像素的比重自行计算相应的权重,每一类的权重为:每类的比例 * num_classes。class_weight取默认值None是,各类的权重1,即平时使用的交叉熵损失函数。
> > - **ignore_index** (int): label上忽略的值,label为`ignore_index`的像素不参与损失函数的计算。默认255。
### train 训练接口
......@@ -209,12 +209,12 @@ train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, ev
> > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为1。
> > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。
> > - **save_dir** (str): 模型保存路径。默认'output'
> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet数据集上预训练的模型权重;若为None,则不使用预训练模型。默认'IMAGENET'。
> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet数据集上预训练的模型权重;若为字符串'CITYSCAPES',则自动下载在CITYSCAPES图片数据上预训练的模型权重(注意:目前仅提供`width`取值为18的CITYSCAPES预训练模型);若为None,则不使用预训练模型。默认'IMAGENET'。
> > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认的优化器:使用fluid.optimizer.Momentum优化方法,polynomial的学习率衰减策略。
> > - **learning_rate** (float): 默认优化器的初始学习率。默认0.01。
> > - **lr_decay_power** (float): 默认优化器学习率衰减指数。默认0.9。
> > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认False。
> > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
> > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
> > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。
> > - **early_stop** (float): 是否使用提前终止训练策略。默认值为False。
> > - **early_stop_patience** (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内连续下降或持平,则终止训练。默认值为5。
......@@ -258,3 +258,88 @@ predict(self, im_file, transforms=None):
> **返回值**
> >
> > - **dict**: 包含关键字'label_map'和'score_map', 'label_map'存储预测结果灰度图,像素值表示对应的类别,'score_map'存储各类别的概率,shape=(h, w, num_classes)。
## FastSCNN类
```python
paddlex.seg.FastSCNN(num_classes=2, use_bce_loss=False, use_dice_loss=False, class_weight=None, ignore_index=255, multi_loss_weight=[1.0])
```
> 构建FastSCNN分割器。
> **参数**
> > - **num_classes** (int): 类别数。
> > - **use_bce_loss** (bool): 是否使用bce loss作为网络的损失函数,只能用于两类分割。可与dice loss同时使用。默认False。
> > - **use_dice_loss** (bool): 是否使用dice loss作为网络的损失函数,只能用于两类分割,可与bce loss同时使用。当use_bce_loss和use_dice_loss都为False时,使用交叉熵损失函数。默认False。
> > - **class_weight** (list/str): 交叉熵损失函数各类损失的权重。当`class_weight`为list的时候,长度应为`num_classes`。当`class_weight`为str时, weight.lower()应为'dynamic',这时会根据每一轮各类像素的比重自行计算相应的权重,每一类的权重为:每类的比例 * num_classes。class_weight取默认值None是,各类的权重1,即平时使用的交叉熵损失函数。
> > - **ignore_index** (int): label上忽略的值,label为`ignore_index`的像素不参与损失函数的计算。默认255。
> > - **multi_loss_weight** (list): 多分支上的loss权重。默认计算一个分支上的loss,即默认值为[1.0]。也支持计算两个分支或三个分支上的loss,权重按[fusion_branch_weight, higher_branch_weight, lower_branch_weight]排列,fusion_branch_weight为空间细节分支和全局上下文分支融合后的分支上的loss权重,higher_branch_weight为空间细节分支上的loss权重,lower_branch_weight为全局上下文分支上的loss权重,若higher_branch_weight和lower_branch_weight未设置则不会计算这两个分支上的loss。
### train 训练接口
```python
train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, eval_batch_size=1, save_interval_epochs=1, log_interval_steps=2, save_dir='output', pretrain_weights='CITYSCAPES', optimizer=None, learning_rate=0.01, lr_decay_power=0.9, use_vdl=False, sensitivities_file=None, eval_metric_loss=0.05, early_stop=False, early_stop_patience=5, resume_checkpoint=None):
```
> FastSCNN模型训练接口。
> **参数**
> >
> > - **num_epochs** (int): 训练迭代轮数。
> > - **train_dataset** (paddlex.datasets): 训练数据读取器。
> > - **train_batch_size** (int): 训练数据batch大小。同时作为验证数据batch大小。默认2。
> > - **eval_dataset** (paddlex.datasets): 评估数据读取器。
> > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为1。
> > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。
> > - **save_dir** (str): 模型保存路径。默认'output'
> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'CITYSCAPES',则自动下载在CITYSCAPES图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认'CITYSCAPES'。
> > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认的优化器:使用fluid.optimizer.Momentum优化方法,polynomial的学习率衰减策略。
> > - **learning_rate** (float): 默认优化器的初始学习率。默认0.01。
> > - **lr_decay_power** (float): 默认优化器学习率衰减指数。默认0.9。
> > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认False。
> > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
> > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。
> > - **early_stop** (float): 是否使用提前终止训练策略。默认值为False。
> > - **early_stop_patience** (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内连续下降或持平,则终止训练。默认值为5。
> > - **resume_checkpoint** (str): 恢复训练时指定上次训练保存的模型路径。若为None,则不会恢复训练。默认值为None。
#### evaluate 评估接口
```
evaluate(self, eval_dataset, batch_size=1, epoch_id=None, return_details=False):
```
> FastSCNN模型评估接口。
> **参数**
> >
> > - **eval_dataset** (paddlex.datasets): 评估数据读取器。
> > - **batch_size** (int): 评估时的batch大小。默认1。
> > - **epoch_id** (int): 当前评估模型所在的训练轮数。
> > - **return_details** (bool): 是否返回详细信息。默认False。
> **返回值**
> >
> > - **dict**: 当return_details为False时,返回dict。包含关键字:'miou'、'category_iou'、'macc'、
> > 'category_acc'和'kappa',分别表示平均iou、各类别iou、平均准确率、各类别准确率和kappa系数。
> > - **tuple** (metrics, eval_details):当return_details为True时,增加返回dict (eval_details),
> > 包含关键字:'confusion_matrix',表示评估的混淆矩阵。
#### predict 预测接口
```
predict(self, im_file, transforms=None):
```
> FastSCNN模型预测接口。需要注意的是,只有在训练过程中定义了eval_dataset,模型在保存时才会将预测时的图像处理流程保存在`UNet.test_transforms`和`UNet.eval_transforms`中。如未在训练时定义eval_dataset,那在调用预测`predict`接口时,用户需要再重新定义test_transforms传入给`predict`接口。
> **参数**
> >
> > - **img_file** (str): 预测图像路径。
> > - **transforms** (paddlex.seg.transforms): 数据预处理操作。
> **返回值**
> >
> > - **dict**: 包含关键字'label_map'和'score_map', 'label_map'存储预测结果灰度图,像素值表示对应的类别,'score_map'存储各类别的概率,shape=(h, w, num_classes)。
......@@ -200,7 +200,7 @@ ComposedSegTransforms.add_augmenters(augmenters)
import paddlex as pdx
from paddlex.seg import transforms
train_transforms = transforms.ComposedSegTransforms(mode='train', train_crop_size=[512, 512])
eval_transforms = transforms.ComposedYOLOTransforms(mode='eval')
eval_transforms = transforms.ComposedSegTransforms(mode='eval')
# 添加数据增强
import imgaug.augmenters as iaa
......
......@@ -146,10 +146,11 @@ paddlex.interpret.normlime(img_file,
dataset=None,
num_samples=3000,
batch_size=50,
save_dir='./')
save_dir='./',
normlime_weights_file=None)
```
使用NormLIME算法将模型预测结果的可解释性可视化。
NormLIME是利用一定数量的样本来出一个全局的解释。NormLIME会提前计算一定数量的测试样本的LIME结果,然后对相同的特征进行权重的归一化,这样来得到一个全局的输入和输出的关系
NormLIME是利用一定数量的样本来出一个全局的解释。由于NormLIME计算量较大,此处采用一种简化的方式:使用一定数量的测试样本(目前默认使用所有测试样本),对每个样本进行特征提取,映射到同一个特征空间;然后以此特征做为输入,以模型输出做为输出,使用线性回归对其进行拟合,得到一个全局的输入和输出的关系。之后,对一测试样本进行解释时,使用NormLIME全局的解释,来对LIME的结果进行滤波,使最终的可视化结果更加稳定
**注意:** 可解释性结果可视化目前只支持分类模型。
......@@ -160,8 +161,26 @@ NormLIME是利用一定数量的样本来出一个全局的解释。NormLIME会
>* **num_samples** (int): LIME用于学习线性模型的采样数,默认为3000。
>* **batch_size** (int): 预测数据batch大小,默认为50。
>* **save_dir** (str): 可解释性可视化结果(保存为png格式文件)和中间文件存储路径。
>* **normlime_weights_file** (str): NormLIME初始化文件名,若不存在,则计算一次,保存于该路径;若存在,则直接载入。
**注意:** dataset`读取的是一个数据集,该数据集不宜过大,否则计算时间会较长,但应包含所有类别的数据。
**注意:** dataset`读取的是一个数据集,该数据集不宜过大,否则计算时间会较长,但应包含所有类别的数据。NormLIME可解释性结果可视化目前只支持分类模型。
### 使用示例
> 对预测可解释性结果可视化的过程可参见[代码](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/interpret/normlime.py)。
## 数据预处理/增强过程可视化
```
paddlex.transforms.visualize(dataset,
img_count=3,
save_dir='vdl_output')
```
对数据预处理/增强中间结果进行可视化。
可使用VisualDL查看中间结果:
1. VisualDL启动方式: visualdl --logdir vdl_output --port 8001
2. 浏览器打开 https://0.0.0.0:8001即可,
其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP
### 参数
>* **dataset** (paddlex.datasets): 数据集读取器。
>* **img_count** (int): 需要进行数据预处理/增强的图像数目。默认为3。
>* **save_dir** (str): 日志保存的路径。默认为'vdl_output'。
\ No newline at end of file
......@@ -7,6 +7,7 @@
:caption: 目录:
model_zoo.md
slim_model_zoo.md
metrics.md
interpret.md
parameters.md
......
......@@ -20,9 +20,20 @@ LIME的使用方式可参见[代码示例](https://github.com/PaddlePaddle/Paddl
## NormLIME
NormLIME是在LIME上的改进,LIME的解释是局部性的,是针对当前样本给的特定解释,而NormLIME是利用一定数量的样本对当前样本的一个全局性的解释,有一定的降噪效果。其实现步骤如下所示:
1. 下载Kmeans模型参数和ResNet50_vc网络前三层参数。(ResNet50_vc的参数是在ImageNet上训练所得网络的参数;使用ImageNet图像作为数据集,每张图像从ResNet50_vc的第三层输出提取对应超象素位置上的平均特征和质心上的特征,训练将得到此处的Kmeans模型)
2. 计算测试集中每张图像的LIME结果。(如无测试集,可用验证集代替)
3. 使用Kmeans模型对所有图像中的所有像素进行聚类。
4. 对在同一个簇的超像素(相同的特征)进行权重的归一化,得到每个超像素的权重,以此来解释模型。
2. 使用测试集中的数据计算normlime的权重信息(如无测试集,可用验证集代替):
对每张图像的处理:
(1) 获取图像的超像素。
(2) 使用ResNet50_vc获取第三层特征,针对每个超像素位置,组合质心特征和均值特征`F`
(3) 把`F`作为Kmeans模型的输入,计算每个超像素位置的聚类中心。
(4) 使用训练好的分类模型,预测该张图像的`label`
对所有图像的处理:
(1) 以每张图像的聚类中心信息组成的向量(若某聚类中心出现在盖章途中设置为1,反之为0)为输入,
预测的`label`为输出,构建逻辑回归函数`regression_func`
(2) 由`regression_func`可获得每个聚类中心不同类别下的权重,并对权重进行归一化。
3. 使用Kmeans模型获取需要可视化图像的每个超像素的聚类中心。
4. 对需要可视化的图像的超像素进行随机遮掩构成新的图像。
5. 对每张构造的图像使用预测模型预测label。
6. 根据normlime的权重信息,每个超像素可获不同的权重,选取最高的权重为最终的权重,以此来解释模型。
NormLIME的使用方式可参见[代码示例](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/interpret/normlime.py)[api介绍](../apis/visualize.html#normlime)。在使用时,参数中的`num_samples`设置尤为重要,其表示上述步骤2中的随机采样的个数,若设置过小会影响可解释性结果的稳定性,若设置过大则将在上述步骤3耗费较长时间;参数`batch_size`则表示在计算上述步骤3时,预测的batch size,若设置过小将在上述步骤3耗费较长时间,而上限则根据机器配置决定;而`dataset`则是由测试集或验证集构造的数据。
......
......@@ -6,48 +6,56 @@
| 模型 | 模型大小 | 预测速度(毫秒) | Top1准确率(%) | Top5准确率(%) |
| :----| :------- | :----------- | :--------- | :--------- |
| ResNet18| 46.9MB | - | 71.0 | 89.9 |
| ResNet34| 87.5MB | - | 74.6 | 92.1 |
| ResNet50| 102.7MB | - | 76.5 | 93.0 |
| ResNet101 |179.1MB | - | 77.6 | 93.6 |
| ResNet50_vd |102.8MB |- | 79.1 | 94.4 |
| ResNet101_vd| 179.2MB | - | 80.2 | 95.0 |
| ResNet50_vd_ssld |102.8MB | - | 82.4 | 96.1 |
| ResNet101_vd_ssld| 179.2MB | - | 83.7 | 96.7 |
| DarkNet53|166.9MB | - | 78.0 | 94.1 |
| MobileNetV1 | 16.0MB | - | 71.0 | 89.7 |
| MobileNetV2 | 14.0MB | - | 72.2 | 90.7 |
| MobileNetV3_large| 21.0MB | - | 75.3 | 93.2 |
| MobileNetV3_small | 12.0MB | - | 68.2 | 88.1 |
| MobileNetV3_large_ssld| 21.0MB | - | 79.0 | 94.5 |
| MobileNetV3_small_ssld | 12.0MB | - | 71.3 | 90.1 |
| Xception41 |92.4MB | - | 79.6 | 94.4 |
| Xception65 | 144.6MB | - | 80.3 | 94.5 |
| DenseNet121 | 32.8MB | - | 75.7 | 92.6 |
| DenseNet161|116.3MB | - | 78.6 | 94.1 |
| DenseNet201| 84.6MB | - | 77.6 | 93.7 |
| ShuffleNetV2 | 9.0MB | - | 68.8 | 88.5 |
| HRNet_W18 | 21.29MB | - | 76.9 | 93.4 |
| [ResNet18](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet18_pretrained.tar)| 46.2MB | 3.72882 | 71.0 | 89.9 |
| [ResNet34](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet34_pretrained.tar)| 87.9MB | 5.50876 | 74.6 | 92.1 |
| [ResNet50](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar)| 103.4MB | 7.76659 | 76.5 | 93.0 |
| [ResNet101](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_pretrained.tar) |180.4MB | 13.80876 | 77.6 | 93.6 |
| [ResNet50_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar) |103.5MB | 8.20476 | 79.1 | 94.4 |
| [ResNet101_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_pretrained.tar)| 180.5MB | 14.24643 | 80.2 | 95.0 |
| [ResNet50_vd_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_ssld_pretrained.tar) |103.5MB | 7.79264 | 82.4 | 96.1 |
| [ResNet101_vd_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_ssld_pretrained.tar)| 180.5MB | 13.34580 | 83.7 | 96.7 |
| [DarkNet53](https://paddle-imagenet-models-name.bj.bcebos.com/DarkNet53_ImageNet1k_pretrained.tar)|167.4MB | 8.82047 | 78.0 | 94.1 |
| [MobileNetV1](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_pretrained.tar) | 17.4MB | 3.42838 | 71.0 | 89.7 |
| [MobileNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_pretrained.tar) | 15.0MB | 5.92667 | 72.2 | 90.7 |
| [MobileNetV3_large](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_large_x1_0_pretrained.tar)| 22.8MB | 8.31428 | 75.3 | 93.2 |
| [MobileNetV3_small](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_small_x1_0_pretrained.tar) | 12.5MB | 7.30689 | 68.2 | 88.1 |
| [MobileNetV3_large_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_large_x1_0_ssld_pretrained.tar)| 22.8MB | 8.06651 | 79.0 | 94.5 |
| [MobileNetV3_small_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_small_x1_0_ssld_pretrained.tar) | 12.5MB | 7.08837 | 71.3 | 90.1 |
| [Xception41](https://paddle-imagenet-models-name.bj.bcebos.com/Xception41_deeplab_pretrained.tar) | 109.2MB | 8.15611 | 79.6 | 94.4 |
| [Xception65](https://paddle-imagenet-models-name.bj.bcebos.com/Xception65_deeplab_pretrained.tar) | 161.6MB | 13.87017 | 80.3 | 94.5 |
| [DenseNet121](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet121_pretrained.tar) | 33.1MB | 17.09874 | 75.7 | 92.6 |
| [DenseNet161](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet161_pretrained.tar)| 118.0MB | 22.79690 | 78.6 | 94.1 |
| [DenseNet201](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet201_pretrained.tar)| 84.1MB | 25.26089 | 77.6 | 93.7 |
| [ShuffleNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_pretrained.tar) | 10.2MB | 15.40138 | 68.8 | 88.5 |
| [HRNet_W18](https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W18_C_pretrained.tar) | 21.29MB |45.25514 | 76.9 | 93.4 |
## 目标检测模型
> 表中模型相关指标均为在MSCOCO数据集上使用PaddlePaddle Python预测接口测试得到(测试GPU型号为Nvidia Tesla V100测试得到,表中符号`-`表示相关指标暂未测试。
> 表中模型相关指标均为在MSCOCO数据集上使用PaddlePaddle Python预测接口测试得到(测试GPU型号为Nvidia Tesla V100测试得到,表中符号`-`表示相关指标暂未测试。
| 模型 | 模型大小 | 预测时间(毫秒) | BoxAP(%) |
|:-------|:-----------|:-------------|:----------|
|FasterRCNN-ResNet50|135.6MB| 78.450 | 35.2 |
|FasterRCNN-ResNet50_vd| 135.7MB | 79.523 | 36.4 |
|FasterRCNN-ResNet101| 211.7MB | 107.342 | 38.3 |
|FasterRCNN-ResNet50-FPN| 167.2MB | 44.897 | 37.2 |
|FasterRCNN-ResNet50_vd-FPN|168.7MB | 45.773 | 38.9 |
|FasterRCNN-ResNet101-FPN| 251.7MB | 55.782 | 38.7 |
|FasterRCNN-ResNet101_vd-FPN |252MB | 58.785 | 40.5 |
|FasterRCNN-HRNet_W18-FPN |115.5MB | 57.11 | 36 |
|YOLOv3-DarkNet53|252.4MB | 21.944 | 38.9 |
|YOLOv3-MobileNetv1 |101.2MB | 12.771 | 29.3 |
|YOLOv3-MobileNetv3|94.6MB | - | 31.6 |
| YOLOv3-ResNet34|169.7MB | 15.784 | 36.2 |
|[FasterRCNN-ResNet50](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_1x.tar)|136.0MB| 197.715 | 35.2 |
|[FasterRCNN-ResNet50_vd](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_vd_1x.tar)| 136.1MB | 475.700 | 36.4 |
|[FasterRCNN-ResNet101](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_1x.tar)| 212.5MB | 582.911 | 38.3 |
|[FasterRCNN-ResNet50-FPN](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_fpn_1x.tar)| 167.7MB | 83.189 | 37.2 |
|[FasterRCNN-ResNet50_vd-FPN](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_vd_fpn_2x.tar)|167.8MB | 128.277 | 38.9 |
|[FasterRCNN-ResNet101-FPN](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_fpn_1x.tar)| 244.2MB | 119.788 | 38.7 |
|[FasterRCNN-ResNet101_vd-FPN](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_vd_fpn_2x.tar) |244.3MB | 156.097 | 40.5 |
|[FasterRCNN-HRNet_W18-FPN](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_hrnetv2p_w18_1x.tar) |115.5MB | 81.592 | 36 |
|[YOLOv3-DarkNet53](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_darknet.tar)|249.2MB | 42.672 | 38.9 |
|[YOLOv3-MobileNetV1](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1.tar) |99.2MB | 15.442 | 29.3 |
|[YOLOv3-MobileNetV3_large](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v3.pdparams)|100.7MB | 143.322 | 31.6 |
| [YOLOv3-ResNet34](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r34.tar)|170.3MB | 23.185 | 36.2 |
## 实例分割模型
> 表中模型相关指标均为在MSCOCO数据集上测试得到。
| 模型 | 模型大小 | 预测时间(毫秒) | mIoU(%) |
|:-------|:-----------|:-------------|:----------|
|DeepLabv3+-MobileNetV2_x1.0|-| - | - |
|DeepLabv3+-Xception41|-| - | - |
|DeepLabv3+-Xception65|-| - | - |
|UNet|-| - | - |
|HRNet_w18|-| - | - |
# PaddleX压缩模型库
## 图像分类
数据集:ImageNet-1000
### 量化
| 模型 | 压缩策略 | Top-1准确率 | 存储体积 | TensorRT时延(V100, ms) |
|:--:|:---:|:--:|:--:|:--:|
|MobileNetV1| 无 |70.99%| 17MB | -|
|MobileNetV1| 量化 |70.18% (-0.81%)| 4.4MB | - |
| MobileNetV2 | 无 |72.15%| 15MB | - |
| MobileNetV2 | 量化 | 71.15% (-1%)| 4.0MB | - |
|ResNet50| 无 |76.50%| 99MB | 2.71 |
|ResNet50| 量化 |76.33% (-0.17%)| 25.1MB | 1.19 |
分类模型Lite时延(ms)
| 设备 | 模型类型 | 压缩策略 | armv7 Thread 1 | armv7 Thread 2 | armv7 Thread 4 | armv8 Thread 1 | armv8 Thread 2 | armv8 Thread 4 |
| ------- | ----------- | ------------- | -------------- | -------------- | -------------- | -------------- | -------------- | -------------- |
| 高通835 | MobileNetV1 | 无 | 96.1942 | 53.2058 | 32.4468 | 88.4955 | 47.95 | 27.5189 |
| 高通835 | MobileNetV1 | 量化 | 60.5615 | 32.4016 | 16.6596 | 56.5266 | 29.7178 | 15.1459 |
| 高通835 | MobileNetV2 | 无 | 65.715 | 38.1346 | 25.155 | 61.3593 | 36.2038 | 22.849 |
| 高通835 | MobileNetV2 | 量化 | 48.3495 | 30.3069 | 22.1506 | 45.8715 | 27.4105 | 18.2223 |
| 高通835 | ResNet50 | 无 | 526.811 | 319.6486 | 205.8345 | 506.1138 | 335.1584 | 214.8936 |
| 高通835 | ResNet50 | 量化 | 476.0507 | 256.5963 | 139.7266 | 461.9176 | 248.3795 | 149.353 |
| 高通855 | MobileNetV1 | 无 | 33.5086 | 19.5773 | 11.7534 | 31.3474 | 18.5382 | 10.0811 |
| 高通855 | MobileNetV1 | 量化 | 37.0498 | 21.7081 | 11.0779 | 14.0947 | 8.1926 | 4.2934 |
| 高通855 | MobileNetV2 | 无 | 25.0396 | 15.2862 | 9.6609 | 22.909 | 14.1797 | 8.8325 |
| 高通855 | MobileNetV2 | 量化 | 28.1631 | 18.3917 | 11.8333 | 16.9399 | 11.1772 | 7.4176 |
| 高通855 | ResNet50 | 无 | 185.3705 | 113.0825 | 87.0741 | 177.7367 | 110.0433 | 74.4114 |
| 高通855 | ResNet50 | 量化 | 328.2683 | 201.9937 | 106.744 | 242.6397 | 150.0338 | 79.8659 |
| 麒麟970 | MobileNetV1 | 无 | 101.2455 | 56.4053 | 35.6484 | 94.8985 | 51.7251 | 31.9511 |
| 麒麟970 | MobileNetV1 | 量化 | 62.4412 | 32.2585 | 16.6215 | 57.825 | 29.2573 | 15.1206 |
| 麒麟970 | MobileNetV2 | 无 | 70.4176 | 42.0795 | 25.1939 | 68.9597 | 39.2145 | 22.6617 |
| 麒麟970 | MobileNetV2 | 量化 | 53.0961 | 31.7987 | 21.8334 | 49.383 | 28.2358 | 18.3642 |
| 麒麟970 | ResNet50 | 无 | 586.8943 | 344.0858 | 228.2293 | 573.3344 | 351.4332 | 225.8006 |
| 麒麟970 | ResNet50 | 量化 | 489.6188 | 258.3279 | 142.6063 | 480.0064 | 249.5339 | 138.5284 |
### 剪裁
PaddleLite推理耗时说明:
环境:Qualcomm SnapDragon 845 + armv8
速度指标:Thread1/Thread2/Thread4耗时
| 模型 | 压缩策略 | Top-1 | 存储体积 |PaddleLite推理耗时|TensorRT推理速度(FPS)|
|:--:|:---:|:--:|:--:|:--:|:--:|
| MobileNetV1 | 无 | 70.99% | 17MB | 66.052\35.8014\19.5762|-|
| MobileNetV1 | 剪裁 -30% | 70.4% (-0.59%) | 12MB | 46.5958\25.3098\13.6982|-|
| MobileNetV1 | 剪裁 -50% | 69.8% (-1.19%) | 9MB | 37.9892\20.7882\11.3144|-|
## 目标检测
### 量化
数据集: COCO2017
| 模型 | 压缩策略 | 数据集 | Image/GPU | 输入608 Box AP | 存储体积 | TensorRT时延(V100, ms) |
| :----------------------------: | :---------: | :----: | :-------: | :------------: | :------------: | :----------: |
| MobileNet-V1-YOLOv3 | 无 | COCO | 8 | 29.3 | 95MB | - |
| MobileNet-V1-YOLOv3 | 量化 | COCO | 8 | 27.9 (-1.4)| 25MB | - |
| R34-YOLOv3 | 无 | COCO | 8 | 36.2 | 162MB | - |
| R34-YOLOv3 | 量化 | COCO | 8 | 35.7 (-0.5) | 42.7MB | - |
### 剪裁
数据集:Pasacl VOC & COCO2017
PaddleLite推理耗时说明:
环境:Qualcomm SnapDragon 845 + armv8
速度指标:Thread1/Thread2/Thread4耗时
| 模型 | 压缩策略 | 数据集 | Image/GPU | 输入608 Box mmAP | 存储体积 | PaddleLite推理耗时(ms)(608*608) | TensorRT推理速度(FPS)(608*608) |
| :----------------------------: | :---------------: | :--------: | :-------: | :------------: | :----------: | :--------------: | :--------------: |
| MobileNet-V1-YOLOv3 | 无 | Pascal VOC | 8 | 76.2 | 94MB | 1238\796.943\520.101|60.04|
| MobileNet-V1-YOLOv3 | 剪裁 -52.88% | Pascal VOC | 8 | 77.6 (+1.4) | 31MB | 602.497\353.759\222.427 |99.36|
| MobileNet-V1-YOLOv3 | 无 | COCO | 8 | 29.3 | 95MB |-|-|
| MobileNet-V1-YOLOv3 | 剪裁 -51.77% | COCO | 8 | 26.0 (-3.3) | 32MB |-|73.93|
## 语义分割
数据集:Cityscapes
### 量化
| 模型 | 压缩策略 | mIoU | 存储体积 |
| :--------------------: | :---------: | :-----------: | :------------: |
| DeepLabv3-MobileNetv2 | 无 | 69.81 | 7.4MB |
| DeepLabv3-MobileNetv2 | 量化 | 67.59 (-2.22) | 2.1MB |
图像分割模型Lite时延(ms), 输入尺寸769 x 769
| 设备 | 模型类型 | 压缩策略 | armv7 Thread 1 | armv7 Thread 2 | armv7 Thread 4 | armv8 Thread 1 | armv8 Thread 2 | armv8 Thread 4 |
| ------- | ---------------------- | ------------- | -------------- | -------------- | -------------- | -------------- | -------------- | -------------- |
| 高通835 | Deeplabv3-MobileNetV2 | 无 | 1282.8126 | 793.2064 | 653.6538 | 1193.9908 | 737.1827 | 593.4522 |
| 高通835 | Deeplabv3-MobileNetV2 | 量化 | 981.44 | 658.4969 | 538.6166 | 885.3273 | 586.1284 | 484.0018 |
| 高通855 | Deeplabv3-MobileNetV2 | 无 | 639.4425 | 390.1851 | 322.7014 | 477.7667 | 339.7411 | 262.2847 |
| 高通855 | Deeplabv3-MobileNetV2 | 量化 | 705.7589 | 474.4076 | 427.2951 | 394.8352 | 297.4035 | 264.6724 |
| 麒麟970 | Deeplabv3-MobileNetV2 | 无 | 1771.1301 | 1746.0569 | 1222.4805 | 1448.9739 | 1192.4491 | 760.606 |
| 麒麟970 | Deeplabv3-MobileNetV2 | 量化 | 1320.386 | 918.5328 | 672.2481 | 1020.753 | 820.094 | 591.4114 |
### 剪裁
PaddleLite推理耗时说明:
环境:Qualcomm SnapDragon 845 + armv8
速度指标:Thread1/Thread2/Thread4耗时
| 模型 | 压缩方法 | mIoU | 存储体积 | PaddleLite推理耗时 | TensorRT推理速度(FPS) |
| :-------: | :---------------: | :-----------: | :------: | :------------: | :----: |
| FastSCNN | 无 | 69.64 | 11MB | 1226.36\682.96\415.664 |39.53|
| FastSCNN | 剪裁 -47.60% | 66.68 (-2.96) | 5.7MB | 866.693\494.467\291.748 |51.48|
# PaddleX视觉方案介绍
PaddleX目前提供了4种视觉任务解决方案,分别为图像分类、目标检测、实例分割和语义分割。用户可以根据自己的任务类型按需选取
PaddleX针对图像分类、目标检测、实例分割和语义分割4种视觉任务提供了包含模型选择、压缩策略选择、部署方案选择在内的解决方案。用户根据自己的需求选择合适的模型,选择合适的压缩策略来减小模型的计算量和存储体积、加速模型预测推理,最后选择合适的部署方案将模型部署在移动端或者服务器端
## 图像分类
## 模型选择
### 图像分类
图像分类任务指的是输入一张图片,模型预测图片的类别,如识别为风景、动物、车等。
![](./images/image_classification.png)
对于图像分类任务,针对不同的应用场景,PaddleX提供了百度改进的模型,见下表所示
对于图像分类任务,针对不同的应用场景,PaddleX提供了百度改进的模型,见下表所示:
> 表中GPU预测速度是使用PaddlePaddle Python预测接口测试得到(测试GPU型号为Nvidia Tesla P40)。
> 表中CPU预测速度 (测试CPU型号为)。
> 表中骁龙855预测速度是使用处理器为骁龙855的手机测试得到。
> 测速时模型输入大小为224 x 224,Top1准确率为ImageNet-1000数据集上评估所得。
| 模型 | 模型大小 | GPU预测速度 | CPU预测速度 | ARM芯片预测速度 | 准确率 | 备注 |
| :--------- | :------ | :---------- | :-----------| :------------- | :----- | :--- |
| MobileNetV3_small_ssld | 12M | - | - | - | 71.3% |适用于移动端场景 |
| MobileNetV3_large_ssld | 21M | - | - | - | 79.0% | 适用于移动端/服务端场景 |
| ResNet50_vd_ssld | 102.8MB | - | - | - | 82.4% | 适用于服务端场景 |
| ResNet101_vd_ssld | 179.2MB | - | - | - |83.7% | 适用于服务端场景 |
| 模型 | 模型特点 | 存储体积 | GPU预测速度(毫秒) | CPU(x86)预测速度(毫秒) | 骁龙855(ARM)预测速度 (毫秒)| Top1准确率 |
| :--------- | :------ | :---------- | :-----------| :------------- | :------------- |:--- |
| MobileNetV3_small_ssld | 轻量高速,适用于追求高速的实时移动端场景 | 12.5MB | 7.08837 | - | 6.546 | 71.3.0% |
| ShuffleNetV2 | 轻量级模型,精度相对偏低,适用于要求更小存储体积的实时移动端场景 | 10.2MB | 15.40 | - | 10.941 | 68.8% |
| MobileNetV3_large_ssld | 轻量级模型,在存储方面优势不大,在速度和精度上表现适中,适合于移动端场景 | 22.8MB | 8.06651 | - | 19.803 | 79.0% |
| MobileNetV2 | 轻量级模型,适用于使用GPU预测的移动端场景 | 15.0MB | 5.92667 | - | 23.318| 72.2 % |
| ResNet50_vd_ssld | 高精度模型,预测时间较短,适用于大多数的服务器端场景 | 103.5MB | 7.79264 | - | - | 82.4% |
| ResNet101_vd_ssld | 超高精度模型,预测时间相对较长,适用于有大数据量时的服务器端场景 | 180.5MB | 13.34580 | - | -| 83.7% |
| Xception65 | 超高精度模型,预测时间更长,在处理较大数据量时有较高的精度,适用于服务器端场景 | 161.6MB | 13.87017 | - | - | 80.3% |
除上述模型外,PaddleX还支持近20种图像分类模型,模型列表可参考[PaddleX模型库](../appendix/model_zoo.md)
包括上述模型,PaddleX支持近20种图像分类模型,其余模型可参考[PaddleX模型库](../appendix/model_zoo.md)
## 目标检测
### 目标检测
目标检测任务指的是输入图像,模型识别出图像中物体的位置(用矩形框框出来,并给出框的位置),和物体的类别,如在手机等零件质检中,用于检测外观上的瑕疵等。
![](./images/object_detection.png)
对于目标检测,针对不同的应用场景,PaddleX提供了主流的YOLOv3模型和Faster-RCNN模型,见下表所示
| 模型 | 模型大小 | GPU预测速度 | CPU预测速度 |ARM芯片预测速度 | BoxMAP | 备注 |
| :------- | :------- | :--------- | :---------- | :------------- | :----- | :--- |
| YOLOv3-MobileNetV1 | 101.2M | - | - | - | 29.3 | |
| YOLOv3-MobileNetV3 | 94.6M | - | - | - | 31.6 | |
| YOLOv3-ResNet34 | 169.7M | - | - | - | 36.2 | |
| YOLOv3-DarkNet53 | 252.4 | - | - | - | 38.9 | |
除YOLOv3模型外,PaddleX同时也支持FasterRCNN模型,支持FPN结构和5种backbone网络,详情可参考[PaddleX模型库](../appendix/model_zoo.md)
## 实例分割
> 表中GPU预测速度是使用PaddlePaddle Python预测接口测试得到(测试GPU型号为Nvidia Tesla P40)。
> 表中CPU预测速度 (测试CPU型号为)。
> 表中骁龙855预测速度是使用处理器为骁龙855的手机测试得到。
> 测速时YOLOv3的输入大小为608 x 608,FasterRCNN的输入大小为800 x 1333,Box mmAP为COCO2017数据集上评估所得。
| 模型 | 模型特点 | 存储体积 | GPU预测速度 | CPU(x86)预测速度(毫秒) | 骁龙855(ARM)预测速度 (毫秒)| Box mmAP |
| :------- | :------- | :--------- | :---------- | :------------- | :------------- |:--- |
| YOLOv3-MobileNetV3_larget | 适用于追求高速预测的移动端场景 | 100.7MB | 143.322 | - | - | 31.6 |
| YOLOv3-MobileNetV1 | 精度相对偏低,适用于追求高速预测的服务器端场景 | 99.2MB| 15.422 | - | - | 29.3 |
| YOLOv3-DarkNet53 | 在预测速度和模型精度上都有较好的表现,适用于大多数的服务器端场景| 249.2MB | 42.672 | - | - | 38.9 |
| FasterRCNN-ResNet50-FPN | 经典的二阶段检测器,预测速度相对较慢,适用于重视模型精度的服务器端场景 | 167.MB | 83.189 | - | -| 37.2 |
| FasterRCNN-HRNet_W18-FPN | 适用于对图像分辨率较为敏感、对目标细节预测要求更高的服务器端场景 | 115.5MB | 81.592 | - | - | 36 |
| FasterRCNN-ResNet101_vd-FPN | 超高精度模型,预测时间更长,在处理较大数据量时有较高的精度,适用于服务器端场景 | 244.3MB | 156.097 | - | - | 40.5 |
除上述模型外,YOLOv3和Faster RCNN还支持其他backbone,详情可参考[PaddleX模型库](../appendix/model_zoo.md)
### 实例分割
在目标检测中,模型识别出图像中物体的位置和物体的类别。而实例分割则是在目标检测的基础上,做了像素级的分类,将框内的属于目标物体的像素识别出来。
![](./images/instance_segmentation.png)
PaddleX目前提供了实例分割MaskRCNN模型,支持5种不同的backbone网络,详情可参考[PaddleX模型库](../appendix/model_zoo.md)
| 模型 | 模型大小 | GPU预测速度 | CPU预测速度 | ARM芯片预测速度 | BoxMAP | SegMAP | 备注 |
| :---- | :------- | :---------- | :---------- | :------------- | :----- | :----- | :--- |
| MaskRCNN-ResNet50_vd-FPN | 185.5M | - | - | - | 39.8 | 35.4 | |
| MaskRCNN-ResNet101_vd-FPN | 268.6M | - | - | - | 41.4 | 36.8 | |
## 语义分割
> 表中GPU预测速度是使用PaddlePaddle Python预测接口测试得到(测试GPU型号为Nvidia Tesla P40)。
> 表中CPU预测速度 (测试CPU型号为)。
> 表中骁龙855预测速度是使用处理器为骁龙855的手机测试得到。
> 测速时MaskRCNN的输入大小为800 x 1333,Box mmAP和Seg mmAP为COCO2017数据集上评估所得。
| 模型 | 模型特点 | 存储体积 | GPU预测速度 | CPU(x86)预测速度(毫秒) | 骁龙855(ARM)预测速度 (毫秒)| Box mmAP | Seg mmAP |
| :---- | :------- | :---------- | :---------- | :----- | :----- | :--- |:--- |
| MaskRCNN-HRNet_W18-FPN | 适用于对图像分辨率较为敏感、对目标细节预测要求更高的服务器端场景 | - | - | - | - | 37.0 | 33.4 |
| MaskRCNN-ResNet50-FPN | 精度较高,适合大多数的服务器端场景| 185.5M | - | - | - | 37.9 | 34.2 |
| MaskRCNN-ResNet101_vd-FPN | 高精度但预测时间更长,在处理较大数据量时有较高的精度,适用于服务器端场景 | 268.6M | - | - | - | 41.4 | 36.8 |
### 语义分割
语义分割用于对图像做像素级的分类,应用在人像分类、遥感图像识别等场景。
![](./images/semantic_segmentation.png)
对于语义分割,PaddleX也针对不同的应用场景,提供了不同的模型选择,如下表所示
> 表中GPU预测速度是使用PaddlePaddle Python预测接口测试得到(测试GPU型号为Nvidia Tesla P40)。
> 表中CPU预测速度 (测试CPU型号为)。
> 表中骁龙855预测速度是使用处理器为骁龙855的手机测试得到。
> 测速时模型的输入大小为1024 x 2048,mIOU为Cityscapes数据集上评估所得。
| 模型 | 模型特点 | 存储体积 | GPU预测速度 | CPU(x86)预测速度(毫秒) | 骁龙855(ARM)预测速度 (毫秒)| mIOU |
| :---- | :------- | :---------- | :---------- | :----- | :----- |:--- |
| DeepLabv3p-MobileNetV2_x1.0 | 轻量级模型,适用于移动端场景| - | - | - | 69.8% |
| HRNet_W18_Small_v1 | 轻量高速,适用于移动端场景 | - | - | - | - |
| FastSCNN | 轻量高速,适用于追求高速预测的移动端或服务器端场景 | - | - | - | 69.64 |
| HRNet_W18 | 高精度模型,适用于对图像分辨率较为敏感、对目标细节预测要求更高的服务器端场景| - | - | - | 79.36 |
| DeepLabv3p-Xception65 | 高精度但预测时间更长,在处理较大数据量时有较高的精度,适用于服务器且背景复杂的场景| - | - | - | 79.3% |
## 压缩策略选择
PaddleX提供包含模型剪裁、定点量化的模型压缩策略来减小模型的计算量和存储体积,加快模型部署后的预测速度。使用不同压缩策略在图像分类、目标检测和语义分割模型上的模型精度和预测速度详见以下内容,用户可以选择根据自己的需求选择合适的压缩策略,进一步优化模型的性能。
| 压缩策略 | 策略特点 |
| :---- | :------- |
| 量化 | 较为显著地减少模型的存储体积,适用于移动端或服务期端TensorRT部署,在移动端对于MobileNet系列模型有明显的加速效果 |
| 剪裁 | 能够去除冗余的参数,达到显著减少参数计算量和模型体积的效果,提升模型的预测性能,适用于CPU部署或移动端部署(GPU上无明显加速效果) |
| 先剪裁后量化 | 可以进一步提升模型的预测性能,适用于移动端或服务器端TensorRT部署 |
### 性能对比
* 表中各指标的格式为XXX/YYY,XXX表示未采取压缩策略时的指标,YYY表示压缩后的指标
* 分类模型的准确率指的是ImageNet-1000数据集上的Top1准确率(模型输入大小为224x224),检测模型的准确率指的是COCO2017数据集上的mmAP(模型输入大小为608x608),分割模型的准确率指的是Cityscapes数据集上mIOU(模型输入大小为769x769)
* 量化策略中,PaddleLiter推理环境为Qualcomm SnapDragon 855 + armv8,速度指标为Thread4耗时
* 剪裁策略中,PaddleLiter推理环境为Qualcomm SnapDragon 845 + armv8,速度指标为Thread4耗时
| 模型 | 压缩策略 | 存储体积(MB) | 准确率(%) | PaddleLite推理耗时(ms) |
| :--: | :------: | :------: | :----: | :----------------: |
| MobileNetV1 | 量化 | 17/4.4 | 70.99/70.18 | 10.0811/4.2934 |
| MobileNetV1 | 剪裁 -30% | 17/12 | 70.99/70.4 | 19.5762/13.6982 |
| YOLOv3-MobileNetV1 | 量化 | 95/25 | 29.3/27.9 | - |
| YOLOv3-MobileNetV1 | 剪裁 -51.77% | 95/25 | 29.3/26 | - |
| Deeplabv3-MobileNetV2 | 量化 | 7.4/1.8 | 63.26/62.03 | 593.4522/484.0018 |
| FastSCNN | 剪裁 -47.60% | 11/5.7 | 69.64/66.68 | 415.664/291.748 |
更多模型在不同设备上压缩前后的指标对比详见[PaddleX压缩模型库](appendix/slim_model_zoo.md)
压缩策略的具体使用流程详见[模型压缩](tutorials/compress)
**注意:PaddleX中全部图像分类模型和语义分割模型都支持量化和剪裁操作,目标检测仅有YOLOv3支持量化和剪裁操作。**
## 模型部署
PaddleX提供服务器端python部署、服务器端c++部署、服务器端加密部署、OpenVINO部署、移动端部署共5种部署方案,用户可以根据自己的需求选择合适的部署方案,点击以下链接了解部署的具体流程。
| 模型 | 模型大小 | GPU预测速度 | CPU预测速度 | ARM芯片预测速度 | mIOU | 备注 |
| :---- | :------- | :---------- | :---------- | :------------- | :----- | :----- |
| DeepLabv3p-MobileNetV2_x0.25 | | - | - | - | - | - |
| DeepLabv3p-MobileNetV2_x1.0 | | - | - | - | - | - |
| DeepLabv3p-Xception65 | | - | - | - | - | - |
| UNet | | - | - | - | - | - |
| 部署方案 | 部署流程 |
| :------: | :------: |
| 服务器端python部署 | [部署流程](tutorials/deploy/deploy_server/deploy_python.html)|
| 服务器端c++部署 | [部署流程](tutorials/deploy/deploy_server/deploy_cpp/) |
| 服务器端加密部署 | [部署流程](tutorials/deploy/deploy_server/encryption.html) |
| OpenVINO部署 | [部署流程](tutorials/deploy/deploy_openvino.html) |
| 移动端部署 | [部署流程](tutorials/deploy/deploy_lite.html) |
docs/images/lime.png

802.8 KB | W: | H:

docs/images/lime.png

423.0 KB | W: | H:

docs/images/lime.png
docs/images/lime.png
docs/images/lime.png
docs/images/lime.png
  • 2-up
  • Swipe
  • Onion skin
docs/images/normlime.png

1.5 MB | W: | H:

docs/images/normlime.png

546.2 KB | W: | H:

docs/images/normlime.png
docs/images/normlime.png
docs/images/normlime.png
docs/images/normlime.png
  • 2-up
  • Swipe
  • Onion skin
......@@ -26,6 +26,7 @@ PaddleX是基于飞桨核心框架、开发套件和工具组件的深度学习
cv_solutions.md
apis/index.rst
paddlex_gui/index.rst
tuning_strategy/index.rst
update.md
FAQ.md
appendix/index.rst
......
目标检测
============================
PaddleX针对目标检测任务提供了通过负样本学习降低误检率的策略,用户可根据需求及应用场景使用该策略对模型进行调优。
.. toctree::
:maxdepth: 1
negatives_training.md
# 通过负样本学习降低误检率
## 应用场景
在背景和目标相似的场景下,模型容易把背景误检成目标。为了降低误检率,可以通过负样本学习来降低误检率,即在训练过程中把无目标真值的图片加入训练。
## 效果对比
* 与基准模型相比,通过负样本学习后的模型**mmAP有3.6%的提升,mAP有0.1%的提升**
* 与基准模型相比,通过负样本学习后的模型在背景图片上的图片级别**误检率降低了49.68%**
表1 违禁品验证集上**框级别精度**对比
||mmAP(AP@IoU=0.5:0.95)| mAP (AP@IoU=0.5)|
|:---|:---|:---|
|基准模型 | 45.8% | 83% |
|通过负样本学习后的模型 | 49.4% | 83.1% |
表2 违禁品验证集上**图片级别的召回率**、无违禁品验证集上**图片级别的误检率**对比
||违禁品图片级别的召回率| 无违禁品图片级别的误检率|
|:---|:--------------------|:------------------------|
|基准模型 | 98.97% | 55.27% |
|通过负样本学习后的模型 | 97.75% | 5.59% |
【名词解释】
* 图片级别的召回率:只要在有目标的图片上检测出目标(不论框的个数),该图片被认为召回。批量有目标图片中被召回图片所占的比例,即为图片级别的召回率。
* 图片级别的误检率:只要在无目标的图片上检测出目标(不论框的个数),该图片被认为误检。批量无目标图片中被误检图片所占的比例,即为图片级别的误检率。
## 使用方法
在定义训练所用的数据集之后,使用数据集类的成员函数`add_negative_samples`将无目标真值的背景图片所在路径传入给训练集。代码示例如下:
```
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
from paddlex.det import transforms
import paddlex as pdx
# 定义训练和验证时的transforms
train_transforms = transforms.ComposedRCNNTransforms(
mode='train', min_max_size=[600, 1000])
eval_transforms = transforms.ComposedRCNNTransforms(
mode='eval', min_max_size=[600, 1000])
# 定义训练所用的数据集
train_dataset = pdx.datasets.CocoDetection(
data_dir='jinnan2_round1_train_20190305/restricted/',
ann_file='jinnan2_round1_train_20190305/train.json',
transforms=train_transforms,
shuffle=True,
num_workers=2)
# 训练集中加入无目标背景图片
train_dataset.add_negative_samples(
'jinnan2_round1_train_20190305/normal_train_back/')
# 定义验证所用的数据集
eval_dataset = pdx.datasets.CocoDetection(
data_dir='jinnan2_round1_train_20190305/restricted/',
ann_file='jinnan2_round1_train_20190305/val.json',
transforms=eval_transforms,
num_workers=2)
# 初始化模型,并进行训练
model = pdx.det.FasterRCNN(num_classes=len(train_dataset.labels) + 1)
model.train(
num_epochs=17,
train_dataset=train_dataset,
eval_dataset=eval_dataset,
train_batch_size=8,
learning_rate=0.01,
lr_decay_epochs=[13, 16],
save_dir='./output')
```
## 实验细则
(1) 数据集
我们使用X光违禁品数据集对通过负样本学习降低误检率的策略有效性进行了实验验证。该数据集中背景比较繁杂,很多背景物体与目标物体较为相似。
* 检测铁壳打火机、黑钉打火机 、刀具、电源和电池、剪刀5种违禁品。
* 训练集有883张违禁品图片,验证集有98张违禁品图片。
* 无违禁品的X光图片有2540张。
(2) 基准模型
使用FasterRCNN-ResNet50作为检测模型,除了水平翻转外没有使用其他的数据增强方式,只使用违禁品训练集进行训练。模型在违禁品验证集上的精度见表1,mmAP有45.8%,mAP达到83%。
(3) 通过负样本学习后的模型
把无违禁品的X光图片按1:1分成无违禁品训练集和无违禁品验证集。我们将基准模型在无违禁品验证集进行测试,发现图片级别的误检率高达55.27%。为了降低该误检率,将基准模型在无违禁品训练集进行测试,挑选出被误检图片共663张,将这663张图片加入训练,训练参数配置与基准模型训练时一致。
通过负样本学习后的模型在违禁品验证集上的精度见表1,mmAP有49.4%,mAP达到83.1%。与基准模型相比,**mmAP有3.6%的提升,mAP有0.1%的提升**。通过负样本学习后的模型在无违禁品验证集的误检率仅有5.58%,与基准模型相比,**误检率降低了49.68%**
此外,还测试了两个模型在有违禁品验证集上图片级别的召回率,见表2,与基准模型相比,通过负样本学习后的模型仅漏检了1张图片,召回率几乎是无损的。
PaddleX调优策略介绍
============================
.. toctree::
:maxdepth: 2
detection/index.rst
......@@ -21,7 +21,7 @@ step 2: 将PaddleX模型导出为inference模型
step 3: 将inference模型转换成PaddleLite模型
```
python /path/to/PaddleX/deploy/lite/export_lite.py --model_dir /path/to/inference_model --save_file /path/to/onnx_model --place place/to/run
python /path/to/PaddleX/deploy/lite/export_lite.py --model_dir /path/to/inference_model --save_file /path/to/lite_model --place place/to/run
```
......
......@@ -19,18 +19,18 @@
### Step2: 下载PaddlePaddle C++ 预测库 fluid_inference
PaddlePaddle C++ 预测库针对不同的`CPU``CUDA`,以及是否支持TensorRT,提供了不同的预编译版本,目前PaddleX依赖于Paddle1.7版本,以下提供了多个不同版本的Paddle预测库:
PaddlePaddle C++ 预测库针对不同的`CPU``CUDA`,以及是否支持TensorRT,提供了不同的预编译版本,目前PaddleX依赖于Paddle1.8版本,以下提供了多个不同版本的Paddle预测库:
| 版本说明 | 预测库(1.7.2版本) |
| 版本说明 | 预测库(1.8.2版本) |
| ---- | ---- |
| ubuntu14.04_cpu_avx_mkl | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.7.2-cpu-avx-mkl/fluid_inference.tgz) |
| ubuntu14.04_cpu_avx_openblas | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.7.2-cpu-avx-openblas/fluid_inference.tgz) |
| ubuntu14.04_cpu_noavx_openblas | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.7.2-cpu-noavx-openblas/fluid_inference.tgz) |
| ubuntu14.04_cuda9.0_cudnn7_avx_mkl | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.7.2-gpu-cuda9-cudnn7-avx-mkl/fluid_inference.tgz) |
| ubuntu14.04_cuda10.0_cudnn7_avx_mkl | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.7.2-gpu-cuda10-cudnn7-avx-mkl/fluid_inference.tgz ) |
| ubuntu14.04_cuda10.1_cudnn7.6_avx_mkl_trt6 | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.7.2-gpu-cuda10.1-cudnn7.6-avx-mkl-trt6%2Ffluid_inference.tgz) |
| ubuntu14.04_cpu_avx_mkl | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.8.2-cpu-avx-mkl/fluid_inference.tgz) |
| ubuntu14.04_cpu_avx_openblas | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.8.2-cpu-avx-openblas/fluid_inference.tgz) |
| ubuntu14.04_cpu_noavx_openblas | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.8.2-cpu-noavx-openblas/fluid_inference.tgz) |
| ubuntu14.04_cuda9.0_cudnn7_avx_mkl | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.8.2-gpu-cuda9-cudnn7-avx-mkl/fluid_inference.tgz) |
| ubuntu14.04_cuda10.0_cudnn7_avx_mkl | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.8.2-gpu-cuda10-cudnn7-avx-mkl/fluid_inference.tgz ) |
| ubuntu14.04_cuda10.1_cudnn7.6_avx_mkl_trt6 | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.8.2-gpu-cuda10.1-cudnn7.6-avx-mkl-trt6%2Ffluid_inference.tgz) |
更多和更新的版本,请根据实际情况下载: [C++预测库下载列表](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_guide/inference_deployment/inference/windows_cpp_inference.html#id1)
更多和更新的版本,请根据实际情况下载: [C++预测库下载列表](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html)
下载并解压后`/root/projects/fluid_inference`目录包含内容为:
```
......@@ -42,7 +42,7 @@ fluid_inference
└── version.txt # 版本和编译信息
```
**注意:** 预编译版本除`nv-jetson-cuda10-cudnn7.5-trt5` 以外其它包都是基于`GCC 4.8.5`编译,使用高版本`GCC`可能存在 `ABI`兼容性问题,建议降级或[自行编译预测库](https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html#id12)
**注意:** 预编译版本除`nv-jetson-cuda10-cudnn7.5-trt5` 以外其它包都是基于`GCC 4.8.5`编译,使用高版本`GCC`可能存在 `ABI`兼容性问题,建议降级或[自行编译预测库](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html#id12)
### Step4: 编译
......@@ -55,17 +55,17 @@ WITH_GPU=OFF
WITH_MKL=ON
# 是否集成 TensorRT(仅WITH_GPU=ON 有效)
WITH_TENSORRT=OFF
# TensorRT 的lib路径
TENSORRT_DIR=/path/to/TensorRT/
# Paddle 预测库路径
PADDLE_DIR=/path/to/fluid_inference/
# TensorRT 的路径,如果需要集成TensorRT,需修改为您实际安装的TensorRT路径
TENSORRT_DIR=/root/projects/TensorRT/
# Paddle 预测库路径, 请修改为您实际安装的预测库路径
PADDLE_DIR=/root/projects/fluid_inference
# Paddle 的预测库是否使用静态库来编译
# 使用TensorRT时,Paddle的预测库通常为动态库
WITH_STATIC_LIB=ON
WITH_STATIC_LIB=OFF
# CUDA 的 lib 路径
CUDA_LIB=/path/to/cuda/lib/
CUDA_LIB=/usr/local/cuda/lib64
# CUDNN 的 lib 路径
CUDNN_LIB=/path/to/cudnn/lib/
CUDNN_LIB=/usr/local/cuda/lib64
# 是否加载加密后的模型
WITH_ENCRYPTION=ON
......@@ -74,8 +74,8 @@ sh $(pwd)/scripts/bootstrap.sh # 下载预编译版本的加密工具
ENCRYPTION_DIR=$(pwd)/paddlex-encryption
# OPENCV 路径, 如果使用自带预编译版本可不修改
sh $(pwd)/scripts/bootstrap.sh # 下载预编译版本的opencv
OPENCV_DIR=$(pwd)/deps/opencv3gcc4.8/
sh $(pwd)/scripts/bootstrap.sh
# 以下无需改动
rm -rf build
......@@ -94,7 +94,6 @@ cmake .. \
-DENCRYPTION_DIR=${ENCRYPTION_DIR} \
-DOPENCV_DIR=${OPENCV_DIR}
make
```
**注意:** linux环境下编译会自动下载OPENCV, PaddleX-Encryption和YAML,如果编译环境无法访问外网,可手动下载:
......@@ -117,9 +116,7 @@ yaml-cpp.zip文件下载后无需解压,在cmake/yaml.cmake中将`URL https://
**在加载模型前,请检查你的模型目录中文件应该包括`model.yml`、`__model__`和`__params__`三个文件。如若不满足这个条件,请参考[模型导出为Inference文档](../deploy_python.html#inference)将模型导出为部署格式。**
> **注意:由于PaddleX代码的持续更新,版本低于1.0.0的模型(模型版本可查看model.yml文件中的version字段)暂时无法直接用于预测部署,参考[模型版本升级](../../upgrade_version.md)对模型版本进行升级。**
编译成功后,预测demo的可执行程序分别为`build/demo/detector``build/demo/classifer``build/demo/segmenter`,用户可根据自己的模型类型选择,其主要命令参数说明如下:
编译成功后,预测demo的可执行程序分别为`build/demo/detector``build/demo/classifier``build/demo/segmenter`,用户可根据自己的模型类型选择,其主要命令参数说明如下:
| 参数 | 说明 |
| ---- | ---- |
......@@ -127,34 +124,37 @@ yaml-cpp.zip文件下载后无需解压,在cmake/yaml.cmake中将`URL https://
| image | 要预测的图片文件路径 |
| image_list | 按行存储图片路径的.txt文件 |
| use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0) |
| use_trt | 是否使用 TensorTr 预测, 支持值为0或1(默认值为0) |
| use_trt | 是否使用 TensorRT 预测, 支持值为0或1(默认值为0) |
| gpu_id | GPU 设备ID, 默认值为0 |
| save_dir | 保存可视化结果的路径, 默认值为"output",**classfier无该参数** |
| key | 加密过程中产生的密钥信息,默认值为""表示加载的是未加密的模型 |
| batch_size | 预测的批量大小,默认为1 |
| thread_num | 预测的线程数,默认为cpu处理器个数 |
## 样例
可使用[小度熊识别模型](../deploy_python.html#inference)中导出的`inference_model`和测试图片进行预测。
可使用[小度熊识别模型](../deploy_python.html#inference)中导出的`inference_model`和测试图片进行预测,导出到/root/projects,模型路径为/root/projects/inference_model
`样例一`
不使用`GPU`测试图片 `/path/to/xiaoduxiong.jpeg`
不使用`GPU`测试图片 `/root/projects/images/xiaoduxiong.jpeg`
```shell
./build/demo/detector --model_dir=/path/to/inference_model --image=/path/to/xiaoduxiong.jpeg --save_dir=output
./build/demo/detector --model_dir=/root/projects/inference_model --image=/root/projects/images/xiaoduxiong.jpeg --save_dir=output
```
图片文件`可视化预测结果`会保存在`save_dir`参数设置的目录下。
`样例二`:
使用`GPU`预测多个图片`/path/to/image_list.txt`,image_list.txt内容的格式如下:
使用`GPU`预测多个图片`/root/projects/image_list.txt`,image_list.txt内容的格式如下:
```
/path/to/images/xiaoduxiong1.jpeg
/path/to/images/xiaoduxiong2.jpeg
/root/projects/images/xiaoduxiong1.jpeg
/root/projects/images/xiaoduxiong2.jpeg
...
/path/to/images/xiaoduxiongn.jpeg
/root/projects/images/xiaoduxiongn.jpeg
```
```shell
./build/demo/detector --model_dir=/path/to/models/inference_model --image_list=/root/projects/images_list.txt --use_gpu=1 --save_dir=output
./build/demo/detector --model_dir=/root/projects/inference_model --image_list=/root/projects/images_list.txt --use_gpu=1 --save_dir=output --batch_size=2 --thread_num=2
```
图片文件`可视化预测结果`会保存在`save_dir`参数设置的目录下。
......@@ -10,11 +10,10 @@ Windows 平台下,我们使用`Visual Studio 2019 Community` 进行了测试
请确保系统已经安装好上述基本软件,我们使用的是`VS2019`的社区版。
**下面所有示例以工作目录为 `D:\projects`演示**
**下面所有示例以工作目录为 `D:\projects`演示。**
### Step1: 下载代码
### Step1: 下载PaddleX预测代码
下载源代码
```shell
d:
mkdir projects
......@@ -22,25 +21,24 @@ cd projects
git clone https://github.com/PaddlePaddle/PaddleX.git
```
**说明**:其中`C++`预测代码在`PaddleX/deploy/cpp` 目录,该目录不依赖任何`PaddleX`下其他目录。
**说明**:其中`C++`预测代码在`PaddleX\deploy\cpp` 目录,该目录不依赖任何`PaddleX`下其他目录。
### Step2: 下载PaddlePaddle C++ 预测库 fluid_inference
PaddlePaddle C++ 预测库针对不同的`CPU``CUDA`,以及是否支持TensorRT,提供了不同的预编译版本,目前PaddleX依赖于Paddle1.7版本,以下提供了多个不同版本的Paddle预测库:
PaddlePaddle C++ 预测库针对是否使用GPU、是否支持TensorRT、以及不同的CUDA版本提供了已经编译好的预测库,目前PaddleX依赖于Paddle 1.8,基于Paddle 1.8的Paddle预测库下载链接如下所示:
| 版本说明 | 预测库(1.7.2版本) | 编译器 | 构建工具| cuDNN | CUDA
| 版本说明 | 预测库(1.8.2版本) | 编译器 | 构建工具| cuDNN | CUDA |
| ---- | ---- | ---- | ---- | ---- | ---- |
| cpu_avx_mkl | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.7.2/win-infer/mkl/cpu/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 |
| cpu_avx_openblas | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.7.2/win-infer/open/cpu/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 |
| cuda9.0_cudnn7_avx_mkl | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.7.2/win-infer/mkl/post97/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | 7.4.1 | 9.0 |
| cuda9.0_cudnn7_avx_openblas | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.7.2/win-infer/open/post97/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | 7.4.1 | 9.0 |
| cuda10.0_cudnn7_avx_mkl | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.7.2/win-infer/mkl/post107/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | 7.5.0 | 9.0 |
| cpu_avx_mkl | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.8.2/win-infer/mkl/cpu/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 |
| cpu_avx_openblas | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.8.2/win-infer/open/cpu/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 |
| cuda9.0_cudnn7_avx_mkl | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.8.2/win-infer/mkl/post97/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | 7.4.1 | 9.0 |
| cuda9.0_cudnn7_avx_openblas | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.8.2/win-infer/open/post97/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | 7.4.1 | 9.0 |
| cuda10.0_cudnn7_avx_mkl | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.8.2/win-infer/mkl/post107/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | 7.5.0 | 9.0 |
请根据实际情况选择下载,如若以上版本不满足您的需求,请至[C++预测库下载列表](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_guide/inference_deployment/inference/windows_cpp_inference.html)选择符合的版本。
更多和更新的版本,请根据实际情况下载: [C++预测库下载列表](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html#id1)
解压后`D:\projects\fluid_inference*\`目录下主要包含的内容为:
将预测库解压后,其所在目录(例如`D:\projects\fluid_inference\`)下主要包含的内容有:
```
├── \paddle\ # paddle核心库和头文件
|
......@@ -51,8 +49,8 @@ PaddlePaddle C++ 预测库针对不同的`CPU`,`CUDA`,以及是否支持Tens
### Step3: 安装配置OpenCV
1. 在OpenCV官网下载适用于Windows平台的3.4.6版本, [下载地址](https://sourceforge.net/projects/opencvlibrary/files/3.4.6/opencv-3.4.6-vc14_vc15.exe/download)
2. 运行下载的可执行文件,将OpenCV解压至指定目录,如`D:\projects\opencv`
1. 在OpenCV官网下载适用于Windows平台的3.4.6版本, [下载地址](https://bj.bcebos.com/paddleseg/deploy/opencv-3.4.6-vc14_vc15.exe)
2. 运行下载的可执行文件,将OpenCV解压至指定目录,如`D:\projects\opencv`
3. 配置环境变量,如下流程所示
- 我的电脑->属性->高级系统设置->环境变量
- 在系统变量中找到Path(如没有,自行创建),并双击编辑
......@@ -63,22 +61,21 @@ PaddlePaddle C++ 预测库针对不同的`CPU`,`CUDA`,以及是否支持Tens
1. 打开Visual Studio 2019 Community,点击`继续但无需代码`
![step2](../../images/vs2019_step1.png)
2. 点击: `文件`->`打开`->`CMake`
![step2.1](../../images/vs2019_step2.png)
选择项目代码所在路径,并打开`CMakeList.txt`:
选择C++预测代码所在路径(例如`D:\projects\PaddleX\deploy\cpp`),并打开`CMakeList.txt`:
![step2.2](../../images/vs2019_step3.png)
3. 点击:`项目`->`PADDLEX_INFERENCE的CMake设置`
3. 点击:`项目`->`CMake设置`
![step3](../../images/vs2019_step4.png)
4. 点击`浏览`,分别设置编译选项指定`CUDA`、`OpenCV`、`Paddle预测库`的路径
依赖库路径的含义说明如下(带*表示仅在使用**GPU版本**预测库时指定, 其中CUDA库版本尽量对齐,**使用9.0、10.0版本,不使用9.2、10.1等版本CUDA库**):
![step3](../../images/vs2019_step5.png)
依赖库路径的含义说明如下(带*表示仅在使用**GPU版本**预测库时指定, 其中CUDA库版本尽量与Paddle预测库的对齐,例如Paddle预测库是**使用9.0、10.0版本**编译的,则编译PaddleX预测代码时**不使用9.2、10.1等版本**CUDA库):
| 参数名 | 含义 |
| ---- | ---- |
......@@ -87,38 +84,33 @@ PaddlePaddle C++ 预测库针对不同的`CPU`,`CUDA`,以及是否支持Tens
| PADDLE_DIR | Paddle c++预测库的路径 |
**注意:**
1. 使用`CPU`版预测库,请把`WITH_GPU`的``去掉勾
1. 如果使用`CPU`版预测库,请把`WITH_GPU`的``去掉勾
2. 如果使用的是`openblas`版本,请把`WITH_MKL`的``去掉勾
3. Windows环境下编译会自动下载YAML,如果编译环境无法访问外网,可手动下载: [yaml-cpp.zip](https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip)
yaml-cpp.zip文件下载后无需解压,在cmake/yaml.cmake中将`URL https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip` 中的网址,改为下载文件的路径。
4. 如果需要使用模型加密功能,需要手动下载[Windows预测模型加密工具](https://bj.bcebos.com/paddlex/tools/win/paddlex-encryption.zip)。例如解压到D:/projects,解压后目录为D:/projects/paddlex-encryption。编译时需勾选WITH_EBNCRYPTION并且在ENCRTYPTION_DIR填入D:/projects/paddlex-encryption。
![step4](../../images/vs2019_step5.png)
![step_encryption](../../images/vs2019_step_encryption.png)
**设置完成后**, 点击上图中`保存并生成CMake缓存以加载变量`。
![step4](../../images/vs2019_step6.png)
**设置完成后**, 点击上图中`保存并生成CMake缓存以加载变量`。
5. 点击`生成`->`全部生成`
![step6](../../images/vs2019_step6.png)
![step6](../../images/vs2019_step7.png)
### Step5: 预测及可视化
**在加载模型前,请检查你的模型目录中文件应该包括`model.yml`、`__model__`和`__params__`三个文件。如若不满足这个条件,请参考[模型导出为Inference文档](../deploy_python.html#inference)将模型导出为部署格式。**
**注意:由于PaddleX代码的持续更新,版本低于1.0.0的模型(模型版本可查看model.yml文件中的version字段)暂时无法直接用于预测部署,参考[模型版本升级](../../upgrade_version.md)对模型版本进行升级。**
上述`Visual Studio 2019`编译产出的可执行文件在`out\build\x64-Release`目录下,打开`cmd`,并切换到该目录:
```
d:
D:
cd D:\projects\PaddleX\deploy\cpp\out\build\x64-Release
```
编译成功后,预测demo的入口程序为`paddlex_inference\detector.exe`,`paddlex_inference\classifer.exe`,`paddlex_inference\segmenter.exe`,用户可根据自己的模型类型选择,其主要命令参数说明如下:
编译成功后,预测demo的入口程序为`paddlex_inference\detector.exe`,`paddlex_inference\classifier.exe`,`paddlex_inference\segmenter.exe`,用户可根据自己的模型类型选择,其主要命令参数说明如下:
| 参数 | 说明 |
| ---- | ---- |
......@@ -128,33 +120,45 @@ cd D:\projects\PaddleX\deploy\cpp\out\build\x64-Release
| use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0) |
| gpu_id | GPU 设备ID, 默认值为0 |
| save_dir | 保存可视化结果的路径, 默认值为"output",classfier无该参数 |
| key | 加密过程中产生的密钥信息,默认值为""表示加载的是未加密的模型 |
| batch_size | 预测的批量大小,默认为1 |
| thread_num | 预测的线程数,默认为cpu处理器个数 |
## 样例
可使用[小度熊识别模型](../deploy_python.html#inference)中导出的`inference_model`和测试图片进行预测
可使用[小度熊识别模型](../deploy_python.md)中导出的`inference_model`和测试图片进行预测, 例如导出到D:\projects,模型路径为D:\projects\inference_model
`样例一`:
### 样例一:(使用未加密的模型对单张图像做预测)
不使用`GPU`测试图片 `\\path\\to\\xiaoduxiong.jpeg`
不使用`GPU`测试图片 `D:\images\xiaoduxiong.jpeg`
```shell
.\\paddlex_inference\\detector.exe --model_dir=\\path\\to\\inference_model --image=D:\\images\\xiaoduxiong.jpeg --save_dir=output
```
.\paddlex_inference\detector.exe --model_dir=D:\projects\inference_model --image=D:\images\xiaoduxiong.jpeg --save_dir=output
```
图片文件`可视化预测结果`会保存在`save_dir`参数设置的目录下。
`样例二`:
### 样例二:(使用未加密的模型对图像列表做预测)
使用`GPU`预测多个图片`\\path\\to\\image_list.txt`,image_list.txt内容的格式如下:
使用`GPU`预测多个图片`D:\images\image_list.txt`,image_list.txt内容的格式如下:
```
\\path\\to\\images\\xiaoduxiong1.jpeg
\\path\\to\\images\\xiaoduxiong2.jpeg
D:\images\xiaoduxiong1.jpeg
D:\images\xiaoduxiong2.jpeg
...
\\path\\to\\images\\xiaoduxiongn.jpeg
D:\images\xiaoduxiongn.jpeg
```
```shell
.\\paddlex_inference\\detector.exe --model_dir=\\path\\to\\inference_model --image_list=\\path\\to\\images_list.txt --use_gpu=1 --save_dir=output
```
.\paddlex_inference\detector.exe --model_dir=D:\projects\inference_model --image_list=D:\images\image_list.txt --use_gpu=1 --save_dir=output --batch_size=2 --thread_num=2
```
图片文件`可视化预测结果`会保存在`save_dir`参数设置的目录下。
### 样例三:(使用加密后的模型对单张图片进行预测)
如果未对模型进行加密,请参考[加密PaddleX模型](../encryption.html#paddlex)对模型进行加密。例如加密后的模型所在目录为`D:\projects\encrypted_inference_model`。
```
.\paddlex_inference\detector.exe --model_dir=D:\projects\encrypted_inference_model --image=D:\images\xiaoduxiong.jpeg --save_dir=output --key=kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=
```
`--key`传入加密工具输出的密钥,例如`kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=`, 图片文件可视化预测结果会保存在`save_dir`参数设置的目录下。
......@@ -2,7 +2,7 @@
PaddleX提供一个轻量级的模型加密部署方案,通过PaddleX内置的模型加密工具对推理模型进行加密,预测部署SDK支持直接加载密文模型并完成推理,提升AI模型部署的安全性。
**注意:目前加密方案仅支持Linux系统**
**目前加密方案已支持Windows,Linux系统**
## 1. 方案简介
......@@ -20,7 +20,7 @@ PaddleX提供一个轻量级的模型加密部署方案,通过PaddleX内置的
![](../images/encryption_process.png)
下面是对提供的C/C++加解密库内部实现的中文描述,参考以下步骤可以实现 一套加解密库 来适应自己的场景并通过内存数据load到paddlepaddle中(c/c++预测服务)
下面是对提供的C/C++加解密库内部实现的中文描述,参考以下步骤可以实现一套加解密库来适应自己的场景并通过内存数据加载到Paddle Inference预测库中
> 1)考虑到加密的模型文件解密后需要从内存加载数据,使用conbine的模式生成模型文件和参数文件。
>
......@@ -34,15 +34,17 @@ PaddleX提供一个轻量级的模型加密部署方案,通过PaddleX内置的
>
> 6)在模型解密环节根据加密后的文件读取相关的加密数据到内存中,对内存数据使用AES算法进行解密,注意解密时需要采用与加密时一致的加密算法和加密的模式,以及密钥的数据和长度,否则会导致解密后数据错误。
>
> 7)集成模型预测的C/C++库,在具体使用paddlepaddle预测时一般涉及paddle::AnalysisConfig和paddle:Predictor,为了能够从内存数据中直接load解密后的模型明文数据(避免模型解密后创建临时文件),这里需要将AnalysisConfig的模型加载函数从SetModel替换为SetModelBuffer来实现从内存中加载模型数据。
> 7)集成模型预测的C/C++库,在具体使用预测时一般涉及paddle::AnalysisConfig和paddle:Predictor,为了能够从内存数据中直接load解密后的模型明文数据(避免模型解密后创建临时文件),这里需要将AnalysisConfig的模型加载函数从SetModel替换为SetModelBuffer来实现从内存中加载模型数据。
需要注意的是,在本方案中,密钥集成在上层预测服务的代码中。故模型的安全强度等同于代码抵御逆向调试的强度。为了保护密钥和模型的安全,开发者还需对自己的应用进行加固保护。常见的应用加固手段有:代码混淆,二进制文件加壳 等等,亦或将加密机制更改为AES白盒加密技术来保护密钥。这类技术领域内有大量商业和开源产品可供选择,此处不一一赘述。
### 1.2 加密工具
[PaddleX模型加密工具](https://bj.bcebos.com/paddlex/tools/paddlex-encryption.zip)。在编译部署代码时,编译脚本会自动下载加密工具,您也可以选择手动下载。
[Linux版本 PaddleX模型加密工具](https://bj.bcebos.com/paddlex/tools/paddlex-encryption.zip),编译脚本会自动下载该版本加密工具,您也可以选择手动下载。
加密工具包含内容为:
[Windows版本 PaddleX模型加密工具](https://bj.bcebos.com/paddlex/tools/win/paddlex-encryption.zip),该版本加密工具需手动下载,如果您在使用Visual Studio 2019编译C++预测代码的过程中已经下载过该工具,此处可不必重复下载。
Linux加密工具包含内容为:
```
paddlex-encryption
├── include # 头文件:paddle_model_decrypt.h(解密)和paddle_model_encrypt.h(加密)
......@@ -52,22 +54,40 @@ paddlex-encryption
└── tool # paddlex_encrypt_tool
```
Windows加密工具包含内容为:
```
paddlex-encryption
├── include # 头文件:paddle_model_decrypt.h(解密)和paddle_model_encrypt.h(加密)
|
├── lib # pmodel-encrypt.dll和pmodel-decrypt.dll动态库 pmodel-encrypt.lib和pmodel-encrypt.lib静态库
|
└── tool # paddlex_encrypt_tool.exe 模型加密工具
```
### 1.3 加密PaddleX模型
对模型完成加密后,加密工具会产生随机密钥信息(用于AES加解密使用),需要在后续加密部署时传入该密钥来用于解密。
> 密钥由32字节key + 16字节iv组成, 注意这里产生的key是经过base64编码后的,这样可以扩充key的选取范围
Linux平台:
```
./paddlex-encryption/tool/paddlex_encrypt_tool -model_dir /path/to/paddlex_inference_model -save_dir /path/to/paddlex_encrypted_model
# 假设模型在/root/projects下
./paddlex-encryption/tool/paddlex_encrypt_tool -model_dir /root/projects/paddlex_inference_model -save_dir /root/projects/paddlex_encrypted_model
```
`-model_dir`用于指定inference模型路径(参考[导出inference模型](deploy_python.html#inference)将模型导出为inference格式模型),可使用[导出小度熊识别模型](deploy_python.html#inference)中导出的`inference_model`**注意**:由于PaddleX代码的持续更新,版本低于1.0.0的模型暂时无法直接用于预测部署,参考[模型版本升级](../upgrade_version.md)对模型版本进行升级。)。加密完成后,加密过的模型会保存至指定的`-save_dir`下,包含`__model__.encrypted``__params__.encrypted``model.yml`三个文件,同时生成密钥信息,命令输出如下图所示,密钥为`kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=`
Windows平台:
```
# 假设模型在D:/projects下
.\paddlex-encryption\tool\paddlex_encrypt_tool.exe -model_dir D:\projects\paddlex_inference_model -save_dir D:\projects\paddlex_encrypted_model
```
`-model_dir`用于指定inference模型路径(参考[导出inference模型](deploy_python.html#inference)将模型导出为inference格式模型),可使用[导出小度熊识别模型](deploy_python.html#inference)中导出的`inference_model`。加密完成后,加密过的模型会保存至指定的`-save_dir`下,包含`__model__.encrypted``__params__.encrypted``model.yml`三个文件,同时生成密钥信息,命令输出如下图所示,密钥为`kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=`
![](../images/encrypt.png)
## 2. PaddleX C++加密部署
参考[Linux平台编译指南](deploy_cpp/deploy_cpp_linux.html#linux)编译C++部署代码。编译成功后,预测demo的可执行程序分别为`build/demo/detector``build/demo/classifer``build/demo/segmenter`,用户可根据自己的模型类型选择,其主要命令参数说明如下:
### 2.1 Linux平台使用
参考[Linux平台编译指南](deploy_cpp/deploy_cpp_linux.md)编译C++部署代码。编译成功后,预测demo的可执行程序分别为`build/demo/detector``build/demo/classifier``build/demo/segmenter`,用户可根据自己的模型类型选择,其主要命令参数说明如下:
| 参数 | 说明 |
| ---- | ---- |
......@@ -75,36 +95,72 @@ paddlex-encryption
| image | 要预测的图片文件路径 |
| image_list | 按行存储图片路径的.txt文件 |
| use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0) |
| use_trt | 是否使用 TensorTr 预测, 支持值为0或1(默认值为0) |
| use_trt | 是否使用 TensorRT 预测, 支持值为0或1(默认值为0) |
| gpu_id | GPU 设备ID, 默认值为0 |
| save_dir | 保存可视化结果的路径, 默认值为"output",classifier无该参数 |
| key | 加密过程中产生的密钥信息,默认值为""表示加载的是未加密的模型 |
| batch_size | 预测的批量大小,默认为1 |
| thread_num | 预测的线程数,默认为cpu处理器个数 |
## 样例
### 样例
可使用[导出小度熊识别模型](deploy_python.html#inference)中的测试图片进行预测。
可使用[导出小度熊识别模型](deploy_python.md#inference)中的测试图片进行预测。
`样例一`
#### 样例一
不使用`GPU`测试图片 `/path/to/xiaoduxiong.jpeg`
不使用`GPU`测试图片 `/root/projects/images/xiaoduxiong.jpeg`
```shell
./build/demo/detector --model_dir=/path/to/inference_model --image=/path/to/xiaoduxiong.jpeg --save_dir=output --key=kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=
./build/demo/detector --model_dir=/root/projects/paddlex_encrypted_model --image=/root/projects/xiaoduxiong.jpeg --save_dir=output --key=kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=
```
`--key`传入加密工具输出的密钥,例如`kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=`, 图片文件`可视化预测结果`会保存在`save_dir`参数设置的目录下。
`样例二`:
#### 样例二:
使用`GPU`预测多个图片`/path/to/image_list.txt`,image_list.txt内容的格式如下:
使用`GPU`预测多个图片`/root/projects/image_list.txt`,image_list.txt内容的格式如下:
```
/path/to/images/xiaoduxiong1.jpeg
/path/to/images/xiaoduxiong2.jpeg
/root/projects/images/xiaoduxiong1.jpeg
/root/projects/xiaoduxiong2.jpeg
...
/path/to/images/xiaoduxiongn.jpeg
/root/projects/xiaoduxiongn.jpeg
```
```shell
./build/demo/detector --model_dir=/root/projects/models/paddlex_encrypted_model --image_list=/root/projects/images_list.txt --use_gpu=1 --save_dir=output --key=kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=
```
`--key`传入加密工具输出的密钥,例如`kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=`, 图片文件`可视化预测结果`会保存在`save_dir`参数设置的目录下。
### 2.2 Windows平台使用
参考[Windows平台编译指南](deploy_cpp/deploy_cpp_win_vs2019.md)。需自行下载Windows版PaddleX加密工具压缩包,解压,在编译指南的编译流程基础上,在CMake设置中勾选WITH_ENCRYPTION,ENCRYPTION_DIR填写为加密工具包解压后的目录,再进行编译。参数与Linux版本预测部署一致。预测demo的入口程序为paddlex_inference\detector.exe,paddlex_inference\classifier.exe,paddlex_inference\segmenter.exe。
### 样例
可使用[导出小度熊识别模型](deploy_python.md#inference)中的测试图片进行预测。
#### 样例一:
不使用`GPU`测试单张图片,例如图片为`D:\images\xiaoduxiong.jpeg`,加密后的模型目录为`D:\projects\paddlex_encrypted_model`
```shell
./build/demo/detector --model_dir=/path/to/models/inference_model --image_list=/root/projects/images_list.txt --use_gpu=1 --save_dir=output --key=kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=
.\paddlex_inference\detector.exe --model_dir=D:\projects\paddlex_encrypted_model --image=D:\images\xiaoduxiong.jpeg --save_dir=output --key=kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=
```
`--key`传入加密工具输出的密钥,例如`kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=`, 图片文件`可视化预测结果`会保存在`save_dir`参数设置的目录下。
#### 样例二:
使用`GPU`预测图片列表,例如图片列表为`D:\projects\image_list.txt``image_list.txt`的内容如下:
```
D:\projects\images\xiaoduxiong1.jpeg
D:\projects\images\xiaoduxiong2.jpeg
...
D:\projects\images\xiaoduxiongn.jpeg
```
加密后的模型目录例如为`D:\projects\paddlex_encrypted_model`
```
.\paddlex_inference\detector.exe --model_dir=D:\projects\paddlex_encrypted_model --image_list=D:\projects\images_list.txt --use_gpu=1 --save_dir=output --key=kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=
```
`--key`传入加密工具输出的密钥,例如`kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=`, 图片文件`可视化预测结果`会保存在`save_dir`参数设置的目录下。
docs/tutorials/deploy/images/vs2019_step5.png

427.5 KB | W: | H:

docs/tutorials/deploy/images/vs2019_step5.png

215.3 KB | W: | H:

docs/tutorials/deploy/images/vs2019_step5.png
docs/tutorials/deploy/images/vs2019_step5.png
docs/tutorials/deploy/images/vs2019_step5.png
docs/tutorials/deploy/images/vs2019_step5.png
  • 2-up
  • Swipe
  • Onion skin
# HumanSeg人像分割模型
本教程基于PaddleX核心分割网络,提供针对人像分割场景从预训练模型、Fine-tune、视频分割预测部署的全流程应用指南。
## 安装
**前置依赖**
* paddlepaddle >= 1.8.0
* python >= 3.5
```
pip install paddlex -i https://mirror.baidu.com/pypi/simple
```
安装的相关问题参考[PaddleX安装](https://paddlex.readthedocs.io/zh_CN/latest/install.html)
## 预训练模型
HumanSeg开放了在大规模人像数据上训练的两个预训练模型,满足多种使用场景的需求
| 模型类型 | Checkpoint Parameter | Inference Model | Quant Inference Model | 备注 |
| --- | --- | --- | ---| --- |
| HumanSeg-server | [humanseg_server_params](https://paddlex.bj.bcebos.com/humanseg/models/humanseg_server.pdparams) | [humanseg_server_inference](https://paddlex.bj.bcebos.com/humanseg/models/humanseg_server_inference.zip) | -- | 高精度模型,适用于服务端GPU且背景复杂的人像场景, 模型结构为Deeplabv3+/Xcetion65, 输入大小(512, 512) |
| HumanSeg-mobile | [humanseg_mobile_params](https://paddlex.bj.bcebos.com/humanseg/models/humanseg_mobile.pdparams) | [humanseg_mobile_inference](https://paddlex.bj.bcebos.com/humanseg/models/humanseg_mobile_inference.zip) | [humanseg_mobile_quant](https://paddlex.bj.bcebos.com/humanseg/models/humanseg_mobile_quant.zip) | 轻量级模型, 适用于移动端或服务端CPU的前置摄像头场景,模型结构为HRNet_w18_samll_v1,输入大小(192, 192) |
模型性能
| 模型 | 模型大小 | 计算耗时 |
| --- | --- | --- |
|humanseg_server_inference| 158M | - |
|humanseg_mobile_inference | 5.8 M | 42.35ms |
|humanseg_mobile_quant | 1.6M | 24.93ms |
计算耗时运行环境: 小米,cpu:骁龙855, 内存:6GB, 图片大小:192*192
**NOTE:**
其中Checkpoint Parameter为模型权重,用于Fine-tuning场景。
* Inference Model和Quant Inference Model为预测部署模型,包含`__model__`计算图结构、`__params__`模型参数和`model.yaml`基础的模型配置信息。
* 其中Inference Model适用于服务端的CPU和GPU预测部署,Qunat Inference Model为量化版本,适用于通过Paddle Lite进行移动端等端侧设备部署。
执行以下脚本进行HumanSeg预训练模型的下载
```bash
python pretrain_weights/download_pretrain_weights.py
```
## 下载测试数据
我们提供了[supervise.ly](https://supervise.ly/)发布人像分割数据集**Supervisely Persons**, 从中随机抽取一小部分并转化成PaddleX可直接加载数据格式。通过运行以下代码进行快速下载,其中包含手机前置摄像头的人像测试视频`video_test.mp4`.
```bash
python data/download_data.py
```
## 快速体验视频流人像分割
结合DIS(Dense Inverse Search-basedmethod)光流算法预测结果与分割结果,改善视频流人像分割
```bash
# 通过电脑摄像头进行实时分割处理
python video_infer.py --model_dir pretrain_weights/humanseg_mobile_inference
# 对人像视频进行分割处理
python video_infer.py --model_dir pretrain_weights/humanseg_mobile_inference --video_path data/video_test.mp4
```
视频分割结果如下:
<img src="https://paddleseg.bj.bcebos.com/humanseg/data/video_test.gif" width="20%" height="20%"><img src="https://paddleseg.bj.bcebos.com/humanseg/data/result.gif" width="20%" height="20%">
根据所选背景进行背景替换,背景可以是一张图片,也可以是一段视频。
```bash
# 通过电脑摄像头进行实时背景替换处理, 也可通过'--background_video_path'传入背景视频
python bg_replace.py --model_dir pretrain_weights/humanseg_mobile_inference --background_image_path data/background.jpg
# 对人像视频进行背景替换处理, 也可通过'--background_video_path'传入背景视频
python bg_replace.py --model_dir pretrain_weights/humanseg_mobile_inference --video_path data/video_test.mp4 --background_image_path data/background.jpg
# 对单张图像进行背景替换
python bg_replace.py --model_dir pretrain_weights/humanseg_mobile_inference --image_path data/human_image.jpg --background_image_path data/background.jpg
```
背景替换结果如下:
<img src="https://paddleseg.bj.bcebos.com/humanseg/data/video_test.gif" width="20%" height="20%"><img src="https://paddleseg.bj.bcebos.com/humanseg/data/bg_replace.gif" width="20%" height="20%">
**NOTE**:
视频分割处理时间需要几分钟,请耐心等待。
提供的模型适用于手机摄像头竖屏拍摄场景,宽屏效果会略差一些。
## 训练
使用下述命令基于与训练模型进行Fine-tuning,请确保选用的模型结构`model_type`与模型参数`pretrain_weights`匹配。
```bash
# 指定GPU卡号(以0号卡为例)
export CUDA_VISIBLE_DEVICES=0
# 若不使用GPU,则将CUDA_VISIBLE_DEVICES指定为空
# export CUDA_VISIBLE_DEVICES=
python train.py --model_type HumanSegMobile \
--save_dir output/ \
--data_dir data/mini_supervisely \
--train_list data/mini_supervisely/train.txt \
--val_list data/mini_supervisely/val.txt \
--pretrain_weights pretrain_weights/humanseg_mobile_params \
--batch_size 8 \
--learning_rate 0.001 \
--num_epochs 10 \
--image_shape 192 192
```
其中参数含义如下:
* `--model_type`: 模型类型,可选项为:HumanSegServer和HumanSegMobile
* `--save_dir`: 模型保存路径
* `--data_dir`: 数据集路径
* `--train_list`: 训练集列表路径
* `--val_list`: 验证集列表路径
* `--pretrain_weights`: 预训练模型路径
* `--batch_size`: 批大小
* `--learning_rate`: 初始学习率
* `--num_epochs`: 训练轮数
* `--image_shape`: 网络输入图像大小(w, h)
更多命令行帮助可运行下述命令进行查看:
```bash
python train.py --help
```
**NOTE**
可通过更换`--model_type`变量与对应的`--pretrain_weights`使用不同的模型快速尝试。
## 评估
使用下述命令进行评估
```bash
python eval.py --model_dir output/best_model \
--data_dir data/mini_supervisely \
--val_list data/mini_supervisely/val.txt \
--image_shape 192 192
```
其中参数含义如下:
* `--model_dir`: 模型路径
* `--data_dir`: 数据集路径
* `--val_list`: 验证集列表路径
* `--image_shape`: 网络输入图像大小(w, h)
## 预测
使用下述命令进行预测, 预测结果默认保存在`./output/result/`文件夹中。
```bash
python infer.py --model_dir output/best_model \
--data_dir data/mini_supervisely \
--test_list data/mini_supervisely/test.txt \
--save_dir output/result \
--image_shape 192 192
```
其中参数含义如下:
* `--model_dir`: 模型路径
* `--data_dir`: 数据集路径
* `--test_list`: 测试集列表路径
* `--image_shape`: 网络输入图像大小(w, h)
## 模型导出
```bash
paddlex --export_inference --model_dir output/best_model \
--save_dir output/export
```
其中参数含义如下:
* `--model_dir`: 模型路径
* `--save_dir`: 导出模型保存路径
## 离线量化
```bash
python quant_offline.py --model_dir output/best_model \
--data_dir data/mini_supervisely \
--quant_list data/mini_supervisely/val.txt \
--save_dir output/quant_offline \
--image_shape 192 192
```
其中参数含义如下:
* `--model_dir`: 待量化模型路径
* `--data_dir`: 数据集路径
* `--quant_list`: 量化数据集列表路径,一般直接选择训练集或验证集
* `--save_dir`: 量化模型保存路径
* `--image_shape`: 网络输入图像大小(w, h)
# coding: utf8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import os.path as osp
import cv2
import numpy as np
from postprocess import postprocess, threshold_mask
import paddlex as pdx
import paddlex.utils.logging as logging
from paddlex.seg import transforms
def parse_args():
parser = argparse.ArgumentParser(
description='HumanSeg inference for video')
parser.add_argument(
'--model_dir',
dest='model_dir',
help='Model path for inference',
type=str)
parser.add_argument(
'--image_path',
dest='image_path',
help='Image including human',
type=str,
default=None)
parser.add_argument(
'--background_image_path',
dest='background_image_path',
help='Background image for replacing',
type=str,
default=None)
parser.add_argument(
'--video_path',
dest='video_path',
help='Video path for inference',
type=str,
default=None)
parser.add_argument(
'--background_video_path',
dest='background_video_path',
help='Background video path for replacing',
type=str,
default=None)
parser.add_argument(
'--save_dir',
dest='save_dir',
help='The directory for saving the inference results',
type=str,
default='./output')
parser.add_argument(
"--image_shape",
dest="image_shape",
help="The image shape for net inputs.",
nargs=2,
default=[192, 192],
type=int)
return parser.parse_args()
def bg_replace(label_map, img, bg):
h, w, _ = img.shape
bg = cv2.resize(bg, (w, h))
label_map = np.repeat(label_map[:, :, np.newaxis], 3, axis=2)
comb = (label_map * img + (1 - label_map) * bg).astype(np.uint8)
return comb
def recover(img, im_info):
if im_info[0] == 'resize':
w, h = im_info[1][1], im_info[1][0]
img = cv2.resize(img, (w, h), cv2.INTER_LINEAR)
elif im_info[0] == 'padding':
w, h = im_info[1][0], im_info[1][0]
img = img[0:h, 0:w, :]
return img
def infer(args):
resize_h = args.image_shape[1]
resize_w = args.image_shape[0]
test_transforms = transforms.Compose([transforms.Normalize()])
model = pdx.load_model(args.model_dir)
if not osp.exists(args.save_dir):
os.makedirs(args.save_dir)
# 图像背景替换
if args.image_path is not None:
if not osp.exists(args.image_path):
raise Exception('The --image_path is not existed: {}'.format(
args.image_path))
if args.background_image_path is None:
raise Exception(
'The --background_image_path is not set. Please set it')
else:
if not osp.exists(args.background_image_path):
raise Exception(
'The --background_image_path is not existed: {}'.format(
args.background_image_path))
img = cv2.imread(args.image_path)
im_shape = img.shape
im_scale_x = float(resize_w) / float(im_shape[1])
im_scale_y = float(resize_h) / float(im_shape[0])
im = cv2.resize(
img,
None,
None,
fx=im_scale_x,
fy=im_scale_y,
interpolation=cv2.INTER_LINEAR)
image = im.astype('float32')
im_info = ('resize', im_shape[0:2])
pred = model.predict(image, test_transforms)
label_map = pred['label_map']
label_map = recover(label_map, im_info)
bg = cv2.imread(args.background_image_path)
save_name = osp.basename(args.image_path)
save_path = osp.join(args.save_dir, save_name)
result = bg_replace(label_map, img, bg)
cv2.imwrite(save_path, result)
# 视频背景替换,如果提供背景视频则以背景视频作为背景,否则采用提供的背景图片
else:
is_video_bg = False
if args.background_video_path is not None:
if not osp.exists(args.background_video_path):
raise Exception(
'The --background_video_path is not existed: {}'.format(
args.background_video_path))
is_video_bg = True
elif args.background_image_path is not None:
if not osp.exists(args.background_image_path):
raise Exception(
'The --background_image_path is not existed: {}'.format(
args.background_image_path))
else:
raise Exception(
'Please offer backgound image or video. You should set --backbground_iamge_paht or --background_video_path'
)
disflow = cv2.DISOpticalFlow_create(
cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST)
prev_gray = np.zeros((resize_h, resize_w), np.uint8)
prev_cfd = np.zeros((resize_h, resize_w), np.float32)
is_init = True
if args.video_path is not None:
logging.info('Please wait. It is computing......')
if not osp.exists(args.video_path):
raise Exception('The --video_path is not existed: {}'.format(
args.video_path))
cap_video = cv2.VideoCapture(args.video_path)
fps = cap_video.get(cv2.CAP_PROP_FPS)
width = int(cap_video.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap_video.get(cv2.CAP_PROP_FRAME_HEIGHT))
save_name = osp.basename(args.video_path)
save_name = save_name.split('.')[0]
save_path = osp.join(args.save_dir, save_name + '.avi')
cap_out = cv2.VideoWriter(
save_path,
cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps,
(width, height))
if is_video_bg:
cap_bg = cv2.VideoCapture(args.background_video_path)
frames_bg = cap_bg.get(cv2.CAP_PROP_FRAME_COUNT)
current_frame_bg = 1
else:
img_bg = cv2.imread(args.background_image_path)
while cap_video.isOpened():
ret, frame = cap_video.read()
if ret:
im_shape = frame.shape
im_scale_x = float(resize_w) / float(im_shape[1])
im_scale_y = float(resize_h) / float(im_shape[0])
im = cv2.resize(
frame,
None,
None,
fx=im_scale_x,
fy=im_scale_y,
interpolation=cv2.INTER_LINEAR)
image = im.astype('float32')
im_info = ('resize', im_shape[0:2])
pred = model.predict(image, test_transforms)
score_map = pred['score_map']
cur_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
cur_gray = cv2.resize(cur_gray, (resize_w, resize_h))
score_map = 255 * score_map[:, :, 1]
optflow_map = postprocess(cur_gray, score_map, prev_gray, prev_cfd, \
disflow, is_init)
prev_gray = cur_gray.copy()
prev_cfd = optflow_map.copy()
is_init = False
optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0)
optflow_map = threshold_mask(
optflow_map, thresh_bg=0.2, thresh_fg=0.8)
score_map = recover(optflow_map, im_info)
#循环读取背景帧
if is_video_bg:
ret_bg, frame_bg = cap_bg.read()
if ret_bg:
if current_frame_bg == frames_bg:
current_frame_bg = 1
cap_bg.set(cv2.CAP_PROP_POS_FRAMES, 0)
else:
break
current_frame_bg += 1
comb = bg_replace(score_map, frame, frame_bg)
else:
comb = bg_replace(score_map, frame, img_bg)
cap_out.write(comb)
else:
break
if is_video_bg:
cap_bg.release()
cap_video.release()
cap_out.release()
# 当没有输入预测图像和视频的时候,则打开摄像头
else:
cap_video = cv2.VideoCapture(0)
if not cap_video.isOpened():
raise IOError("Error opening video stream or file, "
"--video_path whether existing: {}"
" or camera whether working".format(
args.video_path))
return
if is_video_bg:
cap_bg = cv2.VideoCapture(args.background_video_path)
frames_bg = cap_bg.get(cv2.CAP_PROP_FRAME_COUNT)
current_frame_bg = 1
else:
img_bg = cv2.imread(args.background_image_path)
while cap_video.isOpened():
ret, frame = cap_video.read()
if ret:
im_shape = frame.shape
im_scale_x = float(resize_w) / float(im_shape[1])
im_scale_y = float(resize_h) / float(im_shape[0])
im = cv2.resize(
frame,
None,
None,
fx=im_scale_x,
fy=im_scale_y,
interpolation=cv2.INTER_LINEAR)
image = im.astype('float32')
im_info = ('resize', im_shape[0:2])
pred = model.predict(image, test_transforms)
score_map = pred['score_map']
cur_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
cur_gray = cv2.resize(cur_gray, (resize_w, resize_h))
score_map = 255 * score_map[:, :, 1]
optflow_map = postprocess(cur_gray, score_map, prev_gray, prev_cfd, \
disflow, is_init)
prev_gray = cur_gray.copy()
prev_cfd = optflow_map.copy()
is_init = False
optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0)
optflow_map = threshold_mask(
optflow_map, thresh_bg=0.2, thresh_fg=0.8)
score_map = recover(optflow_map, im_info)
#循环读取背景帧
if is_video_bg:
ret_bg, frame_bg = cap_bg.read()
if ret_bg:
if current_frame_bg == frames_bg:
current_frame_bg = 1
cap_bg.set(cv2.CAP_PROP_POS_FRAMES, 0)
else:
break
current_frame_bg += 1
comb = bg_replace(score_map, frame, frame_bg)
else:
comb = bg_replace(score_map, frame, img_bg)
cv2.imshow('HumanSegmentation', comb)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
else:
break
if is_video_bg:
cap_bg.release()
cap_video.release()
if __name__ == "__main__":
args = parse_args()
infer(args)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import os
LOCAL_PATH = os.path.dirname(os.path.abspath(__file__))
import paddlex as pdx
def download_data(savepath):
url = "https://paddleseg.bj.bcebos.com/humanseg/data/mini_supervisely.zip"
pdx.utils.download_and_decompress(url=url, path=savepath)
url = "https://paddleseg.bj.bcebos.com/humanseg/data/video_test.zip"
pdx.utils.download_and_decompress(url=url, path=savepath)
if __name__ == "__main__":
download_data(LOCAL_PATH)
print("Data download finish!")
# coding: utf8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import paddlex as pdx
import paddlex.utils.logging as logging
from paddlex.seg import transforms
def parse_args():
parser = argparse.ArgumentParser(description='HumanSeg training')
parser.add_argument(
'--model_dir',
dest='model_dir',
help='Model path for evaluating',
type=str,
default='output/best_model')
parser.add_argument(
'--data_dir',
dest='data_dir',
help='The root directory of dataset',
type=str)
parser.add_argument(
'--val_list',
dest='val_list',
help='Val list file of dataset',
type=str,
default=None)
parser.add_argument(
'--batch_size',
dest='batch_size',
help='Mini batch size',
type=int,
default=128)
parser.add_argument(
"--image_shape",
dest="image_shape",
help="The image shape for net inputs.",
nargs=2,
default=[192, 192],
type=int)
return parser.parse_args()
def dict2str(dict_input):
out = ''
for k, v in dict_input.items():
try:
v = round(float(v), 6)
except:
pass
out = out + '{}={}, '.format(k, v)
return out.strip(', ')
def evaluate(args):
eval_transforms = transforms.Compose(
[transforms.Resize(args.image_shape), transforms.Normalize()])
eval_dataset = pdx.datasets.SegDataset(
data_dir=args.data_dir,
file_list=args.val_list,
transforms=eval_transforms)
model = pdx.load_model(args.model_dir)
metrics = model.evaluate(eval_dataset, args.batch_size)
logging.info('[EVAL] Finished, {} .'.format(dict2str(metrics)))
if __name__ == '__main__':
args = parse_args()
evaluate(args)
# coding: utf8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import os.path as osp
import cv2
import numpy as np
import tqdm
import paddlex as pdx
from paddlex.seg import transforms
def parse_args():
parser = argparse.ArgumentParser(
description='HumanSeg prediction and visualization')
parser.add_argument(
'--model_dir',
dest='model_dir',
help='Model path for prediction',
type=str)
parser.add_argument(
'--data_dir',
dest='data_dir',
help='The root directory of dataset',
type=str)
parser.add_argument(
'--test_list',
dest='test_list',
help='Test list file of dataset',
type=str)
parser.add_argument(
'--save_dir',
dest='save_dir',
help='The directory for saving the inference results',
type=str,
default='./output/result')
parser.add_argument(
"--image_shape",
dest="image_shape",
help="The image shape for net inputs.",
nargs=2,
default=[192, 192],
type=int)
return parser.parse_args()
def infer(args):
def makedir(path):
sub_dir = osp.dirname(path)
if not osp.exists(sub_dir):
os.makedirs(sub_dir)
test_transforms = transforms.Compose(
[transforms.Resize(args.image_shape), transforms.Normalize()])
model = pdx.load_model(args.model_dir)
added_saved_path = osp.join(args.save_dir, 'added')
mat_saved_path = osp.join(args.save_dir, 'mat')
scoremap_saved_path = osp.join(args.save_dir, 'scoremap')
with open(args.test_list, 'r') as f:
files = f.readlines()
for file in tqdm.tqdm(files):
file = file.strip()
im_file = osp.join(args.data_dir, file)
im = cv2.imread(im_file)
result = model.predict(im_file, transforms=test_transforms)
# save added image
added_image = pdx.seg.visualize(
im_file, result, weight=0.6, save_dir=None)
added_image_file = osp.join(added_saved_path, file)
makedir(added_image_file)
cv2.imwrite(added_image_file, added_image)
# save score map
score_map = result['score_map'][:, :, 1]
score_map = (score_map * 255).astype(np.uint8)
score_map_file = osp.join(scoremap_saved_path, file)
makedir(score_map_file)
cv2.imwrite(score_map_file, score_map)
# save mat image
score_map = np.expand_dims(score_map, axis=-1)
mat_image = np.concatenate([im, score_map], axis=2)
mat_file = osp.join(mat_saved_path, file)
ext = osp.splitext(mat_file)[-1]
mat_file = mat_file.replace(ext, '.png')
makedir(mat_file)
cv2.imwrite(mat_file, mat_image)
if __name__ == '__main__':
args = parse_args()
infer(args)
# coding: utf8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
def cal_optical_flow_tracking(pre_gray, cur_gray, prev_cfd, dl_weights,
disflow):
"""计算光流跟踪匹配点和光流图
输入参数:
pre_gray: 上一帧灰度图
cur_gray: 当前帧灰度图
prev_cfd: 上一帧光流图
dl_weights: 融合权重图
disflow: 光流数据结构
返回值:
is_track: 光流点跟踪二值图,即是否具有光流点匹配
track_cfd: 光流跟踪图
"""
check_thres = 8
h, w = pre_gray.shape[:2]
track_cfd = np.zeros_like(prev_cfd)
is_track = np.zeros_like(pre_gray)
flow_fw = disflow.calc(pre_gray, cur_gray, None)
flow_bw = disflow.calc(cur_gray, pre_gray, None)
flow_fw = np.round(flow_fw).astype(np.int)
flow_bw = np.round(flow_bw).astype(np.int)
y_list = np.array(range(h))
x_list = np.array(range(w))
yv, xv = np.meshgrid(y_list, x_list)
yv, xv = yv.T, xv.T
cur_x = xv + flow_fw[:, :, 0]
cur_y = yv + flow_fw[:, :, 1]
# 超出边界不跟踪
not_track = (cur_x < 0) + (cur_x >= w) + (cur_y < 0) + (cur_y >= h)
flow_bw[~not_track] = flow_bw[cur_y[~not_track], cur_x[~not_track]]
not_track += (np.square(flow_fw[:, :, 0] + flow_bw[:, :, 0]) +
np.square(flow_fw[:, :, 1] + flow_bw[:, :, 1])
) >= check_thres
track_cfd[cur_y[~not_track], cur_x[~not_track]] = prev_cfd[~not_track]
is_track[cur_y[~not_track], cur_x[~not_track]] = 1
not_flow = np.all(np.abs(flow_fw) == 0,
axis=-1) * np.all(np.abs(flow_bw) == 0, axis=-1)
dl_weights[cur_y[not_flow], cur_x[not_flow]] = 0.05
return track_cfd, is_track, dl_weights
def fuse_optical_flow_tracking(track_cfd, dl_cfd, dl_weights, is_track):
"""光流追踪图和人像分割结构融合
输入参数:
track_cfd: 光流追踪图
dl_cfd: 当前帧分割结果
dl_weights: 融合权重图
is_track: 光流点匹配二值图
返回
cur_cfd: 光流跟踪图和人像分割结果融合图
"""
fusion_cfd = dl_cfd.copy()
is_track = is_track.astype(np.bool)
fusion_cfd[is_track] = dl_weights[is_track] * dl_cfd[is_track] + (
1 - dl_weights[is_track]) * track_cfd[is_track]
# 确定区域
index_certain = ((dl_cfd > 0.9) + (dl_cfd < 0.1)) * is_track
index_less01 = (dl_weights < 0.1) * index_certain
fusion_cfd[index_less01] = 0.3 * dl_cfd[index_less01] + 0.7 * track_cfd[
index_less01]
index_larger09 = (dl_weights >= 0.1) * index_certain
fusion_cfd[index_larger09] = 0.4 * dl_cfd[
index_larger09] + 0.6 * track_cfd[index_larger09]
return fusion_cfd
def threshold_mask(img, thresh_bg, thresh_fg):
dst = (img / 255.0 - thresh_bg) / (thresh_fg - thresh_bg)
dst[np.where(dst > 1)] = 1
dst[np.where(dst < 0)] = 0
return dst.astype(np.float32)
def postprocess(cur_gray, scoremap, prev_gray, pre_cfd, disflow, is_init):
"""光流优化
Args:
cur_gray : 当前帧灰度图
pre_gray : 前一帧灰度图
pre_cfd :前一帧融合结果
scoremap : 当前帧分割结果
difflow : 光流
is_init : 是否第一帧
Returns:
fusion_cfd : 光流追踪图和预测结果融合图
"""
h, w = scoremap.shape
cur_cfd = scoremap.copy()
if is_init:
if h <= 64 or w <= 64:
disflow.setFinestScale(1)
elif h <= 160 or w <= 160:
disflow.setFinestScale(2)
else:
disflow.setFinestScale(3)
fusion_cfd = cur_cfd
else:
weights = np.ones((h, w), np.float32) * 0.3
track_cfd, is_track, weights = cal_optical_flow_tracking(
prev_gray, cur_gray, pre_cfd, weights, disflow)
fusion_cfd = fuse_optical_flow_tracking(track_cfd, cur_cfd, weights,
is_track)
return fusion_cfd
# coding: utf8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import os
LOCAL_PATH = os.path.dirname(os.path.abspath(__file__))
import paddlex as pdx
import paddlehub as hub
model_urls = {
"PaddleX_HumanSeg_Server_Params":
"https://bj.bcebos.com/paddlex/models/humanseg/humanseg_server_params.tar",
"PaddleX_HumanSeg_Server_Inference":
"https://bj.bcebos.com/paddlex/models/humanseg/humanseg_server_inference.tar",
"PaddleX_HumanSeg_Mobile_Params":
"https://bj.bcebos.com/paddlex/models/humanseg/humanseg_mobile_params.tar",
"PaddleX_HumanSeg_Mobile_Inference":
"https://bj.bcebos.com/paddlex/models/humanseg/humanseg_mobile_inference.tar",
"PaddleX_HumanSeg_Mobile_Quant":
"https://bj.bcebos.com/paddlex/models/humanseg/humanseg_mobile_quant.tar"
}
if __name__ == "__main__":
for model_name, url in model_urls.items():
pdx.utils.download_and_decompress(url=url, path=LOCAL_PATH)
print("Pretrained Model download success!")
# coding: utf8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import paddlex as pdx
from paddlex.seg import transforms
def parse_args():
parser = argparse.ArgumentParser(description='HumanSeg training')
parser.add_argument(
'--model_dir',
dest='model_dir',
help='Model path for quant',
type=str,
default='output/best_model')
parser.add_argument(
'--batch_size',
dest='batch_size',
help='Mini batch size',
type=int,
default=1)
parser.add_argument(
'--batch_nums',
dest='batch_nums',
help='Batch number for quant',
type=int,
default=10)
parser.add_argument(
'--data_dir',
dest='data_dir',
help='the root directory of dataset',
type=str)
parser.add_argument(
'--quant_list',
dest='quant_list',
help='Image file list for model quantization, it can be vat.txt or train.txt',
type=str,
default=None)
parser.add_argument(
'--save_dir',
dest='save_dir',
help='The directory for saving the quant model',
type=str,
default='./output/quant_offline')
parser.add_argument(
"--image_shape",
dest="image_shape",
help="The image shape for net inputs.",
nargs=2,
default=[192, 192],
type=int)
return parser.parse_args()
def evaluate(args):
eval_transforms = transforms.Compose(
[transforms.Resize(args.image_shape), transforms.Normalize()])
eval_dataset = pdx.datasets.SegDataset(
data_dir=args.data_dir,
file_list=args.quant_list,
transforms=eval_transforms)
model = pdx.load_model(args.model_dir)
pdx.slim.export_quant_model(model, eval_dataset, args.batch_size,
args.batch_nums, args.save_dir)
if __name__ == '__main__':
args = parse_args()
evaluate(args)
# coding: utf8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import paddlex as pdx
from paddlex.seg import transforms
MODEL_TYPE = ['HumanSegMobile', 'HumanSegServer']
def parse_args():
parser = argparse.ArgumentParser(description='HumanSeg training')
parser.add_argument(
'--model_type',
dest='model_type',
help="Model type for traing, which is one of ('HumanSegMobile', 'HumanSegServer')",
type=str,
default='HumanSegMobile')
parser.add_argument(
'--data_dir',
dest='data_dir',
help='The root directory of dataset',
type=str)
parser.add_argument(
'--train_list',
dest='train_list',
help='Train list file of dataset',
type=str)
parser.add_argument(
'--val_list',
dest='val_list',
help='Val list file of dataset',
type=str,
default=None)
parser.add_argument(
'--save_dir',
dest='save_dir',
help='The directory for saving the model snapshot',
type=str,
default='./output')
parser.add_argument(
'--num_classes',
dest='num_classes',
help='Number of classes',
type=int,
default=2)
parser.add_argument(
"--image_shape",
dest="image_shape",
help="The image shape for net inputs.",
nargs=2,
default=[192, 192],
type=int)
parser.add_argument(
'--num_epochs',
dest='num_epochs',
help='Number epochs for training',
type=int,
default=100)
parser.add_argument(
'--batch_size',
dest='batch_size',
help='Mini batch size',
type=int,
default=128)
parser.add_argument(
'--learning_rate',
dest='learning_rate',
help='Learning rate',
type=float,
default=0.01)
parser.add_argument(
'--pretrain_weights',
dest='pretrain_weights',
help='The path of pretrianed weight',
type=str,
default=None)
parser.add_argument(
'--resume_checkpoint',
dest='resume_checkpoint',
help='The path of resume checkpoint',
type=str,
default=None)
parser.add_argument(
'--use_vdl',
dest='use_vdl',
help='Whether to use visualdl',
action='store_true')
parser.add_argument(
'--save_interval_epochs',
dest='save_interval_epochs',
help='The interval epochs for save a model snapshot',
type=int,
default=5)
return parser.parse_args()
def train(args):
train_transforms = transforms.Compose([
transforms.Resize(args.image_shape), transforms.RandomHorizontalFlip(),
transforms.Normalize()
])
eval_transforms = transforms.Compose(
[transforms.Resize(args.image_shape), transforms.Normalize()])
train_dataset = pdx.datasets.SegDataset(
data_dir=args.data_dir,
file_list=args.train_list,
transforms=train_transforms,
shuffle=True)
eval_dataset = pdx.datasets.SegDataset(
data_dir=args.data_dir,
file_list=args.val_list,
transforms=eval_transforms)
if args.model_type == 'HumanSegMobile':
model = pdx.seg.HRNet(
num_classes=args.num_classes, width='18_small_v1')
elif args.model_type == 'HumanSegServer':
model = pdx.seg.DeepLabv3p(
num_classes=args.num_classes, backbone='Xception65')
else:
raise ValueError(
"--model_type: {} is set wrong, it shold be one of ('HumanSegMobile', "
"'HumanSegLite', 'HumanSegServer')".format(args.model_type))
model.train(
num_epochs=args.num_epochs,
train_dataset=train_dataset,
train_batch_size=args.batch_size,
eval_dataset=eval_dataset,
save_interval_epochs=args.save_interval_epochs,
learning_rate=args.learning_rate,
pretrain_weights=args.pretrain_weights,
resume_checkpoint=args.resume_checkpoint,
save_dir=args.save_dir,
use_vdl=args.use_vdl)
if __name__ == '__main__':
args = parse_args()
train(args)
# coding: utf8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import os.path as osp
import cv2
import numpy as np
from postprocess import postprocess, threshold_mask
import paddlex as pdx
import paddlex.utils.logging as logging
from paddlex.seg import transforms
def parse_args():
parser = argparse.ArgumentParser(
description='HumanSeg inference for video')
parser.add_argument(
'--model_dir',
dest='model_dir',
help='Model path for inference',
type=str)
parser.add_argument(
'--video_path',
dest='video_path',
help='Video path for inference, camera will be used if the path not existing',
type=str,
default=None)
parser.add_argument(
'--save_dir',
dest='save_dir',
help='The directory for saving the inference results',
type=str,
default='./output')
parser.add_argument(
"--image_shape",
dest="image_shape",
help="The image shape for net inputs.",
nargs=2,
default=[192, 192],
type=int)
return parser.parse_args()
def recover(img, im_info):
if im_info[0] == 'resize':
w, h = im_info[1][1], im_info[1][0]
img = cv2.resize(img, (w, h), cv2.INTER_LINEAR)
elif im_info[0] == 'padding':
w, h = im_info[1][0], im_info[1][0]
img = img[0:h, 0:w, :]
return img
def video_infer(args):
resize_h = args.image_shape[1]
resize_w = args.image_shape[0]
model = pdx.load_model(args.model_dir)
test_transforms = transforms.Compose([transforms.Normalize()])
if not args.video_path:
cap = cv2.VideoCapture(0)
else:
cap = cv2.VideoCapture(args.video_path)
if not cap.isOpened():
raise IOError("Error opening video stream or file, "
"--video_path whether existing: {}"
" or camera whether working".format(args.video_path))
return
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
disflow = cv2.DISOpticalFlow_create(cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST)
prev_gray = np.zeros((resize_h, resize_w), np.uint8)
prev_cfd = np.zeros((resize_h, resize_w), np.float32)
is_init = True
fps = cap.get(cv2.CAP_PROP_FPS)
if args.video_path:
logging.info("Please wait. It is computing......")
# 用于保存预测结果视频
if not osp.exists(args.save_dir):
os.makedirs(args.save_dir)
out = cv2.VideoWriter(
osp.join(args.save_dir, 'result.avi'),
cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps, (width, height))
# 开始获取视频帧
while cap.isOpened():
ret, frame = cap.read()
if ret:
im_shape = frame.shape
im_scale_x = float(resize_w) / float(im_shape[1])
im_scale_y = float(resize_h) / float(im_shape[0])
im = cv2.resize(
frame,
None,
None,
fx=im_scale_x,
fy=im_scale_y,
interpolation=cv2.INTER_LINEAR)
image = im.astype('float32')
im_info = ('resize', im_shape[0:2])
pred = model.predict(image, test_transforms)
score_map = pred['score_map']
cur_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
score_map = 255 * score_map[:, :, 1]
optflow_map = postprocess(cur_gray, score_map, prev_gray, prev_cfd, \
disflow, is_init)
prev_gray = cur_gray.copy()
prev_cfd = optflow_map.copy()
is_init = False
optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0)
optflow_map = threshold_mask(
optflow_map, thresh_bg=0.2, thresh_fg=0.8)
img_matting = np.repeat(
optflow_map[:, :, np.newaxis], 3, axis=2)
img_matting = recover(img_matting, im_info)
bg_im = np.ones_like(img_matting) * 255
comb = (img_matting * frame +
(1 - img_matting) * bg_im).astype(np.uint8)
out.write(comb)
else:
break
cap.release()
out.release()
else:
while cap.isOpened():
ret, frame = cap.read()
if ret:
im_shape = frame.shape
im_scale_x = float(resize_w) / float(im_shape[1])
im_scale_y = float(resize_h) / float(im_shape[0])
im = cv2.resize(
frame,
None,
None,
fx=im_scale_x,
fy=im_scale_y,
interpolation=cv2.INTER_LINEAR)
image = im.astype('float32')
im_info = ('resize', im_shape[0:2])
pred = model.predict(image, test_transforms)
score_map = pred['score_map']
cur_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
cur_gray = cv2.resize(cur_gray, (resize_w, resize_h))
score_map = 255 * score_map[:, :, 1]
optflow_map = postprocess(cur_gray, score_map, prev_gray, prev_cfd, \
disflow, is_init)
prev_gray = cur_gray.copy()
prev_cfd = optflow_map.copy()
is_init = False
optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0)
optflow_map = threshold_mask(
optflow_map, thresh_bg=0.2, thresh_fg=0.8)
img_matting = np.repeat(
optflow_map[:, :, np.newaxis], 3, axis=2)
img_matting = recover(img_matting, im_info)
bg_im = np.ones_like(img_matting) * 255
comb = (img_matting * frame +
(1 - img_matting) * bg_im).astype(np.uint8)
cv2.imshow('HumanSegmentation', comb)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
else:
break
cap.release()
if __name__ == "__main__":
args = parse_args()
video_infer(args)
# 使用教程——训练模型
本目录下整理了使用PaddleX训练模型的示例代码,代码中均提供了示例数据的自动下载,并均使用单张GPU卡进行训练。
|代码 | 模型任务 | 数据 |
|------|--------|---------|
|classification/mobilenetv2.py | 图像分类MobileNetV2 | 蔬菜分类 |
|classification/resnet50.py | 图像分类ResNet50 | 蔬菜分类 |
|detection/faster_rcnn_r50_fpn.py | 目标检测FasterRCNN | 昆虫检测 |
|detection/mask_rcnn_f50_fpn.py | 实例分割MaskRCNN | 垃圾分拣 |
|segmentation/deeplabv3p.py | 语义分割DeepLabV3| 视盘分割 |
|segmentation/unet.py | 语义分割UNet | 视盘分割 |
## 开始训练
在安装PaddleX后,使用如下命令开始训练
```
python classification/mobilenetv2.py
```
import os
# 选择使用0号卡
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
from paddlex.cls import transforms
import paddlex as pdx
# 下载和解压蔬菜分类数据集
veg_dataset = 'https://bj.bcebos.com/paddlex/datasets/vegetables_cls.tar.gz'
pdx.utils.download_and_decompress(veg_dataset, path='./')
# 定义训练和验证时的transforms
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/cls_transforms.html#composedclstransforms
train_transforms = transforms.ComposedClsTransforms(mode='train', crop_size=[224, 224])
eval_transforms = transforms.ComposedClsTransforms(mode='eval', crop_size=[224, 224])
# 定义训练和验证所用的数据集
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/classification.html#imagenet
train_dataset = pdx.datasets.ImageNet(
data_dir='vegetables_cls',
file_list='vegetables_cls/train_list.txt',
label_list='vegetables_cls/labels.txt',
transforms=train_transforms,
shuffle=True)
eval_dataset = pdx.datasets.ImageNet(
data_dir='vegetables_cls',
file_list='vegetables_cls/val_list.txt',
label_list='vegetables_cls/labels.txt',
transforms=eval_transforms)
# 初始化模型,并进行训练
# 可使用VisualDL查看训练指标
# VisualDL启动方式: visualdl --logdir output/mobilenetv2/vdl_log --port 8001
# 浏览器打开 https://0.0.0.0:8001即可
# 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/classification.html#resnet50
model = pdx.cls.MobileNetV2(num_classes=len(train_dataset.labels))
model.train(
num_epochs=10,
train_dataset=train_dataset,
train_batch_size=32,
eval_dataset=eval_dataset,
lr_decay_epochs=[4, 6, 8],
learning_rate=0.025,
save_dir='output/mobilenetv2',
use_vdl=True)
import os
# 选择使用0号卡
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import paddle.fluid as fluid
from paddlex.cls import transforms
import paddlex as pdx
# 下载和解压蔬菜分类数据集
veg_dataset = 'https://bj.bcebos.com/paddlex/datasets/vegetables_cls.tar.gz'
pdx.utils.download_and_decompress(veg_dataset, path='./')
# 定义训练和验证时的transforms
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/cls_transforms.html#composedclstransforms
train_transforms = transforms.ComposedClsTransforms(mode='train', crop_size=[224, 224])
eval_transforms = transforms.ComposedClsTransforms(mode='eval', crop_size=[224, 224])
# 定义训练和验证所用的数据集
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/classification.html#imagenet
train_dataset = pdx.datasets.ImageNet(
data_dir='vegetables_cls',
file_list='vegetables_cls/train_list.txt',
label_list='vegetables_cls/labels.txt',
transforms=train_transforms,
shuffle=True)
eval_dataset = pdx.datasets.ImageNet(
data_dir='vegetables_cls',
file_list='vegetables_cls/val_list.txt',
label_list='vegetables_cls/labels.txt',
transforms=eval_transforms)
# PaddleX支持自定义构建优化器
step_each_epoch = train_dataset.num_samples // 32
learning_rate = fluid.layers.cosine_decay(
learning_rate=0.025, step_each_epoch=step_each_epoch, epochs=10)
optimizer = fluid.optimizer.Momentum(
learning_rate=learning_rate,
momentum=0.9,
regularization=fluid.regularizer.L2Decay(4e-5))
# 初始化模型,并进行训练
# 可使用VisualDL查看训练指标
# VisualDL启动方式: visualdl --logdir output/resnet50/vdl_log --port 8001
# 浏览器打开 https://0.0.0.0:8001即可
# 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/classification.html#resnet50
model = pdx.cls.ResNet50(num_classes=len(train_dataset.labels))
model.train(
num_epochs=10,
train_dataset=train_dataset,
train_batch_size=32,
eval_dataset=eval_dataset,
optimizer=optimizer,
save_dir='output/resnet50',
use_vdl=True)
import os
# 选择使用0号卡
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
from paddlex.det import transforms
import paddlex as pdx
# 下载和解压昆虫检测数据集
insect_dataset = 'https://bj.bcebos.com/paddlex/datasets/insect_det.tar.gz'
pdx.utils.download_and_decompress(insect_dataset, path='./')
# 定义训练和验证时的transforms
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#composedrcnntransforms
train_transforms = transforms.ComposedRCNNTransforms(mode='train', min_max_size=[800, 1333])
eval_transforms = transforms.ComposedRCNNTransforms(mode='eval', min_max_size=[800, 1333])
# 定义训练和验证所用的数据集
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/detection.html#vocdetection
train_dataset = pdx.datasets.VOCDetection(
data_dir='insect_det',
file_list='insect_det/train_list.txt',
label_list='insect_det/labels.txt',
transforms=train_transforms,
shuffle=True)
eval_dataset = pdx.datasets.VOCDetection(
data_dir='insect_det',
file_list='insect_det/val_list.txt',
label_list='insect_det/labels.txt',
transforms=eval_transforms)
# 初始化模型,并进行训练
# 可使用VisualDL查看训练指标
# VisualDL启动方式: visualdl --logdir output/faster_rcnn_r50_fpn/vdl_log --port 8001
# 浏览器打开 https://0.0.0.0:8001即可
# 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP
# num_classes 需要设置为包含背景类的类别数,即: 目标类别数量 + 1
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/detection.html#fasterrcnn
num_classes = len(train_dataset.labels) + 1
model = pdx.det.FasterRCNN(num_classes=num_classes)
model.train(
num_epochs=12,
train_dataset=train_dataset,
train_batch_size=2,
eval_dataset=eval_dataset,
learning_rate=0.0025,
lr_decay_epochs=[8, 11],
save_dir='output/faster_rcnn_r50_fpn',
use_vdl=True)
import os
# 选择使用0号卡
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
from paddlex.det import transforms
import paddlex as pdx
# 下载和解压小度熊分拣数据集
xiaoduxiong_dataset = 'https://bj.bcebos.com/paddlex/datasets/xiaoduxiong_ins_det.tar.gz'
pdx.utils.download_and_decompress(xiaoduxiong_dataset, path='./')
# 定义训练和验证时的transforms
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#composedrcnntransforms
train_transforms = transforms.ComposedRCNNTransforms(mode='train', min_max_size=[800, 1333])
eval_transforms = transforms.ComposedRCNNTransforms(mode='eval', min_max_size=[800, 1333])
# 定义训练和验证所用的数据集
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/detection.html#cocodetection
train_dataset = pdx.datasets.CocoDetection(
data_dir='xiaoduxiong_ins_det/JPEGImages',
ann_file='xiaoduxiong_ins_det/train.json',
transforms=train_transforms,
shuffle=True)
eval_dataset = pdx.datasets.CocoDetection(
data_dir='xiaoduxiong_ins_det/JPEGImages',
ann_file='xiaoduxiong_ins_det/val.json',
transforms=eval_transforms)
# 初始化模型,并进行训练
# 可使用VisualDL查看训练指标
# VisualDL启动方式: visualdl --logdir output/mask_rcnn_r50_fpn/vdl_log --port 8001
# 浏览器打开 https://0.0.0.0:8001即可
# 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP
# num_classes 需要设置为包含背景类的类别数,即: 目标类别数量 + 1
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/instance_segmentation.html#maskrcnn
num_classes = len(train_dataset.labels) + 1
model = pdx.det.MaskRCNN(num_classes=num_classes)
model.train(
num_epochs=12,
train_dataset=train_dataset,
train_batch_size=1,
eval_dataset=eval_dataset,
learning_rate=0.00125,
warmup_steps=10,
lr_decay_epochs=[8, 11],
save_dir='output/mask_rcnn_r50_fpn',
use_vdl=True)
import os
# 选择使用0号卡
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
from paddlex.det import transforms
import paddlex as pdx
# 下载和解压昆虫检测数据集
insect_dataset = 'https://bj.bcebos.com/paddlex/datasets/insect_det.tar.gz'
pdx.utils.download_and_decompress(insect_dataset, path='./')
# 定义训练和验证时的transforms
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#composedyolotransforms
train_transforms = transforms.ComposedYOLOv3Transforms(mode='train', shape=[608, 608])
eval_transforms = transforms.ComposedYOLOv3Transforms(mode='eva', shape=[608, 608])
# 定义训练和验证所用的数据集
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/detection.html#vocdetection
train_dataset = pdx.datasets.VOCDetection(
data_dir='insect_det',
file_list='insect_det/train_list.txt',
label_list='insect_det/labels.txt',
transforms=train_transforms,
shuffle=True)
eval_dataset = pdx.datasets.VOCDetection(
data_dir='insect_det',
file_list='insect_det/val_list.txt',
label_list='insect_det/labels.txt',
transforms=eval_transforms)
# 初始化模型,并进行训练
# 可使用VisualDL查看训练指标
# VisualDL启动方式: visualdl --logdir output/yolov3_darknet/vdl_log --port 8001
# 浏览器打开 https://0.0.0.0:8001即可
# 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/detection.html#yolov3
num_classes = len(train_dataset.labels)
model = pdx.det.YOLOv3(num_classes=num_classes, backbone='DarkNet53')
model.train(
num_epochs=270,
train_dataset=train_dataset,
train_batch_size=8,
eval_dataset=eval_dataset,
learning_rate=0.000125,
lr_decay_epochs=[210, 240],
save_dir='output/yolov3_darknet53',
use_vdl=True)
import os
# 选择使用0号卡
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import paddlex as pdx
from paddlex.seg import transforms
# 下载和解压视盘分割数据集
optic_dataset = 'https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz'
pdx.utils.download_and_decompress(optic_dataset, path='./')
# 定义训练和验证时的transforms
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/seg_transforms.html#composedsegtransforms
train_transforms = transforms.ComposedSegTransforms(mode='train', train_crop_size=[769, 769])
eval_transforms = transforms.ComposedSegTransforms(mode='eval')
train_transforms.add_augmenters([
transforms.RandomRotate()
])
# 定义训练和验证所用的数据集
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/semantic_segmentation.html#segdataset
train_dataset = pdx.datasets.SegDataset(
data_dir='optic_disc_seg',
file_list='optic_disc_seg/train_list.txt',
label_list='optic_disc_seg/labels.txt',
transforms=train_transforms,
shuffle=True)
eval_dataset = pdx.datasets.SegDataset(
data_dir='optic_disc_seg',
file_list='optic_disc_seg/val_list.txt',
label_list='optic_disc_seg/labels.txt',
transforms=eval_transforms)
# 初始化模型,并进行训练
# 可使用VisualDL查看训练指标
# VisualDL启动方式: visualdl --logdir output/deeplab/vdl_log --port 8001
# 浏览器打开 https://0.0.0.0:8001即可
# 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#deeplabv3p
num_classes = len(train_dataset.labels)
model = pdx.seg.DeepLabv3p(num_classes=num_classes)
model.train(
num_epochs=40,
train_dataset=train_dataset,
train_batch_size=4,
eval_dataset=eval_dataset,
learning_rate=0.01,
save_dir='output/deeplab',
use_vdl=True)
import os
# 选择使用0号卡
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import paddlex as pdx
from paddlex.seg import transforms
# 下载和解压视盘分割数据集
optic_dataset = 'https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz'
pdx.utils.download_and_decompress(optic_dataset, path='./')
# 定义训练和验证时的transforms
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/seg_transforms.html#composedsegtransforms
train_transforms = transforms.ComposedSegTransforms(mode='train', train_crop_size=[769, 769])
eval_transforms = transforms.ComposedSegTransforms(mode='eval')
# 定义训练和验证所用的数据集
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/semantic_segmentation.html#segdataset
train_dataset = pdx.datasets.SegDataset(
data_dir='optic_disc_seg',
file_list='optic_disc_seg/train_list.txt',
label_list='optic_disc_seg/labels.txt',
transforms=train_transforms,
shuffle=True)
eval_dataset = pdx.datasets.SegDataset(
data_dir='optic_disc_seg',
file_list='optic_disc_seg/val_list.txt',
label_list='optic_disc_seg/labels.txt',
transforms=eval_transforms)
# 初始化模型,并进行训练
# 可使用VisualDL查看训练指标
# VisualDL启动方式: visualdl --logdir output/unet/vdl_log --port 8001
# 浏览器打开 https://0.0.0.0:8001即可
# 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#unet
num_classes = len(train_dataset.labels)
model = pdx.seg.UNet(num_classes=num_classes)
model.train(
num_epochs=20,
train_dataset=train_dataset,
train_batch_size=4,
eval_dataset=eval_dataset,
learning_rate=0.01,
save_dir='output/unet',
use_vdl=True)
......@@ -48,9 +48,10 @@ if hub.version.hub_version < '1.6.2':
env_info = get_environ_info()
load_model = cv.models.load_model
datasets = cv.datasets
transforms = cv.transforms
log_level = 2
from . import interpret
__version__ = '1.0.6'
__version__ = '1.0.7'
......@@ -15,6 +15,7 @@
from six import text_type as _text_type
import argparse
import sys
import paddlex.utils.logging as logging
def arg_parser():
......@@ -94,15 +95,15 @@ def main():
if args.export_onnx:
assert args.model_dir is not None, "--model_dir should be defined while exporting onnx model"
assert args.save_dir is not None, "--save_dir should be defined to create onnx model"
assert args.fixed_input_shape is not None, "--fixed_input_shape should be defined [w,h] to create onnx model, such as [224,224]"
fixed_input_shape = []
if args.fixed_input_shape is not None:
fixed_input_shape = eval(args.fixed_input_shape)
assert len(
fixed_input_shape
) == 2, "len of fixed input shape must == 2, such as [224,224]"
model = pdx.load_model(args.model_dir, fixed_input_shape)
model = pdx.load_model(args.model_dir)
if model.status == "Normal" or model.status == "Prune":
logging.error(
"Only support inference model, try to export model first as below,",
exit=False)
logging.error(
"paddlex --export_inference --model_dir model_path --save_dir infer_model"
)
pdx.convertor.export_onnx_model(model, args.save_dir)
......
......@@ -30,119 +30,17 @@ def export_onnx(model_dir, save_dir, fixed_input_shape):
def export_onnx_model(model, save_dir):
support_list = [
'ResNet18', 'ResNet34', 'ResNet50', 'ResNet101', 'ResNet50_vd',
'ResNet101_vd', 'ResNet50_vd_ssld', 'ResNet101_vd_ssld', 'DarkNet53',
'MobileNetV1', 'MobileNetV2', 'DenseNet121', 'DenseNet161',
'DenseNet201'
]
if model.__class__.__name__ not in support_list:
raise Exception("Model: {} unsupport export to ONNX".format(
model.__class__.__name__))
try:
from fluid.utils import op_io_info, init_name_prefix
from onnx import helper, checker
import fluid_onnx.ops as ops
from fluid_onnx.variables import paddle_variable_to_onnx_tensor, paddle_onnx_weight
from debug.model_check import debug_model, Tracker
except Exception as e:
if model.model_type == "detector" or model.__class__.__name__ == "FastSCNN":
logging.error(
"Import Module Failed! Please install paddle2onnx. Related requirements see https://github.com/PaddlePaddle/paddle2onnx."
"Only image classifier models and semantic segmentation models(except FastSCNN) are supported to export to ONNX"
)
raise e
place = fluid.CPUPlace()
exe = fluid.Executor(place)
inference_scope = fluid.global_scope()
with fluid.scope_guard(inference_scope):
test_input_names = [
var.name for var in list(model.test_inputs.values())
]
inputs_outputs_list = ["fetch", "feed"]
weights, weights_value_info = [], []
global_block = model.test_prog.global_block()
for var_name in global_block.vars:
var = global_block.var(var_name)
if var_name not in test_input_names\
and var.persistable:
weight, val_info = paddle_onnx_weight(
var=var, scope=inference_scope)
weights.append(weight)
weights_value_info.append(val_info)
# Create inputs
inputs = [
paddle_variable_to_onnx_tensor(v, global_block)
for v in test_input_names
]
logging.INFO("load the model parameter done.")
onnx_nodes = []
op_check_list = []
op_trackers = []
nms_first_index = -1
nms_outputs = []
for block in model.test_prog.blocks:
for op in block.ops:
if op.type in ops.node_maker:
# TODO: deal with the corner case that vars in
# different blocks have the same name
node_proto = ops.node_maker[str(op.type)](
operator=op, block=block)
op_outputs = []
last_node = None
if isinstance(node_proto, tuple):
onnx_nodes.extend(list(node_proto))
last_node = list(node_proto)
else:
onnx_nodes.append(node_proto)
last_node = [node_proto]
tracker = Tracker(str(op.type), last_node)
op_trackers.append(tracker)
op_check_list.append(str(op.type))
if op.type == "multiclass_nms" and nms_first_index < 0:
nms_first_index = 0
if nms_first_index >= 0:
_, _, output_op = op_io_info(op)
for output in output_op:
nms_outputs.extend(output_op[output])
else:
if op.type not in ['feed', 'fetch']:
op_check_list.append(op.type)
logging.info('The operator sets to run test case.')
logging.info(set(op_check_list))
# Create outputs
# Get the new names for outputs if they've been renamed in nodes' making
renamed_outputs = op_io_info.get_all_renamed_outputs()
test_outputs = list(model.test_outputs.values())
test_outputs_names = [var.name for var in model.test_outputs.values()]
test_outputs_names = [
name if name not in renamed_outputs else renamed_outputs[name]
for name in test_outputs_names
]
outputs = [
paddle_variable_to_onnx_tensor(v, global_block)
for v in test_outputs_names
]
# Make graph
onnx_name = 'paddlex.onnx'
onnx_graph = helper.make_graph(
nodes=onnx_nodes,
name=onnx_name,
initializer=weights,
inputs=inputs + weights_value_info,
outputs=outputs)
# Make model
onnx_model = helper.make_model(
onnx_graph, producer_name='PaddlePaddle')
# Model check
checker.check_model(onnx_model)
if onnx_model is not None:
onnx_model_file = os.path.join(save_dir, onnx_name)
if not os.path.exists(save_dir):
os.mkdir(save_dir)
with open(onnx_model_file, 'wb') as f:
f.write(onnx_model.SerializeToString())
logging.info("Saved converted model to path: %s" % onnx_model_file)
try:
import x2paddle
if x2paddle.__version__ < '0.7.4':
logging.error("You need to upgrade x2paddle >= 0.7.4")
except:
logging.error(
"You need to install x2paddle first, pip install x2paddle>=0.7.4")
from x2paddle.op_mapper.paddle_op_mapper import PaddleOpMapper
mapper = PaddleOpMapper()
mapper.convert(model.test_prog, save_dir)
......@@ -100,7 +100,7 @@ class CocoDetection(VOCDetection):
gt_score = np.ones((num_bbox, 1), dtype=np.float32)
is_crowd = np.zeros((num_bbox, 1), dtype=np.int32)
difficult = np.zeros((num_bbox, 1), dtype=np.int32)
gt_poly = None
gt_poly = [None] * num_bbox
for i, box in enumerate(bboxes):
catid = box['category_id']
......@@ -108,8 +108,6 @@ class CocoDetection(VOCDetection):
gt_bbox[i, :] = box['clean_bbox']
is_crowd[i][0] = box['iscrowd']
if 'segmentation' in box:
if gt_poly is None:
gt_poly = [None] * num_bbox
gt_poly[i] = box['segmentation']
im_info = {
......@@ -121,14 +119,12 @@ class CocoDetection(VOCDetection):
'gt_class': gt_class,
'gt_bbox': gt_bbox,
'gt_score': gt_score,
'gt_poly': gt_poly,
'difficult': difficult
}
if gt_poly is not None:
label_info['gt_poly'] = gt_poly
coco_rec = (im_info, label_info)
self.file_list.append([im_fname, coco_rec])
if not len(self.file_list) > 0:
raise Exception('not found any coco record in %s' % (ann_file))
logging.info("{} samples in file {}".format(
......
......@@ -46,7 +46,7 @@ class EasyDataCls(ImageNet):
label_list,
transforms=None,
num_workers='auto',
buffer_size=100,
buffer_size=8,
parallel_method='process',
shuffle=False):
super(ImageNet, self).__init__(
......@@ -73,8 +73,8 @@ class EasyDataCls(ImageNet):
if not osp.isfile(json_file):
continue
if not osp.exists(img_file):
raise IOError(
'The image file {} is not exist!'.format(img_file))
raise IOError('The image file {} is not exist!'.format(
img_file))
with open(json_file, mode='r', \
encoding=get_encoding(json_file)) as j:
json_info = json.load(j)
......@@ -83,4 +83,3 @@ class EasyDataCls(ImageNet):
self.num_samples = len(self.file_list)
logging.info("{} samples in file {}".format(
len(self.file_list), file_list))
\ No newline at end of file
......@@ -45,7 +45,7 @@ class ImageNet(Dataset):
label_list,
transforms=None,
num_workers='auto',
buffer_size=100,
buffer_size=8,
parallel_method='process',
shuffle=False):
super(ImageNet, self).__init__(
......@@ -70,8 +70,8 @@ class ImageNet(Dataset):
continue
full_path = osp.join(data_dir, items[0])
if not osp.exists(full_path):
raise IOError(
'The image file {} is not exist!'.format(full_path))
raise IOError('The image file {} is not exist!'.format(
full_path))
self.file_list.append([full_path, int(items[1])])
self.num_samples = len(self.file_list)
logging.info("{} samples in file {}".format(
......
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -28,7 +28,7 @@ class SegDataset(Dataset):
Args:
data_dir (str): 数据集所在的目录路径。
file_list (str): 描述数据集图片文件和对应标注文件的文件路径(文本内每行路径为相对data_dir的相对路)。
label_list (str): 描述数据集包含的类别信息文件路径。
label_list (str): 描述数据集包含的类别信息文件路径。默认值为None。
transforms (list): 数据集中每个样本的预处理/增强算子。
num_workers (int): 数据集中样本在预处理过程中的线程或进程数。默认为4。
buffer_size (int): 数据集中样本在预处理过程中队列的缓存长度,以样本数为单位。默认为100。
......@@ -40,7 +40,7 @@ class SegDataset(Dataset):
def __init__(self,
data_dir,
file_list,
label_list,
label_list=None,
transforms=None,
num_workers='auto',
buffer_size=100,
......@@ -56,6 +56,7 @@ class SegDataset(Dataset):
self.labels = list()
self._epoch = 0
if label_list is not None:
with open(label_list, encoding=get_encoding(label_list)) as f:
for line in f:
item = line.strip()
......@@ -69,8 +70,8 @@ class SegDataset(Dataset):
full_path_im = osp.join(data_dir, items[0])
full_path_label = osp.join(data_dir, items[1])
if not osp.exists(full_path_im):
raise IOError(
'The image file {} is not exist!'.format(full_path_im))
raise IOError('The image file {} is not exist!'.format(
full_path_im))
if not osp.exists(full_path_label):
raise IOError('The image file {} is not exist!'.format(
full_path_label))
......
......@@ -14,8 +14,10 @@
from __future__ import absolute_import
import copy
import os
import os.path as osp
import random
import re
import numpy as np
from collections import OrderedDict
import xml.etree.ElementTree as ET
......@@ -103,23 +105,60 @@ class VOCDetection(Dataset):
else:
ct = int(tree.find('id').text)
im_id = np.array([int(tree.find('id').text)])
objs = tree.findall('object')
im_w = float(tree.find('size').find('width').text)
im_h = float(tree.find('size').find('height').text)
pattern = re.compile('<object>', re.IGNORECASE)
obj_tag = pattern.findall(
str(ET.tostringlist(tree.getroot())))[0][1:-1]
objs = tree.findall(obj_tag)
pattern = re.compile('<size>', re.IGNORECASE)
size_tag = pattern.findall(
str(ET.tostringlist(tree.getroot())))[0][1:-1]
size_element = tree.find(size_tag)
pattern = re.compile('<width>', re.IGNORECASE)
width_tag = pattern.findall(
str(ET.tostringlist(size_element)))[0][1:-1]
im_w = float(size_element.find(width_tag).text)
pattern = re.compile('<height>', re.IGNORECASE)
height_tag = pattern.findall(
str(ET.tostringlist(size_element)))[0][1:-1]
im_h = float(size_element.find(height_tag).text)
gt_bbox = np.zeros((len(objs), 4), dtype=np.float32)
gt_class = np.zeros((len(objs), 1), dtype=np.int32)
gt_score = np.ones((len(objs), 1), dtype=np.float32)
is_crowd = np.zeros((len(objs), 1), dtype=np.int32)
difficult = np.zeros((len(objs), 1), dtype=np.int32)
for i, obj in enumerate(objs):
cname = obj.find('name').text.strip()
pattern = re.compile('<name>', re.IGNORECASE)
name_tag = pattern.findall(str(ET.tostringlist(obj)))[0][
1:-1]
cname = obj.find(name_tag).text.strip()
gt_class[i][0] = cname2cid[cname]
_difficult = int(obj.find('difficult').text)
x1 = float(obj.find('bndbox').find('xmin').text)
y1 = float(obj.find('bndbox').find('ymin').text)
x2 = float(obj.find('bndbox').find('xmax').text)
y2 = float(obj.find('bndbox').find('ymax').text)
pattern = re.compile('<difficult>', re.IGNORECASE)
diff_tag = pattern.findall(str(ET.tostringlist(obj)))[0][
1:-1]
try:
_difficult = int(obj.find(diff_tag).text)
except Exception:
_difficult = 0
pattern = re.compile('<bndbox>', re.IGNORECASE)
box_tag = pattern.findall(str(ET.tostringlist(obj)))[0][1:
-1]
box_element = obj.find(box_tag)
pattern = re.compile('<xmin>', re.IGNORECASE)
xmin_tag = pattern.findall(
str(ET.tostringlist(box_element)))[0][1:-1]
x1 = float(box_element.find(xmin_tag).text)
pattern = re.compile('<ymin>', re.IGNORECASE)
ymin_tag = pattern.findall(
str(ET.tostringlist(box_element)))[0][1:-1]
y1 = float(box_element.find(ymin_tag).text)
pattern = re.compile('<xmax>', re.IGNORECASE)
xmax_tag = pattern.findall(
str(ET.tostringlist(box_element)))[0][1:-1]
x2 = float(box_element.find(xmax_tag).text)
pattern = re.compile('<ymax>', re.IGNORECASE)
ymax_tag = pattern.findall(
str(ET.tostringlist(box_element)))[0][1:-1]
y2 = float(box_element.find(ymax_tag).text)
x1 = max(0, x1)
y1 = max(0, y1)
if im_w > 0.5 and im_h > 0.5:
......@@ -148,6 +187,7 @@ class VOCDetection(Dataset):
'gt_class': gt_class,
'gt_bbox': gt_bbox,
'gt_score': gt_score,
'gt_poly': [],
'difficult': difficult
}
voc_rec = (im_info, label_info)
......@@ -170,6 +210,44 @@ class VOCDetection(Dataset):
self.coco_gt.dataset = annotations
self.coco_gt.createIndex()
def add_negative_samples(self, image_dir):
import cv2
if not osp.exists(image_dir):
raise Exception("{} background images directory does not exist.".
format(image_dir))
image_list = os.listdir(image_dir)
max_img_id = max(self.coco_gt.getImgIds())
for image in image_list:
if not is_pic(image):
continue
# False ground truth
gt_bbox = np.array([[0, 0, 1e-05, 1e-05]], dtype=np.float32)
gt_class = np.array([[0]], dtype=np.int32)
gt_score = np.ones((1, 1), dtype=np.float32)
is_crowd = np.array([[0]], dtype=np.int32)
difficult = np.zeros((1, 1), dtype=np.int32)
gt_poly = [[[0, 0, 0, 1e-05, 1e-05, 1e-05, 1e-05, 0]]]
max_img_id += 1
im_fname = osp.join(image_dir, image)
img_data = cv2.imread(im_fname)
im_h, im_w, im_c = img_data.shape
im_info = {
'im_id': np.array([max_img_id]).astype('int32'),
'image_shape': np.array([im_h, im_w]).astype('int32'),
}
label_info = {
'is_crowd': is_crowd,
'gt_class': gt_class,
'gt_bbox': gt_bbox,
'gt_score': gt_score,
'difficult': difficult,
'gt_poly': gt_poly
}
coco_rec = (im_info, label_info)
self.file_list.append([im_fname, coco_rec])
self.num_samples = len(self.file_list)
def iterator(self):
self._epoch += 1
self._pos = 0
......
......@@ -43,5 +43,6 @@ from .mask_rcnn import MaskRCNN
from .unet import UNet
from .deeplabv3p import DeepLabv3p
from .hrnet import HRNet
from .fast_scnn import FastSCNN
from .load_model import load_model
from .slim import prune
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
import paddle.fluid as fluid
......@@ -194,14 +194,37 @@ class BaseAPI:
if os.path.exists(pretrain_dir):
os.remove(pretrain_dir)
os.makedirs(pretrain_dir)
if pretrain_weights is not None and not os.path.exists(
pretrain_weights):
if self.model_type == 'classifier':
if pretrain_weights not in ['IMAGENET']:
logging.warning(
"Pretrain_weights for classifier should be defined as directory path or parameter file or 'IMAGENET' or None, but it is {}, so we force to set it as 'IMAGENET'".
format(pretrain_weights))
pretrain_weights = 'IMAGENET'
elif self.model_type == 'detector':
if pretrain_weights not in ['IMAGENET', 'COCO']:
logging.warning(
"Pretrain_weights for detector should be defined as directory path or parameter file or 'IMAGENET' or 'COCO' or None, but it is {}, so we force to set it as 'IMAGENET'".
format(pretrain_weights))
pretrain_weights = 'IMAGENET'
elif self.model_type == 'segmenter':
if pretrain_weights not in [
'IMAGENET', 'COCO', 'CITYSCAPES'
]:
logging.warning(
"Pretrain_weights for segmenter should be defined as directory path or parameter file or 'IMAGENET' or 'COCO' or 'CITYSCAPES', but it is {}, so we force to set it as 'IMAGENET'".
format(pretrain_weights))
pretrain_weights = 'IMAGENET'
if hasattr(self, 'backbone'):
backbone = self.backbone
else:
backbone = self.__class__.__name__
if backbone == "HRNet":
backbone = backbone + "_W{}".format(self.width)
class_name = self.__class__.__name__
pretrain_weights = get_pretrain_weights(
pretrain_weights, self.model_type, backbone, pretrain_dir)
pretrain_weights, class_name, backbone, pretrain_dir)
if startup_prog is None:
startup_prog = fluid.default_startup_program()
self.exe.run(startup_prog)
......@@ -221,8 +244,8 @@ class BaseAPI:
logging.info(
"Load pretrain weights from {}.".format(pretrain_weights),
use_color=True)
paddlex.utils.utils.load_pretrain_weights(self.exe, self.train_prog,
pretrain_weights, fuse_bn)
paddlex.utils.utils.load_pretrain_weights(
self.exe, self.train_prog, pretrain_weights, fuse_bn)
# 进行裁剪
if sensitivities_file is not None:
import paddleslim
......@@ -326,7 +349,9 @@ class BaseAPI:
logging.info("Model saved in {}.".format(save_dir))
def export_inference_model(self, save_dir):
test_input_names = [var.name for var in list(self.test_inputs.values())]
test_input_names = [
var.name for var in list(self.test_inputs.values())
]
test_outputs = list(self.test_outputs.values())
if self.__class__.__name__ == 'MaskRCNN':
from paddlex.utils.save import save_mask_inference_model
......@@ -363,7 +388,8 @@ class BaseAPI:
# 模型保存成功的标志
open(osp.join(save_dir, '.success'), 'w').close()
logging.info("Model for inference deploy saved in {}.".format(save_dir))
logging.info("Model for inference deploy saved in {}.".format(
save_dir))
def train_loop(self,
num_epochs,
......@@ -487,10 +513,12 @@ class BaseAPI:
eta = ((num_epochs - i) * total_num_steps - step - 1
) * avg_step_time
if time_eval_one_epoch is not None:
eval_eta = (total_eval_times - i // save_interval_epochs
eval_eta = (
total_eval_times - i // save_interval_epochs
) * time_eval_one_epoch
else:
eval_eta = (total_eval_times - i // save_interval_epochs
eval_eta = (
total_eval_times - i // save_interval_epochs
) * total_num_steps_eval * avg_step_time
eta_str = seconds_to_hms(eta + eval_eta)
......@@ -508,6 +536,7 @@ class BaseAPI:
epoch_start_time = time.time()
# 每间隔save_interval_epochs, 在验证集上评估和对模型进行保存
self.completed_epochs += 1
eval_epoch_start_time = time.time()
if (i + 1) % save_interval_epochs == 0 or i == num_epochs - 1:
current_save_dir = osp.join(save_dir, "epoch_{}".format(i + 1))
......@@ -521,7 +550,6 @@ class BaseAPI:
return_details=True)
logging.info('[EVAL] Finished, Epoch={}, {} .'.format(
i + 1, dict2str(self.eval_metrics)))
self.completed_epochs += 1
# 保存最优模型
best_accuracy_key = list(self.eval_metrics.keys())[0]
current_accuracy = self.eval_metrics[best_accuracy_key]
......
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
import numpy as np
......
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
import os.path as osp
......@@ -242,14 +242,16 @@ class DeepLabv3p(BaseAPI):
log_interval_steps (int): 训练日志输出间隔(单位:迭代次数)。默认为2。
save_dir (str): 模型保存路径。默认'output'。
pretrain_weights (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',
则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认'IMAGENET。
则自动下载在ImageNet图片数据上预训练的模型权重;若为字符串'COCO',
则自动下载在COCO数据集上预训练的模型权重;若为字符串'CITYSCAPES',
则自动下载在CITYSCAPES数据集上预训练的模型权重;若为None,则不使用预训练模型。默认'IMAGENET。
optimizer (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认的优化器:使用
fluid.optimizer.Momentum优化方法,polynomial的学习率衰减策略。
learning_rate (float): 默认优化器的初始学习率。默认0.01。
lr_decay_power (float): 默认优化器学习率衰减指数。默认0.9。
use_vdl (bool): 是否使用VisualDL进行可视化。默认False。
sensitivities_file (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',
则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
eval_metric_loss (float): 可容忍的精度损失。默认为0.05。
early_stop (bool): 是否使用提前终止训练策略。默认值为False。
early_stop_patience (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内
......
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
import paddle.fluid as fluid
import paddlex
from collections import OrderedDict
from .deeplabv3p import DeepLabv3p
class FastSCNN(DeepLabv3p):
"""实现Fast SCNN网络的构建并进行训练、评估、预测和模型导出。
Args:
num_classes (int): 类别数。
use_bce_loss (bool): 是否使用bce loss作为网络的损失函数,只能用于两类分割。可与dice loss同时使用。默认False。
use_dice_loss (bool): 是否使用dice loss作为网络的损失函数,只能用于两类分割,可与bce loss同时使用。
当use_bce_loss和use_dice_loss都为False时,使用交叉熵损失函数。默认False。
class_weight (list/str): 交叉熵损失函数各类损失的权重。当class_weight为list的时候,长度应为
num_classes。当class_weight为str时, weight.lower()应为'dynamic',这时会根据每一轮各类像素的比重
自行计算相应的权重,每一类的权重为:每类的比例 * num_classes。class_weight取默认值None是,各类的权重1,
即平时使用的交叉熵损失函数。
ignore_index (int): label上忽略的值,label为ignore_index的像素不参与损失函数的计算。默认255。
multi_loss_weight (list): 多分支上的loss权重。默认计算一个分支上的loss,即默认值为[1.0]。
也支持计算两个分支或三个分支上的loss,权重按[fusion_branch_weight, higher_branch_weight, lower_branch_weight]排列,
fusion_branch_weight为空间细节分支和全局上下文分支融合后的分支上的loss权重,higher_branch_weight为空间细节分支上的loss权重,
lower_branch_weight为全局上下文分支上的loss权重,若higher_branch_weight和lower_branch_weight未设置则不会计算这两个分支上的loss。
Raises:
ValueError: use_bce_loss或use_dice_loss为真且num_calsses > 2。
ValueError: class_weight为list, 但长度不等于num_class。
class_weight为str, 但class_weight.low()不等于dynamic。
TypeError: class_weight不为None时,其类型不是list或str。
TypeError: multi_loss_weight不为list。
ValueError: multi_loss_weight为list但长度小于0或者大于3。
"""
def __init__(self,
num_classes=2,
use_bce_loss=False,
use_dice_loss=False,
class_weight=None,
ignore_index=255,
multi_loss_weight=[1.0]):
self.init_params = locals()
super(DeepLabv3p, self).__init__('segmenter')
# dice_loss或bce_loss只适用两类分割中
if num_classes > 2 and (use_bce_loss or use_dice_loss):
raise ValueError(
"dice loss and bce loss is only applicable to binary classfication"
)
if class_weight is not None:
if isinstance(class_weight, list):
if len(class_weight) != num_classes:
raise ValueError(
"Length of class_weight should be equal to number of classes"
)
elif isinstance(class_weight, str):
if class_weight.lower() != 'dynamic':
raise ValueError(
"if class_weight is string, must be dynamic!")
else:
raise TypeError(
'Expect class_weight is a list or string but receive {}'.
format(type(class_weight)))
if not isinstance(multi_loss_weight, list):
raise TypeError(
'Expect multi_loss_weight is a list but receive {}'.format(
type(multi_loss_weight)))
if len(multi_loss_weight) > 3 or len(multi_loss_weight) < 0:
raise ValueError(
"Length of multi_loss_weight should be lower than or equal to 3 but greater than 0."
)
self.num_classes = num_classes
self.use_bce_loss = use_bce_loss
self.use_dice_loss = use_dice_loss
self.class_weight = class_weight
self.multi_loss_weight = multi_loss_weight
self.ignore_index = ignore_index
self.labels = None
self.fixed_input_shape = None
def build_net(self, mode='train'):
model = paddlex.cv.nets.segmentation.FastSCNN(
self.num_classes,
mode=mode,
use_bce_loss=self.use_bce_loss,
use_dice_loss=self.use_dice_loss,
class_weight=self.class_weight,
ignore_index=self.ignore_index,
multi_loss_weight=self.multi_loss_weight,
fixed_input_shape=self.fixed_input_shape)
inputs = model.generate_inputs()
model_out = model.build_net(inputs)
outputs = OrderedDict()
if mode == 'train':
self.optimizer.minimize(model_out)
outputs['loss'] = model_out
else:
outputs['pred'] = model_out[0]
outputs['logit'] = model_out[1]
return inputs, outputs
def train(self,
num_epochs,
train_dataset,
train_batch_size=2,
eval_dataset=None,
save_interval_epochs=1,
log_interval_steps=2,
save_dir='output',
pretrain_weights='CITYSCAPES',
optimizer=None,
learning_rate=0.01,
lr_decay_power=0.9,
use_vdl=False,
sensitivities_file=None,
eval_metric_loss=0.05,
early_stop=False,
early_stop_patience=5,
resume_checkpoint=None):
"""训练。
Args:
num_epochs (int): 训练迭代轮数。
train_dataset (paddlex.datasets): 训练数据读取器。
train_batch_size (int): 训练数据batch大小。同时作为验证数据batch大小。默认2。
eval_dataset (paddlex.datasets): 评估数据读取器。
save_interval_epochs (int): 模型保存间隔(单位:迭代轮数)。默认为1。
log_interval_steps (int): 训练日志输出间隔(单位:迭代次数)。默认为2。
save_dir (str): 模型保存路径。默认'output'。
pretrain_weights (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'CITYSCAPES'
则自动下载在CITYSCAPES图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为'CITYSCAPES'。
optimizer (paddle.fluid.optimizer): 优化器。当改参数为None时,使用默认的优化器:使用
fluid.optimizer.Momentum优化方法,polynomial的学习率衰减策略。
learning_rate (float): 默认优化器的初始学习率。默认0.01。
lr_decay_power (float): 默认优化器学习率多项式衰减系数。默认0.9。
use_vdl (bool): 是否使用VisualDL进行可视化。默认False。
sensitivities_file (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',
则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
eval_metric_loss (float): 可容忍的精度损失。默认为0.05。
early_stop (bool): 是否使用提前终止训练策略。默认值为False。
early_stop_patience (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内
连续下降或持平,则终止训练。默认值为5。
resume_checkpoint (str): 恢复训练时指定上次训练保存的模型路径。若为None,则不会恢复训练。默认值为None。
Raises:
ValueError: 模型从inference model进行加载。
"""
return super(FastSCNN, self).train(
num_epochs, train_dataset, train_batch_size, eval_dataset,
save_interval_epochs, log_interval_steps, save_dir,
pretrain_weights, optimizer, learning_rate, lr_decay_power,
use_vdl, sensitivities_file, eval_metric_loss, early_stop,
early_stop_patience, resume_checkpoint)
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
import math
......@@ -196,7 +196,8 @@ class FasterRCNN(BaseAPI):
log_interval_steps (int): 训练日志输出间隔(单位:迭代次数)。默认为20。
save_dir (str): 模型保存路径。默认值为'output'。
pretrain_weights (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',
则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为'IMAGENET'。
则自动下载在ImageNet图片数据上预训练的模型权重;若为字符串'COCO',
则自动下载在COCO数据集上预训练的模型权重;若为None,则不使用预训练模型。默认为'IMAGENET'。
optimizer (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器:
fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。
learning_rate (float): 默认优化器的初始学习率。默认为0.0025。
......
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
import paddle.fluid as fluid
......@@ -24,11 +24,12 @@ class HRNet(DeepLabv3p):
Args:
num_classes (int): 类别数。
width (int): 高分辨率分支中特征层的通道数量。默认值为18。可选择取值为[18, 30, 32, 40, 44, 48, 60, 64]。
width (int|str): 高分辨率分支中特征层的通道数量。默认值为18。可选择取值为[18, 30, 32, 40, 44, 48, 60, 64, '18_small_v1']。
'18_small_v1'是18的轻量级版本。
use_bce_loss (bool): 是否使用bce loss作为网络的损失函数,只能用于两类分割。可与dice loss同时使用。默认False。
use_dice_loss (bool): 是否使用dice loss作为网络的损失函数,只能用于两类分割,可与bce loss同时使用。
当use_bce_loss和use_dice_loss都为False时,使用交叉熵损失函数。默认False。
class_weight (list/str): 交叉熵损失函数各类损失的权重。当class_weight为list的时候,长度应为
class_weight (list|str): 交叉熵损失函数各类损失的权重。当class_weight为list的时候,长度应为
num_classes。当class_weight为str时, weight.lower()应为'dynamic',这时会根据每一轮各类像素的比重
自行计算相应的权重,每一类的权重为:每类的比例 * num_classes。class_weight取默认值None是,各类的权重1,
即平时使用的交叉熵损失函数。
......@@ -95,11 +96,6 @@ class HRNet(DeepLabv3p):
if mode == 'train':
self.optimizer.minimize(model_out)
outputs['loss'] = model_out
elif mode == 'eval':
outputs['loss'] = model_out[0]
outputs['pred'] = model_out[1]
outputs['label'] = model_out[2]
outputs['mask'] = model_out[3]
else:
outputs['pred'] = model_out[0]
outputs['logit'] = model_out[1]
......@@ -152,14 +148,15 @@ class HRNet(DeepLabv3p):
log_interval_steps (int): 训练日志输出间隔(单位:迭代次数)。默认为2。
save_dir (str): 模型保存路径。默认'output'。
pretrain_weights (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',
则自动下载在IMAGENET图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为'IMAGENET'。
则自动下载在IMAGENET图片数据上预训练的模型权重;若为字符串'CITYSCAPES'
则自动下载在CITYSCAPES图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为'IMAGENET'。
optimizer (paddle.fluid.optimizer): 优化器。当改参数为None时,使用默认的优化器:使用
fluid.optimizer.Momentum优化方法,polynomial的学习率衰减策略。
learning_rate (float): 默认优化器的初始学习率。默认0.01。
lr_decay_power (float): 默认优化器学习率多项式衰减系数。默认0.9。
use_vdl (bool): 是否使用VisualDL进行可视化。默认False。
sensitivities_file (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',
则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
eval_metric_loss (float): 可容忍的精度损失。默认为0.05。
early_stop (bool): 是否使用提前终止训练策略。默认值为False。
early_stop_patience (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内
......@@ -172,6 +169,6 @@ class HRNet(DeepLabv3p):
return super(HRNet, self).train(
num_epochs, train_dataset, train_batch_size, eval_dataset,
save_interval_epochs, log_interval_steps, save_dir,
pretrain_weights, optimizer, learning_rate, lr_decay_power, use_vdl,
sensitivities_file, eval_metric_loss, early_stop,
pretrain_weights, optimizer, learning_rate, lr_decay_power,
use_vdl, sensitivities_file, eval_metric_loss, early_stop,
early_stop_patience, resume_checkpoint)
......@@ -108,6 +108,7 @@ def load_model(model_dir, fixed_input_shape=None):
logging.info("Model[{}] loaded.".format(info['Model']))
model.trainable = False
model.status = status
return model
......
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
import math
......@@ -155,7 +155,8 @@ class MaskRCNN(FasterRCNN):
log_interval_steps (int): 训练日志输出间隔(单位:迭代次数)。默认为20。
save_dir (str): 模型保存路径。默认值为'output'。
pretrain_weights (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',
则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为None。
则自动下载在ImageNet图片数据上预训练的模型权重;若为字符串'COCO',
则自动下载在COCO数据集上预训练的模型权重;若为None,则不使用预训练模型。默认为None。
optimizer (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器:
fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。
learning_rate (float): 默认优化器的学习率。默认为1.0/800。
......
......@@ -158,6 +158,7 @@ def prune_program(model, prune_params_ratios=None):
prune_params_ratios (dict): 由裁剪参数名和裁剪率组成的字典,当为None时
使用默认裁剪参数名和裁剪率。默认为None。
"""
assert model.status == 'Normal', 'Only the models saved while training are supported!'
place = model.places[0]
train_prog = model.train_prog
eval_prog = model.test_prog
......@@ -235,6 +236,7 @@ def cal_params_sensitivities(model, save_file, eval_dataset, batch_size=8):
其中``weight_0``是卷积Kernel名;``sensitivities['weight_0']``是一个字典,key是裁剪率,value是敏感度。
"""
assert model.status == 'Normal', 'Only the models saved while training are supported!'
if os.path.exists(save_file):
os.remove(save_file)
......
......@@ -19,6 +19,8 @@ import paddle.fluid as fluid
import paddlex
sensitivities_data = {
'AlexNet':
'https://bj.bcebos.com/paddlex/slim_prune/alexnet_sensitivities.data',
'ResNet18':
'https://bj.bcebos.com/paddlex/slim_prune/resnet18.sensitivities',
'ResNet34':
......@@ -41,6 +43,10 @@ sensitivities_data = {
'https://bj.bcebos.com/paddlex/slim_prune/mobilenetv3_large.sensitivities',
'MobileNetV3_small':
'https://bj.bcebos.com/paddlex/slim_prune/mobilenetv3_small.sensitivities',
'MobileNetV3_large_ssld':
'https://bj.bcebos.com/paddlex/slim_prune/mobilenetv3_large_ssld_sensitivities.data',
'MobileNetV3_small_ssld':
'https://bj.bcebos.com/paddlex/slim_prune/mobilenetv3_small_ssld_sensitivities.data',
'DenseNet121':
'https://bj.bcebos.com/paddlex/slim_prune/densenet121.sensitivities',
'DenseNet161':
......@@ -51,6 +57,8 @@ sensitivities_data = {
'https://bj.bcebos.com/paddlex/slim_prune/xception41.sensitivities',
'Xception65':
'https://bj.bcebos.com/paddlex/slim_prune/xception65.sensitivities',
'ShuffleNetV2':
'https://bj.bcebos.com/paddlex/slim_prune/shufflenetv2_sensitivities.data',
'YOLOv3_MobileNetV1':
'https://bj.bcebos.com/paddlex/slim_prune/yolov3_mobilenetv1.sensitivities',
'YOLOv3_MobileNetV3_large':
......@@ -143,7 +151,8 @@ def get_prune_params(model):
if model_type.startswith('ResNet') or \
model_type.startswith('DenseNet') or \
model_type.startswith('DarkNet') or \
model_type.startswith('AlexNet'):
model_type.startswith('AlexNet') or \
model_type.startswith('ShuffleNetV2'):
for block in program.blocks:
for param in block.all_parameters():
pd_var = fluid.global_scope().find_var(param.name)
......@@ -152,6 +161,28 @@ def get_prune_params(model):
prune_names.append(param.name)
if model_type == 'AlexNet':
prune_names.remove('conv5_weights')
if model_type == 'ShuffleNetV2':
not_prune_names = ['stage_2_1_conv5_weights',
'stage_2_1_conv3_weights',
'stage_2_2_conv3_weights',
'stage_2_3_conv3_weights',
'stage_2_4_conv3_weights',
'stage_3_1_conv5_weights',
'stage_3_1_conv3_weights',
'stage_3_2_conv3_weights',
'stage_3_3_conv3_weights',
'stage_3_4_conv3_weights',
'stage_3_5_conv3_weights',
'stage_3_6_conv3_weights',
'stage_3_7_conv3_weights',
'stage_3_8_conv3_weights',
'stage_4_1_conv5_weights',
'stage_4_1_conv3_weights',
'stage_4_2_conv3_weights',
'stage_4_3_conv3_weights',
'stage_4_4_conv3_weights',]
for name in not_prune_names:
prune_names.remove(name)
elif model_type == "MobileNetV1":
prune_names.append("conv1_weights")
for param in program.global_block().all_parameters():
......
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
import paddlex
......@@ -95,11 +95,6 @@ class UNet(DeepLabv3p):
if mode == 'train':
self.optimizer.minimize(model_out)
outputs['loss'] = model_out
elif mode == 'eval':
outputs['loss'] = model_out[0]
outputs['pred'] = model_out[1]
outputs['label'] = model_out[2]
outputs['mask'] = model_out[3]
else:
outputs['pred'] = model_out[0]
outputs['logit'] = model_out[1]
......@@ -141,7 +136,7 @@ class UNet(DeepLabv3p):
lr_decay_power (float): 默认优化器学习率多项式衰减系数。默认0.9。
use_vdl (bool): 是否使用VisualDL进行可视化。默认False。
sensitivities_file (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',
则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
eval_metric_loss (float): 可容忍的精度损失。默认为0.05。
early_stop (bool): 是否使用提前终止训练策略。默认值为False。
early_stop_patience (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内
......
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
......
import paddlex
import paddlex.utils.logging as logging
import paddlehub as hub
import os
import os.path as osp
......@@ -75,16 +76,101 @@ image_pretrain = {
}
coco_pretrain = {
'UNet': 'https://paddleseg.bj.bcebos.com/models/unet_coco_v3.tgz'
'YOLOv3_DarkNet53_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/yolov3_darknet.tar',
'YOLOv3_MobileNetV1_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1.tar',
'YOLOv3_MobileNetV3_large_COCO':
'https://bj.bcebos.com/paddlex/models/yolov3_mobilenet_v3.tar',
'YOLOv3_ResNet34_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r34.tar',
'YOLOv3_ResNet50_vd_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r50vd_dcn.tar',
'FasterRCNN_ResNet50_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_fpn_2x.tar',
'FasterRCNN_ResNet50_vd_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_vd_fpn_2x.tar',
'FasterRCNN_ResNet101_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_fpn_2x.tar',
'FasterRCNN_ResNet101_vd_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_vd_fpn_2x.tar',
'FasterRCNN_HRNet_W18_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_hrnetv2p_w18_2x.tar',
'MaskRCNN_ResNet50_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r50_fpn_2x.tar',
'MaskRCNN_ResNet50_vd_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r50_vd_fpn_2x.tar',
'MaskRCNN_ResNet101_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r101_fpn_1x.tar',
'MaskRCNN_ResNet101_vd_COCO':
'https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r101_vd_fpn_1x.tar',
'UNet_COCO': 'https://paddleseg.bj.bcebos.com/models/unet_coco_v3.tgz',
'DeepLabv3p_MobileNetV2_x1.0_COCO':
'https://bj.bcebos.com/v1/paddleseg/deeplab_mobilenet_x1_0_coco.tgz',
'DeepLabv3p_Xception65_COCO':
'https://paddleseg.bj.bcebos.com/models/xception65_coco.tgz'
}
cityscapes_pretrain = {
'DeepLabv3p_MobileNetV2_x1.0_CITYSCAPES':
'https://paddleseg.bj.bcebos.com/models/mobilenet_cityscapes.tgz',
'DeepLabv3p_Xception65_CITYSCAPES':
'https://paddleseg.bj.bcebos.com/models/xception65_bn_cityscapes.tgz',
'HRNet_W18_CITYSCAPES':
'https://paddleseg.bj.bcebos.com/models/hrnet_w18_bn_cityscapes.tgz',
'FastSCNN_CITYSCAPES':
'https://paddleseg.bj.bcebos.com/models/fast_scnn_cityscape.tar'
}
def get_pretrain_weights(flag, model_type, backbone, save_dir):
def get_pretrain_weights(flag, class_name, backbone, save_dir):
if flag is None:
return None
elif osp.isdir(flag):
return flag
elif osp.isfile(flag):
return flag
warning_info = "{} does not support to be finetuned with weights pretrained on the {} dataset, so pretrain_weights is forced to be set to {}"
if flag == 'COCO':
if class_name == "FasterRCNN" and backbone in ['ResNet18'] or \
class_name == "MaskRCNN" and backbone in ['ResNet18', 'HRNet_W18'] or \
class_name == 'DeepLabv3p' and backbone in ['Xception41', 'MobileNetV2_x0.25', 'MobileNetV2_x0.5', 'MobileNetV2_x1.5', 'MobileNetV2_x2.0']:
model_name = '{}_{}'.format(class_name, backbone)
logging.warning(warning_info.format(model_name, flag, 'IMAGENET'))
flag = 'IMAGENET'
elif class_name == 'HRNet':
logging.warning(warning_info.format(class_name, flag, 'IMAGENET'))
flag = 'IMAGENET'
elif class_name == 'FastSCNN':
logging.warning(
warning_info.format(class_name, flag, 'CITYSCAPES'))
flag = 'CITYSCAPES'
elif flag == 'CITYSCAPES':
model_name = '{}_{}'.format(class_name, backbone)
if class_name == 'UNet':
logging.warning(warning_info.format(class_name, flag, 'COCO'))
flag = 'COCO'
if class_name == 'HRNet' and backbone.split('_')[
-1] in ['W30', 'W32', 'W40', 'W48', 'W60', 'W64']:
logging.warning(warning_info.format(backbone, flag, 'IMAGENET'))
flag = 'IMAGENET'
if class_name == 'DeepLabv3p' and backbone in [
'Xception41', 'MobileNetV2_x0.25', 'MobileNetV2_x0.5',
'MobileNetV2_x1.5', 'MobileNetV2_x2.0'
]:
model_name = '{}_{}'.format(class_name, backbone)
logging.warning(warning_info.format(model_name, flag, 'IMAGENET'))
flag = 'IMAGENET'
elif flag == 'IMAGENET':
if class_name == 'UNet':
logging.warning(warning_info.format(class_name, flag, 'COCO'))
flag = 'COCO'
elif class_name == 'FastSCNN':
logging.warning(
warning_info.format(class_name, flag, 'CITYSCAPES'))
flag = 'CITYSCAPES'
if flag == 'IMAGENET':
new_save_dir = save_dir
if hasattr(paddlex, 'pretrain_dir'):
new_save_dir = paddlex.pretrain_dir
......@@ -96,7 +182,7 @@ def get_pretrain_weights(flag, model_type, backbone, save_dir):
backbone = 'MobileNetV3_small_x1_0_ssld'
elif backbone == 'MobileNetV3_large_ssld':
backbone = 'MobileNetV3_large_x1_0_ssld'
if model_type == 'detector':
if class_name in ['YOLOv3', 'FasterRCNN', 'MaskRCNN']:
if backbone == 'ResNet50':
backbone = 'DetResNet50'
assert backbone in image_pretrain, "There is not ImageNet pretrain weights for {}, you may try COCO.".format(
......@@ -121,17 +207,20 @@ def get_pretrain_weights(flag, model_type, backbone, save_dir):
raise Exception(
"Unexpected error, please make sure paddlehub >= 1.6.2")
return osp.join(new_save_dir, backbone)
elif flag == 'COCO':
elif flag in ['COCO', 'CITYSCAPES']:
new_save_dir = save_dir
if hasattr(paddlex, 'pretrain_dir'):
new_save_dir = paddlex.pretrain_dir
if class_name in ['YOLOv3', 'FasterRCNN', 'MaskRCNN', 'DeepLabv3p']:
backbone = '{}_{}'.format(class_name, backbone)
backbone = "{}_{}".format(backbone, flag)
if flag == 'COCO':
url = coco_pretrain[backbone]
elif flag == 'CITYSCAPES':
url = cityscapes_pretrain[backbone]
fname = osp.split(url)[-1].split('.')[0]
# paddlex.utils.download_and_decompress(url, path=new_save_dir)
# return osp.join(new_save_dir, fname)
assert backbone in coco_pretrain, "There is not COCO pretrain weights for {}, you may try ImageNet.".format(
backbone)
try:
hub.download(backbone, save_path=new_save_dir)
except Exception as e:
......@@ -148,5 +237,5 @@ def get_pretrain_weights(flag, model_type, backbone, save_dir):
return osp.join(new_save_dir, backbone)
else:
raise Exception(
"pretrain_weights need to be defined as directory path or `IMAGENET` or 'COCO' (download pretrain weights automatically)."
"pretrain_weights need to be defined as directory path or 'IMAGENET' or 'COCO' or 'Cityscapes' (download pretrain weights automatically)."
)
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# -*- coding: utf-8 -*
import os
import cv2
import colorsys
......
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
import math
......@@ -188,7 +188,8 @@ class YOLOv3(BaseAPI):
log_interval_steps (int): 训练日志输出间隔(单位:迭代次数)。默认为10。
save_dir (str): 模型保存路径。默认值为'output'。
pretrain_weights (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',
则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为'IMAGENET'。
则自动下载在ImageNet图片数据上预训练的模型权重;若为字符串'COCO',
则自动下载在COCO数据集上预训练的模型权重;若为None,则不使用预训练模型。默认为'IMAGENET'。
optimizer (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器:
fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。
learning_rate (float): 默认优化器的学习率。默认为1.0/8000。
......
......@@ -20,6 +20,7 @@ from .mobilenet_v2 import MobileNetV2
from .mobilenet_v3 import MobileNetV3
from .segmentation import UNet
from .segmentation import DeepLabv3p
from .segmentation import FastSCNN
from .xception import Xception
from .densenet import DenseNet
from .shufflenet_v2 import ShuffleNetV2
......
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
......
......@@ -51,15 +51,38 @@ class HRNet(object):
self.width = width
self.has_se = has_se
self.num_modules = {
'18_small_v1': [1, 1, 1, 1],
'18': [1, 1, 4, 3],
'30': [1, 1, 4, 3],
'32': [1, 1, 4, 3],
'40': [1, 1, 4, 3],
'44': [1, 1, 4, 3],
'48': [1, 1, 4, 3],
'60': [1, 1, 4, 3],
'64': [1, 1, 4, 3]
}
self.num_blocks = {
'18_small_v1': [[1], [2, 2], [2, 2, 2], [2, 2, 2, 2]],
'18': [[4], [4, 4], [4, 4, 4], [4, 4, 4, 4]],
'30': [[4], [4, 4], [4, 4, 4], [4, 4, 4, 4]],
'32': [[4], [4, 4], [4, 4, 4], [4, 4, 4, 4]],
'40': [[4], [4, 4], [4, 4, 4], [4, 4, 4, 4]],
'44': [[4], [4, 4], [4, 4, 4], [4, 4, 4, 4]],
'48': [[4], [4, 4], [4, 4, 4], [4, 4, 4, 4]],
'60': [[4], [4, 4], [4, 4, 4], [4, 4, 4, 4]],
'64': [[4], [4, 4], [4, 4, 4], [4, 4, 4, 4]]
}
self.channels = {
18: [[18, 36], [18, 36, 72], [18, 36, 72, 144]],
30: [[30, 60], [30, 60, 120], [30, 60, 120, 240]],
32: [[32, 64], [32, 64, 128], [32, 64, 128, 256]],
40: [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
44: [[44, 88], [44, 88, 176], [44, 88, 176, 352]],
48: [[48, 96], [48, 96, 192], [48, 96, 192, 384]],
60: [[60, 120], [60, 120, 240], [60, 120, 240, 480]],
64: [[64, 128], [64, 128, 256], [64, 128, 256, 512]],
'18_small_v1': [[32], [16, 32], [16, 32, 64], [16, 32, 64, 128]],
'18': [[64], [18, 36], [18, 36, 72], [18, 36, 72, 144]],
'30': [[64], [30, 60], [30, 60, 120], [30, 60, 120, 240]],
'32': [[64], [32, 64], [32, 64, 128], [32, 64, 128, 256]],
'40': [[64], [40, 80], [40, 80, 160], [40, 80, 160, 320]],
'44': [[64], [44, 88], [44, 88, 176], [44, 88, 176, 352]],
'48': [[64], [48, 96], [48, 96, 192], [48, 96, 192, 384]],
'60': [[64], [60, 120], [60, 120, 240], [60, 120, 240, 480]],
'64': [[64], [64, 128], [64, 128, 256], [64, 128, 256, 512]],
}
self.freeze_at = freeze_at
......@@ -73,31 +96,38 @@ class HRNet(object):
def net(self, input):
width = self.width
channels_2, channels_3, channels_4 = self.channels[width]
num_modules_2, num_modules_3, num_modules_4 = 1, 4, 3
channels_1, channels_2, channels_3, channels_4 = self.channels[str(
width)]
num_modules_1, num_modules_2, num_modules_3, num_modules_4 = self.num_modules[
str(width)]
num_blocks_1, num_blocks_2, num_blocks_3, num_blocks_4 = self.num_blocks[
str(width)]
x = self.conv_bn_layer(
input=input,
filter_size=3,
num_filters=64,
num_filters=channels_1[0],
stride=2,
if_act=True,
name='layer1_1')
x = self.conv_bn_layer(
input=x,
filter_size=3,
num_filters=64,
num_filters=channels_1[0],
stride=2,
if_act=True,
name='layer1_2')
la1 = self.layer1(x, name='layer2')
la1 = self.layer1(x, num_blocks_1, channels_1, name='layer2')
tr1 = self.transition_layer([la1], [256], channels_2, name='tr1')
st2 = self.stage(tr1, num_modules_2, channels_2, name='st2')
st2 = self.stage(
tr1, num_modules_2, num_blocks_2, channels_2, name='st2')
tr2 = self.transition_layer(st2, channels_2, channels_3, name='tr2')
st3 = self.stage(tr2, num_modules_3, channels_3, name='st3')
st3 = self.stage(
tr2, num_modules_3, num_blocks_3, channels_3, name='st3')
tr3 = self.transition_layer(st3, channels_3, channels_4, name='tr3')
st4 = self.stage(tr3, num_modules_4, channels_4, name='st4')
st4 = self.stage(
tr3, num_modules_4, num_blocks_4, channels_4, name='st4')
# classification
if self.num_classes:
......@@ -139,12 +169,12 @@ class HRNet(object):
self.end_points = st4
return st4[-1]
def layer1(self, input, name=None):
def layer1(self, input, num_blocks, channels, name=None):
conv = input
for i in range(4):
for i in range(num_blocks[0]):
conv = self.bottleneck_block(
conv,
num_filters=64,
num_filters=channels[0],
downsample=True if i == 0 else False,
name=name + '_' + str(i + 1))
return conv
......@@ -178,7 +208,7 @@ class HRNet(object):
out = []
for i in range(len(channels)):
residual = x[i]
for j in range(block_num):
for j in range(block_num[i]):
residual = self.basic_block(
residual,
channels[i],
......@@ -240,10 +270,11 @@ class HRNet(object):
def high_resolution_module(self,
x,
num_blocks,
channels,
multi_scale_output=True,
name=None):
residual = self.branches(x, 4, channels, name=name)
residual = self.branches(x, num_blocks, channels, name=name)
out = self.fuse_layers(
residual,
channels,
......@@ -254,6 +285,7 @@ class HRNet(object):
def stage(self,
x,
num_modules,
num_blocks,
channels,
multi_scale_output=True,
name=None):
......@@ -262,12 +294,13 @@ class HRNet(object):
if i == num_modules - 1 and multi_scale_output == False:
out = self.high_resolution_module(
out,
num_blocks,
channels,
multi_scale_output=False,
name=name + '_' + str(i + 1))
else:
out = self.high_resolution_module(
out, channels, name=name + '_' + str(i + 1))
out, num_blocks, channels, name=name + '_' + str(i + 1))
return out
......
......@@ -15,5 +15,6 @@
from .unet import UNet
from .deeplabv3p import DeepLabv3p
from .hrnet import HRNet
from .fast_scnn import FastSCNN
from .model_utils import libs
from .model_utils import loss
......@@ -28,7 +28,6 @@ from .model_utils.libs import sigmoid_to_softmax
from .model_utils.loss import softmax_with_loss
from .model_utils.loss import dice_loss
from .model_utils.loss import bce_loss
import paddlex.utils.logging as logging
from paddlex.cv.nets.xception import Xception
from paddlex.cv.nets.mobilenet_v2 import MobileNetV2
......
# coding: utf8
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from collections import OrderedDict
import paddle.fluid as fluid
from .model_utils.libs import scope
from .model_utils.libs import bn, bn_relu, relu, conv_bn_layer
from .model_utils.libs import conv, avg_pool
from .model_utils.libs import separate_conv
from .model_utils.libs import sigmoid_to_softmax
from .model_utils.loss import softmax_with_loss
from .model_utils.loss import dice_loss
from .model_utils.loss import bce_loss
class FastSCNN(object):
def __init__(self,
num_classes,
mode='train',
use_bce_loss=False,
use_dice_loss=False,
class_weight=None,
multi_loss_weight=[1.0],
ignore_index=255,
fixed_input_shape=None):
# dice_loss或bce_loss只适用两类分割中
if num_classes > 2 and (use_bce_loss or use_dice_loss):
raise ValueError(
"dice loss and bce loss is only applicable to binary classfication"
)
if class_weight is not None:
if isinstance(class_weight, list):
if len(class_weight) != num_classes:
raise ValueError(
"Length of class_weight should be equal to number of classes"
)
elif isinstance(class_weight, str):
if class_weight.lower() != 'dynamic':
raise ValueError(
"if class_weight is string, must be dynamic!")
else:
raise TypeError(
'Expect class_weight is a list or string but receive {}'.
format(type(class_weight)))
self.num_classes = num_classes
self.mode = mode
self.use_bce_loss = use_bce_loss
self.use_dice_loss = use_dice_loss
self.class_weight = class_weight
self.ignore_index = ignore_index
self.multi_loss_weight = multi_loss_weight
self.fixed_input_shape = fixed_input_shape
def build_net(self, inputs):
if self.use_dice_loss or self.use_bce_loss:
self.num_classes = 1
image = inputs['image']
size = fluid.layers.shape(image)[2:]
with scope('learning_to_downsample'):
higher_res_features = self._learning_to_downsample(image, 32, 48,
64)
with scope('global_feature_extractor'):
lower_res_feature = self._global_feature_extractor(
higher_res_features, 64, [64, 96, 128], 128, 6, [3, 3, 3])
with scope('feature_fusion'):
x = self._feature_fusion(higher_res_features, lower_res_feature,
64, 128, 128)
with scope('classifier'):
logit = self._classifier(x, 128)
logit = fluid.layers.resize_bilinear(logit, size, align_mode=0)
if len(self.multi_loss_weight) == 3:
with scope('aux_layer_higher'):
higher_logit = self._aux_layer(higher_res_features,
self.num_classes)
higher_logit = fluid.layers.resize_bilinear(
higher_logit, size, align_mode=0)
with scope('aux_layer_lower'):
lower_logit = self._aux_layer(lower_res_feature,
self.num_classes)
lower_logit = fluid.layers.resize_bilinear(
lower_logit, size, align_mode=0)
logit = (logit, higher_logit, lower_logit)
elif len(self.multi_loss_weight) == 2:
with scope('aux_layer_higher'):
higher_logit = self._aux_layer(higher_res_features,
self.num_classes)
higher_logit = fluid.layers.resize_bilinear(
higher_logit, size, align_mode=0)
logit = (logit, higher_logit)
else:
logit = (logit, )
if self.num_classes == 1:
out = sigmoid_to_softmax(logit[0])
out = fluid.layers.transpose(out, [0, 2, 3, 1])
else:
out = fluid.layers.transpose(logit[0], [0, 2, 3, 1])
pred = fluid.layers.argmax(out, axis=3)
pred = fluid.layers.unsqueeze(pred, axes=[3])
if self.mode == 'train':
label = inputs['label']
return self._get_loss(logit, label)
elif self.mode == 'eval':
label = inputs['label']
loss = self._get_loss(logit, label)
return loss, pred, label, mask
else:
if self.num_classes == 1:
logit = sigmoid_to_softmax(logit[0])
else:
logit = fluid.layers.softmax(logit[0], axis=1)
return pred, logit
def generate_inputs(self):
inputs = OrderedDict()
if self.fixed_input_shape is not None:
input_shape = [
None, 3, self.fixed_input_shape[1], self.fixed_input_shape[0]
]
inputs['image'] = fluid.data(
dtype='float32', shape=input_shape, name='image')
else:
inputs['image'] = fluid.data(
dtype='float32', shape=[None, 3, None, None], name='image')
if self.mode == 'train':
inputs['label'] = fluid.data(
dtype='int32', shape=[None, 1, None, None], name='label')
elif self.mode == 'eval':
inputs['label'] = fluid.data(
dtype='int32', shape=[None, 1, None, None], name='label')
return inputs
def _get_loss(self, logits, label):
avg_loss = 0
if not (self.use_dice_loss or self.use_bce_loss):
for i, logit in enumerate(logits):
logit_mask = (
label.astype('int32') != self.ignore_index).astype('int32')
loss = softmax_with_loss(
logit,
label,
logit_mask,
num_classes=self.num_classes,
weight=self.class_weight,
ignore_index=self.ignore_index)
avg_loss += self.multi_loss_weight[i] * loss
else:
if self.use_dice_loss:
for i, logit in enumerate(logits):
logit_mask = (label.astype('int32') != self.ignore_index
).astype('int32')
loss = dice_loss(logit, label, logit_mask)
avg_loss += self.multi_loss_weight[i] * loss
if self.use_bce_loss:
for i, logit in enumerate(logits):
#logit_label = fluid.layers.resize_nearest(label, logit_shape[2:])
logit_mask = (label.astype('int32') != self.ignore_index
).astype('int32')
loss = bce_loss(
logit,
label,
logit_mask,
ignore_index=self.ignore_index)
avg_loss += self.multi_loss_weight[i] * loss
return avg_loss
def _learning_to_downsample(self,
x,
dw_channels1=32,
dw_channels2=48,
out_channels=64):
x = relu(bn(conv(x, dw_channels1, 3, 2)))
with scope('dsconv1'):
x = separate_conv(
x, dw_channels2, stride=2, filter=3, act=fluid.layers.relu)
with scope('dsconv2'):
x = separate_conv(
x, out_channels, stride=2, filter=3, act=fluid.layers.relu)
return x
def _shortcut(self, input, data_residual):
return fluid.layers.elementwise_add(input, data_residual)
def _dropout2d(self, input, prob, is_train=False):
if not is_train:
return input
keep_prob = 1.0 - prob
shape = fluid.layers.shape(input)
channels = shape[1]
random_tensor = keep_prob + fluid.layers.uniform_random(
[shape[0], channels, 1, 1], min=0., max=1.)
binary_tensor = fluid.layers.floor(random_tensor)
output = input / keep_prob * binary_tensor
return output
def _inverted_residual_unit(self,
input,
num_in_filter,
num_filters,
ifshortcut,
stride,
filter_size,
padding,
expansion_factor,
name=None):
num_expfilter = int(round(num_in_filter * expansion_factor))
channel_expand = conv_bn_layer(
input=input,
num_filters=num_expfilter,
filter_size=1,
stride=1,
padding=0,
num_groups=1,
if_act=True,
name=name + '_expand')
bottleneck_conv = conv_bn_layer(
input=channel_expand,
num_filters=num_expfilter,
filter_size=filter_size,
stride=stride,
padding=padding,
num_groups=num_expfilter,
if_act=True,
name=name + '_dwise',
use_cudnn=False)
depthwise_output = bottleneck_conv
linear_out = conv_bn_layer(
input=bottleneck_conv,
num_filters=num_filters,
filter_size=1,
stride=1,
padding=0,
num_groups=1,
if_act=False,
name=name + '_linear')
if ifshortcut:
out = self._shortcut(input=input, data_residual=linear_out)
return out, depthwise_output
else:
return linear_out, depthwise_output
def _inverted_blocks(self, input, in_c, t, c, n, s, name=None):
first_block, depthwise_output = self._inverted_residual_unit(
input=input,
num_in_filter=in_c,
num_filters=c,
ifshortcut=False,
stride=s,
filter_size=3,
padding=1,
expansion_factor=t,
name=name + '_1')
last_residual_block = first_block
last_c = c
for i in range(1, n):
last_residual_block, depthwise_output = self._inverted_residual_unit(
input=last_residual_block,
num_in_filter=last_c,
num_filters=c,
ifshortcut=True,
stride=1,
filter_size=3,
padding=1,
expansion_factor=t,
name=name + '_' + str(i + 1))
return last_residual_block, depthwise_output
def _psp_module(self, input, out_features):
cat_layers = []
sizes = (1, 2, 3, 6)
for size in sizes:
psp_name = "psp" + str(size)
with scope(psp_name):
pool = fluid.layers.adaptive_pool2d(
input,
pool_size=[size, size],
pool_type='avg',
name=psp_name + '_adapool')
data = conv(
pool,
out_features,
filter_size=1,
bias_attr=False,
name=psp_name + '_conv')
data_bn = bn(data, act='relu')
interp = fluid.layers.resize_bilinear(
data_bn,
out_shape=fluid.layers.shape(input)[2:],
name=psp_name + '_interp',
align_mode=0)
cat_layers.append(interp)
cat_layers = [input] + cat_layers
out = fluid.layers.concat(cat_layers, axis=1, name='psp_cat')
return out
def _aux_layer(self, x, num_classes):
x = relu(bn(conv(x, 32, 3, padding=1)))
x = self._dropout2d(x, 0.1, is_train=(self.mode == 'train'))
with scope('logit'):
x = conv(x, num_classes, 1, bias_attr=True)
return x
def _feature_fusion(self,
higher_res_feature,
lower_res_feature,
higher_in_channels,
lower_in_channels,
out_channels,
scale_factor=4):
shape = fluid.layers.shape(higher_res_feature)
w = shape[-1]
h = shape[-2]
lower_res_feature = fluid.layers.resize_bilinear(
lower_res_feature, [h, w], align_mode=0)
with scope('dwconv'):
lower_res_feature = relu(
bn(conv(lower_res_feature, out_channels,
1))) #(lower_res_feature)
with scope('conv_lower_res'):
lower_res_feature = bn(
conv(
lower_res_feature, out_channels, 1, bias_attr=True))
with scope('conv_higher_res'):
higher_res_feature = bn(
conv(
higher_res_feature, out_channels, 1, bias_attr=True))
out = higher_res_feature + lower_res_feature
return relu(out)
def _global_feature_extractor(self,
x,
in_channels=64,
block_channels=(64, 96, 128),
out_channels=128,
t=6,
num_blocks=(3, 3, 3)):
x, _ = self._inverted_blocks(x, in_channels, t, block_channels[0],
num_blocks[0], 2, 'inverted_block_1')
x, _ = self._inverted_blocks(x, block_channels[0], t,
block_channels[1], num_blocks[1], 2,
'inverted_block_2')
x, _ = self._inverted_blocks(x, block_channels[1], t,
block_channels[2], num_blocks[2], 1,
'inverted_block_3')
x = self._psp_module(x, block_channels[2] // 4)
with scope('out'):
x = relu(bn(conv(x, out_channels, 1)))
return x
def _classifier(self, x, dw_channels, stride=1):
with scope('dsconv1'):
x = separate_conv(
x, dw_channels, stride=stride, filter=3, act=fluid.layers.relu)
with scope('dsconv2'):
x = separate_conv(
x, dw_channels, stride=stride, filter=3, act=fluid.layers.relu)
x = self._dropout2d(x, 0.1, is_train=self.mode == 'train')
x = conv(x, self.num_classes, 1, bias_attr=True)
return x
......@@ -27,7 +27,6 @@ from .model_utils.loss import softmax_with_loss
from .model_utils.loss import dice_loss
from .model_utils.loss import bce_loss
import paddlex
import paddlex.utils.logging as logging
class HRNet(object):
......@@ -83,7 +82,8 @@ class HRNet(object):
st4[3] = fluid.layers.resize_bilinear(st4[3], out_shape=shape)
out = fluid.layers.concat(st4, axis=1)
last_channels = sum(self.backbone.channels[self.backbone.width][-1])
last_channels = sum(self.backbone.channels[str(self.backbone.width)][
-1])
out = self._conv_bn_layer(
input=out,
......
......@@ -27,7 +27,6 @@ from .model_utils.libs import sigmoid_to_softmax
from .model_utils.loss import softmax_with_loss
from .model_utils.loss import dice_loss
from .model_utils.loss import bce_loss
import paddlex.utils.logging as logging
class UNet(object):
......@@ -106,7 +105,8 @@ class UNet(object):
name='weights',
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=0.0),
initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.33))
initializer=fluid.initializer.TruncatedNormal(
loc=0.0, scale=0.33))
with scope("conv0"):
data = bn_relu(
conv(
......@@ -140,8 +140,7 @@ class UNet(object):
name='weights',
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=0.0),
initializer=fluid.initializer.XavierInitializer(),
)
initializer=fluid.initializer.XavierInitializer(), )
with scope("up"):
if self.upsample_mode == 'bilinear':
short_cut_shape = fluid.layers.shape(short_cut)
......@@ -197,7 +196,8 @@ class UNet(object):
name='weights',
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=0.0),
initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.01))
initializer=fluid.initializer.TruncatedNormal(
loc=0.0, scale=0.01))
with scope("logit"):
data = conv(
data,
......
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
......
......@@ -15,3 +15,5 @@
from . import cls_transforms
from . import det_transforms
from . import seg_transforms
from . import visualize
visualize = visualize.visualize
......@@ -32,10 +32,8 @@ class ClsTransform:
class Compose(ClsTransform):
"""根据数据预处理/增强算子对输入数据进行操作。
所有操作的输入图像流形状均是[H, W, C],其中H为图像高,W为图像宽,C为图像通道数。
Args:
transforms (list): 数据预处理/增强算子。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
......@@ -70,8 +68,8 @@ class Compose(ClsTransform):
if isinstance(im, np.ndarray):
if len(im.shape) != 3:
raise Exception(
"im should be 3-dimension, but now is {}-dimensions".
format(len(im.shape)))
"im should be 3-dimension, but now is {}-dimensions".format(
len(im.shape)))
else:
try:
im = cv2.imread(im).astype('float32')
......@@ -100,7 +98,9 @@ class Compose(ClsTransform):
transform_names = [type(x).__name__ for x in self.transforms]
for aug in augmenters:
if type(aug).__name__ in transform_names:
logging.error("{} is already in ComposedTransforms, need to remove it from add_augmenters().".format(type(aug).__name__))
logging.error(
"{} is already in ComposedTransforms, need to remove it from add_augmenters().".
format(type(aug).__name__))
self.transforms = augmenters + self.transforms
......@@ -139,8 +139,8 @@ class RandomCrop(ClsTransform):
tuple: 当label为空时,返回的tuple为(im, ),对应图像np.ndarray数据;
当label不为空时,返回的tuple为(im, label),分别对应图像np.ndarray数据、图像类别id。
"""
im = random_crop(im, self.crop_size, self.lower_scale,
self.lower_ratio, self.upper_ratio)
im = random_crop(im, self.crop_size, self.lower_scale, self.lower_ratio,
self.upper_ratio)
if label is None:
return (im, )
else:
......@@ -270,14 +270,12 @@ class ResizeByShort(ClsTransform):
im_short_size = min(im.shape[0], im.shape[1])
im_long_size = max(im.shape[0], im.shape[1])
scale = float(self.short_size) / im_short_size
if self.max_size > 0 and np.round(scale *
im_long_size) > self.max_size:
if self.max_size > 0 and np.round(scale * im_long_size) > self.max_size:
scale = float(self.max_size) / float(im_long_size)
resized_width = int(round(im.shape[1] * scale))
resized_height = int(round(im.shape[0] * scale))
im = cv2.resize(
im, (resized_width, resized_height),
interpolation=cv2.INTER_LINEAR)
im, (resized_width, resized_height), interpolation=cv2.INTER_LINEAR)
if label is None:
return (im, )
......@@ -434,6 +432,7 @@ class RandomDistort(ClsTransform):
params['im'] = im
if np.random.uniform(0, 1) < prob:
im = ops[id](**params)
im = im.astype('float32')
if label is None:
return (im, )
else:
......@@ -490,13 +489,15 @@ class ComposedClsTransforms(Compose):
crop_size(int|list): 输入模型里的图像大小
mean(list): 图像均值
std(list): 图像方差
random_horizontal_flip(bool): 是否以0.5的概率使用随机水平翻转增强,该仅在mode为`train`时生效,默认为True
"""
def __init__(self,
mode,
crop_size=[224, 224],
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]):
std=[0.229, 0.224, 0.225],
random_horizontal_flip=True):
width = crop_size
if isinstance(crop_size, list):
if crop_size[0] != crop_size[1]:
......@@ -512,10 +513,11 @@ class ComposedClsTransforms(Compose):
if mode == 'train':
# 训练时的transforms,包含数据增强
transforms = [
RandomCrop(crop_size=width), RandomHorizontalFlip(prob=0.5),
Normalize(
RandomCrop(crop_size=width), Normalize(
mean=mean, std=std)
]
if random_horizontal_flip:
transforms.insert(0, RandomHorizontalFlip())
else:
# 验证/预测时的transforms
transforms = [
......
......@@ -41,10 +41,8 @@ class DetTransform:
class Compose(DetTransform):
"""根据数据预处理/增强列表对输入数据进行操作。
所有操作的输入图像流形状均是[H, W, C],其中H为图像高,W为图像宽,C为图像通道数。
Args:
transforms (list): 数据预处理/增强列表。
Raises:
TypeError: 形参数据类型不满足需求。
ValueError: 数据长度不匹配。
......@@ -160,7 +158,9 @@ class Compose(DetTransform):
transform_names = [type(x).__name__ for x in self.transforms]
for aug in augmenters:
if type(aug).__name__ in transform_names:
logging.error("{} is already in ComposedTransforms, need to remove it from add_augmenters().".format(type(aug).__name__))
logging.error(
"{} is already in ComposedTransforms, need to remove it from add_augmenters().".
format(type(aug).__name__))
self.transforms = augmenters + self.transforms
......@@ -220,15 +220,13 @@ class ResizeByShort(DetTransform):
im_short_size = min(im.shape[0], im.shape[1])
im_long_size = max(im.shape[0], im.shape[1])
scale = float(self.short_size) / im_short_size
if self.max_size > 0 and np.round(scale *
im_long_size) > self.max_size:
if self.max_size > 0 and np.round(scale * im_long_size) > self.max_size:
scale = float(self.max_size) / float(im_long_size)
resized_width = int(round(im.shape[1] * scale))
resized_height = int(round(im.shape[0] * scale))
im_resize_info = [resized_height, resized_width, scale]
im = cv2.resize(
im, (resized_width, resized_height),
interpolation=cv2.INTER_LINEAR)
im, (resized_width, resized_height), interpolation=cv2.INTER_LINEAR)
im_info['im_resize_info'] = np.array(im_resize_info).astype(np.float32)
if label_info is None:
return (im, im_info)
......@@ -268,8 +266,7 @@ class Padding(DetTransform):
if not isinstance(target_size, tuple) and not isinstance(
target_size, list):
raise TypeError(
"Padding: Type of target_size must in (int|list|tuple)."
)
"Padding: Type of target_size must in (int|list|tuple).")
elif len(target_size) != 2:
raise ValueError(
"Padding: Length of target_size must equal 2.")
......@@ -454,8 +451,7 @@ class RandomHorizontalFlip(DetTransform):
ValueError: 数据长度不匹配。
"""
if not isinstance(im, np.ndarray):
raise TypeError(
"RandomHorizontalFlip: image is not a numpy array.")
raise TypeError("RandomHorizontalFlip: image is not a numpy array.")
if len(im.shape) != 3:
raise ValueError(
"RandomHorizontalFlip: image is not 3-dimensional.")
......@@ -621,6 +617,7 @@ class RandomDistort(DetTransform):
if np.random.uniform(0, 1) < prob:
im = ops[id](**params)
im = im.astype('float32')
if label_info is None:
return (im, im_info)
else:
......@@ -727,22 +724,38 @@ class MixupImage(DetTransform):
'Becasuse gt_bbox/gt_class/gt_score is not in label_info!')
gt_bbox1 = label_info['gt_bbox']
gt_bbox2 = im_info['mixup'][2]['gt_bbox']
gt_bbox = np.concatenate((gt_bbox1, gt_bbox2), axis=0)
gt_class1 = label_info['gt_class']
gt_class2 = im_info['mixup'][2]['gt_class']
gt_class = np.concatenate((gt_class1, gt_class2), axis=0)
gt_score1 = label_info['gt_score']
gt_score2 = im_info['mixup'][2]['gt_score']
gt_score = np.concatenate(
(gt_score1 * factor, gt_score2 * (1. - factor)), axis=0)
if 'gt_poly' in label_info:
gt_poly1 = label_info['gt_poly']
gt_poly2 = im_info['mixup'][2]['gt_poly']
label_info['gt_poly'] = gt_poly1 + gt_poly2
is_crowd1 = label_info['is_crowd']
is_crowd2 = im_info['mixup'][2]['is_crowd']
if 0 not in gt_class1 and 0 not in gt_class2:
gt_bbox = np.concatenate((gt_bbox1, gt_bbox2), axis=0)
gt_class = np.concatenate((gt_class1, gt_class2), axis=0)
gt_score = np.concatenate(
(gt_score1 * factor, gt_score2 * (1. - factor)), axis=0)
if 'gt_poly' in label_info:
label_info['gt_poly'] = gt_poly1 + gt_poly2
is_crowd = np.concatenate((is_crowd1, is_crowd2), axis=0)
elif 0 in gt_class1:
gt_bbox = gt_bbox2
gt_class = gt_class2
gt_score = gt_score2 * (1. - factor)
if 'gt_poly' in label_info:
label_info['gt_poly'] = gt_poly2
is_crowd = is_crowd2
else:
gt_bbox = gt_bbox1
gt_class = gt_class1
gt_score = gt_score1 * factor
if 'gt_poly' in label_info:
label_info['gt_poly'] = gt_poly1
is_crowd = is_crowd1
label_info['gt_bbox'] = gt_bbox
label_info['gt_score'] = gt_score
label_info['gt_class'] = gt_class
......@@ -769,9 +782,7 @@ class RandomExpand(DetTransform):
fill_value (list): 扩张图像的初始填充值(0-255)。默认为[123.675, 116.28, 103.53]。
"""
def __init__(self,
ratio=4.,
prob=0.5,
def __init__(self, ratio=4., prob=0.5,
fill_value=[123.675, 116.28, 103.53]):
super(RandomExpand, self).__init__()
assert ratio > 1.01, "expand ratio must be larger than 1.01"
......@@ -811,9 +822,11 @@ class RandomExpand(DetTransform):
'gt_class' not in label_info:
raise TypeError('Cannot do RandomExpand! ' + \
'Becasuse gt_bbox/gt_class is not in label_info!')
if np.random.uniform(0., 1.) < self.prob:
if np.random.uniform(0., 1.) > self.prob:
return (im, im_info, label_info)
if 'gt_class' in label_info and 0 in label_info['gt_class']:
return (im, im_info, label_info)
image_shape = im_info['image_shape']
height = int(image_shape[0])
width = int(image_shape[1])
......@@ -909,6 +922,8 @@ class RandomCrop(DetTransform):
if len(label_info['gt_bbox']) == 0:
return (im, im_info, label_info)
if 'gt_class' in label_info and 0 in label_info['gt_class']:
return (im, im_info, label_info)
image_shape = im_info['image_shape']
w = image_shape[1]
......@@ -1204,6 +1219,7 @@ class ArrangeYOLOv3(DetTransform):
if gt_num > 0:
label_info['gt_class'][:gt_num, 0] = label_info[
'gt_class'][:gt_num, 0] - 1
if -1 not in label_info['gt_class']:
gt_bbox[:gt_num, :] = label_info['gt_bbox'][:gt_num, :]
gt_class[:gt_num] = label_info['gt_class'][:gt_num, 0]
gt_score[:gt_num] = label_info['gt_score'][:gt_num, 0]
......@@ -1260,21 +1276,25 @@ class ComposedRCNNTransforms(Compose):
min_max_size(list): 图像在缩放时,最小边和最大边的约束条件
mean(list): 图像均值
std(list): 图像方差
random_horizontal_flip(bool): 是否以0.5的概率使用随机水平翻转增强,该仅在mode为`train`时生效,默认为True
"""
def __init__(self,
mode,
min_max_size=[800, 1333],
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]):
std=[0.229, 0.224, 0.225],
random_horizontal_flip=True):
if mode == 'train':
# 训练时的transforms,包含数据增强
transforms = [
RandomHorizontalFlip(prob=0.5), Normalize(
Normalize(
mean=mean, std=std), ResizeByShort(
short_size=min_max_size[0], max_size=min_max_size[1]),
Padding(coarsest_stride=32)
]
if random_horizontal_flip:
transforms.insert(0, RandomHorizontalFlip())
else:
# 验证/预测时的transforms
transforms = [
......@@ -1304,9 +1324,14 @@ class ComposedYOLOv3Transforms(Compose):
Args:
mode(str): 图像处理流程所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test'
shape(list): 输入模型中图像的大小,输入模型的图像会被Resize成此大小
mixup_epoch(int): 模型训练过程中,前mixup_epoch会使用mixup策略
mixup_epoch(int): 模型训练过程中,前mixup_epoch会使用mixup策略, 若设为-1,则表示不使用该策略
mean(list): 图像均值
std(list): 图像方差
random_distort(bool): 数据增强方式,参数仅在mode为`train`时生效,表示是否在训练过程中随机扰动图像,默认为True
random_expand(bool): 数据增强方式,参数仅在mode为`train`时生效,表示是否在训练过程中随机扩张图像,默认为True
random_crop(bool): 数据增强方式,参数仅在mode为`train`时生效,表示是否在训练过程中随机裁剪图像,默认为True
random_horizontal_flip(bool): 数据增强方式,参数仅在mode为`train`时生效,表示是否在训练过程中随机水平翻转图像,默认为True
"""
def __init__(self,
......@@ -1314,7 +1339,11 @@ class ComposedYOLOv3Transforms(Compose):
shape=[608, 608],
mixup_epoch=250,
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]):
std=[0.229, 0.224, 0.225],
random_distort=True,
random_expand=True,
random_crop=True,
random_horizontal_flip=True):
width = shape
if isinstance(shape, list):
if shape[0] != shape[1]:
......@@ -1329,12 +1358,18 @@ class ComposedYOLOv3Transforms(Compose):
if mode == 'train':
# 训练时的transforms,包含数据增强
transforms = [
MixupImage(mixup_epoch=mixup_epoch), RandomDistort(),
RandomExpand(), RandomCrop(), Resize(
target_size=width,
interp='RANDOM'), RandomHorizontalFlip(), Normalize(
MixupImage(mixup_epoch=mixup_epoch), Resize(
target_size=width, interp='RANDOM'), Normalize(
mean=mean, std=std)
]
if random_horizontal_flip:
transforms.insert(1, RandomHorizontalFlip())
if random_crop:
transforms.insert(1, RandomCrop())
if random_expand:
transforms.insert(1, RandomExpand())
if random_distort:
transforms.insert(1, RandomDistort())
else:
# 验证/预测时的transforms
transforms = [
......
......@@ -35,14 +35,11 @@ class SegTransform:
class Compose(SegTransform):
"""根据数据预处理/增强算子对输入数据进行操作。
所有操作的输入图像流形状均是[H, W, C],其中H为图像高,W为图像宽,C为图像通道数。
Args:
transforms (list): 数据预处理/增强算子。
Raises:
TypeError: transforms不是list对象
ValueError: transforms元素个数小于1。
"""
def __init__(self, transforms):
......@@ -71,7 +68,6 @@ class Compose(SegTransform):
图像在过resize前shape为(200, 300), 过padding前shape为
(400, 600)
label (str/np.ndarray): 标注图像路径/标注图像np.ndarray数据。
Returns:
tuple: 根据网络所需字段所组成的tuple;字段由transforms中的最后一个数据预处理操作决定。
"""
......@@ -116,7 +112,9 @@ class Compose(SegTransform):
transform_names = [type(x).__name__ for x in self.transforms]
for aug in augmenters:
if type(aug).__name__ in transform_names:
logging.error("{} is already in ComposedTransforms, need to remove it from add_augmenters().".format(type(aug).__name__))
logging.error(
"{} is already in ComposedTransforms, need to remove it from add_augmenters().".
format(type(aug).__name__))
self.transforms = augmenters + self.transforms
......@@ -401,8 +399,7 @@ class ResizeByShort(SegTransform):
im_short_size = min(im.shape[0], im.shape[1])
im_long_size = max(im.shape[0], im.shape[1])
scale = float(self.short_size) / im_short_size
if self.max_size > 0 and np.round(scale *
im_long_size) > self.max_size:
if self.max_size > 0 and np.round(scale * im_long_size) > self.max_size:
scale = float(self.max_size) / float(im_long_size)
resized_width = int(round(im.shape[1] * scale))
resized_height = int(round(im.shape[0] * scale))
......@@ -1053,6 +1050,7 @@ class RandomDistort(SegTransform):
params['im'] = im
if np.random.uniform(0, 1) < prob:
im = ops[id](**params)
im = im.astype('float32')
if label is None:
return (im, im_info)
else:
......@@ -1113,25 +1111,35 @@ class ComposedSegTransforms(Compose):
Args:
mode(str): 图像处理所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test'
train_crop_size(list): 模型训练阶段,随机从原图crop的大小
min_max_size(list): 训练过程中,图像的最长边会随机resize至此区间(短边按比例相应resize);预测阶段,图像最长边会resize至此区间中间值,即(min_size+max_size)/2。默认为[400, 600]
train_crop_size(list): 仅在mode为'train`时生效,训练过程中,随机从图像中裁剪出对应大小的子图(如若原图小于此大小,则会padding到此大小),默认为[400, 600]
mean(list): 图像均值
std(list): 图像方差
random_horizontal_flip(bool): 数据增强方式,仅在mode为`train`时生效,表示训练过程是否随机水平翻转图像,默认为True
"""
def __init__(self,
mode,
train_crop_size=[769, 769],
min_max_size=[400, 600],
train_crop_size=[512, 512],
mean=[0.5, 0.5, 0.5],
std=[0.5, 0.5, 0.5]):
std=[0.5, 0.5, 0.5],
random_horizontal_flip=True):
if mode == 'train':
# 训练时的transforms,包含数据增强
transforms = [
RandomHorizontalFlip(prob=0.5), ResizeStepScaling(),
ResizeRangeScaling(
min_value=min(min_max_size), max_value=max(min_max_size)),
RandomPaddingCrop(crop_size=train_crop_size), Normalize(
mean=mean, std=std)
]
if random_horizontal_flip:
transforms.insert(0, RandomHorizontalFlip())
else:
# 验证/预测时的transforms
transforms = [Normalize(mean=mean, std=std)]
long_size = (min(min_max_size) + max(min_max_size)) // 2
transforms = [
ResizeByLong(long_size=long_size), Normalize(
mean=mean, std=std)
]
super(ComposedSegTransforms, self).__init__(transforms)
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import os.path as osp
import cv2
from PIL import Image
import numpy as np
import math
from .imgaug_support import execute_imgaug
from .cls_transforms import ClsTransform
from .det_transforms import DetTransform
from .seg_transforms import SegTransform
import paddlex as pdx
from paddlex.cv.models.utils.visualize import get_color_map_list
def _draw_rectangle_and_cname(img, xmin, ymin, xmax, ymax, cname, color):
""" 根据提供的标注信息,给图片描绘框体和类别显示
Args:
img: 图片路径
xmin: 检测框最小的x坐标
ymin: 检测框最小的y坐标
xmax: 检测框最大的x坐标
ymax: 检测框最大的y坐标
cname: 类别信息
color: 类别与颜色的对应信息
"""
# 描绘检测框
line_width = math.ceil(2 * max(img.shape[0:2]) / 600)
cv2.rectangle(
img,
pt1=(xmin, ymin),
pt2=(xmax, ymax),
color=color,
thickness=line_width)
return img
def cls_compose(im, label=None, transforms=None, vdl_writer=None, step=0):
"""
Args:
im (str/np.ndarray): 图像路径/图像np.ndarray数据。
label (int): 每张图像所对应的类别序号。
vdl_writer (visualdl.LogWriter): VisualDL存储器,日志信息将保存在其中。
当为None时,不对日志进行保存。默认为None。
step (int): 数据预处理的轮数,当vdl_writer不为None时有效。默认为0。
Returns:
tuple: 根据网络所需字段所组成的tuple;
字段由transforms中的最后一个数据预处理操作决定。
"""
if isinstance(im, np.ndarray):
if len(im.shape) != 3:
raise Exception(
"im should be 3-dimension, but now is {}-dimensions".
format(len(im.shape)))
else:
try:
im = cv2.imread(im).astype('float32')
except:
raise TypeError('Can\'t read The image file {}!'.format(im))
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
if vdl_writer is not None:
vdl_writer.add_image(tag='0. OriginalImange/' + str(step),
img=im,
step=0)
op_id = 1
for op in transforms:
if isinstance(op, ClsTransform):
if vdl_writer is not None and hasattr(op, 'prob'):
op.prob = 1.0
outputs = op(im, label)
im = outputs[0]
if len(outputs) == 2:
label = outputs[1]
if isinstance(op, pdx.cv.transforms.cls_transforms.Normalize):
continue
else:
import imgaug.augmenters as iaa
if isinstance(op, iaa.Augmenter):
im = execute_imgaug(op, im)
outputs = (im, )
if label is not None:
outputs = (im, label)
if vdl_writer is not None:
tag = str(op_id) + '. ' + op.__class__.__name__ + '/' + str(step)
vdl_writer.add_image(tag=tag,
img=im,
step=0)
op_id += 1
def det_compose(im, im_info=None, label_info=None, transforms=None, vdl_writer=None, step=0,
labels=[], catid2color=None):
def decode_image(im_file, im_info, label_info):
if im_info is None:
im_info = dict()
if isinstance(im_file, np.ndarray):
if len(im_file.shape) != 3:
raise Exception(
"im should be 3-dimensions, but now is {}-dimensions".
format(len(im_file.shape)))
im = im_file
else:
try:
im = cv2.imread(im_file).astype('float32')
except:
raise TypeError('Can\'t read The image file {}!'.format(
im_file))
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
# make default im_info with [h, w, 1]
im_info['im_resize_info'] = np.array(
[im.shape[0], im.shape[1], 1.], dtype=np.float32)
im_info['image_shape'] = np.array([im.shape[0],
im.shape[1]]).astype('int32')
use_mixup = False
for t in transforms:
if type(t).__name__ == 'MixupImage':
use_mixup = True
if not use_mixup:
if 'mixup' in im_info:
del im_info['mixup']
# decode mixup image
if 'mixup' in im_info:
im_info['mixup'] = \
decode_image(im_info['mixup'][0],
im_info['mixup'][1],
im_info['mixup'][2])
if label_info is None:
return (im, im_info)
else:
return (im, im_info, label_info)
outputs = decode_image(im, im_info, label_info)
im = outputs[0]
im_info = outputs[1]
if len(outputs) == 3:
label_info = outputs[2]
if vdl_writer is not None:
vdl_writer.add_image(tag='0. OriginalImange/' + str(step),
img=im,
step=0)
op_id = 1
bboxes = label_info['gt_bbox']
transforms = [None] + transforms
for op in transforms:
if im is None:
return None
if isinstance(op, DetTransform) or op is None:
if vdl_writer is not None and hasattr(op, 'prob'):
op.prob = 1.0
if op is not None:
outputs = op(im, im_info, label_info)
else:
outputs = (im, im_info, label_info)
im = outputs[0]
vdl_im = im
if vdl_writer is not None:
if isinstance(op, pdx.cv.transforms.det_transforms.ResizeByShort):
scale = outputs[1]['im_resize_info'][2]
bboxes = bboxes * scale
elif isinstance(op, pdx.cv.transforms.det_transforms.Resize):
h = outputs[1]['image_shape'][0]
w = outputs[1]['image_shape'][1]
target_size = op.target_size
if isinstance(target_size, int):
h_scale = float(target_size) / h
w_scale = float(target_size) / w
else:
h_scale = float(target_size[0]) / h
w_scale = float(target_size[1]) / w
bboxes[:,0] = bboxes[:,0] * w_scale
bboxes[:,1] = bboxes[:,1] * h_scale
bboxes[:,2] = bboxes[:,2] * w_scale
bboxes[:,3] = bboxes[:,3] * h_scale
else:
bboxes = outputs[2]['gt_bbox']
if not isinstance(op, pdx.cv.transforms.det_transforms.RandomHorizontalFlip):
for i in range(bboxes.shape[0]):
bbox = bboxes[i]
cname = labels[outputs[2]['gt_class'][i][0]-1]
vdl_im = _draw_rectangle_and_cname(vdl_im,
int(bbox[0]),
int(bbox[1]),
int(bbox[2]),
int(bbox[3]),
cname,
catid2color[outputs[2]['gt_class'][i][0]-1])
if isinstance(op, pdx.cv.transforms.det_transforms.Normalize):
continue
else:
im = execute_imgaug(op, im)
if label_info is not None:
outputs = (im, im_info, label_info)
else:
outputs = (im, im_info)
vdl_im = im
if vdl_writer is not None:
tag = str(op_id) + '. ' + op.__class__.__name__ + '/' + str(step)
if op is None:
tag = str(op_id) + '. OriginalImangeWithGTBox/' + str(step)
vdl_writer.add_image(tag=tag,
img=vdl_im,
step=0)
op_id += 1
def seg_compose(im, im_info=None, label=None, transforms=None, vdl_writer=None, step=0):
if im_info is None:
im_info = list()
if isinstance(im, np.ndarray):
if len(im.shape) != 3:
raise Exception(
"im should be 3-dimensions, but now is {}-dimensions".
format(len(im.shape)))
else:
try:
im = cv2.imread(im).astype('float32')
except:
raise ValueError('Can\'t read The image file {}!'.format(im))
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
if label is not None:
if not isinstance(label, np.ndarray):
label = np.asarray(Image.open(label))
if vdl_writer is not None:
vdl_writer.add_image(tag='0. OriginalImange' + '/' + str(step),
img=im,
step=0)
op_id = 1
for op in transforms:
if isinstance(op, SegTransform):
outputs = op(im, im_info, label)
im = outputs[0]
if len(outputs) >= 2:
im_info = outputs[1]
if len(outputs) == 3:
label = outputs[2]
if isinstance(op, pdx.cv.transforms.seg_transforms.Normalize):
continue
else:
im = execute_imgaug(op, im)
if label is not None:
outputs = (im, im_info, label)
else:
outputs = (im, im_info)
if vdl_writer is not None:
tag = str(op_id) + '. ' + op.__class__.__name__ + '/' + str(step)
vdl_writer.add_image(tag=tag,
img=im,
step=0)
op_id += 1
def visualize(dataset, img_count=3, save_dir='vdl_output'):
'''对数据预处理/增强中间结果进行可视化。
可使用VisualDL查看中间结果:
1. VisualDL启动方式: visualdl --logdir vdl_output --port 8001
2. 浏览器打开 https://0.0.0.0:8001即可,
其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP
Args:
dataset (paddlex.datasets): 数据集读取器。
img_count (int): 需要进行数据预处理/增强的图像数目。默认为3。
save_dir (str): 日志保存的路径。默认为'vdl_output'。
'''
if dataset.num_samples < img_count:
img_count = dataset.num_samples
transforms = dataset.transforms
if not osp.isdir(save_dir):
if osp.exists(save_dir):
os.remove(save_dir)
os.makedirs(save_dir)
from visualdl import LogWriter
vdl_save_dir = osp.join(save_dir, 'image_transforms')
vdl_writer = LogWriter(vdl_save_dir)
for i, data in enumerate(dataset.iterator()):
if i == img_count:
break
data.append(transforms.transforms)
data.append(vdl_writer)
data.append(i)
if isinstance(transforms, ClsTransform):
cls_compose(*data)
elif isinstance(transforms, DetTransform):
labels = dataset.labels
color_map = get_color_map_list(len(labels) + 1)
catid2color = {}
for catid in range(len(labels)):
catid2color[catid] = color_map[catid + 1]
data.append(labels)
data.append(catid2color)
det_compose(*data)
elif isinstance(transforms, SegTransform):
seg_compose(*data)
else:
raise Exception('The transform must the subclass of \
ClsTransform or DetTransform or SegTransform!')
\ No newline at end of file
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
......
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
......
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import os.path as osp
......
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .interpretation_algorithms import CAM, LIME, NormLIME
from .normlime_base import precompute_normlime_weights
......
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import os.path as osp
......
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import os.path as osp
......
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import cv2
......@@ -70,8 +70,10 @@ def normlime(img_file,
normlime_weights_file=None):
"""使用NormLIME算法将模型预测结果的可解释性可视化。
NormLIME是利用一定数量的样本来出一个全局的解释。NormLIME会提前计算一定数量的测
试样本的LIME结果,然后对相同的特征进行权重的归一化,这样来得到一个全局的输入和输出的关系。
NormLIME是利用一定数量的样本来出一个全局的解释。由于NormLIME计算量较大,此处采用一种简化的方式:
使用一定数量的测试样本(目前默认使用所有测试样本),对每个样本进行特征提取,映射到同一个特征空间;
然后以此特征做为输入,以模型输出做为输出,使用线性回归对其进行拟合,得到一个全局的输入和输出的关系。
之后,对一测试样本进行解释时,使用NormLIME全局的解释,来对LIME的结果进行滤波,使最终的可视化结果更加稳定。
注意1:dataset读取的是一个数据集,该数据集不宜过大,否则计算时间会较长,但应包含所有类别的数据。
注意2:NormLIME可解释性结果可视化目前只支持分类模型。
......
......@@ -18,5 +18,6 @@ from . import cv
UNet = cv.models.UNet
DeepLabv3p = cv.models.DeepLabv3p
HRNet = cv.models.HRNet
FastSCNN = cv.models.FastSCNN
transforms = cv.transforms.seg_transforms
visualize = cv.models.utils.visualize.visualize_segmentation
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from . import logging
......
......@@ -67,8 +67,8 @@ def parse_param_file(param_file, return_shape=True):
f.close()
return tuple(tensor_desc.dims)
if tensor_desc.data_type != 5:
raise Exception(
"Unexpected data type while parse {}".format(param_file))
raise Exception("Unexpected data type while parse {}".format(
param_file))
data_size = 4
for i in range(len(tensor_shape)):
data_size *= tensor_shape[i]
......@@ -139,7 +139,12 @@ def load_pdparams(exe, main_prog, model_dir):
vars_to_load = list()
import pickle
with open(osp.join(model_dir, 'model.pdparams'), 'rb') as f:
if osp.isfile(model_dir):
params_file = model_dir
else:
params_file = osp.join(model_dir, 'model.pdparams')
with open(params_file, 'rb') as f:
params_dict = pickle.load(f) if six.PY2 else pickle.load(
f, encoding='latin1')
unused_vars = list()
......@@ -185,8 +190,8 @@ def is_belong_to_optimizer(var):
import paddle.fluid as fluid
from paddle.fluid.proto.framework_pb2 import VarType
if not (isinstance(var, fluid.framework.Parameter)
or var.desc.need_check_feed()):
if not (isinstance(var, fluid.framework.Parameter) or
var.desc.need_check_feed()):
return is_persistable(var)
return False
......@@ -206,9 +211,8 @@ def load_pdopt(exe, main_prog, model_dir):
if len(optimizer_var_list) > 0:
for var in optimizer_var_list:
if var.name not in opt_dict:
raise Exception(
"{} is not in saved paddlex optimizer, {}".format(
var.name, exception_message))
raise Exception("{} is not in saved paddlex optimizer, {}".
format(var.name, exception_message))
if var.shape != opt_dict[var.name].shape:
raise Exception(
"Shape of optimizer variable {} doesn't match.(Last: {}, Now: {}), {}"
......@@ -227,9 +231,8 @@ def load_pdopt(exe, main_prog, model_dir):
"There is no optimizer parameters in the model, please set the optimizer!"
)
else:
logging.info(
"There are {} optimizer parameters in {} are loaded.".format(
len(optimizer_var_list), model_dir))
logging.info("There are {} optimizer parameters in {} are loaded.".
format(len(optimizer_var_list), model_dir))
def load_pretrain_weights(exe,
......@@ -239,6 +242,12 @@ def load_pretrain_weights(exe,
resume=False):
if not osp.exists(weights_dir):
raise Exception("Path {} not exists.".format(weights_dir))
if osp.isfile(weights_dir):
if not weights_dir.endswith('.pdparams'):
raise Exception("File {} is not a paddle parameter file".format(
weights_dir))
load_pdparams(exe, main_prog, weights_dir)
return
if osp.exists(osp.join(weights_dir, "model.pdparams")):
load_pdparams(exe, main_prog, weights_dir)
if resume:
......@@ -255,9 +264,8 @@ def load_pretrain_weights(exe,
if not isinstance(var, fluid.framework.Parameter):
continue
if not osp.exists(osp.join(weights_dir, var.name)):
logging.debug(
"[SKIP] Pretrained weight {}/{} doesn't exist".format(
weights_dir, var.name))
logging.debug("[SKIP] Pretrained weight {}/{} doesn't exist".
format(weights_dir, var.name))
continue
pretrained_shape = parse_param_file(osp.join(weights_dir, var.name))
actual_shape = tuple(var.shape)
......@@ -317,9 +325,8 @@ def load_pretrain_weights(exe,
"There is no optimizer parameters in the model, please set the optimizer!"
)
else:
logging.info(
"There are {} optimizer parameters in {} are loaded.".format(
len(optimizer_var_list), weights_dir))
logging.info("There are {} optimizer parameters in {} are loaded.".
format(len(optimizer_var_list), weights_dir))
class EarlyStop:
......@@ -342,12 +349,12 @@ class EarlyStop:
self.max = current_score
return False
else:
if (abs(self.score - current_score) < self.thresh
or current_score < self.score):
if (abs(self.score - current_score) < self.thresh or
current_score < self.score):
self.counter += 1
self.score = current_score
logging.debug(
"EarlyStopping: %i / %i" % (self.counter, self.patience))
logging.debug("EarlyStopping: %i / %i" %
(self.counter, self.patience))
if self.counter >= self.patience:
logging.info("EarlyStopping: Stop training")
return True
......
......@@ -15,11 +15,11 @@
import setuptools
import sys
long_description = "PaddleX. A end-to-end deeplearning model development toolkit base on PaddlePaddle\n\n"
long_description = "PaddlePaddle Entire Process Development Toolkit"
setuptools.setup(
name="paddlex",
version='1.0.6',
version='1.0.7',
author="paddlex",
author_email="paddlex@baidu.com",
description=long_description,
......
#!/bin/bash
set -e
readonly VERSION="3.8"
version=$(clang-format -version)
if ! [[ $version == *"$VERSION"* ]]; then
echo "clang-format version check failed."
echo "a version contains '$VERSION' is needed, but get '$version'"
echo "you can install the right version, and make an soft-link to '\$PATH' env"
exit -1
fi
clang-format $@
# set -e
#
# readonly VERSION="3.8"
#
# version=$(clang-format -version)
#
# if ! [[ $version == *"$VERSION"* ]]; then
# echo "clang-format version check failed."
# echo "a version contains '$VERSION' is needed, but get '$version'"
# echo "you can install the right version, and make an soft-link to '\$PATH' env"
# exit -1
# fi
#
# clang-format $@
......@@ -11,7 +11,8 @@ pdx.utils.download_and_decompress(optic_dataset, path='./')
# 定义训练和验证时的transforms
# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/seg_transforms.html#composedsegtransforms
train_transforms = transforms.ComposedSegTransforms(mode='train', train_crop_size=[769, 769])
train_transforms = transforms.ComposedSegTransforms(
mode='train', train_crop_size=[769, 769])
eval_transforms = transforms.ComposedSegTransforms(mode='eval')
# 定义训练和验证所用的数据集
......@@ -34,14 +35,14 @@ eval_dataset = pdx.datasets.SegDataset(
# 浏览器打开 https://0.0.0.0:8001即可
# 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP
# https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#hrnet
# https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#fastscnn
num_classes = len(train_dataset.labels)
model = pdx.seg.HRNet(num_classes=num_classes)
model = pdx.seg.FastSCNN(num_classes=num_classes)
model.train(
num_epochs=20,
train_dataset=train_dataset,
train_batch_size=4,
eval_dataset=eval_dataset,
learning_rate=0.01,
save_dir='output/hrnet',
save_dir='output/fastscnn',
use_vdl=True)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册