diff --git a/README.md b/README.md index 4105c32ad8ec0462b16ea365937bf348bc45b903..644769e6dce6a7133e1532379ce36a8a78f569f4 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,7 @@ [![Version](https://img.shields.io/github/release/PaddlePaddle/PaddleX.svg)](https://github.com/PaddlePaddle/PaddleX/releases) ![python version](https://img.shields.io/badge/python-3.6+-orange.svg) ![support os](https://img.shields.io/badge/os-linux%2C%20win%2C%20mac-yellow.svg) +![QQGroup](https://img.shields.io/badge/QQ_Group-1045148026-52B6EF?style=social&logo=tencent-qq&logoColor=000&logoWidth=20) PaddleX是基于飞桨核心框架、开发套件和工具组件的深度学习全流程开发工具。具备**全流程打通**、**融合产业实践**、**易用易集成**三大特点。 diff --git a/deploy/cpp/CMakeLists.txt b/deploy/cpp/CMakeLists.txt index bd13a46713e1239380891e25c3ee7cb68f0f8d1e..ceaa448253f18bb8ea55423ed323aeb3cb459fdc 100644 --- a/deploy/cpp/CMakeLists.txt +++ b/deploy/cpp/CMakeLists.txt @@ -73,7 +73,11 @@ endif() if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/include") include_directories("${PADDLE_DIR}/third_party/install/snappystream/include") endif() -include_directories("${PADDLE_DIR}/third_party/install/zlib/include") +# zlib does not exist in 1.8.1 +if (EXISTS "${PADDLE_DIR}/third_party/install/zlib/include") + include_directories("${PADDLE_DIR}/third_party/install/zlib/include") +endif() + include_directories("${PADDLE_DIR}/third_party/boost") include_directories("${PADDLE_DIR}/third_party/eigen3") @@ -84,7 +88,10 @@ if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib") link_directories("${PADDLE_DIR}/third_party/install/snappystream/lib") endif() -link_directories("${PADDLE_DIR}/third_party/install/zlib/lib") +if (EXISTS "${PADDLE_DIR}/third_party/install/zlib/lib") + link_directories("${PADDLE_DIR}/third_party/install/zlib/lib") +endif() + link_directories("${PADDLE_DIR}/third_party/install/protobuf/lib") link_directories("${PADDLE_DIR}/third_party/install/glog/lib") link_directories("${PADDLE_DIR}/third_party/install/gflags/lib") @@ -107,6 +114,14 @@ include_directories(${OpenCV_INCLUDE_DIRS}) if (WIN32) add_definitions("/DGOOGLE_GLOG_DLL_DECL=") + find_package(OpenMP REQUIRED) + if (OPENMP_FOUND) + message("OPENMP FOUND") + set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} ${OpenMP_C_FLAGS}") + set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} ${OpenMP_C_FLAGS}") + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${OpenMP_CXX_FLAGS}") + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${OpenMP_CXX_FLAGS}") + endif() set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj /MTd") set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd") @@ -186,8 +201,13 @@ if(WITH_STATIC_LIB) set(DEPS ${PADDLE_DIR}/paddle/lib/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX}) else() - set(DEPS - ${PADDLE_DIR}/paddle/lib/libpaddle_fluid${CMAKE_SHARED_LIBRARY_SUFFIX}) + if (NOT WIN32) + set(DEPS + ${PADDLE_DIR}/paddle/lib/libpaddle_fluid${CMAKE_SHARED_LIBRARY_SUFFIX}) + else() + set(DEPS + ${PADDLE_DIR}/paddle/lib/paddle_fluid${CMAKE_SHARED_LIBRARY_SUFFIX}) + endif() endif() if (NOT WIN32) @@ -204,13 +224,16 @@ if (NOT WIN32) else() set(DEPS ${DEPS} ${MATH_LIB} ${MKLDNN_LIB} - glog gflags_static libprotobuf zlibstatic xxhash libyaml-cppmt) + glog gflags_static libprotobuf xxhash libyaml-cppmt) + if (EXISTS "${PADDLE_DIR}/third_party/install/zlib/lib") + set(DEPS ${DEPS} zlibstatic) + endif() set(DEPS ${DEPS} libcmt shlwapi) if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/lib") set(DEPS ${DEPS} snappy) 
endif() - if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib") + if (EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib") set(DEPS ${DEPS} snappystream) endif() endif(NOT WIN32) @@ -236,7 +259,9 @@ if(WITH_ENCRYPTION) link_directories("${ENCRYPTION_DIR}/lib") set(DEPS ${DEPS} ${ENCRYPTION_DIR}/lib/libpmodel-decrypt${CMAKE_SHARED_LIBRARY_SUFFIX}) else() - message(FATAL_ERROR "Encryption Tool don't support WINDOWS") + include_directories("${ENCRYPTION_DIR}/include") + link_directories("${ENCRYPTION_DIR}/lib") + set(DEPS ${DEPS} ${ENCRYPTION_DIR}/lib/pmodel-decrypt${CMAKE_STATIC_LIBRARY_SUFFIX}) endif() endif() @@ -284,10 +309,23 @@ if (WIN32 AND WITH_MKL) COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./mkldnn.dll COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./release/mklml.dll COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./release/libiomp5md.dll - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./release/mkldnn.dll ) - + # for encryption + if (EXISTS "${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll") + add_custom_command(TARGET classifier POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll ./pmodel-decrypt.dll + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll ./release/pmodel-decrypt.dll + ) + add_custom_command(TARGET detector POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll ./pmodel-decrypt.dll + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll ./release/pmodel-decrypt.dll + ) + add_custom_command(TARGET segmenter POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll ./pmodel-decrypt.dll + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ENCRYPTION_DIR}/lib/pmodel-decrypt.dll ./release/pmodel-decrypt.dll + ) + endif() endif() file(COPY "${CMAKE_SOURCE_DIR}/include/paddlex/visualize.h" diff --git a/deploy/cpp/CMakeSettings.json b/deploy/cpp/CMakeSettings.json index 860ca7a61e222d84e5cc7e9b3447bdc8397a8c40..535ff1a8b8aac2ddfc70e0f1c2a25a3b910976d4 100644 --- a/deploy/cpp/CMakeSettings.json +++ b/deploy/cpp/CMakeSettings.json @@ -22,9 +22,9 @@ "type": "PATH" }, { - "name": "CMAKE_BUILD_TYPE", - "value": "Release", - "type": "STRING" + "name": "CUDA_LIB", + "value": "", + "type": "PATH" }, { "name": "WITH_STATIC_LIB", @@ -40,8 +40,18 @@ "name": "WITH_GPU", "value": "False", "type": "BOOL" + }, + { + "name": "WITH_ENCRYPTION", + "value": "False", + "type": "BOOL" + }, + { + "name": "ENCRYPTION_DIR", + "value": "", + "type": "PATH" } ] } ] -} \ No newline at end of file +} diff --git a/deploy/cpp/cmake/yaml-cpp.cmake b/deploy/cpp/cmake/yaml-cpp.cmake index caa8be513bcaaf7ff73c12c268b8137e5582672c..a4b527d38a1676db9c40bd20b6c803b13f597eb2 100644 --- a/deploy/cpp/cmake/yaml-cpp.cmake +++ b/deploy/cpp/cmake/yaml-cpp.cmake @@ -1,5 +1,3 @@ -find_package(Git REQUIRED) - include(ExternalProject) message("${CMAKE_BUILD_TYPE}") diff --git a/deploy/cpp/demo/classifier.cpp b/deploy/cpp/demo/classifier.cpp index badb835132418098d332014a590d2dbb7a1e43fd..8b78d7e8975642b8d10d6c641c0245cdf661d3d9 100644 --- a/deploy/cpp/demo/classifier.cpp +++ b/deploy/cpp/demo/classifier.cpp @@ -13,14 +13,19 @@ // limitations under the License. 
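The classifier demo below is rebuilt around a batched `predict` overload. The following is a minimal sketch of the intended call pattern, assuming the overload `predict(const std::vector<cv::Mat>&, std::vector<PaddleX::ClsResult>*, int thread_num)` declared in `paddlex.h` later in this diff; the model directory, image names, and thread count are placeholders:

```cpp
#include <iostream>
#include <vector>
#include <opencv2/opencv.hpp>
#include "include/paddlex/paddlex.h"

int main() {
  PaddleX::Model model;
  // Init(model_dir, use_gpu, use_trt, gpu_id, key), as in the demos below.
  model.Init("/path/to/inference_model", false, false, 0, "");
  // Decode a small batch of images; any cv::Mat inputs work here.
  std::vector<cv::Mat> batch = {cv::imread("a.jpg", 1), cv::imread("b.jpg", 1)};
  // Results are pre-sized to the batch size, mirroring the demo code.
  std::vector<PaddleX::ClsResult> results(batch.size());
  model.predict(batch, &results, /*thread_num=*/2);
  for (const auto& r : results) {
    std::cout << r.category << " (" << r.score << ")" << std::endl;
  }
  return 0;
}
```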
 #include <glog/logging.h>
+#include <omp.h>
+#include <algorithm>
+#include <chrono>  // NOLINT
 #include <fstream>
 #include <iostream>
 #include <string>
 #include <vector>
-
+#include <utility>
 #include "include/paddlex/paddlex.h"

+using namespace std::chrono;  // NOLINT
+
 DEFINE_string(model_dir, "", "Path of inference model");
 DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
 DEFINE_bool(use_trt, false, "Infering with TensorRT");
@@ -28,6 +33,10 @@ DEFINE_int32(gpu_id, 0, "GPU card id");
 DEFINE_string(key, "", "key of encryption");
 DEFINE_string(image, "", "Path of test image file");
 DEFINE_string(image_list, "", "Path of test image list file");
+DEFINE_int32(batch_size, 1, "Batch size of inference");
+DEFINE_int32(thread_num,
+             omp_get_num_procs(),
+             "Number of preprocessing threads");

 int main(int argc, char** argv) {
   // Parsing command-line
@@ -44,32 +53,80 @@ int main(int argc, char** argv) {
   // Load the model
   PaddleX::Model model;
-  model.Init(FLAGS_model_dir, FLAGS_use_gpu, FLAGS_use_trt, FLAGS_gpu_id, FLAGS_key);
+  model.Init(FLAGS_model_dir,
+             FLAGS_use_gpu,
+             FLAGS_use_trt,
+             FLAGS_gpu_id,
+             FLAGS_key);

   // Run prediction
+  double total_running_time_s = 0.0;
+  double total_imread_time_s = 0.0;
+  int imgs = 1;
   if (FLAGS_image_list != "") {
     std::ifstream inf(FLAGS_image_list);
     if (!inf) {
       std::cerr << "Fail to open file " << FLAGS_image_list << std::endl;
       return -1;
     }
+    // Batched prediction
     std::string image_path;
+    std::vector<std::string> image_paths;
     while (getline(inf, image_path)) {
-      PaddleX::ClsResult result;
-      cv::Mat im = cv::imread(image_path, 1);
-      model.predict(im, &result);
-      std::cout << "Predict label: " << result.category
-                << ", label_id:" << result.category_id
-                << ", score: " << result.score << std::endl;
+      image_paths.push_back(image_path);
+    }
+    imgs = image_paths.size();
+    for (int i = 0; i < image_paths.size(); i += FLAGS_batch_size) {
+      auto start = system_clock::now();
+      // Read a batch of images
+      int im_vec_size =
+          std::min(static_cast<int>(image_paths.size()), i + FLAGS_batch_size);
+      std::vector<cv::Mat> im_vec(im_vec_size - i);
+      std::vector<PaddleX::ClsResult> results(im_vec_size - i,
+                                              PaddleX::ClsResult());
+      int thread_num = std::min(FLAGS_thread_num, im_vec_size - i);
+      #pragma omp parallel for num_threads(thread_num)
+      for (int j = i; j < im_vec_size; ++j) {
+        im_vec[j - i] = std::move(cv::imread(image_paths[j], 1));
+      }
+      auto imread_end = system_clock::now();
+      model.predict(im_vec, &results, thread_num);
+
+      auto imread_duration = duration_cast<microseconds>(imread_end - start);
+      total_imread_time_s += static_cast<double>(imread_duration.count()) *
+                             microseconds::period::num /
+                             microseconds::period::den;
+
+      auto end = system_clock::now();
+      auto duration = duration_cast<microseconds>(end - start);
+      total_running_time_s += static_cast<double>(duration.count()) *
+                              microseconds::period::num /
+                              microseconds::period::den;
+      for (int j = i; j < im_vec_size; ++j) {
+        std::cout << "Path:" << image_paths[j]
+                  << ", predict label: " << results[j - i].category
+                  << ", label_id:" << results[j - i].category_id
+                  << ", score: " << results[j - i].score << std::endl;
+      }
     }
   } else {
+    auto start = system_clock::now();
     PaddleX::ClsResult result;
     cv::Mat im = cv::imread(FLAGS_image, 1);
     model.predict(im, &result);
+    auto end = system_clock::now();
+    auto duration = duration_cast<microseconds>(end - start);
+    total_running_time_s += static_cast<double>(duration.count()) *
+                            microseconds::period::num /
+                            microseconds::period::den;
     std::cout << "Predict label: " << result.category
               << ", label_id:" << result.category_id
               << ", score: " << result.score << std::endl;
   }
-
+  std::cout << "Total running time: " << total_running_time_s
+            << " s, average running time: " << total_running_time_s / imgs
+            << " s/img, total read img time: " << total_imread_time_s
+            << " s, average read time: " << total_imread_time_s / imgs
+            << " s/img, batch_size = " << FLAGS_batch_size << std::endl;
   return 0;
 }
diff --git a/deploy/cpp/demo/detector.cpp b/deploy/cpp/demo/detector.cpp
index e42288fbccd434ef5953c606696af623323aa80d..5b4e3a2ba9d2c921cf23774a17e34e0c8e26cc2a 100644
--- a/deploy/cpp/demo/detector.cpp
+++ b/deploy/cpp/demo/detector.cpp
@@ -13,15 +13,21 @@
 // limitations under the License.

 #include <glog/logging.h>
+#include <omp.h>
+#include <algorithm>
+#include <chrono>  // NOLINT
 #include <fstream>
 #include <iostream>
 #include <string>
 #include <vector>
+#include <utility>

 #include "include/paddlex/paddlex.h"
 #include "include/paddlex/visualize.h"

+using namespace std::chrono;  // NOLINT
+
 DEFINE_string(model_dir, "", "Path of inference model");
 DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
 DEFINE_bool(use_trt, false, "Infering with TensorRT");
@@ -30,6 +36,13 @@ DEFINE_string(key, "", "key of encryption");
 DEFINE_string(image, "", "Path of test image file");
 DEFINE_string(image_list, "", "Path of test image list file");
 DEFINE_string(save_dir, "output", "Path to save visualized image");
+DEFINE_int32(batch_size, 1, "Batch size of inference");
+DEFINE_double(threshold,
+              0.5,
+              "The minimum score of target boxes to be shown");
+DEFINE_int32(thread_num,
+             omp_get_num_procs(),
+             "Number of preprocessing threads");

 int main(int argc, char** argv) {
   // Parse command-line arguments
@@ -43,11 +56,17 @@ int main(int argc, char** argv) {
     std::cerr << "--image or --image_list need to be defined" << std::endl;
     return -1;
   }
-  // Load the model
   PaddleX::Model model;
-  model.Init(FLAGS_model_dir, FLAGS_use_gpu, FLAGS_use_trt, FLAGS_gpu_id, FLAGS_key);
+  model.Init(FLAGS_model_dir,
+             FLAGS_use_gpu,
+             FLAGS_use_trt,
+             FLAGS_gpu_id,
+             FLAGS_key);

+  double total_running_time_s = 0.0;
+  double total_imread_time_s = 0.0;
+  int imgs = 1;
   auto colormap = PaddleX::GenerateColorMap(model.labels.size());
   std::string save_dir = "output";
   // Run prediction
@@ -58,47 +77,83 @@ int main(int argc, char** argv) {
     return -1;
   }
   std::string image_path;
+  std::vector<std::string> image_paths;
   while (getline(inf, image_path)) {
-    PaddleX::DetResult result;
-    cv::Mat im = cv::imread(image_path, 1);
-    model.predict(im, &result);
-    for (int i = 0; i < result.boxes.size(); ++i) {
-      std::cout << "image file: " << image_path
-                << ", predict label: " << result.boxes[i].category
-                << ", label_id:" << result.boxes[i].category_id
-                << ", score: " << result.boxes[i].score << ", box(xmin, ymin, w, h):("
-                << result.boxes[i].coordinate[0] << ", "
-                << result.boxes[i].coordinate[1] << ", "
-                << result.boxes[i].coordinate[2] << ", "
-                << result.boxes[i].coordinate[3] << ")" << std::endl;
+    image_paths.push_back(image_path);
+  }
+  imgs = image_paths.size();
+  for (int i = 0; i < image_paths.size(); i += FLAGS_batch_size) {
+    auto start = system_clock::now();
+    int im_vec_size =
+        std::min(static_cast<int>(image_paths.size()), i + FLAGS_batch_size);
+    std::vector<cv::Mat> im_vec(im_vec_size - i);
+    std::vector<PaddleX::DetResult> results(im_vec_size - i,
+                                            PaddleX::DetResult());
+    int thread_num = std::min(FLAGS_thread_num, im_vec_size - i);
+    #pragma omp parallel for num_threads(thread_num)
+    for (int j = i; j < im_vec_size; ++j) {
+      im_vec[j - i] = std::move(cv::imread(image_paths[j], 1));
+    }
+    auto imread_end = system_clock::now();
+    model.predict(im_vec, &results, thread_num);
+    auto imread_duration = duration_cast<microseconds>(imread_end - start);
+    total_imread_time_s += static_cast<double>(imread_duration.count()) *
+                           microseconds::period::num /
+                           microseconds::period::den;
+    auto end = system_clock::now();
+    auto duration = duration_cast<microseconds>(end - start);
+    total_running_time_s += static_cast<double>(duration.count()) *
+                            microseconds::period::num /
+                            microseconds::period::den;
+    // Print detected boxes
+    for (int j = 0; j < im_vec_size - i; ++j) {
+      for (int k = 0; k < results[j].boxes.size(); ++k) {
+        std::cout << "image file: " << image_paths[i + j] << ", ";
+        std::cout << "predict label: " << results[j].boxes[k].category
+                  << ", label_id:" << results[j].boxes[k].category_id
+                  << ", score: " << results[j].boxes[k].score
+                  << ", box(xmin, ymin, w, h):("
+                  << results[j].boxes[k].coordinate[0] << ", "
+                  << results[j].boxes[k].coordinate[1] << ", "
+                  << results[j].boxes[k].coordinate[2] << ", "
+                  << results[j].boxes[k].coordinate[3] << ")" << std::endl;
+      }
     }
-    // Visualize
-    cv::Mat vis_img =
-        PaddleX::Visualize(im, result, model.labels, colormap, 0.5);
-    std::string save_path =
-        PaddleX::generate_save_path(FLAGS_save_dir, image_path);
-    cv::imwrite(save_path, vis_img);
-    result.clear();
-    std::cout << "Visualized output saved as " << save_path << std::endl;
+    for (int j = 0; j < im_vec_size - i; ++j) {
+      cv::Mat vis_img = PaddleX::Visualize(
+          im_vec[j], results[j], model.labels, colormap, FLAGS_threshold);
+      std::string save_path =
+          PaddleX::generate_save_path(FLAGS_save_dir, image_paths[i + j]);
+      cv::imwrite(save_path, vis_img);
+      std::cout << "Visualized output saved as " << save_path << std::endl;
+    }
   }
 } else {
+  auto start = system_clock::now();
   PaddleX::DetResult result;
   cv::Mat im = cv::imread(FLAGS_image, 1);
   model.predict(im, &result);
+  auto end = system_clock::now();
+  auto duration = duration_cast<microseconds>(end - start);
+  total_running_time_s += static_cast<double>(duration.count()) *
+                          microseconds::period::num /
+                          microseconds::period::den;
+  // Print detected boxes
   for (int i = 0; i < result.boxes.size(); ++i) {
+    std::cout << "image file: " << FLAGS_image << std::endl;
     std::cout << ", predict label: " << result.boxes[i].category
               << ", label_id:" << result.boxes[i].category_id
-              << ", score: " << result.boxes[i].score << ", box(xmin, ymin, w, h):("
-              << result.boxes[i].coordinate[0] << ", "
-              << result.boxes[i].coordinate[1] << ", "
+              << ", score: " << result.boxes[i].score
+              << ", box(xmin, ymin, w, h):(" << result.boxes[i].coordinate[0]
+              << ", " << result.boxes[i].coordinate[1] << ", "
               << result.boxes[i].coordinate[2] << ", "
               << result.boxes[i].coordinate[3] << ")" << std::endl;
   }
   // Visualize
   cv::Mat vis_img =
-      PaddleX::Visualize(im, result, model.labels, colormap, 0.5);
+      PaddleX::Visualize(im, result, model.labels, colormap, FLAGS_threshold);
   std::string save_path =
       PaddleX::generate_save_path(FLAGS_save_dir, FLAGS_image);
   cv::imwrite(save_path, vis_img);
@@ -106,5 +161,11 @@ int main(int argc, char** argv) {
   std::cout << "Visualized output saved as " << save_path << std::endl;
 }

+  std::cout << "Total running time: " << total_running_time_s
+            << " s, average running time: " << total_running_time_s / imgs
+            << " s/img, total read img time: " << total_imread_time_s
+            << " s, average read img time: " << total_imread_time_s / imgs
+            << " s, batch_size = " << FLAGS_batch_size << std::endl;
+
   return 0;
 }
diff --git a/deploy/cpp/demo/segmenter.cpp b/deploy/cpp/demo/segmenter.cpp
index 0492ef803e15268022d869eb8b8e84969b1c8fad..7dd48e551890f3c8e4550694c45bc3f84088ec0a 100644
--- a/deploy/cpp/demo/segmenter.cpp
+++ b/deploy/cpp/demo/segmenter.cpp
@@ -13,15 +13,20 @@
 // limitations under the License.

 #include <glog/logging.h>
+#include <omp.h>
+#include <algorithm>
+#include <chrono>  // NOLINT
 #include <fstream>
 #include <iostream>
 #include <string>
 #include <vector>
-
+#include <utility>
 #include "include/paddlex/paddlex.h"
 #include "include/paddlex/visualize.h"

+using namespace std::chrono;  // NOLINT
+
 DEFINE_string(model_dir, "", "Path of inference model");
 DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
 DEFINE_bool(use_trt, false, "Infering with TensorRT");
@@ -30,6 +35,10 @@ DEFINE_string(key, "", "key of encryption");
 DEFINE_string(image, "", "Path of test image file");
 DEFINE_string(image_list, "", "Path of test image list file");
 DEFINE_string(save_dir, "output", "Path to save visualized image");
+DEFINE_int32(batch_size, 1, "Batch size of inference");
+DEFINE_int32(thread_num,
+             omp_get_num_procs(),
+             "Number of preprocessing threads");

 int main(int argc, char** argv) {
   // Parse command-line arguments
@@ -46,8 +55,15 @@ int main(int argc, char** argv) {

   // Load the model
   PaddleX::Model model;
-  model.Init(FLAGS_model_dir, FLAGS_use_gpu, FLAGS_use_trt, FLAGS_gpu_id, FLAGS_key);
+  model.Init(FLAGS_model_dir,
+             FLAGS_use_gpu,
+             FLAGS_use_trt,
+             FLAGS_gpu_id,
+             FLAGS_key);

+  double total_running_time_s = 0.0;
+  double total_imread_time_s = 0.0;
+  int imgs = 1;
   auto colormap = PaddleX::GenerateColorMap(model.labels.size());
   // Run prediction
   if (FLAGS_image_list != "") {
@@ -57,23 +73,54 @@ int main(int argc, char** argv) {
       return -1;
     }
     std::string image_path;
+    std::vector<std::string> image_paths;
     while (getline(inf, image_path)) {
-      PaddleX::SegResult result;
-      cv::Mat im = cv::imread(image_path, 1);
-      model.predict(im, &result);
+      image_paths.push_back(image_path);
+    }
+    imgs = image_paths.size();
+    for (int i = 0; i < image_paths.size(); i += FLAGS_batch_size) {
+      auto start = system_clock::now();
+      int im_vec_size =
+          std::min(static_cast<int>(image_paths.size()), i + FLAGS_batch_size);
+      std::vector<cv::Mat> im_vec(im_vec_size - i);
+      std::vector<PaddleX::SegResult> results(im_vec_size - i,
+                                              PaddleX::SegResult());
+      int thread_num = std::min(FLAGS_thread_num, im_vec_size - i);
+      #pragma omp parallel for num_threads(thread_num)
+      for (int j = i; j < im_vec_size; ++j) {
+        im_vec[j - i] = std::move(cv::imread(image_paths[j], 1));
+      }
+      auto imread_end = system_clock::now();
+      model.predict(im_vec, &results, thread_num);
+      auto imread_duration = duration_cast<microseconds>(imread_end - start);
+      total_imread_time_s += static_cast<double>(imread_duration.count()) *
+                             microseconds::period::num /
+                             microseconds::period::den;
+      auto end = system_clock::now();
+      auto duration = duration_cast<microseconds>(end - start);
+      total_running_time_s += static_cast<double>(duration.count()) *
+                              microseconds::period::num /
+                              microseconds::period::den;
       // Visualize
-      cv::Mat vis_img =
-          PaddleX::Visualize(im, result, model.labels, colormap);
-      std::string save_path =
-          PaddleX::generate_save_path(FLAGS_save_dir, image_path);
-      cv::imwrite(save_path, vis_img);
-      result.clear();
-      std::cout << "Visualized output saved as " << save_path << std::endl;
+      for (int j = 0; j < im_vec_size - i; ++j) {
+        cv::Mat vis_img =
+            PaddleX::Visualize(im_vec[j], results[j], model.labels, colormap);
+        std::string save_path =
+            PaddleX::generate_save_path(FLAGS_save_dir, image_paths[i + j]);
+        cv::imwrite(save_path, vis_img);
+        std::cout << "Visualized output saved as " << save_path << std::endl;
+      }
     }
   } else {
+    auto start = system_clock::now();
     PaddleX::SegResult result;
     cv::Mat im = cv::imread(FLAGS_image, 1);
     model.predict(im, &result);
+    auto end = system_clock::now();
+    auto duration = duration_cast<microseconds>(end - start);
+    total_running_time_s += static_cast<double>(duration.count()) *
+                            microseconds::period::num /
+                            microseconds::period::den;
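    // Note on the conversion used throughout these demos:
    // duration_cast<microseconds>(...).count() yields an integer number of
    // microsecond ticks, and microseconds::period is std::ratio<1, 1000000>,
    // so multiplying by period::num / period::den rescales ticks to seconds
    // before adding them to the total_*_time_s accumulators.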
     // Visualize
     cv::Mat vis_img =
         PaddleX::Visualize(im, result, model.labels, colormap);
     std::string save_path =
@@ -82,6 +129,11 @@ int main(int argc, char** argv) {
     result.clear();
     std::cout << "Visualized output saved as " << save_path << std::endl;
   }
+  std::cout << "Total running time: " << total_running_time_s
+            << " s, average running time: " << total_running_time_s / imgs
+            << " s/img, total read img time: " << total_imread_time_s
+            << " s, average read img time: " << total_imread_time_s / imgs
+            << " s, batch_size = " << FLAGS_batch_size << std::endl;
   return 0;
 }
diff --git a/deploy/cpp/include/paddlex/config_parser.h b/deploy/cpp/include/paddlex/config_parser.h
index 5303e4da7ac0eb3de73bc57059617d361065f136..850e46656d9efdb5374e3086757cb5350f0457b2 100644
--- a/deploy/cpp/include/paddlex/config_parser.h
+++ b/deploy/cpp/include/paddlex/config_parser.h
@@ -54,4 +54,4 @@ class ConfigPaser {
   YAML::Node Transforms_;
 };

-}  // namespace PaddleDetection
+}  // namespace PaddleX
diff --git a/deploy/cpp/include/paddlex/paddlex.h b/deploy/cpp/include/paddlex/paddlex.h
index d000728c763666e46271d4602b0e42c41dc130f1..af4d8898496fee47ed9b5c74599536ddf1fe9f6c 100644
--- a/deploy/cpp/include/paddlex/paddlex.h
+++ b/deploy/cpp/include/paddlex/paddlex.h
@@ -16,8 +16,11 @@

 #include 
 #include 
+#include 
+#include 
 #include 
-
+#include 
+#include 
 #include "yaml-cpp/yaml.h"

 #ifdef _WIN32
@@ -28,19 +31,48 @@

 #include "paddle_inference_api.h"  // NOLINT

-#include "config_parser.h"
-#include "results.h"
-#include "transforms.h"
+#include "config_parser.h"  // NOLINT
+#include "results.h"  // NOLINT
+#include "transforms.h"  // NOLINT

 #ifdef WITH_ENCRYPTION
-#include "paddle_model_decrypt.h"
-#include "model_code.h"
+#include "paddle_model_decrypt.h"  // NOLINT
+#include "model_code.h"  // NOLINT
 #endif

 namespace PaddleX {

+/*
+ * @brief
+ * This class encapsulates all the necessary steps of model inference:
+ * image matrix preprocessing, model prediction, and result postprocessing.
+ * The whole inference process can be summarized as:
+ * 1. preprocess the image matrix (resize, padding, ......)
+ * 2. run model inference
+ * 3. postprocess the results generated by the model
+ *
+ * @example
+ * PaddleX::Model cls_model;
+ * // initialize model configuration
+ * cls_model.Init(cls_model_dir, use_gpu, use_trt, gpu_id, encryption_key);
+ * // define a classification result object
+ * PaddleX::ClsResult cls_result;
+ * // get image matrix from image file
+ * cv::Mat im = cv::imread(image_file_path, 1);
+ * cls_model.predict(im, &cls_result);
+ * */
 class Model {
  public:
+  /*
+   * @brief
+   * This method initializes the model configuration.
+   *
+   * @param model_dir: the directory containing model.yml
+   * @param use_gpu: whether to use the GPU for inference
+   * @param use_trt: whether to use TensorRT for inference
+   * @param gpu_id: the id of the GPU used for inference
+   * @param key: the decryption key when using an encrypted model
+   * */
   void Init(const std::string& model_dir,
             bool use_gpu = false,
             bool use_trt = false,
@@ -55,26 +87,134 @@
             int gpu_id = 0,
             std::string key = "");

-  bool load_config(const std::string& model_dir);
-
+  /*
+   * @brief
+   * This method loads the model configuration, including the
+   * transform steps and the label list.
+   *
+   * @param yaml_input: the model configuration string
+   * @return true if the configuration is loaded successfully
+   * */
+  bool load_config(const std::string& yaml_input);
+
+  /*
+   * @brief
+   * This method transforms a single image matrix; the result is returned
+   * through the second parameter.
+   *
+   * @param input_im: the single image matrix to be transformed
+   * @param blob: the raw data of the image matrix after transformation
+   * @return true if the image matrix is preprocessed successfully
+   * */
   bool preprocess(const cv::Mat& input_im, ImageBlob* blob);

+  /*
+   * @brief
+   * This method transforms multiple image matrices; the result is returned
+   * through the second parameter.
+   *
+   * @param input_im_batch: a batch of image matrices to be transformed
+   * @param blob_batch: raw data of the batch after transformation
+   * @param thread_num: the number of preprocessing threads; each thread
+   *                    preprocesses a single image matrix
+   * @return true if the batch is preprocessed successfully
+   * */
+  bool preprocess(const std::vector<cv::Mat> &input_im_batch,
+                  std::vector<ImageBlob> *blob_batch,
+                  int thread_num = 1);
+
+  /*
+   * @brief
+   * This method runs classification model prediction on a single image
+   * matrix; the result is returned through the second parameter.
+   *
+   * @param im: the single image matrix to be predicted
+   * @param result: the postprocessed classification result
+   * @return true if prediction succeeds
+   * */
   bool predict(const cv::Mat& im, ClsResult* result);

+  /*
+   * @brief
+   * This method runs classification model prediction on a batch of image
+   * matrices; the results are returned through the second parameter.
+   *
+   * @param im_batch: a batch of image matrices to be predicted
+   * @param results: the postprocessed classification results of the batch
+   * @param thread_num: the number of preprocessing threads; each thread
+   *                    preprocesses a single image matrix
+   * @return true if prediction succeeds
+   * */
+  bool predict(const std::vector<cv::Mat> &im_batch,
+               std::vector<ClsResult> *results,
+               int thread_num = 1);
+
+  /*
+   * @brief
+   * This method runs detection or instance segmentation model prediction
+   * on a single image matrix; the result is returned through the second
+   * parameter.
+   *
+   * @param im: the single image matrix to be predicted
+   * @param result: the postprocessed detection or instance segmentation result
+   * @return true if prediction succeeds
+   * */
   bool predict(const cv::Mat& im, DetResult* result);

+  /*
+   * @brief
+   * This method runs detection or instance segmentation model prediction
+   * on a batch of image matrices; the results are returned through the
+   * second parameter.
+   *
+   * @param im_batch: a batch of image matrices to be predicted
+   * @param result: the postprocessed detection or instance segmentation results
+   * @param thread_num: the number of preprocessing threads; each thread
+   *                    preprocesses a single image matrix
+   * @return true if prediction succeeds
+   * */
+  bool predict(const std::vector<cv::Mat> &im_batch,
+               std::vector<DetResult> *result,
+               int thread_num = 1);
+
+  /*
+   * @brief
+   * This method runs segmentation model prediction on a single image
+   * matrix; the result is returned through the second parameter.
+   *
+   * @param im: the single image matrix to be predicted
+   * @param result: the postprocessed segmentation result
+   * @return true if prediction succeeds
+   * */
   bool predict(const cv::Mat& im, SegResult* result);

-  bool postprocess(SegResult* result);
-
-  bool postprocess(DetResult* result);
-
+  /*
+   * @brief
+   * This method runs segmentation model prediction on a batch of image
+   * matrices; the results are returned through the second parameter.
+   *
+   * @param im_batch: a batch of image matrices to be predicted
+   * @param result: the postprocessed segmentation results
+   * @param thread_num: the number of preprocessing threads; each thread
+   *                    preprocesses a single image matrix
+   * @return true if prediction succeeds
+   * */
+  bool predict(const std::vector<cv::Mat> &im_batch,
+               std::vector<SegResult> *result,
+               int thread_num = 1);
+
+  // model type: one of "classifier", "detector", "segmenter"
   std::string type;
+  // model name, such as FasterRCNN, YOLOv3, and so on
   std::string name;
   std::map<int, std::string> labels;
+  // transform (preprocessing) pipeline manager
   Transforms transforms_;
+  // preprocessed data of a single input
   ImageBlob inputs_;
+  // preprocessed data of a batch input
+  std::vector<ImageBlob> inputs_batch_;
+  // raw data of the prediction results
   std::vector<float> outputs_;
+  // the predictor that runs model inference
   std::unique_ptr<paddle::PaddlePredictor> predictor_;
 };
-}  // namespce of PaddleX
+}  // namespace PaddleX
diff --git a/deploy/cpp/include/paddlex/results.h b/deploy/cpp/include/paddlex/results.h
index 1643c9249e8e8e993017c7702d1d490352c2d9a8..72caa1f5d4f78275ca9c4de55aa89bc22edd02e5 100644
--- a/deploy/cpp/include/paddlex/results.h
+++ b/deploy/cpp/include/paddlex/results.h
@@ -20,9 +20,15 @@

 namespace PaddleX {

+/*
+ * @brief
+ * This class represents a mask in instance segmentation tasks.
+ * */
 template <class T>
 struct Mask {
+  // raw data of the mask
   std::vector<T> data;
+  // the shape of the mask
   std::vector<int> shape;
   void clear() {
     data.clear();
@@ -30,19 +36,34 @@ struct Mask {
   }
 };

+/*
+ * @brief
+ * This class represents a target box in detection or instance segmentation
+ * tasks.
+ * */
 struct Box {
   int category_id;
+  // the category label this box belongs to
   std::string category;
+  // confidence score
   float score;
   std::vector<float> coordinate;
   Mask<float> mask;
 };

+/*
+ * @brief
+ * This class is the base class of prediction results.
+ * */
 class BaseResult {
  public:
+  // model type
  std::string type = "base";
 };

+/*
+ * @brief
+ * This class represents a classification result.
+ * */
 class ClsResult : public BaseResult {
  public:
  int category_id;
@@ -51,17 +72,28 @@ class ClsResult : public BaseResult {
   std::string type = "cls";
 };

+/*
+ * @brief
+ * This class represents a detection or instance segmentation result.
+ * */
 class DetResult : public BaseResult {
  public:
+  // target boxes
   std::vector<Box> boxes;
   int mask_resolution;
   std::string type = "det";
   void clear() { boxes.clear(); }
 };

+/*
+ * @brief
+ * This class represents a segmentation result.
+ * */
 class SegResult : public BaseResult {
  public:
+  // the label of each pixel of the image matrix
   Mask<int64_t> label_map;
+  // the score of each pixel of the image matrix
   Mask<float> score_map;
   std::string type = "seg";
   void clear() {
diff --git a/deploy/cpp/include/paddlex/transforms.h b/deploy/cpp/include/paddlex/transforms.h
index f8265db447f693d084c5a789504bc4b0ccc14d28..c1ffd7e1de8a28f88a571e7b9d029585806cf59d 100644
--- a/deploy/cpp/include/paddlex/transforms.h
+++ b/deploy/cpp/include/paddlex/transforms.h
@@ -28,7 +28,10 @@

 namespace PaddleX {

-// Object for storing all preprocessed data
+/*
+ * @brief
+ * This class stores all the preprocessed data
+ * */
 class ImageBlob {
  public:
   // Original image height and width
@@ -45,21 +48,34 @@ class ImageBlob {
   std::vector<float> im_data_;

   void clear() {
-    ori_im_size_.clear();
-    new_im_size_.clear();
     im_size_before_resize_.clear();
     reshape_order_.clear();
     im_data_.clear();
   }
 };

-// Abstraction of preprocessing opration class
+/*
+ * @brief
+ * Abstract base class of the preprocessing operations
+ * */
 class Transform {
  public:
  virtual void Init(const YAML::Node& item) = 0;
+  /*
+   * @brief
+   * This method runs the preprocessing operation on an image matrix;
+   * the result is returned through the second parameter.
+   * @param im: the single image matrix to be preprocessed
+   * @param data: the raw data of the image matrix after preprocessing
+   * @return true if the transform succeeds
+   * */
  virtual bool Run(cv::Mat* im, ImageBlob* data) = 0;
 };

+/*
+ * @brief
+ * This class performs normalization on an image matrix
+ * */
 class Normalize : public Transform {
  public:
   virtual void Init(const YAML::Node& item) {
@@ -74,6 +90,14 @@ class Normalize : public Transform {
   std::vector<float> std_;
 };

+/*
+ * @brief
+ * This class resizes an image matrix by its short side. It first resizes
+ * the short side of the image matrix to the specified length, scaling the
+ * long side in the same proportion. If the new length of the long side
+ * exceeds the max size, the long side is resized to the max size and the
+ * short side is scaled in the same proportion.
+ * */
 class ResizeByShort : public Transform {
  public:
   virtual void Init(const YAML::Node& item) {
@@ -92,6 +116,12 @@ class ResizeByShort : public Transform {
   int max_size_;
 };

+/*
+ * @brief
+ * This class resizes an image matrix by its long side. It first resizes
+ * the long side of the image matrix to the specified length, scaling the
+ * short side in the same proportion.
+ * */
 class ResizeByLong : public Transform {
  public:
   virtual void Init(const YAML::Node& item) {
@@ -103,13 +133,20 @@ class ResizeByLong : public Transform {
   int long_size_;
 };

+/*
+ * @brief
+ * This class resizes an image matrix: both width and height are resized
+ * to the specified lengths.
+ * */
 class Resize : public Transform {
  public:
   virtual void Init(const YAML::Node& item) {
+    if (item["interp"].IsDefined()) {
+      interp_ = item["interp"].as<std::string>();
+    }
     if (item["target_size"].IsScalar()) {
       height_ = item["target_size"].as<int>();
       width_ = item["target_size"].as<int>();
-      interp_ = item["interp"].as<std::string>();
     } else if (item["target_size"].IsSequence()) {
       std::vector<int> target_size = item["target_size"].as<std::vector<int>>();
       width_ = target_size[0];
@@ -128,6 +165,11 @@ class Resize : public Transform {
   std::string interp_;
 };

+/*
+ * @brief
+ * This class performs center cropping on an image matrix: it crops the
+ * center of the image matrix according to the specified size.
+ * */
 class CenterCrop : public Transform {
  public:
   virtual void Init(const YAML::Node& item) {
@@ -147,6 +189,11 @@ class CenterCrop : public Transform {
   int width_;
 };

+/*
+ * @brief
+ * This class performs padding on an image matrix: it adds a border to the
+ * edges of the image matrix.
+ * */
 class Padding : public Transform {
  public:
   virtual void Init(const YAML::Node& item) {
@@ -175,7 +222,11 @@ class Padding : public Transform {
   int width_ = 0;
   int height_ = 0;
 };
-
+/*
+ * @brief
+ * This class is the transform operations manager. It stores all the
+ * necessary transform operations and runs them in the correct order.
+ * */
 class Transforms {
  public:
   void Init(const YAML::Node& node, bool to_rgb = true);
diff --git a/deploy/cpp/include/paddlex/visualize.h b/deploy/cpp/include/paddlex/visualize.h
index 7a71f474d028795aa1dec3cd993f5480c0906ced..5a8e39a762ff5bbde966aff213f1751f520789e2 100644
--- a/deploy/cpp/include/paddlex/visualize.h
+++ b/deploy/cpp/include/paddlex/visualize.h
@@ -43,20 +43,55 @@

 namespace PaddleX {

-// Generate visualization colormap for each class
+/*
+ * @brief
+ * Generates the visualization colormap for each class
+ *
+ * @param num_class: the number of classes
+ * @return the colormap; the size of the vector is 3 * num_class
+ * */
 std::vector<int> GenerateColorMap(int num_class);

+/*
+ * @brief
+ * Visualizes a detection result
+ *
+ * @param img: the initial image matrix
+ * @param results: the detection result
+ * @param labels: the label map
+ * @param colormap: the visualization colormap
+ * @return the visualized image matrix
+ * */
 cv::Mat Visualize(const cv::Mat& img,
                   const DetResult& results,
                   const std::map<int, std::string>& labels,
                   const std::vector<int>& colormap,
                   float threshold = 0.5);

+/*
+ * @brief
+ * Visualizes a segmentation result
+ *
+ * @param img: the initial image matrix
+ * @param result: the segmentation result
+ * @param labels: the label map
+ * @param colormap: the visualization colormap
+ * @return the visualized image matrix
+ * */
 cv::Mat Visualize(const cv::Mat& img,
                   const SegResult& result,
                   const std::map<int, std::string>& labels,
                   const std::vector<int>& colormap);

+/*
+ * @brief
+ * Generates the save path for a visualized image matrix
+ *
+ * @param save_dir: the directory for saving the visualized image matrix
+ * @param file_path: the source image file path
+ * @return the save path of the visualized result
+ * */
 std::string generate_save_path(const std::string& save_dir,
                                const std::string& file_path);
-}  // namespce of PaddleX
+}  // namespace PaddleX
diff --git a/deploy/cpp/scripts/build.sh b/deploy/cpp/scripts/build.sh
index 74ab96a32466b6351def8f9abc3275954cdc1e06..e87d7bf4797f1833d88379df0587733958639b06 100644
--- a/deploy/cpp/scripts/build.sh
+++ b/deploy/cpp/scripts/build.sh
@@ -4,10 +4,10 @@ WITH_GPU=OFF
 WITH_MKL=ON
 # Whether to integrate TensorRT (effective only when WITH_GPU=ON)
 WITH_TENSORRT=OFF
-# Path of TensorRT
-TENSORRT_DIR=/path/to/TensorRT/
-# Path of the Paddle inference library
-PADDLE_DIR=/docker/jiangjiajun/PaddleDetection/deploy/cpp/fluid_inference
+# Path of TensorRT; to integrate TensorRT, change this to your actual installation path
+TENSORRT_DIR=/root/projects/TensorRT/
+# Path of the Paddle inference library; change this to your actual installation path
+PADDLE_DIR=/root/projects/fluid_inference
 # Whether to build against the static Paddle inference library
 # When using TensorRT, the Paddle inference library is usually dynamic
 WITH_STATIC_LIB=OFF
@@ -16,7 +16,7 @@ CUDA_LIB=/usr/local/cuda/lib64
 # Path of the cuDNN lib
 CUDNN_LIB=/usr/local/cuda/lib64

-# Whether to load an encrypted model
+# Whether to load an encrypted model
 WITH_ENCRYPTION=ON
 # Path of the encryption tool; no change is needed when using the bundled precompiled version
 sh $(pwd)/scripts/bootstrap.sh  # download the precompiled encryption tool
diff --git a/deploy/cpp/src/paddlex.cpp b/deploy/cpp/src/paddlex.cpp
index 90a4a4452b9e5f3eba1c0b4c7ab88f5b91e03971..bedd83b356baff41d7f9d16ac6de855e982332b2 100644
--- a/deploy/cpp/src/paddlex.cpp
+++ b/deploy/cpp/src/paddlex.cpp
@@ -11,9 +11,11 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-
+#include <omp.h>
+#include <algorithm>
+#include <fstream>
+#include <cstring>
 #include "include/paddlex/paddlex.h"
-
 namespace PaddleX {

 void Model::create_predictor(const std::string& model_dir,
@@ -21,22 +23,37 @@ void Model::create_predictor(const std::string& model_dir,
                              bool use_trt,
                              int gpu_id,
                              std::string key) {
-  // Read the config file
-  if (!load_config(model_dir)) {
-    std::cerr << "Parse file 'model.yml' failed!" << std::endl;
-    exit(-1);
-  }
   paddle::AnalysisConfig config;
   std::string model_file = model_dir + OS_PATH_SEP + "__model__";
   std::string params_file = model_dir + OS_PATH_SEP + "__params__";
+  std::string yaml_file = model_dir + OS_PATH_SEP + "model.yml";
+  std::string yaml_input = "";
 #ifdef WITH_ENCRYPTION
-  if (key != ""){
+  if (key != "") {
     model_file = model_dir + OS_PATH_SEP + "__model__.encrypted";
     params_file = model_dir + OS_PATH_SEP + "__params__.encrypted";
-    paddle_security_load_model(&config, key.c_str(), model_file.c_str(), params_file.c_str());
+    yaml_file = model_dir + OS_PATH_SEP + "model.yml.encrypted";
+    paddle_security_load_model(
+        &config, key.c_str(), model_file.c_str(), params_file.c_str());
+    yaml_input = decrypt_file(yaml_file.c_str(), key.c_str());
   }
 #endif
-  if (key == ""){
+  if (yaml_input == "") {
+    // Read the config file
+    std::ifstream yaml_fin(yaml_file);
+    yaml_fin.seekg(0, std::ios::end);
+    size_t yaml_file_size = yaml_fin.tellg();
+    yaml_input.assign(yaml_file_size, ' ');
+    yaml_fin.seekg(0);
+    yaml_fin.read(&yaml_input[0], yaml_file_size);
+  }
+  // Parse the contents of the config file
+  if (!load_config(yaml_input)) {
+    std::cerr << "Parse file 'model.yml' failed!" << std::endl;
+    exit(-1);
+  }
+
+  if (key == "") {
     config.SetModel(model_file, params_file);
   }
   if (use_gpu) {
@@ -60,18 +77,17 @@ void Model::create_predictor(const std::string& model_dir,
   predictor_ = std::move(CreatePaddlePredictor(config));
 }

-bool Model::load_config(const std::string& model_dir) {
-  std::string yaml_file = model_dir + OS_PATH_SEP + "model.yml";
-  YAML::Node config = YAML::LoadFile(yaml_file);
+bool Model::load_config(const std::string& yaml_input) {
+  YAML::Node config = YAML::Load(yaml_input);
   type = config["_Attributes"]["model_type"].as<std::string>();
   name = config["Model"].as<std::string>();
   std::string version = config["version"].as<std::string>();
   if (version[0] == '0') {
-    std::cerr << "[Init] Version of the loaded model is lower than 1.0.0, deployment "
-              << "cannot be done, please refer to "
-              << "https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/tutorials/deploy/upgrade_version.md "
-              << "to transfer version."
-              << std::endl;
+    std::cerr << "[Init] Version of the loaded model is lower than 1.0.0, "
+              << "deployment cannot be done, please refer to "
+              << "https://github.com/PaddlePaddle/PaddleX/blob/develop/docs"
+              << "/tutorials/deploy/upgrade_version.md "
+              << "to transfer version." << std::endl;
     return false;
   }
   bool to_rgb = true;
@@ -104,17 +120,29 @@ bool Model::preprocess(const cv::Mat& input_im, ImageBlob* blob) {
   return true;
 }

+// use OpenMP
+bool Model::preprocess(const std::vector<cv::Mat>& input_im_batch,
+                       std::vector<ImageBlob>* blob_batch,
+                       int thread_num) {
+  int batch_size = input_im_batch.size();
+  bool success = true;
+  thread_num = std::min(thread_num, batch_size);
+  #pragma omp parallel for num_threads(thread_num)
+  for (int i = 0; i < input_im_batch.size(); ++i) {
+    cv::Mat im = input_im_batch[i].clone();
+    if (!transforms_.Run(&im, &(*blob_batch)[i])) {
+      success = false;
+    }
+  }
+  return success;
+}
+
 bool Model::predict(const cv::Mat& im, ClsResult* result) {
   inputs_.clear();
   if (type == "detector") {
     std::cerr << "Loading model is a 'detector', DetResult should be passed to "
-                 "function predict()!"
-              << std::endl;
+                 "function predict()!" << std::endl;
     return false;
   } else if (type == "segmenter") {
     std::cerr << "Loading model is a 'segmenter', SegResult should be passed "
-                 "to function predict()!"
-              << std::endl;
+                 "to function predict()!" << std::endl;
     return false;
   }
   // Preprocess the input image
@@ -144,20 +172,80 @@ bool Model::predict(const cv::Mat& im, ClsResult* result) {
   result->category_id = std::distance(std::begin(outputs_), ptr);
   result->score = *ptr;
   result->category = labels[result->category_id];
+  return true;
+}
+
+bool Model::predict(const std::vector<cv::Mat>& im_batch,
+                    std::vector<ClsResult>* results,
+                    int thread_num) {
+  for (auto& inputs : inputs_batch_) {
+    inputs.clear();
+  }
+  if (type == "detector") {
+    std::cerr << "Loading model is a 'detector', DetResult should be passed to "
+                 "function predict()!" << std::endl;
+    return false;
+  } else if (type == "segmenter") {
+    std::cerr << "Loading model is a 'segmenter', SegResult should be passed "
+                 "to function predict()!" << std::endl;
+    return false;
+  }
+  inputs_batch_.assign(im_batch.size(), ImageBlob());
+  // Preprocess the input images
+  if (!preprocess(im_batch, &inputs_batch_, thread_num)) {
+    std::cerr << "Preprocess failed!" << std::endl;
+    return false;
+  }
+  // Run prediction with the loaded model
+  int batch_size = im_batch.size();
+  auto in_tensor = predictor_->GetInputTensor("image");
+  int h = inputs_batch_[0].new_im_size_[0];
+  int w = inputs_batch_[0].new_im_size_[1];
+  in_tensor->Reshape({batch_size, 3, h, w});
+  std::vector<float> inputs_data(batch_size * 3 * h * w);
+  for (int i = 0; i < batch_size; ++i) {
+    std::copy(inputs_batch_[i].im_data_.begin(),
+              inputs_batch_[i].im_data_.end(),
+              inputs_data.begin() + i * 3 * h * w);
+  }
+  in_tensor->copy_from_cpu(inputs_data.data());
+  // in_tensor->copy_from_cpu(inputs_.im_data_.data());
+  predictor_->ZeroCopyRun();
+  // Fetch the model output
+  auto output_names = predictor_->GetOutputNames();
+  auto output_tensor = predictor_->GetOutputTensor(output_names[0]);
+  std::vector<int> output_shape = output_tensor->shape();
+  int size = 1;
+  for (const auto& i : output_shape) {
+    size *= i;
+  }
+  outputs_.resize(size);
+  output_tensor->copy_to_cpu(outputs_.data());
+  // Postprocess the model output
+  int single_batch_size = size / batch_size;
+  for (int i = 0; i < batch_size; ++i) {
+    auto start_ptr = std::begin(outputs_);
+    auto end_ptr = std::begin(outputs_);
+    std::advance(start_ptr, i * single_batch_size);
+    std::advance(end_ptr, (i + 1) * single_batch_size);
+    auto ptr = std::max_element(start_ptr, end_ptr);
+    (*results)[i].category_id = std::distance(start_ptr, ptr);
+    (*results)[i].score = *ptr;
+    (*results)[i].category = labels[(*results)[i].category_id];
+  }
+
   return true;
 }

 bool Model::predict(const cv::Mat& im, DetResult* result) {
-  result->clear();
   inputs_.clear();
+  result->clear();
   if (type == "classifier") {
     std::cerr << "Loading model is a 'classifier', ClsResult should be passed "
-                 "to function predict()!"
-              << std::endl;
+                 "to function predict()!" << std::endl;
     return false;
   } else if (type == "segmenter") {
     std::cerr << "Loading model is a 'segmenter', SegResult should be passed "
-                 "to function predict()!"
-              << std::endl;
+                 "to function predict()!" << std::endl;
     return false;
   }

@@ -172,6 +260,7 @@ bool Model::predict(const cv::Mat& im, DetResult* result) {
   auto im_tensor = predictor_->GetInputTensor("image");
   im_tensor->Reshape({1, 3, h, w});
   im_tensor->copy_from_cpu(inputs_.im_data_.data());
+
   if (name == "YOLOv3") {
     auto im_size_tensor = predictor_->GetInputTensor("im_size");
     im_size_tensor->Reshape({1, 2});
@@ -247,6 +336,181 @@ bool Model::predict(const cv::Mat& im, DetResult* result) {
                          static_cast<int>(box->coordinate[3])};
     }
   }
+  return true;
+}
+
+bool Model::predict(const std::vector<cv::Mat>& im_batch,
+                    std::vector<DetResult>* result,
+                    int thread_num) {
+  for (auto& inputs : inputs_batch_) {
+    inputs.clear();
+  }
+  if (type == "classifier") {
+    std::cerr << "Loading model is a 'classifier', ClsResult should be passed "
+                 "to function predict()!" << std::endl;
+    return false;
+  } else if (type == "segmenter") {
+    std::cerr << "Loading model is a 'segmenter', SegResult should be passed "
+                 "to function predict()!" << std::endl;
+    return false;
+  }
+
+  inputs_batch_.assign(im_batch.size(), ImageBlob());
+  int batch_size = im_batch.size();
+  // Preprocess the input images
+  if (!preprocess(im_batch, &inputs_batch_, thread_num)) {
+    std::cerr << "Preprocess failed!" << std::endl;
+    return false;
+  }
+  // Batch padding for RCNN-style models
+  if (batch_size > 1) {
+    if (name == "FasterRCNN" || name == "MaskRCNN") {
+      int max_h = -1;
+      int max_w = -1;
+      for (int i = 0; i < batch_size; ++i) {
+        max_h = std::max(max_h, inputs_batch_[i].new_im_size_[0]);
+        max_w = std::max(max_w, inputs_batch_[i].new_im_size_[1]);
+        // std::cout << "(" << inputs_batch_[i].new_im_size_[0]
+        //           << ", " << inputs_batch_[i].new_im_size_[1]
+        //           << ")" << std::endl;
+      }
+      thread_num = std::min(thread_num, batch_size);
+      #pragma omp parallel for num_threads(thread_num)
+      for (int i = 0; i < batch_size; ++i) {
+        int h = inputs_batch_[i].new_im_size_[0];
+        int w = inputs_batch_[i].new_im_size_[1];
+        int c = im_batch[i].channels();
+        if (max_h != h || max_w != w) {
+          std::vector<float> temp_buffer(c * max_h * max_w);
+          float* temp_ptr = temp_buffer.data();
+          float* ptr = inputs_batch_[i].im_data_.data();
+          for (int cur_channel = c - 1; cur_channel >= 0; --cur_channel) {
+            int ori_pos = cur_channel * h * w + (h - 1) * w;
+            int des_pos = cur_channel * max_h * max_w + (h - 1) * max_w;
+            int last_pos = cur_channel * h * w;
+            for (; ori_pos >= last_pos; ori_pos -= w, des_pos -= max_w) {
+              memcpy(temp_ptr + des_pos, ptr + ori_pos, w * sizeof(float));
+            }
+          }
+          inputs_batch_[i].im_data_.swap(temp_buffer);
+          inputs_batch_[i].new_im_size_[0] = max_h;
+          inputs_batch_[i].new_im_size_[1] = max_w;
+        }
+      }
+    }
+  }
+  int h = inputs_batch_[0].new_im_size_[0];
+  int w = inputs_batch_[0].new_im_size_[1];
+  auto im_tensor = predictor_->GetInputTensor("image");
+  im_tensor->Reshape({batch_size, 3, h, w});
+  std::vector<float> inputs_data(batch_size * 3 * h * w);
+  for (int i = 0; i < batch_size; ++i) {
+    std::copy(inputs_batch_[i].im_data_.begin(),
+              inputs_batch_[i].im_data_.end(),
+              inputs_data.begin() + i * 3 * h * w);
+  }
+  im_tensor->copy_from_cpu(inputs_data.data());
+  if (name == "YOLOv3") {
+    auto im_size_tensor = predictor_->GetInputTensor("im_size");
+    im_size_tensor->Reshape({batch_size, 2});
+    std::vector<int> inputs_data_size(batch_size * 2);
+    for (int i = 0; i < batch_size; ++i) {
+      std::copy(inputs_batch_[i].ori_im_size_.begin(),
+                inputs_batch_[i].ori_im_size_.end(),
+                inputs_data_size.begin() + 2 * i);
+    }
+    im_size_tensor->copy_from_cpu(inputs_data_size.data());
+  } else if (name == "FasterRCNN" || name == "MaskRCNN") {
+    auto im_info_tensor = predictor_->GetInputTensor("im_info");
+    auto im_shape_tensor = predictor_->GetInputTensor("im_shape");
+    im_info_tensor->Reshape({batch_size, 3});
+    im_shape_tensor->Reshape({batch_size, 3});
+
+    std::vector<float> im_info(3 * batch_size);
+    std::vector<float> im_shape(3 * batch_size);
+    for (int i = 0; i < batch_size; ++i) {
+      float ori_h = static_cast<float>(inputs_batch_[i].ori_im_size_[0]);
+      float ori_w = static_cast<float>(inputs_batch_[i].ori_im_size_[1]);
+      float new_h = static_cast<float>(inputs_batch_[i].new_im_size_[0]);
+      float new_w = static_cast<float>(inputs_batch_[i].new_im_size_[1]);
+      im_info[i * 3] = new_h;
+      im_info[i * 3 + 1] = new_w;
+      im_info[i * 3 + 2] = inputs_batch_[i].scale;
+      im_shape[i * 3] = ori_h;
+      im_shape[i * 3 + 1] = ori_w;
+      im_shape[i * 3 + 2] = 1.0;
+    }
+    im_info_tensor->copy_from_cpu(im_info.data());
+    im_shape_tensor->copy_from_cpu(im_shape.data());
+  }
+  // Run prediction with the loaded model
+  predictor_->ZeroCopyRun();
+
+  // Read all the boxes
+  std::vector<float> output_box;
+  auto output_names = predictor_->GetOutputNames();
+  auto output_box_tensor = predictor_->GetOutputTensor(output_names[0]);
+  std::vector<int> output_box_shape = output_box_tensor->shape();
+  int size = 1;
+  for (const auto& i : output_box_shape) {
+    size *= i;
+  }
+  output_box.resize(size);
+  output_box_tensor->copy_to_cpu(output_box.data());
+  if (size < 6) {
+    std::cerr << "[WARNING] There's no object detected." << std::endl;
+    return true;
+  }
+  auto lod_vector = output_box_tensor->lod();
+  int num_boxes = size / 6;
+  // Parse the predicted boxes
+  for (int i = 0; i < lod_vector[0].size() - 1; ++i) {
+    for (int j = lod_vector[0][i]; j < lod_vector[0][i + 1]; ++j) {
+      Box box;
+      box.category_id = static_cast<int>(round(output_box[j * 6]));
+      box.category = labels[box.category_id];
+      box.score = output_box[j * 6 + 1];
+      float xmin = output_box[j * 6 + 2];
+      float ymin = output_box[j * 6 + 3];
+      float xmax = output_box[j * 6 + 4];
+      float ymax = output_box[j * 6 + 5];
+      float w = xmax - xmin + 1;
+      float h = ymax - ymin + 1;
+      box.coordinate = {xmin, ymin, w, h};
+      (*result)[i].boxes.push_back(std::move(box));
+    }
+  }
+
+  // Instance segmentation also needs to parse the masks
+  if (name == "MaskRCNN") {
+    std::vector<float> output_mask;
+    auto output_mask_tensor = predictor_->GetOutputTensor(output_names[1]);
+    std::vector<int> output_mask_shape = output_mask_tensor->shape();
+    int masks_size = 1;
+    for (const auto& i : output_mask_shape) {
+      masks_size *= i;
+    }
+    int mask_pixels = output_mask_shape[2] * output_mask_shape[3];
+    int classes = output_mask_shape[1];
+    output_mask.resize(masks_size);
+    output_mask_tensor->copy_to_cpu(output_mask.data());
+    int mask_idx = 0;
+    for (int i = 0; i < lod_vector[0].size() - 1; ++i) {
+      (*result)[i].mask_resolution = output_mask_shape[2];
+      for (int j = 0; j < (*result)[i].boxes.size(); ++j) {
+        Box* box = &(*result)[i].boxes[j];
+        int category_id = box->category_id;
+        auto begin_mask = output_mask.begin() +
+                          (mask_idx * classes + category_id) * mask_pixels;
+        auto end_mask = begin_mask + mask_pixels;
+        box->mask.data.assign(begin_mask, end_mask);
+        box->mask.shape = {static_cast<int>(box->coordinate[2]),
+                           static_cast<int>(box->coordinate[3])};
+        mask_idx++;
+      }
+    }
+  }
+
   return true;
 }

 bool Model::predict(const cv::Mat& im, SegResult* result) {
@@ -254,13 +518,11 @@ bool Model::predict(const cv::Mat& im, SegResult* result) {
   inputs_.clear();
   if (type == "classifier") {
     std::cerr << "Loading model is a 'classifier', ClsResult should be passed "
-                 "to function predict()!"
-              << std::endl;
+                 "to function predict()!" << std::endl;
     return false;
   } else if (type == "detector") {
     std::cerr << "Loading model is a 'detector', DetResult should be passed to "
-                 "function predict()!"
-              << std::endl;
+                 "function predict()!" << std::endl;
     return false;
   }

@@ -288,6 +550,7 @@ bool Model::predict(const cv::Mat& im, SegResult* result) {
     size *= i;
     result->label_map.shape.push_back(i);
   }
+
   result->label_map.data.resize(size);
   output_label_tensor->copy_to_cpu(result->label_map.data.data());

@@ -299,6 +562,7 @@ bool Model::predict(const cv::Mat& im, SegResult* result) {
     size *= i;
     result->score_map.shape.push_back(i);
   }
+
   result->score_map.data.resize(size);
   output_score_tensor->copy_to_cpu(result->score_map.data.data());

@@ -325,8 +589,8 @@ bool Model::predict(const cv::Mat& im, SegResult* result) {
     inputs_.im_size_before_resize_.pop_back();
     auto padding_w = before_shape[0];
     auto padding_h = before_shape[1];
-    mask_label = mask_label(cv::Rect(0, 0, padding_w, padding_h));
-    mask_score = mask_score(cv::Rect(0, 0, padding_w, padding_h));
+    mask_label = mask_label(cv::Rect(0, 0, padding_h, padding_w));
+    mask_score = mask_score(cv::Rect(0, 0, padding_h, padding_w));
   } else if (*iter == "resize") {
     auto before_shape = inputs_.im_size_before_resize_[len_postprocess - idx];
     inputs_.im_size_before_resize_.pop_back();
@@ -343,7 +607,7 @@ bool Model::predict(const cv::Mat& im, SegResult* result) {
                cv::Size(resize_h, resize_w),
                0,
                0,
-               cv::INTER_NEAREST);
+               cv::INTER_LINEAR);
   }
   ++idx;
 }
@@ -353,6 +617,156 @@ bool Model::predict(const cv::Mat& im, SegResult* result) {
   result->score_map.data.assign(mask_score.begin(), mask_score.end());
   result->score_map.shape = {mask_score.rows, mask_score.cols};

+  return true;
+}
+
+bool Model::predict(const std::vector<cv::Mat>& im_batch,
+                    std::vector<SegResult>* result,
+                    int thread_num) {
+  for (auto& inputs : inputs_batch_) {
+    inputs.clear();
+  }
+  if (type == "classifier") {
+    std::cerr << "Loading model is a 'classifier', ClsResult should be passed "
+                 "to function predict()!" << std::endl;
+    return false;
+  } else if (type == "detector") {
+    std::cerr << "Loading model is a 'detector', DetResult should be passed to "
+                 "function predict()!" << std::endl;
+    return false;
+  }
+
+  // Preprocess the input images
+  inputs_batch_.assign(im_batch.size(), ImageBlob());
+  if (!preprocess(im_batch, &inputs_batch_, thread_num)) {
+    std::cerr << "Preprocess failed!" << std::endl;
+    return false;
+  }
+
+  int batch_size = im_batch.size();
+  (*result).clear();
+  (*result).resize(batch_size);
+  int h = inputs_batch_[0].new_im_size_[0];
+  int w = inputs_batch_[0].new_im_size_[1];
+  auto im_tensor = predictor_->GetInputTensor("image");
+  im_tensor->Reshape({batch_size, 3, h, w});
+  std::vector<float> inputs_data(batch_size * 3 * h * w);
+  for (int i = 0; i < batch_size; ++i) {
+    std::copy(inputs_batch_[i].im_data_.begin(),
+              inputs_batch_[i].im_data_.end(),
+              inputs_data.begin() + i * 3 * h * w);
+  }
+  im_tensor->copy_from_cpu(inputs_data.data());
+  // im_tensor->copy_from_cpu(inputs_.im_data_.data());
+
+  // Run prediction with the loaded model
+  predictor_->ZeroCopyRun();
+
+  // Fetch the label map produced by argmax over the confidence scores
+  auto output_names = predictor_->GetOutputNames();
+  auto output_label_tensor = predictor_->GetOutputTensor(output_names[0]);
+  std::vector<int> output_label_shape = output_label_tensor->shape();
+  int size = 1;
+  for (const auto& i : output_label_shape) {
+    size *= i;
+  }
+
+  std::vector<int64_t> output_labels(size, 0);
+  output_label_tensor->copy_to_cpu(output_labels.data());
+  auto output_labels_iter = output_labels.begin();
+
+  int single_batch_size = size / batch_size;
+  for (int i = 0; i < batch_size; ++i) {
+    (*result)[i].label_map.data.resize(single_batch_size);
+    (*result)[i].label_map.shape.push_back(1);
+    for (int j = 1; j < output_label_shape.size(); ++j) {
+      (*result)[i].label_map.shape.push_back(output_label_shape[j]);
+    }
+    std::copy(output_labels_iter + i * single_batch_size,
+              output_labels_iter + (i + 1) * single_batch_size,
+              (*result)[i].label_map.data.data());
+  }
+
+  // Fetch the confidence score map
+  auto output_score_tensor = predictor_->GetOutputTensor(output_names[1]);
+  std::vector<int> output_score_shape = output_score_tensor->shape();
+  size = 1;
+  for (const auto& i : output_score_shape) {
+    size *= i;
+  }
+
+  std::vector<float> output_scores(size, 0);
+  output_score_tensor->copy_to_cpu(output_scores.data());
+  auto output_scores_iter = output_scores.begin();
+
+  int single_batch_score_size = size / batch_size;
+  for (int i = 0; i < batch_size; ++i) {
+    (*result)[i].score_map.data.resize(single_batch_score_size);
+    (*result)[i].score_map.shape.push_back(1);
+    for (int j = 1; j < output_score_shape.size(); ++j) {
+      (*result)[i].score_map.shape.push_back(output_score_shape[j]);
+    }
+    std::copy(output_scores_iter + i * single_batch_score_size,
+              output_scores_iter + (i + 1) * single_batch_score_size,
+              (*result)[i].score_map.data.data());
+  }
+
+  // Rescale the outputs to the original image size
+  for (int i = 0; i < batch_size; ++i) {
+    std::vector<uint8_t> label_map((*result)[i].label_map.data.begin(),
+                                   (*result)[i].label_map.data.end());
+    cv::Mat mask_label((*result)[i].label_map.shape[1],
+                       (*result)[i].label_map.shape[2],
+                       CV_8UC1,
+                       label_map.data());
+
+    cv::Mat mask_score((*result)[i].score_map.shape[2],
+                       (*result)[i].score_map.shape[3],
+                       CV_32FC1,
+                       (*result)[i].score_map.data.data());
+    int idx = 1;
+    int len_postprocess = inputs_batch_[i].im_size_before_resize_.size();
+    for (std::vector<std::string>::reverse_iterator iter =
+             inputs_batch_[i].reshape_order_.rbegin();
+         iter != inputs_batch_[i].reshape_order_.rend();
+         ++iter) {
+      if (*iter == "padding") {
+        auto before_shape =
+            inputs_batch_[i].im_size_before_resize_[len_postprocess - idx];
+        inputs_batch_[i].im_size_before_resize_.pop_back();
+        auto padding_w = before_shape[0];
+        auto padding_h = before_shape[1];
+        mask_label = mask_label(cv::Rect(0, 0, padding_h, padding_w));
+        mask_score = mask_score(cv::Rect(0, 0, padding_h, padding_w));
+      } else if (*iter == "resize") {
+        auto
before_shape = + inputs_batch_[i].im_size_before_resize_[len_postprocess - idx]; + inputs_batch_[i].im_size_before_resize_.pop_back(); + auto resize_w = before_shape[0]; + auto resize_h = before_shape[1]; + cv::resize(mask_label, + mask_label, + cv::Size(resize_h, resize_w), + 0, + 0, + cv::INTER_NEAREST); + cv::resize(mask_score, + mask_score, + cv::Size(resize_h, resize_w), + 0, + 0, + cv::INTER_LINEAR); + } + ++idx; + } + (*result)[i].label_map.data.assign(mask_label.begin(), + mask_label.end()); + (*result)[i].label_map.shape = {mask_label.rows, mask_label.cols}; + (*result)[i].score_map.data.assign(mask_score.begin(), + mask_score.end()); + (*result)[i].score_map.shape = {mask_score.rows, mask_score.cols}; + } + return true; } -} // namespce of PaddleX +} // namespace PaddleX diff --git a/deploy/cpp/src/transforms.cpp b/deploy/cpp/src/transforms.cpp index 9224367d3522ebe4e323a40a1af92be7cfeae9d3..99a73ee7345bbc8cc672d1c42627a9326ded0cf7 100644 --- a/deploy/cpp/src/transforms.cpp +++ b/deploy/cpp/src/transforms.cpp @@ -95,11 +95,13 @@ bool Padding::Run(cv::Mat* im, ImageBlob* data) { if (width_ > 1 & height_ > 1) { padding_w = width_ - im->cols; padding_h = height_ - im->rows; - } else if (coarsest_stride_ > 1) { + } else if (coarsest_stride_ >= 1) { + int h = im->rows; + int w = im->cols; padding_h = - ceil(im->rows * 1.0 / coarsest_stride_) * coarsest_stride_ - im->rows; + ceil(h * 1.0 / coarsest_stride_) * coarsest_stride_ - im->rows; padding_w = - ceil(im->cols * 1.0 / coarsest_stride_) * coarsest_stride_ - im->cols; + ceil(w * 1.0 / coarsest_stride_) * coarsest_stride_ - im->cols; } if (padding_h < 0 || padding_w < 0) { @@ -219,4 +221,5 @@ bool Transforms::Run(cv::Mat* im, ImageBlob* data) { } return true; } + } // namespace PaddleX diff --git a/deploy/cpp/src/visualize.cpp b/deploy/cpp/src/visualize.cpp index 6ec09fd1c2b7a342ea3d31e784a80033d80f1014..1511887f097e20826f13c8c1f098ceea4efc0b5b 100644 --- a/deploy/cpp/src/visualize.cpp +++ b/deploy/cpp/src/visualize.cpp @@ -145,4 +145,4 @@ std::string generate_save_path(const std::string& save_dir, std::string image_name(file_path.substr(pos + 1)); return save_dir + OS_PATH_SEP + image_name; } -} // namespace of PaddleX +} // namespace PaddleX diff --git a/docs/FAQ.md b/docs/FAQ.md index b120ebd10ed791c65c3f65e611c5b45da2a9211f..e25faab5ad9e230f34f1790db0dcf24fba3328e6 100755 --- a/docs/FAQ.md +++ b/docs/FAQ.md @@ -13,7 +13,7 @@ > 可以使用模型裁剪,参考文档[模型裁剪使用教程](slim/prune.md),通过调整裁剪参数,可以控制模型裁剪后的大小,在实际实验中,如VOC检测数据,使用yolov3-mobilenet,原模型大小为XXM,裁剪后为XX M,精度基本保持不变 ## 4. 如何配置训练时GPU的卡数 -> 通过在终端export环境变量,或在Python代码中设置,可参考文档[CPU/多卡GPU训练](gpu_configure.md) +> 通过在终端export环境变量,或在Python代码中设置,可参考文档[CPU/多卡GPU训练](appendix/gpu_configure.md) ## 5. 想将之前训练的模型参数上继续训练 > 在训练调用`train`接口时,将`pretrain_weights`设为之前的模型保存路径即可 @@ -52,7 +52,7 @@ > 1. 用户自行训练时,如不确定迭代的轮数,可以将轮数设高一些,同时注意设置`save_interval_epochs`,这样模型迭代每间隔相应轮数就会在验证集上进行评估和保存,可以根据不同轮数模型在验证集上的评估指标,判断模型是否已经收敛,若模型已收敛,可以自行结束训练进程 > ## 9. 只有CPU,没有GPU,如何提升训练速度 -> 当没有GPU时,可以根据自己的CPU配置,选择是否使用多CPU进行训练,具体配置方式可以参考文档[多卡CPU/GPU训练](gpu_configure.md) +> 当没有GPU时,可以根据自己的CPU配置,选择是否使用多CPU进行训练,具体配置方式可以参考文档[多卡CPU/GPU训练](appendix/gpu_configure.md) > ## 10. 电脑不能联网,训练时因为下载预训练模型失败,如何解决 > 可以预先通过其它方式准备好预训练模型,然后训练时自定义`pretrain_weights`即可,可参考文档[无联网模型训练](how_to_offline_run.md) @@ -61,8 +61,8 @@ > 1.可以按照9的方式来解决这个问题 > 2.每次训练前都设定`paddlex.pretrain_dir`路径,如设定`paddlex.pretrain_dir='/usrname/paddlex`,如此下载完的预训练模型会存放至`/usrname/paddlex`目录下,而已经下载在该目录的模型也不会再次重复下载 -## 12. 程序启动时提示"Failed to execute script PaddleX",如何解决? +## 12. 
PaddleX GUI启动时提示"Failed to execute script PaddleX",如何解决? > 1. 请检查目标机器上PaddleX程序所在路径是否包含中文。目前暂不支持中文路径,请尝试将程序移动到英文目录。 > 2. 如果您的系统是Windows 7或者Windows Server 2012时,原因是缺少MFPlat.DLL/MF.dll/MFReadWrite.dll等OpenCV依赖的DLL,请按如下方式安装桌面体验:通过“我的电脑”-->“属性”-->"管理"打开服务器管理器,点击右上角“管理”选择“添加角色和功能”。点击“服务器选择”-->“功能”,拖动滚动条到最下端,点开“用户界面和基础结构”,勾选“桌面体验”后点击“安装”,等安装完成尝试再次运行PaddleX。 > 3. 请检查目标机器上是否有其他的PaddleX程序或者进程在运行中,如有请退出或者重启机器看是否解决 -> 4. 请确认运行程序的用户是否有管理员权限,如非管理员权限用户请尝试使用管理员运行看是否成功 \ No newline at end of file +> 4. 请确认运行程序的用户是否有管理员权限,如非管理员权限用户请尝试使用管理员运行看是否成功 diff --git a/docs/apis/datasets/detection.md b/docs/apis/datasets/detection.md index e660d7edfa9cfc41582902b92bcf0b0977766222..a32b6be5de6246ef6e28ebe376ded7e3faf82ff7 100755 --- a/docs/apis/datasets/detection.md +++ b/docs/apis/datasets/detection.md @@ -8,7 +8,7 @@ paddlex.datasets.VOCDetection(data_dir, file_list, label_list, transforms=None, > 仅用于**目标检测**。读取PascalVOC格式的检测数据集,并对样本进行相应的处理。PascalVOC数据集格式的介绍可查看文档:[数据集格式说明](../datasets.md) -> 示例:[代码文件](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/detection/yolov3_mobilenetv1.py#L29) +> 示例:[代码文件](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/train/detection/yolov3_darknet53.py#L29) > **参数** @@ -21,6 +21,16 @@ paddlex.datasets.VOCDetection(data_dir, file_list, label_list, transforms=None, > > * **parallel_method** (str): 数据集中样本在预处理过程中并行处理的方式,支持'thread'线程和'process'进程两种方式。默认为'process'(Windows和Mac下会强制使用thread,该参数无效)。 > > * **shuffle** (bool): 是否需要对数据集中样本打乱顺序。默认为False。 +> 【可选】支持在训练过程中加入无目标真值的背景图片来减少背景误检,定义VOCDetection类后调用其成员函数`add_negative_samples`添加背景图片即可: +> ``` +> add_negative_samples(image_dir) +> ``` +> > 示例:[代码](../../tuning_strategy/detection/negatives_training.html#id4) + +> > **参数** + +> > > * **image_dir** (str): 背景图片所在的目录路径。 + ## CocoDetection类 ``` @@ -41,6 +51,16 @@ paddlex.datasets.CocoDetection(data_dir, ann_file, transforms=None, num_workers= > > * **parallel_method** (str): 数据集中样本在预处理过程中并行处理的方式,支持'thread'线程和'process'进程两种方式。默认为'process'(Windows和Mac下会强制使用thread,该参数无效)。 > > * **shuffle** (bool): 是否需要对数据集中样本打乱顺序。默认为False。 +> 【可选】支持在训练过程中加入无目标真值的背景图片来减少背景误检,定义CocoDetection类后调用其成员函数`add_negative_samples`添加背景图片即可: +> ``` +> add_negative_samples(image_dir) +> ``` +> > 示例:[代码](../../tuning_strategy/detection/negatives_training.html#id4) + +> > **参数** + +> > > * **image_dir** (str): 背景图片所在的目录路径。 + ## EasyDataDet类 ``` @@ -59,5 +79,15 @@ paddlex.datasets.EasyDataDet(data_dir, file_list, label_list, transforms=None, n > > * **num_workers** (int|str):数据集中样本在预处理过程中的线程或进程数。默认为'auto'。当设为'auto'时,根据系统的实际CPU核数设置`num_workers`: 如果CPU核数的一半大于8,则`num_workers`为8,否则为CPU核数的一半。 > > * **buffer_size** (int): 数据集中样本在预处理过程中队列的缓存长度,以样本数为单位。默认为100。 > > * **parallel_method** (str): 数据集中样本在预处理过程中并行处理的方式,支持'thread'线程和'process'进程两种方式。默认为'process'(Windows和Mac下会强制使用thread,该参数无效)。 -> > * **shuffle** (bool): 是否需要对数据集中样本打乱顺序。默认为False。 +> > * **shuffle** (bool): 是否需要对数据集中样本打乱顺序。默认为False。 + + +> 【可选】支持在训练过程中加入无目标真值的背景图片来减少背景误检,定义EasyDataDet类后调用其成员函数`add_negative_samples`添加背景图片即可: +> ``` +> add_negative_samples(image_dir) +> ``` +> > 示例:[代码](../../tuning_strategy/detection/negatives_training.html#id4) + +> > **参数** +> > > * **image_dir** (str): 背景图片所在的目录路径。 diff --git a/docs/apis/models/classification.md b/docs/apis/models/classification.md index 82b459d8281b1e9bc9d1f7abdd48fddb16473c21..b70b555a7007b77851af22ddd4a775a4b3a8f93b 100755 --- a/docs/apis/models/classification.md +++ b/docs/apis/models/classification.md @@ -80,7 +80,7 @@ predict(self, img_file, transforms=None, topk=5) ## 其它分类器类 
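在查看下方列表前,可先通过一个最简示意了解这一组接口的共性(示意基于上文`ResNet50`的`train`/`evaluate`/`predict`接口说明;模型类名以下文列表为准,数据集构建从略,均为示意性假设):

```python
import paddlex as pdx

# 各分类器均提供与ResNet50相同的train/evaluate/predict接口,
# 切换骨干网络通常只需改动模型构建这一行
model = pdx.cls.MobileNetV3_small_ssld(num_classes=2)
# 训练:数据集构建参考datasets文档,此处从略
# model.train(num_epochs=10, train_dataset=train_dataset,
#             train_batch_size=32, eval_dataset=eval_dataset,
#             save_dir='output')
# 训练(或加载已训练模型)后进行预测,返回topk个类别及概率
# result = model.predict('test.jpg', topk=5)
```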
-PaddleX提供了共计22种分类器,所有分类器均提供同`ResNet50`相同的训练`train`,评估`evaluate`和预测`predict`接口,各模型效果可参考[模型库](../appendix/model_zoo.md)。 +PaddleX提供了共计22种分类器,所有分类器均提供同`ResNet50`相同的训练`train`,评估`evaluate`和预测`predict`接口,各模型效果可参考[模型库](https://paddlex.readthedocs.io/zh_CN/latest/appendix/model_zoo.html)。 ### ResNet18 ```python diff --git a/docs/apis/models/detection.md b/docs/apis/models/detection.md index dbd3130b115abc0d81a53cbc4aad5d0d08d73734..f76e5598636f6c8ac94b90acca7fe1c846708077 100755 --- a/docs/apis/models/detection.md +++ b/docs/apis/models/detection.md @@ -42,7 +42,7 @@ train(self, num_epochs, train_dataset, train_batch_size=8, eval_dataset=None, sa > > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为20。 > > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。 > > - **save_dir** (str): 模型保存路径。默认值为'output'。 -> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为None。 +> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为字符串'COCO',则自动下载在COCO数据集上预训练的模型权重;若为None,则不使用预训练模型。默认为None。 > > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器:fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。 > > - **learning_rate** (float): 默认优化器的学习率。默认为1.0/8000。 > > - **warmup_steps** (int): 默认优化器进行warmup过程的步数。默认为1000。 @@ -129,7 +129,7 @@ train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, sa > > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为1。 > > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。 > > - **save_dir** (str): 模型保存路径。默认值为'output'。 -> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为None。 +> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为字符串'COCO',则自动下载在COCO数据集上预训练的模型权重(注意:暂未提供ResNet18的COCO预训练模型);为None,则不使用预训练模型。默认为None。 > > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器:fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。 > > - **learning_rate** (float): 默认优化器的初始学习率。默认为0.0025。 > > - **warmup_steps** (int): 默认优化器进行warmup过程的步数。默认为500。 diff --git a/docs/apis/models/instance_segmentation.md b/docs/apis/models/instance_segmentation.md index e3f3f720adda70d7649234a96dca28dc7133bc4b..72d008b2252a0df73648941d8dbee9d6f8a8764a 100755 --- a/docs/apis/models/instance_segmentation.md +++ b/docs/apis/models/instance_segmentation.md @@ -34,7 +34,7 @@ train(self, num_epochs, train_dataset, train_batch_size=1, eval_dataset=None, sa > > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为1。 > > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。 > > - **save_dir** (str): 模型保存路径。默认值为'output'。 -> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为None。 +> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为字符串'COCO',则自动下载在COCO数据集上预训练的模型权重(注意:暂未提供ResNet18和HRNet_W18的COCO预训练模型);若为None,则不使用预训练模型。默认为None。 > > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器:fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。 > > - **learning_rate** (float): 默认优化器的初始学习率。默认为0.00125。 > > - **warmup_steps** (int): 默认优化器进行warmup过程的步数。默认为500。 diff --git a/docs/apis/models/semantic_segmentation.md b/docs/apis/models/semantic_segmentation.md index 2321b45c61e4b44e8620543cab1711671929c5f6..3ff66337fe64b35f29a2a7985cea040fcb233d82 100755 --- a/docs/apis/models/semantic_segmentation.md +++ 
b/docs/apis/models/semantic_segmentation.md @@ -12,7 +12,7 @@ paddlex.seg.DeepLabv3p(num_classes=2, backbone='MobileNetV2_x1.0', output_stride > **参数** > > - **num_classes** (int): 类别数。 -> > - **backbone** (str): DeepLabv3+的backbone网络,实现特征图的计算,取值范围为['Xception65', 'Xception41', 'MobileNetV2_x0.25', 'MobileNetV2_x0.5', 'MobileNetV2_x1.0', 'MobileNetV2_x1.5', 'MobileNetV2_x2.0'],'MobileNetV2_x1.0'。 +> > - **backbone** (str): DeepLabv3+的backbone网络,实现特征图的计算,取值范围为['Xception65', 'Xception41', 'MobileNetV2_x0.25', 'MobileNetV2_x0.5', 'MobileNetV2_x1.0', 'MobileNetV2_x1.5', 'MobileNetV2_x2.0'],默认值为'MobileNetV2_x1.0'。 > > - **output_stride** (int): backbone 输出特征图相对于输入的下采样倍数,一般取值为8或16。默认16。 > > - **aspp_with_sep_conv** (bool): decoder模块是否采用separable convolutions。默认True。 > > - **decoder_use_sep_conv** (bool): decoder模块是否采用separable convolutions。默认True。 @@ -40,12 +40,12 @@ train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, ev > > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为1。 > > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。 > > - **save_dir** (str): 模型保存路径。默认'output' -> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认'IMAGENET'。 +> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet图片数据上预训练的模型权重;若为字符串'COCO',则自动下载在COCO数据集上预训练的模型权重(注意:暂未提供Xception41、MobileNetV2_x0.25、MobileNetV2_x0.5、MobileNetV2_x1.5、MobileNetV2_x2.0的COCO预训练模型);若为字符串'CITYSCAPES',则自动下载在CITYSCAPES数据集上预训练的模型权重(注意:暂未提供Xception41、MobileNetV2_x0.25、MobileNetV2_x0.5、MobileNetV2_x1.5、MobileNetV2_x2.0的CITYSCAPES预训练模型);若为None,则不使用预训练模型。默认'IMAGENET'。 > > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认的优化器:使用fluid.optimizer.Momentum优化方法,polynomial的学习率衰减策略。 > > - **learning_rate** (float): 默认优化器的初始学习率。默认0.01。 > > - **lr_decay_power** (float): 默认优化器学习率衰减指数。默认0.9。 > > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认False。 -> > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。 +> > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。 > > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。 > > - **early_stop** (bool): 是否使用提前终止训练策略。默认值为False。 > > - **early_stop_patience** (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内连续下降或持平,则终止训练。默认值为5。 @@ -129,7 +129,7 @@ train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, ev > > - **learning_rate** (float): 默认优化器的初始学习率。默认0.01。 > > - **lr_decay_power** (float): 默认优化器学习率衰减指数。默认0.9。 > > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认False。 -> > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。 +> > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。 > > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。 > > - **early_stop** (float): 是否使用提前终止训练策略。默认值为False。 > > - **early_stop_patience** (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内连续下降或持平,则终止训练。默认值为5。 @@ -186,10 +186,10 @@ paddlex.seg.HRNet(num_classes=2, width=18, use_bce_loss=False, use_dice_loss=Fal > **参数** > > - **num_classes** (int): 类别数。 -> > - **width** (int): 高分辨率分支中特征层的通道数量。默认值为18。可选择取值为[18, 30, 32, 40, 44, 48, 60, 64]。 +> > - **width** (int|str): 高分辨率分支中特征层的通道数量。默认值为18。可选择取值为[18, 30, 32, 40, 44, 48, 60, 64, '18_small_v1']。'18_small_v1'是18的轻量级版本。 > > - **use_bce_loss** (bool): 
是否使用bce loss作为网络的损失函数,只能用于两类分割。可与dice loss同时使用。默认False。 > > - **use_dice_loss** (bool): 是否使用dice loss作为网络的损失函数,只能用于两类分割,可与bce loss同时使用。当use_bce_loss和use_dice_loss都为False时,使用交叉熵损失函数。默认False。 -> > - **class_weight** (list/str): 交叉熵损失函数各类损失的权重。当`class_weight`为list的时候,长度应为`num_classes`。当`class_weight`为str时, weight.lower()应为'dynamic',这时会根据每一轮各类像素的比重自行计算相应的权重,每一类的权重为:每类的比例 * num_classes。class_weight取默认值None是,各类的权重1,即平时使用的交叉熵损失函数。 +> > - **class_weight** (list|str): 交叉熵损失函数各类损失的权重。当`class_weight`为list的时候,长度应为`num_classes`。当`class_weight`为str时, weight.lower()应为'dynamic',这时会根据每一轮各类像素的比重自行计算相应的权重,每一类的权重为:每类的比例 * num_classes。class_weight取默认值None是,各类的权重1,即平时使用的交叉熵损失函数。 > > - **ignore_index** (int): label上忽略的值,label为`ignore_index`的像素不参与损失函数的计算。默认255。 ### train 训练接口 @@ -209,12 +209,12 @@ train(self, num_epochs, train_dataset, train_batch_size=2, eval_dataset=None, ev > > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为1。 > > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。 > > - **save_dir** (str): 模型保存路径。默认'output' -> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet数据集上预训练的模型权重;若为None,则不使用预训练模型。默认'IMAGENET'。 +> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',则自动下载在ImageNet数据集上预训练的模型权重;若为字符串'CITYSCAPES',则自动下载在CITYSCAPES图片数据上预训练的模型权重(注意:目前仅提供`width`取值为18的CITYSCAPES预训练模型);若为None,则不使用预训练模型。默认'IMAGENET'。 > > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认的优化器:使用fluid.optimizer.Momentum优化方法,polynomial的学习率衰减策略。 > > - **learning_rate** (float): 默认优化器的初始学习率。默认0.01。 > > - **lr_decay_power** (float): 默认优化器学习率衰减指数。默认0.9。 > > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认False。 -> > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。 +> > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。 > > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。 > > - **early_stop** (float): 是否使用提前终止训练策略。默认值为False。 > > - **early_stop_patience** (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内连续下降或持平,则终止训练。默认值为5。 @@ -258,3 +258,88 @@ predict(self, im_file, transforms=None): > **返回值** > > > > - **dict**: 包含关键字'label_map'和'score_map', 'label_map'存储预测结果灰度图,像素值表示对应的类别,'score_map'存储各类别的概率,shape=(h, w, num_classes)。 + + +## FastSCNN类 + +```python +paddlex.seg.FastSCNN(num_classes=2, use_bce_loss=False, use_dice_loss=False, class_weight=None, ignore_index=255, multi_loss_weight=[1.0]) +``` + +> 构建FastSCNN分割器。 + +> **参数** + +> > - **num_classes** (int): 类别数。 +> > - **use_bce_loss** (bool): 是否使用bce loss作为网络的损失函数,只能用于两类分割。可与dice loss同时使用。默认False。 +> > - **use_dice_loss** (bool): 是否使用dice loss作为网络的损失函数,只能用于两类分割,可与bce loss同时使用。当use_bce_loss和use_dice_loss都为False时,使用交叉熵损失函数。默认False。 +> > - **class_weight** (list/str): 交叉熵损失函数各类损失的权重。当`class_weight`为list的时候,长度应为`num_classes`。当`class_weight`为str时, weight.lower()应为'dynamic',这时会根据每一轮各类像素的比重自行计算相应的权重,每一类的权重为:每类的比例 * num_classes。class_weight取默认值None是,各类的权重1,即平时使用的交叉熵损失函数。 +> > - **ignore_index** (int): label上忽略的值,label为`ignore_index`的像素不参与损失函数的计算。默认255。 +> > - **multi_loss_weight** (list): 多分支上的loss权重。默认计算一个分支上的loss,即默认值为[1.0]。也支持计算两个分支或三个分支上的loss,权重按[fusion_branch_weight, higher_branch_weight, lower_branch_weight]排列,fusion_branch_weight为空间细节分支和全局上下文分支融合后的分支上的loss权重,higher_branch_weight为空间细节分支上的loss权重,lower_branch_weight为全局上下文分支上的loss权重,若higher_branch_weight和lower_branch_weight未设置则不会计算这两个分支上的loss。 + +### train 训练接口 + +```python +train(self, num_epochs, train_dataset, 
train_batch_size=2, eval_dataset=None, eval_batch_size=1, save_interval_epochs=1, log_interval_steps=2, save_dir='output', pretrain_weights='CITYSCAPES', optimizer=None, learning_rate=0.01, lr_decay_power=0.9, use_vdl=False, sensitivities_file=None, eval_metric_loss=0.05, early_stop=False, early_stop_patience=5, resume_checkpoint=None):
+```
+
+> FastSCNN模型训练接口。
+
+> **参数**
+> >
+> > - **num_epochs** (int): 训练迭代轮数。
+> > - **train_dataset** (paddlex.datasets): 训练数据读取器。
+> > - **train_batch_size** (int): 训练数据batch大小。同时作为验证数据batch大小。默认2。
+> > - **eval_dataset** (paddlex.datasets): 评估数据读取器。
+> > - **save_interval_epochs** (int): 模型保存间隔(单位:迭代轮数)。默认为1。
+> > - **log_interval_steps** (int): 训练日志输出间隔(单位:迭代次数)。默认为2。
+> > - **save_dir** (str): 模型保存路径。默认'output'
+> > - **pretrain_weights** (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'CITYSCAPES',则自动下载在CITYSCAPES图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认'CITYSCAPES'。
+> > - **optimizer** (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认的优化器:使用fluid.optimizer.Momentum优化方法,polynomial的学习率衰减策略。
+> > - **learning_rate** (float): 默认优化器的初始学习率。默认0.01。
+> > - **lr_decay_power** (float): 默认优化器学习率衰减指数。默认0.9。
+> > - **use_vdl** (bool): 是否使用VisualDL进行可视化。默认False。
+> > - **sensitivities_file** (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
+> > - **eval_metric_loss** (float): 可容忍的精度损失。默认为0.05。
+> > - **early_stop** (bool): 是否使用提前终止训练策略。默认值为False。
+> > - **early_stop_patience** (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内连续下降或持平,则终止训练。默认值为5。
+> > - **resume_checkpoint** (str): 恢复训练时指定上次训练保存的模型路径。若为None,则不会恢复训练。默认值为None。
+
+### evaluate 评估接口
+
+```
+evaluate(self, eval_dataset, batch_size=1, epoch_id=None, return_details=False):
+```
+
+> FastSCNN模型评估接口。
+
+> **参数**
+> >
+> > - **eval_dataset** (paddlex.datasets): 评估数据读取器。
+> > - **batch_size** (int): 评估时的batch大小。默认1。
+> > - **epoch_id** (int): 当前评估模型所在的训练轮数。
+> > - **return_details** (bool): 是否返回详细信息。默认False。
+
+> **返回值**
+> >
+> > - **dict**: 当return_details为False时,返回dict。包含关键字:'miou'、'category_iou'、'macc'、
+> > 'category_acc'和'kappa',分别表示平均iou、各类别iou、平均准确率、各类别准确率和kappa系数。
+> > - **tuple** (metrics, eval_details):当return_details为True时,增加返回dict (eval_details),
+> > 包含关键字:'confusion_matrix',表示评估的混淆矩阵。
+
+### predict 预测接口
+
+```
+predict(self, im_file, transforms=None):
+```
+
+> FastSCNN模型预测接口。需要注意的是,只有在训练过程中定义了eval_dataset,模型在保存时才会将预测时的图像处理流程保存在`FastSCNN.test_transforms`和`FastSCNN.eval_transforms`中。如未在训练时定义eval_dataset,那么在调用预测`predict`接口时,用户需要再重新定义test_transforms传入给`predict`接口。
+
+> **参数**
+> >
+> > - **im_file** (str): 预测图像路径。
+> > - **transforms** (paddlex.seg.transforms): 数据预处理操作。
+
+> **返回值**
+> >
+> > - **dict**: 包含关键字'label_map'和'score_map', 'label_map'存储预测结果灰度图,像素值表示对应的类别,'score_map'存储各类别的概率,shape=(h, w, num_classes)。
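以下给出FastSCNN接口的一个最简使用示意(数据集路径为占位假设;`pdx.datasets.SegDataset`按其他数据集读取器的字段假设书写,transforms写法参考seg_transforms文档,实际请以对应API文档为准):

```python
import paddlex as pdx
from paddlex.seg import transforms

# 训练与评估时的数据预处理
train_transforms = transforms.ComposedSegTransforms(
    mode='train', train_crop_size=[512, 512])
eval_transforms = transforms.ComposedSegTransforms(mode='eval')

# 数据集路径为占位示例
train_dataset = pdx.datasets.SegDataset(
    data_dir='dataset',
    file_list='dataset/train_list.txt',
    label_list='dataset/labels.txt',
    transforms=train_transforms,
    shuffle=True)
eval_dataset = pdx.datasets.SegDataset(
    data_dir='dataset',
    file_list='dataset/val_list.txt',
    label_list='dataset/labels.txt',
    transforms=eval_transforms)

# 构建并训练FastSCNN,参数含义见上文train接口说明
model = pdx.seg.FastSCNN(num_classes=len(train_dataset.labels))
model.train(
    num_epochs=40,
    train_dataset=train_dataset,
    train_batch_size=4,
    eval_dataset=eval_dataset,
    learning_rate=0.01,
    pretrain_weights='CITYSCAPES',
    save_dir='output/fastscnn')
```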
diff --git a/docs/apis/transforms/seg_transforms.md b/docs/apis/transforms/seg_transforms.md
index 1fb2b561e4818edad72fd97f43029de079b355b3..264af5c472cb824865188a5386a513e5a00fe0ba 100755
--- a/docs/apis/transforms/seg_transforms.md
+++ b/docs/apis/transforms/seg_transforms.md
@@ -200,7 +200,7 @@ ComposedSegTransforms.add_augmenters(augmenters)
 import paddlex as pdx
 from paddlex.seg import transforms
 train_transforms = transforms.ComposedSegTransforms(mode='train', train_crop_size=[512, 512])
-eval_transforms = transforms.ComposedYOLOTransforms(mode='eval')
+eval_transforms = transforms.ComposedSegTransforms(mode='eval')
 
 # 添加数据增强
 import imgaug.augmenters as iaa
diff --git a/docs/apis/visualize.md b/docs/apis/visualize.md
index 069913274580f1e8bd5fdb5ee6e6e642c977b3ce..2cdc96844758128545ffe3a1ebf815476cae1090 100755
--- a/docs/apis/visualize.md
+++ b/docs/apis/visualize.md
@@ -146,10 +146,11 @@ paddlex.interpret.normlime(img_file,
                            dataset=None,
                            num_samples=3000,
                            batch_size=50,
-                           save_dir='./')
+                           save_dir='./',
+                           normlime_weights_file=None)
 ```
 使用NormLIME算法将模型预测结果的可解释性可视化。
-NormLIME是利用一定数量的样本来出一个全局的解释。NormLIME会提前计算一定数量的测试样本的LIME结果,然后对相同的特征进行权重的归一化,这样来得到一个全局的输入和输出的关系。
+NormLIME是利用一定数量的样本得出一个全局的解释。由于NormLIME计算量较大,此处采用一种简化的方式:使用一定数量的测试样本(目前默认使用所有测试样本),对每个样本进行特征提取,映射到同一个特征空间;然后以此特征作为输入,以模型输出作为输出,使用线性回归对其进行拟合,得到一个全局的输入和输出的关系。之后,对某一测试样本进行解释时,使用NormLIME全局的解释,来对LIME的结果进行滤波,使最终的可视化结果更加稳定。
 
 **注意:** 可解释性结果可视化目前只支持分类模型。
@@ -159,9 +160,27 @@
 >* **dataset** (paddlex.datasets): 数据集读取器,默认为None。
 >* **num_samples** (int): LIME用于学习线性模型的采样数,默认为3000。
 >* **batch_size** (int): 预测数据batch大小,默认为50。
->* **save_dir** (str): 可解释性可视化结果(保存为png格式文件)和中间文件存储路径。
+>* **save_dir** (str): 可解释性可视化结果(保存为png格式文件)和中间文件存储路径。
+>* **normlime_weights_file** (str): NormLIME初始化文件名,若不存在,则计算一次,保存于该路径;若存在,则直接载入。
 
-**注意:** dataset`读取的是一个数据集,该数据集不宜过大,否则计算时间会较长,但应包含所有类别的数据。
+**注意:** `dataset`读取的是一个数据集,该数据集不宜过大,否则计算时间会较长,但应包含所有类别的数据。NormLIME可解释性结果可视化目前只支持分类模型。
 
 ### 使用示例
 > 对预测可解释性结果可视化的过程可参见[代码](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/interpret/normlime.py)。
+
+## 数据预处理/增强过程可视化
+```
+paddlex.transforms.visualize(dataset,
+                  img_count=3,
+                  save_dir='vdl_output')
+```
+对数据预处理/增强中间结果进行可视化。
+可使用VisualDL查看中间结果:
+1. VisualDL启动方式: visualdl --logdir vdl_output --port 8001
+2. 浏览器打开 http://0.0.0.0:8001即可,
+   其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP
+
+### 参数
+>* **dataset** (paddlex.datasets): 数据集读取器。
+>* **img_count** (int): 需要进行数据预处理/增强的图像数目。默认为3。
+>* **save_dir** (str): 日志保存的路径。默认为'vdl_output'。
\ No newline at end of file
diff --git a/docs/appendix/index.rst b/docs/appendix/index.rst
index c402384ebc307713ed87055dc86cab58dcf33bbe..814a611948a451a76d73fd0aa9276f40db2c28b9 100755
--- a/docs/appendix/index.rst
+++ b/docs/appendix/index.rst
@@ -7,6 +7,7 @@
    :caption: 目录:
 
    model_zoo.md
+   slim_model_zoo.md
    metrics.md
    interpret.md
    parameters.md
diff --git a/docs/appendix/interpret.md b/docs/appendix/interpret.md
index 886620df2fa98c03abda4717dea627277715b2d9..43ecd48e23810c2e3ed3cd1652bf06b6e1fc04f7 100644
--- a/docs/appendix/interpret.md
+++ b/docs/appendix/interpret.md
@@ -20,9 +20,20 @@ LIME的使用方式可参见[代码示例](https://github.com/PaddlePaddle/Paddl
 ## NormLIME
 NormLIME是在LIME上的改进,LIME的解释是局部性的,是针对当前样本给的特定解释,而NormLIME是利用一定数量的样本对当前样本的一个全局性的解释,有一定的降噪效果。其实现步骤如下所示:
 1. 下载Kmeans模型参数和ResNet50_vc网络前三层参数。(ResNet50_vc的参数是在ImageNet上训练所得网络的参数;使用ImageNet图像作为数据集,每张图像从ResNet50_vc的第三层输出提取对应超象素位置上的平均特征和质心上的特征,训练将得到此处的Kmeans模型)
-2. 计算测试集中每张图像的LIME结果。(如无测试集,可用验证集代替)
-3. 使用Kmeans模型对所有图像中的所有像素进行聚类。
-4. 对在同一个簇的超像素(相同的特征)进行权重的归一化,得到每个超像素的权重,以此来解释模型。
+2. 使用测试集中的数据计算normlime的权重信息(如无测试集,可用验证集代替):
+   对每张图像的处理:
+   (1) 获取图像的超像素。
+   (2) 使用ResNet50_vc获取第三层特征,针对每个超像素位置,组合质心特征和均值特征`F`。
+   (3) 把`F`作为Kmeans模型的输入,计算每个超像素位置的聚类中心。
+   (4) 使用训练好的分类模型,预测该张图像的`label`。
+   对所有图像的处理:
+   (1) 以每张图像的聚类中心信息组成的向量(若某聚类中心出现在该张图中则设置为1,反之为0)为输入,
+   预测的`label`为输出,构建逻辑回归函数`regression_func`。
+   (2) 由`regression_func`可获得每个聚类中心不同类别下的权重,并对权重进行归一化。
+3. 使用Kmeans模型获取需要可视化图像的每个超像素的聚类中心。
+4. 对需要可视化的图像的超像素进行随机遮掩构成新的图像。
+5. 对每张构造的图像使用预测模型预测label。
+6.
根据normlime的权重信息,每个超像素可获不同的权重,选取最高的权重为最终的权重,以此来解释模型。 NormLIME的使用方式可参见[代码示例](https://github.com/PaddlePaddle/PaddleX/blob/develop/tutorials/interpret/normlime.py)和[api介绍](../apis/visualize.html#normlime)。在使用时,参数中的`num_samples`设置尤为重要,其表示上述步骤2中的随机采样的个数,若设置过小会影响可解释性结果的稳定性,若设置过大则将在上述步骤3耗费较长时间;参数`batch_size`则表示在计算上述步骤3时,预测的batch size,若设置过小将在上述步骤3耗费较长时间,而上限则根据机器配置决定;而`dataset`则是由测试集或验证集构造的数据。 diff --git a/docs/appendix/model_zoo.md b/docs/appendix/model_zoo.md index 026aab79baad01f919718089bafc1c1541284be0..f866b39173ead1c162e9e3ee722ae2ea2cb2afb3 100644 --- a/docs/appendix/model_zoo.md +++ b/docs/appendix/model_zoo.md @@ -6,48 +6,56 @@ | 模型 | 模型大小 | 预测速度(毫秒) | Top1准确率(%) | Top5准确率(%) | | :----| :------- | :----------- | :--------- | :--------- | -| ResNet18| 46.9MB | - | 71.0 | 89.9 | -| ResNet34| 87.5MB | - | 74.6 | 92.1 | -| ResNet50| 102.7MB | - | 76.5 | 93.0 | -| ResNet101 |179.1MB | - | 77.6 | 93.6 | -| ResNet50_vd |102.8MB |- | 79.1 | 94.4 | -| ResNet101_vd| 179.2MB | - | 80.2 | 95.0 | -| ResNet50_vd_ssld |102.8MB | - | 82.4 | 96.1 | -| ResNet101_vd_ssld| 179.2MB | - | 83.7 | 96.7 | -| DarkNet53|166.9MB | - | 78.0 | 94.1 | -| MobileNetV1 | 16.0MB | - | 71.0 | 89.7 | -| MobileNetV2 | 14.0MB | - | 72.2 | 90.7 | -| MobileNetV3_large| 21.0MB | - | 75.3 | 93.2 | -| MobileNetV3_small | 12.0MB | - | 68.2 | 88.1 | -| MobileNetV3_large_ssld| 21.0MB | - | 79.0 | 94.5 | -| MobileNetV3_small_ssld | 12.0MB | - | 71.3 | 90.1 | -| Xception41 |92.4MB | - | 79.6 | 94.4 | -| Xception65 | 144.6MB | - | 80.3 | 94.5 | -| DenseNet121 | 32.8MB | - | 75.7 | 92.6 | -| DenseNet161|116.3MB | - | 78.6 | 94.1 | -| DenseNet201| 84.6MB | - | 77.6 | 93.7 | -| ShuffleNetV2 | 9.0MB | - | 68.8 | 88.5 | -| HRNet_W18 | 21.29MB | - | 76.9 | 93.4 | +| [ResNet18](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet18_pretrained.tar)| 46.2MB | 3.72882 | 71.0 | 89.9 | +| [ResNet34](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet34_pretrained.tar)| 87.9MB | 5.50876 | 74.6 | 92.1 | +| [ResNet50](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar)| 103.4MB | 7.76659 | 76.5 | 93.0 | +| [ResNet101](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_pretrained.tar) |180.4MB | 13.80876 | 77.6 | 93.6 | +| [ResNet50_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar) |103.5MB | 8.20476 | 79.1 | 94.4 | +| [ResNet101_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_pretrained.tar)| 180.5MB | 14.24643 | 80.2 | 95.0 | +| [ResNet50_vd_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_ssld_pretrained.tar) |103.5MB | 7.79264 | 82.4 | 96.1 | +| [ResNet101_vd_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_ssld_pretrained.tar)| 180.5MB | 13.34580 | 83.7 | 96.7 | +| [DarkNet53](https://paddle-imagenet-models-name.bj.bcebos.com/DarkNet53_ImageNet1k_pretrained.tar)|167.4MB | 8.82047 | 78.0 | 94.1 | +| [MobileNetV1](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_pretrained.tar) | 17.4MB | 3.42838 | 71.0 | 89.7 | +| [MobileNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_pretrained.tar) | 15.0MB | 5.92667 | 72.2 | 90.7 | +| [MobileNetV3_large](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_large_x1_0_pretrained.tar)| 22.8MB | 8.31428 | 75.3 | 93.2 | +| [MobileNetV3_small](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_small_x1_0_pretrained.tar) | 12.5MB | 7.30689 | 68.2 | 88.1 | +| 
[MobileNetV3_large_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_large_x1_0_ssld_pretrained.tar)| 22.8MB | 8.06651 | 79.0 | 94.5 | +| [MobileNetV3_small_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_small_x1_0_ssld_pretrained.tar) | 12.5MB | 7.08837 | 71.3 | 90.1 | +| [Xception41](https://paddle-imagenet-models-name.bj.bcebos.com/Xception41_deeplab_pretrained.tar) | 109.2MB | 8.15611 | 79.6 | 94.4 | +| [Xception65](https://paddle-imagenet-models-name.bj.bcebos.com/Xception65_deeplab_pretrained.tar) | 161.6MB | 13.87017 | 80.3 | 94.5 | +| [DenseNet121](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet121_pretrained.tar) | 33.1MB | 17.09874 | 75.7 | 92.6 | +| [DenseNet161](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet161_pretrained.tar)| 118.0MB | 22.79690 | 78.6 | 94.1 | +| [DenseNet201](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet201_pretrained.tar)| 84.1MB | 25.26089 | 77.6 | 93.7 | +| [ShuffleNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_pretrained.tar) | 10.2MB | 15.40138 | 68.8 | 88.5 | +| [HRNet_W18](https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W18_C_pretrained.tar) | 21.29MB |45.25514 | 76.9 | 93.4 | ## 目标检测模型 -> 表中模型相关指标均为在MSCOCO数据集上使用PaddlePaddle Python预测接口测试得到(测试GPU型号为Nvidia Tesla V100测试得到,表中符号`-`表示相关指标暂未测试。 +> 表中模型相关指标均为在MSCOCO数据集上使用PaddlePaddle Python预测接口测试得到(测试GPU型号为Nvidia Tesla V100测试得到),表中符号`-`表示相关指标暂未测试。 | 模型 | 模型大小 | 预测时间(毫秒) | BoxAP(%) | |:-------|:-----------|:-------------|:----------| -|FasterRCNN-ResNet50|135.6MB| 78.450 | 35.2 | -|FasterRCNN-ResNet50_vd| 135.7MB | 79.523 | 36.4 | -|FasterRCNN-ResNet101| 211.7MB | 107.342 | 38.3 | -|FasterRCNN-ResNet50-FPN| 167.2MB | 44.897 | 37.2 | -|FasterRCNN-ResNet50_vd-FPN|168.7MB | 45.773 | 38.9 | -|FasterRCNN-ResNet101-FPN| 251.7MB | 55.782 | 38.7 | -|FasterRCNN-ResNet101_vd-FPN |252MB | 58.785 | 40.5 | -|FasterRCNN-HRNet_W18-FPN |115.5MB | 57.11 | 36 | -|YOLOv3-DarkNet53|252.4MB | 21.944 | 38.9 | -|YOLOv3-MobileNetv1 |101.2MB | 12.771 | 29.3 | -|YOLOv3-MobileNetv3|94.6MB | - | 31.6 | -| YOLOv3-ResNet34|169.7MB | 15.784 | 36.2 | +|[FasterRCNN-ResNet50](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_1x.tar)|136.0MB| 197.715 | 35.2 | +|[FasterRCNN-ResNet50_vd](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_vd_1x.tar)| 136.1MB | 475.700 | 36.4 | +|[FasterRCNN-ResNet101](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_1x.tar)| 212.5MB | 582.911 | 38.3 | +|[FasterRCNN-ResNet50-FPN](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_fpn_1x.tar)| 167.7MB | 83.189 | 37.2 | +|[FasterRCNN-ResNet50_vd-FPN](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_vd_fpn_2x.tar)|167.8MB | 128.277 | 38.9 | +|[FasterRCNN-ResNet101-FPN](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_fpn_1x.tar)| 244.2MB | 119.788 | 38.7 | +|[FasterRCNN-ResNet101_vd-FPN](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_vd_fpn_2x.tar) |244.3MB | 156.097 | 40.5 | +|[FasterRCNN-HRNet_W18-FPN](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_hrnetv2p_w18_1x.tar) |115.5MB | 81.592 | 36 | +|[YOLOv3-DarkNet53](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_darknet.tar)|249.2MB | 42.672 | 38.9 | +|[YOLOv3-MobileNetV1](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1.tar) |99.2MB | 15.442 | 29.3 | 
+|[YOLOv3-MobileNetV3_large](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v3.pdparams)|100.7MB | 143.322 | 31.6 | +| [YOLOv3-ResNet34](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r34.tar)|170.3MB | 23.185 | 36.2 | ## 实例分割模型 > 表中模型相关指标均为在MSCOCO数据集上测试得到。 + +| 模型 | 模型大小 | 预测时间(毫秒) | mIoU(%) | +|:-------|:-----------|:-------------|:----------| +|DeepLabv3+-MobileNetV2_x1.0|-| - | - | +|DeepLabv3+-Xception41|-| - | - | +|DeepLabv3+-Xception65|-| - | - | +|UNet|-| - | - | +|HRNet_w18|-| - | - | diff --git a/docs/appendix/slim_model_zoo.md b/docs/appendix/slim_model_zoo.md new file mode 100644 index 0000000000000000000000000000000000000000..a594d53dd7a777288571ccae6fad5ec21415de36 --- /dev/null +++ b/docs/appendix/slim_model_zoo.md @@ -0,0 +1,121 @@ +# PaddleX压缩模型库 + +## 图像分类 + +数据集:ImageNet-1000 + +### 量化 + +| 模型 | 压缩策略 | Top-1准确率 | 存储体积 | TensorRT时延(V100, ms) | +|:--:|:---:|:--:|:--:|:--:| +|MobileNetV1| 无 |70.99%| 17MB | -| +|MobileNetV1| 量化 |70.18% (-0.81%)| 4.4MB | - | +| MobileNetV2 | 无 |72.15%| 15MB | - | +| MobileNetV2 | 量化 | 71.15% (-1%)| 4.0MB | - | +|ResNet50| 无 |76.50%| 99MB | 2.71 | +|ResNet50| 量化 |76.33% (-0.17%)| 25.1MB | 1.19 | + +分类模型Lite时延(ms) + +| 设备 | 模型类型 | 压缩策略 | armv7 Thread 1 | armv7 Thread 2 | armv7 Thread 4 | armv8 Thread 1 | armv8 Thread 2 | armv8 Thread 4 | +| ------- | ----------- | ------------- | -------------- | -------------- | -------------- | -------------- | -------------- | -------------- | +| 高通835 | MobileNetV1 | 无 | 96.1942 | 53.2058 | 32.4468 | 88.4955 | 47.95 | 27.5189 | +| 高通835 | MobileNetV1 | 量化 | 60.5615 | 32.4016 | 16.6596 | 56.5266 | 29.7178 | 15.1459 | +| 高通835 | MobileNetV2 | 无 | 65.715 | 38.1346 | 25.155 | 61.3593 | 36.2038 | 22.849 | +| 高通835 | MobileNetV2 | 量化 | 48.3495 | 30.3069 | 22.1506 | 45.8715 | 27.4105 | 18.2223 | +| 高通835 | ResNet50 | 无 | 526.811 | 319.6486 | 205.8345 | 506.1138 | 335.1584 | 214.8936 | +| 高通835 | ResNet50 | 量化 | 476.0507 | 256.5963 | 139.7266 | 461.9176 | 248.3795 | 149.353 | +| 高通855 | MobileNetV1 | 无 | 33.5086 | 19.5773 | 11.7534 | 31.3474 | 18.5382 | 10.0811 | +| 高通855 | MobileNetV1 | 量化 | 37.0498 | 21.7081 | 11.0779 | 14.0947 | 8.1926 | 4.2934 | +| 高通855 | MobileNetV2 | 无 | 25.0396 | 15.2862 | 9.6609 | 22.909 | 14.1797 | 8.8325 | +| 高通855 | MobileNetV2 | 量化 | 28.1631 | 18.3917 | 11.8333 | 16.9399 | 11.1772 | 7.4176 | +| 高通855 | ResNet50 | 无 | 185.3705 | 113.0825 | 87.0741 | 177.7367 | 110.0433 | 74.4114 | +| 高通855 | ResNet50 | 量化 | 328.2683 | 201.9937 | 106.744 | 242.6397 | 150.0338 | 79.8659 | +| 麒麟970 | MobileNetV1 | 无 | 101.2455 | 56.4053 | 35.6484 | 94.8985 | 51.7251 | 31.9511 | +| 麒麟970 | MobileNetV1 | 量化 | 62.4412 | 32.2585 | 16.6215 | 57.825 | 29.2573 | 15.1206 | +| 麒麟970 | MobileNetV2 | 无 | 70.4176 | 42.0795 | 25.1939 | 68.9597 | 39.2145 | 22.6617 | +| 麒麟970 | MobileNetV2 | 量化 | 53.0961 | 31.7987 | 21.8334 | 49.383 | 28.2358 | 18.3642 | +| 麒麟970 | ResNet50 | 无 | 586.8943 | 344.0858 | 228.2293 | 573.3344 | 351.4332 | 225.8006 | +| 麒麟970 | ResNet50 | 量化 | 489.6188 | 258.3279 | 142.6063 | 480.0064 | 249.5339 | 138.5284 | + +### 剪裁 + +PaddleLite推理耗时说明: + +环境:Qualcomm SnapDragon 845 + armv8 + +速度指标:Thread1/Thread2/Thread4耗时 + + +| 模型 | 压缩策略 | Top-1 | 存储体积 |PaddleLite推理耗时|TensorRT推理速度(FPS)| +|:--:|:---:|:--:|:--:|:--:|:--:| +| MobileNetV1 | 无 | 70.99% | 17MB | 66.052\35.8014\19.5762|-| +| MobileNetV1 | 剪裁 -30% | 70.4% (-0.59%) | 12MB | 46.5958\25.3098\13.6982|-| +| MobileNetV1 | 剪裁 -50% | 69.8% (-1.19%) | 9MB | 37.9892\20.7882\11.3144|-| + +## 目标检测 + +### 量化 + +数据集: COCO2017 
+ +| 模型 | 压缩策略 | 数据集 | Image/GPU | 输入608 Box AP | 存储体积 | TensorRT时延(V100, ms) | +| :----------------------------: | :---------: | :----: | :-------: | :------------: | :------------: | :----------: | +| MobileNet-V1-YOLOv3 | 无 | COCO | 8 | 29.3 | 95MB | - | +| MobileNet-V1-YOLOv3 | 量化 | COCO | 8 | 27.9 (-1.4)| 25MB | - | +| R34-YOLOv3 | 无 | COCO | 8 | 36.2 | 162MB | - | +| R34-YOLOv3 | 量化 | COCO | 8 | 35.7 (-0.5) | 42.7MB | - | + +### 剪裁 + +数据集:Pasacl VOC & COCO2017 + +PaddleLite推理耗时说明: + +环境:Qualcomm SnapDragon 845 + armv8 + +速度指标:Thread1/Thread2/Thread4耗时 + +| 模型 | 压缩策略 | 数据集 | Image/GPU | 输入608 Box mmAP | 存储体积 | PaddleLite推理耗时(ms)(608*608) | TensorRT推理速度(FPS)(608*608) | +| :----------------------------: | :---------------: | :--------: | :-------: | :------------: | :----------: | :--------------: | :--------------: | +| MobileNet-V1-YOLOv3 | 无 | Pascal VOC | 8 | 76.2 | 94MB | 1238\796.943\520.101|60.04| +| MobileNet-V1-YOLOv3 | 剪裁 -52.88% | Pascal VOC | 8 | 77.6 (+1.4) | 31MB | 602.497\353.759\222.427 |99.36| +| MobileNet-V1-YOLOv3 | 无 | COCO | 8 | 29.3 | 95MB |-|-| +| MobileNet-V1-YOLOv3 | 剪裁 -51.77% | COCO | 8 | 26.0 (-3.3) | 32MB |-|73.93| + +## 语义分割 + +数据集:Cityscapes + + +### 量化 + +| 模型 | 压缩策略 | mIoU | 存储体积 | +| :--------------------: | :---------: | :-----------: | :------------: | +| DeepLabv3-MobileNetv2 | 无 | 69.81 | 7.4MB | +| DeepLabv3-MobileNetv2 | 量化 | 67.59 (-2.22) | 2.1MB | + +图像分割模型Lite时延(ms), 输入尺寸769 x 769 + +| 设备 | 模型类型 | 压缩策略 | armv7 Thread 1 | armv7 Thread 2 | armv7 Thread 4 | armv8 Thread 1 | armv8 Thread 2 | armv8 Thread 4 | +| ------- | ---------------------- | ------------- | -------------- | -------------- | -------------- | -------------- | -------------- | -------------- | +| 高通835 | Deeplabv3-MobileNetV2 | 无 | 1282.8126 | 793.2064 | 653.6538 | 1193.9908 | 737.1827 | 593.4522 | +| 高通835 | Deeplabv3-MobileNetV2 | 量化 | 981.44 | 658.4969 | 538.6166 | 885.3273 | 586.1284 | 484.0018 | +| 高通855 | Deeplabv3-MobileNetV2 | 无 | 639.4425 | 390.1851 | 322.7014 | 477.7667 | 339.7411 | 262.2847 | +| 高通855 | Deeplabv3-MobileNetV2 | 量化 | 705.7589 | 474.4076 | 427.2951 | 394.8352 | 297.4035 | 264.6724 | +| 麒麟970 | Deeplabv3-MobileNetV2 | 无 | 1771.1301 | 1746.0569 | 1222.4805 | 1448.9739 | 1192.4491 | 760.606 | +| 麒麟970 | Deeplabv3-MobileNetV2 | 量化 | 1320.386 | 918.5328 | 672.2481 | 1020.753 | 820.094 | 591.4114 | + +### 剪裁 + +PaddleLite推理耗时说明: + +环境:Qualcomm SnapDragon 845 + armv8 + +速度指标:Thread1/Thread2/Thread4耗时 + + +| 模型 | 压缩方法 | mIoU | 存储体积 | PaddleLite推理耗时 | TensorRT推理速度(FPS) | +| :-------: | :---------------: | :-----------: | :------: | :------------: | :----: | +| FastSCNN | 无 | 69.64 | 11MB | 1226.36\682.96\415.664 |39.53| +| FastSCNN | 剪裁 -47.60% | 66.68 (-2.96) | 5.7MB | 866.693\494.467\291.748 |51.48| diff --git a/docs/cv_solutions.md b/docs/cv_solutions.md index cb96c2d9e71ac6e98ee036364b8700ec9656411a..4d8482da94423ba5cc4f0695bf3f9669ef5f732a 100755 --- a/docs/cv_solutions.md +++ b/docs/cv_solutions.md @@ -1,63 +1,132 @@ # PaddleX视觉方案介绍 -PaddleX目前提供了4种视觉任务解决方案,分别为图像分类、目标检测、实例分割和语义分割。用户可以根据自己的任务类型按需选取。 +PaddleX针对图像分类、目标检测、实例分割和语义分割4种视觉任务提供了包含模型选择、压缩策略选择、部署方案选择在内的解决方案。用户根据自己的需求选择合适的模型,选择合适的压缩策略来减小模型的计算量和存储体积、加速模型预测推理,最后选择合适的部署方案将模型部署在移动端或者服务器端。 -## 图像分类 +## 模型选择 + +### 图像分类 图像分类任务指的是输入一张图片,模型预测图片的类别,如识别为风景、动物、车等。 ![](./images/image_classification.png) -对于图像分类任务,针对不同的应用场景,PaddleX提供了百度改进的模型,见下表所示 +对于图像分类任务,针对不同的应用场景,PaddleX提供了百度改进的模型,见下表所示: +> 表中GPU预测速度是使用PaddlePaddle Python预测接口测试得到(测试GPU型号为Nvidia Tesla P40)。 +> 表中CPU预测速度 (测试CPU型号为)。 +> 
表中骁龙855预测速度是使用处理器为骁龙855的手机测试得到。
+> 测速时模型输入大小为224 x 224,Top1准确率为ImageNet-1000数据集上评估所得。
 
-| 模型 | 模型大小 | GPU预测速度 | CPU预测速度 | ARM芯片预测速度 | 准确率 | 备注 |
-| :--------- | :------ | :---------- | :-----------| :------------- | :----- | :--- |
-| MobileNetV3_small_ssld | 12M | - | - | - | 71.3% |适用于移动端场景 |
-| MobileNetV3_large_ssld | 21M | - | - | - | 79.0% | 适用于移动端/服务端场景 |
-| ResNet50_vd_ssld | 102.8MB | - | - | - | 82.4% | 适用于服务端场景 |
-| ResNet101_vd_ssld | 179.2MB | - | - | - |83.7% | 适用于服务端场景 |
+| 模型 | 模型特点 | 存储体积 | GPU预测速度(毫秒) | CPU(x86)预测速度(毫秒) | 骁龙855(ARM)预测速度(毫秒) | Top1准确率 |
+| :--------- | :------ | :---------- | :-----------| :------------- | :------------- |:--- |
+| MobileNetV3_small_ssld | 轻量高速,适用于追求高速的实时移动端场景 | 12.5MB | 7.08837 | - | 6.546 | 71.3% |
+| ShuffleNetV2 | 轻量级模型,精度相对偏低,适用于要求更小存储体积的实时移动端场景 | 10.2MB | 15.40 | - | 10.941 | 68.8% |
+| MobileNetV3_large_ssld | 轻量级模型,在存储方面优势不大,在速度和精度上表现适中,适合于移动端场景 | 22.8MB | 8.06651 | - | 19.803 | 79.0% |
+| MobileNetV2 | 轻量级模型,适用于使用GPU预测的移动端场景 | 15.0MB | 5.92667 | - | 23.318 | 72.2% |
+| ResNet50_vd_ssld | 高精度模型,预测时间较短,适用于大多数的服务器端场景 | 103.5MB | 7.79264 | - | - | 82.4% |
+| ResNet101_vd_ssld | 超高精度模型,预测时间相对较长,适用于有大数据量时的服务器端场景 | 180.5MB | 13.34580 | - | - | 83.7% |
+| Xception65 | 超高精度模型,预测时间更长,在处理较大数据量时有较高的精度,适用于服务器端场景 | 161.6MB | 13.87017 | - | - | 80.3% |
 
-除上述模型外,PaddleX还支持近20种图像分类模型,模型列表可参考[PaddleX模型库](../appendix/model_zoo.md)
+包括上述模型在内,PaddleX共支持近20种图像分类模型,其余模型可参考[PaddleX模型库](../appendix/model_zoo.md)
 
-## 目标检测
+### 目标检测
 
 目标检测任务指的是输入图像,模型识别出图像中物体的位置(用矩形框框出来,并给出框的位置),和物体的类别,如在手机等零件质检中,用于检测外观上的瑕疵等。
 
 ![](./images/object_detection.png)
 
 对于目标检测,针对不同的应用场景,PaddleX提供了主流的YOLOv3模型和Faster-RCNN模型,见下表所示
-
-| 模型 | 模型大小 | GPU预测速度 | CPU预测速度 |ARM芯片预测速度 | BoxMAP | 备注 |
-| :------- | :------- | :--------- | :---------- | :------------- | :----- | :--- |
-| YOLOv3-MobileNetV1 | 101.2M | - | - | - | 29.3 | |
-| YOLOv3-MobileNetV3 | 94.6M | - | - | - | 31.6 | |
-| YOLOv3-ResNet34 | 169.7M | - | - | - | 36.2 | |
-| YOLOv3-DarkNet53 | 252.4 | - | - | - | 38.9 | |
-
-除YOLOv3模型外,PaddleX同时也支持FasterRCNN模型,支持FPN结构和5种backbone网络,详情可参考[PaddleX模型库](../appendix/model_zoo.md)
-
-## 实例分割
+> 表中GPU预测速度是使用PaddlePaddle Python预测接口测试得到(测试GPU型号为Nvidia Tesla P40)。
+> 表中CPU预测速度 (测试CPU型号为)。
+> 表中骁龙855预测速度是使用处理器为骁龙855的手机测试得到。
+> 测速时YOLOv3的输入大小为608 x 608,FasterRCNN的输入大小为800 x 1333,Box mmAP为COCO2017数据集上评估所得。
+
+| 模型 | 模型特点 | 存储体积 | GPU预测速度 | CPU(x86)预测速度(毫秒) | 骁龙855(ARM)预测速度(毫秒) | Box mmAP |
+| :------- | :------- | :--------- | :---------- | :------------- | :------------- |:--- |
+| YOLOv3-MobileNetV3_large | 适用于追求高速预测的移动端场景 | 100.7MB | 143.322 | - | - | 31.6 |
+| YOLOv3-MobileNetV1 | 精度相对偏低,适用于追求高速预测的服务器端场景 | 99.2MB | 15.442 | - | - | 29.3 |
+| YOLOv3-DarkNet53 | 在预测速度和模型精度上都有较好的表现,适用于大多数的服务器端场景 | 249.2MB | 42.672 | - | - | 38.9 |
+| FasterRCNN-ResNet50-FPN | 经典的二阶段检测器,预测速度相对较慢,适用于重视模型精度的服务器端场景 | 167.7MB | 83.189 | - | - | 37.2 |
+| FasterRCNN-HRNet_W18-FPN | 适用于对图像分辨率较为敏感、对目标细节预测要求更高的服务器端场景 | 115.5MB | 81.592 | - | - | 36 |
+| FasterRCNN-ResNet101_vd-FPN | 超高精度模型,预测时间更长,在处理较大数据量时有较高的精度,适用于服务器端场景 | 244.3MB | 156.097 | - | - | 40.5 |
+
+除上述模型外,YOLOv3和Faster RCNN还支持其他backbone,详情可参考[PaddleX模型库](../appendix/model_zoo.md)
+
+### 实例分割
 
 在目标检测中,模型识别出图像中物体的位置和物体的类别。而实例分割则是在目标检测的基础上,做了像素级的分类,将框内的属于目标物体的像素识别出来。
 
 ![](./images/instance_segmentation.png)
 
 PaddleX目前提供了实例分割MaskRCNN模型,支持5种不同的backbone网络,详情可参考[PaddleX模型库](../appendix/model_zoo.md)
-
-| 模型 | 模型大小 | GPU预测速度 | CPU预测速度 | ARM芯片预测速度 | BoxMAP | SegMAP | 备注 |
-| :---- | :------- | :---------- | :---------- | :------------- | :----- | :----- | :--- |
-| MaskRCNN-ResNet50_vd-FPN | 185.5M | - | - | - | 39.8 | 35.4 | |
-| MaskRCNN-ResNet101_vd-FPN | 268.6M | - | - | - | 41.4 | 36.8 | |
-
-
-## 语义分割
+> 表中GPU预测速度是使用PaddlePaddle Python预测接口测试得到(测试GPU型号为Nvidia Tesla P40)。
+> 表中CPU预测速度 (测试CPU型号为)。
+> 表中骁龙855预测速度是使用处理器为骁龙855的手机测试得到。
+> 测速时MaskRCNN的输入大小为800 x 1333,Box mmAP和Seg mmAP为COCO2017数据集上评估所得。
+
+| 模型 | 模型特点 | 存储体积 | GPU预测速度 | CPU(x86)预测速度(毫秒) | 骁龙855(ARM)预测速度(毫秒) | Box mmAP | Seg mmAP |
+| :---- | :------- | :---------- | :---------- | :----- | :----- | :--- |:--- |
+| MaskRCNN-HRNet_W18-FPN | 适用于对图像分辨率较为敏感、对目标细节预测要求更高的服务器端场景 | - | - | - | - | 37.0 | 33.4 |
+| MaskRCNN-ResNet50-FPN | 精度较高,适合大多数的服务器端场景 | 185.5M | - | - | - | 37.9 | 34.2 |
+| MaskRCNN-ResNet101_vd-FPN | 高精度但预测时间更长,在处理较大数据量时有较高的精度,适用于服务器端场景 | 268.6M | - | - | - | 41.4 | 36.8 |
+
+### 语义分割
 
 语义分割用于对图像做像素级的分类,应用在人像分类、遥感图像识别等场景。
 
 ![](./images/semantic_segmentation.png)
 
 对于语义分割,PaddleX也针对不同的应用场景,提供了不同的模型选择,如下表所示
+> 表中GPU预测速度是使用PaddlePaddle Python预测接口测试得到(测试GPU型号为Nvidia Tesla P40)。
+> 表中CPU预测速度 (测试CPU型号为)。
+> 表中骁龙855预测速度是使用处理器为骁龙855的手机测试得到。
+> 测速时模型的输入大小为1024 x 2048,mIOU为Cityscapes数据集上评估所得。
+
+| 模型 | 模型特点 | 存储体积 | GPU预测速度 | CPU(x86)预测速度(毫秒) | 骁龙855(ARM)预测速度(毫秒) | mIOU |
+| :---- | :------- | :---------- | :---------- | :----- | :----- |:--- |
+| DeepLabv3p-MobileNetV2_x1.0 | 轻量级模型,适用于移动端场景 | - | - | - | - | 69.8% |
+| HRNet_W18_Small_v1 | 轻量高速,适用于移动端场景 | - | - | - | - | - |
+| FastSCNN | 轻量高速,适用于追求高速预测的移动端或服务器端场景 | - | - | - | - | 69.64 |
+| HRNet_W18 | 高精度模型,适用于对图像分辨率较为敏感、对目标细节预测要求更高的服务器端场景 | - | - | - | - | 79.36 |
+| DeepLabv3p-Xception65 | 高精度但预测时间更长,在处理较大数据量时有较高的精度,适用于服务器且背景复杂的场景 | - | - | - | - | 79.3% |
+
+## 压缩策略选择
+
+PaddleX提供包含模型剪裁、定点量化的模型压缩策略来减小模型的计算量和存储体积,加快模型部署后的预测速度。使用不同压缩策略在图像分类、目标检测和语义分割模型上的模型精度和预测速度详见以下内容,用户可以根据自己的需求选择合适的压缩策略,进一步优化模型的性能。
+
+| 压缩策略 | 策略特点 |
+| :---- | :------- |
+| 量化 | 较为显著地减少模型的存储体积,适用于移动端或服务器端TensorRT部署,在移动端对于MobileNet系列模型有明显的加速效果 |
+| 剪裁 | 能够去除冗余的参数,达到显著减少参数计算量和模型体积的效果,提升模型的预测性能,适用于CPU部署或移动端部署(GPU上无明显加速效果) |
+| 先剪裁后量化 | 可以进一步提升模型的预测性能,适用于移动端或服务器端TensorRT部署 |
+
+### 性能对比
+
+* 表中各指标的格式为XXX/YYY,XXX表示未采取压缩策略时的指标,YYY表示压缩后的指标
+* 分类模型的准确率指的是ImageNet-1000数据集上的Top1准确率(模型输入大小为224x224),检测模型的准确率指的是COCO2017数据集上的mmAP(模型输入大小为608x608),分割模型的准确率指的是Cityscapes数据集上mIOU(模型输入大小为769x769)
+* 量化策略中,PaddleLite推理环境为Qualcomm SnapDragon 855 + armv8,速度指标为Thread4耗时
+* 剪裁策略中,PaddleLite推理环境为Qualcomm SnapDragon 845 + armv8,速度指标为Thread4耗时
+
+
+| 模型 | 压缩策略 | 存储体积(MB) | 准确率(%) | PaddleLite推理耗时(ms) |
+| :--: | :------: | :------: | :----: | :----------------: |
+| MobileNetV1 | 量化 | 17/4.4 | 70.99/70.18 | 10.0811/4.2934 |
+| MobileNetV1 | 剪裁 -30% | 17/12 | 70.99/70.4 | 19.5762/13.6982 |
+| YOLOv3-MobileNetV1 | 量化 | 95/25 | 29.3/27.9 | - |
+| YOLOv3-MobileNetV1 | 剪裁 -51.77% | 95/25 | 29.3/26 | - |
+| Deeplabv3-MobileNetV2 | 量化 | 7.4/1.8 | 63.26/62.03 | 593.4522/484.0018 |
+| FastSCNN | 剪裁 -47.60% | 11/5.7 | 69.64/66.68 | 415.664/291.748 |
+
+更多模型在不同设备上压缩前后的指标对比详见[PaddleX压缩模型库](appendix/slim_model_zoo.md)
+
+压缩策略的具体使用流程详见[模型压缩](tutorials/compress)
+
+**注意:PaddleX中全部图像分类模型和语义分割模型都支持量化和剪裁操作,目标检测仅有YOLOv3支持量化和剪裁操作。**
+
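结合前文各模型的train接口,剪裁策略的接入方式大致如下(一个最简示意:数据集与路径均为占位假设,裁剪通过`sensitivities_file`与`eval_metric_loss`两个参数生效,完整流程以上文"模型压缩"链接为准):

```python
import paddlex as pdx
from paddlex.seg import transforms

train_transforms = transforms.ComposedSegTransforms(
    mode='train', train_crop_size=[512, 512])
# 数据集路径为占位示例
train_dataset = pdx.datasets.SegDataset(
    data_dir='dataset',
    file_list='dataset/train_list.txt',
    label_list='dataset/labels.txt',
    transforms=train_transforms)

model = pdx.seg.FastSCNN(num_classes=len(train_dataset.labels))
model.train(
    num_epochs=40,
    train_dataset=train_dataset,
    # 'DEFAULT'表示自动下载在Cityscapes数据上计算好的敏感度信息,
    # eval_metric_loss为裁剪时可容忍的精度损失
    sensitivities_file='DEFAULT',
    eval_metric_loss=0.05,
    save_dir='output/fastscnn_prune')
```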
+## 模型部署
+
+PaddleX提供服务器端python部署、服务器端c++部署、服务器端加密部署、OpenVINO部署、移动端部署共5种部署方案,用户可以根据自己的需求选择合适的部署方案,点击以下链接了解部署的具体流程。
 
-| 模型 | 模型大小 | GPU预测速度 | CPU预测速度 | ARM芯片预测速度 | mIOU | 备注 |
-| :---- | :------- | :---------- | :---------- | :------------- | :----- | :----- |
-| DeepLabv3p-MobileNetV2_x0.25 | | - | - | - | - | - |
-| DeepLabv3p-MobileNetV2_x1.0 | | - | - | - | - | - |
-| DeepLabv3p-Xception65 | | - | - | - | - | - |
-| UNet | | - | - | - | - | - |
+
+| 部署方案 | 部署流程 |
+| :------: | :------: |
+| 
服务器端python部署 | [部署流程](tutorials/deploy/deploy_server/deploy_python.html)| +| 服务器端c++部署 | [部署流程](tutorials/deploy/deploy_server/deploy_cpp/) | +| 服务器端加密部署 | [部署流程](tutorials/deploy/deploy_server/encryption.html) | +| OpenVINO部署 | [部署流程](tutorials/deploy/deploy_openvino.html) | +| 移动端部署 | [部署流程](tutorials/deploy/deploy_lite.html) | diff --git a/docs/images/lime.png b/docs/images/lime.png index de435a2e2375a788319f0d80a4cce7a21d395e41..801be69b57c80ad92dcc0ca69bf1a0a4de074b0f 100644 Binary files a/docs/images/lime.png and b/docs/images/lime.png differ diff --git a/docs/images/normlime.png b/docs/images/normlime.png index 4e5099347f261d3f5ce47b93d28cfa484c1d3776..dd9a2f8f96a3ade26179010f340c7c5185bf0656 100644 Binary files a/docs/images/normlime.png and b/docs/images/normlime.png differ diff --git a/docs/index.rst b/docs/index.rst index 0f876e6337ce4ea181b7558a5654808774f50572..1d8d9c0c124dd0015e7b90634fcb1b1551db87f5 100755 --- a/docs/index.rst +++ b/docs/index.rst @@ -26,6 +26,7 @@ PaddleX是基于飞桨核心框架、开发套件和工具组件的深度学习 cv_solutions.md apis/index.rst paddlex_gui/index.rst + tuning_strategy/index.rst update.md FAQ.md appendix/index.rst diff --git a/docs/tuning_strategy/detection/index.rst b/docs/tuning_strategy/detection/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..5457adeeea053df4de9332bd4df61cd450830f96 --- /dev/null +++ b/docs/tuning_strategy/detection/index.rst @@ -0,0 +1,10 @@ +目标检测 +============================ + +PaddleX针对目标检测任务提供了通过负样本学习降低误检率的策略,用户可根据需求及应用场景使用该策略对模型进行调优。 + +.. toctree:: + :maxdepth: 1 + + negatives_training.md + diff --git a/docs/tuning_strategy/detection/negatives_training.md b/docs/tuning_strategy/detection/negatives_training.md new file mode 100644 index 0000000000000000000000000000000000000000..d3590e3222018faf90462935588a785b8fae4e7f --- /dev/null +++ b/docs/tuning_strategy/detection/negatives_training.md @@ -0,0 +1,101 @@ +# 通过负样本学习降低误检率 + +## 应用场景 + +在背景和目标相似的场景下,模型容易把背景误检成目标。为了降低误检率,可以通过负样本学习来降低误检率,即在训练过程中把无目标真值的图片加入训练。 + +## 效果对比 + +* 与基准模型相比,通过负样本学习后的模型**mmAP有3.6%的提升,mAP有0.1%的提升**。 +* 与基准模型相比,通过负样本学习后的模型在背景图片上的图片级别**误检率降低了49.68%**。 + +表1 违禁品验证集上**框级别精度**对比 + +||mmAP(AP@IoU=0.5:0.95)| mAP (AP@IoU=0.5)| +|:---|:---|:---| +|基准模型 | 45.8% | 83% | +|通过负样本学习后的模型 | 49.4% | 83.1% | + +表2 违禁品验证集上**图片级别的召回率**、无违禁品验证集上**图片级别的误检率**对比 + +||违禁品图片级别的召回率| 无违禁品图片级别的误检率| +|:---|:--------------------|:------------------------| +|基准模型 | 98.97% | 55.27% | +|通过负样本学习后的模型 | 97.75% | 5.59% | + +【名词解释】 + + * 图片级别的召回率:只要在有目标的图片上检测出目标(不论框的个数),该图片被认为召回。批量有目标图片中被召回图片所占的比例,即为图片级别的召回率。 + + * 图片级别的误检率:只要在无目标的图片上检测出目标(不论框的个数),该图片被认为误检。批量无目标图片中被误检图片所占的比例,即为图片级别的误检率。 + + +## 使用方法 + +在定义训练所用的数据集之后,使用数据集类的成员函数`add_negative_samples`将无目标真值的背景图片所在路径传入给训练集。代码示例如下: + +``` +import os +os.environ['CUDA_VISIBLE_DEVICES'] = '0' +from paddlex.det import transforms +import paddlex as pdx + +# 定义训练和验证时的transforms +train_transforms = transforms.ComposedRCNNTransforms( + mode='train', min_max_size=[600, 1000]) +eval_transforms = transforms.ComposedRCNNTransforms( + mode='eval', min_max_size=[600, 1000]) + +# 定义训练所用的数据集 +train_dataset = pdx.datasets.CocoDetection( + data_dir='jinnan2_round1_train_20190305/restricted/', + ann_file='jinnan2_round1_train_20190305/train.json', + transforms=train_transforms, + shuffle=True, + num_workers=2) +# 训练集中加入无目标背景图片 +train_dataset.add_negative_samples( + 'jinnan2_round1_train_20190305/normal_train_back/') + +# 定义验证所用的数据集 +eval_dataset = pdx.datasets.CocoDetection( + data_dir='jinnan2_round1_train_20190305/restricted/', + 
ann_file='jinnan2_round1_train_20190305/val.json', + transforms=eval_transforms, + num_workers=2) + +# 初始化模型,并进行训练 +model = pdx.det.FasterRCNN(num_classes=len(train_dataset.labels) + 1) +model.train( + num_epochs=17, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + train_batch_size=8, + learning_rate=0.01, + lr_decay_epochs=[13, 16], + save_dir='./output') +``` + +## 实验细则 + +(1) 数据集 + +我们使用X光违禁品数据集对通过负样本学习降低误检率的策略有效性进行了实验验证。该数据集中背景比较繁杂,很多背景物体与目标物体较为相似。 + +* 检测铁壳打火机、黑钉打火机 、刀具、电源和电池、剪刀5种违禁品。 + +* 训练集有883张违禁品图片,验证集有98张违禁品图片。 + +* 无违禁品的X光图片有2540张。 + +(2) 基准模型 + +使用FasterRCNN-ResNet50作为检测模型,除了水平翻转外没有使用其他的数据增强方式,只使用违禁品训练集进行训练。模型在违禁品验证集上的精度见表1,mmAP有45.8%,mAP达到83%。 + +(3) 通过负样本学习后的模型 + +把无违禁品的X光图片按1:1分成无违禁品训练集和无违禁品验证集。我们将基准模型在无违禁品验证集进行测试,发现图片级别的误检率高达55.27%。为了降低该误检率,将基准模型在无违禁品训练集进行测试,挑选出被误检图片共663张,将这663张图片加入训练,训练参数配置与基准模型训练时一致。 + +通过负样本学习后的模型在违禁品验证集上的精度见表1,mmAP有49.4%,mAP达到83.1%。与基准模型相比,**mmAP有3.6%的提升,mAP有0.1%的提升**。通过负样本学习后的模型在无违禁品验证集的误检率仅有5.58%,与基准模型相比,**误检率降低了49.68%**。 + +此外,还测试了两个模型在有违禁品验证集上图片级别的召回率,见表2,与基准模型相比,通过负样本学习后的模型仅漏检了1张图片,召回率几乎是无损的。 diff --git a/docs/tuning_strategy/index.rst b/docs/tuning_strategy/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..f9d5cd50f914609f864135dfba922f857f771dbf --- /dev/null +++ b/docs/tuning_strategy/index.rst @@ -0,0 +1,7 @@ +PaddleX调优策略介绍 +============================ + +.. toctree:: + :maxdepth: 2 + + detection/index.rst diff --git a/docs/tutorials/deploy/deploy_lite.md b/docs/tutorials/deploy/deploy_lite.md index 5419aed636545b95e9f98fdd45109592b7a6d9d6..fd757933dcd201cf5c45b9a58013ee8078248ba0 100644 --- a/docs/tutorials/deploy/deploy_lite.md +++ b/docs/tutorials/deploy/deploy_lite.md @@ -21,7 +21,7 @@ step 2: 将PaddleX模型导出为inference模型 step 3: 将inference模型转换成PaddleLite模型 ``` -python /path/to/PaddleX/deploy/lite/export_lite.py --model_dir /path/to/inference_model --save_file /path/to/onnx_model --place place/to/run +python /path/to/PaddleX/deploy/lite/export_lite.py --model_dir /path/to/inference_model --save_file /path/to/lite_model --place place/to/run ``` diff --git a/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_linux.md b/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_linux.md index 838195f14ff108de838f04d5514101b17280f4dd..b4309ba896f1ae7c0d6f413e537343b608c5fa9f 100755 --- a/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_linux.md +++ b/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_linux.md @@ -19,18 +19,18 @@ ### Step2: 下载PaddlePaddle C++ 预测库 fluid_inference -PaddlePaddle C++ 预测库针对不同的`CPU`,`CUDA`,以及是否支持TensorRT,提供了不同的预编译版本,目前PaddleX依赖于Paddle1.7版本,以下提供了多个不同版本的Paddle预测库: +PaddlePaddle C++ 预测库针对不同的`CPU`,`CUDA`,以及是否支持TensorRT,提供了不同的预编译版本,目前PaddleX依赖于Paddle1.8版本,以下提供了多个不同版本的Paddle预测库: -| 版本说明 | 预测库(1.7.2版本) | +| 版本说明 | 预测库(1.8.2版本) | | ---- | ---- | -| ubuntu14.04_cpu_avx_mkl | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.7.2-cpu-avx-mkl/fluid_inference.tgz) | -| ubuntu14.04_cpu_avx_openblas | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.7.2-cpu-avx-openblas/fluid_inference.tgz) | -| ubuntu14.04_cpu_noavx_openblas | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.7.2-cpu-noavx-openblas/fluid_inference.tgz) | -| ubuntu14.04_cuda9.0_cudnn7_avx_mkl | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.7.2-gpu-cuda9-cudnn7-avx-mkl/fluid_inference.tgz) | -| ubuntu14.04_cuda10.0_cudnn7_avx_mkl | 
[fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.7.2-gpu-cuda10-cudnn7-avx-mkl/fluid_inference.tgz ) | -| ubuntu14.04_cuda10.1_cudnn7.6_avx_mkl_trt6 | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.7.2-gpu-cuda10.1-cudnn7.6-avx-mkl-trt6%2Ffluid_inference.tgz) | +| ubuntu14.04_cpu_avx_mkl | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.8.2-cpu-avx-mkl/fluid_inference.tgz) | +| ubuntu14.04_cpu_avx_openblas | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.8.2-cpu-avx-openblas/fluid_inference.tgz) | +| ubuntu14.04_cpu_noavx_openblas | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.8.2-cpu-noavx-openblas/fluid_inference.tgz) | +| ubuntu14.04_cuda9.0_cudnn7_avx_mkl | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.8.2-gpu-cuda9-cudnn7-avx-mkl/fluid_inference.tgz) | +| ubuntu14.04_cuda10.0_cudnn7_avx_mkl | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.8.2-gpu-cuda10-cudnn7-avx-mkl/fluid_inference.tgz ) | +| ubuntu14.04_cuda10.1_cudnn7.6_avx_mkl_trt6 | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.8.2-gpu-cuda10.1-cudnn7.6-avx-mkl-trt6%2Ffluid_inference.tgz) | -更多和更新的版本,请根据实际情况下载: [C++预测库下载列表](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_guide/inference_deployment/inference/windows_cpp_inference.html#id1) +更多和更新的版本,请根据实际情况下载: [C++预测库下载列表](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html) 下载并解压后`/root/projects/fluid_inference`目录包含内容为: ``` @@ -42,7 +42,7 @@ fluid_inference └── version.txt # 版本和编译信息 ``` -**注意:** 预编译版本除`nv-jetson-cuda10-cudnn7.5-trt5` 以外其它包都是基于`GCC 4.8.5`编译,使用高版本`GCC`可能存在 `ABI`兼容性问题,建议降级或[自行编译预测库](https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html#id12)。 +**注意:** 预编译版本除`nv-jetson-cuda10-cudnn7.5-trt5` 以外其它包都是基于`GCC 4.8.5`编译,使用高版本`GCC`可能存在 `ABI`兼容性问题,建议降级或[自行编译预测库](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html#id12)。 ### Step4: 编译 @@ -55,17 +55,17 @@ WITH_GPU=OFF WITH_MKL=ON # 是否集成 TensorRT(仅WITH_GPU=ON 有效) WITH_TENSORRT=OFF -# TensorRT 的lib路径 -TENSORRT_DIR=/path/to/TensorRT/ -# Paddle 预测库路径 -PADDLE_DIR=/path/to/fluid_inference/ +# TensorRT 的路径,如果需要集成TensorRT,需修改为您实际安装的TensorRT路径 +TENSORRT_DIR=/root/projects/TensorRT/ +# Paddle 预测库路径, 请修改为您实际安装的预测库路径 +PADDLE_DIR=/root/projects/fluid_inference # Paddle 的预测库是否使用静态库来编译 # 使用TensorRT时,Paddle的预测库通常为动态库 -WITH_STATIC_LIB=ON +WITH_STATIC_LIB=OFF # CUDA 的 lib 路径 -CUDA_LIB=/path/to/cuda/lib/ +CUDA_LIB=/usr/local/cuda/lib64 # CUDNN 的 lib 路径 -CUDNN_LIB=/path/to/cudnn/lib/ +CUDNN_LIB=/usr/local/cuda/lib64 # 是否加载加密后的模型 WITH_ENCRYPTION=ON @@ -74,8 +74,8 @@ sh $(pwd)/scripts/bootstrap.sh # 下载预编译版本的加密工具 ENCRYPTION_DIR=$(pwd)/paddlex-encryption # OPENCV 路径, 如果使用自带预编译版本可不修改 +sh $(pwd)/scripts/bootstrap.sh # 下载预编译版本的opencv OPENCV_DIR=$(pwd)/deps/opencv3gcc4.8/ -sh $(pwd)/scripts/bootstrap.sh # 以下无需改动 rm -rf build @@ -94,7 +94,6 @@ cmake .. 
\ -DENCRYPTION_DIR=${ENCRYPTION_DIR} \ -DOPENCV_DIR=${OPENCV_DIR} make - ``` **注意:** linux环境下编译会自动下载OPENCV, PaddleX-Encryption和YAML,如果编译环境无法访问外网,可手动下载: @@ -117,9 +116,7 @@ yaml-cpp.zip文件下载后无需解压,在cmake/yaml.cmake中将`URL https:// **在加载模型前,请检查你的模型目录中文件应该包括`model.yml`、`__model__`和`__params__`三个文件。如若不满足这个条件,请参考[模型导出为Inference文档](../deploy_python.html#inference)将模型导出为部署格式。** -> **注意:由于PaddleX代码的持续更新,版本低于1.0.0的模型(模型版本可查看model.yml文件中的version字段)暂时无法直接用于预测部署,参考[模型版本升级](../../upgrade_version.md)对模型版本进行升级。** - -编译成功后,预测demo的可执行程序分别为`build/demo/detector`,`build/demo/classifer`,`build/demo/segmenter`,用户可根据自己的模型类型选择,其主要命令参数说明如下: +编译成功后,预测demo的可执行程序分别为`build/demo/detector`,`build/demo/classifier`,`build/demo/segmenter`,用户可根据自己的模型类型选择,其主要命令参数说明如下: | 参数 | 说明 | | ---- | ---- | @@ -127,34 +124,37 @@ yaml-cpp.zip文件下载后无需解压,在cmake/yaml.cmake中将`URL https:// | image | 要预测的图片文件路径 | | image_list | 按行存储图片路径的.txt文件 | | use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0) | -| use_trt | 是否使用 TensorTr 预测, 支持值为0或1(默认值为0) | +| use_trt | 是否使用 TensorRT 预测, 支持值为0或1(默认值为0) | | gpu_id | GPU 设备ID, 默认值为0 | | save_dir | 保存可视化结果的路径, 默认值为"output",**classfier无该参数** | +| key | 加密过程中产生的密钥信息,默认值为""表示加载的是未加密的模型 | +| batch_size | 预测的批量大小,默认为1 | +| thread_num | 预测的线程数,默认为cpu处理器个数 | ## 样例 -可使用[小度熊识别模型](../deploy_python.html#inference)中导出的`inference_model`和测试图片进行预测。 +可使用[小度熊识别模型](../deploy_python.html#inference)中导出的`inference_model`和测试图片进行预测,导出到/root/projects,模型路径为/root/projects/inference_model。 `样例一`: -不使用`GPU`测试图片 `/path/to/xiaoduxiong.jpeg` +不使用`GPU`测试图片 `/root/projects/images/xiaoduxiong.jpeg` ```shell -./build/demo/detector --model_dir=/path/to/inference_model --image=/path/to/xiaoduxiong.jpeg --save_dir=output +./build/demo/detector --model_dir=/root/projects/inference_model --image=/root/projects/images/xiaoduxiong.jpeg --save_dir=output ``` 图片文件`可视化预测结果`会保存在`save_dir`参数设置的目录下。 `样例二`: -使用`GPU`预测多个图片`/path/to/image_list.txt`,image_list.txt内容的格式如下: +使用`GPU`预测多个图片`/root/projects/image_list.txt`,image_list.txt内容的格式如下: ``` -/path/to/images/xiaoduxiong1.jpeg -/path/to/images/xiaoduxiong2.jpeg +/root/projects/images/xiaoduxiong1.jpeg +/root/projects/images/xiaoduxiong2.jpeg ... 
-/path/to/images/xiaoduxiongn.jpeg +/root/projects/images/xiaoduxiongn.jpeg ``` ```shell -./build/demo/detector --model_dir=/path/to/models/inference_model --image_list=/root/projects/images_list.txt --use_gpu=1 --save_dir=output +./build/demo/detector --model_dir=/root/projects/inference_model --image_list=/root/projects/images_list.txt --use_gpu=1 --save_dir=output --batch_size=2 --thread_num=2 ``` 图片文件`可视化预测结果`会保存在`save_dir`参数设置的目录下。 diff --git a/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_win_vs2019.md b/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_win_vs2019.md index e319df76ccc7ab4308b0a0b295eb412d9d89c2fe..48d936fd8a9e75e668b44db08352eebe1c20b338 100755 --- a/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_win_vs2019.md +++ b/docs/tutorials/deploy/deploy_server/deploy_cpp/deploy_cpp_win_vs2019.md @@ -10,11 +10,10 @@ Windows 平台下,我们使用`Visual Studio 2019 Community` 进行了测试 请确保系统已经安装好上述基本软件,我们使用的是`VS2019`的社区版。 -**下面所有示例以工作目录为 `D:\projects`演示**。 +**下面所有示例以工作目录为 `D:\projects`演示。** -### Step1: 下载代码 +### Step1: 下载PaddleX预测代码 -下载源代码 ```shell d: mkdir projects @@ -22,25 +21,24 @@ cd projects git clone https://github.com/PaddlePaddle/PaddleX.git ``` -**说明**:其中`C++`预测代码在`PaddleX/deploy/cpp` 目录,该目录不依赖任何`PaddleX`下其他目录。 +**说明**:其中`C++`预测代码在`PaddleX\deploy\cpp` 目录,该目录不依赖任何`PaddleX`下其他目录。 ### Step2: 下载PaddlePaddle C++ 预测库 fluid_inference -PaddlePaddle C++ 预测库针对不同的`CPU`,`CUDA`,以及是否支持TensorRT,提供了不同的预编译版本,目前PaddleX依赖于Paddle1.7版本,以下提供了多个不同版本的Paddle预测库: +PaddlePaddle C++ 预测库针对是否使用GPU、是否支持TensorRT、以及不同的CUDA版本提供了已经编译好的预测库,目前PaddleX依赖于Paddle 1.8,基于Paddle 1.8的Paddle预测库下载链接如下所示: -| 版本说明 | 预测库(1.7.2版本) | 编译器 | 构建工具| cuDNN | CUDA +| 版本说明 | 预测库(1.8.2版本) | 编译器 | 构建工具| cuDNN | CUDA | | ---- | ---- | ---- | ---- | ---- | ---- | -| cpu_avx_mkl | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.7.2/win-infer/mkl/cpu/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | -| cpu_avx_openblas | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.7.2/win-infer/open/cpu/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | -| cuda9.0_cudnn7_avx_mkl | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.7.2/win-infer/mkl/post97/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | 7.4.1 | 9.0 | -| cuda9.0_cudnn7_avx_openblas | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.7.2/win-infer/open/post97/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | 7.4.1 | 9.0 | -| cuda10.0_cudnn7_avx_mkl | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.7.2/win-infer/mkl/post107/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | 7.5.0 | 9.0 | +| cpu_avx_mkl | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.8.2/win-infer/mkl/cpu/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | +| cpu_avx_openblas | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.8.2/win-infer/open/cpu/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | +| cuda9.0_cudnn7_avx_mkl | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.8.2/win-infer/mkl/post97/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | 7.4.1 | 9.0 | +| cuda9.0_cudnn7_avx_openblas | [fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.8.2/win-infer/open/post97/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | 7.4.1 | 9.0 | +| cuda10.0_cudnn7_avx_mkl | 
[fluid_inference.zip](https://paddle-wheel.bj.bcebos.com/1.8.2/win-infer/mkl/post107/fluid_inference_install_dir.zip) | MSVC 2015 update 3 | CMake v3.16.0 | 7.5.0 | 10.0 |
+请根据实际情况选择下载,如若以上版本不满足您的需求,请至[C++预测库下载列表](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_guide/inference_deployment/inference/windows_cpp_inference.html)选择符合需求的版本。
-更多和更新的版本,请根据实际情况下载: [C++预测库下载列表](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html#id1)
-
-解压后`D:\projects\fluid_inference*\`目录下主要包含的内容为:
+将预测库解压后,其所在目录(例如`D:\projects\fluid_inference\`)下主要包含的内容有:
 ```
 ├── \paddle\ # paddle核心库和头文件
 |
@@ -51,8 +49,8 @@ PaddlePaddle C++ 预测库针对不同的`CPU`,`CUDA`,以及是否支持Tens
 ### Step3: 安装配置OpenCV
-1. 在OpenCV官网下载适用于Windows平台的3.4.6版本, [下载地址](https://sourceforge.net/projects/opencvlibrary/files/3.4.6/opencv-3.4.6-vc14_vc15.exe/download)
-2. 运行下载的可执行文件,将OpenCV解压至指定目录,如`D:\projects\opencv`
+1. 在OpenCV官网下载适用于Windows平台的3.4.6版本, [下载地址](https://bj.bcebos.com/paddleseg/deploy/opencv-3.4.6-vc14_vc15.exe)
+2. 运行下载的可执行文件,将OpenCV解压至指定目录,例如`D:\projects\opencv`
 3. 配置环境变量,如下流程所示
     - 我的电脑->属性->高级系统设置->环境变量
     - 在系统变量中找到Path(如没有,自行创建),并双击编辑
@@ -63,22 +61,21 @@ PaddlePaddle C++ 预测库针对不同的`CPU`,`CUDA`,以及是否支持Tens
 1. 打开Visual Studio 2019 Community,点击`继续但无需代码`
 ![step2](../../images/vs2019_step1.png)
-
 2. 点击: `文件`->`打开`->`CMake`
 ![step2.1](../../images/vs2019_step2.png)
-选择项目代码所在路径,并打开`CMakeList.txt`:
+选择C++预测代码所在路径(例如`D:\projects\PaddleX\deploy\cpp`),并打开`CMakeLists.txt`:
 ![step2.2](../../images/vs2019_step3.png)
-
-3. 点击:`项目`->`PADDLEX_INFERENCE的CMake设置`
+3. 点击:`项目`->`CMake设置`
 ![step3](../../images/vs2019_step4.png)
-
 4. 点击`浏览`,分别设置编译选项指定`CUDA`、`OpenCV`、`Paddle预测库`的路径
-依赖库路径的含义说明如下(带*表示仅在使用**GPU版本**预测库时指定, 其中CUDA库版本尽量对齐,**使用9.0、10.0版本,不使用9.2、10.1等版本CUDA库**):
+![step3](../../images/vs2019_step5.png)
+
+依赖库路径的含义说明如下(带*表示仅在使用**GPU版本**预测库时指定, 其中CUDA库版本尽量与Paddle预测库对齐,例如Paddle预测库是**使用9.0、10.0版本**编译的,则编译PaddleX预测代码时**不使用9.2、10.1等版本**CUDA库):
 | 参数名 | 含义 |
 | ---- | ---- |
@@ -87,38 +84,33 @@ PaddlePaddle C++ 预测库针对不同的`CPU`,`CUDA`,以及是否支持Tens
 **注意:**
-1. 使用`CPU`版预测库,请把`WITH_GPU`的`值`去掉勾
-
+1. 如果使用`CPU`版预测库,请把`WITH_GPU`的`值`去掉勾
 2. 如果使用的是`openblas`版本,请把`WITH_MKL`的`值`去掉勾
-
 3. Windows环境下编译会自动下载YAML,如果编译环境无法访问外网,可手动下载: [yaml-cpp.zip](https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip)
-
 yaml-cpp.zip文件下载后无需解压,在cmake/yaml.cmake中将`URL https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip` 中的网址,改为下载文件的路径。
+4. 如果需要使用模型加密功能,需要手动下载[Windows预测模型加密工具](https://bj.bcebos.com/paddlex/tools/win/paddlex-encryption.zip)。例如解压到D:\projects,解压后目录为D:\projects\paddlex-encryption。编译时需勾选WITH_ENCRYPTION,并在ENCRYPTION_DIR中填入D:\projects\paddlex-encryption。
-![step4](../../images/vs2019_step5.png)
+![step_encryption](../../images/vs2019_step_encryption.png)
-**设置完成后**, 点击上图中`保存并生成CMake缓存以加载变量`。
+![step4](../../images/vs2019_step6.png)
+**设置完成后**, 点击上图中`保存并生成CMake缓存以加载变量`。
 5. 
点击`生成`->`全部生成` -![step6](../../images/vs2019_step6.png) - +![step6](../../images/vs2019_step7.png) ### Step5: 预测及可视化 - -**在加载模型前,请检查你的模型目录中文件应该包括`model.yml`、`__model__`和`__params__`三个文件。如若不满足这个条件,请参考[模型导出为Inference文档](../deploy_python.html#inference)将模型导出为部署格式。** - -**注意:由于PaddleX代码的持续更新,版本低于1.0.0的模型(模型版本可查看model.yml文件中的version字段)暂时无法直接用于预测部署,参考[模型版本升级](../../upgrade_version.md)对模型版本进行升级。** +**在加载模型前,请检查你的模型目录中文件应该包括`model.yml`、`__model__`和`__params__`三个文件。如若不满足这个条件,请参考[模型导出为Inference文档](../deploy_python.html#inference)将模型导出为部署格式。** 上述`Visual Studio 2019`编译产出的可执行文件在`out\build\x64-Release`目录下,打开`cmd`,并切换到该目录: ``` -d: +D: cd D:\projects\PaddleX\deploy\cpp\out\build\x64-Release ``` -编译成功后,预测demo的入口程序为`paddlex_inference\detector.exe`,`paddlex_inference\classifer.exe`,`paddlex_inference\segmenter.exe`,用户可根据自己的模型类型选择,其主要命令参数说明如下: +编译成功后,预测demo的入口程序为`paddlex_inference\detector.exe`,`paddlex_inference\classifier.exe`,`paddlex_inference\segmenter.exe`,用户可根据自己的模型类型选择,其主要命令参数说明如下: | 参数 | 说明 | | ---- | ---- | @@ -128,33 +120,45 @@ cd D:\projects\PaddleX\deploy\cpp\out\build\x64-Release | use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0) | | gpu_id | GPU 设备ID, 默认值为0 | | save_dir | 保存可视化结果的路径, 默认值为"output",classfier无该参数 | - +| key | 加密过程中产生的密钥信息,默认值为""表示加载的是未加密的模型 | +| batch_size | 预测的批量大小,默认为1 | +| thread_num | 预测的线程数,默认为cpu处理器个数 | ## 样例 -可使用[小度熊识别模型](../deploy_python.html#inference)中导出的`inference_model`和测试图片进行预测。 +可使用[小度熊识别模型](../deploy_python.md)中导出的`inference_model`和测试图片进行预测, 例如导出到D:\projects,模型路径为D:\projects\inference_model。 -`样例一`: +### 样例一:(使用未加密的模型对单张图像做预测) -不使用`GPU`测试图片 `\\path\\to\\xiaoduxiong.jpeg` +不使用`GPU`测试图片 `D:\images\xiaoduxiong.jpeg` -```shell -.\\paddlex_inference\\detector.exe --model_dir=\\path\\to\\inference_model --image=D:\\images\\xiaoduxiong.jpeg --save_dir=output +``` +.\paddlex_inference\detector.exe --model_dir=D:\projects\inference_model --image=D:\images\xiaoduxiong.jpeg --save_dir=output ``` 图片文件`可视化预测结果`会保存在`save_dir`参数设置的目录下。 -`样例二`: +### 样例二:(使用未加密的模型对图像列表做预测) -使用`GPU`预测多个图片`\\path\\to\\image_list.txt`,image_list.txt内容的格式如下: +使用`GPU`预测多个图片`D:\images\image_list.txt`,image_list.txt内容的格式如下: ``` -\\path\\to\\images\\xiaoduxiong1.jpeg -\\path\\to\\images\\xiaoduxiong2.jpeg +D:\images\xiaoduxiong1.jpeg +D:\images\xiaoduxiong2.jpeg ... 
-\\path\\to\\images\\xiaoduxiongn.jpeg +D:\images\xiaoduxiongn.jpeg ``` -```shell -.\\paddlex_inference\\detector.exe --model_dir=\\path\\to\\inference_model --image_list=\\path\\to\\images_list.txt --use_gpu=1 --save_dir=output +``` +.\paddlex_inference\detector.exe --model_dir=D:\projects\inference_model --image_list=D:\images\image_list.txt --use_gpu=1 --save_dir=output --batch_size=2 --thread_num=2 ``` 图片文件`可视化预测结果`会保存在`save_dir`参数设置的目录下。 + +### 样例三:(使用加密后的模型对单张图片进行预测) + +如果未对模型进行加密,请参考[加密PaddleX模型](../encryption.html#paddlex)对模型进行加密。例如加密后的模型所在目录为`D:\projects\encrypted_inference_model`。 + +``` +.\paddlex_inference\detector.exe --model_dir=D:\projects\encrypted_inference_model --image=D:\images\xiaoduxiong.jpeg --save_dir=output --key=kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c= +``` + +`--key`传入加密工具输出的密钥,例如`kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=`, 图片文件可视化预测结果会保存在`save_dir`参数设置的目录下。 diff --git a/docs/tutorials/deploy/deploy_server/encryption.md b/docs/tutorials/deploy/deploy_server/encryption.md index 7090421823bb3bbe2017818a3fc2f7e96608dda9..89eee6b8f1089964834bc0d88d1306f8ac3961ba 100644 --- a/docs/tutorials/deploy/deploy_server/encryption.md +++ b/docs/tutorials/deploy/deploy_server/encryption.md @@ -2,7 +2,7 @@ PaddleX提供一个轻量级的模型加密部署方案,通过PaddleX内置的模型加密工具对推理模型进行加密,预测部署SDK支持直接加载密文模型并完成推理,提升AI模型部署的安全性。 -**注意:目前加密方案仅支持Linux系统** +**目前加密方案已支持Windows,Linux系统** ## 1. 方案简介 @@ -20,7 +20,7 @@ PaddleX提供一个轻量级的模型加密部署方案,通过PaddleX内置的 ![](../images/encryption_process.png) -下面是对提供的C/C++加解密库内部实现的中文描述,参考以下步骤可以实现 一套加解密库 来适应自己的场景并通过内存数据load到paddlepaddle中(c/c++预测服务) +下面是对提供的C/C++加解密库内部实现的中文描述,参考以下步骤可以实现一套加解密库来适应自己的场景并通过内存数据加载到Paddle Inference预测库中 > 1)考虑到加密的模型文件解密后需要从内存加载数据,使用conbine的模式生成模型文件和参数文件。 > @@ -34,15 +34,17 @@ PaddleX提供一个轻量级的模型加密部署方案,通过PaddleX内置的 > > 6)在模型解密环节根据加密后的文件读取相关的加密数据到内存中,对内存数据使用AES算法进行解密,注意解密时需要采用与加密时一致的加密算法和加密的模式,以及密钥的数据和长度,否则会导致解密后数据错误。 > -> 7)集成模型预测的C/C++库,在具体使用paddlepaddle预测时一般涉及paddle::AnalysisConfig和paddle:Predictor,为了能够从内存数据中直接load解密后的模型明文数据(避免模型解密后创建临时文件),这里需要将AnalysisConfig的模型加载函数从SetModel替换为SetModelBuffer来实现从内存中加载模型数据。 +> 7)集成模型预测的C/C++库,在具体使用预测时一般涉及paddle::AnalysisConfig和paddle:Predictor,为了能够从内存数据中直接load解密后的模型明文数据(避免模型解密后创建临时文件),这里需要将AnalysisConfig的模型加载函数从SetModel替换为SetModelBuffer来实现从内存中加载模型数据。 需要注意的是,在本方案中,密钥集成在上层预测服务的代码中。故模型的安全强度等同于代码抵御逆向调试的强度。为了保护密钥和模型的安全,开发者还需对自己的应用进行加固保护。常见的应用加固手段有:代码混淆,二进制文件加壳 等等,亦或将加密机制更改为AES白盒加密技术来保护密钥。这类技术领域内有大量商业和开源产品可供选择,此处不一一赘述。 ### 1.2 加密工具 -[PaddleX模型加密工具](https://bj.bcebos.com/paddlex/tools/paddlex-encryption.zip)。在编译部署代码时,编译脚本会自动下载加密工具,您也可以选择手动下载。 +[Linux版本 PaddleX模型加密工具](https://bj.bcebos.com/paddlex/tools/paddlex-encryption.zip),编译脚本会自动下载该版本加密工具,您也可以选择手动下载。 -加密工具包含内容为: +[Windows版本 PaddleX模型加密工具](https://bj.bcebos.com/paddlex/tools/win/paddlex-encryption.zip),该版本加密工具需手动下载,如果您在使用Visual Studio 2019编译C++预测代码的过程中已经下载过该工具,此处可不必重复下载。 + +Linux加密工具包含内容为: ``` paddlex-encryption ├── include # 头文件:paddle_model_decrypt.h(解密)和paddle_model_encrypt.h(加密) @@ -52,22 +54,40 @@ paddlex-encryption └── tool # paddlex_encrypt_tool ``` +Windows加密工具包含内容为: +``` +paddlex-encryption +├── include # 头文件:paddle_model_decrypt.h(解密)和paddle_model_encrypt.h(加密) +| +├── lib # pmodel-encrypt.dll和pmodel-decrypt.dll动态库 pmodel-encrypt.lib和pmodel-encrypt.lib静态库 +| +└── tool # paddlex_encrypt_tool.exe 模型加密工具 +``` ### 1.3 加密PaddleX模型 对模型完成加密后,加密工具会产生随机密钥信息(用于AES加解密使用),需要在后续加密部署时传入该密钥来用于解密。 > 密钥由32字节key + 16字节iv组成, 注意这里产生的key是经过base64编码后的,这样可以扩充key的选取范围 +Linux平台: ``` -./paddlex-encryption/tool/paddlex_encrypt_tool -model_dir /path/to/paddlex_inference_model -save_dir 
/path/to/paddlex_encrypted_model +# 假设模型在/root/projects下 +./paddlex-encryption/tool/paddlex_encrypt_tool -model_dir /root/projects/paddlex_inference_model -save_dir /root/projects/paddlex_encrypted_model ``` -`-model_dir`用于指定inference模型路径(参考[导出inference模型](deploy_python.html#inference)将模型导出为inference格式模型),可使用[导出小度熊识别模型](deploy_python.html#inference)中导出的`inference_model`(**注意**:由于PaddleX代码的持续更新,版本低于1.0.0的模型暂时无法直接用于预测部署,参考[模型版本升级](../upgrade_version.md)对模型版本进行升级。)。加密完成后,加密过的模型会保存至指定的`-save_dir`下,包含`__model__.encrypted`、`__params__.encrypted`和`model.yml`三个文件,同时生成密钥信息,命令输出如下图所示,密钥为`kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=` +Windows平台: +``` +# 假设模型在D:/projects下 +.\paddlex-encryption\tool\paddlex_encrypt_tool.exe -model_dir D:\projects\paddlex_inference_model -save_dir D:\projects\paddlex_encrypted_model +``` + +`-model_dir`用于指定inference模型路径(参考[导出inference模型](deploy_python.html#inference)将模型导出为inference格式模型),可使用[导出小度熊识别模型](deploy_python.html#inference)中导出的`inference_model`。加密完成后,加密过的模型会保存至指定的`-save_dir`下,包含`__model__.encrypted`、`__params__.encrypted`和`model.yml`三个文件,同时生成密钥信息,命令输出如下图所示,密钥为`kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=` ![](../images/encrypt.png) ## 2. PaddleX C++加密部署 -参考[Linux平台编译指南](deploy_cpp/deploy_cpp_linux.html#linux)编译C++部署代码。编译成功后,预测demo的可执行程序分别为`build/demo/detector`,`build/demo/classifer`,`build/demo/segmenter`,用户可根据自己的模型类型选择,其主要命令参数说明如下: +### 2.1 Linux平台使用 +参考[Linux平台编译指南](deploy_cpp/deploy_cpp_linux.md)编译C++部署代码。编译成功后,预测demo的可执行程序分别为`build/demo/detector`,`build/demo/classifier`,`build/demo/segmenter`,用户可根据自己的模型类型选择,其主要命令参数说明如下: | 参数 | 说明 | | ---- | ---- | @@ -75,36 +95,72 @@ paddlex-encryption | image | 要预测的图片文件路径 | | image_list | 按行存储图片路径的.txt文件 | | use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0) | -| use_trt | 是否使用 TensorTr 预测, 支持值为0或1(默认值为0) | +| use_trt | 是否使用 TensorRT 预测, 支持值为0或1(默认值为0) | | gpu_id | GPU 设备ID, 默认值为0 | | save_dir | 保存可视化结果的路径, 默认值为"output",classifier无该参数 | | key | 加密过程中产生的密钥信息,默认值为""表示加载的是未加密的模型 | +| batch_size | 预测的批量大小,默认为1 | +| thread_num | 预测的线程数,默认为cpu处理器个数 | -## 样例 +### 样例 -可使用[导出小度熊识别模型](deploy_python.html#inference)中的测试图片进行预测。 +可使用[导出小度熊识别模型](deploy_python.md#inference)中的测试图片进行预测。 -`样例一`: +#### 样例一: -不使用`GPU`测试图片 `/path/to/xiaoduxiong.jpeg` +不使用`GPU`测试图片 `/root/projects/images/xiaoduxiong.jpeg` ```shell -./build/demo/detector --model_dir=/path/to/inference_model --image=/path/to/xiaoduxiong.jpeg --save_dir=output --key=kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c= +./build/demo/detector --model_dir=/root/projects/paddlex_encrypted_model --image=/root/projects/xiaoduxiong.jpeg --save_dir=output --key=kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c= ``` `--key`传入加密工具输出的密钥,例如`kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=`, 图片文件`可视化预测结果`会保存在`save_dir`参数设置的目录下。 -`样例二`: +#### 样例二: -使用`GPU`预测多个图片`/path/to/image_list.txt`,image_list.txt内容的格式如下: +使用`GPU`预测多个图片`/root/projects/image_list.txt`,image_list.txt内容的格式如下: ``` -/path/to/images/xiaoduxiong1.jpeg -/path/to/images/xiaoduxiong2.jpeg +/root/projects/images/xiaoduxiong1.jpeg +/root/projects/xiaoduxiong2.jpeg ... 
-/path/to/images/xiaoduxiongn.jpeg +/root/projects/xiaoduxiongn.jpeg +``` +```shell +./build/demo/detector --model_dir=/root/projects/models/paddlex_encrypted_model --image_list=/root/projects/images_list.txt --use_gpu=1 --save_dir=output --key=kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c= ``` +`--key`传入加密工具输出的密钥,例如`kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=`, 图片文件`可视化预测结果`会保存在`save_dir`参数设置的目录下。 + +### 2.2 Windows平台使用 +参考[Windows平台编译指南](deploy_cpp/deploy_cpp_win_vs2019.md)。需自行下载Windows版PaddleX加密工具压缩包,解压,在编译指南的编译流程基础上,在CMake设置中勾选WITH_ENCRYPTION,ENCRYPTION_DIR填写为加密工具包解压后的目录,再进行编译。参数与Linux版本预测部署一致。预测demo的入口程序为paddlex_inference\detector.exe,paddlex_inference\classifier.exe,paddlex_inference\segmenter.exe。 + +### 样例 + +可使用[导出小度熊识别模型](deploy_python.md#inference)中的测试图片进行预测。 + +#### 样例一: + +不使用`GPU`测试单张图片,例如图片为`D:\images\xiaoduxiong.jpeg`,加密后的模型目录为`D:\projects\paddlex_encrypted_model` + ```shell -./build/demo/detector --model_dir=/path/to/models/inference_model --image_list=/root/projects/images_list.txt --use_gpu=1 --save_dir=output --key=kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c= +.\paddlex_inference\detector.exe --model_dir=D:\projects\paddlex_encrypted_model --image=D:\images\xiaoduxiong.jpeg --save_dir=output --key=kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c= +``` +`--key`传入加密工具输出的密钥,例如`kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=`, 图片文件`可视化预测结果`会保存在`save_dir`参数设置的目录下。 + + +#### 样例二: + +使用`GPU`预测图片列表,例如图片列表为`D:\projects\image_list.txt`,`image_list.txt`的内容如下: +``` +D:\projects\images\xiaoduxiong1.jpeg +D:\projects\images\xiaoduxiong2.jpeg +... +D:\projects\images\xiaoduxiongn.jpeg +``` + +加密后的模型目录例如为`D:\projects\paddlex_encrypted_model` + +``` +.\paddlex_inference\detector.exe --model_dir=D:\projects\paddlex_encrypted_model --image_list=D:\projects\images_list.txt --use_gpu=1 --save_dir=output --key=kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c= ``` `--key`传入加密工具输出的密钥,例如`kLAl1qOs5uRbFt0/RrIDTZW2+tOf5bzvUIaHGF8lJ1c=`, 图片文件`可视化预测结果`会保存在`save_dir`参数设置的目录下。 diff --git a/docs/tutorials/deploy/images/vs2019_step4.png b/docs/tutorials/deploy/images/vs2019_step4.png index 74fe7b224dc9b3533066111ab2a9cd877a5bbc68..9df86c9824f5fde45bc6df440ddc813742ce460f 100644 Binary files a/docs/tutorials/deploy/images/vs2019_step4.png and b/docs/tutorials/deploy/images/vs2019_step4.png differ diff --git a/docs/tutorials/deploy/images/vs2019_step5.png b/docs/tutorials/deploy/images/vs2019_step5.png old mode 100755 new mode 100644 index 0986e823812e2316c4fd0f2e6cb260a1204fda40..e5349cf08b209561b5cd11e3f5b68e4aa6c6e295 Binary files a/docs/tutorials/deploy/images/vs2019_step5.png and b/docs/tutorials/deploy/images/vs2019_step5.png differ diff --git a/docs/tutorials/deploy/images/vs2019_step6.png b/docs/tutorials/deploy/images/vs2019_step6.png index 86a8039cbd2a9f8fb499ed72d386b5c02b30c86c..0986e823812e2316c4fd0f2e6cb260a1204fda40 100755 Binary files a/docs/tutorials/deploy/images/vs2019_step6.png and b/docs/tutorials/deploy/images/vs2019_step6.png differ diff --git a/docs/tutorials/deploy/images/vs2019_step7.png b/docs/tutorials/deploy/images/vs2019_step7.png new file mode 100755 index 0000000000000000000000000000000000000000..86a8039cbd2a9f8fb499ed72d386b5c02b30c86c Binary files /dev/null and b/docs/tutorials/deploy/images/vs2019_step7.png differ diff --git a/docs/tutorials/deploy/images/vs2019_step_encryption.png b/docs/tutorials/deploy/images/vs2019_step_encryption.png new file mode 100644 index 0000000000000000000000000000000000000000..27a606799363b8b0f383ebd06f86a9a20e133ce9 Binary files /dev/null and 
b/docs/tutorials/deploy/images/vs2019_step_encryption.png differ
diff --git a/examples/human_segmentation/README.md b/examples/human_segmentation/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..18d1f22f3b48979602028e13d1045b63991794b8
--- /dev/null
+++ b/examples/human_segmentation/README.md
@@ -0,0 +1,181 @@
+# HumanSeg人像分割模型
+
+本教程基于PaddleX核心分割网络,提供人像分割场景下从预训练模型选择、Fine-tune到视频分割预测部署的全流程应用指南。
+
+## 安装
+
+**前置依赖**
+* paddlepaddle >= 1.8.0
+* python >= 3.5
+
+```
+pip install paddlex -i https://mirror.baidu.com/pypi/simple
+```
+安装的相关问题请参考[PaddleX安装](https://paddlex.readthedocs.io/zh_CN/latest/install.html)
+
+## 预训练模型
+HumanSeg开放了在大规模人像数据上训练的两个预训练模型,满足多种使用场景的需求。
+
+| 模型类型 | Checkpoint Parameter | Inference Model | Quant Inference Model | 备注 |
+| --- | --- | --- | ---| --- |
+| HumanSeg-server | [humanseg_server_params](https://paddlex.bj.bcebos.com/humanseg/models/humanseg_server.pdparams) | [humanseg_server_inference](https://paddlex.bj.bcebos.com/humanseg/models/humanseg_server_inference.zip) | -- | 高精度模型,适用于服务端GPU且背景复杂的人像场景, 模型结构为Deeplabv3+/Xception65, 输入大小(512, 512) |
+| HumanSeg-mobile | [humanseg_mobile_params](https://paddlex.bj.bcebos.com/humanseg/models/humanseg_mobile.pdparams) | [humanseg_mobile_inference](https://paddlex.bj.bcebos.com/humanseg/models/humanseg_mobile_inference.zip) | [humanseg_mobile_quant](https://paddlex.bj.bcebos.com/humanseg/models/humanseg_mobile_quant.zip) | 轻量级模型, 适用于移动端或服务端CPU的前置摄像头场景,模型结构为HRNet_w18_small_v1,输入大小(192, 192) |
+
+
+模型性能
+
+| 模型 | 模型大小 | 计算耗时 |
+| --- | --- | --- |
+|humanseg_server_inference| 158M | - |
+|humanseg_mobile_inference | 5.8 M | 42.35ms |
+|humanseg_mobile_quant | 1.6M | 24.93ms |
+
+计算耗时运行环境:小米手机,CPU:骁龙855,内存:6GB,图片大小:192*192
+
+
+**NOTE:**
+
+* 其中Checkpoint Parameter为模型权重,用于Fine-tuning场景。
+
+* Inference Model和Quant Inference Model为预测部署模型,包含`__model__`计算图结构、`__params__`模型参数和`model.yaml`基础的模型配置信息。
+
+* 其中Inference Model适用于服务端的CPU和GPU预测部署,Quant Inference Model为量化版本,适用于通过Paddle Lite进行移动端等端侧设备部署。
+
+执行以下脚本进行HumanSeg预训练模型的下载:
+```bash
+python pretrain_weights/download_pretrain_weights.py
+```
+
+## 下载测试数据
+我们提供了[supervise.ly](https://supervise.ly/)发布的人像分割数据集**Supervisely Persons**,从中随机抽取一小部分并转化成PaddleX可直接加载的数据格式,其中还包含手机前置摄像头拍摄的人像测试视频`video_test.mp4`,可通过`data/download_data.py`快速下载。
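+该脚本基于`paddlex.utils.download_and_decompress`完成数据的下载与解压,其核心逻辑如下(摘自本次新增的`data/download_data.py`,数据默认解压到脚本所在目录):
+
+```python
+import os
+import paddlex as pdx
+
+# 数据保存在脚本所在目录
+LOCAL_PATH = os.path.dirname(os.path.abspath(__file__))
+
+# 分别下载并解压人像分割数据集与手机前置摄像头测试视频
+pdx.utils.download_and_decompress(
+    url="https://paddleseg.bj.bcebos.com/humanseg/data/mini_supervisely.zip",
+    path=LOCAL_PATH)
+pdx.utils.download_and_decompress(
+    url="https://paddleseg.bj.bcebos.com/humanseg/data/video_test.zip",
+    path=LOCAL_PATH)
+```
+
+在`examples/human_segmentation`目录下执行: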
+
+```bash
+python data/download_data.py
+```
+
+## 快速体验视频流人像分割
+结合DIS(Dense Inverse Search-based method)光流算法,将光流预测结果与分割结果融合,改善视频流人像分割效果。
+```bash
+# 通过电脑摄像头进行实时分割处理
+python video_infer.py --model_dir pretrain_weights/humanseg_mobile_inference
+
+# 对人像视频进行分割处理
+python video_infer.py --model_dir pretrain_weights/humanseg_mobile_inference --video_path data/video_test.mp4
+```
+
+视频分割结果如下:
+
+
+根据所选背景进行背景替换,背景可以是一张图片,也可以是一段视频。
+```bash
+# 通过电脑摄像头进行实时背景替换处理, 也可通过'--background_video_path'传入背景视频
+python bg_replace.py --model_dir pretrain_weights/humanseg_mobile_inference --background_image_path data/background.jpg
+
+# 对人像视频进行背景替换处理, 也可通过'--background_video_path'传入背景视频
+python bg_replace.py --model_dir pretrain_weights/humanseg_mobile_inference --video_path data/video_test.mp4 --background_image_path data/background.jpg
+
+# 对单张图像进行背景替换
+python bg_replace.py --model_dir pretrain_weights/humanseg_mobile_inference --image_path data/human_image.jpg --background_image_path data/background.jpg
+
+```
+
+背景替换结果如下:
+
+
+**NOTE**:
+
+视频分割处理时间需要几分钟,请耐心等待。
+
+提供的模型适用于手机摄像头竖屏拍摄场景,宽屏效果会略差一些。
+
+## 训练
+使用下述命令基于预训练模型进行Fine-tuning,请确保选用的模型结构`model_type`与模型参数`pretrain_weights`匹配。
+```bash
+# 指定GPU卡号(以0号卡为例)
+export CUDA_VISIBLE_DEVICES=0
+# 若不使用GPU,则将CUDA_VISIBLE_DEVICES指定为空
+# export CUDA_VISIBLE_DEVICES=
+python train.py --model_type HumanSegMobile \
+--save_dir output/ \
+--data_dir data/mini_supervisely \
+--train_list data/mini_supervisely/train.txt \
+--val_list data/mini_supervisely/val.txt \
+--pretrain_weights pretrain_weights/humanseg_mobile_params \
+--batch_size 8 \
+--learning_rate 0.001 \
+--num_epochs 10 \
+--image_shape 192 192
+```
+其中参数含义如下:
+* `--model_type`: 模型类型,可选项为:HumanSegServer和HumanSegMobile
+* `--save_dir`: 模型保存路径
+* `--data_dir`: 数据集路径
+* `--train_list`: 训练集列表路径
+* `--val_list`: 验证集列表路径
+* `--pretrain_weights`: 预训练模型路径
+* `--batch_size`: 批大小
+* `--learning_rate`: 初始学习率
+* `--num_epochs`: 训练轮数
+* `--image_shape`: 网络输入图像大小(w, h)
+
+更多命令行帮助可运行下述命令进行查看:
+```bash
+python train.py --help
+```
+**NOTE**
+可通过更换`--model_type`变量与对应的`--pretrain_weights`使用不同的模型快速尝试。
+
+## 评估
+使用下述命令进行评估:
+```bash
+python eval.py --model_dir output/best_model \
+--data_dir data/mini_supervisely \
+--val_list data/mini_supervisely/val.txt \
+--image_shape 192 192
+```
+其中参数含义如下:
+* `--model_dir`: 模型路径
+* `--data_dir`: 数据集路径
+* `--val_list`: 验证集列表路径
+* `--image_shape`: 网络输入图像大小(w, h)
+
+## 预测
+使用下述命令进行预测,预测结果默认保存在`./output/result/`文件夹中。
+```bash
+python infer.py --model_dir output/best_model \
+--data_dir data/mini_supervisely \
+--test_list data/mini_supervisely/test.txt \
+--save_dir output/result \
+--image_shape 192 192
+```
+其中参数含义如下:
+* `--model_dir`: 模型路径
+* `--data_dir`: 数据集路径
+* `--test_list`: 测试集列表路径
+* `--image_shape`: 网络输入图像大小(w, h)
+
+## 模型导出
+```bash
+paddlex --export_inference --model_dir output/best_model \
+--save_dir output/export
+```
+其中参数含义如下:
+* `--model_dir`: 模型路径
+* `--save_dir`: 导出模型保存路径
+
+## 离线量化
+```bash
+python quant_offline.py --model_dir output/best_model \
+--data_dir data/mini_supervisely \
+--quant_list data/mini_supervisely/val.txt \
+--save_dir output/quant_offline \
+--image_shape 192 192
+```
+其中参数含义如下:
+* `--model_dir`: 待量化模型路径
+* `--data_dir`: 数据集路径
+* `--quant_list`: 量化数据集列表路径,一般直接选择训练集或验证集
+* `--save_dir`: 量化模型保存路径
+* `--image_shape`: 网络输入图像大小(w, h)
diff --git a/examples/human_segmentation/bg_replace.py b/examples/human_segmentation/bg_replace.py
new file mode 100644
index 0000000000000000000000000000000000000000..e0c1cc4261f0c946aaf07c11b5c4f6d1c21f6dca
--- /dev/null
+++ 
b/examples/human_segmentation/bg_replace.py @@ -0,0 +1,314 @@ +# coding: utf8 +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import os +import os.path as osp +import cv2 +import numpy as np + +from postprocess import postprocess, threshold_mask +import paddlex as pdx +import paddlex.utils.logging as logging +from paddlex.seg import transforms + + +def parse_args(): + parser = argparse.ArgumentParser( + description='HumanSeg inference for video') + parser.add_argument( + '--model_dir', + dest='model_dir', + help='Model path for inference', + type=str) + parser.add_argument( + '--image_path', + dest='image_path', + help='Image including human', + type=str, + default=None) + parser.add_argument( + '--background_image_path', + dest='background_image_path', + help='Background image for replacing', + type=str, + default=None) + parser.add_argument( + '--video_path', + dest='video_path', + help='Video path for inference', + type=str, + default=None) + parser.add_argument( + '--background_video_path', + dest='background_video_path', + help='Background video path for replacing', + type=str, + default=None) + parser.add_argument( + '--save_dir', + dest='save_dir', + help='The directory for saving the inference results', + type=str, + default='./output') + parser.add_argument( + "--image_shape", + dest="image_shape", + help="The image shape for net inputs.", + nargs=2, + default=[192, 192], + type=int) + + return parser.parse_args() + + +def bg_replace(label_map, img, bg): + h, w, _ = img.shape + bg = cv2.resize(bg, (w, h)) + label_map = np.repeat(label_map[:, :, np.newaxis], 3, axis=2) + comb = (label_map * img + (1 - label_map) * bg).astype(np.uint8) + return comb + + +def recover(img, im_info): + if im_info[0] == 'resize': + w, h = im_info[1][1], im_info[1][0] + img = cv2.resize(img, (w, h), cv2.INTER_LINEAR) + elif im_info[0] == 'padding': + w, h = im_info[1][0], im_info[1][0] + img = img[0:h, 0:w, :] + return img + + +def infer(args): + resize_h = args.image_shape[1] + resize_w = args.image_shape[0] + + test_transforms = transforms.Compose([transforms.Normalize()]) + model = pdx.load_model(args.model_dir) + + if not osp.exists(args.save_dir): + os.makedirs(args.save_dir) + + # 图像背景替换 + if args.image_path is not None: + if not osp.exists(args.image_path): + raise Exception('The --image_path is not existed: {}'.format( + args.image_path)) + if args.background_image_path is None: + raise Exception( + 'The --background_image_path is not set. 
Please set it')
+        else:
+            if not osp.exists(args.background_image_path):
+                raise Exception(
+                    'The --background_image_path does not exist: {}'.format(
+                        args.background_image_path))
+
+        img = cv2.imread(args.image_path)
+        im_shape = img.shape
+        im_scale_x = float(resize_w) / float(im_shape[1])
+        im_scale_y = float(resize_h) / float(im_shape[0])
+        im = cv2.resize(
+            img,
+            None,
+            None,
+            fx=im_scale_x,
+            fy=im_scale_y,
+            interpolation=cv2.INTER_LINEAR)
+        image = im.astype('float32')
+        im_info = ('resize', im_shape[0:2])
+        pred = model.predict(image, test_transforms)
+        label_map = pred['label_map']
+        label_map = recover(label_map, im_info)
+        bg = cv2.imread(args.background_image_path)
+        save_name = osp.basename(args.image_path)
+        save_path = osp.join(args.save_dir, save_name)
+        result = bg_replace(label_map, img, bg)
+        cv2.imwrite(save_path, result)
+
+    # 视频背景替换,如果提供背景视频则以背景视频作为背景,否则采用提供的背景图片
+    else:
+        is_video_bg = False
+        if args.background_video_path is not None:
+            if not osp.exists(args.background_video_path):
+                raise Exception(
+                    'The --background_video_path does not exist: {}'.format(
+                        args.background_video_path))
+            is_video_bg = True
+        elif args.background_image_path is not None:
+            if not osp.exists(args.background_image_path):
+                raise Exception(
+                    'The --background_image_path does not exist: {}'.format(
+                        args.background_image_path))
+        else:
+            raise Exception(
+                'Please offer a background image or video. You should set --background_image_path or --background_video_path'
+            )
+
+        disflow = cv2.DISOpticalFlow_create(
+            cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST)
+        prev_gray = np.zeros((resize_h, resize_w), np.uint8)
+        prev_cfd = np.zeros((resize_h, resize_w), np.float32)
+        is_init = True
+        if args.video_path is not None:
+            logging.info('Please wait. It is computing......')
+            if not osp.exists(args.video_path):
+                raise Exception('The --video_path does not exist: {}'.format(
+                    args.video_path))
+
+            cap_video = cv2.VideoCapture(args.video_path)
+            fps = cap_video.get(cv2.CAP_PROP_FPS)
+            width = int(cap_video.get(cv2.CAP_PROP_FRAME_WIDTH))
+            height = int(cap_video.get(cv2.CAP_PROP_FRAME_HEIGHT))
+            save_name = osp.basename(args.video_path)
+            save_name = save_name.split('.')[0]
+            save_path = osp.join(args.save_dir, save_name + '.avi')
+
+            cap_out = cv2.VideoWriter(
+                save_path,
+                cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps,
+                (width, height))
+
+            if is_video_bg:
+                cap_bg = cv2.VideoCapture(args.background_video_path)
+                frames_bg = cap_bg.get(cv2.CAP_PROP_FRAME_COUNT)
+                current_frame_bg = 1
+            else:
+                img_bg = cv2.imread(args.background_image_path)
+            while cap_video.isOpened():
+                ret, frame = cap_video.read()
+                if ret:
+                    im_shape = frame.shape
+                    im_scale_x = float(resize_w) / float(im_shape[1])
+                    im_scale_y = float(resize_h) / float(im_shape[0])
+                    im = cv2.resize(
+                        frame,
+                        None,
+                        None,
+                        fx=im_scale_x,
+                        fy=im_scale_y,
+                        interpolation=cv2.INTER_LINEAR)
+                    image = im.astype('float32')
+                    im_info = ('resize', im_shape[0:2])
+                    pred = model.predict(image, test_transforms)
+                    score_map = pred['score_map']
+                    cur_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
+                    cur_gray = cv2.resize(cur_gray, (resize_w, resize_h))
+                    score_map = 255 * score_map[:, :, 1]
+                    optflow_map = postprocess(cur_gray, score_map, prev_gray, prev_cfd, \
+                        disflow, is_init)
+                    prev_gray = cur_gray.copy()
+                    prev_cfd = optflow_map.copy()
+                    is_init = False
+                    optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0)
+                    optflow_map = threshold_mask(
+                        optflow_map, thresh_bg=0.2, thresh_fg=0.8)
+                    score_map = recover(optflow_map, im_info)
+
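+                    # 此时score_map已是光流跟踪结果与当前帧分割结果融合、
+                    # 并恢复到原始帧分辨率的前景alpha图,下面用它与背景合成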
#循环读取背景帧 + if is_video_bg: + ret_bg, frame_bg = cap_bg.read() + if ret_bg: + if current_frame_bg == frames_bg: + current_frame_bg = 1 + cap_bg.set(cv2.CAP_PROP_POS_FRAMES, 0) + else: + break + current_frame_bg += 1 + comb = bg_replace(score_map, frame, frame_bg) + else: + comb = bg_replace(score_map, frame, img_bg) + + cap_out.write(comb) + else: + break + + if is_video_bg: + cap_bg.release() + cap_video.release() + cap_out.release() + + # 当没有输入预测图像和视频的时候,则打开摄像头 + else: + cap_video = cv2.VideoCapture(0) + if not cap_video.isOpened(): + raise IOError("Error opening video stream or file, " + "--video_path whether existing: {}" + " or camera whether working".format( + args.video_path)) + return + + if is_video_bg: + cap_bg = cv2.VideoCapture(args.background_video_path) + frames_bg = cap_bg.get(cv2.CAP_PROP_FRAME_COUNT) + current_frame_bg = 1 + else: + img_bg = cv2.imread(args.background_image_path) + while cap_video.isOpened(): + ret, frame = cap_video.read() + if ret: + im_shape = frame.shape + im_scale_x = float(resize_w) / float(im_shape[1]) + im_scale_y = float(resize_h) / float(im_shape[0]) + im = cv2.resize( + frame, + None, + None, + fx=im_scale_x, + fy=im_scale_y, + interpolation=cv2.INTER_LINEAR) + image = im.astype('float32') + im_info = ('resize', im_shape[0:2]) + pred = model.predict(image, test_transforms) + score_map = pred['score_map'] + cur_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY) + cur_gray = cv2.resize(cur_gray, (resize_w, resize_h)) + score_map = 255 * score_map[:, :, 1] + optflow_map = postprocess(cur_gray, score_map, prev_gray, prev_cfd, \ + disflow, is_init) + prev_gray = cur_gray.copy() + prev_cfd = optflow_map.copy() + is_init = False + optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0) + optflow_map = threshold_mask( + optflow_map, thresh_bg=0.2, thresh_fg=0.8) + score_map = recover(optflow_map, im_info) + + #循环读取背景帧 + if is_video_bg: + ret_bg, frame_bg = cap_bg.read() + if ret_bg: + if current_frame_bg == frames_bg: + current_frame_bg = 1 + cap_bg.set(cv2.CAP_PROP_POS_FRAMES, 0) + else: + break + current_frame_bg += 1 + comb = bg_replace(score_map, frame, frame_bg) + else: + comb = bg_replace(score_map, frame, img_bg) + cv2.imshow('HumanSegmentation', comb) + if cv2.waitKey(1) & 0xFF == ord('q'): + break + else: + break + if is_video_bg: + cap_bg.release() + cap_video.release() + + +if __name__ == "__main__": + args = parse_args() + infer(args) diff --git a/examples/human_segmentation/data/download_data.py b/examples/human_segmentation/data/download_data.py new file mode 100644 index 0000000000000000000000000000000000000000..941b4cc81ef05335c867c6c1eea20c07c44c7360 --- /dev/null +++ b/examples/human_segmentation/data/download_data.py @@ -0,0 +1,33 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
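+
+# 说明:本脚本将mini_supervisely示例数据集与测试视频video_test.zip下载并
+# 解压到脚本所在目录,用法:python data/download_data.py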
+ +import sys +import os + +LOCAL_PATH = os.path.dirname(os.path.abspath(__file__)) + +import paddlex as pdx + + +def download_data(savepath): + url = "https://paddleseg.bj.bcebos.com/humanseg/data/mini_supervisely.zip" + pdx.utils.download_and_decompress(url=url, path=savepath) + + url = "https://paddleseg.bj.bcebos.com/humanseg/data/video_test.zip" + pdx.utils.download_and_decompress(url=url, path=savepath) + + +if __name__ == "__main__": + download_data(LOCAL_PATH) + print("Data download finish!") diff --git a/examples/human_segmentation/eval.py b/examples/human_segmentation/eval.py new file mode 100644 index 0000000000000000000000000000000000000000..a6e05ea0b2c463b948a1a021fa74f01512985675 --- /dev/null +++ b/examples/human_segmentation/eval.py @@ -0,0 +1,85 @@ +# coding: utf8 +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import paddlex as pdx +import paddlex.utils.logging as logging +from paddlex.seg import transforms + + +def parse_args(): + parser = argparse.ArgumentParser(description='HumanSeg training') + parser.add_argument( + '--model_dir', + dest='model_dir', + help='Model path for evaluating', + type=str, + default='output/best_model') + parser.add_argument( + '--data_dir', + dest='data_dir', + help='The root directory of dataset', + type=str) + parser.add_argument( + '--val_list', + dest='val_list', + help='Val list file of dataset', + type=str, + default=None) + parser.add_argument( + '--batch_size', + dest='batch_size', + help='Mini batch size', + type=int, + default=128) + parser.add_argument( + "--image_shape", + dest="image_shape", + help="The image shape for net inputs.", + nargs=2, + default=[192, 192], + type=int) + return parser.parse_args() + + +def dict2str(dict_input): + out = '' + for k, v in dict_input.items(): + try: + v = round(float(v), 6) + except: + pass + out = out + '{}={}, '.format(k, v) + return out.strip(', ') + + +def evaluate(args): + eval_transforms = transforms.Compose( + [transforms.Resize(args.image_shape), transforms.Normalize()]) + + eval_dataset = pdx.datasets.SegDataset( + data_dir=args.data_dir, + file_list=args.val_list, + transforms=eval_transforms) + + model = pdx.load_model(args.model_dir) + metrics = model.evaluate(eval_dataset, args.batch_size) + logging.info('[EVAL] Finished, {} .'.format(dict2str(metrics))) + + +if __name__ == '__main__': + args = parse_args() + + evaluate(args) diff --git a/examples/human_segmentation/infer.py b/examples/human_segmentation/infer.py new file mode 100644 index 0000000000000000000000000000000000000000..c78df7ae51609299a44d1c706197c56e2a20618e --- /dev/null +++ b/examples/human_segmentation/infer.py @@ -0,0 +1,109 @@ +# coding: utf8 +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import os +import os.path as osp +import cv2 +import numpy as np +import tqdm + +import paddlex as pdx +from paddlex.seg import transforms + + +def parse_args(): + parser = argparse.ArgumentParser( + description='HumanSeg prediction and visualization') + parser.add_argument( + '--model_dir', + dest='model_dir', + help='Model path for prediction', + type=str) + parser.add_argument( + '--data_dir', + dest='data_dir', + help='The root directory of dataset', + type=str) + parser.add_argument( + '--test_list', + dest='test_list', + help='Test list file of dataset', + type=str) + parser.add_argument( + '--save_dir', + dest='save_dir', + help='The directory for saving the inference results', + type=str, + default='./output/result') + parser.add_argument( + "--image_shape", + dest="image_shape", + help="The image shape for net inputs.", + nargs=2, + default=[192, 192], + type=int) + return parser.parse_args() + + +def infer(args): + def makedir(path): + sub_dir = osp.dirname(path) + if not osp.exists(sub_dir): + os.makedirs(sub_dir) + + test_transforms = transforms.Compose( + [transforms.Resize(args.image_shape), transforms.Normalize()]) + model = pdx.load_model(args.model_dir) + added_saved_path = osp.join(args.save_dir, 'added') + mat_saved_path = osp.join(args.save_dir, 'mat') + scoremap_saved_path = osp.join(args.save_dir, 'scoremap') + + with open(args.test_list, 'r') as f: + files = f.readlines() + + for file in tqdm.tqdm(files): + file = file.strip() + im_file = osp.join(args.data_dir, file) + im = cv2.imread(im_file) + result = model.predict(im_file, transforms=test_transforms) + + # save added image + added_image = pdx.seg.visualize( + im_file, result, weight=0.6, save_dir=None) + added_image_file = osp.join(added_saved_path, file) + makedir(added_image_file) + cv2.imwrite(added_image_file, added_image) + + # save score map + score_map = result['score_map'][:, :, 1] + score_map = (score_map * 255).astype(np.uint8) + score_map_file = osp.join(scoremap_saved_path, file) + makedir(score_map_file) + cv2.imwrite(score_map_file, score_map) + + # save mat image + score_map = np.expand_dims(score_map, axis=-1) + mat_image = np.concatenate([im, score_map], axis=2) + mat_file = osp.join(mat_saved_path, file) + ext = osp.splitext(mat_file)[-1] + mat_file = mat_file.replace(ext, '.png') + makedir(mat_file) + cv2.imwrite(mat_file, mat_image) + + +if __name__ == '__main__': + args = parse_args() + infer(args) diff --git a/examples/human_segmentation/postprocess.py b/examples/human_segmentation/postprocess.py new file mode 100644 index 0000000000000000000000000000000000000000..88e5dcc80f3d49d7d5625e74fe4de313b59fa844 --- /dev/null +++ b/examples/human_segmentation/postprocess.py @@ -0,0 +1,125 @@ +# coding: utf8 +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np + + +def cal_optical_flow_tracking(pre_gray, cur_gray, prev_cfd, dl_weights, + disflow): + """计算光流跟踪匹配点和光流图 + 输入参数: + pre_gray: 上一帧灰度图 + cur_gray: 当前帧灰度图 + prev_cfd: 上一帧光流图 + dl_weights: 融合权重图 + disflow: 光流数据结构 + 返回值: + is_track: 光流点跟踪二值图,即是否具有光流点匹配 + track_cfd: 光流跟踪图 + """ + check_thres = 8 + h, w = pre_gray.shape[:2] + track_cfd = np.zeros_like(prev_cfd) + is_track = np.zeros_like(pre_gray) + flow_fw = disflow.calc(pre_gray, cur_gray, None) + flow_bw = disflow.calc(cur_gray, pre_gray, None) + flow_fw = np.round(flow_fw).astype(np.int) + flow_bw = np.round(flow_bw).astype(np.int) + y_list = np.array(range(h)) + x_list = np.array(range(w)) + yv, xv = np.meshgrid(y_list, x_list) + yv, xv = yv.T, xv.T + cur_x = xv + flow_fw[:, :, 0] + cur_y = yv + flow_fw[:, :, 1] + + # 超出边界不跟踪 + not_track = (cur_x < 0) + (cur_x >= w) + (cur_y < 0) + (cur_y >= h) + flow_bw[~not_track] = flow_bw[cur_y[~not_track], cur_x[~not_track]] + not_track += (np.square(flow_fw[:, :, 0] + flow_bw[:, :, 0]) + + np.square(flow_fw[:, :, 1] + flow_bw[:, :, 1]) + ) >= check_thres + track_cfd[cur_y[~not_track], cur_x[~not_track]] = prev_cfd[~not_track] + + is_track[cur_y[~not_track], cur_x[~not_track]] = 1 + + not_flow = np.all(np.abs(flow_fw) == 0, + axis=-1) * np.all(np.abs(flow_bw) == 0, axis=-1) + dl_weights[cur_y[not_flow], cur_x[not_flow]] = 0.05 + return track_cfd, is_track, dl_weights + + +def fuse_optical_flow_tracking(track_cfd, dl_cfd, dl_weights, is_track): + """光流追踪图和人像分割结构融合 + 输入参数: + track_cfd: 光流追踪图 + dl_cfd: 当前帧分割结果 + dl_weights: 融合权重图 + is_track: 光流点匹配二值图 + 返回 + cur_cfd: 光流跟踪图和人像分割结果融合图 + """ + fusion_cfd = dl_cfd.copy() + is_track = is_track.astype(np.bool) + fusion_cfd[is_track] = dl_weights[is_track] * dl_cfd[is_track] + ( + 1 - dl_weights[is_track]) * track_cfd[is_track] + # 确定区域 + index_certain = ((dl_cfd > 0.9) + (dl_cfd < 0.1)) * is_track + index_less01 = (dl_weights < 0.1) * index_certain + fusion_cfd[index_less01] = 0.3 * dl_cfd[index_less01] + 0.7 * track_cfd[ + index_less01] + index_larger09 = (dl_weights >= 0.1) * index_certain + fusion_cfd[index_larger09] = 0.4 * dl_cfd[ + index_larger09] + 0.6 * track_cfd[index_larger09] + return fusion_cfd + + +def threshold_mask(img, thresh_bg, thresh_fg): + dst = (img / 255.0 - thresh_bg) / (thresh_fg - thresh_bg) + dst[np.where(dst > 1)] = 1 + dst[np.where(dst < 0)] = 0 + return dst.astype(np.float32) + + +def postprocess(cur_gray, scoremap, prev_gray, pre_cfd, disflow, is_init): + """光流优化 + Args: + cur_gray : 当前帧灰度图 + pre_gray : 前一帧灰度图 + pre_cfd :前一帧融合结果 + scoremap : 当前帧分割结果 + difflow : 光流 + is_init : 是否第一帧 + Returns: + fusion_cfd : 光流追踪图和预测结果融合图 + """ + h, w = scoremap.shape + cur_cfd = scoremap.copy() + + if is_init: + if h <= 64 or w <= 64: + disflow.setFinestScale(1) + elif h <= 160 or w <= 160: + disflow.setFinestScale(2) + else: + disflow.setFinestScale(3) + fusion_cfd = cur_cfd + else: + weights = np.ones((h, w), np.float32) * 0.3 + track_cfd, is_track, weights = cal_optical_flow_tracking( + prev_gray, cur_gray, pre_cfd, weights, disflow) + fusion_cfd = fuse_optical_flow_tracking(track_cfd, cur_cfd, weights, + is_track) 
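+    # fusion_cfd为融合后的前景置信度图;调用方(bg_replace.py、video_infer.py)
+    # 会再做高斯模糊,并用threshold_mask阈值化得到最终的alpha蒙版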
+ + return fusion_cfd diff --git a/examples/human_segmentation/pretrain_weights/download_pretrain_weights.py b/examples/human_segmentation/pretrain_weights/download_pretrain_weights.py new file mode 100644 index 0000000000000000000000000000000000000000..be961ab6ebca2f8fef2e5573a817ccfd29fee41a --- /dev/null +++ b/examples/human_segmentation/pretrain_weights/download_pretrain_weights.py @@ -0,0 +1,40 @@ +# coding: utf8 +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import os + +LOCAL_PATH = os.path.dirname(os.path.abspath(__file__)) + +import paddlex as pdx +import paddlehub as hub + +model_urls = { + "PaddleX_HumanSeg_Server_Params": + "https://bj.bcebos.com/paddlex/models/humanseg/humanseg_server_params.tar", + "PaddleX_HumanSeg_Server_Inference": + "https://bj.bcebos.com/paddlex/models/humanseg/humanseg_server_inference.tar", + "PaddleX_HumanSeg_Mobile_Params": + "https://bj.bcebos.com/paddlex/models/humanseg/humanseg_mobile_params.tar", + "PaddleX_HumanSeg_Mobile_Inference": + "https://bj.bcebos.com/paddlex/models/humanseg/humanseg_mobile_inference.tar", + "PaddleX_HumanSeg_Mobile_Quant": + "https://bj.bcebos.com/paddlex/models/humanseg/humanseg_mobile_quant.tar" +} + +if __name__ == "__main__": + for model_name, url in model_urls.items(): + pdx.utils.download_and_decompress(url=url, path=LOCAL_PATH) + print("Pretrained Model download success!") diff --git a/examples/human_segmentation/quant_offline.py b/examples/human_segmentation/quant_offline.py new file mode 100644 index 0000000000000000000000000000000000000000..a801f8d02263f8dab98f3250478a289337492ae4 --- /dev/null +++ b/examples/human_segmentation/quant_offline.py @@ -0,0 +1,85 @@ +# coding: utf8 +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
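+
+# 说明:本脚本执行离线量化(训练后量化):用--quant_list指定的少量校准数据
+# 统计激活分布,再经paddlex.slim.export_quant_model导出量化模型,无需重新训练。
+# 用法示例:python quant_offline.py --model_dir output/best_model \
+#          --data_dir data/mini_supervisely --quant_list data/mini_supervisely/val.txt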
+
+import argparse
+import paddlex as pdx
+from paddlex.seg import transforms
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description='HumanSeg offline quantization')
+    parser.add_argument(
+        '--model_dir',
+        dest='model_dir',
+        help='Model path for quant',
+        type=str,
+        default='output/best_model')
+    parser.add_argument(
+        '--batch_size',
+        dest='batch_size',
+        help='Mini batch size',
+        type=int,
+        default=1)
+    parser.add_argument(
+        '--batch_nums',
+        dest='batch_nums',
+        help='Batch number for quant',
+        type=int,
+        default=10)
+    parser.add_argument(
+        '--data_dir',
+        dest='data_dir',
+        help='The root directory of dataset',
+        type=str)
+    parser.add_argument(
+        '--quant_list',
+        dest='quant_list',
+        help='Image file list for model quantization, it can be val.txt or train.txt',
+        type=str,
+        default=None)
+    parser.add_argument(
+        '--save_dir',
+        dest='save_dir',
+        help='The directory for saving the quant model',
+        type=str,
+        default='./output/quant_offline')
+    parser.add_argument(
+        "--image_shape",
+        dest="image_shape",
+        help="The image shape for net inputs.",
+        nargs=2,
+        default=[192, 192],
+        type=int)
+    return parser.parse_args()
+
+
+def evaluate(args):
+    eval_transforms = transforms.Compose(
+        [transforms.Resize(args.image_shape), transforms.Normalize()])
+
+    eval_dataset = pdx.datasets.SegDataset(
+        data_dir=args.data_dir,
+        file_list=args.quant_list,
+        transforms=eval_transforms)
+
+    model = pdx.load_model(args.model_dir)
+    pdx.slim.export_quant_model(model, eval_dataset, args.batch_size,
+                                args.batch_nums, args.save_dir)
+
+
+if __name__ == '__main__':
+    args = parse_args()
+
+    evaluate(args)
diff --git a/examples/human_segmentation/train.py b/examples/human_segmentation/train.py
new file mode 100644
index 0000000000000000000000000000000000000000..a7df98f360a78c2624814fc75bb0c382e19b7e95
--- /dev/null
+++ b/examples/human_segmentation/train.py
@@ -0,0 +1,156 @@
+# coding: utf8
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
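+
+# 说明:--model_type为HumanSegMobile时对应HRNet(width='18_small_v1'),
+# 为HumanSegServer时对应DeepLabv3p(backbone='Xception65');
+# 所选--pretrain_weights需与模型结构匹配(见README中的预训练模型表格)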
+
+import argparse
+
+import paddlex as pdx
+from paddlex.seg import transforms
+
+MODEL_TYPE = ['HumanSegMobile', 'HumanSegServer']
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description='HumanSeg training')
+    parser.add_argument(
+        '--model_type',
+        dest='model_type',
+        help="Model type for training, which is one of ('HumanSegMobile', 'HumanSegServer')",
+        type=str,
+        default='HumanSegMobile')
+    parser.add_argument(
+        '--data_dir',
+        dest='data_dir',
+        help='The root directory of dataset',
+        type=str)
+    parser.add_argument(
+        '--train_list',
+        dest='train_list',
+        help='Train list file of dataset',
+        type=str)
+    parser.add_argument(
+        '--val_list',
+        dest='val_list',
+        help='Val list file of dataset',
+        type=str,
+        default=None)
+    parser.add_argument(
+        '--save_dir',
+        dest='save_dir',
+        help='The directory for saving the model snapshot',
+        type=str,
+        default='./output')
+    parser.add_argument(
+        '--num_classes',
+        dest='num_classes',
+        help='Number of classes',
+        type=int,
+        default=2)
+    parser.add_argument(
+        "--image_shape",
+        dest="image_shape",
+        help="The image shape for net inputs.",
+        nargs=2,
+        default=[192, 192],
+        type=int)
+    parser.add_argument(
+        '--num_epochs',
+        dest='num_epochs',
+        help='Number of epochs for training',
+        type=int,
+        default=100)
+    parser.add_argument(
+        '--batch_size',
+        dest='batch_size',
+        help='Mini batch size',
+        type=int,
+        default=128)
+    parser.add_argument(
+        '--learning_rate',
+        dest='learning_rate',
+        help='Learning rate',
+        type=float,
+        default=0.01)
+    parser.add_argument(
+        '--pretrain_weights',
+        dest='pretrain_weights',
+        help='The path of pretrained weights',
+        type=str,
+        default=None)
+    parser.add_argument(
+        '--resume_checkpoint',
+        dest='resume_checkpoint',
+        help='The path of resume checkpoint',
+        type=str,
+        default=None)
+    parser.add_argument(
+        '--use_vdl',
+        dest='use_vdl',
+        help='Whether to use visualdl',
+        action='store_true')
+    parser.add_argument(
+        '--save_interval_epochs',
+        dest='save_interval_epochs',
+        help='The interval epochs for save a model snapshot',
+        type=int,
+        default=5)
+
+    return parser.parse_args()
+
+
+def train(args):
+    train_transforms = transforms.Compose([
+        transforms.Resize(args.image_shape), transforms.RandomHorizontalFlip(),
+        transforms.Normalize()
+    ])
+
+    eval_transforms = transforms.Compose(
+        [transforms.Resize(args.image_shape), transforms.Normalize()])
+
+    train_dataset = pdx.datasets.SegDataset(
+        data_dir=args.data_dir,
+        file_list=args.train_list,
+        transforms=train_transforms,
+        shuffle=True)
+    eval_dataset = pdx.datasets.SegDataset(
+        data_dir=args.data_dir,
+        file_list=args.val_list,
+        transforms=eval_transforms)
+
+    if args.model_type == 'HumanSegMobile':
+        model = pdx.seg.HRNet(
+            num_classes=args.num_classes, width='18_small_v1')
+    elif args.model_type == 'HumanSegServer':
+        model = pdx.seg.DeepLabv3p(
+            num_classes=args.num_classes, backbone='Xception65')
+    else:
+        raise ValueError(
+            "--model_type: {} is set wrong, it should be one of "
+            "('HumanSegMobile', 'HumanSegServer')".format(args.model_type))
+    model.train(
+        num_epochs=args.num_epochs,
+        train_dataset=train_dataset,
+        train_batch_size=args.batch_size,
+        eval_dataset=eval_dataset,
+        save_interval_epochs=args.save_interval_epochs,
+        learning_rate=args.learning_rate,
+        pretrain_weights=args.pretrain_weights,
+        resume_checkpoint=args.resume_checkpoint,
+        save_dir=args.save_dir,
+        use_vdl=args.use_vdl)
+
+
+if __name__ == '__main__':
+    args = parse_args()
+    train(args)
diff --git 
a/examples/human_segmentation/video_infer.py b/examples/human_segmentation/video_infer.py new file mode 100644 index 0000000000000000000000000000000000000000..c2a67fe0032eae19e937580ff35e53ba09d1118f --- /dev/null +++ b/examples/human_segmentation/video_infer.py @@ -0,0 +1,187 @@ +# coding: utf8 +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import os +import os.path as osp +import cv2 +import numpy as np + +from postprocess import postprocess, threshold_mask +import paddlex as pdx +import paddlex.utils.logging as logging +from paddlex.seg import transforms + + +def parse_args(): + parser = argparse.ArgumentParser( + description='HumanSeg inference for video') + parser.add_argument( + '--model_dir', + dest='model_dir', + help='Model path for inference', + type=str) + parser.add_argument( + '--video_path', + dest='video_path', + help='Video path for inference, camera will be used if the path not existing', + type=str, + default=None) + parser.add_argument( + '--save_dir', + dest='save_dir', + help='The directory for saving the inference results', + type=str, + default='./output') + parser.add_argument( + "--image_shape", + dest="image_shape", + help="The image shape for net inputs.", + nargs=2, + default=[192, 192], + type=int) + + return parser.parse_args() + + +def recover(img, im_info): + if im_info[0] == 'resize': + w, h = im_info[1][1], im_info[1][0] + img = cv2.resize(img, (w, h), cv2.INTER_LINEAR) + elif im_info[0] == 'padding': + w, h = im_info[1][0], im_info[1][0] + img = img[0:h, 0:w, :] + return img + + +def video_infer(args): + resize_h = args.image_shape[1] + resize_w = args.image_shape[0] + + model = pdx.load_model(args.model_dir) + test_transforms = transforms.Compose([transforms.Normalize()]) + if not args.video_path: + cap = cv2.VideoCapture(0) + else: + cap = cv2.VideoCapture(args.video_path) + if not cap.isOpened(): + raise IOError("Error opening video stream or file, " + "--video_path whether existing: {}" + " or camera whether working".format(args.video_path)) + return + + width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + + disflow = cv2.DISOpticalFlow_create(cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST) + prev_gray = np.zeros((resize_h, resize_w), np.uint8) + prev_cfd = np.zeros((resize_h, resize_w), np.float32) + is_init = True + + fps = cap.get(cv2.CAP_PROP_FPS) + if args.video_path: + logging.info("Please wait. 
+        # 用于保存预测结果视频
+        if not osp.exists(args.save_dir):
+            os.makedirs(args.save_dir)
+        out = cv2.VideoWriter(
+            osp.join(args.save_dir, 'result.avi'),
+            cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps, (width, height))
+        # 开始获取视频帧
+        while cap.isOpened():
+            ret, frame = cap.read()
+            if ret:
+                im_shape = frame.shape
+                im_scale_x = float(resize_w) / float(im_shape[1])
+                im_scale_y = float(resize_h) / float(im_shape[0])
+                im = cv2.resize(
+                    frame,
+                    None,
+                    None,
+                    fx=im_scale_x,
+                    fy=im_scale_y,
+                    interpolation=cv2.INTER_LINEAR)
+                image = im.astype('float32')
+                im_info = ('resize', im_shape[0:2])
+                pred = model.predict(image, test_transforms)
+                score_map = pred['score_map']
+                cur_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
+                cur_gray = cv2.resize(cur_gray, (resize_w, resize_h))
+                score_map = 255 * score_map[:, :, 1]
+                optflow_map = postprocess(cur_gray, score_map, prev_gray, prev_cfd, \
+                        disflow, is_init)
+                prev_gray = cur_gray.copy()
+                prev_cfd = optflow_map.copy()
+                is_init = False
+                optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0)
+                optflow_map = threshold_mask(
+                    optflow_map, thresh_bg=0.2, thresh_fg=0.8)
+                img_matting = np.repeat(
+                    optflow_map[:, :, np.newaxis], 3, axis=2)
+                img_matting = recover(img_matting, im_info)
+                bg_im = np.ones_like(img_matting) * 255
+                comb = (img_matting * frame +
+                        (1 - img_matting) * bg_im).astype(np.uint8)
+                out.write(comb)
+            else:
+                break
+        cap.release()
+        out.release()
+
+    else:
+        while cap.isOpened():
+            ret, frame = cap.read()
+            if ret:
+                im_shape = frame.shape
+                im_scale_x = float(resize_w) / float(im_shape[1])
+                im_scale_y = float(resize_h) / float(im_shape[0])
+                im = cv2.resize(
+                    frame,
+                    None,
+                    None,
+                    fx=im_scale_x,
+                    fy=im_scale_y,
+                    interpolation=cv2.INTER_LINEAR)
+                image = im.astype('float32')
+                im_info = ('resize', im_shape[0:2])
+                pred = model.predict(image, test_transforms)
+                score_map = pred['score_map']
+                cur_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
+                cur_gray = cv2.resize(cur_gray, (resize_w, resize_h))
+                score_map = 255 * score_map[:, :, 1]
+                optflow_map = postprocess(cur_gray, score_map, prev_gray, prev_cfd, \
+                        disflow, is_init)
+                prev_gray = cur_gray.copy()
+                prev_cfd = optflow_map.copy()
+                is_init = False
+                optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0)
+                optflow_map = threshold_mask(
+                    optflow_map, thresh_bg=0.2, thresh_fg=0.8)
+                img_matting = np.repeat(
+                    optflow_map[:, :, np.newaxis], 3, axis=2)
+                img_matting = recover(img_matting, im_info)
+                bg_im = np.ones_like(img_matting) * 255
+                comb = (img_matting * frame +
+                        (1 - img_matting) * bg_im).astype(np.uint8)
+                cv2.imshow('HumanSegmentation', comb)
+                if cv2.waitKey(1) & 0xFF == ord('q'):
+                    break
+            else:
+                break
+        cap.release()
+
+
+if __name__ == "__main__":
+    args = parse_args()
+    video_infer(args)
diff --git a/new_tutorials/train/README.md b/new_tutorials/train/README.md
deleted file mode 100644
index 1900143bceb3435da8ffa04a7fed7b0205e04477..0000000000000000000000000000000000000000
--- a/new_tutorials/train/README.md
+++ /dev/null
@@ -1,18 +0,0 @@
-# 使用教程——训练模型
-
-本目录下整理了使用PaddleX训练模型的示例代码,代码中均提供了示例数据的自动下载,并均使用单张GPU卡进行训练。
-
-|代码 | 模型任务 | 数据 |
-|------|--------|---------|
-|classification/mobilenetv2.py | 图像分类MobileNetV2 | 蔬菜分类 |
-|classification/resnet50.py | 图像分类ResNet50 | 蔬菜分类 |
-|detection/faster_rcnn_r50_fpn.py | 目标检测FasterRCNN | 昆虫检测 |
-|detection/mask_rcnn_f50_fpn.py | 实例分割MaskRCNN | 垃圾分拣 |
-|segmentation/deeplabv3p.py | 语义分割DeepLabV3| 视盘分割 |
-|segmentation/unet.py | 语义分割UNet | 视盘分割 |
-
-## 开始训练
-在安装PaddleX后,使用如下命令开始训练
-```
-python classification/mobilenetv2.py
-```
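
[Editor's note] On video_infer.py above: recover() simply undoes the preprocessing step recorded in im_info. A hedged round-trip check with toy shapes (no model or camera needed; assumes the script is importable as `video_infer`):

```python
# Hypothetical sanity check for recover() in video_infer.py.
import numpy as np

from video_infer import recover

orig_h, orig_w = 480, 640
mask = np.zeros((192, 192, 3), np.float32)               # net-sized matting mask
restored = recover(mask, ('resize', (orig_h, orig_w)))   # im_info = (mode, (h, w))
assert restored.shape[:2] == (orig_h, orig_w)
```

diff --git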
a/new_tutorials/train/classification/mobilenetv2.py b/new_tutorials/train/classification/mobilenetv2.py deleted file mode 100644 index 9a075526a3cbb7e560c133f08faef68ea5a07121..0000000000000000000000000000000000000000 --- a/new_tutorials/train/classification/mobilenetv2.py +++ /dev/null @@ -1,47 +0,0 @@ -import os -# 选择使用0号卡 -os.environ['CUDA_VISIBLE_DEVICES'] = '0' - -from paddlex.cls import transforms -import paddlex as pdx - -# 下载和解压蔬菜分类数据集 -veg_dataset = 'https://bj.bcebos.com/paddlex/datasets/vegetables_cls.tar.gz' -pdx.utils.download_and_decompress(veg_dataset, path='./') - -# 定义训练和验证时的transforms -# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/cls_transforms.html#composedclstransforms -train_transforms = transforms.ComposedClsTransforms(mode='train', crop_size=[224, 224]) -eval_transforms = transforms.ComposedClsTransforms(mode='eval', crop_size=[224, 224]) - -# 定义训练和验证所用的数据集 -# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/classification.html#imagenet -train_dataset = pdx.datasets.ImageNet( - data_dir='vegetables_cls', - file_list='vegetables_cls/train_list.txt', - label_list='vegetables_cls/labels.txt', - transforms=train_transforms, - shuffle=True) -eval_dataset = pdx.datasets.ImageNet( - data_dir='vegetables_cls', - file_list='vegetables_cls/val_list.txt', - label_list='vegetables_cls/labels.txt', - transforms=eval_transforms) - -# 初始化模型,并进行训练 -# 可使用VisualDL查看训练指标 -# VisualDL启动方式: visualdl --logdir output/mobilenetv2/vdl_log --port 8001 -# 浏览器打开 https://0.0.0.0:8001即可 -# 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP - -# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/classification.html#resnet50 -model = pdx.cls.MobileNetV2(num_classes=len(train_dataset.labels)) -model.train( - num_epochs=10, - train_dataset=train_dataset, - train_batch_size=32, - eval_dataset=eval_dataset, - lr_decay_epochs=[4, 6, 8], - learning_rate=0.025, - save_dir='output/mobilenetv2', - use_vdl=True) diff --git a/new_tutorials/train/classification/resnet50.py b/new_tutorials/train/classification/resnet50.py deleted file mode 100644 index bf56a605f1c3376057c1ab9283fa1251491b2750..0000000000000000000000000000000000000000 --- a/new_tutorials/train/classification/resnet50.py +++ /dev/null @@ -1,56 +0,0 @@ -import os -# 选择使用0号卡 -os.environ['CUDA_VISIBLE_DEVICES'] = '0' - -import paddle.fluid as fluid -from paddlex.cls import transforms -import paddlex as pdx - -# 下载和解压蔬菜分类数据集 -veg_dataset = 'https://bj.bcebos.com/paddlex/datasets/vegetables_cls.tar.gz' -pdx.utils.download_and_decompress(veg_dataset, path='./') - -# 定义训练和验证时的transforms -# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/cls_transforms.html#composedclstransforms -train_transforms = transforms.ComposedClsTransforms(mode='train', crop_size=[224, 224]) -eval_transforms = transforms.ComposedClsTransforms(mode='eval', crop_size=[224, 224]) - -# 定义训练和验证所用的数据集 -# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/classification.html#imagenet -train_dataset = pdx.datasets.ImageNet( - data_dir='vegetables_cls', - file_list='vegetables_cls/train_list.txt', - label_list='vegetables_cls/labels.txt', - transforms=train_transforms, - shuffle=True) -eval_dataset = pdx.datasets.ImageNet( - data_dir='vegetables_cls', - file_list='vegetables_cls/val_list.txt', - label_list='vegetables_cls/labels.txt', - transforms=eval_transforms) - -# PaddleX支持自定义构建优化器 -step_each_epoch = train_dataset.num_samples // 32 -learning_rate = fluid.layers.cosine_decay( - learning_rate=0.025, 
step_each_epoch=step_each_epoch, epochs=10) -optimizer = fluid.optimizer.Momentum( - learning_rate=learning_rate, - momentum=0.9, - regularization=fluid.regularizer.L2Decay(4e-5)) - -# 初始化模型,并进行训练 -# 可使用VisualDL查看训练指标 -# VisualDL启动方式: visualdl --logdir output/resnet50/vdl_log --port 8001 -# 浏览器打开 https://0.0.0.0:8001即可 -# 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP - -# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/classification.html#resnet50 -model = pdx.cls.ResNet50(num_classes=len(train_dataset.labels)) -model.train( - num_epochs=10, - train_dataset=train_dataset, - train_batch_size=32, - eval_dataset=eval_dataset, - optimizer=optimizer, - save_dir='output/resnet50', - use_vdl=True) diff --git a/new_tutorials/train/detection/faster_rcnn_r50_fpn.py b/new_tutorials/train/detection/faster_rcnn_r50_fpn.py deleted file mode 100644 index a64b711c3af48cb85cfd8a82938785ca386a99ec..0000000000000000000000000000000000000000 --- a/new_tutorials/train/detection/faster_rcnn_r50_fpn.py +++ /dev/null @@ -1,49 +0,0 @@ -import os -# 选择使用0号卡 -os.environ['CUDA_VISIBLE_DEVICES'] = '0' - -from paddlex.det import transforms -import paddlex as pdx - -# 下载和解压昆虫检测数据集 -insect_dataset = 'https://bj.bcebos.com/paddlex/datasets/insect_det.tar.gz' -pdx.utils.download_and_decompress(insect_dataset, path='./') - -# 定义训练和验证时的transforms -# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#composedrcnntransforms -train_transforms = transforms.ComposedRCNNTransforms(mode='train', min_max_size=[800, 1333]) -eval_transforms = transforms.ComposedRCNNTransforms(mode='eval', min_max_size=[800, 1333]) - -# 定义训练和验证所用的数据集 -# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/detection.html#vocdetection -train_dataset = pdx.datasets.VOCDetection( - data_dir='insect_det', - file_list='insect_det/train_list.txt', - label_list='insect_det/labels.txt', - transforms=train_transforms, - shuffle=True) -eval_dataset = pdx.datasets.VOCDetection( - data_dir='insect_det', - file_list='insect_det/val_list.txt', - label_list='insect_det/labels.txt', - transforms=eval_transforms) - -# 初始化模型,并进行训练 -# 可使用VisualDL查看训练指标 -# VisualDL启动方式: visualdl --logdir output/faster_rcnn_r50_fpn/vdl_log --port 8001 -# 浏览器打开 https://0.0.0.0:8001即可 -# 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP -# num_classes 需要设置为包含背景类的类别数,即: 目标类别数量 + 1 - -# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/detection.html#fasterrcnn -num_classes = len(train_dataset.labels) + 1 -model = pdx.det.FasterRCNN(num_classes=num_classes) -model.train( - num_epochs=12, - train_dataset=train_dataset, - train_batch_size=2, - eval_dataset=eval_dataset, - learning_rate=0.0025, - lr_decay_epochs=[8, 11], - save_dir='output/faster_rcnn_r50_fpn', - use_vdl=True) diff --git a/new_tutorials/train/detection/mask_rcnn_r50_fpn.py b/new_tutorials/train/detection/mask_rcnn_r50_fpn.py deleted file mode 100644 index f2ebf6e20f18054bf16452eb6e60b9ea24f20748..0000000000000000000000000000000000000000 --- a/new_tutorials/train/detection/mask_rcnn_r50_fpn.py +++ /dev/null @@ -1,48 +0,0 @@ -import os -# 选择使用0号卡 -os.environ['CUDA_VISIBLE_DEVICES'] = '0' - -from paddlex.det import transforms -import paddlex as pdx - -# 下载和解压小度熊分拣数据集 -xiaoduxiong_dataset = 'https://bj.bcebos.com/paddlex/datasets/xiaoduxiong_ins_det.tar.gz' -pdx.utils.download_and_decompress(xiaoduxiong_dataset, path='./') - -# 定义训练和验证时的transforms -# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#composedrcnntransforms -train_transforms = 
transforms.ComposedRCNNTransforms(mode='train', min_max_size=[800, 1333]) -eval_transforms = transforms.ComposedRCNNTransforms(mode='eval', min_max_size=[800, 1333]) - -# 定义训练和验证所用的数据集 -# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/detection.html#cocodetection -train_dataset = pdx.datasets.CocoDetection( - data_dir='xiaoduxiong_ins_det/JPEGImages', - ann_file='xiaoduxiong_ins_det/train.json', - transforms=train_transforms, - shuffle=True) -eval_dataset = pdx.datasets.CocoDetection( - data_dir='xiaoduxiong_ins_det/JPEGImages', - ann_file='xiaoduxiong_ins_det/val.json', - transforms=eval_transforms) - -# 初始化模型,并进行训练 -# 可使用VisualDL查看训练指标 -# VisualDL启动方式: visualdl --logdir output/mask_rcnn_r50_fpn/vdl_log --port 8001 -# 浏览器打开 https://0.0.0.0:8001即可 -# 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP -# num_classes 需要设置为包含背景类的类别数,即: 目标类别数量 + 1 - -# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/instance_segmentation.html#maskrcnn -num_classes = len(train_dataset.labels) + 1 -model = pdx.det.MaskRCNN(num_classes=num_classes) -model.train( - num_epochs=12, - train_dataset=train_dataset, - train_batch_size=1, - eval_dataset=eval_dataset, - learning_rate=0.00125, - warmup_steps=10, - lr_decay_epochs=[8, 11], - save_dir='output/mask_rcnn_r50_fpn', - use_vdl=True) diff --git a/new_tutorials/train/detection/yolov3_darknet53.py b/new_tutorials/train/detection/yolov3_darknet53.py deleted file mode 100644 index 8027a506458aac94de82a915aa8b058d71ba97f7..0000000000000000000000000000000000000000 --- a/new_tutorials/train/detection/yolov3_darknet53.py +++ /dev/null @@ -1,48 +0,0 @@ -import os -# 选择使用0号卡 -os.environ['CUDA_VISIBLE_DEVICES'] = '0' - -from paddlex.det import transforms -import paddlex as pdx - -# 下载和解压昆虫检测数据集 -insect_dataset = 'https://bj.bcebos.com/paddlex/datasets/insect_det.tar.gz' -pdx.utils.download_and_decompress(insect_dataset, path='./') - -# 定义训练和验证时的transforms -# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#composedyolotransforms -train_transforms = transforms.ComposedYOLOv3Transforms(mode='train', shape=[608, 608]) -eval_transforms = transforms.ComposedYOLOv3Transforms(mode='eva', shape=[608, 608]) - -# 定义训练和验证所用的数据集 -# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/detection.html#vocdetection -train_dataset = pdx.datasets.VOCDetection( - data_dir='insect_det', - file_list='insect_det/train_list.txt', - label_list='insect_det/labels.txt', - transforms=train_transforms, - shuffle=True) -eval_dataset = pdx.datasets.VOCDetection( - data_dir='insect_det', - file_list='insect_det/val_list.txt', - label_list='insect_det/labels.txt', - transforms=eval_transforms) - -# 初始化模型,并进行训练 -# 可使用VisualDL查看训练指标 -# VisualDL启动方式: visualdl --logdir output/yolov3_darknet/vdl_log --port 8001 -# 浏览器打开 https://0.0.0.0:8001即可 -# 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP - -# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/detection.html#yolov3 -num_classes = len(train_dataset.labels) -model = pdx.det.YOLOv3(num_classes=num_classes, backbone='DarkNet53') -model.train( - num_epochs=270, - train_dataset=train_dataset, - train_batch_size=8, - eval_dataset=eval_dataset, - learning_rate=0.000125, - lr_decay_epochs=[210, 240], - save_dir='output/yolov3_darknet53', - use_vdl=True) diff --git a/new_tutorials/train/segmentation/deeplabv3p.py b/new_tutorials/train/segmentation/deeplabv3p.py deleted file mode 100644 index cb18fcfad65331d02b04abe3c3a76fa0356fb5b8..0000000000000000000000000000000000000000 --- 
a/new_tutorials/train/segmentation/deeplabv3p.py +++ /dev/null @@ -1,51 +0,0 @@ -import os -# 选择使用0号卡 -os.environ['CUDA_VISIBLE_DEVICES'] = '0' - -import paddlex as pdx -from paddlex.seg import transforms - -# 下载和解压视盘分割数据集 -optic_dataset = 'https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz' -pdx.utils.download_and_decompress(optic_dataset, path='./') - -# 定义训练和验证时的transforms -# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/seg_transforms.html#composedsegtransforms -train_transforms = transforms.ComposedSegTransforms(mode='train', train_crop_size=[769, 769]) -eval_transforms = transforms.ComposedSegTransforms(mode='eval') - -train_transforms.add_augmenters([ - transforms.RandomRotate() -]) - -# 定义训练和验证所用的数据集 -# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/semantic_segmentation.html#segdataset -train_dataset = pdx.datasets.SegDataset( - data_dir='optic_disc_seg', - file_list='optic_disc_seg/train_list.txt', - label_list='optic_disc_seg/labels.txt', - transforms=train_transforms, - shuffle=True) -eval_dataset = pdx.datasets.SegDataset( - data_dir='optic_disc_seg', - file_list='optic_disc_seg/val_list.txt', - label_list='optic_disc_seg/labels.txt', - transforms=eval_transforms) - -# 初始化模型,并进行训练 -# 可使用VisualDL查看训练指标 -# VisualDL启动方式: visualdl --logdir output/deeplab/vdl_log --port 8001 -# 浏览器打开 https://0.0.0.0:8001即可 -# 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP - -# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#deeplabv3p -num_classes = len(train_dataset.labels) -model = pdx.seg.DeepLabv3p(num_classes=num_classes) -model.train( - num_epochs=40, - train_dataset=train_dataset, - train_batch_size=4, - eval_dataset=eval_dataset, - learning_rate=0.01, - save_dir='output/deeplab', - use_vdl=True) diff --git a/new_tutorials/train/segmentation/unet.py b/new_tutorials/train/segmentation/unet.py deleted file mode 100644 index ddf4f7991a690b0d0d506967df0c140f60945e85..0000000000000000000000000000000000000000 --- a/new_tutorials/train/segmentation/unet.py +++ /dev/null @@ -1,47 +0,0 @@ -import os -# 选择使用0号卡 -os.environ['CUDA_VISIBLE_DEVICES'] = '0' - -import paddlex as pdx -from paddlex.seg import transforms - -# 下载和解压视盘分割数据集 -optic_dataset = 'https://bj.bcebos.com/paddlex/datasets/optic_disc_seg.tar.gz' -pdx.utils.download_and_decompress(optic_dataset, path='./') - -# 定义训练和验证时的transforms -# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/seg_transforms.html#composedsegtransforms -train_transforms = transforms.ComposedSegTransforms(mode='train', train_crop_size=[769, 769]) -eval_transforms = transforms.ComposedSegTransforms(mode='eval') - -# 定义训练和验证所用的数据集 -# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/semantic_segmentation.html#segdataset -train_dataset = pdx.datasets.SegDataset( - data_dir='optic_disc_seg', - file_list='optic_disc_seg/train_list.txt', - label_list='optic_disc_seg/labels.txt', - transforms=train_transforms, - shuffle=True) -eval_dataset = pdx.datasets.SegDataset( - data_dir='optic_disc_seg', - file_list='optic_disc_seg/val_list.txt', - label_list='optic_disc_seg/labels.txt', - transforms=eval_transforms) - -# 初始化模型,并进行训练 -# 可使用VisualDL查看训练指标 -# VisualDL启动方式: visualdl --logdir output/unet/vdl_log --port 8001 -# 浏览器打开 https://0.0.0.0:8001即可 -# 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP - -# API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#unet -num_classes = len(train_dataset.labels) -model = pdx.seg.UNet(num_classes=num_classes) 
-model.train(
-    num_epochs=20,
-    train_dataset=train_dataset,
-    train_batch_size=4,
-    eval_dataset=eval_dataset,
-    learning_rate=0.01,
-    save_dir='output/unet',
-    use_vdl=True)
diff --git a/paddlex/__init__.py b/paddlex/__init__.py
index b80363f2e6adfdbd6ce712cfec486540753abbb7..7743882a6334e257c1a7a4b94566aff3a8a55667 100644
--- a/paddlex/__init__.py
+++ b/paddlex/__init__.py
@@ -48,9 +48,10 @@ if hub.version.hub_version < '1.6.2':
 env_info = get_environ_info()
 load_model = cv.models.load_model
 datasets = cv.datasets
+transforms = cv.transforms
 
 log_level = 2
 
 from . import interpret
 
-__version__ = '1.0.6'
+__version__ = '1.0.7'
diff --git a/paddlex/command.py b/paddlex/command.py
index 8198291180b92a061dd633eae863f8ddb17727cb..612bc5f3f2b2c3bbec23f56c2983a722d76e21fc 100644
--- a/paddlex/command.py
+++ b/paddlex/command.py
@@ -1,11 +1,11 @@
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-# 
+#
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
-# 
+#
 #    http://www.apache.org/licenses/LICENSE-2.0
-# 
+#
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -15,6 +15,7 @@ from six import text_type as _text_type
 import argparse
 import sys
 
+import paddlex.utils.logging as logging
 
 def arg_parser():
@@ -94,15 +95,15 @@ def main():
     if args.export_onnx:
         assert args.model_dir is not None, "--model_dir should be defined while exporting onnx model"
         assert args.save_dir is not None, "--save_dir should be defined to create onnx model"
-        assert args.fixed_input_shape is not None, "--fixed_input_shape should be defined [w,h] to create onnx model, such as [224,224]"
-        fixed_input_shape = []
-        if args.fixed_input_shape is not None:
-            fixed_input_shape = eval(args.fixed_input_shape)
-            assert len(
-                fixed_input_shape
-            ) == 2, "len of fixed input shape must == 2, such as [224,224]"
-        model = pdx.load_model(args.model_dir, fixed_input_shape)
+        model = pdx.load_model(args.model_dir)
+        if model.status == "Normal" or model.status == "Prune":
+            logging.error(
+                "Only inference models are supported; please export the model first as below,",
+                exit=False)
+            logging.error(
+                "paddlex --export_inference --model_dir model_path --save_dir infer_model"
+            )
         pdx.convertor.export_onnx_model(model, args.save_dir)
diff --git a/paddlex/convertor.py b/paddlex/convertor.py
index a6888ae1ef9bd764d213125142d355e7e2ca2428..47fc8a82be5ac337206eb0c9dc395aecb862299e 100644
--- a/paddlex/convertor.py
+++ b/paddlex/convertor.py
@@ -1,11 +1,11 @@
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-# 
+#
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
-# 
+#
 #    http://www.apache.org/licenses/LICENSE-2.0
-# 
+#
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
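
[Editor's note] With the command.py change above, `paddlex --export_onnx` now refuses models whose status is "Normal" or "Prune" (training format); the convertor.py rewrite below delegates the conversion to x2paddle. A hedged sketch of the resulting two-step flow, with placeholder paths:

```python
# Hypothetical flow: export to inference format first, then convert to ONNX.
import paddlex as pdx

model = pdx.load_model('output/best_model')        # training-format checkpoint
model.export_inference_model('infer_model')        # writes the inference format
infer_model = pdx.load_model('infer_model')        # status is no longer 'Normal'
pdx.convertor.export_onnx_model(infer_model, 'onnx_model')  # needs x2paddle>=0.7.4
```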
@@ -30,119 +30,17 @@ def export_onnx(model_dir, save_dir, fixed_input_shape):
 
 
 def export_onnx_model(model, save_dir):
-    support_list = [
-        'ResNet18', 'ResNet34', 'ResNet50', 'ResNet101', 'ResNet50_vd',
-        'ResNet101_vd', 'ResNet50_vd_ssld', 'ResNet101_vd_ssld', 'DarkNet53',
-        'MobileNetV1', 'MobileNetV2', 'DenseNet121', 'DenseNet161',
-        'DenseNet201'
-    ]
-    if model.__class__.__name__ not in support_list:
-        raise Exception("Model: {} unsupport export to ONNX".format(
-            model.__class__.__name__))
-    try:
-        from fluid.utils import op_io_info, init_name_prefix
-        from onnx import helper, checker
-        import fluid_onnx.ops as ops
-        from fluid_onnx.variables import paddle_variable_to_onnx_tensor, paddle_onnx_weight
-        from debug.model_check import debug_model, Tracker
-    except Exception as e:
+    if model.model_type == "detector" or model.__class__.__name__ == "FastSCNN":
         logging.error(
-            "Import Module Failed! Please install paddle2onnx. Related requirements see https://github.com/PaddlePaddle/paddle2onnx."
+            "Only image classification models and semantic segmentation models (except FastSCNN) can be exported to ONNX"
         )
-        raise e
-    place = fluid.CPUPlace()
-    exe = fluid.Executor(place)
-    inference_scope = fluid.global_scope()
-    with fluid.scope_guard(inference_scope):
-        test_input_names = [
-            var.name for var in list(model.test_inputs.values())
-        ]
-        inputs_outputs_list = ["fetch", "feed"]
-        weights, weights_value_info = [], []
-        global_block = model.test_prog.global_block()
-        for var_name in global_block.vars:
-            var = global_block.var(var_name)
-            if var_name not in test_input_names\
-                    and var.persistable:
-                weight, val_info = paddle_onnx_weight(
-                    var=var, scope=inference_scope)
-                weights.append(weight)
-                weights_value_info.append(val_info)
-
-        # Create inputs
-        inputs = [
-            paddle_variable_to_onnx_tensor(v, global_block)
-            for v in test_input_names
-        ]
-        logging.INFO("load the model parameter done.")
-        onnx_nodes = []
-        op_check_list = []
-        op_trackers = []
-        nms_first_index = -1
-        nms_outputs = []
-        for block in model.test_prog.blocks:
-            for op in block.ops:
-                if op.type in ops.node_maker:
-                    # TODO: deal with the corner case that vars in
-                    # different blocks have the same name
-                    node_proto = ops.node_maker[str(op.type)](
-                        operator=op, block=block)
-                    op_outputs = []
-                    last_node = None
-                    if isinstance(node_proto, tuple):
-                        onnx_nodes.extend(list(node_proto))
-                        last_node = list(node_proto)
-                    else:
-                        onnx_nodes.append(node_proto)
-                        last_node = [node_proto]
-                    tracker = Tracker(str(op.type), last_node)
-                    op_trackers.append(tracker)
-                    op_check_list.append(str(op.type))
-                    if op.type == "multiclass_nms" and nms_first_index < 0:
-                        nms_first_index = 0
-                    if nms_first_index >= 0:
-                        _, _, output_op = op_io_info(op)
-                        for output in output_op:
-                            nms_outputs.extend(output_op[output])
-                else:
-                    if op.type not in ['feed', 'fetch']:
-                        op_check_list.append(op.type)
-        logging.info('The operator sets to run test case.')
-        logging.info(set(op_check_list))
-
-        # Create outputs
-        # Get the new names for outputs if they've been renamed in nodes' making
-        renamed_outputs = op_io_info.get_all_renamed_outputs()
-        test_outputs = list(model.test_outputs.values())
-        test_outputs_names = [var.name for var in model.test_outputs.values()]
-        test_outputs_names = [
-            name if name not in renamed_outputs else renamed_outputs[name]
-            for name in test_outputs_names
-        ]
-        outputs = [
-            paddle_variable_to_onnx_tensor(v, global_block)
-            for v in test_outputs_names
-        ]
-
-        # Make graph
-        onnx_name = 'paddlex.onnx'
-        onnx_graph = helper.make_graph(
-            nodes=onnx_nodes,
-            name=onnx_name,
-            initializer=weights,
-            inputs=inputs + weights_value_info,
-            outputs=outputs)
-
-        # Make model
-        onnx_model = helper.make_model(
-            onnx_graph, producer_name='PaddlePaddle')
-
-        # Model check
-        checker.check_model(onnx_model)
-        if onnx_model is not None:
-            onnx_model_file = os.path.join(save_dir, onnx_name)
-            if not os.path.exists(save_dir):
-                os.mkdir(save_dir)
-            with open(onnx_model_file, 'wb') as f:
-                f.write(onnx_model.SerializeToString())
-            logging.info("Saved converted model to path: %s" % onnx_model_file)
+    try:
+        import x2paddle
+        if x2paddle.__version__ < '0.7.4':
+            logging.error("You need to upgrade x2paddle to >= 0.7.4")
+    except Exception:
+        logging.error(
+            "You need to install x2paddle first: pip install x2paddle>=0.7.4")
+    from x2paddle.op_mapper.paddle_op_mapper import PaddleOpMapper
+    mapper = PaddleOpMapper()
+    mapper.convert(model.test_prog, save_dir)
diff --git a/paddlex/cv/datasets/coco.py b/paddlex/cv/datasets/coco.py
index ff7c2b2d2438fb88d359c94f9ede65d900d9216e..264b2da1e6a6aa9e15bf8a2ae9b3fbdc3ee75f1b 100644
--- a/paddlex/cv/datasets/coco.py
+++ b/paddlex/cv/datasets/coco.py
@@ -100,7 +100,7 @@ class CocoDetection(VOCDetection):
             gt_score = np.ones((num_bbox, 1), dtype=np.float32)
             is_crowd = np.zeros((num_bbox, 1), dtype=np.int32)
             difficult = np.zeros((num_bbox, 1), dtype=np.int32)
-            gt_poly = None
+            gt_poly = [None] * num_bbox
 
             for i, box in enumerate(bboxes):
                 catid = box['category_id']
@@ -108,8 +108,6 @@ class CocoDetection(VOCDetection):
                 gt_bbox[i, :] = box['clean_bbox']
                 is_crowd[i][0] = box['iscrowd']
                 if 'segmentation' in box:
-                    if gt_poly is None:
-                        gt_poly = [None] * num_bbox
                     gt_poly[i] = box['segmentation']
 
             im_info = {
@@ -121,14 +119,12 @@ class CocoDetection(VOCDetection):
                 'gt_class': gt_class,
                 'gt_bbox': gt_bbox,
                 'gt_score': gt_score,
+                'gt_poly': gt_poly,
                 'difficult': difficult
             }
-            if gt_poly is not None:
-                label_info['gt_poly'] = gt_poly
 
             coco_rec = (im_info, label_info)
             self.file_list.append([im_fname, coco_rec])
-
         if not len(self.file_list) > 0:
             raise Exception('not found any coco record in %s' % (ann_file))
         logging.info("{} samples in file {}".format(
diff --git a/paddlex/cv/datasets/easydata_cls.py b/paddlex/cv/datasets/easydata_cls.py
index 121ae563308c695a0a76fcf383eb6e6bb7f43011..9b6dddc4843616ff0a09712e6766e3ea9552b466 100644
--- a/paddlex/cv/datasets/easydata_cls.py
+++ b/paddlex/cv/datasets/easydata_cls.py
@@ -39,14 +39,14 @@ class EasyDataCls(ImageNet):
             线程和'process'进程两种方式。默认为'process'(Windows和Mac下会强制使用thread,该参数无效)。
         shuffle (bool): 是否需要对数据集中样本打乱顺序。默认为False。
     """
-    
+
     def __init__(self,
                  data_dir,
                  file_list,
                  label_list,
                  transforms=None,
                  num_workers='auto',
-                 buffer_size=100,
+                 buffer_size=8,
                  parallel_method='process',
                  shuffle=False):
         super(ImageNet, self).__init__(
@@ -58,7 +58,7 @@ class EasyDataCls(ImageNet):
         self.file_list = list()
         self.labels = list()
         self._epoch = 0
-        
+
         with open(label_list, encoding=get_encoding(label_list)) as f:
             for line in f:
                 item = line.strip()
@@ -73,8 +73,8 @@ class EasyDataCls(ImageNet):
             if not osp.isfile(json_file):
                 continue
             if not osp.exists(img_file):
-                raise IOError(
-                    'The image file {} is not exist!'.format(img_file))
+                raise IOError('The image file {} does not exist!'.format(
+                    img_file))
             with open(json_file, mode='r', \
                       encoding=get_encoding(json_file)) as j:
                 json_info = json.load(j)
@@ -83,4 +83,3 @@ class EasyDataCls(ImageNet):
         self.num_samples = len(self.file_list)
         logging.info("{} samples in file {}".format(
             len(self.file_list), file_list))
-        
\ No newline at end of file
diff --git a/paddlex/cv/datasets/imagenet.py b/paddlex/cv/datasets/imagenet.py
index 99723d3b8f4ec6f8c0b9297f9fe66c1fbc60693f..0986f823add893c6fb746168f3c2bcfa438f5e10 100644
--- a/paddlex/cv/datasets/imagenet.py
+++ b/paddlex/cv/datasets/imagenet.py
@@ -45,7 +45,7 @@ class ImageNet(Dataset):
                  label_list,
                  transforms=None,
                  num_workers='auto',
-                 buffer_size=100,
+                 buffer_size=8,
                  parallel_method='process',
                  shuffle=False):
         super(ImageNet, self).__init__(
@@ -70,8 +70,8 @@ class ImageNet(Dataset):
                     continue
                 full_path = osp.join(data_dir, items[0])
                 if not osp.exists(full_path):
-                    raise IOError(
-                        'The image file {} is not exist!'.format(full_path))
+                    raise IOError('The image file {} does not exist!'.format(
+                        full_path))
                 self.file_list.append([full_path, int(items[1])])
         self.num_samples = len(self.file_list)
         logging.info("{} samples in file {}".format(
diff --git a/paddlex/cv/datasets/seg_dataset.py b/paddlex/cv/datasets/seg_dataset.py
index 61697e3d799ccb0ca765410a81e7257741acfb44..6e8bfae1ca623ed90a6d583042627cf4aecb2ea6 100644
--- a/paddlex/cv/datasets/seg_dataset.py
+++ b/paddlex/cv/datasets/seg_dataset.py
@@ -1,4 +1,4 @@
-# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -28,7 +28,7 @@ class SegDataset(Dataset):
     Args:
         data_dir (str): 数据集所在的目录路径。
        file_list (str): 描述数据集图片文件和对应标注文件的文件路径(文本内每行路径为相对data_dir的相对路)。
-        label_list (str): 描述数据集包含的类别信息文件路径。
+        label_list (str): 描述数据集包含的类别信息文件路径。默认值为None。
         transforms (list): 数据集中每个样本的预处理/增强算子。
         num_workers (int): 数据集中样本在预处理过程中的线程或进程数。默认为4。
         buffer_size (int): 数据集中样本在预处理过程中队列的缓存长度,以样本数为单位。默认为100。
@@ -40,7 +40,7 @@ class SegDataset(Dataset):
     def __init__(self,
                  data_dir,
                  file_list,
-                 label_list,
+                 label_list=None,
                  transforms=None,
                  num_workers='auto',
                  buffer_size=100,
@@ -56,10 +56,11 @@ class SegDataset(Dataset):
         self.labels = list()
         self._epoch = 0
 
-        with open(label_list, encoding=get_encoding(label_list)) as f:
-            for line in f:
-                item = line.strip()
-                self.labels.append(item)
+        if label_list is not None:
+            with open(label_list, encoding=get_encoding(label_list)) as f:
+                for line in f:
+                    item = line.strip()
+                    self.labels.append(item)
 
         with open(file_list, encoding=get_encoding(file_list)) as f:
             for line in f:
@@ -69,8 +70,8 @@ class SegDataset(Dataset):
                 full_path_im = osp.join(data_dir, items[0])
                 full_path_label = osp.join(data_dir, items[1])
                 if not osp.exists(full_path_im):
-                    raise IOError(
-                        'The image file {} is not exist!'.format(full_path_im))
+                    raise IOError('The image file {} does not exist!'.format(
+                        full_path_im))
                 if not osp.exists(full_path_label):
                     raise IOError('The image file {} is not exist!'.format(
                         full_path_label))
diff --git a/paddlex/cv/datasets/voc.py b/paddlex/cv/datasets/voc.py
index 45335084a9f8ddd9b850b52f3a6db9fe6886a153..b701c56847b6e0da9aace3784c4cb8e76dbbed77 100644
--- a/paddlex/cv/datasets/voc.py
+++ b/paddlex/cv/datasets/voc.py
@@ -14,8 +14,10 @@
 
 from __future__ import absolute_import
 import copy
+import os
 import os.path as osp
 import random
+import re
 import numpy as np
 from collections import OrderedDict
 import xml.etree.ElementTree as ET
@@ -103,23 +105,60 @@ class VOCDetection(Dataset):
                 else:
                     ct = int(tree.find('id').text)
                     im_id = np.array([int(tree.find('id').text)])
-
-                objs = tree.findall('object')
-                im_w = float(tree.find('size').find('width').text)
-                im_h = float(tree.find('size').find('height').text)
+                pattern = re.compile('<object>', re.IGNORECASE)
+                obj_tag = pattern.findall(
+                    str(ET.tostringlist(tree.getroot())))[0][1:-1]
+                objs = tree.findall(obj_tag)
+                pattern = re.compile('<size>', re.IGNORECASE)
+                size_tag = pattern.findall(
+                    str(ET.tostringlist(tree.getroot())))[0][1:-1]
+                size_element = tree.find(size_tag)
+                pattern = re.compile('<width>', re.IGNORECASE)
+                width_tag = pattern.findall(
+                    str(ET.tostringlist(size_element)))[0][1:-1]
+                im_w = float(size_element.find(width_tag).text)
+                pattern = re.compile('<height>', re.IGNORECASE)
+                height_tag = pattern.findall(
+                    str(ET.tostringlist(size_element)))[0][1:-1]
+                im_h = float(size_element.find(height_tag).text)
                 gt_bbox = np.zeros((len(objs), 4), dtype=np.float32)
                 gt_class = np.zeros((len(objs), 1), dtype=np.int32)
                 gt_score = np.ones((len(objs), 1), dtype=np.float32)
                 is_crowd = np.zeros((len(objs), 1), dtype=np.int32)
                 difficult = np.zeros((len(objs), 1), dtype=np.int32)
                 for i, obj in enumerate(objs):
-                    cname = obj.find('name').text.strip()
+                    pattern = re.compile('<name>', re.IGNORECASE)
+                    name_tag = pattern.findall(str(ET.tostringlist(obj)))[0][
+                        1:-1]
+                    cname = obj.find(name_tag).text.strip()
                     gt_class[i][0] = cname2cid[cname]
-                    _difficult = int(obj.find('difficult').text)
-                    x1 = float(obj.find('bndbox').find('xmin').text)
-                    y1 = float(obj.find('bndbox').find('ymin').text)
-                    x2 = float(obj.find('bndbox').find('xmax').text)
-                    y2 = float(obj.find('bndbox').find('ymax').text)
+                    pattern = re.compile('<difficult>', re.IGNORECASE)
+                    diff_tag = pattern.findall(str(ET.tostringlist(obj)))[0][
+                        1:-1]
+                    try:
+                        _difficult = int(obj.find(diff_tag).text)
+                    except Exception:
+                        _difficult = 0
+                    pattern = re.compile('<bndbox>', re.IGNORECASE)
+                    box_tag = pattern.findall(str(ET.tostringlist(obj)))[0][1:
+                                                                            -1]
+                    box_element = obj.find(box_tag)
+                    pattern = re.compile('<xmin>', re.IGNORECASE)
+                    xmin_tag = pattern.findall(
+                        str(ET.tostringlist(box_element)))[0][1:-1]
+                    x1 = float(box_element.find(xmin_tag).text)
+                    pattern = re.compile('<ymin>', re.IGNORECASE)
+                    ymin_tag = pattern.findall(
+                        str(ET.tostringlist(box_element)))[0][1:-1]
+                    y1 = float(box_element.find(ymin_tag).text)
+                    pattern = re.compile('<xmax>', re.IGNORECASE)
+                    xmax_tag = pattern.findall(
+                        str(ET.tostringlist(box_element)))[0][1:-1]
+                    x2 = float(box_element.find(xmax_tag).text)
+                    pattern = re.compile('<ymax>', re.IGNORECASE)
+                    ymax_tag = pattern.findall(
+                        str(ET.tostringlist(box_element)))[0][1:-1]
+                    y2 = float(box_element.find(ymax_tag).text)
                     x1 = max(0, x1)
                     y1 = max(0, y1)
                     if im_w > 0.5 and im_h > 0.5:
@@ -148,6 +187,7 @@ class VOCDetection(Dataset):
                     'gt_class': gt_class,
                     'gt_bbox': gt_bbox,
                     'gt_score': gt_score,
+                    'gt_poly': [],
                     'difficult': difficult
                 }
                 voc_rec = (im_info, label_info)
@@ -170,6 +210,44 @@ class VOCDetection(Dataset):
         self.coco_gt.dataset = annotations
         self.coco_gt.createIndex()
 
+    def add_negative_samples(self, image_dir):
+        import cv2
+        if not osp.exists(image_dir):
+            raise Exception("{} background images directory does not exist.".
+                            format(image_dir))
+        image_list = os.listdir(image_dir)
+        max_img_id = max(self.coco_gt.getImgIds())
+        for image in image_list:
+            if not is_pic(image):
+                continue
+            # Fake ground truth for a pure-background (negative) sample
+            gt_bbox = np.array([[0, 0, 1e-05, 1e-05]], dtype=np.float32)
+            gt_class = np.array([[0]], dtype=np.int32)
+            gt_score = np.ones((1, 1), dtype=np.float32)
+            is_crowd = np.array([[0]], dtype=np.int32)
+            difficult = np.zeros((1, 1), dtype=np.int32)
+            gt_poly = [[[0, 0, 0, 1e-05, 1e-05, 1e-05, 1e-05, 0]]]
+
+            max_img_id += 1
+            im_fname = osp.join(image_dir, image)
+            img_data = cv2.imread(im_fname)
+            im_h, im_w, im_c = img_data.shape
+            im_info = {
+                'im_id': np.array([max_img_id]).astype('int32'),
+                'image_shape': np.array([im_h, im_w]).astype('int32'),
+            }
+            label_info = {
+                'is_crowd': is_crowd,
+                'gt_class': gt_class,
+                'gt_bbox': gt_bbox,
+                'gt_score': gt_score,
+                'difficult': difficult,
+                'gt_poly': gt_poly
+            }
+            coco_rec = (im_info, label_info)
+            self.file_list.append([im_fname, coco_rec])
+        self.num_samples = len(self.file_list)
+
     def iterator(self):
         self._epoch += 1
         self._pos = 0
diff --git a/paddlex/cv/models/__init__.py b/paddlex/cv/models/__init__.py
index 622878933c12f1934960eb42aed1f992e7164708..1c7e4b35bc7387c3f5c536e74edc0feafa1811d9 100644
--- a/paddlex/cv/models/__init__.py
+++ b/paddlex/cv/models/__init__.py
@@ -43,5 +43,6 @@ from .mask_rcnn import MaskRCNN
 from .unet import UNet
 from .deeplabv3p import DeepLabv3p
 from .hrnet import HRNet
+from .fast_scnn import FastSCNN
 from .load_model import load_model
 from .slim import prune
diff --git a/paddlex/cv/models/base.py b/paddlex/cv/models/base.py
index 14db42b8aed39674f2911f3fe5ee472435b8da34..e30a2529c5a7ff9cbcafb4a05d58f53ea5476e7e 100644
--- a/paddlex/cv/models/base.py
+++ b/paddlex/cv/models/base.py
@@ -1,16 +1,16 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
 #
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
 #
-#    http://www.apache.org/licenses/LICENSE-2.0
+#    http://www.apache.org/licenses/LICENSE-2.0
 #
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 
 from __future__ import absolute_import
 import paddle.fluid as fluid
@@ -194,14 +194,37 @@ class BaseAPI:
             if os.path.exists(pretrain_dir):
                 os.remove(pretrain_dir)
             os.makedirs(pretrain_dir)
+        if pretrain_weights is not None and not os.path.exists(
+                pretrain_weights):
+            if self.model_type == 'classifier':
+                if pretrain_weights not in ['IMAGENET']:
+                    logging.warning(
+                        "Pretrain_weights for classifier should be defined as a directory path, a parameter file, 'IMAGENET' or None, but it is {}, so it is forced to 'IMAGENET'".
+                        format(pretrain_weights))
+                    pretrain_weights = 'IMAGENET'
+            elif self.model_type == 'detector':
+                if pretrain_weights not in ['IMAGENET', 'COCO']:
+                    logging.warning(
+                        "Pretrain_weights for detector should be defined as a directory path, a parameter file, 'IMAGENET', 'COCO' or None, but it is {}, so it is forced to 'IMAGENET'".
+                        format(pretrain_weights))
+                    pretrain_weights = 'IMAGENET'
+            elif self.model_type == 'segmenter':
+                if pretrain_weights not in [
+                        'IMAGENET', 'COCO', 'CITYSCAPES'
+                ]:
+                    logging.warning(
+                        "Pretrain_weights for segmenter should be defined as a directory path, a parameter file, 'IMAGENET', 'COCO' or 'CITYSCAPES', but it is {}, so it is forced to 'IMAGENET'".
+                        format(pretrain_weights))
+                    pretrain_weights = 'IMAGENET'
         if hasattr(self, 'backbone'):
             backbone = self.backbone
         else:
             backbone = self.__class__.__name__
         if backbone == "HRNet":
             backbone = backbone + "_W{}".format(self.width)
+        class_name = self.__class__.__name__
         pretrain_weights = get_pretrain_weights(
-            pretrain_weights, self.model_type, backbone, pretrain_dir)
+            pretrain_weights, class_name, backbone, pretrain_dir)
         if startup_prog is None:
             startup_prog = fluid.default_startup_program()
         self.exe.run(startup_prog)
@@ -221,8 +244,8 @@ class BaseAPI:
             logging.info(
                 "Load pretrain weights from {}.".format(pretrain_weights),
                 use_color=True)
-        paddlex.utils.utils.load_pretrain_weights(self.exe, self.train_prog,
-                                                  pretrain_weights, fuse_bn)
+        paddlex.utils.utils.load_pretrain_weights(
+            self.exe, self.train_prog, pretrain_weights, fuse_bn)
         # 进行裁剪
         if sensitivities_file is not None:
             import paddleslim
@@ -326,7 +349,9 @@ class BaseAPI:
         logging.info("Model saved in {}.".format(save_dir))
 
     def export_inference_model(self, save_dir):
-        test_input_names = [var.name for var in list(self.test_inputs.values())]
+        test_input_names = [
+            var.name for var in list(self.test_inputs.values())
+        ]
         test_outputs = list(self.test_outputs.values())
         if self.__class__.__name__ == 'MaskRCNN':
             from paddlex.utils.save import save_mask_inference_model
@@ -363,7 +388,8 @@ class BaseAPI:
 
         # 模型保存成功的标志
         open(osp.join(save_dir, '.success'), 'w').close()
-        logging.info("Model for inference deploy saved in {}.".format(save_dir))
+        logging.info("Model for inference deploy saved in {}.".format(
+            save_dir))
 
     def train_loop(self,
                    num_epochs,
@@ -487,11 +513,13 @@ class BaseAPI:
                         eta = ((num_epochs - i) * total_num_steps - step - 1
                                ) * avg_step_time
                         if time_eval_one_epoch is not None:
-                            eval_eta = (total_eval_times - i // save_interval_epochs
-                                        ) * time_eval_one_epoch
+                            eval_eta = (
+                                total_eval_times - i // save_interval_epochs
+                            ) * time_eval_one_epoch
                         else:
-                            eval_eta = (total_eval_times - i // save_interval_epochs
-                                        ) * total_num_steps_eval * avg_step_time
+                            eval_eta = (
+                                total_eval_times - i // save_interval_epochs
+                            ) * total_num_steps_eval * avg_step_time
                         eta_str = seconds_to_hms(eta + eval_eta)
 
                     logging.info(
@@ -508,6 +536,7 @@ class BaseAPI:
             epoch_start_time = time.time()
 
             # 每间隔save_interval_epochs, 在验证集上评估和对模型进行保存
+            self.completed_epochs += 1
             eval_epoch_start_time = time.time()
             if (i + 1) % save_interval_epochs == 0 or i == num_epochs - 1:
                 current_save_dir = osp.join(save_dir, "epoch_{}".format(i + 1))
@@ -521,7 +550,6 @@ class BaseAPI:
                         return_details=True)
                     logging.info('[EVAL] Finished, Epoch={}, {} .'.format(
                         i + 1, dict2str(self.eval_metrics)))
-                self.completed_epochs += 1
                 # 保存最优模型
                 best_accuracy_key = list(self.eval_metrics.keys())[0]
                 current_accuracy = self.eval_metrics[best_accuracy_key]
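
[Editor's note] On the new VOCDetection.add_negative_samples() in voc.py above: it appends background-only images with a tiny fake ground-truth box so a detector learns to suppress false positives. A hedged usage sketch with placeholder paths:

```python
# Hypothetical usage of add_negative_samples() on a VOC-format dataset.
import paddlex as pdx
from paddlex.det import transforms

train_transforms = transforms.ComposedYOLOv3Transforms(
    mode='train', shape=[608, 608])
train_dataset = pdx.datasets.VOCDetection(
    data_dir='insect_det',                         # placeholder dataset root
    file_list='insect_det/train_list.txt',
    label_list='insect_det/labels.txt',
    transforms=train_transforms,
    shuffle=True)
train_dataset.add_negative_samples('background_images')  # placeholder directory
```

diff --git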
a/paddlex/cv/models/classifier.py b/paddlex/cv/models/classifier.py index 48a0d17604e7af59377af49967fb8c527f094b09..17a307d8bdeed77467535bec1216cc9b97bd70e3 100644 --- a/paddlex/cv/models/classifier.py +++ b/paddlex/cv/models/classifier.py @@ -1,16 +1,16 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from __future__ import absolute_import import numpy as np diff --git a/paddlex/cv/models/deeplabv3p.py b/paddlex/cv/models/deeplabv3p.py index c5f8dc16e76a60bfead93daa1e083f410a02c7d6..e548439a7ed81fd5758395244d26926d3c8010fe 100644 --- a/paddlex/cv/models/deeplabv3p.py +++ b/paddlex/cv/models/deeplabv3p.py @@ -1,16 +1,16 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
 from __future__ import absolute_import
 import os.path as osp
@@ -242,14 +242,16 @@ class DeepLabv3p(BaseAPI):
             log_interval_steps (int): 训练日志输出间隔(单位:迭代次数)。默认为2。
             save_dir (str): 模型保存路径。默认'output'。
             pretrain_weights (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET',
-                则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认'IMAGENET。
+                则自动下载在ImageNet图片数据上预训练的模型权重;若为字符串'COCO',
+                则自动下载在COCO数据集上预训练的模型权重;若为字符串'CITYSCAPES',
+                则自动下载在CITYSCAPES数据集上预训练的模型权重;若为None,则不使用预训练模型。默认'IMAGENET'。
             optimizer (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认的优化器:使用
                 fluid.optimizer.Momentum优化方法,polynomial的学习率衰减策略。
             learning_rate (float): 默认优化器的初始学习率。默认0.01。
             lr_decay_power (float): 默认优化器学习率衰减指数。默认0.9。
             use_vdl (bool): 是否使用VisualDL进行可视化。默认False。
             sensitivities_file (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',
-                则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
+                则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
             eval_metric_loss (float): 可容忍的精度损失。默认为0.05。
             early_stop (bool): 是否使用提前终止训练策略。默认值为False。
             early_stop_patience (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内
diff --git a/paddlex/cv/models/fast_scnn.py b/paddlex/cv/models/fast_scnn.py
new file mode 100644
index 0000000000000000000000000000000000000000..5f66e4df6ede1b48c0363b5b8a496b23021454ef
--- /dev/null
+++ b/paddlex/cv/models/fast_scnn.py
@@ -0,0 +1,169 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+import paddle.fluid as fluid
+import paddlex
+from collections import OrderedDict
+from .deeplabv3p import DeepLabv3p
+
+
+class FastSCNN(DeepLabv3p):
+    """实现Fast SCNN网络的构建并进行训练、评估、预测和模型导出。
+
+    Args:
+        num_classes (int): 类别数。
+        use_bce_loss (bool): 是否使用bce loss作为网络的损失函数,只能用于两类分割。可与dice loss同时使用。默认False。
+        use_dice_loss (bool): 是否使用dice loss作为网络的损失函数,只能用于两类分割,可与bce loss同时使用。
+            当use_bce_loss和use_dice_loss都为False时,使用交叉熵损失函数。默认False。
+        class_weight (list/str): 交叉熵损失函数各类损失的权重。当class_weight为list的时候,长度应为
+            num_classes。当class_weight为str时, weight.lower()应为'dynamic',这时会根据每一轮各类像素的比重
+            自行计算相应的权重,每一类的权重为:每类的比例 * num_classes。class_weight取默认值None时,各类的权重为1,
+            即平时使用的交叉熵损失函数。
+        ignore_index (int): label上忽略的值,label为ignore_index的像素不参与损失函数的计算。默认255。
+        multi_loss_weight (list): 多分支上的loss权重。默认计算一个分支上的loss,即默认值为[1.0]。
+            也支持计算两个分支或三个分支上的loss,权重按[fusion_branch_weight, higher_branch_weight, lower_branch_weight]排列,
+            fusion_branch_weight为空间细节分支和全局上下文分支融合后的分支上的loss权重,higher_branch_weight为空间细节分支上的loss权重,
+            lower_branch_weight为全局上下文分支上的loss权重,若higher_branch_weight和lower_branch_weight未设置则不会计算这两个分支上的loss。
+
+    Raises:
+        ValueError: use_bce_loss或use_dice_loss为真且num_classes > 2。
+        ValueError: class_weight为list, 但长度不等于num_classes。
+            class_weight为str, 但class_weight.lower()不等于dynamic。
+        TypeError: class_weight不为None时,其类型不是list或str。
+        TypeError: multi_loss_weight不为list。
+        ValueError: multi_loss_weight为list但长度小于1或者大于3。
+    """
+
+    def __init__(self,
+                 num_classes=2,
+                 use_bce_loss=False,
+                 use_dice_loss=False,
+                 class_weight=None,
+                 ignore_index=255,
+                 multi_loss_weight=[1.0]):
+        self.init_params = locals()
+        super(DeepLabv3p, self).__init__('segmenter')
+        # dice_loss或bce_loss只适用两类分割中
+        if num_classes > 2 and (use_bce_loss or use_dice_loss):
+            raise ValueError(
+                "dice loss and bce loss are only applicable to binary classification"
+            )
+
+        if class_weight is not None:
+            if isinstance(class_weight, list):
+                if len(class_weight) != num_classes:
+                    raise ValueError(
+                        "Length of class_weight should be equal to number of classes"
+                    )
+            elif isinstance(class_weight, str):
+                if class_weight.lower() != 'dynamic':
+                    raise ValueError(
+                        "if class_weight is string, must be dynamic!")
+            else:
+                raise TypeError(
+                    'Expect class_weight is a list or string but receive {}'.
+                    format(type(class_weight)))
+
+        if not isinstance(multi_loss_weight, list):
+            raise TypeError(
+                'Expect multi_loss_weight is a list but receive {}'.format(
+                    type(multi_loss_weight)))
+        if len(multi_loss_weight) > 3 or len(multi_loss_weight) < 1:
+            raise ValueError(
+                "Length of multi_loss_weight should be lower than or equal to 3 but greater than 0."
+            )
+
+        self.num_classes = num_classes
+        self.use_bce_loss = use_bce_loss
+        self.use_dice_loss = use_dice_loss
+        self.class_weight = class_weight
+        self.multi_loss_weight = multi_loss_weight
+        self.ignore_index = ignore_index
+        self.labels = None
+        self.fixed_input_shape = None
+
+    def build_net(self, mode='train'):
+        model = paddlex.cv.nets.segmentation.FastSCNN(
+            self.num_classes,
+            mode=mode,
+            use_bce_loss=self.use_bce_loss,
+            use_dice_loss=self.use_dice_loss,
+            class_weight=self.class_weight,
+            ignore_index=self.ignore_index,
+            multi_loss_weight=self.multi_loss_weight,
+            fixed_input_shape=self.fixed_input_shape)
+        inputs = model.generate_inputs()
+        model_out = model.build_net(inputs)
+        outputs = OrderedDict()
+        if mode == 'train':
+            self.optimizer.minimize(model_out)
+            outputs['loss'] = model_out
+        else:
+            outputs['pred'] = model_out[0]
+            outputs['logit'] = model_out[1]
+        return inputs, outputs
+
+    def train(self,
+              num_epochs,
+              train_dataset,
+              train_batch_size=2,
+              eval_dataset=None,
+              save_interval_epochs=1,
+              log_interval_steps=2,
+              save_dir='output',
+              pretrain_weights='CITYSCAPES',
+              optimizer=None,
+              learning_rate=0.01,
+              lr_decay_power=0.9,
+              use_vdl=False,
+              sensitivities_file=None,
+              eval_metric_loss=0.05,
+              early_stop=False,
+              early_stop_patience=5,
+              resume_checkpoint=None):
+        """训练。
+
+        Args:
+            num_epochs (int): 训练迭代轮数。
+            train_dataset (paddlex.datasets): 训练数据读取器。
+            train_batch_size (int): 训练数据batch大小。同时作为验证数据batch大小。默认2。
+            eval_dataset (paddlex.datasets): 评估数据读取器。
+            save_interval_epochs (int): 模型保存间隔(单位:迭代轮数)。默认为1。
+            log_interval_steps (int): 训练日志输出间隔(单位:迭代次数)。默认为2。
+            save_dir (str): 模型保存路径。默认'output'。
+            pretrain_weights (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'CITYSCAPES',
+                则自动下载在CITYSCAPES图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为'CITYSCAPES'。
+            optimizer (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认的优化器:使用
+                fluid.optimizer.Momentum优化方法,polynomial的学习率衰减策略。
+            learning_rate (float): 默认优化器的初始学习率。默认0.01。
+            lr_decay_power (float): 默认优化器学习率多项式衰减系数。默认0.9。
+            use_vdl (bool): 是否使用VisualDL进行可视化。默认False。
+            sensitivities_file (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT',
+                则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。
+            eval_metric_loss (float): 可容忍的精度损失。默认为0.05。
+            early_stop (bool): 是否使用提前终止训练策略。默认值为False。
+            early_stop_patience (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内
+                连续下降或持平,则终止训练。默认值为5。
+            resume_checkpoint (str): 恢复训练时指定上次训练保存的模型路径。若为None,则不会恢复训练。默认值为None。
+
+        Raises:
+            ValueError: 模型从inference model进行加载。
+        """
+        return super(FastSCNN, self).train(
+            num_epochs, train_dataset, train_batch_size, eval_dataset,
+            save_interval_epochs, log_interval_steps, save_dir,
+            pretrain_weights, optimizer, learning_rate, lr_decay_power,
+            use_vdl, sensitivities_file, eval_metric_loss, early_stop,
+            early_stop_patience, resume_checkpoint)
diff --git a/paddlex/cv/models/faster_rcnn.py b/paddlex/cv/models/faster_rcnn.py
index c1a5c4c81cf80911afb42401406d357fe8f46e09..45279bfc6014329ced089d39072221ceaf8dd683 100644
--- a/paddlex/cv/models/faster_rcnn.py
+++ b/paddlex/cv/models/faster_rcnn.py
@@ -1,16 +1,16 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from __future__ import absolute_import import math @@ -196,7 +196,8 @@ class FasterRCNN(BaseAPI): log_interval_steps (int): 训练日志输出间隔(单位:迭代次数)。默认为20。 save_dir (str): 模型保存路径。默认值为'output'。 pretrain_weights (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET', - 则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为'IMAGENET'。 + 则自动下载在ImageNet图片数据上预训练的模型权重;若为字符串'COCO', + 则自动下载在COCO数据集上预训练的模型权重;若为None,则不使用预训练模型。默认为'IMAGENET'。 optimizer (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器: fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。 learning_rate (float): 默认优化器的初始学习率。默认为0.0025。 diff --git a/paddlex/cv/models/hrnet.py b/paddlex/cv/models/hrnet.py index 304cf0b1568d473df615e12ffbaa96e9d681af20..d3af363ceac925d40552da22360759553c0090f7 100644 --- a/paddlex/cv/models/hrnet.py +++ b/paddlex/cv/models/hrnet.py @@ -1,16 +1,16 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. # -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
from __future__ import absolute_import import paddle.fluid as fluid @@ -24,11 +24,12 @@ class HRNet(DeepLabv3p): Args: num_classes (int): 类别数。 - width (int): 高分辨率分支中特征层的通道数量。默认值为18。可选择取值为[18, 30, 32, 40, 44, 48, 60, 64]。 + width (int|str): 高分辨率分支中特征层的通道数量。默认值为18。可选择取值为[18, 30, 32, 40, 44, 48, 60, 64, '18_small_v1']。 + '18_small_v1'是18的轻量级版本。 use_bce_loss (bool): 是否使用bce loss作为网络的损失函数,只能用于两类分割。可与dice loss同时使用。默认False。 use_dice_loss (bool): 是否使用dice loss作为网络的损失函数,只能用于两类分割,可与bce loss同时使用。 当use_bce_loss和use_dice_loss都为False时,使用交叉熵损失函数。默认False。 - class_weight (list/str): 交叉熵损失函数各类损失的权重。当class_weight为list的时候,长度应为 + class_weight (list|str): 交叉熵损失函数各类损失的权重。当class_weight为list的时候,长度应为 num_classes。当class_weight为str时, weight.lower()应为'dynamic',这时会根据每一轮各类像素的比重 自行计算相应的权重,每一类的权重为:每类的比例 * num_classes。class_weight取默认值None是,各类的权重1, 即平时使用的交叉熵损失函数。 @@ -95,11 +96,6 @@ class HRNet(DeepLabv3p): if mode == 'train': self.optimizer.minimize(model_out) outputs['loss'] = model_out - elif mode == 'eval': - outputs['loss'] = model_out[0] - outputs['pred'] = model_out[1] - outputs['label'] = model_out[2] - outputs['mask'] = model_out[3] else: outputs['pred'] = model_out[0] outputs['logit'] = model_out[1] @@ -152,14 +148,15 @@ class HRNet(DeepLabv3p): log_interval_steps (int): 训练日志输出间隔(单位:迭代次数)。默认为2。 save_dir (str): 模型保存路径。默认'output'。 pretrain_weights (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET', - 则自动下载在IMAGENET图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为'IMAGENET'。 + 则自动下载在IMAGENET图片数据上预训练的模型权重;若为字符串'CITYSCAPES' + 则自动下载在CITYSCAPES图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为'IMAGENET'。 optimizer (paddle.fluid.optimizer): 优化器。当改参数为None时,使用默认的优化器:使用 fluid.optimizer.Momentum优化方法,polynomial的学习率衰减策略。 learning_rate (float): 默认优化器的初始学习率。默认0.01。 lr_decay_power (float): 默认优化器学习率多项式衰减系数。默认0.9。 use_vdl (bool): 是否使用VisualDL进行可视化。默认False。 sensitivities_file (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT', - 则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。 + 则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。 eval_metric_loss (float): 可容忍的精度损失。默认为0.05。 early_stop (bool): 是否使用提前终止训练策略。默认值为False。 early_stop_patience (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内 @@ -172,6 +169,6 @@ class HRNet(DeepLabv3p): return super(HRNet, self).train( num_epochs, train_dataset, train_batch_size, eval_dataset, save_interval_epochs, log_interval_steps, save_dir, - pretrain_weights, optimizer, learning_rate, lr_decay_power, use_vdl, - sensitivities_file, eval_metric_loss, early_stop, + pretrain_weights, optimizer, learning_rate, lr_decay_power, + use_vdl, sensitivities_file, eval_metric_loss, early_stop, early_stop_patience, resume_checkpoint) diff --git a/paddlex/cv/models/load_model.py b/paddlex/cv/models/load_model.py index 87b30ac47c206f0b3723ffcf353d95078feeb892..2d24abf4c75f1ff4b503138b3e18341da0b665c5 100644 --- a/paddlex/cv/models/load_model.py +++ b/paddlex/cv/models/load_model.py @@ -108,6 +108,7 @@ def load_model(model_dir, fixed_input_shape=None): logging.info("Model[{}] loaded.".format(info['Model'])) model.trainable = False + model.status = status return model diff --git a/paddlex/cv/models/mask_rcnn.py b/paddlex/cv/models/mask_rcnn.py index 9f6ec4867b072a33a8e2d83542fcf760280e4187..26d5e5cb4edc58be0fffaf6d778058c5846c1929 100644 --- a/paddlex/cv/models/mask_rcnn.py +++ b/paddlex/cv/models/mask_rcnn.py @@ -1,16 +1,16 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. 
-#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from __future__ import absolute_import import math @@ -155,7 +155,8 @@ class MaskRCNN(FasterRCNN): log_interval_steps (int): 训练日志输出间隔(单位:迭代次数)。默认为20。 save_dir (str): 模型保存路径。默认值为'output'。 pretrain_weights (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET', - 则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为None。 + 则自动下载在ImageNet图片数据上预训练的模型权重;若为字符串'COCO', + 则自动下载在COCO数据集上预训练的模型权重;若为None,则不使用预训练模型。默认为None。 optimizer (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器: fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。 learning_rate (float): 默认优化器的学习率。默认为1.0/800。 diff --git a/paddlex/cv/models/slim/prune.py b/paddlex/cv/models/slim/prune.py index ad4dec23b8e3b29eda30fa873f4baa625a004884..f1e5f98a23c0d352bbf00dbb6b9b8fb60655fed3 100644 --- a/paddlex/cv/models/slim/prune.py +++ b/paddlex/cv/models/slim/prune.py @@ -158,6 +158,7 @@ def prune_program(model, prune_params_ratios=None): prune_params_ratios (dict): 由裁剪参数名和裁剪率组成的字典,当为None时 使用默认裁剪参数名和裁剪率。默认为None。 """ + assert model.status == 'Normal', 'Only the models saved while training are supported!' place = model.places[0] train_prog = model.train_prog eval_prog = model.test_prog @@ -235,6 +236,7 @@ def cal_params_sensitivities(model, save_file, eval_dataset, batch_size=8): 其中``weight_0``是卷积Kernel名;``sensitivities['weight_0']``是一个字典,key是裁剪率,value是敏感度。 """ + assert model.status == 'Normal', 'Only the models saved while training are supported!' 
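
# --- Sketch of the constraint the two assertions above introduce: the slim
# pruning utilities now only accept a model saved during training, for which
# load_model() restores status == 'Normal' (per the load_model.py change
# above). Paths and the dataset are hypothetical; the pdx.slim entry point and
# the restored model.eval_transforms attribute are assumptions here, matching
# cal_params_sensitivities() as defined in prune.py.
import paddlex as pdx

model = pdx.load_model('output/unet/best_model')    # a train-time save_dir
eval_dataset = pdx.datasets.SegDataset(
    data_dir='seg_data',                            # hypothetical dataset
    file_list='seg_data/val_list.txt',
    label_list='seg_data/labels.txt',
    transforms=model.eval_transforms)
pdx.slim.cal_params_sensitivities(
    model,
    save_file='./unet.sensitivities',
    eval_dataset=eval_dataset,
    batch_size=8)   # asserts model.status == 'Normal' before doing any work
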
if os.path.exists(save_file): os.remove(save_file) diff --git a/paddlex/cv/models/slim/prune_config.py b/paddlex/cv/models/slim/prune_config.py index 49430e9bfb1dcc47fb93aa9fc7d05ceb21e2b9e8..4ca4215cd31dcf47bed7d3ae25c9ccae3c9a3dc8 100644 --- a/paddlex/cv/models/slim/prune_config.py +++ b/paddlex/cv/models/slim/prune_config.py @@ -19,6 +19,8 @@ import paddle.fluid as fluid import paddlex sensitivities_data = { + 'AlexNet': + 'https://bj.bcebos.com/paddlex/slim_prune/alexnet_sensitivities.data', 'ResNet18': 'https://bj.bcebos.com/paddlex/slim_prune/resnet18.sensitivities', 'ResNet34': @@ -41,6 +43,10 @@ sensitivities_data = { 'https://bj.bcebos.com/paddlex/slim_prune/mobilenetv3_large.sensitivities', 'MobileNetV3_small': 'https://bj.bcebos.com/paddlex/slim_prune/mobilenetv3_small.sensitivities', + 'MobileNetV3_large_ssld': + 'https://bj.bcebos.com/paddlex/slim_prune/mobilenetv3_large_ssld_sensitivities.data', + 'MobileNetV3_small_ssld': + 'https://bj.bcebos.com/paddlex/slim_prune/mobilenetv3_small_ssld_sensitivities.data', 'DenseNet121': 'https://bj.bcebos.com/paddlex/slim_prune/densenet121.sensitivities', 'DenseNet161': @@ -51,6 +57,8 @@ sensitivities_data = { 'https://bj.bcebos.com/paddlex/slim_prune/xception41.sensitivities', 'Xception65': 'https://bj.bcebos.com/paddlex/slim_prune/xception65.sensitivities', + 'ShuffleNetV2': + 'https://bj.bcebos.com/paddlex/slim_prune/shufflenetv2_sensitivities.data', 'YOLOv3_MobileNetV1': 'https://bj.bcebos.com/paddlex/slim_prune/yolov3_mobilenetv1.sensitivities', 'YOLOv3_MobileNetV3_large': @@ -143,7 +151,8 @@ def get_prune_params(model): if model_type.startswith('ResNet') or \ model_type.startswith('DenseNet') or \ model_type.startswith('DarkNet') or \ - model_type.startswith('AlexNet'): + model_type.startswith('AlexNet') or \ + model_type.startswith('ShuffleNetV2'): for block in program.blocks: for param in block.all_parameters(): pd_var = fluid.global_scope().find_var(param.name) @@ -152,6 +161,28 @@ def get_prune_params(model): prune_names.append(param.name) if model_type == 'AlexNet': prune_names.remove('conv5_weights') + if model_type == 'ShuffleNetV2': + not_prune_names = ['stage_2_1_conv5_weights', + 'stage_2_1_conv3_weights', + 'stage_2_2_conv3_weights', + 'stage_2_3_conv3_weights', + 'stage_2_4_conv3_weights', + 'stage_3_1_conv5_weights', + 'stage_3_1_conv3_weights', + 'stage_3_2_conv3_weights', + 'stage_3_3_conv3_weights', + 'stage_3_4_conv3_weights', + 'stage_3_5_conv3_weights', + 'stage_3_6_conv3_weights', + 'stage_3_7_conv3_weights', + 'stage_3_8_conv3_weights', + 'stage_4_1_conv5_weights', + 'stage_4_1_conv3_weights', + 'stage_4_2_conv3_weights', + 'stage_4_3_conv3_weights', + 'stage_4_4_conv3_weights',] + for name in not_prune_names: + prune_names.remove(name) elif model_type == "MobileNetV1": prune_names.append("conv1_weights") for param in program.global_block().all_parameters(): diff --git a/paddlex/cv/models/unet.py b/paddlex/cv/models/unet.py index d7bf80ed27898f65c059ad3febde4885b4e58a9f..34c597b0e190122c3ba80c485378273abff20b65 100644 --- a/paddlex/cv/models/unet.py +++ b/paddlex/cv/models/unet.py @@ -1,16 +1,16 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. 
-#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from __future__ import absolute_import import paddlex @@ -95,11 +95,6 @@ class UNet(DeepLabv3p): if mode == 'train': self.optimizer.minimize(model_out) outputs['loss'] = model_out - elif mode == 'eval': - outputs['loss'] = model_out[0] - outputs['pred'] = model_out[1] - outputs['label'] = model_out[2] - outputs['mask'] = model_out[3] else: outputs['pred'] = model_out[0] outputs['logit'] = model_out[1] @@ -141,7 +136,7 @@ class UNet(DeepLabv3p): lr_decay_power (float): 默认优化器学习率多项式衰减系数。默认0.9。 use_vdl (bool): 是否使用VisualDL进行可视化。默认False。 sensitivities_file (str): 若指定为路径时,则加载路径下敏感度信息进行裁剪;若为字符串'DEFAULT', - 则自动下载在ImageNet图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。 + 则自动下载在Cityscapes图片数据上获得的敏感度信息进行裁剪;若为None,则不进行裁剪。默认为None。 eval_metric_loss (float): 可容忍的精度损失。默认为0.05。 early_stop (bool): 是否使用提前终止训练策略。默认值为False。 early_stop_patience (int): 当使用提前终止训练策略时,如果验证集精度在`early_stop_patience`个epoch内 diff --git a/paddlex/cv/models/utils/detection_eval.py b/paddlex/cv/models/utils/detection_eval.py index b9dcdaa029265483c2b9fb919426686c36a411f5..d2c0ae8abf867baddfc767bd6e1a73cf5d36ea3d 100644 --- a/paddlex/cv/models/utils/detection_eval.py +++ b/paddlex/cv/models/utils/detection_eval.py @@ -1,16 +1,16 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
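
# --- Per the docstring changes above, sensitivities_file='DEFAULT' for the
# segmentation models now downloads sensitivities computed on Cityscapes
# rather than ImageNet. A hedged sketch of prune-aware training; the dataset
# paths are hypothetical.
import paddlex as pdx
from paddlex.seg import transforms

train_dataset = pdx.datasets.SegDataset(
    data_dir='seg_data',
    file_list='seg_data/train_list.txt',
    label_list='seg_data/labels.txt',
    transforms=transforms.ComposedSegTransforms(mode='train'))

model = pdx.seg.UNet(num_classes=2)
model.train(
    num_epochs=20,
    train_dataset=train_dataset,
    sensitivities_file='DEFAULT',   # Cityscapes-derived sensitivities
    eval_metric_loss=0.05)          # tolerated accuracy drop while pruning
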
from __future__ import absolute_import diff --git a/paddlex/cv/models/utils/pretrain_weights.py b/paddlex/cv/models/utils/pretrain_weights.py index a7bd78c4bb3e6d1c9d7ffe714aac721873e1ab38..97018acb827c41381f2e3e29df87ee0620ee2f40 100644 --- a/paddlex/cv/models/utils/pretrain_weights.py +++ b/paddlex/cv/models/utils/pretrain_weights.py @@ -1,4 +1,5 @@ import paddlex +import paddlex.utils.logging as logging import paddlehub as hub import os import os.path as osp @@ -75,16 +76,101 @@ image_pretrain = { } coco_pretrain = { - 'UNet': 'https://paddleseg.bj.bcebos.com/models/unet_coco_v3.tgz' + 'YOLOv3_DarkNet53_COCO': + 'https://paddlemodels.bj.bcebos.com/object_detection/yolov3_darknet.tar', + 'YOLOv3_MobileNetV1_COCO': + 'https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1.tar', + 'YOLOv3_MobileNetV3_large_COCO': + 'https://bj.bcebos.com/paddlex/models/yolov3_mobilenet_v3.tar', + 'YOLOv3_ResNet34_COCO': + 'https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r34.tar', + 'YOLOv3_ResNet50_vd_COCO': + 'https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r50vd_dcn.tar', + 'FasterRCNN_ResNet50_COCO': + 'https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_fpn_2x.tar', + 'FasterRCNN_ResNet50_vd_COCO': + 'https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_vd_fpn_2x.tar', + 'FasterRCNN_ResNet101_COCO': + 'https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_fpn_2x.tar', + 'FasterRCNN_ResNet101_vd_COCO': + 'https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r101_vd_fpn_2x.tar', + 'FasterRCNN_HRNet_W18_COCO': + 'https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_hrnetv2p_w18_2x.tar', + 'MaskRCNN_ResNet50_COCO': + 'https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r50_fpn_2x.tar', + 'MaskRCNN_ResNet50_vd_COCO': + 'https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r50_vd_fpn_2x.tar', + 'MaskRCNN_ResNet101_COCO': + 'https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r101_fpn_1x.tar', + 'MaskRCNN_ResNet101_vd_COCO': + 'https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r101_vd_fpn_1x.tar', + 'UNet_COCO': 'https://paddleseg.bj.bcebos.com/models/unet_coco_v3.tgz', + 'DeepLabv3p_MobileNetV2_x1.0_COCO': + 'https://bj.bcebos.com/v1/paddleseg/deeplab_mobilenet_x1_0_coco.tgz', + 'DeepLabv3p_Xception65_COCO': + 'https://paddleseg.bj.bcebos.com/models/xception65_coco.tgz' +} + +cityscapes_pretrain = { + 'DeepLabv3p_MobileNetV2_x1.0_CITYSCAPES': + 'https://paddleseg.bj.bcebos.com/models/mobilenet_cityscapes.tgz', + 'DeepLabv3p_Xception65_CITYSCAPES': + 'https://paddleseg.bj.bcebos.com/models/xception65_bn_cityscapes.tgz', + 'HRNet_W18_CITYSCAPES': + 'https://paddleseg.bj.bcebos.com/models/hrnet_w18_bn_cityscapes.tgz', + 'FastSCNN_CITYSCAPES': + 'https://paddleseg.bj.bcebos.com/models/fast_scnn_cityscape.tar' } -def get_pretrain_weights(flag, model_type, backbone, save_dir): +def get_pretrain_weights(flag, class_name, backbone, save_dir): if flag is None: return None elif osp.isdir(flag): return flag + elif osp.isfile(flag): + return flag + warning_info = "{} does not support to be finetuned with weights pretrained on the {} dataset, so pretrain_weights is forced to be set to {}" + if flag == 'COCO': + if class_name == "FasterRCNN" and backbone in ['ResNet18'] or \ + class_name == "MaskRCNN" and backbone in ['ResNet18', 'HRNet_W18'] or \ + class_name == 'DeepLabv3p' and backbone in ['Xception41', 'MobileNetV2_x0.25', 'MobileNetV2_x0.5', 'MobileNetV2_x1.5', 
+                                       'MobileNetV2_x2.0']:
+            model_name = '{}_{}'.format(class_name, backbone)
+            logging.warning(warning_info.format(model_name, flag, 'IMAGENET'))
+            flag = 'IMAGENET'
+        elif class_name == 'HRNet':
+            logging.warning(warning_info.format(class_name, flag, 'IMAGENET'))
+            flag = 'IMAGENET'
+        elif class_name == 'FastSCNN':
+            logging.warning(
+                warning_info.format(class_name, flag, 'CITYSCAPES'))
+            flag = 'CITYSCAPES'
+    elif flag == 'CITYSCAPES':
+        if class_name == 'UNet':
+            logging.warning(warning_info.format(class_name, flag, 'COCO'))
+            flag = 'COCO'
+        if class_name == 'HRNet' and backbone.split('_')[
+                -1] in ['W30', 'W32', 'W40', 'W48', 'W60', 'W64']:
+            logging.warning(warning_info.format(backbone, flag, 'IMAGENET'))
+            flag = 'IMAGENET'
+        if class_name == 'DeepLabv3p' and backbone in [
+                'Xception41', 'MobileNetV2_x0.25', 'MobileNetV2_x0.5',
+                'MobileNetV2_x1.5', 'MobileNetV2_x2.0'
+        ]:
+            model_name = '{}_{}'.format(class_name, backbone)
+            logging.warning(warning_info.format(model_name, flag, 'IMAGENET'))
+            flag = 'IMAGENET'
     elif flag == 'IMAGENET':
+        if class_name == 'UNet':
+            logging.warning(warning_info.format(class_name, flag, 'COCO'))
+            flag = 'COCO'
+        elif class_name == 'FastSCNN':
+            logging.warning(
+                warning_info.format(class_name, flag, 'CITYSCAPES'))
+            flag = 'CITYSCAPES'
+
+    if flag == 'IMAGENET':
         new_save_dir = save_dir
         if hasattr(paddlex, 'pretrain_dir'):
             new_save_dir = paddlex.pretrain_dir
@@ -96,7 +182,7 @@ def get_pretrain_weights(flag, model_type, backbone, save_dir):
             backbone = 'MobileNetV3_small_x1_0_ssld'
         elif backbone == 'MobileNetV3_large_ssld':
             backbone = 'MobileNetV3_large_x1_0_ssld'
-        if model_type == 'detector':
+        if class_name in ['YOLOv3', 'FasterRCNN', 'MaskRCNN']:
             if backbone == 'ResNet50':
                 backbone = 'DetResNet50'
         assert backbone in image_pretrain, "There is not ImageNet pretrain weights for {}, you may try COCO.".format(
@@ -121,17 +207,20 @@ def get_pretrain_weights(flag, model_type, backbone, save_dir):
             raise Exception(
                 "Unexpected error, please make sure paddlehub >= 1.6.2")
         return osp.join(new_save_dir, backbone)
-    elif flag == 'COCO':
+    elif flag in ['COCO', 'CITYSCAPES']:
         new_save_dir = save_dir
         if hasattr(paddlex, 'pretrain_dir'):
             new_save_dir = paddlex.pretrain_dir
-        url = coco_pretrain[backbone]
+        if class_name in ['YOLOv3', 'FasterRCNN', 'MaskRCNN', 'DeepLabv3p']:
+            backbone = '{}_{}'.format(class_name, backbone)
+        backbone = "{}_{}".format(backbone, flag)
+        if flag == 'COCO':
+            url = coco_pretrain[backbone]
+        elif flag == 'CITYSCAPES':
+            url = cityscapes_pretrain[backbone]
         fname = osp.split(url)[-1].split('.')[0]
         # paddlex.utils.download_and_decompress(url, path=new_save_dir)
         # return osp.join(new_save_dir, fname)
-
-        assert backbone in coco_pretrain, "There is not COCO pretrain weights for {}, you may try ImageNet.".format(
-            backbone)
         try:
             hub.download(backbone, save_path=new_save_dir)
         except Exception as e:
@@ -148,5 +237,5 @@ def get_pretrain_weights(flag, model_type, backbone, save_dir):
         return osp.join(new_save_dir, backbone)
     else:
         raise Exception(
-            "pretrain_weights need to be defined as directory path or `IMAGENET` or 'COCO' (download pretrain weights automatically)."
+            "pretrain_weights needs to be defined as a directory path or 'IMAGENET' or 'COCO' or 'CITYSCAPES' (download pretrain weights automatically)."
         )
diff --git a/paddlex/cv/models/utils/visualize.py b/paddlex/cv/models/utils/visualize.py
index 6982bdf62993598ab8f0d42e09af2e303a7957bb..89875114f13e2b275019e3a65fc19576013dd68a 100644
--- a/paddlex/cv/models/utils/visualize.py
+++ b/paddlex/cv/models/utils/visualize.py
@@ -1,17 +1,18 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
+# -*- coding: utf-8 -*-
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import os
 import cv2
 import colorsys
diff --git a/paddlex/cv/models/yolo_v3.py b/paddlex/cv/models/yolo_v3.py
index 4635c7aac0f110d1c10d8c219dbe6a23cc9f1617..85ee89fc86851ff9be104d0ee258eefce9843a69 100644
--- a/paddlex/cv/models/yolo_v3.py
+++ b/paddlex/cv/models/yolo_v3.py
@@ -1,16 +1,16 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
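
# --- A distilled, self-contained sketch of the fallback rules that
# get_pretrain_weights() gained above: when a model has no published weights
# for the requested source, the flag is downgraded with a warning instead of
# raising. Only a subset of the rules is shown (the backbone-specific
# FasterRCNN/MaskRCNN/DeepLabv3p cases are omitted); this mirrors the patch,
# it is not the shipped function.
def resolve_pretrain_flag(flag, class_name):
    if flag == 'COCO':
        if class_name == 'HRNet':
            return 'IMAGENET'      # no COCO weights published for HRNet
        if class_name == 'FastSCNN':
            return 'CITYSCAPES'    # FastSCNN only ships Cityscapes weights
    elif flag == 'CITYSCAPES':
        if class_name == 'UNet':
            return 'COCO'          # UNet only ships COCO weights
    elif flag == 'IMAGENET':
        if class_name == 'UNet':
            return 'COCO'
        if class_name == 'FastSCNN':
            return 'CITYSCAPES'
    return flag

assert resolve_pretrain_flag('COCO', 'HRNet') == 'IMAGENET'
assert resolve_pretrain_flag('IMAGENET', 'FastSCNN') == 'CITYSCAPES'
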
from __future__ import absolute_import import math @@ -188,7 +188,8 @@ class YOLOv3(BaseAPI): log_interval_steps (int): 训练日志输出间隔(单位:迭代次数)。默认为10。 save_dir (str): 模型保存路径。默认值为'output'。 pretrain_weights (str): 若指定为路径时,则加载路径下预训练模型;若为字符串'IMAGENET', - 则自动下载在ImageNet图片数据上预训练的模型权重;若为None,则不使用预训练模型。默认为'IMAGENET'。 + 则自动下载在ImageNet图片数据上预训练的模型权重;若为字符串'COCO', + 则自动下载在COCO数据集上预训练的模型权重;若为None,则不使用预训练模型。默认为'IMAGENET'。 optimizer (paddle.fluid.optimizer): 优化器。当该参数为None时,使用默认优化器: fluid.layers.piecewise_decay衰减策略,fluid.optimizer.Momentum优化方法。 learning_rate (float): 默认优化器的学习率。默认为1.0/8000。 diff --git a/paddlex/cv/nets/__init__.py b/paddlex/cv/nets/__init__.py index 6e5102a26c9a573db25ad63984dad41c633c987d..5b427fe31be957f92611f7cfc6a9e6102a3c9616 100644 --- a/paddlex/cv/nets/__init__.py +++ b/paddlex/cv/nets/__init__.py @@ -20,6 +20,7 @@ from .mobilenet_v2 import MobileNetV2 from .mobilenet_v3 import MobileNetV3 from .segmentation import UNet from .segmentation import DeepLabv3p +from .segmentation import FastSCNN from .xception import Xception from .densenet import DenseNet from .shufflenet_v2 import ShuffleNetV2 diff --git a/paddlex/cv/nets/densenet.py b/paddlex/cv/nets/densenet.py index a7238b2cd8775f20210d04d41f6caa1343c68092..76997c48de412e52cf914c32057f8a1bd0c06f9d 100644 --- a/paddlex/cv/nets/densenet.py +++ b/paddlex/cv/nets/densenet.py @@ -1,16 +1,16 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
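
# --- FastSCNN is newly wired into paddlex.cv.nets above. A minimal sketch of
# driving the network class directly, mirroring how the segmentation wrappers
# call generate_inputs()/build_net(); the higher-level trainer API around it
# is not shown here.
import paddle.fluid as fluid
from paddlex.cv.nets.segmentation import FastSCNN

net = FastSCNN(num_classes=2, mode='test')
startup_prog = fluid.Program()
main_prog = fluid.Program()
with fluid.program_guard(main_prog, startup_prog):
    inputs = net.generate_inputs()        # only 'image' in test mode
    pred, logit = net.build_net(inputs)   # argmax map and per-class probabilities
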
from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/paddlex/cv/nets/hrnet.py b/paddlex/cv/nets/hrnet.py index a7934d385d4a53fd936410e37d3896fe21cb17ee..561c7594da2904632386c0d88e9d841c047fb2d2 100644 --- a/paddlex/cv/nets/hrnet.py +++ b/paddlex/cv/nets/hrnet.py @@ -51,15 +51,38 @@ class HRNet(object): self.width = width self.has_se = has_se + self.num_modules = { + '18_small_v1': [1, 1, 1, 1], + '18': [1, 1, 4, 3], + '30': [1, 1, 4, 3], + '32': [1, 1, 4, 3], + '40': [1, 1, 4, 3], + '44': [1, 1, 4, 3], + '48': [1, 1, 4, 3], + '60': [1, 1, 4, 3], + '64': [1, 1, 4, 3] + } + self.num_blocks = { + '18_small_v1': [[1], [2, 2], [2, 2, 2], [2, 2, 2, 2]], + '18': [[4], [4, 4], [4, 4, 4], [4, 4, 4, 4]], + '30': [[4], [4, 4], [4, 4, 4], [4, 4, 4, 4]], + '32': [[4], [4, 4], [4, 4, 4], [4, 4, 4, 4]], + '40': [[4], [4, 4], [4, 4, 4], [4, 4, 4, 4]], + '44': [[4], [4, 4], [4, 4, 4], [4, 4, 4, 4]], + '48': [[4], [4, 4], [4, 4, 4], [4, 4, 4, 4]], + '60': [[4], [4, 4], [4, 4, 4], [4, 4, 4, 4]], + '64': [[4], [4, 4], [4, 4, 4], [4, 4, 4, 4]] + } self.channels = { - 18: [[18, 36], [18, 36, 72], [18, 36, 72, 144]], - 30: [[30, 60], [30, 60, 120], [30, 60, 120, 240]], - 32: [[32, 64], [32, 64, 128], [32, 64, 128, 256]], - 40: [[40, 80], [40, 80, 160], [40, 80, 160, 320]], - 44: [[44, 88], [44, 88, 176], [44, 88, 176, 352]], - 48: [[48, 96], [48, 96, 192], [48, 96, 192, 384]], - 60: [[60, 120], [60, 120, 240], [60, 120, 240, 480]], - 64: [[64, 128], [64, 128, 256], [64, 128, 256, 512]], + '18_small_v1': [[32], [16, 32], [16, 32, 64], [16, 32, 64, 128]], + '18': [[64], [18, 36], [18, 36, 72], [18, 36, 72, 144]], + '30': [[64], [30, 60], [30, 60, 120], [30, 60, 120, 240]], + '32': [[64], [32, 64], [32, 64, 128], [32, 64, 128, 256]], + '40': [[64], [40, 80], [40, 80, 160], [40, 80, 160, 320]], + '44': [[64], [44, 88], [44, 88, 176], [44, 88, 176, 352]], + '48': [[64], [48, 96], [48, 96, 192], [48, 96, 192, 384]], + '60': [[64], [60, 120], [60, 120, 240], [60, 120, 240, 480]], + '64': [[64], [64, 128], [64, 128, 256], [64, 128, 256, 512]], } self.freeze_at = freeze_at @@ -73,31 +96,38 @@ class HRNet(object): def net(self, input): width = self.width - channels_2, channels_3, channels_4 = self.channels[width] - num_modules_2, num_modules_3, num_modules_4 = 1, 4, 3 + channels_1, channels_2, channels_3, channels_4 = self.channels[str( + width)] + num_modules_1, num_modules_2, num_modules_3, num_modules_4 = self.num_modules[ + str(width)] + num_blocks_1, num_blocks_2, num_blocks_3, num_blocks_4 = self.num_blocks[ + str(width)] x = self.conv_bn_layer( input=input, filter_size=3, - num_filters=64, + num_filters=channels_1[0], stride=2, if_act=True, name='layer1_1') x = self.conv_bn_layer( input=x, filter_size=3, - num_filters=64, + num_filters=channels_1[0], stride=2, if_act=True, name='layer1_2') - la1 = self.layer1(x, name='layer2') + la1 = self.layer1(x, num_blocks_1, channels_1, name='layer2') tr1 = self.transition_layer([la1], [256], channels_2, name='tr1') - st2 = self.stage(tr1, num_modules_2, channels_2, name='st2') + st2 = self.stage( + tr1, num_modules_2, num_blocks_2, channels_2, name='st2') tr2 = self.transition_layer(st2, channels_2, channels_3, name='tr2') - st3 = self.stage(tr2, num_modules_3, channels_3, name='st3') + st3 = self.stage( + tr2, num_modules_3, num_blocks_3, channels_3, name='st3') tr3 = self.transition_layer(st3, channels_3, channels_4, name='tr3') - st4 = self.stage(tr3, num_modules_4, channels_4, name='st4') + st4 = 
self.stage( + tr3, num_modules_4, num_blocks_4, channels_4, name='st4') # classification if self.num_classes: @@ -139,12 +169,12 @@ class HRNet(object): self.end_points = st4 return st4[-1] - def layer1(self, input, name=None): + def layer1(self, input, num_blocks, channels, name=None): conv = input - for i in range(4): + for i in range(num_blocks[0]): conv = self.bottleneck_block( conv, - num_filters=64, + num_filters=channels[0], downsample=True if i == 0 else False, name=name + '_' + str(i + 1)) return conv @@ -178,7 +208,7 @@ class HRNet(object): out = [] for i in range(len(channels)): residual = x[i] - for j in range(block_num): + for j in range(block_num[i]): residual = self.basic_block( residual, channels[i], @@ -240,10 +270,11 @@ class HRNet(object): def high_resolution_module(self, x, + num_blocks, channels, multi_scale_output=True, name=None): - residual = self.branches(x, 4, channels, name=name) + residual = self.branches(x, num_blocks, channels, name=name) out = self.fuse_layers( residual, channels, @@ -254,6 +285,7 @@ class HRNet(object): def stage(self, x, num_modules, + num_blocks, channels, multi_scale_output=True, name=None): @@ -262,12 +294,13 @@ class HRNet(object): if i == num_modules - 1 and multi_scale_output == False: out = self.high_resolution_module( out, + num_blocks, channels, multi_scale_output=False, name=name + '_' + str(i + 1)) else: out = self.high_resolution_module( - out, channels, name=name + '_' + str(i + 1)) + out, num_blocks, channels, name=name + '_' + str(i + 1)) return out diff --git a/paddlex/cv/nets/segmentation/__init__.py b/paddlex/cv/nets/segmentation/__init__.py index 9b8f4391102fd41ac2b60c54141af534d390b071..8c7d9674ae79a3ee6145c1c92612498ac7340faa 100644 --- a/paddlex/cv/nets/segmentation/__init__.py +++ b/paddlex/cv/nets/segmentation/__init__.py @@ -15,5 +15,6 @@ from .unet import UNet from .deeplabv3p import DeepLabv3p from .hrnet import HRNet +from .fast_scnn import FastSCNN from .model_utils import libs from .model_utils import loss diff --git a/paddlex/cv/nets/segmentation/deeplabv3p.py b/paddlex/cv/nets/segmentation/deeplabv3p.py index 60a34d0128bf271d42fa8658100318ce05e31812..4f9e538a28abe37481b96a517019b252f6d96f45 100644 --- a/paddlex/cv/nets/segmentation/deeplabv3p.py +++ b/paddlex/cv/nets/segmentation/deeplabv3p.py @@ -28,7 +28,6 @@ from .model_utils.libs import sigmoid_to_softmax from .model_utils.loss import softmax_with_loss from .model_utils.loss import dice_loss from .model_utils.loss import bce_loss -import paddlex.utils.logging as logging from paddlex.cv.nets.xception import Xception from paddlex.cv.nets.mobilenet_v2 import MobileNetV2 diff --git a/paddlex/cv/nets/segmentation/fast_scnn.py b/paddlex/cv/nets/segmentation/fast_scnn.py new file mode 100644 index 0000000000000000000000000000000000000000..71866e56df9adf31c45d841a7bcde3a062c3067a --- /dev/null +++ b/paddlex/cv/nets/segmentation/fast_scnn.py @@ -0,0 +1,395 @@ +# coding: utf8 +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from collections import OrderedDict + +import paddle.fluid as fluid +from .model_utils.libs import scope +from .model_utils.libs import bn, bn_relu, relu, conv_bn_layer +from .model_utils.libs import conv, avg_pool +from .model_utils.libs import separate_conv +from .model_utils.libs import sigmoid_to_softmax +from .model_utils.loss import softmax_with_loss +from .model_utils.loss import dice_loss +from .model_utils.loss import bce_loss + + +class FastSCNN(object): + def __init__(self, + num_classes, + mode='train', + use_bce_loss=False, + use_dice_loss=False, + class_weight=None, + multi_loss_weight=[1.0], + ignore_index=255, + fixed_input_shape=None): + # dice_loss或bce_loss只适用两类分割中 + if num_classes > 2 and (use_bce_loss or use_dice_loss): + raise ValueError( + "dice loss and bce loss is only applicable to binary classfication" + ) + + if class_weight is not None: + if isinstance(class_weight, list): + if len(class_weight) != num_classes: + raise ValueError( + "Length of class_weight should be equal to number of classes" + ) + elif isinstance(class_weight, str): + if class_weight.lower() != 'dynamic': + raise ValueError( + "if class_weight is string, must be dynamic!") + else: + raise TypeError( + 'Expect class_weight is a list or string but receive {}'. + format(type(class_weight))) + + self.num_classes = num_classes + self.mode = mode + self.use_bce_loss = use_bce_loss + self.use_dice_loss = use_dice_loss + self.class_weight = class_weight + self.ignore_index = ignore_index + self.multi_loss_weight = multi_loss_weight + self.fixed_input_shape = fixed_input_shape + + def build_net(self, inputs): + if self.use_dice_loss or self.use_bce_loss: + self.num_classes = 1 + image = inputs['image'] + size = fluid.layers.shape(image)[2:] + with scope('learning_to_downsample'): + higher_res_features = self._learning_to_downsample(image, 32, 48, + 64) + with scope('global_feature_extractor'): + lower_res_feature = self._global_feature_extractor( + higher_res_features, 64, [64, 96, 128], 128, 6, [3, 3, 3]) + with scope('feature_fusion'): + x = self._feature_fusion(higher_res_features, lower_res_feature, + 64, 128, 128) + with scope('classifier'): + logit = self._classifier(x, 128) + logit = fluid.layers.resize_bilinear(logit, size, align_mode=0) + + if len(self.multi_loss_weight) == 3: + with scope('aux_layer_higher'): + higher_logit = self._aux_layer(higher_res_features, + self.num_classes) + higher_logit = fluid.layers.resize_bilinear( + higher_logit, size, align_mode=0) + with scope('aux_layer_lower'): + lower_logit = self._aux_layer(lower_res_feature, + self.num_classes) + lower_logit = fluid.layers.resize_bilinear( + lower_logit, size, align_mode=0) + logit = (logit, higher_logit, lower_logit) + elif len(self.multi_loss_weight) == 2: + with scope('aux_layer_higher'): + higher_logit = self._aux_layer(higher_res_features, + self.num_classes) + higher_logit = fluid.layers.resize_bilinear( + higher_logit, size, align_mode=0) + logit = (logit, higher_logit) + else: + logit = (logit, ) + + if self.num_classes == 1: + out = sigmoid_to_softmax(logit[0]) + out = fluid.layers.transpose(out, [0, 2, 3, 1]) + else: + out = fluid.layers.transpose(logit[0], [0, 2, 3, 1]) + + pred = fluid.layers.argmax(out, axis=3) + pred = fluid.layers.unsqueeze(pred, axes=[3]) + + if self.mode == 'train': + 
             label = inputs['label']
+            return self._get_loss(logit, label)
+        elif self.mode == 'eval':
+            label = inputs['label']
+            # pixels whose label equals ignore_index are excluded from metrics
+            mask = label != self.ignore_index
+            loss = self._get_loss(logit, label)
+            return loss, pred, label, mask
+        else:
+            if self.num_classes == 1:
+                logit = sigmoid_to_softmax(logit[0])
+            else:
+                logit = fluid.layers.softmax(logit[0], axis=1)
+            return pred, logit
+
+    def generate_inputs(self):
+        inputs = OrderedDict()
+        if self.fixed_input_shape is not None:
+            input_shape = [
+                None, 3, self.fixed_input_shape[1], self.fixed_input_shape[0]
+            ]
+            inputs['image'] = fluid.data(
+                dtype='float32', shape=input_shape, name='image')
+        else:
+            inputs['image'] = fluid.data(
+                dtype='float32', shape=[None, 3, None, None], name='image')
+        if self.mode == 'train':
+            inputs['label'] = fluid.data(
+                dtype='int32', shape=[None, 1, None, None], name='label')
+        elif self.mode == 'eval':
+            inputs['label'] = fluid.data(
+                dtype='int32', shape=[None, 1, None, None], name='label')
+        return inputs
+
+    def _get_loss(self, logits, label):
+        avg_loss = 0
+        if not (self.use_dice_loss or self.use_bce_loss):
+            for i, logit in enumerate(logits):
+                logit_mask = (
+                    label.astype('int32') != self.ignore_index).astype('int32')
+                loss = softmax_with_loss(
+                    logit,
+                    label,
+                    logit_mask,
+                    num_classes=self.num_classes,
+                    weight=self.class_weight,
+                    ignore_index=self.ignore_index)
+                avg_loss += self.multi_loss_weight[i] * loss
+        else:
+            if self.use_dice_loss:
+                for i, logit in enumerate(logits):
+                    logit_mask = (label.astype('int32') != self.ignore_index
+                                  ).astype('int32')
+                    loss = dice_loss(logit, label, logit_mask)
+                    avg_loss += self.multi_loss_weight[i] * loss
+            if self.use_bce_loss:
+                for i, logit in enumerate(logits):
+                    #logit_label = fluid.layers.resize_nearest(label, logit_shape[2:])
+                    logit_mask = (label.astype('int32') != self.ignore_index
+                                  ).astype('int32')
+                    loss = bce_loss(
+                        logit,
+                        label,
+                        logit_mask,
+                        ignore_index=self.ignore_index)
+                    avg_loss += self.multi_loss_weight[i] * loss
+        return avg_loss
+
+    def _learning_to_downsample(self,
+                                x,
+                                dw_channels1=32,
+                                dw_channels2=48,
+                                out_channels=64):
+        x = relu(bn(conv(x, dw_channels1, 3, 2)))
+        with scope('dsconv1'):
+            x = separate_conv(
+                x, dw_channels2, stride=2, filter=3, act=fluid.layers.relu)
+        with scope('dsconv2'):
+            x = separate_conv(
+                x, out_channels, stride=2, filter=3, act=fluid.layers.relu)
+        return x
+
+    def _shortcut(self, input, data_residual):
+        return fluid.layers.elementwise_add(input, data_residual)
+
+    def _dropout2d(self, input, prob, is_train=False):
+        if not is_train:
+            return input
+        keep_prob = 1.0 - prob
+        shape = fluid.layers.shape(input)
+        channels = shape[1]
+        random_tensor = keep_prob + fluid.layers.uniform_random(
+            [shape[0], channels, 1, 1], min=0., max=1.)
+ binary_tensor = fluid.layers.floor(random_tensor) + output = input / keep_prob * binary_tensor + return output + + def _inverted_residual_unit(self, + input, + num_in_filter, + num_filters, + ifshortcut, + stride, + filter_size, + padding, + expansion_factor, + name=None): + num_expfilter = int(round(num_in_filter * expansion_factor)) + + channel_expand = conv_bn_layer( + input=input, + num_filters=num_expfilter, + filter_size=1, + stride=1, + padding=0, + num_groups=1, + if_act=True, + name=name + '_expand') + + bottleneck_conv = conv_bn_layer( + input=channel_expand, + num_filters=num_expfilter, + filter_size=filter_size, + stride=stride, + padding=padding, + num_groups=num_expfilter, + if_act=True, + name=name + '_dwise', + use_cudnn=False) + + depthwise_output = bottleneck_conv + + linear_out = conv_bn_layer( + input=bottleneck_conv, + num_filters=num_filters, + filter_size=1, + stride=1, + padding=0, + num_groups=1, + if_act=False, + name=name + '_linear') + + if ifshortcut: + out = self._shortcut(input=input, data_residual=linear_out) + return out, depthwise_output + else: + return linear_out, depthwise_output + + def _inverted_blocks(self, input, in_c, t, c, n, s, name=None): + first_block, depthwise_output = self._inverted_residual_unit( + input=input, + num_in_filter=in_c, + num_filters=c, + ifshortcut=False, + stride=s, + filter_size=3, + padding=1, + expansion_factor=t, + name=name + '_1') + + last_residual_block = first_block + last_c = c + + for i in range(1, n): + last_residual_block, depthwise_output = self._inverted_residual_unit( + input=last_residual_block, + num_in_filter=last_c, + num_filters=c, + ifshortcut=True, + stride=1, + filter_size=3, + padding=1, + expansion_factor=t, + name=name + '_' + str(i + 1)) + return last_residual_block, depthwise_output + + def _psp_module(self, input, out_features): + + cat_layers = [] + sizes = (1, 2, 3, 6) + for size in sizes: + psp_name = "psp" + str(size) + with scope(psp_name): + pool = fluid.layers.adaptive_pool2d( + input, + pool_size=[size, size], + pool_type='avg', + name=psp_name + '_adapool') + data = conv( + pool, + out_features, + filter_size=1, + bias_attr=False, + name=psp_name + '_conv') + data_bn = bn(data, act='relu') + interp = fluid.layers.resize_bilinear( + data_bn, + out_shape=fluid.layers.shape(input)[2:], + name=psp_name + '_interp', + align_mode=0) + cat_layers.append(interp) + cat_layers = [input] + cat_layers + out = fluid.layers.concat(cat_layers, axis=1, name='psp_cat') + + return out + + def _aux_layer(self, x, num_classes): + x = relu(bn(conv(x, 32, 3, padding=1))) + x = self._dropout2d(x, 0.1, is_train=(self.mode == 'train')) + with scope('logit'): + x = conv(x, num_classes, 1, bias_attr=True) + return x + + def _feature_fusion(self, + higher_res_feature, + lower_res_feature, + higher_in_channels, + lower_in_channels, + out_channels, + scale_factor=4): + shape = fluid.layers.shape(higher_res_feature) + w = shape[-1] + h = shape[-2] + lower_res_feature = fluid.layers.resize_bilinear( + lower_res_feature, [h, w], align_mode=0) + + with scope('dwconv'): + lower_res_feature = relu( + bn(conv(lower_res_feature, out_channels, + 1))) #(lower_res_feature) + with scope('conv_lower_res'): + lower_res_feature = bn( + conv( + lower_res_feature, out_channels, 1, bias_attr=True)) + with scope('conv_higher_res'): + higher_res_feature = bn( + conv( + higher_res_feature, out_channels, 1, bias_attr=True)) + out = higher_res_feature + lower_res_feature + + return relu(out) + + def _global_feature_extractor(self, + x, + 
in_channels=64, + block_channels=(64, 96, 128), + out_channels=128, + t=6, + num_blocks=(3, 3, 3)): + x, _ = self._inverted_blocks(x, in_channels, t, block_channels[0], + num_blocks[0], 2, 'inverted_block_1') + x, _ = self._inverted_blocks(x, block_channels[0], t, + block_channels[1], num_blocks[1], 2, + 'inverted_block_2') + x, _ = self._inverted_blocks(x, block_channels[1], t, + block_channels[2], num_blocks[2], 1, + 'inverted_block_3') + x = self._psp_module(x, block_channels[2] // 4) + + with scope('out'): + x = relu(bn(conv(x, out_channels, 1))) + + return x + + def _classifier(self, x, dw_channels, stride=1): + with scope('dsconv1'): + x = separate_conv( + x, dw_channels, stride=stride, filter=3, act=fluid.layers.relu) + with scope('dsconv2'): + x = separate_conv( + x, dw_channels, stride=stride, filter=3, act=fluid.layers.relu) + + x = self._dropout2d(x, 0.1, is_train=self.mode == 'train') + x = conv(x, self.num_classes, 1, bias_attr=True) + return x diff --git a/paddlex/cv/nets/segmentation/hrnet.py b/paddlex/cv/nets/segmentation/hrnet.py index b0bf10d5dd172851b12234a0e07a059f58b82773..209da9b507ba8e59a073fab616418c378a1e7cd5 100644 --- a/paddlex/cv/nets/segmentation/hrnet.py +++ b/paddlex/cv/nets/segmentation/hrnet.py @@ -27,7 +27,6 @@ from .model_utils.loss import softmax_with_loss from .model_utils.loss import dice_loss from .model_utils.loss import bce_loss import paddlex -import paddlex.utils.logging as logging class HRNet(object): @@ -83,7 +82,8 @@ class HRNet(object): st4[3] = fluid.layers.resize_bilinear(st4[3], out_shape=shape) out = fluid.layers.concat(st4, axis=1) - last_channels = sum(self.backbone.channels[self.backbone.width][-1]) + last_channels = sum(self.backbone.channels[str(self.backbone.width)][ + -1]) out = self._conv_bn_layer( input=out, diff --git a/paddlex/cv/nets/segmentation/unet.py b/paddlex/cv/nets/segmentation/unet.py index 899ba1621ea39d7e11623259ed90744f823b179c..f3675ea5124d684809ebaf1138545a4ea6fd6cec 100644 --- a/paddlex/cv/nets/segmentation/unet.py +++ b/paddlex/cv/nets/segmentation/unet.py @@ -27,7 +27,6 @@ from .model_utils.libs import sigmoid_to_softmax from .model_utils.loss import softmax_with_loss from .model_utils.loss import dice_loss from .model_utils.loss import bce_loss -import paddlex.utils.logging as logging class UNet(object): @@ -106,7 +105,8 @@ class UNet(object): name='weights', regularizer=fluid.regularizer.L2DecayRegularizer( regularization_coeff=0.0), - initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.33)) + initializer=fluid.initializer.TruncatedNormal( + loc=0.0, scale=0.33)) with scope("conv0"): data = bn_relu( conv( @@ -140,8 +140,7 @@ class UNet(object): name='weights', regularizer=fluid.regularizer.L2DecayRegularizer( regularization_coeff=0.0), - initializer=fluid.initializer.XavierInitializer(), - ) + initializer=fluid.initializer.XavierInitializer(), ) with scope("up"): if self.upsample_mode == 'bilinear': short_cut_shape = fluid.layers.shape(short_cut) @@ -197,7 +196,8 @@ class UNet(object): name='weights', regularizer=fluid.regularizer.L2DecayRegularizer( regularization_coeff=0.0), - initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.01)) + initializer=fluid.initializer.TruncatedNormal( + loc=0.0, scale=0.01)) with scope("logit"): data = conv( data, diff --git a/paddlex/cv/nets/shufflenet_v2.py b/paddlex/cv/nets/shufflenet_v2.py index f663535c31f9c899799b696351ba415049c83bf6..23045ee0d7279011ad93160e778dfd88862b9953 100644 --- a/paddlex/cv/nets/shufflenet_v2.py +++ 
b/paddlex/cv/nets/shufflenet_v2.py @@ -1,16 +1,16 @@ -#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from __future__ import absolute_import from __future__ import division @@ -269,4 +269,4 @@ class ShuffleNetV2(): name='stage_' + name + '_conv3') out = fluid.layers.concat([conv_linear_1, conv_linear_2], axis=1) - return self.channel_shuffle(out, 2) \ No newline at end of file + return self.channel_shuffle(out, 2) diff --git a/paddlex/cv/transforms/__init__.py b/paddlex/cv/transforms/__init__.py index 37c14e75f72f8c6b76a608116419d58437fab99e..c74b5b19e8d1e007674f6d17a30736f42dde1789 100644 --- a/paddlex/cv/transforms/__init__.py +++ b/paddlex/cv/transforms/__init__.py @@ -15,3 +15,5 @@ from . import cls_transforms from . import det_transforms from . import seg_transforms +from . import visualize +visualize = visualize.visualize diff --git a/paddlex/cv/transforms/cls_transforms.py b/paddlex/cv/transforms/cls_transforms.py index dbcd34222daf71c05c8f26a2a38c94faacb526f2..606bb5b8d6eb4605510f734d9b737811ec22c477 100644 --- a/paddlex/cv/transforms/cls_transforms.py +++ b/paddlex/cv/transforms/cls_transforms.py @@ -32,10 +32,8 @@ class ClsTransform: class Compose(ClsTransform): """根据数据预处理/增强算子对输入数据进行操作。 所有操作的输入图像流形状均是[H, W, C],其中H为图像高,W为图像宽,C为图像通道数。 - Args: transforms (list): 数据预处理/增强算子。 - Raises: TypeError: 形参数据类型不满足需求。 ValueError: 数据长度不匹配。 @@ -70,8 +68,8 @@ class Compose(ClsTransform): if isinstance(im, np.ndarray): if len(im.shape) != 3: raise Exception( - "im should be 3-dimension, but now is {}-dimensions". - format(len(im.shape))) + "im should be 3-dimension, but now is {}-dimensions".format( + len(im.shape))) else: try: im = cv2.imread(im).astype('float32') @@ -100,7 +98,9 @@ class Compose(ClsTransform): transform_names = [type(x).__name__ for x in self.transforms] for aug in augmenters: if type(aug).__name__ in transform_names: - logging.error("{} is already in ComposedTransforms, need to remove it from add_augmenters().".format(type(aug).__name__)) + logging.error( + "{} is already in ComposedTransforms, need to remove it from add_augmenters().". 
+ format(type(aug).__name__)) self.transforms = augmenters + self.transforms @@ -139,8 +139,8 @@ class RandomCrop(ClsTransform): tuple: 当label为空时,返回的tuple为(im, ),对应图像np.ndarray数据; 当label不为空时,返回的tuple为(im, label),分别对应图像np.ndarray数据、图像类别id。 """ - im = random_crop(im, self.crop_size, self.lower_scale, - self.lower_ratio, self.upper_ratio) + im = random_crop(im, self.crop_size, self.lower_scale, self.lower_ratio, + self.upper_ratio) if label is None: return (im, ) else: @@ -270,14 +270,12 @@ class ResizeByShort(ClsTransform): im_short_size = min(im.shape[0], im.shape[1]) im_long_size = max(im.shape[0], im.shape[1]) scale = float(self.short_size) / im_short_size - if self.max_size > 0 and np.round(scale * - im_long_size) > self.max_size: + if self.max_size > 0 and np.round(scale * im_long_size) > self.max_size: scale = float(self.max_size) / float(im_long_size) resized_width = int(round(im.shape[1] * scale)) resized_height = int(round(im.shape[0] * scale)) im = cv2.resize( - im, (resized_width, resized_height), - interpolation=cv2.INTER_LINEAR) + im, (resized_width, resized_height), interpolation=cv2.INTER_LINEAR) if label is None: return (im, ) @@ -434,6 +432,7 @@ class RandomDistort(ClsTransform): params['im'] = im if np.random.uniform(0, 1) < prob: im = ops[id](**params) + im = im.astype('float32') if label is None: return (im, ) else: @@ -490,13 +489,15 @@ class ComposedClsTransforms(Compose): crop_size(int|list): 输入模型里的图像大小 mean(list): 图像均值 std(list): 图像方差 + random_horizontal_flip(bool): 是否以0.5的概率使用随机水平翻转增强,该仅在mode为`train`时生效,默认为True """ def __init__(self, mode, crop_size=[224, 224], mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225]): + std=[0.229, 0.224, 0.225], + random_horizontal_flip=True): width = crop_size if isinstance(crop_size, list): if crop_size[0] != crop_size[1]: @@ -512,10 +513,11 @@ class ComposedClsTransforms(Compose): if mode == 'train': # 训练时的transforms,包含数据增强 transforms = [ - RandomCrop(crop_size=width), RandomHorizontalFlip(prob=0.5), - Normalize( + RandomCrop(crop_size=width), Normalize( mean=mean, std=std) ] + if random_horizontal_flip: + transforms.insert(0, RandomHorizontalFlip()) else: # 验证/预测时的transforms transforms = [ diff --git a/paddlex/cv/transforms/det_transforms.py b/paddlex/cv/transforms/det_transforms.py index 45eff2569943b70eb99d6b7d54a32c1f709bff58..dd193d2f14aac37b58b2af9ee49e8d99d51f05ad 100644 --- a/paddlex/cv/transforms/det_transforms.py +++ b/paddlex/cv/transforms/det_transforms.py @@ -41,10 +41,8 @@ class DetTransform: class Compose(DetTransform): """根据数据预处理/增强列表对输入数据进行操作。 所有操作的输入图像流形状均是[H, W, C],其中H为图像高,W为图像宽,C为图像通道数。 - Args: transforms (list): 数据预处理/增强列表。 - Raises: TypeError: 形参数据类型不满足需求。 ValueError: 数据长度不匹配。 @@ -160,7 +158,9 @@ class Compose(DetTransform): transform_names = [type(x).__name__ for x in self.transforms] for aug in augmenters: if type(aug).__name__ in transform_names: - logging.error("{} is already in ComposedTransforms, need to remove it from add_augmenters().".format(type(aug).__name__)) + logging.error( + "{} is already in ComposedTransforms, need to remove it from add_augmenters().". 
+ format(type(aug).__name__)) self.transforms = augmenters + self.transforms @@ -220,15 +220,13 @@ class ResizeByShort(DetTransform): im_short_size = min(im.shape[0], im.shape[1]) im_long_size = max(im.shape[0], im.shape[1]) scale = float(self.short_size) / im_short_size - if self.max_size > 0 and np.round(scale * - im_long_size) > self.max_size: + if self.max_size > 0 and np.round(scale * im_long_size) > self.max_size: scale = float(self.max_size) / float(im_long_size) resized_width = int(round(im.shape[1] * scale)) resized_height = int(round(im.shape[0] * scale)) im_resize_info = [resized_height, resized_width, scale] im = cv2.resize( - im, (resized_width, resized_height), - interpolation=cv2.INTER_LINEAR) + im, (resized_width, resized_height), interpolation=cv2.INTER_LINEAR) im_info['im_resize_info'] = np.array(im_resize_info).astype(np.float32) if label_info is None: return (im, im_info) @@ -268,8 +266,7 @@ class Padding(DetTransform): if not isinstance(target_size, tuple) and not isinstance( target_size, list): raise TypeError( - "Padding: Type of target_size must in (int|list|tuple)." - ) + "Padding: Type of target_size must in (int|list|tuple).") elif len(target_size) != 2: raise ValueError( "Padding: Length of target_size must equal 2.") @@ -454,8 +451,7 @@ class RandomHorizontalFlip(DetTransform): ValueError: 数据长度不匹配。 """ if not isinstance(im, np.ndarray): - raise TypeError( - "RandomHorizontalFlip: image is not a numpy array.") + raise TypeError("RandomHorizontalFlip: image is not a numpy array.") if len(im.shape) != 3: raise ValueError( "RandomHorizontalFlip: image is not 3-dimensional.") @@ -621,6 +617,7 @@ class RandomDistort(DetTransform): if np.random.uniform(0, 1) < prob: im = ops[id](**params) + im = im.astype('float32') if label_info is None: return (im, im_info) else: @@ -727,22 +724,38 @@ class MixupImage(DetTransform): 'Becasuse gt_bbox/gt_class/gt_score is not in label_info!') gt_bbox1 = label_info['gt_bbox'] gt_bbox2 = im_info['mixup'][2]['gt_bbox'] - gt_bbox = np.concatenate((gt_bbox1, gt_bbox2), axis=0) gt_class1 = label_info['gt_class'] gt_class2 = im_info['mixup'][2]['gt_class'] - gt_class = np.concatenate((gt_class1, gt_class2), axis=0) - gt_score1 = label_info['gt_score'] gt_score2 = im_info['mixup'][2]['gt_score'] - gt_score = np.concatenate( - (gt_score1 * factor, gt_score2 * (1. - factor)), axis=0) if 'gt_poly' in label_info: gt_poly1 = label_info['gt_poly'] gt_poly2 = im_info['mixup'][2]['gt_poly'] - label_info['gt_poly'] = gt_poly1 + gt_poly2 is_crowd1 = label_info['is_crowd'] is_crowd2 = im_info['mixup'][2]['is_crowd'] - is_crowd = np.concatenate((is_crowd1, is_crowd2), axis=0) + + if 0 not in gt_class1 and 0 not in gt_class2: + gt_bbox = np.concatenate((gt_bbox1, gt_bbox2), axis=0) + gt_class = np.concatenate((gt_class1, gt_class2), axis=0) + gt_score = np.concatenate( + (gt_score1 * factor, gt_score2 * (1. - factor)), axis=0) + if 'gt_poly' in label_info: + label_info['gt_poly'] = gt_poly1 + gt_poly2 + is_crowd = np.concatenate((is_crowd1, is_crowd2), axis=0) + elif 0 in gt_class1: + gt_bbox = gt_bbox2 + gt_class = gt_class2 + gt_score = gt_score2 * (1. 
- factor) + if 'gt_poly' in label_info: + label_info['gt_poly'] = gt_poly2 + is_crowd = is_crowd2 + else: + gt_bbox = gt_bbox1 + gt_class = gt_class1 + gt_score = gt_score1 * factor + if 'gt_poly' in label_info: + label_info['gt_poly'] = gt_poly1 + is_crowd = is_crowd1 label_info['gt_bbox'] = gt_bbox label_info['gt_score'] = gt_score label_info['gt_class'] = gt_class @@ -769,9 +782,7 @@ class RandomExpand(DetTransform): fill_value (list): 扩张图像的初始填充值(0-255)。默认为[123.675, 116.28, 103.53]。 """ - def __init__(self, - ratio=4., - prob=0.5, + def __init__(self, ratio=4., prob=0.5, fill_value=[123.675, 116.28, 103.53]): super(RandomExpand, self).__init__() assert ratio > 1.01, "expand ratio must be larger than 1.01" @@ -811,9 +822,11 @@ class RandomExpand(DetTransform): 'gt_class' not in label_info: raise TypeError('Cannot do RandomExpand! ' + \ 'Becasuse gt_bbox/gt_class is not in label_info!') - if np.random.uniform(0., 1.) < self.prob: + if np.random.uniform(0., 1.) > self.prob: return (im, im_info, label_info) + if 'gt_class' in label_info and 0 in label_info['gt_class']: + return (im, im_info, label_info) image_shape = im_info['image_shape'] height = int(image_shape[0]) width = int(image_shape[1]) @@ -909,6 +922,8 @@ class RandomCrop(DetTransform): if len(label_info['gt_bbox']) == 0: return (im, im_info, label_info) + if 'gt_class' in label_info and 0 in label_info['gt_class']: + return (im, im_info, label_info) image_shape = im_info['image_shape'] w = image_shape[1] @@ -1204,9 +1219,10 @@ class ArrangeYOLOv3(DetTransform): if gt_num > 0: label_info['gt_class'][:gt_num, 0] = label_info[ 'gt_class'][:gt_num, 0] - 1 - gt_bbox[:gt_num, :] = label_info['gt_bbox'][:gt_num, :] - gt_class[:gt_num] = label_info['gt_class'][:gt_num, 0] - gt_score[:gt_num] = label_info['gt_score'][:gt_num, 0] + if -1 not in label_info['gt_class']: + gt_bbox[:gt_num, :] = label_info['gt_bbox'][:gt_num, :] + gt_class[:gt_num] = label_info['gt_class'][:gt_num, 0] + gt_score[:gt_num] = label_info['gt_score'][:gt_num, 0] # parse [x1, y1, x2, y2] to [x, y, w, h] gt_bbox[:, 2:4] = gt_bbox[:, 2:4] - gt_bbox[:, :2] gt_bbox[:, :2] = gt_bbox[:, :2] + gt_bbox[:, 2:4] / 2. 
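
# --- A distilled sketch of the negative-sample rule MixupImage gained above:
# class id 0 marks a pure-background sample, and mixing keeps only the other
# side's boxes instead of concatenating both. Illustration only; the shipped
# code operates on the im_info/label_info dicts in place.
import numpy as np

def mixup_gt(gt1, cls1, score1, gt2, cls2, score2, factor):
    if 0 not in cls1 and 0 not in cls2:     # both sides have real objects
        return (np.concatenate((gt1, gt2)),
                np.concatenate((cls1, cls2)),
                np.concatenate((score1 * factor, score2 * (1. - factor))))
    if 0 in cls1:                           # sample 1 is background-only
        return gt2, cls2, score2 * (1. - factor)
    return gt1, cls1, score1 * factor       # sample 2 is background-only
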
@@ -1260,21 +1276,25 @@ class ComposedRCNNTransforms(Compose): min_max_size(list): 图像在缩放时,最小边和最大边的约束条件 mean(list): 图像均值 std(list): 图像方差 + random_horizontal_flip(bool): 是否以0.5的概率使用随机水平翻转增强,该仅在mode为`train`时生效,默认为True """ def __init__(self, mode, min_max_size=[800, 1333], mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225]): + std=[0.229, 0.224, 0.225], + random_horizontal_flip=True): if mode == 'train': # 训练时的transforms,包含数据增强 transforms = [ - RandomHorizontalFlip(prob=0.5), Normalize( + Normalize( mean=mean, std=std), ResizeByShort( short_size=min_max_size[0], max_size=min_max_size[1]), Padding(coarsest_stride=32) ] + if random_horizontal_flip: + transforms.insert(0, RandomHorizontalFlip()) else: # 验证/预测时的transforms transforms = [ @@ -1304,9 +1324,14 @@ class ComposedYOLOv3Transforms(Compose): Args: mode(str): 图像处理流程所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test' shape(list): 输入模型中图像的大小,输入模型的图像会被Resize成此大小 - mixup_epoch(int): 模型训练过程中,前mixup_epoch会使用mixup策略 + mixup_epoch(int): 模型训练过程中,前mixup_epoch会使用mixup策略, 若设为-1,则表示不使用该策略 mean(list): 图像均值 std(list): 图像方差 + random_distort(bool): 数据增强方式,参数仅在mode为`train`时生效,表示是否在训练过程中随机扰动图像,默认为True + random_expand(bool): 数据增强方式,参数仅在mode为`train`时生效,表示是否在训练过程中随机扩张图像,默认为True + random_crop(bool): 数据增强方式,参数仅在mode为`train`时生效,表示是否在训练过程中随机裁剪图像,默认为True + random_horizontal_flip(bool): 数据增强方式,参数仅在mode为`train`时生效,表示是否在训练过程中随机水平翻转图像,默认为True + """ def __init__(self, @@ -1314,7 +1339,11 @@ class ComposedYOLOv3Transforms(Compose): shape=[608, 608], mixup_epoch=250, mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225]): + std=[0.229, 0.224, 0.225], + random_distort=True, + random_expand=True, + random_crop=True, + random_horizontal_flip=True): width = shape if isinstance(shape, list): if shape[0] != shape[1]: @@ -1329,12 +1358,18 @@ class ComposedYOLOv3Transforms(Compose): if mode == 'train': # 训练时的transforms,包含数据增强 transforms = [ - MixupImage(mixup_epoch=mixup_epoch), RandomDistort(), - RandomExpand(), RandomCrop(), Resize( - target_size=width, - interp='RANDOM'), RandomHorizontalFlip(), Normalize( + MixupImage(mixup_epoch=mixup_epoch), Resize( + target_size=width, interp='RANDOM'), Normalize( mean=mean, std=std) ] + if random_horizontal_flip: + transforms.insert(1, RandomHorizontalFlip()) + if random_crop: + transforms.insert(1, RandomCrop()) + if random_expand: + transforms.insert(1, RandomExpand()) + if random_distort: + transforms.insert(1, RandomDistort()) else: # 验证/预测时的transforms transforms = [ diff --git a/paddlex/cv/transforms/seg_transforms.py b/paddlex/cv/transforms/seg_transforms.py index 9ea1c3bdc2159dbc1f33ac5f15dc710e12ccb83c..f2bfb32ebeed11f84c27ab7f1d8e8920f21699a7 100644 --- a/paddlex/cv/transforms/seg_transforms.py +++ b/paddlex/cv/transforms/seg_transforms.py @@ -35,14 +35,11 @@ class SegTransform: class Compose(SegTransform): """根据数据预处理/增强算子对输入数据进行操作。 所有操作的输入图像流形状均是[H, W, C],其中H为图像高,W为图像宽,C为图像通道数。 - Args: transforms (list): 数据预处理/增强算子。 - Raises: TypeError: transforms不是list对象 ValueError: transforms元素个数小于1。 - """ def __init__(self, transforms): @@ -71,7 +68,6 @@ class Compose(SegTransform): 图像在过resize前shape为(200, 300), 过padding前shape为 (400, 600) label (str/np.ndarray): 标注图像路径/标注图像np.ndarray数据。 - Returns: tuple: 根据网络所需字段所组成的tuple;字段由transforms中的最后一个数据预处理操作决定。 """ @@ -116,7 +112,9 @@ class Compose(SegTransform): transform_names = [type(x).__name__ for x in self.transforms] for aug in augmenters: if type(aug).__name__ in transform_names: - logging.error("{} is already in ComposedTransforms, need to remove it from 
diff --git a/paddlex/cv/transforms/seg_transforms.py b/paddlex/cv/transforms/seg_transforms.py
index 9ea1c3bdc2159dbc1f33ac5f15dc710e12ccb83c..f2bfb32ebeed11f84c27ab7f1d8e8920f21699a7 100644
--- a/paddlex/cv/transforms/seg_transforms.py
+++ b/paddlex/cv/transforms/seg_transforms.py
@@ -35,14 +35,11 @@ class SegTransform:
 class Compose(SegTransform):
     """根据数据预处理/增强算子对输入数据进行操作。
        所有操作的输入图像流形状均是[H, W, C],其中H为图像高,W为图像宽,C为图像通道数。
-
     Args:
         transforms (list): 数据预处理/增强算子。
-
     Raises:
         TypeError: transforms不是list对象
         ValueError: transforms元素个数小于1。
-
     """

     def __init__(self, transforms):
@@ -71,7 +68,6 @@ class Compose(SegTransform):
                 图像在过resize前shape为(200, 300), 过padding前shape为
                 (400, 600)
             label (str/np.ndarray): 标注图像路径/标注图像np.ndarray数据。
-
         Returns:
             tuple: 根据网络所需字段所组成的tuple;字段由transforms中的最后一个数据预处理操作决定。
         """
@@ -116,7 +112,9 @@ class Compose(SegTransform):
         transform_names = [type(x).__name__ for x in self.transforms]
         for aug in augmenters:
             if type(aug).__name__ in transform_names:
-                logging.error("{} is already in ComposedTransforms, need to remove it from add_augmenters().".format(type(aug).__name__))
+                logging.error(
+                    "{} is already in ComposedTransforms, need to remove it from add_augmenters().".
+                    format(type(aug).__name__))
         self.transforms = augmenters + self.transforms
@@ -401,8 +399,7 @@ class ResizeByShort(SegTransform):
         im_short_size = min(im.shape[0], im.shape[1])
         im_long_size = max(im.shape[0], im.shape[1])
         scale = float(self.short_size) / im_short_size
-        if self.max_size > 0 and np.round(scale *
-                                          im_long_size) > self.max_size:
+        if self.max_size > 0 and np.round(scale * im_long_size) > self.max_size:
             scale = float(self.max_size) / float(im_long_size)
         resized_width = int(round(im.shape[1] * scale))
         resized_height = int(round(im.shape[0] * scale))
@@ -1053,6 +1050,7 @@ class RandomDistort(SegTransform):
             params['im'] = im
             if np.random.uniform(0, 1) < prob:
                 im = ops[id](**params)
+        im = im.astype('float32')
         if label is None:
             return (im, im_info)
         else:
@@ -1113,25 +1111,35 @@ class ComposedSegTransforms(Compose):
     Args:
         mode(str): 图像处理所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test'
-        train_crop_size(list): 模型训练阶段,随机从原图crop的大小
+        min_max_size(list): 训练过程中,图像的最长边会随机resize至此区间(短边按比例相应resize);预测阶段,图像最长边会resize至此区间中间值,即(min_size+max_size)/2。默认为[400, 600]
+        train_crop_size(list): 仅在mode为'train'时生效,训练过程中,随机从图像中裁剪出对应大小的子图(如若原图小于此大小,则会padding到此大小),默认为[400, 600]
         mean(list): 图像均值
         std(list): 图像方差
+        random_horizontal_flip(bool): 数据增强方式,仅在mode为'train'时生效,表示训练过程是否随机水平翻转图像,默认为True
     """

     def __init__(self,
                  mode,
-                 train_crop_size=[769, 769],
+                 min_max_size=[400, 600],
+                 train_crop_size=[512, 512],
                  mean=[0.5, 0.5, 0.5],
-                 std=[0.5, 0.5, 0.5]):
+                 std=[0.5, 0.5, 0.5],
+                 random_horizontal_flip=True):
         if mode == 'train':
             # 训练时的transforms,包含数据增强
             transforms = [
-                RandomHorizontalFlip(prob=0.5), ResizeStepScaling(),
+                ResizeRangeScaling(
+                    min_value=min(min_max_size), max_value=max(min_max_size)),
                 RandomPaddingCrop(crop_size=train_crop_size), Normalize(
                     mean=mean, std=std)
             ]
+            if random_horizontal_flip:
+                transforms.insert(0, RandomHorizontalFlip())
         else:
             # 验证/预测时的transforms
-            transforms = [Normalize(mean=mean, std=std)]
-
+            long_size = (min(min_max_size) + max(min_max_size)) // 2
+            transforms = [
+                ResizeByLong(long_size=long_size), Normalize(
+                    mean=mean, std=std)
+            ]
         super(ComposedSegTransforms, self).__init__(transforms)
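The reworked ComposedSegTransforms couples the training and evaluation scales through `min_max_size`. A usage sketch mirroring the new defaults:

```python
from paddlex.seg import transforms

# Training: long side randomly resized into [400, 600], optional
# horizontal flip, then a 512x512 random (padding) crop.
train_transforms = transforms.ComposedSegTransforms(
    mode='train',
    min_max_size=[400, 600],
    train_crop_size=[512, 512],
    random_horizontal_flip=True)

# Eval/predict: the long side is resized to (400 + 600) // 2 = 500,
# keeping train and eval scales consistent.
eval_transforms = transforms.ComposedSegTransforms(mode='eval')
```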
diff --git a/paddlex/cv/transforms/visualize.py b/paddlex/cv/transforms/visualize.py
new file mode 100644
index 0000000000000000000000000000000000000000..514ac73b2107caade31a5e760b1ec6fa2d4d8076
--- /dev/null
+++ b/paddlex/cv/transforms/visualize.py
@@ -0,0 +1,306 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import os.path as osp
+import cv2
+from PIL import Image
+import numpy as np
+import math
+from .imgaug_support import execute_imgaug
+from .cls_transforms import ClsTransform
+from .det_transforms import DetTransform
+from .seg_transforms import SegTransform
+import paddlex as pdx
+from paddlex.cv.models.utils.visualize import get_color_map_list
+
+
+def _draw_rectangle_and_cname(img, xmin, ymin, xmax, ymax, cname, color):
+    """ 根据提供的标注信息,给图片描绘框体和类别显示
+
+    Args:
+        img: 图像np.ndarray数据
+        xmin: 检测框最小的x坐标
+        ymin: 检测框最小的y坐标
+        xmax: 检测框最大的x坐标
+        ymax: 检测框最大的y坐标
+        cname: 类别信息
+        color: 类别与颜色的对应信息
+    """
+    # 描绘检测框
+    line_width = math.ceil(2 * max(img.shape[0:2]) / 600)
+    cv2.rectangle(
+        img,
+        pt1=(xmin, ymin),
+        pt2=(xmax, ymax),
+        color=color,
+        thickness=line_width)
+    return img
+
+def cls_compose(im, label=None, transforms=None, vdl_writer=None, step=0):
+    """
+    Args:
+        im (str/np.ndarray): 图像路径/图像np.ndarray数据。
+        label (int): 每张图像所对应的类别序号。
+        vdl_writer (visualdl.LogWriter): VisualDL存储器,日志信息将保存在其中。
+            当为None时,不对日志进行保存。默认为None。
+        step (int): 数据预处理的轮数,当vdl_writer不为None时有效。默认为0。
+
+    Returns:
+        tuple: 根据网络所需字段所组成的tuple;
+            字段由transforms中的最后一个数据预处理操作决定。
+    """
+    if isinstance(im, np.ndarray):
+        if len(im.shape) != 3:
+            raise Exception(
+                "im should be 3-dimensions, but now is {}-dimensions".
+                format(len(im.shape)))
+    else:
+        try:
+            im = cv2.imread(im).astype('float32')
+        except:
+            raise TypeError('Can\'t read the image file {}!'.format(im))
+    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
+    if vdl_writer is not None:
+        vdl_writer.add_image(tag='0. OriginalImage/' + str(step),
+                             img=im,
+                             step=0)
+    op_id = 1
+    for op in transforms:
+        if isinstance(op, ClsTransform):
+            if vdl_writer is not None and hasattr(op, 'prob'):
+                op.prob = 1.0
+            outputs = op(im, label)
+            im = outputs[0]
+            if len(outputs) == 2:
+                label = outputs[1]
+            if isinstance(op, pdx.cv.transforms.cls_transforms.Normalize):
+                continue
+        else:
+            import imgaug.augmenters as iaa
+            if isinstance(op, iaa.Augmenter):
+                im = execute_imgaug(op, im)
+            outputs = (im, )
+            if label is not None:
+                outputs = (im, label)
+        if vdl_writer is not None:
+            tag = str(op_id) + '. ' + op.__class__.__name__ + '/' + str(step)
+            vdl_writer.add_image(tag=tag,
+                                 img=im,
+                                 step=0)
+        op_id += 1
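The new module logs one image per pipeline stage under tags of the form `"<op_id>. <OpName>/<sample_idx>"`, so VisualDL groups all stages of one sample side by side. A minimal standalone sketch of that tag scheme (the synthetic image and flip stand in for real transform outputs; VisualDL's `add_image` expects an HWC uint8 array):

```python
import numpy as np
from visualdl import LogWriter

writer = LogWriter('vdl_output/image_transforms')
im = (np.random.rand(224, 224, 3) * 255).astype('uint8')

# Stage 0 is the decoded input, later stages follow each transform.
writer.add_image(tag='0. OriginalImage/0', img=im, step=0)
writer.add_image(
    tag='1. RandomHorizontalFlip/0',
    img=np.ascontiguousarray(im[:, ::-1]),
    step=0)
```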
+
+def det_compose(im, im_info=None, label_info=None, transforms=None, vdl_writer=None, step=0,
+                labels=[], catid2color=None):
+    def decode_image(im_file, im_info, label_info):
+        if im_info is None:
+            im_info = dict()
+        if isinstance(im_file, np.ndarray):
+            if len(im_file.shape) != 3:
+                raise Exception(
+                    "im should be 3-dimensions, but now is {}-dimensions".
+                    format(len(im_file.shape)))
+            im = im_file
+        else:
+            try:
+                im = cv2.imread(im_file).astype('float32')
+            except:
+                raise TypeError('Can\'t read the image file {}!'.format(
+                    im_file))
+            im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
+        # make default im_info with [h, w, 1]
+        im_info['im_resize_info'] = np.array(
+            [im.shape[0], im.shape[1], 1.], dtype=np.float32)
+        im_info['image_shape'] = np.array([im.shape[0],
+                                           im.shape[1]]).astype('int32')
+        use_mixup = False
+        for t in transforms:
+            if type(t).__name__ == 'MixupImage':
+                use_mixup = True
+        if not use_mixup:
+            if 'mixup' in im_info:
+                del im_info['mixup']
+        # decode mixup image
+        if 'mixup' in im_info:
+            im_info['mixup'] = \
+                decode_image(im_info['mixup'][0],
+                             im_info['mixup'][1],
+                             im_info['mixup'][2])
+        if label_info is None:
+            return (im, im_info)
+        else:
+            return (im, im_info, label_info)
+
+    outputs = decode_image(im, im_info, label_info)
+    im = outputs[0]
+    im_info = outputs[1]
+    if len(outputs) == 3:
+        label_info = outputs[2]
+    if vdl_writer is not None:
+        vdl_writer.add_image(tag='0. OriginalImage/' + str(step),
+                             img=im,
+                             step=0)
+    op_id = 1
+    bboxes = label_info['gt_bbox']
+    transforms = [None] + transforms
+    for op in transforms:
+        if im is None:
+            return None
+        if isinstance(op, DetTransform) or op is None:
+            if vdl_writer is not None and hasattr(op, 'prob'):
+                op.prob = 1.0
+            if op is not None:
+                outputs = op(im, im_info, label_info)
+            else:
+                outputs = (im, im_info, label_info)
+            im = outputs[0]
+            vdl_im = im
+            if vdl_writer is not None:
+                if isinstance(op, pdx.cv.transforms.det_transforms.ResizeByShort):
+                    scale = outputs[1]['im_resize_info'][2]
+                    bboxes = bboxes * scale
+                elif isinstance(op, pdx.cv.transforms.det_transforms.Resize):
+                    h = outputs[1]['image_shape'][0]
+                    w = outputs[1]['image_shape'][1]
+                    target_size = op.target_size
+                    if isinstance(target_size, int):
+                        h_scale = float(target_size) / h
+                        w_scale = float(target_size) / w
+                    else:
+                        h_scale = float(target_size[0]) / h
+                        w_scale = float(target_size[1]) / w
+                    bboxes[:, 0] = bboxes[:, 0] * w_scale
+                    bboxes[:, 1] = bboxes[:, 1] * h_scale
+                    bboxes[:, 2] = bboxes[:, 2] * w_scale
+                    bboxes[:, 3] = bboxes[:, 3] * h_scale
+                else:
+                    bboxes = outputs[2]['gt_bbox']
+                if not isinstance(op, pdx.cv.transforms.det_transforms.RandomHorizontalFlip):
+                    for i in range(bboxes.shape[0]):
+                        bbox = bboxes[i]
+                        cname = labels[outputs[2]['gt_class'][i][0] - 1]
+                        vdl_im = _draw_rectangle_and_cname(vdl_im,
+                                                           int(bbox[0]),
+                                                           int(bbox[1]),
+                                                           int(bbox[2]),
+                                                           int(bbox[3]),
+                                                           cname,
+                                                           catid2color[outputs[2]['gt_class'][i][0] - 1])
+            if isinstance(op, pdx.cv.transforms.det_transforms.Normalize):
+                continue
+        else:
+            im = execute_imgaug(op, im)
+            if label_info is not None:
+                outputs = (im, im_info, label_info)
+            else:
+                outputs = (im, im_info)
+            vdl_im = im
+        if vdl_writer is not None:
+            tag = str(op_id) + '. ' + op.__class__.__name__ + '/' + str(step)
+            if op is None:
+                tag = str(op_id) + '. OriginalImageWithGTBox/' + str(step)
+            vdl_writer.add_image(tag=tag,
+                                 img=vdl_im,
+                                 step=0)
+        op_id += 1
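det_compose keeps the ground-truth boxes aligned with a resized image by scaling each `[x1, y1, x2, y2]` with the per-axis ratio of the target size to the decoded size, exactly as in the `Resize` branch above. A standalone numpy sketch with illustrative numbers:

```python
import numpy as np

# target_size=608 on a 480x640 (h x w) image.
h, w, target = 480, 640, 608
h_scale, w_scale = float(target) / h, float(target) / w

bboxes = np.array([[100., 50., 300., 200.]])  # one box: x1, y1, x2, y2
bboxes[:, [0, 2]] *= w_scale   # x coordinates follow the width ratio (0.95)
bboxes[:, [1, 3]] *= h_scale   # y coordinates follow the height ratio (~1.267)
print(bboxes)                  # ~[[95., 63.33, 285., 253.33]]
```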
+
+def seg_compose(im, im_info=None, label=None, transforms=None, vdl_writer=None, step=0):
+    if im_info is None:
+        im_info = list()
+    if isinstance(im, np.ndarray):
+        if len(im.shape) != 3:
+            raise Exception(
+                "im should be 3-dimensions, but now is {}-dimensions".
+                format(len(im.shape)))
+    else:
+        try:
+            im = cv2.imread(im).astype('float32')
+        except:
+            raise ValueError('Can\'t read the image file {}!'.format(im))
+        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
+    if label is not None:
+        if not isinstance(label, np.ndarray):
+            label = np.asarray(Image.open(label))
+    if vdl_writer is not None:
+        vdl_writer.add_image(tag='0. OriginalImage' + '/' + str(step),
+                             img=im,
+                             step=0)
+    op_id = 1
+    for op in transforms:
+        if isinstance(op, SegTransform):
+            outputs = op(im, im_info, label)
+            im = outputs[0]
+            if len(outputs) >= 2:
+                im_info = outputs[1]
+            if len(outputs) == 3:
+                label = outputs[2]
+            if isinstance(op, pdx.cv.transforms.seg_transforms.Normalize):
+                continue
+        else:
+            im = execute_imgaug(op, im)
+            if label is not None:
+                outputs = (im, im_info, label)
+            else:
+                outputs = (im, im_info)
+        if vdl_writer is not None:
+            tag = str(op_id) + '. ' + op.__class__.__name__ + '/' + str(step)
+            vdl_writer.add_image(tag=tag,
+                                 img=im,
+                                 step=0)
+        op_id += 1
+
+def visualize(dataset, img_count=3, save_dir='vdl_output'):
+    '''对数据预处理/增强中间结果进行可视化。
+    可使用VisualDL查看中间结果:
+    1. VisualDL启动方式: visualdl --logdir vdl_output --port 8001
+    2. 浏览器打开 https://0.0.0.0:8001即可,
+       其中0.0.0.0为本机访问,如为远程服务,改成相应机器IP
+
+    Args:
+        dataset (paddlex.datasets): 数据集读取器。
+        img_count (int): 需要进行数据预处理/增强的图像数目。默认为3。
+        save_dir (str): 日志保存的路径。默认为'vdl_output'。
+    '''
+    if dataset.num_samples < img_count:
+        img_count = dataset.num_samples
+    transforms = dataset.transforms
+    if not osp.isdir(save_dir):
+        if osp.exists(save_dir):
+            os.remove(save_dir)
+        os.makedirs(save_dir)
+    from visualdl import LogWriter
+    vdl_save_dir = osp.join(save_dir, 'image_transforms')
+    vdl_writer = LogWriter(vdl_save_dir)
+    for i, data in enumerate(dataset.iterator()):
+        if i == img_count:
+            break
+        data.append(transforms.transforms)
+        data.append(vdl_writer)
+        data.append(i)
+        if isinstance(transforms, ClsTransform):
+            cls_compose(*data)
+        elif isinstance(transforms, DetTransform):
+            labels = dataset.labels
+            color_map = get_color_map_list(len(labels) + 1)
+            catid2color = {}
+            for catid in range(len(labels)):
+                catid2color[catid] = color_map[catid + 1]
+            data.append(labels)
+            data.append(catid2color)
+            det_compose(*data)
+        elif isinstance(transforms, SegTransform):
+            seg_compose(*data)
+        else:
+            raise Exception('The transform must be a subclass of \
+                ClsTransform or DetTransform or SegTransform!')
\ No newline at end of file
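A usage sketch for the new entry point; the dataset paths and the `ComposedClsTransforms` wiring are placeholders, and any `paddlex.datasets` reader works since `visualize()` dispatches on the type of its transforms:

```python
import paddlex as pdx
from paddlex.cls import transforms
from paddlex.cv.transforms.visualize import visualize

train_dataset = pdx.datasets.ImageNet(
    data_dir='mydata',
    file_list='mydata/train_list.txt',
    label_list='mydata/labels.txt',
    transforms=transforms.ComposedClsTransforms(mode='train'))
# Logs the first 3 samples' intermediate images, then inspect with:
#   visualdl --logdir vdl_output --port 8001
visualize(train_dataset, img_count=3, save_dir='vdl_output')
```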
diff --git a/paddlex/interpret/as_data_reader/data_path_utils.py b/paddlex/interpret/as_data_reader/data_path_utils.py
index e0604f4f5dfc2a622659bb537046a92cd4c2ce61..1c915050bed935c4e7f6ea34be6a231f7c05f44c 100644
--- a/paddlex/interpret/as_data_reader/data_path_utils.py
+++ b/paddlex/interpret/as_data_reader/data_path_utils.py
@@ -1,16 +1,16 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

 import os

@@ -19,4 +19,4 @@ def _find_classes(dir):
     classes = [d.name for d in os.scandir(dir) if d.is_dir()]
     classes.sort()
     class_to_idx = {classes[i]: i for i in range(len(classes))}
-    return classes, class_to_idx
\ No newline at end of file
+    return classes, class_to_idx
diff --git a/paddlex/interpret/as_data_reader/readers.py b/paddlex/interpret/as_data_reader/readers.py
index d9244f17fa0a93f26589b29471ade59843b7d454..4b551177334c1da6546a605f2cee00518d90c57a 100644
--- a/paddlex/interpret/as_data_reader/readers.py
+++ b/paddlex/interpret/as_data_reader/readers.py
@@ -1,16 +1,16 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

 import os
 import sys
@@ -223,4 +223,4 @@ def create_reader(list_image_path, list_label=None, is_test=False):

             yield img_show, img, label

-    return reader
\ No newline at end of file
+    return reader
diff --git a/paddlex/interpret/core/_session_preparation.py b/paddlex/interpret/core/_session_preparation.py
index 08eda36fb873c4e5824f8131aca77c7cdc352c22..81d3b22b216f07047b6a3a4c39701a03ec96a964 100644
--- a/paddlex/interpret/core/_session_preparation.py
+++ b/paddlex/interpret/core/_session_preparation.py
@@ -1,16 +1,16 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

 import os
 import os.path as osp
diff --git a/paddlex/interpret/core/interpretation.py b/paddlex/interpret/core/interpretation.py
index 5b1a5e45b5804acc005a407893c9ceeea8261863..ca3b1cf3371f244a1ab55e6940de2cd382fd7ab3 100644
--- a/paddlex/interpret/core/interpretation.py
+++ b/paddlex/interpret/core/interpretation.py
@@ -1,16 +1,16 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

 from .interpretation_algorithms import CAM, LIME, NormLIME
 from .normlime_base import precompute_normlime_weights
diff --git a/paddlex/interpret/core/interpretation_algorithms.py b/paddlex/interpret/core/interpretation_algorithms.py
index a54f46632567f54d934af69dfde64cacea7c5622..2805af601a91314a5d554511af04b53eef7b653a 100644
--- a/paddlex/interpret/core/interpretation_algorithms.py
+++ b/paddlex/interpret/core/interpretation_algorithms.py
@@ -1,16 +1,16 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

 import os
 import os.path as osp
diff --git a/paddlex/interpret/core/normlime_base.py b/paddlex/interpret/core/normlime_base.py
index 471078129cdd96df10ae0af1ced39ccf344c7564..8270099b17c858688903354bffcfa412ed8c804c 100644
--- a/paddlex/interpret/core/normlime_base.py
+++ b/paddlex/interpret/core/normlime_base.py
@@ -1,16 +1,16 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

 import os
 import os.path as osp
diff --git a/paddlex/interpret/visualize.py b/paddlex/interpret/visualize.py
index f0158402b69ad3eb90aac6b11a134889fda6dc2b..6c3570b05d99f359452116542c82cb9a8cbc555b 100644
--- a/paddlex/interpret/visualize.py
+++ b/paddlex/interpret/visualize.py
@@ -1,16 +1,16 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import os
 import cv2
@@ -70,8 +70,10 @@ def normlime(img_file,
             normlime_weights_file=None):
     """使用NormLIME算法将模型预测结果的可解释性可视化。
-    NormLIME是利用一定数量的样本来出一个全局的解释。NormLIME会提前计算一定数量的测
-    试样本的LIME结果,然后对相同的特征进行权重的归一化,这样来得到一个全局的输入和输出的关系。
+    NormLIME是利用一定数量的样本得出一个全局的解释。由于NormLIME计算量较大,此处采用一种简化的方式:
+    使用一定数量的测试样本(目前默认使用所有测试样本),对每个样本进行特征提取,映射到同一个特征空间;
+    然后以此特征作为输入,以模型输出作为输出,使用线性回归对其进行拟合,得到一个全局的输入和输出的关系。
+    之后,对一个测试样本进行解释时,使用NormLIME全局的解释,来对LIME的结果进行滤波,使最终的可视化结果更加稳定。

     注意1:dataset读取的是一个数据集,该数据集不宜过大,否则计算时间会较长,但应包含所有类别的数据。
     注意2:NormLIME可解释性结果可视化目前只支持分类模型。
diff --git a/paddlex/seg.py b/paddlex/seg.py
index c19ec30c883e92f91dabea4488d1890495c99934..fdfdffd4639c6b3ddb75ac20ca0b3ecf4edd2328 100644
--- a/paddlex/seg.py
+++ b/paddlex/seg.py
@@ -18,5 +18,6 @@ from . import cv
 UNet = cv.models.UNet
 DeepLabv3p = cv.models.DeepLabv3p
 HRNet = cv.models.HRNet
+FastSCNN = cv.models.FastSCNN
 transforms = cv.transforms.seg_transforms
 visualize = cv.models.utils.visualize.visualize_segmentation
diff --git a/paddlex/utils/__init__.py b/paddlex/utils/__init__.py
index ff774c985feb6ffc24a3e8c67237cdff0a074ee4..19c86d754b9b99219fdbf7be4b5e7fa6cffe6346 100644
--- a/paddlex/utils/__init__.py
+++ b/paddlex/utils/__init__.py
@@ -1,16 +1,16 @@
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

 from __future__ import absolute_import
 from . import logging
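The rewritten normlime docstring above describes the simplified scheme: fit a global linear input-output relation over many samples, then use the resulting per-feature weights to filter a single sample's LIME weights. A schematic numpy sketch of that filtering step; all names, shapes, and values here are illustrative, not the PaddleX implementation:

```python
import numpy as np

rng = np.random.default_rng(0)
normlime_weights = rng.random(50)    # global weight per superpixel feature
lime_weights = rng.normal(size=50)   # one sample's raw LIME weights

# Damp LIME weights for features the global model finds unimportant,
# which stabilises the per-sample visualization.
global_importance = normlime_weights / normlime_weights.sum()
filtered = lime_weights * global_importance
top = np.argsort(-np.abs(filtered))[:5]  # superpixels to highlight
print(top)
```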
diff --git a/paddlex/utils/utils.py b/paddlex/utils/utils.py
index 875a027f187661ab3ed44266c1b90780a55d518a..d9005875ea6c793269a8c67e065b69bd7100dbe8 100644
--- a/paddlex/utils/utils.py
+++ b/paddlex/utils/utils.py
@@ -67,8 +67,8 @@ def parse_param_file(param_file, return_shape=True):
             f.close()
             return tuple(tensor_desc.dims)
     if tensor_desc.data_type != 5:
-        raise Exception(
-            "Unexpected data type while parse {}".format(param_file))
+        raise Exception("Unexpected data type while parse {}".format(
+            param_file))
     data_size = 4
     for i in range(len(tensor_shape)):
         data_size *= tensor_shape[i]
@@ -139,7 +139,12 @@ def load_pdparams(exe, main_prog, model_dir):
     vars_to_load = list()
     import pickle
-    with open(osp.join(model_dir, 'model.pdparams'), 'rb') as f:
+
+    if osp.isfile(model_dir):
+        params_file = model_dir
+    else:
+        params_file = osp.join(model_dir, 'model.pdparams')
+    with open(params_file, 'rb') as f:
         params_dict = pickle.load(f) if six.PY2 else pickle.load(
             f, encoding='latin1')
     unused_vars = list()
@@ -185,8 +190,8 @@ def is_belong_to_optimizer(var):
     import paddle.fluid as fluid
     from paddle.fluid.proto.framework_pb2 import VarType

-    if not (isinstance(var, fluid.framework.Parameter)
-            or var.desc.need_check_feed()):
+    if not (isinstance(var, fluid.framework.Parameter) or
+            var.desc.need_check_feed()):
         return is_persistable(var)
     return False
@@ -206,9 +211,8 @@ def load_pdopt(exe, main_prog, model_dir):
     if len(optimizer_var_list) > 0:
         for var in optimizer_var_list:
             if var.name not in opt_dict:
-                raise Exception(
-                    "{} is not in saved paddlex optimizer, {}".format(
-                        var.name, exception_message))
+                raise Exception("{} is not in saved paddlex optimizer, {}".
+                                format(var.name, exception_message))
             if var.shape != opt_dict[var.name].shape:
                 raise Exception(
                     "Shape of optimizer variable {} doesn't match.(Last: {}, Now: {}), {}"
@@ -227,9 +231,8 @@ def load_pdopt(exe, main_prog, model_dir):
             "There is no optimizer parameters in the model, please set the optimizer!"
         )
     else:
-        logging.info(
-            "There are {} optimizer parameters in {} are loaded.".format(
-                len(optimizer_var_list), model_dir))
+        logging.info("There are {} optimizer parameters in {} are loaded.".
+                     format(len(optimizer_var_list), model_dir))


 def load_pretrain_weights(exe,
@@ -239,6 +242,12 @@
                          main_prog,
                          resume=False):
     if not osp.exists(weights_dir):
         raise Exception("Path {} not exists.".format(weights_dir))
+    if osp.isfile(weights_dir):
+        if not weights_dir.endswith('.pdparams'):
+            raise Exception("File {} is not a paddle parameter file".format(
+                weights_dir))
+        load_pdparams(exe, main_prog, weights_dir)
+        return
     if osp.exists(osp.join(weights_dir, "model.pdparams")):
         load_pdparams(exe, main_prog, weights_dir)
     if resume:
@@ -255,9 +264,8 @@ def load_pretrain_weights(exe,
         if not isinstance(var, fluid.framework.Parameter):
             continue
         if not osp.exists(osp.join(weights_dir, var.name)):
-            logging.debug(
-                "[SKIP] Pretrained weight {}/{} doesn't exist".format(
-                    weights_dir, var.name))
+            logging.debug("[SKIP] Pretrained weight {}/{} doesn't exist".
+                          format(weights_dir, var.name))
             continue
         pretrained_shape = parse_param_file(osp.join(weights_dir, var.name))
         actual_shape = tuple(var.shape)
@@ -317,9 +325,8 @@ def load_pretrain_weights(exe,
             "There is no optimizer parameters in the model, please set the optimizer!"
         )
     else:
-        logging.info(
-            "There are {} optimizer parameters in {} are loaded.".format(
-                len(optimizer_var_list), weights_dir))
+        logging.info("There are {} optimizer parameters in {} are loaded.".
+                     format(len(optimizer_var_list), weights_dir))


 class EarlyStop:
@@ -342,12 +349,12 @@ class EarlyStop:
             self.max = current_score
             return False
         else:
-            if (abs(self.score - current_score) < self.thresh
-                    or current_score < self.score):
+            if (abs(self.score - current_score) < self.thresh or
+                    current_score < self.score):
                 self.counter += 1
                 self.score = current_score
-                logging.debug(
-                    "EarlyStopping: %i / %i" % (self.counter, self.patience))
+                logging.debug("EarlyStopping: %i / %i" %
+                              (self.counter, self.patience))
                 if self.counter >= self.patience:
                     logging.info("EarlyStopping: Stop training")
                     return True
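The `osp.isfile` branches above let pretrained weights be given as a single `.pdparams` file rather than a directory. A hedged usage sketch; the paths, dataset, and chosen model are placeholders, and `pretrain_weights` is the existing `train()` argument that ends up in `load_pretrain_weights`:

```python
import paddlex as pdx
from paddlex.cls import transforms

train_dataset = pdx.datasets.ImageNet(
    data_dir='mydata',
    file_list='mydata/train_list.txt',
    label_list='mydata/labels.txt',
    transforms=transforms.ComposedClsTransforms(mode='train'))
model = pdx.cls.MobileNetV2(num_classes=len(train_dataset.labels))
model.train(
    num_epochs=10,
    train_dataset=train_dataset,
    # Previously this had to be a directory containing model.pdparams;
    # now the parameter file itself is also accepted.
    pretrain_weights='output/mobilenetv2/best_model/model.pdparams',
    save_dir='output/mobilenetv2_finetune')
```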
diff --git a/setup.py b/setup.py
index 44aca0f9dc2a214ff4bcf4e2817d06423c26812b..1f42da4da4099b6b651a41b65aaedde7b76093ca 100644
--- a/setup.py
+++ b/setup.py
@@ -15,11 +15,11 @@
 import setuptools
 import sys

-long_description = "PaddleX. A end-to-end deeplearning model development toolkit base on PaddlePaddle\n\n"
+long_description = "PaddlePaddle Entire Process Development Toolkit"

 setuptools.setup(
     name="paddlex",
-    version='1.0.6',
+    version='1.0.7',
     author="paddlex",
     author_email="paddlex@baidu.com",
     description=long_description,
diff --git a/tools/codestyle/clang_format.hook b/tools/codestyle/clang_format.hook
index 1d928216867c0ba3897d71542fea44debf8d72a0..14300746ac343fa56c690bc43fc02659d690f73c 100755
--- a/tools/codestyle/clang_format.hook
+++ b/tools/codestyle/clang_format.hook
@@ -1,15 +1,15 @@
 #!/bin/bash
-set -e
-
-readonly VERSION="3.8"
-
-version=$(clang-format -version)
-
-if ! [[ $version == *"$VERSION"* ]]; then
-    echo "clang-format version check failed."
-    echo "a version contains '$VERSION' is needed, but get '$version'"
-    echo "you can install the right version, and make an soft-link to '\$PATH' env"
-    exit -1
-fi
-
-clang-format $@
+# set -e
+#
+# readonly VERSION="3.8"
+#
+# version=$(clang-format -version)
+#
+# if ! [[ $version == *"$VERSION"* ]]; then
+#     echo "clang-format version check failed."
+#     echo "a version contains '$VERSION' is needed, but get '$version'"
+#     echo "you can install the right version, and make an soft-link to '\$PATH' env"
+#     exit -1
+# fi
+#
+# clang-format $@
diff --git a/new_tutorials/train/segmentation/hrnet.py b/tutorials/train/segmentation/fast_scnn.py
similarity index 88%
rename from new_tutorials/train/segmentation/hrnet.py
rename to tutorials/train/segmentation/fast_scnn.py
index 98fdd1b925bd4707001fdad56b3ffdc6bb2b58ae..9c48d31eda7b612243e65df124b51722c4ea59e4 100644
--- a/new_tutorials/train/segmentation/hrnet.py
+++ b/tutorials/train/segmentation/fast_scnn.py
@@ -11,7 +11,8 @@ pdx.utils.download_and_decompress(optic_dataset, path='./')

 # 定义训练和验证时的transforms
 # API说明: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/seg_transforms.html#composedsegtransforms
-train_transforms = transforms.ComposedSegTransforms(mode='train', train_crop_size=[769, 769])
+train_transforms = transforms.ComposedSegTransforms(
+    mode='train', train_crop_size=[769, 769])
 eval_transforms = transforms.ComposedSegTransforms(mode='eval')

 # 定义训练和验证所用的数据集
@@ -34,14 +35,14 @@ eval_dataset = pdx.datasets.SegDataset(
 # 浏览器打开 https://0.0.0.0:8001即可
 # 其中0.0.0.0为本机访问,如为远程服务, 改成相应机器IP

-# https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#hrnet
+# https://paddlex.readthedocs.io/zh_CN/latest/apis/models/semantic_segmentation.html#fastscnn
 num_classes = len(train_dataset.labels)
-model = pdx.seg.HRNet(num_classes=num_classes)
+model = pdx.seg.FastSCNN(num_classes=num_classes)
 model.train(
     num_epochs=20,
     train_dataset=train_dataset,
     train_batch_size=4,
     eval_dataset=eval_dataset,
     learning_rate=0.01,
-    save_dir='output/fastscnn',
+    save_dir='output/fastscnn',
     use_vdl=True)