diff --git a/deploy/cpp/CMakeLists.txt b/deploy/cpp/CMakeLists.txt index cc76b0e61a8ab439e08ef1e952b64be0f0041380..d04bf85183e7aaf64950cf56ab9ffe7a78ddb3fe 100644 --- a/deploy/cpp/CMakeLists.txt +++ b/deploy/cpp/CMakeLists.txt @@ -114,8 +114,6 @@ if (NOT WIN32) if (WITH_TENSORRT AND WITH_GPU) include_directories("${TENSORRT_DIR}/include") link_directories("${TENSORRT_DIR}/lib") - #include_directories("${PADDLE_DIR}/third_party/install/tensorrt/include") - #link_directories("${PADDLE_DIR}/third_party/install/tensorrt/lib") endif() endif(NOT WIN32) @@ -172,7 +170,7 @@ endif() if (NOT WIN32) set(DEPS ${DEPS} - ${MATH_LIB} ${MKLDNN_LIB} + ${MATH_LIB} ${MKLDNN_LIB} glog gflags protobuf z xxhash yaml-cpp ) if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib") @@ -199,8 +197,6 @@ if(WITH_GPU) if (WITH_TENSORRT) set(DEPS ${DEPS} ${TENSORRT_DIR}/lib/libnvinfer${CMAKE_SHARED_LIBRARY_SUFFIX}) set(DEPS ${DEPS} ${TENSORRT_DIR}/lib/libnvinfer_plugin${CMAKE_SHARED_LIBRARY_SUFFIX}) - #set(DEPS ${DEPS} ${PADDLE_DIR}/third_party/install/tensorrt/lib/libnvinfer${CMAKE_STATIC_LIBRARY_SUFFIX}) - #set(DEPS ${DEPS} ${PADDLE_DIR}/third_party/install/tensorrt/lib/libnvinfer_plugin${CMAKE_STATIC_LIBRARY_SUFFIX}) endif() set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX}) set(DEPS ${DEPS} ${CUDNN_LIB}/libcudnn${CMAKE_SHARED_LIBRARY_SUFFIX}) @@ -216,7 +212,7 @@ if (NOT WIN32) set(DEPS ${DEPS} ${EXTERNAL_LIB}) endif() -set(DEPS ${DEPS} ${OpenCV_LIBS}) +set(DEPS ${DEPS} ${OpenCV_LIBS}) add_executable(classifier src/classifier.cpp src/transforms.cpp src/paddlex.cpp) ADD_DEPENDENCIES(classifier ext-yaml-cpp) target_link_libraries(classifier ${DEPS}) @@ -256,4 +252,3 @@ if (WIN32 AND WITH_MKL) ) endif() - diff --git a/deploy/cpp/include/paddlex/transforms.h b/deploy/cpp/include/paddlex/transforms.h index 33bd56467fb998bb65817b91070a05d8a8538d21..d155e2c2b260dc0925b36688201d6e71cf7d7534 100644 --- a/deploy/cpp/include/paddlex/transforms.h +++ b/deploy/cpp/include/paddlex/transforms.h @@ -152,18 +152,19 @@ class Padding : public Transform { virtual void Init(const YAML::Node& item) { if (item["coarsest_stride"].IsDefined()) { coarsest_stride_ = item["coarsest_stride"].as(); - if (coarsest_stride_ <= 1) { + if (coarsest_stride_ < 1) { std::cerr << "[Padding] coarest_stride should greater than 0" << std::endl; exit(-1); } - } else { + } + if (item["target_size"].IsDefined()){ if (item["target_size"].IsScalar()) { width_ = item["target_size"].as(); height_ = item["target_size"].as(); } else if (item["target_size"].IsSequence()) { - width_ = item["target_size"].as>()[1]; - height_ = item["target_size"].as>()[0]; + width_ = item["target_size"].as>()[0]; + height_ = item["target_size"].as>()[1]; } } if (item["im_padding_value"].IsDefined()) { diff --git a/deploy/cpp/scripts/build.sh b/deploy/cpp/scripts/build.sh index dd4d62715fad0a4464044d8c63c2a55546bcfada..b0588ed3017d97621e87b8f74e64ae6ecf870e84 100644 --- a/deploy/cpp/scripts/build.sh +++ b/deploy/cpp/scripts/build.sh @@ -6,6 +6,9 @@ WITH_TENSORRT=OFF TENSORRT_DIR=/path/to/TensorRT/ # Paddle 预测库路径 PADDLE_DIR=/path/to/fluid_inference/ +# Paddle 的预测库是否使用静态库来编译 +# 使用TensorRT时,Paddle的预测库通常为动态库 +WITH_STATIC_LIB=ON # CUDA 的 lib 路径 CUDA_LIB=/path/to/cuda/lib/ # CUDNN 的 lib 路径 @@ -24,6 +27,7 @@ cmake .. \ -DWITH_TENSORRT=${WITH_TENSORRT} \ -DTENSORRT_DIR=${TENSORRT_DIR} \ -DPADDLE_DIR=${PADDLE_DIR} \ + -DWITH_STATIC_LIB=${WITH_STATIC_LIB} \ -DCUDA_LIB=${CUDA_LIB} \ -DCUDNN_LIB=${CUDNN_LIB} \ -DOPENCV_DIR=${OPENCV_DIR} diff --git a/deploy/cpp/src/detector.cpp b/deploy/cpp/src/detector.cpp index 315ae5e7287245a9dce71a1d74b09b859c21b463..f31178b26f644eb6cb8c22de403f0c5758655ab7 100644 --- a/deploy/cpp/src/detector.cpp +++ b/deploy/cpp/src/detector.cpp @@ -69,7 +69,7 @@ int main(int argc, char** argv) { << result.boxes[i].coordinate[0] << ", " << result.boxes[i].coordinate[1] << ", " << result.boxes[i].coordinate[2] << ", " - << result.boxes[i].coordinate[3] << std::endl; + << result.boxes[i].coordinate[3] << ")" << std::endl; } // 可视化 @@ -92,7 +92,7 @@ int main(int argc, char** argv) { << result.boxes[i].coordinate[0] << ", " << result.boxes[i].coordinate[1] << ", " << result.boxes[i].coordinate[2] << ", " - << result.boxes[i].coordinate[3] << std::endl; + << result.boxes[i].coordinate[3] << ")" << std::endl; } // 可视化 diff --git a/deploy/cpp/src/paddlex.cpp b/deploy/cpp/src/paddlex.cpp index a1561764129ca09afc951386755626889434ccc3..4031a9d8c41921b90d2e2a9782fb67d1642a2f5c 100644 --- a/deploy/cpp/src/paddlex.cpp +++ b/deploy/cpp/src/paddlex.cpp @@ -39,11 +39,11 @@ void Model::create_predictor(const std::string& model_dir, // 开启内存优化 config.EnableMemoryOptim(); if (use_trt){ - config.EnableTensorRtEngine(1 << 20 /* workspace_size*/, - 32 /* max_batch_size*/, + config.EnableTensorRtEngine(1 << 20 /* workspace_size*/, + 32 /* max_batch_size*/, 20 /* min_subgraph_size*/, paddle::AnalysisConfig::Precision::kFloat32 /* precision*/, - false /* use_static*/, + true /* use_static*/, false /* use_calib_mode*/); } predictor_ = std::move(CreatePaddlePredictor(config)); diff --git a/deploy/cpp/src/transforms.cpp b/deploy/cpp/src/transforms.cpp index 3d7b4cee8ab0a943f0eb123a23b129598995278f..76ad550d560140e129ec6049edfc1afbf814b5ff 100644 --- a/deploy/cpp/src/transforms.cpp +++ b/deploy/cpp/src/transforms.cpp @@ -92,15 +92,16 @@ bool Padding::Run(cv::Mat* im, ImageBlob* data) { int padding_w = 0; int padding_h = 0; - if (width_ > 0 & height_ > 0) { + if (width_ > 1 & height_ > 1) { padding_w = width_ - im->cols; padding_h = height_ - im->rows; - } else if (coarsest_stride_ > 0) { + } else if (coarsest_stride_ > 1) { padding_h = ceil(im->rows * 1.0 / coarsest_stride_) * coarsest_stride_ - im->rows; padding_w = ceil(im->cols * 1.0 / coarsest_stride_) * coarsest_stride_ - im->cols; } + if (padding_h < 0 || padding_w < 0) { std::cerr << "[Padding] Computed padding_h=" << padding_h << ", padding_w=" << padding_w diff --git a/docs/deploy/deploy.md b/docs/deploy/deploy.md index ef381555f47cbe72707da1a0386d7906c2848f75..bc3b8b70ca9d5cfe25be16b1c810b629b86ed0fe 100644 --- a/docs/deploy/deploy.md +++ b/docs/deploy/deploy.md @@ -14,6 +14,12 @@ paddlex --export_inference --model_dir=./garbage_epoch_12 --save_dir=./inference_model ``` +使用TensorRT预测时,需指定模型的图像输入shape:[w,h],需要注意的是分类模型请保持于训练时输入的shape一致。 + +``` +paddlex --export_inference --model_dir=./garbage_epoch_12 --save_dir=./inference_model --fixed_input_shape=[640,960] +``` + ### Python部署 PaddleX已经集成了基于Python的高性能预测接口,在安装PaddleX后,可参照如下代码示例,进行预测。相关的接口文档可参考[paddlex.deploy](apis/deploy.md) > 点击下载测试图片 [garbage.bmp](https://bj.bcebos.com/paddlex/datasets/garbage.bmp) diff --git a/docs/deploy/deploy_cpp_linux.md b/docs/deploy/deploy_cpp_linux.md index 5e6a594b70fedf3d88e9566e45f8b3f8e82d2109..9a195dc3d81f3d3d529e2879059070cd500d378a 100644 --- a/docs/deploy/deploy_cpp_linux.md +++ b/docs/deploy/deploy_cpp_linux.md @@ -39,18 +39,24 @@ fluid_inference 编译`cmake`的命令在`scripts/build.sh`中,请根据实际情况修改主要参数,其主要内容说明如下: ``` + # 是否使用GPU(即是否使用 CUDA) -WITH_GPU=ON +WITH_GPU=OFF # 是否集成 TensorRT(仅WITH_GPU=ON 有效) WITH_TENSORRT=OFF -# 上一步下载的 Paddle 预测库路径 -PADDLE_DIR=/root/projects/deps/fluid_inference/ +# TensorRT 的lib路径 +TENSORRT_DIR=/path/to/TensorRT/ +# Paddle 预测库路径 +PADDLE_DIR=/path/to/fluid_inference/ +# Paddle 的预测库是否使用静态库来编译 +# 使用TensorRT时,Paddle的预测库通常为动态库 +WITH_STATIC_LIB=ON # CUDA 的 lib 路径 -CUDA_LIB=/usr/local/cuda/lib64/ +CUDA_LIB=/path/to/cuda/lib/ # CUDNN 的 lib 路径 -CUDNN_LIB=/usr/local/cudnn/lib64/ +CUDNN_LIB=/path/to/cudnn/lib/ -# OPENCV 路径, 如果使用自带预编译版本可不设置 +# OPENCV 路径, 如果使用自带预编译版本可不修改 OPENCV_DIR=$(pwd)/deps/opencv3gcc4.8/ sh $(pwd)/scripts/bootstrap.sh @@ -61,7 +67,9 @@ cd build cmake .. \ -DWITH_GPU=${WITH_GPU} \ -DWITH_TENSORRT=${WITH_TENSORRT} \ + -DTENSORRT_DIR=${TENSORRT_DIR} \ -DPADDLE_DIR=${PADDLE_DIR} \ + -DWITH_STATIC_LIB=${WITH_STATIC_LIB} \ -DCUDA_LIB=${CUDA_LIB} \ -DCUDNN_LIB=${CUDNN_LIB} \ -DOPENCV_DIR=${OPENCV_DIR} @@ -83,6 +91,7 @@ make | image | 要预测的图片文件路径 | | image_list | 按行存储图片路径的.txt文件 | | use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0) | +| use_trt | 是否使用 TensorTr 预测, 支持值为0或1(默认值为0) | | gpu_id | GPU 设备ID, 默认值为0 | | save_dir | 保存可视化结果的路径, 默认值为"output",classfier无该参数 | @@ -113,4 +122,3 @@ make ./build/detector --model_dir=/path/to/models/inference_model --image_list=/root/projects/images_list.txt --use_gpu=1 --save_dir=output ``` 图片文件`可视化预测结果`会保存在`save_dir`参数设置的目录下。 - diff --git a/paddlex/command.py b/paddlex/command.py index 2b728ea459a43adf92a37771fdb4080d0493e2fd..0c4f5b65f86b062c47bd518c66c0d5f01a91b143 100644 --- a/paddlex/command.py +++ b/paddlex/command.py @@ -33,7 +33,7 @@ def arg_parser(): "--fixed_input_shape", "-fs", default=None, - help="export inference model with fixed input shape(TensorRT need)") + help="export inference model with fixed input shape:[w,h]") return parser @@ -58,10 +58,11 @@ def main(): assert args.model_dir is not None, "--model_dir should be defined while exporting inference model" assert args.save_dir is not None, "--save_dir should be defined to save inference model" fixed_input_shape = eval(args.fixed_input_shape) - assert len(fixed_input_shape) == 2, "len of fixed input shape must == 2" + assert len( + fixed_input_shape) == 2, "len of fixed input shape must == 2" model = pdx.load_model(args.model_dir, fixed_input_shape) - model.export_inference_model(args.save_dir, fixed_input_shape) + model.export_inference_model(args.save_dir) if __name__ == "__main__": diff --git a/paddlex/cv/models/base.py b/paddlex/cv/models/base.py index e6664f056679139f64d71d62b09dea6fbcd6a6fd..84b531a1c8fba04855669995dfefe15526111c99 100644 --- a/paddlex/cv/models/base.py +++ b/paddlex/cv/models/base.py @@ -316,25 +316,6 @@ class BaseAPI: model_info['_ModelInputsOutputs']['test_outputs'] = [ [k, v.name] for k, v in self.test_outputs.items() ] - resize = {'ResizeByShort': {}} - padding = {'Padding':{}} - - if model_info['_Attributes']['model_type'] == 'classifier': - crop_size = 0 - for transform in model_info['Transforms']: - if 'CenterCrop' in transform: - crop_size = transform['CenterCrop']['crop_size'] - break - assert crop_size == fixed_input_shape[0], "fixed_input_shape must == CenterCrop:crop_size:{}".format(crop_size) - assert crop_size == fixed_input_shape[1], "fixed_input_shape must == CenterCrop:crop_size:{}".format(crop_size) - if crop_size == 0: - logging.warning("fixed_input_shape must == input shape when trainning") - else: - resize['ResizeByShort']['short_size'] = min(fixed_input_shape) - resize['ResizeByShort']['max_size'] = max(fixed_input_shape) - padding['Padding']['target_size'] = list(fixed_input_shape) - model_info['Transforms'].append(resize) - model_info['Transforms'].append(padding) with open( osp.join(save_dir, 'model.yml'), encoding='utf-8', mode='w') as f: diff --git a/paddlex/cv/models/classifier.py b/paddlex/cv/models/classifier.py index eb14e74ceae2fe4ac17aedfddb5b457c675c6355..88a15095b3c8ad8475ca5955f2de5649be32da0d 100644 --- a/paddlex/cv/models/classifier.py +++ b/paddlex/cv/models/classifier.py @@ -35,10 +35,9 @@ class BaseClassifier(BaseAPI): 'MobileNetV1', 'MobileNetV2', 'Xception41', 'Xception65', 'Xception71']。默认为'ResNet50'。 num_classes (int): 类别数。默认为1000。 - fixed_input_shape (list): 长度为2,维度为1的list,如:[640,720],用来固定模型输入:'image'的shape,默认为None。 """ - def __init__(self, model_name='ResNet50', num_classes=1000, fixed_input_shape=None): + def __init__(self, model_name='ResNet50', num_classes=1000): self.init_params = locals() super(BaseClassifier, self).__init__('classifier') if not hasattr(paddlex.cv.nets, str.lower(model_name)): @@ -47,11 +46,13 @@ class BaseClassifier(BaseAPI): self.model_name = model_name self.labels = None self.num_classes = num_classes - self.fixed_input_shape = fixed_input_shape + self.fixed_input_shape = None def build_net(self, mode='train'): if self.fixed_input_shape is not None: - input_shape =[None, 3, self.fixed_input_shape[0], self.fixed_input_shape[1]] + input_shape = [ + None, 3, self.fixed_input_shape[1], self.fixed_input_shape[0] + ] image = fluid.data( dtype='float32', shape=input_shape, name='image') else: diff --git a/paddlex/cv/models/deeplabv3p.py b/paddlex/cv/models/deeplabv3p.py index b734b8995692d0b5bfd7d483e5eb214ec8d6141a..3091f58a7017c8644acb718d595dae61e07ee5f6 100644 --- a/paddlex/cv/models/deeplabv3p.py +++ b/paddlex/cv/models/deeplabv3p.py @@ -48,7 +48,6 @@ class DeepLabv3p(BaseAPI): 自行计算相应的权重,每一类的权重为:每类的比例 * num_classes。class_weight取默认值None时,各类的权重1, 即平时使用的交叉熵损失函数。 ignore_index (int): label上忽略的值,label为ignore_index的像素不参与损失函数的计算。默认255。 - fixed_input_shape (list): 长度为2,维度为1的list,如:[640,720],用来固定模型输入:'image'的shape,默认为None。 Raises: ValueError: use_bce_loss或use_dice_loss为真且num_calsses > 2。 ValueError: backbone取值不在['Xception65', 'Xception41', 'MobileNetV2_x0.25', @@ -69,8 +68,7 @@ class DeepLabv3p(BaseAPI): use_bce_loss=False, use_dice_loss=False, class_weight=None, - ignore_index=255, - fixed_input_shape=None): + ignore_index=255): self.init_params = locals() super(DeepLabv3p, self).__init__('segmenter') # dice_loss或bce_loss只适用两类分割中 @@ -119,7 +117,7 @@ class DeepLabv3p(BaseAPI): self.enable_decoder = enable_decoder self.labels = None self.sync_bn = True - self.fixed_input_shape = fixed_input_shape + self.fixed_input_shape = None def _get_backbone(self, backbone): def mobilenetv2(backbone): @@ -185,7 +183,7 @@ class DeepLabv3p(BaseAPI): use_dice_loss=self.use_dice_loss, class_weight=self.class_weight, ignore_index=self.ignore_index, - fixed_input_shape = self.fixed_input_shape) + fixed_input_shape=self.fixed_input_shape) inputs = model.generate_inputs() model_out = model.build_net(inputs) outputs = OrderedDict() diff --git a/paddlex/cv/models/faster_rcnn.py b/paddlex/cv/models/faster_rcnn.py index 74db50f398311eda12dbf84e5fed2adf15df10b9..79bd14c12c156ce7fc427670ec27d61370757813 100644 --- a/paddlex/cv/models/faster_rcnn.py +++ b/paddlex/cv/models/faster_rcnn.py @@ -44,8 +44,7 @@ class FasterRCNN(BaseAPI): backbone='ResNet50', with_fpn=True, aspect_ratios=[0.5, 1.0, 2.0], - anchor_sizes=[32, 64, 128, 256, 512], - fixed_input_shape=None): + anchor_sizes=[32, 64, 128, 256, 512]): self.init_params = locals() super(FasterRCNN, self).__init__('detector') backbones = [ @@ -59,7 +58,7 @@ class FasterRCNN(BaseAPI): self.aspect_ratios = aspect_ratios self.anchor_sizes = anchor_sizes self.labels = None - self.fixed_input_shape = fixed_input_shape + self.fixed_input_shape = None def _get_backbone(self, backbone_name): norm_type = None @@ -113,7 +112,7 @@ class FasterRCNN(BaseAPI): anchor_sizes=self.anchor_sizes, train_pre_nms_top_n=train_pre_nms_top_n, test_pre_nms_top_n=test_pre_nms_top_n, - fixed_input_shape = self.fixed_input_shape) + fixed_input_shape=self.fixed_input_shape) inputs = model.generate_inputs() if mode == 'train': model_out = model.build_net(inputs) diff --git a/paddlex/cv/models/load_model.py b/paddlex/cv/models/load_model.py index 98c9966814078814042236b767f74f50e72d93c8..9d1485d9b7e859bb81053129d391e679210bedb7 100644 --- a/paddlex/cv/models/load_model.py +++ b/paddlex/cv/models/load_model.py @@ -39,13 +39,12 @@ def load_model(model_dir, fixed_input_shape=None): raise Exception("There's no attribute {} in paddlex.cv.models".format( info['Model'])) - info['_init_params']['fixed_input_shape'] = fixed_input_shape - if info['_Attributes']['model_type'] == 'classifier': model = paddlex.cv.models.BaseClassifier(**info['_init_params']) else: model = getattr(paddlex.cv.models, info['Model'])(**info['_init_params']) + model.fixed_input_shape = fixed_input_shape if status == "Normal" or \ status == "Prune" or status == "fluid.save": startup_prog = fluid.Program() @@ -80,6 +79,8 @@ def load_model(model_dir, fixed_input_shape=None): model.test_outputs[var_desc[0]] = out if 'Transforms' in info: transforms_mode = info.get('TransformsMode', 'RGB') + # 固定模型的输入shape + fix_input_shape(info, fixed_input_shape=fixed_input_shape) if transforms_mode == 'RGB': to_rgb = True else: @@ -104,6 +105,34 @@ def load_model(model_dir, fixed_input_shape=None): return model +def fix_input_shape(info, fixed_input_shape=None): + if fixed_input_shape is not None: + resize = {'ResizeByShort': {}} + padding = {'Padding': {}} + if info['_Attributes']['model_type'] == 'classifier': + crop_size = 0 + for transform in info['Transforms']: + if 'CenterCrop' in transform: + crop_size = transform['CenterCrop']['crop_size'] + break + assert crop_size == fixed_input_shape[ + 0], "fixed_input_shape must == CenterCrop:crop_size:{}".format( + crop_size) + assert crop_size == fixed_input_shape[ + 1], "fixed_input_shape must == CenterCrop:crop_size:{}".format( + crop_size) + if crop_size == 0: + logging.warning( + "fixed_input_shape must == input shape when trainning") + else: + print("*" * 10) + resize['ResizeByShort']['short_size'] = min(fixed_input_shape) + resize['ResizeByShort']['max_size'] = max(fixed_input_shape) + padding['Padding']['target_size'] = list(fixed_input_shape) + info['Transforms'].append(resize) + info['Transforms'].append(padding) + + def build_transforms(model_type, transforms_info, to_rgb=True): if model_type == "classifier": import paddlex.cv.transforms.cls_transforms as T diff --git a/paddlex/cv/models/mask_rcnn.py b/paddlex/cv/models/mask_rcnn.py index f3110503c3dfe981ebfcbd91819b69f51d1ce529..7956dc7e9a48f6d0300db80e7331151e795e2c32 100644 --- a/paddlex/cv/models/mask_rcnn.py +++ b/paddlex/cv/models/mask_rcnn.py @@ -36,7 +36,6 @@ class MaskRCNN(FasterRCNN): with_fpn (bool): 是否使用FPN结构。默认为True。 aspect_ratios (list): 生成anchor高宽比的可选值。默认为[0.5, 1.0, 2.0]。 anchor_sizes (list): 生成anchor大小的可选值。默认为[32, 64, 128, 256, 512]。 - fixed_input_shape (list): 长度为2,维度为1的list,如:[640,720],用来固定模型输入:'image'的shape,默认为None。 """ def __init__(self, @@ -44,8 +43,7 @@ class MaskRCNN(FasterRCNN): backbone='ResNet50', with_fpn=True, aspect_ratios=[0.5, 1.0, 2.0], - anchor_sizes=[32, 64, 128, 256, 512], - fixed_input_shape=None): + anchor_sizes=[32, 64, 128, 256, 512]): self.init_params = locals() backbones = [ 'ResNet18', 'ResNet50', 'ResNet50vd', 'ResNet101', 'ResNet101vd' @@ -62,7 +60,7 @@ class MaskRCNN(FasterRCNN): self.mask_head_resolution = 28 else: self.mask_head_resolution = 14 - self.fixed_input_shape = fixed_input_shape + self.fixed_input_shape = None def build_net(self, mode='train'): train_pre_nms_top_n = 2000 if self.with_fpn else 12000 @@ -77,7 +75,7 @@ class MaskRCNN(FasterRCNN): test_pre_nms_top_n=test_pre_nms_top_n, num_convs=num_convs, mask_head_resolution=self.mask_head_resolution, - fixed_input_shape = self.fixed_input_shape) + fixed_input_shape=self.fixed_input_shape) inputs = model.generate_inputs() if mode == 'train': model_out = model.build_net(inputs) diff --git a/paddlex/cv/models/yolo_v3.py b/paddlex/cv/models/yolo_v3.py index 51c940aadca060d3354c9394b429cfe6ed1c7343..7821997e3b6c536a7b654ab29a242b76dcdf2d46 100644 --- a/paddlex/cv/models/yolo_v3.py +++ b/paddlex/cv/models/yolo_v3.py @@ -60,8 +60,7 @@ class YOLOv3(BaseAPI): label_smooth=False, train_random_shapes=[ 320, 352, 384, 416, 448, 480, 512, 544, 576, 608 - ], - fixed_input_shape=None): + ]): self.init_params = locals() super(YOLOv3, self).__init__('detector') backbones = [ @@ -81,7 +80,7 @@ class YOLOv3(BaseAPI): self.label_smooth = label_smooth self.sync_bn = True self.train_random_shapes = train_random_shapes - self.fixed_input_shape = fixed_input_shape + self.fixed_input_shape = None def _get_backbone(self, backbone_name): if backbone_name == 'DarkNet53': @@ -116,7 +115,7 @@ class YOLOv3(BaseAPI): nms_keep_topk=self.nms_keep_topk, nms_iou_threshold=self.nms_iou_threshold, train_random_shapes=self.train_random_shapes, - fixed_input_shape = self.fixed_input_shape) + fixed_input_shape=self.fixed_input_shape) inputs = model.generate_inputs() model_out = model.build_net(inputs) outputs = OrderedDict([('bbox', model_out)]) diff --git a/paddlex/cv/nets/detection/faster_rcnn.py b/paddlex/cv/nets/detection/faster_rcnn.py index f53c053716934ba35d8ac1f1b762daef6fec1868..7e7bd63dcb64065a3c7616b2e344d7152cbfdfde 100644 --- a/paddlex/cv/nets/detection/faster_rcnn.py +++ b/paddlex/cv/nets/detection/faster_rcnn.py @@ -223,7 +223,9 @@ class FasterRCNN(object): inputs = OrderedDict() if self.fixed_input_shape is not None: - input_shape =[None, 3, self.fixed_input_shape[0], self.fixed_input_shape[1]] + input_shape = [ + None, 3, self.fixed_input_shape[1], self.fixed_input_shape[0] + ] inputs['image'] = fluid.data( dtype='float32', shape=input_shape, name='image') else: diff --git a/paddlex/cv/nets/detection/mask_rcnn.py b/paddlex/cv/nets/detection/mask_rcnn.py index 268a3c20ce6f701d49e000abddfcba1cc5838caf..010bacfe921137c69802ff25a405c65ea2141f7f 100644 --- a/paddlex/cv/nets/detection/mask_rcnn.py +++ b/paddlex/cv/nets/detection/mask_rcnn.py @@ -310,7 +310,9 @@ class MaskRCNN(object): inputs = OrderedDict() if self.fixed_input_shape is not None: - input_shape =[None, 3, self.fixed_input_shape[0], self.fixed_input_shape[1]] + input_shape = [ + None, 3, self.fixed_input_shape[1], self.fixed_input_shape[0] + ] inputs['image'] = fluid.data( dtype='float32', shape=input_shape, name='image') else: diff --git a/paddlex/cv/nets/detection/yolo_v3.py b/paddlex/cv/nets/detection/yolo_v3.py index b3c5aeb2da8e1680fe20c6f001d0955b0aafff63..2fba001ea8991ce9368c5442acc9e7d260df2110 100644 --- a/paddlex/cv/nets/detection/yolo_v3.py +++ b/paddlex/cv/nets/detection/yolo_v3.py @@ -250,7 +250,9 @@ class YOLOv3: def generate_inputs(self): inputs = OrderedDict() if self.fixed_input_shape is not None: - input_shape =[None, 3, self.fixed_input_shape[0], self.fixed_input_shape[1]] + input_shape = [ + None, 3, self.fixed_input_shape[1], self.fixed_input_shape[0] + ] inputs['image'] = fluid.data( dtype='float32', shape=input_shape, name='image') else: diff --git a/paddlex/cv/nets/segmentation/deeplabv3p.py b/paddlex/cv/nets/segmentation/deeplabv3p.py index d5dd6661772cd27207bced6a0c7361c242519122..08dad240c0b28d6e6e13845dcc0c9148c442014f 100644 --- a/paddlex/cv/nets/segmentation/deeplabv3p.py +++ b/paddlex/cv/nets/segmentation/deeplabv3p.py @@ -315,7 +315,9 @@ class DeepLabv3p(object): inputs = OrderedDict() if self.fixed_input_shape is not None: - input_shape =[None, 3, self.fixed_input_shape[0], self.fixed_input_shape[1]] + input_shape = [ + None, 3, self.fixed_input_shape[1], self.fixed_input_shape[0] + ] inputs['image'] = fluid.data( dtype='float32', shape=input_shape, name='image') else: diff --git a/paddlex/cv/nets/segmentation/unet.py b/paddlex/cv/nets/segmentation/unet.py index 48c595f12360fc9c62370d02c840c4260f65a0c1..899ba1621ea39d7e11623259ed90744f823b179c 100644 --- a/paddlex/cv/nets/segmentation/unet.py +++ b/paddlex/cv/nets/segmentation/unet.py @@ -231,7 +231,9 @@ class UNet(object): inputs = OrderedDict() if self.fixed_input_shape is not None: - input_shape =[None, 3, self.fixed_input_shape[0], self.fixed_input_shape[1]] + input_shape = [ + None, 3, self.fixed_input_shape[1], self.fixed_input_shape[0] + ] inputs['image'] = fluid.data( dtype='float32', shape=input_shape, name='image') else: diff --git a/paddlex/cv/transforms/det_transforms.py b/paddlex/cv/transforms/det_transforms.py index 38f9ee619a86f24c6db6010cac161899d8448aa8..d2358c8d7f6face6b76d2b12d1a31d464b6d7462 100644 --- a/paddlex/cv/transforms/det_transforms.py +++ b/paddlex/cv/transforms/det_transforms.py @@ -211,7 +211,7 @@ class Padding: target_size (int|list): 填充后的图像长、宽,默认为1。 """ - def __init__(self, coarsest_stride=1, target_size=None): + def __init__(self, coarsest_stride=1, target_size=1): self.coarsest_stride = coarsest_stride self.target_size = target_size @@ -233,11 +233,12 @@ class Padding: ValueError: target_size小于原图的大小。 """ - if self.coarsest_stride == 1 and self.target_size is None: - if label_info is None: - return (im, im_info) - else: - return (im, im_info, label_info) + if self.coarsest_stride == 1: + if isinstance(self.target_size, int) and self.target_size == 1: + if label_info is None: + return (im, im_info) + else: + return (im, im_info, label_info) if im_info is None: im_info = dict() if not isinstance(im, np.ndarray): @@ -250,18 +251,17 @@ class Padding: np.ceil(im_h / self.coarsest_stride) * self.coarsest_stride) padding_im_w = int( np.ceil(im_w / self.coarsest_stride) * self.coarsest_stride) - if self.target_size is not None: - if isinstance(self.target_size, int): - padding_im_h = self.target_size - padding_im_w = self.target_size - else: - padding_im_h = self.target_size[0] - padding_im_w = self.target_size[1] - pad_height = padding_im_h - im_h - pad_width = padding_im_w - im_w - if pad_height < 0 or pad_width < 0: - raise ValueError( + if isinstance(self.target_size, int) and self.target_size != 1: + padding_im_h = self.target_size + padding_im_w = self.target_size + elif isinstance(self.target_size, list): + padding_im_w = self.target_size[0] + padding_im_h = self.target_size[1] + pad_height = padding_im_h - im_h + pad_width = padding_im_w - im_w + if pad_height < 0 or pad_width < 0: + raise ValueError( 'the size of image should be less than target_size, but the size of image ({}, {}), is larger than target_size ({}, {})' .format(im_w, im_h, padding_im_w, padding_im_h)) padding_im = np.zeros((padding_im_h, padding_im_w, im_c), @@ -562,7 +562,7 @@ class RandomDistort: params = params_dict[ops[id].__name__] prob = prob_dict[ops[id].__name__] params['im'] = im - + if np.random.uniform(0, 1) < prob: im = ops[id](**params) if label_info is None: