diff --git a/modules/video/multiple_object_tracking/fairmot_dla34/README.md b/modules/video/multiple_object_tracking/fairmot_dla34/README.md index 3ff8bd396a3bc60764241701856dfa20feb59459..702c9dd432ec92f864b7f8f38d726da2cd28be42 100644 --- a/modules/video/multiple_object_tracking/fairmot_dla34/README.md +++ b/modules/video/multiple_object_tracking/fairmot_dla34/README.md @@ -117,6 +117,10 @@ 初始发布 +* 1.1.0 + + 移除fluid api + - ```shell - $ hub install fairmot_dla34==1.0.0 + $ hub install fairmot_dla34==1.1.0 ``` diff --git a/modules/video/multiple_object_tracking/fairmot_dla34/config/fairmot_dla34_30e_1088x608.yml b/modules/video/multiple_object_tracking/fairmot_dla34/config/fairmot_dla34_30e_1088x608.yml index d29d177651f71dc019db54d6692c28c14cf1accd..1f8c00f933f50e50bbdb7f65371eb76320d753a9 100644 --- a/modules/video/multiple_object_tracking/fairmot_dla34/config/fairmot_dla34_30e_1088x608.yml +++ b/modules/video/multiple_object_tracking/fairmot_dla34/config/fairmot_dla34_30e_1088x608.yml @@ -1,7 +1,6 @@ _BASE_: [ '_base_/mot.yml', '_base_/runtime.yml', - '_base_/optimizer_30e.yml', '_base_/fairmot_dla34.yml', '_base_/fairmot_reader_1088x608.yml', ] diff --git a/modules/video/multiple_object_tracking/fairmot_dla34/dataset.py b/modules/video/multiple_object_tracking/fairmot_dla34/dataset.py index ff584d41995237dce107e1f103d862af942ed6a5..5f6a9dd8ddcc19d59bd0f7821b9adaf7a4c02c45 100644 --- a/modules/video/multiple_object_tracking/fairmot_dla34/dataset.py +++ b/modules/video/multiple_object_tracking/fairmot_dla34/dataset.py @@ -11,24 +11,78 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
def default_collate_fn(batch):
    """
    Collate a list of samples into one batch for :code:`paddle.io.DataLoader`.

    The type of the first sample decides how the whole batch is merged, and
    nested containers are processed recursively:

    * ``numpy.ndarray`` samples are stacked along a new leading axis.
    * numbers are packed into one ``numpy.ndarray``.
    * ``str`` / ``bytes`` samples are returned unchanged (the input list).
    * mappings are collated key by key, using the keys of the first sample.
    * other sequences are collated field by field and returned as a list.
    * ``paddle.Tensor`` samples are stacked with :code:`paddle.stack`.

    For example, the input::

        [{'image': <ndarray, shape [3, 224, 224]>, 'label': 1},
         {'image': <ndarray, shape [3, 224, 224]>, 'label': 3}]

    is collated into::

        {'image': <ndarray, shape [2, 3, 224, 224]>, 'label': np.array([1, 3])}

    Args:
        batch (list): samples to merge. The first element is inspected to
            pick the collating strategy, so an empty list raises IndexError.

    Returns:
        The batched data, with the same nesting as a single sample.

    Raises:
        RuntimeError: if sequence samples do not all have the same length.
        TypeError: if the first sample is of an unsupported type.
    """
    first = batch[0]

    if isinstance(first, np.ndarray):
        return np.stack(batch, axis=0)

    if isinstance(first, numbers.Number):
        return np.array(batch)

    # str/bytes are Sequences as well, so they must be handled before the
    # generic Sequence branch below.
    if isinstance(first, (str, bytes)):
        return batch

    if isinstance(first, Mapping):
        return {key: default_collate_fn([item[key] for item in batch]) for key in first}

    if isinstance(first, Sequence):
        fields_num = len(first)
        if any(len(item) != fields_num for item in batch):
            raise RuntimeError("fields number not same among samples in a batch")
        return [default_collate_fn(fields) for fields in zip(*batch)]

    # Tensor samples are tested after the built-in containers; a tensor matches
    # none of the checks above, so the result is the same as testing it earlier.
    if isinstance(first, (paddle.Tensor, core.eager.Tensor)):
        return paddle.stack(batch, axis=0)

    raise TypeError("batch data can only contain: tensor, numpy.ndarray, "
                    "dict, list, number, str, but got {}".format(type(first)))
# See the License for the specific language governing permissions and # limitations under the License. -import os -import sys -import signal import argparse import glob +import os +import signal +import sys +import cv2 import paddle -from ppdet.core.workspace import load_config, merge_config +from ppdet.core.workspace import load_config +from ppdet.core.workspace import merge_config from ppdet.engine import Tracker -from ppdet.utils.check import check_gpu, check_version, check_config +from ppdet.utils.check import check_config +from ppdet.utils.check import check_gpu +from ppdet.utils.check import check_version from ppdet.utils.logger import setup_logger -import paddlehub as hub -from paddlehub.module.module import moduleinfo, runnable -import cv2 +import paddlehub as hub from .tracker import StreamTracker +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable logger = setup_logger('Predict') -@moduleinfo( - name="fairmot_dla34", - type="CV/multiple_object_tracking", - author="paddlepaddle", - author_email="", - summary="Fairmot is a model for multiple object tracking.", - version="1.0.0") +@moduleinfo(name="fairmot_dla34", + type="CV/multiple_object_tracking", + author="paddlepaddle", + author_email="", + summary="Fairmot is a model for multiple object tracking.", + version="1.1.0") class FairmotTracker_1088x608: + def __init__(self): self.pretrained_model = os.path.join(self.directory, "fairmot_dla34_30e_1088x608") @@ -71,13 +75,12 @@ class FairmotTracker_1088x608: tracker.load_weights_jde(self.pretrained_model) signal.signal(signal.SIGINT, self.signalhandler) # inference - tracker.videostream_predict( - video_stream=video_stream, - output_dir=output_dir, - data_type='mot', - model_type='FairMOT', - visualization=visualization, - draw_threshold=draw_threshold) + tracker.videostream_predict(video_stream=video_stream, + output_dir=output_dir, + data_type='mot', + model_type='FairMOT', + visualization=visualization, + 
draw_threshold=draw_threshold) def stream_mode(self, output_dir='mot_result', visualization=True, draw_threshold=0.5, use_gpu=False): ''' @@ -108,12 +111,11 @@ class FairmotTracker_1088x608: return self def __enter__(self): - self.tracker_generator = self.tracker.imagestream_predict( - self.output_dir, - data_type='mot', - model_type='FairMOT', - visualization=self.visualization, - draw_threshold=self.draw_threshold) + self.tracker_generator = self.tracker.imagestream_predict(self.output_dir, + data_type='mot', + model_type='FairMOT', + visualization=self.visualization, + draw_threshold=self.draw_threshold) next(self.tracker_generator) def __exit__(self, exc_type, exc_value, traceback): @@ -132,12 +134,11 @@ class FairmotTracker_1088x608: logger.info('No output images to save for video') return img = cv2.imread(os.path.join(save_dir, '00000.jpg')) - video_writer = cv2.VideoWriter( - output_video_path, - apiPreference=0, - fourcc=cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), - fps=30, - frameSize=(img.shape[1], img.shape[0])) + video_writer = cv2.VideoWriter(output_video_path, + apiPreference=0, + fourcc=cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), + fps=30, + frameSize=(img.shape[1], img.shape[0])) for i in range(len(imgnames)): imgpath = os.path.join(save_dir, '{:05d}.jpg'.format(i)) img = cv2.imread(imgpath) @@ -174,11 +175,10 @@ class FairmotTracker_1088x608: """ Run as a command. """ - self.parser = argparse.ArgumentParser( - description="Run the {} module.".format(self.name), - prog='hub run {}'.format(self.name), - usage='%(prog)s', - add_help=True) + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. 
Required") self.arg_config_group = self.parser.add_argument_group( @@ -210,12 +210,11 @@ class FairmotTracker_1088x608: logger.info('No output images to save for video') return img = cv2.imread(os.path.join(save_dir, '00000.jpg')) - video_writer = cv2.VideoWriter( - output_video_path, - apiPreference=0, - fourcc=cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), - fps=30, - frameSize=(img.shape[1], img.shape[0])) + video_writer = cv2.VideoWriter(output_video_path, + apiPreference=0, + fourcc=cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), + fps=30, + frameSize=(img.shape[1], img.shape[0])) for i in range(len(imgnames)): imgpath = os.path.join(save_dir, '{:05d}.jpg'.format(i)) img = cv2.imread(imgpath) @@ -231,16 +230,22 @@ class FairmotTracker_1088x608: """ self.arg_config_group.add_argument('--use_gpu', action='store_true', help="use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', type=str, default='mot_result', help='Directory name for output tracking results.') - self.arg_config_group.add_argument( - '--visualization', action='store_true', help="whether to save output as images.") - self.arg_config_group.add_argument( - "--draw_threshold", type=float, default=0.5, help="Threshold to reserve the result for visualization.") + self.arg_config_group.add_argument('--output_dir', + type=str, + default='mot_result', + help='Directory name for output tracking results.') + self.arg_config_group.add_argument('--visualization', + action='store_true', + help="whether to save output as images.") + self.arg_config_group.add_argument("--draw_threshold", + type=float, + default=0.5, + help="Threshold to reserve the result for visualization.") def add_module_input_arg(self): """ Add the command input options. 
""" - self.arg_input_group.add_argument( - '--video_stream', type=str, help="path to video stream, can be a video file or stream device number.") + self.arg_input_group.add_argument('--video_stream', + type=str, + help="path to video stream, can be a video file or stream device number.") diff --git a/modules/video/multiple_object_tracking/jde_darknet53/README.md b/modules/video/multiple_object_tracking/jde_darknet53/README.md index 87e0755cce6c439e4f0ebb04c6ef5e3f740d433a..185904379281d83f2f3168626d5ff1a52229e715 100644 --- a/modules/video/multiple_object_tracking/jde_darknet53/README.md +++ b/modules/video/multiple_object_tracking/jde_darknet53/README.md @@ -120,6 +120,10 @@ 初始发布 +* 1.1.0 + + 移除fluid api + - ```shell - $ hub install jde_darknet53==1.0.0 + $ hub install jde_darknet53==1.1.0 ``` diff --git a/modules/video/multiple_object_tracking/jde_darknet53/config/jde_darknet53_30e_1088x608.yml b/modules/video/multiple_object_tracking/jde_darknet53/config/jde_darknet53_30e_1088x608.yml index 33fa547afe9f95f5dfe7ea321c3e9be1c3634e1d..01753447a99683ff055d7496410831c2e330ca9c 100644 --- a/modules/video/multiple_object_tracking/jde_darknet53/config/jde_darknet53_30e_1088x608.yml +++ b/modules/video/multiple_object_tracking/jde_darknet53/config/jde_darknet53_30e_1088x608.yml @@ -1,7 +1,6 @@ _BASE_: [ '_base_/mot.yml', '_base_/runtime.yml', - '_base_/optimizer_30e.yml', '_base_/jde_darknet53.yml', '_base_/jde_reader_1088x608.yml', ] diff --git a/modules/video/multiple_object_tracking/jde_darknet53/dataset.py b/modules/video/multiple_object_tracking/jde_darknet53/dataset.py index ff584d41995237dce107e1f103d862af942ed6a5..028ff4a6b63d36b87b0cb104afb699232d88d3a5 100644 --- a/modules/video/multiple_object_tracking/jde_darknet53/dataset.py +++ b/modules/video/multiple_object_tracking/jde_darknet53/dataset.py @@ -11,24 +11,79 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
def default_collate_fn(batch):
    """
    Collate a list of samples into one batch for :code:`paddle.io.DataLoader`.

    The type of the first sample decides how the whole batch is merged, and
    nested containers are processed recursively:

    * ``numpy.ndarray`` samples are stacked along a new leading axis.
    * numbers are packed into one ``numpy.ndarray``.
    * ``str`` / ``bytes`` samples are returned unchanged (the input list).
    * mappings are collated key by key, using the keys of the first sample.
    * other sequences are collated field by field and returned as a list.
    * ``paddle.Tensor`` samples are stacked with :code:`paddle.stack`.

    For example, the input::

        [{'image': <ndarray, shape [3, 224, 224]>, 'label': 1},
         {'image': <ndarray, shape [3, 224, 224]>, 'label': 3}]

    is collated into::

        {'image': <ndarray, shape [2, 3, 224, 224]>, 'label': np.array([1, 3])}

    Args:
        batch (list): samples to merge. The first element is inspected to
            pick the collating strategy, so an empty list raises IndexError.

    Returns:
        The batched data, with the same nesting as a single sample.

    Raises:
        RuntimeError: if sequence samples do not all have the same length.
        TypeError: if the first sample is of an unsupported type.
    """
    head = batch[0]

    if isinstance(head, np.ndarray):
        return np.stack(batch, axis=0)

    if isinstance(head, numbers.Number):
        return np.array(batch)

    # str/bytes are Sequences as well, so they must be handled before the
    # generic Sequence branch below.
    if isinstance(head, (str, bytes)):
        return batch

    if isinstance(head, Mapping):
        return {key: default_collate_fn([entry[key] for entry in batch]) for key in head}

    if isinstance(head, Sequence):
        expected_len = len(head)
        if any(len(entry) != expected_len for entry in batch):
            raise RuntimeError("fields number not same among samples in a batch")
        return [default_collate_fn(fields) for fields in zip(*batch)]

    # Tensor samples are tested after the built-in containers; a tensor matches
    # none of the checks above, so the result is the same as testing it earlier.
    if isinstance(head, (paddle.Tensor, core.eager.Tensor)):
        return paddle.stack(batch, axis=0)

    raise TypeError("batch data can only contain: tensor, numpy.ndarray, "
                    "dict, list, number, str, but got {}".format(type(head)))
# See the License for the specific language governing permissions and # limitations under the License. +import argparse +import glob import os -import sys import signal -import glob -import argparse +import sys +import cv2 import paddle -from ppdet.core.workspace import load_config, merge_config +from ppdet.core.workspace import load_config +from ppdet.core.workspace import merge_config from ppdet.engine import Tracker -from ppdet.utils.check import check_gpu, check_version, check_config +from ppdet.utils.check import check_config +from ppdet.utils.check import check_gpu +from ppdet.utils.check import check_version from ppdet.utils.logger import setup_logger -import paddlehub as hub -from paddlehub.module.module import moduleinfo, serving, runnable -import cv2 +import paddlehub as hub from .tracker import StreamTracker +from paddlehub.module.module import moduleinfo +from paddlehub.module.module import runnable +from paddlehub.module.module import serving logger = setup_logger('Predict') -@moduleinfo( - name="jde_darknet53", - type="CV/multiple_object_tracking", - author="paddlepaddle", - author_email="", - summary="JDE is a joint detection and appearance embedding model for multiple object tracking.", - version="1.0.0") +@moduleinfo(name="jde_darknet53", + type="CV/multiple_object_tracking", + author="paddlepaddle", + author_email="", + summary="JDE is a joint detection and appearance embedding model for multiple object tracking.", + version="1.1.0") class JDETracker_1088x608: + def __init__(self): self.pretrained_model = os.path.join(self.directory, "jde_darknet53_30e_1088x608") @@ -71,13 +76,12 @@ class JDETracker_1088x608: tracker.load_weights_jde(self.pretrained_model) signal.signal(signal.SIGINT, self.signalhandler) # inference - tracker.videostream_predict( - video_stream=video_stream, - output_dir=output_dir, - data_type='mot', - model_type='JDE', - visualization=visualization, - draw_threshold=draw_threshold) + 
tracker.videostream_predict(video_stream=video_stream, + output_dir=output_dir, + data_type='mot', + model_type='JDE', + visualization=visualization, + draw_threshold=draw_threshold) def stream_mode(self, output_dir='mot_result', visualization=True, draw_threshold=0.5, use_gpu=False): ''' @@ -108,12 +112,11 @@ class JDETracker_1088x608: return self def __enter__(self): - self.tracker_generator = self.tracker.imagestream_predict( - self.output_dir, - data_type='mot', - model_type='JDE', - visualization=self.visualization, - draw_threshold=self.draw_threshold) + self.tracker_generator = self.tracker.imagestream_predict(self.output_dir, + data_type='mot', + model_type='JDE', + visualization=self.visualization, + draw_threshold=self.draw_threshold) next(self.tracker_generator) def __exit__(self, exc_type, exc_value, traceback): @@ -132,12 +135,11 @@ class JDETracker_1088x608: logger.info('No output images to save for video') return img = cv2.imread(os.path.join(save_dir, '00000.jpg')) - video_writer = cv2.VideoWriter( - output_video_path, - apiPreference=0, - fourcc=cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), - fps=30, - frameSize=(img.shape[1], img.shape[0])) + video_writer = cv2.VideoWriter(output_video_path, + apiPreference=0, + fourcc=cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), + fps=30, + frameSize=(img.shape[1], img.shape[0])) for i in range(len(imgnames)): imgpath = os.path.join(save_dir, '{:05d}.jpg'.format(i)) img = cv2.imread(imgpath) @@ -174,11 +176,10 @@ class JDETracker_1088x608: """ Run as a command. """ - self.parser = argparse.ArgumentParser( - description="Run the {} module.".format(self.name), - prog='hub run {}'.format(self.name), - usage='%(prog)s', - add_help=True) + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. 
Required") self.arg_config_group = self.parser.add_argument_group( @@ -210,12 +211,11 @@ class JDETracker_1088x608: logger.info('No output images to save for video') return img = cv2.imread(os.path.join(save_dir, '00000.jpg')) - video_writer = cv2.VideoWriter( - output_video_path, - apiPreference=0, - fourcc=cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), - fps=30, - frameSize=(img.shape[1], img.shape[0])) + video_writer = cv2.VideoWriter(output_video_path, + apiPreference=0, + fourcc=cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), + fps=30, + frameSize=(img.shape[1], img.shape[0])) for i in range(len(imgnames)): imgpath = os.path.join(save_dir, '{:05d}.jpg'.format(i)) img = cv2.imread(imgpath) @@ -231,16 +231,22 @@ class JDETracker_1088x608: """ self.arg_config_group.add_argument('--use_gpu', action='store_true', help="use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', type=str, default='mot_result', help='Directory name for output tracking results.') - self.arg_config_group.add_argument( - '--visualization', action='store_true', help="whether to save output as images.") - self.arg_config_group.add_argument( - "--draw_threshold", type=float, default=0.5, help="Threshold to reserve the result for visualization.") + self.arg_config_group.add_argument('--output_dir', + type=str, + default='mot_result', + help='Directory name for output tracking results.') + self.arg_config_group.add_argument('--visualization', + action='store_true', + help="whether to save output as images.") + self.arg_config_group.add_argument("--draw_threshold", + type=float, + default=0.5, + help="Threshold to reserve the result for visualization.") def add_module_input_arg(self): """ Add the command input options. 
""" - self.arg_input_group.add_argument( - '--video_stream', type=str, help="path to video stream, can be a video file or stream device number.") + self.arg_input_group.add_argument('--video_stream', + type=str, + help="path to video stream, can be a video file or stream device number.")