Update mot modules (#2111)

* remove fluid api
* fix readme

Update mot modules (#2111)
* remove fluid api
* fix readme
e8f5bf8e · chenjian · GitHub · 404ab2dd · e8f5bf8e · e8f5bf8e
8 changed files
--- a/modules/video/multiple_object_tracking/fairmot_dla34/README.md
+++ b/modules/video/multiple_object_tracking/fairmot_dla34/README.md
@@ -117,6 +117,10 @@

  初始发布

+* 1.1.0
+
+  移除fluid api
+
  - ```shell
-    $ hub install fairmot_dla34==1.0.0
+    $ hub install fairmot_dla34==1.1.0
    ```
--- a/modules/video/multiple_object_tracking/fairmot_dla34/config/fairmot_dla34_30e_1088x608.yml
+++ b/modules/video/multiple_object_tracking/fairmot_dla34/config/fairmot_dla34_30e_1088x608.yml
 _BASE_: [
  '_base_/mot.yml',
  '_base_/runtime.yml',
-  '_base_/optimizer_30e.yml',
  '_base_/fairmot_dla34.yml',
  '_base_/fairmot_reader_1088x608.yml',
 ]

--- a/modules/video/multiple_object_tracking/fairmot_dla34/dataset.py
+++ b/modules/video/multiple_object_tracking/fairmot_dla34/dataset.py
@@ -11,24 +11,78 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import numbers
 import os
 import sys
-import six
-from collections.abc import Mapping
 from collections import deque
+from collections.abc import Mapping
+
+import six
+try:
+    from collections.abc import Sequence, Mapping
+except:
+    from collections import Sequence, Mapping

 from ppdet.core.workspace import register, serializable
 from ppdet.utils.logger import setup_logger
 from ppdet.data.reader import BaseDataLoader, Compose
-from paddle.fluid.dataloader.collate import default_collate_fn
 import cv2
 from imageio import imread, imwrite
 import numpy as np
 import paddle
+from paddle.framework import core

 logger = setup_logger(__name__)


+def default_collate_fn(batch):
+    """
+    Default batch collating function for :code:`paddle.io.DataLoader`.
+    The input is a list of samples, where each element of the list
+    is the data of one sample. A sample should be composed of list,
+    dictionary, string, number, numpy array and paddle.Tensor. This
+    function parses the input data recursively and stacks numbers,
+    numpy arrays and paddle.Tensors into batch data. E.g. for the
+    following input data:
+    [{'image': np.array(shape=[3, 224, 224]), 'label': 1},
+     {'image': np.array(shape=[3, 224, 224]), 'label': 3},
+     {'image': np.array(shape=[3, 224, 224]), 'label': 4},
+     {'image': np.array(shape=[3, 224, 224]), 'label': 5},]


+    this default collate function zips each number and numpy array
+    field together and stacks each field as the batch field as follows:
+    {'image': np.array(shape=[4, 3, 224, 224]), 'label': np.array([1, 3, 4, 5])}
+    Args:
+        batch(list of sample data): batch should be a list of sample data.

+    Returns:
+        Batched data: each number, numpy array and paddle.Tensor in the
+                      input data, batched.
+    """
+    sample = batch[0]  # the type of the first sample decides how the whole batch is collated
+    if isinstance(sample, np.ndarray):
+        batch = np.stack(batch, axis=0)
+        return batch
+    elif isinstance(sample, (paddle.Tensor, core.eager.Tensor)):
+        return paddle.stack(batch, axis=0)
+    elif isinstance(sample, numbers.Number):
+        batch = np.array(batch)
+        return batch
+    elif isinstance(sample, (str, bytes)):  # checked before Sequence: str/bytes are Sequences too
+        return batch  # strings are returned as the original list, not stacked
+    elif isinstance(sample, Mapping):
+        return {key: default_collate_fn([d[key] for d in batch]) for key in sample}
+    elif isinstance(sample, Sequence):
+        sample_fields_num = len(sample)
+        if not all(len(sample) == sample_fields_num for sample in iter(batch)):
+            raise RuntimeError("fileds number not same among samples in a batch")
+        return [default_collate_fn(fields) for fields in zip(*batch)]  # collate field-by-field across samples
+
+    raise TypeError("batch data con only contains: tensor, numpy.ndarray, "
+                    "dict, list, number, but got {}".format(type(sample)))
+
+
 @register
 @serializable
 class MOTVideoStream:
@@ -40,6 +94,7 @@ class MOTVideoStream:
            Set True when used during MOT model inference while saving
            images or video, or used in DeepSORT.
    """
+
    def __init__(self, video_stream=None, keep_ori_im=False, **kwargs):
        self.video_stream = video_stream
        self.keep_ori_im = keep_ori_im
@@ -106,6 +161,7 @@ class MOTImageStream:
            Set True when used during MOT model inference while saving
            images or video, or used in DeepSORT.
    """
+
    def __init__(self, sample_num=-1, keep_ori_im=False, **kwargs):
        self.keep_ori_im = keep_ori_im
        self._curr_iter = 0

--- a/modules/video/multiple_object_tracking/fairmot_dla34/module.py
+++ b/modules/video/multiple_object_tracking/fairmot_dla34/module.py
@@ -11,34 +11,38 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import os
-import sys
-import signal
 import argparse
 import glob
+import os
+import signal
+import sys

+import cv2
 import paddle
-from ppdet.core.workspace import load_config, merge_config
+from ppdet.core.workspace import load_config
+from ppdet.core.workspace import merge_config
 from ppdet.engine import Tracker
-from ppdet.utils.check import check_gpu, check_version, check_config
+from ppdet.utils.check import check_config
+from ppdet.utils.check import check_gpu
+from ppdet.utils.check import check_version
 from ppdet.utils.logger import setup_logger
-import paddlehub as hub
-from paddlehub.module.module import moduleinfo, runnable
-import cv2

+import paddlehub as hub
 from .tracker import StreamTracker
+from paddlehub.module.module import moduleinfo
+from paddlehub.module.module import runnable

 logger = setup_logger('Predict')


-@moduleinfo(
-    name="fairmot_dla34",
-    type="CV/multiple_object_tracking",
-    author="paddlepaddle",
-    author_email="",
-    summary="Fairmot is a model for multiple object tracking.",
-    version="1.0.0")
+@moduleinfo(name="fairmot_dla34",
+            type="CV/multiple_object_tracking",
+            author="paddlepaddle",
+            author_email="",
+            summary="Fairmot is a model for multiple object tracking.",
+            version="1.1.0")
 class FairmotTracker_1088x608:
+
    def __init__(self):
        self.pretrained_model = os.path.join(self.directory, "fairmot_dla34_30e_1088x608")

@@ -71,13 +75,12 @@ class FairmotTracker_1088x608:
        tracker.load_weights_jde(self.pretrained_model)
        signal.signal(signal.SIGINT, self.signalhandler)
        # inference
-        tracker.videostream_predict(
-            video_stream=video_stream,
-            output_dir=output_dir,
-            data_type='mot',
-            model_type='FairMOT',
-            visualization=visualization,
-            draw_threshold=draw_threshold)
+        tracker.videostream_predict(video_stream=video_stream,
+                                    output_dir=output_dir,
+                                    data_type='mot',
+                                    model_type='FairMOT',
+                                    visualization=visualization,
+                                    draw_threshold=draw_threshold)

    def stream_mode(self, output_dir='mot_result', visualization=True, draw_threshold=0.5, use_gpu=False):
        '''
@@ -108,12 +111,11 @@ class FairmotTracker_1088x608:
        return self

    def __enter__(self):
-        self.tracker_generator = self.tracker.imagestream_predict(
-            self.output_dir,
-            data_type='mot',
-            model_type='FairMOT',
-            visualization=self.visualization,
-            draw_threshold=self.draw_threshold)
+        self.tracker_generator = self.tracker.imagestream_predict(self.output_dir,
+                                                                  data_type='mot',
+                                                                  model_type='FairMOT',
+                                                                  visualization=self.visualization,
+                                                                  draw_threshold=self.draw_threshold)
        next(self.tracker_generator)

    def __exit__(self, exc_type, exc_value, traceback):
@@ -132,12 +134,11 @@ class FairmotTracker_1088x608:
                logger.info('No output images to save for video')
                return
            img = cv2.imread(os.path.join(save_dir, '00000.jpg'))
-            video_writer = cv2.VideoWriter(
-                output_video_path,
-                apiPreference=0,
-                fourcc=cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
-                fps=30,
-                frameSize=(img.shape[1], img.shape[0]))
+            video_writer = cv2.VideoWriter(output_video_path,
+                                           apiPreference=0,
+                                           fourcc=cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
+                                           fps=30,
+                                           frameSize=(img.shape[1], img.shape[0]))
            for i in range(len(imgnames)):
                imgpath = os.path.join(save_dir, '{:05d}.jpg'.format(i))
                img = cv2.imread(imgpath)
@@ -174,11 +175,10 @@ class FairmotTracker_1088x608:
        """
        Run as a command.
        """
-        self.parser = argparse.ArgumentParser(
-            description="Run the {} module.".format(self.name),
-            prog='hub run {}'.format(self.name),
-            usage='%(prog)s',
-            add_help=True)
+        self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name),
+                                              prog='hub run {}'.format(self.name),
+                                              usage='%(prog)s',
+                                              add_help=True)

        self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required")
        self.arg_config_group = self.parser.add_argument_group(
@@ -210,12 +210,11 @@ class FairmotTracker_1088x608:
                logger.info('No output images to save for video')
                return
            img = cv2.imread(os.path.join(save_dir, '00000.jpg'))
-            video_writer = cv2.VideoWriter(
-                output_video_path,
-                apiPreference=0,
-                fourcc=cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
-                fps=30,
-                frameSize=(img.shape[1], img.shape[0]))
+            video_writer = cv2.VideoWriter(output_video_path,
+                                           apiPreference=0,
+                                           fourcc=cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
+                                           fps=30,
+                                           frameSize=(img.shape[1], img.shape[0]))
            for i in range(len(imgnames)):
                imgpath = os.path.join(save_dir, '{:05d}.jpg'.format(i))
                img = cv2.imread(imgpath)
@@ -231,16 +230,22 @@ class FairmotTracker_1088x608:
        """
        self.arg_config_group.add_argument('--use_gpu', action='store_true', help="use GPU or not")

-        self.arg_config_group.add_argument(
-            '--output_dir', type=str, default='mot_result', help='Directory name for output tracking results.')
-        self.arg_config_group.add_argument(
-            '--visualization', action='store_true', help="whether to save output as images.")
-        self.arg_config_group.add_argument(
-            "--draw_threshold", type=float, default=0.5, help="Threshold to reserve the result for visualization.")
+        self.arg_config_group.add_argument('--output_dir',
+                                           type=str,
+                                           default='mot_result',
+                                           help='Directory name for output tracking results.')
+        self.arg_config_group.add_argument('--visualization',
+                                           action='store_true',
+                                           help="whether to save output as images.")
+        self.arg_config_group.add_argument("--draw_threshold",
+                                           type=float,
+                                           default=0.5,
+                                           help="Threshold to reserve the result for visualization.")

    def add_module_input_arg(self):
        """
        Add the command input options.
        """
-        self.arg_input_group.add_argument(
-            '--video_stream', type=str, help="path to video stream, can be a video file or stream device number.")
+        self.arg_input_group.add_argument('--video_stream',
+                                          type=str,
+                                          help="path to video stream, can be a video file or stream device number.")
--- a/modules/video/multiple_object_tracking/jde_darknet53/README.md
+++ b/modules/video/multiple_object_tracking/jde_darknet53/README.md
@@ -120,6 +120,10 @@

  初始发布

+* 1.1.0
+
+  移除fluid api
+
  - ```shell
-    $ hub install jde_darknet53==1.0.0
+    $ hub install jde_darknet53==1.1.0
    ```
--- a/modules/video/multiple_object_tracking/jde_darknet53/config/jde_darknet53_30e_1088x608.yml
+++ b/modules/video/multiple_object_tracking/jde_darknet53/config/jde_darknet53_30e_1088x608.yml
 _BASE_: [
  '_base_/mot.yml',
  '_base_/runtime.yml',
-  '_base_/optimizer_30e.yml',
  '_base_/jde_darknet53.yml',
  '_base_/jde_reader_1088x608.yml',
 ]

--- a/modules/video/multiple_object_tracking/jde_darknet53/dataset.py
+++ b/modules/video/multiple_object_tracking/jde_darknet53/dataset.py
@@ -11,24 +11,79 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import numbers
 import os
 import sys
-import six
-from collections.abc import Mapping
 from collections import deque
+from collections.abc import Mapping
+
+import six
+try:
+    from collections.abc import Sequence, Mapping
+except:
+    from collections import Sequence, Mapping

 from ppdet.core.workspace import register, serializable
 from ppdet.utils.logger import setup_logger
 from ppdet.data.reader import BaseDataLoader, Compose
-from paddle.fluid.dataloader.collate import default_collate_fn
+
 import cv2
 from imageio import imread, imwrite
 import numpy as np
 import paddle
+from paddle.framework import core

 logger = setup_logger(__name__)


+def default_collate_fn(batch):
+    """
+    Default batch collating function for :code:`paddle.io.DataLoader`.
+    The input is a list of samples, where each element of the list
+    is the data of one sample. A sample should be composed of list,
+    dictionary, string, number, numpy array and paddle.Tensor. This
+    function parses the input data recursively and stacks numbers,
+    numpy arrays and paddle.Tensors into batch data. E.g. for the
+    following input data:
+    [{'image': np.array(shape=[3, 224, 224]), 'label': 1},
+     {'image': np.array(shape=[3, 224, 224]), 'label': 3},
+     {'image': np.array(shape=[3, 224, 224]), 'label': 4},
+     {'image': np.array(shape=[3, 224, 224]), 'label': 5},]


+    this default collate function zips each number and numpy array
+    field together and stacks each field as the batch field as follows:
+    {'image': np.array(shape=[4, 3, 224, 224]), 'label': np.array([1, 3, 4, 5])}
+    Args:
+        batch(list of sample data): batch should be a list of sample data.

+    Returns:
+        Batched data: each number, numpy array and paddle.Tensor in the
+                      input data, batched.
+    """
+    sample = batch[0]  # the type of the first sample decides how the whole batch is collated
+    if isinstance(sample, np.ndarray):
+        batch = np.stack(batch, axis=0)
+        return batch
+    elif isinstance(sample, (paddle.Tensor, core.eager.Tensor)):
+        return paddle.stack(batch, axis=0)
+    elif isinstance(sample, numbers.Number):
+        batch = np.array(batch)
+        return batch
+    elif isinstance(sample, (str, bytes)):  # checked before Sequence: str/bytes are Sequences too
+        return batch  # strings are returned as the original list, not stacked
+    elif isinstance(sample, Mapping):
+        return {key: default_collate_fn([d[key] for d in batch]) for key in sample}
+    elif isinstance(sample, Sequence):
+        sample_fields_num = len(sample)
+        if not all(len(sample) == sample_fields_num for sample in iter(batch)):
+            raise RuntimeError("fileds number not same among samples in a batch")
+        return [default_collate_fn(fields) for fields in zip(*batch)]  # collate field-by-field across samples
+
+    raise TypeError("batch data con only contains: tensor, numpy.ndarray, "
+                    "dict, list, number, but got {}".format(type(sample)))
+
+
 @register
 @serializable
 class MOTVideoStream:
@@ -40,6 +95,7 @@ class MOTVideoStream:
            Set True when used during MOT model inference while saving
            images or video, or used in DeepSORT.
    """
+
    def __init__(self, video_stream=None, keep_ori_im=False, **kwargs):
        self.video_stream = video_stream
        self.keep_ori_im = keep_ori_im
@@ -106,6 +162,7 @@ class MOTImageStream:
            Set True when used during MOT model inference while saving
            images or video, or used in DeepSORT.
    """
+
    def __init__(self, sample_num=-1, keep_ori_im=False, **kwargs):
        self.keep_ori_im = keep_ori_im
        self._curr_iter = 0

--- a/modules/video/multiple_object_tracking/jde_darknet53/module.py
+++ b/modules/video/multiple_object_tracking/jde_darknet53/module.py
@@ -11,34 +11,39 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import argparse
+import glob
 import os
-import sys
 import signal
-import glob
-import argparse
+import sys

+import cv2
 import paddle
-from ppdet.core.workspace import load_config, merge_config
+from ppdet.core.workspace import load_config
+from ppdet.core.workspace import merge_config
 from ppdet.engine import Tracker
-from ppdet.utils.check import check_gpu, check_version, check_config
+from ppdet.utils.check import check_config
+from ppdet.utils.check import check_gpu
+from ppdet.utils.check import check_version
 from ppdet.utils.logger import setup_logger
-import paddlehub as hub
-from paddlehub.module.module import moduleinfo, serving, runnable
-import cv2

+import paddlehub as hub
 from .tracker import StreamTracker
+from paddlehub.module.module import moduleinfo
+from paddlehub.module.module import runnable
+from paddlehub.module.module import serving

 logger = setup_logger('Predict')


-@moduleinfo(
-    name="jde_darknet53",
-    type="CV/multiple_object_tracking",
-    author="paddlepaddle",
-    author_email="",
-    summary="JDE is a joint detection and appearance embedding model for multiple object tracking.",
-    version="1.0.0")
+@moduleinfo(name="jde_darknet53",
+            type="CV/multiple_object_tracking",
+            author="paddlepaddle",
+            author_email="",
+            summary="JDE is a joint detection and appearance embedding model for multiple object tracking.",
+            version="1.1.0")
 class JDETracker_1088x608:
+
    def __init__(self):
        self.pretrained_model = os.path.join(self.directory, "jde_darknet53_30e_1088x608")

@@ -71,13 +76,12 @@ class JDETracker_1088x608:
        tracker.load_weights_jde(self.pretrained_model)
        signal.signal(signal.SIGINT, self.signalhandler)
        # inference
-        tracker.videostream_predict(
-            video_stream=video_stream,
-            output_dir=output_dir,
-            data_type='mot',
-            model_type='JDE',
-            visualization=visualization,
-            draw_threshold=draw_threshold)
+        tracker.videostream_predict(video_stream=video_stream,
+                                    output_dir=output_dir,
+                                    data_type='mot',
+                                    model_type='JDE',
+                                    visualization=visualization,
+                                    draw_threshold=draw_threshold)

    def stream_mode(self, output_dir='mot_result', visualization=True, draw_threshold=0.5, use_gpu=False):
        '''
@@ -108,12 +112,11 @@ class JDETracker_1088x608:
        return self

    def __enter__(self):
-        self.tracker_generator = self.tracker.imagestream_predict(
-            self.output_dir,
-            data_type='mot',
-            model_type='JDE',
-            visualization=self.visualization,
-            draw_threshold=self.draw_threshold)
+        self.tracker_generator = self.tracker.imagestream_predict(self.output_dir,
+                                                                  data_type='mot',
+                                                                  model_type='JDE',
+                                                                  visualization=self.visualization,
+                                                                  draw_threshold=self.draw_threshold)
        next(self.tracker_generator)

    def __exit__(self, exc_type, exc_value, traceback):
@@ -132,12 +135,11 @@ class JDETracker_1088x608:
                logger.info('No output images to save for video')
                return
            img = cv2.imread(os.path.join(save_dir, '00000.jpg'))
-            video_writer = cv2.VideoWriter(
-                output_video_path,
-                apiPreference=0,
-                fourcc=cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
-                fps=30,
-                frameSize=(img.shape[1], img.shape[0]))
+            video_writer = cv2.VideoWriter(output_video_path,
+                                           apiPreference=0,
+                                           fourcc=cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
+                                           fps=30,
+                                           frameSize=(img.shape[1], img.shape[0]))
            for i in range(len(imgnames)):
                imgpath = os.path.join(save_dir, '{:05d}.jpg'.format(i))
                img = cv2.imread(imgpath)
@@ -174,11 +176,10 @@ class JDETracker_1088x608:
        """
        Run as a command.
        """
-        self.parser = argparse.ArgumentParser(
-            description="Run the {} module.".format(self.name),
-            prog='hub run {}'.format(self.name),
-            usage='%(prog)s',
-            add_help=True)
+        self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name),
+                                              prog='hub run {}'.format(self.name),
+                                              usage='%(prog)s',
+                                              add_help=True)

        self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required")
        self.arg_config_group = self.parser.add_argument_group(
@@ -210,12 +211,11 @@ class JDETracker_1088x608:
                logger.info('No output images to save for video')
                return
            img = cv2.imread(os.path.join(save_dir, '00000.jpg'))
-            video_writer = cv2.VideoWriter(
-                output_video_path,
-                apiPreference=0,
-                fourcc=cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
-                fps=30,
-                frameSize=(img.shape[1], img.shape[0]))
+            video_writer = cv2.VideoWriter(output_video_path,
+                                           apiPreference=0,
+                                           fourcc=cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
+                                           fps=30,
+                                           frameSize=(img.shape[1], img.shape[0]))
            for i in range(len(imgnames)):
                imgpath = os.path.join(save_dir, '{:05d}.jpg'.format(i))
                img = cv2.imread(imgpath)
@@ -231,16 +231,22 @@ class JDETracker_1088x608:
        """
        self.arg_config_group.add_argument('--use_gpu', action='store_true', help="use GPU or not")

-        self.arg_config_group.add_argument(
-            '--output_dir', type=str, default='mot_result', help='Directory name for output tracking results.')
-        self.arg_config_group.add_argument(
-            '--visualization', action='store_true', help="whether to save output as images.")
-        self.arg_config_group.add_argument(
-            "--draw_threshold", type=float, default=0.5, help="Threshold to reserve the result for visualization.")
+        self.arg_config_group.add_argument('--output_dir',
+                                           type=str,
+                                           default='mot_result',
+                                           help='Directory name for output tracking results.')
+        self.arg_config_group.add_argument('--visualization',
+                                           action='store_true',
+                                           help="whether to save output as images.")
+        self.arg_config_group.add_argument("--draw_threshold",
+                                           type=float,
+                                           default=0.5,
+                                           help="Threshold to reserve the result for visualization.")

    def add_module_input_arg(self):
        """
        Add the command input options.
        """
-        self.arg_input_group.add_argument(
-            '--video_stream', type=str, help="path to video stream, can be a video file or stream device number.")
+        self.arg_input_group.add_argument('--video_stream',
+                                          type=str,
+                                          help="path to video stream, can be a video file or stream device number.")