Commit 64fc2d96 authored by J jerrywgz

Merge branch 'develop' of https://github.com/PaddlePaddle/models into fix_resize

architecture: SSD
train_feed: SSDTrainFeed
eval_feed: SSDEvalFeed
test_feed: SSDTestFeed
pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/ssd_mobilenet_v1_coco_pretrained.tar
use_gpu: true
max_iters: 28000
snapshot_iter: 2000
log_smooth_window: 1
metric: VOC
save_dir: output
weights: output/ssd_mobilenet_v1_voc/model_final/
num_classes: 21

SSD:
  backbone: MobileNet
  multi_box_head: MultiBoxHead
  num_classes: 21
  metric:
    ap_version: 11point
    evaluate_difficult: false
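For a quick sanity check, the config above parses as plain YAML once the nested keys are indented; a minimal sketch (the file path is an assumption inferred from the `weights` entry):

```python
# Minimal sketch: load and inspect the SSD config above.
# The path below is an assumption inferred from the `weights` entry.
import yaml

with open('configs/ssd_mobilenet_v1_voc.yml') as f:
    cfg = yaml.safe_load(f)

print(cfg['max_iters'])                    # 28000
print(cfg['SSD']['backbone'])              # MobileNet
print(cfg['SSD']['metric']['ap_version'])  # 11point
```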
......
......@@ -20,6 +20,8 @@ python tools/train.py -c configs/faster_rcnn_r50_1x.yml
```bash
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
# or run on CPU with:
# export CPU_NUM=8
python tools/train.py -c configs/faster_rcnn_r50_1x.yml
```
......@@ -28,6 +30,7 @@ python tools/train.py -c configs/faster_rcnn_r50_1x.yml
- The pretrained model is downloaded automatically and cached in `~/.cache/paddle/weights`.
- Model checkpoints are saved in `output` by default (configurable).
- For the hyperparameters used, please refer to the config file.
- Training RCNN models on CPU is not supported on PaddlePaddle<=1.5.1 and will be fixed in a later version.
Alternating between training epochs and evaluation runs is possible; simply pass
in `--eval` to do so (tested with the `SSD` detector on Pascal-VOC; not
recommended for two-stage models on the COCO dataset).
......
......@@ -19,6 +19,8 @@ python tools/train.py -c configs/faster_rcnn_r50_1x.yml
```bash
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
# or run on CPU with:
# export CPU_NUM=8
python tools/train.py -c configs/faster_rcnn_r50_1x.yml
```
......@@ -27,7 +29,7 @@ python tools/train.py -c configs/faster_rcnn_r50_1x.yml
- The pretrained model is downloaded automatically and saved in `~/.cache/paddle/weights`.
- Model checkpoints are saved in `output` by default (configurable).
- For more parameter configurations, please refer to the config file.
- CPU training of RCNN models is not supported on PaddlePaddle 1.5.1 and earlier, and will be fixed in a later version.
Alternating evaluation during training epochs can be enabled by passing `--eval` (verified with the
`SSD` detector on the Pascal-VOC dataset; alternating evaluation is not recommended for two-stage models on the COCO dataset).
......
......@@ -87,7 +87,7 @@ results of image size 608/416/320 above.
| Backbone | Size | Image/gpu | Lr schd | Box AP | Download |
| :----------- | :--: | :-----: | :-----: | :----: | :-------: |
| MobileNet v1 | 300 | 32 | 120e | 73.2 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ssd_mobilenet_v1_voc.tar) |
| MobileNet v1 | 300 | 32 | 120e | 73.13 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ssd_mobilenet_v1_voc.tar) |
**NOTE**: SSD is trained on 2 GPUs with a total batch size of 64 for 120 epochs. SSD training data augmentations: random color distortion,
random cropping, random expansion, random flipping.
......@@ -744,7 +744,6 @@ class SSDEvalFeed(DataFeed):
DecodeImage(to_rgb=True, with_mixup=False),
NormalizeBox(),
ResizeImage(target_size=300, use_cv2=False, interp=1),
RandomFlipImage(is_normalized=True),
Permute(),
NormalizeImage(
mean=[127.5, 127.5, 127.5],
......
......@@ -76,6 +76,7 @@ class RoiDbSource(Dataset):
        self._mixup_epoch = mixup_epoch
        self._with_background = with_background
        self.cname2cid = cname2cid
        self._imid2path = None

    def __str__(self):
        return 'RoiDbSource(fname:%s,epoch:%d,size:%d,pos:%d)' \
......@@ -156,3 +157,14 @@ class RoiDbSource(Dataset):
""" return epoch id for latest sample
"""
return self._epoch
def get_imid2path(self):
"""return image id to image path map"""
if self._imid2path is None:
self._imid2path = {}
for record in self._roidb:
im_id = record['im_id']
im_id = im_id if isinstance(im_id, int) else im_id[0]
im_path = os.path.join(self._image_dir, record['im_file'])
self._imid2path[im_id] = im_path
return self._imid2path
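`get_imid2path` builds the map lazily and caches it; a minimal, hypothetical usage sketch (the map and detections below are illustrative stand-ins, since building the real map needs a dataset on disk):

```python
# Hypothetical usage: resolve predicted im_ids back to image paths.
# A real RoiDbSource would supply this via source.get_imid2path();
# the map and the detections below are illustrative stand-ins.
imid2path = {1: 'data/voc/JPEGImages/000005.jpg'}
detections = {1: [[0.1, 0.2, 0.5, 0.8]]}
for im_id, boxes in detections.items():
    print('{}: {} box(es)'.format(imid2path[im_id], len(boxes)))
```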
......@@ -127,6 +127,13 @@ class ResizeImage(BaseOperator):
use_cv2=True,
target_shape=None):
"""
Rescale image to the specified target size, capped at max_size
if max_size != 0.
If target_size is a list, a scale is randomly selected from it as
the target size.
If target_shape is set, it takes priority over target_size, and the
image is rescaled to the specified target shape.
Args:
target_size (int|list): the target size of image's short side,
multi-scale training is adopted when type is list.
......@@ -169,7 +176,7 @@ class ResizeImage(BaseOperator):
im = cv2.resize(
im, tuple(self.target_shape), interpolation=self.interp)
else:
im = Image.fromarray(np.uint8(im))
im = Image.fromarray(im)
im = im.resize(tuple(self.target_shape), self.interp)
im = np.array(im)
sample['image'] = im
......@@ -192,15 +199,19 @@ class ResizeImage(BaseOperator):
im_scale = float(self.max_size) / float(im_size_max)
im_scale_x = im_scale
im_scale_y = im_scale
resize_w = np.round(im_scale_x * float(im_shape[1]))
resize_h = np.round(im_scale_y * float(im_shape[0]))
sample['im_info'] = np.array(
[
np.round(im_shape[0] * im_scale),
np.round(im_shape[1] * im_scale), im_scale
],
dtype=np.float32)
[resize_h, resize_w, im_scale], dtype=np.float32)
else:
im_scale_x = float(selected_size) / float(im_shape[1])
im_scale_y = float(selected_size) / float(im_shape[0])
resize_w = selected_size
resize_h = selected_size
if self.use_cv2:
im = cv2.resize(
im,
......@@ -211,9 +222,7 @@ class ResizeImage(BaseOperator):
interpolation=self.interp)
else:
im = Image.fromarray(im)
resize_w = selected_size
resize_h = selected_size
im = im.resize((resize_w, resize_h), self.interp)
im = im.resize((int(resize_w), int(resize_h)), self.interp)
im = np.array(im)
sample['image'] = im
return sample
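The capped short-side rescale computed above can be reproduced in isolation. A minimal sketch, assuming illustrative `target_size=800` and `max_size=1333` values (not from this config):

```python
# Minimal sketch of the short-side/capped rescale rule above.
# target_size and max_size values are illustrative assumptions.
import numpy as np

def compute_resize(im_shape, target_size=800, max_size=1333):
    """Scale the short side to target_size, capping the long side at max_size."""
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])
    im_scale = float(target_size) / float(im_size_min)
    if np.round(im_scale * im_size_max) > max_size:
        im_scale = float(max_size) / float(im_size_max)
    resize_w = np.round(im_scale * float(im_shape[1]))
    resize_h = np.round(im_scale * float(im_shape[0]))
    return resize_h, resize_w, im_scale

print(compute_resize((600, 1000, 3)))  # (800.0, 1333.0, 1.333...)
```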
......
......@@ -39,6 +39,7 @@ class SSD(object):
    __category__ = 'architecture'
    __inject__ = ['backbone', 'multi_box_head', 'output_decoder', 'metric']
    __shared__ = ['num_classes']

    def __init__(self,
                 backbone,
......@@ -56,7 +57,7 @@ class SSD(object):
        self.output_decoder = SSDOutputDecoder(**output_decoder)
        if isinstance(metric, dict):
            self.metric = SSDMetric(**metric)

    def build(self, feed_vars, mode='train'):
        im = feed_vars['image']
        if mode == 'train' or mode == 'eval':
......@@ -100,4 +101,3 @@ class SSD(object):
        # SSD uses output_decoder in the output layers; bbox is normalized
        # to the range [0, 1]. is_bbox_normalized is used in infer.py.
        return True
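Because `is_bbox_normalized` returns True, downstream code such as infer.py must scale the decoded [0, 1] boxes back to pixel coordinates; a minimal sketch of that step (the helper name is illustrative):

```python
# Illustrative helper: scale a [0, 1]-normalized box back to pixels.
def denormalize_bbox(bbox, im_width, im_height):
    xmin, ymin, xmax, ymax = bbox
    return [xmin * im_width, ymin * im_height,
            xmax * im_width, ymax * im_height]

print(denormalize_bbox([0.1, 0.2, 0.5, 0.8], 300, 300))
# -> [30.0, 60.0, 150.0, 240.0]
```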
......@@ -57,7 +57,6 @@ class MobileNet(object):
num_filters,
stride,
padding,
channels=None,
num_groups=1,
act='relu',
use_cudnn=True,
......@@ -125,22 +124,21 @@ class MobileNet(object):
num_filters2,
num_groups,
stride,
scale,
name=None):
pointwise_conv = self._conv_norm(
input=input,
filter_size=1,
num_filters=int(num_filters1 * scale),
num_filters=int(num_filters1),
stride=1,
num_groups=int(num_groups * scale),
num_groups=int(num_groups),
padding=0,
name=name + "_extra1")
normal_conv = self._conv_norm(
input=pointwise_conv,
filter_size=3,
num_filters=int(num_filters2 * scale),
num_filters=int(num_filters2),
stride=2,
num_groups=int(num_groups * scale),
num_groups=int(num_groups),
padding=1,
name=name + "_extra2")
return normal_conv
......@@ -150,7 +148,7 @@ class MobileNet(object):
blocks = []
# input 1/1
out = self._conv_norm(input, 3, int(32 * scale), 2, 1, 3, name="conv1")
out = self._conv_norm(input, 3, int(32 * scale), 2, 1, name="conv1")
# 1/2
out = self.depthwise_separable(
out, 32, 64, 32, 1, scale, name="conv2_1")
......@@ -186,11 +184,11 @@ class MobileNet(object):
num_filters = self.extra_block_filters
module14 = self._extra_block(module13, num_filters[0][0],
num_filters[0][1], 1, 2, scale, "conv7_1")
num_filters[0][1], 1, 2, "conv7_1")
module15 = self._extra_block(module14, num_filters[1][0],
num_filters[1][1], 1, 2, scale, "conv7_2")
num_filters[1][1], 1, 2, "conv7_2")
module16 = self._extra_block(module15, num_filters[2][0],
num_filters[2][1], 1, 2, scale, "conv7_3")
num_filters[2][1], 1, 2, "conv7_3")
module17 = self._extra_block(module16, num_filters[3][0],
num_filters[3][1], 1, 2, scale, "conv7_4")
num_filters[3][1], 1, 2, "conv7_4")
return module11, module13, module14, module15, module16, module17
......@@ -68,7 +68,8 @@ class ArgsParser(ArgumentParser):
config[k] = yaml.load(v, Loader=yaml.Loader)
else:
keys = k.split('.')
config[keys[0]] = {}
if keys[0] not in config:
config[keys[0]] = {}
cur = config[keys[0]]
for idx, key in enumerate(keys[1:]):
if idx == len(keys) - 2:
......
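The `if keys[0] not in config` guard above stops a later dotted override from wiping out an earlier one that shares the same top-level key. A minimal standalone sketch of the merge behavior (not the actual ArgsParser; key names borrowed from the config at the top of this diff):

```python
# Standalone sketch of the dotted-key merge enabled by the guard above
# (illustrative, not the actual ArgsParser).
def set_nested(config, key, value):
    keys = key.split('.')
    if keys[0] not in config:      # the fix: don't clobber existing keys
        config[keys[0]] = {}
    cur = config[keys[0]]
    for idx, k in enumerate(keys[1:]):
        if idx == len(keys) - 2:
            cur[k] = value
        else:
            cur = cur.setdefault(k, {})

cfg = {}
set_nested(cfg, 'SSD.metric.ap_version', '11point')
set_nested(cfg, 'SSD.metric.evaluate_difficult', False)
print(cfg)  # both keys survive: the 'SSD' dict is no longer recreated
```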
......@@ -20,6 +20,8 @@ import os
import time
import multiprocessing
import numpy as np
import datetime
from collections import deque
def set_paddle_flags(**kwargs):
......@@ -55,13 +57,14 @@ logger = logging.getLogger(__name__)
def main():
    cfg = load_config(FLAGS.config)

    if 'architecture' in cfg:
        main_arch = cfg.architecture
    else:
        raise ValueError("'architecture' not specified in config file.")

    merge_config(FLAGS.opt)
    if 'log_iter' not in cfg:
        cfg.log_iter = 20

    # check whether use_gpu=True is set in a CPU-only PaddlePaddle build
    check_gpu(cfg.use_gpu)
......@@ -160,16 +163,22 @@ def main():
    cfg_name = os.path.basename(FLAGS.config).split('.')[0]
    save_dir = os.path.join(cfg.save_dir, cfg_name)
    time_stat = deque(maxlen=cfg.log_iter)

    for it in range(start_iter, cfg.max_iters):
        start_time = end_time
        end_time = time.time()
        time_stat.append(end_time - start_time)
        time_cost = np.mean(time_stat)
        eta_sec = (cfg.max_iters - it) * time_cost
        eta = str(datetime.timedelta(seconds=int(eta_sec)))
        outs = exe.run(train_compile_program, fetch_list=train_values)
        stats = {k: np.array(v).mean() for k, v in zip(train_keys, outs[:-1])}
        train_stats.update(stats)
        logs = train_stats.log()
        strs = 'iter: {}, lr: {:.6f}, {}, time: {:.3f}'.format(
            it, np.mean(outs[-1]), logs, end_time - start_time)
        logger.info(strs)
        if it % cfg.log_iter == 0:
            strs = 'iter: {}, lr: {:.6f}, {}, time: {:.3f}, eta: {}'.format(
                it, np.mean(outs[-1]), logs, time_cost, eta)
            logger.info(strs)

        if it > 0 and it % cfg.snapshot_iter == 0 or it == cfg.max_iters - 1:
            save_name = str(it) if it != cfg.max_iters - 1 else "model_final"
......
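The ETA logic added above smooths per-iteration time over a sliding window before extrapolating to the remaining iterations. A minimal standalone sketch (the window size and iteration count mirror `log_iter` and `max_iters` from the config; the training step itself is elided):

```python
# Minimal standalone sketch of the moving-average ETA added above.
import datetime
import time
from collections import deque

log_iter, max_iters = 20, 28000      # mirrors cfg.log_iter / cfg.max_iters
time_stat = deque(maxlen=log_iter)   # keep only the most recent timings

end_time = time.time()
for it in range(max_iters):
    start_time = end_time
    end_time = time.time()
    time_stat.append(end_time - start_time)
    time_cost = sum(time_stat) / len(time_stat)  # smoothed seconds/iter
    eta = datetime.timedelta(seconds=int((max_iters - it) * time_cost))
    if it % log_iter == 0:
        print('iter: {}, eta: {}'.format(it, eta))
    # ... one training step would run here ...
```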
......@@ -78,13 +78,7 @@ def parse_args():
def get_device_num():
if os.getenv("CPU_NUM"):
return int(os.getenv("CPU_NUM"))
visible_device = os.getenv('CUDA_VISIBLE_DEVICES')
if visible_device:
device_num = len(visible_device.split(','))
else:
device_num = subprocess.check_output(
['nvidia-smi', '-L']).decode().count('\n')
return device_num
return fluid.core.get_cuda_device_count()
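This refactor, repeated across several scripts below, delegates device discovery to Paddle itself instead of parsing `CUDA_VISIBLE_DEVICES` or `nvidia-smi -L` output; a minimal check, assuming a PaddlePaddle build of this era:

```python
# Quick check of the call used above; returns 0 on a CPU-only build,
# which the CPU_NUM branch handles separately.
import paddle.fluid as fluid

print(fluid.core.get_cuda_device_count())
```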
def prepare_reader(is_train, pyreader, args, pass_id=1):
......
......@@ -62,18 +62,7 @@ def parse_args():
return args
def get_device_num():
import subprocess
visible_device = os.getenv('CUDA_VISIBLE_DEVICES')
if visible_device:
device_num = len(visible_device.split(','))
else:
device_num = subprocess.check_output(
['nvidia-smi', '-L']).decode().count('\n')
return device_num
DEVICE_NUM = get_device_num()
DEVICE_NUM = fluid.core.get_cuda_device_count()
def test_parallel(exe, test_args, args, test_reader, feeder, bs):
......
......@@ -207,16 +207,14 @@ def process_image(sample,
return (img, )
def image_mapper(**kwargs):
""" image_mapper """
return functools.partial(process_image, **kwargs)
def process_batch_data(input_data, settings, mode, color_jitter, rotate):
batch_data = []
for sample in input_data:
batch_data.append(
process_image(sample, settings, mode, color_jitter, rotate))
if os.path.isfile(sample[0]):
batch_data.append(
process_image(sample, settings, mode, color_jitter, rotate))
else:
print("File not exist : %s" % sample[0])
return batch_data
......
......@@ -338,13 +338,9 @@ def build_program(is_train, main_prog, startup_prog, args):
def get_device_num():
# NOTE(zcd): for multi-processe training, each process use one GPU card.
if num_trainers > 1 : return 1
visible_device = os.environ.get('CUDA_VISIBLE_DEVICES', None)
if visible_device:
device_num = len(visible_device.split(','))
else:
device_num = subprocess.check_output(['nvidia-smi','-L']).decode().count('\n')
return device_num
if num_trainers > 1:
return 1
return fluid.core.get_cuda_device_count()
def train(args):
# parameters from arguments
......
......@@ -52,14 +52,9 @@ num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
def get_device_num():
# NOTE(zcd): for multi-processe training, each process use one GPU card.
if num_trainers > 1: return 1
visible_device = os.environ.get('CUDA_VISIBLE_DEVICES', None)
if visible_device:
device_num = len(visible_device.split(','))
else:
device_num = subprocess.check_output(
['nvidia-smi', '-L']).decode().count('\n')
return device_num
if num_trainers > 1:
return 1
return fluid.core.get_cuda_device_count()
def train():
......
......@@ -11,7 +11,7 @@ class MobileNetSSD:
def ssd_net(self, scale=1.0):
# 300x300
tmp = self.conv_bn(self.img, 3, int(32 * scale), 2, 1, 3)
tmp = self.conv_bn(self.img, 3, int(32 * scale), 2, 1)
# 150x150
tmp = self.depthwise_separable(tmp, 32, 64, 32, 1, scale)
tmp = self.depthwise_separable(tmp, 64, 128, 64, 2, scale)
......@@ -30,13 +30,13 @@ class MobileNetSSD:
# 10x10
module13 = self.depthwise_separable(tmp, 1024, 1024, 1024, 1, scale)
module14 = self.extra_block(module13, 256, 512, 1, 2, scale)
module14 = self.extra_block(module13, 256, 512, 1, 2)
# 5x5
module15 = self.extra_block(module14, 128, 256, 1, 2, scale)
module15 = self.extra_block(module14, 128, 256, 1, 2)
# 3x3
module16 = self.extra_block(module15, 128, 256, 1, 2, scale)
module16 = self.extra_block(module15, 128, 256, 1, 2)
# 2x2
module17 = self.extra_block(module16, 64, 128, 1, 2, scale)
module17 = self.extra_block(module16, 64, 128, 1, 2)
mbox_locs, mbox_confs, box, box_var = fluid.layers.multi_box_head(
inputs=[
......@@ -62,7 +62,6 @@ class MobileNetSSD:
num_filters,
stride,
padding,
channels=None,
num_groups=1,
act='relu',
use_cudnn=True):
......@@ -99,24 +98,23 @@ class MobileNetSSD:
padding=0)
return pointwise_conv
def extra_block(self, input, num_filters1, num_filters2, num_groups, stride,
scale):
def extra_block(self, input, num_filters1, num_filters2, num_groups, stride):
# 1x1 conv
pointwise_conv = self.conv_bn(
input=input,
filter_size=1,
num_filters=int(num_filters1 * scale),
num_filters=int(num_filters1),
stride=1,
num_groups=int(num_groups * scale),
num_groups=int(num_groups),
padding=0)
# 3x3 conv
normal_conv = self.conv_bn(
input=pointwise_conv,
filter_size=3,
num_filters=int(num_filters2 * scale),
num_filters=int(num_filters2),
stride=2,
num_groups=int(num_groups * scale),
num_groups=int(num_groups),
padding=1)
return normal_conv
......
......@@ -51,14 +51,9 @@ num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
def get_device_num():
# NOTE(zcd): for multi-processe training, each process use one GPU card.
if num_trainers > 1: return 1
visible_device = os.environ.get('CUDA_VISIBLE_DEVICES', None)
if visible_device:
device_num = len(visible_device.split(','))
else:
device_num = subprocess.check_output(
['nvidia-smi', '-L']).decode().count('\n')
return device_num
if num_trainers > 1:
return 1
return fluid.core.get_cuda_device_count()
def train():
......
Data
=====
This dataset is for our paper: ARNOR: Attention Regularization based Noise Reduction for Distant Supervision Relation Classification. This test set is for sentence-level evaluation.
This dataset is used in our paper: "ARNOR: Attention Regularization based Noise Reduction for Distant Supervision Relation Classification". We release a new NYT test set for sentence-level evaluation of distant supervision relation extraction models. It contains almost 5 times as many positive instances as the previous one [2], and it is carefully annotated to ensure accuracy.
The original data is from the dataset in the paper: Cotype: Joint extraction of typed entities and relations with knowledge bases. It is a distant supervision dataset built from NYT (New York Times) news, and the test set is annotated by humans. However, the number of positive instances in the test set is small. We revise and annotate more test data based on it.
This dataset is based on Ren's [1] training set, which is generated by distant supervision, and a manually annotated test set of 395 sentences from Hoffmann [2]. They are all from New York Times news articles [3]. However, the number of positive instances in the test set is small (only 396) and the quality is insufficient. We revise and annotate more test data based on it, and release two versions of the dataset.
In a data file, each line is a JSON string. The content looks like:
......@@ -18,7 +18,7 @@ In a data file, each line is a json string. The content is like
},
...
],
"entityMentions": [
"entityMentions": [
{
"text": "Entity words",
"label": "Entity type",
......@@ -32,10 +32,74 @@ In a data file, each line is a json string. The content is like
Data version 1.0.0
=====
This version of dataset is the original one applied in our paper, which includes four files: train.json, test.json, dev_part.json, and test_part.json. Here dev_part.json and test_part.json are from test.json. This dataset can be downloaded here: https://baidu-nlp.bj.bcebos.com/arnor_dataset-1.0.0.tar.gz
This version of dataset is the original one applied in our paper, which includes four files: train.json, test.json, dev_part.json, and test_part.json. Here dev_part.json and test_part.json are from test.json. **This dataset can be downloaded here: https://baidu-nlp.bj.bcebos.com/arnor_dataset-1.0.0.tar.gz**
Data version 2.0.0
=====
More test data are coming soon ......
We strongly recommend applying this dataset in future relation classification studies. This version contains more annotated test data compared with version 1.0.0; we continually annotated more data, as summarized in the table below. In addition, we have removed the relation "/location/administrative_division/country" from the training set and changed "/location/country/administrative_divisions" into "/location/location/contains", because we do not label these two relation types in the test set.
| Test set | version 1.0.0 | version 2.0.0 |
| :-----| :-----| :-----|
| #Sentences | 1,024 | 3,192 |
| #Instances | 4,543 | 9,051 |
| #Positive instances | 671 | 2,224 |
**The download address is: http://baidu-nlp.bj.bcebos.com/arnor_dataset-2.0.0.tar.gz**
There are four files in it: the training set, dev set, and test set are all included. In addition, it includes a "test_noise.json" file, which is for noise reduction evaluation.
Model Performances on version 2.0.0
-----
We reproduce experiments following our ARNOR paper. The results are listed below.
Main results:
| Method | Dev Prec. | Dev Rec. | Dev F1 | Test Prec. | Test Rec. | Test F1 |
| :-----| :-----| :-----| :-----| :-----| :-----| :-----|
| CNN | 39.27 | 73.80 | 51.26 | 42.41 | 76.64 | 54.60 |
| PCNN | 39.08 | 74.74 | 51.32 | 42.18 | 77.50 | 54.64 |
| BiLSTM | 41.16 | 70.17 | 52.12 | 44.12 | 71.12 | 54.45 |
| BiLSTM+ATT | 40.81 | 70.37 | 51.66 | 42.77 | 71.59 | 53.55 |
| PCNN+SelATT | 82.41 | 34.10 | 48.24 | 81.00 | 35.50 | 49.37 |
| CNN+RL1 | 42.50 | 71.62 | 53.34 | 43.70 | 72.34 | 54.49 |
| CNN+RL2 | 42.69 | 72.56 | 53.75 | 44.54 | 73.40 | 55.44 |
| ARNOR | 78.14 | 59.82 | 67.77 | 79.70 | 62.30 | 69.93 |
Component results:
| Method | Test Prec. | Test Rec. | Test F1 |
| :-----| :-----| :-----| :-----|
| BiLSTM+ATT | 42.77 | 71.59 | 53.55 |
| +IDR | 84.98 | 50.14 | 63.07 |
| +ART | 80.03 | 60.53 | 68.93 |
| +BLP | 79.70 | 62.30 | 69.93 |
Noise reduction results:
| Noise Reduction | Prec. | Rec. | F1 |
| :-----| :-----| :-----| :-----|
| CNN+RL2 | 40.19 | 95.39 | 56.56 |
| ARNOR | 73.40 | 73.04 | 73.22 |
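For reference, the F1 columns in these tables are the usual harmonic mean of precision and recall, which can be checked against the ARNOR test row above:

```python
def f1(prec, rec):
    """Harmonic mean of precision and recall (percent scale)."""
    return 2 * prec * rec / (prec + rec)

print(round(f1(79.70, 62.30), 2))  # 69.93, matching ARNOR's test F1 above
```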
Citing the ARNOR paper
=====
@inproceedings{jia2019arnor,
  title={ARNOR: Attention Regularization based Noise Reduction for Distant Supervision Relation Classification},
  author={Jia, Wei and Dai, Dai and Xiao, Xinyan and Wu, Hua},
  booktitle={Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics},
  year={2019}
}
References
-----
[1] Xiang Ren, Zeqiu Wu, Wenqi He, Meng Qu, Clare R Voss, Heng Ji, Tarek F Abdelzaher, and Jiawei Han. 2017. Cotype: Joint extraction of typed entities and relations with knowledge bases. In Proceedings of the 26th International Conference on World Wide Web, pages 1015–1024. International World Wide Web Conferences Steering Committee.
[2] Raphael Hoffmann, Congle Zhang, Xiao Ling, Luke Zettlemoyer, and Daniel S Weld. 2011. Knowledge-based weak supervision for information extraction of overlapping relations. In Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies - Volume 1, pages 541–550. Association for Computational Linguistics.
[3] Sebastian Riedel, Limin Yao, and Andrew McCallum. 2010. Modeling relations and their mentions without labeled text. In Joint European Conference on Machine Learning and Knowledge Discovery in Databases, pages 148–163. Springer.
......@@ -158,14 +158,9 @@ def parse_args():
def get_device_num():
# NOTE(zcd): for multi-processe training, each process use one GPU card.
if num_trainers > 1: return 1
visible_device = os.environ.get('CUDA_VISIBLE_DEVICES', None)
if visible_device:
device_num = len(visible_device.split(','))
else:
device_num = subprocess.check_output(
['nvidia-smi', '-L']).decode().count('\n')
return device_num
if num_trainers > 1:
return 1
return fluid.core.get_cuda_device_count()
def append_nccl2_prepare(startup_prog, trainer_id, worker_endpoints,
......