Merge pull request #717 from vslyu/dev/fix_xpu_eval

[Kunlun] XPU uses one card for evaluation in multi-card training

Merge pull request #717 from vslyu/dev/fix_xpu_eval
[Kunlun] XPU uses one card for evaluation in multi-card training
53db1b09 · Wei Shengyu · GitHub · c0dfe472 · 5170153e · 53db1b09
显示空白变更内容
内联并排

Showing 2 changed files with 35 additions and 21 deletions

ppcls/data/reader.py +24 -14

tools/program.py +11 -7

未找到文件。
--- a/ppcls/data/reader.py
+++ b/ppcls/data/reader.py
@@ -265,6 +265,7 @@ class Reader:
            self.collate_fn = self.mix_collate_fn
        self.places = places
+        self.use_xpu = config.get("use_xpu", False)
        self.multilabel = config.get("multilabel", False)
    def mix_collate_fn(self, batch):
@@ -287,13 +288,22 @@ class Reader:
            dataset = MultiLabelDataset(self.params)
        else:
            dataset = CommonDataset(self.params)
+        if (self.params['mode'] != "train") and self.use_xpu:
+            loader = DataLoader(
+                dataset,
+                places=self.places,
+                batch_size=batch_size,
+                drop_last=False,
+                return_list=True,
+                shuffle=False,
+                num_workers=self.params["num_workers"])
+        else:
            is_train = self.params['mode'] == "train"
            batch_sampler = DistributedBatchSampler(
                dataset,
                batch_size=batch_size,
                shuffle=self.shuffle and is_train,
-            drop_last=False)
+                drop_last=is_train)
            loader = DataLoader(
                dataset,
                batch_sampler=batch_sampler,

--- a/tools/program.py
+++ b/tools/program.py
@@ -119,7 +119,8 @@ def create_metric(out,
                  classes_num=1000,
                  use_distillation=False,
                  multilabel=False,
-                  mode="train"):
+                  mode="train",
+                  use_xpu=False):
    """
    Create measures of model accuracy, such as top1 and top5
@@ -175,6 +176,7 @@ def create_metric(out,
        fetch_list.append(ham_dist)
    # multi cards' eval
+    if not use_xpu:
        if mode != "train" and paddle.distributed.get_world_size() > 1:
            for idx, fetch in enumerate(fetch_list):
                fetch_list[idx] = paddle.distributed.all_reduce(
@@ -213,6 +215,7 @@ def create_fetchs(feeds, net, config, mode="train"):
    use_mix = config.get('use_mix') and mode == 'train'
    use_distillation = config.get('use_distillation')
    multilabel = config.get('multilabel', False)
+    use_xpu = config.get("use_xpu", False)
    out = net(feeds["image"])
@@ -229,7 +232,8 @@ def create_fetchs(feeds, net, config, mode="train"):
            classes_num,
            use_distillation,
            multilabel=multilabel,
-            mode=mode)
+            mode=mode,
+            use_xpu=use_xpu)
        fetchs.update(metric)
    return fetchs