solve_device_guard_bugs (#32915)

585564d2 · Baibaifan · GitHub · a3debea2 · 585564d2 · 585564d2
3 changed files
--- a/python/paddle/distributed/collective.py
+++ b/python/paddle/distributed/collective.py
@@ -927,7 +927,8 @@ def _linear(x, weight, bias=None, name=None):
    else:
        helper = LayerHelper('linear', **locals())
        dtype = x.dtype
-        assert x.ndim < 4, "X latitude is not supported greater than 3 now."
+        assert len(
+            x.shape) < 4, "X latitude is not supported greater than 3 now."
        check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
                                 'linear')

--- a/python/paddle/distributed/fleet/ascend_utils.py
+++ b/python/paddle/distributed/fleet/ascend_utils.py
@@ -74,10 +74,17 @@ def _get_ascend_rankfile(rank_table_file_path):
    device_count = 0
    server_list = json_data['server_list']
    for server in server_list:
-        node_ips.append(server['server_id'])
        device_list = server['device']
        device_count = len(device_list)
+        if os.getenv("FLAGS_MODELARTS", None):
+            nodes = os.getenv("DLS_TASK_NUMBER", None)
+            assert nodes is not None, "DLS_TASK_NUMBER didn't set!"
+            for node in range(int(nodes)):
+                node_ip = os.getenv(f"VC_CUSTOM{node}_HOSTS", None)
+                assert node_ip is not None, f"VC_CUSTOM{node}_HOSTS didn't set!"
+                node_ips.append(node_ip)
+            return node_ips, device_count
+        node_ips.append(server['server_id'])
    return node_ips, device_count

--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -4258,7 +4258,7 @@ class PipelineOptimizer(object):
        device = op.attr(self._op_device_key) \
            if op.has_attr(self._op_device_key) else None
        if device:
-            assert device[0:3] == 'gpu' or dev_type == 'npu', "Now, only gpu and npu devices are " \
+            assert device[0:3] == 'gpu' or device[0:3] == 'npu', "Now, only gpu and npu devices are " \
                "supported in pipeline parallemism."
        return device