# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

from paddle.device import get_available_custom_device

# (TODO: GhostScreaming) It will be removed later.
from paddle.fluid import core


class DeviceType:
    """String constants naming the device kinds handled by ``Device``."""

    CPU = 'cpu'
    GPU = 'gpu'
    XPU = 'xpu'
    NPU = 'npu'
    MLU = 'mlu'
    IPU = 'ipu'
    # Device provided through a custom backend; ``Device`` reads the concrete
    # backend name from the PADDLE_XCCL_BACKEND environment variable.
    CUSTOM_DEVICE = 'custom_device'


class Device:
    """Describe the devices this process may use.

    A ``Device`` holds a device type (one of the ``DeviceType`` constants),
    a memory description and a list of string labels. The labels come from
    the matching ``*_VISIBLE_DEVICES`` environment variable when it lists
    devices explicitly, otherwise from enumerating the detected devices.
    """

    def __init__(self, dtype=None, memory="", labels=""):
        """Create a device description.

        Args:
            dtype: One of the ``DeviceType`` constants, or None if unknown.
            memory: Memory description, stored as given.
            labels: Comma-separated string or list of device labels.
        """
        self._dtype = dtype
        self._memory = memory
        # Normalize exactly like the ``labels`` setter so a raw string is
        # never stored; iterating a string would treat each character
        # (including the commas) as a separate label.
        self._labels = self._normalize_labels(labels)

    def __str__(self):
        return ",".join(self._labels)

    @staticmethod
    def _split_labels(text):
        """Split a comma-separated string, stripping padding and blanks."""
        return [part.strip() for part in text.split(',') if part.strip()]

    @classmethod
    def _normalize_labels(cls, lbs):
        """Turn a string or list into a label list; anything else gives []."""
        if isinstance(lbs, str):
            return cls._split_labels(lbs)
        if isinstance(lbs, list):
            return lbs
        return []

    @classmethod
    def _labels_from_env(cls, visible_devices):
        """Return the labels named by a ``*_VISIBLE_DEVICES`` value.

        Returns None when the value is unset, equals ``'all'`` or names no
        device at all (empty or only separators), so that callers fall back
        to enumerating the detected devices.
        """
        if visible_devices is None or visible_devices == 'all':
            return None
        return cls._split_labels(visible_devices) or None

    @staticmethod
    def _count_custom_devices(device_type):
        """Count the available custom devices whose type is ``device_type``.

        Entries returned by ``get_available_custom_device`` are reduced to
        the text before the first ``:`` before being compared.
        """
        type_names = [
            entry.split(':')[0] for entry in get_available_custom_device()
        ]
        return type_names.count(device_type)

    @property
    def dtype(self):
        """Device type, one of the ``DeviceType`` constants (or None)."""
        return self._dtype

    @property
    def count(self):
        """Number of labels, or 1 when there are none."""
        return len(self._labels) or 1

    @property
    def memory(self):
        """Memory description given at construction."""
        return self._memory

    @property
    def labels(self):
        """List of device labels."""
        return self._labels

    @labels.setter
    def labels(self, lbs):
        self._labels = self._normalize_labels(lbs)

    def get_selected_device_key(self):
        """Return the ``FLAGS_selected_*`` key matching the device type."""
        if self._dtype == DeviceType.CUSTOM_DEVICE:
            # The flag name embeds the backend name taken from the environment.
            return 'FLAGS_selected_{}s'.format(os.getenv('PADDLE_XCCL_BACKEND'))

        known_keys = (
            (DeviceType.CPU, 'FLAGS_selected_cpus'),
            (DeviceType.GPU, 'FLAGS_selected_gpus'),
            (DeviceType.NPU, 'FLAGS_selected_npus'),
            (DeviceType.XPU, 'FLAGS_selected_xpus'),
            (DeviceType.MLU, 'FLAGS_selected_mlus'),
            (DeviceType.IPU, 'FLAGS_selected_ipus'),
        )
        for device_type, key in known_keys:
            if self._dtype == device_type:
                return key
        return 'FLAGS_selected_devices'

    def get_selected_devices(self, devices=''):
        """Return the device label/id relative to the visible devices.

        Args:
            devices: Comma-separated labels to select. When empty, every
                visible device is selected.

        Returns:
            A list of string indices into ``labels``.

        Raises:
            ValueError: If a requested label is not among the visible ones.
        """
        if not devices:
            return [str(idx) for idx in range(len(self._labels))]

        selected = []
        for name in devices.split(','):
            name = name.strip()
            try:
                selected.append(str(self._labels.index(name)))
            except ValueError:
                raise ValueError(
                    "device {!r} is not among the visible devices {}".format(
                        name, list(self._labels)
                    )
                ) from None
        return selected

    def get_custom_device_envs(self):
        """Return the environment entries describing the custom backend."""
        return {
            'PADDLE_DISTRI_BACKEND': 'xccl',
            'PADDLE_XCCL_BACKEND': os.getenv('PADDLE_XCCL_BACKEND'),
        }

    @classmethod
    def parse_device(cls):
        """Build a device description from the environment.

        The device type is chosen from the first variable that is set, in
        this order: PADDLE_XCCL_BACKEND, CUDA_VISIBLE_DEVICES,
        XPU_VISIBLE_DEVICES, ASCEND_VISIBLE_DEVICES, MLU_VISIBLE_DEVICES.
        When the matching visible-devices variable does not list devices
        explicitly, the result of ``detect_device`` is returned instead.
        """
        dev = cls()
        visible_devices = None
        if 'PADDLE_XCCL_BACKEND' in os.environ:
            dev._dtype = DeviceType.CUSTOM_DEVICE
            visible_devices = os.getenv(
                '{}_VISIBLE_DEVICES'.format(
                    os.environ['PADDLE_XCCL_BACKEND'].upper()
                )
            )
        elif 'CUDA_VISIBLE_DEVICES' in os.environ:
            dev._dtype = DeviceType.GPU
            visible_devices = os.getenv("CUDA_VISIBLE_DEVICES")
        elif 'XPU_VISIBLE_DEVICES' in os.environ:
            dev._dtype = DeviceType.XPU
            visible_devices = os.getenv("XPU_VISIBLE_DEVICES")
        elif 'ASCEND_VISIBLE_DEVICES' in os.environ:
            dev._dtype = DeviceType.NPU
            visible_devices = os.getenv("ASCEND_VISIBLE_DEVICES")
        elif 'MLU_VISIBLE_DEVICES' in os.environ:
            dev._dtype = DeviceType.MLU
            visible_devices = os.getenv("MLU_VISIBLE_DEVICES")

        explicit_labels = cls._labels_from_env(visible_devices)
        if explicit_labels is None:
            # Nothing usable in the environment: probe the runtime instead.
            return cls.detect_device()

        dev._labels = explicit_labels
        return dev

    @classmethod
    def detect_device(cls):
        """Build a device description by querying the runtime.

        A custom backend named by PADDLE_XCCL_BACKEND takes precedence,
        then the ``core.is_compiled_with_*`` checks in the order cuda, xpu,
        npu, mlu, ipu. When no device is counted the type is CPU.
        """
        dev = cls()
        num = 0
        visible_devices = None
        if 'PADDLE_XCCL_BACKEND' in os.environ:
            custom_device_type = os.environ['PADDLE_XCCL_BACKEND']
            dev._dtype = DeviceType.CUSTOM_DEVICE
            num = cls._count_custom_devices(custom_device_type)
            visible_devices = os.getenv(
                '{}_VISIBLE_DEVICES'.format(custom_device_type.upper())
            )
        elif core.is_compiled_with_cuda():
            dev._dtype = DeviceType.GPU
            num = core.get_cuda_device_count()
            visible_devices = os.getenv("CUDA_VISIBLE_DEVICES")
        elif core.is_compiled_with_xpu():
            dev._dtype = DeviceType.XPU
            num = core.get_xpu_device_count()
            visible_devices = os.getenv("XPU_VISIBLE_DEVICES")
        elif core.is_compiled_with_npu():
            dev._dtype = DeviceType.NPU
            num = core.get_npu_device_count()
            visible_devices = os.getenv("ASCEND_VISIBLE_DEVICES")
        elif core.is_compiled_with_mlu():
            dev._dtype = DeviceType.MLU
            num = core.get_mlu_device_count()
            visible_devices = os.getenv("MLU_VISIBLE_DEVICES")
        elif core.is_compiled_with_ipu():
            dev._dtype = DeviceType.IPU
            num = core.get_ipu_device_count()
            # For IPUs, 'labels' is a list which contains the available numbers of IPU devices.
            dev._labels = [str(x) for x in range(0, num + 1)]
            return dev

        if num == 0:
            dev._dtype = DeviceType.CPU
            return dev

        explicit_labels = cls._labels_from_env(visible_devices)
        if explicit_labels is None:
            dev._labels = [str(x) for x in range(num)]
        else:
            dev._labels = explicit_labels
        return dev


if __name__ == '__main__':
    # Manual check: print the selected device indices for this environment.
    d = Device.parse_device()
    print(d.get_selected_devices())