# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
16 17
import paddle.fluid as fluid
from paddle.device import get_available_custom_device
18 19 20 21 22 23 24 25


class DeviceType:
    """String constants naming the device kinds handled in this module.

    The values are plain strings, so they compare equal to the
    corresponding literals (for example ``DeviceType.GPU == 'gpu'``).
    """

    CPU = 'cpu'
    GPU = 'gpu'
    XPU = 'xpu'
    NPU = 'npu'
    MLU = 'mlu'
    IPU = 'ipu'
    # Used when the device type is taken from the PADDLE_XCCL_BACKEND
    # environment variable rather than from one of the kinds above.
    CUSTOM_DEVICE = 'custom_device'
28 29 30


class Device(object):
    """Describe the devices available to the current process.

    An instance carries a device type (one of the ``DeviceType`` constants,
    or ``None``), a memory description that is stored but not interpreted
    here, and the list of visible device labels.

    Args:
        dtype: Device type; ``None`` when not determined yet.
        memory: Memory description, stored as given.
        labels: Visible device labels, either a list of strings or a
            comma-separated string such as ``"0,1"``.
    """

    def __init__(self, dtype=None, memory="", labels=""):
        self._dtype = dtype
        self._memory = memory
        # Go through the same normalisation as the ``labels`` setter so that
        # a comma-separated string is never kept as a raw string (which made
        # ``count`` and ``__str__`` operate on single characters).
        self._labels = self._normalize_labels(labels)

    def __str__(self):
        """Return the labels joined by commas."""
        return ",".join(self._labels)

    @staticmethod
    def _split_labels(text):
        """Split a comma-separated string into stripped, non-empty labels."""
        return [item.strip() for item in text.split(',') if item.strip()]

    @classmethod
    def _normalize_labels(cls, lbs):
        """Convert a str/list/tuple into a list of labels; anything else -> []."""
        if isinstance(lbs, str):
            return cls._split_labels(lbs)
        if isinstance(lbs, list):
            return lbs
        if isinstance(lbs, tuple):
            return list(lbs)
        return []

    @property
    def dtype(self):
        """Device type of this instance."""
        return self._dtype

    @property
    def count(self):
        """Number of labels, or 1 when no label is recorded."""
        return len(self._labels) or 1

    @property
    def memory(self):
        """Memory description passed at construction."""
        return self._memory

    @property
    def labels(self):
        """List of visible device labels."""
        return self._labels

    @labels.setter
    def labels(self, lbs):
        self._labels = self._normalize_labels(lbs)

    def get_selected_device_key(self):
        """Return the ``FLAGS_selected_*`` name matching the device type.

        Falls back to ``'FLAGS_selected_devices'`` for an unrecognised type.
        """
        fixed_keys = (
            (DeviceType.CPU, 'FLAGS_selected_cpus'),
            (DeviceType.GPU, 'FLAGS_selected_gpus'),
            (DeviceType.NPU, 'FLAGS_selected_npus'),
            (DeviceType.XPU, 'FLAGS_selected_xpus'),
            (DeviceType.MLU, 'FLAGS_selected_mlus'),
            (DeviceType.IPU, 'FLAGS_selected_ipus'),
        )
        for dtype, key in fixed_keys:
            if self._dtype == dtype:
                return key
        if self._dtype == DeviceType.CUSTOM_DEVICE:
            # The flag name embeds the backend name read from the environment.
            return 'FLAGS_selected_{}s'.format(os.getenv('PADDLE_XCCL_BACKEND'))
        return 'FLAGS_selected_devices'

    def get_selected_devices(self, devices=''):
        '''
        return the device label/id relative to the visible devices

        Args:
            devices: Comma-separated labels to select. When empty, every
                visible device is selected.

        Returns:
            A list of strings, each the position of a selected device within
            the visible labels.

        Raises:
            ValueError: If a requested device is not among the visible labels.
        '''
        if not devices:
            return [str(x) for x in range(0, len(self._labels))]

        selected = []
        for name in (x.strip() for x in devices.split(',')):
            try:
                position = self._labels.index(name)
            except ValueError:
                raise ValueError(
                    "device '{}' is not among the visible devices [{}]".format(
                        name, ",".join(self._labels)))
            selected.append(str(position))
        return selected

    def get_custom_device_envs(self):
        """Return the environment entries describing the custom (xccl) backend."""
        return {
            'PADDLE_DISTRI_BACKEND': 'xccl',
            'PADDLE_XCCL_BACKEND': os.getenv('PADDLE_XCCL_BACKEND'),
        }

    @classmethod
    def parse_device(cls):
        """Build a device description from the ``*_VISIBLE_DEVICES`` variables.

        The device type is chosen from the first matching environment
        variable. When no usable label list is found (variable unset, empty,
        or equal to ``'all'``) the result of ``detect_device`` is returned
        instead.
        """
        dev = cls()
        visible_devices = None
        if 'PADDLE_XCCL_BACKEND' in os.environ:
            dev._dtype = DeviceType.CUSTOM_DEVICE
            visible_devices_str = '{}_VISIBLE_DEVICES'.format(
                os.getenv('PADDLE_XCCL_BACKEND').upper())
            if visible_devices_str in os.environ:
                visible_devices = os.getenv(visible_devices_str)
        elif ('CUDA_VISIBLE_DEVICES' in os.environ
              or 'NVIDIA_VISIBLE_DEVICES' in os.environ):
            dev._dtype = DeviceType.GPU
            visible_devices = os.getenv("CUDA_VISIBLE_DEVICES") or os.getenv(
                "NVIDIA_VISIBLE_DEVICES")
        elif 'XPU_VISIBLE_DEVICES' in os.environ:
            dev._dtype = DeviceType.XPU
            visible_devices = os.getenv("XPU_VISIBLE_DEVICES")
        elif 'ASCEND_VISIBLE_DEVICES' in os.environ:
            dev._dtype = DeviceType.NPU
            visible_devices = os.getenv("ASCEND_VISIBLE_DEVICES")
        elif 'MLU_VISIBLE_DEVICES' in os.environ:
            dev._dtype = DeviceType.MLU
            visible_devices = os.getenv("MLU_VISIBLE_DEVICES")

        labels = []
        if visible_devices is not None and visible_devices != 'all':
            labels = cls._split_labels(visible_devices)
        if not labels:
            # Nothing usable in the environment (this includes an empty
            # value, which would otherwise yield a single '' label).
            return cls.detect_device()

        dev._labels = labels
        return dev

    @classmethod
    def detect_device(cls):
        """Build a device description from what the runtime reports.

        The device type follows ``PADDLE_XCCL_BACKEND`` when set, otherwise
        the first ``fluid.core.is_compiled_with_*`` check that succeeds. When
        no device is counted the type is set to ``DeviceType.CPU``.
        """

        def get_custom_devices_count(device_type):
            # Entries are split on ':' and the first part is compared with
            # the requested type (presumably "<type>:<index>" -- confirm
            # against get_available_custom_device).
            return sum(1 for name in get_available_custom_device()
                       if name.split(':')[0] == device_type)

        dev = cls()
        num = 0
        visible_devices = None
        if 'PADDLE_XCCL_BACKEND' in os.environ:
            custom_device_type = os.getenv('PADDLE_XCCL_BACKEND')
            dev._dtype = DeviceType.CUSTOM_DEVICE
            num = get_custom_devices_count(custom_device_type)
            visible_devices_str = '{}_VISIBLE_DEVICES'.format(
                custom_device_type.upper())
            if visible_devices_str in os.environ:
                visible_devices = os.getenv(visible_devices_str)
        elif fluid.core.is_compiled_with_cuda():
            dev._dtype = DeviceType.GPU
            num = fluid.core.get_cuda_device_count()
            visible_devices = os.getenv("CUDA_VISIBLE_DEVICES") or os.getenv(
                "NVIDIA_VISIBLE_DEVICES")
        elif fluid.core.is_compiled_with_xpu():
            dev._dtype = DeviceType.XPU
            num = fluid.core.get_xpu_device_count()
            visible_devices = os.getenv("XPU_VISIBLE_DEVICES")
        elif fluid.core.is_compiled_with_npu():
            dev._dtype = DeviceType.NPU
            num = fluid.core.get_npu_device_count()
            visible_devices = os.getenv("ASCEND_VISIBLE_DEVICES")
        elif fluid.core.is_compiled_with_mlu():
            dev._dtype = DeviceType.MLU
            num = fluid.core.get_mlu_device_count()
            visible_devices = os.getenv("MLU_VISIBLE_DEVICES")
        elif fluid.core.is_compiled_with_ipu():
            dev._dtype = DeviceType.IPU
            num = fluid.core.get_ipu_device_count()
            # For IPUs, 'labels' is a list which contains the available numbers of IPU devices.
            dev._labels = [str(x) for x in range(0, num + 1)]
            return dev

        if num == 0:
            dev._dtype = DeviceType.CPU
            return dev

        labels = []
        if visible_devices is not None and visible_devices != "all":
            labels = cls._split_labels(visible_devices)
        # Without an explicit, non-empty selection every counted device is
        # visible and labelled by its index.
        dev._labels = labels or [str(x) for x in range(0, num)]
        return dev


if __name__ == '__main__':
    # Manual check: print the selected device indices for the device
    # description derived from the current environment.
    d = Device.parse_device()
    print(d.get_selected_devices())