parallel_helper.py 1.7 KB
Newer Older
C
chengduo 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from ..layers import collective
C
chengduo 已提交
16
from ..framework import Parameter
C
chengduo 已提交
17 18 19 20 21 22 23 24 25
# Module-wide handle to the parallel context object. It stays None until
# _set_parallel_ctx() stores a context in it.
__parallel_ctx__clz__ = None


def _is_data_parallel_mode():
    """Tell whether data-parallel mode is active.

    Returns:
        bool: True only when a parallel context has been stored in
        ``__parallel_ctx__clz__`` and the ``PADDLE_TRAINERS_NUM``
        environment variable (read as an int, "1" when unset) is greater
        than 1.
    """
    # Without a stored context the environment is not consulted at all.
    if __parallel_ctx__clz__ is None:
        return False
    trainer_count = int(os.getenv("PADDLE_TRAINERS_NUM", "1"))
    return trainer_count > 1


26 27 28 29 30
def _is_parallel_ctx_initialized():
    """Report whether a parallel context has been stored.

    Returns:
        bool: True when ``__parallel_ctx__clz__`` holds anything other
        than None.
    """
    current_ctx = __parallel_ctx__clz__
    return current_ctx is not None


C
chengduo 已提交
31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46
def _set_parallel_ctx(nccl_parallel_context):
    """Store the parallel context in the module-level slot.

    Args:
        nccl_parallel_context: Context object to keep in
            ``__parallel_ctx__clz__``.

    Raises:
        AssertionError: If a context has already been stored.
    """
    global __parallel_ctx__clz__
    # Raise explicitly rather than via ``assert`` so the once-only guard
    # still fires when Python runs with -O (which strips assert statements).
    if __parallel_ctx__clz__ is not None:
        raise AssertionError("ParallelContext can only be initialized once.")
    __parallel_ctx__clz__ = nccl_parallel_context


def _init_parallel_ctx():
    """Call ``init()`` on the stored parallel context.

    Raises:
        AssertionError: If no context has been stored yet.
    """
    ctx = __parallel_ctx__clz__
    # Explicit raise instead of ``assert``: under -O an assert would vanish
    # and the failure would surface as an AttributeError on None.
    if ctx is None:
        raise AssertionError("ParallelContext should be initialized.")
    ctx.init()


def _broadcast_parameters(parameters):
    """Broadcast every trainable, non-distributed ``Parameter``.

    Args:
        parameters: Iterable of objects exposing ``is_distributed``; those
            that are ``Parameter`` instances are also read for ``trainable``.
    """
    for param in parameters:
        # In model parallel, some parameters are split across multiple
        # devices, so these parameters cannot be broadcast.
        if param.is_distributed:
            continue

        if isinstance(param, Parameter) and param.trainable:
            # NOTE(review): the literal 0 is presumably the root/source
            # rank of the broadcast -- confirm against collective._broadcast.
            collective._broadcast(param, 0, sync_mode=True)