# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from test_collective_api_base import TestCollectiveAPIRunnerBase, runtime_main

import paddle
import paddle.distributed as dist
import paddle.fluid as fluid
import paddle.fluid.data_feeder as data_feeder
import paddle.framework as framework

# Switch Paddle to static-graph mode at import time: the code below builds
# programs with fluid.program_guard / paddle.static.data and appends ops to
# them rather than executing eagerly.
paddle.enable_static()

def all_reduce_new(tensor, reduce_type=str(dist.ReduceOp.SUM), group=None):
    """Append an in-place ``all_reduce`` op for ``tensor`` to the program.

    The op is registered through ``LayerHelper.append_op`` with ``tensor``
    used as both the ``'x'`` input and the ``'out'`` output, so nothing new
    is created or returned by this function.

    Args:
        tensor: Variable to reduce. Its dtype is validated by
            ``data_feeder.check_variable_and_dtype`` against float16,
            float32, float64, int32, int64, int8, uint8 and bool.
        reduce_type: Reduction kind encoded as a *string of digits*; it is
            converted with ``int()`` before being stored as the op's
            ``reduce_type`` attribute. The default is
            ``str(dist.ReduceOp.SUM)``, evaluated once at definition time.
        group: Optional communication group. When ``None`` ring id 0 is
            used, otherwise ``group.id``.

    Returns:
        None.

    Raises:
        ValueError: If the resolved ring id is not an ``int``, or if
            ``reduce_type`` is a string that is not made up only of digits.
            Note that the second message says "should be int" although the
            check is actually ``str.isdigit``.
        AttributeError: If ``reduce_type`` is not a string (for example a
            plain ``int``), because ``.isdigit()`` is called on it directly.
    """
    op_type = 'all_reduce'
    # Reject unsupported dtypes up front, before anything is added to the
    # program.
    data_feeder.check_variable_and_dtype(
        tensor,
        'tensor',
        [
            'float16',
            'float32',
            'float64',
            'int32',
            'int64',
            'int8',
            'uint8',
            'bool',
        ],
        op_type,
    )

    # No group means ring 0; otherwise take the id from the group object.
    ring_id = 0 if group is None else group.id

    if not isinstance(ring_id, int):
        raise ValueError("The type of 'ring_id' for all_reduce should be int.")

    # TODO: Support task and use task.wait in static graph mode
    #       Use use_calc_stream rather than sync_op
    # NOTE: ``**locals()`` forwards every local bound so far (tensor,
    # reduce_type, group, op_type, ring_id) as keyword arguments, so renaming
    # or adding locals above this line changes what LayerHelper receives.
    helper = framework.LayerHelper(op_type, **locals())
    # This validation runs only after the helper has been constructed.
    if not reduce_type.isdigit():
        raise ValueError(
            "The type of 'reduce_type' for all_reduce should be int."
        )
    # Same variable as input and output: the reduction overwrites ``tensor``.
    helper.append_op(
        type=op_type,
        inputs={'x': [tensor]},
        outputs={'out': [tensor]},
        attrs={'ring_id': ring_id, 'reduce_type': int(reduce_type)},
    )

    return None


class TestCollectiveAllreduceAPI(TestCollectiveAPIRunnerBase):
    """Runner that builds static programs performing an all-reduce.

    Two program builders are provided: ``get_model`` uses the public
    ``paddle.distributed.all_reduce`` API, while ``get_model_new`` goes
    through this module's ``all_reduce_new`` helper and additionally lets the
    caller choose the data dtype and the reduce type.
    """

    def __init__(self):
        # NOTE(review): the base-class initializer is not called here;
        # presumably the base defines none -- confirm against
        # test_collective_api_base before adding a super().__init__() call.
        self.global_ring_id = 0

    def get_model(self, main_prog, startup_program, rank):
        """Build a program that all-reduces a float32 ``[10, 1000]`` input.

        Args:
            main_prog: Program that receives the data variable and the op.
            startup_program: Startup program paired with ``main_prog``.
            rank: Accepted for interface compatibility; not used here.

        Returns:
            A one-element list holding the ``tindata`` variable, which is the
            variable handed to ``paddle.distributed.all_reduce``.
        """
        with fluid.program_guard(main_prog, startup_program):
            tindata = paddle.static.data(
                name="tindata", shape=[10, 1000], dtype='float32'
            )
            paddle.distributed.all_reduce(tindata)
            return [tindata]

    def get_model_new(
        self,
        main_prog,
        startup_program,
        rank,
        dtype='float32',
        reduce_type=str(dist.ReduceOp.SUM),
    ):
        """Build a program that reduces ``tindata`` via ``all_reduce_new``.

        Args:
            main_prog: Program that receives the data variable and the op.
            startup_program: Startup program paired with ``main_prog``.
            rank: Accepted for interface compatibility; not used here.
            dtype: Dtype of the ``[10, 1000]`` input variable.
            reduce_type: Reduce type as a digit string, forwarded unchanged
                to ``all_reduce_new``. The default is
                ``str(dist.ReduceOp.SUM)``, evaluated at definition time.

        Returns:
            A one-element list holding the ``tindata`` variable.
        """
        with fluid.program_guard(main_prog, startup_program):
            tindata = paddle.static.data(
                name="tindata", shape=[10, 1000], dtype=dtype
            )
            all_reduce_new(tindata, reduce_type)
            return [tindata]


if __name__ == "__main__":
    # When executed as a script, hand the runner class and the collective
    # name "allreduce" to the runtime_main entry point imported from
    # test_collective_api_base.
    runtime_main(TestCollectiveAllreduceAPI, "allreduce")