# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .common import DistributedOperatorImplContainer
from .common import DistributedOperatorImpl
from .common import register_distributed_operator_impl_container
from .common import register_distributed_operator_impl
from .common import is_elementwise_op
from ..utils import is_dim_shard
from ..utils import is_dim_replicate
from ..utils import is_valid_list_index
from ..utils import compute_compatible_dim_mapping
from ..utils import compute_compatible_dims_mapping
from ..utils import compute_compatible_and_update_dim_mapping
from ..dist_attribute import OperatorDistributedAttribute
from paddle.fluid import core, unique_name
from paddle.fluid.framework import _non_static_mode
from paddle.fluid.framework import Program, Parameter, Variable, program_guard
from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype
from paddle.distributed.fleet.meta_optimizers.common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY
from ..process_group import new_process_group
from ..utils import _get_comm_group, _get_corresponding_rank
from .dist_default import DistributedDefaultImpl0


class DistributedElementwise(DistributedOperatorImplContainer):
    """Operator-impl container used for elementwise operators.

    The class adds nothing of its own; construction is handed straight to
    ``DistributedOperatorImplContainer``.
    """

    def __init__(self, op_type):
        """Create the container.

        Args:
            op_type: Operator type, forwarded unchanged to the base class.
        """
        super().__init__(op_type)


# Build the container for the "elementwise" op type and pass it to the
# registration helper imported from .common.
register_distributed_operator_impl_container(
    DistributedElementwise("elementwise"))


# Replicated Elementwise
class DistributedElementwiseImpl0(DistributedOperatorImpl):
    """Distributed operator implementation for elementwise operators.

    All checks line the operands' dims mappings up on their *trailing*
    dimensions, so operands of different rank can be compared entry by
    entry (presumably to follow broadcasting rules -- confirm with the
    elementwise op definition).
    """

    def __init__(self, name):
        """Create the implementation.

        Args:
            name: Identifier forwarded unchanged to ``DistributedOperatorImpl``.
        """
        super().__init__(name)
        # Both flags start out False; how the framework interprets them is
        # defined outside this file.
        self._forward_implemented = False
        self._backward_implemented = False

    # ------------------------------------------------------------------
    # Private helpers shared by the compatibility checks and the update.
    # ------------------------------------------------------------------
    @staticmethod
    def _max_rank(dims_mappings):
        """Return the longest dims-mapping length, or -1 if there are none."""
        return max((len(mapping) for mapping in dims_mappings), default=-1)

    @staticmethod
    def _trailing_dim_groups(dims_mappings, max_rank):
        """Yield, last dimension first, the entries sharing a trailing slot.

        For offset ``k`` (1-based, counted from the end) the yielded list
        holds ``mapping[-k]`` of every mapping with at least ``k`` entries;
        shorter mappings are left out of that group.
        """
        for offset in range(1, max_rank + 1):
            yield [
                mapping[-offset] for mapping in dims_mappings
                if offset <= len(mapping)
            ]

    @staticmethod
    def _pad_leading(dims_mapping, rank):
        """Left-pad ``dims_mapping`` with -1 until it has ``rank`` entries.

        A mapping that already has ``rank`` (or more) entries is returned
        as the very same object, not a copy.
        """
        missing = rank - len(dims_mapping)
        if missing <= 0:
            return dims_mapping
        return [-1] * missing + list(dims_mapping)

    @staticmethod
    def _drop_leading(compatible_dims_mapping, rank, max_rank):
        """Return the last ``rank`` entries of the merged mapping.

        When ``rank`` is not smaller than ``max_rank`` the merged mapping
        object itself is returned.
        """
        if rank >= max_rank:
            return compatible_dims_mapping
        lead = max_rank - rank
        return [compatible_dims_mapping[lead + i] for i in range(rank)]

    def _all_trailing_dims_compatible(self, dims_mappings):
        """Return True if every trailing-aligned group can be merged.

        A group counts as mergeable when ``compute_compatible_dim_mapping``
        does not return ``None`` for it.
        """
        max_rank = self._max_rank(dims_mappings)
        return all(
            compute_compatible_dim_mapping(group) is not None
            for group in self._trailing_dim_groups(dims_mappings, max_rank))

    def _apply_compatible(self, arg_names, current_dims_mappings,
                          compatible_dims_mapping, max_rank,
                          set_dims_mapping):
        """Write the merged mapping back wherever it differs.

        Args:
            arg_names: Argument names to visit, in order.
            current_dims_mappings: ``{arg_name: dims_mapping}`` as read
                before the merge.
            compatible_dims_mapping: Merged mapping of length ``max_rank``.
            max_rank: Rank all mappings were padded to.
            set_dims_mapping: Callable ``(arg_name, dims_mapping)`` that
                stores the new mapping.

        Returns:
            True if at least one mapping was rewritten.
        """
        changed = False
        for arg_name in arg_names:
            previous = current_dims_mappings[arg_name]
            updated = self._drop_leading(compatible_dims_mapping,
                                         len(previous), max_rank)
            if updated != previous:
                set_dims_mapping(arg_name, updated)
                changed = True
        return changed

    # ------------------------------------------------------------------
    # Compatibility checks.
    # ------------------------------------------------------------------
    def is_input_compatible(self, dist_op):
        """Check that the inputs' dims mappings can be merged.

        Args:
            dist_op: Object exposing ``serial_op.desc`` and ``dist_attr``.

        Returns:
            False if the op is not an elementwise op or some trailing-aligned
            group of input dim mappings has no compatible value; True
            otherwise.
        """
        op_desc = dist_op.serial_op.desc
        if not is_elementwise_op(op_desc.type()):
            return False
        op_dist_attr = dist_op.dist_attr
        dims_mappings = [
            op_dist_attr.get_input_dims_mapping(arg_name)
            for arg_name in op_desc.input_arg_names()
        ]
        return self._all_trailing_dims_compatible(dims_mappings)

    def is_output_compatible(self, dist_op):
        """Check that the outputs' dims mappings can be merged.

        Args:
            dist_op: Object exposing ``serial_op.desc`` and ``dist_attr``.

        Returns:
            False if the op is not an elementwise op or some trailing-aligned
            group of output dim mappings has no compatible value; True
            otherwise.
        """
        op_desc = dist_op.serial_op.desc
        if not is_elementwise_op(op_desc.type()):
            return False
        op_dist_attr = dist_op.dist_attr
        dims_mappings = [
            op_dist_attr.get_output_dims_mapping(arg_name)
            for arg_name in op_desc.output_arg_names()
        ]
        return self._all_trailing_dims_compatible(dims_mappings)

    def is_auto_compatible(self, dist_op):
        """Check that inputs and outputs agree on every trailing dimension.

        Unlike the input/output checks this one demands *equality* of the
        aligned entries rather than mere mergeability.

        Args:
            dist_op: Object exposing ``serial_op.desc`` and ``dist_attr``.

        Returns:
            False if the op is not an elementwise op or any trailing-aligned
            group contains differing entries; True otherwise.

        Raises:
            AssertionError: If the longest input mapping and the longest
                output mapping differ in length.
        """
        op_desc = dist_op.serial_op.desc
        if not is_elementwise_op(op_desc.type()):
            return False
        op_dist_attr = dist_op.dist_attr

        input_dims_mappings = [
            op_dist_attr.get_input_dims_mapping(arg_name)
            for arg_name in op_desc.input_arg_names()
        ]
        input_max_rank = self._max_rank(input_dims_mappings)

        output_dims_mappings = [
            op_dist_attr.get_output_dims_mapping(arg_name)
            for arg_name in op_desc.output_arg_names()
        ]
        output_max_rank = self._max_rank(output_dims_mappings)

        # Invariant check kept from the original implementation.
        assert input_max_rank == output_max_rank
        max_rank = input_max_rank

        all_dims_mappings = input_dims_mappings + output_dims_mappings
        for group in self._trailing_dim_groups(all_dims_mappings, max_rank):
            if not all(group[0] == dim_mapping for dim_mapping in group):
                return False
        return True

    # ------------------------------------------------------------------
    # Dims-mapping propagation.
    # ------------------------------------------------------------------
    def update_dims_mapping(self, dist_op):
        """Merge all dims mappings and store the result on every argument.

        Each mapping is left-padded with -1 to the common rank, the padded
        mappings are merged with ``compute_compatible_dims_mapping``, and the
        merged result (trimmed back to each argument's own rank) replaces
        any mapping it differs from.

        Args:
            dist_op: Object exposing ``serial_op.desc`` and ``dist_attr``.

        Returns:
            True if at least one input or output mapping was rewritten;
            False if nothing changed or no compatible mapping exists.

        Raises:
            AssertionError: If the longest input mapping and the longest
                output mapping differ in length.
        """
        op_desc = dist_op.serial_op.desc
        op_dist_attr = dist_op.dist_attr

        input_arg_names = op_desc.input_arg_names()
        input_dims_mappings = {
            arg_name: op_dist_attr.get_input_dims_mapping(arg_name)
            for arg_name in input_arg_names
        }
        input_max_rank = self._max_rank(input_dims_mappings.values())
        padded_dims_mappings = [
            self._pad_leading(input_dims_mappings[arg_name], input_max_rank)
            for arg_name in input_arg_names
        ]

        output_arg_names = op_desc.output_arg_names()
        output_dims_mappings = {
            arg_name: op_dist_attr.get_output_dims_mapping(arg_name)
            for arg_name in output_arg_names
        }
        output_max_rank = self._max_rank(output_dims_mappings.values())
        padded_dims_mappings.extend(
            self._pad_leading(output_dims_mappings[arg_name], output_max_rank)
            for arg_name in output_arg_names)

        # Invariant check kept from the original implementation.
        assert input_max_rank == output_max_rank
        max_rank = input_max_rank

        compatible_dims_mapping = compute_compatible_dims_mapping(
            padded_dims_mappings)
        if compatible_dims_mapping is None:
            return False

        # Both passes must run even if the first already reported a change,
        # so evaluate them separately instead of short-circuiting with `or`.
        inputs_changed = self._apply_compatible(
            input_arg_names, input_dims_mappings, compatible_dims_mapping,
            max_rank, op_dist_attr.set_input_dims_mapping)
        outputs_changed = self._apply_compatible(
            output_arg_names, output_dims_mappings, compatible_dims_mapping,
            max_rank, op_dist_attr.set_output_dims_mapping)
        return inputs_changed or outputs_changed

    @staticmethod
    def forward(ctx, *args, **kwargs):
        """Delegate the forward pass to ``DistributedDefaultImpl0.forward``."""
        DistributedDefaultImpl0.forward(ctx, *args, **kwargs)

    @staticmethod
    def backward(ctx, *args, **kwargs):
        """Delegate the backward pass to ``DistributedDefaultImpl0.backward``."""
        DistributedDefaultImpl0.backward(ctx, *args, **kwargs)


# Create the implementation named "replicate_parallel" and pass it, together
# with the key "elementwise", to the registration helper imported from .common.
register_distributed_operator_impl(
    "elementwise", DistributedElementwiseImpl0("replicate_parallel"))