backward.py 12.1 KB
Newer Older
Q
Qiao Longfei 已提交
1
from paddle.v2.fluid import framework as framework
F
update  
fengjiayi 已提交
2
from . import core
F
update  
fengjiayi 已提交
3
import collections
4

F
fengjiayi 已提交
5
__all__ = ['append_backward']
6 7


8 9 10 11 12
def _rename_arg_(op_descs, old_name, new_name, begin_idx=None, end_idx=None):
    """
    Traverse all ops in op_descs[begin_idx : end_idx], 
    if any op has inputs/outputs named "old_name", rename it as 'new_name'
    """
F
update  
fengjiayi 已提交
13 14 15
    if begin_idx is None:
        begin_idx = 0
    if end_idx is None:
16
        end_idx = len(op_descs)
F
update  
fengjiayi 已提交
17
    for i in range(begin_idx, end_idx):
18
        op_desc = op_descs[i]
F
fengjiayi 已提交
19 20 21 22
        if isinstance(op_desc, tuple):
            op_desc = op_desc[0]
        op_desc.rename_input(old_name, new_name)
        op_desc.rename_output(old_name, new_name)
F
update  
fengjiayi 已提交
23 24


F
fengjiayi 已提交
25
def _create_op_desc_(op_type, inputs, outputs, attrs):
26 27 28
    """
    Create a C++ OpDesc object with specified inputs, outputs and attributes.
    """
F
fengjiayi 已提交
29 30 31 32 33 34 35 36 37 38 39 40 41 42
    op_desc = core.OpDesc()
    op_desc.set_type(op_type)
    for para, args in inputs.iteritems():
        op_desc.set_input(para, args)
    for para, args in outputs.iteritems():
        op_desc.set_output(para, args)
    for name, val in attrs.iteritems():
        if isinstance(val, framework.Block):
            op_desc.set_block_attr(name, val.desc)
        else:
            op_desc.set_attr(name, val)
    return op_desc


43 44 45 46 47 48
def _infer_var_data_type_(grad_var_name, block):
    """
    Infer the data type of given grad variable
    """
    grad_var = block.desc.find_var(grad_var_name.encode("ascii"))
    fwd_name = _strip_grad_suffix_(grad_var_name.encode("ascii"))
F
fengjiayi 已提交
49 50 51 52 53 54 55
    if block.desc.has_var_recursive(fwd_name):
        fwd_var = block.desc.find_var_recursive(fwd_name.encode("ascii"))
        grad_var.set_dtype(fwd_var.dtype())
    else:
        grad_var.set_dtype(core.DataType.FP32)


F
fengjiayi 已提交
56
def _all_in_set_(cands, s):
57 58 59
    """
    Test if all elements of 'cands' are in set 's'
    """
F
fengjiayi 已提交
60 61 62 63 64 65 66
    for c in cands:
        if not c in s:
            return False
    return True


def _strip_grad_suffix_(name):
67 68 69 70 71
    """
    Strip the grad suffix from the given varibale name
    e.g. x@GRAD ==> x
         y@GRAD@RENAME@1 ==> y
    """
F
fengjiayi 已提交
72 73
    pos = name.find(core.grad_var_suffix())
    return name[:pos] if pos != -1 else name
F
fengjiayi 已提交
74 75 76


def _append_grad_suffix_(name):
77 78 79 80
    """
    Append grad suffix to the given variable name
    e.g. x ==> x@GRAD
    """
F
fengjiayi 已提交
81 82 83
    return name + core.grad_var_suffix()


F
fengjiayi 已提交
84
def _addup_repetitive_outputs_(op_descs):
85 86 87 88 89
    """
    In backward part, an variable may be the output of more than one ops.
    In this case, the variable should be the accumulation of all the outputs.
    `sum_op`s are added to implement the accumulate.
    """
F
update  
fengjiayi 已提交
90 91
    pending_sum_ops = []
    var_rename_count = collections.defaultdict(int)
F
fengjiayi 已提交
92 93
    renamed_vars = collections.defaultdict(list)
    for idx, op_desc in enumerate(op_descs):
F
update  
fengjiayi 已提交
94
        for var_name in op_desc.input_arg_names():
F
fengjiayi 已提交
95 96 97 98 99
            if len(renamed_vars[var_name]) > 1:
                pending_sum_ops.append(
                    (_create_op_desc_("sum", {"X": renamed_vars[var_name]},
                                      {"Out": [var_name]}, {}), idx))
                renamed_vars[var_name] = [var_name]
F
update  
fengjiayi 已提交
100
        for var_name in op_desc.output_arg_names():
F
fengjiayi 已提交
101 102 103
            if var_name == core.empty_var_name(
            ) or var_name in op_desc.input_arg_names():
                # empty variable or inplace op
F
fengjiayi 已提交
104
                continue
F
fengjiayi 已提交
105
            if len(renamed_vars[var_name]) == 0:
F
update  
fengjiayi 已提交
106
                # it's the first time we get the variable
F
fengjiayi 已提交
107
                renamed_vars[var_name] = [var_name]
F
update  
fengjiayi 已提交
108
            else:
F
fengjiayi 已提交
109
                if len(renamed_vars[var_name]) == 1:
F
update  
fengjiayi 已提交
110 111
                    new_name = var_name + "@RENAME@" + \
                        str(var_rename_count[var_name])
F
fengjiayi 已提交
112
                    var_rename_count[var_name] += 1
F
update  
fengjiayi 已提交
113
                    # rename original var_name
F
fengjiayi 已提交
114 115
                    renamed_vars[var_name][0] = new_name
                    _rename_arg_(op_descs, var_name, new_name, 0, idx)
F
fengjiayi 已提交
116
                    _rename_arg_(pending_sum_ops, var_name, new_name)
F
update  
fengjiayi 已提交
117 118 119

                new_name = var_name + "@RENAME@" + \
                    str(var_rename_count[var_name])
F
fengjiayi 已提交
120
                var_rename_count[var_name] += 1
F
update  
fengjiayi 已提交
121
                op_desc.rename_output(var_name, new_name)
F
fengjiayi 已提交
122 123
                renamed_vars[var_name].append(new_name)
    for var_name, inputs in renamed_vars.iteritems():
F
update  
fengjiayi 已提交
124
        if len(inputs) > 1:
F
fengjiayi 已提交
125
            pending_sum_ops.append((_create_op_desc_(
F
fengjiayi 已提交
126
                "sum", {"X": inputs}, {"Out": [var_name]}, {}), len(op_descs)))
F
fengjiayi 已提交
127
    # sum_op descs are sorted according to their insert position
F
update  
fengjiayi 已提交
128
    for p in reversed(pending_sum_ops):
F
fengjiayi 已提交
129 130 131 132 133 134
        op_descs.insert(p[1], p[0])

    return op_descs


def _remove_no_grad_branch_(op_descs, no_grad_set):
135 136 137 138 139 140
    """
    Remove unnecessary grad ops
    A grad op can be removed in two cases:
        1. all outputs of the grad op are in 'no_grad_set'
        2. (TODO) all grad inputs of the grad op are in 'no_grad_set'
    """
F
fengjiayi 已提交
141 142 143 144
    # Remove ops whose outputs are all in no_grad_dict
    op_descs = filter(
        lambda op_desc: not _all_in_set_(op_desc.output_arg_names(), no_grad_set),
        op_descs)
F
fengjiayi 已提交
145 146
    # Insert fill_zeros_like_op
    to_insert = []
F
fengjiayi 已提交
147
    for idx, op_desc in enumerate(op_descs):
F
fengjiayi 已提交
148
        for arg in op_desc.input_arg_names():
F
fengjiayi 已提交
149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164
            if core.grad_var_suffix() in arg and arg in no_grad_set:
                to_insert.append((_create_op_desc_("fill_zeros_like", {
                    "X": [_strip_grad_suffix_(arg)]
                }, {"Y": [arg]}, {}), idx))

    map(lambda p: op_descs.insert(p[1], p[0]), reversed(to_insert))

    return op_descs


def _append_backward_ops_(target,
                          block,
                          target_block,
                          no_grad_dict,
                          grad_to_var,
                          callback=None):
165 166 167 168 169 170 171 172 173 174 175 176 177 178
    """
    Create all grad ops, and insert them into given block

    Args:
        target(Variable): the target variable of forward pass
        block(Block): the block where forward ops are
        target_block(Block): the block which is going to hold new generated grad ops
        no_grad_dict(dict): 
            key(int)  block index
            val(set) a set of varibale names. These varibales have no gradient
        grad_to_var(dict)(output argument):
            key(str): grad variable name
            val(str): corresponding forward variable name
    """
F
fengjiayi 已提交
179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199
    grad_op_descs = []
    program = block.program
    for op in reversed(block.ops):
        grad_sub_block_list = []
        # If the op has its own sub-block, deal with the sub-block first
        if op.has_attr("sub_block"):
            sub_block = program.block(op.block_attr("sub_block"))
            grad_sub_block = program.create_block(parent_idx=sub_block.idx)
            _append_backward_ops_(target, sub_block, grad_sub_block,
                                  no_grad_dict, grad_to_var, callback)
            grad_sub_block_list.append(grad_sub_block.desc)

        grad_op_desc, op_grad_to_var = core.get_grad_op_desc(
            op.desc, no_grad_dict[block.idx], grad_sub_block_list)
        grad_op_descs.extend(grad_op_desc)
        grad_to_var.update(op_grad_to_var)

    grad_op_descs = _addup_repetitive_outputs_(grad_op_descs)

    grad_op_descs = _remove_no_grad_branch_(grad_op_descs,
                                            no_grad_dict[block.idx])
F
fengjiayi 已提交
200

F
update  
fengjiayi 已提交
201 202 203
    if target_block.idx == 0:
        grad_op_descs.insert(
            0,
F
fengjiayi 已提交
204 205 206 207 208 209
            _create_op_desc_("fill_constant", {}, {
                "Out": [_append_grad_suffix_(target.name)]
            }, {"shape": [1],
                "value": 1.0,
                "dtype": target.dtype}))
    # append op_desc in grad_op_descs to target_block
F
update  
fengjiayi 已提交
210
    for op_desc in grad_op_descs:
F
fengjiayi 已提交
211 212
        new_op_desc = target_block.desc.append_op()
        new_op_desc.copy_from(op_desc)
F
update  
fengjiayi 已提交
213

F
fengjiayi 已提交
214 215

def _append_backward_vars_(block, start_op_idx, grad_to_var, grad_info_map):
216 217 218 219 220 221 222 223 224 225 226 227 228 229
    """
    Create new variables required by backward pass.

    Args:
        block(Block): the block where new variables will be created
        start_op_idx(int): Only variables required by ops in block.ops[start_op_idx : ] will be created
        grad_to_var(dict):
            key(str): grad variable name
            val(str): corresponding forward variable name
            In most cases, this dict is generated by _append_backward_ops_()
        grad_info_map(dict)(output argument):
            key(str): forward variable name
            val(tuple): a tuple of (str, int), str is the corresponding grad name, int is the block index
    """
F
fengjiayi 已提交
230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252
    for op_idx in range(start_op_idx, block.desc.op_size()):
        op_desc = block.desc.op(op_idx)
        if op_desc.has_attr("sub_block"):
            sub_block = block.program.block(op_desc.block_attr("sub_block"))
            _append_backward_vars_(sub_block, 0, grad_to_var, grad_info_map)
        new_vars = set()
        # create new gradient variables
        for grad_var_name in op_desc.output_arg_names():
            grad_var_name = grad_var_name.encode("ascii")
            if block.desc.has_var_recursive(
                    grad_var_name) or grad_var_name == core.empty_var_name():
                continue
            block.desc.var(grad_var_name)
            new_vars.add(grad_var_name)
            if not grad_to_var.has_key(grad_var_name):
                continue
            grad_info_map[grad_to_var[grad_var_name]] = (grad_var_name, block)
        # infer_shape and infer_type
        op_desc.infer_var_type(block.desc)
        op_desc.infer_shape(block.desc)
        for arg in op_desc.output_arg_names():
            if arg in new_vars:
                _infer_var_data_type_(arg, block)
F
update  
fengjiayi 已提交
253 254


F
fengjiayi 已提交
255
def append_backward(loss, parameter_list=None, no_grad_set=None):
256 257 258 259 260 261
    """
    Create and add gradient Operators in BlockDesc to compute
    gradients of `loss` for parameters in parameter_list

    :param loss: an variable generated by cost function.
    :type loss: Variable
F
fengjiayi 已提交
262 263
    :param no_grad_dict: variable that should not create gradient
    :type no_grad_dict: set
264 265 266 267 268 269 270
    :param parameter_list: parameters that need to compute gradient and 
    update to optimize the lost.
    :type: list
    :return: list of (parameters, gradients) pair.
    :rtype: list[Variable]
    """
    assert isinstance(loss, framework.Variable)
Y
Yu Yang 已提交
271

F
fengjiayi 已提交
272
    program = loss.block.program
F
fengjiayi 已提交
273 274
    no_grad_dict = dict()
    if no_grad_set is None:
Y
Yu Yang 已提交
275 276 277
        assert isinstance(program, framework.Program)
        for block in program.blocks:
            assert isinstance(block, framework.Block)
F
update  
fengjiayi 已提交
278
            block_no_grad_set = set()
Y
Yu Yang 已提交
279 280 281
            for var in block.vars.itervalues():
                assert isinstance(var, framework.Variable)
                if var.stop_gradient:
F
fengjiayi 已提交
282
                    block_no_grad_set.add(_append_grad_suffix_(var.name))
F
fengjiayi 已提交
283
            no_grad_dict[block.idx] = block_no_grad_set
F
fengjiayi 已提交
284 285 286 287
    elif isinstance(no_grad_set, set):
        no_grad_dict = {0: no_grad_set}
    else:
        raise ValueError("'no_grad_set' should be a set or None.")
Y
Yu Yang 已提交
288

F
update  
fengjiayi 已提交
289
    grad_info_map = dict()
F
fengjiayi 已提交
290
    root_block = program.block(0)
F
fengjiayi 已提交
291

F
fengjiayi 已提交
292 293
    fwd_op_num = root_block.desc.op_size()
    current_block_idx = program.current_block_idx
F
fengjiayi 已提交
294 295 296 297
    grad_to_var = dict()

    _append_backward_ops_(loss, root_block, root_block, no_grad_dict,
                          grad_to_var)
F
fengjiayi 已提交
298
    _append_backward_vars_(root_block, fwd_op_num, grad_to_var, grad_info_map)
F
fengjiayi 已提交
299

F
fengjiayi 已提交
300 301
    program.current_block_idx = current_block_idx
    program.sync_with_cpp()
F
fengjiayi 已提交
302

303 304 305
    if parameter_list is not None:
        parameters = parameter_list
    else:
F
fengjiayi 已提交
306
        params = program.global_block().all_parameters()
307 308 309
        parameters = [param.name for param in params]
    params_and_grads = []
    for param in parameters:
F
update  
fengjiayi 已提交
310
        if param not in grad_info_map:
311
            raise ValueError("param %s is not in map" % param)
F
update  
fengjiayi 已提交
312
        grad_info = grad_info_map[param]
F
fengjiayi 已提交
313
        grad_block = grad_info[1]
314 315 316 317
        if not grad_block.has_var(grad_info[0]):
            raise ValueError("grad block[{0}] did not have grad var {1}".format(
                grad_info[1], grad_info[0]))
        # Get the param var from the global block
F
fengjiayi 已提交
318
        param_var = program.global_block().var(param)
319 320 321 322 323 324
        grad_var = grad_block.var(grad_info[0])
        if loss.block.has_var(grad_info[0]):
            params_and_grads.append((param_var, grad_var))
        else:
            params_and_grads.append((param_var, None))
    return params_and_grads