IfElse Python API (#5624)

* Forward of raw if-else op
* add backward part of mnist if-else unittest
* refine fill_constant_batch_size_like layer
* add draft ifelse operator
* Complete IfElse Op
* add unittest of ifelse api
* merge baidu/develop
* Stash
* Merge develop branch
* Support int/int64 for fill_constant_batch_size_like

IfElse Python API (#5624)
* Forward of raw if-else op
* add backward part of mnist if-else unittest
* refine fill_constant_batch_size_like layer
* add draft ifelse operator
* Complete IfElse Op
* add unittest of ifelse api
* merge baidu/develop
* Stash
* Merge develop branch
* Support int/int64 for fill_constant_batch_size_like
f2ca07e8 · Yu Yang · QI JUN · 9891667b · f2ca07e8 · f2ca07e8
Showing 340 additions and 27 deletions

python/paddle/v2/fluid/layers.py python/paddle/v2/fluid/layers.py +186 -27

python/paddle/v2/fluid/tests/test_mnist_if_else_op.py python/paddle/v2/fluid/tests/test_mnist_if_else_op.py +154 -0

未找到文件。
--- a/python/paddle/v2/fluid/layers.py
+++ b/python/paddle/v2/fluid/layers.py
@@ -248,7 +248,7 @@ def data(name,
        stop_gradient=stop_gradient)


-def create_tensor(dtype, name=None, main_program=None):
+def create_tensor(dtype, name=None, main_program=None, startup_program=None):
+    """
+    Create a variable of the given dtype through a 'create_tensor' helper.
+
+    Every argument, including startup_program, reaches LayerHelper through
+    **locals(); the created variable is named after helper.name.
+    """
    helper = LayerHelper("create_tensor", **locals())
    return helper.create_variable(name=helper.name, dtype=dtype)

@@ -412,30 +412,12 @@ _create_op_func_('mul')
 _create_op_func_('elementwise_add')
 _create_op_func_('dropout')
 _create_op_func_('reshape')
-_create_op_func_('elementwise_add')
 _create_op_func_('sigmoid')
 _create_op_func_('scale')
 _create_op_func_('reshape')
 _create_op_func_('transpose')


-def fill_constant(data_type, shape, value=None, program=None):
-    """
-    This function creates a tensor , with shape as mentioned in the input and
-    specified data_type and fills this up with a constant value that
-    comes in the input.
-    """
-    helper = LayerHelper('fill_constant', **locals())
-    out = helper.create_tmp_variable(dtype=data_type)
-    helper.append_op(
-        type='fill_constant',
-        outputs={'Out': [out]},
-        attrs={'data_type': data_type,
-               'shape': shape,
-               'value': value})
-    return out
-
-
 def cast(x, data_type, main_program=None):
    """
    This function takes in the input with input_data_type
@@ -478,7 +460,7 @@ def sums(input, main_program=None, startup_program=None):
    return out


-def assign(input, output, main_program=None):
+def assign(input, output, main_program=None, startup_program=None):
    helper = LayerHelper('assign', **locals())
    helper.append_op(
        type='scale',
@@ -490,7 +472,7 @@ def assign(input, output, main_program=None):

 def split_lod_tensor(input,
                     mask,
-                     level,
+                     level=0,
                     main_program=None,
                     startup_program=None):
    helper = LayerHelper('split_lod_tensor', **locals())
@@ -512,11 +494,11 @@ def merge_lod_tensor(in_true,
                     in_false,
                     x,
                     mask,
-                     level,
+                     level=0,
                     main_program=None,
                     startup_program=None):
    helper = LayerHelper('merge_lod_tensor', **locals())
-    out = helper.create_tmp_variable(dtype=x.data_type)
+    out = helper.create_tmp_variable(dtype=in_true.data_type)
    helper.append_op(
        type='merge_lod_tensor',
        inputs={'X': x,
@@ -1366,7 +1348,7 @@ def array_to_lod_tensor(x, table, main_program=None):
    return tmp


-def fill_constant(shape, dtype, value, main_program=None):
+def fill_constant(shape, dtype, value, main_program=None, startup_program=None):
    """
    This function creates a tensor , with shape as mentioned in the input and
    specified data_type and fills this up with a constant value that
@@ -1387,6 +1369,31 @@ def fill_constant(shape, dtype, value, main_program=None):
    return out


+def fill_constant_batch_size_like(input,
+                                  shape,
+                                  dtype,
+                                  value,
+                                  input_dim_idx=0,
+                                  output_dim_idx=0,
+                                  main_program=None,
+                                  startup_program=None):
+    """
+    Append a 'fill_constant_batch_size_like' op and return its output.
+
+    The output is a temporary variable of type `dtype`.  The op receives
+    `input` as 'Input' and, as attributes, `shape`, `value` (converted to
+    float) and the two dimension indices.  Judging by the op name, the
+    output presumably copies its batch dimension from `input` -- confirm
+    against the operator implementation.
+
+    The returned variable is marked with stop_gradient = True.
+    """
+    # locals() has to be captured before any other local name is bound, so
+    # that the helper receives exactly the call arguments.
+    helper = LayerHelper("fill_constant_batch_size_like", **locals())
+    filled = helper.create_tmp_variable(dtype=dtype)
+
+    op_attrs = {
+        'shape': shape,
+        'data_type': filled.data_type,
+        'value': float(value),
+        'input_dim_idx': input_dim_idx,
+        'output_dim_idx': output_dim_idx
+    }
+    helper.append_op(
+        type='fill_constant_batch_size_like',
+        inputs={'Input': input},
+        outputs={'Out': [filled]},
+        attrs=op_attrs)
+
+    filled.stop_gradient = True
+    return filled
+
+
 def ones(shape, dtype, main_program=None):
    """
    This function performs the same function as fill_constant() declared above
@@ -1449,7 +1456,7 @@ def create_array(dtype, main_program=None):
        dtype=dtype)


-def less_than(x, y, cond=None, main_program=None):
+def less_than(x, y, cond=None, main_program=None, **ignored):
    helper = LayerHelper("less_than", **locals())
    if cond is None:
        cond = helper.create_tmp_variable(dtype='bool')
@@ -1527,13 +1534,20 @@ class ConditionalBlockGuard(BlockGuard):


 class ConditionalBlock(object):
-    def __init__(self, inputs, name=None, main_program=None):
+    def __init__(self,
+                 inputs,
+                 name=None,
+                 main_program=None,
+                 startup_program=None):
        for each_input in inputs:
            if not isinstance(each_input, Variable):
                raise TypeError("Each input should be variable")
        self.inputs = inputs
        self.helper = LayerHelper(
-            'conditional_block', name=name, main_program=main_program)
+            'conditional_block',
+            name=name,
+            main_program=main_program,
+            startup_program=startup_program)

    def block(self):
        return ConditionalBlockGuard(self)
@@ -1578,3 +1592,148 @@ class ConditionalBlock(object):
            outputs={'Out': out_list,
                     'Scope': [step_scope]},
            attrs={'block': inside_block})
+
+
+class IfElseBlockGuard(object):
+    """
+    Context manager for one branch (true or false) of an IfElse.
+
+    Entering the guard switches the owning IfElse into the matching
+    "inside a branch" status and enters the guard of the corresponding
+    ConditionalBlock; leaving it checks that the branch registered at least
+    one output and switches the status back.
+    """
+
+    def __init__(self, is_true, ifelse):
+        """
+        :param is_true: truthy for the true branch, falsy for the false one.
+        :param ifelse: the IfElse this guard belongs to.
+        :raises TypeError: if `ifelse` is not an IfElse, or the selected
+            branch is not a ConditionalBlock.
+        :raises ValueError: if `ifelse` is already inside a branch.
+        """
+        if not isinstance(ifelse, IfElse):
+            raise TypeError("ifelse must be an instance of IfElse class")
+
+        if ifelse.status != IfElse.OUT_IF_ELSE_BLOCKS:
+            raise ValueError("You cannot invoke IfElse.block() inside a block")
+
+        self.is_true = is_true
+        self.ie = ifelse
+
+        branch = (ifelse.conditional_true_block
+                  if is_true else ifelse.conditional_false_block)
+        if not isinstance(branch, ConditionalBlock):
+            raise TypeError("Unexpected situation")
+
+        # Store the branch's guard object, not the ConditionalBlock itself.
+        self.cond_block = branch.block()
+
+    def __enter__(self):
+        if self.is_true:
+            self.ie.status = IfElse.IN_IF_ELSE_TRUE_BLOCKS
+        else:
+            self.ie.status = IfElse.IN_IF_ELSE_FALSE_BLOCKS
+        self.cond_block.__enter__()
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        inner_result = self.cond_block.__exit__(exc_type, exc_val, exc_tb)
+        if not inner_result:
+            # re-raise inside exception
+            return False
+
+        # output_table index 1 holds the true branch, index 0 the false one.
+        branch_outputs = self.ie.output_table[1 if self.is_true else 0]
+        if not branch_outputs:
+            raise ValueError("Must set output inside block")
+        self.ie.status = IfElse.OUT_IF_ELSE_BLOCKS
+
+
+class IfElse(object):
+    """
+    Builder for a two-branch conditional driven by a mask variable.
+
+    Typical use::
+
+        ie = IfElse(cond)
+        with ie.true_block():
+            x_true = ie.input(x)
+            ie.output(some_layer(x_true))
+        with ie.false_block():
+            x_false = ie.input(x)
+            ie.output(other_layer(x_false))
+        merged = ie()
+
+    `status` tracks whether code is currently being built outside the
+    branches, inside the true branch or inside the false branch.
+    """
+    OUT_IF_ELSE_BLOCKS = 0
+    IN_IF_ELSE_TRUE_BLOCKS = 1
+    IN_IF_ELSE_FALSE_BLOCKS = 2
+
+    def __init__(self, cond, name=None, main_program=None,
+                 startup_program=None):
+        """
+        :param cond: Variable used as the mask that selects the branch.
+        :param name: optional name forwarded to the LayerHelper.
+        :param main_program: program forwarded to the helper and to both
+            branch ConditionalBlocks.
+        :param startup_program: startup program, forwarded the same way.
+        :raises TypeError: if `cond` is not a Variable.
+        """
+        if not isinstance(cond, Variable):
+            raise TypeError("cond must be a Variable")
+        self.helper = LayerHelper(
+            'ifelse',
+            name=name,
+            main_program=main_program,
+            startup_program=startup_program)
+        self.cond = cond
+        # Maps id(x) -> (out_true, out_false) so each input is split once.
+        self.input_table = {}
+        self.status = IfElse.OUT_IF_ELSE_BLOCKS
+        # The branches must be built with the same programs as self.helper:
+        # parent_block() and output() work on self.helper.main_program, so a
+        # branch created without them would rely on whatever program
+        # LayerHelper picks when none is given.
+        self.conditional_true_block = ConditionalBlock(
+            inputs=[self.cond],
+            main_program=main_program,
+            startup_program=startup_program)
+        self.conditional_false_block = ConditionalBlock(
+            inputs=[self.cond],
+            main_program=main_program,
+            startup_program=startup_program)
+        # (false_outs, true_outs): index 0 is the false branch, index 1 the
+        # true branch, matching every lookup below and in IfElseBlockGuard.
+        self.output_table = ([], [])
+
+    def input(self, x):
+        """
+        Return the part of `x` that belongs to the branch being built.
+
+        The first time a given `x` is requested, two variables are created
+        in the parent block and a 'split_lod_tensor' op (mask = cond,
+        level 0) is appended there; later requests reuse the cached pair.
+
+        :raises ValueError: if called outside a true/false block.
+        """
+        if self.status == IfElse.OUT_IF_ELSE_BLOCKS:
+            raise ValueError("input must in true/false blocks")
+        if id(x) not in self.input_table:
+            parent_block = self.parent_block()
+            out_true = parent_block.create_var(
+                name=unique_name('ifelse_input' + self.helper.name),
+                dtype=x.data_type)
+
+            out_false = parent_block.create_var(
+                name=unique_name('ifelse_input' + self.helper.name),
+                dtype=x.data_type)
+            parent_block.append_op(
+                type='split_lod_tensor',
+                inputs={
+                    'X': x,
+                    'Mask': self.cond,
+                },
+                outputs={'OutTrue': out_true,
+                         'OutFalse': out_false},
+                attrs={'level': 0})
+            self.input_table[id(x)] = (out_true, out_false)
+        else:
+            out_true, out_false = self.input_table[id(x)]
+
+        if self.status == IfElse.IN_IF_ELSE_TRUE_BLOCKS:
+            return out_true
+        else:
+            return out_false
+
+    def parent_block(self):
+        """Return the block that is the parent of the program's current one."""
+        current_block = self.helper.main_program.current_block()
+        return self.helper.main_program.block(current_block.parent_idx)
+
+    def true_block(self):
+        """Return a guard for building the true branch (use with `with`)."""
+        return IfElseBlockGuard(True, self)
+
+    def false_block(self):
+        """Return a guard for building the false branch (use with `with`)."""
+        return IfElseBlockGuard(False, self)
+
+    def output(self, *outs):
+        """
+        Register the outputs of the branch currently being built.
+
+        For every variable in `outs` a counterpart is created in the parent
+        block, recorded in the branch's output list, and the local variable
+        is assigned to it.
+
+        :raises ValueError: if called outside a true/false block.
+        :raises TypeError: if any output is not a Variable.
+        """
+        if self.status == self.OUT_IF_ELSE_BLOCKS:
+            raise ValueError("output can only be invoked in the sub-block")
+
+        out_table = self.output_table[1 if self.status ==
+                                      self.IN_IF_ELSE_TRUE_BLOCKS else 0]
+        parent_block = self.parent_block()
+        for each_out in outs:
+            if not isinstance(each_out, Variable):
+                raise TypeError("Each output should be a variable")
+            # create outside tensor
+            outside_out = parent_block.create_var(
+                name=unique_name("_".join([self.helper.name, 'output'])),
+                dtype=each_out.data_type)
+            out_table.append(outside_out)
+
+            # assign local var to outside
+            assign(
+                input=each_out,
+                output=outside_out,
+                main_program=self.helper.main_program,
+                startup_program=self.helper.startup_program)
+
+    def __call__(self):
+        """
+        Return the outputs of the conditional as a list of variables.
+
+        If only one branch registered outputs, that branch's outside
+        variables are returned as they are.  If both did, the i-th outputs
+        of the two branches are combined with merge_lod_tensor, using cond
+        both as the mask and as `x`.
+
+        :raises ValueError: if called inside a branch, if no branch has
+            registered outputs, or if both branches registered a different
+            number of outputs.
+        """
+        if self.status != self.OUT_IF_ELSE_BLOCKS:
+            raise ValueError("IfElse::__call__ must be out of sub-block")
+        # output_table is ordered (false_outs, true_outs).
+        false_len, true_len = map(len, self.output_table)
+        if false_len == 0 and true_len == 0:
+            raise ValueError("Must invoke true_block/false_block before "
+                             "__call__")
+        elif false_len != true_len and false_len != 0 and true_len != 0:
+            raise ValueError("The output side must be same")
+        elif false_len == 0 or true_len == 0:
+            return self.output_table[0 if false_len != 0 else 1]
+
+        # else none of false_len/true_len is zero
+        # merge together
+        rlist = []
+        for false_var, true_var in zip(*self.output_table):
+            rlist.append(
+                merge_lod_tensor(
+                    in_true=true_var,
+                    in_false=false_var,
+                    mask=self.cond,
+                    x=self.cond,
+                    level=0,
+                    main_program=self.helper.main_program,
+                    startup_program=self.helper.startup_program))
+        return rlist
--- a/python/paddle/v2/fluid/tests/test_mnist_if_else_op.py
+++ b/python/paddle/v2/fluid/tests/test_mnist_if_else_op.py
+import paddle.v2.fluid.layers as layers
+from paddle.v2.fluid.framework import Program
+from paddle.v2.fluid.executor import Executor
+from paddle.v2.fluid.optimizer import MomentumOptimizer
+import paddle.v2.fluid.core as core
+import paddle.v2 as paddle
+import unittest
+import numpy as np
+
+
+class TestMNISTIfElseOp(unittest.TestCase):
+    """
+    Train an MNIST classifier whose hidden layer depends on the label.
+
+    Samples with label < 5 go through a 100-unit tanh layer, the others
+    through a 200-unit one.  One test wires the branches by hand from
+    split_lod_tensor / ConditionalBlock / merge_lod_tensor, the other uses
+    the IfElse API.  Each test passes as soon as the average loss of a
+    batch drops below 1.0.
+    """
+
+    def _build_condition(self, kwargs):
+        """Declare the image/label inputs and the mask `label < 5`."""
+        image = layers.data(
+            name='x', shape=[784], data_type='float32', **kwargs)
+
+        label = layers.data(name='y', shape=[1], data_type='int64', **kwargs)
+
+        limit = layers.fill_constant_batch_size_like(
+            input=label, dtype='int64', shape=[1], value=5.0, **kwargs)
+
+        cond = layers.less_than(x=label, y=limit, **kwargs)
+        return image, label, cond
+
+    def _train_until_converged(self, kwargs, avg_loss):
+        """
+        Minimize `avg_loss` on MNIST with momentum SGD.
+
+        Returns as soon as one batch reports a loss below 1.0 and fails the
+        test if that never happens within PASS_NUM passes.
+        """
+        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
+        optimizer.minimize(avg_loss, kwargs['startup_program'])
+
+        train_reader = paddle.batch(
+            paddle.reader.shuffle(
+                paddle.dataset.mnist.train(), buf_size=8192),
+            batch_size=200)
+
+        place = core.CPUPlace()
+        exe = Executor(place)
+
+        exe.run(kwargs['startup_program'])
+        PASS_NUM = 100
+        for pass_id in range(PASS_NUM):
+            for data in train_reader():
+                # List comprehensions rather than map(): np.array needs a
+                # real sequence, which map() only returns on Python 2.
+                x_data = np.array(
+                    [sample[0] for sample in data]).astype("float32")
+                y_data = np.array(
+                    [sample[1] for sample in data]).astype("int64")
+                y_data = np.expand_dims(y_data, axis=1)
+
+                tensor_x = core.LoDTensor()
+                tensor_x.set(x_data, place)
+
+                tensor_y = core.LoDTensor()
+                tensor_y.set(y_data, place)
+
+                outs = [
+                    np.array(fetched)
+                    for fetched in exe.run(kwargs['main_program'],
+                                           feed={'x': tensor_x,
+                                                 'y': tensor_y},
+                                           fetch_list=[avg_loss])
+                ]
+                print(outs[0])
+                if outs[0] < 1.0:
+                    return
+        self.fail("average loss never dropped below 1.0 within %d passes" %
+                  PASS_NUM)
+
+    def test_raw_api(self):
+        """Build both branches by hand from the low-level layers."""
+        kwargs = {'startup_program': Program(), 'main_program': Program()}
+        image, label, cond = self._build_condition(kwargs)
+        true_image, false_image = layers.split_lod_tensor(
+            input=image, mask=cond, **kwargs)
+
+        true_out = layers.create_tensor(dtype='float32', **kwargs)
+        true_cond = layers.ConditionalBlock([true_image], **kwargs)
+
+        with true_cond.block():
+            hidden = layers.fc(input=true_image, size=100, act='tanh', **kwargs)
+            prob = layers.fc(input=hidden, size=10, act='softmax', **kwargs)
+            layers.assign(input=prob, output=true_out, **kwargs)
+
+        false_out = layers.create_tensor(dtype='float32', **kwargs)
+        false_cond = layers.ConditionalBlock([false_image], **kwargs)
+
+        with false_cond.block():
+            hidden = layers.fc(input=false_image,
+                               size=200,
+                               act='tanh',
+                               **kwargs)
+            prob = layers.fc(input=hidden, size=10, act='softmax', **kwargs)
+            layers.assign(input=prob, output=false_out, **kwargs)
+
+        prob = layers.merge_lod_tensor(
+            in_true=true_out, in_false=false_out, mask=cond, x=image, **kwargs)
+        loss = layers.cross_entropy(input=prob, label=label, **kwargs)
+        avg_loss = layers.mean(x=loss, **kwargs)
+
+        self._train_until_converged(kwargs, avg_loss)
+
+    def test_ifelse(self):
+        """Build the same network through the IfElse API."""
+        kwargs = {'startup_program': Program(), 'main_program': Program()}
+        image, label, cond = self._build_condition(kwargs)
+
+        ie = layers.IfElse(cond, **kwargs)
+
+        with ie.true_block():
+            true_image = ie.input(image)
+            hidden = layers.fc(input=true_image, size=100, act='tanh', **kwargs)
+            prob = layers.fc(input=hidden, size=10, act='softmax', **kwargs)
+            ie.output(prob)
+
+        with ie.false_block():
+            false_image = ie.input(image)
+            hidden = layers.fc(input=false_image,
+                               size=200,
+                               act='tanh',
+                               **kwargs)
+            prob = layers.fc(input=hidden, size=10, act='softmax', **kwargs)
+            ie.output(prob)
+
+        # ie() returns a list with one merged variable per registered output.
+        prob = ie()
+        loss = layers.cross_entropy(input=prob[0], label=label, **kwargs)
+        avg_loss = layers.mean(x=loss, **kwargs)
+
+        self._train_until_converged(kwargs, avg_loss)
+
+
+# Run the test case when this file is executed directly as a script.
+if __name__ == '__main__':
+    unittest.main()