From f2ca07e88a7589d9a33cd80dbc52e5e1261881bb Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Tue, 21 Nov 2017 16:51:54 +0800
Subject: [PATCH] IfElse Python API (#5624)

* Forward of raw if-else op
* add backward part of mnist if-else unittest
* refine fill_constant_batch_size_like layer
* add draft ifelse operator
* Complete IfElse Op
* add unittest of ifelse api
* merge baidu/develop
* Stash
* Merge develop branch
* Support int/int64 for fill_constant_batch_size_like
---
 python/paddle/v2/fluid/layers.py              | 213 +++++++++++++++---
 .../v2/fluid/tests/test_mnist_if_else_op.py   | 154 +++++++++++++
 2 files changed, 340 insertions(+), 27 deletions(-)
 create mode 100644 python/paddle/v2/fluid/tests/test_mnist_if_else_op.py

diff --git a/python/paddle/v2/fluid/layers.py b/python/paddle/v2/fluid/layers.py
index bb9af926e36..26a10ae766c 100644
--- a/python/paddle/v2/fluid/layers.py
+++ b/python/paddle/v2/fluid/layers.py
@@ -248,7 +248,7 @@ def data(name,
         stop_gradient=stop_gradient)
 
 
-def create_tensor(dtype, name=None, main_program=None):
+def create_tensor(dtype, name=None, main_program=None, startup_program=None):
     helper = LayerHelper("create_tensor", **locals())
     return helper.create_variable(name=helper.name, dtype=dtype)
 
@@ -412,30 +412,12 @@ _create_op_func_('mul')
 _create_op_func_('elementwise_add')
 _create_op_func_('dropout')
 _create_op_func_('reshape')
-_create_op_func_('elementwise_add')
 _create_op_func_('sigmoid')
 _create_op_func_('scale')
 _create_op_func_('reshape')
 _create_op_func_('transpose')
 
 
-def fill_constant(data_type, shape, value=None, program=None):
-    """
-    This function creates a tensor , with shape as mentioned in the input and
-    specified data_type and fills this up with a constant value that
-    comes in the input.
-    """
-    helper = LayerHelper('fill_constant', **locals())
-    out = helper.create_tmp_variable(dtype=data_type)
-    helper.append_op(
-        type='fill_constant',
-        outputs={'Out': [out]},
-        attrs={'data_type': data_type,
-               'shape': shape,
-               'value': value})
-    return out
-
-
 def cast(x, data_type, main_program=None):
     """
     This function takes in the input with input_data_type
@@ -478,7 +460,7 @@ def sums(input, main_program=None, startup_program=None):
     return out
 
 
-def assign(input, output, main_program=None):
+def assign(input, output, main_program=None, startup_program=None):
     helper = LayerHelper('assign', **locals())
     helper.append_op(
         type='scale',
@@ -490,7 +472,7 @@ def assign(input, output, main_program=None):
 
 def split_lod_tensor(input,
                      mask,
-                     level,
+                     level=0,
                      main_program=None,
                      startup_program=None):
     helper = LayerHelper('split_lod_tensor', **locals())
@@ -512,11 +494,11 @@ def merge_lod_tensor(in_true,
                      in_false,
                      x,
                      mask,
-                     level,
+                     level=0,
                      main_program=None,
                      startup_program=None):
     helper = LayerHelper('merge_lod_tensor', **locals())
-    out = helper.create_tmp_variable(dtype=x.data_type)
+    out = helper.create_tmp_variable(dtype=in_true.data_type)
     helper.append_op(
         type='merge_lod_tensor',
         inputs={'X': x,
@@ -1366,7 +1348,7 @@ def array_to_lod_tensor(x, table, main_program=None):
     return tmp
 
 
-def fill_constant(shape, dtype, value, main_program=None):
+def fill_constant(shape, dtype, value, main_program=None, startup_program=None):
     """
     This function creates a tensor , with shape as mentioned in the input and
     specified data_type and fills this up with a constant value that
@@ -1387,6 +1369,31 @@ def fill_constant(shape, dtype, value, main_program=None):
     return out
 
 
+def fill_constant_batch_size_like(input,
+                                  shape,
+                                  dtype,
+                                  value,
+                                  input_dim_idx=0,
+                                  output_dim_idx=0,
+                                  main_program=None,
+                                  startup_program=None):
+    helper = LayerHelper("fill_constant_batch_size_like", **locals())
+    out = helper.create_tmp_variable(dtype=dtype)
+    helper.append_op(
+        type='fill_constant_batch_size_like',
+        inputs={'Input': input},
+        outputs={'Out': [out]},
+        attrs={
+            'shape': shape,
+            'data_type': out.data_type,
+            'value': float(value),
+            'input_dim_idx': input_dim_idx,
+            'output_dim_idx': output_dim_idx
+        })
+    out.stop_gradient = True
+    return out
+
+
 def ones(shape, dtype, main_program=None):
     """
     This function performs the same function as fill_constant() declared above
@@ -1449,7 +1456,7 @@ def create_array(dtype, main_program=None):
         dtype=dtype)
 
 
-def less_than(x, y, cond=None, main_program=None):
+def less_than(x, y, cond=None, main_program=None, **ignored):
     helper = LayerHelper("less_than", **locals())
     if cond is None:
         cond = helper.create_tmp_variable(dtype='bool')
@@ -1527,13 +1534,20 @@ class ConditionalBlockGuard(BlockGuard):
 
 
 class ConditionalBlock(object):
-    def __init__(self, inputs, name=None, main_program=None):
+    def __init__(self,
+                 inputs,
+                 name=None,
+                 main_program=None,
+                 startup_program=None):
         for each_input in inputs:
             if not isinstance(each_input, Variable):
                 raise TypeError("Each input should be variable")
         self.inputs = inputs
         self.helper = LayerHelper(
-            'conditional_block', name=name, main_program=main_program)
+            'conditional_block',
+            name=name,
+            main_program=main_program,
+            startup_program=startup_program)
 
     def block(self):
         return ConditionalBlockGuard(self)
@@ -1578,3 +1592,148 @@ class ConditionalBlock(object):
             outputs={'Out': out_list,
                      'Scope': [step_scope]},
             attrs={'block': inside_block})
+
+
+class IfElseBlockGuard(object):
+    def __init__(self, is_true, ifelse):
+        if not isinstance(ifelse, IfElse):
+            raise TypeError("ifelse must be an instance of IfElse class")
+
+        if ifelse.status != IfElse.OUT_IF_ELSE_BLOCKS:
+            raise ValueError("You cannot invoke IfElse.block() inside a block")
+
+        self.is_true = is_true
+        self.ie = ifelse
+        if is_true:
+            self.cond_block = ifelse.conditional_true_block
+        else:
+            self.cond_block = ifelse.conditional_false_block
+
+        if not isinstance(self.cond_block, ConditionalBlock):
+            raise TypeError("Unexpected situation")
+
+        self.cond_block = self.cond_block.block()
+
+    def __enter__(self):
+        self.ie.status = IfElse.IN_IF_ELSE_TRUE_BLOCKS if self.is_true else IfElse.IN_IF_ELSE_FALSE_BLOCKS
+        self.cond_block.__enter__()
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        if not self.cond_block.__exit__(exc_type, exc_val, exc_tb):
+            # re-raise inside exception
+            return False
+        if len(self.ie.output_table[1 if self.is_true else 0]) == 0:
+            raise ValueError("Must set output inside block")
+        self.ie.status = IfElse.OUT_IF_ELSE_BLOCKS
+
+
+class IfElse(object):
+    OUT_IF_ELSE_BLOCKS = 0
+    IN_IF_ELSE_TRUE_BLOCKS = 1
+    IN_IF_ELSE_FALSE_BLOCKS = 2
+
+    def __init__(self, cond, name=None, main_program=None,
+                 startup_program=None):
+        if not isinstance(cond, Variable):
+            raise TypeError("cond must be a Variable")
+        self.helper = LayerHelper(
+            'ifelse',
+            name=name,
+            main_program=main_program,
+            startup_program=startup_program)
+        self.cond = cond
+        self.input_table = {}
+        self.status = IfElse.OUT_IF_ELSE_BLOCKS
+        self.conditional_true_block = ConditionalBlock(inputs=[self.cond])
+        self.conditional_false_block = ConditionalBlock(inputs=[self.cond])
+        self.output_table = ([], [])  # (false_outs, true_outs)
+
+    def input(self, x):
+        if self.status == IfElse.OUT_IF_ELSE_BLOCKS:
+            raise ValueError("input must in true/false blocks")
+        if id(x) not in self.input_table:
+            parent_block = self.parent_block()
+            out_true = parent_block.create_var(
+                name=unique_name('ifelse_input' + self.helper.name),
+                dtype=x.data_type)
+
+            out_false = parent_block.create_var(
+                name=unique_name('ifelse_input' + self.helper.name),
+                dtype=x.data_type)
+            parent_block.append_op(
+                type='split_lod_tensor',
+                inputs={
+                    'X': x,
+                    'Mask': self.cond,
+                },
+                outputs={'OutTrue': out_true,
+                         'OutFalse': out_false},
+                attrs={'level': 0})
+            self.input_table[id(x)] = (out_true, out_false)
+        else:
+            out_true, out_false = self.input_table[id(x)]
+
+        if self.status == IfElse.IN_IF_ELSE_TRUE_BLOCKS:
+            return out_true
+        else:
+            return out_false
+
+    def parent_block(self):
+        current_block = self.helper.main_program.current_block()
+        return self.helper.main_program.block(current_block.parent_idx)
+
+    def true_block(self):
+        return IfElseBlockGuard(True, self)
+
+    def false_block(self):
+        return IfElseBlockGuard(False, self)
+
+    def output(self, *outs):
+        if self.status == self.OUT_IF_ELSE_BLOCKS:
+            raise ValueError("output can only be invoked in the sub-block")
+
+        out_table = self.output_table[1 if self.status ==
+                                      self.IN_IF_ELSE_TRUE_BLOCKS else 0]
+        parent_block = self.parent_block()
+        for each_out in outs:
+            if not isinstance(each_out, Variable):
+                raise TypeError("Each output should be a variable")
+            # create outside tensor
+            outside_out = parent_block.create_var(
+                name=unique_name("_".join([self.helper.name, 'output'])),
+                dtype=each_out.data_type)
+            out_table.append(outside_out)
+
+            # assign local var to outside
+            assign(
+                input=each_out,
+                output=outside_out,
+                main_program=self.helper.main_program,
+                startup_program=self.helper.startup_program)
+
+    def __call__(self):
+        if self.status != self.OUT_IF_ELSE_BLOCKS:
+            raise ValueError("IfElse::__call__ must be out of sub-block")
+        false_len, true_len = map(len, self.output_table)
+        if false_len == 0 and true_len == 0:
+            raise ValueError("Must invoke true_block/false_block before "
+                             "__call__")
+        elif false_len != true_len and false_len != 0 and true_len != 0:
+            raise ValueError("The output side must be same")
+        elif false_len == 0 or true_len == 0:
+            return self.output_table[0 if false_len != 0 else 1]
+
+        # else none of false_len/true_len is zero
+        # merge together
+        rlist = []
+        for false_var, true_var in zip(*self.output_table):
+            rlist.append(
+                merge_lod_tensor(
+                    in_true=true_var,
+                    in_false=false_var,
+                    mask=self.cond,
+                    x=self.cond,
+                    level=0,
+                    main_program=self.helper.main_program,
+                    startup_program=self.helper.startup_program))
+        return rlist
diff --git a/python/paddle/v2/fluid/tests/test_mnist_if_else_op.py b/python/paddle/v2/fluid/tests/test_mnist_if_else_op.py
new file mode 100644
index 00000000000..8af99005dc0
--- /dev/null
+++ b/python/paddle/v2/fluid/tests/test_mnist_if_else_op.py
@@ -0,0 +1,154 @@
+import paddle.v2.fluid.layers as layers
+from paddle.v2.fluid.framework import Program
+from paddle.v2.fluid.executor import Executor
+from paddle.v2.fluid.optimizer import MomentumOptimizer
+import paddle.v2.fluid.core as core
+import paddle.v2 as paddle
+import unittest
+import numpy as np
+
+
+class TestMNISTIfElseOp(unittest.TestCase):
+    def test_raw_api(self):
+        kwargs = {'startup_program': Program(), 'main_program': Program()}
+        image = layers.data(
+            name='x', shape=[784], data_type='float32', **kwargs)
+
+        label = layers.data(name='y', shape=[1], data_type='int64', **kwargs)
+
+        limit = layers.fill_constant_batch_size_like(
+            input=label, dtype='int64', shape=[1], value=5.0, **kwargs)
+
+        cond = layers.less_than(x=label, y=limit, **kwargs)
+        true_image, false_image = layers.split_lod_tensor(
+            input=image, mask=cond, **kwargs)
+
+        true_out = layers.create_tensor(dtype='float32', **kwargs)
+        true_cond = layers.ConditionalBlock([true_image], **kwargs)
+
+        with true_cond.block():
+            hidden = layers.fc(input=true_image, size=100, act='tanh', **kwargs)
+            prob = layers.fc(input=hidden, size=10, act='softmax', **kwargs)
+            layers.assign(input=prob, output=true_out, **kwargs)
+
+        false_out = layers.create_tensor(dtype='float32', **kwargs)
+        false_cond = layers.ConditionalBlock([false_image], **kwargs)
+
+        with false_cond.block():
+            hidden = layers.fc(input=false_image,
+                               size=200,
+                               act='tanh',
+                               **kwargs)
+            prob = layers.fc(input=hidden, size=10, act='softmax', **kwargs)
+            layers.assign(input=prob, output=false_out, **kwargs)
+
+        prob = layers.merge_lod_tensor(
+            in_true=true_out, in_false=false_out, mask=cond, x=image, **kwargs)
+        loss = layers.cross_entropy(input=prob, label=label, **kwargs)
+        avg_loss = layers.mean(x=loss, **kwargs)
+
+        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
+        optimizer.minimize(avg_loss, kwargs['startup_program'])
+
+        train_reader = paddle.batch(
+            paddle.reader.shuffle(
+                paddle.dataset.mnist.train(), buf_size=8192),
+            batch_size=200)
+
+        place = core.CPUPlace()
+        exe = Executor(place)
+
+        exe.run(kwargs['startup_program'])
+        PASS_NUM = 100
+        for pass_id in range(PASS_NUM):
+            for data in train_reader():
+                x_data = np.array(map(lambda x: x[0], data)).astype("float32")
+                y_data = np.array(map(lambda x: x[1], data)).astype("int64")
+                y_data = np.expand_dims(y_data, axis=1)
+
+                tensor_x = core.LoDTensor()
+                tensor_x.set(x_data, place)
+
+                tensor_y = core.LoDTensor()
+                tensor_y.set(y_data, place)
+
+                outs = map(np.array,
+                           exe.run(kwargs['main_program'],
+                                   feed={'x': tensor_x,
+                                         'y': tensor_y},
+                                   fetch_list=[avg_loss]))
+                print outs[0]
+                if outs[0] < 1.0:
+                    return
+        self.assertFalse(True)
+
+    def test_ifelse(self):
+        kwargs = {'startup_program': Program(), 'main_program': Program()}
+        image = layers.data(
+            name='x', shape=[784], data_type='float32', **kwargs)
+
+        label = layers.data(name='y', shape=[1], data_type='int64', **kwargs)
+
+        limit = layers.fill_constant_batch_size_like(
+            input=label, dtype='int64', shape=[1], value=5.0, **kwargs)
+
+        cond = layers.less_than(x=label, y=limit, **kwargs)
+
+        ie = layers.IfElse(cond, **kwargs)
+
+        with ie.true_block():
+            true_image = ie.input(image)
+            hidden = layers.fc(input=true_image, size=100, act='tanh', **kwargs)
+            prob = layers.fc(input=hidden, size=10, act='softmax', **kwargs)
+            ie.output(prob)
+
+        with ie.false_block():
+            false_image = ie.input(image)
+            hidden = layers.fc(input=false_image,
+                               size=200,
+                               act='tanh',
+                               **kwargs)
+            prob = layers.fc(input=hidden, size=10, act='softmax', **kwargs)
+            ie.output(prob)
+
+        prob = ie()
+        loss = layers.cross_entropy(input=prob[0], label=label, **kwargs)
+        avg_loss = layers.mean(x=loss, **kwargs)
+
+        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
+        optimizer.minimize(avg_loss, kwargs['startup_program'])
+        train_reader = paddle.batch(
+            paddle.reader.shuffle(
+                paddle.dataset.mnist.train(), buf_size=8192),
+            batch_size=200)
+
+        place = core.CPUPlace()
+        exe = Executor(place)
+
+        exe.run(kwargs['startup_program'])
+        PASS_NUM = 100
+        for pass_id in range(PASS_NUM):
+            for data in train_reader():
+                x_data = np.array(map(lambda x: x[0], data)).astype("float32")
+                y_data = np.array(map(lambda x: x[1], data)).astype("int64")
+                y_data = np.expand_dims(y_data, axis=1)
+
+                tensor_x = core.LoDTensor()
+                tensor_x.set(x_data, place)
+
+                tensor_y = core.LoDTensor()
+                tensor_y.set(y_data, place)
+
+                outs = map(np.array,
+                           exe.run(kwargs['main_program'],
+                                   feed={'x': tensor_x,
+                                         'y': tensor_y},
+                                   fetch_list=[avg_loss]))
+                print outs[0]
+                if outs[0] < 1.0:
+                    return
+        self.assertFalse(True)
+
+
+if __name__ == '__main__':
+    unittest.main()
-- 
GitLab