From ac596a3952a3f75cc12f1eefafb14a165a57ff95 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 30 Nov 2017 14:14:13 +0800 Subject: [PATCH] Feature/switch program (#5932) * Unify fluid submodules to fluid module Change books just use `import fluid`, not submodules * Remove g_main_program/g_startup_program Use default_main_program/default_startup_program instead * Typo * Add API for switch default program * Two functions: switch_main_program/switch_startup_program * A guard: program_guard. Users can use the `with` statement change default programs * Change unittests in `test_layers` * Fix CI * Fix CI * Fix CI --- python/paddle/v2/fluid/framework.py | 79 +++++- python/paddle/v2/fluid/tests/test_layers.py | 271 ++++++++------------ 2 files changed, 188 insertions(+), 162 deletions(-) diff --git a/python/paddle/v2/fluid/framework.py b/python/paddle/v2/fluid/framework.py index 1c42e4d44f..49c6d89834 100644 --- a/python/paddle/v2/fluid/framework.py +++ b/python/paddle/v2/fluid/framework.py @@ -3,10 +3,12 @@ import collections import numpy as np from . import core import proto.framework_pb2 as framework_pb2 +import contextlib __all__ = [ 'Block', 'Variable', 'Program', 'Operator', 'default_startup_program', - 'default_main_program' + 'default_main_program', 'program_guard', 'switch_startup_program', + 'switch_main_program' ] @@ -659,8 +661,83 @@ _startup_program_ = Program() def default_startup_program(): + """ + Get default startup program. In startup program, Paddle will initialize + parameters, initialize nccl handle, etc. + + Returns: + Program: startup program + """ return _startup_program_ def default_main_program(): + """ + Get default main program. The main program is used for training or testing. + + Returns: + Program: main program + """ return _main_program_ + + +def switch_main_program(program): + """ + Switch the main program to a new program. + + Args: + program(Program): The new main program + + Returns: + Program: The previous main program + """ + global _main_program_ + prev_program = _main_program_ + _main_program_ = program + return prev_program + + +def switch_startup_program(program): + """ + Switch the startup program to a new program + Args: + program(Program): The new startup program + + Returns: + Program: The previous startup program + """ + global _startup_program_ + prev_program = _startup_program_ + _startup_program_ = program + return prev_program + + +@contextlib.contextmanager +def program_guard(main_program, startup_program=None): + """ + Switch program with `with` statement + + Examples: + >>> with program_guard(Program()): + >>> data = fluid.layers.data(...) + >>> hidden = fluid.layers.fc(...) + + Args: + main_program(Program): New main program inside `with` statement + startup_program(Program): New startup program inside `with` statement. + None means do not change startup program. + + Returns: + None + """ + if not isinstance(main_program, Program): + raise TypeError("main_program should be Program") + main_program = switch_main_program(main_program) + if startup_program is not None: + if not isinstance(startup_program, Program): + raise TypeError("startup_program should be Program") + startup_program = switch_startup_program(startup_program) + yield + switch_main_program(main_program) + if startup_program is not None: + switch_startup_program(startup_program) diff --git a/python/paddle/v2/fluid/tests/test_layers.py b/python/paddle/v2/fluid/tests/test_layers.py index b6906be60b..33b0e54f42 100644 --- a/python/paddle/v2/fluid/tests/test_layers.py +++ b/python/paddle/v2/fluid/tests/test_layers.py @@ -1,192 +1,141 @@ +from __future__ import print_function import unittest import paddle.v2.fluid.layers as layers import paddle.v2.fluid.nets as nets -from paddle.v2.fluid.framework import Program +from paddle.v2.fluid.framework import Program, program_guard class TestBook(unittest.TestCase): def test_fit_a_line(self): program = Program() - x = layers.data( - name='x', shape=[13], dtype='float32', main_program=program) - y_predict = layers.fc(input=x, size=1, act=None, main_program=program) + with program_guard(program, startup_program=Program()): + x = layers.data(name='x', shape=[13], dtype='float32') + y_predict = layers.fc(input=x, size=1, act=None) + y = layers.data(name='y', shape=[1], dtype='float32') + cost = layers.square_error_cost(input=y_predict, label=y) + avg_cost = layers.mean(x=cost) + self.assertIsNotNone(avg_cost) + program.append_backward(avg_cost) - y = layers.data( - name='y', shape=[1], dtype='float32', main_program=program) - cost = layers.square_error_cost( - input=y_predict, label=y, main_program=program) - - avg_cost = layers.mean(x=cost, main_program=program) - self.assertIsNotNone(avg_cost) - program.append_backward(avg_cost) - - print str(program) + print(str(program)) def test_recognize_digits_mlp(self): program = Program() - - # Change g_program, so the rest layers use `g_program` - images = layers.data( - name='pixel', shape=[784], dtype='float32', main_program=program) - label = layers.data( - name='label', shape=[1], dtype='int32', main_program=program) - hidden1 = layers.fc(input=images, - size=128, - act='relu', - main_program=program) - hidden2 = layers.fc(input=hidden1, - size=64, - act='relu', - main_program=program) - predict = layers.fc(input=hidden2, - size=10, - act='softmax', - main_program=program) - cost = layers.cross_entropy( - input=predict, label=label, main_program=program) - avg_cost = layers.mean(x=cost, main_program=program) - self.assertIsNotNone(avg_cost) - - print str(program) + with program_guard(program, startup_program=Program()): + # Change g_program, so the rest layers use `g_program` + images = layers.data(name='pixel', shape=[784], dtype='float32') + label = layers.data(name='label', shape=[1], dtype='int32') + hidden1 = layers.fc(input=images, size=128, act='relu') + hidden2 = layers.fc(input=hidden1, size=64, act='relu') + predict = layers.fc(input=hidden2, size=10, act='softmax') + cost = layers.cross_entropy(input=predict, label=label) + avg_cost = layers.mean(x=cost) + self.assertIsNotNone(avg_cost) + + print(str(program)) def test_simple_conv2d(self): program = Program() - images = layers.data( - name='pixel', - shape=[3, 48, 48], - dtype='int32', - main_program=program) - layers.conv2d( - input=images, - num_filters=3, - filter_size=[4, 4], - main_program=program) - - print str(program) + with program_guard(program, startup_program=Program()): + images = layers.data(name='pixel', shape=[3, 48, 48], dtype='int32') + layers.conv2d(input=images, num_filters=3, filter_size=[4, 4]) + + print(str(program)) def test_conv2d_transpose(self): program = Program() - kwargs = {'main_program': program} - img = layers.data( - name='pixel', shape=[3, 2, 2], dtype='float32', **kwargs) - layers.conv2d_transpose( - input=img, num_filters=10, output_size=28, **kwargs) - print str(program) + with program_guard(program): + img = layers.data(name='pixel', shape=[3, 2, 2], dtype='float32') + layers.conv2d_transpose(input=img, num_filters=10, output_size=28) + print(str(program)) def test_recognize_digits_conv(self): program = Program() - - images = layers.data( - name='pixel', - shape=[1, 28, 28], - dtype='float32', - main_program=program) - label = layers.data( - name='label', shape=[1], dtype='int32', main_program=program) - conv_pool_1 = nets.simple_img_conv_pool( - input=images, - filter_size=5, - num_filters=2, - pool_size=2, - pool_stride=2, - act="relu", - main_program=program) - conv_pool_2 = nets.simple_img_conv_pool( - input=conv_pool_1, - filter_size=5, - num_filters=4, - pool_size=2, - pool_stride=2, - act="relu", - main_program=program) - - predict = layers.fc(input=conv_pool_2, - size=10, - act="softmax", - main_program=program) - cost = layers.cross_entropy( - input=predict, label=label, main_program=program) - avg_cost = layers.mean(x=cost, main_program=program) - - program.append_backward(avg_cost) - - print str(program) + with program_guard(program, startup_program=Program()): + images = layers.data( + name='pixel', shape=[1, 28, 28], dtype='float32') + label = layers.data(name='label', shape=[1], dtype='int32') + conv_pool_1 = nets.simple_img_conv_pool( + input=images, + filter_size=5, + num_filters=2, + pool_size=2, + pool_stride=2, + act="relu") + conv_pool_2 = nets.simple_img_conv_pool( + input=conv_pool_1, + filter_size=5, + num_filters=4, + pool_size=2, + pool_stride=2, + act="relu") + + predict = layers.fc(input=conv_pool_2, size=10, act="softmax") + cost = layers.cross_entropy(input=predict, label=label) + avg_cost = layers.mean(x=cost) + + program.append_backward(avg_cost) + + print(str(program)) def test_word_embedding(self): program = Program() - dict_size = 10000 - embed_size = 32 - first_word = layers.data( - name='firstw', shape=[1], dtype='int64', main_program=program) - second_word = layers.data( - name='secondw', shape=[1], dtype='int64', main_program=program) - third_word = layers.data( - name='thirdw', shape=[1], dtype='int64', main_program=program) - forth_word = layers.data( - name='forthw', shape=[1], dtype='int64', main_program=program) - next_word = layers.data( - name='nextw', shape=[1], dtype='int64', main_program=program) - - embed_first = layers.embedding( - input=first_word, - size=[dict_size, embed_size], - dtype='float32', - param_attr='shared_w', - main_program=program) - embed_second = layers.embedding( - input=second_word, - size=[dict_size, embed_size], - dtype='float32', - param_attr='shared_w', - main_program=program) - - embed_third = layers.embedding( - input=third_word, - size=[dict_size, embed_size], - dtype='float32', - param_attr='shared_w', - main_program=program) - embed_forth = layers.embedding( - input=forth_word, - size=[dict_size, embed_size], - dtype='float32', - param_attr='shared_w', - main_program=program) - - concat_embed = layers.concat( - input=[embed_first, embed_second, embed_third, embed_forth], - axis=1, - main_program=program) - - hidden1 = layers.fc(input=concat_embed, - size=256, - act='sigmoid', - main_program=program) - predict_word = layers.fc(input=hidden1, - size=dict_size, - act='softmax', - main_program=program) - cost = layers.cross_entropy( - input=predict_word, label=next_word, main_program=program) - avg_cost = layers.mean(x=cost, main_program=program) - self.assertIsNotNone(avg_cost) - - print str(program) + with program_guard(program, startup_program=Program()): + dict_size = 10000 + embed_size = 32 + first_word = layers.data(name='firstw', shape=[1], dtype='int64') + second_word = layers.data(name='secondw', shape=[1], dtype='int64') + third_word = layers.data(name='thirdw', shape=[1], dtype='int64') + forth_word = layers.data(name='forthw', shape=[1], dtype='int64') + next_word = layers.data(name='nextw', shape=[1], dtype='int64') + + embed_first = layers.embedding( + input=first_word, + size=[dict_size, embed_size], + dtype='float32', + param_attr='shared_w') + embed_second = layers.embedding( + input=second_word, + size=[dict_size, embed_size], + dtype='float32', + param_attr='shared_w') + + embed_third = layers.embedding( + input=third_word, + size=[dict_size, embed_size], + dtype='float32', + param_attr='shared_w') + embed_forth = layers.embedding( + input=forth_word, + size=[dict_size, embed_size], + dtype='float32', + param_attr='shared_w') + + concat_embed = layers.concat( + input=[embed_first, embed_second, embed_third, embed_forth], + axis=1) + + hidden1 = layers.fc(input=concat_embed, size=256, act='sigmoid') + predict_word = layers.fc(input=hidden1, + size=dict_size, + act='softmax') + cost = layers.cross_entropy(input=predict_word, label=next_word) + avg_cost = layers.mean(x=cost) + self.assertIsNotNone(avg_cost) + + print(str(program)) def test_linear_chain_crf(self): program = Program() - - # Change g_program, so the rest layers use `g_program` - images = layers.data( - name='pixel', shape=[784], dtype='float32', main_program=program) - label = layers.data( - name='label', shape=[1], dtype='int32', main_program=program) - hidden = layers.fc(input=images, size=128, main_program=program) - crf = layers.linear_chain_crf( - input=hidden, label=label, main_program=program) - - print str(program) + with program_guard(program, startup_program=Program()): + images = layers.data(name='pixel', shape=[784], dtype='float32') + label = layers.data(name='label', shape=[1], dtype='int32') + hidden = layers.fc(input=images, size=128) + crf = layers.linear_chain_crf(input=hidden, label=label) + self.assertNotEqual(crf, None) + + print(str(program)) if __name__ == '__main__': -- GitLab