diff --git a/python/paddle/fluid/tests/CMakeLists.txt b/python/paddle/fluid/tests/CMakeLists.txt
index 2d81fd431716f9f1aef3d9b76c166807495cfb17..8cfd026f8ff8e044ffbd2cc76c34843072261ab1 100644
--- a/python/paddle/fluid/tests/CMakeLists.txt
+++ b/python/paddle/fluid/tests/CMakeLists.txt
@@ -11,4 +11,3 @@ endforeach()
 
 add_subdirectory(unittests)
 add_subdirectory(book)
-add_subdirectory(book_memory_optimization)
diff --git a/python/paddle/fluid/tests/book_memory_optimization/CMakeLists.txt b/python/paddle/fluid/tests/book_memory_optimization/CMakeLists.txt
deleted file mode 100644
index 213af5d27f711214feda3d200ced57bf71fbf6c2..0000000000000000000000000000000000000000
--- a/python/paddle/fluid/tests/book_memory_optimization/CMakeLists.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
-string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
-
-list(REMOVE_ITEM TEST_OPS test_memopt_image_classification_train)
-py_test(test_memopt_image_classification_train_resnet SRCS test_memopt_image_classification_train.py ARGS resnet)
-py_test(test_memopt_image_classification_train_vgg SRCS test_memopt_image_classification_train.py ARGS vgg)
-
-# default test
-foreach(src ${TEST_OPS})
-  py_test(${src} SRCS ${src}.py)
-endforeach()
diff --git a/python/paddle/fluid/tests/book_memory_optimization/test_memopt_image_classification_train.py b/python/paddle/fluid/tests/book_memory_optimization/test_memopt_image_classification_train.py
deleted file mode 100644
index a231bbfbc8d5712275c92b4d27580016825ea91b..0000000000000000000000000000000000000000
--- a/python/paddle/fluid/tests/book_memory_optimization/test_memopt_image_classification_train.py
+++ /dev/null
@@ -1,168 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import print_function
-
-import sys
-
-import paddle
-import paddle.fluid as fluid
-import math
-import sys
-
-# need to fix random seed and training data to compare the loss
-# value accurately calculated by the default and the memory optimization
-# version.
-fluid.default_startup_program().random_seed = 111
-
-
-def resnet_cifar10(input, depth=32):
-    def conv_bn_layer(input, ch_out, filter_size, stride, padding, act='relu'):
-        tmp = fluid.layers.conv2d(
-            input=input,
-            filter_size=filter_size,
-            num_filters=ch_out,
-            stride=stride,
-            padding=padding,
-            act=None,
-            bias_attr=False)
-        return fluid.layers.batch_norm(input=tmp, act=act)
-
-    def shortcut(input, ch_in, ch_out, stride):
-        if ch_in != ch_out:
-            return conv_bn_layer(input, ch_out, 1, stride, 0, None)
-        else:
-            return input
-
-    def basicblock(input, ch_in, ch_out, stride):
-        tmp = conv_bn_layer(input, ch_out, 3, stride, 1)
-        tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, act=None)
-        short = shortcut(input, ch_in, ch_out, stride)
-        return fluid.layers.elementwise_add(x=tmp, y=short, act='relu')
-
-    def layer_warp(block_func, input, ch_in, ch_out, count, stride):
-        tmp = block_func(input, ch_in, ch_out, stride)
-        for i in range(1, count):
-            tmp = block_func(tmp, ch_out, ch_out, 1)
-        return tmp
-
-    assert (depth - 2) % 6 == 0
-    n = (depth - 2) // 6
-    conv1 = conv_bn_layer(
-        input=input, ch_out=16, filter_size=3, stride=1, padding=1)
-    res1 = layer_warp(basicblock, conv1, 16, 16, n, 1)
-    res2 = layer_warp(basicblock, res1, 16, 32, n, 2)
-    res3 = layer_warp(basicblock, res2, 32, 64, n, 2)
-    pool = fluid.layers.pool2d(
-        input=res3, pool_size=8, pool_type='avg', pool_stride=1)
-    return pool
-
-
-def vgg16_bn_drop(input):
-    def conv_block(input, num_filter, groups, dropouts):
-        return fluid.nets.img_conv_group(
-            input=input,
-            pool_size=2,
-            pool_stride=2,
-            conv_num_filter=[num_filter] * groups,
-            conv_filter_size=3,
-            conv_act='relu',
-            conv_with_batchnorm=True,
-            conv_batchnorm_drop_rate=dropouts,
-            pool_type='max')
-
-    conv1 = conv_block(input, 64, 2, [0.3, 0])
-    conv2 = conv_block(conv1, 128, 2, [0.4, 0])
-    conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0])
-    conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0])
-    conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])
-
-    drop = fluid.layers.dropout(x=conv5, dropout_prob=0.5)
-    fc1 = fluid.layers.fc(input=drop, size=4096, act=None)
-    bn = fluid.layers.batch_norm(input=fc1, act='relu')
-    drop2 = fluid.layers.dropout(x=bn, dropout_prob=0.5)
-    fc2 = fluid.layers.fc(input=drop2, size=4096, act=None)
-    return fc2
-
-
-classdim = 10
-data_shape = [3, 32, 32]
-
-images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
-label = fluid.layers.data(name='label', shape=[1], dtype='int64')
-
-net_type = "vgg"
-if len(sys.argv) >= 2:
-    net_type = sys.argv[1]
-
-if net_type == "vgg":
-    print("train vgg net")
-    net = vgg16_bn_drop(images)
-elif net_type == "resnet":
-    print("train resnet")
-    net = resnet_cifar10(images, 32)
-else:
-    raise ValueError("%s network is not supported" % net_type)
-
-predict = fluid.layers.fc(input=net, size=classdim, act='softmax')
-cost = fluid.layers.cross_entropy(input=predict, label=label)
-avg_cost = fluid.layers.mean(cost)
-
-optimizer = fluid.optimizer.Adam(learning_rate=0.001)
-opts = optimizer.minimize(avg_cost)
-
-batch_size = fluid.layers.create_tensor(dtype='int64')
-batch_acc = fluid.layers.accuracy(input=predict, label=label, total=batch_size)
-
-fluid.memory_optimize(fluid.default_main_program(), level=0)
-# fluid.release_memory(fluid.default_main_program())
-
-BATCH_SIZE = 16
-PASS_NUM = 1
-
-# fix the order of training data
-train_reader = paddle.batch(
-    paddle.dataset.cifar.train10(), batch_size=BATCH_SIZE)
-
-# train_reader = paddle.batch(
-#     paddle.reader.shuffle(
-#         paddle.dataset.cifar.train10(), buf_size=128 * 10),
-#     batch_size=BATCH_SIZE)
-
-place = fluid.CPUPlace()
-exe = fluid.Executor(place)
-feeder = fluid.DataFeeder(place=place, feed_list=[images, label])
-exe.run(fluid.default_startup_program())
-
-i = 0
-
-accuracy = fluid.average.WeightedAverage()
-for pass_id in range(PASS_NUM):
-    accuracy.reset()
-    for data in train_reader():
-        loss, acc, weight = exe.run(
-            fluid.default_main_program(),
-            feed=feeder.feed(data),
-            fetch_list=[avg_cost, batch_acc, batch_size])
-        accuracy.add(value=acc, weight=weight)
-        pass_acc = accuracy.eval()
-        print("loss:" + str(loss) + " acc:" + str(acc) + " pass_acc:" + str(
-            pass_acc))
-        # this model is slow, so if we can train two mini batch, we think it works properly.
-        if i > 0:
-            exit(0)
-        if math.isnan(float(loss)):
-            sys.exit("got NaN loss, training failed.")
-        i += 1
-exit(1)
diff --git a/python/paddle/fluid/tests/book_memory_optimization/test_memopt_machine_translation.py b/python/paddle/fluid/tests/book_memory_optimization/test_memopt_machine_translation.py
deleted file mode 100644
index e520c8965089263d1ba10a6057acda1a53cc34a9..0000000000000000000000000000000000000000
--- a/python/paddle/fluid/tests/book_memory_optimization/test_memopt_machine_translation.py
+++ /dev/null
@@ -1,139 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import print_function
-
-import numpy as np
-import paddle
-import paddle.fluid as fluid
-import paddle.fluid.core as core
-import paddle.fluid.framework as framework
-import paddle.fluid.layers as layers
-from paddle.fluid.executor import Executor
-import math
-import sys
-
-dict_size = 30000
-source_dict_dim = target_dict_dim = dict_size
-src_dict, trg_dict = paddle.dataset.wmt14.get_dict(dict_size)
-hidden_dim = 32
-word_dim = 16
-IS_SPARSE = True
-batch_size = 10
-max_length = 50
-topk_size = 50
-trg_dic_size = 10000
-
-decoder_size = hidden_dim
-
-# need to fix random seed and training data to compare the loss
-# value accurately calculated by the default and the memory optimization
-# version.
-fluid.default_startup_program().random_seed = 111
-
-
-def encoder_decoder():
-    # encoder
-    src_word_id = layers.data(
-        name="src_word_id", shape=[1], dtype='int64', lod_level=1)
-    src_embedding = layers.embedding(
-        input=src_word_id,
-        size=[dict_size, word_dim],
-        dtype='float32',
-        is_sparse=IS_SPARSE,
-        param_attr=fluid.ParamAttr(name='vemb'))
-
-    fc1 = fluid.layers.fc(input=src_embedding, size=hidden_dim * 4, act='tanh')
-    lstm_hidden0, lstm_0 = layers.dynamic_lstm(input=fc1, size=hidden_dim * 4)
-    encoder_out = layers.sequence_last_step(input=lstm_hidden0)
-
-    # decoder
-    trg_language_word = layers.data(
-        name="target_language_word", shape=[1], dtype='int64', lod_level=1)
-    trg_embedding = layers.embedding(
-        input=trg_language_word,
-        size=[dict_size, word_dim],
-        dtype='float32',
-        is_sparse=IS_SPARSE,
-        param_attr=fluid.ParamAttr(name='vemb'))
-
-    rnn = fluid.layers.DynamicRNN()
-    with rnn.block():
-        current_word = rnn.step_input(trg_embedding)
-        mem = rnn.memory(init=encoder_out)
-        fc1 = fluid.layers.fc(input=[current_word, mem],
-                              size=decoder_size,
-                              act='tanh')
-        out = fluid.layers.fc(input=fc1, size=target_dict_dim, act='softmax')
-        rnn.update_memory(mem, fc1)
-        rnn.output(out)
-
-    return rnn()
-
-
-def main():
-    rnn_out = encoder_decoder()
-    label = layers.data(
-        name="target_language_next_word", shape=[1], dtype='int64', lod_level=1)
-    cost = layers.cross_entropy(input=rnn_out, label=label)
-    avg_cost = fluid.layers.mean(cost)
-
-    optimizer = fluid.optimizer.Adagrad(learning_rate=1e-4)
-    optimizer.minimize(avg_cost)
-
-    fluid.memory_optimize(fluid.default_main_program())
-    # fluid.release_memory(fluid.default_main_program())
-
-    # fix the order of training data
-    train_data = paddle.batch(
-        paddle.dataset.wmt14.train(dict_size), batch_size=batch_size)
-
-    # train_data = paddle.batch(
-    #     paddle.reader.shuffle(
-    #         paddle.dataset.wmt14.train(dict_size), buf_size=1000),
-    #     batch_size=batch_size)
-
-    place = core.CPUPlace()
-    exe = Executor(place)
-
-    exe.run(framework.default_startup_program())
-
-    feed_order = [
-        'src_word_id', 'target_language_word', 'target_language_next_word'
-    ]
-
-    feed_list = [
-        fluid.default_main_program().global_block().var(var_name)
-        for var_name in feed_order
-    ]
-    feeder = fluid.DataFeeder(feed_list, place)
-
-    batch_id = 0
-    for pass_id in range(10):
-        for data in train_data():
-            outs = exe.run(fluid.default_main_program(),
-                           feed=feeder.feed(data),
-                           fetch_list=[avg_cost])
-            avg_cost_val = np.array(outs[0])
-            print('pass_id=' + str(pass_id) + ' batch=' + str(batch_id) +
-                  " avg_cost=" + str(avg_cost_val))
-            if batch_id > 2:
-                exit(0)
-            if math.isnan(float(avg_cost_val)):
-                sys.exit("got NaN loss, training failed.")
-            batch_id += 1
-
-
-if __name__ == '__main__':
-    main()