# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import paddle.fluid as fluid import numpy import sys TRAIN_FILES = ['train.recordio'] TEST_FILES = ['test.recordio'] DICT_DIM = 5147 # embedding dim emb_dim = 128 # hidden dim hid_dim = 128 # hidden dim2 hid_dim2 = 96 # class num class_dim = 2 # epoch num epoch_num = 10 def build_program(is_train): file_obj_handle = fluid.layers.io.open_files( filenames=TRAIN_FILES if is_train else TEST_FILES, shapes=[[-1, 1], [-1, 1]], lod_levels=[1, 0], dtypes=['int64', 'int64'], thread_num=1) if is_train: file_obj = fluid.layers.io.shuffle(file_obj_handle, buffer_size=1000) else: file_obj = file_obj_handle file_obj = fluid.layers.io.double_buffer(file_obj) with fluid.unique_name.guard(): data, label = fluid.layers.read_file(file_obj) emb = fluid.layers.embedding(input=data, size=[DICT_DIM, emb_dim]) # sequence conv with window size = 3 win_size = 3 conv_3 = fluid.nets.sequence_conv_pool( input=emb, num_filters=hid_dim, filter_size=win_size, act="tanh", pool_type="max") # fc layer after conv fc_1 = fluid.layers.fc(input=[conv_3], size=hid_dim2) # probability of each class prediction = fluid.layers.fc(input=[fc_1], size=class_dim, act="softmax") # cross entropy loss cost = fluid.layers.cross_entropy(input=prediction, label=label) # mean loss avg_cost = fluid.layers.mean(x=cost) acc = fluid.layers.accuracy(input=prediction, label=label) if is_train: # SGD optimizer sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=0.001) sgd_optimizer.minimize(avg_cost) return {'loss': avg_cost, 'log': [avg_cost, acc], 'file': file_obj_handle} def main(): train = fluid.Program() startup = fluid.Program() test = fluid.Program() with fluid.program_guard(train, startup): train_args = build_program(is_train=True) with fluid.program_guard(test, startup): test_args = build_program(is_train=False) use_cuda = fluid.core.is_compiled_with_cuda() # startup place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() exe = fluid.Executor(place=place) exe.run(startup) train_exe = fluid.ParallelExecutor( use_cuda=use_cuda, loss_name=train_args['loss'].name, main_program=train) test_exe = fluid.ParallelExecutor( use_cuda=use_cuda, main_program=test, share_vars_from=train_exe) fetch_var_list = [var.name for var in train_args['log']] for epoch_id in range(epoch_num): # train try: batch_id = 0 while True: result = map(numpy.array, train_exe.run(fetch_list=fetch_var_list if batch_id % 10 == 0 else [])) if len(result) != 0: print 'Train loss: ', result batch_id += 1 except fluid.core.EOFException: print 'End of epoch', epoch_id train_args['file'].reset() # test loss = [] acc = [] try: while True: loss_np, acc_np = map(numpy.array, test_exe.run(fetch_list=fetch_var_list)) loss.append(loss_np[0]) acc.append(acc_np[0]) except: test_args['file'].reset() print 'TEST: ', numpy.mean(loss), numpy.mean(acc) if __name__ == '__main__': main()