remove python file

9c80f825 · yuyang18 · b2eb302f · b2eb302f · b2eb302f · b2eb302f
10 changed files
--- a/source/beginners_guide/basics/03.image_classification/resnet.py
+++ b/source/beginners_guide/basics/03.image_classification/resnet.py
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import paddle.v2 as paddle
-__all__ = ['resnet_cifar10']
-def conv_bn_layer(input,
-                  ch_out,
-                  filter_size,
-                  stride,
-                  padding,
-                  active_type=paddle.activation.Relu(),
-                  ch_in=None):
-    """Stack a bias-free image convolution and a batch-norm layer.
-
-    The convolution is created with a linear activation; ``active_type``
-    is handed to the batch-norm layer instead. ``ch_in`` is forwarded as
-    the convolution's ``num_channels`` and ``ch_out`` as ``num_filters``.
-    """
-    conv_args = dict(
-        input=input,
-        filter_size=filter_size,
-        num_channels=ch_in,
-        num_filters=ch_out,
-        stride=stride,
-        padding=padding,
-        act=paddle.activation.Linear(),
-        bias_attr=False)
-    conv = paddle.layer.img_conv(**conv_args)
-    return paddle.layer.batch_norm(input=conv, act=active_type)
-def shortcut(ipt, ch_in, ch_out, stride):
-    """Return the skip-connection branch of a residual block.
-
-    When the channel counts already match, the input is passed through
-    untouched; otherwise it goes through a 1x1 conv + batch-norm with a
-    linear activation and the given stride.
-    """
-    if ch_in == ch_out:
-        return ipt
-    return conv_bn_layer(
-        ipt,
-        ch_out,
-        filter_size=1,
-        stride=stride,
-        padding=0,
-        active_type=paddle.activation.Linear())
-def basicblock(ipt, ch_in, ch_out, stride):
-    """Build one residual block: two 3x3 conv+BN layers plus a shortcut.
-
-    The first convolution uses ``stride`` and the default activation of
-    ``conv_bn_layer``; the second uses stride 1 and a linear activation.
-    The two branches are summed by ``addto`` with a ReLU activation.
-    """
-    residual = conv_bn_layer(
-        ipt, ch_out, filter_size=3, stride=stride, padding=1)
-    residual = conv_bn_layer(
-        residual,
-        ch_out,
-        filter_size=3,
-        stride=1,
-        padding=1,
-        active_type=paddle.activation.Linear())
-    skip = shortcut(ipt, ch_in, ch_out, stride)
-    return paddle.layer.addto(
-        input=[residual, skip], act=paddle.activation.Relu())
-def layer_warp(block_func, ipt, ch_in, ch_out, count, stride):
-    """Chain ``count`` blocks built by ``block_func``.
-
-    The first block maps ``ch_in`` to ``ch_out`` with the given ``stride``;
-    each of the remaining ``count - 1`` blocks keeps ``ch_out`` channels
-    and uses stride 1.
-    """
-    out = block_func(ipt, ch_in, ch_out, stride)
-    for _ in range(count - 1):
-        out = block_func(out, ch_out, ch_out, 1)
-    return out
-def resnet_cifar10(ipt, depth=32):
-    """Build the ResNet feature extractor used for CIFAR-10.
-
-    ipt: input image layer (created with 3 input channels).
-    depth: total network depth; must satisfy ``(depth - 2) % 6 == 0``,
-           e.g. one of 20, 32, 44, 56, 110, 1202.
-
-    Returns the average-pooling layer that follows the three residual
-    stages (16, 32 and 64 output channels).
-    """
-    # depth should be one of 20, 32, 44, 56, 110, 1202
-    assert (depth - 2) % 6 == 0
-    # Floor division keeps ``n`` an integer on every Python version;
-    # layer_warp() feeds it to range().
-    n = (depth - 2) // 6
-    conv1 = conv_bn_layer(
-        ipt, ch_in=3, ch_out=16, filter_size=3, stride=1, padding=1)
-    res1 = layer_warp(basicblock, conv1, 16, 16, n, 1)
-    res2 = layer_warp(basicblock, res1, 16, 32, n, 2)
-    res3 = layer_warp(basicblock, res2, 32, 64, n, 2)
-    pool = paddle.layer.img_pool(
-        input=res3, pool_size=8, stride=1, pool_type=paddle.pooling.Avg())
-    return pool
--- a/source/beginners_guide/basics/03.image_classification/train.py
+++ b/source/beginners_guide/basics/03.image_classification/train.py
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License
-import sys, os
-import paddle.v2 as paddle
-from vgg import vgg_bn_drop
-from resnet import resnet_cifar10
-# Use the GPU unless the WITH_GPU environment variable is unset or is '0'.
-with_gpu = os.getenv('WITH_GPU', '0') != '0'
-def main():
-    """Train a CIFAR-10 classifier, then classify one sample image.
-
-    Builds the network (VGG by default; the ResNet alternative is left
-    commented out), trains it with momentum SGD for 200 passes, saves the
-    parameters and prints test metrics after every pass, and finally
-    prints the predicted label of ``image/dog.png`` next to this script.
-    """
-    datadim = 3 * 32 * 32
-    classdim = 10
-    # PaddlePaddle init
-    paddle.init(use_gpu=with_gpu, trainer_count=1)
-    image = paddle.layer.data(
-        name="image", type=paddle.data_type.dense_vector(datadim))
-    # Add neural network config
-    # option 1. resnet
-    # net = resnet_cifar10(image, depth=32)
-    # option 2. vgg
-    net = vgg_bn_drop(image)
-    out = paddle.layer.fc(
-        input=net, size=classdim, act=paddle.activation.Softmax())
-    lbl = paddle.layer.data(
-        name="label", type=paddle.data_type.integer_value(classdim))
-    cost = paddle.layer.classification_cost(input=out, label=lbl)
-    # Create parameters
-    parameters = paddle.parameters.create(cost)
-    # Create optimizer
-    momentum_optimizer = paddle.optimizer.Momentum(
-        momentum=0.9,
-        regularization=paddle.optimizer.L2Regularization(rate=0.0002 * 128),
-        learning_rate=0.1 / 128.0,
-        learning_rate_decay_a=0.1,
-        learning_rate_decay_b=50000 * 100,
-        learning_rate_schedule='discexp')
-    # Create trainer
-    trainer = paddle.trainer.SGD(
-        cost=cost, parameters=parameters, update_equation=momentum_optimizer)
-    # End batch and end pass event handler
-    def event_handler(event):
-        if isinstance(event, paddle.event.EndIteration):
-            if event.batch_id % 100 == 0:
-                print "\nPass %d, Batch %d, Cost %f, %s" % (
-                    event.pass_id, event.batch_id, event.cost, event.metrics)
-            else:
-                sys.stdout.write('.')
-                sys.stdout.flush()
-        if isinstance(event, paddle.event.EndPass):
-            # save parameters; a tar archive is binary data, so open the
-            # file in binary mode (as is done for the topology below)
-            with open('params_pass_%d.tar' % event.pass_id, 'wb') as f:
-                trainer.save_parameter_to_tar(f)
-            result = trainer.test(
-                reader=paddle.batch(
-                    paddle.dataset.cifar.test10(), batch_size=128),
-                feeding={'image': 0,
-                         'label': 1})
-            print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
-    # Serialize the inference topology to a file.
-    inference_topology = paddle.topology.Topology(layers=out)
-    with open("inference_topology.pkl", 'wb') as f:
-        inference_topology.serialize_for_inference(f)
-    trainer.train(
-        reader=paddle.batch(
-            paddle.reader.shuffle(
-                paddle.dataset.cifar.train10(), buf_size=50000),
-            batch_size=128),
-        num_passes=200,
-        event_handler=event_handler,
-        feeding={'image': 0,
-                 'label': 1})
-    # inference (``os`` is already imported at module level)
-    from PIL import Image
-    import numpy as np
-    def load_image(file):
-        """Load an image file as a flat float32 vector scaled by 1/255."""
-        im = Image.open(file)
-        im = im.resize((32, 32), Image.ANTIALIAS)
-        im = np.array(im).astype(np.float32)
-        # The array converted from a PIL image is stored as
-        # H(height), W(width), C(channel). PaddlePaddle requires
-        # the CHW order, so transpose them.
-        im = im.transpose((2, 0, 1))  # CHW
-        # In the training phase, the channel order of CIFAR
-        # image is B(Blue), G(green), R(Red). But PIL open
-        # image in RGB mode. It must swap the channel order.
-        im = im[(2, 1, 0), :, :]  # BGR
-        im = im.flatten()
-        im = im / 255.0
-        return im
-    test_data = []
-    cur_dir = os.path.dirname(os.path.realpath(__file__))
-    test_data.append((load_image(cur_dir + '/image/dog.png'), ))
-    # users can remove the comments and change the model name
-    # with open('params_pass_50.tar', 'r') as f:
-    #    parameters = paddle.parameters.Parameters.from_tar(f)
-    probs = paddle.infer(
-        output_layer=out, parameters=parameters, input=test_data)
-    lab = np.argsort(-probs)  # probs and lab are the results of one batch data
-    print "Label of image/dog.png is: %d" % lab[0][0]
-if __name__ == '__main__':
-    main()
--- a/source/beginners_guide/basics/03.image_classification/vgg.py
+++ b/source/beginners_guide/basics/03.image_classification/vgg.py
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import paddle.v2 as paddle
-__all__ = ['vgg_bn_drop']
-def vgg_bn_drop(input):
-    """Build a VGG-style network with batch normalization and dropout.
-
-    Five convolution groups (64, 128, 256, 512, 512 filters) are followed
-    by dropout, a 512-unit fully connected layer, batch-norm with ReLU,
-    and a final 512-unit linear fully connected layer, which is returned.
-    """
-    def conv_block(ipt, num_filter, groups, dropouts, num_channels=None):
-        # ``groups`` 3x3 conv + batch-norm layers, then 2x2 max pooling.
-        group_args = dict(
-            input=ipt,
-            num_channels=num_channels,
-            pool_size=2,
-            pool_stride=2,
-            conv_num_filter=[num_filter] * groups,
-            conv_filter_size=3,
-            conv_act=paddle.activation.Relu(),
-            conv_with_batchnorm=True,
-            conv_batchnorm_drop_rate=dropouts,
-            pool_type=paddle.pooling.Max())
-        return paddle.networks.img_conv_group(**group_args)
-    # (num_filter, groups, dropouts, num_channels) of each block, in order.
-    block_specs = [
-        (64, 2, [0.3, 0], 3),
-        (128, 2, [0.4, 0], None),
-        (256, 3, [0.4, 0.4, 0], None),
-        (512, 3, [0.4, 0.4, 0], None),
-        (512, 3, [0.4, 0.4, 0], None),
-    ]
-    features = input
-    for num_filter, groups, dropouts, num_channels in block_specs:
-        features = conv_block(features, num_filter, groups, dropouts,
-                              num_channels)
-    dropped = paddle.layer.dropout(input=features, dropout_rate=0.5)
-    hidden = paddle.layer.fc(
-        input=dropped, size=512, act=paddle.activation.Linear())
-    normed = paddle.layer.batch_norm(
-        input=hidden,
-        act=paddle.activation.Relu(),
-        layer_attr=paddle.attr.Extra(drop_rate=0.5))
-    return paddle.layer.fc(
-        input=normed, size=512, act=paddle.activation.Linear())
--- a/source/beginners_guide/basics/04.word2vec/calculate_dis.py
+++ b/source/beginners_guide/basics/04.word2vec/calculate_dis.py
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-Example:
-    python calculate_dis.py DICTIONARYTXT FEATURETXT
-Required arguments:
-    DICTIONARYTXT    the dictionary generated in dataprovider
-    FEATURETXT       the text format word feature, one line for one word
-"""
-import numpy as np
-from argparse import ArgumentParser
-def load_dict(fdict):
-    """Map each stripped line of ``fdict`` to its zero-based line index."""
-    dictionary = {}
-    for index, line in enumerate(fdict.readlines()):
-        dictionary[line.strip()] = index
-    return dictionary
-def load_emb(femb):
-    """Read comma-separated feature rows and return them L2-normalized.
-
-    The first line of ``femb`` is skipped; every other line is parsed as
-    floats and divided by its Euclidean norm.
-    """
-    bank = []
-    for row_index, line in enumerate(femb):
-        if row_index == 0:
-            # first line is not a feature row
-            continue
-        values = [float(token) for token in line.strip().split(',')]
-        vector = np.array(values)
-        bank.append(vector * 1.0 / np.linalg.norm(vector))
-    return bank
-def calcos(id1, id2, Fea):
-    """Return the dot product of the feature vectors at ``id1`` and ``id2``."""
-    return np.dot(Fea[id1].transpose(), Fea[id2])
-def get_wordidx(w, Dict):
-    """Return the index of ``w`` in ``Dict``; print an error and return -1 if absent."""
-    if w in Dict:
-        return Dict[w]
-    print 'ERROR: %s not in the dictionary' % w
-    return -1
-if __name__ == '__main__':
-    # Load the dictionary and the feature file, then answer similarity
-    # queries interactively until the input stream ends.
-    parser = ArgumentParser()
-    parser.add_argument('dict', help='dictionary file')
-    parser.add_argument('fea', help='feature file')
-    args = parser.parse_args()
-    with open(args.dict) as fdict:
-        word_dict = load_dict(fdict)
-    with open(args.fea) as ffea:
-        word_fea = load_emb(ffea)
-    while True:
-        try:
-            words = raw_input("please input two words: ").split()
-        except EOFError:
-            # End of input (e.g. Ctrl-D): stop instead of crashing.
-            break
-        if len(words) != 2:
-            # Unpacking anything but two words would raise ValueError.
-            print 'ERROR: expected exactly two words, got %d' % len(words)
-            continue
-        w1, w2 = words
-        w1_id = get_wordidx(w1, word_dict)
-        w2_id = get_wordidx(w2, word_dict)
-        if w1_id == -1 or w2_id == -1:
-            continue
-        print 'similarity: %s' % (calcos(w1_id, w2_id, word_fea))
--- a/source/beginners_guide/basics/04.word2vec/format_convert.py
+++ b/source/beginners_guide/basics/04.word2vec/format_convert.py
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-Example:
-    python format_convert.py --b2t -i INPUT -o OUTPUT -d DIM
-    python format_convert.py --t2b -i INPUT -o OUTPUT
-Options:
-    -h, --help  show this help message and exit
-    --b2t       convert parameter file of embedding model from binary to text
-    --t2b       convert parameter file of embedding model from text to binary
-    -i INPUT    input parameter file name
-    -o OUTPUT   output parameter file name
-    -d DIM      dimension of parameter
-"""
-from optparse import OptionParser
-import struct
-def binary2text(input, output, paraDim):
-    """
-    Convert a binary parameter file of embedding model to be a text file.
-    input: the name of input binary parameter file, the format is:
-           1) the first 16 bytes is filehead:
-                version(4 bytes): version of paddle, default = 0
-                floatSize(4 bytes): sizeof(float) = 4
-                paraCount(8 bytes): total number of parameter
-           2) the next (paraCount * 4) bytes is parameters, each has 4 bytes
-    output: the name of output text parameter file, for example:
-           0,4,32156096
-           -0.7845433,1.1937413,-0.1704215,...
-           0.0000909,0.0009465,-0.0008813,...
-           ...
-           the format is:
-           1) the first line is filehead:
-              version=0, floatSize=4, paraCount=32156096
-           2) other lines print the parameters
-              a) each line prints paraDim parameters separated by ','
-              b) there are paraCount/paraDim lines (embedding words)
-    paraDim: dimension of parameters (an int or a numeric string)
-    Raises struct.error if the header or a row is truncated.
-    """
-    para_dim = int(paraDim)
-    row_bytes = 4 * para_dim
-    row_format = "%df" % para_dim
-    line = 0
-    # ``with`` closes both files even when unpacking fails half-way.
-    with open(input, "rb") as fi:
-        with open(output, "w") as fo:
-            # "q" is 8 bytes on every platform, matching the documented
-            # paraCount width; native "l" is only 4 bytes on some platforms.
-            version, floatSize, paraCount = struct.unpack("iiq", fi.read(16))
-            newHead = ','.join([str(version), str(floatSize), str(paraCount)])
-            fo.write(newHead + "\n")
-            context = fi.read(row_bytes)
-            while context:
-                numbers = struct.unpack(row_format, context)
-                fo.write(','.join(['%8.7f' % i for i in numbers]) + "\n")
-                context = fi.read(row_bytes)
-                line += 1
-    print("binary2text finish, total %d lines" % line)
-def get_para_count(input):
-    """
-    Compute the total number of embedding parameters in input text file.
-    input: the name of input text file
-    Returns (number of lines) * (number of comma-separated fields on the
-    first line), or 0 for an empty file.
-    """
-    with open(input) as f:
-        first = f.readline()
-        if not first:
-            # Empty file: there is no row to count.
-            return 0
-        paraDim = len(first.split(","))
-        numRows = 1 + sum(1 for _ in f)
-    return numRows * paraDim
-def text2binary(input, output, paddle_head=True):
-    """
-    Convert a text parameter file of embedding model to be a binary file.
-    input: the name of input text parameter file, for example:
-           -0.7845433,1.1937413,-0.1704215,...
-           0.0000909,0.0009465,-0.0008813,...
-           ...
-           the format is:
-           1) it doesn't have filehead
-           2) each line stores the same dimension of parameters,
-              the separator is commas ','
-    output: the name of output binary parameter file, the format is:
-           1) the first 16 bytes is filehead:
-             version(4 bytes), floatSize(4 bytes), paraCount(8 bytes)
-           2) the next (paraCount * 4) bytes is parameters, each has 4 bytes
-    paddle_head: accepted for backward compatibility; currently unused,
-           the filehead is always written.
-    """
-    count = 0
-    # ``with`` closes both files even when a value fails to parse.
-    with open(input, "r") as fi:
-        with open(output, "wb") as fo:
-            # "q" is 8 bytes on every platform, matching the documented
-            # paraCount width; native "l" is only 4 bytes on some platforms.
-            fo.write(struct.pack("iiq", 0, 4, get_para_count(input)))
-            for line in fi:
-                values = [float(x) for x in line.strip().split(",")]
-                # One pack call per row yields the same bytes as packing
-                # each float separately.
-                fo.write(struct.pack("%df" % len(values), *values))
-                count += 1
-    print("text2binary finish, total %d lines" % count)
-def main():
-    """
-    Main entry for running format_convert.py
-
-    Parses the command line and runs binary2text (--b2t) and/or
-    text2binary (--t2b). Exits with a usage error when no mode is chosen
-    or a required option is missing.
-    """
-    usage = "usage: \n" \
-            "python %prog --b2t -i INPUT -o OUTPUT -d DIM \n" \
-            "python %prog --t2b -i INPUT -o OUTPUT"
-    parser = OptionParser(usage)
-    parser.add_option(
-        "--b2t",
-        action="store_true",
-        help="convert parameter file of embedding model from binary to text")
-    parser.add_option(
-        "--t2b",
-        action="store_true",
-        help="convert parameter file of embedding model from text to binary")
-    parser.add_option(
-        "-i", action="store", dest="input", help="input parameter file name")
-    parser.add_option(
-        "-o", action="store", dest="output", help="output parameter file name")
-    parser.add_option(
-        "-d", action="store", dest="dim", help="dimension of parameter")
-    (options, args) = parser.parse_args()
-    # Report bad invocations up front instead of silently doing nothing
-    # or failing later with an opaque TypeError.
-    if not (options.b2t or options.t2b):
-        parser.error("one of --b2t or --t2b is required")
-    if options.input is None or options.output is None:
-        parser.error("both -i INPUT and -o OUTPUT are required")
-    if options.b2t and options.dim is None:
-        parser.error("-d DIM is required with --b2t")
-    if options.b2t:
-        binary2text(options.input, options.output, options.dim)
-    if options.t2b:
-        text2binary(options.input, options.output)
-if __name__ == '__main__':
-    main()
--- a/source/beginners_guide/basics/04.word2vec/train.py
+++ b/source/beginners_guide/basics/04.word2vec/train.py
-import math
-import os
-import numpy
-import paddle.v2 as paddle
-# Use the GPU unless the WITH_GPU environment variable is unset or is '0'.
-with_gpu = os.getenv('WITH_GPU', '0') != '0'
-# Output size of the word embedding projection.
-embsize = 32
-# Size of the hidden fully connected layer.
-hiddensize = 256
-# Passed to the imikolov readers; presumably the n-gram length (the model
-# reads four context words plus the next word) -- TODO confirm.
-N = 5
-def wordemb(inlayer):
-    """Project ``inlayer`` through the embedding table parameter "_proj".
-
-    Every call names the same parameter, "_proj", and uses ``embsize`` as
-    the projection size.
-    """
-    table_attr = paddle.attr.Param(
-        name="_proj", initial_std=0.001, learning_rate=1, l2_rate=0)
-    return paddle.layer.table_projection(
-        input=inlayer, size=embsize, param_attr=table_attr)
-# save and load word dict and embedding table
-def save_dict_and_embedding(word_dict, embeddings):
-    """Write the word dictionary and the embedding table to the working directory.
-
-    "word_dict" gets one "<word> <index>" line per entry and
-    "embedding_table" gets the comma-separated embedding rows.
-    """
-    with open("word_dict", "w") as dict_file:
-        for word in word_dict:
-            dict_file.write(" ".join([word, str(word_dict[word])]) + "\n")
-    with open("embedding_table", "w") as table_file:
-        numpy.savetxt(table_file, embeddings, delimiter=',', newline='\n')
-def load_dict_and_embedding():
-    """Read back the "word_dict" and "embedding_table" files.
-
-    Returns a ``(word_dict, embeddings)`` pair: a dict mapping each word to
-    its integer index and the array loaded from the comma-separated table.
-    """
-    with open("word_dict", "r") as dict_file:
-        entries = [line.strip().split(" ") for line in dict_file]
-    word_dict = dict((key, int(value)) for key, value in entries)
-    embeddings = numpy.loadtxt("embedding_table", delimiter=",")
-    return word_dict, embeddings
-def main():
-    """Train the N-gram word-embedding model and save its embedding table.
-
-    Four context words are embedded through the shared "_proj" table,
-    concatenated and fed through a sigmoid hidden layer and a softmax
-    over the dictionary to predict the fifth word. After every pass the
-    test metrics are printed and the parameters saved; after training
-    the dictionary and the embedding table are written out.
-    """
-    paddle.init(use_gpu=with_gpu, trainer_count=1)
-    word_dict = paddle.dataset.imikolov.build_dict()
-    dict_size = len(word_dict)
-    # Every layer takes integer value of range [0, dict_size)
-    firstword = paddle.layer.data(
-        name="firstw", type=paddle.data_type.integer_value(dict_size))
-    secondword = paddle.layer.data(
-        name="secondw", type=paddle.data_type.integer_value(dict_size))
-    thirdword = paddle.layer.data(
-        name="thirdw", type=paddle.data_type.integer_value(dict_size))
-    fourthword = paddle.layer.data(
-        name="fourthw", type=paddle.data_type.integer_value(dict_size))
-    nextword = paddle.layer.data(
-        name="fifthw", type=paddle.data_type.integer_value(dict_size))
-    Efirst = wordemb(firstword)
-    Esecond = wordemb(secondword)
-    Ethird = wordemb(thirdword)
-    Efourth = wordemb(fourthword)
-    contextemb = paddle.layer.concat(input=[Efirst, Esecond, Ethird, Efourth])
-    hidden1 = paddle.layer.fc(
-        input=contextemb,
-        size=hiddensize,
-        act=paddle.activation.Sigmoid(),
-        layer_attr=paddle.attr.Extra(drop_rate=0.5),
-        bias_attr=paddle.attr.Param(learning_rate=2),
-        param_attr=paddle.attr.Param(
-            initial_std=1. / math.sqrt(embsize * 8), learning_rate=1))
-    predictword = paddle.layer.fc(
-        input=hidden1,
-        size=dict_size,
-        bias_attr=paddle.attr.Param(learning_rate=2),
-        act=paddle.activation.Softmax())
-    cost = paddle.layer.classification_cost(input=predictword, label=nextword)
-    parameters = paddle.parameters.create(cost)
-    adagrad = paddle.optimizer.AdaGrad(
-        learning_rate=3e-3,
-        regularization=paddle.optimizer.L2Regularization(8e-4))
-    trainer = paddle.trainer.SGD(cost, parameters, adagrad)
-    def event_handler(event):
-        if isinstance(event, paddle.event.EndIteration):
-            if event.batch_id % 100 == 0:
-                print "Pass %d, Batch %d, Cost %f, %s" % (
-                    event.pass_id, event.batch_id, event.cost, event.metrics)
-        if isinstance(event, paddle.event.EndPass):
-            result = trainer.test(
-                paddle.batch(paddle.dataset.imikolov.test(word_dict, N), 32))
-            print "Pass %d, Testing metrics %s" % (event.pass_id,
-                                                   result.metrics)
-            # A tar archive is binary data, so open the file in binary mode.
-            with open("model_%d.tar" % event.pass_id, 'wb') as f:
-                trainer.save_parameter_to_tar(f)
-    trainer.train(
-        paddle.batch(paddle.dataset.imikolov.train(word_dict, N), 32),
-        num_passes=100,
-        event_handler=event_handler)
-    # save word dict and embedding table
-    embeddings = parameters.get("_proj").reshape(len(word_dict), embsize)
-    save_dict_and_embedding(word_dict, embeddings)
-if __name__ == '__main__':
-    main()
--- a/source/beginners_guide/basics/05.recommender_system/train.py
+++ b/source/beginners_guide/basics/05.recommender_system/train.py
-import paddle.v2 as paddle
-import cPickle
-import copy
-import os
-# Use the GPU unless the WITH_GPU environment variable is unset or is '0'.
-with_gpu = os.getenv('WITH_GPU', '0') != '0'
-def get_usr_combined_features():
-    """Build the user-side feature tower.
-
-    User id, gender, age and job inputs are each embedded and passed
-    through a fully connected layer; the four results feed one 200-unit
-    tanh layer, which is returned.
-    """
-    # User id: input range is max_user_id() + 1.
-    uid = paddle.layer.data(
-        name='user_id',
-        type=paddle.data_type.integer_value(
-            paddle.dataset.movielens.max_user_id() + 1))
-    usr_emb = paddle.layer.embedding(input=uid, size=32)
-    usr_fc = paddle.layer.fc(input=usr_emb, size=32)
-    # Gender: two possible values.
-    usr_gender_id = paddle.layer.data(
-        name='gender_id', type=paddle.data_type.integer_value(2))
-    usr_gender_emb = paddle.layer.embedding(input=usr_gender_id, size=16)
-    usr_gender_fc = paddle.layer.fc(input=usr_gender_emb, size=16)
-    # Age: one value per entry of the dataset's age_table.
-    usr_age_id = paddle.layer.data(
-        name='age_id',
-        type=paddle.data_type.integer_value(
-            len(paddle.dataset.movielens.age_table)))
-    usr_age_emb = paddle.layer.embedding(input=usr_age_id, size=16)
-    usr_age_fc = paddle.layer.fc(input=usr_age_emb, size=16)
-    # Job: input range is max_job_id() + 1.
-    usr_job_id = paddle.layer.data(
-        name='job_id',
-        type=paddle.data_type.integer_value(
-            paddle.dataset.movielens.max_job_id() + 1))
-    usr_job_emb = paddle.layer.embedding(input=usr_job_id, size=16)
-    usr_job_fc = paddle.layer.fc(input=usr_job_emb, size=16)
-    # Combine the four per-field features into one vector.
-    usr_combined_features = paddle.layer.fc(
-        input=[usr_fc, usr_gender_fc, usr_age_fc, usr_job_fc],
-        size=200,
-        act=paddle.activation.Tanh())
-    return usr_combined_features
-def get_mov_combined_features():
-    """Build the movie-side feature tower.
-
-    The movie id is embedded, the category vector goes through a fully
-    connected layer, and the title word sequence is embedded and passed
-    through a sequence conv-pool; the three results feed one 200-unit
-    tanh layer, which is returned.
-    """
-    movie_title_dict = paddle.dataset.movielens.get_movie_title_dict()
-    # Movie id: input range is max_movie_id() + 1.
-    mov_id = paddle.layer.data(
-        name='movie_id',
-        type=paddle.data_type.integer_value(
-            paddle.dataset.movielens.max_movie_id() + 1))
-    mov_emb = paddle.layer.embedding(input=mov_id, size=32)
-    mov_fc = paddle.layer.fc(input=mov_emb, size=32)
-    # Categories: sparse binary vector with one slot per movie category.
-    mov_categories = paddle.layer.data(
-        name='category_id',
-        type=paddle.data_type.sparse_binary_vector(
-            len(paddle.dataset.movielens.movie_categories())))
-    mov_categories_hidden = paddle.layer.fc(input=mov_categories, size=32)
-    # Title: a sequence of ids drawn from the title dictionary.
-    mov_title_id = paddle.layer.data(
-        name='movie_title',
-        type=paddle.data_type.integer_value_sequence(len(movie_title_dict)))
-    mov_title_emb = paddle.layer.embedding(input=mov_title_id, size=32)
-    mov_title_conv = paddle.networks.sequence_conv_pool(
-        input=mov_title_emb, hidden_size=32, context_len=3)
-    # Combine the three per-field features into one vector.
-    mov_combined_features = paddle.layer.fc(
-        input=[mov_fc, mov_categories_hidden, mov_title_conv],
-        size=200,
-        act=paddle.activation.Tanh())
-    return mov_combined_features
-def main():
-    """Train the recommender for one pass and print one sample prediction.
-
-    The model is the cosine similarity (scale 5) between the user and
-    movie feature towers, trained with a square-error cost against the
-    'score' input. After training, the prediction for user 234 and
-    movie 345 is printed, rescaled as (prediction + 5) / 2.
-    """
-    paddle.init(use_gpu=with_gpu)
-    usr_combined_features = get_usr_combined_features()
-    mov_combined_features = get_mov_combined_features()
-    inference = paddle.layer.cos_sim(
-        a=usr_combined_features, b=mov_combined_features, size=1, scale=5)
-    cost = paddle.layer.square_error_cost(
-        input=inference,
-        label=paddle.layer.data(
-            name='score', type=paddle.data_type.dense_vector(1)))
-    parameters = paddle.parameters.create(cost)
-    trainer = paddle.trainer.SGD(
-        cost=cost,
-        parameters=parameters,
-        update_equation=paddle.optimizer.Adam(learning_rate=1e-4))
-    # Position of each input layer inside a reader sample.
-    feeding = {
-        'user_id': 0,
-        'gender_id': 1,
-        'age_id': 2,
-        'job_id': 3,
-        'movie_id': 4,
-        'category_id': 5,
-        'movie_title': 6,
-        'score': 7
-    }
-    def event_handler(event):
-        if isinstance(event, paddle.event.EndIteration):
-            if event.batch_id % 100 == 0:
-                print "Pass %d Batch %d Cost %.2f" % (
-                    event.pass_id, event.batch_id, event.cost)
-    trainer.train(
-        reader=paddle.batch(
-            paddle.reader.shuffle(
-                paddle.dataset.movielens.train(), buf_size=8192),
-            batch_size=256),
-        event_handler=event_handler,
-        feeding=feeding,
-        num_passes=1)
-    user_id = 234
-    movie_id = 345
-    user = paddle.dataset.movielens.user_info()[user_id]
-    movie = paddle.dataset.movielens.movie_info()[movie_id]
-    feature = user.value() + movie.value()
-    # Inference has no label, so drop 'score' from the feeding map.
-    infer_dict = copy.copy(feeding)
-    del infer_dict['score']
-    prediction = paddle.infer(
-        output_layer=inference,
-        parameters=parameters,
-        input=[feature],
-        feeding=infer_dict)
-    # Parenthesize the whole expression so the rescaled value is what gets
-    # printed whether ``print`` is a statement or a function.
-    print((prediction + 5) / 2)
-if __name__ == '__main__':
-    main()
--- a/source/beginners_guide/basics/06.understand_sentiment/train.py
+++ b/source/beginners_guide/basics/06.understand_sentiment/train.py
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import sys, os
-import paddle.v2 as paddle
-# Use the GPU unless the WITH_GPU environment variable is unset or is '0'.
-with_gpu = os.getenv('WITH_GPU', '0') != '0'
-def convolution_net(input_dim, class_dim=2, emb_dim=128, hid_dim=128):
-    """
-    Text classification network built from two sequence conv-pool branches.
-    input_dim: here is word dictionary dimension.
-    class_dim: number of categories.
-    emb_dim: dimension of word embedding.
-    hid_dim: hidden size of each conv-pool branch.
-    Returns (cost, output): the classification cost layer and the
-    softmax output layer.
-    """
-    data = paddle.layer.data("word",
-                             paddle.data_type.integer_value_sequence(input_dim))
-    emb = paddle.layer.embedding(input=data, size=emb_dim)
-    conv_3 = paddle.networks.sequence_conv_pool(
-        input=emb, context_len=3, hidden_size=hid_dim)
-    conv_4 = paddle.networks.sequence_conv_pool(
-        input=emb, context_len=4, hidden_size=hid_dim)
-    output = paddle.layer.fc(
-        input=[conv_3, conv_4], size=class_dim, act=paddle.activation.Softmax())
-    # The label range follows class_dim instead of a hard-coded 2, so it
-    # always matches the size of the softmax output.
-    lbl = paddle.layer.data("label",
-                            paddle.data_type.integer_value(class_dim))
-    cost = paddle.layer.classification_cost(input=output, label=lbl)
-    return cost, output
-def stacked_lstm_net(input_dim,
-                     class_dim=2,
-                     emb_dim=128,
-                     hid_dim=512,
-                     stacked_num=3):
-    """
-    A Wrapper for sentiment classification task.
-    This network stacks ``stacked_num`` LSTM layers (three by default),
-    reversing the direction of every even-numbered layer. This configuration
-    refers to the paper at the following url, but uses fewer layers.
-        http://www.aclweb.org/anthology/P15-1109
-    input_dim: here is word dictionary dimension.
-    class_dim: number of categories.
-    emb_dim: dimension of word embedding.
-    hid_dim: dimension of hidden layer.
-    stacked_num: number of stacked lstm-hidden layer; must be odd.
-    Returns (cost, output): the classification cost layer and the
-    softmax output layer.
-    """
-    assert stacked_num % 2 == 1
-    fc_para_attr = paddle.attr.Param(learning_rate=1e-3)
-    lstm_para_attr = paddle.attr.Param(initial_std=0., learning_rate=1.)
-    # One parameter attribute per input of the two-input fc layers below.
-    para_attr = [fc_para_attr, lstm_para_attr]
-    bias_attr = paddle.attr.Param(initial_std=0., l2_rate=0.)
-    relu = paddle.activation.Relu()
-    linear = paddle.activation.Linear()
-    data = paddle.layer.data("word",
-                             paddle.data_type.integer_value_sequence(input_dim))
-    emb = paddle.layer.embedding(input=data, size=emb_dim)
-    fc1 = paddle.layer.fc(
-        input=emb, size=hid_dim, act=linear, bias_attr=bias_attr)
-    lstm1 = paddle.layer.lstmemory(input=fc1, act=relu, bias_attr=bias_attr)
-    inputs = [fc1, lstm1]
-    for i in range(2, stacked_num + 1):
-        fc = paddle.layer.fc(
-            input=inputs,
-            size=hid_dim,
-            act=linear,
-            param_attr=para_attr,
-            bias_attr=bias_attr)
-        # Even-numbered layers run over the sequence in reverse.
-        lstm = paddle.layer.lstmemory(
-            input=fc, reverse=(i % 2) == 0, act=relu, bias_attr=bias_attr)
-        inputs = [fc, lstm]
-    fc_last = paddle.layer.pooling(
-        input=inputs[0], pooling_type=paddle.pooling.Max())
-    lstm_last = paddle.layer.pooling(
-        input=inputs[1], pooling_type=paddle.pooling.Max())
-    output = paddle.layer.fc(
-        input=[fc_last, lstm_last],
-        size=class_dim,
-        act=paddle.activation.Softmax(),
-        bias_attr=bias_attr,
-        param_attr=para_attr)
-    # The label range follows class_dim instead of a hard-coded 2, so it
-    # always matches the size of the softmax output.
-    lbl = paddle.layer.data("label",
-                            paddle.data_type.integer_value(class_dim))
-    cost = paddle.layer.classification_cost(input=output, label=lbl)
-    return cost, output
-if __name__ == '__main__':
-    # Train a sentiment classifier on the IMDB dataset for 20 passes,
-    # saving the parameters and printing test metrics after every pass.
-    # init
-    paddle.init(use_gpu=with_gpu)
-    # data
-    print 'load dictionary...'
-    word_dict = paddle.dataset.imdb.word_dict()
-    dict_dim = len(word_dict)
-    class_dim = 2
-    train_reader = paddle.batch(
-        paddle.reader.shuffle(
-            paddle.dataset.imdb.train(word_dict), buf_size=1000),
-        batch_size=100)
-    test_reader = paddle.batch(
-        paddle.dataset.imdb.test(word_dict), batch_size=100)
-    feeding = {'word': 0, 'label': 1}
-    # network config
-    # Please choose the way to build the network
-    # by uncommenting the corresponding line.
-    [cost, output] = convolution_net(dict_dim, class_dim=class_dim)
-    # [cost, output] = stacked_lstm_net(dict_dim, class_dim=class_dim, stacked_num=3)
-    # create parameters
-    parameters = paddle.parameters.create(cost)
-    # create optimizer
-    adam_optimizer = paddle.optimizer.Adam(
-        learning_rate=2e-3,
-        regularization=paddle.optimizer.L2Regularization(rate=8e-4),
-        model_average=paddle.optimizer.ModelAverage(average_window=0.5))
-    # create trainer
-    trainer = paddle.trainer.SGD(
-        cost=cost, parameters=parameters, update_equation=adam_optimizer)
-    # End batch and end pass event handler
-    def event_handler(event):
-        if isinstance(event, paddle.event.EndIteration):
-            if event.batch_id % 100 == 0:
-                print "\nPass %d, Batch %d, Cost %f, %s" % (
-                    event.pass_id, event.batch_id, event.cost, event.metrics)
-            else:
-                sys.stdout.write('.')
-                sys.stdout.flush()
-        if isinstance(event, paddle.event.EndPass):
-            # A tar archive is binary data, so open the file in binary
-            # mode (as is done for the topology below).
-            with open('./params_pass_%d.tar' % event.pass_id, 'wb') as f:
-                trainer.save_parameter_to_tar(f)
-            result = trainer.test(reader=test_reader, feeding=feeding)
-            print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
-    # Serialize the inference topology to a file.
-    inference_topology = paddle.topology.Topology(layers=output)
-    with open("./inference_topology.pkl", 'wb') as f:
-        inference_topology.serialize_for_inference(f)
-    trainer.train(
-        reader=train_reader,
-        event_handler=event_handler,
-        feeding=feeding,
-        num_passes=20)
--- a/source/beginners_guide/basics/07.label_semantic_roles/train.py
+++ b/source/beginners_guide/basics/07.label_semantic_roles/train.py
-import math, os
-import numpy as np
-import paddle.v2 as paddle
-import paddle.v2.dataset.conll05 as conll05
-import paddle.v2.evaluator as evaluator
-with_gpu = os.getenv('WITH_GPU', '0') != '0'
-word_dict, verb_dict, label_dict = conll05.get_dict()
-word_dict_len = len(word_dict)
-label_dict_len = len(label_dict)
-pred_len = len(verb_dict)
-mark_dict_len = 2
-word_dim = 32
-mark_dim = 5
-hidden_dim = 512
-depth = 8
-default_std = 1 / math.sqrt(hidden_dim) / 3.0
-mix_hidden_lr = 1e-3
-def d_type(size):
-    return paddle.data_type.integer_value_sequence(size)
-def db_lstm():
-    #8 features
-    word = paddle.layer.data(name='word_data', type=d_type(word_dict_len))
-    predicate = paddle.layer.data(name='verb_data', type=d_type(pred_len))
-    ctx_n2 = paddle.layer.data(name='ctx_n2_data', type=d_type(word_dict_len))
-    ctx_n1 = paddle.layer.data(name='ctx_n1_data', type=d_type(word_dict_len))
-    ctx_0 = paddle.layer.data(name='ctx_0_data', type=d_type(word_dict_len))
-    ctx_p1 = paddle.layer.data(name='ctx_p1_data', type=d_type(word_dict_len))
-    ctx_p2 = paddle.layer.data(name='ctx_p2_data', type=d_type(word_dict_len))
-    mark = paddle.layer.data(name='mark_data', type=d_type(mark_dict_len))
-    emb_para = paddle.attr.Param(name='emb', initial_std=0., is_static=True)
-    std_0 = paddle.attr.Param(initial_std=0.)
-    std_default = paddle.attr.Param(initial_std=default_std)
-    predicate_embedding = paddle.layer.embedding(
-        size=word_dim,
-        input=predicate,
-        param_attr=paddle.attr.Param(name='vemb', initial_std=default_std))
-    mark_embedding = paddle.layer.embedding(
-        size=mark_dim, input=mark, param_attr=std_0)
-    word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
-    emb_layers = [
-        paddle.layer.embedding(size=word_dim, input=x, param_attr=emb_para)
-        for x in word_input
-    ]
-    emb_layers.append(predicate_embedding)
-    emb_layers.append(mark_embedding)
-    hidden_0 = paddle.layer.mixed(
-        size=hidden_dim,
-        bias_attr=std_default,
-        input=[
-            paddle.layer.full_matrix_projection(
-                input=emb, param_attr=std_default) for emb in emb_layers
-        ])
-    lstm_para_attr = paddle.attr.Param(initial_std=0.0, learning_rate=1.0)
-    hidden_para_attr = paddle.attr.Param(
-        initial_std=default_std, learning_rate=mix_hidden_lr)
-    lstm_0 = paddle.layer.lstmemory(
-        input=hidden_0,
-        act=paddle.activation.Relu(),
-        gate_act=paddle.activation.Sigmoid(),
-        state_act=paddle.activation.Sigmoid(),
-        bias_attr=std_0,
-        param_attr=lstm_para_attr)
-    #stack L-LSTM and R-LSTM with direct edges
-    input_tmp = [hidden_0, lstm_0]
-    for i in range(1, depth):
-        mix_hidden = paddle.layer.mixed(
-            size=hidden_dim,
-            bias_attr=std_default,
-            input=[
-                paddle.layer.full_matrix_projection(
-                    input=input_tmp[0], param_attr=hidden_para_attr),
-                paddle.layer.full_matrix_projection(
-                    input=input_tmp[1], param_attr=lstm_para_attr)
-            ])
-        lstm = paddle.layer.lstmemory(
-            input=mix_hidden,
-            act=paddle.activation.Relu(),
-            gate_act=paddle.activation.Sigmoid(),
-            state_act=paddle.activation.Sigmoid(),
-            reverse=((i % 2) == 1),
-            bias_attr=std_0,
-            param_attr=lstm_para_attr)
-        input_tmp = [mix_hidden, lstm]
-    feature_out = paddle.layer.mixed(
-        size=label_dict_len,
-        bias_attr=std_default,
-        input=[
-            paddle.layer.full_matrix_projection(
-                input=input_tmp[0], param_attr=hidden_para_attr),
-            paddle.layer.full_matrix_projection(
-                input=input_tmp[1], param_attr=lstm_para_attr)
-        ], )
-    return feature_out
-def load_parameter(file_name, h, w):
-    with open(file_name, 'rb') as f:
-        f.read(16)  # skip header.
-        return np.fromfile(f, dtype=np.float32).reshape(h, w)
-def main():
-    paddle.init(use_gpu=with_gpu, trainer_count=1)
-    # define network topology
-    feature_out = db_lstm()
-    target = paddle.layer.data(name='target', type=d_type(label_dict_len))
-    crf_cost = paddle.layer.crf(
-        size=label_dict_len,
-        input=feature_out,
-        label=target,
-        param_attr=paddle.attr.Param(
-            name='crfw', initial_std=default_std, learning_rate=mix_hidden_lr))
-    crf_dec = paddle.layer.crf_decoding(
-        size=label_dict_len,
-        input=feature_out,
-        label=target,
-        param_attr=paddle.attr.Param(name='crfw'))
-    evaluator.sum(input=crf_dec)
-    # create parameters
-    parameters = paddle.parameters.create(crf_cost)
-    parameters.set('emb', load_parameter(conll05.get_embedding(), 44068, 32))
-    # create optimizer
-    optimizer = paddle.optimizer.Momentum(
-        momentum=0,
-        learning_rate=2e-2,
-        regularization=paddle.optimizer.L2Regularization(rate=8e-4),
-        model_average=paddle.optimizer.ModelAverage(
-            average_window=0.5, max_average_window=10000), )
-    trainer = paddle.trainer.SGD(
-        cost=crf_cost,
-        parameters=parameters,
-        update_equation=optimizer,
-        extra_layers=crf_dec)
-    reader = paddle.batch(
-        paddle.reader.shuffle(conll05.test(), buf_size=8192), batch_size=10)
-    test_reader = paddle.batch(
-        paddle.reader.shuffle(conll05.test(), buf_size=8192), batch_size=10)
-    feeding = {
-        'word_data': 0,
-        'ctx_n2_data': 1,
-        'ctx_n1_data': 2,
-        'ctx_0_data': 3,
-        'ctx_p1_data': 4,
-        'ctx_p2_data': 5,
-        'verb_data': 6,
-        'mark_data': 7,
-        'target': 8
-    }
-    def event_handler(event):
-        if isinstance(event, paddle.event.EndIteration):
-            if event.batch_id % 100 == 0:
-                print "Pass %d, Batch %d, Cost %f, %s" % (
-                    event.pass_id, event.batch_id, event.cost, event.metrics)
-            if event.batch_id % 1000 == 0:
-                result = trainer.test(reader=test_reader, feeding=feeding)
-                print "\nTest with Pass %d, Batch %d, %s" % (
-                    event.pass_id, event.batch_id, result.metrics)
-        if isinstance(event, paddle.event.EndPass):
-            # save parameters
-            with open('params_pass_%d.tar' % event.pass_id, 'w') as f:
-                trainer.save_parameter_to_tar(f)
-            result = trainer.test(reader=test_reader, feeding=feeding)
-            print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
-    trainer.train(
-        reader=reader,
-        event_handler=event_handler,
-        num_passes=1,
-        feeding=feeding)
-    test_creator = paddle.dataset.conll05.test()
-    test_data = []
-    for item in test_creator():
-        test_data.append(item[0:8])
-        if len(test_data) == 1:
-            break
-    predict = paddle.layer.crf_decoding(
-        size=label_dict_len,
-        input=feature_out,
-        param_attr=paddle.attr.Param(name='crfw'))
-    probs = paddle.infer(
-        output_layer=predict,
-        parameters=parameters,
-        input=test_data,
-        feeding=feeding,
-        field='id')
-    assert len(probs) == len(test_data[0][0])
-    labels_reverse = {}
-    for (k, v) in label_dict.items():
-        labels_reverse[v] = k
-    pre_lab = [labels_reverse[i] for i in probs]
-    print pre_lab
-if __name__ == '__main__':
-    main()
--- a/source/beginners_guide/basics/08.machine_translation/train.py
+++ b/source/beginners_guide/basics/08.machine_translation/train.py
-import sys, os
-import numpy as np
-import paddle.v2 as paddle
-with_gpu = os.getenv('WITH_GPU', '0') != '0'
-def save_model(trainer, parameters, save_path):
-    with open(save_path, 'w') as f:
-        trainer.save_parameter_to_tar(f)
-def seq_to_seq_net(source_dict_dim,
-                   target_dict_dim,
-                   is_generating,
-                   beam_size=3,
-                   max_length=250):
-    ### Network Architecture
-    word_vector_dim = 512  # dimension of word vector
-    decoder_size = 512  # dimension of hidden unit of GRU decoder
-    encoder_size = 512  # dimension of hidden unit of GRU encoder
-    #### Encoder
-    src_word_id = paddle.layer.data(
-        name='source_language_word',
-        type=paddle.data_type.integer_value_sequence(source_dict_dim))
-    src_embedding = paddle.layer.embedding(
-        input=src_word_id, size=word_vector_dim)
-    src_forward = paddle.networks.simple_gru(
-        input=src_embedding, size=encoder_size)
-    src_backward = paddle.networks.simple_gru(
-        input=src_embedding, size=encoder_size, reverse=True)
-    encoded_vector = paddle.layer.concat(input=[src_forward, src_backward])
-    #### Decoder
-    encoded_proj = paddle.layer.fc(
-        act=paddle.activation.Linear(),
-        size=decoder_size,
-        bias_attr=False,
-        input=encoded_vector)
-    backward_first = paddle.layer.first_seq(input=src_backward)
-    decoder_boot = paddle.layer.fc(
-        size=decoder_size,
-        act=paddle.activation.Tanh(),
-        bias_attr=False,
-        input=backward_first)
-    def gru_decoder_with_attention(enc_vec, enc_proj, current_word):
-        decoder_mem = paddle.layer.memory(
-            name='gru_decoder', size=decoder_size, boot_layer=decoder_boot)
-        context = paddle.networks.simple_attention(
-            encoded_sequence=enc_vec,
-            encoded_proj=enc_proj,
-            decoder_state=decoder_mem)
-        decoder_inputs = paddle.layer.fc(
-            act=paddle.activation.Linear(),
-            size=decoder_size * 3,
-            bias_attr=False,
-            input=[context, current_word],
-            layer_attr=paddle.attr.ExtraLayerAttribute(
-                error_clipping_threshold=100.0))
-        gru_step = paddle.layer.gru_step(
-            name='gru_decoder',
-            input=decoder_inputs,
-            output_mem=decoder_mem,
-            size=decoder_size)
-        out = paddle.layer.fc(
-            size=target_dict_dim,
-            bias_attr=True,
-            act=paddle.activation.Softmax(),
-            input=gru_step)
-        return out
-    decoder_group_name = 'decoder_group'
-    group_input1 = paddle.layer.StaticInput(input=encoded_vector)
-    group_input2 = paddle.layer.StaticInput(input=encoded_proj)
-    group_inputs = [group_input1, group_input2]
-    if not is_generating:
-        trg_embedding = paddle.layer.embedding(
-            input=paddle.layer.data(
-                name='target_language_word',
-                type=paddle.data_type.integer_value_sequence(target_dict_dim)),
-            size=word_vector_dim,
-            param_attr=paddle.attr.ParamAttr(name='_target_language_embedding'))
-        group_inputs.append(trg_embedding)
-        # For decoder equipped with attention mechanism, in training,
-        # target embeding (the groudtruth) is the data input,
-        # while encoded source sequence is accessed to as an unbounded memory.
-        # Here, the StaticInput defines a read-only memory
-        # for the recurrent_group.
-        decoder = paddle.layer.recurrent_group(
-            name=decoder_group_name,
-            step=gru_decoder_with_attention,
-            input=group_inputs)
-        lbl = paddle.layer.data(
-            name='target_language_next_word',
-            type=paddle.data_type.integer_value_sequence(target_dict_dim))
-        cost = paddle.layer.classification_cost(input=decoder, label=lbl)
-        return cost
-    else:
-        # In generation, the decoder predicts a next target word based on
-        # the encoded source sequence and the previous generated target word.
-        # The encoded source sequence (encoder's output) must be specified by
-        # StaticInput, which is a read-only memory.
-        # Embedding of the previous generated word is automatically retrieved
-        # by GeneratedInputs initialized by a start mark <s>.
-        trg_embedding = paddle.layer.GeneratedInput(
-            size=target_dict_dim,
-            embedding_name='_target_language_embedding',
-            embedding_size=word_vector_dim)
-        group_inputs.append(trg_embedding)
-        beam_gen = paddle.layer.beam_search(
-            name=decoder_group_name,
-            step=gru_decoder_with_attention,
-            input=group_inputs,
-            bos_id=0,
-            eos_id=1,
-            beam_size=beam_size,
-            max_length=max_length)
-        return beam_gen
-def main():
-    paddle.init(use_gpu=with_gpu, trainer_count=1)
-    is_generating = False
-    # source and target dict dim.
-    dict_size = 30000
-    source_dict_dim = target_dict_dim = dict_size
-    # train the network
-    if not is_generating:
-        # define optimize method and trainer
-        optimizer = paddle.optimizer.Adam(
-            learning_rate=5e-5,
-            regularization=paddle.optimizer.L2Regularization(rate=8e-4))
-        cost = seq_to_seq_net(source_dict_dim, target_dict_dim, is_generating)
-        parameters = paddle.parameters.create(cost)
-        trainer = paddle.trainer.SGD(
-            cost=cost, parameters=parameters, update_equation=optimizer)
-        # define data reader
-        wmt14_reader = paddle.batch(
-            paddle.reader.shuffle(
-                paddle.dataset.wmt14.train(dict_size), buf_size=8192),
-            batch_size=4)
-        # define event_handler callback
-        def event_handler(event):
-            if isinstance(event, paddle.event.EndIteration):
-                if event.batch_id % 10 == 0:
-                    print("\nPass %d, Batch %d, Cost %f, %s" %
-                          (event.pass_id, event.batch_id, event.cost,
-                           event.metrics))
-                else:
-                    sys.stdout.write('.')
-                    sys.stdout.flush()
-                if not event.batch_id % 10:
-                    save_path = 'params_pass_%05d_batch_%05d.tar' % (
-                        event.pass_id, event.batch_id)
-                    save_model(trainer, parameters, save_path)
-            if isinstance(event, paddle.event.EndPass):
-                # save parameters
-                save_path = 'params_pass_%05d.tar' % (event.pass_id)
-                save_model(trainer, parameters, save_path)
-        # start to train
-        trainer.train(
-            reader=wmt14_reader, event_handler=event_handler, num_passes=2)
-    # generate a english sequence to french
-    else:
-        # use the first 3 samples for generation
-        gen_data = []
-        gen_num = 3
-        for item in paddle.dataset.wmt14.gen(dict_size)():
-            gen_data.append([item[0]])
-            if len(gen_data) == gen_num:
-                break
-        beam_size = 3
-        beam_gen = seq_to_seq_net(source_dict_dim, target_dict_dim,
-                                  is_generating, beam_size)
-        # get the trained model, whose bleu = 26.92
-        parameters = paddle.dataset.wmt14.model()
-        # prob is the prediction probabilities, and id is the prediction word.
-        beam_result = paddle.infer(
-            output_layer=beam_gen,
-            parameters=parameters,
-            input=gen_data,
-            field=['prob', 'id'])
-        # load the dictionary
-        src_dict, trg_dict = paddle.dataset.wmt14.get_dict(dict_size)
-        gen_sen_idx = np.where(beam_result[1] == -1)[0]
-        assert len(gen_sen_idx) == len(gen_data) * beam_size
-        # -1 is the delimiter of generated sequences.
-        # the first element of each generated sequence its length.
-        start_pos, end_pos = 1, 0
-        for i, sample in enumerate(gen_data):
-            print(
-                " ".join([src_dict[w] for w in sample[0][1:-1]])
-            )  # skip the start and ending mark when printing the source sentence
-            for j in xrange(beam_size):
-                end_pos = gen_sen_idx[i * beam_size + j]
-                print("%.4f\t%s" % (beam_result[0][i][j], " ".join(
-                    trg_dict[w] for w in beam_result[1][start_pos:end_pos])))
-                start_pos = end_pos + 2
-            print("\n")
-if __name__ == '__main__':
-    main()