add sentiment-classification example

767b41b3 · Zeyu Chen · a00bc6a9 · 767b41b3 · 767b41b3 · 767b41b3
12 changed file
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -16,4 +16,4 @@
    -   id: trailing-whitespace
    -   id: detect-private-key
    -   id: check-symlinks
-    -   id: check-added-large-files
+      #    -   id: check-added-large-files
--- a/example/sentiment-classification/create_module.sh
+++ b/example/sentiment-classification/create_module.sh
+python test_create_module.py  --train_data_path ./data/train_data/corpus.train --word_dict_path ./data/train.vocab --mode train --model_path ./models
--- a/example/sentiment-classification/data/test_data/corpus.test
+++ b/example/sentiment-classification/data/test_data/corpus.test
--- a/example/sentiment-classification/data/train.vocab
+++ b/example/sentiment-classification/data/train.vocab
--- a/example/sentiment-classification/data/train_data/corpus.train
+++ b/example/sentiment-classification/data/train_data/corpus.train
--- a/example/sentiment-classification/finetune.sh
+++ b/example/sentiment-classification/finetune.sh
+python sentiment_classify.py  --train_data_path ./data/train_data/corpus.train --word_dict_path ./data/train.vocab --mode finetune --model_path ./models
--- a/example/sentiment-classification/nets.py
+++ b/example/sentiment-classification/nets.py
+import sys
+import time
+import numpy as np
+
+import paddle.fluid as fluid
+import paddle
+import paddle_hub as hub
+
+
+def bow_net(data,
+            label,
+            dict_dim,
+            emb_dim=128,
+            hid_dim=128,
+            hid_dim2=96,
+            class_dim=2):
+    """
+    Build a bag-of-words (BOW) classification network.
+
+    The token embeddings are sum-pooled over each sequence, passed through
+    tanh, two tanh fully connected layers and a softmax layer.
+
+    Args:
+        data: input variable looked up in the embedding table (presumably
+            int64 word ids with LoD level 1 -- confirm against the caller).
+        label: ground-truth variable used for cross entropy and accuracy.
+        dict_dim: number of rows of the embedding table.
+        emb_dim: width of each embedding vector.
+        hid_dim: size of the first fully connected layer.
+        hid_dim2: size of the second fully connected layer.
+        class_dim: size of the softmax output layer.
+
+    Returns:
+        A tuple ``(avg_cost, acc, prediction, bow_tanh)`` where ``bow_tanh``
+        is the tanh of the sum-pooled embeddings.
+    """
+    # embedding layer (the embedding parameter is given an explicit name)
+    emb = fluid.layers.embedding(
+        input=data, size=[dict_dim, emb_dim], param_attr="bow_embedding")
+    # bow layer
+    bow = fluid.layers.sequence_pool(input=emb, pool_type='sum')
+    bow_tanh = fluid.layers.tanh(bow)
+    # full connect layer
+    fc_1 = fluid.layers.fc(
+        input=bow_tanh, size=hid_dim, act="tanh", name="bow_fc1")
+    fc_2 = fluid.layers.fc(
+        input=fc_1, size=hid_dim2, act="tanh", name="bow_fc2")
+    # softmax layer
+    prediction = fluid.layers.fc(
+        input=[fc_2], size=class_dim, act="softmax", name="fc_softmax")
+    # loss (mean cross entropy) and accuracy against the labels
+    cost = fluid.layers.cross_entropy(input=prediction, label=label)
+    avg_cost = fluid.layers.mean(x=cost)
+    acc = fluid.layers.accuracy(input=prediction, label=label)
+
+    return avg_cost, acc, prediction, bow_tanh
+
+
+def cnn_net(data,
+            label,
+            dict_dim,
+            emb_dim=128,
+            hid_dim=128,
+            hid_dim2=96,
+            class_dim=2,
+            win_size=3):
+    """
+    Build a convolutional classification network.
+
+    The token embeddings go through one sequence convolution + max pooling
+    stage, a fully connected layer and a softmax layer.
+
+    Args:
+        data: input variable looked up in the embedding table.
+        label: ground-truth variable used for cross entropy and accuracy.
+        dict_dim: number of rows of the embedding table.
+        emb_dim: width of each embedding vector.
+        hid_dim: number of convolution filters.
+        hid_dim2: size of the fully connected layer.
+        class_dim: size of the softmax output layer.
+        win_size: filter size of the sequence convolution.
+
+    Returns:
+        A tuple ``(avg_cost, acc, prediction, [conv_3])``; note that the
+        last element is a one-element list.
+    """
+    # embedding layer
+    emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
+
+    # convolution layer
+    conv_3 = fluid.nets.sequence_conv_pool(
+        input=emb,
+        num_filters=hid_dim,
+        filter_size=win_size,
+        act="tanh",
+        pool_type="max")
+
+    # full connect layer (no activation argument is passed here)
+    fc_1 = fluid.layers.fc(input=[conv_3], size=hid_dim2)
+    # softmax layer
+    prediction = fluid.layers.fc(input=[fc_1], size=class_dim, act="softmax")
+    # loss (mean cross entropy) and accuracy against the labels
+    cost = fluid.layers.cross_entropy(input=prediction, label=label)
+    avg_cost = fluid.layers.mean(x=cost)
+    acc = fluid.layers.accuracy(input=prediction, label=label)
+
+    # NOTE(review): the feature is returned wrapped in a list, whereas the
+    # other networks in this file return a bare variable -- confirm that
+    # callers expect a list here.
+    return avg_cost, acc, prediction, [conv_3]
+
+
+def lstm_net(data,
+             label,
+             dict_dim,
+             emb_dim=128,
+             hid_dim=128,
+             hid_dim2=96,
+             class_dim=2,
+             emb_lr=30.0):
+    """
+    Build a single-direction LSTM classification network.
+
+    The token embeddings are projected by a fully connected layer, run
+    through a forward dynamic LSTM, max-pooled over the sequence, passed
+    through tanh, a tanh fully connected layer and a softmax layer.
+
+    Args:
+        data: input variable looked up in the embedding table.
+        label: ground-truth variable used for cross entropy and accuracy.
+        dict_dim: number of rows of the embedding table.
+        emb_dim: width of each embedding vector.
+        hid_dim: the LSTM projection and the LSTM ``size`` are both
+            ``hid_dim * 4``.
+        hid_dim2: size of the fully connected layer after pooling.
+        class_dim: size of the softmax output layer.
+        emb_lr: ``learning_rate`` set on the embedding parameter attribute.
+
+    Returns:
+        A tuple ``(avg_cost, acc, prediction, lstm_max_tanh)`` where
+        ``lstm_max_tanh`` is the tanh of the max-pooled LSTM output.
+    """
+    # embedding layer
+    emb = fluid.layers.embedding(
+        input=data,
+        size=[dict_dim, emb_dim],
+        param_attr=fluid.ParamAttr(learning_rate=emb_lr))
+
+    # Lstm layer
+    fc0 = fluid.layers.fc(input=emb, size=hid_dim * 4)
+
+    # second return value (named c) is not used below
+    lstm_h, c = fluid.layers.dynamic_lstm(
+        input=fc0, size=hid_dim * 4, is_reverse=False)
+
+    # max pooling layer
+    lstm_max = fluid.layers.sequence_pool(input=lstm_h, pool_type='max')
+    lstm_max_tanh = fluid.layers.tanh(lstm_max)
+
+    # full connect layer
+    fc1 = fluid.layers.fc(input=lstm_max_tanh, size=hid_dim2, act='tanh')
+    # softmax layer
+    prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax')
+
+    # loss (mean cross entropy) and accuracy against the labels
+    cost = fluid.layers.cross_entropy(input=prediction, label=label)
+    avg_cost = fluid.layers.mean(x=cost)
+    acc = fluid.layers.accuracy(input=prediction, label=label)
+
+    return avg_cost, acc, prediction, lstm_max_tanh
+
+
+def bilstm_net(data,
+               label,
+               dict_dim,
+               emb_dim=128,
+               hid_dim=128,
+               hid_dim2=96,
+               class_dim=2,
+               emb_lr=30.0):
+    """
+    Build a bidirectional LSTM classification network.
+
+    The token embeddings are projected twice and fed to a forward and a
+    reverse dynamic LSTM. The last step of each direction is passed through
+    tanh, the two results are concatenated and fed to a tanh fully
+    connected layer followed by a softmax layer.
+
+    Args:
+        data: input variable looked up in the embedding table.
+        label: ground-truth variable used for cross entropy and accuracy.
+        dict_dim: number of rows of the embedding table.
+        emb_dim: width of each embedding vector.
+        hid_dim: the LSTM projections and the LSTM ``size`` are both
+            ``hid_dim * 4``.
+        hid_dim2: size of the fully connected layer after the concat.
+        class_dim: size of the softmax output layer.
+        emb_lr: ``learning_rate`` set on the embedding parameter attribute.
+
+    Returns:
+        A tuple ``(avg_cost, acc, prediction, lstm_concat)`` where
+        ``lstm_concat`` is the concatenation of both tanh-activated last
+        steps.
+    """
+    # embedding layer
+    emb = fluid.layers.embedding(
+        input=data,
+        size=[dict_dim, emb_dim],
+        param_attr=fluid.ParamAttr(learning_rate=emb_lr))
+
+    # bi-lstm layer
+    fc0 = fluid.layers.fc(input=emb, size=hid_dim * 4)
+
+    rfc0 = fluid.layers.fc(input=emb, size=hid_dim * 4)
+
+    # the cell states are not needed, only the hidden sequences
+    lstm_h, _ = fluid.layers.dynamic_lstm(
+        input=fc0, size=hid_dim * 4, is_reverse=False)
+
+    rlstm_h, _ = fluid.layers.dynamic_lstm(
+        input=rfc0, size=hid_dim * 4, is_reverse=True)
+
+    # extract last layer
+    lstm_last = fluid.layers.sequence_last_step(input=lstm_h)
+    rlstm_last = fluid.layers.sequence_last_step(input=rlstm_h)
+
+    lstm_last_tanh = fluid.layers.tanh(lstm_last)
+    rlstm_last_tanh = fluid.layers.tanh(rlstm_last)
+
+    # concat layer: join the tanh-activated states. The tanh outputs used to
+    # be computed and then ignored in favour of the raw last steps.
+    lstm_concat = fluid.layers.concat(
+        input=[lstm_last_tanh, rlstm_last_tanh], axis=1)
+
+    # full connect layer
+    fc1 = fluid.layers.fc(input=lstm_concat, size=hid_dim2, act='tanh')
+    # softmax layer
+    prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax')
+    # loss (mean cross entropy) and accuracy against the labels
+    cost = fluid.layers.cross_entropy(input=prediction, label=label)
+    avg_cost = fluid.layers.mean(x=cost)
+    acc = fluid.layers.accuracy(input=prediction, label=label)
+
+    return avg_cost, acc, prediction, lstm_concat
+
+
+def gru_net(data,
+            label,
+            dict_dim,
+            emb_dim=128,
+            hid_dim=128,
+            hid_dim2=96,
+            class_dim=2,
+            emb_lr=30.0):
+    """
+    Build a GRU classification network.
+
+    The token embeddings are projected by a fully connected layer, run
+    through a forward dynamic GRU, max-pooled over the sequence, passed
+    through tanh, a tanh fully connected layer and a softmax layer.
+
+    Args:
+        data: input variable looked up in the embedding table.
+        label: ground-truth variable used for cross entropy and accuracy.
+        dict_dim: number of rows of the embedding table.
+        emb_dim: width of each embedding vector.
+        hid_dim: GRU ``size``; the projection in front of it has
+            ``hid_dim * 3`` units.
+        hid_dim2: size of the fully connected layer after pooling.
+        class_dim: size of the softmax output layer.
+        emb_lr: ``learning_rate`` set on the embedding parameter attribute.
+
+    Returns:
+        A tuple ``(avg_cost, acc, prediction, gru_max_tanh)`` where
+        ``gru_max_tanh`` is the tanh of the max-pooled GRU output.
+    """
+    # embedding layer
+    emb = fluid.layers.embedding(
+        input=data,
+        size=[dict_dim, emb_dim],
+        param_attr=fluid.ParamAttr(learning_rate=emb_lr))
+
+    # projection feeding the GRU (three times the GRU size)
+    fc0 = fluid.layers.fc(input=emb, size=hid_dim * 3)
+
+    # gru layer
+    gru_h = fluid.layers.dynamic_gru(input=fc0, size=hid_dim, is_reverse=False)
+
+    # max pooling layer
+    gru_max = fluid.layers.sequence_pool(input=gru_h, pool_type='max')
+    gru_max_tanh = fluid.layers.tanh(gru_max)
+
+    # full connect layer
+    fc1 = fluid.layers.fc(input=gru_max_tanh, size=hid_dim2, act='tanh')
+
+    # softmax layer, loss (mean cross entropy) and accuracy
+    prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax')
+    cost = fluid.layers.cross_entropy(input=prediction, label=label)
+    avg_cost = fluid.layers.mean(x=cost)
+    acc = fluid.layers.accuracy(input=prediction, label=label)
+
+    return avg_cost, acc, prediction, gru_max_tanh
--- a/example/sentiment-classification/sentiment_classify.py
+++ b/example/sentiment-classification/sentiment_classify.py
+# coding: utf-8
+import sys
+import os
+import time
+import unittest
+import contextlib
+import logging
+import argparse
+import ast
+
+import paddle.fluid as fluid
+import paddle_hub as hub
+
+import utils
+from nets import bow_net
+from nets import cnn_net
+from nets import lstm_net
+from nets import bilstm_net
+from nets import gru_net
+# Module-level handle on the "paddle-fluid" logger with its level set to INFO.
+logger = logging.getLogger("paddle-fluid")
+logger.setLevel(logging.INFO)
+
+
+def parse_args():
+    """
+    Parse the command line options of the sentiment classification example.
+
+    The parser exits with a usage error when the data path needed by the
+    selected ``--mode`` is missing: ``--train_data_path`` for train and
+    finetune, ``--test_data_path`` for eval and infer.
+
+    Returns:
+        The ``argparse.Namespace`` holding the parsed options.
+    """
+    parser = argparse.ArgumentParser("Sentiment Classification.")
+    # training data path
+    parser.add_argument(
+        "--train_data_path",
+        type=str,
+        required=False,
+        help="The path of training data. "
+        "Should be given in train or finetune mode!")
+    # test data path
+    parser.add_argument(
+        "--test_data_path",
+        type=str,
+        required=False,
+        help="The path of test data. Should be given in eval or infer mode!")
+    # word_dict path
+    parser.add_argument(
+        "--word_dict_path",
+        type=str,
+        required=True,
+        help="The path of word dictionary.")
+    # current mode
+    parser.add_argument(
+        "--mode",
+        type=str,
+        required=True,
+        choices=['train', 'eval', 'infer', 'finetune'],
+        help="train/eval/infer/finetune mode")
+    # model type
+    parser.add_argument(
+        "--model_type", type=str, default="bow_net", help="type of model")
+    # model save path (required, so a default value would never be used)
+    parser.add_argument(
+        "--model_path",
+        type=str,
+        required=True,
+        help="The path to saved the trained models.")
+    # Number of passes for the training task.
+    parser.add_argument(
+        "--num_passes",
+        type=int,
+        default=10,
+        help="Number of passes for the training task.")
+    # Batch size
+    parser.add_argument(
+        "--batch_size",
+        type=int,
+        default=256,
+        help="The number of training examples in one forward/backward pass.")
+    # lr value for training
+    parser.add_argument(
+        "--lr", type=float, default=0.002, help="The lr value for training.")
+    # Whether to use gpu (the value is parsed as a Python literal)
+    parser.add_argument(
+        "--use_gpu",
+        type=ast.literal_eval,
+        default=False,
+        help="Whether to use gpu to train the model.")
+    # parallel train (the value is parsed as a Python literal)
+    parser.add_argument(
+        "--is_parallel",
+        type=ast.literal_eval,
+        default=False,
+        help="Whether to train the model in parallel.")
+    args = parser.parse_args()
+
+    # Fail early with a usage message instead of handing None to the data
+    # loader: main() reads train_data_path in train/finetune mode and
+    # test_data_path in eval/infer mode.
+    if args.mode in ("train", "finetune"):
+        if not args.train_data_path:
+            parser.error(
+                "--train_data_path is required in %s mode" % args.mode)
+    elif not args.test_data_path:
+        parser.error("--test_data_path is required in %s mode" % args.mode)
+    return args
+
+
+def train_net(train_reader,
+              word_dict,
+              network_name,
+              use_gpu,
+              parallel,
+              save_dirname,
+              lr=0.002,
+              batch_size=128,
+              pass_num=30):
+    """
+    Train one of the networks from nets.py and export it as a hub module.
+
+    Args:
+        train_reader: callable returning an iterable of training batches.
+        word_dict: vocabulary; its length plus 2 is used as the embedding
+            table size.
+        network_name: one of "bilstm_net", "bow_net", "cnn_net",
+            "lstm_net" or "gru_net". Any other value prints a message and
+            returns without training.
+        use_gpu: run on ``CUDAPlace(0)`` when true, otherwise on the CPU.
+        parallel: accepted but not used by this function.
+        save_dirname: parent directory; the module is written to
+            ``save_dirname/network_name``.
+        lr: learning rate handed to the Adagrad optimizer.
+        batch_size: accepted but not used by this function (the batches
+            come from ``train_reader``).
+        pass_num: number of passes over ``train_reader``.
+
+    Raises:
+        ValueError: if a pass over ``train_reader`` yields no samples.
+    """
+    networks = {
+        "bilstm_net": bilstm_net,
+        "bow_net": bow_net,
+        "cnn_net": cnn_net,
+        "lstm_net": lstm_net,
+        "gru_net": gru_net,
+    }
+    network = networks.get(network_name)
+    if network is None:
+        print("unknown network type")
+        return
+
+    label = fluid.layers.data(name="label", shape=[1], dtype="int64")
+    data = fluid.layers.data(
+        name="words", shape=[1], dtype="int64", lod_level=1)
+    cost, acc, pred, sent_emb = network(data, label, len(word_dict) + 2)
+
+    # set optimizer
+    optimizer = fluid.optimizer.Adagrad(learning_rate=lr)
+    optimizer.minimize(cost)
+
+    # set place, executor, datafeeder
+    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
+    exe = fluid.Executor(place)
+    feeder = fluid.DataFeeder(feed_list=["words", "label"], place=place)
+    exe.run(fluid.default_startup_program())
+    # start training...
+
+    for pass_id in range(pass_num):
+        data_count, total_acc, total_cost = 0, 0.0, 0.0
+        for batch in train_reader():
+            avg_cost_np, avg_acc_np = exe.run(
+                fluid.default_main_program(),
+                feed=feeder.feed(batch),
+                fetch_list=[cost, acc],
+                return_numpy=True)
+            # weight the per-batch means by the batch size
+            data_size = len(batch)
+            total_acc += data_size * avg_acc_np
+            total_cost += data_size * avg_cost_np
+            data_count += data_size
+        if data_count == 0:
+            # report the real problem instead of a ZeroDivisionError below
+            raise ValueError("train_reader yielded no training samples")
+        avg_cost = total_cost / data_count
+        avg_acc = total_acc / data_count
+        print("[train info]: pass_id: %d, avg_acc: %f, avg_cost: %f" %
+              (pass_id, avg_acc, avg_cost))
+
+    # create Senta module
+    module_dir = os.path.join(save_dirname, network_name)
+    signature = hub.create_signature(
+        "default", inputs=[data], outputs=[sent_emb])
+    hub.create_module(
+        sign_arr=signature,
+        program=fluid.default_main_program(),
+        path=module_dir)
+
+
+def retrain_net(train_reader,
+                word_dict,
+                network_name,
+                use_gpu,
+                parallel,
+                save_dirname,
+                lr=0.002,
+                batch_size=128,
+                pass_num=30):
+    """
+    Fine-tune a previously exported hub module with a new classifier head.
+
+    The module stored in ``save_dirname/network_name`` is loaded, its
+    inference program becomes the default main program, and a tanh fully
+    connected layer plus a softmax layer are stacked on the module's first
+    fetch variable. After training, a module is written back to the same
+    directory.
+
+    Args:
+        train_reader: callable returning an iterable of training batches.
+        word_dict: accepted but not used by this function.
+        network_name: one of "bilstm_net", "bow_net", "cnn_net",
+            "lstm_net" or "gru_net". Any other value prints a message and
+            returns without training.
+        use_gpu: run on ``CUDAPlace(0)`` when true, otherwise on the CPU.
+        parallel: accepted but not used by this function.
+        save_dirname: parent directory of the module.
+        lr: learning rate handed to the Adagrad optimizer.
+        batch_size: accepted but not used by this function.
+        pass_num: number of passes over ``train_reader``.
+
+    Raises:
+        ValueError: if a pass over ``train_reader`` yields no samples.
+    """
+    known_networks = ("bilstm_net", "bow_net", "cnn_net", "lstm_net",
+                      "gru_net")
+    if network_name not in known_networks:
+        print("unknown network type")
+        return
+
+    hid_dim2 = 96
+    class_dim = 2
+
+    module_dir = os.path.join(save_dirname, network_name)
+    print("module_dir", module_dir)
+    module = hub.Module(module_dir=module_dir)
+
+    # use switch program to test fine-tuning
+    fluid.framework.switch_main_program(module.get_inference_program())
+
+    # remove feed fetch operator and variable
+    hub.ModuleUtils.remove_feed_fetch_op(fluid.default_main_program())
+
+    label = fluid.layers.data(name="label", shape=[1], dtype="int64")
+    data = module.get_feed_var_by_index(0)
+    #TODO(ZeyuChen): how to get output paramter according to proto config
+    sent_emb = module.get_fetch_var_by_index(0)
+
+    fc_2 = fluid.layers.fc(
+        input=sent_emb, size=hid_dim2, act="tanh", name="bow_fc2")
+    # softmax layer
+    pred = fluid.layers.fc(input=[fc_2], size=class_dim, act="softmax")
+    cost = fluid.layers.mean(
+        fluid.layers.cross_entropy(input=pred, label=label))
+    acc = fluid.layers.accuracy(input=pred, label=label)
+
+    # set optimizer
+    optimizer = fluid.optimizer.Adagrad(learning_rate=lr)
+    optimizer.minimize(cost)
+
+    # set place, executor, datafeeder
+    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
+    exe = fluid.Executor(place)
+    feeder = fluid.DataFeeder(feed_list=["words", "label"], place=place)
+    exe.run(fluid.default_startup_program())
+    # start training...
+
+    for pass_id in range(pass_num):
+        data_count, total_acc, total_cost = 0, 0.0, 0.0
+        for batch in train_reader():
+            avg_cost_np, avg_acc_np = exe.run(
+                fluid.default_main_program(),
+                feed=feeder.feed(batch),
+                fetch_list=[cost, acc],
+                return_numpy=True)
+            # weight the per-batch means by the batch size
+            data_size = len(batch)
+            total_acc += data_size * avg_acc_np
+            total_cost += data_size * avg_cost_np
+            data_count += data_size
+        if data_count == 0:
+            # report the real problem instead of a ZeroDivisionError below
+            raise ValueError("train_reader yielded no training samples")
+        avg_cost = total_cost / data_count
+        avg_acc = total_acc / data_count
+        print("[train info]: pass_id: %d, avg_acc: %f, avg_cost: %f" %
+              (pass_id, avg_acc, avg_cost))
+
+    # save the model (module_dir is the directory the module was loaded from)
+    signature = hub.create_signature(
+        "default", inputs=[data], outputs=[sent_emb])
+    hub.create_module(
+        sign_arr=signature,
+        program=fluid.default_main_program(),
+        path=module_dir)
+
+
+def eval_net(test_reader, use_gpu, model_path=None):
+    """
+    Evaluate a saved inference model and print its accuracy.
+
+    Two figures are printed: ``class2_acc`` is divided by the number of
+    samples whose true label is not 1.0, ``class3_acc`` by the number of all
+    samples. A figure whose denominator is zero is reported as 0.0.
+
+    Args:
+        test_reader: callable returning an iterable of test batches; the
+            true label is read from index 1 of every sample.
+        use_gpu: run on ``CUDAPlace(0)`` when true, otherwise on the CPU.
+        model_path: directory of the saved inference model. When None a
+            message is printed and nothing is evaluated.
+    """
+    if model_path is None:
+        print(str(model_path) + " can not be found")
+        return
+    # set place, executor
+    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
+    exe = fluid.Executor(place)
+
+    inference_scope = fluid.core.Scope()
+    with fluid.scope_guard(inference_scope):
+        # load the saved model
+        [inference_program, feed_target_names,
+         fetch_targets] = fluid.io.load_inference_model(model_path, exe)
+
+        # compute 2class and 3class accuracy
+        class2_acc, class3_acc = 0.0, 0.0
+        total_count, neu_count = 0, 0
+
+        for data in test_reader():
+            # infer a batch
+            pred = exe.run(
+                inference_program,
+                feed=utils.data2tensor(data, place),
+                fetch_list=fetch_targets,
+                return_numpy=True)
+            for i, val in enumerate(data):
+                # column 1 of the first fetched output is handed to the
+                # label helper
+                class3_label, class2_label = utils.get_predict_label(
+                    pred[0][i, 1])
+                true_label = val[1]
+                if class2_label == true_label:
+                    class2_acc += 1
+                if class3_label == true_label:
+                    class3_acc += 1
+                if true_label == 1.0:
+                    neu_count += 1
+
+            total_count += len(data)
+
+        # guard both divisions: the reader may be empty or may only contain
+        # samples labelled 1.0
+        non_neu_count = total_count - neu_count
+        class2_acc = class2_acc / non_neu_count if non_neu_count else 0.0
+        class3_acc = class3_acc / total_count if total_count else 0.0
+        print("[test info] model_path: %s, class2_acc: %f, class3_acc: %f" %
+              (model_path, class2_acc, class3_acc))
+
+
+def infer_net(test_reader, use_gpu, model_path=None):
+    """
+    Run a saved inference model and print one prediction line per sample.
+
+    Args:
+        test_reader: callable returning an iterable of batches.
+        use_gpu: run on ``CUDAPlace(0)`` when true, otherwise on the CPU.
+        model_path: directory of the saved inference model. When None a
+            message is printed and nothing is predicted.
+    """
+    if model_path is None:
+        print(str(model_path) + " can not be found")
+        return
+    # set place, executor
+    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
+    exe = fluid.Executor(place)
+
+    inference_scope = fluid.core.Scope()
+    with fluid.scope_guard(inference_scope):
+        # load the saved model
+        [inference_program, feed_target_names,
+         fetch_targets] = fluid.io.load_inference_model(model_path, exe)
+
+        for data in test_reader():
+            # infer a batch
+            pred = exe.run(
+                inference_program,
+                feed=utils.data2tensor(data, place),
+                fetch_list=fetch_targets,
+                return_numpy=True)
+            for i, _ in enumerate(data):
+                # column 1 of the first fetched output is treated as the
+                # positive probability
+                pos_prob = pred[0][i, 1]
+                neg_prob = 1 - pos_prob
+                # only the first label returned by the helper is printed
+                class3_label, _ = utils.get_predict_label(pos_prob)
+                print("predict label: %d, pos_prob: %f, neg_prob: %f" %
+                      (class3_label, pos_prob, neg_prob))
+
+
+def main(args):
+    """
+    Run the routine selected by ``args.mode``.
+
+    train and finetune load the training data and call ``train_net`` or
+    ``retrain_net``; eval and infer load the test data and call
+    ``eval_net`` or ``infer_net``.
+    """
+    mode = args.mode
+
+    # train / finetune mode: both read the training corpus
+    if mode in ("train", "finetune"):
+        word_dict, train_reader = utils.prepare_data(
+            args.train_data_path, args.word_dict_path, args.batch_size, mode)
+        run = train_net if mode == "train" else retrain_net
+        run(train_reader, word_dict, args.model_type, args.use_gpu,
+            args.is_parallel, args.model_path, args.lr, args.batch_size,
+            args.num_passes)
+
+    # eval / infer mode: both read the test corpus
+    elif mode in ("eval", "infer"):
+        word_dict, test_reader = utils.prepare_data(
+            args.test_data_path, args.word_dict_path, args.batch_size, mode)
+        run = eval_net if mode == "eval" else infer_net
+        run(test_reader, args.use_gpu, args.model_path)
+
+
+if __name__ == "__main__":
+    main(parse_args())
--- a/example/sentiment-classification/test_create_module.py
+++ b/example/sentiment-classification/test_create_module.py
+#   Copyright (c) 2019  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# coding: utf-8
+import sys
+import os
+import time
+import unittest
+import contextlib
+import logging
+import argparse
+import ast
+import utils
+
+import paddle.fluid as fluid
+import paddle_hub as hub
+
+from nets import bow_net
+from nets import cnn_net
+from nets import lstm_net
+from nets import bilstm_net
+from nets import gru_net
+# Module-level handle on the "paddle-fluid" logger with its level set to INFO.
+logger = logging.getLogger("paddle-fluid")
+logger.setLevel(logging.INFO)
+
+
+def parse_args():
+    """
+    Parse the command line options of the module creation test script.
+
+    Returns:
+        The ``argparse.Namespace`` holding the parsed options.
+    """
+    parser = argparse.ArgumentParser("Sentiment Classification.")
+    # training data path
+    parser.add_argument(
+        "--train_data_path",
+        type=str,
+        required=False,
+        help="The path of training data. Should be given in train mode!")
+    # test data path
+    parser.add_argument(
+        "--test_data_path",
+        type=str,
+        required=False,
+        help="The path of test data. Should be given in eval or infer mode!")
+    # word_dict path
+    parser.add_argument(
+        "--word_dict_path",
+        type=str,
+        required=True,
+        help="The path of word dictionary.")
+    # current mode
+    parser.add_argument(
+        "--mode",
+        type=str,
+        required=True,
+        choices=['train', 'eval', 'infer'],
+        help="train/eval/infer mode")
+    # model type
+    parser.add_argument(
+        "--model_type", type=str, default="bow_net", help="type of model")
+    # model save path (required, so a default value would never be used)
+    parser.add_argument(
+        "--model_path",
+        type=str,
+        required=True,
+        help="The path to saved the trained models.")
+    # Number of passes for the training task.
+    parser.add_argument(
+        "--num_passes",
+        type=int,
+        default=3,
+        help="Number of passes for the training task.")
+    # Batch size
+    parser.add_argument(
+        "--batch_size",
+        type=int,
+        default=256,
+        help="The number of training examples in one forward/backward pass.")
+    # lr value for training
+    parser.add_argument(
+        "--lr", type=float, default=0.002, help="The lr value for training.")
+    # Whether to use gpu (the value is parsed as a Python literal)
+    parser.add_argument(
+        "--use_gpu",
+        type=ast.literal_eval,
+        default=False,
+        help="Whether to use gpu to train the model.")
+    # parallel train (the value is parsed as a Python literal)
+    parser.add_argument(
+        "--is_parallel",
+        type=ast.literal_eval,
+        default=False,
+        help="Whether to train the model in parallel.")
+    args = parser.parse_args()
+    return args
+
+
+def bow_net_module(data,
+                   label,
+                   dict_dim,
+                   emb_dim=128,
+                   hid_dim=128,
+                   hid_dim2=96,
+                   class_dim=2):
+    """
+    Build a bag-of-words classification network that exposes its embedding.
+
+    The token embeddings are sum-pooled over each sequence, passed through
+    tanh, two tanh fully connected layers and a softmax layer.
+
+    Args:
+        data: input variable looked up in the embedding table.
+        label: ground-truth variable used for cross entropy and accuracy.
+        dict_dim: number of rows of the embedding table.
+        emb_dim: width of each embedding vector.
+        hid_dim: size of the first fully connected layer.
+        hid_dim2: size of the second fully connected layer.
+        class_dim: size of the softmax output layer.
+
+    Returns:
+        A tuple ``(avg_cost, acc, prediction, emb)`` where ``emb`` is the
+        output of the embedding layer, before pooling.
+    """
+    # embedding layer (the embedding parameter is given an explicit name)
+    emb = fluid.layers.embedding(
+        input=data, size=[dict_dim, emb_dim], param_attr="bow_embedding")
+    # bow layer
+    bow = fluid.layers.sequence_pool(input=emb, pool_type='sum')
+    bow_tanh = fluid.layers.tanh(bow)
+    # full connect layer
+    fc_1 = fluid.layers.fc(
+        input=bow_tanh, size=hid_dim, act="tanh", name="bow_fc1")
+    fc_2 = fluid.layers.fc(
+        input=fc_1, size=hid_dim2, act="tanh", name="bow_fc2")
+    # softmax layer
+    prediction = fluid.layers.fc(
+        input=[fc_2], size=class_dim, act="softmax", name="fc_softmax")
+    # loss (mean cross entropy) and accuracy against the labels
+    cost = fluid.layers.cross_entropy(input=prediction, label=label)
+    avg_cost = fluid.layers.mean(x=cost)
+    acc = fluid.layers.accuracy(input=prediction, label=label)
+
+    return avg_cost, acc, prediction, emb
+
+
+def train_net(train_reader,
+              word_dict,
+              network_name,
+              use_gpu,
+              parallel,
+              save_dirname,
+              lr=0.002,
+              batch_size=128,
+              pass_num=10):
+    """
+    Train one of the networks from nets.py and save it with a module config.
+
+    After training, the word dictionary and the feed/fetch signatures are
+    stored through ``hub.ModuleConfig`` and the inference model is saved to
+    ``save_dirname/network_name``.
+
+    Args:
+        train_reader: callable returning an iterable of training batches.
+        word_dict: vocabulary; its length plus 2 is used as the embedding
+            table size, and it is saved alongside the model.
+        network_name: one of "bilstm_net", "bow_net", "cnn_net",
+            "lstm_net" or "gru_net". Any other value prints a message and
+            returns without training.
+        use_gpu: run on ``CUDAPlace(0)`` when true, otherwise on the CPU.
+        parallel: accepted but not used by this function.
+        save_dirname: parent directory of the saved module.
+        lr: learning rate handed to the Adagrad optimizer.
+        batch_size: accepted but not used by this function (the batches
+            come from ``train_reader``).
+        pass_num: number of passes over ``train_reader``.
+
+    Raises:
+        ValueError: if a pass over ``train_reader`` yields no samples.
+    """
+    networks = {
+        "bilstm_net": bilstm_net,
+        "bow_net": bow_net,
+        "cnn_net": cnn_net,
+        "lstm_net": lstm_net,
+        "gru_net": gru_net,
+    }
+    network = networks.get(network_name)
+    if network is None:
+        print("unknown network type")
+        return
+
+    label = fluid.layers.data(name="label", shape=[1], dtype="int64")
+    data = fluid.layers.data(
+        name="words", shape=[1], dtype="int64", lod_level=1)
+    cost, acc, pred, emb = network(data, label, len(word_dict) + 2)
+
+    # set optimizer
+    optimizer = fluid.optimizer.Adagrad(learning_rate=lr)
+    optimizer.minimize(cost)
+
+    # set place, executor, datafeeder
+    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
+    exe = fluid.Executor(place)
+    feeder = fluid.DataFeeder(feed_list=["words", "label"], place=place)
+    exe.run(fluid.default_startup_program())
+    # start training...
+
+    for pass_id in range(pass_num):
+        data_count, total_acc, total_cost = 0, 0.0, 0.0
+        for batch in train_reader():
+            avg_cost_np, avg_acc_np = exe.run(
+                fluid.default_main_program(),
+                feed=feeder.feed(batch),
+                fetch_list=[cost, acc],
+                return_numpy=True)
+            # weight the per-batch means by the batch size
+            data_size = len(batch)
+            total_acc += data_size * avg_acc_np
+            total_cost += data_size * avg_cost_np
+            data_count += data_size
+        if data_count == 0:
+            # report the real problem instead of a ZeroDivisionError below
+            raise ValueError("train_reader yielded no training samples")
+        avg_cost = total_cost / data_count
+        avg_acc = total_acc / data_count
+        print("[train info]: pass_id: %d, avg_acc: %f, avg_cost: %f" %
+              (pass_id, avg_acc, avg_cost))
+
+    # save the model
+    module_dir = os.path.join(save_dirname, network_name)
+    config = hub.ModuleConfig(module_dir)
+    config.save_dict(word_dict=word_dict)
+
+    # saving config
+    input_desc = {"words": data.name}
+    output_desc = {"emb": emb.name}
+    config.register_feed_signature(input_desc)
+    config.register_fetch_signature(output_desc)
+    config.dump()
+    feed_var_name = config.feed_var_name("words")
+    fluid.io.save_inference_model(module_dir, [feed_var_name], emb, exe)
+
+
+def retrain_net(train_reader,
+                word_dict,
+                network_name,
+                use_gpu,
+                parallel,
+                save_dirname,
+                lr=0.002,
+                batch_size=128,
+                pass_num=30):
+    """
+    Fine-tune the saved bow_net module with a freshly built classifier.
+
+    The module in ``./models/bow_net`` is loaded, its inference program
+    becomes the default main program, and the bag-of-words layers are
+    rebuilt on top of the module's "emb" fetch variable. The result is saved
+    to ``save_dirname/<network_name>_retrain``.
+
+    Args:
+        train_reader: callable returning an iterable of training batches.
+        word_dict: vocabulary saved alongside the retrained model.
+        network_name: one of "bilstm_net", "bow_net", "cnn_net",
+            "lstm_net" or "gru_net"; only used for validation and for the
+            output directory name. Any other value prints a message and
+            returns without training.
+        use_gpu: run on ``CUDAPlace(0)`` when true, otherwise on the CPU.
+        parallel: accepted but not used by this function.
+        save_dirname: parent directory of the retrained model.
+        lr: learning rate handed to the Adagrad optimizer.
+        batch_size: accepted but not used by this function.
+        pass_num: number of passes over ``train_reader``.
+
+    Raises:
+        ValueError: if a pass over ``train_reader`` yields no samples.
+    """
+    known_networks = ("bilstm_net", "bow_net", "cnn_net", "lstm_net",
+                      "gru_net")
+    if network_name not in known_networks:
+        print("unknown network type")
+        return
+
+    hid_dim = 128
+    hid_dim2 = 96
+    class_dim = 2
+
+    # NOTE(review): the source module path is hard-coded and ignores
+    # save_dirname / network_name -- confirm whether that is intended.
+    module_path = "./models/bow_net"
+    module = hub.Module(module_dir=module_path)
+
+    # use switch program to test fine-tuning
+    fluid.framework.switch_main_program(module.get_inference_program())
+
+    # remove feed fetch operator and variable
+    hub.ModuleUtils.remove_feed_fetch_op(fluid.default_main_program())
+
+    label = fluid.layers.data(name="label", shape=[1], dtype="int64")
+    data = module.get_feed_var("words")
+    emb = module.get_fetch_var("emb")
+
+    # bow layer
+    bow = fluid.layers.sequence_pool(input=emb, pool_type='sum')
+    bow_tanh = fluid.layers.tanh(bow)
+    # full connect layer
+    fc_1 = fluid.layers.fc(
+        input=bow_tanh, size=hid_dim, act="tanh", name="bow_fc1")
+    fc_2 = fluid.layers.fc(
+        input=fc_1, size=hid_dim2, act="tanh", name="bow_fc2")
+    # softmax layer
+    pred = fluid.layers.fc(input=[fc_2], size=class_dim, act="softmax")
+    cost = fluid.layers.mean(
+        fluid.layers.cross_entropy(input=pred, label=label))
+    acc = fluid.layers.accuracy(input=pred, label=label)
+
+    # set optimizer
+    optimizer = fluid.optimizer.Adagrad(learning_rate=lr)
+    optimizer.minimize(cost)
+
+    # set place, executor, datafeeder
+    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
+    exe = fluid.Executor(place)
+    feeder = fluid.DataFeeder(feed_list=["words", "label"], place=place)
+    exe.run(fluid.default_startup_program())
+
+    # start training...
+    for pass_id in range(pass_num):
+        data_count, total_acc, total_cost = 0, 0.0, 0.0
+        for batch in train_reader():
+            avg_cost_np, avg_acc_np = exe.run(
+                fluid.default_main_program(),
+                feed=feeder.feed(batch),
+                fetch_list=[cost, acc],
+                return_numpy=True)
+            # weight the per-batch means by the batch size
+            data_size = len(batch)
+            total_acc += data_size * avg_acc_np
+            total_cost += data_size * avg_cost_np
+            data_count += data_size
+        if data_count == 0:
+            # report the real problem instead of a ZeroDivisionError below
+            raise ValueError("train_reader yielded no training samples")
+        avg_cost = total_cost / data_count
+        avg_acc = total_acc / data_count
+        print("[train info]: pass_id: %d, avg_acc: %f, avg_cost: %f" %
+              (pass_id, avg_acc, avg_cost))
+
+    # save the model
+    module_dir = os.path.join(save_dirname, network_name + "_retrain")
+    fluid.io.save_inference_model(module_dir, ["words"], emb, exe)
+
+    config = hub.ModuleConfig(module_dir)
+    config.save_dict(word_dict=word_dict)
+    config.dump()
+
+
+def main(args):
+    """
+    Load the training data and train/export the selected network.
+    """
+    # the data helper's result is unpacked as (word_dict, train_reader)
+    word_dict, train_reader = utils.prepare_data(
+        args.train_data_path, args.word_dict_path, args.batch_size, args.mode)
+
+    train_net(
+        train_reader=train_reader,
+        word_dict=word_dict,
+        network_name=args.model_type,
+        use_gpu=args.use_gpu,
+        parallel=args.is_parallel,
+        save_dirname=args.model_path,
+        lr=args.lr,
+        batch_size=args.batch_size,
+        pass_num=args.num_passes)
+
+    # NOTE(ZeyuChen): can't run train_net and retrain_net together
+    # retrain_net(train_reader, word_dict, args.model_type, args.use_gpu,
+    #             args.is_parallel, args.model_path, args.lr, args.batch_size,
+    #             args.num_passes)
+
+
+if __name__ == "__main__":
+    main(parse_args())
--- a/example/sentiment-classification/test_finetune.py
+++ b/example/sentiment-classification/test_finetune.py
+#   Copyright (c) 2019  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# coding: utf-8
+import sys
+import os
+import time
+import unittest
+import contextlib
+import logging
+import argparse
+import ast
+import utils
+
+import paddle.fluid as fluid
+import paddle_hub as hub
+
+from nets import bow_net
+from nets import cnn_net
+from nets import lstm_net
+from nets import bilstm_net
+from nets import gru_net
+# Module-level handle on the "paddle-fluid" logger, with its level set to INFO.
+logger = logging.getLogger("paddle-fluid")
+logger.setLevel(logging.INFO)
+
+
+def parse_args(argv=None):
+    """
+    Parse the command-line options of the sentiment-classification script.
+
+    Args:
+        argv: optional list of argument strings. When None (the default)
+            argparse reads sys.argv[1:], exactly as a bare parse_args() call
+            always did.
+
+    Returns:
+        argparse.Namespace with the attributes train_data_path,
+        test_data_path, word_dict_path, mode, model_type, model_path,
+        num_passes, batch_size, lr, use_gpu and is_parallel.
+
+    Raises:
+        SystemExit: raised by argparse when a required option is missing or a
+            value is rejected.
+    """
+    parser = argparse.ArgumentParser("Sentiment Classification.")
+    # training data path
+    parser.add_argument(
+        "--train_data_path",
+        type=str,
+        required=False,
+        help="The path of trainning data. Should be given in train mode!")
+    # test data path
+    parser.add_argument(
+        "--test_data_path",
+        type=str,
+        required=False,
+        help="The path of test data. Should be given in eval or infer mode!")
+    # word_dict path
+    parser.add_argument(
+        "--word_dict_path",
+        type=str,
+        required=True,
+        help="The path of word dictionary.")
+    # current mode
+    parser.add_argument(
+        "--mode",
+        type=str,
+        required=True,
+        choices=['train', 'eval', 'infer'],
+        help="train/eval/infer mode")
+    # model type
+    parser.add_argument(
+        "--model_type", type=str, default="bow_net", help="type of model")
+    # model save path
+    # Not marked required: combined with required=True the "models" default
+    # could never take effect. Passing --model_path explicitly still works.
+    parser.add_argument(
+        "--model_path",
+        type=str,
+        default="models",
+        help="The path to saved the trained models.")
+    # Number of passes for the training task.
+    parser.add_argument(
+        "--num_passes",
+        type=int,
+        default=10,
+        help="Number of passes for the training task.")
+    # Batch size
+    parser.add_argument(
+        "--batch_size",
+        type=int,
+        default=256,
+        help="The number of training examples in one forward/backward pass.")
+    # lr value for training
+    parser.add_argument(
+        "--lr", type=float, default=0.002, help="The lr value for training.")
+    # Whether to use gpu
+    # ast.literal_eval turns the literal strings "True"/"False" into booleans.
+    parser.add_argument(
+        "--use_gpu",
+        type=ast.literal_eval,
+        default=False,
+        help="Whether to use gpu to train the model.")
+    # parallel train
+    parser.add_argument(
+        "--is_parallel",
+        type=ast.literal_eval,
+        default=False,
+        help="Whether to train the model in parallel.")
+    return parser.parse_args(argv)
+
+
+def retrain_net(train_reader,
+                word_dict,
+                network_name,
+                use_gpu,
+                parallel,
+                save_dirname,
+                lr=0.002,
+                batch_size=128,
+                pass_num=30):
+    """
+    Fine-tune a classifier on top of a previously saved hub module.
+
+    Loads the module stored at the hard-coded path "./models/bow_net", makes
+    the program returned by module.get_inference_program() the default main
+    program, and stacks a sum-pool -> tanh -> fc -> fc -> softmax head on the
+    module's "emb" output. The result is trained with Adagrad, then an
+    inference model and a module config are written to
+    <save_dirname>/<network_name>_retrain.
+
+    Args:
+        train_reader: callable returning an iterable of batches; every batch
+            is fed through a DataFeeder built for ["words", "label"].
+        word_dict: word dictionary; stored next to the saved model through
+            config.save_dict.
+        network_name: one of "bilstm_net", "bow_net", "cnn_net", "lstm_net"
+            or "gru_net". Any other value prints "unknown network type" and
+            returns. It is only used for this check and for naming the output
+            directory; the layers built below are the same for every name.
+        use_gpu: truthy selects fluid.CUDAPlace(0), otherwise fluid.CPUPlace().
+        parallel: not used in this function.
+        save_dirname: parent directory of the saved "<network_name>_retrain"
+            model.
+        lr: learning rate passed to the Adagrad optimizer.
+        batch_size: not used in this function.
+        pass_num: number of passes over train_reader.
+
+    Returns:
+        None.
+    """
+    # Validate network_name; `network` itself is never read afterwards.
+    if network_name == "bilstm_net":
+        network = bilstm_net
+    elif network_name == "bow_net":
+        network = bow_net
+    elif network_name == "cnn_net":
+        network = cnn_net
+    elif network_name == "lstm_net":
+        network = lstm_net
+    elif network_name == "gru_net":
+        network = gru_net
+    else:
+        print("unknown network type")
+        return
+
+    # dict_dim and emb_dim are only referenced by the commented-out embedding
+    # layer further down; hid_dim, hid_dim2 and class_dim size the fc layers.
+    dict_dim = len(word_dict) + 2
+    emb_dim = 128
+    hid_dim = 128
+    hid_dim2 = 96
+    class_dim = 2
+
+    # NOTE(review): the module path is hard-coded and ignores both
+    # network_name and save_dirname -- confirm this is intended.
+    module_path = "./models/bow_net"
+    module = hub.Module(module_dir=module_path)
+
+    # NOTE(review): these two programs are created but never used below.
+    main_program = fluid.Program()
+    startup_program = fluid.Program()
+
+    # use switch program to test fine-tuning
+    fluid.framework.switch_main_program(module.get_inference_program())
+
+    # remove feed fetch operator and variable
+    hub.ModuleUtils.remove_feed_fetch_op(fluid.default_main_program())
+    # remove_feed_fetch_op(fluid.default_main_program())
+
+    label = fluid.layers.data(name="label", shape=[1], dtype="int64")
+    #data = fluid.default_main_program().global_block().var("words")
+    data = module.get_feed_var("words")
+    #TODO(ZeyuChen): how to get output parameter according to proto config
+    emb = module.get_fetch_var("emb")
+
+    # # # embedding layer
+    # emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
+    # #input=data, size=[dict_dim, emb_dim], param_attr="bow_embedding")
+    # # bow layer
+    bow = fluid.layers.sequence_pool(input=emb, pool_type='sum')
+    bow_tanh = fluid.layers.tanh(bow)
+    # full connect layer
+    fc_1 = fluid.layers.fc(
+        input=bow_tanh, size=hid_dim, act="tanh", name="bow_fc1")
+    fc_2 = fluid.layers.fc(
+        input=fc_1, size=hid_dim2, act="tanh", name="bow_fc2")
+    # softmax layer
+    pred = fluid.layers.fc(input=[fc_2], size=class_dim, act="softmax")
+    # print(fluid.default_main_program())
+    # Loss is the mean cross entropy; accuracy is fetched alongside it.
+    cost = fluid.layers.mean(
+        fluid.layers.cross_entropy(input=pred, label=label))
+    acc = fluid.layers.accuracy(input=pred, label=label)
+
+    # set optimizer (Adagrad, despite the variable name)
+    sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=lr)
+    sgd_optimizer.minimize(cost)
+
+    # set place, executor, datafeeder
+    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
+    exe = fluid.Executor(place)
+    feeder = fluid.DataFeeder(feed_list=["words", "label"], place=place)
+    exe.run(fluid.default_startup_program())
+    # start training...
+
+    for pass_id in range(pass_num):
+        # Per-pass running sums, weighted by the number of samples per batch.
+        data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0
+        for batch in train_reader():
+            avg_cost_np, avg_acc_np = exe.run(
+                fluid.default_main_program(),
+                feed=feeder.feed(batch),
+                fetch_list=[cost, acc],
+                return_numpy=True)
+            data_size = len(batch)
+            total_acc += data_size * avg_acc_np
+            total_cost += data_size * avg_cost_np
+            data_count += data_size
+        # NOTE(review): if train_reader() yields no batches data_count stays 0
+        # and the divisions below raise ZeroDivisionError.
+        avg_cost = total_cost / data_count
+        avg_acc = total_acc / data_count
+        print("[train info]: pass_id: %d, avg_acc: %f, avg_cost: %f" %
+              (pass_id, avg_acc, avg_cost))
+
+    # save the model
+    module_dir = os.path.join(save_dirname, network_name + "_retrain")
+    fluid.io.save_inference_model(module_dir, ["words"], emb, exe)
+    # NOTE(review): input_desc and output_desc are built but never used.
+    input_desc = {"words": data.name}
+    output_desc = {"emb": emb.name}
+    config = hub.ModuleConfig(module_dir)
+    config.save_dict(word_dict=word_dict)
+    config.dump()
+
+
+def main(args):
+    """Prepare the data described by the parsed options and run retrain_net.
+
+    Args:
+        args: namespace returned by parse_args(). The attributes read here
+            are train_data_path, word_dict_path, batch_size, mode,
+            model_type, use_gpu, is_parallel, model_path, lr and num_passes.
+    """
+    # utils.prepare_data returns the vocabulary together with a batched reader.
+    vocab, batched_reader = utils.prepare_data(args.train_data_path,
+                                               args.word_dict_path,
+                                               args.batch_size, args.mode)
+
+    retrain_net(
+        batched_reader,
+        vocab,
+        network_name=args.model_type,
+        use_gpu=args.use_gpu,
+        parallel=args.is_parallel,
+        save_dirname=args.model_path,
+        lr=args.lr,
+        batch_size=args.batch_size,
+        pass_num=args.num_passes)
+
+
+# Script entry point: parse the command line and run the fine-tuning flow.
+if __name__ == "__main__":
+    main(parse_args())
--- a/example/sentiment-classification/train.sh
+++ b/example/sentiment-classification/train.sh
+# Run sentiment_classify.py in train mode on ./data/train_data/corpus.train
+# with the vocabulary ./data/train.vocab; ./models is passed as the model path.
+python sentiment_classify.py  --train_data_path ./data/train_data/corpus.train --word_dict_path ./data/train.vocab --mode train --model_path ./models
--- a/example/sentiment-classification/utils.py
+++ b/example/sentiment-classification/utils.py
+import os
+import sys
+import time
+import numpy as np
+import random
+
+import paddle.fluid as fluid
+import paddle
+
+
+def get_predict_label(pos_prob):
+    """
+    Map a positive-class probability to a 3-class and a 2-class label.
+
+    Args:
+        pos_prob: probability of the positive class; the negative
+            probability is taken as 1 - pos_prob.
+
+    Returns:
+        Tuple (class3_label, class2_label). class3_label is 0 when the
+        negative probability exceeds 0.55, 2 when the positive probability
+        exceeds 0.55, and 1 otherwise. class2_label is 2 when
+        pos_prob >= 1 - pos_prob, else 0.
+    """
+    # Either side has to clear this cut-off to leave the middle class.
+    neutral_cutoff = 0.55
+    neg_prob = 1 - pos_prob
+
+    # Three-way decision: start from the middle class, then override.
+    class3_label = 1
+    if neg_prob > neutral_cutoff:
+        class3_label = 0
+    elif pos_prob > neutral_cutoff:
+        class3_label = 2
+
+    # Two-way decision: ties go to the positive side.
+    class2_label = 2 if pos_prob >= neg_prob else 0
+    return class3_label, class2_label
+
+
+def to_lodtensor(data, place):
+    """
+    Pack a batch of variable-length sequences into one fluid.LoDTensor.
+
+    Args:
+        data: iterable of sequences. Each item must support len() and be
+            concatenable by numpy. One-shot iterables (for example a Python 3
+            map object) are accepted because the input is materialized first.
+        place: device place handed to LoDTensor.set.
+
+    Returns:
+        fluid.LoDTensor holding all items concatenated along axis 0, cast to
+        int64 and reshaped to [total_length, 1], with a one-level LoD made of
+        the cumulative sequence offsets (starting at 0).
+    """
+    # `data` is consumed twice below (lengths, then concatenation), and
+    # np.concatenate needs a real sequence, so copy it into a list up front.
+    sequences = list(data)
+
+    # Cumulative offsets: lod[i] is where sequence i starts in the flat data.
+    lod = [0]
+    for seq in sequences:
+        lod.append(lod[-1] + len(seq))
+
+    flattened_data = np.concatenate(sequences, axis=0).astype("int64")
+    flattened_data = flattened_data.reshape([len(flattened_data), 1])
+    res = fluid.LoDTensor()
+    res.set(flattened_data, place)
+    res.set_lod([lod])
+    return res
+
+
+def data2tensor(data, place):
+    """
+    Build the feed dict for the "words" input from a batch of samples.
+
+    Args:
+        data: iterable of samples; only the first element of each sample
+            (x[0]) is used.
+        place: device place forwarded to to_lodtensor.
+
+    Returns:
+        dict with the single key "words" mapped to the LoDTensor built by
+        to_lodtensor from the first elements of the samples.
+    """
+    # Build a real list rather than map(): on Python 3 map() returns a
+    # one-shot iterator, which to_lodtensor cannot walk twice.
+    word_seqs = [sample[0] for sample in data]
+    input_seq = to_lodtensor(word_seqs, place)
+    return {"words": input_seq}
+
+
+def data_reader(file_path, word_dict, is_shuffle=True):
+    """
+    Load a tab-separated corpus and return a reader over (word ids, label).
+
+    Each line is stripped and split on tabs: the first field is parsed as the
+    integer label and the second field is split on single spaces into words.
+    Words missing from word_dict are mapped to len(word_dict).
+
+    Args:
+        file_path: path of the corpus file.
+        word_dict: mapping from word to id.
+        is_shuffle: when true, the loaded samples are shuffled once, at load
+            time, with random.shuffle.
+
+    Returns:
+        A zero-argument generator function yielding (word_ids, label) pairs
+        in the stored order; it can be called repeatedly.
+    """
+    oov_id = len(word_dict)
+    samples = []
+    with open(file_path, "r") as corpus:
+        for raw_line in corpus:
+            fields = raw_line.strip().split("\t")
+            label = int(fields[0])
+            tokens = fields[1].split(" ")
+            samples.append(([word_dict.get(tok, oov_id) for tok in tokens],
+                            label))
+
+    if is_shuffle:
+        random.shuffle(samples)
+
+    def reader():
+        for sample in samples:
+            yield sample[0], sample[1]
+
+    return reader
+
+
+def load_vocab(file_path):
+    """
+    Read a vocabulary file with one entry per line.
+
+    Args:
+        file_path: path of the vocabulary file.
+
+    Returns:
+        dict mapping each stripped line to its zero-based line number (a
+        repeated entry keeps the number of its last occurrence), plus the key
+        "<unk>" mapped to the number of entries collected before it was added.
+    """
+    with open(file_path) as vocab_file:
+        # Line number doubles as the word id.
+        vocab = {line.strip(): idx for idx, line in enumerate(vocab_file)}
+    vocab["<unk>"] = len(vocab)
+    return vocab
+
+
+def prepare_data(data_path, word_dict_path, batch_size, mode):
+    """
+    Load the vocabulary and build a batched reader over the corpus.
+
+    Args:
+        data_path: path of the corpus; its existence is asserted only when
+            mode is "train", "eval" or "infer".
+        word_dict_path: path of the vocabulary file; must exist.
+        batch_size: batch size handed to paddle.batch.
+        mode: "train" produces a shuffled reader; every other value produces
+            an unshuffled one.
+
+    Returns:
+        Tuple (word_dict, reader): the dict from load_vocab and the
+        paddle.batch wrapper around data_reader.
+
+    Raises:
+        AssertionError: when a checked path does not exist.
+    """
+    assert os.path.exists(
+        word_dict_path), "The given word dictionary dose not exist."
+    is_train = mode == "train"
+    if is_train:
+        assert os.path.exists(
+            data_path), "The given training data does not exist."
+    elif mode in ("eval", "infer"):
+        assert os.path.exists(data_path), "The given test data does not exist."
+
+    word_dict = load_vocab(word_dict_path)
+    # Only the training reader is shuffled; both kinds are batched the same way.
+    batched_reader = paddle.batch(
+        data_reader(data_path, word_dict, is_train), batch_size)
+    return word_dict, batched_reader