# elmo_finetune.py (5.8 KB)
# Last committed by Steffy-zxf.
import argparse
import ast
import io
import numpy as np

from paddle.fluid.framework import switch_main_program
import paddle.fluid as fluid
import paddlehub as hub

# Command-line options for the fine-tuning run. Parsing happens when the
# module is executed, and the resulting ``args`` namespace is read by the
# script body below.
# yapf: disable
# The docstring is passed as ``description``; the first positional parameter
# of ArgumentParser is ``prog``, which is not what a docstring should fill.
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--num_epoch", type=int, default=3, help="Number of epoches for fine-tuning.")
# ast.literal_eval turns the literal text "True"/"False" into a real bool.
parser.add_argument("--use_gpu", type=ast.literal_eval, default=True, help="Whether use GPU for finetuning, input should be True or False")
parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint")
parser.add_argument("--batch_size", type=int, default=32, help="Total examples' number in batch for training.")
parser.add_argument("--learning_rate", type=float, default=1e-4, help="Learning rate used to train with warmup.")
parser.add_argument("--weight_decay", type=float, default=5, help="Weight decay rate for L2 regularizer.")
parser.add_argument("--warmup_proportion", type=float, default=0.05, help="Warmup proportion params for warmup strategy")
args = parser.parse_args()
# yapf: enable.

def bow_net(program, input_feature, hid_dim=128, hid_dim2=96):
    """Build a bag-of-words feature extractor on top of ``input_feature``.

    The input sequence is sum-pooled, passed through tanh, and then through
    two tanh-activated fully connected layers.

    Args:
        program: Program the new layers are appended to. It is made the
            current main program, and the switch is not undone on return.
        input_feature: Sequence input handed to ``sequence_pool``.
        hid_dim: Output size of the first fully connected layer.
        hid_dim2: Output size of the second fully connected layer.

    Returns:
        The output of the second fully connected layer.
    """
    # Without this call the ``program`` argument is ignored and the layers
    # are appended to whatever program happens to be the current default.
    switch_main_program(program)

    bow = fluid.layers.sequence_pool(input=input_feature, pool_type='sum')
    bow_tanh = fluid.layers.tanh(bow)
    fc_1 = fluid.layers.fc(input=bow_tanh, size=hid_dim, act="tanh")
    fc = fluid.layers.fc(input=fc_1, size=hid_dim2, act="tanh")

    return fc

def cnn_net(program, input_feature, win_size=3, hid_dim=128, hid_dim2=96):
    """Build a sequence-convolution feature extractor on ``input_feature``.

    A single ``sequence_conv_pool`` stage is followed by one fully connected
    layer without activation.

    Args:
        program: Program the new layers are appended to. It is made the
            current main program, and the switch is not undone on return.
        input_feature: Sequence input handed to ``sequence_conv_pool``.
        win_size: Filter (context window) size of the convolution.
        hid_dim: Number of convolution filters.
        hid_dim2: Output size of the fully connected layer.

    Returns:
        The output of the fully connected layer.
    """
    # Without this call the ``program`` argument is ignored and the layers
    # are appended to whatever program happens to be the current default.
    switch_main_program(program)

    # NOTE(review): the argument list of this call was truncated in the file.
    # ``input``, ``num_filters`` and ``filter_size`` are restored from this
    # function's own parameters; ``act="tanh"`` is chosen to match the other
    # nets in this file and ``pool_type="max"`` is the API default - confirm
    # both against the upstream script.
    conv_3 = fluid.nets.sequence_conv_pool(
        input=input_feature,
        num_filters=hid_dim,
        filter_size=win_size,
        act="tanh",
        pool_type="max")
    fc = fluid.layers.fc(input=conv_3, size=hid_dim2)

    return fc

def gru_net(program, input_feature, hid_dim=128, hid_dim2=96):
    """Build a GRU feature extractor on top of ``input_feature``.

    The input is projected to ``hid_dim * 3`` by a fully connected layer, run
    through a forward ``dynamic_gru`` of size ``hid_dim``, max-pooled over the
    sequence, passed through tanh, and finally through a tanh-activated fully
    connected layer.

    Args:
        program: Program the new layers are appended to. It is made the
            current main program, and the switch is not undone on return.
        input_feature: Sequence input handed to the first projection layer.
        hid_dim: Size passed to ``dynamic_gru``.
        hid_dim2: Output size of the final fully connected layer.

    Returns:
        The output of the final fully connected layer.
    """
    # Without this call the ``program`` argument is ignored and the layers
    # are appended to whatever program happens to be the current default.
    switch_main_program(program)

    fc0 = fluid.layers.fc(input=input_feature, size=hid_dim * 3)
    gru_h = fluid.layers.dynamic_gru(input=fc0, size=hid_dim, is_reverse=False)
    gru_max = fluid.layers.sequence_pool(input=gru_h, pool_type='max')
    gru_max_tanh = fluid.layers.tanh(gru_max)
    fc = fluid.layers.fc(input=gru_max_tanh, size=hid_dim2, act='tanh')

    return fc

def bilstm_net(program, input_feature, hid_dim=128, hid_dim2=96):
    """Build a bidirectional LSTM feature extractor on ``input_feature``.

    Two independent projections of the input feed a forward and a reverse
    ``dynamic_lstm``. The last step of each is passed through tanh, the two
    results are concatenated, and a tanh-activated fully connected layer
    produces the output.

    Args:
        program: Program the new layers are appended to. It is made the
            current main program, and the switch is not undone on return.
        input_feature: Sequence input handed to both projection layers.
        hid_dim: Base hidden size; projections and LSTMs use ``hid_dim * 4``.
        hid_dim2: Output size of the final fully connected layer.

    Returns:
        The output of the final fully connected layer.
    """
    # Without this call the ``program`` argument is ignored and the layers
    # are appended to whatever program happens to be the current default.
    switch_main_program(program)

    fc0 = fluid.layers.fc(input=input_feature, size=hid_dim * 4)
    rfc0 = fluid.layers.fc(input=input_feature, size=hid_dim * 4)

    # The cell-state output of each LSTM is not used.
    lstm_h, _ = fluid.layers.dynamic_lstm(
        input=fc0, size=hid_dim * 4, is_reverse=False)
    rlstm_h, _ = fluid.layers.dynamic_lstm(
        input=rfc0, size=hid_dim * 4, is_reverse=True)

    # extract last step
    # NOTE(review): for the is_reverse=True LSTM, confirm that the last step
    # (rather than the first) is the intended summary state.
    lstm_last = fluid.layers.sequence_last_step(input=lstm_h)
    rlstm_last = fluid.layers.sequence_last_step(input=rlstm_h)

    lstm_last_tanh = fluid.layers.tanh(lstm_last)
    rlstm_last_tanh = fluid.layers.tanh(rlstm_last)

    # concat layer: use the tanh outputs. They were previously computed and
    # then discarded in favor of the raw last steps, unlike gru_net/lstm_net,
    # which apply tanh before the final fully connected layer.
    lstm_concat = fluid.layers.concat(
        input=[lstm_last_tanh, rlstm_last_tanh], axis=1)
    # full connect layer
    fc = fluid.layers.fc(input=lstm_concat, size=hid_dim2, act='tanh')

    return fc

def lstm_net(program, input_feature, hid_dim=128, hid_dim2=96):
    """Build a forward LSTM feature extractor on top of ``input_feature``.

    The input is projected to ``hid_dim * 4``, run through a forward
    ``dynamic_lstm``, max-pooled over the sequence, passed through tanh, and
    finally through a tanh-activated fully connected layer.

    Args:
        program: Program the new layers are appended to. It is made the
            current main program, and the switch is not undone on return.
        input_feature: Sequence input handed to the projection layer.
        hid_dim: Base hidden size; projection and LSTM use ``hid_dim * 4``.
        hid_dim2: Output size of the final fully connected layer.

    Returns:
        The output of the final fully connected layer.
    """
    # Without this call the ``program`` argument is ignored and the layers
    # are appended to whatever program happens to be the current default.
    switch_main_program(program)

    fc0 = fluid.layers.fc(input=input_feature, size=hid_dim * 4)
    # The cell-state output of the LSTM is not used.
    lstm_h, _ = fluid.layers.dynamic_lstm(
        input=fc0, size=hid_dim * 4, is_reverse=False)
    lstm_max = fluid.layers.sequence_pool(input=lstm_h, pool_type='max')
    lstm_max_tanh = fluid.layers.tanh(lstm_max)
    fc = fluid.layers.fc(input=lstm_max_tanh, size=hid_dim2, act='tanh')

    return fc

if __name__ == '__main__':
    # NOTE(review): several lines of this script body were missing from the
    # file (truncated calls, unbalanced parentheses). The calls below are
    # restored using only the parsed command-line ``args`` and names already
    # present in this file; confirm them against the upstream script.

    # Step1: load the PaddleHub ELMo pretrained module
    module = hub.Module(name="elmo.hub_module")
    inputs, outputs, program = module.context(trainable=True)

    # Step2: download the dataset and read it with LACClassifyReader
    dataset = hub.dataset.ChnSentiCorp()

    reader = hub.reader.LACClassifyReader(
        dataset=dataset, vocab_path=module.get_vocab_path())
    word_dict_len = len(reader.vocab)

    word_ids = inputs["word_ids"]
    elmo_embedding = outputs["elmo_embed"]

    # Step3: switch program and build network
    # All layers created from here on must be appended to the module's
    # program, which is where ``word_ids`` and ``elmo_embedding`` live.
    switch_main_program(program)

    # embedding layer
    word_embed_dims = 128
    word_embedding = fluid.layers.embedding(
        input=word_ids,
        size=[word_dict_len, word_embed_dims],
        param_attr=fluid.ParamAttr(
            initializer=fluid.initializer.Uniform(low=-0.1, high=0.1)))
    # add elmo embedding
    input_feature = fluid.layers.concat(
        input=[elmo_embedding, word_embedding], axis=1)

    # choose the net which you would like: bow, cnn, gru, bilstm, lstm
    # we recommend you to choose the gru_net
    fc = gru_net(program, input_feature)

    # Define a classification fine-tune task with PaddleHub's API
    elmo_task = hub.create_text_cls_task(
        feature=fc, num_classes=dataset.num_labels)

    # Setup feed list for data feeder
    # Must feed every tensor the ELMo module needs, plus the task label
    feed_list = [word_ids.name, elmo_task.variable("label").name]

    # Step4: Select fine-tune strategy, setup config and fine-tune
    strategy = hub.AdamWeightDecayStrategy(
        weight_decay=args.weight_decay,
        learning_rate=args.learning_rate,
        warmup_proportion=args.warmup_proportion)

    # Setup running config for PaddleHub Finetune API
    config = hub.RunConfig(
        use_cuda=args.use_gpu,
        num_epoch=args.num_epoch,
        batch_size=args.batch_size,
        checkpoint_dir=args.checkpoint_dir,
        strategy=strategy)

    # Finetune and evaluate by PaddleHub's API
    # will finish training, evaluation, testing, save model automatically
    hub.finetune_and_eval(
        task=elmo_task, data_reader=reader, feed_list=feed_list, config=config)