提交 767b41b3 编写于 作者: Z Zeyu Chen

add sentiment-classification example

上级 a00bc6a9
......@@ -16,4 +16,4 @@
- id: trailing-whitespace
- id: detect-private-key
- id: check-symlinks
- id: check-added-large-files
# - id: check-added-large-files
python test_create_module.py --train_data_path ./data/train_data/corpus.train --word_dict_path ./data/train.vocab --mode train --model_path ./models
此差异已折叠。
python sentiment_classify.py --train_data_path ./data/train_data/corpus.train --word_dict_path ./data/train.vocab --mode finetune --model_path ./models
import sys
import time
import numpy as np
import paddle.fluid as fluid
import paddle
import paddle_hub as hub
def bow_net(data,
label,
dict_dim,
emb_dim=128,
hid_dim=128,
hid_dim2=96,
class_dim=2):
"""
Bow net
"""
# embedding layer
emb = fluid.layers.embedding(
input=data, size=[dict_dim, emb_dim], param_attr="bow_embedding")
# bow layer
bow = fluid.layers.sequence_pool(input=emb, pool_type='sum')
bow_tanh = fluid.layers.tanh(bow)
# full connect layer
fc_1 = fluid.layers.fc(
input=bow_tanh, size=hid_dim, act="tanh", name="bow_fc1")
fc_2 = fluid.layers.fc(
input=fc_1, size=hid_dim2, act="tanh", name="bow_fc2")
# softmax layer
prediction = fluid.layers.fc(
input=[fc_2], size=class_dim, act="softmax", name="fc_softmax")
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost)
acc = fluid.layers.accuracy(input=prediction, label=label)
return avg_cost, acc, prediction, bow_tanh
def cnn_net(data,
label,
dict_dim,
emb_dim=128,
hid_dim=128,
hid_dim2=96,
class_dim=2,
win_size=3):
"""
Conv net
"""
# embedding layer
emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
# convolution layer
conv_3 = fluid.nets.sequence_conv_pool(
input=emb,
num_filters=hid_dim,
filter_size=win_size,
act="tanh",
pool_type="max")
# full connect layer
fc_1 = fluid.layers.fc(input=[conv_3], size=hid_dim2)
# softmax layer
prediction = fluid.layers.fc(input=[fc_1], size=class_dim, act="softmax")
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost)
acc = fluid.layers.accuracy(input=prediction, label=label)
return avg_cost, acc, prediction, [conv_3]
def lstm_net(data,
label,
dict_dim,
emb_dim=128,
hid_dim=128,
hid_dim2=96,
class_dim=2,
emb_lr=30.0):
"""
Lstm net
"""
# embedding layer
emb = fluid.layers.embedding(
input=data,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(learning_rate=emb_lr))
# Lstm layer
fc0 = fluid.layers.fc(input=emb, size=hid_dim * 4)
lstm_h, c = fluid.layers.dynamic_lstm(
input=fc0, size=hid_dim * 4, is_reverse=False)
# max pooling layer
lstm_max = fluid.layers.sequence_pool(input=lstm_h, pool_type='max')
lstm_max_tanh = fluid.layers.tanh(lstm_max)
# full connect layer
fc1 = fluid.layers.fc(input=lstm_max_tanh, size=hid_dim2, act='tanh')
# softmax layer
prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax')
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost)
acc = fluid.layers.accuracy(input=prediction, label=label)
return avg_cost, acc, prediction, lstm_max_tanh
def bilstm_net(data,
label,
dict_dim,
emb_dim=128,
hid_dim=128,
hid_dim2=96,
class_dim=2,
emb_lr=30.0):
"""
Bi-Lstm net
"""
# embedding layer
emb = fluid.layers.embedding(
input=data,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(learning_rate=emb_lr))
# bi-lstm layer
fc0 = fluid.layers.fc(input=emb, size=hid_dim * 4)
rfc0 = fluid.layers.fc(input=emb, size=hid_dim * 4)
lstm_h, c = fluid.layers.dynamic_lstm(
input=fc0, size=hid_dim * 4, is_reverse=False)
rlstm_h, c = fluid.layers.dynamic_lstm(
input=rfc0, size=hid_dim * 4, is_reverse=True)
# extract last layer
lstm_last = fluid.layers.sequence_last_step(input=lstm_h)
rlstm_last = fluid.layers.sequence_last_step(input=rlstm_h)
lstm_last_tanh = fluid.layers.tanh(lstm_last)
rlstm_last_tanh = fluid.layers.tanh(rlstm_last)
# concat layer
lstm_concat = fluid.layers.concat(input=[lstm_last, rlstm_last], axis=1)
# full connect layer
fc1 = fluid.layers.fc(input=lstm_concat, size=hid_dim2, act='tanh')
# softmax layer
prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax')
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost)
acc = fluid.layers.accuracy(input=prediction, label=label)
return avg_cost, acc, prediction, lstm_concat
def gru_net(data,
label,
dict_dim,
emb_dim=128,
hid_dim=128,
hid_dim2=96,
class_dim=2,
emb_lr=30.0):
"""
gru net
"""
emb = fluid.layers.embedding(
input=data,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(learning_rate=emb_lr))
fc0 = fluid.layers.fc(input=emb, size=hid_dim * 3)
gru_h = fluid.layers.dynamic_gru(input=fc0, size=hid_dim, is_reverse=False)
gru_max = fluid.layers.sequence_pool(input=gru_h, pool_type='max')
gru_max_tanh = fluid.layers.tanh(gru_max)
fc1 = fluid.layers.fc(input=gru_max_tanh, size=hid_dim2, act='tanh')
prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax')
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost)
acc = fluid.layers.accuracy(input=prediction, label=label)
return avg_cost, acc, prediction, gru_max_tanh
# coding: utf-8
import sys
import os
import time
import unittest
import contextlib
import logging
import argparse
import ast
import paddle.fluid as fluid
import paddle_hub as hub
import utils
from nets import bow_net
from nets import cnn_net
from nets import lstm_net
from nets import bilstm_net
from nets import gru_net
logger = logging.getLogger("paddle-fluid")
logger.setLevel(logging.INFO)
def parse_args():
parser = argparse.ArgumentParser("Sentiment Classification.")
# training data path
parser.add_argument(
"--train_data_path",
type=str,
required=False,
help="The path of trainning data. Should be given in train mode!")
# test data path
parser.add_argument(
"--test_data_path",
type=str,
required=False,
help="The path of test data. Should be given in eval or infer mode!")
# word_dict path
parser.add_argument(
"--word_dict_path",
type=str,
required=True,
help="The path of word dictionary.")
# current mode
parser.add_argument(
"--mode",
type=str,
required=True,
choices=['train', 'eval', 'infer', 'finetune'],
help="train/eval/infer mode")
# model type
parser.add_argument(
"--model_type", type=str, default="bow_net", help="type of model")
# model save path
parser.add_argument(
"--model_path",
type=str,
default="models",
required=True,
help="The path to saved the trained models.")
# Number of passes for the training task.
parser.add_argument(
"--num_passes",
type=int,
default=10,
help="Number of passes for the training task.")
# Batch size
parser.add_argument(
"--batch_size",
type=int,
default=256,
help="The number of training examples in one forward/backward pass.")
# lr value for training
parser.add_argument(
"--lr", type=float, default=0.002, help="The lr value for training.")
# Whether to use gpu
parser.add_argument(
"--use_gpu",
type=ast.literal_eval,
default=False,
help="Whether to use gpu to train the model.")
# parallel train
parser.add_argument(
"--is_parallel",
type=ast.literal_eval,
default=False,
help="Whether to train the model in parallel.")
args = parser.parse_args()
return args
def train_net(train_reader,
word_dict,
network_name,
use_gpu,
parallel,
save_dirname,
lr=0.002,
batch_size=128,
pass_num=30):
"""
train network
"""
if network_name == "bilstm_net":
network = bilstm_net
elif network_name == "bow_net":
network = bow_net
elif network_name == "cnn_net":
network = cnn_net
elif network_name == "lstm_net":
network = lstm_net
elif network_name == "gru_net":
network = gru_net
else:
print("unknown network type")
return
label = fluid.layers.data(name="label", shape=[1], dtype="int64")
data = fluid.layers.data(
name="words", shape=[1], dtype="int64", lod_level=1)
cost, acc, pred, sent_emb = network(data, label, len(word_dict) + 2)
# set optimizer
sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=lr)
sgd_optimizer.minimize(cost)
# set place, executor, datafeeder
place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
feeder = fluid.DataFeeder(feed_list=["words", "label"], place=place)
exe.run(fluid.default_startup_program())
# start training...
for pass_id in range(pass_num):
data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0
for batch in train_reader():
avg_cost_np, avg_acc_np = exe.run(
fluid.default_main_program(),
feed=feeder.feed(batch),
fetch_list=[cost, acc],
return_numpy=True)
data_size = len(batch)
total_acc += data_size * avg_acc_np
total_cost += data_size * avg_cost_np
data_count += data_size
avg_cost = total_cost / data_count
avg_acc = total_acc / data_count
print("[train info]: pass_id: %d, avg_acc: %f, avg_cost: %f" %
(pass_id, avg_acc, avg_cost))
# create Senta module
module_dir = os.path.join(save_dirname, network_name)
signature = hub.create_signature(
"default", inputs=[data], outputs=[sent_emb])
hub.create_module(
sign_arr=signature,
program=fluid.default_main_program(),
path=module_dir)
def retrain_net(train_reader,
word_dict,
network_name,
use_gpu,
parallel,
save_dirname,
lr=0.002,
batch_size=128,
pass_num=30):
"""
train network
"""
if network_name == "bilstm_net":
network = bilstm_net
elif network_name == "bow_net":
network = bow_net
elif network_name == "cnn_net":
network = cnn_net
elif network_name == "lstm_net":
network = lstm_net
elif network_name == "gru_net":
network = gru_net
else:
print("unknown network type")
return
emb_dim = 128
hid_dim = 128
hid_dim2 = 96
class_dim = 2
dict_dim = len(word_dict) + 2
module_dir = os.path.join(save_dirname, network_name)
print("module_dir", module_dir)
module = hub.Module(module_dir=module_dir)
main_program = fluid.Program()
startup_program = fluid.Program()
# use switch program to test fine-tuning
fluid.framework.switch_main_program(module.get_inference_program())
# remove feed fetch operator and variable
hub.ModuleUtils.remove_feed_fetch_op(fluid.default_main_program())
label = fluid.layers.data(name="label", shape=[1], dtype="int64")
data = module.get_feed_var_by_index(0)
#TODO(ZeyuChen): how to get output paramter according to proto config
sent_emb = module.get_fetch_var_by_index(0)
fc_2 = fluid.layers.fc(
input=sent_emb, size=hid_dim2, act="tanh", name="bow_fc2")
# softmax layer
pred = fluid.layers.fc(input=[fc_2], size=class_dim, act="softmax")
# print(fluid.default_main_program())
cost = fluid.layers.mean(
fluid.layers.cross_entropy(input=pred, label=label))
acc = fluid.layers.accuracy(input=pred, label=label)
# set optimizer
sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=lr)
sgd_optimizer.minimize(cost)
# set place, executor, datafeeder
place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
feeder = fluid.DataFeeder(feed_list=["words", "label"], place=place)
exe.run(fluid.default_startup_program())
# start training...
for pass_id in range(pass_num):
data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0
for batch in train_reader():
avg_cost_np, avg_acc_np = exe.run(
fluid.default_main_program(),
feed=feeder.feed(batch),
fetch_list=[cost, acc],
return_numpy=True)
data_size = len(batch)
total_acc += data_size * avg_acc_np
total_cost += data_size * avg_cost_np
data_count += data_size
avg_cost = total_cost / data_count
avg_acc = total_acc / data_count
print("[train info]: pass_id: %d, avg_acc: %f, avg_cost: %f" %
(pass_id, avg_acc, avg_cost))
# save the model
module_dir = os.path.join(save_dirname, network_name)
signature = hub.create_signature(
"default", inputs=[data], outputs=[sent_emb])
hub.create_module(
sign_arr=signature,
program=fluid.default_main_program(),
path=module_dir)
def eval_net(test_reader, use_gpu, model_path=None):
"""
Evaluation function
"""
if model_path is None:
print(str(model_path) + "can not be found")
return
# set place, executor
place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
inference_scope = fluid.core.Scope()
with fluid.scope_guard(inference_scope):
# load the saved model
[inference_program, feed_target_names,
fetch_targets] = fluid.io.load_inference_model(model_path, exe)
# compute 2class and 3class accuracy
class2_acc, class3_acc = 0.0, 0.0
total_count, neu_count = 0, 0
for data in test_reader():
# infer a batch
pred = exe.run(
inference_program,
feed=utils.data2tensor(data, place),
fetch_list=fetch_targets,
return_numpy=True)
for i, val in enumerate(data):
class3_label, class2_label = utils.get_predict_label(
pred[0][i, 1])
true_label = val[1]
if class2_label == true_label:
class2_acc += 1
if class3_label == true_label:
class3_acc += 1
if true_label == 1.0:
neu_count += 1
total_count += len(data)
class2_acc = class2_acc / (total_count - neu_count)
class3_acc = class3_acc / total_count
print("[test info] model_path: %s, class2_acc: %f, class3_acc: %f" %
(model_path, class2_acc, class3_acc))
def infer_net(test_reader, use_gpu, model_path=None):
"""
Inference function
"""
if model_path is None:
print(str(model_path) + "can not be found")
return
# set place, executor
place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
inference_scope = fluid.core.Scope()
with fluid.scope_guard(inference_scope):
# load the saved model
[inference_program, feed_target_names,
fetch_targets] = fluid.io.load_inference_model(model_path, exe)
for data in test_reader():
# infer a batch
pred = exe.run(
inference_program,
feed=utils.data2tensor(data, place),
fetch_list=fetch_targets,
return_numpy=True)
for i, val in enumerate(data):
class3_label, class2_label = utils.get_predict_label(
pred[0][i, 1])
pos_prob = pred[0][i, 1]
neg_prob = 1 - pos_prob
print("predict label: %d, pos_prob: %f, neg_prob: %f" %
(class3_label, pos_prob, neg_prob))
def main(args):
# train mode
if args.mode == "train":
# prepare_data to get word_dict, train_reader
word_dict, train_reader = utils.prepare_data(args.train_data_path,
args.word_dict_path,
args.batch_size, args.mode)
train_net(train_reader, word_dict, args.model_type, args.use_gpu,
args.is_parallel, args.model_path, args.lr, args.batch_size,
args.num_passes)
# train mode
if args.mode == "finetune":
# prepare_data to get word_dict, train_reader
word_dict, train_reader = utils.prepare_data(args.train_data_path,
args.word_dict_path,
args.batch_size, args.mode)
retrain_net(train_reader, word_dict, args.model_type, args.use_gpu,
args.is_parallel, args.model_path, args.lr, args.batch_size,
args.num_passes)
# eval mode
elif args.mode == "eval":
# prepare_data to get word_dict, test_reader
word_dict, test_reader = utils.prepare_data(args.test_data_path,
args.word_dict_path,
args.batch_size, args.mode)
eval_net(test_reader, args.use_gpu, args.model_path)
# infer mode
elif args.mode == "infer":
# prepare_data to get word_dict, test_reader
word_dict, test_reader = utils.prepare_data(args.test_data_path,
args.word_dict_path,
args.batch_size, args.mode)
infer_net(test_reader, args.use_gpu, args.model_path)
if __name__ == "__main__":
args = parse_args()
main(args)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# coding: utf-8
import sys
import os
import time
import unittest
import contextlib
import logging
import argparse
import ast
import utils
import paddle.fluid as fluid
import paddle_hub as hub
from nets import bow_net
from nets import cnn_net
from nets import lstm_net
from nets import bilstm_net
from nets import gru_net
logger = logging.getLogger("paddle-fluid")
logger.setLevel(logging.INFO)
def parse_args():
parser = argparse.ArgumentParser("Sentiment Classification.")
# training data path
parser.add_argument(
"--train_data_path",
type=str,
required=False,
help="The path of trainning data. Should be given in train mode!")
# test data path
parser.add_argument(
"--test_data_path",
type=str,
required=False,
help="The path of test data. Should be given in eval or infer mode!")
# word_dict path
parser.add_argument(
"--word_dict_path",
type=str,
required=True,
help="The path of word dictionary.")
# current mode
parser.add_argument(
"--mode",
type=str,
required=True,
choices=['train', 'eval', 'infer'],
help="train/eval/infer mode")
# model type
parser.add_argument(
"--model_type", type=str, default="bow_net", help="type of model")
# model save path parser.add_argument(
parser.add_argument(
"--model_path",
type=str,
default="models",
required=True,
help="The path to saved the trained models.")
# Number of passes for the training task.
parser.add_argument(
"--num_passes",
type=int,
default=3,
help="Number of passes for the training task.")
# Batch size
parser.add_argument(
"--batch_size",
type=int,
default=256,
help="The number of training examples in one forward/backward pass.")
# lr value for training
parser.add_argument(
"--lr", type=float, default=0.002, help="The lr value for training.")
# Whether to use gpu
parser.add_argument(
"--use_gpu",
type=ast.literal_eval,
default=False,
help="Whether to use gpu to train the model.")
# parallel train
parser.add_argument(
"--is_parallel",
type=ast.literal_eval,
default=False,
help="Whether to train the model in parallel.")
args = parser.parse_args()
return args
def bow_net_module(data,
label,
dict_dim,
emb_dim=128,
hid_dim=128,
hid_dim2=96,
class_dim=2):
"""
Bow net
"""
module_dir = "./model/test_create_module"
# embedding layer
emb = fluid.layers.embedding(
input=data, size=[dict_dim, emb_dim], param_attr="bow_embedding")
# bow layer
bow = fluid.layers.sequence_pool(input=emb, pool_type='sum')
bow_tanh = fluid.layers.tanh(bow)
# full connect layer
fc_1 = fluid.layers.fc(
input=bow_tanh, size=hid_dim, act="tanh", name="bow_fc1")
fc_2 = fluid.layers.fc(
input=fc_1, size=hid_dim2, act="tanh", name="bow_fc2")
# softmax layer
prediction = fluid.layers.fc(
input=[fc_2], size=class_dim, act="softmax", name="fc_softmax")
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost)
acc = fluid.layers.accuracy(input=prediction, label=label)
return avg_cost, acc, prediction, emb
def train_net(train_reader,
word_dict,
network_name,
use_gpu,
parallel,
save_dirname,
lr=0.002,
batch_size=128,
pass_num=10):
"""
train network
"""
if network_name == "bilstm_net":
network = bilstm_net
elif network_name == "bow_net":
network = bow_net
elif network_name == "cnn_net":
network = cnn_net
elif network_name == "lstm_net":
network = lstm_net
elif network_name == "gru_net":
network = gru_net
else:
print("unknown network type")
return
label = fluid.layers.data(name="label", shape=[1], dtype="int64")
data = fluid.layers.data(
name="words", shape=[1], dtype="int64", lod_level=1)
cost, acc, pred, emb = network(data, label, len(word_dict) + 2)
# set optimizer
sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=lr)
sgd_optimizer.minimize(cost)
# set place, executor, datafeeder
place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
feeder = fluid.DataFeeder(feed_list=["words", "label"], place=place)
exe.run(fluid.default_startup_program())
# start training...
for pass_id in range(pass_num):
data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0
for batch in train_reader():
avg_cost_np, avg_acc_np = exe.run(
fluid.default_main_program(),
feed=feeder.feed(batch),
fetch_list=[cost, acc],
return_numpy=True)
data_size = len(batch)
total_acc += data_size * avg_acc_np
total_cost += data_size * avg_cost_np
data_count += data_size
avg_cost = total_cost / data_count
avg_acc = total_acc / data_count
print("[train info]: pass_id: %d, avg_acc: %f, avg_cost: %f" %
(pass_id, avg_acc, avg_cost))
# save the model
module_dir = os.path.join(save_dirname, network_name)
config = hub.ModuleConfig(module_dir)
config.save_dict(word_dict=word_dict)
# saving config
input_desc = {"words": data.name}
output_desc = {"emb": emb.name}
config.register_feed_signature(input_desc)
config.register_fetch_signature(output_desc)
config.dump()
feed_var_name = config.feed_var_name("words")
fluid.io.save_inference_model(module_dir, [feed_var_name], emb, exe)
def retrain_net(train_reader,
word_dict,
network_name,
use_gpu,
parallel,
save_dirname,
lr=0.002,
batch_size=128,
pass_num=30):
"""
train network
"""
if network_name == "bilstm_net":
network = bilstm_net
elif network_name == "bow_net":
network = bow_net
elif network_name == "cnn_net":
network = cnn_net
elif network_name == "lstm_net":
network = lstm_net
elif network_name == "gru_net":
network = gru_net
else:
print("unknown network type")
return
dict_dim = len(word_dict) + 2
emb_dim = 128
hid_dim = 128
hid_dim2 = 96
class_dim = 2
module_path = "./models/bow_net"
module = hub.Module(module_dir=module_path)
main_program = fluid.Program()
startup_program = fluid.Program()
# use switch program to test fine-tuning
fluid.framework.switch_main_program(module.get_inference_program())
# remove feed fetch operator and variable
hub.ModuleUtils.remove_feed_fetch_op(fluid.default_main_program())
label = fluid.layers.data(name="label", shape=[1], dtype="int64")
data = module.get_feed_var("words")
emb = module.get_fetch_var("emb")
# bow layer
bow = fluid.layers.sequence_pool(input=emb, pool_type='sum')
bow_tanh = fluid.layers.tanh(bow)
# full connect layer
fc_1 = fluid.layers.fc(
input=bow_tanh, size=hid_dim, act="tanh", name="bow_fc1")
fc_2 = fluid.layers.fc(
input=fc_1, size=hid_dim2, act="tanh", name="bow_fc2")
# softmax layer
pred = fluid.layers.fc(input=[fc_2], size=class_dim, act="softmax")
cost = fluid.layers.mean(
fluid.layers.cross_entropy(input=pred, label=label))
acc = fluid.layers.accuracy(input=pred, label=label)
# set optimizer
sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=lr)
sgd_optimizer.minimize(cost)
# set place, executor, datafeeder
place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
feeder = fluid.DataFeeder(feed_list=["words", "label"], place=place)
exe.run(fluid.default_startup_program())
# start training...
for pass_id in range(pass_num):
data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0
for batch in train_reader():
avg_cost_np, avg_acc_np = exe.run(
fluid.default_main_program(),
feed=feeder.feed(batch),
fetch_list=[cost, acc],
return_numpy=True)
data_size = len(batch)
total_acc += data_size * avg_acc_np
total_cost += data_size * avg_cost_np
data_count += data_size
avg_cost = total_cost / data_count
avg_acc = total_acc / data_count
print("[train info]: pass_id: %d, avg_acc: %f, avg_cost: %f" %
(pass_id, avg_acc, avg_cost))
# save the model
module_dir = os.path.join(save_dirname, network_name + "_retrain")
fluid.io.save_inference_model(module_dir, ["words"], emb, exe)
config = hub.ModuleConfig(module_dir)
config.save_dict(word_dict=word_dict)
config.dump()
def main(args):
# prepare_data to get word_dict, train_reader
word_dict, train_reader = utils.prepare_data(
args.train_data_path, args.word_dict_path, args.batch_size, args.mode)
train_net(train_reader, word_dict, args.model_type, args.use_gpu,
args.is_parallel, args.model_path, args.lr, args.batch_size,
args.num_passes)
# NOTE(ZeyuChen): can't run train_net and retrain_net together
# retrain_net(train_reader, word_dict, args.model_type, args.use_gpu,
# args.is_parallel, args.model_path, args.lr, args.batch_size,
# args.num_passes)
if __name__ == "__main__":
args = parse_args()
main(args)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# coding: utf-8
import sys
import os
import time
import unittest
import contextlib
import logging
import argparse
import ast
import utils
import paddle.fluid as fluid
import paddle_hub as hub
from nets import bow_net
from nets import cnn_net
from nets import lstm_net
from nets import bilstm_net
from nets import gru_net
logger = logging.getLogger("paddle-fluid")
logger.setLevel(logging.INFO)
def parse_args():
parser = argparse.ArgumentParser("Sentiment Classification.")
# training data path
parser.add_argument(
"--train_data_path",
type=str,
required=False,
help="The path of trainning data. Should be given in train mode!")
# test data path
parser.add_argument(
"--test_data_path",
type=str,
required=False,
help="The path of test data. Should be given in eval or infer mode!")
# word_dict path
parser.add_argument(
"--word_dict_path",
type=str,
required=True,
help="The path of word dictionary.")
# current mode
parser.add_argument(
"--mode",
type=str,
required=True,
choices=['train', 'eval', 'infer'],
help="train/eval/infer mode")
# model type
parser.add_argument(
"--model_type", type=str, default="bow_net", help="type of model")
# model save path
parser.add_argument(
"--model_path",
type=str,
default="models",
required=True,
help="The path to saved the trained models.")
# Number of passes for the training task.
parser.add_argument(
"--num_passes",
type=int,
default=10,
help="Number of passes for the training task.")
# Batch size
parser.add_argument(
"--batch_size",
type=int,
default=256,
help="The number of training examples in one forward/backward pass.")
# lr value for training
parser.add_argument(
"--lr", type=float, default=0.002, help="The lr value for training.")
# Whether to use gpu
parser.add_argument(
"--use_gpu",
type=ast.literal_eval,
default=False,
help="Whether to use gpu to train the model.")
# parallel train
parser.add_argument(
"--is_parallel",
type=ast.literal_eval,
default=False,
help="Whether to train the model in parallel.")
args = parser.parse_args()
return args
def retrain_net(train_reader,
word_dict,
network_name,
use_gpu,
parallel,
save_dirname,
lr=0.002,
batch_size=128,
pass_num=30):
"""
train network
"""
if network_name == "bilstm_net":
network = bilstm_net
elif network_name == "bow_net":
network = bow_net
elif network_name == "cnn_net":
network = cnn_net
elif network_name == "lstm_net":
network = lstm_net
elif network_name == "gru_net":
network = gru_net
else:
print("unknown network type")
return
dict_dim = len(word_dict) + 2
emb_dim = 128
hid_dim = 128
hid_dim2 = 96
class_dim = 2
module_path = "./models/bow_net"
module = hub.Module(module_dir=module_path)
main_program = fluid.Program()
startup_program = fluid.Program()
# use switch program to test fine-tuning
fluid.framework.switch_main_program(module.get_inference_program())
# remove feed fetch operator and variable
hub.ModuleUtils.remove_feed_fetch_op(fluid.default_main_program())
# remove_feed_fetch_op(fluid.default_main_program())
label = fluid.layers.data(name="label", shape=[1], dtype="int64")
#data = fluid.default_main_program().global_block().var("words")
data = module.get_feed_var("words")
#TODO(ZeyuChen): how to get output paramter according to proto config
emb = module.get_fetch_var("emb")
# # # embedding layer
# emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
# #input=data, size=[dict_dim, emb_dim], param_attr="bow_embedding")
# # bow layer
bow = fluid.layers.sequence_pool(input=emb, pool_type='sum')
bow_tanh = fluid.layers.tanh(bow)
# full connect layer
fc_1 = fluid.layers.fc(
input=bow_tanh, size=hid_dim, act="tanh", name="bow_fc1")
fc_2 = fluid.layers.fc(
input=fc_1, size=hid_dim2, act="tanh", name="bow_fc2")
# softmax layer
pred = fluid.layers.fc(input=[fc_2], size=class_dim, act="softmax")
# print(fluid.default_main_program())
cost = fluid.layers.mean(
fluid.layers.cross_entropy(input=pred, label=label))
acc = fluid.layers.accuracy(input=pred, label=label)
# set optimizer
sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=lr)
sgd_optimizer.minimize(cost)
# set place, executor, datafeeder
place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
feeder = fluid.DataFeeder(feed_list=["words", "label"], place=place)
exe.run(fluid.default_startup_program())
# start training...
for pass_id in range(pass_num):
data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0
for batch in train_reader():
avg_cost_np, avg_acc_np = exe.run(
fluid.default_main_program(),
feed=feeder.feed(batch),
fetch_list=[cost, acc],
return_numpy=True)
data_size = len(batch)
total_acc += data_size * avg_acc_np
total_cost += data_size * avg_cost_np
data_count += data_size
avg_cost = total_cost / data_count
avg_acc = total_acc / data_count
print("[train info]: pass_id: %d, avg_acc: %f, avg_cost: %f" %
(pass_id, avg_acc, avg_cost))
# save the model
module_dir = os.path.join(save_dirname, network_name + "_retrain")
fluid.io.save_inference_model(module_dir, ["words"], emb, exe)
input_desc = {"words": data.name}
output_desc = {"emb": emb.name}
config = hub.ModuleConfig(module_dir)
config.save_dict(word_dict=word_dict)
config.dump()
def main(args):
# prepare_data to get word_dict, train_reader
word_dict, train_reader = utils.prepare_data(
args.train_data_path, args.word_dict_path, args.batch_size, args.mode)
retrain_net(train_reader, word_dict, args.model_type, args.use_gpu,
args.is_parallel, args.model_path, args.lr, args.batch_size,
args.num_passes)
if __name__ == "__main__":
args = parse_args()
main(args)
python sentiment_classify.py --train_data_path ./data/train_data/corpus.train --word_dict_path ./data/train.vocab --mode train --model_path ./models
import os
import sys
import time
import numpy as np
import random
import paddle.fluid as fluid
import paddle
def get_predict_label(pos_prob):
neg_prob = 1 - pos_prob
# threshold should be (1, 0.5)
neu_threshold = 0.55
if neg_prob > neu_threshold:
class3_label = 0
elif pos_prob > neu_threshold:
class3_label = 2
else:
class3_label = 1
if pos_prob >= neg_prob:
class2_label = 2
else:
class2_label = 0
return class3_label, class2_label
def to_lodtensor(data, place):
"""
convert ot LODtensor
"""
seq_lens = [len(seq) for seq in data]
cur_len = 0
lod = [cur_len]
for l in seq_lens:
cur_len += l
lod.append(cur_len)
flattened_data = np.concatenate(data, axis=0).astype("int64")
flattened_data = flattened_data.reshape([len(flattened_data), 1])
res = fluid.LoDTensor()
res.set(flattened_data, place)
res.set_lod([lod])
return res
def data2tensor(data, place):
"""
data2tensor
"""
input_seq = to_lodtensor(map(lambda x: x[0], data), place)
return {"words": input_seq}
def data_reader(file_path, word_dict, is_shuffle=True):
"""
Convert word sequence into slot
"""
unk_id = len(word_dict)
all_data = []
with open(file_path, "r") as fin:
for line in fin:
cols = line.strip().split("\t")
label = int(cols[0])
wids = [
word_dict[x] if x in word_dict else unk_id
for x in cols[1].split(" ")
]
all_data.append((wids, label))
if is_shuffle:
random.shuffle(all_data)
def reader():
for doc, label in all_data:
yield doc, label
return reader
def load_vocab(file_path):
"""
load the given vocabulary
"""
vocab = {}
with open(file_path) as f:
wid = 0
for line in f:
vocab[line.strip()] = wid
wid += 1
vocab["<unk>"] = len(vocab)
return vocab
def prepare_data(data_path, word_dict_path, batch_size, mode):
"""
prepare data
"""
assert os.path.exists(
word_dict_path), "The given word dictionary dose not exist."
if mode == "train":
assert os.path.exists(
data_path), "The given training data does not exist."
if mode == "eval" or mode == "infer":
assert os.path.exists(data_path), "The given test data does not exist."
word_dict = load_vocab(word_dict_path)
if mode == "train":
train_reader = paddle.batch(
data_reader(data_path, word_dict, True), batch_size)
return word_dict, train_reader
else:
test_reader = paddle.batch(
data_reader(data_path, word_dict, False), batch_size)
return word_dict, test_reader
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册