提交 e693a685 编写于 作者: J JesseyXujin 提交者: pkpk

add bow net (#3894)

上级 5fee7182
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
| 模型 | dev | | 模型 | dev |
| :------| :------ | | :------| :------ |
| CNN | 90.6% | | CNN | 90.6% |
| BOW | 90.1% |
动态图文档请见[Dygraph](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/user_guides/howto/dygraph/DyGraph.html) 动态图文档请见[Dygraph](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/user_guides/howto/dygraph/DyGraph.html)
......
...@@ -58,6 +58,7 @@ run_type_g.add_arg("do_val", bool, True, "Whether to perform evaluation.") ...@@ -58,6 +58,7 @@ run_type_g.add_arg("do_val", bool, True, "Whether to perform evaluation.")
run_type_g.add_arg("do_infer", bool, False, "Whether to perform inference.") run_type_g.add_arg("do_infer", bool, False, "Whether to perform inference.")
run_type_g.add_arg("profile_steps", int, 15000, run_type_g.add_arg("profile_steps", int, 15000,
"The steps interval to record the performance.") "The steps interval to record the performance.")
train_g.add_arg("model_type", str, "bow_net", "Model type of training.")
parser.add_argument("--ce", action="store_true", help="run ce") parser.add_argument("--ce", action="store_true", help="run ce")
args = parser.parse_args() args = parser.parse_args()
...@@ -87,7 +88,8 @@ if args.ce: ...@@ -87,7 +88,8 @@ if args.ce:
seed = 90 seed = 90
np.random.seed(seed) np.random.seed(seed)
fluid.default_startup_program().random_seed = seed fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed fluid.default_main_program().random_seed = seed
def train(): def train():
with fluid.dygraph.guard(place): with fluid.dygraph.guard(place):
...@@ -96,7 +98,7 @@ def train(): ...@@ -96,7 +98,7 @@ def train():
seed = 90 seed = 90
np.random.seed(seed) np.random.seed(seed)
fluid.default_startup_program().random_seed = seed fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed fluid.default_main_program().random_seed = seed
processor = reader.SentaProcessor( processor = reader.SentaProcessor(
data_dir=args.data_dir, data_dir=args.data_dir,
vocab_path=args.vocab_path, vocab_path=args.vocab_path,
...@@ -106,7 +108,7 @@ def train(): ...@@ -106,7 +108,7 @@ def train():
num_train_examples = processor.get_num_examples(phase="train") num_train_examples = processor.get_num_examples(phase="train")
max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count
if not args.ce: if not args.ce:
train_data_generator = processor.data_generator( train_data_generator = processor.data_generator(
batch_size=args.batch_size, batch_size=args.batch_size,
...@@ -131,9 +133,12 @@ def train(): ...@@ -131,9 +133,12 @@ def train():
phase='dev', phase='dev',
epoch=args.epoch, epoch=args.epoch,
shuffle=False) shuffle=False)
cnn_net = nets.CNN("cnn_net", args.vocab_size, args.batch_size, if args.model_type == 'cnn_net':
args.padding_size) model = nets.CNN("cnn_net", args.vocab_size, args.batch_size,
args.padding_size)
elif args.model_type == 'bow_net':
model = nets.BOW("bow_net", args.vocab_size, args.batch_size,
args.padding_size)
sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=args.lr) sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=args.lr)
steps = 0 steps = 0
total_cost, total_acc, total_num_seqs = [], [], [] total_cost, total_acc, total_num_seqs = [], [], []
...@@ -160,13 +165,13 @@ def train(): ...@@ -160,13 +165,13 @@ def train():
np.array([x[1] for x in data]).astype('int64').reshape( np.array([x[1] for x in data]).astype('int64').reshape(
args.batch_size, 1)) args.batch_size, 1))
cnn_net.train() model.train()
avg_cost, prediction, acc = cnn_net(doc, label) avg_cost, prediction, acc = model(doc, label)
avg_cost.backward() avg_cost.backward()
np_mask = (doc.numpy() != args.vocab_size).astype('int32') np_mask = (doc.numpy() != args.vocab_size).astype('int32')
word_num = np.sum(np_mask) word_num = np.sum(np_mask)
sgd_optimizer.minimize(avg_cost) sgd_optimizer.minimize(avg_cost)
cnn_net.clear_gradients() model.clear_gradients()
total_cost.append(avg_cost.numpy() * word_num) total_cost.append(avg_cost.numpy() * word_num)
total_acc.append(acc.numpy() * word_num) total_acc.append(acc.numpy() * word_num)
total_num_seqs.append(word_num) total_num_seqs.append(word_num)
...@@ -185,7 +190,7 @@ def train(): ...@@ -185,7 +190,7 @@ def train():
if steps % args.validation_steps == 0: if steps % args.validation_steps == 0:
total_eval_cost, total_eval_acc, total_eval_num_seqs = [], [], [] total_eval_cost, total_eval_acc, total_eval_num_seqs = [], [], []
cnn_net.eval() model.eval()
eval_steps = 0 eval_steps = 0
for eval_batch_id, eval_data in enumerate( for eval_batch_id, eval_data in enumerate(
eval_data_generator()): eval_data_generator()):
...@@ -201,7 +206,7 @@ def train(): ...@@ -201,7 +206,7 @@ def train():
np.array([x[1] for x in eval_data]).astype( np.array([x[1] for x in eval_data]).astype(
'int64').reshape(args.batch_size, 1)) 'int64').reshape(args.batch_size, 1))
eval_doc = to_variable(eval_np_doc.reshape(-1, 1)) eval_doc = to_variable(eval_np_doc.reshape(-1, 1))
eval_avg_cost, eval_prediction, eval_acc = cnn_net( eval_avg_cost, eval_prediction, eval_acc = model(
eval_doc, eval_label) eval_doc, eval_label)
eval_np_mask = ( eval_np_mask = (
...@@ -226,17 +231,21 @@ def train(): ...@@ -226,17 +231,21 @@ def train():
eval_steps / used_time)) eval_steps / used_time))
time_begin = time.time() time_begin = time.time()
if args.ce: if args.ce:
print("kpis\ttrain_loss\t%0.3f" % (np.sum(total_eval_cost) / np.sum(total_eval_num_seqs))) print("kpis\ttrain_loss\t%0.3f" %
print("kpis\ttrain_acc\t%0.3f" % (np.sum(total_eval_acc) / np.sum(total_eval_num_seqs))) (np.sum(total_eval_cost) /
np.sum(total_eval_num_seqs)))
print("kpis\ttrain_acc\t%0.3f" %
(np.sum(total_eval_acc) /
np.sum(total_eval_num_seqs)))
if steps % args.save_steps == 0: if steps % args.save_steps == 0:
save_path = "save_dir_" + str(steps) save_path = "save_dir_" + str(steps)
print('save model to: ' + save_path) print('save model to: ' + save_path)
fluid.save_dygraph(cnn_net.state_dict(), fluid.dygraph.save_dygraph(model.state_dict(),
save_path) save_path)
if enable_profile: if enable_profile:
print('save profile result into /tmp/profile_file') print('save profile result into /tmp/profile_file')
return return
def infer(): def infer():
...@@ -251,10 +260,12 @@ def infer(): ...@@ -251,10 +260,12 @@ def infer():
phase='infer', phase='infer',
epoch=args.epoch, epoch=args.epoch,
shuffle=False) shuffle=False)
if args.model_type == 'cnn_net':
cnn_net_infer = nets.CNN("cnn_net", args.vocab_size, args.batch_size, model_infer = nets.CNN("cnn_net", args.vocab_size, args.batch_size,
args.padding_size) args.padding_size)
elif args.model_type == 'bow_net':
model_infer = nets.BOW("bow_net", args.vocab_size, args.batch_size,
args.padding_size)
print('Do inferring ...... ') print('Do inferring ...... ')
total_acc, total_num_seqs = [], [] total_acc, total_num_seqs = [], []
...@@ -277,7 +288,7 @@ def infer(): ...@@ -277,7 +288,7 @@ def infer():
np.array([x[1] for x in data]).astype('int64').reshape( np.array([x[1] for x in data]).astype('int64').reshape(
args.batch_size, 1)) args.batch_size, 1))
_, _, acc = cnn_net_infer(doc, label) _, _, acc = model_infer(doc, label)
mask = (np_doc != args.vocab_size).astype('int32') mask = (np_doc != args.vocab_size).astype('int32')
word_num = np.sum(mask) word_num = np.sum(mask)
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC, Embedding from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC, Embedding
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
import numpy as np
class SimpleConvPool(fluid.dygraph.Layer): class SimpleConvPool(fluid.dygraph.Layer):
...@@ -87,3 +88,47 @@ class CNN(fluid.dygraph.Layer): ...@@ -87,3 +88,47 @@ class CNN(fluid.dygraph.Layer):
return avg_cost, prediction, acc return avg_cost, prediction, acc
else: else:
return prediction return prediction
class BOW(fluid.dygraph.Layer):
    """Bag-of-words text classifier (PaddlePaddle dygraph).

    Pipeline: embedding lookup -> zero out padding embeddings ->
    sum over the sequence dimension -> tanh -> FC(tanh) -> FC(tanh) ->
    FC(softmax).
    """

    def __init__(self, name_scope, dict_dim, batch_size, seq_len):
        """
        Args:
            name_scope: name-scope prefix for the layer's parameters.
            dict_dim: vocabulary size; token id ``dict_dim`` is treated as
                padding (see the mask in ``forward``).
            batch_size: sequences per batch (kept for signature parity with
                the CNN model; not used in computation here).
            seq_len: fixed (padded) sequence length.
        """
        super(BOW, self).__init__(name_scope)
        self.dict_dim = dict_dim
        self.emb_dim = 128
        self.hid_dim = 128
        self.fc_hid_dim = 96
        self.class_dim = 2
        self.batch_size = batch_size
        self.seq_len = seq_len
        # dict_dim + 1 rows: the extra row holds the padding token's
        # embedding, which forward() masks to zero anyway.
        self.embedding = Embedding(
            self.full_name(),
            size=[self.dict_dim + 1, self.emb_dim],
            dtype='float32',
            is_sparse=False)
        self._fc1 = FC(self.full_name(), size=self.fc_hid_dim, act="tanh")
        self._fc2 = FC(self.full_name(), size=self.class_dim, act="tanh")
        self._fc_prediction = FC(self.full_name(),
                                 size=self.class_dim,
                                 act="softmax")

    def forward(self, inputs, label=None):
        """Run the network.

        Args:
            inputs: int64 Variable of token ids, flattened to shape (-1, 1)
                by the caller (batch_size * seq_len rows).
            label: optional int64 Variable of shape (batch_size, 1).

        Returns:
            (avg_cost, prediction, acc) when ``label`` is given,
            otherwise ``prediction`` alone.
        """
        emb = self.embedding(inputs)
        # Zero out embeddings of padding tokens (id == dict_dim).
        o_np_mask = (inputs.numpy() != self.dict_dim).astype('float32')
        mask_emb = fluid.layers.expand(
            to_variable(o_np_mask), [1, self.hid_dim])
        emb = emb * mask_emb
        # BUGFIX: reshape to (batch, seq_len, hid_dim) and sum over the
        # sequence dimension (dim=1). The original reshaped to
        # (-1, 1, seq_len, hid_dim) and reduced dim=1 — a singleton axis —
        # so the "sum" was a no-op squeeze and the net never aggregated
        # over the sequence, i.e. it was not a bag-of-words model.
        emb = fluid.layers.reshape(
            emb, shape=[-1, self.seq_len, self.hid_dim])
        bow_1 = fluid.layers.reduce_sum(emb, dim=1)
        bow_1 = fluid.layers.tanh(bow_1)
        fc_1 = self._fc1(bow_1)
        fc_2 = self._fc2(fc_1)
        prediction = self._fc_prediction(fc_2)
        # BUGFIX: compare against None explicitly — truth-testing a
        # multi-element fluid Variable is ill-defined and can raise.
        if label is not None:
            cost = fluid.layers.cross_entropy(input=prediction, label=label)
            avg_cost = fluid.layers.mean(x=cost)
            acc = fluid.layers.accuracy(input=prediction, label=label)
            return avg_cost, prediction, acc
        else:
            return prediction
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册