Commit 1d1696c2 authored by wangxiao1021

Merge branch 'master' of https://github.com/PaddlePaddle/hapi

......@@ -16,14 +16,60 @@
import paddle.fluid as fluid
from hapi.metrics import Accuracy
from hapi.configure import Config
from hapi.text.bert import BertEncoder
from paddle.fluid.dygraph import Linear, Layer
from hapi.model import set_device, Model, SoftmaxWithCrossEntropy, Input
from cls import ClsModelLayer
import hapi.text.tokenizer.tokenization as tokenization
from hapi.text.bert import Optimizer, BertConfig, BertDataLoader, BertInputExample
def train():
class ClsModelLayer(Model):
"""
classify model
"""
def __init__(self,
args,
config,
num_labels,
return_pooled_out=True,
use_fp16=False):
super(ClsModelLayer, self).__init__()
self.config = config
self.use_fp16 = use_fp16
self.loss_scaling = args.loss_scaling
self.bert_layer = BertEncoder(
config=self.config, return_pooled_out=True, use_fp16=self.use_fp16)
self.cls_fc = Linear(
input_dim=self.config["hidden_size"],
output_dim=num_labels,
param_attr=fluid.ParamAttr(
name="cls_out_w",
initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
bias_attr=fluid.ParamAttr(
name="cls_out_b", initializer=fluid.initializer.Constant(0.)))
def forward(self, src_ids, position_ids, sentence_ids, input_mask):
"""
forward
"""
enc_output, next_sent_feat = self.bert_layer(src_ids, position_ids,
sentence_ids, input_mask)
cls_feats = fluid.layers.dropout(
x=next_sent_feat,
dropout_prob=0.1,
dropout_implementation="upscale_in_train")
pred = self.cls_fc(cls_feats)
return pred
def main():
config = Config(yaml_file="./bert.yaml")
config.build()
......@@ -35,8 +81,6 @@ def train():
bert_config = BertConfig(config.bert_config_path)
bert_config.print_config()
trainer_count = fluid.dygraph.parallel.Env().nranks
tokenizer = tokenization.FullTokenizer(
vocab_file=config.vocab_path, do_lower_case=config.do_lower_case)
......@@ -52,14 +96,24 @@ def train():
return BertInputExample(
uid=uid, text_a=text_a, text_b=text_b, label=label)
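For context, the hunk above truncates the body of `mnli_line_processor`; a minimal version consistent with the visible return statement might look like the sketch below. The MNLI TSV column indices are an assumption for illustration, not part of this commit.

```python
# Hypothetical sketch; this diff shows only the return statement.
def mnli_line_processor(line_id, line):
    if line_id == "0":  # skip the TSV header row
        return None
    uid, text_a, text_b, label = line[0], line[8], line[9], line[-1]
    return BertInputExample(
        uid=uid, text_a=text_a, text_b=text_b, label=label)
```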
bert_dataloader = BertDataLoader(
train_dataloader = BertDataLoader(
"./data/glue_data/MNLI/train.tsv",
tokenizer, ["contradiction", "entailment", "neutral"],
max_seq_length=64,
batch_size=32,
max_seq_length=config.max_seq_len,
batch_size=config.batch_size,
line_processor=mnli_line_processor)
num_train_examples = len(bert_dataloader.dataset)
dev_dataloader = BertDataLoader(
"./data/glue_data/MNLI/dev_matched.tsv",
tokenizer, ["contradiction", "entailment", "neutral"],
max_seq_length=config.max_seq_len,
batch_size=config.batch_size,
line_processor=mnli_line_processor,
shuffle=False,
phase="predict")
trainer_count = fluid.dygraph.parallel.Env().nranks
num_train_examples = len(train_dataloader.dataset)
max_train_steps = config.epoch * num_train_examples // config.batch_size // trainer_count
warmup_steps = int(max_train_steps * config.warmup_proportion)
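These two lines reproduce the counts printed in the training log further down this page (392,703 examples, batch size 64, 3 epochs, 1 trainer); a quick worked check:

```python
# Worked check against the "Max train steps" / "Num warmup steps" log lines below.
max_train_steps = 3 * 392703 // 64 // 1    # epoch * examples // batch // trainers -> 18407
warmup_steps = int(max_train_steps * 0.1)  # warmup_proportion 0.1 -> 1840
```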
......@@ -82,7 +136,6 @@ def train():
config,
bert_config,
len(["contradiction", "entailment", "neutral"]),
is_training=True,
return_pooled_out=True)
optimizer = Optimizer(
......@@ -106,10 +159,15 @@ def train():
cls_model.bert_layer.init_parameters(
config.init_pretraining_params, verbose=config.verbose)
cls_model.fit(train_data=bert_dataloader.dataloader, epochs=config.epoch)
# do train
cls_model.fit(train_data=train_dataloader.dataloader,
epochs=config.epoch,
save_dir=config.checkpoints)
return cls_model
# do eval
cls_model.evaluate(
        eval_data=dev_dataloader.dataloader, batch_size=config.batch_size)
if __name__ == '__main__':
cls_model = train()
main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"dygraph transformer layers"
import six
import json
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear, Layer
from hapi.text.bert import BertEncoder
from hapi.model import Model
class ClsModelLayer(Model):
"""
classify model
"""
def __init__(self,
args,
config,
num_labels,
is_training=True,
return_pooled_out=True,
use_fp16=False):
super(ClsModelLayer, self).__init__()
self.config = config
self.is_training = is_training
self.use_fp16 = use_fp16
self.loss_scaling = args.loss_scaling
self.bert_layer = BertEncoder(
config=self.config, return_pooled_out=True, use_fp16=self.use_fp16)
self.cls_fc = Linear(
input_dim=self.config["hidden_size"],
output_dim=num_labels,
param_attr=fluid.ParamAttr(
name="cls_out_w",
initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
bias_attr=fluid.ParamAttr(
name="cls_out_b", initializer=fluid.initializer.Constant(0.)))
def forward(self, src_ids, position_ids, sentence_ids, input_mask):
"""
forward
"""
enc_output, next_sent_feat = self.bert_layer(src_ids, position_ids,
sentence_ids, input_mask)
cls_feats = fluid.layers.dropout(
x=next_sent_feat,
dropout_prob=0.1,
dropout_implementation="upscale_in_train")
logits = self.cls_fc(cls_feats)
return logits
......@@ -18,7 +18,7 @@ batch_size: 32
in_tokens: False
do_lower_case: True
random_seed: 5512
use_cuda: False
use_cuda: True
shuffle: True
do_train: True
do_test: True
......
......@@ -16,14 +16,60 @@
import paddle.fluid as fluid
from hapi.metrics import Accuracy
from hapi.configure import Config
from hapi.text.bert import BertEncoder
from paddle.fluid.dygraph import Linear, Layer
from hapi.model import set_device, Model, SoftmaxWithCrossEntropy, Input
from cls import ClsModelLayer
import hapi.text.tokenizer.tokenization as tokenization
from hapi.text.bert import Optimizer, BertConfig, BertDataLoader, BertInputExample
def train():
class ClsModelLayer(Model):
"""
classify model
"""
def __init__(self,
args,
config,
num_labels,
return_pooled_out=True,
use_fp16=False):
super(ClsModelLayer, self).__init__()
self.config = config
self.use_fp16 = use_fp16
self.loss_scaling = args.loss_scaling
self.bert_layer = BertEncoder(
config=self.config, return_pooled_out=True, use_fp16=self.use_fp16)
self.cls_fc = Linear(
input_dim=self.config["hidden_size"],
output_dim=num_labels,
param_attr=fluid.ParamAttr(
name="cls_out_w",
initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
bias_attr=fluid.ParamAttr(
name="cls_out_b", initializer=fluid.initializer.Constant(0.)))
def forward(self, src_ids, position_ids, sentence_ids, input_mask):
"""
forward
"""
enc_output, next_sent_feat = self.bert_layer(src_ids, position_ids,
sentence_ids, input_mask)
cls_feats = fluid.layers.dropout(
x=next_sent_feat,
dropout_prob=0.1,
dropout_implementation="upscale_in_train")
pred = self.cls_fc(cls_feats)
return pred
def main():
config = Config(yaml_file="./bert.yaml")
config.build()
......@@ -35,8 +81,6 @@ def train():
bert_config = BertConfig(config.bert_config_path)
bert_config.print_config()
trainer_count = fluid.dygraph.parallel.Env().nranks
tokenizer = tokenization.FullTokenizer(
vocab_file=config.vocab_path, do_lower_case=config.do_lower_case)
......@@ -52,15 +96,26 @@ def train():
return BertInputExample(
uid=uid, text_a=text_a, text_b=text_b, label=label)
bert_dataloader = BertDataLoader(
train_dataloader = BertDataLoader(
"./data/glue_data/MNLI/train.tsv",
tokenizer, ["contradiction", "entailment", "neutral"],
max_seq_length=64,
batch_size=32,
max_seq_length=config.max_seq_len,
batch_size=config.batch_size,
line_processor=mnli_line_processor,
mode="leveldb")
mode="leveldb",
phase="train")
num_train_examples = len(bert_dataloader.dataset)
dev_dataloader = BertDataLoader(
"./data/glue_data/MNLI/dev_matched.tsv",
tokenizer, ["contradiction", "entailment", "neutral"],
max_seq_length=config.max_seq_len,
batch_size=config.batch_size,
line_processor=mnli_line_processor,
shuffle=False,
phase="predict")
trainer_count = fluid.dygraph.parallel.Env().nranks
num_train_examples = len(train_dataloader.dataset)
max_train_steps = config.epoch * num_train_examples // config.batch_size // trainer_count
warmup_steps = int(max_train_steps * config.warmup_proportion)
......@@ -83,7 +138,6 @@ def train():
config,
bert_config,
len(["contradiction", "entailment", "neutral"]),
is_training=True,
return_pooled_out=True)
optimizer = Optimizer(
......@@ -107,10 +161,15 @@ def train():
cls_model.bert_layer.init_parameters(
config.init_pretraining_params, verbose=config.verbose)
cls_model.fit(train_data=bert_dataloader.dataloader, epochs=config.epoch)
# do train
cls_model.fit(train_data=train_dataloader.dataloader,
epochs=config.epoch,
save_dir=config.checkpoints)
return cls_model
# do eval
cls_model.evaluate(
        eval_data=dev_dataloader.dataloader, batch_size=config.batch_size)
if __name__ == '__main__':
cls_model = train()
main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"dygraph transformer layers"
import six
import json
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear, Layer
from hapi.text.bert import BertEncoder
from hapi.model import Model
class ClsModelLayer(Model):
"""
classify model
"""
def __init__(self,
args,
config,
num_labels,
is_training=True,
return_pooled_out=True,
use_fp16=False):
super(ClsModelLayer, self).__init__()
self.config = config
self.is_training = is_training
self.use_fp16 = use_fp16
self.loss_scaling = args.loss_scaling
self.bert_layer = BertEncoder(
config=self.config, return_pooled_out=True, use_fp16=self.use_fp16)
self.cls_fc = Linear(
input_dim=self.config["hidden_size"],
output_dim=num_labels,
param_attr=fluid.ParamAttr(
name="cls_out_w",
initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
bias_attr=fluid.ParamAttr(
name="cls_out_b", initializer=fluid.initializer.Constant(0.)))
def forward(self, src_ids, position_ids, sentence_ids, input_mask):
"""
forward
"""
enc_output, next_sent_feat = self.bert_layer(src_ids, position_ids,
sentence_ids, input_mask)
cls_feats = fluid.layers.dropout(
x=next_sent_feat,
dropout_prob=0.1,
dropout_implementation="upscale_in_train")
logits = self.cls_fc(cls_feats)
return logits
2020-04-13 13:08:30,568-WARNING: use_shared_memory can only be used in multi-process mode(num_workers > 0), set use_shared_memory as False
W0413 13:08:31.584532 119379 device_context.cc:237] Please NOTE: device: 0, CUDA Capability: 70, Driver API Version: 10.1, Runtime API Version: 9.0
W0413 13:08:31.589192 119379 device_context.cc:245] device: 0, cuDNN Version: 7.5.
----------------------------------------------------------------------
bert_config_path: ./data/pretrained_models/uncased_L-12_H-768_A-12//bert_config.json
init_checkpoint: None
init_pretraining_params: ./data/pretrained_models/uncased_L-12_H-768_A-12//dygraph_params/
checkpoints: ./data/saved_model/mnli_models
epoch: 3
learning_rate: 5e-05
lr_scheduler: linear_warmup_decay
weight_decay: 0.01
warmup_proportion: 0.1
save_steps: 1000
validation_steps: 100
loss_scaling: 1.0
skip_steps: 10
data_dir: ./data/glue_data/MNLI/
vocab_path: ./data/pretrained_models/uncased_L-12_H-768_A-12//vocab.txt
max_seq_len: 128
batch_size: 64
in_tokens: False
do_lower_case: True
random_seed: 5512
use_cuda: True
shuffle: True
do_train: True
do_test: True
use_data_parallel: False
verbose: False
----------------------------------------------------------------------
attention_probs_dropout_prob: 0.1
hidden_act: gelu
hidden_dropout_prob: 0.1
hidden_size: 768
initializer_range: 0.02
intermediate_size: 3072
max_position_embeddings: 512
num_attention_heads: 12
num_hidden_layers: 12
type_vocab_size: 2
vocab_size: 30522
------------------------------------------------
Trainer count: 1
Num train examples: 392703
Max train steps: 18407
Num warmup steps: 1840
Epoch 1/3
step 10/12272 - loss: 1.1000 - acc_top1: 0.3531 - acc_top2: 0.6813 - 1s/step
step 20/12272 - loss: 1.1878 - acc_top1: 0.3578 - acc_top2: 0.6875 - 1s/step
step 30/12272 - loss: 1.0812 - acc_top1: 0.3708 - acc_top2: 0.6948 - 1s/step
step 40/12272 - loss: 1.1244 - acc_top1: 0.3773 - acc_top2: 0.6992 - 1s/step
step 50/12272 - loss: 1.1202 - acc_top1: 0.3756 - acc_top2: 0.7006 - 1s/step
step 60/12272 - loss: 1.1291 - acc_top1: 0.3703 - acc_top2: 0.6990 - 1s/step
step 70/12272 - loss: 1.0991 - acc_top1: 0.3634 - acc_top2: 0.6946 - 1s/step
step 80/12272 - loss: 1.0988 - acc_top1: 0.3602 - acc_top2: 0.6914 - 1s/step
step 90/12272 - loss: 1.0718 - acc_top1: 0.3646 - acc_top2: 0.6889 - 1s/step
step 100/12272 - loss: 1.0949 - acc_top1: 0.3638 - acc_top2: 0.6878 - 1s/step
step 110/12272 - loss: 1.1120 - acc_top1: 0.3608 - acc_top2: 0.6895 - 1s/step
step 120/12272 - loss: 1.1105 - acc_top1: 0.3622 - acc_top2: 0.6922 - 1s/step
step 130/12272 - loss: 1.0958 - acc_top1: 0.3623 - acc_top2: 0.6940 - 1s/step
step 140/12272 - loss: 1.0995 - acc_top1: 0.3636 - acc_top2: 0.6926 - 1s/step
step 150/12272 - loss: 1.1272 - acc_top1: 0.3671 - acc_top2: 0.6950 - 1s/step
step 160/12272 - loss: 1.0850 - acc_top1: 0.3697 - acc_top2: 0.6975 - 1s/step
step 170/12272 - loss: 1.0607 - acc_top1: 0.3691 - acc_top2: 0.6991 - 1s/step
step 180/12272 - loss: 1.0623 - acc_top1: 0.3707 - acc_top2: 0.6991 - 1s/step
step 190/12272 - loss: 1.1092 - acc_top1: 0.3697 - acc_top2: 0.6997 - 1s/step
step 200/12272 - loss: 1.1046 - acc_top1: 0.3713 - acc_top2: 0.7030 - 1s/step
step 210/12272 - loss: 1.0945 - acc_top1: 0.3720 - acc_top2: 0.7043 - 1s/step
step 220/12272 - loss: 1.0935 - acc_top1: 0.3719 - acc_top2: 0.7051 - 1s/step
step 230/12272 - loss: 1.1567 - acc_top1: 0.3742 - acc_top2: 0.7048 - 1s/step
step 240/12272 - loss: 1.0745 - acc_top1: 0.3766 - acc_top2: 0.7081 - 1s/step
step 250/12272 - loss: 1.0664 - acc_top1: 0.3756 - acc_top2: 0.7090 - 1s/step
step 260/12272 - loss: 1.0770 - acc_top1: 0.3751 - acc_top2: 0.7085 - 1s/step
step 270/12272 - loss: 1.1008 - acc_top1: 0.3730 - acc_top2: 0.7088 - 1s/step
step 280/12272 - loss: 1.0850 - acc_top1: 0.3737 - acc_top2: 0.7098 - 1s/step
step 290/12272 - loss: 1.0759 - acc_top1: 0.3747 - acc_top2: 0.7100 - 1s/step
step 300/12272 - loss: 1.0352 - acc_top1: 0.3758 - acc_top2: 0.7108 - 1s/step
step 310/12272 - loss: 1.0224 - acc_top1: 0.3786 - acc_top2: 0.7127 - 1s/step
step 320/12272 - loss: 1.0919 - acc_top1: 0.3800 - acc_top2: 0.7137 - 1s/step
step 330/12272 - loss: 1.0884 - acc_top1: 0.3825 - acc_top2: 0.7145 - 1s/step
step 340/12272 - loss: 1.1380 - acc_top1: 0.3849 - acc_top2: 0.7157 - 1s/step
step 350/12272 - loss: 0.9523 - acc_top1: 0.3890 - acc_top2: 0.7176 - 1s/step
step 360/12272 - loss: 0.9963 - acc_top1: 0.3922 - acc_top2: 0.7191 - 1s/step
step 370/12272 - loss: 1.1187 - acc_top1: 0.3955 - acc_top2: 0.7205 - 1s/step
step 380/12272 - loss: 0.9634 - acc_top1: 0.3988 - acc_top2: 0.7229 - 1s/step
step 390/12272 - loss: 0.9944 - acc_top1: 0.4017 - acc_top2: 0.7254 - 1s/step
step 400/12272 - loss: 1.1071 - acc_top1: 0.4044 - acc_top2: 0.7272 - 1s/step
step 410/12272 - loss: 0.9307 - acc_top1: 0.4070 - acc_top2: 0.7293 - 1s/step
step 420/12272 - loss: 1.1307 - acc_top1: 0.4087 - acc_top2: 0.7315 - 1s/step
step 430/12272 - loss: 0.9936 - acc_top1: 0.4110 - acc_top2: 0.7334 - 1s/step
step 440/12272 - loss: 0.9791 - acc_top1: 0.4139 - acc_top2: 0.7357 - 1s/step
step 450/12272 - loss: 1.0112 - acc_top1: 0.4147 - acc_top2: 0.7372 - 1s/step
step 460/12272 - loss: 0.8554 - acc_top1: 0.4179 - acc_top2: 0.7395 - 1s/step
step 470/12272 - loss: 0.9411 - acc_top1: 0.4198 - acc_top2: 0.7406 - 1s/step
step 480/12272 - loss: 0.8481 - acc_top1: 0.4231 - acc_top2: 0.7424 - 1s/step
step 490/12272 - loss: 1.0338 - acc_top1: 0.4261 - acc_top2: 0.7441 - 1s/step
step 500/12272 - loss: 0.9651 - acc_top1: 0.4281 - acc_top2: 0.7459 - 1s/step
step 510/12272 - loss: 0.8091 - acc_top1: 0.4306 - acc_top2: 0.7479 - 1s/step
step 520/12272 - loss: 1.0528 - acc_top1: 0.4325 - acc_top2: 0.7489 - 1s/step
step 530/12272 - loss: 0.9898 - acc_top1: 0.4338 - acc_top2: 0.7500 - 1s/step
step 540/12272 - loss: 0.7900 - acc_top1: 0.4364 - acc_top2: 0.7519 - 1s/step
step 550/12272 - loss: 0.9055 - acc_top1: 0.4389 - acc_top2: 0.7534 - 1s/step
step 560/12272 - loss: 1.0092 - acc_top1: 0.4410 - acc_top2: 0.7549 - 1s/step
step 570/12272 - loss: 0.7068 - acc_top1: 0.4441 - acc_top2: 0.7570 - 1s/step
step 580/12272 - loss: 0.9695 - acc_top1: 0.4455 - acc_top2: 0.7581 - 1s/step
step 590/12272 - loss: 0.8640 - acc_top1: 0.4487 - acc_top2: 0.7600 - 1s/step
step 600/12272 - loss: 0.9068 - acc_top1: 0.4514 - acc_top2: 0.7618 - 1s/step
step 610/12272 - loss: 0.9023 - acc_top1: 0.4524 - acc_top2: 0.7627 - 1s/step
step 620/12272 - loss: 0.7377 - acc_top1: 0.4552 - acc_top2: 0.7640 - 1s/step
step 630/12272 - loss: 0.8900 - acc_top1: 0.4574 - acc_top2: 0.7659 - 1s/step
step 640/12272 - loss: 0.8902 - acc_top1: 0.4590 - acc_top2: 0.7669 - 1s/step
step 650/12272 - loss: 0.9069 - acc_top1: 0.4608 - acc_top2: 0.7686 - 1s/step
step 660/12272 - loss: 0.9630 - acc_top1: 0.4631 - acc_top2: 0.7699 - 1s/step
step 670/12272 - loss: 0.9005 - acc_top1: 0.4652 - acc_top2: 0.7712 - 1s/step
step 680/12272 - loss: 1.0725 - acc_top1: 0.4670 - acc_top2: 0.7725 - 1s/step
step 690/12272 - loss: 0.8322 - acc_top1: 0.4689 - acc_top2: 0.7739 - 1s/step
step 700/12272 - loss: 0.9874 - acc_top1: 0.4714 - acc_top2: 0.7753 - 1s/step
step 710/12272 - loss: 0.7915 - acc_top1: 0.4728 - acc_top2: 0.7765 - 1s/step
step 720/12272 - loss: 0.7174 - acc_top1: 0.4746 - acc_top2: 0.7777 - 1s/step
step 730/12272 - loss: 0.7635 - acc_top1: 0.4770 - acc_top2: 0.7793 - 1s/step
step 740/12272 - loss: 0.9180 - acc_top1: 0.4793 - acc_top2: 0.7804 - 1s/step
step 750/12272 - loss: 0.8424 - acc_top1: 0.4817 - acc_top2: 0.7815 - 1s/step
step 760/12272 - loss: 0.9357 - acc_top1: 0.4837 - acc_top2: 0.7829 - 1s/step
step 770/12272 - loss: 0.7643 - acc_top1: 0.4858 - acc_top2: 0.7839 - 1s/step
step 780/12272 - loss: 0.8910 - acc_top1: 0.4868 - acc_top2: 0.7849 - 1s/step
step 790/12272 - loss: 0.8781 - acc_top1: 0.4888 - acc_top2: 0.7862 - 1s/step
step 800/12272 - loss: 0.8005 - acc_top1: 0.4907 - acc_top2: 0.7877 - 1s/step
step 810/12272 - loss: 0.6740 - acc_top1: 0.4929 - acc_top2: 0.7889 - 1s/step
step 820/12272 - loss: 0.7026 - acc_top1: 0.4947 - acc_top2: 0.7898 - 1s/step
step 830/12272 - loss: 0.8666 - acc_top1: 0.4964 - acc_top2: 0.7908 - 1s/step
step 840/12272 - loss: 0.6296 - acc_top1: 0.4983 - acc_top2: 0.7920 - 1s/step
step 850/12272 - loss: 0.7907 - acc_top1: 0.4992 - acc_top2: 0.7930 - 1s/step
step 860/12272 - loss: 0.7292 - acc_top1: 0.5007 - acc_top2: 0.7935 - 1s/step
step 870/12272 - loss: 0.7498 - acc_top1: 0.5026 - acc_top2: 0.7944 - 1s/step
step 880/12272 - loss: 0.9928 - acc_top1: 0.5040 - acc_top2: 0.7953 - 1s/step
step 890/12272 - loss: 1.0025 - acc_top1: 0.5056 - acc_top2: 0.7962 - 1s/step
step 900/12272 - loss: 0.7810 - acc_top1: 0.5071 - acc_top2: 0.7969 - 1s/step
step 910/12272 - loss: 0.6114 - acc_top1: 0.5090 - acc_top2: 0.7978 - 1s/step
step 920/12272 - loss: 0.7780 - acc_top1: 0.5105 - acc_top2: 0.7988 - 1s/step
step 930/12272 - loss: 0.9457 - acc_top1: 0.5116 - acc_top2: 0.7995 - 1s/step
step 940/12272 - loss: 0.7907 - acc_top1: 0.5135 - acc_top2: 0.8006 - 1s/step
step 950/12272 - loss: 0.5520 - acc_top1: 0.5153 - acc_top2: 0.8013 - 1s/step
step 960/12272 - loss: 0.8251 - acc_top1: 0.5168 - acc_top2: 0.8022 - 1s/step
step 970/12272 - loss: 0.8482 - acc_top1: 0.5179 - acc_top2: 0.8031 - 1s/step
step 980/12272 - loss: 0.8010 - acc_top1: 0.5196 - acc_top2: 0.8038 - 1s/step
step 990/12272 - loss: 0.8326 - acc_top1: 0.5207 - acc_top2: 0.8047 - 1s/step
step 1000/12272 - loss: 0.6979 - acc_top1: 0.5222 - acc_top2: 0.8057 - 1s/step
step 1010/12272 - loss: 0.7506 - acc_top1: 0.5234 - acc_top2: 0.8065 - 1s/step
step 1020/12272 - loss: 0.8457 - acc_top1: 0.5248 - acc_top2: 0.8073 - 1s/step
step 1030/12272 - loss: 0.8698 - acc_top1: 0.5263 - acc_top2: 0.8082 - 1s/step
step 1040/12272 - loss: 0.7016 - acc_top1: 0.5279 - acc_top2: 0.8091 - 1s/step
step 1050/12272 - loss: 0.7766 - acc_top1: 0.5290 - acc_top2: 0.8099 - 1s/step
step 1060/12272 - loss: 0.7994 - acc_top1: 0.5300 - acc_top2: 0.8105 - 1s/step
step 1070/12272 - loss: 0.7053 - acc_top1: 0.5317 - acc_top2: 0.8115 - 1s/step
step 1080/12272 - loss: 0.9085 - acc_top1: 0.5330 - acc_top2: 0.8125 - 1s/step
step 1090/12272 - loss: 0.7556 - acc_top1: 0.5342 - acc_top2: 0.8134 - 1s/step
step 1100/12272 - loss: 0.9364 - acc_top1: 0.5355 - acc_top2: 0.8141 - 1s/step
step 1110/12272 - loss: 0.9403 - acc_top1: 0.5367 - acc_top2: 0.8148 - 1s/step
step 1120/12272 - loss: 0.8228 - acc_top1: 0.5375 - acc_top2: 0.8152 - 1s/step
step 1130/12272 - loss: 0.6802 - acc_top1: 0.5388 - acc_top2: 0.8160 - 1s/step
step 1140/12272 - loss: 0.8222 - acc_top1: 0.5397 - acc_top2: 0.8167 - 1s/step
step 1150/12272 - loss: 0.9321 - acc_top1: 0.5407 - acc_top2: 0.8172 - 1s/step
step 1160/12272 - loss: 0.7478 - acc_top1: 0.5417 - acc_top2: 0.8181 - 1s/step
step 1170/12272 - loss: 0.7976 - acc_top1: 0.5430 - acc_top2: 0.8188 - 1s/step
step 1180/12272 - loss: 0.7386 - acc_top1: 0.5441 - acc_top2: 0.8192 - 1s/step
step 1190/12272 - loss: 0.6448 - acc_top1: 0.5450 - acc_top2: 0.8200 - 1s/step
step 1200/12272 - loss: 0.7441 - acc_top1: 0.5463 - acc_top2: 0.8206 - 1s/step
step 1210/12272 - loss: 0.8171 - acc_top1: 0.5476 - acc_top2: 0.8213 - 1s/step
step 1220/12272 - loss: 0.7480 - acc_top1: 0.5487 - acc_top2: 0.8219 - 1s/step
step 1230/12272 - loss: 0.6363 - acc_top1: 0.5497 - acc_top2: 0.8225 - 1s/step
step 1240/12272 - loss: 0.6630 - acc_top1: 0.5507 - acc_top2: 0.8231 - 1s/step
step 1250/12272 - loss: 0.8668 - acc_top1: 0.5517 - acc_top2: 0.8237 - 1s/step
step 1260/12272 - loss: 0.6057 - acc_top1: 0.5527 - acc_top2: 0.8243 - 1s/step
step 1270/12272 - loss: 0.8432 - acc_top1: 0.5538 - acc_top2: 0.8248 - 1s/step
step 1280/12272 - loss: 0.8447 - acc_top1: 0.5546 - acc_top2: 0.8253 - 1s/step
step 1290/12272 - loss: 0.6928 - acc_top1: 0.5556 - acc_top2: 0.8261 - 1s/step
step 1300/12272 - loss: 0.7872 - acc_top1: 0.5567 - acc_top2: 0.8266 - 1s/step
step 1310/12272 - loss: 0.7968 - acc_top1: 0.5570 - acc_top2: 0.8269 - 1s/step
step 1320/12272 - loss: 0.8059 - acc_top1: 0.5580 - acc_top2: 0.8275 - 1s/step
step 1330/12272 - loss: 0.8603 - acc_top1: 0.5587 - acc_top2: 0.8278 - 1s/step
step 1340/12272 - loss: 0.7872 - acc_top1: 0.5599 - acc_top2: 0.8285 - 1s/step
step 1350/12272 - loss: 0.7037 - acc_top1: 0.5609 - acc_top2: 0.8290 - 1s/step
step 1360/12272 - loss: 0.8268 - acc_top1: 0.5618 - acc_top2: 0.8297 - 1s/step
step 1370/12272 - loss: 0.5962 - acc_top1: 0.5627 - acc_top2: 0.8303 - 1s/step
step 1380/12272 - loss: 0.7712 - acc_top1: 0.5638 - acc_top2: 0.8310 - 1s/step
step 1390/12272 - loss: 0.5770 - acc_top1: 0.5650 - acc_top2: 0.8315 - 1s/step
step 1400/12272 - loss: 0.7174 - acc_top1: 0.5656 - acc_top2: 0.8319 - 1s/step
step 1410/12272 - loss: 0.6224 - acc_top1: 0.5660 - acc_top2: 0.8323 - 1s/step
step 1420/12272 - loss: 0.6782 - acc_top1: 0.5671 - acc_top2: 0.8328 - 1s/step
step 1430/12272 - loss: 0.4087 - acc_top1: 0.5682 - acc_top2: 0.8335 - 1s/step
step 1440/12272 - loss: 0.7534 - acc_top1: 0.5692 - acc_top2: 0.8342 - 1s/step
step 1450/12272 - loss: 0.6446 - acc_top1: 0.5702 - acc_top2: 0.8345 - 1s/step
step 1460/12272 - loss: 0.6606 - acc_top1: 0.5712 - acc_top2: 0.8351 - 1s/step
step 1470/12272 - loss: 0.7308 - acc_top1: 0.5723 - acc_top2: 0.8357 - 1s/step
step 1480/12272 - loss: 0.9016 - acc_top1: 0.5727 - acc_top2: 0.8359 - 1s/step
step 1490/12272 - loss: 0.8445 - acc_top1: 0.5730 - acc_top2: 0.8362 - 1s/step
step 1500/12272 - loss: 0.8217 - acc_top1: 0.5737 - acc_top2: 0.8367 - 1s/step
step 1510/12272 - loss: 0.8413 - acc_top1: 0.5747 - acc_top2: 0.8370 - 1s/step
step 1520/12272 - loss: 0.4643 - acc_top1: 0.5757 - acc_top2: 0.8376 - 1s/step
step 1530/12272 - loss: 0.9351 - acc_top1: 0.5764 - acc_top2: 0.8381 - 1s/step
step 1540/12272 - loss: 0.7856 - acc_top1: 0.5773 - acc_top2: 0.8386 - 1s/step
step 1550/12272 - loss: 0.5921 - acc_top1: 0.5780 - acc_top2: 0.8390 - 1s/step
step 1560/12272 - loss: 0.4460 - acc_top1: 0.5788 - acc_top2: 0.8395 - 1s/step
step 1570/12272 - loss: 0.6814 - acc_top1: 0.5793 - acc_top2: 0.8401 - 1s/step
step 1580/12272 - loss: 0.4115 - acc_top1: 0.5805 - acc_top2: 0.8407 - 1s/step
step 1590/12272 - loss: 0.9326 - acc_top1: 0.5810 - acc_top2: 0.8410 - 1s/step
step 1600/12272 - loss: 0.6989 - acc_top1: 0.5818 - acc_top2: 0.8413 - 1s/step
step 1610/12272 - loss: 0.5238 - acc_top1: 0.5826 - acc_top2: 0.8418 - 1s/step
step 1620/12272 - loss: 0.5827 - acc_top1: 0.5832 - acc_top2: 0.8422 - 1s/step
step 1630/12272 - loss: 0.7703 - acc_top1: 0.5838 - acc_top2: 0.8425 - 1s/step
step 1640/12272 - loss: 0.7926 - acc_top1: 0.5844 - acc_top2: 0.8428 - 1s/step
step 1650/12272 - loss: 0.7143 - acc_top1: 0.5851 - acc_top2: 0.8434 - 1s/step
step 1660/12272 - loss: 0.6240 - acc_top1: 0.5858 - acc_top2: 0.8438 - 1s/step
step 1670/12272 - loss: 0.7869 - acc_top1: 0.5862 - acc_top2: 0.8440 - 1s/step
step 1680/12272 - loss: 0.6485 - acc_top1: 0.5868 - acc_top2: 0.8444 - 1s/step
step 1690/12272 - loss: 0.7539 - acc_top1: 0.5876 - acc_top2: 0.8450 - 1s/step
step 1700/12272 - loss: 0.6173 - acc_top1: 0.5882 - acc_top2: 0.8454 - 1s/step
step 1710/12272 - loss: 0.8056 - acc_top1: 0.5890 - acc_top2: 0.8458 - 1s/step
step 1720/12272 - loss: 0.7035 - acc_top1: 0.5898 - acc_top2: 0.8463 - 1s/step
step 1730/12272 - loss: 0.5892 - acc_top1: 0.5908 - acc_top2: 0.8468 - 1s/step
step 1740/12272 - loss: 0.7755 - acc_top1: 0.5915 - acc_top2: 0.8472 - 1s/step
step 1750/12272 - loss: 0.6911 - acc_top1: 0.5920 - acc_top2: 0.8474 - 1s/step
step 1760/12272 - loss: 0.6309 - acc_top1: 0.5926 - acc_top2: 0.8477 - 1s/step
step 1770/12272 - loss: 0.7506 - acc_top1: 0.5932 - acc_top2: 0.8480 - 1s/step
step 1780/12272 - loss: 0.8711 - acc_top1: 0.5939 - acc_top2: 0.8482 - 1s/step
step 1790/12272 - loss: 0.9146 - acc_top1: 0.5945 - acc_top2: 0.8484 - 1s/step
step 1800/12272 - loss: 0.6208 - acc_top1: 0.5952 - acc_top2: 0.8487 - 1s/step
step 1810/12272 - loss: 0.8506 - acc_top1: 0.5959 - acc_top2: 0.8490 - 1s/step
step 1820/12272 - loss: 0.8330 - acc_top1: 0.5965 - acc_top2: 0.8494 - 1s/step
step 1830/12272 - loss: 0.8315 - acc_top1: 0.5970 - acc_top2: 0.8497 - 1s/step
step 1840/12272 - loss: 0.6227 - acc_top1: 0.5977 - acc_top2: 0.8501 - 1s/step
step 1850/12272 - loss: 0.5972 - acc_top1: 0.5985 - acc_top2: 0.8506 - 1s/step
step 1860/12272 - loss: 0.6309 - acc_top1: 0.5992 - acc_top2: 0.8510 - 1s/step
step 1870/12272 - loss: 0.8707 - acc_top1: 0.5995 - acc_top2: 0.8512 - 1s/step
step 1880/12272 - loss: 0.6419 - acc_top1: 0.6004 - acc_top2: 0.8516 - 1s/step
step 1890/12272 - loss: 0.6015 - acc_top1: 0.6010 - acc_top2: 0.8521 - 1s/step
step 1900/12272 - loss: 0.6000 - acc_top1: 0.6015 - acc_top2: 0.8524 - 1s/step
step 1910/12272 - loss: 0.7010 - acc_top1: 0.6020 - acc_top2: 0.8527 - 1s/step
step 1920/12272 - loss: 0.8539 - acc_top1: 0.6026 - acc_top2: 0.8530 - 1s/step
step 1930/12272 - loss: 0.8381 - acc_top1: 0.6031 - acc_top2: 0.8533 - 1s/step
step 1940/12272 - loss: 0.5921 - acc_top1: 0.6039 - acc_top2: 0.8537 - 1s/step
step 1950/12272 - loss: 0.4974 - acc_top1: 0.6047 - acc_top2: 0.8541 - 1s/step
step 1960/12272 - loss: 0.8269 - acc_top1: 0.6052 - acc_top2: 0.8544 - 1s/step
step 1970/12272 - loss: 0.6157 - acc_top1: 0.6058 - acc_top2: 0.8548 - 1s/step
step 1980/12272 - loss: 1.0949 - acc_top1: 0.6064 - acc_top2: 0.8552 - 1s/step
step 1990/12272 - loss: 0.6442 - acc_top1: 0.6070 - acc_top2: 0.8555 - 1s/step
step 2000/12272 - loss: 0.8747 - acc_top1: 0.6073 - acc_top2: 0.8558 - 1s/step
step 2010/12272 - loss: 0.8101 - acc_top1: 0.6078 - acc_top2: 0.8560 - 1s/step
step 2020/12272 - loss: 0.8623 - acc_top1: 0.6082 - acc_top2: 0.8562 - 1s/step
step 2030/12272 - loss: 0.6664 - acc_top1: 0.6089 - acc_top2: 0.8567 - 1s/step
step 2040/12272 - loss: 0.7616 - acc_top1: 0.6092 - acc_top2: 0.8567 - 1s/step
step 2050/12272 - loss: 0.7282 - acc_top1: 0.6095 - acc_top2: 0.8570 - 1s/step
step 2060/12272 - loss: 0.6914 - acc_top1: 0.6099 - acc_top2: 0.8574 - 1s/step
step 2070/12272 - loss: 0.6129 - acc_top1: 0.6105 - acc_top2: 0.8577 - 1s/step
step 2080/12272 - loss: 0.5605 - acc_top1: 0.6111 - acc_top2: 0.8580 - 1s/step
step 2090/12272 - loss: 0.6432 - acc_top1: 0.6116 - acc_top2: 0.8582 - 1s/step
step 2100/12272 - loss: 0.6783 - acc_top1: 0.6121 - acc_top2: 0.8586 - 1s/step
step 2110/12272 - loss: 0.5949 - acc_top1: 0.6128 - acc_top2: 0.8589 - 1s/step
step 2120/12272 - loss: 0.7832 - acc_top1: 0.6134 - acc_top2: 0.8592 - 1s/step
step 2130/12272 - loss: 0.6633 - acc_top1: 0.6139 - acc_top2: 0.8594 - 1s/step
step 2140/12272 - loss: 0.8456 - acc_top1: 0.6143 - acc_top2: 0.8596 - 1s/step
step 2150/12272 - loss: 0.7133 - acc_top1: 0.6150 - acc_top2: 0.8599 - 1s/step
step 2160/12272 - loss: 0.4699 - acc_top1: 0.6155 - acc_top2: 0.8602 - 1s/step
step 2170/12272 - loss: 0.6013 - acc_top1: 0.6161 - acc_top2: 0.8605 - 1s/step
step 2180/12272 - loss: 0.5676 - acc_top1: 0.6165 - acc_top2: 0.8608 - 1s/step
step 2190/12272 - loss: 0.5850 - acc_top1: 0.6172 - acc_top2: 0.8611 - 1s/step
step 2200/12272 - loss: 0.6887 - acc_top1: 0.6177 - acc_top2: 0.8612 - 1s/step
step 2210/12272 - loss: 0.5706 - acc_top1: 0.6180 - acc_top2: 0.8614 - 1s/step
step 2220/12272 - loss: 0.8251 - acc_top1: 0.6184 - acc_top2: 0.8617 - 1s/step
step 2230/12272 - loss: 0.6532 - acc_top1: 0.6188 - acc_top2: 0.8620 - 1s/step
step 2240/12272 - loss: 0.5888 - acc_top1: 0.6194 - acc_top2: 0.8623 - 1s/step
step 2250/12272 - loss: 0.6360 - acc_top1: 0.6198 - acc_top2: 0.8625 - 1s/step
step 2260/12272 - loss: 1.0555 - acc_top1: 0.6202 - acc_top2: 0.8628 - 1s/step
step 2270/12272 - loss: 0.4848 - acc_top1: 0.6207 - acc_top2: 0.8629 - 1s/step
step 2280/12272 - loss: 0.7243 - acc_top1: 0.6212 - acc_top2: 0.8632 - 1s/step
step 2290/12272 - loss: 0.4358 - acc_top1: 0.6216 - acc_top2: 0.8635 - 1s/step
step 2300/12272 - loss: 0.5473 - acc_top1: 0.6221 - acc_top2: 0.8637 - 1s/step
step 2310/12272 - loss: 0.6440 - acc_top1: 0.6226 - acc_top2: 0.8640 - 1s/step
step 2320/12272 - loss: 0.5785 - acc_top1: 0.6233 - acc_top2: 0.8643 - 1s/step
step 2330/12272 - loss: 0.7199 - acc_top1: 0.6237 - acc_top2: 0.8646 - 1s/step
step 2340/12272 - loss: 0.5622 - acc_top1: 0.6241 - acc_top2: 0.8647 - 1s/step
step 2350/12272 - loss: 0.6742 - acc_top1: 0.6245 - acc_top2: 0.8650 - 1s/step
step 2360/12272 - loss: 0.8149 - acc_top1: 0.6249 - acc_top2: 0.8652 - 1s/step
step 2370/12272 - loss: 0.5900 - acc_top1: 0.6253 - acc_top2: 0.8654 - 1s/step
step 2380/12272 - loss: 0.8046 - acc_top1: 0.6256 - acc_top2: 0.8656 - 1s/step
step 2390/12272 - loss: 0.6097 - acc_top1: 0.6262 - acc_top2: 0.8659 - 1s/step
step 2400/12272 - loss: 0.5936 - acc_top1: 0.6266 - acc_top2: 0.8660 - 1s/step
step 2410/12272 - loss: 0.7245 - acc_top1: 0.6270 - acc_top2: 0.8662 - 1s/step
step 2420/12272 - loss: 0.6349 - acc_top1: 0.6274 - acc_top2: 0.8665 - 1s/step
step 2430/12272 - loss: 0.7009 - acc_top1: 0.6278 - acc_top2: 0.8668 - 1s/step
step 2440/12272 - loss: 0.3881 - acc_top1: 0.6282 - acc_top2: 0.8670 - 1s/step
step 2450/12272 - loss: 0.5226 - acc_top1: 0.6286 - acc_top2: 0.8673 - 1s/step
step 2460/12272 - loss: 0.5748 - acc_top1: 0.6292 - acc_top2: 0.8675 - 1s/step
step 2470/12272 - loss: 0.4798 - acc_top1: 0.6297 - acc_top2: 0.8678 - 1s/step
step 2480/12272 - loss: 0.5857 - acc_top1: 0.6303 - acc_top2: 0.8680 - 1s/step
step 2490/12272 - loss: 0.6729 - acc_top1: 0.6308 - acc_top2: 0.8683 - 1s/step
step 2500/12272 - loss: 0.6392 - acc_top1: 0.6312 - acc_top2: 0.8686 - 1s/step
step 2510/12272 - loss: 0.9607 - acc_top1: 0.6315 - acc_top2: 0.8687 - 1s/step
step 2520/12272 - loss: 0.6036 - acc_top1: 0.6319 - acc_top2: 0.8690 - 1s/step
step 2530/12272 - loss: 0.6505 - acc_top1: 0.6324 - acc_top2: 0.8693 - 1s/step
step 2540/12272 - loss: 0.4558 - acc_top1: 0.6329 - acc_top2: 0.8696 - 1s/step
step 2550/12272 - loss: 0.4215 - acc_top1: 0.6333 - acc_top2: 0.8699 - 1s/step
step 2560/12272 - loss: 0.6908 - acc_top1: 0.6338 - acc_top2: 0.8701 - 1s/step
step 2570/12272 - loss: 0.5833 - acc_top1: 0.6342 - acc_top2: 0.8703 - 1s/step
step 2580/12272 - loss: 0.8548 - acc_top1: 0.6346 - acc_top2: 0.8706 - 1s/step
step 2590/12272 - loss: 0.5770 - acc_top1: 0.6351 - acc_top2: 0.8708 - 1s/step
step 2600/12272 - loss: 0.4476 - acc_top1: 0.6355 - acc_top2: 0.8711 - 1s/step
step 2610/12272 - loss: 0.4145 - acc_top1: 0.6360 - acc_top2: 0.8714 - 1s/step
step 2620/12272 - loss: 0.6625 - acc_top1: 0.6365 - acc_top2: 0.8717 - 1s/step
step 2630/12272 - loss: 0.4808 - acc_top1: 0.6369 - acc_top2: 0.8719 - 1s/step
#!/bin/bash
BERT_BASE_PATH="./data/pretrained_models/uncased_L-12_H-768_A-12/"
TASK_NAME='MNLI'
DATA_PATH="./data/glue_data/MNLI/"
CKPT_PATH="./data/saved_model/mnli_models"
# start fine-tuning
python3.7 -m paddle.distributed.launch --started_port 8899 --selected_gpus=0,1,2,3 bert_classifier.py \
--use_cuda true \
--do_train true \
--do_test true \
--batch_size 64 \
--init_pretraining_params ${BERT_BASE_PATH}/dygraph_params/ \
--data_dir ${DATA_PATH} \
--vocab_path ${BERT_BASE_PATH}/vocab.txt \
--checkpoints ${CKPT_PATH} \
--save_steps 1000 \
--weight_decay 0.01 \
--warmup_proportion 0.1 \
--validation_steps 100 \
--epoch 3 \
--max_seq_len 128 \
--bert_config_path ${BERT_BASE_PATH}/bert_config.json \
--learning_rate 5e-5 \
--skip_steps 10 \
--shuffle true
......@@ -4,7 +4,7 @@ TASK_NAME='MNLI'
DATA_PATH="./data/glue_data/MNLI/"
CKPT_PATH="./data/saved_model/mnli_models"
export CUDA_VISIBLE_DEVICES=7
export CUDA_VISIBLE_DEVICES=0
# start fine-tuning
python3.7 bert_classifier.py \
......
......@@ -29,7 +29,6 @@ BMN Overview
├── train.py # training script: trains the network
├── eval.py # evaluation script: evaluates network performance
├── predict.py # prediction script: runs prediction on arbitrary input
├── bmn_model.py # network architecture and loss function definitions
├── bmn_metric.py # accuracy metric definitions
├── reader.py # data reader: builds the Dataset and DataLoader
├── bmn_utils.py # model utility code
......@@ -41,7 +40,7 @@ BMN Overview
## Data Preparation
BMN is trained on the dataset provided by ActivityNet 1.3. We provide pre-processed video features; please download [bmn\_feat](https://paddlemodels.bj.bcebos.com/video_detection/bmn_feat.tar.gz) and unpack it, then update the feature path feat\_path in bmn.yaml accordingly. For the corresponding label file, please download [label](https://paddlemodels.bj.bcebos.com/video_detection/activitynet_1.3_annotations.json) and update the label file path anno\_file in bmn.yaml.
BMN is trained on the dataset provided by ActivityNet 1.3. We provide pre-processed video features and the corresponding label file; please download the feature data [bmn\_feat](https://paddlemodels.bj.bcebos.com/video_detection/bmn_feat.tar.gz) and the label data [label](https://paddlemodels.bj.bcebos.com/video_detection/activitynet_1.3_annotations.json), then update the feature path feat\_path and the label file path anno\_file in the configuration file bmn.yaml accordingly.
## Model Training
......@@ -52,22 +51,17 @@ BMN is trained on the dataset provided by ActivityNet1.3; we provide the pre-processed
bash run.sh
For single-GPU training, launch as follows:
For single-GPU training, set batch\_size in the configuration file bmn.yaml to 16, then launch as follows:
export CUDA_VISIBLE_DEVICES=0
python train.py
- Running the code requires pandas to be installed first
- To train from scratch, use the launch command or script above; no pretrained model is needed
Static-graph training is used by default; to train with the dynamic graph, simply add the `-d` flag when running the script, e.g.:
- For single-GPU training, set batch_size in the configuration file to 16
python train.py -d
**Training strategy:**
- Running the code requires pandas to be installed first
* Adam optimizer with initial learning\_rate=0.001
* Weight decay coefficient of 1e-4
* The learning rate is decayed once when the iteration count reaches 4200, with a decay factor of 0.1
- To train from scratch, use the launch command or script above; no pretrained model is needed
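A minimal sketch of this schedule in plain `paddle.fluid` APIs follows; the helper name is hypothetical, and the actual optimizer setup lives in the repo's train.py/bmn_utils.py:

```python
import paddle.fluid as fluid

def make_optimizer(parameter_list):
    # Step decay: lr = 0.001 until iteration 4200, then 0.001 * 0.1 = 0.0001.
    lr = fluid.layers.piecewise_decay(boundaries=[4200], values=[0.001, 0.0001])
    # Adam with L2 weight decay coefficient 1e-4, as described above.
    return fluid.optimizer.Adam(
        learning_rate=lr,
        regularization=fluid.regularizer.L2DecayRegularizer(
            regularization_coeff=1e-4),
        parameter_list=parameter_list)
```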
## Model Evaluation
......@@ -76,9 +70,9 @@ BMN is trained on the dataset provided by ActivityNet1.3; we provide the pre-processed
python eval.py --weights=$PATH_TO_WEIGHTS
- For evaluation, the `weights` argument on the command line specifies the weights to evaluate; if unset, the default parameter file checkpoint/final.pdparams is used
- For evaluation, the `weights` argument on the command line specifies the weights to evaluate; if unspecified, the script downloads the released [model](https://paddlemodels.bj.bcebos.com/hapi/bmn.pdparams) and evaluates it
- The program above saves its outputs under the output/EVAL/BMN\_results folder; test results are saved to the evaluate\_results/bmn\_results\_validation.json file
- The program above saves its outputs under the folder given by the `--output_path` argument, default output/EVAL/BMN\_results; test results are saved under the folder given by the `--result_path` argument, default evaluate\_results
- Note: the loss may become nan during evaluation. Evaluation uses single samples, and a sample may contain no instance with iou>0.6, which yields nan; this has no effect on the final evaluation results.
......@@ -87,9 +81,9 @@ BMN is trained on the dataset provided by ActivityNet1.3; we provide the pre-processed
- For detailed usage of the ActivityNet dataset, refer to its [official website](http://activity-net.org)
- To obtain the metric evaluation code, download it from the [ActivityNet Github repository](https://github.com/activitynet/ActivityNet.git) and copy the Evaluation folder into the models/dygraph/bmn directory. (Note: since the third-party evaluation code does not support python3, we recommend evaluating with python2; if you use python3, the print function needs parentheses, so modify the .py files under the Evaluation directory accordingly.)
- To obtain the metric evaluation code, download it from the [ActivityNet Github repository](https://github.com/activitynet/ActivityNet.git) and copy the Evaluation folder into the hapi/examples/bmn directory. (Note: since the third-party evaluation code does not support python3, we recommend evaluating with python2; if you use python3, the print function needs parentheses, so modify the .py files under the Evaluation directory accordingly.)
- Please download the [activity\_net\_1\_3\_new.json](https://paddlemodels.bj.bcebos.com/video_detection/activity_net_1_3_new.json) file and place it in the models/dygraph/bmn/Evaluation/data directory; compared with the original activity\_net.v1-3.min.json file, we filtered out some invalid video entries.
- Please download the [activity\_net\_1\_3\_new.json](https://paddlemodels.bj.bcebos.com/video_detection/activity_net_1_3_new.json) file and place it in the hapi/examples/bmn/Evaluation/data directory; compared with the original activity\_net.v1-3.min.json file, we filtered out some invalid video entries.
- Compute the accuracy metrics
......@@ -100,7 +94,7 @@ BMN is trained on the dataset provided by ActivityNet1.3; we provide the pre-processed
| AR@1 | AR@5 | AR@10 | AR@100 | AUC |
| :---: | :---: | :---: | :---: | :---: |
| 33.46 | 49.25 | 56.25 | 75.40 | 67.16% |
| 33.10 | 49.18 | 56.54 | 75.12 | 67.16% |
## Model Inference
......@@ -110,9 +104,9 @@ BMN is trained on the dataset provided by ActivityNet1.3; we provide the pre-processed
python predict.py --weights=$PATH_TO_WEIGHTS \
--filelist=$FILELIST
- When launching via the python command line, the `--filelist` argument specifies the list of files for inference; if unset, it defaults to ./infer.list. The `--weights` argument gives the trained weights; if unset, the default parameter file checkpoint/final.pdparams is used
- When launching via the python command line, the `--filelist` argument specifies the list of files for inference; if unset, it defaults to ./infer.list. The `--weights` argument gives the trained weights; if unspecified, the script downloads the released [model](https://paddlemodels.bj.bcebos.com/hapi/bmn.pdparams) and predicts with it
- The program above saves its outputs under the output/INFER/BMN\_results folder; test results are saved to the predict\_results/bmn\_results\_test.json file
- The program above saves its outputs under the folder given by the `--output_path` argument, default output/INFER/BMN\_results; test results are saved under the folder given by the `--result_path` argument, default predict\_results
## Reference Papers
......
......@@ -12,11 +12,10 @@ MODEL:
TRAIN:
subset: "train"
epoch: 9
batch_size: 4
batch_size: 4
num_workers: 4
use_shuffle: True
device: "gpu"
num_gpus: 4
learning_rate: 0.001
learning_rate_decay: 0.1
lr_decay_iter: 4200
......@@ -29,10 +28,6 @@ TEST:
subset: "validation"
batch_size: 1
num_workers: 1
use_buffer: False
snms_alpha: 0.001
snms_t1: 0.5
snms_t2: 0.9
output_path: "output/EVAL/BMN_results"
result_path: "evaluate_results"
......@@ -40,10 +35,6 @@ INFER:
subset: "test"
batch_size: 1
num_workers: 1
use_buffer: False
snms_alpha: 0.4
snms_t1: 0.5
snms_t2: 0.9
filelist: './infer.list'
output_path: "output/INFER/BMN_results"
result_path: "predict_results"
......
......@@ -36,8 +36,26 @@ class BmnMetric(Metric):
#get video_dict and video_list
if self.mode == 'test':
self.get_test_dataset_dict()
if not os.path.isdir(self.cfg.TEST.output_path):
os.makedirs(self.cfg.TEST.output_path)
if not os.path.isdir(self.cfg.TEST.result_path):
os.makedirs(self.cfg.TEST.result_path)
elif self.mode == 'infer':
self.get_infer_dataset_dict()
if not os.path.isdir(self.cfg.INFER.output_path):
os.makedirs(self.cfg.INFER.output_path)
if not os.path.isdir(self.cfg.INFER.result_path):
os.makedirs(self.cfg.INFER.result_path)
def add_metric_op(self, *args):
if self.mode == 'test':
# only extract pred_bm, pred_start, pred_en from outputs
# and video_index from label here
pred_bm, pred_start, pred_en, _, _, _, video_index = args
else:
# in infer mode, labels only contains video_index
pred_bm, pred_start, pred_en, video_index = args
return pred_bm, pred_start, pred_en, video_index
def update(self, pred_bm, pred_start, pred_end, fid):
# generate proposals
......
......@@ -19,7 +19,8 @@ import logging
import paddle.fluid as fluid
from hapi.model import set_device, Input
from hapi.vision.models import bmn, BmnLoss
from modeling import bmn, BmnLoss
from bmn_metric import BmnMetric
from reader import BmnDataset
from config_utils import *
......@@ -37,7 +38,6 @@ def parse_args():
parser.add_argument(
"-d",
"--dynamic",
default=True,
action='store_true',
help="enable dygraph mode, only support dynamic mode at present time")
parser.add_argument(
......@@ -56,6 +56,17 @@ def parse_args():
default=None,
help='weight path, None to automatically download weights provided by Paddle.'
)
parser.add_argument(
'--output_path',
type=str,
default="output/EVAL/BMN_results",
help='output dir path, default to use output/EVAL/BMN_results')
parser.add_argument(
'--result_path',
type=str,
default="evaluate_results/",
help='output dir path after post processing, default to use ./evaluate_results/'
)
parser.add_argument(
'--log_interval',
type=int,
......@@ -67,17 +78,21 @@ def parse_args():
# Performance Evaluation
def test_bmn(args):
# only support dynamic mode at present time
device = set_device(args.device)
fluid.enable_dygraph(device) if args.dynamic else None
#config setting
config = parse_config(args.config_file)
eval_cfg = merge_configs(config, 'test', vars(args))
if not os.path.isdir(config.TEST.output_path):
os.makedirs(config.TEST.output_path)
if not os.path.isdir(config.TEST.result_path):
os.makedirs(config.TEST.result_path)
feat_dim = config.MODEL.feat_dim
tscale = config.MODEL.tscale
dscale = config.MODEL.dscale
prop_boundary_ratio = config.MODEL.prop_boundary_ratio
num_sample = config.MODEL.num_sample
num_sample_perbin = config.MODEL.num_sample_perbin
#input and video index
inputs = [
Input(
[None, config.MODEL.feat_dim, config.MODEL.tscale],
......@@ -97,9 +112,14 @@ def test_bmn(args):
eval_dataset = BmnDataset(eval_cfg, 'test')
#model
model = bmn(config, pretrained=args.weights is None)
model = bmn(tscale,
dscale,
prop_boundary_ratio,
num_sample,
num_sample_perbin,
pretrained=args.weights is None)
model.prepare(
loss_function=BmnLoss(config),
loss_function=BmnLoss(tscale, dscale),
metrics=BmnMetric(
config, mode='test'),
inputs=inputs,
......
......@@ -14,7 +14,6 @@
import paddle.fluid as fluid
from paddle.fluid import ParamAttr
from paddle.fluid.framework import in_dygraph_mode
import numpy as np
import math
......@@ -27,7 +26,7 @@ DATATYPE = 'float32'
pretrain_infos = {
'bmn': ('https://paddlemodels.bj.bcebos.com/hapi/bmn.pdparams',
'9286c821acc4cad46d6613b931ba468c')
'aa84e3386e1fbd117fb96fa572feeb94')
}
......@@ -131,17 +130,23 @@ class BMN(Model):
`"BMN: Boundary-Matching Network for Temporal Action Proposal Generation" <https://arxiv.org/abs/1907.09702>`_
Args:
cfg (AttrDict): configs for BMN model
tscale (int): sequence length, default 100.
dscale (int): max duration length, default 100.
prop_boundary_ratio (float): ratio of expanded temporal region in proposal boundary, default 0.5.
num_sample (int): number of samples between starting boundary and ending boundary of each proposal, default 32.
num_sample_perbin (int): number of selected points in each sample, default 3.
"""
def __init__(self, cfg):
def __init__(self, tscale, dscale, prop_boundary_ratio, num_sample,
num_sample_perbin):
super(BMN, self).__init__()
#init config
self.tscale = cfg.MODEL.tscale
self.dscale = cfg.MODEL.dscale
self.prop_boundary_ratio = cfg.MODEL.prop_boundary_ratio
self.num_sample = cfg.MODEL.num_sample
self.num_sample_perbin = cfg.MODEL.num_sample_perbin
self.tscale = tscale
self.dscale = dscale
self.prop_boundary_ratio = prop_boundary_ratio
self.num_sample = num_sample
self.num_sample_perbin = num_sample_perbin
self.hidden_dim_1d = 256
self.hidden_dim_2d = 128
......@@ -192,23 +197,17 @@ class BMN(Model):
padding=1,
act="relu")
# init to speed up
# get sample mask
sample_mask_array = get_interp1d_mask(
self.tscale, self.dscale, self.prop_boundary_ratio,
self.num_sample, self.num_sample_perbin)
if in_dygraph_mode():
self.sample_mask = fluid.dygraph.base.to_variable(
sample_mask_array)
else: # static
self.sample_mask = fluid.layers.create_parameter(
shape=[
self.tscale, self.num_sample * self.dscale * self.tscale
],
dtype=DATATYPE,
attr=fluid.ParamAttr(
name="sample_mask", trainable=False),
default_initializer=fluid.initializer.NumpyArrayInitializer(
sample_mask_array))
self.sample_mask = fluid.layers.create_parameter(
shape=[self.tscale, self.num_sample * self.dscale * self.tscale],
dtype=DATATYPE,
attr=fluid.ParamAttr(
name="sample_mask", trainable=False),
default_initializer=fluid.initializer.NumpyArrayInitializer(
sample_mask_array))
self.sample_mask.stop_gradient = True
......@@ -292,23 +291,27 @@ class BmnLoss(Loss):
"""Loss for BMN model
Args:
cfg (AttrDict): configs for BMN model
tscale (int): sequence length, default 100.
dscale (int): max duration length, default 100.
"""
def __init__(self, cfg):
def __init__(self, tscale, dscale):
super(BmnLoss, self).__init__()
self.cfg = cfg
self.tscale = tscale
self.dscale = dscale
def _get_mask(self):
dscale = self.cfg.MODEL.dscale
tscale = self.cfg.MODEL.tscale
bm_mask = []
for idx in range(dscale):
mask_vector = [1 for i in range(tscale - idx)
for idx in range(self.dscale):
mask_vector = [1 for i in range(self.tscale - idx)
] + [0 for i in range(idx)]
bm_mask.append(mask_vector)
bm_mask = np.array(bm_mask, dtype=np.float32)
self_bm_mask = fluid.layers.create_global_var(
shape=[dscale, tscale], value=0, dtype=DATATYPE, persistable=True)
shape=[self.dscale, self.tscale],
value=0,
dtype=DATATYPE,
persistable=True)
fluid.layers.assign(bm_mask, self_bm_mask)
self_bm_mask.stop_gradient = True
return self_bm_mask
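For intuition, a quick NumPy rendering of the mask this method builds, using dscale=tscale=4 for illustration (the defaults are 100):

```python
import numpy as np

# Each row idx keeps the first (tscale - idx) positions and zeroes the rest.
bm_mask = np.array([[1] * (4 - idx) + [0] * idx for idx in range(4)],
                   dtype=np.float32)
# [[1. 1. 1. 1.]
#  [1. 1. 1. 0.]
#  [1. 1. 0. 0.]
#  [1. 0. 0. 0.]]
```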
......@@ -437,18 +440,27 @@ class BmnLoss(Loss):
return loss
def bmn(cfg, pretrained=True):
def bmn(tscale,
dscale,
prop_boundary_ratio,
num_sample,
num_sample_perbin,
pretrained=True):
"""BMN model
Args:
cfg (AttrDict): configs for BMN model
pretrained (bool): If True, returns a model with pre-trained model
on COCO, default True
tscale (int): sequence length, default 100.
dscale (int): max duration length, default 100.
prop_boundary_ratio (float): ratio of expanded temporal region in proposal boundary, default 0.5.
num_sample (int): number of samples between starting boundary and ending boundary of each proposal, default 32.
num_sample_perbin (int): number of selected points in each sample, default 3.
pretrained (bool): If True, returns a model with pre-trained model, default True.
"""
model = BMN(cfg)
model = BMN(tscale, dscale, prop_boundary_ratio, num_sample,
num_sample_perbin)
if pretrained:
weight_path = get_weights_path(*(pretrain_infos['bmn']))
assert weight_path.endswith('.pdparams'), \
"suffix of weight must be .pdparams"
model.load(weight_path[:-9])
model.load(weight_path)
return model
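As a usage sketch (a hedged example, not code from this commit), the refactored factory and loss can now be constructed with the default values documented above:

```python
from modeling import bmn, BmnLoss

# Construct BMN with the documented defaults; pretrained=True downloads weights.
model = bmn(tscale=100, dscale=100, prop_boundary_ratio=0.5,
            num_sample=32, num_sample_perbin=3, pretrained=True)
loss = BmnLoss(tscale=100, dscale=100)
```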
......@@ -19,7 +19,8 @@ import logging
import paddle.fluid as fluid
from hapi.model import set_device, Input
from hapi.vision.models import bmn, BmnLoss
from modeling import bmn, BmnLoss
from bmn_metric import BmnMetric
from reader import BmnDataset
from config_utils import *
......@@ -37,7 +38,6 @@ def parse_args():
parser.add_argument(
"-d",
"--dynamic",
default=True,
action='store_true',
help="enable dygraph mode, only support dynamic mode at present time")
parser.add_argument(
......@@ -54,10 +54,21 @@ def parse_args():
help='weight path, None to automatically download weights provided by Paddle.'
)
parser.add_argument(
'--save_dir',
'--filelist',
type=str,
default="infer.list",
help='infer file list, default to use ./infer.list')
parser.add_argument(
'--output_path',
type=str,
default="output/INFER/BMN_results",
help='output dir path, default to use output/INFER/BMN_results')
parser.add_argument(
'--result_path',
type=str,
default="predict_results/",
help='output dir path, default to use ./predict_results/')
help='output dir path after post processing, default to use ./predict_results/'
)
parser.add_argument(
'--log_interval',
type=int,
......@@ -69,18 +80,21 @@ def parse_args():
# Prediction
def infer_bmn(args):
# only support dynamic mode at present time
device = set_device(args.device)
fluid.enable_dygraph(device) if args.dynamic else None
#config setting
config = parse_config(args.config_file)
infer_cfg = merge_configs(config, 'infer', vars(args))
if not os.path.isdir(config.INFER.output_path):
os.makedirs(config.INFER.output_path)
if not os.path.isdir(config.INFER.result_path):
os.makedirs(config.INFER.result_path)
feat_dim = config.MODEL.feat_dim
tscale = config.MODEL.tscale
dscale = config.MODEL.dscale
prop_boundary_ratio = config.MODEL.prop_boundary_ratio
num_sample = config.MODEL.num_sample
num_sample_perbin = config.MODEL.num_sample_perbin
#input and video index
inputs = [
Input(
[None, config.MODEL.feat_dim, config.MODEL.tscale],
......@@ -92,7 +106,13 @@ def infer_bmn(args):
#data
infer_dataset = BmnDataset(infer_cfg, 'infer')
model = bmn(config, pretrained=args.weights is None)
#model
model = bmn(tscale,
dscale,
prop_boundary_ratio,
num_sample,
num_sample_perbin,
pretrained=args.weights is None)
model.prepare(
metrics=BmnMetric(
config, mode='infer'),
......
export CUDA_VISIBLE_DEVICES=0,1,2,3
python -m paddle.distributed.launch train.py
......@@ -19,9 +19,10 @@ import sys
import os
from hapi.model import set_device, Input
from hapi.vision.models import bmn, BmnLoss
from reader import BmnDataset
from config_utils import *
from modeling import bmn, BmnLoss
DATATYPE = 'float32'
......@@ -34,11 +35,7 @@ logger = logging.getLogger(__name__)
def parse_args():
parser = argparse.ArgumentParser("Paddle high level api of BMN.")
parser.add_argument(
"-d",
"--dynamic",
default=True,
action='store_true',
help="enable dygraph mode")
"-d", "--dynamic", action='store_true', help="enable dygraph mode")
parser.add_argument(
'--config_file',
type=str,
......@@ -48,7 +45,7 @@ def parse_args():
'--batch_size',
type=int,
default=None,
help='training batch size. None to use config file setting.')
help='training batch size. None to read from the config file.')
parser.add_argument(
'--learning_rate',
type=float,
......@@ -68,8 +65,8 @@ def parse_args():
parser.add_argument(
'--epoch',
type=int,
default=9,
help='epoch number, 0 for read from config file')
default=None,
help='epoch number, None to read from the config file')
parser.add_argument(
'--valid_interval',
type=int,
......@@ -113,22 +110,23 @@ def train_bmn(args):
if not os.path.isdir(args.save_dir):
os.makedirs(args.save_dir)
#config setting
config = parse_config(args.config_file)
train_cfg = merge_configs(config, 'train', vars(args))
val_cfg = merge_configs(config, 'valid', vars(args))
inputs = [
Input(
[None, config.MODEL.feat_dim, config.MODEL.tscale],
'float32',
name='feat_input')
]
gt_iou_map = Input(
[None, config.MODEL.dscale, config.MODEL.tscale],
'float32',
name='gt_iou_map')
gt_start = Input([None, config.MODEL.tscale], 'float32', name='gt_start')
gt_end = Input([None, config.MODEL.tscale], 'float32', name='gt_end')
feat_dim = config.MODEL.feat_dim
tscale = config.MODEL.tscale
dscale = config.MODEL.dscale
prop_boundary_ratio = config.MODEL.prop_boundary_ratio
num_sample = config.MODEL.num_sample
num_sample_perbin = config.MODEL.num_sample_perbin
# input and label list
inputs = [Input([None, feat_dim, tscale], 'float32', name='feat_input')]
gt_iou_map = Input([None, dscale, tscale], 'float32', name='gt_iou_map')
gt_start = Input([None, tscale], 'float32', name='gt_start')
gt_end = Input([None, tscale], 'float32', name='gt_end')
labels = [gt_iou_map, gt_start, gt_end]
# data
......@@ -136,11 +134,16 @@ def train_bmn(args):
val_dataset = BmnDataset(val_cfg, 'valid')
# model
model = bmn(config, pretrained=False)
model = bmn(tscale,
dscale,
prop_boundary_ratio,
num_sample,
num_sample_perbin,
pretrained=False)
optim = optimizer(config, parameter_list=model.parameters())
model.prepare(
optimizer=optim,
loss_function=BmnLoss(config),
loss_function=BmnLoss(tscale, dscale),
inputs=inputs,
labels=labels,
device=device)
......@@ -148,11 +151,10 @@ def train_bmn(args):
# if resume weights is given, load resume weights directly
if args.resume is not None:
model.load(args.resume)
model.fit(train_data=train_dataset,
eval_data=val_dataset,
batch_size=train_cfg.TRAIN.batch_size,
epochs=args.epoch,
epochs=train_cfg.TRAIN.epoch,
eval_freq=args.valid_interval,
log_freq=args.log_interval,
save_dir=args.save_dir,
......
......@@ -80,12 +80,19 @@ data/cityscapes/testA/412_A.jpg
### Training
Train on a single GPU:
Static-graph training on a single GPU:
```
env CUDA_VISIBLE_DEVICES=0 python train.py
env CUDA_VISIBLE_DEVICES=0 python train.py --checkpoint_path=checkpoint_static
```
Dynamic-graph training on a single GPU:
```
env CUDA_VISIBLE_DEVICES=0 python train.py --dynamic --checkpoint_path=checkpoint_dynamic
```
Run `python train.py --help` for more usage options and detailed argument descriptions.
Figure 1 shows the training loss over 152 epochs of training, where the horizontal axis is the training epoch and the vertical axis is the loss on the training set. 'g_loss', 'da_loss', and 'db_loss' are the training losses of the generator, discriminator A, and discriminator B, respectively.
......
......@@ -18,9 +18,10 @@ from __future__ import print_function
import numpy as np
from layers import ConvBN, DeConvBN
import paddle.fluid as fluid
from model import Model, Loss
from hapi.model import Model, Loss
from layers import ConvBN, DeConvBN
class ResnetBlock(fluid.dygraph.Layer):
......
......@@ -20,6 +20,8 @@ import random
import numpy as np
from PIL import Image, ImageOps
import paddle
DATASET = "cityscapes"
A_LIST_FILE = "./data/" + DATASET + "/trainA.txt"
B_LIST_FILE = "./data/" + DATASET + "/trainB.txt"
......@@ -27,8 +29,6 @@ A_TEST_LIST_FILE = "./data/" + DATASET + "/testA.txt"
B_TEST_LIST_FILE = "./data/" + DATASET + "/testB.txt"
IMAGES_ROOT = "./data/" + DATASET + "/"
import paddle.fluid as fluid
class Cityscapes(paddle.io.Dataset):
def __init__(self, root_path, file_path, mode='train', return_name=False):
......
......@@ -25,9 +25,9 @@ from PIL import Image
from scipy.misc import imsave
import paddle.fluid as fluid
from check import check_gpu, check_version
from hapi.model import Model, Input, set_device
from model import Model, Input, set_device
from check import check_gpu, check_version
from cyclegan import Generator, GeneratorCombine
......@@ -43,7 +43,7 @@ def main():
im_shape = [-1, 3, 256, 256]
input_A = Input(im_shape, 'float32', 'input_A')
input_B = Input(im_shape, 'float32', 'input_B')
g.prepare(inputs=[input_A, input_B])
g.prepare(inputs=[input_A, input_B], device=FLAGS.device)
g.load(FLAGS.init_model, skip_mismatch=True, reset_optimizer=True)
out_path = FLAGS.output + "/single"
......@@ -59,10 +59,10 @@ def main():
data = image.transpose([2, 0, 1])[np.newaxis, :]
if FLAGS.input_style == "A":
_, fake, _, _ = g.test([data, data])
_, fake, _, _ = g.test_batch([data, data])
if FLAGS.input_style == "B":
fake, _, _, _ = g.test([data, data])
fake, _, _, _ = g.test_batch([data, data])
fake = np.squeeze(fake[0]).transpose([1, 2, 0])
......@@ -74,7 +74,7 @@ def main():
if __name__ == "__main__":
parser = argparse.ArgumentParser("CycleGAN inference")
parser.add_argument(
"-d", "--dynamic", action='store_false', help="Enable dygraph mode")
"-d", "--dynamic", action='store_true', help="Enable dygraph mode")
parser.add_argument(
"-p",
"--device",
......
......@@ -22,9 +22,9 @@ import numpy as np
from scipy.misc import imsave
import paddle.fluid as fluid
from check import check_gpu, check_version
from hapi.model import Model, Input, set_device
from model import Model, Input, set_device
from check import check_gpu, check_version
from cyclegan import Generator, GeneratorCombine
import data as data
......@@ -41,7 +41,7 @@ def main():
im_shape = [-1, 3, 256, 256]
input_A = Input(im_shape, 'float32', 'input_A')
input_B = Input(im_shape, 'float32', 'input_B')
g.prepare(inputs=[input_A, input_B])
g.prepare(inputs=[input_A, input_B], device=FLAGS.device)
g.load(FLAGS.init_model, skip_mismatch=True, reset_optimizer=True)
if not os.path.exists(FLAGS.output):
......@@ -56,7 +56,7 @@ def main():
data_A = np.array(data_A).astype("float32")
data_B = np.array(data_B).astype("float32")
fake_A, fake_B, cyc_A, cyc_B = g.test([data_A, data_B])
fake_A, fake_B, cyc_A, cyc_B = g.test_batch([data_A, data_B])
datas = [fake_A, fake_B, cyc_A, cyc_B, data_A, data_B]
odatas = []
......@@ -75,7 +75,7 @@ def main():
if __name__ == "__main__":
parser = argparse.ArgumentParser("CycleGAN test")
parser.add_argument(
"-d", "--dynamic", action='store_false', help="Enable dygraph mode")
"-d", "--dynamic", action='store_true', help="Enable dygraph mode")
parser.add_argument(
"-p",
"--device",
......
......@@ -24,12 +24,11 @@ import time
import paddle
import paddle.fluid as fluid
from check import check_gpu, check_version
from model import Model, Input, set_device
from hapi.model import Model, Input, set_device
import data as data
from check import check_gpu, check_version
from cyclegan import Generator, Discriminator, GeneratorCombine, GLoss, DLoss
import data as data
step_per_epoch = 2974
......@@ -76,12 +75,15 @@ def main():
fake_A = Input(im_shape, 'float32', 'fake_A')
fake_B = Input(im_shape, 'float32', 'fake_B')
g_AB.prepare(inputs=[input_A])
g_BA.prepare(inputs=[input_B])
g_AB.prepare(inputs=[input_A], device=FLAGS.device)
g_BA.prepare(inputs=[input_B], device=FLAGS.device)
g.prepare(g_optimizer, GLoss(), inputs=[input_A, input_B])
d_A.prepare(da_optimizer, DLoss(), inputs=[input_B, fake_B])
d_B.prepare(db_optimizer, DLoss(), inputs=[input_A, fake_A])
g.prepare(g_optimizer, GLoss(), inputs=[input_A, input_B],
device=FLAGS.device)
d_A.prepare(da_optimizer, DLoss(), inputs=[input_B, fake_B],
device=FLAGS.device)
d_B.prepare(db_optimizer, DLoss(), inputs=[input_A, fake_A],
device=FLAGS.device)
if FLAGS.resume:
g.load(FLAGS.resume)
......@@ -108,14 +110,14 @@ def main():
data_B = data_B[0][0] if not FLAGS.dynamic else data_B[0]
start = time.time()
fake_B = g_AB.test(data_A)[0]
fake_A = g_BA.test(data_B)[0]
g_loss = g.train([data_A, data_B])[0]
fake_B = g_AB.test_batch(data_A)[0]
fake_A = g_BA.test_batch(data_B)[0]
g_loss = g.train_batch([data_A, data_B])[0]
fake_pb = B_pool.get(fake_B)
da_loss = d_A.train([data_B, fake_pb])[0]
da_loss = d_A.train_batch([data_B, fake_pb])[0]
fake_pa = A_pool.get(fake_A)
db_loss = d_B.train([data_A, fake_pa])[0]
db_loss = d_B.train_batch([data_A, fake_pa])[0]
t = time.time() - start
if i % 20 == 0:
......@@ -128,7 +130,7 @@ def main():
if __name__ == "__main__":
parser = argparse.ArgumentParser("CycleGAN Training on Cityscapes")
parser.add_argument(
"-d", "--dynamic", action='store_false', help="Enable dygraph mode")
"-d", "--dynamic", action='store_true', help="Enable dygraph mode")
parser.add_argument(
"-p",
"--device",
......
# Sequence Tagging Task
## 1. Introduction
Sequence Tagging is a sequence labeling model that can be used for word segmentation, part-of-speech (POS) tagging, named entity recognition, and similar tasks. We evaluated the joint model (word segmentation, POS tagging, and named entity recognition with a combined label set) on our in-house dataset; the results are shown below:
|Model|Precision|Recall|F1-score|
|:-:|:-:|:-:|:-:|
|Lexical Analysis|88.26%|89.20%|88.73%|
## 2. Getting Started
### Installation
#### 1. Install PaddlePaddle
This project requires PaddlePaddle 1.7+ and PaddleHub 1.0.0+. To install PaddlePaddle, see the official [quick install guide](http://www.paddlepaddle.org/paddle#quick-start); for PaddleHub, see [PaddleHub](https://github.com/PaddlePaddle/PaddleHub).
> Warning: the GPU and CPU builds of PaddlePaddle are distributed as paddlepaddle-gpu and paddlepaddle respectively; be sure to install the one matching your machine.
#### 2. Clone the code
Clone the toolkit repository to your local machine:
```bash
git clone https://github.com/PaddlePaddle/hapi.git
cd hapi/sequence_tagging
```
#### 3. Environment requirements
Python 2.7.15+ for Python 2, or 3.5.1+/3.6/3.7 for Python 3. The sequence tagging code runs on both Python 2 and 3 with no further version restrictions.
### Data preparation
#### 1. Quick download
The **datasets** and **pretrained models** used by this project can be fetched with either script below; to download only part of the data or models, follow steps 2 and 3 instead.
```bash
python downloads.py all
```
Or, in an environment that can run shell scripts:
```bash
sh downloads.sh
```
#### 2. Training dataset
Download and extract the dataset archive; it unpacks into a `./data/` directory:
```bash
python downloads.py dataset
```
#### 3. Pretrained model
We release a lexical analysis model trained on our in-house dataset; it can be used directly and downloaded as follows:
```bash
# download baseline model
python downloads.py model
```
### Model training
With the sample dataset in place, the commands below train on the training set `./data/train.tsv`.
Single-GPU training:
```
# setting visible devices for training
export CUDA_VISIBLE_DEVICES=0
python -u train.py \
--device gpu \
--dynamic False
# --device: train on gpu or cpu
# --dynamic: whether to train in dygraph (imperative) mode; True for dygraph, False for static graph
```
Multi-GPU training:
```
# setting visible devices for training
export CUDA_VISIBLE_DEVICES=0,1,2,3
python -m paddle.distributed.launch --selected_gpus=0,1,2,3 train.py \
--device gpu \
--dynamic False
# --device: train on gpu or cpu
# --dynamic: whether to train in dygraph (imperative) mode; True for dygraph, False for static graph
```
CPU training:
```
python -u train.py \
--device cpu \
--dynamic False
# --device: train on gpu or cpu
# --dynamic: whether to train in dygraph (imperative) mode; True for dygraph, False for static graph
```
### Prediction
Load a trained model and run prediction on new data:
```bash
python predict.py \
--init_from_checkpoint model_baseline/params \
--output_file predict.result \
--mode predict \
--device cpu \
--dynamic False
# --init_from_checkpoint: checkpoint to initialize the model from
# --output_file: file to write predictions to
# --device: run on gpu or cpu
# --mode: run mode; "train" trains the model, "predict" runs prediction
# --dynamic: whether to run in dygraph (imperative) mode; True for dygraph, False for static graph
```
### Evaluation
Using the lexical analysis model trained on our in-house dataset, you can evaluate directly on the test set `./data/test.tsv`:
```bash
# baseline model
python eval.py \
--init_from_checkpoint ./model_baseline/params \
--mode predict \
--device cpu \
--dynamic False
# --init_from_checkpoint: checkpoint to initialize the model from
# --device: run on gpu or cpu
# --mode: run mode; "train" trains the model, "predict" runs prediction
# --dynamic: whether to run in dygraph (imperative) mode; True for dygraph, False for static graph
```
## 3. Advanced Usage
### Task definition and modeling
The input of a sequence tagging task is a string (a "sentence" below); the output is the word boundaries and categories within that sentence. Sequence tagging is the classic way to model lexical analysis. We learn features with a GRU-based network and feed them into a CRF decoding layer, which essentially replaces the linear model of a traditional CRF with a nonlinear neural network and optimizes a sentence-level likelihood, better mitigating the label bias problem. The key points of the model are listed below, followed by a short decoding sketch:
1. The input is a one-hot representation: each character is an id.
2. The one-hot sequence is mapped through a character table into a sequence of dense character embeddings.
3. The embedding sequence feeds bidirectional GRUs that learn a new feature representation of the input; we stack two BiGRU layers to increase capacity.
4. A CRF takes the GRU features as input and the tag sequence as supervision to perform the tagging.
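To make the CRF decoding step concrete, here is a minimal NumPy sketch of Viterbi decoding over emission and transition scores. It is illustrative only: the function name and toy scores are hypothetical, and the actual model uses hapi's `Linear_chain_crf` and `Crf_decoding` layers on top of the stacked BiGRU features.
```python
import numpy as np

def viterbi_decode(emissions, transitions):
    """emissions: [seq_len, num_tags] scores from the feature layer;
    transitions: [num_tags, num_tags] CRF transition scores."""
    seq_len, num_tags = emissions.shape
    score = emissions[0].copy()        # best score ending at each tag so far
    backpointers = []
    for t in range(1, seq_len):
        # score of reaching each current tag from every previous tag
        candidates = score[:, None] + transitions + emissions[t][None, :]
        backpointers.append(candidates.argmax(axis=0))
        score = candidates.max(axis=0)
    best_tag = int(score.argmax())
    best_path = [best_tag]
    for bp in reversed(backpointers):
        best_tag = int(bp[best_tag])
        best_path.append(best_tag)
    return list(reversed(best_path))

# toy check: 3 steps, 2 tags, no transition preference
print(viterbi_decode(np.array([[1., 0.], [0., 1.], [1., 0.]]),
                     np.zeros((2, 2))))  # -> [0, 1, 0]
```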
The downloadable in-house data is jointly annotated for word segmentation, POS tagging, and named entity recognition. The POS and entity label sets are shown in the table below: 24 POS tags (lowercase) and 4 entity-category tags (uppercase). Note that persons, locations, organizations, and times each have two label variants (PER / LOC / ORG / TIME and nr / ns / nt / t); words tagged with the second set are those the model judged to be low-confidence persons, locations, organizations, or times. Based on these two label sets, developers can make their own precision/recall trade-off for the four categories.
| Tag | Meaning | Tag | Meaning | Tag | Meaning | Tag | Meaning |
| ---- | -------- | ---- | -------- | ---- | -------- | ---- | -------- |
| n | common noun | f | direction noun | s | place noun | t | time |
| nr | person name | ns | place name | nt | organization name | nw | work title |
| nz | other proper noun | v | common verb | vd | adverbial verb | vn | nominal verb |
| a | adjective | ad | adverbial adjective | an | nominal adjective | d | adverb |
| m | numeral | q | measure word | r | pronoun | p | preposition |
| c | conjunction | u | particle | xc | other function word | w | punctuation |
| PER | person name | LOC | place name | ORG | organization name | TIME | time |
### Model architecture
The model described above is illustrated in the figure below:<br />
<p align="center">
<img src="./images/gru-crf-model.png" width = "340" height = "300" /> <br />
Overall Architecture of GRU-CRF-MODEL
</p>
### Data format
Training data can be organized by users to fit their own applications. Apart from the fixed first line `text_a\tlabel`, every line consists of two tab-separated columns: the first is UTF-8-encoded Chinese text with characters separated by `\002`, the second is the per-character annotation, also `\002`-separated. We use the IOB2 scheme: X-B marks the first character of a word of type X, X-I marks its continuation, and O marks characters of no interest (in the joint POS/NER annotation, O does not actually occur). An example line follows, with a small parsing sketch after it:
```text
除\002了\002他\002续\002任\002十\002二\002届\002政\002协\002委\002员\002,\002马\002化\002腾\002,\002雷\002军\002,\002李\002彦\002宏\002也\002被\002推\002选\002为\002新\002一\002届\002全\002国\002人\002大\002代\002表\002或\002全\002国\002政\002协\002委\002员 p-B\002p-I\002r-B\002v-B\002v-I\002m-B\002m-I\002m-I\002ORG-B\002ORG-I\002n-B\002n-I\002w-B\002PER-B\002PER-I\002PER-I\002w-B\002PER-B\002PER-I\002w-B\002PER-B\002PER-I\002PER-I\002d-B\002p-B\002v-B\002v-I\002v-B\002a-B\002m-B\002m-I\002ORG-B\002ORG-I\002ORG-I\002ORG-I\002n-B\002n-I\002c-B\002n-B\002n-I\002ORG-B\002ORG-I\002n-B\002n-I
```
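As a quick illustration of this format, the sketch below parses one line into aligned (character, tag) pairs; `parse_line` is a hypothetical helper, not part of the repo:
```python
def parse_line(line):
    """Split one 'text_a<TAB>label' line into aligned character/tag lists."""
    text, label = line.rstrip("\n").split("\t")
    chars = text.split("\002")
    tags = label.split("\002")
    assert len(chars) == len(tags)
    return chars, tags

chars, tags = parse_line("除\002了\tp-B\002p-I\n")
print(list(zip(chars, tags)))  # [('除', 'p-B'), ('了', 'p-I')]
```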
+ We release the full model and its dependency data along with the code. The training data is too large to release, so the `data` directory only holds a few samples that illustrate the input format.
+ The model's dependency data comprises:
1. the input-text vocabulary, `word.dic` under `conf`
2. the table for normalizing special characters in input text, `q2b.dic` under `conf`
3. the tag-label dictionary, `tag.dic` under `conf`
+ Both training and prediction preprocess the raw data in three steps (a minimal sketch follows this list):
1. extract sentences and labels from the raw files, building sentence and label sequences
2. normalize special characters in the sentence sequences
3. look up each character's integer index in the vocabulary
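Here is a minimal sketch of steps 2 and 3 above, with tiny stand-in dictionaries for `conf/q2b.dic` and `conf/word.dic` (the `OOV` entry is an assumption about the vocabulary, not a confirmed key):
```python
q2b = {"A": "a", ",": ","}            # special-character normalization table
word2id = {"a": 1, ",": 2, "OOV": 0}    # character-to-index vocabulary

def to_ids(chars):
    normalized = [q2b.get(c, c) for c in chars]                   # step 2
    return [word2id.get(c, word2id["OOV"]) for c in normalized]  # step 3

print(to_ids(["A", ","]))  # -> [1, 2]
```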
### Code structure
```text
├── README.md # this document
├── data/ # datasets
├── conf/ # dictionaries and default configuration
├── images/ # images used in the docs
├── utils/ # common utility functions
├── train.py # training script
├── predict.py # prediction script
├── eval.py # lexical-analysis evaluation script
├── downloads.py # data/model download script (Python)
├── downloads.sh # data/model download script (shell)
└── reader.py # data reading helpers
```
## 4. Miscellaneous
### Citing sequence tagging
If you use sequence tagging in your academic work, please add the citation below. We are glad if the model helps your research.
```text
@article{jiao2018LAC,
title={Chinese Lexical Analysis with Deep Bi-GRU-CRF Network},
author={Jiao, Zhenyu and Sun, Shuqi and Sun, Ke},
journal={arXiv preprint arXiv:1807.01882},
year={2018},
url={https://arxiv.org/abs/1807.01882}
}
```
### Contributing
If you can fix an issue or add a new feature, we welcome your pull request. Accepted PRs are scored (0-5, higher is better) by the quality and difficulty of the contribution; once you accumulate 10 points, you can contact us for an interview opportunity or a recommendation letter.
 
、 ,
。 .
— -
~ ~
‖ |
… .
‘ '
’ '
“ "
” "
〔 (
〕 )
〈 <
〉 >
「 '
」 '
『 "
』 "
〖 [
〗 ]
【 [
】 ]
∶ :
$ $
! !
" "
# #
% %
& &
' '
( (
) )
* *
+ +
, ,
- -
. .
/ /
0 0
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
: :
; ;
< <
= =
> >
? ?
@ @
A a
B b
C c
D d
E e
F f
G g
H h
I i
J j
K k
L l
M m
N n
O o
P p
Q q
R r
S s
T t
U u
V v
W w
X x
Y y
Z z
[ [
\ \
] ]
^ ^
_ _
` `
a a
b b
c c
d d
e e
f f
g g
h h
i i
j j
k k
l l
m m
n n
o o
p p
q q
r r
s s
t t
u u
v v
w w
x x
y y
z z
{ {
| |
} }
 ̄ ~
〝 "
〞 "
﹐ ,
﹑ ,
﹒ .
﹔ ;
﹕ :
﹖ ?
﹗ !
﹙ (
﹚ )
﹛ {
﹜ }
﹝ [
﹞ ]
﹟ #
﹠ &
﹡ *
﹢ +
﹣ -
﹤ <
﹥ >
﹦ =
﹨ \
﹩ $
﹪ %
﹫ @
,
A a
B b
C c
D d
E e
F f
G g
H h
I i
J j
K k
L l
M m
N n
O o
P p
Q q
R r
S s
T t
U u
V v
W w
X x
Y y
Z z
0 a-B
1 a-I
2 ad-B
3 ad-I
4 an-B
5 an-I
6 c-B
7 c-I
8 d-B
9 d-I
10 f-B
11 f-I
12 m-B
13 m-I
14 n-B
15 n-I
16 nr-B
17 nr-I
18 ns-B
19 ns-I
20 nt-B
21 nt-I
22 nw-B
23 nw-I
24 nz-B
25 nz-I
26 p-B
27 p-I
28 q-B
29 q-I
30 r-B
31 r-I
32 s-B
33 s-I
34 t-B
35 t-I
36 u-B
37 u-I
38 v-B
39 v-I
40 vd-B
41 vd-I
42 vn-B
43 vn-I
44 w-B
45 w-I
46 xc-B
47 xc-I
48 PER-B
49 PER-I
50 LOC-B
51 LOC-I
52 ORG-B
53 ORG-I
54 TIME-B
55 TIME-I
56 O
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Download script, download dataset and pretrain models.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import io
import os
import sys
import time
import hashlib
import tarfile
import requests
FILE_INFO = {
'BASE_URL': 'https://baidu-nlp.bj.bcebos.com/',
'DATA': {
'name': 'lexical_analysis-dataset-2.0.0.tar.gz',
'md5': '71e4a9a36d0f0177929a1bccedca7dba'
},
'MODEL': {
'name': 'sequence_tagging_dy.tar.gz',
'md5': "1125d374c03c8218b6e47325dcf607e3"
},
}
def usage():
desc = ("\nDownload datasets and pretrained models for sequence tagging.\n"
"Usage:\n"
" 1. python downloads.py all\n"
" 2. python downloads.py dataset\n"
" 3. python downloads.py model\n")
print(desc)
def md5file(fname):
hash_md5 = hashlib.md5()
with io.open(fname, "rb") as fin:
for chunk in iter(lambda: fin.read(4096), b""):
hash_md5.update(chunk)
return hash_md5.hexdigest()
def extract(fname, dir_path):
"""
Extract tar.gz file
"""
try:
tar = tarfile.open(fname, "r:gz")
file_names = tar.getnames()
for file_name in file_names:
tar.extract(file_name, dir_path)
print(file_name)
tar.close()
except Exception as e:
raise e
def _download(url, filename, md5sum):
"""
Download file and check md5
"""
retry = 0
retry_limit = 3
chunk_size = 4096
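    # keep retrying until the file exists with a matching md5, up to retry_limit attempts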
while not (os.path.exists(filename) and md5file(filename) == md5sum):
if retry < retry_limit:
retry += 1
else:
raise RuntimeError(
"Cannot download dataset ({0}) with retry {1} times.".format(
url, retry_limit))
try:
start = time.time()
size = 0
res = requests.get(url, stream=True)
filesize = int(res.headers['content-length'])
if res.status_code == 200:
print("[Filesize]: %0.2f MB" % (filesize / 1024 / 1024))
# save by chunk
with io.open(filename, "wb") as fout:
for chunk in res.iter_content(chunk_size=chunk_size):
if chunk:
fout.write(chunk)
size += len(chunk)
pr = '>' * int(size * 50 / filesize)
print(
'\r[Process ]: %s%.2f%%' %
(pr, float(size / filesize * 100)),
end='')
end = time.time()
print("\n[CostTime]: %.2f s" % (end - start))
except Exception as e:
print(e)
def download(name, dir_path):
url = FILE_INFO['BASE_URL'] + FILE_INFO[name]['name']
file_path = os.path.join(dir_path, FILE_INFO[name]['name'])
if not os.path.exists(dir_path):
os.makedirs(dir_path)
# download data
print("Downloading : %s" % name)
_download(url, file_path, FILE_INFO[name]['md5'])
# extract data
print("Extracting : %s" % file_path)
extract(file_path, dir_path)
os.remove(file_path)
if __name__ == '__main__':
if len(sys.argv) != 2:
usage()
sys.exit(1)
pwd = os.path.join(os.path.dirname(__file__), './')
ernie_dir = os.path.join(os.path.dirname(__file__), './pretrained')
    if sys.argv[1] == 'all':
        download('DATA', pwd)
        download('MODEL', pwd)
    elif sys.argv[1] == "dataset":
        download('DATA', pwd)
    elif sys.argv[1] == "model":
        download('MODEL', pwd)
    else:
        usage()
#!/bin/bash
# download baseline model file to ./model_baseline/
if [ -d ./model_baseline/ ]
then
echo "./model_baseline/ directory already existed, ignore download"
else
wget --no-check-certificate https://baidu-nlp.bj.bcebos.com/sequence_tagging_dy.tar.gz
tar xvf sequence_tagging_dy.tar.gz
/bin/rm sequence_tagging_dy.tar.gz
fi
# download dataset file to ./data/
if [ -d ./data/ ]
then
echo "./data/ directory already existed, ignore download"
else
wget --no-check-certificate https://baidu-nlp.bj.bcebos.com/lexical_analysis-dataset-2.0.0.tar.gz
tar xvf lexical_analysis-dataset-2.0.0.tar.gz
/bin/rm lexical_analysis-dataset-2.0.0.tar.gz
fi
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
SequenceTagging network structure
"""
from __future__ import division
from __future__ import print_function
import io
import os
import sys
import math
import argparse
import numpy as np
from train import SeqTagging
from utils.configure import PDConfig
from utils.check import check_gpu, check_version
from utils.metrics import chunk_count
from reader import LacDataset, create_lexnet_data_generator, create_dataloader
work_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(os.path.join(work_dir, "../"))
from hapi.model import set_device, Input
import paddle.fluid as fluid
from paddle.fluid.optimizer import AdamOptimizer
from paddle.fluid.layers.utils import flatten
def main(args):
place = set_device(args.device)
fluid.enable_dygraph(place) if args.dynamic else None
inputs = [Input([None, None], 'int64', name='words'),
Input([None], 'int64', name='length')]
feed_list = None if args.dynamic else [x.forward() for x in inputs]
dataset = LacDataset(args)
eval_path = args.test_file
chunk_evaluator = fluid.metrics.ChunkEvaluator()
chunk_evaluator.reset()
eval_generator = create_lexnet_data_generator(
args, reader=dataset, file_name=eval_path, place=place, mode="test")
eval_dataset = create_dataloader(
eval_generator, place, feed_list=feed_list)
vocab_size = dataset.vocab_size
num_labels = dataset.num_labels
model = SeqTagging(args, vocab_size, num_labels)
optim = AdamOptimizer(
learning_rate=args.base_learning_rate,
parameter_list=model.parameters())
model.mode = "test"
model.prepare(inputs=inputs)
model.load(args.init_from_checkpoint, skip_mismatch=True)
for data in eval_dataset():
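        # static-graph loaders yield the batch wrapped in a single-element
        # list, while dygraph loaders yield the list of tensors directly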
if len(data) == 1:
batch_data = data[0]
targets = np.array(batch_data[2])
else:
batch_data = data
targets = batch_data[2].numpy()
inputs_data = [batch_data[0], batch_data[1]]
        crf_decode, length = model.test_batch(inputs=inputs_data)
num_infer_chunks, num_label_chunks, num_correct_chunks = chunk_count(crf_decode, targets, length, dataset.id2label_dict)
chunk_evaluator.update(num_infer_chunks, num_label_chunks, num_correct_chunks)
precision, recall, f1 = chunk_evaluator.eval()
print("[test] P: %.5f, R: %.5f, F1: %.5f" % (precision, recall, f1))
if __name__ == '__main__':
args = PDConfig(yaml_file="sequence_tagging.yaml")
args.build()
args.Print()
use_gpu = True if args.device == "gpu" else False
check_gpu(use_gpu)
check_version()
main(args)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
SequenceTagging network structure
"""
from __future__ import division
from __future__ import print_function
import io
import os
import sys
import math
import argparse
import numpy as np
from train import SeqTagging
from utils.check import check_gpu, check_version
from utils.configure import PDConfig
from reader import LacDataset, create_lexnet_data_generator, create_dataloader
work_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(os.path.join(work_dir, "../"))
from hapi.model import set_device, Input
import paddle.fluid as fluid
from paddle.fluid.optimizer import AdamOptimizer
from paddle.fluid.layers.utils import flatten
def main(args):
place = set_device(args.device)
fluid.enable_dygraph(place) if args.dynamic else None
inputs = [Input([None, None], 'int64', name='words'),
Input([None], 'int64', name='length')]
feed_list = None if args.dynamic else [x.forward() for x in inputs]
dataset = LacDataset(args)
predict_path = args.predict_file
predict_generator = create_lexnet_data_generator(
args, reader=dataset, file_name=predict_path, place=place, mode="predict")
predict_dataset = create_dataloader(
predict_generator, place, feed_list=feed_list)
vocab_size = dataset.vocab_size
num_labels = dataset.num_labels
model = SeqTagging(args, vocab_size, num_labels)
optim = AdamOptimizer(
learning_rate=args.base_learning_rate,
parameter_list=model.parameters())
model.mode = "test"
model.prepare(inputs=inputs)
model.load(args.init_from_checkpoint, skip_mismatch=True)
f = open(args.output_file, "wb")
for data in predict_dataset():
if len(data) == 1:
input_data = data[0]
else:
input_data = data
        results, length = model.test_batch(inputs=flatten(input_data))
for i in range(len(results)):
word_len = length[i]
word_ids = results[i][: word_len]
tags = [dataset.id2label_dict[str(id)] for id in word_ids]
f.write("\002".join(tags) + "\n")
if __name__ == '__main__':
args = PDConfig(yaml_file="sequence_tagging.yaml")
args.build()
args.Print()
use_gpu = True if args.device == "gpu" else False
check_gpu(use_gpu)
check_version()
main(args)
......@@ -21,7 +21,7 @@ from __future__ import print_function
import io
import numpy as np
import paddle.fluid as fluid
import paddle
class LacDataset(object):
......@@ -76,11 +76,11 @@ class LacDataset(object):
@property
def vocab_size(self):
return len(self.word2id_dict.values())
return max(self.word2id_dict.values()) + 1
@property
def num_labels(self):
return len(self.label2id_dict.values())
return max(self.label2id_dict.values()) + 1
def get_num_examples(self, filename):
"""num of line of file"""
......@@ -120,48 +120,121 @@ class LacDataset(object):
def wrapper():
fread = io.open(filename, "r", encoding="utf-8")
headline = next(fread)
headline = headline.strip().split('\t')
assert len(headline) == 2 and headline[0] == "text_a" and headline[
1] == "label"
buf = []
for line in fread:
words, labels = line.strip("\n").split("\t")
if len(words) < 1:
continue
word_ids = self.word_to_ids(words.split("\002"))
label_ids = self.label_to_ids(labels.split("\002"))
assert len(word_ids) == len(label_ids)
word_ids = word_ids[0:max_seq_len]
words_len = np.int64(len(word_ids))
word_ids += [0 for _ in range(max_seq_len - words_len)]
label_ids = label_ids[0:max_seq_len]
label_ids += [0 for _ in range(max_seq_len - words_len)]
assert len(word_ids) == len(label_ids)
yield word_ids, label_ids, words_len
if mode == "train":
headline = next(fread)
headline = headline.strip().split('\t')
assert len(headline) == 2 and headline[0] == "text_a" and headline[
1] == "label"
buf = []
for line in fread:
words, labels = line.strip("\n").split("\t")
if len(words) < 1:
continue
word_ids = self.word_to_ids(words.split("\002"))
label_ids = self.label_to_ids(labels.split("\002"))
assert len(word_ids) == len(label_ids)
words_len = np.int64(len(word_ids))
word_ids = word_ids[0:max_seq_len]
words_len = np.int64(len(word_ids))
word_ids += [0 for _ in range(max_seq_len - words_len)]
label_ids = label_ids[0:max_seq_len]
label_ids += [0 for _ in range(max_seq_len - words_len)]
assert len(word_ids) == len(label_ids)
yield word_ids, label_ids, words_len
elif mode == "test":
headline = next(fread)
headline = headline.strip().split('\t')
assert len(headline) == 2 and headline[0] == "text_a" and headline[
1] == "label"
buf = []
for line in fread:
words, labels = line.strip("\n").split("\t")
if len(words) < 1:
continue
word_ids = self.word_to_ids(words.split("\002"))
label_ids = self.label_to_ids(labels.split("\002"))
assert len(word_ids) == len(label_ids)
words_len = np.int64(len(word_ids))
yield word_ids, label_ids, words_len
else:
for line in fread:
words = line.strip("\n").split('\t')[0]
if words == u"text_a":
continue
if "\002" not in words:
word_ids = self.word_to_ids(words)
else:
word_ids = self.word_to_ids(words.split("\002"))
words_len = np.int64(len(word_ids))
yield word_ids, words_len
fread.close()
return wrapper
def create_lexnet_data_generator(args, reader, file_name, place, mode="train"):
def wrapper():
batch_words, batch_labels, seq_lens = [], [], []
for epoch in xrange(args.epoch):
def create_lexnet_data_generator(args, reader, file_name, place, mode="train"):
def padding_data(max_len, batch_data):
padding_batch_data = []
for data in batch_data:
data += [0 for _ in range(max_len - len(data))]
padding_batch_data.append(data)
return padding_batch_data
def wrapper():
if mode == "train":
batch_words, batch_labels, seq_lens = [], [], []
for epoch in xrange(args.epoch):
for instance in reader.file_reader(
file_name, mode, max_seq_len=args.max_seq_len)():
words, labels, words_len = instance
if len(seq_lens) < args.batch_size:
batch_words.append(words)
batch_labels.append(labels)
seq_lens.append(words_len)
if len(seq_lens) == args.batch_size:
yield batch_words, seq_lens, batch_labels, batch_labels
batch_words, batch_labels, seq_lens = [], [], []
if len(seq_lens) > 0:
yield batch_words, seq_lens, batch_labels, batch_labels
elif mode == "test":
batch_words, batch_labels, seq_lens, max_len = [], [], [], 0
for instance in reader.file_reader(
file_name, mode, max_seq_len=args.max_seq_len)():
file_name, mode, max_seq_len=args.max_seq_len)():
words, labels, words_len = instance
max_len = words_len if words_len > max_len else max_len
if len(seq_lens) < args.batch_size:
batch_words.append(words)
batch_labels.append(labels)
seq_lens.append(words_len)
if len(seq_lens) == args.batch_size:
yield batch_words, batch_labels, seq_lens, batch_labels
batch_words, batch_labels, seq_lens = [], [], []
batch_labels.append(labels)
if len(seq_lens) == args.batch_size:
padding_batch_words = padding_data(max_len, batch_words)
padding_batch_labels = padding_data(max_len, batch_labels)
yield padding_batch_words, seq_lens, padding_batch_labels, padding_batch_labels
batch_words, batch_labels, seq_lens, max_len = [], [], [], 0
if len(seq_lens) > 0:
padding_batch_words = padding_data(max_len, batch_words)
padding_batch_labels = padding_data(max_len, batch_labels)
yield padding_batch_words, seq_lens, padding_batch_labels, padding_batch_labels
if len(seq_lens) > 0:
yield batch_words, batch_labels, seq_lens, batch_labels
batch_words, batch_labels, seq_lens = [], [], []
else:
batch_words, seq_lens, max_len = [], [], 0
for instance in reader.file_reader(
file_name, mode, max_seq_len=args.max_seq_len)():
words, words_len = instance
if len(seq_lens) < args.batch_size:
batch_words.append(words)
seq_lens.append(words_len)
max_len = words_len if words_len > max_len else max_len
if len(seq_lens) == args.batch_size:
padding_batch_words = padding_data(max_len, batch_words)
yield padding_batch_words, seq_lens
batch_words, seq_lens, max_len = [], [], 0
if len(seq_lens) > 0:
padding_batch_words = padding_data(max_len, batch_words)
yield padding_batch_words, seq_lens
return wrapper
......
word_dict_path: "./conf/word.dic"
label_dict_path: "./conf/tag.dic"
word_rep_dict_path: "./conf/q2b.dic"
device: "cpu"
dynamic: True
epoch: 10
base_learning_rate: 0.001
word_emb_dim: 128
grnn_hidden_dim: 128
bigru_num: 2
emb_learning_rate: 1.0
crf_learning_rate: 1.0
batch_size: 300
max_seq_len: 126
num_devices: 1
save_dir: "model"
init_from_checkpoint: "model_baseline/params"
init_from_pretrain_model: ""
save_freq: 1
eval_freq: 1
output_file: "predict.result"
test_file: "./data/test.tsv"
train_file: "./data/train.tsv"
predict_file: "./data/infer.tsv"
mode: "train"
......@@ -24,11 +24,17 @@ import sys
import math
import argparse
import numpy as np
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from metrics import Metric
from model import Model, Input, Loss, set_device
from text import SequenceTagging
work_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(os.path.join(work_dir, "../"))
from hapi.metrics import Metric
from hapi.model import Model, Input, Loss, set_device
from hapi.text.text import SequenceTagging
from utils.check import check_gpu, check_version
from utils.configure import PDConfig
from reader import LacDataset, create_lexnet_data_generator, create_dataloader
import paddle.fluid as fluid
......@@ -47,6 +53,7 @@ class SeqTagging(Model):
for infer: return the prediction
otherwise: return the prediction
"""
self.mode_type = args.mode
self.word_emb_dim = args.word_emb_dim
self.vocab_size = vocab_size
self.num_labels = num_labels
......@@ -72,12 +79,18 @@ class SeqTagging(Model):
init_bound=self.init_bound,
length=self.length)
def forward(self, word, target, lengths):
def forward(self, *inputs):
"""
Configure the network
"""
crf_decode, avg_cost, lengths = self.sequence_tagging(word, target, lengths)
return crf_decode, avg_cost, lengths
word = inputs[0]
lengths = inputs[1]
if self.mode_type == "train" or self.mode_type == "test":
target = inputs[2]
outputs = self.sequence_tagging(word, lengths, target)
else:
outputs = self.sequence_tagging(word, lengths)
return outputs
class Chunk_eval(fluid.dygraph.Layer):
......@@ -103,10 +116,9 @@ class Chunk_eval(fluid.dygraph.Layer):
dtype="int64")
num_correct_chunks = self._helper.create_variable_for_type_inference(
dtype="int64")
this_input = {"Inference": input, "Label": label[0]}
if seq_length:
this_input["SeqLength"] = seq_length[0]
this_input = {"Inference": input, "Label": label}
if seq_length is not None:
this_input["SeqLength"] = seq_length
self._helper.append_op(
type='chunk_eval',
inputs=this_input,
......@@ -144,9 +156,10 @@ class ChunkEval(Metric):
int(math.ceil((num_labels - 1) / 2.0)), "IOB")
self.reset()
def add_metric_op(self, pred, label, *args, **kwargs):
crf_decode = pred[0]
lengths = pred[2]
def add_metric_op(self, *args):
crf_decode = args[0]
lengths = args[2]
label = args[3]
(num_infer_chunks, num_label_chunks,
num_correct_chunks) = self.chunk_eval(
input=crf_decode, label=label, seq_length=lengths)
......@@ -194,18 +207,16 @@ def main(args):
place = set_device(args.device)
fluid.enable_dygraph(place) if args.dynamic else None
inputs = [
Input(
[None, args.max_seq_len], 'int64', name='words'), Input(
[None, args.max_seq_len], 'int64', name='target'), Input(
[None], 'int64', name='length')
]
labels = [Input([None, args.max_seq_len], 'int64', name='labels')]
inputs = [Input([None, None], 'int64', name='words'),
Input([None], 'int64', name='length'),
Input([None, None], 'int64', name='target')]
labels = [Input([None, None], 'int64', name='labels')]
feed_list = None if args.dynamic else [x.forward() for x in inputs + labels]
dataset = LacDataset(args)
train_path = os.path.join(args.data, "train.tsv")
test_path = os.path.join(args.data, "test.tsv")
train_path = args.train_file
test_path = args.test_file
train_generator = create_lexnet_data_generator(
args, reader=dataset, file_name=train_path, place=place, mode="train")
......@@ -233,8 +244,11 @@ def main(args):
labels=labels,
device=args.device)
if args.resume is not None:
model.load(args.resume)
if args.init_from_checkpoint:
model.load(args.init_from_checkpoint)
if args.init_from_pretrain_model:
model.load(args.init_from_pretrain_model, reset_optimizer=True)
model.fit(train_dataset,
test_dataset,
......@@ -246,78 +260,12 @@ def main(args):
if __name__ == '__main__':
parser = argparse.ArgumentParser("LAC training")
parser.add_argument(
"-dir", "--data", default=None, type=str, help='path to LAC dataset')
parser.add_argument(
"-wd",
"--word_dict_path",
default=None,
type=str,
help='word dict path')
parser.add_argument(
"-ld",
"--label_dict_path",
default=None,
type=str,
help='label dict path')
parser.add_argument(
"-wrd",
"--word_rep_dict_path",
default=None,
type=str,
help='The path of the word replacement Dictionary.')
parser.add_argument(
"-dev",
"--device",
type=str,
default='gpu',
help="device to use, gpu or cpu")
parser.add_argument(
"-d", "--dynamic", action='store_true', help="enable dygraph mode")
parser.add_argument(
"-e", "--epoch", default=10, type=int, help="number of epoch")
parser.add_argument(
'-lr',
'--base_learning_rate',
default=1e-3,
type=float,
metavar='LR',
help='initial learning rate')
parser.add_argument(
"--word_emb_dim",
default=128,
type=int,
help='word embedding dimension')
parser.add_argument(
"--grnn_hidden_dim", default=128, type=int, help="hidden dimension")
parser.add_argument(
"--bigru_num", default=2, type=int, help='the number of bi-rnn')
parser.add_argument("-elr", "--emb_learning_rate", default=1.0, type=float)
parser.add_argument("-clr", "--crf_learning_rate", default=1.0, type=float)
parser.add_argument(
"-b", "--batch_size", default=300, type=int, help="batch size")
parser.add_argument(
"--max_seq_len", default=126, type=int, help="max sequence length")
parser.add_argument(
"-n", "--num_devices", default=1, type=int, help="number of devices")
parser.add_argument(
"-r",
"--resume",
default=None,
type=str,
help="checkpoint path to resume")
parser.add_argument(
"-o",
"--save_dir",
default="./model",
type=str,
help="save model path")
parser.add_argument(
"-sf", "--save_freq", default=1, type=int, help="save frequency")
parser.add_argument(
"-ef", "--eval_freq", default=1, type=int, help="eval frequency")
args = parser.parse_args()
print(args)
args = PDConfig(yaml_file="sequence_tagging.yaml")
args.build()
args.Print()
use_gpu = True if args.device == "gpu" else False
check_gpu(use_gpu)
check_version()
main(args)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import paddle.fluid as fluid
__all__ = ['check_gpu', 'check_version']
def check_gpu(use_gpu):
"""
Log error and exit when set use_gpu=true in paddlepaddle
cpu version.
"""
err = "Config use_gpu cannot be set as true while you are " \
"using paddlepaddle cpu version ! \nPlease try: \n" \
"\t1. Install paddlepaddle-gpu to run model on GPU \n" \
"\t2. Set use_gpu as false in config file to run " \
"model on CPU"
try:
if use_gpu and not fluid.is_compiled_with_cuda():
print(err)
sys.exit(1)
except Exception as e:
pass
def check_version():
"""
Log error and exit when the installed version of paddlepaddle is
not satisfied.
"""
err = "PaddlePaddle version 1.6 or higher is required, " \
"or a suitable develop version is satisfied as well. \n" \
"Please make sure the version is good with your code." \
try:
fluid.require_version('1.7.0')
except Exception as e:
print(err)
sys.exit(1)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import argparse
import json
import yaml
import six
import logging
logging_only_message = "%(message)s"
logging_details = "%(asctime)s.%(msecs)03d %(levelname)s %(module)s - %(funcName)s: %(message)s"
class JsonConfig(object):
"""
A high-level api for handling json configure file.
"""
def __init__(self, config_path):
self._config_dict = self._parse(config_path)
def _parse(self, config_path):
try:
with open(config_path) as json_file:
config_dict = json.load(json_file)
except:
raise IOError("Error in parsing bert model config file '%s'" %
config_path)
else:
return config_dict
def __getitem__(self, key):
return self._config_dict[key]
def print_config(self):
for arg, value in sorted(six.iteritems(self._config_dict)):
print('%s: %s' % (arg, value))
print('------------------------------------------------')
class ArgumentGroup(object):
def __init__(self, parser, title, des):
self._group = parser.add_argument_group(title=title, description=des)
def add_arg(self, name, type, default, help, **kwargs):
type = str2bool if type == bool else type
self._group.add_argument(
"--" + name,
default=default,
type=type,
help=help + ' Default: %(default)s.',
**kwargs)
class ArgConfig(object):
"""
A high-level api for handling argument configs.
"""
def __init__(self):
parser = argparse.ArgumentParser()
train_g = ArgumentGroup(parser, "training", "training options.")
train_g.add_arg("epoch", int, 3, "Number of epoches for fine-tuning.")
train_g.add_arg("learning_rate", float, 5e-5,
"Learning rate used to train with warmup.")
train_g.add_arg(
"lr_scheduler",
str,
"linear_warmup_decay",
"scheduler of learning rate.",
choices=['linear_warmup_decay', 'noam_decay'])
train_g.add_arg("weight_decay", float, 0.01,
"Weight decay rate for L2 regularizer.")
train_g.add_arg(
"warmup_proportion", float, 0.1,
"Proportion of training steps to perform linear learning rate warmup for."
)
train_g.add_arg("save_steps", int, 1000,
"The steps interval to save checkpoints.")
train_g.add_arg("use_fp16", bool, False,
"Whether to use fp16 mixed precision training.")
train_g.add_arg(
"loss_scaling", float, 1.0,
"Loss scaling factor for mixed precision training, only valid when use_fp16 is enabled."
)
train_g.add_arg("pred_dir", str, None,
"Path to save the prediction results")
log_g = ArgumentGroup(parser, "logging", "logging related.")
log_g.add_arg("skip_steps", int, 10,
"The steps interval to print loss.")
log_g.add_arg("verbose", bool, False, "Whether to output verbose log.")
run_type_g = ArgumentGroup(parser, "run_type", "running type options.")
run_type_g.add_arg("use_cuda", bool, True,
"If set, use GPU for training.")
run_type_g.add_arg(
"use_fast_executor", bool, False,
"If set, use fast parallel executor (in experiment).")
run_type_g.add_arg(
"num_iteration_per_drop_scope", int, 1,
"Ihe iteration intervals to clean up temporary variables.")
run_type_g.add_arg("do_train", bool, True,
"Whether to perform training.")
run_type_g.add_arg("do_predict", bool, True,
"Whether to perform prediction.")
custom_g = ArgumentGroup(parser, "customize", "customized options.")
self.custom_g = custom_g
self.parser = parser
def add_arg(self, name, dtype, default, descrip):
self.custom_g.add_arg(name, dtype, default, descrip)
def build_conf(self):
return self.parser.parse_args()
def str2bool(v):
# because argparse does not support to parse "true, False" as python
# boolean directly
return v.lower() in ("true", "t", "1")
def print_arguments(args, log=None):
if not log:
print('----------- Configuration Arguments -----------')
for arg, value in sorted(six.iteritems(vars(args))):
print('%s: %s' % (arg, value))
print('------------------------------------------------')
else:
log.info('----------- Configuration Arguments -----------')
for arg, value in sorted(six.iteritems(vars(args))):
log.info('%s: %s' % (arg, value))
log.info('------------------------------------------------')
class PDConfig(object):
"""
A high-level API for managing configuration files in PaddlePaddle.
    Works jointly with command-line arguments, JSON files and YAML files.
"""
def __init__(self, json_file="", yaml_file="", fuse_args=True):
"""
        Init function for PDConfig.
json_file: the path to the json configure file.
yaml_file: the path to the yaml configure file.
fuse_args: if fuse the json/yaml configs with argparse.
"""
assert isinstance(json_file, str)
assert isinstance(yaml_file, str)
if json_file != "" and yaml_file != "":
            raise Warning(
                "json_file and yaml_file cannot coexist for now. Please use only one configuration file type."
            )
self.args = None
self.arg_config = {}
self.json_config = {}
self.yaml_config = {}
parser = argparse.ArgumentParser()
self.default_g = ArgumentGroup(parser, "default", "default options.")
self.yaml_g = ArgumentGroup(parser, "yaml", "options from yaml.")
self.json_g = ArgumentGroup(parser, "json", "options from json.")
self.com_g = ArgumentGroup(parser, "custom", "customized options.")
self.default_g.add_arg("do_train", bool, False,
"Whether to perform training.")
self.default_g.add_arg("do_predict", bool, False,
"Whether to perform predicting.")
self.default_g.add_arg("do_eval", bool, False,
"Whether to perform evaluating.")
self.default_g.add_arg("do_save_inference_model", bool, False,
"Whether to perform model saving for inference.")
# NOTE: args for profiler
self.default_g.add_arg("is_profiler", int, 0, "the switch of profiler tools. (used for benchmark)")
self.default_g.add_arg("profiler_path", str, './', "the profiler output file path. (used for benchmark)")
self.default_g.add_arg("max_iter", int, 0, "the max train batch num.(used for benchmark)")
self.parser = parser
if json_file != "":
self.load_json(json_file, fuse_args=fuse_args)
if yaml_file:
self.load_yaml(yaml_file, fuse_args=fuse_args)
def load_json(self, file_path, fuse_args=True):
if not os.path.exists(file_path):
raise Warning("the json file %s does not exist." % file_path)
return
with open(file_path, "r") as fin:
self.json_config = json.loads(fin.read())
fin.close()
if fuse_args:
for name in self.json_config:
if isinstance(self.json_config[name], list):
self.json_g.add_arg(
name,
type(self.json_config[name][0]),
self.json_config[name],
"This is from %s" % file_path,
nargs=len(self.json_config[name]))
continue
if not isinstance(self.json_config[name], int) \
and not isinstance(self.json_config[name], float) \
and not isinstance(self.json_config[name], str) \
and not isinstance(self.json_config[name], bool):
continue
self.json_g.add_arg(name,
type(self.json_config[name]),
self.json_config[name],
"This is from %s" % file_path)
def load_yaml(self, file_path, fuse_args=True):
if not os.path.exists(file_path):
raise Warning("the yaml file %s does not exist." % file_path)
return
with open(file_path, "r") as fin:
self.yaml_config = yaml.load(fin, Loader=yaml.SafeLoader)
fin.close()
if fuse_args:
for name in self.yaml_config:
if isinstance(self.yaml_config[name], list):
self.yaml_g.add_arg(
name,
type(self.yaml_config[name][0]),
self.yaml_config[name],
"This is from %s" % file_path,
nargs=len(self.yaml_config[name]))
continue
if not isinstance(self.yaml_config[name], int) \
and not isinstance(self.yaml_config[name], float) \
and not isinstance(self.yaml_config[name], str) \
and not isinstance(self.yaml_config[name], bool):
continue
self.yaml_g.add_arg(name,
type(self.yaml_config[name]),
self.yaml_config[name],
"This is from %s" % file_path)
def build(self):
self.args = self.parser.parse_args()
self.arg_config = vars(self.args)
def __add__(self, new_arg):
assert isinstance(new_arg, list) or isinstance(new_arg, tuple)
assert len(new_arg) >= 3
assert self.args is None
name = new_arg[0]
dtype = new_arg[1]
dvalue = new_arg[2]
desc = new_arg[3] if len(
new_arg) == 4 else "Description is not provided."
self.com_g.add_arg(name, dtype, dvalue, desc)
return self
def __getattr__(self, name):
if name in self.arg_config:
return self.arg_config[name]
if name in self.json_config:
return self.json_config[name]
if name in self.yaml_config:
return self.yaml_config[name]
raise Warning("The argument %s is not defined." % name)
def Print(self):
print("-" * 70)
for name in self.arg_config:
print("%s:\t\t\t\t%s" % (str(name), str(self.arg_config[name])))
for name in self.json_config:
if name not in self.arg_config:
print("%s:\t\t\t\t%s" %
(str(name), str(self.json_config[name])))
for name in self.yaml_config:
if name not in self.arg_config:
print("%s:\t\t\t\t%s" %
(str(name), str(self.yaml_config[name])))
print("-" * 70)
if __name__ == "__main__":
"""
pd_config = PDConfig(json_file = "./test/bert_config.json")
pd_config.build()
print(pd_config.do_train)
print(pd_config.hidden_size)
pd_config = PDConfig(yaml_file = "./test/bert_config.yaml")
pd_config.build()
print(pd_config.do_train)
print(pd_config.hidden_size)
"""
pd_config = PDConfig(yaml_file="./test/bert_config.yaml")
pd_config += ("my_age", int, 18, "I am forever 18.")
pd_config.build()
print(pd_config.do_train)
print(pd_config.hidden_size)
print(pd_config.my_age)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import paddle.fluid as fluid
__all__ = ['chunk_count', "build_chunk"]
def build_chunk(data_list, id2label_dict):
"""
    Assemble entity chunks from a sequence of tag ids.
"""
tag_list = [id2label_dict.get(str(id)) for id in data_list]
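    # ner_dict maps a "start_end" span key to the chunk's type prefix; the
    # previous chunk is flushed whenever a new chunk begins (an O tag, an
    # X-B tag, or an X-I tag whose type differs from the current chunk)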
ner_dict = {}
ner_str = ""
ner_start = 0
for i in range(len(tag_list)):
tag = tag_list[i]
if tag == u"O":
if i != 0:
key = "%d_%d" % (ner_start, i - 1)
ner_dict[key] = ner_str
ner_start = i
ner_str = tag
elif tag.endswith(u"B"):
if i != 0:
key = "%d_%d" % (ner_start, i - 1)
ner_dict[key] = ner_str
ner_start = i
ner_str = tag.split('-')[0]
elif tag.endswith(u"I"):
if tag.split('-')[0] != ner_str:
if i != 0:
key = "%d_%d" % (ner_start, i - 1)
ner_dict[key] = ner_str
ner_start = i
ner_str = tag.split('-')[0]
return ner_dict
def chunk_count(infer_numpy, label_numpy, seq_len, id2label_dict):
"""
    calculate num_infer_chunks, num_label_chunks and num_correct_chunks for chunk-level metrics
"""
num_infer_chunks, num_label_chunks, num_correct_chunks = 0, 0, 0
assert infer_numpy.shape[0] == label_numpy.shape[0]
for i in range(infer_numpy.shape[0]):
infer_list = infer_numpy[i][: seq_len[i]]
label_list = label_numpy[i][: seq_len[i]]
infer_dict = build_chunk(infer_list, id2label_dict)
num_infer_chunks += len(infer_dict)
label_dict = build_chunk(label_list, id2label_dict)
num_label_chunks += len(label_dict)
for key in infer_dict:
if key in label_dict and label_dict[key] == infer_dict[key]:
num_correct_chunks += 1
return num_infer_chunks, num_label_chunks, num_correct_chunks
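# A hypothetical quick check of chunk_count (toy ids and labels, not part of
# the repo): an identical predicted and gold PER chunk yields
#   id2label = {"0": "PER-B", "1": "PER-I", "2": "O"}
#   chunk_count(np.array([[0, 1, 2]]), np.array([[0, 1, 2]]), [3], id2label)
#   -> (1, 1, 1)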
......@@ -20,9 +20,9 @@ import argparse
import numpy as np
from hapi.model import Input, set_device
from hapi.vision.models import tsm_resnet50
from check import check_gpu, check_version
from modeling import tsm_resnet50
from kinetics_dataset import KineticsDataset
from transforms import *
......
......@@ -24,8 +24,8 @@ from paddle.fluid.dygraph.parallel import ParallelEnv
from hapi.model import Model, CrossEntropy, Input, set_device
from hapi.metrics import Accuracy
from hapi.vision.models import tsm_resnet50
from modeling import tsm_resnet50
from check import check_gpu, check_version
from kinetics_dataset import KineticsDataset
from transforms import *
......
......@@ -196,7 +196,7 @@ def _tsm_resnet(num_layers, seg_num=8, num_classes=400, pretrained=True):
weight_path = get_weights_path(*(pretrain_infos[num_layers]))
assert weight_path.endswith('.pdparams'), \
"suffix of weight must be .pdparams"
model.load(weight_path[:-9])
model.load(weight_path)
return model
......
......@@ -99,18 +99,12 @@ The YOLOv3 network consists of a base feature-extraction network and multi-scale feature-fusion layers
| ...
```
```bash
sh pretrain_weights/download.sh
```
### Model training
Once the data is ready, `main.py` launches training and evaluation; by default it alternates training and evaluation every epoch and saves checkpoints under `yolo_checkpoint`.
YOLOv3 trains with a total batch size of 64; the example below uses 4 Tesla P40 cards with a per-card batch size of 16. In both static-graph and dygraph multi-card training, `--batch_size` is the per-card batch size, so the total batch size is `--batch_size` times the number of cards.
YOLOv3 training must load pretrained weights for the [DarkNet53]() backbone, specified at training time via `--pretrain_weights`; if a URL is given, the weights are automatically downloaded to `~/.cache/paddle/weights` and loaded.
The arguments of the `main.py` script can be listed with the following command:
```bash
......@@ -122,7 +116,7 @@ python main.py --help
Launch multi-card training as follows:
```bash
CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch main.py --data=<path/to/dataset> --batch_size=16 --pretrain_weights=https://paddlemodels.bj.bcebos.com/hapi/darknet53_pretrained.pdparams
CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch main.py --data=<path/to/dataset> --batch_size=16
```
#### Dygraph training
......@@ -132,7 +126,7 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch main.py --data=
Launch multi-card training as follows:
```bash
CUDA_VISIBLE_DEVICES=0,1,2,3 python main.py -m paddle.distributed.launch --data=<path/to/dataset> --batch_size=16 -d --pretrain_weights=https://paddlemodels.bj.bcebos.com/hapi/darknet53_pretrained.pdparams
CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch main.py --data=<path/to/dataset> --batch_size=16 -d
```
......
......@@ -25,8 +25,9 @@ from paddle.fluid.optimizer import Momentum
from paddle.io import DataLoader
from hapi.model import Model, Input, set_device
from hapi.vision.models import yolov3_darknet53, YoloLoss
from hapi.vision.transforms import *
from modeling import yolov3_darknet53, YoloLoss
from transforms import *
from visualizer import draw_bbox
......
......@@ -27,12 +27,12 @@ from paddle.io import DataLoader
from hapi.model import Model, Input, set_device
from hapi.distributed import DistributedBatchSampler
from hapi.download import is_url, get_weights_path
from hapi.datasets import COCODataset
from hapi.vision.transforms import *
from hapi.vision.models import yolov3_darknet53, YoloLoss
from hapi.vision.transforms import Compose, BatchCompose
from modeling import yolov3_darknet53, YoloLoss
from coco import COCODataset
from coco_metric import COCOMetric
from transforms import *
NUM_MAX_BOXES = 50
......@@ -126,10 +126,7 @@ def main():
pretrained=pretrained)
if FLAGS.pretrain_weights and not FLAGS.eval_only:
pretrain_weights = FLAGS.pretrain_weights
if is_url(pretrain_weights):
pretrain_weights = get_weights_path(pretrain_weights)
model.load(pretrain_weights, skip_mismatch=True, reset_optimizer=True)
model.load(FLAGS.pretrain_weights, skip_mismatch=True, reset_optimizer=True)
optim = make_optimizer(len(batch_sampler), parameter_list=model.parameters())
......@@ -168,7 +165,7 @@ def main():
save_dir="yolo_checkpoint/mixup",
save_freq=10)
# do not use image mixup transfrom in laste FLAGS.no_mixup_epoch epoches
    # do not use image mixup transform in the last FLAGS.no_mixup_epoch epochs
dataset.mixup = False
model.fit(train_data=loader,
epochs=FLAGS.no_mixup_epoch,
......@@ -200,8 +197,7 @@ if __name__ == '__main__':
parser.add_argument(
"-j", "--num_workers", default=4, type=int, help="reader worker number")
parser.add_argument(
"-p", "--pretrain_weights",
default="./pretrain_weights/darknet53_pretrained", type=str,
"-p", "--pretrain_weights", default=None, type=str,
help="path to pretrained weights")
parser.add_argument(
"-r", "--resume", default=None, type=str,
......
......@@ -16,13 +16,13 @@ from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid
from paddle.fluid.dygraph.nn import Conv2D
from paddle.fluid.dygraph.nn import Conv2D, BatchNorm
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.regularizer import L2Decay
from hapi.model import Model, Loss
from hapi.download import get_weights_path
from .darknet import darknet53, ConvBNLayer
from hapi.vision.models import darknet53
__all__ = ['YoloLoss', 'YOLOv3', 'yolov3_darknet53']
......@@ -33,6 +33,46 @@ pretrain_infos = {
}
class ConvBNLayer(fluid.dygraph.Layer):
def __init__(self,
ch_in,
ch_out,
filter_size=3,
stride=1,
groups=1,
padding=0,
act="leaky"):
super(ConvBNLayer, self).__init__()
self.conv = Conv2D(
num_channels=ch_in,
num_filters=ch_out,
filter_size=filter_size,
stride=stride,
padding=padding,
groups=groups,
param_attr=ParamAttr(
initializer=fluid.initializer.Normal(0., 0.02)),
bias_attr=False,
act=None)
self.batch_norm = BatchNorm(
num_channels=ch_out,
param_attr=ParamAttr(
initializer=fluid.initializer.Normal(0., 0.02),
regularizer=L2Decay(0.)),
bias_attr=ParamAttr(
initializer=fluid.initializer.Constant(0.0),
regularizer=L2Decay(0.)))
self.act = act
def forward(self, inputs):
out = self.conv(inputs)
out = self.batch_norm(out)
if self.act == 'leaky':
out = fluid.layers.leaky_relu(x=out, alpha=0.1)
return out
class YoloDetectionBlock(fluid.dygraph.Layer):
def __init__(self, ch_in, channel):
super(YoloDetectionBlock, self).__init__()
......@@ -118,7 +158,7 @@ class YOLOv3(Model):
self.nms_posk = 100
self.draw_thresh = 0.5
self.backbone = darknet53(pretrained=False)
self.backbone = darknet53(pretrained=(model_mode=='train'))
self.block_outputs = []
self.yolo_blocks = []
self.route_blocks = []
......@@ -254,7 +294,7 @@ def _yolov3_darknet(num_layers=53, num_classes=80,
weight_path = get_weights_path(*(pretrain_infos[num_layers]))
assert weight_path.endswith('.pdparams'), \
"suffix of weight must be .pdparams"
model.load(weight_path[:-9])
model.load(weight_path)
return model
......
......@@ -12,7 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from . import folder
from . import mnist
from . import flowers
from .folder import *
from .mnist import *
from .flowers import *
from .coco import *
__all__ = folder.__all__ \
+ mnist.__all__ \
+ flowers.__all__
......@@ -193,17 +193,17 @@ class StaticGraphAdapter(object):
def mode(self, value):
self.model.mode = value
def train(self, inputs, labels=None):
def train_batch(self, inputs, labels=None):
assert self.model._optimizer, \
"model not ready, please call `model.prepare()` first"
self.mode = 'train'
return self._run(inputs, labels)
def eval(self, inputs, labels=None):
def eval_batch(self, inputs, labels=None):
self.mode = 'eval'
return self._run(inputs, labels)
def test(self, inputs):
def test_batch(self, inputs):
self.mode = 'test'
return self._run(inputs, None)
......@@ -567,7 +567,7 @@ class DynamicGraphAdapter(object):
self.model.mode = value
# TODO multi device in dygraph mode not implemented at present time
def train(self, inputs, labels=None):
def train_batch(self, inputs, labels=None):
assert self.model._optimizer, \
"model not ready, please call `model.prepare()` first"
super(Model, self.model).train()
......@@ -600,7 +600,7 @@ class DynamicGraphAdapter(object):
return ([to_numpy(l) for l in losses], metrics) \
if len(metrics) > 0 else [to_numpy(l) for l in losses]
def eval(self, inputs, labels=None):
def eval_batch(self, inputs, labels=None):
super(Model, self.model).eval()
self.mode = 'eval'
inputs = to_list(inputs)
......@@ -642,7 +642,7 @@ class DynamicGraphAdapter(object):
return ([to_numpy(l) for l in losses], metrics) \
if len(metrics) > 0 else [to_numpy(l) for l in losses]
def test(self, inputs):
def test_batch(self, inputs):
super(Model, self.model).eval()
self.mode = 'test'
inputs = [to_variable(x) for x in to_list(inputs)]
......@@ -741,14 +741,14 @@ class Model(fluid.dygraph.Layer):
else:
self._adapter = StaticGraphAdapter(self)
def train(self, *args, **kwargs):
return self._adapter.train(*args, **kwargs)
def train_batch(self, *args, **kwargs):
return self._adapter.train_batch(*args, **kwargs)
def eval(self, *args, **kwargs):
return self._adapter.eval(*args, **kwargs)
def eval_batch(self, *args, **kwargs):
return self._adapter.eval_batch(*args, **kwargs)
def test(self, *args, **kwargs):
return self._adapter.test(*args, **kwargs)
def test_batch(self, *args, **kwargs):
return self._adapter.test_batch(*args, **kwargs)
def save(self, *args, **kwargs):
if ParallelEnv().local_rank == 0:
......@@ -816,7 +816,7 @@ class Model(fluid.dygraph.Layer):
except ValueError as err:
if skip_mismatch:
warnings.warn(
("Skip loading for {}. ".format(key) + err.message))
("Skip loading for {}. ".format(key) + str(err)))
# reset optimizer when mismatch happens
reset_optimizer = True
else:
......@@ -1185,7 +1185,7 @@ class Model(fluid.dygraph.Layer):
outputs = []
for data in tqdm.tqdm(loader):
data = flatten(data)
outputs.append(self.test(data[:len(self._inputs)]))
outputs.append(self.test_batch(data[:len(self._inputs)]))
# NOTE: for lod tensor output, we should not stack outputs
# for stacking may loss its detail info
......@@ -1199,18 +1199,6 @@ class Model(fluid.dygraph.Layer):
outputs = [o[:len(test_loader.dataset)] for o in outputs]
return outputs
def set_eval_data(self, eval_data):
"""
Args:
eval_data (Dataset|DataLoader|None): An iterable data loader is used for
eval. An instance of paddle.io.Dataset or
paddle.io.Dataloader is recomended.
"""
assert isinstance(
eval_data,
DataLoader), "eval_data must be a instance of Dataloader!"
self._test_dataloader = eval_data
def _run_one_epoch(self,
data_loader,
callbacks,
......@@ -1247,11 +1235,11 @@ class Model(fluid.dygraph.Layer):
callbacks.on_batch_begin(mode, step, logs)
if mode == 'train':
outs = self.train(data[:len(self._inputs)],
data[len(self._inputs):])
outs = self.train_batch(data[:len(self._inputs)],
data[len(self._inputs):])
else:
outs = self.eval(data[:len(self._inputs)],
data[len(self._inputs):])
outs = self.eval_batch(data[:len(self._inputs)],
data[len(self._inputs):])
# losses
loss = outs[0] if self._metrics else outs
......
......@@ -25,7 +25,8 @@ from hapi.text.text import TransformerDecoderLayer as TransformerDecoderLayer
from hapi.text.text import TransformerEncoder as TransformerEncoder
from hapi.text.text import TransformerDecoder as TransformerDecoder
from hapi.text.text import TransformerBeamSearchDecoder as TransformerBeamSearchDecoder
from hapi.text.text import DynamicGRU as DynamicGRU
from hapi.text.text import GRUCell as GRUCell
from hapi.text.text import GRUEncoderCell as GRUEncoderCell
from hapi.text.text import BiGRU as BiGRU
from hapi.text.text import Linear_chain_crf as Linear_chain_crf
from hapi.text.text import Crf_decoding as Crf_decoding
......
......@@ -30,6 +30,7 @@ from hapi.distributed import DistributedBatchSampler
from hapi.text.bert.data_processor import DataProcessor, XnliProcessor, ColaProcessor, MrpcProcessor, MnliProcessor
from hapi.text.bert.batching import prepare_batch_data
import hapi.text.tokenizer.tokenization as tokenization
from paddle.fluid.dygraph.parallel import ParallelEnv, ParallelStrategy
__all__ = [
'BertInputExample', 'BertInputFeatures', 'SingleSentenceDataset',
......@@ -227,6 +228,9 @@ class SingleSentenceDataset(Dataset):
if line_processor is None:
line_processor = default_line_processor
if ParallelEnv().nranks > 1:
leveldb_file = leveldb_file + "_" + str(ParallelEnv().local_rank)
if not os.path.exists(leveldb_file):
print("putting data %s into leveldb %s" %
(input_file, leveldb_file))
......@@ -384,7 +388,12 @@ class BertDataLoader(object):
quotechar=None,
device=fluid.CPUPlace(),
num_workers=0,
return_list=True):
return_list=True,
phase="train"):
assert phase in [
"train", "predict", "test"
], "phase of BertDataLoader should be in [train, predict, test], but get %s" % phase
self.dataset = SingleSentenceDataset(tokenizer, label_list,
max_seq_length, mode)
......@@ -394,15 +403,21 @@ class BertDataLoader(object):
input_file, label_list, max_seq_length, tokenizer,
line_processor, delimiter, quotechar)
elif mode == "leveldb":
#prepare_leveldb(self, input_file, leveldb_file, label_list, max_seq_length, tokenizer, line_processor=None, delimiter="\t", quotechar=None):
self.dataset.prepare_leveldb(input_file, leveldb_file, label_list,
max_seq_length, tokenizer,
line_processor, delimiter, quotechar)
else:
raise ValueError("mode should be in [all_in_memory, leveldb]")
self.sampler = DistributedBatchSampler(
self.dataset, batch_size, shuffle=shuffle, drop_last=drop_last)
if phase == "train":
self.sampler = DistributedBatchSampler(
self.dataset, batch_size, shuffle=shuffle, drop_last=drop_last)
elif phase == "test" or phase == "predict":
self.sampler = BatchSampler(
dataset=self.dataset,
batch_size=batch_size,
shuffle=shuffle,
drop_last=drop_last)
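A hedged reading of the branch above: DistributedBatchSampler shards batches across trainers during training, while test/predict falls back to a plain BatchSampler so every process scores the full dataset exactly once. Illustrative split for 8 samples, 2 ranks, batch_size=2 (the exact index pattern is an assumption):

# train   (distributed): rank 0 -> [0,1], [4,5]    rank 1 -> [2,3], [6,7]
# predict (plain):       each process -> [0,1], [2,3], [4,5], [6,7]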
self.dataloader = DataLoader(
dataset=self.dataset,
......
......@@ -31,8 +31,6 @@ import multiprocessing
import collections
import copy
import six
import sys
from functools import partial, reduce
import paddle
......@@ -50,8 +48,8 @@ __all__ = [
'RNNCell', 'BasicLSTMCell', 'BasicGRUCell', 'RNN', 'DynamicDecode',
'BeamSearchDecoder', 'MultiHeadAttention', 'FFN',
'TransformerEncoderLayer', 'TransformerEncoder', 'TransformerDecoderLayer',
'TransformerDecoder', 'TransformerBeamSearchDecoder', 'DynamicGRU',
'BiGRU', 'Linear_chain_crf', 'Crf_decoding', 'SequenceTagging'
'TransformerDecoder', 'TransformerBeamSearchDecoder', 'Linear_chain_crf',
'Crf_decoding', 'SequenceTagging'
]
......@@ -246,194 +244,225 @@ class BasicLSTMCell(RNNCell):
self._dtype = dtype
self._input_size = input_size
assert isinstance(forget_gate_weights, dict)
assert isinstance(input_gate_weights, dict)
assert isinstance(output_gate_weights, dict)
assert isinstance(cell_weights, dict)
self.use_customized_weight = False
for _weights in [
forget_gate_weights, input_gate_weights, output_gate_weights,
cell_weights
]:
for _key in _weights:
if _weights[_key] is not None:
self.use_customized_weight = True
break
if self.use_customized_weight:
break
# forget gate parameters
if "w" in forget_gate_weights and forget_gate_weights["w"] is not None:
self.fg_w = forget_gate_weights["w"]
else:
if self._param_attr is not None and self._param_attr.name is not None:
tmp_param_attr = copy.deepcopy(self._param_attr)
tmp_param_attr.name += "_forget_gate_w"
else:
tmp_param_attr = self._param_attr
self.fg_w = self.create_parameter(
attr=tmp_param_attr,
shape=[self._input_size, self._hidden_size],
dtype=self._dtype)
if not self.use_customized_weight:
if "h" in forget_gate_weights and forget_gate_weights["h"] is not None:
self.fg_h = forget_gate_weights["h"]
else:
if self._param_attr is not None and self._param_attr.name is not None:
tmp_param_attr = copy.deepcopy(self._param_attr)
tmp_param_attr.name += "_forget_gate_h"
else:
tmp_param_attr = self._param_attr
self.fg_h = self.create_parameter(
attr=tmp_param_attr,
shape=[self._hidden_size, self._hidden_size],
self._weight = self.create_parameter(
attr=self._param_attr,
shape=[
self._input_size + self._hidden_size, 4 * self._hidden_size
],
dtype=self._dtype)
if "b" in forget_gate_weights and forget_gate_weights["b"] is not None:
self.fg_b = forget_gate_weights["b"]
else:
if self._bias_attr is not None and self._bias_attr.name is not None:
tmp_param_attr = copy.deepcopy(self._bias_attr)
tmp_param_attr.name += "_forget_gate_b"
else:
tmp_param_attr = self._bias_attr
self.fg_b = self.create_parameter(
attr=tmp_param_attr,
shape=[self._hidden_size],
self._bias = self.create_parameter(
attr=self._bias_attr,
shape=[4 * self._hidden_size],
dtype=self._dtype,
is_bias=True)
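A minimal numpy sketch of the fused layout used on this default path (shapes only; the gate order i, j, f, o matches the split in forward below):

import numpy as np

batch, input_size, hidden_size = 2, 8, 16
x_h = np.ones((batch, input_size + hidden_size))               # concat([input, pre_hidden], 1)
weight = np.ones((input_size + hidden_size, 4 * hidden_size))  # plays the role of self._weight
bias = np.zeros(4 * hidden_size)                               # plays the role of self._bias
gate_input = x_h.dot(weight) + bias                            # one matmul for all four gates
i, j, f, o = np.split(gate_input, 4, axis=-1)                  # each [batch, hidden_size]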
# input gate parameters
if "w" in input_gate_weights and input_gate_weights["w"] is not None:
self.ig_w = input_gate_weights["w"]
else:
if self._param_attr is not None and self._param_attr.name is not None:
tmp_param_attr = copy.deepcopy(self._param_attr)
tmp_param_attr.name += "_input_gate_w"
if "w" in forget_gate_weights and forget_gate_weights[
"w"] is not None:
self.fg_w = forget_gate_weights["w"]
else:
if self._param_attr is not None and self._param_attr.name is not None:
tmp_param_attr = copy.deepcopy(self._param_attr)
tmp_param_attr.name += "_forget_gate_w"
else:
tmp_param_attr = self._param_attr
self.fg_w = self.create_parameter(
attr=tmp_param_attr,
shape=[self._input_size, self._hidden_size],
dtype=self._dtype)
if "h" in forget_gate_weights and forget_gate_weights[
"h"] is not None:
self.fg_h = forget_gate_weights["h"]
else:
if self._param_attr is not None and self._param_attr.name is not None:
tmp_param_attr = copy.deepcopy(self._param_attr)
tmp_param_attr.name += "_forget_gate_h"
else:
tmp_param_attr = self._param_attr
self.fg_h = self.create_parameter(
attr=tmp_param_attr,
shape=[self._hidden_size, self._hidden_size],
dtype=self._dtype)
if "b" in forget_gate_weights and forget_gate_weights[
"b"] is not None:
self.fg_b = forget_gate_weights["b"]
else:
if self._bias_attr is not None and self._bias_attr.name is not None:
tmp_param_attr = copy.deepcopy(self._bias_attr)
tmp_param_attr.name += "_forget_gate_b"
else:
tmp_param_attr = self._bias_attr
self.fg_b = self.create_parameter(
attr=tmp_param_attr,
shape=[self._hidden_size],
dtype=self._dtype,
is_bias=True)
if "w" in input_gate_weights and input_gate_weights[
"w"] is not None:
self.ig_w = input_gate_weights["w"]
else:
tmp_param_attr = self._param_attr
if self._param_attr is not None and self._param_attr.name is not None:
tmp_param_attr = copy.deepcopy(self._param_attr)
tmp_param_attr.name += "_input_gate_w"
else:
tmp_param_attr = self._param_attr
self.ig_w = self.create_parameter(
attr=tmp_param_attr,
shape=[self._input_size, self._hidden_size],
dtype=self._dtype)
self.ig_w = self.create_parameter(
attr=tmp_param_attr,
shape=[self._input_size, self._hidden_size],
dtype=self._dtype)
if "h" in input_gate_weights and input_gate_weights["h"] is not None:
self.ig_h = input_gate_weights["h"]
else:
if self._param_attr is not None and self._param_attr.name is not None:
tmp_param_attr = copy.deepcopy(self._param_attr)
tmp_param_attr.name += "_input_gate_h"
if "h" in input_gate_weights and input_gate_weights[
"h"] is not None:
self.ig_h = input_gate_weights["h"]
else:
tmp_param_attr = self._param_attr
if self._param_attr is not None and self._param_attr.name is not None:
tmp_param_attr = copy.deepcopy(self._param_attr)
tmp_param_attr.name += "_input_gate_h"
else:
tmp_param_attr = self._param_attr
self.ig_h = self.create_parameter(
attr=tmp_param_attr,
shape=[self._hidden_size, self._hidden_size],
dtype=self._dtype)
self.ig_h = self.create_parameter(
attr=tmp_param_attr,
shape=[self._hidden_size, self._hidden_size],
dtype=self._dtype)
if "b" in input_gate_weights and input_gate_weights["b"] is not None:
self.ig_b = input_gate_weights["b"]
else:
if self._bias_attr is not None and self._bias_attr.name is not None:
tmp_param_attr = copy.deepcopy(self._bias_attr)
tmp_param_attr.name += "_input_gate_b"
if "b" in input_gate_weights and input_gate_weights[
"b"] is not None:
self.ig_b = input_gate_weights["b"]
else:
tmp_param_attr = self._bias_attr
self.ig_b = self.create_parameter(
attr=tmp_param_attr,
shape=[self._hidden_size],
dtype=self._dtype,
is_bias=True)
# output gate parameters
if "w" in output_gate_weights and output_gate_weights["w"] is not None:
self.og_w = output_gate_weights["w"]
else:
if self._param_attr is not None and self._param_attr.name is not None:
tmp_param_attr = copy.deepcopy(self._param_attr)
tmp_param_attr.name += "_output_gate_w"
if self._bias_attr is not None and self._bias_attr.name is not None:
tmp_param_attr = copy.deepcopy(self._bias_attr)
tmp_param_attr.name += "_input_gate_b"
else:
tmp_param_attr = self._bias_attr
self.ig_b = self.create_parameter(
attr=tmp_param_attr,
shape=[self._hidden_size],
dtype=self._dtype,
is_bias=True)
if "w" in output_gate_weights and output_gate_weights[
"w"] is not None:
self.og_w = output_gate_weights["w"]
else:
tmp_param_attr = self._param_attr
self.og_w = self.create_parameter(
attr=tmp_param_attr,
shape=[self._input_size, self._hidden_size],
dtype=self._dtype)
if "h" in output_gate_weights and output_gate_weights["h"] is not None:
self.og_h = output_gate_weights["h"]
else:
if self._param_attr is not None and self._param_attr.name is not None:
tmp_param_attr = copy.deepcopy(self._param_attr)
tmp_param_attr.name += "_output_gate_h"
if self._param_attr is not None and self._param_attr.name is not None:
tmp_param_attr = copy.deepcopy(self._param_attr)
tmp_param_attr.name += "_output_gate_w"
else:
tmp_param_attr = self._param_attr
self.og_w = self.create_parameter(
attr=tmp_param_attr,
shape=[self._input_size, self._hidden_size],
dtype=self._dtype)
if "h" in output_gate_weights and output_gate_weights[
"h"] is not None:
self.og_h = output_gate_weights["h"]
else:
tmp_param_attr = self._param_attr
self.og_h = self.create_parameter(
attr=tmp_param_attr,
shape=[self._hidden_size, self._hidden_size],
dtype=self._dtype)
if self._param_attr is not None and self._param_attr.name is not None:
tmp_param_attr = copy.deepcopy(self._param_attr)
tmp_param_attr.name += "_output_gate_h"
else:
tmp_param_attr = self._param_attr
if "b" in output_gate_weights and output_gate_weights["b"] is not None:
self.og_b = output_gate_weights["b"]
else:
if self._bias_attr is not None and self._bias_attr.name is not None:
tmp_param_attr = copy.deepcopy(self._bias_attr)
tmp_param_attr.name += "_output_gate_b"
else:
tmp_param_attr = self._bias_attr
self.og_b = self.create_parameter(
attr=tmp_param_attr,
shape=[self._hidden_size],
dtype=self._dtype,
is_bias=True)
self.og_h = self.create_parameter(
attr=tmp_param_attr,
shape=[self._hidden_size, self._hidden_size],
dtype=self._dtype)
# cell parameters
if "w" in cell_weights and cell_weights["w"] is not None:
self.c_w = cell_weights["w"]
else:
if self._param_attr is not None and self._param_attr.name is not None:
tmp_param_attr = copy.deepcopy(self._param_attr)
tmp_param_attr.name += "_cell_w"
if "b" in output_gate_weights and output_gate_weights[
"b"] is not None:
self.og_b = output_gate_weights["b"]
else:
if self._bias_attr is not None and self._bias_attr.name is not None:
tmp_param_attr = copy.deepcopy(self._bias_attr)
tmp_param_attr.name += "_output_gate_b"
else:
tmp_param_attr = self._bias_attr
self.og_b = self.create_parameter(
attr=tmp_param_attr,
shape=[self._hidden_size],
dtype=self._dtype,
is_bias=True)
if "w" in cell_weights and cell_weights["w"] is not None:
self.c_w = cell_weights["w"]
else:
tmp_param_attr = self._param_attr
if self._param_attr is not None and self._param_attr.name is not None:
tmp_param_attr = copy.deepcopy(self._param_attr)
tmp_param_attr.name += "_cell_w"
else:
tmp_param_attr = self._param_attr
self.c_w = self.create_parameter(
attr=tmp_param_attr,
shape=[self._input_size, self._hidden_size],
dtype=self._dtype)
self.c_w = self.create_parameter(
attr=tmp_param_attr,
shape=[self._input_size, self._hidden_size],
dtype=self._dtype)
if "h" in cell_weights and cell_weights["h"] is not None:
self.c_h = cell_weights["h"]
else:
if self._param_attr is not None and self._param_attr.name is not None:
tmp_param_attr = copy.deepcopy(self._param_attr)
tmp_param_attr.name += "_cell_h"
if "h" in cell_weights and cell_weights["h"] is not None:
self.c_h = cell_weights["h"]
else:
tmp_param_attr = self._param_attr
self.c_h = self.create_parameter(
attr=tmp_param_attr,
shape=[self._hidden_size, self._hidden_size],
dtype=self._dtype)
if "b" in cell_weights and cell_weights["b"] is not None:
self.c_b = cell_weights["b"]
else:
if self._bias_attr is not None and self._bias_attr.name is not None:
tmp_param_attr = copy.deepcopy(self._bias_attr)
tmp_param_attr.name += "_cell_b"
if self._param_attr is not None and self._param_attr.name is not None:
tmp_param_attr = copy.deepcopy(self._param_attr)
tmp_param_attr.name += "_cell_h"
else:
tmp_param_attr = self._param_attr
self.c_h = self.create_parameter(
attr=tmp_param_attr,
shape=[self._hidden_size, self._hidden_size],
dtype=self._dtype)
if "b" in cell_weights and cell_weights["b"] is not None:
self.c_b = cell_weights["b"]
else:
tmp_param_attr = self._bias_attr
self.c_b = self.create_parameter(
attr=tmp_param_attr,
shape=[self._hidden_size],
dtype=self._dtype,
is_bias=True)
if self._bias_attr is not None and self._bias_attr.name is not None:
tmp_param_attr = copy.deepcopy(self._bias_attr)
tmp_param_attr.name += "_cell_b"
else:
tmp_param_attr = self._bias_attr
self.c_b = self.create_parameter(
attr=tmp_param_attr,
shape=[self._hidden_size],
dtype=self._dtype,
is_bias=True)
# the weights are concatenated here to make the computation more efficient.
weight_w = fluid.layers.concat(
[self.ig_w, self.c_w, self.fg_w, self.og_w], axis=-1)
weight_h = fluid.layers.concat(
[self.ig_h, self.c_h, self.fg_h, self.og_h], axis=-1)
self._weight = fluid.layers.concat([weight_w, weight_h], axis=0)
def forward(self, input, state):
self._bias = fluid.layers.concat(
[self.ig_b, self.c_b, self.fg_b, self.og_b])
if self.use_customized_weight:
weight_w = fluid.layers.concat(
[self.ig_w, self.c_w, self.fg_w, self.og_w], axis=-1)
weight_h = fluid.layers.concat(
[self.ig_h, self.c_h, self.fg_h, self.og_h], axis=-1)
_weight = fluid.layers.concat([weight_w, weight_h], axis=0)
_bias = fluid.layers.concat(
[self.ig_b, self.c_b, self.fg_b, self.og_b])
else:
_weight = self._weight
_bias = self._bias
def forward(self, input, state):
pre_hidden, pre_cell = state
concat_input_hidden = layers.concat([input, pre_hidden], 1)
gate_input = layers.matmul(x=concat_input_hidden, y=self._weight)
gate_input = layers.matmul(x=concat_input_hidden, y=_weight)
gate_input = layers.elementwise_add(gate_input, self._bias)
gate_input = layers.elementwise_add(gate_input, _bias)
i, j, f, o = layers.split(gate_input, num_or_sections=4, dim=-1)
new_cell = layers.elementwise_add(
layers.elementwise_mul(
......@@ -500,10 +529,9 @@ class BasicGRUCell(RNNCell):
cell_weights={"w": None,
"h": None,
"b": None}):
super(BasicGRUCell, self).__init__()
self._input_size = input_size
self._hiden_size = hidden_size
self._hidden_size = hidden_size
self._param_attr = param_attr
self._bias_attr = bias_attr
self._gate_activation = gate_activation or layers.sigmoid
......@@ -514,6 +542,16 @@ class BasicGRUCell(RNNCell):
assert isinstance(reset_gate_weights, dict)
assert isinstance(cell_weights, dict)
self.use_customized_weight = False
for _weights in [
update_gate_weights, reset_gate_weights, cell_weights
]:
for _key in _weights:
if _weights[_key] is not None:
self.use_customized_weight = True
if self.use_customized_weight:
break
if self._param_attr is not None and self._param_attr.name is not None:
gate_param_attr = copy.deepcopy(self._param_attr)
candidate_param_attr = copy.deepcopy(self._param_attr)
......@@ -523,156 +561,194 @@ class BasicGRUCell(RNNCell):
gate_param_attr = self._param_attr
candidate_param_attr = self._param_attr
if self._bias_attr is not None and self._bias_attr.name is not None:
gate_bias_attr = copy.deepcopy(self._bias_attr)
candidate_bias_attr = copy.deepcopy(self._bias_attr)
gate_bias_attr.name += "_gate"
candidate_bias_attr.name += "_candidate"
else:
gate_bias_attr = self._bias_attr
candidate_bias_attr = self._bias_attr
# create the parameters of gates in gru
if "w" in update_gate_weights and update_gate_weights["w"] is not None:
self.ug_w = update_gate_weights["w"]
else:
if gate_param_attr is not None and gate_param_attr.name is not None:
tmp_param_attr = copy.deepcopy(gate_param_attr)
tmp_param_attr.name += "_update_gate_w"
else:
tmp_param_attr = gate_param_attr
self.ug_w = self.create_parameter(
attr=tmp_param_attr,
shape=[self._input_size, self._hidden_size],
if not self.use_customized_weight:
self._gate_weight = self.create_parameter(
attr=gate_param_attr,
shape=[
self._input_size + self._hidden_size, 2 * self._hidden_size
],
dtype=self._dtype)
if "h" in update_gate_weights and update_gate_weights["h"] is not None:
self.ug_h = update_gate_weights["h"]
else:
if gate_param_attr is not None and gate_param_attr.name is not None:
tmp_param_attr = copy.deepcopy(gate_param_attr)
tmp_param_attr.name += "_update_gate_h"
else:
tmp_param_attr = gate_param_attr
self.ug_h = self.create_parameter(
attr=tmp_param_attr,
shape=[self._hidden_size, self._hidden_size],
self._candidate_weight = self.create_parameter(
attr=candidate_param_attr,
shape=[
self._input_size + self._hidden_size, self._hidden_size
],
dtype=self._dtype)
if "b" in update_gate_weights and update_gate_weights["b"] is not None:
self.ug_b = update_gate_weights["b"]
else:
if gate_bias_attr is not None and gate_bias_attr.name is not None:
tmp_param_attr = copy.deepcopy(gate_bias_attr)
tmp_param_attr.name += "_update_gate_b"
if self._bias_attr is not None and self._bias_attr.name is not None:
gate_bias_attr = copy.deepcopy(self._bias_attr)
candidate_bias_attr = copy.deepcopy(self._bias_attr)
gate_bias_attr.name += "_gate"
candidate_bias_attr.name += "_candidate"
else:
tmp_param_attr = gate_bias_attr
self.ug_b = self.create_parameter(
attr=tmp_param_attr,
gate_bias_attr = self._bias_attr
candidate_bias_attr = self._bias_attr
self._gate_bias = self.create_parameter(
attr=gate_bias_attr,
shape=[2 * self._hidden_size],
dtype=self._dtype,
is_bias=True)
self._candidate_bias = self.create_parameter(
attr=candidate_bias_attr,
shape=[self._hidden_size],
dtype=self._dtype,
is_bias=True)
# reset gate parameters
if "w" in reset_gate_weights and reset_gate_weights["w"] is not None:
self.rg_w = reset_gate_weights["w"]
else:
if gate_param_attr is not None and gate_param_attr.name is not None:
tmp_param_attr = copy.deepcopy(gate_param_attr)
tmp_param_attr.name += "_reset_gate_w"
else:
tmp_param_attr = gate_param_attr
self.rg_w = self.create_parameter(
attr=tmp_param_attr,
shape=[self._input_size, self._hidden_size],
dtype=self._dtype)
if "h" in reset_gate_weights and reset_gate_weights["h"] is not None:
self.rg_h = reset_gate_weights["h"]
else:
if gate_param_attr is not None and gate_param_attr.name is not None:
tmp_param_attr = copy.deepcopy(gate_param_attr)
tmp_param_attr.name += "_reset_gate_h"
# create the parameters of gates in gru
if "w" in update_gate_weights and update_gate_weights[
"w"] is not None:
self.ug_w = update_gate_weights["w"]
else:
tmp_param_attr = gate_param_attr
self.rg_h = self.create_parameter(
attr=tmp_param_attr,
shape=[self._hidden_size, self._hidden_size],
dtype=self._dtype)
if "b" in reset_gate_weights and reset_gate_weights["b"] is not None:
self.rg_b = reset_gate_weights["b"]
else:
if gate_bias_attr is not None and gate_bias_attr.name is not None:
tmp_param_attr = copy.deepcopy(gate_bias_attr)
tmp_param_attr.name += "_reset_gate_b"
if gate_param_attr is not None and gate_param_attr.name is not None:
tmp_param_attr = copy.deepcopy(gate_param_attr)
tmp_param_attr.name += "_update_gate_w"
else:
tmp_param_attr = gate_param_attr
self.ug_w = self.create_parameter(
attr=tmp_param_attr,
shape=[self._input_size, self._hidden_size],
dtype=self._dtype)
if "h" in update_gate_weights and update_gate_weights[
"h"] is not None:
self.ug_h = update_gate_weights["h"]
else:
tmp_param_attr = gate_bias_attr
self.rg_b = self.create_parameter(
attr=tmp_param_attr,
shape=[self._hidden_size],
dtype=self._dtype,
is_bias=True)
# cell parameters
if "w" in cell_weights and cell_weights["w"] is not None:
self.c_w = cell_weights["w"]
else:
if candidate_param_attr is not None and candidate_param_attr.name is not None:
tmp_param_attr = copy.deepcopy(candidate_param_attr)
tmp_param_attr.name += "_cell_w"
if gate_param_attr is not None and gate_param_attr.name is not None:
tmp_param_attr = copy.deepcopy(gate_param_attr)
tmp_param_attr.name += "_update_gate_h"
else:
tmp_param_attr = gate_param_attr
self.ug_h = self.create_parameter(
attr=tmp_param_attr,
shape=[self._hidden_size, self._hidden_size],
dtype=self._dtype)
if "b" in update_gate_weights and update_gate_weights[
"b"] is not None:
self.ug_b = update_gate_weights["b"]
else:
tmp_param_attr = gate_param_attr
self.c_w = self.create_parameter(
attr=tmp_param_attr,
shape=[self._input_size, self._hidden_size],
dtype=self._dtype)
if "h" in cell_weights and cell_weights["h"] is not None:
self.c_h = cell_weights["h"]
else:
if candidate_param_attr is not None and candidate_param_attr.name is not None:
tmp_param_attr = copy.deepcopy(candidate_param_attr)
tmp_param_attr.name += "_cell_h"
if gate_bias_attr is not None and gate_bias_attr.name is not None:
tmp_param_attr = copy.deepcopy(gate_bias_attr)
tmp_param_attr.name += "_update_gate_b"
else:
tmp_param_attr = gate_bias_attr
self.ug_b = self.create_parameter(
attr=tmp_param_attr,
shape=[self._hidden_size],
dtype=self._dtype,
is_bias=True)
# reset gate parameters
if "w" in reset_gate_weights and reset_gate_weights[
"w"] is not None:
self.rg_w = reset_gate_weights["w"]
else:
tmp_param_attr = gate_param_attr
self.c_h = self.create_parameter(
attr=tmp_param_attr,
shape=[self._hidden_size, self._hidden_size],
dtype=self._dtype)
if "b" in cell_weights and cell_weights["b"] is not None:
self.c_b = cell_weights["b"]
else:
if candidate_bias_attr is not None and candidate_bias_attr.name is not None:
tmp_param_attr = copy.deepcopy(candidate_bias_attr)
tmp_param_attr.name += "_cell_b"
if gate_param_attr is not None and gate_param_attr.name is not None:
tmp_param_attr = copy.deepcopy(gate_param_attr)
tmp_param_attr.name += "_reset_gate_w"
else:
tmp_param_attr = gate_param_attr
self.rg_w = self.create_parameter(
attr=tmp_param_attr,
shape=[self._input_size, self._hidden_size],
dtype=self._dtype)
if "h" in reset_gate_weights and reset_gate_weights[
"h"] is not None:
self.rg_h = reset_gate_weights["h"]
else:
tmp_param_attr = gate_bias_attr
self.c_b = self.create_parameter(
attr=tmp_param_attr,
shape=[self._hidden_size],
dtype=self._dtype,
is_bias=True)
if gate_param_attr is not None and gate_param_attr.name is not None:
tmp_param_attr = copy.deepcopy(gate_param_attr)
tmp_param_attr.name += "_reset_gate_h"
else:
tmp_param_attr = gate_param_attr
self.rg_h = self.create_parameter(
attr=tmp_param_attr,
shape=[self._hidden_size, self._hidden_size],
dtype=self._dtype)
if "b" in reset_gate_weights and reset_gate_weights[
"b"] is not None:
self.rg_b = reset_gate_weights["b"]
else:
if gate_bias_attr is not None and gate_bias_attr.name is not None:
tmp_param_attr = copy.deepcopy(gate_bias_attr)
tmp_param_attr.name += "_reset_gate_b"
else:
tmp_param_attr = gate_bias_attr
self.rg_b = self.create_parameter(
attr=tmp_param_attr,
shape=[self._hidden_size],
dtype=self._dtype,
is_bias=True)
# cell parameters
if "w" in cell_weights and cell_weights["w"] is not None:
self.c_w = cell_weights["w"]
else:
if candidate_param_attr is not None and candidate_param_attr.name is not None:
tmp_param_attr = copy.deepcopy(candidate_param_attr)
tmp_param_attr.name += "_cell_w"
else:
tmp_param_attr = gate_param_attr
rg_weights = layers.concat([self.rg_w, self.rg_h], axis=0)
ug_weights = layers.concat([self.ug_w, self.ug_h], axis=0)
self._gate_weight = layers.concat([rg_weights, ug_weights], axis=-1)
self.c_w = self.create_parameter(
attr=tmp_param_attr,
shape=[self._input_size, self._hidden_size],
dtype=self._dtype)
self._candidate_weight = layers.concat([self.c_w, self.c_h], axis=0)
if "h" in cell_weights and cell_weights["h"] is not None:
self.c_h = cell_weights["h"]
else:
if candidate_param_attr is not None and candidate_param_attr.name is not None:
tmp_param_attr = copy.deepcopy(candidate_param_attr)
tmp_param_attr.name += "_cell_h"
else:
tmp_param_attr = gate_param_attr
self.c_h = self.create_parameter(
attr=tmp_param_attr,
shape=[self._hidden_size, self._hidden_size],
dtype=self._dtype)
if "b" in cell_weights and cell_weights["b"] is not None:
self.c_b = cell_weights["b"]
else:
if candidate_bias_attr is not None and candidate_bias_attr.name is not None:
tmp_param_attr = copy.deepcopy(candidate_bias_attr)
tmp_param_attr.name += "_cell_b"
else:
tmp_param_attr = gate_bias_attr
self.c_b = self.create_parameter(
attr=tmp_param_attr,
shape=[self._hidden_size],
dtype=self._dtype,
is_bias=True)
self._gate_bias = layers.concat([self.rg_b, self.ug_b], axis=0)
def forward(self, input, state):
self._candidate_bias = self.c_b
if self.use_customized_weight:
rg_weights = layers.concat([self.rg_w, self.rg_h], axis=0)
ug_weights = layers.concat([self.ug_w, self.ug_h], axis=0)
_gate_weight = layers.concat([rg_weights, ug_weights], axis=-1)
_candidate_weight = layers.concat([self.c_w, self.c_h], axis=0)
_gate_bias = layers.concat([self.rg_b, self.ug_b], axis=0)
_candidate_bias = self.c_b
else:
_gate_weight = self._gate_weight
_gate_bias = self._gate_bias
_candidate_weight = self._candidate_weight
_candidate_bias = self._candidate_bias
def forward(self, input, state):
pre_hidden = state
concat_input_hidden = layers.concat([input, pre_hidden], axis=1)
gate_input = layers.matmul(x=concat_input_hidden, y=self._gate_weight)
gate_input = layers.matmul(x=concat_input_hidden, y=_gate_weight)
gate_input = layers.elementwise_add(gate_input, self._gate_bias)
gate_input = layers.elementwise_add(gate_input, _gate_bias)
gate_input = self._gate_activation(gate_input)
r, u = layers.split(gate_input, num_or_sections=2, dim=1)
......@@ -680,8 +756,8 @@ class BasicGRUCell(RNNCell):
r_hidden = r * pre_hidden
candidate = layers.matmul(
layers.concat([input, r_hidden], 1), self._candidate_weight)
candidate = layers.elementwise_add(candidate, self._candidate_bias)
layers.concat([input, r_hidden], 1), _candidate_weight)
candidate = layers.elementwise_add(candidate, _candidate_bias)
c = self._activation(candidate)
new_hidden = u * pre_hidden + (1 - u) * c
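For reference, the update these lines implement (standard GRU; a hedged reading of the code above, with * denoting the elementwise product):

#   r, u = sigmoid([x, h_prev] @ W_gate + b_gate)      # reset / update gates
#   c    = tanh([x, r * h_prev] @ W_cand + b_cand)     # candidate state
#   h    = u * h_prev + (1 - u) * c                    # new hidden state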
......@@ -870,7 +946,6 @@ class DynamicDecode(Layer):
# To keep states.finished/finished consistent with
# next_finished.
layers.assign(next_finished, finished)
next_sequence_lengths = layers.elementwise_add(
sequence_lengths,
layers.cast(
......@@ -1479,98 +1554,98 @@ class TransformerDecoder(Layer):
]
class DynamicGRU(fluid.dygraph.Layer):
#TODO: we should merge GRUCell with BasicGRUCell
class GRUCell(RNNCell):
def __init__(self,
size,
h_0=None,
input_size,
hidden_size,
param_attr=None,
bias_attr=None,
is_reverse=False,
gate_activation='sigmoid',
candidate_activation='tanh',
origin_mode=False,
init_size=None):
super(DynamicGRU, self).__init__()
origin_mode=False):
super(GRUCell, self).__init__()
self.hidden_size = hidden_size
self.fc_layer = Linear(
input_size, hidden_size * 3, param_attr=param_attr)
self.gru_unit = GRUUnit(
size * 3,
hidden_size * 3,
param_attr=param_attr,
bias_attr=bias_attr,
activation=candidate_activation,
gate_activation=gate_activation,
origin_mode=origin_mode)
self.size = size
self.h_0 = h_0
self.is_reverse = is_reverse
def forward(self, inputs, states):
# for GRUCell, `step_outputs` and `new_states` are both the hidden state
x = self.fc_layer(inputs)
hidden, _, _ = self.gru_unit(x, states)
return hidden, hidden
def forward(self, inputs):
hidden = self.h_0
res = []
@property
def state_shape(self):
return [self.hidden_size]
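A hedged usage sketch of the new cell with the RNN wrapper from this module (all sizes are assumptions):

# cell = GRUCell(input_size=128, hidden_size=256)
# encoder = RNN(cell, is_reverse=False, time_major=False)
# outputs, final_state = encoder(inputs)    # inputs: [batch, seq_len, 128]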
for i in range(inputs.shape[1]):
if self.is_reverse:
i = inputs.shape[1] - 1 - i
input_ = inputs[:, i:i + 1, :]
input_ = fluid.layers.reshape(
input_, [-1, input_.shape[2]], inplace=False)
hidden, reset, gate = self.gru_unit(input_, hidden)
hidden_ = fluid.layers.reshape(
hidden, [-1, 1, hidden.shape[1]], inplace=False)
res.append(hidden_)
if self.is_reverse:
res = res[::-1]
res = fluid.layers.concat(res, axis=1)
return res
#TODO: we should merge GRUCell with BasicGRUCell
class GRUEncoderCell(RNNCell):
def __init__(self,
num_layers,
input_size,
hidden_size,
dropout_prob=0.,
init_scale=0.1):
super(GRUEncoderCell, self).__init__()
self.dropout_prob = dropout_prob
# use add_sublayer to register the stacked layers
self.gru_cells = []
for i in range(num_layers):
self.gru_cells.append(
self.add_sublayer(
"gru_%d" % i,
#BasicGRUCell(
GRUCell(
input_size=input_size if i == 0 else hidden_size,
hidden_size=hidden_size,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.UniformInitializer(
low=-init_scale, high=init_scale)))))
def forward(self, step_input, states):
new_states = []
for i, gru_cell in enumerate(self.gru_cells):
out, state = gru_cell(step_input, states[i])
step_input = layers.dropout(
out,
self.dropout_prob,
dropout_implementation='upscale_in_train'
) if self.dropout_prob > 0 else out
new_states.append(step_input)
return step_input, new_states
@property
def state_shape(self):
return [cell.state_shape for cell in self.gru_cells]
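Stacked variant, with the same caveat on assumed sizes:

# cell = GRUEncoderCell(num_layers=2, input_size=128, hidden_size=256, dropout_prob=0.1)
# rnn = RNN(cell, time_major=False)   # dropout is applied to each layer's output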
class BiGRU(fluid.dygraph.Layer):
def __init__(self, input_dim, grnn_hidden_dim, init_bound, h_0=None):
super(BiGRU, self).__init__()
self.gru = RNN(GRUEncoderCell(1, input_dim, grnn_hidden_dim, 0.0,
init_bound),
is_reverse=False,
time_major=False)
self.pre_gru = Linear(
input_dim=input_dim,
output_dim=grnn_hidden_dim * 3,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-init_bound, high=init_bound),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)))
self.gru = DynamicGRU(
size=grnn_hidden_dim,
h_0=h_0,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-init_bound, high=init_bound),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)))
self.pre_gru_r = Linear(
input_dim=input_dim,
output_dim=grnn_hidden_dim * 3,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-init_bound, high=init_bound),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)))
self.gru_r = DynamicGRU(
size=grnn_hidden_dim,
is_reverse=True,
h_0=h_0,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-init_bound, high=init_bound),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)))
self.gru_r = RNN(GRUEncoderCell(1, input_dim, grnn_hidden_dim, 0.0,
init_bound),
is_reverse=True,
time_major=False)
def forward(self, input_feature):
res_pre_gru = self.pre_gru(input_feature)
res_gru = self.gru(res_pre_gru)
res_pre_gru_r = self.pre_gru_r(input_feature)
res_gru_r = self.gru_r(res_pre_gru_r)
bi_merge = fluid.layers.concat(input=[res_gru, res_gru_r], axis=-1)
pre_gru, pre_state = self.gru(input_feature)
gru_r, r_state = self.gru_r(input_feature)
bi_merge = fluid.layers.concat(input=[pre_gru, gru_r], axis=-1)
return bi_merge
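Shape summary for the rewritten forward (hedged):

# input_feature:  [batch, seq_len, input_dim]
# pre_gru, gru_r: [batch, seq_len, grnn_hidden_dim] each
# bi_merge:       [batch, seq_len, 2 * grnn_hidden_dim]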
......@@ -1610,7 +1685,7 @@ class Linear_chain_crf(fluid.dygraph.Layer):
"Transition": self._transition,
"Label": [label]
}
if length:
if length is not None:
this_inputs['Length'] = [length]
self._helper.append_op(
type='linear_chain_crf',
......@@ -1655,7 +1730,7 @@ class Crf_decoding(fluid.dygraph.Layer):
"Transition": self._transition,
"Label": label
}
if length:
if length is not None:
this_inputs['Length'] = [length]
self._helper.append_op(
type='crf_decoding',
......@@ -1754,7 +1829,7 @@ class SequenceTagging(fluid.dygraph.Layer):
name='crfw', learning_rate=self.crf_lr),
size=self.num_labels)
def forward(self, word, target, lengths):
def forward(self, word, lengths, target=None):
"""
Configure the network
"""
......@@ -1767,9 +1842,14 @@ class SequenceTagging(fluid.dygraph.Layer):
emission = self.fc(bigru_output)
crf_cost = self.linear_chain_crf(
input=emission, label=target, length=lengths)
avg_cost = fluid.layers.mean(x=crf_cost)
self.crf_decoding.weight = self.linear_chain_crf.weight
crf_decode = self.crf_decoding(input=emission, length=lengths)
return crf_decode, avg_cost, lengths
if target is not None:
crf_cost = self.linear_chain_crf(
input=emission, label=target, length=lengths)
avg_cost = fluid.layers.mean(x=crf_cost)
self.crf_decoding.weight = self.linear_chain_crf.weight
crf_decode = self.crf_decoding(input=emission, length=lengths)
return crf_decode, avg_cost, lengths
else:
self.linear_chain_crf.weight = self.crf_decoding.weight
crf_decode = self.crf_decoding(input=emission, length=lengths)
return crf_decode, lengths
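A hedged summary of the two paths above: passing `target` computes the CRF cost for training, while omitting it syncs the transition weights the other way and returns only the decode result for inference.

# training:  decode, avg_cost, lengths = model(word, lengths, target)
# inference: decode, lengths = model(word, lengths)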
......@@ -17,24 +17,15 @@ from . import vgg
from . import mobilenetv1
from . import mobilenetv2
from . import darknet
from . import yolov3
from . import tsm
from . import bmn_model
from .resnet import *
from .mobilenetv1 import *
from .mobilenetv2 import *
from .vgg import *
from .darknet import *
from .yolov3 import *
from .tsm import *
from .bmn_model import *
__all__ = resnet.__all__ \
+ vgg.__all__ \
+ mobilenetv1.__all__ \
+ mobilenetv2.__all__ \
+ darknet.__all__ \
+ yolov3.__all__ \
+ tsm.__all__ \
+ bmn_model.__all__
+ darknet.__all__
......@@ -21,7 +21,7 @@ from paddle.fluid.dygraph.nn import Conv2D, BatchNorm
from hapi.model import Model
from hapi.download import get_weights_path
__all__ = ['DarkNet', 'ConvBNLayer', 'darknet53']
__all__ = ['DarkNet', 'darknet53']
# {num_layers: (url, md5)}
pretrain_infos = {
......@@ -136,7 +136,7 @@ class LayerWarp(fluid.dygraph.Layer):
DarkNet_cfg = {53: ([1, 2, 8, 8, 4])}
class DarkNet(fluid.dygraph.Layer):
class DarkNet(Model):
"""DarkNet model from
`"YOLOv3: An Incremental Improvement" <https://arxiv.org/abs/1804.02767>`_
......
......@@ -12,6 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from . import transforms
from . import functional
from .transforms import *
from .functional import *
from .detection_transforms import *
__all__ = transforms.__all__ \
+ functional.__all__
......@@ -26,6 +26,8 @@ else:
Sequence = collections.abc.Sequence
Iterable = collections.abc.Iterable
__all__ = ['flip', 'resize']
def flip(image, code):
"""
......