Commit ed6abbe1 authored by: qingqing01

Update to master and add more doc

@@ -18,10 +18,10 @@ from hapi.metrics import Accuracy
 from hapi.configure import Config
 from hapi.text.bert import BertEncoder
 from paddle.fluid.dygraph import Linear, Layer
-from hapi.model import set_device, Model, Input
 from hapi.loss import SoftmaxWithCrossEntropy
+from hapi.model import set_device, Model, Input
 import hapi.text.tokenizer.tokenization as tokenization
-from hapi.text.bert import Optimizer, BertConfig, BertDataLoader, BertInputExample
+from hapi.text.bert import BertConfig, BertDataLoader, BertInputExample, make_optimizer

 class ClsModelLayer(Model):
@@ -128,7 +128,7 @@ def main():
             [None, None], 'int64', name='src_ids'), Input(
             [None, None], 'int64', name='pos_ids'), Input(
             [None, None], 'int64', name='sent_ids'), Input(
-            [None, None], 'float32', name='input_mask')
+            [None, None, 1], 'float32', name='input_mask')
     ]
     labels = [Input([None, 1], 'int64', name='label')]
@@ -139,13 +139,13 @@ def main():
         len(["contradiction", "entailment", "neutral"]),
         return_pooled_out=True)

-    optimizer = Optimizer(
+    optimizer = make_optimizer(
         warmup_steps=warmup_steps,
         num_train_steps=max_train_steps,
         learning_rate=config.learning_rate,
-        model_cls=cls_model,
         weight_decay=config.weight_decay,
         scheduler=config.lr_scheduler,
+        model=cls_model,
         loss_scaling=config.loss_scaling,
         parameter_list=cls_model.parameters())
@@ -157,8 +157,7 @@ def main():
         labels,
         device=device)

-    cls_model.bert_layer.init_parameters(
-        config.init_pretraining_params, verbose=config.verbose)
+    cls_model.bert_layer.load("./bert_small", reset_optimizer=True)

     # do train
     cls_model.fit(train_data=train_dataloader.dataloader,
...
@@ -4,7 +4,7 @@ TASK_NAME='MNLI'
 DATA_PATH="./data/glue_data/MNLI/"
 CKPT_PATH="./data/saved_model/mnli_models"

-export CUDA_VISIBLE_DEVICES=0
+export CUDA_VISIBLE_DEVICES=1

 # start fine-tuning
 python3.7 bert_classifier.py\
...
@@ -18,10 +18,10 @@ from hapi.metrics import Accuracy
 from hapi.configure import Config
 from hapi.text.bert import BertEncoder
 from paddle.fluid.dygraph import Linear, Layer
-from hapi.model import set_device, Model, Input
 from hapi.loss import SoftmaxWithCrossEntropy
+from hapi.model import set_device, Model, Input
 import hapi.text.tokenizer.tokenization as tokenization
-from hapi.text.bert import Optimizer, BertConfig, BertDataLoader, BertInputExample
+from hapi.text.bert import BertConfig, BertDataLoader, BertInputExample, make_optimizer

 class ClsModelLayer(Model):
@@ -99,12 +99,12 @@ def main():
     train_dataloader = BertDataLoader(
         "./data/glue_data/MNLI/train.tsv",
-        tokenizer, ["contradiction", "entailment", "neutral"],
+        tokenizer,
+        ["contradiction", "entailment", "neutral"],
         max_seq_length=config.max_seq_len,
         batch_size=config.batch_size,
         line_processor=mnli_line_processor,
-        mode="leveldb",
-        phase="train")
+        mode="leveldb", )

     test_dataloader = BertDataLoader(
         "./data/glue_data/MNLI/dev_matched.tsv",
@@ -130,7 +130,7 @@ def main():
             [None, None], 'int64', name='src_ids'), Input(
             [None, None], 'int64', name='pos_ids'), Input(
             [None, None], 'int64', name='sent_ids'), Input(
-            [None, None], 'float32', name='input_mask')
+            [None, None, 1], 'float32', name='input_mask')
     ]
     labels = [Input([None, 1], 'int64', name='label')]
@@ -141,13 +141,13 @@ def main():
         len(["contradiction", "entailment", "neutral"]),
         return_pooled_out=True)

-    optimizer = Optimizer(
+    optimizer = make_optimizer(
         warmup_steps=warmup_steps,
         num_train_steps=max_train_steps,
         learning_rate=config.learning_rate,
-        model_cls=cls_model,
         weight_decay=config.weight_decay,
         scheduler=config.lr_scheduler,
+        model=cls_model,
         loss_scaling=config.loss_scaling,
         parameter_list=cls_model.parameters())
@@ -159,8 +159,7 @@ def main():
         labels,
         device=device)

-    cls_model.bert_layer.init_parameters(
-        config.init_pretraining_params, verbose=config.verbose)
+    cls_model.bert_layer.load("./bert_small", reset_optimizer=True)

     # do train
     cls_model.fit(train_data=train_dataloader.dataloader,
...
@@ -5,7 +5,7 @@ DATA_PATH="./data/glue_data/MNLI/"
 CKPT_PATH="./data/saved_model/mnli_models"

 # start fine-tuning
-python3.7 -m paddle.distributed.launch --started_port 8899 --selected_gpus=0,1,2,3 bert_classifier.py\
+python3.7 -m paddle.distributed.launch --started_port 8899 --selected_gpus=1,2,3 bert_classifier.py\
 --use_cuda true \
 --do_train true \
 --do_test true \
...
@@ -4,7 +4,7 @@ TASK_NAME='MNLI'
 DATA_PATH="./data/glue_data/MNLI/"
 CKPT_PATH="./data/saved_model/mnli_models"

-export CUDA_VISIBLE_DEVICES=0
+export CUDA_VISIBLE_DEVICES=1

 # start fine-tuning
 python3.7 bert_classifier.py\
...
## Introduction

Sentiment is a high-level intelligent human behavior, and identifying the sentiment orientation of text requires deep semantic modeling. In addition, different domains (e.g., catering, sports) express sentiment differently, so model training needs large-scale data covering many domains. We address both problems with a deep-learning-based semantic model plus large-scale data mining. For evaluation, we benchmark on the open-source sentiment classification dataset ChnSentiCorp:

| Model | dev | test |
| :------| :------ | :------ |
| CNN | 90.6% | 89.7% |
| BOW | 90.1% | 90.3% |
| GRU | 90.0% | 91.1% |
| BIGRU | 89.7% | 89.6% |

For documentation on dynamic graph (dygraph) mode, see [Dygraph](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/user_guides/howto/dygraph/DyGraph.html).
## Quick Start

This project requires PaddlePaddle 1.7.0 or later; see the [installation guide](http://www.paddlepaddle.org/#quick-start) for setup.
Python 2.7, or Python 3.5 and above, is required.

#### Code download and environment setup

Clone the repository and set the `PYTHONPATH` environment variable:
```shell
git clone https://github.com/PaddlePaddle/hapi
cd hapi
export PYTHONPATH=$PYTHONPATH:`pwd`
cd examples/sentiment_classification
```
#### Data preparation

Download the preprocessed data. After extraction, the senta_data directory contains the training set (train.tsv), the development set (dev.tsv), the test set (test.tsv), and the corresponding vocabulary (word_dict.txt):
```shell
wget https://baidu-nlp.bj.bcebos.com/sentiment_classification-dataset-1.0.0.tar.gz
tar -zxvf sentiment_classification-dataset-1.0.0.tar.gz
```
#### Model training

With the example dataset, the command below trains a model on the training set (train.tsv) and validates it on the development set (dev.tsv). Before training, manually create the directory where models will be saved, and set its path via `checkpoints`.
Choose `model_type` from `bow_net`, `cnn_net`, `gru_net`, or `bigru_net`.
All model parameters are configured in `senta.yaml`; for training, make sure the `do_train` attribute in `senta.yaml` is set to `True`:
```shell
python sentiment_classifier.py
```
#### Model prediction

With a trained model, the command below predicts labels for unlabeled data (test.tsv).
For prediction, make sure the `do_infer` attribute in `senta.yaml` is set to `True`:
```shell
python sentiment_classifier.py
```
#### Model parameters

Model parameters are configured in `senta.yaml` (a quick way to inspect them follows the list):

1. `batch_size`: choose according to the model and GPU utilization; a larger batch size is recommended for cnn/bow, a smaller one for gru/bigru.
2. `padding_size`: defaults to 150.
3. `epoch`: defaults to 5 for training and 1 for inference.
4. `learning_rate`: defaults to 0.002.
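
These values can be loaded and printed with hapi's `Config` helper, exactly as `sentiment_classifier.py` does. A minimal sketch, assuming it runs from `examples/sentiment_classification` with hapi on `PYTHONPATH`:

```python
# Minimal sketch: load senta.yaml and dump the parameters listed above.
from hapi.configure import Config

args = Config(yaml_file='./senta.yaml')
args.build()
args.Print()  # prints batch_size, padding_size, epoch, lr, model_type, ...
```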
## Advanced Usage

#### Task definition

Traditional sentiment classification is based on lexicons or feature engineering, which requires laborious manual feature design and prior knowledge, stays at a shallow level of understanding, and generalizes poorly. To avoid these limitations, we adopt deep learning, which has developed rapidly in recent years. Deep-learning-based sentiment classification does not depend on hand-crafted features: it understands the input text end to end and judges sentiment orientation from the learned semantic representation.

#### Model overview

For the sentiment classification task, this project provides the following models (a usage sketch follows the list):
+ CNN (Convolutional Neural Network): a basic sequence model that handles variable-length input and extracts features within local regions;
+ BOW (Bag Of Words): a non-sequence model with a simple fully connected structure;
+ GRU (Gated Recurrent Unit): a sequence model that better captures long-distance dependencies in text;
+ BI-GRU (Bidirectional Gated Recurrent Unit): a sequence model with a bidirectional, two-layer GRU structure that better captures sentence-level semantics.
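
All four networks are defined in `models.py` (included below) and share one constructor signature, so switching models is a one-line configuration change. A minimal sketch of the selection logic used by `sentiment_classifier.py`, with the default values from `senta.yaml`:

```python
# Minimal sketch: pick a network by model_type, as sentiment_classifier.py does.
# Constructor arguments are vocab_size, batch_size and padding_size
# (values below are the senta.yaml defaults).
from models import CNN, BOW, GRU, BiGRU

nets = {'cnn_net': CNN, 'bow_net': BOW, 'gru_net': GRU, 'bigru_net': BiGRU}
model = nets['bow_net'](33256, 20, 150)
```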
#### Data format

Training, prediction, and evaluation data can be organized by users according to their actual application scenario. The data consists of two tab-separated columns: the first column is space-tokenized Chinese text (the tokenization preprocessing is described below), with files encoded in UTF-8; the second column is the sentiment class (0 for negative, 1 for positive). Note that the first line of each data file is fixed to "text_a\tlabel":
```text
特 喜欢 这种 好看的 狗狗 1
这 真是 惊艳 世界 的 中国 黑科技 1
环境 特别 差 ,脏兮兮 的,再也 不去 了 0
```
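
For reference, a minimal sketch of a reader for this format (the real loading is handled by `SentaProcessor`; the helper name here is hypothetical):

```python
# Hypothetical helper: parse the tab-separated "text_a\tlabel" format above.
def read_senta_tsv(path):
    samples = []
    with open(path, encoding='utf8') as f:
        next(f)  # skip the fixed "text_a\tlabel" header line
        for line in f:
            text, label = line.rstrip('\n').split('\t')
            samples.append((text.split(), int(label)))  # tokens, 0/1 label
    return samples
```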
#### Code structure

```text
.
├── sentiment_classifier.py  # project entry point: training, prediction, and evaluation
├── models.py                # network structure definitions
```
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid as fluid
from paddle.fluid.dygraph.nn import Linear, Embedding
from paddle.fluid.dygraph.base import to_variable
import numpy as np
from hapi.model import Model
from hapi.text.text import GRUEncoderLayer as BiGRUEncoder
from hapi.text.text import BOWEncoder, CNNEncoder, GRUEncoder
class CNN(Model):
def __init__(self, dict_dim, batch_size, seq_len):
super(CNN, self).__init__()
self.dict_dim = dict_dim
self.emb_dim = 128
self.hid_dim = 128
self.fc_hid_dim = 96
self.class_dim = 2
self.channels = 1
self.win_size = [3, self.hid_dim]
self.batch_size = batch_size
self.seq_len = seq_len
        self._encoder = CNNEncoder(
            dict_size=self.dict_dim + 1,
            emb_dim=self.emb_dim,
            seq_len=self.seq_len,
            filter_size=self.win_size,
            num_filters=self.hid_dim,
            hidden_dim=self.hid_dim,
            padding_idx=None,
            act='tanh')
        self._fc1 = Linear(
            input_dim=self.hid_dim * self.seq_len,
            output_dim=self.fc_hid_dim,
            act="softmax")
        self._fc_prediction = Linear(
            input_dim=self.fc_hid_dim,
            output_dim=self.class_dim,
            act="softmax")
def forward(self, inputs):
conv_3 = self._encoder(inputs)
fc_1 = self._fc1(conv_3)
prediction = self._fc_prediction(fc_1)
return prediction
class BOW(Model):
def __init__(self, dict_dim, batch_size, seq_len):
super(BOW, self).__init__()
self.dict_dim = dict_dim
self.emb_dim = 128
self.hid_dim = 128
self.fc_hid_dim = 96
self.class_dim = 2
self.batch_size = batch_size
self.seq_len = seq_len
self._encoder = BOWEncoder(
dict_size=self.dict_dim + 1,
emb_dim=self.emb_dim,
padding_idx=None,
bow_dim=self.hid_dim,
seq_len=self.seq_len)
        self._fc1 = Linear(input_dim=self.hid_dim, output_dim=self.hid_dim, act="tanh")
        self._fc2 = Linear(input_dim=self.hid_dim, output_dim=self.fc_hid_dim, act="tanh")
        self._fc_prediction = Linear(
            input_dim=self.fc_hid_dim,
            output_dim=self.class_dim,
            act="softmax")
def forward(self, inputs):
bow_1 = self._encoder(inputs)
bow_1 = fluid.layers.tanh(bow_1)
fc_1 = self._fc1(bow_1)
fc_2 = self._fc2(fc_1)
prediction = self._fc_prediction(fc_2)
return prediction
class GRU(Model):
def __init__(self, dict_dim, batch_size, seq_len):
super(GRU, self).__init__()
self.dict_dim = dict_dim
self.emb_dim = 128
self.hid_dim = 128
self.fc_hid_dim = 96
self.class_dim = 2
self.batch_size = batch_size
self.seq_len = seq_len
self._fc1 = Linear(input_dim=self.hid_dim, output_dim=self.fc_hid_dim, act="tanh")
self._fc_prediction = Linear(input_dim=self.fc_hid_dim,
output_dim=self.class_dim,
act="softmax")
self._encoder = GRUEncoder(
dict_size=self.dict_dim + 1,
emb_dim=self.emb_dim,
gru_dim=self.hid_dim,
hidden_dim=self.hid_dim,
padding_idx=None,
seq_len=self.seq_len)
def forward(self, inputs):
emb = self._encoder(inputs)
fc_1 = self._fc1(emb)
prediction = self._fc_prediction(fc_1)
return prediction
class BiGRU(Model):
def __init__(self, dict_dim, batch_size, seq_len):
super(BiGRU, self).__init__()
self.dict_dim = dict_dim
self.emb_dim = 128
self.hid_dim = 128
self.fc_hid_dim = 96
self.class_dim = 2
self.batch_size = batch_size
self.seq_len = seq_len
self.embedding = Embedding(
size=[self.dict_dim + 1, self.emb_dim],
dtype='float32',
param_attr=fluid.ParamAttr(learning_rate=30),
is_sparse=False)
h_0 = np.zeros((self.batch_size, self.hid_dim), dtype="float32")
h_0 = to_variable(h_0)
        self._fc1 = Linear(input_dim=self.hid_dim, output_dim=self.hid_dim * 3)
        self._fc2 = Linear(input_dim=self.hid_dim * 2, output_dim=self.fc_hid_dim, act="tanh")
self._fc_prediction = Linear(input_dim=self.fc_hid_dim,
output_dim=self.class_dim,
act="softmax")
self._encoder = BiGRUEncoder(
grnn_hidden_dim=self.hid_dim,
input_dim=self.hid_dim * 3,
h_0=h_0,
init_bound=0.1,
is_bidirection=True)
def forward(self, inputs):
emb = self.embedding(inputs)
emb = fluid.layers.reshape(emb, shape=[self.batch_size, -1, self.hid_dim])
fc_1 = self._fc1(emb)
encoded_vector = self._encoder(fc_1)
encoded_vector = fluid.layers.tanh(encoded_vector)
encoded_vector = fluid.layers.reduce_max(encoded_vector, dim=1)
fc_2 = self._fc2(encoded_vector)
prediction = self._fc_prediction(fc_2)
return prediction
checkpoints: "./checkpoints"
epoch: 5
save_freq: 1
eval_freq: 1
lr: 0.002
padding_size: 150
skip_steps: 10
verbose: False
data_dir: "./senta_data/"
vocab_path: "./senta_data/word_dict.txt"
vocab_size: 33256
batch_size: 20
random_seed: 0
use_cuda: True
do_train: True
do_infer: False
model_type: "bow_net"
output_dir: "./output"
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Sentiment Classification in Paddle Dygraph Mode. """
from __future__ import print_function
import numpy as np
import paddle.fluid as fluid
from hapi.model import set_device, Model, CrossEntropy, Input
from hapi.configure import Config
from hapi.text.senta import SentaProcessor
from hapi.metrics import Accuracy
from models import CNN, BOW, GRU, BiGRU
import json
import os
args = Config(yaml_file='./senta.yaml')
args.build()
args.Print()
device = set_device("gpu" if args.use_cuda else "cpu")
dev_count = fluid.core.get_cuda_device_count() if args.use_cuda else 1
def main():
if args.do_train:
train()
elif args.do_infer:
infer()
def train():
fluid.enable_dygraph(device)
processor = SentaProcessor(
data_dir=args.data_dir,
vocab_path=args.vocab_path,
random_seed=args.random_seed)
num_labels = len(processor.get_labels())
num_train_examples = processor.get_num_examples(phase="train")
max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count
train_data_generator = processor.data_generator(
batch_size=args.batch_size,
padding_size=args.padding_size,
places=device,
phase='train',
epoch=args.epoch,
shuffle=False)
eval_data_generator = processor.data_generator(
batch_size=args.batch_size,
padding_size=args.padding_size,
places=device,
phase='dev',
epoch=args.epoch,
shuffle=False)
    if args.model_type == 'cnn_net':
        model = CNN(args.vocab_size, args.batch_size, args.padding_size)
    elif args.model_type == 'bow_net':
        model = BOW(args.vocab_size, args.batch_size, args.padding_size)
    elif args.model_type == 'gru_net':
        model = GRU(args.vocab_size, args.batch_size, args.padding_size)
    elif args.model_type == 'bigru_net':
        model = BiGRU(args.vocab_size, args.batch_size, args.padding_size)
optimizer = fluid.optimizer.Adagrad(learning_rate=args.lr, parameter_list=model.parameters())
inputs = [Input([None, None], 'int64', name='doc')]
labels = [Input([None, 1], 'int64', name='label')]
model.prepare(
optimizer,
CrossEntropy(),
Accuracy(topk=(1,)),
inputs,
labels,
device=device)
model.fit(train_data=train_data_generator,
eval_data=eval_data_generator,
batch_size=args.batch_size,
epochs=args.epoch,
save_dir=args.checkpoints,
eval_freq=args.eval_freq,
save_freq=args.save_freq)
def infer():
fluid.enable_dygraph(device)
processor = SentaProcessor(
data_dir=args.data_dir,
vocab_path=args.vocab_path,
random_seed=args.random_seed)
infer_data_generator = processor.data_generator(
batch_size=args.batch_size,
padding_size=args.padding_size,
places=device,
phase='infer',
epoch=1,
shuffle=False)
    if args.model_type == 'cnn_net':
        model_infer = CNN(args.vocab_size, args.batch_size, args.padding_size)
    elif args.model_type == 'bow_net':
        model_infer = BOW(args.vocab_size, args.batch_size, args.padding_size)
    elif args.model_type == 'gru_net':
        model_infer = GRU(args.vocab_size, args.batch_size, args.padding_size)
    elif args.model_type == 'bigru_net':
        model_infer = BiGRU(args.vocab_size, args.batch_size, args.padding_size)
print('Do inferring ...... ')
inputs = [Input([None, None], 'int64', name='doc')]
model_infer.prepare(
None,
CrossEntropy(),
Accuracy(topk=(1,)),
inputs,
device=device)
model_infer.load(args.checkpoints, reset_optimizer=True)
preds = model_infer.predict(test_data=infer_data_generator)
preds = np.array(preds[0]).reshape((-1, 2))
if args.output_dir:
with open(os.path.join(args.output_dir, 'predictions.json'), 'w') as w:
for p in range(len(preds)):
                label = int(np.argmax(preds[p]))  # cast to built-in int so json can serialize it
                result = json.dumps({'index': p, 'label': label, 'probs': preds[p].tolist()})
w.write(result+'\n')
print('Predictions saved at '+os.path.join(args.output_dir, 'predictions.json'))
if __name__ == '__main__':
main()
@@ -39,8 +39,8 @@ TSM is a video classification model built by inserting the Temporal Shift Module into ResNet
 ```bash
 git clone https://github.com/PaddlePaddle/hapi
 cd hapi
-export PYTHONPATH=$PYTHONPATH:`pwd`
-cd tsm
+export PYTHONPATH=`pwd`:$PYTHONPATH
+cd examples/tsm
 ```

 ### Data preparation
@@ -141,6 +141,8 @@ python infer.py --data=<path/to/dataset> --label_list=<path/to/label_list> --inf
 2020-04-03 07:37:16,321-INFO: Sample ./kineteics/val_10/data_batch_10-042_6 predict label: 6, ground truth label: 6
 ```

+**Note:** for inference, `--infer_file` must point to a pickle file path.
+
 ## References

 - [Temporal Shift Module for Efficient Video Understanding](https://arxiv.org/abs/1811.08383v1), Ji Lin, Chuang Gan, Song Han
...
@@ -26,6 +26,7 @@ from check import check_gpu, check_version
 from modeling import tsm_resnet50
 from kinetics_dataset import KineticsDataset
 from transforms import *
+from utils import print_arguments

 import logging
 logger = logging.getLogger(__name__)
@@ -56,7 +57,7 @@ def main():
     model.load(FLAGS.weights, reset_optimizer=True)

     imgs, label = dataset[0]
-    pred = model.test([imgs[np.newaxis, :]])
+    pred = model.test_batch([imgs[np.newaxis, :]])
     pred = labels[np.argmax(pred)]
     logger.info("Sample {} predict label: {}, ground truth label: {}" \
                 .format(FLAGS.infer_file, pred, labels[int(label)]))
@@ -86,6 +87,7 @@ if __name__ == '__main__':
         type=str,
         help="weights path for evaluation")
     FLAGS = parser.parse_args()
+    print_arguments(FLAGS)
     check_gpu(str.lower(FLAGS.device) == 'gpu')
     check_version()
...
@@ -113,7 +113,7 @@ class KineticsDataset(Dataset):
         if self.transform:
             imgs, label = self.transform(imgs, label)

-        return imgs, np.array([label])
+        return imgs, np.array([label]).astype('int64')

     @property
     def num_classes(self):
...
@@ -31,6 +31,7 @@ from modeling import tsm_resnet50
 from check import check_gpu, check_version
 from kinetics_dataset import KineticsDataset
 from transforms import *
+from utils import print_arguments

 def make_optimizer(step_per_epoch, parameter_list=None):
@@ -106,7 +107,7 @@ def main():
               eval_data=val_dataset,
               epochs=FLAGS.epoch,
               batch_size=FLAGS.batch_size,
-              save_dir='tsm_checkpoint',
+              save_dir=FLAGS.save_dir or 'tsm_checkpoint',
               num_workers=FLAGS.num_workers,
               drop_last=True,
               shuffle=True)
@@ -150,7 +151,14 @@ if __name__ == '__main__':
         default=None,
         type=str,
         help="weights path for evaluation")
+    parser.add_argument(
+        "-s",
+        "--save_dir",
+        default=None,
+        type=str,
+        help="directory path for checkpoint saving, default ./tsm_checkpoint")
     FLAGS = parser.parse_args()
+    print_arguments(FLAGS)
     check_gpu(str.lower(FLAGS.device) == 'gpu')
     check_version()
...
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import six
import logging
logger = logging.getLogger(__name__)
__all__ = ['print_arguments']
def print_arguments(args):
"""Print argparse's arguments.
Usage:
.. code-block:: python
parser = argparse.ArgumentParser()
parser.add_argument("name", default="Jonh", type=str, help="User name.")
args = parser.parse_args()
print_arguments(args)
:param args: Input argparse.Namespace for printing.
:type args: argparse.Namespace
"""
logger.info("----------- Configuration Arguments -----------")
for arg, value in sorted(six.iteritems(vars(args))):
logger.info("%s: %s" % (arg, value))
logger.info("------------------------------------------------")
@@ -53,8 +53,8 @@ The YOLOv3 network consists of a backbone feature-extraction network, multi-scale feature fusion layers
 ```bash
 git clone https://github.com/PaddlePaddle/hapi
 cd hapi
-export PYTHONPATH=$PYTHONPATH:`pwd`
-cd tsm
+export PYTHONPATH=`pwd`:$PYTHONPATH
+cd examples/yolov3
 ```

 #### Install COCO-API
@@ -126,13 +126,13 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch main.py --data=
 Launch multi-card training as follows:

 ```bash
-CUDA_VISIBLE_DEVICES=0,1,2,3 python main.py -m paddle.distributed.launch --data=<path/to/dataset> --batch_size=16 -d
+CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch main.py --data=<path/to/dataset> --batch_size=16 -d
 ```

 ### Model evaluation

-The YOLOv3 model outputs LoDTensor, so evaluation only supports batch_size=1; it can be done in either of the two ways below.
+The YOLOv3 model outputs LoDTensor, so evaluation only supports a single card with batch_size=1; it can be done in either of the two ways below.

 1. Evaluate with the Paddle-released [YOLOv3-DarkNet53](https://paddlemodels.bj.bcebos.com/hapi/yolov3_darknet53.pdparams) weights (downloaded automatically)
@@ -180,7 +180,7 @@ python infer.py --label_list=dataset/voc/label_list.txt --infer_image=image/dog.
 2. Load a checkpoint for accuracy evaluation

 ```bash
-python infer.py --label_list=dataset/voc/label_list.txt --infer_image=image/dog.jpg --weights=yolo_checkpoint/mo_mixup/final
+python infer.py --label_list=dataset/voc/label_list.txt --infer_image=image/dog.jpg --weights=yolo_checkpoint/no_mixup/final
 ```

 Visualized inference results are saved under the folder given by `--output`, `./output` by default.
...
@@ -28,7 +28,7 @@ from hapi.model import Model, Input, set_device
 from modeling import yolov3_darknet53, YoloLoss
 from transforms import *
+from utils import print_arguments
 from visualizer import draw_bbox

 import logging
@@ -91,7 +91,7 @@ def main():
     img_id = np.array([0]).astype('int64')[np.newaxis, :]
     img_shape = np.array([h, w]).astype('int32')[np.newaxis, :]

-    _, bboxes = model.test([img_id, img_shape, img])
+    _, bboxes = model.test_batch([img_id, img_shape, img])

     vis_img = draw_bbox(orig_img, cat2name, bboxes, FLAGS.draw_threshold)
     save_name = get_save_image_name(FLAGS.output_dir, FLAGS.infer_image)
@@ -121,6 +121,7 @@ if __name__ == '__main__':
         "-w", "--weights", default=None, type=str,
         help="path to weights for inference")
     FLAGS = parser.parse_args()
+    print_arguments(FLAGS)
     assert os.path.isfile(FLAGS.infer_image), \
         "infer_image {} not a file".format(FLAGS.infer_image)
     assert os.path.isfile(FLAGS.label_list), \
...
@@ -33,6 +33,7 @@ from modeling import yolov3_darknet53, YoloLoss
 from coco import COCODataset
 from coco_metric import COCOMetric
 from transforms import *
+from utils import print_arguments

 NUM_MAX_BOXES = 50
@@ -171,16 +172,18 @@ def main():
     if FLAGS.resume is not None:
         model.load(FLAGS.resume)

+    save_dir = FLAGS.save_dir or 'yolo_checkpoint'
     model.fit(train_data=loader,
               epochs=FLAGS.epoch - FLAGS.no_mixup_epoch,
-              save_dir="yolo_checkpoint/mixup",
+              save_dir=os.path.join(save_dir, "mixup"),
               save_freq=10)

     # do not use image mixup transform in the last FLAGS.no_mixup_epoch epochs
     dataset.mixup = False
     model.fit(train_data=loader,
               epochs=FLAGS.no_mixup_epoch,
-              save_dir="yolo_checkpoint/no_mixup",
+              save_dir=os.path.join(save_dir, "no_mixup"),
               save_freq=5)
@@ -233,6 +236,13 @@ if __name__ == '__main__':
         default=None,
         type=str,
         help="path to weights for evaluation")
+    parser.add_argument(
+        "-s",
+        "--save_dir",
+        default=None,
+        type=str,
+        help="directory path for checkpoint saving, default ./yolo_checkpoint")
     FLAGS = parser.parse_args()
+    print_arguments(FLAGS)
     assert FLAGS.data, "error: must provide data path"
     main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import six
import logging
logger = logging.getLogger(__name__)
__all__ = ['print_arguments']
def print_arguments(args):
"""Print argparse's arguments.
Usage:
.. code-block:: python
parser = argparse.ArgumentParser()
parser.add_argument("name", default="Jonh", type=str, help="User name.")
args = parser.parse_args()
print_arguments(args)
:param args: Input argparse.Namespace for printing.
:type args: argparse.Namespace
"""
logger.info("----------- Configuration Arguments -----------")
for arg, value in sorted(six.iteritems(vars(args))):
logger.info("%s: %s" % (arg, value))
logger.info("------------------------------------------------")
@@ -123,7 +123,7 @@ class Flowers(Dataset):
         if self.transform is not None:
             image = self.transform(image)

-        return image, label
+        return image, label.astype('int64')

     def __len__(self):
         return len(self.indexes)
@@ -45,6 +45,8 @@ class MNIST(Dataset):
             :attr:`download` is True. Default None
         label_path(str): path to label file, can be set None if
             :attr:`download` is True. Default None
+        chw_format(bool): If set True, the output shape is [1, 28, 28],
+            otherwise, output shape is [1, 784].
         mode(str): 'train' or 'test' mode. Default 'train'.
         download(bool): whether auto download mnist dataset if
             :attr:`image_path`/:attr:`label_path` unset. Default
@@ -70,13 +72,14 @@ class MNIST(Dataset):
     def __init__(self,
                  image_path=None,
                  label_path=None,
+                 chw_format=True,
                  mode='train',
                  transform=None,
                  download=True):
         assert mode.lower() in ['train', 'test'], \
             "mode should be 'train' or 'test', but got {}".format(mode)
         self.mode = mode.lower()
+        self.chw_format = chw_format
         self.image_path = image_path
         if self.image_path is None:
             assert download, "image_path not set and auto download disabled"
@@ -144,10 +147,13 @@ class MNIST(Dataset):
             for i in range(buffer_size):
                 self.images.append(images[i, :])
-                self.labels.append(np.array([labels[i]]))
+                self.labels.append(
+                    np.array([labels[i]]).astype('int64'))

     def __getitem__(self, idx):
         image, label = self.images[idx], self.labels[idx]
+        if self.chw_format:
+            image = np.reshape(image, [1, 28, 28])
         if self.transform is not None:
             image = self.transform(image)
         return image, label
...
@@ -23,6 +23,7 @@ import requests
 import tqdm
 import hashlib
 import time
+from collections import OrderedDict

 from paddle.fluid.dygraph.parallel import ParallelEnv
@@ -35,6 +36,26 @@ WEIGHTS_HOME = osp.expanduser("~/.cache/paddle/hapi/weights")
 DOWNLOAD_RETRY_LIMIT = 3

+nlp_models = OrderedDict(
+    (('RoBERTa-zh-base', 'https://bert-models.bj.bcebos.com/chinese_roberta_wwm_ext_L-12_H-768_A-12.tar.gz'),
+     ('RoBERTa-zh-large', 'https://bert-models.bj.bcebos.com/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.tar.gz'),
+     ('ERNIE-v2-en-base', 'https://ernie.bj.bcebos.com/ERNIE_Base_en_stable-2.0.0.tar.gz'),
+     ('ERNIE-v2-en-large', 'https://ernie.bj.bcebos.com/ERNIE_Large_en_stable-2.0.0.tar.gz'),
+     ('XLNet-cased-base', 'https://xlnet.bj.bcebos.com/xlnet_cased_L-12_H-768_A-12.tgz'),
+     ('XLNet-cased-large', 'https://xlnet.bj.bcebos.com/xlnet_cased_L-24_H-1024_A-16.tgz'),
+     ('ERNIE-v1-zh-base', 'https://baidu-nlp.bj.bcebos.com/ERNIE_stable-1.0.1.tar.gz'),
+     ('ERNIE-v1-zh-base-max-len-512', 'https://ernie.bj.bcebos.com/ERNIE_1.0_max-len-512.tar.gz'),
+     ('BERT-en-uncased-large-whole-word-masking', 'https://bert-models.bj.bcebos.com/wwm_uncased_L-24_H-1024_A-16.tar.gz'),
+     ('BERT-en-cased-large-whole-word-masking', 'https://bert-models.bj.bcebos.com/wwm_cased_L-24_H-1024_A-16.tar.gz'),
+     ('BERT-en-uncased-base', 'https://bert-models.bj.bcebos.com/uncased_L-12_H-768_A-12.tar.gz'),
+     ('BERT-en-uncased-large', 'https://bert-models.bj.bcebos.com/uncased_L-24_H-1024_A-16.tar.gz'),
+     ('BERT-en-cased-base', 'https://bert-models.bj.bcebos.com/cased_L-12_H-768_A-12.tar.gz'),
+     ('BERT-en-cased-large', 'https://bert-models.bj.bcebos.com/cased_L-24_H-1024_A-16.tar.gz'),
+     ('BERT-multilingual-uncased-base', 'https://bert-models.bj.bcebos.com/multilingual_L-12_H-768_A-12.tar.gz'),
+     ('BERT-multilingual-cased-base', 'https://bert-models.bj.bcebos.com/multi_cased_L-12_H-768_A-12.tar.gz'),
+     ('BERT-zh-base', 'https://bert-models.bj.bcebos.com/chinese_L-12_H-768_A-12.tar.gz'),)
+)

 def is_url(path):
     """
...
@@ -116,7 +116,7 @@ class Accuracy(Metric):
     def add_metric_op(self, pred, label, *args):
         pred = fluid.layers.argsort(pred, descending=True)[1][:, :self.maxk]
         correct = pred == label
-        return correct
+        return fluid.layers.cast(correct, dtype='float32')

     def update(self, correct, *args):
         accs = []
@@ -143,7 +143,7 @@ class Accuracy(Metric):
         if self.maxk != 1:
             self._name = ['{}_top{}'.format(name, k) for k in self.topk]
         else:
-            self._name = ['acc']
+            self._name = [name]

     def name(self):
         return self._name
@@ -642,7 +642,7 @@ class Model(fluid.dygraph.Layer):
     A Model object is a network with training and inference features.
     Dynamic graph and static graph are supported at the same time,
     switched by `fluid.enable_dygraph()`. The usage is as follows.
-    The switching between dynamic and static should be before
+    But note, the switching between dynamic and static should be before
     instantiating a Model. The input description, i.e., hapi.Input,
     is required for static graph.
@@ -993,8 +993,25 @@ class Model(fluid.dygraph.Layer):
         Returns a list of parameters of the model.

         Returns:
-            list of :ref:`api_guide_Variable_en` : a list of parameters.
+            list of Parameter in static graph.
+            list of ParamBase in dynamic graph.
+
+        Examples:
+            .. code-block:: python
+
+                from hapi.model import Model, Input, set_device
+
+                class MyModel(Model):
+                    def __init__(self):
+                        super(MyModel, self).__init__()
+                        self._fc = fluid.dygraph.Linear(20, 10, act='softmax')
+                    def forward(self, x):
+                        y = self._fc(x)
+                        return y
+
+                fluid.enable_dygraph()
+                model = MyModel()
+                params = model.parameters()
         """
         return self._adapter.parameters()
@@ -1006,27 +1023,32 @@ class Model(fluid.dygraph.Layer):
                 labels=None,
                 device=None):
         """
-        FIXME: add comments
+        Configures the model before running.

         Args:
-            optimizer (Optimizer|None): optimizer must be set in training
+            optimizer (Optimizer|None): Optimizer must be set in training
                 and should be an Optimizer instance. It can be None in eval
                 and test mode.
-            loss_function (Loss|None): loss function must be set in training
+            loss_function (Loss|None): Loss function must be set in training
                 and should be a Loss instance. It can be None when there is
                 no loss.
-            metrics (Metric|list of Metric|None): if metrics is set, all
-                metric will be calculate and output in train/eval mode.
+            metrics (Metric|list of Metric|None): If metrics is set, all
+                metrics will be calculated and output in train/eval mode.
             inputs (Input|list|dict|None): inputs, entry points of network,
                 could be an Input layer, or lists of Input layers,
                 or dict (name: Input), or None. For static graph,
                 inputs must be set. For dynamic graph, it could be None.
             labels (Input|list|None): labels, entry points of network,
                 could be an Input layer or lists of Input layers, or None.
-                For static graph, if set loss_function in Model.prepare(), it
-                must be set. Otherwise, it could be None.
+                For static graph, if labels is required in loss_function,
+                labels must be set. Otherwise, it could be None.
-            device (str|None): specify device type, 'CPU' or 'GPU'.
+            device (str|fluid.CUDAPlace|fluid.CPUPlace|None): specify device
+                type, 'CPU', 'GPU', fluid.CUDAPlace or fluid.CPUPlace.
                 If None, automatically select device according to
                 installation package version.
+
+        Returns:
+            None
         """

         if isinstance(device, fluid.CUDAPlace) or \
@@ -1108,7 +1130,9 @@ class Model(fluid.dygraph.Layer):
                num_workers=0,
                callbacks=None, ):
         """
-        FIXME: add more comments and usage
+        Trains the model for a fixed number of epochs. If `eval_data` is set,
+        evaluation will be done at the end of each epoch.

         Args:
             train_data (Dataset|DataLoader): An iterable data loader is used for
                 train. An instance of paddle paddle.io.Dataset or
@@ -1141,6 +1165,87 @@ class Model(fluid.dygraph.Layer):
             callbacks (Callback|None): A list of `Callback` instances to apply
                 during training. If None, `ProgBarLogger` and `ModelCheckpoint`
                 are automatically inserted. Default: None.
+
+        Returns:
+            None
+
+        Examples:
+            1. An example that uses a Dataset and sets batch size and shuffle
+               in fit. Batching is done internally.
+
+            .. code-block:: python
+
+                from hapi.model import Model, Input, set_device
+                from hapi.loss import CrossEntropy
+                from hapi.metrics import Accuracy
+                from hapi.datasets import MNIST
+                from hapi.vision.models import LeNet
+
+                dynamic = True
+                device = set_device(FLAGS.device)
+                fluid.enable_dygraph(device) if dynamic else None
+
+                train_dataset = MNIST(mode='train')
+                val_dataset = MNIST(mode='test')
+
+                inputs = [Input([None, 1, 28, 28], 'float32', name='image')]
+                labels = [Input([None, 1], 'int64', name='label')]
+
+                model = LeNet()
+                optim = fluid.optimizer.Adam(
+                    learning_rate=0.001, parameter_list=model.parameters())
+                model.prepare(
+                    optim,
+                    CrossEntropy(),
+                    Accuracy(topk=(1, 2)),
+                    inputs=inputs,
+                    labels=labels,
+                    device=device)
+                model.fit(train_dataset,
+                          val_dataset,
+                          epochs=2,
+                          batch_size=64,
+                          save_dir='mnist_checkpoint')
+
+            2. An example that uses a DataLoader; batch size and shuffle are
+               set in the DataLoader.
+
+            .. code-block:: python
+
+                from hapi.model import Model, Input, set_device
+                from hapi.loss import CrossEntropy
+                from hapi.metrics import Accuracy
+                from hapi.datasets import MNIST
+                from hapi.vision.models import LeNet
+
+                dynamic = True
+                device = set_device(FLAGS.device)
+                fluid.enable_dygraph(device) if dynamic else None
+
+                train_dataset = MNIST(mode='train')
+                train_loader = fluid.io.DataLoader(train_dataset,
+                    places=device, batch_size=64)
+                val_dataset = MNIST(mode='test')
+                val_loader = fluid.io.DataLoader(val_dataset,
+                    places=device, batch_size=64)
+
+                inputs = [Input([None, 1, 28, 28], 'float32', name='image')]
+                labels = [Input([None, 1], 'int64', name='label')]
+
+                model = LeNet()
+                optim = fluid.optimizer.Adam(
+                    learning_rate=0.001, parameter_list=model.parameters())
+                model.prepare(
+                    optim,
+                    CrossEntropy(),
+                    Accuracy(topk=(1, 2)),
+                    inputs=inputs,
+                    labels=labels,
+                    device=device)
+                model.fit(train_loader,
+                          val_loader,
+                          epochs=2,
+                          save_dir='mnist_checkpoint')
         """

         assert train_data is not None, \
@@ -1235,26 +1340,29 @@ class Model(fluid.dygraph.Layer):
                  num_workers=0,
                  callbacks=None, ):
         """
-        FIXME: add more comments and usage
+        Evaluate the loss and metrics of the model on input dataset.

         Args:
             eval_data (Dataset|DataLoader): An iterable data loader is used for
                 evaluation. An instance of paddle.io.Dataset or
                 paddle.io.Dataloader is recommended.
-            batch_size (int): Integer number. The batch size of train_data and eval_data.
-                When eval_data is the instance of Dataloader, this argument will be ignored.
-                Default: 1.
+            batch_size (int): Integer number. The batch size of train_data
+                and eval_data. When eval_data is the instance of Dataloader,
+                this argument will be ignored. Default: 1.
             log_freq (int): The frequency, in number of steps, the eval logs
                 are printed. Default: 10.
-            verbose (int): The verbosity mode, should be 0, 1, or 2.
-                0 = silent, 1 = progress bar, 2 = one line per epoch. Default: 2.
+            verbose (int): The verbosity mode, should be 0, 1, or 2. 0 = silent,
+                1 = progress bar, 2 = one line per epoch. Default: 2.
-            num_workers (int): The number of subprocess to load data, 0 for no subprocess
-                used and loading data in main process. When train_data and eval_data are
-                both the instance of Dataloader, this parameter will be ignored. Default: 0.
+            num_workers (int): The number of subprocesses to load data,
+                0 for no subprocess used and loading data in main process. When
+                train_data and eval_data are both the instance of Dataloader,
+                this parameter will be ignored. Default: 0.
             callbacks (Callback|None): A list of `Callback` instances to apply
                 during training. If None, `ProgBarLogger` and `ModelCheckpoint`
                 are automatically inserted. Default: None.

         Returns:
-            dict: Result of metric.
+            dict: Result of metric. The key is the name of the Metric and
+                the value is a scalar or numpy.array.
         """

         if fluid.in_dygraph_mode():
@@ -1312,7 +1420,8 @@ class Model(fluid.dygraph.Layer):
                 num_workers=0,
                 stack_outputs=False):
         """
-        FIXME: add more comments and usage
+        Compute the output predictions on testing data.

         Args:
             test_data (Dataset|DataLoader): An iterable data loader is used for
                 predict. An instance of paddle.io.Dataset or paddle.io.Dataloader
@@ -1387,21 +1496,20 @@ class Model(fluid.dygraph.Layer):
                              save_dir,
                              model_filename=None,
                              params_filename=None,
-                             program_only=False):
+                             model_only=False):
         """
         Save inference model (must be called in static graph mode).

         Args:
             dirname(str): The directory path to save the inference model.
-            model_filename(str|None): The name of file to save the inference program
-                itself. If is set None, a default filename
+            model_filename(str|None): The name of file to save the inference
+                model itself. If it is set None, a default filename
                 :code:`__model__` will be used.
-            params_filename(str|None): The name of file to save all related parameters.
-                If it is set None, parameters will be saved
+            params_filename(str|None): The name of file to save all related
+                parameters. If it is set None, parameters will be saved
                 in separate files.
-            program_only(bool): If True, It will save inference program only, and do not
-                save params of Program.
-                Default: False.
+            model_only(bool): If True, it will save the inference model only,
+                and will not save parameters. Default: False.

         Returns:
             list: The fetch variables' name list
@@ -1426,7 +1534,7 @@ class Model(fluid.dygraph.Layer):
             main_program=infer_prog,
             model_filename=model_filename,
             params_filename=params_filename,
-            program_only=program_only)
+            program_only=model_only)

     def _run_one_epoch(self,
                        data_loader,
...
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import division
from __future__ import print_function
import os
import unittest
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable
from hapi.metrics import *
from hapi.utils import to_list
def accuracy(pred, label, topk=(1, )):
maxk = max(topk)
pred = np.argsort(pred)[:, ::-1][:, :maxk]
correct = (pred == np.repeat(label, maxk, 1))
batch_size = label.shape[0]
res = []
for k in topk:
correct_k = correct[:, :k].sum()
res.append(correct_k / batch_size)
return res
def convert_to_one_hot(y, C):
oh = np.random.random((y.shape[0], C)).astype('float32') * .5
for i in range(y.shape[0]):
oh[i, int(y[i])] = 1.
return oh
class TestAccuracyDynamic(unittest.TestCase):
def setUp(self):
self.topk = (1, )
self.class_num = 5
self.sample_num = 1000
self.name = None
def random_pred_label(self):
label = np.random.randint(0, self.class_num, (self.sample_num, 1)).astype('int64')
pred = np.random.randint(0, self.class_num, (self.sample_num, 1)).astype('int32')
pred_one_hot = convert_to_one_hot(pred, self.class_num)
pred_one_hot = pred_one_hot.astype('float32')
return label, pred_one_hot
def test_main(self):
with fluid.dygraph.guard(fluid.CPUPlace()):
acc = Accuracy(topk=self.topk, name=self.name)
for i in range(10):
label, pred = self.random_pred_label()
label_var = to_variable(label)
pred_var = to_variable(pred)
state = to_list(acc.add_metric_op(pred_var, label_var))
acc.update(*[s.numpy() for s in state])
res_m = acc.accumulate()
res_f = accuracy(pred, label, self.topk)
assert np.all(np.isclose(np.array(res_m), np.array(res_f), rtol=1e-3)), \
"Accuracy precision error: {} != {}".format(res_m, res_f)
acc.reset()
assert np.sum(acc.total) == 0
assert np.sum(acc.count) == 0
class TestAccuracyDynamicMultiTopk(TestAccuracyDynamic):
def setUp(self):
self.topk = (1, 5)
self.class_num = 10
self.sample_num = 1000
self.name = "accuracy"
class TestAccuracyStatic(TestAccuracyDynamic):
def test_main(self):
main_prog = fluid.Program()
startup_prog = fluid.Program()
with fluid.program_guard(main_prog, startup_prog):
pred = fluid.data(name='pred', shape=[None, self.class_num], dtype='float32')
label = fluid.data(name='label', shape=[None, 1], dtype='int64')
acc = Accuracy(topk=self.topk, name=self.name)
state = acc.add_metric_op(pred, label)
exe = fluid.Executor(fluid.CPUPlace())
compiled_main_prog = fluid.CompiledProgram(main_prog)
for i in range(10):
label, pred = self.random_pred_label()
state_ret = exe.run(compiled_main_prog,
feed={'pred': pred, 'label': label},
fetch_list=[s.name for s in to_list(state)],
return_numpy=True)
acc.update(*state_ret)
res_m = acc.accumulate()
res_f = accuracy(pred, label, self.topk)
assert np.all(np.isclose(np.array(res_m), np.array(res_f), rtol=1e-3)), \
"Accuracy precision error: {} != {}".format(res_m, res_f)
acc.reset()
assert np.sum(acc.total) == 0
assert np.sum(acc.count) == 0
class TestAccuracyStaticMultiTopk(TestAccuracyStatic):
def setUp(self):
self.topk = (1, 5)
self.class_num = 10
self.sample_num = 1000
self.name = "accuracy"
if __name__ == '__main__':
unittest.main()
@@ -17,6 +17,7 @@ from __future__ import print_function

 import unittest
+import os

 import numpy as np
 import shutil
 import tempfile
@@ -72,7 +73,8 @@ class MnistDataset(MNIST):
         self.labels = self.labels[:sample_num]

     def __getitem__(self, idx):
-        img = np.reshape(self.images[idx], [1, 28, 28])
+        img, label = self.images[idx], self.labels[idx]
+        img = np.reshape(img, [1, 28, 28])
         if self.return_label:
             return img, np.array(self.labels[idx]).astype('int64')
         return img,
@@ -141,34 +143,61 @@ class TestModel(unittest.TestCase):
         cls.init_param = dy_lenet.state_dict()
         dynamic_train(dy_lenet, cls.train_loader)

-        cls.trained_param = dy_lenet.state_dict()
         cls.acc1 = dynamic_evaluate(dy_lenet, cls.val_loader)

         cls.inputs = [Input([-1, 1, 28, 28], 'float32', name='image')]
         cls.labels = [Input([None, 1], 'int64', name='label')]

+        cls.save_dir = tempfile.mkdtemp()
+        cls.weight_path = os.path.join(cls.save_dir, 'lenet')
+        fluid.dygraph.save_dygraph(dy_lenet.state_dict(), cls.weight_path)
+
         fluid.disable_dygraph()

+    @classmethod
+    def tearDownClass(cls):
+        shutil.rmtree(cls.save_dir)
+
     def test_fit_dygraph(self):
         self.fit(True)

     def test_fit_static(self):
         self.fit(False)

-    def not_test_evaluate_dygraph(self):
+    def test_evaluate_dygraph(self):
         self.evaluate(True)

-    def not_test_evaluate_static(self):
+    def test_evaluate_static(self):
         self.evaluate(False)

-    def not_test_predict_dygraph(self):
+    def test_predict_dygraph(self):
         self.predict(True)

-    def not_test_predict_static(self):
+    def test_predict_static(self):
         self.predict(False)

+    def predict(self, dynamic):
+        fluid.enable_dygraph(self.device) if dynamic else None
+        inputs = [Input([-1, 1, 28, 28], 'float32', name='image')]
+        labels = [Input([None, 1], 'int64', name='label')]
+        test_dataloader = fluid.io.DataLoader(
+            self.test_dataset,
+            places=self.device,
+            batch_size=64,
+            return_list=True)
+        model = LeNet()
+        model.load(self.weight_path)
+        model.prepare(metrics=Accuracy(), inputs=inputs, labels=labels)
+        output = model.predict(test_dataloader, stack_outputs=True)
+
     def fit(self, dynamic):
         fluid.enable_dygraph(self.device) if dynamic else None
         seed = 333
         fluid.default_startup_program().random_seed = seed
         fluid.default_main_program().random_seed = seed
@@ -193,7 +222,7 @@ class TestModel(unittest.TestCase):
         model = LeNet()
         model.prepare(
             metrics=Accuracy(), inputs=self.inputs, labels=self.labels)
-        model.load_dict(self.trained_param)
+        model.load(self.weight_path)
         result = model.evaluate(self.val_dataset, batch_size=64)
         np.testing.assert_allclose(result['acc'], self.acc1)
         fluid.disable_dygraph() if dynamic else None
...@@ -202,7 +231,7 @@ class TestModel(unittest.TestCase): ...@@ -202,7 +231,7 @@ class TestModel(unittest.TestCase):
fluid.enable_dygraph(self.device) if dynamic else None fluid.enable_dygraph(self.device) if dynamic else None
model = LeNet() model = LeNet()
model.prepare(inputs=self.inputs) model.prepare(inputs=self.inputs)
model.load_dict(self.trained_param) model.load(self.weight_path)
output = model.predict( output = model.predict(
self.test_dataset, batch_size=64, stack_outputs=True) self.test_dataset, batch_size=64, stack_outputs=True)
np.testing.assert_equal(output[0].shape[0], len(self.test_dataset)) np.testing.assert_equal(output[0].shape[0], len(self.test_dataset))
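Judging from the shape assertion above, stack_outputs=True appears to concatenate the per-batch prediction arrays along the batch axis, so each output array ends up with one row per test sample. A minimal NumPy sketch of that behavior (illustrative only, not the hapi implementation):

import numpy as np

# hypothetical outputs from three predict batches (two full, one partial)
batch_outputs = [np.ones((64, 10)), np.ones((64, 10)), np.ones((32, 10))]

# stacking as model.predict(..., stack_outputs=True) appears to do
stacked = np.concatenate(batch_outputs, axis=0)
assert stacked.shape[0] == 64 + 64 + 32  # one row per test sample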
@@ -269,11 +298,10 @@ class TestModelFunction(unittest.TestCase):
                 device=device)
             loss, = model.train_batch([data], [label])
-            print(loss, ref)
             np.testing.assert_allclose(loss.flatten(), ref.flatten())
             fluid.disable_dygraph() if dynamic else None

-    def not_test_test_batch(self, dynamic=True):
+    def test_test_batch(self, dynamic=True):
         dim = 20
         data = np.random.random(size=(4, dim)).astype(np.float32)
@@ -288,9 +316,9 @@ class TestModelFunction(unittest.TestCase):
         ref = get_expect()
         for dynamic in [True, False]:
-            self.set_seed()
             device = set_device('cpu')
             fluid.enable_dygraph(device) if dynamic else None
+            self.set_seed()
             model = MyModel()
             inputs = [Input([None, dim], 'float32', name='x')]
             model.prepare(inputs=inputs, device=device)
@@ -299,24 +327,29 @@ class TestModelFunction(unittest.TestCase):
             np.testing.assert_allclose(out, ref)
             fluid.disable_dygraph() if dynamic else None

-    def not_test_save_load(self):
+    def test_save_load(self):
         path = tempfile.mkdtemp()
         for dynamic in [True, False]:
             device = set_device('cpu')
             fluid.enable_dygraph(device) if dynamic else None
             model = MyModel()
+            inputs = [Input([None, 20], 'float32', name='x')]
+            model.prepare(inputs=inputs)
             model.save(path + '/test')
             model.load(path + '/test')
             shutil.rmtree(path)
             fluid.disable_dygraph() if dynamic else None

-    def not_test_parameters(self):
+    def test_parameters(self):
         for dynamic in [True, False]:
             device = set_device('cpu')
             fluid.enable_dygraph(device) if dynamic else None
             model = MyModel()
+            inputs = [Input([None, 20], 'float32', name='x')]
+            model.prepare(inputs=inputs)
             params = model.parameters()
-            self.assertTrue(params[0].shape == [20, 10])
+            self.assertTrue(params[0].shape[0] == 20)
+            self.assertTrue(params[0].shape[1] == 10)
             fluid.disable_dygraph() if dynamic else None
......
@@ -13,7 +13,9 @@
 # limitations under the License.

 from hapi.text.bert.bert import BertConfig as BertConfig
-from hapi.text.bert.optimization import Optimizer as Optimizer
+from hapi.text.bert.dygraph_optimization import DyOptimizer as DyOptimizer
+from hapi.text.bert.static_optimization import StOptimizer as StOptimizer
+from hapi.text.bert.optimization import make_optimizer as make_optimizer
 from hapi.text.bert.dataloader import BertDataLoader as BertDataLoader
 from hapi.text.bert.dataloader import BertInputExample as BertInputExample
 from hapi.text.tokenizer import tokenization as tokenization
......
@@ -23,8 +23,8 @@ import numpy as np
 import paddle
 import paddle.fluid as fluid

+from hapi.model import Model
 from paddle.fluid.dygraph import Embedding, LayerNorm, Linear, to_variable, Layer, guard
 from hapi.text.text import PrePostProcessLayer, TransformerEncoder
 from hapi.text.bert.utils.init import init_from_static_model
@@ -52,7 +52,7 @@ class BertConfig(object):
         print('------------------------------------------------')


-class BertEncoder(Layer):
+class BertEncoder(Model):
     """
     bert
     """
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Optimization and learning rate scheduling."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph.learning_rate_scheduler import LearningRateDecay
class ConstantLR(LearningRateDecay):
    def __init__(self, learning_rate, begin=0, step=1, dtype='float32'):
        super(ConstantLR, self).__init__(begin, step, dtype)
        self.learning_rate = learning_rate

    def step(self):
        return self.learning_rate


class LinearDecay(LearningRateDecay):
    def __init__(self,
                 learning_rate,
                 warmup_steps,
                 decay_steps,
                 end_learning_rate=0.0001,
                 power=1.0,
                 cycle=False,
                 begin=0,
                 step=1,
                 dtype='float32'):
        super(LinearDecay, self).__init__(begin, step, dtype)
        self.learning_rate = learning_rate
        self.warmup_steps = warmup_steps
        self.decay_steps = decay_steps
        self.end_learning_rate = end_learning_rate
        self.power = power
        self.cycle = cycle

    def step(self):
        if self.step_num < self.warmup_steps:
            decayed_lr = self.learning_rate * (self.step_num /
                                               self.warmup_steps)
            decayed_lr = self.create_lr_var(decayed_lr)
        else:
            tmp_step_num = self.step_num
            tmp_decay_steps = self.decay_steps
            if self.cycle:
                div_res = fluid.layers.ceil(
                    self.create_lr_var(tmp_step_num / float(self.decay_steps)))
                if tmp_step_num == 0:
                    div_res = self.create_lr_var(1.0)
                tmp_decay_steps = self.decay_steps * div_res
            else:
                tmp_step_num = self.create_lr_var(
                    tmp_step_num
                    if tmp_step_num < self.decay_steps else self.decay_steps)
            decayed_lr = (self.learning_rate - self.end_learning_rate) * \
                ((1 - tmp_step_num / tmp_decay_steps) ** self.power) + self.end_learning_rate
        return decayed_lr
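For reference, with the arguments DyOptimizer passes below (power=1.0, cycle=False, end learning rate 0.0), the schedule LinearDecay.step() computes is a linear warmup followed by a linear decay. A plain-Python sketch of the same arithmetic (illustrative only; the helper name is made up here):

def linear_warmup_decay_value(step_num, learning_rate, warmup_steps,
                              decay_steps, end_learning_rate=0.0):
    # warmup: scale linearly from 0 up to learning_rate
    if step_num < warmup_steps:
        return learning_rate * step_num / warmup_steps
    # decay: linear interpolation from learning_rate down to end_learning_rate
    step_num = min(step_num, decay_steps)
    return ((learning_rate - end_learning_rate) *
            (1 - step_num / decay_steps) + end_learning_rate)

# e.g. learning_rate=5e-5, warmup_steps=100, decay_steps=1000:
# step 50 -> 2.5e-05 (warming up), step 550 -> 2.25e-05 (decaying)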
class DyOptimizer(object):
    def __init__(self,
                 warmup_steps,
                 num_train_steps,
                 learning_rate,
                 model_cls,
                 weight_decay,
                 scheduler='linear_warmup_decay',
                 loss_scaling=1.0,
                 parameter_list=None):
        self.warmup_steps = warmup_steps
        self.num_train_steps = num_train_steps
        self.learning_rate = learning_rate
        self.model_cls = model_cls
        self.weight_decay = weight_decay
        self.scheduler = scheduler
        self.loss_scaling = loss_scaling
        self.parameter_list = parameter_list

        self.scheduled_lr = 0.0
        self.optimizer = self.lr_schedule()

    def lr_schedule(self):
        if self.warmup_steps > 0:
            if self.scheduler == 'noam_decay':
                self.scheduled_lr = fluid.dygraph.NoamDecay(1 / (
                    self.warmup_steps * (self.learning_rate**2)),
                    self.warmup_steps)
            elif self.scheduler == 'linear_warmup_decay':
                self.scheduled_lr = LinearDecay(self.learning_rate,
                                                self.warmup_steps,
                                                self.num_train_steps, 0.0)
            else:
                raise ValueError("Unknown learning rate scheduler, should be "
                                 "'noam_decay' or 'linear_warmup_decay'")
            optimizer = fluid.optimizer.Adam(
                learning_rate=self.scheduled_lr,
                parameter_list=self.parameter_list)
        else:
            self.scheduled_lr = ConstantLR(self.learning_rate)
            optimizer = fluid.optimizer.Adam(
                learning_rate=self.scheduled_lr,
                parameter_list=self.parameter_list)
        return optimizer

    def exclude_from_weight_decay(self, name):
        # layer norm scales/biases and bias parameters are not decayed
        if name.find("layer_norm") > -1:
            return True
        bias_suffix = ["_bias", "_b", ".b_0"]
        for suffix in bias_suffix:
            if name.endswith(suffix):
                return True
        return False

    def state_dict(self):
        return self.optimizer.state_dict()

    def set_dict(self, state_dict):
        return self.optimizer.set_dict(state_dict)

    def get_opti_var_name_list(self):
        return self.optimizer.get_opti_var_name_list()

    def current_step_lr(self):
        return self.optimizer.current_step_lr()

    def minimize(self, loss, use_data_parallel=False, model=None):
        param_list = dict()
        clip_norm_thres = 1.0
        #grad_clip = fluid.clip.GradientClipByGlobalNorm(clip_norm_thres)

        if use_data_parallel:
            loss = model.scale_loss(loss)

        loss.backward()

        if self.weight_decay > 0:
            # snapshot parameter values before the Adam step, for decoupled decay
            for param in self.model_cls.parameters():
                param_list[param.name] = param * 1.0
                param_list[param.name].stop_gradient = True

        if use_data_parallel:
            assert model is not None
            model.apply_collective_grads()

        #_, param_grads = self.optimizer.minimize(loss, grad_clip=grad_clip)
        _, param_grads = self.optimizer.minimize(loss)

        if self.weight_decay > 0:
            for param, grad in param_grads:
                if self.exclude_from_weight_decay(param.name):
                    continue
                if isinstance(self.scheduled_lr.step(), float):
                    updated_param = param.numpy() - param_list[
                        param.name].numpy(
                        ) * self.weight_decay * self.scheduled_lr.step()
                else:
                    updated_param = param.numpy(
                    ) - param_list[param.name].numpy(
                    ) * self.weight_decay * self.scheduled_lr.step().numpy()
                updated_param_var = fluid.dygraph.to_variable(updated_param)
                param = updated_param_var
                #param = fluid.layers.reshape(x=updated_param_var, shape=list(updated_param_var.shape))
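Note that minimize() above applies weight decay in a decoupled (AdamW-style) fashion: parameter values are snapshotted before the Adam step, and the decay term is subtracted afterwards, skipping layer norm and bias parameters. A NumPy sketch of one such update, with the Adam step simplified to plain SGD purely for illustration:

import numpy as np

weight_decay, lr = 0.01, 1e-3
param = np.array([0.5, -0.2])
grad = np.array([0.1, 0.3])

p_before = param.copy()                        # the param_list snapshot above
param = param - lr * grad                      # stand-in for the Adam update
param = param - p_before * weight_decay * lr   # decoupled decay term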
@@ -11,172 +11,35 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Optimization and learning rate scheduling."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-import paddle.fluid as fluid
-
-from paddle.fluid.dygraph.learning_rate_scheduler import LearningRateDecay
+from paddle.fluid.framework import in_dygraph_mode
+from hapi.text.bert.dygraph_optimization import DyOptimizer as DyOptimizer
+from hapi.text.bert.static_optimization import StOptimizer as StOptimizer


+def make_optimizer(warmup_steps,
+                   num_train_steps,
+                   learning_rate,
+                   weight_decay,
+                   model,
+                   scheduler='linear_warmup_decay',
+                   loss_scaling=1.0,
+                   parameter_list=None):
+
+    if in_dygraph_mode():
+        return DyOptimizer(
+            warmup_steps=warmup_steps,
+            num_train_steps=num_train_steps,
+            learning_rate=learning_rate,
+            model_cls=model,
+            weight_decay=weight_decay,
+            scheduler=scheduler,
+            loss_scaling=loss_scaling,
+            parameter_list=parameter_list)
+    else:
+        return StOptimizer(
+            warmup_steps=warmup_steps,
+            num_train_steps=num_train_steps,
+            learning_rate=learning_rate,
+            weight_decay=weight_decay,
+            scheduler=scheduler)

-class ConstantLR(LearningRateDecay):
-    def __init__(self, learning_rate, begin=0, step=1, dtype='float32'):
-        super(ConstantLR, self).__init__(begin, step, dtype)
-        self.learning_rate = learning_rate
-
-    def step(self):
-        return self.learning_rate
-
-
-class LinearDecay(LearningRateDecay):
-    def __init__(self,
-                 learning_rate,
-                 warmup_steps,
-                 decay_steps,
-                 end_learning_rate=0.0001,
-                 power=1.0,
-                 cycle=False,
-                 begin=0,
-                 step=1,
-                 dtype='float32'):
-        super(LinearDecay, self).__init__(begin, step, dtype)
-        self.learning_rate = learning_rate
-        self.warmup_steps = warmup_steps
-        self.decay_steps = decay_steps
-        self.end_learning_rate = end_learning_rate
-        self.power = power
-        self.cycle = cycle
-
-    def step(self):
-        if self.step_num < self.warmup_steps:
-            decayed_lr = self.learning_rate * (self.step_num /
-                                               self.warmup_steps)
-            decayed_lr = self.create_lr_var(decayed_lr)
-        else:
-            tmp_step_num = self.step_num
-            tmp_decay_steps = self.decay_steps
-            if self.cycle:
-                div_res = fluid.layers.ceil(
-                    self.create_lr_var(tmp_step_num / float(self.decay_steps)))
-                if tmp_step_num == 0:
-                    div_res = self.create_lr_var(1.0)
-                tmp_decay_steps = self.decay_steps * div_res
-            else:
-                tmp_step_num = self.create_lr_var(
-                    tmp_step_num
-                    if tmp_step_num < self.decay_steps else self.decay_steps)
-            decayed_lr = (self.learning_rate - self.end_learning_rate) * \
-                ((1 - tmp_step_num / tmp_decay_steps) ** self.power) + self.end_learning_rate
-        return decayed_lr
-
-
-class Optimizer(object):
-    def __init__(self,
-                 warmup_steps,
-                 num_train_steps,
-                 learning_rate,
-                 model_cls,
-                 weight_decay,
-                 scheduler='linear_warmup_decay',
-                 loss_scaling=1.0,
-                 parameter_list=None):
-        self.warmup_steps = warmup_steps
-        self.num_train_steps = num_train_steps
-        self.learning_rate = learning_rate
-        self.model_cls = model_cls
-        self.weight_decay = weight_decay
-        self.scheduler = scheduler
-        self.loss_scaling = loss_scaling
-        self.parameter_list = parameter_list
-
-        self.scheduled_lr = 0.0
-        self.optimizer = self.lr_schedule()
-
-    def lr_schedule(self):
-        if self.warmup_steps > 0:
-            if self.scheduler == 'noam_decay':
-                self.scheduled_lr = fluid.dygraph.NoamDecay(1 / (
-                    self.warmup_steps * (self.learning_rate**2)),
-                    self.warmup_steps)
-            elif self.scheduler == 'linear_warmup_decay':
-                self.scheduled_lr = LinearDecay(self.learning_rate,
-                                                self.warmup_steps,
-                                                self.num_train_steps, 0.0)
-            else:
-                raise ValueError("Unkown learning rate scheduler, should be "
-                                 "'noam_decay' or 'linear_warmup_decay'")
-            optimizer = fluid.optimizer.Adam(
-                learning_rate=self.scheduled_lr,
-                parameter_list=self.parameter_list)
-        else:
-            self.scheduled_lr = ConstantLR(self.learning_rate)
-            optimizer = fluid.optimizer.Adam(
-                learning_rate=self.scheduled_lr,
-                parameter_list=self.parameter_list)
-        return optimizer
-
-    def exclude_from_weight_decay(self, name):
-        if name.find("layer_norm") > -1:
-            return True
-        bias_suffix = ["_bias", "_b", ".b_0"]
-        for suffix in bias_suffix:
-            if name.endswith(suffix):
-                return True
-        return False
-
-    def state_dict(self):
-        return self.optimizer.state_dict()
-
-    def set_dict(self, state_dict):
-        return self.optimizer.set_dict(state_dict)
-
-    def get_opti_var_name_list(self):
-        return self.optimizer.get_opti_var_name_list()
-
-    def current_step_lr(self):
-        return self.optimizer.current_step_lr()
-
-    def minimize(self, loss, use_data_parallel=False, model=None):
-        param_list = dict()
-        clip_norm_thres = 1.0
-        #grad_clip = fluid.clip.GradientClipByGlobalNorm(clip_norm_thres)
-
-        if use_data_parallel:
-            loss = model.scale_loss(loss)
-
-        loss.backward()
-
-        if self.weight_decay > 0:
-            for param in self.model_cls.parameters():
-                param_list[param.name] = param * 1.0
-                param_list[param.name].stop_gradient = True
-
-        if use_data_parallel:
-            assert model is not None
-            model.apply_collective_grads()
-
-        #_, param_grads = self.optimizer.minimize(loss, grad_clip=grad_clip)
-        _, param_grads = self.optimizer.minimize(loss)
-
-        if self.weight_decay > 0:
-            for param, grad in param_grads:
-                if self.exclude_from_weight_decay(param.name):
-                    continue
-                if isinstance(self.scheduled_lr.step(), float):
-                    updated_param = param.numpy() - param_list[
-                        param.name].numpy(
-                        ) * self.weight_decay * self.scheduled_lr.step()
-                else:
-                    updated_param = param.numpy(
-                    ) - param_list[param.name].numpy(
-                    ) * self.weight_decay * self.scheduled_lr.step().numpy()
-                updated_param_var = fluid.dygraph.to_variable(updated_param)
-                param = updated_param_var
-                #param = fluid.layers.reshape(x=updated_param_var, shape=list(updated_param_var.shape))
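make_optimizer, shown above, simply dispatches on the execution mode: with dygraph enabled it builds a DyOptimizer, otherwise a StOptimizer. A minimal sketch of a dygraph-mode call (the model variable and hyper-parameter values here are hypothetical):

import paddle.fluid as fluid
from hapi.text.bert import make_optimizer

fluid.enable_dygraph()
optimizer = make_optimizer(
    warmup_steps=100,
    num_train_steps=1000,
    learning_rate=5e-5,
    weight_decay=0.01,
    model=model,                        # a hapi Model instance
    parameter_list=model.parameters())
# without enable_dygraph(), the same call would return a StOptimizer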
@@ -19,7 +19,6 @@ from __future__ import print_function

 import numpy as np
 import paddle.fluid as fluid
-from utils.fp16 import create_master_params_grads, master_param_to_train_param, apply_dynamic_loss_scaling


 def linear_warmup_decay(learning_rate, warmup_steps, num_train_steps):
@@ -51,128 +50,95 @@ def linear_warmup_decay(learning_rate, warmup_steps, num_train_steps):
     return lr
-def optimization(loss,
-                 warmup_steps,
-                 num_train_steps,
-                 learning_rate,
-                 train_program,
-                 startup_prog,
-                 weight_decay,
-                 scheduler='linear_warmup_decay',
-                 use_fp16=False,
-                 use_dynamic_loss_scaling=False,
-                 init_loss_scaling=1.0,
-                 incr_every_n_steps=1000,
-                 decr_every_n_nan_or_inf=2,
-                 incr_ratio=2.0,
-                 decr_ratio=0.8):
-
-    scheduled_lr, loss_scaling = None, None
-    if scheduler == 'noam_decay':
-        if warmup_steps > 0:
-            scheduled_lr = fluid.layers.learning_rate_scheduler\
-                .noam_decay(1/(warmup_steps *(learning_rate ** 2)),
-                            warmup_steps)
-        else:
-            print(
-                "WARNING: noam decay of learning rate should have postive warmup "
-                "steps but given {}, using constant learning rate instead!"
-                .format(warmup_steps))
-            scheduled_lr = fluid.layers.create_global_var(
-                name=fluid.unique_name.generate("learning_rate"),
-                shape=[1],
-                value=learning_rate,
-                dtype='float32',
-                persistable=True)
-    elif scheduler == 'linear_warmup_decay':
-        if warmup_steps > 0:
-            scheduled_lr = linear_warmup_decay(learning_rate, warmup_steps,
-                                               num_train_steps)
-        else:
-            print(
-                "WARNING: linear warmup decay of learning rate should have "
-                "postive warmup steps but given {}, use constant learning rate "
-                "instead!".format(warmup_steps))
-            scheduled_lr = fluid.layers.create_global_var(
-                name=fluid.unique_name.generate("learning_rate"),
-                shape=[1],
-                value=learning_rate,
-                dtype='float32',
-                persistable=True)
-    else:
-        raise ValueError("Unkown learning rate scheduler, should be "
-                         "'noam_decay' or 'linear_warmup_decay'")
-
-    optimizer = fluid.optimizer.Adam(learning_rate=scheduled_lr)
-    fluid.clip.set_gradient_clip(
-        clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0))
-
-    def exclude_from_weight_decay(param):
-        name = param.name.rstrip(".master")
-        if name.find("layer_norm") > -1:
-            return True
-        bias_suffix = ["_bias", "_b", ".b_0"]
-        for suffix in bias_suffix:
-            if name.endswith(suffix):
-                return True
-        return False
-
-    param_list = dict()
-
-    if use_fp16:
-        loss_scaling = fluid.layers.create_global_var(
-            name=fluid.unique_name.generate("loss_scaling"),
-            shape=[1],
-            value=init_loss_scaling,
-            dtype='float32',
-            persistable=True)
-        loss *= loss_scaling
-        param_grads = optimizer.backward(loss)
-
-        master_param_grads = create_master_params_grads(
-            param_grads, train_program, startup_prog, loss_scaling)
-
-        if weight_decay > 0:
-            for param, _ in master_param_grads:
-                param_list[param.name] = param * 1.0
-                param_list[param.name].stop_gradient = True
-
-        if use_dynamic_loss_scaling:
-            apply_dynamic_loss_scaling(
-                loss_scaling, master_param_grads, incr_every_n_steps,
-                decr_every_n_nan_or_inf, incr_ratio, decr_ratio)
-
-        optimizer.apply_gradients(master_param_grads)
-
-        if weight_decay > 0:
-            for param, grad in master_param_grads:
-                if exclude_from_weight_decay(param):
-                    continue
-                with param.block.program._optimized_guard(
-                        [param, grad]), fluid.framework.name_scope("weight_decay"):
-                    updated_param = param - param_list[
-                        param.name] * weight_decay * scheduled_lr
-                    fluid.layers.assign(output=param, input=updated_param)
-
-        master_param_to_train_param(master_param_grads, param_grads,
-                                    train_program)
-    else:
-        if weight_decay > 0:
-            for param in train_program.all_parameters():
-                param_list[param.name] = param * 1.0
-                param_list[param.name].stop_gradient = True
-
-        _, param_grads = optimizer.minimize(loss)
-
-        if weight_decay > 0:
-            for param, grad in param_grads:
-                if exclude_from_weight_decay(param):
-                    continue
-                with param.block.program._optimized_guard(
-                        [param, grad]), fluid.framework.name_scope("weight_decay"):
-                    updated_param = param - param_list[
-                        param.name] * weight_decay * scheduled_lr
-                    fluid.layers.assign(output=param, input=updated_param)
-
-    return scheduled_lr, loss_scaling
+class StOptimizer(fluid.optimizer.Optimizer):
+    def __init__(self,
+                 warmup_steps,
+                 num_train_steps,
+                 learning_rate,
+                 weight_decay,
+                 scheduler='linear_warmup_decay'):
+        super(StOptimizer, self).__init__(
+            learning_rate=learning_rate,
+            parameter_list=None,
+            regularization=None,
+            grad_clip=None,
+            name=None)
+        self.warmup_steps = warmup_steps
+        self.num_train_steps = num_train_steps
+        self.learning_rate = learning_rate
+        self.weight_decay = weight_decay
+        self.scheduler = scheduler
+
+    def minimize(self, loss):
+        train_program = fluid.default_main_program()
+        startup_program = fluid.default_startup_program()
+        if self.scheduler == 'noam_decay':
+            if self.warmup_steps > 0:
+                scheduled_lr = fluid.layers.learning_rate_scheduler\
+                    .noam_decay(1/(self.warmup_steps *(self.learning_rate ** 2)),
+                                self.warmup_steps)
+            else:
+                print(
+                    "WARNING: noam decay of learning rate should have positive warmup "
+                    "steps but given {}, using constant learning rate instead!"
+                    .format(self.warmup_steps))
+                scheduled_lr = fluid.layers.create_global_var(
+                    name=fluid.unique_name.generate("learning_rate"),
+                    shape=[1],
+                    value=self.learning_rate,
+                    dtype='float32',
+                    persistable=True)
+        elif self.scheduler == 'linear_warmup_decay':
+            if self.warmup_steps > 0:
+                scheduled_lr = linear_warmup_decay(self.learning_rate,
+                                                   self.warmup_steps,
+                                                   self.num_train_steps)
+            else:
+                print(
+                    "WARNING: linear warmup decay of learning rate should have "
+                    "positive warmup steps but given {}, use constant learning rate "
+                    "instead!".format(self.warmup_steps))
+                scheduled_lr = fluid.layers.create_global_var(
+                    name=fluid.unique_name.generate("learning_rate"),
+                    shape=[1],
+                    value=self.learning_rate,
+                    dtype='float32',
+                    persistable=True)
+        else:
+            raise ValueError("Unknown learning rate scheduler, should be "
+                             "'noam_decay' or 'linear_warmup_decay'")
+
+        optimizer = fluid.optimizer.Adam(learning_rate=scheduled_lr)
+        fluid.clip.set_gradient_clip(
+            clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0))
+
+        def exclude_from_weight_decay(param):
+            name = param.name.rstrip(".master")
+            if name.find("layer_norm") > -1:
+                return True
+            bias_suffix = ["_bias", "_b", ".b_0"]
+            for suffix in bias_suffix:
+                if name.endswith(suffix):
+                    return True
+            return False
+
+        param_list = dict()
+
+        if self.weight_decay > 0:
+            for param in train_program.all_parameters():
+                param_list[param.name] = param * 1.0
+                param_list[param.name].stop_gradient = True
+
+        _, param_grads = optimizer.minimize(loss)
+
+        if self.weight_decay > 0:
+            for param, grad in param_grads:
+                if exclude_from_weight_decay(param):
+                    continue
+                with param.block.program._optimized_guard(
+                        [param, grad]), fluid.framework.name_scope("weight_decay"):
+                    updated_param = param - param_list[
+                        param.name] * self.weight_decay * scheduled_lr
+                    fluid.layers.assign(output=param, input=updated_param)
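Unlike DyOptimizer, StOptimizer ignores parameter lists and rewrites the default main program directly, so it is meant to be called while building a static graph. A minimal static-graph sketch (network shapes and names are hypothetical):

import paddle.fluid as fluid
from hapi.text.bert import StOptimizer

x = fluid.data(name='x', shape=[None, 20], dtype='float32')
y = fluid.data(name='y', shape=[None, 1], dtype='float32')
pred = fluid.layers.fc(input=x, size=1)
loss = fluid.layers.reduce_mean(fluid.layers.square_error_cost(pred, y))

optimizer = StOptimizer(
    warmup_steps=100,
    num_train_steps=1000,
    learning_rate=5e-5,
    weight_decay=0.01,
    scheduler='linear_warmup_decay')
optimizer.minimize(loss)  # adds optimize ops to fluid.default_main_program()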
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from hapi.text.senta.data_processer import SentaProcessor
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np

from hapi.text.senta.data_reader import load_vocab
from hapi.text.senta.data_reader import data_reader
from paddle.io import DataLoader


class SentaProcessor(object):
    def __init__(self, data_dir, vocab_path, random_seed=None):
        self.data_dir = data_dir
        self.vocab = load_vocab(vocab_path)
        self.num_examples = {"train": -1, "dev": -1, "infer": -1}
        np.random.seed(random_seed)

    def get_train_examples(self, data_dir, epoch, shuffle, batch_size, places,
                           padding_size):
        train_reader = data_reader((self.data_dir + "/train.tsv"), self.vocab,
                                   self.num_examples, "train", epoch,
                                   padding_size, shuffle)
        loader = DataLoader.from_generator(capacity=50, return_list=True)
        loader.set_sample_generator(
            train_reader, batch_size=batch_size, drop_last=False, places=places)
        return loader

    def get_dev_examples(self, data_dir, epoch, shuffle, batch_size, places,
                         padding_size):
        dev_reader = data_reader((self.data_dir + "/dev.tsv"), self.vocab,
                                 self.num_examples, "dev", epoch, padding_size,
                                 shuffle)
        loader = DataLoader.from_generator(capacity=50, return_list=True)
        loader.set_sample_generator(
            dev_reader, batch_size=batch_size, drop_last=False, places=places)
        return loader

    def get_test_examples(self, data_dir, epoch, batch_size, places,
                          padding_size):
        test_reader = data_reader((self.data_dir + "/test.tsv"), self.vocab,
                                  self.num_examples, "infer", epoch,
                                  padding_size)
        loader = DataLoader.from_generator(capacity=50, return_list=True)
        loader.set_sample_generator(
            test_reader, batch_size=batch_size, drop_last=False, places=places)
        return loader

    def get_labels(self):
        return ["0", "1"]

    def get_num_examples(self, phase):
        if phase not in ['train', 'dev', 'infer']:
            raise ValueError(
                "Unknown phase, which should be in ['train', 'dev', 'infer'].")
        return self.num_examples[phase]

    def get_train_progress(self):
        return self.current_train_example, self.current_train_epoch

    def data_generator(self, padding_size, batch_size, places, phase='train',
                       epoch=1, shuffle=True):
        if phase == "train":
            return self.get_train_examples(self.data_dir, epoch, shuffle,
                                           batch_size, places, padding_size)
        elif phase == "dev":
            return self.get_dev_examples(self.data_dir, epoch, shuffle,
                                         batch_size, places, padding_size)
        elif phase == "infer":
            return self.get_test_examples(self.data_dir, epoch, batch_size,
                                          places, padding_size)
        else:
            raise ValueError(
                "Unknown phase, which should be in ['train', 'dev', 'infer'].")
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import io
import sys
import random


def str2bool(v):
    return v.lower() in ("true", "t", "1")


def data_reader(file_path, word_dict, num_examples, phrase, epoch,
                padding_size, shuffle=False):
    unk_id = len(word_dict)
    all_data = []
    with io.open(file_path, "r", encoding='utf8') as fin:
        for line in fin:
            if line.startswith('text_a'):
                continue
            cols = line.strip().split("\t")
            if len(cols) != 2:
                sys.stderr.write("[NOTICE] Error Format Line!")
                continue
            label = [int(cols[1])]
            wids = [
                word_dict[x] if x in word_dict else unk_id
                for x in cols[0].split(" ")
            ]
            wids = wids[:padding_size]
            while len(wids) < padding_size:
                wids.append(unk_id)
            all_data.append((wids, label))
    if shuffle:
        if phrase == "train":
            random.shuffle(all_data)
    num_examples[phrase] = len(all_data)

    def reader():
        for epoch_index in range(epoch):
            for doc, label in all_data:
                yield doc, label

    return reader


def load_vocab(file_path):
    vocab = {}
    with io.open(file_path, 'r', encoding='utf8') as f:
        wid = 0
        for line in f:
            if line.strip() not in vocab:
                vocab[line.strip()] = wid
                wid += 1
    vocab["<unk>"] = len(vocab)
    return vocab
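The lower-level pieces can also be used directly; a short sketch (file paths hypothetical):

vocab = load_vocab('./senta_data/word_dict.txt')
num_examples = {"train": -1, "dev": -1, "infer": -1}

reader = data_reader('./senta_data/train.tsv', vocab, num_examples,
                     'train', epoch=1, padding_size=150, shuffle=True)
for wids, label in reader():
    # wids: list of padding_size word ids, label: single-element list
    break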
@@ -1096,7 +1096,8 @@ class PrePostProcessLayer(Layer):
         self.functors = []
         for cmd in self.process_cmd:
             if cmd == "a":  # add residual connection
-                self.functors.append(lambda x, y: x + y if y else x)
+                self.functors.append(
+                    lambda x, y: x + y if y is not None else x)
             elif cmd == "n":  # add layer normalization
                 if reused_layer_norm is not None:
                     layer_norm = reused_layer_norm
@@ -1218,7 +1219,7 @@ class MultiHeadAttention(Layer):
         # scale dot product attention
         product = layers.matmul(
             x=q, y=k, transpose_y=True, alpha=self.d_model**-0.5)
-        if attn_bias:
+        if attn_bias is not None:
             product += attn_bias
         weights = layers.softmax(product)
         if self.dropout_rate:
......
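The two "is not None" fixes in this file guard against tensor truthiness: evaluating a multi-element array in a boolean context is ambiguous (NumPy raises, and a framework Variable is not guaranteed to support it), so "if attn_bias:" was not a safe way to test for an optional bias. A small NumPy illustration:

import numpy as np

attn_bias = np.zeros((2, 2))
try:
    if attn_bias:              # truth value of a multi-element array
        pass
except ValueError as err:
    print(err)                 # "The truth value of an array ... is ambiguous"

if attn_bias is not None:      # the explicit check the patch switches to
    product = attn_bias + 1.0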