提交 07299487 编写于 作者: L LielinJiang

update comments

......@@ -18,10 +18,10 @@ from hapi.metrics import Accuracy
from hapi.configure import Config
from hapi.text.bert import BertEncoder
from paddle.fluid.dygraph import Linear, Layer
from hapi.model import set_device, Model, Input
from hapi.loss import SoftmaxWithCrossEntropy
from hapi.model import set_device, Model, Input
import hapi.text.tokenizer.tokenization as tokenization
from hapi.text.bert import Optimizer, BertConfig, BertDataLoader, BertInputExample
from hapi.text.bert import BertConfig, BertDataLoader, BertInputExample, make_optimizer
class ClsModelLayer(Model):
......@@ -128,7 +128,7 @@ def main():
[None, None], 'int64', name='src_ids'), Input(
[None, None], 'int64', name='pos_ids'), Input(
[None, None], 'int64', name='sent_ids'), Input(
[None, None], 'float32', name='input_mask')
[None, None, 1], 'float32', name='input_mask')
]
labels = [Input([None, 1], 'int64', name='label')]
......@@ -139,13 +139,13 @@ def main():
len(["contradiction", "entailment", "neutral"]),
return_pooled_out=True)
optimizer = Optimizer(
optimizer = make_optimizer(
warmup_steps=warmup_steps,
num_train_steps=max_train_steps,
learning_rate=config.learning_rate,
model_cls=cls_model,
weight_decay=config.weight_decay,
scheduler=config.lr_scheduler,
model=cls_model,
loss_scaling=config.loss_scaling,
parameter_list=cls_model.parameters())
......@@ -157,8 +157,7 @@ def main():
labels,
device=device)
cls_model.bert_layer.init_parameters(
config.init_pretraining_params, verbose=config.verbose)
cls_model.bert_layer.load("./bert_small", reset_optimizer=True)
# do train
cls_model.fit(train_data=train_dataloader.dataloader,
......
......@@ -4,7 +4,7 @@ TASK_NAME='MNLI'
DATA_PATH="./data/glue_data/MNLI/"
CKPT_PATH="./data/saved_model/mnli_models"
export CUDA_VISIBLE_DEVICES=0
export CUDA_VISIBLE_DEVICES=1
# start fine-tuning
python3.7 bert_classifier.py\
......
......@@ -18,10 +18,10 @@ from hapi.metrics import Accuracy
from hapi.configure import Config
from hapi.text.bert import BertEncoder
from paddle.fluid.dygraph import Linear, Layer
from hapi.model import set_device, Model, Input
from hapi.loss import SoftmaxWithCrossEntropy
from hapi.model import set_device, Model, Input
import hapi.text.tokenizer.tokenization as tokenization
from hapi.text.bert import Optimizer, BertConfig, BertDataLoader, BertInputExample
from hapi.text.bert import BertConfig, BertDataLoader, BertInputExample, make_optimizer
class ClsModelLayer(Model):
......@@ -99,12 +99,12 @@ def main():
train_dataloader = BertDataLoader(
"./data/glue_data/MNLI/train.tsv",
tokenizer, ["contradiction", "entailment", "neutral"],
tokenizer,
["contradiction", "entailment", "neutral"],
max_seq_length=config.max_seq_len,
batch_size=config.batch_size,
line_processor=mnli_line_processor,
mode="leveldb",
phase="train")
mode="leveldb", )
test_dataloader = BertDataLoader(
"./data/glue_data/MNLI/dev_matched.tsv",
......@@ -130,7 +130,7 @@ def main():
[None, None], 'int64', name='src_ids'), Input(
[None, None], 'int64', name='pos_ids'), Input(
[None, None], 'int64', name='sent_ids'), Input(
[None, None], 'float32', name='input_mask')
[None, None, 1], 'float32', name='input_mask')
]
labels = [Input([None, 1], 'int64', name='label')]
......@@ -141,13 +141,13 @@ def main():
len(["contradiction", "entailment", "neutral"]),
return_pooled_out=True)
optimizer = Optimizer(
optimizer = make_optimizer(
warmup_steps=warmup_steps,
num_train_steps=max_train_steps,
learning_rate=config.learning_rate,
model_cls=cls_model,
weight_decay=config.weight_decay,
scheduler=config.lr_scheduler,
model=cls_model,
loss_scaling=config.loss_scaling,
parameter_list=cls_model.parameters())
......@@ -159,8 +159,7 @@ def main():
labels,
device=device)
cls_model.bert_layer.init_parameters(
config.init_pretraining_params, verbose=config.verbose)
cls_model.bert_layer.load("./bert_small", reset_optimizer=True)
# do train
cls_model.fit(train_data=train_dataloader.dataloader,
......
......@@ -5,7 +5,7 @@ DATA_PATH="./data/glue_data/MNLI/"
CKPT_PATH="./data/saved_model/mnli_models"
# start fine-tuning
python3.7 -m paddle.distributed.launch --started_port 8899 --selected_gpus=0,1,2,3 bert_classifier.py\
python3.7 -m paddle.distributed.launch --started_port 8899 --selected_gpus=1,2,3 bert_classifier.py\
--use_cuda true \
--do_train true \
--do_test true \
......
......@@ -4,7 +4,7 @@ TASK_NAME='MNLI'
DATA_PATH="./data/glue_data/MNLI/"
CKPT_PATH="./data/saved_model/mnli_models"
export CUDA_VISIBLE_DEVICES=0
export CUDA_VISIBLE_DEVICES=1
# start fine-tuning
python3.7 bert_classifier.py\
......
......@@ -4,7 +4,7 @@
## 内容
- [模型简介](#模型简介)
- [代码结构](#代码结构)
- [代码获取](#代码获取)
- [数据准备](#数据准备)
- [模型训练](#模型训练)
- [模型评估](#模型评估)
......@@ -22,7 +22,21 @@ BMN Overview
</p>
## 代码结构
## 代码获取
### 代码下载及环境变量设置
克隆代码库到本地,并设置`PYTHONPATH`环境变量
```bash
git clone https://github.com/PaddlePaddle/hapi
cd hapi
export PYTHONPATH=`pwd`:$PYTHONPATH
cd examples/bmn
```
### 代码结构
```
├── bmn.yaml # 网络配置文件,快速配置参数
├── run.sh # 快速运行脚本,可直接开始多卡训练
......@@ -74,6 +88,8 @@ BMN的训练数据采用ActivityNet1.3提供的数据集,我们提供了处理
- 上述程序会将运行结果保存在`--output_path`参数指定的文件夹下,默认为output/EVAL/BMN\_results;测试结果保存在`--result_path`参数指定的文件夹下,默认为evaluate\_results。
- 暂不支持多卡评估。
- 注:评估时可能会出现loss为nan的情况。这是由于评估时用的是单个样本,可能存在没有iou>0.6的样本,所以为nan,对最终的评估结果没有影响。
......@@ -108,6 +124,8 @@ BMN的训练数据采用ActivityNet1.3提供的数据集,我们提供了处理
- 上述程序会将运行结果保存在`--output_path`参数指定的文件夹下,默认为output/INFER/BMN\_results;测试结果保存在`--result_path`参数指定的文件夹下,默认为predict\_results。
- 暂不支持多卡预测。
## 参考论文
......
......@@ -15,7 +15,6 @@ TRAIN:
batch_size: 4
num_workers: 4
use_shuffle: True
device: "gpu"
learning_rate: 0.001
learning_rate_decay: 0.1
lr_decay_iter: 4200
......@@ -28,14 +27,14 @@ TEST:
subset: "validation"
batch_size: 1
num_workers: 1
output_path: "output/EVAL/BMN_results"
result_path: "evaluate_results"
output_path: "./output/EVAL/BMN_results"
result_path: "./evaluate_results"
INFER:
subset: "test"
batch_size: 1
num_workers: 1
filelist: './infer.list'
output_path: "output/INFER/BMN_results"
result_path: "predict_results"
output_path: "./output/INFER/BMN_results"
result_path: "./predict_results"
......@@ -54,18 +54,18 @@ def parse_args():
'--weights',
type=str,
default=None,
help='weight path, None to automatically download weights provided by Paddle.'
help='weight path. None to automatically download weights provided by Paddle.'
)
parser.add_argument(
'--output_path',
type=str,
default="output/EVAL/BMN_results",
help='output dir path, default to use output/EVAL/BMN_results')
default=None,
help='output dir path. None to use config file setting.')
parser.add_argument(
'--result_path',
type=str,
default="evaluate_results/",
help='output dir path after post processing, default to use ./evaluate_results/'
default=None,
help='output dir path after post processing. None to use config file setting.'
)
parser.add_argument(
'--log_interval',
......
......@@ -46,7 +46,10 @@ def parse_args():
default='bmn.yaml',
help='path to config file of model')
parser.add_argument(
'--device', type=str, default='GPU', help='default use gpu.')
'--device',
type=str,
default='gpu',
help='gpu or cpu, default use gpu.')
parser.add_argument(
'--weights',
type=str,
......@@ -56,18 +59,18 @@ def parse_args():
parser.add_argument(
'--filelist',
type=str,
default="infer.list",
help='infer file list, default to use ./infer.list')
default=None,
help='infer file list, None to use config file setting.')
parser.add_argument(
'--output_path',
type=str,
default="output/INFER/BMN_results",
help='output dir path, default to use output/INFER/BMN_results')
default=None,
help='output dir path, None to use config file setting.')
parser.add_argument(
'--result_path',
type=str,
default="predict_results/",
help='output dir path after post processing, default to use ./predict_results/'
default=None,
help='output dir path after post processing, None to use config file setting.'
)
parser.add_argument(
'--log_interval',
......
......@@ -48,7 +48,7 @@ class BmnDataset(Dataset):
def __getitem__(self, index):
video_name = self.video_list[index]
video_idx = self.video_list.index(video_name)
video_idx = np.array(self.video_list.index(video_name)).astype('int64')
video_feat = self.load_file(video_name)
if self.mode == 'infer':
return video_feat, video_idx
......
......@@ -49,7 +49,7 @@ def parse_args():
parser.add_argument(
'--learning_rate',
type=float,
default=0.001,
default=None,
help='learning rate use for training. None to use config file setting.')
parser.add_argument(
'--resume',
......
## 简介
情感是人类的一种高级智能行为,为了识别文本的情感倾向,需要深入的语义建模。另外,不同领域(如餐饮、体育)在情感的表达各不相同,因而需要有大规模覆盖各个领域的数据进行模型训练。为此,我们通过基于深度学习的语义模型和大规模数据挖掘解决上述两个问题。效果上,我们基于开源情感倾向分类数据集ChnSentiCorp进行评测。具体数据如下所示:
| 模型 | dev | test |
| :------| :------ | :------ |
| CNN | 90.6% | 89.7% |
| BOW | 90.1% | 90.3% |
| GRU | 90.0% | 91.1% |
| BIGRU | 89.7% | 89.6% |
动态图文档请见[Dygraph](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/user_guides/howto/dygraph/DyGraph.html)
## 快速开始
本项目依赖于 Paddlepaddle 1.7.0 及以上版本,请参考 [安装指南](http://www.paddlepaddle.org/#quick-start) 进行安装。
python版本依赖python 2.7或python 3.5及以上版本。
#### 代码下载及环境变量设置
克隆代码库到本地,并设置`PYTHONPATH`环境变量
```shell
git clone https://github.com/PaddlePaddle/hapi
cd hapi
export PYTHONPATH=$PYTHONPATH:`pwd`
cd examples/sentiment_classification
```
#### 数据准备
下载经过预处理的数据,文件解压之后,senta_data目录下会存在训练数据(train.tsv)、开发集数据(dev.tsv)、测试集数据(test.tsv)以及对应的词典(word_dict.txt)
```shell
wget https://baidu-nlp.bj.bcebos.com/sentiment_classification-dataset-1.0.0.tar.gz
tar -zxvf sentiment_classification-dataset-1.0.0.tar.gz
```
#### 模型训练
基于示例的数据集,可以运行下面的命令,在训练集(train.tsv)上进行模型训练,并在开发集(dev.tsv)验证。训练阶段需手动创建模型需要保存的文件夹,并且通过checkpoints设置保存文件路径。
model_type从bow_net,cnn_net,gru_net,bigru_net中选择。
模型相关参数均在`senta.yaml`中设置,模型训练需确保`senta.yaml`中的`do_train`属性置为`True`。
```shell
python sentiment_classifier.py
```
#### 模型预测
利用已有模型,可以运行下面命令,对未知label的数据(test.tsv)进行预测。
模型预测需确保`senta.yaml`中的`do_infer`属性置为`True`(同时将`do_train`置为`False`,否则会优先执行训练)。
```shell
python sentiment_classifier.py
```
#### 模型参数
模型参数配置文件:`senta.yaml`
1. batch_size, 根据模型情况和GPU占用率选择batch_size, 建议cnn/bow选择较大batch_size, gru/bigru选择较小batch_size。
2. padding_size默认为150。
3. epoch, training时默认设置为5,infer默认为1。
4. learning_rate默认为0.002。
## 进阶使用
#### 任务定义
传统的情感分类主要基于词典或者特征工程的方式进行分类,这种方法需要繁琐的人工特征设计和先验知识,理解停留于浅层并且扩展泛化能力差。为了避免传统方法的局限,我们采用近年来飞速发展的深度学习技术。基于深度学习的情感分类不依赖于人工特征,它能够端到端的对输入文本进行语义理解,并基于语义表示进行情感倾向的判断。
#### 模型原理介绍
本项目针对情感倾向性分类问题,提供了以下几种模型:
+ CNN(Convolutional Neural Networks),是一个基础的序列模型,能处理变长序列输入,提取局部区域之内的特征;
+ BOW(Bag Of Words)模型,是一个非序列模型,使用基本的全连接结构;
+ GRU(Gated Recurrent Unit),序列模型,能够较好地解决序列文本中长距离依赖的问题;
+ BI-GRU(Bidirectional Gated Recurrent Unit),序列模型,采用双向双层GRU结构,更好地捕获句子中的语义特征;
#### 数据格式说明
训练、预测、评估使用的数据可以由用户根据实际的应用场景,自己组织数据。数据由两列组成,以制表符分隔,第一列是以空格分词的中文文本(分词预处理方法将在下文具体说明),文件为utf8编码;第二列是情感倾向分类的类别(0表示消极;1表示积极),注意数据文件第一行固定表示为"text_a\tlabel"
```text
特 喜欢 这种 好看的 狗狗 1
这 真是 惊艳 世界 的 中国 黑科技 1
环境 特别 差 ,脏兮兮 的,再也 不去 了 0
```
#### 代码结构说明
```text
.
├── sentiment_classifier.py # 该项目的主函数,封装包括训练、预测、评估的部分
├── models.py # 网络结构
```
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid as fluid
from paddle.fluid.dygraph.nn import Linear, Embedding
from paddle.fluid.dygraph.base import to_variable
import numpy as np
from hapi.model import Model
from hapi.text.text import GRUEncoderLayer as BiGRUEncoder
from hapi.text.test import BOWEncoder, CNNEncoder, GRUEncoder
class CNN(Model):
    """Convolutional sentiment classifier.

    Token ids go through a ``CNNEncoder`` and then two ``Linear`` layers;
    the last layer has ``class_dim`` (2) outputs with a softmax activation.
    """

    def __init__(self, dict_dim, batch_size, seq_len):
        """Create the encoder and the fully connected layers.

        Args:
            dict_dim: Vocabulary size; the encoder is built with
                ``dict_dim + 1`` entries.
            batch_size: Stored on the instance; not used elsewhere in this
                class.
            seq_len: Sequence length handed to the encoder. It also sizes
                the input of the first fully connected layer
                (``hid_dim * seq_len``).
        """
        super(CNN, self).__init__()
        # Constructor arguments.
        self.dict_dim = dict_dim
        self.batch_size = batch_size
        self.seq_len = seq_len
        # Fixed hyper-parameters.
        self.emb_dim = 128
        self.hid_dim = 128
        self.fc_hid_dim = 96
        self.class_dim = 2
        self.channels = 1
        self.win_size = [3, self.hid_dim]

        # Sublayers are created in the same order as before so that
        # parameter creation order is unchanged.
        self._encoder = CNNEncoder(
            dict_size=self.dict_dim + 1,
            emb_dim=self.emb_dim,
            seq_len=self.seq_len,
            filter_size=self.win_size,
            num_filters=self.hid_dim,
            hidden_dim=self.hid_dim,
            padding_idx=None,
            act='tanh')
        # NOTE(review): this hidden layer uses softmax while the sibling
        # models in this file use tanh for their hidden layers -- confirm
        # that this is intended.
        self._fc1 = Linear(
            input_dim=self.hid_dim * self.seq_len,
            output_dim=self.fc_hid_dim,
            act="softmax")
        self._fc_prediction = Linear(
            input_dim=self.fc_hid_dim,
            output_dim=self.class_dim,
            act="softmax")

    def forward(self, inputs):
        """Return the softmax output of the prediction layer for ``inputs``."""
        encoded = self._encoder(inputs)
        return self._fc_prediction(self._fc1(encoded))
class BOW(Model):
    """Bag-of-words sentiment classifier.

    Token ids go through a ``BOWEncoder``, a ``tanh``, two tanh-activated
    ``Linear`` layers and a final softmax ``Linear`` layer with
    ``class_dim`` (2) outputs.
    """

    def __init__(self, dict_dim, batch_size, seq_len):
        """Create the encoder and the fully connected layers.

        Args:
            dict_dim: Vocabulary size; the encoder is built with
                ``dict_dim + 1`` entries.
            batch_size: Stored on the instance; not used elsewhere in this
                class.
            seq_len: Sequence length handed to the encoder.
        """
        super(BOW, self).__init__()
        # Constructor arguments.
        self.dict_dim = dict_dim
        self.batch_size = batch_size
        self.seq_len = seq_len
        # Fixed hyper-parameters.
        self.emb_dim = 128
        self.hid_dim = 128
        self.fc_hid_dim = 96
        self.class_dim = 2

        # Sublayers are created in the same order as before so that
        # parameter creation order is unchanged.
        self._encoder = BOWEncoder(
            dict_size=self.dict_dim + 1,
            emb_dim=self.emb_dim,
            padding_idx=None,
            bow_dim=self.hid_dim,
            seq_len=self.seq_len)
        self._fc1 = Linear(
            input_dim=self.hid_dim, output_dim=self.hid_dim, act="tanh")
        self._fc2 = Linear(
            input_dim=self.hid_dim, output_dim=self.fc_hid_dim, act="tanh")
        self._fc_prediction = Linear(
            input_dim=self.fc_hid_dim,
            output_dim=self.class_dim,
            act="softmax")

    def forward(self, inputs):
        """Return the softmax output of the prediction layer for ``inputs``."""
        bag = fluid.layers.tanh(self._encoder(inputs))
        hidden = self._fc2(self._fc1(bag))
        return self._fc_prediction(hidden)
class GRU(Model):
    """GRU sentiment classifier.

    Token ids go through a ``GRUEncoder``, a tanh-activated ``Linear`` layer
    and a final softmax ``Linear`` layer with ``class_dim`` (2) outputs.
    """

    def __init__(self, dict_dim, batch_size, seq_len):
        """Create the fully connected layers and the encoder.

        Args:
            dict_dim: Vocabulary size; the encoder is built with
                ``dict_dim + 1`` entries.
            batch_size: Stored on the instance; not used elsewhere in this
                class.
            seq_len: Sequence length handed to the encoder.
        """
        super(GRU, self).__init__()
        # Constructor arguments.
        self.dict_dim = dict_dim
        self.batch_size = batch_size
        self.seq_len = seq_len
        # Fixed hyper-parameters.
        self.emb_dim = 128
        self.hid_dim = 128
        self.fc_hid_dim = 96
        self.class_dim = 2

        # The dense layers are deliberately created before the encoder,
        # exactly as in the original, to keep parameter creation order.
        self._fc1 = Linear(
            input_dim=self.hid_dim, output_dim=self.fc_hid_dim, act="tanh")
        self._fc_prediction = Linear(
            input_dim=self.fc_hid_dim,
            output_dim=self.class_dim,
            act="softmax")
        self._encoder = GRUEncoder(
            dict_size=self.dict_dim + 1,
            emb_dim=self.emb_dim,
            gru_dim=self.hid_dim,
            hidden_dim=self.hid_dim,
            padding_idx=None,
            seq_len=self.seq_len)

    def forward(self, inputs):
        """Return the softmax output of the prediction layer for ``inputs``."""
        encoded = self._encoder(inputs)
        return self._fc_prediction(self._fc1(encoded))
class BiGRU(Model):
    """Bidirectional GRU sentiment classifier.

    Token ids are embedded, projected to ``hid_dim * 3`` features, encoded
    by a bidirectional ``GRUEncoderLayer`` (imported as ``BiGRUEncoder``),
    max-reduced over dimension 1 and classified by two ``Linear`` layers.
    """

    def __init__(self, dict_dim, batch_size, seq_len):
        """Create the embedding, the dense layers and the encoder.

        Args:
            dict_dim: Vocabulary size; the embedding table has
                ``dict_dim + 1`` rows.
            batch_size: Batch size. It fixes the shape of the zero initial
                hidden state and of the reshape done in ``forward``.
            seq_len: Stored on the instance; not used elsewhere in this
                class.
        """
        super(BiGRU, self).__init__()
        # Constructor arguments.
        self.dict_dim = dict_dim
        self.batch_size = batch_size
        self.seq_len = seq_len
        # Fixed hyper-parameters.
        self.emb_dim = 128
        self.hid_dim = 128
        self.fc_hid_dim = 96
        self.class_dim = 2

        # Sublayers are created in the same order as before so that
        # parameter creation order is unchanged.
        self.embedding = Embedding(
            size=[self.dict_dim + 1, self.emb_dim],
            dtype='float32',
            param_attr=fluid.ParamAttr(learning_rate=30),
            is_sparse=False)
        # All-zero initial hidden state of shape (batch_size, hid_dim).
        initial_state = to_variable(
            np.zeros((self.batch_size, self.hid_dim), dtype="float32"))
        self._fc1 = Linear(
            input_dim=self.hid_dim, output_dim=self.hid_dim * 3)
        self._fc2 = Linear(
            input_dim=self.hid_dim * 2,
            output_dim=self.fc_hid_dim,
            act="tanh")
        self._fc_prediction = Linear(
            input_dim=self.fc_hid_dim,
            output_dim=self.class_dim,
            act="softmax")
        self._encoder = BiGRUEncoder(
            grnn_hidden_dim=self.hid_dim,
            input_dim=self.hid_dim * 3,
            h_0=initial_state,
            init_bound=0.1,
            is_bidirection=True)

    def forward(self, inputs):
        """Return the softmax output of the prediction layer for ``inputs``."""
        embedded = fluid.layers.reshape(
            self.embedding(inputs),
            shape=[self.batch_size, -1, self.hid_dim])
        encoded = self._encoder(self._fc1(embedded))
        encoded = fluid.layers.tanh(encoded)
        # Max over dimension 1 of the encoder output.
        pooled = fluid.layers.reduce_max(encoded, dim=1)
        return self._fc_prediction(self._fc2(pooled))
# Path passed to Model.fit as save_dir during training and to Model.load during inference.
checkpoints: "./checkpoints"
# Number of training epochs.
epoch: 5
# Forwarded to Model.fit as save_freq / eval_freq.
save_freq: 1
eval_freq: 1
# Learning rate of the Adagrad optimizer.
lr: 0.002
# Passed to the data generator as padding_size and to the models as seq_len.
padding_size: 150
skip_steps: 10
verbose: False
# Dataset directory and vocabulary file read by SentaProcessor.
data_dir: "./senta_data/"
vocab_path: "./senta_data/word_dict.txt"
# Passed to the models as dict_dim.
vocab_size: 33256
batch_size: 20
random_seed: 0
# True selects the "gpu" device, False selects "cpu".
use_cuda: True
# do_train is checked first; do_infer only takes effect when do_train is False.
do_train: True
do_infer: False
# One of: cnn_net, bow_net, gru_net, bigru_net.
model_type: "bow_net"
# Inference writes predictions.json into this directory.
output_dir: "./output"
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Sentiment Classification in Paddle Dygraph Mode. """
from __future__ import print_function
import numpy as np
import paddle.fluid as fluid
from hapi.model import set_device, Model, CrossEntropy, Input
from hapi.configure import Config
from hapi.text.senta import SentaProcessor
from hapi.metrics import Accuracy
from models import CNN, BOW, GRU, BiGRU
import json
import os
# Module-level setup: read the run configuration from ./senta.yaml.
# build() and Print() are hapi Config helpers (presumably they parse and
# echo the settings -- confirm against hapi.configure).
args = Config(yaml_file='./senta.yaml')
args.build()
args.Print()
# Pick the execution device from the `use_cuda` setting.
device = set_device("gpu" if args.use_cuda else "cpu")
# Device count: every CUDA device reported by fluid when use_cuda is set, otherwise 1.
dev_count = fluid.core.get_cuda_device_count() if args.use_cuda else 1
def main():
    """Run training if ``do_train`` is set, otherwise inference if ``do_infer`` is set."""
    if args.do_train:
        train()
        return
    if args.do_infer:
        infer()
def train():
    """Train the configured sentiment model and evaluate it on the dev set.

    Reads every setting from the module-level ``args``. Checkpoints are
    written by ``Model.fit`` under ``args.checkpoints``.

    Raises:
        ValueError: If ``args.model_type`` is not one of ``cnn_net``,
            ``bow_net``, ``gru_net`` or ``bigru_net``.
    """
    fluid.enable_dygraph(device)
    processor = SentaProcessor(
        data_dir=args.data_dir,
        vocab_path=args.vocab_path,
        random_seed=args.random_seed)

    # NOTE(review): these three values are not used below. They are kept
    # because the processor calls are opaque from this file -- confirm that
    # they have no side effects before deleting them.
    num_labels = len(processor.get_labels())
    num_train_examples = processor.get_num_examples(phase="train")
    max_train_steps = (
        args.epoch * num_train_examples // args.batch_size // dev_count)

    train_data_generator = processor.data_generator(
        batch_size=args.batch_size,
        padding_size=args.padding_size,
        places=device,
        phase='train',
        epoch=args.epoch,
        shuffle=False)
    eval_data_generator = processor.data_generator(
        batch_size=args.batch_size,
        padding_size=args.padding_size,
        places=device,
        phase='dev',
        epoch=args.epoch,
        shuffle=False)

    # Fail with a clear message on an unknown model type instead of hitting
    # an unbound ``model`` variable a few lines later.
    model_classes = {
        'cnn_net': CNN,
        'bow_net': BOW,
        'gru_net': GRU,
        'bigru_net': BiGRU,
    }
    if args.model_type not in model_classes:
        raise ValueError(
            "unsupported model_type %r, expected one of %s" %
            (args.model_type, sorted(model_classes)))
    model = model_classes[args.model_type](
        args.vocab_size, args.batch_size, args.padding_size)

    optimizer = fluid.optimizer.Adagrad(
        learning_rate=args.lr, parameter_list=model.parameters())

    inputs = [Input([None, None], 'int64', name='doc')]
    labels = [Input([None, 1], 'int64', name='label')]

    model.prepare(
        optimizer,
        CrossEntropy(),
        Accuracy(topk=(1,)),
        inputs,
        labels,
        device=device)

    model.fit(train_data=train_data_generator,
              eval_data=eval_data_generator,
              batch_size=args.batch_size,
              epochs=args.epoch,
              save_dir=args.checkpoints,
              eval_freq=args.eval_freq,
              save_freq=args.save_freq)
def infer():
    """Run inference with a saved model and write the predictions as JSON lines.

    Reads every setting from the module-level ``args``. The model weights
    are loaded from ``args.checkpoints``. When ``args.output_dir`` is set,
    one JSON object per sample (``index``, ``label``, ``probs``) is written
    to ``<output_dir>/predictions.json``; the directory is created if it
    does not exist yet.

    Raises:
        ValueError: If ``args.model_type`` is not one of ``cnn_net``,
            ``bow_net``, ``gru_net`` or ``bigru_net``.
    """
    fluid.enable_dygraph(device)
    processor = SentaProcessor(
        data_dir=args.data_dir,
        vocab_path=args.vocab_path,
        random_seed=args.random_seed)

    infer_data_generator = processor.data_generator(
        batch_size=args.batch_size,
        padding_size=args.padding_size,
        places=device,
        phase='infer',
        epoch=1,
        shuffle=False)

    # Fail with a clear message on an unknown model type instead of hitting
    # an unbound ``model_infer`` variable a few lines later.
    model_classes = {
        'cnn_net': CNN,
        'bow_net': BOW,
        'gru_net': GRU,
        'bigru_net': BiGRU,
    }
    if args.model_type not in model_classes:
        raise ValueError(
            "unsupported model_type %r, expected one of %s" %
            (args.model_type, sorted(model_classes)))
    model_infer = model_classes[args.model_type](
        args.vocab_size, args.batch_size, args.padding_size)

    print('Do inferring ...... ')
    inputs = [Input([None, None], 'int64', name='doc')]
    model_infer.prepare(
        None,
        CrossEntropy(),
        Accuracy(topk=(1,)),
        inputs,
        device=device)
    model_infer.load(args.checkpoints, reset_optimizer=True)
    preds = model_infer.predict(test_data=infer_data_generator)
    # One row per sample, two class scores per row.
    preds = np.array(preds[0]).reshape((-1, 2))

    if args.output_dir:
        # open() does not create missing directories.
        if not os.path.isdir(args.output_dir):
            os.makedirs(args.output_dir)
        output_path = os.path.join(args.output_dir, 'predictions.json')
        with open(output_path, 'w') as w:
            for index, probs in enumerate(preds):
                # np.argmax returns a NumPy integer, which json.dumps
                # rejects on Python 3; convert it to a builtin int.
                label = int(np.argmax(probs))
                result = json.dumps({
                    'index': index,
                    'label': label,
                    'probs': probs.tolist()
                })
                w.write(result + '\n')
        print('Predictions saved at ' + output_path)
if __name__ == '__main__':
main()
......@@ -14,7 +14,7 @@ Sequence Tagging,是一个序列标注模型,模型可用于实现,分词
#### 1.PaddlePaddle 安装
本项目依赖 PaddlePaddle 1.7 及以上版本和PaddleHub 1.0.0及以上版本 ,PaddlePaddle安装请参考官网 [快速安装](http://www.paddlepaddle.org/paddle#quick-start),PaddleHub安装参考 [PaddleHub](https://github.com/PaddlePaddle/PaddleHub)
本项目依赖 PaddlePaddle 1.8 及以上版本和PaddleHub 1.0.0及以上版本 ,PaddlePaddle安装请参考官网 [快速安装](http://www.paddlepaddle.org/paddle#quick-start),PaddleHub安装参考 [PaddleHub](https://github.com/PaddlePaddle/PaddleHub)
> Warning: GPU 和 CPU 版本的 PaddlePaddle 分别是 paddlepaddle-gpu 和 paddlepaddle,请安装时注意区别。
......
......@@ -21,6 +21,7 @@ from __future__ import print_function
import io
import os
import sys
import six
import math
import argparse
import numpy as np
......@@ -71,7 +72,12 @@ def main(args):
word_len = length[i]
word_ids = results[i][:word_len]
tags = [dataset.id2label_dict[str(id)] for id in word_ids]
f.write("\002".join(tags) + "\n")
if six.PY3:
tags = [bytes(tag, encoding="utf8") for tag in tags]
out = b"\002".join(tags) + b"\n"
f.write(out)
else:
f.write("\002".join(tags) + "\n")
if __name__ == '__main__':
......
......@@ -20,7 +20,6 @@ from __future__ import print_function
import io
import os
import leveldb
import numpy as np
import shutil
from functools import partial
......
......@@ -39,8 +39,8 @@ TSM模型是将Temporal Shift Module插入到ResNet网络中构建的视频分
```bash
git clone https://github.com/PaddlePaddle/hapi
cd hapi
export PYTHONPATH=$PYTHONPATH:`pwd`
cd tsm
export PYTHONPATH=`pwd`:$PYTHONPATH
cd examples/tsm
```
### 数据准备
......@@ -141,6 +141,8 @@ python infer.py --data=<path/to/dataset> --label_list=<path/to/label_list> --inf
2020-04-03 07:37:16,321-INFO: Sample ./kineteics/val_10/data_batch_10-042_6 predict label: 6, ground truth label: 6
```
**注意:** 推断时`--infer_file`需要指定到pickle文件路径。
## 参考论文
- [Temporal Shift Module for Efficient Video Understanding](https://arxiv.org/abs/1811.08383v1), Ji Lin, Chuang Gan, Song Han
......
......@@ -26,6 +26,7 @@ from check import check_gpu, check_version
from modeling import tsm_resnet50
from kinetics_dataset import KineticsDataset
from transforms import *
from utils import print_arguments
import logging
logger = logging.getLogger(__name__)
......@@ -56,7 +57,7 @@ def main():
model.load(FLAGS.weights, reset_optimizer=True)
imgs, label = dataset[0]
pred = model.test([imgs[np.newaxis, :]])
pred = model.test_batch([imgs[np.newaxis, :]])
pred = labels[np.argmax(pred)]
logger.info("Sample {} predict label: {}, ground truth label: {}" \
.format(FLAGS.infer_file, pred, labels[int(label)]))
......@@ -86,6 +87,7 @@ if __name__ == '__main__':
type=str,
help="weights path for evaluation")
FLAGS = parser.parse_args()
print_arguments(FLAGS)
check_gpu(str.lower(FLAGS.device) == 'gpu')
check_version()
......
......@@ -113,7 +113,7 @@ class KineticsDataset(Dataset):
if self.transform:
imgs, label = self.transform(imgs, label)
return imgs, np.array([label])
return imgs, np.array([label]).astype('int64')
@property
def num_classes(self):
......
......@@ -31,6 +31,7 @@ from modeling import tsm_resnet50
from check import check_gpu, check_version
from kinetics_dataset import KineticsDataset
from transforms import *
from utils import print_arguments
def make_optimizer(step_per_epoch, parameter_list=None):
......@@ -106,7 +107,7 @@ def main():
eval_data=val_dataset,
epochs=FLAGS.epoch,
batch_size=FLAGS.batch_size,
save_dir='tsm_checkpoint',
save_dir=FLAGS.save_dir or 'tsm_checkpoint',
num_workers=FLAGS.num_workers,
drop_last=True,
shuffle=True)
......@@ -150,7 +151,14 @@ if __name__ == '__main__':
default=None,
type=str,
help="weights path for evaluation")
# Checkpoint directory; main() falls back to 'tsm_checkpoint' when this is unset.
parser.add_argument(
    "-s",
    "--save_dir",
    default=None,
    type=str,
    help="directory path for checkpoint saving, default ./tsm_checkpoint")
FLAGS = parser.parse_args()
print_arguments(FLAGS)
check_gpu(str.lower(FLAGS.device) == 'gpu')
check_version()
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import six
import logging
logger = logging.getLogger(__name__)

# Must list names that really exist in this module; a wrong entry makes
# ``from utils import *`` raise AttributeError.
__all__ = ['print_arguments']


def print_arguments(args):
    """Log every attribute of an argparse namespace, sorted by name.

    Each attribute is logged at INFO level as ``name: value`` between a
    header line and a footer line.

    Usage:

    .. code-block:: python

        parser = argparse.ArgumentParser()
        parser.add_argument("name", default="John", type=str, help="User name.")
        args = parser.parse_args()
        print_arguments(args)

    :param args: Input argparse.Namespace for printing.
    :type args: argparse.Namespace
    """
    logger.info("----------- Configuration Arguments -----------")
    # Attribute names are unique, so sorting the (name, value) pairs orders
    # them by name without ever comparing the values.
    for arg, value in sorted(vars(args).items()):
        logger.info("%s: %s", arg, value)
    logger.info("------------------------------------------------")
......@@ -53,8 +53,8 @@ YOLOv3 的网络结构由基础特征提取网络、multi-scale特征融合层
```bash
git clone https://github.com/PaddlePaddle/hapi
cd hapi
export PYTHONPATH=$PYTHONPATH:`pwd`
cd tsm
export PYTHONPATH=`pwd`:$PYTHONPATH
cd examples/yolov3
```
#### 安装COCO-API
......@@ -126,13 +126,13 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch main.py --data=
使用如下方式进行多卡训练:
```bash
CUDA_VISIBLE_DEVICES=0,1,2,3 python main.py -m paddle.distributed.launch --data=<path/to/dataset> --batch_size=16 -d
CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch main.py --data=<path/to/dataset> --batch_size=16 -d
```
### 模型评估
YOLOv3模型输出为LoDTensor,只支持使用batch_size为1进行评估,可通过如下两种方式进行模型评估。
YOLOv3模型输出为LoDTensor,只支持使用单卡且batch_size为1进行评估,可通过如下两种方式进行模型评估。
1. 自动下载Paddle发布的[YOLOv3-DarkNet53](https://paddlemodels.bj.bcebos.com/hapi/yolov3_darknet53.pdparams)权重评估
......@@ -180,7 +180,7 @@ python infer.py --label_list=dataset/voc/label_list.txt --infer_image=image/dog.
2. 加载checkpoint进行推断
```bash
python infer.py --label_list=dataset/voc/label_list.txt --infer_image=image/dog.jpg --weights=yolo_checkpoint/mo_mixup/final
python infer.py --label_list=dataset/voc/label_list.txt --infer_image=image/dog.jpg --weights=yolo_checkpoint/no_mixup/final
```
推断结果可视化图像会保存于`--output`指定的文件夹下,默认保存于`./output`目录。
......
......@@ -28,7 +28,7 @@ from hapi.model import Model, Input, set_device
from modeling import yolov3_darknet53, YoloLoss
from transforms import *
from utils import print_arguments
from visualizer import draw_bbox
import logging
......@@ -91,7 +91,7 @@ def main():
img_id = np.array([0]).astype('int64')[np.newaxis, :]
img_shape = np.array([h, w]).astype('int32')[np.newaxis, :]
_, bboxes = model.test([img_id, img_shape, img])
_, bboxes = model.test_batch([img_id, img_shape, img])
vis_img = draw_bbox(orig_img, cat2name, bboxes, FLAGS.draw_threshold)
save_name = get_save_image_name(FLAGS.output_dir, FLAGS.infer_image)
......@@ -121,6 +121,7 @@ if __name__ == '__main__':
"-w", "--weights", default=None, type=str,
help="path to weights for inference")
FLAGS = parser.parse_args()
print_arguments(FLAGS)
assert os.path.isfile(FLAGS.infer_image), \
"infer_image {} not a file".format(FLAGS.infer_image)
assert os.path.isfile(FLAGS.label_list), \
......
......@@ -33,6 +33,7 @@ from modeling import yolov3_darknet53, YoloLoss
from coco import COCODataset
from coco_metric import COCOMetric
from transforms import *
from utils import print_arguments
NUM_MAX_BOXES = 50
......@@ -171,16 +172,18 @@ def main():
if FLAGS.resume is not None:
model.load(FLAGS.resume)
save_dir = FLAGS.save_dir or 'yolo_checkpoint'
model.fit(train_data=loader,
epochs=FLAGS.epoch - FLAGS.no_mixup_epoch,
save_dir="yolo_checkpoint/mixup",
save_dir=os.path.join(save_dir, "mixup"),
save_freq=10)
# do not use the image mixup transform in the last FLAGS.no_mixup_epoch epochs
dataset.mixup = False
model.fit(train_data=loader,
epochs=FLAGS.no_mixup_epoch,
save_dir="yolo_checkpoint/no_mixup",
save_dir=os.path.join(save_dir, "no_mixup"),
save_freq=5)
......@@ -233,6 +236,13 @@ if __name__ == '__main__':
default=None,
type=str,
help="path to weights for evaluation")
parser.add_argument(
"-s",
"--save_dir",
default=None,
type=str,
help="directory path for checkpoint saving, default ./yolo_checkpoint")
FLAGS = parser.parse_args()
print_arguments(FLAGS)
assert FLAGS.data, "error: must provide data path"
main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import six
import logging
logger = logging.getLogger(__name__)

# Must list names that really exist in this module; a wrong entry makes
# ``from utils import *`` raise AttributeError.
__all__ = ['print_arguments']


def print_arguments(args):
    """Log every attribute of an argparse namespace, sorted by name.

    Each attribute is logged at INFO level as ``name: value`` between a
    header line and a footer line.

    Usage:

    .. code-block:: python

        parser = argparse.ArgumentParser()
        parser.add_argument("name", default="John", type=str, help="User name.")
        args = parser.parse_args()
        print_arguments(args)

    :param args: Input argparse.Namespace for printing.
    :type args: argparse.Namespace
    """
    logger.info("----------- Configuration Arguments -----------")
    # Attribute names are unique, so sorting the (name, value) pairs orders
    # them by name without ever comparing the values.
    for arg, value in sorted(vars(args).items()):
        logger.info("%s: %s", arg, value)
    logger.info("------------------------------------------------")
......@@ -201,24 +201,10 @@ class ProgBarLogger(Callback):
from hapi.callbacks import ProgBarLogger
from hapi.model import Input, set_device
class MnistDataset(MNIST):
def __init__(self, mode, return_label=True):
super(MnistDataset, self).__init__(mode=mode)
self.return_label = return_label
def __getitem__(self, idx):
img = np.reshape(self.images[idx], [1, 28, 28])
if self.return_label:
return img, np.array(self.labels[idx]).astype('int64')
return img,
def __len__(self):
return len(self.images)
inputs = [Input([-1, 1, 28, 28], 'float32', name='image')]
labels = [Input([None, 1], 'int64', name='label')]
train_dataset = MnistDataset(mode='train')
train_dataset = MNIST(mode='train')
model = LeNet()
......@@ -240,6 +226,9 @@ class ProgBarLogger(Callback):
self.verbose = verbose
self.log_freq = log_freq
def _is_print(self):
return self.verbose and ParallelEnv().local_rank == 0
def on_train_begin(self, logs=None):
self.epochs = self.params['epochs']
assert self.epochs
......@@ -250,7 +239,7 @@ class ProgBarLogger(Callback):
self.steps = self.params['steps']
self.epoch = epoch
self.train_step = 0
if self.verbose and self.epochs and ParallelEnv().local_rank == 0:
if self.epochs and self._is_print():
print('Epoch %d/%d' % (epoch + 1, self.epochs))
self.train_progbar = ProgressBar(num=self.steps, verbose=self.verbose)
......@@ -270,17 +259,13 @@ class ProgBarLogger(Callback):
logs = logs or {}
self.train_step += 1
if self.train_step % self.log_freq == 0 and self.verbose and ParallelEnv(
).local_rank == 0:
if self._is_print() and self.train_step % self.log_freq == 0:
if self.steps is None or self.train_step < self.steps:
self._updates(logs, 'train')
def on_epoch_end(self, epoch, logs=None):
logs = logs or {}
if self.verbose == 1 and ParallelEnv().local_rank == 0:
self._updates(logs, 'train')
elif self.train_step % self.log_freq != 0 and self.verbose and ParallelEnv(
).local_rank == 0:
if self._is_print() and (self.steps is not None):
self._updates(logs, 'train')
def on_eval_begin(self, logs=None):
......@@ -291,7 +276,7 @@ class ProgBarLogger(Callback):
self.eval_progbar = ProgressBar(
num=self.eval_steps, verbose=self.verbose)
if ParallelEnv().local_rank == 0:
if self._is_print():
print('Eval begin...')
def on_eval_batch_end(self, step, logs=None):
......@@ -300,8 +285,7 @@ class ProgBarLogger(Callback):
samples = logs.get('batch_size', 1)
self.evaled_samples += samples
if self.eval_step % self.log_freq == 0 and self.verbose and ParallelEnv(
).local_rank == 0:
if self._is_print() and self.eval_step % self.log_freq == 0:
if self.eval_steps is None or self.eval_step < self.eval_steps:
self._updates(logs, 'eval')
......@@ -321,21 +305,19 @@ class ProgBarLogger(Callback):
samples = logs.get('batch_size', 1)
self.tested_samples += samples
if self.test_step % self.log_freq == 0 and self.verbose and ParallelEnv(
).local_rank == 0:
if self.test_step % self.log_freq == 0 and self._is_print():
if self.test_steps is None or self.test_step < self.test_steps:
self._updates(logs, 'test')
def on_eval_end(self, logs=None):
logs = logs or {}
if self.verbose and ParallelEnv().local_rank == 0:
if self.eval_step % self.log_freq != 0 or self.verbose == 1:
self._updates(logs, 'eval')
if self._is_print() and (self.steps is not None):
self._updates(logs, 'eval')
print('Eval samples: %d' % (self.evaled_samples))
def on_test_end(self, logs=None):
logs = logs or {}
if self.verbose and ParallelEnv().local_rank == 0:
if self._is_print():
if self.test_step % self.log_freq != 0 or self.verbose == 1:
self._updates(logs, 'test')
print('Predict samples: %d' % (self.tested_samples))
......@@ -362,24 +344,10 @@ class ModelCheckpoint(Callback):
from hapi.callbacks import ModelCheckpoint
from hapi.model import Input, set_device
class MnistDataset(MNIST):
def __init__(self, mode, return_label=True):
super(MnistDataset, self).__init__(mode=mode)
self.return_label = return_label
def __getitem__(self, idx):
img = np.reshape(self.images[idx], [1, 28, 28])
if self.return_label:
return img, np.array(self.labels[idx]).astype('int64')
return img,
def __len__(self):
return len(self.images)
inputs = [Input([-1, 1, 28, 28], 'float32', name='image')]
labels = [Input([None, 1], 'int64', name='label')]
train_dataset = MnistDataset(mode='train')
train_dataset = MNIST(mode='train')
model = LeNet()
......
......@@ -123,7 +123,7 @@ class Flowers(Dataset):
if self.transform is not None:
image = self.transform(image)
return image, label
return image, label.astype('int64')
def __len__(self):
return len(self.indexes)
......@@ -45,6 +45,8 @@ class MNIST(Dataset):
:attr:`download` is True. Default None
label_path(str): path to label file, can be set None if
:attr:`download` is True. Default None
chw_format(bool): If set True, the output shape is [1, 28, 28],
otherwise, output shape is [1, 784]. Default True.
mode(str): 'train' or 'test' mode. Default 'train'.
download(bool): whether auto download mnist dataset if
:attr:`image_path`/:attr:`label_path` unset. Default
......@@ -70,13 +72,14 @@ class MNIST(Dataset):
def __init__(self,
image_path=None,
label_path=None,
chw_format=True,
mode='train',
transform=None,
download=True):
assert mode.lower() in ['train', 'test'], \
"mode should be 'train' or 'test', but got {}".format(mode)
self.mode = mode.lower()
self.chw_format = chw_format
self.image_path = image_path
if self.image_path is None:
assert download, "image_path not set and auto download disabled"
......@@ -144,10 +147,13 @@ class MNIST(Dataset):
for i in range(buffer_size):
self.images.append(images[i, :])
self.labels.append(np.array([labels[i]]))
self.labels.append(
np.array([labels[i]]).astype('int64'))
def __getitem__(self, idx):
image, label = self.images[idx], self.labels[idx]
if self.chw_format:
image = np.reshape(image, [1, 28, 28])
if self.transform is not None:
image = self.transform(image)
return image, label
......
......@@ -23,6 +23,7 @@ import requests
import tqdm
import hashlib
import time
from collections import OrderedDict
from paddle.fluid.dygraph.parallel import ParallelEnv
......@@ -35,6 +36,44 @@ WEIGHTS_HOME = osp.expanduser("~/.cache/paddle/hapi/weights")
DOWNLOAD_RETRY_LIMIT = 3
# Download URLs of the pretrained NLP model archives, keyed by model name.
# An OrderedDict is used so that iteration follows the listing order below.
nlp_models = OrderedDict([
    ('RoBERTa-zh-base',
     'https://bert-models.bj.bcebos.com/chinese_roberta_wwm_ext_L-12_H-768_A-12.tar.gz'),
    ('RoBERTa-zh-large',
     'https://bert-models.bj.bcebos.com/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.tar.gz'),
    ('ERNIE-v2-en-base',
     'https://ernie.bj.bcebos.com/ERNIE_Base_en_stable-2.0.0.tar.gz'),
    ('ERNIE-v2-en-large',
     'https://ernie.bj.bcebos.com/ERNIE_Large_en_stable-2.0.0.tar.gz'),
    ('XLNet-cased-base',
     'https://xlnet.bj.bcebos.com/xlnet_cased_L-12_H-768_A-12.tgz'),
    ('XLNet-cased-large',
     'https://xlnet.bj.bcebos.com/xlnet_cased_L-24_H-1024_A-16.tgz'),
    ('ERNIE-v1-zh-base',
     'https://baidu-nlp.bj.bcebos.com/ERNIE_stable-1.0.1.tar.gz'),
    ('ERNIE-v1-zh-base-max-len-512',
     'https://ernie.bj.bcebos.com/ERNIE_1.0_max-len-512.tar.gz'),
    ('BERT-en-uncased-large-whole-word-masking',
     'https://bert-models.bj.bcebos.com/wwm_uncased_L-24_H-1024_A-16.tar.gz'),
    ('BERT-en-cased-large-whole-word-masking',
     'https://bert-models.bj.bcebos.com/wwm_cased_L-24_H-1024_A-16.tar.gz'),
    ('BERT-en-uncased-base',
     'https://bert-models.bj.bcebos.com/uncased_L-12_H-768_A-12.tar.gz'),
    ('BERT-en-uncased-large',
     'https://bert-models.bj.bcebos.com/uncased_L-24_H-1024_A-16.tar.gz'),
    ('BERT-en-cased-base',
     'https://bert-models.bj.bcebos.com/cased_L-12_H-768_A-12.tar.gz'),
    ('BERT-en-cased-large',
     'https://bert-models.bj.bcebos.com/cased_L-24_H-1024_A-16.tar.gz'),
    ('BERT-multilingual-uncased-base',
     'https://bert-models.bj.bcebos.com/multilingual_L-12_H-768_A-12.tar.gz'),
    ('BERT-multilingual-cased-base',
     'https://bert-models.bj.bcebos.com/multi_cased_L-12_H-768_A-12.tar.gz'),
    ('BERT-zh-base',
     'https://bert-models.bj.bcebos.com/chinese_L-12_H-768_A-12.tar.gz'),
])
def is_url(path):
"""
......
......@@ -116,7 +116,7 @@ class Accuracy(Metric):
def add_metric_op(self, pred, label, *args):
pred = fluid.layers.argsort(pred, descending=True)[1][:, :self.maxk]
correct = pred == label
return correct
return fluid.layers.cast(correct, dtype='float32')
def update(self, correct, *args):
accs = []
......@@ -143,7 +143,7 @@ class Accuracy(Metric):
if self.maxk != 1:
self._name = ['{}_top{}'.format(name, k) for k in self.topk]
else:
self._name = ['acc']
self._name = [name]
def name(self):
return self._name
此差异已折叠。
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import division
from __future__ import print_function
import os
import unittest
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable
from hapi.metrics import *
from hapi.utils import to_list
def accuracy(pred, label, topk=(1, )):
    """NumPy reference implementation of top-k accuracy.

    Args:
        pred: 2-D array of per-class scores, one row per sample.
        label: 2-D array with one column holding the true class index
            of each sample.
        topk: iterable of k values to evaluate.

    Returns:
        A list with one accuracy value per entry of ``topk``.
    """
    largest_k = max(topk)
    # Class indices ordered by descending score, truncated to the
    # largest k that is requested.
    ranked = np.argsort(pred)[:, ::-1][:, :largest_k]
    hits = (ranked == np.repeat(label, largest_k, 1))
    num_samples = label.shape[0]
    return [hits[:, :k].sum() / num_samples for k in topk]
def convert_to_one_hot(y, C):
    """Build noisy one-hot score rows for the class indices in ``y``.

    Every entry is first filled with a random value scaled by 0.5, then the
    entry at each sample's class index is set to 1.0, so the arg-max of every
    row is the class given in ``y``.

    Args:
        y: array with one class index per sample (one element per row).
        C: number of classes, i.e. the number of output columns.

    Returns:
        A float32 array of shape ``(y.shape[0], C)``.
    """
    num_samples = y.shape[0]
    oh = np.random.random((num_samples, C)).astype('float32') * .5
    # Flatten to one index per row up front instead of calling int() on a
    # 1-element array per sample; that implicit array-to-scalar conversion is
    # deprecated in recent NumPy releases.
    hot_cols = np.asarray(y).reshape(num_samples).astype('int64')
    oh[np.arange(num_samples), hot_cols] = 1.
    return oh
class TestAccuracyDynamic(unittest.TestCase):
    """Compare the ``Accuracy`` metric with the NumPy reference in dygraph mode."""

    def setUp(self):
        # Top-1 accuracy over 5 classes, with no explicit metric name.
        self.topk = (1, )
        self.class_num = 5
        self.sample_num = 1000
        self.name = None

    def random_pred_label(self):
        """Draw random labels and independently drawn random predictions.

        Returns:
            ``(label, scores)`` where ``label`` is an int64 column of class
            indices and ``scores`` is the float32 output of
            ``convert_to_one_hot`` for the predicted classes.
        """
        shape = (self.sample_num, 1)
        label = np.random.randint(0, self.class_num, shape).astype('int64')
        pred = np.random.randint(0, self.class_num, shape).astype('int32')
        scores = convert_to_one_hot(pred, self.class_num).astype('float32')
        return label, scores

    def test_main(self):
        with fluid.dygraph.guard(fluid.CPUPlace()):
            acc = Accuracy(topk=self.topk, name=self.name)
            for _ in range(10):
                label, pred = self.random_pred_label()
                state = to_list(
                    acc.add_metric_op(to_variable(pred), to_variable(label)))
                acc.update(*[s.numpy() for s in state])

                # The metric is reset at the end of every iteration, so the
                # accumulated value is compared against this batch only.
                res_m = acc.accumulate()
                res_f = accuracy(pred, label, self.topk)
                assert np.allclose(np.array(res_m), np.array(res_f), rtol=1e-3), \
                    "Accuracy precision error: {} != {}".format(res_m, res_f)

                acc.reset()
                assert np.sum(acc.total) == 0
                assert np.sum(acc.count) == 0
class TestAccuracyDynamicMultiTopk(TestAccuracyDynamic):
    """Dygraph accuracy check with top-1 and top-5 over 10 classes and a custom name."""

    def setUp(self):
        self.name = "accuracy"
        self.topk = (1, 5)
        self.class_num = 10
        self.sample_num = 1000
class TestAccuracyStatic(TestAccuracyDynamic):
    """Run the accuracy check through a static-graph program and an executor."""

    def test_main(self):
        main_prog = fluid.Program()
        startup_prog = fluid.Program()
        # Declare the feed variables and the metric ops in dedicated programs.
        with fluid.program_guard(main_prog, startup_prog):
            pred_var = fluid.data(
                name='pred', shape=[None, self.class_num], dtype='float32')
            label_var = fluid.data(name='label', shape=[None, 1], dtype='int64')
            acc = Accuracy(topk=self.topk, name=self.name)
            state = acc.add_metric_op(pred_var, label_var)

        exe = fluid.Executor(fluid.CPUPlace())
        compiled_main_prog = fluid.CompiledProgram(main_prog)
        fetch_names = [s.name for s in to_list(state)]

        for _ in range(10):
            label, pred = self.random_pred_label()
            state_ret = exe.run(compiled_main_prog,
                                feed={'pred': pred,
                                      'label': label},
                                fetch_list=fetch_names,
                                return_numpy=True)
            acc.update(*state_ret)

            # The metric is reset at the end of every iteration, so the
            # accumulated value is compared against this batch only.
            res_m = acc.accumulate()
            res_f = accuracy(pred, label, self.topk)
            assert np.allclose(np.array(res_m), np.array(res_f), rtol=1e-3), \
                "Accuracy precision error: {} != {}".format(res_m, res_f)

            acc.reset()
            assert np.sum(acc.total) == 0
            assert np.sum(acc.count) == 0
class TestAccuracyStaticMultiTopk(TestAccuracyStatic):
    """Static-graph accuracy check with top-1 and top-5 over 10 classes and a custom name."""

    def setUp(self):
        self.name = "accuracy"
        self.topk = (1, 5)
        self.class_num = 10
        self.sample_num = 1000
if __name__ == '__main__':
unittest.main()
......@@ -18,33 +18,25 @@ from __future__ import print_function
import unittest
import os
import cv2
import numpy as np
import shutil
import tempfile
import paddle
from paddle import fluid
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
from paddle.fluid.dygraph.container import Sequential
from paddle.io import BatchSampler, DataLoader
from paddle.io import DataLoader
from paddle.fluid.dygraph.base import to_variable
from hapi.model import Model, Input, set_device
from hapi.loss import Loss
from hapi.loss import CrossEntropy
from hapi.metrics import Accuracy
from hapi.datasets import MNIST
from hapi.vision.models import LeNet
from hapi.download import get_weights_path_from_url
class LeNetDygraph(fluid.dygraph.Layer):
"""LeNet model from
`"LeCun Y, Bottou L, Bengio Y, et al. Gradient-based learning applied to document recognition[J]. Proceedings of the IEEE, 1998, 86(11): 2278-2324.`_
Args:
num_classes (int): output dim of last fc layer. If num_classes <=0, last fc layer
will not be defined. Default: 10.
classifier_activation (str): activation for the last fc layer. Default: 'softmax'.
"""
def __init__(self, num_classes=10, classifier_activation='softmax'):
super(LeNetDygraph, self).__init__()
self.num_classes = num_classes
......@@ -73,12 +65,16 @@ class LeNetDygraph(fluid.dygraph.Layer):
class MnistDataset(MNIST):
def __init__(self, mode, return_label=True):
def __init__(self, mode, return_label=True, sample_num=None):
super(MnistDataset, self).__init__(mode=mode)
self.return_label = return_label
if sample_num:
self.images = self.images[:sample_num]
self.labels = self.labels[:sample_num]
def __getitem__(self, idx):
img = np.reshape(self.images[idx], [1, 28, 28])
img, label = self.images[idx], self.labels[idx]
img = np.reshape(img, [1, 28, 28])
if self.return_label:
return img, np.array(self.labels[idx]).astype('int64')
return img,
......@@ -87,15 +83,14 @@ class MnistDataset(MNIST):
return len(self.images)
def get_predict_accuracy(pred, gt):
def compute_acc(pred, label):
pred = np.argmax(pred, -1)
gt = np.array(gt)
correct = pred[:, np.newaxis] == gt
label = np.array(label)
correct = pred[:, np.newaxis] == label
return np.sum(correct) / correct.shape[0]
def low_level_lenet_dygraph_train(model, dataloader):
def dynamic_train(model, dataloader):
optim = fluid.optimizer.Adam(
learning_rate=0.001, parameter_list=model.parameters())
model.train()
......@@ -108,7 +103,7 @@ def low_level_lenet_dygraph_train(model, dataloader):
model.clear_gradients()
def low_level_dynamic_evaluate(model, dataloader):
def dynamic_evaluate(model, dataloader):
with fluid.dygraph.no_grad():
model.eval()
cnt = 0
......@@ -121,57 +116,65 @@ def low_level_dynamic_evaluate(model, dataloader):
return cnt / len(dataloader.dataset)
class TestEvaluatePredict(unittest.TestCase):
def setUp(self):
self.device = set_device('gpu')
self.train_dataset = MnistDataset(mode='train')
self.val_dataset = MnistDataset(mode='test')
self.test_dataset = MnistDataset(mode='test', return_label=False)
class TestModel(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.device = set_device('gpu')
fluid.enable_dygraph(cls.device)
fluid.enable_dygraph(self.device)
train_dataloader = fluid.io.DataLoader(
self.train_dataset, places=self.device, batch_size=64)
val_dataloader = fluid.io.DataLoader(
self.val_dataset, places=self.device, batch_size=64)
self.lenet_dygraph = LeNetDygraph()
low_level_lenet_dygraph_train(self.lenet_dygraph, train_dataloader)
self.acc1 = low_level_dynamic_evaluate(self.lenet_dygraph,
val_dataloader)
sp_num = 1280
cls.train_dataset = MnistDataset(mode='train', sample_num=sp_num)
cls.val_dataset = MnistDataset(mode='test', sample_num=sp_num)
cls.test_dataset = MnistDataset(
mode='test', return_label=False, sample_num=sp_num)
def evaluate(self, dynamic):
fluid.enable_dygraph(self.device) if dynamic else None
cls.train_loader = fluid.io.DataLoader(
cls.train_dataset, places=cls.device, batch_size=64)
cls.val_loader = fluid.io.DataLoader(
cls.val_dataset, places=cls.device, batch_size=64)
cls.test_loader = fluid.io.DataLoader(
cls.test_dataset, places=cls.device, batch_size=64)
inputs = [Input([-1, 1, 28, 28], 'float32', name='image')]
labels = [Input([None, 1], 'int64', name='label')]
seed = 333
fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed
if fluid.in_dygraph_mode():
feed_list = None
else:
feed_list = [x.forward() for x in inputs + labels]
dy_lenet = LeNetDygraph()
cls.init_param = dy_lenet.state_dict()
dynamic_train(dy_lenet, cls.train_loader)
self.train_dataloader = fluid.io.DataLoader(
self.train_dataset,
places=self.device,
batch_size=64,
feed_list=feed_list)
self.val_dataloader = fluid.io.DataLoader(
self.val_dataset,
places=self.device,
batch_size=64,
feed_list=feed_list)
self.test_dataloader = fluid.io.DataLoader(
self.test_dataset,
places=self.device,
batch_size=64,
feed_list=feed_list)
cls.acc1 = dynamic_evaluate(dy_lenet, cls.val_loader)
model = LeNet()
model.load_dict(self.lenet_dygraph.state_dict())
model.prepare(metrics=Accuracy(), inputs=inputs, labels=labels)
cls.inputs = [Input([-1, 1, 28, 28], 'float32', name='image')]
cls.labels = [Input([None, 1], 'int64', name='label')]
result = model.evaluate(self.val_dataloader)
cls.save_dir = tempfile.mkdtemp()
cls.weight_path = os.path.join(cls.save_dir, 'lenet')
fluid.dygraph.save_dygraph(dy_lenet.state_dict(), cls.weight_path)
np.testing.assert_allclose(result['acc'], self.acc1)
fluid.disable_dygraph()
@classmethod
def tearDownClass(cls):
shutil.rmtree(cls.save_dir)
def test_fit_dygraph(self):
self.fit(True)
def test_fit_static(self):
self.fit(False)
def test_evaluate_dygraph(self):
self.evaluate(True)
def test_evaluate_static(self):
self.evaluate(False)
def test_predict_dygraph(self):
self.predict(True)
def test_predict_static(self):
self.predict(False)
def predict(self, dynamic):
fluid.enable_dygraph(self.device) if dynamic else None
......@@ -179,50 +182,175 @@ class TestEvaluatePredict(unittest.TestCase):
inputs = [Input([-1, 1, 28, 28], 'float32', name='image')]
labels = [Input([None, 1], 'int64', name='label')]
if fluid.in_dygraph_mode():
feed_list = None
else:
feed_list = [x.forward() for x in inputs + labels]
self.train_dataloader = fluid.io.DataLoader(
self.train_dataset,
places=self.device,
batch_size=64,
feed_list=feed_list)
self.val_dataloader = fluid.io.DataLoader(
self.val_dataset,
places=self.device,
batch_size=64,
feed_list=feed_list)
self.test_dataloader = fluid.io.DataLoader(
test_dataloader = fluid.io.DataLoader(
self.test_dataset,
places=self.device,
batch_size=64,
feed_list=feed_list)
return_list=True)
model = LeNet()
model.load_dict(self.lenet_dygraph.state_dict())
model.prepare(metrics=Accuracy(), inputs=inputs, labels=labels)
output = model.predict(self.test_dataloader, stack_outputs=True)
model.load(self.weight_path)
np.testing.assert_equal(output[0].shape[0], len(self.test_dataset))
model.prepare(metrics=Accuracy(), inputs=inputs, labels=labels)
acc = get_predict_accuracy(output[0], self.val_dataset.labels)
output = model.predict(test_dataloader, stack_outputs=True)
np.testing.assert_allclose(acc, self.acc1)
def fit(self, dynamic):
fluid.enable_dygraph(self.device) if dynamic else None
seed = 333
fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed
def test_evaluate_dygraph(self):
self.evaluate(True)
model = LeNet()
optim_new = fluid.optimizer.Adam(
learning_rate=0.001, parameter_list=model.parameters())
model.prepare(
optim_new,
loss_function=CrossEntropy(average=False),
metrics=Accuracy(),
inputs=self.inputs,
labels=self.labels)
model.fit(self.train_dataset, batch_size=64, shuffle=False)
result = model.evaluate(self.val_dataset, batch_size=64)
np.testing.assert_allclose(result['acc'], self.acc1)
fluid.disable_dygraph() if dynamic else None
def test_evaluate_static(self):
self.evaluate(False)
def evaluate(self, dynamic):
fluid.enable_dygraph(self.device) if dynamic else None
model = LeNet()
model.prepare(
metrics=Accuracy(), inputs=self.inputs, labels=self.labels)
model.load(self.weight_path)
result = model.evaluate(self.val_dataset, batch_size=64)
np.testing.assert_allclose(result['acc'], self.acc1)
fluid.disable_dygraph() if dynamic else None
def test_predict_dygraph(self):
self.predict(True)
def predict(self, dynamic):
fluid.enable_dygraph(self.device) if dynamic else None
model = LeNet()
model.prepare(inputs=self.inputs)
model.load(self.weight_path)
output = model.predict(
self.test_dataset, batch_size=64, stack_outputs=True)
np.testing.assert_equal(output[0].shape[0], len(self.test_dataset))
def test_predict_static(self):
self.predict(False)
acc = compute_acc(output[0], self.val_dataset.labels)
np.testing.assert_allclose(acc, self.acc1)
fluid.disable_dygraph() if dynamic else None
class MyModel(Model):
    """Minimal model: a single softmax-activated ``Linear(20, 10)`` layer."""

    def __init__(self):
        super(MyModel, self).__init__()
        self._fc = Linear(20, 10, act='softmax')

    def forward(self, x):
        """Apply the linear layer to ``x`` and return its output."""
        return self._fc(x)
class TestModelFunction(unittest.TestCase):
    """Exercise batch-level ``Model`` APIs in both dygraph and static-graph mode."""

    def set_seed(self, seed=1024):
        """Set the random seed of the default startup and main programs."""
        fluid.default_startup_program().random_seed = seed
        fluid.default_main_program().random_seed = seed

    def test_train_batch(self, dynamic=True):
        """``Model.train_batch`` must return the same loss as a hand-written dygraph step."""
        dim = 20
        data = np.random.random(size=(4, dim)).astype(np.float32)
        label = np.random.randint(0, 10, size=(4, 1)).astype(np.int64)

        def get_expect():
            # Reference: one manual SGD step in dygraph mode on the same data.
            fluid.enable_dygraph(fluid.CPUPlace())
            self.set_seed()
            m = MyModel()
            optim = fluid.optimizer.SGD(learning_rate=0.001,
                                        parameter_list=m.parameters())
            m.train()
            output = m(to_variable(data))
            label_var = to_variable(label)
            loss = fluid.layers.cross_entropy(output, label_var)
            avg_loss = fluid.layers.reduce_sum(loss)
            avg_loss.backward()
            optim.minimize(avg_loss)
            m.clear_gradients()
            fluid.disable_dygraph()
            return avg_loss.numpy()

        ref = get_expect()
        for dynamic in [True, False]:
            device = set_device('cpu')
            if dynamic:
                fluid.enable_dygraph(device)
            self.set_seed()
            model = MyModel()

            optim2 = fluid.optimizer.SGD(learning_rate=0.001,
                                         parameter_list=model.parameters())

            inputs = [Input([None, dim], 'float32', name='x')]
            labels = [Input([None, 1], 'int64', name='label')]
            model.prepare(
                optim2,
                loss_function=CrossEntropy(average=False),
                inputs=inputs,
                labels=labels,
                device=device)
            loss, = model.train_batch([data], [label])

            np.testing.assert_allclose(loss.flatten(), ref.flatten())
            if dynamic:
                fluid.disable_dygraph()

    def test_test_batch(self, dynamic=True):
        """``Model.test_batch`` must match a plain dygraph forward pass in eval mode."""
        dim = 20
        data = np.random.random(size=(4, dim)).astype(np.float32)

        def get_expect():
            # Reference: forward pass of a freshly seeded model in dygraph mode.
            fluid.enable_dygraph(fluid.CPUPlace())
            self.set_seed()
            m = MyModel()
            m.eval()
            output = m(to_variable(data))
            fluid.disable_dygraph()
            return output.numpy()

        ref = get_expect()
        for dynamic in [True, False]:
            device = set_device('cpu')
            if dynamic:
                fluid.enable_dygraph(device)
            self.set_seed()
            model = MyModel()
            inputs = [Input([None, dim], 'float32', name='x')]
            model.prepare(inputs=inputs, device=device)
            out, = model.test_batch([data])

            np.testing.assert_allclose(out, ref)
            if dynamic:
                fluid.disable_dygraph()

    def test_save_load(self):
        """``Model.save`` followed by ``Model.load`` must succeed in both modes."""
        for dynamic in [True, False]:
            device = set_device('cpu')
            if dynamic:
                fluid.enable_dygraph(device)
            # Use a fresh directory per mode and always clean it up, so a
            # failing save/load neither leaks the directory nor leaves
            # dygraph mode enabled.
            path = tempfile.mkdtemp()
            try:
                model = MyModel()
                inputs = [Input([None, 20], 'float32', name='x')]
                model.prepare(inputs=inputs)
                model.save(path + '/test')
                model.load(path + '/test')
            finally:
                shutil.rmtree(path)
                if dynamic:
                    fluid.disable_dygraph()

    def test_parameters(self):
        """``Model.parameters`` must expose the 20x10 weight as its first entry."""
        for dynamic in [True, False]:
            device = set_device('cpu')
            if dynamic:
                fluid.enable_dygraph(device)
            model = MyModel()
            inputs = [Input([None, 20], 'float32', name='x')]
            model.prepare(inputs=inputs)
            params = model.parameters()
            self.assertEqual(params[0].shape[0], 20)
            self.assertEqual(params[0].shape[1], 10)
            if dynamic:
                fluid.disable_dygraph()
if __name__ == '__main__':
......
......@@ -13,7 +13,9 @@
# limitations under the License.
from hapi.text.bert.bert import BertConfig as BertConfig
from hapi.text.bert.optimization import Optimizer as Optimizer
from hapi.text.bert.dygraph_optimization import DyOptimizer as DyOptimizer
from hapi.text.bert.static_optimization import StOptimizer as StOptimizer
from hapi.text.bert.optimization import make_optimizer as make_optimizer
from hapi.text.bert.dataloader import BertDataLoader as BertDataLoader
from hapi.text.bert.dataloader import BertInputExample as BertInputExample
from hapi.text.tokenizer import tokenization as tokenization
......
......@@ -23,8 +23,8 @@ import numpy as np
import paddle
import paddle.fluid as fluid
from hapi.model import Model
from paddle.fluid.dygraph import Embedding, LayerNorm, Linear, to_variable, Layer, guard
from hapi.text.text import PrePostProcessLayer, TransformerEncoder
from hapi.text.bert.utils.init import init_from_static_model
......@@ -52,7 +52,7 @@ class BertConfig(object):
print('------------------------------------------------')
class BertEncoder(Layer):
class BertEncoder(Model):
"""
bert
"""
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Optimization and learning rate scheduling."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph.learning_rate_scheduler import LearningRateDecay
class ConstantLR(LearningRateDecay):
    """Learning-rate schedule that returns the same value at every step."""

    def __init__(self, learning_rate, begin=0, step=1, dtype='float32'):
        super(ConstantLR, self).__init__(begin, step, dtype)
        self.learning_rate = learning_rate

    def step(self):
        """Return the configured learning rate unchanged."""
        return self.learning_rate
class LinearDecay(LearningRateDecay):
    """Linear warm-up followed by polynomial decay of the learning rate.

    Args:
        learning_rate: peak learning rate reached at the end of warm-up.
        warmup_steps: number of steps over which the rate ramps up linearly.
        decay_steps: number of steps over which the rate decays.
        end_learning_rate: value the decay converges to. Default 0.0001.
        power: exponent of the polynomial decay. Default 1.0.
        cycle: if True, the decay horizon is stretched to the next multiple
            of ``decay_steps`` instead of clamping the step. Default False.
        begin, step, dtype: forwarded to ``LearningRateDecay``.
    """

    def __init__(self,
                 learning_rate,
                 warmup_steps,
                 decay_steps,
                 end_learning_rate=0.0001,
                 power=1.0,
                 cycle=False,
                 begin=0,
                 step=1,
                 dtype='float32'):
        super(LinearDecay, self).__init__(begin, step, dtype)
        self.learning_rate = learning_rate
        self.warmup_steps = warmup_steps
        self.decay_steps = decay_steps
        self.end_learning_rate = end_learning_rate
        self.power = power
        self.cycle = cycle

    def step(self):
        """Return the learning rate for the current ``step_num``."""
        current = self.step_num

        # Warm-up phase: scale the peak rate by the fraction of warm-up done.
        if current < self.warmup_steps:
            warmup_lr = self.learning_rate * (current / self.warmup_steps)
            return self.create_lr_var(warmup_lr)

        # Decay phase.
        horizon = self.decay_steps
        if self.cycle:
            periods = fluid.layers.ceil(
                self.create_lr_var(current / float(self.decay_steps)))
            if current == 0:
                periods = self.create_lr_var(1.0)
            horizon = self.decay_steps * periods
        else:
            # Clamp the step so the rate stays at end_learning_rate afterwards.
            current = self.create_lr_var(
                current if current < self.decay_steps else self.decay_steps)

        lr_span = self.learning_rate - self.end_learning_rate
        remaining = (1 - current / horizon) ** self.power
        return lr_span * remaining + self.end_learning_rate
class DyOptimizer(object):
    """Adam optimizer wrapper for dygraph mode with LR scheduling and weight decay.

    Args:
        warmup_steps: number of warm-up steps; a value <= 0 selects a
            constant learning rate.
        num_train_steps: total number of training steps, used as the decay
            horizon of the 'linear_warmup_decay' scheduler.
        learning_rate: base learning rate.
        model_cls: model whose ``parameters()`` are snapshotted before each
            update when weight decay is enabled.
        weight_decay: weight-decay coefficient; decay is skipped when <= 0.
        scheduler: 'noam_decay' or 'linear_warmup_decay'.
        loss_scaling: stored on the instance; not used by this class.
        parameter_list: parameters handed to the underlying Adam optimizer.

    Raises:
        ValueError: if ``warmup_steps > 0`` and ``scheduler`` is unknown.
    """

    def __init__(self,
                 warmup_steps,
                 num_train_steps,
                 learning_rate,
                 model_cls,
                 weight_decay,
                 scheduler='linear_warmup_decay',
                 loss_scaling=1.0,
                 parameter_list=None):
        self.warmup_steps = warmup_steps
        self.num_train_steps = num_train_steps
        self.learning_rate = learning_rate
        self.model_cls = model_cls
        self.weight_decay = weight_decay
        self.scheduler = scheduler
        self.loss_scaling = loss_scaling
        self.parameter_list = parameter_list
        self.scheduled_lr = 0.0
        self.optimizer = self.lr_schedule()

    def lr_schedule(self):
        """Create the LR schedule, store it in ``scheduled_lr`` and return an Adam optimizer using it."""
        if self.warmup_steps > 0:
            if self.scheduler == 'noam_decay':
                self.scheduled_lr = fluid.dygraph.NoamDecay(1 / (
                    self.warmup_steps * (self.learning_rate**2)),
                                                            self.warmup_steps)
            elif self.scheduler == 'linear_warmup_decay':
                self.scheduled_lr = LinearDecay(self.learning_rate,
                                                self.warmup_steps,
                                                self.num_train_steps, 0.0)
            else:
                raise ValueError("Unkown learning rate scheduler, should be "
                                 "'noam_decay' or 'linear_warmup_decay'")
        else:
            self.scheduled_lr = ConstantLR(self.learning_rate)

        return fluid.optimizer.Adam(
            learning_rate=self.scheduled_lr,
            parameter_list=self.parameter_list)

    def exclude_from_weight_decay(self, name):
        """Return True for layer-norm and bias parameters, which are not decayed."""
        if "layer_norm" in name:
            return True
        return name.endswith(("_bias", "_b", ".b_0"))

    def state_dict(self):
        """Return the state dict of the wrapped optimizer."""
        return self.optimizer.state_dict()

    def set_dict(self, state_dict):
        """Load ``state_dict`` into the wrapped optimizer."""
        return self.optimizer.set_dict(state_dict)

    def get_opti_var_name_list(self):
        """Forward to the wrapped optimizer's ``get_opti_var_name_list``."""
        return self.optimizer.get_opti_var_name_list()

    def current_step_lr(self):
        """Return the wrapped optimizer's learning rate for the current step."""
        return self.optimizer.current_step_lr()

    def minimize(self, loss, use_data_parallel=False, model=None):
        """Run backward, apply the Adam update and then the weight decay.

        Args:
            loss: loss variable to minimize.
            use_data_parallel: if True, scale the loss and all-reduce the
                gradients through ``model``.
            model: data-parallel model wrapper; required when
                ``use_data_parallel`` is True.

        Raises:
            ValueError: if ``use_data_parallel`` is True and ``model`` is None.
        """
        # Validate before the first use of `model`; the check used to come
        # after `model.scale_loss` had already been called.
        if use_data_parallel and model is None:
            raise ValueError(
                "model must be provided when use_data_parallel is True")

        if use_data_parallel:
            loss = model.scale_loss(loss)

        loss.backward()

        # Snapshot the pre-update parameter values; the decay term is computed
        # from these, not from the values Adam has just produced.
        params_before = {}
        if self.weight_decay > 0:
            for param in self.model_cls.parameters():
                snapshot = param * 1.0
                snapshot.stop_gradient = True
                params_before[param.name] = snapshot

        if use_data_parallel:
            model.apply_collective_grads()

        _, param_grads = self.optimizer.minimize(loss)

        if self.weight_decay > 0:
            # The schedule value is the same for every parameter of this step,
            # so it is evaluated once.
            lr = self.scheduled_lr.step()
            lr_value = lr if isinstance(lr, float) else lr.numpy()
            for param, _ in param_grads:
                if self.exclude_from_weight_decay(param.name):
                    continue
                updated_param = param.numpy() - params_before[
                    param.name].numpy() * self.weight_decay * lr_value
                # Write the decayed value back into the parameter. Rebinding
                # the loop variable, as was done before, discarded the result
                # and left weight decay without effect.
                param.set_value(updated_param)
......@@ -11,172 +11,35 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Optimization and learning rate scheduling."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph.learning_rate_scheduler import LearningRateDecay
class ConstantLR(LearningRateDecay):
def __init__(self, learning_rate, begin=0, step=1, dtype='float32'):
super(ConstantLR, self).__init__(begin, step, dtype)
self.learning_rate = learning_rate
def step(self):
return self.learning_rate
class LinearDecay(LearningRateDecay):
def __init__(self,
learning_rate,
warmup_steps,
decay_steps,
end_learning_rate=0.0001,
power=1.0,
cycle=False,
begin=0,
step=1,
dtype='float32'):
super(LinearDecay, self).__init__(begin, step, dtype)
self.learning_rate = learning_rate
self.warmup_steps = warmup_steps
self.decay_steps = decay_steps
self.end_learning_rate = end_learning_rate
self.power = power
self.cycle = cycle
def step(self):
if self.step_num < self.warmup_steps:
decayed_lr = self.learning_rate * (self.step_num /
self.warmup_steps)
decayed_lr = self.create_lr_var(decayed_lr)
else:
tmp_step_num = self.step_num
tmp_decay_steps = self.decay_steps
if self.cycle:
div_res = fluid.layers.ceil(
self.create_lr_var(tmp_step_num / float(self.decay_steps)))
if tmp_step_num == 0:
div_res = self.create_lr_var(1.0)
tmp_decay_steps = self.decay_steps * div_res
else:
tmp_step_num = self.create_lr_var(
tmp_step_num
if tmp_step_num < self.decay_steps else self.decay_steps)
decayed_lr = (self.learning_rate - self.end_learning_rate) * \
((1 - tmp_step_num / tmp_decay_steps) ** self.power) + self.end_learning_rate
return decayed_lr
class Optimizer(object):
    """Adam optimizer wrapper with an optional LR schedule and weight decay.

    The wrapper builds a ``fluid.optimizer.Adam`` instance whose learning
    rate is driven by one of the schedulers selected in ``lr_schedule`` and,
    after every Adam step, applies a decoupled weight-decay update to every
    parameter that is not excluded by ``exclude_from_weight_decay``.

    Args:
        warmup_steps: number of warmup steps; a value ``<= 0`` selects a
            constant learning rate.
        num_train_steps: total number of training steps, forwarded to the
            linear decay scheduler.
        learning_rate: base learning rate.
        model_cls: object exposing ``parameters()``; its parameters are
            snapshotted before each update for the weight-decay term.
        weight_decay: decay coefficient; ``<= 0`` disables weight decay.
        scheduler: ``'noam_decay'`` or ``'linear_warmup_decay'``.
        loss_scaling: stored on the instance; not used by the code in this
            class.
        parameter_list: parameters handed to Adam.
    """

    def __init__(self,
                 warmup_steps,
                 num_train_steps,
                 learning_rate,
                 model_cls,
                 weight_decay,
                 scheduler='linear_warmup_decay',
                 loss_scaling=1.0,
                 parameter_list=None):
        self.warmup_steps = warmup_steps
        self.num_train_steps = num_train_steps
        self.learning_rate = learning_rate
        self.model_cls = model_cls
        self.weight_decay = weight_decay
        self.scheduler = scheduler
        self.loss_scaling = loss_scaling
        self.parameter_list = parameter_list
        # Replaced by a scheduler object inside lr_schedule().
        self.scheduled_lr = 0.0
        self.optimizer = self.lr_schedule()

    def lr_schedule(self):
        """Create the LR scheduler and return the Adam optimizer bound to it.

        Raises:
            ValueError: if ``warmup_steps > 0`` and ``scheduler`` is neither
                ``'noam_decay'`` nor ``'linear_warmup_decay'``.
        """
        if self.warmup_steps > 0:
            if self.scheduler == 'noam_decay':
                self.scheduled_lr = fluid.dygraph.NoamDecay(
                    1 / (self.warmup_steps * (self.learning_rate**2)),
                    self.warmup_steps)
            elif self.scheduler == 'linear_warmup_decay':
                self.scheduled_lr = LinearDecay(self.learning_rate,
                                                self.warmup_steps,
                                                self.num_train_steps, 0.0)
            else:
                raise ValueError("Unkown learning rate scheduler, should be "
                                 "'noam_decay' or 'linear_warmup_decay'")
        else:
            # Without warmup the scheduler name is ignored.
            self.scheduled_lr = ConstantLR(self.learning_rate)
        return fluid.optimizer.Adam(
            learning_rate=self.scheduled_lr,
            parameter_list=self.parameter_list)

    def exclude_from_weight_decay(self, name):
        """Return True for layer-norm and bias parameter names."""
        if "layer_norm" in name:
            return True
        return name.endswith(("_bias", "_b", ".b_0"))

    def state_dict(self):
        """Return the wrapped Adam optimizer's state dict."""
        return self.optimizer.state_dict()

    def set_dict(self, state_dict):
        """Load ``state_dict`` into the wrapped Adam optimizer."""
        return self.optimizer.set_dict(state_dict)

    def get_opti_var_name_list(self):
        """Forward to the wrapped optimizer's ``get_opti_var_name_list``."""
        return self.optimizer.get_opti_var_name_list()

    def current_step_lr(self):
        """Forward to the wrapped optimizer's ``current_step_lr``."""
        return self.optimizer.current_step_lr()

    def minimize(self, loss, use_data_parallel=False, model=None):
        """Run backward, one Adam step, then the weight-decay update.

        Args:
            loss: loss variable; ``backward()`` is called on it here.
            use_data_parallel: when True, ``model.scale_loss`` and
                ``model.apply_collective_grads`` are invoked around the
                backward pass.
            model: data-parallel model wrapper; required when
                ``use_data_parallel`` is True.

        Raises:
            ValueError: if ``use_data_parallel`` is True and ``model`` is
                None.
        """
        # Validate before touching ``model``; the original asserted only
        # after ``model.scale_loss`` had already been called.
        if use_data_parallel and model is None:
            raise ValueError(
                "model must be provided when use_data_parallel is True")

        decay_enabled = self.weight_decay > 0

        if use_data_parallel:
            loss = model.scale_loss(loss)
        loss.backward()

        # Snapshot pre-update parameter values for the decay term.
        param_list = dict()
        if decay_enabled:
            for param in self.model_cls.parameters():
                param_list[param.name] = param * 1.0
                param_list[param.name].stop_gradient = True

        if use_data_parallel:
            model.apply_collective_grads()

        _, param_grads = self.optimizer.minimize(loss)

        if not decay_enabled:
            return

        # The learning rate is the same for every parameter in this step.
        current_lr = self.scheduled_lr.step()
        if not isinstance(current_lr, float):
            current_lr = current_lr.numpy()

        for param, grad in param_grads:
            if self.exclude_from_weight_decay(param.name):
                continue
            current_value = param.numpy()
            updated_param = current_value - param_list[param.name].numpy(
            ) * self.weight_decay * current_lr
            # Write the decayed value back into the parameter. The original
            # only rebound the loop variable, so the decay was discarded.
            # NOTE(review): relies on the dygraph parameter ``set_value``
            # API; confirm it exists in the Paddle version in use.
            param.set_value(updated_param.astype(current_value.dtype))
from paddle.fluid.framework import in_dygraph_mode
from hapi.text.bert.dygraph_optimization import DyOptimizer as DyOptimizer
from hapi.text.bert.static_optimization import StOptimizer as StOptimizer
def make_optimizer(warmup_steps,
                   num_train_steps,
                   learning_rate,
                   weight_decay,
                   model,
                   scheduler='linear_warmup_decay',
                   loss_scaling=1.0,
                   parameter_list=None):
    """Build the optimizer matching the current execution mode.

    When ``in_dygraph_mode()`` is true a ``DyOptimizer`` is returned,
    otherwise a ``StOptimizer``. ``model``, ``loss_scaling`` and
    ``parameter_list`` are forwarded only to ``DyOptimizer``.
    """
    # Arguments accepted by both optimizer flavours.
    shared_kwargs = dict(
        warmup_steps=warmup_steps,
        num_train_steps=num_train_steps,
        learning_rate=learning_rate,
        weight_decay=weight_decay,
        scheduler=scheduler)

    if not in_dygraph_mode():
        return StOptimizer(**shared_kwargs)

    return DyOptimizer(
        model_cls=model,
        loss_scaling=loss_scaling,
        parameter_list=parameter_list,
        **shared_kwargs)
......@@ -19,7 +19,6 @@ from __future__ import print_function
import numpy as np
import paddle.fluid as fluid
from utils.fp16 import create_master_params_grads, master_param_to_train_param, apply_dynamic_loss_scaling
def linear_warmup_decay(learning_rate, warmup_steps, num_train_steps):
......@@ -51,128 +50,95 @@ def linear_warmup_decay(learning_rate, warmup_steps, num_train_steps):
return lr
def optimization(loss,
class StOptimizer(fluid.optimizer.Optimizer):
def __init__(self,
warmup_steps,
num_train_steps,
learning_rate,
train_program,
startup_prog,
weight_decay,
scheduler='linear_warmup_decay',
use_fp16=False,
use_dynamic_loss_scaling=False,
init_loss_scaling=1.0,
incr_every_n_steps=1000,
decr_every_n_nan_or_inf=2,
incr_ratio=2.0,
decr_ratio=0.8):
scheduled_lr, loss_scaling = None, None
if scheduler == 'noam_decay':
if warmup_steps > 0:
scheduled_lr = fluid.layers.learning_rate_scheduler\
.noam_decay(1/(warmup_steps *(learning_rate ** 2)),
warmup_steps)
scheduler='linear_warmup_decay'):
super(StOptimizer, self).__init__(
learning_rate=learning_rate,
parameter_list=None,
regularization=None,
grad_clip=None,
name=None)
self.warmup_steps = warmup_steps
self.num_train_steps = num_train_steps
self.learning_rate = learning_rate
self.weight_decay = weight_decay
self.scheduler = scheduler
def minimize(self, loss):
train_program = fluid.default_main_program()
startup_program = fluid.default_startup_program()
if self.scheduler == 'noam_decay':
if self.warmup_steps > 0:
scheduled_lr = fluid.layers.learning_rate_scheduler\
.noam_decay(1/(self.warmup_steps *(self.learning_rate ** 2)),
self.warmup_steps)
else:
print(
"WARNING: noam decay of learning rate should have postive warmup "
"steps but given {}, using constant learning rate instead!"
.format(self.warmup_steps))
scheduled_lr = fluid.layers.create_global_var(
name=fluid.unique_name.generate("learning_rate"),
shape=[1],
value=self.learning_rate,
dtype='float32',
persistable=True)
elif self.scheduler == 'linear_warmup_decay':
if self.warmup_steps > 0:
scheduled_lr = linear_warmup_decay(self.learning_rate,
self.warmup_steps,
self.num_train_steps)
else:
print(
"WARNING: linear warmup decay of learning rate should have "
"postive warmup steps but given {}, use constant learning rate "
"instead!".format(self.warmup_steps))
scheduled_lr = fluid.layers.create_global_var(
name=fluid.unique_name.generate("learning_rate"),
shape=[1],
value=self.learning_rate,
dtype='float32',
persistable=True)
else:
print(
"WARNING: noam decay of learning rate should have postive warmup "
"steps but given {}, using constant learning rate instead!"
.format(warmup_steps))
scheduled_lr = fluid.layers.create_global_var(
name=fluid.unique_name.generate("learning_rate"),
shape=[1],
value=learning_rate,
dtype='float32',
persistable=True)
elif scheduler == 'linear_warmup_decay':
if warmup_steps > 0:
scheduled_lr = linear_warmup_decay(learning_rate, warmup_steps,
num_train_steps)
else:
print(
"WARNING: linear warmup decay of learning rate should have "
"postive warmup steps but given {}, use constant learning rate "
"instead!".format(warmup_steps))
scheduled_lr = fluid.layers.create_global_var(
name=fluid.unique_name.generate("learning_rate"),
shape=[1],
value=learning_rate,
dtype='float32',
persistable=True)
else:
raise ValueError("Unkown learning rate scheduler, should be "
"'noam_decay' or 'linear_warmup_decay'")
optimizer = fluid.optimizer.Adam(learning_rate=scheduled_lr)
fluid.clip.set_gradient_clip(
clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0))
def exclude_from_weight_decay(param):
    """Return True if ``param`` is a layer-norm or bias parameter.

    A trailing ``".master"`` suffix on ``param.name`` is removed before the
    name is checked.
    """
    name = param.name
    # Remove the suffix explicitly: str.rstrip(".master") strips any trailing
    # characters from the set {'.', 'm', 'a', 's', 't', 'e', 'r'}, which
    # turned e.g. "fc_bias" into "fc_bi" and broke the bias check below.
    master_suffix = ".master"
    if name.endswith(master_suffix):
        name = name[:-len(master_suffix)]
    if "layer_norm" in name:
        return True
    return name.endswith(("_bias", "_b", ".b_0"))
param_list = dict()
if use_fp16:
loss_scaling = fluid.layers.create_global_var(
name=fluid.unique_name.generate("loss_scaling"),
shape=[1],
value=init_loss_scaling,
dtype='float32',
persistable=True)
loss *= loss_scaling
param_grads = optimizer.backward(loss)
master_param_grads = create_master_params_grads(
param_grads, train_program, startup_prog, loss_scaling)
if weight_decay > 0:
for param, _ in master_param_grads:
param_list[param.name] = param * 1.0
param_list[param.name].stop_gradient = True
raise ValueError("Unkown learning rate scheduler, should be "
"'noam_decay' or 'linear_warmup_decay'")
if use_dynamic_loss_scaling:
apply_dynamic_loss_scaling(
loss_scaling, master_param_grads, incr_every_n_steps,
decr_every_n_nan_or_inf, incr_ratio, decr_ratio)
optimizer = fluid.optimizer.Adam(learning_rate=scheduled_lr)
fluid.clip.set_gradient_clip(
clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0))
optimizer.apply_gradients(master_param_grads)
if weight_decay > 0:
for param, grad in master_param_grads:
if exclude_from_weight_decay(param):
continue
with param.block.program._optimized_guard(
[param, grad]), fluid.framework.name_scope("weight_decay"):
updated_param = param - param_list[
param.name] * weight_decay * scheduled_lr
fluid.layers.assign(output=param, input=updated_param)
def exclude_from_weight_decay(param):
    """Return True if ``param`` is a layer-norm or bias parameter.

    A trailing ``".master"`` suffix on ``param.name`` is removed before the
    name is checked.
    """
    name = param.name
    # Remove the suffix explicitly: str.rstrip(".master") strips any trailing
    # characters from the set {'.', 'm', 'a', 's', 't', 'e', 'r'}, which
    # turned e.g. "fc_bias" into "fc_bi" and broke the bias check below.
    master_suffix = ".master"
    if name.endswith(master_suffix):
        name = name[:-len(master_suffix)]
    if "layer_norm" in name:
        return True
    return name.endswith(("_bias", "_b", ".b_0"))
master_param_to_train_param(master_param_grads, param_grads,
train_program)
param_list = dict()
else:
if weight_decay > 0:
if self.weight_decay > 0:
for param in train_program.all_parameters():
param_list[param.name] = param * 1.0
param_list[param.name].stop_gradient = True
_, param_grads = optimizer.minimize(loss)
if weight_decay > 0:
if self.weight_decay > 0:
for param, grad in param_grads:
if exclude_from_weight_decay(param):
continue
with param.block.program._optimized_guard(
[param, grad]), fluid.framework.name_scope("weight_decay"):
updated_param = param - param_list[
param.name] * weight_decay * scheduled_lr
param.name] * self.weight_decay * scheduled_lr
fluid.layers.assign(output=param, input=updated_param)
return scheduled_lr, loss_scaling
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from hapi.text.senta.data_processer import SentaProcessor
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from hapi.text.senta.data_reader import load_vocab
from hapi.text.senta.data_reader import data_reader
from paddle.io import DataLoader
class SentaProcessor(object):
    """Builds ``DataLoader`` objects over the Senta train/dev/test TSV files.

    Files are read from ``<data_dir>/train.tsv``, ``<data_dir>/dev.tsv`` and
    ``<data_dir>/test.tsv`` through ``data_reader``; the vocabulary is loaded
    once with ``load_vocab``.
    """

    # Values reported by get_train_progress(). Nothing in this class updates
    # them; they default to -1 so the accessor no longer raises
    # AttributeError (the original never defined these attributes).
    current_train_example = -1
    current_train_epoch = -1

    def __init__(self, data_dir, vocab_path, random_seed=None):
        self.data_dir = data_dir
        self.vocab = load_vocab(vocab_path)
        # Filled in by data_reader once the corresponding file is read.
        self.num_examples = {"train": -1, "dev": -1, "infer": -1}
        np.random.seed(random_seed)

    def _build_loader(self, file_name, phase, epoch, batch_size, places,
                      padding_size, shuffle=False):
        """Create a DataLoader fed by ``data_reader`` over one TSV file."""
        reader = data_reader((self.data_dir + "/" + file_name), self.vocab,
                             self.num_examples, phase, epoch, padding_size,
                             shuffle)
        loader = DataLoader.from_generator(capacity=50, return_list=True)
        loader.set_sample_generator(
            reader, batch_size=batch_size, drop_last=False, places=places)
        return loader

    def get_train_examples(self, data_dir, epoch, shuffle, batch_size, places,
                           padding_size):
        """Return a loader over ``train.tsv``.

        ``data_dir`` is accepted for interface compatibility only; the
        directory given to the constructor is the one that is read.
        """
        return self._build_loader("train.tsv", "train", epoch, batch_size,
                                  places, padding_size, shuffle)

    def get_dev_examples(self, data_dir, epoch, shuffle, batch_size, places,
                         padding_size):
        """Return a loader over ``dev.tsv`` (``data_dir`` is unused)."""
        return self._build_loader("dev.tsv", "dev", epoch, batch_size, places,
                                  padding_size, shuffle)

    def get_test_examples(self, data_dir, epoch, batch_size, places,
                          padding_size):
        """Return a non-shuffled loader over ``test.tsv``.

        Example counts are recorded under the ``"infer"`` phase key.
        """
        return self._build_loader("test.tsv", "infer", epoch, batch_size,
                                  places, padding_size)

    def get_labels(self):
        """Return the list of label strings."""
        return ["0", "1"]

    def get_num_examples(self, phase):
        """Return the example count recorded for ``phase``.

        Raises:
            ValueError: if ``phase`` is not 'train', 'dev' or 'infer'.
        """
        if phase not in ['train', 'dev', 'infer']:
            raise ValueError(
                "Unknown phase, which should be in ['train', 'dev', 'infer'].")
        return self.num_examples[phase]

    def get_train_progress(self):
        """Return ``(current_train_example, current_train_epoch)``."""
        return self.current_train_example, self.current_train_epoch

    def data_generator(self, padding_size, batch_size, places, phase='train',
                       epoch=1, shuffle=True):
        """Return the loader for ``phase``.

        Raises:
            ValueError: if ``phase`` is not 'train', 'dev' or 'infer'.
        """
        if phase == "train":
            return self.get_train_examples(self.data_dir, epoch, shuffle,
                                           batch_size, places, padding_size)
        if phase == "dev":
            return self.get_dev_examples(self.data_dir, epoch, shuffle,
                                         batch_size, places, padding_size)
        if phase == "infer":
            return self.get_test_examples(self.data_dir, epoch, batch_size,
                                          places, padding_size)
        raise ValueError(
            "Unknown phase, which should be in ['train', 'dev', 'infer'].")
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import io
import sys
import random
def str2bool(v):
    """Return True when ``v``, lower-cased, is "true", "t" or "1"."""
    truthy = ("true", "t", "1")
    lowered = v.lower()
    return lowered in truthy
def data_reader(file_path, word_dict, num_examples, phrase, epoch, padding_size, shuffle=False):
    """Load a two-column TSV file and return a sample generator function.

    Each kept line is ``<space separated tokens>\\t<int label>``. Lines
    starting with ``text_a`` are skipped, and lines without exactly two
    tab-separated columns are reported on stderr and skipped. Tokens are
    mapped through ``word_dict``; unknown tokens and padding both use
    ``len(word_dict)`` as their id. Every sequence is truncated or padded to
    ``padding_size``.

    The number of loaded samples is stored in ``num_examples[phrase]``.
    Samples are shuffled once, at load time, only when ``shuffle`` is true
    and ``phrase`` is ``"train"``.

    Returns:
        A zero-argument generator function yielding ``(ids, [label])``
        pairs, iterating the loaded data ``epoch`` times.
    """
    unk_id = len(word_dict)
    samples = []

    with io.open(file_path, "r", encoding='utf8') as fin:
        for raw_line in fin:
            if raw_line.startswith('text_a'):
                continue
            fields = raw_line.strip().split("\t")
            if len(fields) != 2:
                sys.stderr.write("[NOTICE] Error Format Line!")
                continue
            text, tag = fields
            label = [int(tag)]
            ids = [word_dict.get(token, unk_id) for token in text.split(" ")]
            # Truncate, then right-pad with the unknown id up to padding_size.
            ids = ids[:padding_size]
            ids.extend([unk_id] * (padding_size - len(ids)))
            samples.append((ids, label))

    if shuffle and phrase == "train":
        random.shuffle(samples)

    num_examples[phrase] = len(samples)

    def reader():
        for _ in range(epoch):
            for doc, label in samples:
                yield doc, label

    return reader
def load_vocab(file_path):
    """Read a one-token-per-line vocabulary file into a token -> id dict.

    Ids are assigned in order of first appearance, starting at 0; repeated
    tokens keep their first id. After the file is read, ``"<unk>"`` is set
    to the current size of the dict.
    """
    vocab = {}
    with io.open(file_path, 'r', encoding='utf8') as f:
        for raw_line in f:
            token = raw_line.strip()
            if token in vocab:
                continue
            # The next free id always equals the number of tokens seen so far.
            vocab[token] = len(vocab)
    vocab["<unk>"] = len(vocab)
    return vocab
......@@ -1096,7 +1096,8 @@ class PrePostProcessLayer(Layer):
self.functors = []
for cmd in self.process_cmd:
if cmd == "a": # add residual connection
self.functors.append(lambda x, y: x + y if y else x)
self.functors.append(
lambda x, y: x + y if y is not None else x)
elif cmd == "n": # add layer normalization
if reused_layer_norm is not None:
layer_norm = reused_layer_norm
......@@ -1218,7 +1219,7 @@ class MultiHeadAttention(Layer):
# scale dot product attention
product = layers.matmul(
x=q, y=k, transpose_y=True, alpha=self.d_model**-0.5)
if attn_bias:
if attn_bias is not None:
product += attn_bias
weights = layers.softmax(product)
if self.dropout_rate:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册