Commit 07299487 authored by LielinJiang

update comments

@@ -18,10 +18,10 @@ from hapi.metrics import Accuracy
 from hapi.configure import Config
 from hapi.text.bert import BertEncoder
 from paddle.fluid.dygraph import Linear, Layer
-from hapi.model import set_device, Model, Input
 from hapi.loss import SoftmaxWithCrossEntropy
+from hapi.model import set_device, Model, Input
 import hapi.text.tokenizer.tokenization as tokenization
-from hapi.text.bert import Optimizer, BertConfig, BertDataLoader, BertInputExample
+from hapi.text.bert import BertConfig, BertDataLoader, BertInputExample, make_optimizer
 class ClsModelLayer(Model):
@@ -128,7 +128,7 @@ def main():
 [None, None], 'int64', name='src_ids'), Input(
 [None, None], 'int64', name='pos_ids'), Input(
 [None, None], 'int64', name='sent_ids'), Input(
-[None, None], 'float32', name='input_mask')
+[None, None, 1], 'float32', name='input_mask')
 ]
 labels = [Input([None, 1], 'int64', name='label')]
@@ -139,13 +139,13 @@ def main():
 len(["contradiction", "entailment", "neutral"]),
 return_pooled_out=True)
-optimizer = Optimizer(
+optimizer = make_optimizer(
 warmup_steps=warmup_steps,
 num_train_steps=max_train_steps,
 learning_rate=config.learning_rate,
-model_cls=cls_model,
 weight_decay=config.weight_decay,
 scheduler=config.lr_scheduler,
+model=cls_model,
 loss_scaling=config.loss_scaling,
 parameter_list=cls_model.parameters())
@@ -157,8 +157,7 @@ def main():
 labels,
 device=device)
-cls_model.bert_layer.init_parameters(
-config.init_pretraining_params, verbose=config.verbose)
+cls_model.bert_layer.load("./bert_small", reset_optimizer=True)
 # do train
 cls_model.fit(train_data=train_dataloader.dataloader,
...
@@ -4,7 +4,7 @@ TASK_NAME='MNLI'
 DATA_PATH="./data/glue_data/MNLI/"
 CKPT_PATH="./data/saved_model/mnli_models"
-export CUDA_VISIBLE_DEVICES=0
+export CUDA_VISIBLE_DEVICES=1
 # start fine-tuning
 python3.7 bert_classifier.py\
...
@@ -18,10 +18,10 @@ from hapi.metrics import Accuracy
 from hapi.configure import Config
 from hapi.text.bert import BertEncoder
 from paddle.fluid.dygraph import Linear, Layer
-from hapi.model import set_device, Model, Input
 from hapi.loss import SoftmaxWithCrossEntropy
+from hapi.model import set_device, Model, Input
 import hapi.text.tokenizer.tokenization as tokenization
-from hapi.text.bert import Optimizer, BertConfig, BertDataLoader, BertInputExample
+from hapi.text.bert import BertConfig, BertDataLoader, BertInputExample, make_optimizer
 class ClsModelLayer(Model):
@@ -99,12 +99,12 @@ def main():
 train_dataloader = BertDataLoader(
 "./data/glue_data/MNLI/train.tsv",
-tokenizer, ["contradiction", "entailment", "neutral"],
+tokenizer,
+["contradiction", "entailment", "neutral"],
 max_seq_length=config.max_seq_len,
 batch_size=config.batch_size,
 line_processor=mnli_line_processor,
-mode="leveldb",
-phase="train")
+mode="leveldb", )
 test_dataloader = BertDataLoader(
 "./data/glue_data/MNLI/dev_matched.tsv",
@@ -130,7 +130,7 @@ def main():
 [None, None], 'int64', name='src_ids'), Input(
 [None, None], 'int64', name='pos_ids'), Input(
 [None, None], 'int64', name='sent_ids'), Input(
-[None, None], 'float32', name='input_mask')
+[None, None, 1], 'float32', name='input_mask')
 ]
 labels = [Input([None, 1], 'int64', name='label')]
@@ -141,13 +141,13 @@ def main():
 len(["contradiction", "entailment", "neutral"]),
 return_pooled_out=True)
-optimizer = Optimizer(
+optimizer = make_optimizer(
 warmup_steps=warmup_steps,
 num_train_steps=max_train_steps,
 learning_rate=config.learning_rate,
-model_cls=cls_model,
 weight_decay=config.weight_decay,
 scheduler=config.lr_scheduler,
+model=cls_model,
 loss_scaling=config.loss_scaling,
 parameter_list=cls_model.parameters())
@@ -159,8 +159,7 @@ def main():
 labels,
 device=device)
-cls_model.bert_layer.init_parameters(
-config.init_pretraining_params, verbose=config.verbose)
+cls_model.bert_layer.load("./bert_small", reset_optimizer=True)
 # do train
 cls_model.fit(train_data=train_dataloader.dataloader,
...
@@ -5,7 +5,7 @@ DATA_PATH="./data/glue_data/MNLI/"
 CKPT_PATH="./data/saved_model/mnli_models"
 # start fine-tuning
-python3.7 -m paddle.distributed.launch --started_port 8899 --selected_gpus=0,1,2,3 bert_classifier.py\
+python3.7 -m paddle.distributed.launch --started_port 8899 --selected_gpus=1,2,3 bert_classifier.py\
 --use_cuda true \
 --do_train true \
 --do_test true \
...
@@ -4,7 +4,7 @@ TASK_NAME='MNLI'
 DATA_PATH="./data/glue_data/MNLI/"
 CKPT_PATH="./data/saved_model/mnli_models"
-export CUDA_VISIBLE_DEVICES=0
+export CUDA_VISIBLE_DEVICES=1
 # start fine-tuning
 python3.7 bert_classifier.py\
...
@@ -4,7 +4,7 @@
 ## Contents
 - [Model Introduction](#模型简介)
-- [Code Structure](#代码结构)
+- [Getting the Code](#代码获取)
 - [Data Preparation](#数据准备)
 - [Model Training](#模型训练)
 - [Model Evaluation](#模型评估)
@@ -22,7 +22,21 @@ BMN Overview
 </p>
-## Code Structure
+## Getting the Code
+### Downloading the code and setting environment variables
+Clone the repository and set the `PYTHONPATH` environment variable:
+```bash
+git clone https://github.com/PaddlePaddle/hapi
+cd hapi
+export PYTHONPATH=`pwd`:$PYTHONPATH
+cd examples/bmn
+```
+### Code Structure
 ```
 ├── bmn.yaml # network config file for quick parameter setup
 ├── run.sh # quick-start script that directly launches multi-GPU training
@@ -74,6 +88,8 @@ BMN is trained on the dataset provided by ActivityNet1.3; we provide the processed
 - The program above saves its outputs to the folder given by `--output_path`, output/EVAL/BMN\_results by default; evaluation results are saved to the folder given by `--result_path`, evaluate\_results by default.
+- Multi-GPU evaluation is not supported yet.
 - Note: the loss may be nan during evaluation. Evaluation runs on single samples, and a sample may contain no instance with iou > 0.6, which produces nan; this has no effect on the final evaluation results.
@@ -108,6 +124,8 @@ BMN is trained on the dataset provided by ActivityNet1.3; we provide the processed
 - The program above saves its outputs to the folder given by `--output_path`, output/INFER/BMN\_results by default; prediction results are saved to the folder given by `--result_path`, predict\_results by default.
+- Multi-GPU prediction is not supported yet.
 ## Reference Papers
...
@@ -15,7 +15,6 @@ TRAIN:
 batch_size: 4
 num_workers: 4
 use_shuffle: True
-device: "gpu"
 learning_rate: 0.001
 learning_rate_decay: 0.1
 lr_decay_iter: 4200
@@ -28,14 +27,14 @@ TEST:
 subset: "validation"
 batch_size: 1
 num_workers: 1
-output_path: "output/EVAL/BMN_results"
-result_path: "evaluate_results"
+output_path: "./output/EVAL/BMN_results"
+result_path: "./evaluate_results"
 INFER:
 subset: "test"
 batch_size: 1
 num_workers: 1
 filelist: './infer.list'
-output_path: "output/INFER/BMN_results"
-result_path: "predict_results"
+output_path: "./output/INFER/BMN_results"
+result_path: "./predict_results"
@@ -54,18 +54,18 @@ def parse_args():
 '--weights',
 type=str,
 default=None,
-help='weight path, None to automatically download weights provided by Paddle.'
+help='weight path. None to automatically download weights provided by Paddle.'
 )
 parser.add_argument(
 '--output_path',
 type=str,
-default="output/EVAL/BMN_results",
-help='output dir path, default to use output/EVAL/BMN_results')
+default=None,
+help='output dir path. None to use config file setting.')
 parser.add_argument(
 '--result_path',
 type=str,
-default="evaluate_results/",
-help='output dir path after post processing, default to use ./evaluate_results/'
+default=None,
+help='output dir path after post processing. None to use config file setting.'
 )
 parser.add_argument(
 '--log_interval',
...
@@ -46,7 +46,10 @@ def parse_args():
 default='bmn.yaml',
 help='path to config file of model')
 parser.add_argument(
-'--device', type=str, default='GPU', help='default use gpu.')
+'--device',
+type=str,
+default='gpu',
+help='gpu or cpu, default use gpu.')
 parser.add_argument(
 '--weights',
 type=str,
@@ -56,18 +59,18 @@ def parse_args():
 parser.add_argument(
 '--filelist',
 type=str,
-default="infer.list",
-help='infer file list, default to use ./infer.list')
+default=None,
+help='infer file list, None to use config file setting.')
 parser.add_argument(
 '--output_path',
 type=str,
-default="output/INFER/BMN_results",
-help='output dir path, default to use output/INFER/BMN_results')
+default=None,
+help='output dir path, None to use config file setting.')
 parser.add_argument(
 '--result_path',
 type=str,
-default="predict_results/",
-help='output dir path after post processing, default to use ./predict_results/'
+default=None,
+help='output dir path after post processing, None to use config file setting.'
 )
 parser.add_argument(
 '--log_interval',
...
@@ -48,7 +48,7 @@ class BmnDataset(Dataset):
 def __getitem__(self, index):
 video_name = self.video_list[index]
-video_idx = self.video_list.index(video_name)
+video_idx = np.array(self.video_list.index(video_name)).astype('int64')
 video_feat = self.load_file(video_name)
 if self.mode == 'infer':
 return video_feat, video_idx
...
@@ -49,7 +49,7 @@ def parse_args():
 parser.add_argument(
 '--learning_rate',
 type=float,
-default=0.001,
+default=None,
 help='learning rate use for training. None to use config file setting.')
 parser.add_argument(
 '--resume',
...
## Introduction
Sentiment is a high-level intelligent behavior of humans, and identifying the sentiment polarity of text requires deep semantic modeling. In addition, different domains (e.g. dining, sports) express sentiment differently, so training requires large-scale data covering many domains. We address both problems with deep-learning-based semantic models and large-scale data mining. For effectiveness, we evaluate on the open-source sentiment classification dataset ChnSentiCorp; the results are as follows:

| Model | dev | test |
| :------| :------ | :------ |
| CNN | 90.6% | 89.7% |
| BOW | 90.1% | 90.3% |
| GRU | 90.0% | 91.1% |
| BIGRU | 89.7% | 89.6% |

For documentation on dygraph mode, see [Dygraph](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/user_guides/howto/dygraph/DyGraph.html).
## Quick Start
This project requires PaddlePaddle 1.7.0 or later; see the [installation guide](http://www.paddlepaddle.org/#quick-start) for installation instructions.
Python 2.7, or Python 3.5 and later, is required.
#### Downloading the code and setting environment variables
Clone the repository and set the `PYTHONPATH` environment variable:
```shell
git clone https://github.com/PaddlePaddle/hapi
cd hapi
export PYTHONPATH=$PYTHONPATH:`pwd`
cd examples/sentiment_classification
```
#### Data preparation
Download the preprocessed data. After extracting the archive, the senta_data directory contains the training set (train.tsv), the dev set (dev.tsv), the test set (test.tsv), and the corresponding vocabulary (word_dict.txt):
```shell
wget https://baidu-nlp.bj.bcebos.com/sentiment_classification-dataset-1.0.0.tar.gz
tar -zxvf sentiment_classification-dataset-1.0.0.tar.gz
```
#### Model training
With the example dataset, the command below trains the model on the training set (train.tsv) and validates it on the dev set (dev.tsv). Before training, create the directory in which checkpoints will be saved and set its path via the checkpoints option.
Choose model_type from bow_net, cnn_net, gru_net, and bigru_net.
All model parameters are configured in `senta.yaml`; for training, make sure the `do_train` option in `senta.yaml` is set to `True`.
```shell
python sentiment_classifier.py
```
#### Model prediction
With a trained model, the command below predicts labels for the unlabeled data (test.tsv).
For prediction, make sure the `do_infer` option in `senta.yaml` is set to `True`.
```shell
python sentiment_classifier.py
```
#### Model parameters
Model parameters are configured in `senta.yaml`:
1. batch_size: choose according to the model and GPU memory usage; a larger batch_size is recommended for cnn/bow and a smaller one for gru/bigru.
2. padding_size: defaults to 150.
3. epoch: defaults to 5 for training and 1 for inference.
4. learning_rate: defaults to 0.002.
## Advanced Usage
#### Task definition
Traditional sentiment classification relies on dictionaries or feature engineering, which requires tedious manual feature design and prior knowledge, stays at a shallow level of understanding, and generalizes poorly. To avoid these limitations, we adopt deep learning, which has been advancing rapidly in recent years. Deep-learning-based sentiment classification does not depend on hand-crafted features; it understands the input text end to end and judges sentiment polarity from the learned semantic representation.
#### Model overview
For the sentiment polarity classification task, this project provides the following models:
+ CNN (Convolutional Neural Network): a basic sequence model that handles variable-length input and extracts features within local regions;
+ BOW (Bag of Words): a non-sequential model built from simple fully connected layers;
+ GRU (Gated Recurrent Unit): a sequence model that handles long-range dependencies in sequential text well;
+ BI-GRU (Bidirectional Gated Recurrent Unit): a sequence model with a bidirectional, two-layer GRU structure that better captures the semantics of a sentence.
#### Data format
Data for training, prediction, and evaluation can be organized by users according to their own scenarios. Each line consists of two tab-separated columns: the first column is space-segmented Chinese text (the word segmentation preprocessing is described below) in UTF-8 encoding; the second column is the sentiment class (0 for negative, 1 for positive). Note that the first line of a data file is fixed to "text_a\tlabel". For example:
```text
特 喜欢 这种 好看的 狗狗 1
这 真是 惊艳 世界 的 中国 黑科技 1
环境 特别 差 ,脏兮兮 的,再也 不去 了 0
```
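As an illustration only (this helper is not part of the project), a minimal sketch of reading data in this format into (tokens, label) pairs could look like the following, assuming tab separation and the fixed "text_a\tlabel" header described above:
```python
# Hypothetical helper, not part of this repo: parse the two-column TSV format
# described above. The path and header check follow the text; adjust for your data.
import io

def load_senta_tsv(path):
    samples = []
    with io.open(path, "r", encoding="utf8") as f:
        header = f.readline().rstrip("\n")
        assert header == "text_a\tlabel", "first line must be the fixed header"
        for line in f:
            text, label = line.rstrip("\n").split("\t")
            tokens = text.split()  # text is already space-segmented
            samples.append((tokens, int(label)))  # 0: negative, 1: positive
    return samples

# e.g. samples = load_senta_tsv("./senta_data/train.tsv")
```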
#### Code structure
```text
.
├── sentiment_classifier.py # project entry point, wrapping training, prediction, and evaluation
├── models.py # network definitions
```
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid as fluid
from paddle.fluid.dygraph.nn import Linear, Embedding
from paddle.fluid.dygraph.base import to_variable
import numpy as np
from hapi.model import Model
from hapi.text.text import GRUEncoderLayer as BiGRUEncoder
from hapi.text.test import BOWEncoder, CNNEncoder, GRUEncoder
class CNN(Model):
def __init__(self, dict_dim, batch_size, seq_len):
super(CNN, self).__init__()
self.dict_dim = dict_dim
self.emb_dim = 128
self.hid_dim = 128
self.fc_hid_dim = 96
self.class_dim = 2
self.channels = 1
self.win_size = [3, self.hid_dim]
self.batch_size = batch_size
self.seq_len = seq_len
self._encoder = CNNEncoder(
dict_size=self.dict_dim + 1,
emb_dim=self.emb_dim,
seq_len=self.seq_len,
filter_size= self.win_size,
num_filters= self.hid_dim,
hidden_dim= self.hid_dim,
padding_idx=None,
act='tanh')
self._fc1 = Linear(input_dim = self.hid_dim*self.seq_len, output_dim=self.fc_hid_dim, act="softmax")
self._fc_prediction = Linear(input_dim = self.fc_hid_dim,
output_dim = self.class_dim,
act="softmax")
def forward(self, inputs):
conv_3 = self._encoder(inputs)
fc_1 = self._fc1(conv_3)
prediction = self._fc_prediction(fc_1)
return prediction
class BOW(Model):
def __init__(self, dict_dim, batch_size, seq_len):
super(BOW, self).__init__()
self.dict_dim = dict_dim
self.emb_dim = 128
self.hid_dim = 128
self.fc_hid_dim = 96
self.class_dim = 2
self.batch_size = batch_size
self.seq_len = seq_len
self._encoder = BOWEncoder(
dict_size=self.dict_dim + 1,
emb_dim=self.emb_dim,
padding_idx=None,
bow_dim=self.hid_dim,
seq_len=self.seq_len)
self._fc1 = Linear(input_dim = self.hid_dim, output_dim=self.hid_dim, act="tanh")
self._fc2 = Linear(input_dim = self.hid_dim, output_dim=self.fc_hid_dim, act="tanh")
self._fc_prediction = Linear(input_dim = self.fc_hid_dim,
output_dim = self.class_dim,
act="softmax")
def forward(self, inputs):
bow_1 = self._encoder(inputs)
bow_1 = fluid.layers.tanh(bow_1)
fc_1 = self._fc1(bow_1)
fc_2 = self._fc2(fc_1)
prediction = self._fc_prediction(fc_2)
return prediction
class GRU(Model):
def __init__(self, dict_dim, batch_size, seq_len):
super(GRU, self).__init__()
self.dict_dim = dict_dim
self.emb_dim = 128
self.hid_dim = 128
self.fc_hid_dim = 96
self.class_dim = 2
self.batch_size = batch_size
self.seq_len = seq_len
self._fc1 = Linear(input_dim=self.hid_dim, output_dim=self.fc_hid_dim, act="tanh")
self._fc_prediction = Linear(input_dim=self.fc_hid_dim,
output_dim=self.class_dim,
act="softmax")
self._encoder = GRUEncoder(
dict_size=self.dict_dim + 1,
emb_dim=self.emb_dim,
gru_dim=self.hid_dim,
hidden_dim=self.hid_dim,
padding_idx=None,
seq_len=self.seq_len)
def forward(self, inputs):
emb = self._encoder(inputs)
fc_1 = self._fc1(emb)
prediction = self._fc_prediction(fc_1)
return prediction
class BiGRU(Model):
def __init__(self, dict_dim, batch_size, seq_len):
super(BiGRU, self).__init__()
self.dict_dim = dict_dim
self.emb_dim = 128
self.hid_dim = 128
self.fc_hid_dim = 96
self.class_dim = 2
self.batch_size = batch_size
self.seq_len = seq_len
self.embedding = Embedding(
size=[self.dict_dim + 1, self.emb_dim],
dtype='float32',
param_attr=fluid.ParamAttr(learning_rate=30),
is_sparse=False)
h_0 = np.zeros((self.batch_size, self.hid_dim), dtype="float32")
h_0 = to_variable(h_0)
self._fc1 = Linear(input_dim = self.hid_dim, output_dim=self.hid_dim*3)
self._fc2 = Linear(input_dim = self.hid_dim*2, output_dim=self.fc_hid_dim, act="tanh")
self._fc_prediction = Linear(input_dim=self.fc_hid_dim,
output_dim=self.class_dim,
act="softmax")
self._encoder = BiGRUEncoder(
grnn_hidden_dim=self.hid_dim,
input_dim=self.hid_dim * 3,
h_0=h_0,
init_bound=0.1,
is_bidirection=True)
def forward(self, inputs):
emb = self.embedding(inputs)
emb = fluid.layers.reshape(emb, shape=[self.batch_size, -1, self.hid_dim])
fc_1 = self._fc1(emb)
encoded_vector = self._encoder(fc_1)
encoded_vector = fluid.layers.tanh(encoded_vector)
encoded_vector = fluid.layers.reduce_max(encoded_vector, dim=1)
fc_2 = self._fc2(encoded_vector)
prediction = self._fc_prediction(fc_2)
return prediction
checkpoints: "./checkpoints"
epoch: 5
save_freq: 1
eval_freq: 1
lr: 0.002
padding_size: 150
skip_steps: 10
verbose: False
data_dir: "./senta_data/"
vocab_path: "./senta_data/word_dict.txt"
vocab_size: 33256
batch_size: 20
random_seed: 0
use_cuda: True
do_train: True
do_infer: False
model_type: "bow_net"
output_dir: "./output"
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Sentiment Classification in Paddle Dygraph Mode. """
from __future__ import print_function
import numpy as np
import paddle.fluid as fluid
from hapi.model import set_device, Model, CrossEntropy, Input
from hapi.configure import Config
from hapi.text.senta import SentaProcessor
from hapi.metrics import Accuracy
from models import CNN, BOW, GRU, BiGRU
import json
import os
args = Config(yaml_file='./senta.yaml')
args.build()
args.Print()
device = set_device("gpu" if args.use_cuda else "cpu")
dev_count = fluid.core.get_cuda_device_count() if args.use_cuda else 1
def main():
if args.do_train:
train()
elif args.do_infer:
infer()
def train():
fluid.enable_dygraph(device)
processor = SentaProcessor(
data_dir=args.data_dir,
vocab_path=args.vocab_path,
random_seed=args.random_seed)
num_labels = len(processor.get_labels())
num_train_examples = processor.get_num_examples(phase="train")
max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count
train_data_generator = processor.data_generator(
batch_size=args.batch_size,
padding_size=args.padding_size,
places=device,
phase='train',
epoch=args.epoch,
shuffle=False)
eval_data_generator = processor.data_generator(
batch_size=args.batch_size,
padding_size=args.padding_size,
places=device,
phase='dev',
epoch=args.epoch,
shuffle=False)
if args.model_type == 'cnn_net':
model = CNN( args.vocab_size, args.batch_size,
args.padding_size)
elif args.model_type == 'bow_net':
model = BOW( args.vocab_size, args.batch_size,
args.padding_size)
elif args.model_type == 'gru_net':
model = GRU( args.vocab_size, args.batch_size,
args.padding_size)
elif args.model_type == 'bigru_net':
model = BiGRU( args.vocab_size, args.batch_size,
args.padding_size)
optimizer = fluid.optimizer.Adagrad(learning_rate=args.lr, parameter_list=model.parameters())
inputs = [Input([None, None], 'int64', name='doc')]
labels = [Input([None, 1], 'int64', name='label')]
model.prepare(
optimizer,
CrossEntropy(),
Accuracy(topk=(1,)),
inputs,
labels,
device=device)
model.fit(train_data=train_data_generator,
eval_data=eval_data_generator,
batch_size=args.batch_size,
epochs=args.epoch,
save_dir=args.checkpoints,
eval_freq=args.eval_freq,
save_freq=args.save_freq)
def infer():
fluid.enable_dygraph(device)
processor = SentaProcessor(
data_dir=args.data_dir,
vocab_path=args.vocab_path,
random_seed=args.random_seed)
infer_data_generator = processor.data_generator(
batch_size=args.batch_size,
padding_size=args.padding_size,
places=device,
phase='infer',
epoch=1,
shuffle=False)
if args.model_type == 'cnn_net':
model_infer = CNN( args.vocab_size, args.batch_size,
args.padding_size)
elif args.model_type == 'bow_net':
model_infer = BOW( args.vocab_size, args.batch_size,
args.padding_size)
elif args.model_type == 'gru_net':
model_infer = GRU( args.vocab_size, args.batch_size,
args.padding_size)
elif args.model_type == 'bigru_net':
model_infer = BiGRU( args.vocab_size, args.batch_size,
args.padding_size)
print('Do inferring ...... ')
inputs = [Input([None, None], 'int64', name='doc')]
model_infer.prepare(
None,
CrossEntropy(),
Accuracy(topk=(1,)),
inputs,
device=device)
model_infer.load(args.checkpoints, reset_optimizer=True)
preds = model_infer.predict(test_data=infer_data_generator)
preds = np.array(preds[0]).reshape((-1, 2))
if args.output_dir:
with open(os.path.join(args.output_dir, 'predictions.json'), 'w') as w:
for p in range(len(preds)):
label = np.argmax(preds[p])
result = json.dumps({'index': p, 'label': label, 'probs': preds[p].tolist()})
w.write(result+'\n')
print('Predictions saved at '+os.path.join(args.output_dir, 'predictions.json'))
if __name__ == '__main__':
main()
@@ -14,7 +14,7 @@ Sequence Tagging is a sequence labeling model that can be used for word segmentation,
 #### 1. Installing PaddlePaddle
-This project requires PaddlePaddle 1.7 or later and PaddleHub 1.0.0 or later. For PaddlePaddle installation see the official [quick install guide](http://www.paddlepaddle.org/paddle#quick-start); for PaddleHub installation see [PaddleHub](https://github.com/PaddlePaddle/PaddleHub).
+This project requires PaddlePaddle 1.8 or later and PaddleHub 1.0.0 or later. For PaddlePaddle installation see the official [quick install guide](http://www.paddlepaddle.org/paddle#quick-start); for PaddleHub installation see [PaddleHub](https://github.com/PaddlePaddle/PaddleHub).
 > Warning: the GPU and CPU builds of PaddlePaddle are paddlepaddle-gpu and paddlepaddle respectively; make sure to install the right one.
...
@@ -21,6 +21,7 @@ from __future__ import print_function
 import io
 import os
 import sys
+import six
 import math
 import argparse
 import numpy as np
@@ -71,6 +72,11 @@ def main(args):
 word_len = length[i]
 word_ids = results[i][:word_len]
 tags = [dataset.id2label_dict[str(id)] for id in word_ids]
+if six.PY3:
+tags = [bytes(tag, encoding="utf8") for tag in tags]
+out = b"\002".join(tags) + b"\n"
+f.write(out)
+else:
 f.write("\002".join(tags) + "\n")
...
@@ -20,7 +20,6 @@ from __future__ import print_function
 import io
 import os
-import leveldb
 import numpy as np
 import shutil
 from functools import partial
...
@@ -39,8 +39,8 @@ The TSM model is a video classification model built by inserting the Temporal Shift Module into ResNet
 ```bash
 git clone https://github.com/PaddlePaddle/hapi
 cd hapi
-export PYTHONPATH=$PYTHONPATH:`pwd`
-cd tsm
+export PYTHONPATH=`pwd`:$PYTHONPATH
+cd examples/tsm
 ```
 ### Data Preparation
@@ -141,6 +141,8 @@ python infer.py --data=<path/to/dataset> --label_list=<path/to/label_list> --inf
 2020-04-03 07:37:16,321-INFO: Sample ./kineteics/val_10/data_batch_10-042_6 predict label: 6, ground truth label: 6
 ```
+**Note:** for inference, `--infer_file` must point to a pickle file path.
 ## Reference Papers
 - [Temporal Shift Module for Efficient Video Understanding](https://arxiv.org/abs/1811.08383v1), Ji Lin, Chuang Gan, Song Han
...
@@ -26,6 +26,7 @@ from check import check_gpu, check_version
 from modeling import tsm_resnet50
 from kinetics_dataset import KineticsDataset
 from transforms import *
+from utils import print_arguments
 import logging
 logger = logging.getLogger(__name__)
@@ -56,7 +57,7 @@ def main():
 model.load(FLAGS.weights, reset_optimizer=True)
 imgs, label = dataset[0]
-pred = model.test([imgs[np.newaxis, :]])
+pred = model.test_batch([imgs[np.newaxis, :]])
 pred = labels[np.argmax(pred)]
 logger.info("Sample {} predict label: {}, ground truth label: {}" \
 .format(FLAGS.infer_file, pred, labels[int(label)]))
@@ -86,6 +87,7 @@ if __name__ == '__main__':
 type=str,
 help="weights path for evaluation")
 FLAGS = parser.parse_args()
+print_arguments(FLAGS)
 check_gpu(str.lower(FLAGS.device) == 'gpu')
 check_version()
...
@@ -113,7 +113,7 @@ class KineticsDataset(Dataset):
 if self.transform:
 imgs, label = self.transform(imgs, label)
-return imgs, np.array([label])
+return imgs, np.array([label]).astype('int64')
 @property
 def num_classes(self):
...
@@ -31,6 +31,7 @@ from modeling import tsm_resnet50
 from check import check_gpu, check_version
 from kinetics_dataset import KineticsDataset
 from transforms import *
+from utils import print_arguments
 def make_optimizer(step_per_epoch, parameter_list=None):
@@ -106,7 +107,7 @@ def main():
 eval_data=val_dataset,
 epochs=FLAGS.epoch,
 batch_size=FLAGS.batch_size,
-save_dir='tsm_checkpoint',
+save_dir=FLAGS.save_dir or 'tsm_checkpoint',
 num_workers=FLAGS.num_workers,
 drop_last=True,
 shuffle=True)
@@ -150,7 +151,14 @@ if __name__ == '__main__':
 default=None,
 type=str,
 help="weights path for evaluation")
+parser.add_argument(
+"-s",
+"--save_dir",
+default=None,
+type=str,
+help="directory path for checkpoint saving, default ./tsm_checkpoint")
 FLAGS = parser.parse_args()
+print_arguments(FLAGS)
 check_gpu(str.lower(FLAGS.device) == 'gpu')
 check_version()
...
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import six
import logging
logger = logging.getLogger(__name__)
__all__ = ['print_arguments']
def print_arguments(args):
"""Print argparse's arguments.
Usage:
.. code-block:: python
parser = argparse.ArgumentParser()
parser.add_argument("name", default="Jonh", type=str, help="User name.")
args = parser.parse_args()
print_arguments(args)
:param args: Input argparse.Namespace for printing.
:type args: argparse.Namespace
"""
logger.info("----------- Configuration Arguments -----------")
for arg, value in sorted(six.iteritems(vars(args))):
logger.info("%s: %s" % (arg, value))
logger.info("------------------------------------------------")
@@ -53,8 +53,8 @@ The YOLOv3 network consists of a base feature-extraction network, multi-scale feature fusion layers
 ```bash
 git clone https://github.com/PaddlePaddle/hapi
 cd hapi
-export PYTHONPATH=$PYTHONPATH:`pwd`
-cd tsm
+export PYTHONPATH=`pwd`:$PYTHONPATH
+cd examples/yolov3
 ```
 #### Installing the COCO-API
@@ -126,13 +126,13 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch main.py --data=
 Launch multi-GPU training as follows:
 ```bash
-CUDA_VISIBLE_DEVICES=0,1,2,3 python main.py -m paddle.distributed.launch --data=<path/to/dataset> --batch_size=16 -d
+CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch main.py --data=<path/to/dataset> --batch_size=16 -d
 ```
 ### Model Evaluation
-The YOLOv3 model outputs LoDTensor, so evaluation only supports batch_size 1; the model can be evaluated in either of the following two ways.
+The YOLOv3 model outputs LoDTensor, so evaluation only supports a single card with batch_size 1; the model can be evaluated in either of the following two ways.
 1. Evaluate with the [YOLOv3-DarkNet53](https://paddlemodels.bj.bcebos.com/hapi/yolov3_darknet53.pdparams) weights released by Paddle (downloaded automatically)
@@ -180,7 +180,7 @@ python infer.py --label_list=dataset/voc/label_list.txt --infer_image=image/dog.
 2. Load a checkpoint for accuracy evaluation
 ```bash
-python infer.py --label_list=dataset/voc/label_list.txt --infer_image=image/dog.jpg --weights=yolo_checkpoint/mo_mixup/final
+python infer.py --label_list=dataset/voc/label_list.txt --infer_image=image/dog.jpg --weights=yolo_checkpoint/no_mixup/final
 ```
 The visualized inference results are saved to the folder given by `--output`, `./output` by default.
...
@@ -28,7 +28,7 @@ from hapi.model import Model, Input, set_device
 from modeling import yolov3_darknet53, YoloLoss
 from transforms import *
+from utils import print_arguments
 from visualizer import draw_bbox
 import logging
@@ -91,7 +91,7 @@ def main():
 img_id = np.array([0]).astype('int64')[np.newaxis, :]
 img_shape = np.array([h, w]).astype('int32')[np.newaxis, :]
-_, bboxes = model.test([img_id, img_shape, img])
+_, bboxes = model.test_batch([img_id, img_shape, img])
 vis_img = draw_bbox(orig_img, cat2name, bboxes, FLAGS.draw_threshold)
 save_name = get_save_image_name(FLAGS.output_dir, FLAGS.infer_image)
@@ -121,6 +121,7 @@ if __name__ == '__main__':
 "-w", "--weights", default=None, type=str,
 help="path to weights for inference")
 FLAGS = parser.parse_args()
+print_arguments(FLAGS)
 assert os.path.isfile(FLAGS.infer_image), \
 "infer_image {} not a file".format(FLAGS.infer_image)
 assert os.path.isfile(FLAGS.label_list), \
...
@@ -33,6 +33,7 @@ from modeling import yolov3_darknet53, YoloLoss
 from coco import COCODataset
 from coco_metric import COCOMetric
 from transforms import *
+from utils import print_arguments
 NUM_MAX_BOXES = 50
@@ -171,16 +172,18 @@ def main():
 if FLAGS.resume is not None:
 model.load(FLAGS.resume)
+save_dir = FLAGS.save_dir or 'yolo_checkpoint'
 model.fit(train_data=loader,
 epochs=FLAGS.epoch - FLAGS.no_mixup_epoch,
-save_dir="yolo_checkpoint/mixup",
+save_dir=os.path.join(save_dir, "mixup"),
 save_freq=10)
 # do not use image mixup transfrom in the last FLAGS.no_mixup_epoch epoches
 dataset.mixup = False
 model.fit(train_data=loader,
 epochs=FLAGS.no_mixup_epoch,
-save_dir="yolo_checkpoint/no_mixup",
+save_dir=os.path.join(save_dir, "no_mixup"),
 save_freq=5)
@@ -233,6 +236,13 @@ if __name__ == '__main__':
 default=None,
 type=str,
 help="path to weights for evaluation")
+parser.add_argument(
+"-s",
+"--save_dir",
+default=None,
+type=str,
+help="directory path for checkpoint saving, default ./yolo_checkpoint")
 FLAGS = parser.parse_args()
+print_arguments(FLAGS)
 assert FLAGS.data, "error: must provide data path"
 main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import six
import logging
logger = logging.getLogger(__name__)
__all__ = ['print_arguments']
def print_arguments(args):
"""Print argparse's arguments.
Usage:
.. code-block:: python
parser = argparse.ArgumentParser()
parser.add_argument("name", default="Jonh", type=str, help="User name.")
args = parser.parse_args()
print_arguments(args)
:param args: Input argparse.Namespace for printing.
:type args: argparse.Namespace
"""
logger.info("----------- Configuration Arguments -----------")
for arg, value in sorted(six.iteritems(vars(args))):
logger.info("%s: %s" % (arg, value))
logger.info("------------------------------------------------")
@@ -201,24 +201,10 @@ class ProgBarLogger(Callback):
 from hapi.callbacks import ProgBarLogger
 from hapi.model import Input, set_device
-class MnistDataset(MNIST):
-def __init__(self, mode, return_label=True):
-super(MnistDataset, self).__init__(mode=mode)
-self.return_label = return_label
-def __getitem__(self, idx):
-img = np.reshape(self.images[idx], [1, 28, 28])
-if self.return_label:
-return img, np.array(self.labels[idx]).astype('int64')
-return img,
-def __len__(self):
-return len(self.images)
 inputs = [Input([-1, 1, 28, 28], 'float32', name='image')]
 labels = [Input([None, 1], 'int64', name='label')]
-train_dataset = MnistDataset(mode='train')
+train_dataset = MNIST(mode='train')
 model = LeNet()
@@ -240,6 +226,9 @@ class ProgBarLogger(Callback):
 self.verbose = verbose
 self.log_freq = log_freq
+def _is_print(self):
+return self.verbose and ParallelEnv().local_rank == 0
 def on_train_begin(self, logs=None):
 self.epochs = self.params['epochs']
 assert self.epochs
@@ -250,7 +239,7 @@ class ProgBarLogger(Callback):
 self.steps = self.params['steps']
 self.epoch = epoch
 self.train_step = 0
-if self.verbose and self.epochs and ParallelEnv().local_rank == 0:
+if self.epochs and self._is_print():
 print('Epoch %d/%d' % (epoch + 1, self.epochs))
 self.train_progbar = ProgressBar(num=self.steps, verbose=self.verbose)
@@ -270,17 +259,13 @@ class ProgBarLogger(Callback):
 logs = logs or {}
 self.train_step += 1
-if self.train_step % self.log_freq == 0 and self.verbose and ParallelEnv(
-).local_rank == 0:
+if self._is_print() and self.train_step % self.log_freq == 0:
 if self.steps is None or self.train_step < self.steps:
 self._updates(logs, 'train')
 def on_epoch_end(self, epoch, logs=None):
 logs = logs or {}
-if self.verbose == 1 and ParallelEnv().local_rank == 0:
-self._updates(logs, 'train')
-elif self.train_step % self.log_freq != 0 and self.verbose and ParallelEnv(
-).local_rank == 0:
+if self._is_print() and (self.steps is not None):
 self._updates(logs, 'train')
 def on_eval_begin(self, logs=None):
@@ -291,7 +276,7 @@ class ProgBarLogger(Callback):
 self.eval_progbar = ProgressBar(
 num=self.eval_steps, verbose=self.verbose)
-if ParallelEnv().local_rank == 0:
+if self._is_print():
 print('Eval begin...')
 def on_eval_batch_end(self, step, logs=None):
@@ -300,8 +285,7 @@ class ProgBarLogger(Callback):
 samples = logs.get('batch_size', 1)
 self.evaled_samples += samples
-if self.eval_step % self.log_freq == 0 and self.verbose and ParallelEnv(
-).local_rank == 0:
+if self._is_print() and self.eval_step % self.log_freq == 0:
 if self.eval_steps is None or self.eval_step < self.eval_steps:
 self._updates(logs, 'eval')
@@ -321,21 +305,19 @@ class ProgBarLogger(Callback):
 samples = logs.get('batch_size', 1)
 self.tested_samples += samples
-if self.test_step % self.log_freq == 0 and self.verbose and ParallelEnv(
-).local_rank == 0:
+if self.test_step % self.log_freq == 0 and self._is_print():
 if self.test_steps is None or self.test_step < self.test_steps:
 self._updates(logs, 'test')
 def on_eval_end(self, logs=None):
 logs = logs or {}
-if self.verbose and ParallelEnv().local_rank == 0:
-if self.eval_step % self.log_freq != 0 or self.verbose == 1:
+if self._is_print() and (self.steps is not None):
 self._updates(logs, 'eval')
 print('Eval samples: %d' % (self.evaled_samples))
 def on_test_end(self, logs=None):
 logs = logs or {}
-if self.verbose and ParallelEnv().local_rank == 0:
+if self._is_print():
 if self.test_step % self.log_freq != 0 or self.verbose == 1:
 self._updates(logs, 'test')
 print('Predict samples: %d' % (self.tested_samples))
@@ -362,24 +344,10 @@ class ModelCheckpoint(Callback):
 from hapi.callbacks import ModelCheckpoint
 from hapi.model import Input, set_device
-class MnistDataset(MNIST):
-def __init__(self, mode, return_label=True):
-super(MnistDataset, self).__init__(mode=mode)
-self.return_label = return_label
-def __getitem__(self, idx):
-img = np.reshape(self.images[idx], [1, 28, 28])
-if self.return_label:
-return img, np.array(self.labels[idx]).astype('int64')
-return img,
-def __len__(self):
-return len(self.images)
 inputs = [Input([-1, 1, 28, 28], 'float32', name='image')]
 labels = [Input([None, 1], 'int64', name='label')]
-train_dataset = MnistDataset(mode='train')
+train_dataset = MNIST(mode='train')
 model = LeNet()
...
@@ -123,7 +123,7 @@ class Flowers(Dataset):
 if self.transform is not None:
 image = self.transform(image)
-return image, label
+return image, label.astype('int64')
 def __len__(self):
 return len(self.indexes)
@@ -45,6 +45,8 @@ class MNIST(Dataset):
 :attr:`download` is True. Default None
 label_path(str): path to label file, can be set None if
 :attr:`download` is True. Default None
+chw_format(bool): If set True, the output shape is [1, 28, 28],
+otherwise, output shape is [1, 784]. Default True.
 mode(str): 'train' or 'test' mode. Default 'train'.
 download(bool): whether auto download mnist dataset if
 :attr:`image_path`/:attr:`label_path` unset. Default
@@ -70,13 +72,14 @@ class MNIST(Dataset):
 def __init__(self,
 image_path=None,
 label_path=None,
+chw_format=True,
 mode='train',
 transform=None,
 download=True):
 assert mode.lower() in ['train', 'test'], \
 "mode should be 'train' or 'test', but got {}".format(mode)
 self.mode = mode.lower()
+self.chw_format = chw_format
 self.image_path = image_path
 if self.image_path is None:
 assert download, "image_path not set and auto download disabled"
@@ -144,10 +147,13 @@ class MNIST(Dataset):
 for i in range(buffer_size):
 self.images.append(images[i, :])
-self.labels.append(np.array([labels[i]]))
+self.labels.append(
+np.array([labels[i]]).astype('int64'))
 def __getitem__(self, idx):
 image, label = self.images[idx], self.labels[idx]
+if self.chw_format:
+image = np.reshape(image, [1, 28, 28])
 if self.transform is not None:
 image = self.transform(image)
 return image, label
...
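As a quick, illustrative sketch (not part of the diff above) of what the new `chw_format` flag does, assuming the `hapi.datasets.MNIST` constructor shown in this hunk and that the dataset can be auto-downloaded:
```python
# Illustrative only: exercise the chw_format flag added above.
from hapi.datasets import MNIST

train_chw = MNIST(mode='train', chw_format=True)    # images shaped [1, 28, 28]
train_flat = MNIST(mode='train', chw_format=False)  # images left in flat form
img, label = train_chw[0]
print(img.shape, label.shape)
```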
@@ -23,6 +23,7 @@ import requests
 import tqdm
 import hashlib
 import time
+from collections import OrderedDict
 from paddle.fluid.dygraph.parallel import ParallelEnv
@@ -35,6 +36,44 @@ WEIGHTS_HOME = osp.expanduser("~/.cache/paddle/hapi/weights")
 DOWNLOAD_RETRY_LIMIT = 3
+nlp_models = OrderedDict((
+('RoBERTa-zh-base',
+'https://bert-models.bj.bcebos.com/chinese_roberta_wwm_ext_L-12_H-768_A-12.tar.gz'
+),
+('RoBERTa-zh-large',
+'https://bert-models.bj.bcebos.com/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.tar.gz'
+),
+('ERNIE-v2-en-base',
+'https://ernie.bj.bcebos.com/ERNIE_Base_en_stable-2.0.0.tar.gz'),
+('ERNIE-v2-en-large',
+'https://ernie.bj.bcebos.com/ERNIE_Large_en_stable-2.0.0.tar.gz'),
+('XLNet-cased-base',
+'https://xlnet.bj.bcebos.com/xlnet_cased_L-12_H-768_A-12.tgz'),
+('XLNet-cased-large',
+'https://xlnet.bj.bcebos.com/xlnet_cased_L-24_H-1024_A-16.tgz'),
+('ERNIE-v1-zh-base',
+'https://baidu-nlp.bj.bcebos.com/ERNIE_stable-1.0.1.tar.gz'),
+('ERNIE-v1-zh-base-max-len-512',
+'https://ernie.bj.bcebos.com/ERNIE_1.0_max-len-512.tar.gz'),
+('BERT-en-uncased-large-whole-word-masking',
+'https://bert-models.bj.bcebos.com/wwm_uncased_L-24_H-1024_A-16.tar.gz'),
+('BERT-en-cased-large-whole-word-masking',
+'https://bert-models.bj.bcebos.com/wwm_cased_L-24_H-1024_A-16.tar.gz'),
+('BERT-en-uncased-base',
+'https://bert-models.bj.bcebos.com/uncased_L-12_H-768_A-12.tar.gz'),
+('BERT-en-uncased-large',
+'https://bert-models.bj.bcebos.com/uncased_L-24_H-1024_A-16.tar.gz'),
+('BERT-en-cased-base',
+'https://bert-models.bj.bcebos.com/cased_L-12_H-768_A-12.tar.gz'),
+('BERT-en-cased-large',
+'https://bert-models.bj.bcebos.com/cased_L-24_H-1024_A-16.tar.gz'),
+('BERT-multilingual-uncased-base',
+'https://bert-models.bj.bcebos.com/multilingual_L-12_H-768_A-12.tar.gz'),
+('BERT-multilingual-cased-base',
+'https://bert-models.bj.bcebos.com/multi_cased_L-12_H-768_A-12.tar.gz'),
+('BERT-zh-base',
+'https://bert-models.bj.bcebos.com/chinese_L-12_H-768_A-12.tar.gz'), ))
 def is_url(path):
 """
...
@@ -116,7 +116,7 @@ class Accuracy(Metric):
 def add_metric_op(self, pred, label, *args):
 pred = fluid.layers.argsort(pred, descending=True)[1][:, :self.maxk]
 correct = pred == label
-return correct
+return fluid.layers.cast(correct, dtype='float32')
 def update(self, correct, *args):
 accs = []
@@ -143,7 +143,7 @@ class Accuracy(Metric):
 if self.maxk != 1:
 self._name = ['{}_top{}'.format(name, k) for k in self.topk]
 else:
-self._name = ['acc']
+self._name = [name]
 def name(self):
 return self._name
(This diff is collapsed and not shown.)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import division
from __future__ import print_function
import os
import unittest
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable
from hapi.metrics import *
from hapi.utils import to_list
def accuracy(pred, label, topk=(1, )):
maxk = max(topk)
pred = np.argsort(pred)[:, ::-1][:, :maxk]
correct = (pred == np.repeat(label, maxk, 1))
batch_size = label.shape[0]
res = []
for k in topk:
correct_k = correct[:, :k].sum()
res.append(correct_k / batch_size)
return res
def convert_to_one_hot(y, C):
oh = np.random.random((y.shape[0], C)).astype('float32') * .5
for i in range(y.shape[0]):
oh[i, int(y[i])] = 1.
return oh
class TestAccuracyDynamic(unittest.TestCase):
def setUp(self):
self.topk = (1, )
self.class_num = 5
self.sample_num = 1000
self.name = None
def random_pred_label(self):
label = np.random.randint(0, self.class_num, (self.sample_num, 1)).astype('int64')
pred = np.random.randint(0, self.class_num, (self.sample_num, 1)).astype('int32')
pred_one_hot = convert_to_one_hot(pred, self.class_num)
pred_one_hot = pred_one_hot.astype('float32')
return label, pred_one_hot
def test_main(self):
with fluid.dygraph.guard(fluid.CPUPlace()):
acc = Accuracy(topk=self.topk, name=self.name)
for i in range(10):
label, pred = self.random_pred_label()
label_var = to_variable(label)
pred_var = to_variable(pred)
state = to_list(acc.add_metric_op(pred_var, label_var))
acc.update(*[s.numpy() for s in state])
res_m = acc.accumulate()
res_f = accuracy(pred, label, self.topk)
assert np.all(np.isclose(np.array(res_m), np.array(res_f), rtol=1e-3)), \
"Accuracy precision error: {} != {}".format(res_m, res_f)
acc.reset()
assert np.sum(acc.total) == 0
assert np.sum(acc.count) == 0
class TestAccuracyDynamicMultiTopk(TestAccuracyDynamic):
def setUp(self):
self.topk = (1, 5)
self.class_num = 10
self.sample_num = 1000
self.name = "accuracy"
class TestAccuracyStatic(TestAccuracyDynamic):
def test_main(self):
main_prog = fluid.Program()
startup_prog = fluid.Program()
with fluid.program_guard(main_prog, startup_prog):
pred = fluid.data(name='pred', shape=[None, self.class_num], dtype='float32')
label = fluid.data(name='label', shape=[None, 1], dtype='int64')
acc = Accuracy(topk=self.topk, name=self.name)
state = acc.add_metric_op(pred, label)
exe = fluid.Executor(fluid.CPUPlace())
compiled_main_prog = fluid.CompiledProgram(main_prog)
for i in range(10):
label, pred = self.random_pred_label()
state_ret = exe.run(compiled_main_prog,
feed={'pred': pred, 'label': label},
fetch_list=[s.name for s in to_list(state)],
return_numpy=True)
acc.update(*state_ret)
res_m = acc.accumulate()
res_f = accuracy(pred, label, self.topk)
assert np.all(np.isclose(np.array(res_m), np.array(res_f), rtol=1e-3)), \
"Accuracy precision error: {} != {}".format(res_m, res_f)
acc.reset()
assert np.sum(acc.total) == 0
assert np.sum(acc.count) == 0
class TestAccuracyStaticMultiTopk(TestAccuracyStatic):
def setUp(self):
self.topk = (1, 5)
self.class_num = 10
self.sample_num = 1000
self.name = "accuracy"
if __name__ == '__main__':
unittest.main()
@@ -18,33 +18,25 @@ from __future__ import print_function
 import unittest
 import os
-import cv2
 import numpy as np
-import shutil
-import tempfile
 import paddle
 from paddle import fluid
 from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
 from paddle.fluid.dygraph.container import Sequential
-from paddle.io import BatchSampler, DataLoader
-from paddle.fluid.dygraph.base import to_variable
+from paddle.io import DataLoader
 from hapi.model import Model, Input, set_device
-from hapi.loss import Loss
+from hapi.loss import CrossEntropy
 from hapi.metrics import Accuracy
 from hapi.datasets import MNIST
 from hapi.vision.models import LeNet
 from hapi.download import get_weights_path_from_url
 class LeNetDygraph(fluid.dygraph.Layer):
-"""LeNet model from
-`"LeCun Y, Bottou L, Bengio Y, et al. Gradient-based learning applied to document recognition[J]. Proceedings of the IEEE, 1998, 86(11): 2278-2324.`_
-Args:
-num_classes (int): output dim of last fc layer. If num_classes <=0, last fc layer
-will not be defined. Default: 10.
-classifier_activation (str): activation for the last fc layer. Default: 'softmax'.
-"""
 def __init__(self, num_classes=10, classifier_activation='softmax'):
 super(LeNetDygraph, self).__init__()
 self.num_classes = num_classes
@@ -73,12 +65,16 @@ class LeNetDygraph(fluid.dygraph.Layer):
 class MnistDataset(MNIST):
-def __init__(self, mode, return_label=True):
+def __init__(self, mode, return_label=True, sample_num=None):
 super(MnistDataset, self).__init__(mode=mode)
 self.return_label = return_label
+if sample_num:
+self.images = self.images[:sample_num]
+self.labels = self.labels[:sample_num]
 def __getitem__(self, idx):
-img = np.reshape(self.images[idx], [1, 28, 28])
+img, label = self.images[idx], self.labels[idx]
+img = np.reshape(img, [1, 28, 28])
 if self.return_label:
 return img, np.array(self.labels[idx]).astype('int64')
 return img,
@@ -87,15 +83,14 @@ class MnistDataset(MNIST):
 return len(self.images)
-def get_predict_accuracy(pred, gt):
+def compute_acc(pred, label):
 pred = np.argmax(pred, -1)
-gt = np.array(gt)
-correct = pred[:, np.newaxis] == gt
+label = np.array(label)
+correct = pred[:, np.newaxis] == label
 return np.sum(correct) / correct.shape[0]
-def low_level_lenet_dygraph_train(model, dataloader):
+def dynamic_train(model, dataloader):
 optim = fluid.optimizer.Adam(
 learning_rate=0.001, parameter_list=model.parameters())
 model.train()
@@ -108,7 +103,7 @@ def low_level_lenet_dygraph_train(model, dataloader):
 model.clear_gradients()
-def low_level_dynamic_evaluate(model, dataloader):
+def dynamic_evaluate(model, dataloader):
 with fluid.dygraph.no_grad():
 model.eval()
 cnt = 0
@@ -121,57 +116,65 @@ def low_level_dynamic_evaluate(model, dataloader):
 return cnt / len(dataloader.dataset)
-class TestEvaluatePredict(unittest.TestCase):
-def setUp(self):
-self.device = set_device('gpu')
-self.train_dataset = MnistDataset(mode='train')
-self.val_dataset = MnistDataset(mode='test')
-self.test_dataset = MnistDataset(mode='test', return_label=False)
-fluid.enable_dygraph(self.device)
-train_dataloader = fluid.io.DataLoader(
-self.train_dataset, places=self.device, batch_size=64)
-val_dataloader = fluid.io.DataLoader(
-self.val_dataset, places=self.device, batch_size=64)
-self.lenet_dygraph = LeNetDygraph()
-low_level_lenet_dygraph_train(self.lenet_dygraph, train_dataloader)
-self.acc1 = low_level_dynamic_evaluate(self.lenet_dygraph,
-val_dataloader)
-def evaluate(self, dynamic):
-fluid.enable_dygraph(self.device) if dynamic else None
+class TestModel(unittest.TestCase):
+@classmethod
+def setUpClass(cls):
+cls.device = set_device('gpu')
+fluid.enable_dygraph(cls.device)
+sp_num = 1280
+cls.train_dataset = MnistDataset(mode='train', sample_num=sp_num)
+cls.val_dataset = MnistDataset(mode='test', sample_num=sp_num)
+cls.test_dataset = MnistDataset(
+mode='test', return_label=False, sample_num=sp_num)
+cls.train_loader = fluid.io.DataLoader(
+cls.train_dataset, places=cls.device, batch_size=64)
cls.val_loader = fluid.io.DataLoader(
cls.val_dataset, places=cls.device, batch_size=64)
cls.test_loader = fluid.io.DataLoader(
cls.test_dataset, places=cls.device, batch_size=64)
inputs = [Input([-1, 1, 28, 28], 'float32', name='image')] seed = 333
labels = [Input([None, 1], 'int64', name='label')] fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed
if fluid.in_dygraph_mode(): dy_lenet = LeNetDygraph()
feed_list = None cls.init_param = dy_lenet.state_dict()
else: dynamic_train(dy_lenet, cls.train_loader)
feed_list = [x.forward() for x in inputs + labels]
self.train_dataloader = fluid.io.DataLoader( cls.acc1 = dynamic_evaluate(dy_lenet, cls.val_loader)
self.train_dataset,
places=self.device,
batch_size=64,
feed_list=feed_list)
self.val_dataloader = fluid.io.DataLoader(
self.val_dataset,
places=self.device,
batch_size=64,
feed_list=feed_list)
self.test_dataloader = fluid.io.DataLoader(
self.test_dataset,
places=self.device,
batch_size=64,
feed_list=feed_list)
model = LeNet() cls.inputs = [Input([-1, 1, 28, 28], 'float32', name='image')]
model.load_dict(self.lenet_dygraph.state_dict()) cls.labels = [Input([None, 1], 'int64', name='label')]
model.prepare(metrics=Accuracy(), inputs=inputs, labels=labels)
result = model.evaluate(self.val_dataloader) cls.save_dir = tempfile.mkdtemp()
cls.weight_path = os.path.join(cls.save_dir, 'lenet')
fluid.dygraph.save_dygraph(dy_lenet.state_dict(), cls.weight_path)
np.testing.assert_allclose(result['acc'], self.acc1) fluid.disable_dygraph()
@classmethod
def tearDownClass(cls):
shutil.rmtree(cls.save_dir)
def test_fit_dygraph(self):
self.fit(True)
def test_fit_static(self):
self.fit(False)
def test_evaluate_dygraph(self):
self.evaluate(True)
def test_evaluate_static(self):
self.evaluate(False)
def test_predict_dygraph(self):
self.predict(True)
def test_predict_static(self):
self.predict(False)
def predict(self, dynamic): def predict(self, dynamic):
fluid.enable_dygraph(self.device) if dynamic else None fluid.enable_dygraph(self.device) if dynamic else None
...@@ -179,50 +182,175 @@ class TestEvaluatePredict(unittest.TestCase): ...@@ -179,50 +182,175 @@ class TestEvaluatePredict(unittest.TestCase):
inputs = [Input([-1, 1, 28, 28], 'float32', name='image')] inputs = [Input([-1, 1, 28, 28], 'float32', name='image')]
labels = [Input([None, 1], 'int64', name='label')] labels = [Input([None, 1], 'int64', name='label')]
if fluid.in_dygraph_mode(): test_dataloader = fluid.io.DataLoader(
feed_list = None
else:
feed_list = [x.forward() for x in inputs + labels]
self.train_dataloader = fluid.io.DataLoader(
self.train_dataset,
places=self.device,
batch_size=64,
feed_list=feed_list)
self.val_dataloader = fluid.io.DataLoader(
self.val_dataset,
places=self.device,
batch_size=64,
feed_list=feed_list)
self.test_dataloader = fluid.io.DataLoader(
self.test_dataset, self.test_dataset,
places=self.device, places=self.device,
batch_size=64, batch_size=64,
feed_list=feed_list) return_list=True)
model = LeNet() model = LeNet()
model.load_dict(self.lenet_dygraph.state_dict())
model.load(self.weight_path)
model.prepare(metrics=Accuracy(), inputs=inputs, labels=labels) model.prepare(metrics=Accuracy(), inputs=inputs, labels=labels)
output = model.predict(self.test_dataloader, stack_outputs=True) output = model.predict(test_dataloader, stack_outputs=True)
np.testing.assert_equal(output[0].shape[0], len(self.test_dataset)) def fit(self, dynamic):
fluid.enable_dygraph(self.device) if dynamic else None
seed = 333
fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed
acc = get_predict_accuracy(output[0], self.val_dataset.labels) model = LeNet()
optim_new = fluid.optimizer.Adam(
learning_rate=0.001, parameter_list=model.parameters())
model.prepare(
optim_new,
loss_function=CrossEntropy(average=False),
metrics=Accuracy(),
inputs=self.inputs,
labels=self.labels)
model.fit(self.train_dataset, batch_size=64, shuffle=False)
result = model.evaluate(self.val_dataset, batch_size=64)
np.testing.assert_allclose(result['acc'], self.acc1)
fluid.disable_dygraph() if dynamic else None
np.testing.assert_allclose(acc, self.acc1) def evaluate(self, dynamic):
fluid.enable_dygraph(self.device) if dynamic else None
model = LeNet()
model.prepare(
metrics=Accuracy(), inputs=self.inputs, labels=self.labels)
model.load(self.weight_path)
result = model.evaluate(self.val_dataset, batch_size=64)
np.testing.assert_allclose(result['acc'], self.acc1)
fluid.disable_dygraph() if dynamic else None
def test_evaluate_dygraph(self): def predict(self, dynamic):
self.evaluate(True) fluid.enable_dygraph(self.device) if dynamic else None
model = LeNet()
model.prepare(inputs=self.inputs)
model.load(self.weight_path)
output = model.predict(
self.test_dataset, batch_size=64, stack_outputs=True)
np.testing.assert_equal(output[0].shape[0], len(self.test_dataset))
def test_evaluate_static(self): acc = compute_acc(output[0], self.val_dataset.labels)
self.evaluate(False) np.testing.assert_allclose(acc, self.acc1)
fluid.disable_dygraph() if dynamic else None
class MyModel(Model):
def __init__(self):
super(MyModel, self).__init__()
self._fc = Linear(20, 10, act='softmax')
def forward(self, x):
y = self._fc(x)
return y
class TestModelFunction(unittest.TestCase):
def set_seed(self, seed=1024):
fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed
def test_train_batch(self, dynamic=True):
dim = 20
data = np.random.random(size=(4, dim)).astype(np.float32)
label = np.random.randint(0, 10, size=(4, 1)).astype(np.int64)
def get_expect():
fluid.enable_dygraph(fluid.CPUPlace())
self.set_seed()
m = MyModel()
optim = fluid.optimizer.SGD(learning_rate=0.001,
parameter_list=m.parameters())
m.train()
output = m(to_variable(data))
l = to_variable(label)
loss = fluid.layers.cross_entropy(output, l)
avg_loss = fluid.layers.reduce_sum(loss)
avg_loss.backward()
optim.minimize(avg_loss)
m.clear_gradients()
fluid.disable_dygraph()
return avg_loss.numpy()
def test_predict_dygraph(self): ref = get_expect()
self.predict(True) for dynamic in [True, False]:
device = set_device('cpu')
fluid.enable_dygraph(device) if dynamic else None
self.set_seed()
model = MyModel()
def test_predict_static(self): optim2 = fluid.optimizer.SGD(learning_rate=0.001,
self.predict(False) parameter_list=model.parameters())
inputs = [Input([None, dim], 'float32', name='x')]
labels = [Input([None, 1], 'int64', name='label')]
model.prepare(
optim2,
loss_function=CrossEntropy(average=False),
inputs=inputs,
labels=labels,
device=device)
loss, = model.train_batch([data], [label])
np.testing.assert_allclose(loss.flatten(), ref.flatten())
fluid.disable_dygraph() if dynamic else None
def test_test_batch(self, dynamic=True):
dim = 20
data = np.random.random(size=(4, dim)).astype(np.float32)
def get_expect():
fluid.enable_dygraph(fluid.CPUPlace())
self.set_seed()
m = MyModel()
m.eval()
output = m(to_variable(data))
fluid.disable_dygraph()
return output.numpy()
ref = get_expect()
for dynamic in [True, False]:
device = set_device('cpu')
fluid.enable_dygraph(device) if dynamic else None
self.set_seed()
model = MyModel()
inputs = [Input([None, dim], 'float32', name='x')]
model.prepare(inputs=inputs, device=device)
out, = model.test_batch([data])
np.testing.assert_allclose(out, ref)
fluid.disable_dygraph() if dynamic else None
def test_save_load(self):
path = tempfile.mkdtemp()
for dynamic in [True, False]:
device = set_device('cpu')
fluid.enable_dygraph(device) if dynamic else None
model = MyModel()
inputs = [Input([None, 20], 'float32', name='x')]
model.prepare(inputs=inputs)
model.save(path + '/test')
model.load(path + '/test')
shutil.rmtree(path)
fluid.disable_dygraph() if dynamic else None
def test_parameters(self):
for dynamic in [True, False]:
device = set_device('cpu')
fluid.enable_dygraph(device) if dynamic else None
model = MyModel()
inputs = [Input([None, 20], 'float32', name='x')]
model.prepare(inputs=inputs)
params = model.parameters()
self.assertTrue(params[0].shape[0] == 20)
self.assertTrue(params[0].shape[1] == 10)
fluid.disable_dygraph() if dynamic else None
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -13,7 +13,9 @@ ...@@ -13,7 +13,9 @@
# limitations under the License. # limitations under the License.
from hapi.text.bert.bert import BertConfig as BertConfig from hapi.text.bert.bert import BertConfig as BertConfig
from hapi.text.bert.optimization import Optimizer as Optimizer from hapi.text.bert.dygraph_optimization import DyOptimizer as DyOptimizer
from hapi.text.bert.static_optimization import StOptimizer as StOptimizer
from hapi.text.bert.optimization import make_optimizer as make_optimizer
from hapi.text.bert.dataloader import BertDataLoader as BertDataLoader from hapi.text.bert.dataloader import BertDataLoader as BertDataLoader
from hapi.text.bert.dataloader import BertInputExample as BertInputExample from hapi.text.bert.dataloader import BertInputExample as BertInputExample
from hapi.text.tokenizer import tokenization as tokenization from hapi.text.tokenizer import tokenization as tokenization
......
...@@ -23,8 +23,8 @@ import numpy as np ...@@ -23,8 +23,8 @@ import numpy as np
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from hapi.model import Model
from paddle.fluid.dygraph import Embedding, LayerNorm, Linear, to_variable, Layer, guard from paddle.fluid.dygraph import Embedding, LayerNorm, Linear, to_variable, Layer, guard
from hapi.text.text import PrePostProcessLayer, TransformerEncoder from hapi.text.text import PrePostProcessLayer, TransformerEncoder
from hapi.text.bert.utils.init import init_from_static_model from hapi.text.bert.utils.init import init_from_static_model
...@@ -52,7 +52,7 @@ class BertConfig(object): ...@@ -52,7 +52,7 @@ class BertConfig(object):
print('------------------------------------------------') print('------------------------------------------------')
class BertEncoder(Layer): class BertEncoder(Model):
""" """
bert bert
""" """
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Optimization and learning rate scheduling."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph.learning_rate_scheduler import LearningRateDecay
class ConstantLR(LearningRateDecay):
def __init__(self, learning_rate, begin=0, step=1, dtype='float32'):
super(ConstantLR, self).__init__(begin, step, dtype)
self.learning_rate = learning_rate
def step(self):
return self.learning_rate
class LinearDecay(LearningRateDecay):
def __init__(self,
learning_rate,
warmup_steps,
decay_steps,
end_learning_rate=0.0001,
power=1.0,
cycle=False,
begin=0,
step=1,
dtype='float32'):
super(LinearDecay, self).__init__(begin, step, dtype)
self.learning_rate = learning_rate
self.warmup_steps = warmup_steps
self.decay_steps = decay_steps
self.end_learning_rate = end_learning_rate
self.power = power
self.cycle = cycle
def step(self):
if self.step_num < self.warmup_steps:
decayed_lr = self.learning_rate * (self.step_num /
self.warmup_steps)
decayed_lr = self.create_lr_var(decayed_lr)
else:
tmp_step_num = self.step_num
tmp_decay_steps = self.decay_steps
if self.cycle:
div_res = fluid.layers.ceil(
self.create_lr_var(tmp_step_num / float(self.decay_steps)))
if tmp_step_num == 0:
div_res = self.create_lr_var(1.0)
tmp_decay_steps = self.decay_steps * div_res
else:
tmp_step_num = self.create_lr_var(
tmp_step_num
if tmp_step_num < self.decay_steps else self.decay_steps)
decayed_lr = (self.learning_rate - self.end_learning_rate) * \
((1 - tmp_step_num / tmp_decay_steps) ** self.power) + self.end_learning_rate
return decayed_lr
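# A minimal plain-Python sketch of the schedule LinearDecay implements above,
# assuming cycle=False and power=1.0 (illustrative values only, added here for
# reference, not part of the original module):
def _linear_warmup_decay_example(step, base_lr=5e-5, warmup_steps=1000,
                                 decay_steps=10000, end_lr=0.0):
    # linear warmup: ramp from 0 up to base_lr over warmup_steps
    if step < warmup_steps:
        return base_lr * step / warmup_steps
    # linear (power=1.0) decay from base_lr down to end_lr over decay_steps
    step = min(step, decay_steps)
    return (base_lr - end_lr) * (1 - step / decay_steps) + end_lr
# _linear_warmup_decay_example(500)   -> 2.5e-05  (mid-warmup)
# _linear_warmup_decay_example(5500)  -> 2.25e-05 (mid-decay)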
class DyOptimizer(object):
def __init__(self,
warmup_steps,
num_train_steps,
learning_rate,
model_cls,
weight_decay,
scheduler='linear_warmup_decay',
loss_scaling=1.0,
parameter_list=None):
self.warmup_steps = warmup_steps
self.num_train_steps = num_train_steps
self.learning_rate = learning_rate
self.model_cls = model_cls
self.weight_decay = weight_decay
self.scheduler = scheduler
self.loss_scaling = loss_scaling
self.parameter_list = parameter_list
self.scheduled_lr = 0.0
self.optimizer = self.lr_schedule()
def lr_schedule(self):
if self.warmup_steps > 0:
if self.scheduler == 'noam_decay':
self.scheduled_lr = fluid.dygraph.NoamDecay(1 / (
self.warmup_steps * (self.learning_rate**2)),
self.warmup_steps)
elif self.scheduler == 'linear_warmup_decay':
self.scheduled_lr = LinearDecay(self.learning_rate,
self.warmup_steps,
self.num_train_steps, 0.0)
else:
raise ValueError("Unkown learning rate scheduler, should be "
"'noam_decay' or 'linear_warmup_decay'")
optimizer = fluid.optimizer.Adam(
learning_rate=self.scheduled_lr,
parameter_list=self.parameter_list)
else:
self.scheduled_lr = ConstantLR(self.learning_rate)
optimizer = fluid.optimizer.Adam(
learning_rate=self.scheduled_lr,
parameter_list=self.parameter_list)
return optimizer
def exclude_from_weight_decay(self, name):
if name.find("layer_norm") > -1:
return True
bias_suffix = ["_bias", "_b", ".b_0"]
for suffix in bias_suffix:
if name.endswith(suffix):
return True
return False
def state_dict(self):
return self.optimizer.state_dict()
def set_dict(self, state_dict):
return self.optimizer.set_dict(state_dict)
def get_opti_var_name_list(self):
return self.optimizer.get_opti_var_name_list()
def current_step_lr(self):
return self.optimizer.current_step_lr()
def minimize(self, loss, use_data_parallel=False, model=None):
param_list = dict()
clip_norm_thres = 1.0
#grad_clip = fluid.clip.GradientClipByGlobalNorm(clip_norm_thres)
if use_data_parallel:
loss = model.scale_loss(loss)
loss.backward()
if self.weight_decay > 0:
for param in self.model_cls.parameters():
param_list[param.name] = param * 1.0
param_list[param.name].stop_gradient = True
if use_data_parallel:
assert model is not None
model.apply_collective_grads()
#_, param_grads = self.optimizer.minimize(loss, grad_clip=grad_clip)
_, param_grads = self.optimizer.minimize(loss)
if self.weight_decay > 0:
for param, grad in param_grads:
if self.exclude_from_weight_decay(param.name):
continue
if isinstance(self.scheduled_lr.step(), float):
updated_param = param.numpy() - param_list[
param.name].numpy(
) * self.weight_decay * self.scheduled_lr.step()
else:
updated_param = param.numpy(
) - param_list[param.name].numpy(
) * self.weight_decay * self.scheduled_lr.step().numpy()
updated_param_var = fluid.dygraph.to_variable(updated_param)
param = updated_param_var
#param = fluid.layers.reshape(x=updated_param_var, shape=list(updated_param_var.shape))
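# The weight-decay handling in minimize() above is the decoupled (AdamW-style)
# scheme: after the Adam step, every parameter not excluded by
# exclude_from_weight_decay() is additionally shifted by
#     param <- param_adam - weight_decay * lr_t * param_before_step
# where param_before_step is the snapshot stored in param_list before
# optimizer.minimize() ran and lr_t is the scheduled learning rate. For example,
# with weight_decay=0.01, lr_t=1e-4 and a weight value of 0.5, the extra shift is
# 0.01 * 1e-4 * 0.5 = 5e-7 on top of the Adam update.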
...@@ -11,172 +11,35 @@ ...@@ -11,172 +11,35 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""Optimization and learning rate scheduling."""
from __future__ import absolute_import from paddle.fluid.framework import in_dygraph_mode
from __future__ import division from hapi.text.bert.dygraph_optimization import DyOptimizer as DyOptimizer
from __future__ import print_function from hapi.text.bert.static_optimization import StOptimizer as StOptimizer
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph.learning_rate_scheduler import LearningRateDecay def make_optimizer(warmup_steps,
class ConstantLR(LearningRateDecay):
def __init__(self, learning_rate, begin=0, step=1, dtype='float32'):
super(ConstantLR, self).__init__(begin, step, dtype)
self.learning_rate = learning_rate
def step(self):
return self.learning_rate
class LinearDecay(LearningRateDecay):
def __init__(self,
learning_rate,
warmup_steps,
decay_steps,
end_learning_rate=0.0001,
power=1.0,
cycle=False,
begin=0,
step=1,
dtype='float32'):
super(LinearDecay, self).__init__(begin, step, dtype)
self.learning_rate = learning_rate
self.warmup_steps = warmup_steps
self.decay_steps = decay_steps
self.end_learning_rate = end_learning_rate
self.power = power
self.cycle = cycle
def step(self):
if self.step_num < self.warmup_steps:
decayed_lr = self.learning_rate * (self.step_num /
self.warmup_steps)
decayed_lr = self.create_lr_var(decayed_lr)
else:
tmp_step_num = self.step_num
tmp_decay_steps = self.decay_steps
if self.cycle:
div_res = fluid.layers.ceil(
self.create_lr_var(tmp_step_num / float(self.decay_steps)))
if tmp_step_num == 0:
div_res = self.create_lr_var(1.0)
tmp_decay_steps = self.decay_steps * div_res
else:
tmp_step_num = self.create_lr_var(
tmp_step_num
if tmp_step_num < self.decay_steps else self.decay_steps)
decayed_lr = (self.learning_rate - self.end_learning_rate) * \
((1 - tmp_step_num / tmp_decay_steps) ** self.power) + self.end_learning_rate
return decayed_lr
class Optimizer(object):
def __init__(self,
warmup_steps,
num_train_steps, num_train_steps,
learning_rate, learning_rate,
model_cls,
weight_decay, weight_decay,
model,
scheduler='linear_warmup_decay', scheduler='linear_warmup_decay',
loss_scaling=1.0, loss_scaling=1.0,
parameter_list=None): parameter_list=None):
self.warmup_steps = warmup_steps
self.num_train_steps = num_train_steps
self.learning_rate = learning_rate
self.model_cls = model_cls
self.weight_decay = weight_decay
self.scheduler = scheduler
self.loss_scaling = loss_scaling
self.parameter_list = parameter_list
self.scheduled_lr = 0.0
self.optimizer = self.lr_schedule()
def lr_schedule(self):
if self.warmup_steps > 0:
if self.scheduler == 'noam_decay':
self.scheduled_lr = fluid.dygraph.NoamDecay(1 / (
self.warmup_steps * (self.learning_rate**2)),
self.warmup_steps)
elif self.scheduler == 'linear_warmup_decay':
self.scheduled_lr = LinearDecay(self.learning_rate,
self.warmup_steps,
self.num_train_steps, 0.0)
else:
raise ValueError("Unkown learning rate scheduler, should be "
"'noam_decay' or 'linear_warmup_decay'")
optimizer = fluid.optimizer.Adam(
learning_rate=self.scheduled_lr,
parameter_list=self.parameter_list)
else:
self.scheduled_lr = ConstantLR(self.learning_rate)
optimizer = fluid.optimizer.Adam(
learning_rate=self.scheduled_lr,
parameter_list=self.parameter_list)
return optimizer
def exclude_from_weight_decay(self, name):
if name.find("layer_norm") > -1:
return True
bias_suffix = ["_bias", "_b", ".b_0"]
for suffix in bias_suffix:
if name.endswith(suffix):
return True
return False
def state_dict(self):
return self.optimizer.state_dict()
def set_dict(self, state_dict):
return self.optimizer.set_dict(state_dict)
def get_opti_var_name_list(self):
return self.optimizer.get_opti_var_name_list()
def current_step_lr(self):
return self.optimizer.current_step_lr()
def minimize(self, loss, use_data_parallel=False, model=None):
param_list = dict()
clip_norm_thres = 1.0
#grad_clip = fluid.clip.GradientClipByGlobalNorm(clip_norm_thres)
if use_data_parallel:
loss = model.scale_loss(loss)
loss.backward()
if self.weight_decay > 0:
for param in self.model_cls.parameters():
param_list[param.name] = param * 1.0
param_list[param.name].stop_gradient = True
if use_data_parallel:
assert model is not None
model.apply_collective_grads()
#_, param_grads = self.optimizer.minimize(loss, grad_clip=grad_clip)
_, param_grads = self.optimizer.minimize(loss)
if self.weight_decay > 0: if in_dygraph_mode():
for param, grad in param_grads: return DyOptimizer(
if self.exclude_from_weight_decay(param.name): warmup_steps=warmup_steps,
continue num_train_steps=num_train_steps,
if isinstance(self.scheduled_lr.step(), float): learning_rate=learning_rate,
updated_param = param.numpy() - param_list[ model_cls=model,
param.name].numpy( weight_decay=weight_decay,
) * self.weight_decay * self.scheduled_lr.step() scheduler=scheduler,
loss_scaling=loss_scaling,
parameter_list=parameter_list)
else: else:
updated_param = param.numpy( return StOptimizer(
) - param_list[param.name].numpy( warmup_steps=warmup_steps,
) * self.weight_decay * self.scheduled_lr.step().numpy() num_train_steps=num_train_steps,
updated_param_var = fluid.dygraph.to_variable(updated_param) learning_rate=learning_rate,
param = updated_param_var weight_decay=weight_decay,
#param = fluid.layers.reshape(x=updated_param_var, shape=list(updated_param_var.shape)) scheduler=scheduler)
...@@ -19,7 +19,6 @@ from __future__ import print_function ...@@ -19,7 +19,6 @@ from __future__ import print_function
import numpy as np import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
from utils.fp16 import create_master_params_grads, master_param_to_train_param, apply_dynamic_loss_scaling
def linear_warmup_decay(learning_rate, warmup_steps, num_train_steps): def linear_warmup_decay(learning_rate, warmup_steps, num_train_steps):
...@@ -51,52 +50,60 @@ def linear_warmup_decay(learning_rate, warmup_steps, num_train_steps): ...@@ -51,52 +50,60 @@ def linear_warmup_decay(learning_rate, warmup_steps, num_train_steps):
return lr return lr
def optimization(loss, class StOptimizer(fluid.optimizer.Optimizer):
def __init__(self,
warmup_steps, warmup_steps,
num_train_steps, num_train_steps,
learning_rate, learning_rate,
train_program,
startup_prog,
weight_decay, weight_decay,
scheduler='linear_warmup_decay', scheduler='linear_warmup_decay'):
use_fp16=False, super(StOptimizer, self).__init__(
use_dynamic_loss_scaling=False, learning_rate=learning_rate,
init_loss_scaling=1.0, parameter_list=None,
incr_every_n_steps=1000, regularization=None,
decr_every_n_nan_or_inf=2, grad_clip=None,
incr_ratio=2.0, name=None)
decr_ratio=0.8): self.warmup_steps = warmup_steps
self.num_train_steps = num_train_steps
scheduled_lr, loss_scaling = None, None self.learning_rate = learning_rate
if scheduler == 'noam_decay': self.weight_decay = weight_decay
if warmup_steps > 0: self.scheduler = scheduler
def minimize(self, loss):
train_program = fluid.default_main_program()
startup_program = fluid.default_startup_program()
if self.scheduler == 'noam_decay':
if self.warmup_steps > 0:
scheduled_lr = fluid.layers.learning_rate_scheduler\ scheduled_lr = fluid.layers.learning_rate_scheduler\
.noam_decay(1/(warmup_steps *(learning_rate ** 2)), .noam_decay(1/(self.warmup_steps *(self.learning_rate ** 2)),
warmup_steps) self.warmup_steps)
else: else:
print( print(
"WARNING: noam decay of learning rate should have postive warmup " "WARNING: noam decay of learning rate should have postive warmup "
"steps but given {}, using constant learning rate instead!" "steps but given {}, using constant learning rate instead!"
.format(warmup_steps)) .format(self.warmup_steps))
scheduled_lr = fluid.layers.create_global_var( scheduled_lr = fluid.layers.create_global_var(
name=fluid.unique_name.generate("learning_rate"), name=fluid.unique_name.generate("learning_rate"),
shape=[1], shape=[1],
value=learning_rate, value=self.learning_rate,
dtype='float32', dtype='float32',
persistable=True) persistable=True)
elif scheduler == 'linear_warmup_decay': elif self.scheduler == 'linear_warmup_decay':
if warmup_steps > 0: if self.warmup_steps > 0:
scheduled_lr = linear_warmup_decay(learning_rate, warmup_steps, scheduled_lr = linear_warmup_decay(self.learning_rate,
num_train_steps) self.warmup_steps,
self.num_train_steps)
else: else:
print( print(
"WARNING: linear warmup decay of learning rate should have " "WARNING: linear warmup decay of learning rate should have "
"postive warmup steps but given {}, use constant learning rate " "postive warmup steps but given {}, use constant learning rate "
"instead!".format(warmup_steps)) "instead!".format(self.warmup_steps))
scheduled_lr = fluid.layers.create_global_var( scheduled_lr = fluid.layers.create_global_var(
name=fluid.unique_name.generate("learning_rate"), name=fluid.unique_name.generate("learning_rate"),
shape=[1], shape=[1],
value=learning_rate, value=self.learning_rate,
dtype='float32', dtype='float32',
persistable=True) persistable=True)
else: else:
...@@ -119,60 +126,19 @@ def optimization(loss, ...@@ -119,60 +126,19 @@ def optimization(loss,
param_list = dict() param_list = dict()
if use_fp16: if self.weight_decay > 0:
loss_scaling = fluid.layers.create_global_var(
name=fluid.unique_name.generate("loss_scaling"),
shape=[1],
value=init_loss_scaling,
dtype='float32',
persistable=True)
loss *= loss_scaling
param_grads = optimizer.backward(loss)
master_param_grads = create_master_params_grads(
param_grads, train_program, startup_prog, loss_scaling)
if weight_decay > 0:
for param, _ in master_param_grads:
param_list[param.name] = param * 1.0
param_list[param.name].stop_gradient = True
if use_dynamic_loss_scaling:
apply_dynamic_loss_scaling(
loss_scaling, master_param_grads, incr_every_n_steps,
decr_every_n_nan_or_inf, incr_ratio, decr_ratio)
optimizer.apply_gradients(master_param_grads)
if weight_decay > 0:
for param, grad in master_param_grads:
if exclude_from_weight_decay(param):
continue
with param.block.program._optimized_guard(
[param, grad]), fluid.framework.name_scope("weight_decay"):
updated_param = param - param_list[
param.name] * weight_decay * scheduled_lr
fluid.layers.assign(output=param, input=updated_param)
master_param_to_train_param(master_param_grads, param_grads,
train_program)
else:
if weight_decay > 0:
for param in train_program.all_parameters(): for param in train_program.all_parameters():
param_list[param.name] = param * 1.0 param_list[param.name] = param * 1.0
param_list[param.name].stop_gradient = True param_list[param.name].stop_gradient = True
_, param_grads = optimizer.minimize(loss) _, param_grads = optimizer.minimize(loss)
if weight_decay > 0: if self.weight_decay > 0:
for param, grad in param_grads: for param, grad in param_grads:
if exclude_from_weight_decay(param): if exclude_from_weight_decay(param):
continue continue
with param.block.program._optimized_guard( with param.block.program._optimized_guard(
[param, grad]), fluid.framework.name_scope("weight_decay"): [param, grad]), fluid.framework.name_scope("weight_decay"):
updated_param = param - param_list[ updated_param = param - param_list[
param.name] * weight_decay * scheduled_lr param.name] * self.weight_decay * scheduled_lr
fluid.layers.assign(output=param, input=updated_param) fluid.layers.assign(output=param, input=updated_param)
return scheduled_lr, loss_scaling
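# The static-graph path mirrors the dygraph one: StOptimizer.minimize() applies the
# same decoupled weight decay, but writes the decayed value back into the program's
# parameter variables with fluid.layers.assign() under _optimized_guard, so the decay
# becomes part of the compiled training program rather than a Python-side update.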
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from hapi.text.senta.data_processer import SentaProcessor
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from hapi.text.senta.data_reader import load_vocab
from hapi.text.senta.data_reader import data_reader
from paddle.io import DataLoader
class SentaProcessor(object):
def __init__(self, data_dir, vocab_path, random_seed=None):
self.data_dir = data_dir
self.vocab = load_vocab(vocab_path)
self.num_examples = {"train": -1, "dev": -1, "infer": -1}
np.random.seed(random_seed)
def get_train_examples(self, data_dir, epoch, shuffle, batch_size, places, padding_size):
train_reader = data_reader((self.data_dir + "/train.tsv"), self.vocab,
self.num_examples, "train", epoch, padding_size, shuffle)
loader = DataLoader.from_generator(capacity=50, return_list=True)
loader.set_sample_generator(train_reader, batch_size=batch_size, drop_last=False, places=places)
return loader
def get_dev_examples(self, data_dir, epoch, shuffle, batch_size, places, padding_size):
dev_reader = data_reader((self.data_dir + "/dev.tsv"), self.vocab,
self.num_examples, "dev", epoch, padding_size, shuffle)
loader = DataLoader.from_generator(capacity=50, return_list=True)
loader.set_sample_generator(dev_reader, batch_size=batch_size, drop_last=False, places=places)
return loader
def get_test_examples(self, data_dir, epoch, batch_size, places, padding_size):
test_reader = data_reader((self.data_dir + "/test.tsv"), self.vocab,
self.num_examples, "infer", epoch, padding_size)
loader = DataLoader.from_generator(capacity=50, return_list=True)
loader.set_sample_generator(test_reader, batch_size=batch_size, drop_last=False, places=places)
return loader
def get_labels(self):
return ["0", "1"]
def get_num_examples(self, phase):
if phase not in ['train', 'dev', 'infer']:
raise ValueError(
"Unknown phase, which should be in ['train', 'dev', 'infer'].")
return self.num_examples[phase]
def get_train_progress(self):
return self.current_train_example, self.current_train_epoch
def data_generator(self, padding_size, batch_size, places, phase='train', epoch=1, shuffle=True):
if phase == "train":
return self.get_train_examples(self.data_dir, epoch, shuffle, batch_size, places, padding_size)
elif phase == "dev":
return self.get_dev_examples(self.data_dir, epoch, shuffle, batch_size, places, padding_size)
elif phase == "infer":
return self.get_test_examples(self.data_dir, epoch, batch_size, places, padding_size)
else:
raise ValueError(
"Unknown phase, which should be in ['train', 'dev', 'infer'].")
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import io
import sys
import random
def str2bool(v):
return v.lower() in ("true", "t", "1")
def data_reader(file_path, word_dict, num_examples, phrase, epoch, padding_size, shuffle=False):
unk_id = len(word_dict)
all_data = []
with io.open(file_path, "r", encoding='utf8') as fin:
for line in fin:
if line.startswith('text_a'):
continue
cols = line.strip().split("\t")
if len(cols) != 2:
sys.stderr.write("[NOTICE] Error Format Line!")
continue
label = [int(cols[1])]
wids = [
word_dict[x] if x in word_dict else unk_id
for x in cols[0].split(" ")
]
wids = wids[:padding_size]
while len(wids) < padding_size:
wids.append(unk_id)
all_data.append((wids, label))
if shuffle:
if phrase == "train":
random.shuffle(all_data)
num_examples[phrase] = len(all_data)
def reader():
for epoch_index in range(epoch):
for doc, label in all_data:
yield doc, label
return reader
def load_vocab(file_path):
vocab = {}
with io.open(file_path, 'r', encoding='utf8') as f:
wid = 0
for line in f:
if line.strip() not in vocab:
vocab[line.strip()] = wid
wid += 1
vocab["<unk>"] = len(vocab)
return vocab
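# Input format assumed by data_reader() and load_vocab() above (example values are
# illustrative):
#
#   train.tsv / dev.tsv / test.tsv -- tab-separated, with an optional header line
#   starting with "text_a"; each data line is "<space-separated tokens>\t<integer label>",
#   e.g. "a fine little film\t1". Lines that do not split into exactly two columns
#   are skipped with a notice.
#
#   word_dict.txt -- one token per line; ids are assigned in file order and an
#   "<unk>" entry with the next free id is appended automatically.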
...@@ -1096,7 +1096,8 @@ class PrePostProcessLayer(Layer): ...@@ -1096,7 +1096,8 @@ class PrePostProcessLayer(Layer):
self.functors = [] self.functors = []
for cmd in self.process_cmd: for cmd in self.process_cmd:
if cmd == "a": # add residual connection if cmd == "a": # add residual connection
self.functors.append(lambda x, y: x + y if y else x) self.functors.append(
lambda x, y: x + y if y is not None else x)
elif cmd == "n": # add layer normalization elif cmd == "n": # add layer normalization
if reused_layer_norm is not None: if reused_layer_norm is not None:
layer_norm = reused_layer_norm layer_norm = reused_layer_norm
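# Both fixes in this file ("if y" -> "if y is not None" here, and "if attn_bias" ->
# "if attn_bias is not None" below) make the guard test only whether the optional
# residual input / attention bias was supplied at all; relying on the truthiness of a
# tensor is ambiguous (and can fail for multi-element tensors), so the explicit None
# check is the safe form.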
...@@ -1218,7 +1219,7 @@ class MultiHeadAttention(Layer): ...@@ -1218,7 +1219,7 @@ class MultiHeadAttention(Layer):
# scale dot product attention # scale dot product attention
product = layers.matmul( product = layers.matmul(
x=q, y=k, transpose_y=True, alpha=self.d_model**-0.5) x=q, y=k, transpose_y=True, alpha=self.d_model**-0.5)
if attn_bias: if attn_bias is not None:
product += attn_bias product += attn_bias
weights = layers.softmax(product) weights = layers.softmax(product)
if self.dropout_rate: if self.dropout_rate:
......