Commit 38fd12ef, authored by guosheng

Merge branch 'master' of https://github.com/PaddlePaddle/hapi into add-hapi-seq2seq

@@ -2,3 +2,6 @@
 *.json
 output*
 *checkpoint*
+build
+dist
+hapi.egg-info

bert_config_path: "./config/bert_config.json"
init_checkpoint: None
init_pretraining_params: None
checkpoints: "./saved_model"
epoch: 3
learning_rate: 0.0001
lr_scheduler: "linear_warmup_decay"
weight_decay: 0.01
warmup_proportion: 0.1
save_steps: 100000
validation_steps: 100000
loss_scaling: 1.0
skip_steps: 100
data_dir: None
vocab_path: None
max_seq_len: 512
batch_size: 32
in_tokens: False
do_lower_case: True
random_seed: 5512
use_cuda: True
shuffle: True
do_train: True
do_test: True
use_data_parallel: False
verbose: False
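
The YAML above only provides defaults: the fine-tuning script below loads it through hapi's Config, and the shell commands further down override individual keys (batch_size, learning_rate, ...) from the command line. A minimal sketch of that flow, assuming Config merges CLI flags over the YAML values the way its usage in bert_classifier.py suggests:

    # Sketch: load bert.yaml and let command-line flags override its defaults.
    from hapi.configure import Config

    config = Config(yaml_file="./bert.yaml")
    config.build()   # parses CLI flags such as --batch_size 64 over the YAML defaults
    config.Print()   # dumps the merged configuration
    print(config.batch_size, config.max_seq_len)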
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""BERT fine-tuning in Paddle Dygraph Mode."""
import paddle.fluid as fluid
from hapi.metrics import Accuracy
from hapi.configure import Config
from hapi.text.bert import BertEncoder
from paddle.fluid.dygraph import Linear, Layer
from hapi.model import set_device, Model, SoftmaxWithCrossEntropy, Input
import hapi.text.tokenizer.tokenization as tokenization
from hapi.text.bert import Optimizer, BertConfig, BertDataLoader, BertInputExample


class ClsModelLayer(Model):
    """
    classify model
    """

    def __init__(self,
                 args,
                 config,
                 num_labels,
                 return_pooled_out=True,
                 use_fp16=False):
        super(ClsModelLayer, self).__init__()
        self.config = config
        self.use_fp16 = use_fp16
        self.loss_scaling = args.loss_scaling
        self.bert_layer = BertEncoder(
            config=self.config, return_pooled_out=True, use_fp16=self.use_fp16)
        self.cls_fc = Linear(
            input_dim=self.config["hidden_size"],
            output_dim=num_labels,
            param_attr=fluid.ParamAttr(
                name="cls_out_w",
                initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
            bias_attr=fluid.ParamAttr(
                name="cls_out_b", initializer=fluid.initializer.Constant(0.)))

    def forward(self, src_ids, position_ids, sentence_ids, input_mask):
        """
        forward
        """
        enc_output, next_sent_feat = self.bert_layer(src_ids, position_ids,
                                                     sentence_ids, input_mask)
        # classify on the pooled [CLS] feature, with dropout for regularization
        cls_feats = fluid.layers.dropout(
            x=next_sent_feat,
            dropout_prob=0.1,
            dropout_implementation="upscale_in_train")
        pred = self.cls_fc(cls_feats)
        return pred


def main():
    config = Config(yaml_file="./bert.yaml")
    config.build()
    config.Print()

    device = set_device("gpu" if config.use_cuda else "cpu")
    fluid.enable_dygraph(device)

    bert_config = BertConfig(config.bert_config_path)
    bert_config.print_config()

    tokenizer = tokenization.FullTokenizer(
        vocab_file=config.vocab_path, do_lower_case=config.do_lower_case)

    def mnli_line_processor(line_id, line):
        # skip the TSV header row
        if line_id == "0":
            return None
        uid = tokenization.convert_to_unicode(line[0])
        text_a = tokenization.convert_to_unicode(line[8])
        text_b = tokenization.convert_to_unicode(line[9])
        label = tokenization.convert_to_unicode(line[-1])
        if label not in ["contradiction", "entailment", "neutral"]:
            label = "contradiction"
        return BertInputExample(
            uid=uid, text_a=text_a, text_b=text_b, label=label)

    train_dataloader = BertDataLoader(
        "./data/glue_data/MNLI/train.tsv",
        tokenizer, ["contradiction", "entailment", "neutral"],
        max_seq_length=config.max_seq_len,
        batch_size=config.batch_size,
        line_processor=mnli_line_processor)

    test_dataloader = BertDataLoader(
        "./data/glue_data/MNLI/dev_matched.tsv",
        tokenizer, ["contradiction", "entailment", "neutral"],
        max_seq_length=config.max_seq_len,
        batch_size=config.batch_size,
        line_processor=mnli_line_processor,
        shuffle=False,
        phase="predict")

    trainer_count = fluid.dygraph.parallel.Env().nranks
    num_train_examples = len(train_dataloader.dataset)
    max_train_steps = config.epoch * num_train_examples // config.batch_size // trainer_count
    warmup_steps = int(max_train_steps * config.warmup_proportion)

    print("Trainer count: %d" % trainer_count)
    print("Num train examples: %d" % num_train_examples)
    print("Max train steps: %d" % max_train_steps)
    print("Num warmup steps: %d" % warmup_steps)

    inputs = [
        Input([None, None], 'int64', name='src_ids'),
        Input([None, None], 'int64', name='pos_ids'),
        Input([None, None], 'int64', name='sent_ids'),
        Input([None, None], 'float32', name='input_mask')
    ]
    labels = [Input([None, 1], 'int64', name='label')]

    cls_model = ClsModelLayer(
        config,
        bert_config,
        len(["contradiction", "entailment", "neutral"]),
        return_pooled_out=True)

    optimizer = Optimizer(
        warmup_steps=warmup_steps,
        num_train_steps=max_train_steps,
        learning_rate=config.learning_rate,
        model_cls=cls_model,
        weight_decay=config.weight_decay,
        scheduler=config.lr_scheduler,
        loss_scaling=config.loss_scaling,
        parameter_list=cls_model.parameters())

    cls_model.prepare(
        optimizer,
        SoftmaxWithCrossEntropy(),
        Accuracy(topk=(1, 2)),
        inputs,
        labels,
        device=device)

    cls_model.bert_layer.init_parameters(
        config.init_pretraining_params, verbose=config.verbose)

    # do train
    cls_model.fit(train_data=train_dataloader.dataloader,
                  epochs=config.epoch,
                  save_dir=config.checkpoints)

    # do eval
    cls_model.evaluate(
        eval_data=test_dataloader.dataloader, batch_size=config.batch_size)


if __name__ == '__main__':
    main()

#!/bin/bash
BERT_BASE_PATH="./data/pretrained_models/uncased_L-12_H-768_A-12/"
TASK_NAME='MNLI'
DATA_PATH="./data/glue_data/MNLI/"
CKPT_PATH="./data/saved_model/mnli_models"
export CUDA_VISIBLE_DEVICES=0
# start fine-tuning
python3.7 bert_classifier.py \
--use_cuda true \
--do_train true \
--do_test true \
--batch_size 64 \
--init_pretraining_params ${BERT_BASE_PATH}/dygraph_params/ \
--data_dir ${DATA_PATH} \
--vocab_path ${BERT_BASE_PATH}/vocab.txt \
--checkpoints ${CKPT_PATH} \
--save_steps 1000 \
--weight_decay 0.01 \
--warmup_proportion 0.1 \
--validation_steps 100 \
--epoch 3 \
--max_seq_len 128 \
--bert_config_path ${BERT_BASE_PATH}/bert_config.json \
--learning_rate 5e-5 \
--skip_steps 10 \
--shuffle true
The commit adds a second copy of this example in which bert.yaml and the fine-tuning script above are repeated verbatim; the only functional difference is that its training BertDataLoader caches the preprocessed examples in LevelDB:

    train_dataloader = BertDataLoader(
        "./data/glue_data/MNLI/train.tsv",
        tokenizer, ["contradiction", "entailment", "neutral"],
        max_seq_length=config.max_seq_len,
        batch_size=config.batch_size,
        line_processor=mnli_line_processor,
        mode="leveldb",
        phase="train")

A multi-GPU launch script follows:

#!/bin/bash
BERT_BASE_PATH="./data/pretrained_models/uncased_L-12_H-768_A-12/"
TASK_NAME='MNLI'
DATA_PATH="./data/glue_data/MNLI/"
CKPT_PATH="./data/saved_model/mnli_models"
# start fine-tuning
python3.7 -m paddle.distributed.launch --started_port 8899 --selected_gpus=0,1,2,3 bert_classifier.py \
--use_cuda true \
--do_train true \
--do_test true \
--batch_size 64 \
--init_pretraining_params ${BERT_BASE_PATH}/dygraph_params/ \
--data_dir ${DATA_PATH} \
--vocab_path ${BERT_BASE_PATH}/vocab.txt \
--checkpoints ${CKPT_PATH} \
--save_steps 1000 \
--weight_decay 0.01 \
--warmup_proportion 0.1 \
--validation_steps 100 \
--epoch 3 \
--max_seq_len 128 \
--bert_config_path ${BERT_BASE_PATH}/bert_config.json \
--learning_rate 5e-5 \
--skip_steps 10 \
--shuffle true
@@ -29,7 +29,6 @@ BMN Overview
 ├── train.py          # training code, trains the network
 ├── eval.py           # evaluation code, measures network performance
 ├── predict.py        # prediction code, predicts results for arbitrary input
-├── bmn_model.py      # network structure and loss function definition
 ├── bmn_metric.py     # accuracy metric definition
 ├── reader.py         # data reader, builds Dataset and Dataloader
 ├── bmn_utils.py      # model utility code
@@ -41,7 +40,7 @@ BMN Overview
 ## Data preparation
 
-BMN is trained on the dataset provided by ActivityNet1.3. We provide preprocessed video features; download [bmn_feat](https://paddlemodels.bj.bcebos.com/video_detection/bmn_feat.tar.gz), extract it, and update the feature path feat_path in bmn.yaml accordingly. For the corresponding label file, download [label](https://paddlemodels.bj.bcebos.com/video_detection/activitynet_1.3_annotations.json) and update the label file path anno_file in bmn.yaml.
+BMN is trained on the dataset provided by ActivityNet1.3. We provide preprocessed video features and the corresponding label file; download the feature data [bmn_feat](https://paddlemodels.bj.bcebos.com/video_detection/bmn_feat.tar.gz) and the label data [label](https://paddlemodels.bj.bcebos.com/video_detection/activitynet_1.3_annotations.json), then update the feature file path feat_path and the label file path anno_file in the configuration file bmn.yaml accordingly.
 
 ## Model training
@@ -52,22 +51,17 @@
 bash run.sh
 
-For single-GPU training, launch as follows:
+For single-GPU training, set batch_size in the configuration file bmn.yaml to 16, then launch as follows:
 
+export CUDA_VISIBLE_DEVICES=0
 python train.py
 
+Static-graph training is the default; to train in dygraph mode, just add the `-d` flag to the command, e.g.:
+
+python train.py -d
+
 - Running the code requires pandas to be installed first
 - Training starts from scratch with the command line or script above; no pretrained model is needed
-- For single-GPU training, set batch_size in the configuration file to 16
-
-**Training strategy:**
-
-* Adam optimizer with initial learning_rate=0.001
-* Weight decay coefficient of 1e-4
-* The learning rate is decayed once, by a factor of 0.1, when the iteration count reaches 4200
 
 ## Model evaluation
@@ -76,9 +70,9 @@
 python eval.py --weights=$PATH_TO_WEIGHTS
 
-- The `weights` argument on the command line selects the weights to evaluate; if unset, the default parameter file checkpoint/final.pdparams is used
+- The `weights` argument on the command line selects the weights to evaluate; if unspecified, the script downloads the released [model](https://paddlemodels.bj.bcebos.com/hapi/bmn.pdparams) and evaluates it
-- The program saves its intermediate results under output/EVAL/BMN_results, and the test results in the file evaluate_results/bmn_results_validation.json
+- The program saves its intermediate results under the folder given by `--output_path` (default output/EVAL/BMN_results), and the test results under the folder given by `--result_path` (default evaluate_results)
 - Note: loss may come out as nan during evaluation. Evaluation runs on single samples, and a sample may contain no instance with iou>0.6, hence nan; this does not affect the final evaluation result.
@@ -87,9 +81,9 @@
 - For details on using the ActivityNet dataset, see its [official website](http://activity-net.org)
 
-- Download the metric evaluation code from the [ActivityNet GitHub repository](https://github.com/activitynet/ActivityNet.git) and copy the Evaluation folder into the models/dygraph/bmn directory. (Note: the third-party evaluation code does not support python3, so python2 is recommended for evaluation; to use python3, add parentheses to print calls in the .py files under Evaluation.)
+- Download the metric evaluation code from the [ActivityNet GitHub repository](https://github.com/activitynet/ActivityNet.git) and copy the Evaluation folder into the hapi/examples/bmn directory. (Note: the third-party evaluation code does not support python3, so python2 is recommended for evaluation; to use python3, add parentheses to print calls in the .py files under Evaluation.)
-- Download [activity_net_1_3_new.json](https://paddlemodels.bj.bcebos.com/video_detection/activity_net_1_3_new.json) and place it in the models/dygraph/bmn/Evaluation/data directory; compared with the original activity_net.v1-3.min.json, it filters out some invalid video entries.
+- Download [activity_net_1_3_new.json](https://paddlemodels.bj.bcebos.com/video_detection/activity_net_1_3_new.json) and place it in the hapi/examples/bmn/Evaluation/data directory; compared with the original activity_net.v1-3.min.json, it filters out some invalid video entries.
 
 - Compute the accuracy metrics
@@ -100,7 +94,7 @@
 | AR@1 | AR@5 | AR@10 | AR@100 | AUC |
 | :---: | :---: | :---: | :---: | :---: |
-| 33.46 | 49.25 | 56.25 | 75.40 | 67.16% |
+| 33.10 | 49.18 | 56.54 | 75.12 | 67.16% |
 
 ## Model inference
@@ -110,9 +104,9 @@
 python predict.py --weights=$PATH_TO_WEIGHTS \
                   --filelist=$FILELIST
 
-- When launching from the python command line, `--filelist` specifies the list of files to run inference on (default ./infer.list), and `--weights` is the trained weight file; if unset, the default parameter file checkpoint/final.pdparams is used
+- When launching from the python command line, `--filelist` specifies the list of files to run inference on (default ./infer.list), and `--weights` is the trained weight file; if unspecified, the script downloads the released [model](https://paddlemodels.bj.bcebos.com/hapi/bmn.pdparams) and predicts with it
-- The program saves its intermediate results under output/INFER/BMN_results, and the test results in the file predict_results/bmn_results_test.json
+- The program saves its intermediate results under the folder given by `--output_path` (default output/INFER/BMN_results), and the test results under the folder given by `--result_path` (default predict_results)
 
 ## References
......
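
Before training, it is worth checking that the two paths from the data preparation step resolve. A hypothetical sanity check, assuming the parse_config helper from this example's config_utils and that feat_path and anno_file live under the MODEL section of bmn.yaml:

    # Hypothetical check; the MODEL-section key names (feat_path, anno_file)
    # are assumptions based on the data-preparation notes above.
    import os
    from config_utils import parse_config

    config = parse_config('bmn.yaml')
    for path in [config.MODEL.feat_path, config.MODEL.anno_file]:
        assert os.path.exists(path), 'missing data, fix bmn.yaml: ' + path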
@@ -12,11 +12,10 @@ MODEL:
 TRAIN:
     subset: "train"
     epoch: 9
     batch_size: 4
     num_workers: 4
     use_shuffle: True
     device: "gpu"
-    num_gpus: 4
     learning_rate: 0.001
     learning_rate_decay: 0.1
     lr_decay_iter: 4200
@@ -29,10 +28,6 @@ TEST:
     subset: "validation"
     batch_size: 1
     num_workers: 1
-    use_buffer: False
-    snms_alpha: 0.001
-    snms_t1: 0.5
-    snms_t2: 0.9
     output_path: "output/EVAL/BMN_results"
     result_path: "evaluate_results"
@@ -40,10 +35,6 @@ INFER:
     subset: "test"
     batch_size: 1
     num_workers: 1
-    use_buffer: False
-    snms_alpha: 0.4
-    snms_t1: 0.5
-    snms_t2: 0.9
     filelist: './infer.list'
     output_path: "output/INFER/BMN_results"
     result_path: "predict_results"
......
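
The TRAIN keys above (learning_rate, learning_rate_decay, lr_decay_iter) drive the optimizer(config, parameter_list=...) helper that train.py calls below. A minimal sketch of such a helper, not the exact implementation: it pairs Adam with a one-step piecewise decay, and the 1e-4 L2 weight decay is an assumption carried over from the README's former training-strategy notes:

    import paddle.fluid as fluid

    def make_optimizer(cfg, parameter_list):
        # decay the learning rate once, by learning_rate_decay, at lr_decay_iter
        boundaries = [cfg.TRAIN.lr_decay_iter]                     # 4200 above
        values = [cfg.TRAIN.learning_rate,                         # 0.001 above
                  cfg.TRAIN.learning_rate * cfg.TRAIN.learning_rate_decay]
        return fluid.optimizer.Adam(
            learning_rate=fluid.dygraph.PiecewiseDecay(boundaries, values, begin=0),
            parameter_list=parameter_list,
            regularization=fluid.regularizer.L2Decay(1e-4))        # assumed value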
@@ -20,7 +20,7 @@ import json
 sys.path.append('../')
 
-from metrics import Metric
+from hapi.metrics import Metric
 from bmn_utils import boundary_choose, bmn_post_processing
 
@@ -36,13 +36,26 @@ class BmnMetric(Metric):
         #get video_dict and video_list
         if self.mode == 'test':
             self.get_test_dataset_dict()
+            if not os.path.isdir(self.cfg.TEST.output_path):
+                os.makedirs(self.cfg.TEST.output_path)
+            if not os.path.isdir(self.cfg.TEST.result_path):
+                os.makedirs(self.cfg.TEST.result_path)
         elif self.mode == 'infer':
             self.get_infer_dataset_dict()
+            if not os.path.isdir(self.cfg.INFER.output_path):
+                os.makedirs(self.cfg.INFER.output_path)
+            if not os.path.isdir(self.cfg.INFER.result_path):
+                os.makedirs(self.cfg.INFER.result_path)
 
-    def add_metric_op(self, preds, label):
-        pred_bm, pred_start, pred_en = preds
-        video_index = label[-1]
-        return [pred_bm, pred_start, pred_en, video_index]  #return list
+    def add_metric_op(self, *args):
+        if self.mode == 'test':
+            # only extract pred_bm, pred_start, pred_en from outputs
+            # and video_index from label here
+            pred_bm, pred_start, pred_en, _, _, _, video_index = args
+        else:
+            # in infer mode, labels only contains video_index
+            pred_bm, pred_start, pred_en, video_index = args
+        return pred_bm, pred_start, pred_en, video_index
 
     def update(self, pred_bm, pred_start, pred_end, fid):
         # generate proposals
......
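
The variadic add_metric_op above receives the model outputs followed by the labels as one flat argument list; in test mode that is the three predictions, the three BMN training targets, then the video index. A toy illustration of the unpacking (plain Python, not framework code):

    # Illustrative only: mirrors the test-mode unpacking in add_metric_op.
    outputs = ['pred_bm', 'pred_start', 'pred_en']
    labels = ['gt_iou_map', 'gt_start', 'gt_end', 'video_index']
    pred_bm, pred_start, pred_en, _, _, _, video_index = outputs + labels
    print(pred_bm, video_index)  # pred_bm video_index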
@@ -162,56 +162,3 @@ def bmn_post_processing(video_dict, subset, output_path, result_path):
     outfile.close()
-
-
-def _get_interp1d_bin_mask(seg_xmin, seg_xmax, tscale, num_sample,
-                           num_sample_perbin):
-    """ generate sample mask for a boundary-matching pair """
-    plen = float(seg_xmax - seg_xmin)
-    plen_sample = plen / (num_sample * num_sample_perbin - 1.0)
-    total_samples = [
-        seg_xmin + plen_sample * ii
-        for ii in range(num_sample * num_sample_perbin)
-    ]
-    p_mask = []
-    for idx in range(num_sample):
-        bin_samples = total_samples[idx * num_sample_perbin:(idx + 1) *
-                                    num_sample_perbin]
-        bin_vector = np.zeros([tscale])
-        for sample in bin_samples:
-            sample_upper = math.ceil(sample)
-            sample_decimal, sample_down = math.modf(sample)
-            if int(sample_down) <= (tscale - 1) and int(sample_down) >= 0:
-                bin_vector[int(sample_down)] += 1 - sample_decimal
-            if int(sample_upper) <= (tscale - 1) and int(sample_upper) >= 0:
-                bin_vector[int(sample_upper)] += sample_decimal
-        bin_vector = 1.0 / num_sample_perbin * bin_vector
-        p_mask.append(bin_vector)
-    p_mask = np.stack(p_mask, axis=1)
-    return p_mask
-
-
-def get_interp1d_mask(tscale, dscale, prop_boundary_ratio, num_sample,
-                      num_sample_perbin):
-    """ generate sample mask for each point in Boundary-Matching Map """
-    mask_mat = []
-    for start_index in range(tscale):
-        mask_mat_vector = []
-        for duration_index in range(dscale):
-            if start_index + duration_index < tscale:
-                p_xmin = start_index
-                p_xmax = start_index + duration_index
-                center_len = float(p_xmax - p_xmin) + 1
-                sample_xmin = p_xmin - center_len * prop_boundary_ratio
-                sample_xmax = p_xmax + center_len * prop_boundary_ratio
-                p_mask = _get_interp1d_bin_mask(sample_xmin, sample_xmax,
-                                                tscale, num_sample,
-                                                num_sample_perbin)
-            else:
-                p_mask = np.zeros([tscale, num_sample])
-            mask_mat_vector.append(p_mask)
-        mask_mat_vector = np.stack(mask_mat_vector, axis=2)
-        mask_mat.append(mask_mat_vector)
-    mask_mat = np.stack(mask_mat, axis=3)
-    mask_mat = mask_mat.astype(np.float32)
-    sample_mask = np.reshape(mask_mat, [tscale, -1])
-    return sample_mask
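
These two functions are not gone: they reappear verbatim in modeling.py further down. A quick sketch of the mask they build, using the default sizes documented there (tscale=dscale=100, num_sample=32, num_sample_perbin=3):

    # Shape check for the boundary-matching sample mask (now in modeling.py).
    from modeling import get_interp1d_mask

    mask = get_interp1d_mask(tscale=100, dscale=100, prop_boundary_ratio=0.5,
                             num_sample=32, num_sample_perbin=3)
    print(mask.shape)  # (100, 320000), i.e. (tscale, num_sample * dscale * tscale)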
@@ -18,11 +18,10 @@ import sys
 import logging
 import paddle.fluid as fluid
 
-sys.path.append('../')
-
-from model import set_device, Input
+from hapi.model import set_device, Input
+from modeling import bmn, BmnLoss
 from bmn_metric import BmnMetric
-from bmn_model import BMN, BmnLoss
 from reader import BmnDataset
 from config_utils import *
@@ -39,7 +38,6 @@ def parse_args():
     parser.add_argument(
         "-d",
         "--dynamic",
-        default=True,
         action='store_true',
         help="enable dygraph mode, only support dynamic mode at present time")
     parser.add_argument(
@@ -55,9 +53,20 @@ def parse_args():
     parser.add_argument(
         '--weights',
         type=str,
-        default="checkpoint/final",
+        default=None,
         help='weight path, None to automatically download weights provided by Paddle.'
     )
+    parser.add_argument(
+        '--output_path',
+        type=str,
+        default="output/EVAL/BMN_results",
+        help='output dir path, default to use output/EVAL/BMN_results')
+    parser.add_argument(
+        '--result_path',
+        type=str,
+        default="evaluate_results/",
+        help='output dir path after post processing, default to use ./evaluate_results/'
+    )
     parser.add_argument(
         '--log_interval',
         type=int,
@@ -69,17 +78,21 @@ def parse_args():
 # Performance Evaluation
 def test_bmn(args):
+    # only support dynamic mode at present time
     device = set_device(args.device)
     fluid.enable_dygraph(device) if args.dynamic else None
 
+    #config setting
     config = parse_config(args.config_file)
     eval_cfg = merge_configs(config, 'test', vars(args))
-    if not os.path.isdir(config.TEST.output_path):
-        os.makedirs(config.TEST.output_path)
-    if not os.path.isdir(config.TEST.result_path):
-        os.makedirs(config.TEST.result_path)
+
+    feat_dim = config.MODEL.feat_dim
+    tscale = config.MODEL.tscale
+    dscale = config.MODEL.dscale
+    prop_boundary_ratio = config.MODEL.prop_boundary_ratio
+    num_sample = config.MODEL.num_sample
+    num_sample_perbin = config.MODEL.num_sample_perbin
 
+    #input and video index
     inputs = [
         Input(
             [None, config.MODEL.feat_dim, config.MODEL.tscale],
@@ -99,9 +112,14 @@ def test_bmn(args):
     eval_dataset = BmnDataset(eval_cfg, 'test')
 
     #model
-    model = BMN(config, args.dynamic)
+    model = bmn(tscale,
+                dscale,
+                prop_boundary_ratio,
+                num_sample,
+                num_sample_perbin,
+                pretrained=args.weights is None)
     model.prepare(
-        loss_function=BmnLoss(config),
+        loss_function=BmnLoss(tscale, dscale),
         metrics=BmnMetric(
             config, mode='test'),
         inputs=inputs,
@@ -109,11 +127,11 @@ def test_bmn(args):
         device=device)
 
     #load checkpoint
-    if args.weights:
+    if args.weights is not None:
         assert os.path.exists(args.weights + '.pdparams'), \
             "Given weight dir {} not exist.".format(args.weights)
         logger.info('load test weights from {}'.format(args.weights))
         model.load(args.weights)
 
     model.evaluate(
         eval_data=eval_dataset,
......
@@ -17,11 +17,73 @@ from paddle.fluid import ParamAttr
 import numpy as np
 import math
 
-from bmn_utils import get_interp1d_mask
-from model import Model, Loss
+from hapi.model import Model, Loss
+from hapi.download import get_weights_path
+
+__all__ = ["BMN", "BmnLoss", "bmn"]
 
 DATATYPE = 'float32'
 
+pretrain_infos = {
+    'bmn': ('https://paddlemodels.bj.bcebos.com/hapi/bmn.pdparams',
+            'aa84e3386e1fbd117fb96fa572feeb94')
+}
+
+
+def _get_interp1d_bin_mask(seg_xmin, seg_xmax, tscale, num_sample,
+                           num_sample_perbin):
+    """ generate sample mask for a boundary-matching pair """
+    plen = float(seg_xmax - seg_xmin)
+    plen_sample = plen / (num_sample * num_sample_perbin - 1.0)
+    total_samples = [
+        seg_xmin + plen_sample * ii
+        for ii in range(num_sample * num_sample_perbin)
+    ]
+    p_mask = []
+    for idx in range(num_sample):
+        bin_samples = total_samples[idx * num_sample_perbin:(idx + 1) *
+                                    num_sample_perbin]
+        bin_vector = np.zeros([tscale])
+        for sample in bin_samples:
+            sample_upper = math.ceil(sample)
+            sample_decimal, sample_down = math.modf(sample)
+            if int(sample_down) <= (tscale - 1) and int(sample_down) >= 0:
+                bin_vector[int(sample_down)] += 1 - sample_decimal
+            if int(sample_upper) <= (tscale - 1) and int(sample_upper) >= 0:
+                bin_vector[int(sample_upper)] += sample_decimal
+        bin_vector = 1.0 / num_sample_perbin * bin_vector
+        p_mask.append(bin_vector)
+    p_mask = np.stack(p_mask, axis=1)
+    return p_mask
+
+
+def get_interp1d_mask(tscale, dscale, prop_boundary_ratio, num_sample,
+                      num_sample_perbin):
+    """ generate sample mask for each point in Boundary-Matching Map """
+    mask_mat = []
+    for start_index in range(tscale):
+        mask_mat_vector = []
+        for duration_index in range(dscale):
+            if start_index + duration_index < tscale:
+                p_xmin = start_index
+                p_xmax = start_index + duration_index
+                center_len = float(p_xmax - p_xmin) + 1
+                sample_xmin = p_xmin - center_len * prop_boundary_ratio
+                sample_xmax = p_xmax + center_len * prop_boundary_ratio
+                p_mask = _get_interp1d_bin_mask(sample_xmin, sample_xmax,
+                                                tscale, num_sample,
+                                                num_sample_perbin)
+            else:
+                p_mask = np.zeros([tscale, num_sample])
+            mask_mat_vector.append(p_mask)
+        mask_mat_vector = np.stack(mask_mat_vector, axis=2)
+        mask_mat.append(mask_mat_vector)
+    mask_mat = np.stack(mask_mat, axis=3)
+    mask_mat = mask_mat.astype(np.float32)
+    sample_mask = np.reshape(mask_mat, [tscale, -1])
+    return sample_mask
+
 
 # Net
 class Conv1D(fluid.dygraph.Layer):
@@ -64,16 +126,27 @@ class Conv1D(fluid.dygraph.Layer):
 
 class BMN(Model):
-    def __init__(self, cfg, is_dygraph=True):
+    """BMN model from
+    `"BMN: Boundary-Matching Network for Temporal Action Proposal Generation" <https://arxiv.org/abs/1907.09702>`_
+
+    Args:
+        tscale (int): sequence length, default 100.
+        dscale (int): max duration length, default 100.
+        prop_boundary_ratio (float): ratio of expanded temporal region in proposal boundary, default 0.5.
+        num_sample (int): number of samples between the starting and ending boundary of each proposal, default 32.
+        num_sample_perbin (int): number of selected points in each sample, default 3.
+    """
+
+    def __init__(self, tscale, dscale, prop_boundary_ratio, num_sample,
+                 num_sample_perbin):
         super(BMN, self).__init__()
 
         #init config
-        self.tscale = cfg.MODEL.tscale
-        self.dscale = cfg.MODEL.dscale
-        self.prop_boundary_ratio = cfg.MODEL.prop_boundary_ratio
-        self.num_sample = cfg.MODEL.num_sample
-        self.num_sample_perbin = cfg.MODEL.num_sample_perbin
-        self.is_dygraph = is_dygraph
+        self.tscale = tscale
+        self.dscale = dscale
+        self.prop_boundary_ratio = prop_boundary_ratio
+        self.num_sample = num_sample
+        self.num_sample_perbin = num_sample_perbin
 
         self.hidden_dim_1d = 256
         self.hidden_dim_2d = 128
@@ -124,23 +197,17 @@ class BMN(Model):
             padding=1,
             act="relu")
 
-        # init to speed up
+        # get sample mask
         sample_mask_array = get_interp1d_mask(
             self.tscale, self.dscale, self.prop_boundary_ratio,
             self.num_sample, self.num_sample_perbin)
-        if self.is_dygraph:
-            self.sample_mask = fluid.dygraph.base.to_variable(
-                sample_mask_array)
-        else:  # static
-            self.sample_mask = fluid.layers.create_parameter(
-                shape=[
-                    self.tscale, self.num_sample * self.dscale * self.tscale
-                ],
-                dtype=DATATYPE,
-                attr=fluid.ParamAttr(
-                    name="sample_mask", trainable=False),
-                default_initializer=fluid.initializer.NumpyArrayInitializer(
-                    sample_mask_array))
+        self.sample_mask = fluid.layers.create_parameter(
+            shape=[self.tscale, self.num_sample * self.dscale * self.tscale],
+            dtype=DATATYPE,
+            attr=fluid.ParamAttr(
+                name="sample_mask", trainable=False),
+            default_initializer=fluid.initializer.NumpyArrayInitializer(
+                sample_mask_array))
 
         self.sample_mask.stop_gradient = True
@@ -221,21 +288,30 @@ class BMN(Model):
 
 class BmnLoss(Loss):
-    def __init__(self, cfg):
+    """Loss for BMN model
+
+    Args:
+        tscale (int): sequence length, default 100.
+        dscale (int): max duration length, default 100.
+    """
+
+    def __init__(self, tscale, dscale):
         super(BmnLoss, self).__init__()
-        self.cfg = cfg
+        self.tscale = tscale
+        self.dscale = dscale
 
     def _get_mask(self):
-        dscale = self.cfg.MODEL.dscale
-        tscale = self.cfg.MODEL.tscale
         bm_mask = []
-        for idx in range(dscale):
-            mask_vector = [1 for i in range(tscale - idx)
+        for idx in range(self.dscale):
+            mask_vector = [1 for i in range(self.tscale - idx)
                            ] + [0 for i in range(idx)]
             bm_mask.append(mask_vector)
         bm_mask = np.array(bm_mask, dtype=np.float32)
         self_bm_mask = fluid.layers.create_global_var(
-            shape=[dscale, tscale], value=0, dtype=DATATYPE, persistable=True)
+            shape=[self.dscale, self.tscale],
+            value=0,
+            dtype=DATATYPE,
+            persistable=True)
         fluid.layers.assign(bm_mask, self_bm_mask)
         self_bm_mask.stop_gradient = True
         return self_bm_mask
@@ -362,3 +438,29 @@ class BmnLoss(Loss):
         loss = tem_loss + 10 * pem_reg_loss + pem_cls_loss
         return loss
+
+
+def bmn(tscale,
+        dscale,
+        prop_boundary_ratio,
+        num_sample,
+        num_sample_perbin,
+        pretrained=True):
+    """BMN model
+
+    Args:
+        tscale (int): sequence length, default 100.
+        dscale (int): max duration length, default 100.
+        prop_boundary_ratio (float): ratio of expanded temporal region in proposal boundary, default 0.5.
+        num_sample (int): number of samples between the starting and ending boundary of each proposal, default 32.
+        num_sample_perbin (int): number of selected points in each sample, default 3.
+        pretrained (bool): If True, returns a model with pre-trained weights, default True.
+    """
+    model = BMN(tscale, dscale, prop_boundary_ratio, num_sample,
+                num_sample_perbin)
+    if pretrained:
+        weight_path = get_weights_path(*(pretrain_infos['bmn']))
+        assert weight_path.endswith('.pdparams'), \
+            "suffix of weight must be .pdparams"
+        model.load(weight_path)
+    return model
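
A short usage sketch of the new bmn factory, mirroring how eval.py and predict.py call it in this commit; the argument values are the defaults listed in the docstrings above:

    import paddle.fluid as fluid
    from hapi.model import set_device
    from modeling import bmn

    # dygraph mode; released weights are downloaded via pretrain_infos
    fluid.enable_dygraph(set_device('gpu'))
    model = bmn(tscale=100, dscale=100, prop_boundary_ratio=0.5,
                num_sample=32, num_sample_perbin=3, pretrained=True)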
@@ -18,11 +18,10 @@ import os
 import logging
 import paddle.fluid as fluid
 
-sys.path.append('../')
-
-from model import set_device, Input
+from hapi.model import set_device, Input
+from modeling import bmn, BmnLoss
 from bmn_metric import BmnMetric
-from bmn_model import BMN, BmnLoss
 from reader import BmnDataset
 from config_utils import *
@@ -39,7 +38,6 @@ def parse_args():
     parser.add_argument(
         "-d",
         "--dynamic",
-        default=True,
         action='store_true',
         help="enable dygraph mode, only support dynamic mode at present time")
     parser.add_argument(
@@ -52,14 +50,25 @@ def parse_args():
     parser.add_argument(
         '--weights',
         type=str,
-        default="checkpoint/final",
+        default=None,
         help='weight path, None to automatically download weights provided by Paddle.'
     )
     parser.add_argument(
-        '--save_dir',
+        '--filelist',
+        type=str,
+        default="infer.list",
+        help='infer file list, default to use ./infer.list')
+    parser.add_argument(
+        '--output_path',
+        type=str,
+        default="output/INFER/BMN_results",
+        help='output dir path, default to use output/INFER/BMN_results')
+    parser.add_argument(
+        '--result_path',
         type=str,
         default="predict_results/",
-        help='output dir path, default to use ./predict_results/')
+        help='output dir path after post processing, default to use ./predict_results/'
+    )
     parser.add_argument(
         '--log_interval',
         type=int,
@@ -71,18 +80,21 @@ def parse_args():
 # Prediction
 def infer_bmn(args):
+    # only support dynamic mode at present time
     device = set_device(args.device)
     fluid.enable_dygraph(device) if args.dynamic else None
 
+    #config setting
     config = parse_config(args.config_file)
     infer_cfg = merge_configs(config, 'infer', vars(args))
 
-    if not os.path.isdir(config.INFER.output_path):
-        os.makedirs(config.INFER.output_path)
-    if not os.path.isdir(config.INFER.result_path):
-        os.makedirs(config.INFER.result_path)
+    feat_dim = config.MODEL.feat_dim
+    tscale = config.MODEL.tscale
+    dscale = config.MODEL.dscale
+    prop_boundary_ratio = config.MODEL.prop_boundary_ratio
+    num_sample = config.MODEL.num_sample
+    num_sample_perbin = config.MODEL.num_sample_perbin
 
+    #input and video index
     inputs = [
         Input(
             [None, config.MODEL.feat_dim, config.MODEL.tscale],
@@ -94,7 +106,13 @@ def infer_bmn(args):
     #data
     infer_dataset = BmnDataset(infer_cfg, 'infer')
 
-    model = BMN(config, args.dynamic)
+    #model
+    model = bmn(tscale,
+                dscale,
+                prop_boundary_ratio,
+                num_sample,
+                num_sample_perbin,
+                pretrained=args.weights is None)
     model.prepare(
         metrics=BmnMetric(
             config, mode='infer'),
@@ -103,12 +121,12 @@ def infer_bmn(args):
         device=device)
 
     # load checkpoint
-    if args.weights:
+    if args.weights is not None:
         assert os.path.exists(
             args.weights +
             ".pdparams"), "Given weight dir {} not exist.".format(args.weights)
         logger.info('load test weights from {}'.format(args.weights))
         model.load(args.weights)
 
     # here use model.eval instead of model.test, as post process is required in our case
     model.evaluate(
......
@@ -21,8 +21,8 @@ import sys
 sys.path.append('../')
 
-from distributed import DistributedBatchSampler
-from paddle.fluid.io import Dataset, DataLoader
+from hapi.distributed import DistributedBatchSampler
+from paddle.io import Dataset, DataLoader
 
 logger = logging.getLogger(__name__)
......
export CUDA_VISIBLE_DEVICES=0,1,2,3
python -m paddle.distributed.launch train.py python -m paddle.distributed.launch train.py
@@ -18,12 +18,11 @@ import logging
 import sys
 import os
 
-sys.path.append('../')
-
-from model import set_device, Input
-from bmn_model import BMN, BmnLoss
+from hapi.model import set_device, Input
 from reader import BmnDataset
 from config_utils import *
+from modeling import bmn, BmnLoss
 
 DATATYPE = 'float32'
@@ -36,11 +35,7 @@ logger = logging.getLogger(__name__)
 def parse_args():
     parser = argparse.ArgumentParser("Paddle high level api of BMN.")
     parser.add_argument(
-        "-d",
-        "--dynamic",
-        default=True,
-        action='store_true',
-        help="enable dygraph mode")
+        "-d", "--dynamic", action='store_true', help="enable dygraph mode")
     parser.add_argument(
         '--config_file',
         type=str,
@@ -50,7 +45,7 @@ def parse_args():
         '--batch_size',
         type=int,
         default=None,
-        help='training batch size. None to use config file setting.')
+        help='training batch size. None for read from config file.')
     parser.add_argument(
         '--learning_rate',
         type=float,
@@ -70,8 +65,8 @@ def parse_args():
     parser.add_argument(
         '--epoch',
         type=int,
-        default=9,
-        help='epoch number, 0 for read from config file')
+        default=None,
+        help='epoch number, None for read from config file')
     parser.add_argument(
         '--valid_interval',
         type=int,
@@ -115,22 +110,23 @@ def train_bmn(args):
     if not os.path.isdir(args.save_dir):
         os.makedirs(args.save_dir)
 
+    #config setting
     config = parse_config(args.config_file)
     train_cfg = merge_configs(config, 'train', vars(args))
     val_cfg = merge_configs(config, 'valid', vars(args))
 
-    inputs = [
-        Input(
-            [None, config.MODEL.feat_dim, config.MODEL.tscale],
-            'float32',
-            name='feat_input')
-    ]
-    gt_iou_map = Input(
-        [None, config.MODEL.dscale, config.MODEL.tscale],
-        'float32',
-        name='gt_iou_map')
-    gt_start = Input([None, config.MODEL.tscale], 'float32', name='gt_start')
-    gt_end = Input([None, config.MODEL.tscale], 'float32', name='gt_end')
+    feat_dim = config.MODEL.feat_dim
+    tscale = config.MODEL.tscale
+    dscale = config.MODEL.dscale
+    prop_boundary_ratio = config.MODEL.prop_boundary_ratio
+    num_sample = config.MODEL.num_sample
+    num_sample_perbin = config.MODEL.num_sample_perbin
+
+    # input and label list
+    inputs = [Input([None, feat_dim, tscale], 'float32', name='feat_input')]
+    gt_iou_map = Input([None, dscale, tscale], 'float32', name='gt_iou_map')
+    gt_start = Input([None, tscale], 'float32', name='gt_start')
+    gt_end = Input([None, tscale], 'float32', name='gt_end')
     labels = [gt_iou_map, gt_start, gt_end]
 
     # data
@@ -138,11 +134,16 @@ def train_bmn(args):
     val_dataset = BmnDataset(val_cfg, 'valid')
 
     # model
-    model = BMN(config, args.dynamic)
+    model = bmn(tscale,
+                dscale,
+                prop_boundary_ratio,
+                num_sample,
+                num_sample_perbin,
+                pretrained=False)
     optim = optimizer(config, parameter_list=model.parameters())
     model.prepare(
         optimizer=optim,
-        loss_function=BmnLoss(config),
+        loss_function=BmnLoss(tscale, dscale),
         inputs=inputs,
         labels=labels,
         device=device)
@@ -150,11 +151,10 @@ def train_bmn(args):
     # if resume weights is given, load resume weights directly
     if args.resume is not None:
         model.load(args.resume)
 
     model.fit(train_data=train_dataset,
               eval_data=val_dataset,
               batch_size=train_cfg.TRAIN.batch_size,
-              epochs=args.epoch,
+              epochs=train_cfg.TRAIN.epoch,
               eval_freq=args.valid_interval,
               log_freq=args.log_interval,
               save_dir=args.save_dir,
......
@@ -80,12 +80,19 @@ data/cityscapes/testA/412_A.jpg
 ### Training
 
-Train on a single GPU:
+Train on a single GPU with the static graph:
 ```
-env CUDA_VISIBLE_DEVICES=0 python train.py
+env CUDA_VISIBLE_DEVICES=0 python train.py --checkpoint_path=checkpoint_static
 ```
+
+Train on a single GPU in dygraph mode:
+```
+env CUDA_VISIBLE_DEVICES=0 python train.py --dynamic --checkpoint_path=checkpoint_dynamic
+```
 
 Run `python train.py --help` for more usage information and detailed argument descriptions.
 
 Figure 1 shows the training loss over 152 epochs of training: the horizontal axis is the training epoch and the vertical axis is the loss on the training set, where 'g_loss', 'da_loss' and 'db_loss' are the training losses of the generator, discriminator A and discriminator B, respectively.
......
@@ -18,9 +18,10 @@ from __future__ import print_function
 import numpy as np
 
-from layers import ConvBN, DeConvBN
 import paddle.fluid as fluid
-from model import Model, Loss
+from hapi.model import Model, Loss
+
+from layers import ConvBN, DeConvBN
 
 class ResnetBlock(fluid.dygraph.Layer):
......
@@ -20,6 +20,8 @@ import random
 import numpy as np
 from PIL import Image, ImageOps
 
+import paddle
+
 DATASET = "cityscapes"
 A_LIST_FILE = "./data/" + DATASET + "/trainA.txt"
 B_LIST_FILE = "./data/" + DATASET + "/trainB.txt"
@@ -27,10 +29,8 @@ A_TEST_LIST_FILE = "./data/" + DATASET + "/testA.txt"
 B_TEST_LIST_FILE = "./data/" + DATASET + "/testB.txt"
 IMAGES_ROOT = "./data/" + DATASET + "/"
 
-import paddle.fluid as fluid
 
-class Cityscapes(fluid.io.Dataset):
+class Cityscapes(paddle.io.Dataset):
     def __init__(self, root_path, file_path, mode='train', return_name=False):
         self.root_path = root_path
         self.file_path = file_path
......
@@ -25,9 +25,9 @@ from PIL import Image
 from scipy.misc import imsave
 import paddle.fluid as fluid
 
-from check import check_gpu, check_version
-from model import Model, Input, set_device
+from hapi.model import Model, Input, set_device
+from check import check_gpu, check_version
 from cyclegan import Generator, GeneratorCombine
@@ -43,7 +43,7 @@ def main():
     im_shape = [-1, 3, 256, 256]
     input_A = Input(im_shape, 'float32', 'input_A')
     input_B = Input(im_shape, 'float32', 'input_B')
-    g.prepare(inputs=[input_A, input_B])
+    g.prepare(inputs=[input_A, input_B], device=FLAGS.device)
     g.load(FLAGS.init_model, skip_mismatch=True, reset_optimizer=True)
 
     out_path = FLAGS.output + "/single"
@@ -59,10 +59,10 @@ def main():
         data = image.transpose([2, 0, 1])[np.newaxis, :]
 
         if FLAGS.input_style == "A":
-            _, fake, _, _ = g.test([data, data])
+            _, fake, _, _ = g.test_batch([data, data])
 
         if FLAGS.input_style == "B":
-            fake, _, _, _ = g.test([data, data])
+            fake, _, _, _ = g.test_batch([data, data])
 
         fake = np.squeeze(fake[0]).transpose([1, 2, 0])
@@ -74,7 +74,7 @@ def main():
 if __name__ == "__main__":
     parser = argparse.ArgumentParser("CycleGAN inference")
     parser.add_argument(
-        "-d", "--dynamic", action='store_false', help="Enable dygraph mode")
+        "-d", "--dynamic", action='store_true', help="Enable dygraph mode")
     parser.add_argument(
         "-p",
         "--device",
......
@@ -22,9 +22,9 @@ import numpy as np
 from scipy.misc import imsave
 import paddle.fluid as fluid
 
-from check import check_gpu, check_version
-from model import Model, Input, set_device
+from hapi.model import Model, Input, set_device
+from check import check_gpu, check_version
 from cyclegan import Generator, GeneratorCombine
 import data as data
@@ -41,7 +41,7 @@ def main():
     im_shape = [-1, 3, 256, 256]
     input_A = Input(im_shape, 'float32', 'input_A')
     input_B = Input(im_shape, 'float32', 'input_B')
-    g.prepare(inputs=[input_A, input_B])
+    g.prepare(inputs=[input_A, input_B], device=FLAGS.device)
     g.load(FLAGS.init_model, skip_mismatch=True, reset_optimizer=True)
 
     if not os.path.exists(FLAGS.output):
@@ -56,7 +56,7 @@ def main():
         data_A = np.array(data_A).astype("float32")
         data_B = np.array(data_B).astype("float32")
 
-        fake_A, fake_B, cyc_A, cyc_B = g.test([data_A, data_B])
+        fake_A, fake_B, cyc_A, cyc_B = g.test_batch([data_A, data_B])
 
         datas = [fake_A, fake_B, cyc_A, cyc_B, data_A, data_B]
         odatas = []
@@ -75,7 +75,7 @@ def main():
 if __name__ == "__main__":
     parser = argparse.ArgumentParser("CycleGAN test")
     parser.add_argument(
-        "-d", "--dynamic", action='store_false', help="Enable dygraph mode")
+        "-d", "--dynamic", action='store_true', help="Enable dygraph mode")
     parser.add_argument(
         "-p",
         "--device",
......
@@ -24,12 +24,11 @@ import time
 import paddle
 import paddle.fluid as fluid
 
-from check import check_gpu, check_version
-from model import Model, Input, set_device
+from hapi.model import Model, Input, set_device
 
-import data as data
+from check import check_gpu, check_version
 from cyclegan import Generator, Discriminator, GeneratorCombine, GLoss, DLoss
+import data as data
 
 step_per_epoch = 2974
@@ -76,23 +75,26 @@ def main():
     fake_A = Input(im_shape, 'float32', 'fake_A')
     fake_B = Input(im_shape, 'float32', 'fake_B')
 
-    g_AB.prepare(inputs=[input_A])
-    g_BA.prepare(inputs=[input_B])
-
-    g.prepare(g_optimizer, GLoss(), inputs=[input_A, input_B])
-    d_A.prepare(da_optimizer, DLoss(), inputs=[input_B, fake_B])
-    d_B.prepare(db_optimizer, DLoss(), inputs=[input_A, fake_A])
+    g_AB.prepare(inputs=[input_A], device=FLAGS.device)
+    g_BA.prepare(inputs=[input_B], device=FLAGS.device)
+
+    g.prepare(g_optimizer, GLoss(), inputs=[input_A, input_B],
+              device=FLAGS.device)
+    d_A.prepare(da_optimizer, DLoss(), inputs=[input_B, fake_B],
+                device=FLAGS.device)
+    d_B.prepare(db_optimizer, DLoss(), inputs=[input_A, fake_A],
+                device=FLAGS.device)
 
     if FLAGS.resume:
         g.load(FLAGS.resume)
 
-    loader_A = fluid.io.DataLoader(
+    loader_A = paddle.io.DataLoader(
         data.DataA(),
         places=place,
         shuffle=True,
         return_list=True,
         batch_size=FLAGS.batch_size)
-    loader_B = fluid.io.DataLoader(
+    loader_B = paddle.io.DataLoader(
         data.DataB(),
         places=place,
         shuffle=True,
@@ -108,14 +110,14 @@ def main():
             data_B = data_B[0][0] if not FLAGS.dynamic else data_B[0]
             start = time.time()
 
-            fake_B = g_AB.test(data_A)[0]
-            fake_A = g_BA.test(data_B)[0]
+            fake_B = g_AB.test_batch(data_A)[0]
+            fake_A = g_BA.test_batch(data_B)[0]
 
-            g_loss = g.train([data_A, data_B])[0]
+            g_loss = g.train_batch([data_A, data_B])[0]
 
             fake_pb = B_pool.get(fake_B)
-            da_loss = d_A.train([data_B, fake_pb])[0]
+            da_loss = d_A.train_batch([data_B, fake_pb])[0]
 
             fake_pa = A_pool.get(fake_A)
-            db_loss = d_B.train([data_A, fake_pa])[0]
+            db_loss = d_B.train_batch([data_A, fake_pa])[0]
 
             t = time.time() - start
             if i % 20 == 0:
@@ -128,7 +130,7 @@ def main():
 if __name__ == "__main__":
     parser = argparse.ArgumentParser("CycleGAN Training on Cityscapes")
     parser.add_argument(
-        "-d", "--dynamic", action='store_false', help="Enable dygraph mode")
+        "-d", "--dynamic", action='store_true', help="Enable dygraph mode")
     parser.add_argument(
         "-p",
         "--device",
......
@@ -43,13 +43,13 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch main.py --arch
 ### Single-GPU prediction
 
 Run the following command for prediction:
 ```bash
-python -u main.py --arch resnet50 -d --evaly-only /path/to/imagenet
+python -u main.py --arch resnet50 -d --eval-only /path/to/imagenet
 ```
 
 ### Multi-GPU prediction
 
 Run the following command for multi-GPU prediction:
 ```bash
-CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch main.py --arch resnet50 --evaly-only /path/to/imagenet
+CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch main.py --arch resnet50 --eval-only /path/to/imagenet
 ```
@@ -71,15 +71,20 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch main.py --arch
 * **weight-decay**: weight regularization coefficient, default: 1e-4
 * **momentum**: momentum of the SGD optimizer, default: 0.9
 
+Note: when resuming with ```--resume```, if your model path is ```./output/118.pdparams```, pass the path without the suffix, i.e. ```--resume ./output/118```.
+
 ## Models
 
 | Model | top1 acc | top5 acc |
 | --- | --- | --- |
-| [ResNet50](https://paddle-hapi.bj.bcebos.com/models/resnet50.pdparams) | 76.28 | 93.04 |
-| [vgg16](https://paddle-hapi.bj.bcebos.com/models/vgg16.pdparams) | 71.84 | 90.71 |
-| [mobilenet_v1](https://paddle-hapi.bj.bcebos.com/models/mobilenet_v1_x1.0.pdparams) | 71.25 | 89.92 |
-| [mobilenet_v2](https://paddle-hapi.bj.bcebos.com/models/mobilenet_v2_x1.0.pdparams) | 72.27 | 90.66 |
+| [ResNet18](https://paddle-hapi.bj.bcebos.com/models/resnet18.pdparams) | 71.72 | 90.60 |
+| [ResNet34](https://paddle-hapi.bj.bcebos.com/models/resnet34.pdparams) | 75.02 | 92.31 |
+| [ResNet50](https://paddle-hapi.bj.bcebos.com/models/resnet50.pdparams) | 76.27 | 93.03 |
+| [ResNet101](https://paddle-hapi.bj.bcebos.com/models/resnet101.pdparams) | 78.33 | 94.04 |
+| [ResNet152](https://paddle-hapi.bj.bcebos.com/models/resnet152.pdparams) | 78.78 | 94.40 |
+| [vgg16](https://paddle-hapi.bj.bcebos.com/models/vgg16.pdparams) | 71.92 | 90.65 |
+| [mobilenet_v1](https://paddle-hapi.bj.bcebos.com/models/mobilenet_v1_x1.0.pdparams) | 71.16 | 89.89 |
+| [mobilenet_v2](https://paddle-hapi.bj.bcebos.com/models/mobilenet_v2_x1.0.pdparams) | 72.30 | 90.74 |
 
 For the settings used to reproduce the models above, see the scripts under scripts.
......
@@ -18,81 +18,35 @@ import math
 import random
 import numpy as np
 
-from datasets.folder import DatasetFolder
-
-from paddle import fluid
-
-
-def center_crop_resize(img):
-    h, w = img.shape[:2]
-    c = int(224 / 256 * min((h, w)))
-    i = (h + 1 - c) // 2
-    j = (w + 1 - c) // 2
-    img = img[i:i + c, j:j + c, :]
-    return cv2.resize(img, (224, 224), 0, 0, cv2.INTER_LINEAR)
-
-
-def random_crop_resize(img):
-    height, width = img.shape[:2]
-    area = height * width
-
-    for attempt in range(10):
-        target_area = random.uniform(0.08, 1.) * area
-        log_ratio = (math.log(3 / 4), math.log(4 / 3))
-        aspect_ratio = math.exp(random.uniform(*log_ratio))
-
-        w = int(round(math.sqrt(target_area * aspect_ratio)))
-        h = int(round(math.sqrt(target_area / aspect_ratio)))
-
-        if w <= width and h <= height:
-            i = random.randint(0, height - h)
-            j = random.randint(0, width - w)
-            img = img[i:i + h, j:j + w, :]
-            return cv2.resize(img, (224, 224), 0, 0, cv2.INTER_LINEAR)
-
-    return center_crop_resize(img)
-
-
-def random_flip(img):
-    if np.random.randint(0, 2) == 1:
-        img = img[:, ::-1, :]
-    return img
-
-
-def normalize_permute(img):
-    # transpose and convert to RGB from BGR
-    img = img.astype(np.float32).transpose((2, 0, 1))[::-1, ...]
-    mean = np.array([123.675, 116.28, 103.53], dtype=np.float32)
-    std = np.array([58.395, 57.120, 57.375], dtype=np.float32)
-    invstd = 1. / std
-    for v, m, s in zip(img, mean, invstd):
-        v.__isub__(m).__imul__(s)
-    return img
-
-
-def compose(functions):
-    def process(sample):
-        img, label = sample
-        for fn in functions:
-            img = fn(img)
-        return img, label
-
-    return process
+from hapi.datasets import DatasetFolder
+from hapi.vision.transforms import transforms
 
 
 class ImageNetDataset(DatasetFolder):
     def __init__(self, path, mode='train'):
         super(ImageNetDataset, self).__init__(path)
         self.mode = mode
+        normalize = transforms.Normalize(
+            mean=[123.675, 116.28, 103.53], std=[58.395, 57.120, 57.375])
         if self.mode == 'train':
-            self.transform = compose([
-                cv2.imread, random_crop_resize, random_flip, normalize_permute
-            ])
+            self.transform = transforms.Compose([
+                transforms.RandomResizedCrop(224),
+                transforms.RandomHorizontalFlip(),
+                transforms.Permute(mode='CHW'), normalize
+            ])
         else:
-            self.transform = compose(
-                [cv2.imread, center_crop_resize, normalize_permute])
+            self.transform = transforms.Compose([
+                transforms.Resize(256), transforms.CenterCrop(224),
+                transforms.Permute(mode='CHW'), normalize
+            ])
 
     def __getitem__(self, idx):
-        img, label = self.samples[idx]
-        return self.transform((img, [label]))
+        img_path, label = self.samples[idx]
+        img = cv2.imread(img_path).astype(np.float32)
+        label = np.array([label])
+        return self.transform(img, label)
 
     def __len__(self):
         return len(self.samples)
@@ -24,15 +24,17 @@ sys.path.append('../')
import time
import math
import numpy as np

import paddle.fluid as fluid
from paddle.fluid.dygraph.parallel import ParallelEnv
from paddle.io import BatchSampler, DataLoader

from hapi.model import CrossEntropy, Input, set_device
from hapi.distributed import DistributedBatchSampler
from hapi.metrics import Accuracy
import hapi.vision.models as models

from imagenet_dataset import ImageNetDataset


def make_optimizer(step_per_epoch, parameter_list=None):
@@ -74,6 +76,9 @@ def main():
    device = set_device(FLAGS.device)
    fluid.enable_dygraph(device) if FLAGS.dynamic else None

    model_list = [x for x in models.__dict__["__all__"]]
    assert FLAGS.arch in model_list, "Expected FLAGS.arch in {}, but received {}".format(
        model_list, FLAGS.arch)
    model = models.__dict__[FLAGS.arch](pretrained=FLAGS.eval_only and
                                        not FLAGS.resume)
@@ -92,7 +97,13 @@ def main():
            len(train_dataset) * 1. / FLAGS.batch_size / ParallelEnv().nranks),
        parameter_list=model.parameters())

    model.prepare(
        optim,
        CrossEntropy(),
        Accuracy(topk=(1, 5)),
        inputs,
        labels,
        FLAGS.device)

    if FLAGS.eval_only:
        model.evaluate(
@@ -150,7 +161,7 @@ if __name__ == '__main__':
        type=str,
        help="checkpoint path to resume")
    parser.add_argument(
        "--eval-only", action='store_true', help="only evaluate the model")
    parser.add_argument(
        "--lr-scheduler",
        default='piecewise',
...
export CUDA_VISIBLE_DEVICES=0,1,2,3
# ImageNet data is expected under data/ILSVRC2012/ by default; remove -d to run in static-graph mode
python -m paddle.distributed.launch main.py \
--arch mobilenet_v1 \
--epoch 120 \
--batch-size 64 \
--learning-rate 0.1 \
--lr-scheduler piecewise \
--milestones 30 60 90 \
--weight-decay 3e-5 \
-d \
data/ILSVRC2012/
export CUDA_VISIBLE_DEVICES=0,1,2,3
# ImageNet data is expected under data/ILSVRC2012/ by default; remove -d to run in static-graph mode
python -m paddle.distributed.launch main.py \
--arch mobilenet_v2 \
--epoch 240 \
--batch-size 64 \
--learning-rate 0.1 \
--lr-scheduler cosine \
--weight-decay 4e-5 \
-d \
data/ILSVRC2012/
export CUDA_VISIBLE_DEVICES=0,1,2,3
# ImageNet data is expected under data/ILSVRC2012/ by default; remove -d to run in static-graph mode
python -m paddle.distributed.launch main.py \
--arch resnet101 \
--epoch 90 \
--batch-size 64 \
--learning-rate 0.1 \
-d \
data/ILSVRC2012/
export CUDA_VISIBLE_DEVICES=0,1,2,3
# ImageNet data is expected under data/ILSVRC2012/ by default; remove -d to run in static-graph mode
python -m paddle.distributed.launch main.py \
--arch resnet152 \
--epoch 90 \
--batch-size 64 \
--learning-rate 0.1 \
-d \
data/ILSVRC2012/
export CUDA_VISIBLE_DEVICES=0,1,2,3
# ImageNet data is expected under data/ILSVRC2012/ by default; remove -d to run in static-graph mode
python -m paddle.distributed.launch main.py \
--arch resnet18 \
--epoch 120 \
--batch-size 64 \
--learning-rate 0.1 \
--lr-scheduler cosine \
-d \
data/ILSVRC2012/
export CUDA_VISIBLE_DEVICES=0,1,2,3
# ImageNet data is expected under data/ILSVRC2012/ by default; remove -d to run in static-graph mode
python -m paddle.distributed.launch main.py \
--arch resnet34 \
--epoch 120 \
--batch-size 64 \
--learning-rate 0.1 \
--lr-scheduler cosine \
-d \
data/ILSVRC2012/
export CUDA_VISIBLE_DEVICES=0,1,2,3
# ImageNet data is expected under data/ILSVRC2012/ by default; remove -d to run in static-graph mode
python -m paddle.distributed.launch main.py \
--arch resnet50 \
--epoch 90 \
--batch-size 64 \
--learning-rate 0.1 \
-d \
data/ILSVRC2012/
export CUDA_VISIBLE_DEVICES=0,1,2,3
# ImageNet data is expected under data/ILSVRC2012/ by default; remove -d to run in static-graph mode
python -m paddle.distributed.launch main.py \
--arch vgg16 \
--epoch 90 \
--batch-size 64 \
--learning-rate 0.01 \
--lr-scheduler cosine \
-d \
data/ILSVRC2012/
Introduction
--------
This OCR example recognizes single-line text in images with an attention-based seq2seq model. Running the examples in this directory requires the latest PaddlePaddle develop version.
## Code Structure
```
.
|-- data.py         # data reading
|-- eval.py         # evaluation script
|-- images          # test images
|-- predict.py      # prediction script
|-- seq2seq_attn.py # model definition
|-- train.py        # training script
`-- utility.py      # common utilities
```
## Training / Evaluation / Prediction
- Set up the GPU environment:
```
export CUDA_VISIBLE_DEVICES=0
```
- Training
```
python train.py
```
Run with `--help` to see more options.
- Switching between dygraph and static graph
```
python train.py --dynamic=True
```
- Evaluation
```
python eval.py --init_model=checkpoint/final
```
- Prediction
Prediction does not support dygraph mode yet:
```
python predict.py --init_model=checkpoint/final --image_path=images/ --dynamic=False --beam_size=3
```
The prediction output looks as follows:
```
Image 1: images/112_chubbiness_13557.jpg
0: chubbines
1: chubbiness
2: chubbinesS
Image 2: images/177_Interfiled_40185.jpg
0: Interflied
1: Interfiled
2: InterfIled
Image 3: images/325_dame_19109.jpg
0: da
1: damo
2: dame
Image 4: images/368_fixtures_29232.jpg
0: firtures
1: Firtures
2: fixtures
```
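Each candidate above is the decoded id sequence of one beam; `utility.py` maps ids back to characters with a fixed ASCII offset. A minimal sketch of that mapping:
```
def index2word(ids):
    # ids are offset ASCII codes, e.g. 66 -> 'c', 71 -> 'h'
    return [chr(int(k + 33)) for k in ids]

print("".join(index2word([66, 71, 84, 65, 65, 72, 77, 68, 82, 82])))  # chubbiness
```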
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from os import path
import random
import traceback
import copy
import math
import tarfile
from PIL import Image
import logging
logger = logging.getLogger(__name__)
import paddle
from paddle import fluid
from paddle.fluid.dygraph.parallel import ParallelEnv
DATA_MD5 = "7256b1d5420d8c3e74815196e58cdad5"
DATA_URL = "http://paddle-ocr-data.bj.bcebos.com/data.tar.gz"
CACHE_DIR_NAME = "attention_data"
SAVED_FILE_NAME = "data.tar.gz"
DATA_DIR_NAME = "data"
TRAIN_DATA_DIR_NAME = "train_images"
TEST_DATA_DIR_NAME = "test_images"
TRAIN_LIST_FILE_NAME = "train.list"
TEST_LIST_FILE_NAME = "test.list"
class Resize(object):
def __init__(self, height=48):
self.interp = Image.NEAREST # Image.ANTIALIAS
self.height = height
def __call__(self, samples):
shape = samples[0][0].size
for i in range(len(samples)):
im = samples[i][0]
im = im.resize((shape[0], self.height), self.interp)
samples[i][0] = im
return samples
class Normalize(object):
def __init__(self,
mean=[127.5],
std=[1.0],
scale=False,
channel_first=True):
self.mean = mean
self.std = std
self.scale = scale
self.channel_first = channel_first
if not (isinstance(self.mean, list) and isinstance(self.std, list) and
isinstance(self.scale, bool)):
raise TypeError("{}: input type is invalid.".format(self))
    def __call__(self, samples):
        for i in range(len(samples)):
            im = samples[i][0]
            im = np.array(im).astype(np.float32, copy=False)
            # add the channel dimension: (H, W) -> (1, H, W)
            im = im[np.newaxis, ...]
            mean = np.array(self.mean, dtype=np.float32)[:, np.newaxis, np.newaxis]
            std = np.array(self.std, dtype=np.float32)[:, np.newaxis, np.newaxis]
            if self.scale:
                im = im / 255.0
            im = (im - mean) / std
            samples[i][0] = im
        return samples
class PadTarget(object):
def __init__(self, SOS=0, EOS=1):
self.SOS = SOS
self.EOS = EOS
def __call__(self, samples):
lens = np.array([len(s[1]) for s in samples], dtype="int64")
max_len = np.max(lens)
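        # pad every label sequence to max_len; build the decoder input
        # ([SOS] + labels), the decoder target (labels + [EOS]) and a mask
        # marking the valid (non-padded) positions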
for i in range(len(samples)):
label = samples[i][1]
if max_len > len(label):
pad_label = label + [self.EOS] * (max_len - len(label))
else:
pad_label = label
samples[i][1] = np.array([self.SOS] + pad_label, dtype='int64')
# label_out
samples[i].append(np.array(pad_label + [self.EOS], dtype='int64'))
mask = np.zeros((max_len + 1)).astype('float32')
mask[:len(label) + 1] = 1.0
# mask
samples[i].append(np.array(mask, dtype='float32'))
return samples
class BatchSampler(fluid.io.BatchSampler):
def __init__(self,
dataset,
batch_size,
shuffle=False,
drop_last=True,
seed=None):
self._dataset = dataset
self._batch_size = batch_size
self._shuffle = shuffle
self._drop_last = drop_last
self._random = np.random
self._random.seed(seed)
self._nranks = ParallelEnv().nranks
self._local_rank = ParallelEnv().local_rank
self._device_id = ParallelEnv().dev_id
self._num_samples = int(
math.ceil(len(self._dataset) * 1.0 / self._nranks))
self._total_size = self._num_samples * self._nranks
self._epoch = 0
def __iter__(self):
infos = copy.copy(self._dataset._sample_infos)
skip_num = 0
if self._shuffle:
if self._batch_size == 1:
self._random.RandomState(self._epoch).shuffle(infos)
else: # partial shuffle
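                # sort by image width so a batch holds images of similar
                # width, then rotate the list by a random offset so batches
                # still differ between epochs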
infos = sorted(infos, key=lambda x: x.w)
skip_num = random.randint(1, 100)
infos = infos[skip_num:] + infos[:skip_num]
infos += infos[:(self._total_size - len(infos))]
last_size = self._total_size % (self._batch_size * self._nranks)
batches = []
for i in range(self._local_rank * self._batch_size,
len(infos) - last_size,
self._batch_size * self._nranks):
batches.append(infos[i:i + self._batch_size])
if (not self._drop_last) and last_size != 0:
last_local_size = last_size // self._nranks
last_infos = infos[len(infos) - last_size:]
start = self._local_rank * last_local_size
batches.append(last_infos[start:start + last_local_size])
if self._shuffle:
self._random.RandomState(self._epoch).shuffle(batches)
self._epoch += 1
for batch in batches:
batch_indices = [info.idx for info in batch]
yield batch_indices
def __len__(self):
if self._drop_last:
return self._total_size // self._batch_size
else:
return math.ceil(self._total_size / float(self._batch_size))
class SampleInfo(object):
def __init__(self, idx, h, w, im_name, labels):
self.idx = idx
self.h = h
self.w = w
self.im_name = im_name
self.labels = labels
class OCRDataset(paddle.io.Dataset):
def __init__(self, image_dir, anno_file):
self.image_dir = image_dir
self.anno_file = anno_file
self._sample_infos = []
with open(anno_file, 'r') as f:
for i, line in enumerate(f):
w, h, im_name, labels = line.strip().split(' ')
h, w = int(h), int(w)
labels = [int(c) for c in labels.split(',')]
self._sample_infos.append(SampleInfo(i, h, w, im_name, labels))
def __getitem__(self, idx):
info = self._sample_infos[idx]
im_name, labels = info.im_name, info.labels
image = Image.open(path.join(self.image_dir, im_name)).convert('L')
return [image, labels]
def __len__(self):
return len(self._sample_infos)
def train(
root_dir=None,
images_dir=None,
anno_file=None,
shuffle=True, ):
    if root_dir is None:
        root_dir = download_data()
    if images_dir is None:
        images_dir = path.join(root_dir, TRAIN_DATA_DIR_NAME)
    if anno_file is None:
        anno_file = path.join(root_dir, TRAIN_LIST_FILE_NAME)
return OCRDataset(images_dir, anno_file)
def test(
root_dir=None,
images_dir=None,
anno_file=None,
shuffle=True, ):
    if root_dir is None:
        root_dir = download_data()
    if images_dir is None:
        images_dir = path.join(root_dir, TEST_DATA_DIR_NAME)
    if anno_file is None:
        anno_file = path.join(root_dir, TEST_LIST_FILE_NAME)
return OCRDataset(images_dir, anno_file)
def download_data():
'''Download train and test data.
'''
tar_file = paddle.dataset.common.download(
DATA_URL, CACHE_DIR_NAME, DATA_MD5, save_name=SAVED_FILE_NAME)
data_dir = path.join(path.dirname(tar_file), DATA_DIR_NAME)
if not path.isdir(data_dir):
t = tarfile.open(tar_file, "r:gz")
t.extractall(path=path.dirname(tar_file))
t.close()
return data_dir
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import argparse
import functools
import paddle.fluid.profiler as profiler
import paddle.fluid as fluid
from hapi.model import Input, set_device
from hapi.vision.transforms import BatchCompose
from utility import add_arguments, print_arguments
from utility import SeqAccuracy, LoggerCallBack, SeqBeamAccuracy
from utility import postprocess
from seq2seq_attn import Seq2SeqAttModel, Seq2SeqAttInferModel, WeightCrossEntropy
import data
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('batch_size', int, 32, "Minibatch size.")
add_arg('test_images', str, None, "The directory of images to be used for test.")
add_arg('test_list',        str,   None,               "The list file of images to be used for test.")
add_arg('init_model',       str,   'checkpoint/final', "The init model file or directory.")
add_arg('use_gpu',          bool,  True,               "Whether to use GPU.")
add_arg('encoder_size', int, 200, "Encoder size.")
add_arg('decoder_size', int, 128, "Decoder size.")
add_arg('embedding_dim', int, 128, "Word vector dim.")
add_arg('num_classes', int, 95, "Number classes.")
add_arg('beam_size',        int,   0,                  "If nonzero, use beam search with this beam size.")
add_arg('dynamic', bool, False, "Whether to use dygraph.")
# yapf: enable
def main(FLAGS):
device = set_device("gpu" if FLAGS.use_gpu else "cpu")
fluid.enable_dygraph(device) if FLAGS.dynamic else None
model = Seq2SeqAttModel(
encoder_size=FLAGS.encoder_size,
decoder_size=FLAGS.decoder_size,
emb_dim=FLAGS.embedding_dim,
num_classes=FLAGS.num_classes)
# yapf: disable
inputs = [
Input([None, 1, 48, 384], "float32", name="pixel"),
Input([None, None], "int64", name="label_in")
]
labels = [
Input([None, None], "int64", name="label_out"),
Input([None, None], "float32", name="mask")
]
# yapf: enable
model.prepare(
loss_function=WeightCrossEntropy(),
metrics=SeqAccuracy(),
inputs=inputs,
labels=labels,
device=device)
model.load(FLAGS.init_model)
test_dataset = data.test()
test_collate_fn = BatchCompose(
[data.Resize(), data.Normalize(), data.PadTarget()])
test_sampler = data.BatchSampler(
test_dataset,
batch_size=FLAGS.batch_size,
drop_last=False,
shuffle=False)
test_loader = fluid.io.DataLoader(
test_dataset,
batch_sampler=test_sampler,
places=device,
num_workers=0,
return_list=True,
collate_fn=test_collate_fn)
model.evaluate(
eval_data=test_loader,
callbacks=[LoggerCallBack(10, 2, FLAGS.batch_size)])
def beam_search(FLAGS):
device = set_device("gpu" if FLAGS.use_gpu else "cpu")
fluid.enable_dygraph(device) if FLAGS.dynamic else None
model = Seq2SeqAttInferModel(
encoder_size=FLAGS.encoder_size,
decoder_size=FLAGS.decoder_size,
emb_dim=FLAGS.embedding_dim,
num_classes=FLAGS.num_classes,
beam_size=FLAGS.beam_size)
    inputs = [
        Input([None, 1, 48, 384], "float32", name="pixel"),
        Input([None, None], "int64", name="label_in")
    ]
    labels = [
        Input([None, None], "int64", name="label_out"),
        Input([None, None], "float32", name="mask")
    ]
model.prepare(
loss_function=None,
metrics=SeqBeamAccuracy(),
inputs=inputs,
labels=labels,
device=device)
model.load(FLAGS.init_model)
test_dataset = data.test()
test_collate_fn = BatchCompose(
[data.Resize(), data.Normalize(), data.PadTarget()])
test_sampler = data.BatchSampler(
test_dataset,
batch_size=FLAGS.batch_size,
drop_last=False,
shuffle=False)
test_loader = fluid.io.DataLoader(
test_dataset,
batch_sampler=test_sampler,
places=device,
num_workers=0,
return_list=True,
collate_fn=test_collate_fn)
model.evaluate(
eval_data=test_loader,
callbacks=[LoggerCallBack(10, 2, FLAGS.batch_size)])
if __name__ == '__main__':
FLAGS = parser.parse_args()
print_arguments(FLAGS)
if FLAGS.beam_size:
beam_search(FLAGS)
else:
main(FLAGS)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import os
import sys
import random
import numpy as np
import argparse
import functools
from PIL import Image
import paddle.fluid.profiler as profiler
import paddle.fluid as fluid
from hapi.model import Input, set_device
from hapi.datasets.folder import ImageFolder
from hapi.vision.transforms import BatchCompose
from utility import add_arguments, print_arguments
from utility import postprocess, index2word
from seq2seq_attn import Seq2SeqAttInferModel, WeightCrossEntropy
import data
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('batch_size', int, 1, "Minibatch size.")
add_arg('image_path', str, None, "The directory of images to be used for test.")
add_arg('init_model',       str,   None,   "The init model file or directory.")
add_arg('use_gpu',          bool,  True,   "Whether to use GPU.")
# model hyperparameters
add_arg('encoder_size', int, 200, "Encoder size.")
add_arg('decoder_size', int, 128, "Decoder size.")
add_arg('embedding_dim', int, 128, "Word vector dim.")
add_arg('num_classes', int, 95, "Number classes.")
add_arg('beam_size', int, 3, "Beam size for beam search.")
add_arg('dynamic', bool, False, "Whether to use dygraph.")
# yapf: enable
def main(FLAGS):
device = set_device("gpu" if FLAGS.use_gpu else "cpu")
fluid.enable_dygraph(device) if FLAGS.dynamic else None
model = Seq2SeqAttInferModel(
encoder_size=FLAGS.encoder_size,
decoder_size=FLAGS.decoder_size,
emb_dim=FLAGS.embedding_dim,
num_classes=FLAGS.num_classes,
beam_size=FLAGS.beam_size)
inputs = [Input([None, 1, 48, 384], "float32", name="pixel"), ]
model.prepare(inputs=inputs, device=device)
model.load(FLAGS.init_model)
fn = lambda p: Image.open(p).convert('L')
test_dataset = ImageFolder(FLAGS.image_path, loader=fn)
test_collate_fn = BatchCompose([data.Resize(), data.Normalize()])
test_loader = fluid.io.DataLoader(
test_dataset,
places=device,
num_workers=0,
return_list=True,
collate_fn=test_collate_fn)
samples = test_dataset.samples
#outputs = model.predict(test_loader)
ins_id = 0
for image, in test_loader:
image = image if FLAGS.dynamic else image[0]
pred = model.test_batch([image])[0]
pred = pred[:, :, np.newaxis] if len(pred.shape) == 2 else pred
pred = np.transpose(pred, [0, 2, 1])
for ins in pred:
impath = samples[ins_id]
ins_id += 1
print('Image {}: {}'.format(ins_id, impath))
for beam_idx, beam in enumerate(ins):
id_list = postprocess(beam)
word_list = index2word(id_list)
sequence = "".join(word_list)
print('{}: {}'.format(beam_idx, sequence))
if __name__ == '__main__':
FLAGS = parser.parse_args()
print_arguments(FLAGS)
main(FLAGS)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from paddle.fluid.layers import BeamSearchDecoder
from hapi.text import RNNCell, RNN, DynamicDecode
from hapi.model import Model, Loss
class ConvBNPool(fluid.dygraph.Layer):
def __init__(self,
in_ch,
out_ch,
act="relu",
is_test=False,
pool=True,
use_cudnn=True):
super(ConvBNPool, self).__init__()
self.pool = pool
filter_size = 3
std = (2.0 / (filter_size**2 * in_ch))**0.5
param_0 = fluid.ParamAttr(
initializer=fluid.initializer.Normal(0.0, std))
std = (2.0 / (filter_size**2 * out_ch))**0.5
param_1 = fluid.ParamAttr(
initializer=fluid.initializer.Normal(0.0, std))
self.conv0 = fluid.dygraph.Conv2D(
in_ch,
out_ch,
3,
padding=1,
param_attr=param_0,
bias_attr=False,
act=None,
use_cudnn=use_cudnn)
self.bn0 = fluid.dygraph.BatchNorm(out_ch, act=act)
self.conv1 = fluid.dygraph.Conv2D(
out_ch,
out_ch,
filter_size=3,
padding=1,
param_attr=param_1,
bias_attr=False,
act=None,
use_cudnn=use_cudnn)
self.bn1 = fluid.dygraph.BatchNorm(out_ch, act=act)
if self.pool:
self.pool = fluid.dygraph.Pool2D(
pool_size=2,
pool_type='max',
pool_stride=2,
use_cudnn=use_cudnn,
ceil_mode=True)
def forward(self, inputs):
out = self.conv0(inputs)
out = self.bn0(out)
out = self.conv1(out)
out = self.bn1(out)
if self.pool:
out = self.pool(out)
return out
class CNN(fluid.dygraph.Layer):
def __init__(self, in_ch=1, is_test=False):
super(CNN, self).__init__()
self.conv_bn1 = ConvBNPool(in_ch, 16)
self.conv_bn2 = ConvBNPool(16, 32)
self.conv_bn3 = ConvBNPool(32, 64)
self.conv_bn4 = ConvBNPool(64, 128, pool=False)
def forward(self, inputs):
conv = self.conv_bn1(inputs)
conv = self.conv_bn2(conv)
conv = self.conv_bn3(conv)
conv = self.conv_bn4(conv)
return conv
class GRUCell(RNNCell):
def __init__(self,
input_size,
hidden_size,
param_attr=None,
bias_attr=None,
gate_activation='sigmoid',
candidate_activation='tanh',
origin_mode=False):
super(GRUCell, self).__init__()
self.hidden_size = hidden_size
self.fc_layer = fluid.dygraph.Linear(
input_size,
hidden_size * 3,
param_attr=param_attr,
bias_attr=False)
self.gru_unit = fluid.dygraph.GRUUnit(
hidden_size * 3,
param_attr=param_attr,
bias_attr=bias_attr,
activation=candidate_activation,
gate_activation=gate_activation,
origin_mode=origin_mode)
def forward(self, inputs, states):
# step_outputs, new_states = cell(step_inputs, states)
# for GRUCell, `step_outputs` and `new_states` both are hidden
x = self.fc_layer(inputs)
hidden, _, _ = self.gru_unit(x, states)
return hidden, hidden
@property
def state_shape(self):
return [self.hidden_size]
class Encoder(fluid.dygraph.Layer):
def __init__(
self,
in_channel=1,
rnn_hidden_size=200,
decoder_size=128,
is_test=False, ):
super(Encoder, self).__init__()
self.rnn_hidden_size = rnn_hidden_size
self.backbone = CNN(in_ch=in_channel, is_test=is_test)
para_attr = fluid.ParamAttr(
initializer=fluid.initializer.Normal(0.0, 0.02))
bias_attr = fluid.ParamAttr(
initializer=fluid.initializer.Normal(0.0, 0.02), learning_rate=2.0)
self.gru_fwd = RNN(cell=GRUCell(
input_size=128 * 6,
hidden_size=rnn_hidden_size,
param_attr=para_attr,
bias_attr=bias_attr,
candidate_activation='relu'),
is_reverse=False,
time_major=False)
self.gru_bwd = RNN(cell=GRUCell(
input_size=128 * 6,
hidden_size=rnn_hidden_size,
param_attr=para_attr,
bias_attr=bias_attr,
candidate_activation='relu'),
is_reverse=True,
time_major=False)
self.encoded_proj_fc = fluid.dygraph.Linear(
rnn_hidden_size * 2, decoder_size, bias_attr=False)
def forward(self, inputs):
conv_features = self.backbone(inputs)
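        # treat image columns as time steps: transpose and reshape the NCHW
        # conv features into a [batch, width, channels * height] sequence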
conv_features = fluid.layers.transpose(
conv_features, perm=[0, 3, 1, 2])
n, w, c, h = conv_features.shape
seq_feature = fluid.layers.reshape(conv_features, [0, -1, c * h])
gru_fwd, _ = self.gru_fwd(seq_feature)
gru_bwd, _ = self.gru_bwd(seq_feature)
encoded_vector = fluid.layers.concat(input=[gru_fwd, gru_bwd], axis=2)
encoded_proj = self.encoded_proj_fc(encoded_vector)
return gru_bwd, encoded_vector, encoded_proj
class Attention(fluid.dygraph.Layer):
"""
Neural Machine Translation by Jointly Learning to Align and Translate.
https://arxiv.org/abs/1409.0473
"""
def __init__(self, decoder_size):
super(Attention, self).__init__()
self.fc1 = fluid.dygraph.Linear(
decoder_size, decoder_size, bias_attr=False)
self.fc2 = fluid.dygraph.Linear(decoder_size, 1, bias_attr=False)
def forward(self, encoder_vec, encoder_proj, decoder_state):
# alignment model, single-layer multilayer perceptron
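        # additive (Bahdanau) attention: e = v^T tanh(W s + U h), softmax over
        # encoder positions, context = attention-weighted sum of encoder_vec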
decoder_state = self.fc1(decoder_state)
decoder_state = fluid.layers.unsqueeze(decoder_state, [1])
e = fluid.layers.elementwise_add(encoder_proj, decoder_state)
e = fluid.layers.tanh(e)
att_scores = self.fc2(e)
att_scores = fluid.layers.squeeze(att_scores, [2])
att_scores = fluid.layers.softmax(att_scores)
context = fluid.layers.elementwise_mul(
x=encoder_vec, y=att_scores, axis=0)
context = fluid.layers.reduce_sum(context, dim=1)
return context
class DecoderCell(RNNCell):
def __init__(self, encoder_size=200, decoder_size=128):
super(DecoderCell, self).__init__()
self.attention = Attention(decoder_size)
self.gru_cell = GRUCell(
input_size=encoder_size * 2 + decoder_size,
hidden_size=decoder_size)
def forward(self, current_word, states, encoder_vec, encoder_proj):
context = self.attention(encoder_vec, encoder_proj, states)
decoder_inputs = fluid.layers.concat([current_word, context], axis=1)
hidden, _ = self.gru_cell(decoder_inputs, states)
return hidden, hidden
class Decoder(fluid.dygraph.Layer):
def __init__(self, num_classes, emb_dim, encoder_size, decoder_size):
super(Decoder, self).__init__()
self.decoder_attention = RNN(DecoderCell(encoder_size, decoder_size))
self.fc = fluid.dygraph.Linear(
decoder_size, num_classes + 2, act='softmax')
def forward(self, target, initial_states, encoder_vec, encoder_proj):
out, _ = self.decoder_attention(
target,
initial_states=initial_states,
encoder_vec=encoder_vec,
encoder_proj=encoder_proj)
pred = self.fc(out)
return pred
class Seq2SeqAttModel(Model):
def __init__(
self,
            in_channel=1,
encoder_size=200,
decoder_size=128,
emb_dim=128,
num_classes=None, ):
super(Seq2SeqAttModel, self).__init__()
        self.encoder = Encoder(in_channel, encoder_size, decoder_size)
self.fc = fluid.dygraph.Linear(
input_dim=encoder_size,
output_dim=decoder_size,
bias_attr=False,
act='relu')
self.embedding = fluid.dygraph.Embedding(
[num_classes + 2, emb_dim], dtype='float32')
self.decoder = Decoder(num_classes, emb_dim, encoder_size,
decoder_size)
def forward(self, inputs, target):
gru_backward, encoded_vector, encoded_proj = self.encoder(inputs)
decoder_boot = self.fc(gru_backward[:, 0])
trg_embedding = self.embedding(target)
prediction = self.decoder(trg_embedding, decoder_boot, encoded_vector,
encoded_proj)
return prediction
class Seq2SeqAttInferModel(Seq2SeqAttModel):
def __init__(
self,
            in_channel=1,
encoder_size=200,
decoder_size=128,
emb_dim=128,
num_classes=None,
beam_size=0,
bos_id=0,
eos_id=1,
max_out_len=20, ):
super(Seq2SeqAttInferModel, self).__init__(
            in_channel, encoder_size, decoder_size, emb_dim, num_classes)
self.beam_size = beam_size
# dynamic decoder for inference
decoder = BeamSearchDecoder(
self.decoder.decoder_attention.cell,
start_token=bos_id,
end_token=eos_id,
beam_size=beam_size,
embedding_fn=self.embedding,
output_fn=self.decoder.fc)
self.infer_decoder = DynamicDecode(
decoder, max_step_num=max_out_len, is_test=True)
def forward(self, inputs, *args):
gru_backward, encoded_vector, encoded_proj = self.encoder(inputs)
decoder_boot = self.fc(gru_backward[:, 0])
if self.beam_size:
# Tile the batch dimension with beam_size
encoded_vector = BeamSearchDecoder.tile_beam_merge_with_batch(
encoded_vector, self.beam_size)
encoded_proj = BeamSearchDecoder.tile_beam_merge_with_batch(
encoded_proj, self.beam_size)
# dynamic decoding with beam search
rs, _ = self.infer_decoder(
inits=decoder_boot,
encoder_vec=encoded_vector,
encoder_proj=encoded_proj)
return rs
class WeightCrossEntropy(Loss):
def __init__(self):
super(WeightCrossEntropy, self).__init__(average=False)
def forward(self, outputs, labels):
predict, (label, mask) = outputs[0], labels
loss = layers.cross_entropy(predict, label=label)
loss = layers.elementwise_mul(loss, mask, axis=0)
loss = layers.reduce_sum(loss)
return loss
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import os
import sys
import random
import numpy as np
import argparse
import functools
import paddle.fluid.profiler as profiler
import paddle.fluid as fluid
from hapi.model import Input, set_device
from hapi.vision.transforms import BatchCompose
from utility import add_arguments, print_arguments
from utility import SeqAccuracy, LoggerCallBack
from seq2seq_attn import Seq2SeqAttModel, WeightCrossEntropy
import data
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('batch_size', int, 32, "Minibatch size.")
add_arg('epoch', int, 30, "Epoch number.")
add_arg('num_workers',       int,   0,            "The number of data loader workers.")
add_arg('lr', float, 0.001, "Learning rate.")
add_arg('lr_decay_strategy', str, "", "Learning rate decay strategy.")
add_arg('checkpoint_path',   str,   "checkpoint", "The directory the model is saved to.")
add_arg('train_images', str, None, "The directory of images to be used for training.")
add_arg('train_list', str, None, "The list file of images to be used for training.")
add_arg('test_images', str, None, "The directory of images to be used for test.")
add_arg('test_list',         str,   None,         "The list file of images to be used for test.")
add_arg('resume_path',       str,   None,         "The checkpoint file or directory to resume from.")
add_arg('use_gpu',           bool,  True,         "Whether to use GPU.")
# model hyperparameters
add_arg('encoder_size', int, 200, "Encoder size.")
add_arg('decoder_size', int, 128, "Decoder size.")
add_arg('embedding_dim', int, 128, "Word vector dim.")
add_arg('num_classes', int, 95, "Number classes.")
add_arg('gradient_clip', float, 5.0, "Gradient clip value.")
add_arg('dynamic', bool, False, "Whether to use dygraph.")
# yapf: enable
def main(FLAGS):
device = set_device("gpu" if FLAGS.use_gpu else "cpu")
fluid.enable_dygraph(device) if FLAGS.dynamic else None
model = Seq2SeqAttModel(
encoder_size=FLAGS.encoder_size,
decoder_size=FLAGS.decoder_size,
emb_dim=FLAGS.embedding_dim,
num_classes=FLAGS.num_classes)
lr = FLAGS.lr
if FLAGS.lr_decay_strategy == "piecewise_decay":
learning_rate = fluid.layers.piecewise_decay(
[200000, 250000], [lr, lr * 0.1, lr * 0.01])
else:
learning_rate = lr
grad_clip = fluid.clip.GradientClipByGlobalNorm(FLAGS.gradient_clip)
optimizer = fluid.optimizer.Adam(
learning_rate=learning_rate,
parameter_list=model.parameters(),
grad_clip=grad_clip)
# yapf: disable
inputs = [
Input([None,1,48,384], "float32", name="pixel"),
Input([None, None], "int64", name="label_in"),
]
labels = [
Input([None, None], "int64", name="label_out"),
Input([None, None], "float32", name="mask"),
]
# yapf: enable
model.prepare(
optimizer,
WeightCrossEntropy(),
SeqAccuracy(),
inputs=inputs,
labels=labels)
train_dataset = data.train()
train_collate_fn = BatchCompose(
[data.Resize(), data.Normalize(), data.PadTarget()])
train_sampler = data.BatchSampler(
train_dataset, batch_size=FLAGS.batch_size, shuffle=True)
train_loader = fluid.io.DataLoader(
train_dataset,
batch_sampler=train_sampler,
places=device,
num_workers=FLAGS.num_workers,
return_list=True,
collate_fn=train_collate_fn)
test_dataset = data.test()
test_collate_fn = BatchCompose(
[data.Resize(), data.Normalize(), data.PadTarget()])
test_sampler = data.BatchSampler(
test_dataset,
batch_size=FLAGS.batch_size,
drop_last=False,
shuffle=False)
test_loader = fluid.io.DataLoader(
test_dataset,
batch_sampler=test_sampler,
places=device,
num_workers=0,
return_list=True,
collate_fn=test_collate_fn)
model.fit(train_data=train_loader,
eval_data=test_loader,
epochs=FLAGS.epoch,
save_dir=FLAGS.checkpoint_path,
callbacks=[LoggerCallBack(10, 2, FLAGS.batch_size)])
if __name__ == '__main__':
FLAGS = parser.parse_args()
print_arguments(FLAGS)
main(FLAGS)
"""Contains common utility functions."""
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import distutils.util
import numpy as np
import paddle.fluid as fluid
import six
from hapi.metrics import Metric
from hapi.callbacks import ProgBarLogger
def print_arguments(args):
"""Print argparse's arguments.
Usage:
.. code-block:: python
parser = argparse.ArgumentParser()
parser.add_argument("name", default="Jonh", type=str, help="User name.")
args = parser.parse_args()
print_arguments(args)
:param args: Input argparse.Namespace for printing.
:type args: argparse.Namespace
"""
print("----------- Configuration Arguments -----------")
for arg, value in sorted(six.iteritems(vars(args))):
print("%s: %s" % (arg, value))
print("------------------------------------------------")
def add_arguments(argname, type, default, help, argparser, **kwargs):
"""Add argparse's argument.
Usage:
.. code-block:: python
parser = argparse.ArgumentParser()
add_argument("name", str, "Jonh", "User name.", parser)
args = parser.parse_args()
"""
type = distutils.util.strtobool if type == bool else type
argparser.add_argument(
"--" + argname,
default=default,
type=type,
help=help + ' Default: %(default)s.',
**kwargs)
class SeqAccuracy(Metric):
def __init__(self, name=None, *args, **kwargs):
super(SeqAccuracy, self).__init__(*args, **kwargs)
self._name = 'seq_acc'
self.reset()
def add_metric_op(self, output, label, mask, *args, **kwargs):
pred = fluid.layers.flatten(output, axis=2)
score, topk = fluid.layers.topk(pred, 1)
return topk, label, mask
def update(self, topk, label, mask, *args, **kwargs):
topk = topk.reshape(label.shape[0], -1)
seq_len = np.sum(mask, -1)
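        # a sequence counts as correct only when every predicted id matches
        # the reference up to the last valid (non-padded) position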
acc = 0
for i in range(label.shape[0]):
l = int(seq_len[i] - 1)
pred = topk[i][:l - 1]
ref = label[i][:l - 1]
if np.array_equal(pred, ref):
self.total += 1
acc += 1
self.count += 1
return float(acc) / label.shape[0]
def reset(self):
self.total = 0.
self.count = 0.
def accumulate(self):
return float(self.total) / self.count
def name(self):
return self._name
class LoggerCallBack(ProgBarLogger):
def __init__(self, log_freq=1, verbose=2, train_bs=None, eval_bs=None):
super(LoggerCallBack, self).__init__(log_freq, verbose)
self.train_bs = train_bs
self.eval_bs = eval_bs if eval_bs else train_bs
def on_train_batch_end(self, step, logs=None):
logs = logs or {}
logs['loss'] = [l / self.train_bs for l in logs['loss']]
super(LoggerCallBack, self).on_train_batch_end(step, logs)
def on_epoch_end(self, epoch, logs=None):
logs = logs or {}
logs['loss'] = [l / self.train_bs for l in logs['loss']]
super(LoggerCallBack, self).on_epoch_end(epoch, logs)
def on_eval_batch_end(self, step, logs=None):
logs = logs or {}
logs['loss'] = [l / self.eval_bs for l in logs['loss']]
super(LoggerCallBack, self).on_eval_batch_end(step, logs)
def on_eval_end(self, logs=None):
logs = logs or {}
logs['loss'] = [l / self.eval_bs for l in logs['loss']]
super(LoggerCallBack, self).on_eval_end(logs)
def index2word(ids):
return [chr(int(k + 33)) for k in ids]
def postprocess(seq, bos_idx=0, eos_idx=1):
if type(seq) is np.ndarray:
seq = seq.tolist()
eos_pos = len(seq) - 1
for i, idx in enumerate(seq):
if idx == eos_idx:
eos_pos = i
break
seq = [
idx for idx in seq[:eos_pos + 1] if idx != bos_idx and idx != eos_idx
]
return seq
class SeqBeamAccuracy(Metric):
def __init__(self, name=None, *args, **kwargs):
super(SeqBeamAccuracy, self).__init__(*args, **kwargs)
self._name = 'seq_acc'
self.reset()
def add_metric_op(self, output, label, mask, *args, **kwargs):
return output, label, mask
def update(self, preds, labels, masks, *args, **kwargs):
preds = preds[:, :, np.newaxis] if len(preds.shape) == 2 else preds
preds = np.transpose(preds, [0, 2, 1])
seq_len = np.sum(masks, -1)
acc = 0
for i in range(labels.shape[0]):
l = int(seq_len[i] - 1)
#ref = labels[i][: l - 1]
ref = np.array(postprocess(labels[i]))
pred = preds[i]
for idx, beam in enumerate(pred):
beam_pred = np.array(postprocess(beam))
if np.array_equal(beam_pred, ref):
self.total += 1
acc += 1
break
self.count += 1
return float(acc) / labels.shape[0]
def reset(self):
self.total = 0.
self.count = 0.
def accumulate(self):
return float(self.total) / self.count
def name(self):
return self._name
# Sequence Tagging
## 1. Introduction
Sequence Tagging is a sequence labeling model usable for tasks such as word segmentation, part-of-speech (POS) tagging, and named entity recognition. We evaluated the joint label model (word segmentation, POS tagging, and named entity recognition together) on our in-house dataset; the figures are listed in the table below:

|Model|Precision|Recall|F1-score|
|:-:|:-:|:-:|:-:|
|Lexical Analysis|88.26%|89.20%|88.73%|
## 2. Quick Start
### Installation
#### 1. Install PaddlePaddle
This project requires PaddlePaddle 1.7+ and PaddleHub 1.0.0+. For PaddlePaddle installation, see the official [quick install guide](http://www.paddlepaddle.org/paddle#quick-start); for PaddleHub, see [PaddleHub](https://github.com/PaddlePaddle/PaddleHub).
> Warning: the GPU and CPU builds of PaddlePaddle are separate packages, paddlepaddle-gpu and paddlepaddle; take care to install the right one.
#### 2. Clone the repository
Clone the toolkit repository to your local machine:
```bash
git clone https://github.com/PaddlePaddle/hapi.git
cd hapi/sequence_tagging
```
#### 3. Environment requirements
PaddlePaddle requires Python 2.7.15+ (for Python 2) or Python 3.5.1+/3.6/3.7 (for Python 3). The sequence tagging code runs under both Python 2 and 3 with no further version restrictions.
### Data Preparation
#### 1. Quick download
The **datasets** and **pretrained models** used by this project can be fetched quickly with the script below; to download only part of the data or models, follow steps 2 and 3 as needed.
```bash
python downloads.py all
```
Or, in an environment that can run shell scripts:
```bash
sh downloads.sh
```
#### 2. Training dataset
Download the dataset archive; extracting it creates the `./data/` directory:
```bash
python downloads.py dataset
```
#### 3. Pretrained model
We release a lexical analysis model trained on our in-house dataset for direct use; download it with:
```bash
# download baseline model
python downloads.py model
```
### Model Training
With the sample dataset, the commands below train on the training set `./data/train.tsv`.
Single-GPU training:
```
# setting visible devices for training
export CUDA_VISIBLE_DEVICES=0
python -u train.py \
    --device gpu \
    --dynamic False
# --device: run on the gpu or the cpu device
# --dynamic: whether to train in dygraph mode; set True for dygraph, False for static graph
```
Multi-GPU training:
```
# setting visible devices for training
export CUDA_VISIBLE_DEVICES=0,1,2,3
python -m paddle.distributed.launch --selected_gpus=0,1,2,3 train.py \
    --device gpu \
    --dynamic False
# --device: run on the gpu or the cpu device
# --dynamic: whether to train in dygraph mode; set True for dygraph, False for static graph
```
CPU training:
```
python -u train.py \
    --device cpu \
    --dynamic False
# --device: run on the gpu or the cpu device
# --dynamic: whether to train in dygraph mode; set True for dygraph, False for static graph
```
### Model Prediction
Load a trained model and run prediction on unseen data:
```bash
python predict.py \
    --init_from_checkpoint model_baseline/params \
    --output_file predict.result \
    --mode predict \
    --device cpu \
    --dynamic False
# --init_from_checkpoint: checkpoint to initialize the model from
# --output_file: file to write prediction results to
# --device: run on the gpu or the cpu device
# --mode: run mode; "train" trains the model, "predict" runs prediction
# --dynamic: whether to run in dygraph mode; set True for dygraph, False for static graph
```
### Model Evaluation
We trained a lexical analysis model on our in-house dataset; it can be evaluated directly on the test set `./data/test.tsv`:
```bash
# baseline model
python eval.py \
    --init_from_checkpoint ./model_baseline/params \
    --mode predict \
    --device cpu \
    --dynamic False
# --init_from_checkpoint: checkpoint to initialize the model from
# --device: run on the gpu or the cpu device
# --mode: run mode; "train" trains the model, "predict" runs prediction
# --dynamic: whether to run in dygraph mode; set True for dygraph, False for static graph
```
## 3. Advanced Usage
### Task Definition and Modeling
The input of sequence tagging is a string (a "sentence" below); the output is the word boundaries and categories in that sentence. Sequence tagging is the classic way to model lexical analysis. We use a GRU-based network to learn features and feed them into a CRF decoding layer that produces the tag sequence. The CRF layer essentially replaces the linear model of a traditional CRF with a nonlinear neural network and optimizes a sentence-level likelihood, which better handles the label bias problem. The model works as follows (a decoding sketch follows this list):
1. The input is a one-hot representation in which each character is an id;
2. the id sequence is mapped through the vocabulary into a sequence of dense character embeddings;
3. the embedding sequence feeds a bidirectional GRU that learns a feature representation of the input; we stack two bidirectional GRU layers to increase capacity;
4. the CRF takes the GRU features as input and the tag sequence as supervision to perform sequence tagging.
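To make step 4 concrete, here is a minimal numpy sketch of the Viterbi decoding that a CRF layer performs at inference time; the emission and transition scores below are random placeholders, not values from the trained model:
```python
import numpy as np


def viterbi_decode(emissions, transitions):
    # emissions: [seq_len, num_tags] scores from the BiGRU features
    # transitions: [num_tags, num_tags] CRF transition scores
    seq_len, num_tags = emissions.shape
    score = emissions[0].copy()
    back = np.zeros((seq_len, num_tags), dtype=np.int64)
    for t in range(1, seq_len):
        # cand[i, j]: score of reaching tag j at step t coming from tag i
        cand = score[:, None] + transitions + emissions[t]
        back[t] = cand.argmax(axis=0)
        score = cand.max(axis=0)
    # follow the back-pointers from the best final tag
    best = [int(score.argmax())]
    for t in range(seq_len - 1, 0, -1):
        best.append(int(back[t][best[-1]]))
    return best[::-1]


# 57 tags, matching the size of tag.dic in this repository
tags = viterbi_decode(np.random.rand(6, 57), np.random.rand(57, 57))
```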
The downloadable in-house data is jointly annotated for word segmentation, POS tagging, and named entity recognition. The POS and entity tag sets are listed in the table below: 24 POS tags (lowercase) and 4 entity categories (uppercase). Note that person, location, organization, and time each have two tag sets (PER / LOC / ORG / TIME and nr / ns / nt / t); words assigned the second set are those the model judged to be low-confidence persons, locations, organizations, or times. Based on these two tag sets, developers can make their own precision/recall trade-off for the four categories.
| Tag | Meaning | Tag | Meaning | Tag | Meaning | Tag | Meaning |
| ---- | -------- | ---- | -------- | ---- | -------- | ---- | -------- |
| n | common noun | f | noun of direction | s | noun of place | t | time |
| nr | person name | ns | place name | nt | organization name | nw | work title |
| nz | other proper noun | v | common verb | vd | adverbial verb | vn | nominal verb |
| a | adjective | ad | adverbial adjective | an | nominal adjective | d | adverb |
| m | numeral | q | measure word | r | pronoun | p | preposition |
| c | conjunction | u | particle | xc | other function word | w | punctuation |
| PER | person | LOC | location | ORG | organization | TIME | time |
### Model Architecture
The model described above is illustrated in the figure below:<br />
<p align="center">
<img src="./images/gru-crf-model.png" width = "340" height = "300" /> <br />
Overall Architecture of GRU-CRF-MODEL
</p>
### Data Format
Training data can be organized by users to fit their own scenario. Apart from the fixed first line `text_a\tlabel`, every line consists of two tab-separated columns: the first is UTF-8 encoded Chinese text whose characters are separated by `\002`, and the second is the tag of each character, also separated by `\002`. We use the IOB2 tagging scheme: X-B marks the first character of a word of type X, X-I marks a continuation, and O marks characters of no interest (in joint POS and entity tagging, O does not actually occur). An example follows (a parsing sketch comes after it):
```text
除\002了\002他\002续\002任\002十\002二\002届\002政\002协\002委\002员\002,\002马\002化\002腾\002,\002雷\002军\002,\002李\002彦\002宏\002也\002被\002推\002选\002为\002新\002一\002届\002全\002国\002人\002大\002代\002表\002或\002全\002国\002政\002协\002委\002员 p-B\002p-I\002r-B\002v-B\002v-I\002m-B\002m-I\002m-I\002ORG-B\002ORG-I\002n-B\002n-I\002w-B\002PER-B\002PER-I\002PER-I\002w-B\002PER-B\002PER-I\002w-B\002PER-B\002PER-I\002PER-I\002d-B\002p-B\002v-B\002v-I\002v-B\002a-B\002m-B\002m-I\002ORG-B\002ORG-I\002ORG-I\002ORG-I\002n-B\002n-I\002c-B\002n-B\002n-I\002ORG-B\002ORG-I\002n-B\002n-I
```
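As a clarifying sketch, one such line can be parsed into character and tag sequences like this (mirroring `reader.py`; the file path is an assumption):
```python
import io

with io.open("data/train.tsv", "r", encoding="utf-8") as f:
    next(f)  # skip the fixed "text_a\tlabel" header line
    for line in f:
        text, label = line.rstrip("\n").split("\t")
        chars = text.split(u"\002")  # characters of the sentence
        tags = label.split(u"\002")  # one IOB2 tag per character
        assert len(chars) == len(tags)
```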
+ We release the full model and its dependency data along with the code. However, the training data is too large to release, so the `data` directory only contains a few samples illustrating the input format.
+ The model's dependency data includes:
  1. the vocabulary of the input text, `word.dic` under the `conf` directory;
  2. a dictionary for normalizing special characters in the input text, `q2b.dic` under the `conf` directory;
  3. the tag dictionary, `tag.dic` under the `conf` directory.
+ Both training and prediction preprocess the raw data, specifically:
  1. extracting sentences and tags from the raw files to build sentence and tag sequences;
  2. normalizing special characters in the sentences;
  3. looking up the integer index of each character in the vocabulary.
### Code Structure
```text
├── README.md    # this document
├── data/        # datasets
├── conf/        # dictionaries and default configuration
├── images/      # images used by the documentation
├── utils/       # common utility functions
├── train.py     # training script
├── predict.py   # prediction script
├── eval.py      # lexical analysis evaluation script
├── downloads.py # script for downloading data and models
├── downloads.sh # script for downloading data and models
└── reader.py    # data reading helpers
```
## 4. Others
### Citing sequence tagging in papers
If you use sequence tagging in your academic work, please add the citation below. We are delighted that the model can contribute to your research.
```text
@article{jiao2018LAC,
    title={Chinese Lexical Analysis with Deep Bi-GRU-CRF Network},
    author={Jiao, Zhenyu and Sun, Shuqi and Sun, Ke},
    journal={arXiv preprint arXiv:1807.01882},
    year={2018},
    url={https://arxiv.org/abs/1807.01882}
}
```
### Contributing
If you can fix an issue or add a new feature, feel free to open a PR. If the PR is accepted, we score the contribution by quality and difficulty (0-5, higher is better); once you accumulate 10 points, you may contact us for an interview opportunity or a recommendation letter.
 
、 ,
。 .
— -
~ ~
‖ |
… .
‘ '
’ '
“ "
” "
〔 (
〕 )
〈 <
〉 >
「 '
」 '
『 "
』 "
〖 [
〗 ]
【 [
】 ]
∶ :
$ $
! !
" "
# #
% %
& &
' '
( (
) )
* *
+ +
, ,
- -
. .
/ /
0 0
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
: :
; ;
< <
= =
> >
? ?
@ @
A a
B b
C c
D d
E e
F f
G g
H h
I i
J j
K k
L l
M m
N n
O o
P p
Q q
R r
S s
T t
U u
V v
W w
X x
Y y
Z z
[ [
\ \
] ]
^ ^
_ _
` `
a a
b b
c c
d d
e e
f f
g g
h h
i i
j j
k k
l l
m m
n n
o o
p p
q q
r r
s s
t t
u u
v v
w w
x x
y y
z z
{ {
| |
} }
 ̄ ~
〝 "
〞 "
﹐ ,
﹑ ,
﹒ .
﹔ ;
﹕ :
﹖ ?
﹗ !
﹙ (
﹚ )
﹛ {
﹜ {
﹝ [
﹞ ]
﹟ #
﹠ &
﹡ *
﹢ +
﹣ -
﹤ <
﹥ >
﹦ =
﹨ \
﹩ $
﹪ %
﹫ @
,
A a
B b
C c
D d
E e
F f
G g
H h
I i
J j
K k
L l
M m
N n
O o
P p
Q q
R r
S s
T t
U u
V v
W w
X x
Y y
Z z
0 a-B
1 a-I
2 ad-B
3 ad-I
4 an-B
5 an-I
6 c-B
7 c-I
8 d-B
9 d-I
10 f-B
11 f-I
12 m-B
13 m-I
14 n-B
15 n-I
16 nr-B
17 nr-I
18 ns-B
19 ns-I
20 nt-B
21 nt-I
22 nw-B
23 nw-I
24 nz-B
25 nz-I
26 p-B
27 p-I
28 q-B
29 q-I
30 r-B
31 r-I
32 s-B
33 s-I
34 t-B
35 t-I
36 u-B
37 u-I
38 v-B
39 v-I
40 vd-B
41 vd-I
42 vn-B
43 vn-I
44 w-B
45 w-I
46 xc-B
47 xc-I
48 PER-B
49 PER-I
50 LOC-B
51 LOC-I
52 ORG-B
53 ORG-I
54 TIME-B
55 TIME-I
56 O
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Download script, download dataset and pretrain models.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import io
import os
import sys
import time
import hashlib
import tarfile
import requests
FILE_INFO = {
'BASE_URL': 'https://baidu-nlp.bj.bcebos.com/',
'DATA': {
'name': 'lexical_analysis-dataset-2.0.0.tar.gz',
'md5': '71e4a9a36d0f0177929a1bccedca7dba'
},
'MODEL': {
'name': 'sequence_tagging_dy.tar.gz',
'md5': "1125d374c03c8218b6e47325dcf607e3"
},
}
def usage():
desc = ("\nDownload datasets and pretrained models for sequence tagging.\n"
"Usage:\n"
" 1. python download.py all\n"
" 2. python download.py dataset\n"
" 3. python download.py model\n")
print(desc)
def md5file(fname):
hash_md5 = hashlib.md5()
with io.open(fname, "rb") as fin:
for chunk in iter(lambda: fin.read(4096), b""):
hash_md5.update(chunk)
return hash_md5.hexdigest()
def extract(fname, dir_path):
"""
Extract tar.gz file
"""
try:
tar = tarfile.open(fname, "r:gz")
file_names = tar.getnames()
for file_name in file_names:
tar.extract(file_name, dir_path)
print(file_name)
tar.close()
except Exception as e:
raise e
def _download(url, filename, md5sum):
"""
Download file and check md5
"""
retry = 0
retry_limit = 3
chunk_size = 4096
while not (os.path.exists(filename) and md5file(filename) == md5sum):
if retry < retry_limit:
retry += 1
else:
raise RuntimeError(
"Cannot download dataset ({0}) with retry {1} times.".format(
url, retry_limit))
try:
start = time.time()
size = 0
res = requests.get(url, stream=True)
filesize = int(res.headers['content-length'])
if res.status_code == 200:
print("[Filesize]: %0.2f MB" % (filesize / 1024 / 1024))
# save by chunk
with io.open(filename, "wb") as fout:
for chunk in res.iter_content(chunk_size=chunk_size):
if chunk:
fout.write(chunk)
size += len(chunk)
pr = '>' * int(size * 50 / filesize)
print(
'\r[Process ]: %s%.2f%%' %
(pr, float(size / filesize * 100)),
end='')
end = time.time()
print("\n[CostTime]: %.2f s" % (end - start))
except Exception as e:
print(e)
def download(name, dir_path):
url = FILE_INFO['BASE_URL'] + FILE_INFO[name]['name']
file_path = os.path.join(dir_path, FILE_INFO[name]['name'])
if not os.path.exists(dir_path):
os.makedirs(dir_path)
# download data
print("Downloading : %s" % name)
_download(url, file_path, FILE_INFO[name]['md5'])
# extract data
print("Extracting : %s" % file_path)
extract(file_path, dir_path)
os.remove(file_path)
if __name__ == '__main__':
if len(sys.argv) != 2:
usage()
sys.exit(1)
pwd = os.path.join(os.path.dirname(__file__), './')
ernie_dir = os.path.join(os.path.dirname(__file__), './pretrained')
    if sys.argv[1] == 'all':
        download('DATA', pwd)
        download('MODEL', pwd)
    elif sys.argv[1] == "dataset":
        download('DATA', pwd)
    elif sys.argv[1] == "model":
        download('MODEL', pwd)
    else:
        usage()
#!/bin/bash
# download baseline model file to ./model_baseline/
if [ -d ./model_baseline/ ]
then
echo "./model_baseline/ directory already existed, ignore download"
else
wget --no-check-certificate https://baidu-nlp.bj.bcebos.com/sequence_tagging_dy.tar.gz
tar xvf sequence_tagging_dy.tar.gz
/bin/rm sequence_tagging_dy.tar.gz
fi
# download dataset file to ./data/
if [ -d ./data/ ]
then
echo "./data/ directory already existed, ignore download"
else
wget --no-check-certificate https://baidu-nlp.bj.bcebos.com/lexical_analysis-dataset-2.0.0.tar.gz
tar xvf lexical_analysis-dataset-2.0.0.tar.gz
/bin/rm lexical_analysis-dataset-2.0.0.tar.gz
fi
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
SequenceTagging network structure
"""
from __future__ import division
from __future__ import print_function
import io
import os
import sys
import math
import argparse
import numpy as np
from train import SeqTagging
from utils.configure import PDConfig
from utils.check import check_gpu, check_version
from utils.metrics import chunk_count
from reader import LacDataset, create_lexnet_data_generator, create_dataloader
work_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(os.path.join(work_dir, "../"))
from hapi.model import set_device, Input
import paddle.fluid as fluid
from paddle.fluid.optimizer import AdamOptimizer
from paddle.fluid.layers.utils import flatten
def main(args):
place = set_device(args.device)
fluid.enable_dygraph(place) if args.dynamic else None
inputs = [Input([None, None], 'int64', name='words'),
Input([None], 'int64', name='length')]
feed_list = None if args.dynamic else [x.forward() for x in inputs]
dataset = LacDataset(args)
eval_path = args.test_file
chunk_evaluator = fluid.metrics.ChunkEvaluator()
chunk_evaluator.reset()
eval_generator = create_lexnet_data_generator(
args, reader=dataset, file_name=eval_path, place=place, mode="test")
eval_dataset = create_dataloader(
eval_generator, place, feed_list=feed_list)
vocab_size = dataset.vocab_size
num_labels = dataset.num_labels
model = SeqTagging(args, vocab_size, num_labels)
optim = AdamOptimizer(
learning_rate=args.base_learning_rate,
parameter_list=model.parameters())
model.mode = "test"
model.prepare(inputs=inputs)
model.load(args.init_from_checkpoint, skip_mismatch=True)
for data in eval_dataset():
if len(data) == 1:
batch_data = data[0]
targets = np.array(batch_data[2])
else:
batch_data = data
targets = batch_data[2].numpy()
inputs_data = [batch_data[0], batch_data[1]]
crf_decode, length = model.test(inputs=inputs_data)
num_infer_chunks, num_label_chunks, num_correct_chunks = chunk_count(crf_decode, targets, length, dataset.id2label_dict)
chunk_evaluator.update(num_infer_chunks, num_label_chunks, num_correct_chunks)
precision, recall, f1 = chunk_evaluator.eval()
print("[test] P: %.5f, R: %.5f, F1: %.5f" % (precision, recall, f1))
if __name__ == '__main__':
args = PDConfig(yaml_file="sequence_tagging.yaml")
args.build()
args.Print()
use_gpu = True if args.device == "gpu" else False
check_gpu(use_gpu)
check_version()
main(args)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
SequenceTagging network structure
"""
from __future__ import division
from __future__ import print_function
import io
import os
import sys
import math
import argparse
import numpy as np
from train import SeqTagging
from utils.check import check_gpu, check_version
from utils.configure import PDConfig
from reader import LacDataset, create_lexnet_data_generator, create_dataloader
work_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(os.path.join(work_dir, "../"))
from hapi.model import set_device, Input
import paddle.fluid as fluid
from paddle.fluid.optimizer import AdamOptimizer
from paddle.fluid.layers.utils import flatten
def main(args):
place = set_device(args.device)
fluid.enable_dygraph(place) if args.dynamic else None
inputs = [Input([None, None], 'int64', name='words'),
Input([None], 'int64', name='length')]
feed_list = None if args.dynamic else [x.forward() for x in inputs]
dataset = LacDataset(args)
predict_path = args.predict_file
predict_generator = create_lexnet_data_generator(
args, reader=dataset, file_name=predict_path, place=place, mode="predict")
predict_dataset = create_dataloader(
predict_generator, place, feed_list=feed_list)
vocab_size = dataset.vocab_size
num_labels = dataset.num_labels
model = SeqTagging(args, vocab_size, num_labels)
optim = AdamOptimizer(
learning_rate=args.base_learning_rate,
parameter_list=model.parameters())
model.mode = "test"
model.prepare(inputs=inputs)
model.load(args.init_from_checkpoint, skip_mismatch=True)
f = open(args.output_file, "wb")
for data in predict_dataset():
if len(data) == 1:
input_data = data[0]
else:
input_data = data
results, length = model.test(inputs=flatten(input_data))
for i in range(len(results)):
word_len = length[i]
word_ids = results[i][: word_len]
tags = [dataset.id2label_dict[str(id)] for id in word_ids]
f.write("\002".join(tags) + "\n")
if __name__ == '__main__':
args = PDConfig(yaml_file="sequence_tagging.yaml")
args.build()
args.Print()
use_gpu = True if args.device == "gpu" else False
check_gpu(use_gpu)
check_version()
main(args)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
SequenceTagging dataset
"""
from __future__ import division
from __future__ import print_function
import io
import numpy as np
import paddle
class LacDataset(object):
"""
Load lexical analysis dataset
"""
def __init__(self, args):
self.word_dict_path = args.word_dict_path
self.label_dict_path = args.label_dict_path
self.word_rep_dict_path = args.word_rep_dict_path
self._load_dict()
def _load_dict(self):
self.word2id_dict = self.load_kv_dict(
self.word_dict_path, reverse=True, value_func=np.int64)
self.id2word_dict = self.load_kv_dict(self.word_dict_path)
self.label2id_dict = self.load_kv_dict(
self.label_dict_path, reverse=True, value_func=np.int64)
self.id2label_dict = self.load_kv_dict(self.label_dict_path)
if self.word_rep_dict_path is None:
self.word_replace_dict = dict()
else:
self.word_replace_dict = self.load_kv_dict(self.word_rep_dict_path)
def load_kv_dict(self,
dict_path,
reverse=False,
delimiter="\t",
key_func=None,
value_func=None):
"""
Load key-value dict from file
"""
result_dict = {}
for line in io.open(dict_path, "r", encoding='utf8'):
terms = line.strip("\n").split(delimiter)
if len(terms) != 2:
continue
if reverse:
value, key = terms
else:
key, value = terms
if key in result_dict:
raise KeyError("key duplicated with [%s]" % (key))
if key_func:
key = key_func(key)
if value_func:
value = value_func(value)
result_dict[key] = value
return result_dict
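    # Dict file layout (inferred from the loaders in _load_dict): word.dic
    # and tag.dic keep one "id<TAB>token" pair per line, e.g. "0\tPAD";
    # load_kv_dict(..., reverse=True, value_func=np.int64) then builds the
    # token->id mapping, while the default call builds id->token.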
@property
def vocab_size(self):
return max(self.word2id_dict.values()) + 1
@property
def num_labels(self):
return max(self.label2id_dict.values()) + 1
def get_num_examples(self, filename):
"""num of line of file"""
return sum(1 for line in io.open(filename, "r", encoding='utf8'))
def word_to_ids(self, words):
"""convert word to word index"""
word_ids = []
for word in words:
word = self.word_replace_dict.get(word, word)
if word not in self.word2id_dict:
word = "OOV"
word_id = self.word2id_dict[word]
word_ids.append(word_id)
return word_ids
def label_to_ids(self, labels):
"""convert label to label index"""
label_ids = []
for label in labels:
if label not in self.label2id_dict:
label = "O"
label_id = self.label2id_dict[label]
label_ids.append(label_id)
return label_ids
def file_reader(self,
filename,
mode="train",
batch_size=32,
max_seq_len=126):
"""
yield (word_idx, target_idx) one by one from file,
or yield (word_idx, ) in `infer` mode
"""
def wrapper():
fread = io.open(filename, "r", encoding="utf-8")
if mode == "train":
headline = next(fread)
headline = headline.strip().split('\t')
assert len(headline) == 2 and headline[0] == "text_a" and headline[
1] == "label"
for line in fread:
words, labels = line.strip("\n").split("\t")
if len(words) < 1:
continue
word_ids = self.word_to_ids(words.split("\002"))
label_ids = self.label_to_ids(labels.split("\002"))
assert len(word_ids) == len(label_ids)
                    word_ids = word_ids[0:max_seq_len]
                    words_len = np.int64(len(word_ids))
                    word_ids += [0 for _ in range(max_seq_len - words_len)]
label_ids = label_ids[0:max_seq_len]
label_ids += [0 for _ in range(max_seq_len - words_len)]
assert len(word_ids) == len(label_ids)
yield word_ids, label_ids, words_len
elif mode == "test":
headline = next(fread)
headline = headline.strip().split('\t')
assert len(headline) == 2 and headline[0] == "text_a" and headline[
1] == "label"
for line in fread:
words, labels = line.strip("\n").split("\t")
if len(words) < 1:
continue
word_ids = self.word_to_ids(words.split("\002"))
label_ids = self.label_to_ids(labels.split("\002"))
assert len(word_ids) == len(label_ids)
words_len = np.int64(len(word_ids))
yield word_ids, label_ids, words_len
else:
for line in fread:
words = line.strip("\n").split('\t')[0]
if words == u"text_a":
continue
if "\002" not in words:
word_ids = self.word_to_ids(words)
else:
word_ids = self.word_to_ids(words.split("\002"))
words_len = np.int64(len(word_ids))
yield word_ids, words_len
fread.close()
return wrapper
def create_lexnet_data_generator(args, reader, file_name, place, mode="train"):
def padding_data(max_len, batch_data):
padding_batch_data = []
for data in batch_data:
data += [0 for _ in range(max_len - len(data))]
padding_batch_data.append(data)
return padding_batch_data
def wrapper():
if mode == "train":
batch_words, batch_labels, seq_lens = [], [], []
            for epoch in range(args.epoch):
for instance in reader.file_reader(
file_name, mode, max_seq_len=args.max_seq_len)():
words, labels, words_len = instance
if len(seq_lens) < args.batch_size:
batch_words.append(words)
batch_labels.append(labels)
seq_lens.append(words_len)
if len(seq_lens) == args.batch_size:
yield batch_words, seq_lens, batch_labels, batch_labels
batch_words, batch_labels, seq_lens = [], [], []
if len(seq_lens) > 0:
yield batch_words, seq_lens, batch_labels, batch_labels
elif mode == "test":
batch_words, batch_labels, seq_lens, max_len = [], [], [], 0
for instance in reader.file_reader(
file_name, mode, max_seq_len=args.max_seq_len)():
words, labels, words_len = instance
max_len = words_len if words_len > max_len else max_len
if len(seq_lens) < args.batch_size:
batch_words.append(words)
seq_lens.append(words_len)
batch_labels.append(labels)
if len(seq_lens) == args.batch_size:
padding_batch_words = padding_data(max_len, batch_words)
padding_batch_labels = padding_data(max_len, batch_labels)
yield padding_batch_words, seq_lens, padding_batch_labels, padding_batch_labels
batch_words, batch_labels, seq_lens, max_len = [], [], [], 0
if len(seq_lens) > 0:
padding_batch_words = padding_data(max_len, batch_words)
padding_batch_labels = padding_data(max_len, batch_labels)
yield padding_batch_words, seq_lens, padding_batch_labels, padding_batch_labels
else:
batch_words, seq_lens, max_len = [], [], 0
for instance in reader.file_reader(
file_name, mode, max_seq_len=args.max_seq_len)():
words, words_len = instance
if len(seq_lens) < args.batch_size:
batch_words.append(words)
seq_lens.append(words_len)
max_len = words_len if words_len > max_len else max_len
if len(seq_lens) == args.batch_size:
padding_batch_words = padding_data(max_len, batch_words)
yield padding_batch_words, seq_lens
batch_words, seq_lens, max_len = [], [], 0
if len(seq_lens) > 0:
padding_batch_words = padding_data(max_len, batch_words)
yield padding_batch_words, seq_lens
return wrapper
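# A standalone sketch of the batch padding performed by padding_data() in
# the generator above (illustrative only, not used by the pipeline itself):
def _padding_demo():
    batch = [[1, 2, 3], [4, 5]]
    max_len = max(len(seq) for seq in batch)
    # pad every sequence with 0 up to the batch-wise max length
    return [seq + [0] * (max_len - len(seq)) for seq in batch]
# _padding_demo() -> [[1, 2, 3], [4, 5, 0]]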
def create_dataloader(generator, place, feed_list=None):
if not feed_list:
data_loader = paddle.io.DataLoader.from_generator(
capacity=50,
use_double_buffer=True,
iterable=True,
return_list=True)
else:
data_loader = paddle.io.DataLoader.from_generator(
feed_list=feed_list,
capacity=50,
use_double_buffer=True,
iterable=True,
return_list=True)
data_loader.set_batch_generator(generator, places=place)
return data_loader
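# Usage sketch (illustrative, mirroring the main() functions in this
# example): in dynamic-graph mode feed_list stays None and the loader
# yields plain lists; in static-graph mode pass the Variables built from
# the hapi Input specs:
#   feed_list = None if args.dynamic else [x.forward() for x in inputs]
#   loader = create_dataloader(generator, place, feed_list=feed_list)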
word_dict_path: "./conf/word.dic"
label_dict_path: "./conf/tag.dic"
word_rep_dict_path: "./conf/q2b.dic"
device: "cpu"
dynamic: True
epoch: 10
base_learning_rate: 0.001
word_emb_dim: 128
grnn_hidden_dim: 128
bigru_num: 2
emb_learning_rate: 1.0
crf_learning_rate: 1.0
batch_size: 300
max_seq_len: 126
num_devices: 1
save_dir: "model"
init_from_checkpoint: "model_baseline/params"
init_from_pretrain_model: ""
save_freq: 1
eval_freq: 1
output_file: "predict.result"
test_file: "./data/test.tsv"
train_file: "./data/train.tsv"
predict_file: "./data/infer.tsv"
mode: "train"
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
SequenceTagging network structure
"""
from __future__ import division
from __future__ import print_function
import io
import os
import sys
import math
import argparse
import numpy as np
work_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(os.path.join(work_dir, "../"))
from hapi.metrics import Metric
from hapi.model import Model, Input, Loss, set_device
from hapi.text.text import SequenceTagging
from utils.check import check_gpu, check_version
from utils.configure import PDConfig
from reader import LacDataset, create_lexnet_data_generator, create_dataloader
import paddle.fluid as fluid
from paddle.fluid.optimizer import AdamOptimizer
class SeqTagging(Model):
def __init__(self, args, vocab_size, num_labels, length=None):
super(SeqTagging, self).__init__()
"""
define the lexical analysis network structure
word: stores the input of the model
for_infer: a boolean value, indicating if the model to be created is for training or predicting.
return:
for infer: return the prediction
otherwise: return the prediction
"""
self.mode_type = args.mode
self.word_emb_dim = args.word_emb_dim
self.vocab_size = vocab_size
self.num_labels = num_labels
self.grnn_hidden_dim = args.grnn_hidden_dim
        self.emb_lr = args.emb_learning_rate if 'emb_learning_rate' in dir(
            args) else 1.0
        self.crf_lr = args.crf_learning_rate if 'crf_learning_rate' in dir(
            args) else 1.0
self.bigru_num = args.bigru_num
self.batch_size = args.batch_size
self.init_bound = 0.1
        self.length = length
self.sequence_tagging = SequenceTagging(
vocab_size=self.vocab_size,
num_labels=self.num_labels,
batch_size=self.batch_size,
word_emb_dim=self.word_emb_dim,
grnn_hidden_dim=self.grnn_hidden_dim,
emb_learning_rate=self.emb_lr,
crf_learning_rate=self.crf_lr,
bigru_num=self.bigru_num,
init_bound=self.init_bound,
length=self.length)
def forward(self, *inputs):
"""
Configure the network
"""
word = inputs[0]
lengths = inputs[1]
if self.mode_type == "train" or self.mode_type == "test":
target = inputs[2]
outputs = self.sequence_tagging(word, lengths, target)
else:
outputs = self.sequence_tagging(word, lengths)
return outputs
class Chunk_eval(fluid.dygraph.Layer):
def __init__(self,
num_chunk_types,
chunk_scheme,
excluded_chunk_types=None):
super(Chunk_eval, self).__init__()
self.num_chunk_types = num_chunk_types
self.chunk_scheme = chunk_scheme
self.excluded_chunk_types = excluded_chunk_types
def forward(self, input, label, seq_length=None):
precision = self._helper.create_variable_for_type_inference(
dtype="float32")
recall = self._helper.create_variable_for_type_inference(
dtype="float32")
f1_score = self._helper.create_variable_for_type_inference(
dtype="float32")
num_infer_chunks = self._helper.create_variable_for_type_inference(
dtype="int64")
num_label_chunks = self._helper.create_variable_for_type_inference(
dtype="int64")
num_correct_chunks = self._helper.create_variable_for_type_inference(
dtype="int64")
this_input = {"Inference": input, "Label": label}
if seq_length is not None:
this_input["SeqLength"] = seq_length
self._helper.append_op(
type='chunk_eval',
inputs=this_input,
outputs={
"Precision": [precision],
"Recall": [recall],
"F1-Score": [f1_score],
"NumInferChunks": [num_infer_chunks],
"NumLabelChunks": [num_label_chunks],
"NumCorrectChunks": [num_correct_chunks]
},
attrs={
"num_chunk_types": self.num_chunk_types,
"chunk_scheme": self.chunk_scheme,
"excluded_chunk_types": self.excluded_chunk_types or []
})
return (num_infer_chunks, num_label_chunks, num_correct_chunks)
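# Note (inferred from ChunkEval below): with the IOB scheme every chunk type
# contributes a *-B and a *-I tag, and the remaining label is the shared "O",
# hence num_chunk_types = ceil((num_labels - 1) / 2).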
class LacLoss(Loss):
    def __init__(self):
        super(LacLoss, self).__init__()
def forward(self, outputs, labels):
avg_cost = outputs[1]
return avg_cost
class ChunkEval(Metric):
def __init__(self, num_labels, name=None, *args, **kwargs):
super(ChunkEval, self).__init__(*args, **kwargs)
self._init_name(name)
self.chunk_eval = Chunk_eval(
int(math.ceil((num_labels - 1) / 2.0)), "IOB")
self.reset()
def add_metric_op(self, *args):
crf_decode = args[0]
lengths = args[2]
label = args[3]
(num_infer_chunks, num_label_chunks,
num_correct_chunks) = self.chunk_eval(
input=crf_decode, label=label, seq_length=lengths)
return [num_infer_chunks, num_label_chunks, num_correct_chunks]
def update(self, num_infer_chunks, num_label_chunks, num_correct_chunks,
*args, **kwargs):
self.infer_chunks_total += num_infer_chunks
self.label_chunks_total += num_label_chunks
self.correct_chunks_total += num_correct_chunks
precision = float(
num_correct_chunks) / num_infer_chunks if num_infer_chunks else 0
recall = float(
num_correct_chunks) / num_label_chunks if num_label_chunks else 0
f1_score = float(2 * precision * recall) / (
precision + recall) if num_correct_chunks else 0
return [precision, recall, f1_score]
def reset(self):
self.infer_chunks_total = 0
self.label_chunks_total = 0
self.correct_chunks_total = 0
def accumulate(self):
precision = float(
self.correct_chunks_total
) / self.infer_chunks_total if self.infer_chunks_total else 0
recall = float(
self.correct_chunks_total
) / self.label_chunks_total if self.label_chunks_total else 0
f1_score = float(2 * precision * recall) / (
precision + recall) if self.correct_chunks_total else 0
res = [precision, recall, f1_score]
return res
    def _init_name(self, name):
        # `name` is accepted for API compatibility; this metric always
        # reports the three values below
        self._name = ['precision', 'recall', 'F1']
def name(self):
return self._name
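# Standalone sketch of the precision/recall/F1 arithmetic used by ChunkEval
# above (the chunk counts are made up for illustration):
def _prf_demo(num_infer=10, num_label=8, num_correct=6):
    precision = float(num_correct) / num_infer if num_infer else 0.0
    recall = float(num_correct) / num_label if num_label else 0.0
    f1 = 2 * precision * recall / (precision + recall) if num_correct else 0.0
    return precision, recall, f1  # -> (0.6, 0.75, 0.666...)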
def main(args):
place = set_device(args.device)
fluid.enable_dygraph(place) if args.dynamic else None
inputs = [Input([None, None], 'int64', name='words'),
Input([None], 'int64', name='length'),
Input([None, None], 'int64', name='target')]
labels = [Input([None, None], 'int64', name='labels')]
feed_list = None if args.dynamic else [x.forward() for x in inputs + labels]
dataset = LacDataset(args)
train_path = args.train_file
test_path = args.test_file
train_generator = create_lexnet_data_generator(
args, reader=dataset, file_name=train_path, place=place, mode="train")
test_generator = create_lexnet_data_generator(
args, reader=dataset, file_name=test_path, place=place, mode="test")
train_dataset = create_dataloader(
train_generator, place, feed_list=feed_list)
test_dataset = create_dataloader(
test_generator, place, feed_list=feed_list)
vocab_size = dataset.vocab_size
num_labels = dataset.num_labels
model = SeqTagging(args, vocab_size, num_labels)
optim = AdamOptimizer(
learning_rate=args.base_learning_rate,
parameter_list=model.parameters())
model.prepare(
optim,
LacLoss(),
ChunkEval(num_labels),
inputs=inputs,
labels=labels,
device=args.device)
if args.init_from_checkpoint:
model.load(args.init_from_checkpoint)
if args.init_from_pretrain_model:
model.load(args.init_from_pretrain_model, reset_optimizer=True)
model.fit(train_dataset,
test_dataset,
epochs=args.epoch,
batch_size=args.batch_size,
eval_freq=args.eval_freq,
save_freq=args.save_freq,
save_dir=args.save_dir)
if __name__ == '__main__':
args = PDConfig(yaml_file="sequence_tagging.yaml")
args.build()
args.Print()
    use_gpu = args.device == "gpu"
check_gpu(use_gpu)
check_version()
main(args)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import paddle.fluid as fluid
__all__ = ['check_gpu', 'check_version']
def check_gpu(use_gpu):
"""
    Log an error and exit when use_gpu is set to True but the CPU
    version of PaddlePaddle is installed.
"""
err = "Config use_gpu cannot be set as true while you are " \
"using paddlepaddle cpu version ! \nPlease try: \n" \
"\t1. Install paddlepaddle-gpu to run model on GPU \n" \
"\t2. Set use_gpu as false in config file to run " \
"model on CPU"
try:
if use_gpu and not fluid.is_compiled_with_cuda():
print(err)
sys.exit(1)
except Exception as e:
pass
def check_version():
"""
    Log an error and exit when the installed version of PaddlePaddle
    does not satisfy the requirement.
    """
    err = "PaddlePaddle version 1.7 or higher is required, " \
          "or a suitable develop version.\n" \
          "Please make sure the installed version is compatible with your code."
try:
fluid.require_version('1.7.0')
except Exception as e:
print(err)
sys.exit(1)
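if __name__ == "__main__":
    # Minimal self-check sketch (assumes paddlepaddle >= 1.7 is installed):
    # only ask for the GPU when this build actually supports CUDA.
    check_version()
    check_gpu(fluid.is_compiled_with_cuda())
    print("environment check passed")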
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import argparse
import json
import yaml
import six
import logging
logging_only_message = "%(message)s"
logging_details = "%(asctime)s.%(msecs)03d %(levelname)s %(module)s - %(funcName)s: %(message)s"
class JsonConfig(object):
"""
    A high-level API for handling a JSON configuration file.
"""
def __init__(self, config_path):
self._config_dict = self._parse(config_path)
def _parse(self, config_path):
try:
with open(config_path) as json_file:
config_dict = json.load(json_file)
        except Exception:
            raise IOError("Error in parsing config file '%s'" %
                          config_path)
else:
return config_dict
def __getitem__(self, key):
return self._config_dict[key]
def print_config(self):
for arg, value in sorted(six.iteritems(self._config_dict)):
print('%s: %s' % (arg, value))
print('------------------------------------------------')
class ArgumentGroup(object):
def __init__(self, parser, title, des):
self._group = parser.add_argument_group(title=title, description=des)
def add_arg(self, name, type, default, help, **kwargs):
type = str2bool if type == bool else type
self._group.add_argument(
"--" + name,
default=default,
type=type,
help=help + ' Default: %(default)s.',
**kwargs)
class ArgConfig(object):
"""
    A high-level API for handling argument configurations.
"""
def __init__(self):
parser = argparse.ArgumentParser()
train_g = ArgumentGroup(parser, "training", "training options.")
train_g.add_arg("epoch", int, 3, "Number of epoches for fine-tuning.")
train_g.add_arg("learning_rate", float, 5e-5,
"Learning rate used to train with warmup.")
train_g.add_arg(
"lr_scheduler",
str,
"linear_warmup_decay",
"scheduler of learning rate.",
choices=['linear_warmup_decay', 'noam_decay'])
train_g.add_arg("weight_decay", float, 0.01,
"Weight decay rate for L2 regularizer.")
train_g.add_arg(
"warmup_proportion", float, 0.1,
"Proportion of training steps to perform linear learning rate warmup for."
)
train_g.add_arg("save_steps", int, 1000,
"The steps interval to save checkpoints.")
train_g.add_arg("use_fp16", bool, False,
"Whether to use fp16 mixed precision training.")
train_g.add_arg(
"loss_scaling", float, 1.0,
"Loss scaling factor for mixed precision training, only valid when use_fp16 is enabled."
)
train_g.add_arg("pred_dir", str, None,
"Path to save the prediction results")
log_g = ArgumentGroup(parser, "logging", "logging related.")
log_g.add_arg("skip_steps", int, 10,
"The steps interval to print loss.")
log_g.add_arg("verbose", bool, False, "Whether to output verbose log.")
run_type_g = ArgumentGroup(parser, "run_type", "running type options.")
run_type_g.add_arg("use_cuda", bool, True,
"If set, use GPU for training.")
run_type_g.add_arg(
"use_fast_executor", bool, False,
"If set, use fast parallel executor (in experiment).")
run_type_g.add_arg(
"num_iteration_per_drop_scope", int, 1,
"Ihe iteration intervals to clean up temporary variables.")
run_type_g.add_arg("do_train", bool, True,
"Whether to perform training.")
run_type_g.add_arg("do_predict", bool, True,
"Whether to perform prediction.")
custom_g = ArgumentGroup(parser, "customize", "customized options.")
self.custom_g = custom_g
self.parser = parser
def add_arg(self, name, dtype, default, descrip):
self.custom_g.add_arg(name, dtype, default, descrip)
def build_conf(self):
return self.parser.parse_args()
def str2bool(v):
    # argparse cannot parse strings like "True"/"False" into Python
    # booleans directly, so do the conversion ourselves
    return v.lower() in ("true", "t", "1")
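# Illustrative behaviour sketch:
#   str2bool("True") -> True, str2bool("t") -> True, str2bool("1") -> True
#   any other string, e.g. "False" or "no", maps to False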
def print_arguments(args, log=None):
if not log:
print('----------- Configuration Arguments -----------')
for arg, value in sorted(six.iteritems(vars(args))):
print('%s: %s' % (arg, value))
print('------------------------------------------------')
else:
log.info('----------- Configuration Arguments -----------')
for arg, value in sorted(six.iteritems(vars(args))):
log.info('%s: %s' % (arg, value))
log.info('------------------------------------------------')
class PDConfig(object):
"""
A high-level API for managing configuration files in PaddlePaddle.
    It works jointly with command-line arguments, JSON files and YAML files.
"""
def __init__(self, json_file="", yaml_file="", fuse_args=True):
"""
Init funciton for PDConfig.
json_file: the path to the json configure file.
yaml_file: the path to the yaml configure file.
fuse_args: if fuse the json/yaml configs with argparse.
"""
assert isinstance(json_file, str)
assert isinstance(yaml_file, str)
if json_file != "" and yaml_file != "":
raise Warning(
"json_file and yaml_file can not co-exist for now. please only use one configure file type."
)
return
self.args = None
self.arg_config = {}
self.json_config = {}
self.yaml_config = {}
parser = argparse.ArgumentParser()
self.default_g = ArgumentGroup(parser, "default", "default options.")
self.yaml_g = ArgumentGroup(parser, "yaml", "options from yaml.")
self.json_g = ArgumentGroup(parser, "json", "options from json.")
self.com_g = ArgumentGroup(parser, "custom", "customized options.")
self.default_g.add_arg("do_train", bool, False,
"Whether to perform training.")
self.default_g.add_arg("do_predict", bool, False,
"Whether to perform predicting.")
self.default_g.add_arg("do_eval", bool, False,
"Whether to perform evaluating.")
self.default_g.add_arg("do_save_inference_model", bool, False,
"Whether to perform model saving for inference.")
# NOTE: args for profiler
self.default_g.add_arg("is_profiler", int, 0, "the switch of profiler tools. (used for benchmark)")
self.default_g.add_arg("profiler_path", str, './', "the profiler output file path. (used for benchmark)")
self.default_g.add_arg("max_iter", int, 0, "the max train batch num.(used for benchmark)")
self.parser = parser
if json_file != "":
self.load_json(json_file, fuse_args=fuse_args)
if yaml_file:
self.load_yaml(yaml_file, fuse_args=fuse_args)
def load_json(self, file_path, fuse_args=True):
        if not os.path.exists(file_path):
            raise IOError("the json file %s does not exist." % file_path)
        with open(file_path, "r") as fin:
            self.json_config = json.loads(fin.read())
if fuse_args:
for name in self.json_config:
if isinstance(self.json_config[name], list):
self.json_g.add_arg(
name,
type(self.json_config[name][0]),
self.json_config[name],
"This is from %s" % file_path,
nargs=len(self.json_config[name]))
continue
if not isinstance(self.json_config[name], int) \
and not isinstance(self.json_config[name], float) \
and not isinstance(self.json_config[name], str) \
and not isinstance(self.json_config[name], bool):
continue
self.json_g.add_arg(name,
type(self.json_config[name]),
self.json_config[name],
"This is from %s" % file_path)
def load_yaml(self, file_path, fuse_args=True):
        if not os.path.exists(file_path):
            raise IOError("the yaml file %s does not exist." % file_path)
        with open(file_path, "r") as fin:
            self.yaml_config = yaml.load(fin, Loader=yaml.SafeLoader)
if fuse_args:
for name in self.yaml_config:
if isinstance(self.yaml_config[name], list):
self.yaml_g.add_arg(
name,
type(self.yaml_config[name][0]),
self.yaml_config[name],
"This is from %s" % file_path,
nargs=len(self.yaml_config[name]))
continue
if not isinstance(self.yaml_config[name], int) \
and not isinstance(self.yaml_config[name], float) \
and not isinstance(self.yaml_config[name], str) \
and not isinstance(self.yaml_config[name], bool):
continue
self.yaml_g.add_arg(name,
type(self.yaml_config[name]),
self.yaml_config[name],
"This is from %s" % file_path)
def build(self):
self.args = self.parser.parse_args()
self.arg_config = vars(self.args)
def __add__(self, new_arg):
assert isinstance(new_arg, list) or isinstance(new_arg, tuple)
assert len(new_arg) >= 3
assert self.args is None
name = new_arg[0]
dtype = new_arg[1]
dvalue = new_arg[2]
desc = new_arg[3] if len(
new_arg) == 4 else "Description is not provided."
self.com_g.add_arg(name, dtype, dvalue, desc)
return self
def __getattr__(self, name):
if name in self.arg_config:
return self.arg_config[name]
if name in self.json_config:
return self.json_config[name]
if name in self.yaml_config:
return self.yaml_config[name]
raise Warning("The argument %s is not defined." % name)
def Print(self):
print("-" * 70)
for name in self.arg_config:
print("%s:\t\t\t\t%s" % (str(name), str(self.arg_config[name])))
for name in self.json_config:
if name not in self.arg_config:
print("%s:\t\t\t\t%s" %
(str(name), str(self.json_config[name])))
for name in self.yaml_config:
if name not in self.arg_config:
print("%s:\t\t\t\t%s" %
(str(name), str(self.yaml_config[name])))
print("-" * 70)
if __name__ == "__main__":
"""
pd_config = PDConfig(json_file = "./test/bert_config.json")
pd_config.build()
print(pd_config.do_train)
print(pd_config.hidden_size)
pd_config = PDConfig(yaml_file = "./test/bert_config.yaml")
pd_config.build()
print(pd_config.do_train)
print(pd_config.hidden_size)
"""
pd_config = PDConfig(yaml_file="./test/bert_config.yaml")
pd_config += ("my_age", int, 18, "I am forever 18.")
pd_config.build()
print(pd_config.do_train)
print(pd_config.hidden_size)
print(pd_config.my_age)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import paddle.fluid as fluid
__all__ = ['chunk_count', 'build_chunk']
def build_chunk(data_list, id2label_dict):
"""
    Assemble entity chunks from a sequence of tag ids
"""
tag_list = [id2label_dict.get(str(id)) for id in data_list]
ner_dict = {}
ner_str = ""
ner_start = 0
for i in range(len(tag_list)):
tag = tag_list[i]
if tag == u"O":
if i != 0:
key = "%d_%d" % (ner_start, i - 1)
ner_dict[key] = ner_str
ner_start = i
ner_str = tag
elif tag.endswith(u"B"):
if i != 0:
key = "%d_%d" % (ner_start, i - 1)
ner_dict[key] = ner_str
ner_start = i
ner_str = tag.split('-')[0]
elif tag.endswith(u"I"):
if tag.split('-')[0] != ner_str:
if i != 0:
key = "%d_%d" % (ner_start, i - 1)
ner_dict[key] = ner_str
ner_start = i
ner_str = tag.split('-')[0]
return ner_dict
def chunk_count(infer_numpy, label_numpy, seq_len, id2label_dict):
"""
    calculate num_infer_chunks, num_label_chunks and num_correct_chunks for chunk metrics
"""
num_infer_chunks, num_label_chunks, num_correct_chunks = 0, 0, 0
assert infer_numpy.shape[0] == label_numpy.shape[0]
for i in range(infer_numpy.shape[0]):
infer_list = infer_numpy[i][: seq_len[i]]
label_list = label_numpy[i][: seq_len[i]]
infer_dict = build_chunk(infer_list, id2label_dict)
num_infer_chunks += len(infer_dict)
label_dict = build_chunk(label_list, id2label_dict)
num_label_chunks += len(label_dict)
for key in infer_dict:
if key in label_dict and label_dict[key] == infer_dict[key]:
num_correct_chunks += 1
return num_infer_chunks, num_label_chunks, num_correct_chunks
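if __name__ == "__main__":
    # Illustrative self-test with made-up ids; tags follow the
    # "<TYPE>-B"/"<TYPE>-I"/"O" scheme build_chunk() expects. Note that this
    # implementation also records "O" runs as chunks and drops a chunk that
    # is still open when the sequence ends.
    import numpy as np
    id2label = {"0": "O", "1": "PER-B", "2": "PER-I"}
    infer = np.array([[1, 2, 0, 1]])
    label = np.array([[1, 2, 0, 0]])
    print(chunk_count(infer, label, [4], id2label))  # -> (2, 2, 2)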
...@@ -19,10 +19,10 @@ import os ...@@ -19,10 +19,10 @@ import os
import argparse import argparse
import numpy as np import numpy as np
from model import Input, set_device from hapi.model import Input, set_device
from models import tsm_resnet50
from check import check_gpu, check_version from check import check_gpu, check_version
from modeling import tsm_resnet50
from kinetics_dataset import KineticsDataset from kinetics_dataset import KineticsDataset
from transforms import * from transforms import *
......
...@@ -26,7 +26,7 @@ except ImportError: ...@@ -26,7 +26,7 @@ except ImportError:
import pickle import pickle
from io import BytesIO from io import BytesIO
from paddle.fluid.io import Dataset from paddle.io import Dataset
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
...@@ -100,19 +100,12 @@ class KineticsDataset(Dataset): ...@@ -100,19 +100,12 @@ class KineticsDataset(Dataset):
def __getitem__(self, idx): def __getitem__(self, idx):
pickle_path = os.path.join(self.pickle_dir, self.pickle_paths[idx]) pickle_path = os.path.join(self.pickle_dir, self.pickle_paths[idx])
try: if six.PY2:
if six.PY2: data = pickle.load(open(pickle_path, 'rb'))
data = pickle.load(open(pickle_path, 'rb')) else:
else: data = pickle.load(open(pickle_path, 'rb'), encoding='bytes')
data = pickle.load(open(pickle_path, 'rb'), encoding='bytes')
vid, label, frames = data
vid, label, frames = data
if len(frames) < 1:
logger.error("{} contains no frame".format(pickle_path))
sys.exit(-1)
except Exception as e:
logger.error("Load {} failed: {}".format(pickle_path, e))
sys.exit(-1)
if self.label_list is not None: if self.label_list is not None:
label = self.label_list.index(label) label = self.label_list.index(label)
......
...@@ -22,10 +22,10 @@ import numpy as np ...@@ -22,10 +22,10 @@ import numpy as np
from paddle import fluid from paddle import fluid
from paddle.fluid.dygraph.parallel import ParallelEnv from paddle.fluid.dygraph.parallel import ParallelEnv
from model import Model, CrossEntropy, Input, set_device from hapi.model import Model, CrossEntropy, Input, set_device
from metrics import Accuracy from hapi.metrics import Accuracy
from models import tsm_resnet50
from modeling import tsm_resnet50
from check import check_gpu, check_version from check import check_gpu, check_version
from kinetics_dataset import KineticsDataset from kinetics_dataset import KineticsDataset
from transforms import * from transforms import *
......
...@@ -17,8 +17,8 @@ import paddle.fluid as fluid ...@@ -17,8 +17,8 @@ import paddle.fluid as fluid
from paddle.fluid.layer_helper import LayerHelper from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
from model import Model from hapi.model import Model
from .download import get_weights_path from hapi.download import get_weights_path
__all__ = ["TSM_ResNet", "tsm_resnet50"] __all__ = ["TSM_ResNet", "tsm_resnet50"]
...@@ -196,9 +196,17 @@ def _tsm_resnet(num_layers, seg_num=8, num_classes=400, pretrained=True): ...@@ -196,9 +196,17 @@ def _tsm_resnet(num_layers, seg_num=8, num_classes=400, pretrained=True):
weight_path = get_weights_path(*(pretrain_infos[num_layers])) weight_path = get_weights_path(*(pretrain_infos[num_layers]))
assert weight_path.endswith('.pdparams'), \ assert weight_path.endswith('.pdparams'), \
"suffix of weight must be .pdparams" "suffix of weight must be .pdparams"
model.load(weight_path[:-9]) model.load(weight_path)
return model return model
def tsm_resnet50(seg_num=8, num_classes=400, pretrained=True): def tsm_resnet50(seg_num=8, num_classes=400, pretrained=True):
"""TSM model with 50-layer ResNet as backbone
Args:
seg_num (int): segment number of each video sample. Default 8.
num_classes (int): video class number. Default 400.
    pretrained (bool): If True, returns a model with weights pre-trained
        on Kinetics-400 (the dataset used in this example), default True
"""
return _tsm_resnet(50, seg_num, num_classes, pretrained) return _tsm_resnet(50, seg_num, num_classes, pretrained)
dataset/voc*
pretrain_weights/darknet53_pretrained.pdparams
...@@ -101,11 +101,10 @@ The YOLOv3 network consists of a base feature extraction network and multi-scale feature fusion layers ...@@ -101,11 +101,10 @@ The YOLOv3 network consists of a base feature extraction network and multi-scale feature fusion layers
### Model training ### Model training
Once the data is ready, training and evaluation can be launched with the `main.py` script; the script below alternates training and model evaluation every epoch and saves checkpoints to the `yolo_checkpoint` directory by default. Once the data is ready, training and evaluation can be launched with the `main.py` script; the script below alternates training and model evaluation every epoch and saves checkpoints to the `yolo_checkpoint` directory by default.
YOLOv3 is trained with a total batch_size of 64; the following describes training on 4 Tesla P40 cards with a per-card batch_size of 16. For both static and dynamic graph modes, `--batch_size` in multi-card training is the batch size per card, i.e. the total batch size is `--batch_size` multiplied by the number of cards. YOLOv3 is trained with a total batch_size of 64; the following describes training on 4 Tesla P40 cards with a per-card batch_size of 16. For both static and dynamic graph modes, `--batch_size` in multi-card training is the batch size per card, i.e. the total batch size is `--batch_size` multiplied by the number of cards.
The arguments of the `main.py` script can be queried with the following command The arguments of the `main.py` script can be queried with the following command
```bash ```bash
......
...@@ -18,9 +18,8 @@ from __future__ import print_function ...@@ -18,9 +18,8 @@ from __future__ import print_function
import os import os
import cv2 import cv2
import numpy as np import numpy as np
from pycocotools.coco import COCO
from paddle.fluid.io import Dataset from paddle.io import Dataset
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
...@@ -91,6 +90,7 @@ class COCODataset(Dataset): ...@@ -91,6 +90,7 @@ class COCODataset(Dataset):
self._load_roidb_and_cname2cid() self._load_roidb_and_cname2cid()
def _load_roidb_and_cname2cid(self): def _load_roidb_and_cname2cid(self):
from pycocotools.coco import COCO
assert self._anno_path.endswith('.json'), \ assert self._anno_path.endswith('.json'), \
'invalid coco annotation file: ' + anno_path 'invalid coco annotation file: ' + anno_path
coco = COCO(self._anno_path) coco = COCO(self._anno_path)
...@@ -186,30 +186,31 @@ class COCODataset(Dataset): ...@@ -186,30 +186,31 @@ class COCODataset(Dataset):
data = np.frombuffer(f.read(), dtype='uint8') data = np.frombuffer(f.read(), dtype='uint8')
im = cv2.imdecode(data, 1) im = cv2.imdecode(data, 1)
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
im_info = np.array([roidb['im_id'][0], roidb['h'], roidb['w']], dtype='int32') im_id = roidb['im_id']
im_shape = np.array([roidb['h'], roidb['w']], dtype='int32')
gt_bbox = roidb['gt_bbox'] gt_bbox = roidb['gt_bbox']
gt_class = roidb['gt_class'] gt_class = roidb['gt_class']
gt_score = roidb['gt_score'] gt_score = roidb['gt_score']
return im_info, im, gt_bbox, gt_class, gt_score return im_id, im_shape, im, gt_bbox, gt_class, gt_score
def __getitem__(self, idx): def __getitem__(self, idx):
im_info, im, gt_bbox, gt_class, gt_score = self._getitem_by_index(idx) im_id, im_shape, im, gt_bbox, gt_class, gt_score = self._getitem_by_index(idx)
if self._mixup: if self._mixup:
mixup_idx = idx + np.random.randint(1, self.__len__()) mixup_idx = idx + np.random.randint(1, self.__len__())
mixup_idx %= self.__len__() mixup_idx %= self.__len__()
_, mixup_im, mixup_bbox, mixup_class, _ = \ _, _, mixup_im, mixup_bbox, mixup_class, _ = \
self._getitem_by_index(mixup_idx) self._getitem_by_index(mixup_idx)
im, gt_bbox, gt_class, gt_score = \ im_shape, im, gt_bbox, gt_class, gt_score = \
self._mixup_image(im, gt_bbox, gt_class, mixup_im, self._mixup_image(im, gt_bbox, gt_class, mixup_im,
mixup_bbox, mixup_class) mixup_bbox, mixup_class)
if self._transform: if self._transform:
im_info, im, gt_bbox, gt_class, gt_score = \ im_id, im_shape, im, gt_bbox, gt_class, gt_score = \
self._transform(im_info, im, gt_bbox, gt_class, gt_score) self._transform(im_id, im_shape, im, gt_bbox, gt_class, gt_score)
return [im_info, im, gt_bbox, gt_class, gt_score] return [im_id, im_shape, im, gt_bbox, gt_class, gt_score]
def _mixup_image(self, img1, bbox1, class1, img2, bbox2, class2): def _mixup_image(self, img1, bbox1, class1, img2, bbox2, class2):
factor = np.random.beta(self._alpha, self._beta) factor = np.random.beta(self._alpha, self._beta)
...@@ -234,7 +235,9 @@ class COCODataset(Dataset): ...@@ -234,7 +235,9 @@ class COCODataset(Dataset):
score2 = np.ones_like(class2, dtype="float32") * (1.0 - factor) score2 = np.ones_like(class2, dtype="float32") * (1.0 - factor)
gt_score = np.concatenate((score1, score2), axis=0) gt_score = np.concatenate((score1, score2), axis=0)
return img, gt_bbox, gt_class, gt_score im_shape = np.array([h, w], dtype='int32')
return im_shape, img, gt_bbox, gt_class, gt_score
@property @property
def mixup(self): def mixup(self):
......
...@@ -17,7 +17,7 @@ import os.path as osp ...@@ -17,7 +17,7 @@ import os.path as osp
import sys import sys
import tarfile import tarfile
from models.download import _download from hapi.download import _download
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
......
...@@ -22,13 +22,13 @@ from PIL import Image ...@@ -22,13 +22,13 @@ from PIL import Image
from paddle import fluid from paddle import fluid
from paddle.fluid.optimizer import Momentum from paddle.fluid.optimizer import Momentum
from paddle.fluid.io import DataLoader from paddle.io import DataLoader
from model import Model, Input, set_device from hapi.model import Model, Input, set_device
from models import yolov3_darknet53, YoloLoss
from coco import COCODataset from modeling import yolov3_darknet53, YoloLoss
from transforms import * from transforms import *
from visualizer import draw_bbox from visualizer import draw_bbox
import logging import logging
...@@ -65,7 +65,8 @@ def main(): ...@@ -65,7 +65,8 @@ def main():
device = set_device(FLAGS.device) device = set_device(FLAGS.device)
fluid.enable_dygraph(device) if FLAGS.dynamic else None fluid.enable_dygraph(device) if FLAGS.dynamic else None
inputs = [Input([None, 3], 'int32', name='img_info'), inputs = [Input([None, 1], 'int64', name='img_id'),
Input([None, 2], 'int32', name='img_shape'),
Input([None, 3, None, None], 'float32', name='image')] Input([None, 3, None, None], 'float32', name='image')]
cat2name = load_labels(FLAGS.label_list, with_background=False) cat2name = load_labels(FLAGS.label_list, with_background=False)
...@@ -87,9 +88,10 @@ def main(): ...@@ -87,9 +88,10 @@ def main():
img -= np.array(IMAGE_MEAN) img -= np.array(IMAGE_MEAN)
img /= np.array(IMAGE_STD) img /= np.array(IMAGE_STD)
img = img.transpose((2, 0, 1))[np.newaxis, :] img = img.transpose((2, 0, 1))[np.newaxis, :]
img_info = np.array([0, h, w]).astype('int32')[np.newaxis, :] img_id = np.array([0]).astype('int64')[np.newaxis, :]
img_shape = np.array([h, w]).astype('int32')[np.newaxis, :]
_, bboxes = model.test([img_info, img]) _, bboxes = model.test([img_id, img_shape, img])
vis_img = draw_bbox(orig_img, cat2name, bboxes, FLAGS.draw_threshold) vis_img = draw_bbox(orig_img, cat2name, bboxes, FLAGS.draw_threshold)
save_name = get_save_image_name(FLAGS.output_dir, FLAGS.infer_image) save_name = get_save_image_name(FLAGS.output_dir, FLAGS.infer_image)
......
...@@ -23,14 +23,15 @@ import numpy as np ...@@ -23,14 +23,15 @@ import numpy as np
from paddle import fluid from paddle import fluid
from paddle.fluid.optimizer import Momentum from paddle.fluid.optimizer import Momentum
from paddle.fluid.io import DataLoader from paddle.io import DataLoader
from model import Model, Input, set_device from hapi.model import Model, Input, set_device
from distributed import DistributedBatchSampler from hapi.distributed import DistributedBatchSampler
from models import yolov3_darknet53, YoloLoss from hapi.vision.transforms import Compose, BatchCompose
from coco_metric import COCOMetric from modeling import yolov3_darknet53, YoloLoss
from coco import COCODataset from coco import COCODataset
from coco_metric import COCOMetric
from transforms import * from transforms import *
NUM_MAX_BOXES = 50 NUM_MAX_BOXES = 50
...@@ -63,7 +64,8 @@ def main(): ...@@ -63,7 +64,8 @@ def main():
device = set_device(FLAGS.device) device = set_device(FLAGS.device)
fluid.enable_dygraph(device) if FLAGS.dynamic else None fluid.enable_dygraph(device) if FLAGS.dynamic else None
inputs = [Input([None, 3], 'int32', name='img_info'), inputs = [Input([None, 1], 'int64', name='img_id'),
Input([None, 2], 'int32', name='img_shape'),
Input([None, 3, None, None], 'float32', name='image')] Input([None, 3, None, None], 'float32', name='image')]
labels = [Input([None, NUM_MAX_BOXES, 4], 'float32', name='gt_bbox'), labels = [Input([None, NUM_MAX_BOXES, 4], 'float32', name='gt_bbox'),
Input([None, NUM_MAX_BOXES], 'int32', name='gt_label'), Input([None, NUM_MAX_BOXES], 'int32', name='gt_label'),
...@@ -123,7 +125,7 @@ def main(): ...@@ -123,7 +125,7 @@ def main():
model_mode='eval' if FLAGS.eval_only else 'train', model_mode='eval' if FLAGS.eval_only else 'train',
pretrained=pretrained) pretrained=pretrained)
if FLAGS.pretrain_weights is not None: if FLAGS.pretrain_weights and not FLAGS.eval_only:
model.load(FLAGS.pretrain_weights, skip_mismatch=True, reset_optimizer=True) model.load(FLAGS.pretrain_weights, skip_mismatch=True, reset_optimizer=True)
optim = make_optimizer(len(batch_sampler), parameter_list=model.parameters()) optim = make_optimizer(len(batch_sampler), parameter_list=model.parameters())
...@@ -163,7 +165,7 @@ def main(): ...@@ -163,7 +165,7 @@ def main():
save_dir="yolo_checkpoint/mixup", save_dir="yolo_checkpoint/mixup",
save_freq=10) save_freq=10)
# do not use image mixup transfrom in laste FLAGS.no_mixup_epoch epoches # do not use image mixup transform in the last FLAGS.no_mixup_epoch epochs
dataset.mixup = False dataset.mixup = False
model.fit(train_data=loader, model.fit(train_data=loader,
epochs=FLAGS.no_mixup_epoch, epochs=FLAGS.no_mixup_epoch,
......
...@@ -16,13 +16,13 @@ from __future__ import division ...@@ -16,13 +16,13 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.dygraph.nn import Conv2D from paddle.fluid.dygraph.nn import Conv2D, BatchNorm
from paddle.fluid.param_attr import ParamAttr from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.regularizer import L2Decay from paddle.fluid.regularizer import L2Decay
from model import Model, Loss from hapi.model import Model, Loss
from .darknet import darknet53, ConvBNLayer from hapi.download import get_weights_path
from .download import get_weights_path from hapi.vision.models import darknet53
__all__ = ['YoloLoss', 'YOLOv3', 'yolov3_darknet53'] __all__ = ['YoloLoss', 'YOLOv3', 'yolov3_darknet53']
...@@ -33,6 +33,46 @@ pretrain_infos = { ...@@ -33,6 +33,46 @@ pretrain_infos = {
} }
class ConvBNLayer(fluid.dygraph.Layer):
def __init__(self,
ch_in,
ch_out,
filter_size=3,
stride=1,
groups=1,
padding=0,
act="leaky"):
super(ConvBNLayer, self).__init__()
self.conv = Conv2D(
num_channels=ch_in,
num_filters=ch_out,
filter_size=filter_size,
stride=stride,
padding=padding,
groups=groups,
param_attr=ParamAttr(
initializer=fluid.initializer.Normal(0., 0.02)),
bias_attr=False,
act=None)
self.batch_norm = BatchNorm(
num_channels=ch_out,
param_attr=ParamAttr(
initializer=fluid.initializer.Normal(0., 0.02),
regularizer=L2Decay(0.)),
bias_attr=ParamAttr(
initializer=fluid.initializer.Constant(0.0),
regularizer=L2Decay(0.)))
self.act = act
def forward(self, inputs):
out = self.conv(inputs)
out = self.batch_norm(out)
if self.act == 'leaky':
out = fluid.layers.leaky_relu(x=out, alpha=0.1)
return out
class YoloDetectionBlock(fluid.dygraph.Layer): class YoloDetectionBlock(fluid.dygraph.Layer):
def __init__(self, ch_in, channel): def __init__(self, ch_in, channel):
super(YoloDetectionBlock, self).__init__() super(YoloDetectionBlock, self).__init__()
...@@ -88,6 +128,20 @@ class YoloDetectionBlock(fluid.dygraph.Layer): ...@@ -88,6 +128,20 @@ class YoloDetectionBlock(fluid.dygraph.Layer):
class YOLOv3(Model): class YOLOv3(Model):
"""YOLOv3 model from
`"YOLOv3: An Incremental Improvement" <https://arxiv.org/abs/1804.02767>`_
Args:
num_classes (int): class number, default 80.
        model_mode (str): 'train', 'eval' or 'test' mode, the network structure
            differs in the output layer and data: in 'train' mode no output
            layer is appended; in 'eval' and 'test' the output feature maps
            are decoded to predictions by 'fluid.layers.yolo_box'; 'eval' mode
            returns both feature maps and predictions, while 'test' mode only
            returns predictions. Default 'train'.
"""
def __init__(self, num_classes=80, model_mode='train'): def __init__(self, num_classes=80, model_mode='train'):
super(YOLOv3, self).__init__() super(YOLOv3, self).__init__()
self.num_classes = num_classes self.num_classes = num_classes
...@@ -138,7 +192,7 @@ class YOLOv3(Model): ...@@ -138,7 +192,7 @@ class YOLOv3(Model):
act='leaky_relu')) act='leaky_relu'))
self.route_blocks.append(route) self.route_blocks.append(route)
def forward(self, img_info, inputs): def forward(self, img_id, img_shape, inputs):
outputs = [] outputs = []
boxes = [] boxes = []
scores = [] scores = []
...@@ -163,8 +217,6 @@ class YOLOv3(Model): ...@@ -163,8 +217,6 @@ class YOLOv3(Model):
for m in anchor_mask: for m in anchor_mask:
mask_anchors.append(self.anchors[2 * m]) mask_anchors.append(self.anchors[2 * m])
mask_anchors.append(self.anchors[2 * m + 1]) mask_anchors.append(self.anchors[2 * m + 1])
img_shape = fluid.layers.slice(img_info, axes=[1], starts=[1], ends=[3])
img_id = fluid.layers.slice(img_info, axes=[1], starts=[0], ends=[1])
b, s = fluid.layers.yolo_box( b, s = fluid.layers.yolo_box(
x=block_out, x=block_out,
img_size=img_shape, img_size=img_shape,
...@@ -181,7 +233,7 @@ class YOLOv3(Model): ...@@ -181,7 +233,7 @@ class YOLOv3(Model):
if self.model_mode == 'train': if self.model_mode == 'train':
return outputs return outputs
preds = [img_id[0, :], preds = [img_id,
fluid.layers.multiclass_nms( fluid.layers.multiclass_nms(
bboxes=fluid.layers.concat(boxes, axis=1), bboxes=fluid.layers.concat(boxes, axis=1),
scores=fluid.layers.concat(scores, axis=2), scores=fluid.layers.concat(scores, axis=2),
...@@ -242,9 +294,22 @@ def _yolov3_darknet(num_layers=53, num_classes=80, ...@@ -242,9 +294,22 @@ def _yolov3_darknet(num_layers=53, num_classes=80,
weight_path = get_weights_path(*(pretrain_infos[num_layers])) weight_path = get_weights_path(*(pretrain_infos[num_layers]))
assert weight_path.endswith('.pdparams'), \ assert weight_path.endswith('.pdparams'), \
"suffix of weight must be .pdparams" "suffix of weight must be .pdparams"
model.load(weight_path[:-9]) model.load(weight_path)
return model return model
def yolov3_darknet53(num_classes=80, model_mode='train', pretrained=True): def yolov3_darknet53(num_classes=80, model_mode='train', pretrained=True):
"""YOLOv3 model with 53-layer DarkNet as backbone
Args:
num_classes (int): class number, default 80.
        model_mode (str): 'train', 'eval' or 'test' mode, the network structure
            differs in the output layer and data: in 'train' mode no output
            layer is appended; in 'eval' and 'test' the output feature maps
            are decoded to predictions by 'fluid.layers.yolo_box'; 'eval' mode
            returns both feature maps and predictions, while 'test' mode only
            returns predictions. Default 'train'.
        pretrained (bool): If True, returns a model with weights pre-trained
            on COCO, default True
"""
return _yolov3_darknet(53, num_classes, model_mode, pretrained) return _yolov3_darknet(53, num_classes, model_mode, pretrained)
...@@ -19,48 +19,18 @@ import cv2 ...@@ -19,48 +19,18 @@ import cv2
import traceback import traceback
import numpy as np import numpy as np
import logging __all__ = [
logger = logging.getLogger(__name__) 'ColorDistort',
'RandomExpand',
__all__ = ['ColorDistort', 'RandomExpand', 'RandomCrop', 'RandomFlip', 'RandomCrop',
'NormalizeBox', 'PadBox', 'RandomShape', 'NormalizeImage', 'RandomFlip',
'BboxXYXY2XYWH', 'ResizeImage', 'Compose', 'BatchCompose'] 'NormalizeBox',
'PadBox',
'RandomShape',
class Compose(object): 'NormalizeImage',
def __init__(self, transforms=[]): 'BboxXYXY2XYWH',
self.transforms = transforms 'ResizeImage',
]
def __call__(self, *data):
for f in self.transforms:
try:
data = f(*data)
except Exception as e:
stack_info = traceback.format_exc()
logger.info("fail to perform transform [{}] with error: "
"{} and stack:\n{}".format(f, e, str(stack_info)))
raise e
return data
class BatchCompose(object):
def __init__(self, transforms=[]):
self.transforms = transforms
def __call__(self, data):
for f in self.transforms:
try:
data = f(data)
except Exception as e:
stack_info = traceback.format_exc()
logger.info("fail to perform batch transform [{}] with error: "
"{} and stack:\n{}".format(f, e, str(stack_info)))
raise e
# sample list to batch data
batch = list(zip(*data))
return batch
class ColorDistort(object): class ColorDistort(object):
...@@ -145,7 +115,7 @@ class ColorDistort(object): ...@@ -145,7 +115,7 @@ class ColorDistort(object):
img += delta img += delta
return img return img
def __call__(self, im_info, im, gt_bbox, gt_class, gt_score): def __call__(self, im_id, im_shape, im, gt_bbox, gt_class, gt_score):
if self.random_apply: if self.random_apply:
distortions = np.random.permutation([ distortions = np.random.permutation([
self.apply_brightness, self.apply_contrast, self.apply_brightness, self.apply_contrast,
...@@ -153,7 +123,7 @@ class ColorDistort(object): ...@@ -153,7 +123,7 @@ class ColorDistort(object):
]) ])
for func in distortions: for func in distortions:
im = func(im) im = func(im)
return [im_info, im, gt_bbox, gt_class, gt_score] return [im_id, im_shape, im, gt_bbox, gt_class, gt_score]
im = self.apply_brightness(im) im = self.apply_brightness(im)
...@@ -165,7 +135,7 @@ class ColorDistort(object): ...@@ -165,7 +135,7 @@ class ColorDistort(object):
im = self.apply_saturation(im) im = self.apply_saturation(im)
im = self.apply_hue(im) im = self.apply_hue(im)
im = self.apply_contrast(im) im = self.apply_contrast(im)
return [im_info, im, gt_bbox, gt_class, gt_score] return [im_id, im_shape, im, gt_bbox, gt_class, gt_score]
class RandomExpand(object): class RandomExpand(object):
...@@ -183,16 +153,16 @@ class RandomExpand(object): ...@@ -183,16 +153,16 @@ class RandomExpand(object):
self.prob = prob self.prob = prob
self.fill_value = fill_value self.fill_value = fill_value
def __call__(self, im_info, im, gt_bbox, gt_class, gt_score): def __call__(self, im_id, im_shape, im, gt_bbox, gt_class, gt_score):
if np.random.uniform(0., 1.) < self.prob: if np.random.uniform(0., 1.) < self.prob:
return [im_info, im, gt_bbox, gt_class, gt_score] return [im_id, im_shape, im, gt_bbox, gt_class, gt_score]
height, width, _ = im.shape height, width, _ = im.shape
expand_ratio = np.random.uniform(1., self.ratio) expand_ratio = np.random.uniform(1., self.ratio)
h = int(height * expand_ratio) h = int(height * expand_ratio)
w = int(width * expand_ratio) w = int(width * expand_ratio)
if not h > height or not w > width: if not h > height or not w > width:
return [im_info, im, gt_bbox, gt_class, gt_score] return [im_id, im_shape, im, gt_bbox, gt_class, gt_score]
y = np.random.randint(0, h - height) y = np.random.randint(0, h - height)
x = np.random.randint(0, w - width) x = np.random.randint(0, w - width)
canvas = np.ones((h, w, 3), dtype=np.uint8) canvas = np.ones((h, w, 3), dtype=np.uint8)
...@@ -201,7 +171,7 @@ class RandomExpand(object): ...@@ -201,7 +171,7 @@ class RandomExpand(object):
gt_bbox += np.array([x, y, x, y], dtype=np.float32) gt_bbox += np.array([x, y, x, y], dtype=np.float32)
return [im_info, canvas, gt_bbox, gt_class, gt_score] return [im_id, im_shape, canvas, gt_bbox, gt_class, gt_score]
class RandomCrop(): class RandomCrop():
...@@ -232,9 +202,9 @@ class RandomCrop(): ...@@ -232,9 +202,9 @@ class RandomCrop():
self.allow_no_crop = allow_no_crop self.allow_no_crop = allow_no_crop
self.cover_all_box = cover_all_box self.cover_all_box = cover_all_box
def __call__(self, im_info, im, gt_bbox, gt_class, gt_score): def __call__(self, im_id, im_shape, im, gt_bbox, gt_class, gt_score):
if len(gt_bbox) == 0: if len(gt_bbox) == 0:
return [im_info, im, gt_bbox, gt_class, gt_score] return [im_id, im_shape, im, gt_bbox, gt_class, gt_score]
# NOTE Original method attempts to generate one candidate for each # NOTE Original method attempts to generate one candidate for each
# threshold then randomly sample one from the resulting list. # threshold then randomly sample one from the resulting list.
...@@ -251,7 +221,7 @@ class RandomCrop(): ...@@ -251,7 +221,7 @@ class RandomCrop():
for thresh in thresholds: for thresh in thresholds:
if thresh == 'no_crop': if thresh == 'no_crop':
return [im_info, im, gt_bbox, gt_class, gt_score] return [im_id, im_shape, im, gt_bbox, gt_class, gt_score]
h, w, _ = im.shape h, w, _ = im.shape
found = False found = False
...@@ -286,9 +256,9 @@ class RandomCrop(): ...@@ -286,9 +256,9 @@ class RandomCrop():
gt_bbox = np.take(cropped_box, valid_ids, axis=0) gt_bbox = np.take(cropped_box, valid_ids, axis=0)
gt_class = np.take(gt_class, valid_ids, axis=0) gt_class = np.take(gt_class, valid_ids, axis=0)
gt_score = np.take(gt_score, valid_ids, axis=0)
return [im_id, im_shape, im, gt_bbox, gt_class, gt_score]
return [im_id, im_shape, im, gt_bbox, gt_class, gt_score]
def _iou_matrix(self, a, b):
tl_i = np.maximum(a[:, np.newaxis, :2], b[:, :2])
@@ -334,7 +304,7 @@ class RandomFlip():
isinstance(self.is_normalized, bool)):
raise TypeError("{}: input type is invalid.".format(self))
def __call__(self, im_id, im_shape, im, gt_bbox, gt_class, gt_score):
"""Flip the image and bounding boxes.
Operators:
1. Flip the image numpy.
@@ -363,20 +333,20 @@ class RandomFlip():
m = "{}: invalid box, x2 should be greater than x1".format(
self)
raise ValueError(m)
return [im_id, im_shape, im, gt_bbox, gt_class, gt_score]
class NormalizeBox(object):
"""Transform the bounding box's coordinates to [0, 1]."""
def __call__(self, im_id, im_shape, im, gt_bbox, gt_class, gt_score):
height, width, _ = im.shape
for i in range(gt_bbox.shape[0]):
gt_bbox[i][0] = gt_bbox[i][0] / width
gt_bbox[i][1] = gt_bbox[i][1] / height
gt_bbox[i][2] = gt_bbox[i][2] / width
gt_bbox[i][3] = gt_bbox[i][3] / height
return [im_id, im_shape, im, gt_bbox, gt_class, gt_score]
class PadBox(object):
@@ -388,7 +358,7 @@ class PadBox(object):
"""
self.num_max_boxes = num_max_boxes
def __call__(self, im_id, im_shape, im, gt_bbox, gt_class, gt_score):
gt_num = min(self.num_max_boxes, len(gt_bbox))
num_max = self.num_max_boxes
@@ -406,7 +376,7 @@ class PadBox(object):
if gt_num > 0:
pad_score[:gt_num] = gt_score[:gt_num, 0]
gt_score = pad_score
return [im_id, im_shape, im, gt_bbox, gt_class, gt_score]
class BboxXYXY2XYWH(object):
@@ -414,10 +384,10 @@ class BboxXYXY2XYWH(object):
Convert bbox XYXY format to XYWH format.
"""
def __call__(self, im_id, im_shape, im, gt_bbox, gt_class, gt_score):
gt_bbox[:, 2:4] = gt_bbox[:, 2:4] - gt_bbox[:, :2]
gt_bbox[:, :2] = gt_bbox[:, :2] + gt_bbox[:, 2:4] / 2.
return [im_id, im_shape, im, gt_bbox, gt_class, gt_score]
class RandomShape(object):
@@ -450,13 +420,13 @@ class RandomShape(object):
method = np.random.choice(self.interps) if self.random_inter \
else cv2.INTER_NEAREST
for i in range(len(samples)):
im = samples[i][2]
h, w = im.shape[:2]
scale_x = float(shape) / w
scale_y = float(shape) / h
im = cv2.resize(
im, None, None, fx=scale_x, fy=scale_y, interpolation=method)
samples[i][2] = im
return samples
@@ -492,7 +462,7 @@ class NormalizeImage(object):
3. (optional) permute channel
"""
for i in range(len(samples)):
im = samples[i][2]
im = im.astype(np.float32, copy=False)
mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
std = np.array(self.std)[np.newaxis, np.newaxis, :]
@@ -502,7 +472,7 @@ class NormalizeImage(object):
im /= std
if self.channel_first:
im = im.transpose((2, 0, 1))
samples[i][2] = im
return samples
@@ -595,16 +565,15 @@ class ResizeImage(object):
format(type(target_size)))
self.target_size = target_size
def __call__(self, im_id, im_shape, im, gt_bbox, gt_class, gt_score):
""" Resize the image numpy.
"""
if not isinstance(im, np.ndarray):
raise TypeError("{}: image type is not numpy.".format(self))
if len(im.shape) != 3:
raise ImageError('{}: image is not 3-dimensional.'.format(self))
im_scale_x = float(self.target_size) / float(im.shape[1])
im_scale_y = float(self.target_size) / float(im.shape[0])
resize_w = self.target_size
resize_h = self.target_size
@@ -616,5 +585,5 @@ class ResizeImage(object):
fy=im_scale_y,
interpolation=self.interp)
return [im_id, im_shape, im, gt_bbox, gt_class, gt_score]
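# Usage sketch (illustrative, not part of the files above): each per-sample
# operator shares the [im_id, im_shape, im, gt_bbox, gt_class, gt_score]
# signature, so a pipeline can thread the sample fields through a list of
# operators. `SampleCompose` is a hypothetical helper, and the constructor
# arguments of the operators below are assumptions.
class SampleCompose(object):
    """Thread the shared sample fields through a list of operators."""

    def __init__(self, ops):
        self.ops = ops

    def __call__(self, *sample):
        for op in self.ops:
            sample = op(*sample)
        return sample

sample_ops = SampleCompose(
    [RandomFlip(), NormalizeBox(), BboxXYXY2XYWH(), PadBox(num_max_boxes=50)])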
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from hapi.configure import Config
from hapi import callbacks
from hapi import datasets
from hapi import distributed
from hapi import download
from hapi import metrics
from hapi import model
from hapi import progressbar
from hapi import text
from hapi import vision
__all__ = [
'Config',
'callbacks',
'datasets',
'distributed',
'download',
'metrics',
'model',
'progressbar',
'text',
'vision',
]
@@ -15,7 +15,7 @@
import six
import copy
from .progressbar import ProgressBar
from paddle.fluid.dygraph.parallel import ParallelEnv
@@ -218,8 +218,6 @@ class ProgBarLogger(Callback):
# if steps is not None, last step will update in on_epoch_end
if self.steps and self.train_step < self.steps:
self._updates(logs, 'train')
def on_epoch_end(self, epoch, logs=None):
logs = logs or {}
@@ -238,7 +236,7 @@ class ProgBarLogger(Callback):
def on_eval_batch_end(self, step, logs=None):
logs = logs or {}
self.eval_step += 1
samples = logs.get('batch_size', 1)
self.evaled_samples += samples
...
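# A minimal custom callback sketch built on the hooks shown above; it assumes
# `Callback` is importable from hapi.callbacks and that `logs` carries a
# 'batch_size' entry, as ProgBarLogger relies on.
from hapi.callbacks import Callback

class EvalSampleCounter(Callback):
    def on_eval_begin(self, logs=None):
        self.eval_step = 0
        self.evaled_samples = 0

    def on_eval_batch_end(self, step, logs=None):
        logs = logs or {}
        self.eval_step += 1  # count steps locally, mirroring the fix above
        self.evaled_samples += logs.get('batch_size', 1)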
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import argparse
import json
import yaml
import six
import logging
logging_only_message = "%(message)s"
logging_details = "%(asctime)s.%(msecs)03d %(levelname)s %(module)s - %(funcName)s: %(message)s"
class JsonConfig(object):
"""
A high-level api for handling json configure file.
"""
def __init__(self, config_path):
self._config_dict = self._parse(config_path)
def _parse(self, config_path):
try:
with open(config_path) as json_file:
config_dict = json.load(json_file)
except Exception:
raise IOError("Error in parsing bert model config file '%s'" %
config_path)
else:
return config_dict
def __getitem__(self, key):
return self._config_dict[key]
def print_config(self):
for arg, value in sorted(six.iteritems(self._config_dict)):
print('%s: %s' % (arg, value))
print('------------------------------------------------')
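# Usage sketch for JsonConfig; the config path and key are illustrative.
json_cfg = JsonConfig("./config/bert_config.json")
print(json_cfg["hidden_size"])  # dict-style access via __getitem__
json_cfg.print_config()         # dump all key/value pairs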
class ArgumentGroup(object):
def __init__(self, parser, title, des):
self._group = parser.add_argument_group(title=title, description=des)
def add_arg(self, name, type, default, help, **kwargs):
type = str2bool if type == bool else type
self._group.add_argument(
"--" + name,
default=default,
type=type,
help=help + ' Default: %(default)s.',
**kwargs)
class ArgConfig(object):
"""
A high-level api for handling argument configs.
"""
def __init__(self):
parser = argparse.ArgumentParser()
custom_g = ArgumentGroup(parser, "customize", "customized options.")
self.custom_g = custom_g
self.parser = parser
def add_arg(self, name, dtype, default, descrip):
self.custom_g.add_arg(name, dtype, default, descrip)
def build_conf(self):
return self.parser.parse_args()
def str2bool(v):
# argparse cannot parse strings like "True"/"False" as Python
# booleans directly, so convert manually
return v.lower() in ("true", "t", "1")
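# Usage sketch for ArgConfig; flag names and defaults are illustrative. Since
# bool arguments are routed through str2bool, `--use_cuda False` parses to False.
arg_cfg = ArgConfig()
arg_cfg.add_arg("use_cuda", bool, True, "Whether to run on GPU.")
arg_cfg.add_arg("batch_size", int, 32, "Batch size.")
parsed = arg_cfg.build_conf()
print(parsed.use_cuda, parsed.batch_size)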
def print_arguments(args, log=None):
if not log:
print('----------- Configuration Arguments -----------')
for arg, value in sorted(six.iteritems(vars(args))):
print('%s: %s' % (arg, value))
print('------------------------------------------------')
else:
log.info('----------- Configuration Arguments -----------')
for arg, value in sorted(six.iteritems(vars(args))):
log.info('%s: %s' % (arg, value))
log.info('------------------------------------------------')
class Config(object):
"""
A high-level API for managing configuration files in PaddlePaddle.
Can work jointly with command-line arguments, JSON files and YAML files.
"""
def __init__(self, json_file="", yaml_file="", fuse_args=True):
"""
Init function for Config.
json_file: the path to the json configure file.
yaml_file: the path to the yaml configure file.
fuse_args: if fuse the json/yaml configs with argparse.
"""
assert isinstance(json_file, str)
assert isinstance(yaml_file, str)
if json_file != "" and yaml_file != "":
raise ValueError(
"json_file and yaml_file can not co-exist for now. Please only use one configure file type."
)
self.args = None
self.arg_config = {}
self.json_config = {}
self.yaml_config = {}
parser = argparse.ArgumentParser()
self.default_g = ArgumentGroup(parser, "default", "default options.")
self.yaml_g = ArgumentGroup(parser, "yaml", "options from yaml.")
self.json_g = ArgumentGroup(parser, "json", "options from json.")
self.com_g = ArgumentGroup(parser, "custom", "customized options.")
self.parser = parser
if json_file != "":
self.load_json(json_file, fuse_args=fuse_args)
if yaml_file:
self.load_yaml(yaml_file, fuse_args=fuse_args)
def load_json(self, file_path, fuse_args=True):
if not os.path.exists(file_path):
raise Warning("the json file %s does not exist." % file_path)
return
with open(file_path, "r") as fin:
self.json_config = json.loads(fin.read())
fin.close()
if fuse_args:
for name in self.json_config:
if isinstance(self.json_config[name], list):
self.json_g.add_arg(
name,
type(self.json_config[name][0]),
self.json_config[name],
"This is from %s" % file_path,
nargs=len(self.json_config[name]))
continue
if not isinstance(self.json_config[name], int) \
and not isinstance(self.json_config[name], float) \
and not isinstance(self.json_config[name], str) \
and not isinstance(self.json_config[name], bool):
continue
self.json_g.add_arg(name,
type(self.json_config[name]),
self.json_config[name],
"This is from %s" % file_path)
def load_yaml(self, file_path, fuse_args=True):
if not os.path.exists(file_path):
raise Warning("the yaml file %s does not exist." % file_path)
return
with open(file_path, "r") as fin:
self.yaml_config = yaml.load(fin, Loader=yaml.SafeLoader)
fin.close()
if fuse_args:
for name in self.yaml_config:
if isinstance(self.yaml_config[name], list):
self.yaml_g.add_arg(
name,
type(self.yaml_config[name][0]),
self.yaml_config[name],
"This is from %s" % file_path,
nargs=len(self.yaml_config[name]))
continue
if not isinstance(self.yaml_config[name], int) \
and not isinstance(self.yaml_config[name], float) \
and not isinstance(self.yaml_config[name], str) \
and not isinstance(self.yaml_config[name], bool):
continue
self.yaml_g.add_arg(name,
type(self.yaml_config[name]),
self.yaml_config[name],
"This is from %s" % file_path)
def build(self):
self.args = self.parser.parse_args()
self.arg_config = vars(self.args)
def __add__(self, new_arg):
assert isinstance(new_arg, list) or isinstance(new_arg, tuple)
assert len(new_arg) >= 3
assert self.args is None
name = new_arg[0]
dtype = new_arg[1]
dvalue = new_arg[2]
desc = new_arg[3] if len(
new_arg) == 4 else "Description is not provided."
self.com_g.add_arg(name, dtype, dvalue, desc)
return self
def __getattr__(self, name):
if name in self.arg_config:
return self.arg_config[name]
if name in self.json_config:
return self.json_config[name]
if name in self.yaml_config:
return self.yaml_config[name]
raise Warning("The argument %s is not defined." % name)
def Print(self):
print("-" * 70)
for name in self.arg_config:
print("%s:\t\t\t\t%s" % (str(name), str(self.arg_config[name])))
for name in self.json_config:
if name not in self.arg_config:
print("%s:\t\t\t\t%s" %
(str(name), str(self.json_config[name])))
for name in self.yaml_config:
if name not in self.arg_config:
print("%s:\t\t\t\t%s" %
(str(name), str(self.yaml_config[name])))
print("-" * 70)
if __name__ == "__main__":
"""
pd_config = Config(json_file="./test/bert_config.json")
pd_config.build()
print(pd_config.do_train)
print(pd_config.hidden_size)
pd_config = Config(yaml_file="./test/bert_config.yaml")
pd_config.build()
print(pd_config.do_train)
print(pd_config.hidden_size)
"""
config = Config(yaml_file="./bert.yaml")
config += ("my_age", int, 18, "I am forever 18.")
config.build()
print(config.data_dir)
print(config.my_age)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import folder
from . import mnist
from . import flowers
from .folder import *
from .mnist import *
from .flowers import *
__all__ = folder.__all__ \
+ mnist.__all__ \
+ flowers.__all__
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import os
import io
import tarfile
import numpy as np
import scipy.io as scio
from PIL import Image
from paddle.io import Dataset
from .utils import _check_exists_and_download
__all__ = ["Flowers"]
DATA_URL = 'http://paddlemodels.bj.bcebos.com/flowers/102flowers.tgz'
LABEL_URL = 'http://paddlemodels.bj.bcebos.com/flowers/imagelabels.mat'
SETID_URL = 'http://paddlemodels.bj.bcebos.com/flowers/setid.mat'
DATA_MD5 = '52808999861908f626f3c1f4e79d11fa'
LABEL_MD5 = 'e0620be6f572b9609742df49c70aed4d'
SETID_MD5 = 'a5357ecc9cb78c4bef273ce3793fc85c'
# In the official 'readme', tstid flags the test data and trnid flags the
# train data, but the test split is larger than the train split,
# so we swap the train and test data.
MODE_FLAG_MAP = {'train': 'tstid', 'test': 'trnid', 'valid': "valid"}
class Flowers(Dataset):
"""
Implementation of the Flowers dataset
Args:
data_file(str): path to data file, can be set None if
:attr:`download` is True. Default None
label_file(str): path to label file, can be set None if
:attr:`download` is True. Default None
setid_file(str): path to subset index file, can be set
None if :attr:`download` is True. Default None
mode(str): 'train', 'valid' or 'test' mode. Default 'train'.
transform(callable): transform to perform on the sample, None for no
transform. Default None.
download(bool): whether to automatically download the dataset if
:attr:`data_file`/:attr:`label_file`/:attr:`setid_file` is unset.
Default True.
Examples:
.. code-block:: python
from hapi.vision.datasets import Flowers
flowers = Flowers(mode='test')
for i in range(len(flowers)):
sample = flowers[i]
print(sample[0].shape, sample[1])
"""
def __init__(self,
data_file=None,
label_file=None,
setid_file=None,
mode='train',
transform=None,
download=True):
assert mode.lower() in ['train', 'valid', 'test'], \
"mode should be 'train', 'valid' or 'test', but got {}".format(mode)
self.flag = MODE_FLAG_MAP[mode.lower()]
self.data_file = data_file
if self.data_file is None:
assert download, "data_file not set and auto download disabled"
self.data_file = _check_exists_and_download(
data_file, DATA_URL, DATA_MD5, 'flowers', download)
self.label_file = label_file
if self.label_file is None:
assert download, "label_file not set and auto download disabled"
self.label_file = _check_exists_and_download(
label_file, LABEL_URL, LABEL_MD5, 'flowers', download)
self.setid_file = setid_file
if self.setid_file is None:
assert download, "setid_file not set and auto download disabled"
self.setid_file = _check_exists_and_download(
setid_file, SETID_URL, SETID_MD5, 'flowers', download)
self.transform = transform
# read dataset into memory
self._load_anno()
def _load_anno(self):
self.name2mem = {}
self.data_tar = tarfile.open(self.data_file)
for ele in self.data_tar.getmembers():
self.name2mem[ele.name] = ele
self.labels = scio.loadmat(self.label_file)['labels'][0]
self.indexes = scio.loadmat(self.setid_file)[self.flag][0]
def __getitem__(self, idx):
index = self.indexes[idx]
label = np.array([self.labels[index - 1]])
img_name = "jpg/image_%05d.jpg" % index
img_ele = self.name2mem[img_name]
image = self.data_tar.extractfile(img_ele).read()
image = np.array(Image.open(io.BytesIO(image)))
if self.transform is not None:
image, label = self.transform(image, label)
return image, label
def __len__(self):
return len(self.indexes)
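# Sketch: a `transform` passed to Flowers receives and returns (image, label),
# as __getitem__ above shows; the HWC->CHW float conversion is illustrative.
def to_chw_float(image, label):
    image = image.astype('float32').transpose((2, 0, 1)) / 255.0
    return image, label

flowers_test = Flowers(mode='test', transform=to_chw_float)
image, label = flowers_test[0]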
@@ -16,7 +16,9 @@ import os
import sys
import cv2
from paddle.io import Dataset
__all__ = ["DatasetFolder", "ImageFolder"]
def has_valid_extension(filename, extensions):
@@ -71,14 +73,12 @@ class DatasetFolder(Dataset):
Args:
root (string): Root directory path.
loader (callable|optional): A function to load a sample given its path.
extensions (tuple[str]|optional): A list of allowed extensions.
both extensions and is_valid_file should not be passed.
transform (callable|optional): A function/transform that takes in
a sample and returns a transformed version.
is_valid_file (callable|optional): A function that takes the path of a file
and checks if it is a valid file (used to filter out corrupt files);
both extensions and is_valid_file should not be passed.
@@ -94,9 +94,9 @@ class DatasetFolder(Dataset):
loader=None,
extensions=None,
transform=None,
is_valid_file=None):
self.root = root
self.transform = transform
if extensions is None:
extensions = IMG_EXTENSIONS
classes, class_to_idx = self._find_classes(self.root)
@@ -150,9 +150,7 @@ class DatasetFolder(Dataset):
path, target = self.samples[index]
sample = self.loader(path)
if self.transform is not None:
sample, target = self.transform(sample, target)
return sample, target
@@ -166,3 +164,80 @@ IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif',
def cv2_loader(path):
return cv2.imread(path)
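# Sketch: after this change DatasetFolder's `transform` takes and returns the
# (sample, target) pair (target_transform is removed); the directory path is
# illustrative.
def keep_pair(sample, target):
    return sample, target

folder_set = DatasetFolder('./images', transform=keep_pair)
sample, target = folder_set[0]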
class ImageFolder(Dataset):
"""A generic data loader where the samples are arranged in this way:
root/1.ext
root/2.ext
root/sub_dir/3.ext
Args:
root (string): Root directory path.
loader (callable, optional): A function to load a sample given its path.
extensions (tuple[string], optional): A list of allowed extensions.
both extensions and is_valid_file should not be passed.
transform (callable, optional): A function/transform that takes in
a sample and returns a transformed version.
is_valid_file (callable, optional): A function that takes the path of a file
and checks if it is a valid file (used to filter out corrupt files);
both extensions and is_valid_file should not be passed.
Attributes:
samples (list): List of sample paths
"""
def __init__(self,
root,
loader=None,
extensions=None,
transform=None,
is_valid_file=None):
self.root = root
if extensions is None:
extensions = IMG_EXTENSIONS
samples = []
path = os.path.expanduser(root)
if not ((extensions is None) ^ (is_valid_file is None)):
raise ValueError(
"Both extensions and is_valid_file cannot be None or not None at the same time"
)
if extensions is not None:
def is_valid_file(x):
return has_valid_extension(x, extensions)
for root, _, fnames in sorted(os.walk(path, followlinks=True)):
for fname in sorted(fnames):
f = os.path.join(root, fname)
if is_valid_file(f):
samples.append(f)
if len(samples) == 0:
raise (RuntimeError(
"Found 0 files in subfolders of: " + self.root + "\n"
"Supported extensions are: " + ",".join(extensions)))
self.loader = cv2_loader if loader is None else loader
self.extensions = extensions
self.samples = samples
self.transform = transform
def __getitem__(self, index):
"""
Args:
index (int): Index
Returns:
tuple: (sample, target) where target is class_index of the target class.
"""
path = self.samples[index]
sample = self.loader(path)
if self.transform is not None:
sample = self.transform(sample)
return [sample]
def __len__(self):
return len(self.samples)
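# Usage sketch for ImageFolder: it is unlabeled, so __getitem__ returns a
# one-element list and `transform` receives the sample alone; the path is
# illustrative.
image_folder = ImageFolder('./test_images')
for i in range(len(image_folder)):
    [img] = image_folder[i]
    print(img.shape)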
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import os
import gzip
import struct
import numpy as np
import paddle.dataset.common
from paddle.io import Dataset
from .utils import _check_exists_and_download
__all__ = ["MNIST"]
URL_PREFIX = 'https://dataset.bj.bcebos.com/mnist/'
TEST_IMAGE_URL = URL_PREFIX + 't10k-images-idx3-ubyte.gz'
TEST_IMAGE_MD5 = '9fb629c4189551a2d022fa330f9573f3'
TEST_LABEL_URL = URL_PREFIX + 't10k-labels-idx1-ubyte.gz'
TEST_LABEL_MD5 = 'ec29112dd5afa0611ce80d1b7f02629c'
TRAIN_IMAGE_URL = URL_PREFIX + 'train-images-idx3-ubyte.gz'
TRAIN_IMAGE_MD5 = 'f68b3c2dcbeaaa9fbdd348bbdeb94873'
TRAIN_LABEL_URL = URL_PREFIX + 'train-labels-idx1-ubyte.gz'
TRAIN_LABEL_MD5 = 'd53e105ee54ea40749a09fcbcd1e9432'
class MNIST(Dataset):
"""
Implementation of the MNIST dataset
Args:
image_path(str): path to image file, can be set None if
:attr:`download` is True. Default None
label_path(str): path to label file, can be set None if
:attr:`download` is True. Default None
mode(str): 'train' or 'test' mode. Default 'train'.
transform(callable): transform to perform on the sample, None for no
transform. Default None.
download(bool): whether to automatically download the dataset if
:attr:`image_path`/:attr:`label_path` is unset. Default True.
Returns:
Dataset: MNIST Dataset.
Examples:
.. code-block:: python
from hapi.vision.datasets import MNIST
mnist = MNIST(mode='test')
for i in range(len(mnist)):
sample = mnist[i]
print(sample[0].shape, sample[1])
"""
def __init__(self,
image_path=None,
label_path=None,
mode='train',
transform=None,
download=True):
assert mode.lower() in ['train', 'test'], \
"mode should be 'train' or 'test', but got {}".format(mode)
self.mode = mode.lower()
self.image_path = image_path
if self.image_path is None:
assert download, "image_path not set and auto download disabled"
image_url = TRAIN_IMAGE_URL if mode == 'train' else TEST_IMAGE_URL
image_md5 = TRAIN_IMAGE_MD5 if mode == 'train' else TEST_IMAGE_MD5
self.image_path = _check_exists_and_download(
image_path, image_url, image_md5, 'mnist', download)
self.label_path = label_path
if self.label_path is None:
assert download, "label_path not set and auto download disabled"
label_url = TRAIN_LABEL_URL if mode == 'train' else TEST_LABEL_URL
label_md5 = TRAIN_LABEL_MD5 if mode == 'train' else TEST_LABEL_MD5
self.label_path = _check_exists_and_download(
label_path, label_url, label_md5, 'mnist', download)
self.transform = transform
# read dataset into memory
self._parse_dataset()
def _parse_dataset(self, buffer_size=100):
self.images = []
self.labels = []
with gzip.GzipFile(self.image_path, 'rb') as image_file:
img_buf = image_file.read()
with gzip.GzipFile(self.label_path, 'rb') as label_file:
lab_buf = label_file.read()
step_label = 0
offset_img = 0
# read from Big-endian
# get file info from magic byte
# image file : 16B
magic_byte_img = '>IIII'
magic_img, image_num, rows, cols = struct.unpack_from(
magic_byte_img, img_buf, offset_img)
offset_img += struct.calcsize(magic_byte_img)
offset_lab = 0
# label file : 8B
magic_byte_lab = '>II'
magic_lab, label_num = struct.unpack_from(magic_byte_lab,
lab_buf, offset_lab)
offset_lab += struct.calcsize(magic_byte_lab)
while True:
if step_label >= label_num:
break
fmt_label = '>' + str(buffer_size) + 'B'
labels = struct.unpack_from(fmt_label, lab_buf, offset_lab)
offset_lab += struct.calcsize(fmt_label)
step_label += buffer_size
fmt_images = '>' + str(buffer_size * rows * cols) + 'B'
images_temp = struct.unpack_from(fmt_images, img_buf,
offset_img)
images = np.reshape(images_temp, (buffer_size, rows *
cols)).astype('float32')
offset_img += struct.calcsize(fmt_images)
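# scale pixel values from [0, 255] to [-1.0, 1.0]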
images = images / 255.0
images = images * 2.0
images = images - 1.0
for i in range(buffer_size):
self.images.append(images[i, :])
self.labels.append(np.array([labels[i]]))
def __getitem__(self, idx):
image, label = self.images[idx], self.labels[idx]
if self.transform is not None:
image, label = self.transform(image, label)
return image, label
def __len__(self):
return len(self.labels)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import os
import paddle.dataset.common
def _check_exists_and_download(path, url, md5, module_name, download=True):
if path and os.path.exists(path):
return path
if download:
return paddle.dataset.common.download(url, module_name, md5)
else:
raise FileNotFoundError(
'{} does not exist and auto download is disabled'.format(path))
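# Usage sketch: resolve a dataset file, downloading it into the paddle cache
# when the local path is unset; the URL/MD5 pair repeats the MNIST test-image
# values defined earlier in this dump.
mnist_images = _check_exists_and_download(
    None, 'https://dataset.bj.bcebos.com/mnist/t10k-images-idx3-ubyte.gz',
    '9fb629c4189551a2d022fa330f9573f3', 'mnist', True)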
@@ -23,7 +23,7 @@ import numpy as np
from paddle import fluid
from paddle.fluid.layers import collective
from paddle.fluid.dygraph.parallel import ParallelEnv, ParallelStrategy
from paddle.io import BatchSampler
_parallel_context_initialized = False
@@ -39,7 +39,7 @@ class DistributedBatchSampler(BatchSampler):
Dataset is assumed to be of constant size.
Args:
data_source: this could be a `paddle.io.Dataset` implementation
or another python object which implements
`__len__` for BatchSampler to get the sample
number of the data source.
...
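# Usage sketch, assuming the constructor signature matches how Model.fit
# constructs it (dataset, batch_size, shuffle, drop_last):
from hapi.distributed import DistributedBatchSampler
from hapi.vision.datasets import MNIST

sampler = DistributedBatchSampler(MNIST(mode='train'), batch_size=64, shuffle=True)
for batch_indices in sampler:
    pass  # each trainer (rank) only sees its own shard of the batches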
@@ -29,13 +29,22 @@ from paddle.fluid.dygraph.parallel import ParallelEnv
import logging
logger = logging.getLogger(__name__)
__all__ = ['get_weights_path', 'is_url']
WEIGHTS_HOME = osp.expanduser("~/.cache/paddle/hapi/weights")
DOWNLOAD_RETRY_LIMIT = 3
def is_url(path):
"""
Whether the path is a URL.
Args:
path (string): the path to check.
"""
return path.startswith('http://') or path.startswith('https://')
def get_weights_path(url, md5sum=None):
"""Get weights path from WEIGHTS_HOME; if it does not exist,
download it from url.
@@ -62,6 +71,7 @@ def get_path(url, root_dir, md5sum=None, check_exist=True):
WEIGHTS_HOME or DATASET_HOME
md5sum (str): md5 sum of download package
"""
assert is_url(url), "downloading from {} not a url".format(url)
# parse path after download to decompress under root_dir
fullpath = map_path(url, root_dir)
...
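# Usage sketch: is_url guards get_weights_path, which caches downloads under
# WEIGHTS_HOME; the URL is illustrative.
weights_url = 'https://paddle-hapi.bj.bcebos.com/models/resnet50.pdparams'
if is_url(weights_url):
    local_weights = get_weights_path(weights_url)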
@@ -48,9 +48,16 @@ class Metric(object):
format(self.__class__.__name__))
@abc.abstractmethod
def update(self, *args):
"""
Update states for metric
Inputs of :code:`update` are the outputs of :code:`Metric.add_metric_op`;
if :code:`add_metric_op` is not defined, the inputs of :code:`update`
will be the flattened arguments of the **outputs** of the model and the
**labels** from the data:
:code:`update(output1, output2, ..., label1, label2,...)`
see :code:`Metric.add_metric_op`
"""
raise NotImplementedError("function 'update' not implemented in {}.".
format(self.__class__.__name__))
@@ -72,11 +79,26 @@ class Metric(object):
raise NotImplementedError("function 'name' not implemented in {}.".
format(self.__class__.__name__))
def add_metric_op(self, *args):
"""
This API is an advanced usage to accelerate metric calculation; calculations
from the outputs of the model to the states which should be updated by the
Metric can be defined here, where Paddle OPs are also supported. Outputs of
this API will be the inputs of "Metric.update".
If :code:`add_metric_op` is defined, it will be called with the **outputs**
of the model and the **labels** from the data as arguments; all outputs and
labels will be concatenated and flattened, and each field passed as a
separate argument as follows:
:code:`add_metric_op(output1, output2, ..., label1, label2,...)`
If :code:`add_metric_op` is not defined, the default behaviour is to pass
the input through to the output, so the output format will be:
:code:`return output1, output2, ..., label1, label2,...`
see :code:`Metric.update`
"""
return args
class Accuracy(Metric):
@@ -91,12 +113,12 @@ class Accuracy(Metric):
self._init_name(name)
self.reset()
def add_metric_op(self, pred, label, *args):
pred = fluid.layers.argsort(pred, descending=True)[1][:, :self.maxk]
correct = pred == label
return correct
def update(self, correct, *args):
accs = []
for i, k in enumerate(self.topk):
num_corrects = correct[:, :k].sum()
...
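# Sketch of a custom Metric following the add_metric_op/update contract
# documented above; the mean-absolute-error logic and names are illustrative.
import numpy as np
from hapi.metrics import Metric

class MeanAbsError(Metric):
    def __init__(self, name='mae'):
        super(MeanAbsError, self).__init__()
        self._name = name
        self.reset()

    def add_metric_op(self, pred, label, *args):
        # runs with Paddle ops; its output becomes the input of update()
        return pred - label

    def update(self, diff, *args):
        self.total += float(np.abs(diff).sum())
        self.count += diff.shape[0]
        return self.total / self.count

    def reset(self):
        self.total = 0.
        self.count = 0

    def accumulate(self):
        return self.total / max(self.count, 1)

    def name(self):
        return self._name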
@@ -32,13 +32,16 @@ from paddle.fluid.dygraph.parallel import ParallelEnv
from paddle.fluid.layers.utils import flatten
from paddle.fluid.incubate.fleet.collective import fleet, DistributedStrategy
from paddle.fluid.incubate.fleet.base import role_maker
from paddle.io import DataLoader, Dataset
from hapi.distributed import DistributedBatchSampler, _all_gather, prepare_distributed_context, _parallel_context_initialized
from hapi.metrics import Metric
from hapi.callbacks import config_callbacks
__all__ = [
'Model', 'Loss', 'CrossEntropy', 'Input', 'set_device',
'SoftmaxWithCrossEntropy'
]
def set_device(device):
@@ -64,7 +67,7 @@ def to_list(value):
if value is None:
return value
if isinstance(value, (list, tuple)):
return list(value)
return [value]
@@ -144,6 +147,17 @@ class CrossEntropy(Loss):
]
class SoftmaxWithCrossEntropy(Loss):
def __init__(self, average=True):
super(SoftmaxWithCrossEntropy, self).__init__(average)
def forward(self, outputs, labels):
return [
fluid.layers.softmax_with_cross_entropy(
o, l, return_softmax=False) for o, l in zip(outputs, labels)
]
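# Note: unlike CrossEntropy, SoftmaxWithCrossEntropy consumes raw logits and
# fuses the softmax into the loss, so the network head should omit its softmax.
# Sketch, assuming the usual prepare(optimizer, loss, metrics, inputs, labels):
# model.prepare(optimizer, SoftmaxWithCrossEntropy(), Accuracy(), inputs, labels)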
class StaticGraphAdapter(object):
def __init__(self, model):
super(StaticGraphAdapter, self).__init__()
@@ -179,17 +193,17 @@ class StaticGraphAdapter(object):
def mode(self, value):
self.model.mode = value
def train_batch(self, inputs, labels=None):
assert self.model._optimizer, \
"model not ready, please call `model.prepare()` first"
self.mode = 'train'
return self._run(inputs, labels)
def eval_batch(self, inputs, labels=None):
self.mode = 'eval'
return self._run(inputs, labels)
def test_batch(self, inputs):
self.mode = 'test'
return self._run(inputs, None)
@@ -360,10 +374,27 @@ class StaticGraphAdapter(object):
metric_list, metric_splits = flatten_list(endpoints['metric'])
fetch_list = endpoints['loss'] + metric_list
num_loss = len(endpoints['loss'])
# if a fetch Variable is the same as an input Variable, do not fetch
# it from the program, get it from the input directly
pruned_fetch_list = []
pruned_fetch_idx_name_map = [""] * len(fetch_list)
for i, fetch_var in enumerate(fetch_list):
if fetch_var.name in feed.keys():
pruned_fetch_idx_name_map[i] = fetch_var.name
else:
pruned_fetch_list.append(fetch_var)
rets = self._executor.run(compiled_prog,
feed=feed,
fetch_list=pruned_fetch_list,
return_numpy=False)
# restore pruned fetch_list Variables from the feeds
for i, name in enumerate(pruned_fetch_idx_name_map):
if len(name) > 0:
rets.insert(i, feed[name])
# LoDTensor cannot be fetched as numpy directly
rets = [np.array(v) for v in rets]
if self.mode == 'test':
@@ -442,7 +473,7 @@ class StaticGraphAdapter(object):
if mode != 'test':
for metric in self.model._metrics:
metrics.append(
to_list(metric.add_metric_op(*(outputs + labels))))
if mode == 'train' and self.model._optimizer:
self._loss_endpoint = fluid.layers.sum(losses)
@@ -536,7 +567,7 @@ class DynamicGraphAdapter(object):
self.model.mode = value
# TODO multi device in dygraph mode not implemented at present time
def train_batch(self, inputs, labels=None):
assert self.model._optimizer, \
"model not ready, please call `model.prepare()` first"
super(Model, self.model).train()
@@ -562,14 +593,14 @@ class DynamicGraphAdapter(object):
metrics = []
for metric in self.model._metrics:
metric_outs = metric.add_metric_op(
*(to_list(outputs) + to_list(labels)))
m = metric.update(*[to_numpy(m) for m in to_list(metric_outs)])
metrics.append(m)
return ([to_numpy(l) for l in losses], metrics) \
if len(metrics) > 0 else [to_numpy(l) for l in losses]
def eval_batch(self, inputs, labels=None):
super(Model, self.model).eval()
self.mode = 'eval'
inputs = to_list(inputs)
@@ -601,7 +632,8 @@ class DynamicGraphAdapter(object):
self._merge_count[self.mode + '_total'] += samples
self._merge_count[self.mode + '_batch'] = samples
metric_outs = metric.add_metric_op(
*(to_list(outputs) + to_list(labels)))
m = metric.update(*[to_numpy(m) for m in to_list(metric_outs)])
metrics.append(m)
@@ -610,7 +642,7 @@ class DynamicGraphAdapter(object):
return ([to_numpy(l) for l in losses], metrics) \
if len(metrics) > 0 else [to_numpy(l) for l in losses]
def test_batch(self, inputs):
super(Model, self.model).eval()
self.mode = 'test'
inputs = [to_variable(x) for x in to_list(inputs)]
@@ -709,14 +741,14 @@ class Model(fluid.dygraph.Layer):
else:
self._adapter = StaticGraphAdapter(self)
def train_batch(self, *args, **kwargs):
return self._adapter.train_batch(*args, **kwargs)
def eval_batch(self, *args, **kwargs):
return self._adapter.eval_batch(*args, **kwargs)
def test_batch(self, *args, **kwargs):
return self._adapter.test_batch(*args, **kwargs)
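# Sketch of the renamed batch-level API on a prepared `model` (see the
# fit/evaluate/predict sketch after this file); shapes and dtypes are
# illustrative.
import numpy as np

img_batch = np.random.random((4, 1, 28, 28)).astype('float32')
lbl_batch = np.random.randint(0, 10, (4, 1)).astype('int64')

train_out = model.train_batch([img_batch], [lbl_batch])  # one optimizer step
eval_out = model.eval_batch([img_batch], [lbl_batch])
test_out = model.test_batch([img_batch])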
def save(self, *args, **kwargs):
if ParallelEnv().local_rank == 0:
@@ -767,6 +799,13 @@ class Model(fluid.dygraph.Layer):
format(key, list(state.shape), list(param.shape)))
return param, state
def _strip_postfix(path):
path, ext = os.path.splitext(path)
assert ext in ['', '.pdparams', '.pdopt', '.pdmodel'], \
"Unknown postfix {} from weights".format(ext)
return path
path = _strip_postfix(path)
param_state = _load_state_from_path(path + ".pdparams")
assert param_state, "Failed to load parameters, please check path."
@@ -777,7 +816,7 @@ class Model(fluid.dygraph.Layer):
except ValueError as err:
if skip_mismatch:
warnings.warn(
("Skip loading for {}. ".format(key) + str(err)))
# reset optimizer when mismatch happens
reset_optimizer = True
else:
@@ -896,36 +935,36 @@ class Model(fluid.dygraph.Layer):
FIXME: add more comments and usage
Args:
train_data (Dataset|DataLoader): An iterable data loader is used for
train. An instance of paddle.io.Dataset or
paddle.io.DataLoader is recommended. Default: None.
eval_data (Dataset|DataLoader): An iterable data loader is used for
evaluation at the end of epoch. If None, will not do evaluation.
An instance of paddle.io.Dataset or paddle.io.DataLoader
is recommended. Default: None.
batch_size (int): Integer number. The batch size of train_data and eval_data.
When train_data and eval_data are both the instance of Dataloader, this
parameter will be ignored. Default: 1.
epochs (int): Integer number. The number of epochs to train the model. Default: 1.
eval_freq (int): The frequency, in number of epochs, an evaluation
is performed. Default: 1.
log_freq (int): The frequency, in number of steps, the training logs
are printed. Default: 10.
save_dir(str|None): The directory to save checkpoint during training.
If None, will not save checkpoint. Default: None.
save_freq (int): The frequency, in number of epochs, to save checkpoint. Default: 1.
verbose (int): The verbosity mode, should be 0, 1, or 2.
0 = silent, 1 = progress bar, 2 = one line per epoch. Default: 2.
drop_last (bool): whether to drop the last incomplete batch of train_data
when the dataset size is not divisible by the batch size. When train_data
is an instance of Dataloader, this parameter will be ignored. Default: False.
shuffle (bool): whether to shuffle train_data. When train_data is an instance
of Dataloader, this parameter will be ignored. Default: True.
num_workers (int): the number of subprocesses to load data, 0 for no subprocess
used and loading data in the main process. When train_data and eval_data are
both the instance of Dataloader, this parameter will be ignored. Default: 0.
callbacks (Callback|None): A list of `Callback` instances to apply
during training. If None, `ProgBarLogger` and `ModelCheckpoint`
are automatically inserted. Default: None.
"""
assert train_data is not None, \
@@ -1024,21 +1063,23 @@ class Model(fluid.dygraph.Layer):
FIXME: add more comments and usage
Args:
eval_data (Dataset|DataLoader): An iterable data loader is used for
evaluation. An instance of paddle.io.Dataset or
paddle.io.DataLoader is recommended.
batch_size (int): Integer number. The batch size of train_data and eval_data.
When eval_data is the instance of Dataloader, this argument will be ignored.
Default: 1.
log_freq (int): The frequency, in number of steps, the eval logs
are printed. Default: 10.
verbose (int): The verbosity mode, should be 0, 1, or 2.
0 = silent, 1 = progress bar, 2 = one line per epoch. Default: 2.
num_workers (int): The number of subprocesses to load data, 0 for no subprocess
used and loading data in the main process. When train_data and eval_data are
both the instance of Dataloader, this parameter will be ignored. Default: 0.
callbacks (Callback|None): A list of `Callback` instances to apply
during training. If None, `ProgBarLogger` and `ModelCheckpoint`
are automatically inserted. Default: None.
Returns:
dict: Result of the metrics.
"""
if fluid.in_dygraph_mode():
@@ -1099,26 +1140,28 @@ class Model(fluid.dygraph.Layer):
FIXME: add more comments and usage
Args:
test_data (Dataset|DataLoader): An iterable data loader is used for
predict. An instance of paddle.io.Dataset or paddle.io.DataLoader
is recommended.
batch_size (int): Integer number. The batch size of train_data and eval_data.
When train_data and eval_data are both the instance of Dataloader, this
argument will be ignored. Default: 1.
num_workers (int): the number of subprocesses to load data, 0 for no subprocess
used and loading data in the main process. When train_data and eval_data are
both the instance of Dataloader, this argument will be ignored. Default: 0.
stack_outputs (bool): whether to stack output fields like a batch. If an output
field of a sample has shape [X, Y] and test_data contains N samples, the
predict output field will have shape [N, X, Y] if stack_outputs is True,
and will be a length-N list of shape [[X, Y], [X, Y], ..., [X, Y]] if
stack_outputs is False. stack_outputs as False is used for LoDTensor output
situations; it is recommended to set it as True if the outputs contain no
LoDTensor. Default: False.
Returns:
list: Outputs of the model.
"""
if fluid.in_dygraph_mode():
feed_list = None
else:
feed_list = [x.forward() for x in self._inputs]
if test_data is not None and isinstance(test_data, Dataset):
test_sampler = DistributedBatchSampler(
@@ -1142,7 +1185,7 @@ class Model(fluid.dygraph.Layer):
outputs = []
for data in tqdm.tqdm(loader):
data = flatten(data)
outputs.append(self.test_batch(data[:len(self._inputs)]))
# NOTE: for LoDTensor output, we should not stack outputs
# since stacking may lose the detail info
@@ -1156,18 +1199,6 @@ class Model(fluid.dygraph.Layer):
outputs = [o[:len(test_loader.dataset)] for o in outputs]
return outputs
def _run_one_epoch(self,
data_loader,
callbacks,
@@ -1204,11 +1235,11 @@ class Model(fluid.dygraph.Layer):
callbacks.on_batch_begin(mode, step, logs)
if mode == 'train':
outs = self.train_batch(data[:len(self._inputs)],
data[len(self._inputs):])
else:
outs = self.eval_batch(data[:len(self._inputs)],
data[len(self._inputs):])
# losses
loss = outs[0] if self._metrics else outs
@@ -1236,7 +1267,7 @@ class Model(fluid.dygraph.Layer):
if mode == 'train':
assert epoch is not None, 'when mode is train, epoch must be given'
callbacks.on_epoch_end(epoch, logs)
return logs
...
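# End-to-end sketch of the fit/evaluate/predict API documented above; LeNet is
# assumed to be available from hapi.vision.models, and the hyperparameters are
# illustrative.
import paddle.fluid as fluid
from hapi.model import set_device, Input, CrossEntropy
from hapi.metrics import Accuracy
from hapi.vision.datasets import MNIST
from hapi.vision.models import LeNet  # assumption: shipped with hapi.vision

device = set_device('cpu')
fluid.enable_dygraph(device)

def reshape_sample(img, label):
    # the MNIST dataset above yields flattened 784-float images; restore CHW
    return img.reshape(1, 28, 28), label

train_data = MNIST(mode='train', transform=reshape_sample)
eval_data = MNIST(mode='test', transform=reshape_sample)

model = LeNet()
inputs = [Input([None, 1, 28, 28], 'float32', name='image')]
labels = [Input([None, 1], 'int64', name='label')]
optim = fluid.optimizer.Adam(
    learning_rate=1e-3, parameter_list=model.parameters())
model.prepare(optim, CrossEntropy(), Accuracy(), inputs, labels)

model.fit(train_data, eval_data, batch_size=64, epochs=1)
eval_result = model.evaluate(eval_data, batch_size=64)  # dict of metrics
predictions = model.predict(eval_data, batch_size=64)   # list of outputs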
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from hapi.text.text import RNNCell as RNNCell
from hapi.text.text import BasicLSTMCell as BasicLSTMCell
from hapi.text.text import BasicGRUCell as BasicGRUCell
from hapi.text.text import RNN as RNN
from hapi.text.text import DynamicDecode as DynamicDecode
from hapi.text.text import BeamSearchDecoder as BeamSearchDecoder
from hapi.text.text import MultiHeadAttention as MultiHeadAttention
from hapi.text.text import FFN as FFN
from hapi.text.text import TransformerEncoderLayer as TransformerEncoderLayer
from hapi.text.text import TransformerDecoderLayer as TransformerDecoderLayer
from hapi.text.text import TransformerEncoder as TransformerEncoder
from hapi.text.text import TransformerDecoder as TransformerDecoder
from hapi.text.text import TransformerBeamSearchDecoder as TransformerBeamSearchDecoder
from hapi.text.text import GRUCell as GRUCell
from hapi.text.text import GRUEncoderCell as GRUEncoderCell
from hapi.text.text import BiGRU as BiGRU
from hapi.text.text import Linear_chain_crf as Linear_chain_crf
from hapi.text.text import Crf_decoding as Crf_decoding
from hapi.text.text import SequenceTagging as SequenceTagging
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from hapi.text.bert.bert import BertConfig as BertConfig
from hapi.text.bert.optimization import Optimizer as Optimizer
from hapi.text.bert.dataloader import BertDataLoader as BertDataLoader
from hapi.text.bert.dataloader import BertInputExample as BertInputExample
from hapi.text.tokenizer import tokenization as tokenization
from hapi.text.bert.bert import BertEncoder as BertEncoder
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Mask, padding and batching."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
def mask(batch_tokens, total_token_num, vocab_size, CLS=1, SEP=2, MASK=3):
"""
Add mask for batch_tokens, return out, mask_label, mask_pos;
Note: mask_pos responding the batch_tokens after padded;
"""
max_len = max([len(sent) for sent in batch_tokens])
mask_label = []
mask_pos = []
prob_mask = np.random.rand(total_token_num)
# Note: the first token is [CLS], so [low=1]
replace_ids = np.random.randint(1, high=vocab_size, size=total_token_num)
pre_sent_len = 0
prob_index = 0
for sent_index, sent in enumerate(batch_tokens):
mask_flag = False
prob_index += pre_sent_len
for token_index, token in enumerate(sent):
prob = prob_mask[prob_index + token_index]
if prob > 0.15:
continue
elif 0.03 < prob <= 0.15:
# mask
if token != SEP and token != CLS:
mask_label.append(sent[token_index])
sent[token_index] = MASK
mask_flag = True
mask_pos.append(sent_index * max_len + token_index)
elif 0.015 < prob <= 0.03:
# random replace
if token != SEP and token != CLS:
mask_label.append(sent[token_index])
sent[token_index] = replace_ids[prob_index + token_index]
mask_flag = True
mask_pos.append(sent_index * max_len + token_index)
else:
# keep the original token
if token != SEP and token != CLS:
mask_label.append(sent[token_index])
mask_pos.append(sent_index * max_len + token_index)
pre_sent_len = len(sent)
        # ensure that at least one token in each sentence is masked
while not mask_flag:
token_index = int(np.random.randint(1, high=len(sent) - 1, size=1))
if sent[token_index] != SEP and sent[token_index] != CLS:
mask_label.append(sent[token_index])
sent[token_index] = MASK
mask_flag = True
mask_pos.append(sent_index * max_len + token_index)
mask_label = np.array(mask_label).astype("int64").reshape([-1, 1])
mask_pos = np.array(mask_pos).astype("int64").reshape([-1, 1])
return batch_tokens, mask_label, mask_pos
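# Illustrative sketch (not part of the library): masking a toy batch.
# Ids 1/2/3 stand for [CLS]/[SEP]/[MASK] (the defaults above) and
# vocab_size=100 is an assumed toy value.
def _demo_mask():
    batch = [[1, 11, 12, 2], [1, 21, 22, 23, 2]]
    out, mask_label, mask_pos = mask(batch, total_token_num=9, vocab_size=100)
    # mask_label/mask_pos are int64 arrays of shape [-1, 1]; mask_pos indexes
    # into the flattened (batch * max_len) layout after padding.
    return out, mask_label, mask_pos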
def prepare_batch_data(insts,
total_token_num,
voc_size=0,
pad_id=None,
cls_id=None,
sep_id=None,
                       mask_id=-1,
return_input_mask=True,
return_max_len=True,
return_num_token=False):
"""
1. generate Tensor of data
2. generate Tensor of position
3. generate self attention mask, [shape: batch_size * max_len * max_len]
"""
batch_src_ids = [inst[0] for inst in insts]
batch_pos_ids = [inst[1] for inst in insts]
batch_sent_ids = [inst[2] for inst in insts]
labels_list = []
# compatible with squad, whose example includes start/end positions,
# or unique id
for i in range(3, len(insts[0]), 1):
labels = [inst[i] for inst in insts]
labels = np.array(labels).astype("int64").reshape([-1, 1])
labels_list.append(labels)
# First step: do mask without padding
if mask_id >= 0:
out, mask_label, mask_pos = mask(
batch_src_ids,
total_token_num,
vocab_size=voc_size,
CLS=cls_id,
SEP=sep_id,
MASK=mask_id)
else:
out = batch_src_ids
# Second step: padding
src_id, self_input_mask = pad_batch_data(
out, pad_idx=pad_id, return_input_mask=True)
pos_id = pad_batch_data(
batch_pos_ids,
pad_idx=pad_id,
return_pos=False,
return_input_mask=False)
sent_id = pad_batch_data(
batch_sent_ids,
pad_idx=pad_id,
return_pos=False,
return_input_mask=False)
if mask_id >= 0:
return_list = [
src_id, pos_id, sent_id, self_input_mask, mask_label, mask_pos
] + labels_list
else:
return_list = [src_id, pos_id, sent_id, self_input_mask] + labels_list
return return_list if len(return_list) > 1 else return_list[0]
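# Illustrative sketch with toy ids: 0/1/2 are assumed stand-ins for
# [PAD]/[CLS]/[SEP]; mask_id=-1 disables masking, so only padding and the
# attention mask are produced.
def _demo_prepare_batch_data():
    insts = [
        ([1, 11, 12, 2], [0, 1, 2, 3], [0, 0, 0, 0], 1),
        ([1, 21, 2], [0, 1, 2], [0, 0, 0], 0),
    ]
    src_id, pos_id, sent_id, input_mask, labels = prepare_batch_data(
        insts, total_token_num=7, pad_id=0, cls_id=1, sep_id=2, mask_id=-1)
    return src_id.shape  # (2, 4)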
def pad_batch_data(insts,
pad_idx=0,
return_pos=False,
return_input_mask=False,
return_max_len=False,
return_num_token=False):
"""
Pad the instances to the max sequence length in batch, and generate the
corresponding position data and input mask.
"""
return_list = []
max_len = max(len(inst) for inst in insts)
# Any token included in dict can be used to pad, since the paddings' loss
# will be masked out by weights and make no effect on parameter gradients.
inst_data = np.array([
list(inst) + list([pad_idx] * (max_len - len(inst))) for inst in insts
])
return_list += [inst_data.astype("int64").reshape([-1, max_len])]
# position data
if return_pos:
inst_pos = np.array([
list(range(0, len(inst))) + [pad_idx] * (max_len - len(inst))
for inst in insts
])
return_list += [inst_pos.astype("int64").reshape([-1, max_len])]
if return_input_mask:
# This is used to avoid attention on paddings.
input_mask_data = np.array([[1] * len(inst) + [0] *
(max_len - len(inst)) for inst in insts])
input_mask_data = np.expand_dims(input_mask_data, axis=-1)
return_list += [input_mask_data.astype("float32")]
if return_max_len:
return_list += [max_len]
if return_num_token:
num_token = 0
for inst in insts:
num_token += len(inst)
return_list += [num_token]
return return_list if len(return_list) > 1 else return_list[0]
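# Illustrative sketch: pad a toy batch and build its attention mask.
def _demo_pad_batch_data():
    insts = [[5, 6, 7], [8, 9]]
    src, input_mask = pad_batch_data(insts, pad_idx=0, return_input_mask=True)
    # src has shape (2, 3); input_mask has shape (2, 3, 1) with zeros at the
    # padded positions.
    return src, input_mask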
if __name__ == "__main__":
pass
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"bert"
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import six
import json
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph import Embedding, LayerNorm, Linear, to_variable, Layer, guard
from hapi.text.text import PrePostProcessLayer, TransformerEncoder
from hapi.text.bert.utils.init import init_from_static_model
class BertConfig(object):
def __init__(self, config_path):
self._config_dict = self._parse(config_path)
def _parse(self, config_path):
try:
with open(config_path) as json_file:
config_dict = json.load(json_file)
except Exception:
raise IOError("Error in parsing bert model config file '%s'" %
config_path)
else:
return config_dict
def __getitem__(self, key):
return self._config_dict[key]
def print_config(self):
for arg, value in sorted(six.iteritems(self._config_dict)):
print('%s: %s' % (arg, value))
print('------------------------------------------------')
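# Illustrative sketch: the path below is an assumption; point it at a real
# bert_config.json to load and inspect a configuration.
def _demo_bert_config():
    config = BertConfig("./config/bert_config.json")
    config.print_config()
    return config["hidden_size"]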
class BertEncoder(Layer):
"""
bert
"""
def __init__(self, config, return_pooled_out=True, use_fp16=False):
super(BertEncoder, self).__init__()
self.config = config
self._emb_size = config['hidden_size']
self._n_layer = config['num_hidden_layers']
self._n_head = config['num_attention_heads']
self._voc_size = config['vocab_size']
self._max_position_seq_len = config['max_position_embeddings']
self._sent_types = config['type_vocab_size']
self._hidden_act = config['hidden_act']
self._prepostprocess_dropout = config['hidden_dropout_prob']
self._attention_dropout = config['attention_probs_dropout_prob']
self.return_pooled_out = return_pooled_out
self._word_emb_name = "word_embedding"
self._pos_emb_name = "pos_embedding"
self._sent_emb_name = "sent_embedding"
self._dtype = "float16" if use_fp16 else "float32"
self._param_initializer = fluid.initializer.TruncatedNormal(
scale=config['initializer_range'])
self._src_emb = Embedding(
size=[self._voc_size, self._emb_size],
param_attr=fluid.ParamAttr(
name=self._word_emb_name, initializer=self._param_initializer),
dtype=self._dtype)
self._pos_emb = Embedding(
size=[self._max_position_seq_len, self._emb_size],
param_attr=fluid.ParamAttr(
name=self._pos_emb_name, initializer=self._param_initializer),
dtype=self._dtype)
self._sent_emb = Embedding(
size=[self._sent_types, self._emb_size],
param_attr=fluid.ParamAttr(
name=self._sent_emb_name, initializer=self._param_initializer),
dtype=self._dtype)
self.pooled_fc = Linear(
input_dim=self._emb_size,
output_dim=self._emb_size,
param_attr=fluid.ParamAttr(
name="pooled_fc.w_0", initializer=self._param_initializer),
bias_attr="pooled_fc.b_0",
act="tanh")
self.pre_process_layer = PrePostProcessLayer(
"nd", self._emb_size, self._prepostprocess_dropout, None)
self._encoder = TransformerEncoder(
n_layer=self._n_layer,
n_head=self._n_head,
d_key=self._emb_size // self._n_head,
d_value=self._emb_size // self._n_head,
d_model=self._emb_size,
d_inner_hid=self._emb_size * 4,
prepostprocess_dropout=self._prepostprocess_dropout,
attention_dropout=self._attention_dropout,
relu_dropout=0,
preprocess_cmd="",
postprocess_cmd="dan",
ffn_fc1_act=self._hidden_act)
def init_parameters(self, param_path="", verbose=False):
init_from_static_model(param_path, self, self.config, verbose)
def forward(self, src_ids, position_ids, sentence_ids, input_mask):
"""
forward
"""
src_emb = self._src_emb(src_ids)
pos_emb = self._pos_emb(position_ids)
sent_emb = self._sent_emb(sentence_ids)
emb_out = src_emb + pos_emb
emb_out = emb_out + sent_emb
emb_out = self.pre_process_layer(emb_out)
self_attn_mask = fluid.layers.matmul(
x=input_mask, y=input_mask, transpose_y=True)
self_attn_mask = fluid.layers.scale(
x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False)
n_head_self_attn_mask = fluid.layers.stack(
x=[self_attn_mask] * self._n_head, axis=1)
n_head_self_attn_mask.stop_gradient = True
enc_output = self._encoder(emb_out, n_head_self_attn_mask)
if not self.return_pooled_out:
return enc_output
next_sent_feat = fluid.layers.slice(
input=enc_output, axes=[1], starts=[0], ends=[1])
next_sent_feat = self.pooled_fc(next_sent_feat)
next_sent_feat = fluid.layers.reshape(
next_sent_feat, shape=[-1, self._emb_size])
return enc_output, next_sent_feat
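# Illustrative sketch (the toy shapes and config path are assumptions): run
# the encoder on random ids under dygraph.
def _demo_bert_encoder():
    fluid.enable_dygraph()
    config = BertConfig("./config/bert_config.json")
    encoder = BertEncoder(config, return_pooled_out=True)
    batch_size, seq_len = 2, 16
    src_ids = to_variable(
        np.random.randint(0, config["vocab_size"],
                          (batch_size, seq_len)).astype("int64"))
    pos_ids = to_variable(
        np.tile(np.arange(seq_len, dtype="int64"), (batch_size, 1)))
    sent_ids = to_variable(np.zeros((batch_size, seq_len), dtype="int64"))
    input_mask = to_variable(
        np.ones((batch_size, seq_len, 1), dtype="float32"))
    enc_output, pooled = encoder(src_ids, pos_ids, sent_ids, input_mask)
    # enc_output: [batch, seq_len, hidden]; pooled: [batch, hidden]
    return enc_output, pooled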
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import io
import os
import types
import csv
import numpy as np
import hapi.text.tokenizer.tokenization as tokenization
from hapi.text.bert.batching import prepare_batch_data
class DataProcessor(object):
"""Base class for data converters for sequence classification data sets."""
def __init__(self, tokenizer, max_seq_len, in_tokens, random_seed=None):
self.max_seq_len = max_seq_len
self.tokenizer = tokenizer
self.vocab = self.tokenizer.vocab
self.in_tokens = in_tokens
np.random.seed(random_seed)
self.current_train_example = -1
self.num_examples = {'train': -1, 'dev': -1, 'test': -1}
self.current_train_epoch = -1
def get_train_iter(self,
data_dir,
epoch_num=1,
shuffle=True,
shuffle_seed=None):
"""Gets a collection of `InputExample`s for the train set."""
raise NotImplementedError()
def get_dev_iter(self, data_dir):
"""Gets a collection of `InputExample`s for the dev set."""
raise NotImplementedError()
def get_test_iter(self, data_dir):
"""Gets a collection of `InputExample`s for prediction."""
raise NotImplementedError()
def get_labels(self):
"""Gets the list of labels for this data set."""
raise NotImplementedError()
def convert_example(self, index, example, labels, max_seq_len, tokenizer):
"""Converts a single `InputExample` into a single `InputFeatures`."""
feature = convert_single_example(index, example, labels, max_seq_len,
tokenizer)
return feature
    def _read_tsv(self, input_file, quotechar=None):
"""Reads a tab separated value file."""
with io.open(input_file, "r", encoding="utf8") as f:
reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
lines = []
for line in reader:
lines.append(line)
return lines
def generate_instance(self, feature):
"""
generate instance with given feature
Args:
feature: InputFeatures(object). A single set of features of data.
"""
        input_pos = list(range(len(feature.input_ids)))
        # order must match prepare_batch_data: src_ids, pos_ids, sent_ids, label
        return [
            feature.input_ids, input_pos, feature.segment_ids, feature.label_id
        ]
    def generate_batch_data(self,
                            batch_data,
                            total_token_num,
                            voc_size=-1,
                            mask_id=-1,
                            return_input_mask=True,
                            return_max_len=False,
                            return_num_token=False):
        # forward the arguments instead of silently hard-coding them
        return prepare_batch_data(
            batch_data,
            total_token_num,
            voc_size=voc_size,
            pad_id=self.vocab["[PAD]"],
            cls_id=self.vocab["[CLS]"],
            sep_id=self.vocab["[SEP]"],
            mask_id=mask_id,
            return_input_mask=return_input_mask,
            return_max_len=return_max_len,
            return_num_token=return_num_token)
def get_num_examples(self, phase):
"""Get number of examples for train, dev or test."""
if phase not in ['train', 'dev', 'test']:
raise ValueError(
"Unknown phase, which should be in ['train', 'dev', 'test'].")
if phase == 'train':
return len(self.train_examples)
elif phase == 'dev':
return len(self.dev_examples)
elif phase == 'test':
return len(self.test_examples)
else:
raise ValueError(
"Unknown phase, which should be in ['train', 'dev', 'test'].")
def get_train_progress(self):
"""Gets progress for training phase."""
return self.current_train_example, self.current_train_epoch
def data_generator(self, data_iter, batch_size, phase='train',
dev_count=1):
"""
Generate data for train, dev or test.
Args:
batch_size: int. The batch size of generated data.
phase: string. The phase for which to generate data.
"""
assert phase in ['train', 'dev', 'test']
if phase == 'train':
sample_num = len(self.train_examples)
elif phase == 'dev':
sample_num = len(self.dev_examples)
elif phase == 'test':
sample_num = len(self.test_examples)
else:
sample_num = -1
self.num_examples[phase] = sample_num
def instance_reader():
for epoch_idx, example_idx, example in data_iter():
if phase == 'train':
self.current_train_epoch = epoch_idx
self.current_train_example = example_idx
feature = self.convert_example(
example_idx, example,
self.get_labels(), self.max_seq_len, self.tokenizer)
instance = self.generate_instance(feature)
yield instance
def batch_reader(reader, batch_size, in_tokens):
batch, total_token_num, max_len = [], 0, 0
for instance in reader():
                token_ids, pos_ids, sent_ids, label = instance[:4]
max_len = max(max_len, len(token_ids))
if in_tokens:
to_append = (len(batch) + 1) * max_len <= batch_size
else:
to_append = len(batch) < batch_size
if to_append:
batch.append(instance)
total_token_num += len(token_ids)
else:
yield batch, total_token_num
batch, total_token_num, max_len = [instance], len(
token_ids), len(token_ids)
if len(batch) > 0:
yield batch, total_token_num
def wrapper():
all_dev_batches = []
for batch_data, total_token_num in batch_reader(
instance_reader, batch_size, self.in_tokens):
batch_data = self.generate_batch_data(
batch_data,
total_token_num,
voc_size=-1,
mask_id=-1,
return_input_mask=True,
return_max_len=False,
return_num_token=False)
if len(all_dev_batches) < dev_count:
all_dev_batches.append(batch_data)
if len(all_dev_batches) == dev_count:
for batch in all_dev_batches:
yield batch
all_dev_batches = []
return wrapper
class InputExample(object):
"""A single training/test example for simple sequence classification."""
def __init__(self, guid, text_a, text_b=None, label=None):
"""Constructs a InputExample.
Args:
guid: Unique id for the example.
text_a: string. The untokenized text of the first sequence. For single
sequence tasks, only this sequence must be specified.
text_b: (Optional) string. The untokenized text of the second sequence.
Only must be specified for sequence pair tasks.
label: (Optional) string. The label of the example. This should be
specified for train and dev examples, but not for test examples.
"""
self.guid = guid
self.text_a = text_a
self.text_b = text_b
self.label = label
def _truncate_seq_pair(tokens_a, tokens_b, max_length):
"""Truncates a sequence pair in place to the maximum length."""
# This is a simple heuristic which will always truncate the longer sequence
# one token at a time. This makes more sense than truncating an equal percent
# of tokens from each, since if one sequence is very short then each token
# that's truncated likely contains more information than a longer sequence.
while True:
total_length = len(tokens_a) + len(tokens_b)
if total_length <= max_length:
break
if len(tokens_a) > len(tokens_b):
tokens_a.pop()
else:
tokens_b.pop()
class InputFeatures(object):
"""A single set of features of data."""
def __init__(self, input_ids, input_mask, segment_ids, label_id):
self.input_ids = input_ids
self.input_mask = input_mask
self.segment_ids = segment_ids
self.label_id = label_id
class XnliProcessor(DataProcessor):
"""Processor for the XNLI data set."""
def get_train_iter(self,
data_dir,
epoch_num=1,
shuffle=True,
shuffle_seed=None):
"""See base class."""
self.language = "zh"
lines = self._read_tsv(
os.path.join(data_dir, "multinli", "multinli.train.%s.tsv" %
self.language))
examples = []
for (i, line) in enumerate(lines):
if i == 0:
continue
guid = "train-%d" % (i)
text_a = tokenization.convert_to_unicode(line[0])
text_b = tokenization.convert_to_unicode(line[1])
label = tokenization.convert_to_unicode(line[2])
if label == tokenization.convert_to_unicode("contradictory"):
label = tokenization.convert_to_unicode("contradiction")
examples.append(
InputExample(
guid=guid, text_a=text_a, text_b=text_b, label=label))
self.train_examples = examples
def wrapper():
if shuffle:
if shuffle_seed is not None:
np.random.seed(shuffle_seed)
for epoch_idx in range(epoch_num):
if shuffle:
np.random.shuffle(examples)
for (example_idx, example) in enumerate(examples):
yield epoch_idx, example_idx, example
return wrapper
def get_dev_iter(self, data_dir):
"""See base class."""
self.language = "zh"
lines = self._read_tsv(os.path.join(data_dir, "xnli.dev.tsv"))
examples = []
for (i, line) in enumerate(lines):
if i == 0:
continue
guid = "dev-%d" % (i)
language = tokenization.convert_to_unicode(line[0])
if language != tokenization.convert_to_unicode(self.language):
continue
text_a = tokenization.convert_to_unicode(line[6])
text_b = tokenization.convert_to_unicode(line[7])
label = tokenization.convert_to_unicode(line[1])
examples.append(
InputExample(
guid=guid, text_a=text_a, text_b=text_b, label=label))
self.dev_examples = examples
def wrapper():
for (example_idx, example) in enumerate(examples):
yield 0, example_idx, example
return wrapper
def get_test_iter(self, data_dir):
"""See base class."""
self.language = "zh"
lines = self._read_tsv(os.path.join(data_dir, "xnli.test.tsv"))
examples = []
for (i, line) in enumerate(lines):
if i == 0:
continue
guid = "test-%d" % (i)
language = tokenization.convert_to_unicode(line[0])
if language != tokenization.convert_to_unicode(self.language):
continue
text_a = tokenization.convert_to_unicode(line[6])
text_b = tokenization.convert_to_unicode(line[7])
label = tokenization.convert_to_unicode(line[1])
examples.append(
InputExample(
guid=guid, text_a=text_a, text_b=text_b, label=label))
self.test_examples = examples
def wrapper():
for (example_idx, example) in enumerate(examples):
yield 0, example_idx, example
return wrapper
def get_labels(self):
"""See base class."""
return ["contradiction", "entailment", "neutral"]
class MnliProcessor(DataProcessor):
"""Processor for the MultiNLI data set (GLUE version)."""
def get_train_iter(self,
data_dir,
epoch_num=1,
shuffle=True,
shuffle_seed=None):
"""See base class."""
examples = self._create_examples(
self._read_tsv(os.path.join(data_dir, "train.tsv")), "train")
self.train_examples = examples
def wrapper():
if shuffle:
if shuffle_seed is not None:
np.random.seed(shuffle_seed)
for epoch_idx in range(epoch_num):
if shuffle:
np.random.shuffle(examples)
for (example_idx, example) in enumerate(examples):
yield epoch_idx, example_idx, example
return wrapper
def get_dev_iter(self, data_dir):
"""See base class."""
examples = self._create_examples(
self._read_tsv(os.path.join(data_dir, "dev_matched.tsv")),
"dev_matched")
self.dev_examples = examples
def wrapper():
for (example_idx, example) in enumerate(examples):
yield 0, example_idx, example
return wrapper
def get_test_iter(self, data_dir):
"""See base class."""
examples = self._create_examples(
self._read_tsv(os.path.join(data_dir, "test_matched.tsv")), "test")
self.test_examples = examples
def wrapper():
for (example_idx, example) in enumerate(examples):
yield 0, example_idx, example
return wrapper
def get_labels(self):
"""See base class."""
return ["contradiction", "entailment", "neutral"]
def _create_examples(self, lines, set_type):
"""Creates examples for the training and dev sets."""
examples = []
for (i, line) in enumerate(lines):
if i == 0:
continue
guid = "%s-%s" % (set_type,
tokenization.convert_to_unicode(line[0]))
text_a = tokenization.convert_to_unicode(line[8])
text_b = tokenization.convert_to_unicode(line[9])
if set_type == "test":
label = "contradiction"
else:
label = tokenization.convert_to_unicode(line[-1])
examples.append(
InputExample(
guid=guid, text_a=text_a, text_b=text_b, label=label))
return examples
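# Illustrative sketch: `tokenizer` is a FullTokenizer and `data_dir` a
# GLUE-style MNLI directory (both assumed to exist outside this module).
def _demo_mnli_processor(tokenizer, data_dir):
    processor = MnliProcessor(tokenizer, max_seq_len=128, in_tokens=False)
    train_iter = processor.get_train_iter(data_dir, epoch_num=1, shuffle=False)
    loader = processor.data_generator(train_iter, batch_size=32, phase='train')
    for src_id, pos_id, sent_id, input_mask, labels in loader():
        # one padded batch, ready to feed BertEncoder
        break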
class MrpcProcessor(DataProcessor):
"""Processor for the MRPC data set (GLUE version)."""
def get_train_iter(self,
data_dir,
epoch_num=1,
shuffle=True,
shuffle_seed=None):
"""See base class."""
examples = self._create_examples(
self._read_tsv(os.path.join(data_dir, "train.tsv")), "train")
self.train_examples = examples
def wrapper():
if shuffle:
if shuffle_seed is not None:
np.random.seed(shuffle_seed)
for epoch_idx in range(epoch_num):
if shuffle:
np.random.shuffle(examples)
for (example_idx, example) in enumerate(examples):
yield epoch_idx, example_idx, example
return wrapper
def get_dev_examples(self, data_dir):
"""See base class."""
examples = self._create_examples(
self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev")
self.dev_examples = examples
def wrapper():
for (example_idx, example) in enumerate(examples):
yield 0, example_idx, example
return wrapper
def get_test_examples(self, data_dir):
"""See base class."""
examples = self._create_examples(
self._read_tsv(os.path.join(data_dir, "test.tsv")), "test")
self.test_examples = examples
def wrapper():
for (example_idx, example) in enumerate(examples):
yield 0, example_idx, example
return wrapper
def get_labels(self):
"""See base class."""
return ["0", "1"]
def _create_examples(self, lines, set_type):
"""Creates examples for the training and dev sets."""
examples = []
for (i, line) in enumerate(lines):
if i == 0:
continue
guid = "%s-%s" % (set_type, i)
text_a = tokenization.convert_to_unicode(line[3])
text_b = tokenization.convert_to_unicode(line[4])
if set_type == "test":
label = "0"
else:
label = tokenization.convert_to_unicode(line[0])
examples.append(
InputExample(
guid=guid, text_a=text_a, text_b=text_b, label=label))
return examples
class ColaProcessor(DataProcessor):
"""Processor for the CoLA data set (GLUE version)."""
def get_train_iter(self,
data_dir,
epoch_num=1,
shuffle=True,
shuffle_seed=None):
"""See base class."""
examples = self._create_examples(
self._read_tsv(os.path.join(data_dir, "train.tsv")), "train")
self.train_examples = examples
def wrapper():
if shuffle:
if shuffle_seed is not None:
np.random.seed(shuffle_seed)
for epoch_idx in range(epoch_num):
if shuffle:
np.random.shuffle(examples)
for (example_idx, example) in enumerate(examples):
yield epoch_idx, example_idx, example
return wrapper
def get_dev_iter(self, data_dir):
"""See base class."""
examples = self._create_examples(
self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev")
self.dev_examples = examples
def wrapper():
for (example_idx, example) in enumerate(examples):
yield 0, example_idx, example
return wrapper
def get_test_iter(self, data_dir):
"""See base class."""
examples = self._create_examples(
self._read_tsv(os.path.join(data_dir, "test.tsv")), "test")
self.test_examples = examples
def wrapper():
for (example_idx, example) in enumerate(examples):
yield 0, example_idx, example
return wrapper
def get_labels(self):
"""See base class."""
return ["0", "1"]
def _create_examples(self, lines, set_type):
"""Creates examples for the training and dev sets."""
examples = []
for (i, line) in enumerate(lines):
# Only the test set has a header
if set_type == "test" and i == 0:
continue
guid = "%s-%s" % (set_type, i)
if set_type == "test":
text_a = tokenization.convert_to_unicode(line[1])
label = "0"
else:
text_a = tokenization.convert_to_unicode(line[3])
label = tokenization.convert_to_unicode(line[1])
examples.append(
InputExample(
guid=guid, text_a=text_a, text_b=None, label=label))
return examples
def convert_single_example_to_unicode(guid, single_example):
text_a = tokenization.convert_to_unicode(single_example[0])
text_b = tokenization.convert_to_unicode(single_example[1])
label = tokenization.convert_to_unicode(single_example[2])
return InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)
def convert_single_example(ex_index, example, label_list, max_seq_length,
tokenizer):
"""Converts a single `InputExample` into a single `InputFeatures`."""
label_map = {}
for (i, label) in enumerate(label_list):
label_map[label] = i
tokens_a = tokenizer.tokenize(example.text_a)
tokens_b = None
if example.text_b:
tokens_b = tokenizer.tokenize(example.text_b)
if tokens_b:
# Modifies `tokens_a` and `tokens_b` in place so that the total
# length is less than the specified length.
# Account for [CLS], [SEP], [SEP] with "- 3"
_truncate_seq_pair(tokens_a, tokens_b, max_seq_length - 3)
else:
# Account for [CLS] and [SEP] with "- 2"
if len(tokens_a) > max_seq_length - 2:
tokens_a = tokens_a[0:(max_seq_length - 2)]
# The convention in BERT is:
# (a) For sequence pairs:
# tokens: [CLS] is this jack ##son ##ville ? [SEP] no it is not . [SEP]
# type_ids: 0 0 0 0 0 0 0 0 1 1 1 1 1 1
# (b) For single sequences:
# tokens: [CLS] the dog is hairy . [SEP]
# type_ids: 0 0 0 0 0 0 0
#
# Where "type_ids" are used to indicate whether this is the first
# sequence or the second sequence. The embedding vectors for `type=0` and
# `type=1` were learned during pre-training and are added to the wordpiece
# embedding vector (and position vector). This is not *strictly* necessary
# since the [SEP] token unambiguously separates the sequences, but it makes
# it easier for the model to learn the concept of sequences.
#
# For classification tasks, the first vector (corresponding to [CLS]) is
# used as as the "sentence vector". Note that this only makes sense because
# the entire model is fine-tuned.
tokens = []
segment_ids = []
tokens.append("[CLS]")
segment_ids.append(0)
for token in tokens_a:
tokens.append(token)
segment_ids.append(0)
tokens.append("[SEP]")
segment_ids.append(0)
if tokens_b:
for token in tokens_b:
tokens.append(token)
segment_ids.append(1)
tokens.append("[SEP]")
segment_ids.append(1)
input_ids = tokenizer.convert_tokens_to_ids(tokens)
# The mask has 1 for real tokens and 0 for padding tokens. Only real
# tokens are attended to.
input_mask = [1] * len(input_ids)
label_id = label_map[example.label]
feature = InputFeatures(
input_ids=input_ids,
input_mask=input_mask,
segment_ids=segment_ids,
label_id=label_id)
return feature
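# Illustrative sketch: `tokenizer` is a FullTokenizer built from a real vocab
# file (an assumption here).
def _demo_convert_single_example(tokenizer):
    example = InputExample(
        guid="demo-1", text_a="the dog is hairy .", text_b=None, label="0")
    feature = convert_single_example(0, example, ["0", "1"], 128, tokenizer)
    # feature.input_ids starts with [CLS] and ends with [SEP];
    # feature.segment_ids is all zeros for a single sentence.
    return feature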
def convert_examples_to_features(examples, label_list, max_seq_length,
tokenizer):
"""Convert a set of `InputExample`s to a list of `InputFeatures`."""
features = []
for (ex_index, example) in enumerate(examples):
if ex_index % 10000 == 0:
print("Writing example %d of %d" % (ex_index, len(examples)))
feature = convert_single_example(ex_index, example, label_list,
max_seq_length, tokenizer)
features.append(feature)
return features
if __name__ == '__main__':
print("hello world")
pass
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import io
import os
import six
import csv
import glob
import tarfile
import itertools
import leveldb
from functools import partial
import numpy as np
import paddle.fluid as fluid
from paddle.io import BatchSampler, DataLoader, Dataset
from hapi.distributed import DistributedBatchSampler
from hapi.text.bert.data_processor import DataProcessor, XnliProcessor, ColaProcessor, MrpcProcessor, MnliProcessor
from hapi.text.bert.batching import prepare_batch_data
import hapi.text.tokenizer.tokenization as tokenization
from paddle.fluid.dygraph.parallel import ParallelEnv, ParallelStrategy
__all__ = [
'BertInputExample', 'BertInputFeatures', 'SingleSentenceDataset',
'SentencePairDataset', 'BertDataLoader'
]
class BertInputExample(object):
def __init__(self, uid, text_a, text_b=None, label=None):
self.uid = uid
self.text_a = text_a
self.text_b = text_b
self.label = label
class BertInputFeatures(object):
def __init__(self, input_ids, input_mask, segment_ids, label_id):
self.input_ids = input_ids
self.pos_ids = list(range(len(self.input_ids)))
self.input_mask = input_mask
self.segment_ids = segment_ids
self.label_id = label_id
def _truncate_seq_pair(tokens_a, tokens_b, max_length):
"""Truncates a sequence pair in place to the maximum length."""
# This is a simple heuristic which will always truncate the longer sequence
# one token at a time. This makes more sense than truncating an equal percent
# of tokens from each, since if one sequence is very short then each token
# that's truncated likely contains more information than a longer sequence.
while True:
total_length = len(tokens_a) + len(tokens_b)
if total_length <= max_length:
break
if len(tokens_a) > len(tokens_b):
tokens_a.pop()
else:
tokens_b.pop()
def convert_single_example_to_unicode(guid, single_example):
text_a = tokenization.convert_to_unicode(single_example[0])
text_b = tokenization.convert_to_unicode(single_example[1])
label = tokenization.convert_to_unicode(single_example[2])
    return BertInputExample(uid=guid, text_a=text_a, text_b=text_b, label=label)
def convert_single_example(ex_index, example, label_list, max_seq_length,
tokenizer):
"""Converts a single `BertInputExample` into a single `BertInputFeatures`."""
label_map = {}
for (i, label) in enumerate(label_list):
label_map[label] = i
tokens_a = tokenizer.tokenize(example.text_a)
tokens_b = None
if example.text_b:
tokens_b = tokenizer.tokenize(example.text_b)
if tokens_b:
# Modifies `tokens_a` and `tokens_b` in place so that the total
# length is less than the specified length.
# Account for [CLS], [SEP], [SEP] with "- 3"
_truncate_seq_pair(tokens_a, tokens_b, max_seq_length - 3)
else:
# Account for [CLS] and [SEP] with "- 2"
if len(tokens_a) > max_seq_length - 2:
tokens_a = tokens_a[0:(max_seq_length - 2)]
tokens = []
segment_ids = []
tokens.append("[CLS]")
segment_ids.append(0)
for token in tokens_a:
tokens.append(token)
segment_ids.append(0)
tokens.append("[SEP]")
segment_ids.append(0)
if tokens_b:
for token in tokens_b:
tokens.append(token)
segment_ids.append(1)
tokens.append("[SEP]")
segment_ids.append(1)
input_ids = tokenizer.convert_tokens_to_ids(tokens)
input_mask = [1] * len(input_ids)
label_id = label_map[example.label]
feature = BertInputFeatures(
input_ids=input_ids,
input_mask=input_mask,
segment_ids=segment_ids,
label_id=label_id)
return feature
def convert_examples_to_features(examples, label_list, max_seq_length,
tokenizer):
"""Convert a set of `InputExample`s to a list of `InputFeatures`."""
features = []
for (ex_index, example) in enumerate(examples):
if ex_index % 10000 == 0:
print("Writing example %d of %d" % (ex_index, len(examples)))
feature = convert_single_example(ex_index, example, label_list,
max_seq_length, tokenizer)
features.append(feature)
return features
def _read_tsv(input_file, delimiter="\t", quotechar=None):
"""Reads a tab separated value file."""
with io.open(input_file, "r", encoding="utf8") as f:
reader = csv.reader(f, delimiter=delimiter, quotechar=quotechar)
lines = []
for line in reader:
lines.append(line)
return lines
class SingleSentenceDataset(Dataset):
def __init__(self,
tokenizer,
label_list,
max_seq_length,
mode="all_in_memory"):
assert isinstance(mode,
str), "mode of SingleSentenceDataset should be str"
        assert mode in [
            "all_in_memory", "leveldb", "streaming"
        ], "mode of SingleSentenceDataset should be in [all_in_memory, leveldb, streaming], but got %s" % mode
self.delimiter = None
self.mode = mode
self.examples = []
self._db = None
self._line_processor = None
def load_all_data_in_memory(self,
input_file,
label_list,
max_seq_length,
tokenizer,
line_processor=None,
delimiter="\t",
quotechar=None):
lines = _read_tsv(input_file, delimiter=delimiter, quotechar=quotechar)
def default_line_processor(line_id, line):
assert len(line) == 2
text_a = line[0]
label = line[1]
return BertInputExample(
str(line_id), text_a=text_a, text_b=None, label=label)
if line_processor is None:
line_processor = default_line_processor
for (line_id, line) in enumerate(lines):
input_example = line_processor(line_id, line)
if not input_example:
continue
input_feature = convert_single_example(
str(line_id), input_example, label_list, max_seq_length,
tokenizer)
self.examples.append(input_feature)
def prepare_leveldb(self,
input_file,
leveldb_file,
label_list,
max_seq_length,
tokenizer,
line_processor=None,
delimiter="\t",
quotechar=None):
def default_line_processor(line_id, line):
assert len(line) == 2
text_a = line[0]
label = line[1]
return BertInputExample(
str(line_id), text_a=text_a, text_b=None, label=label)
if line_processor is None:
line_processor = default_line_processor
if ParallelEnv().nranks > 1:
leveldb_file = leveldb_file + "_" + str(ParallelEnv().local_rank)
if not os.path.exists(leveldb_file):
print("putting data %s into leveldb %s" %
(input_file, leveldb_file))
_example_num = 0
_db = leveldb.LevelDB(leveldb_file, create_if_missing=True)
with io.open(input_file, "r", encoding="utf8") as f:
reader = csv.reader(
f, delimiter=delimiter, quotechar=quotechar)
line_id = 0
for (_line_id, line) in enumerate(reader):
if line_processor(str(_line_id), line) is None:
continue
line_str = delimiter.join(line)
_db.Put(
str(line_id).encode("utf8"), line_str.encode("utf8"))
line_id += 1
_example_num += 1
_db.Put("_example_num_".encode("utf8"),
str(_example_num).encode("utf8"))
else:
_db = leveldb.LevelDB(leveldb_file, create_if_missing=False)
self.label_list = label_list
self.max_seq_length = max_seq_length
self.tokenizer = tokenizer
self.delimiter = delimiter
self._db = _db
self._line_processor = line_processor
def __getitem__(self, idx):
if self.mode == "all_in_memory":
return self.examples[idx].input_ids, self.examples[
idx].pos_ids, self.examples[idx].segment_ids, self.examples[
idx].label_id
if self.mode == "leveldb":
            assert self._db is not None, "you should call prepare_leveldb before you run the dataloader"
line_str = self._db.Get(str(idx).encode("utf8"))
line_str = line_str.decode("utf8")
line = line_str.split(self.delimiter)
input_example = self._line_processor(str(idx + 1), line)
input_example = convert_single_example(
str(idx + 1), input_example, self.label_list,
self.max_seq_length, self.tokenizer)
return input_example.input_ids, input_example.pos_ids, input_example.segment_ids, input_example.label_id
def __len__(self):
if self.mode == "all_in_memory":
return len(self.examples)
if self.mode == "leveldb":
            assert self._db is not None, "you should call prepare_leveldb before you run the dataloader"
            example_num = self._db.Get("_example_num_".encode("utf8"))
            example_num = example_num.decode("utf8")
            return int(example_num)
class SentencePairDataset(Dataset):
    def __init__(self,
                 tokenizer,
                 label_list,
                 max_seq_length,
                 mode="all_in_memory"):
        assert isinstance(mode,
                          str), "mode of SentencePairDataset should be str"
        assert mode in [
            "all_in_memory", "leveldb"
        ], "mode of SentencePairDataset should be in [all_in_memory, leveldb], but got %s" % mode
        self.mode = mode
        self.examples = []
def load_all_data_in_memory(self,
input_file,
label_list,
max_seq_length,
tokenizer,
line_processor=None,
delimiter="\t",
quotechar=None):
lines = _read_tsv(input_file, delimiter=delimiter, quotechar=quotechar)
def default_line_processor(line_id, line):
assert len(line) == 3
text_a = line[0]
text_b = line[1]
label = line[2]
return BertInputExample(
str(line_id), text_a=text_a, text_b=text_b, label=label)
if line_processor is None:
line_processor = default_line_processor
for (line_id, line) in enumerate(lines):
input_example = line_processor(line_id, line)
if not input_example:
continue
input_feature = convert_single_example(
str(line_id), input_example, label_list, max_seq_length,
tokenizer)
self.examples.append(input_feature)
def __getitem__(self, idx):
return self.examples[idx].input_ids, self.examples[
idx].pos_ids, self.examples[idx].segment_ids, self.examples[
idx].label_id
def __len__(self):
return len(self.examples)
def _prepare_train_batch(insts,
vocab_size=0,
pad_id=None,
cls_id=None,
sep_id=None,
mask_id=-1,
return_input_mask=True,
return_max_len=True,
return_num_token=False):
return prepare_batch_data(
insts,
0,
voc_size=vocab_size,
pad_id=pad_id,
cls_id=cls_id,
sep_id=sep_id,
mask_id=mask_id,
return_input_mask=return_input_mask,
return_max_len=return_max_len,
return_num_token=return_num_token)
class BertDataLoader(object):
def __init__(self,
input_file,
tokenizer,
label_list,
max_seq_length,
batch_size,
shuffle=False,
drop_last=False,
mode="all_in_memory",
leveldb_file="./leveldb",
line_processor=None,
delimiter="\t",
quotechar=None,
device=fluid.CPUPlace(),
num_workers=0,
return_list=True,
phase="train"):
        assert phase in [
            "train", "predict", "test"
        ], "phase of BertDataLoader should be in [train, predict, test], but got %s" % phase
self.dataset = SingleSentenceDataset(tokenizer, label_list,
max_seq_length, mode)
if mode == "all_in_memory":
self.dataset.load_all_data_in_memory(
input_file, label_list, max_seq_length, tokenizer,
line_processor, delimiter, quotechar)
elif mode == "leveldb":
self.dataset.prepare_leveldb(input_file, leveldb_file, label_list,
max_seq_length, tokenizer,
line_processor, delimiter, quotechar)
else:
raise ValueError("mode should be in [all_in_memory, leveldb]")
if phase == "train":
self.sampler = DistributedBatchSampler(
self.dataset, batch_size, shuffle=shuffle, drop_last=drop_last)
elif phase == "test" or phase == "predict":
self.sampler = BatchSampler(
dataset=self.dataset,
batch_size=batch_size,
shuffle=shuffle,
drop_last=drop_last)
self.dataloader = DataLoader(
dataset=self.dataset,
batch_sampler=self.sampler,
places=device,
collate_fn=partial(
_prepare_train_batch,
vocab_size=-1,
pad_id=tokenizer.vocab["[PAD]"],
cls_id=tokenizer.vocab["[CLS]"],
sep_id=tokenizer.vocab["[SEP]"],
mask_id=-1,
return_input_mask=True,
return_max_len=False,
return_num_token=False),
num_workers=num_workers,
return_list=return_list)
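# Illustrative sketch (the path and label set are assumptions): build a train
# loader over a two-column TSV of (text, label) using the default line
# processor, then pull one collated batch.
def _demo_bert_dataloader(tokenizer):
    loader = BertDataLoader(
        "./data/train.tsv",
        tokenizer,
        label_list=["0", "1"],
        max_seq_length=128,
        batch_size=32,
        shuffle=True,
        phase="train")
    for src_id, pos_id, sent_id, input_mask, label in loader.dataloader:
        # one batch with the same layout as prepare_batch_data's output
        break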
if __name__ == "__main__":
print("hello world.")
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Optimization and learning rate scheduling."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph.learning_rate_scheduler import LearningRateDecay
class ConstantLR(LearningRateDecay):
def __init__(self, learning_rate, begin=0, step=1, dtype='float32'):
super(ConstantLR, self).__init__(begin, step, dtype)
self.learning_rate = learning_rate
def step(self):
return self.learning_rate
class LinearDecay(LearningRateDecay):
def __init__(self,
learning_rate,
warmup_steps,
decay_steps,
end_learning_rate=0.0001,
power=1.0,
cycle=False,
begin=0,
step=1,
dtype='float32'):
super(LinearDecay, self).__init__(begin, step, dtype)
self.learning_rate = learning_rate
self.warmup_steps = warmup_steps
self.decay_steps = decay_steps
self.end_learning_rate = end_learning_rate
self.power = power
self.cycle = cycle
def step(self):
if self.step_num < self.warmup_steps:
decayed_lr = self.learning_rate * (self.step_num /
self.warmup_steps)
decayed_lr = self.create_lr_var(decayed_lr)
else:
tmp_step_num = self.step_num
tmp_decay_steps = self.decay_steps
if self.cycle:
div_res = fluid.layers.ceil(
self.create_lr_var(tmp_step_num / float(self.decay_steps)))
if tmp_step_num == 0:
div_res = self.create_lr_var(1.0)
tmp_decay_steps = self.decay_steps * div_res
else:
tmp_step_num = self.create_lr_var(
tmp_step_num
if tmp_step_num < self.decay_steps else self.decay_steps)
decayed_lr = (self.learning_rate - self.end_learning_rate) * \
((1 - tmp_step_num / tmp_decay_steps) ** self.power) + self.end_learning_rate
return decayed_lr
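# Illustrative sketch with assumed numbers: warmup ramps the rate linearly to
# 1e-4 over the first 100 steps (step 50 -> 5e-5), after which it decays
# linearly toward end_learning_rate over decay_steps.
def _demo_linear_decay():
    with fluid.dygraph.guard():
        sched = LinearDecay(1e-4, warmup_steps=100, decay_steps=1000)
        lrs = [float(sched().numpy()) for _ in range(5)]  # __call__ advances step_num
    return lrs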
class Optimizer(object):
def __init__(self,
warmup_steps,
num_train_steps,
learning_rate,
model_cls,
weight_decay,
scheduler='linear_warmup_decay',
loss_scaling=1.0,
parameter_list=None):
self.warmup_steps = warmup_steps
self.num_train_steps = num_train_steps
self.learning_rate = learning_rate
self.model_cls = model_cls
self.weight_decay = weight_decay
self.scheduler = scheduler
self.loss_scaling = loss_scaling
self.parameter_list = parameter_list
self.scheduled_lr = 0.0
self.optimizer = self.lr_schedule()
def lr_schedule(self):
if self.warmup_steps > 0:
if self.scheduler == 'noam_decay':
self.scheduled_lr = fluid.dygraph.NoamDecay(1 / (
self.warmup_steps * (self.learning_rate**2)),
self.warmup_steps)
elif self.scheduler == 'linear_warmup_decay':
self.scheduled_lr = LinearDecay(self.learning_rate,
self.warmup_steps,
self.num_train_steps, 0.0)
else:
raise ValueError("Unkown learning rate scheduler, should be "
"'noam_decay' or 'linear_warmup_decay'")
optimizer = fluid.optimizer.Adam(
learning_rate=self.scheduled_lr,
parameter_list=self.parameter_list)
else:
self.scheduled_lr = ConstantLR(self.learning_rate)
optimizer = fluid.optimizer.Adam(
learning_rate=self.scheduled_lr,
parameter_list=self.parameter_list)
return optimizer
def exclude_from_weight_decay(self, name):
if name.find("layer_norm") > -1:
return True
bias_suffix = ["_bias", "_b", ".b_0"]
for suffix in bias_suffix:
if name.endswith(suffix):
return True
return False
def state_dict(self):
return self.optimizer.state_dict()
def set_dict(self, state_dict):
return self.optimizer.set_dict(state_dict)
def get_opti_var_name_list(self):
return self.optimizer.get_opti_var_name_list()
def current_step_lr(self):
return self.optimizer.current_step_lr()
def minimize(self, loss, use_data_parallel=False, model=None):
param_list = dict()
clip_norm_thres = 1.0
#grad_clip = fluid.clip.GradientClipByGlobalNorm(clip_norm_thres)
if use_data_parallel:
loss = model.scale_loss(loss)
loss.backward()
if self.weight_decay > 0:
for param in self.model_cls.parameters():
param_list[param.name] = param * 1.0
param_list[param.name].stop_gradient = True
if use_data_parallel:
assert model is not None
model.apply_collective_grads()
#_, param_grads = self.optimizer.minimize(loss, grad_clip=grad_clip)
_, param_grads = self.optimizer.minimize(loss)
if self.weight_decay > 0:
for param, grad in param_grads:
if self.exclude_from_weight_decay(param.name):
continue
                lr_value = self.scheduled_lr.step()
                if not isinstance(lr_value, float):
                    lr_value = float(lr_value.numpy())
                updated_param = param.numpy() - param_list[
                    param.name].numpy() * self.weight_decay * lr_value
                # rebinding the local name `param` would be a no-op; write the
                # decayed value back into the parameter instead
                param.set_value(updated_param)
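# Illustrative sketch (the numbers are assumptions): wire the warmup/decay
# schedule and Adam to a dygraph `model`, then take one weight-decayed step on
# a precomputed `loss`.
def _demo_optimizer(model, loss):
    opt = Optimizer(
        warmup_steps=100,
        num_train_steps=1000,
        learning_rate=1e-4,
        model_cls=model,
        weight_decay=0.01,
        scheduler='linear_warmup_decay',
        parameter_list=model.parameters())
    opt.minimize(loss)
    model.clear_gradients()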
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Optimization and learning rate scheduling."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import paddle.fluid as fluid
from utils.fp16 import create_master_params_grads, master_param_to_train_param, apply_dynamic_loss_scaling
def linear_warmup_decay(learning_rate, warmup_steps, num_train_steps):
""" Applies linear warmup of learning rate from 0 and decay to 0."""
with fluid.default_main_program()._lr_schedule_guard():
lr = fluid.layers.tensor.create_global_var(
shape=[1],
value=0.0,
dtype='float32',
persistable=True,
name="scheduled_learning_rate")
global_step = fluid.layers.learning_rate_scheduler._decay_step_counter(
)
with fluid.layers.control_flow.Switch() as switch:
with switch.case(global_step < warmup_steps):
warmup_lr = learning_rate * (global_step / warmup_steps)
fluid.layers.tensor.assign(warmup_lr, lr)
with switch.default():
decayed_lr = fluid.layers.learning_rate_scheduler.polynomial_decay(
learning_rate=learning_rate,
decay_steps=num_train_steps,
end_learning_rate=0.0,
power=1.0,
cycle=False)
fluid.layers.tensor.assign(decayed_lr, lr)
return lr
def optimization(loss,
warmup_steps,
num_train_steps,
learning_rate,
train_program,
startup_prog,
weight_decay,
scheduler='linear_warmup_decay',
use_fp16=False,
use_dynamic_loss_scaling=False,
init_loss_scaling=1.0,
incr_every_n_steps=1000,
decr_every_n_nan_or_inf=2,
incr_ratio=2.0,
decr_ratio=0.8):
scheduled_lr, loss_scaling = None, None
if scheduler == 'noam_decay':
if warmup_steps > 0:
scheduled_lr = fluid.layers.learning_rate_scheduler\
.noam_decay(1/(warmup_steps *(learning_rate ** 2)),
warmup_steps)
else:
print(
"WARNING: noam decay of learning rate should have postive warmup "
"steps but given {}, using constant learning rate instead!"
.format(warmup_steps))
scheduled_lr = fluid.layers.create_global_var(
name=fluid.unique_name.generate("learning_rate"),
shape=[1],
value=learning_rate,
dtype='float32',
persistable=True)
elif scheduler == 'linear_warmup_decay':
if warmup_steps > 0:
scheduled_lr = linear_warmup_decay(learning_rate, warmup_steps,
num_train_steps)
else:
print(
"WARNING: linear warmup decay of learning rate should have "
"postive warmup steps but given {}, use constant learning rate "
"instead!".format(warmup_steps))
scheduled_lr = fluid.layers.create_global_var(
name=fluid.unique_name.generate("learning_rate"),
shape=[1],
value=learning_rate,
dtype='float32',
persistable=True)
else:
raise ValueError("Unkown learning rate scheduler, should be "
"'noam_decay' or 'linear_warmup_decay'")
optimizer = fluid.optimizer.Adam(learning_rate=scheduled_lr)
fluid.clip.set_gradient_clip(
clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0))
    def exclude_from_weight_decay(param):
        # strip a literal ".master" suffix; str.rstrip(".master") would strip
        # any trailing characters from that set, not the suffix itself
        name = param.name
        if name.endswith(".master"):
            name = name[:-len(".master")]
if name.find("layer_norm") > -1:
return True
bias_suffix = ["_bias", "_b", ".b_0"]
for suffix in bias_suffix:
if name.endswith(suffix):
return True
return False
param_list = dict()
if use_fp16:
loss_scaling = fluid.layers.create_global_var(
name=fluid.unique_name.generate("loss_scaling"),
shape=[1],
value=init_loss_scaling,
dtype='float32',
persistable=True)
loss *= loss_scaling
param_grads = optimizer.backward(loss)
master_param_grads = create_master_params_grads(
param_grads, train_program, startup_prog, loss_scaling)
if weight_decay > 0:
for param, _ in master_param_grads:
param_list[param.name] = param * 1.0
param_list[param.name].stop_gradient = True
if use_dynamic_loss_scaling:
apply_dynamic_loss_scaling(
loss_scaling, master_param_grads, incr_every_n_steps,
decr_every_n_nan_or_inf, incr_ratio, decr_ratio)
optimizer.apply_gradients(master_param_grads)
if weight_decay > 0:
for param, grad in master_param_grads:
if exclude_from_weight_decay(param):
continue
with param.block.program._optimized_guard(
[param, grad]), fluid.framework.name_scope("weight_decay"):
updated_param = param - param_list[
param.name] * weight_decay * scheduled_lr
fluid.layers.assign(output=param, input=updated_param)
master_param_to_train_param(master_param_grads, param_grads,
train_program)
else:
if weight_decay > 0:
for param in train_program.all_parameters():
param_list[param.name] = param * 1.0
param_list[param.name].stop_gradient = True
_, param_grads = optimizer.minimize(loss)
if weight_decay > 0:
for param, grad in param_grads:
if exclude_from_weight_decay(param):
continue
with param.block.program._optimized_guard(
[param, grad]), fluid.framework.name_scope("weight_decay"):
updated_param = param - param_list[
param.name] * weight_decay * scheduled_lr
fluid.layers.assign(output=param, input=updated_param)
return scheduled_lr, loss_scaling
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from hapi.text.bert.utils.args import str2bool as str2bool
from hapi.text.bert.utils.args import ArgumentGroup as ArgumentGroup
from hapi.text.bert.utils.args import print_arguments as print_arguments
from hapi.text.bert.utils.args import check_cuda as check_cuda
from hapi.text.bert.utils.cards import get_cards as get_cards
from hapi.text.bert.utils.fp16 import cast_fp16_to_fp32 as cast_fp16_to_fp32
from hapi.text.bert.utils.fp16 import cast_fp32_to_fp16 as cast_fp32_to_fp16
from hapi.text.bert.utils.fp16 import copy_to_master_param as copy_to_master_param
from hapi.text.bert.utils.fp16 import create_master_params_grads as create_master_params_grads
from hapi.text.bert.utils.fp16 import master_param_to_train_param as master_param_to_train_param
from hapi.text.bert.utils.init import init_checkpoint as init_checkpoint
from hapi.text.bert.utils.init import init_pretraining_params as init_pretraining_params
from hapi.text.bert.utils.init import init_from_static_model as init_from_static_model
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Arguments for configuration."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import six
import sys
import argparse
import paddle.fluid as fluid
def str2bool(v):
    # argparse cannot parse strings like "True"/"False" into Python booleans
    # directly, so treat these spellings as truthy
return v.lower() in ("true", "t", "1")
class ArgumentGroup(object):
def __init__(self, parser, title, des):
self._group = parser.add_argument_group(title=title, description=des)
def add_arg(self, name, type, default, help, **kwargs):
type = str2bool if type == bool else type
self._group.add_argument(
"--" + name,
default=default,
type=type,
help=help + ' Default: %(default)s.',
**kwargs)
def print_arguments(args):
print('----------- Configuration Arguments -----------')
for arg, value in sorted(six.iteritems(vars(args))):
print('%s: %s' % (arg, value))
print('------------------------------------------------')
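# Illustrative sketch: group related flags with ArgumentGroup, then print the
# parsed result; parse_args([]) just exercises the defaults.
def _demo_argument_group():
    parser = argparse.ArgumentParser()
    train_g = ArgumentGroup(parser, "training", "training options.")
    train_g.add_arg("epoch", int, 3, "Number of epochs for fine-tuning.")
    train_g.add_arg("use_cuda", bool, True, "Whether to run on GPU.")
    args = parser.parse_args([])
    print_arguments(args)
    return args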
def check_cuda(use_cuda, err = \
"\nYou can not set use_cuda = True in the model because you are using paddlepaddle-cpu.\n \
Please: 1. Install paddlepaddle-gpu to run your models on GPU or 2. Set use_cuda = False to run models on CPU.\n"
):
    try:
        if use_cuda and not fluid.is_compiled_with_cuda():
            print(err)
            sys.exit(1)
    except Exception:
        pass
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
def get_cards():
"""
get gpu cards number
"""
num = 0
cards = os.environ.get('CUDA_VISIBLE_DEVICES', '')
if cards != '':
num = len(cards.split(","))
return num
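# Example: with CUDA_VISIBLE_DEVICES="0,1,2" get_cards() returns 3; when the
# variable is unset or empty it returns 0.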
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import shutil
import sys
import os
def usage():
"""
usage information
"""
    print()
print("please use command: ")
print(
"python convert_static_to_dygraph.py input_params_dir output_params_dir"
)
    print()
def convert_static_to_dygraph(static_model_path, dygraph_model_path):
"""
convert paddle static bert model to dygraph model
"""
def mkdir(path):
if not os.path.isdir(path):
if os.path.split(path)[0]:
mkdir(os.path.split(path)[0])
else:
return
os.mkdir(path)
if os.path.exists(dygraph_model_path):
shutil.rmtree(dygraph_model_path)
mkdir(dygraph_model_path)
if not os.path.exists(static_model_path):
print("paddle static model path doesn't exist.....")
return -1
file_list = []
for root, dirs, files in os.walk(static_model_path):
file_list.extend(files)
os.makedirs(os.path.join(dygraph_model_path, "PretrainModelLayer_0"))
os.makedirs(
os.path.join(dygraph_model_path,
"PretrainModelLayer_0/BertModelLayer_0"))
os.makedirs(
os.path.join(dygraph_model_path,
"PretrainModelLayer_0/PrePostProcessLayer_0"))
os.makedirs(
os.path.join(
dygraph_model_path,
"PretrainModelLayer_0/BertModelLayer_0/PrePostProcessLayer_0"))
#os.chdir(static_model_path)
#convert embedding file
embedding_type = ["word", "pos", "sent"]
for i in range(3):
src_name = embedding_type[i] + "_embedding"
trg_name = "Embedding_" + str(i) + "." + src_name
shutil.copyfile(
os.path.join(static_model_path, src_name),
os.path.join(dygraph_model_path,
"PretrainModelLayer_0/BertModelLayer_0/" + trg_name))
#convert pre_encoder file
shutil.copyfile(
os.path.join(static_model_path, "pre_encoder_layer_norm_scale"),
os.path.join(
dygraph_model_path,
"PretrainModelLayer_0/BertModelLayer_0/PrePostProcessLayer_0/LayerNorm_0._layer_norm_scale"
))
shutil.copyfile(
os.path.join(static_model_path, "pre_encoder_layer_norm_bias"),
os.path.join(
dygraph_model_path,
"PretrainModelLayer_0/BertModelLayer_0/PrePostProcessLayer_0/LayerNorm_0._layer_norm_bias"
))
#convert mask lm params file
shutil.copyfile(
os.path.join(static_model_path, "mask_lm_out_fc.b_0"),
os.path.join(dygraph_model_path,
"PretrainModelLayer_0/Layer_0.mask_lm_out_fc.b_0"))
shutil.copyfile(
os.path.join(static_model_path, "mask_lm_trans_fc.b_0"),
os.path.join(dygraph_model_path,
"PretrainModelLayer_0/FC_0.mask_lm_trans_fc.b_0"))
shutil.copyfile(
os.path.join(static_model_path, "mask_lm_trans_fc.w_0"),
os.path.join(dygraph_model_path,
"PretrainModelLayer_0/FC_0.mask_lm_trans_fc.w_0"))
shutil.copyfile(
os.path.join(static_model_path, "mask_lm_trans_layer_norm_bias"),
os.path.join(
dygraph_model_path,
"PretrainModelLayer_0/PrePostProcessLayer_0/LayerNorm_0._layer_norm_bias"
))
shutil.copyfile(
os.path.join(static_model_path, "mask_lm_trans_layer_norm_scale"),
os.path.join(
dygraph_model_path,
"PretrainModelLayer_0/PrePostProcessLayer_0/LayerNorm_0._layer_norm_scale"
))
shutil.copyfile(
os.path.join(static_model_path, "next_sent_fc.b_0"),
os.path.join(dygraph_model_path,
"PretrainModelLayer_0/FC_1.next_sent_fc.b_0"))
shutil.copyfile(
os.path.join(static_model_path, "next_sent_fc.w_0"),
os.path.join(dygraph_model_path,
"PretrainModelLayer_0/FC_1.next_sent_fc.w_0"))
shutil.copyfile(
os.path.join(static_model_path, "pooled_fc.b_0"),
os.path.join(
dygraph_model_path,
"PretrainModelLayer_0/BertModelLayer_0/FC_0.pooled_fc.b_0"))
shutil.copyfile(
os.path.join(static_model_path, "pooled_fc.w_0"),
os.path.join(
dygraph_model_path,
"PretrainModelLayer_0/BertModelLayer_0/FC_0.pooled_fc.w_0"))
encoder_num = 0
for f in file_list:
if not f.startswith("encoder_layer"):
continue
layer_num = f.split('_')[2]
if int(layer_num) > encoder_num:
encoder_num = int(layer_num)
encoder_num += 1
for i in range(encoder_num):
encoder_dir = "EncoderSubLayer_" + str(i)
os.makedirs(
os.path.join(dygraph_model_path,
"PretrainModelLayer_0/BertModelLayer_0/" +
"EncoderLayer_0/", encoder_dir))
os.makedirs(
os.path.join(dygraph_model_path,
"PretrainModelLayer_0/BertModelLayer_0/" +
"EncoderLayer_0/", encoder_dir +
"/PositionwiseFeedForwardLayer_0"))
os.makedirs(
os.path.join(
dygraph_model_path, "PretrainModelLayer_0/BertModelLayer_0/" +
"EncoderLayer_0/", encoder_dir + "/MultiHeadAttentionLayer_0"))
os.makedirs(
os.path.join(
dygraph_model_path, "PretrainModelLayer_0/BertModelLayer_0/" +
"EncoderLayer_0/", encoder_dir + "/PrePostProcessLayer_1"))
os.makedirs(
os.path.join(
dygraph_model_path, "PretrainModelLayer_0/BertModelLayer_0/" +
"EncoderLayer_0/", encoder_dir + "/PrePostProcessLayer_3"))
encoder_map_dict = {
"ffn_fc_0.b_0":
("PositionwiseFeedForwardLayer_0", "FC_0.ffn_fc_0.b_0"),
"ffn_fc_0.w_0":
("PositionwiseFeedForwardLayer_0", "FC_0.ffn_fc_0.w_0"),
"ffn_fc_1.b_0":
("PositionwiseFeedForwardLayer_0", "FC_1.ffn_fc_1.b_0"),
"ffn_fc_1.w_0":
("PositionwiseFeedForwardLayer_0", "FC_1.ffn_fc_1.w_0"),
"multi_head_att_key_fc.b_0":
("MultiHeadAttentionLayer_0", "FC_1.key_fc.b_0"),
"multi_head_att_key_fc.w_0":
("MultiHeadAttentionLayer_0", "FC_1.key_fc.w_0"),
"multi_head_att_output_fc.b_0":
("MultiHeadAttentionLayer_0", "FC_3.output_fc.b_0"),
"multi_head_att_output_fc.w_0":
("MultiHeadAttentionLayer_0", "FC_3.output_fc.w_0"),
"multi_head_att_query_fc.b_0":
("MultiHeadAttentionLayer_0", "FC_0.query_fc.b_0"),
"multi_head_att_query_fc.w_0":
("MultiHeadAttentionLayer_0", "FC_0.query_fc.w_0"),
"multi_head_att_value_fc.b_0":
("MultiHeadAttentionLayer_0", "FC_2.value_fc.b_0"),
"multi_head_att_value_fc.w_0":
("MultiHeadAttentionLayer_0", "FC_2.value_fc.w_0"),
"post_att_layer_norm_bias":
("PrePostProcessLayer_1", "LayerNorm_0.post_att_layer_norm_bias"),
"post_att_layer_norm_scale":
("PrePostProcessLayer_1", "LayerNorm_0.post_att_layer_norm_scale"),
"post_ffn_layer_norm_bias":
("PrePostProcessLayer_3", "LayerNorm_0.post_ffn_layer_norm_bias"),
"post_ffn_layer_norm_scale":
("PrePostProcessLayer_3", "LayerNorm_0.post_ffn_layer_norm_scale")
}
for f in file_list:
if not f.startswith("encoder_layer"):
continue
layer_num = f.split('_')[2]
suffix_name = "_".join(f.split('_')[3:])
in_dir = encoder_map_dict[suffix_name][0]
rename = encoder_map_dict[suffix_name][1]
encoder_layer = "EncoderSubLayer_" + layer_num
shutil.copyfile(
os.path.join(static_model_path, f),
os.path.join(
dygraph_model_path,
"PretrainModelLayer_0/BertModelLayer_0/EncoderLayer_0/" +
encoder_layer + "/" + in_dir + "/" + rename))
if __name__ == "__main__":
if len(sys.argv) < 3:
usage()
        sys.exit(1)
static_model_path = sys.argv[1]
dygraph_model_path = sys.argv[2]
convert_static_to_dygraph(static_model_path, dygraph_model_path)
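# Example invocation (both directories are placeholders):
#
#   python convert_static_to_dygraph.py ./static_bert_params ./dygraph_bert_params
#
# The script mirrors each static parameter file into the nested directory
# layout that the dygraph PretrainModelLayer expects when loading weights.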
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import paddle
import paddle.fluid as fluid
def cast_fp16_to_fp32(i, o, prog):
prog.global_block().append_op(
type="cast",
inputs={"X": i},
outputs={"Out": o},
attrs={
"in_dtype": fluid.core.VarDesc.VarType.FP16,
"out_dtype": fluid.core.VarDesc.VarType.FP32
})
def cast_fp32_to_fp16(i, o, prog):
prog.global_block().append_op(
type="cast",
inputs={"X": i},
outputs={"Out": o},
attrs={
"in_dtype": fluid.core.VarDesc.VarType.FP32,
"out_dtype": fluid.core.VarDesc.VarType.FP16
})
def copy_to_master_param(p, block):
v = block.vars.get(p.name, None)
if v is None:
raise ValueError("no param name %s found!" % p.name)
new_p = fluid.framework.Parameter(
block=block,
shape=v.shape,
dtype=fluid.core.VarDesc.VarType.FP32,
type=v.type,
lod_level=v.lod_level,
stop_gradient=p.stop_gradient,
trainable=p.trainable,
optimize_attr=p.optimize_attr,
regularizer=p.regularizer,
gradient_clip_attr=p.gradient_clip_attr,
error_clip=p.error_clip,
name=v.name + ".master")
return new_p
def create_master_params_grads(params_grads, main_prog, startup_prog,
loss_scaling):
master_params_grads = []
tmp_role = main_prog._current_role
OpRole = fluid.core.op_proto_and_checker_maker.OpRole
main_prog._current_role = OpRole.Backward
for p, g in params_grads:
# create master parameters
master_param = copy_to_master_param(p, main_prog.global_block())
startup_master_param = startup_prog.global_block()._clone_variable(
master_param)
startup_p = startup_prog.global_block().var(p.name)
cast_fp16_to_fp32(startup_p, startup_master_param, startup_prog)
# cast fp16 gradients to fp32 before apply gradients
if g.name.find("layer_norm") > -1:
if loss_scaling > 1:
scaled_g = g / float(loss_scaling)
else:
scaled_g = g
master_params_grads.append([p, scaled_g])
continue
master_grad = fluid.layers.cast(g, "float32")
if loss_scaling > 1:
master_grad = master_grad / float(loss_scaling)
master_params_grads.append([master_param, master_grad])
main_prog._current_role = tmp_role
return master_params_grads
def master_param_to_train_param(master_params_grads, params_grads, main_prog):
for idx, m_p_g in enumerate(master_params_grads):
train_p, _ = params_grads[idx]
if train_p.name.find("layer_norm") > -1:
continue
with main_prog._optimized_guard([m_p_g[0], m_p_g[1]]):
cast_fp32_to_fp16(m_p_g[0], train_p, main_prog)
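# A minimal sketch of how the two helpers above combine in an fp16 training
# step; `optimizer`, `avg_loss` and the programs are assumed to be built
# elsewhere, so this is illustrative rather than part of the original file:
def _demo_mixed_precision_update(optimizer, avg_loss, main_prog, startup_prog,
                                 loss_scaling=128.0):
    # backward on the scaled loss yields (mostly) fp16 gradients
    params_grads = optimizer.backward(avg_loss * loss_scaling)
    # keep fp32 master copies and cast/rescale the gradients up to fp32
    master_params_grads = create_master_params_grads(
        params_grads, main_prog, startup_prog, loss_scaling)
    # apply the update in fp32, then copy the result back into fp16 params
    optimizer.apply_gradients(master_params_grads)
    master_param_to_train_param(master_params_grads, params_grads, main_prog)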
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import os
import six
import ast
import copy
import numpy as np
import paddle.fluid as fluid
def cast_fp32_to_fp16(exe, main_program):
print("Cast parameters to float16 data format.")
for param in main_program.global_block().all_parameters():
if not param.name.endswith(".master"):
param_t = fluid.global_scope().find_var(param.name).get_tensor()
data = np.array(param_t)
if param.name.find("layer_norm") == -1:
param_t.set(np.float16(data).view(np.uint16), exe.place)
master_param_var = fluid.global_scope().find_var(param.name +
".master")
if master_param_var is not None:
master_param_var.get_tensor().set(data, exe.place)
def init_checkpoint(exe, init_checkpoint_path, main_program, use_fp16=False):
assert os.path.exists(
init_checkpoint_path), "[%s] cann't be found." % init_checkpoint_path
    def existed_persistables(var):
if not fluid.io.is_persistable(var):
return False
return os.path.exists(os.path.join(init_checkpoint_path, var.name))
fluid.io.load_vars(
exe,
init_checkpoint_path,
main_program=main_program,
        predicate=existed_persistables)
print("Load model from {}".format(init_checkpoint_path))
if use_fp16:
cast_fp32_to_fp16(exe, main_program)
def init_pretraining_params(exe,
pretraining_params_path,
main_program,
use_fp16=False):
assert os.path.exists(pretraining_params_path
), "[%s] cann't be found." % pretraining_params_path
def existed_params(var):
if not isinstance(var, fluid.framework.Parameter):
return False
return os.path.exists(os.path.join(pretraining_params_path, var.name))
fluid.io.load_vars(
exe,
pretraining_params_path,
main_program=main_program,
predicate=existed_params)
print("Load pretraining parameters from {}.".format(
pretraining_params_path))
if use_fp16:
cast_fp32_to_fp16(exe, main_program)
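# Usage sketch for the two loaders above (the executor, program and
# checkpoint path are placeholders for whatever the training script builds):
def _demo_init_params(checkpoint_dir="./saved_model/step_100"):
    exe = fluid.Executor(fluid.CPUPlace())
    main_prog = fluid.default_main_program()
    exe.run(fluid.default_startup_program())
    # resume a full training state; init_pretraining_params would instead
    # warm-start from pretrained parameters only
    init_checkpoint(exe, checkpoint_dir, main_prog, use_fp16=False)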
def init_from_static_model(dir_path,
backbone_model,
bert_config,
verbose=False):
def load_numpy_weight(file_name):
if six.PY2:
res = np.load(os.path.join(dir_path, file_name), allow_pickle=True)
else:
res = np.load(
os.path.join(dir_path, file_name),
allow_pickle=True,
encoding='latin1')
assert res is not None
return res
# load word embedding
_param = load_numpy_weight("word_embedding")
backbone_model._src_emb.set_dict({"weight": _param})
if verbose:
print("INIT word embedding")
_param = load_numpy_weight("pos_embedding")
backbone_model._pos_emb.set_dict({"weight": _param})
if verbose:
print("INIT pos embedding")
_param = load_numpy_weight("sent_embedding")
backbone_model._sent_emb.set_dict({"weight": _param})
if verbose:
print("INIT sent embedding")
_param0 = load_numpy_weight("pooled_fc.w_0")
_param1 = load_numpy_weight("pooled_fc.b_0")
backbone_model.pooled_fc.set_dict({"weight": _param0, "bias": _param1})
if verbose:
print("INIT pooled_fc")
_param0 = load_numpy_weight("pre_encoder_layer_norm_scale")
_param1 = load_numpy_weight("pre_encoder_layer_norm_bias")
backbone_model.pre_process_layer._sub_layers["layer_norm_0"].set_dict({
"weight": _param0,
"bias": _param1
})
if verbose:
print("INIT pre_encoder layer norm")
for _i in range(bert_config["num_hidden_layers"]):
_param_weight = "encoder_layer_%d_multi_head_att_query_fc.w_0" % _i
_param_bias = "encoder_layer_%d_multi_head_att_query_fc.b_0" % _i
_param_weight = load_numpy_weight(_param_weight)
_param_bias = load_numpy_weight(_param_bias)
backbone_model._encoder._sub_layers["layer_%d" %
_i].self_attn.q_fc.set_dict({
"weight": _param_weight,
"bias": _param_bias
})
if verbose:
print("INIT multi_head_att_query_fc %d" % _i)
_param_weight = "encoder_layer_%d_multi_head_att_key_fc.w_0" % _i
_param_bias = "encoder_layer_%d_multi_head_att_key_fc.b_0" % _i
_param_weight = load_numpy_weight(_param_weight)
_param_bias = load_numpy_weight(_param_bias)
backbone_model._encoder._sub_layers["layer_%d" %
_i].self_attn.k_fc.set_dict({
"weight": _param_weight,
"bias": _param_bias
})
if verbose:
print("INIT multi_head_att_key_fc %d" % _i)
_param_weight = "encoder_layer_%d_multi_head_att_value_fc.w_0" % _i
_param_bias = "encoder_layer_%d_multi_head_att_value_fc.b_0" % _i
_param_weight = load_numpy_weight(_param_weight)
_param_bias = load_numpy_weight(_param_bias)
backbone_model._encoder._sub_layers["layer_%d" %
_i].self_attn.v_fc.set_dict({
"weight": _param_weight,
"bias": _param_bias
})
if verbose:
print("INIT multi_head_att_value_fc %d" % _i)
# init output fc
_param_weight = "encoder_layer_%d_multi_head_att_output_fc.w_0" % _i
_param_bias = "encoder_layer_%d_multi_head_att_output_fc.b_0" % _i
_param_weight = load_numpy_weight(_param_weight)
_param_bias = load_numpy_weight(_param_bias)
backbone_model._encoder._sub_layers["layer_%d" %
_i].self_attn.proj_fc.set_dict({
"weight": _param_weight,
"bias": _param_bias
})
if verbose:
print("INIT multi_head_att_output_fc %d" % _i)
# init layer_norm 1
_param_weight = "encoder_layer_%d_post_att_layer_norm_scale" % _i
_param_bias = "encoder_layer_%d_post_att_layer_norm_bias" % _i
_param_weight = load_numpy_weight(_param_weight)
_param_bias = load_numpy_weight(_param_bias)
backbone_model._encoder._sub_layers[
"layer_%d" % _i].postprocesser1.layer_norm_0.set_dict({
"weight": _param_weight,
"bias": _param_bias
})
if verbose:
print("INIT layer norm in attention at %d layer" % _i)
# init layer_norm 2
_param_weight = "encoder_layer_%d_post_ffn_layer_norm_scale" % _i
_param_bias = "encoder_layer_%d_post_ffn_layer_norm_bias" % _i
_param_weight = load_numpy_weight(_param_weight)
_param_bias = load_numpy_weight(_param_bias)
backbone_model._encoder._sub_layers[
"layer_%d" % _i].postprocesser2.layer_norm_0.set_dict({
"weight": _param_weight,
"bias": _param_bias
})
if verbose:
print("INIT layer norm in FFN at %d layer" % _i)
# init FFN 1
_param_weight = "encoder_layer_%d_ffn_fc_0.w_0" % _i
_param_bias = "encoder_layer_%d_ffn_fc_0.b_0" % _i
_param_weight = load_numpy_weight(_param_weight)
_param_bias = load_numpy_weight(_param_bias)
backbone_model._encoder._sub_layers["layer_%d" % _i].ffn.fc1.set_dict({
"weight": _param_weight,
"bias": _param_bias
})
if verbose:
print("INIT FFN-1 at %d layer" % _i)
# init FFN 2
_param_weight = "encoder_layer_%d_ffn_fc_1.w_0" % _i
_param_bias = "encoder_layer_%d_ffn_fc_1.b_0" % _i
_param_weight = load_numpy_weight(_param_weight)
_param_bias = load_numpy_weight(_param_bias)
backbone_model._encoder._sub_layers["layer_%d" % _i].ffn.fc2.set_dict({
"weight": _param_weight,
"bias": _param_bias
})
if verbose:
print("INIT FFN-2 at %d layer" % _i)
return True
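# A hedged sketch of wiring the loader above to a dygraph backbone; the
# backbone class name and parameter directory are assumptions for
# illustration:
#
#   with fluid.dygraph.guard():
#       backbone = BertModelLayer(config=bert_config, return_pooled_out=True)
#       init_from_static_model("./static_bert_params", backbone, bert_config,
#                              verbose=True)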
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import six
import sys
if six.PY2:
reload(sys)
sys.setdefaultencoding('utf8')
import ast
import time
import argparse
import numpy as np
import multiprocessing
import collections
import copy
from functools import partial, reduce
import paddle
import paddle.fluid as fluid
import paddle.fluid.layers.utils as utils
from paddle.fluid.layers.utils import map_structure, flatten, pack_sequence_as
from paddle.fluid.dygraph import to_variable, Embedding, Linear, LayerNorm, GRUUnit
from paddle.fluid.data_feeder import convert_dtype
from paddle.fluid import layers
...@@ -19,7 +48,8 @@ __all__ = [
    'RNNCell', 'BasicLSTMCell', 'BasicGRUCell', 'RNN', 'DynamicDecode',
    'BeamSearchDecoder', 'MultiHeadAttention', 'FFN',
    'TransformerEncoderLayer', 'TransformerEncoder', 'TransformerDecoderLayer',
    'TransformerDecoder', 'TransformerBeamSearchDecoder', 'Linear_chain_crf',
    'Crf_decoding', 'SequenceTagging'
]
...@@ -188,7 +218,19 @@ class BasicLSTMCell(RNNCell):
                 gate_activation=None,
                 activation=None,
                 forget_bias=1.0,
                 dtype='float32',
forget_gate_weights={"w": None,
"h": None,
"b": None},
input_gate_weights={"w": None,
"h": None,
"b": None},
output_gate_weights={"w": None,
"h": None,
"b": None},
cell_weights={"w": None,
"h": None,
"b": None}):
        super(BasicLSTMCell, self).__init__()

        self._hidden_size = hidden_size
...@@ -202,25 +244,225 @@ class BasicLSTMCell(RNNCell):
        self._dtype = dtype
        self._input_size = input_size

        self.use_customized_weight = False
        for _weights in [
                forget_gate_weights, input_gate_weights, output_gate_weights,
                cell_weights
        ]:
            for _key in _weights:
                if _weights[_key] is not None:
                    self.use_customized_weight = True
                    break
            if self.use_customized_weight:
                break
if not self.use_customized_weight:
self._weight = self.create_parameter(
attr=self._param_attr,
shape=[
self._input_size + self._hidden_size, 4 * self._hidden_size
],
dtype=self._dtype)
self._bias = self.create_parameter(
attr=self._bias_attr,
shape=[4 * self._hidden_size],
dtype=self._dtype,
is_bias=True)
else:
if "w" in forget_gate_weights and forget_gate_weights[
"w"] is not None:
self.fg_w = forget_gate_weights["w"]
else:
if self._param_attr is not None and self._param_attr.name is not None:
tmp_param_attr = copy.deepcopy(self._param_attr)
tmp_param_attr.name += "_forget_gate_w"
else:
tmp_param_attr = self._param_attr
self.fg_w = self.create_parameter(
attr=tmp_param_attr,
shape=[self._input_size, self._hidden_size],
dtype=self._dtype)
if "h" in forget_gate_weights and forget_gate_weights[
"h"] is not None:
self.fg_h = forget_gate_weights["h"]
else:
if self._param_attr is not None and self._param_attr.name is not None:
tmp_param_attr = copy.deepcopy(self._param_attr)
tmp_param_attr.name += "_forget_gate_h"
else:
tmp_param_attr = self._param_attr
self.fg_h = self.create_parameter(
attr=tmp_param_attr,
shape=[self._hidden_size, self._hidden_size],
dtype=self._dtype)
if "b" in forget_gate_weights and forget_gate_weights[
"b"] is not None:
self.fg_b = forget_gate_weights["b"]
else:
if self._bias_attr is not None and self._bias_attr.name is not None:
tmp_param_attr = copy.deepcopy(self._bias_attr)
tmp_param_attr.name += "_forget_gate_b"
else:
tmp_param_attr = self._bias_attr
self.fg_b = self.create_parameter(
attr=tmp_param_attr,
shape=[self._hidden_size],
dtype=self._dtype,
is_bias=True)
if "w" in input_gate_weights and input_gate_weights[
"w"] is not None:
self.ig_w = input_gate_weights["w"]
else:
if self._param_attr is not None and self._param_attr.name is not None:
tmp_param_attr = copy.deepcopy(self._param_attr)
tmp_param_attr.name += "_input_gate_w"
else:
tmp_param_attr = self._param_attr
self.ig_w = self.create_parameter(
attr=tmp_param_attr,
shape=[self._input_size, self._hidden_size],
dtype=self._dtype)
if "h" in input_gate_weights and input_gate_weights[
"h"] is not None:
self.ig_h = input_gate_weights["h"]
else:
if self._param_attr is not None and self._param_attr.name is not None:
tmp_param_attr = copy.deepcopy(self._param_attr)
tmp_param_attr.name += "_input_gate_h"
else:
tmp_param_attr = self._param_attr
self.ig_h = self.create_parameter(
attr=tmp_param_attr,
shape=[self._hidden_size, self._hidden_size],
dtype=self._dtype)
if "b" in input_gate_weights and input_gate_weights[
"b"] is not None:
self.ig_b = input_gate_weights["b"]
else:
if self._bias_attr is not None and self._bias_attr.name is not None:
tmp_param_attr = copy.deepcopy(self._bias_attr)
tmp_param_attr.name += "_input_gate_b"
else:
tmp_param_attr = self._bias_attr
self.ig_b = self.create_parameter(
attr=tmp_param_attr,
shape=[self._hidden_size],
dtype=self._dtype,
is_bias=True)
if "w" in output_gate_weights and output_gate_weights[
"w"] is not None:
self.og_w = output_gate_weights["w"]
else:
if self._param_attr is not None and self._param_attr.name is not None:
tmp_param_attr = copy.deepcopy(self._param_attr)
tmp_param_attr.name += "_output_gate_w"
else:
tmp_param_attr = self._param_attr
self.og_w = self.create_parameter(
attr=tmp_param_attr,
shape=[self._input_size, self._hidden_size],
dtype=self._dtype)
if "h" in output_gate_weights and output_gate_weights[
"h"] is not None:
self.og_h = output_gate_weights["h"]
else:
if self._param_attr is not None and self._param_attr.name is not None:
tmp_param_attr = copy.deepcopy(self._param_attr)
tmp_param_attr.name += "_output_gate_h"
else:
tmp_param_attr = self._param_attr
self.og_h = self.create_parameter(
attr=tmp_param_attr,
shape=[self._hidden_size, self._hidden_size],
dtype=self._dtype)
if "b" in output_gate_weights and output_gate_weights[
"b"] is not None:
self.og_b = output_gate_weights["b"]
else:
if self._bias_attr is not None and self._bias_attr.name is not None:
tmp_param_attr = copy.deepcopy(self._bias_attr)
tmp_param_attr.name += "_output_gate_b"
else:
tmp_param_attr = self._bias_attr
self.og_b = self.create_parameter(
attr=tmp_param_attr,
shape=[self._hidden_size],
dtype=self._dtype,
is_bias=True)
if "w" in cell_weights and cell_weights["w"] is not None:
self.c_w = cell_weights["w"]
else:
if self._param_attr is not None and self._param_attr.name is not None:
tmp_param_attr = copy.deepcopy(self._param_attr)
tmp_param_attr.name += "_cell_w"
else:
tmp_param_attr = self._param_attr
self.c_w = self.create_parameter(
attr=tmp_param_attr,
shape=[self._input_size, self._hidden_size],
dtype=self._dtype)
if "h" in cell_weights and cell_weights["h"] is not None:
self.c_h = cell_weights["h"]
else:
if self._param_attr is not None and self._param_attr.name is not None:
tmp_param_attr = copy.deepcopy(self._param_attr)
tmp_param_attr.name += "_cell_h"
else:
tmp_param_attr = self._param_attr
self.c_h = self.create_parameter(
attr=tmp_param_attr,
shape=[self._hidden_size, self._hidden_size],
dtype=self._dtype)
if "b" in cell_weights and cell_weights["b"] is not None:
self.c_b = cell_weights["b"]
else:
if self._bias_attr is not None and self._bias_attr.name is not None:
tmp_param_attr = copy.deepcopy(self._bias_attr)
tmp_param_attr.name += "_cell_b"
else:
tmp_param_attr = self._bias_attr
self.c_b = self.create_parameter(
attr=tmp_param_attr,
shape=[self._hidden_size],
dtype=self._dtype,
is_bias=True)
    def forward(self, input, state):
if self.use_customized_weight:
weight_w = fluid.layers.concat(
[self.ig_w, self.c_w, self.fg_w, self.og_w], axis=-1)
weight_h = fluid.layers.concat(
[self.ig_h, self.c_h, self.fg_h, self.og_h], axis=-1)
_weight = fluid.layers.concat([weight_w, weight_h], axis=0)
_bias = fluid.layers.concat(
[self.ig_b, self.c_b, self.fg_b, self.og_b])
else:
_weight = self._weight
_bias = self._bias
        pre_hidden, pre_cell = state
        concat_input_hidden = layers.concat([input, pre_hidden], 1)
        gate_input = layers.matmul(x=concat_input_hidden, y=_weight)
        gate_input = layers.elementwise_add(gate_input, _bias)
        i, j, f, o = layers.split(gate_input, num_or_sections=4, dim=-1)
        new_cell = layers.elementwise_add(
            layers.elementwise_mul(
...@@ -277,16 +519,39 @@ class BasicGRUCell(RNNCell):
                 bias_attr=None,
                 gate_activation=None,
                 activation=None,
                 dtype='float32',
update_gate_weights={"w": None,
"h": None,
"b": None},
reset_gate_weights={"w": None,
"h": None,
"b": None},
cell_weights={"w": None,
"h": None,
"b": None}):
        super(BasicGRUCell, self).__init__()
        self._input_size = input_size
        self._hidden_size = hidden_size
        self._param_attr = param_attr
        self._bias_attr = bias_attr
        self._gate_activation = gate_activation or layers.sigmoid
        self._activation = activation or layers.tanh
        self._dtype = dtype
assert isinstance(update_gate_weights, dict)
assert isinstance(reset_gate_weights, dict)
assert isinstance(cell_weights, dict)
self.use_customized_weight = False
for _weights in [
update_gate_weights, reset_gate_weights, cell_weights
]:
for _key in _weights:
if _weights[_key] is not None:
self.use_customized_weight = True
if self.use_customized_weight:
break
        if self._param_attr is not None and self._param_attr.name is not None:
            gate_param_attr = copy.deepcopy(self._param_attr)
            candidate_param_attr = copy.deepcopy(self._param_attr)
...@@ -296,43 +561,194 @@ class BasicGRUCell(RNNCell):
            gate_param_attr = self._param_attr
            candidate_param_attr = self._param_attr
        if not self.use_customized_weight:
            self._gate_weight = self.create_parameter(
                attr=gate_param_attr,
                shape=[
                    self._input_size + self._hidden_size, 2 * self._hidden_size
                ],
                dtype=self._dtype)

            self._candidate_weight = self.create_parameter(
                attr=candidate_param_attr,
                shape=[
                    self._input_size + self._hidden_size, self._hidden_size
                ],
                dtype=self._dtype)

            if self._bias_attr is not None and self._bias_attr.name is not None:
                gate_bias_attr = copy.deepcopy(self._bias_attr)
                candidate_bias_attr = copy.deepcopy(self._bias_attr)
                gate_bias_attr.name += "_gate"
                candidate_bias_attr.name += "_candidate"
            else:
                gate_bias_attr = self._bias_attr
                candidate_bias_attr = self._bias_attr

            self._gate_bias = self.create_parameter(
                attr=gate_bias_attr,
                shape=[2 * self._hidden_size],
                dtype=self._dtype,
                is_bias=True)
            self._candidate_bias = self.create_parameter(
                attr=candidate_bias_attr,
                shape=[self._hidden_size],
                dtype=self._dtype,
                is_bias=True)
        else:
            # create the parameters of gates in gru
            if "w" in update_gate_weights and update_gate_weights[
                    "w"] is not None:
                self.ug_w = update_gate_weights["w"]
            else:
                if gate_param_attr is not None and gate_param_attr.name is not None:
                    tmp_param_attr = copy.deepcopy(gate_param_attr)
                    tmp_param_attr.name += "_update_gate_w"
                else:
                    tmp_param_attr = gate_param_attr
                self.ug_w = self.create_parameter(
                    attr=tmp_param_attr,
shape=[self._input_size, self._hidden_size],
dtype=self._dtype)
if "h" in update_gate_weights and update_gate_weights[
"h"] is not None:
self.ug_h = update_gate_weights["h"]
else:
if gate_param_attr is not None and gate_param_attr.name is not None:
tmp_param_attr = copy.deepcopy(gate_param_attr)
tmp_param_attr.name += "_update_gate_h"
else:
tmp_param_attr = gate_param_attr
self.ug_h = self.create_parameter(
attr=tmp_param_attr,
shape=[self._hidden_size, self._hidden_size],
dtype=self._dtype)
if "b" in update_gate_weights and update_gate_weights[
"b"] is not None:
self.ug_b = update_gate_weights["b"]
else:
if gate_bias_attr is not None and gate_bias_attr.name is not None:
tmp_param_attr = copy.deepcopy(gate_bias_attr)
tmp_param_attr.name += "_update_gate_b"
else:
tmp_param_attr = gate_bias_attr
self.ug_b = self.create_parameter(
attr=tmp_param_attr,
shape=[self._hidden_size],
dtype=self._dtype,
is_bias=True)
# reset gate parameters
if "w" in reset_gate_weights and reset_gate_weights[
"w"] is not None:
self.rg_w = reset_gate_weights["w"]
else:
if gate_param_attr is not None and gate_param_attr.name is not None:
tmp_param_attr = copy.deepcopy(gate_param_attr)
tmp_param_attr.name += "_reset_gate_w"
else:
tmp_param_attr = gate_param_attr
self.rg_w = self.create_parameter(
attr=tmp_param_attr,
shape=[self._input_size, self._hidden_size],
dtype=self._dtype)
if "h" in reset_gate_weights and reset_gate_weights[
"h"] is not None:
self.rg_h = reset_gate_weights["h"]
else:
if gate_param_attr is not None and gate_param_attr.name is not None:
tmp_param_attr = copy.deepcopy(gate_param_attr)
tmp_param_attr.name += "_reset_gate_h"
else:
tmp_param_attr = gate_param_attr
self.rg_h = self.create_parameter(
attr=tmp_param_attr,
shape=[self._hidden_size, self._hidden_size],
dtype=self._dtype)
if "b" in reset_gate_weights and reset_gate_weights[
"b"] is not None:
                self.rg_b = reset_gate_weights["b"]
else:
if gate_bias_attr is not None and gate_bias_attr.name is not None:
tmp_param_attr = copy.deepcopy(gate_bias_attr)
tmp_param_attr.name += "_reset_gate_b"
else:
tmp_param_attr = gate_bias_attr
self.rg_b = self.create_parameter(
attr=tmp_param_attr,
shape=[self._hidden_size],
dtype=self._dtype,
is_bias=True)
# cell parameters
if "w" in cell_weights and cell_weights["w"] is not None:
self.c_w = cell_weights["w"]
else:
if candidate_param_attr is not None and candidate_param_attr.name is not None:
tmp_param_attr = copy.deepcopy(candidate_param_attr)
tmp_param_attr.name += "_cell_w"
else:
                    tmp_param_attr = candidate_param_attr
self.c_w = self.create_parameter(
attr=tmp_param_attr,
shape=[self._input_size, self._hidden_size],
dtype=self._dtype)
if "h" in cell_weights and cell_weights["h"] is not None:
self.c_h = cell_weights["h"]
else:
if candidate_param_attr is not None and candidate_param_attr.name is not None:
tmp_param_attr = copy.deepcopy(candidate_param_attr)
tmp_param_attr.name += "_cell_h"
else:
                    tmp_param_attr = candidate_param_attr
self.c_h = self.create_parameter(
attr=tmp_param_attr,
shape=[self._hidden_size, self._hidden_size],
dtype=self._dtype)
if "b" in cell_weights and cell_weights["b"] is not None:
self.c_b = cell_weights["b"]
else:
if candidate_bias_attr is not None and candidate_bias_attr.name is not None:
tmp_param_attr = copy.deepcopy(candidate_bias_attr)
tmp_param_attr.name += "_cell_b"
else:
                    tmp_param_attr = candidate_bias_attr
self.c_b = self.create_parameter(
attr=tmp_param_attr,
shape=[self._hidden_size],
dtype=self._dtype,
is_bias=True)
    def forward(self, input, state):
if self.use_customized_weight:
rg_weights = layers.concat([self.rg_w, self.rg_h], axis=0)
ug_weights = layers.concat([self.ug_w, self.ug_h], axis=0)
_gate_weight = layers.concat([rg_weights, ug_weights], axis=-1)
_candidate_weight = layers.concat([self.c_w, self.c_h], axis=0)
_gate_bias = layers.concat([self.rg_b, self.ug_b], axis=0)
_candidate_bias = self.c_b
else:
_gate_weight = self._gate_weight
_gate_bias = self._gate_bias
_candidate_weight = self._candidate_weight
_candidate_bias = self._candidate_bias
        pre_hidden = state
        concat_input_hidden = layers.concat([input, pre_hidden], axis=1)

        gate_input = layers.matmul(x=concat_input_hidden, y=_gate_weight)
        gate_input = layers.elementwise_add(gate_input, _gate_bias)
        gate_input = self._gate_activation(gate_input)
        r, u = layers.split(gate_input, num_or_sections=2, dim=1)
...@@ -340,8 +756,8 @@ class BasicGRUCell(RNNCell):
        r_hidden = r * pre_hidden

        candidate = layers.matmul(
            layers.concat([input, r_hidden], 1), _candidate_weight)
        candidate = layers.elementwise_add(candidate, _candidate_bias)
        c = self._activation(candidate)
        new_hidden = u * pre_hidden + (1 - u) * c
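# Usage sketch for BasicGRUCell (sizes are illustrative): the cell maps a
# [batch, input_size] step input and a [batch, hidden_size] state to a new
# hidden state, returned both as step output and as next state:
#
#     cell = BasicGRUCell(input_size=4, hidden_size=8)
#     new_hidden, next_state = cell(step_input, pre_hidden)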
...@@ -669,7 +1085,11 @@ class PrePostProcessLayer(Layer):
    PrePostProcessLayer
    """

    def __init__(self,
process_cmd,
d_model,
dropout_rate,
reused_layer_norm=None):
        super(PrePostProcessLayer, self).__init__()
        self.process_cmd = process_cmd
        self.functors = []
...@@ -677,16 +1097,21 @@ class PrePostProcessLayer(Layer):
            if cmd == "a":  # add residual connection
                self.functors.append(lambda x, y: x + y if y else x)
            elif cmd == "n":  # add layer normalization
if reused_layer_norm is not None:
layer_norm = reused_layer_norm
else:
layer_norm = LayerNorm(
normalized_shape=d_model,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(1.)),
bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(0.)))
                self.functors.append(
                    self.add_sublayer(
                        "layer_norm_%d" % len(
                            self.sublayers(include_sublayers=False)),
                        layer_norm))
            elif cmd == "d":  # add dropout
                self.functors.append(lambda x: layers.dropout(
                    x, dropout_prob=dropout_rate, is_test=False)
...@@ -706,21 +1131,48 @@ class MultiHeadAttention(Layer):
    Multi-Head Attention
    """

    def __init__(self,
d_key,
d_value,
d_model,
n_head=1,
dropout_rate=0.0,
reused_query_fc=None,
reused_key_fc=None,
reused_value_fc=None,
reused_proj_fc=None):
        super(MultiHeadAttention, self).__init__()
        self.n_head = n_head
        self.d_key = d_key
        self.d_value = d_value
        self.d_model = d_model
        self.dropout_rate = dropout_rate

        if reused_query_fc is not None:
            self.q_fc = reused_query_fc
        else:
            self.q_fc = Linear(
                input_dim=d_model, output_dim=d_key * n_head, bias_attr=False)
        if reused_key_fc is not None:
            self.k_fc = reused_key_fc
else:
self.k_fc = Linear(
input_dim=d_model, output_dim=d_key * n_head, bias_attr=False)
if reused_value_fc is not None:
self.v_fc = reused_value_fc
else:
self.v_fc = Linear(
input_dim=d_model,
output_dim=d_value * n_head,
bias_attr=False)
if reused_proj_fc is not None:
self.proj_fc = reused_proj_fc
else:
self.proj_fc = Linear(
input_dim=d_value * n_head,
output_dim=d_model,
bias_attr=False)
    def _prepare_qkv(self, queries, keys, values, cache=None):
        if keys is None:  # self-attention
...@@ -797,12 +1249,24 @@ class FFN(Layer):
    Feed-Forward Network
    """

    def __init__(self,
d_inner_hid,
d_model,
dropout_rate,
fc1_act="relu",
reused_fc1=None,
reused_fc2=None):
        super(FFN, self).__init__()
        self.dropout_rate = dropout_rate
        if reused_fc1 is not None:
            self.fc1 = reused_fc1
        else:
self.fc1 = Linear(
input_dim=d_model, output_dim=d_inner_hid, act=fc1_act)
if reused_fc2 is not None:
self.fc2 = reused_fc2
else:
self.fc2 = Linear(input_dim=d_inner_hid, output_dim=d_model)
    def forward(self, x):
        hidden = self.fc1(x)
...@@ -828,22 +1292,52 @@ class TransformerEncoderLayer(Layer):
                 attention_dropout,
                 relu_dropout,
                 preprocess_cmd="n",
                 postprocess_cmd="da",
ffn_fc1_act="relu",
reused_pre_selatt_layernorm=None,
reused_multihead_att_weights={
"reused_query_fc": None,
"reused_key_fc": None,
"reused_value_fc": None,
"reused_proj_fc": None
},
reused_post_selfatt_layernorm=None,
reused_pre_ffn_layernorm=None,
reused_ffn_weights={"reused_fc1": None,
"reused_fc2": None},
reused_post_ffn_layernorm=None):
        super(TransformerEncoderLayer, self).__init__()

        self.preprocesser1 = PrePostProcessLayer(preprocess_cmd, d_model,
                                                 prepostprocess_dropout,
                                                 reused_pre_selatt_layernorm)
        self.self_attn = MultiHeadAttention(
            d_key,
            d_value,
            d_model,
            n_head,
            attention_dropout,
            reused_query_fc=reused_multihead_att_weights["reused_query_fc"],
            reused_key_fc=reused_multihead_att_weights["reused_key_fc"],
            reused_value_fc=reused_multihead_att_weights["reused_value_fc"],
            reused_proj_fc=reused_multihead_att_weights["reused_proj_fc"])
        self.postprocesser1 = PrePostProcessLayer(
            postprocess_cmd, d_model, prepostprocess_dropout,
            reused_post_selfatt_layernorm)

        self.preprocesser2 = PrePostProcessLayer(preprocess_cmd, d_model,
                                                 prepostprocess_dropout,
                                                 reused_pre_ffn_layernorm)
self.ffn = FFN(d_inner_hid,
d_model,
relu_dropout,
fc1_act=ffn_fc1_act,
reused_fc1=reused_ffn_weights["reused_fc1"],
reused_fc2=reused_ffn_weights["reused_fc2"])
        self.postprocesser2 = PrePostProcessLayer(postprocess_cmd, d_model,
                                                  prepostprocess_dropout,
reused_post_ffn_layernorm)
    def forward(self, enc_input, attn_bias):
        attn_output = self.self_attn(
...@@ -871,7 +1365,8 @@ class TransformerEncoder(Layer):
                 attention_dropout,
                 relu_dropout,
                 preprocess_cmd="n",
                 postprocess_cmd="da",
ffn_fc1_act="relu"):
        super(TransformerEncoder, self).__init__()
...@@ -881,9 +1376,17 @@ class TransformerEncoder(Layer):
                self.add_sublayer(
                    "layer_%d" % i,
                    TransformerEncoderLayer(
                        n_head,
                        d_key,
                        d_value,
d_model,
d_inner_hid,
prepostprocess_dropout,
attention_dropout,
relu_dropout,
preprocess_cmd,
postprocess_cmd,
ffn_fc1_act=ffn_fc1_act)))
        self.processer = PrePostProcessLayer(preprocess_cmd, d_model,
                                             prepostprocess_dropout)
...@@ -910,28 +1413,79 @@ class TransformerDecoderLayer(Layer):
                 attention_dropout,
                 relu_dropout,
                 preprocess_cmd="n",
                 postprocess_cmd="da",
reused_pre_selfatt_layernorm=None,
reused_self_multihead_att_weights={
"reused_query_fc": None,
"reused_key_fc": None,
"reused_value_fc": None,
"reused_proj_fc": None
},
reused_post_selfatt_layernorm=None,
reused_pre_crossatt_layernorm=None,
reused_cross_multihead_att_weights={
"reused_query_fc": None,
"reused_key_fc": None,
"reused_value_fc": None,
"reused_proj_fc": None
},
reused_post_crossatt_layernorm=None,
reused_pre_ffn_layernorm=None,
reused_ffn_weights={"reused_fc1": None,
"reused_fc2": None},
reused_post_ffn_layernorm=None):
        super(TransformerDecoderLayer, self).__init__()

        self.preprocesser1 = PrePostProcessLayer(preprocess_cmd, d_model,
                                                 prepostprocess_dropout,
                                                 reused_pre_selfatt_layernorm)
        self.self_attn = MultiHeadAttention(
            d_key,
            d_value,
d_model,
n_head,
attention_dropout,
reused_query_fc=reused_self_multihead_att_weights[
"reused_query_fc"],
reused_key_fc=reused_self_multihead_att_weights["reused_key_fc"],
reused_value_fc=reused_self_multihead_att_weights[
"reused_value_fc"],
reused_proj_fc=reused_self_multihead_att_weights["reused_proj_fc"])
self.postprocesser1 = PrePostProcessLayer(
postprocess_cmd, d_model, prepostprocess_dropout,
reused_post_selfatt_layernorm)
        self.preprocesser2 = PrePostProcessLayer(preprocess_cmd, d_model,
                                                 prepostprocess_dropout,
                                                 reused_pre_crossatt_layernorm)
        self.cross_attn = MultiHeadAttention(
            d_key,
            d_value,
d_model,
n_head,
attention_dropout,
reused_query_fc=reused_cross_multihead_att_weights[
"reused_query_fc"],
reused_key_fc=reused_cross_multihead_att_weights["reused_key_fc"],
reused_value_fc=reused_cross_multihead_att_weights[
"reused_value_fc"],
reused_proj_fc=reused_cross_multihead_att_weights[
"reused_proj_fc"])
self.postprocesser2 = PrePostProcessLayer(
postprocess_cmd, d_model, prepostprocess_dropout,
reused_post_crossatt_layernorm)
        self.preprocesser3 = PrePostProcessLayer(preprocess_cmd, d_model,
                                                 prepostprocess_dropout,
                                                 reused_pre_ffn_layernorm)
self.ffn = FFN(d_inner_hid,
d_model,
relu_dropout,
reused_fc1=reused_ffn_weights["reused_fc1"],
reused_fc2=reused_ffn_weights["reused_fc2"])
        self.postprocesser3 = PrePostProcessLayer(postprocess_cmd, d_model,
                                                  prepostprocess_dropout,
reused_post_ffn_layernorm)
    def forward(self,
                dec_input,
...@@ -998,3 +1552,304 @@ class TransformerDecoder(Layer):
                    decoder_layer.cross_attn.cal_kv(enc_output, enc_output)))
                for decoder_layer in self.decoder_layers
            ]
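# A hedged sketch of the weight sharing enabled by the reused_* hooks above:
# two encoder layers can point at one set of attention/FFN projections
# (the dimensions are arbitrary, chosen only for illustration):
def _demo_shared_encoder_layers():
    with fluid.dygraph.guard():
        base = TransformerEncoderLayer(2, 16, 16, 32, 64, 0.1, 0.1, 0.1)
        tied = TransformerEncoderLayer(
            2, 16, 16, 32, 64, 0.1, 0.1, 0.1,
            reused_multihead_att_weights={
                "reused_query_fc": base.self_attn.q_fc,
                "reused_key_fc": base.self_attn.k_fc,
                "reused_value_fc": base.self_attn.v_fc,
                "reused_proj_fc": base.self_attn.proj_fc
            },
            reused_ffn_weights={
                "reused_fc1": base.ffn.fc1,
                "reused_fc2": base.ffn.fc2
            })
        # both layers now hold the very same Linear sublayers
        assert tied.self_attn.q_fc is base.self_attn.q_fc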
#TODO: we should merge GRUCell with BasicGRUCell
class GRUCell(RNNCell):
def __init__(self,
input_size,
hidden_size,
param_attr=None,
bias_attr=None,
gate_activation='sigmoid',
candidate_activation='tanh',
origin_mode=False):
super(GRUCell, self).__init__()
self.hidden_size = hidden_size
self.fc_layer = Linear(
input_size, hidden_size * 3, param_attr=param_attr)
self.gru_unit = GRUUnit(
hidden_size * 3,
param_attr=param_attr,
bias_attr=bias_attr,
activation=candidate_activation,
gate_activation=gate_activation,
origin_mode=origin_mode)
def forward(self, inputs, states):
# for GRUCell, `step_outputs` and `new_states` both are hidden
x = self.fc_layer(inputs)
hidden, _, _ = self.gru_unit(x, states)
return hidden, hidden
@property
def state_shape(self):
return [self.hidden_size]
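# Usage sketch for GRUCell (sizes are illustrative; np and to_variable are
# already imported at the top of this file):
def _demo_gru_cell(batch_size=2, input_size=4, hidden_size=8):
    with fluid.dygraph.guard():
        cell = GRUCell(input_size, hidden_size)
        step_in = to_variable(
            np.random.rand(batch_size, input_size).astype('float32'))
        init_h = to_variable(
            np.zeros((batch_size, hidden_size), dtype='float32'))
        out, new_h = cell(step_in, init_h)
        # step output and new state are the same hidden tensor
        assert out.shape == [batch_size, hidden_size]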
#TODO: we should merge GRUCell with BasicGRUCell
class GRUEncoderCell(RNNCell):
def __init__(self,
num_layers,
input_size,
hidden_size,
dropout_prob=0.,
init_scale=0.1):
super(GRUEncoderCell, self).__init__()
self.dropout_prob = dropout_prob
# use add_sublayer to add multi-layers
self.gru_cells = []
for i in range(num_layers):
self.gru_cells.append(
self.add_sublayer(
"gru_%d" % i,
#BasicGRUCell(
GRUCell(
input_size=input_size if i == 0 else hidden_size,
hidden_size=hidden_size,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.UniformInitializer(
low=-init_scale, high=init_scale)))))
def forward(self, step_input, states):
new_states = []
for i, gru_cell in enumerate(self.gru_cells):
out, state = gru_cell(step_input, states[i])
step_input = layers.dropout(
out,
self.dropout_prob,
dropout_implementation='upscale_in_train'
) if self.dropout_prob > 0 else out
new_states.append(step_input)
return step_input, new_states
@property
def state_shape(self):
return [cell.state_shape for cell in self.gru_cells]
class BiGRU(fluid.dygraph.Layer):
def __init__(self, input_dim, grnn_hidden_dim, init_bound, h_0=None):
super(BiGRU, self).__init__()
self.gru = RNN(GRUEncoderCell(1, input_dim, grnn_hidden_dim, 0.0,
init_bound),
is_reverse=False,
time_major=False)
self.gru_r = RNN(GRUEncoderCell(1, input_dim, grnn_hidden_dim, 0.0,
init_bound),
is_reverse=True,
time_major=False)
def forward(self, input_feature):
pre_gru, pre_state = self.gru(input_feature)
gru_r, r_state = self.gru_r(input_feature)
bi_merge = fluid.layers.concat(input=[pre_gru, gru_r], axis=-1)
return bi_merge
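# BiGRU usage sketch: it consumes [batch, seq_len, dim] features and returns
# the forward and reverse GRU outputs concatenated on the last axis (the
# sizes below are illustrative):
def _demo_bigru(batch_size=2, seq_len=5, dim=4, hidden=8):
    with fluid.dygraph.guard():
        bigru = BiGRU(dim, hidden, init_bound=0.1)
        feats = to_variable(
            np.random.rand(batch_size, seq_len, dim).astype('float32'))
        merged = bigru(feats)
        assert merged.shape == [batch_size, seq_len, hidden * 2]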
class Linear_chain_crf(fluid.dygraph.Layer):
def __init__(self, param_attr, size=None, is_test=False, dtype='float32'):
super(Linear_chain_crf, self).__init__()
self._param_attr = param_attr
self._dtype = dtype
self._size = size
self._is_test = is_test
self._transition = self.create_parameter(
attr=self._param_attr,
shape=[self._size + 2, self._size],
dtype=self._dtype)
@property
def weight(self):
return self._transition
@weight.setter
def weight(self, value):
self._transition = value
def forward(self, input, label, length=None):
alpha = self._helper.create_variable_for_type_inference(
dtype=self._dtype)
emission_exps = self._helper.create_variable_for_type_inference(
dtype=self._dtype)
transition_exps = self._helper.create_variable_for_type_inference(
dtype=self._dtype)
log_likelihood = self._helper.create_variable_for_type_inference(
dtype=self._dtype)
this_inputs = {
"Emission": [input],
"Transition": self._transition,
"Label": [label]
}
if length is not None:
this_inputs['Length'] = [length]
self._helper.append_op(
type='linear_chain_crf',
inputs=this_inputs,
outputs={
"Alpha": [alpha],
"EmissionExps": [emission_exps],
"TransitionExps": transition_exps,
"LogLikelihood": log_likelihood
},
attrs={"is_test": self._is_test, })
return log_likelihood
class Crf_decoding(fluid.dygraph.Layer):
def __init__(self, param_attr, size=None, is_test=False, dtype='float32'):
super(Crf_decoding, self).__init__()
self._dtype = dtype
self._size = size
self._is_test = is_test
self._param_attr = param_attr
self._transition = self.create_parameter(
attr=self._param_attr,
shape=[self._size + 2, self._size],
dtype=self._dtype)
@property
def weight(self):
return self._transition
@weight.setter
def weight(self, value):
self._transition = value
def forward(self, input, label=None, length=None):
viterbi_path = self._helper.create_variable_for_type_inference(
dtype=self._dtype)
this_inputs = {
"Emission": [input],
"Transition": self._transition,
"Label": label
}
if length is not None:
this_inputs['Length'] = [length]
self._helper.append_op(
type='crf_decoding',
inputs=this_inputs,
outputs={"ViterbiPath": [viterbi_path]},
attrs={"is_test": self._is_test, })
return viterbi_path
class SequenceTagging(fluid.dygraph.Layer):
def __init__(self,
vocab_size,
num_labels,
batch_size,
word_emb_dim=128,
grnn_hidden_dim=128,
emb_learning_rate=0.1,
crf_learning_rate=0.1,
bigru_num=2,
init_bound=0.1,
length=None):
super(SequenceTagging, self).__init__()
"""
define the sequence tagging network structure
word: stores the input of the model
for_infer: a boolean value, indicating if the model to be created is for training or predicting.
return:
for infer: return the prediction
otherwise: return the prediction
"""
self.word_emb_dim = word_emb_dim
self.vocab_size = vocab_size
self.num_labels = num_labels
self.grnn_hidden_dim = grnn_hidden_dim
self.emb_lr = emb_learning_rate
self.crf_lr = crf_learning_rate
self.bigru_num = bigru_num
self.batch_size = batch_size
        self.init_bound = init_bound
self.word_embedding = Embedding(
size=[self.vocab_size, self.word_emb_dim],
dtype='float32',
param_attr=fluid.ParamAttr(
learning_rate=self.emb_lr,
name="word_emb",
initializer=fluid.initializer.Uniform(
low=-self.init_bound, high=self.init_bound)))
h_0 = fluid.layers.create_global_var(
shape=[self.batch_size, self.grnn_hidden_dim],
value=0.0,
dtype='float32',
persistable=True,
force_cpu=True,
name='h_0')
self.bigru_units = []
for i in range(self.bigru_num):
if i == 0:
self.bigru_units.append(
self.add_sublayer(
"bigru_units%d" % i,
BiGRU(
self.grnn_hidden_dim,
self.grnn_hidden_dim,
self.init_bound,
h_0=h_0)))
else:
self.bigru_units.append(
self.add_sublayer(
"bigru_units%d" % i,
BiGRU(
self.grnn_hidden_dim * 2,
self.grnn_hidden_dim,
self.init_bound,
h_0=h_0)))
self.fc = Linear(
input_dim=self.grnn_hidden_dim * 2,
output_dim=self.num_labels,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-self.init_bound, high=self.init_bound),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)))
self.linear_chain_crf = Linear_chain_crf(
param_attr=fluid.ParamAttr(
name='linear_chain_crfw', learning_rate=self.crf_lr),
size=self.num_labels)
self.crf_decoding = Crf_decoding(
param_attr=fluid.ParamAttr(
name='crfw', learning_rate=self.crf_lr),
size=self.num_labels)
def forward(self, word, lengths, target=None):
"""
Configure the network
"""
word_embed = self.word_embedding(word)
input_feature = word_embed
for i in range(self.bigru_num):
bigru_output = self.bigru_units[i](input_feature)
input_feature = bigru_output
emission = self.fc(bigru_output)
if target is not None:
crf_cost = self.linear_chain_crf(
input=emission, label=target, length=lengths)
avg_cost = fluid.layers.mean(x=crf_cost)
self.crf_decoding.weight = self.linear_chain_crf.weight
crf_decode = self.crf_decoding(input=emission, length=lengths)
return crf_decode, avg_cost, lengths
else:
self.linear_chain_crf.weight = self.crf_decoding.weight
crf_decode = self.crf_decoding(input=emission, length=lengths)
return crf_decode, lengths
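# End-to-end sketch for SequenceTagging in dygraph mode; the vocabulary
# size, label count and the random batch are made up for illustration:
def _demo_sequence_tagging(vocab_size=100, num_labels=10, batch_size=2,
                           seq_len=6):
    with fluid.dygraph.guard():
        model = SequenceTagging(vocab_size, num_labels, batch_size)
        words = to_variable(np.random.randint(
            0, vocab_size, (batch_size, seq_len)).astype('int64'))
        lengths = to_variable(np.array([seq_len, seq_len - 2], dtype='int64'))
        targets = to_variable(np.random.randint(
            0, num_labels, (batch_size, seq_len)).astype('int64'))
        # with targets: CRF decoding result, average CRF cost and lengths
        crf_decode, avg_cost, _ = model(words, lengths, targets)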
# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tokenization classes."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import unicodedata
import six
import io
def convert_to_unicode(text):
"""Converts `text` to Unicode (if it's not already), assuming utf-8 input."""
if six.PY3:
if isinstance(text, str):
return text
elif isinstance(text, bytes):
return text.decode("utf-8", "ignore")
else:
raise ValueError("Unsupported string type: %s" % (type(text)))
elif six.PY2:
if isinstance(text, str):
return text.decode("utf-8", "ignore")
elif isinstance(text, unicode):
return text
else:
raise ValueError("Unsupported string type: %s" % (type(text)))
else:
raise ValueError("Not running on Python2 or Python 3?")
def printable_text(text):
"""Returns text encoded in a way suitable for print or `tf.logging`."""
# These functions want `str` for both Python2 and Python3, but in one case
# it's a Unicode string and in the other it's a byte string.
if six.PY3:
if isinstance(text, str):
return text
elif isinstance(text, bytes):
return text.decode("utf-8", "ignore")
else:
raise ValueError("Unsupported string type: %s" % (type(text)))
elif six.PY2:
if isinstance(text, str):
return text
elif isinstance(text, unicode):
return text.encode("utf-8")
else:
raise ValueError("Unsupported string type: %s" % (type(text)))
else:
raise ValueError("Not running on Python2 or Python 3?")
def load_vocab(vocab_file):
"""Loads a vocabulary file into a dictionary."""
vocab = collections.OrderedDict()
fin = io.open(vocab_file, encoding="utf8")
for num, line in enumerate(fin):
items = convert_to_unicode(line.strip()).split("\t")
if len(items) > 2:
break
token = items[0]
index = items[1] if len(items) == 2 else num
token = token.strip()
vocab[token] = int(index)
return vocab
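# load_vocab accepts either one token per line or "token<TAB>id" lines; a
# runnable sketch with a throwaway file (the path is illustrative):
def _demo_load_vocab(tmp_path="/tmp/demo_vocab.txt"):
    with io.open(tmp_path, "w", encoding="utf8") as fout:
        fout.write(u"[PAD]\n[UNK]\nhello\nworld\n")
    vocab = load_vocab(tmp_path)
    assert vocab[u"hello"] == 2 and vocab[u"world"] == 3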
def convert_by_vocab(vocab, items):
"""Converts a sequence of [tokens|ids] using the vocab."""
output = []
for item in items:
output.append(vocab[item])
return output
def convert_tokens_to_ids(vocab, tokens):
return convert_by_vocab(vocab, tokens)
def convert_ids_to_tokens(inv_vocab, ids):
return convert_by_vocab(inv_vocab, ids)
def whitespace_tokenize(text):
"""Runs basic whitespace cleaning and splitting on a peice of text."""
text = text.strip()
if not text:
return []
tokens = text.split()
return tokens
class FullTokenizer(object):
"""Runs end-to-end tokenziation."""
def __init__(self, vocab_file, do_lower_case=True):
self.vocab = load_vocab(vocab_file)
self.inv_vocab = {v: k for k, v in self.vocab.items()}
self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case)
self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab)
def tokenize(self, text):
split_tokens = []
for token in self.basic_tokenizer.tokenize(text):
for sub_token in self.wordpiece_tokenizer.tokenize(token):
split_tokens.append(sub_token)
return split_tokens
def convert_tokens_to_ids(self, tokens):
return convert_by_vocab(self.vocab, tokens)
def convert_ids_to_tokens(self, ids):
return convert_by_vocab(self.inv_vocab, ids)
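# Hedged usage sketch for FullTokenizer with a toy WordPiece vocab written
# to a throwaway path (both the path and vocab entries are illustrative):
def _demo_full_tokenizer(tmp_vocab="/tmp/demo_wordpiece_vocab.txt"):
    with io.open(tmp_vocab, "w", encoding="utf8") as fout:
        fout.write(u"[UNK]\nun\n##able\n")
    tokenizer = FullTokenizer(vocab_file=tmp_vocab, do_lower_case=True)
    tokens = tokenizer.tokenize(u"Unable")  # -> ["un", "##able"]
    return tokenizer.convert_tokens_to_ids(tokens)  # -> [1, 2]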
class CharTokenizer(object):
"""Runs end-to-end tokenziation."""
def __init__(self, vocab_file, do_lower_case=True):
self.vocab = load_vocab(vocab_file)
self.inv_vocab = {v: k for k, v in self.vocab.items()}
self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab)
def tokenize(self, text):
split_tokens = []
for token in text.lower().split(" "):
for sub_token in self.wordpiece_tokenizer.tokenize(token):
split_tokens.append(sub_token)
return split_tokens
def convert_tokens_to_ids(self, tokens):
return convert_by_vocab(self.vocab, tokens)
def convert_ids_to_tokens(self, ids):
return convert_by_vocab(self.inv_vocab, ids)
class BasicTokenizer(object):
"""Runs basic tokenization (punctuation splitting, lower casing, etc.)."""
def __init__(self, do_lower_case=True):
"""Constructs a BasicTokenizer.
Args:
do_lower_case: Whether to lower case the input.
"""
self.do_lower_case = do_lower_case
def tokenize(self, text):
"""Tokenizes a piece of text."""
text = convert_to_unicode(text)
text = self._clean_text(text)
# This was added on November 1st, 2018 for the multilingual and Chinese
# models. This is also applied to the English models now, but it doesn't
# matter since the English models were not trained on any Chinese data
# and generally don't have any Chinese data in them (there are Chinese
# characters in the vocabulary because Wikipedia does have some Chinese
# words in the English Wikipedia.).
text = self._tokenize_chinese_chars(text)
orig_tokens = whitespace_tokenize(text)
split_tokens = []
for token in orig_tokens:
if self.do_lower_case:
token = token.lower()
token = self._run_strip_accents(token)
split_tokens.extend(self._run_split_on_punc(token))
output_tokens = whitespace_tokenize(" ".join(split_tokens))
return output_tokens
def _run_strip_accents(self, text):
"""Strips accents from a piece of text."""
text = unicodedata.normalize("NFD", text)
output = []
for char in text:
cat = unicodedata.category(char)
if cat == "Mn":
continue
output.append(char)
return "".join(output)
def _run_split_on_punc(self, text):
"""Splits punctuation on a piece of text."""
chars = list(text)
i = 0
start_new_word = True
output = []
while i < len(chars):
char = chars[i]
if _is_punctuation(char):
output.append([char])
start_new_word = True
else:
if start_new_word:
output.append([])
start_new_word = False
output[-1].append(char)
i += 1
return ["".join(x) for x in output]
def _tokenize_chinese_chars(self, text):
"""Adds whitespace around any CJK character."""
output = []
for char in text:
cp = ord(char)
if self._is_chinese_char(cp):
output.append(" ")
output.append(char)
output.append(" ")
else:
output.append(char)
return "".join(output)
def _is_chinese_char(self, cp):
"""Checks whether CP is the codepoint of a CJK character."""
# This defines a "chinese character" as anything in the CJK Unicode block:
# https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
#
# Note that the CJK Unicode block is NOT all Japanese and Korean characters,
# despite its name. The modern Korean Hangul alphabet is a different block,
# as is Japanese Hiragana and Katakana. Those alphabets are used to write
# space-separated words, so they are not treated specially and handled
        # like all of the other languages.
if ((cp >= 0x4E00 and cp <= 0x9FFF) or #
(cp >= 0x3400 and cp <= 0x4DBF) or #
(cp >= 0x20000 and cp <= 0x2A6DF) or #
(cp >= 0x2A700 and cp <= 0x2B73F) or #
(cp >= 0x2B740 and cp <= 0x2B81F) or #
(cp >= 0x2B820 and cp <= 0x2CEAF) or
(cp >= 0xF900 and cp <= 0xFAFF) or #
(cp >= 0x2F800 and cp <= 0x2FA1F)): #
return True
return False
def _clean_text(self, text):
"""Performs invalid character removal and whitespace cleanup on text."""
output = []
for char in text:
cp = ord(char)
if cp == 0 or cp == 0xfffd or _is_control(char):
continue
if _is_whitespace(char):
output.append(" ")
else:
output.append(char)
return "".join(output)
class WordpieceTokenizer(object):
"""Runs WordPiece tokenziation."""
def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=100):
self.vocab = vocab
self.unk_token = unk_token
self.max_input_chars_per_word = max_input_chars_per_word
def tokenize(self, text):
"""Tokenizes a piece of text into its word pieces.
This uses a greedy longest-match-first algorithm to perform tokenization
using the given vocabulary.
For example:
input = "unaffable"
output = ["un", "##aff", "##able"]
Args:
            text: A single token or whitespace separated tokens. This should have
                already been passed through `BasicTokenizer`.
Returns:
A list of wordpiece tokens.
"""
text = convert_to_unicode(text)
output_tokens = []
for token in whitespace_tokenize(text):
chars = list(token)
if len(chars) > self.max_input_chars_per_word:
output_tokens.append(self.unk_token)
continue
is_bad = False
start = 0
sub_tokens = []
while start < len(chars):
end = len(chars)
cur_substr = None
while start < end:
substr = "".join(chars[start:end])
if start > 0:
substr = "##" + substr
if substr in self.vocab:
cur_substr = substr
break
end -= 1
if cur_substr is None:
is_bad = True
break
sub_tokens.append(cur_substr)
start = end
if is_bad:
output_tokens.append(self.unk_token)
else:
output_tokens.extend(sub_tokens)
return output_tokens
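# A minimal sketch (not part of the original module) demonstrating the greedy
# longest-match-first loop above on a toy vocabulary:
def _wordpiece_demo():
    vocab = {"un": 0, "##aff": 1, "##able": 2, "[UNK]": 3}
    tokenizer = WordpieceTokenizer(vocab=vocab)
    # "unaffable" is consumed as the longest matching pieces: un / ##aff / ##able
    assert tokenizer.tokenize("unaffable") == ["un", "##aff", "##able"]
    # a word with no matching piece at some position falls back to [UNK]
    assert tokenizer.tokenize("xyz") == ["[UNK]"]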
def _is_whitespace(char):
"""Checks whether `chars` is a whitespace character."""
# \t, \n, and \r are technically contorl characters but we treat them
# as whitespace since they are generally considered as such.
if char == " " or char == "\t" or char == "\n" or char == "\r":
return True
cat = unicodedata.category(char)
if cat == "Zs":
return True
return False
def _is_control(char):
"""Checks whether `chars` is a control character."""
# These are technically control characters but we count them as whitespace
# characters.
if char == "\t" or char == "\n" or char == "\r":
return False
cat = unicodedata.category(char)
if cat.startswith("C"):
return True
return False
def _is_punctuation(char):
"""Checks whether `chars` is a punctuation character."""
cp = ord(char)
# We treat all non-letter/number ASCII as punctuation.
# Characters such as "^", "$", and "`" are not in the Unicode
# Punctuation class but we treat them as punctuation anyways, for
# consistency.
if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or
(cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)):
return True
cat = unicodedata.category(char)
if cat.startswith("P"):
return True
return False
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import models
from . import transforms
__all__ = ["models", "transforms"]
@@ -17,21 +17,15 @@ from . import vgg
 from . import mobilenetv1
 from . import mobilenetv2
 from . import darknet
-from . import yolov3
-from . import tsm
 from .resnet import *
 from .mobilenetv1 import *
 from .mobilenetv2 import *
 from .vgg import *
 from .darknet import *
-from .yolov3 import *
-from .tsm import *
 __all__ = resnet.__all__ \
     + vgg.__all__ \
     + mobilenetv1.__all__ \
     + mobilenetv2.__all__ \
-    + darknet.__all__ \
-    + yolov3.__all__ \
-    + tsm.__all__
+    + darknet.__all__
@@ -18,10 +18,10 @@ from paddle.fluid.regularizer import L2Decay
 from paddle.fluid.dygraph.nn import Conv2D, BatchNorm
-from model import Model
-from .download import get_weights_path
+from hapi.model import Model
+from hapi.download import get_weights_path
-__all__ = ['DarkNet53', 'ConvBNLayer', 'darknet53']
+__all__ = ['DarkNet', 'darknet53']
 # {num_layers: (url, md5)}
 pretrain_infos = {
@@ -136,9 +136,17 @@ class LayerWarp(fluid.dygraph.Layer):
 DarkNet_cfg = {53: ([1, 2, 8, 8, 4])}
-class DarkNet53(Model):
+class DarkNet(Model):
+    """DarkNet model from
+    `"YOLOv3: An Incremental Improvement" <https://arxiv.org/abs/1804.02767>`_
+    Args:
+        num_layers (int): layer number of DarkNet, only 53 supported currently, default: 53.
+        ch_in (int): channel number of input data, default 3.
+    """
     def __init__(self, num_layers=53, ch_in=3):
-        super(DarkNet53, self).__init__()
+        super(DarkNet, self).__init__()
         assert num_layers in DarkNet_cfg.keys(), \
             "only support num_layers in {} currently" \
             .format(DarkNet_cfg.keys())
@@ -188,7 +196,7 @@ class DarkNet53(Model):
 def _darknet(num_layers=53, input_channels=3, pretrained=True):
-    model = DarkNet53(num_layers, input_channels)
+    model = DarkNet(num_layers, input_channels)
     if pretrained:
         assert num_layers in pretrain_infos.keys(), \
             "DarkNet{} does not have pretrained weights now, " \
@@ -201,4 +209,11 @@ def _darknet(num_layers=53, input_channels=3, pretrained=True):
 def darknet53(input_channels=3, pretrained=True):
+    """DarkNet 53-layer model
+    Args:
+        input_channels (int): channel number of input data, default 3.
+        pretrained (bool): If True, returns a model pre-trained on ImageNet,
+            default True.
+    """
     return _darknet(53, input_channels, pretrained)
@@ -19,8 +19,8 @@ from paddle.fluid.initializer import MSRA
 from paddle.fluid.param_attr import ParamAttr
 from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
-from model import Model
-from .download import get_weights_path
+from hapi.model import Model
+from hapi.download import get_weights_path
 __all__ = ['MobileNetV1', 'mobilenet_v1']
@@ -111,13 +111,22 @@ class MobileNetV1(Model):
     Args:
         scale (float): scale of channels in each layer. Default: 1.0.
-        class_dim (int): output dim of last fc layer. Default: 1000.
+        num_classes (int): output dim of last fc layer. If num_classes <= 0, the last
+            fc layer will not be defined. Default: 1000.
+        with_pool (bool): use pool before the last fc layer or not. Default: True.
+        classifier_activation (str): activation for the last fc layer. Default: 'softmax'.
     """
-    def __init__(self, scale=1.0, class_dim=1000):
+    def __init__(self,
+                 scale=1.0,
+                 num_classes=1000,
+                 with_pool=True,
+                 classifier_activation='softmax'):
         super(MobileNetV1, self).__init__()
         self.scale = scale
         self.dwsl = []
+        self.num_classes = num_classes
+        self.with_pool = with_pool
         self.conv1 = ConvBNLayer(
             num_channels=3,
@@ -227,23 +236,29 @@ class MobileNetV1(Model):
             name="conv6")
         self.dwsl.append(dws6)
-        self.pool2d_avg = Pool2D(pool_type='avg', global_pooling=True)
+        if with_pool:
+            self.pool2d_avg = Pool2D(pool_type='avg', global_pooling=True)
-        self.out = Linear(
-            int(1024 * scale),
-            class_dim,
-            act='softmax',
-            param_attr=ParamAttr(
-                initializer=MSRA(), name=self.full_name() + "fc7_weights"),
-            bias_attr=ParamAttr(name="fc7_offset"))
+        if num_classes > 0:
+            self.out = Linear(
+                int(1024 * scale),
+                num_classes,
+                act=classifier_activation,
+                param_attr=ParamAttr(
+                    initializer=MSRA(), name=self.full_name() + "fc7_weights"),
+                bias_attr=ParamAttr(name="fc7_offset"))
     def forward(self, inputs):
         y = self.conv1(inputs)
         for dws in self.dwsl:
             y = dws(y)
-        y = self.pool2d_avg(y)
-        y = fluid.layers.reshape(y, shape=[-1, 1024])
-        y = self.out(y)
+        if self.with_pool:
+            y = self.pool2d_avg(y)
+        if self.num_classes > 0:
+            y = fluid.layers.reshape(y, shape=[-1, 1024])
+            y = self.out(y)
         return y
@@ -261,6 +276,13 @@ def _mobilenet(arch, pretrained=False, **kwargs):
     return model
-def mobilenet_v1(pretrained=False, scale=1.0):
-    model = _mobilenet('mobilenetv1_' + str(scale), pretrained, scale=scale)
+def mobilenet_v1(pretrained=False, scale=1.0, **kwargs):
+    """MobileNetV1
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet. Default: False.
+        scale (float): scale of channels in each layer. Default: 1.0.
+    """
+    model = _mobilenet(
+        'mobilenetv1_' + str(scale), pretrained, scale=scale, **kwargs)
     return model
@@ -18,8 +18,8 @@ import paddle.fluid as fluid
 from paddle.fluid.param_attr import ParamAttr
 from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
-from model import Model
-from .download import get_weights_path
+from hapi.model import Model
+from hapi.download import get_weights_path
 __all__ = ['MobileNetV2', 'mobilenet_v2']
@@ -156,13 +156,21 @@ class MobileNetV2(Model):
     Args:
         scale (float): scale of channels in each layer. Default: 1.0.
-        class_dim (int): output dim of last fc layer. Default: 1000.
+        num_classes (int): output dim of last fc layer. If num_classes <= 0, the last
+            fc layer will not be defined. Default: 1000.
+        with_pool (bool): use pool before the last fc layer or not. Default: True.
+        classifier_activation (str): activation for the last fc layer. Default: 'softmax'.
     """
-    def __init__(self, scale=1.0, class_dim=1000):
+    def __init__(self,
+                 scale=1.0,
+                 num_classes=1000,
+                 with_pool=True,
+                 classifier_activation='softmax'):
         super(MobileNetV2, self).__init__()
         self.scale = scale
-        self.class_dim = class_dim
+        self.num_classes = num_classes
+        self.with_pool = with_pool
         bottleneck_params_list = [
             (1, 16, 1, 1),
@@ -174,7 +182,6 @@ class MobileNetV2(Model):
             (6, 320, 1, 1),
         ]
-        #1. conv1
         self._conv1 = ConvBNLayer(
             num_channels=3,
             num_filters=int(32 * scale),
@@ -182,7 +189,6 @@ class MobileNetV2(Model):
             stride=2,
             padding=1)
-        #2. bottleneck sequences
         self._invl = []
         i = 1
         in_c = int(32 * scale)
@@ -196,7 +202,6 @@ class MobileNetV2(Model):
             self._invl.append(tmp)
             in_c = int(c * scale)
-        #3. last_conv
         self._out_c = int(1280 * scale) if scale > 1.0 else 1280
         self._conv9 = ConvBNLayer(
             num_channels=in_c,
@@ -205,26 +210,29 @@ class MobileNetV2(Model):
             stride=1,
             padding=0)
-        #4. pool
-        self._pool2d_avg = Pool2D(pool_type='avg', global_pooling=True)
+        if with_pool:
+            self._pool2d_avg = Pool2D(pool_type='avg', global_pooling=True)
-        #5. fc
-        tmp_param = ParamAttr(name=self.full_name() + "fc10_weights")
-        self._fc = Linear(
-            self._out_c,
-            class_dim,
-            act='softmax',
-            param_attr=tmp_param,
-            bias_attr=ParamAttr(name="fc10_offset"))
+        if num_classes > 0:
+            tmp_param = ParamAttr(name=self.full_name() + "fc10_weights")
+            self._fc = Linear(
+                self._out_c,
+                num_classes,
+                act=classifier_activation,
+                param_attr=tmp_param,
+                bias_attr=ParamAttr(name="fc10_offset"))
     def forward(self, inputs):
         y = self._conv1(inputs, if_act=True)
         for inv in self._invl:
             y = inv(y)
         y = self._conv9(y, if_act=True)
-        y = self._pool2d_avg(y)
-        y = fluid.layers.reshape(y, shape=[-1, self._out_c])
-        y = self._fc(y)
+        if self.with_pool:
+            y = self._pool2d_avg(y)
+        if self.num_classes > 0:
+            y = fluid.layers.reshape(y, shape=[-1, self._out_c])
+            y = self._fc(y)
         return y
@@ -242,11 +250,13 @@ def _mobilenet(arch, pretrained=False, **kwargs):
     return model
-def mobilenet_v2(pretrained=False, scale=1.0):
+def mobilenet_v2(pretrained=False, scale=1.0, **kwargs):
     """MobileNetV2
     Args:
-        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        pretrained (bool): If True, returns a model pre-trained on ImageNet. Default: False.
+        scale (float): scale of channels in each layer. Default: 1.0.
     """
-    model = _mobilenet('mobilenetv2_' + str(scale), pretrained, scale=scale)
+    model = _mobilenet(
+        'mobilenetv2_' + str(scale), pretrained, scale=scale, **kwargs)
     return model
@@ -22,16 +22,26 @@ from paddle.fluid.layer_helper import LayerHelper
 from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
 from paddle.fluid.dygraph.container import Sequential
-from model import Model
-from .download import get_weights_path
+from hapi.model import Model
+from hapi.download import get_weights_path
 __all__ = [
     'ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152'
 ]
 model_urls = {
+    'resnet18': ('https://paddle-hapi.bj.bcebos.com/models/resnet18.pdparams',
+                 '0ba53eea9bc970962d0ef96f7b94057e'),
+    'resnet34': ('https://paddle-hapi.bj.bcebos.com/models/resnet34.pdparams',
+                 '46bc9f7c3dd2e55b7866285bee91eff3'),
     'resnet50': ('https://paddle-hapi.bj.bcebos.com/models/resnet50.pdparams',
-                 '0884c9087266496c41c60d14a96f8530')
+                 '0884c9087266496c41c60d14a96f8530'),
+    'resnet101':
+    ('https://paddle-hapi.bj.bcebos.com/models/resnet101.pdparams',
+     'fb07a451df331e4b0bb861ed97c3a9b9'),
+    'resnet152':
+    ('https://paddle-hapi.bj.bcebos.com/models/resnet152.pdparams',
+     'f9c700f26d3644bb76ad2226ed5f5713'),
 }
@@ -163,12 +173,23 @@ class ResNet(Model):
     Args:
         Block (BasicBlock|BottleneckBlock): block module of model.
         depth (int): layers of resnet, default: 50.
-        num_classes (int): output dim of last fc layer, default: 1000.
+        num_classes (int): output dim of last fc layer. If num_classes <= 0, the last
+            fc layer will not be defined. Default: 1000.
+        with_pool (bool): use pool before the last fc layer or not. Default: True.
+        classifier_activation (str): activation for the last fc layer. Default: 'softmax'.
     """
-    def __init__(self, Block, depth=50, num_classes=1000):
+    def __init__(self,
+                 Block,
+                 depth=50,
+                 num_classes=1000,
+                 with_pool=True,
+                 classifier_activation='softmax'):
         super(ResNet, self).__init__()
+        self.num_classes = num_classes
+        self.with_pool = with_pool
         layer_config = {
             18: [2, 2, 2, 2],
             34: [3, 4, 6, 3],
@@ -212,31 +233,37 @@ class ResNet(Model):
                 Sequential(*blocks))
             self.layers.append(layer)
-        self.global_pool = Pool2D(
-            pool_size=7, pool_type='avg', global_pooling=True)
+        if with_pool:
+            self.global_pool = Pool2D(
+                pool_size=7, pool_type='avg', global_pooling=True)
-        stdv = 1.0 / math.sqrt(out_channels[-1] * Block.expansion * 1.0)
-        self.fc_input_dim = out_channels[-1] * Block.expansion * 1 * 1
-        self.fc = Linear(
-            self.fc_input_dim,
-            num_classes,
-            act='softmax',
-            param_attr=fluid.param_attr.ParamAttr(
-                initializer=fluid.initializer.Uniform(-stdv, stdv)))
+        if num_classes > 0:
+            stdv = 1.0 / math.sqrt(out_channels[-1] * Block.expansion * 1.0)
+            self.fc_input_dim = out_channels[-1] * Block.expansion * 1 * 1
+            self.fc = Linear(
+                self.fc_input_dim,
+                num_classes,
+                act=classifier_activation,
+                param_attr=fluid.param_attr.ParamAttr(
+                    initializer=fluid.initializer.Uniform(-stdv, stdv)))
     def forward(self, inputs):
         x = self.conv(inputs)
         x = self.pool(x)
         for layer in self.layers:
             x = layer(x)
-        x = self.global_pool(x)
-        x = fluid.layers.reshape(x, shape=[-1, self.fc_input_dim])
-        x = self.fc(x)
+        if self.with_pool:
+            x = self.global_pool(x)
+        if self.num_classes > 0:
+            x = fluid.layers.reshape(x, shape=[-1, self.fc_input_dim])
+            x = self.fc(x)
         return x
-def _resnet(arch, Block, depth, pretrained):
-    model = ResNet(Block, depth)
+def _resnet(arch, Block, depth, pretrained, **kwargs):
+    model = ResNet(Block, depth, **kwargs)
     if pretrained:
         assert arch in model_urls, "{} model does not have a pretrained model now, you should set pretrained=False".format(
             arch)
@@ -248,46 +275,46 @@ def _resnet(arch, Block, depth, pretrained):
     return model
-def resnet18(pretrained=False):
+def resnet18(pretrained=False, **kwargs):
     """ResNet 18-layer model
     Args:
         pretrained (bool): If True, returns a model pre-trained on ImageNet
     """
-    return _resnet('resnet18', BasicBlock, 18, pretrained)
+    return _resnet('resnet18', BasicBlock, 18, pretrained, **kwargs)
-def resnet34(pretrained=False):
+def resnet34(pretrained=False, **kwargs):
     """ResNet 34-layer model
     Args:
         pretrained (bool): If True, returns a model pre-trained on ImageNet
     """
-    return _resnet('resnet34', BasicBlock, 34, pretrained)
+    return _resnet('resnet34', BasicBlock, 34, pretrained, **kwargs)
-def resnet50(pretrained=False):
+def resnet50(pretrained=False, **kwargs):
     """ResNet 50-layer model
     Args:
         pretrained (bool): If True, returns a model pre-trained on ImageNet
     """
-    return _resnet('resnet50', BottleneckBlock, 50, pretrained)
+    return _resnet('resnet50', BottleneckBlock, 50, pretrained, **kwargs)
-def resnet101(pretrained=False):
+def resnet101(pretrained=False, **kwargs):
     """ResNet 101-layer model
     Args:
         pretrained (bool): If True, returns a model pre-trained on ImageNet
     """
-    return _resnet('resnet101', BottleneckBlock, 101, pretrained)
+    return _resnet('resnet101', BottleneckBlock, 101, pretrained, **kwargs)
-def resnet152(pretrained=False):
+def resnet152(pretrained=False, **kwargs):
     """ResNet 152-layer model
     Args:
         pretrained (bool): If True, returns a model pre-trained on ImageNet
     """
-    return _resnet('resnet152', BottleneckBlock, 152, pretrained)
+    return _resnet('resnet152', BottleneckBlock, 152, pretrained, **kwargs)
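# Usage sketch for the refactored API above (illustrative; assumes the package
# layout hapi.vision.models and that pretrained weights are not needed):
#
#     from hapi.vision.models import resnet50
#     # **kwargs now reach ResNet, so a headless backbone can be built:
#     backbone = resnet50(pretrained=False, num_classes=-1, with_pool=False)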
@@ -17,18 +17,14 @@ import paddle.fluid as fluid
 from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
 from paddle.fluid.dygraph.container import Sequential
-from model import Model
-from .download import get_weights_path
+from hapi.model import Model
+from hapi.download import get_weights_path
 __all__ = [
     'VGG',
     'vgg11',
-    'vgg11_bn',
     'vgg13',
-    'vgg13_bn',
     'vgg16',
-    'vgg16_bn',
-    'vgg19_bn',
     'vgg19',
 ]
@@ -39,11 +35,11 @@ model_urls = {
 class Classifier(fluid.dygraph.Layer):
-    def __init__(self, num_classes):
+    def __init__(self, num_classes, classifier_activation='softmax'):
         super(Classifier, self).__init__()
         self.linear1 = Linear(512 * 7 * 7, 4096)
         self.linear2 = Linear(4096, 4096)
-        self.linear3 = Linear(4096, num_classes, act='softmax')
+        self.linear3 = Linear(4096, num_classes, act=classifier_activation)
     def forward(self, x):
         x = self.linear1(x)
@@ -62,20 +58,30 @@ class VGG(Model):
     Args:
         features (fluid.dygraph.Layer): vgg features created by function make_layers.
-        num_classes (int): output dim of last fc layer. Default: 1000.
+        num_classes (int): output dim of last fc layer. If num_classes <= 0, the last
+            fc layer will not be defined. Default: 1000.
+        classifier_activation (str): activation for the last fc layer. Default: 'softmax'.
     """
-    def __init__(self, features, num_classes=1000):
+    def __init__(self,
+                 features,
+                 num_classes=1000,
+                 classifier_activation='softmax'):
         super(VGG, self).__init__()
         self.features = features
-        classifier = Classifier(num_classes)
-        self.classifier = self.add_sublayer("classifier",
-                                            Sequential(classifier))
+        self.num_classes = num_classes
+        if num_classes > 0:
+            classifier = Classifier(num_classes, classifier_activation)
+            self.classifier = self.add_sublayer("classifier",
+                                                Sequential(classifier))
     def forward(self, x):
         x = self.features(x)
-        x = fluid.layers.flatten(x, 1)
-        x = self.classifier(x)
+        if self.num_classes > 0:
+            x = fluid.layers.flatten(x, 1)
+            x = self.classifier(x)
         return x
@@ -114,7 +120,10 @@ cfgs = {
 def _vgg(arch, cfg, batch_norm, pretrained, **kwargs):
-    model = VGG(make_layers(cfgs[cfg], batch_norm=batch_norm), **kwargs)
+    model = VGG(make_layers(
+        cfgs[cfg], batch_norm=batch_norm),
+                num_classes=1000,
+                **kwargs)
     if pretrained:
         assert arch in model_urls, "{} model does not have a pretrained model now, you should set pretrained=False".format(
@@ -128,73 +137,53 @@ def _vgg(arch, cfg, batch_norm, pretrained, **kwargs):
     return model
-def vgg11(pretrained=False, **kwargs):
+def vgg11(pretrained=False, batch_norm=False, **kwargs):
     """VGG 11-layer model
     Args:
-        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        pretrained (bool): If True, returns a model pre-trained on ImageNet. Default: False.
+        batch_norm (bool): If True, returns a model with batch_norm layer. Default: False.
     """
-    return _vgg('vgg11', 'A', False, pretrained, **kwargs)
-def vgg11_bn(pretrained=False, **kwargs):
-    """VGG 11-layer model with batch normalization
-    Args:
-        pretrained (bool): If True, returns a model pre-trained on ImageNet
-    """
-    return _vgg('vgg11_bn', 'A', True, pretrained, **kwargs)
+    model_name = 'vgg11'
+    if batch_norm:
+        model_name += '_bn'
+    return _vgg(model_name, 'A', batch_norm, pretrained, **kwargs)
-def vgg13(pretrained=False, **kwargs):
+def vgg13(pretrained=False, batch_norm=False, **kwargs):
     """VGG 13-layer model
     Args:
-        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        pretrained (bool): If True, returns a model pre-trained on ImageNet. Default: False.
+        batch_norm (bool): If True, returns a model with batch_norm layer. Default: False.
     """
-    return _vgg('vgg13', 'B', False, pretrained, **kwargs)
-def vgg13_bn(pretrained=False, **kwargs):
-    """VGG 13-layer model with batch normalization
-    Args:
-        pretrained (bool): If True, returns a model pre-trained on ImageNet
-    """
-    return _vgg('vgg13_bn', 'B', True, pretrained, **kwargs)
+    model_name = 'vgg13'
+    if batch_norm:
+        model_name += '_bn'
+    return _vgg(model_name, 'B', batch_norm, pretrained, **kwargs)
-def vgg16(pretrained=False, **kwargs):
+def vgg16(pretrained=False, batch_norm=False, **kwargs):
     """VGG 16-layer model
     Args:
-        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        pretrained (bool): If True, returns a model pre-trained on ImageNet. Default: False.
+        batch_norm (bool): If True, returns a model with batch_norm layer. Default: False.
     """
-    return _vgg('vgg16', 'D', False, pretrained, **kwargs)
-def vgg16_bn(pretrained=False, **kwargs):
-    """VGG 16-layer model with batch normalization
-    Args:
-        pretrained (bool): If True, returns a model pre-trained on ImageNet
-    """
-    return _vgg('vgg16_bn', 'D', True, pretrained, **kwargs)
+    model_name = 'vgg16'
+    if batch_norm:
+        model_name += '_bn'
+    return _vgg(model_name, 'D', batch_norm, pretrained, **kwargs)
-def vgg19(pretrained=False, **kwargs):
+def vgg19(pretrained=False, batch_norm=False, **kwargs):
     """VGG 19-layer model
     Args:
-        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        pretrained (bool): If True, returns a model pre-trained on ImageNet. Default: False.
+        batch_norm (bool): If True, returns a model with batch_norm layer. Default: False.
     """
-    return _vgg('vgg19', 'E', False, pretrained, **kwargs)
-def vgg19_bn(pretrained=False, **kwargs):
-    """VGG 19-layer model with batch normalization
-    Args:
-        pretrained (bool): If True, returns a model pre-trained on ImageNet
-    """
-    return _vgg('vgg19_bn', 'E', True, pretrained, **kwargs)
+    model_name = 'vgg19'
+    if batch_norm:
+        model_name += '_bn'
+    return _vgg(model_name, 'E', batch_norm, pretrained, **kwargs)
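# The separate vgg*_bn entry points above are folded into a batch_norm flag
# (illustrative sketch, same package-layout assumption as above):
#
#     from hapi.vision.models import vgg16
#     model = vgg16(pretrained=False, batch_norm=True)  # formerly vgg16_bn()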
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import transforms
from . import functional
from .transforms import *
from .functional import *
__all__ = transforms.__all__ \
+ functional.__all__
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import collections
import random
import cv2
import numpy as np
if sys.version_info < (3, 3):
Sequence = collections.Sequence
Iterable = collections.Iterable
else:
Sequence = collections.abc.Sequence
Iterable = collections.abc.Iterable
__all__ = ['flip', 'resize']
def flip(image, code):
"""
    Flip the input image according to `code` (the type of flip).
    Args:
        image: Input image, with (H, W, C) shape
        code: Code that indicates the type of flip:
            -1 : flip horizontally and vertically
             0 : flip vertically
             1 : flip horizontally
"""
return cv2.flip(image, flipCode=code)
def resize(img, size, interpolation=cv2.INTER_LINEAR):
"""
    Resize the input data to the given size.
    Args:
        img: Input data, could be an image or masks, with (H, W, C) shape
        size: Target size of input data, with (height, width) shape.
        interpolation: Interpolation method.
"""
if isinstance(interpolation, Sequence):
interpolation = random.choice(interpolation)
if isinstance(size, int):
h, w = img.shape[:2]
if (w <= h and w == size) or (h <= w and h == size):
return img
if w < h:
ow = size
oh = int(size * h / w)
return cv2.resize(img, (ow, oh), interpolation=interpolation)
else:
oh = size
ow = int(size * w / h)
return cv2.resize(img, (ow, oh), interpolation=interpolation)
else:
return cv2.resize(img, size[::-1], interpolation=interpolation)
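# A small sketch (not part of the original module) of the two resize modes:
# an int size rescales the smaller edge and keeps the aspect ratio, while a
# (height, width) pair resizes to exactly that shape.
def _resize_demo():
    img = np.zeros((300, 400, 3), dtype=np.uint8)
    assert resize(img, 200).shape[:2] == (200, 266)       # smaller edge -> 200
    assert resize(img, (100, 50)).shape[:2] == (100, 50)  # exact (h, w)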
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import division
import math
import sys
import random
import cv2
import numpy as np
import numbers
import types
import collections
import warnings
import traceback
from . import functional as F
if sys.version_info < (3, 3):
Iterable = collections.Iterable
else:
Iterable = collections.abc.Iterable
__all__ = [
"Compose",
"BatchCompose",
"Resize",
"RandomResizedCrop",
"CenterCropResize",
"CenterCrop",
"RandomHorizontalFlip",
"RandomVerticalFlip",
"Permute",
"Normalize",
"GaussianNoise",
"BrightnessTransform",
"SaturationTransform",
"ContrastTransform",
"HueTransform",
"ColorJitter",
]
class Compose(object):
"""Composes several transforms together.
Args:
transforms (list of ``Transform`` objects): list of transforms to compose.
"""
def __init__(self, transforms):
self.transforms = transforms
def __call__(self, *data):
for f in self.transforms:
try:
data = f(*data)
except Exception as e:
stack_info = traceback.format_exc()
print("fail to perform transform [{}] with error: "
"{} and stack:\n{}".format(f, e, str(stack_info)))
raise e
return data
def __repr__(self):
format_string = self.__class__.__name__ + '('
for t in self.transforms:
format_string += '\n'
format_string += ' {0}'.format(t)
format_string += '\n)'
return format_string
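# Usage sketch (illustrative): every transform in this module consumes and
# returns an (img, lbl) pair, so Compose threads both through the pipeline:
#
#     transform = Compose([Resize(256), CenterCrop(224), Normalize(127.5, 127.5)])
#     img_out, lbl_out = transform(img, lbl)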
class BatchCompose(object):
"""Composes several batch transforms together
Args:
transforms (list of ``Transform`` objects): list of transforms to compose.
these transforms perform on batch data.
"""
def __init__(self, transforms=[]):
self.transforms = transforms
def __call__(self, data):
for f in self.transforms:
try:
data = f(data)
except Exception as e:
stack_info = traceback.format_exc()
print("fail to perform batch transform [{}] with error: "
"{} and stack:\n{}".format(f, e, str(stack_info)))
raise e
# sample list to batch data
batch = list(zip(*data))
return batch
class Resize(object):
"""Resize the input Image to the given size.
Args:
size (int|list|tuple): Desired output size. If size is a sequence like
(h, w), output size will be matched to this. If size is an int,
smaller edge of the image will be matched to this number.
i.e, if height > width, then image will be rescaled to
(size * height / width, size)
interpolation (int): interpolation mode of resize. Default: cv2.INTER_LINEAR.
"""
def __init__(self, size, interpolation=cv2.INTER_LINEAR):
assert isinstance(size, int) or (isinstance(size, Iterable) and
len(size) == 2)
self.size = size
self.interpolation = interpolation
def __call__(self, img, lbl):
return F.resize(img, self.size, self.interpolation), lbl
class RandomResizedCrop(object):
"""Crop the input data to random size and aspect ratio.
    A crop of random size (default: 0.08 to 1.0 of the original size) and random
    aspect ratio (default: 3/4 to 4/3 of the original aspect ratio) is made.
    After the crop, the input data is resized to the given size.
    Args:
        output_size (int|list|tuple): Target size of output image, with (height, width) shape.
        scale (list|tuple): Range of size of the origin size cropped. Default: (0.08, 1.0)
        ratio (list|tuple): Range of aspect ratio of the origin aspect ratio cropped. Default: (0.75, 1.33)
        interpolation (int): interpolation mode of resize. Default: cv2.INTER_LINEAR.
"""
def __init__(self,
output_size,
scale=(0.08, 1.0),
ratio=(3. / 4, 4. / 3),
interpolation=cv2.INTER_LINEAR):
if isinstance(output_size, int):
self.output_size = (output_size, output_size)
else:
self.output_size = output_size
assert (scale[0] <= scale[1]), "scale should be of kind (min, max)"
assert (ratio[0] <= ratio[1]), "ratio should be of kind (min, max)"
self.scale = scale
self.ratio = ratio
self.interpolation = interpolation
def _get_params(self, image, attempts=10):
height, width, _ = image.shape
area = height * width
for _ in range(attempts):
target_area = np.random.uniform(*self.scale) * area
log_ratio = tuple(math.log(x) for x in self.ratio)
aspect_ratio = math.exp(np.random.uniform(*log_ratio))
w = int(round(math.sqrt(target_area * aspect_ratio)))
h = int(round(math.sqrt(target_area / aspect_ratio)))
if 0 < w <= width and 0 < h <= height:
x = np.random.randint(0, width - w + 1)
y = np.random.randint(0, height - h + 1)
return x, y, w, h
# Fallback to central crop
in_ratio = float(width) / float(height)
if in_ratio < min(self.ratio):
w = width
h = int(round(w / min(self.ratio)))
elif in_ratio > max(self.ratio):
h = height
w = int(round(h * max(self.ratio)))
else: # whole image
w = width
h = height
x = (width - w) // 2
y = (height - h) // 2
return x, y, w, h
def __call__(self, img, lbl):
x, y, w, h = self._get_params(img)
cropped_img = img[y:y + h, x:x + w]
return F.resize(cropped_img, self.output_size, self.interpolation), lbl
class CenterCropResize(object):
"""Crops to center of image with padding then scales size.
Args:
size (int|list|tuple): Target size of output image, with (height, width) shape.
crop_padding (int): center crop with the padding. Default: 32.
interpolation (int): interpolation mode of resize. Default: cv2.INTER_LINEAR.
"""
def __init__(self, size, crop_padding=32, interpolation=cv2.INTER_LINEAR):
if isinstance(size, int):
self.size = (size, size)
else:
self.size = size
self.crop_padding = crop_padding
self.interpolation = interpolation
def _get_params(self, img):
h, w = img.shape[:2]
size = min(self.size)
c = int(size / (size + self.crop_padding) * min((h, w)))
x = (h + 1 - c) // 2
y = (w + 1 - c) // 2
return c, x, y
def __call__(self, img, lbl):
c, x, y = self._get_params(img)
cropped_img = img[x:x + c, y:y + c, :]
return F.resize(cropped_img, self.size, self.interpolation), lbl
class CenterCrop(object):
"""Crops the given the input data at the center.
Args:
output_size: Target size of output image, with (height, width) shape.
"""
def __init__(self, output_size):
if isinstance(output_size, int):
self.output_size = (output_size, output_size)
else:
self.output_size = output_size
def _get_params(self, img):
th, tw = self.output_size
h, w, _ = img.shape
assert th <= h and tw <= w, "output size is bigger than image size"
x = int(round((w - tw) / 2.0))
y = int(round((h - th) / 2.0))
return x, y
def __call__(self, img, lbl):
x, y = self._get_params(img)
th, tw = self.output_size
return img[y:y + th, x:x + tw], lbl
class RandomHorizontalFlip(object):
"""Horizontally flip the input data randomly with a given probability.
Args:
prob (float): probability of the input data being flipped. Default: 0.5
"""
def __init__(self, prob=0.5):
self.prob = prob
def __call__(self, img, lbl):
if np.random.random() < self.prob:
return F.flip(img, code=1), lbl
return img, lbl
class RandomVerticalFlip(object):
"""Vertically flip the input data randomly with a given probability.
Args:
prob (float): probability of the input data being flipped. Default: 0.5
"""
def __init__(self, prob=0.5):
self.prob = prob
def __call__(self, img, lbl):
if np.random.random() < self.prob:
return F.flip(img, code=0), lbl
return img, lbl
class Normalize(object):
"""Normalize the input data with mean and standard deviation.
Given mean: ``(M1,...,Mn)`` and std: ``(S1,..,Sn)`` for ``n`` channels,
this transform will normalize each channel of the input data.
``output[channel] = (input[channel] - mean[channel]) / std[channel]``
Args:
mean (int|float|list): Sequence of means for each channel.
std (int|float|list): Sequence of standard deviations for each channel.
"""
def __init__(self, mean=0.0, std=1.0):
        if isinstance(mean, numbers.Number):
            mean = [mean, mean, mean]
        if isinstance(std, numbers.Number):
            std = [std, std, std]
self.mean = np.array(mean, dtype=np.float32).reshape(len(mean), 1, 1)
self.std = np.array(std, dtype=np.float32).reshape(len(std), 1, 1)
def __call__(self, img, lbl):
return (img - self.mean) / self.std, lbl
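# Worked example (illustrative): with mean=127.5 and std=127.5, a uint8 pixel
# value of 255 maps to (255 - 127.5) / 127.5 = 1.0 and 0 maps to -1.0, so
# images land channel-wise in [-1, 1].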
class Permute(object):
"""Change input data to a target mode.
For example, most transforms use HWC mode image,
while the Neural Network might use CHW mode input tensor.
Input image should be HWC mode and an instance of numpy.ndarray.
Args:
mode: Output mode of input. Default: "CHW".
to_rgb: convert 'bgr' image to 'rgb'. Default: True.
"""
def __init__(self, mode="CHW", to_rgb=True):
assert mode in [
"CHW"
], "Only support 'CHW' mode, but received mode: {}".format(mode)
self.mode = mode
self.to_rgb = to_rgb
def __call__(self, img, lbl):
if self.to_rgb:
img = img[..., ::-1]
if self.mode == "CHW":
return img.transpose((2, 0, 1)), lbl
return img, lbl
class GaussianNoise(object):
"""Add random gaussian noise to the input data.
Gaussian noise is generated with given mean and std.
Args:
mean: Gaussian mean used to generate noise.
std: Gaussian standard deviation used to generate noise.
"""
def __init__(self, mean=0.0, std=1.0):
self.mean = np.array(mean, dtype=np.float32)
self.std = np.array(std, dtype=np.float32)
def __call__(self, img, lbl):
dtype = img.dtype
noise = np.random.normal(self.mean, self.std, img.shape) * 255
img = img + noise.astype(np.float32)
return np.clip(img, 0, 255).astype(dtype), lbl
class BrightnessTransform(object):
"""Adjust brightness of the image.
Args:
        value: How much to adjust the brightness. Can be any
            non-negative number; 0 gives the original image.
"""
def __init__(self, value):
if value < 0:
raise ValueError("brightness value should be non-negative")
self.value = value
def __call__(self, img, lbl):
if self.value == 0:
return img, lbl
dtype = img.dtype
img = img.astype(np.float32)
alpha = np.random.uniform(max(0, 1 - self.value), 1 + self.value)
img = img * alpha
return img.clip(0, 255).astype(dtype), lbl
class ContrastTransform(object):
"""Adjust contrast of the image.
Args:
        value: How much to adjust the contrast. Can be any
            non-negative number; 0 gives the original image.
"""
def __init__(self, value):
if value < 0:
raise ValueError("contrast value should be non-negative")
self.value = value
def __call__(self, img, lbl):
if self.value == 0:
return img, lbl
dtype = img.dtype
img = img.astype(np.float32)
alpha = np.random.uniform(max(0, 1 - self.value), 1 + self.value)
img = img * alpha + cv2.cvtColor(img, cv2.COLOR_BGR2GRAY).mean() * (
1 - alpha)
return img.clip(0, 255).astype(dtype), lbl
class SaturationTransform(object):
"""Adjust saturation of the image.
Args:
        value: How much to adjust the saturation. Can be any
            non-negative number; 0 gives the original image.
"""
def __init__(self, value):
if value < 0:
raise ValueError("saturation value should be non-negative")
self.value = value
def __call__(self, img, lbl):
if self.value == 0:
return img, lbl
dtype = img.dtype
img = img.astype(np.float32)
alpha = np.random.uniform(max(0, 1 - self.value), 1 + self.value)
gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray_img = gray_img[..., np.newaxis]
img = img * alpha + gray_img * (1 - alpha)
return img.clip(0, 255).astype(dtype), lbl
class HueTransform(object):
"""Adjust hue of the image.
Args:
        value: How much to adjust the hue. Can be any number
            between 0 and 0.5; 0 gives the original image.
"""
def __init__(self, value):
if value < 0 or value > 0.5:
raise ValueError("hue value should be in [0.0, 0.5]")
self.value = value
def __call__(self, img, lbl):
if self.value == 0:
return img, lbl
dtype = img.dtype
img = img.astype(np.uint8)
hsv_img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV_FULL)
h, s, v = cv2.split(hsv_img)
alpha = np.random.uniform(-self.value, self.value)
h = h.astype(np.uint8)
        # uint8 addition takes care of rotation across boundaries
with np.errstate(over="ignore"):
h += np.uint8(alpha * 255)
hsv_img = cv2.merge([h, s, v])
return cv2.cvtColor(hsv_img, cv2.COLOR_HSV2BGR_FULL).astype(dtype), lbl
class ColorJitter(object):
"""Randomly change the brightness, contrast, saturation and hue of an image.
Args:
        brightness: How much to jitter brightness.
            Chosen uniformly from [max(0, 1 - brightness), 1 + brightness]
            or the given [min, max]. Should be non-negative numbers.
        contrast: How much to jitter contrast.
            Chosen uniformly from [max(0, 1 - contrast), 1 + contrast]
            or the given [min, max]. Should be non-negative numbers.
        saturation: How much to jitter saturation.
            Chosen uniformly from [max(0, 1 - saturation), 1 + saturation]
            or the given [min, max]. Should be non-negative numbers.
        hue: How much to jitter hue.
            Chosen uniformly from [-hue, hue] or the given [min, max].
            Should have 0 <= hue <= 0.5 or -0.5 <= min <= max <= 0.5.
"""
def __init__(self, brightness=0, contrast=0, saturation=0, hue=0):
transforms = []
if brightness != 0:
transforms.append(BrightnessTransform(brightness))
if contrast != 0:
transforms.append(ContrastTransform(contrast))
if saturation != 0:
transforms.append(SaturationTransform(saturation))
if hue != 0:
transforms.append(HueTransform(hue))
random.shuffle(transforms)
self.transforms = Compose(transforms)
def __call__(self, img, lbl):
return self.transforms(img, lbl)
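# Usage sketch (illustrative): each enabled sub-transform draws its own alpha
# uniformly from [max(0, 1 - value), 1 + value] (or [-hue, hue] for hue), and
# the sub-transforms are applied in a randomly shuffled order:
#
#     jitter = ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1)
#     img_out, lbl_out = jitter(img, lbl)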
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
lexical analysis network structure
"""
from __future__ import division
from __future__ import print_function
import io
import os
import sys
import math
import argparse
import numpy as np
from metrics import Metric
from model import Model, Input, Loss, set_device
import paddle.fluid as fluid
from paddle.fluid.optimizer import AdamOptimizer
from paddle.fluid.initializer import NormalInitializer
from paddle.fluid.dygraph.nn import Embedding, Linear, GRUUnit
class DynamicGRU(fluid.dygraph.Layer):
def __init__(self,
size,
h_0=None,
param_attr=None,
bias_attr=None,
is_reverse=False,
gate_activation='sigmoid',
candidate_activation='tanh',
origin_mode=False,
init_size=None):
super(DynamicGRU, self).__init__()
self.gru_unit = GRUUnit(
size * 3,
param_attr=param_attr,
bias_attr=bias_attr,
activation=candidate_activation,
gate_activation=gate_activation,
origin_mode=origin_mode)
self.size = size
self.h_0 = h_0
self.is_reverse = is_reverse
def forward(self, inputs):
hidden = self.h_0
res = []
for i in range(inputs.shape[1]):
if self.is_reverse:
i = inputs.shape[1] - 1 - i
input_ = inputs[:, i:i + 1, :]
input_ = fluid.layers.reshape(
input_, [-1, input_.shape[2]], inplace=False)
hidden, reset, gate = self.gru_unit(input_, hidden)
hidden_ = fluid.layers.reshape(
hidden, [-1, 1, hidden.shape[1]], inplace=False)
res.append(hidden_)
if self.is_reverse:
res = res[::-1]
res = fluid.layers.concat(res, axis=1)
return res
class BiGRU(fluid.dygraph.Layer):
def __init__(self, input_dim, grnn_hidden_dim, init_bound, h_0=None):
super(BiGRU, self).__init__()
self.pre_gru = Linear(
input_dim=input_dim,
output_dim=grnn_hidden_dim * 3,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-init_bound, high=init_bound),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)))
self.gru = DynamicGRU(
size=grnn_hidden_dim,
h_0=h_0,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-init_bound, high=init_bound),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)))
self.pre_gru_r = Linear(
input_dim=input_dim,
output_dim=grnn_hidden_dim * 3,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-init_bound, high=init_bound),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)))
self.gru_r = DynamicGRU(
size=grnn_hidden_dim,
is_reverse=True,
h_0=h_0,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-init_bound, high=init_bound),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)))
def forward(self, input_feature):
res_pre_gru = self.pre_gru(input_feature)
res_gru = self.gru(res_pre_gru)
res_pre_gru_r = self.pre_gru_r(input_feature)
res_gru_r = self.gru_r(res_pre_gru_r)
bi_merge = fluid.layers.concat(input=[res_gru, res_gru_r], axis=-1)
return bi_merge
class Linear_chain_crf(fluid.dygraph.Layer):
def __init__(self, param_attr, size=None, is_test=False, dtype='float32'):
super(Linear_chain_crf, self).__init__()
self._param_attr = param_attr
self._dtype = dtype
self._size = size
self._is_test = is_test
self._transition = self.create_parameter(
attr=self._param_attr,
shape=[self._size + 2, self._size],
dtype=self._dtype)
@property
def weight(self):
return self._transition
@weight.setter
def weight(self, value):
self._transition = value
def forward(self, input, label, length=None):
alpha = self._helper.create_variable_for_type_inference(
dtype=self._dtype)
emission_exps = self._helper.create_variable_for_type_inference(
dtype=self._dtype)
transition_exps = self._helper.create_variable_for_type_inference(
dtype=self._dtype)
log_likelihood = self._helper.create_variable_for_type_inference(
dtype=self._dtype)
this_inputs = {
"Emission": [input],
"Transition": self._transition,
"Label": [label]
}
if length:
this_inputs['Length'] = [length]
self._helper.append_op(
type='linear_chain_crf',
inputs=this_inputs,
outputs={
"Alpha": [alpha],
"EmissionExps": [emission_exps],
"TransitionExps": transition_exps,
"LogLikelihood": log_likelihood
},
attrs={"is_test": self._is_test, })
return log_likelihood
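# For reference (a standard linear-chain CRF formulation, not quoted from the
# op implementation): the log-likelihood returned above is
#
#     log p(y | x) = sum_t [ E(y_t, x_t) + T(y_{t-1}, y_t) ] - log Z(x)
#
# where E are the emission scores from `input`, T is the learned `_transition`
# matrix, and Z(x) normalizes over all possible label sequences.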
class Crf_decoding(fluid.dygraph.Layer):
def __init__(self, param_attr, size=None, is_test=False, dtype='float32'):
super(Crf_decoding, self).__init__()
self._dtype = dtype
self._size = size
self._is_test = is_test
self._param_attr = param_attr
self._transition = self.create_parameter(
attr=self._param_attr,
shape=[self._size + 2, self._size],
dtype=self._dtype)
@property
def weight(self):
return self._transition
@weight.setter
def weight(self, value):
self._transition = value
def forward(self, input, label=None, length=None):
viterbi_path = self._helper.create_variable_for_type_inference(
dtype=self._dtype)
this_inputs = {
"Emission": [input],
"Transition": self._transition,
"Label": label
}
if length:
this_inputs['Length'] = [length]
self._helper.append_op(
type='crf_decoding',
inputs=this_inputs,
outputs={"ViterbiPath": [viterbi_path]},
attrs={"is_test": self._is_test, })
return viterbi_path
class Chunk_eval(fluid.dygraph.Layer):
def __init__(self,
num_chunk_types,
chunk_scheme,
excluded_chunk_types=None):
super(Chunk_eval, self).__init__()
self.num_chunk_types = num_chunk_types
self.chunk_scheme = chunk_scheme
self.excluded_chunk_types = excluded_chunk_types
def forward(self, input, label, seq_length=None):
precision = self._helper.create_variable_for_type_inference(
dtype="float32")
recall = self._helper.create_variable_for_type_inference(
dtype="float32")
f1_score = self._helper.create_variable_for_type_inference(
dtype="float32")
num_infer_chunks = self._helper.create_variable_for_type_inference(
dtype="int64")
num_label_chunks = self._helper.create_variable_for_type_inference(
dtype="int64")
num_correct_chunks = self._helper.create_variable_for_type_inference(
dtype="int64")
this_input = {"Inference": input, "Label": label[0]}
if seq_length:
this_input["SeqLength"] = seq_length[0]
self._helper.append_op(
type='chunk_eval',
inputs=this_input,
outputs={
"Precision": [precision],
"Recall": [recall],
"F1-Score": [f1_score],
"NumInferChunks": [num_infer_chunks],
"NumLabelChunks": [num_label_chunks],
"NumCorrectChunks": [num_correct_chunks]
},
attrs={
"num_chunk_types": self.num_chunk_types,
"chunk_scheme": self.chunk_scheme,
"excluded_chunk_types": self.excluded_chunk_types or []
})
return (num_infer_chunks, num_label_chunks, num_correct_chunks)
class LAC(Model):
def __init__(self, args, vocab_size, num_labels, length=None):
super(LAC, self).__init__()
"""
define the lexical analysis network structure
word: stores the input of the model
for_infer: a boolean value, indicating if the model to be created is for training or predicting.
return:
for infer: return the prediction
otherwise: return the prediction
"""
self.word_emb_dim = args.word_emb_dim
self.vocab_size = vocab_size
self.num_labels = num_labels
self.grnn_hidden_dim = args.grnn_hidden_dim
self.emb_lr = args.emb_learning_rate if 'emb_learning_rate' in dir(
args) else 1.0
        self.crf_lr = args.crf_learning_rate if 'crf_learning_rate' in dir(
            args) else 1.0
self.bigru_num = args.bigru_num
self.init_bound = 0.1
self.word_embedding = Embedding(
size=[self.vocab_size, self.word_emb_dim],
dtype='float32',
param_attr=fluid.ParamAttr(
learning_rate=self.emb_lr,
name="word_emb",
initializer=fluid.initializer.Uniform(
low=-self.init_bound, high=self.init_bound)))
h_0 = fluid.layers.create_global_var(
shape=[args.batch_size, self.grnn_hidden_dim],
value=0.0,
dtype='float32',
persistable=True,
force_cpu=True,
name='h_0')
self.bigru_units = []
for i in range(self.bigru_num):
if i == 0:
self.bigru_units.append(
self.add_sublayer(
"bigru_units%d" % i,
BiGRU(
self.grnn_hidden_dim,
self.grnn_hidden_dim,
self.init_bound,
h_0=h_0)))
else:
self.bigru_units.append(
self.add_sublayer(
"bigru_units%d" % i,
BiGRU(
self.grnn_hidden_dim * 2,
self.grnn_hidden_dim,
self.init_bound,
h_0=h_0)))
self.fc = Linear(
input_dim=self.grnn_hidden_dim * 2,
output_dim=self.num_labels,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-self.init_bound, high=self.init_bound),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)))
self.linear_chain_crf = Linear_chain_crf(
param_attr=fluid.ParamAttr(
name='linear_chain_crfw', learning_rate=self.crf_lr),
size=self.num_labels)
self.crf_decoding = Crf_decoding(
param_attr=fluid.ParamAttr(
name='crfw', learning_rate=self.crf_lr),
size=self.num_labels)
def forward(self, word, target, lengths):
"""
Configure the network
"""
word_embed = self.word_embedding(word)
input_feature = word_embed
for i in range(self.bigru_num):
bigru_output = self.bigru_units[i](input_feature)
input_feature = bigru_output
emission = self.fc(bigru_output)
crf_cost = self.linear_chain_crf(
input=emission, label=target, length=lengths)
avg_cost = fluid.layers.mean(x=crf_cost)
self.crf_decoding.weight = self.linear_chain_crf.weight
crf_decode = self.crf_decoding(input=emission, length=lengths)
return crf_decode, avg_cost, lengths
class LacLoss(Loss):
def __init__(self):
super(LacLoss, self).__init__()
def forward(self, outputs, labels):
avg_cost = outputs[1]
return avg_cost
class ChunkEval(Metric):
def __init__(self, num_labels, name=None, *args, **kwargs):
super(ChunkEval, self).__init__(*args, **kwargs)
self._init_name(name)
self.chunk_eval = Chunk_eval(
int(math.ceil((num_labels - 1) / 2.0)), "IOB")
self.reset()
def add_metric_op(self, pred, label, *args, **kwargs):
crf_decode = pred[0]
lengths = pred[2]
(num_infer_chunks, num_label_chunks,
num_correct_chunks) = self.chunk_eval(
input=crf_decode, label=label, seq_length=lengths)
return [num_infer_chunks, num_label_chunks, num_correct_chunks]
def update(self, num_infer_chunks, num_label_chunks, num_correct_chunks,
*args, **kwargs):
self.infer_chunks_total += num_infer_chunks
self.label_chunks_total += num_label_chunks
self.correct_chunks_total += num_correct_chunks
precision = float(
num_correct_chunks) / num_infer_chunks if num_infer_chunks else 0
recall = float(
num_correct_chunks) / num_label_chunks if num_label_chunks else 0
f1_score = float(2 * precision * recall) / (
precision + recall) if num_correct_chunks else 0
return [precision, recall, f1_score]
def reset(self):
self.infer_chunks_total = 0
self.label_chunks_total = 0
self.correct_chunks_total = 0
def accumulate(self):
precision = float(
self.correct_chunks_total
) / self.infer_chunks_total if self.infer_chunks_total else 0
recall = float(
self.correct_chunks_total
) / self.label_chunks_total if self.label_chunks_total else 0
f1_score = float(2 * precision * recall) / (
precision + recall) if self.correct_chunks_total else 0
res = [precision, recall, f1_score]
return res
    def _init_name(self, name):
        # the metric always reports these three values
        self._name = ['precision', 'recall', 'F1']
def name(self):
return self._name
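# A worked example of how the accumulated chunk statistics turn into scores
# (the counts are hypothetical): with 8 inferred chunks, 10 labeled chunks
# and 6 correct chunks, accumulate() reports
#     precision = 6 / 8  = 0.75
#     recall    = 6 / 10 = 0.60
#     F1        = 2 * 0.75 * 0.60 / (0.75 + 0.60) ~= 0.667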
class LacDataset(object):
"""
Load lexical analysis dataset
"""
def __init__(self, args):
self.word_dict_path = args.word_dict_path
self.label_dict_path = args.label_dict_path
self.word_rep_dict_path = args.word_rep_dict_path
self._load_dict()
def _load_dict(self):
self.word2id_dict = self.load_kv_dict(
self.word_dict_path, reverse=True, value_func=np.int64)
self.id2word_dict = self.load_kv_dict(self.word_dict_path)
self.label2id_dict = self.load_kv_dict(
self.label_dict_path, reverse=True, value_func=np.int64)
self.id2label_dict = self.load_kv_dict(self.label_dict_path)
if self.word_rep_dict_path is None:
self.word_replace_dict = dict()
else:
self.word_replace_dict = self.load_kv_dict(self.word_rep_dict_path)
def load_kv_dict(self,
dict_path,
reverse=False,
delimiter="\t",
key_func=None,
value_func=None):
"""
Load key-value dict from file
"""
result_dict = {}
for line in io.open(dict_path, "r", encoding='utf8'):
terms = line.strip("\n").split(delimiter)
if len(terms) != 2:
continue
if reverse:
value, key = terms
else:
key, value = terms
if key in result_dict:
raise KeyError("key duplicated with [%s]" % (key))
if key_func:
key = key_func(key)
if value_func:
value = value_func(value)
result_dict[key] = value
return result_dict
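    # A sketch of the expected dict file format (a hypothetical two-line
    # file, "<id><TAB><token>" per line):
    #     0	OOV
    #     1	the
    # load_kv_dict(path)                                    -> {"0": "OOV", "1": "the"}
    # load_kv_dict(path, reverse=True, value_func=np.int64) -> {"OOV": 0, "the": 1}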
    @property
    def vocab_size(self):
        return len(self.word2id_dict)
    @property
    def num_labels(self):
        return len(self.label2id_dict)
def get_num_examples(self, filename):
"""num of line of file"""
return sum(1 for line in io.open(filename, "r", encoding='utf8'))
def word_to_ids(self, words):
"""convert word to word index"""
word_ids = []
for word in words:
word = self.word_replace_dict.get(word, word)
if word not in self.word2id_dict:
word = "OOV"
word_id = self.word2id_dict[word]
word_ids.append(word_id)
return word_ids
def label_to_ids(self, labels):
"""convert label to label index"""
label_ids = []
for label in labels:
if label not in self.label2id_dict:
label = "O"
label_id = self.label2id_dict[label]
label_ids.append(label_id)
return label_ids
def file_reader(self,
filename,
mode="train",
batch_size=32,
max_seq_len=126):
"""
yield (word_idx, target_idx) one by one from file,
or yield (word_idx, ) in `infer` mode
"""
def wrapper():
fread = io.open(filename, "r", encoding="utf-8")
headline = next(fread)
headline = headline.strip().split('\t')
assert len(headline) == 2 and headline[0] == "text_a" and headline[
1] == "label"
buf = []
for line in fread:
words, labels = line.strip("\n").split("\t")
if len(words) < 1:
continue
word_ids = self.word_to_ids(words.split("\002"))
label_ids = self.label_to_ids(labels.split("\002"))
assert len(word_ids) == len(label_ids)
word_ids = word_ids[0:max_seq_len]
words_len = np.int64(len(word_ids))
word_ids += [0 for _ in range(max_seq_len - words_len)]
label_ids = label_ids[0:max_seq_len]
label_ids += [0 for _ in range(max_seq_len - words_len)]
assert len(word_ids) == len(label_ids)
yield word_ids, label_ids, words_len
fread.close()
return wrapper
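# LacDataset.file_reader assumes a TSV of the following shape (the sample
# line is hypothetical): a "text_a<TAB>label" header, then one sample per
# line where the tokens and their tags are joined by the "\002" separator:
#     text_a	label
#     word1\002word2	B-ORG\002I-ORG
# Each yielded sample is (padded word ids, padded label ids, true length).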
def create_lexnet_data_generator(args, reader, file_name, place, mode="train"):
def wrapper():
batch_words, batch_labels, seq_lens = [], [], []
        for epoch in range(args.epoch):
for instance in reader.file_reader(
file_name, mode, max_seq_len=args.max_seq_len)():
words, labels, words_len = instance
if len(seq_lens) < args.batch_size:
batch_words.append(words)
batch_labels.append(labels)
seq_lens.append(words_len)
if len(seq_lens) == args.batch_size:
yield batch_words, batch_labels, seq_lens, batch_labels
batch_words, batch_labels, seq_lens = [], [], []
if len(seq_lens) > 0:
yield batch_words, batch_labels, seq_lens, batch_labels
batch_words, batch_labels, seq_lens = [], [], []
return wrapper
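# Each yielded batch is (batch_words, batch_labels, seq_lens, batch_labels);
# the labels appear twice because the CRF layer consumes them as a network
# input ('target') while the Metric consumes them again as the ground
# truth ('labels').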
def create_dataloader(generator, place, feed_list=None):
if not feed_list:
data_loader = fluid.io.DataLoader.from_generator(
capacity=50,
use_double_buffer=True,
iterable=True,
return_list=True)
else:
data_loader = fluid.io.DataLoader.from_generator(
feed_list=feed_list,
capacity=50,
use_double_buffer=True,
iterable=True,
return_list=True)
data_loader.set_batch_generator(generator, places=place)
return data_loader
def main(args):
place = set_device(args.device)
    if args.dynamic:
        fluid.enable_dygraph(place)
    inputs = [
        Input([None, args.max_seq_len], 'int64', name='words'),
        Input([None, args.max_seq_len], 'int64', name='target'),
        Input([None], 'int64', name='length'),
    ]
labels = [Input([None, args.max_seq_len], 'int64', name='labels')]
feed = [x.forward() for x in inputs + labels]
dataset = LacDataset(args)
train_path = os.path.join(args.data, "train.tsv")
test_path = os.path.join(args.data, "test.tsv")
if args.dynamic:
feed_list = None
else:
feed_list = feed
train_generator = create_lexnet_data_generator(
args, reader=dataset, file_name=train_path, place=place, mode="train")
test_generator = create_lexnet_data_generator(
args, reader=dataset, file_name=test_path, place=place, mode="test")
train_dataset = create_dataloader(
train_generator, place, feed_list=feed_list)
test_dataset = create_dataloader(
test_generator, place, feed_list=feed_list)
vocab_size = dataset.vocab_size
num_labels = dataset.num_labels
model = LAC(args, vocab_size, num_labels)
optim = AdamOptimizer(
learning_rate=args.base_learning_rate,
parameter_list=model.parameters())
model.prepare(
optim,
LacLoss(),
ChunkEval(num_labels),
inputs=inputs,
labels=labels,
device=args.device)
if args.resume is not None:
model.load(args.resume)
model.fit(train_dataset,
test_dataset,
epochs=args.epoch,
batch_size=args.batch_size,
eval_freq=args.eval_freq,
save_freq=args.save_freq,
save_dir=args.save_dir)
if __name__ == '__main__':
parser = argparse.ArgumentParser("LAC training")
parser.add_argument(
"-dir", "--data", default=None, type=str, help='path to LAC dataset')
parser.add_argument(
"-wd",
"--word_dict_path",
default=None,
type=str,
help='word dict path')
parser.add_argument(
"-ld",
"--label_dict_path",
default=None,
type=str,
help='label dict path')
parser.add_argument(
"-wrd",
"--word_rep_dict_path",
default=None,
type=str,
        help='path of the word replacement dictionary')
parser.add_argument(
"-dev",
"--device",
type=str,
default='gpu',
help="device to use, gpu or cpu")
parser.add_argument(
"-d", "--dynamic", action='store_true', help="enable dygraph mode")
parser.add_argument(
"-e", "--epoch", default=10, type=int, help="number of epoch")
parser.add_argument(
'-lr',
'--base_learning_rate',
default=1e-3,
type=float,
metavar='LR',
help='initial learning rate')
parser.add_argument(
"--word_emb_dim",
default=128,
type=int,
help='word embedding dimension')
parser.add_argument(
"--grnn_hidden_dim", default=128, type=int, help="hidden dimension")
parser.add_argument(
"--bigru_num", default=2, type=int, help='the number of bi-rnn')
parser.add_argument("-elr", "--emb_learning_rate", default=1.0, type=float)
parser.add_argument("-clr", "--crf_learning_rate", default=1.0, type=float)
parser.add_argument(
"-b", "--batch_size", default=300, type=int, help="batch size")
parser.add_argument(
"--max_seq_len", default=126, type=int, help="max sequence length")
parser.add_argument(
"-n", "--num_devices", default=1, type=int, help="number of devices")
parser.add_argument(
"-r",
"--resume",
default=None,
type=str,
help="checkpoint path to resume")
parser.add_argument(
"-o",
"--save_dir",
default="./model",
type=str,
help="save model path")
parser.add_argument(
"-sf", "--save_freq", default=1, type=int, help="save frequency")
parser.add_argument(
"-ef", "--eval_freq", default=1, type=int, help="eval frequency")
args = parser.parse_args()
print(args)
main(args)
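# An example invocation (the script name and data paths are placeholders):
#     python train.py --data ./data/lexical_analysis \
#         --word_dict_path ./conf/word.dic \
#         --label_dict_path ./conf/tag.dic \
#         --device gpu --dynamic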
@@ -24,7 +24,7 @@ import numpy as np
 from paddle import fluid
 from paddle.fluid.optimizer import Momentum
 from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
-from paddle.fluid.io import MNIST as MnistDataset
+from vision.datasets import MNIST as MnistDataset
 from model import Model, CrossEntropy, Input, set_device
 from metrics import Accuracy
...
[metadata]
name = hapi
author = zhouxiangyang
author_email = zhouxiangyang@baidu.com
version = 0.0.1
description = HAPI
long_description = file: README.md
long_description_content_type = text/markdown
home_page = https://github.com/PaddlePaddle/hapi
license = Apache 2.0
classifier =
Private :: Do Not Upload
Programming Language :: Python
Programming Language :: Python :: 2
Programming Language :: Python :: 2.7
Programming Language :: Python :: 3
Programming Language :: Python :: 3.5
Programming Language :: Python :: 3.6
Programming Language :: Python :: 3.7
keywords =
paddlepaddle
paddle
high-level-api
[options]
packages = find:
#install_requires =
# paddlepaddle-gpu >= 1.5.2
include_package_data = True
zip_safe = False
[sdist]
dist_dir = output/dist
[bdist_wheel]
dist_dir = output/dist
[easy_install]
index_url = http://pip.baidu.com/root/baidu/+simple/
# -*- coding: UTF-8 -*-
################################################################################
#
# Copyright (c) 2020 Baidu.com, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################
"""
Setup script.
Authors: zhouxiangyang(zhouxiangyang@baidu.com)
Date: 2020/2/4 00:00:01
"""
import setuptools
with open("README.md", "r") as fh:
long_description = fh.read()
setuptools.setup(
name="hapi",
version="0.0.1",
author="PaddlePaddle",
author_email="zhouxiangyang@baidu.com",
description="A Paddle High-level API that supports both static and dynamic execution modes (still under development)",
url="https://github.com/PaddlePaddle/hapi",
packages=[
'hapi',
'hapi.datasets',
'hapi.text',
'hapi.text.tokenizer',
'hapi.text.bert',
'hapi.text.bert.utils',
'hapi.vision',
'hapi.vision.models',
'hapi.vision.transforms',
],
package_dir={
'hapi': './hapi',
'hapi.datasets': './hapi/datasets',
'hapi.text': './hapi/text',
'hapi.text.tokenizer': './hapi/text/tokenizer',
'hapi.text.bert': './hapi/text/bert',
'hapi.text.bert.utils': './hapi/text/bert/utils',
'hapi.vision': './hapi/vision',
'hapi.vision.models': './hapi/vision/models',
'hapi.vision.transforms': './hapi/vision/transforms',
},
platforms="any",
license='Apache 2.0',
classifiers=[
'License :: OSI Approved :: Apache Software License',
'Programming Language :: Python',
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
], )
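# With this setup.py and the setup.cfg above, a distributable package can be
# built in the usual way (output lands in output/dist per the [sdist] and
# [bdist_wheel] sections):
#     python setup.py sdist bdist_wheel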
import paddle
from hapi.model import set_device
from hapi.text.bert.dataloader import SingleSentenceDataLoader
import hapi.text.tokenizer.tokenization as tokenization
device = set_device("cpu")
paddle.fluid.enable_dygraph(device)
tokenizer = tokenization.FullTokenizer(
vocab_file="./tmp/hapi/data/pretrained_models/uncased_L-12_H-768_A-12/vocab.txt",
do_lower_case=True)
bert_dataloader = SingleSentenceDataLoader(
"./tmp/hapi/aaa.txt",
tokenizer, ["1", "2"],
max_seq_length=32,
batch_size=1)
for data in bert_dataloader.dataloader():
print(data)
@@ -12,11 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+# when test, you should add hapi root path to the PYTHONPATH,
+# export PYTHONPATH=PATH_TO_HAPI:$PYTHONPATH
 import unittest
 import time
 import random
-from callbacks import config_callbacks
+from hapi.callbacks import config_callbacks
 class TestCallbacks(unittest.TestCase):
...
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# when test, you should add hapi root path to the PYTHONPATH,
# export PYTHONPATH=PATH_TO_HAPI:$PYTHONPATH
import unittest
import numpy as np
from hapi.datasets import *
class TestFolderDatasets(unittest.TestCase):
def test_dataset(self):
dataset_folder = DatasetFolder('tests/test_data')
for _ in dataset_folder:
pass
assert len(dataset_folder) == 3
assert len(dataset_folder.classes) == 2
class TestMNISTTest(unittest.TestCase):
def test_main(self):
mnist = MNIST(mode='test')
self.assertTrue(len(mnist) == 10000)
for i in range(len(mnist)):
image, label = mnist[i]
self.assertTrue(image.shape[0] == 784)
self.assertTrue(label.shape[0] == 1)
self.assertTrue(0 <= int(label) <= 9)
class TestMNISTTrain(unittest.TestCase):
def test_main(self):
mnist = MNIST(mode='train')
self.assertTrue(len(mnist) == 60000)
for i in range(len(mnist)):
image, label = mnist[i]
self.assertTrue(image.shape[0] == 784)
self.assertTrue(label.shape[0] == 1)
self.assertTrue(0 <= int(label) <= 9)
class TestFlowersTrain(unittest.TestCase):
def test_main(self):
flowers = Flowers(mode='train')
self.assertTrue(len(flowers) == 6149)
        # traversing the whole dataset may take a long
        # time, so randomly check one sample only
idx = np.random.randint(0, 6149)
image, label = flowers[idx]
self.assertTrue(len(image.shape) == 3)
self.assertTrue(image.shape[2] == 3)
self.assertTrue(label.shape[0] == 1)
class TestFlowersValid(unittest.TestCase):
def test_main(self):
flowers = Flowers(mode='valid')
self.assertTrue(len(flowers) == 1020)
        # traversing the whole dataset may take a long
        # time, so randomly check one sample only
idx = np.random.randint(0, 1020)
image, label = flowers[idx]
self.assertTrue(len(image.shape) == 3)
self.assertTrue(image.shape[2] == 3)
self.assertTrue(label.shape[0] == 1)
class TestFlowersTest(unittest.TestCase):
def test_main(self):
flowers = Flowers(mode='test')
self.assertTrue(len(flowers) == 1020)
        # traversing the whole dataset may take a long
        # time, so randomly check one sample only
idx = np.random.randint(0, 1020)
image, label = flowers[idx]
self.assertTrue(len(image.shape) == 3)
self.assertTrue(image.shape[2] == 3)
self.assertTrue(label.shape[0] == 1)
if __name__ == '__main__':
unittest.main()
@@ -15,24 +15,25 @@
 from __future__ import division
 from __future__ import print_function
+# when test, you should add hapi root path to the PYTHONPATH,
+# export PYTHONPATH=PATH_TO_HAPI:$PYTHONPATH
 import unittest
 import os
-import sys
-sys.path.append('../')
 import numpy as np
 import contextlib
 import paddle
 from paddle import fluid
 from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
-from model import Model, CrossEntropy, Input, Loss, set_device
-from metrics import Accuracy
-from callbacks import ProgBarLogger
-from paddle.fluid.io import BatchSampler, DataLoader
-from paddle.fluid.io import MNIST as MnistDataset
+from paddle.io import BatchSampler, DataLoader
+from hapi.model import Model, CrossEntropy, Input, Loss, set_device
+from hapi.metrics import Accuracy
+from hapi.callbacks import ProgBarLogger
+from hapi.datasets import MNIST as MnistDataset
 class SimpleImgConvPool(fluid.dygraph.Layer):
...
@@ -12,11 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+# when test, you should add hapi root path to the PYTHONPATH,
+# export PYTHONPATH=PATH_TO_HAPI:$PYTHONPATH
 import unittest
 import random
 import time
-from progressbar import ProgressBar
+from hapi.progressbar import ProgressBar
 class TestProgressBar(unittest.TestCase):
...
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# when test, you should add hapi root path to the PYTHONPATH,
# export PYTHONPATH=PATH_TO_HAPI:$PYTHONPATH
import unittest
from hapi.datasets import DatasetFolder
import hapi.vision.transforms as transforms
class TestTransforms(unittest.TestCase):
def do_transform(self, trans):
dataset_folder = DatasetFolder('tests/test_data', transform=trans)
for _ in dataset_folder:
pass
def test_trans0(self):
normalize = transforms.Normalize(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.120, 57.375])
trans = transforms.Compose([
transforms.RandomResizedCrop(224), transforms.GaussianNoise(),
transforms.ColorJitter(
brightness=0.4, contrast=0.4, saturation=0.4,
hue=0.4), transforms.RandomHorizontalFlip(),
transforms.Permute(mode='CHW'), normalize
])
self.do_transform(trans)
def test_trans1(self):
trans = transforms.Compose([
transforms.Resize(256),
transforms.CenterCrop(224),
])
self.do_transform(trans)
def test_trans2(self):
trans = transforms.Compose([transforms.CenterCropResize(224)])
self.do_transform(trans)
if __name__ == '__main__':
unittest.main()
@@ -22,7 +22,7 @@ from functools import partial
 import numpy as np
 import paddle
 import paddle.fluid as fluid
-from paddle.fluid.io import DataLoader
+from paddle.io import DataLoader
 from paddle.fluid.layers.utils import flatten
 from utils.configure import PDConfig
...
@@ -22,7 +22,7 @@ from functools import partial
 import numpy as np
 import paddle.fluid as fluid
 from paddle.fluid.dygraph.parallel import ParallelEnv
-from paddle.fluid.io import BatchSampler, DataLoader, Dataset
+from paddle.io import BatchSampler, DataLoader, Dataset
 def create_data_loader(args, device):
...
@@ -21,7 +21,7 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 import numpy as np
 import paddle
 import paddle.fluid as fluid
-from paddle.fluid.io import DataLoader
+from paddle.io import DataLoader
 from utils.configure import PDConfig
 from utils.check import check_gpu, check_version
...