diff --git a/examples/bert/bert_classifier.py b/examples/bert/bert_classifier.py old mode 100755 new mode 100644 index 5e1f049a397d5b295f7e25d3a63fae1a30e5b81b..ef43ae2076e665a88fd896dd7e6f830c9e38640c --- a/examples/bert/bert_classifier.py +++ b/examples/bert/bert_classifier.py @@ -16,14 +16,60 @@ import paddle.fluid as fluid from hapi.metrics import Accuracy from hapi.configure import Config +from hapi.text.bert import BertEncoder +from paddle.fluid.dygraph import Linear, Layer from hapi.model import set_device, Model, SoftmaxWithCrossEntropy, Input - -from cls import ClsModelLayer import hapi.text.tokenizer.tokenization as tokenization from hapi.text.bert import Optimizer, BertConfig, BertDataLoader, BertInputExample -def train(): +class ClsModelLayer(Model): + """ + classify model + """ + + def __init__(self, + args, + config, + num_labels, + return_pooled_out=True, + use_fp16=False): + super(ClsModelLayer, self).__init__() + self.config = config + self.use_fp16 = use_fp16 + self.loss_scaling = args.loss_scaling + + self.bert_layer = BertEncoder( + config=self.config, return_pooled_out=True, use_fp16=self.use_fp16) + + self.cls_fc = Linear( + input_dim=self.config["hidden_size"], + output_dim=num_labels, + param_attr=fluid.ParamAttr( + name="cls_out_w", + initializer=fluid.initializer.TruncatedNormal(scale=0.02)), + bias_attr=fluid.ParamAttr( + name="cls_out_b", initializer=fluid.initializer.Constant(0.))) + + def forward(self, src_ids, position_ids, sentence_ids, input_mask): + """ + forward + """ + + enc_output, next_sent_feat = self.bert_layer(src_ids, position_ids, + sentence_ids, input_mask) + + cls_feats = fluid.layers.dropout( + x=next_sent_feat, + dropout_prob=0.1, + dropout_implementation="upscale_in_train") + + pred = self.cls_fc(cls_feats) + + return pred + + +def main(): config = Config(yaml_file="./bert.yaml") config.build() @@ -35,8 +81,6 @@ def train(): bert_config = BertConfig(config.bert_config_path) bert_config.print_config() - trainer_count = fluid.dygraph.parallel.Env().nranks - tokenizer = tokenization.FullTokenizer( vocab_file=config.vocab_path, do_lower_case=config.do_lower_case) @@ -52,14 +96,24 @@ def train(): return BertInputExample( uid=uid, text_a=text_a, text_b=text_b, label=label) - bert_dataloader = BertDataLoader( + train_dataloader = BertDataLoader( "./data/glue_data/MNLI/train.tsv", tokenizer, ["contradiction", "entailment", "neutral"], - max_seq_length=64, - batch_size=32, + max_seq_length=config.max_seq_len, + batch_size=config.batch_size, line_processor=mnli_line_processor) - num_train_examples = len(bert_dataloader.dataset) + test_dataloader = BertDataLoader( + "./data/glue_data/MNLI/dev_matched.tsv", + tokenizer, ["contradiction", "entailment", "neutral"], + max_seq_length=config.max_seq_len, + batch_size=config.batch_size, + line_processor=mnli_line_processor, + shuffle=False, + phase="predict") + + trainer_count = fluid.dygraph.parallel.Env().nranks + num_train_examples = len(train_dataloader.dataset) max_train_steps = config.epoch * num_train_examples // config.batch_size // trainer_count warmup_steps = int(max_train_steps * config.warmup_proportion) @@ -82,7 +136,6 @@ def train(): config, bert_config, len(["contradiction", "entailment", "neutral"]), - is_training=True, return_pooled_out=True) optimizer = Optimizer( @@ -106,10 +159,15 @@ def train(): cls_model.bert_layer.init_parameters( config.init_pretraining_params, verbose=config.verbose) - cls_model.fit(train_data=bert_dataloader.dataloader, epochs=config.epoch) + # do train + 
cls_model.fit(train_data=train_dataloader.dataloader, + epochs=config.epoch, + save_dir=config.checkpoints) - return cls_model + # do eval + cls_model.evaluate( + eval_data=test_dataloader.dataloader, batch_size=config.batch_size) if __name__ == '__main__': - cls_model = train() + main()
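For orientation, the refactored bert_classifier.py now follows hapi's high-level Model flow end to end. The sketch below condenses that flow; the method names come from the hunks above, but the prepare() call sits outside the visible hunk context, so its exact arguments are an assumption rather than a verbatim excerpt:

```python
# Condensed hapi train/eval flow after this refactor (a sketch built from
# the hunks above; the prepare() signature is assumed from the imports).
cls_model = ClsModelLayer(
    config, bert_config,
    len(["contradiction", "entailment", "neutral"]),
    return_pooled_out=True)
cls_model.prepare(
    optimizer, SoftmaxWithCrossEntropy(), Accuracy(topk=(1, 2)))  # assumption
cls_model.bert_layer.init_parameters(
    config.init_pretraining_params, verbose=config.verbose)
cls_model.fit(train_data=train_dataloader.dataloader,
              epochs=config.epoch,
              save_dir=config.checkpoints)
cls_model.evaluate(
    eval_data=test_dataloader.dataloader, batch_size=config.batch_size)
```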
diff --git a/examples/bert/cls.py b/examples/bert/cls.py deleted file mode 100644 index 1e3ed6cdc28fc2d97f74e8697f26c3e654feb414..0000000000000000000000000000000000000000 --- a/examples/bert/cls.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"dygraph transformer layers" - -import six -import json -import numpy as np - -import paddle -import paddle.fluid as fluid -from paddle.fluid.dygraph import Linear, Layer - -from hapi.text.bert import BertEncoder -from hapi.model import Model - - -class ClsModelLayer(Model): - """ - classify model - """ - - def __init__(self, - args, - config, - num_labels, - is_training=True, - return_pooled_out=True, - use_fp16=False): - super(ClsModelLayer, self).__init__() - self.config = config - self.is_training = is_training - self.use_fp16 = use_fp16 - self.loss_scaling = args.loss_scaling - - self.bert_layer = BertEncoder( - config=self.config, return_pooled_out=True, use_fp16=self.use_fp16) - - self.cls_fc = Linear( - input_dim=self.config["hidden_size"], - output_dim=num_labels, - param_attr=fluid.ParamAttr( - name="cls_out_w", - initializer=fluid.initializer.TruncatedNormal(scale=0.02)), - bias_attr=fluid.ParamAttr( - name="cls_out_b", initializer=fluid.initializer.Constant(0.))) - - def forward(self, src_ids, position_ids, sentence_ids, input_mask): - """ - forward - """ - - enc_output, next_sent_feat = self.bert_layer(src_ids, position_ids, - sentence_ids, input_mask) - - cls_feats = fluid.layers.dropout( - x=next_sent_feat, - dropout_prob=0.1, - dropout_implementation="upscale_in_train") - - logits = self.cls_fc(cls_feats) - - return logits diff --git a/examples/bert_leveldb/bert.yaml b/examples/bert_leveldb/bert.yaml index 012c1ce43f0eeb2bd121b9565e0e71e7df137eb6..5b21a6c635903cb8506d70e23c9294cbd7771d9e 100644 --- a/examples/bert_leveldb/bert.yaml +++ b/examples/bert_leveldb/bert.yaml @@ -18,7 +18,7 @@ batch_size: 32 in_tokens: False do_lower_case: True random_seed: 5512 -use_cuda: False +use_cuda: True shuffle: True do_train: True do_test: True diff --git a/examples/bert_leveldb/bert_classifier.py b/examples/bert_leveldb/bert_classifier.py old mode 100755 new mode 100644 index e329f17ff1dd66a7b3892cdd460ffb0c95b3d039..11bc85758ebbe81ae68b3c141d4582ee8d41508c --- a/examples/bert_leveldb/bert_classifier.py +++ b/examples/bert_leveldb/bert_classifier.py @@ -16,14 +16,60 @@ import paddle.fluid as fluid from hapi.metrics import Accuracy from hapi.configure import Config +from hapi.text.bert import BertEncoder +from paddle.fluid.dygraph import Linear, Layer from hapi.model import set_device, Model, SoftmaxWithCrossEntropy, Input - -from cls import ClsModelLayer import hapi.text.tokenizer.tokenization as tokenization from hapi.text.bert import Optimizer, BertConfig, BertDataLoader, BertInputExample -def train(): +class ClsModelLayer(Model): + """ + classify model + """ + + def __init__(self, + args, + config, + num_labels, + return_pooled_out=True, + use_fp16=False): + super(ClsModelLayer, self).__init__() + self.config = config + self.use_fp16 = use_fp16 + self.loss_scaling = args.loss_scaling + + self.bert_layer = BertEncoder( + config=self.config, return_pooled_out=True, use_fp16=self.use_fp16) + + self.cls_fc = Linear( + input_dim=self.config["hidden_size"], + output_dim=num_labels, + param_attr=fluid.ParamAttr( + name="cls_out_w", + initializer=fluid.initializer.TruncatedNormal(scale=0.02)), + bias_attr=fluid.ParamAttr( + name="cls_out_b", initializer=fluid.initializer.Constant(0.))) + + def forward(self, src_ids, position_ids, sentence_ids, input_mask): + """ + forward + """ + + enc_output, next_sent_feat = self.bert_layer(src_ids, position_ids, + sentence_ids, input_mask) + + cls_feats = fluid.layers.dropout( + x=next_sent_feat, + dropout_prob=0.1, + dropout_implementation="upscale_in_train") + + pred = self.cls_fc(cls_feats) + + return pred + + +def main(): config = Config(yaml_file="./bert.yaml") config.build() @@ -35,8 +81,6 @@ def train(): bert_config = BertConfig(config.bert_config_path) bert_config.print_config() - trainer_count = fluid.dygraph.parallel.Env().nranks - tokenizer = tokenization.FullTokenizer( vocab_file=config.vocab_path, do_lower_case=config.do_lower_case) @@ -52,15 +96,26 @@ def train(): return BertInputExample( uid=uid, text_a=text_a, text_b=text_b, label=label) - bert_dataloader = BertDataLoader( + train_dataloader = BertDataLoader( "./data/glue_data/MNLI/train.tsv", tokenizer, ["contradiction", "entailment", "neutral"], - max_seq_length=64, - batch_size=32, + max_seq_length=config.max_seq_len, + batch_size=config.batch_size, line_processor=mnli_line_processor, - mode="leveldb") + mode="leveldb", + phase="train") - num_train_examples = len(bert_dataloader.dataset) + test_dataloader = BertDataLoader( + "./data/glue_data/MNLI/dev_matched.tsv", + tokenizer, ["contradiction", "entailment", "neutral"], + max_seq_length=config.max_seq_len, + batch_size=config.batch_size, + line_processor=mnli_line_processor, + shuffle=False, + phase="predict") + + trainer_count = fluid.dygraph.parallel.Env().nranks + num_train_examples = len(train_dataloader.dataset) max_train_steps = config.epoch * num_train_examples // config.batch_size // trainer_count warmup_steps = int(max_train_steps * config.warmup_proportion) @@ -83,7 +138,6 @@ def train(): config, bert_config, len(["contradiction", "entailment", "neutral"]), - is_training=True, return_pooled_out=True) optimizer = Optimizer( @@ -107,10 +161,15 @@ def train(): cls_model.bert_layer.init_parameters( config.init_pretraining_params, verbose=config.verbose) - cls_model.fit(train_data=bert_dataloader.dataloader, epochs=config.epoch) + # do train + cls_model.fit(train_data=train_dataloader.dataloader, + epochs=config.epoch, + save_dir=config.checkpoints) - return cls_model + # do eval + cls_model.evaluate( + eval_data=test_dataloader.dataloader, batch_size=config.batch_size) if __name__ == '__main__': - cls_model = train() + main()
diff --git a/examples/bert_leveldb/cls.py b/examples/bert_leveldb/cls.py deleted file mode 100644 index 1e3ed6cdc28fc2d97f74e8697f26c3e654feb414..0000000000000000000000000000000000000000 --- a/examples/bert_leveldb/cls.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"dygraph transformer layers" - -import six -import json -import numpy as np - -import paddle -import paddle.fluid as fluid -from paddle.fluid.dygraph import Linear, Layer - -from hapi.text.bert import BertEncoder -from hapi.model import Model - - -class ClsModelLayer(Model): - """ - classify model - """ - - def __init__(self, - args, - config, - num_labels, - is_training=True, - return_pooled_out=True, - use_fp16=False): - super(ClsModelLayer, self).__init__() - self.config = config - self.is_training = is_training - self.use_fp16 = use_fp16 - self.loss_scaling = args.loss_scaling - - self.bert_layer = BertEncoder( - config=self.config, return_pooled_out=True, use_fp16=self.use_fp16) - - self.cls_fc = Linear( - input_dim=self.config["hidden_size"], - output_dim=num_labels, - param_attr=fluid.ParamAttr( - name="cls_out_w", - initializer=fluid.initializer.TruncatedNormal(scale=0.02)), - bias_attr=fluid.ParamAttr( - name="cls_out_b", initializer=fluid.initializer.Constant(0.))) - - def forward(self, src_ids, position_ids, sentence_ids, input_mask): - """ - forward - """ - - enc_output, next_sent_feat = self.bert_layer(src_ids, position_ids, - sentence_ids, input_mask) - - cls_feats = fluid.layers.dropout( - x=next_sent_feat, - dropout_prob=0.1, - dropout_implementation="upscale_in_train") - - logits = self.cls_fc(cls_feats) - - return logits diff --git a/examples/bert_leveldb/nohup.out b/examples/bert_leveldb/nohup.out deleted file mode 100644 index 77ae49cd61b2527ee7674142c8e0236933bd6314..0000000000000000000000000000000000000000 --- a/examples/bert_leveldb/nohup.out +++ /dev/null @@ -1,312 +0,0 @@ [312 deleted lines of captured training output: a GREP_OPTIONS deprecation warning, CUDA/cuDNN device info, the run configuration dump (matching bert.yaml: 392703 train examples, batch size 64, 3 epochs, max_train_steps 18407, warmup_steps 1840), and per-step loss/acc_top1/acc_top2 logs from step 10/12272 through step 2630/12272 of epoch 1]
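Both launcher scripts below reuse the same Python entry point because bert_classifier.py derives its schedule from the runtime trainer count. As a quick sanity check, the step arithmetic from the classifier, evaluated with the numbers recorded in the deleted nohup.out (392703 examples, batch size 64, 3 epochs, warmup proportion 0.1), reproduces the logged totals:

```python
# Step arithmetic from bert_classifier.py, with example values taken from
# the deleted nohup.out ("Num train examples: 392703", batch_size 64).
trainer_count = 1  # fluid.dygraph.parallel.Env().nranks on a single GPU
epoch, batch_size, warmup_proportion = 3, 64, 0.1
num_train_examples = 392703

max_train_steps = epoch * num_train_examples // batch_size // trainer_count
warmup_steps = int(max_train_steps * warmup_proportion)
print(max_train_steps, warmup_steps)  # 18407 1840, matching the old log
```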
diff --git a/examples/bert_leveldb/run_classifier_multi_gpu.sh b/examples/bert_leveldb/run_classifier_multi_gpu.sh new file mode 100755 index 0000000000000000000000000000000000000000..1b7d6aea60c385e32bafbcfd35ae420f1e5824a6 --- /dev/null +++ b/examples/bert_leveldb/run_classifier_multi_gpu.sh @@ -0,0 +1,27 @@ +#!/bin/bash +BERT_BASE_PATH="./data/pretrained_models/uncased_L-12_H-768_A-12/" +TASK_NAME='MNLI' +DATA_PATH="./data/glue_data/MNLI/" +CKPT_PATH="./data/saved_model/mnli_models" + +# start fine-tuning +python3.7 -m paddle.distributed.launch --started_port 8899 --selected_gpus=0,1,2,3 bert_classifier.py\ + --use_cuda true \ + --do_train true \ + --do_test true \ + --batch_size 64 \ + --init_pretraining_params ${BERT_BASE_PATH}/dygraph_params/ \ + --data_dir ${DATA_PATH} \ + --vocab_path ${BERT_BASE_PATH}/vocab.txt \ + --checkpoints ${CKPT_PATH} \ + --save_steps 1000 \ + --weight_decay 0.01 \ + --warmup_proportion 0.1 \ + --validation_steps 100 \ + --epoch 3 \ + --max_seq_len 128 \ + --bert_config_path ${BERT_BASE_PATH}/bert_config.json \ + --learning_rate 5e-5 \ + --skip_steps 10 \ + --shuffle true + diff --git a/examples/bert_leveldb/run_classifier_single_gpu.sh b/examples/bert_leveldb/run_classifier_single_gpu.sh index 6168975510ca601258a962df1dcfbe7c7f786c9b..5b52aafd0a63dfb250c7ab7dcefc09b60f406ac2 100755 --- a/examples/bert_leveldb/run_classifier_single_gpu.sh +++ b/examples/bert_leveldb/run_classifier_single_gpu.sh @@ -4,7 +4,7 @@ TASK_NAME='MNLI' DATA_PATH="./data/glue_data/MNLI/" CKPT_PATH="./data/saved_model/mnli_models" -export CUDA_VISIBLE_DEVICES=7 +export CUDA_VISIBLE_DEVICES=0 # start fine-tuning python3.7 bert_classifier.py\ diff --git a/examples/bmn/bmn_metric.py b/examples/bmn/bmn_metric.py index 22adab5644975d9762e80ad320dbbb17318654a9..cbcad9a1e15d5356127c748194ac907bcfda5967 100644 --- a/examples/bmn/bmn_metric.py +++ b/examples/bmn/bmn_metric.py @@ -47,10 +47,15 @@ class BmnMetric(Metric): if not os.path.isdir(self.cfg.INFER.result_path): os.makedirs(self.cfg.INFER.result_path) - def add_metric_op(self, preds, label): - pred_bm, pred_start, pred_en = preds - video_index = label[-1] - return [pred_bm, pred_start, pred_en, video_index] #return list + def add_metric_op(self, *args): + if self.mode == 'test': + # only extract pred_bm, pred_start, pred_en from outputs + # and video_index from label here + pred_bm, pred_start, pred_en, _, _, _, video_index = args + else: + # in infer mode, labels only contains video_index + pred_bm, pred_start, pred_en, video_index = args + return pred_bm, pred_start, pred_en, video_index def update(self, pred_bm, pred_start, pred_end, fid): # generate proposals
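The add_metric_op change above moves to a variadic signature because hapi hands the metric the model outputs followed by the labels, and the label list differs between modes. A small, self-contained illustration of the resulting unpacking contract (the names of the three discarded test-mode entries are inferred from the hunk's comment, not taken from this diff):

```python
# Stand-ins for what hapi passes through; values are illustrative only.
outputs = ("pred_bm", "pred_start", "pred_en")
test_labels = ("gt_iou_map", "gt_start", "gt_end", "video_index")  # assumed names
infer_labels = ("video_index",)

# test mode: 3 outputs + 4 labels -> the 7-way unpack in the hunk above
pred_bm, pred_start, pred_en, _, _, _, video_index = outputs + test_labels
# infer mode: 3 outputs + 1 label -> the 4-way unpack
pred_bm, pred_start, pred_en, video_index = outputs + infer_labels
```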
diff --git a/examples/bmn/eval.py b/examples/bmn/eval.py index 071f5d13401a9218ee2749a67edb70148ec973a4..2b129d146b4b8d46dc293f23c8a1bcb458da8fe4 100644 --- a/examples/bmn/eval.py +++ b/examples/bmn/eval.py @@ -19,7 +19,8 @@ import logging import paddle.fluid as fluid from hapi.model import set_device, Input -from hapi.vision.models import bmn, BmnLoss + +from modeling import bmn, BmnLoss from bmn_metric import BmnMetric from reader import BmnDataset from config_utils import * diff --git a/hapi/vision/models/bmn_model.py b/examples/bmn/modeling.py similarity index 99% rename from hapi/vision/models/bmn_model.py rename to examples/bmn/modeling.py index e15555debd2cf0a07299d414310487b1c8bfa905..f0fa26e1a687fc1d524870a03be470edf280fc9c 100644 --- a/hapi/vision/models/bmn_model.py +++ b/examples/bmn/modeling.py @@ -26,7 +26,7 @@ DATATYPE = 'float32' pretrain_infos = { 'bmn': ('https://paddlemodels.bj.bcebos.com/hapi/bmn.pdparams', - '9286c821acc4cad46d6613b931ba468c') + 'aa84e3386e1fbd117fb96fa572feeb94') } @@ -462,5 +462,5 @@ def bmn(tscale, weight_path = get_weights_path(*(pretrain_infos['bmn'])) assert weight_path.endswith('.pdparams'), \ "suffix of weight must be .pdparams" - model.load(weight_path[:-9]) + model.load(weight_path) return model diff --git a/examples/bmn/predict.py b/examples/bmn/predict.py index fb1b10452fb34a3380d3e027df5383d2a85fb0c0..6e0ae99d189fa5812765dd31f0461499acbe6fcc 100644 --- a/examples/bmn/predict.py +++ b/examples/bmn/predict.py @@ -19,7 +19,8 @@ import logging import paddle.fluid as fluid from hapi.model import set_device, Input -from hapi.vision.models import bmn, BmnLoss + +from modeling import bmn, BmnLoss from bmn_metric import BmnMetric from reader import BmnDataset from config_utils import * diff --git a/examples/bmn/train.py b/examples/bmn/train.py index 178085ae7eb1275d43df65e3ce0d96b37a25c882..c6bcdef549e943774f11313bd58db0cde6c0e0aa 100644 --- a/examples/bmn/train.py +++ b/examples/bmn/train.py @@ -19,9 +19,10 @@ import sys import os from hapi.model import set_device, Input -from hapi.vision.models import bmn, BmnLoss + from reader import BmnDataset from config_utils import * +from modeling import bmn, BmnLoss DATATYPE = 'float32'
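One detail of the modeling.py hunk above is easy to miss: the pretrained checkpoint's MD5 changed, and model.load() now receives the full .pdparams path instead of a truncated prefix ([:-9] stripped exactly the ".pdparams" suffix). A condensed view of that path, under the assumption that hapi's get_weights_path downloads to a local cache and returns the file path:

```python
# Condensed from the bmn() factory above (a sketch, not a verbatim excerpt;
# get_weights_path's download-and-cache behavior is assumed).
weight_path = get_weights_path(
    'https://paddlemodels.bj.bcebos.com/hapi/bmn.pdparams',
    'aa84e3386e1fbd117fb96fa572feeb94')
assert weight_path.endswith('.pdparams'), \
    "suffix of weight must be .pdparams"
model.load(weight_path)  # was model.load(weight_path[:-9]), i.e. minus ".pdparams"
```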
diff --git a/examples/ocr/README.md b/examples/ocr/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d3d592d195702e196c1e525da38424b47274a18b --- /dev/null +++ b/examples/ocr/README.md @@ -0,0 +1,76 @@ +Introduction +-------- +This OCR task recognizes single-line alphabetic text in images with an attention-based seq2seq model. Running the example programs in this directory requires the latest PaddlePaddle develop version. + +## Code structure +``` +. +|-- data.py # data reading +|-- eval.py # evaluation script +|-- images # test images +|-- predict.py # prediction script +|-- seq2seq_attn.py # model +|-- train.py # training script +`-- utility.py # common utilities +``` + +## Training/evaluation/prediction workflow + +- Set the GPU environment: + +``` +export CUDA_VISIBLE_DEVICES=0 +``` + +- Training + +``` +python train.py +``` + +More options can be viewed with `--help`. + + +- Switching between dynamic and static graphs + + +``` +python train.py --dynamic=True +``` + + +- Evaluation + +``` +python eval.py --init_model=checkpoint/final +``` + + +- Prediction + +Prediction does not support dynamic-graph mode yet: + +``` +python predict.py --init_model=checkpoint/final --image_path=images/ --dynamic=False --beam_size=3 +``` + +The prediction results look as follows: + +``` +Image 1: images/112_chubbiness_13557.jpg +0: chubbines +1: chubbiness +2: chubbinesS +Image 2: images/177_Interfiled_40185.jpg +0: Interflied +1: Interfiled +2: InterfIled +Image 3: images/325_dame_19109.jpg +0: da +1: damo +2: dame +Image 4: images/368_fixtures_29232.jpg +0: firtures +1: Firtures +2: fixtures +```
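Before the data.py diff, a note on how its pieces compose: the batch-level transforms (Resize, Normalize, PadTarget) are chained by hapi's BatchCompose and handed to a DataLoader together with the custom BatchSampler, mirroring the wiring eval.py sets up further down in this patch:

```python
# How data.py's pieces fit together (mirrors eval.py later in this diff;
# set_device/BatchCompose come from hapi as imported there).
import paddle.fluid as fluid
from hapi.model import set_device
from hapi.vision.transforms import BatchCompose
import data

device = set_device("cpu")
dataset = data.test()  # downloads the dataset on first use
collate_fn = BatchCompose([data.Resize(), data.Normalize(), data.PadTarget()])
sampler = data.BatchSampler(
    dataset, batch_size=32, shuffle=False, drop_last=False)
loader = fluid.io.DataLoader(
    dataset, batch_sampler=sampler, places=device,
    num_workers=0, return_list=True, collate_fn=collate_fn)
```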
diff --git a/examples/ocr/data.py b/examples/ocr/data.py new file mode 100644 index 0000000000000000000000000000000000000000..23e676e2625d3c75be9bec9b00777a11c38e0e6e --- /dev/null +++ b/examples/ocr/data.py @@ -0,0 +1,234 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +from os import path +import random +import traceback +import copy +import math +import tarfile +from PIL import Image + +import logging +logger = logging.getLogger(__name__) + +import paddle +from paddle import fluid +from paddle.fluid.dygraph.parallel import ParallelEnv + +DATA_MD5 = "7256b1d5420d8c3e74815196e58cdad5" +DATA_URL = "http://paddle-ocr-data.bj.bcebos.com/data.tar.gz" +CACHE_DIR_NAME = "attention_data" +SAVED_FILE_NAME = "data.tar.gz" +DATA_DIR_NAME = "data" +TRAIN_DATA_DIR_NAME = "train_images" +TEST_DATA_DIR_NAME = "test_images" +TRAIN_LIST_FILE_NAME = "train.list" +TEST_LIST_FILE_NAME = "test.list" + + +class Resize(object): + def __init__(self, height=48): + self.interp = Image.NEAREST # Image.ANTIALIAS + self.height = height + + def __call__(self, samples): + shape = samples[0][0].size + for i in range(len(samples)): + im = samples[i][0] + im = im.resize((shape[0], self.height), self.interp) + samples[i][0] = im + return samples + + +class Normalize(object): + def __init__(self, + mean=[127.5], + std=[1.0], + scale=False, + channel_first=True): + self.mean = mean + self.std = std + self.scale = scale + self.channel_first = channel_first + if not (isinstance(self.mean, list) and isinstance(self.std, list) and + isinstance(self.scale, bool)): + raise TypeError("{}: input type is invalid.".format(self)) + + def __call__(self, samples): + for i in range(len(samples)): + im = samples[i][0] + im = np.array(im).astype(np.float32, copy=False) + im = im[np.newaxis, ...] + mean = np.array(self.mean)[np.newaxis, np.newaxis, :] + std = np.array(self.std)[np.newaxis, np.newaxis, :] + if self.scale: + im = im / 255.0 + im -= mean + im /= std + samples[i][0] = im + return samples + + +class PadTarget(object): + def __init__(self, SOS=0, EOS=1): + self.SOS = SOS + self.EOS = EOS + + def __call__(self, samples): + lens = np.array([len(s[1]) for s in samples], dtype="int64") + max_len = np.max(lens) + for i in range(len(samples)): + label = samples[i][1] + if max_len > len(label): + pad_label = label + [self.EOS] * (max_len - len(label)) + else: + pad_label = label + samples[i][1] = np.array([self.SOS] + pad_label, dtype='int64') + # label_out + samples[i].append(np.array(pad_label + [self.EOS], dtype='int64')) + mask = np.zeros((max_len + 1)).astype('float32') + mask[:len(label) + 1] = 1.0 + # mask + samples[i].append(np.array(mask, dtype='float32')) + return samples + + +class BatchSampler(fluid.io.BatchSampler): + def __init__(self, + dataset, + batch_size, + shuffle=False, + drop_last=True, + seed=None): + self._dataset = dataset + self._batch_size = batch_size + self._shuffle = shuffle + self._drop_last = drop_last + self._random = np.random + self._random.seed(seed) + self._nranks = ParallelEnv().nranks + self._local_rank = ParallelEnv().local_rank + self._device_id = ParallelEnv().dev_id + self._num_samples = int( + math.ceil(len(self._dataset) * 1.0 / self._nranks)) + self._total_size = self._num_samples * self._nranks + self._epoch = 0 + + def __iter__(self): + infos = copy.copy(self._dataset._sample_infos) + skip_num = 0 + if self._shuffle: + if self._batch_size == 1: + self._random.RandomState(self._epoch).shuffle(infos) + else: # partial shuffle + infos = sorted(infos, key=lambda x: x.w) + skip_num = random.randint(1, 100) + + infos = infos[skip_num:] + infos[:skip_num] + infos += infos[:(self._total_size - len(infos))] + last_size = self._total_size % (self._batch_size * self._nranks) + batches = [] + for i in range(self._local_rank * self._batch_size, + len(infos) - last_size, + self._batch_size * self._nranks): + batches.append(infos[i:i + self._batch_size]) + + if (not self._drop_last) and last_size != 0: + last_local_size = last_size // self._nranks + last_infos = infos[len(infos) - last_size:] + start = self._local_rank * last_local_size + batches.append(last_infos[start:start + last_local_size]) + + if self._shuffle: + self._random.RandomState(self._epoch).shuffle(batches) + self._epoch += 1 + + for batch in batches: + batch_indices = [info.idx for info in batch] + yield batch_indices + + def __len__(self): + if self._drop_last: + return self._total_size // self._batch_size + else: + return math.ceil(self._total_size / float(self._batch_size)) + + +class SampleInfo(object): + def __init__(self, idx, h, w, im_name, labels): + self.idx = idx + self.h = h + self.w = w + self.im_name = im_name + self.labels = labels + + +class OCRDataset(paddle.io.Dataset): + def __init__(self, image_dir, anno_file): + self.image_dir = image_dir + self.anno_file = anno_file + self._sample_infos = [] + with open(anno_file, 'r') as f: + for i, line in enumerate(f): + w, h, im_name, labels = line.strip().split(' ') + h, w = int(h), int(w) + labels = [int(c) for c in labels.split(',')] + self._sample_infos.append(SampleInfo(i, h, w, im_name, labels)) + + def __getitem__(self, idx): + info = self._sample_infos[idx] + im_name, labels = info.im_name, info.labels + image = Image.open(path.join(self.image_dir, im_name)).convert('L') + return [image, labels] + 
+ def __len__(self): + return len(self._sample_infos) + + +def train( + root_dir=None, + images_dir=None, + anno_file=None, + shuffle=True, ): + if root_dir is None: + root_dir = download_data() + if images_dir is None: + images_dir = path.join(root_dir, TRAIN_DATA_DIR_NAME) + if anno_file is None: + anno_file = path.join(root_dir, TRAIN_LIST_FILE_NAME) + return OCRDataset(images_dir, anno_file) + + +def test( + root_dir=None, + images_dir=None, + anno_file=None, + shuffle=True, ): + if root_dir is None: + root_dir = download_data() + if images_dir is None: + images_dir = path.join(root_dir, TEST_DATA_DIR_NAME) + if anno_file is None: + anno_file = path.join(root_dir, TEST_LIST_FILE_NAME) + return OCRDataset(images_dir, anno_file) + + +def download_data(): + '''Download train and test data. + ''' + tar_file = paddle.dataset.common.download( + DATA_URL, CACHE_DIR_NAME, DATA_MD5, save_name=SAVED_FILE_NAME) + data_dir = path.join(path.dirname(tar_file), DATA_DIR_NAME) + if not path.isdir(data_dir): + t = tarfile.open(tar_file, "r:gz") + t.extractall(path=path.dirname(tar_file)) + t.close() + return data_dir diff --git a/examples/ocr/eval.py b/examples/ocr/eval.py new file mode 100644 index 0000000000000000000000000000000000000000..1adffa5401679ab0d49cc586c0238ce1c01fa1b8 --- /dev/null +++ b/examples/ocr/eval.py @@ -0,0 +1,152 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import print_function + +import argparse +import functools + +import paddle.fluid.profiler as profiler +import paddle.fluid as fluid + +from hapi.model import Input, set_device +from hapi.vision.transforms import BatchCompose + +from utility import add_arguments, print_arguments +from utility import SeqAccuracy, LoggerCallBack, SeqBeamAccuracy +from utility import postprocess +from seq2seq_attn import Seq2SeqAttModel, Seq2SeqAttInferModel, WeightCrossEntropy +import data + +parser = argparse.ArgumentParser(description=__doc__) +add_arg = functools.partial(add_arguments, argparser=parser) +# yapf: disable +add_arg('batch_size', int, 32, "Minibatch size.") +add_arg('test_images', str, None, "The directory of images to be used for evaluation.") +add_arg('test_list', str, None, "The list file of images to be used for evaluation.") +add_arg('init_model', str, 'checkpoint/final', "The init model file or directory.") +add_arg('use_gpu', bool, True, "Whether to use GPU.") +add_arg('encoder_size', int, 200, "Encoder size.") +add_arg('decoder_size', int, 128, "Decoder size.") +add_arg('embedding_dim', int, 128, "Word vector dim.") +add_arg('num_classes', int, 95, "Number of classes.") +add_arg('beam_size', int, 0, "If beam size is set, beam search will be used.") +add_arg('dynamic', bool, False, "Whether to use dygraph.") +# yapf: enable + + +def main(FLAGS): + device = set_device("gpu" if FLAGS.use_gpu else "cpu") + fluid.enable_dygraph(device) if FLAGS.dynamic else None + model = Seq2SeqAttModel( + encoder_size=FLAGS.encoder_size, + decoder_size=FLAGS.decoder_size, + emb_dim=FLAGS.embedding_dim, + num_classes=FLAGS.num_classes) + + # yapf: disable + inputs = [ + Input([None, 1, 48, 384], "float32", name="pixel"), + Input([None, None], "int64", name="label_in") + ] + labels = [ + Input([None, None], "int64", name="label_out"), + Input([None, None], "float32", name="mask") + ] + # yapf: enable + + model.prepare( + loss_function=WeightCrossEntropy(), + metrics=SeqAccuracy(), + inputs=inputs, + labels=labels, + device=device) + model.load(FLAGS.init_model) + + test_dataset = data.test() + test_collate_fn = BatchCompose( + [data.Resize(), data.Normalize(), data.PadTarget()]) + test_sampler = data.BatchSampler( + test_dataset, + batch_size=FLAGS.batch_size, + drop_last=False, + shuffle=False) + test_loader = fluid.io.DataLoader( + test_dataset, + batch_sampler=test_sampler, + places=device, + num_workers=0, + return_list=True, + collate_fn=test_collate_fn) + + model.evaluate( + eval_data=test_loader, + callbacks=[LoggerCallBack(10, 2, FLAGS.batch_size)]) + + +def beam_search(FLAGS): + device = set_device("gpu" if FLAGS.use_gpu else "cpu") + fluid.enable_dygraph(device) if FLAGS.dynamic else None + model = Seq2SeqAttInferModel( + encoder_size=FLAGS.encoder_size, + decoder_size=FLAGS.decoder_size, + emb_dim=FLAGS.embedding_dim, + num_classes=FLAGS.num_classes, + beam_size=FLAGS.beam_size) + + inputs = [ + Input( + [None, 1, 48, 384], "float32", name="pixel"), Input( + [None, None], "int64", name="label_in") + ] + labels = [ + Input( + [None, None], "int64", name="label_out"), Input( + [None, None], "float32", name="mask") + ] + model.prepare( + loss_function=None, + metrics=SeqBeamAccuracy(), + inputs=inputs, + labels=labels, + device=device) + model.load(FLAGS.init_model) + + test_dataset = data.test() + test_collate_fn = BatchCompose( + [data.Resize(), data.Normalize(), data.PadTarget()]) + test_sampler = data.BatchSampler( + test_dataset, + batch_size=FLAGS.batch_size, + drop_last=False, + 
shuffle=False) + test_loader = fluid.io.DataLoader( + test_dataset, + batch_sampler=test_sampler, + places=device, + num_workers=0, + return_list=True, + collate_fn=test_collate_fn) + + model.evaluate( + eval_data=test_loader, + callbacks=[LoggerCallBack(10, 2, FLAGS.batch_size)]) + + +if __name__ == '__main__': + FLAGS = parser.parse_args() + print_arguments(FLAGS) + if FLAGS.beam_size: + beam_search(FLAGS) + else: + main(FLAGS) diff --git a/examples/ocr/images/112_chubbiness_13557.jpg b/examples/ocr/images/112_chubbiness_13557.jpg new file mode 100644 index 0000000000000000000000000000000000000000..4474a0db2b40a618ecb5401022958651e9aa0543 Binary files /dev/null and b/examples/ocr/images/112_chubbiness_13557.jpg differ diff --git a/examples/ocr/images/177_Interfiled_40185.jpg b/examples/ocr/images/177_Interfiled_40185.jpg new file mode 100644 index 0000000000000000000000000000000000000000..c110e3d8ef85bc917c5574feaa8e9bb8a65d80c9 Binary files /dev/null and b/examples/ocr/images/177_Interfiled_40185.jpg differ diff --git a/examples/ocr/images/325_dame_19109.jpg b/examples/ocr/images/325_dame_19109.jpg new file mode 100644 index 0000000000000000000000000000000000000000..12554431319a03fedd33a51da806414b56e2119e Binary files /dev/null and b/examples/ocr/images/325_dame_19109.jpg differ diff --git a/examples/ocr/images/368_fixtures_29232.jpg b/examples/ocr/images/368_fixtures_29232.jpg new file mode 100644 index 0000000000000000000000000000000000000000..7566131c8f1e222be21a4f9dd6f9321705dea617 Binary files /dev/null and b/examples/ocr/images/368_fixtures_29232.jpg differ diff --git a/examples/ocr/predict.py b/examples/ocr/predict.py new file mode 100644 index 0000000000000000000000000000000000000000..242d4f80b9bbdbade61b0cc086196482ffa588e9 --- /dev/null +++ b/examples/ocr/predict.py @@ -0,0 +1,101 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import print_function + +import os +import sys +import random +import numpy as np + +import argparse +import functools +from PIL import Image + +import paddle.fluid.profiler as profiler +import paddle.fluid as fluid + +from hapi.model import Input, set_device +from hapi.datasets.folder import ImageFolder +from hapi.vision.transforms import BatchCompose + +from utility import add_arguments, print_arguments +from utility import postprocess, index2word +from seq2seq_attn import Seq2SeqAttInferModel, WeightCrossEntropy +import data + +parser = argparse.ArgumentParser(description=__doc__) +add_arg = functools.partial(add_arguments, argparser=parser) +# yapf: disable +add_arg('batch_size', int, 1, "Minibatch size.") +add_arg('image_path', str, None, "The directory of images to be used for prediction.") +add_arg('init_model', str, None, "The init model file or directory.") +add_arg('use_gpu', bool, True, "Whether to use GPU.") +# model hyper parameters +add_arg('encoder_size', int, 200, "Encoder size.") +add_arg('decoder_size', int, 128, "Decoder size.") +add_arg('embedding_dim', int, 128, "Word vector dim.") +add_arg('num_classes', int, 95, "Number of classes.") +add_arg('beam_size', int, 3, "Beam size for beam search.") +add_arg('dynamic', bool, False, "Whether to use dygraph.") +# yapf: enable + + +def main(FLAGS): + device = set_device("gpu" if FLAGS.use_gpu else "cpu") + fluid.enable_dygraph(device) if FLAGS.dynamic else None + model = Seq2SeqAttInferModel( + encoder_size=FLAGS.encoder_size, + decoder_size=FLAGS.decoder_size, + emb_dim=FLAGS.embedding_dim, + num_classes=FLAGS.num_classes, + beam_size=FLAGS.beam_size) + + inputs = [Input([None, 1, 48, 384], "float32", name="pixel"), ] + + model.prepare(inputs=inputs, device=device) + model.load(FLAGS.init_model) + + fn = lambda p: Image.open(p).convert('L') + test_dataset = ImageFolder(FLAGS.image_path, loader=fn) + test_collate_fn = BatchCompose([data.Resize(), data.Normalize()]) + test_loader = fluid.io.DataLoader( + test_dataset, + places=device, + num_workers=0, + return_list=True, + collate_fn=test_collate_fn) + + samples = test_dataset.samples + #outputs = model.predict(test_loader) + ins_id = 0 + for image, in test_loader: + image = image if FLAGS.dynamic else image[0] + pred = model.test_batch([image])[0] + pred = pred[:, :, np.newaxis] if len(pred.shape) == 2 else pred + pred = np.transpose(pred, [0, 2, 1]) + for ins in pred: + impath = samples[ins_id] + ins_id += 1 + print('Image {}: {}'.format(ins_id, impath)) + for beam_idx, beam in enumerate(ins): + id_list = postprocess(beam) + word_list = index2word(id_list) + sequence = "".join(word_list) + print('{}: {}'.format(beam_idx, sequence)) + + +if __name__ == '__main__': + FLAGS = parser.parse_args() + print_arguments(FLAGS) + main(FLAGS) diff --git a/examples/ocr/seq2seq_attn.py b/examples/ocr/seq2seq_attn.py new file mode 100644 index 0000000000000000000000000000000000000000..675e4e4ab0b30874dffd1b0bbc84b7c54c42354b --- /dev/null +++ b/examples/ocr/seq2seq_attn.py @@ -0,0 +1,333 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import print_function + +import numpy as np + +import paddle.fluid as fluid +import paddle.fluid.layers as layers +from paddle.fluid.layers import BeamSearchDecoder + +from hapi.text import RNNCell, RNN, DynamicDecode +from hapi.model import Model, Loss + + +class ConvBNPool(fluid.dygraph.Layer): + def __init__(self, + in_ch, + out_ch, + act="relu", + is_test=False, + pool=True, + use_cudnn=True): + super(ConvBNPool, self).__init__() + self.pool = pool + + filter_size = 3 + std = (2.0 / (filter_size**2 * in_ch))**0.5 + param_0 = fluid.ParamAttr( + initializer=fluid.initializer.Normal(0.0, std)) + + std = (2.0 / (filter_size**2 * out_ch))**0.5 + param_1 = fluid.ParamAttr( + initializer=fluid.initializer.Normal(0.0, std)) + + self.conv0 = fluid.dygraph.Conv2D( + in_ch, + out_ch, + 3, + padding=1, + param_attr=param_0, + bias_attr=False, + act=None, + use_cudnn=use_cudnn) + self.bn0 = fluid.dygraph.BatchNorm(out_ch, act=act) + self.conv1 = fluid.dygraph.Conv2D( + out_ch, + out_ch, + filter_size=3, + padding=1, + param_attr=param_1, + bias_attr=False, + act=None, + use_cudnn=use_cudnn) + self.bn1 = fluid.dygraph.BatchNorm(out_ch, act=act) + + if self.pool: + self.pool = fluid.dygraph.Pool2D( + pool_size=2, + pool_type='max', + pool_stride=2, + use_cudnn=use_cudnn, + ceil_mode=True) + + def forward(self, inputs): + out = self.conv0(inputs) + out = self.bn0(out) + out = self.conv1(out) + out = self.bn1(out) + if self.pool: + out = self.pool(out) + return out + + +class CNN(fluid.dygraph.Layer): + def __init__(self, in_ch=1, is_test=False): + super(CNN, self).__init__() + self.conv_bn1 = ConvBNPool(in_ch, 16) + self.conv_bn2 = ConvBNPool(16, 32) + self.conv_bn3 = ConvBNPool(32, 64) + self.conv_bn4 = ConvBNPool(64, 128, pool=False) + + def forward(self, inputs): + conv = self.conv_bn1(inputs) + conv = self.conv_bn2(conv) + conv = self.conv_bn3(conv) + conv = self.conv_bn4(conv) + return conv + + +class GRUCell(RNNCell): + def __init__(self, + input_size, + hidden_size, + param_attr=None, + bias_attr=None, + gate_activation='sigmoid', + candidate_activation='tanh', + origin_mode=False): + super(GRUCell, self).__init__() + self.hidden_size = hidden_size + self.fc_layer = fluid.dygraph.Linear( + input_size, + hidden_size * 3, + param_attr=param_attr, + bias_attr=False) + + self.gru_unit = fluid.dygraph.GRUUnit( + hidden_size * 3, + param_attr=param_attr, + bias_attr=bias_attr, + activation=candidate_activation, + gate_activation=gate_activation, + origin_mode=origin_mode) + + def forward(self, inputs, states): + # step_outputs, new_states = cell(step_inputs, states) + # for GRUCell, `step_outputs` and `new_states` both are hidden + x = self.fc_layer(inputs) + hidden, _, _ = self.gru_unit(x, states) + return hidden, hidden + + @property + def state_shape(self): + return [self.hidden_size] + + +class Encoder(fluid.dygraph.Layer): + def __init__( + self, + in_channel=1, + rnn_hidden_size=200, + decoder_size=128, + is_test=False, ): + super(Encoder, self).__init__() + self.rnn_hidden_size = rnn_hidden_size + + self.backbone = CNN(in_ch=in_channel, is_test=is_test) + + para_attr = 
fluid.ParamAttr( + initializer=fluid.initializer.Normal(0.0, 0.02)) + bias_attr = fluid.ParamAttr( + initializer=fluid.initializer.Normal(0.0, 0.02), learning_rate=2.0) + self.gru_fwd = RNN(cell=GRUCell( + input_size=128 * 6, + hidden_size=rnn_hidden_size, + param_attr=para_attr, + bias_attr=bias_attr, + candidate_activation='relu'), + is_reverse=False, + time_major=False) + self.gru_bwd = RNN(cell=GRUCell( + input_size=128 * 6, + hidden_size=rnn_hidden_size, + param_attr=para_attr, + bias_attr=bias_attr, + candidate_activation='relu'), + is_reverse=True, + time_major=False) + self.encoded_proj_fc = fluid.dygraph.Linear( + rnn_hidden_size * 2, decoder_size, bias_attr=False) + + def forward(self, inputs): + conv_features = self.backbone(inputs) + conv_features = fluid.layers.transpose( + conv_features, perm=[0, 3, 1, 2]) + + n, w, c, h = conv_features.shape + seq_feature = fluid.layers.reshape(conv_features, [0, -1, c * h]) + + gru_fwd, _ = self.gru_fwd(seq_feature) + gru_bwd, _ = self.gru_bwd(seq_feature) + + encoded_vector = fluid.layers.concat(input=[gru_fwd, gru_bwd], axis=2) + encoded_proj = self.encoded_proj_fc(encoded_vector) + return gru_bwd, encoded_vector, encoded_proj + + +class Attention(fluid.dygraph.Layer): + """ + Neural Machine Translation by Jointly Learning to Align and Translate. + https://arxiv.org/abs/1409.0473 + """ + + def __init__(self, decoder_size): + super(Attention, self).__init__() + self.fc1 = fluid.dygraph.Linear( + decoder_size, decoder_size, bias_attr=False) + self.fc2 = fluid.dygraph.Linear(decoder_size, 1, bias_attr=False) + + def forward(self, encoder_vec, encoder_proj, decoder_state): + # alignment model, single-layer multilayer perceptron + decoder_state = self.fc1(decoder_state) + decoder_state = fluid.layers.unsqueeze(decoder_state, [1]) + + e = fluid.layers.elementwise_add(encoder_proj, decoder_state) + e = fluid.layers.tanh(e) + + att_scores = self.fc2(e) + att_scores = fluid.layers.squeeze(att_scores, [2]) + att_scores = fluid.layers.softmax(att_scores) + + context = fluid.layers.elementwise_mul( + x=encoder_vec, y=att_scores, axis=0) + context = fluid.layers.reduce_sum(context, dim=1) + return context + + +class DecoderCell(RNNCell): + def __init__(self, encoder_size=200, decoder_size=128): + super(DecoderCell, self).__init__() + self.attention = Attention(decoder_size) + self.gru_cell = GRUCell( + input_size=encoder_size * 2 + decoder_size, + hidden_size=decoder_size) + + def forward(self, current_word, states, encoder_vec, encoder_proj): + context = self.attention(encoder_vec, encoder_proj, states) + decoder_inputs = fluid.layers.concat([current_word, context], axis=1) + hidden, _ = self.gru_cell(decoder_inputs, states) + return hidden, hidden + + +class Decoder(fluid.dygraph.Layer): + def __init__(self, num_classes, emb_dim, encoder_size, decoder_size): + super(Decoder, self).__init__() + self.decoder_attention = RNN(DecoderCell(encoder_size, decoder_size)) + self.fc = fluid.dygraph.Linear( + decoder_size, num_classes + 2, act='softmax') + + def forward(self, target, initial_states, encoder_vec, encoder_proj): + out, _ = self.decoder_attention( + target, + initial_states=initial_states, + encoder_vec=encoder_vec, + encoder_proj=encoder_proj) + pred = self.fc(out) + return pred + + +class Seq2SeqAttModel(Model): + def __init__( + self, + in_channle=1, + encoder_size=200, + decoder_size=128, + emb_dim=128, + num_classes=None, ): + super(Seq2SeqAttModel, self).__init__() + self.encoder = Encoder(in_channle, encoder_size, decoder_size) + self.fc = 
fluid.dygraph.Linear( + input_dim=encoder_size, + output_dim=decoder_size, + bias_attr=False, + act='relu') + self.embedding = fluid.dygraph.Embedding( + [num_classes + 2, emb_dim], dtype='float32') + self.decoder = Decoder(num_classes, emb_dim, encoder_size, + decoder_size) + + def forward(self, inputs, target): + gru_backward, encoded_vector, encoded_proj = self.encoder(inputs) + decoder_boot = self.fc(gru_backward[:, 0]) + trg_embedding = self.embedding(target) + prediction = self.decoder(trg_embedding, decoder_boot, encoded_vector, + encoded_proj) + return prediction + + +class Seq2SeqAttInferModel(Seq2SeqAttModel): + def __init__( + self, + in_channle=1, + encoder_size=200, + decoder_size=128, + emb_dim=128, + num_classes=None, + beam_size=0, + bos_id=0, + eos_id=1, + max_out_len=20, ): + super(Seq2SeqAttInferModel, self).__init__( + in_channle, encoder_size, decoder_size, emb_dim, num_classes) + self.beam_size = beam_size + # dynamic decoder for inference + decoder = BeamSearchDecoder( + self.decoder.decoder_attention.cell, + start_token=bos_id, + end_token=eos_id, + beam_size=beam_size, + embedding_fn=self.embedding, + output_fn=self.decoder.fc) + self.infer_decoder = DynamicDecode( + decoder, max_step_num=max_out_len, is_test=True) + + def forward(self, inputs, *args): + gru_backward, encoded_vector, encoded_proj = self.encoder(inputs) + decoder_boot = self.fc(gru_backward[:, 0]) + + if self.beam_size: + # Tile the batch dimension with beam_size + encoded_vector = BeamSearchDecoder.tile_beam_merge_with_batch( + encoded_vector, self.beam_size) + encoded_proj = BeamSearchDecoder.tile_beam_merge_with_batch( + encoded_proj, self.beam_size) + # dynamic decoding with beam search + rs, _ = self.infer_decoder( + inits=decoder_boot, + encoder_vec=encoded_vector, + encoder_proj=encoded_proj) + return rs + + +class WeightCrossEntropy(Loss): + def __init__(self): + super(WeightCrossEntropy, self).__init__(average=False) + + def forward(self, outputs, labels): + predict, (label, mask) = outputs[0], labels + loss = layers.cross_entropy(predict, label=label) + loss = layers.elementwise_mul(loss, mask, axis=0) + loss = layers.reduce_sum(loss) + return loss diff --git a/examples/ocr/train.py b/examples/ocr/train.py new file mode 100644 index 0000000000000000000000000000000000000000..d72173dfde7791b53af80f04697f8e3defd01445 --- /dev/null +++ b/examples/ocr/train.py @@ -0,0 +1,138 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
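# --- Editor's note (illustrative sketch, not part of the patch) ---------
# A numpy sketch of what WeightCrossEntropy in seq2seq_attn.py above
# computes: per-step cross entropy, zeroed on padded positions via `mask`,
# then summed (average=False) rather than averaged. All values below are
# made up.
import numpy as np

probs = np.array([[[0.7, 0.2, 0.1],   # one sequence, three decoder steps
                   [0.1, 0.8, 0.1],
                   [0.3, 0.3, 0.4]]])
label = np.array([[0, 1, 2]])
mask = np.array([[1.0, 1.0, 0.0]])    # the last step is padding
step_loss = -np.log(np.take_along_axis(
    probs, label[..., None], axis=-1)).squeeze(-1)
loss = (step_loss * mask).sum()       # masked sum, not a mean
# -------------------------------------------------------------------------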
+from __future__ import print_function
+
+import os
+import sys
+import random
+import numpy as np
+
+import argparse
+import functools
+
+import paddle.fluid.profiler as profiler
+import paddle.fluid as fluid
+
+from hapi.model import Input, set_device
+from hapi.vision.transforms import BatchCompose
+
+from utility import add_arguments, print_arguments
+from utility import SeqAccuracy, LoggerCallBack
+from seq2seq_attn import Seq2SeqAttModel, WeightCrossEntropy
+import data
+
+parser = argparse.ArgumentParser(description=__doc__)
+add_arg = functools.partial(add_arguments, argparser=parser)
+# yapf: disable
+add_arg('batch_size', int, 32, "Minibatch size.")
+add_arg('epoch', int, 30, "Epoch number.")
+add_arg('num_workers', int, 0, "Number of workers.")
+add_arg('lr', float, 0.001, "Learning rate.")
+add_arg('lr_decay_strategy', str, "", "Learning rate decay strategy.")
+add_arg('checkpoint_path', str, "checkpoint", "The directory to save the model to.")
+add_arg('train_images', str, None, "The directory of images to be used for training.")
+add_arg('train_list', str, None, "The list file of images to be used for training.")
+add_arg('test_images', str, None, "The directory of images to be used for test.")
+add_arg('test_list', str, None, "The list file of images to be used for test.")
+add_arg('resume_path', str, None, "The init model file or directory.")
+add_arg('use_gpu', bool, True, "Whether to use GPU to train.")
+# model hyper parameters
+add_arg('encoder_size', int, 200, "Encoder size.")
+add_arg('decoder_size', int, 128, "Decoder size.")
+add_arg('embedding_dim', int, 128, "Word vector dim.")
+add_arg('num_classes', int, 95, "Number of classes.")
+add_arg('gradient_clip', float, 5.0, "Gradient clip value.")
+add_arg('dynamic', bool, False, "Whether to use dygraph.")
+# yapf: enable
+
+
+def main(FLAGS):
+    device = set_device("gpu" if FLAGS.use_gpu else "cpu")
+    fluid.enable_dygraph(device) if FLAGS.dynamic else None
+
+    model = Seq2SeqAttModel(
+        encoder_size=FLAGS.encoder_size,
+        decoder_size=FLAGS.decoder_size,
+        emb_dim=FLAGS.embedding_dim,
+        num_classes=FLAGS.num_classes)
+
+    lr = FLAGS.lr
+    if FLAGS.lr_decay_strategy == "piecewise_decay":
+        learning_rate = fluid.layers.piecewise_decay(
+            [200000, 250000], [lr, lr * 0.1, lr * 0.01])
+    else:
+        learning_rate = lr
+    grad_clip = fluid.clip.GradientClipByGlobalNorm(FLAGS.gradient_clip)
+    optimizer = fluid.optimizer.Adam(
+        learning_rate=learning_rate,
+        parameter_list=model.parameters(),
+        grad_clip=grad_clip)
+
+    # yapf: disable
+    inputs = [
+        Input([None,1,48,384], "float32", name="pixel"),
+        Input([None, None], "int64", name="label_in"),
+    ]
+    labels = [
+        Input([None, None], "int64", name="label_out"),
+        Input([None, None], "float32", name="mask"),
+    ]
+    # yapf: enable
+
+    model.prepare(
+        optimizer,
+        WeightCrossEntropy(),
+        SeqAccuracy(),
+        inputs=inputs,
+        labels=labels)
+
+    train_dataset = data.train()
+    train_collate_fn = BatchCompose(
+        [data.Resize(), data.Normalize(), data.PadTarget()])
+    train_sampler = data.BatchSampler(
+        train_dataset, batch_size=FLAGS.batch_size, shuffle=True)
+    train_loader = fluid.io.DataLoader(
+        train_dataset,
+        batch_sampler=train_sampler,
+        places=device,
+        num_workers=FLAGS.num_workers,
+        return_list=True,
+        collate_fn=train_collate_fn)
+    test_dataset = data.test()
+    test_collate_fn = BatchCompose(
+        [data.Resize(), data.Normalize(), data.PadTarget()])
+    test_sampler = data.BatchSampler(
+        test_dataset,
+        batch_size=FLAGS.batch_size,
+        drop_last=False,
+        shuffle=False)
+    test_loader = fluid.io.DataLoader(
+        test_dataset,
+        batch_sampler=test_sampler,
+        places=device,
+        num_workers=0,
+        return_list=True,
+        collate_fn=test_collate_fn)
+
+    model.fit(train_data=train_loader,
+              eval_data=test_loader,
+              epochs=FLAGS.epoch,
+              save_dir=FLAGS.checkpoint_path,
+              callbacks=[LoggerCallBack(10, 2, FLAGS.batch_size)])
+
+
+if __name__ == '__main__':
+    FLAGS = parser.parse_args()
+    print_arguments(FLAGS)
+    main(FLAGS)
diff --git a/examples/ocr/utility.py b/examples/ocr/utility.py
new file mode 100644
index 0000000000000000000000000000000000000000..d47b3f17d16452c1292402abc15b534eec4b3459
--- /dev/null
+++ b/examples/ocr/utility.py
@@ -0,0 +1,186 @@
+"""Contains common utility functions."""
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import distutils.util
+import numpy as np
+import paddle.fluid as fluid
+import six
+
+from hapi.metrics import Metric
+from hapi.callbacks import ProgBarLogger
+
+
+def print_arguments(args):
+    """Print argparse's arguments.
+
+    Usage:
+
+    .. code-block:: python
+
+        parser = argparse.ArgumentParser()
+        parser.add_argument("name", default="John", type=str, help="User name.")
+        args = parser.parse_args()
+        print_arguments(args)
+
+    :param args: Input argparse.Namespace for printing.
+    :type args: argparse.Namespace
+    """
+    print("----------- Configuration Arguments -----------")
+    for arg, value in sorted(six.iteritems(vars(args))):
+        print("%s: %s" % (arg, value))
+    print("------------------------------------------------")
+
+
+def add_arguments(argname, type, default, help, argparser, **kwargs):
+    """Add argparse's argument.
+
+    Usage:
+
+    .. code-block:: python
+
+        parser = argparse.ArgumentParser()
+        add_arguments("name", str, "John", "User name.", parser)
+        args = parser.parse_args()
+    """
+    type = distutils.util.strtobool if type == bool else type
+    argparser.add_argument(
+        "--" + argname,
+        default=default,
+        type=type,
+        help=help + ' Default: %(default)s.',
+        **kwargs)
+
+
+class SeqAccuracy(Metric):
+    def __init__(self, name=None, *args, **kwargs):
+        super(SeqAccuracy, self).__init__(*args, **kwargs)
+        self._name = 'seq_acc'
+        self.reset()
+
+    def add_metric_op(self, output, label, mask, *args, **kwargs):
+        pred = fluid.layers.flatten(output, axis=2)
+        score, topk = fluid.layers.topk(pred, 1)
+        return topk, label, mask
+
+    def update(self, topk, label, mask, *args, **kwargs):
+        topk = topk.reshape(label.shape[0], -1)
+        seq_len = np.sum(mask, -1)
+        acc = 0
+        for i in range(label.shape[0]):
+            l = int(seq_len[i] - 1)
+            pred = topk[i][:l - 1]
+            ref = label[i][:l - 1]
+            if np.array_equal(pred, ref):
+                self.total += 1
+                acc += 1
+            self.count += 1
+        return float(acc) / label.shape[0]
+
+    def reset(self):
+        self.total = 0.
+        self.count = 0.
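# --- Editor's note (illustrative sketch, not part of the module) --------
# A worked example of SeqAccuracy.update above: with a mask summing to 4,
# l = 3, so only the first l - 1 = 2 tokens of prediction and label are
# compared, and an instance counts as correct only on an exact match.
# All values below are made up.
import numpy as np

topk = np.array([[5, 9, 1, 0, 0]])
label = np.array([[5, 9, 1, 0, 0]])
mask = np.array([[1., 1., 1., 1., 0.]])
l = int(np.sum(mask, -1)[0] - 1)                    # -> 3
assert np.array_equal(topk[0][:l - 1], label[0][:l - 1])
# -------------------------------------------------------------------------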
+
+    def accumulate(self):
+        return float(self.total) / self.count
+
+    def name(self):
+        return self._name
+
+
+class LoggerCallBack(ProgBarLogger):
+    def __init__(self, log_freq=1, verbose=2, train_bs=None, eval_bs=None):
+        super(LoggerCallBack, self).__init__(log_freq, verbose)
+        self.train_bs = train_bs
+        self.eval_bs = eval_bs if eval_bs else train_bs
+
+    def on_train_batch_end(self, step, logs=None):
+        logs = logs or {}
+        logs['loss'] = [l / self.train_bs for l in logs['loss']]
+        super(LoggerCallBack, self).on_train_batch_end(step, logs)
+
+    def on_epoch_end(self, epoch, logs=None):
+        logs = logs or {}
+        logs['loss'] = [l / self.train_bs for l in logs['loss']]
+        super(LoggerCallBack, self).on_epoch_end(epoch, logs)
+
+    def on_eval_batch_end(self, step, logs=None):
+        logs = logs or {}
+        logs['loss'] = [l / self.eval_bs for l in logs['loss']]
+        super(LoggerCallBack, self).on_eval_batch_end(step, logs)
+
+    def on_eval_end(self, logs=None):
+        logs = logs or {}
+        logs['loss'] = [l / self.eval_bs for l in logs['loss']]
+        super(LoggerCallBack, self).on_eval_end(logs)
+
+
+def index2word(ids):
+    return [chr(int(k + 33)) for k in ids]
+
+
+def postprocess(seq, bos_idx=0, eos_idx=1):
+    if isinstance(seq, np.ndarray):
+        seq = seq.tolist()
+    eos_pos = len(seq) - 1
+    for i, idx in enumerate(seq):
+        if idx == eos_idx:
+            eos_pos = i
+            break
+    seq = [
+        idx for idx in seq[:eos_pos + 1] if idx != bos_idx and idx != eos_idx
+    ]
+    return seq
+
+
+class SeqBeamAccuracy(Metric):
+    def __init__(self, name=None, *args, **kwargs):
+        super(SeqBeamAccuracy, self).__init__(*args, **kwargs)
+        self._name = 'seq_acc'
+        self.reset()
+
+    def add_metric_op(self, output, label, mask, *args, **kwargs):
+        return output, label, mask
+
+    def update(self, preds, labels, masks, *args, **kwargs):
+        preds = preds[:, :, np.newaxis] if len(preds.shape) == 2 else preds
+        preds = np.transpose(preds, [0, 2, 1])
+        acc = 0
+        for i in range(labels.shape[0]):
+            ref = np.array(postprocess(labels[i]))
+            for beam in preds[i]:
+                beam_pred = np.array(postprocess(beam))
+                if np.array_equal(beam_pred, ref):
+                    self.total += 1
+                    acc += 1
+                    break
+            self.count += 1
+        return float(acc) / labels.shape[0]
+
+    def reset(self):
+        self.total = 0.
+        self.count = 0.
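# --- Editor's note (illustrative sketch, not part of the module) --------
# How `postprocess` and `index2word` above decode a beam into text: class
# ids are offset ASCII codes (id k -> chr(k + 33)); postprocess truncates
# at the first <eos> (id 1) and strips <bos> (id 0). The ids are made up.
beam = [0, 46, 34, 49, 1, 7]        # <bos> O C R <eos> ...
ids = postprocess(beam)             # -> [46, 34, 49]
text = "".join(index2word(ids))     # -> "OCR"
assert text == "OCR"
# -------------------------------------------------------------------------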
+
+    def accumulate(self):
+        return float(self.total) / self.count
+
+    def name(self):
+        return self._name
diff --git a/examples/tsm/infer.py b/examples/tsm/infer.py
index 3de1c8438fe3f35be3a527950e0fa65705defe77..a41b667a71ab1188077ad1b44b259841e55a8f4d 100644
--- a/examples/tsm/infer.py
+++ b/examples/tsm/infer.py
@@ -20,9 +20,9 @@ import argparse
 import numpy as np
 
 from hapi.model import Input, set_device
-from hapi.vision.models import tsm_resnet50
 
 from check import check_gpu, check_version
+from modeling import tsm_resnet50
 from kinetics_dataset import KineticsDataset
 from transforms import *
 
diff --git a/examples/tsm/main.py b/examples/tsm/main.py
index 24b37938e82d999bfd046913d0f711bf74650cc3..4a2b8890fdffbb33ce9c776b189d0b4ac14b1816 100644
--- a/examples/tsm/main.py
+++ b/examples/tsm/main.py
@@ -24,8 +24,8 @@ from paddle.fluid.dygraph.parallel import ParallelEnv
 
 from hapi.model import Model, CrossEntropy, Input, set_device
 from hapi.metrics import Accuracy
-from hapi.vision.models import tsm_resnet50
 
+from modeling import tsm_resnet50
 from check import check_gpu, check_version
 from kinetics_dataset import KineticsDataset
 from transforms import *
diff --git a/hapi/vision/models/tsm.py b/examples/tsm/modeling.py
similarity index 99%
rename from hapi/vision/models/tsm.py
rename to examples/tsm/modeling.py
index 8b50f7073ee6e229acf4953c778ef60e2815cdb8..c2422ed3f1cf57e9fd029bb01e04e55d5296e918 100644
--- a/hapi/vision/models/tsm.py
+++ b/examples/tsm/modeling.py
@@ -196,7 +196,7 @@ def _tsm_resnet(num_layers, seg_num=8, num_classes=400, pretrained=True):
     weight_path = get_weights_path(*(pretrain_infos[num_layers]))
     assert weight_path.endswith('.pdparams'), \
         "suffix of weight must be .pdparams"
-    model.load(weight_path[:-9])
+    model.load(weight_path)
     return model
 
 
diff --git a/examples/yolov3/README.md b/examples/yolov3/README.md
index 9a0d2cc15c4390f1fe666b776dfeadd9c1af558a..fdbfc569be5040137758407ba5dc2ca26e6cc594 100644
--- a/examples/yolov3/README.md
+++ b/examples/yolov3/README.md
@@ -99,18 +99,12 @@ The YOLOv3 network consists of a basic feature extraction network, multi-scale feature fusion layers,
 | ...
 ```
-```bash
-sh pretrain_weights/download.sh
-```
-
 ### Model Training
 
 Once the data is ready, training and evaluation can be launched with the `main.py` script; it alternates between training and evaluating the model every epoch, and saves checkpoints under the `yolo_checkpoint` directory by default.
 
 YOLOv3 is trained with a total batch size of 64. The following describes training on 4 Tesla P40 cards with a batch size of 16 per card. In both static graph and dynamic graph modes, `--batch_size` in multi-card training is the batch size per card, i.e. the total batch size is `--batch_size` multiplied by the number of cards.
 
-Training YOLOv3 requires loading pretrained weights for the [DarkNet53]() backbone, which can be specified at training time via `--pretrain_weights`; if a URL is given, the weights are automatically downloaded to the `~/.cache/paddle/weights` directory and loaded.
-
 The arguments of the `main.py` script can be listed with the following command
 
 ```bash
 python main.py --help
 ```
 
@@ -122,7 +116,7 @@ python main.py --help
 Multi-card training can be launched as follows:
 
 ```bash
-CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch main.py --data= --batch_size=16 --pretrain_weights=https://paddlemodels.bj.bcebos.com/hapi/darknet53_pretrained.pdparams
+CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch main.py --data= --batch_size=16
 ```
 
 #### Dynamic Graph Training
 
@@ -132,7 +126,7 @@
 Multi-card training can be launched as follows:
 
 ```bash
-CUDA_VISIBLE_DEVICES=0,1,2,3 python main.py -m paddle.distributed.launch --data= --batch_size=16 -d --pretrain_weights=https://paddlemodels.bj.bcebos.com/hapi/darknet53_pretrained.pdparams
+CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch main.py --data= --batch_size=16 -d
 ```
diff --git a/hapi/datasets/coco.py b/examples/yolov3/coco.py
similarity index 100%
rename from hapi/datasets/coco.py
rename to examples/yolov3/coco.py
diff --git a/examples/yolov3/infer.py b/examples/yolov3/infer.py
index cc7cbdc823082520b0adb22d8dcf71ab3b8fab74..8b0e3abd1843c3413f9756aa7db65cf2de16ef0a 100644
--- a/examples/yolov3/infer.py
+++ b/examples/yolov3/infer.py
@@ -25,8 +25,9 @@ from paddle.fluid.optimizer import Momentum
 from paddle.io import DataLoader
 
 from hapi.model import Model, Input, set_device
-from hapi.vision.models import yolov3_darknet53, YoloLoss
-from hapi.vision.transforms import *
+
+from modeling import yolov3_darknet53, YoloLoss
+from transforms import *
 
 from visualizer import draw_bbox
 
diff --git a/examples/yolov3/main.py b/examples/yolov3/main.py
index 7203329c7f75c97c1646b25320a44adadbcfc5ac..dea9eba5429a2878038aef11a9ca404696b2f7a8 100644
--- a/examples/yolov3/main.py
+++ b/examples/yolov3/main.py
@@ -27,12 +27,12 @@ from paddle.io import DataLoader
 
 from hapi.model import Model, Input, set_device
 from hapi.distributed import DistributedBatchSampler
-from hapi.download import is_url, get_weights_path
-from hapi.datasets import COCODataset
-from hapi.vision.transforms import *
-from hapi.vision.models import yolov3_darknet53, YoloLoss
+from hapi.vision.transforms import Compose, BatchCompose
 
+from modeling import yolov3_darknet53, YoloLoss
+from coco import COCODataset
 from coco_metric import COCOMetric
+from transforms import *
 
 NUM_MAX_BOXES = 50
 
@@ -126,10 +126,7 @@ def main():
         pretrained=pretrained)
 
     if FLAGS.pretrain_weights and not FLAGS.eval_only:
-        pretrain_weights = FLAGS.pretrain_weights
-        if is_url(pretrain_weights):
-            pretrain_weights = get_weights_path(pretrain_weights)
-        model.load(pretrain_weights, skip_mismatch=True, reset_optimizer=True)
+        model.load(FLAGS.pretrain_weights, skip_mismatch=True, reset_optimizer=True)
 
     optim = make_optimizer(len(batch_sampler),
                            parameter_list=model.parameters())
@@ -168,7 +165,7 @@ def main():
         save_dir="yolo_checkpoint/mixup",
         save_freq=10)
 
-    # do not use image mixup transfrom in laste FLAGS.no_mixup_epoch epoches
+    # do not use image mixup transform in the last FLAGS.no_mixup_epoch epochs
     dataset.mixup = False
     model.fit(train_data=loader,
               epochs=FLAGS.no_mixup_epoch,
@@ -200,8 +197,7 @@ if __name__ == '__main__':
     parser.add_argument(
         "-j", "--num_workers", default=4, type=int, help="reader worker number")
     parser.add_argument(
-        "-p", "--pretrain_weights",
-        default="./pretrain_weights/darknet53_pretrained", type=str,
+        "-p", "--pretrain_weights", default=None, type=str,
         help="path to pretrained weights")
     parser.add_argument(
         "-r", "--resume", default=None, type=str,
diff --git a/hapi/vision/models/yolov3.py b/examples/yolov3/modeling.py
similarity index 87%
rename from hapi/vision/models/yolov3.py
rename to examples/yolov3/modeling.py
index bafe148b14b7e1baf8124c61969238e049e2d6f0..be462f5afbca8b987775e63e52a7950d2c3d60fd 100644
--- a/hapi/vision/models/yolov3.py
+++ b/examples/yolov3/modeling.py
@@ -16,13 +16,13 @@ from __future__ import division
 from __future__ import print_function
 
 import paddle.fluid as fluid
-from paddle.fluid.dygraph.nn import Conv2D
+from paddle.fluid.dygraph.nn import Conv2D, BatchNorm
 from paddle.fluid.param_attr import ParamAttr
 from paddle.fluid.regularizer import L2Decay
 
 from hapi.model import Model, Loss
 from hapi.download import get_weights_path
-from .darknet import darknet53, ConvBNLayer
+from hapi.vision.models import darknet53
 
 __all__ = ['YoloLoss', 'YOLOv3', 'yolov3_darknet53']
 
@@ -33,6 +33,46 @@ pretrain_infos = {
 }
 
 
+class ConvBNLayer(fluid.dygraph.Layer):
+    def __init__(self,
+                 ch_in,
+                 ch_out,
+                 filter_size=3,
+                 stride=1,
+                 groups=1,
+                 padding=0,
+                 act="leaky"):
+        super(ConvBNLayer, self).__init__()
+
+        self.conv = Conv2D(
+            num_channels=ch_in,
+            num_filters=ch_out,
+            filter_size=filter_size,
+            stride=stride,
+            padding=padding,
+            groups=groups,
+            param_attr=ParamAttr(
+                initializer=fluid.initializer.Normal(0., 0.02)),
+            bias_attr=False,
+            act=None)
+        self.batch_norm = BatchNorm(
+            num_channels=ch_out,
+            param_attr=ParamAttr(
+                initializer=fluid.initializer.Normal(0., 0.02),
+                regularizer=L2Decay(0.)),
+            bias_attr=ParamAttr(
+                initializer=fluid.initializer.Constant(0.0),
+                regularizer=L2Decay(0.)))
+
+        self.act = act
+
+    def forward(self, inputs):
+        out = self.conv(inputs)
+        out = self.batch_norm(out)
+        if self.act == 'leaky':
+            out = fluid.layers.leaky_relu(x=out, alpha=0.1)
+        return out
+
 class YoloDetectionBlock(fluid.dygraph.Layer):
     def __init__(self, ch_in, channel):
         super(YoloDetectionBlock, self).__init__()
@@ -118,7 +158,7 @@ class YOLOv3(Model):
         self.nms_posk = 100
         self.draw_thresh = 0.5
 
-        self.backbone = darknet53(pretrained=False)
+        self.backbone = darknet53(pretrained=(model_mode=='train'))
         self.block_outputs = []
         self.yolo_blocks = []
         self.route_blocks = []
@@ -254,7 +294,7 @@ def _yolov3_darknet(num_layers=53, num_classes=80,
     weight_path = get_weights_path(*(pretrain_infos[num_layers]))
     assert weight_path.endswith('.pdparams'), \
         "suffix of weight must be .pdparams"
-    model.load(weight_path[:-9])
+    model.load(weight_path)
     return model
 
 
diff --git a/hapi/vision/transforms/detection_transforms.py b/examples/yolov3/transforms.py
similarity index 100%
rename from hapi/vision/transforms/detection_transforms.py
rename to examples/yolov3/transforms.py
diff --git a/hapi/callbacks.py b/hapi/callbacks.py
index 7d4618366b9ea94fb08f386a78001337439c777d..f02eec1ac7b20fe3d5ec771493378b4e74cc3796 100644
--- a/hapi/callbacks.py
+++ b/hapi/callbacks.py
@@ -218,8 +218,6 @@ class ProgBarLogger(Callback):
         # if steps is not None, last step will update in on_epoch_end
         if self.steps and self.train_step < self.steps:
             self._updates(logs, 'train')
-        else:
-            self._updates(logs, 'train')
 
     def on_epoch_end(self, epoch, logs=None):
         logs = logs or {}
@@ -238,7 +236,7 @@ class ProgBarLogger(Callback):
     def on_eval_batch_end(self, step, logs=None):
         logs = logs or {}
-        self.eval_step = step
+        self.eval_step += 1
         samples = logs.get('batch_size', 1)
         self.evaled_samples += samples
 
diff --git a/hapi/datasets/__init__.py b/hapi/datasets/__init__.py
index e982a6bc1736aa3db69fab4bc7a74c82a4a7edde..fc5df6401992def4bc37329794e534a832924da3 100644
--- a/hapi/datasets/__init__.py
+++ b/hapi/datasets/__init__.py
@@ -12,7 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from . import folder
+from . import mnist
+from . import flowers
+
 from .folder import *
 from .mnist import *
 from .flowers import *
-from .coco import *
+
+__all__ = folder.__all__ \
+    + mnist.__all__ \
+    + flowers.__all__
diff --git a/hapi/datasets/folder.py b/hapi/datasets/folder.py
index 5c728a63f8d8b0bf313d94a3d5e5c605686d6451..23f2c9592915e3e83d596c9cc3679eca306a4bd5 100644
--- a/hapi/datasets/folder.py
+++ b/hapi/datasets/folder.py
@@ -18,7 +18,7 @@ import cv2
 
 from paddle.io import Dataset
 
-__all__ = ["DatasetFolder"]
+__all__ = ["DatasetFolder", "ImageFolder"]
 
 
 def has_valid_extension(filename, extensions):
@@ -164,3 +164,80 @@ IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif',
 
 def cv2_loader(path):
     return cv2.imread(path)
+
+
+class ImageFolder(Dataset):
+    """A generic data loader where the samples are arranged in this way:
+
+        root/1.ext
+        root/2.ext
+        root/sub_dir/3.ext
+
+    Args:
+        root (string): Root directory path.
+        loader (callable, optional): A function to load a sample given its path.
+        extensions (tuple[string], optional): A tuple of allowed extensions.
+            `extensions` and `is_valid_file` should not both be passed.
+        transform (callable, optional): A function/transform that takes in
+            a sample and returns a transformed version.
+        is_valid_file (callable, optional): A function that takes the path of a file
+            and checks whether the file is valid (used to filter out corrupt files).
+            `extensions` and `is_valid_file` should not both be passed.
+
+    Attributes:
+        samples (list): List of sample paths.
+    """
+
+    def __init__(self,
+                 root,
+                 loader=None,
+                 extensions=None,
+                 transform=None,
+                 is_valid_file=None):
+        self.root = root
+        if extensions is not None and is_valid_file is not None:
+            raise ValueError(
+                "extensions and is_valid_file cannot be passed at the same time"
+            )
+        if extensions is None and is_valid_file is None:
+            extensions = IMG_EXTENSIONS
+
+        samples = []
+        path = os.path.expanduser(root)
+        if extensions is not None:
+
+            def is_valid_file(x):
+                return has_valid_extension(x, extensions)
+
+        for root, _, fnames in sorted(os.walk(path, followlinks=True)):
+            for fname in sorted(fnames):
+                f = os.path.join(root, fname)
+                if is_valid_file(f):
+                    samples.append(f)
+
+        if len(samples) == 0:
+            msg = "Found 0 files in subfolders of: " + self.root + "\n"
+            if extensions is not None:
+                msg += "Supported extensions are: " + ",".join(extensions)
+            raise RuntimeError(msg)
+
+        self.loader = cv2_loader if loader is None else loader
+        self.extensions = extensions
+        self.samples = samples
+        self.transform = transform
+
+    def __getitem__(self, index):
+        """
+        Args:
+            index (int): Index
+
+        Returns:
+            list: [sample], where sample is the loaded and transformed image.
+        """
+        path = self.samples[index]
+        sample = self.loader(path)
+        if self.transform is not None:
+            sample = self.transform(sample)
+        return [sample]
+
+    def __len__(self):
+        return len(self.samples)
diff --git a/hapi/model.py b/hapi/model.py
index bd6cf0d07484633259ffeacc3ee4623102063d75..438b9d7812d416cf8909998f7dea4be7b4d1709b 100644
--- a/hapi/model.py
+++ b/hapi/model.py
@@ -816,7 +816,7 @@ class Model(fluid.dygraph.Layer):
                 except ValueError as err:
                     if skip_mismatch:
                         warnings.warn(
-                            ("Skip loading for {}. ".format(key) + err.message))
+                            ("Skip loading for {}. ".format(key) + str(err)))
                         # reset optimizer when mismatch happens
                         reset_optimizer = True
                     else:
@@ -1161,7 +1161,7 @@ class Model(fluid.dygraph.Layer):
         if fluid.in_dygraph_mode():
             feed_list = None
         else:
-            feed_list = [x.forward() for x in self._inputs + self._labels]
+            feed_list = [x.forward() for x in self._inputs]
 
         if test_data is not None and isinstance(test_data, Dataset):
             test_sampler = DistributedBatchSampler(
@@ -1281,7 +1281,7 @@ class Model(fluid.dygraph.Layer):
 
         if mode == 'train':
             assert epoch is not None, 'when mode is train, epoch must be given'
-            callbacks.on_epoch_end(epoch)
+            callbacks.on_epoch_end(epoch, logs)
 
         return logs
 
diff --git a/hapi/text/bert/dataloader.py b/hapi/text/bert/dataloader.py
index 2cbddac1d266c8ebb26d96f4a3f2a8e81781c562..838ad50b370c7ff4c9247880adc994be3aedf501 100644
--- a/hapi/text/bert/dataloader.py
+++ b/hapi/text/bert/dataloader.py
@@ -30,6 +30,7 @@ from hapi.distributed import DistributedBatchSampler
 from hapi.text.bert.data_processor import DataProcessor, XnliProcessor, ColaProcessor, MrpcProcessor, MnliProcessor
 from hapi.text.bert.batching import prepare_batch_data
 import hapi.text.tokenizer.tokenization as tokenization
+from paddle.fluid.dygraph.parallel import ParallelEnv, ParallelStrategy
 
 __all__ = [
     'BertInputExample', 'BertInputFeatures', 'SingleSentenceDataset',
@@ -227,6 +228,9 @@ class SingleSentenceDataset(Dataset):
         if line_processor is None:
             line_processor = default_line_processor
 
+        if ParallelEnv().nranks > 1:
+            leveldb_file = leveldb_file + "_" + str(ParallelEnv().local_rank)
+
         if not os.path.exists(leveldb_file):
             print("putting data %s into leveldb %s" %
                   (input_file, leveldb_file))
@@ -384,7 +388,12 @@ class BertDataLoader(object):
                  quotechar=None,
                  device=fluid.CPUPlace(),
                  num_workers=0,
-                 return_list=True):
+                 return_list=True,
+                 phase="train"):
+
+        assert phase in [
+            "train", "predict", "test"
+        ], "phase of BertDataLoader should be in [train, predict, test], but got %s" % phase
 
         self.dataset = SingleSentenceDataset(tokenizer, label_list,
                                              max_seq_length, mode)
@@ -394,15 +403,21 @@ class BertDataLoader(object):
                 input_file, label_list, max_seq_length, tokenizer,
                 line_processor, delimiter, quotechar)
         elif mode == "leveldb":
-            #prepare_leveldb(self, input_file, leveldb_file, label_list, max_seq_length, tokenizer, line_processor=None, delimiter="\t", quotechar=None):
             self.dataset.prepare_leveldb(input_file, leveldb_file, label_list,
                                          max_seq_length, tokenizer,
                                          line_processor, delimiter, quotechar)
         else:
             raise ValueError("mode should be in [all_in_memory, leveldb]")
 
-        self.sampler = DistributedBatchSampler(
-            self.dataset, batch_size, shuffle=shuffle, drop_last=drop_last)
+        if phase == "train":
+            self.sampler = DistributedBatchSampler(
+                self.dataset, batch_size, shuffle=shuffle, drop_last=drop_last)
+        elif phase == "test" or phase == "predict":
+            self.sampler = BatchSampler(
+                dataset=self.dataset,
+                batch_size=batch_size,
+                shuffle=shuffle,
+                drop_last=drop_last)
self.dataloader = DataLoader( dataset=self.dataset, diff --git a/hapi/text/bert/optimization.py b/hapi/text/bert/optimization.py index 2bf6b7f2621273ff78bc88a6ef92f1f630072175..b2ba8f65a744754e8ff96ca66ccf818bc8b06c34 100755 --- a/hapi/text/bert/optimization.py +++ b/hapi/text/bert/optimization.py @@ -130,6 +130,18 @@ class Optimizer(object): return True return False + def state_dict(self): + return self.optimizer.state_dict() + + def set_dict(self, state_dict): + return self.optimizer.set_dict(state_dict) + + def get_opti_var_name_list(self): + return self.optimizer.get_opti_var_name_list() + + def current_step_lr(self): + return self.optimizer.current_step_lr() + def minimize(self, loss, use_data_parallel=False, model=None): param_list = dict() diff --git a/hapi/text/text.py b/hapi/text/text.py index 0f58f785795ccff7485a1f5daabce39086cf7fa2..e5be32bcb531b938c3cc8c21ec7caf2a4f40ee6e 100644 --- a/hapi/text/text.py +++ b/hapi/text/text.py @@ -22,7 +22,7 @@ import sys if six.PY2: reload(sys) sys.setdefaultencoding('utf8') - + import ast import time import argparse as argparse @@ -44,13 +44,12 @@ from paddle.fluid import layers from paddle.fluid.dygraph import Layer from paddle.fluid.layers import BeamSearchDecoder - __all__ = [ 'RNNCell', 'BasicLSTMCell', 'BasicGRUCell', 'RNN', 'DynamicDecode', 'BeamSearchDecoder', 'MultiHeadAttention', 'FFN', 'TransformerEncoderLayer', 'TransformerEncoder', 'TransformerDecoderLayer', - 'TransformerDecoder', 'TransformerBeamSearchDecoder', 'GRUCell', 'GRUEncoderCell', - 'BiGRU', 'Linear_chain_crf', 'Crf_decoding', 'SequenceTagging' + 'TransformerDecoder', 'TransformerBeamSearchDecoder', 'Linear_chain_crf', + 'Crf_decoding', 'SequenceTagging' ] @@ -219,7 +218,19 @@ class BasicLSTMCell(RNNCell): gate_activation=None, activation=None, forget_bias=1.0, - dtype='float32'): + dtype='float32', + forget_gate_weights={"w": None, + "h": None, + "b": None}, + input_gate_weights={"w": None, + "h": None, + "b": None}, + output_gate_weights={"w": None, + "h": None, + "b": None}, + cell_weights={"w": None, + "h": None, + "b": None}): super(BasicLSTMCell, self).__init__() self._hidden_size = hidden_size @@ -233,25 +244,225 @@ class BasicLSTMCell(RNNCell): self._dtype = dtype self._input_size = input_size - self._weight = self.create_parameter( - attr=self._param_attr, - shape=[ - self._input_size + self._hidden_size, 4 * self._hidden_size - ], - dtype=self._dtype) - - self._bias = self.create_parameter( - attr=self._bias_attr, - shape=[4 * self._hidden_size], - dtype=self._dtype, - is_bias=True) + self.use_customized_weight = False + for _weights in [ + forget_gate_weights, input_gate_weights, output_gate_weights, + cell_weights + ]: + for _key in _weights: + if _weights[_key] is not None: + self.use_customized_weight = True + break + if self.use_customized_weight: + break + + if not self.use_customized_weight: + + self._weight = self.create_parameter( + attr=self._param_attr, + shape=[ + self._input_size + self._hidden_size, 4 * self._hidden_size + ], + dtype=self._dtype) + + self._bias = self.create_parameter( + attr=self._bias_attr, + shape=[4 * self._hidden_size], + dtype=self._dtype, + is_bias=True) + else: + if "w" in forget_gate_weights and forget_gate_weights[ + "w"] is not None: + self.fg_w = forget_gate_weights["w"] + else: + if self._param_attr is not None and self._param_attr.name is not None: + tmp_param_attr = copy.deepcopy(self._param_attr) + tmp_param_attr.name += "_forget_gate_w" + else: + tmp_param_attr = self._param_attr + self.fg_w = 
self.create_parameter( + attr=tmp_param_attr, + shape=[self._input_size, self._hidden_size], + dtype=self._dtype) + + if "h" in forget_gate_weights and forget_gate_weights[ + "h"] is not None: + self.fg_h = forget_gate_weights["h"] + else: + if self._param_attr is not None and self._param_attr.name is not None: + tmp_param_attr = copy.deepcopy(self._param_attr) + tmp_param_attr.name += "_forget_gate_h" + else: + tmp_param_attr = self._param_attr + self.fg_h = self.create_parameter( + attr=tmp_param_attr, + shape=[self._hidden_size, self._hidden_size], + dtype=self._dtype) + + if "b" in forget_gate_weights and forget_gate_weights[ + "b"] is not None: + self.fg_b = forget_gate_weights["b"] + else: + if self._bias_attr is not None and self._bias_attr.name is not None: + tmp_param_attr = copy.deepcopy(self._bias_attr) + tmp_param_attr.name += "_forget_gate_b" + else: + tmp_param_attr = self._bias_attr + self.fg_b = self.create_parameter( + attr=tmp_param_attr, + shape=[self._hidden_size], + dtype=self._dtype, + is_bias=True) + + if "w" in input_gate_weights and input_gate_weights[ + "w"] is not None: + self.ig_w = input_gate_weights["w"] + else: + if self._param_attr is not None and self._param_attr.name is not None: + tmp_param_attr = copy.deepcopy(self._param_attr) + tmp_param_attr.name += "_input_gate_w" + else: + tmp_param_attr = self._param_attr + + self.ig_w = self.create_parameter( + attr=tmp_param_attr, + shape=[self._input_size, self._hidden_size], + dtype=self._dtype) + + if "h" in input_gate_weights and input_gate_weights[ + "h"] is not None: + self.ig_h = input_gate_weights["h"] + else: + if self._param_attr is not None and self._param_attr.name is not None: + tmp_param_attr = copy.deepcopy(self._param_attr) + tmp_param_attr.name += "_input_gate_h" + else: + tmp_param_attr = self._param_attr + + self.ig_h = self.create_parameter( + attr=tmp_param_attr, + shape=[self._hidden_size, self._hidden_size], + dtype=self._dtype) + + if "b" in input_gate_weights and input_gate_weights[ + "b"] is not None: + self.ig_b = input_gate_weights["b"] + else: + if self._bias_attr is not None and self._bias_attr.name is not None: + tmp_param_attr = copy.deepcopy(self._bias_attr) + tmp_param_attr.name += "_input_gate_b" + else: + tmp_param_attr = self._bias_attr + self.ig_b = self.create_parameter( + attr=tmp_param_attr, + shape=[self._hidden_size], + dtype=self._dtype, + is_bias=True) + + if "w" in output_gate_weights and output_gate_weights[ + "w"] is not None: + self.og_w = output_gate_weights["w"] + else: + if self._param_attr is not None and self._param_attr.name is not None: + tmp_param_attr = copy.deepcopy(self._param_attr) + tmp_param_attr.name += "_output_gate_w" + else: + tmp_param_attr = self._param_attr + self.og_w = self.create_parameter( + attr=tmp_param_attr, + shape=[self._input_size, self._hidden_size], + dtype=self._dtype) + + if "h" in output_gate_weights and output_gate_weights[ + "h"] is not None: + self.og_h = output_gate_weights["h"] + else: + if self._param_attr is not None and self._param_attr.name is not None: + tmp_param_attr = copy.deepcopy(self._param_attr) + tmp_param_attr.name += "_output_gate_h" + else: + tmp_param_attr = self._param_attr + + self.og_h = self.create_parameter( + attr=tmp_param_attr, + shape=[self._hidden_size, self._hidden_size], + dtype=self._dtype) + + if "b" in output_gate_weights and output_gate_weights[ + "b"] is not None: + self.og_b = output_gate_weights["b"] + else: + if self._bias_attr is not None and self._bias_attr.name is not None: + 
tmp_param_attr = copy.deepcopy(self._bias_attr) + tmp_param_attr.name += "_output_gate_b" + else: + tmp_param_attr = self._bias_attr + self.og_b = self.create_parameter( + attr=tmp_param_attr, + shape=[self._hidden_size], + dtype=self._dtype, + is_bias=True) + + if "w" in cell_weights and cell_weights["w"] is not None: + self.c_w = cell_weights["w"] + else: + if self._param_attr is not None and self._param_attr.name is not None: + tmp_param_attr = copy.deepcopy(self._param_attr) + tmp_param_attr.name += "_cell_w" + else: + tmp_param_attr = self._param_attr + + self.c_w = self.create_parameter( + attr=tmp_param_attr, + shape=[self._input_size, self._hidden_size], + dtype=self._dtype) + + if "h" in cell_weights and cell_weights["h"] is not None: + self.c_h = cell_weights["h"] + else: + if self._param_attr is not None and self._param_attr.name is not None: + tmp_param_attr = copy.deepcopy(self._param_attr) + tmp_param_attr.name += "_cell_h" + else: + tmp_param_attr = self._param_attr + self.c_h = self.create_parameter( + attr=tmp_param_attr, + shape=[self._hidden_size, self._hidden_size], + dtype=self._dtype) + + if "b" in cell_weights and cell_weights["b"] is not None: + self.c_b = cell_weights["b"] + else: + if self._bias_attr is not None and self._bias_attr.name is not None: + tmp_param_attr = copy.deepcopy(self._bias_attr) + tmp_param_attr.name += "_cell_b" + else: + tmp_param_attr = self._bias_attr + self.c_b = self.create_parameter( + attr=tmp_param_attr, + shape=[self._hidden_size], + dtype=self._dtype, + is_bias=True) def forward(self, input, state): + + if self.use_customized_weight: + weight_w = fluid.layers.concat( + [self.ig_w, self.c_w, self.fg_w, self.og_w], axis=-1) + weight_h = fluid.layers.concat( + [self.ig_h, self.c_h, self.fg_h, self.og_h], axis=-1) + _weight = fluid.layers.concat([weight_w, weight_h], axis=0) + _bias = fluid.layers.concat( + [self.ig_b, self.c_b, self.fg_b, self.og_b]) + else: + _weight = self._weight + _bias = self._bias + pre_hidden, pre_cell = state concat_input_hidden = layers.concat([input, pre_hidden], 1) - gate_input = layers.matmul(x=concat_input_hidden, y=self._weight) + gate_input = layers.matmul(x=concat_input_hidden, y=_weight) - gate_input = layers.elementwise_add(gate_input, self._bias) + gate_input = layers.elementwise_add(gate_input, _bias) i, j, f, o = layers.split(gate_input, num_or_sections=4, dim=-1) new_cell = layers.elementwise_add( layers.elementwise_mul( @@ -308,7 +519,16 @@ class BasicGRUCell(RNNCell): bias_attr=None, gate_activation=None, activation=None, - dtype='float32'): + dtype='float32', + update_gate_weights={"w": None, + "h": None, + "b": None}, + reset_gate_weights={"w": None, + "h": None, + "b": None}, + cell_weights={"w": None, + "h": None, + "b": None}): super(BasicGRUCell, self).__init__() self._input_size = input_size self._hidden_size = hidden_size @@ -318,6 +538,20 @@ class BasicGRUCell(RNNCell): self._activation = activation or layers.tanh self._dtype = dtype + assert isinstance(update_gate_weights, dict) + assert isinstance(reset_gate_weights, dict) + assert isinstance(cell_weights, dict) + + self.use_customized_weight = False + for _weights in [ + update_gate_weights, reset_gate_weights, cell_weights + ]: + for _key in _weights: + if _weights[_key] is not None: + self.use_customized_weight = True + if self.use_customized_weight: + break + if self._param_attr is not None and self._param_attr.name is not None: gate_param_attr = copy.deepcopy(self._param_attr) candidate_param_attr = copy.deepcopy(self._param_attr) 
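# --- Editor's note (illustrative sketch, not part of the patch) ---------
# Shape check for the weight fusion in BasicLSTMCell.forward above: the
# per-gate matrices are concatenated in (input, cell, forget, output)
# order along the last axis, so the fused matmul output can be split back
# into the i, j, f, o gates. Sizes below are made up.
import numpy as np

in_sz, hid = 4, 3
ig_w, c_w, fg_w, og_w = [np.ones((in_sz, hid)) for _ in range(4)]
ig_h, c_h, fg_h, og_h = [np.ones((hid, hid)) for _ in range(4)]
weight_w = np.concatenate([ig_w, c_w, fg_w, og_w], axis=-1)
weight_h = np.concatenate([ig_h, c_h, fg_h, og_h], axis=-1)
fused = np.concatenate([weight_w, weight_h], axis=0)
assert fused.shape == (in_sz + hid, 4 * hid)
# -------------------------------------------------------------------------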
@@ -327,43 +561,194 @@ class BasicGRUCell(RNNCell): gate_param_attr = self._param_attr candidate_param_attr = self._param_attr - self._gate_weight = self.create_parameter( - attr=gate_param_attr, - shape=[self._input_size + self._hidden_size, 2 * self._hidden_size], - dtype=self._dtype) - - self._candidate_weight = self.create_parameter( - attr=candidate_param_attr, - shape=[self._input_size + self._hidden_size, self._hidden_size], - dtype=self._dtype) + if not self.use_customized_weight: + self._gate_weight = self.create_parameter( + attr=gate_param_attr, + shape=[ + self._input_size + self._hidden_size, 2 * self._hidden_size + ], + dtype=self._dtype) + + self._candidate_weight = self.create_parameter( + attr=candidate_param_attr, + shape=[ + self._input_size + self._hidden_size, self._hidden_size + ], + dtype=self._dtype) + + if self._bias_attr is not None and self._bias_attr.name is not None: + gate_bias_attr = copy.deepcopy(self._bias_attr) + candidate_bias_attr = copy.deepcopy(self._bias_attr) + gate_bias_attr.name += "_gate" + candidate_bias_attr.name += "_candidate" + else: + gate_bias_attr = self._bias_attr + candidate_bias_attr = self._bias_attr + + self._gate_bias = self.create_parameter( + attr=gate_bias_attr, + shape=[2 * self._hidden_size], + dtype=self._dtype, + is_bias=True) + self._candidate_bias = self.create_parameter( + attr=candidate_bias_attr, + shape=[self._hidden_size], + dtype=self._dtype, + is_bias=True) - if self._bias_attr is not None and self._bias_attr.name is not None: - gate_bias_attr = copy.deepcopy(self._bias_attr) - candidate_bias_attr = copy.deepcopy(self._bias_attr) - gate_bias_attr.name += "_gate" - candidate_bias_attr.name += "_candidate" else: - gate_bias_attr = self._bias_attr - candidate_bias_attr = self._bias_attr - - self._gate_bias = self.create_parameter( - attr=gate_bias_attr, - shape=[2 * self._hidden_size], - dtype=self._dtype, - is_bias=True) - self._candidate_bias = self.create_parameter( - attr=candidate_bias_attr, - shape=[self._hidden_size], - dtype=self._dtype, - is_bias=True) + + # create the parameters of gates in gru + if "w" in update_gate_weights and update_gate_weights[ + "w"] is not None: + self.ug_w = update_gate_weights["w"] + else: + if gate_param_attr is not None and gate_param_attr.name is not None: + tmp_param_attr = copy.deepcopy(gate_param_attr) + tmp_param_attr.name += "_update_gate_w" + else: + tmp_param_attr = gate_param_attr + self.ug_w = self.create_parameter( + attr=tmp_param_attr, + shape=[self._input_size, self._hidden_size], + dtype=self._dtype) + + if "h" in update_gate_weights and update_gate_weights[ + "h"] is not None: + self.ug_h = update_gate_weights["h"] + else: + if gate_param_attr is not None and gate_param_attr.name is not None: + tmp_param_attr = copy.deepcopy(gate_param_attr) + tmp_param_attr.name += "_update_gate_h" + else: + tmp_param_attr = gate_param_attr + self.ug_h = self.create_parameter( + attr=tmp_param_attr, + shape=[self._hidden_size, self._hidden_size], + dtype=self._dtype) + + if "b" in update_gate_weights and update_gate_weights[ + "b"] is not None: + self.ug_b = update_gate_weights["b"] + else: + if gate_bias_attr is not None and gate_bias_attr.name is not None: + tmp_param_attr = copy.deepcopy(gate_bias_attr) + tmp_param_attr.name += "_update_gate_b" + else: + tmp_param_attr = gate_bias_attr + self.ug_b = self.create_parameter( + attr=tmp_param_attr, + shape=[self._hidden_size], + dtype=self._dtype, + is_bias=True) + + # reset gate parameters + if "w" in reset_gate_weights and 
reset_gate_weights[
+                    "w"] is not None:
+                self.rg_w = reset_gate_weights["w"]
+            else:
+                if gate_param_attr is not None and gate_param_attr.name is not None:
+                    tmp_param_attr = copy.deepcopy(gate_param_attr)
+                    tmp_param_attr.name += "_reset_gate_w"
+                else:
+                    tmp_param_attr = gate_param_attr
+                self.rg_w = self.create_parameter(
+                    attr=tmp_param_attr,
+                    shape=[self._input_size, self._hidden_size],
+                    dtype=self._dtype)
+
+            if "h" in reset_gate_weights and reset_gate_weights[
+                    "h"] is not None:
+                self.rg_h = reset_gate_weights["h"]
+            else:
+                if gate_param_attr is not None and gate_param_attr.name is not None:
+                    tmp_param_attr = copy.deepcopy(gate_param_attr)
+                    tmp_param_attr.name += "_reset_gate_h"
+                else:
+                    tmp_param_attr = gate_param_attr
+                self.rg_h = self.create_parameter(
+                    attr=tmp_param_attr,
+                    shape=[self._hidden_size, self._hidden_size],
+                    dtype=self._dtype)
+
+            if "b" in reset_gate_weights and reset_gate_weights[
+                    "b"] is not None:
+                self.rg_b = reset_gate_weights["b"]
+            else:
+                if gate_bias_attr is not None and gate_bias_attr.name is not None:
+                    tmp_param_attr = copy.deepcopy(gate_bias_attr)
+                    tmp_param_attr.name += "_reset_gate_b"
+                else:
+                    tmp_param_attr = gate_bias_attr
+                self.rg_b = self.create_parameter(
+                    attr=tmp_param_attr,
+                    shape=[self._hidden_size],
+                    dtype=self._dtype,
+                    is_bias=True)
+
+            # cell parameters
+            if "w" in cell_weights and cell_weights["w"] is not None:
+                self.c_w = cell_weights["w"]
+            else:
+                if candidate_param_attr is not None and candidate_param_attr.name is not None:
+                    tmp_param_attr = copy.deepcopy(candidate_param_attr)
+                    tmp_param_attr.name += "_cell_w"
+                else:
+                    tmp_param_attr = candidate_param_attr
+
+                self.c_w = self.create_parameter(
+                    attr=tmp_param_attr,
+                    shape=[self._input_size, self._hidden_size],
+                    dtype=self._dtype)
+
+            if "h" in cell_weights and cell_weights["h"] is not None:
+                self.c_h = cell_weights["h"]
+            else:
+                if candidate_param_attr is not None and candidate_param_attr.name is not None:
+                    tmp_param_attr = copy.deepcopy(candidate_param_attr)
+                    tmp_param_attr.name += "_cell_h"
+                else:
+                    tmp_param_attr = candidate_param_attr
+                self.c_h = self.create_parameter(
+                    attr=tmp_param_attr,
+                    shape=[self._hidden_size, self._hidden_size],
+                    dtype=self._dtype)
+
+            if "b" in cell_weights and cell_weights["b"] is not None:
+                self.c_b = cell_weights["b"]
+            else:
+                if candidate_bias_attr is not None and candidate_bias_attr.name is not None:
+                    tmp_param_attr = copy.deepcopy(candidate_bias_attr)
+                    tmp_param_attr.name += "_cell_b"
+                else:
+                    tmp_param_attr = candidate_bias_attr
+                self.c_b = self.create_parameter(
+                    attr=tmp_param_attr,
+                    shape=[self._hidden_size],
+                    dtype=self._dtype,
+                    is_bias=True)
+
     def forward(self, input, state):
+
+        if self.use_customized_weight:
+            rg_weights = layers.concat([self.rg_w, self.rg_h], axis=0)
+            ug_weights = layers.concat([self.ug_w, self.ug_h], axis=0)
+            _gate_weight = layers.concat([rg_weights, ug_weights], axis=-1)
+            _candidate_weight = layers.concat([self.c_w, self.c_h], axis=0)
+            _gate_bias = layers.concat([self.rg_b, self.ug_b], axis=0)
+            _candidate_bias = self.c_b
+        else:
+            _gate_weight = self._gate_weight
+            _gate_bias = self._gate_bias
+            _candidate_weight = self._candidate_weight
+            _candidate_bias = self._candidate_bias
+
         pre_hidden = state
         concat_input_hidden = layers.concat([input, pre_hidden], axis=1)
-        gate_input = layers.matmul(x=concat_input_hidden, y=self._gate_weight)
+        gate_input = layers.matmul(x=concat_input_hidden, y=_gate_weight)
-        gate_input = layers.elementwise_add(gate_input, self._gate_bias)
+        gate_input =
layers.elementwise_add(gate_input, _gate_bias) gate_input = self._gate_activation(gate_input) r, u = layers.split(gate_input, num_or_sections=2, dim=1) @@ -371,8 +756,8 @@ class BasicGRUCell(RNNCell): r_hidden = r * pre_hidden candidate = layers.matmul( - layers.concat([input, r_hidden], 1), self._candidate_weight) - candidate = layers.elementwise_add(candidate, self._candidate_bias) + layers.concat([input, r_hidden], 1), _candidate_weight) + candidate = layers.elementwise_add(candidate, _candidate_bias) c = self._activation(candidate) new_hidden = u * pre_hidden + (1 - u) * c @@ -700,7 +1085,11 @@ class PrePostProcessLayer(Layer): PrePostProcessLayer """ - def __init__(self, process_cmd, d_model, dropout_rate): + def __init__(self, + process_cmd, + d_model, + dropout_rate, + reused_layer_norm=None): super(PrePostProcessLayer, self).__init__() self.process_cmd = process_cmd self.functors = [] @@ -708,16 +1097,21 @@ class PrePostProcessLayer(Layer): if cmd == "a": # add residual connection self.functors.append(lambda x, y: x + y if y else x) elif cmd == "n": # add layer normalization + if reused_layer_norm is not None: + layer_norm = reused_layer_norm + else: + layer_norm = LayerNorm( + normalized_shape=d_model, + param_attr=fluid.ParamAttr( + initializer=fluid.initializer.Constant(1.)), + bias_attr=fluid.ParamAttr( + initializer=fluid.initializer.Constant(0.))) + self.functors.append( self.add_sublayer( "layer_norm_%d" % len( self.sublayers(include_sublayers=False)), - LayerNorm( - normalized_shape=d_model, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(1.)), - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(0.))))) + layer_norm)) elif cmd == "d": # add dropout self.functors.append(lambda x: layers.dropout( x, dropout_prob=dropout_rate, is_test=False) @@ -737,21 +1131,48 @@ class MultiHeadAttention(Layer): Multi-Head Attention """ - def __init__(self, d_key, d_value, d_model, n_head=1, dropout_rate=0.): + def __init__(self, + d_key, + d_value, + d_model, + n_head=1, + dropout_rate=0.0, + reused_query_fc=None, + reused_key_fc=None, + reused_value_fc=None, + reused_proj_fc=None): + super(MultiHeadAttention, self).__init__() self.n_head = n_head self.d_key = d_key self.d_value = d_value self.d_model = d_model self.dropout_rate = dropout_rate - self.q_fc = Linear( - input_dim=d_model, output_dim=d_key * n_head, bias_attr=False) - self.k_fc = Linear( - input_dim=d_model, output_dim=d_key * n_head, bias_attr=False) - self.v_fc = Linear( - input_dim=d_model, output_dim=d_value * n_head, bias_attr=False) - self.proj_fc = Linear( - input_dim=d_value * n_head, output_dim=d_model, bias_attr=False) + + if reused_query_fc is not None: + self.q_fc = reused_query_fc + else: + self.q_fc = Linear( + input_dim=d_model, output_dim=d_key * n_head, bias_attr=False) + if reused_key_fc is not None: + self.k_fc = reused_key_fc + else: + self.k_fc = Linear( + input_dim=d_model, output_dim=d_key * n_head, bias_attr=False) + if reused_value_fc is not None: + self.v_fc = reused_value_fc + else: + self.v_fc = Linear( + input_dim=d_model, + output_dim=d_value * n_head, + bias_attr=False) + if reused_proj_fc is not None: + self.proj_fc = reused_proj_fc + else: + self.proj_fc = Linear( + input_dim=d_value * n_head, + output_dim=d_model, + bias_attr=False) def _prepare_qkv(self, queries, keys, values, cache=None): if keys is None: # self-attention @@ -828,12 +1249,24 @@ class FFN(Layer): Feed-Forward Network """ - def __init__(self, d_inner_hid, d_model, dropout_rate): + def 
__init__(self, + d_inner_hid, + d_model, + dropout_rate, + fc1_act="relu", + reused_fc1=None, + reused_fc2=None): super(FFN, self).__init__() self.dropout_rate = dropout_rate - self.fc1 = Linear( - input_dim=d_model, output_dim=d_inner_hid, act="relu") - self.fc2 = Linear(input_dim=d_inner_hid, output_dim=d_model) + if reused_fc1 is not None: + self.fc1 = reused_fc1 + else: + self.fc1 = Linear( + input_dim=d_model, output_dim=d_inner_hid, act=fc1_act) + if reused_fc2 is not None: + self.fc2 = reused_fc2 + else: + self.fc2 = Linear(input_dim=d_inner_hid, output_dim=d_model) def forward(self, x): hidden = self.fc1(x) @@ -859,22 +1292,52 @@ class TransformerEncoderLayer(Layer): attention_dropout, relu_dropout, preprocess_cmd="n", - postprocess_cmd="da"): + postprocess_cmd="da", + ffn_fc1_act="relu", + reused_pre_selatt_layernorm=None, + reused_multihead_att_weights={ + "reused_query_fc": None, + "reused_key_fc": None, + "reused_value_fc": None, + "reused_proj_fc": None + }, + reused_post_selfatt_layernorm=None, + reused_pre_ffn_layernorm=None, + reused_ffn_weights={"reused_fc1": None, + "reused_fc2": None}, + reused_post_ffn_layernorm=None): super(TransformerEncoderLayer, self).__init__() self.preprocesser1 = PrePostProcessLayer(preprocess_cmd, d_model, - prepostprocess_dropout) - self.self_attn = MultiHeadAttention(d_key, d_value, d_model, n_head, - attention_dropout) - self.postprocesser1 = PrePostProcessLayer(postprocess_cmd, d_model, - prepostprocess_dropout) + prepostprocess_dropout, + reused_pre_selatt_layernorm) + self.self_attn = MultiHeadAttention( + d_key, + d_value, + d_model, + n_head, + attention_dropout, + reused_query_fc=reused_multihead_att_weights["reused_query_fc"], + reused_key_fc=reused_multihead_att_weights["reused_key_fc"], + reused_value_fc=reused_multihead_att_weights["reused_value_fc"], + reused_proj_fc=reused_multihead_att_weights["reused_proj_fc"]) + self.postprocesser1 = PrePostProcessLayer( + postprocess_cmd, d_model, prepostprocess_dropout, + reused_post_selfatt_layernorm) self.preprocesser2 = PrePostProcessLayer(preprocess_cmd, d_model, - prepostprocess_dropout) - self.ffn = FFN(d_inner_hid, d_model, relu_dropout) + prepostprocess_dropout, + reused_pre_ffn_layernorm) + self.ffn = FFN(d_inner_hid, + d_model, + relu_dropout, + fc1_act=ffn_fc1_act, + reused_fc1=reused_ffn_weights["reused_fc1"], + reused_fc2=reused_ffn_weights["reused_fc2"]) self.postprocesser2 = PrePostProcessLayer(postprocess_cmd, d_model, - prepostprocess_dropout) + prepostprocess_dropout, + reused_post_ffn_layernorm) def forward(self, enc_input, attn_bias): attn_output = self.self_attn( @@ -902,7 +1365,8 @@ class TransformerEncoder(Layer): attention_dropout, relu_dropout, preprocess_cmd="n", - postprocess_cmd="da"): + postprocess_cmd="da", + ffn_fc1_act="relu"): super(TransformerEncoder, self).__init__() @@ -912,9 +1376,17 @@ class TransformerEncoder(Layer): self.add_sublayer( "layer_%d" % i, TransformerEncoderLayer( - n_head, d_key, d_value, d_model, d_inner_hid, - prepostprocess_dropout, attention_dropout, - relu_dropout, preprocess_cmd, postprocess_cmd))) + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + prepostprocess_dropout, + attention_dropout, + relu_dropout, + preprocess_cmd, + postprocess_cmd, + ffn_fc1_act=ffn_fc1_act))) self.processer = PrePostProcessLayer(preprocess_cmd, d_model, prepostprocess_dropout) @@ -941,28 +1413,79 @@ class TransformerDecoderLayer(Layer): attention_dropout, relu_dropout, preprocess_cmd="n", - postprocess_cmd="da"): + postprocess_cmd="da", + 
@@ -941,28 +1413,79 @@ class TransformerDecoderLayer(Layer):
                  attention_dropout,
                  relu_dropout,
                  preprocess_cmd="n",
-                 postprocess_cmd="da"):
+                 postprocess_cmd="da",
+                 reused_pre_selfatt_layernorm=None,
+                 reused_self_multihead_att_weights={
+                     "reused_query_fc": None,
+                     "reused_key_fc": None,
+                     "reused_value_fc": None,
+                     "reused_proj_fc": None
+                 },
+                 reused_post_selfatt_layernorm=None,
+                 reused_pre_crossatt_layernorm=None,
+                 reused_cross_multihead_att_weights={
+                     "reused_query_fc": None,
+                     "reused_key_fc": None,
+                     "reused_value_fc": None,
+                     "reused_proj_fc": None
+                 },
+                 reused_post_crossatt_layernorm=None,
+                 reused_pre_ffn_layernorm=None,
+                 reused_ffn_weights={"reused_fc1": None,
+                                     "reused_fc2": None},
+                 reused_post_ffn_layernorm=None):
         super(TransformerDecoderLayer, self).__init__()

         self.preprocesser1 = PrePostProcessLayer(preprocess_cmd, d_model,
-                                                 prepostprocess_dropout)
-        self.self_attn = MultiHeadAttention(d_key, d_value, d_model, n_head,
-                                            attention_dropout)
-        self.postprocesser1 = PrePostProcessLayer(postprocess_cmd, d_model,
-                                                  prepostprocess_dropout)
+                                                 prepostprocess_dropout,
+                                                 reused_pre_selfatt_layernorm)
+        self.self_attn = MultiHeadAttention(
+            d_key,
+            d_value,
+            d_model,
+            n_head,
+            attention_dropout,
+            reused_query_fc=reused_self_multihead_att_weights[
+                "reused_query_fc"],
+            reused_key_fc=reused_self_multihead_att_weights["reused_key_fc"],
+            reused_value_fc=reused_self_multihead_att_weights[
+                "reused_value_fc"],
+            reused_proj_fc=reused_self_multihead_att_weights["reused_proj_fc"])
+        self.postprocesser1 = PrePostProcessLayer(
+            postprocess_cmd, d_model, prepostprocess_dropout,
+            reused_post_selfatt_layernorm)
         self.preprocesser2 = PrePostProcessLayer(preprocess_cmd, d_model,
-                                                 prepostprocess_dropout)
-        self.cross_attn = MultiHeadAttention(d_key, d_value, d_model, n_head,
-                                             attention_dropout)
-        self.postprocesser2 = PrePostProcessLayer(postprocess_cmd, d_model,
-                                                  prepostprocess_dropout)
+                                                 prepostprocess_dropout,
+                                                 reused_pre_crossatt_layernorm)
+        self.cross_attn = MultiHeadAttention(
+            d_key,
+            d_value,
+            d_model,
+            n_head,
+            attention_dropout,
+            reused_query_fc=reused_cross_multihead_att_weights[
+                "reused_query_fc"],
+            reused_key_fc=reused_cross_multihead_att_weights["reused_key_fc"],
+            reused_value_fc=reused_cross_multihead_att_weights[
+                "reused_value_fc"],
+            reused_proj_fc=reused_cross_multihead_att_weights[
+                "reused_proj_fc"])
+        self.postprocesser2 = PrePostProcessLayer(
+            postprocess_cmd, d_model, prepostprocess_dropout,
+            reused_post_crossatt_layernorm)

         self.preprocesser3 = PrePostProcessLayer(preprocess_cmd, d_model,
-                                                 prepostprocess_dropout)
-        self.ffn = FFN(d_inner_hid, d_model, relu_dropout)
+                                                 prepostprocess_dropout,
+                                                 reused_pre_ffn_layernorm)
+        self.ffn = FFN(d_inner_hid,
+                       d_model,
+                       relu_dropout,
+                       reused_fc1=reused_ffn_weights["reused_fc1"],
+                       reused_fc2=reused_ffn_weights["reused_fc2"])
         self.postprocesser3 = PrePostProcessLayer(postprocess_cmd, d_model,
-                                                  prepostprocess_dropout)
+                                                  prepostprocess_dropout,
+                                                  reused_post_ffn_layernorm)

     def forward(self,
                 dec_input,
@@ -1031,7 +1554,7 @@ class TransformerDecoder(Layer):
         ]


-
+#TODO: we should merge GRUCell with BasicGRUCell
 class GRUCell(RNNCell):
     def __init__(self,
                  input_size,
@@ -1044,9 +1567,7 @@ class GRUCell(RNNCell):
         super(GRUCell, self).__init__()
         self.hidden_size = hidden_size
         self.fc_layer = Linear(
-            input_size,
-            hidden_size * 3,
-            param_attr=param_attr)
+            input_size, hidden_size * 3, param_attr=param_attr)

         self.gru_unit = GRUUnit(
             hidden_size * 3,
@@ -1067,7 +1588,8 @@ class GRUCell(RNNCell):
         return [self.hidden_size]


-class GRUEncoderCell(RNNCell):
+#TODO: we should merge GRUCell with BasicGRUCell
+class GRUEncoderCell(RNNCell):
     def __init__(self,
                  num_layers,
                  input_size,
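The decoder layer exposes the same reuse hooks for its self-attention, cross-attention, and FFN, grouped into dicts. A sketch of sharing one encoder layer's parameters with a decoder layer; the constructor names before attention_dropout are assumed from context and all sizes are illustrative:

import paddle.fluid as fluid
from hapi.text.text import TransformerEncoderLayer, TransformerDecoderLayer

with fluid.dygraph.guard():
    enc_layer = TransformerEncoderLayer(
        n_head=8, d_key=64, d_value=64, d_model=512, d_inner_hid=2048,
        prepostprocess_dropout=0.1, attention_dropout=0.1, relu_dropout=0.1)
    # Tie the decoder's self-attention and FFN to the encoder layer's
    # sublayers; cross-attention keeps its own freshly created weights.
    dec_layer = TransformerDecoderLayer(
        n_head=8, d_key=64, d_value=64, d_model=512, d_inner_hid=2048,
        prepostprocess_dropout=0.1, attention_dropout=0.1, relu_dropout=0.1,
        reused_self_multihead_att_weights={
            "reused_query_fc": enc_layer.self_attn.q_fc,
            "reused_key_fc": enc_layer.self_attn.k_fc,
            "reused_value_fc": enc_layer.self_attn.v_fc,
            "reused_proj_fc": enc_layer.self_attn.proj_fc
        },
        reused_ffn_weights={
            "reused_fc1": enc_layer.ffn.fc1,
            "reused_fc2": enc_layer.ffn.fc2
        })

One note on the signatures themselves: the reused_* dict defaults are mutable default arguments. They are safe here only because the constructors read them without mutating them; None sentinels would be the more defensive idiom.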
@@ -1086,8 +1608,9 @@ class GRUEncoderCell(RNNCell):
                     GRUCell(
                         input_size=input_size if i == 0 else hidden_size,
                         hidden_size=hidden_size,
-                        param_attr=fluid.ParamAttr(initializer=fluid.initializer.UniformInitializer(
-                            low=-init_scale, high=init_scale)))))
+                        param_attr=fluid.ParamAttr(
+                            initializer=fluid.initializer.UniformInitializer(
+                                low=-init_scale, high=init_scale)))))

     def forward(self, step_input, states):
         new_states = []
@@ -1109,18 +1632,17 @@ class GRUEncoderCell(RNNCell):
 class BiGRU(fluid.dygraph.Layer):
     def __init__(self, input_dim, grnn_hidden_dim, init_bound, h_0=None):
         super(BiGRU, self).__init__()
-        self.gru = RNN(GRUEncoderCell(1, input_dim,
-                                      grnn_hidden_dim, 0.0, init_bound),
+        self.gru = RNN(GRUEncoderCell(1, input_dim, grnn_hidden_dim, 0.0,
+                                      init_bound),
                        is_reverse=False,
                        time_major=False)
-        self.gru_r = RNN(GRUEncoderCell(1, input_dim,
-                                        grnn_hidden_dim, 0.0, init_bound),
-                         is_reverse=True,
-                         time_major=False)
-
+        self.gru_r = RNN(GRUEncoderCell(1, input_dim, grnn_hidden_dim, 0.0,
+                                        init_bound),
+                         is_reverse=True,
+                         time_major=False)

-    def forward(self, input_feature):
+    def forward(self, input_feature):
         pre_gru, pre_state = self.gru(input_feature)
         gru_r, r_state = self.gru_r(input_feature)
         bi_merge = fluid.layers.concat(input=[pre_gru, gru_r], axis=-1)
@@ -1320,14 +1842,14 @@ class SequenceTagging(fluid.dygraph.Layer):

         emission = self.fc(bigru_output)

-        if target is not None:
+        if target is not None:
             crf_cost = self.linear_chain_crf(
                 input=emission, label=target, length=lengths)
             avg_cost = fluid.layers.mean(x=crf_cost)
             self.crf_decoding.weight = self.linear_chain_crf.weight
             crf_decode = self.crf_decoding(input=emission, length=lengths)
             return crf_decode, avg_cost, lengths
-        else:
+        else:
             self.linear_chain_crf.weight = self.crf_decoding.weight
             crf_decode = self.crf_decoding(input=emission, length=lengths)
             return crf_decode, lengths
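BiGRU runs the same GRUEncoderCell stack forward and reversed and concatenates the two output sequences, so its output width is twice grnn_hidden_dim. A small usage sketch with illustrative shapes (the constructor signature is visible above; the module path is assumed):

import numpy as np
import paddle.fluid as fluid
from hapi.text.text import BiGRU  # assumed module path

with fluid.dygraph.guard():
    # batch of 4 sequences, 10 steps, 128-d features (time_major=False)
    feats = fluid.dygraph.to_variable(
        np.random.rand(4, 10, 128).astype("float32"))
    bigru = BiGRU(input_dim=128, grnn_hidden_dim=256, init_bound=0.1)
    out = bigru(feats)  # forward/backward concat -> shape (4, 10, 512)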
diff --git a/hapi/vision/models/__init__.py b/hapi/vision/models/__init__.py
index 02cf6182a57f8bd30be2887efdfb0dfa131dd69a..d444cd6627e8228a796c29cd7396d459e10cc4c7 100644
--- a/hapi/vision/models/__init__.py
+++ b/hapi/vision/models/__init__.py
@@ -17,24 +17,15 @@ from . import vgg
 from . import mobilenetv1
 from . import mobilenetv2
 from . import darknet
-from . import yolov3
-from . import tsm
-from . import bmn_model

 from .resnet import *
 from .mobilenetv1 import *
 from .mobilenetv2 import *
 from .vgg import *
 from .darknet import *
-from .yolov3 import *
-from .tsm import *
-from .bmn_model import *

 __all__ = resnet.__all__ \
           + vgg.__all__ \
           + mobilenetv1.__all__ \
           + mobilenetv2.__all__ \
-          + darknet.__all__ \
-          + yolov3.__all__ \
-          + tsm.__all__ \
-          + bmn_model.__all__
+          + darknet.__all__
diff --git a/hapi/vision/models/darknet.py b/hapi/vision/models/darknet.py
index f1d8e030ec4c3efd0cc62796e361390ccfa8717f..08e4171ada84ec16cc149f23f3a41691c2fb97d1 100755
--- a/hapi/vision/models/darknet.py
+++ b/hapi/vision/models/darknet.py
@@ -22,7 +22,7 @@ from paddle.fluid.dygraph.nn import Conv2D, BatchNorm, Pool2D, Linear
 from hapi.model import Model
 from hapi.download import get_weights_path

-__all__ = ['DarkNet', 'ConvBNLayer', 'darknet53']
+__all__ = ['DarkNet', 'darknet53']

 # {num_layers: (url, md5)}
 pretrain_infos = {
diff --git a/hapi/vision/transforms/__init__.py b/hapi/vision/transforms/__init__.py
index 4367c712420fef78c6a81d681f8e4e9342f1540a..f7c5b63b19ed081ee6887850c1aa3ef918715222 100644
--- a/hapi/vision/transforms/__init__.py
+++ b/hapi/vision/transforms/__init__.py
@@ -12,6 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+from . import transforms
+from . import functional
+
 from .transforms import *
 from .functional import *
-from .detection_transforms import *
+
+__all__ = transforms.__all__ \
+          + functional.__all__
diff --git a/hapi/vision/transforms/functional.py b/hapi/vision/transforms/functional.py
index 8305619cc291a7f18c7258f1936fd9af684a967e..a4ca466c12ca5bf1e4db6fa4e47f58f95f73aea9 100644
--- a/hapi/vision/transforms/functional.py
+++ b/hapi/vision/transforms/functional.py
@@ -26,6 +26,8 @@ else:
     Sequence = collections.abc.Sequence
     Iterable = collections.abc.Iterable

+__all__ = ['flip', 'resize']
+

 def flip(image, code):
     """