Commit 3421f8c6 authored by Hongkun Yu, committed by A. Unique TensorFlower

Internal change

PiperOrigin-RevId: 339113572
Parent b95fa6e1
@@ -13,8 +13,8 @@
 # limitations under the License.
 # ==============================================================================
 """All necessary imports for registration."""
 # pylint: disable=unused-import
-from official.nlp import tasks as nlp_task
+from official.nlp import configs
+from official.nlp import tasks
 from official.utils.testing import mock_task
 from official.vision import beta
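These imports exist purely for their side effects: each imported package registers its tasks and experiment configs with the central factories on import. A minimal sketch of what that enables, assuming the `get_exp_config` lookup in `official.core.exp_factory` (the experiment name is the one registered later in this commit):

```python
# Sketch: once this imports-for-registration module has been imported,
# experiments become resolvable by name through the factory.
from official.core import exp_factory

config = exp_factory.get_exp_config('bert/sentence_prediction')
print(config.task.model.num_classes)
```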
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Configs package definition."""
from official.nlp.configs import finetuning_experiments
task:
  hub_module_url: ''
  model:
    num_classes: 3
  init_checkpoint: ''
  metric_type: 'accuracy'
  train_data:
    drop_remainder: true
    global_batch_size: 32
    input_path: ''
    is_training: true
    seq_length: 128
    label_type: 'int'
  validation_data:
    drop_remainder: false
    global_batch_size: 32
    input_path: ''
    is_training: false
    seq_length: 128
    label_type: 'int'
trainer:
  checkpoint_interval: 3000
  optimizer_config:
    learning_rate:
      polynomial:
        # 100% of train_steps.
        decay_steps: 36813
        end_learning_rate: 0.0
        initial_learning_rate: 3.0e-05
        power: 1.0
      type: polynomial
    optimizer:
      type: adamw
    warmup:
      polynomial:
        power: 1
        # ~10% of train_steps.
        warmup_steps: 3681
      type: polynomial
  steps_per_loop: 1000
  summary_interval: 1000
  # Training data size 392,702 examples, 3 epochs.
  train_steps: 36813
  validation_interval: 6135
  # Eval data size = 9815 examples.
  validation_steps: 307
  best_checkpoint_export_subdir: 'best_ckpt'
  best_checkpoint_eval_metric: 'cls_accuracy'
  best_checkpoint_metric_comp: 'higher'
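For reference, the step counts above follow from the dataset sizes in the comments (which match the GLUE MNLI train and matched-dev splits) and the global batch size of 32. A quick sanity check, as a sketch:

```python
# Sanity-check the derived step counts in the config above.
train_examples = 392_702  # training data size, per the comment in the YAML
eval_examples = 9_815     # eval data size, per the comment in the YAML
epochs = 3
batch_size = 32           # global_batch_size for both splits

train_steps = train_examples * epochs // batch_size
print(train_steps)        # 36815 -- the config rounds this to 36813
print(train_steps // 10)  # 3681  -- "~10% of train_steps", matching warmup_steps
# drop_remainder is false for eval, so the final partial batch counts:
print(-(-eval_examples // batch_size))  # 307, matching validation_steps
```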
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Finetuning experiment configurations."""
# pylint: disable=g-doc-return-or-yield,line-too-long
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.modeling import optimization
from official.nlp.data import question_answering_dataloader
from official.nlp.data import sentence_prediction_dataloader
from official.nlp.tasks import question_answering
from official.nlp.tasks import sentence_prediction


@exp_factory.register_config_factory('bert/sentence_prediction')
def bert_sentence_prediction() -> cfg.ExperimentConfig:
  r"""BERT GLUE."""
  config = cfg.ExperimentConfig(
      task=sentence_prediction.SentencePredictionConfig(
          train_data=sentence_prediction_dataloader
          .SentencePredictionDataConfig(),
          validation_data=sentence_prediction_dataloader
          .SentencePredictionDataConfig(
              is_training=False, drop_remainder=False)),
      trainer=cfg.TrainerConfig(
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'adamw',
                  'adamw': {
                      'weight_decay_rate': 0.01,
                      'exclude_from_weight_decay':
                          ['LayerNorm', 'layer_norm', 'bias'],
                  }
              },
              'learning_rate': {
                  'type': 'polynomial',
                  'polynomial': {
                      'initial_learning_rate': 3e-5,
                      'end_learning_rate': 0.0,
                  }
              },
              'warmup': {
                  'type': 'polynomial'
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  config.task.model.encoder.type = 'bert'
  return config


@exp_factory.register_config_factory('bert/squad')
def bert_squad() -> cfg.ExperimentConfig:
  """BERT Squad V1/V2."""
  config = cfg.ExperimentConfig(
      task=question_answering.QuestionAnsweringConfig(
          train_data=question_answering_dataloader.QADataConfig(),
          validation_data=question_answering_dataloader.QADataConfig()),
      trainer=cfg.TrainerConfig(
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'adamw',
                  'adamw': {
                      'weight_decay_rate': 0.01,
                      'exclude_from_weight_decay':
                          ['LayerNorm', 'layer_norm', 'bias'],
                  }
              },
              'learning_rate': {
                  'type': 'polynomial',
                  'polynomial': {
                      'initial_learning_rate': 8e-5,
                      'end_learning_rate': 0.0,
                  }
              },
              'warmup': {
                  'type': 'polynomial'
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  config.task.model.encoder.type = 'bert'
  return config
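Both factories deliberately leave dataset paths and step counts unset; runs are expected to layer YAML files such as the ones in this commit on top of the registered defaults. A hedged sketch of that composition, assuming the `override` and `validate` methods on the base `Config` class in `official.modeling.hyperparams`:

```python
import yaml  # PyYAML, assumed available

from official.core import exp_factory

config = exp_factory.get_exp_config('bert/squad')
# Layer a YAML override file (hypothetical name) onto the registered defaults.
with open('squad_overrides.yaml') as f:
  config.override(yaml.safe_load(f), is_strict=False)
config.validate()  # evaluates the `restrictions` expressions declared above
```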
task:
  model:
    encoder:
      type: bert
      bert:
        attention_dropout_rate: 0.1
        dropout_rate: 0.1
        hidden_activation: gelu
        hidden_size: 768
        initializer_range: 0.02
        intermediate_size: 3072
        max_position_embeddings: 512
        num_attention_heads: 12
        num_layers: 12
        type_vocab_size: 2
        vocab_size: 30522
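This file only populates `task.model.encoder` with a BERT-base configuration; it is meant to be composed with an experiment config such as the sentence-prediction one above rather than used alone. The equivalent programmatic override, as a sketch under the same `Config.override` assumption:

```python
from official.core import exp_factory

config = exp_factory.get_exp_config('bert/sentence_prediction')
# Same effect as applying the YAML above: fill in the BERT-base encoder.
config.override({
    'task': {
        'model': {
            'encoder': {
                'type': 'bert',
                'bert': {'hidden_size': 768, 'num_layers': 12,
                         'num_attention_heads': 12, 'vocab_size': 30522},
            }
        }
    }
}, is_strict=False)
```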