Commit 3421f8c6 authored by Hongkun Yu, committed by A. Unique TensorFlower

Internal change

PiperOrigin-RevId: 339113572
Parent b95fa6e1
@@ -13,8 +13,8 @@
 # limitations under the License.
 # ==============================================================================
 """All necessary imports for registration."""
 # pylint: disable=unused-import
-from official.nlp import tasks as nlp_task
+from official.nlp import configs
+from official.nlp import tasks
 from official.utils.testing import mock_task
 from official.vision import beta
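These imports exist purely for their side effects: each imported package registers its tasks and experiment configs with the central factories on import. A minimal sketch of what that enables, assuming the `get_exp_config` lookup in `official.core.exp_factory` (the experiment name is the one registered later in this commit):

```python
# Sketch: once this imports-for-registration module has been imported,
# experiments become resolvable by name through the factory.
from official.core import exp_factory

config = exp_factory.get_exp_config('bert/sentence_prediction')
print(config.task.model.num_classes)
```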
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Configs package definition."""
from official.nlp.configs import finetuning_experiments
task:
  hub_module_url: ''
  model:
    num_classes: 3
  init_checkpoint: ''
  metric_type: 'accuracy'
  train_data:
    drop_remainder: true
    global_batch_size: 32
    input_path: ''
    is_training: true
    seq_length: 128
    label_type: 'int'
  validation_data:
    drop_remainder: false
    global_batch_size: 32
    input_path: ''
    is_training: false
    seq_length: 128
    label_type: 'int'
trainer:
  checkpoint_interval: 3000
  optimizer_config:
    learning_rate:
      polynomial:
        # 100% of train_steps.
        decay_steps: 36813
        end_learning_rate: 0.0
        initial_learning_rate: 3.0e-05
        power: 1.0
      type: polynomial
    optimizer:
      type: adamw
    warmup:
      polynomial:
        power: 1
        # ~10% of train_steps.
        warmup_steps: 3681
      type: polynomial
  steps_per_loop: 1000
  summary_interval: 1000
  # Training data size 392,702 examples, 3 epochs.
  train_steps: 36813
  validation_interval: 6135
  # Eval data size = 9815 examples.
  validation_steps: 307
  best_checkpoint_export_subdir: 'best_ckpt'
  best_checkpoint_eval_metric: 'cls_accuracy'
  best_checkpoint_metric_comp: 'higher'
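For reference, the step counts above follow from the dataset sizes in the comments (which match the GLUE MNLI train and matched-dev splits) and the global batch size of 32. A quick sanity check, as a sketch:

```python
# Sanity-check the derived step counts in the config above.
train_examples = 392_702  # training data size, per the comment in the YAML
eval_examples = 9_815     # eval data size, per the comment in the YAML
epochs = 3
batch_size = 32           # global_batch_size for both splits

train_steps = train_examples * epochs // batch_size
print(train_steps)        # 36815 -- the config rounds this to 36813
print(train_steps // 10)  # 3681  -- "~10% of train_steps", matching warmup_steps
# drop_remainder is false for eval, so the final partial batch counts:
print(-(-eval_examples // batch_size))  # 307, matching validation_steps
```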
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Finetuning experiment configurations."""
# pylint: disable=g-doc-return-or-yield,line-too-long
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.modeling import optimization
from official.nlp.data import question_answering_dataloader
from official.nlp.data import sentence_prediction_dataloader
from official.nlp.tasks import question_answering
from official.nlp.tasks import sentence_prediction


@exp_factory.register_config_factory('bert/sentence_prediction')
def bert_sentence_prediction() -> cfg.ExperimentConfig:
  r"""BERT GLUE."""
  config = cfg.ExperimentConfig(
      task=sentence_prediction.SentencePredictionConfig(
          train_data=sentence_prediction_dataloader
          .SentencePredictionDataConfig(),
          validation_data=sentence_prediction_dataloader
          .SentencePredictionDataConfig(
              is_training=False, drop_remainder=False)),
      trainer=cfg.TrainerConfig(
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'adamw',
                  'adamw': {
                      'weight_decay_rate': 0.01,
                      'exclude_from_weight_decay':
                          ['LayerNorm', 'layer_norm', 'bias'],
                  }
              },
              'learning_rate': {
                  'type': 'polynomial',
                  'polynomial': {
                      'initial_learning_rate': 3e-5,
                      'end_learning_rate': 0.0,
                  }
              },
              'warmup': {
                  'type': 'polynomial'
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  config.task.model.encoder.type = 'bert'
  return config


@exp_factory.register_config_factory('bert/squad')
def bert_squad() -> cfg.ExperimentConfig:
  """BERT Squad V1/V2."""
  config = cfg.ExperimentConfig(
      task=question_answering.QuestionAnsweringConfig(
          train_data=question_answering_dataloader.QADataConfig(),
          validation_data=question_answering_dataloader.QADataConfig()),
      trainer=cfg.TrainerConfig(
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'adamw',
                  'adamw': {
                      'weight_decay_rate': 0.01,
                      'exclude_from_weight_decay':
                          ['LayerNorm', 'layer_norm', 'bias'],
                  }
              },
              'learning_rate': {
                  'type': 'polynomial',
                  'polynomial': {
                      'initial_learning_rate': 8e-5,
                      'end_learning_rate': 0.0,
                  }
              },
              'warmup': {
                  'type': 'polynomial'
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  config.task.model.encoder.type = 'bert'
  return config
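Both factories deliberately leave dataset paths and step counts unset; runs are expected to layer YAML files such as the ones in this commit on top of the registered defaults. A hedged sketch of that composition, assuming the `override` and `validate` methods on the base `Config` class in `official.modeling.hyperparams`:

```python
import yaml  # PyYAML, assumed available

from official.core import exp_factory

config = exp_factory.get_exp_config('bert/squad')
# Layer a YAML override file (hypothetical name) onto the registered defaults.
with open('squad_overrides.yaml') as f:
  config.override(yaml.safe_load(f), is_strict=False)
config.validate()  # evaluates the `restrictions` expressions declared above
```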
task:
  model:
    encoder:
      type: bert
      bert:
        attention_dropout_rate: 0.1
        dropout_rate: 0.1
        hidden_activation: gelu
        hidden_size: 768
        initializer_range: 0.02
        intermediate_size: 3072
        max_position_embeddings: 512
        num_attention_heads: 12
        num_layers: 12
        type_vocab_size: 2
        vocab_size: 30522
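This file only populates `task.model.encoder` with a BERT-base configuration; it is meant to be composed with an experiment config such as the sentence-prediction one above rather than used alone. The equivalent programmatic override, as a sketch under the same `Config.override` assumption:

```python
from official.core import exp_factory

config = exp_factory.get_exp_config('bert/sentence_prediction')
# Same effect as applying the YAML above: fill in the BERT-base encoder.
config.override({
    'task': {
        'model': {
            'encoder': {
                'type': 'bert',
                'bert': {'hidden_size': 768, 'num_layers': 12,
                         'num_attention_heads': 12, 'vocab_size': 30522},
            }
        }
    }
}, is_strict=False)
```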