提交 27fb855b 编写于 作者: L Le Hou 提交者: A. Unique TensorFlower

Internal change

PiperOrigin-RevId: 422638136
上级 143fd0b6
......@@ -16,7 +16,7 @@
import six
from official.nlp.bert import configs
from official.legacy.bert import configs
class AlbertConfig(configs.BertConfig):
......
......@@ -2,7 +2,7 @@
**WARNING**: We are in the process of deprecating most of the code in this directory.
Please see
[this link](https://github.com/tensorflow/models/blob/master/official/nlp/docs/train.md)
[this link](../g3doc/tutorials/bert_new.md)
for the new tutorial and use the new code in `nlp/modeling`. This README is
still correct for this legacy implementation.
......
......@@ -18,8 +18,8 @@ import gin
import tensorflow as tf
import tensorflow_hub as hub
from official.legacy.albert import configs as albert_configs
from official.legacy.bert import configs
from official.modeling import tf_utils
from official.nlp.bert import configs
from official.nlp.modeling import models
from official.nlp.modeling import networks
......
......@@ -14,8 +14,8 @@
import tensorflow as tf
from official.nlp.bert import bert_models
from official.nlp.bert import configs as bert_configs
from official.legacy.bert import bert_models
from official.legacy.bert import configs as bert_configs
from official.nlp.modeling import networks
......
......@@ -25,8 +25,8 @@ from absl import app
from absl import flags
from absl import logging
import tensorflow as tf
from official.nlp.bert import bert_models
from official.nlp.bert import configs
from official.legacy.bert import bert_models
from official.legacy.bert import configs
FLAGS = flags.FLAGS
......
......@@ -21,8 +21,8 @@ import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
from official.nlp.bert import configs
from official.nlp.bert import export_tfhub
from official.legacy.bert import configs
from official.legacy.bert import export_tfhub
class ExportTfhubTest(tf.test.TestCase, parameterized.TestCase):
......
......@@ -15,10 +15,9 @@
"""Utilities to save models."""
import os
import typing
from absl import logging
import tensorflow as tf
import typing
def export_bert_model(model_export_path: typing.Text,
......
......@@ -25,8 +25,8 @@ import tensorflow as tf
from tensorflow.python.distribute import combinations
from tensorflow.python.distribute import strategy_combinations
from official.nlp.bert import common_flags
from official.nlp.bert import model_training_utils
from official.legacy.bert import common_flags
from official.legacy.bert import model_training_utils
common_flags.define_common_bert_flags()
......
......@@ -26,13 +26,13 @@ from absl import logging
import gin
import tensorflow as tf
from official.common import distribute_utils
from official.legacy.bert import bert_models
from official.legacy.bert import common_flags
from official.legacy.bert import configs as bert_configs
from official.legacy.bert import input_pipeline
from official.legacy.bert import model_saving_utils
from official.modeling import performance
from official.nlp import optimization
from official.nlp.bert import bert_models
from official.nlp.bert import common_flags
from official.nlp.bert import configs as bert_configs
from official.nlp.bert import input_pipeline
from official.nlp.bert import model_saving_utils
from official.utils.misc import keras_utils
flags.DEFINE_enum(
......
......@@ -21,13 +21,13 @@ from absl import logging
import gin
import tensorflow as tf
from official.common import distribute_utils
from official.legacy.bert import bert_models
from official.legacy.bert import common_flags
from official.legacy.bert import configs
from official.legacy.bert import input_pipeline
from official.legacy.bert import model_training_utils
from official.modeling import performance
from official.nlp import optimization
from official.nlp.bert import bert_models
from official.nlp.bert import common_flags
from official.nlp.bert import configs
from official.nlp.bert import input_pipeline
from official.nlp.bert import model_training_utils
flags.DEFINE_string('input_files', None,
......
......@@ -25,10 +25,10 @@ from absl import logging
import gin
import tensorflow as tf
from official.common import distribute_utils
from official.nlp.bert import configs as bert_configs
from official.nlp.bert import run_squad_helper
from official.nlp.bert import tokenization
from official.legacy.bert import configs as bert_configs
from official.legacy.bert import run_squad_helper
from official.nlp.data import squad_lib as squad_lib_wp
from official.nlp.tools import tokenization
from official.utils.misc import keras_utils
......
......@@ -21,16 +21,16 @@ import os
from absl import flags
from absl import logging
import tensorflow as tf
from official.legacy.bert import bert_models
from official.legacy.bert import common_flags
from official.legacy.bert import input_pipeline
from official.legacy.bert import model_saving_utils
from official.legacy.bert import model_training_utils
from official.modeling import performance
from official.nlp import optimization
from official.nlp.bert import bert_models
from official.nlp.bert import common_flags
from official.nlp.bert import input_pipeline
from official.nlp.bert import model_saving_utils
from official.nlp.bert import model_training_utils
from official.nlp.bert import squad_evaluate_v1_1
from official.nlp.bert import squad_evaluate_v2_0
from official.nlp.data import squad_lib_sp
from official.nlp.tools import squad_evaluate_v1_1
from official.nlp.tools import squad_evaluate_v2_0
from official.utils.misc import keras_utils
......
......@@ -18,8 +18,8 @@ from absl import app
from absl import flags
import tensorflow as tf
from official.nlp.bert import bert_models
from official.nlp.bert import configs
from official.legacy.bert import bert_models
from official.legacy.bert import configs
flags.DEFINE_integer(
"sequence_length", None, "Sequence length to parse the tf.Example. If "
......
......@@ -24,7 +24,7 @@ from absl import logging
import tensorflow as tf
import tensorflow_datasets as tfds
from official.nlp.bert import tokenization
from official.nlp.tools import tokenization
class InputExample(object):
......
......@@ -21,8 +21,8 @@ from absl.testing import parameterized
import tensorflow as tf
import tensorflow_datasets as tfds
from official.nlp.bert import tokenization
from official.nlp.data import classifier_data_lib
from official.nlp.tools import tokenization
def decode_record(record, name_to_features):
......
......@@ -22,7 +22,6 @@ import os
from absl import app
from absl import flags
import tensorflow as tf
from official.nlp.bert import tokenization
from official.nlp.data import classifier_data_lib
from official.nlp.data import sentence_retrieval_lib
# word-piece tokenizer based squad_lib
......@@ -30,6 +29,7 @@ from official.nlp.data import squad_lib as squad_lib_wp
# sentence-piece tokenizer based squad_lib
from official.nlp.data import squad_lib_sp
from official.nlp.data import tagging_data_lib
from official.nlp.tools import tokenization
FLAGS = flags.FLAGS
......
......@@ -24,7 +24,7 @@ from absl import flags
from absl import logging
import tensorflow as tf
from official.nlp.bert import tokenization
from official.nlp.tools import tokenization
FLAGS = flags.FLAGS
......
......@@ -14,6 +14,7 @@
"""Create LM TF examples for XLNet."""
import dataclasses
import json
import math
import os
......@@ -28,11 +29,10 @@ from absl import app
from absl import flags
from absl import logging
import dataclasses
import numpy as np
import tensorflow as tf
from official.nlp.bert import tokenization
from official.nlp.tools import tokenization
special_symbols = {
"<unk>": 0,
......
......@@ -17,8 +17,8 @@
import os
from absl import logging
from official.nlp.bert import tokenization
from official.nlp.data import classifier_data_lib
from official.nlp.tools import tokenization
class BuccProcessor(classifier_data_lib.DataProcessor):
......
......@@ -25,7 +25,7 @@ import six
from absl import logging
import tensorflow as tf
from official.nlp.bert import tokenization
from official.nlp.tools import tokenization
class SquadExample(object):
......
......@@ -28,7 +28,7 @@ from absl import logging
import numpy as np
import tensorflow as tf
from official.nlp.bert import tokenization
from official.nlp.tools import tokenization
class SquadExample(object):
......
......@@ -19,8 +19,8 @@ import os
from absl import logging
import tensorflow as tf
from official.nlp.bert import tokenization
from official.nlp.data import classifier_data_lib
from official.nlp.tools import tokenization
# A negative label id for the padding label, which will not contribute
# to loss/metrics in training.
......
......@@ -19,8 +19,8 @@ import random
from absl.testing import parameterized
import tensorflow as tf
from official.nlp.bert import tokenization
from official.nlp.data import tagging_data_lib
from official.nlp.tools import tokenization
def _create_fake_file(filename, labels, is_test):
......
......@@ -19,7 +19,7 @@ import os
from absl.testing import parameterized
import tensorflow as tf
from official.nlp.bert import configs
from official.legacy.bert import configs
from official.nlp.configs import bert
from official.nlp.configs import encoders
from official.nlp.data import dual_encoder_dataloader
......
......@@ -13,13 +13,13 @@
# limitations under the License.
"""Question answering task."""
import dataclasses
import functools
import json
import os
from typing import List, Optional
from absl import logging
import dataclasses
import orbit
import tensorflow as tf
......@@ -27,15 +27,15 @@ from official.core import base_task
from official.core import config_definitions as cfg
from official.core import task_factory
from official.modeling.hyperparams import base_config
from official.nlp.bert import squad_evaluate_v1_1
from official.nlp.bert import squad_evaluate_v2_0
from official.nlp.bert import tokenization
from official.nlp.configs import encoders
from official.nlp.data import data_loader_factory
from official.nlp.data import squad_lib as squad_lib_wp
from official.nlp.data import squad_lib_sp
from official.nlp.modeling import models
from official.nlp.tasks import utils
from official.nlp.tools import squad_evaluate_v1_1
from official.nlp.tools import squad_evaluate_v2_0
from official.nlp.tools import tokenization
@dataclasses.dataclass
......
......@@ -71,8 +71,8 @@ from absl import app
from absl import flags
import gin
from official.legacy.bert import configs
from official.modeling import hyperparams
from official.nlp.bert import configs
from official.nlp.configs import encoders
from official.nlp.tools import export_tfhub_lib
......
......@@ -28,8 +28,8 @@ import tensorflow as tf
from tensorflow.core.protobuf import saved_model_pb2
from tensorflow.python.ops import control_flow_ops
# pylint: enable=g-direct-tensorflow-import
from official.legacy.bert import configs
from official.modeling import tf_utils
from official.nlp.bert import configs
from official.nlp.configs import encoders
from official.nlp.modeling import layers
from official.nlp.modeling import models
......
......@@ -24,8 +24,8 @@ import tensorflow_hub as hub
import tensorflow_text as text
from sentencepiece import SentencePieceTrainer
from official.legacy.bert import configs
from official.modeling import tf_utils
from official.nlp.bert import configs
from official.nlp.configs import encoders
from official.nlp.modeling import layers
from official.nlp.modeling import models
......
......@@ -25,9 +25,9 @@ from absl import flags
import tensorflow as tf
from official.legacy.albert import configs
from official.modeling import tf_utils
from official.nlp.bert import tf1_checkpoint_converter_lib
from official.nlp.modeling import models
from official.nlp.modeling import networks
from official.nlp.tools import tf1_bert_checkpoint_converter_lib
FLAGS = flags.FLAGS
......@@ -128,12 +128,12 @@ def convert_checkpoint(bert_config, output_path, v1_checkpoint,
# Create a temporary V1 name-converted checkpoint in the output directory.
temporary_checkpoint_dir = os.path.join(output_dir, "temp_v1")
temporary_checkpoint = os.path.join(temporary_checkpoint_dir, "ckpt")
tf1_checkpoint_converter_lib.convert(
tf1_bert_checkpoint_converter_lib.convert(
checkpoint_from_path=v1_checkpoint,
checkpoint_to_path=temporary_checkpoint,
num_heads=bert_config.num_attention_heads,
name_replacements=ALBERT_NAME_REPLACEMENTS,
permutations=tf1_checkpoint_converter_lib.BERT_V2_PERMUTATIONS,
permutations=tf1_bert_checkpoint_converter_lib.BERT_V2_PERMUTATIONS,
exclude_patterns=["adam", "Adam"])
# Create a V2 checkpoint from the temporary checkpoint.
......@@ -144,9 +144,8 @@ def convert_checkpoint(bert_config, output_path, v1_checkpoint,
else:
raise ValueError("Unsupported converted_model: %s" % converted_model)
tf1_checkpoint_converter_lib.create_v2_checkpoint(model, temporary_checkpoint,
output_path,
checkpoint_model_name)
tf1_bert_checkpoint_converter_lib.create_v2_checkpoint(
model, temporary_checkpoint, output_path, checkpoint_model_name)
# Clean up the temporary checkpoint, if it exists.
try:
......
......@@ -25,11 +25,11 @@ from absl import app
from absl import flags
import tensorflow as tf
from official.legacy.bert import configs
from official.modeling import tf_utils
from official.nlp.bert import configs
from official.nlp.bert import tf1_checkpoint_converter_lib
from official.nlp.modeling import models
from official.nlp.modeling import networks
from official.nlp.tools import tf1_bert_checkpoint_converter_lib
FLAGS = flags.FLAGS
......@@ -111,12 +111,13 @@ def convert_checkpoint(bert_config,
temporary_checkpoint_dir = os.path.join(output_dir, "temp_v1")
temporary_checkpoint = os.path.join(temporary_checkpoint_dir, "ckpt")
tf1_checkpoint_converter_lib.convert(
tf1_bert_checkpoint_converter_lib.convert(
checkpoint_from_path=v1_checkpoint,
checkpoint_to_path=temporary_checkpoint,
num_heads=bert_config.num_attention_heads,
name_replacements=tf1_checkpoint_converter_lib.BERT_V2_NAME_REPLACEMENTS,
permutations=tf1_checkpoint_converter_lib.BERT_V2_PERMUTATIONS,
name_replacements=(
tf1_bert_checkpoint_converter_lib.BERT_V2_NAME_REPLACEMENTS),
permutations=tf1_bert_checkpoint_converter_lib.BERT_V2_PERMUTATIONS,
exclude_patterns=["adam", "Adam"])
if converted_model == "encoder":
......@@ -127,9 +128,8 @@ def convert_checkpoint(bert_config,
raise ValueError("Unsupported converted_model: %s" % converted_model)
# Create a V2 checkpoint from the temporary checkpoint.
tf1_checkpoint_converter_lib.create_v2_checkpoint(model, temporary_checkpoint,
output_path,
checkpoint_model_name)
tf1_bert_checkpoint_converter_lib.create_v2_checkpoint(
model, temporary_checkpoint, output_path, checkpoint_model_name)
# Clean up the temporary checkpoint, if it exists.
try:
......
......@@ -18,7 +18,7 @@ import tempfile
import six
import tensorflow as tf
from official.nlp.bert import tokenization
from official.nlp.tools import tokenization
class TokenizationTest(tf.test.TestCase):
......
......@@ -21,7 +21,7 @@ from typing import Any, Callable, Dict, Optional, Text
from absl import logging
import tensorflow as tf
from official.nlp.bert import model_training_utils
from official.legacy.bert import model_training_utils
from official.nlp.xlnet import data_utils
# pytype: disable=attribute-error
......
......@@ -22,8 +22,8 @@ import urllib.parse
import tensorflow as tf
from official.nlp.bert import tokenization
from official.nlp.data import classifier_data_lib
from official.nlp.tools import tokenization
class RawDataProcessor(object):
......
......@@ -18,8 +18,8 @@ from typing import Optional, Text
from absl import logging
import tensorflow as tf
from official.legacy.bert import configs
from official.modeling.hyperparams import params_dict
from official.nlp.bert import configs
from official.projects.nhnet import configs as nhnet_configs
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册