Commit 6e847fbd authored by Chen Chen, committed by A. Unique TensorFlower

Internal change

PiperOrigin-RevId: 284203261
Parent 6f64dcb5
@@ -94,13 +94,13 @@ def _get_permutation(name, permutations):

 def _get_new_shape(name, shape, num_heads):
   """Checks whether a variable requires reshape by pattern matching."""
-  if "attention/output/dense/kernel" in name:
+  if "self_attention_output/kernel" in name:
     return tuple([num_heads, shape[0] // num_heads, shape[1]])
-  if "attention/output/dense/bias" in name:
+  if "self_attention_output/bias" in name:
     return shape
   patterns = [
-      "attention/self/query", "attention/self/value", "attention/self/key"
+      "self_attention/query", "self_attention/value", "self_attention/key"
   ]
   for pattern in patterns:
     if pattern in name:
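For orientation, a minimal sketch of what the reshape branch above computes, assuming a hypothetical 12-head encoder with hidden size 768 (these values are illustrative and not part of the commit):

    # Illustrative values only; real shapes come from the checkpoint being
    # converted.
    num_heads = 12
    kernel_shape = (768, 768)  # e.g. a ".../self_attention_output/kernel"
    # Per the branch above: split the first axis into (heads, size_per_head).
    new_shape = tuple([num_heads, kernel_shape[0] // num_heads, kernel_shape[1]])
    assert new_shape == (12, 64, 768)  # (num_heads, size_per_head, hidden)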
@@ -161,7 +161,7 @@ def convert(checkpoint_from_path,
     # See if we need to reshape the underlying tensor.
     new_shape = None
     if num_heads > 0:
-      new_shape = _get_new_shape(var_name, tensor.shape, num_heads)
+      new_shape = _get_new_shape(new_var_name, tensor.shape, num_heads)
     if new_shape:
       tf.logging.info("Variable %s has a shape change from %s to %s",
......
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A converter from a tf1 ALBERT encoder checkpoint to a tf2 encoder checkpoint.

The conversion will yield an object-oriented checkpoint that can be used
to restore an AlbertTransformerEncoder object.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os

from absl import app
from absl import flags

import tensorflow as tf
from official.modeling import activations
from official.nlp import bert_modeling as modeling
from official.nlp.bert import tf1_checkpoint_converter_lib
from official.nlp.modeling import networks

FLAGS = flags.FLAGS

flags.DEFINE_string("albert_config_file", None,
                    "Albert configuration file to define core bert layers.")
flags.DEFINE_string(
    "checkpoint_to_convert", None,
    "Initial checkpoint from a pretrained BERT model core (that is, only the "
    "BertModel, with no task heads).")
flags.DEFINE_string("converted_checkpoint_path", None,
                    "Name for the created object-based V2 checkpoint.")

ALBERT_NAME_REPLACEMENTS = (
    ("bert/encoder/", ""),
    ("bert/", ""),
    ("embeddings/word_embeddings", "word_embeddings/embeddings"),
    ("embeddings/position_embeddings", "position_embedding/embeddings"),
    ("embeddings/token_type_embeddings", "type_embeddings/embeddings"),
    ("embeddings/LayerNorm", "embeddings/layer_norm"),
    ("embedding_hidden_mapping_in", "embedding_projection"),
    ("group_0/inner_group_0/", ""),
    ("attention_1/self", "self_attention"),
    ("attention_1/output/dense", "self_attention_output"),
    ("LayerNorm/", "self_attention_layer_norm/"),
    ("ffn_1/intermediate/dense", "intermediate"),
    ("ffn_1/intermediate/output/dense", "output"),
    ("LayerNorm_1/", "output_layer_norm/"),
    ("pooler/dense", "pooler_transform"),
    ("cls/predictions/output_bias", "cls/predictions/output_bias/bias"),
    ("cls/seq_relationship/output_bias", "predictions/transform/logits/bias"),
    ("cls/seq_relationship/output_weights",
     "predictions/transform/logits/kernel"),
)
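A minimal sketch of how these pairs act on TF1 variable names: tf1_checkpoint_converter_lib.convert applies them as plain substring substitutions, in order. The helper and the sample name below are illustrative only, not part of the commit:

    # Illustrative helper; the real substitution lives in
    # tf1_checkpoint_converter_lib and may differ in detail.
    def apply_name_replacements(var_name, replacements=ALBERT_NAME_REPLACEMENTS):
      for old, new in replacements:
        var_name = var_name.replace(old, new)
      return var_name

    # e.g. a TF1 ALBERT query kernel (sample name, assuming the usual layout):
    #   "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel"
    # maps to:
    #   "transformer/self_attention/query/kernel"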
def _create_albert_model(cfg):
  """Creates a BERT keras core model from BERT configuration.

  Args:
    cfg: A `BertConfig` to create the core model.

  Returns:
    A keras model.
  """
  albert_encoder = networks.AlbertTransformerEncoder(
      vocab_size=cfg.vocab_size,
      hidden_size=cfg.hidden_size,
      embedding_width=cfg.embedding_size,
      num_layers=cfg.num_hidden_layers,
      num_attention_heads=cfg.num_attention_heads,
      intermediate_size=cfg.intermediate_size,
      activation=activations.gelu,
      dropout_rate=cfg.hidden_dropout_prob,
      attention_dropout_rate=cfg.attention_probs_dropout_prob,
      sequence_length=cfg.max_position_embeddings,
      type_vocab_size=cfg.type_vocab_size,
      initializer=tf.keras.initializers.TruncatedNormal(
          stddev=cfg.initializer_range))
  return albert_encoder
def convert_checkpoint(bert_config, output_path, v1_checkpoint):
  """Converts a V1 checkpoint into an OO V2 checkpoint."""
  output_dir, _ = os.path.split(output_path)

  # Create a temporary V1 name-converted checkpoint in the output directory.
  temporary_checkpoint_dir = os.path.join(output_dir, "temp_v1")
  temporary_checkpoint = os.path.join(temporary_checkpoint_dir, "ckpt")
  tf1_checkpoint_converter_lib.convert(
      checkpoint_from_path=v1_checkpoint,
      checkpoint_to_path=temporary_checkpoint,
      num_heads=bert_config.num_attention_heads,
      name_replacements=ALBERT_NAME_REPLACEMENTS,
      permutations=tf1_checkpoint_converter_lib.BERT_V2_PERMUTATIONS,
      exclude_patterns=["adam", "Adam"])

  # Create a V2 checkpoint from the temporary checkpoint.
  model = _create_albert_model(bert_config)
  tf1_checkpoint_converter_lib.create_v2_checkpoint(model, temporary_checkpoint,
                                                    output_path)

  # Clean up the temporary checkpoint, if it exists.
  try:
    tf.io.gfile.rmtree(temporary_checkpoint_dir)
  except tf.errors.OpError:
    # If it doesn't exist, we don't need to clean it up; continue.
    pass
def main(_):
  assert tf.version.VERSION.startswith('2.')
  output_path = FLAGS.converted_checkpoint_path
  v1_checkpoint = FLAGS.checkpoint_to_convert
  albert_config = modeling.AlbertConfig.from_json_file(FLAGS.albert_config_file)
  convert_checkpoint(albert_config, output_path, v1_checkpoint)


if __name__ == "__main__":
  app.run(main)
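The diff view elides file names, so the script path below is a placeholder. With the three flags defined above, a typical invocation would look like:

    python tf2_albert_encoder_checkpoint_converter.py \
      --albert_config_file=albert_config.json \
      --checkpoint_to_convert=tf1_albert/model.ckpt \
      --converted_checkpoint_path=tf2_albert/converted_ckpt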
@@ -107,6 +107,44 @@ class BertConfig(object):
     return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n"


+class AlbertConfig(BertConfig):
+  """Configuration for `ALBERT`."""
+
+  def __init__(self,
+               embedding_size,
+               num_hidden_groups=1,
+               inner_group_num=1,
+               **kwargs):
+    """Constructs AlbertConfig.
+
+    Args:
+      embedding_size: Size of the factorized word embeddings.
+      num_hidden_groups: Number of groups for the hidden layers; parameters
+        in the same group are shared. Note that both this value and the
+        following 'inner_group_num' have to be 1 for now, because all released
+        ALBERT models set them to 1. We may support arbitrary valid values in
+        the future.
+      inner_group_num: Number of inner repetitions of attention and ffn.
+      **kwargs: The remaining arguments are the same as in 'BertConfig' above.
+    """
+    super(AlbertConfig, self).__init__(**kwargs)
+    self.embedding_size = embedding_size
+
+    # TODO(chendouble): 'inner_group_num' and 'num_hidden_groups' are always 1
+    # in the released ALBERT. Support other values in AlbertTransformerEncoder
+    # if needed.
+    if inner_group_num != 1 or num_hidden_groups != 1:
+      raise ValueError("We only support 'inner_group_num' and "
+                       "'num_hidden_groups' as 1.")
+
+  @classmethod
+  def from_dict(cls, json_object):
+    """Constructs an `AlbertConfig` from a Python dictionary of parameters."""
+    config = AlbertConfig(embedding_size=None, vocab_size=None)
+    for (key, value) in six.iteritems(json_object):
+      config.__dict__[key] = value
+    return config
+
+
 def get_bert_model(input_word_ids,
                    input_mask,
                    input_type_ids,
......
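A minimal sketch of building the new config via from_dict, with illustrative, roughly ALBERT-base-like values (none of them come from this commit):

    config = AlbertConfig.from_dict({
        "embedding_size": 128,  # factorized embedding width
        "vocab_size": 30000,
        "hidden_size": 768,
        "num_hidden_layers": 12,
        "num_attention_heads": 12,
        "intermediate_size": 3072,
    })
    assert config.embedding_size == 128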
@@ -13,6 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 """Networks package definition."""
+from official.nlp.modeling.networks.albert_transformer_encoder import AlbertTransformerEncoder
 from official.nlp.modeling.networks.classification import Classification
 from official.nlp.modeling.networks.masked_lm import MaskedLM
 from official.nlp.modeling.networks.span_labeling import SpanLabeling
......