提交 8d927957 编写于 作者: S shibeiji

Fix out-of-memory machine crash when global shuffle was enabled for a large dataset

上级 9991df86
...@@ -388,7 +388,7 @@ class AdamWeightDecayDynamicLR(Optimizer): ...@@ -388,7 +388,7 @@ class AdamWeightDecayDynamicLR(Optimizer):
beta2=0.999, beta2=0.999,
eps=1e-6, eps=1e-6,
weight_decay=0.0, weight_decay=0.0,
decay_filter=lambda x: 'beta' not in x.name and 'gamma' not in x.name): decay_filter=lambda x: 'layernorm' not in x.name.lower() and 'bias' not in x.name.lower()):
super(AdamWeightDecayDynamicLR, self).__init__(0.0, params) super(AdamWeightDecayDynamicLR, self).__init__(0.0, params)
if self.is_group: if self.is_group:
raise RuntimeError(f"The {self.cls_name} optimizer cannot support group setting.") raise RuntimeError(f"The {self.cls_name} optimizer cannot support group setting.")
......
...@@ -36,8 +36,8 @@ def create_bert_dataset(epoch_size=1, device_num=1, rank=0, do_shuffle="true", e ...@@ -36,8 +36,8 @@ def create_bert_dataset(epoch_size=1, device_num=1, rank=0, do_shuffle="true", e
ds = de.TFRecordDataset(data_files, schema_dir if schema_dir != "" else None, ds = de.TFRecordDataset(data_files, schema_dir if schema_dir != "" else None,
columns_list=["input_ids", "input_mask", "segment_ids", "next_sentence_labels", columns_list=["input_ids", "input_mask", "segment_ids", "next_sentence_labels",
"masked_lm_positions", "masked_lm_ids", "masked_lm_weights"], "masked_lm_positions", "masked_lm_ids", "masked_lm_weights"],
shuffle=(do_shuffle == "true"), num_shards=device_num, shard_id=rank, shuffle=de.Shuffle.FILES if do_shuffle == "true" else False,
shard_equal_rows=True) num_shards=device_num, shard_id=rank, shard_equal_rows=True)
ori_dataset_size = ds.get_dataset_size() ori_dataset_size = ds.get_dataset_size()
print('origin dataset size: ', ori_dataset_size) print('origin dataset size: ', ori_dataset_size)
new_size = ori_dataset_size new_size = ori_dataset_size
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册