Unverified · Commit 62222bf4 · Authored by liym27 · Committed by GitHub

Fix test_bert on GPU (#24692)

DataLoader makes the data differ on CUDAPlace even when the underlying reader yields identical data. To make the test pass, this PR stops using DataLoader and feeds the model directly from the reader; we will switch back to DataLoader after the issue is fixed. A condensed sketch of the new feeding pattern follows the commit metadata below.
Parent a6ab43aa
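In short, the test used to pull batches through `fluid.io.DataLoader` and now iterates the Python reader directly, converting each numpy array to a dygraph Variable with `to_variable`. The condensed sketch below contrasts the two feeding patterns; `data_reader`, `place`, and the elided loop bodies are taken from the diff further down, and the code is assumed (as in the test) to run inside `fluid.dygraph.guard`.

```python
# Condensed sketch of the change; not the full test code.

# Before: batches are produced and converted by fluid.io.DataLoader.
data_loader = fluid.io.DataLoader.from_generator(capacity=50, iterable=True)
data_loader.set_batch_generator(data_reader.data_generator(), places=place)
for input_data in data_loader():
    ...  # feed input_data to the model

# After: iterate the reader's generator directly and convert the numpy
# arrays to dygraph Variables by hand, bypassing DataLoader entirely.
for input_data in data_reader.data_generator():
    input_data = [to_variable(ele) for ele in input_data]
    ...  # feed input_data to the model
```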
...
@@ -260,51 +260,48 @@ class DataReader(object):
             yield token_ids, sent_ids, pos_ids, label
 
     def data_generator(self):
-        def wrapper():
-            def reader():
-                for epoch in range(self.epoch):
-                    self.current_epoch = epoch + 1
-                    sample_generator = self.build_fake_data()
-                    for sample in sample_generator:
-                        if sample is None:
-                            continue
-                        yield sample
-
-            def batch_reader(reader, batch_size, in_tokens):
-                batch, total_token_num, max_len = [], 0, 0
-                for parsed_line in reader():
-                    token_ids, sent_ids, pos_ids, label = parsed_line
-                    max_len = max(max_len, len(token_ids))
-                    if in_tokens:
-                        to_append = (len(batch) + 1) * max_len <= batch_size
-                    else:
-                        to_append = len(batch) < batch_size
-                    if to_append:
-                        batch.append(parsed_line)
-                        total_token_num += len(token_ids)
-                    else:
-                        yield batch, total_token_num
-                        batch, total_token_num, max_len = [parsed_line], len(
-                            token_ids), len(token_ids)
-                if len(batch) > 0:
-                    yield batch, total_token_num
-
-            for batch_data, total_token_num in batch_reader(
-                    reader, self.batch_size, self.in_tokens):
-                yield prepare_batch_data(
-                    batch_data,
-                    total_token_num,
-                    voc_size=self.voc_size,
-                    pad_id=self.pad_id,
-                    cls_id=self.cls_id,
-                    sep_id=self.sep_id,
-                    mask_id=self.mask_id,
-                    return_input_mask=True,
-                    return_max_len=False,
-                    return_num_token=False)
-
-        return wrapper
+        def reader():
+            for epoch in range(self.epoch):
+                self.current_epoch = epoch + 1
+                sample_generator = self.build_fake_data()
+                for sample in sample_generator:
+                    if sample is None:
+                        continue
+                    yield sample
+
+        def batch_reader(reader, batch_size, in_tokens):
+            batch, total_token_num, max_len = [], 0, 0
+            for parsed_line in reader():
+                token_ids, sent_ids, pos_ids, label = parsed_line
+                max_len = max(max_len, len(token_ids))
+                if in_tokens:
+                    to_append = (len(batch) + 1) * max_len <= batch_size
+                else:
+                    to_append = len(batch) < batch_size
+                if to_append:
+                    batch.append(parsed_line)
+                    total_token_num += len(token_ids)
+                else:
+                    yield batch, total_token_num
+                    batch, total_token_num, max_len = [parsed_line], len(
+                        token_ids), len(token_ids)
+            if len(batch) > 0:
+                yield batch, total_token_num
+
+        for batch_data, total_token_num in batch_reader(reader, self.batch_size,
+                                                        self.in_tokens):
+            yield prepare_batch_data(
+                batch_data,
+                total_token_num,
+                voc_size=self.voc_size,
+                pad_id=self.pad_id,
+                cls_id=self.cls_id,
+                sep_id=self.sep_id,
+                mask_id=self.mask_id,
+                return_input_mask=True,
+                return_max_len=False,
+                return_num_token=False)
 
 
 class ModelHyperParams(object):
...
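One side effect of dropping the `wrapper()` level is that `data_generator()` changes its calling convention: it used to return a batch-generator function (the shape `DataLoader.set_batch_generator` expects), whereas it is now itself a generator. A minimal illustrative sketch, with `reader` standing in for any `DataReader` instance (the variable names here are assumptions of this sketch, not part of the patch):

```python
# Before this commit: data_generator() returned the inner `wrapper` function,
# i.e. a factory that DataLoader.set_batch_generator() could call.
batch_gen_fn = reader.data_generator()   # a callable
for batch in batch_gen_fn():             # call it to obtain the real generator
    pass

# After this commit: data_generator() is itself a generator function,
# so the training loop can iterate it directly, with no DataLoader involved.
for batch in reader.data_generator():
    pass
```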
...
@@ -17,14 +17,17 @@ import unittest
 import numpy as np
 import paddle.fluid as fluid
+from paddle.fluid.dygraph.base import to_variable
 from paddle.fluid.dygraph.dygraph_to_static import ProgramTranslator
 from bert_dygraph_model import PretrainModelLayer
 from bert_utils import get_bert_config, get_feed_data_reader
 
 program_translator = ProgramTranslator()
 place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace(
 )
 SEED = 2020
 STEP_NUM = 10
 PRINT_STEP = 2
@@ -35,19 +38,16 @@ def train(bert_config, data_reader):
     fluid.default_main_program().random_seed = SEED
     fluid.default_startup_program().random_seed = SEED
 
-    data_loader = fluid.io.DataLoader.from_generator(
-        capacity=50, iterable=True)
-    data_loader.set_batch_generator(
-        data_reader.data_generator(), places=place)
-
     bert = PretrainModelLayer(
         config=bert_config, weight_sharing=False, use_fp16=False)
     optimizer = fluid.optimizer.Adam(parameter_list=bert.parameters())
     step_idx = 0
     speed_list = []
-    for input_data in data_loader():
+    for input_data in data_reader.data_generator():
+        input_data = [to_variable(ele) for ele in input_data]
         src_ids, pos_ids, sent_ids, input_mask, mask_label, mask_pos, labels = input_data
         next_sent_acc, mask_lm_loss, total_loss = bert(
             src_ids=src_ids,
             position_ids=pos_ids,
...
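Since DataLoader no longer performs the numpy-to-Tensor conversion, the test now wraps each array with `to_variable` itself. Below is a minimal runnable sketch of that conversion under the fluid dygraph API; the array shapes and dtypes are illustrative placeholders, not the ones used by the test.

```python
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable

place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace()
with fluid.dygraph.guard(place):
    # A fake "batch" of numpy arrays, as a reader might yield it.
    # Shapes/dtypes are placeholders only.
    batch = [
        np.zeros([4, 128, 1], dtype='int64'),    # e.g. token ids
        np.ones([4, 128, 1], dtype='float32'),   # e.g. input mask
    ]
    # Convert every numpy array to a dygraph Variable, mirroring
    # `input_data = [to_variable(ele) for ele in input_data]` in the test.
    batch = [to_variable(ele) for ele in batch]
    print([v.shape for v in batch])
```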