diff --git a/README.md b/README.md index 84b2fbe6ffe220cf68b8aa22a471f30c0b257f07..b2b3e3cb733710ff183730c58228b60250697865 100644 --- a/README.md +++ b/README.md @@ -741,7 +741,7 @@ BERT包含了如下输入对象 ```yaml token_ids: 一个shape为[batch_size, seq_len]的矩阵,每行是一条样本,其中的每个元素为文本中的每个token对应的单词id。 position_ids: 一个shape为[batch_size, seq_len]的矩阵,每行是一条样本,其中的每个元素为文本中的每个token对应的位置id。 -segment_ids: 一个shape为[batch_size, seq_len]的0/1矩阵,用于支持BERT、ERNIE等模型的输入,当元素为0时,代表当前token属于分类任务或匹配任务的text1,为1时代表当前token属于匹配任务的text2. +segment_ids: 一个shape为[batch_size, seq_len]的0/1矩阵,用于支持BERT、ERNIE等模型的输入,当元素为0时,代表当前token属于分类任务或匹配任务的text1,为1时代表当前token属于匹配任务的text2。 input_mask: 一个shape为[batch_size, seq_len]的矩阵,其中的每个元素为0或1,表示该位置是否是padding词(为1时代表是真实词,为0时代表是填充词)。 ``` @@ -781,6 +781,7 @@ sentence_pair_embedding: 一个shape为[batch_size, hidden_size]的matrix, float ## 附录C:内置任务范式(paradigm) + #### 分类范式:cls 分类范式额外包含以下配置字段: @@ -788,6 +789,7 @@ sentence_pair_embedding: 一个shape为[batch_size, hidden_size]的matrix, float ```yaml n_classes(REQUIRED): int类型。分类任务的类别数。 pred_output_path (OPTIONAL) : str类型。预测输出结果的保存路径,当该参数未空时,保存至全局配置文件中的`save_path`字段指定路径下的任务目录。 +save_infermodel_every_n_steps (OPTIONAL) : int类型。周期性保存预测模型的间隔,未设置或设为-1时仅在该任务训练结束时保存预测模型。默认为-1。 ``` 分类范式包含如下的输入对象: @@ -812,6 +814,7 @@ sentence_embedding: 一个shape为[batch_size, hidden_size]的matrix, float32类 ```yaml pred_output_path (OPTIONAL) : str类型。预测输出结果的保存路径,当该参数未空时,保存至全局配置文件中的`save_path`字段指定路径下的任务目录。 +save_infermodel_every_n_steps (OPTIONAL) : int类型。周期性保存预测模型的间隔,未设置或设为-1时仅在该任务训练结束时保存预测模型。默认为-1。 ``` 匹配范式包含如下的输入对象: @@ -838,6 +841,7 @@ sentence_pair_embedding: 一个shape为[batch_size, hidden_size]的matrix, float max_answer_len(REQUIRED): int类型。预测的最大答案长度 n_best_size (OPTIONAL) : int类型,默认为20。预测时保存的nbest回答文件中每条样本的n_best数量 pred_output_path (OPTIONAL) : str类型。预测输出结果的保存路径,当该参数未空时,保存至全局配置文件中的`save_path`字段指定路径下的任务目录 +save_infermodel_every_n_steps (OPTIONAL) : int类型。周期性保存预测模型的间隔,未设置或设为-1时仅在该任务训练结束时保存预测模型。默认为-1。 ``` 机器阅读理解范式包含如下的输入对象: @@ -885,7 +889,8 @@ do_lower_case (OPTIONAL): bool类型。大小写标志位。默认为False,即 for_cn: bool类型。中文模式标志位。默认为False,即默认输入为英文,设置为True后,分词器、后处理等按照中文语言进行处理。 print_every_n_steps (OPTIONAL): int类型。默认为5。训练阶段打印日志的频率(step为单位)。 -save_every_n_steps (OPTIONAL): int类型。默认为-1。训练过程中保存checkpoint模型的频率,默认不保存。 +save_ckpt_every_n_steps (OPTIONAL): int类型。默认为-1。训练过程中保存完整计算图的检查点(checkpoint)的频率,默认-1,仅在最后一个step自动保存检查点。 +save_infermodel_every_n_steps (OPTIONAL) : int类型。周期性保存预测模型的间隔,未设置或设为-1时仅在该任务训练结束时保存预测模型。默认为-1。 optimizer(REQUIRED): str类型。优化器名称,目前框架只支持adam,未来会支持更多优化器。 learning_rate(REQUIRED): str类型。训练阶段的学习率。 diff --git a/demo/demo2/config.yaml b/demo/demo2/config.yaml index 1cc55777a39c747f98dd4e89fcaae4af587592d5..fe00c1a7339b1be418e68fcd836b3d657eb8c708 100644 --- a/demo/demo2/config.yaml +++ b/demo/demo2/config.yaml @@ -12,6 +12,8 @@ do_lower_case: True max_seq_len: 512 batch_size: 4 +save_ckpt_every_n_steps: 5 +save_infermodel_every_n_steps: 5 num_epochs: 2 optimizer: "adam" learning_rate: 3e-5 diff --git a/paddlepalm/README.md b/paddlepalm/README.md deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/paddlepalm/_downloader.py b/paddlepalm/_downloader.py index 52521e812ce279139ba72b87827cbdfdf6881965..1b8de4b629a491148e43b71f96cb70c0542d15d4 100644 --- a/paddlepalm/_downloader.py +++ b/paddlepalm/_downloader.py @@ -33,6 +33,7 @@ ssl._create_default_https_context = ssl._create_unverified_context _items = { 'pretrain': {'ernie-en-uncased-large': 'https://ernie.bj.bcebos.com/ERNIE_Large_en_stable-2.0.0.tar.gz', 'bert-en-uncased-large': 'https://bert-models.bj.bcebos.com/uncased_L-24_H-1024_A-16.tar.gz', + 'bert-en-uncased-base': 'https://bert-models.bj.bcebos.com/uncased_L-12_H-768_A-12.tar.gz', 'utils': None}, 'reader': {'utils': None}, 'backbone': {'utils': None}, @@ -90,7 +91,7 @@ def _download(item, scope, path, silent=False): tar.extractall(path = data_dir) tar.close() os.remove(filename) - if scope == 'bert-en-uncased-large': + if scope.startswith('bert'): source_path = data_dir + '/' + data_name.split('.')[0] fileList = os.listdir(source_path) for file in fileList: diff --git a/paddlepalm/backbone/bert.py b/paddlepalm/backbone/bert.py index 74f772c44fab376bd411011f8ca82b59cef4f0df..d3592a5526447694e8a14d01dee2b9987740b2ed 100644 --- a/paddlepalm/backbone/bert.py +++ b/paddlepalm/backbone/bert.py @@ -52,9 +52,9 @@ class Model(backbone): @property def inputs_attr(self): - return {"token_ids": [[-1, -1, 1], 'int64'], - "position_ids": [[-1, -1, 1], 'int64'], - "segment_ids": [[-1, -1, 1], 'int64'], + return {"token_ids": [[-1, -1], 'int64'], + "position_ids": [[-1, -1], 'int64'], + "segment_ids": [[-1, -1], 'int64'], "input_mask": [[-1, -1, 1], 'float32']} @property @@ -73,7 +73,7 @@ class Model(backbone): self._emb_dtype = 'float32' # padding id in vocabulary must be set to 0 - emb_out = fluid.layers.embedding( + emb_out = fluid.embedding( input=src_ids, size=[self._voc_size, self._emb_size], dtype=self._emb_dtype, @@ -84,14 +84,14 @@ class Model(backbone): # fluid.global_scope().find_var('backbone-word_embedding').get_tensor() embedding_table = fluid.default_main_program().global_block().var(scope_name+self._word_emb_name) - position_emb_out = fluid.layers.embedding( + position_emb_out = fluid.embedding( input=pos_ids, size=[self._max_position_seq_len, self._emb_size], dtype=self._emb_dtype, param_attr=fluid.ParamAttr( name=scope_name+self._pos_emb_name, initializer=self._param_initializer)) - sent_emb_out = fluid.layers.embedding( + sent_emb_out = fluid.embedding( sent_ids, size=[self._sent_types, self._emb_size], dtype=self._emb_dtype, diff --git a/paddlepalm/backbone/ernie.py b/paddlepalm/backbone/ernie.py index 1e471537cf9485f533dbc4f048662f0a5bc30c60..ded196385112513d001c6db4505cdc3883592984 100644 --- a/paddlepalm/backbone/ernie.py +++ b/paddlepalm/backbone/ernie.py @@ -62,11 +62,11 @@ class Model(backbone): @property def inputs_attr(self): - return {"token_ids": [[-1, -1, 1], 'int64'], - "position_ids": [[-1, -1, 1], 'int64'], - "segment_ids": [[-1, -1, 1], 'int64'], + return {"token_ids": [[-1, -1], 'int64'], + "position_ids": [[-1, -1], 'int64'], + "segment_ids": [[-1, -1], 'int64'], "input_mask": [[-1, -1, 1], 'float32'], - "task_ids": [[-1,-1, 1], 'int64']} + "task_ids": [[-1,-1], 'int64']} @property def outputs_attr(self): @@ -85,7 +85,7 @@ class Model(backbone): task_ids = inputs['task_ids'] # padding id in vocabulary must be set to 0 - emb_out = fluid.layers.embedding( + emb_out = fluid.embedding( input=src_ids, size=[self._voc_size, self._emb_size], dtype=self._emb_dtype, @@ -96,14 +96,14 @@ class Model(backbone): # fluid.global_scope().find_var('backbone-word_embedding').get_tensor() embedding_table = fluid.default_main_program().global_block().var(scope_name+self._word_emb_name) - position_emb_out = fluid.layers.embedding( + position_emb_out = fluid.embedding( input=pos_ids, size=[self._max_position_seq_len, self._emb_size], dtype=self._emb_dtype, param_attr=fluid.ParamAttr( name=scope_name+self._pos_emb_name, initializer=self._param_initializer)) - sent_emb_out = fluid.layers.embedding( + sent_emb_out = fluid.embedding( sent_ids, size=[self._sent_types, self._emb_size], dtype=self._emb_dtype, @@ -113,7 +113,7 @@ class Model(backbone): emb_out = emb_out + position_emb_out emb_out = emb_out + sent_emb_out - task_emb_out = fluid.layers.embedding( + task_emb_out = fluid.embedding( task_ids, size=[self._task_types, self._emb_size], dtype=self._emb_dtype, diff --git a/paddlepalm/mtl_controller.py b/paddlepalm/mtl_controller.py index b25bbcaaee511f82795ca759d3f32769c0e55cc7..30fac4dd7802d091e1ccb37f92f6477de28d2144 100755 --- a/paddlepalm/mtl_controller.py +++ b/paddlepalm/mtl_controller.py @@ -473,7 +473,7 @@ class Controller(object): # compute loss task_id_var = net_inputs['__task_id'] - task_id_vec = layers.one_hot(task_id_var, num_instances) + task_id_vec = fluid.one_hot(task_id_var, num_instances) losses = fluid.layers.concat([task_output_vars[inst.name+'/loss'] for inst in instances], axis=0) loss = layers.reduce_sum(task_id_vec * losses) @@ -622,8 +622,9 @@ class Controller(object): global_step += 1 cur_task.cur_train_step += 1 - if cur_task.save_infermodel_every_n_steps > 0 and cur_task.cur_train_step % cur_task.save_infermodel_every_n_steps == 0: - cur_task.save(suffix='.step'+str(cur_task.cur_train_step)) + cur_task_global_step = cur_task.cur_train_step + cur_task.cur_train_epoch * cur_task.steps_pur_epoch + if cur_task.is_target and cur_task.save_infermodel_every_n_steps > 0 and cur_task_global_step % cur_task.save_infermodel_every_n_steps == 0: + cur_task.save(suffix='.step'+str(cur_task_global_step)) if global_step % main_conf.get('print_every_n_steps', 5) == 0: loss = rt_outputs[cur_task.name+'/loss'] @@ -641,7 +642,7 @@ class Controller(object): print(cur_task.name+': train finished!') cur_task.save() - if 'save_every_n_steps' in main_conf and global_step % main_conf['save_every_n_steps'] == 0: + if 'save_ckpt_every_n_steps' in main_conf and global_step % main_conf['save_ckpt_every_n_steps'] == 0: save_path = os.path.join(main_conf['save_path'], 'ckpt', "step_" + str(global_step)) fluid.io.save_persistables(self.exe, save_path, saver_program) diff --git a/paddlepalm/reader/cls.py b/paddlepalm/reader/cls.py index 1ecf6cbf7ffd5c6aea62297a292ca2e014232053..dd5e7f3b88b980410de13a23d2b10717b88ea6b3 100644 --- a/paddlepalm/reader/cls.py +++ b/paddlepalm/reader/cls.py @@ -62,18 +62,18 @@ class Reader(reader): @property def outputs_attr(self): if self._is_training: - return {"token_ids": [[-1, -1, 1], 'int64'], - "position_ids": [[-1, -1, 1], 'int64'], - "segment_ids": [[-1, -1, 1], 'int64'], + return {"token_ids": [[-1, -1], 'int64'], + "position_ids": [[-1, -1], 'int64'], + "segment_ids": [[-1, -1], 'int64'], "input_mask": [[-1, -1, 1], 'float32'], - "label_ids": [[-1,1], 'int64'], - "task_ids": [[-1, -1, 1], 'int64'] + "label_ids": [[-1], 'int64'], + "task_ids": [[-1, -1], 'int64'] } else: - return {"token_ids": [[-1, -1, 1], 'int64'], - "position_ids": [[-1, -1, 1], 'int64'], - "segment_ids": [[-1, -1, 1], 'int64'], - "task_ids": [[-1, -1, 1], 'int64'], + return {"token_ids": [[-1, -1], 'int64'], + "position_ids": [[-1, -1], 'int64'], + "segment_ids": [[-1, -1], 'int64'], + "task_ids": [[-1, -1], 'int64'], "input_mask": [[-1, -1, 1], 'float32'] } diff --git a/paddlepalm/reader/match.py b/paddlepalm/reader/match.py index 41dab1a268be301cac924b809b140da48f34897e..27dfa96785b8ec2978979ad486d28b5fae077c53 100644 --- a/paddlepalm/reader/match.py +++ b/paddlepalm/reader/match.py @@ -72,12 +72,12 @@ class Reader(reader): @property def outputs_attr(self): if self._is_training: - return {"token_ids": [[-1, -1, 1], 'int64'], - "position_ids": [[-1, -1, 1], 'int64'], - "segment_ids": [[-1, -1, 1], 'int64'], + return {"token_ids": [[-1, -1], 'int64'], + "position_ids": [[-1, -1], 'int64'], + "segment_ids": [[-1, -1], 'int64'], "input_mask": [[-1, -1, 1], 'float32'], - "label_ids": [[-1,1], 'int64'], - "task_ids": [[-1, -1, 1], 'int64'] + "label_ids": [[-1], 'int64'], + "task_ids": [[-1, -1], 'int64'] } if siamese: if learning_strategy == 'pointwise': @@ -102,10 +102,10 @@ class Reader(reader): else: else: - return {"token_ids": [[-1, -1, 1], 'int64'], - "position_ids": [[-1, -1, 1], 'int64'], - "segment_ids": [[-1, -1, 1], 'int64'], - "task_ids": [[-1, -1, 1], 'int64'], + return {"token_ids": [[-1, -1], 'int64'], + "position_ids": [[-1, -1], 'int64'], + "segment_ids": [[-1, -1], 'int64'], + "task_ids": [[-1, -1], 'int64'], "input_mask": [[-1, -1, 1], 'float32'] } diff --git a/paddlepalm/reader/mlm.py b/paddlepalm/reader/mlm.py index 2455cd99a3fb19462ff4a16fe6de1d24a0d2cc72..4eb0cbf2c0b5b530c393cea41bf54fd9d7bb34ab 100644 --- a/paddlepalm/reader/mlm.py +++ b/paddlepalm/reader/mlm.py @@ -60,13 +60,13 @@ class Reader(reader): @property def outputs_attr(self): - return {"token_ids": [[-1, -1, 1], 'int64'], - "position_ids": [[-1, -1, 1], 'int64'], - "segment_ids": [[-1, -1, 1], 'int64'], + return {"token_ids": [[-1, -1], 'int64'], + "position_ids": [[-1, -1], 'int64'], + "segment_ids": [[-1, -1], 'int64'], "input_mask": [[-1, -1, 1], 'float32'], - "task_ids": [[-1, -1, 1], 'int64'], - "mask_label": [[-1, 1], 'int64'], - "mask_pos": [[-1, 1], 'int64'], + "task_ids": [[-1, -1], 'int64'], + "mask_label": [[-1], 'int64'], + "mask_pos": [[-1], 'int64'], } diff --git a/paddlepalm/reader/mrc.py b/paddlepalm/reader/mrc.py index 01d9adc3e3c9d6164881b98e8d603f9d5d8d0a7e..d1127ae1f50f45a73593c0fb452ff946cadb93c4 100644 --- a/paddlepalm/reader/mrc.py +++ b/paddlepalm/reader/mrc.py @@ -69,22 +69,21 @@ class Reader(reader): @property def outputs_attr(self): if self._is_training: - return {"token_ids": [[-1, -1, 1], 'int64'], - "position_ids": [[-1, -1, 1], 'int64'], - "segment_ids": [[-1, -1, 1], 'int64'], + return {"token_ids": [[-1, -1], 'int64'], + "position_ids": [[-1, -1], 'int64'], + "segment_ids": [[-1, -1], 'int64'], "input_mask": [[-1, -1, 1], 'float32'], - "start_positions": [[-1, 1], 'int64'], - "unique_ids": [[-1, 1], 'int64'], - "end_positions": [[-1, 1], 'int64'], - "task_ids": [[-1, -1, 1], 'int64'] + "start_positions": [[-1], 'int64'], + "end_positions": [[-1], 'int64'], + "task_ids": [[-1, -1], 'int64'] } else: - return {"token_ids": [[-1, -1, 1], 'int64'], - "position_ids": [[-1, -1, 1], 'int64'], - "segment_ids": [[-1, -1, 1], 'int64'], - "task_ids": [[-1, -1, 1], 'int64'], + return {"token_ids": [[-1, -1], 'int64'], + "position_ids": [[-1, -1], 'int64'], + "segment_ids": [[-1, -1], 'int64'], + "task_ids": [[-1, -1], 'int64'], "input_mask": [[-1, -1, 1], 'float32'], - "unique_ids": [[-1, 1], 'int64'] + "unique_ids": [[-1], 'int64'] } @property diff --git a/paddlepalm/reader/utils/batching4bert.py b/paddlepalm/reader/utils/batching4bert.py index daeb25ae9e0fd2dfd4abe021453a71ccd790d562..96998b21e0fe97c18776fef827d290e4c2a89525 100644 --- a/paddlepalm/reader/utils/batching4bert.py +++ b/paddlepalm/reader/utils/batching4bert.py @@ -67,8 +67,8 @@ def mask(batch_tokens, total_token_num, vocab_size, CLS=1, SEP=2, MASK=3): sent[token_index] = MASK mask_flag = True mask_pos.append(sent_index * max_len + token_index) - mask_label = np.array(mask_label).astype("int64").reshape([-1, 1]) - mask_pos = np.array(mask_pos).astype("int64").reshape([-1, 1]) + mask_label = np.array(mask_label).astype("int64").reshape([-1]) + mask_pos = np.array(mask_pos).astype("int64").reshape([-1]) return batch_tokens, mask_label, mask_pos @@ -96,7 +96,7 @@ def prepare_batch_data(insts, # or unique id for i in range(3, len(insts[0]), 1): labels = [inst[i] for inst in insts] - labels = np.array(labels).astype("int64").reshape([-1, 1]) + labels = np.array(labels).astype("int64").reshape([-1]) labels_list.append(labels) # First step: do mask without padding if mask_id >= 0: @@ -154,14 +154,14 @@ def pad_batch_data(insts, inst_data = np.array([ list(inst) + list([pad_idx] * (max_len - len(inst))) for inst in insts ]) - return_list += [inst_data.astype("int64").reshape([-1, max_len, 1])] + return_list += [inst_data.astype("int64").reshape([-1, max_len])] # position data if return_pos: inst_pos = np.array([ list(range(0, len(inst))) + [pad_idx] * (max_len - len(inst)) for inst in insts ]) - return_list += [inst_pos.astype("int64").reshape([-1, max_len, 1])] + return_list += [inst_pos.astype("int64").reshape([-1, max_len])] if return_input_mask: # This is used to avoid attention on paddings. input_mask_data = np.array([[1] * len(inst) + [0] * diff --git a/paddlepalm/reader/utils/batching4ernie.py b/paddlepalm/reader/utils/batching4ernie.py index d3d13573c38af3d7d6e7027cbff06969b449b722..7a7f86890c5d01b0c36ec0f3aeefec1b3135128a 100644 --- a/paddlepalm/reader/utils/batching4ernie.py +++ b/paddlepalm/reader/utils/batching4ernie.py @@ -113,8 +113,8 @@ def mask(batch_tokens, pre_sent_len = len(sent) - mask_label = np.array(mask_label).astype("int64").reshape([-1, 1]) - mask_pos = np.array(mask_pos).astype("int64").reshape([-1, 1]) + mask_label = np.array(mask_label).astype("int64").reshape([-1]) + mask_pos = np.array(mask_pos).astype("int64").reshape([-1]) return batch_tokens, mask_label, mask_pos @@ -136,7 +136,7 @@ def pad_batch_data(insts, inst_data = np.array( [inst + list([pad_idx] * (max_len - len(inst))) for inst in insts]) - return_list += [inst_data.astype("int64").reshape([-1, max_len, 1])] + return_list += [inst_data.astype("int64").reshape([-1, max_len])] # position data if return_pos: @@ -145,7 +145,7 @@ def pad_batch_data(insts, for inst in insts ]) - return_list += [inst_pos.astype("int64").reshape([-1, max_len, 1])] + return_list += [inst_pos.astype("int64").reshape([-1, max_len])] if return_input_mask: # This is used to avoid attention on paddings. @@ -165,7 +165,7 @@ def pad_batch_data(insts, if return_seq_lens: seq_lens = np.array([len(inst) for inst in insts]) - return_list += [seq_lens.astype("int64").reshape([-1, 1])] + return_list += [seq_lens.astype("int64").reshape([-1])] return return_list if len(return_list) > 1 else return_list[0] diff --git a/paddlepalm/reader/utils/mlm_batching.py b/paddlepalm/reader/utils/mlm_batching.py index b726ea95b8f228a4494d1841e335996af9e718f7..5895862b87edfa68877cd54869aac0ea911a2a9b 100644 --- a/paddlepalm/reader/utils/mlm_batching.py +++ b/paddlepalm/reader/utils/mlm_batching.py @@ -168,14 +168,14 @@ def pad_batch_data(insts, inst_data = np.array([ list(inst) + list([pad_idx] * (max_len - len(inst))) for inst in insts ]) - return_list += [inst_data.astype("int64").reshape([-1, max_len, 1])] + return_list += [inst_data.astype("int64").reshape([-1, max_len])] # position data if return_pos: inst_pos = np.array([ list(range(0, len(inst))) + [pad_idx] * (max_len - len(inst)) for inst in insts ]) - return_list += [inst_pos.astype("int64").reshape([-1, max_len, 1])] + return_list += [inst_pos.astype("int64").reshape([-1, max_len])] if return_input_mask: # This is used to avoid attention on paddings. input_mask_data = np.array([[1] * len(inst) + [0] * diff --git a/paddlepalm/reader/utils/reader4ernie.py b/paddlepalm/reader/utils/reader4ernie.py index 82f99ffd6392785a0738c09bf65243f331105eb7..46c9d51f7a5e6ed136dbf3575eb5153897d31be4 100644 --- a/paddlepalm/reader/utils/reader4ernie.py +++ b/paddlepalm/reader/utils/reader4ernie.py @@ -480,17 +480,17 @@ class ClassifyReader(BaseReader): batch_labels = [record.label_id for record in batch_records] if self.is_classify: batch_labels = np.array(batch_labels).astype("int64").reshape( - [-1, 1]) + [-1]) elif self.is_regression: batch_labels = np.array(batch_labels).astype("float32").reshape( - [-1, 1]) + [-1]) if batch_records[0].qid: batch_qids = [record.qid for record in batch_records] batch_qids = np.array(batch_qids).astype("int64").reshape( - [-1, 1]) + [-1]) else: - batch_qids = np.array([]).astype("int64").reshape([-1, 1]) + batch_qids = np.array([]).astype("int64").reshape([-1]) # padding padded_token_ids, input_mask = pad_batch_data( @@ -918,19 +918,19 @@ class MRCReader(BaseReader): record.end_position for record in batch_records ] batch_start_position = np.array(batch_start_position).astype( - "int64").reshape([-1, 1]) + "int64").reshape([-1]) batch_end_position = np.array(batch_end_position).astype( - "int64").reshape([-1, 1]) + "int64").reshape([-1]) else: batch_size = len(batch_token_ids) batch_start_position = np.zeros( - shape=[batch_size, 1], dtype="int64") - batch_end_position = np.zeros(shape=[batch_size, 1], dtype="int64") + shape=[batch_size], dtype="int64") + batch_end_position = np.zeros(shape=[batch_size], dtype="int64") batch_unique_ids = [record.unique_id for record in batch_records] batch_unique_ids = np.array(batch_unique_ids).astype("int64").reshape( - [-1, 1]) + [-1]) # padding padded_token_ids, input_mask = pad_batch_data( diff --git a/paddlepalm/task_paradigm/cls.py b/paddlepalm/task_paradigm/cls.py index 6cbacf79dd12622c4d952c29040c0c42768e2d11..2893dc33ce833f597d1f04311f8728d15112e606 100644 --- a/paddlepalm/task_paradigm/cls.py +++ b/paddlepalm/task_paradigm/cls.py @@ -43,7 +43,7 @@ class TaskParadigm(task_paradigm): @property def inputs_attrs(self): if self._is_training: - reader = {"label_ids": [[-1, 1], 'int64']} + reader = {"label_ids": [[-1], 'int64']} else: reader = {} bb = {"sentence_embedding": [[-1, self._hidden_size], 'float32']} @@ -75,8 +75,9 @@ class TaskParadigm(task_paradigm): name=scope_name+"cls_out_b", initializer=fluid.initializer.Constant(0.))) if self._is_training: - loss = fluid.layers.softmax_with_cross_entropy( - logits=logits, label=label_ids) + inputs = fluid.layers.softmax(logits) + loss = fluid.layers.cross_entropy( + input=inputs, label=label_ids) loss = layers.mean(loss) return {"loss": loss} else: diff --git a/paddlepalm/task_paradigm/match.py b/paddlepalm/task_paradigm/match.py index fbf63e9e21299e7561adb584eaccd417861de624..68404fb97799fa39690d9d7cd5622c05789fed6f 100644 --- a/paddlepalm/task_paradigm/match.py +++ b/paddlepalm/task_paradigm/match.py @@ -44,7 +44,7 @@ class TaskParadigm(task_paradigm): @property def inputs_attrs(self): if self._is_training: - reader = {"label_ids": [[-1, 1], 'int64']} + reader = {"label_ids": [[-1], 'int64']} else: reader = {} bb = {"sentence_pair_embedding": [[-1, self._hidden_size], 'float32']} @@ -84,8 +84,9 @@ class TaskParadigm(task_paradigm): initializer=fluid.initializer.Constant(0.))) if self._is_training: - ce_loss, probs = fluid.layers.softmax_with_cross_entropy( - logits=logits, label=labels, return_softmax=True) + inputs = fluid.layers.softmax(logits) + ce_loss = fluid.layers.cross_entropy( + input=inputs, label=labels) loss = fluid.layers.mean(x=ce_loss) return {'loss': loss} else: diff --git a/paddlepalm/task_paradigm/mlm.py b/paddlepalm/task_paradigm/mlm.py index ec86dd151e8b0f86c345120f4a5907f0afb91d5c..5b99ac7dbd3a5591ce871533f970cc888abb754c 100644 --- a/paddlepalm/task_paradigm/mlm.py +++ b/paddlepalm/task_paradigm/mlm.py @@ -33,8 +33,8 @@ class TaskParadigm(task_paradigm): @property def inputs_attrs(self): reader = { - "mask_label": [[-1, 1], 'int64'], - "mask_pos": [[-1, 1], 'int64']} + "mask_label": [[-1], 'int64'], + "mask_pos": [[-1], 'int64']} if not self._is_training: del reader['mask_label'] del reader['batchsize_x_seqlen'] @@ -100,8 +100,9 @@ class TaskParadigm(task_paradigm): is_bias=True) if self._is_training: - mask_lm_loss = fluid.layers.softmax_with_cross_entropy( - logits=fc_out, label=mask_label) + inputs = fluid.layers.softmax(fc_out) + mask_lm_loss = fluid.layers.cross_entropy( + input=inputs, label=mask_label) loss = fluid.layers.mean(mask_lm_loss) return {'loss': loss} else: diff --git a/paddlepalm/task_paradigm/mrc.py b/paddlepalm/task_paradigm/mrc.py index c035acf733c535def965ea2f989354ff8db98f82..1f051350d74974bcbd3ed20f54b394503e20f6eb 100644 --- a/paddlepalm/task_paradigm/mrc.py +++ b/paddlepalm/task_paradigm/mrc.py @@ -49,11 +49,11 @@ class TaskParadigm(task_paradigm): @property def inputs_attrs(self): if self._is_training: - reader = {"start_positions": [[-1, 1], 'int64'], - "end_positions": [[-1, 1], 'int64'], + reader = {"start_positions": [[-1], 'int64'], + "end_positions": [[-1], 'int64'], } else: - reader = {'unique_ids': [[-1, 1], 'int64']} + reader = {'unique_ids': [[-1], 'int64']} bb = {"encoder_outputs": [[-1, -1, self._hidden_size], 'float32']} return {'reader': reader, 'backbone': bb} @@ -70,7 +70,7 @@ class TaskParadigm(task_paradigm): else: return {'start_logits': [[-1, -1, 1], 'float32'], 'end_logits': [[-1, -1, 1], 'float32'], - 'unique_ids': [[-1, 1], 'int64']} + 'unique_ids': [[-1], 'int64']} def build(self, inputs, scope_name=""): @@ -102,9 +102,11 @@ class TaskParadigm(task_paradigm): start_logits, end_logits = fluid.layers.unstack(x=logits, axis=0) def _compute_single_loss(logits, positions): - """Compute start/end loss for mrc model""" - loss = fluid.layers.softmax_with_cross_entropy( - logits=logits, label=positions) + """Compute start/en + d loss for mrc model""" + inputs = fluid.layers.softmax(logits) + loss = fluid.layers.cross_entropy( + input=inputs, label=positions) loss = fluid.layers.mean(x=loss) return loss @@ -122,7 +124,7 @@ class TaskParadigm(task_paradigm): def postprocess(self, rt_outputs): """this func will be called after each step(batch) of training/evaluating/predicting process.""" if not self._is_training: - unique_ids = np.squeeze(rt_outputs['unique_ids'], -1) + unique_ids = rt_outputs['unique_ids'] start_logits = rt_outputs['start_logits'] end_logits = rt_outputs['end_logits'] for idx in range(len(unique_ids)): diff --git a/paddlepalm/utils/reader_helper.py b/paddlepalm/utils/reader_helper.py index 92c7c8fdd91311d8515e51668ce7197c516876ba..c8a42ec071e13a38b297ad6fcd0c5299201a03cb 100644 --- a/paddlepalm/utils/reader_helper.py +++ b/paddlepalm/utils/reader_helper.py @@ -19,7 +19,6 @@ import random import numpy as np import paddle from paddle import fluid -from paddle.fluid import layers def _check_and_adapt_shape_dtype(rt_val, attr, message=""): @@ -65,7 +64,7 @@ def create_net_inputs(input_attrs, async=False, iterator_fn=None, dev_count=1, n inputs = [] ret = {} for name, shape, dtype in input_attrs: - p = layers.data(name, shape=shape, dtype=dtype) + p = fluid.data(name, shape=shape, dtype=dtype) ret[name] = p inputs.append(p) @@ -227,7 +226,7 @@ def merge_input_attrs(backbone_attr, task_attrs, insert_taskid=True, insert_batc names = [] start = 0 if insert_taskid: - ret.append(([1,1], 'int64')) + ret.append(([1, 1], 'int64')) names.append('__task_id') start += 1 diff --git a/script/convert_params.sh b/script/convert_params.sh new file mode 100755 index 0000000000000000000000000000000000000000..e645d9ab9be0815ceb5423bc9c629184cca323f7 --- /dev/null +++ b/script/convert_params.sh @@ -0,0 +1,37 @@ + +#!/bin/sh +if [[ $# != 1 ]]; then + echo "usage: bash convert_params.sh " + exit 1 +fi + +if [[ -f $1/__palminfo__ ]]; then + echo "already converted." + exit 0 +fi + +echo "converting..." +if [[ -d $1/params ]]; then + cd $1/params +else + cd $1 +fi + +mkdir .palm.backup + +for file in $(ls *) + do cp $file .palm.backup; mv $file "__paddlepalm_"$file +done +tar -cf __rawmodel__ .palm.backup/* +rm .palm.backup/* +mv __rawmodel__ .palm.backup +# find . ! -name '__rawmodel__' -exec rm {} + +tar -cf __palmmodel__ __paddlepalm_* +touch __palminfo__ +ls __paddlepalm_* > __palminfo__ +rm __paddlepalm_* + +cd - >/dev/null + +echo "done!" + diff --git a/script/download_pretrain_backbone.sh b/script/download_pretrain_backbone.sh new file mode 100755 index 0000000000000000000000000000000000000000..bc64a428801cf08e3f184ad50955dd706187341e --- /dev/null +++ b/script/download_pretrain_backbone.sh @@ -0,0 +1,43 @@ +#!/bin/bash + +set -e + +if [[ $# != 1 ]]; then + echo "Usage: bash download_pretrain.sh " + exit 1 +fi + +if [[ $1 == 'bert' ]]; then + name="bert" + link="https://bert-models.bj.bcebos.com/uncased_L-24_H-1024_A-16.tar.gz" + packname="uncased_L-24_H-1024_A-16.tar.gz" + dirname="uncased_L-24_H-1024_A-16" +elif [[ $1 == 'ernie' ]]; then + name="ernie" + link="https://ernie.bj.bcebos.com/ERNIE_Large_en_stable-2.0.0.tar.gz" + packname="ERNIE_Large_en_stable-2.0.0.tar.gz" +else + echo "$1 is currently not supported." + exit 1 +fi + +if [[ ! -d pretrain_model ]]; then + mkdir pretrain_model +fi + +cd pretrain_model +mkdir $name +cd $name +echo "downloading ${name}..." +wget --no-check-certificate $link +echo "decompressing..." +tar -zxf $packname +rm -rf $packname +if [[ $dirname != "" ]]; then + mv $dirname/* . + rm -rf $dirname +fi + +cd ../.. + + diff --git a/script/recover_params.sh b/script/recover_params.sh new file mode 100755 index 0000000000000000000000000000000000000000..a99ceb500f5e9f70dfa8660d8c308ec1f0841c5b --- /dev/null +++ b/script/recover_params.sh @@ -0,0 +1,33 @@ + +#!/bin/sh +if [[ $# != 1 ]]; then + echo "usage: bash recover_params.sh " + exit 1 +fi + +if [[ ! -d $1 ]]; then + echo "$1 not found." + exit 1 +fi + +if [[ ! -f $1/__palmmodel__ ]]; then + echo "paddlepalm model not found." + exit 1 +fi + +echo "recovering..." +if [[ -d $1/params ]]; then + cd $1/params +else + cd $1 +fi +rm __palm* +mv .palm.backup/__rawmodel__ . +rm -rf .palm.backup +tar -xf __rawmodel__ +mv .palm.backup/* . +rm __rawmodel__ + +rm -rf .palm.backup +cd - >/dev/null + diff --git a/setup.py b/setup.py index bfeb6be5427166b2f2468af4f76457e9a11f7497..6c81d9e193a37e2cc2a480d841a463f3e5c294ef 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ """ Setup script. Authors: zhouxiangyang(zhouxiangyang@baidu.com) -Date: 2019/09/29 21:00:01 +Date: 2019/12/05 13:24:01 """ import setuptools from io import open @@ -27,10 +27,10 @@ with open("README.md", "r", encoding='utf-8') as fh: setuptools.setup( name="paddlepalm", - version="0.2.1", + version="0.2.2", author="PaddlePaddle", author_email="zhangyiming04@baidu.com", - description="A Multi-task Learning Lib for PaddlePaddle Users.", + description="A Lib for PaddlePaddle Users.", # long_description=long_description, # long_description_content_type="text/markdown", url="https://github.com/PaddlePaddle/PALM",