diff --git a/PaddleNLP/Research/ACL2018-DAM/config.py b/PaddleNLP/Research/ACL2018-DAM/config.py index 6b9c664942ff2ea23e0f284662dbeae172caca3c..4f185ef7b7e3b4bdc608434abd286a2d77431b98 100644 --- a/PaddleNLP/Research/ACL2018-DAM/config.py +++ b/PaddleNLP/Research/ACL2018-DAM/config.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ Deep Attention Matching Network """ @@ -5,6 +18,7 @@ Deep Attention Matching Network import argparse import six + def parse_args(): """ Deep Attention Matching Network Config @@ -12,14 +26,14 @@ def parse_args(): parser = argparse.ArgumentParser("DAM Config") parser.add_argument( - '--do_train', - type=bool, - default=False, + '--do_train', + type=bool, + default=False, help='Whether to perform training.') parser.add_argument( - '--do_test', - type=bool, - default=False, + '--do_test', + type=bool, + default=False, help='Whether to perform training.') parser.add_argument( diff --git a/PaddleNLP/Research/ACL2018-DAM/evaluation.py b/PaddleNLP/Research/ACL2018-DAM/evaluation.py index ad0bbc0f3fa7ebfa31ecc0378b969b7ca10fa5d2..997bab94c6911acc2a568fb543a3fa8fa520b442 100755 --- a/PaddleNLP/Research/ACL2018-DAM/evaluation.py +++ b/PaddleNLP/Research/ACL2018-DAM/evaluation.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ Evaluation """ diff --git a/PaddleNLP/Research/ACL2018-DAM/layers.py b/PaddleNLP/Research/ACL2018-DAM/layers.py index 3d4ab5dcd90ab8075ee4bc8c820875fdbb5d7f5c..66a7734b4e20b5c3015fe7017fda718608fc3ef2 100755 --- a/PaddleNLP/Research/ACL2018-DAM/layers.py +++ b/PaddleNLP/Research/ACL2018-DAM/layers.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ Layers """ @@ -77,7 +90,7 @@ def dot_product_attention(query, """ logits = fluid.layers.matmul( - x=query, y=key, transpose_y=True, alpha=d_key ** (-0.5)) + x=query, y=key, transpose_y=True, alpha=d_key**(-0.5)) if (q_mask is not None) and (k_mask is not None): if mask_cache is not None and q_mask.name in mask_cache and k_mask.name in mask_cache[ @@ -87,7 +100,7 @@ def dot_product_attention(query, mask = fluid.layers.matmul(x=q_mask, y=k_mask, transpose_y=True) another_mask = fluid.layers.scale( mask, - scale=float(2 ** 32 - 1), + scale=float(2**32 - 1), bias=float(-1), bias_after_scale=False) if mask_cache is not None: diff --git a/PaddleNLP/Research/ACL2018-DAM/main.py b/PaddleNLP/Research/ACL2018-DAM/main.py index 8d01d8cff1fa3dd314780b8370cdfd551f0b9da2..950ea49a015149808a622b1d0d84d535c48d3192 100755 --- a/PaddleNLP/Research/ACL2018-DAM/main.py +++ b/PaddleNLP/Research/ACL2018-DAM/main.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ Deep Attention Matching Network """ @@ -174,9 +187,8 @@ def train(args): print("device count %d" % dev_count) print("theoretical memory usage: ") - print( - fluid.contrib.memory_usage( - program=train_program, batch_size=args.batch_size)) + print(fluid.contrib.memory_usage( + program=train_program, batch_size=args.batch_size)) exe = fluid.Executor(place) exe.run(train_startup) @@ -247,9 +259,8 @@ def train(args): if (args.save_path is not None) and (step % save_step == 0): save_path = os.path.join(args.save_path, "step_" + str(step)) print("Save model at step %d ... " % step) - print( - time.strftime('%Y-%m-%d %H:%M:%S', - time.localtime(time.time()))) + print(time.strftime('%Y-%m-%d %H:%M:%S', + time.localtime(time.time()))) fluid.io.save_persistables(exe, save_path, train_program) score_path = os.path.join(args.save_path, 'score.' + str(step)) @@ -294,9 +305,8 @@ def train(args): save_path = os.path.join(args.save_path, "step_" + str(step)) print("Save model at step %d ... " % step) - print( - time.strftime('%Y-%m-%d %H:%M:%S', - time.localtime(time.time()))) + print(time.strftime('%Y-%m-%d %H:%M:%S', + time.localtime(time.time()))) fluid.io.save_persistables(exe, save_path, train_program) score_path = os.path.join(args.save_path, diff --git a/PaddleNLP/Research/ACL2018-DAM/net.py b/PaddleNLP/Research/ACL2018-DAM/net.py index 240b1493251aec431f0a76c73f3af9586d9e55c1..9db151e01a20cd85230d52e77fc56fb28b184a9d 100755 --- a/PaddleNLP/Research/ACL2018-DAM/net.py +++ b/PaddleNLP/Research/ACL2018-DAM/net.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ Deep Attention Matching Network """ @@ -12,6 +25,7 @@ class Net(object): """ Deep attention matching network """ + def __init__(self, max_turn_num, max_turn_len, vocab_size, emb_size, stack_num, channel1_num, channel2_num): """ diff --git a/PaddleNLP/Research/ACL2018-DAM/reader.py b/PaddleNLP/Research/ACL2018-DAM/reader.py index 6a2653c21e531b949bafddbfde54f058c442eb35..d446f6ac47f0c99e240c5ddc648f6da06d854979 100755 --- a/PaddleNLP/Research/ACL2018-DAM/reader.py +++ b/PaddleNLP/Research/ACL2018-DAM/reader.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ Reader for deep attention matching network """ diff --git a/PaddleNLP/Research/ACL2018-DAM/util.py b/PaddleNLP/Research/ACL2018-DAM/util.py index 88910c9a5b6dcb5a462ba5ca962ad72c782737af..a604fffef0cc969edc929688b138bd020e37c983 100755 --- a/PaddleNLP/Research/ACL2018-DAM/util.py +++ b/PaddleNLP/Research/ACL2018-DAM/util.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ Utils """ @@ -20,7 +33,7 @@ def mkdir(path): """ Mkdir """ - if not os.path.isdir(path): + if not os.path.isdir(path): if os.path.split(path)[0]: mkdir(os.path.split(path)[0]) else: diff --git a/PaddleNLP/Research/ACL2018-DuReader/src/paragraph_extraction.py b/PaddleNLP/Research/ACL2018-DuReader/src/paragraph_extraction.py index 0267eb1f8367b850b2c4436026ca0d1ebdab615c..d813ffceff61c40cadf229d33924263098fe9681 100644 --- a/PaddleNLP/Research/ACL2018-DuReader/src/paragraph_extraction.py +++ b/PaddleNLP/Research/ACL2018-DuReader/src/paragraph_extraction.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. #!/usr/bin/python #-*- coding:utf-8 -*- @@ -25,9 +38,8 @@ def compute_paragraph_score(sample): doc['segmented_paragraphs_scores'] = [] for p_idx, para_tokens in enumerate(doc['segmented_paragraphs']): if len(question) > 0: - related_score = metric_max_over_ground_truths(f1_score, - para_tokens, - [question]) + related_score = metric_max_over_ground_truths( + f1_score, para_tokens, [question]) else: related_score = 0.0 doc['segmented_paragraphs_scores'].append(related_score) @@ -63,7 +75,7 @@ def dup_remove(doc): prev_del_num = 0 del_num = 0 for p_idx in del_ids: - if p_idx < para_id: + if p_idx < para_id: prev_del_num += 1 del doc["segmented_paragraphs"][p_idx - del_num] del doc["segmented_paragraphs_scores"][p_idx - del_num] @@ -142,7 +154,8 @@ def paragraph_selection(sample, mode): para_infos = [] for p_idx, (para_tokens, para_scores) in \ enumerate(zip(doc['segmented_paragraphs'], doc['segmented_paragraphs_scores'])): - para_infos.append((para_tokens, para_scores, len(para_tokens), p_idx)) + para_infos.append( + (para_tokens, para_scores, len(para_tokens), p_idx)) para_infos.sort(key=lambda x: (-x[1], x[2])) topN_idx = [] for para_info in para_infos[:topN]: @@ -158,7 +171,7 @@ def paragraph_selection(sample, mode): break if doc_id == d_idx and id == para_id and mode == "train": continue - total_len += 1 + doc['paragraphs_length'][id] + total_len += 1 + doc['paragraphs_length'][id] final_idx.append(id) total_segmented_content = copy.deepcopy(segmented_title) final_idx.sort() @@ -168,7 +181,8 @@ def paragraph_selection(sample, mode): incre_len += 1 + doc['paragraphs_length'][id] if doc_id == d_idx and id == para_id: incre_len += 1 - total_segmented_content += [splitter] + doc['segmented_paragraphs'][id] + total_segmented_content += [splitter] + doc['segmented_paragraphs'][ + id] if doc_id == d_idx: answer_start = incre_len + sample['answer_spans'][0][0] answer_end = incre_len + sample['answer_spans'][0][1] @@ -191,9 +205,9 @@ if __name__ == "__main__": try: sample = json.loads(line, encoding='utf8') except: - print >>sys.stderr, "Invalid input json format - '{}' will be ignored".format(line) + print >> sys.stderr, "Invalid input json format - '{}' will be ignored".format( + line) continue compute_paragraph_score(sample) paragraph_selection(sample, mode) print(json.dumps(sample, encoding='utf8', ensure_ascii=False)) - diff --git a/PaddleNLP/Research/ACL2018-DuReader/utils/marco_tokenize_data.py b/PaddleNLP/Research/ACL2018-DuReader/utils/marco_tokenize_data.py index 7273e7046e82cd816298af81c175ab10136b16d0..38e56b5e67c54f9867856f7e477697fdbe5d0bd3 100644 --- a/PaddleNLP/Research/ACL2018-DuReader/utils/marco_tokenize_data.py +++ b/PaddleNLP/Research/ACL2018-DuReader/utils/marco_tokenize_data.py @@ -1,8 +1,22 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. #coding=utf8 import os, sys, json import nltk + def _nltk_tokenize(sequence): tokens = nltk.word_tokenize(sequence) @@ -11,10 +25,12 @@ def _nltk_tokenize(sequence): token_words = [] for token in tokens: cur_char_offset = sequence.find(token, cur_char_offset) - token_offsets.append([cur_char_offset, cur_char_offset + len(token) - 1]) + token_offsets.append( + [cur_char_offset, cur_char_offset + len(token) - 1]) token_words.append(token) return token_offsets, token_words + def segment(input_js): _, input_js['segmented_question'] = _nltk_tokenize(input_js['question']) for doc_id, doc in enumerate(input_js['documents']): @@ -36,7 +52,7 @@ if __name__ == '__main__': exit() nltk.download('punkt') - + for line in open(sys.argv[1]): dureader_js = json.loads(line.strip()) segment(dureader_js) diff --git a/PaddleNLP/Research/ACL2018-DuReader/utils/marcov1_to_dureader.py b/PaddleNLP/Research/ACL2018-DuReader/utils/marcov1_to_dureader.py index 022db4dd1bdf98d2a7e0ead659e988ff109b59e9..833844824eb30a00ee1f6364afe52e66df412bef 100644 --- a/PaddleNLP/Research/ACL2018-DuReader/utils/marcov1_to_dureader.py +++ b/PaddleNLP/Research/ACL2018-DuReader/utils/marcov1_to_dureader.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. #coding=utf8 import sys diff --git a/PaddleNLP/Research/ACL2018-DuReader/utils/marcov2_to_v1_tojsonl.py b/PaddleNLP/Research/ACL2018-DuReader/utils/marcov2_to_v1_tojsonl.py index c301e12f6edd566af2ede63c3c8780daedd75468..5b102200bc9a03670fb2df19a8023d47f0d3d33d 100644 --- a/PaddleNLP/Research/ACL2018-DuReader/utils/marcov2_to_v1_tojsonl.py +++ b/PaddleNLP/Research/ACL2018-DuReader/utils/marcov2_to_v1_tojsonl.py @@ -1,6 +1,19 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import sys import json -import pandas as pd +import pandas as pd if __name__ == '__main__': if len(sys.argv) != 3: @@ -11,4 +24,4 @@ if __name__ == '__main__': df = pd.read_json(infile) with open(outfile, 'w') as f: for row in df.iterrows(): - f.write(row[1].to_json() + '\n') \ No newline at end of file + f.write(row[1].to_json() + '\n') diff --git a/PaddleNLP/Research/ACL2019-JEMT/config.py b/PaddleNLP/Research/ACL2019-JEMT/config.py index d56fe2f82624672aaf40bae2662ad25394f30df2..920fdc34d1b6d970da6d8fed47e26b8ef73bd48c 100644 --- a/PaddleNLP/Research/ACL2019-JEMT/config.py +++ b/PaddleNLP/Research/ACL2019-JEMT/config.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. class TrainTaskConfig(object): # support both CPU and GPU now. use_gpu = True diff --git a/PaddleNLP/Research/ACL2019-JEMT/desc.py b/PaddleNLP/Research/ACL2019-JEMT/desc.py index 857ef02a614f002e2774cc15de5d07e2e2109f8e..07326bf4b7f06954d7accb620a0ffdec2383df51 100644 --- a/PaddleNLP/Research/ACL2019-JEMT/desc.py +++ b/PaddleNLP/Research/ACL2019-JEMT/desc.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. # The placeholder for batch_size in compile time. Must be -1 currently to be # consistent with some ops' infer-shape output in compile time, such as the # sequence_expand op used in beamsearch decoder. @@ -65,43 +78,37 @@ input_descs = { # Names of word embedding table which might be reused for weight sharing. word_emb_param_names = ( "src_word_emb_table", - "trg_word_emb_table", -) + "trg_word_emb_table", ) phone_emb_param_name = "phone_emb_table" # Names of position encoding table which will be initialized externally. pos_enc_param_names = ( "src_pos_enc_table", - "trg_pos_enc_table", -) + "trg_pos_enc_table", ) # separated inputs for different usages. encoder_data_input_fields = ( "src_word", "src_pos", "src_slf_attn_bias", "src_phone", - "src_phone_mask", -) + "src_phone_mask", ) decoder_data_input_fields = ( "trg_word", "trg_pos", "trg_slf_attn_bias", "trg_src_attn_bias", - "enc_output", -) + "enc_output", ) label_data_input_fields = ( "lbl_word", - "lbl_weight", -) + "lbl_weight", ) # In fast decoder, trg_pos (only containing the current time step) is generated # by ops and trg_slf_attn_bias is not needed. fast_decoder_data_input_fields = ( "trg_word", "init_score", "init_idx", - "trg_src_attn_bias", -) + "trg_src_attn_bias", ) # Set seed for CE dropout_seed = None diff --git a/PaddleNLP/Research/ACL2019-JEMT/infer.py b/PaddleNLP/Research/ACL2019-JEMT/infer.py index 15c6dc18c954e2dd09b006e49a260c9a2aa8f303..08d1c7d80ef5976236c450a40a4d9cb886d7b675 100644 --- a/PaddleNLP/Research/ACL2019-JEMT/infer.py +++ b/PaddleNLP/Research/ACL2019-JEMT/infer.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import argparse import ast import multiprocessing @@ -86,10 +99,8 @@ def parse_args(): trg_dict = reader.DataReader.load_dict(args.trg_vocab_fpath) phone_dict = reader.DataReader.load_dict(args.phoneme_vocab_fpath) dict_args = [ - "src_vocab_size", - str(len(src_dict)), "trg_vocab_size", - str(len(trg_dict)), "phone_vocab_size", - str(len(phone_dict)), "bos_idx", + "src_vocab_size", str(len(src_dict)), "trg_vocab_size", + str(len(trg_dict)), "phone_vocab_size", str(len(phone_dict)), "bos_idx", str(src_dict[args.special_token[0]]), "eos_idx", str(src_dict[args.special_token[1]]), "unk_idx", str(src_dict[args.special_token[2]]) @@ -147,10 +158,10 @@ def prepare_batch_input(insts, data_input_names, src_pad_idx, phone_pad_idx, # beamsearch_op must use tensors with lod init_score = to_lodtensor( - np.zeros_like(trg_word, dtype="float32").reshape(-1, 1), place, - [range(trg_word.shape[0] + 1)] * 2) - trg_word = to_lodtensor(trg_word, place, - [range(trg_word.shape[0] + 1)] * 2) + np.zeros_like( + trg_word, dtype="float32").reshape(-1, 1), + place, [range(trg_word.shape[0] + 1)] * 2) + trg_word = to_lodtensor(trg_word, place, [range(trg_word.shape[0] + 1)] * 2) init_idx = np.asarray(range(len(insts)), dtype="int32") data_input_dict = dict( @@ -315,7 +326,8 @@ def fast_infer(args): sub_start = seq_ids.lod()[1][start + j] sub_end = seq_ids.lod()[1][start + j + 1] hyps[i].append(" ".join([ - trg_idx2word[idx] for idx in post_process_seq( + trg_idx2word[idx] + for idx in post_process_seq( np.array(seq_ids)[sub_start:sub_end]) ])) scores[i].append(np.array(seq_scores)[sub_end - 1]) diff --git a/PaddleNLP/Research/ACL2019-JEMT/model.py b/PaddleNLP/Research/ACL2019-JEMT/model.py index c0a9c375372b14d35d64e6368bf39f1c623d4ea2..83e8760a4b4a02e0effc1328b53f89f295b7769d 100644 --- a/PaddleNLP/Research/ACL2019-JEMT/model.py +++ b/PaddleNLP/Research/ACL2019-JEMT/model.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from functools import partial import numpy as np @@ -51,12 +64,12 @@ def position_encoding_init(n_position, d_pos_vec): channels = d_pos_vec position = np.arange(n_position) num_timescales = channels // 2 - log_timescale_increment = ( - np.log(float(1e4) / float(1)) / (num_timescales - 1)) - inv_timescales = np.exp( - np.arange(num_timescales)) * -log_timescale_increment - scaled_time = np.expand_dims(position, 1) * np.expand_dims( - inv_timescales, 0) + log_timescale_increment = (np.log(float(1e4) / float(1)) / + (num_timescales - 1)) + inv_timescales = np.exp(np.arange( + num_timescales)) * -log_timescale_increment + scaled_time = np.expand_dims(position, 1) * np.expand_dims(inv_timescales, + 0) signal = np.concatenate([np.sin(scaled_time), np.cos(scaled_time)], axis=1) signal = np.pad(signal, [[0, 0], [0, np.mod(channels, 2)]], 'constant') position_enc = signal @@ -91,17 +104,15 @@ def multi_head_attention(queries, """ Add linear projection to queries, keys, and values. """ - q = layers.fc( - input=queries, - size=d_key * n_head, - bias_attr=False, - num_flatten_dims=2) + q = layers.fc(input=queries, + size=d_key * n_head, + bias_attr=False, + num_flatten_dims=2) # For encoder-decoder attention in inference, insert the ops and vars # into global block to use as cache among beam search. fc_layer = wrap_layer_with_block( - layers.fc, - fluid.default_main_program().current_block(). - parent_idx) if cache is not None and static_kv else layers.fc + layers.fc, fluid.default_main_program().current_block( + ).parent_idx) if cache is not None and static_kv else layers.fc k = fc_layer( input=keys, size=d_key * n_head, @@ -132,12 +143,12 @@ def multi_head_attention(queries, # into global block to use as cache among beam search. reshape_layer = wrap_layer_with_block( layers.reshape, - fluid.default_main_program().current_block(). - parent_idx) if cache is not None and static_kv else layers.reshape + fluid.default_main_program().current_block( + ).parent_idx) if cache is not None and static_kv else layers.reshape transpose_layer = wrap_layer_with_block( layers.transpose, - fluid.default_main_program().current_block().parent_idx - ) if cache is not None and static_kv else layers.transpose + fluid.default_main_program().current_block(). + parent_idx) if cache is not None and static_kv else layers.transpose reshaped_k = reshape_layer( x=keys, shape=[0, 0, n_head, d_key], inplace=True) k = transpose_layer(x=reshaped_k, perm=[0, 2, 1, 3]) @@ -214,8 +225,10 @@ def multi_head_attention(queries, out = __combine_heads(ctx_multiheads) # Project back to the model size. - proj_out = layers.fc( - input=out, size=d_model, bias_attr=False, num_flatten_dims=2) + proj_out = layers.fc(input=out, + size=d_model, + bias_attr=False, + num_flatten_dims=2) return proj_out @@ -225,14 +238,13 @@ def positionwise_feed_forward(x, d_inner_hid, d_hid, dropout_rate): This module consists of two linear transformations with a ReLU activation in between, which is applied to each position separately and identically. """ - hidden = layers.fc( - input=x, size=d_inner_hid, num_flatten_dims=2, act="relu") + hidden = layers.fc(input=x, + size=d_inner_hid, + num_flatten_dims=2, + act="relu") if dropout_rate: hidden = layers.dropout( - hidden, - dropout_prob=dropout_rate, - seed=dropout_seed, - is_test=False) + hidden, dropout_prob=dropout_rate, seed=dropout_seed, is_test=False) out = layers.fc(input=hidden, size=d_hid, num_flatten_dims=2) return out @@ -313,8 +325,7 @@ def prepare_encoder(src_word, param_attr=fluid.ParamAttr( name=pos_enc_param_names[0], trainable=False)) src_pos_enc.stop_gradient = True - enc_input = ( - 1 - beta) * src_word_emb + beta * mean_phone_emb + src_pos_enc + enc_input = (1 - beta) * src_word_emb + beta * mean_phone_emb + src_pos_enc return layers.dropout( enc_input, dropout_prob=dropout_rate, seed=dropout_seed, is_test=False) if dropout_rate else enc_input @@ -374,8 +385,8 @@ def encoder_layer(enc_input, """ attn_output = multi_head_attention( pre_process_layer(enc_input, preprocess_cmd, - prepostprocess_dropout), None, None, attn_bias, - d_key, d_value, d_model, n_head, attention_dropout) + prepostprocess_dropout), None, None, attn_bias, d_key, + d_value, d_model, n_head, attention_dropout) attn_output = post_process_layer(enc_input, attn_output, postprocess_cmd, prepostprocess_dropout) ffd_output = positionwise_feed_forward( @@ -415,8 +426,7 @@ def encoder(enc_input, attention_dropout, relu_dropout, preprocess_cmd, - postprocess_cmd, - ) + postprocess_cmd, ) enc_input = enc_output enc_output = pre_process_layer(enc_output, preprocess_cmd, prepostprocess_dropout) @@ -459,8 +469,7 @@ def decoder_layer(dec_input, dec_input, slf_attn_output, postprocess_cmd, - prepostprocess_dropout, - ) + prepostprocess_dropout, ) enc_attn_output = multi_head_attention( pre_process_layer(slf_attn_output, preprocess_cmd, prepostprocess_dropout), @@ -479,21 +488,18 @@ def decoder_layer(dec_input, slf_attn_output, enc_attn_output, postprocess_cmd, - prepostprocess_dropout, - ) + prepostprocess_dropout, ) ffd_output = positionwise_feed_forward( pre_process_layer(enc_attn_output, preprocess_cmd, prepostprocess_dropout), d_inner_hid, d_model, - relu_dropout, - ) + relu_dropout, ) dec_output = post_process_layer( enc_attn_output, ffd_output, postprocess_cmd, - prepostprocess_dropout, - ) + prepostprocess_dropout, ) return dec_output @@ -632,8 +638,7 @@ def transformer(src_vocab_size, postprocess_cmd, weight_sharing, beta, - enc_inputs, - ) + enc_inputs, ) predict = wrap_decoder( trg_vocab_size, @@ -651,14 +656,14 @@ def transformer(src_vocab_size, postprocess_cmd, weight_sharing, dec_inputs, - enc_output, - ) + enc_output, ) # Padding index do not contribute to the total loss. The weights is used to # cancel padding index in calculating the loss. if label_smooth_eps: label = layers.label_smooth( - label=layers.one_hot(input=label, depth=trg_vocab_size), + label=layers.one_hot( + input=label, depth=trg_vocab_size), epsilon=label_smooth_eps) cost = layers.softmax_with_cross_entropy( @@ -730,8 +735,7 @@ def wrap_encoder(src_vocab_size, attention_dropout, relu_dropout, preprocess_cmd, - postprocess_cmd, - ) + postprocess_cmd, ) return enc_output @@ -803,8 +807,9 @@ def wrap_decoder(trg_vocab_size, word_emb_param_names[0]), transpose_y=True) else: - predict = layers.fc( - input=dec_output, size=trg_vocab_size, bias_attr=False) + predict = layers.fc(input=dec_output, + size=trg_vocab_size, + bias_attr=False) if dec_inputs is None: # Return probs for independent decoder program. predict = layers.softmax(predict) @@ -879,8 +884,7 @@ def fast_decode(src_vocab_size, force_cpu=True) step_idx = layers.fill_constant( shape=[1], dtype=start_tokens.dtype, value=0, force_cpu=True) - cond = layers.less_than( - x=step_idx, y=max_len) # default force_cpu=True + cond = layers.less_than(x=step_idx, y=max_len) # default force_cpu=True while_op = layers.While(cond) # array states will be stored for each step. ids = layers.array_write( diff --git a/PaddleNLP/Research/ACL2019-JEMT/reader.py b/PaddleNLP/Research/ACL2019-JEMT/reader.py index 26a486c8556400188edf7a31b1137a3a22b51c04..e6cf619cf5de78e2f8761224151fb2087152ee51 100644 --- a/PaddleNLP/Research/ACL2019-JEMT/reader.py +++ b/PaddleNLP/Research/ACL2019-JEMT/reader.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import glob import six import os @@ -302,9 +315,8 @@ class DataReader(object): f = tarfile.open(fpaths[0], "r") for line in f.extractfile(tar_fname): fields = line.strip("\n").split(self._field_delimiter) - if (not self._only_src - and len(fields) == 2) or (self._only_src - and len(fields) == 1): + if (not self._only_src and len(fields) == 2) or ( + self._only_src and len(fields) == 1): yield fields else: for fpath in fpaths: @@ -381,5 +393,5 @@ class DataReader(object): for idx in batch_ids] else: yield [(self._src_seq_ids[idx], self._src_phone_ids[idx], - self._trg_seq_ids[idx][:-1], - self._trg_seq_ids[idx][1:]) for idx in batch_ids] + self._trg_seq_ids[idx][:-1], self._trg_seq_ids[idx][1:]) + for idx in batch_ids] diff --git a/PaddleNLP/Research/ACL2019-JEMT/train.py b/PaddleNLP/Research/ACL2019-JEMT/train.py index d33b0c1d09fff462b2b87d3eb6a6a033fb4a62f6..03afbce2a5be22ff99574fcf7fafcd2984003844 100644 --- a/PaddleNLP/Research/ACL2019-JEMT/train.py +++ b/PaddleNLP/Research/ACL2019-JEMT/train.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import argparse import ast import copy @@ -141,10 +154,8 @@ def parse_args(): trg_dict = reader.DataReader.load_dict(args.trg_vocab_fpath) phone_dict = reader.DataReader.load_dict(args.phoneme_vocab_fpath) dict_args = [ - "src_vocab_size", - str(len(src_dict)), "trg_vocab_size", - str(len(trg_dict)), "phone_vocab_size", - str(len(phone_dict)), "bos_idx", + "src_vocab_size", str(len(src_dict)), "trg_vocab_size", + str(len(trg_dict)), "phone_vocab_size", str(len(phone_dict)), "bos_idx", str(src_dict[args.special_token[0]]), "eos_idx", str(src_dict[args.special_token[1]]), "unk_idx", str(src_dict[args.special_token[2]]) @@ -157,8 +168,8 @@ def parse_args(): def append_nccl2_prepare(startup_prog, trainer_id, worker_endpoints, current_endpoint): - assert (trainer_id >= 0 and len(worker_endpoints) > 1 - and current_endpoint in worker_endpoints) + assert (trainer_id >= 0 and len(worker_endpoints) > 1 and + current_endpoint in worker_endpoints) eps = copy.deepcopy(worker_endpoints) eps.remove(current_endpoint) nccl_id_var = startup_prog.global_block().create_var( @@ -189,8 +200,8 @@ def pad_phoneme_data(phoneme_seqs, pad_idx, max_seq_len): batch_size = len(phoneme_seqs) phoneme_data = pad_idx * np.ones( (batch_size, max_seq_len, max_ph_seq_len), dtype=np.int64) - phoneme_mask = np.zeros((batch_size, max_seq_len, max_ph_seq_len), - dtype=np.int64) + phoneme_mask = np.zeros( + (batch_size, max_seq_len, max_ph_seq_len), dtype=np.int64) for i in range(batch_size): cur_ph_seq = phoneme_seqs[i] @@ -237,17 +248,16 @@ def pad_batch_data(insts, if is_target: # This is used to avoid attention on paddings and subsequent # words. - slf_attn_bias_data = np.ones((inst_data.shape[0], max_len, - max_len)) + slf_attn_bias_data = np.ones((inst_data.shape[0], max_len, max_len)) slf_attn_bias_data = np.triu(slf_attn_bias_data, 1).reshape([-1, 1, max_len, max_len]) slf_attn_bias_data = np.tile(slf_attn_bias_data, [1, n_head, 1, 1]) * [-1e9] else: # This is used to avoid attention on paddings. - slf_attn_bias_data = np.array( - [[0] * len(inst) + [-1e9] * (max_len - len(inst)) - for inst in insts]) + slf_attn_bias_data = np.array([[0] * len(inst) + [-1e9] * + (max_len - len(inst)) + for inst in insts]) slf_attn_bias_data = np.tile( slf_attn_bias_data.reshape([-1, 1, 1, max_len]), [1, n_head, max_len, 1]) @@ -359,8 +369,8 @@ def prepare_data_generator(args, for item in data_reader(): inst_num_per_part = len(item) // count for i in range(count): - yield item[inst_num_per_part * i:inst_num_per_part * - (i + 1)] + yield item[inst_num_per_part * i:inst_num_per_part * (i + 1 + )] return __impl__ @@ -401,8 +411,8 @@ def prepare_feed_dict_list(data_generator, init_flag, count): feed_dict_list.append(pos_enc_tables) else: feed_dict_list[idx] = dict( - list(pos_enc_tables.items()) + - list(feed_dict_list[idx].items())) + list(pos_enc_tables.items()) + list(feed_dict_list[idx] + .items())) return feed_dict_list if len(feed_dict_list) == count else None @@ -487,11 +497,10 @@ def test_context(exe, train_exe, dev_count): data_generator = test_data() while True: try: - feed_dict_list = prepare_feed_dict_list( - data_generator, False, dev_count) - outs = test_exe.run( - fetch_list=[sum_cost.name, token_num.name], - feed=feed_dict_list) + feed_dict_list = prepare_feed_dict_list(data_generator, False, + dev_count) + outs = test_exe.run(fetch_list=[sum_cost.name, token_num.name], + feed=feed_dict_list) except (StopIteration, fluid.core.EOFException): # The current pass is over. if args.use_py_reader: @@ -562,10 +571,10 @@ def train_loop(exe, # the best cross-entropy value with label smoothing loss_normalizer = -((1. - TrainTaskConfig.label_smooth_eps) * np.log( - (1. - TrainTaskConfig.label_smooth_eps)) + - TrainTaskConfig.label_smooth_eps * - np.log(TrainTaskConfig.label_smooth_eps / - (ModelHyperParams.trg_vocab_size - 1) + 1e-20)) + (1. - TrainTaskConfig.label_smooth_eps + )) + TrainTaskConfig.label_smooth_eps * + np.log(TrainTaskConfig.label_smooth_eps / ( + ModelHyperParams.trg_vocab_size - 1) + 1e-20)) step_idx = 0 init_flag = True @@ -583,8 +592,8 @@ def train_loop(exe, batch_id = 0 while True: try: - feed_dict_list = prepare_feed_dict_list( - data_generator, init_flag, dev_count) + feed_dict_list = prepare_feed_dict_list(data_generator, + init_flag, dev_count) outs = train_exe.run( fetch_list=[sum_cost.name, token_num.name] if step_idx % args.fetch_steps == 0 else [], @@ -609,12 +618,11 @@ def train_loop(exe, else: logging.info( "step_idx: %d, epoch: %d, batch: %d, avg loss: %f, " - "normalized loss: %f, ppl: %f, speed: %.2f step/s" - % (step_idx, pass_id, batch_id, total_avg_cost, - total_avg_cost - loss_normalizer, - np.exp([min(total_avg_cost, 100) - ]), args.fetch_steps / - (time.time() - avg_batch_time))) + "normalized loss: %f, ppl: %f, speed: %.2f step/s" % + (step_idx, pass_id, batch_id, total_avg_cost, + total_avg_cost - loss_normalizer, np.exp( + [min(total_avg_cost, 100)]), + args.fetch_steps / (time.time() - avg_batch_time))) avg_batch_time = time.time() if step_idx % TrainTaskConfig.save_freq == 0 and step_idx > 0: @@ -643,8 +651,9 @@ def train_loop(exe, val_avg_cost, val_ppl = test() logging.info( "epoch: %d, val avg loss: %f, val normalized loss: %f, val ppl: %f," - " consumed %fs" % (pass_id, val_avg_cost, val_avg_cost - - loss_normalizer, val_ppl, time_consumed)) + " consumed %fs" % (pass_id, val_avg_cost, + val_avg_cost - loss_normalizer, val_ppl, + time_consumed)) else: logging.info("epoch: %d, consumed %fs" % (pass_id, time_consumed)) if not args.enable_ce: @@ -734,8 +743,8 @@ def train(args): if args.local: logging.info("local start_up:") - train_loop(exe, train_prog, startup_prog, dev_count, sum_cost, - avg_cost, token_num, predict, pyreader) + train_loop(exe, train_prog, startup_prog, dev_count, sum_cost, avg_cost, + token_num, predict, pyreader) else: if args.update_method == "nccl2": trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0")) diff --git a/PaddleNLP/add_license.py b/PaddleNLP/add_license.py new file mode 100644 index 0000000000000000000000000000000000000000..436cf0e0558b1710744c46154c7f6ca689025e6e --- /dev/null +++ b/PaddleNLP/add_license.py @@ -0,0 +1,50 @@ +import os + +filePath = os.getcwd() + + +def get_all_files(dir): + fileDirList = [] + + for root, dirs, files in os.walk(dir): + for file in files: + file_path = os.path.join(root, file) + fileDirList.append(file_path) + for dir in dirs: + dir_path = os.path.join(root, dir) + get_all_files(dir_path) + + return fileDirList + + +fileDirList = get_all_files(filePath) +for code in fileDirList: + split = os.path.splitext(code) + if (split[1] == '.py' and not '__init__' in split[0] and + not '_ce' in split[0]): + + with open(code, 'r') as fz: + content = fz.read() + if content.find('Copyright') >= 0: + fz.close() + continue + else: + string = "# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.\n" \ + "#\n" \ + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n" \ + "# you may not use this file except in compliance with the License.\n" \ + "# You may obtain a copy of the License at\n" \ + "#\n" \ + "# http://www.apache.org/licenses/LICENSE-2.0\n" \ + "#\n" \ + "# Unless required by applicable law or agreed to in writing, software\n" \ + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n" \ + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n" \ + "# See the License for the specific language governing permissions and\n" \ + "# limitations under the License.\n"+content + fz.close() + with open(code, 'w') as f: + f.write(string) + print "file %s write success!" % code + f.close() +print "read and write success!" diff --git a/PaddleNLP/emotion_detection/config.py b/PaddleNLP/emotion_detection/config.py index 1d8f4156e4f118b40072fb12b2efe445f85afb0c..f21d60b9495a23ca74a02ab51a92a149f81dc4ec 100644 --- a/PaddleNLP/emotion_detection/config.py +++ b/PaddleNLP/emotion_detection/config.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ EmoTect config """ @@ -9,10 +22,12 @@ from __future__ import print_function import six import json + class EmoTectConfig(object): """ EmoTect Config """ + def __init__(self, config_path): self._config_dict = self._parse(config_path) @@ -21,7 +36,8 @@ class EmoTectConfig(object): with open(config_path) as json_file: config_dict = json.load(json_file) except Exception: - raise IOError("Error in parsing emotect model config file '%s'" % config_path) + raise IOError("Error in parsing emotect model config file '%s'" % + config_path) else: return config_dict diff --git a/PaddleNLP/emotion_detection/reader.py b/PaddleNLP/emotion_detection/reader.py index 75a1be574d4508b44dd76e7f25842f4073d72e17..197827d802256e1a7b622480d4e7eea77d34b896 100644 --- a/PaddleNLP/emotion_detection/reader.py +++ b/PaddleNLP/emotion_detection/reader.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ EmoTect Reader, data converters for classification data. """ @@ -10,14 +23,13 @@ import numpy as np from utils import load_vocab from utils import data_reader + class EmoTectProcessor(object): """ Processor class for data convertors for EmoTect. """ - def __init__(self, - data_dir, - vocab_path, - random_seed=None): + + def __init__(self, data_dir, vocab_path, random_seed=None): self.data_dir = data_dir self.vocab = load_vocab(vocab_path) self.num_examples = {"train": -1, "dev": -1, "test": -1, "infer": -1} @@ -27,29 +39,33 @@ class EmoTectProcessor(object): """ Load training examples """ - return data_reader(os.path.join(self.data_dir, "train.tsv"), - self.vocab, self.num_examples, "train", epoch) + return data_reader( + os.path.join(self.data_dir, "train.tsv"), self.vocab, + self.num_examples, "train", epoch) def get_dev_examples(self, data_dir): """ Load dev examples """ - return data_reader(os.path.join(self.data_dir, "dev.tsv"), - self.vocab, self.num_examples, "dev") + return data_reader( + os.path.join(self.data_dir, "dev.tsv"), self.vocab, + self.num_examples, "dev") def get_test_examples(self, data_dir): """ Load test examples """ - return data_reader(os.path.join(self.data_dir, "test.tsv"), - self.vocab, self.num_examples, "test") + return data_reader( + os.path.join(self.data_dir, "test.tsv"), self.vocab, + self.num_examples, "test") def get_infer_examples(self, data_dir): """ Load infer querys """ - return data_reader(os.path.join(self.data_dir, "infer.tsv"), - self.vocab, self.num_examples, "infer") + return data_reader( + os.path.join(self.data_dir, "infer.tsv"), self.vocab, + self.num_examples, "infer") def get_labels(self): """ @@ -63,7 +79,8 @@ class EmoTectProcessor(object): """ if phase not in ['train', 'dev', 'test', 'infer']: raise ValueError( - "Unknown phase, which should be in ['train', 'dev', 'test', 'infer'].") + "Unknown phase, which should be in ['train', 'dev', 'test', 'infer']." + ) return self.num_examples[phase] def get_train_progress(self): @@ -77,14 +94,18 @@ class EmoTectProcessor(object): Generate data for train, dev or test """ if phase == "train": - return paddle.batch(self.get_train_examples(self.data_dir, epoch), batch_size) + return paddle.batch( + self.get_train_examples(self.data_dir, epoch), batch_size) elif phase == "dev": - return paddle.batch(self.get_dev_examples(self.data_dir), batch_size) + return paddle.batch( + self.get_dev_examples(self.data_dir), batch_size) elif phase == "test": - return paddle.batch(self.get_test_examples(self.data_dir), batch_size) + return paddle.batch( + self.get_test_examples(self.data_dir), batch_size) elif phase == "infer": - return paddle.batch(self.get_infer_examples(self.data_dir), batch_size) + return paddle.batch( + self.get_infer_examples(self.data_dir), batch_size) else: raise ValueError( - "Unknown phase, which should be in ['train', 'dev', 'test', 'infer'].") - + "Unknown phase, which should be in ['train', 'dev', 'test', 'infer']." + ) diff --git a/PaddleNLP/emotion_detection/run_classifier.py b/PaddleNLP/emotion_detection/run_classifier.py index 6bdf9812f73288e672d4c10fd5f0f6a58a3d3a1b..4dca35a8577cde0d5a6b7cd352d1b65ab0f8091b 100644 --- a/PaddleNLP/emotion_detection/run_classifier.py +++ b/PaddleNLP/emotion_detection/run_classifier.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ Emotion Detection Task """ @@ -25,37 +38,48 @@ import utils parser = argparse.ArgumentParser(__doc__) model_g = utils.ArgumentGroup(parser, "model", "model configuration and paths.") -model_g.add_arg("config_path", str, None, "Path to the json file for EmoTect model config.") -model_g.add_arg("init_checkpoint", str, None, "Init checkpoint to resume training from.") +model_g.add_arg("config_path", str, None, + "Path to the json file for EmoTect model config.") +model_g.add_arg("init_checkpoint", str, None, + "Init checkpoint to resume training from.") model_g.add_arg("output_dir", str, None, "Directory path to save checkpoints") train_g = utils.ArgumentGroup(parser, "training", "training options.") train_g.add_arg("epoch", int, 10, "Number of epoches for training.") -train_g.add_arg("save_steps", int, 10000, "The steps interval to save checkpoints.") -train_g.add_arg("validation_steps", int, 1000, "The steps interval to evaluate model performance.") +train_g.add_arg("save_steps", int, 10000, + "The steps interval to save checkpoints.") +train_g.add_arg("validation_steps", int, 1000, + "The steps interval to evaluate model performance.") train_g.add_arg("lr", float, 0.002, "The Learning rate value for training.") log_g = utils.ArgumentGroup(parser, "logging", "logging related") log_g.add_arg("skip_steps", int, 10, "The steps interval to print loss.") log_g.add_arg("verbose", bool, False, "Whether to output verbose log") -data_g = utils.ArgumentGroup(parser, "data", "Data paths, vocab paths and data processing options") +data_g = utils.ArgumentGroup( + parser, "data", "Data paths, vocab paths and data processing options") data_g.add_arg("data_dir", str, None, "Directory path to training data.") data_g.add_arg("vocab_path", str, None, "Vocabulary path.") -data_g.add_arg("batch_size", int, 256, "Total examples' number in batch for training.") +data_g.add_arg("batch_size", int, 256, + "Total examples' number in batch for training.") data_g.add_arg("random_seed", int, 0, "Random seed.") run_type_g = utils.ArgumentGroup(parser, "run_type", "running type options.") run_type_g.add_arg("use_cuda", bool, False, "If set, use GPU for training.") -run_type_g.add_arg("task_name", str, None, "The name of task to perform sentiment classification.") +run_type_g.add_arg("task_name", str, None, + "The name of task to perform sentiment classification.") run_type_g.add_arg("do_train", bool, False, "Whether to perform training.") run_type_g.add_arg("do_val", bool, False, "Whether to perform evaluation.") run_type_g.add_arg("do_infer", bool, False, "Whether to perform inference.") -parser.add_argument('--enable_ce', action='store_true', help='If set, run the task with continuous evaluation logs.') +parser.add_argument( + '--enable_ce', + action='store_true', + help='If set, run the task with continuous evaluation logs.') args = parser.parse_args() + def create_model(args, pyreader_name, emotect_config, @@ -98,11 +122,17 @@ def create_model(args, if is_infer: data = fluid.layers.read_file(pyreader) - probs = network(data, None, emotect_config["vocab_size"], class_dim=num_labels, is_infer=True) + probs = network( + data, + None, + emotect_config["vocab_size"], + class_dim=num_labels, + is_infer=True) return pyreader, probs data, label = fluid.layers.read_file(pyreader) - avg_loss, probs = network(data, label, emotect_config["vocab_size"], class_dim=num_labels) + avg_loss, probs = network( + data, label, emotect_config["vocab_size"], class_dim=num_labels) num_seqs = fluid.layers.create_tensor(dtype='int64') accuracy = fluid.layers.accuracy(input=probs, label=label, total=num_seqs) return pyreader, avg_loss, accuracy, num_seqs @@ -118,8 +148,8 @@ def evaluate(exe, test_program, test_pyreader, fetch_list, eval_phase): while True: try: np_loss, np_acc, np_num_seqs = exe.run(program=test_program, - fetch_list=fetch_list, - return_numpy=False) + fetch_list=fetch_list, + return_numpy=False) np_loss = np.array(np_loss) np_acc = np.array(np_acc) np_num_seqs = np.array(np_num_seqs) @@ -131,8 +161,8 @@ def evaluate(exe, test_program, test_pyreader, fetch_list, eval_phase): break time_end = time.time() print("[%s evaluation] avg loss: %f, avg acc: %f, elapsed time: %f s" % - (eval_phase, np.sum(total_cost) / np.sum(total_num_seqs), - np.sum(total_acc) / np.sum(total_num_seqs), time_end - time_begin)) + (eval_phase, np.sum(total_cost) / np.sum(total_num_seqs), + np.sum(total_acc) / np.sum(total_num_seqs), time_end - time_begin)) def infer(exe, infer_program, infer_pyreader, fetch_list, infer_phase): @@ -141,10 +171,11 @@ def infer(exe, infer_program, infer_pyreader, fetch_list, infer_phase): while True: try: batch_probs = exe.run(program=infer_program, - fetch_list=fetch_list, - return_numpy=True) + fetch_list=fetch_list, + return_numpy=True) for probs in batch_probs[0]: - print("%d\t%f\t%f\t%f" % (np.argmax(probs), probs[0], probs[1], probs[2])) + print("%d\t%f\t%f\t%f" % + (np.argmax(probs), probs[0], probs[1], probs[2])) except fluid.core.EOFException as e: infer_pyreader.reset() break @@ -165,9 +196,10 @@ def main(args): exe = fluid.Executor(place) task_name = args.task_name.lower() - processor = reader.EmoTectProcessor(data_dir=args.data_dir, - vocab_path=args.vocab_path, - random_seed=args.random_seed) + processor = reader.EmoTectProcessor( + data_dir=args.data_dir, + vocab_path=args.vocab_path, + random_seed=args.random_seed) num_labels = len(processor.get_labels()) if not (args.do_train or args.do_val or args.do_infer): @@ -180,9 +212,7 @@ def main(args): if args.do_train: train_data_generator = processor.data_generator( - batch_size=args.batch_size, - phase='train', - epoch=args.epoch) + batch_size=args.batch_size, phase='train', epoch=args.epoch) num_train_examples = processor.get_num_examples(phase="train") max_train_steps = args.epoch * num_train_examples // args.batch_size + 1 @@ -210,7 +240,7 @@ def main(args): lower_mem, upper_mem, unit = fluid.contrib.memory_usage( program=train_program, batch_size=args.batch_size) print("Theoretical memory usage in training: %.3f - %.3f %s" % - (lower_mem, upper_mem, unit)) + (lower_mem, upper_mem, unit)) if args.do_val: test_prog = fluid.Program() @@ -241,17 +271,12 @@ def main(args): if args.do_train: if args.init_checkpoint: utils.init_checkpoint( - exe, - args.init_checkpoint, - main_program=startup_prog) + exe, args.init_checkpoint, main_program=startup_prog) elif args.do_val or args.do_infer: if not args.init_checkpoint: raise ValueError("args 'init_checkpoint' should be set if" "only doing validation or infer!") - utils.init_checkpoint( - exe, - args.init_checkpoint, - main_program=test_prog) + utils.init_checkpoint(exe, args.init_checkpoint, main_program=test_prog) if args.do_train: train_exe = exe @@ -288,22 +313,27 @@ def main(args): total_num_seqs.extend(np_num_seqs) if args.verbose: - verbose = "train pyreader queue size: %d, " % train_pyreader.queue.size() + verbose = "train pyreader queue size: %d, " % train_pyreader.queue.size( + ) print(verbose) time_end = time.time() used_time = time_end - time_begin print("step: %d, avg loss: %f, " - "avg acc: %f, speed: %f steps/s" % - (steps, np.sum(total_cost) / np.sum(total_num_seqs), - np.sum(total_acc) / np.sum(total_num_seqs), - args.skip_steps / used_time)) - ce_info.append([np.sum(total_cost) / np.sum(total_num_seqs), np.sum(total_acc) / np.sum(total_num_seqs), used_time]) + "avg acc: %f, speed: %f steps/s" % + (steps, np.sum(total_cost) / np.sum(total_num_seqs), + np.sum(total_acc) / np.sum(total_num_seqs), + args.skip_steps / used_time)) + ce_info.append([ + np.sum(total_cost) / np.sum(total_num_seqs), + np.sum(total_acc) / np.sum(total_num_seqs), used_time + ]) total_cost, total_acc, total_num_seqs = [], [], [] time_begin = time.time() if steps % args.save_steps == 0: - save_path = os.path.join(args.output_dir, "step_" + str(steps)) + save_path = os.path.join(args.output_dir, + "step_" + str(steps)) fluid.io.save_persistables(exe, save_path, train_program) if steps % args.validation_steps == 0: @@ -315,8 +345,8 @@ def main(args): phase='dev', epoch=1)) evaluate(test_exe, test_prog, test_pyreader, - [loss.name, accuracy.name, num_seqs.name], - "dev") + [loss.name, accuracy.name, num_seqs.name], + "dev") except fluid.core.EOFException: save_path = os.path.join(args.output_dir, "step_" + str(steps)) @@ -336,33 +366,25 @@ def main(args): except: print("ce info error") print("kpis\teach_step_duration_%s_card%s\t%s" % - (task_name, card_num, ce_time)) - print("kpis\ttrain_loss_%s_card%s\t%f" % - (task_name, card_num, ce_loss)) - print("kpis\ttrain_acc_%s_card%s\t%f" % - (task_name, card_num, ce_acc)) + (task_name, card_num, ce_time)) + print("kpis\ttrain_loss_%s_card%s\t%f" % (task_name, card_num, ce_loss)) + print("kpis\ttrain_acc_%s_card%s\t%f" % (task_name, card_num, ce_acc)) # evaluate on test set if not args.do_train and args.do_val: test_pyreader.decorate_paddle_reader( processor.data_generator( - batch_size=args.batch_size, - phase='test', - epoch=1)) + batch_size=args.batch_size, phase='test', epoch=1)) print("Final test result:") evaluate(test_exe, test_prog, test_pyreader, - [loss.name, accuracy.name, num_seqs.name], - "test") + [loss.name, accuracy.name, num_seqs.name], "test") # infer if args.do_infer: infer_pyreader.decorate_paddle_reader( processor.data_generator( - batch_size=args.batch_size, - phase='infer', - epoch=1)) - infer(test_exe, test_prog, infer_pyreader, - [probs.name], "infer") + batch_size=args.batch_size, phase='infer', epoch=1)) + infer(test_exe, test_prog, infer_pyreader, [probs.name], "infer") def get_cards(): diff --git a/PaddleNLP/emotion_detection/run_ernie_classifier.py b/PaddleNLP/emotion_detection/run_ernie_classifier.py index 774eab50150a2f5520e56664684ce8dd300787d6..8dfb6b4d4016f759cb54dd9a4dadf215b3f7adbe 100644 --- a/PaddleNLP/emotion_detection/run_ernie_classifier.py +++ b/PaddleNLP/emotion_detection/run_ernie_classifier.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ Emotion Detection Task, based on ERNIE """ diff --git a/PaddleNLP/emotion_detection/utils.py b/PaddleNLP/emotion_detection/utils.py index ac916d25fd3f275ed679302d1fd9309bf240c674..44d108c9f8b3a1427eb5cb5a23d382076da0c14a 100644 --- a/PaddleNLP/emotion_detection/utils.py +++ b/PaddleNLP/emotion_detection/utils.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ EmoTect utilities. """ @@ -16,6 +29,7 @@ import paddle import paddle.fluid as fluid import numpy as np + def str2bool(value): """ String to Boolean @@ -29,6 +43,7 @@ class ArgumentGroup(object): """ Argument Class """ + def __init__(self, parser, title, des): self._group = parser.add_argument_group(title=title, description=des) @@ -92,27 +107,33 @@ def data_reader(file_path, word_dict, num_examples, phrase, epoch=1): cols = line.strip().split("\t") if len(cols) != 1: query = cols[-1] - wids = [word_dict[x] if x in word_dict else unk_id - for x in query.strip().split(" ")] - all_data.append((wids,)) + wids = [ + word_dict[x] if x in word_dict else unk_id + for x in query.strip().split(" ") + ] + all_data.append((wids, )) else: cols = line.strip().split("\t") if len(cols) != 2: sys.stderr.write("[NOTICE] Error Format Line!") continue label = int(cols[0]) - wids = [word_dict[x] if x in word_dict else unk_id - for x in cols[1].split(" ")] + wids = [ + word_dict[x] if x in word_dict else unk_id + for x in cols[1].split(" ") + ] all_data.append((wids, label)) num_examples[phrase] = len(all_data) if phrase == "infer": + def reader(): """ Infer reader function """ for wids in all_data: yield wids + return reader def reader(): @@ -124,6 +145,7 @@ def data_reader(file_path, word_dict, num_examples, phrase, epoch=1): random.shuffle(all_data) for wids, label in all_data: yield wids, label + return reader diff --git a/PaddleNLP/language_representations_kit/ELMo/LAC_demo/network.py b/PaddleNLP/language_representations_kit/ELMo/LAC_demo/network.py index c982c2538b69e7cd7fcdf41485ee86330e552cd5..c6883d1879fc9a5dae6eb39b9ce51de067d5c511 100755 --- a/PaddleNLP/language_representations_kit/ELMo/LAC_demo/network.py +++ b/PaddleNLP/language_representations_kit/ELMo/LAC_demo/network.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ The function lex_net(args) define the lexical analysis network structure """ diff --git a/PaddleNLP/language_representations_kit/ELMo/LAC_demo/reader.py b/PaddleNLP/language_representations_kit/ELMo/LAC_demo/reader.py index 0b247347fca2043bc5a069eabe9d7d247b71b8e6..3c9a7f60513e819d08f807de047fa55955c0df57 100755 --- a/PaddleNLP/language_representations_kit/ELMo/LAC_demo/reader.py +++ b/PaddleNLP/language_representations_kit/ELMo/LAC_demo/reader.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. #coding: utf-8 """ The file_reader converts raw corpus to input. diff --git a/PaddleNLP/language_representations_kit/ELMo/LAC_demo/train.py b/PaddleNLP/language_representations_kit/ELMo/LAC_demo/train.py index 0dae04c49b42525861286e7d45ed1dda92146b20..7e1a027df4ff8999121193ef5f37186a62a0a7d8 100755 --- a/PaddleNLP/language_representations_kit/ELMo/LAC_demo/train.py +++ b/PaddleNLP/language_representations_kit/ELMo/LAC_demo/train.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ This file is used to train the model. """ diff --git a/PaddleNLP/lexical_analysis/evaluate.py b/PaddleNLP/lexical_analysis/evaluate.py index 108dafa98ce6c9de6e8e8ca5d4e2692eff93d2f7..84d4d929822175127d54e31e8cccd57169797e7b 100644 --- a/PaddleNLP/lexical_analysis/evaluate.py +++ b/PaddleNLP/lexical_analysis/evaluate.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. #coding=utf-8 """ evaluate wordseg for LAC and other open-source wordseg tools @@ -20,7 +33,7 @@ def to_unicode(string): def to_set(words): """ cut list to set of (string, off) """ off = 0 - s= set() + s = set() for w in words: if w: s.add((off, w)) @@ -145,7 +158,7 @@ def get_pkuseg_result(sentences): seg = pkuseg.pkuseg() preds = [] for sentence in sentences: - sent_seg = " ".join(seg.cut(sentence)) + sent_seg = " ".join(seg.cut(sentence)) sent_seg = to_unicode(sent_seg) preds.append(sent_seg) return preds @@ -161,7 +174,8 @@ def get_hanlp_result(sentences): preds = [] for sentence in sentences: arraylist = HanLP.segment(sentence) - sent_seg = " ".join([term.toString().split("/")[0] for term in arraylist]) + sent_seg = " ".join( + [term.toString().split("/")[0] for term in arraylist]) sent_seg = to_unicode(sent_seg) preds.append(sent_seg) return preds diff --git a/PaddleNLP/lexical_analysis/reader.py b/PaddleNLP/lexical_analysis/reader.py index 4655c5ebd282e3c7be43f702d7917cb23be1b931..340c154fec02713ae6f976924e91750efe037657 100644 --- a/PaddleNLP/lexical_analysis/reader.py +++ b/PaddleNLP/lexical_analysis/reader.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ The file_reader converts raw corpus to input. """ @@ -9,7 +22,10 @@ import glob def load_kv_dict(dict_path, - reverse=False, delimiter="\t", key_func=None, value_func=None): + reverse=False, + delimiter="\t", + key_func=None, + value_func=None): """ Load key-value dict from file """ @@ -34,11 +50,14 @@ def load_kv_dict(dict_path, class Dataset(object): """data reader""" + def __init__(self, args, mode="train"): # read dict - self.word2id_dict = load_kv_dict(args.word_dict_path, reverse=True, value_func=int) + self.word2id_dict = load_kv_dict( + args.word_dict_path, reverse=True, value_func=int) self.id2word_dict = load_kv_dict(args.word_dict_path) - self.label2id_dict = load_kv_dict(args.label_dict_path, reverse=True, value_func=int) + self.label2id_dict = load_kv_dict( + args.label_dict_path, reverse=True, value_func=int) self.id2label_dict = load_kv_dict(args.label_dict_path) self.word_replace_dict = load_kv_dict(args.word_rep_dict_path) @@ -78,12 +97,12 @@ class Dataset(object): label_ids.append(label_id) return label_ids - def file_reader(self, filename, max_seq_len=64, mode="train"): """ yield (word_idx, target_idx) one by one from file, or yield (word_idx, ) in `infer` mode """ + def wrapper(): fread = io.open(filename, "r", encoding="utf-8") headline = next(fread) @@ -93,9 +112,11 @@ class Dataset(object): for line in fread: words = line.strip("\n").split("\002") word_ids = self.word_to_ids(words) - yield word_ids[0:max_seq_len], [0 for _ in word_ids][0: max_seq_len] + yield word_ids[0:max_seq_len], [0 for _ in word_ids][ + 0:max_seq_len] else: - assert len(headline) == 2 and headline[0] == "text_a" and headline[1] == "label" + assert len(headline) == 2 and headline[ + 0] == "text_a" and headline[1] == "label" for line in fread: words, labels = line.strip("\n").split("\t") word_ids = self.word_to_ids(words.split("\002")) @@ -109,9 +130,21 @@ class Dataset(object): if __name__ == "__main__": parser = argparse.ArgumentParser(__doc__) - parser.add_argument("--word_dict_path", type=str, default="./conf/word.dic", help="word dict") - parser.add_argument("--label_dict_path", type=str, default="./conf/tag.dic", help="label dict") - parser.add_argument("--word_rep_dict_path", type=str, default="./conf/q2b.dic", help="word replace dict") + parser.add_argument( + "--word_dict_path", + type=str, + default="./conf/word.dic", + help="word dict") + parser.add_argument( + "--label_dict_path", + type=str, + default="./conf/tag.dic", + help="label dict") + parser.add_argument( + "--word_rep_dict_path", + type=str, + default="./conf/q2b.dic", + help="word replace dict") args = parser.parse_args() dataset = Dataset(args) data_generator = dataset.file_reader("data/train.tsv") diff --git a/PaddleNLP/lexical_analysis/run_ernie_sequence_labeling.py b/PaddleNLP/lexical_analysis/run_ernie_sequence_labeling.py index 96092dc29e9dc81f1b853d735ca18fe80b52b2fb..3e0c21a2397d08cb3e49358cb124671bb9b1aba8 100644 --- a/PaddleNLP/lexical_analysis/run_ernie_sequence_labeling.py +++ b/PaddleNLP/lexical_analysis/run_ernie_sequence_labeling.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ Sentiment Classification Task """ @@ -28,7 +41,6 @@ from models.representation.ernie import ernie_encoder from models.sequence_labeling import nets import utils - # yapf: disable parser = argparse.ArgumentParser(__doc__) model_g = utils.ArgumentGroup(parser, "model", "model configuration and paths.") diff --git a/PaddleNLP/lexical_analysis/run_sequence_labeling.py b/PaddleNLP/lexical_analysis/run_sequence_labeling.py index 6f20cfe34a4044ffa376f07d63efcebdc5a6ffb7..0dd8707ca2b4504926c1aea1d1d4f367c140d555 100644 --- a/PaddleNLP/lexical_analysis/run_sequence_labeling.py +++ b/PaddleNLP/lexical_analysis/run_sequence_labeling.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ Baidu's open-source Lexical Analysis tool for Chinese, including: 1. Word Segmentation, diff --git a/PaddleNLP/lexical_analysis/utils.py b/PaddleNLP/lexical_analysis/utils.py index 2513d1285e60aee15ed60f79aa593e70528146ac..46d88883277e3274154b2cd39e48b78ffa584015 100644 --- a/PaddleNLP/lexical_analysis/utils.py +++ b/PaddleNLP/lexical_analysis/utils.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ util tools """ @@ -19,6 +32,7 @@ class ArgumentGroup(object): """ Put arguments to one group """ + def __init__(self, parser, title, des): """none""" self._group = parser.add_argument_group(title=title, description=des) @@ -86,7 +100,7 @@ def parse_result(words, crf_decode, dataset): sent_len = offset_list[sent_index + 1] - offset_list[sent_index] last_word = "" last_tag = "" - for tag_index in range(sent_len): # iterate every word in sent + for tag_index in range(sent_len): # iterate every word in sent index = tag_index + offset_list[sent_index] cur_word_id = str(words[index][0]) cur_tag_id = str(crf_decode[index][0]) diff --git a/PaddleNLP/models/classification/nets.py b/PaddleNLP/models/classification/nets.py index 279199e832dc64fec3a2970c510b413c39c18f13..da20ccd3ff0e93bb64f41e94a4e2574e85ab9a70 100644 --- a/PaddleNLP/models/classification/nets.py +++ b/PaddleNLP/models/classification/nets.py @@ -1,9 +1,23 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ This module provide nets for text classification """ import paddle.fluid as fluid + def bow_net(data, label, dict_dim, @@ -192,14 +206,14 @@ def gru_net(data, def textcnn_net(data, - label, - dict_dim, - emb_dim=128, - hid_dim=128, - hid_dim2=96, - class_dim=2, - win_sizes=None, - is_infer=False): + label, + dict_dim, + emb_dim=128, + hid_dim=128, + hid_dim2=96, + class_dim=2, + win_sizes=None, + is_infer=False): """ Textcnn_net """ diff --git a/PaddleNLP/models/matching/bow.py b/PaddleNLP/models/matching/bow.py index c07250e13fc07f70c55850dce33a787fb1432ee3..e862734cd0ff332958968f651fe50e158cc70a98 100644 --- a/PaddleNLP/models/matching/bow.py +++ b/PaddleNLP/models/matching/bow.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ bow class """ diff --git a/PaddleNLP/models/matching/cnn.py b/PaddleNLP/models/matching/cnn.py index 6b36e5797ca307def19b1f2e42dab077744c0e5e..9293759b6cd93869ed2cd58520a798fa0b976815 100644 --- a/PaddleNLP/models/matching/cnn.py +++ b/PaddleNLP/models/matching/cnn.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ cnn class """ @@ -30,8 +43,8 @@ class CNN(object): left_emb = emb_layer.ops(left) right_emb = emb_layer.ops(right) # Presentation context - cnn_layer = layers.SequenceConvPoolLayer( - self.filter_size, self.num_filters, "conv") + cnn_layer = layers.SequenceConvPoolLayer(self.filter_size, + self.num_filters, "conv") left_cnn = cnn_layer.ops(left_emb) right_cnn = cnn_layer.ops(right_emb) # matching layer diff --git a/PaddleNLP/models/matching/gru.py b/PaddleNLP/models/matching/gru.py index 2d7f3e7150f06136857d6379469993f4dc649439..36884c6b578a655d00109fee93635b7e8f29c532 100644 --- a/PaddleNLP/models/matching/gru.py +++ b/PaddleNLP/models/matching/gru.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ gru class """ diff --git a/PaddleNLP/models/matching/losses/hinge_loss.py b/PaddleNLP/models/matching/losses/hinge_loss.py index 664a959957e968ea281e58e30ecf40b59b41a1a6..72cf3d7f34991eb76f1e42ff2669499b7a19f71b 100644 --- a/PaddleNLP/models/matching/losses/hinge_loss.py +++ b/PaddleNLP/models/matching/losses/hinge_loss.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ hinge loss """ diff --git a/PaddleNLP/models/matching/losses/log_loss.py b/PaddleNLP/models/matching/losses/log_loss.py index a62abdfc40e675e6c6d109a8277edcb6c6517480..47743fb0c1ca495e6e191ba1a82316ce04ea7aa4 100644 --- a/PaddleNLP/models/matching/losses/log_loss.py +++ b/PaddleNLP/models/matching/losses/log_loss.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ log loss """ diff --git a/PaddleNLP/models/matching/losses/softmax_cross_entropy_loss.py b/PaddleNLP/models/matching/losses/softmax_cross_entropy_loss.py index 65a1bc31d6aadeb53594e5fed2600355d30f4d2e..882f590458d7ab55a0a6899bf853ee96552606af 100644 --- a/PaddleNLP/models/matching/losses/softmax_cross_entropy_loss.py +++ b/PaddleNLP/models/matching/losses/softmax_cross_entropy_loss.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ softmax loss """ diff --git a/PaddleNLP/models/matching/lstm.py b/PaddleNLP/models/matching/lstm.py index 3a656da74053fe3e1d159204c89f131409e9361c..3af323c5f195abc2db28381231025a5f29cc158c 100644 --- a/PaddleNLP/models/matching/lstm.py +++ b/PaddleNLP/models/matching/lstm.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ lstm class """ diff --git a/PaddleNLP/models/matching/mm_dnn.py b/PaddleNLP/models/matching/mm_dnn.py index 0a0011ed0692682a6e752e9425aedbe3583dbcc7..a9212d0eeb7853d29209312dbf8c2707aa3db504 100644 --- a/PaddleNLP/models/matching/mm_dnn.py +++ b/PaddleNLP/models/matching/mm_dnn.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ MMDNN class """ diff --git a/PaddleNLP/models/matching/optimizers/paddle_optimizers.py b/PaddleNLP/models/matching/optimizers/paddle_optimizers.py index ad3276fa152e044e379c67d0ff16f662bcdebbd0..024a89754a759ba965e850f380e6e5640ae40231 100644 --- a/PaddleNLP/models/matching/optimizers/paddle_optimizers.py +++ b/PaddleNLP/models/matching/optimizers/paddle_optimizers.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ optimizer calss """ @@ -43,5 +56,8 @@ class AdamOptimizer(object): Adam optimizer operation """ adam = fluid.optimizer.AdamOptimizer( - self.learning_rate, beta1=self.beta1, beta2=self.beta2, epsilon=self.epsilon) + self.learning_rate, + beta1=self.beta1, + beta2=self.beta2, + epsilon=self.epsilon) adam.minimize(loss) diff --git a/PaddleNLP/models/matching/paddle_layers.py b/PaddleNLP/models/matching/paddle_layers.py index 9c5baa18ad97ac022c54d69657d967583c756948..4ff82b881de14917d83442b18fe794cd46484b52 100644 --- a/PaddleNLP/models/matching/paddle_layers.py +++ b/PaddleNLP/models/matching/paddle_layers.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ network layers """ @@ -23,9 +36,11 @@ class EmbeddingLayer(object): """ operation """ - emb = fluid.layers.embedding(input=input, size=[ - self.dict_size, self.emb_dim], is_sparse=True, - param_attr=attr.ParamAttr(name=self.name)) + emb = fluid.layers.embedding( + input=input, + size=[self.dict_size, self.emb_dim], + is_sparse=True, + param_attr=attr.ParamAttr(name=self.name)) return emb @@ -44,8 +59,7 @@ class SequencePoolLayer(object): """ operation """ - pool = fluid.layers.sequence_pool( - input=input, pool_type=self.pool_type) + pool = fluid.layers.sequence_pool(input=input, pool_type=self.pool_type) return pool @@ -66,9 +80,12 @@ class FCLayer(object): """ operation """ - fc = fluid.layers.fc(input=input, size=self.fc_dim, param_attr=attr.ParamAttr( - name="%s.w" % self.name), - bias_attr=attr.ParamAttr(name="%s.b" % self.name), act=self.act, name=self.name) + fc = fluid.layers.fc(input=input, + size=self.fc_dim, + param_attr=attr.ParamAttr(name="%s.w" % self.name), + bias_attr=attr.ParamAttr(name="%s.b" % self.name), + act=self.act, + name=self.name) return fc @@ -88,12 +105,16 @@ class DynamicGRULayer(object): """ operation """ - proj = fluid.layers.fc(input=input, size=self.gru_dim * 3, - param_attr=attr.ParamAttr(name="%s_fc.w" % self.name), - bias_attr=attr.ParamAttr(name="%s_fc.b" % self.name)) - gru = fluid.layers.dynamic_gru(input=proj, size=self.gru_dim, - param_attr=attr.ParamAttr(name="%s.w" % self.name), - bias_attr=attr.ParamAttr(name="%s.b" % self.name)) + proj = fluid.layers.fc( + input=input, + size=self.gru_dim * 3, + param_attr=attr.ParamAttr(name="%s_fc.w" % self.name), + bias_attr=attr.ParamAttr(name="%s_fc.b" % self.name)) + gru = fluid.layers.dynamic_gru( + input=proj, + size=self.gru_dim, + param_attr=attr.ParamAttr(name="%s.w" % self.name), + bias_attr=attr.ParamAttr(name="%s.b" % self.name)) return gru @@ -113,12 +134,16 @@ class DynamicLSTMLayer(object): """ operation """ - proj = fluid.layers.fc(input=input, size=self.lstm_dim * 4, - param_attr=attr.ParamAttr(name="%s_fc.w" % self.name), - bias_attr=attr.ParamAttr(name="%s_fc.b" % self.name)) - lstm, _ = fluid.layers.dynamic_lstm(input=proj, size=self.lstm_dim * 4, - param_attr=attr.ParamAttr(name="%s.w" % self.name), - bias_attr=attr.ParamAttr(name="%s.b" % self.name)) + proj = fluid.layers.fc( + input=input, + size=self.lstm_dim * 4, + param_attr=attr.ParamAttr(name="%s_fc.w" % self.name), + bias_attr=attr.ParamAttr(name="%s_fc.b" % self.name)) + lstm, _ = fluid.layers.dynamic_lstm( + input=proj, + size=self.lstm_dim * 4, + param_attr=attr.ParamAttr(name="%s.w" % self.name), + bias_attr=attr.ParamAttr(name="%s.b" % self.name)) return lstm @@ -161,9 +186,12 @@ class SequenceConvPoolLayer(object): """ operation """ - conv = fluid.nets.sequence_conv_pool(input=input, filter_size=self.filter_size, - num_filters=self.num_filters, - param_attr=attr.ParamAttr(name=self.name), act="relu") + conv = fluid.nets.sequence_conv_pool( + input=input, + filter_size=self.filter_size, + num_filters=self.num_filters, + param_attr=attr.ParamAttr(name=self.name), + act="relu") return conv @@ -259,7 +287,8 @@ class SoftmaxWithCrossEntropyLayer(object): """ operation """ - loss = fluid.layers.softmax_with_cross_entropy(logits=input, label=label) + loss = fluid.layers.softmax_with_cross_entropy( + logits=input, label=label) return loss @@ -354,8 +383,8 @@ class ConstantLayer(object): """ operation """ - constant = fluid.layers.fill_constant_batch_size_like( - input, shape, dtype, value) + constant = fluid.layers.fill_constant_batch_size_like(input, shape, + dtype, value) return constant @@ -396,6 +425,7 @@ class SoftsignLayer(object): softsign = fluid.layers.softsign(input) return softsign + # class MatmulLayer(object): # def __init__(self, transpose_x, transpose_y): # self.transpose_x = transpose_x @@ -405,7 +435,6 @@ class SoftsignLayer(object): # matmul = fluid.layers.matmul(x, y, self.transpose_x, self.transpose_y) # return matmul - # class Conv2dLayer(object): # def __init__(self, num_filters, filter_size, act, name): # self.num_filters = num_filters @@ -417,7 +446,6 @@ class SoftsignLayer(object): # conv = fluid.layers.conv2d(input, self.num_filters, self.filter_size, param_attr=attr.ParamAttr(name="%s.w" % self.name), bias_attr=attr.ParamAttr(name="%s.b" % self.name), act=self.act) # return conv - # class Pool2dLayer(object): # def __init__(self, pool_size, pool_type): # self.pool_size = pool_size diff --git a/PaddleNLP/models/neural_machine_translation/transformer/desc.py b/PaddleNLP/models/neural_machine_translation/transformer/desc.py index 5eeb0dbfdacd09325913a182bfbbb7a557b6a72d..3f60414f63ea6fa3eb94c1f0a43d1c1b9e74e0d0 100644 --- a/PaddleNLP/models/neural_machine_translation/transformer/desc.py +++ b/PaddleNLP/models/neural_machine_translation/transformer/desc.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. # The placeholder for batch_size in compile time. Must be -1 currently to be # consistent with some ops' infer-shape output in compile time, such as the # sequence_expand op used in beamsearch decoder. diff --git a/PaddleNLP/models/neural_machine_translation/transformer/model.py b/PaddleNLP/models/neural_machine_translation/transformer/model.py index eec482972f7b3da8418593cd9396d04a7ac414fd..b36731f796107df3b475d4f84d63410b646f4ada 100644 --- a/PaddleNLP/models/neural_machine_translation/transformer/model.py +++ b/PaddleNLP/models/neural_machine_translation/transformer/model.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from functools import partial import numpy as np diff --git a/PaddleNLP/models/representation/ernie.py b/PaddleNLP/models/representation/ernie.py index 90061d9dd6a1f9d4e05539d53b17185d24e86729..23db3ac3316178a99f9e54a931a58cdefece4d11 100644 --- a/PaddleNLP/models/representation/ernie.py +++ b/PaddleNLP/models/representation/ernie.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ This module provides ErnieModel and ErnieConfig """ diff --git a/PaddleNLP/models/sequence_labeling/nets.py b/PaddleNLP/models/sequence_labeling/nets.py index 77db87bae5005d8de7aedbec4f650da1ed33532a..cf9e39a8ab675d558e0eb2632aea69c30ae45949 100644 --- a/PaddleNLP/models/sequence_labeling/nets.py +++ b/PaddleNLP/models/sequence_labeling/nets.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ The function lex_net(args) define the lexical analysis network structure """ @@ -96,8 +109,7 @@ def lex_net(word, target, args, vocab_size, num_labels): input=emission, label=target, param_attr=fluid.ParamAttr( - name='crfw', - learning_rate=crf_lr)) + name='crfw', learning_rate=crf_lr)) crf_decode = fluid.layers.crf_decoding( input=emission, param_attr=fluid.ParamAttr(name='crfw')) avg_cost = fluid.layers.mean(x=crf_cost) diff --git a/PaddleNLP/models/transformer_encoder.py b/PaddleNLP/models/transformer_encoder.py index 5c20735bf47f9b68eefd7bad00229cddb77fd2a2..77908896cd2d5beebecd86cd873fafb22b999407 100644 --- a/PaddleNLP/models/transformer_encoder.py +++ b/PaddleNLP/models/transformer_encoder.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Transformer encoder.""" from __future__ import absolute_import @@ -100,7 +113,7 @@ def multi_head_attention(queries, """ Scaled Dot-Product Attention """ - scaled_q = layers.scale(x=q, scale=d_key ** -0.5) + scaled_q = layers.scale(x=q, scale=d_key**-0.5) product = layers.matmul(x=scaled_q, y=k, transpose_y=True) if attn_bias: product += attn_bias diff --git a/PaddleNLP/preprocess/ernie/task_reader.py b/PaddleNLP/preprocess/ernie/task_reader.py index 209ac795a454ba695a41062dcdead31035e7648f..1ff5c10f5360f5903a3f06065d34b4145dcec16e 100644 --- a/PaddleNLP/preprocess/ernie/task_reader.py +++ b/PaddleNLP/preprocess/ernie/task_reader.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ This module provides reader for classification and sequence labing """ @@ -18,6 +31,7 @@ from preprocess.padding import pad_batch_data class BaseReader(object): """BaseReader for classify and sequence labeling task""" + def __init__(self, vocab_path, label_map_config=None, @@ -211,6 +225,7 @@ class BaseReader(object): class ClassifyReader(BaseReader): """ClassifyReader""" + def _read_tsv(self, input_file, quotechar=None): """Reads a tab separated value file.""" with open(input_file, "r") as f: @@ -239,7 +254,10 @@ class ClassifyReader(BaseReader): # padding padded_token_ids, input_mask, seq_lens = pad_batch_data( - batch_token_ids, pad_idx=self.pad_id, return_input_mask=True, return_seq_lens=True) + batch_token_ids, + pad_idx=self.pad_id, + return_input_mask=True, + return_seq_lens=True) padded_text_type_ids = pad_batch_data( batch_text_type_ids, pad_idx=self.pad_id) padded_position_ids = pad_batch_data( @@ -255,6 +273,7 @@ class ClassifyReader(BaseReader): class SequenceLabelReader(BaseReader): """SequenceLabelReader""" + def _pad_batch_records(self, batch_records): batch_token_ids = [record.token_ids for record in batch_records] batch_text_type_ids = [record.text_type_ids for record in batch_records] @@ -314,7 +333,9 @@ class SequenceLabelReader(BaseReader): position_ids = list(range(len(token_ids))) text_type_ids = [0] * len(token_ids) no_entity_id = len(self.label_map) - 1 - labels = [label if label in self.label_map else u"O" for label in labels] + labels = [ + label if label in self.label_map else u"O" for label in labels + ] label_ids = [no_entity_id] + [ self.label_map[label] for label in labels ] + [no_entity_id] @@ -332,6 +353,7 @@ class SequenceLabelReader(BaseReader): class ExtractEmbeddingReader(BaseReader): """ExtractEmbeddingReader""" + def _pad_batch_records(self, batch_records): batch_token_ids = [record.token_ids for record in batch_records] batch_text_type_ids = [record.text_type_ids for record in batch_records] diff --git a/PaddleNLP/preprocess/padding.py b/PaddleNLP/preprocess/padding.py index 630a4c180ef16c60434b139fa6ba787ea468b761..6094562d396181349bebac9e883f6fca9dc71afc 100644 --- a/PaddleNLP/preprocess/padding.py +++ b/PaddleNLP/preprocess/padding.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ Mask, padding and batching. """ diff --git a/PaddleNLP/preprocess/tokenizer/reader.py b/PaddleNLP/preprocess/tokenizer/reader.py index 46b88a1c63bb97c9fdf73f54ce9ca7d3f5aa3ac3..39274f34f5b7beef680bf95b8fd0681f26e22d03 100644 --- a/PaddleNLP/preprocess/tokenizer/reader.py +++ b/PaddleNLP/preprocess/tokenizer/reader.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ The file_reader converts raw corpus to input. """ @@ -5,6 +18,7 @@ import os import __future__ import io + def file_reader(file_dir, word2id_dict, label2id_dict, @@ -15,6 +29,7 @@ def file_reader(file_dir, """ word_dict_len = max(map(int, word2id_dict.values())) + 1 label_dict_len = max(map(int, label2id_dict.values())) + 1 + def reader(): """ the data generator @@ -24,7 +39,8 @@ def file_reader(file_dir, for filename in files: if not filename.startswith(filename_feature): continue - for line in io.open(os.path.join(root, filename), 'r', encoding='utf8'): + for line in io.open( + os.path.join(root, filename), 'r', encoding='utf8'): index += 1 bad_line = False line = line.strip("\n") @@ -52,8 +68,9 @@ def file_reader(file_dir, else: target_idx.append(int(label2id_dict["O"])) if len(word_idx) != len(target_idx): - continue + continue yield word_idx, target_idx + return reader @@ -68,6 +85,7 @@ def test_reader(file_dir, #print (word2id_dict) word_dict_len = max(map(int, word2id_dict.values())) + 1 label_dict_len = max(map(int, label2id_dict.values())) + 1 + #print word_dict_len #print label_dict_len def reader(): @@ -94,6 +112,7 @@ def test_reader(file_dir, else: word_idx.append(int(word2id_dict["OOV"])) yield word_idx, words + return reader diff --git a/PaddleNLP/preprocess/tokenizer/tokenizer.py b/PaddleNLP/preprocess/tokenizer/tokenizer.py index 910f45a72cf6a2af3dcbd9b1f59abb158c6ece0a..f1f31f717057c91d924ea7fa975b0038bd5c7da7 100644 --- a/PaddleNLP/preprocess/tokenizer/tokenizer.py +++ b/PaddleNLP/preprocess/tokenizer/tokenizer.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ This module provides wordseg tools """ @@ -11,12 +24,13 @@ import time import sys import io -if sys.version_info > (3,): +if sys.version_info > (3, ): sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') else: reload(sys) sys.setdefaultencoding("utf8") + def parse_args(): """ Arguments Parse @@ -26,32 +40,27 @@ def parse_args(): '--batch_size', type=int, default=5, - help='The size of a batch. (default: %(default)d)' - ) + help='The size of a batch. (default: %(default)d)') parser.add_argument( '--model_path', type=str, default='./conf/model', - help='A path to the model. (default: %(default)s)' - ) + help='A path to the model. (default: %(default)s)') parser.add_argument( '--test_data_dir', type=str, default='./data/test_data', - help='A directory with test data files. (default: %(default)s)' - ) + help='A directory with test data files. (default: %(default)s)') parser.add_argument( "--word_dict_path", type=str, default="./conf/word.dic", - help="The path of the word dictionary. (default: %(default)s)" - ) + help="The path of the word dictionary. (default: %(default)s)") parser.add_argument( "--label_dict_path", type=str, default="./conf/tag.dic", - help="The path of the label dictionary. (default: %(default)s)" - ) + help="The path of the label dictionary. (default: %(default)s)") parser.add_argument( "--word_rep_dict_path", type=str, @@ -104,17 +113,15 @@ def infer(args): Tokenize """ id2word_dict = reader.load_dict(args.word_dict_path) - word2id_dict = reader.load_reverse_dict(args.word_dict_path) + word2id_dict = reader.load_reverse_dict(args.word_dict_path) id2label_dict = reader.load_dict(args.label_dict_path) label2id_dict = reader.load_reverse_dict(args.label_dict_path) q2b_dict = reader.load_dict(args.word_rep_dict_path) test_data = paddle.batch( - reader.test_reader(args.test_data_dir, - word2id_dict, - label2id_dict, - q2b_dict), - batch_size = args.batch_size) + reader.test_reader(args.test_data_dir, word2id_dict, label2id_dict, + q2b_dict), + batch_size=args.batch_size) place = fluid.CPUPlace() #place = fluid.CUDAPlace(0) exe = fluid.Executor(place) @@ -130,9 +137,9 @@ def infer(args): #print(word_idx) word_list = [x[1] for x in data] (crf_decode, ) = exe.run(inference_program, - feed={"word":word_idx}, - fetch_list=fetch_targets, - return_numpy=False) + feed={"word": word_idx}, + fetch_list=fetch_targets, + return_numpy=False) lod_info = (crf_decode.lod())[0] np_data = np.array(crf_decode) assert len(data) == len(lod_info) - 1 @@ -145,7 +152,7 @@ def infer(args): cur_full_tag = "" words = word_list[sen_index] for tag_index in range(lod_info[sen_index], - lod_info[sen_index + 1]): + lod_info[sen_index + 1]): cur_word = words[word_index] cur_tag = id2label_dict[str(np_data[tag_index][0])] if cur_tag.endswith("-B") or cur_tag.endswith("O"): diff --git a/PaddleNLP/sentiment_classification/config.py b/PaddleNLP/sentiment_classification/config.py index 49023d27e1c363f63b75d4ec1caa797564e34788..a1b37518bab35af176b385ddfa9ce2d5445a1731 100644 --- a/PaddleNLP/sentiment_classification/config.py +++ b/PaddleNLP/sentiment_classification/config.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ Senta model. """ @@ -11,10 +24,12 @@ import json import numpy as np import paddle.fluid as fluid + class SentaConfig(object): """ Senta Config """ + def __init__(self, config_path): self._config_dict = self._parse(config_path) @@ -24,7 +39,7 @@ class SentaConfig(object): config_dict = json.load(json_file) except Exception: raise IOError("Error in parsing bert model config file '%s'" % - config_path) + config_path) else: return config_dict diff --git a/PaddleNLP/sentiment_classification/reader.py b/PaddleNLP/sentiment_classification/reader.py index 38a0f6e95c8640155f4369f8547279ccb01d5ff3..939cf648353ca81f32f82198890da334833ad788 100644 --- a/PaddleNLP/sentiment_classification/reader.py +++ b/PaddleNLP/sentiment_classification/reader.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ Senta Reader """ @@ -12,15 +25,13 @@ from utils import data_reader import paddle import paddle.fluid as fluid + class SentaProcessor(object): """ Processor class for data convertors for senta """ - def __init__(self, - data_dir, - vocab_path, - random_seed=None): + def __init__(self, data_dir, vocab_path, random_seed=None): self.data_dir = data_dir self.vocab = load_vocab(vocab_path) self.num_examples = {"train": -1, "dev": -1, "infer": -1} @@ -30,19 +41,22 @@ class SentaProcessor(object): """ Load training examples """ - return data_reader((self.data_dir + "/train.tsv"), self.vocab, self.num_examples, "train", epoch) + return data_reader((self.data_dir + "/train.tsv"), self.vocab, + self.num_examples, "train", epoch) def get_dev_examples(self, data_dir, epoch): """ Load dev examples """ - return data_reader((self.data_dir + "/dev.tsv"), self.vocab, self.num_examples, "dev", epoch) + return data_reader((self.data_dir + "/dev.tsv"), self.vocab, + self.num_examples, "dev", epoch) def get_test_examples(self, data_dir, epoch): """ Load test examples """ - return data_reader((self.data_dir + "/test.tsv"), self.vocab, self.num_examples, "infer", epoch) + return data_reader((self.data_dir + "/test.tsv"), self.vocab, + self.num_examples, "infer", epoch) def get_labels(self): """ @@ -70,11 +84,14 @@ class SentaProcessor(object): Generate data for train, dev or infer """ if phase == "train": - return paddle.batch(self.get_train_examples(self.data_dir, epoch), batch_size) + return paddle.batch( + self.get_train_examples(self.data_dir, epoch), batch_size) elif phase == "dev": - return paddle.batch(self.get_dev_examples(self.data_dir, epoch), batch_size) + return paddle.batch( + self.get_dev_examples(self.data_dir, epoch), batch_size) elif phase == "infer": - return paddle.batch(self.get_test_examples(self.data_dir, epoch), batch_size) + return paddle.batch( + self.get_test_examples(self.data_dir, epoch), batch_size) else: raise ValueError( "Unknown phase, which should be in ['train', 'dev', 'infer'].") diff --git a/PaddleNLP/sentiment_classification/run_classifier.py b/PaddleNLP/sentiment_classification/run_classifier.py index 4ad4e56e37d79adf2fbe47616dcaa2287f8009de..c95ecb45a45279010bf442f12d026eb97e691a99 100644 --- a/PaddleNLP/sentiment_classification/run_classifier.py +++ b/PaddleNLP/sentiment_classification/run_classifier.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ Sentiment Classification Task """ diff --git a/PaddleNLP/sentiment_classification/run_ernie_classifier.py b/PaddleNLP/sentiment_classification/run_ernie_classifier.py index c2b910a169b901d5f21fe2f8a3ae3a7609fce54b..d6b8494561a20a201e88bfab4c3e7023e504f81a 100644 --- a/PaddleNLP/sentiment_classification/run_ernie_classifier.py +++ b/PaddleNLP/sentiment_classification/run_ernie_classifier.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ Sentiment Classification Task """ diff --git a/PaddleNLP/sentiment_classification/utils.py b/PaddleNLP/sentiment_classification/utils.py index c117178b224ecf77b05e724c7849444d65729f51..4e261f2edf5e6dcbfa75fd90f860f7a9b30c673c 100644 --- a/PaddleNLP/sentiment_classification/utils.py +++ b/PaddleNLP/sentiment_classification/utils.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ Arguments for configuration """ @@ -31,6 +44,7 @@ class ArgumentGroup(object): """ Argument Class """ + def __init__(self, parser, title, des): self._group = parser.add_argument_group(title=title, description=des) @@ -79,7 +93,7 @@ def init_checkpoint(exe, init_checkpoint_path, main_program): predicate=existed_persitables) print("Load model from {}".format(init_checkpoint_path)) - + def data_reader(file_path, word_dict, num_examples, phrase, epoch): """ Convert word sequence into slot @@ -95,15 +109,17 @@ def data_reader(file_path, word_dict, num_examples, phrase, epoch): sys.stderr.write("[NOTICE] Error Format Line!") continue label = int(cols[1]) - wids = [word_dict[x] if x in word_dict else unk_id - for x in cols[0].split(" ")] + wids = [ + word_dict[x] if x in word_dict else unk_id + for x in cols[0].split(" ") + ] all_data.append((wids, label)) if phrase == "train": random.shuffle(all_data) num_examples[phrase] = len(all_data) - + def reader(): """ Reader Function @@ -111,8 +127,10 @@ def data_reader(file_path, word_dict, num_examples, phrase, epoch): for epoch_index in range(epoch): for doc, label in all_data: yield doc, label + return reader + def load_vocab(file_path): """ load the given vocabulary diff --git a/PaddleNLP/similarity_net/config.py b/PaddleNLP/similarity_net/config.py index b9cc1544f4df552935372949394b891a5caadc31..5e541a0ba8cec51cb63b32833cb03dc5d3fecbfb 100644 --- a/PaddleNLP/similarity_net/config.py +++ b/PaddleNLP/similarity_net/config.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ SimNet config """ @@ -21,12 +34,14 @@ class SimNetConfig(object): with open(config_path) as json_file: config_dict = json.load(json_file) except Exception: - raise IOError("Error in parsing simnet model config file '%s'" % config_path) + raise IOError("Error in parsing simnet model config file '%s'" % + config_path) else: if config_dict["task_mode"] != self.task_mode: raise ValueError( - "the config '{}' does not match the task_mode '{}'".format(self.config_path, self.task_mode)) + "the config '{}' does not match the task_mode '{}'".format( + self.config_path, self.task_mode)) return config_dict def __getitem__(self, key): diff --git a/PaddleNLP/similarity_net/evaluate/unicom_compute_pos_neg.py b/PaddleNLP/similarity_net/evaluate/unicom_compute_pos_neg.py index f9ba7a5130d3acd4451f4eb76559082461941788..13ff8307480dc89271bdb3dd098ea7845fdb224e 100644 --- a/PaddleNLP/similarity_net/evaluate/unicom_compute_pos_neg.py +++ b/PaddleNLP/similarity_net/evaluate/unicom_compute_pos_neg.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ comput unicom """ diff --git a/PaddleNLP/similarity_net/evaluate/unicom_split.py b/PaddleNLP/similarity_net/evaluate/unicom_split.py index 0d93116d48a45697690ddda23e93c93f85069cb9..5edb201b5e9ef4febe029c5b18034948142133b3 100644 --- a/PaddleNLP/similarity_net/evaluate/unicom_split.py +++ b/PaddleNLP/similarity_net/evaluate/unicom_split.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ split unicom file """ diff --git a/PaddleNLP/similarity_net/reader.py b/PaddleNLP/similarity_net/reader.py index a33c57b315dd8c58bc99565102efd4f08229b0f5..d5971c6835d68968ebe7c91203563c8db9bb59ed 100644 --- a/PaddleNLP/similarity_net/reader.py +++ b/PaddleNLP/similarity_net/reader.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ SimNet reader """ @@ -25,15 +38,24 @@ class SimNetProcessor(object): Reader with Pairwise """ if mode == "valid": - with codecs.open(self.args.valid_data_dir, "r", "utf-8") as file: + with codecs.open(self.args.valid_data_dir, "r", + "utf-8") as file: for line in file: query, title, label = line.strip().split("\t") - if len(query) == 0 or len(title) == 0 or len(label) == 0 or not label.isdigit() or int( - label) not in [0, 1]: - logging.warning("line not match format in test file") + if len(query) == 0 or len(title) == 0 or len( + label) == 0 or not label.isdigit() or int( + label) not in [0, 1]: + logging.warning( + "line not match format in test file") continue - query = [self.vocab[word] for word in query.split(" ") if word in self.vocab] - title = [self.vocab[word] for word in title.split(" ") if word in self.vocab] + query = [ + self.vocab[word] for word in query.split(" ") + if word in self.vocab + ] + title = [ + self.vocab[word] for word in title.split(" ") + if word in self.vocab + ] if len(query) == 0: query = [0] if len(title) == 0: @@ -43,27 +65,47 @@ class SimNetProcessor(object): with codecs.open(self.args.test_data_dir, "r", "utf-8") as file: for line in file: query, title, label = line.strip().split("\t") - if len(query) == 0 or len(title) == 0 or len(label) == 0 or not label.isdigit() or int( - label) not in [0, 1]: - logging.warning("line not match format in test file") + if len(query) == 0 or len(title) == 0 or len( + label) == 0 or not label.isdigit() or int( + label) not in [0, 1]: + logging.warning( + "line not match format in test file") continue - query = [self.vocab[word] for word in query.split(" ") if word in self.vocab] - title = [self.vocab[word] for word in title.split(" ") if word in self.vocab] + query = [ + self.vocab[word] for word in query.split(" ") + if word in self.vocab + ] + title = [ + self.vocab[word] for word in title.split(" ") + if word in self.vocab + ] if len(query) == 0: query = [0] if len(title) == 0: title = [0] yield [query, title] else: - with codecs.open(self.args.train_data_dir, "r", "utf-8") as file: + with codecs.open(self.args.train_data_dir, "r", + "utf-8") as file: for line in file: query, pos_title, neg_title = line.strip().split("\t") - if len(query) == 0 or len(pos_title) == 0 or len(neg_title) == 0: - logging.warning("line not match format in test file") + if len(query) == 0 or len(pos_title) == 0 or len( + neg_title) == 0: + logging.warning( + "line not match format in test file") continue - query = [self.vocab[word] for word in query.split(" ") if word in self.vocab] - pos_title = [self.vocab[word] for word in pos_title.split(" ") if word in self.vocab] - neg_title = [self.vocab[word] for word in neg_title.split(" ") if word in self.vocab] + query = [ + self.vocab[word] for word in query.split(" ") + if word in self.vocab + ] + pos_title = [ + self.vocab[word] for word in pos_title.split(" ") + if word in self.vocab + ] + neg_title = [ + self.vocab[word] for word in neg_title.split(" ") + if word in self.vocab + ] if len(query) == 0: query = [0] if len(pos_title) == 0: @@ -77,15 +119,24 @@ class SimNetProcessor(object): Reader with Pointwise """ if mode == "valid": - with codecs.open(self.args.valid_data_dir, "r", "utf-8") as file: + with codecs.open(self.args.valid_data_dir, "r", + "utf-8") as file: for line in file: query, title, label = line.strip().split("\t") - if len(query) == 0 or len(title) == 0 or len(label) == 0 or not label.isdigit() or int( - label) not in [0, 1]: - logging.warning("line not match format in test file") + if len(query) == 0 or len(title) == 0 or len( + label) == 0 or not label.isdigit() or int( + label) not in [0, 1]: + logging.warning( + "line not match format in test file") continue - query = [self.vocab[word] for word in query.split(" ") if word in self.vocab] - title = [self.vocab[word] for word in title.split(" ") if word in self.vocab] + query = [ + self.vocab[word] for word in query.split(" ") + if word in self.vocab + ] + title = [ + self.vocab[word] for word in title.split(" ") + if word in self.vocab + ] if len(query) == 0: query = [0] if len(title) == 0: @@ -95,27 +146,44 @@ class SimNetProcessor(object): with codecs.open(self.args.test_data_dir, "r", "utf-8") as file: for line in file: query, title, label = line.strip().split("\t") - if len(query) == 0 or len(title) == 0 or len(label) == 0 or not label.isdigit() or int( - label) not in [0, 1]: - logging.warning("line not match format in test file") + if len(query) == 0 or len(title) == 0 or len( + label) == 0 or not label.isdigit() or int( + label) not in [0, 1]: + logging.warning( + "line not match format in test file") continue - query = [self.vocab[word] for word in query.split(" ") if word in self.vocab] - title = [self.vocab[word] for word in title.split(" ") if word in self.vocab] + query = [ + self.vocab[word] for word in query.split(" ") + if word in self.vocab + ] + title = [ + self.vocab[word] for word in title.split(" ") + if word in self.vocab + ] if len(query) == 0: query = [0] if len(title) == 0: title = [0] yield [query, title] else: - with codecs.open(self.args.train_data_dir, "r", "utf-8") as file: + with codecs.open(self.args.train_data_dir, "r", + "utf-8") as file: for line in file: query, title, label = line.strip().split("\t") - if len(query) == 0 or len(title) == 0 or len(label) == 0 or not label.isdigit() or int( - label) not in [0, 1]: - logging.warning("line not match format in test file") + if len(query) == 0 or len(title) == 0 or len( + label) == 0 or not label.isdigit() or int( + label) not in [0, 1]: + logging.warning( + "line not match format in test file") continue - query = [self.vocab[word] for word in query.split(" ") if word in self.vocab] - title = [self.vocab[word] for word in title.split(" ") if word in self.vocab] + query = [ + self.vocab[word] for word in query.split(" ") + if word in self.vocab + ] + title = [ + self.vocab[word] for word in title.split(" ") + if word in self.vocab + ] label = int(label) if len(query) == 0: query = [0] @@ -138,8 +206,14 @@ class SimNetProcessor(object): if len(query) == 0 or len(title) == 0: logging.warning("line not match format in test file") continue - query = [self.vocab[word] for word in query.split(" ") if word in self.vocab] - title = [self.vocab[word] for word in title.split(" ") if word in self.vocab] + query = [ + self.vocab[word] for word in query.split(" ") + if word in self.vocab + ] + title = [ + self.vocab[word] for word in title.split(" ") + if word in self.vocab + ] if len(query) == 0: query = [0] if len(title) == 0: diff --git a/PaddleNLP/similarity_net/run_classifier.py b/PaddleNLP/similarity_net/run_classifier.py index 4413c24c6722b2703dd6f758a2febd8924857dae..8e31a1332e300a7e5270a8656a70605a5a203487 100644 --- a/PaddleNLP/similarity_net/run_classifier.py +++ b/PaddleNLP/similarity_net/run_classifier.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ SimNet Task """ @@ -164,16 +177,16 @@ def train(conf_dict, args): infer_program = fluid.default_main_program().clone(for_test=True) avg_cost = loss.compute(pred, label) avg_cost.persistable = True - + # operate Optimization optimizer.ops(avg_cost) executor = fluid.Executor(place) executor.run(fluid.default_startup_program()) if args.init_checkpoint is not None: - utils.init_checkpoint(executor, args.init_checkpoint, - fluid.default_startup_program()) - + utils.init_checkpoint(executor, args.init_checkpoint, + fluid.default_startup_program()) + # Get and run executor parallel_executor = fluid.ParallelExecutor( use_cuda=args.use_cuda, diff --git a/PaddleNLP/similarity_net/utils.py b/PaddleNLP/similarity_net/utils.py index 2f11717e0a1e60a46aec35db4600127ea552e1b1..7e4c50755ee5114977630cffc529630d1346f7ce 100644 --- a/PaddleNLP/similarity_net/utils.py +++ b/PaddleNLP/similarity_net/utils.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. # --coding=utf-8 """ SimNet utilities. @@ -17,6 +30,7 @@ import paddle.fluid as fluid ******functions for file processing****** """ + def load_vocab(file_path): """ load the given vocabulary @@ -47,7 +61,8 @@ def get_result_file(args): """ with codecs.open(args.test_data_dir, "r", "utf-8") as test_file: with codecs.open("predictions.txt", "r", "utf-8") as predictions_file: - with codecs.open(args.test_result_path, "w", "utf-8") as test_result_file: + with codecs.open(args.test_result_path, "w", + "utf-8") as test_result_file: test_datas = [line.strip("\n") for line in test_file] predictions = [line.strip("\n") for line in predictions_file] for test_data, prediction in zip(test_datas, predictions): @@ -287,7 +302,7 @@ def init_checkpoint(exe, init_checkpoint_path, main_program): """ assert os.path.exists( init_checkpoint_path), "[%s] cann't be found." % init_checkpoint_path - + def existed_persitables(var): if not fluid.io.is_persistable(var): return False @@ -299,4 +314,3 @@ def init_checkpoint(exe, init_checkpoint_path, main_program): main_program=main_program, predicate=existed_persitables) print("Load model from {}".format(init_checkpoint_path)) - diff --git a/PaddleNLP/unarchived/chinese_ner/infer.py b/PaddleNLP/unarchived/chinese_ner/infer.py index 4bcb9e0bff5c79d46f1ab550d4098751f616d495..02aa9fb5fb3f0f1944f9335a99485e6e4aff8754 100644 --- a/PaddleNLP/unarchived/chinese_ner/infer.py +++ b/PaddleNLP/unarchived/chinese_ner/infer.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import numpy as np import argparse import time diff --git a/PaddleNLP/unarchived/chinese_ner/reader.py b/PaddleNLP/unarchived/chinese_ner/reader.py index 9aa49c9feea19a46045dd099ee01211e21d72882..c482d5e4b25086d74fafd768098b5159f8bd88b6 100644 --- a/PaddleNLP/unarchived/chinese_ner/reader.py +++ b/PaddleNLP/unarchived/chinese_ner/reader.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import os diff --git a/PaddleNLP/unarchived/chinese_ner/train.py b/PaddleNLP/unarchived/chinese_ner/train.py index 894260d399a4957cf6fed6b573eca6d12ebdee6d..5f0ddb8cb6d87a3308c5880511a457ab9976e88f 100644 --- a/PaddleNLP/unarchived/chinese_ner/train.py +++ b/PaddleNLP/unarchived/chinese_ner/train.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import os import math import time diff --git a/PaddleNLP/unarchived/deep_attention_matching_net/model.py b/PaddleNLP/unarchived/deep_attention_matching_net/model.py index 05c2053113c87491e8470e68970dbd9bedf6961a..e7a531c71a2eec5d1f0262f20ee19f68930c467e 100644 --- a/PaddleNLP/unarchived/deep_attention_matching_net/model.py +++ b/PaddleNLP/unarchived/deep_attention_matching_net/model.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import six import numpy as np import paddle.fluid as fluid diff --git a/PaddleNLP/unarchived/deep_attention_matching_net/test_and_evaluate.py b/PaddleNLP/unarchived/deep_attention_matching_net/test_and_evaluate.py index 006486268c110d35e7fb7d13258bd86245288994..3119f1bfbf9e92c97671b799ee138f0733e77e8c 100644 --- a/PaddleNLP/unarchived/deep_attention_matching_net/test_and_evaluate.py +++ b/PaddleNLP/unarchived/deep_attention_matching_net/test_and_evaluate.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import os import six import numpy as np diff --git a/PaddleNLP/unarchived/deep_attention_matching_net/train_and_evaluate.py b/PaddleNLP/unarchived/deep_attention_matching_net/train_and_evaluate.py index 28d1c655c2c389aad3204e65d91c07fb1b39d224..6a26be1655d03a62ca18a0b2c2f874b7f50f0afd 100644 --- a/PaddleNLP/unarchived/deep_attention_matching_net/train_and_evaluate.py +++ b/PaddleNLP/unarchived/deep_attention_matching_net/train_and_evaluate.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import os import six import numpy as np diff --git a/PaddleNLP/unarchived/deep_attention_matching_net/utils/douban_evaluation.py b/PaddleNLP/unarchived/deep_attention_matching_net/utils/douban_evaluation.py index 4983975a39cdb04c5a0154272cf5035cbc5df3c7..b94c643b363d5085a933e2e49a7de607bdfb0d88 100644 --- a/PaddleNLP/unarchived/deep_attention_matching_net/utils/douban_evaluation.py +++ b/PaddleNLP/unarchived/deep_attention_matching_net/utils/douban_evaluation.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import sys import six import numpy as np diff --git a/PaddleNLP/unarchived/deep_attention_matching_net/utils/evaluation.py b/PaddleNLP/unarchived/deep_attention_matching_net/utils/evaluation.py index 350003c2849ab0228b4c4d610ab955d000bb1def..49dc98ad06a8bd1f3cd0280050c0af7225ba13d4 100644 --- a/PaddleNLP/unarchived/deep_attention_matching_net/utils/evaluation.py +++ b/PaddleNLP/unarchived/deep_attention_matching_net/utils/evaluation.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import sys import six diff --git a/PaddleNLP/unarchived/deep_attention_matching_net/utils/layers.py b/PaddleNLP/unarchived/deep_attention_matching_net/utils/layers.py index 530c6ba5f7b617f99321342102c64a175ed1a651..cc3f12b8cd56886764eee01f2e9c65777c175e53 100644 --- a/PaddleNLP/unarchived/deep_attention_matching_net/utils/layers.py +++ b/PaddleNLP/unarchived/deep_attention_matching_net/utils/layers.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import paddle.fluid as fluid diff --git a/PaddleNLP/unarchived/deep_attention_matching_net/utils/reader.py b/PaddleNLP/unarchived/deep_attention_matching_net/utils/reader.py index b581acc11f2f03465d072128c7605681310c4b31..f89a7c0999e8e1cbd716a3ae7e7b721dbbdaf5a7 100644 --- a/PaddleNLP/unarchived/deep_attention_matching_net/utils/reader.py +++ b/PaddleNLP/unarchived/deep_attention_matching_net/utils/reader.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import six import numpy as np @@ -190,7 +203,8 @@ def make_one_batch_input(data_batches, index): turns = np.array(data_batches["turns"][index]).astype('int64') tt_turns_len = np.array(data_batches["tt_turns_len"][index]).astype('int64') - every_turn_len = np.array(data_batches["every_turn_len"][index]).astype('int64') + every_turn_len = np.array(data_batches["every_turn_len"][index]).astype( + 'int64') response = np.array(data_batches["response"][index]).astype('int64') response_len = np.array(data_batches["response_len"][index]).astype('int64') diff --git a/PaddleNLP/unarchived/deep_attention_matching_net/utils/util.py b/PaddleNLP/unarchived/deep_attention_matching_net/utils/util.py index 9da8571f2c47b6e87219b8c579ae3a7645f6afd5..f521a6f5f8cc0e12f3d7f539a2c289b63aea5b71 100644 --- a/PaddleNLP/unarchived/deep_attention_matching_net/utils/util.py +++ b/PaddleNLP/unarchived/deep_attention_matching_net/utils/util.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import six import os diff --git a/PaddleNLP/unarchived/language_model/gru/infer.py b/PaddleNLP/unarchived/language_model/gru/infer.py index e7595d0bc300faae397869de856b72a7c0e9e680..f10383afb22b3e01c4a353e651f8359dc636cf32 100644 --- a/PaddleNLP/unarchived/language_model/gru/infer.py +++ b/PaddleNLP/unarchived/language_model/gru/infer.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import sys import time import math diff --git a/PaddleNLP/unarchived/language_model/gru/train.py b/PaddleNLP/unarchived/language_model/gru/train.py index 8f27310621fd92bdf9921722b01b3fa638042099..3e6183341b810c4bb9d7ad27d6b4562328552425 100644 --- a/PaddleNLP/unarchived/language_model/gru/train.py +++ b/PaddleNLP/unarchived/language_model/gru/train.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import os import sys import time diff --git a/PaddleNLP/unarchived/language_model/gru/train_on_cloud.py b/PaddleNLP/unarchived/language_model/gru/train_on_cloud.py index 9a912a1e4ffc552f699cd4d9d41999bb3422d369..e5541ee462e4817facc77a4a4957ace85b3d53d2 100644 --- a/PaddleNLP/unarchived/language_model/gru/train_on_cloud.py +++ b/PaddleNLP/unarchived/language_model/gru/train_on_cloud.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import os import sys import time diff --git a/PaddleNLP/unarchived/language_model/gru/utils.py b/PaddleNLP/unarchived/language_model/gru/utils.py index dd03a89835e620dc8432a6ca16392fc5173a12d4..4535999d96addd9a414562bf5ffde44269d6cb0d 100644 --- a/PaddleNLP/unarchived/language_model/gru/utils.py +++ b/PaddleNLP/unarchived/language_model/gru/utils.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import sys import time import numpy as np diff --git a/PaddleNLP/unarchived/machine_reading_comprehension/paragraph_extraction.py b/PaddleNLP/unarchived/machine_reading_comprehension/paragraph_extraction.py index 4a74a9bae639e9d5f607f8ba5f0c66ba93577772..6729b84df294c1f86fd40c23dfa75e531d322872 100644 --- a/PaddleNLP/unarchived/machine_reading_comprehension/paragraph_extraction.py +++ b/PaddleNLP/unarchived/machine_reading_comprehension/paragraph_extraction.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. #!/usr/bin/python #-*- coding:utf-8 -*- diff --git a/PaddleNLP/unarchived/machine_reading_comprehension/utils/marco_tokenize_data.py b/PaddleNLP/unarchived/machine_reading_comprehension/utils/marco_tokenize_data.py index a93c2835623a746bb1d0a36fde9b2ad28dbd2497..38e56b5e67c54f9867856f7e477697fdbe5d0bd3 100644 --- a/PaddleNLP/unarchived/machine_reading_comprehension/utils/marco_tokenize_data.py +++ b/PaddleNLP/unarchived/machine_reading_comprehension/utils/marco_tokenize_data.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. #coding=utf8 import os, sys, json diff --git a/PaddleNLP/unarchived/machine_reading_comprehension/utils/marcov1_to_dureader.py b/PaddleNLP/unarchived/machine_reading_comprehension/utils/marcov1_to_dureader.py index 022db4dd1bdf98d2a7e0ead659e988ff109b59e9..833844824eb30a00ee1f6364afe52e66df412bef 100644 --- a/PaddleNLP/unarchived/machine_reading_comprehension/utils/marcov1_to_dureader.py +++ b/PaddleNLP/unarchived/machine_reading_comprehension/utils/marcov1_to_dureader.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. #coding=utf8 import sys diff --git a/PaddleNLP/unarchived/machine_reading_comprehension/utils/marcov2_to_v1_tojsonl.py b/PaddleNLP/unarchived/machine_reading_comprehension/utils/marcov2_to_v1_tojsonl.py index fcb24756c64e04365c23603e86e09f107a1f7721..5b102200bc9a03670fb2df19a8023d47f0d3d33d 100644 --- a/PaddleNLP/unarchived/machine_reading_comprehension/utils/marcov2_to_v1_tojsonl.py +++ b/PaddleNLP/unarchived/machine_reading_comprehension/utils/marcov2_to_v1_tojsonl.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import sys import json import pandas as pd diff --git a/PaddleNLP/unarchived/neural_machine_translation/rnn_search/attention_model.py b/PaddleNLP/unarchived/neural_machine_translation/rnn_search/attention_model.py index eba1d5f36c09d1314de716902234ae41c9536a15..aef110f5ee25416d5b4113f47ac838c22ebfc863 100644 --- a/PaddleNLP/unarchived/neural_machine_translation/rnn_search/attention_model.py +++ b/PaddleNLP/unarchived/neural_machine_translation/rnn_search/attention_model.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/PaddleNLP/unarchived/neural_machine_translation/transformer/config.py b/PaddleNLP/unarchived/neural_machine_translation/transformer/config.py index 823341ed9084e80b5fe74655bf8db897d72175f0..0be63dee020a26dea9e6faa1fa02e2fd0b573c64 100644 --- a/PaddleNLP/unarchived/neural_machine_translation/transformer/config.py +++ b/PaddleNLP/unarchived/neural_machine_translation/transformer/config.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. class TrainTaskConfig(object): # support both CPU and GPU now. use_gpu = True diff --git a/PaddleNLP/unarchived/neural_machine_translation/transformer/infer.py b/PaddleNLP/unarchived/neural_machine_translation/transformer/infer.py index 4ba1852fd63b8e38ef8091a5d98bece6277905fd..9c4246922c3d9daa91039c438beff2657aaab744 100644 --- a/PaddleNLP/unarchived/neural_machine_translation/transformer/infer.py +++ b/PaddleNLP/unarchived/neural_machine_translation/transformer/infer.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import argparse import ast import multiprocessing diff --git a/PaddleNLP/unarchived/neural_machine_translation/transformer/model.py b/PaddleNLP/unarchived/neural_machine_translation/transformer/model.py index 5b19be6a526852ac348d710800a741d6cfd971c1..f4e6506ad7611aef22f1d0908dff3eb9a2830cf0 100644 --- a/PaddleNLP/unarchived/neural_machine_translation/transformer/model.py +++ b/PaddleNLP/unarchived/neural_machine_translation/transformer/model.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from functools import partial import numpy as np diff --git a/PaddleNLP/unarchived/neural_machine_translation/transformer/optim.py b/PaddleNLP/unarchived/neural_machine_translation/transformer/optim.py index 38ba3416b55521bb61e65eefb7a824b84d028818..a4d034e9e68b9d9548778d410009a7871a96e9e0 100644 --- a/PaddleNLP/unarchived/neural_machine_translation/transformer/optim.py +++ b/PaddleNLP/unarchived/neural_machine_translation/transformer/optim.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import numpy as np import paddle.fluid as fluid diff --git a/PaddleNLP/unarchived/neural_machine_translation/transformer/profile.py b/PaddleNLP/unarchived/neural_machine_translation/transformer/profile.py index 0629f64c1ca04183251a70d1ff935aa0784e8101..d124947b556ce5d0e458aa420fdcb108564a0956 100644 --- a/PaddleNLP/unarchived/neural_machine_translation/transformer/profile.py +++ b/PaddleNLP/unarchived/neural_machine_translation/transformer/profile.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import argparse import ast import contextlib diff --git a/PaddleNLP/unarchived/neural_machine_translation/transformer/reader.py b/PaddleNLP/unarchived/neural_machine_translation/transformer/reader.py index 0a846825e0027d2fef9ee4515a0ac2b887806c1c..4869213ac05987083b0684fe2c39ac6e22f8aa8a 100644 --- a/PaddleNLP/unarchived/neural_machine_translation/transformer/reader.py +++ b/PaddleNLP/unarchived/neural_machine_translation/transformer/reader.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import glob import six import os diff --git a/PaddleNLP/unarchived/neural_machine_translation/transformer/train.py b/PaddleNLP/unarchived/neural_machine_translation/transformer/train.py index 3ae3955be0f654a9240067fe979543d937cdcfb8..fa719c94f4e64b2936ffcece3aa9b3aabc4fe960 100644 --- a/PaddleNLP/unarchived/neural_machine_translation/transformer/train.py +++ b/PaddleNLP/unarchived/neural_machine_translation/transformer/train.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import argparse import ast import copy diff --git a/PaddleNLP/unarchived/sequence_tagging_for_ner/infer.py b/PaddleNLP/unarchived/sequence_tagging_for_ner/infer.py index 9319aad8f15c03177da5b7b6f123ab0afeb90cc0..44f547f0ca53fc91648364ba80965db8e30c2cb8 100644 --- a/PaddleNLP/unarchived/sequence_tagging_for_ner/infer.py +++ b/PaddleNLP/unarchived/sequence_tagging_for_ner/infer.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from __future__ import print_function import numpy as np diff --git a/PaddleNLP/unarchived/sequence_tagging_for_ner/network_conf.py b/PaddleNLP/unarchived/sequence_tagging_for_ner/network_conf.py index 17ee1951bd5955ca7a8534b50380e3f694c047f4..2747176964f2ce920db3a2b2dd553b130546f8a5 100644 --- a/PaddleNLP/unarchived/sequence_tagging_for_ner/network_conf.py +++ b/PaddleNLP/unarchived/sequence_tagging_for_ner/network_conf.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import math import paddle.fluid as fluid diff --git a/PaddleNLP/unarchived/sequence_tagging_for_ner/reader.py b/PaddleNLP/unarchived/sequence_tagging_for_ner/reader.py index 5050d0bf499e59db505758b0af9eed71e6af7de7..aed2dd3d52d3bbea63e1e982468441733d44f629 100644 --- a/PaddleNLP/unarchived/sequence_tagging_for_ner/reader.py +++ b/PaddleNLP/unarchived/sequence_tagging_for_ner/reader.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ Conll03 dataset. """ diff --git a/PaddleNLP/unarchived/sequence_tagging_for_ner/train.py b/PaddleNLP/unarchived/sequence_tagging_for_ner/train.py index 68e621371e09b654007134c8ce449e3491b9516f..64a34d26d2cef52e9710ace20422e9a9a440caa1 100644 --- a/PaddleNLP/unarchived/sequence_tagging_for_ner/train.py +++ b/PaddleNLP/unarchived/sequence_tagging_for_ner/train.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from __future__ import print_function import os diff --git a/PaddleNLP/unarchived/sequence_tagging_for_ner/utils.py b/PaddleNLP/unarchived/sequence_tagging_for_ner/utils.py index f40f1bb19481e34288ede7247f4fbe827be6f590..21567c06dbe1c0e191acadd825da1627960eff2c 100644 --- a/PaddleNLP/unarchived/sequence_tagging_for_ner/utils.py +++ b/PaddleNLP/unarchived/sequence_tagging_for_ner/utils.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. #!/usr/bin/env python # -*- coding: utf-8 -*- import logging diff --git a/PaddleNLP/unarchived/sequence_tagging_for_ner/utils_extend.py b/PaddleNLP/unarchived/sequence_tagging_for_ner/utils_extend.py index 03e7e62fd5f8496d4a9436ad34ec7763b46b460d..930b19ecca5d53031e40ef40a7ec46e92799ba2a 100644 --- a/PaddleNLP/unarchived/sequence_tagging_for_ner/utils_extend.py +++ b/PaddleNLP/unarchived/sequence_tagging_for_ner/utils_extend.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import numpy as np import paddle.fluid as fluid diff --git a/PaddleNLP/unarchived/text_classification/clouds/scdb_parallel_executor.py b/PaddleNLP/unarchived/text_classification/clouds/scdb_parallel_executor.py index d11da00e22d21eca4e9194958d20794291e2fd2d..7a3cbb11365c5d7cbff34c27d1b3d4c28f9fe401 100644 --- a/PaddleNLP/unarchived/text_classification/clouds/scdb_parallel_executor.py +++ b/PaddleNLP/unarchived/text_classification/clouds/scdb_parallel_executor.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import unittest import contextlib import paddle diff --git a/PaddleNLP/unarchived/text_classification/clouds/scdb_single_card.py b/PaddleNLP/unarchived/text_classification/clouds/scdb_single_card.py index e2ba98660d07a3dde694a2bb6ff0bed3a3feaaa8..c75369cae0a8e52ab6fa3014517f321fdc2d163c 100644 --- a/PaddleNLP/unarchived/text_classification/clouds/scdb_single_card.py +++ b/PaddleNLP/unarchived/text_classification/clouds/scdb_single_card.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import unittest import contextlib import paddle diff --git a/PaddleNLP/unarchived/text_classification/infer.py b/PaddleNLP/unarchived/text_classification/infer.py index a858b9a89cdaa5f826ba359756dc4c02be39ce2e..9990e1f443cdc1efd46f7a7f6225e110761f6c36 100644 --- a/PaddleNLP/unarchived/text_classification/infer.py +++ b/PaddleNLP/unarchived/text_classification/infer.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import sys import time import unittest diff --git a/PaddleNLP/unarchived/text_classification/nets.py b/PaddleNLP/unarchived/text_classification/nets.py index 4a7caad99f89ae6db0a748634a7c9b0d6632f2ec..4dc83aa432786ca393dde1f5e3039d3c83d5ff33 100644 --- a/PaddleNLP/unarchived/text_classification/nets.py +++ b/PaddleNLP/unarchived/text_classification/nets.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import sys import time import numpy as np diff --git a/PaddleNLP/unarchived/text_classification/train.py b/PaddleNLP/unarchived/text_classification/train.py index a6978a15d2d58a91998b6941a438d804e3e0ee5e..363e8e704f3aaa2bca70abaa04b308da5207a08f 100644 --- a/PaddleNLP/unarchived/text_classification/train.py +++ b/PaddleNLP/unarchived/text_classification/train.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import os import six import sys diff --git a/PaddleNLP/unarchived/text_classification/utils.py b/PaddleNLP/unarchived/text_classification/utils.py index dce4743d9219aa9ed5ca78b9f690eb1366d92304..ecddc3cf41606064ef65f0bd0725e592337e38e5 100644 --- a/PaddleNLP/unarchived/text_classification/utils.py +++ b/PaddleNLP/unarchived/text_classification/utils.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import os import sys import time diff --git a/PaddleNLP/unarchived/text_matching_on_quora/configs/basic_config.py b/PaddleNLP/unarchived/text_matching_on_quora/configs/basic_config.py index ccc37926b76c70b39e9ef0d28e168b1e56a61721..70c2ee06897116d71742a7eb86f3ea912a67dce4 100755 --- a/PaddleNLP/unarchived/text_matching_on_quora/configs/basic_config.py +++ b/PaddleNLP/unarchived/text_matching_on_quora/configs/basic_config.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from __future__ import print_function diff --git a/PaddleNLP/unarchived/text_matching_on_quora/models/pwim.py b/PaddleNLP/unarchived/text_matching_on_quora/models/pwim.py index 0d7b0bc14b1b6a56364a7aa5435880f71dc67785..7b60ec4823f845cdcd694ae6d1c617a0eeda1909 100644 --- a/PaddleNLP/unarchived/text_matching_on_quora/models/pwim.py +++ b/PaddleNLP/unarchived/text_matching_on_quora/models/pwim.py @@ -1 +1,14 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. # Just for test `git push` diff --git a/PaddleNLP/unarchived/text_matching_on_quora/models/test.py b/PaddleNLP/unarchived/text_matching_on_quora/models/test.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..33ed0ecf10ec4cad807ebb6df1590de65eeeab1e 100644 --- a/PaddleNLP/unarchived/text_matching_on_quora/models/test.py +++ b/PaddleNLP/unarchived/text_matching_on_quora/models/test.py @@ -0,0 +1,13 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.