From 7e2a66b0baeff5edef0672c13b7439f597a5e087 Mon Sep 17 00:00:00 2001 From: Li Fuchen Date: Mon, 23 Sep 2019 16:48:27 +0800 Subject: [PATCH] add license for nlp models (#3390) add license for nlp models --- PaddleNLP/Research/ACL2018-DAM/config.py | 26 +++- PaddleNLP/Research/ACL2018-DAM/evaluation.py | 13 ++ PaddleNLP/Research/ACL2018-DAM/layers.py | 17 +- PaddleNLP/Research/ACL2018-DAM/main.py | 28 ++-- PaddleNLP/Research/ACL2018-DAM/net.py | 14 ++ PaddleNLP/Research/ACL2018-DAM/reader.py | 13 ++ PaddleNLP/Research/ACL2018-DAM/util.py | 15 +- .../src/paragraph_extraction.py | 32 ++-- .../utils/marco_tokenize_data.py | 20 ++- .../utils/marcov1_to_dureader.py | 13 ++ .../utils/marcov2_to_v1_tojsonl.py | 17 +- PaddleNLP/Research/ACL2019-JEMT/config.py | 13 ++ PaddleNLP/Research/ACL2019-JEMT/desc.py | 31 ++-- PaddleNLP/Research/ACL2019-JEMT/infer.py | 30 ++-- PaddleNLP/Research/ACL2019-JEMT/model.py | 106 +++++++------ PaddleNLP/Research/ACL2019-JEMT/reader.py | 22 ++- PaddleNLP/Research/ACL2019-JEMT/train.py | 85 +++++----- PaddleNLP/add_license.py | 50 ++++++ PaddleNLP/emotion_detection/config.py | 18 ++- PaddleNLP/emotion_detection/reader.py | 59 ++++--- PaddleNLP/emotion_detection/run_classifier.py | 132 +++++++++------- .../emotion_detection/run_ernie_classifier.py | 13 ++ PaddleNLP/emotion_detection/utils.py | 32 +++- .../ELMo/LAC_demo/network.py | 13 ++ .../ELMo/LAC_demo/reader.py | 13 ++ .../ELMo/LAC_demo/train.py | 13 ++ PaddleNLP/lexical_analysis/evaluate.py | 20 ++- PaddleNLP/lexical_analysis/reader.py | 51 ++++-- .../run_ernie_sequence_labeling.py | 14 +- .../lexical_analysis/run_sequence_labeling.py | 13 ++ PaddleNLP/lexical_analysis/utils.py | 16 +- PaddleNLP/models/classification/nets.py | 30 +++- PaddleNLP/models/matching/bow.py | 13 ++ PaddleNLP/models/matching/cnn.py | 17 +- PaddleNLP/models/matching/gru.py | 13 ++ .../models/matching/losses/hinge_loss.py | 13 ++ PaddleNLP/models/matching/losses/log_loss.py | 13 ++ .../losses/softmax_cross_entropy_loss.py | 13 ++ PaddleNLP/models/matching/lstm.py | 13 ++ PaddleNLP/models/matching/mm_dnn.py | 13 ++ .../matching/optimizers/paddle_optimizers.py | 18 ++- PaddleNLP/models/matching/paddle_layers.py | 84 ++++++---- .../transformer/desc.py | 13 ++ .../transformer/model.py | 13 ++ PaddleNLP/models/representation/ernie.py | 13 ++ PaddleNLP/models/sequence_labeling/nets.py | 16 +- PaddleNLP/models/transformer_encoder.py | 15 +- PaddleNLP/preprocess/ernie/task_reader.py | 26 +++- PaddleNLP/preprocess/padding.py | 13 ++ PaddleNLP/preprocess/tokenizer/reader.py | 23 ++- PaddleNLP/preprocess/tokenizer/tokenizer.py | 49 +++--- PaddleNLP/sentiment_classification/config.py | 17 +- PaddleNLP/sentiment_classification/reader.py | 37 +++-- .../run_classifier.py | 13 ++ .../run_ernie_classifier.py | 13 ++ PaddleNLP/sentiment_classification/utils.py | 26 +++- PaddleNLP/similarity_net/config.py | 19 ++- .../evaluate/unicom_compute_pos_neg.py | 13 ++ .../similarity_net/evaluate/unicom_split.py | 13 ++ PaddleNLP/similarity_net/reader.py | 146 +++++++++++++----- PaddleNLP/similarity_net/run_classifier.py | 21 ++- PaddleNLP/similarity_net/utils.py | 20 ++- PaddleNLP/unarchived/chinese_ner/infer.py | 13 ++ PaddleNLP/unarchived/chinese_ner/reader.py | 13 ++ PaddleNLP/unarchived/chinese_ner/train.py | 13 ++ .../deep_attention_matching_net/model.py | 13 ++ .../test_and_evaluate.py | 13 ++ .../train_and_evaluate.py | 13 ++ .../utils/douban_evaluation.py | 13 ++ .../utils/evaluation.py | 13 ++ .../utils/layers.py | 13 ++ .../utils/reader.py | 16 +- .../deep_attention_matching_net/utils/util.py | 13 ++ .../unarchived/language_model/gru/infer.py | 13 ++ .../unarchived/language_model/gru/train.py | 13 ++ .../language_model/gru/train_on_cloud.py | 13 ++ .../unarchived/language_model/gru/utils.py | 13 ++ .../paragraph_extraction.py | 13 ++ .../utils/marco_tokenize_data.py | 13 ++ .../utils/marcov1_to_dureader.py | 13 ++ .../utils/marcov2_to_v1_tojsonl.py | 13 ++ .../rnn_search/attention_model.py | 13 ++ .../transformer/config.py | 13 ++ .../transformer/infer.py | 13 ++ .../transformer/model.py | 13 ++ .../transformer/optim.py | 13 ++ .../transformer/profile.py | 13 ++ .../transformer/reader.py | 13 ++ .../transformer/train.py | 13 ++ .../sequence_tagging_for_ner/infer.py | 13 ++ .../sequence_tagging_for_ner/network_conf.py | 13 ++ .../sequence_tagging_for_ner/reader.py | 13 ++ .../sequence_tagging_for_ner/train.py | 13 ++ .../sequence_tagging_for_ner/utils.py | 13 ++ .../sequence_tagging_for_ner/utils_extend.py | 13 ++ .../clouds/scdb_parallel_executor.py | 13 ++ .../clouds/scdb_single_card.py | 13 ++ .../unarchived/text_classification/infer.py | 13 ++ .../unarchived/text_classification/nets.py | 13 ++ .../unarchived/text_classification/train.py | 13 ++ .../unarchived/text_classification/utils.py | 13 ++ .../configs/basic_config.py | 13 ++ .../text_matching_on_quora/models/pwim.py | 13 ++ .../text_matching_on_quora/models/test.py | 13 ++ 104 files changed, 1892 insertions(+), 368 deletions(-) create mode 100644 PaddleNLP/add_license.py diff --git a/PaddleNLP/Research/ACL2018-DAM/config.py b/PaddleNLP/Research/ACL2018-DAM/config.py index 6b9c6649..4f185ef7 100644 --- a/PaddleNLP/Research/ACL2018-DAM/config.py +++ b/PaddleNLP/Research/ACL2018-DAM/config.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ Deep Attention Matching Network """ @@ -5,6 +18,7 @@ Deep Attention Matching Network import argparse import six + def parse_args(): """ Deep Attention Matching Network Config @@ -12,14 +26,14 @@ def parse_args(): parser = argparse.ArgumentParser("DAM Config") parser.add_argument( - '--do_train', - type=bool, - default=False, + '--do_train', + type=bool, + default=False, help='Whether to perform training.') parser.add_argument( - '--do_test', - type=bool, - default=False, + '--do_test', + type=bool, + default=False, help='Whether to perform training.') parser.add_argument( diff --git a/PaddleNLP/Research/ACL2018-DAM/evaluation.py b/PaddleNLP/Research/ACL2018-DAM/evaluation.py index ad0bbc0f..997bab94 100755 --- a/PaddleNLP/Research/ACL2018-DAM/evaluation.py +++ b/PaddleNLP/Research/ACL2018-DAM/evaluation.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ Evaluation """ diff --git a/PaddleNLP/Research/ACL2018-DAM/layers.py b/PaddleNLP/Research/ACL2018-DAM/layers.py index 3d4ab5dc..66a7734b 100755 --- a/PaddleNLP/Research/ACL2018-DAM/layers.py +++ b/PaddleNLP/Research/ACL2018-DAM/layers.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ Layers """ @@ -77,7 +90,7 @@ def dot_product_attention(query, """ logits = fluid.layers.matmul( - x=query, y=key, transpose_y=True, alpha=d_key ** (-0.5)) + x=query, y=key, transpose_y=True, alpha=d_key**(-0.5)) if (q_mask is not None) and (k_mask is not None): if mask_cache is not None and q_mask.name in mask_cache and k_mask.name in mask_cache[ @@ -87,7 +100,7 @@ def dot_product_attention(query, mask = fluid.layers.matmul(x=q_mask, y=k_mask, transpose_y=True) another_mask = fluid.layers.scale( mask, - scale=float(2 ** 32 - 1), + scale=float(2**32 - 1), bias=float(-1), bias_after_scale=False) if mask_cache is not None: diff --git a/PaddleNLP/Research/ACL2018-DAM/main.py b/PaddleNLP/Research/ACL2018-DAM/main.py index 8d01d8cf..950ea49a 100755 --- a/PaddleNLP/Research/ACL2018-DAM/main.py +++ b/PaddleNLP/Research/ACL2018-DAM/main.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ Deep Attention Matching Network """ @@ -174,9 +187,8 @@ def train(args): print("device count %d" % dev_count) print("theoretical memory usage: ") - print( - fluid.contrib.memory_usage( - program=train_program, batch_size=args.batch_size)) + print(fluid.contrib.memory_usage( + program=train_program, batch_size=args.batch_size)) exe = fluid.Executor(place) exe.run(train_startup) @@ -247,9 +259,8 @@ def train(args): if (args.save_path is not None) and (step % save_step == 0): save_path = os.path.join(args.save_path, "step_" + str(step)) print("Save model at step %d ... " % step) - print( - time.strftime('%Y-%m-%d %H:%M:%S', - time.localtime(time.time()))) + print(time.strftime('%Y-%m-%d %H:%M:%S', + time.localtime(time.time()))) fluid.io.save_persistables(exe, save_path, train_program) score_path = os.path.join(args.save_path, 'score.' + str(step)) @@ -294,9 +305,8 @@ def train(args): save_path = os.path.join(args.save_path, "step_" + str(step)) print("Save model at step %d ... " % step) - print( - time.strftime('%Y-%m-%d %H:%M:%S', - time.localtime(time.time()))) + print(time.strftime('%Y-%m-%d %H:%M:%S', + time.localtime(time.time()))) fluid.io.save_persistables(exe, save_path, train_program) score_path = os.path.join(args.save_path, diff --git a/PaddleNLP/Research/ACL2018-DAM/net.py b/PaddleNLP/Research/ACL2018-DAM/net.py index 240b1493..9db151e0 100755 --- a/PaddleNLP/Research/ACL2018-DAM/net.py +++ b/PaddleNLP/Research/ACL2018-DAM/net.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ Deep Attention Matching Network """ @@ -12,6 +25,7 @@ class Net(object): """ Deep attention matching network """ + def __init__(self, max_turn_num, max_turn_len, vocab_size, emb_size, stack_num, channel1_num, channel2_num): """ diff --git a/PaddleNLP/Research/ACL2018-DAM/reader.py b/PaddleNLP/Research/ACL2018-DAM/reader.py index 6a2653c2..d446f6ac 100755 --- a/PaddleNLP/Research/ACL2018-DAM/reader.py +++ b/PaddleNLP/Research/ACL2018-DAM/reader.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ Reader for deep attention matching network """ diff --git a/PaddleNLP/Research/ACL2018-DAM/util.py b/PaddleNLP/Research/ACL2018-DAM/util.py index 88910c9a..a604fffe 100755 --- a/PaddleNLP/Research/ACL2018-DAM/util.py +++ b/PaddleNLP/Research/ACL2018-DAM/util.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ Utils """ @@ -20,7 +33,7 @@ def mkdir(path): """ Mkdir """ - if not os.path.isdir(path): + if not os.path.isdir(path): if os.path.split(path)[0]: mkdir(os.path.split(path)[0]) else: diff --git a/PaddleNLP/Research/ACL2018-DuReader/src/paragraph_extraction.py b/PaddleNLP/Research/ACL2018-DuReader/src/paragraph_extraction.py index 0267eb1f..d813ffce 100644 --- a/PaddleNLP/Research/ACL2018-DuReader/src/paragraph_extraction.py +++ b/PaddleNLP/Research/ACL2018-DuReader/src/paragraph_extraction.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. #!/usr/bin/python #-*- coding:utf-8 -*- @@ -25,9 +38,8 @@ def compute_paragraph_score(sample): doc['segmented_paragraphs_scores'] = [] for p_idx, para_tokens in enumerate(doc['segmented_paragraphs']): if len(question) > 0: - related_score = metric_max_over_ground_truths(f1_score, - para_tokens, - [question]) + related_score = metric_max_over_ground_truths( + f1_score, para_tokens, [question]) else: related_score = 0.0 doc['segmented_paragraphs_scores'].append(related_score) @@ -63,7 +75,7 @@ def dup_remove(doc): prev_del_num = 0 del_num = 0 for p_idx in del_ids: - if p_idx < para_id: + if p_idx < para_id: prev_del_num += 1 del doc["segmented_paragraphs"][p_idx - del_num] del doc["segmented_paragraphs_scores"][p_idx - del_num] @@ -142,7 +154,8 @@ def paragraph_selection(sample, mode): para_infos = [] for p_idx, (para_tokens, para_scores) in \ enumerate(zip(doc['segmented_paragraphs'], doc['segmented_paragraphs_scores'])): - para_infos.append((para_tokens, para_scores, len(para_tokens), p_idx)) + para_infos.append( + (para_tokens, para_scores, len(para_tokens), p_idx)) para_infos.sort(key=lambda x: (-x[1], x[2])) topN_idx = [] for para_info in para_infos[:topN]: @@ -158,7 +171,7 @@ def paragraph_selection(sample, mode): break if doc_id == d_idx and id == para_id and mode == "train": continue - total_len += 1 + doc['paragraphs_length'][id] + total_len += 1 + doc['paragraphs_length'][id] final_idx.append(id) total_segmented_content = copy.deepcopy(segmented_title) final_idx.sort() @@ -168,7 +181,8 @@ def paragraph_selection(sample, mode): incre_len += 1 + doc['paragraphs_length'][id] if doc_id == d_idx and id == para_id: incre_len += 1 - total_segmented_content += [splitter] + doc['segmented_paragraphs'][id] + total_segmented_content += [splitter] + doc['segmented_paragraphs'][ + id] if doc_id == d_idx: answer_start = incre_len + sample['answer_spans'][0][0] answer_end = incre_len + sample['answer_spans'][0][1] @@ -191,9 +205,9 @@ if __name__ == "__main__": try: sample = json.loads(line, encoding='utf8') except: - print >>sys.stderr, "Invalid input json format - '{}' will be ignored".format(line) + print >> sys.stderr, "Invalid input json format - '{}' will be ignored".format( + line) continue compute_paragraph_score(sample) paragraph_selection(sample, mode) print(json.dumps(sample, encoding='utf8', ensure_ascii=False)) - diff --git a/PaddleNLP/Research/ACL2018-DuReader/utils/marco_tokenize_data.py b/PaddleNLP/Research/ACL2018-DuReader/utils/marco_tokenize_data.py index 7273e704..38e56b5e 100644 --- a/PaddleNLP/Research/ACL2018-DuReader/utils/marco_tokenize_data.py +++ b/PaddleNLP/Research/ACL2018-DuReader/utils/marco_tokenize_data.py @@ -1,8 +1,22 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. #coding=utf8 import os, sys, json import nltk + def _nltk_tokenize(sequence): tokens = nltk.word_tokenize(sequence) @@ -11,10 +25,12 @@ def _nltk_tokenize(sequence): token_words = [] for token in tokens: cur_char_offset = sequence.find(token, cur_char_offset) - token_offsets.append([cur_char_offset, cur_char_offset + len(token) - 1]) + token_offsets.append( + [cur_char_offset, cur_char_offset + len(token) - 1]) token_words.append(token) return token_offsets, token_words + def segment(input_js): _, input_js['segmented_question'] = _nltk_tokenize(input_js['question']) for doc_id, doc in enumerate(input_js['documents']): @@ -36,7 +52,7 @@ if __name__ == '__main__': exit() nltk.download('punkt') - + for line in open(sys.argv[1]): dureader_js = json.loads(line.strip()) segment(dureader_js) diff --git a/PaddleNLP/Research/ACL2018-DuReader/utils/marcov1_to_dureader.py b/PaddleNLP/Research/ACL2018-DuReader/utils/marcov1_to_dureader.py index 022db4dd..83384482 100644 --- a/PaddleNLP/Research/ACL2018-DuReader/utils/marcov1_to_dureader.py +++ b/PaddleNLP/Research/ACL2018-DuReader/utils/marcov1_to_dureader.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. #coding=utf8 import sys diff --git a/PaddleNLP/Research/ACL2018-DuReader/utils/marcov2_to_v1_tojsonl.py b/PaddleNLP/Research/ACL2018-DuReader/utils/marcov2_to_v1_tojsonl.py index c301e12f..5b102200 100644 --- a/PaddleNLP/Research/ACL2018-DuReader/utils/marcov2_to_v1_tojsonl.py +++ b/PaddleNLP/Research/ACL2018-DuReader/utils/marcov2_to_v1_tojsonl.py @@ -1,6 +1,19 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import sys import json -import pandas as pd +import pandas as pd if __name__ == '__main__': if len(sys.argv) != 3: @@ -11,4 +24,4 @@ if __name__ == '__main__': df = pd.read_json(infile) with open(outfile, 'w') as f: for row in df.iterrows(): - f.write(row[1].to_json() + '\n') \ No newline at end of file + f.write(row[1].to_json() + '\n') diff --git a/PaddleNLP/Research/ACL2019-JEMT/config.py b/PaddleNLP/Research/ACL2019-JEMT/config.py index d56fe2f8..920fdc34 100644 --- a/PaddleNLP/Research/ACL2019-JEMT/config.py +++ b/PaddleNLP/Research/ACL2019-JEMT/config.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. class TrainTaskConfig(object): # support both CPU and GPU now. use_gpu = True diff --git a/PaddleNLP/Research/ACL2019-JEMT/desc.py b/PaddleNLP/Research/ACL2019-JEMT/desc.py index 857ef02a..07326bf4 100644 --- a/PaddleNLP/Research/ACL2019-JEMT/desc.py +++ b/PaddleNLP/Research/ACL2019-JEMT/desc.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. # The placeholder for batch_size in compile time. Must be -1 currently to be # consistent with some ops' infer-shape output in compile time, such as the # sequence_expand op used in beamsearch decoder. @@ -65,43 +78,37 @@ input_descs = { # Names of word embedding table which might be reused for weight sharing. word_emb_param_names = ( "src_word_emb_table", - "trg_word_emb_table", -) + "trg_word_emb_table", ) phone_emb_param_name = "phone_emb_table" # Names of position encoding table which will be initialized externally. pos_enc_param_names = ( "src_pos_enc_table", - "trg_pos_enc_table", -) + "trg_pos_enc_table", ) # separated inputs for different usages. encoder_data_input_fields = ( "src_word", "src_pos", "src_slf_attn_bias", "src_phone", - "src_phone_mask", -) + "src_phone_mask", ) decoder_data_input_fields = ( "trg_word", "trg_pos", "trg_slf_attn_bias", "trg_src_attn_bias", - "enc_output", -) + "enc_output", ) label_data_input_fields = ( "lbl_word", - "lbl_weight", -) + "lbl_weight", ) # In fast decoder, trg_pos (only containing the current time step) is generated # by ops and trg_slf_attn_bias is not needed. fast_decoder_data_input_fields = ( "trg_word", "init_score", "init_idx", - "trg_src_attn_bias", -) + "trg_src_attn_bias", ) # Set seed for CE dropout_seed = None diff --git a/PaddleNLP/Research/ACL2019-JEMT/infer.py b/PaddleNLP/Research/ACL2019-JEMT/infer.py index 15c6dc18..08d1c7d8 100644 --- a/PaddleNLP/Research/ACL2019-JEMT/infer.py +++ b/PaddleNLP/Research/ACL2019-JEMT/infer.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import argparse import ast import multiprocessing @@ -86,10 +99,8 @@ def parse_args(): trg_dict = reader.DataReader.load_dict(args.trg_vocab_fpath) phone_dict = reader.DataReader.load_dict(args.phoneme_vocab_fpath) dict_args = [ - "src_vocab_size", - str(len(src_dict)), "trg_vocab_size", - str(len(trg_dict)), "phone_vocab_size", - str(len(phone_dict)), "bos_idx", + "src_vocab_size", str(len(src_dict)), "trg_vocab_size", + str(len(trg_dict)), "phone_vocab_size", str(len(phone_dict)), "bos_idx", str(src_dict[args.special_token[0]]), "eos_idx", str(src_dict[args.special_token[1]]), "unk_idx", str(src_dict[args.special_token[2]]) @@ -147,10 +158,10 @@ def prepare_batch_input(insts, data_input_names, src_pad_idx, phone_pad_idx, # beamsearch_op must use tensors with lod init_score = to_lodtensor( - np.zeros_like(trg_word, dtype="float32").reshape(-1, 1), place, - [range(trg_word.shape[0] + 1)] * 2) - trg_word = to_lodtensor(trg_word, place, - [range(trg_word.shape[0] + 1)] * 2) + np.zeros_like( + trg_word, dtype="float32").reshape(-1, 1), + place, [range(trg_word.shape[0] + 1)] * 2) + trg_word = to_lodtensor(trg_word, place, [range(trg_word.shape[0] + 1)] * 2) init_idx = np.asarray(range(len(insts)), dtype="int32") data_input_dict = dict( @@ -315,7 +326,8 @@ def fast_infer(args): sub_start = seq_ids.lod()[1][start + j] sub_end = seq_ids.lod()[1][start + j + 1] hyps[i].append(" ".join([ - trg_idx2word[idx] for idx in post_process_seq( + trg_idx2word[idx] + for idx in post_process_seq( np.array(seq_ids)[sub_start:sub_end]) ])) scores[i].append(np.array(seq_scores)[sub_end - 1]) diff --git a/PaddleNLP/Research/ACL2019-JEMT/model.py b/PaddleNLP/Research/ACL2019-JEMT/model.py index c0a9c375..83e8760a 100644 --- a/PaddleNLP/Research/ACL2019-JEMT/model.py +++ b/PaddleNLP/Research/ACL2019-JEMT/model.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from functools import partial import numpy as np @@ -51,12 +64,12 @@ def position_encoding_init(n_position, d_pos_vec): channels = d_pos_vec position = np.arange(n_position) num_timescales = channels // 2 - log_timescale_increment = ( - np.log(float(1e4) / float(1)) / (num_timescales - 1)) - inv_timescales = np.exp( - np.arange(num_timescales)) * -log_timescale_increment - scaled_time = np.expand_dims(position, 1) * np.expand_dims( - inv_timescales, 0) + log_timescale_increment = (np.log(float(1e4) / float(1)) / + (num_timescales - 1)) + inv_timescales = np.exp(np.arange( + num_timescales)) * -log_timescale_increment + scaled_time = np.expand_dims(position, 1) * np.expand_dims(inv_timescales, + 0) signal = np.concatenate([np.sin(scaled_time), np.cos(scaled_time)], axis=1) signal = np.pad(signal, [[0, 0], [0, np.mod(channels, 2)]], 'constant') position_enc = signal @@ -91,17 +104,15 @@ def multi_head_attention(queries, """ Add linear projection to queries, keys, and values. """ - q = layers.fc( - input=queries, - size=d_key * n_head, - bias_attr=False, - num_flatten_dims=2) + q = layers.fc(input=queries, + size=d_key * n_head, + bias_attr=False, + num_flatten_dims=2) # For encoder-decoder attention in inference, insert the ops and vars # into global block to use as cache among beam search. fc_layer = wrap_layer_with_block( - layers.fc, - fluid.default_main_program().current_block(). - parent_idx) if cache is not None and static_kv else layers.fc + layers.fc, fluid.default_main_program().current_block( + ).parent_idx) if cache is not None and static_kv else layers.fc k = fc_layer( input=keys, size=d_key * n_head, @@ -132,12 +143,12 @@ def multi_head_attention(queries, # into global block to use as cache among beam search. reshape_layer = wrap_layer_with_block( layers.reshape, - fluid.default_main_program().current_block(). - parent_idx) if cache is not None and static_kv else layers.reshape + fluid.default_main_program().current_block( + ).parent_idx) if cache is not None and static_kv else layers.reshape transpose_layer = wrap_layer_with_block( layers.transpose, - fluid.default_main_program().current_block().parent_idx - ) if cache is not None and static_kv else layers.transpose + fluid.default_main_program().current_block(). + parent_idx) if cache is not None and static_kv else layers.transpose reshaped_k = reshape_layer( x=keys, shape=[0, 0, n_head, d_key], inplace=True) k = transpose_layer(x=reshaped_k, perm=[0, 2, 1, 3]) @@ -214,8 +225,10 @@ def multi_head_attention(queries, out = __combine_heads(ctx_multiheads) # Project back to the model size. - proj_out = layers.fc( - input=out, size=d_model, bias_attr=False, num_flatten_dims=2) + proj_out = layers.fc(input=out, + size=d_model, + bias_attr=False, + num_flatten_dims=2) return proj_out @@ -225,14 +238,13 @@ def positionwise_feed_forward(x, d_inner_hid, d_hid, dropout_rate): This module consists of two linear transformations with a ReLU activation in between, which is applied to each position separately and identically. """ - hidden = layers.fc( - input=x, size=d_inner_hid, num_flatten_dims=2, act="relu") + hidden = layers.fc(input=x, + size=d_inner_hid, + num_flatten_dims=2, + act="relu") if dropout_rate: hidden = layers.dropout( - hidden, - dropout_prob=dropout_rate, - seed=dropout_seed, - is_test=False) + hidden, dropout_prob=dropout_rate, seed=dropout_seed, is_test=False) out = layers.fc(input=hidden, size=d_hid, num_flatten_dims=2) return out @@ -313,8 +325,7 @@ def prepare_encoder(src_word, param_attr=fluid.ParamAttr( name=pos_enc_param_names[0], trainable=False)) src_pos_enc.stop_gradient = True - enc_input = ( - 1 - beta) * src_word_emb + beta * mean_phone_emb + src_pos_enc + enc_input = (1 - beta) * src_word_emb + beta * mean_phone_emb + src_pos_enc return layers.dropout( enc_input, dropout_prob=dropout_rate, seed=dropout_seed, is_test=False) if dropout_rate else enc_input @@ -374,8 +385,8 @@ def encoder_layer(enc_input, """ attn_output = multi_head_attention( pre_process_layer(enc_input, preprocess_cmd, - prepostprocess_dropout), None, None, attn_bias, - d_key, d_value, d_model, n_head, attention_dropout) + prepostprocess_dropout), None, None, attn_bias, d_key, + d_value, d_model, n_head, attention_dropout) attn_output = post_process_layer(enc_input, attn_output, postprocess_cmd, prepostprocess_dropout) ffd_output = positionwise_feed_forward( @@ -415,8 +426,7 @@ def encoder(enc_input, attention_dropout, relu_dropout, preprocess_cmd, - postprocess_cmd, - ) + postprocess_cmd, ) enc_input = enc_output enc_output = pre_process_layer(enc_output, preprocess_cmd, prepostprocess_dropout) @@ -459,8 +469,7 @@ def decoder_layer(dec_input, dec_input, slf_attn_output, postprocess_cmd, - prepostprocess_dropout, - ) + prepostprocess_dropout, ) enc_attn_output = multi_head_attention( pre_process_layer(slf_attn_output, preprocess_cmd, prepostprocess_dropout), @@ -479,21 +488,18 @@ def decoder_layer(dec_input, slf_attn_output, enc_attn_output, postprocess_cmd, - prepostprocess_dropout, - ) + prepostprocess_dropout, ) ffd_output = positionwise_feed_forward( pre_process_layer(enc_attn_output, preprocess_cmd, prepostprocess_dropout), d_inner_hid, d_model, - relu_dropout, - ) + relu_dropout, ) dec_output = post_process_layer( enc_attn_output, ffd_output, postprocess_cmd, - prepostprocess_dropout, - ) + prepostprocess_dropout, ) return dec_output @@ -632,8 +638,7 @@ def transformer(src_vocab_size, postprocess_cmd, weight_sharing, beta, - enc_inputs, - ) + enc_inputs, ) predict = wrap_decoder( trg_vocab_size, @@ -651,14 +656,14 @@ def transformer(src_vocab_size, postprocess_cmd, weight_sharing, dec_inputs, - enc_output, - ) + enc_output, ) # Padding index do not contribute to the total loss. The weights is used to # cancel padding index in calculating the loss. if label_smooth_eps: label = layers.label_smooth( - label=layers.one_hot(input=label, depth=trg_vocab_size), + label=layers.one_hot( + input=label, depth=trg_vocab_size), epsilon=label_smooth_eps) cost = layers.softmax_with_cross_entropy( @@ -730,8 +735,7 @@ def wrap_encoder(src_vocab_size, attention_dropout, relu_dropout, preprocess_cmd, - postprocess_cmd, - ) + postprocess_cmd, ) return enc_output @@ -803,8 +807,9 @@ def wrap_decoder(trg_vocab_size, word_emb_param_names[0]), transpose_y=True) else: - predict = layers.fc( - input=dec_output, size=trg_vocab_size, bias_attr=False) + predict = layers.fc(input=dec_output, + size=trg_vocab_size, + bias_attr=False) if dec_inputs is None: # Return probs for independent decoder program. predict = layers.softmax(predict) @@ -879,8 +884,7 @@ def fast_decode(src_vocab_size, force_cpu=True) step_idx = layers.fill_constant( shape=[1], dtype=start_tokens.dtype, value=0, force_cpu=True) - cond = layers.less_than( - x=step_idx, y=max_len) # default force_cpu=True + cond = layers.less_than(x=step_idx, y=max_len) # default force_cpu=True while_op = layers.While(cond) # array states will be stored for each step. ids = layers.array_write( diff --git a/PaddleNLP/Research/ACL2019-JEMT/reader.py b/PaddleNLP/Research/ACL2019-JEMT/reader.py index 26a486c8..e6cf619c 100644 --- a/PaddleNLP/Research/ACL2019-JEMT/reader.py +++ b/PaddleNLP/Research/ACL2019-JEMT/reader.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import glob import six import os @@ -302,9 +315,8 @@ class DataReader(object): f = tarfile.open(fpaths[0], "r") for line in f.extractfile(tar_fname): fields = line.strip("\n").split(self._field_delimiter) - if (not self._only_src - and len(fields) == 2) or (self._only_src - and len(fields) == 1): + if (not self._only_src and len(fields) == 2) or ( + self._only_src and len(fields) == 1): yield fields else: for fpath in fpaths: @@ -381,5 +393,5 @@ class DataReader(object): for idx in batch_ids] else: yield [(self._src_seq_ids[idx], self._src_phone_ids[idx], - self._trg_seq_ids[idx][:-1], - self._trg_seq_ids[idx][1:]) for idx in batch_ids] + self._trg_seq_ids[idx][:-1], self._trg_seq_ids[idx][1:]) + for idx in batch_ids] diff --git a/PaddleNLP/Research/ACL2019-JEMT/train.py b/PaddleNLP/Research/ACL2019-JEMT/train.py index d33b0c1d..03afbce2 100644 --- a/PaddleNLP/Research/ACL2019-JEMT/train.py +++ b/PaddleNLP/Research/ACL2019-JEMT/train.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import argparse import ast import copy @@ -141,10 +154,8 @@ def parse_args(): trg_dict = reader.DataReader.load_dict(args.trg_vocab_fpath) phone_dict = reader.DataReader.load_dict(args.phoneme_vocab_fpath) dict_args = [ - "src_vocab_size", - str(len(src_dict)), "trg_vocab_size", - str(len(trg_dict)), "phone_vocab_size", - str(len(phone_dict)), "bos_idx", + "src_vocab_size", str(len(src_dict)), "trg_vocab_size", + str(len(trg_dict)), "phone_vocab_size", str(len(phone_dict)), "bos_idx", str(src_dict[args.special_token[0]]), "eos_idx", str(src_dict[args.special_token[1]]), "unk_idx", str(src_dict[args.special_token[2]]) @@ -157,8 +168,8 @@ def parse_args(): def append_nccl2_prepare(startup_prog, trainer_id, worker_endpoints, current_endpoint): - assert (trainer_id >= 0 and len(worker_endpoints) > 1 - and current_endpoint in worker_endpoints) + assert (trainer_id >= 0 and len(worker_endpoints) > 1 and + current_endpoint in worker_endpoints) eps = copy.deepcopy(worker_endpoints) eps.remove(current_endpoint) nccl_id_var = startup_prog.global_block().create_var( @@ -189,8 +200,8 @@ def pad_phoneme_data(phoneme_seqs, pad_idx, max_seq_len): batch_size = len(phoneme_seqs) phoneme_data = pad_idx * np.ones( (batch_size, max_seq_len, max_ph_seq_len), dtype=np.int64) - phoneme_mask = np.zeros((batch_size, max_seq_len, max_ph_seq_len), - dtype=np.int64) + phoneme_mask = np.zeros( + (batch_size, max_seq_len, max_ph_seq_len), dtype=np.int64) for i in range(batch_size): cur_ph_seq = phoneme_seqs[i] @@ -237,17 +248,16 @@ def pad_batch_data(insts, if is_target: # This is used to avoid attention on paddings and subsequent # words. - slf_attn_bias_data = np.ones((inst_data.shape[0], max_len, - max_len)) + slf_attn_bias_data = np.ones((inst_data.shape[0], max_len, max_len)) slf_attn_bias_data = np.triu(slf_attn_bias_data, 1).reshape([-1, 1, max_len, max_len]) slf_attn_bias_data = np.tile(slf_attn_bias_data, [1, n_head, 1, 1]) * [-1e9] else: # This is used to avoid attention on paddings. - slf_attn_bias_data = np.array( - [[0] * len(inst) + [-1e9] * (max_len - len(inst)) - for inst in insts]) + slf_attn_bias_data = np.array([[0] * len(inst) + [-1e9] * + (max_len - len(inst)) + for inst in insts]) slf_attn_bias_data = np.tile( slf_attn_bias_data.reshape([-1, 1, 1, max_len]), [1, n_head, max_len, 1]) @@ -359,8 +369,8 @@ def prepare_data_generator(args, for item in data_reader(): inst_num_per_part = len(item) // count for i in range(count): - yield item[inst_num_per_part * i:inst_num_per_part * - (i + 1)] + yield item[inst_num_per_part * i:inst_num_per_part * (i + 1 + )] return __impl__ @@ -401,8 +411,8 @@ def prepare_feed_dict_list(data_generator, init_flag, count): feed_dict_list.append(pos_enc_tables) else: feed_dict_list[idx] = dict( - list(pos_enc_tables.items()) + - list(feed_dict_list[idx].items())) + list(pos_enc_tables.items()) + list(feed_dict_list[idx] + .items())) return feed_dict_list if len(feed_dict_list) == count else None @@ -487,11 +497,10 @@ def test_context(exe, train_exe, dev_count): data_generator = test_data() while True: try: - feed_dict_list = prepare_feed_dict_list( - data_generator, False, dev_count) - outs = test_exe.run( - fetch_list=[sum_cost.name, token_num.name], - feed=feed_dict_list) + feed_dict_list = prepare_feed_dict_list(data_generator, False, + dev_count) + outs = test_exe.run(fetch_list=[sum_cost.name, token_num.name], + feed=feed_dict_list) except (StopIteration, fluid.core.EOFException): # The current pass is over. if args.use_py_reader: @@ -562,10 +571,10 @@ def train_loop(exe, # the best cross-entropy value with label smoothing loss_normalizer = -((1. - TrainTaskConfig.label_smooth_eps) * np.log( - (1. - TrainTaskConfig.label_smooth_eps)) + - TrainTaskConfig.label_smooth_eps * - np.log(TrainTaskConfig.label_smooth_eps / - (ModelHyperParams.trg_vocab_size - 1) + 1e-20)) + (1. - TrainTaskConfig.label_smooth_eps + )) + TrainTaskConfig.label_smooth_eps * + np.log(TrainTaskConfig.label_smooth_eps / ( + ModelHyperParams.trg_vocab_size - 1) + 1e-20)) step_idx = 0 init_flag = True @@ -583,8 +592,8 @@ def train_loop(exe, batch_id = 0 while True: try: - feed_dict_list = prepare_feed_dict_list( - data_generator, init_flag, dev_count) + feed_dict_list = prepare_feed_dict_list(data_generator, + init_flag, dev_count) outs = train_exe.run( fetch_list=[sum_cost.name, token_num.name] if step_idx % args.fetch_steps == 0 else [], @@ -609,12 +618,11 @@ def train_loop(exe, else: logging.info( "step_idx: %d, epoch: %d, batch: %d, avg loss: %f, " - "normalized loss: %f, ppl: %f, speed: %.2f step/s" - % (step_idx, pass_id, batch_id, total_avg_cost, - total_avg_cost - loss_normalizer, - np.exp([min(total_avg_cost, 100) - ]), args.fetch_steps / - (time.time() - avg_batch_time))) + "normalized loss: %f, ppl: %f, speed: %.2f step/s" % + (step_idx, pass_id, batch_id, total_avg_cost, + total_avg_cost - loss_normalizer, np.exp( + [min(total_avg_cost, 100)]), + args.fetch_steps / (time.time() - avg_batch_time))) avg_batch_time = time.time() if step_idx % TrainTaskConfig.save_freq == 0 and step_idx > 0: @@ -643,8 +651,9 @@ def train_loop(exe, val_avg_cost, val_ppl = test() logging.info( "epoch: %d, val avg loss: %f, val normalized loss: %f, val ppl: %f," - " consumed %fs" % (pass_id, val_avg_cost, val_avg_cost - - loss_normalizer, val_ppl, time_consumed)) + " consumed %fs" % (pass_id, val_avg_cost, + val_avg_cost - loss_normalizer, val_ppl, + time_consumed)) else: logging.info("epoch: %d, consumed %fs" % (pass_id, time_consumed)) if not args.enable_ce: @@ -734,8 +743,8 @@ def train(args): if args.local: logging.info("local start_up:") - train_loop(exe, train_prog, startup_prog, dev_count, sum_cost, - avg_cost, token_num, predict, pyreader) + train_loop(exe, train_prog, startup_prog, dev_count, sum_cost, avg_cost, + token_num, predict, pyreader) else: if args.update_method == "nccl2": trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0")) diff --git a/PaddleNLP/add_license.py b/PaddleNLP/add_license.py new file mode 100644 index 00000000..436cf0e0 --- /dev/null +++ b/PaddleNLP/add_license.py @@ -0,0 +1,50 @@ +import os + +filePath = os.getcwd() + + +def get_all_files(dir): + fileDirList = [] + + for root, dirs, files in os.walk(dir): + for file in files: + file_path = os.path.join(root, file) + fileDirList.append(file_path) + for dir in dirs: + dir_path = os.path.join(root, dir) + get_all_files(dir_path) + + return fileDirList + + +fileDirList = get_all_files(filePath) +for code in fileDirList: + split = os.path.splitext(code) + if (split[1] == '.py' and not '__init__' in split[0] and + not '_ce' in split[0]): + + with open(code, 'r') as fz: + content = fz.read() + if content.find('Copyright') >= 0: + fz.close() + continue + else: + string = "# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.\n" \ + "#\n" \ + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n" \ + "# you may not use this file except in compliance with the License.\n" \ + "# You may obtain a copy of the License at\n" \ + "#\n" \ + "# http://www.apache.org/licenses/LICENSE-2.0\n" \ + "#\n" \ + "# Unless required by applicable law or agreed to in writing, software\n" \ + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n" \ + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n" \ + "# See the License for the specific language governing permissions and\n" \ + "# limitations under the License.\n"+content + fz.close() + with open(code, 'w') as f: + f.write(string) + print "file %s write success!" % code + f.close() +print "read and write success!" diff --git a/PaddleNLP/emotion_detection/config.py b/PaddleNLP/emotion_detection/config.py index 1d8f4156..f21d60b9 100644 --- a/PaddleNLP/emotion_detection/config.py +++ b/PaddleNLP/emotion_detection/config.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ EmoTect config """ @@ -9,10 +22,12 @@ from __future__ import print_function import six import json + class EmoTectConfig(object): """ EmoTect Config """ + def __init__(self, config_path): self._config_dict = self._parse(config_path) @@ -21,7 +36,8 @@ class EmoTectConfig(object): with open(config_path) as json_file: config_dict = json.load(json_file) except Exception: - raise IOError("Error in parsing emotect model config file '%s'" % config_path) + raise IOError("Error in parsing emotect model config file '%s'" % + config_path) else: return config_dict diff --git a/PaddleNLP/emotion_detection/reader.py b/PaddleNLP/emotion_detection/reader.py index 75a1be57..197827d8 100644 --- a/PaddleNLP/emotion_detection/reader.py +++ b/PaddleNLP/emotion_detection/reader.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ EmoTect Reader, data converters for classification data. """ @@ -10,14 +23,13 @@ import numpy as np from utils import load_vocab from utils import data_reader + class EmoTectProcessor(object): """ Processor class for data convertors for EmoTect. """ - def __init__(self, - data_dir, - vocab_path, - random_seed=None): + + def __init__(self, data_dir, vocab_path, random_seed=None): self.data_dir = data_dir self.vocab = load_vocab(vocab_path) self.num_examples = {"train": -1, "dev": -1, "test": -1, "infer": -1} @@ -27,29 +39,33 @@ class EmoTectProcessor(object): """ Load training examples """ - return data_reader(os.path.join(self.data_dir, "train.tsv"), - self.vocab, self.num_examples, "train", epoch) + return data_reader( + os.path.join(self.data_dir, "train.tsv"), self.vocab, + self.num_examples, "train", epoch) def get_dev_examples(self, data_dir): """ Load dev examples """ - return data_reader(os.path.join(self.data_dir, "dev.tsv"), - self.vocab, self.num_examples, "dev") + return data_reader( + os.path.join(self.data_dir, "dev.tsv"), self.vocab, + self.num_examples, "dev") def get_test_examples(self, data_dir): """ Load test examples """ - return data_reader(os.path.join(self.data_dir, "test.tsv"), - self.vocab, self.num_examples, "test") + return data_reader( + os.path.join(self.data_dir, "test.tsv"), self.vocab, + self.num_examples, "test") def get_infer_examples(self, data_dir): """ Load infer querys """ - return data_reader(os.path.join(self.data_dir, "infer.tsv"), - self.vocab, self.num_examples, "infer") + return data_reader( + os.path.join(self.data_dir, "infer.tsv"), self.vocab, + self.num_examples, "infer") def get_labels(self): """ @@ -63,7 +79,8 @@ class EmoTectProcessor(object): """ if phase not in ['train', 'dev', 'test', 'infer']: raise ValueError( - "Unknown phase, which should be in ['train', 'dev', 'test', 'infer'].") + "Unknown phase, which should be in ['train', 'dev', 'test', 'infer']." + ) return self.num_examples[phase] def get_train_progress(self): @@ -77,14 +94,18 @@ class EmoTectProcessor(object): Generate data for train, dev or test """ if phase == "train": - return paddle.batch(self.get_train_examples(self.data_dir, epoch), batch_size) + return paddle.batch( + self.get_train_examples(self.data_dir, epoch), batch_size) elif phase == "dev": - return paddle.batch(self.get_dev_examples(self.data_dir), batch_size) + return paddle.batch( + self.get_dev_examples(self.data_dir), batch_size) elif phase == "test": - return paddle.batch(self.get_test_examples(self.data_dir), batch_size) + return paddle.batch( + self.get_test_examples(self.data_dir), batch_size) elif phase == "infer": - return paddle.batch(self.get_infer_examples(self.data_dir), batch_size) + return paddle.batch( + self.get_infer_examples(self.data_dir), batch_size) else: raise ValueError( - "Unknown phase, which should be in ['train', 'dev', 'test', 'infer'].") - + "Unknown phase, which should be in ['train', 'dev', 'test', 'infer']." + ) diff --git a/PaddleNLP/emotion_detection/run_classifier.py b/PaddleNLP/emotion_detection/run_classifier.py index 6bdf9812..4dca35a8 100644 --- a/PaddleNLP/emotion_detection/run_classifier.py +++ b/PaddleNLP/emotion_detection/run_classifier.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ Emotion Detection Task """ @@ -25,37 +38,48 @@ import utils parser = argparse.ArgumentParser(__doc__) model_g = utils.ArgumentGroup(parser, "model", "model configuration and paths.") -model_g.add_arg("config_path", str, None, "Path to the json file for EmoTect model config.") -model_g.add_arg("init_checkpoint", str, None, "Init checkpoint to resume training from.") +model_g.add_arg("config_path", str, None, + "Path to the json file for EmoTect model config.") +model_g.add_arg("init_checkpoint", str, None, + "Init checkpoint to resume training from.") model_g.add_arg("output_dir", str, None, "Directory path to save checkpoints") train_g = utils.ArgumentGroup(parser, "training", "training options.") train_g.add_arg("epoch", int, 10, "Number of epoches for training.") -train_g.add_arg("save_steps", int, 10000, "The steps interval to save checkpoints.") -train_g.add_arg("validation_steps", int, 1000, "The steps interval to evaluate model performance.") +train_g.add_arg("save_steps", int, 10000, + "The steps interval to save checkpoints.") +train_g.add_arg("validation_steps", int, 1000, + "The steps interval to evaluate model performance.") train_g.add_arg("lr", float, 0.002, "The Learning rate value for training.") log_g = utils.ArgumentGroup(parser, "logging", "logging related") log_g.add_arg("skip_steps", int, 10, "The steps interval to print loss.") log_g.add_arg("verbose", bool, False, "Whether to output verbose log") -data_g = utils.ArgumentGroup(parser, "data", "Data paths, vocab paths and data processing options") +data_g = utils.ArgumentGroup( + parser, "data", "Data paths, vocab paths and data processing options") data_g.add_arg("data_dir", str, None, "Directory path to training data.") data_g.add_arg("vocab_path", str, None, "Vocabulary path.") -data_g.add_arg("batch_size", int, 256, "Total examples' number in batch for training.") +data_g.add_arg("batch_size", int, 256, + "Total examples' number in batch for training.") data_g.add_arg("random_seed", int, 0, "Random seed.") run_type_g = utils.ArgumentGroup(parser, "run_type", "running type options.") run_type_g.add_arg("use_cuda", bool, False, "If set, use GPU for training.") -run_type_g.add_arg("task_name", str, None, "The name of task to perform sentiment classification.") +run_type_g.add_arg("task_name", str, None, + "The name of task to perform sentiment classification.") run_type_g.add_arg("do_train", bool, False, "Whether to perform training.") run_type_g.add_arg("do_val", bool, False, "Whether to perform evaluation.") run_type_g.add_arg("do_infer", bool, False, "Whether to perform inference.") -parser.add_argument('--enable_ce', action='store_true', help='If set, run the task with continuous evaluation logs.') +parser.add_argument( + '--enable_ce', + action='store_true', + help='If set, run the task with continuous evaluation logs.') args = parser.parse_args() + def create_model(args, pyreader_name, emotect_config, @@ -98,11 +122,17 @@ def create_model(args, if is_infer: data = fluid.layers.read_file(pyreader) - probs = network(data, None, emotect_config["vocab_size"], class_dim=num_labels, is_infer=True) + probs = network( + data, + None, + emotect_config["vocab_size"], + class_dim=num_labels, + is_infer=True) return pyreader, probs data, label = fluid.layers.read_file(pyreader) - avg_loss, probs = network(data, label, emotect_config["vocab_size"], class_dim=num_labels) + avg_loss, probs = network( + data, label, emotect_config["vocab_size"], class_dim=num_labels) num_seqs = fluid.layers.create_tensor(dtype='int64') accuracy = fluid.layers.accuracy(input=probs, label=label, total=num_seqs) return pyreader, avg_loss, accuracy, num_seqs @@ -118,8 +148,8 @@ def evaluate(exe, test_program, test_pyreader, fetch_list, eval_phase): while True: try: np_loss, np_acc, np_num_seqs = exe.run(program=test_program, - fetch_list=fetch_list, - return_numpy=False) + fetch_list=fetch_list, + return_numpy=False) np_loss = np.array(np_loss) np_acc = np.array(np_acc) np_num_seqs = np.array(np_num_seqs) @@ -131,8 +161,8 @@ def evaluate(exe, test_program, test_pyreader, fetch_list, eval_phase): break time_end = time.time() print("[%s evaluation] avg loss: %f, avg acc: %f, elapsed time: %f s" % - (eval_phase, np.sum(total_cost) / np.sum(total_num_seqs), - np.sum(total_acc) / np.sum(total_num_seqs), time_end - time_begin)) + (eval_phase, np.sum(total_cost) / np.sum(total_num_seqs), + np.sum(total_acc) / np.sum(total_num_seqs), time_end - time_begin)) def infer(exe, infer_program, infer_pyreader, fetch_list, infer_phase): @@ -141,10 +171,11 @@ def infer(exe, infer_program, infer_pyreader, fetch_list, infer_phase): while True: try: batch_probs = exe.run(program=infer_program, - fetch_list=fetch_list, - return_numpy=True) + fetch_list=fetch_list, + return_numpy=True) for probs in batch_probs[0]: - print("%d\t%f\t%f\t%f" % (np.argmax(probs), probs[0], probs[1], probs[2])) + print("%d\t%f\t%f\t%f" % + (np.argmax(probs), probs[0], probs[1], probs[2])) except fluid.core.EOFException as e: infer_pyreader.reset() break @@ -165,9 +196,10 @@ def main(args): exe = fluid.Executor(place) task_name = args.task_name.lower() - processor = reader.EmoTectProcessor(data_dir=args.data_dir, - vocab_path=args.vocab_path, - random_seed=args.random_seed) + processor = reader.EmoTectProcessor( + data_dir=args.data_dir, + vocab_path=args.vocab_path, + random_seed=args.random_seed) num_labels = len(processor.get_labels()) if not (args.do_train or args.do_val or args.do_infer): @@ -180,9 +212,7 @@ def main(args): if args.do_train: train_data_generator = processor.data_generator( - batch_size=args.batch_size, - phase='train', - epoch=args.epoch) + batch_size=args.batch_size, phase='train', epoch=args.epoch) num_train_examples = processor.get_num_examples(phase="train") max_train_steps = args.epoch * num_train_examples // args.batch_size + 1 @@ -210,7 +240,7 @@ def main(args): lower_mem, upper_mem, unit = fluid.contrib.memory_usage( program=train_program, batch_size=args.batch_size) print("Theoretical memory usage in training: %.3f - %.3f %s" % - (lower_mem, upper_mem, unit)) + (lower_mem, upper_mem, unit)) if args.do_val: test_prog = fluid.Program() @@ -241,17 +271,12 @@ def main(args): if args.do_train: if args.init_checkpoint: utils.init_checkpoint( - exe, - args.init_checkpoint, - main_program=startup_prog) + exe, args.init_checkpoint, main_program=startup_prog) elif args.do_val or args.do_infer: if not args.init_checkpoint: raise ValueError("args 'init_checkpoint' should be set if" "only doing validation or infer!") - utils.init_checkpoint( - exe, - args.init_checkpoint, - main_program=test_prog) + utils.init_checkpoint(exe, args.init_checkpoint, main_program=test_prog) if args.do_train: train_exe = exe @@ -288,22 +313,27 @@ def main(args): total_num_seqs.extend(np_num_seqs) if args.verbose: - verbose = "train pyreader queue size: %d, " % train_pyreader.queue.size() + verbose = "train pyreader queue size: %d, " % train_pyreader.queue.size( + ) print(verbose) time_end = time.time() used_time = time_end - time_begin print("step: %d, avg loss: %f, " - "avg acc: %f, speed: %f steps/s" % - (steps, np.sum(total_cost) / np.sum(total_num_seqs), - np.sum(total_acc) / np.sum(total_num_seqs), - args.skip_steps / used_time)) - ce_info.append([np.sum(total_cost) / np.sum(total_num_seqs), np.sum(total_acc) / np.sum(total_num_seqs), used_time]) + "avg acc: %f, speed: %f steps/s" % + (steps, np.sum(total_cost) / np.sum(total_num_seqs), + np.sum(total_acc) / np.sum(total_num_seqs), + args.skip_steps / used_time)) + ce_info.append([ + np.sum(total_cost) / np.sum(total_num_seqs), + np.sum(total_acc) / np.sum(total_num_seqs), used_time + ]) total_cost, total_acc, total_num_seqs = [], [], [] time_begin = time.time() if steps % args.save_steps == 0: - save_path = os.path.join(args.output_dir, "step_" + str(steps)) + save_path = os.path.join(args.output_dir, + "step_" + str(steps)) fluid.io.save_persistables(exe, save_path, train_program) if steps % args.validation_steps == 0: @@ -315,8 +345,8 @@ def main(args): phase='dev', epoch=1)) evaluate(test_exe, test_prog, test_pyreader, - [loss.name, accuracy.name, num_seqs.name], - "dev") + [loss.name, accuracy.name, num_seqs.name], + "dev") except fluid.core.EOFException: save_path = os.path.join(args.output_dir, "step_" + str(steps)) @@ -336,33 +366,25 @@ def main(args): except: print("ce info error") print("kpis\teach_step_duration_%s_card%s\t%s" % - (task_name, card_num, ce_time)) - print("kpis\ttrain_loss_%s_card%s\t%f" % - (task_name, card_num, ce_loss)) - print("kpis\ttrain_acc_%s_card%s\t%f" % - (task_name, card_num, ce_acc)) + (task_name, card_num, ce_time)) + print("kpis\ttrain_loss_%s_card%s\t%f" % (task_name, card_num, ce_loss)) + print("kpis\ttrain_acc_%s_card%s\t%f" % (task_name, card_num, ce_acc)) # evaluate on test set if not args.do_train and args.do_val: test_pyreader.decorate_paddle_reader( processor.data_generator( - batch_size=args.batch_size, - phase='test', - epoch=1)) + batch_size=args.batch_size, phase='test', epoch=1)) print("Final test result:") evaluate(test_exe, test_prog, test_pyreader, - [loss.name, accuracy.name, num_seqs.name], - "test") + [loss.name, accuracy.name, num_seqs.name], "test") # infer if args.do_infer: infer_pyreader.decorate_paddle_reader( processor.data_generator( - batch_size=args.batch_size, - phase='infer', - epoch=1)) - infer(test_exe, test_prog, infer_pyreader, - [probs.name], "infer") + batch_size=args.batch_size, phase='infer', epoch=1)) + infer(test_exe, test_prog, infer_pyreader, [probs.name], "infer") def get_cards(): diff --git a/PaddleNLP/emotion_detection/run_ernie_classifier.py b/PaddleNLP/emotion_detection/run_ernie_classifier.py index 774eab50..8dfb6b4d 100644 --- a/PaddleNLP/emotion_detection/run_ernie_classifier.py +++ b/PaddleNLP/emotion_detection/run_ernie_classifier.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ Emotion Detection Task, based on ERNIE """ diff --git a/PaddleNLP/emotion_detection/utils.py b/PaddleNLP/emotion_detection/utils.py index ac916d25..44d108c9 100644 --- a/PaddleNLP/emotion_detection/utils.py +++ b/PaddleNLP/emotion_detection/utils.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ EmoTect utilities. """ @@ -16,6 +29,7 @@ import paddle import paddle.fluid as fluid import numpy as np + def str2bool(value): """ String to Boolean @@ -29,6 +43,7 @@ class ArgumentGroup(object): """ Argument Class """ + def __init__(self, parser, title, des): self._group = parser.add_argument_group(title=title, description=des) @@ -92,27 +107,33 @@ def data_reader(file_path, word_dict, num_examples, phrase, epoch=1): cols = line.strip().split("\t") if len(cols) != 1: query = cols[-1] - wids = [word_dict[x] if x in word_dict else unk_id - for x in query.strip().split(" ")] - all_data.append((wids,)) + wids = [ + word_dict[x] if x in word_dict else unk_id + for x in query.strip().split(" ") + ] + all_data.append((wids, )) else: cols = line.strip().split("\t") if len(cols) != 2: sys.stderr.write("[NOTICE] Error Format Line!") continue label = int(cols[0]) - wids = [word_dict[x] if x in word_dict else unk_id - for x in cols[1].split(" ")] + wids = [ + word_dict[x] if x in word_dict else unk_id + for x in cols[1].split(" ") + ] all_data.append((wids, label)) num_examples[phrase] = len(all_data) if phrase == "infer": + def reader(): """ Infer reader function """ for wids in all_data: yield wids + return reader def reader(): @@ -124,6 +145,7 @@ def data_reader(file_path, word_dict, num_examples, phrase, epoch=1): random.shuffle(all_data) for wids, label in all_data: yield wids, label + return reader diff --git a/PaddleNLP/language_representations_kit/ELMo/LAC_demo/network.py b/PaddleNLP/language_representations_kit/ELMo/LAC_demo/network.py index c982c253..c6883d18 100755 --- a/PaddleNLP/language_representations_kit/ELMo/LAC_demo/network.py +++ b/PaddleNLP/language_representations_kit/ELMo/LAC_demo/network.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ The function lex_net(args) define the lexical analysis network structure """ diff --git a/PaddleNLP/language_representations_kit/ELMo/LAC_demo/reader.py b/PaddleNLP/language_representations_kit/ELMo/LAC_demo/reader.py index 0b247347..3c9a7f60 100755 --- a/PaddleNLP/language_representations_kit/ELMo/LAC_demo/reader.py +++ b/PaddleNLP/language_representations_kit/ELMo/LAC_demo/reader.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. #coding: utf-8 """ The file_reader converts raw corpus to input. diff --git a/PaddleNLP/language_representations_kit/ELMo/LAC_demo/train.py b/PaddleNLP/language_representations_kit/ELMo/LAC_demo/train.py index 0dae04c4..7e1a027d 100755 --- a/PaddleNLP/language_representations_kit/ELMo/LAC_demo/train.py +++ b/PaddleNLP/language_representations_kit/ELMo/LAC_demo/train.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ This file is used to train the model. """ diff --git a/PaddleNLP/lexical_analysis/evaluate.py b/PaddleNLP/lexical_analysis/evaluate.py index 108dafa9..84d4d929 100644 --- a/PaddleNLP/lexical_analysis/evaluate.py +++ b/PaddleNLP/lexical_analysis/evaluate.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. #coding=utf-8 """ evaluate wordseg for LAC and other open-source wordseg tools @@ -20,7 +33,7 @@ def to_unicode(string): def to_set(words): """ cut list to set of (string, off) """ off = 0 - s= set() + s = set() for w in words: if w: s.add((off, w)) @@ -145,7 +158,7 @@ def get_pkuseg_result(sentences): seg = pkuseg.pkuseg() preds = [] for sentence in sentences: - sent_seg = " ".join(seg.cut(sentence)) + sent_seg = " ".join(seg.cut(sentence)) sent_seg = to_unicode(sent_seg) preds.append(sent_seg) return preds @@ -161,7 +174,8 @@ def get_hanlp_result(sentences): preds = [] for sentence in sentences: arraylist = HanLP.segment(sentence) - sent_seg = " ".join([term.toString().split("/")[0] for term in arraylist]) + sent_seg = " ".join( + [term.toString().split("/")[0] for term in arraylist]) sent_seg = to_unicode(sent_seg) preds.append(sent_seg) return preds diff --git a/PaddleNLP/lexical_analysis/reader.py b/PaddleNLP/lexical_analysis/reader.py index 4655c5eb..340c154f 100644 --- a/PaddleNLP/lexical_analysis/reader.py +++ b/PaddleNLP/lexical_analysis/reader.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ The file_reader converts raw corpus to input. """ @@ -9,7 +22,10 @@ import glob def load_kv_dict(dict_path, - reverse=False, delimiter="\t", key_func=None, value_func=None): + reverse=False, + delimiter="\t", + key_func=None, + value_func=None): """ Load key-value dict from file """ @@ -34,11 +50,14 @@ def load_kv_dict(dict_path, class Dataset(object): """data reader""" + def __init__(self, args, mode="train"): # read dict - self.word2id_dict = load_kv_dict(args.word_dict_path, reverse=True, value_func=int) + self.word2id_dict = load_kv_dict( + args.word_dict_path, reverse=True, value_func=int) self.id2word_dict = load_kv_dict(args.word_dict_path) - self.label2id_dict = load_kv_dict(args.label_dict_path, reverse=True, value_func=int) + self.label2id_dict = load_kv_dict( + args.label_dict_path, reverse=True, value_func=int) self.id2label_dict = load_kv_dict(args.label_dict_path) self.word_replace_dict = load_kv_dict(args.word_rep_dict_path) @@ -78,12 +97,12 @@ class Dataset(object): label_ids.append(label_id) return label_ids - def file_reader(self, filename, max_seq_len=64, mode="train"): """ yield (word_idx, target_idx) one by one from file, or yield (word_idx, ) in `infer` mode """ + def wrapper(): fread = io.open(filename, "r", encoding="utf-8") headline = next(fread) @@ -93,9 +112,11 @@ class Dataset(object): for line in fread: words = line.strip("\n").split("\002") word_ids = self.word_to_ids(words) - yield word_ids[0:max_seq_len], [0 for _ in word_ids][0: max_seq_len] + yield word_ids[0:max_seq_len], [0 for _ in word_ids][ + 0:max_seq_len] else: - assert len(headline) == 2 and headline[0] == "text_a" and headline[1] == "label" + assert len(headline) == 2 and headline[ + 0] == "text_a" and headline[1] == "label" for line in fread: words, labels = line.strip("\n").split("\t") word_ids = self.word_to_ids(words.split("\002")) @@ -109,9 +130,21 @@ class Dataset(object): if __name__ == "__main__": parser = argparse.ArgumentParser(__doc__) - parser.add_argument("--word_dict_path", type=str, default="./conf/word.dic", help="word dict") - parser.add_argument("--label_dict_path", type=str, default="./conf/tag.dic", help="label dict") - parser.add_argument("--word_rep_dict_path", type=str, default="./conf/q2b.dic", help="word replace dict") + parser.add_argument( + "--word_dict_path", + type=str, + default="./conf/word.dic", + help="word dict") + parser.add_argument( + "--label_dict_path", + type=str, + default="./conf/tag.dic", + help="label dict") + parser.add_argument( + "--word_rep_dict_path", + type=str, + default="./conf/q2b.dic", + help="word replace dict") args = parser.parse_args() dataset = Dataset(args) data_generator = dataset.file_reader("data/train.tsv") diff --git a/PaddleNLP/lexical_analysis/run_ernie_sequence_labeling.py b/PaddleNLP/lexical_analysis/run_ernie_sequence_labeling.py index 96092dc2..3e0c21a2 100644 --- a/PaddleNLP/lexical_analysis/run_ernie_sequence_labeling.py +++ b/PaddleNLP/lexical_analysis/run_ernie_sequence_labeling.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ Sentiment Classification Task """ @@ -28,7 +41,6 @@ from models.representation.ernie import ernie_encoder from models.sequence_labeling import nets import utils - # yapf: disable parser = argparse.ArgumentParser(__doc__) model_g = utils.ArgumentGroup(parser, "model", "model configuration and paths.") diff --git a/PaddleNLP/lexical_analysis/run_sequence_labeling.py b/PaddleNLP/lexical_analysis/run_sequence_labeling.py index 6f20cfe3..0dd8707c 100644 --- a/PaddleNLP/lexical_analysis/run_sequence_labeling.py +++ b/PaddleNLP/lexical_analysis/run_sequence_labeling.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ Baidu's open-source Lexical Analysis tool for Chinese, including: 1. Word Segmentation, diff --git a/PaddleNLP/lexical_analysis/utils.py b/PaddleNLP/lexical_analysis/utils.py index 2513d128..46d88883 100644 --- a/PaddleNLP/lexical_analysis/utils.py +++ b/PaddleNLP/lexical_analysis/utils.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ util tools """ @@ -19,6 +32,7 @@ class ArgumentGroup(object): """ Put arguments to one group """ + def __init__(self, parser, title, des): """none""" self._group = parser.add_argument_group(title=title, description=des) @@ -86,7 +100,7 @@ def parse_result(words, crf_decode, dataset): sent_len = offset_list[sent_index + 1] - offset_list[sent_index] last_word = "" last_tag = "" - for tag_index in range(sent_len): # iterate every word in sent + for tag_index in range(sent_len): # iterate every word in sent index = tag_index + offset_list[sent_index] cur_word_id = str(words[index][0]) cur_tag_id = str(crf_decode[index][0]) diff --git a/PaddleNLP/models/classification/nets.py b/PaddleNLP/models/classification/nets.py index 279199e8..da20ccd3 100644 --- a/PaddleNLP/models/classification/nets.py +++ b/PaddleNLP/models/classification/nets.py @@ -1,9 +1,23 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ This module provide nets for text classification """ import paddle.fluid as fluid + def bow_net(data, label, dict_dim, @@ -192,14 +206,14 @@ def gru_net(data, def textcnn_net(data, - label, - dict_dim, - emb_dim=128, - hid_dim=128, - hid_dim2=96, - class_dim=2, - win_sizes=None, - is_infer=False): + label, + dict_dim, + emb_dim=128, + hid_dim=128, + hid_dim2=96, + class_dim=2, + win_sizes=None, + is_infer=False): """ Textcnn_net """ diff --git a/PaddleNLP/models/matching/bow.py b/PaddleNLP/models/matching/bow.py index c07250e1..e862734c 100644 --- a/PaddleNLP/models/matching/bow.py +++ b/PaddleNLP/models/matching/bow.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ bow class """ diff --git a/PaddleNLP/models/matching/cnn.py b/PaddleNLP/models/matching/cnn.py index 6b36e579..9293759b 100644 --- a/PaddleNLP/models/matching/cnn.py +++ b/PaddleNLP/models/matching/cnn.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ cnn class """ @@ -30,8 +43,8 @@ class CNN(object): left_emb = emb_layer.ops(left) right_emb = emb_layer.ops(right) # Presentation context - cnn_layer = layers.SequenceConvPoolLayer( - self.filter_size, self.num_filters, "conv") + cnn_layer = layers.SequenceConvPoolLayer(self.filter_size, + self.num_filters, "conv") left_cnn = cnn_layer.ops(left_emb) right_cnn = cnn_layer.ops(right_emb) # matching layer diff --git a/PaddleNLP/models/matching/gru.py b/PaddleNLP/models/matching/gru.py index 2d7f3e71..36884c6b 100644 --- a/PaddleNLP/models/matching/gru.py +++ b/PaddleNLP/models/matching/gru.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ gru class """ diff --git a/PaddleNLP/models/matching/losses/hinge_loss.py b/PaddleNLP/models/matching/losses/hinge_loss.py index 664a9599..72cf3d7f 100644 --- a/PaddleNLP/models/matching/losses/hinge_loss.py +++ b/PaddleNLP/models/matching/losses/hinge_loss.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ hinge loss """ diff --git a/PaddleNLP/models/matching/losses/log_loss.py b/PaddleNLP/models/matching/losses/log_loss.py index a62abdfc..47743fb0 100644 --- a/PaddleNLP/models/matching/losses/log_loss.py +++ b/PaddleNLP/models/matching/losses/log_loss.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ log loss """ diff --git a/PaddleNLP/models/matching/losses/softmax_cross_entropy_loss.py b/PaddleNLP/models/matching/losses/softmax_cross_entropy_loss.py index 65a1bc31..882f5904 100644 --- a/PaddleNLP/models/matching/losses/softmax_cross_entropy_loss.py +++ b/PaddleNLP/models/matching/losses/softmax_cross_entropy_loss.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ softmax loss """ diff --git a/PaddleNLP/models/matching/lstm.py b/PaddleNLP/models/matching/lstm.py index 3a656da7..3af323c5 100644 --- a/PaddleNLP/models/matching/lstm.py +++ b/PaddleNLP/models/matching/lstm.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ lstm class """ diff --git a/PaddleNLP/models/matching/mm_dnn.py b/PaddleNLP/models/matching/mm_dnn.py index 0a0011ed..a9212d0e 100644 --- a/PaddleNLP/models/matching/mm_dnn.py +++ b/PaddleNLP/models/matching/mm_dnn.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ MMDNN class """ diff --git a/PaddleNLP/models/matching/optimizers/paddle_optimizers.py b/PaddleNLP/models/matching/optimizers/paddle_optimizers.py index ad3276fa..024a8975 100644 --- a/PaddleNLP/models/matching/optimizers/paddle_optimizers.py +++ b/PaddleNLP/models/matching/optimizers/paddle_optimizers.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ optimizer calss """ @@ -43,5 +56,8 @@ class AdamOptimizer(object): Adam optimizer operation """ adam = fluid.optimizer.AdamOptimizer( - self.learning_rate, beta1=self.beta1, beta2=self.beta2, epsilon=self.epsilon) + self.learning_rate, + beta1=self.beta1, + beta2=self.beta2, + epsilon=self.epsilon) adam.minimize(loss) diff --git a/PaddleNLP/models/matching/paddle_layers.py b/PaddleNLP/models/matching/paddle_layers.py index 9c5baa18..4ff82b88 100644 --- a/PaddleNLP/models/matching/paddle_layers.py +++ b/PaddleNLP/models/matching/paddle_layers.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ network layers """ @@ -23,9 +36,11 @@ class EmbeddingLayer(object): """ operation """ - emb = fluid.layers.embedding(input=input, size=[ - self.dict_size, self.emb_dim], is_sparse=True, - param_attr=attr.ParamAttr(name=self.name)) + emb = fluid.layers.embedding( + input=input, + size=[self.dict_size, self.emb_dim], + is_sparse=True, + param_attr=attr.ParamAttr(name=self.name)) return emb @@ -44,8 +59,7 @@ class SequencePoolLayer(object): """ operation """ - pool = fluid.layers.sequence_pool( - input=input, pool_type=self.pool_type) + pool = fluid.layers.sequence_pool(input=input, pool_type=self.pool_type) return pool @@ -66,9 +80,12 @@ class FCLayer(object): """ operation """ - fc = fluid.layers.fc(input=input, size=self.fc_dim, param_attr=attr.ParamAttr( - name="%s.w" % self.name), - bias_attr=attr.ParamAttr(name="%s.b" % self.name), act=self.act, name=self.name) + fc = fluid.layers.fc(input=input, + size=self.fc_dim, + param_attr=attr.ParamAttr(name="%s.w" % self.name), + bias_attr=attr.ParamAttr(name="%s.b" % self.name), + act=self.act, + name=self.name) return fc @@ -88,12 +105,16 @@ class DynamicGRULayer(object): """ operation """ - proj = fluid.layers.fc(input=input, size=self.gru_dim * 3, - param_attr=attr.ParamAttr(name="%s_fc.w" % self.name), - bias_attr=attr.ParamAttr(name="%s_fc.b" % self.name)) - gru = fluid.layers.dynamic_gru(input=proj, size=self.gru_dim, - param_attr=attr.ParamAttr(name="%s.w" % self.name), - bias_attr=attr.ParamAttr(name="%s.b" % self.name)) + proj = fluid.layers.fc( + input=input, + size=self.gru_dim * 3, + param_attr=attr.ParamAttr(name="%s_fc.w" % self.name), + bias_attr=attr.ParamAttr(name="%s_fc.b" % self.name)) + gru = fluid.layers.dynamic_gru( + input=proj, + size=self.gru_dim, + param_attr=attr.ParamAttr(name="%s.w" % self.name), + bias_attr=attr.ParamAttr(name="%s.b" % self.name)) return gru @@ -113,12 +134,16 @@ class DynamicLSTMLayer(object): """ operation """ - proj = fluid.layers.fc(input=input, size=self.lstm_dim * 4, - param_attr=attr.ParamAttr(name="%s_fc.w" % self.name), - bias_attr=attr.ParamAttr(name="%s_fc.b" % self.name)) - lstm, _ = fluid.layers.dynamic_lstm(input=proj, size=self.lstm_dim * 4, - param_attr=attr.ParamAttr(name="%s.w" % self.name), - bias_attr=attr.ParamAttr(name="%s.b" % self.name)) + proj = fluid.layers.fc( + input=input, + size=self.lstm_dim * 4, + param_attr=attr.ParamAttr(name="%s_fc.w" % self.name), + bias_attr=attr.ParamAttr(name="%s_fc.b" % self.name)) + lstm, _ = fluid.layers.dynamic_lstm( + input=proj, + size=self.lstm_dim * 4, + param_attr=attr.ParamAttr(name="%s.w" % self.name), + bias_attr=attr.ParamAttr(name="%s.b" % self.name)) return lstm @@ -161,9 +186,12 @@ class SequenceConvPoolLayer(object): """ operation """ - conv = fluid.nets.sequence_conv_pool(input=input, filter_size=self.filter_size, - num_filters=self.num_filters, - param_attr=attr.ParamAttr(name=self.name), act="relu") + conv = fluid.nets.sequence_conv_pool( + input=input, + filter_size=self.filter_size, + num_filters=self.num_filters, + param_attr=attr.ParamAttr(name=self.name), + act="relu") return conv @@ -259,7 +287,8 @@ class SoftmaxWithCrossEntropyLayer(object): """ operation """ - loss = fluid.layers.softmax_with_cross_entropy(logits=input, label=label) + loss = fluid.layers.softmax_with_cross_entropy( + logits=input, label=label) return loss @@ -354,8 +383,8 @@ class ConstantLayer(object): """ operation """ - constant = fluid.layers.fill_constant_batch_size_like( - input, shape, dtype, value) + constant = fluid.layers.fill_constant_batch_size_like(input, shape, + dtype, value) return constant @@ -396,6 +425,7 @@ class SoftsignLayer(object): softsign = fluid.layers.softsign(input) return softsign + # class MatmulLayer(object): # def __init__(self, transpose_x, transpose_y): # self.transpose_x = transpose_x @@ -405,7 +435,6 @@ class SoftsignLayer(object): # matmul = fluid.layers.matmul(x, y, self.transpose_x, self.transpose_y) # return matmul - # class Conv2dLayer(object): # def __init__(self, num_filters, filter_size, act, name): # self.num_filters = num_filters @@ -417,7 +446,6 @@ class SoftsignLayer(object): # conv = fluid.layers.conv2d(input, self.num_filters, self.filter_size, param_attr=attr.ParamAttr(name="%s.w" % self.name), bias_attr=attr.ParamAttr(name="%s.b" % self.name), act=self.act) # return conv - # class Pool2dLayer(object): # def __init__(self, pool_size, pool_type): # self.pool_size = pool_size diff --git a/PaddleNLP/models/neural_machine_translation/transformer/desc.py b/PaddleNLP/models/neural_machine_translation/transformer/desc.py index 5eeb0dbf..3f60414f 100644 --- a/PaddleNLP/models/neural_machine_translation/transformer/desc.py +++ b/PaddleNLP/models/neural_machine_translation/transformer/desc.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. # The placeholder for batch_size in compile time. Must be -1 currently to be # consistent with some ops' infer-shape output in compile time, such as the # sequence_expand op used in beamsearch decoder. diff --git a/PaddleNLP/models/neural_machine_translation/transformer/model.py b/PaddleNLP/models/neural_machine_translation/transformer/model.py index eec48297..b36731f7 100644 --- a/PaddleNLP/models/neural_machine_translation/transformer/model.py +++ b/PaddleNLP/models/neural_machine_translation/transformer/model.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from functools import partial import numpy as np diff --git a/PaddleNLP/models/representation/ernie.py b/PaddleNLP/models/representation/ernie.py index 90061d9d..23db3ac3 100644 --- a/PaddleNLP/models/representation/ernie.py +++ b/PaddleNLP/models/representation/ernie.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ This module provides ErnieModel and ErnieConfig """ diff --git a/PaddleNLP/models/sequence_labeling/nets.py b/PaddleNLP/models/sequence_labeling/nets.py index 77db87ba..cf9e39a8 100644 --- a/PaddleNLP/models/sequence_labeling/nets.py +++ b/PaddleNLP/models/sequence_labeling/nets.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ The function lex_net(args) define the lexical analysis network structure """ @@ -96,8 +109,7 @@ def lex_net(word, target, args, vocab_size, num_labels): input=emission, label=target, param_attr=fluid.ParamAttr( - name='crfw', - learning_rate=crf_lr)) + name='crfw', learning_rate=crf_lr)) crf_decode = fluid.layers.crf_decoding( input=emission, param_attr=fluid.ParamAttr(name='crfw')) avg_cost = fluid.layers.mean(x=crf_cost) diff --git a/PaddleNLP/models/transformer_encoder.py b/PaddleNLP/models/transformer_encoder.py index 5c20735b..77908896 100644 --- a/PaddleNLP/models/transformer_encoder.py +++ b/PaddleNLP/models/transformer_encoder.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Transformer encoder.""" from __future__ import absolute_import @@ -100,7 +113,7 @@ def multi_head_attention(queries, """ Scaled Dot-Product Attention """ - scaled_q = layers.scale(x=q, scale=d_key ** -0.5) + scaled_q = layers.scale(x=q, scale=d_key**-0.5) product = layers.matmul(x=scaled_q, y=k, transpose_y=True) if attn_bias: product += attn_bias diff --git a/PaddleNLP/preprocess/ernie/task_reader.py b/PaddleNLP/preprocess/ernie/task_reader.py index 209ac795..1ff5c10f 100644 --- a/PaddleNLP/preprocess/ernie/task_reader.py +++ b/PaddleNLP/preprocess/ernie/task_reader.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ This module provides reader for classification and sequence labing """ @@ -18,6 +31,7 @@ from preprocess.padding import pad_batch_data class BaseReader(object): """BaseReader for classify and sequence labeling task""" + def __init__(self, vocab_path, label_map_config=None, @@ -211,6 +225,7 @@ class BaseReader(object): class ClassifyReader(BaseReader): """ClassifyReader""" + def _read_tsv(self, input_file, quotechar=None): """Reads a tab separated value file.""" with open(input_file, "r") as f: @@ -239,7 +254,10 @@ class ClassifyReader(BaseReader): # padding padded_token_ids, input_mask, seq_lens = pad_batch_data( - batch_token_ids, pad_idx=self.pad_id, return_input_mask=True, return_seq_lens=True) + batch_token_ids, + pad_idx=self.pad_id, + return_input_mask=True, + return_seq_lens=True) padded_text_type_ids = pad_batch_data( batch_text_type_ids, pad_idx=self.pad_id) padded_position_ids = pad_batch_data( @@ -255,6 +273,7 @@ class ClassifyReader(BaseReader): class SequenceLabelReader(BaseReader): """SequenceLabelReader""" + def _pad_batch_records(self, batch_records): batch_token_ids = [record.token_ids for record in batch_records] batch_text_type_ids = [record.text_type_ids for record in batch_records] @@ -314,7 +333,9 @@ class SequenceLabelReader(BaseReader): position_ids = list(range(len(token_ids))) text_type_ids = [0] * len(token_ids) no_entity_id = len(self.label_map) - 1 - labels = [label if label in self.label_map else u"O" for label in labels] + labels = [ + label if label in self.label_map else u"O" for label in labels + ] label_ids = [no_entity_id] + [ self.label_map[label] for label in labels ] + [no_entity_id] @@ -332,6 +353,7 @@ class SequenceLabelReader(BaseReader): class ExtractEmbeddingReader(BaseReader): """ExtractEmbeddingReader""" + def _pad_batch_records(self, batch_records): batch_token_ids = [record.token_ids for record in batch_records] batch_text_type_ids = [record.text_type_ids for record in batch_records] diff --git a/PaddleNLP/preprocess/padding.py b/PaddleNLP/preprocess/padding.py index 630a4c18..6094562d 100644 --- a/PaddleNLP/preprocess/padding.py +++ b/PaddleNLP/preprocess/padding.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ Mask, padding and batching. """ diff --git a/PaddleNLP/preprocess/tokenizer/reader.py b/PaddleNLP/preprocess/tokenizer/reader.py index 46b88a1c..39274f34 100644 --- a/PaddleNLP/preprocess/tokenizer/reader.py +++ b/PaddleNLP/preprocess/tokenizer/reader.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ The file_reader converts raw corpus to input. """ @@ -5,6 +18,7 @@ import os import __future__ import io + def file_reader(file_dir, word2id_dict, label2id_dict, @@ -15,6 +29,7 @@ def file_reader(file_dir, """ word_dict_len = max(map(int, word2id_dict.values())) + 1 label_dict_len = max(map(int, label2id_dict.values())) + 1 + def reader(): """ the data generator @@ -24,7 +39,8 @@ def file_reader(file_dir, for filename in files: if not filename.startswith(filename_feature): continue - for line in io.open(os.path.join(root, filename), 'r', encoding='utf8'): + for line in io.open( + os.path.join(root, filename), 'r', encoding='utf8'): index += 1 bad_line = False line = line.strip("\n") @@ -52,8 +68,9 @@ def file_reader(file_dir, else: target_idx.append(int(label2id_dict["O"])) if len(word_idx) != len(target_idx): - continue + continue yield word_idx, target_idx + return reader @@ -68,6 +85,7 @@ def test_reader(file_dir, #print (word2id_dict) word_dict_len = max(map(int, word2id_dict.values())) + 1 label_dict_len = max(map(int, label2id_dict.values())) + 1 + #print word_dict_len #print label_dict_len def reader(): @@ -94,6 +112,7 @@ def test_reader(file_dir, else: word_idx.append(int(word2id_dict["OOV"])) yield word_idx, words + return reader diff --git a/PaddleNLP/preprocess/tokenizer/tokenizer.py b/PaddleNLP/preprocess/tokenizer/tokenizer.py index 910f45a7..f1f31f71 100644 --- a/PaddleNLP/preprocess/tokenizer/tokenizer.py +++ b/PaddleNLP/preprocess/tokenizer/tokenizer.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ This module provides wordseg tools """ @@ -11,12 +24,13 @@ import time import sys import io -if sys.version_info > (3,): +if sys.version_info > (3, ): sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') else: reload(sys) sys.setdefaultencoding("utf8") + def parse_args(): """ Arguments Parse @@ -26,32 +40,27 @@ def parse_args(): '--batch_size', type=int, default=5, - help='The size of a batch. (default: %(default)d)' - ) + help='The size of a batch. (default: %(default)d)') parser.add_argument( '--model_path', type=str, default='./conf/model', - help='A path to the model. (default: %(default)s)' - ) + help='A path to the model. (default: %(default)s)') parser.add_argument( '--test_data_dir', type=str, default='./data/test_data', - help='A directory with test data files. (default: %(default)s)' - ) + help='A directory with test data files. (default: %(default)s)') parser.add_argument( "--word_dict_path", type=str, default="./conf/word.dic", - help="The path of the word dictionary. (default: %(default)s)" - ) + help="The path of the word dictionary. (default: %(default)s)") parser.add_argument( "--label_dict_path", type=str, default="./conf/tag.dic", - help="The path of the label dictionary. (default: %(default)s)" - ) + help="The path of the label dictionary. (default: %(default)s)") parser.add_argument( "--word_rep_dict_path", type=str, @@ -104,17 +113,15 @@ def infer(args): Tokenize """ id2word_dict = reader.load_dict(args.word_dict_path) - word2id_dict = reader.load_reverse_dict(args.word_dict_path) + word2id_dict = reader.load_reverse_dict(args.word_dict_path) id2label_dict = reader.load_dict(args.label_dict_path) label2id_dict = reader.load_reverse_dict(args.label_dict_path) q2b_dict = reader.load_dict(args.word_rep_dict_path) test_data = paddle.batch( - reader.test_reader(args.test_data_dir, - word2id_dict, - label2id_dict, - q2b_dict), - batch_size = args.batch_size) + reader.test_reader(args.test_data_dir, word2id_dict, label2id_dict, + q2b_dict), + batch_size=args.batch_size) place = fluid.CPUPlace() #place = fluid.CUDAPlace(0) exe = fluid.Executor(place) @@ -130,9 +137,9 @@ def infer(args): #print(word_idx) word_list = [x[1] for x in data] (crf_decode, ) = exe.run(inference_program, - feed={"word":word_idx}, - fetch_list=fetch_targets, - return_numpy=False) + feed={"word": word_idx}, + fetch_list=fetch_targets, + return_numpy=False) lod_info = (crf_decode.lod())[0] np_data = np.array(crf_decode) assert len(data) == len(lod_info) - 1 @@ -145,7 +152,7 @@ def infer(args): cur_full_tag = "" words = word_list[sen_index] for tag_index in range(lod_info[sen_index], - lod_info[sen_index + 1]): + lod_info[sen_index + 1]): cur_word = words[word_index] cur_tag = id2label_dict[str(np_data[tag_index][0])] if cur_tag.endswith("-B") or cur_tag.endswith("O"): diff --git a/PaddleNLP/sentiment_classification/config.py b/PaddleNLP/sentiment_classification/config.py index 49023d27..a1b37518 100644 --- a/PaddleNLP/sentiment_classification/config.py +++ b/PaddleNLP/sentiment_classification/config.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ Senta model. """ @@ -11,10 +24,12 @@ import json import numpy as np import paddle.fluid as fluid + class SentaConfig(object): """ Senta Config """ + def __init__(self, config_path): self._config_dict = self._parse(config_path) @@ -24,7 +39,7 @@ class SentaConfig(object): config_dict = json.load(json_file) except Exception: raise IOError("Error in parsing bert model config file '%s'" % - config_path) + config_path) else: return config_dict diff --git a/PaddleNLP/sentiment_classification/reader.py b/PaddleNLP/sentiment_classification/reader.py index 38a0f6e9..939cf648 100644 --- a/PaddleNLP/sentiment_classification/reader.py +++ b/PaddleNLP/sentiment_classification/reader.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ Senta Reader """ @@ -12,15 +25,13 @@ from utils import data_reader import paddle import paddle.fluid as fluid + class SentaProcessor(object): """ Processor class for data convertors for senta """ - def __init__(self, - data_dir, - vocab_path, - random_seed=None): + def __init__(self, data_dir, vocab_path, random_seed=None): self.data_dir = data_dir self.vocab = load_vocab(vocab_path) self.num_examples = {"train": -1, "dev": -1, "infer": -1} @@ -30,19 +41,22 @@ class SentaProcessor(object): """ Load training examples """ - return data_reader((self.data_dir + "/train.tsv"), self.vocab, self.num_examples, "train", epoch) + return data_reader((self.data_dir + "/train.tsv"), self.vocab, + self.num_examples, "train", epoch) def get_dev_examples(self, data_dir, epoch): """ Load dev examples """ - return data_reader((self.data_dir + "/dev.tsv"), self.vocab, self.num_examples, "dev", epoch) + return data_reader((self.data_dir + "/dev.tsv"), self.vocab, + self.num_examples, "dev", epoch) def get_test_examples(self, data_dir, epoch): """ Load test examples """ - return data_reader((self.data_dir + "/test.tsv"), self.vocab, self.num_examples, "infer", epoch) + return data_reader((self.data_dir + "/test.tsv"), self.vocab, + self.num_examples, "infer", epoch) def get_labels(self): """ @@ -70,11 +84,14 @@ class SentaProcessor(object): Generate data for train, dev or infer """ if phase == "train": - return paddle.batch(self.get_train_examples(self.data_dir, epoch), batch_size) + return paddle.batch( + self.get_train_examples(self.data_dir, epoch), batch_size) elif phase == "dev": - return paddle.batch(self.get_dev_examples(self.data_dir, epoch), batch_size) + return paddle.batch( + self.get_dev_examples(self.data_dir, epoch), batch_size) elif phase == "infer": - return paddle.batch(self.get_test_examples(self.data_dir, epoch), batch_size) + return paddle.batch( + self.get_test_examples(self.data_dir, epoch), batch_size) else: raise ValueError( "Unknown phase, which should be in ['train', 'dev', 'infer'].") diff --git a/PaddleNLP/sentiment_classification/run_classifier.py b/PaddleNLP/sentiment_classification/run_classifier.py index 4ad4e56e..c95ecb45 100644 --- a/PaddleNLP/sentiment_classification/run_classifier.py +++ b/PaddleNLP/sentiment_classification/run_classifier.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ Sentiment Classification Task """ diff --git a/PaddleNLP/sentiment_classification/run_ernie_classifier.py b/PaddleNLP/sentiment_classification/run_ernie_classifier.py index c2b910a1..d6b84945 100644 --- a/PaddleNLP/sentiment_classification/run_ernie_classifier.py +++ b/PaddleNLP/sentiment_classification/run_ernie_classifier.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ Sentiment Classification Task """ diff --git a/PaddleNLP/sentiment_classification/utils.py b/PaddleNLP/sentiment_classification/utils.py index c117178b..4e261f2e 100644 --- a/PaddleNLP/sentiment_classification/utils.py +++ b/PaddleNLP/sentiment_classification/utils.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ Arguments for configuration """ @@ -31,6 +44,7 @@ class ArgumentGroup(object): """ Argument Class """ + def __init__(self, parser, title, des): self._group = parser.add_argument_group(title=title, description=des) @@ -79,7 +93,7 @@ def init_checkpoint(exe, init_checkpoint_path, main_program): predicate=existed_persitables) print("Load model from {}".format(init_checkpoint_path)) - + def data_reader(file_path, word_dict, num_examples, phrase, epoch): """ Convert word sequence into slot @@ -95,15 +109,17 @@ def data_reader(file_path, word_dict, num_examples, phrase, epoch): sys.stderr.write("[NOTICE] Error Format Line!") continue label = int(cols[1]) - wids = [word_dict[x] if x in word_dict else unk_id - for x in cols[0].split(" ")] + wids = [ + word_dict[x] if x in word_dict else unk_id + for x in cols[0].split(" ") + ] all_data.append((wids, label)) if phrase == "train": random.shuffle(all_data) num_examples[phrase] = len(all_data) - + def reader(): """ Reader Function @@ -111,8 +127,10 @@ def data_reader(file_path, word_dict, num_examples, phrase, epoch): for epoch_index in range(epoch): for doc, label in all_data: yield doc, label + return reader + def load_vocab(file_path): """ load the given vocabulary diff --git a/PaddleNLP/similarity_net/config.py b/PaddleNLP/similarity_net/config.py index b9cc1544..5e541a0b 100644 --- a/PaddleNLP/similarity_net/config.py +++ b/PaddleNLP/similarity_net/config.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ SimNet config """ @@ -21,12 +34,14 @@ class SimNetConfig(object): with open(config_path) as json_file: config_dict = json.load(json_file) except Exception: - raise IOError("Error in parsing simnet model config file '%s'" % config_path) + raise IOError("Error in parsing simnet model config file '%s'" % + config_path) else: if config_dict["task_mode"] != self.task_mode: raise ValueError( - "the config '{}' does not match the task_mode '{}'".format(self.config_path, self.task_mode)) + "the config '{}' does not match the task_mode '{}'".format( + self.config_path, self.task_mode)) return config_dict def __getitem__(self, key): diff --git a/PaddleNLP/similarity_net/evaluate/unicom_compute_pos_neg.py b/PaddleNLP/similarity_net/evaluate/unicom_compute_pos_neg.py index f9ba7a51..13ff8307 100644 --- a/PaddleNLP/similarity_net/evaluate/unicom_compute_pos_neg.py +++ b/PaddleNLP/similarity_net/evaluate/unicom_compute_pos_neg.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ comput unicom """ diff --git a/PaddleNLP/similarity_net/evaluate/unicom_split.py b/PaddleNLP/similarity_net/evaluate/unicom_split.py index 0d93116d..5edb201b 100644 --- a/PaddleNLP/similarity_net/evaluate/unicom_split.py +++ b/PaddleNLP/similarity_net/evaluate/unicom_split.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ split unicom file """ diff --git a/PaddleNLP/similarity_net/reader.py b/PaddleNLP/similarity_net/reader.py index a33c57b3..d5971c68 100644 --- a/PaddleNLP/similarity_net/reader.py +++ b/PaddleNLP/similarity_net/reader.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ SimNet reader """ @@ -25,15 +38,24 @@ class SimNetProcessor(object): Reader with Pairwise """ if mode == "valid": - with codecs.open(self.args.valid_data_dir, "r", "utf-8") as file: + with codecs.open(self.args.valid_data_dir, "r", + "utf-8") as file: for line in file: query, title, label = line.strip().split("\t") - if len(query) == 0 or len(title) == 0 or len(label) == 0 or not label.isdigit() or int( - label) not in [0, 1]: - logging.warning("line not match format in test file") + if len(query) == 0 or len(title) == 0 or len( + label) == 0 or not label.isdigit() or int( + label) not in [0, 1]: + logging.warning( + "line not match format in test file") continue - query = [self.vocab[word] for word in query.split(" ") if word in self.vocab] - title = [self.vocab[word] for word in title.split(" ") if word in self.vocab] + query = [ + self.vocab[word] for word in query.split(" ") + if word in self.vocab + ] + title = [ + self.vocab[word] for word in title.split(" ") + if word in self.vocab + ] if len(query) == 0: query = [0] if len(title) == 0: @@ -43,27 +65,47 @@ class SimNetProcessor(object): with codecs.open(self.args.test_data_dir, "r", "utf-8") as file: for line in file: query, title, label = line.strip().split("\t") - if len(query) == 0 or len(title) == 0 or len(label) == 0 or not label.isdigit() or int( - label) not in [0, 1]: - logging.warning("line not match format in test file") + if len(query) == 0 or len(title) == 0 or len( + label) == 0 or not label.isdigit() or int( + label) not in [0, 1]: + logging.warning( + "line not match format in test file") continue - query = [self.vocab[word] for word in query.split(" ") if word in self.vocab] - title = [self.vocab[word] for word in title.split(" ") if word in self.vocab] + query = [ + self.vocab[word] for word in query.split(" ") + if word in self.vocab + ] + title = [ + self.vocab[word] for word in title.split(" ") + if word in self.vocab + ] if len(query) == 0: query = [0] if len(title) == 0: title = [0] yield [query, title] else: - with codecs.open(self.args.train_data_dir, "r", "utf-8") as file: + with codecs.open(self.args.train_data_dir, "r", + "utf-8") as file: for line in file: query, pos_title, neg_title = line.strip().split("\t") - if len(query) == 0 or len(pos_title) == 0 or len(neg_title) == 0: - logging.warning("line not match format in test file") + if len(query) == 0 or len(pos_title) == 0 or len( + neg_title) == 0: + logging.warning( + "line not match format in test file") continue - query = [self.vocab[word] for word in query.split(" ") if word in self.vocab] - pos_title = [self.vocab[word] for word in pos_title.split(" ") if word in self.vocab] - neg_title = [self.vocab[word] for word in neg_title.split(" ") if word in self.vocab] + query = [ + self.vocab[word] for word in query.split(" ") + if word in self.vocab + ] + pos_title = [ + self.vocab[word] for word in pos_title.split(" ") + if word in self.vocab + ] + neg_title = [ + self.vocab[word] for word in neg_title.split(" ") + if word in self.vocab + ] if len(query) == 0: query = [0] if len(pos_title) == 0: @@ -77,15 +119,24 @@ class SimNetProcessor(object): Reader with Pointwise """ if mode == "valid": - with codecs.open(self.args.valid_data_dir, "r", "utf-8") as file: + with codecs.open(self.args.valid_data_dir, "r", + "utf-8") as file: for line in file: query, title, label = line.strip().split("\t") - if len(query) == 0 or len(title) == 0 or len(label) == 0 or not label.isdigit() or int( - label) not in [0, 1]: - logging.warning("line not match format in test file") + if len(query) == 0 or len(title) == 0 or len( + label) == 0 or not label.isdigit() or int( + label) not in [0, 1]: + logging.warning( + "line not match format in test file") continue - query = [self.vocab[word] for word in query.split(" ") if word in self.vocab] - title = [self.vocab[word] for word in title.split(" ") if word in self.vocab] + query = [ + self.vocab[word] for word in query.split(" ") + if word in self.vocab + ] + title = [ + self.vocab[word] for word in title.split(" ") + if word in self.vocab + ] if len(query) == 0: query = [0] if len(title) == 0: @@ -95,27 +146,44 @@ class SimNetProcessor(object): with codecs.open(self.args.test_data_dir, "r", "utf-8") as file: for line in file: query, title, label = line.strip().split("\t") - if len(query) == 0 or len(title) == 0 or len(label) == 0 or not label.isdigit() or int( - label) not in [0, 1]: - logging.warning("line not match format in test file") + if len(query) == 0 or len(title) == 0 or len( + label) == 0 or not label.isdigit() or int( + label) not in [0, 1]: + logging.warning( + "line not match format in test file") continue - query = [self.vocab[word] for word in query.split(" ") if word in self.vocab] - title = [self.vocab[word] for word in title.split(" ") if word in self.vocab] + query = [ + self.vocab[word] for word in query.split(" ") + if word in self.vocab + ] + title = [ + self.vocab[word] for word in title.split(" ") + if word in self.vocab + ] if len(query) == 0: query = [0] if len(title) == 0: title = [0] yield [query, title] else: - with codecs.open(self.args.train_data_dir, "r", "utf-8") as file: + with codecs.open(self.args.train_data_dir, "r", + "utf-8") as file: for line in file: query, title, label = line.strip().split("\t") - if len(query) == 0 or len(title) == 0 or len(label) == 0 or not label.isdigit() or int( - label) not in [0, 1]: - logging.warning("line not match format in test file") + if len(query) == 0 or len(title) == 0 or len( + label) == 0 or not label.isdigit() or int( + label) not in [0, 1]: + logging.warning( + "line not match format in test file") continue - query = [self.vocab[word] for word in query.split(" ") if word in self.vocab] - title = [self.vocab[word] for word in title.split(" ") if word in self.vocab] + query = [ + self.vocab[word] for word in query.split(" ") + if word in self.vocab + ] + title = [ + self.vocab[word] for word in title.split(" ") + if word in self.vocab + ] label = int(label) if len(query) == 0: query = [0] @@ -138,8 +206,14 @@ class SimNetProcessor(object): if len(query) == 0 or len(title) == 0: logging.warning("line not match format in test file") continue - query = [self.vocab[word] for word in query.split(" ") if word in self.vocab] - title = [self.vocab[word] for word in title.split(" ") if word in self.vocab] + query = [ + self.vocab[word] for word in query.split(" ") + if word in self.vocab + ] + title = [ + self.vocab[word] for word in title.split(" ") + if word in self.vocab + ] if len(query) == 0: query = [0] if len(title) == 0: diff --git a/PaddleNLP/similarity_net/run_classifier.py b/PaddleNLP/similarity_net/run_classifier.py index 4413c24c..8e31a133 100644 --- a/PaddleNLP/similarity_net/run_classifier.py +++ b/PaddleNLP/similarity_net/run_classifier.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ SimNet Task """ @@ -164,16 +177,16 @@ def train(conf_dict, args): infer_program = fluid.default_main_program().clone(for_test=True) avg_cost = loss.compute(pred, label) avg_cost.persistable = True - + # operate Optimization optimizer.ops(avg_cost) executor = fluid.Executor(place) executor.run(fluid.default_startup_program()) if args.init_checkpoint is not None: - utils.init_checkpoint(executor, args.init_checkpoint, - fluid.default_startup_program()) - + utils.init_checkpoint(executor, args.init_checkpoint, + fluid.default_startup_program()) + # Get and run executor parallel_executor = fluid.ParallelExecutor( use_cuda=args.use_cuda, diff --git a/PaddleNLP/similarity_net/utils.py b/PaddleNLP/similarity_net/utils.py index 2f11717e..7e4c5075 100644 --- a/PaddleNLP/similarity_net/utils.py +++ b/PaddleNLP/similarity_net/utils.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. # --coding=utf-8 """ SimNet utilities. @@ -17,6 +30,7 @@ import paddle.fluid as fluid ******functions for file processing****** """ + def load_vocab(file_path): """ load the given vocabulary @@ -47,7 +61,8 @@ def get_result_file(args): """ with codecs.open(args.test_data_dir, "r", "utf-8") as test_file: with codecs.open("predictions.txt", "r", "utf-8") as predictions_file: - with codecs.open(args.test_result_path, "w", "utf-8") as test_result_file: + with codecs.open(args.test_result_path, "w", + "utf-8") as test_result_file: test_datas = [line.strip("\n") for line in test_file] predictions = [line.strip("\n") for line in predictions_file] for test_data, prediction in zip(test_datas, predictions): @@ -287,7 +302,7 @@ def init_checkpoint(exe, init_checkpoint_path, main_program): """ assert os.path.exists( init_checkpoint_path), "[%s] cann't be found." % init_checkpoint_path - + def existed_persitables(var): if not fluid.io.is_persistable(var): return False @@ -299,4 +314,3 @@ def init_checkpoint(exe, init_checkpoint_path, main_program): main_program=main_program, predicate=existed_persitables) print("Load model from {}".format(init_checkpoint_path)) - diff --git a/PaddleNLP/unarchived/chinese_ner/infer.py b/PaddleNLP/unarchived/chinese_ner/infer.py index 4bcb9e0b..02aa9fb5 100644 --- a/PaddleNLP/unarchived/chinese_ner/infer.py +++ b/PaddleNLP/unarchived/chinese_ner/infer.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import numpy as np import argparse import time diff --git a/PaddleNLP/unarchived/chinese_ner/reader.py b/PaddleNLP/unarchived/chinese_ner/reader.py index 9aa49c9f..c482d5e4 100644 --- a/PaddleNLP/unarchived/chinese_ner/reader.py +++ b/PaddleNLP/unarchived/chinese_ner/reader.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import os diff --git a/PaddleNLP/unarchived/chinese_ner/train.py b/PaddleNLP/unarchived/chinese_ner/train.py index 894260d3..5f0ddb8c 100644 --- a/PaddleNLP/unarchived/chinese_ner/train.py +++ b/PaddleNLP/unarchived/chinese_ner/train.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import os import math import time diff --git a/PaddleNLP/unarchived/deep_attention_matching_net/model.py b/PaddleNLP/unarchived/deep_attention_matching_net/model.py index 05c20531..e7a531c7 100644 --- a/PaddleNLP/unarchived/deep_attention_matching_net/model.py +++ b/PaddleNLP/unarchived/deep_attention_matching_net/model.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import six import numpy as np import paddle.fluid as fluid diff --git a/PaddleNLP/unarchived/deep_attention_matching_net/test_and_evaluate.py b/PaddleNLP/unarchived/deep_attention_matching_net/test_and_evaluate.py index 00648626..3119f1bf 100644 --- a/PaddleNLP/unarchived/deep_attention_matching_net/test_and_evaluate.py +++ b/PaddleNLP/unarchived/deep_attention_matching_net/test_and_evaluate.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import os import six import numpy as np diff --git a/PaddleNLP/unarchived/deep_attention_matching_net/train_and_evaluate.py b/PaddleNLP/unarchived/deep_attention_matching_net/train_and_evaluate.py index 28d1c655..6a26be16 100644 --- a/PaddleNLP/unarchived/deep_attention_matching_net/train_and_evaluate.py +++ b/PaddleNLP/unarchived/deep_attention_matching_net/train_and_evaluate.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import os import six import numpy as np diff --git a/PaddleNLP/unarchived/deep_attention_matching_net/utils/douban_evaluation.py b/PaddleNLP/unarchived/deep_attention_matching_net/utils/douban_evaluation.py index 4983975a..b94c643b 100644 --- a/PaddleNLP/unarchived/deep_attention_matching_net/utils/douban_evaluation.py +++ b/PaddleNLP/unarchived/deep_attention_matching_net/utils/douban_evaluation.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import sys import six import numpy as np diff --git a/PaddleNLP/unarchived/deep_attention_matching_net/utils/evaluation.py b/PaddleNLP/unarchived/deep_attention_matching_net/utils/evaluation.py index 350003c2..49dc98ad 100644 --- a/PaddleNLP/unarchived/deep_attention_matching_net/utils/evaluation.py +++ b/PaddleNLP/unarchived/deep_attention_matching_net/utils/evaluation.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import sys import six diff --git a/PaddleNLP/unarchived/deep_attention_matching_net/utils/layers.py b/PaddleNLP/unarchived/deep_attention_matching_net/utils/layers.py index 530c6ba5..cc3f12b8 100644 --- a/PaddleNLP/unarchived/deep_attention_matching_net/utils/layers.py +++ b/PaddleNLP/unarchived/deep_attention_matching_net/utils/layers.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import paddle.fluid as fluid diff --git a/PaddleNLP/unarchived/deep_attention_matching_net/utils/reader.py b/PaddleNLP/unarchived/deep_attention_matching_net/utils/reader.py index b581acc1..f89a7c09 100644 --- a/PaddleNLP/unarchived/deep_attention_matching_net/utils/reader.py +++ b/PaddleNLP/unarchived/deep_attention_matching_net/utils/reader.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import six import numpy as np @@ -190,7 +203,8 @@ def make_one_batch_input(data_batches, index): turns = np.array(data_batches["turns"][index]).astype('int64') tt_turns_len = np.array(data_batches["tt_turns_len"][index]).astype('int64') - every_turn_len = np.array(data_batches["every_turn_len"][index]).astype('int64') + every_turn_len = np.array(data_batches["every_turn_len"][index]).astype( + 'int64') response = np.array(data_batches["response"][index]).astype('int64') response_len = np.array(data_batches["response_len"][index]).astype('int64') diff --git a/PaddleNLP/unarchived/deep_attention_matching_net/utils/util.py b/PaddleNLP/unarchived/deep_attention_matching_net/utils/util.py index 9da8571f..f521a6f5 100644 --- a/PaddleNLP/unarchived/deep_attention_matching_net/utils/util.py +++ b/PaddleNLP/unarchived/deep_attention_matching_net/utils/util.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import six import os diff --git a/PaddleNLP/unarchived/language_model/gru/infer.py b/PaddleNLP/unarchived/language_model/gru/infer.py index e7595d0b..f10383af 100644 --- a/PaddleNLP/unarchived/language_model/gru/infer.py +++ b/PaddleNLP/unarchived/language_model/gru/infer.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import sys import time import math diff --git a/PaddleNLP/unarchived/language_model/gru/train.py b/PaddleNLP/unarchived/language_model/gru/train.py index 8f273106..3e618334 100644 --- a/PaddleNLP/unarchived/language_model/gru/train.py +++ b/PaddleNLP/unarchived/language_model/gru/train.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import os import sys import time diff --git a/PaddleNLP/unarchived/language_model/gru/train_on_cloud.py b/PaddleNLP/unarchived/language_model/gru/train_on_cloud.py index 9a912a1e..e5541ee4 100644 --- a/PaddleNLP/unarchived/language_model/gru/train_on_cloud.py +++ b/PaddleNLP/unarchived/language_model/gru/train_on_cloud.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import os import sys import time diff --git a/PaddleNLP/unarchived/language_model/gru/utils.py b/PaddleNLP/unarchived/language_model/gru/utils.py index dd03a898..4535999d 100644 --- a/PaddleNLP/unarchived/language_model/gru/utils.py +++ b/PaddleNLP/unarchived/language_model/gru/utils.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import sys import time import numpy as np diff --git a/PaddleNLP/unarchived/machine_reading_comprehension/paragraph_extraction.py b/PaddleNLP/unarchived/machine_reading_comprehension/paragraph_extraction.py index 4a74a9ba..6729b84d 100644 --- a/PaddleNLP/unarchived/machine_reading_comprehension/paragraph_extraction.py +++ b/PaddleNLP/unarchived/machine_reading_comprehension/paragraph_extraction.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. #!/usr/bin/python #-*- coding:utf-8 -*- diff --git a/PaddleNLP/unarchived/machine_reading_comprehension/utils/marco_tokenize_data.py b/PaddleNLP/unarchived/machine_reading_comprehension/utils/marco_tokenize_data.py index a93c2835..38e56b5e 100644 --- a/PaddleNLP/unarchived/machine_reading_comprehension/utils/marco_tokenize_data.py +++ b/PaddleNLP/unarchived/machine_reading_comprehension/utils/marco_tokenize_data.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. #coding=utf8 import os, sys, json diff --git a/PaddleNLP/unarchived/machine_reading_comprehension/utils/marcov1_to_dureader.py b/PaddleNLP/unarchived/machine_reading_comprehension/utils/marcov1_to_dureader.py index 022db4dd..83384482 100644 --- a/PaddleNLP/unarchived/machine_reading_comprehension/utils/marcov1_to_dureader.py +++ b/PaddleNLP/unarchived/machine_reading_comprehension/utils/marcov1_to_dureader.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. #coding=utf8 import sys diff --git a/PaddleNLP/unarchived/machine_reading_comprehension/utils/marcov2_to_v1_tojsonl.py b/PaddleNLP/unarchived/machine_reading_comprehension/utils/marcov2_to_v1_tojsonl.py index fcb24756..5b102200 100644 --- a/PaddleNLP/unarchived/machine_reading_comprehension/utils/marcov2_to_v1_tojsonl.py +++ b/PaddleNLP/unarchived/machine_reading_comprehension/utils/marcov2_to_v1_tojsonl.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import sys import json import pandas as pd diff --git a/PaddleNLP/unarchived/neural_machine_translation/rnn_search/attention_model.py b/PaddleNLP/unarchived/neural_machine_translation/rnn_search/attention_model.py index eba1d5f3..aef110f5 100644 --- a/PaddleNLP/unarchived/neural_machine_translation/rnn_search/attention_model.py +++ b/PaddleNLP/unarchived/neural_machine_translation/rnn_search/attention_model.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/PaddleNLP/unarchived/neural_machine_translation/transformer/config.py b/PaddleNLP/unarchived/neural_machine_translation/transformer/config.py index 823341ed..0be63dee 100644 --- a/PaddleNLP/unarchived/neural_machine_translation/transformer/config.py +++ b/PaddleNLP/unarchived/neural_machine_translation/transformer/config.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. class TrainTaskConfig(object): # support both CPU and GPU now. use_gpu = True diff --git a/PaddleNLP/unarchived/neural_machine_translation/transformer/infer.py b/PaddleNLP/unarchived/neural_machine_translation/transformer/infer.py index 4ba1852f..9c424692 100644 --- a/PaddleNLP/unarchived/neural_machine_translation/transformer/infer.py +++ b/PaddleNLP/unarchived/neural_machine_translation/transformer/infer.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import argparse import ast import multiprocessing diff --git a/PaddleNLP/unarchived/neural_machine_translation/transformer/model.py b/PaddleNLP/unarchived/neural_machine_translation/transformer/model.py index 5b19be6a..f4e6506a 100644 --- a/PaddleNLP/unarchived/neural_machine_translation/transformer/model.py +++ b/PaddleNLP/unarchived/neural_machine_translation/transformer/model.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from functools import partial import numpy as np diff --git a/PaddleNLP/unarchived/neural_machine_translation/transformer/optim.py b/PaddleNLP/unarchived/neural_machine_translation/transformer/optim.py index 38ba3416..a4d034e9 100644 --- a/PaddleNLP/unarchived/neural_machine_translation/transformer/optim.py +++ b/PaddleNLP/unarchived/neural_machine_translation/transformer/optim.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import numpy as np import paddle.fluid as fluid diff --git a/PaddleNLP/unarchived/neural_machine_translation/transformer/profile.py b/PaddleNLP/unarchived/neural_machine_translation/transformer/profile.py index 0629f64c..d124947b 100644 --- a/PaddleNLP/unarchived/neural_machine_translation/transformer/profile.py +++ b/PaddleNLP/unarchived/neural_machine_translation/transformer/profile.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import argparse import ast import contextlib diff --git a/PaddleNLP/unarchived/neural_machine_translation/transformer/reader.py b/PaddleNLP/unarchived/neural_machine_translation/transformer/reader.py index 0a846825..4869213a 100644 --- a/PaddleNLP/unarchived/neural_machine_translation/transformer/reader.py +++ b/PaddleNLP/unarchived/neural_machine_translation/transformer/reader.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import glob import six import os diff --git a/PaddleNLP/unarchived/neural_machine_translation/transformer/train.py b/PaddleNLP/unarchived/neural_machine_translation/transformer/train.py index 3ae3955b..fa719c94 100644 --- a/PaddleNLP/unarchived/neural_machine_translation/transformer/train.py +++ b/PaddleNLP/unarchived/neural_machine_translation/transformer/train.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import argparse import ast import copy diff --git a/PaddleNLP/unarchived/sequence_tagging_for_ner/infer.py b/PaddleNLP/unarchived/sequence_tagging_for_ner/infer.py index 9319aad8..44f547f0 100644 --- a/PaddleNLP/unarchived/sequence_tagging_for_ner/infer.py +++ b/PaddleNLP/unarchived/sequence_tagging_for_ner/infer.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from __future__ import print_function import numpy as np diff --git a/PaddleNLP/unarchived/sequence_tagging_for_ner/network_conf.py b/PaddleNLP/unarchived/sequence_tagging_for_ner/network_conf.py index 17ee1951..27471769 100644 --- a/PaddleNLP/unarchived/sequence_tagging_for_ner/network_conf.py +++ b/PaddleNLP/unarchived/sequence_tagging_for_ner/network_conf.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import math import paddle.fluid as fluid diff --git a/PaddleNLP/unarchived/sequence_tagging_for_ner/reader.py b/PaddleNLP/unarchived/sequence_tagging_for_ner/reader.py index 5050d0bf..aed2dd3d 100644 --- a/PaddleNLP/unarchived/sequence_tagging_for_ner/reader.py +++ b/PaddleNLP/unarchived/sequence_tagging_for_ner/reader.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ Conll03 dataset. """ diff --git a/PaddleNLP/unarchived/sequence_tagging_for_ner/train.py b/PaddleNLP/unarchived/sequence_tagging_for_ner/train.py index 68e62137..64a34d26 100644 --- a/PaddleNLP/unarchived/sequence_tagging_for_ner/train.py +++ b/PaddleNLP/unarchived/sequence_tagging_for_ner/train.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from __future__ import print_function import os diff --git a/PaddleNLP/unarchived/sequence_tagging_for_ner/utils.py b/PaddleNLP/unarchived/sequence_tagging_for_ner/utils.py index f40f1bb1..21567c06 100644 --- a/PaddleNLP/unarchived/sequence_tagging_for_ner/utils.py +++ b/PaddleNLP/unarchived/sequence_tagging_for_ner/utils.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. #!/usr/bin/env python # -*- coding: utf-8 -*- import logging diff --git a/PaddleNLP/unarchived/sequence_tagging_for_ner/utils_extend.py b/PaddleNLP/unarchived/sequence_tagging_for_ner/utils_extend.py index 03e7e62f..930b19ec 100644 --- a/PaddleNLP/unarchived/sequence_tagging_for_ner/utils_extend.py +++ b/PaddleNLP/unarchived/sequence_tagging_for_ner/utils_extend.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import numpy as np import paddle.fluid as fluid diff --git a/PaddleNLP/unarchived/text_classification/clouds/scdb_parallel_executor.py b/PaddleNLP/unarchived/text_classification/clouds/scdb_parallel_executor.py index d11da00e..7a3cbb11 100644 --- a/PaddleNLP/unarchived/text_classification/clouds/scdb_parallel_executor.py +++ b/PaddleNLP/unarchived/text_classification/clouds/scdb_parallel_executor.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import unittest import contextlib import paddle diff --git a/PaddleNLP/unarchived/text_classification/clouds/scdb_single_card.py b/PaddleNLP/unarchived/text_classification/clouds/scdb_single_card.py index e2ba9866..c75369ca 100644 --- a/PaddleNLP/unarchived/text_classification/clouds/scdb_single_card.py +++ b/PaddleNLP/unarchived/text_classification/clouds/scdb_single_card.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import unittest import contextlib import paddle diff --git a/PaddleNLP/unarchived/text_classification/infer.py b/PaddleNLP/unarchived/text_classification/infer.py index a858b9a8..9990e1f4 100644 --- a/PaddleNLP/unarchived/text_classification/infer.py +++ b/PaddleNLP/unarchived/text_classification/infer.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import sys import time import unittest diff --git a/PaddleNLP/unarchived/text_classification/nets.py b/PaddleNLP/unarchived/text_classification/nets.py index 4a7caad9..4dc83aa4 100644 --- a/PaddleNLP/unarchived/text_classification/nets.py +++ b/PaddleNLP/unarchived/text_classification/nets.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import sys import time import numpy as np diff --git a/PaddleNLP/unarchived/text_classification/train.py b/PaddleNLP/unarchived/text_classification/train.py index a6978a15..363e8e70 100644 --- a/PaddleNLP/unarchived/text_classification/train.py +++ b/PaddleNLP/unarchived/text_classification/train.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import os import six import sys diff --git a/PaddleNLP/unarchived/text_classification/utils.py b/PaddleNLP/unarchived/text_classification/utils.py index dce4743d..ecddc3cf 100644 --- a/PaddleNLP/unarchived/text_classification/utils.py +++ b/PaddleNLP/unarchived/text_classification/utils.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import os import sys import time diff --git a/PaddleNLP/unarchived/text_matching_on_quora/configs/basic_config.py b/PaddleNLP/unarchived/text_matching_on_quora/configs/basic_config.py index ccc37926..70c2ee06 100755 --- a/PaddleNLP/unarchived/text_matching_on_quora/configs/basic_config.py +++ b/PaddleNLP/unarchived/text_matching_on_quora/configs/basic_config.py @@ -1,3 +1,16 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from __future__ import print_function diff --git a/PaddleNLP/unarchived/text_matching_on_quora/models/pwim.py b/PaddleNLP/unarchived/text_matching_on_quora/models/pwim.py index 0d7b0bc1..7b60ec48 100644 --- a/PaddleNLP/unarchived/text_matching_on_quora/models/pwim.py +++ b/PaddleNLP/unarchived/text_matching_on_quora/models/pwim.py @@ -1 +1,14 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. # Just for test `git push` diff --git a/PaddleNLP/unarchived/text_matching_on_quora/models/test.py b/PaddleNLP/unarchived/text_matching_on_quora/models/test.py index e69de29b..33ed0ecf 100644 --- a/PaddleNLP/unarchived/text_matching_on_quora/models/test.py +++ b/PaddleNLP/unarchived/text_matching_on_quora/models/test.py @@ -0,0 +1,13 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. -- GitLab