未验证 提交 7e2a66b0 编写于 作者: L Li Fuchen 提交者: GitHub

add license for nlp models (#3390)

add license for nlp models
上级 88d125f2
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Deep Attention Matching Network
"""
......@@ -5,6 +18,7 @@ Deep Attention Matching Network
import argparse
import six
def parse_args():
"""
Deep Attention Matching Network Config
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Evaluation
"""
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Layers
"""
......@@ -77,7 +90,7 @@ def dot_product_attention(query,
"""
logits = fluid.layers.matmul(
x=query, y=key, transpose_y=True, alpha=d_key ** (-0.5))
x=query, y=key, transpose_y=True, alpha=d_key**(-0.5))
if (q_mask is not None) and (k_mask is not None):
if mask_cache is not None and q_mask.name in mask_cache and k_mask.name in mask_cache[
......@@ -87,7 +100,7 @@ def dot_product_attention(query,
mask = fluid.layers.matmul(x=q_mask, y=k_mask, transpose_y=True)
another_mask = fluid.layers.scale(
mask,
scale=float(2 ** 32 - 1),
scale=float(2**32 - 1),
bias=float(-1),
bias_after_scale=False)
if mask_cache is not None:
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Deep Attention Matching Network
"""
......@@ -174,8 +187,7 @@ def train(args):
print("device count %d" % dev_count)
print("theoretical memory usage: ")
print(
fluid.contrib.memory_usage(
print(fluid.contrib.memory_usage(
program=train_program, batch_size=args.batch_size))
exe = fluid.Executor(place)
......@@ -247,8 +259,7 @@ def train(args):
if (args.save_path is not None) and (step % save_step == 0):
save_path = os.path.join(args.save_path, "step_" + str(step))
print("Save model at step %d ... " % step)
print(
time.strftime('%Y-%m-%d %H:%M:%S',
print(time.strftime('%Y-%m-%d %H:%M:%S',
time.localtime(time.time())))
fluid.io.save_persistables(exe, save_path, train_program)
......@@ -294,8 +305,7 @@ def train(args):
save_path = os.path.join(args.save_path,
"step_" + str(step))
print("Save model at step %d ... " % step)
print(
time.strftime('%Y-%m-%d %H:%M:%S',
print(time.strftime('%Y-%m-%d %H:%M:%S',
time.localtime(time.time())))
fluid.io.save_persistables(exe, save_path, train_program)
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Deep Attention Matching Network
"""
......@@ -12,6 +25,7 @@ class Net(object):
"""
Deep attention matching network
"""
def __init__(self, max_turn_num, max_turn_len, vocab_size, emb_size,
stack_num, channel1_num, channel2_num):
"""
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Reader for deep attention matching network
"""
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Utils
"""
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#!/usr/bin/python
#-*- coding:utf-8 -*-
......@@ -25,9 +38,8 @@ def compute_paragraph_score(sample):
doc['segmented_paragraphs_scores'] = []
for p_idx, para_tokens in enumerate(doc['segmented_paragraphs']):
if len(question) > 0:
related_score = metric_max_over_ground_truths(f1_score,
para_tokens,
[question])
related_score = metric_max_over_ground_truths(
f1_score, para_tokens, [question])
else:
related_score = 0.0
doc['segmented_paragraphs_scores'].append(related_score)
......@@ -142,7 +154,8 @@ def paragraph_selection(sample, mode):
para_infos = []
for p_idx, (para_tokens, para_scores) in \
enumerate(zip(doc['segmented_paragraphs'], doc['segmented_paragraphs_scores'])):
para_infos.append((para_tokens, para_scores, len(para_tokens), p_idx))
para_infos.append(
(para_tokens, para_scores, len(para_tokens), p_idx))
para_infos.sort(key=lambda x: (-x[1], x[2]))
topN_idx = []
for para_info in para_infos[:topN]:
......@@ -168,7 +181,8 @@ def paragraph_selection(sample, mode):
incre_len += 1 + doc['paragraphs_length'][id]
if doc_id == d_idx and id == para_id:
incre_len += 1
total_segmented_content += [splitter] + doc['segmented_paragraphs'][id]
total_segmented_content += [splitter] + doc['segmented_paragraphs'][
id]
if doc_id == d_idx:
answer_start = incre_len + sample['answer_spans'][0][0]
answer_end = incre_len + sample['answer_spans'][0][1]
......@@ -191,9 +205,9 @@ if __name__ == "__main__":
try:
sample = json.loads(line, encoding='utf8')
except:
print >>sys.stderr, "Invalid input json format - '{}' will be ignored".format(line)
print >> sys.stderr, "Invalid input json format - '{}' will be ignored".format(
line)
continue
compute_paragraph_score(sample)
paragraph_selection(sample, mode)
print(json.dumps(sample, encoding='utf8', ensure_ascii=False))
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#coding=utf8
import os, sys, json
import nltk
def _nltk_tokenize(sequence):
tokens = nltk.word_tokenize(sequence)
......@@ -11,10 +25,12 @@ def _nltk_tokenize(sequence):
token_words = []
for token in tokens:
cur_char_offset = sequence.find(token, cur_char_offset)
token_offsets.append([cur_char_offset, cur_char_offset + len(token) - 1])
token_offsets.append(
[cur_char_offset, cur_char_offset + len(token) - 1])
token_words.append(token)
return token_offsets, token_words
def segment(input_js):
_, input_js['segmented_question'] = _nltk_tokenize(input_js['question'])
for doc_id, doc in enumerate(input_js['documents']):
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#coding=utf8
import sys
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import json
import pandas as pd
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
class TrainTaskConfig(object):
# support both CPU and GPU now.
use_gpu = True
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# The placeholder for batch_size in compile time. Must be -1 currently to be
# consistent with some ops' infer-shape output in compile time, such as the
# sequence_expand op used in beamsearch decoder.
......@@ -65,43 +78,37 @@ input_descs = {
# Names of word embedding table which might be reused for weight sharing.
word_emb_param_names = (
"src_word_emb_table",
"trg_word_emb_table",
)
"trg_word_emb_table", )
phone_emb_param_name = "phone_emb_table"
# Names of position encoding table which will be initialized externally.
pos_enc_param_names = (
"src_pos_enc_table",
"trg_pos_enc_table",
)
"trg_pos_enc_table", )
# separated inputs for different usages.
encoder_data_input_fields = (
"src_word",
"src_pos",
"src_slf_attn_bias",
"src_phone",
"src_phone_mask",
)
"src_phone_mask", )
decoder_data_input_fields = (
"trg_word",
"trg_pos",
"trg_slf_attn_bias",
"trg_src_attn_bias",
"enc_output",
)
"enc_output", )
label_data_input_fields = (
"lbl_word",
"lbl_weight",
)
"lbl_weight", )
# In fast decoder, trg_pos (only containing the current time step) is generated
# by ops and trg_slf_attn_bias is not needed.
fast_decoder_data_input_fields = (
"trg_word",
"init_score",
"init_idx",
"trg_src_attn_bias",
)
"trg_src_attn_bias", )
# Set seed for CE
dropout_seed = None
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import ast
import multiprocessing
......@@ -86,10 +99,8 @@ def parse_args():
trg_dict = reader.DataReader.load_dict(args.trg_vocab_fpath)
phone_dict = reader.DataReader.load_dict(args.phoneme_vocab_fpath)
dict_args = [
"src_vocab_size",
str(len(src_dict)), "trg_vocab_size",
str(len(trg_dict)), "phone_vocab_size",
str(len(phone_dict)), "bos_idx",
"src_vocab_size", str(len(src_dict)), "trg_vocab_size",
str(len(trg_dict)), "phone_vocab_size", str(len(phone_dict)), "bos_idx",
str(src_dict[args.special_token[0]]), "eos_idx",
str(src_dict[args.special_token[1]]), "unk_idx",
str(src_dict[args.special_token[2]])
......@@ -147,10 +158,10 @@ def prepare_batch_input(insts, data_input_names, src_pad_idx, phone_pad_idx,
# beamsearch_op must use tensors with lod
init_score = to_lodtensor(
np.zeros_like(trg_word, dtype="float32").reshape(-1, 1), place,
[range(trg_word.shape[0] + 1)] * 2)
trg_word = to_lodtensor(trg_word, place,
[range(trg_word.shape[0] + 1)] * 2)
np.zeros_like(
trg_word, dtype="float32").reshape(-1, 1),
place, [range(trg_word.shape[0] + 1)] * 2)
trg_word = to_lodtensor(trg_word, place, [range(trg_word.shape[0] + 1)] * 2)
init_idx = np.asarray(range(len(insts)), dtype="int32")
data_input_dict = dict(
......@@ -315,7 +326,8 @@ def fast_infer(args):
sub_start = seq_ids.lod()[1][start + j]
sub_end = seq_ids.lod()[1][start + j + 1]
hyps[i].append(" ".join([
trg_idx2word[idx] for idx in post_process_seq(
trg_idx2word[idx]
for idx in post_process_seq(
np.array(seq_ids)[sub_start:sub_end])
]))
scores[i].append(np.array(seq_scores)[sub_end - 1])
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from functools import partial
import numpy as np
......@@ -51,12 +64,12 @@ def position_encoding_init(n_position, d_pos_vec):
channels = d_pos_vec
position = np.arange(n_position)
num_timescales = channels // 2
log_timescale_increment = (
np.log(float(1e4) / float(1)) / (num_timescales - 1))
inv_timescales = np.exp(
np.arange(num_timescales)) * -log_timescale_increment
scaled_time = np.expand_dims(position, 1) * np.expand_dims(
inv_timescales, 0)
log_timescale_increment = (np.log(float(1e4) / float(1)) /
(num_timescales - 1))
inv_timescales = np.exp(np.arange(
num_timescales)) * -log_timescale_increment
scaled_time = np.expand_dims(position, 1) * np.expand_dims(inv_timescales,
0)
signal = np.concatenate([np.sin(scaled_time), np.cos(scaled_time)], axis=1)
signal = np.pad(signal, [[0, 0], [0, np.mod(channels, 2)]], 'constant')
position_enc = signal
......@@ -91,17 +104,15 @@ def multi_head_attention(queries,
"""
Add linear projection to queries, keys, and values.
"""
q = layers.fc(
input=queries,
q = layers.fc(input=queries,
size=d_key * n_head,
bias_attr=False,
num_flatten_dims=2)
# For encoder-decoder attention in inference, insert the ops and vars
# into global block to use as cache among beam search.
fc_layer = wrap_layer_with_block(
layers.fc,
fluid.default_main_program().current_block().
parent_idx) if cache is not None and static_kv else layers.fc
layers.fc, fluid.default_main_program().current_block(
).parent_idx) if cache is not None and static_kv else layers.fc
k = fc_layer(
input=keys,
size=d_key * n_head,
......@@ -132,12 +143,12 @@ def multi_head_attention(queries,
# into global block to use as cache among beam search.
reshape_layer = wrap_layer_with_block(
layers.reshape,
fluid.default_main_program().current_block().
parent_idx) if cache is not None and static_kv else layers.reshape
fluid.default_main_program().current_block(
).parent_idx) if cache is not None and static_kv else layers.reshape
transpose_layer = wrap_layer_with_block(
layers.transpose,
fluid.default_main_program().current_block().parent_idx
) if cache is not None and static_kv else layers.transpose
fluid.default_main_program().current_block().
parent_idx) if cache is not None and static_kv else layers.transpose
reshaped_k = reshape_layer(
x=keys, shape=[0, 0, n_head, d_key], inplace=True)
k = transpose_layer(x=reshaped_k, perm=[0, 2, 1, 3])
......@@ -214,8 +225,10 @@ def multi_head_attention(queries,
out = __combine_heads(ctx_multiheads)
# Project back to the model size.
proj_out = layers.fc(
input=out, size=d_model, bias_attr=False, num_flatten_dims=2)
proj_out = layers.fc(input=out,
size=d_model,
bias_attr=False,
num_flatten_dims=2)
return proj_out
......@@ -225,14 +238,13 @@ def positionwise_feed_forward(x, d_inner_hid, d_hid, dropout_rate):
This module consists of two linear transformations with a ReLU activation
in between, which is applied to each position separately and identically.
"""
hidden = layers.fc(
input=x, size=d_inner_hid, num_flatten_dims=2, act="relu")
hidden = layers.fc(input=x,
size=d_inner_hid,
num_flatten_dims=2,
act="relu")
if dropout_rate:
hidden = layers.dropout(
hidden,
dropout_prob=dropout_rate,
seed=dropout_seed,
is_test=False)
hidden, dropout_prob=dropout_rate, seed=dropout_seed, is_test=False)
out = layers.fc(input=hidden, size=d_hid, num_flatten_dims=2)
return out
......@@ -313,8 +325,7 @@ def prepare_encoder(src_word,
param_attr=fluid.ParamAttr(
name=pos_enc_param_names[0], trainable=False))
src_pos_enc.stop_gradient = True
enc_input = (
1 - beta) * src_word_emb + beta * mean_phone_emb + src_pos_enc
enc_input = (1 - beta) * src_word_emb + beta * mean_phone_emb + src_pos_enc
return layers.dropout(
enc_input, dropout_prob=dropout_rate, seed=dropout_seed,
is_test=False) if dropout_rate else enc_input
......@@ -374,8 +385,8 @@ def encoder_layer(enc_input,
"""
attn_output = multi_head_attention(
pre_process_layer(enc_input, preprocess_cmd,
prepostprocess_dropout), None, None, attn_bias,
d_key, d_value, d_model, n_head, attention_dropout)
prepostprocess_dropout), None, None, attn_bias, d_key,
d_value, d_model, n_head, attention_dropout)
attn_output = post_process_layer(enc_input, attn_output, postprocess_cmd,
prepostprocess_dropout)
ffd_output = positionwise_feed_forward(
......@@ -415,8 +426,7 @@ def encoder(enc_input,
attention_dropout,
relu_dropout,
preprocess_cmd,
postprocess_cmd,
)
postprocess_cmd, )
enc_input = enc_output
enc_output = pre_process_layer(enc_output, preprocess_cmd,
prepostprocess_dropout)
......@@ -459,8 +469,7 @@ def decoder_layer(dec_input,
dec_input,
slf_attn_output,
postprocess_cmd,
prepostprocess_dropout,
)
prepostprocess_dropout, )
enc_attn_output = multi_head_attention(
pre_process_layer(slf_attn_output, preprocess_cmd,
prepostprocess_dropout),
......@@ -479,21 +488,18 @@ def decoder_layer(dec_input,
slf_attn_output,
enc_attn_output,
postprocess_cmd,
prepostprocess_dropout,
)
prepostprocess_dropout, )
ffd_output = positionwise_feed_forward(
pre_process_layer(enc_attn_output, preprocess_cmd,
prepostprocess_dropout),
d_inner_hid,
d_model,
relu_dropout,
)
relu_dropout, )
dec_output = post_process_layer(
enc_attn_output,
ffd_output,
postprocess_cmd,
prepostprocess_dropout,
)
prepostprocess_dropout, )
return dec_output
......@@ -632,8 +638,7 @@ def transformer(src_vocab_size,
postprocess_cmd,
weight_sharing,
beta,
enc_inputs,
)
enc_inputs, )
predict = wrap_decoder(
trg_vocab_size,
......@@ -651,14 +656,14 @@ def transformer(src_vocab_size,
postprocess_cmd,
weight_sharing,
dec_inputs,
enc_output,
)
enc_output, )
# Padding index do not contribute to the total loss. The weights is used to
# cancel padding index in calculating the loss.
if label_smooth_eps:
label = layers.label_smooth(
label=layers.one_hot(input=label, depth=trg_vocab_size),
label=layers.one_hot(
input=label, depth=trg_vocab_size),
epsilon=label_smooth_eps)
cost = layers.softmax_with_cross_entropy(
......@@ -730,8 +735,7 @@ def wrap_encoder(src_vocab_size,
attention_dropout,
relu_dropout,
preprocess_cmd,
postprocess_cmd,
)
postprocess_cmd, )
return enc_output
......@@ -803,8 +807,9 @@ def wrap_decoder(trg_vocab_size,
word_emb_param_names[0]),
transpose_y=True)
else:
predict = layers.fc(
input=dec_output, size=trg_vocab_size, bias_attr=False)
predict = layers.fc(input=dec_output,
size=trg_vocab_size,
bias_attr=False)
if dec_inputs is None:
# Return probs for independent decoder program.
predict = layers.softmax(predict)
......@@ -879,8 +884,7 @@ def fast_decode(src_vocab_size,
force_cpu=True)
step_idx = layers.fill_constant(
shape=[1], dtype=start_tokens.dtype, value=0, force_cpu=True)
cond = layers.less_than(
x=step_idx, y=max_len) # default force_cpu=True
cond = layers.less_than(x=step_idx, y=max_len) # default force_cpu=True
while_op = layers.While(cond)
# array states will be stored for each step.
ids = layers.array_write(
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import glob
import six
import os
......@@ -302,9 +315,8 @@ class DataReader(object):
f = tarfile.open(fpaths[0], "r")
for line in f.extractfile(tar_fname):
fields = line.strip("\n").split(self._field_delimiter)
if (not self._only_src
and len(fields) == 2) or (self._only_src
and len(fields) == 1):
if (not self._only_src and len(fields) == 2) or (
self._only_src and len(fields) == 1):
yield fields
else:
for fpath in fpaths:
......@@ -381,5 +393,5 @@ class DataReader(object):
for idx in batch_ids]
else:
yield [(self._src_seq_ids[idx], self._src_phone_ids[idx],
self._trg_seq_ids[idx][:-1],
self._trg_seq_ids[idx][1:]) for idx in batch_ids]
self._trg_seq_ids[idx][:-1], self._trg_seq_ids[idx][1:])
for idx in batch_ids]
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import ast
import copy
......@@ -141,10 +154,8 @@ def parse_args():
trg_dict = reader.DataReader.load_dict(args.trg_vocab_fpath)
phone_dict = reader.DataReader.load_dict(args.phoneme_vocab_fpath)
dict_args = [
"src_vocab_size",
str(len(src_dict)), "trg_vocab_size",
str(len(trg_dict)), "phone_vocab_size",
str(len(phone_dict)), "bos_idx",
"src_vocab_size", str(len(src_dict)), "trg_vocab_size",
str(len(trg_dict)), "phone_vocab_size", str(len(phone_dict)), "bos_idx",
str(src_dict[args.special_token[0]]), "eos_idx",
str(src_dict[args.special_token[1]]), "unk_idx",
str(src_dict[args.special_token[2]])
......@@ -157,8 +168,8 @@ def parse_args():
def append_nccl2_prepare(startup_prog, trainer_id, worker_endpoints,
current_endpoint):
assert (trainer_id >= 0 and len(worker_endpoints) > 1
and current_endpoint in worker_endpoints)
assert (trainer_id >= 0 and len(worker_endpoints) > 1 and
current_endpoint in worker_endpoints)
eps = copy.deepcopy(worker_endpoints)
eps.remove(current_endpoint)
nccl_id_var = startup_prog.global_block().create_var(
......@@ -189,8 +200,8 @@ def pad_phoneme_data(phoneme_seqs, pad_idx, max_seq_len):
batch_size = len(phoneme_seqs)
phoneme_data = pad_idx * np.ones(
(batch_size, max_seq_len, max_ph_seq_len), dtype=np.int64)
phoneme_mask = np.zeros((batch_size, max_seq_len, max_ph_seq_len),
dtype=np.int64)
phoneme_mask = np.zeros(
(batch_size, max_seq_len, max_ph_seq_len), dtype=np.int64)
for i in range(batch_size):
cur_ph_seq = phoneme_seqs[i]
......@@ -237,16 +248,15 @@ def pad_batch_data(insts,
if is_target:
# This is used to avoid attention on paddings and subsequent
# words.
slf_attn_bias_data = np.ones((inst_data.shape[0], max_len,
max_len))
slf_attn_bias_data = np.ones((inst_data.shape[0], max_len, max_len))
slf_attn_bias_data = np.triu(slf_attn_bias_data,
1).reshape([-1, 1, max_len, max_len])
slf_attn_bias_data = np.tile(slf_attn_bias_data,
[1, n_head, 1, 1]) * [-1e9]
else:
# This is used to avoid attention on paddings.
slf_attn_bias_data = np.array(
[[0] * len(inst) + [-1e9] * (max_len - len(inst))
slf_attn_bias_data = np.array([[0] * len(inst) + [-1e9] *
(max_len - len(inst))
for inst in insts])
slf_attn_bias_data = np.tile(
slf_attn_bias_data.reshape([-1, 1, 1, max_len]),
......@@ -359,8 +369,8 @@ def prepare_data_generator(args,
for item in data_reader():
inst_num_per_part = len(item) // count
for i in range(count):
yield item[inst_num_per_part * i:inst_num_per_part *
(i + 1)]
yield item[inst_num_per_part * i:inst_num_per_part * (i + 1
)]
return __impl__
......@@ -401,8 +411,8 @@ def prepare_feed_dict_list(data_generator, init_flag, count):
feed_dict_list.append(pos_enc_tables)
else:
feed_dict_list[idx] = dict(
list(pos_enc_tables.items()) +
list(feed_dict_list[idx].items()))
list(pos_enc_tables.items()) + list(feed_dict_list[idx]
.items()))
return feed_dict_list if len(feed_dict_list) == count else None
......@@ -487,10 +497,9 @@ def test_context(exe, train_exe, dev_count):
data_generator = test_data()
while True:
try:
feed_dict_list = prepare_feed_dict_list(
data_generator, False, dev_count)
outs = test_exe.run(
fetch_list=[sum_cost.name, token_num.name],
feed_dict_list = prepare_feed_dict_list(data_generator, False,
dev_count)
outs = test_exe.run(fetch_list=[sum_cost.name, token_num.name],
feed=feed_dict_list)
except (StopIteration, fluid.core.EOFException):
# The current pass is over.
......@@ -562,10 +571,10 @@ def train_loop(exe,
# the best cross-entropy value with label smoothing
loss_normalizer = -((1. - TrainTaskConfig.label_smooth_eps) * np.log(
(1. - TrainTaskConfig.label_smooth_eps)) +
TrainTaskConfig.label_smooth_eps *
np.log(TrainTaskConfig.label_smooth_eps /
(ModelHyperParams.trg_vocab_size - 1) + 1e-20))
(1. - TrainTaskConfig.label_smooth_eps
)) + TrainTaskConfig.label_smooth_eps *
np.log(TrainTaskConfig.label_smooth_eps / (
ModelHyperParams.trg_vocab_size - 1) + 1e-20))
step_idx = 0
init_flag = True
......@@ -583,8 +592,8 @@ def train_loop(exe,
batch_id = 0
while True:
try:
feed_dict_list = prepare_feed_dict_list(
data_generator, init_flag, dev_count)
feed_dict_list = prepare_feed_dict_list(data_generator,
init_flag, dev_count)
outs = train_exe.run(
fetch_list=[sum_cost.name, token_num.name]
if step_idx % args.fetch_steps == 0 else [],
......@@ -609,12 +618,11 @@ def train_loop(exe,
else:
logging.info(
"step_idx: %d, epoch: %d, batch: %d, avg loss: %f, "
"normalized loss: %f, ppl: %f, speed: %.2f step/s"
% (step_idx, pass_id, batch_id, total_avg_cost,
total_avg_cost - loss_normalizer,
np.exp([min(total_avg_cost, 100)
]), args.fetch_steps /
(time.time() - avg_batch_time)))
"normalized loss: %f, ppl: %f, speed: %.2f step/s" %
(step_idx, pass_id, batch_id, total_avg_cost,
total_avg_cost - loss_normalizer, np.exp(
[min(total_avg_cost, 100)]),
args.fetch_steps / (time.time() - avg_batch_time)))
avg_batch_time = time.time()
if step_idx % TrainTaskConfig.save_freq == 0 and step_idx > 0:
......@@ -643,8 +651,9 @@ def train_loop(exe,
val_avg_cost, val_ppl = test()
logging.info(
"epoch: %d, val avg loss: %f, val normalized loss: %f, val ppl: %f,"
" consumed %fs" % (pass_id, val_avg_cost, val_avg_cost -
loss_normalizer, val_ppl, time_consumed))
" consumed %fs" % (pass_id, val_avg_cost,
val_avg_cost - loss_normalizer, val_ppl,
time_consumed))
else:
logging.info("epoch: %d, consumed %fs" % (pass_id, time_consumed))
if not args.enable_ce:
......@@ -734,8 +743,8 @@ def train(args):
if args.local:
logging.info("local start_up:")
train_loop(exe, train_prog, startup_prog, dev_count, sum_cost,
avg_cost, token_num, predict, pyreader)
train_loop(exe, train_prog, startup_prog, dev_count, sum_cost, avg_cost,
token_num, predict, pyreader)
else:
if args.update_method == "nccl2":
trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
......
import os
# Root directory to scan: the working directory the script is launched from.
filePath = os.getcwd()
def get_all_files(dir):
fileDirList = []
for root, dirs, files in os.walk(dir):
for file in files:
file_path = os.path.join(root, file)
fileDirList.append(file_path)
for dir in dirs:
dir_path = os.path.join(root, dir)
get_all_files(dir_path)
return fileDirList
fileDirList = get_all_files(filePath)
for code in fileDirList:
split = os.path.splitext(code)
if (split[1] == '.py' and not '__init__' in split[0] and
not '_ce' in split[0]):
with open(code, 'r') as fz:
content = fz.read()
if content.find('Copyright') >= 0:
fz.close()
continue
else:
string = "# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.\n" \
"#\n" \
"# Licensed under the Apache License, Version 2.0 (the \"License\");\n" \
"# you may not use this file except in compliance with the License.\n" \
"# You may obtain a copy of the License at\n" \
"#\n" \
"# http://www.apache.org/licenses/LICENSE-2.0\n" \
"#\n" \
"# Unless required by applicable law or agreed to in writing, software\n" \
"# distributed under the License is distributed on an \"AS IS\" BASIS,\n" \
"# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n" \
"# See the License for the specific language governing permissions and\n" \
"# limitations under the License.\n"+content
fz.close()
with open(code, 'w') as f:
f.write(string)
print "file %s write success!" % code
f.close()
print "read and write success!"
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
EmoTect config
"""
......@@ -9,10 +22,12 @@ from __future__ import print_function
import six
import json
class EmoTectConfig(object):
"""
EmoTect Config
"""
def __init__(self, config_path):
self._config_dict = self._parse(config_path)
......@@ -21,7 +36,8 @@ class EmoTectConfig(object):
with open(config_path) as json_file:
config_dict = json.load(json_file)
except Exception:
raise IOError("Error in parsing emotect model config file '%s'" % config_path)
raise IOError("Error in parsing emotect model config file '%s'" %
config_path)
else:
return config_dict
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
EmoTect Reader, data converters for classification data.
"""
......@@ -10,14 +23,13 @@ import numpy as np
from utils import load_vocab
from utils import data_reader
class EmoTectProcessor(object):
"""
Processor class for data converters for EmoTect.
"""
def __init__(self,
data_dir,
vocab_path,
random_seed=None):
def __init__(self, data_dir, vocab_path, random_seed=None):
self.data_dir = data_dir
self.vocab = load_vocab(vocab_path)
self.num_examples = {"train": -1, "dev": -1, "test": -1, "infer": -1}
......@@ -27,29 +39,33 @@ class EmoTectProcessor(object):
"""
Load training examples
"""
return data_reader(os.path.join(self.data_dir, "train.tsv"),
self.vocab, self.num_examples, "train", epoch)
return data_reader(
os.path.join(self.data_dir, "train.tsv"), self.vocab,
self.num_examples, "train", epoch)
def get_dev_examples(self, data_dir):
"""
Load dev examples
"""
return data_reader(os.path.join(self.data_dir, "dev.tsv"),
self.vocab, self.num_examples, "dev")
return data_reader(
os.path.join(self.data_dir, "dev.tsv"), self.vocab,
self.num_examples, "dev")
def get_test_examples(self, data_dir):
"""
Load test examples
"""
return data_reader(os.path.join(self.data_dir, "test.tsv"),
self.vocab, self.num_examples, "test")
return data_reader(
os.path.join(self.data_dir, "test.tsv"), self.vocab,
self.num_examples, "test")
def get_infer_examples(self, data_dir):
"""
Load infer queries
"""
return data_reader(os.path.join(self.data_dir, "infer.tsv"),
self.vocab, self.num_examples, "infer")
return data_reader(
os.path.join(self.data_dir, "infer.tsv"), self.vocab,
self.num_examples, "infer")
def get_labels(self):
"""
......@@ -63,7 +79,8 @@ class EmoTectProcessor(object):
"""
if phase not in ['train', 'dev', 'test', 'infer']:
raise ValueError(
"Unknown phase, which should be in ['train', 'dev', 'test', 'infer'].")
"Unknown phase, which should be in ['train', 'dev', 'test', 'infer']."
)
return self.num_examples[phase]
def get_train_progress(self):
......@@ -77,14 +94,18 @@ class EmoTectProcessor(object):
Generate data for train, dev or test
"""
if phase == "train":
return paddle.batch(self.get_train_examples(self.data_dir, epoch), batch_size)
return paddle.batch(
self.get_train_examples(self.data_dir, epoch), batch_size)
elif phase == "dev":
return paddle.batch(self.get_dev_examples(self.data_dir), batch_size)
return paddle.batch(
self.get_dev_examples(self.data_dir), batch_size)
elif phase == "test":
return paddle.batch(self.get_test_examples(self.data_dir), batch_size)
return paddle.batch(
self.get_test_examples(self.data_dir), batch_size)
elif phase == "infer":
return paddle.batch(self.get_infer_examples(self.data_dir), batch_size)
return paddle.batch(
self.get_infer_examples(self.data_dir), batch_size)
else:
raise ValueError(
"Unknown phase, which should be in ['train', 'dev', 'test', 'infer'].")
"Unknown phase, which should be in ['train', 'dev', 'test', 'infer']."
)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Emotion Detection Task
"""
......@@ -25,37 +38,48 @@ import utils
parser = argparse.ArgumentParser(__doc__)
model_g = utils.ArgumentGroup(parser, "model", "model configuration and paths.")
model_g.add_arg("config_path", str, None, "Path to the json file for EmoTect model config.")
model_g.add_arg("init_checkpoint", str, None, "Init checkpoint to resume training from.")
model_g.add_arg("config_path", str, None,
"Path to the json file for EmoTect model config.")
model_g.add_arg("init_checkpoint", str, None,
"Init checkpoint to resume training from.")
model_g.add_arg("output_dir", str, None, "Directory path to save checkpoints")
train_g = utils.ArgumentGroup(parser, "training", "training options.")
train_g.add_arg("epoch", int, 10, "Number of epoches for training.")
train_g.add_arg("save_steps", int, 10000, "The steps interval to save checkpoints.")
train_g.add_arg("validation_steps", int, 1000, "The steps interval to evaluate model performance.")
train_g.add_arg("save_steps", int, 10000,
"The steps interval to save checkpoints.")
train_g.add_arg("validation_steps", int, 1000,
"The steps interval to evaluate model performance.")
train_g.add_arg("lr", float, 0.002, "The Learning rate value for training.")
log_g = utils.ArgumentGroup(parser, "logging", "logging related")
log_g.add_arg("skip_steps", int, 10, "The steps interval to print loss.")
log_g.add_arg("verbose", bool, False, "Whether to output verbose log")
data_g = utils.ArgumentGroup(parser, "data", "Data paths, vocab paths and data processing options")
data_g = utils.ArgumentGroup(
parser, "data", "Data paths, vocab paths and data processing options")
data_g.add_arg("data_dir", str, None, "Directory path to training data.")
data_g.add_arg("vocab_path", str, None, "Vocabulary path.")
data_g.add_arg("batch_size", int, 256, "Total examples' number in batch for training.")
data_g.add_arg("batch_size", int, 256,
"Total examples' number in batch for training.")
data_g.add_arg("random_seed", int, 0, "Random seed.")
run_type_g = utils.ArgumentGroup(parser, "run_type", "running type options.")
run_type_g.add_arg("use_cuda", bool, False, "If set, use GPU for training.")
run_type_g.add_arg("task_name", str, None, "The name of task to perform sentiment classification.")
run_type_g.add_arg("task_name", str, None,
"The name of task to perform sentiment classification.")
run_type_g.add_arg("do_train", bool, False, "Whether to perform training.")
run_type_g.add_arg("do_val", bool, False, "Whether to perform evaluation.")
run_type_g.add_arg("do_infer", bool, False, "Whether to perform inference.")
parser.add_argument('--enable_ce', action='store_true', help='If set, run the task with continuous evaluation logs.')
parser.add_argument(
'--enable_ce',
action='store_true',
help='If set, run the task with continuous evaluation logs.')
args = parser.parse_args()
def create_model(args,
pyreader_name,
emotect_config,
......@@ -98,11 +122,17 @@ def create_model(args,
if is_infer:
data = fluid.layers.read_file(pyreader)
probs = network(data, None, emotect_config["vocab_size"], class_dim=num_labels, is_infer=True)
probs = network(
data,
None,
emotect_config["vocab_size"],
class_dim=num_labels,
is_infer=True)
return pyreader, probs
data, label = fluid.layers.read_file(pyreader)
avg_loss, probs = network(data, label, emotect_config["vocab_size"], class_dim=num_labels)
avg_loss, probs = network(
data, label, emotect_config["vocab_size"], class_dim=num_labels)
num_seqs = fluid.layers.create_tensor(dtype='int64')
accuracy = fluid.layers.accuracy(input=probs, label=label, total=num_seqs)
return pyreader, avg_loss, accuracy, num_seqs
......@@ -144,7 +174,8 @@ def infer(exe, infer_program, infer_pyreader, fetch_list, infer_phase):
fetch_list=fetch_list,
return_numpy=True)
for probs in batch_probs[0]:
print("%d\t%f\t%f\t%f" % (np.argmax(probs), probs[0], probs[1], probs[2]))
print("%d\t%f\t%f\t%f" %
(np.argmax(probs), probs[0], probs[1], probs[2]))
except fluid.core.EOFException as e:
infer_pyreader.reset()
break
......@@ -165,7 +196,8 @@ def main(args):
exe = fluid.Executor(place)
task_name = args.task_name.lower()
processor = reader.EmoTectProcessor(data_dir=args.data_dir,
processor = reader.EmoTectProcessor(
data_dir=args.data_dir,
vocab_path=args.vocab_path,
random_seed=args.random_seed)
num_labels = len(processor.get_labels())
......@@ -180,9 +212,7 @@ def main(args):
if args.do_train:
train_data_generator = processor.data_generator(
batch_size=args.batch_size,
phase='train',
epoch=args.epoch)
batch_size=args.batch_size, phase='train', epoch=args.epoch)
num_train_examples = processor.get_num_examples(phase="train")
max_train_steps = args.epoch * num_train_examples // args.batch_size + 1
......@@ -241,17 +271,12 @@ def main(args):
if args.do_train:
if args.init_checkpoint:
utils.init_checkpoint(
exe,
args.init_checkpoint,
main_program=startup_prog)
exe, args.init_checkpoint, main_program=startup_prog)
elif args.do_val or args.do_infer:
if not args.init_checkpoint:
raise ValueError("args 'init_checkpoint' should be set if"
"only doing validation or infer!")
utils.init_checkpoint(
exe,
args.init_checkpoint,
main_program=test_prog)
utils.init_checkpoint(exe, args.init_checkpoint, main_program=test_prog)
if args.do_train:
train_exe = exe
......@@ -288,7 +313,8 @@ def main(args):
total_num_seqs.extend(np_num_seqs)
if args.verbose:
verbose = "train pyreader queue size: %d, " % train_pyreader.queue.size()
verbose = "train pyreader queue size: %d, " % train_pyreader.queue.size(
)
print(verbose)
time_end = time.time()
......@@ -298,12 +324,16 @@ def main(args):
(steps, np.sum(total_cost) / np.sum(total_num_seqs),
np.sum(total_acc) / np.sum(total_num_seqs),
args.skip_steps / used_time))
ce_info.append([np.sum(total_cost) / np.sum(total_num_seqs), np.sum(total_acc) / np.sum(total_num_seqs), used_time])
ce_info.append([
np.sum(total_cost) / np.sum(total_num_seqs),
np.sum(total_acc) / np.sum(total_num_seqs), used_time
])
total_cost, total_acc, total_num_seqs = [], [], []
time_begin = time.time()
if steps % args.save_steps == 0:
save_path = os.path.join(args.output_dir, "step_" + str(steps))
save_path = os.path.join(args.output_dir,
"step_" + str(steps))
fluid.io.save_persistables(exe, save_path, train_program)
if steps % args.validation_steps == 0:
......@@ -337,32 +367,24 @@ def main(args):
print("ce info error")
print("kpis\teach_step_duration_%s_card%s\t%s" %
(task_name, card_num, ce_time))
print("kpis\ttrain_loss_%s_card%s\t%f" %
(task_name, card_num, ce_loss))
print("kpis\ttrain_acc_%s_card%s\t%f" %
(task_name, card_num, ce_acc))
print("kpis\ttrain_loss_%s_card%s\t%f" % (task_name, card_num, ce_loss))
print("kpis\ttrain_acc_%s_card%s\t%f" % (task_name, card_num, ce_acc))
# evaluate on test set
if not args.do_train and args.do_val:
test_pyreader.decorate_paddle_reader(
processor.data_generator(
batch_size=args.batch_size,
phase='test',
epoch=1))
batch_size=args.batch_size, phase='test', epoch=1))
print("Final test result:")
evaluate(test_exe, test_prog, test_pyreader,
[loss.name, accuracy.name, num_seqs.name],
"test")
[loss.name, accuracy.name, num_seqs.name], "test")
# infer
if args.do_infer:
infer_pyreader.decorate_paddle_reader(
processor.data_generator(
batch_size=args.batch_size,
phase='infer',
epoch=1))
infer(test_exe, test_prog, infer_pyreader,
[probs.name], "infer")
batch_size=args.batch_size, phase='infer', epoch=1))
infer(test_exe, test_prog, infer_pyreader, [probs.name], "infer")
def get_cards():
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Emotion Detection Task, based on ERNIE
"""
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
EmoTect utilities.
"""
......@@ -16,6 +29,7 @@ import paddle
import paddle.fluid as fluid
import numpy as np
def str2bool(value):
"""
String to Boolean
......@@ -29,6 +43,7 @@ class ArgumentGroup(object):
"""
Argument Class
"""
def __init__(self, parser, title, des):
self._group = parser.add_argument_group(title=title, description=des)
......@@ -92,27 +107,33 @@ def data_reader(file_path, word_dict, num_examples, phrase, epoch=1):
cols = line.strip().split("\t")
if len(cols) != 1:
query = cols[-1]
wids = [word_dict[x] if x in word_dict else unk_id
for x in query.strip().split(" ")]
all_data.append((wids,))
wids = [
word_dict[x] if x in word_dict else unk_id
for x in query.strip().split(" ")
]
all_data.append((wids, ))
else:
cols = line.strip().split("\t")
if len(cols) != 2:
sys.stderr.write("[NOTICE] Error Format Line!")
continue
label = int(cols[0])
wids = [word_dict[x] if x in word_dict else unk_id
for x in cols[1].split(" ")]
wids = [
word_dict[x] if x in word_dict else unk_id
for x in cols[1].split(" ")
]
all_data.append((wids, label))
num_examples[phrase] = len(all_data)
if phrase == "infer":
def reader():
"""
Infer reader function
"""
for wids in all_data:
yield wids
return reader
def reader():
......@@ -124,6 +145,7 @@ def data_reader(file_path, word_dict, num_examples, phrase, epoch=1):
random.shuffle(all_data)
for wids, label in all_data:
yield wids, label
return reader
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
The function lex_net(args) defines the lexical analysis network structure
"""
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#coding: utf-8
"""
The file_reader converts raw corpus to input.
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This file is used to train the model.
"""
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#coding=utf-8
"""
evaluate wordseg for LAC and other open-source wordseg tools
......@@ -20,7 +33,7 @@ def to_unicode(string):
def to_set(words):
""" cut list to set of (string, off) """
off = 0
s= set()
s = set()
for w in words:
if w:
s.add((off, w))
......@@ -161,7 +174,8 @@ def get_hanlp_result(sentences):
preds = []
for sentence in sentences:
arraylist = HanLP.segment(sentence)
sent_seg = " ".join([term.toString().split("/")[0] for term in arraylist])
sent_seg = " ".join(
[term.toString().split("/")[0] for term in arraylist])
sent_seg = to_unicode(sent_seg)
preds.append(sent_seg)
return preds
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
The file_reader converts raw corpus to input.
"""
......@@ -9,7 +22,10 @@ import glob
def load_kv_dict(dict_path,
reverse=False, delimiter="\t", key_func=None, value_func=None):
reverse=False,
delimiter="\t",
key_func=None,
value_func=None):
"""
Load key-value dict from file
"""
......@@ -34,11 +50,14 @@ def load_kv_dict(dict_path,
class Dataset(object):
"""data reader"""
def __init__(self, args, mode="train"):
# read dict
self.word2id_dict = load_kv_dict(args.word_dict_path, reverse=True, value_func=int)
self.word2id_dict = load_kv_dict(
args.word_dict_path, reverse=True, value_func=int)
self.id2word_dict = load_kv_dict(args.word_dict_path)
self.label2id_dict = load_kv_dict(args.label_dict_path, reverse=True, value_func=int)
self.label2id_dict = load_kv_dict(
args.label_dict_path, reverse=True, value_func=int)
self.id2label_dict = load_kv_dict(args.label_dict_path)
self.word_replace_dict = load_kv_dict(args.word_rep_dict_path)
......@@ -78,12 +97,12 @@ class Dataset(object):
label_ids.append(label_id)
return label_ids
def file_reader(self, filename, max_seq_len=64, mode="train"):
"""
yield (word_idx, target_idx) one by one from file,
or yield (word_idx, ) in `infer` mode
"""
def wrapper():
fread = io.open(filename, "r", encoding="utf-8")
headline = next(fread)
......@@ -93,9 +112,11 @@ class Dataset(object):
for line in fread:
words = line.strip("\n").split("\002")
word_ids = self.word_to_ids(words)
yield word_ids[0:max_seq_len], [0 for _ in word_ids][0: max_seq_len]
yield word_ids[0:max_seq_len], [0 for _ in word_ids][
0:max_seq_len]
else:
assert len(headline) == 2 and headline[0] == "text_a" and headline[1] == "label"
assert len(headline) == 2 and headline[
0] == "text_a" and headline[1] == "label"
for line in fread:
words, labels = line.strip("\n").split("\t")
word_ids = self.word_to_ids(words.split("\002"))
......@@ -109,9 +130,21 @@ class Dataset(object):
if __name__ == "__main__":
parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--word_dict_path", type=str, default="./conf/word.dic", help="word dict")
parser.add_argument("--label_dict_path", type=str, default="./conf/tag.dic", help="label dict")
parser.add_argument("--word_rep_dict_path", type=str, default="./conf/q2b.dic", help="word replace dict")
parser.add_argument(
"--word_dict_path",
type=str,
default="./conf/word.dic",
help="word dict")
parser.add_argument(
"--label_dict_path",
type=str,
default="./conf/tag.dic",
help="label dict")
parser.add_argument(
"--word_rep_dict_path",
type=str,
default="./conf/q2b.dic",
help="word replace dict")
args = parser.parse_args()
dataset = Dataset(args)
data_generator = dataset.file_reader("data/train.tsv")
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Sentiment Classification Task
"""
......@@ -28,7 +41,6 @@ from models.representation.ernie import ernie_encoder
from models.sequence_labeling import nets
import utils
# yapf: disable
parser = argparse.ArgumentParser(__doc__)
model_g = utils.ArgumentGroup(parser, "model", "model configuration and paths.")
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Baidu's open-source Lexical Analysis tool for Chinese, including:
1. Word Segmentation,
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
util tools
"""
......@@ -19,6 +32,7 @@ class ArgumentGroup(object):
"""
Put arguments to one group
"""
def __init__(self, parser, title, des):
"""none"""
self._group = parser.add_argument_group(title=title, description=des)
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This module provides nets for text classification
"""
import paddle.fluid as fluid
def bow_net(data,
label,
dict_dim,
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
bow class
"""
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
cnn class
"""
......@@ -30,8 +43,8 @@ class CNN(object):
left_emb = emb_layer.ops(left)
right_emb = emb_layer.ops(right)
# Presentation context
cnn_layer = layers.SequenceConvPoolLayer(
self.filter_size, self.num_filters, "conv")
cnn_layer = layers.SequenceConvPoolLayer(self.filter_size,
self.num_filters, "conv")
left_cnn = cnn_layer.ops(left_emb)
right_cnn = cnn_layer.ops(right_emb)
# matching layer
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
gru class
"""
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
hinge loss
"""
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
log loss
"""
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
softmax loss
"""
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
lstm class
"""
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
MMDNN class
"""
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
optimizer class
"""
......@@ -43,5 +56,8 @@ class AdamOptimizer(object):
Adam optimizer operation
"""
adam = fluid.optimizer.AdamOptimizer(
self.learning_rate, beta1=self.beta1, beta2=self.beta2, epsilon=self.epsilon)
self.learning_rate,
beta1=self.beta1,
beta2=self.beta2,
epsilon=self.epsilon)
adam.minimize(loss)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
network layers
"""
......@@ -23,8 +36,10 @@ class EmbeddingLayer(object):
"""
operation
"""
emb = fluid.layers.embedding(input=input, size=[
self.dict_size, self.emb_dim], is_sparse=True,
emb = fluid.layers.embedding(
input=input,
size=[self.dict_size, self.emb_dim],
is_sparse=True,
param_attr=attr.ParamAttr(name=self.name))
return emb
......@@ -44,8 +59,7 @@ class SequencePoolLayer(object):
"""
operation
"""
pool = fluid.layers.sequence_pool(
input=input, pool_type=self.pool_type)
pool = fluid.layers.sequence_pool(input=input, pool_type=self.pool_type)
return pool
......@@ -66,9 +80,12 @@ class FCLayer(object):
"""
operation
"""
fc = fluid.layers.fc(input=input, size=self.fc_dim, param_attr=attr.ParamAttr(
name="%s.w" % self.name),
bias_attr=attr.ParamAttr(name="%s.b" % self.name), act=self.act, name=self.name)
fc = fluid.layers.fc(input=input,
size=self.fc_dim,
param_attr=attr.ParamAttr(name="%s.w" % self.name),
bias_attr=attr.ParamAttr(name="%s.b" % self.name),
act=self.act,
name=self.name)
return fc
......@@ -88,10 +105,14 @@ class DynamicGRULayer(object):
"""
operation
"""
proj = fluid.layers.fc(input=input, size=self.gru_dim * 3,
proj = fluid.layers.fc(
input=input,
size=self.gru_dim * 3,
param_attr=attr.ParamAttr(name="%s_fc.w" % self.name),
bias_attr=attr.ParamAttr(name="%s_fc.b" % self.name))
gru = fluid.layers.dynamic_gru(input=proj, size=self.gru_dim,
gru = fluid.layers.dynamic_gru(
input=proj,
size=self.gru_dim,
param_attr=attr.ParamAttr(name="%s.w" % self.name),
bias_attr=attr.ParamAttr(name="%s.b" % self.name))
return gru
......@@ -113,10 +134,14 @@ class DynamicLSTMLayer(object):
"""
operation
"""
proj = fluid.layers.fc(input=input, size=self.lstm_dim * 4,
proj = fluid.layers.fc(
input=input,
size=self.lstm_dim * 4,
param_attr=attr.ParamAttr(name="%s_fc.w" % self.name),
bias_attr=attr.ParamAttr(name="%s_fc.b" % self.name))
lstm, _ = fluid.layers.dynamic_lstm(input=proj, size=self.lstm_dim * 4,
lstm, _ = fluid.layers.dynamic_lstm(
input=proj,
size=self.lstm_dim * 4,
param_attr=attr.ParamAttr(name="%s.w" % self.name),
bias_attr=attr.ParamAttr(name="%s.b" % self.name))
return lstm
......@@ -161,9 +186,12 @@ class SequenceConvPoolLayer(object):
"""
operation
"""
conv = fluid.nets.sequence_conv_pool(input=input, filter_size=self.filter_size,
conv = fluid.nets.sequence_conv_pool(
input=input,
filter_size=self.filter_size,
num_filters=self.num_filters,
param_attr=attr.ParamAttr(name=self.name), act="relu")
param_attr=attr.ParamAttr(name=self.name),
act="relu")
return conv
......@@ -259,7 +287,8 @@ class SoftmaxWithCrossEntropyLayer(object):
"""
operation
"""
loss = fluid.layers.softmax_with_cross_entropy(logits=input, label=label)
loss = fluid.layers.softmax_with_cross_entropy(
logits=input, label=label)
return loss
......@@ -354,8 +383,8 @@ class ConstantLayer(object):
"""
operation
"""
constant = fluid.layers.fill_constant_batch_size_like(
input, shape, dtype, value)
constant = fluid.layers.fill_constant_batch_size_like(input, shape,
dtype, value)
return constant
......@@ -396,6 +425,7 @@ class SoftsignLayer(object):
softsign = fluid.layers.softsign(input)
return softsign
# class MatmulLayer(object):
# def __init__(self, transpose_x, transpose_y):
# self.transpose_x = transpose_x
......@@ -405,7 +435,6 @@ class SoftsignLayer(object):
# matmul = fluid.layers.matmul(x, y, self.transpose_x, self.transpose_y)
# return matmul
# class Conv2dLayer(object):
# def __init__(self, num_filters, filter_size, act, name):
# self.num_filters = num_filters
......@@ -417,7 +446,6 @@ class SoftsignLayer(object):
# conv = fluid.layers.conv2d(input, self.num_filters, self.filter_size, param_attr=attr.ParamAttr(name="%s.w" % self.name), bias_attr=attr.ParamAttr(name="%s.b" % self.name), act=self.act)
# return conv
# class Pool2dLayer(object):
# def __init__(self, pool_size, pool_type):
# self.pool_size = pool_size
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# The placeholder for batch_size in compile time. Must be -1 currently to be
# consistent with some ops' infer-shape output in compile time, such as the
# sequence_expand op used in beamsearch decoder.
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from functools import partial
import numpy as np
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This module provides ErnieModel and ErnieConfig
"""
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
The function lex_net(args) defines the lexical analysis network structure
"""
......@@ -96,8 +109,7 @@ def lex_net(word, target, args, vocab_size, num_labels):
input=emission,
label=target,
param_attr=fluid.ParamAttr(
name='crfw',
learning_rate=crf_lr))
name='crfw', learning_rate=crf_lr))
crf_decode = fluid.layers.crf_decoding(
input=emission, param_attr=fluid.ParamAttr(name='crfw'))
avg_cost = fluid.layers.mean(x=crf_cost)
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Transformer encoder."""
from __future__ import absolute_import
......@@ -100,7 +113,7 @@ def multi_head_attention(queries,
"""
Scaled Dot-Product Attention
"""
scaled_q = layers.scale(x=q, scale=d_key ** -0.5)
scaled_q = layers.scale(x=q, scale=d_key**-0.5)
product = layers.matmul(x=scaled_q, y=k, transpose_y=True)
if attn_bias:
product += attn_bias
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This module provides readers for classification and sequence labeling
"""
......@@ -18,6 +31,7 @@ from preprocess.padding import pad_batch_data
class BaseReader(object):
"""BaseReader for classify and sequence labeling task"""
def __init__(self,
vocab_path,
label_map_config=None,
......@@ -211,6 +225,7 @@ class BaseReader(object):
class ClassifyReader(BaseReader):
"""ClassifyReader"""
def _read_tsv(self, input_file, quotechar=None):
"""Reads a tab separated value file."""
with open(input_file, "r") as f:
......@@ -239,7 +254,10 @@ class ClassifyReader(BaseReader):
# padding
padded_token_ids, input_mask, seq_lens = pad_batch_data(
batch_token_ids, pad_idx=self.pad_id, return_input_mask=True, return_seq_lens=True)
batch_token_ids,
pad_idx=self.pad_id,
return_input_mask=True,
return_seq_lens=True)
padded_text_type_ids = pad_batch_data(
batch_text_type_ids, pad_idx=self.pad_id)
padded_position_ids = pad_batch_data(
......@@ -255,6 +273,7 @@ class ClassifyReader(BaseReader):
class SequenceLabelReader(BaseReader):
"""SequenceLabelReader"""
def _pad_batch_records(self, batch_records):
batch_token_ids = [record.token_ids for record in batch_records]
batch_text_type_ids = [record.text_type_ids for record in batch_records]
......@@ -314,7 +333,9 @@ class SequenceLabelReader(BaseReader):
position_ids = list(range(len(token_ids)))
text_type_ids = [0] * len(token_ids)
no_entity_id = len(self.label_map) - 1
labels = [label if label in self.label_map else u"O" for label in labels]
labels = [
label if label in self.label_map else u"O" for label in labels
]
label_ids = [no_entity_id] + [
self.label_map[label] for label in labels
] + [no_entity_id]
......@@ -332,6 +353,7 @@ class SequenceLabelReader(BaseReader):
class ExtractEmbeddingReader(BaseReader):
"""ExtractEmbeddingReader"""
def _pad_batch_records(self, batch_records):
batch_token_ids = [record.token_ids for record in batch_records]
batch_text_type_ids = [record.text_type_ids for record in batch_records]
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Mask, padding and batching.
"""
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
The file_reader converts raw corpus to input.
"""
......@@ -5,6 +18,7 @@ import os
import __future__
import io
def file_reader(file_dir,
word2id_dict,
label2id_dict,
......@@ -15,6 +29,7 @@ def file_reader(file_dir,
"""
word_dict_len = max(map(int, word2id_dict.values())) + 1
label_dict_len = max(map(int, label2id_dict.values())) + 1
def reader():
"""
the data generator
......@@ -24,7 +39,8 @@ def file_reader(file_dir,
for filename in files:
if not filename.startswith(filename_feature):
continue
for line in io.open(os.path.join(root, filename), 'r', encoding='utf8'):
for line in io.open(
os.path.join(root, filename), 'r', encoding='utf8'):
index += 1
bad_line = False
line = line.strip("\n")
......@@ -54,6 +70,7 @@ def file_reader(file_dir,
if len(word_idx) != len(target_idx):
continue
yield word_idx, target_idx
return reader
......@@ -68,6 +85,7 @@ def test_reader(file_dir,
#print (word2id_dict)
word_dict_len = max(map(int, word2id_dict.values())) + 1
label_dict_len = max(map(int, label2id_dict.values())) + 1
#print word_dict_len
#print label_dict_len
def reader():
......@@ -94,6 +112,7 @@ def test_reader(file_dir,
else:
word_idx.append(int(word2id_dict["OOV"]))
yield word_idx, words
return reader
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This module provides wordseg tools
"""
......@@ -11,12 +24,13 @@ import time
import sys
import io
if sys.version_info > (3,):
if sys.version_info > (3, ):
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
else:
reload(sys)
sys.setdefaultencoding("utf8")
def parse_args():
"""
Arguments Parse
......@@ -26,32 +40,27 @@ def parse_args():
'--batch_size',
type=int,
default=5,
help='The size of a batch. (default: %(default)d)'
)
help='The size of a batch. (default: %(default)d)')
parser.add_argument(
'--model_path',
type=str,
default='./conf/model',
help='A path to the model. (default: %(default)s)'
)
help='A path to the model. (default: %(default)s)')
parser.add_argument(
'--test_data_dir',
type=str,
default='./data/test_data',
help='A directory with test data files. (default: %(default)s)'
)
help='A directory with test data files. (default: %(default)s)')
parser.add_argument(
"--word_dict_path",
type=str,
default="./conf/word.dic",
help="The path of the word dictionary. (default: %(default)s)"
)
help="The path of the word dictionary. (default: %(default)s)")
parser.add_argument(
"--label_dict_path",
type=str,
default="./conf/tag.dic",
help="The path of the label dictionary. (default: %(default)s)"
)
help="The path of the label dictionary. (default: %(default)s)")
parser.add_argument(
"--word_rep_dict_path",
type=str,
......@@ -110,11 +119,9 @@ def infer(args):
label2id_dict = reader.load_reverse_dict(args.label_dict_path)
q2b_dict = reader.load_dict(args.word_rep_dict_path)
test_data = paddle.batch(
reader.test_reader(args.test_data_dir,
word2id_dict,
label2id_dict,
reader.test_reader(args.test_data_dir, word2id_dict, label2id_dict,
q2b_dict),
batch_size = args.batch_size)
batch_size=args.batch_size)
place = fluid.CPUPlace()
#place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
......@@ -130,7 +137,7 @@ def infer(args):
#print(word_idx)
word_list = [x[1] for x in data]
(crf_decode, ) = exe.run(inference_program,
feed={"word":word_idx},
feed={"word": word_idx},
fetch_list=fetch_targets,
return_numpy=False)
lod_info = (crf_decode.lod())[0]
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Senta model.
"""
......@@ -11,10 +24,12 @@ import json
import numpy as np
import paddle.fluid as fluid
class SentaConfig(object):
"""
Senta Config
"""
def __init__(self, config_path):
self._config_dict = self._parse(config_path)
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Senta Reader
"""
......@@ -12,15 +25,13 @@ from utils import data_reader
import paddle
import paddle.fluid as fluid
class SentaProcessor(object):
"""
Processor class for data convertors for senta
"""
def __init__(self,
data_dir,
vocab_path,
random_seed=None):
def __init__(self, data_dir, vocab_path, random_seed=None):
self.data_dir = data_dir
self.vocab = load_vocab(vocab_path)
self.num_examples = {"train": -1, "dev": -1, "infer": -1}
......@@ -30,19 +41,22 @@ class SentaProcessor(object):
"""
Load training examples
"""
return data_reader((self.data_dir + "/train.tsv"), self.vocab, self.num_examples, "train", epoch)
return data_reader((self.data_dir + "/train.tsv"), self.vocab,
self.num_examples, "train", epoch)
def get_dev_examples(self, data_dir, epoch):
"""
Load dev examples
"""
return data_reader((self.data_dir + "/dev.tsv"), self.vocab, self.num_examples, "dev", epoch)
return data_reader((self.data_dir + "/dev.tsv"), self.vocab,
self.num_examples, "dev", epoch)
def get_test_examples(self, data_dir, epoch):
"""
Load test examples
"""
return data_reader((self.data_dir + "/test.tsv"), self.vocab, self.num_examples, "infer", epoch)
return data_reader((self.data_dir + "/test.tsv"), self.vocab,
self.num_examples, "infer", epoch)
def get_labels(self):
"""
......@@ -70,11 +84,14 @@ class SentaProcessor(object):
Generate data for train, dev or infer
"""
if phase == "train":
return paddle.batch(self.get_train_examples(self.data_dir, epoch), batch_size)
return paddle.batch(
self.get_train_examples(self.data_dir, epoch), batch_size)
elif phase == "dev":
return paddle.batch(self.get_dev_examples(self.data_dir, epoch), batch_size)
return paddle.batch(
self.get_dev_examples(self.data_dir, epoch), batch_size)
elif phase == "infer":
return paddle.batch(self.get_test_examples(self.data_dir, epoch), batch_size)
return paddle.batch(
self.get_test_examples(self.data_dir, epoch), batch_size)
else:
raise ValueError(
"Unknown phase, which should be in ['train', 'dev', 'infer'].")
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Sentiment Classification Task
"""
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Sentiment Classification Task
"""
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Arguments for configuration
"""
......@@ -31,6 +44,7 @@ class ArgumentGroup(object):
"""
Argument Class
"""
def __init__(self, parser, title, des):
self._group = parser.add_argument_group(title=title, description=des)
......@@ -95,8 +109,10 @@ def data_reader(file_path, word_dict, num_examples, phrase, epoch):
sys.stderr.write("[NOTICE] Error Format Line!")
continue
label = int(cols[1])
wids = [word_dict[x] if x in word_dict else unk_id
for x in cols[0].split(" ")]
wids = [
word_dict[x] if x in word_dict else unk_id
for x in cols[0].split(" ")
]
all_data.append((wids, label))
if phrase == "train":
......@@ -111,8 +127,10 @@ def data_reader(file_path, word_dict, num_examples, phrase, epoch):
for epoch_index in range(epoch):
for doc, label in all_data:
yield doc, label
return reader
def load_vocab(file_path):
"""
load the given vocabulary
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
SimNet config
"""
......@@ -21,12 +34,14 @@ class SimNetConfig(object):
with open(config_path) as json_file:
config_dict = json.load(json_file)
except Exception:
raise IOError("Error in parsing simnet model config file '%s'" % config_path)
raise IOError("Error in parsing simnet model config file '%s'" %
config_path)
else:
if config_dict["task_mode"] != self.task_mode:
raise ValueError(
"the config '{}' does not match the task_mode '{}'".format(self.config_path, self.task_mode))
"the config '{}' does not match the task_mode '{}'".format(
self.config_path, self.task_mode))
return config_dict
def __getitem__(self, key):
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
compute unicom
"""
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
split unicom file
"""
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
SimNet reader
"""
......@@ -25,15 +38,24 @@ class SimNetProcessor(object):
Reader with Pairwise
"""
if mode == "valid":
with codecs.open(self.args.valid_data_dir, "r", "utf-8") as file:
with codecs.open(self.args.valid_data_dir, "r",
"utf-8") as file:
for line in file:
query, title, label = line.strip().split("\t")
if len(query) == 0 or len(title) == 0 or len(label) == 0 or not label.isdigit() or int(
if len(query) == 0 or len(title) == 0 or len(
label) == 0 or not label.isdigit() or int(
label) not in [0, 1]:
logging.warning("line not match format in test file")
logging.warning(
"line not match format in test file")
continue
query = [self.vocab[word] for word in query.split(" ") if word in self.vocab]
title = [self.vocab[word] for word in title.split(" ") if word in self.vocab]
query = [
self.vocab[word] for word in query.split(" ")
if word in self.vocab
]
title = [
self.vocab[word] for word in title.split(" ")
if word in self.vocab
]
if len(query) == 0:
query = [0]
if len(title) == 0:
......@@ -43,27 +65,47 @@ class SimNetProcessor(object):
with codecs.open(self.args.test_data_dir, "r", "utf-8") as file:
for line in file:
query, title, label = line.strip().split("\t")
if len(query) == 0 or len(title) == 0 or len(label) == 0 or not label.isdigit() or int(
if len(query) == 0 or len(title) == 0 or len(
label) == 0 or not label.isdigit() or int(
label) not in [0, 1]:
logging.warning("line not match format in test file")
logging.warning(
"line not match format in test file")
continue
query = [self.vocab[word] for word in query.split(" ") if word in self.vocab]
title = [self.vocab[word] for word in title.split(" ") if word in self.vocab]
query = [
self.vocab[word] for word in query.split(" ")
if word in self.vocab
]
title = [
self.vocab[word] for word in title.split(" ")
if word in self.vocab
]
if len(query) == 0:
query = [0]
if len(title) == 0:
title = [0]
yield [query, title]
else:
with codecs.open(self.args.train_data_dir, "r", "utf-8") as file:
with codecs.open(self.args.train_data_dir, "r",
"utf-8") as file:
for line in file:
query, pos_title, neg_title = line.strip().split("\t")
if len(query) == 0 or len(pos_title) == 0 or len(neg_title) == 0:
logging.warning("line not match format in test file")
if len(query) == 0 or len(pos_title) == 0 or len(
neg_title) == 0:
logging.warning(
"line not match format in test file")
continue
query = [self.vocab[word] for word in query.split(" ") if word in self.vocab]
pos_title = [self.vocab[word] for word in pos_title.split(" ") if word in self.vocab]
neg_title = [self.vocab[word] for word in neg_title.split(" ") if word in self.vocab]
query = [
self.vocab[word] for word in query.split(" ")
if word in self.vocab
]
pos_title = [
self.vocab[word] for word in pos_title.split(" ")
if word in self.vocab
]
neg_title = [
self.vocab[word] for word in neg_title.split(" ")
if word in self.vocab
]
if len(query) == 0:
query = [0]
if len(pos_title) == 0:
......@@ -77,15 +119,24 @@ class SimNetProcessor(object):
Reader with Pointwise
"""
if mode == "valid":
with codecs.open(self.args.valid_data_dir, "r", "utf-8") as file:
with codecs.open(self.args.valid_data_dir, "r",
"utf-8") as file:
for line in file:
query, title, label = line.strip().split("\t")
if len(query) == 0 or len(title) == 0 or len(label) == 0 or not label.isdigit() or int(
if len(query) == 0 or len(title) == 0 or len(
label) == 0 or not label.isdigit() or int(
label) not in [0, 1]:
logging.warning("line not match format in test file")
logging.warning(
"line not match format in test file")
continue
query = [self.vocab[word] for word in query.split(" ") if word in self.vocab]
title = [self.vocab[word] for word in title.split(" ") if word in self.vocab]
query = [
self.vocab[word] for word in query.split(" ")
if word in self.vocab
]
title = [
self.vocab[word] for word in title.split(" ")
if word in self.vocab
]
if len(query) == 0:
query = [0]
if len(title) == 0:
......@@ -95,27 +146,44 @@ class SimNetProcessor(object):
with codecs.open(self.args.test_data_dir, "r", "utf-8") as file:
for line in file:
query, title, label = line.strip().split("\t")
if len(query) == 0 or len(title) == 0 or len(label) == 0 or not label.isdigit() or int(
if len(query) == 0 or len(title) == 0 or len(
label) == 0 or not label.isdigit() or int(
label) not in [0, 1]:
logging.warning("line not match format in test file")
logging.warning(
"line not match format in test file")
continue
query = [self.vocab[word] for word in query.split(" ") if word in self.vocab]
title = [self.vocab[word] for word in title.split(" ") if word in self.vocab]
query = [
self.vocab[word] for word in query.split(" ")
if word in self.vocab
]
title = [
self.vocab[word] for word in title.split(" ")
if word in self.vocab
]
if len(query) == 0:
query = [0]
if len(title) == 0:
title = [0]
yield [query, title]
else:
with codecs.open(self.args.train_data_dir, "r", "utf-8") as file:
with codecs.open(self.args.train_data_dir, "r",
"utf-8") as file:
for line in file:
query, title, label = line.strip().split("\t")
if len(query) == 0 or len(title) == 0 or len(label) == 0 or not label.isdigit() or int(
if len(query) == 0 or len(title) == 0 or len(
label) == 0 or not label.isdigit() or int(
label) not in [0, 1]:
logging.warning("line not match format in test file")
logging.warning(
"line not match format in test file")
continue
query = [self.vocab[word] for word in query.split(" ") if word in self.vocab]
title = [self.vocab[word] for word in title.split(" ") if word in self.vocab]
query = [
self.vocab[word] for word in query.split(" ")
if word in self.vocab
]
title = [
self.vocab[word] for word in title.split(" ")
if word in self.vocab
]
label = int(label)
if len(query) == 0:
query = [0]
......@@ -138,8 +206,14 @@ class SimNetProcessor(object):
if len(query) == 0 or len(title) == 0:
logging.warning("line not match format in test file")
continue
query = [self.vocab[word] for word in query.split(" ") if word in self.vocab]
title = [self.vocab[word] for word in title.split(" ") if word in self.vocab]
query = [
self.vocab[word] for word in query.split(" ")
if word in self.vocab
]
title = [
self.vocab[word] for word in title.split(" ")
if word in self.vocab
]
if len(query) == 0:
query = [0]
if len(title) == 0:
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
SimNet Task
"""
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# --coding=utf-8
"""
SimNet utilities.
......@@ -17,6 +30,7 @@ import paddle.fluid as fluid
******functions for file processing******
"""
def load_vocab(file_path):
"""
load the given vocabulary
......@@ -47,7 +61,8 @@ def get_result_file(args):
"""
with codecs.open(args.test_data_dir, "r", "utf-8") as test_file:
with codecs.open("predictions.txt", "r", "utf-8") as predictions_file:
with codecs.open(args.test_result_path, "w", "utf-8") as test_result_file:
with codecs.open(args.test_result_path, "w",
"utf-8") as test_result_file:
test_datas = [line.strip("\n") for line in test_file]
predictions = [line.strip("\n") for line in predictions_file]
for test_data, prediction in zip(test_datas, predictions):
......@@ -299,4 +314,3 @@ def init_checkpoint(exe, init_checkpoint_path, main_program):
main_program=main_program,
predicate=existed_persitables)
print("Load model from {}".format(init_checkpoint_path))
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import argparse
import time
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import math
import time
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import six
import numpy as np
import paddle.fluid as fluid
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import six
import numpy as np
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import six
import numpy as np
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import six
import numpy as np
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import six
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid as fluid
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import six
import numpy as np
......@@ -190,7 +203,8 @@ def make_one_batch_input(data_batches, index):
turns = np.array(data_batches["turns"][index]).astype('int64')
tt_turns_len = np.array(data_batches["tt_turns_len"][index]).astype('int64')
every_turn_len = np.array(data_batches["every_turn_len"][index]).astype('int64')
every_turn_len = np.array(data_batches["every_turn_len"][index]).astype(
'int64')
response = np.array(data_batches["response"][index]).astype('int64')
response_len = np.array(data_batches["response_len"][index]).astype('int64')
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import six
import os
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import time
import math
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import time
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import time
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import time
import numpy as np
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#!/usr/bin/python
#-*- coding:utf-8 -*-
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#coding=utf8
import os, sys, json
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#coding=utf8
import sys
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import json
import pandas as pd
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
class TrainTaskConfig(object):
# support both CPU and GPU now.
use_gpu = True
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import ast
import multiprocessing
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from functools import partial
import numpy as np
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import paddle.fluid as fluid
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import ast
import contextlib
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import glob
import six
import os
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import ast
import copy
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import numpy as np
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import paddle.fluid as fluid
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Conll03 dataset.
"""
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import os
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import logging
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import paddle.fluid as fluid
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import contextlib
import paddle
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import contextlib
import paddle
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import time
import unittest
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import time
import numpy as np
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import six
import sys
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册