Commit d5fbe650 authored by 0YuanZhang0, committed by pkpk

update Dgu and ade module (#2971)

* dgu_and_ade

* fix_comment
Parent dc9116d0
# this file is only used for continuous evaluation test!
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""this file is only used for continuous evaluation test!"""
import os
import sys
......
"""
Evaluation for auto dialogue evaluation
"""
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Evaluation for auto dialogue evaluation"""
import sys
import numpy as np
......
#!/bin/bash
#check data directory
cd ..
echo "Start download data and models.............."
if [ ! -d "data" ]; then
echo "Directory data does not exist, make new data directory"
mkdir data
fi
cd data
#check configure file
if [ ! -d "config" ]; then
echo "config directory not exist........"
exit 255
else
if [ ! -f "config/ade.yaml" ]; then
echo "config file dgu.yaml has been lost........"
exit 255
fi
fi
#check and download input data
if [ ! -d "input" ]; then
echo "Directory input does not exist, make new input directory"
mkdir input
fi
cd input
wget --no-check-certificate https://baidu-nlp.bj.bcebos.com/auto_dialogue_evaluation_dataset-1.0.0.tar.gz
tar -zxvf auto_dialogue_evaluation_dataset-1.0.0.tar.gz
rm auto_dialogue_evaluation_dataset-1.0.0.tar.gz
cd ..
#check and download pretrain model
if [ ! -d "pretrain_model" ]; then
echo "Directory pretrain_model does not exist, make new pretrain_model directory"
mkdir pretrain_model
fi
#check and download inference model
if [ ! -d "inference_models" ]; then
    echo "Directory inference_models does not exist, make new inference_models directory"
mkdir inference_models
fi
#check output
if [ ! -d "output" ]; then
echo "Directory output does not exist, make new output directory"
mkdir output
fi
#check saved model
if [ ! -d "saved_models" ]; then
echo "Directory saved_models does not exist, make new saved_models directory"
mkdir saved_models
fi
cd saved_models
wget --no-check-certificate https://baidu-nlp.bj.bcebos.com/auto_dialogue_evaluation_models.2.0.0.tar.gz
tar -xvf auto_dialogue_evaluation_models.2.0.0.tar.gz
rm auto_dialogue_evaluation_models.2.0.0.tar.gz
echo "Finish.............."
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Reader for auto dialogue evaluation"""
import sys
import time
import random
import numpy as np
import paddle
import paddle.fluid as fluid
class DataProcessor(object):
def __init__(self, data_path, max_seq_length, batch_size):
"""init"""
self.data_file = data_path
self.max_seq_len = max_seq_length
self.batch_size = batch_size
self.num_examples = {'train': -1, 'dev': -1, 'test': -1}
def get_examples(self):
"""load examples"""
examples = []
with open(self.data_file, 'r') as fr:
for line in fr:
examples.append(line.strip())
return examples
def get_num_examples(self, phase):
"""Get number of examples for train, dev or test."""
if phase not in ['train', 'dev', 'test']:
raise ValueError(
"Unknown phase, which should be in ['train', 'dev', 'test'].")
        with open(self.data_file, 'r') as fr:
            count = len(fr.readlines())
self.num_examples[phase] = count
return self.num_examples[phase]
def data_generator(self,
place,
phase="train",
shuffle=True,
sample_pro=1):
"""
Generate data for train, dev or test.
Args:
phase: string. The phase for which to generate data.
shuffle: bool. Whether to shuffle examples.
sample_pro: sample data ratio
"""
examples = self.get_examples()
if shuffle:
np.random.shuffle(examples)
def batch_reader():
"""read batch data"""
batch = []
for example in examples:
if sample_pro < 1:
if random.random() > sample_pro:
continue
tokens = example.strip().split('\t')
assert len(tokens) == 3
context = [int(x) for x in tokens[0].split()[: self.max_seq_len]]
response = [int(x) for x in tokens[1].split()[: self.max_seq_len]]
label = [int(tokens[2])]
instance = (context, response, label)
if len(batch) < self.batch_size:
batch.append(instance)
else:
if len(batch) == self.batch_size:
yield batch
batch = [instance]
if len(batch) > 0:
yield batch
def create_lodtensor(data_ids, place):
"""create LodTensor for input ids"""
cur_len = 0
lod = [cur_len]
seq_lens = [len(ids) for ids in data_ids]
for l in seq_lens:
cur_len += l
lod.append(cur_len)
flattened_data = np.concatenate(data_ids, axis=0).astype("int64")
flattened_data = flattened_data.reshape([len(flattened_data), 1])
res = fluid.LoDTensor()
res.set(flattened_data, place)
res.set_lod([lod])
return res
def wrapper():
"""yield batch data to network"""
for batch_data in batch_reader():
context_ids = [batch[0] for batch in batch_data]
response_ids = [batch[1] for batch in batch_data]
label_ids = [batch[2] for batch in batch_data]
context_res = create_lodtensor(context_ids, place)
response_res = create_lodtensor(response_ids, place)
label_ids = np.array(label_ids).astype("int64").reshape([-1, 1])
input_batch = [context_res, response_res, label_ids]
yield input_batch
return wrapper
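

# Minimal usage sketch for the DataProcessor above; the data path is an
# assumption, and any ids file with "context \t response \t label" lines works.
if __name__ == "__main__":
    demo_place = fluid.CPUPlace()
    processor = DataProcessor(
        data_path="./data/input/data/unlabel_data/train.ids",  # assumed path
        max_seq_length=50,
        batch_size=2)
    generator = processor.data_generator(
        place=demo_place, phase="train", shuffle=False, sample_pro=1)
    for context_lod, response_lod, labels in generator():
        print(context_lod.lod(), labels.shape)  # offsets and (batch, 1) labels
        break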
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import argparse
import json
import yaml
import six
import logging
logging_only_message = "%(message)s"
logging_details = "%(asctime)s.%(msecs)03d %(levelname)s %(module)s - %(funcName)s: %(message)s"
class JsonConfig(object):
"""
    A high-level api for handling a json configure file.
"""
def __init__(self, config_path):
self._config_dict = self._parse(config_path)
def _parse(self, config_path):
try:
with open(config_path) as json_file:
config_dict = json.load(json_file)
        except Exception:
            raise IOError("Error in parsing the json config file '%s'" %
                          config_path)
else:
return config_dict
def __getitem__(self, key):
return self._config_dict[key]
def print_config(self):
for arg, value in sorted(six.iteritems(self._config_dict)):
print('%s: %s' % (arg, value))
print('------------------------------------------------')
class ArgumentGroup(object):
def __init__(self, parser, title, des):
self._group = parser.add_argument_group(title=title, description=des)
def add_arg(self, name, type, default, help, **kwargs):
type = str2bool if type == bool else type
self._group.add_argument(
"--" + name,
default=default,
type=type,
help=help + ' Default: %(default)s.',
**kwargs)
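    # Note: bool-typed options are converted through str2bool (defined below),
    # so flags like "--use_cuda false" parse as real booleans.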
class ArgConfig(object):
"""
A high-level api for handling argument configs.
"""
def __init__(self):
parser = argparse.ArgumentParser()
train_g = ArgumentGroup(parser, "training", "training options.")
train_g.add_arg("epoch", int, 3, "Number of epoches for fine-tuning.")
train_g.add_arg("learning_rate", float, 5e-5,
"Learning rate used to train with warmup.")
train_g.add_arg(
"lr_scheduler",
str,
"linear_warmup_decay",
"scheduler of learning rate.",
choices=['linear_warmup_decay', 'noam_decay'])
train_g.add_arg("weight_decay", float, 0.01,
"Weight decay rate for L2 regularizer.")
train_g.add_arg(
"warmup_proportion", float, 0.1,
"Proportion of training steps to perform linear learning rate warmup for."
)
train_g.add_arg("save_steps", int, 1000,
"The steps interval to save checkpoints.")
train_g.add_arg("use_fp16", bool, False,
"Whether to use fp16 mixed precision training.")
train_g.add_arg(
"loss_scaling", float, 1.0,
"Loss scaling factor for mixed precision training, only valid when use_fp16 is enabled."
)
train_g.add_arg("pred_dir", str, None,
"Path to save the prediction results")
log_g = ArgumentGroup(parser, "logging", "logging related.")
log_g.add_arg("skip_steps", int, 10,
"The steps interval to print loss.")
log_g.add_arg("verbose", bool, False, "Whether to output verbose log.")
run_type_g = ArgumentGroup(parser, "run_type", "running type options.")
run_type_g.add_arg("use_cuda", bool, True,
"If set, use GPU for training.")
run_type_g.add_arg(
"use_fast_executor", bool, False,
"If set, use fast parallel executor (in experiment).")
run_type_g.add_arg(
"num_iteration_per_drop_scope", int, 1,
"Ihe iteration intervals to clean up temporary variables.")
run_type_g.add_arg("do_train", bool, True,
"Whether to perform training.")
run_type_g.add_arg("do_predict", bool, True,
"Whether to perform prediction.")
custom_g = ArgumentGroup(parser, "customize", "customized options.")
self.custom_g = custom_g
self.parser = parser
def add_arg(self, name, dtype, default, descrip):
self.custom_g.add_arg(name, dtype, default, descrip)
def build_conf(self):
return self.parser.parse_args()
def str2bool(v):
    # argparse cannot parse strings like "True"/"False" into Python booleans
    # directly, so bool-typed options are routed through this helper
return v.lower() in ("true", "t", "1")
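# e.g. str2bool("True") -> True, str2bool("0") -> False; any other string,
# including "no", also maps to False.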
def print_arguments(args, log=None):
if not log:
print('----------- Configuration Arguments -----------')
for arg, value in sorted(six.iteritems(vars(args))):
print('%s: %s' % (arg, value))
print('------------------------------------------------')
else:
log.info('----------- Configuration Arguments -----------')
for arg, value in sorted(six.iteritems(vars(args))):
log.info('%s: %s' % (arg, value))
log.info('------------------------------------------------')
class PDConfig(object):
"""
A high-level API for managing configuration files in PaddlePaddle.
    It works jointly with command-line arguments, json files and yaml files.
"""
def __init__(self, json_file="", yaml_file="", fuse_args=True):
"""
        Init function for PDConfig.
json_file: the path to the json configure file.
yaml_file: the path to the yaml configure file.
fuse_args: if fuse the json/yaml configs with argparse.
"""
assert isinstance(json_file, str)
assert isinstance(yaml_file, str)
if json_file != "" and yaml_file != "":
raise Warning(
"json_file and yaml_file can not co-exist for now. please only use one configure file type."
)
return
self.args = None
self.arg_config = {}
self.json_config = {}
self.yaml_config = {}
parser = argparse.ArgumentParser()
self.default_g = ArgumentGroup(parser, "default", "default options.")
self.yaml_g = ArgumentGroup(parser, "yaml", "options from yaml.")
self.json_g = ArgumentGroup(parser, "json", "options from json.")
self.com_g = ArgumentGroup(parser, "custom", "customized options.")
self.default_g.add_arg("epoch", int, 2,
"Number of epoches for training.")
self.default_g.add_arg("learning_rate", float, 1e-2,
"Learning rate used to train.")
self.default_g.add_arg("do_train", bool, False,
"Whether to perform training.")
self.default_g.add_arg("do_predict", bool, False,
"Whether to perform predicting.")
self.default_g.add_arg("do_eval", bool, False,
"Whether to perform evaluating.")
self.parser = parser
if json_file != "":
self.load_json(json_file, fuse_args=fuse_args)
if yaml_file:
self.load_yaml(yaml_file, fuse_args=fuse_args)
def load_json(self, file_path, fuse_args=True):
        if not os.path.exists(file_path):
            raise Warning("the json file %s does not exist." % file_path)
        with open(file_path, "r") as fin:
            self.json_config = json.loads(fin.read())
if fuse_args:
for name in self.json_config:
if not isinstance(self.json_config[name], int) \
and not isinstance(self.json_config[name], float) \
and not isinstance(self.json_config[name], str) \
and not isinstance(self.json_config[name], bool):
continue
self.json_g.add_arg(name,
type(self.json_config[name]),
self.json_config[name],
"This is from %s" % file_path)
def load_yaml(self, file_path, fuse_args=True):
        if not os.path.exists(file_path):
            raise Warning("the yaml file %s does not exist." % file_path)
        with open(file_path, "r") as fin:
            self.yaml_config = yaml.load(fin, Loader=yaml.SafeLoader)
if fuse_args:
for name in self.yaml_config:
if not isinstance(self.yaml_config[name], int) \
and not isinstance(self.yaml_config[name], float) \
and not isinstance(self.yaml_config[name], str) \
and not isinstance(self.yaml_config[name], bool):
continue
self.yaml_g.add_arg(name,
type(self.yaml_config[name]),
self.yaml_config[name],
"This is from %s" % file_path)
def build(self):
self.args = self.parser.parse_args()
self.arg_config = vars(self.args)
def __add__(self, new_arg):
assert isinstance(new_arg, list) or isinstance(new_arg, tuple)
assert len(new_arg) >= 3
assert self.args is None
name = new_arg[0]
dtype = new_arg[1]
dvalue = new_arg[2]
desc = new_arg[3] if len(
new_arg) == 4 else "Description is not provided."
self.com_g.add_arg(name, dtype, dvalue, desc)
return self
def __getattr__(self, name):
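        # Lookup order: command-line args first, then the json config, then
        # the yaml config; the first source that defines the name wins.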
if name in self.arg_config:
return self.arg_config[name]
if name in self.json_config:
return self.json_config[name]
if name in self.yaml_config:
return self.yaml_config[name]
raise Warning("The argument %s is not defined." % name)
def Print(self):
print("-" * 70)
for name in self.arg_config:
print("%s:\t\t\t\t%s" % (str(name), str(self.arg_config[name])))
for name in self.json_config:
if name not in self.arg_config:
print("%s:\t\t\t\t%s" %
(str(name), str(self.json_config[name])))
for name in self.yaml_config:
if name not in self.arg_config:
print("%s:\t\t\t\t%s" %
(str(name), str(self.yaml_config[name])))
print("-" * 70)
if __name__ == "__main__":
"""
pd_config = PDConfig(json_file = "./test/bert_config.json")
pd_config.build()
print(pd_config.do_train)
print(pd_config.hidden_size)
pd_config = PDConfig(yaml_file = "./test/bert_config.yaml")
pd_config.build()
print(pd_config.do_train)
print(pd_config.hidden_size)
"""
pd_config = PDConfig(yaml_file="./test/bert_config.yaml")
pd_config += ("my_age", int, 18, "I am forever 18.")
pd_config.build()
print(pd_config.do_train)
print(pd_config.hidden_size)
print(pd_config.my_age)
"""
Init for pretrained para
"""
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -16,6 +12,8 @@ Init for pretrained para
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import division
from __future__ import print_function
import os
......@@ -27,25 +25,9 @@ import numpy as np
import paddle.fluid as fluid
def init_pretraining_params(exe, pretraining_params_path, main_program):
"""
Init pretraining params
"""
    assert os.path.exists(pretraining_params_path), \
        "[%s] can't be found." % pretraining_params_path
def existed_params(var):
"""
        Check that var is a Parameter whose file exists under the pretraining dir
"""
if not isinstance(var, fluid.framework.Parameter):
return False
return os.path.exists(os.path.join(pretraining_params_path, var.name))
fluid.io.load_vars(
exe,
pretraining_params_path,
main_program=main_program,
predicate=existed_params)
print("Load pretraining parameters from {}.".format(
pretraining_params_path))
class InputField(object):
def __init__(self, input_field):
"""init inpit field"""
self.context_wordseq = input_field[0]
self.response_wordseq = input_field[1]
self.labels = input_field[2]
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import paddle
import paddle.fluid as fluid
def check_cuda(use_cuda, err = \
    "\nYou cannot set use_cuda = True in the model because you are using paddlepaddle-cpu.\n \
    Please: 1. Install paddlepaddle-gpu to run your models on GPU or 2. Set use_cuda = False to run models on CPU.\n"
    ):
    try:
        if use_cuda and not fluid.is_compiled_with_cuda():
            print(err)
            sys.exit(1)
    except Exception:
        pass
if __name__ == "__main__":
check_cuda(True)
check_cuda(False)
check_cuda(True, "This is only for testing.")
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""save or load model api"""
import os
import sys
import paddle
import paddle.fluid as fluid
def init_from_pretrain_model(args, exe, program):
assert isinstance(args.init_from_pretrain_model, str)
    if not os.path.exists(args.init_from_pretrain_model):
        raise Warning("The pretrained params do not exist.")
def existed_params(var):
if not isinstance(var, fluid.framework.Parameter):
return False
return os.path.exists(
os.path.join(args.init_from_pretrain_model, var.name))
fluid.io.load_vars(
exe,
args.init_from_pretrain_model,
main_program=program,
predicate=existed_params)
print("finish initing model from pretrained params from %s" %
(args.init_from_pretrain_model))
return True
def init_from_checkpoint(args, exe, program):
assert isinstance(args.init_from_checkpoint, str)
    if not os.path.exists(args.init_from_checkpoint):
        raise Warning("the checkpoint path does not exist.")
fluid.io.load_persistables(
executor=exe,
dirname=args.init_from_checkpoint,
main_program=program,
filename="checkpoint.pdckpt")
print("finish initing model from checkpoint from %s" %
(args.init_from_checkpoint))
return True
def init_from_params(args, exe, program):
assert isinstance(args.init_from_params, str)
    if not os.path.exists(args.init_from_params):
        raise Warning("the params path does not exist.")
fluid.io.load_params(
executor=exe,
dirname=args.init_from_params,
main_program=program,
filename="params.pdparams")
print("finish init model from params from %s" % (args.init_from_params))
return True
def save_checkpoint(args, exe, program, dirname):
assert isinstance(args.save_model_path, str)
checkpoint_dir = os.path.join(args.save_model_path, args.save_checkpoint)
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
fluid.io.save_persistables(
exe,
os.path.join(checkpoint_dir, dirname),
main_program=program,
filename="checkpoint.pdckpt")
print("save checkpoint at %s" % (os.path.join(checkpoint_dir, dirname)))
return True
def save_param(args, exe, program, dirname):
assert isinstance(args.save_model_path, str)
param_dir = os.path.join(args.save_model_path, args.save_param)
    if not os.path.exists(param_dir):
        os.makedirs(param_dir)
fluid.io.save_params(
exe,
os.path.join(param_dir, dirname),
main_program=program,
filename="params.pdparams")
print("save parameters at %s" % (os.path.join(param_dir, dirname)))
return True
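

# Illustrative round trip, a sketch assuming an args namespace as above and
# an existing executor/program; paths are assumptions:
#
#   save_checkpoint(args, exe, train_prog, "step_100")
#   args.init_from_checkpoint = os.path.join(
#       args.save_model_path, args.save_checkpoint, "step_100")
#   init_from_checkpoint(args, exe, train_prog)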
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Network for auto dialogue evaluation"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.fluid as fluid
def create_net(
is_training,
model_input,
args,
clip_value=10.0,
word_emb_name="shared_word_emb",
lstm_W_name="shared_lstm_W",
lstm_bias_name="shared_lstm_bias"):
context_wordseq = model_input.context_wordseq
response_wordseq = model_input.response_wordseq
label = model_input.labels
#emb
context_emb = fluid.layers.embedding(
input=context_wordseq,
size=[args.vocab_size, args.emb_size],
is_sparse=True,
param_attr=fluid.ParamAttr(
name=word_emb_name,
initializer=fluid.initializer.Normal(scale=0.1)))
response_emb = fluid.layers.embedding(
input=response_wordseq,
size=[args.vocab_size, args.emb_size],
is_sparse=True,
param_attr=fluid.ParamAttr(
name=word_emb_name,
initializer=fluid.initializer.Normal(scale=0.1)))
#fc to fit dynamic LSTM
context_fc = fluid.layers.fc(
input=context_emb,
size=args.hidden_size * 4,
param_attr=fluid.ParamAttr(name='fc_weight'),
bias_attr=fluid.ParamAttr(name='fc_bias'))
response_fc = fluid.layers.fc(
input=response_emb,
size=args.hidden_size * 4,
param_attr=fluid.ParamAttr(name='fc_weight'),
bias_attr=fluid.ParamAttr(name='fc_bias'))
#LSTM
context_rep, _ = fluid.layers.dynamic_lstm(
input=context_fc,
size=args.hidden_size * 4,
param_attr=fluid.ParamAttr(name=lstm_W_name),
bias_attr=fluid.ParamAttr(name=lstm_bias_name))
context_rep = fluid.layers.sequence_last_step(context_rep)
response_rep, _ = fluid.layers.dynamic_lstm(
input=response_fc,
size=args.hidden_size * 4,
param_attr=fluid.ParamAttr(name=lstm_W_name),
bias_attr=fluid.ParamAttr(name=lstm_bias_name))
response_rep = fluid.layers.sequence_last_step(input=response_rep)
logits = fluid.layers.bilinear_tensor_product(
context_rep, response_rep, size=1)
if args.loss_type == 'CLS':
label = fluid.layers.cast(x=label, dtype='float32')
loss = fluid.layers.sigmoid_cross_entropy_with_logits(logits, label)
loss = fluid.layers.reduce_mean(
fluid.layers.clip(
loss, min=-clip_value, max=clip_value))
elif args.loss_type == 'L2':
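        # 2*sigmoid squashes logits into (0, 2), and dividing the squared
        # error by 4 normalizes by the largest possible gap of that range
        # (human relevance scores are assumed to lie in [0, 2]).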
norm_score = 2 * fluid.layers.sigmoid(logits)
label = fluid.layers.cast(x=label, dtype='float32')
loss = fluid.layers.square_error_cost(norm_score, label) / 4
loss = fluid.layers.reduce_mean(loss)
    else:
        raise ValueError("Unknown loss_type: %s" % args.loss_type)
if is_training:
return loss
else:
return logits
def set_word_embedding(word_emb, place, word_emb_name="shared_word_emb"):
"""
Set word embedding
"""
word_emb_param = fluid.global_scope().find_var(
word_emb_name).get_tensor()
word_emb_param.set(word_emb, place)
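

# Sketch of how set_word_embedding might be driven, mirroring the pickle
# loading in the training code; the embedding path is an assumption:
#
#   import pickle
#   import numpy as np
#   with open("./data/word_emb.pkl", "rb") as fin:
#       word_emb = np.array(pickle.load(fin)).astype("float32")
#   set_word_embedding(word_emb, place)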
"""
Auto Dialogue Evaluation.
"""
import argparse
import six
def parse_args():
"""
Auto Dialogue Evaluation Config
"""
parser = argparse.ArgumentParser('Automatic Dialogue Evaluation.')
parser.add_argument(
'--do_train', type=bool, default=False, help='Whether to perform training.')
parser.add_argument(
'--do_val', type=bool, default=False, help='Whether to perform evaluation.')
parser.add_argument(
'--do_infer', type=bool, default=False, help='Whether to perform inference.')
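    # Caveat: argparse's type=bool treats any non-empty string as True, so
    # pass these switches as "--do_train True" or omit them entirely.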
parser.add_argument(
'--loss_type', type=str, default='CLS', help='Loss type, CLS or L2.')
#data path
parser.add_argument(
'--train_path', type=str, default=None, help='Path of training data')
parser.add_argument(
'--val_path', type=str, default=None, help='Path of validation data')
parser.add_argument(
'--test_path', type=str, default=None, help='Path of test data')
parser.add_argument(
'--save_path', type=str, default='tmp', help='Save path')
#step fit for data size
parser.add_argument(
'--print_step', type=int, default=50, help='Print step')
parser.add_argument(
'--save_step', type=int, default=400, help='Save step')
parser.add_argument(
        '--num_scan_data', type=int, default=20, help='Number of passes over the training data')
parser.add_argument(
'--word_emb_init', type=str, default=None, help='Path to the initial word embedding')
parser.add_argument(
'--init_model', type=str, default=None, help='Path to the init model')
parser.add_argument(
'--use_cuda',
action='store_true',
help='If set, use cuda for training.')
parser.add_argument(
'--batch_size', type=int, default=256, help='Batch size')
parser.add_argument(
'--hidden_size', type=int, default=256, help='Hidden size')
parser.add_argument(
'--emb_size', type=int, default=256, help='Embedding size')
parser.add_argument(
'--vocab_size', type=int, default=484016, help='Vocabulary size')
parser.add_argument(
'--learning_rate', type=float, default=0.001, help='Learning rate')
parser.add_argument(
'--sample_pro', type=float, default=1, help='Sample probability for training data')
parser.add_argument(
'--max_len', type=int, default=50, help='Max length for sentences')
args = parser.parse_args()
return args
def print_arguments(args):
"""
Print Config
"""
print('----------- Configuration Arguments -----------')
for arg, value in sorted(six.iteritems(vars(args))):
print('%s: %s' % (arg, value))
print('------------------------------------------------')
loss_type: "CLS"
training_file: ""
val_file: ""
predict_file: ""
print_steps: 10
save_steps: 10
num_scan_data: ""
word_emb_init: ""
init_model: ""
use_cuda: ""
batch_size: 256
hidden_size: 256
emb_size: 256
vocab_size: 484016
sample_pro: 1.0
output_prediction_file: ""
init_from_checkpoint: ""
init_from_params: ""
init_from_pretrain_model: ""
inference_model_dir: ""
save_model_path: ""
save_checkpoint: ""
save_param: ""
evaluation_file: ""
vocab_path: ""
max_seq_len: 128
random_seed: 110
do_save_inference_model: False
enable_ce: "store_true"
wget --no-check-certificate https://baidu-nlp.bj.bcebos.com/auto_dialogue_evaluation_dataset-1.0.0.tar.gz
tar -xzf auto_dialogue_evaluation_dataset-1.0.0.tar.gz
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""evaluation metrics"""
import os
import sys
import numpy as np
import ade.evaluate as evaluate
from ade.utils.configure import PDConfig
def do_eval(args):
"""evaluate metrics"""
labels = []
with open(args.evaluation_file, 'r') as fr:
for line in fr:
tokens = line.strip().split('\t')
assert len(tokens) == 3
label = int(tokens[2])
labels.append(label)
scores = []
with open(args.output_prediction_file, 'r') as fr:
for line in fr:
tokens = line.strip().split('\t')
assert len(tokens) == 2
score = tokens[1].strip("[]").split()
score = np.array(score)
score = score.astype(np.float64)
scores.append(score)
if args.loss_type == 'CLS':
recall_dict = evaluate.evaluate_Recall(list(zip(scores, labels)))
mean_score = sum(scores) / len(scores)
print('mean score: %.6f' % mean_score)
print('evaluation recall result:')
print('1_in_2: %.6f\t1_in_10: %.6f\t2_in_10: %.6f\t5_in_10: %.6f' %
(recall_dict['1_in_2'], recall_dict['1_in_10'],
recall_dict['2_in_10'], recall_dict['5_in_10']))
elif args.loss_type == 'L2':
scores = [x[0] for x in scores]
mean_score = sum(scores) / len(scores)
cor = evaluate.evaluate_cor(scores, labels)
print('mean score: %.6f\nevaluation cor results:%.6f' %
(mean_score, cor))
    else:
        raise ValueError("Unknown loss_type: %s" % args.loss_type)
if __name__ == "__main__":
args = PDConfig(yaml_file="./data/config/ade.yaml")
args.build()
do_eval(args)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""save inference model for auto dialogue evaluation"""
import os
import sys
import six
import numpy as np
import time
import multiprocessing
import paddle
import paddle.fluid as fluid
import ade.reader as reader
from ade_net import create_net
from ade.utils.configure import PDConfig
from ade.utils.input_field import InputField
from ade.utils.model_check import check_cuda
import ade.utils.save_load_io as save_load_io
def do_save_inference_model(args):
test_prog = fluid.default_main_program()
startup_prog = fluid.default_startup_program()
with fluid.program_guard(test_prog, startup_prog):
test_prog.random_seed = args.random_seed
startup_prog.random_seed = args.random_seed
with fluid.unique_name.guard():
context_wordseq = fluid.layers.data(
name='context_wordseq', shape=[1], dtype='int64', lod_level=1)
response_wordseq = fluid.layers.data(
name='response_wordseq', shape=[1], dtype='int64', lod_level=1)
labels = fluid.layers.data(
name='labels', shape=[1], dtype='int64')
input_inst = [context_wordseq, response_wordseq, labels]
input_field = InputField(input_inst)
data_reader = fluid.io.PyReader(feed_list=input_inst,
capacity=4, iterable=False)
logits = create_net(
is_training=False,
model_input=input_field,
args=args
)
if args.use_cuda:
place = fluid.CUDAPlace(0)
else:
place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_prog)
assert (args.init_from_params) or (args.init_from_pretrain_model)
if args.init_from_params:
save_load_io.init_from_params(args, exe, test_prog)
elif args.init_from_pretrain_model:
save_load_io.init_from_pretrain_model(args, exe, test_prog)
# saving inference model
fluid.io.save_inference_model(
args.inference_model_dir,
feeded_var_names=[
input_field.context_wordseq.name,
input_field.response_wordseq.name,
],
target_vars=[
logits,
],
executor=exe,
main_program=test_prog,
model_filename="model.pdmodel",
params_filename="params.pdparams")
print("save inference model at %s" % (args.inference_model_dir))
if __name__ == "__main__":
args = PDConfig(yaml_file="./data/config/ade.yaml")
args.build()
check_cuda(args.use_cuda)
do_save_inference_model(args)
"""
Auto dialogue evaluation task
"""
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import six
import numpy as np
import time
import multiprocessing
import paddle
import paddle.fluid as fluid
import reader as reader
import evaluation as eva
import init as init
try:
import cPickle as pickle #python 2
except ImportError as e:
import pickle #python 3
sys.path.append('../../models/dialogue_model_toolkit/auto_dialogue_evaluation/')
sys.path.append('../../models/')
from net import Network
import config
from model_check import check_cuda
def train(args):
"""Train
"""
if not os.path.exists(args.save_path):
os.makedirs(args.save_path)
net = Network(args.vocab_size, args.emb_size, args.hidden_size)
train_program = fluid.Program()
train_startup = fluid.Program()
if "CE_MODE_X" in os.environ:
train_program.random_seed = 110
train_startup.random_seed = 110
with fluid.program_guard(train_program, train_startup):
with fluid.unique_name.guard():
logits, loss = net.network(args.loss_type)
loss.persistable = True
logits.persistable = True
# gradient clipping
fluid.clip.set_gradient_clip(clip=fluid.clip.GradientClipByValue(
max=1.0, min=-1.0))
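            # Each gradient element is clipped to [-1, 1] before the Adam update.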
optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
optimizer.minimize(loss)
print("begin memory optimization ...")
fluid.memory_optimize(train_program)
print("end memory optimization ...")
test_program = fluid.Program()
test_startup = fluid.Program()
if "CE_MODE_X" in os.environ:
test_program.random_seed = 110
test_startup.random_seed = 110
with fluid.program_guard(test_program, test_startup):
with fluid.unique_name.guard():
logits, loss = net.network(args.loss_type)
loss.persistable = True
logits.persistable = True
test_program = test_program.clone(for_test=True)
if args.use_cuda:
place = fluid.CUDAPlace(0)
dev_count = fluid.core.get_cuda_device_count()
else:
place = fluid.CPUPlace()
dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
print("device count %d" % dev_count)
print("theoretical memory usage: ")
print(
fluid.contrib.memory_usage(
program=train_program, batch_size=args.batch_size))
exe = fluid.Executor(place)
exe.run(train_startup)
exe.run(test_startup)
train_exe = fluid.ParallelExecutor(
use_cuda=args.use_cuda, loss_name=loss.name, main_program=train_program)
test_exe = fluid.ParallelExecutor(
use_cuda=args.use_cuda,
main_program=test_program,
share_vars_from=train_exe)
if args.word_emb_init is not None:
print("start loading word embedding init ...")
if six.PY2:
word_emb = np.array(pickle.load(open(args.word_emb_init,
'rb'))).astype('float32')
else:
word_emb = np.array(
pickle.load(
open(args.word_emb_init, 'rb'), encoding="bytes")).astype(
'float32')
net.set_word_embedding(word_emb, place)
print("finish init word embedding ...")
print("start loading data ...")
def train_with_feed(batch_data):
"""
Train on one batch
"""
        # TODO: get_feed_names
feed_dict = dict(zip(net.get_feed_names(), batch_data))
cost = train_exe.run(feed=feed_dict, fetch_list=[loss.name])
return cost[0]
def test_with_feed(batch_data):
"""
Test on one batch
"""
feed_dict = dict(zip(net.get_feed_names(), batch_data))
score = test_exe.run(feed=feed_dict, fetch_list=[logits.name])
return score[0]
def evaluate():
"""
Evaluate to choose model
"""
val_batches = reader.batch_reader(args.val_path, args.batch_size, place,
args.max_len, 1)
scores = []
labels = []
for batch in val_batches:
scores.extend(test_with_feed(batch))
labels.extend([x[0] for x in batch[2]])
return eva.evaluate_Recall(list(zip(scores, labels)))
def save_exe(step, best_recall):
"""
        Conditionally save the model when eval recall improves
"""
recall_dict = evaluate()
print('evaluation recall result:')
print('1_in_2: %s\t1_in_10: %s\t2_in_10: %s\t5_in_10: %s' %
(recall_dict['1_in_2'], recall_dict['1_in_10'],
recall_dict['2_in_10'], recall_dict['5_in_10']))
if recall_dict['1_in_10'] > best_recall and step != 0:
fluid.io.save_inference_model(
args.save_path,
net.get_feed_inference_names(),
logits,
exe,
main_program=train_program)
print("Save model at step %d ... " % step)
print(
time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
best_recall = recall_dict['1_in_10']
return best_recall
    # train over multiple epochs
global_step, train_time = 0, 0.0
best_recall = 0
for epoch in six.moves.xrange(args.num_scan_data):
train_batches = reader.batch_reader(args.train_path, args.batch_size,
place, args.max_len,
args.sample_pro)
begin_time = time.time()
sum_cost = 0
ce_cost = 0
for batch in train_batches:
if (args.save_path is not None) and (
global_step % args.save_step == 0):
best_recall = save_exe(global_step, best_recall)
cost = train_with_feed(batch)
global_step += 1
sum_cost += cost.mean()
ce_cost = cost.mean()
if global_step % args.print_step == 0:
print('training step %s avg loss %s' %
(global_step, sum_cost / args.print_step))
sum_cost = 0
pass_time_cost = time.time() - begin_time
train_time += pass_time_cost
print("Pass {0}, pass_time_cost {1}"
.format(epoch, "%2.2f sec" % pass_time_cost))
if "CE_MODE_X" in os.environ and epoch == args.num_scan_data - 1:
card_num = get_cards()
print("kpis\ttrain_duration_card%s\t%s" %
(card_num, pass_time_cost))
print("kpis\ttrain_loss_card%s\t%s" % (card_num, ce_cost))
def finetune(args):
"""
Finetune
"""
if not os.path.exists(args.save_path):
os.makedirs(args.save_path)
net = Network(args.vocab_size, args.emb_size, args.hidden_size)
train_program = fluid.Program()
train_startup = fluid.Program()
if "CE_MODE_X" in os.environ:
train_program.random_seed = 110
train_startup.random_seed = 110
with fluid.program_guard(train_program, train_startup):
with fluid.unique_name.guard():
logits, loss = net.network(args.loss_type)
loss.persistable = True
logits.persistable = True
# gradient clipping
fluid.clip.set_gradient_clip(clip=fluid.clip.GradientClipByValue(
max=1.0, min=-1.0))
optimizer = fluid.optimizer.Adam(
learning_rate=fluid.layers.exponential_decay(
learning_rate=args.learning_rate,
decay_steps=400,
decay_rate=0.9,
staircase=True))
optimizer.minimize(loss)
print("begin memory optimization ...")
fluid.memory_optimize(train_program)
print("end memory optimization ...")
test_program = fluid.Program()
test_startup = fluid.Program()
if "CE_MODE_X" in os.environ:
test_program.random_seed = 110
test_startup.random_seed = 110
with fluid.program_guard(test_program, test_startup):
with fluid.unique_name.guard():
logits, loss = net.network(args.loss_type)
loss.persistable = True
logits.persistable = True
test_program = test_program.clone(for_test=True)
if args.use_cuda:
place = fluid.CUDAPlace(0)
dev_count = fluid.core.get_cuda_device_count()
else:
place = fluid.CPUPlace()
dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
print("device count %d" % dev_count)
print("theoretical memory usage: ")
print(
fluid.contrib.memory_usage(
program=train_program, batch_size=args.batch_size))
exe = fluid.Executor(place)
exe.run(train_startup)
exe.run(test_startup)
train_exe = fluid.ParallelExecutor(
use_cuda=args.use_cuda, loss_name=loss.name, main_program=train_program)
test_exe = fluid.ParallelExecutor(
use_cuda=args.use_cuda,
main_program=test_program,
share_vars_from=train_exe)
if args.init_model:
init.init_pretraining_params(
exe, args.init_model, main_program=train_startup)
        print('successfully initialized from %s' % args.init_model)
print("start loading data ...")
def train_with_feed(batch_data):
"""
Train on one batch
"""
        # TODO: get_feed_names
feed_dict = dict(zip(net.get_feed_names(), batch_data))
cost = train_exe.run(feed=feed_dict, fetch_list=[loss.name])
return cost[0]
def test_with_feed(batch_data):
"""
Test on one batch
"""
feed_dict = dict(zip(net.get_feed_names(), batch_data))
score = test_exe.run(feed=feed_dict, fetch_list=[logits.name])
return score[0]
def evaluate():
"""
Evaluate to choose model
"""
val_batches = reader.batch_reader(args.val_path, args.batch_size, place,
args.max_len, 1)
scores = []
labels = []
for batch in val_batches:
scores.extend(test_with_feed(batch))
labels.extend([x[0] for x in batch[2]])
scores = [x[0] for x in scores]
return eva.evaluate_cor(scores, labels)
def save_exe(step, best_cor):
"""
        Conditionally save the model when eval correlation improves
"""
cor = evaluate()
print('evaluation cor relevance %s' % cor)
if cor > best_cor and step != 0:
fluid.io.save_inference_model(
args.save_path,
net.get_feed_inference_names(),
logits,
exe,
main_program=train_program)
print("Save model at step %d ... " % step)
print(
time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
best_cor = cor
return best_cor
    # train over multiple epochs
global_step, train_time = 0, 0.0
best_cor = 0.0
pre_index = -1
for epoch in six.moves.xrange(args.num_scan_data):
train_batches = reader.batch_reader(args.train_path, args.batch_size,
place, args.max_len,
args.sample_pro)
begin_time = time.time()
sum_cost = 0
for batch in train_batches:
if (args.save_path is not None) and (
global_step % args.save_step == 0):
best_cor = save_exe(global_step, best_cor)
cost = train_with_feed(batch)
global_step += 1
sum_cost += cost.mean()
if global_step % args.print_step == 0:
print('training step %s avg loss %s' %
(global_step, sum_cost / args.print_step))
sum_cost = 0
pass_time_cost = time.time() - begin_time
train_time += pass_time_cost
print("Pass {0}, pass_time_cost {1}"
.format(epoch, "%2.2f sec" % pass_time_cost))
def evaluate(args):
"""
Evaluate model for both pretrained and finetuned
"""
place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
t0 = time.time()
with fluid.scope_guard(fluid.Scope()):
infer_program, feed_target_names, fetch_vars = fluid.io.load_inference_model(
args.init_model, exe)
print('init model %s' % args.init_model)
global_step, infer_time = 0, 0.0
test_batches = reader.batch_reader(args.test_path, args.batch_size,
place, args.max_len, 1)
scores = []
labels = []
for batch in test_batches:
logits = exe.run(infer_program,
feed={
'context_wordseq': batch[0],
'response_wordseq': batch[1]
},
fetch_list=fetch_vars)
logits = [x[0] for x in logits[0]]
scores.extend(logits)
labels.extend([x[0] for x in batch[2]])
print('len scores: %s len labels: %s' % (len(scores), len(labels)))
mean_score = sum(scores) / len(scores)
if args.loss_type == 'CLS':
recall_dict = eva.evaluate_Recall(list(zip(scores, labels)))
print('mean score: %s' % mean_score)
print('evaluation recall result:')
print('1_in_2: %s\t1_in_10: %s\t2_in_10: %s\t5_in_10: %s' %
(recall_dict['1_in_2'], recall_dict['1_in_10'],
recall_dict['2_in_10'], recall_dict['5_in_10']))
elif args.loss_type == 'L2':
cor = eva.evaluate_cor(scores, labels)
            print('mean score: %s\nevaluation cor results: %s' %
                  (mean_score, cor))
        else:
            raise ValueError("Unknown loss_type: %s" % args.loss_type)
t1 = time.time()
print("finish evaluate model:%s on data:%s time_cost(s):%.2f" %
(args.init_model, args.test_path, t1 - t0))
def infer(args):
"""
Inference function
"""
place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
t0 = time.time()
with fluid.scope_guard(fluid.Scope()):
infer_program, feed_target_names, fetch_vars = fluid.io.load_inference_model(
args.init_model, exe)
global_step, infer_time = 0, 0.0
test_batches = reader.batch_reader(args.test_path, args.batch_size,
place, args.max_len, 1)
scores = []
for batch in test_batches:
logits = exe.run(infer_program,
feed={
'context_wordseq': batch[0],
'response_wordseq': batch[1]
},
fetch_list=fetch_vars)
logits = [x[0] for x in logits[0]]
scores.extend(logits)
in_file = open(args.test_path, 'r')
out_path = args.test_path + '.infer'
out_file = open(out_path, 'w')
for line, s in zip(in_file, scores):
out_file.write('%s\t%s\n' % (line.strip(), s))
in_file.close()
out_file.close()
from eval import do_eval
from train import do_train
from predict import do_predict
from inference_model import do_save_inference_model
t1 = time.time()
print("finish infer model:%s out file: %s time_cost(s):%.2f" %
(args.init_model, out_path, t1 - t0))
from ade.utils.configure import PDConfig
def get_cards():
num = 0
cards = os.environ.get('CUDA_VISIBLE_DEVICES', '')
if cards != '':
num = len(cards.split(","))
return num
if __name__ == "__main__":
args = PDConfig(yaml_file="./data/config/ade.yaml")
args.build()
args.Print()
def main():
"""
main
"""
args = config.parse_args()
config.print_arguments(args)
if args.do_train:
do_train(args)
check_cuda(args.use_cuda)
if args.do_predict:
do_predict(args)
    if args.do_train:
        if args.loss_type == 'CLS':
            train(args)
        elif args.loss_type == 'L2':
            finetune(args)
        else:
            raise ValueError("Unknown loss_type: %s" % args.loss_type)
    elif args.do_val:
        evaluate(args)
    elif args.do_infer:
        infer(args)
    else:
        raise ValueError("One of do_train, do_val or do_infer must be set.")
if args.do_eval:
do_eval(args)
if args.do_save_inference_model:
do_save_inference_model(args)
if __name__ == '__main__':
main()
# vim: set ts=4 sw=4 sts=4 tw=100:
#matching pretrained
wget --no-check-certificate https://baidu-nlp.bj.bcebos.com/auto_dialogue_evaluation_matching_pretrained-1.0.0.tar.gz
tar -xzf auto_dialogue_evaluation_matching_pretrained-1.0.0.tar.gz
#finetuned
for task in seq2seq_naive seq2seq_att keywords human
do
wget --no-check-certificate https://baidu-nlp.bj.bcebos.com/auto_dialogue_evaluation_${task}_finetuned-1.0.0.tar.gz
tar -xzf auto_dialogue_evaluation_${task}_finetuned-1.0.0.tar.gz
done
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""predict auto dialogue evaluation task"""
import os
import sys
import six
import time
import numpy as np
import multiprocessing
import paddle
import paddle.fluid as fluid
import ade.reader as reader
from ade_net import create_net
from ade.utils.configure import PDConfig
from ade.utils.input_field import InputField
from ade.utils.model_check import check_cuda
import ade.utils.save_load_io as save_load_io
def do_predict(args):
"""
predict function
"""
test_prog = fluid.default_main_program()
startup_prog = fluid.default_startup_program()
with fluid.program_guard(test_prog, startup_prog):
test_prog.random_seed = args.random_seed
startup_prog.random_seed = args.random_seed
with fluid.unique_name.guard():
context_wordseq = fluid.layers.data(
name='context_wordseq', shape=[1], dtype='int64', lod_level=1)
response_wordseq = fluid.layers.data(
name='response_wordseq', shape=[1], dtype='int64', lod_level=1)
labels = fluid.layers.data(
name='labels', shape=[1], dtype='int64')
input_inst = [context_wordseq, response_wordseq, labels]
input_field = InputField(input_inst)
data_reader = fluid.io.PyReader(feed_list=input_inst,
capacity=4, iterable=False)
logits = create_net(
is_training=False,
model_input=input_field,
args=args
)
logits.persistable = True
fetch_list = [logits.name]
    # for_test=True switches the is_test attribute of operators to True
test_prog = test_prog.clone(for_test=True)
if args.use_cuda:
place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
else:
place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_prog)
assert (args.init_from_params) or (args.init_from_pretrain_model)
if args.init_from_params:
save_load_io.init_from_params(args, exe, test_prog)
if args.init_from_pretrain_model:
save_load_io.init_from_pretrain_model(args, exe, test_prog)
compiled_test_prog = fluid.CompiledProgram(test_prog)
processor = reader.DataProcessor(
data_path=args.predict_file,
max_seq_length=args.max_seq_len,
batch_size=args.batch_size)
batch_generator = processor.data_generator(
place=place,
phase="test",
shuffle=False,
sample_pro=1)
num_test_examples = processor.get_num_examples(phase='test')
data_reader.decorate_batch_generator(batch_generator)
data_reader.start()
scores = []
while True:
try:
results = exe.run(compiled_test_prog, fetch_list=fetch_list)
scores.extend(results[0])
except fluid.core.EOFException:
data_reader.reset()
break
scores = scores[: num_test_examples]
with open(args.output_prediction_file, 'w') as fw:
for index, score in enumerate(scores):
fw.write("%s\t%s\n" % (index, score))
if __name__ == "__main__":
args = PDConfig(yaml_file="./data/config/ade.yaml")
args.build()
args.Print()
check_cuda(args.use_cuda)
do_predict(args)
"""
Reader for auto dialogue evaluation
"""
import sys
import time
import numpy as np
import random
import paddle.fluid as fluid
import paddle
def to_lodtensor(data, place):
"""
    Convert to LoDTensor
"""
seq_lens = [len(seq) for seq in data]
cur_len = 0
lod = [cur_len]
for l in seq_lens:
cur_len += l
lod.append(cur_len)
flattened_data = np.concatenate(data, axis=0).astype("int64")
flattened_data = flattened_data.reshape([len(flattened_data), 1])
res = fluid.LoDTensor()
res.set(flattened_data, place)
res.set_lod([lod])
return res
def reshape_batch(batch, place):
"""
Reshape batch
"""
context_reshape = to_lodtensor([dat[0] for dat in batch], place)
response_reshape = to_lodtensor([dat[1] for dat in batch], place)
label_reshape = [dat[2] for dat in batch]
return (context_reshape, response_reshape, label_reshape)
def batch_reader(data_path,
batch_size,
place,
max_len=50,
sample_pro=1):
"""
Yield batch
"""
batch = []
with open(data_path, 'r') as f:
for line in f:
#sample for training data
if sample_pro < 1:
if random.random() > sample_pro:
continue
tokens = line.strip().split('\t')
assert len(tokens) == 3
context = [int(x) for x in tokens[0].split()[:max_len]]
response = [int(x) for x in tokens[1].split()[:max_len]]
label = [int(tokens[2])]
instance = (context, response, label)
if len(batch) < batch_size:
batch.append(instance)
else:
if len(batch) == batch_size:
yield reshape_batch(batch, place)
batch = [instance]
if len(batch) == batch_size:
yield reshape_batch(batch, place)
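    # Note: a trailing batch smaller than batch_size is dropped; only full
    # batches are ever yielded.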
export CUDA_VISIBLE_DEVICES=4
export FLAGS_eager_delete_tensor_gb=0.0
#pretrain
python -u main.py \
--do_train True \
--use_cuda \
--save_path model_files_tmp/matching_pretrained \
--train_path data/unlabel_data/train.ids \
--val_path data/unlabel_data/val.ids
#finetune based on one task
TASK=human
python -u main.py \
--do_train True \
--loss_type L2 \
--use_cuda \
--save_path model_files_tmp/${TASK}_finetuned \
--init_model model_files/matching_pretrained \
--train_path data/label_data/$TASK/train.ids \
--val_path data/label_data/$TASK/val.ids \
--print_step 1 \
--save_step 1 \
--num_scan_data 50
#evaluate pretrained model by Recall
python -u main.py \
--do_val True \
--use_cuda \
--test_path data/unlabel_data/test.ids \
--init_model model_files/matching_pretrained \
--loss_type CLS
#evaluate pretrained model by Cor
for task in seq2seq_naive seq2seq_att keywords human
do
echo $task
python -u main.py \
--do_val True \
--use_cuda \
--test_path data/label_data/$task/test.ids \
--init_model model_files/matching_pretrained \
--loss_type L2
done
#evaluate finetuned model by Cor
for task in seq2seq_naive seq2seq_att keywords human
do
echo $task
python -u main.py \
--do_val True \
--use_cuda \
--test_path data/label_data/$task/test.ids \
--init_model model_files/${task}_finetuned \
--loss_type L2
done
#infer
TASK=human
python -u main.py \
--do_infer True \
--use_cuda \
--test_path data/label_data/$TASK/test.ids \
--init_model model_files/${TASK}_finetuned
#!/bin/bash
export FLAGS_sync_nccl_allreduce=0
export FLAGS_eager_delete_tensor_gb=1.0
export CUDA_VISIBLE_DEVICES=0
if [ $# -ne 2 ]
then
echo "please input parameters: TRAIN_TYPE and TASK_TYPE"
echo "TRAIN_TYPE: [matching|seq2seq_naive|seq2seq_att|keywords|human]"
echo "TASK_TYPE: [train|predict|evaluate|inference]"
exit 255
fi
TRAIN_TYPE=$1
TASK_TYPE=$2
typeset -l TRAIN_TYPE
typeset -l TASK_TYPE
candi_train_type=("matching" "seq2seq_naive" "seq2seq_att" "keywords" "human")
candi_task_type=("train" "predict" "evaluate" "inference")
if [[ ! "${candi_train_type[@]}" =~ ${TRAIN_TYPE} ]]
then
echo "unknown parameter: ${TRAIN_TYPE}, just support [matching|seq2seq_naive|seq2seq_att|keywords|human]"
exit 255
fi
if [[ ! "${candi_task_type[@]}" =~ ${TASK_TYPE} ]]
then
echo "unknown parameter: ${TRAIN_TYPE}, just support [train|predict|evaluate|inference]"
exit 255
fi
INPUT_PATH="data/input/data"
OUTPUT_PATH="data/output"
SAVED_MODELS="data/saved_models"
INFERENCE_MODEL="data/inference_models"
PYTHON_PATH="python"
#decide whether to use cuda based on CUDA_VISIBLE_DEVICES
if [ ! "$CUDA_VISIBLE_DEVICES" ]
then
use_cuda=false
else
use_cuda=true
fi
#training
function pretrain_train()
{
pretrain_model_path="${SAVED_MODELS}/matching_pretrained"
if [ ! -d ${pretrain_model_path} ]
then
mkdir ${pretrain_model_path}
fi
${PYTHON_PATH} -u main.py \
--do_train=true \
--use_cuda=${1} \
--loss_type="CLS" \
--max_seq_len=50 \
--save_model_path=${pretrain_model_path} \
--save_param="params" \
--training_file="${INPUT_PATH}/unlabel_data/train.ids" \
--epoch=20 \
--print_step=1 \
--save_step=400 \
--batch_size=256 \
--hidden_size=256 \
--emb_size=256 \
--vocab_size=484016 \
--learning_rate=0.001 \
--sample_pro 0.1
}
function finetuning_train()
{
save_model_path="${SAVED_MODELS}/${2}_finetuned"
if [ ! -d ${save_model_path} ]
then
mkdir ${save_model_path}
fi
${PYTHON_PATH} -u main.py \
--do_train=true \
--use_cuda=${1} \
--loss_type="L2" \
--max_seq_len=50 \
--init_from_pretrain_model="${SAVED_MODELS}/matching_pretrained/params/step_final" \
--save_model_path=${save_model_path} \
--save_param="params" \
--training_file="${INPUT_PATH}/label_data/${2}/train.ids" \
--epoch=50 \
--print_step=1 \
--save_step=400 \
--batch_size=256 \
--hidden_size=256 \
--emb_size=256 \
--vocab_size=484016 \
--learning_rate=0.001 \
--sample_pro 0.1
}
#predict
function pretrain_predict()
{
${PYTHON_PATH} -u main.py \
--do_predict=true \
--use_cuda=${1} \
--predict_file="${INPUT_PATH}/unlabel_data/test.ids" \
--init_from_params="${SAVED_MODELS}/trained_models/matching_pretrained/params" \
--loss_type="CLS" \
--output_prediction_file="${OUTPUT_PATH}/pretrain_matching_predict" \
--max_seq_len=50 \
--batch_size=256 \
--hidden_size=256 \
--emb_size=256 \
--vocab_size=484016
}
function finetuning_predict()
{
${PYTHON_PATH} -u main.py \
--do_predict=true \
--use_cuda=${1} \
--predict_file="${INPUT_PATH}/label_data/${2}/test.ids" \
--init_from_params=${SAVED_MODELS}/trained_models/${2}_finetuned/params \
--loss_type="L2" \
--output_prediction_file="${OUTPUT_PATH}/finetuning_${2}_predict" \
--max_seq_len=50 \
--batch_size=256 \
--hidden_size=256 \
--emb_size=256 \
--vocab_size=484016
}
#evaluate
function pretrain_eval()
{
${PYTHON_PATH} -u main.py \
--do_eval=true \
--use_cuda=${1} \
--evaluation_file="${INPUT_PATH}/unlabel_data/test.ids" \
--output_prediction_file="${OUTPUT_PATH}/pretrain_matching_predict" \
--loss_type="CLS"
}
function finetuning_eval()
{
${PYTHON_PATH} -u main.py \
--do_eval=true \
--use_cuda=${1} \
--evaluation_file="${INPUT_PATH}/label_data/${2}/test.ids" \
--output_prediction_file="${OUTPUT_PATH}/finetuning_${2}_predict" \
--loss_type="L2"
}
#inference model
function pretrain_infer()
{
${PYTHON_PATH} -u main.py \
--do_save_inference_model=true \
--use_cuda=${1} \
--init_from_params="${SAVED_MODELS}/trained_models/matching_pretrained/params" \
--inference_model_dir="${INFERENCE_MODEL}/matching_inference_model"
}
function finetuning_infer()
{
${PYTHON_PATH} -u main.py \
--do_save_inference_model=true \
--use_cuda=${1} \
--init_from_params="${SAVED_MODELS}/trained_models/${2}_finetuned/params" \
--inference_model_dir="${INFERENCE_MODEL}/${2}_inference_model"
}
if [ "${TASK_TYPE}" = "train" ]
then
echo "train ${TRAIN_TYPE} start.........."
if [ "${TRAIN_TYPE}" = "matching" ]
then
pretrain_train ${use_cuda};
else
finetuning_train ${use_cuda} ${TRAIN_TYPE};
fi
elif [ "${TASK_TYPE}" = "predict" ]
then
echo "predict ${TRAIN_TYPE} start.........."
if [ "${TRAIN_TYPE}" = "matching" ]
then
pretrain_predict ${use_cuda};
else
finetuning_predict ${use_cuda} ${TRAIN_TYPE};
fi
elif [ "${TASK_TYPE}" = "evaluate" ]
then
echo "evaluate ${TRAIN_TYPE} start.........."
if [ "${TRAIN_TYPE}" = "matching" ]
then
pretrain_eval ${use_cuda};
else
finetuning_eval ${use_cuda} ${TRAIN_TYPE};
fi
elif [ "${TASK_TYPE}" = "inference" ]
then
echo "save ${TRAIN_TYPE} inference model start.........."
if [ "${TRAIN_TYPE}" = "matching" ]
then
pretrain_infer ${use_cuda};
else
finetuning_infer ${use_cuda} ${TRAIN_TYPE};
fi
else
exit 255
fi
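#illustrative dispatch (assuming the caller exports these variables earlier in the wrapper; values are examples):
#  TASK_TYPE=train TRAIN_TYPE=matching use_cuda=true sh run.sh
#any TRAIN_TYPE other than "matching" (e.g. human, keywords) takes the finetuning path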
export FLAGS_eager_delete_tensor_gb=0.0
#pretrain
python -u main.py \
--do_train True \
--sample_pro 0.9 \
--batch_size 64 \
--save_path model_files_tmp/matching_pretrained \
--train_path data/unlabel_data/train.ids \
--val_path data/unlabel_data/val.ids
#finetune based on one task
TASK=human
python -u main.py \
--do_train True \
--loss_type L2 \
--save_path model_files_tmp/${TASK}_finetuned \
--init_model model_files/matching_pretrained \
--train_path data/label_data/$TASK/train.ids \
--val_path data/label_data/$TASK/val.ids \
--print_step 1 \
--save_step 1 \
--num_scan_data 50
#evaluate pretrained model by Recall
python -u main.py \
--do_val True \
--test_path data/unlabel_data/test.ids \
--init_model model_files/matching_pretrained \
--loss_type CLS
#evaluate pretrained model by Cor
for task in seq2seq_naive seq2seq_att keywords human
do
echo $task
python -u main.py \
--do_val True \
--test_path data/label_data/$task/test.ids \
--init_model model_files/matching_pretrained \
--loss_type L2
done
#evaluate finetuned model by Cor
for task in seq2seq_naive seq2seq_att keywords human
do
echo $task
python -u main.py \
--do_val True \
--test_path data/label_data/$task/test.ids \
--init_model model_files/${task}_finetuned \
--loss_type L2
done
#infer
TASK=human
python -u main.py \
--do_infer True \
--test_path data/label_data/$TASK/test.ids \
--init_model model_files/${TASK}_finetuned
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""train auto dialogue evaluation task"""
import os
import sys
import six
import time
import numpy as np
import multiprocessing
import paddle
import paddle.fluid as fluid
import ade.reader as reader
from ade_net import create_net, set_word_embedding
from ade.utils.configure import PDConfig
from ade.utils.input_field import InputField
from ade.utils.model_check import check_cuda
import ade.utils.save_load_io as save_load_io
try:
import cPickle as pickle #python 2
except ImportError as e:
import pickle #python 3
def do_train(args):
"""train function"""
train_prog = fluid.default_main_program()
startup_prog = fluid.default_startup_program()
with fluid.program_guard(train_prog, startup_prog):
train_prog.random_seed = args.random_seed
startup_prog.random_seed = args.random_seed
with fluid.unique_name.guard():
context_wordseq = fluid.layers.data(
name='context_wordseq', shape=[1], dtype='int64', lod_level=1)
response_wordseq = fluid.layers.data(
name='response_wordseq', shape=[1], dtype='int64', lod_level=1)
labels = fluid.layers.data(
name='labels', shape=[1], dtype='int64')
input_inst = [context_wordseq, response_wordseq, labels]
input_field = InputField(input_inst)
data_reader = fluid.io.PyReader(feed_list=input_inst,
capacity=4, iterable=False)
loss = create_net(
is_training=True,
model_input=input_field,
args=args
)
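# mark the loss persistable so memory-optimization passes keep it fetchable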
loss.persistable = True
# gradient clipping
fluid.clip.set_gradient_clip(clip=fluid.clip.GradientClipByValue(
max=1.0, min=-1.0))
optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
optimizer.minimize(loss)
if args.use_cuda:
dev_count = fluid.core.get_cuda_device_count()
place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
else:
dev_count = int(
os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
place = fluid.CPUPlace()
processor = reader.DataProcessor(
data_path=args.training_file,
max_seq_length=args.max_seq_len,
batch_size=args.batch_size)
batch_generator = processor.data_generator(
place=place,
phase="train",
shuffle=True,
sample_pro=args.sample_pro)
num_train_examples = processor.get_num_examples(phase='train')
max_train_steps = args.epoch * num_train_examples // dev_count // args.batch_size
print("Num train examples: %d" % num_train_examples)
print("Max train steps: %d" % max_train_steps)
data_reader.decorate_batch_generator(batch_generator)
exe = fluid.Executor(place)
exe.run(startup_prog)
assert (args.init_from_checkpoint == "") or (
args.init_from_pretrain_model == "")
#init from some checkpoint, to resume the previous training
if args.init_from_checkpoint:
save_load_io.init_from_checkpoint(args, exe, train_prog)
#init from some pretrain models, to better solve the current task
if args.init_from_pretrain_model:
save_load_io.init_from_pretrain_model(args, exe, train_prog)
if args.word_emb_init:
print("start loading word embedding init ...")
if six.PY2:
word_emb = np.array(pickle.load(open(args.word_emb_init, 'rb'))).astype('float32')
else:
word_emb = np.array(pickle.load(open(args.word_emb_init, 'rb'), encoding="bytes")).astype('float32')
set_word_embedding(word_emb, place)
print("finish init word embedding ...")
build_strategy = fluid.compiler.BuildStrategy()
build_strategy.enable_inplace = True
compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
loss_name=loss.name, build_strategy=build_strategy)
steps = 0
begin_time = time.time()
for epoch_step in range(args.epoch):
data_reader.start()
sum_loss = 0.0
ce_loss = 0.0
while True:
try:
steps += 1
fetch_list = [loss.name]
outputs = exe.run(compiled_train_prog, fetch_list=fetch_list)
np_loss = outputs
sum_loss += np.array(np_loss).mean()
ce_loss = np.array(np_loss).mean()
if steps % args.print_steps == 0:
print('epoch: %d, step: %s, avg loss %s' % (epoch_step, steps, sum_loss / args.print_steps))
sum_loss = 0.0
if steps % args.save_steps == 0:
if args.save_checkpoint:
save_load_io.save_checkpoint(args, exe, train_prog, "step_" + str(steps))
if args.save_param:
save_load_io.save_param(args, exe, train_prog, "step_" + str(steps))
except fluid.core.EOFException:
data_reader.reset()
break
if args.save_checkpoint:
save_load_io.save_checkpoint(args, exe, train_prog, "step_final")
if args.save_param:
save_load_io.save_param(args, exe, train_prog, "step_final")
def get_cards():
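"""count GPUs visible via CUDA_VISIBLE_DEVICES, e.g. "0,1" -> 2 (0 if unset)"""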
num = 0
cards = os.environ.get('CUDA_VISIBLE_DEVICES', '')
if cards != '':
num = len(cards.split(","))
return num
if args.enable_ce:
card_num = get_cards()
pass_time_cost = time.time() - begin_time
print("test_card_num", card_num)
print("kpis\ttrain_duration_card%s\t%s" % (card_num, pass_time_cost))
print("kpis\ttrain_loss_card%s\t%f" % (card_num, ce_loss))
if __name__ == '__main__':
args = PDConfig(yaml_file="./data/config/ade.yaml")
args.build()
args.Print()
check_cuda(args.use_cuda)
do_train(args)
# this file is only used for continuous evaluation test!
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""this file is only used for continuous evaluation test!"""
import os
import sys
......
task_name: ""
data_dir: ""
bert_config_path: ""
init_from_checkpoint: ""
init_from_params: ""
init_from_pretrain_model: ""
inference_model_dir: ""
save_model_path: ""
save_checkpoint: ""
save_param: ""
lr_scheduler: "linear_warmup_decay"
weight_decay: 0.01
warmup_proportion: 0.1
save_steps: 1000
use_fp16: False
loss_scaling: 1.0
print_steps: 20
evaluation_file: ""
output_prediction_file: ""
vocab_path: ""
max_seq_len: 128
batch_size: 2
verbose: False
do_lower_case: False
random_seed: 0
use_cuda: True
task_name: ""
in_tokens: False
do_save_inference_model: False
enable_ce: "store_true"
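These yaml keys are fused into argparse by PDConfig.load_yaml (shown later in this commit), so any of them can be overridden on the command line; an illustrative, non-prescriptive override:
python -u main.py --do_train true --max_seq_len 64 --batch_size 32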
pretrain_model directory: in this module, we use BERT as the pretrained model
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -74,7 +74,8 @@ def mask(batch_tokens, total_token_num, vocab_size, CLS=1, SEP=2, MASK=3):
return batch_tokens, mask_label, mask_pos
def prepare_batch_data(insts,
def prepare_batch_data(task_name,
insts,
max_len,
total_token_num,
voc_size=0,
......@@ -90,7 +91,6 @@ def prepare_batch_data(insts,
2. generate Tensor of position
3. generate self attention mask, [shape: batch_size * max_len * max_len]
"""
batch_src_ids = [inst[0] for inst in insts]
batch_sent_ids = [inst[1] for inst in insts]
batch_pos_ids = [inst[2] for inst in insts]
......@@ -99,10 +99,10 @@ def prepare_batch_data(insts,
# or unique id
if isinstance(insts[0][3], list):
if max_len != -1:
if task_name == "atis_slot":
labels_list = [inst[3] + [0] * (max_len - len(inst[3])) for inst in insts]
labels_list = [np.array(labels_list).astype("int64").reshape([-1, max_len])]
else:
elif task_name == "dstc2":
labels_list = [inst[3] for inst in insts]
labels_list = [np.array(labels_list).astype("int64")]
else:
......
......@@ -24,9 +24,7 @@ import json
import numpy as np
import paddle.fluid as fluid
_WORK_DIR = os.path.split(os.path.realpath(__file__))[0]
sys.path.append(os.path.join(_WORK_DIR, "../../"))
from transformer_encoder import encoder, pre_process_layer
from dgu.transformer_encoder import encoder, pre_process_layer
class BertConfig(object):
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -49,15 +49,9 @@ class Paradigm(object):
bias_attr=fluid.ParamAttr(
name="cls_out_b", initializer=fluid.initializer.Constant(0.)))
if params['is_prediction']:
if not params['is_training']:
probs = fluid.layers.softmax(logits)
feed_targets_name = [
params['src_ids'].name,
params['pos_ids'].name,
params['sent_ids'].name,
params['input_mask'].name,
]
results = {"probs": probs, "feed_targets_name": feed_targets_name}
results = {"probs": probs}
return results
ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
......@@ -67,11 +61,6 @@ class Paradigm(object):
accuracy = fluid.layers.accuracy(
input=probs, label=params['labels'], total=num_seqs)
loss.persistable = True
probs.persistable = True
accuracy.persistable = True
num_seqs.persistable = True
results = {
"loss": loss,
"probs": probs,
......@@ -105,22 +94,13 @@ class Paradigm(object):
loss = fluid.layers.mean(x=ce_loss)
probs = fluid.layers.sigmoid(logits)
if params['is_prediction']:
feed_targets_name = [
params['src_ids'].name,
params['pos_ids'].name,
params['sent_ids'].name,
params['input_mask'].name,
]
results = {"probs": probs, "feed_targets_name": feed_targets_name}
if not params['is_training']:
results = {"probs": probs}
return results
num_seqs = fluid.layers.tensor.fill_constant(
shape=[1], dtype='int64', value=1)
loss.persistable = True
probs.persistable = True
num_seqs.persistable = True
results = {"loss": loss, "probs": probs, "num_seqs": num_seqs}
return results
......@@ -138,14 +118,8 @@ class Paradigm(object):
fluid.layers.argmax(
logits, axis=1), dtype='int32')
if params['is_prediction']:
feed_targets_name = [
params['src_ids'].name,
params['pos_ids'].name,
params['sent_ids'].name,
params['input_mask'].name,
]
results = {"probs": probs, "feed_targets_name": feed_targets_name}
if not params['is_training']:
results = {"probs": probs}
return results
num_seqs = fluid.layers.tensor.fill_constant(
......@@ -160,10 +134,6 @@ class Paradigm(object):
label=fluid.layers.reshape(params['labels'], [-1, 1]))
loss = fluid.layers.mean(x=ce_loss)
loss.persistable = True
probs.persistable = True
accuracy.persistable = True
num_seqs.persistable = True
results = {
"loss": loss,
"probs": probs,
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -11,9 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
evaluate task metrics
"""
"""evaluate task metrics"""
import sys
......@@ -22,15 +20,12 @@ class EvalDA(object):
"""
evaluate da testset, swda|mrda
"""
def __init__(self, task_name, pred):
def __init__(self, task_name, pred, refer):
"""
predict file
"""
self.pred_file = pred
if task_name == 'swda':
self.refer_file = "./data/swda/test.txt"
elif task_name == "mrda":
self.refer_file = "./data/mrda/test.txt"
self.refer_file = refer
def load_data(self):
"""
......@@ -70,12 +65,12 @@ class EvalATISIntent(object):
"""
evaluate atis intent testset
"""
def __init__(self, pred):
def __init__(self, pred, refer):
"""
predict file
"""
self.pred_file = pred
self.refer_file = "./data/atis/atis_intent/test.txt"
self.refer_file = refer
def load_data(self):
"""
......@@ -115,12 +110,12 @@ class EvalATISSlot(object):
"""
evaluate atis slot
"""
def __init__(self, pred):
def __init__(self, pred, refer):
"""
pred file
"""
self.pred_file = pred
self.refer_file = "./data/atis/atis_slot/test.txt"
self.refer_file = refer
def load_data(self):
"""
......@@ -200,12 +195,12 @@ class EvalUDC(object):
"""
evaluate udc
"""
def __init__(self, pred):
def __init__(self, pred, refer):
"""
predict file
"""
self.pred_file = pred
self.refer_file = "./data/udc/test.txt"
self.refer_file = refer
def load_data(self):
"""
......@@ -272,13 +267,13 @@ class EvalDSTC2(object):
"""
evaluate dst testset, dstc2
"""
def __init__(self, task_name, pred):
def __init__(self, task_name, pred, refer):
"""
predict file
"""
self.task_name = task_name
self.pred_file = pred
self.refer_file = "./data/dstc2/%s/test.txt" % self.task_name
self.refer_file = refer
def load_data(self):
"""
......@@ -320,15 +315,10 @@ class EvalDSTC2(object):
return metrics_out
if __name__ == "__main__":
if len(sys.argv[1:]) < 2:
print("please input task_name predict_file")
task_name = sys.argv[1]
pred_file = sys.argv[2]
def evaluate(task_name, pred_file, refer_file):
"""evaluate task metrics"""
if task_name.lower() == 'udc':
eval_inst = EvalUDC(pred_file)
eval_inst = EvalUDC(pred_file, refer_file)
eval_metrics = eval_inst.evaluate()
print("MATCHING TASK: %s metrics in testset: " % task_name)
print("R1@2: %s" % eval_metrics[0])
......@@ -337,29 +327,29 @@ if __name__ == "__main__":
print("R5@10: %s" % eval_metrics[3])
elif task_name.lower() in ['swda', 'mrda']:
eval_inst = EvalDA(task_name.lower(), pred_file)
eval_inst = EvalDA(task_name.lower(), pred_file, refer_file)
eval_metrics = eval_inst.evaluate()
print("DA TASK: %s metrics in testset: " % task_name)
print("ACC: %s" % eval_metrics)
elif task_name.lower() == 'atis_intent':
eval_inst = EvalATISIntent(pred_file)
eval_inst = EvalATISIntent(pred_file, refer_file)
eval_metrics = eval_inst.evaluate()
print("INTENTION TASK: %s metrics in testset: " % task_name)
print("ACC: %s" % eval_metrics)
elif task_name.lower() == 'atis_slot':
eval_inst = EvalATISSlot(pred_file)
eval_inst = EvalATISSlot(pred_file, refer_file)
eval_metrics = eval_inst.evaluate()
print("SLOT FILLING TASK: %s metrics in testset: " % task_name)
print(eval_metrics)
elif task_name.lower() in ['dstc2', 'dstc2_asr']:
eval_inst = EvalDSTC2(task_name.lower(), pred_file)
eval_inst = EvalDSTC2(task_name.lower(), pred_file, refer_file)
eval_metrics = eval_inst.evaluate()
print("DST TASK: %s metrics in testset: " % task_name)
print("JOINT ACC: %s" % eval_metrics[0])
elif task_name.lower() == "multi-woz":
eval_inst = EvalMultiWoz(pred_file)
eval_inst = EvalMultiWoz(pred_file, refer_file)
eval_metrics = eval_inst.evaluate()
print("DST TASK: %s metrics in testset: " % task_name)
print("JOINT ACC: %s" % eval_metrics[0])
......@@ -367,3 +357,14 @@ if __name__ == "__main__":
else:
print("task name not in [udc|swda|mrda|atis_intent|atis_slot|dstc2|dstc2_asr|multi-woz]")
if __name__ == "__main__":
if len(sys.argv[1:]) < 3:
print("please input task_name predict_file reference_file")
task_name = sys.argv[1]
pred_file = sys.argv[2]
refer_file = sys.argv[3]
evaluate(task_name, pred_file, refer_file)
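# illustrative usage (script name and paths are hypothetical):
#   python -u eval.py udc ./output/pred_udc.txt ./data/input/data/udc/test.txt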
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -19,7 +19,7 @@ from __future__ import print_function
import numpy as np
import paddle.fluid as fluid
from utils.fp16 import create_master_params_grads, master_param_to_train_param
from dgu.utils.fp16 import create_master_params_grads, master_param_to_train_param
def linear_warmup_decay(learning_rate, warmup_steps, num_train_steps):
......
#!/bin/bash
#check data directory
cd ..
echo "Start download data and models.............."
if [ ! -d "data" ]; then
echo "Directory data does not exist, make new data directory"
mkdir data
fi
cd data
#check configure file
if [ ! -d "config" ]; then
echo "config directory not exist........"
exit 255
else
if [ ! -f "config/dgu.yaml" ]; then
echo "config file dgu.yaml has been lost........"
exit 255
fi
fi
#check and download input data
if [ ! -d "input" ]; then
echo "Directory input does not exist, make new input directory"
mkdir input
fi
cd input
wget --no-check-certificate https://baidu-nlp.bj.bcebos.com/dmtk_data_1.0.0.tar.gz
tar -xvf dmtk_data_1.0.0.tar.gz
rm dmtk_data_1.0.0.tar.gz
cd ..
#check and download pretrain model
if [ ! -d "pretrain_model" ]; then
echo "Directory pretrain_model does not exist, make new pretrain_model directory"
mkdir pretrain_model
fi
cd pretrain_model
wget --no-check-certificate https://bert-models.bj.bcebos.com/uncased_L-12_H-768_A-12.tar.gz
tar -xvf uncased_L-12_H-768_A-12.tar.gz
rm uncased_L-12_H-768_A-12.tar.gz
cd ..
#check inference model directory
if [ ! -d "inference_models" ]; then
echo "Directory inference_models does not exist, make new inference_models directory"
mkdir inference_models
fi
#check output
if [ ! -d "output" ]; then
echo "Directory output does not exist, make new output directory"
mkdir output
fi
#check saved model
if [ ! -d "saved_models" ]; then
echo "Directory saved_models does not exist, make new saved_models directory"
mkdir saved_models
fi
cd saved_models
wget --no-check-certificate https://baidu-nlp.bj.bcebos.com/dgu_models_2.0.0.tar.gz
tar -xvf dgu_models_2.0.0.tar.gz
rm dgu_models_2.0.0.tar.gz
cd ..
echo "Finish.............."
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -13,9 +13,10 @@
# limitations under the License.
"""data reader"""
import os
import types
import csv
import types
import numpy as np
import tokenization
from batching import prepare_batch_data
......@@ -40,9 +41,7 @@ class DataProcessor(object):
np.random.seed(random_seed)
self.current_train_example = -1
self.num_examples = {'train': -1, 'dev': -1, 'test': -1}
self.current_train_epoch = -1
self.task_name = task_name
def get_train_examples(self, data_dir):
......@@ -57,7 +56,8 @@ class DataProcessor(object):
"""Gets a collection of `InputExample`s for prediction."""
raise NotImplementedError()
def get_labels(self):
@staticmethod
def get_labels():
"""Gets the list of labels for this data set."""
raise NotImplementedError()
......@@ -90,6 +90,7 @@ class DataProcessor(object):
return_num_token=False):
"""generate batch data"""
return prepare_batch_data(
self.task_name,
batch_data,
max_len,
total_token_num,
......@@ -119,18 +120,13 @@ class DataProcessor(object):
"Unknown phase, which should be in ['train', 'dev', 'test'].")
return self.num_examples[phase]
def get_train_progress(self):
"""Gets progress for training phase."""
return self.current_train_example, self.current_train_epoch
def data_generator(self, batch_size, phase='train', epoch=1, shuffle=False):
def data_generator(self, batch_size, phase='train', shuffle=False):
"""
Generate data for train, dev or test.
Args:
batch_size: int. The batch size of generated data.
phase: string. The phase for which to generate data.
epoch: int. Total epoches to generate data.
shuffle: bool. Whether to shuffle examples.
"""
if phase == 'train':
......@@ -148,25 +144,19 @@ class DataProcessor(object):
def instance_reader():
"""generate instance data"""
for epoch_index in range(epoch):
if shuffle:
np.random.shuffle(examples)
if phase == 'train':
self.current_train_epoch = epoch_index
for (index, example) in enumerate(examples):
if phase == 'train':
self.current_train_example = index + 1
feature = self.convert_example(
index, example,
self.get_labels(), self.max_seq_len, self.tokenizer)
instance = self.generate_instance(feature)
yield instance
if shuffle:
np.random.shuffle(examples)
for (index, example) in enumerate(examples):
feature = self.convert_example(
index, example,
self.get_labels(), self.max_seq_len, self.tokenizer)
instance = self.generate_instance(feature)
yield instance
def batch_reader(reader, batch_size, in_tokens):
"""read batch data"""
batch, total_token_num, max_len = [], 0, 0
for instance in reader():
for instance in reader():
token_ids, sent_ids, pos_ids, label = instance[:4]
max_len = max(max_len, len(token_ids))
if in_tokens:
......@@ -294,7 +284,8 @@ class UDCProcessor(DataProcessor):
examples = self._create_examples(lines, "test")
return examples
def get_labels(self):
@staticmethod
def get_labels():
"""See base class."""
return ["0", "1"]
......@@ -327,7 +318,8 @@ class SWDAProcessor(DataProcessor):
examples = self._create_examples(lines, "test")
return examples
def get_labels(self):
@staticmethod
def get_labels():
"""See base class."""
labels = range(42)
labels = [str(label) for label in labels]
......@@ -362,7 +354,8 @@ class MRDAProcessor(DataProcessor):
examples = self._create_examples(lines, "test")
return examples
def get_labels(self):
@staticmethod
def get_labels():
"""See base class."""
labels = range(42)
labels = [str(label) for label in labels]
......@@ -406,7 +399,8 @@ class ATISSlotProcessor(DataProcessor):
examples = self._create_examples(lines, "test")
return examples
def get_labels(self):
@staticmethod
def get_labels():
"""See base class."""
labels = range(130)
labels = [str(label) for label in labels]
......@@ -449,7 +443,8 @@ class ATISIntentProcessor(DataProcessor):
examples = self._create_examples(lines, "test")
return examples
def get_labels(self):
@staticmethod
def get_labels():
"""See base class."""
labels = range(26)
labels = [str(label) for label in labels]
......@@ -522,7 +517,8 @@ class DSTC2Processor(DataProcessor):
examples = self._create_examples(lines, "test")
return examples
def get_labels(self):
@staticmethod
def get_labels():
"""See base class."""
labels = range(217)
labels = [str(label) for label in labels]
......@@ -598,7 +594,8 @@ class MULTIWOZProcessor(DataProcessor):
examples = self._create_examples(lines, "test")
return examples
def get_labels(self):
@staticmethod
def get_labels():
"""See base class."""
labels = range(722)
labels = [str(label) for label in labels]
......@@ -666,8 +663,8 @@ def convert_tokens(tokens, sep_id, tokenizer):
tok_text = tokenizer.tokenize(text)
ids = tokenizer.convert_tokens_to_ids(tok_text)
tokens_ids.extend(ids)
if text != tokens[-1]:
tokens_ids.append(sep_id)
tokens_ids.append(sep_id)
tokens_ids = tokens_ids[: -1]
else:
tok_text = tokenizer.tokenize(tokens)
tokens_ids = tokenizer.convert_tokens_to_ids(tok_text)
......@@ -719,7 +716,8 @@ def convert_single_example(ex_index, example, label_list, max_seq_length,
if tokens_b_ids:
tokens_b_ids = tokens_b_ids[:min(limit_length, len(tokens_b_ids))]
else:
tokens_a_ids = tokens_a_ids[len(tokens_a_ids) - max_seq_length + 2:]
if len(tokens_a_ids) > max_seq_length - 2:
tokens_a_ids = tokens_a_ids[len(tokens_a_ids) - max_seq_length + 2:]
if not tokens_c_ids:
if len(tokens_a_ids) > max_seq_length - len(tokens_b_ids) - 3:
tokens_a_ids = tokens_a_ids[len(tokens_a_ids) - max_seq_length + len(tokens_b_ids) + 3:]
......@@ -727,13 +725,10 @@ def convert_single_example(ex_index, example, label_list, max_seq_length,
if len(tokens_a_ids) + len(tokens_b_ids) + len(tokens_c_ids) > max_seq_length - 4:
left_num = max_seq_length - len(tokens_b_ids) - 4
if len(tokens_a_ids) > len(tokens_c_ids):
if not tokens_c_ids:
tokens_a_ids = tokens_a_ids[max(0, len(tokens_a_ids) - left_num):]
else:
suffix_num = int(left_num / 2)
tokens_c_ids = tokens_c_ids[: min(len(tokens_c_ids), suffix_num)]
prefix_num = left_num - len(tokens_c_ids)
tokens_a_ids = tokens_a_ids[max(0, len(tokens_a_ids) - prefix_num):]
suffix_num = int(left_num / 2)
tokens_c_ids = tokens_c_ids[: min(len(tokens_c_ids), suffix_num)]
prefix_num = left_num - len(tokens_c_ids)
tokens_a_ids = tokens_a_ids[max(0, len(tokens_a_ids) - prefix_num):]
else:
if not tokens_a_ids:
tokens_c_ids = tokens_c_ids[max(0, len(tokens_c_ids) - left_num):]
......
scripts: directory of data-processing scripts that convert the official public datasets into the training data format required by the model
Run command:
sh run_build_data.sh [udc|swda|mrda|atis|dstc2]
1) To generate the train/dev/test sets for the MATCHING task:
sh run_build_data.sh udc
The generated data is placed in dialogue_general_understanding/data/input/data/udc
2) To generate the train/dev/test sets for the DA task:
sh run_build_data.sh swda
sh run_build_data.sh mrda
The generated data is placed in dialogue_general_understanding/data/input/data/swda and dialogue_general_understanding/data/input/data/mrda respectively
3) To generate the train/dev/test sets for the DST task:
sh run_build_data.sh dstc2
The generated data is placed in dialogue_general_understanding/data/input/data/dstc2
4) To generate the train/dev/test sets for the intent detection and slot filling tasks:
sh run_build_data.sh atis
The slot filling data is placed in dialogue_general_understanding/data/input/data/atis/atis_slot
The intent detection data is placed in dialogue_general_understanding/data/input/data/atis/atis_intent
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""build swda train dev test dataset"""
import json
......@@ -32,11 +33,11 @@ class ATIS(object):
self.slot_dict = {"PAD": 0, "O": 1}
self.intent_id = 0
self.intent_dict = dict()
self.src_dir = "../data/atis/source_data"
self.out_slot_dir = "../data/atis/atis_slot"
self.out_intent_dir = "../data/atis/atis_intent"
self.map_tag_slot = "../data/atis/atis_slot/map_tag_slot_id.txt"
self.map_tag_intent = "../data/atis/atis_intent/map_tag_intent_id.txt"
self.src_dir = "../../data/input/data/atis/source_data"
self.out_slot_dir = "../../data/input/data/atis/atis_slot"
self.out_intent_dir = "../../data/input/data/atis/atis_intent"
self.map_tag_slot = "../../data/input/data/atis/atis_slot/map_tag_slot_id.txt"
self.map_tag_intent = "../../data/input/data/atis/atis_intent/map_tag_intent_id.txt"
def _load_file(self, data_type):
"""
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -31,12 +31,12 @@ class DSTC2(object):
init instance
"""
self.map_tag_dict = {}
self.out_dir = "../data/dstc2/dstc2"
self.out_asr_dir = "../data/dstc2/dstc2_asr"
self.out_dir = "../../data/input/data/dstc2/dstc2"
self.out_asr_dir = "../../data/input/data/dstc2/dstc2_asr"
self.data_list = "./conf/dstc2.conf"
self.map_tag = "../data/dstc2/dstc2/map_tag_id.txt"
self.src_dir = "../data/dstc2/source_data"
self.onto_json = "../data/dstc2/source_data/ontology_dstc2.json"
self.map_tag = "../../data/input/data/dstc2/dstc2/map_tag_id.txt"
self.src_dir = "../../data/input/data/dstc2/source_data"
self.onto_json = "../../data/input/data/dstc2/source_data/ontology_dstc2.json"
self._load_file()
self._load_ontology()
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -31,11 +31,11 @@ class MRDA(object):
"""
self.tag_id = 0
self.map_tag_dict = dict()
self.out_dir = "../data/mrda"
self.out_dir = "../../data/input/data/mrda"
self.data_list = "./conf/mrda.conf"
self.map_tag = "../data/mrda/map_tag_id.txt"
self.voc_map_tag = "../data/mrda/source_data/icsi_mrda+hs_corpus_050512/classmaps/map_01b_expanded_w_split"
self.src_dir = "../data/mrda/source_data/icsi_mrda+hs_corpus_050512/data"
self.map_tag = "../../data/input/data/mrda/map_tag_id.txt"
self.voc_map_tag = "../../data/input/data/mrda/source_data/icsi_mrda+hs_corpus_050512/classmaps/map_01b_expanded_w_split"
self.src_dir = "../../data/input/data/mrda/source_data/icsi_mrda+hs_corpus_050512/data"
self._load_file()
self.tag_dict = commonlib.load_voc(self.voc_map_tag)
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -31,10 +31,10 @@ class SWDA(object):
"""
self.tag_id = 0
self.map_tag_dict = dict()
self.out_dir = "../data/swda"
self.out_dir = "../../data/input/data/swda"
self.data_list = "./conf/swda.conf"
self.map_tag = "../data/swda/map_tag_id.txt"
self.src_dir = "../data/swda/source_data/swda"
self.map_tag = "../../data/input/data/swda/map_tag_id.txt"
self.src_dir = "../../data/input/data/swda/source_data/swda"
self._load_file()
def _load_file(self):
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
......@@ -15,8 +15,8 @@ then
elif [[ "${TASK_DATA}" =~ "atis" ]]
then
python build_atis_dataset.py
cat ../data/atis/atis_slot/test.txt > ../data/atis/atis_slot/dev.txt
cat ../data/atis/atis_intent/test.txt > ../data/atis/atis_intent/dev.txt
cat ../../data/input/data/atis/atis_slot/test.txt > ../../data/input/data/atis/atis_slot/dev.txt
cat ../../data/input/data/atis/atis_intent/test.txt > ../../data/input/data/atis/atis_intent/dev.txt
elif [ "${TASK_DATA}" = "dstc2" ]
then
python build_dstc2_dataset.py
......
# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors.
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tokenization classes."""
from __future__ import absolute_import
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Transformer encoder."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from functools import partial
import paddle.fluid as fluid
import paddle.fluid.layers as layers
def multi_head_attention(queries,
keys,
values,
attn_bias,
d_key,
d_value,
d_model,
n_head=1,
dropout_rate=0.,
cache=None,
param_initializer=None,
name='multi_head_att'):
"""
Multi-Head Attention. Note that attn_bias is added to the logit before
computing the softmax activation to mask certain selected positions so that
they will not be considered in attention weights.
"""
keys = queries if keys is None else keys
values = keys if values is None else values
if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3):
raise ValueError(
"Inputs: quries, keys and values should all be 3-D tensors.")
def __compute_qkv(queries, keys, values, n_head, d_key, d_value):
"""
Add linear projection to queries, keys, and values.
"""
q = layers.fc(input=queries,
size=d_key * n_head,
num_flatten_dims=2,
param_attr=fluid.ParamAttr(
name=name + '_query_fc.w_0',
initializer=param_initializer),
bias_attr=name + '_query_fc.b_0')
k = layers.fc(input=keys,
size=d_key * n_head,
num_flatten_dims=2,
param_attr=fluid.ParamAttr(
name=name + '_key_fc.w_0',
initializer=param_initializer),
bias_attr=name + '_key_fc.b_0')
v = layers.fc(input=values,
size=d_value * n_head,
num_flatten_dims=2,
param_attr=fluid.ParamAttr(
name=name + '_value_fc.w_0',
initializer=param_initializer),
bias_attr=name + '_value_fc.b_0')
return q, k, v
def __split_heads(x, n_head):
"""
Reshape the last dimension of input tensor x so that it becomes two
dimensions and then transpose. Specifically, input a tensor with shape
[bs, max_sequence_length, n_head * hidden_dim] then output a tensor
with shape [bs, n_head, max_sequence_length, hidden_dim].
"""
hidden_size = x.shape[-1]
# The value 0 in shape attr means copying the corresponding dimension
# size of the input as the output dimension size.
reshaped = layers.reshape(
x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True)
# permute the dimensions into:
# [batch_size, n_head, max_sequence_len, hidden_size_per_head]
return layers.transpose(x=reshaped, perm=[0, 2, 1, 3])
def __combine_heads(x):
"""
Transpose and then reshape the last two dimensions of input tensor x
so that it becomes one dimension, which is the reverse of __split_heads.
"""
if len(x.shape) == 3:
return x
if len(x.shape) != 4:
raise ValueError("Input(x) should be a 4-D Tensor.")
trans_x = layers.transpose(x, perm=[0, 2, 1, 3])
# The value 0 in shape attr means copying the corresponding dimension
# size of the input as the output dimension size.
return layers.reshape(
x=trans_x,
shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]],
inplace=True)
def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate):
"""
Scaled Dot-Product Attention
"""
scaled_q = layers.scale(x=q, scale=d_key ** -0.5)
product = layers.matmul(x=scaled_q, y=k, transpose_y=True)
if attn_bias:
product += attn_bias
weights = layers.softmax(product)
if dropout_rate:
weights = layers.dropout(
weights,
dropout_prob=dropout_rate,
dropout_implementation="upscale_in_train",
is_test=False)
out = layers.matmul(weights, v)
return out
q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value)
if cache is not None: # use cache and concat time steps
# Since the inplace reshape in __split_heads changes the shape of k and
# v, which is the cache input for next time step, reshape the cache
# input from the previous time step first.
k = cache["k"] = layers.concat(
[layers.reshape(
cache["k"], shape=[0, 0, d_model]), k], axis=1)
v = cache["v"] = layers.concat(
[layers.reshape(
cache["v"], shape=[0, 0, d_model]), v], axis=1)
q = __split_heads(q, n_head)
k = __split_heads(k, n_head)
v = __split_heads(v, n_head)
ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key,
dropout_rate)
out = __combine_heads(ctx_multiheads)
# Project back to the model size.
proj_out = layers.fc(input=out,
size=d_model,
num_flatten_dims=2,
param_attr=fluid.ParamAttr(
name=name + '_output_fc.w_0',
initializer=param_initializer),
bias_attr=name + '_output_fc.b_0')
return proj_out
def positionwise_feed_forward(x,
d_inner_hid,
d_hid,
dropout_rate,
hidden_act,
param_initializer=None,
name='ffn'):
"""
Position-wise Feed-Forward Networks.
This module consists of two linear transformations with an activation (hidden_act)
in between, which is applied to each position separately and identically.
"""
hidden = layers.fc(input=x,
size=d_inner_hid,
num_flatten_dims=2,
act=hidden_act,
param_attr=fluid.ParamAttr(
name=name + '_fc_0.w_0',
initializer=param_initializer),
bias_attr=name + '_fc_0.b_0')
if dropout_rate:
hidden = layers.dropout(
hidden,
dropout_prob=dropout_rate,
dropout_implementation="upscale_in_train",
is_test=False)
out = layers.fc(input=hidden,
size=d_hid,
num_flatten_dims=2,
param_attr=fluid.ParamAttr(
name=name + '_fc_1.w_0', initializer=param_initializer),
bias_attr=name + '_fc_1.b_0')
return out
def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0.,
name=''):
"""
Add residual connection, layer normalization and dropout to the out tensor
optionally according to the value of process_cmd.
This will be used before or after multi-head attention and position-wise
feed-forward networks.
"""
for cmd in process_cmd:
if cmd == "a": # add residual connection
out = out + prev_out if prev_out else out
elif cmd == "n": # add layer normalization
out_dtype = out.dtype
if out_dtype == fluid.core.VarDesc.VarType.FP16:
out = layers.cast(x=out, dtype="float32")
out = layers.layer_norm(
out,
begin_norm_axis=len(out.shape) - 1,
param_attr=fluid.ParamAttr(
name=name + '_layer_norm_scale',
initializer=fluid.initializer.Constant(1.)),
bias_attr=fluid.ParamAttr(
name=name + '_layer_norm_bias',
initializer=fluid.initializer.Constant(0.)))
if out_dtype == fluid.core.VarDesc.VarType.FP16:
out = layers.cast(x=out, dtype="float16")
elif cmd == "d": # add dropout
if dropout_rate:
out = layers.dropout(
out,
dropout_prob=dropout_rate,
dropout_implementation="upscale_in_train",
is_test=False)
return out
pre_process_layer = partial(pre_post_process_layer, None)
post_process_layer = pre_post_process_layer
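# pre_process_layer binds prev_out=None, so a residual "a" command is a no-op
# there; post_process_layer is called with the real residual input instead.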
def encoder_layer(enc_input,
attn_bias,
n_head,
d_key,
d_value,
d_model,
d_inner_hid,
prepostprocess_dropout,
attention_dropout,
relu_dropout,
hidden_act,
preprocess_cmd="n",
postprocess_cmd="da",
param_initializer=None,
name=''):
"""The encoder layers that can be stacked to form a deep encoder.
This module consists of multi-head (self) attention followed by a
position-wise feed-forward network, each wrapped with post_process_layer
to add residual connection, layer normalization and dropout.
"""
attn_output = multi_head_attention(
pre_process_layer(
enc_input,
preprocess_cmd,
prepostprocess_dropout,
name=name + '_pre_att'),
None,
None,
attn_bias,
d_key,
d_value,
d_model,
n_head,
attention_dropout,
param_initializer=param_initializer,
name=name + '_multi_head_att')
attn_output = post_process_layer(
enc_input,
attn_output,
postprocess_cmd,
prepostprocess_dropout,
name=name + '_post_att')
ffd_output = positionwise_feed_forward(
pre_process_layer(
attn_output,
preprocess_cmd,
prepostprocess_dropout,
name=name + '_pre_ffn'),
d_inner_hid,
d_model,
relu_dropout,
hidden_act,
param_initializer=param_initializer,
name=name + '_ffn')
return post_process_layer(
attn_output,
ffd_output,
postprocess_cmd,
prepostprocess_dropout,
name=name + '_post_ffn')
def encoder(enc_input,
attn_bias,
n_layer,
n_head,
d_key,
d_value,
d_model,
d_inner_hid,
prepostprocess_dropout,
attention_dropout,
relu_dropout,
hidden_act,
preprocess_cmd="n",
postprocess_cmd="da",
param_initializer=None,
name=''):
"""
The encoder is composed of a stack of identical layers returned by calling
encoder_layer.
"""
for i in range(n_layer):
enc_output = encoder_layer(
enc_input,
attn_bias,
n_head,
d_key,
d_value,
d_model,
d_inner_hid,
prepostprocess_dropout,
attention_dropout,
relu_dropout,
hidden_act,
preprocess_cmd,
postprocess_cmd,
param_initializer=param_initializer,
name=name + '_layer_' + str(i))
enc_input = enc_output
enc_output = pre_process_layer(
enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder")
return enc_output
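A plain-numpy sketch of the attn_bias masking described in multi_head_attention above (illustrative only; toy_attention is an invented name, not part of this module):
import numpy as np

def toy_attention(q, k, v, attn_bias):
    """q, k, v: [batch, seq, d]; attn_bias: [batch, seq, seq] of 0 or -1e9."""
    d = q.shape[-1]
    logits = np.matmul(q, np.swapaxes(k, -1, -2)) / np.sqrt(d)
    logits = logits + attn_bias  # -1e9 at masked positions -> weight ~ 0
    weights = np.exp(logits - logits.max(axis=-1, keepdims=True))
    weights = weights / weights.sum(axis=-1, keepdims=True)
    return np.matmul(weights, v)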
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import argparse
import json
import yaml
import six
import logging
logging_only_message = "%(message)s"
logging_details = "%(asctime)s.%(msecs)03d %(levelname)s %(module)s - %(funcName)s: %(message)s"
class JsonConfig(object):
"""
A high-level API for handling a json config file.
"""
def __init__(self, config_path):
self._config_dict = self._parse(config_path)
def _parse(self, config_path):
try:
with open(config_path) as json_file:
config_dict = json.load(json_file)
except Exception:
raise IOError("Error in parsing bert model config file '%s'" %
config_path)
else:
return config_dict
def __getitem__(self, key):
return self._config_dict[key]
def print_config(self):
for arg, value in sorted(six.iteritems(self._config_dict)):
print('%s: %s' % (arg, value))
print('------------------------------------------------')
class ArgumentGroup(object):
def __init__(self, parser, title, des):
self._group = parser.add_argument_group(title=title, description=des)
def add_arg(self, name, type, default, help, **kwargs):
type = str2bool if type == bool else type
self._group.add_argument(
"--" + name,
default=default,
type=type,
help=help + ' Default: %(default)s.',
**kwargs)
class ArgConfig(object):
"""
A high-level API for handling argument configs.
"""
def __init__(self):
parser = argparse.ArgumentParser()
train_g = ArgumentGroup(parser, "training", "training options.")
train_g.add_arg("epoch", int, 3, "Number of epoches for fine-tuning.")
train_g.add_arg("learning_rate", float, 5e-5,
"Learning rate used to train with warmup.")
train_g.add_arg(
"lr_scheduler",
str,
"linear_warmup_decay",
"scheduler of learning rate.",
choices=['linear_warmup_decay', 'noam_decay'])
train_g.add_arg("weight_decay", float, 0.01,
"Weight decay rate for L2 regularizer.")
train_g.add_arg(
"warmup_proportion", float, 0.1,
"Proportion of training steps to perform linear learning rate warmup for."
)
train_g.add_arg("save_steps", int, 1000,
"The steps interval to save checkpoints.")
train_g.add_arg("use_fp16", bool, False,
"Whether to use fp16 mixed precision training.")
train_g.add_arg(
"loss_scaling", float, 1.0,
"Loss scaling factor for mixed precision training, only valid when use_fp16 is enabled."
)
train_g.add_arg("pred_dir", str, None,
"Path to save the prediction results")
log_g = ArgumentGroup(parser, "logging", "logging related.")
log_g.add_arg("skip_steps", int, 10,
"The steps interval to print loss.")
log_g.add_arg("verbose", bool, False, "Whether to output verbose log.")
run_type_g = ArgumentGroup(parser, "run_type", "running type options.")
run_type_g.add_arg("use_cuda", bool, True,
"If set, use GPU for training.")
run_type_g.add_arg(
"use_fast_executor", bool, False,
"If set, use fast parallel executor (in experiment).")
run_type_g.add_arg(
"num_iteration_per_drop_scope", int, 1,
"Ihe iteration intervals to clean up temporary variables.")
run_type_g.add_arg("do_train", bool, True,
"Whether to perform training.")
run_type_g.add_arg("do_predict", bool, True,
"Whether to perform prediction.")
custom_g = ArgumentGroup(parser, "customize", "customized options.")
self.custom_g = custom_g
self.parser = parser
def add_arg(self, name, dtype, default, descrip):
self.custom_g.add_arg(name, dtype, default, descrip)
def build_conf(self):
return self.parser.parse_args()
def str2bool(v):
# because argparse does not support parsing "True"/"False" strings into
# python booleans directly
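# e.g. str2bool("True") -> True, str2bool("0") -> False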
return v.lower() in ("true", "t", "1")
def print_arguments(args, log=None):
if not log:
print('----------- Configuration Arguments -----------')
for arg, value in sorted(six.iteritems(vars(args))):
print('%s: %s' % (arg, value))
print('------------------------------------------------')
else:
log.info('----------- Configuration Arguments -----------')
for arg, value in sorted(six.iteritems(vars(args))):
log.info('%s: %s' % (arg, value))
log.info('------------------------------------------------')
class PDConfig(object):
"""
A high-level API for managing configuration files in PaddlePaddle.
Can jointly work with command-line arguments, json files and yaml files.
"""
def __init__(self, json_file="", yaml_file="", fuse_args=True):
"""
Init function for PDConfig.
json_file: the path to the json configure file.
yaml_file: the path to the yaml configure file.
fuse_args: if fuse the json/yaml configs with argparse.
"""
assert isinstance(json_file, str)
assert isinstance(yaml_file, str)
if json_file != "" and yaml_file != "":
raise Warning(
"json_file and yaml_file can not co-exist for now. please only use one configure file type."
)
return
self.args = None
self.arg_config = {}
self.json_config = {}
self.yaml_config = {}
parser = argparse.ArgumentParser()
self.default_g = ArgumentGroup(parser, "default", "default options.")
self.yaml_g = ArgumentGroup(parser, "yaml", "options from yaml.")
self.json_g = ArgumentGroup(parser, "json", "options from json.")
self.com_g = ArgumentGroup(parser, "custom", "customized options.")
self.default_g.add_arg("epoch", int, 2,
"Number of epoches for training.")
self.default_g.add_arg("learning_rate", float, 1e-2,
"Learning rate used to train.")
self.default_g.add_arg("do_train", bool, False,
"Whether to perform training.")
self.default_g.add_arg("do_predict", bool, False,
"Whether to perform predicting.")
self.default_g.add_arg("do_eval", bool, False,
"Whether to perform evaluating.")
self.parser = parser
if json_file != "":
self.load_json(json_file, fuse_args=fuse_args)
if yaml_file:
self.load_yaml(yaml_file, fuse_args=fuse_args)
def load_json(self, file_path, fuse_args=True):
if not os.path.exists(file_path):
raise Warning("the json file %s does not exist." % file_path)
return
with open(file_path, "r") as fin:
self.json_config = json.loads(fin.read())
fin.close()
if fuse_args:
for name in self.json_config:
if not isinstance(self.json_config[name], int) \
and not isinstance(self.json_config[name], float) \
and not isinstance(self.json_config[name], str) \
and not isinstance(self.json_config[name], bool):
continue
self.json_g.add_arg(name,
type(self.json_config[name]),
self.json_config[name],
"This is from %s" % file_path)
def load_yaml(self, file_path, fuse_args=True):
if not os.path.exists(file_path):
raise Warning("the yaml file %s does not exist." % file_path)
return
with open(file_path, "r") as fin:
self.yaml_config = yaml.load(fin, Loader=yaml.SafeLoader)
fin.close()
if fuse_args:
for name in self.yaml_config:
if not isinstance(self.yaml_config[name], int) \
and not isinstance(self.yaml_config[name], float) \
and not isinstance(self.yaml_config[name], str) \
and not isinstance(self.yaml_config[name], bool):
continue
self.yaml_g.add_arg(name,
type(self.yaml_config[name]),
self.yaml_config[name],
"This is from %s" % file_path)
def build(self):
self.args = self.parser.parse_args()
self.arg_config = vars(self.args)
def __add__(self, new_arg):
assert isinstance(new_arg, list) or isinstance(new_arg, tuple)
assert len(new_arg) >= 3
assert self.args is None
name = new_arg[0]
dtype = new_arg[1]
dvalue = new_arg[2]
desc = new_arg[3] if len(
new_arg) == 4 else "Description is not provided."
self.com_g.add_arg(name, dtype, dvalue, desc)
return self
def __getattr__(self, name):
if name in self.arg_config:
return self.arg_config[name]
if name in self.json_config:
return self.json_config[name]
if name in self.yaml_config:
return self.yaml_config[name]
raise Warning("The argument %s is not defined." % name)
def Print(self):
print("-" * 70)
for name in self.arg_config:
print("%s:\t\t\t\t%s" % (str(name), str(self.arg_config[name])))
for name in self.json_config:
if name not in self.arg_config:
print("%s:\t\t\t\t%s" %
(str(name), str(self.json_config[name])))
for name in self.yaml_config:
if name not in self.arg_config:
print("%s:\t\t\t\t%s" %
(str(name), str(self.yaml_config[name])))
print("-" * 70)
if __name__ == "__main__":
"""
pd_config = PDConfig(json_file = "./test/bert_config.json")
pd_config.build()
print(pd_config.do_train)
print(pd_config.hidden_size)
pd_config = PDConfig(yaml_file = "./test/bert_config.yaml")
pd_config.build()
print(pd_config.do_train)
print(pd_config.hidden_size)
"""
pd_config = PDConfig(yaml_file="./test/bert_config.yaml")
pd_config += ("my_age", int, 18, "I am forever 18.")
pd_config.build()
print(pd_config.do_train)
print(pd_config.hidden_size)
print(pd_config.my_age)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
wget --no-check-certificate https://baidu-nlp.bj.bcebos.com/dmtk_data_1.0.0.tar.gz
tar -xvf dmtk_data_1.0.0.tar.gz
rm dmtk_data_1.0.0.tar.gz
wget --no-check-certificate https://baidu-nlp.bj.bcebos.com/dmtk_models_1.0.0.tar.gz
tar -xvf dmtk_models_1.0.0.tar.gz
rm dmtk_models_1.0.0.tar.gz
wget --no-check-certificate https://bert-models.bj.bcebos.com/uncased_L-12_H-768_A-12.tar.gz
tar -xvf uncased_L-12_H-768_A-12.tar.gz
rm uncased_L-12_H-768_A-12.tar.gz