Commit 0085a4f2 authored by malin10

update multiview-simnet

Parent f4ace1bf
......@@ -149,11 +149,11 @@ class Model(object):
return optimizer_i
def optimizer(self):
learning_rate = envs.get_global_env("hyper_parameters.learning_rate",
None, self._namespace)
optimizer = envs.get_global_env("hyper_parameters.optimizer", None,
self._namespace)
return self._build_optimizer(optimizer, learning_rate)
opt_name = envs.get_global_env("hyper_parameters.optimizer.class")
opt_lr = envs.get_global_env("hyper_parameters.optimizer.learning_rate")
opt_strategy = envs.get_global_env("hyper_parameters.optimizer.strategy")
return self._build_optimizer(opt_name, opt_lr, opt_strategy)
def input_data(self, is_infer=False, **kwargs):
name = "dataset." + kwargs.get("dataset_name") + "."
......
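
The refactored optimizer() above reads a nested hyper_parameters.optimizer block (class / learning_rate / strategy) instead of flat keys scoped by a namespace. A minimal sketch of how such a triple might be turned into a fluid optimizer, assuming _build_optimizer dispatches on the class name (the function and names below are illustrative, not the PaddleRec implementation):

```python
# Illustrative sketch only: dispatch an optimizer by the "class" string from
# hyper_parameters.optimizer; the real logic lives in Model._build_optimizer.
import paddle.fluid as fluid

def build_optimizer(opt_name, learning_rate, strategy=None):
    name = (opt_name or "SGD").upper()
    if name == "ADAM":
        return fluid.optimizer.Adam(learning_rate=learning_rate)
    if name == "SGD":
        return fluid.optimizer.SGD(learning_rate=learning_rate)
    if name == "ADAGRAD":
        return fluid.optimizer.Adagrad(learning_rate=learning_rate)
    raise ValueError("unsupported optimizer: %s" % opt_name)

# e.g. build_optimizer("Adam", 0.0001) for the multiview-simnet config below
```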
......@@ -167,6 +167,7 @@ class SingleInfer(TranspileTrainer):
model = envs.lazy_instance_by_fliename(
model_path, "Model")(self._env)
model._infer_data_var = model.input_data(
is_infer=True,
dataset_name=model_dict["dataset_name"])
if envs.get_global_env("dataset." + dataset_name +
".type") == "DataLoader":
......
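
input_data() is now called with is_infer=True when SingleInfer builds the inference program, so one data definition can serve both phases. A hedged sketch of a model-side input_data that branches on the flag (the slot names are illustrative; models that only declare sparse_slots in the dataset section inherit input_data from ModelBase instead):

```python
# Illustrative sketch: a single input_data definition serving train and infer.
import paddle.fluid as fluid

def input_data(is_infer=False, **kwargs):
    query = fluid.data(name="1", shape=[None, 1], lod_level=1, dtype="int64")
    pos_title = fluid.data(name="2", shape=[None, 1], lod_level=1, dtype="int64")
    if is_infer:
        return [query, pos_title]          # infer feeds query + positive title only
    neg_title = fluid.data(name="3", shape=[None, 1], lod_level=1, dtype="int64")
    return [query, pos_title, neg_title]   # training also feeds the negative title
```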
......@@ -147,11 +147,6 @@ class SingleTrainer(TranspileTrainer):
startup_program = fluid.Program()
scope = fluid.Scope()
dataset_name = model_dict["dataset_name"]
opt_name = envs.get_global_env("hyper_parameters.optimizer.class")
opt_lr = envs.get_global_env(
"hyper_parameters.optimizer.learning_rate")
opt_strategy = envs.get_global_env(
"hyper_parameters.optimizer.strategy")
with fluid.program_guard(train_program, startup_program):
with fluid.unique_name.guard():
with fluid.scope_guard(scope):
......@@ -168,8 +163,7 @@ class SingleTrainer(TranspileTrainer):
self._get_dataloader(dataset_name,
model._data_loader)
model.net(model._data_var, False)
optimizer = model._build_optimizer(opt_name, opt_lr,
opt_strategy)
optimizer = model.optimizer()
optimizer.minimize(model._cost)
self._model[model_dict["name"]][0] = train_program
self._model[model_dict["name"]][1] = startup_program
......
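
With this change the trainer no longer reads the optimizer settings itself; it simply calls model.optimizer() inside the program/scope guards. A toy, self-contained sketch of that guard nesting (the fc layer and SGD below are placeholders standing in for model.net() and model.optimizer(), not the PaddleRec code):

```python
# Toy sketch of the program_guard / unique_name / scope_guard nesting used by
# SingleTrainer; the network and optimizer here are placeholders.
import paddle.fluid as fluid

train_program, startup_program = fluid.Program(), fluid.Program()
scope = fluid.Scope()
with fluid.program_guard(train_program, startup_program):
    with fluid.unique_name.guard():
        with fluid.scope_guard(scope):
            x = fluid.data(name="x", shape=[None, 4], dtype="float32")
            cost = fluid.layers.mean(fluid.layers.fc(input=x, size=1))
            fluid.optimizer.SGD(learning_rate=0.01).minimize(cost)
```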
......@@ -14,7 +14,8 @@
from __future__ import print_function
import sys
import yaml
from paddlerec.core.utils import envs
from paddlerec.core.utils.envs import lazy_instance_by_fliename
from paddlerec.core.reader import SlotReader
......@@ -38,6 +39,11 @@ else:
yaml_abs_path = sys.argv[3]
with open(yaml_abs_path, 'r') as rb:
config = yaml.load(rb.read(), Loader=yaml.FullLoader)
envs.set_global_envs()
envs.update_workspace()
if reader_name != "SlotReader":
reader_class = lazy_instance_by_fliename(reader_package, reader_name)
reader = reader_class(yaml_abs_path)
......
......@@ -53,13 +53,14 @@ runner:
save_inference_feed_varnames: ["query", "doc_pos"] # feed vars of save inference
save_inference_fetch_varnames: ["cos_sim_0.tmp_0"] # fetch vars of save inference
init_model_path: "" # load model path
fetch_period: 10
fetch_period: 2
- name: runner2
class: single_infer
# num of epochs
epochs: 1
# device to run training or infer
device: cpu
fetch_period: 1
init_model_path: "increment/2" # load model path
# runner will run all the phase in each epoch
......
......@@ -11,49 +11,73 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
evaluate:
workspace: "paddlerec.models.match.multiview-simnet"
reader:
batch_size: 2
class: "{workspace}/evaluate_reader.py"
test_data_path: "{workspace}/data/test"
train:
trainer:
# for cluster training
strategy: "async"
# workspace
workspace: "paddlerec.models.match.multiview-simnet"
epochs: 2
workspace: "paddlerec.models.match.multiview-simnet"
# list of dataset
dataset:
- name: dataset_train # name of dataset to distinguish different datasets
batch_size: 2
type: DataLoader # or QueueDataset
data_path: "{workspace}/data/train"
sparse_slots: "1 2 3"
- name: dataset_infer # name
batch_size: 2
type: DataLoader # or QueueDataset
data_path: "{workspace}/data/test"
sparse_slots: "1 2"
reader:
batch_size: 2
class: "{workspace}/reader.py"
train_data_path: "{workspace}/data/train"
dataset_class: "DataLoader"
# hyper parameters of user-defined network
hyper_parameters:
optimizer:
class: Adam
learning_rate: 0.0001
strategy: async
query_encoder: "bow"
title_encoder: "bow"
query_encode_dim: 128
title_encode_dim: 128
sparse_feature_dim: 1000001
embedding_dim: 128
hidden_size: 128
margin: 0.1
model:
models: "{workspace}/model.py"
hyper_parameters:
use_DataLoader: True
query_encoder: "bow"
title_encoder: "bow"
query_encode_dim: 128
title_encode_dim: 128
query_slots: 1
title_slots: 1
sparse_feature_dim: 1000001
embedding_dim: 128
hidden_size: 128
learning_rate: 0.0001
optimizer: adam
# select runner by name
mode: runner1
# config of each runner.
# runner is a kind of paddle training class, which wraps the train/infer process.
runner:
- name: runner1
class: single_train
# num of epochs
epochs: 2
# device to run training or infer
device: cpu
save_checkpoint_interval: 1 # save model interval of epochs
save_inference_interval: 1 # save inference
save_checkpoint_path: "increment" # save checkpoint path
save_inference_path: "inference" # save inference path
save_inference_feed_varnames: [] # feed vars of save inference
save_inference_fetch_varnames: [] # fetch vars of save inference
init_model_path: "" # load model path
fetch_period: 1
- name: runner2
class: single_infer
# num of epochs
epochs: 1
# device to run training or infer
device: cpu
fetch_period: 1
init_model_path: "increment/0" # load model path
save:
increment:
dirname: "increment"
epoch_interval: 1
save_last: True
inference:
dirname: "inference"
epoch_interval: 1
save_last: True
# runner will run all the phase in each epoch
phase:
- name: phase1
model: "{workspace}/model.py" # user-defined model
dataset_name: dataset_train # select dataset by name
thread_num: 1
#- name: phase2
# model: "{workspace}/model.py" # user-defined model
# dataset_name: dataset_infer # select dataset by name
# thread_num: 1
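
Under this single-YAML layout, hyper-parameters are fetched by dotted path rather than by (key, default, namespace). A small sketch of reading the values defined above, assuming the framework has already loaded the YAML into the global env registry (as the reader setup earlier in this diff does):

```python
# Illustrative sketch: dotted-path lookups against the config above; assumes
# the YAML has already been registered through the envs module by the framework.
from paddlerec.core.utils import envs

margin = envs.get_global_env("hyper_parameters.margin")                   # 0.1
emb_dim = envs.get_global_env("hyper_parameters.embedding_dim")           # 128
opt_class = envs.get_global_env("hyper_parameters.optimizer.class")       # "Adam"
opt_lr = envs.get_global_env("hyper_parameters.optimizer.learning_rate")  # 0.0001
batch_size = envs.get_global_env("dataset.dataset_train.batch_size")      # 2
```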
......@@ -99,146 +99,88 @@ class SimpleEncoderFactory(object):
class Model(ModelBase):
def __init__(self, config):
ModelBase.__init__(self, config)
self.init_config()
def init_config(self):
self._fetch_interval = 1
query_encoder = envs.get_global_env("hyper_parameters.query_encoder",
None, self._namespace)
title_encoder = envs.get_global_env("hyper_parameters.title_encoder",
None, self._namespace)
query_encode_dim = envs.get_global_env(
"hyper_parameters.query_encode_dim", None, self._namespace)
title_encode_dim = envs.get_global_env(
"hyper_parameters.title_encode_dim", None, self._namespace)
query_slots = envs.get_global_env("hyper_parameters.query_slots", None,
self._namespace)
title_slots = envs.get_global_env("hyper_parameters.title_slots", None,
self._namespace)
factory = SimpleEncoderFactory()
self.query_encoders = [
factory.create(query_encoder, query_encode_dim)
for i in range(query_slots)
]
self.title_encoders = [
factory.create(title_encoder, title_encode_dim)
for i in range(title_slots)
]
self.emb_size = envs.get_global_env(
"hyper_parameters.sparse_feature_dim", None, self._namespace)
self.emb_dim = envs.get_global_env("hyper_parameters.embedding_dim",
None, self._namespace)
def _init_hyper_parameters(self):
self.query_encoder = envs.get_global_env("hyper_parameters.query_encoder")
self.title_encoder = envs.get_global_env("hyper_parameters.title_encoder")
self.query_encode_dim = envs.get_global_env("hyper_parameters.query_encode_dim")
self.title_encode_dim = envs.get_global_env("hyper_parameters.title_encode_dim")
self.emb_size = envs.get_global_env("hyper_parameters.sparse_feature_dim")
self.emb_dim = envs.get_global_env("hyper_parameters.embedding_dim")
self.emb_shape = [self.emb_size, self.emb_dim]
self.hidden_size = envs.get_global_env("hyper_parameters.hidden_size",
None, self._namespace)
self.margin = 0.1
def input(self, is_train=True):
self.q_slots = [
fluid.data(
name="%d" % i, shape=[None, 1], lod_level=1, dtype='int64')
for i in range(len(self.query_encoders))
]
self.pt_slots = [
fluid.data(
name="%d" % (i + len(self.query_encoders)),
shape=[None, 1],
lod_level=1,
dtype='int64') for i in range(len(self.title_encoders))
]
if is_train == False:
return self.q_slots + self.pt_slots
self.hidden_size = envs.get_global_env("hyper_parameters.hidden_size")
self.margin = envs.get_global_env("hyper_parameters.margin")
self.nt_slots = [
fluid.data(
name="%d" %
(i + len(self.query_encoders) + len(self.title_encoders)),
shape=[None, 1],
lod_level=1,
dtype='int64') for i in range(len(self.title_encoders))
def net(self, input, is_infer=False):
factory = SimpleEncoderFactory()
self.q_slots = self._sparse_data_var[0:1]
self.query_encoders = [
factory.create(self.query_encoder, self.query_encode_dim)
for _ in self.q_slots
]
return self.q_slots + self.pt_slots + self.nt_slots
def train_input(self):
res = self.input()
self._data_var = res
use_dataloader = envs.get_global_env("hyper_parameters.use_DataLoader",
False, self._namespace)
if self._platform != "LINUX" or use_dataloader:
self._data_loader = fluid.io.DataLoader.from_generator(
feed_list=self._data_var,
capacity=256,
use_double_buffer=False,
iterable=False)
def get_acc(self, x, y):
less = tensor.cast(cf.less_than(x, y), dtype='float32')
label_ones = fluid.layers.fill_constant_batch_size_like(
input=x, dtype='float32', shape=[-1, 1], value=1.0)
correct = fluid.layers.reduce_sum(less)
total = fluid.layers.reduce_sum(label_ones)
acc = fluid.layers.elementwise_div(correct, total)
return acc
def net(self):
q_embs = [
fluid.embedding(
input=query, size=self.emb_shape, param_attr="emb")
for query in self.q_slots
]
pt_embs = [
# encode each embedding field with encoder
q_encodes = [
self.query_encoders[i].forward(emb) for i, emb in enumerate(q_embs)
]
# concat multi view for query, pos_title, neg_title
q_concat = fluid.layers.concat(q_encodes)
# projection of hidden layer
q_hid = fluid.layers.fc(q_concat,
size=self.hidden_size,
param_attr='q_fc.w',
bias_attr='q_fc.b')
self.pt_slots = self._sparse_data_var[1:2]
self.title_encoders = [
factory.create(self.title_encoder, self.title_encode_dim)
]
pt_embs = [
fluid.embedding(
input=title, size=self.emb_shape, param_attr="emb")
for title in self.pt_slots
]
nt_embs = [
pt_encodes = [
self.title_encoders[i].forward(emb)
for i, emb in enumerate(pt_embs)
]
pt_concat = fluid.layers.concat(pt_encodes)
pt_hid = fluid.layers.fc(pt_concat,
size=self.hidden_size,
param_attr='t_fc.w',
bias_attr='t_fc.b')
# cosine of hidden layers
cos_pos = fluid.layers.cos_sim(q_hid, pt_hid)
if is_infer:
self._infer_results['query_pt_sim'] = cos_pos
return
self.nt_slots = self._sparse_data_var[2:3]
nt_embs = [
fluid.embedding(
input=title, size=self.emb_shape, param_attr="emb")
for title in self.nt_slots
]
# encode each embedding field with encoder
q_encodes = [
self.query_encoders[i].forward(emb) for i, emb in enumerate(q_embs)
]
pt_encodes = [
self.title_encoders[i].forward(emb)
for i, emb in enumerate(pt_embs)
]
nt_encodes = [
nt_encodes = [
self.title_encoders[i].forward(emb)
for i, emb in enumerate(nt_embs)
]
# concat multi view for query, pos_title, neg_title
q_concat = fluid.layers.concat(q_encodes)
pt_concat = fluid.layers.concat(pt_encodes)
nt_concat = fluid.layers.concat(nt_encodes)
# projection of hidden layer
q_hid = fluid.layers.fc(q_concat,
size=self.hidden_size,
param_attr='q_fc.w',
bias_attr='q_fc.b')
pt_hid = fluid.layers.fc(pt_concat,
size=self.hidden_size,
param_attr='t_fc.w',
bias_attr='t_fc.b')
nt_hid = fluid.layers.fc(nt_concat,
nt_concat = fluid.layers.concat(nt_encodes)
nt_hid = fluid.layers.fc(nt_concat,
size=self.hidden_size,
param_attr='t_fc.w',
bias_attr='t_fc.b')
cos_neg = fluid.layers.cos_sim(q_hid, nt_hid)
# cosine of hidden layers
cos_pos = fluid.layers.cos_sim(q_hid, pt_hid)
cos_neg = fluid.layers.cos_sim(q_hid, nt_hid)
# pairwise hinge_loss
# pairwise hinge_loss
loss_part1 = fluid.layers.elementwise_sub(
tensor.fill_constant_batch_size_like(
input=cos_pos,
......@@ -254,72 +196,16 @@ class Model(ModelBase):
input=loss_part2, shape=[-1, 1], value=0.0, dtype='float32'),
loss_part2)
self.avg_cost = fluid.layers.mean(loss_part3)
self._cost = fluid.layers.mean(loss_part3)
self.acc = self.get_acc(cos_neg, cos_pos)
def avg_loss(self):
self._cost = self.avg_cost
def metrics(self):
self._metrics["loss"] = self.avg_cost
self._metrics["loss"] = self._cost
self._metrics["acc"] = self.acc
def train_net(self):
self.train_input()
self.net()
self.avg_loss()
self.metrics()
def optimizer(self):
learning_rate = envs.get_global_env("hyper_parameters.learning_rate",
None, self._namespace)
optimizer = fluid.optimizer.Adam(learning_rate=learning_rate)
return optimizer
def infer_input(self):
res = self.input(is_train=False)
self._infer_data_var = res
self._infer_data_loader = fluid.io.DataLoader.from_generator(
feed_list=self._infer_data_var,
capacity=64,
use_double_buffer=False,
iterable=False)
def infer_net(self):
self.infer_input()
# lookup embedding for each slot
q_embs = [
fluid.embedding(
input=query, size=self.emb_shape, param_attr="emb")
for query in self.q_slots
]
pt_embs = [
fluid.embedding(
input=title, size=self.emb_shape, param_attr="emb")
for title in self.pt_slots
]
# encode each embedding field with encoder
q_encodes = [
self.query_encoders[i].forward(emb) for i, emb in enumerate(q_embs)
]
pt_encodes = [
self.title_encoders[i].forward(emb)
for i, emb in enumerate(pt_embs)
]
# concat multi view for query, pos_title, neg_title
q_concat = fluid.layers.concat(q_encodes)
pt_concat = fluid.layers.concat(pt_encodes)
# projection of hidden layer
q_hid = fluid.layers.fc(q_concat,
size=self.hidden_size,
param_attr='q_fc.w',
bias_attr='q_fc.b')
pt_hid = fluid.layers.fc(pt_concat,
size=self.hidden_size,
param_attr='t_fc.w',
bias_attr='t_fc.b')
# cosine of hidden layers
cos = fluid.layers.cos_sim(q_hid, pt_hid)
self._infer_results['query_pt_sim'] = cos
def get_acc(self, x, y):
less = tensor.cast(cf.less_than(x, y), dtype='float32')
label_ones = fluid.layers.fill_constant_batch_size_like(
input=x, dtype='float32', shape=[-1, 1], value=1.0)
correct = fluid.layers.reduce_sum(less)
total = fluid.layers.reduce_sum(label_ones)
acc = fluid.layers.elementwise_div(correct, total)
return acc
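
The rewritten net() keeps the same pairwise objective as before: loss = mean(max(0, margin - cos_pos + cos_neg)), and acc is the fraction of pairs where cos_neg < cos_pos. A standalone numpy check of that arithmetic (values are illustrative):

```python
# Standalone numpy check of the pairwise hinge loss / accuracy computed above.
import numpy as np

def hinge_loss_and_acc(cos_pos, cos_neg, margin=0.1):
    loss = np.maximum(0.0, margin - cos_pos + cos_neg).mean()
    acc = (cos_neg < cos_pos).mean()
    return loss, acc

cos_pos = np.array([0.8, 0.6, 0.3])
cos_neg = np.array([0.2, 0.7, 0.1])
print(hinge_loss_and_acc(cos_pos, cos_neg))  # (~0.0667, ~0.6667)
```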
......@@ -11,46 +11,71 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
evaluate:
workspace: "paddlerec.models.recall.gnn"
reader:
batch_size: 50
class: "{workspace}/evaluate_reader.py"
test_data_path: "{workspace}/data/test"
train:
trainer:
# for cluster training
strategy: "async"
# workspace
workspace: "paddlerec.models.recall.gnn"
epochs: 2
workspace: "paddlerec.models.recall.gnn"
# list of dataset
dataset:
- name: dataset_train # name of dataset to distinguish different datasets
batch_size: 100
type: DataLoader # or QueueDataset
data_path: "{workspace}/data/train"
data_converter: "{workspace}/reader.py"
- name: dataset_infer # name
batch_size: 50
type: DataLoader # or QueueDataset
data_path: "{workspace}/data/test"
data_converter: "{workspace}/evaluate_reader.py"
reader:
batch_size: 100
class: "{workspace}/reader.py"
train_data_path: "{workspace}/data/train"
dataset_class: "DataLoader"
# hyper parameters of user-defined network
hyper_parameters:
optimizer:
class: Adam
learning_rate: 0.001
decay_steps: 3
decay_rate: 0.1
l2: 0.00001
sparse_feature_nums: 43098
sparse_feature_dim: 100
corpus_size: 719470
gnn_propogation_steps: 1
model:
models: "{workspace}/model.py"
hyper_parameters:
use_DataLoader: True
config_path: "{workspace}/data/config.txt"
sparse_feature_dim: 100
gnn_propogation_steps: 1
learning_rate: 0.001
l2: 0.00001
decay_steps: 3
decay_rate: 0.1
optimizer: adam
# select runner by name
mode: runner1
# config of each runner.
# runner is a kind of paddle training class, which wraps the train/infer process.
runner:
- name: runner1
class: single_train
# num of epochs
epochs: 2
# device to run training or infer
device: cpu
save_checkpoint_interval: 1 # save model interval of epochs
save_inference_interval: 1 # save inference
save_checkpoint_path: "increment" # save checkpoint path
save_inference_path: "inference" # save inference path
save_inference_feed_varnames: [] # feed vars of save inference
save_inference_fetch_varnames: [] # fetch vars of save inference
init_model_path: "" # load model path
fetch_period: 10
- name: runner2
class: single_infer
# num of epochs
epochs: 1
# device to run training or infer
device: cpu
fetch_period: 1
init_model_path: "increment/0" # load model path
save:
increment:
dirname: "increment"
epoch_interval: 1
save_last: True
inference:
dirname: "inference"
epoch_interval: 1
save_last: True
# runner will run all the phase in each epoch
phase:
- name: phase1
model: "{workspace}/model.py" # user-defined model
dataset_name: dataset_train # select dataset by name
thread_num: 1
#- name: phase2
# model: "{workspace}/model.py" # user-defined model
# dataset_name: dataset_infer # select dataset by name
# thread_num: 1
......@@ -21,10 +21,9 @@ from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
class EvaluateReader(Reader):
class TrainReader(Reader):
def init(self):
self.batch_size = envs.get_global_env("batch_size", None,
"evaluate.reader")
self.batch_size = envs.get_global_env("dataset.dataset_infer.batch_size")
self.input = []
self.length = None
......
......@@ -25,74 +25,59 @@ from paddlerec.core.model import Model as ModelBase
class Model(ModelBase):
def __init__(self, config):
ModelBase.__init__(self, config)
self.init_config()
def init_config(self):
self._fetch_interval = 1
self.items_num, self.ins_num = self.config_read(
envs.get_global_env("hyper_parameters.config_path", None,
self._namespace))
self.train_batch_size = envs.get_global_env("batch_size", None,
"train.reader")
self.evaluate_batch_size = envs.get_global_env("batch_size", None,
"evaluate.reader")
self.hidden_size = envs.get_global_env(
"hyper_parameters.sparse_feature_dim", None, self._namespace)
self.step = envs.get_global_env(
"hyper_parameters.gnn_propogation_steps", None, self._namespace)
def config_read(self, config_path=None):
if config_path is None:
raise ValueError(
"please set train.model.hyper_parameters.config_path at first")
with open(config_path, "r") as fin:
item_nums = int(fin.readline().strip())
ins_nums = int(fin.readline().strip())
return item_nums, ins_nums
def input(self, bs):
self.items = fluid.data(
def _init_hyper_parameters(self):
self.learning_rate = envs.get_global_env("hyper_parameters.optimizer.learning_rate")
self.decay_steps = envs.get_global_env("hyper_parameters.optimizer.decay_steps")
self.decay_rate = envs.get_global_env("hyper_parameters.optimizer.decay_rate")
self.l2 = envs.get_global_env("hyper_parameters.optimizer.l2")
self.dict_size = envs.get_global_env("hyper_parameters.sparse_feature_nums")
self.corpus_size = envs.get_global_env("hyper_parameters.corpus_size")
self.train_batch_size = envs.get_global_env("dataset.dataset_train.batch_size")
self.evaluate_batch_size = envs.get_global_env("dataset.dataset_infer.batch_size")
self.hidden_size = envs.get_global_env("hyper_parameters.sparse_feature_dim")
self.step = envs.get_global_env("hyper_parameters.gnn_propogation_steps")
def input_data(self, is_infer=False, **kwargs):
if is_infer:
bs = self.evaluate_batch_size
else:
bs = self.train_batch_size
items = fluid.data(
name="items", shape=[bs, -1],
dtype="int64") # [batch_size, uniq_max]
self.seq_index = fluid.data(
seq_index = fluid.data(
name="seq_index", shape=[bs, -1, 2],
dtype="int32") # [batch_size, seq_max, 2]
self.last_index = fluid.data(
last_index = fluid.data(
name="last_index", shape=[bs, 2], dtype="int32") # [batch_size, 2]
self.adj_in = fluid.data(
adj_in = fluid.data(
name="adj_in", shape=[bs, -1, -1],
dtype="float32") # [batch_size, seq_max, seq_max]
self.adj_out = fluid.data(
adj_out = fluid.data(
name="adj_out", shape=[bs, -1, -1],
dtype="float32") # [batch_size, seq_max, seq_max]
self.mask = fluid.data(
mask = fluid.data(
name="mask", shape=[bs, -1, 1],
dtype="float32") # [batch_size, seq_max, 1]
self.label = fluid.data(
label = fluid.data(
name="label", shape=[bs, 1], dtype="int64") # [batch_size, 1]
res = [
self.items, self.seq_index, self.last_index, self.adj_in,
self.adj_out, self.mask, self.label
items, seq_index, last_index, adj_in, adj_out, mask, label
]
return res
def train_input(self):
res = self.input(self.train_batch_size)
self._data_var = res
use_dataloader = envs.get_global_env("hyper_parameters.use_DataLoader",
False, self._namespace)
if self._platform != "LINUX" or use_dataloader:
self._data_loader = fluid.io.DataLoader.from_generator(
feed_list=self._data_var,
capacity=256,
use_double_buffer=False,
iterable=False)
def net(self, items_num, hidden_size, step, bs):
stdv = 1.0 / math.sqrt(hidden_size)
def net(self, inputs, is_infer=False):
if is_infer:
bs = self.evaluate_batch_size
else:
bs = self.train_batch_size
stdv = 1.0 / math.sqrt(self.hidden_size)
def embedding_layer(input,
table_name,
......@@ -100,22 +85,22 @@ class Model(ModelBase):
initializer_instance=None):
emb = fluid.embedding(
input=input,
size=[items_num, emb_dim],
size=[self.dict_size, emb_dim],
param_attr=fluid.ParamAttr(
name=table_name, initializer=initializer_instance), )
name=table_name, initializer=initializer_instance))
return emb
sparse_initializer = fluid.initializer.Uniform(low=-stdv, high=stdv)
items_emb = embedding_layer(self.items, "emb", hidden_size,
items_emb = embedding_layer(inputs[0], "emb", self.hidden_size,
sparse_initializer)
pre_state = items_emb
for i in range(step):
for i in range(self.step):
pre_state = layers.reshape(
x=pre_state, shape=[bs, -1, hidden_size])
x=pre_state, shape=[bs, -1, self.hidden_size])
state_in = layers.fc(
input=pre_state,
name="state_in",
size=hidden_size,
size=self.hidden_size,
act=None,
num_flatten_dims=2,
param_attr=fluid.ParamAttr(
......@@ -127,7 +112,7 @@ class Model(ModelBase):
state_out = layers.fc(
input=pre_state,
name="state_out",
size=hidden_size,
size=self.hidden_size,
act=None,
num_flatten_dims=2,
param_attr=fluid.ParamAttr(
......@@ -137,33 +122,32 @@ class Model(ModelBase):
initializer=fluid.initializer.Uniform(
low=-stdv, high=stdv))) # [batch_size, uniq_max, h]
state_adj_in = layers.matmul(self.adj_in,
state_adj_in = layers.matmul(inputs[3],
state_in) # [batch_size, uniq_max, h]
state_adj_out = layers.matmul(
self.adj_out, state_out) # [batch_size, uniq_max, h]
state_adj_out = layers.matmul(inputs[4], state_out) # [batch_size, uniq_max, h]
gru_input = layers.concat([state_adj_in, state_adj_out], axis=2)
gru_input = layers.reshape(
x=gru_input, shape=[-1, hidden_size * 2])
x=gru_input, shape=[-1, self.hidden_size * 2])
gru_fc = layers.fc(input=gru_input,
name="gru_fc",
size=3 * hidden_size,
size=3 * self.hidden_size,
bias_attr=False)
pre_state, _, _ = fluid.layers.gru_unit(
input=gru_fc,
hidden=layers.reshape(
x=pre_state, shape=[-1, hidden_size]),
size=3 * hidden_size)
x=pre_state, shape=[-1, self.hidden_size]),
size=3 * self.hidden_size)
final_state = layers.reshape(pre_state, shape=[bs, -1, hidden_size])
seq = layers.gather_nd(final_state, self.seq_index)
last = layers.gather_nd(final_state, self.last_index)
final_state = layers.reshape(pre_state, shape=[bs, -1, self.hidden_size])
seq = layers.gather_nd(final_state, inputs[1])
last = layers.gather_nd(final_state, inputs[2])
seq_fc = layers.fc(
input=seq,
name="seq_fc",
size=hidden_size,
size=self.hidden_size,
bias_attr=False,
act=None,
num_flatten_dims=2,
......@@ -171,7 +155,7 @@ class Model(ModelBase):
low=-stdv, high=stdv))) # [batch_size, seq_max, h]
last_fc = layers.fc(input=last,
name="last_fc",
size=hidden_size,
size=self.hidden_size,
bias_attr=False,
act=None,
num_flatten_dims=1,
......@@ -184,7 +168,7 @@ class Model(ModelBase):
add = layers.elementwise_add(seq_fc_t,
last_fc) # [seq_max, batch_size, h]
b = layers.create_parameter(
shape=[hidden_size],
shape=[self.hidden_size],
dtype='float32',
default_initializer=fluid.initializer.Constant(value=0.0)) # [h]
add = layers.elementwise_add(add, b) # [seq_max, batch_size, h]
......@@ -202,7 +186,7 @@ class Model(ModelBase):
bias_attr=False,
param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
low=-stdv, high=stdv))) # [batch_size, seq_max, 1]
weight *= self.mask
weight *= inputs[5]
weight_mask = layers.elementwise_mul(
seq, weight, axis=0) # [batch_size, seq_max, h]
global_attention = layers.reduce_sum(
......@@ -213,7 +197,7 @@ class Model(ModelBase):
final_attention_fc = layers.fc(
input=final_attention,
name="final_attention_fc",
size=hidden_size,
size=self.hidden_size,
bias_attr=False,
act=None,
param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
......@@ -225,7 +209,7 @@ class Model(ModelBase):
# dtype="int64",
# persistable=True,
# name="all_vocab")
all_vocab = np.arange(1, items_num).reshape((-1)).astype('int32')
all_vocab = np.arange(1, self.dict_size).reshape((-1)).astype('int32')
all_vocab = fluid.layers.cast(
x=fluid.layers.assign(all_vocab), dtype='int64')
......@@ -235,63 +219,34 @@ class Model(ModelBase):
name="emb",
initializer=fluid.initializer.Uniform(
low=-stdv, high=stdv)),
size=[items_num, hidden_size]) # [all_vocab, h]
size=[self.dict_size, self.hidden_size]) # [all_vocab, h]
logits = layers.matmul(
x=final_attention_fc, y=all_emb,
transpose_y=True) # [batch_size, all_vocab]
softmax = layers.softmax_with_cross_entropy(
logits=logits, label=self.label) # [batch_size, 1]
logits=logits, label=inputs[6]) # [batch_size, 1]
self.loss = layers.reduce_mean(softmax) # [1]
self.acc = layers.accuracy(input=logits, label=self.label, k=20)
self.acc = layers.accuracy(input=logits, label=inputs[6], k=20)
def avg_loss(self):
self._cost = self.loss
if is_infer:
self._infer_results['acc'] = self.acc
self._infer_results['loss'] = self.loss
return
def metrics(self):
self._metrics["LOSS"] = self.loss
self._metrics["LOSS"] = self.loss
self._metrics["train_acc"] = self.acc
def train_net(self):
self.train_input()
self.net(self.items_num, self.hidden_size, self.step,
self.train_batch_size)
self.avg_loss()
self.metrics()
def optimizer(self):
learning_rate = envs.get_global_env("hyper_parameters.learning_rate",
None, self._namespace)
step_per_epoch = self.ins_num // self.train_batch_size
decay_steps = envs.get_global_env("hyper_parameters.decay_steps", None,
self._namespace)
decay_rate = envs.get_global_env("hyper_parameters.decay_rate", None,
self._namespace)
l2 = envs.get_global_env("hyper_parameters.l2", None, self._namespace)
step_per_epoch = self.corpus_size // self.train_batch_size
optimizer = fluid.optimizer.Adam(
learning_rate=fluid.layers.exponential_decay(
learning_rate=learning_rate,
decay_steps=decay_steps * step_per_epoch,
decay_rate=decay_rate),
learning_rate=self.learning_rate,
decay_steps=self.decay_steps * step_per_epoch,
decay_rate=self.decay_rate),
regularization=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=l2))
regularization_coeff=self.l2))
return optimizer
def infer_input(self):
self._reader_namespace = "evaluate.reader"
res = self.input(self.evaluate_batch_size)
self._infer_data_var = res
self._infer_data_loader = fluid.io.DataLoader.from_generator(
feed_list=self._infer_data_var,
capacity=64,
use_double_buffer=False,
iterable=False)
def infer_net(self):
self.infer_input()
self.net(self.items_num, self.hidden_size, self.step,
self.evaluate_batch_size)
self._infer_results['acc'] = self.acc
self._infer_results['loss'] = self.loss
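
The GNN optimizer now takes its schedule from hyper_parameters.optimizer and derives step_per_epoch from corpus_size instead of reading config.txt. Rough arithmetic for the config above (note that exponential_decay interpolates smoothly unless staircase mode is enabled):

```python
# Back-of-the-envelope check of the decay schedule implied by the gnn config:
# corpus_size=719470, batch_size=100, decay_steps=3, decay_rate=0.1, lr=0.001.
corpus_size, batch_size = 719470, 100
decay_steps, decay_rate, base_lr = 3, 0.1, 0.001

step_per_epoch = corpus_size // batch_size     # 7194 mini-batches per epoch
decay_window = decay_steps * step_per_epoch    # 21582 steps per 10x decay
lr_after_window = base_lr * decay_rate         # 0.0001
print(step_per_epoch, decay_window, lr_after_window)
```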
......@@ -23,9 +23,7 @@ from paddlerec.core.utils import envs
class TrainReader(Reader):
def init(self):
self.batch_size = envs.get_global_env("batch_size", None,
"train.reader")
self.batch_size = envs.get_global_env("dataset.dataset_train.batch_size")
self.input = []
self.length = None
......