提交 308fe8ca 编写于 作者: C chengmo

add tdm infer

上级 decaa00f
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
......@@ -116,17 +116,19 @@ class SingleTrainer(TranspileTrainer):
context['status'] = 'infer_pass'
def infer(self, context):
logger.info("Run in infer pass")
infer_program = fluid.Program()
startup_program = fluid.Program()
with fluid.unique_name.guard():
with fluid.program_guard(infer_program, startup_program):
self.model.infer_net()
logger.info("End build infer net")
if self.model._infer_data_loader is None:
context['status'] = 'terminal_pass'
return
reader = self._get_dataloader("Evaluate")
logger.info("End Get data loader")
metrics_varnames = []
metrics_format = []
......@@ -142,7 +144,8 @@ class SingleTrainer(TranspileTrainer):
self._exe.run(startup_program)
for (epoch, model_dir) in self.increment_models:
print("Begin to infer epoch {}, model_dir: {}".format(epoch, model_dir))
logger.info(
"Begin to infer epoch {}, model_dir: {}".format(epoch, model_dir))
program = infer_program.clone()
fluid.io.load_persistables(self._exe, model_dir, program)
reader.start()
......@@ -161,7 +164,7 @@ class SingleTrainer(TranspileTrainer):
batch_id += 1
except fluid.core.EOFException:
reader.reset()
context['status'] = 'terminal_pass'
def terminal(self, context):
......
......@@ -48,12 +48,13 @@ class TranspileTrainer(Trainer):
batch_size = envs.get_global_env("batch_size", None, namespace)
reader_class = envs.get_global_env("class", None, namespace)
print("batch_size: {}".format(batch_size))
reader = dataloader_instance.dataloader(
reader_class, state, self._config_yaml)
reader = dataloader_instance.dataloader(reader_class, state, self._config_yaml)
reader_class = envs.lazy_instance_by_fliename(reader_class, class_name)
reader_ins = reader_class(self._config_yaml)
if hasattr(reader_ins,'generate_batch_from_trainfiles'):
if hasattr(reader_ins, 'generate_batch_from_trainfiles'):
dataloader.set_sample_list_generator(reader)
else:
dataloader.set_sample_generator(reader, batch_size)
......@@ -63,23 +64,27 @@ class TranspileTrainer(Trainer):
if state == "TRAIN":
inputs = self.model.get_inputs()
namespace = "train.reader"
train_data_path = envs.get_global_env("train_data_path", None, namespace)
train_data_path = envs.get_global_env(
"train_data_path", None, namespace)
else:
inputs = self.model.get_infer_inputs()
namespace = "evaluate.reader"
train_data_path = envs.get_global_env("test_data_path", None, namespace)
train_data_path = envs.get_global_env(
"test_data_path", None, namespace)
threads = int(envs.get_runtime_environ("train.trainer.threads"))
batch_size = envs.get_global_env("batch_size", None, namespace)
reader_class = envs.get_global_env("class", None, namespace)
abs_dir = os.path.dirname(os.path.abspath(__file__))
reader = os.path.join(abs_dir, '../utils', 'dataset_instance.py')
pipe_cmd = "python {} {} {} {}".format(reader, reader_class, state, self._config_yaml)
pipe_cmd = "python {} {} {} {}".format(
reader, reader_class, state, self._config_yaml)
if train_data_path.startswith("fleetrec::"):
package_base = envs.get_runtime_environ("PACKAGE_BASE")
assert package_base is not None
train_data_path = os.path.join(package_base, train_data_path.split("::")[1])
train_data_path = os.path.join(
package_base, train_data_path.split("::")[1])
dataset = fluid.DatasetFactory().create_dataset()
dataset.set_use_var(inputs)
......@@ -105,18 +110,23 @@ class TranspileTrainer(Trainer):
return epoch_id % epoch_interval == 0
def save_inference_model():
save_interval = envs.get_global_env("save.inference.epoch_interval", -1, namespace)
save_interval = envs.get_global_env(
"save.inference.epoch_interval", -1, namespace)
if not need_save(epoch_id, save_interval, False):
return
print("save inference model is not supported now.")
return
feed_varnames = envs.get_global_env("save.inference.feed_varnames", None, namespace)
fetch_varnames = envs.get_global_env("save.inference.fetch_varnames", None, namespace)
fetch_vars = [fluid.default_main_program().global_block().vars[varname] for varname in fetch_varnames]
dirname = envs.get_global_env("save.inference.dirname", None, namespace)
feed_varnames = envs.get_global_env(
"save.inference.feed_varnames", None, namespace)
fetch_varnames = envs.get_global_env(
"save.inference.fetch_varnames", None, namespace)
fetch_vars = [fluid.default_main_program().global_block().vars[varname]
for varname in fetch_varnames]
dirname = envs.get_global_env(
"save.inference.dirname", None, namespace)
assert dirname is not None
dirname = os.path.join(dirname, str(epoch_id))
......@@ -124,16 +134,19 @@ class TranspileTrainer(Trainer):
if is_fleet:
fleet.save_inference_model(dirname, feed_varnames, fetch_vars)
else:
fluid.io.save_inference_model(dirname, feed_varnames, fetch_vars, self._exe)
fluid.io.save_inference_model(
dirname, feed_varnames, fetch_vars, self._exe)
self.inference_models.append((epoch_id, dirname))
def save_persistables():
save_interval = envs.get_global_env("save.increment.epoch_interval", -1, namespace)
save_interval = envs.get_global_env(
"save.increment.epoch_interval", -1, namespace)
if not need_save(epoch_id, save_interval, False):
return
dirname = envs.get_global_env("save.increment.dirname", None, namespace)
dirname = envs.get_global_env(
"save.increment.dirname", None, namespace)
assert dirname is not None
dirname = os.path.join(dirname, str(epoch_id))
......@@ -146,7 +159,6 @@ class TranspileTrainer(Trainer):
save_persistables()
save_inference_model()
def instance(self, context):
models = envs.get_global_env("train.model.models")
......
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
......@@ -17,7 +17,7 @@ train:
# for cluster training
strategy: "async"
epochs: 4
epochs: 2
workspace: "fleetrec.models.recall.tdm"
reader:
......@@ -65,9 +65,16 @@ train:
save:
increment:
dirname: "increment"
epoch_interval: 2
epoch_interval: 1
save_last: True
inference:
dirname: "inference"
epoch_interval: 4
epoch_interval: 10
save_last: True
evaluate:
workspace: "fleetrec.models.recall.tdm"
reader:
batch_size: 1
class: "{workspace}/tdm_evaluate_reader.py"
test_data_path: "{workspace}/data/test"
......@@ -37,9 +37,9 @@ class Model(ModelBase):
"tree_parameters.layer_node_num_list", [
2, 4, 7, 12], self._namespace)
self.child_nums = envs.get_global_env(
"tree_parameters.node_nums", 2, self._namespace)
self.tree_layer_init_path = envs.get_global_env(
"tree_parameters.tree_layer_init_path", None, self._namespace)
"tree_parameters.child_nums", 2, self._namespace)
self.tree_layer_path = envs.get_global_env(
"tree.tree_layer_path", None, "train.startup")
# model training hyper parameter
self.node_emb_size = envs.get_global_env(
......@@ -56,7 +56,7 @@ class Model(ModelBase):
self.topK = envs.get_global_env(
"hyper_parameters.node_nums", 1, self._namespace)
self.batch_size = envs.get_global_env(
"batch_size", 32, "train.reader")
"batch_size", 1, "evaluate.reader")
def train_net(self):
self.train_input()
......@@ -287,16 +287,15 @@ class Model(ModelBase):
shape=[self.input_emb_size],
dtype="float32",
)
self._data_var.append(input_emb)
self._infer_data_var.append(input_emb)
if self._platform != "LINUX":
self._data_loader = fluid.io.DataLoader.from_generator(
feed_list=self._data_var, capacity=64, use_double_buffer=False, iterable=False)
self._infer_data_loader = fluid.io.DataLoader.from_generator(
feed_list=self._infer_data_var, capacity=64, use_double_buffer=False, iterable=False)
def get_layer_list(self):
"""get layer list from layer_list.txt"""
layer_list = []
with open(self.tree_layer_init_path, 'r') as fin:
with open(self.tree_layer_path, 'r') as fin:
for line in fin.readlines():
l = []
layer = (line.split('\n'))[0].split(',')
......@@ -304,7 +303,7 @@ class Model(ModelBase):
if node:
l.append(node)
layer_list.append(l)
return layer_list
self.layer_list = layer_list
def create_first_layer(self):
"""decide which layer to start infer"""
......@@ -318,16 +317,15 @@ class Model(ModelBase):
self.first_layer_idx = first_layer_id
node_list = []
mask_list = []
for id in node_list:
for id in first_layer_node:
node_list.append(fluid.layers.fill_constant(
[self.batch_size, 1], value=id, dtype='int64'))
[self.batch_size, 1], value=int(id), dtype='int64'))
mask_list.append(fluid.layers.fill_constant(
[self.batch_size, 1], value=0, dtype='int64'))
self.first_layer_node = fluid.layers.concat(node_list, axis=1)
self.first_layer_node_mask = fluid.layers.concat(mask_list, axis=1)
def tdm_infer_net(self, inputs):
def tdm_infer_net(self):
"""
infer的主要流程
infer的基本逻辑是:从上层开始(具体层idx由树结构及TopK值决定)
......@@ -336,14 +334,13 @@ class Model(ModelBase):
3、循环1、2步骤,遍历完所有层,得到每一层筛选结果的集合
4、将筛选结果集合中的叶子节点,拿出来再做一次topK,得到最终的召回输出
"""
input_emb = self._data_var[0]
input_emb = self._infer_data_var[0]
node_score = []
node_list = []
current_layer_node = self.first_layer_node
current_layer_node_mask = self.first_layer_node_mask
input_trans_emb = self.input_trans_net.input_fc_infer(input_emb)
input_trans_emb = self.input_fc_infer(input_emb)
for layer_idx in range(self.first_layer_idx, self.max_layers):
# 确定当前层的需要计算的节点数
......@@ -357,10 +354,9 @@ class Model(ModelBase):
current_layer_node, [-1, current_layer_node_num])
current_layer_node_mask = fluid.layers.reshape(
current_layer_node_mask, [-1, current_layer_node_num])
node_emb = fluid.embedding(
input=current_layer_node,
size=[self.node_nums, self.node_embed_size],
size=[self.node_nums, self.node_emb_size],
param_attr=fluid.ParamAttr(name="TDM_Tree_Emb"))
input_fc_out = self.layer_fc_infer(
......@@ -434,6 +430,7 @@ class Model(ModelBase):
res_item = fluid.layers.slice(
res_node_emb, axes=[2], starts=[0], ends=[1])
self.res_item_re = fluid.layers.reshape(res_item, [-1, self.topK])
self._infer_results["item"] = self.res_item_re
def input_fc_infer(self, input_emb):
"""
......
# -*- coding=utf8 -*-
"""
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
from __future__ import print_function
from fleetrec.core.reader import Reader
class EvaluateReader(Reader):
def init(self):
pass
def generate_sample(self, line):
"""
Read the data line by line and process it as a dictionary
"""
def reader():
"""
This function needs to be implemented by the user, based on data format
"""
features = (line.strip('\n')).split('\t')
input_emb = map(float, features[0].split(' '))
feature_name = ["input_emb"]
yield zip(feature_name, [input_emb])
return reader
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
文件模式从 100644 更改为 100755
......@@ -25,14 +25,19 @@ class Model(ModelBase):
ModelBase.__init__(self, config)
def input(self):
neg_num = int(envs.get_global_env("hyper_parameters.neg_num", None, self._namespace))
self.input_word = fluid.data(name="input_word", shape=[None, 1], dtype='int64')
self.true_word = fluid.data(name='true_label', shape=[None, 1], dtype='int64')
neg_num = int(envs.get_global_env(
"hyper_parameters.neg_num", None, self._namespace))
self.input_word = fluid.data(name="input_word", shape=[
None, 1], dtype='int64')
self.true_word = fluid.data(name='true_label', shape=[
None, 1], dtype='int64')
self._data_var.append(self.input_word)
self._data_var.append(self.true_word)
with_shuffle_batch = bool(int(envs.get_global_env("hyper_parameters.with_shuffle_batch", None, self._namespace)))
with_shuffle_batch = bool(int(envs.get_global_env(
"hyper_parameters.with_shuffle_batch", None, self._namespace)))
if not with_shuffle_batch:
self.neg_word = fluid.data(name="neg_label", shape=[None, neg_num], dtype='int64')
self.neg_word = fluid.data(name="neg_label", shape=[
None, neg_num], dtype='int64')
self._data_var.append(self.neg_word)
if self._platform != "LINUX":
......@@ -41,10 +46,14 @@ class Model(ModelBase):
def net(self):
is_distributed = True if envs.get_trainer() == "CtrTrainer" else False
neg_num = int(envs.get_global_env("hyper_parameters.neg_num", None, self._namespace))
sparse_feature_number = envs.get_global_env("hyper_parameters.sparse_feature_number", None, self._namespace)
sparse_feature_dim = envs.get_global_env("hyper_parameters.sparse_feature_dim", None, self._namespace)
with_shuffle_batch = bool(int(envs.get_global_env("hyper_parameters.with_shuffle_batch", None, self._namespace)))
neg_num = int(envs.get_global_env(
"hyper_parameters.neg_num", None, self._namespace))
sparse_feature_number = envs.get_global_env(
"hyper_parameters.sparse_feature_number", None, self._namespace)
sparse_feature_dim = envs.get_global_env(
"hyper_parameters.sparse_feature_dim", None, self._namespace)
with_shuffle_batch = bool(int(envs.get_global_env(
"hyper_parameters.with_shuffle_batch", None, self._namespace)))
def embedding_layer(input, table_name, emb_dim, initializer_instance=None, squeeze=False):
emb = fluid.embedding(
......@@ -65,28 +74,38 @@ class Model(ModelBase):
emb_initializer = fluid.initializer.Uniform(-init_width, init_width)
emb_w_initializer = fluid.initializer.Constant(value=0.0)
input_emb = embedding_layer(self.input_word, "emb", sparse_feature_dim, emb_initializer, True)
true_emb_w = embedding_layer(self.true_word, "emb_w", sparse_feature_dim, emb_w_initializer, True)
true_emb_b = embedding_layer(self.true_word, "emb_b", 1, emb_w_initializer, True)
input_emb = embedding_layer(
self.input_word, "emb", sparse_feature_dim, emb_initializer, True)
true_emb_w = embedding_layer(
self.true_word, "emb_w", sparse_feature_dim, emb_w_initializer, True)
true_emb_b = embedding_layer(
self.true_word, "emb_b", 1, emb_w_initializer, True)
if with_shuffle_batch:
neg_emb_w_list = []
for i in range(neg_num):
neg_emb_w_list.append(fluid.contrib.layers.shuffle_batch(true_emb_w)) # shuffle true_word
neg_emb_w_concat = fluid.layers.concat(neg_emb_w_list, axis=0)
neg_emb_w = fluid.layers.reshape(neg_emb_w_concat, shape=[-1, neg_num, sparse_feature_dim])
neg_emb_w_list.append(fluid.contrib.layers.shuffle_batch(
true_emb_w)) # shuffle true_word
neg_emb_w_concat = fluid.layers.concat(neg_emb_w_list, axis=0)
neg_emb_w = fluid.layers.reshape(
neg_emb_w_concat, shape=[-1, neg_num, sparse_feature_dim])
neg_emb_b_list = []
for i in range(neg_num):
neg_emb_b_list.append(fluid.contrib.layers.shuffle_batch(true_emb_b)) # shuffle true_word
neg_emb_b_list.append(fluid.contrib.layers.shuffle_batch(
true_emb_b)) # shuffle true_word
neg_emb_b = fluid.layers.concat(neg_emb_b_list, axis=0)
neg_emb_b_vec = fluid.layers.reshape(neg_emb_b, shape=[-1, neg_num])
neg_emb_b_vec = fluid.layers.reshape(
neg_emb_b, shape=[-1, neg_num])
else:
neg_emb_w = embedding_layer(self.neg_word, "emb_w", sparse_feature_dim, emb_w_initializer)
neg_emb_b = embedding_layer(self.neg_word, "emb_b", 1, emb_w_initializer)
neg_emb_b_vec = fluid.layers.reshape(neg_emb_b, shape=[-1, neg_num])
neg_emb_w = embedding_layer(
self.neg_word, "emb_w", sparse_feature_dim, emb_w_initializer)
neg_emb_b = embedding_layer(
self.neg_word, "emb_b", 1, emb_w_initializer)
neg_emb_b_vec = fluid.layers.reshape(
neg_emb_b, shape=[-1, neg_num])
true_logits = fluid.layers.elementwise_add(
fluid.layers.reduce_sum(
fluid.layers.elementwise_mul(input_emb, true_emb_w),
......@@ -95,17 +114,18 @@ class Model(ModelBase):
true_emb_b)
input_emb_re = fluid.layers.reshape(
input_emb, shape=[-1, 1, sparse_feature_dim])
neg_matmul = fluid.layers.matmul(input_emb_re, neg_emb_w, transpose_y=True)
input_emb, shape=[-1, 1, sparse_feature_dim])
neg_matmul = fluid.layers.matmul(
input_emb_re, neg_emb_w, transpose_y=True)
neg_logits = fluid.layers.elementwise_add(
fluid.layers.reshape(neg_matmul, shape=[-1, neg_num]),
neg_emb_b_vec)
label_ones = fluid.layers.fill_constant_batch_size_like(
label_ones = fluid.layers.fill_constant_batch_size_like(
true_logits, shape=[-1, 1], value=1.0, dtype='float32')
label_zeros = fluid.layers.fill_constant_batch_size_like(
true_logits, shape=[-1, neg_num], value=0.0, dtype='float32')
true_xent = fluid.layers.sigmoid_cross_entropy_with_logits(true_logits,
label_ones)
neg_xent = fluid.layers.sigmoid_cross_entropy_with_logits(neg_logits,
......@@ -116,10 +136,12 @@ class Model(ModelBase):
fluid.layers.reduce_sum(
neg_xent, dim=1))
self.avg_cost = fluid.layers.reduce_mean(cost)
global_right_cnt = fluid.layers.create_global_var(name="global_right_cnt", persistable=True, dtype='float32', shape=[1], value=0)
global_total_cnt = fluid.layers.create_global_var(name="global_total_cnt", persistable=True, dtype='float32', shape=[1], value=0)
global_right_cnt = fluid.layers.create_global_var(
name="global_right_cnt", persistable=True, dtype='float32', shape=[1], value=0)
global_total_cnt = fluid.layers.create_global_var(
name="global_total_cnt", persistable=True, dtype='float32', shape=[1], value=0)
global_right_cnt.stop_gradient = True
global_total_cnt.stop_gradient = True
global_total_cnt.stop_gradient = True
def avg_loss(self):
self._cost = self.avg_cost
......@@ -134,9 +156,12 @@ class Model(ModelBase):
self.metrics()
def optimizer(self):
learning_rate = envs.get_global_env("hyper_parameters.learning_rate", None, self._namespace)
decay_steps = envs.get_global_env("hyper_parameters.decay_steps", None, self._namespace)
decay_rate = envs.get_global_env("hyper_parameters.decay_rate", None, self._namespace)
learning_rate = envs.get_global_env(
"hyper_parameters.learning_rate", None, self._namespace)
decay_steps = envs.get_global_env(
"hyper_parameters.decay_steps", None, self._namespace)
decay_rate = envs.get_global_env(
"hyper_parameters.decay_rate", None, self._namespace)
optimizer = fluid.optimizer.SGD(
learning_rate=fluid.layers.exponential_decay(
learning_rate=learning_rate,
......@@ -146,19 +171,27 @@ class Model(ModelBase):
return optimizer
def analogy_input(self):
sparse_feature_number = envs.get_global_env("hyper_parameters.sparse_feature_number", None, self._namespace)
self.analogy_a = fluid.data(name="analogy_a", shape=[None], dtype='int64')
self.analogy_b = fluid.data(name="analogy_b", shape=[None], dtype='int64')
self.analogy_c = fluid.data(name="analogy_c", shape=[None], dtype='int64')
self.analogy_d = fluid.data(name="analogy_d", shape=[None], dtype='int64')
self._infer_data_var = [self.analogy_a, self.analogy_b, self.analogy_c, self.analogy_d]
sparse_feature_number = envs.get_global_env(
"hyper_parameters.sparse_feature_number", None, self._namespace)
self.analogy_a = fluid.data(
name="analogy_a", shape=[None], dtype='int64')
self.analogy_b = fluid.data(
name="analogy_b", shape=[None], dtype='int64')
self.analogy_c = fluid.data(
name="analogy_c", shape=[None], dtype='int64')
self.analogy_d = fluid.data(
name="analogy_d", shape=[None], dtype='int64')
self._infer_data_var = [self.analogy_a,
self.analogy_b, self.analogy_c, self.analogy_d]
self._infer_data_loader = fluid.io.DataLoader.from_generator(
feed_list=self._infer_data_var, capacity=64, use_double_buffer=False, iterable=False)
def infer_net(self):
sparse_feature_dim = envs.get_global_env("hyper_parameters.sparse_feature_dim", None, self._namespace)
sparse_feature_number = envs.get_global_env("hyper_parameters.sparse_feature_number", None, self._namespace)
sparse_feature_dim = envs.get_global_env(
"hyper_parameters.sparse_feature_dim", None, self._namespace)
sparse_feature_number = envs.get_global_env(
"hyper_parameters.sparse_feature_number", None, self._namespace)
def embedding_layer(input, table_name, initializer_instance=None):
emb = fluid.embedding(
......@@ -166,30 +199,36 @@ class Model(ModelBase):
size=[sparse_feature_number, sparse_feature_dim],
param_attr=table_name)
return emb
self.analogy_input()
all_label = np.arange(sparse_feature_number).reshape(sparse_feature_number).astype('int32')
self.all_label = fluid.layers.cast(x=fluid.layers.assign(all_label), dtype='int64')
all_label = np.arange(sparse_feature_number).reshape(
sparse_feature_number).astype('int32')
self.all_label = fluid.layers.cast(
x=fluid.layers.assign(all_label), dtype='int64')
emb_all_label = embedding_layer(self.all_label, "emb")
emb_a = embedding_layer(self.analogy_a, "emb")
emb_b = embedding_layer(self.analogy_b, "emb")
emb_c = embedding_layer(self.analogy_c, "emb")
target = fluid.layers.elementwise_add(
fluid.layers.elementwise_sub(emb_b, emb_a), emb_c)
emb_all_label_l2 = fluid.layers.l2_normalize(x=emb_all_label, axis=1)
dist = fluid.layers.matmul(x=target, y=emb_all_label_l2, transpose_y=True)
dist = fluid.layers.matmul(
x=target, y=emb_all_label_l2, transpose_y=True)
values, pred_idx = fluid.layers.topk(input=dist, k=4)
label = fluid.layers.expand(fluid.layers.unsqueeze(self.analogy_d, axes=[1]), expand_times=[1, 4])
label = fluid.layers.expand(fluid.layers.unsqueeze(
self.analogy_d, axes=[1]), expand_times=[1, 4])
label_ones = fluid.layers.fill_constant_batch_size_like(
label, shape=[-1, 1], value=1.0, dtype='float32')
right_cnt = fluid.layers.reduce_sum(
input=fluid.layers.cast(fluid.layers.equal(pred_idx, label), dtype='float32'))
total_cnt = fluid.layers.reduce_sum(label_ones)
global_right_cnt = fluid.layers.create_global_var(name="global_right_cnt", persistable=True, dtype='float32', shape=[1], value=0)
global_total_cnt = fluid.layers.create_global_var(name="global_total_cnt", persistable=True, dtype='float32', shape=[1], value=0)
global_right_cnt = fluid.layers.create_global_var(
name="global_right_cnt", persistable=True, dtype='float32', shape=[1], value=0)
global_total_cnt = fluid.layers.create_global_var(
name="global_total_cnt", persistable=True, dtype='float32', shape=[1], value=0)
global_right_cnt.stop_gradient = True
global_total_cnt.stop_gradient = True
......@@ -197,6 +236,7 @@ class Model(ModelBase):
fluid.layers.assign(tmp1, global_right_cnt)
tmp2 = fluid.layers.elementwise_add(total_cnt, global_total_cnt)
fluid.layers.assign(tmp2, global_total_cnt)
acc = fluid.layers.elementwise_div(global_right_cnt, global_total_cnt, name="total_acc")
acc = fluid.layers.elementwise_div(
global_right_cnt, global_total_cnt, name="total_acc")
self._infer_results['acc'] = acc
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册