双塔模型利用merge_v2_model打包网络参数之后仅一侧网络可用
Created by: duanyuzhuo
模型:DSSM 改进版本。左侧为全连接网络(输入为 sparse binary vector);右侧包括 CNN、GRU、FC 三个分支(输入为 sequence)。版本:v2,使用的打包方式是 paddle.utils.merge_model 自带的 merge_v2_model。环境:paddle C++ 接口。
问题:双塔模型两侧网络结构不同,使用方式为将模型参数统一打包,paddle_model读取;根据网络结构和对应的名称获取单侧paddle_machine。线上效果为某一侧网络正常输出结果,另一侧输出为-nan的多维向量。该模型离线预测正常。
尝试:1. 如果修改network中预测网络的输出之后打包参数,比如改为右侧网络输出,则右侧网络输出正常,但是左侧网络输出变成-nan的多维向量。 2. 如果训练一个参数少一点的模型可以得到两侧都正确的输出
推测:1. 可能模型只加载了一侧 2. 可能模型输入的特征都在一侧网络导致另一侧网络输入为空
模型network代码如下:
import paddle.v2 as paddle
from paddle.v2.attr import ParamAttr
from reader import logger
# Column order of the data fields, in reader output order: user features
# first, then the positive item, the four negatives, and finally the label.
_TRAIN_FIELDS = ('user_data', 'pos_data', 'neg1_data', 'neg2_data',
                 'neg3_data', 'neg4_data', 'label_data')

# Field -> column index used when feeding the trainer.
tr_feeding_index = {field: col for col, field in enumerate(_TRAIN_FIELDS)}

# At inference time only the user features and a single item are fed.
in_feeding_index = {field: col for col, field in enumerate(_TRAIN_FIELDS[:2])}
#
class DSSM_3_model(object):
    """A two-tower (DSSM-style) recommendation model.

    Left tower (user side): stacked fully connected layers over a sparse
    binary user feature vector.  Right tower (item side): the item id
    sequence is encoded by three parallel branches -- a CNN, a
    bidirectional GRU and a max-pooled FC -- whose outputs are
    concatenated and passed through a shared DNN.

    Training ranks one positive item against four negatives: the five
    user/item cosine similarities are concatenated, softmax-normalized
    and trained with a classification cost.  Inference builds only the
    user/positive-item path and outputs their cosine similarity.
    """

    def __init__(self,
                 user_dim,
                 ad_dim,
                 label_dim,
                 factor_size,
                 user_embedding_size,
                 ad_embedding_size,
                 fc_size_vec,
                 dssm_coe,
                 gru_cnn_size,
                 is_infer=False):
        """Store the hyper-parameters and build the network.

        :param user_dim: dimension of the sparse binary user vector.
        :param ad_dim: vocabulary size of the item id sequence.
        :param label_dim: number of classes of the training label.
        :param factor_size: latent size used by fm_layer (helper; not
            referenced by the towers built in this file).
        :param user_embedding_size: output size of the first FC layer of
            the user tower.
        :param ad_embedding_size: embedding width for item ids.
        :param fc_size_vec: list of hidden sizes for the stacked FC layers
            of both towers.
        :param dssm_coe: scale applied to the cosine similarities during
            training.
        :param gru_cnn_size: hidden size shared by the CNN/GRU/FC item
            branches.
        :param is_infer: build only the user/positive-item path when True.
        """
        self.user_dim = user_dim
        self.ad_dim = ad_dim
        self.label_dim = label_dim
        self.factor_size = factor_size
        self.user_embedding_size = user_embedding_size
        self.ad_embedding_size = ad_embedding_size
        self.fc_size_vec = fc_size_vec
        self.dssm_coe = dssm_coe
        self.gru_cnn_size = gru_cnn_size
        self.is_infer = is_infer
        self._declare_input_layers()
        self.model = self._build_model()

    def _declare_input_layers(self):
        """Declare the paddle data layers.

        The negative items and the label are only needed for training,
        so they are not declared when ``is_infer`` is True.
        """
        # Sparse binary user features: left tower input.
        self.user_data = paddle.layer.data(
            name="user_data",
            type=paddle.data_type.sparse_binary_vector(self.user_dim)
        )
        # Positive item id sequence: right tower input.
        self.pos_data = paddle.layer.data(
            name="pos_data",
            type=paddle.data_type.integer_value_sequence(self.ad_dim)
        )
        if not self.is_infer:
            self.neg1_data = paddle.layer.data(
                name="neg1_data",
                type=paddle.data_type.integer_value_sequence(self.ad_dim)
            )
            self.neg2_data = paddle.layer.data(
                name="neg2_data",
                type=paddle.data_type.integer_value_sequence(self.ad_dim)
            )
            self.neg3_data = paddle.layer.data(
                name="neg3_data",
                type=paddle.data_type.integer_value_sequence(self.ad_dim)
            )
            self.neg4_data = paddle.layer.data(
                name="neg4_data",
                type=paddle.data_type.integer_value_sequence(self.ad_dim)
            )
            self.label_data = paddle.layer.data(
                name="label_data",
                type=paddle.data_type.integer_value(self.label_dim)
            )

    def fm_layer(self, input):
        """Factorization-machine layer: first-order linear FC plus a
        second-order FM term, combined by a bias-free linear addto.

        NOTE(review): not called by any of the network builders in this
        file -- dead code unless used by an external caller.
        """
        first_order = paddle.layer.fc(
            input=input, size=1, act=paddle.activation.Linear())
        second_order = paddle.layer.factorization_machine(
            input, self.factor_size)
        fm = paddle.layer.addto(input=[first_order, second_order],
                                act=paddle.activation.Linear(),
                                bias_attr=False)
        return fm

    def create_cnn(self, input_data):
        """CNN branch of the item tower: embedding + context-3
        sequence-conv-pool.  Parameters are shared across all item
        towers via the explicit ParamAttr names.
        """
        source_embedding = paddle.layer.embedding(
            input=input_data,
            size=self.ad_embedding_size,
            param_attr=ParamAttr(name="__cnn_emb.w"))
        conv_vec_2 = paddle.networks.sequence_conv_pool(
            input=source_embedding,
            context_len=3,
            hidden_size=self.gru_cnn_size,
            context_proj_param_attr=ParamAttr(name="__cnn_contex_proj_2.w"),
            fc_param_attr=ParamAttr(name="__cnn_fc_2.w"),
            fc_bias_attr=ParamAttr(name="__cnn_fc_2.b"),
            pool_bias_attr=ParamAttr(name="__cnn_pool_2.b"))
        return conv_vec_2

    def create_rnn(self, input_data):
        """GRU branch of the item tower: embedding + bidirectional GRU."""
        source_embedding = paddle.layer.embedding(
            input=input_data,
            size=self.ad_embedding_size,
            param_attr=ParamAttr(name="__rnn_emb.w"))
        source_vec = paddle.networks.bidirectional_gru(
            input=source_embedding,
            size=self.gru_cnn_size,
            fwd_act=paddle.activation.Tanh(),
            fwd_gate_act=paddle.activation.Sigmoid(),
            bwd_act=paddle.activation.Tanh(),
            bwd_gate_act=paddle.activation.Sigmoid(),
            fwd_mixed_param_attr=ParamAttr(name="__rnn_fwd_mix.w"),
            fwd_mixed_bias_attr=ParamAttr(name="__rnn_fwd_mix.b"),
            fwd_gru_param_attr=ParamAttr(name="__rnn_fwd_gru.w"),
            fwd_gru_bias_attr=ParamAttr(name="__rnn_fwd_gru.b"),
            bwd_mixed_param_attr=ParamAttr(name="__rnn_bwd_mix.w"),
            bwd_mixed_bias_attr=ParamAttr(name="__rnn_bwd_mix.b"),
            bwd_gru_param_attr=ParamAttr(name="__rnn_bwd_gru.w"),
            bwd_gru_bias_attr=ParamAttr(name="__rnn_bwd_gru.b"))
        return source_vec

    def create_fc(self, input_data):
        """FC branch of the item tower: embedding + max pooling + FC.

        NOTE(review): the embedding reuses the "__cnn_emb.w" parameter
        name from create_cnn, so this branch shares its embedding table
        with the CNN branch (instead of a dedicated "__fcp_emb.w") --
        confirm this sharing is intended.
        """
        source_embedding = paddle.layer.embedding(
            input=input_data,
            size=self.ad_embedding_size,
            param_attr=ParamAttr(name="__cnn_emb.w"))
        source_pool = paddle.layer.pooling(
            input=source_embedding, pooling_type=paddle.pooling.Max())
        fc_vec = paddle.layer.fc(
            input=source_pool,
            size=self.gru_cnn_size,
            act=paddle.activation.Tanh(),
            param_attr=ParamAttr(name="__fcp_fc.w"),
            bias_attr=ParamAttr(name="__fcp_fc.b"))
        return fc_vec

    def create_dnn(self, sent_vec):
        """Stacked Tanh FC layers over the concatenated item branches,
        one layer per entry of ``fc_size_vec``.
        """
        input_vec = sent_vec
        for idx, dim in enumerate(self.fc_size_vec):
            # Layer dim is encoded in the parameter name so retraining
            # with different sizes cannot silently reuse old parameters.
            name = "__dnn_fc_%d_%d" % (idx, dim)
            logger.info("create fc layer [%s] which dim is %d" \
                        % (name, dim))
            fc = paddle.layer.fc(
                input=input_vec,
                size=dim,
                act=paddle.activation.Tanh(),
                param_attr=ParamAttr(name=("%s.w" % name)),
                bias_attr=ParamAttr(name=("%s.b" % name)))
            input_vec = fc
        return input_vec

    def create_item_single_network(self, source):
        """Full item tower: concat of GRU/CNN/FC branches, then the DNN."""
        rnn_vec = self.create_rnn(source)
        cnn_vec = self.create_cnn(source)
        fc_vec = self.create_fc(source)
        merge_vec = paddle.layer.concat(input=[rnn_vec, cnn_vec, fc_vec])
        source_semantic = self.create_dnn(merge_vec)
        return source_semantic

    def create_user_network(self, user):
        """User tower: one embedding-like FC layer followed by stacked
        Tanh FC layers sized by ``fc_size_vec``.
        """
        user_emb = paddle.layer.fc(
            input=user,
            size=self.user_embedding_size,
            act=paddle.activation.Tanh(),
            param_attr=paddle.attr.ParamAttr(name='__user_emb.w'),
            bias_attr=paddle.attr.ParamAttr(name='__user_emb.b'))
        input_vec = user_emb
        for idx, dim in enumerate(self.fc_size_vec):
            name = '__user_fc_%d_%d' % (idx, dim)
            logger.info('create fc layer %s which dim is %d' % (name, dim))
            fc = paddle.layer.fc(
                input=input_vec,
                size=dim,
                act=paddle.activation.Tanh(),
                param_attr=paddle.attr.ParamAttr(name=('%s.w' % name)),
                bias_attr=paddle.attr.ParamAttr(name=('%s.b' % name)))
            input_vec = fc
        return input_vec

    def create_item_network(self, item):
        """Simple FC-only item tower.

        NOTE(review): not used by _build_model (which uses
        create_item_single_network); kept for external callers.
        """
        item_emb = paddle.layer.fc(
            input=item,
            size=self.ad_embedding_size,
            act=paddle.activation.Tanh(),
            param_attr=paddle.attr.ParamAttr(name='__item_emb.w'),
            bias_attr=paddle.attr.ParamAttr(name='__item_emb.b'))
        input_vec = item_emb
        for idx, dim in enumerate(self.fc_size_vec):
            name = '__item_fc_%d_%d' % (idx, dim)
            logger.info('create fc layer %s which dim is %d' % (name, dim))
            fc = paddle.layer.fc(
                input=input_vec,
                size=dim,
                act=paddle.activation.Tanh(),
                param_attr=paddle.attr.ParamAttr(name=('%s.w' % name)),
                bias_attr=paddle.attr.ParamAttr(name=('%s.b' % name)))
            input_vec = fc
        return input_vec

    def _build_model(self):
        """Wire the two towers together.

        Training: softmax over the scaled cosine similarities of the
        positive and the four negative item towers versus the user
        tower, optimized with a classification cost against label_data.
        Inference: the raw user/item cosine similarity.

        :return: the output layer (the cost layer when training).
        """
        if not self.is_infer:
            left_user_network = self.create_user_network(self.user_data)
            # All five item towers share parameters through the explicit
            # ParamAttr names used inside the branch builders.
            right_positive_network = self.create_item_single_network(self.pos_data)
            right_negative_1_network = self.create_item_single_network(self.neg1_data)
            right_negative_2_network = self.create_item_single_network(self.neg2_data)
            right_negative_3_network = self.create_item_single_network(self.neg3_data)
            right_negative_4_network = self.create_item_single_network(self.neg4_data)
            positive_cosine = paddle.layer.cos_sim(
                a=left_user_network,
                b=right_positive_network,
                scale=self.dssm_coe)
            negative_cosine_1 = paddle.layer.cos_sim(
                a=left_user_network,
                b=right_negative_1_network,
                scale=self.dssm_coe)
            negative_cosine_2 = paddle.layer.cos_sim(
                a=left_user_network,
                b=right_negative_2_network,
                scale=self.dssm_coe)
            negative_cosine_3 = paddle.layer.cos_sim(
                a=left_user_network,
                b=right_negative_3_network,
                scale=self.dssm_coe)
            negative_cosine_4 = paddle.layer.cos_sim(
                a=left_user_network,
                b=right_negative_4_network,
                scale=self.dssm_coe)
            concat = paddle.layer.concat(input=[positive_cosine,
                                                negative_cosine_1,
                                                negative_cosine_2,
                                                negative_cosine_3,
                                                negative_cosine_4])
            softmax = paddle.layer.mixed(
                input=paddle.layer.identity_projection(input=concat),
                act=paddle.activation.Softmax())
            self.train_cost = paddle.layer.classification_cost(
                input=softmax,
                label=self.label_data
            )
            self.output = self.train_cost
        else:
            left_user_network = self.create_user_network(self.user_data)
            right_positive_network = self.create_item_single_network(self.pos_data)
            # NOTE(review): inference uses scale=1.0 while training scales
            # by self.dssm_coe -- confirm this asymmetry is intended.
            positive_cosine = paddle.layer.cos_sim(
                a=left_user_network,
                b=right_positive_network,
                scale=1.0)
            self.output = positive_cosine
        # Fixed: the original Python-2-only `print "..."` statement is a
        # syntax error under Python 3; the parenthesized single-argument
        # form prints identically on both interpreters.
        print("network create finish~")
        return self.output
在C++接口调用测试阶段,将整体网络参数打包,利用不同的网络proto获取到网络参数,利用paddle_gradient_machine_forward函数走网络预测。
请值班同学协助排查,不胜感激。