paddle v2里如何输出中间层信息及梯度计算出现问题该如何解决?
Created by: yuanxiangxie
代码如下所示。如果想要输出 slope_intercept layer 的 output 和 gradient(想要追查梯度计算问题),该在哪里添加相应的代码?因为在训练模型的时候遇到了图片所示的梯度计算问题。
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
# Author : yuanxiangxie
# Email :
# Create Time : 2018-07-06 17:29
# Description :
# Copyright (c) 2017 . All Rights Reserved
"""
import os
import sys
reload(sys)                      # Python 2 only: re-expose setdefaultencoding
sys.setdefaultencoding('utf-8')
import paddle.v2 as paddle
import gzip
import getopt
import pickle
import shutil

# --- data locations -------------------------------------------------------
is_online = False
offline_data_dir = "."
train_data_dir = os.path.join(offline_data_dir, "train_data_dir")
test_data_dir = os.path.join(offline_data_dir, "test_data_dir")
word_dict_file = os.path.join("thirdparty", "word_dict.pkl")
if is_online:
    online_data_dir = "."
    online_train_data_dir = os.path.join(online_data_dir, "train_data_dir")
    online_test_data_dir = os.path.join(online_data_dir, "test_data_dir")
    train_data_dir = os.path.join(online_train_data_dir, "train_data")
    test_data_dir = os.path.join(online_test_data_dir, "test_data")
    # NOTE(review): this resolves to "thirdparty/thirdparty/word_dict.pkl",
    # unlike the offline path above -- confirm the doubled directory segment
    # is intentional for the online layout.
    word_dict_file = os.path.join("thirdparty", "thirdparty/word_dict.pkl")

# --- hyper parameters -----------------------------------------------------
hidden_size = 128
train_buf_size = 128
train_batch_size = 64
test_batch_size = 64
word_len_size = 106
num_passes = 1

# Load the vocabulary.  Use a context manager so the handle is closed, and
# open in binary mode -- pickle data is binary (the original left an
# unclosed text-mode handle).
with open(word_dict_file, "rb") as word_dict_fin:
    word_dict = pickle.load(word_dict_fin)
word_dict_unk = word_dict["<UNK>"]
word_dict_size = len(word_dict) + 16   # headroom beyond the raw dict size
model_save_dir = os.path.join(".", "model_dir")
# Shared embedding parameter: content tokens and the candidate word are
# embedded in the same space (see build_model_feature).
emb_share_attr = paddle.attr.Param(name = "emb_share_attr")
class ctr_model(object):
    """Two-tower CTR model built on the PaddlePaddle v2 API.

    A content tower (multi-window text CNN over token ids) and a word tower
    (word + word-length embeddings through FC layers) are scored with cosine
    similarity; the score is rescaled to [0, 1] and regressed against the
    click label with a squared-error loss.
    """

    def __init__(self):
        """Initialize the Paddle runtime from PADDLE_* environment variables."""
        paddle.init(
            use_gpu=False,
            trainer_count=int(os.getenv("PADDLE_TRAINER_COUNT", "1")),
            port=int(os.getenv("PADDLE_PORT", "22535")),
            ports_num=int(os.getenv("PADDLE_PORTS_NUM", "1")),
            num_gradient_servers=int(os.getenv("PADDLE_NUM_GRADIENT_SERVERS", "1")),
            trainer_id=int(os.getenv("PADDLE_TRAINER_ID", "0")),
            pservers=os.getenv("PADDLE_PSERVERS", "127.0.0.1")
        )

    def __parse_data_reader(self, line_list, is_infer=False):
        """Parse one tab-split record into model inputs.

        Expected layout of line_list (5 fields; field [0] is unused here):
        [1] space-separated token ids, [2] word id, [3] word-length id,
        [4] integer label.

        Returns (content, word, word_len) when is_infer is True, otherwise
        (content, word, word_len, [label]); the label is wrapped in a list
        to match the dense_vector(1) "label" data layer.
        """
        content = [int(i_item) for i_item in line_list[1].split(' ')]
        word = int(line_list[2])
        word_len = int(line_list[3])
        label = int(line_list[4])
        if is_infer:
            return content, word, word_len
        else:
            return content, word, word_len, [label]

    def data_reader(self, data_dir):
        """Return a reader factory over every file in data_dir.

        Lines that do not split into exactly 5 tab-separated fields are
        skipped silently (deliberate best-effort parsing of dirty data).
        """
        def _data_reader():
            """Generator yielding (content, word, word_len, [label]) tuples."""
            for file_name in os.listdir(data_dir):
                with open(os.path.join(data_dir, file_name), "r") as in_file:
                    for line in in_file:
                        line = line.strip('\n')
                        line_list = line.split('\t')
                        if len(line_list) != 5:
                            continue
                        (content, word, word_len, label) = self.__parse_data_reader(line_list, False)
                        yield (content, word, word_len, label)
        return _data_reader

    def get_content_feature(self, share_attr):
        """Build the content tower: embedding -> 3 conv-pool branches -> FC stack."""
        content_digit = paddle.layer.data(name="content", type=paddle.data_type.integer_value_sequence(word_dict_size))
        content_emb = paddle.layer.embedding(input=content_digit, size=128, param_attr=share_attr)
        # Three context widths (3/5/7) act like a multi-window TextCNN.
        content_conv_3 = paddle.networks.sequence_conv_pool(input=content_emb, hidden_size=128, context_len=3)
        content_conv_5 = paddle.networks.sequence_conv_pool(input=content_emb, hidden_size=128, context_len=5)
        content_conv_7 = paddle.networks.sequence_conv_pool(input=content_emb, hidden_size=128, context_len=7)
        content_fc_1 = paddle.layer.fc(input=[content_conv_3, content_conv_5, content_conv_7], size=256)
        content_dropout_1 = paddle.layer.dropout(input=content_fc_1, dropout_rate=0.5)
        content_fc_2 = paddle.layer.fc(input=content_dropout_1, size=128)
        return content_fc_2

    def get_word_feature(self, share_attr):
        """Build the word tower: word + word-length embeddings -> FC stack."""
        word_digit = paddle.layer.data(name="word", type=paddle.data_type.integer_value(word_dict_size))
        word_emb = paddle.layer.embedding(input=word_digit, size=128, param_attr=share_attr)
        word_len_digit = paddle.layer.data(name="word_len", type=paddle.data_type.integer_value(word_len_size))
        word_len_emb = paddle.layer.embedding(input=word_len_digit, size=128)
        word_concat_feature = paddle.layer.concat(input=[word_emb, word_len_emb])
        word_concat_feature_fc_1 = paddle.layer.fc(input=word_concat_feature, size=128)
        word_concat_feature_dropout_1 = paddle.layer.dropout(input=word_concat_feature_fc_1, dropout_rate=0.5)
        word_concat_feature_fc_2 = paddle.layer.fc(input=word_concat_feature_dropout_1, size=128)
        return word_concat_feature_fc_2

    def build_model_feature(self):
        """Build both towers, sharing one embedding parameter between them."""
        content_feature = self.get_content_feature(emb_share_attr)
        word_feature = self.get_word_feature(emb_share_attr)
        return content_feature, word_feature

    def train_model(self):
        """Train the model, run the test set after each pass, and checkpoint."""
        train_data_reader = paddle.batch(paddle.reader.shuffle(self.data_reader(train_data_dir), buf_size=train_buf_size), batch_size=train_batch_size)
        test_data_reader = paddle.batch(self.data_reader(test_data_dir), batch_size=test_batch_size)
        (content_feature, word_feature) = self.build_model_feature()
        label = paddle.layer.data(name="label", type=paddle.data_type.dense_vector(1))
        inference = paddle.layer.cos_sim(a=content_feature, b=word_feature, size=1)
        # Rescale cosine similarity from [-1, 1] to [0, 1] ONCE and share the
        # resulting layer.  The original evaluated "(inference + 1.0) * 0.5"
        # twice (once for the cost, once for the evaluator), which builds two
        # independent slope_intercept layers -- the evaluator then watches a
        # different layer than the one being trained.  To dump this layer's
        # output/gradient while debugging, create it explicitly with a name,
        # e.g. paddle.layer.slope_intercept(input=inference, name="prediction",
        # slope=0.5, intercept=0.5), so it can be located in the topology.
        prediction = (inference + 1.0) * 0.5
        cost = paddle.layer.square_error_cost(input=prediction, label=label)
        parameters = paddle.parameters.create(cost)
        adam_optimizer = paddle.optimizer.Adam(
            learning_rate=1e-3,
            regularization=paddle.optimizer.L2Regularization(rate=1e-3),
            model_average=paddle.optimizer.ModelAverage(average_window=0.5, max_average_window=10000))
        trainer = paddle.trainer.SGD(
            cost=cost,
            extra_layers=paddle.evaluator.classification_error(input=prediction, label=label),
            parameters=parameters,
            update_equation=adam_optimizer)
        # Maps data-layer names to positions in the reader's yielded tuples.
        feeding = {
            "content": 0,
            "word": 1,
            "word_len": 2,
            "label": 3
        }
        def event_handler(event):
            """Log progress each 10 batches; test + checkpoint at end of pass."""
            if isinstance(event, paddle.event.EndIteration):
                if event.batch_id % 10 == 0:
                    print >> sys.stderr, "[NOTICE] Pass:{} Batch:{} Cost:{:.2f} {}".format(event.pass_id, event.batch_id, event.cost, event.metrics)
            if isinstance(event, paddle.event.EndPass):
                if test_data_reader is not None:
                    result = trainer.test(reader=test_data_reader, feeding=feeding)
                    print >> sys.stderr, "[NOTICE] Test at Pass:{} {}".format(event.pass_id, result.metrics)
                if not os.path.exists(model_save_dir):
                    os.mkdir(model_save_dir)
                with gzip.open(os.path.join(model_save_dir, "ctr_model_{}.tar.gz".format(event.pass_id)), "w") as out_file:
                    trainer.save_parameter_to_tar(out_file)
                print >> sys.stderr, "[NOTICE] move model to output dir ..."
                # NOTE(review): moving the whole checkpoint dir every pass would
                # fail from the second pass on (num_passes is 1 today) --
                # revisit before raising num_passes.
                shutil.move(model_save_dir, "output/model_params")
                print >> sys.stderr, "[NOTICE] embedding feature building finished ..."
        print >> sys.stderr, "[NOTICE] train ctr model start ..."
        trainer.train(
            reader=train_data_reader,
            event_handler=event_handler,
            feeding=feeding,
            num_passes=num_passes)
        print >> sys.stderr, "[NOTICE] train ctr model finished ..."
if __name__ == '__main__':
    # Build the model (initializes the Paddle runtime) and kick off training.
    model = ctr_model()
    model.train_model()