编写DNN模型进行Regression训练,不收敛,求助排查原因。
Created by: Fangkey
基于CTR模型的Demo修改建立DNN网络进行Regression预测,训练结果不收敛,infer的结果也不正确。帮忙排查原因。
训练输出如下:
WARNING:paddle:Pass 0, Samples 0, Cost 11620.388000, {}
WARNING:paddle:Test 0-0, Cost 5308.589006, {}
WARNING:paddle:Pass 1, Samples 0, Cost 12167.511000, {}
WARNING:paddle:Test 1-0, Cost 5269.448265, {}
WARNING:paddle:Pass 2, Samples 0, Cost 12159.679000, {}
WARNING:paddle:Test 2-0, Cost 5264.473139, {}
WARNING:paddle:Pass 3, Samples 0, Cost 12154.542000, {}
WARNING:paddle:Test 3-0, Cost 5261.147334, {}
WARNING:paddle:Pass 4, Samples 0, Cost 12151.005000, {}
WARNING:paddle:Test 4-0, Cost 5258.846167, {}
WARNING:paddle:Pass 5, Samples 0, Cost 12148.595000, {}
WARNING:paddle:Test 5-0, Cost 5257.272729, {}
WARNING:paddle:Pass 6, Samples 0, Cost 12146.910000, {}
WARNING:paddle:Test 6-0, Cost 5256.165883, {}
WARNING:paddle:Pass 7, Samples 0, Cost 12145.686000, {}
WARNING:paddle:Test 7-0, Cost 5255.357524, {}
WARNING:paddle:Pass 8, Samples 0, Cost 12144.765000, {}
WARNING:paddle:Test 8-0, Cost 5254.747666, {}
WARNING:paddle:Pass 9, Samples 0, Cost 12144.054000, {}
WARNING:paddle:Test 9-0, Cost 5254.274953, {}
模型相关代码:
# 模型定义
class DNNmodel(object):
def __init__(self,
dnn_layer_dims,
dnn_input_dim,
is_infer=False):
self.dnn_layer_dims = dnn_layer_dims
self.dnn_input_dim = dnn_input_dim
self.is_infer = is_infer
self._declare_input_layers()
self.dnn = self._build_dnn_submodel_(self.dnn_layer_dims)
self.model = self._build_regression_model(self.dnn)
def _declare_input_layers(self):
self.dnn_merged_input = layer.data(
name='dnn_input',
type=dtype.sparse_vector(self.dnn_input_dim))
if not self.is_infer:
self.target = paddle.layer.data(
name='target', type=dtype.dense_vector(1))
def _build_dnn_submodel_(self, dnn_layer_dims):
dnn_embedding = layer.fc(
input=self.dnn_merged_input, size=dnn_layer_dims[0])
_input_layer = dnn_embedding
for i, dim in enumerate(dnn_layer_dims[1:]):
fc = layer.fc(
input=_input_layer,
size=dim,
act=paddle.activation.Relu(),
name='dnn-fc-%d' % i)
_input_layer = fc
return _input_layer
def _build_regression_model(self, dnn):
self.output = layer.fc(
input=dnn, size=1, act=paddle.activation.Sigmoid())
if not self.is_infer:
self.train_cost = paddle.layer.square_error_cost(
input=self.output, label=self.target)
return self.output
模型训练
dnn_layer_dims = [128, 64, 32, 1]
def train():
args = parse_args()
paddle.init(use_gpu=False, trainer_count=1)
dnn_input_dim = reader.load_data_meta(args.data_meta_file)
# create ctr model.
model = DNNmodel(
dnn_layer_dims,
dnn_input_dim,
is_infer=False)
params = paddle.parameters.create(model.train_cost)
optimizer = paddle.optimizer.AdaGrad()
trainer = paddle.trainer.SGD(
cost=model.train_cost, parameters=params, update_equation=optimizer)
dataset = reader.Dataset()
def __event_handler__(event):
if isinstance(event, paddle.event.EndIteration):
num_samples = event.batch_id * args.batch_size
if event.batch_id % 100 == 0:
logger.warning("Pass %d, Samples %d, Cost %f, %s" % (
event.pass_id, num_samples, event.cost, event.metrics))
if event.batch_id % 1000 == 0:
if args.test_data_path:
result = trainer.test(
reader=paddle.batch(
dataset.test(args.test_data_path),
batch_size=args.batch_size),
feeding=reader.feeding_index)
logger.warning("Test %d-%d, Cost %f, %s" %
(event.pass_id, event.batch_id, result.cost,
result.metrics))
path = "{}-pass-{}-batch-{}-test-{}.tar.gz".format(
args.model_output_prefix, event.pass_id, event.batch_id,
result.cost)
with gzip.open(path, 'w') as f:
params.to_tar(f)
trainer.train(
reader=paddle.batch(
paddle.reader.shuffle(
dataset.train(args.train_data_path), buf_size=500),
batch_size=args.batch_size),
feeding=reader.feeding_index,
event_handler=__event_handler__,
num_passes=args.num_passes)