Perhaps my questions are representative of most beginners. Any help is welcome.
Created by: tanhaoyu
# coding=UTF-8
import paddle.v2 as paddle
import pandas as pd
import numpy as np
from paddle.trainer_config_helpers.activations import SigmoidActivation
'''
Weight prediction project:
Description: Given one person's data for 100+ days, with two feature values per day, intake (float) and expenditure (float), and the corresponding target value, weight change (float), predict that person's weight on some future day.
Initial plan:
1. Dataset handling (done)
1.1 Read the dataset with pandas
1.2 Fill missing feature and target values with the column means (intake in, expenditure out, weight)
1.3 Drop the other, unrelated features (id, age, gender)
2. Create the reader for the PaddlePaddle framework (not sure whether this is correct)
2.1 The logic for a train_reader over the whole dataset is written. Question: should the data be handled with separate train_reader and test_reader, or can the whole dataset be used directly, with buf_size deciding how much goes to training vs. testing? (See the sketch right after this docstring.)
2.2 batch size is 1
3. Build the network structure (one input layer, one fc layer, one lstm layer, one output layer)
3.1 Input layer: type=paddle.data_type.pydp2.dense_vector(4). What should this argument be -- the number of feature values or the number of neurons? The input layer has 1 input and the hidden layer has 4 neurons.
3.2 fc layer: size = 4
3.3 lstm layer, with sigmoid as the activation function
3.4 Output layer: it just predicts a single value
4. Save the network topology (copied from the tutorial)
5. Construct the SGD trainer (a trainer that uses stochastic gradient descent)
6. Build the feeding dict (I read the official documentation but do not fully understand it)
6.1 The official documentation says:
feeding specifies how the data returned by train_reader and test_reader corresponds to the data_layers in the model configuration. In that example, column 0 of the reader output maps to the word layer and column 1 to the label layer.
7. Train for 100 passes (done)
8. Predict with infer
'''
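On question 2.1 above: if I read the documentation correctly, buf_size in paddle.reader.shuffle only sets the size of the in-memory shuffle buffer; it does not split training from test data. My tentative idea for separate readers would be to slice the preprocessed DataFrame (df1 below) before building them. A rough sketch, where the make_reader helper and the 80/20 split are only illustrative and not part of the original script:

def make_reader(frame):
    # Build a reader over an arbitrary slice of the preprocessed DataFrame df1.
    def reader():
        for _, row in frame.iterrows():
            yield [row["in"], row["out"]], [row["weight"]]
    return reader

split = int(len(df1) * 0.8)                 # e.g. first 80% of days for training
train_reader_fn = make_reader(df1.iloc[:split])
test_reader_fn = make_reader(df1.iloc[split:])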
# Fixed PaddlePaddle initialization: no GPU, trainer thread count 1
paddle.init(use_gpu=False, trainer_count=1)
# 1. Dataset handling
df1 = pd.read_csv('2_34.csv', parse_dates=["data"], index_col=3)
# Fill missing intake and expenditure values with their column means
df1_in_mean = df1["in"].mean()
df1["in"].fillna(df1_in_mean, inplace=True)
df1_out_mean = df1["out"].mean()
df1["out"].fillna(df1_out_mean, inplace=True)
# Fill missing weights with the mean weight (precision is not critical here)
df1_weight_mean = df1["weight"].mean()
df1["weight"].fillna(df1_weight_mean, inplace=True)
# Drop the unrelated columns: axis=1 drops a column, inplace=True modifies df1 in place
df1.drop('id', axis=1, inplace=True)
df1.drop('age', axis=1, inplace=True)
df1.drop('gender', axis=1, inplace=True)
# Define the reader
def train_reader():
    train_list_x = []
    train_list_y = []
    train_len = len(df1["in"])
    for i in range(train_len):
        list1 = [df1["in"][i], df1["out"][i]]
        list2 = [df1["weight"][i]]
        train_list_x.append(list1)
        train_list_y.append(list2)
    train_x = np.array(train_list_x)
    train_y = np.array(train_list_y)
    # train_x = np.array([[1, 1], [1, 2], [3, 4], [5, 2]])
    # train_y = np.array([[-2], [-3], [-7], [-7]])

    def reader():
        for i in xrange(train_y.shape[0]):
            yield train_x[i], train_y[i]

    return reader

# 2. Create the reader (not sure whether this is correct)
reader_w = train_reader()
shuffle_reader = paddle.reader.shuffle(reader_w, buf_size=128)
batch_reader = paddle.batch(shuffle_reader, batch_size=1)
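Since I am not sure the reader is correct, a quick check I could add (not part of the script above) is to pull a single sample out of the un-batched reader and look at its format:

# Pull one sample out of the un-batched reader to inspect its format.
sample_x, sample_y = next(reader_w())
print(sample_x)  # expected: array([in_value, out_value])
print(sample_y)  # expected: array([weight_value])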
# 3. Build the network structure
# Input layer. Question 3.1: should the dense_vector argument be 4, or the number of feature values?
x = paddle.layer.data(name='x', type=paddle.data_type.pydp2.dense_vector(4))
# lstm layer
y_predict = paddle.layer.v1_layers.lstmemory(input=x, act=SigmoidActivation())
# Output layer: the data layer holding the ground-truth target value
y = paddle.layer.data(name='y', type=paddle.data_type.pydp2.dense_vector(1))
# Loss: square_error_cost is the mean squared error; y_predict is the prediction and label=y is the ground truth
cost = paddle.layer.v1_layers.square_error_cost(input=y_predict, label=y)
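On question 3.1: as far as I can tell from the fit_a_line example, the argument of dense_vector is the width of a single input sample, i.e. the number of feature values, not the number of neurons. With only in and out as features that would be 2, and the 4 hidden neurons from step 3.2 would be configured on the fc layer instead. A minimal sketch of that reading (the layer names are only illustrative, and whether this also fixes the crash below is a separate question):

# Data layer width = number of features per sample (in, out) -> 2, not 4.
x_in = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(2))
# The hidden size (4 neurons, step 3.2) belongs on the fc layer, not on the data layer.
hidden = paddle.layer.fc(input=x_in, size=4, act=paddle.activation.Sigmoid())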
# 4. Save the network topology
inference_topology = paddle.topology.Topology(layers=y_predict)
with open("weight_infer.pkl", 'wb') as f:
    inference_topology.serialize_for_inference(f)
# 5. Construct the SGD trainer (a trainer that uses stochastic gradient descent)
parameters = paddle.parameters.create(cost)
# Use the Adam optimizer instead of plain momentum; the learning rate can be changed through its arguments
optimizer = paddle.optimizer.Adam()
trainer = paddle.trainer.SGD(cost=cost,
                             parameters=parameters,
                             update_equation=optimizer)
# 6. Build the feeding dict
feeding = {'x': 0, 'y': 1}
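My current understanding of step 6, for the record: the feeding dict maps each data_layer name to the position of the matching field in the tuples the reader yields. Since reader yields (train_x[i], train_y[i]), position 0 feeds the layer named 'x' and position 1 feeds 'y'. A small illustration, not extra code that needs to run:

# reader yields (train_x[i], train_y[i]):
#   position 0 -> features -> data layer 'x'
#   position 1 -> target   -> data layer 'y'
feeding = {'x': 0, 'y': 1}
# If the reader yielded (target, features) instead, this would become {'x': 1, 'y': 0}.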
# 7. Train for 100 passes
trainer.train(
    reader=batch_reader,
    feeding=feeding,
    # event_handler=event_handler_plot,
    num_passes=100)
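One thing worth noting: trainer.train is called with the event_handler commented out, so even a successful run would print nothing per batch. A minimal handler in the style of the official fit_a_line example (the interval of 10 batches is arbitrary) would be:

def event_handler(event):
    # Print the cost every 10 batches so training progress is visible.
    if isinstance(event, paddle.event.EndIteration):
        if event.batch_id % 10 == 0:
            print("Pass %d, Batch %d, Cost %f" %
                  (event.pass_id, event.batch_id, event.cost))

trainer.train(
    reader=batch_reader,
    feeding=feeding,
    event_handler=event_handler,
    num_passes=100)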
There is no result; the run aborts with:

I0306 11:24:33.605950 9099 Util.cpp:166] commandline: --use_gpu=False --trainer_count=1
I0306 11:24:33.628300 9099 GradientMachine.cpp:94] Initing parameters..
I0306 11:24:33.628324 9099 GradientMachine.cpp:101] Init parameters done.
F0306 11:24:33.699769 9099 LstmLayer.cpp:155] Check failed: input.sequenceStartPositions
*** Check failure stack trace: ***
    @ 0x7f03535639cd google::LogMessage::Fail()
    @ 0x7f035356747c google::LogMessage::SendToLog()
    @ 0x7f03535634f3 google::LogMessage::Flush()
    @ 0x7f035356898e google::LogMessageFatal::~LogMessageFatal()
    @ 0x7f0353282452 paddle::LstmLayer::forward()
    @ 0x7f0353318efd paddle::NeuralNetwork::forward()
    @ 0x7f0353319c13 paddle::GradientMachine::forwardBackward()
    @ 0x7f035353fae4 GradientMachine::forwardBackward()
    @ 0x7f03531bda69 _wrap_GradientMachine_forwardBackward
    @ 0x4cb755 PyEval_EvalFrameEx
    @ 0x4c2705 PyEval_EvalCodeEx
    @ 0x4ca7df PyEval_EvalFrameEx
    @ 0x4c2705 PyEval_EvalCodeEx
    @ 0x4ca088 PyEval_EvalFrameEx
    @ 0x4c2705 PyEval_EvalCodeEx
    @ 0x4ca088 PyEval_EvalFrameEx
    @ 0x4c2705 PyEval_EvalCodeEx
    @ 0x4c24a9 PyEval_EvalCode
    @ 0x4f19ef (unknown)
    @ 0x4ec372 PyRun_FileExFlags
    @ 0x4eaaf1 PyRun_SimpleFileExFlags
    @ 0x49e208 Py_Main
    @ 0x7f03779ad830 __libc_start_main
    @ 0x49da59 _start
    @ (nil) (unknown)
Process finished with exit code 134 (interrupted by signal 6: SIGABRT)
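My reading of the fatal check: "Check failed: input.sequenceStartPositions" in LstmLayer.cpp suggests the lstmemory layer received non-sequence input -- dense_vector is a per-sample type, so the LSTM never sees any sequence start positions. If the LSTM stays, the data layer presumably has to be a sequence type and the reader has to yield whole sequences of days; alternatively, since each training sample here is a single day, plain fc layers would avoid the sequence requirement entirely. A rough sketch of the sequence direction, assuming dense_vector_sequence, paddle.networks.simple_lstm and paddle.layer.last_seq are available in this PaddlePaddle v2 version (all sizes are illustrative):

# Each sample is a whole sequence of daily [in, out] vectors.
x_seq = paddle.layer.data(name='x', type=paddle.data_type.dense_vector_sequence(2))
# simple_lstm adds the fc projection that a raw lstmemory layer expects in front of it.
lstm = paddle.networks.simple_lstm(input=x_seq, size=4)
# Reduce the sequence to its last time step and map it to a single predicted value.
last = paddle.layer.last_seq(input=lstm)
y_predict_seq = paddle.layer.fc(input=last, size=1, act=paddle.activation.Linear())

The reader would then have to yield one list of [in, out] vectors per sample (with a matching target) instead of one day at a time.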