提交 3157ce61 编写于 作者: H Helin Wang

Simplify demo, add paddle.default_main_program() and paddle.default_startup_program()

- Removed all main_program and startup_program in the demo.
- Using paddle.default_main_program() hides the implementation detail (e.g., the use of g_main_program) from the user, so we can change the implementation much more easily in the future.
上级 c3a61349
......@@ -37,6 +37,8 @@ import model
import paddle.trainer.config_parser as cp
__all__ = [
'default_startup_program',
'default_main_program',
'optimizer',
'layer',
'activation',
......
......@@ -4,7 +4,7 @@ import collections
import numpy as np
import copy
__all__ = ['Block', 'Variable', 'Program', 'Operator']
__all__ = ['Block', 'Variable', 'Program', 'Operator', 'default_startup_program', 'default_main_program']
def unique_name(prefix):
......@@ -562,3 +562,9 @@ class Parameter(Variable):
# program is a global instance.
g_main_program = Program()
g_startup_program = Program()
def default_startup_program():
    """Return the module-level default startup program.

    This is a thin accessor around the global ``g_startup_program``
    instance, so callers do not have to reference the global by name.

    Returns:
        The global ``g_startup_program`` object (the same instance on
        every call; no copy is made).
    """
    return g_startup_program
def default_main_program():
    """Return the module-level default main program.

    This is a thin accessor around the global ``g_main_program``
    instance, so callers do not have to reference the global by name.

    Returns:
        The global ``g_main_program`` object (the same instance on
        every call; no copy is made).
    """
    return g_main_program
......@@ -2,45 +2,33 @@ import paddle.v2 as paddle
import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.core as core
import paddle.v2.fluid.optimizer as optimizer
from paddle.v2.fluid.framework import Program
import paddle.v2.fluid.framework as framework
from paddle.v2.fluid.io import save_persistables, load_persistables
from paddle.v2.fluid.executor import Executor
import numpy as np
startup_program = Program()
main_program = Program()
x = layers.data(
name='x',
shape=[13],
data_type='float32',
main_program=main_program,
startup_program=startup_program)
data_type='float32')
y_predict = layers.fc(input=x,
size=1,
act=None,
main_program=main_program,
startup_program=startup_program)
act=None)
y = layers.data(
name='y',
shape=[1],
data_type='float32',
main_program=main_program,
startup_program=startup_program)
data_type='float32')
cost = layers.square_error_cost(
input=y_predict,
label=y,
main_program=main_program,
startup_program=startup_program)
avg_cost = layers.mean(
x=cost, main_program=main_program, startup_program=startup_program)
label=y)
avg_cost = layers.mean(x=cost)
sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
opts = sgd_optimizer.minimize(avg_cost, startup_program)
opts = sgd_optimizer.minimize(avg_cost)
BATCH_SIZE = 20
......@@ -52,12 +40,12 @@ train_reader = paddle.batch(
place = core.CPUPlace()
exe = Executor(place)
exe.run(startup_program, feed={}, fetch_list=[])
exe.run(framework.default_startup_program())
PASS_NUM = 100
for pass_id in range(PASS_NUM):
save_persistables(exe, "./fit_a_line.model/", main_program=main_program)
load_persistables(exe, "./fit_a_line.model/", main_program=main_program)
save_persistables(exe, "./fit_a_line.model/")
load_persistables(exe, "./fit_a_line.model/")
for data in train_reader():
x_data = np.array(map(lambda x: x[0], data)).astype("float32")
y_data = np.array(map(lambda x: x[1], data)).astype("float32")
......@@ -69,7 +57,7 @@ for pass_id in range(PASS_NUM):
tensor_y = core.LoDTensor()
tensor_y.set(y_data, place)
# print tensor_y.get_dims()
outs = exe.run(main_program,
outs = exe.run(framework.default_main_program(),
feed={'x': tensor_x,
'y': tensor_y},
fetch_list=[avg_cost])
......
......@@ -5,19 +5,17 @@ import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.nets as nets
import paddle.v2.fluid.optimizer as optimizer
from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.framework import g_startup_program, g_main_program
import paddle.v2.fluid.framework as framework
from paddle.v2.fluid.initializer import XavierInitializer
def resnet_cifar10(input, depth=32, main_program=None, startup_program=None):
def resnet_cifar10(input, depth=32):
def conv_bn_layer(input,
ch_out,
filter_size,
stride,
padding,
act='relu',
main_program=None,
startup_program=None):
act='relu'):
tmp = layers.conv2d(
input=input,
filter_size=filter_size,
......@@ -25,14 +23,10 @@ def resnet_cifar10(input, depth=32, main_program=None, startup_program=None):
stride=stride,
padding=padding,
act=None,
bias_attr=False,
main_program=main_program,
startup_program=startup_program)
bias_attr=False)
return layers.batch_norm(
input=tmp,
act=act,
main_program=main_program,
startup_program=startup_program)
act=act)
def shortcut(input, ch_in, ch_out, stride, program, init_program):
if ch_in != ch_out:
......@@ -44,40 +38,30 @@ def resnet_cifar10(input, depth=32, main_program=None, startup_program=None):
def basicblock(input,
ch_in,
ch_out,
stride,
main_program=main_program,
startup_program=startup_program):
stride):
tmp = conv_bn_layer(
input,
ch_out,
3,
stride,
1,
main_program=main_program,
startup_program=startup_program)
1)
tmp = conv_bn_layer(
tmp,
ch_out,
3,
1,
1,
act=None,
main_program=main_program,
startup_program=startup_program)
short = shortcut(input, ch_in, ch_out, stride, main_program,
startup_program)
act=None)
short = shortcut(input, ch_in, ch_out, stride)
return layers.elementwise_add(
x=tmp,
y=short,
act='relu',
main_program=main_program,
startup_program=startup_program)
act='relu')
def layer_warp(block_func, input, ch_in, ch_out, count, stride, program,
startup_program):
tmp = block_func(input, ch_in, ch_out, stride, program, startup_program)
def layer_warp(block_func, input, ch_in, ch_out, count, stride):
tmp = block_func(input, ch_in, ch_out, stride)
for i in range(1, count):
tmp = block_func(tmp, ch_out, ch_out, 1, program, startup_program)
tmp = block_func(tmp, ch_out, ch_out, 1)
return tmp
assert (depth - 2) % 6 == 0
......@@ -87,53 +71,41 @@ def resnet_cifar10(input, depth=32, main_program=None, startup_program=None):
ch_out=16,
filter_size=3,
stride=1,
padding=1,
main_program=main_program,
startup_program=startup_program)
padding=1)
res1 = layer_warp(
basicblock,
conv1,
16,
16,
n,
1,
main_program=main_program,
startup_program=startup_program)
1)
res2 = layer_warp(
basicblock,
res1,
16,
32,
n,
2,
main_program=main_program,
startup_program=startup_program)
2)
res3 = layer_warp(
basicblock,
res2,
32,
64,
n,
2,
main_program=main_program,
startup_program=startup_program)
2)
pool = layers.pool2d(
input=res3,
pool_size=8,
pool_type='avg',
pool_stride=1,
main_program=main_program,
startup_program=startup_program)
pool_stride=1)
return pool
def vgg16_bn_drop(input, main_program=None, startup_program=None):
def vgg16_bn_drop(input):
def conv_block(input,
num_filter,
groups,
dropouts,
main_program=None,
startup_program=None):
dropouts):
return nets.img_conv_group(
input=input,
pool_size=2,
......@@ -143,51 +115,34 @@ def vgg16_bn_drop(input, main_program=None, startup_program=None):
conv_act='relu',
conv_with_batchnorm=True,
conv_batchnorm_drop_rate=dropouts,
pool_type='max',
main_program=main_program,
startup_program=startup_program)
pool_type='max')
conv1 = conv_block(input, 64, 2, [0.3, 0], main_program, startup_program)
conv2 = conv_block(conv1, 128, 2, [0.4, 0], main_program, startup_program)
conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0], main_program,
startup_program)
conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0], main_program,
startup_program)
conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0], main_program,
startup_program)
conv1 = conv_block(input, 64, 2, [0.3, 0])
conv2 = conv_block(conv1, 128, 2, [0.4, 0])
conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0])
conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0])
conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])
drop = layers.dropout(
x=conv5,
dropout_prob=0.5,
main_program=main_program,
startup_program=startup_program)
dropout_prob=0.5)
fc1 = layers.fc(input=drop,
size=512,
act=None,
param_attr={"initializer": XavierInitializer()},
main_program=main_program,
startup_program=startup_program)
param_attr={"initializer": XavierInitializer()})
reshape1 = layers.reshape(
x=fc1,
shape=list(fc1.shape + (1, 1)),
main_program=main_program,
startup_program=startup_program)
shape=list(fc1.shape + (1, 1)))
bn = layers.batch_norm(
input=reshape1,
act='relu',
main_program=main_program,
startup_program=startup_program)
act='relu')
drop2 = layers.dropout(
x=bn,
dropout_prob=0.5,
main_program=main_program,
startup_program=startup_program)
dropout_prob=0.5)
fc2 = layers.fc(input=drop2,
size=512,
act=None,
param_attr={"initializer": XavierInitializer()},
main_program=main_program,
startup_program=startup_program)
param_attr={"initializer": XavierInitializer()})
return fc2
......@@ -225,7 +180,7 @@ train_reader = paddle.batch(
place = core.CPUPlace()
exe = Executor(place)
exe.run(g_startup_program, feed={}, fetch_list=[])
exe.run(framework.default_startup_program())
for pass_id in range(PASS_NUM):
batch_id = 0
......@@ -243,7 +198,7 @@ for pass_id in range(PASS_NUM):
tensor_img.set(img_data, place)
tensor_y.set(y_data, place)
outs = exe.run(g_main_program,
outs = exe.run(framework.default_main_program(),
feed={"pixel": tensor_img,
"label": tensor_y},
fetch_list=[avg_cost, accuracy])
......
......@@ -3,67 +3,49 @@ import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.nets as nets
import paddle.v2.fluid.core as core
import paddle.v2.fluid.optimizer as optimizer
from paddle.v2.fluid.framework import Program
import paddle.v2.fluid.framework as framework
from paddle.v2.fluid.executor import Executor
import numpy as np
startup_program = Program()
main_program = Program()
images = layers.data(
name='pixel',
shape=[1, 28, 28],
data_type='float32',
main_program=main_program,
startup_program=startup_program)
data_type='float32')
label = layers.data(
name='label',
shape=[1],
data_type='int64',
main_program=main_program,
startup_program=startup_program)
data_type='int64')
conv_pool_1 = nets.simple_img_conv_pool(
input=images,
filter_size=5,
num_filters=20,
pool_size=2,
pool_stride=2,
act="relu",
main_program=main_program,
startup_program=startup_program)
act="relu")
conv_pool_2 = nets.simple_img_conv_pool(
input=conv_pool_1,
filter_size=5,
num_filters=50,
pool_size=2,
pool_stride=2,
act="relu",
main_program=main_program,
startup_program=startup_program)
act="relu")
predict = layers.fc(input=conv_pool_2,
size=10,
act="softmax",
main_program=main_program,
startup_program=startup_program)
act="softmax")
cost = layers.cross_entropy(
input=predict,
label=label,
main_program=main_program,
startup_program=startup_program)
avg_cost = layers.mean(x=cost, main_program=main_program)
label=label)
avg_cost = layers.mean(x=cost)
accuracy = layers.accuracy(
input=predict,
label=label,
main_program=main_program,
startup_program=startup_program)
label=label)
# optimizer = optimizer.MomentumOptimizer(learning_rate=0.1 / 128.0,
# momentum=0.9)
optimizer = optimizer.AdamOptimizer(learning_rate=0.01, beta1=0.9, beta2=0.999)
opts = optimizer.minimize(avg_cost, startup_program)
opts = optimizer.minimize(avg_cost)
BATCH_SIZE = 50
PASS_NUM = 3
......@@ -75,7 +57,7 @@ train_reader = paddle.batch(
place = core.CPUPlace()
exe = Executor(place)
exe.run(startup_program, feed={}, fetch_list=[])
exe.run(framework.default_startup_program())
for pass_id in range(PASS_NUM):
count = 0
......@@ -90,7 +72,7 @@ for pass_id in range(PASS_NUM):
tensor_img.set(img_data, place)
tensor_y.set(y_data, place)
outs = exe.run(main_program,
outs = exe.run(framework.default_main_program(),
feed={"pixel": tensor_img,
"label": tensor_y},
fetch_list=[avg_cost, accuracy])
......
......@@ -2,8 +2,7 @@ import paddle.v2 as paddle
import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.core as core
import paddle.v2.fluid.optimizer as optimizer
from paddle.v2.fluid.framework import Program
import paddle.v2.fluid.framework as framework
from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.regularizer import L2DecayRegularizer
from paddle.v2.fluid.initializer import UniformInitializer
......@@ -11,14 +10,10 @@ from paddle.v2.fluid.initializer import UniformInitializer
import numpy as np
BATCH_SIZE = 128
startup_program = Program()
main_program = Program()
image = layers.data(
name='x',
shape=[784],
data_type='float32',
main_program=main_program,
startup_program=startup_program)
data_type='float32')
param_attr = {
'name': None,
......@@ -30,45 +25,30 @@ param_attr = {
hidden1 = layers.fc(input=image,
size=128,
act='relu',
main_program=main_program,
startup_program=startup_program,
param_attr=param_attr)
hidden2 = layers.fc(input=hidden1,
size=64,
act='relu',
main_program=main_program,
startup_program=startup_program,
param_attr=param_attr)
predict = layers.fc(input=hidden2,
size=10,
act='softmax',
main_program=main_program,
startup_program=startup_program,
param_attr=param_attr)
label = layers.data(
name='y',
shape=[1],
data_type='int64',
main_program=main_program,
startup_program=startup_program)
data_type='int64')
cost = layers.cross_entropy(
input=predict,
label=label,
main_program=main_program,
startup_program=startup_program)
avg_cost = layers.mean(
x=cost, main_program=main_program, startup_program=startup_program)
cost = layers.cross_entropy(input=predict, label=label)
avg_cost = layers.mean(x=cost)
accuracy = layers.accuracy(
input=predict,
label=label,
main_program=main_program,
startup_program=startup_program)
label=label)
optimizer = optimizer.MomentumOptimizer(learning_rate=0.001, momentum=0.9)
opts = optimizer.minimize(avg_cost, startup_program)
opts = optimizer.minimize(avg_cost)
train_reader = paddle.batch(
paddle.reader.shuffle(
......@@ -78,7 +58,7 @@ train_reader = paddle.batch(
place = core.CPUPlace()
exe = Executor(place)
exe.run(startup_program, feed={}, fetch_list=[])
exe.run(framework.default_startup_program())
PASS_NUM = 100
for pass_id in range(PASS_NUM):
......@@ -93,7 +73,7 @@ for pass_id in range(PASS_NUM):
tensor_y = core.LoDTensor()
tensor_y.set(y_data, place)
outs = exe.run(main_program,
outs = exe.run(framework.default_main_program(),
feed={'x': tensor_x,
'y': tensor_y},
fetch_list=[avg_cost, accuracy])
......
......@@ -3,16 +3,13 @@ import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.nets as nets
import paddle.v2.fluid.core as core
import paddle.v2.fluid.optimizer as optimizer
from paddle.v2.fluid.framework import Program
import paddle.v2.fluid.framework as framework
from paddle.v2.fluid.executor import Executor
import numpy as np
startup_program = Program()
main_program = Program()
is_sparse = True
use_gpu = False
IS_SPARSE = True
USE_GPU = False
BATCH_SIZE = 256
......@@ -25,99 +22,71 @@ def get_usr_combined_features():
uid = layers.data(
name='user_id',
shape=[1],
data_type='int64',
main_program=main_program,
startup_program=startup_program)
data_type='int64')
usr_emb = layers.embedding(
input=uid,
data_type='float32',
size=[USR_DICT_SIZE, 32],
param_attr={'name': 'user_table'},
is_sparse=is_sparse,
main_program=main_program,
startup_program=startup_program)
is_sparse=IS_SPARSE)
usr_fc = layers.fc(input=usr_emb,
size=32,
main_program=main_program,
startup_program=startup_program)
size=32)
USR_GENDER_DICT_SIZE = 2
usr_gender_id = layers.data(
name='gender_id',
shape=[1],
data_type='int64',
main_program=main_program,
startup_program=startup_program)
data_type='int64')
usr_gender_emb = layers.embedding(
input=usr_gender_id,
size=[USR_GENDER_DICT_SIZE, 16],
param_attr={'name': 'gender_table'},
is_sparse=is_sparse,
main_program=main_program,
startup_program=startup_program)
is_sparse=IS_SPARSE)
usr_gender_fc = layers.fc(input=usr_gender_emb,
size=16,
main_program=main_program,
startup_program=startup_program)
size=16)
USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table)
usr_age_id = layers.data(
name='age_id',
shape=[1],
data_type="int64",
main_program=main_program,
startup_program=startup_program)
data_type="int64")
usr_age_emb = layers.embedding(
input=usr_age_id,
size=[USR_AGE_DICT_SIZE, 16],
is_sparse=is_sparse,
param_attr={'name': 'age_table'},
main_program=main_program,
startup_program=startup_program)
is_sparse=IS_SPARSE,
param_attr={'name': 'age_table'})
usr_age_fc = layers.fc(input=usr_age_emb,
size=16,
main_program=main_program,
startup_program=startup_program)
size=16)
USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1
usr_job_id = layers.data(
name='job_id',
shape=[1],
data_type="int64",
main_program=main_program,
startup_program=startup_program)
data_type="int64")
usr_job_emb = layers.embedding(
input=usr_job_id,
size=[USR_JOB_DICT_SIZE, 16],
param_attr={'name': 'job_table'},
is_sparse=is_sparse,
main_program=main_program,
startup_program=startup_program)
is_sparse=IS_SPARSE)
usr_job_fc = layers.fc(input=usr_job_emb,
size=16,
main_program=main_program,
startup_program=startup_program)
size=16)
concat_embed = layers.concat(
input=[usr_fc, usr_gender_fc, usr_age_fc, usr_job_fc],
axis=1,
main_program=main_program,
startup_program=startup_program)
axis=1)
usr_combined_features = layers.fc(input=concat_embed,
size=200,
act="tanh",
main_program=main_program,
startup_program=startup_program)
act="tanh")
return usr_combined_features
......@@ -129,83 +98,61 @@ def get_mov_combined_features():
mov_id = layers.data(
name='movie_id',
shape=[1],
data_type='int64',
main_program=main_program,
startup_program=startup_program)
data_type='int64')
mov_emb = layers.embedding(
input=mov_id,
data_type='float32',
size=[MOV_DICT_SIZE, 32],
param_attr={'name': 'movie_table'},
is_sparse=is_sparse,
main_program=main_program,
startup_program=startup_program)
is_sparse=IS_SPARSE)
mov_fc = layers.fc(input=mov_emb,
size=32,
main_program=main_program,
startup_program=startup_program)
size=32)
CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories())
category_id = layers.data(
name='category_id',
shape=[1],
data_type='int64',
main_program=main_program,
startup_program=startup_program)
data_type='int64')
mov_categories_emb = layers.embedding(
input=category_id,
size=[CATEGORY_DICT_SIZE, 32],
is_sparse=is_sparse,
main_program=main_program,
startup_program=startup_program)
is_sparse=IS_SPARSE)
mov_categories_hidden = layers.sequence_pool(
input=mov_categories_emb,
pool_type="sum",
main_program=main_program,
startup_program=startup_program)
pool_type="sum")
MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict())
mov_title_id = layers.data(
name='movie_title',
shape=[1],
data_type='int64',
main_program=main_program,
startup_program=startup_program)
data_type='int64')
mov_title_emb = layers.embedding(
input=mov_title_id,
size=[MOV_TITLE_DICT_SIZE, 32],
is_sparse=is_sparse,
main_program=main_program,
startup_program=startup_program)
is_sparse=IS_SPARSE)
mov_title_conv = nets.sequence_conv_pool(
input=mov_title_emb,
num_filters=32,
filter_size=3,
act="tanh",
pool_type="sum",
main_program=main_program,
startup_program=startup_program)
pool_type="sum")
concat_embed = layers.concat(
input=[mov_fc, mov_categories_hidden, mov_title_conv],
axis=1,
main_program=main_program,
startup_program=startup_program)
axis=1)
# FIXME(dzh) : need tanh operator
mov_combined_features = layers.fc(input=concat_embed,
size=200,
act="tanh",
main_program=main_program,
startup_program=startup_program)
act="tanh")
return mov_combined_features
......@@ -217,27 +164,18 @@ def model():
# need cos sim
inference = layers.cos_sim(
X=usr_combined_features,
Y=mov_combined_features,
main_program=main_program,
startup_program=startup_program)
Y=mov_combined_features)
label = layers.data(
name='score',
shape=[1],
data_type='float32',
main_program=main_program,
startup_program=startup_program)
data_type='float32')
square_cost = layers.square_error_cost(
input=inference,
label=label,
main_program=main_program,
startup_program=startup_program)
label=label)
avg_cost = layers.mean(
x=square_cost,
main_program=main_program,
startup_program=startup_program)
avg_cost = layers.mean(x=square_cost)
return avg_cost
......@@ -245,16 +183,15 @@ def model():
def main():
cost = model()
sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.2)
opts = sgd_optimizer.minimize(cost, startup_program=startup_program)
block = main_program.block(0)
opts = sgd_optimizer.minimize(cost)
if use_gpu:
if USE_GPU:
place = core.GPUPlace(0)
else:
place = core.CPUPlace()
exe = Executor(place)
exe.run(startup_program, feed={}, fetch_list=[])
exe.run(framework.default_startup_program())
train_reader = paddle.batch(
paddle.reader.shuffle(
......@@ -303,7 +240,7 @@ def main():
PASS_NUM = 100
for pass_id in range(PASS_NUM):
for data in train_reader():
outs = exe.run(main_program,
outs = exe.run(framework.default_main_program(),
feed=func_feed(feeding, data),
fetch_list=[cost])
out = np.array(outs[0])
......
......@@ -3,8 +3,7 @@ import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.nets as nets
import paddle.v2.fluid.core as core
import paddle.v2.fluid.optimizer as optimizer
from paddle.v2.fluid.framework import Program, g_main_program, g_startup_program
import paddle.v2.fluid.framework as framework
from paddle.v2.fluid.executor import Executor
import numpy as np
......@@ -70,7 +69,7 @@ def main():
place = core.CPUPlace()
exe = Executor(place)
exe.run(g_startup_program)
exe.run(framework.default_startup_program())
for pass_id in xrange(PASS_NUM):
for data in train_data():
......@@ -82,7 +81,7 @@ def main():
tensor_label = core.LoDTensor()
tensor_label.set(label, place)
outs = exe.run(g_main_program,
outs = exe.run(framework.default_main_program(),
feed={"words": tensor_words,
"label": tensor_label},
fetch_list=[cost, acc])
......
......@@ -3,8 +3,7 @@ import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.nets as nets
import paddle.v2.fluid.core as core
import paddle.v2.fluid.optimizer as optimizer
from paddle.v2.fluid.framework import Program, g_main_program, g_startup_program
import paddle.v2.fluid.framework as framework
from paddle.v2.fluid.executor import Executor
import numpy as np
......@@ -81,7 +80,7 @@ def main():
place = core.CPUPlace()
exe = Executor(place)
exe.run(g_startup_program)
exe.run(framework.default_startup_program())
for pass_id in xrange(PASS_NUM):
for data in train_data():
......@@ -93,7 +92,7 @@ def main():
tensor_label = core.LoDTensor()
tensor_label.set(label, place)
outs = exe.run(g_main_program,
outs = exe.run(framework.default_main_program(),
feed={"words": tensor_words,
"label": tensor_label},
fetch_list=[cost, acc])
......
......@@ -2,8 +2,7 @@ import paddle.v2 as paddle
import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.core as core
import paddle.v2.fluid.optimizer as optimizer
from paddle.v2.fluid.framework import g_main_program, g_startup_program
import paddle.v2.fluid.framework as framework
from paddle.v2.fluid.executor import Executor
import numpy as np
......@@ -88,10 +87,10 @@ def main():
place = core.CPUPlace()
tensor_words, tensor_label = prepare_feed_data(data, place)
exe = Executor(place)
exe.run(g_startup_program)
exe.run(framework.default_startup_program())
while True:
outs = exe.run(g_main_program,
outs = exe.run(framework.default_main_program(),
feed={"words": tensor_words,
"label": tensor_label},
fetch_list=[cost, acc])
......
......@@ -2,20 +2,17 @@ import paddle.v2 as paddle
import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.core as core
import paddle.v2.fluid.optimizer as optimizer
from paddle.v2.fluid.framework import Program
import paddle.v2.fluid.framework as framework
from paddle.v2.fluid.executor import Executor
import numpy as np
startup_program = Program()
main_program = Program()
embed_size = 32
hidden_size = 256
PASS_NUM = 100
EMBED_SIZE = 32
HIDDEN_SIZE = 256
N = 5
batch_size = 32
is_sparse = True
BATCH_SIZE = 32
IS_SPARSE = True
word_dict = paddle.dataset.imikolov.build_dict()
dict_size = len(word_dict)
......@@ -23,97 +20,67 @@ dict_size = len(word_dict)
first_word = layers.data(
name='firstw',
shape=[1],
data_type='int64',
main_program=main_program,
startup_program=startup_program)
data_type='int64')
second_word = layers.data(
name='secondw',
shape=[1],
data_type='int64',
main_program=main_program,
startup_program=startup_program)
data_type='int64')
third_word = layers.data(
name='thirdw',
shape=[1],
data_type='int64',
main_program=main_program,
startup_program=startup_program)
data_type='int64')
forth_word = layers.data(
name='forthw',
shape=[1],
data_type='int64',
main_program=main_program,
startup_program=startup_program)
data_type='int64')
next_word = layers.data(
name='nextw',
shape=[1],
data_type='int64',
main_program=main_program,
startup_program=startup_program)
data_type='int64')
embed_first = layers.embedding(
input=first_word,
size=[dict_size, embed_size],
size=[dict_size, EMBED_SIZE],
data_type='float32',
is_sparse=is_sparse,
param_attr={'name': 'shared_w'},
main_program=main_program,
startup_program=startup_program)
is_sparse=IS_SPARSE,
param_attr={'name': 'shared_w'})
embed_second = layers.embedding(
input=second_word,
size=[dict_size, embed_size],
size=[dict_size, EMBED_SIZE],
data_type='float32',
is_sparse=is_sparse,
param_attr={'name': 'shared_w'},
main_program=main_program,
startup_program=startup_program)
is_sparse=IS_SPARSE,
param_attr={'name': 'shared_w'})
embed_third = layers.embedding(
input=third_word,
size=[dict_size, embed_size],
size=[dict_size, EMBED_SIZE],
data_type='float32',
is_sparse=is_sparse,
param_attr={'name': 'shared_w'},
main_program=main_program,
startup_program=startup_program)
is_sparse=IS_SPARSE,
param_attr={'name': 'shared_w'})
embed_forth = layers.embedding(
input=forth_word,
size=[dict_size, embed_size],
size=[dict_size, EMBED_SIZE],
data_type='float32',
is_sparse=is_sparse,
param_attr={'name': 'shared_w'},
main_program=main_program,
startup_program=startup_program)
is_sparse=IS_SPARSE,
param_attr={'name': 'shared_w'})
concat_embed = layers.concat(
input=[embed_first, embed_second, embed_third, embed_forth],
axis=1,
main_program=main_program,
startup_program=startup_program)
axis=1)
hidden1 = layers.fc(input=concat_embed,
size=hidden_size,
act='sigmoid',
main_program=main_program,
startup_program=startup_program)
size=HIDDEN_SIZE,
act='sigmoid')
predict_word = layers.fc(input=hidden1,
size=dict_size,
act='softmax',
main_program=main_program,
startup_program=startup_program)
act='softmax')
cost = layers.cross_entropy(
input=predict_word,
label=next_word,
main_program=main_program,
startup_program=startup_program)
avg_cost = layers.mean(
x=cost, main_program=main_program, startup_program=startup_program)
label=next_word)
avg_cost = layers.mean(x=cost)
sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
opts = sgd_optimizer.minimize(avg_cost, startup_program)
opts = sgd_optimizer.minimize(avg_cost)
train_reader = paddle.batch(
paddle.dataset.imikolov.train(word_dict, N), batch_size)
paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE)
place = core.CPUPlace()
exe = Executor(place)
......@@ -122,8 +89,8 @@ exe = Executor(place)
# below exit line.
exit(0)
exe.run(startup_program, feed={}, fetch_list=[])
PASS_NUM = 100
exe.run(framework.default_startup_program())
for pass_id in range(PASS_NUM):
for data in train_reader():
input_data = [[data_idx[idx] for data_idx in data] for idx in xrange(5)]
......@@ -150,7 +117,7 @@ for pass_id in range(PASS_NUM):
next_tensor = core.LoDTensor()
next_tensor.set(next_data, place)
outs = exe.run(main_program,
outs = exe.run(framework.default_main_program(),
feed={
'firstw': first_tensor,
'secondw': second_tensor,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册