提交 3157ce61 编写于 作者: H Helin Wang

Simplify demo, add paddle.default_main_program() and paddle.default_startup_program()

- Removed all main_program and startup_program in the demo.
- Using paddle.default_main_program() hides the implementation detail (e.g., using g_main_program) from the user, so we can change the implementation much more easily in the future.
上级 c3a61349
...@@ -37,6 +37,8 @@ import model ...@@ -37,6 +37,8 @@ import model
import paddle.trainer.config_parser as cp import paddle.trainer.config_parser as cp
__all__ = [ __all__ = [
'default_startup_program',
'default_main_program',
'optimizer', 'optimizer',
'layer', 'layer',
'activation', 'activation',
......
...@@ -4,7 +4,7 @@ import collections ...@@ -4,7 +4,7 @@ import collections
import numpy as np import numpy as np
import copy import copy
__all__ = ['Block', 'Variable', 'Program', 'Operator'] __all__ = ['Block', 'Variable', 'Program', 'Operator', 'default_startup_program', 'default_main_program']
def unique_name(prefix): def unique_name(prefix):
...@@ -562,3 +562,9 @@ class Parameter(Variable): ...@@ -562,3 +562,9 @@ class Parameter(Variable):
# program is a global instance. # program is a global instance.
g_main_program = Program() g_main_program = Program()
g_startup_program = Program() g_startup_program = Program()
def default_startup_program():
return g_startup_program
def default_main_program():
return g_main_program
...@@ -2,45 +2,33 @@ import paddle.v2 as paddle ...@@ -2,45 +2,33 @@ import paddle.v2 as paddle
import paddle.v2.fluid.layers as layers import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.core as core import paddle.v2.fluid.core as core
import paddle.v2.fluid.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
from paddle.v2.fluid.framework import Program
from paddle.v2.fluid.io import save_persistables, load_persistables from paddle.v2.fluid.io import save_persistables, load_persistables
from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.executor import Executor
import numpy as np import numpy as np
startup_program = Program()
main_program = Program()
x = layers.data( x = layers.data(
name='x', name='x',
shape=[13], shape=[13],
data_type='float32', data_type='float32')
main_program=main_program,
startup_program=startup_program)
y_predict = layers.fc(input=x, y_predict = layers.fc(input=x,
size=1, size=1,
act=None, act=None)
main_program=main_program,
startup_program=startup_program)
y = layers.data( y = layers.data(
name='y', name='y',
shape=[1], shape=[1],
data_type='float32', data_type='float32')
main_program=main_program,
startup_program=startup_program)
cost = layers.square_error_cost( cost = layers.square_error_cost(
input=y_predict, input=y_predict,
label=y, label=y)
main_program=main_program, avg_cost = layers.mean(x=cost)
startup_program=startup_program)
avg_cost = layers.mean(
x=cost, main_program=main_program, startup_program=startup_program)
sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001) sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
opts = sgd_optimizer.minimize(avg_cost, startup_program) opts = sgd_optimizer.minimize(avg_cost)
BATCH_SIZE = 20 BATCH_SIZE = 20
...@@ -52,12 +40,12 @@ train_reader = paddle.batch( ...@@ -52,12 +40,12 @@ train_reader = paddle.batch(
place = core.CPUPlace() place = core.CPUPlace()
exe = Executor(place) exe = Executor(place)
exe.run(startup_program, feed={}, fetch_list=[]) exe.run(framework.default_startup_program())
PASS_NUM = 100 PASS_NUM = 100
for pass_id in range(PASS_NUM): for pass_id in range(PASS_NUM):
save_persistables(exe, "./fit_a_line.model/", main_program=main_program) save_persistables(exe, "./fit_a_line.model/")
load_persistables(exe, "./fit_a_line.model/", main_program=main_program) load_persistables(exe, "./fit_a_line.model/")
for data in train_reader(): for data in train_reader():
x_data = np.array(map(lambda x: x[0], data)).astype("float32") x_data = np.array(map(lambda x: x[0], data)).astype("float32")
y_data = np.array(map(lambda x: x[1], data)).astype("float32") y_data = np.array(map(lambda x: x[1], data)).astype("float32")
...@@ -69,7 +57,7 @@ for pass_id in range(PASS_NUM): ...@@ -69,7 +57,7 @@ for pass_id in range(PASS_NUM):
tensor_y = core.LoDTensor() tensor_y = core.LoDTensor()
tensor_y.set(y_data, place) tensor_y.set(y_data, place)
# print tensor_y.get_dims() # print tensor_y.get_dims()
outs = exe.run(main_program, outs = exe.run(framework.default_main_program(),
feed={'x': tensor_x, feed={'x': tensor_x,
'y': tensor_y}, 'y': tensor_y},
fetch_list=[avg_cost]) fetch_list=[avg_cost])
......
...@@ -5,19 +5,17 @@ import paddle.v2.fluid.layers as layers ...@@ -5,19 +5,17 @@ import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.nets as nets import paddle.v2.fluid.nets as nets
import paddle.v2.fluid.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.framework import g_startup_program, g_main_program import paddle.v2.fluid.framework as framework
from paddle.v2.fluid.initializer import XavierInitializer from paddle.v2.fluid.initializer import XavierInitializer
def resnet_cifar10(input, depth=32, main_program=None, startup_program=None): def resnet_cifar10(input, depth=32):
def conv_bn_layer(input, def conv_bn_layer(input,
ch_out, ch_out,
filter_size, filter_size,
stride, stride,
padding, padding,
act='relu', act='relu'):
main_program=None,
startup_program=None):
tmp = layers.conv2d( tmp = layers.conv2d(
input=input, input=input,
filter_size=filter_size, filter_size=filter_size,
...@@ -25,14 +23,10 @@ def resnet_cifar10(input, depth=32, main_program=None, startup_program=None): ...@@ -25,14 +23,10 @@ def resnet_cifar10(input, depth=32, main_program=None, startup_program=None):
stride=stride, stride=stride,
padding=padding, padding=padding,
act=None, act=None,
bias_attr=False, bias_attr=False)
main_program=main_program,
startup_program=startup_program)
return layers.batch_norm( return layers.batch_norm(
input=tmp, input=tmp,
act=act, act=act)
main_program=main_program,
startup_program=startup_program)
def shortcut(input, ch_in, ch_out, stride, program, init_program): def shortcut(input, ch_in, ch_out, stride, program, init_program):
if ch_in != ch_out: if ch_in != ch_out:
...@@ -44,40 +38,30 @@ def resnet_cifar10(input, depth=32, main_program=None, startup_program=None): ...@@ -44,40 +38,30 @@ def resnet_cifar10(input, depth=32, main_program=None, startup_program=None):
def basicblock(input, def basicblock(input,
ch_in, ch_in,
ch_out, ch_out,
stride, stride):
main_program=main_program,
startup_program=startup_program):
tmp = conv_bn_layer( tmp = conv_bn_layer(
input, input,
ch_out, ch_out,
3, 3,
stride, stride,
1, 1)
main_program=main_program,
startup_program=startup_program)
tmp = conv_bn_layer( tmp = conv_bn_layer(
tmp, tmp,
ch_out, ch_out,
3, 3,
1, 1,
1, 1,
act=None, act=None)
main_program=main_program, short = shortcut(input, ch_in, ch_out, stride)
startup_program=startup_program)
short = shortcut(input, ch_in, ch_out, stride, main_program,
startup_program)
return layers.elementwise_add( return layers.elementwise_add(
x=tmp, x=tmp,
y=short, y=short,
act='relu', act='relu')
main_program=main_program,
startup_program=startup_program)
def layer_warp(block_func, input, ch_in, ch_out, count, stride, program, def layer_warp(block_func, input, ch_in, ch_out, count, stride):
startup_program): tmp = block_func(input, ch_in, ch_out, stride)
tmp = block_func(input, ch_in, ch_out, stride, program, startup_program)
for i in range(1, count): for i in range(1, count):
tmp = block_func(tmp, ch_out, ch_out, 1, program, startup_program) tmp = block_func(tmp, ch_out, ch_out, 1)
return tmp return tmp
assert (depth - 2) % 6 == 0 assert (depth - 2) % 6 == 0
...@@ -87,53 +71,41 @@ def resnet_cifar10(input, depth=32, main_program=None, startup_program=None): ...@@ -87,53 +71,41 @@ def resnet_cifar10(input, depth=32, main_program=None, startup_program=None):
ch_out=16, ch_out=16,
filter_size=3, filter_size=3,
stride=1, stride=1,
padding=1, padding=1)
main_program=main_program,
startup_program=startup_program)
res1 = layer_warp( res1 = layer_warp(
basicblock, basicblock,
conv1, conv1,
16, 16,
16, 16,
n, n,
1, 1)
main_program=main_program,
startup_program=startup_program)
res2 = layer_warp( res2 = layer_warp(
basicblock, basicblock,
res1, res1,
16, 16,
32, 32,
n, n,
2, 2)
main_program=main_program,
startup_program=startup_program)
res3 = layer_warp( res3 = layer_warp(
basicblock, basicblock,
res2, res2,
32, 32,
64, 64,
n, n,
2, 2)
main_program=main_program,
startup_program=startup_program)
pool = layers.pool2d( pool = layers.pool2d(
input=res3, input=res3,
pool_size=8, pool_size=8,
pool_type='avg', pool_type='avg',
pool_stride=1, pool_stride=1)
main_program=main_program,
startup_program=startup_program)
return pool return pool
def vgg16_bn_drop(input, main_program=None, startup_program=None): def vgg16_bn_drop(input):
def conv_block(input, def conv_block(input,
num_filter, num_filter,
groups, groups,
dropouts, dropouts):
main_program=None,
startup_program=None):
return nets.img_conv_group( return nets.img_conv_group(
input=input, input=input,
pool_size=2, pool_size=2,
...@@ -143,51 +115,34 @@ def vgg16_bn_drop(input, main_program=None, startup_program=None): ...@@ -143,51 +115,34 @@ def vgg16_bn_drop(input, main_program=None, startup_program=None):
conv_act='relu', conv_act='relu',
conv_with_batchnorm=True, conv_with_batchnorm=True,
conv_batchnorm_drop_rate=dropouts, conv_batchnorm_drop_rate=dropouts,
pool_type='max', pool_type='max')
main_program=main_program,
startup_program=startup_program)
conv1 = conv_block(input, 64, 2, [0.3, 0], main_program, startup_program) conv1 = conv_block(input, 64, 2, [0.3, 0])
conv2 = conv_block(conv1, 128, 2, [0.4, 0], main_program, startup_program) conv2 = conv_block(conv1, 128, 2, [0.4, 0])
conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0], main_program, conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0])
startup_program) conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0])
conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0], main_program, conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])
startup_program)
conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0], main_program,
startup_program)
drop = layers.dropout( drop = layers.dropout(
x=conv5, x=conv5,
dropout_prob=0.5, dropout_prob=0.5)
main_program=main_program,
startup_program=startup_program)
fc1 = layers.fc(input=drop, fc1 = layers.fc(input=drop,
size=512, size=512,
act=None, act=None,
param_attr={"initializer": XavierInitializer()}, param_attr={"initializer": XavierInitializer()})
main_program=main_program,
startup_program=startup_program)
reshape1 = layers.reshape( reshape1 = layers.reshape(
x=fc1, x=fc1,
shape=list(fc1.shape + (1, 1)), shape=list(fc1.shape + (1, 1)))
main_program=main_program,
startup_program=startup_program)
bn = layers.batch_norm( bn = layers.batch_norm(
input=reshape1, input=reshape1,
act='relu', act='relu')
main_program=main_program,
startup_program=startup_program)
drop2 = layers.dropout( drop2 = layers.dropout(
x=bn, x=bn,
dropout_prob=0.5, dropout_prob=0.5)
main_program=main_program,
startup_program=startup_program)
fc2 = layers.fc(input=drop2, fc2 = layers.fc(input=drop2,
size=512, size=512,
act=None, act=None,
param_attr={"initializer": XavierInitializer()}, param_attr={"initializer": XavierInitializer()})
main_program=main_program,
startup_program=startup_program)
return fc2 return fc2
...@@ -225,7 +180,7 @@ train_reader = paddle.batch( ...@@ -225,7 +180,7 @@ train_reader = paddle.batch(
place = core.CPUPlace() place = core.CPUPlace()
exe = Executor(place) exe = Executor(place)
exe.run(g_startup_program, feed={}, fetch_list=[]) exe.run(framework.default_startup_program())
for pass_id in range(PASS_NUM): for pass_id in range(PASS_NUM):
batch_id = 0 batch_id = 0
...@@ -243,7 +198,7 @@ for pass_id in range(PASS_NUM): ...@@ -243,7 +198,7 @@ for pass_id in range(PASS_NUM):
tensor_img.set(img_data, place) tensor_img.set(img_data, place)
tensor_y.set(y_data, place) tensor_y.set(y_data, place)
outs = exe.run(g_main_program, outs = exe.run(framework.default_main_program(),
feed={"pixel": tensor_img, feed={"pixel": tensor_img,
"label": tensor_y}, "label": tensor_y},
fetch_list=[avg_cost, accuracy]) fetch_list=[avg_cost, accuracy])
......
...@@ -3,67 +3,49 @@ import paddle.v2.fluid.layers as layers ...@@ -3,67 +3,49 @@ import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.nets as nets import paddle.v2.fluid.nets as nets
import paddle.v2.fluid.core as core import paddle.v2.fluid.core as core
import paddle.v2.fluid.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
from paddle.v2.fluid.framework import Program
from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.executor import Executor
import numpy as np import numpy as np
startup_program = Program()
main_program = Program()
images = layers.data( images = layers.data(
name='pixel', name='pixel',
shape=[1, 28, 28], shape=[1, 28, 28],
data_type='float32', data_type='float32')
main_program=main_program,
startup_program=startup_program)
label = layers.data( label = layers.data(
name='label', name='label',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
conv_pool_1 = nets.simple_img_conv_pool( conv_pool_1 = nets.simple_img_conv_pool(
input=images, input=images,
filter_size=5, filter_size=5,
num_filters=20, num_filters=20,
pool_size=2, pool_size=2,
pool_stride=2, pool_stride=2,
act="relu", act="relu")
main_program=main_program,
startup_program=startup_program)
conv_pool_2 = nets.simple_img_conv_pool( conv_pool_2 = nets.simple_img_conv_pool(
input=conv_pool_1, input=conv_pool_1,
filter_size=5, filter_size=5,
num_filters=50, num_filters=50,
pool_size=2, pool_size=2,
pool_stride=2, pool_stride=2,
act="relu", act="relu")
main_program=main_program,
startup_program=startup_program)
predict = layers.fc(input=conv_pool_2, predict = layers.fc(input=conv_pool_2,
size=10, size=10,
act="softmax", act="softmax")
main_program=main_program,
startup_program=startup_program)
cost = layers.cross_entropy( cost = layers.cross_entropy(
input=predict, input=predict,
label=label, label=label)
main_program=main_program, avg_cost = layers.mean(x=cost)
startup_program=startup_program)
avg_cost = layers.mean(x=cost, main_program=main_program)
accuracy = layers.accuracy( accuracy = layers.accuracy(
input=predict, input=predict,
label=label, label=label)
main_program=main_program,
startup_program=startup_program)
# optimizer = optimizer.MomentumOptimizer(learning_rate=0.1 / 128.0, # optimizer = optimizer.MomentumOptimizer(learning_rate=0.1 / 128.0,
# momentum=0.9) # momentum=0.9)
optimizer = optimizer.AdamOptimizer(learning_rate=0.01, beta1=0.9, beta2=0.999) optimizer = optimizer.AdamOptimizer(learning_rate=0.01, beta1=0.9, beta2=0.999)
opts = optimizer.minimize(avg_cost, startup_program) opts = optimizer.minimize(avg_cost)
BATCH_SIZE = 50 BATCH_SIZE = 50
PASS_NUM = 3 PASS_NUM = 3
...@@ -75,7 +57,7 @@ train_reader = paddle.batch( ...@@ -75,7 +57,7 @@ train_reader = paddle.batch(
place = core.CPUPlace() place = core.CPUPlace()
exe = Executor(place) exe = Executor(place)
exe.run(startup_program, feed={}, fetch_list=[]) exe.run(framework.default_startup_program())
for pass_id in range(PASS_NUM): for pass_id in range(PASS_NUM):
count = 0 count = 0
...@@ -90,7 +72,7 @@ for pass_id in range(PASS_NUM): ...@@ -90,7 +72,7 @@ for pass_id in range(PASS_NUM):
tensor_img.set(img_data, place) tensor_img.set(img_data, place)
tensor_y.set(y_data, place) tensor_y.set(y_data, place)
outs = exe.run(main_program, outs = exe.run(framework.default_main_program(),
feed={"pixel": tensor_img, feed={"pixel": tensor_img,
"label": tensor_y}, "label": tensor_y},
fetch_list=[avg_cost, accuracy]) fetch_list=[avg_cost, accuracy])
......
...@@ -2,8 +2,7 @@ import paddle.v2 as paddle ...@@ -2,8 +2,7 @@ import paddle.v2 as paddle
import paddle.v2.fluid.layers as layers import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.core as core import paddle.v2.fluid.core as core
import paddle.v2.fluid.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
from paddle.v2.fluid.framework import Program
from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.regularizer import L2DecayRegularizer from paddle.v2.fluid.regularizer import L2DecayRegularizer
from paddle.v2.fluid.initializer import UniformInitializer from paddle.v2.fluid.initializer import UniformInitializer
...@@ -11,14 +10,10 @@ from paddle.v2.fluid.initializer import UniformInitializer ...@@ -11,14 +10,10 @@ from paddle.v2.fluid.initializer import UniformInitializer
import numpy as np import numpy as np
BATCH_SIZE = 128 BATCH_SIZE = 128
startup_program = Program()
main_program = Program()
image = layers.data( image = layers.data(
name='x', name='x',
shape=[784], shape=[784],
data_type='float32', data_type='float32')
main_program=main_program,
startup_program=startup_program)
param_attr = { param_attr = {
'name': None, 'name': None,
...@@ -30,45 +25,30 @@ param_attr = { ...@@ -30,45 +25,30 @@ param_attr = {
hidden1 = layers.fc(input=image, hidden1 = layers.fc(input=image,
size=128, size=128,
act='relu', act='relu',
main_program=main_program,
startup_program=startup_program,
param_attr=param_attr) param_attr=param_attr)
hidden2 = layers.fc(input=hidden1, hidden2 = layers.fc(input=hidden1,
size=64, size=64,
act='relu', act='relu',
main_program=main_program,
startup_program=startup_program,
param_attr=param_attr) param_attr=param_attr)
predict = layers.fc(input=hidden2, predict = layers.fc(input=hidden2,
size=10, size=10,
act='softmax', act='softmax',
main_program=main_program,
startup_program=startup_program,
param_attr=param_attr) param_attr=param_attr)
label = layers.data( label = layers.data(
name='y', name='y',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
cost = layers.cross_entropy( cost = layers.cross_entropy(input=predict, label=label)
input=predict, avg_cost = layers.mean(x=cost)
label=label,
main_program=main_program,
startup_program=startup_program)
avg_cost = layers.mean(
x=cost, main_program=main_program, startup_program=startup_program)
accuracy = layers.accuracy( accuracy = layers.accuracy(
input=predict, input=predict,
label=label, label=label)
main_program=main_program,
startup_program=startup_program)
optimizer = optimizer.MomentumOptimizer(learning_rate=0.001, momentum=0.9) optimizer = optimizer.MomentumOptimizer(learning_rate=0.001, momentum=0.9)
opts = optimizer.minimize(avg_cost, startup_program) opts = optimizer.minimize(avg_cost)
train_reader = paddle.batch( train_reader = paddle.batch(
paddle.reader.shuffle( paddle.reader.shuffle(
...@@ -78,7 +58,7 @@ train_reader = paddle.batch( ...@@ -78,7 +58,7 @@ train_reader = paddle.batch(
place = core.CPUPlace() place = core.CPUPlace()
exe = Executor(place) exe = Executor(place)
exe.run(startup_program, feed={}, fetch_list=[]) exe.run(framework.default_startup_program())
PASS_NUM = 100 PASS_NUM = 100
for pass_id in range(PASS_NUM): for pass_id in range(PASS_NUM):
...@@ -93,7 +73,7 @@ for pass_id in range(PASS_NUM): ...@@ -93,7 +73,7 @@ for pass_id in range(PASS_NUM):
tensor_y = core.LoDTensor() tensor_y = core.LoDTensor()
tensor_y.set(y_data, place) tensor_y.set(y_data, place)
outs = exe.run(main_program, outs = exe.run(framework.default_main_program(),
feed={'x': tensor_x, feed={'x': tensor_x,
'y': tensor_y}, 'y': tensor_y},
fetch_list=[avg_cost, accuracy]) fetch_list=[avg_cost, accuracy])
......
...@@ -3,16 +3,13 @@ import paddle.v2.fluid.layers as layers ...@@ -3,16 +3,13 @@ import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.nets as nets import paddle.v2.fluid.nets as nets
import paddle.v2.fluid.core as core import paddle.v2.fluid.core as core
import paddle.v2.fluid.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
from paddle.v2.fluid.framework import Program
from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.executor import Executor
import numpy as np import numpy as np
startup_program = Program() IS_SPARSE = True
main_program = Program() USE_GPU = False
is_sparse = True
use_gpu = False
BATCH_SIZE = 256 BATCH_SIZE = 256
...@@ -25,99 +22,71 @@ def get_usr_combined_features(): ...@@ -25,99 +22,71 @@ def get_usr_combined_features():
uid = layers.data( uid = layers.data(
name='user_id', name='user_id',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
usr_emb = layers.embedding( usr_emb = layers.embedding(
input=uid, input=uid,
data_type='float32', data_type='float32',
size=[USR_DICT_SIZE, 32], size=[USR_DICT_SIZE, 32],
param_attr={'name': 'user_table'}, param_attr={'name': 'user_table'},
is_sparse=is_sparse, is_sparse=IS_SPARSE)
main_program=main_program,
startup_program=startup_program)
usr_fc = layers.fc(input=usr_emb, usr_fc = layers.fc(input=usr_emb,
size=32, size=32)
main_program=main_program,
startup_program=startup_program)
USR_GENDER_DICT_SIZE = 2 USR_GENDER_DICT_SIZE = 2
usr_gender_id = layers.data( usr_gender_id = layers.data(
name='gender_id', name='gender_id',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
usr_gender_emb = layers.embedding( usr_gender_emb = layers.embedding(
input=usr_gender_id, input=usr_gender_id,
size=[USR_GENDER_DICT_SIZE, 16], size=[USR_GENDER_DICT_SIZE, 16],
param_attr={'name': 'gender_table'}, param_attr={'name': 'gender_table'},
is_sparse=is_sparse, is_sparse=IS_SPARSE)
main_program=main_program,
startup_program=startup_program)
usr_gender_fc = layers.fc(input=usr_gender_emb, usr_gender_fc = layers.fc(input=usr_gender_emb,
size=16, size=16)
main_program=main_program,
startup_program=startup_program)
USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table) USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table)
usr_age_id = layers.data( usr_age_id = layers.data(
name='age_id', name='age_id',
shape=[1], shape=[1],
data_type="int64", data_type="int64")
main_program=main_program,
startup_program=startup_program)
usr_age_emb = layers.embedding( usr_age_emb = layers.embedding(
input=usr_age_id, input=usr_age_id,
size=[USR_AGE_DICT_SIZE, 16], size=[USR_AGE_DICT_SIZE, 16],
is_sparse=is_sparse, is_sparse=IS_SPARSE,
param_attr={'name': 'age_table'}, param_attr={'name': 'age_table'})
main_program=main_program,
startup_program=startup_program)
usr_age_fc = layers.fc(input=usr_age_emb, usr_age_fc = layers.fc(input=usr_age_emb,
size=16, size=16)
main_program=main_program,
startup_program=startup_program)
USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1 USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1
usr_job_id = layers.data( usr_job_id = layers.data(
name='job_id', name='job_id',
shape=[1], shape=[1],
data_type="int64", data_type="int64")
main_program=main_program,
startup_program=startup_program)
usr_job_emb = layers.embedding( usr_job_emb = layers.embedding(
input=usr_job_id, input=usr_job_id,
size=[USR_JOB_DICT_SIZE, 16], size=[USR_JOB_DICT_SIZE, 16],
param_attr={'name': 'job_table'}, param_attr={'name': 'job_table'},
is_sparse=is_sparse, is_sparse=IS_SPARSE)
main_program=main_program,
startup_program=startup_program)
usr_job_fc = layers.fc(input=usr_job_emb, usr_job_fc = layers.fc(input=usr_job_emb,
size=16, size=16)
main_program=main_program,
startup_program=startup_program)
concat_embed = layers.concat( concat_embed = layers.concat(
input=[usr_fc, usr_gender_fc, usr_age_fc, usr_job_fc], input=[usr_fc, usr_gender_fc, usr_age_fc, usr_job_fc],
axis=1, axis=1)
main_program=main_program,
startup_program=startup_program)
usr_combined_features = layers.fc(input=concat_embed, usr_combined_features = layers.fc(input=concat_embed,
size=200, size=200,
act="tanh", act="tanh")
main_program=main_program,
startup_program=startup_program)
return usr_combined_features return usr_combined_features
...@@ -129,83 +98,61 @@ def get_mov_combined_features(): ...@@ -129,83 +98,61 @@ def get_mov_combined_features():
mov_id = layers.data( mov_id = layers.data(
name='movie_id', name='movie_id',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
mov_emb = layers.embedding( mov_emb = layers.embedding(
input=mov_id, input=mov_id,
data_type='float32', data_type='float32',
size=[MOV_DICT_SIZE, 32], size=[MOV_DICT_SIZE, 32],
param_attr={'name': 'movie_table'}, param_attr={'name': 'movie_table'},
is_sparse=is_sparse, is_sparse=IS_SPARSE)
main_program=main_program,
startup_program=startup_program)
mov_fc = layers.fc(input=mov_emb, mov_fc = layers.fc(input=mov_emb,
size=32, size=32)
main_program=main_program,
startup_program=startup_program)
CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories()) CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories())
category_id = layers.data( category_id = layers.data(
name='category_id', name='category_id',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
mov_categories_emb = layers.embedding( mov_categories_emb = layers.embedding(
input=category_id, input=category_id,
size=[CATEGORY_DICT_SIZE, 32], size=[CATEGORY_DICT_SIZE, 32],
is_sparse=is_sparse, is_sparse=IS_SPARSE)
main_program=main_program,
startup_program=startup_program)
mov_categories_hidden = layers.sequence_pool( mov_categories_hidden = layers.sequence_pool(
input=mov_categories_emb, input=mov_categories_emb,
pool_type="sum", pool_type="sum")
main_program=main_program,
startup_program=startup_program)
MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict()) MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict())
mov_title_id = layers.data( mov_title_id = layers.data(
name='movie_title', name='movie_title',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
mov_title_emb = layers.embedding( mov_title_emb = layers.embedding(
input=mov_title_id, input=mov_title_id,
size=[MOV_TITLE_DICT_SIZE, 32], size=[MOV_TITLE_DICT_SIZE, 32],
is_sparse=is_sparse, is_sparse=IS_SPARSE)
main_program=main_program,
startup_program=startup_program)
mov_title_conv = nets.sequence_conv_pool( mov_title_conv = nets.sequence_conv_pool(
input=mov_title_emb, input=mov_title_emb,
num_filters=32, num_filters=32,
filter_size=3, filter_size=3,
act="tanh", act="tanh",
pool_type="sum", pool_type="sum")
main_program=main_program,
startup_program=startup_program)
concat_embed = layers.concat( concat_embed = layers.concat(
input=[mov_fc, mov_categories_hidden, mov_title_conv], input=[mov_fc, mov_categories_hidden, mov_title_conv],
axis=1, axis=1)
main_program=main_program,
startup_program=startup_program)
# FIXME(dzh) : need tanh operator # FIXME(dzh) : need tanh operator
mov_combined_features = layers.fc(input=concat_embed, mov_combined_features = layers.fc(input=concat_embed,
size=200, size=200,
act="tanh", act="tanh")
main_program=main_program,
startup_program=startup_program)
return mov_combined_features return mov_combined_features
...@@ -217,27 +164,18 @@ def model(): ...@@ -217,27 +164,18 @@ def model():
# need cos sim # need cos sim
inference = layers.cos_sim( inference = layers.cos_sim(
X=usr_combined_features, X=usr_combined_features,
Y=mov_combined_features, Y=mov_combined_features)
main_program=main_program,
startup_program=startup_program)
label = layers.data( label = layers.data(
name='score', name='score',
shape=[1], shape=[1],
data_type='float32', data_type='float32')
main_program=main_program,
startup_program=startup_program)
square_cost = layers.square_error_cost( square_cost = layers.square_error_cost(
input=inference, input=inference,
label=label, label=label)
main_program=main_program,
startup_program=startup_program)
avg_cost = layers.mean( avg_cost = layers.mean(x=square_cost)
x=square_cost,
main_program=main_program,
startup_program=startup_program)
return avg_cost return avg_cost
...@@ -245,16 +183,15 @@ def model(): ...@@ -245,16 +183,15 @@ def model():
def main(): def main():
cost = model() cost = model()
sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.2) sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.2)
opts = sgd_optimizer.minimize(cost, startup_program=startup_program) opts = sgd_optimizer.minimize(cost)
block = main_program.block(0)
if use_gpu: if USE_GPU:
place = core.GPUPlace(0) place = core.GPUPlace(0)
else: else:
place = core.CPUPlace() place = core.CPUPlace()
exe = Executor(place) exe = Executor(place)
exe.run(startup_program, feed={}, fetch_list=[]) exe.run(framework.default_startup_program())
train_reader = paddle.batch( train_reader = paddle.batch(
paddle.reader.shuffle( paddle.reader.shuffle(
...@@ -303,7 +240,7 @@ def main(): ...@@ -303,7 +240,7 @@ def main():
PASS_NUM = 100 PASS_NUM = 100
for pass_id in range(PASS_NUM): for pass_id in range(PASS_NUM):
for data in train_reader(): for data in train_reader():
outs = exe.run(main_program, outs = exe.run(framework.default_main_program(),
feed=func_feed(feeding, data), feed=func_feed(feeding, data),
fetch_list=[cost]) fetch_list=[cost])
out = np.array(outs[0]) out = np.array(outs[0])
......
...@@ -3,8 +3,7 @@ import paddle.v2.fluid.layers as layers ...@@ -3,8 +3,7 @@ import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.nets as nets import paddle.v2.fluid.nets as nets
import paddle.v2.fluid.core as core import paddle.v2.fluid.core as core
import paddle.v2.fluid.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
from paddle.v2.fluid.framework import Program, g_main_program, g_startup_program
from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.executor import Executor
import numpy as np import numpy as np
...@@ -70,7 +69,7 @@ def main(): ...@@ -70,7 +69,7 @@ def main():
place = core.CPUPlace() place = core.CPUPlace()
exe = Executor(place) exe = Executor(place)
exe.run(g_startup_program) exe.run(framework.default_startup_program())
for pass_id in xrange(PASS_NUM): for pass_id in xrange(PASS_NUM):
for data in train_data(): for data in train_data():
...@@ -82,7 +81,7 @@ def main(): ...@@ -82,7 +81,7 @@ def main():
tensor_label = core.LoDTensor() tensor_label = core.LoDTensor()
tensor_label.set(label, place) tensor_label.set(label, place)
outs = exe.run(g_main_program, outs = exe.run(framework.default_main_program(),
feed={"words": tensor_words, feed={"words": tensor_words,
"label": tensor_label}, "label": tensor_label},
fetch_list=[cost, acc]) fetch_list=[cost, acc])
......
...@@ -3,8 +3,7 @@ import paddle.v2.fluid.layers as layers ...@@ -3,8 +3,7 @@ import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.nets as nets import paddle.v2.fluid.nets as nets
import paddle.v2.fluid.core as core import paddle.v2.fluid.core as core
import paddle.v2.fluid.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
from paddle.v2.fluid.framework import Program, g_main_program, g_startup_program
from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.executor import Executor
import numpy as np import numpy as np
...@@ -81,7 +80,7 @@ def main(): ...@@ -81,7 +80,7 @@ def main():
place = core.CPUPlace() place = core.CPUPlace()
exe = Executor(place) exe = Executor(place)
exe.run(g_startup_program) exe.run(framework.default_startup_program())
for pass_id in xrange(PASS_NUM): for pass_id in xrange(PASS_NUM):
for data in train_data(): for data in train_data():
...@@ -93,7 +92,7 @@ def main(): ...@@ -93,7 +92,7 @@ def main():
tensor_label = core.LoDTensor() tensor_label = core.LoDTensor()
tensor_label.set(label, place) tensor_label.set(label, place)
outs = exe.run(g_main_program, outs = exe.run(framework.default_main_program(),
feed={"words": tensor_words, feed={"words": tensor_words,
"label": tensor_label}, "label": tensor_label},
fetch_list=[cost, acc]) fetch_list=[cost, acc])
......
...@@ -2,8 +2,7 @@ import paddle.v2 as paddle ...@@ -2,8 +2,7 @@ import paddle.v2 as paddle
import paddle.v2.fluid.layers as layers import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.core as core import paddle.v2.fluid.core as core
import paddle.v2.fluid.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
from paddle.v2.fluid.framework import g_main_program, g_startup_program
from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.executor import Executor
import numpy as np import numpy as np
...@@ -88,10 +87,10 @@ def main(): ...@@ -88,10 +87,10 @@ def main():
place = core.CPUPlace() place = core.CPUPlace()
tensor_words, tensor_label = prepare_feed_data(data, place) tensor_words, tensor_label = prepare_feed_data(data, place)
exe = Executor(place) exe = Executor(place)
exe.run(g_startup_program) exe.run(framework.default_startup_program())
while True: while True:
outs = exe.run(g_main_program, outs = exe.run(framework.default_main_program(),
feed={"words": tensor_words, feed={"words": tensor_words,
"label": tensor_label}, "label": tensor_label},
fetch_list=[cost, acc]) fetch_list=[cost, acc])
......
...@@ -2,20 +2,17 @@ import paddle.v2 as paddle ...@@ -2,20 +2,17 @@ import paddle.v2 as paddle
import paddle.v2.fluid.layers as layers import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.core as core import paddle.v2.fluid.core as core
import paddle.v2.fluid.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
from paddle.v2.fluid.framework import Program
from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.executor import Executor
import numpy as np import numpy as np
startup_program = Program() PASS_NUM = 100
main_program = Program() EMBED_SIZE = 32
HIDDEN_SIZE = 256
embed_size = 32
hidden_size = 256
N = 5 N = 5
batch_size = 32 BATCH_SIZE = 32
is_sparse = True IS_SPARSE = True
word_dict = paddle.dataset.imikolov.build_dict() word_dict = paddle.dataset.imikolov.build_dict()
dict_size = len(word_dict) dict_size = len(word_dict)
...@@ -23,97 +20,67 @@ dict_size = len(word_dict) ...@@ -23,97 +20,67 @@ dict_size = len(word_dict)
first_word = layers.data( first_word = layers.data(
name='firstw', name='firstw',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
second_word = layers.data( second_word = layers.data(
name='secondw', name='secondw',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
third_word = layers.data( third_word = layers.data(
name='thirdw', name='thirdw',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
forth_word = layers.data( forth_word = layers.data(
name='forthw', name='forthw',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
next_word = layers.data( next_word = layers.data(
name='nextw', name='nextw',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
embed_first = layers.embedding( embed_first = layers.embedding(
input=first_word, input=first_word,
size=[dict_size, embed_size], size=[dict_size, EMBED_SIZE],
data_type='float32', data_type='float32',
is_sparse=is_sparse, is_sparse=IS_SPARSE,
param_attr={'name': 'shared_w'}, param_attr={'name': 'shared_w'})
main_program=main_program,
startup_program=startup_program)
embed_second = layers.embedding( embed_second = layers.embedding(
input=second_word, input=second_word,
size=[dict_size, embed_size], size=[dict_size, EMBED_SIZE],
data_type='float32', data_type='float32',
is_sparse=is_sparse, is_sparse=IS_SPARSE,
param_attr={'name': 'shared_w'}, param_attr={'name': 'shared_w'})
main_program=main_program,
startup_program=startup_program)
embed_third = layers.embedding( embed_third = layers.embedding(
input=third_word, input=third_word,
size=[dict_size, embed_size], size=[dict_size, EMBED_SIZE],
data_type='float32', data_type='float32',
is_sparse=is_sparse, is_sparse=IS_SPARSE,
param_attr={'name': 'shared_w'}, param_attr={'name': 'shared_w'})
main_program=main_program,
startup_program=startup_program)
embed_forth = layers.embedding( embed_forth = layers.embedding(
input=forth_word, input=forth_word,
size=[dict_size, embed_size], size=[dict_size, EMBED_SIZE],
data_type='float32', data_type='float32',
is_sparse=is_sparse, is_sparse=IS_SPARSE,
param_attr={'name': 'shared_w'}, param_attr={'name': 'shared_w'})
main_program=main_program,
startup_program=startup_program)
concat_embed = layers.concat( concat_embed = layers.concat(
input=[embed_first, embed_second, embed_third, embed_forth], input=[embed_first, embed_second, embed_third, embed_forth],
axis=1, axis=1)
main_program=main_program,
startup_program=startup_program)
hidden1 = layers.fc(input=concat_embed, hidden1 = layers.fc(input=concat_embed,
size=hidden_size, size=HIDDEN_SIZE,
act='sigmoid', act='sigmoid')
main_program=main_program,
startup_program=startup_program)
predict_word = layers.fc(input=hidden1, predict_word = layers.fc(input=hidden1,
size=dict_size, size=dict_size,
act='softmax', act='softmax')
main_program=main_program,
startup_program=startup_program)
cost = layers.cross_entropy( cost = layers.cross_entropy(
input=predict_word, input=predict_word,
label=next_word, label=next_word)
main_program=main_program, avg_cost = layers.mean(x=cost)
startup_program=startup_program)
avg_cost = layers.mean(
x=cost, main_program=main_program, startup_program=startup_program)
sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001) sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
opts = sgd_optimizer.minimize(avg_cost, startup_program) opts = sgd_optimizer.minimize(avg_cost)
train_reader = paddle.batch( train_reader = paddle.batch(
paddle.dataset.imikolov.train(word_dict, N), batch_size) paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE)
place = core.CPUPlace() place = core.CPUPlace()
exe = Executor(place) exe = Executor(place)
...@@ -122,8 +89,8 @@ exe = Executor(place) ...@@ -122,8 +89,8 @@ exe = Executor(place)
# below exit line. # below exit line.
exit(0) exit(0)
exe.run(startup_program, feed={}, fetch_list=[]) exe.run(framework.default_startup_program())
PASS_NUM = 100
for pass_id in range(PASS_NUM): for pass_id in range(PASS_NUM):
for data in train_reader(): for data in train_reader():
input_data = [[data_idx[idx] for data_idx in data] for idx in xrange(5)] input_data = [[data_idx[idx] for data_idx in data] for idx in xrange(5)]
...@@ -150,7 +117,7 @@ for pass_id in range(PASS_NUM): ...@@ -150,7 +117,7 @@ for pass_id in range(PASS_NUM):
next_tensor = core.LoDTensor() next_tensor = core.LoDTensor()
next_tensor.set(next_data, place) next_tensor.set(next_data, place)
outs = exe.run(main_program, outs = exe.run(framework.default_main_program(),
feed={ feed={
'firstw': first_tensor, 'firstw': first_tensor,
'secondw': second_tensor, 'secondw': second_tensor,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请注册