未验证 提交 48f1a02a 编写于 作者: H hutuxian 提交者: GitHub

upgrade api for rec_case (#1819)

upgrade api for rec_case
上级 747adef5
...@@ -254,9 +254,9 @@ def get_usr_combined_features(): ...@@ -254,9 +254,9 @@ def get_usr_combined_features():
USR_DICT_SIZE = paddle.dataset.movielens.max_user_id() + 1 USR_DICT_SIZE = paddle.dataset.movielens.max_user_id() + 1
uid = layers.data(name='user_id', shape=[1], dtype='int64') uid = fluid.data(name='user_id', shape=[None], dtype='int64')
usr_emb = layers.embedding( usr_emb = fluid.embedding(
input=uid, input=uid,
dtype='float32', dtype='float32',
size=[USR_DICT_SIZE, 32], size=[USR_DICT_SIZE, 32],
...@@ -267,9 +267,9 @@ def get_usr_combined_features(): ...@@ -267,9 +267,9 @@ def get_usr_combined_features():
USR_GENDER_DICT_SIZE = 2 USR_GENDER_DICT_SIZE = 2
usr_gender_id = layers.data(name='gender_id', shape=[1], dtype='int64') usr_gender_id = fluid.data(name='gender_id', shape=[None], dtype='int64')
usr_gender_emb = layers.embedding( usr_gender_emb = fluid.embedding(
input=usr_gender_id, input=usr_gender_id,
size=[USR_GENDER_DICT_SIZE, 16], size=[USR_GENDER_DICT_SIZE, 16],
param_attr='gender_table', param_attr='gender_table',
...@@ -278,9 +278,9 @@ def get_usr_combined_features(): ...@@ -278,9 +278,9 @@ def get_usr_combined_features():
usr_gender_fc = layers.fc(input=usr_gender_emb, size=16) usr_gender_fc = layers.fc(input=usr_gender_emb, size=16)
USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table) USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table)
usr_age_id = layers.data(name='age_id', shape=[1], dtype="int64") usr_age_id = fluid.data(name='age_id', shape=[None], dtype="int64")
usr_age_emb = layers.embedding( usr_age_emb = fluid.embedding(
input=usr_age_id, input=usr_age_id,
size=[USR_AGE_DICT_SIZE, 16], size=[USR_AGE_DICT_SIZE, 16],
is_sparse=IS_SPARSE, is_sparse=IS_SPARSE,
...@@ -289,9 +289,9 @@ def get_usr_combined_features(): ...@@ -289,9 +289,9 @@ def get_usr_combined_features():
usr_age_fc = layers.fc(input=usr_age_emb, size=16) usr_age_fc = layers.fc(input=usr_age_emb, size=16)
USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1 USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1
usr_job_id = layers.data(name='job_id', shape=[1], dtype="int64") usr_job_id = fluid.data(name='job_id', shape=[None], dtype="int64")
usr_job_emb = layers.embedding( usr_job_emb = fluid.embedding(
input=usr_job_id, input=usr_job_id,
size=[USR_JOB_DICT_SIZE, 16], size=[USR_JOB_DICT_SIZE, 16],
param_attr='job_table', param_attr='job_table',
...@@ -320,9 +320,9 @@ def get_mov_combined_features(): ...@@ -320,9 +320,9 @@ def get_mov_combined_features():
MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1 MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1
mov_id = layers.data(name='movie_id', shape=[1], dtype='int64') mov_id = fluid.data(name='movie_id', shape=[None], dtype='int64')
mov_emb = layers.embedding( mov_emb = fluid.embedding(
input=mov_id, input=mov_id,
dtype='float32', dtype='float32',
size=[MOV_DICT_SIZE, 32], size=[MOV_DICT_SIZE, 32],
...@@ -333,10 +333,10 @@ def get_mov_combined_features(): ...@@ -333,10 +333,10 @@ def get_mov_combined_features():
CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories()) CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories())
category_id = layers.data( category_id = fluid.data(
name='category_id', shape=[1], dtype='int64', lod_level=1) name='category_id', shape=[None], dtype='int64', lod_level=1)
mov_categories_emb = layers.embedding( mov_categories_emb = fluid.embedding(
input=category_id, size=[CATEGORY_DICT_SIZE, 32], is_sparse=IS_SPARSE) input=category_id, size=[CATEGORY_DICT_SIZE, 32], is_sparse=IS_SPARSE)
mov_categories_hidden = layers.sequence_pool( mov_categories_hidden = layers.sequence_pool(
...@@ -344,10 +344,10 @@ def get_mov_combined_features(): ...@@ -344,10 +344,10 @@ def get_mov_combined_features():
MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict()) MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict())
mov_title_id = layers.data( mov_title_id = fluid.data(
name='movie_title', shape=[1], dtype='int64', lod_level=1) name='movie_title', shape=[None], dtype='int64', lod_level=1)
mov_title_emb = layers.embedding( mov_title_emb = fluid.embedding(
input=mov_title_id, size=[MOV_TITLE_DICT_SIZE, 32], is_sparse=IS_SPARSE) input=mov_title_id, size=[MOV_TITLE_DICT_SIZE, 32], is_sparse=IS_SPARSE)
mov_title_conv = nets.sequence_conv_pool( mov_title_conv = nets.sequence_conv_pool(
...@@ -390,7 +390,7 @@ def train_program(): ...@@ -390,7 +390,7 @@ def train_program():
scale_infer = inference_program() scale_infer = inference_program()
label = layers.data(name='score', shape=[1], dtype='float32') label = fluid.data(name='score', shape=[None, 1], dtype='float32')
square_cost = layers.square_error_cost(input=scale_infer, label=label) square_cost = layers.square_error_cost(input=scale_infer, label=label)
avg_cost = layers.mean(square_cost) avg_cost = layers.mean(square_cost)
...@@ -416,12 +416,12 @@ place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() ...@@ -416,12 +416,12 @@ place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
下一步是为训练和测试定义数据提供器。提供器读入一个大小为 `BATCH_SIZE`的数据。`paddle.dataset.movielens.train` 每次会在乱序化后提供一个大小为`BATCH_SIZE`的数据,乱序化的大小为缓存大小`buf_size`。 下一步是为训练和测试定义数据提供器。提供器读入一个大小为 `BATCH_SIZE`的数据。`paddle.dataset.movielens.train` 每次会在乱序化后提供一个大小为`BATCH_SIZE`的数据,乱序化的大小为缓存大小`buf_size`。
```python ```python
train_reader = paddle.batch( train_reader = fluid.io.batch(
paddle.reader.shuffle( fluid.io.shuffle(
paddle.dataset.movielens.train(), buf_size=8192), paddle.dataset.movielens.train(), buf_size=8192),
batch_size=BATCH_SIZE) batch_size=BATCH_SIZE)
test_reader = paddle.batch( test_reader = fluid.io.batch(
paddle.dataset.movielens.test(), batch_size=BATCH_SIZE) paddle.dataset.movielens.test(), batch_size=BATCH_SIZE)
``` ```
...@@ -533,13 +533,13 @@ train_loop() ...@@ -533,13 +533,13 @@ train_loop()
```python ```python
infer_movie_id = 783 infer_movie_id = 783
infer_movie_name = paddle.dataset.movielens.movie_info()[infer_movie_id].title infer_movie_name = paddle.dataset.movielens.movie_info()[infer_movie_id].title
user_id = fluid.create_lod_tensor([[np.int64(1)]], [[1]], place) user_id = np.array([1]).astype("int64").reshape(-1)
gender_id = fluid.create_lod_tensor([[np.int64(1)]], [[1]], place) gender_id = np.array([1]).astype("int64").reshape(-1)
age_id = fluid.create_lod_tensor([[np.int64(0)]], [[1]], place) age_id = np.array([0]).astype("int64").reshape(-1)
job_id = fluid.create_lod_tensor([[np.int64(10)]], [[1]], place) job_id = np.array([10]).astype("int64").reshape(-1)
movie_id = fluid.create_lod_tensor([[np.int64(783)]], [[1]], place) # Hunchback of Notre Dame movie_id = np.array([783]).astype("int64").reshape(-1) # Hunchback of Notre Dame
category_id = fluid.create_lod_tensor([np.array([10, 8, 9], dtype='int64')], [[3]], place) # Animation, Children's, Musical category_id = fluid.create_lod_tensor(np.array([10, 8, 9], dtype='int64'), [[3]], place) # Animation, Children's, Musical
movie_title = fluid.create_lod_tensor([np.array([1069, 4140, 2923, 710, 988], dtype='int64')], [[5]], movie_title = fluid.create_lod_tensor(np.array([1069, 4140, 2923, 710, 988], dtype='int64'), [[5]],
place) # 'hunchback','of','notre','dame','the' place) # 'hunchback','of','notre','dame','the'
``` ```
......
...@@ -241,9 +241,9 @@ def get_usr_combined_features(): ...@@ -241,9 +241,9 @@ def get_usr_combined_features():
USR_DICT_SIZE = paddle.dataset.movielens.max_user_id() + 1 USR_DICT_SIZE = paddle.dataset.movielens.max_user_id() + 1
uid = layers.data(name='user_id', shape=[1], dtype='int64') uid = fluid.data(name='user_id', shape=[None], dtype='int64')
usr_emb = layers.embedding( usr_emb = fluid.embedding(
input=uid, input=uid,
dtype='float32', dtype='float32',
size=[USR_DICT_SIZE, 32], size=[USR_DICT_SIZE, 32],
...@@ -254,9 +254,9 @@ def get_usr_combined_features(): ...@@ -254,9 +254,9 @@ def get_usr_combined_features():
USR_GENDER_DICT_SIZE = 2 USR_GENDER_DICT_SIZE = 2
usr_gender_id = layers.data(name='gender_id', shape=[1], dtype='int64') usr_gender_id = fluid.data(name='gender_id', shape=[None], dtype='int64')
usr_gender_emb = layers.embedding( usr_gender_emb = fluid.embedding(
input=usr_gender_id, input=usr_gender_id,
size=[USR_GENDER_DICT_SIZE, 16], size=[USR_GENDER_DICT_SIZE, 16],
param_attr='gender_table', param_attr='gender_table',
...@@ -265,9 +265,9 @@ def get_usr_combined_features(): ...@@ -265,9 +265,9 @@ def get_usr_combined_features():
usr_gender_fc = layers.fc(input=usr_gender_emb, size=16) usr_gender_fc = layers.fc(input=usr_gender_emb, size=16)
USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table) USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table)
usr_age_id = layers.data(name='age_id', shape=[1], dtype="int64") usr_age_id = fluid.data(name='age_id', shape=[None], dtype="int64")
usr_age_emb = layers.embedding( usr_age_emb = fluid.embedding(
input=usr_age_id, input=usr_age_id,
size=[USR_AGE_DICT_SIZE, 16], size=[USR_AGE_DICT_SIZE, 16],
is_sparse=IS_SPARSE, is_sparse=IS_SPARSE,
...@@ -276,9 +276,9 @@ def get_usr_combined_features(): ...@@ -276,9 +276,9 @@ def get_usr_combined_features():
usr_age_fc = layers.fc(input=usr_age_emb, size=16) usr_age_fc = layers.fc(input=usr_age_emb, size=16)
USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1 USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1
usr_job_id = layers.data(name='job_id', shape=[1], dtype="int64") usr_job_id = fluid.data(name='job_id', shape=[None], dtype="int64")
usr_job_emb = layers.embedding( usr_job_emb = fluid.embedding(
input=usr_job_id, input=usr_job_id,
size=[USR_JOB_DICT_SIZE, 16], size=[USR_JOB_DICT_SIZE, 16],
param_attr='job_table', param_attr='job_table',
...@@ -307,9 +307,9 @@ def get_mov_combined_features(): ...@@ -307,9 +307,9 @@ def get_mov_combined_features():
MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1 MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1
mov_id = layers.data(name='movie_id', shape=[1], dtype='int64') mov_id = fluid.data(name='movie_id', shape=[None], dtype='int64')
mov_emb = layers.embedding( mov_emb = fluid.embedding(
input=mov_id, input=mov_id,
dtype='float32', dtype='float32',
size=[MOV_DICT_SIZE, 32], size=[MOV_DICT_SIZE, 32],
...@@ -320,10 +320,10 @@ def get_mov_combined_features(): ...@@ -320,10 +320,10 @@ def get_mov_combined_features():
CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories()) CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories())
category_id = layers.data( category_id = fluid.data(
name='category_id', shape=[1], dtype='int64', lod_level=1) name='category_id', shape=[None], dtype='int64', lod_level=1)
mov_categories_emb = layers.embedding( mov_categories_emb = fluid.embedding(
input=category_id, size=[CATEGORY_DICT_SIZE, 32], is_sparse=IS_SPARSE) input=category_id, size=[CATEGORY_DICT_SIZE, 32], is_sparse=IS_SPARSE)
mov_categories_hidden = layers.sequence_pool( mov_categories_hidden = layers.sequence_pool(
...@@ -331,10 +331,10 @@ def get_mov_combined_features(): ...@@ -331,10 +331,10 @@ def get_mov_combined_features():
MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict()) MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict())
mov_title_id = layers.data( mov_title_id = fluid.data(
name='movie_title', shape=[1], dtype='int64', lod_level=1) name='movie_title', shape=[None], dtype='int64', lod_level=1)
mov_title_emb = layers.embedding( mov_title_emb = fluid.embedding(
input=mov_title_id, size=[MOV_TITLE_DICT_SIZE, 32], is_sparse=IS_SPARSE) input=mov_title_id, size=[MOV_TITLE_DICT_SIZE, 32], is_sparse=IS_SPARSE)
mov_title_conv = nets.sequence_conv_pool( mov_title_conv = nets.sequence_conv_pool(
...@@ -378,7 +378,7 @@ def train_program(): ...@@ -378,7 +378,7 @@ def train_program():
scale_infer = inference_program() scale_infer = inference_program()
label = layers.data(name='score', shape=[1], dtype='float32') label = fluid.data(name='score', shape=[None, 1], dtype='float32')
square_cost = layers.square_error_cost(input=scale_infer, label=label) square_cost = layers.square_error_cost(input=scale_infer, label=label)
avg_cost = layers.mean(square_cost) avg_cost = layers.mean(square_cost)
...@@ -404,12 +404,12 @@ place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() ...@@ -404,12 +404,12 @@ place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
The next step is to define the data providers for training and testing. Each provider reads in a batch of `BATCH_SIZE` samples. `paddle.dataset.movielens.train` yields each batch of `BATCH_SIZE` samples after shuffling, and the size of the shuffle buffer is `buf_size`. The next step is to define the data providers for training and testing. Each provider reads in a batch of `BATCH_SIZE` samples. `paddle.dataset.movielens.train` yields each batch of `BATCH_SIZE` samples after shuffling, and the size of the shuffle buffer is `buf_size`.
```python ```python
train_reader = paddle.batch( train_reader = fluid.io.batch(
paddle.reader.shuffle( fluid.io.shuffle(
paddle.dataset.movielens.train(), buf_size=8192), paddle.dataset.movielens.train(), buf_size=8192),
batch_size=BATCH_SIZE) batch_size=BATCH_SIZE)
test_reader = paddle.batch( test_reader = fluid.io.batch(
paddle.dataset.movielens.test(), batch_size=BATCH_SIZE) paddle.dataset.movielens.test(), batch_size=BATCH_SIZE)
``` ```
...@@ -521,13 +521,13 @@ In this prediction example, we try to predict the score given by user with ID1 f ...@@ -521,13 +521,13 @@ In this prediction example, we try to predict the score given by user with ID1 f
```python ```python
infer_movie_id = 783 infer_movie_id = 783
infer_movie_name = paddle.dataset.movielens.movie_info()[infer_movie_id].title infer_movie_name = paddle.dataset.movielens.movie_info()[infer_movie_id].title
user_id = fluid.create_lod_tensor([[1]], [[1]], place) user_id = np.array([1]).astype("int64").reshape(-1)
gender_id = fluid.create_lod_tensor([[1]], [[1]], place) gender_id = np.array([1]).astype("int64").reshape(-1)
age_id = fluid.create_lod_tensor([[0]], [[1]], place) age_id = np.array([0]).astype("int64").reshape(-1)
job_id = fluid.create_lod_tensor([[10]], [[1]], place) job_id = np.array([10]).astype("int64").reshape(-1)
movie_id = fluid.create_lod_tensor([[783]], [[1]], place) # Hunchback of Notre Dame movie_id = np.array([783]).astype("int64").reshape(-1) # Hunchback of Notre Dame
category_id = fluid.create_lod_tensor([[10, 8, 9]], [[3]], place) # Animation, Children's, Musical category_id = fluid.create_lod_tensor(np.array([10, 8, 9], dtype='int64'), [[3]], place) # Animation, Children's, Musical
movie_title = fluid.create_lod_tensor([[1069, 4140, 2923, 710, 988]], [[5]], movie_title = fluid.create_lod_tensor(np.array([1069, 4140, 2923, 710, 988], dtype='int64'), [[5]],
place) # 'hunchback','of','notre','dame','the' place) # 'hunchback','of','notre','dame','the'
``` ```
......
...@@ -296,9 +296,9 @@ def get_usr_combined_features(): ...@@ -296,9 +296,9 @@ def get_usr_combined_features():
USR_DICT_SIZE = paddle.dataset.movielens.max_user_id() + 1 USR_DICT_SIZE = paddle.dataset.movielens.max_user_id() + 1
uid = layers.data(name='user_id', shape=[1], dtype='int64') uid = fluid.data(name='user_id', shape=[None], dtype='int64')
usr_emb = layers.embedding( usr_emb = fluid.embedding(
input=uid, input=uid,
dtype='float32', dtype='float32',
size=[USR_DICT_SIZE, 32], size=[USR_DICT_SIZE, 32],
...@@ -309,9 +309,9 @@ def get_usr_combined_features(): ...@@ -309,9 +309,9 @@ def get_usr_combined_features():
USR_GENDER_DICT_SIZE = 2 USR_GENDER_DICT_SIZE = 2
usr_gender_id = layers.data(name='gender_id', shape=[1], dtype='int64') usr_gender_id = fluid.data(name='gender_id', shape=[None], dtype='int64')
usr_gender_emb = layers.embedding( usr_gender_emb = fluid.embedding(
input=usr_gender_id, input=usr_gender_id,
size=[USR_GENDER_DICT_SIZE, 16], size=[USR_GENDER_DICT_SIZE, 16],
param_attr='gender_table', param_attr='gender_table',
...@@ -320,9 +320,9 @@ def get_usr_combined_features(): ...@@ -320,9 +320,9 @@ def get_usr_combined_features():
usr_gender_fc = layers.fc(input=usr_gender_emb, size=16) usr_gender_fc = layers.fc(input=usr_gender_emb, size=16)
USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table) USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table)
usr_age_id = layers.data(name='age_id', shape=[1], dtype="int64") usr_age_id = fluid.data(name='age_id', shape=[None], dtype="int64")
usr_age_emb = layers.embedding( usr_age_emb = fluid.embedding(
input=usr_age_id, input=usr_age_id,
size=[USR_AGE_DICT_SIZE, 16], size=[USR_AGE_DICT_SIZE, 16],
is_sparse=IS_SPARSE, is_sparse=IS_SPARSE,
...@@ -331,9 +331,9 @@ def get_usr_combined_features(): ...@@ -331,9 +331,9 @@ def get_usr_combined_features():
usr_age_fc = layers.fc(input=usr_age_emb, size=16) usr_age_fc = layers.fc(input=usr_age_emb, size=16)
USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1 USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1
usr_job_id = layers.data(name='job_id', shape=[1], dtype="int64") usr_job_id = fluid.data(name='job_id', shape=[None], dtype="int64")
usr_job_emb = layers.embedding( usr_job_emb = fluid.embedding(
input=usr_job_id, input=usr_job_id,
size=[USR_JOB_DICT_SIZE, 16], size=[USR_JOB_DICT_SIZE, 16],
param_attr='job_table', param_attr='job_table',
...@@ -362,9 +362,9 @@ def get_mov_combined_features(): ...@@ -362,9 +362,9 @@ def get_mov_combined_features():
MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1 MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1
mov_id = layers.data(name='movie_id', shape=[1], dtype='int64') mov_id = fluid.data(name='movie_id', shape=[None], dtype='int64')
mov_emb = layers.embedding( mov_emb = fluid.embedding(
input=mov_id, input=mov_id,
dtype='float32', dtype='float32',
size=[MOV_DICT_SIZE, 32], size=[MOV_DICT_SIZE, 32],
...@@ -375,10 +375,10 @@ def get_mov_combined_features(): ...@@ -375,10 +375,10 @@ def get_mov_combined_features():
CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories()) CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories())
category_id = layers.data( category_id = fluid.data(
name='category_id', shape=[1], dtype='int64', lod_level=1) name='category_id', shape=[None], dtype='int64', lod_level=1)
mov_categories_emb = layers.embedding( mov_categories_emb = fluid.embedding(
input=category_id, size=[CATEGORY_DICT_SIZE, 32], is_sparse=IS_SPARSE) input=category_id, size=[CATEGORY_DICT_SIZE, 32], is_sparse=IS_SPARSE)
mov_categories_hidden = layers.sequence_pool( mov_categories_hidden = layers.sequence_pool(
...@@ -386,10 +386,10 @@ def get_mov_combined_features(): ...@@ -386,10 +386,10 @@ def get_mov_combined_features():
MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict()) MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict())
mov_title_id = layers.data( mov_title_id = fluid.data(
name='movie_title', shape=[1], dtype='int64', lod_level=1) name='movie_title', shape=[None], dtype='int64', lod_level=1)
mov_title_emb = layers.embedding( mov_title_emb = fluid.embedding(
input=mov_title_id, size=[MOV_TITLE_DICT_SIZE, 32], is_sparse=IS_SPARSE) input=mov_title_id, size=[MOV_TITLE_DICT_SIZE, 32], is_sparse=IS_SPARSE)
mov_title_conv = nets.sequence_conv_pool( mov_title_conv = nets.sequence_conv_pool(
...@@ -432,7 +432,7 @@ def train_program(): ...@@ -432,7 +432,7 @@ def train_program():
scale_infer = inference_program() scale_infer = inference_program()
label = layers.data(name='score', shape=[1], dtype='float32') label = fluid.data(name='score', shape=[None, 1], dtype='float32')
square_cost = layers.square_error_cost(input=scale_infer, label=label) square_cost = layers.square_error_cost(input=scale_infer, label=label)
avg_cost = layers.mean(square_cost) avg_cost = layers.mean(square_cost)
...@@ -458,12 +458,12 @@ place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() ...@@ -458,12 +458,12 @@ place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
下一步是为训练和测试定义数据提供器。提供器读入一个大小为 `BATCH_SIZE`的数据。`paddle.dataset.movielens.train` 每次会在乱序化后提供一个大小为`BATCH_SIZE`的数据,乱序化的大小为缓存大小`buf_size`。 下一步是为训练和测试定义数据提供器。提供器读入一个大小为 `BATCH_SIZE`的数据。`paddle.dataset.movielens.train` 每次会在乱序化后提供一个大小为`BATCH_SIZE`的数据,乱序化的大小为缓存大小`buf_size`。
```python ```python
train_reader = paddle.batch( train_reader = fluid.io.batch(
paddle.reader.shuffle( fluid.io.shuffle(
paddle.dataset.movielens.train(), buf_size=8192), paddle.dataset.movielens.train(), buf_size=8192),
batch_size=BATCH_SIZE) batch_size=BATCH_SIZE)
test_reader = paddle.batch( test_reader = fluid.io.batch(
paddle.dataset.movielens.test(), batch_size=BATCH_SIZE) paddle.dataset.movielens.test(), batch_size=BATCH_SIZE)
``` ```
...@@ -575,13 +575,13 @@ train_loop() ...@@ -575,13 +575,13 @@ train_loop()
```python ```python
infer_movie_id = 783 infer_movie_id = 783
infer_movie_name = paddle.dataset.movielens.movie_info()[infer_movie_id].title infer_movie_name = paddle.dataset.movielens.movie_info()[infer_movie_id].title
user_id = fluid.create_lod_tensor([[np.int64(1)]], [[1]], place) user_id = np.array([1]).astype("int64").reshape(-1)
gender_id = fluid.create_lod_tensor([[np.int64(1)]], [[1]], place) gender_id = np.array([1]).astype("int64").reshape(-1)
age_id = fluid.create_lod_tensor([[np.int64(0)]], [[1]], place) age_id = np.array([0]).astype("int64").reshape(-1)
job_id = fluid.create_lod_tensor([[np.int64(10)]], [[1]], place) job_id = np.array([10]).astype("int64").reshape(-1)
movie_id = fluid.create_lod_tensor([[np.int64(783)]], [[1]], place) # Hunchback of Notre Dame movie_id = np.array([783]).astype("int64").reshape(-1) # Hunchback of Notre Dame
category_id = fluid.create_lod_tensor([np.array([10, 8, 9], dtype='int64')], [[3]], place) # Animation, Children's, Musical category_id = fluid.create_lod_tensor(np.array([10, 8, 9], dtype='int64'), [[3]], place) # Animation, Children's, Musical
movie_title = fluid.create_lod_tensor([np.array([1069, 4140, 2923, 710, 988], dtype='int64')], [[5]], movie_title = fluid.create_lod_tensor(np.array([1069, 4140, 2923, 710, 988], dtype='int64'), [[5]],
place) # 'hunchback','of','notre','dame','the' place) # 'hunchback','of','notre','dame','the'
``` ```
......
...@@ -283,9 +283,9 @@ def get_usr_combined_features(): ...@@ -283,9 +283,9 @@ def get_usr_combined_features():
USR_DICT_SIZE = paddle.dataset.movielens.max_user_id() + 1 USR_DICT_SIZE = paddle.dataset.movielens.max_user_id() + 1
uid = layers.data(name='user_id', shape=[1], dtype='int64') uid = fluid.data(name='user_id', shape=[None], dtype='int64')
usr_emb = layers.embedding( usr_emb = fluid.embedding(
input=uid, input=uid,
dtype='float32', dtype='float32',
size=[USR_DICT_SIZE, 32], size=[USR_DICT_SIZE, 32],
...@@ -296,9 +296,9 @@ def get_usr_combined_features(): ...@@ -296,9 +296,9 @@ def get_usr_combined_features():
USR_GENDER_DICT_SIZE = 2 USR_GENDER_DICT_SIZE = 2
usr_gender_id = layers.data(name='gender_id', shape=[1], dtype='int64') usr_gender_id = fluid.data(name='gender_id', shape=[None], dtype='int64')
usr_gender_emb = layers.embedding( usr_gender_emb = fluid.embedding(
input=usr_gender_id, input=usr_gender_id,
size=[USR_GENDER_DICT_SIZE, 16], size=[USR_GENDER_DICT_SIZE, 16],
param_attr='gender_table', param_attr='gender_table',
...@@ -307,9 +307,9 @@ def get_usr_combined_features(): ...@@ -307,9 +307,9 @@ def get_usr_combined_features():
usr_gender_fc = layers.fc(input=usr_gender_emb, size=16) usr_gender_fc = layers.fc(input=usr_gender_emb, size=16)
USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table) USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table)
usr_age_id = layers.data(name='age_id', shape=[1], dtype="int64") usr_age_id = fluid.data(name='age_id', shape=[None], dtype="int64")
usr_age_emb = layers.embedding( usr_age_emb = fluid.embedding(
input=usr_age_id, input=usr_age_id,
size=[USR_AGE_DICT_SIZE, 16], size=[USR_AGE_DICT_SIZE, 16],
is_sparse=IS_SPARSE, is_sparse=IS_SPARSE,
...@@ -318,9 +318,9 @@ def get_usr_combined_features(): ...@@ -318,9 +318,9 @@ def get_usr_combined_features():
usr_age_fc = layers.fc(input=usr_age_emb, size=16) usr_age_fc = layers.fc(input=usr_age_emb, size=16)
USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1 USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1
usr_job_id = layers.data(name='job_id', shape=[1], dtype="int64") usr_job_id = fluid.data(name='job_id', shape=[None], dtype="int64")
usr_job_emb = layers.embedding( usr_job_emb = fluid.embedding(
input=usr_job_id, input=usr_job_id,
size=[USR_JOB_DICT_SIZE, 16], size=[USR_JOB_DICT_SIZE, 16],
param_attr='job_table', param_attr='job_table',
...@@ -349,9 +349,9 @@ def get_mov_combined_features(): ...@@ -349,9 +349,9 @@ def get_mov_combined_features():
MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1 MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1
mov_id = layers.data(name='movie_id', shape=[1], dtype='int64') mov_id = fluid.data(name='movie_id', shape=[None], dtype='int64')
mov_emb = layers.embedding( mov_emb = fluid.embedding(
input=mov_id, input=mov_id,
dtype='float32', dtype='float32',
size=[MOV_DICT_SIZE, 32], size=[MOV_DICT_SIZE, 32],
...@@ -362,10 +362,10 @@ def get_mov_combined_features(): ...@@ -362,10 +362,10 @@ def get_mov_combined_features():
CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories()) CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories())
category_id = layers.data( category_id = fluid.data(
name='category_id', shape=[1], dtype='int64', lod_level=1) name='category_id', shape=[None], dtype='int64', lod_level=1)
mov_categories_emb = layers.embedding( mov_categories_emb = fluid.embedding(
input=category_id, size=[CATEGORY_DICT_SIZE, 32], is_sparse=IS_SPARSE) input=category_id, size=[CATEGORY_DICT_SIZE, 32], is_sparse=IS_SPARSE)
mov_categories_hidden = layers.sequence_pool( mov_categories_hidden = layers.sequence_pool(
...@@ -373,10 +373,10 @@ def get_mov_combined_features(): ...@@ -373,10 +373,10 @@ def get_mov_combined_features():
MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict()) MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict())
mov_title_id = layers.data( mov_title_id = fluid.data(
name='movie_title', shape=[1], dtype='int64', lod_level=1) name='movie_title', shape=[None], dtype='int64', lod_level=1)
mov_title_emb = layers.embedding( mov_title_emb = fluid.embedding(
input=mov_title_id, size=[MOV_TITLE_DICT_SIZE, 32], is_sparse=IS_SPARSE) input=mov_title_id, size=[MOV_TITLE_DICT_SIZE, 32], is_sparse=IS_SPARSE)
mov_title_conv = nets.sequence_conv_pool( mov_title_conv = nets.sequence_conv_pool(
...@@ -420,7 +420,7 @@ def train_program(): ...@@ -420,7 +420,7 @@ def train_program():
scale_infer = inference_program() scale_infer = inference_program()
label = layers.data(name='score', shape=[1], dtype='float32') label = fluid.data(name='score', shape=[None, 1], dtype='float32')
square_cost = layers.square_error_cost(input=scale_infer, label=label) square_cost = layers.square_error_cost(input=scale_infer, label=label)
avg_cost = layers.mean(square_cost) avg_cost = layers.mean(square_cost)
...@@ -446,12 +446,12 @@ place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() ...@@ -446,12 +446,12 @@ place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
The next step is to define the data providers for training and testing. Each provider reads in a batch of `BATCH_SIZE` samples. `paddle.dataset.movielens.train` yields each batch of `BATCH_SIZE` samples after shuffling, and the size of the shuffle buffer is `buf_size`. The next step is to define the data providers for training and testing. Each provider reads in a batch of `BATCH_SIZE` samples. `paddle.dataset.movielens.train` yields each batch of `BATCH_SIZE` samples after shuffling, and the size of the shuffle buffer is `buf_size`.
```python ```python
train_reader = paddle.batch( train_reader = fluid.io.batch(
paddle.reader.shuffle( fluid.io.shuffle(
paddle.dataset.movielens.train(), buf_size=8192), paddle.dataset.movielens.train(), buf_size=8192),
batch_size=BATCH_SIZE) batch_size=BATCH_SIZE)
test_reader = paddle.batch( test_reader = fluid.io.batch(
paddle.dataset.movielens.test(), batch_size=BATCH_SIZE) paddle.dataset.movielens.test(), batch_size=BATCH_SIZE)
``` ```
...@@ -563,13 +563,13 @@ In this prediction example, we try to predict the score given by user with ID1 f ...@@ -563,13 +563,13 @@ In this prediction example, we try to predict the score given by user with ID1 f
```python ```python
infer_movie_id = 783 infer_movie_id = 783
infer_movie_name = paddle.dataset.movielens.movie_info()[infer_movie_id].title infer_movie_name = paddle.dataset.movielens.movie_info()[infer_movie_id].title
user_id = fluid.create_lod_tensor([[1]], [[1]], place) user_id = np.array([1]).astype("int64").reshape(-1)
gender_id = fluid.create_lod_tensor([[1]], [[1]], place) gender_id = np.array([1]).astype("int64").reshape(-1)
age_id = fluid.create_lod_tensor([[0]], [[1]], place) age_id = np.array([0]).astype("int64").reshape(-1)
job_id = fluid.create_lod_tensor([[10]], [[1]], place) job_id = np.array([10]).astype("int64").reshape(-1)
movie_id = fluid.create_lod_tensor([[783]], [[1]], place) # Hunchback of Notre Dame movie_id = np.array([783]).astype("int64").reshape(-1) # Hunchback of Notre Dame
category_id = fluid.create_lod_tensor([[10, 8, 9]], [[3]], place) # Animation, Children's, Musical category_id = fluid.create_lod_tensor(np.array([10, 8, 9], dtype='int64'), [[3]], place) # Animation, Children's, Musical
movie_title = fluid.create_lod_tensor([[1069, 4140, 2923, 710, 988]], [[5]], movie_title = fluid.create_lod_tensor(np.array([1069, 4140, 2923, 710, 988], dtype='int64'), [[5]],
place) # 'hunchback','of','notre','dame','the' place) # 'hunchback','of','notre','dame','the'
``` ```
......
...@@ -44,9 +44,9 @@ def get_usr_combined_features(): ...@@ -44,9 +44,9 @@ def get_usr_combined_features():
USR_DICT_SIZE = paddle.dataset.movielens.max_user_id() + 1 USR_DICT_SIZE = paddle.dataset.movielens.max_user_id() + 1
uid = layers.data(name='user_id', shape=[1], dtype='int64') uid = fluid.data(name='user_id', shape=[None], dtype='int64')
usr_emb = layers.embedding( usr_emb = fluid.embedding(
input=uid, input=uid,
dtype='float32', dtype='float32',
size=[USR_DICT_SIZE, 32], size=[USR_DICT_SIZE, 32],
...@@ -57,9 +57,9 @@ def get_usr_combined_features(): ...@@ -57,9 +57,9 @@ def get_usr_combined_features():
USR_GENDER_DICT_SIZE = 2 USR_GENDER_DICT_SIZE = 2
usr_gender_id = layers.data(name='gender_id', shape=[1], dtype='int64') usr_gender_id = fluid.data(name='gender_id', shape=[None], dtype='int64')
usr_gender_emb = layers.embedding( usr_gender_emb = fluid.embedding(
input=usr_gender_id, input=usr_gender_id,
size=[USR_GENDER_DICT_SIZE, 16], size=[USR_GENDER_DICT_SIZE, 16],
param_attr='gender_table', param_attr='gender_table',
...@@ -68,9 +68,9 @@ def get_usr_combined_features(): ...@@ -68,9 +68,9 @@ def get_usr_combined_features():
usr_gender_fc = layers.fc(input=usr_gender_emb, size=16) usr_gender_fc = layers.fc(input=usr_gender_emb, size=16)
USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table) USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table)
usr_age_id = layers.data(name='age_id', shape=[1], dtype="int64") usr_age_id = fluid.data(name='age_id', shape=[None], dtype="int64")
usr_age_emb = layers.embedding( usr_age_emb = fluid.embedding(
input=usr_age_id, input=usr_age_id,
size=[USR_AGE_DICT_SIZE, 16], size=[USR_AGE_DICT_SIZE, 16],
is_sparse=IS_SPARSE, is_sparse=IS_SPARSE,
...@@ -79,9 +79,9 @@ def get_usr_combined_features(): ...@@ -79,9 +79,9 @@ def get_usr_combined_features():
usr_age_fc = layers.fc(input=usr_age_emb, size=16) usr_age_fc = layers.fc(input=usr_age_emb, size=16)
USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1 USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1
usr_job_id = layers.data(name='job_id', shape=[1], dtype="int64") usr_job_id = fluid.data(name='job_id', shape=[None], dtype="int64")
usr_job_emb = layers.embedding( usr_job_emb = fluid.embedding(
input=usr_job_id, input=usr_job_id,
size=[USR_JOB_DICT_SIZE, 16], size=[USR_JOB_DICT_SIZE, 16],
param_attr='job_table', param_attr='job_table',
...@@ -101,9 +101,9 @@ def get_mov_combined_features(): ...@@ -101,9 +101,9 @@ def get_mov_combined_features():
MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1 MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1
mov_id = layers.data(name='movie_id', shape=[1], dtype='int64') mov_id = fluid.data(name='movie_id', shape=[None], dtype='int64')
mov_emb = layers.embedding( mov_emb = fluid.embedding(
input=mov_id, input=mov_id,
dtype='float32', dtype='float32',
size=[MOV_DICT_SIZE, 32], size=[MOV_DICT_SIZE, 32],
...@@ -114,10 +114,10 @@ def get_mov_combined_features(): ...@@ -114,10 +114,10 @@ def get_mov_combined_features():
CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories()) CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories())
category_id = layers.data( category_id = fluid.data(
name='category_id', shape=[1], dtype='int64', lod_level=1) name='category_id', shape=[None], dtype='int64', lod_level=1)
mov_categories_emb = layers.embedding( mov_categories_emb = fluid.embedding(
input=category_id, size=[CATEGORY_DICT_SIZE, 32], is_sparse=IS_SPARSE) input=category_id, size=[CATEGORY_DICT_SIZE, 32], is_sparse=IS_SPARSE)
mov_categories_hidden = layers.sequence_pool( mov_categories_hidden = layers.sequence_pool(
...@@ -125,13 +125,11 @@ def get_mov_combined_features(): ...@@ -125,13 +125,11 @@ def get_mov_combined_features():
MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict()) MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict())
mov_title_id = layers.data( mov_title_id = fluid.data(
name='movie_title', shape=[1], dtype='int64', lod_level=1) name='movie_title', shape=[None], dtype='int64', lod_level=1)
mov_title_emb = layers.embedding( mov_title_emb = fluid.embedding(
input=mov_title_id, input=mov_title_id, size=[MOV_TITLE_DICT_SIZE, 32], is_sparse=IS_SPARSE)
size=[MOV_TITLE_DICT_SIZE, 32],
is_sparse=IS_SPARSE)
mov_title_conv = nets.sequence_conv_pool( mov_title_conv = nets.sequence_conv_pool(
input=mov_title_emb, input=mov_title_emb,
...@@ -152,11 +150,10 @@ def inference_program(): ...@@ -152,11 +150,10 @@ def inference_program():
usr_combined_features = get_usr_combined_features() usr_combined_features = get_usr_combined_features()
mov_combined_features = get_mov_combined_features() mov_combined_features = get_mov_combined_features()
inference = layers.cos_sim( inference = layers.cos_sim(X=usr_combined_features, Y=mov_combined_features)
X=usr_combined_features, Y=mov_combined_features)
scale_infer = layers.scale(x=inference, scale=5.0) scale_infer = layers.scale(x=inference, scale=5.0)
label = layers.data(name='score', shape=[1], dtype='float32') label = fluid.data(name='score', shape=[None, 1], dtype='float32')
square_cost = layers.square_error_cost(input=scale_infer, label=label) square_cost = layers.square_error_cost(input=scale_infer, label=label)
avg_cost = layers.mean(square_cost) avg_cost = layers.mean(square_cost)
...@@ -171,16 +168,15 @@ def train(use_cuda, params_dirname): ...@@ -171,16 +168,15 @@ def train(use_cuda, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
if args.enable_ce: if args.enable_ce:
train_reader = paddle.batch( train_reader = fluid.io.batch(
paddle.dataset.movielens.train(), batch_size=BATCH_SIZE) paddle.dataset.movielens.train(), batch_size=BATCH_SIZE)
test_reader = paddle.batch( test_reader = fluid.io.batch(
paddle.dataset.movielens.test(), batch_size=BATCH_SIZE) paddle.dataset.movielens.test(), batch_size=BATCH_SIZE)
else: else:
train_reader = paddle.batch( train_reader = fluid.io.batch(
paddle.reader.shuffle( fluid.io.shuffle(paddle.dataset.movielens.train(), buf_size=8192),
paddle.dataset.movielens.train(), buf_size=8192),
batch_size=BATCH_SIZE) batch_size=BATCH_SIZE)
test_reader = paddle.batch( test_reader = fluid.io.batch(
paddle.dataset.movielens.test(), batch_size=BATCH_SIZE) paddle.dataset.movielens.test(), batch_size=BATCH_SIZE)
feed_order = [ feed_order = [
...@@ -220,8 +216,7 @@ def train(use_cuda, params_dirname): ...@@ -220,8 +216,7 @@ def train(use_cuda, params_dirname):
def train_loop(): def train_loop():
feed_list = [ feed_list = [
main_program.global_block().var(var_name) main_program.global_block().var(var_name) for var_name in feed_order
for var_name in feed_order
] ]
feeder = fluid.DataFeeder(feed_list, place) feeder = fluid.DataFeeder(feed_list, place)
exe.run(star_program) exe.run(star_program)
...@@ -297,28 +292,27 @@ def infer(use_cuda, params_dirname): ...@@ -297,28 +292,27 @@ def infer(use_cuda, params_dirname):
# Correspondingly, recursive_sequence_lengths = [[3, 2]] contains one # Correspondingly, recursive_sequence_lengths = [[3, 2]] contains one
# level of detail info, indicating that `data` consists of two sequences # level of detail info, indicating that `data` consists of two sequences
# of length 3 and 2, respectively. # of length 3 and 2, respectively.
user_id = fluid.create_lod_tensor([[np.int64(1)]], [[1]], place) user_id = np.array([1]).astype("int64").reshape(-1)
assert feed_target_names[1] == "gender_id" assert feed_target_names[1] == "gender_id"
gender_id = fluid.create_lod_tensor([[np.int64(1)]], [[1]], place) gender_id = np.array([1]).astype("int64").reshape(-1)
assert feed_target_names[2] == "age_id" assert feed_target_names[2] == "age_id"
age_id = fluid.create_lod_tensor([[np.int64(0)]], [[1]], place) age_id = np.array([0]).astype("int64").reshape(-1)
assert feed_target_names[3] == "job_id" assert feed_target_names[3] == "job_id"
job_id = fluid.create_lod_tensor([[np.int64(10)]], [[1]], place) job_id = np.array([10]).astype("int64").reshape(-1)
assert feed_target_names[4] == "movie_id" assert feed_target_names[4] == "movie_id"
movie_id = fluid.create_lod_tensor([[np.int64(783)]], [[1]], place) movie_id = np.array([783]).astype("int64").reshape(-1)
assert feed_target_names[5] == "category_id" assert feed_target_names[5] == "category_id"
category_id = fluid.create_lod_tensor( category_id = fluid.create_lod_tensor(
[np.array([10, 8, 9], dtype='int64')], [[3]], place) np.array([10, 8, 9], dtype='int64'), [[3]], place)
assert feed_target_names[6] == "movie_title" assert feed_target_names[6] == "movie_title"
movie_title = fluid.create_lod_tensor( movie_title = fluid.create_lod_tensor(
[np.array([1069, 4140, 2923, 710, 988], dtype='int64')], [[5]], np.array([1069, 4140, 2923, 710, 988], dtype='int64'), [[5]], place)
place)
# Construct feed as a dictionary of {feed_target_name: feed_target_data} # Construct feed as a dictionary of {feed_target_name: feed_target_data}
# and results will contain a list of data corresponding to fetch_targets. # and results will contain a list of data corresponding to fetch_targets.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册