diff --git a/05.recommender_system/README.cn.md b/05.recommender_system/README.cn.md
index ab4398a74fa2b1148f6bcedfd3fe58228c84143f..ffd07a1252e4ab58e81198d2dfbab78f9bd7c120 100644
--- a/05.recommender_system/README.cn.md
+++ b/05.recommender_system/README.cn.md
@@ -254,9 +254,9 @@ def get_usr_combined_features():
     USR_DICT_SIZE = paddle.dataset.movielens.max_user_id() + 1
-    uid = layers.data(name='user_id', shape=[1], dtype='int64')
+    uid = fluid.data(name='user_id', shape=[-1], dtype='int64')
-    usr_emb = layers.embedding(
+    usr_emb = fluid.embedding(
         input=uid,
         dtype='float32',
         size=[USR_DICT_SIZE, 32],
@@ -267,9 +267,9 @@ def get_usr_combined_features():
     USR_GENDER_DICT_SIZE = 2
-    usr_gender_id = layers.data(name='gender_id', shape=[1], dtype='int64')
+    usr_gender_id = fluid.data(name='gender_id', shape=[-1], dtype='int64')
-    usr_gender_emb = layers.embedding(
+    usr_gender_emb = fluid.embedding(
         input=usr_gender_id,
         size=[USR_GENDER_DICT_SIZE, 16],
         param_attr='gender_table',
@@ -278,9 +278,9 @@ def get_usr_combined_features():
     usr_gender_fc = layers.fc(input=usr_gender_emb, size=16)
     USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table)
-    usr_age_id = layers.data(name='age_id', shape=[1], dtype="int64")
+    usr_age_id = fluid.data(name='age_id', shape=[-1], dtype="int64")
-    usr_age_emb = layers.embedding(
+    usr_age_emb = fluid.embedding(
         input=usr_age_id,
         size=[USR_AGE_DICT_SIZE, 16],
         is_sparse=IS_SPARSE,
@@ -289,9 +289,9 @@ def get_usr_combined_features():
     usr_age_fc = layers.fc(input=usr_age_emb, size=16)
     USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1
-    usr_job_id = layers.data(name='job_id', shape=[1], dtype="int64")
+    usr_job_id = fluid.data(name='job_id', shape=[-1], dtype="int64")
-    usr_job_emb = layers.embedding(
+    usr_job_emb = fluid.embedding(
         input=usr_job_id,
         size=[USR_JOB_DICT_SIZE, 16],
         param_attr='job_table',
@@ -320,9 +320,9 @@ def get_mov_combined_features():
     MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1
-    mov_id = layers.data(name='movie_id', shape=[1], dtype='int64')
+    mov_id = fluid.data(name='movie_id', shape=[-1], dtype='int64')
-    mov_emb = layers.embedding(
+    mov_emb = fluid.embedding(
         input=mov_id,
         dtype='float32',
         size=[MOV_DICT_SIZE, 32],
@@ -333,10 +333,10 @@ def get_mov_combined_features():
     CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories())
-    category_id = layers.data(
-        name='category_id', shape=[1], dtype='int64', lod_level=1)
+    category_id = fluid.data(
+        name='category_id', shape=[-1], dtype='int64', lod_level=1)
-    mov_categories_emb = layers.embedding(
+    mov_categories_emb = fluid.embedding(
         input=category_id, size=[CATEGORY_DICT_SIZE, 32], is_sparse=IS_SPARSE)
     mov_categories_hidden = layers.sequence_pool(
@@ -344,10 +344,10 @@ def get_mov_combined_features():
     MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict())
-    mov_title_id = layers.data(
-        name='movie_title', shape=[1], dtype='int64', lod_level=1)
+    mov_title_id = fluid.data(
+        name='movie_title', shape=[-1], dtype='int64', lod_level=1)
-    mov_title_emb = layers.embedding(
+    mov_title_emb = fluid.embedding(
         input=mov_title_id, size=[MOV_TITLE_DICT_SIZE, 32], is_sparse=IS_SPARSE)
     mov_title_conv = nets.sequence_conv_pool(
@@ -390,7 +390,7 @@ def train_program():
     scale_infer = inference_program()
-    label = layers.data(name='score', shape=[1], dtype='float32')
+    label = fluid.data(name='score', shape=[-1, 1], dtype='float32')
     square_cost = layers.square_error_cost(input=scale_infer, label=label)
     avg_cost = layers.mean(square_cost)
@@ -416,12 +416,12 @@ place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
 下一步是为训练和测试定义数据提供器。提供器读入一个大小为 `BATCH_SIZE`的数据。`paddle.dataset.movielens.train` 每次会在乱序化后提供一个大小为`BATCH_SIZE`的数据,乱序化的大小为缓存大小`buf_size`。
 ```python
-train_reader = paddle.batch(
-    paddle.reader.shuffle(
+train_reader = fluid.io.batch(
+    fluid.io.shuffle(
         paddle.dataset.movielens.train(), buf_size=8192),
     batch_size=BATCH_SIZE)
-test_reader = paddle.batch(
+test_reader = fluid.io.batch(
     paddle.dataset.movielens.test(), batch_size=BATCH_SIZE)
 ```
@@ -533,13 +533,13 @@ train_loop()
 ```python
 infer_movie_id = 783
 infer_movie_name = paddle.dataset.movielens.movie_info()[infer_movie_id].title
-user_id = fluid.create_lod_tensor([[np.int64(1)]], [[1]], place)
-gender_id = fluid.create_lod_tensor([[np.int64(1)]], [[1]], place)
-age_id = fluid.create_lod_tensor([[np.int64(0)]], [[1]], place)
-job_id = fluid.create_lod_tensor([[np.int64(10)]], [[1]], place)
-movie_id = fluid.create_lod_tensor([[np.int64(783)]], [[1]], place) # Hunchback of Notre Dame
-category_id = fluid.create_lod_tensor([np.array([10, 8, 9], dtype='int64')], [[3]], place) # Animation, Children's, Musical
-movie_title = fluid.create_lod_tensor([np.array([1069, 4140, 2923, 710, 988], dtype='int64')], [[5]],
+user_id = np.array([1]).astype("int64").reshape(-1)
+gender_id = np.array([1]).astype("int64").reshape(-1)
+age_id = np.array([0]).astype("int64").reshape(-1)
+job_id = np.array([10]).astype("int64").reshape(-1)
+movie_id = np.array([783]).astype("int64").reshape(-1) # Hunchback of Notre Dame
+category_id = fluid.create_lod_tensor(np.array([10, 8, 9], dtype='int64'), [[3]], place) # Animation, Children's, Musical
+movie_title = fluid.create_lod_tensor(np.array([1069, 4140, 2923, 710, 988], dtype='int64'), [[5]],
                              place) # 'hunchback','of','notre','dame','the'
 ```
diff --git a/05.recommender_system/README.md b/05.recommender_system/README.md
index deb61c8fad12f942e723a37b14c9615aad35cfca..abf032c6a513c1f7a95a687d38ca355a578c22e8 100644
--- a/05.recommender_system/README.md
+++ b/05.recommender_system/README.md
@@ -241,9 +241,9 @@ def get_usr_combined_features():
     USR_DICT_SIZE = paddle.dataset.movielens.max_user_id() + 1
-    uid = layers.data(name='user_id', shape=[1], dtype='int64')
+    uid = fluid.data(name='user_id', shape=[-1], dtype='int64')
-    usr_emb = layers.embedding(
+    usr_emb = fluid.embedding(
         input=uid,
         dtype='float32',
         size=[USR_DICT_SIZE, 32],
@@ -254,9 +254,9 @@ def get_usr_combined_features():
     USR_GENDER_DICT_SIZE = 2
-    usr_gender_id = layers.data(name='gender_id', shape=[1], dtype='int64')
+    usr_gender_id = fluid.data(name='gender_id', shape=[-1], dtype='int64')
-    usr_gender_emb = layers.embedding(
+    usr_gender_emb = fluid.embedding(
         input=usr_gender_id,
         size=[USR_GENDER_DICT_SIZE, 16],
         param_attr='gender_table',
@@ -265,9 +265,9 @@ def get_usr_combined_features():
     usr_gender_fc = layers.fc(input=usr_gender_emb, size=16)
     USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table)
-    usr_age_id = layers.data(name='age_id', shape=[1], dtype="int64")
+    usr_age_id = fluid.data(name='age_id', shape=[-1], dtype="int64")
-    usr_age_emb = layers.embedding(
+    usr_age_emb = fluid.embedding(
         input=usr_age_id,
         size=[USR_AGE_DICT_SIZE, 16],
         is_sparse=IS_SPARSE,
@@ -276,9 +276,9 @@ def get_usr_combined_features():
     usr_age_fc = layers.fc(input=usr_age_emb, size=16)
     USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1
-    usr_job_id = layers.data(name='job_id', shape=[1], dtype="int64")
+    usr_job_id = fluid.data(name='job_id', shape=[-1], dtype="int64")
-    usr_job_emb = layers.embedding(
+    usr_job_emb = fluid.embedding(
         input=usr_job_id,
         size=[USR_JOB_DICT_SIZE, 16],
         param_attr='job_table',
@@ -307,9 +307,9 @@ def get_mov_combined_features():
     MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1
-    mov_id = layers.data(name='movie_id', shape=[1], dtype='int64')
+    mov_id = fluid.data(name='movie_id', shape=[-1], dtype='int64')
-    mov_emb = layers.embedding(
+    mov_emb = fluid.embedding(
         input=mov_id,
         dtype='float32',
         size=[MOV_DICT_SIZE, 32],
@@ -320,10 +320,10 @@ def get_mov_combined_features():
     CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories())
-    category_id = layers.data(
-        name='category_id', shape=[1], dtype='int64', lod_level=1)
+    category_id = fluid.data(
+        name='category_id', shape=[-1], dtype='int64', lod_level=1)
-    mov_categories_emb = layers.embedding(
+    mov_categories_emb = fluid.embedding(
         input=category_id, size=[CATEGORY_DICT_SIZE, 32], is_sparse=IS_SPARSE)
     mov_categories_hidden = layers.sequence_pool(
@@ -331,10 +331,10 @@ def get_mov_combined_features():
     MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict())
-    mov_title_id = layers.data(
-        name='movie_title', shape=[1], dtype='int64', lod_level=1)
+    mov_title_id = fluid.data(
+        name='movie_title', shape=[-1], dtype='int64', lod_level=1)
-    mov_title_emb = layers.embedding(
+    mov_title_emb = fluid.embedding(
         input=mov_title_id, size=[MOV_TITLE_DICT_SIZE, 32], is_sparse=IS_SPARSE)
     mov_title_conv = nets.sequence_conv_pool(
@@ -378,7 +378,7 @@ def train_program():
     scale_infer = inference_program()
-    label = layers.data(name='score', shape=[1], dtype='float32')
+    label = fluid.data(name='score', shape=[-1, 1], dtype='float32')
     square_cost = layers.square_error_cost(input=scale_infer, label=label)
     avg_cost = layers.mean(square_cost)
@@ -404,12 +404,12 @@ place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
 The next step is to define a data provider for training and testing. The provider reads in a data of size `BATCH_SIZE`. `paddle.dataset.movielens.train` will provide a data of size `BATCH_SIZE` after each scribbling, and the size of the out-of-order is the cache size `buf_size`.
 ```python
-train_reader = paddle.batch(
-    paddle.reader.shuffle(
+train_reader = fluid.io.batch(
+    fluid.io.shuffle(
         paddle.dataset.movielens.train(), buf_size=8192),
     batch_size=BATCH_SIZE)
-test_reader = paddle.batch(
+test_reader = fluid.io.batch(
     paddle.dataset.movielens.test(), batch_size=BATCH_SIZE)
 ```
@@ -521,13 +521,13 @@ In this prediction example, we try to predict the score given by user with ID1 f
 ```python
 infer_movie_id = 783
 infer_movie_name = paddle.dataset.movielens.movie_info()[infer_movie_id].title
-user_id = fluid.create_lod_tensor([[1]], [[1]], place)
-gender_id = fluid.create_lod_tensor([[1]], [[1]], place)
-age_id = fluid.create_lod_tensor([[0]], [[1]], place)
-job_id = fluid.create_lod_tensor([[10]], [[1]], place)
-movie_id = fluid.create_lod_tensor([[783]], [[1]], place) # Hunchback of Notre Dame
-category_id = fluid.create_lod_tensor([[10, 8, 9]], [[3]], place) # Animation, Children's, Musical
-movie_title = fluid.create_lod_tensor([[1069, 4140, 2923, 710, 988]], [[5]],
+user_id = np.array([1]).astype("int64").reshape(-1)
+gender_id = np.array([1]).astype("int64").reshape(-1)
+age_id = np.array([0]).astype("int64").reshape(-1)
+job_id = np.array([10]).astype("int64").reshape(-1)
+movie_id = np.array([783]).astype("int64").reshape(-1) # Hunchback of Notre Dame
+category_id = fluid.create_lod_tensor(np.array([10, 8, 9], dtype='int64'), [[3]], place) # Animation, Children's, Musical
+movie_title = fluid.create_lod_tensor(np.array([1069, 4140, 2923, 710, 988], dtype='int64'), [[5]],
                              place) # 'hunchback','of','notre','dame','the'
 ```
diff --git a/05.recommender_system/index.cn.html b/05.recommender_system/index.cn.html
index 7c00581afee7086eba3e5a691aff28404ac687c5..17191a58795ea4c0879958fd0ca5ce639877f678 100644
--- a/05.recommender_system/index.cn.html
+++ b/05.recommender_system/index.cn.html
@@ -296,9 +296,9 @@ def get_usr_combined_features():
     USR_DICT_SIZE = paddle.dataset.movielens.max_user_id() + 1
-    uid = layers.data(name='user_id', shape=[1], dtype='int64')
+    uid = fluid.data(name='user_id', shape=[-1], dtype='int64')
-    usr_emb = layers.embedding(
+    usr_emb = fluid.embedding(
         input=uid,
         dtype='float32',
         size=[USR_DICT_SIZE, 32],
@@ -309,9 +309,9 @@ def get_usr_combined_features():
     USR_GENDER_DICT_SIZE = 2
-    usr_gender_id = layers.data(name='gender_id', shape=[1], dtype='int64')
+    usr_gender_id = fluid.data(name='gender_id', shape=[-1], dtype='int64')
-    usr_gender_emb = layers.embedding(
+    usr_gender_emb = fluid.embedding(
         input=usr_gender_id,
         size=[USR_GENDER_DICT_SIZE, 16],
         param_attr='gender_table',
@@ -320,9 +320,9 @@ def get_usr_combined_features():
     usr_gender_fc = layers.fc(input=usr_gender_emb, size=16)
     USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table)
-    usr_age_id = layers.data(name='age_id', shape=[1], dtype="int64")
+    usr_age_id = fluid.data(name='age_id', shape=[-1], dtype="int64")
-    usr_age_emb = layers.embedding(
+    usr_age_emb = fluid.embedding(
         input=usr_age_id,
         size=[USR_AGE_DICT_SIZE, 16],
         is_sparse=IS_SPARSE,
@@ -331,9 +331,9 @@ def get_usr_combined_features():
     usr_age_fc = layers.fc(input=usr_age_emb, size=16)
     USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1
-    usr_job_id = layers.data(name='job_id', shape=[1], dtype="int64")
+    usr_job_id = fluid.data(name='job_id', shape=[-1], dtype="int64")
-    usr_job_emb = layers.embedding(
+    usr_job_emb = fluid.embedding(
         input=usr_job_id,
         size=[USR_JOB_DICT_SIZE, 16],
         param_attr='job_table',
@@ -362,9 +362,9 @@ def get_mov_combined_features():
     MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1
-    mov_id = layers.data(name='movie_id', shape=[1], dtype='int64')
+    mov_id = fluid.data(name='movie_id', shape=[-1], dtype='int64')
-    mov_emb = layers.embedding(
+    mov_emb = fluid.embedding(
         input=mov_id,
         dtype='float32',
         size=[MOV_DICT_SIZE, 32],
@@ -375,10 +375,10 @@ def get_mov_combined_features():
     CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories())
-    category_id = layers.data(
-        name='category_id', shape=[1], dtype='int64', lod_level=1)
+    category_id = fluid.data(
+        name='category_id', shape=[-1], dtype='int64', lod_level=1)
-    mov_categories_emb = layers.embedding(
+    mov_categories_emb = fluid.embedding(
         input=category_id, size=[CATEGORY_DICT_SIZE, 32], is_sparse=IS_SPARSE)
     mov_categories_hidden = layers.sequence_pool(
@@ -386,10 +386,10 @@ def get_mov_combined_features():
     MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict())
-    mov_title_id = layers.data(
-        name='movie_title', shape=[1], dtype='int64', lod_level=1)
+    mov_title_id = fluid.data(
+        name='movie_title', shape=[-1], dtype='int64', lod_level=1)
-    mov_title_emb = layers.embedding(
+    mov_title_emb = fluid.embedding(
         input=mov_title_id, size=[MOV_TITLE_DICT_SIZE, 32], is_sparse=IS_SPARSE)
     mov_title_conv = nets.sequence_conv_pool(
@@ -432,7 +432,7 @@ def train_program():
     scale_infer = inference_program()
-    label = layers.data(name='score', shape=[1], dtype='float32')
+    label = fluid.data(name='score', shape=[-1, 1], dtype='float32')
     square_cost = layers.square_error_cost(input=scale_infer, label=label)
     avg_cost = layers.mean(square_cost)
@@ -458,12 +458,12 @@ place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
 下一步是为训练和测试定义数据提供器。提供器读入一个大小为 `BATCH_SIZE`的数据。`paddle.dataset.movielens.train` 每次会在乱序化后提供一个大小为`BATCH_SIZE`的数据,乱序化的大小为缓存大小`buf_size`。
 ```python
-train_reader = paddle.batch(
-    paddle.reader.shuffle(
+train_reader = fluid.io.batch(
+    fluid.io.shuffle(
         paddle.dataset.movielens.train(), buf_size=8192),
     batch_size=BATCH_SIZE)
-test_reader = paddle.batch(
+test_reader = fluid.io.batch(
     paddle.dataset.movielens.test(), batch_size=BATCH_SIZE)
 ```
@@ -575,13 +575,13 @@ train_loop()
 ```python
 infer_movie_id = 783
 infer_movie_name = paddle.dataset.movielens.movie_info()[infer_movie_id].title
-user_id = fluid.create_lod_tensor([[np.int64(1)]], [[1]], place)
-gender_id = fluid.create_lod_tensor([[np.int64(1)]], [[1]], place)
-age_id = fluid.create_lod_tensor([[np.int64(0)]], [[1]], place)
-job_id = fluid.create_lod_tensor([[np.int64(10)]], [[1]], place)
-movie_id = fluid.create_lod_tensor([[np.int64(783)]], [[1]], place) # Hunchback of Notre Dame
-category_id = fluid.create_lod_tensor([np.array([10, 8, 9], dtype='int64')], [[3]], place) # Animation, Children's, Musical
-movie_title = fluid.create_lod_tensor([np.array([1069, 4140, 2923, 710, 988], dtype='int64')], [[5]],
+user_id = np.array([1]).astype("int64").reshape(-1)
+gender_id = np.array([1]).astype("int64").reshape(-1)
+age_id = np.array([0]).astype("int64").reshape(-1)
+job_id = np.array([10]).astype("int64").reshape(-1)
+movie_id = np.array([783]).astype("int64").reshape(-1) # Hunchback of Notre Dame
+category_id = fluid.create_lod_tensor(np.array([10, 8, 9], dtype='int64'), [[3]], place) # Animation, Children's, Musical
+movie_title = fluid.create_lod_tensor(np.array([1069, 4140, 2923, 710, 988], dtype='int64'), [[5]],
                              place) # 'hunchback','of','notre','dame','the'
 ```
diff --git a/05.recommender_system/index.html b/05.recommender_system/index.html
index 29fa25df7e7bee1197ec13ae677c2f1126887d95..808d891cf58a077e35bd42a2739c708d540257a3 100644
--- a/05.recommender_system/index.html
+++ b/05.recommender_system/index.html
@@ -283,9 +283,9 @@ def get_usr_combined_features():
     USR_DICT_SIZE = paddle.dataset.movielens.max_user_id() + 1
-    uid = layers.data(name='user_id', shape=[1], dtype='int64')
+    uid = fluid.data(name='user_id', shape=[-1], dtype='int64')
-    usr_emb = layers.embedding(
+    usr_emb = fluid.embedding(
         input=uid,
         dtype='float32',
         size=[USR_DICT_SIZE, 32],
@@ -296,9 +296,9 @@ def get_usr_combined_features():
     USR_GENDER_DICT_SIZE = 2
-    usr_gender_id = layers.data(name='gender_id', shape=[1], dtype='int64')
+    usr_gender_id = fluid.data(name='gender_id', shape=[-1], dtype='int64')
-    usr_gender_emb = layers.embedding(
+    usr_gender_emb = fluid.embedding(
         input=usr_gender_id,
         size=[USR_GENDER_DICT_SIZE, 16],
         param_attr='gender_table',
@@ -307,9 +307,9 @@ def get_usr_combined_features():
     usr_gender_fc = layers.fc(input=usr_gender_emb, size=16)
     USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table)
-    usr_age_id = layers.data(name='age_id', shape=[1], dtype="int64")
+    usr_age_id = fluid.data(name='age_id', shape=[-1], dtype="int64")
-    usr_age_emb = layers.embedding(
+    usr_age_emb = fluid.embedding(
         input=usr_age_id,
         size=[USR_AGE_DICT_SIZE, 16],
         is_sparse=IS_SPARSE,
@@ -318,9 +318,9 @@ def get_usr_combined_features():
     usr_age_fc = layers.fc(input=usr_age_emb, size=16)
     USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1
-    usr_job_id = layers.data(name='job_id', shape=[1], dtype="int64")
+    usr_job_id = fluid.data(name='job_id', shape=[-1], dtype="int64")
-    usr_job_emb = layers.embedding(
+    usr_job_emb = fluid.embedding(
         input=usr_job_id,
         size=[USR_JOB_DICT_SIZE, 16],
         param_attr='job_table',
@@ -349,9 +349,9 @@ def get_mov_combined_features():
     MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1
-    mov_id = layers.data(name='movie_id', shape=[1], dtype='int64')
+    mov_id = fluid.data(name='movie_id', shape=[-1], dtype='int64')
-    mov_emb = layers.embedding(
+    mov_emb = fluid.embedding(
         input=mov_id,
         dtype='float32',
         size=[MOV_DICT_SIZE, 32],
@@ -362,10 +362,10 @@ def get_mov_combined_features():
     CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories())
-    category_id = layers.data(
-        name='category_id', shape=[1], dtype='int64', lod_level=1)
+    category_id = fluid.data(
+        name='category_id', shape=[-1], dtype='int64', lod_level=1)
-    mov_categories_emb = layers.embedding(
+    mov_categories_emb = fluid.embedding(
         input=category_id, size=[CATEGORY_DICT_SIZE, 32], is_sparse=IS_SPARSE)
     mov_categories_hidden = layers.sequence_pool(
@@ -373,10 +373,10 @@ def get_mov_combined_features():
     MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict())
-    mov_title_id = layers.data(
-        name='movie_title', shape=[1], dtype='int64', lod_level=1)
+    mov_title_id = fluid.data(
+        name='movie_title', shape=[-1], dtype='int64', lod_level=1)
-    mov_title_emb = layers.embedding(
+    mov_title_emb = fluid.embedding(
         input=mov_title_id, size=[MOV_TITLE_DICT_SIZE, 32], is_sparse=IS_SPARSE)
     mov_title_conv = nets.sequence_conv_pool(
@@ -420,7 +420,7 @@ def train_program():
     scale_infer = inference_program()
-    label = layers.data(name='score', shape=[1], dtype='float32')
+    label = fluid.data(name='score', shape=[-1, 1], dtype='float32')
     square_cost = layers.square_error_cost(input=scale_infer, label=label)
     avg_cost = layers.mean(square_cost)
@@ -446,12 +446,12 @@ place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
 The next step is to define a data provider for training and testing. The provider reads in a data of size `BATCH_SIZE`. `paddle.dataset.movielens.train` will provide a data of size `BATCH_SIZE` after each scribbling, and the size of the out-of-order is the cache size `buf_size`.
 ```python
-train_reader = paddle.batch(
-    paddle.reader.shuffle(
+train_reader = fluid.io.batch(
+    fluid.io.shuffle(
         paddle.dataset.movielens.train(), buf_size=8192),
     batch_size=BATCH_SIZE)
-test_reader = paddle.batch(
+test_reader = fluid.io.batch(
     paddle.dataset.movielens.test(), batch_size=BATCH_SIZE)
 ```
@@ -563,13 +563,13 @@ In this prediction example, we try to predict the score given by user with ID1 f
 ```python
 infer_movie_id = 783
 infer_movie_name = paddle.dataset.movielens.movie_info()[infer_movie_id].title
-user_id = fluid.create_lod_tensor([[1]], [[1]], place)
-gender_id = fluid.create_lod_tensor([[1]], [[1]], place)
-age_id = fluid.create_lod_tensor([[0]], [[1]], place)
-job_id = fluid.create_lod_tensor([[10]], [[1]], place)
-movie_id = fluid.create_lod_tensor([[783]], [[1]], place) # Hunchback of Notre Dame
-category_id = fluid.create_lod_tensor([[10, 8, 9]], [[3]], place) # Animation, Children's, Musical
-movie_title = fluid.create_lod_tensor([[1069, 4140, 2923, 710, 988]], [[5]],
+user_id = np.array([1]).astype("int64").reshape(-1)
+gender_id = np.array([1]).astype("int64").reshape(-1)
+age_id = np.array([0]).astype("int64").reshape(-1)
+job_id = np.array([10]).astype("int64").reshape(-1)
+movie_id = np.array([783]).astype("int64").reshape(-1) # Hunchback of Notre Dame
+category_id = fluid.create_lod_tensor(np.array([10, 8, 9], dtype='int64'), [[3]], place) # Animation, Children's, Musical
+movie_title = fluid.create_lod_tensor(np.array([1069, 4140, 2923, 710, 988], dtype='int64'), [[5]],
                              place) # 'hunchback','of','notre','dame','the'
 ```
diff --git a/05.recommender_system/train.py b/05.recommender_system/train.py
index 70e71608f76543eba2531d284fe25ecb22a427a5..5cf64acf0908c456420ef2d200af231a5de7f6ce 100644
--- a/05.recommender_system/train.py
+++ b/05.recommender_system/train.py
@@ -44,9 +44,9 @@ def get_usr_combined_features():
     USR_DICT_SIZE = paddle.dataset.movielens.max_user_id() + 1
-    uid = layers.data(name='user_id', shape=[1], dtype='int64')
+    uid = fluid.data(name='user_id', shape=[-1], dtype='int64')
-    usr_emb = layers.embedding(
+    usr_emb = fluid.embedding(
         input=uid,
         dtype='float32',
         size=[USR_DICT_SIZE, 32],
@@ -57,9 +57,9 @@ def get_usr_combined_features():
     USR_GENDER_DICT_SIZE = 2
-    usr_gender_id = layers.data(name='gender_id', shape=[1], dtype='int64')
+    usr_gender_id = fluid.data(name='gender_id', shape=[-1], dtype='int64')
-    usr_gender_emb = layers.embedding(
+    usr_gender_emb = fluid.embedding(
         input=usr_gender_id,
         size=[USR_GENDER_DICT_SIZE, 16],
         param_attr='gender_table',
@@ -68,9 +68,9 @@ def get_usr_combined_features():
     usr_gender_fc = layers.fc(input=usr_gender_emb, size=16)
     USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table)
-    usr_age_id = layers.data(name='age_id', shape=[1], dtype="int64")
+    usr_age_id = fluid.data(name='age_id', shape=[-1], dtype="int64")
-    usr_age_emb = layers.embedding(
+    usr_age_emb = fluid.embedding(
         input=usr_age_id,
         size=[USR_AGE_DICT_SIZE, 16],
         is_sparse=IS_SPARSE,
@@ -79,9 +79,9 @@ def get_usr_combined_features():
     usr_age_fc = layers.fc(input=usr_age_emb, size=16)
     USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1
-    usr_job_id = layers.data(name='job_id', shape=[1], dtype="int64")
+    usr_job_id = fluid.data(name='job_id', shape=[-1], dtype="int64")
-    usr_job_emb = layers.embedding(
+    usr_job_emb = fluid.embedding(
         input=usr_job_id,
         size=[USR_JOB_DICT_SIZE, 16],
         param_attr='job_table',
@@ -101,9 +101,9 @@ def get_mov_combined_features():
     MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1
-    mov_id = layers.data(name='movie_id', shape=[1], dtype='int64')
+    mov_id = fluid.data(name='movie_id', shape=[-1], dtype='int64')
-    mov_emb = layers.embedding(
+    mov_emb = fluid.embedding(
         input=mov_id,
         dtype='float32',
         size=[MOV_DICT_SIZE, 32],
@@ -114,10 +114,10 @@ def get_mov_combined_features():
     CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories())
-    category_id = layers.data(
-        name='category_id', shape=[1], dtype='int64', lod_level=1)
+    category_id = fluid.data(
+        name='category_id', shape=[-1], dtype='int64', lod_level=1)
-    mov_categories_emb = layers.embedding(
+    mov_categories_emb = fluid.embedding(
         input=category_id, size=[CATEGORY_DICT_SIZE, 32], is_sparse=IS_SPARSE)
     mov_categories_hidden = layers.sequence_pool(
@@ -125,10 +125,10 @@ def get_mov_combined_features():
     MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict())
-    mov_title_id = layers.data(
-        name='movie_title', shape=[1], dtype='int64', lod_level=1)
+    mov_title_id = fluid.data(
+        name='movie_title', shape=[-1], dtype='int64', lod_level=1)
-    mov_title_emb = layers.embedding(
+    mov_title_emb = fluid.embedding(
         input=mov_title_id, size=[MOV_TITLE_DICT_SIZE, 32], is_sparse=IS_SPARSE)
     mov_title_conv = nets.sequence_conv_pool(
@@ -153,7 +153,7 @@ def inference_program():
     inference = layers.cos_sim(X=usr_combined_features, Y=mov_combined_features)
     scale_infer = layers.scale(x=inference, scale=5.0)
-    label = layers.data(name='score', shape=[1], dtype='float32')
+    label = fluid.data(name='score', shape=[-1, 1], dtype='float32')
     square_cost = layers.square_error_cost(input=scale_infer, label=label)
     avg_cost = layers.mean(square_cost)
@@ -168,16 +168,15 @@ def train(use_cuda, params_dirname):
     place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
     if args.enable_ce:
-        train_reader = paddle.batch(
+        train_reader = fluid.io.batch(
             paddle.dataset.movielens.train(), batch_size=BATCH_SIZE)
-        test_reader = paddle.batch(
+        test_reader = fluid.io.batch(
             paddle.dataset.movielens.test(), batch_size=BATCH_SIZE)
     else:
-        train_reader = paddle.batch(
-            paddle.reader.shuffle(
-                paddle.dataset.movielens.train(), buf_size=8192),
+        train_reader = fluid.io.batch(
+            fluid.io.shuffle(paddle.dataset.movielens.train(), buf_size=8192),
             batch_size=BATCH_SIZE)
-        test_reader = paddle.batch(
+        test_reader = fluid.io.batch(
             paddle.dataset.movielens.test(), batch_size=BATCH_SIZE)
     feed_order = [
@@ -293,28 +292,27 @@ def infer(use_cuda, params_dirname):
     # Correspondingly, recursive_sequence_lengths = [[3, 2]] contains one
     # level of detail info, indicating that `data` consists of two sequences
     # of length 3 and 2, respectively.
-    user_id = fluid.create_lod_tensor([[np.int64(1)]], [[1]], place)
+    user_id = np.array([1]).astype("int64").reshape(-1)
     assert feed_target_names[1] == "gender_id"
-    gender_id = fluid.create_lod_tensor([[np.int64(1)]], [[1]], place)
+    gender_id = np.array([1]).astype("int64").reshape(-1)
     assert feed_target_names[2] == "age_id"
-    age_id = fluid.create_lod_tensor([[np.int64(0)]], [[1]], place)
+    age_id = np.array([0]).astype("int64").reshape(-1)
     assert feed_target_names[3] == "job_id"
-    job_id = fluid.create_lod_tensor([[np.int64(10)]], [[1]], place)
+    job_id = np.array([10]).astype("int64").reshape(-1)
     assert feed_target_names[4] == "movie_id"
-    movie_id = fluid.create_lod_tensor([[np.int64(783)]], [[1]], place)
+    movie_id = np.array([783]).astype("int64").reshape(-1)
     assert feed_target_names[5] == "category_id"
     category_id = fluid.create_lod_tensor(
-        [np.array([10, 8, 9], dtype='int64')], [[3]], place)
+        np.array([10, 8, 9], dtype='int64'), [[3]], place)
     assert feed_target_names[6] == "movie_title"
     movie_title = fluid.create_lod_tensor(
-        [np.array([1069, 4140, 2923, 710, 988], dtype='int64')], [[5]],
-        place)
+        np.array([1069, 4140, 2923, 710, 988], dtype='int64'), [[5]], place)
     # Construct feed as a dictionary of {feed_target_name: feed_target_data}
     # and results will contain a list of data corresponding to fetch_targets.
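
The inference-side hunks above replace the old per-value LoD tensors with plain numpy arrays for the dense id inputs, while the two inputs declared with `lod_level=1` keep using `fluid.create_lod_tensor`. The fragment below is a minimal sketch of that feeding convention, not part of the patch; it assumes Paddle Fluid 1.6+, covers only three of the seven inputs, and the name `feed_fragment` is purely illustrative.

```python
import numpy as np
import paddle.fluid as fluid

place = fluid.CPUPlace()

# Dense single-value ids: one int64 per example, flattened along the batch
# axis, matching inputs declared as fluid.data(..., shape=[-1], dtype='int64').
user_id = np.array([1]).astype("int64").reshape(-1)
movie_id = np.array([783]).astype("int64").reshape(-1)

# Variable-length inputs declared with lod_level=1 still need a LoD tensor;
# the recursive sequence lengths [[3]] mark one sequence of length 3.
category_id = fluid.create_lod_tensor(
    np.array([10, 8, 9], dtype="int64"), [[3]], place)

feed_fragment = {
    "user_id": user_id,
    "movie_id": movie_id,
    "category_id": category_id,
}
# With a loaded inference program this dict would be completed with the
# remaining inputs and passed via exe.run(program, feed=..., fetch_list=...).
```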
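Similarly, the reader hunks swap `paddle.batch`/`paddle.reader.shuffle` for their `fluid.io` counterparts without changing behaviour: samples are buffered, shuffled within a `buf_size` window, and then grouped into mini-batches. A small usage sketch, illustrative only: the `BATCH_SIZE` value is an assumption (the tutorial defines its own), and the first call will download the MovieLens data.

```python
import paddle
import paddle.fluid as fluid

BATCH_SIZE = 256  # assumed here; the tutorial sets its own value

# fluid.io.shuffle keeps up to buf_size samples in memory and yields them in
# random order; fluid.io.batch then groups them into BATCH_SIZE-sized lists.
train_reader = fluid.io.batch(
    fluid.io.shuffle(paddle.dataset.movielens.train(), buf_size=8192),
    batch_size=BATCH_SIZE)

# Each element of a batch is one MovieLens sample (user and movie features
# plus the score); the batch itself is a plain Python list.
first_batch = next(train_reader())
print(len(first_batch))
```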