Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Pytorch Widedeep
提交
801c597a
P
Pytorch Widedeep
项目概览
Greenplum
/
Pytorch Widedeep
10 个月 前同步成功
通知
9
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Pytorch Widedeep
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
801c597a
编写于
7月 31, 2023
作者:
J
Javier
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Adjusted the notebooks to show how one can use the 'load_movielens100k' function in the library
上级
d30203a0
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
38 addition
and
92 deletion
+38
-92
examples/notebooks/19_wide_and_deep_for_recsys_pt1.ipynb
examples/notebooks/19_wide_and_deep_for_recsys_pt1.ipynb
+38
-92
未找到文件。
examples/notebooks/19_wide_and_deep_for_recsys_pt1.ipynb
浏览文件 @
801c597a
...
...
@@ -23,7 +23,9 @@
"import warnings\n",
"\n",
"import pandas as pd\n",
"from sklearn.model_selection import train_test_split"
"from sklearn.model_selection import train_test_split\n",
"\n",
"from pytorch_widedeep.datasets import load_movielens100k"
]
},
{
...
...
@@ -43,44 +45,31 @@
"metadata": {},
"outputs": [],
"source": [
"raw_data_path = Path(\"~/ml_projects/wide_deep_learning_for_recsys/ml-100k\")\n",
"\n",
"save_path = Path(\"prepared_data\")\n",
"if not save_path.exists():\n",
" save_path.mkdir(parents=True, exist_ok=True)"
]
},
{
"cell_type": "markdown",
"id": "929a9712",
"cell_type": "code",
"execution_count": 4,
"id": "5de7a941",
"metadata": {},
"outputs": [],
"source": [
"
Let's first start by loading the interactions, user and item data
"
"
data, users, items = load_movielens100k(as_frame=True)
"
]
},
{
"cell_type": "code",
"execution_count":
4
,
"id": "
38de36ff
",
"execution_count":
5
,
"id": "
7a288aee
",
"metadata": {},
"outputs": [],
"source": [
"# Load the Ratings/Interaction (triplets (user, item, rating) plus timestamp)\n",
"data = pd.read_csv(raw_data_path / \"u.data\", sep=\"\\t\", header=None)\n",
"data.columns = [\"user_id\", \"movie_id\", \"rating\", \"timestamp\"]\n",
"\n",
"# Load the User features\n",
"users = pd.read_csv(raw_data_path / \"u.user\", sep=\"|\", encoding=\"latin-1\", header=None)\n",
"users.columns = [\"user_id\", \"age\", \"gender\", \"occupation\", \"zip_code\"]\n",
"\n",
"# Load the Item features\n",
"items = pd.read_csv(raw_data_path / \"u.item\", sep=\"|\", encoding=\"latin-1\", header=None)\n",
"items.columns = [\n",
" \"movie_id\",\n",
" \"movie_title\",\n",
" \"release_date\",\n",
" \"video_release_date\",\n",
" \"IMDb_URL\",\n",
"# Alternatively, as specified in the docs: 'The last 19 fields are the genres' so:\n",
"# list_of_genres = items.columns.tolist()[-19:]\n",
"list_of_genres = [\n",
" \"unknown\",\n",
" \"Action\",\n",
" \"Adventure\",\n",
...
...
@@ -103,9 +92,17 @@
"]"
]
},
{
"cell_type": "markdown",
"id": "929a9712",
"metadata": {},
"source": [
"Let's first start by loading the interactions, user and item data"
]
},
{
"cell_type": "code",
"execution_count":
5
,
"execution_count":
6
,
"id": "f4c09273",
"metadata": {},
"outputs": [
...
...
@@ -185,7 +182,7 @@
"4 166 346 1 886397596"
]
},
"execution_count":
5
,
"execution_count":
6
,
"metadata": {},
"output_type": "execute_result"
}
...
...
@@ -196,7 +193,7 @@
},
{
"cell_type": "code",
"execution_count":
6
,
"execution_count":
7
,
"id": "18c3faa0",
"metadata": {},
"outputs": [
...
...
@@ -282,7 +279,7 @@
"4 5 33 F other 15213"
]
},
"execution_count":
6
,
"execution_count":
7
,
"metadata": {},
"output_type": "execute_result"
}
...
...
@@ -293,7 +290,7 @@
},
{
"cell_type": "code",
"execution_count":
7
,
"execution_count":
8
,
"id": "1dbad7b1",
"metadata": {},
"outputs": [
...
...
@@ -499,55 +496,13 @@
"[5 rows x 24 columns]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"items.head()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "7b1ce069",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['unknown',\n",
" 'Action',\n",
" 'Adventure',\n",
" 'Animation',\n",
" \"Children's\",\n",
" 'Comedy',\n",
" 'Crime',\n",
" 'Documentary',\n",
" 'Drama',\n",
" 'Fantasy',\n",
" 'Film-Noir',\n",
" 'Horror',\n",
" 'Musical',\n",
" 'Mystery',\n",
" 'Romance',\n",
" 'Sci-Fi',\n",
" 'Thriller',\n",
" 'War',\n",
" 'Western']"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"list_of_genres = pd.read_csv(\n",
" raw_data_path / \"u.genre\", sep=\"|\", header=None, usecols=[0]\n",
")[0].tolist()\n",
"list_of_genres"
"items.head()"
]
},
{
...
...
@@ -1899,16 +1854,7 @@
"execution_count": 29,
"id": "68555183",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/javierrodriguezzaurin/Projects/pytorch-widedeep/pytorch_widedeep/preprocessing/tab_preprocessor.py:309: UserWarning: Continuous columns will not be normalised\n",
" warnings.warn(\"Continuous columns will not be normalised\")\n"
]
}
],
"outputs": [],
"source": [
"X_train_tab = tab_preprocessor.fit_transform(X_train.fillna(0))\n",
"X_test_tab = tab_preprocessor.transform(X_test.fillna(0))"
...
...
@@ -2291,16 +2237,16 @@
"name": "stderr",
"output_type": "stream",
"text": [
"epoch 1: 100%|██████████████████████████████
| 149/149 [00:16<00:00, 9.08
it/s, loss=6.66]\n",
"valid: 100%|██████████████████████████████████
| 38/38 [00:01<00:00, 23.53
it/s, loss=6.61]\n",
"epoch 2: 100%|██████████████████████████████
| 149/149 [00:16<00:00, 9.11it/s, loss=5.99
]\n",
"valid: 100%|██████████████████████████████████
| 38/38 [00:01<00:00, 23.07it/s, loss=6.55
]\n",
"epoch 3: 100%|██████████████████████████████
| 149/149 [00:16<00:00, 9.11it/s, loss=5.67
]\n",
"valid: 100%|██████████████████████████████████
| 38/38 [00:01<00:00, 22.89it/s, loss=6.55
]\n",
"epoch 4: 100%|██████████████████████████████
| 149/149 [00:16<00:00, 8.81
it/s, loss=5.43]\n",
"valid: 100%|██████████████████████████████████
| 38/38 [00:01<00:00, 21.43it/s, loss=6.57
]\n",
"epoch 5: 100%|██████████████████████████████
| 149/149 [00:16<00:00, 8.79it/s, loss=5.24
]\n",
"valid: 100%|███████████████████████████████████
| 38/38 [00:01<00:00, 22.39it/s, loss=6.6
]\n"
"epoch 1: 100%|██████████████████████████████
███████████████████████████████████████████| 149/149 [00:16<00:00, 8.82
it/s, loss=6.66]\n",
"valid: 100%|██████████████████████████████████
███████████████████████████████████████████| 38/38 [00:01<00:00, 21.14
it/s, loss=6.61]\n",
"epoch 2: 100%|██████████████████████████████
███████████████████████████████████████████| 149/149 [00:17<00:00, 8.53it/s, loss=5.98
]\n",
"valid: 100%|██████████████████████████████████
███████████████████████████████████████████| 38/38 [00:01<00:00, 23.20it/s, loss=6.53
]\n",
"epoch 3: 100%|██████████████████████████████
███████████████████████████████████████████| 149/149 [00:17<00:00, 8.61it/s, loss=5.66
]\n",
"valid: 100%|██████████████████████████████████
███████████████████████████████████████████| 38/38 [00:01<00:00, 23.16it/s, loss=6.54
]\n",
"epoch 4: 100%|██████████████████████████████
███████████████████████████████████████████| 149/149 [00:17<00:00, 8.76
it/s, loss=5.43]\n",
"valid: 100%|██████████████████████████████████
███████████████████████████████████████████| 38/38 [00:01<00:00, 22.03it/s, loss=6.56
]\n",
"epoch 5: 100%|██████████████████████████████
███████████████████████████████████████████| 149/149 [00:17<00:00, 8.28it/s, loss=5.23
]\n",
"valid: 100%|███████████████████████████████████
██████████████████████████████████████████| 38/38 [00:01<00:00, 22.60it/s, loss=6.59
]\n"
]
}
],
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录