{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# MovieLens数据集\n",
"\n",
"```{note}\n",
"MovieLens是一个用户对电影评分的数据集,它是推荐系统最常用的数据集。
\n",
"我们将使用一个已经经过基础处理的MovieLens数据集。\n",
"```"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 转化为tf dataset"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from tensorflow import keras\n",
"\n",
"#@save\n",
"def get_movielens_path(file_name):\n",
" # 获取movielens数据集中文件的路径\n",
" url_prefix = \"file:///Users/facer/IdeaProjects/SparrowRecSys/src/main/resources/webroot/sampledata\"\n",
" return keras.utils.get_file(file_name, os.path.join(url_prefix, file_name))"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n", " | movieId | \n", "userId | \n", "rating | \n", "timestamp | \n", "label | \n", "releaseYear | \n", "movieGenre1 | \n", "movieGenre2 | \n", "movieGenre3 | \n", "movieRatingCount | \n", "... | \n", "userRatingCount | \n", "userAvgReleaseYear | \n", "userReleaseYearStddev | \n", "userAvgRating | \n", "userRatingStddev | \n", "userGenre1 | \n", "userGenre2 | \n", "userGenre3 | \n", "userGenre4 | \n", "userGenre5 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1 | \n", "15555 | \n", "3.0 | \n", "900953740 | \n", "0 | \n", "1995 | \n", "Adventure | \n", "Animation | \n", "Children | \n", "10759 | \n", "... | \n", "92 | \n", "1992 | \n", "8.98 | \n", "3.86 | \n", "0.74 | \n", "Drama | \n", "Comedy | \n", "Thriller | \n", "Action | \n", "Crime | \n", "
1 | \n", "1 | \n", "25912 | \n", "3.5 | \n", "1111631768 | \n", "1 | \n", "1995 | \n", "Adventure | \n", "Animation | \n", "Children | \n", "10759 | \n", "... | \n", "21 | \n", "1988 | \n", "14.09 | \n", "3.48 | \n", "1.28 | \n", "Action | \n", "Comedy | \n", "Romance | \n", "Adventure | \n", "Thriller | \n", "
2 | \n", "1 | \n", "29912 | \n", "3.0 | \n", "866820360 | \n", "0 | \n", "1995 | \n", "Adventure | \n", "Animation | \n", "Children | \n", "10759 | \n", "... | \n", "4 | \n", "1995 | \n", "0.50 | \n", "3.00 | \n", "0.00 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
3 | \n", "10 | \n", "17686 | \n", "0.5 | \n", "1195555011 | \n", "0 | \n", "1995 | \n", "Action | \n", "Adventure | \n", "Thriller | \n", "6330 | \n", "... | \n", "35 | \n", "1992 | \n", "8.35 | \n", "2.97 | \n", "1.48 | \n", "Comedy | \n", "Drama | \n", "Adventure | \n", "Action | \n", "Thriller | \n", "
4 | \n", "104 | \n", "20158 | \n", "4.0 | \n", "1155357691 | \n", "1 | \n", "1996 | \n", "Comedy | \n", "NaN | \n", "NaN | \n", "3954 | \n", "... | \n", "81 | \n", "1991 | \n", "8.70 | \n", "3.60 | \n", "0.72 | \n", "Thriller | \n", "Drama | \n", "Action | \n", "Crime | \n", "Adventure | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
88822 | \n", "968 | \n", "26865 | \n", "3.0 | \n", "854092232 | \n", "0 | \n", "1968 | \n", "Horror | \n", "Sci-Fi | \n", "Thriller | \n", "1824 | \n", "... | \n", "94 | \n", "1991 | \n", "12.23 | \n", "3.35 | \n", "0.85 | \n", "Drama | \n", "Thriller | \n", "Comedy | \n", "Crime | \n", "Romance | \n", "
88823 | \n", "968 | \n", "8507 | \n", "2.0 | \n", "974709061 | \n", "0 | \n", "1968 | \n", "Horror | \n", "Sci-Fi | \n", "Thriller | \n", "1824 | \n", "... | \n", "5 | \n", "1994 | \n", "0.89 | \n", "2.00 | \n", "1.00 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
88824 | \n", "969 | \n", "16689 | \n", "5.0 | \n", "857854044 | \n", "1 | \n", "1951 | \n", "Adventure | \n", "Comedy | \n", "Romance | \n", "2380 | \n", "... | \n", "97 | \n", "1992 | \n", "9.95 | \n", "3.53 | \n", "0.82 | \n", "Drama | \n", "Comedy | \n", "Crime | \n", "Romance | \n", "Thriller | \n", "
88825 | \n", "969 | \n", "26460 | \n", "2.0 | \n", "1250279576 | \n", "0 | \n", "1951 | \n", "Adventure | \n", "Comedy | \n", "Romance | \n", "2380 | \n", "... | \n", "55 | \n", "1990 | \n", "11.78 | \n", "2.73 | \n", "1.42 | \n", "Thriller | \n", "Crime | \n", "Drama | \n", "Comedy | \n", "Sci-Fi | \n", "
88826 | \n", "970 | \n", "3033 | \n", "2.0 | \n", "1272394603 | \n", "0 | \n", "1953 | \n", "Adventure | \n", "Comedy | \n", "Crime | \n", "98 | \n", "... | \n", "100 | \n", "1985 | \n", "17.64 | \n", "3.67 | \n", "0.89 | \n", "Drama | \n", "Romance | \n", "Comedy | \n", "Thriller | \n", "Crime | \n", "
88827 rows × 27 columns
\n", "