data_prepare.sh 690 字节
Newer Older
M
malin10 已提交
1 2
# Work inside data/ (resolved against the caller's current directory).
# Abort if it cannot be entered: every later step downloads, creates and
# deletes files by relative path and must not run in the wrong directory.
cd data || {
  echo "ERROR: cannot enter directory 'data'" >&2
  exit 1
}

C
Chengmo 已提交
3
# Progress banner for the download step.
printf '%s\n' "---> Download movielens 1M data ..."
M
malin10 已提交
4
# Fetch the MovieLens 1M archive into the current directory.
# -O pins the output name: without it, a stale ml-1m.zip left by an
# interrupted run makes wget save to ml-1m.zip.1, and the later unzip would
# read the stale file instead of the fresh download.
# On failure, remove the partial file and stop; later steps need the archive.
wget -O ml-1m.zip http://files.grouplens.org/datasets/movielens/ml-1m.zip || {
  echo "ERROR: failed to download ml-1m.zip" >&2
  rm -f ml-1m.zip
  exit 1
}
C
Chengmo 已提交
5
# Progress banner for the extraction step.
printf '%s\n' "---> Unzip ml-1m.zip ..."
M
malin10 已提交
6
# Extract the archive. -o overwrites existing files without asking, so a
# re-run over files from an earlier extraction does not stall on unzip's
# interactive "replace?" prompt. Stop if extraction fails.
unzip -o ml-1m.zip || {
  echo "ERROR: failed to unzip ml-1m.zip" >&2
  exit 1
}
C
Chengmo 已提交
7
# The archive is no longer needed once it has been extracted.
rm -- ml-1m.zip
M
malin10 已提交
8

C
Chengmo 已提交
9
# Progress banner for the split step.
printf '%s\n' "---> Split movielens data ..."
M
malin10 已提交
10 11
# Run the split step. The processing stage later in this script reads
# ./ml-1m/train.dat and ./ml-1m/test.dat (presumably written by split.py —
# confirm), so stop here rather than continue with missing inputs.
python split.py || {
  echo "ERROR: split.py failed" >&2
  exit 1
}

C
Chengmo 已提交
12 13
# Output directories for the final train/test files; -p makes this a no-op
# when they already exist.
mkdir -p train/ test/
M
malin10 已提交
14

C
Chengmo 已提交
15
# Progress banner for the processing step.
printf '%s\n' "---> Process train & test data ..."
M
malin10 已提交
16 17
# Run process_ml_1m.py in process_raw mode on the train/test files and sort
# each result numerically on the 9th tab-separated field.
# The tab delimiter is built with printf rather than $'\t': that quoting form
# is a bash/ksh extension, and a plain sh without it (e.g. older dash) hands
# sort the literal string "$\t", which sort rejects as a multi-character
# delimiter.
TAB=$(printf '\t')
python process_ml_1m.py process_raw ./ml-1m/train.dat | sort -t "$TAB" -k 9 -n > log.data.train
python process_ml_1m.py process_raw ./ml-1m/test.dat | sort -t "$TAB" -k 9 -n > log.data.test
M
malin10 已提交
18 19
# Run process_ml_1m.py in hash mode on each sorted intermediate file and
# write the result to the matching output directory.
for split in train test; do
  python process_ml_1m.py hash "log.data.${split}" > "./${split}/data.txt"
done
M
malin10 已提交
20 21 22

# Drop the sorted intermediate files now that the hashed outputs exist.
rm -- log.data.train log.data.test
C
Chengmo 已提交
23 24 25
# Step back out of data/ to the directory the script started in.
cd ..

# Final progress banner.
printf '%s\n' "---> Finish data process"