similarity_by_sklearn.py 1.2 KB
Newer Older
Y
init  
Yao544303 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 14 11:11:25 2018

@author: ych

E-mail:yao544303963@gmail.com
"""

from sklearn.externals.joblib import Memory
from sklearn.datasets import load_svmlight_file
from sklearn.metrics import jaccard_similarity_score
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import pairwise_distances
import numpy as np

# Print floats in fixed-point notation instead of scientific notation.
np.set_printoptions(suppress=True)
# joblib on-disk cache rooted at ./mycache (relative to the current working
# directory); used below to memoize data loading.
mem = Memory("./mycache")



# Expected input format (svmlight / libsvm text), one user per line:
# User movie1:rating1 movie2:rating2
@mem.cache
def get_data(filename):
    """Load a svmlight/libsvm file and return its first two components.

    The call is memoized through ``mem.cache``, so loading the same
    ``filename`` again is served from the on-disk cache.

    Parameters
    ----------
    filename : str
        Path handed unchanged to ``load_svmlight_file``.

    Returns
    -------
    tuple
        ``(features, labels)`` -- elements 0 and 1 of the value returned by
        ``load_svmlight_file`` (per scikit-learn's documentation: the sparse
        feature matrix and the target array).
    """
    loaded = load_svmlight_file(filename)
    features, labels = loaded[0], loaded[1]
    return features, labels


# Compute the Jaccard similarity between the columns of X.
def get_jaccard_similarity(X):
    """Return the pairwise Jaccard similarity between the columns of ``X``.

    Each column is treated as the set of row indices at which it holds a
    nonzero value; the similarity of columns ``i`` and ``j`` is
    ``|rows_i & rows_j| / |rows_i | rows_j|``.  The actual stored values
    (e.g. rating magnitudes) are ignored, only their presence matters.

    The score is computed with sparse matrix algebra instead of calling
    ``sklearn.metrics.jaccard_similarity_score`` once per column pair: that
    helper was removed in scikit-learn 0.23 and does not accept non-binary
    multi-output vectors.

    Parameters
    ----------
    X : scipy.sparse matrix of shape (n_rows, n_columns)
        Sparse matrix whose columns are compared.

    Returns
    -------
    numpy.ndarray of shape (n_columns, n_columns), dtype float64
        Symmetric similarity matrix.  Two columns that are both entirely
        zero get a score of 1.0 (empty set vs. empty set).  Diagonal
        entries are left at 0.0: self-similarity is not computed, which
        keeps the output layout of the previous loop-based implementation.
    """
    # Binary occupancy pattern: 1 where X stores a nonzero value, else 0.
    pattern = (X != 0).astype(np.int64)

    # shared[i, j] = number of rows in which columns i and j are both nonzero.
    shared = pattern.T.dot(pattern).toarray().astype(np.float64)

    # The diagonal of `shared` is the nonzero count of each column.
    counts = shared.diagonal().copy()
    union = counts[:, np.newaxis] + counts[np.newaxis, :] - shared

    # Start from 1.0 so that pairs with an empty union keep that score;
    # every other pair is overwritten by intersection / union.
    similarity = np.ones(shared.shape, dtype=np.float64)
    np.divide(shared, union, out=similarity, where=union > 0)

    np.fill_diagonal(similarity, 0.0)
    return similarity


# Compute the cosine similarity.
def get_consine_similarity(X):
    """Return ``cosine_similarity(X)`` unchanged.

    Thin wrapper around ``sklearn.metrics.pairwise.cosine_similarity``
    called with a single argument; per scikit-learn's documentation this
    yields the pairwise cosine similarity between the rows of ``X``.
    The misspelled function name is kept because callers rely on it.
    """
    return cosine_similarity(X)


# Hard-coded location of the ratings file (svmlight/libsvm text format).
# NOTE(review): machine-specific Windows path -- adjust before running elsewhere.
filename = "C:/dtworkspace/recommand/data/ratingslibsvm"
# Executed at import time; get_data is wrapped by mem.cache, so repeated runs
# with the same path reuse the on-disk cache.
X, y = get_data(filename)