Commit d31d19f1 authored by: Y Yao544303

add

Parent: cd9091a8
......@@ -3,13 +3,16 @@
However, the book was written some time ago, before big-data technology was as widespread as it is today, so the demo code it ships cannot run on large-scale datasets.
This motivated the idea of re-implementing the relevant content with Spark.
# Directory layout
data        test datasets
standalone  single-machine implementations of the exercises (mostly in Python)
spark       Spark implementations of the exercises (mostly in Scala)
# Recommendation algorithm implementations (planned)
## Based on user behavior data
# Directory layout
data        test datasets
standalone  single-machine implementations of the exercises (mostly in Python)
spark       Spark implementations of the exercises (mostly in Scala)
manual      collected reference material
# Planned items (yes, these are holes being dug now to fill in later)
## Recommendation algorithm implementations
### Recommendation algorithms based on user behavior data
ItemCF
UserCF
Association rules
......@@ -17,7 +20,7 @@ LFM
Graph
ALS (a minimal Spark MLlib sketch follows the algorithm lists below)
## Using user tag data
### Recommendation algorithms based on user tag data
LDA
TFIDF
TagCF
......@@ -27,18 +30,24 @@ Markov Chain
Social networks
....
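Of the algorithms listed above, ALS is the one Spark MLlib already ships a trainer for. The following is only a minimal sketch of what the Spark version might look like using MLlib's `ALS.train`; the object name, package, ratings path, and hyper-parameters are placeholders and do not exist in this repository yet.

```scala
package com.apachecn.recommand.colfliter

import org.apache.spark.mllib.recommendation.{ALS, Rating}
import org.apache.spark.{SparkConf, SparkContext}

// Minimal ALS sketch; paths and hyper-parameters below are placeholder values.
object ALSExample {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local").setAppName("ALS Example")
    val sc = new SparkContext(conf)

    // MovieLens-style input: userId::movieId::rating::timestamp
    val ratings = sc.textFile("data/ratings")
      .map(_.split("::"))
      .map(x => Rating(x(0).toInt, x(1).toInt, x(2).toDouble))

    // Train a matrix-factorization model: rank 10, 10 iterations, regularization 0.01
    val model = ALS.train(ratings, 10, 10, 0.01)

    // Recommend 5 items for user 1
    model.recommendProducts(1, 5).foreach(println)

    sc.stop()
  }
}
```

With the `::`-separated MovieLens ratings file used elsewhere in this repo, this trains a factorization model and prints five recommendations for a single user.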
# Recommender system architecture
## Surrounding infrastructure
### User behavior log storage
### Logging system
### UI
## Functional modules
### Data ingestion module
### User feature generation module
### Recommendation module
### Filtering module
### Ranking module
## Paper reading notes
## Evaluation system implementation
## Recommender system architecture
### Surrounding infrastructure
#### User behavior log storage
#### Logging system
#### UI
### Functional modules
#### Data ingestion module
#### User feature generation module
#### Recommendation module
#### Filtering module
#### Ranking module
......
package apache.wiki
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
/**
* @author ${user.name}
*/
object App {
// def foo(x : Array[String]) = x.foldLeft("")((a,b) => a + b)
// def main(args : Array[String]) {
// println( "Hello World!" )
// println("concat arguments = " + foo(args))
// }
def main(args: Array[String]) {
// Initialize the SparkContext; the configuration is supplied through SparkConf
val conf = new SparkConf().setMaster("local").setAppName("My App")
val sc = new SparkContext(conf)
println("this system exit ok!!!")
// Only one SparkContext may be active per JVM. Call stop() on the active SparkContext before creating a new one.
sc.stop()
}
}
package apache.wiki
import java.io.File
import java.nio.charset.Charset
import com.google.common.io.Files
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Seconds, StreamingContext, Time}
import org.apache.spark.util.{IntParam, LongAccumulator}
/**
* @author ${user.name}
* Copyright 2015 Sanford Ryza, Uri Laserson, Sean Owen and Joshua Wills
*
* See LICENSE file for further information.
*
* References
* GitHub: https://github.com/apachecn/RecommenderSystems
* https://github.com/apache/spark/blob/master/examples/src/main/scala/org/apache/spark/examples/streaming/RecoverableNetworkWordCount.scala
*/
object WordBlacklist {
@volatile private var instance: Broadcast[Seq[String]] = null
def getInstance(sc: SparkContext): Broadcast[Seq[String]] = {
if (instance == null) {
synchronized {
if (instance == null) {
val wordBlacklist = Seq("a", "b", "c")
instance = sc.broadcast(wordBlacklist)
}
}
}
instance
}
}
/**
* Use this singleton to get or register an Accumulator.
*/
object DroppedWordsCounter {
@volatile private var instance: LongAccumulator = null
def getInstance(sc: SparkContext): LongAccumulator = {
if (instance == null) {
synchronized {
if (instance == null) {
instance = sc.longAccumulator("WordsInBlacklistCounter")
}
}
}
instance
}
}
object CheckPointWordCount{
def createContext(ip: String, port: Int, outputPath: String, checkpointDirectory: String): StreamingContext = {
// If a checkpoint already exists, this function is not called
println("Creating new context")
// Where the streaming results are written
val outputFile = new File(outputPath.split(":")(1))
if (outputFile.exists()) outputFile.delete()
val conf = new SparkConf().setAppName("CheckPointWordCount")
// Run in local mode by default
val isDebug = true
if (isDebug) {
conf.setMaster("local[2]")
}
// Create the context with a 10 second batch size
val ssc = new StreamingContext(conf, Seconds(10))
// Checkpoint directory
ssc.checkpoint(checkpointDirectory)
// Create a DStream that will connect to hostname:port, e.g. localhost:9999
val lines = ssc.socketTextStream(ip, port)
// Split each line into words
val words = lines.flatMap(_.split(" "))
val wordCounts = words.map((_, 1)).reduceByKey(_ + _)
wordCounts.foreachRDD { (rdd: RDD[(String, Int)], time: Time) =>
// Get or register the blacklist Broadcast
val blacklist = WordBlacklist.getInstance(rdd.sparkContext)
// Get or register the droppedWordsCounter Accumulator
val droppedWordsCounter = DroppedWordsCounter.getInstance(rdd.sparkContext)
// Use blacklist to drop words and use droppedWordsCounter to count them
/*
 * Use the accumulator to count the total occurrences of blacklisted words.
 */
val counts = rdd.filter { case (word, count) =>
printf("blacklist.value=%s, word=%s, count=%d\n", blacklist.value, word, count)
if (blacklist.value.contains(word)) {
droppedWordsCounter.add(count)
println("return false")
false
} else {
println("return true")
true
}
}.collect().mkString("[", ", ", "]")
val output = "Counts at time " + time + " " + counts
println(output)
println("Dropped " + droppedWordsCounter.value + " word(s) totally")
println("Appending to " + outputFile.getAbsolutePath)
Files.append(output + "\n", outputFile, Charset.defaultCharset())
}
ssc
}
def main(args: Array[String]): Unit = {
val base = if (args.length > 0) args(0) else "file:/opt/git/RecommenderSystems/"
// Checkpoint setup
val (ip, port, outputPath, checkpointDir) = ("localhost", 9999, base + "output/out", base + "output/checkpointDir")
val ssc = StreamingContext.getOrCreate(checkpointDir, () => createContext(ip, port, outputPath, checkpointDir))
ssc.start()             // Start the computation
ssc.awaitTermination()  // Wait for the computation to terminate
}
}
\ No newline at end of file
package apache.wiki
import org.apache.spark._
import org.apache.spark.streaming._
/**
* @author ${user.name}
* Copyright 2015 Sanford Ryza, Uri Laserson, Sean Owen and Joshua Wills
*
* See LICENSE file for further information.
*
* References
* Recommender systems: http://www.kuqin.com/shuoit/20151202/349305.html
* ALS overview: http://www.csdn.net/article/2015-05-07/2824641
*/
object StreamingWordCount{
def main(args: Array[String]): Unit = {
val conf = new SparkConf().setMaster("local[2]").setAppName("StreamingWordCount")
val ssc = new StreamingContext(conf, Seconds(1))
// Create a DStream that will connect to hostname:port, e.g. localhost:9999
val lines = ssc.socketTextStream("localhost", 9999)
// Split each line into words
val words = lines.flatMap(_.split(" "))
val pairs = words.map(word => (word, 1))
val wordCounts = pairs.reduceByKey(_ + _)
// Print the first ten elements of each RDD generated in this DStream to the console
// Note: an action must be triggered (beginners often forget this and hit the error: No output operations registered, so nothing to execute)
wordCounts.print()
ssc.start()             // Start the computation
ssc.awaitTermination()  // Wait for the computation to terminate
}
}
\ No newline at end of file
package apache.wiki
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
/**
* @author ${user.name}
*/
object WordCount {
def main(args: Array[String]) {
// Initialize the SparkContext; the configuration is supplied through SparkConf
val conf = new SparkConf().setMaster("local").setAppName("My app") //.set("spark.executor.memory", "2g")
val sc = new SparkContext(conf)
// // Validate the input arguments
// if (args.length < 1) {
// println("USAGE:")
// println("spark-submit ... xxx.jar Date_String [Iteration]")
// println("spark-submit ... xxx.jar 20160424 10")
// sys.exit()
// }
val lines = sc.textFile("file:/opt/git/RecommenderSystems/README.md")
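// Word count sorted by descending frequency: swap to (count, word), sort by key, then swap back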
lines.flatMap(_.split(" "))
.map((_, 1))
.reduceByKey(_+_)
.map(x => (x._2, x._1))
.sortByKey(false)
.map(x => (x._2, x._1))
.saveAsTextFile("file:/opt/git/RecommenderSystems/output/result.log")
// println("this system exit ok!!!")
// Only one SparkContext may be active per JVM. Call stop() on the active SparkContext before creating a new one.
sc.stop()
}
}
package com.apachecn.recommand.colfliter
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.sql.SQLContext
import org.apache.spark.{SparkConf, SparkContext}
/**
* Created by ych on 2018/9/20.
*/
class Features {

  /**
   * Convert the ratings into a DataFrame with (id, features) columns and write it out as Parquet.
   */
  def changeRating2DF(sc: SparkContext,
                      ratingsPath: String,
                      size: Int,
                      OutPath: String
                     ): Unit = {
    val sqlContext = new SQLContext(sc)
    val ratingsRdd = sc.textFile(ratingsPath)
      .map(_.split("::"))
      .map(x => (x(0), Array((x(1).toInt, x(2).toDouble))))
      .reduceByKey(_ ++ _)
      .map(x => (x._1, Vectors.sparse(size, x._2)))
    sqlContext.createDataFrame(ratingsRdd).toDF("id", "features")
      .write.parquet(OutPath)
  }

  /**
   * Convert the input ratings to LibSVM format.
   * For example, the input
   *   1::661::3::978302109
   *   1::914::3::978301968
   * becomes
   *   1 661:3 914:3
   */
  def changeRatings2LibSVM(sc: SparkContext,
                           ratingsPath: String,
                           ratingsLibSVMPath: String): Unit = {
    sc.textFile(ratingsPath)
      .map(_.split("::"))
      .map(x => (x(0), Array((x(1).toInt, x(2).toInt))))
      .reduceByKey(_ ++ _)
      .map(x => x._1 + " " + x._2.sortBy(_._1).map(p => f"${p._1}:${p._2}").mkString(" "))
      .saveAsTextFile(ratingsLibSVMPath)
  }
}

object Features {

  /**
   * Submit with:
   *   spark-submit --master yarn-cluster <ratingsPath> <libSVMOutPath> <dfOutPath> <featureSize>
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("Features Prepare")
    val sc = new SparkContext(conf)
    val ratingsPath = args(0)
    val libSVMOutPath = args(1)
    val dfOutPath = args(2)
    val featureSize = args(3).toInt

    val testF = new Features
    testF.changeRatings2LibSVM(sc, ratingsPath, libSVMOutPath)
    testF.changeRating2DF(sc, ratingsPath, featureSize, dfOutPath)
  }
}
package com.apachecn.recommand.colfliter
import breeze.linalg.SparseVector
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.linalg.{DenseMatrix, SparseMatrix, Vectors}
import org.apache.spark.mllib.linalg.distributed.{CoordinateMatrix, MatrixEntry, RowMatrix}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SQLContext
import scala.collection.BitSet
......@@ -10,12 +14,40 @@ import scala.collection.BitSet
* Item-based collaborative filtering
*/
class ItemCF {
/**
 * Compute Jaccard similarity using BitSet set operations.
 */
def computeJaccardSimWithDF(sc: SparkContext,
featurePath: String
): CoordinateMatrix ={
val sqlContext = new SQLContext(sc)
val rdd = sqlContext.read.parquet(featurePath).select("features")
.rdd.map(x=>x(0).asInstanceOf[org.apache.spark.mllib.linalg.SparseVector])
.map(x=>(x.indices))
.zipWithIndex()
.map(x=>{
for (i <- x._1) yield {
(x._2, i)
}
})
.flatMap(x=>x)
.map(x=>(x._1,BitSet(x._2.toString.toInt)))
.reduceByKey(_.union(_))
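// Pairwise Jaccard similarity over the BitSets: |A ∩ B| / |A ∪ B|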
val entries = rdd.cartesian(rdd).map {
case ((key0, set0), (key1, set1)) => {
val j = (set0 & (set1)).size
val q = set0.union(set1).size
val re = j.toDouble / q
MatrixEntry(key0.toInt, key1.toInt, re)
}
}
val simMat: CoordinateMatrix = new CoordinateMatrix(entries)
simMat
}
def computeJaccardSimWithLibSVM(sc: SparkContext,
featurePath: String): CoordinateMatrix ={
val rdd = sc.textFile(featurePath)
.map(_.split(" ", 2)(1))
.zipWithIndex()
.map(x => (x._2.toInt,x._1.split(" ", -1)))
......@@ -24,25 +56,133 @@ class ItemCF {
(i.split("\\:")(0), x._1)
}
}).flatMap(x=>x)
.map(x=>(x._1,BitSet(x._2.toString.toInt)))
.reduceByKey(_.union(_))
val entries = rdd.cartesian(rdd).map {
case ((key0, set0), (key1, set1)) => {
val j = (set0 & set1).size
val q = set0.union(set1).size
val re = j.toDouble / q
MatrixEntry(key0.toInt, key1.toInt, re)
}
}
val simMat: CoordinateMatrix = new CoordinateMatrix(entries)
simMat
}
def computeCosSimWithDF(sc: SparkContext,
featurePath: String): CoordinateMatrix ={
val sqlContext = new SQLContext(sc)
val rdd = sqlContext.read.parquet(featurePath).select("features")
.rdd.map(x=>x(0).asInstanceOf[org.apache.spark.mllib.linalg.Vector])
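// columnSimilarities() computes the cosine similarity between every pair of matrix columns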
val mat = new RowMatrix(rdd)
val simMat = mat.columnSimilarities()
simMat
}
/**
 * Compute cosine similarity from a LibSVM-format feature file.
 * @param sc          SparkContext
 * @param featureSize number of features (vector dimension)
 * @param featurePath path to the LibSVM-format feature file
 */
def computeCosSimWithLibSVM(sc: SparkContext,
featureSize: Int,
featurePath: String): CoordinateMatrix ={
val rows = sc.textFile(featurePath).map(_.split(" "))
.map(x=>(x.filter(g=>g.contains(":"))))
.map(x=>(x.map(_.split(":")).map(ar => (ar(0).toInt,ar(1).toDouble))))
.map(x=>(Vectors.sparse(featureSize,x)))
val mat = new RowMatrix(rows)
val simMat = mat.columnSimilarities()
simMat
}
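/**
 * Load a similarity matrix previously written by saveSimMatrix ("i|j|value" text lines) into a local SparseMatrix.
 */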
def loadSimMatrix(sc: SparkContext,
simPath: String,
featruesSize: Int
): SparseMatrix ={
val entries = sc.textFile(simPath)
.map(_.split("\\|", -1))
.map(x=>(x(0).toInt, x(1).toInt, x(2).toDouble))
.collect()
val simMatrix = SparseMatrix.fromCOO(featruesSize, featruesSize, entries)
simMatrix
}
/**
 * Save the similarity matrix as a text file, one "i|j|value" entry per line.
 * @param sc       SparkContext
 * @param savePath output directory
 * @param mat      similarity matrix to save
 */
def saveSimMatrix(sc: SparkContext,
savePath: String,
mat: CoordinateMatrix): Unit ={
mat.entries.map(x => x.i + "|" + x.j + "|" + x.value).coalesce(1).saveAsTextFile(savePath)
}
def predictByMatrix(sc: SparkContext,
simMatrix: breeze.linalg.Matrix[Double],
featuresSize: Int,
featurePath: String,
resultPath: String
): Unit ={
val rdd = sc.textFile(featurePath)
.map(_.split(" "))
.map(x=>(x.filter(g=>g.contains(":"))))
.map(x=>(x.map(_.split(":")).map(ar => (ar(0).toInt,ar(1).toDouble))))
.map(x=>{
val idx = x.map(_._1)
val v = x.map(_._2)
val vec: SparseVector[Double] = new SparseVector(idx, v, featuresSize)
vec
})
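// NOTE: the final scoring step (multiplying each feature vector by the similarity matrix) is still left commented out below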
// .map(x=>(x.toDenseVector.toDenseMatrix.dot(simMatrix)))
}
}
object ItemCF extends ItemCF {
def main(args: Array[String]): Unit = {
val conf = new SparkConf().setAppName("utils").setMaster("local[8]")
val sc = new SparkContext(conf)
val sqlContext = new SQLContext(sc)
val libsvmFeaturePath = "C:\\workspace\\data\\apacheCN\\libsvmOut"
val dfFeaturePath = "C:\\workspace\\data\\apacheCN\\dfOut"
val simPath = "C:\\workspace\\data\\apacheCN\\simPath"
// val JaccardSimPath = "..//data//jaccardSim"
val CosSimPath = "..//data//cosSim"
val featureSize = 3953
//
// val sim1 = computeJaccardSimWithLibSVM(sc,libsvmFeaturePath)
// sim1.entries.take(10)
val sim2 = computeCosSimWithLibSVM(sc,featureSize,libsvmFeaturePath)
// sim2.entries.take(10).foreach(println)
// saveSimMatrix(sc,simPath,sim2)
val sim3 = computeJaccardSimWithDF(sc,dfFeaturePath)
sim3.entries.take(10)
val sim4 = computeCosSimWithDF(sc,dfFeaturePath)
sim4.entries.take(10)
// computeItemJaccardSim(sc,featurePath, JaccardSimPath)
// computeItemCosSim(sc,100,featurePath, CosSimPath)
val simMatrix = loadSimMatrix(sc, simPath, featureSize)
// val score = predict()
}
}
......@@ -10,11 +10,6 @@ object sparkUtils {
val conf = new SparkConf().setAppName("utils").setMaster("local")
val sc = new SparkContext(conf)
def selectData(): Unit ={
val in = "C:\\dtworkspace\\recommand\\data\\ratings"
sc.textFile(in)
.map(x => (x, x.split("::")(1).toInt))
.filter(_._2 < 1000)
.map(_._1)
.coalesce(1)
.saveAsTextFile(in + "out")
}
}
}
/*
* Copyright 2001-2009 Artima, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package samples
/*
ScalaTest facilitates different styles of testing by providing traits you can mix
together to get the behavior and syntax you prefer. A few examples are
included here. For more information, visit:
http://www.scalatest.org/
One way to use ScalaTest is to help make JUnit or TestNG tests more
clear and concise. Here's an example:
*/
import scala.collection.mutable.Stack
import org.scalatest.Assertions
import org.junit.Test
class StackSuite extends Assertions {
@Test def stackShouldPopValuesInLastInFirstOutOrder() {
val stack = new Stack[Int]
stack.push(1)
stack.push(2)
assert(stack.pop() === 2)
assert(stack.pop() === 1)
}
@Test def stackShouldThrowNoSuchElementExceptionIfAnEmptyStackIsPopped() {
val emptyStack = new Stack[String]
intercept[NoSuchElementException] {
emptyStack.pop()
}
}
}
/*
Here's an example of a FunSuite with ShouldMatchers mixed in:
*/
import org.scalatest.FunSuite
import org.scalatest.matchers.ShouldMatchers
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
@RunWith(classOf[JUnitRunner])
class ListSuite extends FunSuite with ShouldMatchers {
test("An empty list should be empty") {
List() should be ('empty)
Nil should be ('empty)
}
test("A non-empty list should not be empty") {
List(1, 2, 3) should not be ('empty)
List("fee", "fie", "foe", "fum") should not be ('empty)
}
test("A list's length should equal the number of elements it contains") {
List() should have length (0)
List(1, 2) should have length (2)
List("fee", "fie", "foe", "fum") should have length (4)
}
}
/*
ScalaTest also supports the behavior-driven development style, in which you
combine tests with text that specifies the behavior being tested. Here's
an example whose text output when run looks like:
A Map
- should only contain keys and values that were added to it
- should report its size as the number of key/value pairs it contains
*/
import org.scalatest.FunSpec
import scala.collection.mutable.Stack
class ExampleSpec extends FunSpec {
describe("A Stack") {
it("should pop values in last-in-first-out order") {
val stack = new Stack[Int]
stack.push(1)
stack.push(2)
assert(stack.pop() === 2)
assert(stack.pop() === 1)
}
it("should throw NoSuchElementException if an empty stack is popped") {
val emptyStack = new Stack[Int]
intercept[NoSuchElementException] {
emptyStack.pop()
}
}
}
}
package samples
import org.junit.runner.RunWith
import org.specs2.mutable._
import org.specs2.runner._
/**
* Sample specification.
*
* This specification can be executed with: scala -cp <your classpath> ${package}.SpecsTest
* Or using maven: mvn test
*
* For more information on how to write or run specifications, please visit:
* http://etorreborre.github.com/specs2/guide/org.specs2.guide.Runners.html
*
*/
@RunWith(classOf[JUnitRunner])
class MySpecTest extends Specification {
"The 'Hello world' string" should {
"contain 11 characters" in {
"Hello world" must have size(11)
}
"start with 'Hello'" in {
"Hello world" must startWith("Hello")
}
"end with 'world'" in {
"Hello world" must endWith("world")
}
}
}
......@@ -134,7 +134,7 @@ class ItemCF(object):
if __name__ == '__main__':
ratingfile = "C://workspace//data//ml-1m//ml-1m//ratings.dat"
ratingfile = "../../data/ratings"
item_cf = ItemCF()
item_cf.generate_dataset(ratingfile)
item_cf.calc_movie_sim()
......
{"duration": 0.02300095558166504, "input_args": {"filename": "'C:/dtworkspace/RecommenderSystems/data/ratingslibsvm'"}}
\ No newline at end of file
# first line: 24
@mem.cache
def get_data(filename):
data = load_svmlight_file(filename)
return data[0], data[1]
......@@ -11,7 +11,6 @@ from sklearn.externals.joblib import Memory
from sklearn.datasets import load_svmlight_file
from sklearn.metrics import jaccard_similarity_score
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import pairwise_distances
import numpy as np
np.set_printoptions(suppress=True)
......@@ -29,7 +28,7 @@ def get_data(filename):
# Compute Jaccard similarity
def get_jaccard_similarity(X):
n = X.shape[1]
n = X.T.shape[1]
similarity = np.zeros([n, n])
for i in range(n):
v1 = X.T[i].toarray()
......@@ -47,8 +46,12 @@ def get_consine_similarity(X):
return similarity
filename = "C:/dtworkspace/recommand/data/ratingslibsvm"
filename = "../../data/ratingslibsvm"
X, y = get_data(filename)
consine_sim = get_consine_similarity(X)
print(consine_sim)
jaccard_sim = get_jaccard_similarity(X)
print(jaccard_sim)
......
......@@ -170,7 +170,7 @@ class UserCF(object):
if __name__ == '__main__':
ratingfile = "C://workspace//data//ml-1m//ml-1m//ratings.dat"
ratingfile = "../../data/ratings"
usercf = UserCF()
usercf.generate_dataset(ratingfile)
usercf.calc_user_sim()
......