提交 64ac5924 编写于 作者: T Tom Qian

refresh framework and offer test data

上级 8d32e70f
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="src" path="src"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8"/>
<classpathentry kind="lib" path="/Users/hanbo/Desktop/边城/jfinal-2.2-all/jfinal-2.2-lib/log4j/log4j-1.2.16.jar"/>
<classpathentry kind="lib" path="/Users/hanbo/Desktop/边城/jfinal-2.2-all/jfinal-2.2-lib/jackson/jackson-core-2.5.3.jar"/>
<classpathentry kind="lib" path="/Users/hanbo/Desktop/边城/jfinal-2.2-all/jfinal-2.2-lib/c3p0/c3p0-0.9.5.1.jar"/>
<classpathentry kind="lib" path="/Users/hanbo/Desktop/边城/开源好工具/数据库驱动/mysql-connector-java-5.1.40-bin.jar"/>
<classpathentry kind="lib" path="/Users/hanbo/Desktop/边城/开源好工具/分词/ansj/ansj_seg-5.0.3.jar" sourcepath="/Users/hanbo/Desktop/边城/开源好工具/分词/ansj/ansj_seg-5.0.3-sources.jar"/>
<classpathentry kind="lib" path="/Users/hanbo/Desktop/边城/开源好工具/分词/ansj/tree_split-1.4.jar"/>
<classpathentry kind="lib" path="/Users/hanbo/Desktop/边城/开源好工具/分词/ansj/nlp-lang-1.7.jar"/>
<classpathentry kind="lib" path="/Users/hanbo/Desktop/边城/开源好工具/日志类工具/slf4j-api-1.7.10.jar"/>
<classpathentry kind="lib" path="/Users/hanbo/Desktop/边城/apache-mahout-0.12.2/mahout-flink_2.10-0.12.2.jar"/>
<classpathentry kind="lib" path="/Users/hanbo/Desktop/边城/apache-mahout-0.12.2/mahout-h2o_2.10-0.12.2-dependency-reduced.jar"/>
<classpathentry kind="lib" path="/Users/hanbo/Desktop/边城/apache-mahout-0.12.2/mahout-h2o_2.10-0.12.2.jar"/>
<classpathentry kind="lib" path="/Users/hanbo/Desktop/边城/apache-mahout-0.12.2/mahout-math-0.12.2.jar"/>
<classpathentry kind="lib" path="/Users/hanbo/Desktop/边城/apache-mahout-0.12.2/mahout-mr-0.12.2.jar"/>
<classpathentry kind="lib" path="/Users/hanbo/Desktop/边城/apache-mahout-0.12.2/mahout-mr-0.12.2-job.jar"/>
<classpathentry kind="lib" path="/Users/hanbo/Desktop/边城/开源好工具/quartz-2.2.2/lib/quartz-2.2.2.jar"/>
<classpathentry kind="lib" path="/Users/hanbo/Desktop/边城/开源好工具/数据库驱动/postgresql-9.4.1207.jre6.jar"/>
<classpathentry kind="lib" path="/Users/hanbo/Desktop/边城/apache-mahout-0.12.2/mahout-integration-0.12.2.jar"/>
<classpathentry kind="output" path="bin"/>
<classpathentry including="**/*.java" kind="src" output="target/classes" path="src">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="output" path="target/classes"/>
</classpath>
......@@ -10,8 +10,14 @@
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.m2e.core.maven2Builder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.m2e.core.maven2Nature</nature>
<nature>org.eclipse.jdt.core.javanature</nature>
</natures>
</projectDescription>
eclipse.preferences.version=1
encoding//src/log4j.properties=UTF-8
encoding//res/log4j.properties=UTF-8
......@@ -8,4 +8,5 @@ org.eclipse.jdt.core.compiler.debug.localVariable=generate
org.eclipse.jdt.core.compiler.debug.sourceFile=generate
org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
org.eclipse.jdt.core.compiler.source=1.8
activeProfiles=
eclipse.preferences.version=1
resolveWorkspaceProjects=true
version=1
# 个性化新闻推荐系统 NewsRecommendSystem
[toc]
# 个性化新闻推荐系统--TomRecommenderSystem
## 说明
本推荐系统需要基于【新闻模块】使用,此处对于【新闻模块】的定义是:**有规律地进行新闻采集,并通过公共平台对用户进行新闻展示与推送的应用。**
本推荐系统使用的推荐算法包括协同过滤(Collaborative Filtering)、基于内容相似度的推荐(Content-based Recommendation)与热点新闻推荐(Hot News Recommendation):
- 协同过滤的实现依托于Mahout的提供库;
- 基于内容的相似度推荐在原始算法上基于相关论文做了自主的改进;
- 热点新闻推荐顾名思义是取最近被最多用户浏览过的新闻进行推荐。
**推荐算法的具体细节可参考文件[推荐系统介绍.pdf]**
**主要使用的库(Lib):**
- [Ansj](https://github.com/NLPchina/ansj_seg):基于内容的推荐部分用以分词,以及其内含的TFIDF算法。
- [Quartz](http://www.quartz-scheduler.org/):推荐系统定时运行的设定。
- [Mahout](http://mahout.apache.org/):使用内置的协同过滤算法。
- [Jfinal](http://www.jfinal.com/):使用内置的ActiveRecord与Db工具,对推荐系统中的数据库表做了实体类映射,以简化数据库相关操作。
## 使用
### 预备工作
#### 数据库配合
**本推荐系统目前只支持与MYSQL数据库进行交互**
本系统需要与五个表进行交互:用户表(users),新闻表(news),新闻模块表(newsmodules),浏览记录表(newslogs),推荐结果表(Recommendations)。
- 用户表users
存储用户基本信息的表。要求至少拥有三个字段:用户id(id:bigint),用户喜好关键词列表(pref_list:json),用户最近登录时间(latest_log_time:timestamp)。
|字段名|类型|非空|主键|外键|自增|默认值|
|--|--|--|--|--|--|--|
|id|bigint|yes|yes||yes||
|pref_list|text|yes||||{}|
|latest_log_time|timestamp|yes|||||
- 新闻表news
存储新闻基本信息的表。要求至少拥有三个字段:新闻id(id:bigint),新闻文本内容(content:text),所属模块(module_id)。
|字段名|类型|非空|主键|外键|自增|默认值|
|--|--|--|--|--|--|--|
|id|bigint|yes|yes||yes||
|title|text|yes|||||
|content|text|yes|||||
|module_id|int|yes||yes|||
- 新闻模块表newsmodules
存储新闻模块信息的表。要求至少拥有三个字段:模块id(id:int),模块名称(name:text),抓取时间/新闻日期(news_time:timestamp)。
|字段名|类型|非空|主键|外键|自增|默认值|
|--|--|--|--|--|--|--|
|id|int|yes|yes||yes||
|name|text|yes|||||
|news_time|timestamp|yes|||||
- 浏览记录表newslogs
存储用户浏览新闻记录的表。要求至少拥有五个字段:记录id(id:bigint),用户id(user_id:bigint),新闻id(news_id:bigint),浏览时间(view_time:timestamp),用户对新闻的偏好程度(prefer_degree[0:仅仅浏览,1:评论,2:收藏])。
|字段名|类型|非空|主键|外键|自增|默认值|
|--|--|--|--|--|--|--|
|id|bigint|yes|yes||yes||
|user_id|bigint|yes||yes|||
|news_id|bigint|yes||yes|||
|view_time|timestamp|yes|||||
|prefer_degree|int|yes|||||
- 推荐结果表Recommendations
存储推荐系统为用户生成的推荐结果及用户反馈的表。要求至少拥有六个字段:推荐结果id(id:bigint),用户id(user_id:bigint),新闻id(news_id:bigint),推荐结果生成时间戳(derive_time:timestamp),用户反馈(feedback:bit[0:用户未浏览,1:用户进行了浏览]),结果生成的对应推荐算法(derive_algorithm:int[0:协同过滤,1:基于内容的推荐,2:热点新闻推荐])。
|字段名|类型|非空|主键|外键|自增|默认值|
|--|--|--|--|--|--|--|
|id|bigint|yes|yes||yes||
|user_id|bigint|yes||yes|||
|news_id|bigint|yes||yes|||
|derive_time|timestamp|yes|||||
|feedback|bit|||||0|
|derive_algorithm|int|yes|||||
#### 数据库配置
在项目根目录下的res目录下,修改dbconfig.properties文件中有关数据库的配置:
```
url = jdbc:mysql://[数据库ip]/[数据库名]?useUnicode=true&characterEncoding=utf8
user = [登录用户名]
password = [登录密码]
```
**注意,数据库的编码设置应为UTF8。**
### 系统启动-Quick Start
四个步骤:
1. 在com.qianxinyao.TomNewsRecommender包下,找到类Main;
2. 选择推荐算法。设置boolean类型的enableCF,enableCB,enableHR变量,分别代表推荐过程中是否启用协同过滤推荐算法、基于内容的推荐算法、基于热点新闻的推荐算法。若均设为true,表示三种算法均工作,一起为用户生成推荐结果;
3. 选择推荐对象。推荐对象分为三种:全体用户,活跃用户(最近一段时间有登录行为)与自定义用户(自己指定的用户),若选择自定义用户,需要构建包含目标用户id(long)的List<Long>;
4. 选择系统运行方式。运行方式分为两种:一次运行和定时运行。一次运行即只为用户进行一次推荐生成,生成结束后则系统停止,若要再生成推荐,需要重新启动系统。而定时运行则可以定时为用户生成推荐结果,若不强制停止系统,则系统会一直运行下去。(定时运行时间在paraConfig.properties文件中设定)
以下是示例代码:
```
package com.qianxinyao.TomNewsRecommender;
import java.util.ArrayList;
import java.util.List;
import org.apache.log4j.Logger;
/**
* @author qianxinyao
* @email tomqianmaple@gmail.com
* @github https://github.com/bluemapleman
* @date 20161020
* 推荐系统入口类,在此启动推荐系统。
*/
public class Main
{
public static final Logger logger = Logger.getLogger(Main.class);
/**
* 推荐系统运行入口
* @param args
*/
public static void main(String[] args)
{
//选择要在推荐系统中运行的推荐算法
boolean enableCF=true,enableCB=false,enableHR=false;
List<Long> userList=new ArrayList<Long>();
userList.add(1l);
userList.add(2l);
userList.add(3l);
//为指定用户执行一次推荐
new JobSetter(enableCF,enableCB,enableHR).executeInstantJobForCertainUsers(userList);
//定时执行推荐
// new JobSetter(enableCF,enableCB,enableHR).executeQuartzJob(forActiveUsers);
}
}
```
### 日常使用
系统运行的各类参数都可以在根目录下src/main/res目录下的paraConfig.properties文件中进行配置。默认配置是推荐配置。
若要保持系统的日常运行,还需要【新闻模块】每天抓取一定量的新闻并入库。
## 测试数据
在Mysql数据库中运行data.sql中的sql语句,可生成数据库结构与测试数据。
测试数据中包含以下几个部分:
- users表:7个测试用户
- news表:307个2017-12-12日从网易首页抓取的测试新闻
- newsmodules表:17个测试模块
- newslogs:测试推荐算法效果用的9条浏览记录
按照上一节中提供的示例代码运行的话:
- 若对测试数据进行一次协同过滤,将生成0条推荐。
- 若对测试数据进行一次基于内容的推荐,将为用户1(id=1)推荐85,87,89,104这四条新闻(有重复标题的新闻,新闻标题中的“合同”关键词匹配上了用户的喜好关键词),为用户2推荐89新闻(重复标题的新闻),为用户3推荐87,85,100这三条新闻(新闻标题中的“合同”关键词匹配上了用户的喜好关键词)。
- 若对测试数据进行一次基于热点新闻的推荐,将为每个用户生成20条推荐结果。
## 额外说明
1. com.qianxinyao.TomNewsRecommender下的NewsScraper类是抓取网易的测试新闻时用的类,大家也可以用这个类继续采集新闻。该类默认对网易新闻首页的所有新闻进行一次抓取入库。
2. 协同过滤的效果目前不太稳定/可控,因为采用的是Mahout内置的协同过滤工具。一般来说,新闻模块的活跃用户越多,则协同过滤效果越好,也越明显。若有需求,我会在后期自己实现能稳定生成指定数量的推荐结果的协同过滤算法。
3. 一般当协同过滤与基于内容的推荐算法生成的推荐数目不足时,可以用基于热点新闻的推荐进行数量补充。
推荐算法包括协同过滤、基于内容相似度的推荐,热点新闻推荐。
A recommend system involved **collaborative filtering**,**content-based recommendation** and **hot news recommendation**,
***Lib: [Ansj](https://github.com/NLPchina/ansj_seg), [Quartz](http://www.quartz-scheduler.org/), [Mahout](http://mahout.apache.org/)***
**系统相关细节可参考[推荐系统介绍.pdf]**
<?xml version="1.0" encoding="UTF-8"?>
<module type="JAVA_MODULE" version="4">
<component name="EclipseModuleManager">
<libelement value="jar://$MODULE_DIR$/../../开源好工具/poi-3.14/poi-3.14-20160307.jar!/" />
<libelement value="jar://$MODULE_DIR$/../../jfinal-2.2-all/jfinal-2.2-lib/log4j/log4j-1.2.16.jar!/" />
<libelement value="jar://$MODULE_DIR$/../../开源好工具/poi-3.14/poi-ooxml-3.14-20160307.jar!/" />
<libelement value="jar://$MODULE_DIR$/../../开源好工具/poi-3.14/xmlbeans-2.6.0.jar!/" />
<libelement value="jar://$MODULE_DIR$/../../开源好工具/poi-3.14/poi-excelant-3.14-20160307.jar!/" />
<libelement value="jar://$MODULE_DIR$/../../开源好工具/poi-3.14/poi-ooxml-schemas-3.14-20160307.jar!/" />
<libelement value="jar://$MODULE_DIR$/../../jfinal-2.2-all/jfinal-2.2-lib/jackson/jackson-core-2.5.3.jar!/" />
<libelement value="jar://$MODULE_DIR$/../../apache-mahout-0.12.2/mahout-flink_2.10-0.12.2.jar!/" />
<libelement value="jar://$MODULE_DIR$/../../apache-mahout-0.12.2/mahout-h2o_2.10-0.12.2-dependency-reduced.jar!/" />
<libelement value="jar://$MODULE_DIR$/../../apache-mahout-0.12.2/mahout-h2o_2.10-0.12.2.jar!/" />
<libelement value="jar://$MODULE_DIR$/../../apache-mahout-0.12.2/mahout-hdfs-0.12.2.jar!/" />
<libelement value="jar://$MODULE_DIR$/../../apache-mahout-0.12.2/mahout-integration-0.12.2.jar!/" />
<libelement value="jar://$MODULE_DIR$/../../apache-mahout-0.12.2/mahout-math-0.12.2.jar!/" />
<libelement value="jar://$MODULE_DIR$/../../apache-mahout-0.12.2/mahout-math-scala_2.10-0.12.2.jar!/" />
<libelement value="jar://$MODULE_DIR$/../../apache-mahout-0.12.2/mahout-mr-0.12.2.jar!/" />
<libelement value="jar://$MODULE_DIR$/../../jfinal-2.2-all/jfinal-2.2-lib/c3p0/c3p0-0.9.5.1.jar!/" />
<libelement value="jar://$MODULE_DIR$/../../jfinal-2.2-all/jfinal-2.2-lib/c3p0/mchange-commons-java-0.2.10.jar!/" />
<libelement value="jar://$MODULE_DIR$/../../开源好工具/数据库驱动/mysql-connector-java-5.1.40-bin.jar!/" />
<libelement value="jar://$MODULE_DIR$/../../开源好工具/分词/ansj_seg-2.0.8-min.jar!/" />
<src_description expected_position="0">
<src_folder value="file://$MODULE_DIR$/src" expected_position="0" />
</src_description>
</component>
<component name="NewModuleRootManager" inherit-compiler-output="false">
<output url="file://$MODULE_DIR$/bin" />
<exclude-output />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" />
</content>
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="jdk" jdkName="JavaSE-1.8" jdkType="JavaSDK" />
<orderEntry type="module-library">
<library name="poi-3.14-20160307.jar">
<CLASSES>
<root url="jar://$MODULE_DIR$/../../开源好工具/poi-3.14/poi-3.14-20160307.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES>
<root url="jar://$MODULE_DIR$/../../开源好工具/poi-3.14/poi-3.14-20160307.jar!/" />
</SOURCES>
</library>
</orderEntry>
<orderEntry type="module-library">
<library name="log4j-1.2.16.jar">
<CLASSES>
<root url="jar://$MODULE_DIR$/../../jfinal-2.2-all/jfinal-2.2-lib/log4j/log4j-1.2.16.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
<orderEntry type="module-library">
<library name="poi-ooxml-3.14-20160307.jar">
<CLASSES>
<root url="jar://$MODULE_DIR$/../../开源好工具/poi-3.14/poi-ooxml-3.14-20160307.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
<orderEntry type="module-library">
<library name="xmlbeans-2.6.0.jar">
<CLASSES>
<root url="jar://$MODULE_DIR$/../../开源好工具/poi-3.14/xmlbeans-2.6.0.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
<orderEntry type="module-library">
<library name="poi-excelant-3.14-20160307.jar">
<CLASSES>
<root url="jar://$MODULE_DIR$/../../开源好工具/poi-3.14/poi-excelant-3.14-20160307.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
<orderEntry type="module-library">
<library name="poi-ooxml-schemas-3.14-20160307.jar">
<CLASSES>
<root url="jar://$MODULE_DIR$/../../开源好工具/poi-3.14/poi-ooxml-schemas-3.14-20160307.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
<orderEntry type="module-library">
<library name="jackson-core-2.5.3.jar">
<CLASSES>
<root url="jar://$MODULE_DIR$/../../jfinal-2.2-all/jfinal-2.2-lib/jackson/jackson-core-2.5.3.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
<orderEntry type="module-library">
<library name="mahout-flink_2.10-0.12.2.jar">
<CLASSES>
<root url="jar://$MODULE_DIR$/../../apache-mahout-0.12.2/mahout-flink_2.10-0.12.2.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
<orderEntry type="module-library">
<library name="mahout-h2o_2.10-0.12.2-dependency-reduced.jar">
<CLASSES>
<root url="jar://$MODULE_DIR$/../../apache-mahout-0.12.2/mahout-h2o_2.10-0.12.2-dependency-reduced.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
<orderEntry type="module-library">
<library name="mahout-h2o_2.10-0.12.2.jar">
<CLASSES>
<root url="jar://$MODULE_DIR$/../../apache-mahout-0.12.2/mahout-h2o_2.10-0.12.2.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
<orderEntry type="module-library">
<library name="mahout-hdfs-0.12.2.jar">
<CLASSES>
<root url="jar://$MODULE_DIR$/../../apache-mahout-0.12.2/mahout-hdfs-0.12.2.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
<orderEntry type="module-library">
<library name="mahout-integration-0.12.2.jar">
<CLASSES>
<root url="jar://$MODULE_DIR$/../../apache-mahout-0.12.2/mahout-integration-0.12.2.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
<orderEntry type="module-library">
<library name="mahout-math-0.12.2.jar">
<CLASSES>
<root url="jar://$MODULE_DIR$/../../apache-mahout-0.12.2/mahout-math-0.12.2.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
<orderEntry type="module-library">
<library name="mahout-math-scala_2.10-0.12.2.jar">
<CLASSES>
<root url="jar://$MODULE_DIR$/../../apache-mahout-0.12.2/mahout-math-scala_2.10-0.12.2.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
<orderEntry type="module-library">
<library name="mahout-mr-0.12.2.jar">
<CLASSES>
<root url="jar://$MODULE_DIR$/../../apache-mahout-0.12.2/mahout-mr-0.12.2.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
<orderEntry type="module-library">
<library name="c3p0-0.9.5.1.jar">
<CLASSES>
<root url="jar://$MODULE_DIR$/../../jfinal-2.2-all/jfinal-2.2-lib/c3p0/c3p0-0.9.5.1.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
<orderEntry type="module-library">
<library name="mchange-commons-java-0.2.10.jar">
<CLASSES>
<root url="jar://$MODULE_DIR$/../../jfinal-2.2-all/jfinal-2.2-lib/c3p0/mchange-commons-java-0.2.10.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
<orderEntry type="module-library">
<library name="mysql-connector-java-5.1.40-bin.jar">
<CLASSES>
<root url="jar://$MODULE_DIR$/../../开源好工具/数据库驱动/mysql-connector-java-5.1.40-bin.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
<orderEntry type="module-library">
<library name="ansj_seg-2.0.8-min.jar">
<CLASSES>
<root url="jar://$MODULE_DIR$/../../开源好工具/分词/ansj_seg-2.0.8-min.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
</component>
</module>
\ No newline at end of file
### Root logger ###
# log4j 1.x expects "<level>, <appender>, <appender>...". The first token is
# always read as the level, so the previous value "stdout,D,E" treated
# "stdout" as an (invalid) level and never attached the console appender.
# DEBUG is stated explicitly so the effective level stays what it was.
log4j.rootLogger = DEBUG,stdout,D,E
### Console output ###
log4j.appender.stdout = org.apache.log4j.ConsoleAppender
log4j.appender.stdout.Target = System.out
log4j.appender.stdout.layout = org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss,SSS} method:%l%n%m%n
### DEBUG and above go to logs/debug.log ###
# Relative path (resolved against the JVM working directory) instead of one
# developer's home directory, so the appender also works on other machines.
log4j.appender.D = org.apache.log4j.DailyRollingFileAppender
log4j.appender.D.File = logs/debug.log
log4j.appender.D.Append = true
log4j.appender.D.Threshold = DEBUG
log4j.appender.D.layout = org.apache.log4j.PatternLayout
log4j.appender.D.layout.ConversionPattern = %-d{yyyy-MM-dd HH:mm:ss} [ %t:%r ] - [ %p ] %m%n
### ERROR and above go to logs/error.log ###
log4j.appender.E = org.apache.log4j.DailyRollingFileAppender
log4j.appender.E.File = logs/error.log
log4j.appender.E.Append = true
log4j.appender.E.Threshold = ERROR
log4j.appender.E.layout = org.apache.log4j.PatternLayout
log4j.appender.E.layout.ConversionPattern = %-d{yyyy-MM-dd HH:mm:ss} [ %t:%r ] - [ %p ] %m%n
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>top.tomqian</groupId>
  <artifactId>recommender-system</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <properties>
    <!-- Sources contain non-ASCII text; pin the encoding so the build does not depend on the platform default. -->
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>
  <build>
    <!-- Non-standard layout: Java sources and resources both live directly under src/. -->
    <sourceDirectory>src</sourceDirectory>
    <resources>
      <resource>
        <directory>src</directory>
        <excludes>
          <exclude>**/*.java</exclude>
        </excludes>
      </resource>
    </resources>
    <plugins>
      <plugin>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>3.7.0</version>
        <configuration>
          <source>1.8</source>
          <target>1.8</target>
        </configuration>
      </plugin>
    </plugins>
  </build>
  <dependencies>
    <!-- https://mvnrepository.com/artifact/com.jfinal/jfinal -->
    <dependency>
      <groupId>com.jfinal</groupId>
      <artifactId>jfinal</artifactId>
      <version>3.0</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.ansj/ansj_seg -->
    <dependency>
      <groupId>org.ansj</groupId>
      <artifactId>ansj_seg</artifactId>
      <version>5.0.3</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/com.mchange/c3p0 -->
    <!-- Only the com.mchange coordinates are declared. The legacy c3p0:c3p0:0.9.1.1
         artifact ships the same packages and must not be on the classpath as well. -->
    <dependency>
      <groupId>com.mchange</groupId>
      <artifactId>c3p0</artifactId>
      <version>0.9.5.1</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-core -->
    <dependency>
      <groupId>com.fasterxml.jackson.core</groupId>
      <artifactId>jackson-core</artifactId>
      <version>2.5.3</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/log4j/log4j -->
    <dependency>
      <groupId>log4j</groupId>
      <artifactId>log4j</artifactId>
      <version>1.2.16</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/mysql/mysql-connector-java -->
    <dependency>
      <groupId>mysql</groupId>
      <artifactId>mysql-connector-java</artifactId>
      <version>5.1.40</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.quartz-scheduler/quartz -->
    <dependency>
      <groupId>org.quartz-scheduler</groupId>
      <artifactId>quartz</artifactId>
      <version>2.2.2</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.postgresql/postgresql -->
    <dependency>
      <groupId>org.postgresql</groupId>
      <artifactId>postgresql</artifactId>
      <version>42.1.1</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.mahout/mahout-mr -->
    <!-- mahout-core was renamed to mahout-mr after 0.9. Keeping mahout-core 0.9 next to
         mahout-integration 0.12.2 would put two versions of the same classes on the
         classpath, so both Mahout artifacts are aligned on 0.12.2. -->
    <dependency>
      <groupId>org.apache.mahout</groupId>
      <artifactId>mahout-mr</artifactId>
      <version>0.12.2</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.mahout/mahout-integration -->
    <dependency>
      <groupId>org.apache.mahout</groupId>
      <artifactId>mahout-integration</artifactId>
      <version>0.12.2</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.jsoup/jsoup -->
    <dependency>
      <groupId>org.jsoup</groupId>
      <artifactId>jsoup</artifactId>
      <version>1.8.3</version>
    </dependency>
  </dependencies>
</project>
\ No newline at end of file
# Database connection settings.
# Replace the bracketed placeholders with your own values locally.
# Do not commit a real host, user name or password to version control.
#url=jdbc:mysql://[db-host]/recommand_system
#user=
#password=
#dbname=recommand_system
#ip=
url = jdbc:mysql://[db-host]/[db-name]?useUnicode=true&characterEncoding=utf8
user = [db-user]
password = [db-password]
\ No newline at end of file
####设置###
#log4j.rootLogger = D,E
#
#### 输出信息到控制台###
#log4j.appender.stdout = org.apache.log4j.ConsoleAppender
#log4j.appender.stdout.Target = logs/info.log
#log4j.appender.stdout.layout = org.apache.log4j.PatternLayout
#log4j.appender.stdout.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss,SSS} method:%l%n%m%n
#
####输出DEBUG级别日志到/res/logs ###
#log4j.appender.D = org.apache.log4j.DailyRollingFileAppender
#log4j.appender.D.File = logs/debug.log
#log4j.appender.D.Append = true
#log4j.appender.D.Threshold = DEBUG
#log4j.appender.D.layout = org.apache.log4j.PatternLayout
#log4j.appender.D.layout.ConversionPattern = %-d{yyyy-MM-dd HH:mm:ss} [ %t:%r ] - [ %p ] %m%n
#
####输出ERROR级别日志到/res/logs/error.log###
#log4j.appender.E = org.apache.log4j.DailyRollingFileAppender
#log4j.appender.E.File =logs/error.log
#log4j.appender.E.Append = true
#log4j.appender.E.Threshold = ERROR
#log4j.appender.E.layout = org.apache.log4j.PatternLayout
#log4j.appender.E.layout.ConversionPattern = %-d{yyyy-MM-dd HH:mm:ss} [ %t:%r ] - [ %p ] %m%n
\ No newline at end of file
#---Recommend System Parameter---
#Recommend Executing Moment(Quartz's CronExpression)
#Recommendation Executing Moment (Please refer to [Quartz's CronExpression])
startAt=0 0 0 ? * *
#---Specific Algorithm Parameter---
#-RecommendKit-
#Cut-off day: news published after it is still considered valuable
beforeDays=-30
#Cut-off day: users who have browsed news after it are regarded as "active"
activeDay=-30
#-Collaborative Filtering Recommendation-
#Recommend Num from CF(Collaborative Filtering)
CFRecNum=5
#Cut-off day: only users' browsing history after it is used in the calculation
CFValidDay=-30
#-Content-Based Recommendation-
#Recommend Num from CB(Content-Based Recommend)
CBRecNum=5
......@@ -25,5 +31,6 @@ previousDays=-30
#Number of keywords TF-IDF extracts from each news item
TFIDFKeywordsNum=10
#-Hot Recommendation-
无法预览此类型文件
### Root logger ###
# log4j 1.x expects "<level>, <appender>, <appender>...". The first token is
# always read as the level, so the previous value "stdout,D,E" treated
# "stdout" as an (invalid) level and never attached the console appender.
# DEBUG is stated explicitly so the effective level stays what it was.
log4j.rootLogger = DEBUG,stdout,D,E
### Console output ###
log4j.appender.stdout = org.apache.log4j.ConsoleAppender
log4j.appender.stdout.Target = System.out
log4j.appender.stdout.layout = org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss,SSS} method:%l%n%m%n
### DEBUG and above go to logs/debug.log ###
# Relative path (resolved against the JVM working directory) instead of one
# developer's home directory, so the appender also works on other machines.
log4j.appender.D = org.apache.log4j.DailyRollingFileAppender
log4j.appender.D.File = logs/debug.log
log4j.appender.D.Append = true
log4j.appender.D.Threshold = DEBUG
log4j.appender.D.layout = org.apache.log4j.PatternLayout
log4j.appender.D.layout.ConversionPattern = %-d{yyyy-MM-dd HH:mm:ss} [ %t:%r ] - [ %p ] %m%n
### ERROR and above go to logs/error.log ###
log4j.appender.E = org.apache.log4j.DailyRollingFileAppender
log4j.appender.E.File = logs/error.log
log4j.appender.E.Append = true
log4j.appender.E.Threshold = ERROR
log4j.appender.E.layout = org.apache.log4j.PatternLayout
log4j.appender.E.layout.ConversionPattern = %-d{yyyy-MM-dd HH:mm:ss} [ %t:%r ] - [ %p ] %m%n
\ No newline at end of file
无法预览此类型文件
package top.qianxinyao.Main;
import java.util.ArrayList;
import java.util.List;
import org.apache.log4j.Logger;
import org.quartz.SchedulerException;
import top.qianxinyao.UserBasedCollaborativeRecommender.MahoutUserBasedCollaborativeRecommender;
import top.qianxinyao.UserBasedCollaborativeRecommender.quartz.CFCronTriggerRunner;
import top.qianxinyao.algorithms.PropGetKit;
import top.qianxinyao.algorithms.RecommendKit;
import top.qianxinyao.contentbasedrecommend.ContentBasedRecommender;
import top.qianxinyao.contentbasedrecommend.quartz.CBCronTriggerRunner;
import top.qianxinyao.hotrecommend.HotRecommender;
import top.qianxinyao.hotrecommend.quartz.HRCronTriggerRunner;
import top.qianxinyao.model.Users;
/**
 * @author Tom Qian
 * @email tomqianmaple@outlook.com
 * @github https://github.com/bluemapleman
 * @date 2017年12月11日
 * Starts the recommenders, either once or on a Quartz cron schedule.
 * The cron expression is read from the "startAt" entry of the "paraConfig" properties.
 */
public class JobSetter
{
    public static final Logger logger=Logger.getLogger(JobSetter.class);

    /**
     * Registers the collaborative-filtering, content-based and hot-news
     * recommenders with their Quartz trigger runners, all using the cron
     * expression stored under "startAt".
     * See http://www.quartz-scheduler.org/api/2.2.1/index.html (CronExpression)
     * for the expression syntax.
     *
     * @param forActiveUsers true to target only the users returned by
     *                       RecommendKit.getActiveUsers(), false to target
     *                       the users returned by RecommendKit.getAllUsers()
     */
    public void executeQuartzJob(boolean forActiveUsers) {
        List<Users> userList=loadTargetUsers(forActiveUsers);
        // The properties were loaded by loadTargetUsers, so "startAt" is available here.
        String cronExpression=PropGetKit.getString("startAt");
        try
        {
            new CFCronTriggerRunner().task(userList,cronExpression);
            new CBCronTriggerRunner().task(userList,cronExpression);
            new HRCronTriggerRunner().task(userList,cronExpression);
        }
        catch (SchedulerException e)
        {
            // Report through the class logger (with stack trace) so the failure
            // reaches the configured appenders instead of only stderr.
            logger.error("推荐任务定时调度失败!",e);
        }
        logger.info("本次推荐结束!");
    }

    /**
     * Runs the three recommenders once, immediately, for the selected users.
     *
     * @param forActiveUsers true to target only the users returned by
     *                       RecommendKit.getActiveUsers(), false to target
     *                       the users returned by RecommendKit.getAllUsers()
     */
    public void executeInstantJob(boolean forActiveUsers) {
        List<Users> userList=loadTargetUsers(forActiveUsers);
        // The recommenders take user ids, not Users models.
        List<Long> userIDList=new ArrayList<Long>();
        for(Users user:userList)
            userIDList.add(user.getId());
        // Build the hot-news list before any recommender runs (same order as before).
        HotRecommender.formTodayTopHotNewsList();
        new MahoutUserBasedCollaborativeRecommender().recommend(userIDList);
        new ContentBasedRecommender().recommend(userIDList);
        new HotRecommender().recommend(userIDList);
        logger.info("本次推荐结束!");
    }

    /** Loads the "paraConfig" properties, then returns either the active users or all users. */
    private List<Users> loadTargetUsers(boolean forActiveUsers) {
        PropGetKit.loadProperties("paraConfig");
        return forActiveUsers?RecommendKit.getActiveUsers():RecommendKit.getAllUsers();
    }
}
......@@ -3,15 +3,12 @@
*/
package top.qianxinyao.Main;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.List;
import top.qianxinyao.UserBasedCollaborativeRecommender.MahoutUserBasedCollaborativeRecommender;
import org.apache.log4j.Logger;
import top.qianxinyao.algorithms.PropGetKit;
import top.qianxinyao.algorithms.RecommendKit;
import top.qianxinyao.contentbasedrecommend.ContentBasedRecommender;
import top.qianxinyao.hotrecommend.HotRecommender;
import top.qianxinyao.dbconnection.DBKit;
/**
* @author qianxinyao
......@@ -21,7 +18,10 @@ import top.qianxinyao.hotrecommend.HotRecommender;
*/
public class Main
{
static ResultSet rs=null;
public static final Logger logger = Logger.getLogger(Main.class);
// static ResultSet rs=null;
/**
* @param args
......@@ -31,29 +31,11 @@ public class Main
{
//加载系统配置文件
PropGetKit.loadProperties("paraConfig");
//仅给最近一个月有活动的用户进行推荐动作
List<String> users=RecommendKit.getActiveUsers();
//设定推荐任务每天的执行时间
// String cronExpression=PropGetKit.getString("startAt");
// try
// {
// new CFCronTriggerRunner().task(users,cronExpression);
// new CBCronTriggerRunner().task(users,cronExpression);
// new HRCronTriggerRunner().task(users,cronExpression);
// }
// catch (SchedulerException e)
// {
// // TODO Auto-generated catch block
// e.printStackTrace();
// }
HotRecommender.formTodayTopHotNewsList();
// new MahoutUserBasedCollaborativeRecommender().recommend(users);
// new ContentBasedRecommender().recommend(users);
new HotRecommender().recommend(users);
//初始化操作:主要是数据库的连接
DBKit.initalize();
new JobSetter().executeInstantJob(true);
}
}
......
package top.qianxinyao.Main;
import java.io.IOException;
import java.net.SocketTimeoutException;
import java.sql.SQLException;
import java.util.Date;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import top.qianxinyao.dbconnection.DBKit;
import top.qianxinyao.model.News;
import top.qianxinyao.model.Newsmodules;
public class TestDataProcessor{
    public static final Logger logger=Logger.getLogger(TestDataProcessor.class);

    /**
     * A link text counts as a news title only if it contains a run of at
     * least 10 characters. Compiled once instead of once per list item.
     */
    private static final Pattern TITLE_PATTERN=Pattern.compile(".{10,}");

    /**
     * Scrapes the NetEase front page (http://www.163.com/) once: for every
     * list item whose first link looks like a news article, opens the article,
     * reads its module name from the breadcrumb bar and saves a News record.
     *
     * @param args unused
     * @throws IOException if the front page itself cannot be fetched
     * @throws SQLException declared by the original signature
     */
    public static void main(String[] args) throws IOException, SQLException
    {
        DBKit.initalize();
        String url="http://www.163.com/";
        Document frontPage=Jsoup.connect(url).get();
        Elements listItems=frontPage.getElementsByTag("li");
        for(Element li: listItems) {
            Elements anchors=li.getElementsByTag("a");
            if(anchors.size()==0)
                continue;
            Element a=anchors.get(0);
            String title=a.text();
            // Skip <li> entries whose link text is too short to be a news title.
            if(!TITLE_PATTERN.matcher(title).find())
                continue;
            String newsUrl=a.attr("href");
            // Skip photo galleries ("photoview"), ad redirects ("Redirect") and template URLs containing "{".
            if(newsUrl.contains("photoview") || newsUrl.contains("Redirect") || newsUrl.contains("{"))
                continue;
            try
            {
                Document articlePage=Jsoup.connect(newsUrl).get();
                Elements crumbs=articlePage.getElementsByClass("post_crumb");
                // No breadcrumb bar: not a regular news article.
                if(crumbs.size()==0)
                    continue;
                // The module name is the second breadcrumb link; skip pages
                // that do not have one rather than failing on get(1).
                Elements crumbLinks=crumbs.get(0).getElementsByTag("a");
                if(crumbLinks.size()<2)
                    continue;
                String moduleName=crumbLinks.get(1).text();
                System.out.println(title+"("+moduleName+"):"+newsUrl);
                News news=new News();
                news.set("title",title).set("module_id", getModuleID(moduleName))
                    .set("url",newsUrl).set("news_time", new Date()).save();
            }
            catch (SocketTimeoutException e)
            {
                // A slow article page is simply skipped.
                continue;
            }
            catch(Exception e) {
                // One failing article must not abort the whole scrape; record it and move on.
                logger.error("抓取新闻失败:"+newsUrl,e);
            }
        }
        logger.info("本次新闻抓取完毕!");
    }

    /**
     * Returns the id of the newsmodules row whose name equals moduleName.
     * If no such row exists, a new one is saved with that name and its id is
     * looked up again and returned.
     *
     * @param moduleName module name taken from the article's breadcrumb bar
     * @return the module id, or -1 if any exception occurs during lookup or save
     */
    private static int getModuleID(String moduleName) {
        int moduleID=-1;
        try {
            String sql="select id from newsmodules where name=?";
            Newsmodules existing=Newsmodules.dao.findFirst(sql,moduleName);
            if(existing!=null)
                return existing.getId();
            Newsmodules created=new Newsmodules();
            created.setName(moduleName);
            created.save();
            // Query again to obtain the id of the row that was just saved.
            return Newsmodules.dao.findFirst(sql,moduleName).getId();
        }
        catch (Exception e) {
            // Keep the stack trace in the log; the caller receives -1.
            logger.error(e.getMessage(),e);
        }
        return moduleID;
    }
}
\ No newline at end of file
......@@ -3,9 +3,6 @@
*/
package top.qianxinyao.UserBasedCollaborativeRecommender;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
......@@ -13,7 +10,7 @@ import java.util.Set;
import org.apache.log4j.Logger;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.model.jdbc.PostgreSQLBooleanPrefJDBCDataModel;
import org.apache.mahout.cf.taste.impl.model.jdbc.MySQLBooleanPrefJDBCDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.LogLikelihoodSimilarity;
......@@ -25,7 +22,8 @@ import org.apache.mahout.cf.taste.similarity.UserSimilarity;
import top.qianxinyao.algorithms.PropGetKit;
import top.qianxinyao.algorithms.RecommendAlgorithm;
import top.qianxinyao.algorithms.RecommendKit;
import top.qianxinyao.dbconnection.ConnectionFactory;
import top.qianxinyao.dbconnection.DBKit;
import top.qianxinyao.model.Newslogs;
/**
* @author qianxinyao
......@@ -59,26 +57,24 @@ public class MahoutUserBasedCollaborativeRecommender implements RecommendAlgorit
*/
@SuppressWarnings("unused")
@Override
public void recommend(List<String> users)
public void recommend(List<Long> users)
{
int count=0;
try
{
System.out.println("CF start at "+new Date());
PostgreSQLBooleanPrefJDBCDataModel dataModel = ConnectionFactory.getPostgreSQLBooleanPrefJDBCDataModel();
MySQLBooleanPrefJDBCDataModel dataModel = DBKit.getMySQLJDBCDataModel();
Statement stmt = ConnectionFactory.getNewStatement();
ResultSet rs1 = stmt.executeQuery("select " + ConnectionFactory.PREF_TABLE_USERID + ","
+ ConnectionFactory.PREF_TABLE_NEWSID + "," + ConnectionFactory.PREF_TABLE_TIME + " from newslogs");
List<Newslogs> newslogList=Newslogs.dao.find("select " + DBKit.PREF_TABLE_USERID + ","
+ DBKit.PREF_TABLE_NEWSID + "," + DBKit.PREF_TABLE_TIME + " from newslogs");
// 移除过期的用户浏览新闻行为,这些行为对计算用户相似度不再具有较大价值
while (rs1.next())
for (Newslogs newslog:newslogList)
{
if (rs1.getTimestamp(3).before(RecommendKit.getInRecTimestamp(inRecDays)))
if (newslog.getViewTime().before(RecommendKit.getInRecTimestamp(inRecDays)))
{
dataModel.removePreference(Long.parseLong(rs1.getString(1)), Long.parseLong(rs1.getString(2)));
dataModel.removePreference(newslog.getUserId(), newslog.getNewsId());
}
}
......@@ -89,24 +85,24 @@ public class MahoutUserBasedCollaborativeRecommender implements RecommendAlgorit
Recommender recommender = new GenericUserBasedRecommender(dataModel, neighborhood, similarity);
for (String user : users)
for (Long user : users)
{
long start = System.currentTimeMillis();
Long userid = Long.parseLong(user);
Long userid = user;
List<RecommendedItem> recItems = recommender.recommend(userid, N);
Set<String> hs = new HashSet<String>();
Set<Long> hs = new HashSet<Long>();
for (RecommendedItem recItem : recItems)
{
hs.add(String.valueOf(recItem.getItemID()));
hs.add(recItem.getItemID());
}
// 过滤掉已推荐新闻和已过期新闻
RecommendKit.filterOutDateNews(hs, String.valueOf(userid));
RecommendKit.filterReccedNews(hs, String.valueOf(userid));
RecommendKit.filterOutDateNews(hs,userid);
RecommendKit.filterReccedNews(hs,userid);
// 无可推荐新闻
if (hs == null)
......@@ -118,7 +114,7 @@ public class MahoutUserBasedCollaborativeRecommender implements RecommendAlgorit
RecommendKit.removeOverNews(hs, N);
}
RecommendKit.insertRecommend(String.valueOf(userid), hs.iterator(),RecommendAlgorithm.CF);
RecommendKit.insertRecommend(userid, hs.iterator(),RecommendAlgorithm.CF);
count+=hs.size();
}
......@@ -128,7 +124,7 @@ public class MahoutUserBasedCollaborativeRecommender implements RecommendAlgorit
logger.error("CB算法构造偏好对象失败!");
e.printStackTrace();
}
catch (SQLException e)
catch (Exception e)
{
logger.error("CB算法数据库操作失败!");
e.printStackTrace();
......
......@@ -14,6 +14,8 @@ import org.quartz.impl.JobDetailImpl;
import org.quartz.impl.StdSchedulerFactory;
import org.quartz.impl.triggers.CronTriggerImpl;
import top.qianxinyao.model.Users;
/**
* @author qianxinyao
* @email tomqianmaple@gmail.com
......@@ -22,7 +24,7 @@ import org.quartz.impl.triggers.CronTriggerImpl;
*/
public class CFCronTriggerRunner
{
public void task(List<String> users,String cronExpression) throws SchedulerException
public void task(List<Users> users,String cronExpression) throws SchedulerException
{
// Initiate a Schedule Factory
SchedulerFactory schedulerFactory = new StdSchedulerFactory();
......
......@@ -24,7 +24,7 @@ public class CFJob implements Job
@Override
public void execute(JobExecutionContext arg0) throws JobExecutionException
{
List<String> users=(List<String>) arg0.getJobDetail().getJobDataMap().get("users");
List<Long> users=(List<Long>) arg0.getJobDetail().getJobDataMap().get("users");
new MahoutUserBasedCollaborativeRecommender().recommend(users);
}
......
......@@ -32,6 +32,6 @@ public interface RecommendAlgorithm
/**
* 针对特定用户返回推荐结果
*/
public void recommend(List<String> users);
public void recommend(List<Long> users);
}
......@@ -3,9 +3,6 @@
*/
package top.qianxinyao.algorithms;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.sql.Timestamp;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
......@@ -17,8 +14,12 @@ import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.apache.log4j.Logger;
import top.qianxinyao.contentbasedrecommend.CustomizedHashMap;
import top.qianxinyao.dbconnection.ConnectionFactory;
import top.qianxinyao.model.News;
import top.qianxinyao.model.Recommendations;
import top.qianxinyao.model.Users;
/**
* @author qianxinyao
......@@ -28,6 +29,9 @@ import top.qianxinyao.dbconnection.ConnectionFactory;
*/
public class RecommendKit
{
public static final Logger logger=Logger.getLogger(RecommendKit.class);
/**
* 推荐新闻的时效性天数,即从推荐当天开始到之前beforeDays天的新闻属于仍具有时效性的新闻,予以推荐。
*/
......@@ -62,25 +66,24 @@ public class RecommendKit
/**
* 过滤方法filterOutDateNews() 过滤掉失去时效性的新闻(由beforeDays属性控制)
*/
public static void filterOutDateNews(Collection<String> col, String userId)
public static void filterOutDateNews(Collection<Long> col, Long userId)
{
try
{
String newsids = getInQueryString(col.iterator());
if (!newsids.equals("()"))
{
ResultSet rs = ConnectionFactory.getStatement()
.executeQuery("select newsid,ntime from news where newsid in " + newsids);
while (rs.next())
List<News> newsList = News.dao.find("select news_id,news_time from news where id in " + newsids);
for(News news:newsList)
{
if (rs.getTimestamp(2).before(getInRecTimestamp(beforeDays)))
if (news.getNewsTime().before(getInRecTimestamp(beforeDays)))
{
col.remove(rs.getString(1));
col.remove(news.getId());
}
}
}
}
catch (SQLException e)
catch (Exception e)
{
e.printStackTrace();
}
......@@ -89,22 +92,20 @@ public class RecommendKit
/**
* 过滤方法filterBrowsedNews() 过滤掉已经用户已经看过的新闻
*/
public static void filterBrowsedNews(Collection<String> col, String userId)
public static void filterBrowsedNews(Collection<Long> col, Long userId)
{
try
{
Statement stmt = ConnectionFactory.getNewStatement();
ResultSet rs;
rs = stmt.executeQuery("select nlnewsid from newslogs where nluserid='" + userId + "'");
while (rs.next())
List<News> newsList = News.dao.find("select news_id from newslogs where user_id=?",userId);
for (News news:newsList)
{
if (col.contains(rs.getString(1)))
if (col.contains(news.getId()))
{
col.remove(rs.getString(1));
col.remove(news.getId());
}
}
}
catch (SQLException e)
catch (Exception e)
{
// TODO Auto-generated catch block
e.printStackTrace();
......@@ -114,22 +115,20 @@ public class RecommendKit
/**
* 过滤方法filterReccedNews() 过滤掉已经推荐过的新闻(在recommend表中查找)
*/
public static void filterReccedNews(Collection<String> col, String userId)
public static void filterReccedNews(Collection<Long> col, Long userId)
{
try
{
Statement stmt = ConnectionFactory.getNewStatement();
ResultSet rs;
rs = stmt.executeQuery("select rnewsid from recommend where ruserid='" + userId + "' and rrectime>"+getInRecDate());
while (rs.next())
List<News> newsList = News.dao.find("select news_id from recommendations where user_id=? and derive_time>?",userId,getInRecDate());
for (News news:newsList)
{
if (col.contains(rs.getString(1)))
if (col.contains(news.getId()))
{
col.remove(rs.getString(1));
col.remove(news.getId());
}
}
}
catch (SQLException e)
catch (Exception e)
{
// TODO Auto-generated catch block
e.printStackTrace();
......@@ -141,18 +140,18 @@ public class RecommendKit
*
* @return
*/
public static ArrayList<String> getUserList()
public static ArrayList<Long> getUserList()
{
ArrayList<String> users = new ArrayList<String>();
ArrayList<Long> users = new ArrayList<Long>();
try
{
ResultSet rs = ConnectionFactory.getNewStatement().executeQuery("select userid from users");
while (rs.next())
List<Users> userList = Users.dao.find("select id from users");
for (Users user:userList)
{
users.add(rs.getString(1));
users.add(user.getId());
}
}
catch (SQLException e)
catch (Exception e)
{
e.printStackTrace();
}
......@@ -183,26 +182,24 @@ public class RecommendKit
*
* @return
*/
public static HashMap<String, CustomizedHashMap<Integer, CustomizedHashMap<String, Double>>> getUserPrefListMap(
Collection<String> userSet)
public static HashMap<Long, CustomizedHashMap<Integer, CustomizedHashMap<String, Double>>> getUserPrefListMap(
Collection<Long> userSet)
{
ResultSet rs = null;
HashMap<String, CustomizedHashMap<Integer, CustomizedHashMap<String, Double>>> userPrefListMap = null;
HashMap<Long, CustomizedHashMap<Integer, CustomizedHashMap<String, Double>>> userPrefListMap = null;
try
{
String userPrefListQuery = getInQueryStringWithSingleQuote(userSet.iterator());
if (!userPrefListQuery.equals("()"))
{
rs = ConnectionFactory.getNewStatement()
.executeQuery("select userid,upreflist from users where userid in " + userPrefListQuery);
userPrefListMap = new HashMap<String, CustomizedHashMap<Integer, CustomizedHashMap<String, Double>>>();
while (rs.next())
List<Users> userList = Users.dao.find("select id,pref_list from users where id in " + userPrefListQuery);
userPrefListMap = new HashMap<Long, CustomizedHashMap<Integer, CustomizedHashMap<String, Double>>>();
for (Users user:userList)
{
userPrefListMap.put(rs.getString(1), JsonKit.jsonPrefListtoMap(rs.getString(2)));
userPrefListMap.put(user.getId(), JsonKit.jsonPrefListtoMap(user.getPrefList()));
}
}
}
catch (SQLException e)
catch (Exception e)
{
// TODO Auto-generated catch block
e.printStackTrace();
......@@ -257,58 +254,61 @@ public class RecommendKit
* @param recAlgo
* 标明推荐结果来自哪个推荐算法(RecommendAlgorithm.XX)
*/
public static void insertRecommend(String userId, Iterator<String> newsIte, int recAlgo)
public static void insertRecommend(Long userId, Iterator<Long> newsIte, int recAlgo)
{
try
{
String insertValues = "";
while (newsIte.hasNext())
{
insertValues += "(" + userId + "," + newsIte.next() + ",'" + new Timestamp(System.currentTimeMillis())
+ "'," + recAlgo + "),";
}
if (insertValues.length() > 0)
{
insertValues = insertValues.substring(0, insertValues.length() - 1);
ConnectionFactory.getNewStatement()
.execute("insert into recommend (ruserid,rnewsid,rrectime,rrecalgo) values " + insertValues);
Recommendations rec=new Recommendations();
rec.setUserId(userId);
rec.setDeriveAlgorithm(recAlgo);
rec.setNewsId(newsIte.next());
}
}
catch (SQLException e)
catch (Exception e)
{
e.printStackTrace();
}
}
/**
* Acquire list of "active" users' ids
* "Active" means who use app recently(determined by method getInRecDate())
* Acquire a list of active users
* "Active" means who read news recently ('recent' determined by method getInRecDate(), default in a month)
*
* @return
*/
public static List<String> getActiveUsers()
public static List<Users> getActiveUsers()
{
try
{
int activeDay=PropGetKit.getInt("activeDay");
ResultSet rs1;
rs1 = ConnectionFactory.getNewStatement()
.executeQuery("select distinct nluserid from newslogs where nltime>" + getInRecDate(activeDay));
List<String> users = new ArrayList<String>();
while (rs1.next())
{
users.add(rs1.getString(1));
}
return users;
List<Users> userList=Users.dao.find("select distinct id,name from users where latest_log_time>" + getInRecDate(activeDay));
return userList;
}
catch (SQLException e)
catch (Exception e)
{
// TODO Auto-generated catch block
e.printStackTrace();
}
System.out.println("return null");
logger.info("获取活跃用户异常!");
return null;
}
/**
 * Loads every user record (id and name columns only) from the users table.
 *
 * @return the list produced by the query, or null when the query throws
 */
public static List<Users> getAllUsers(){
	try
	{
		return Users.dao.find("select distinct id,name from users");
	}
	catch (Exception e)
	{
		// Best-effort contract is preserved (null on failure), but the cause is
		// now recorded through the logger at ERROR level instead of being
		// dumped to stderr and reported as an INFO message.
		logger.error("获取全体用户异常!", e);
		return null;
	}
}
/**
* 去除数量上超过为算法设置的推荐结果上限值的推荐结果
......@@ -317,10 +317,10 @@ public class RecommendKit
* @param N
* @return
*/
public static void removeOverNews(Set<String> set, int N)
public static void removeOverNews(Set<Long> set, int N)
{
int i = 0;
Iterator<String> ite = set.iterator();
Iterator<Long> ite = set.iterator();
while (ite.hasNext())
{
if (i >= N)
......
......@@ -3,7 +3,6 @@
*/
package top.qianxinyao.contentbasedrecommend;
import java.sql.ResultSet;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
......@@ -21,7 +20,7 @@ import org.apache.log4j.Logger;
import top.qianxinyao.algorithms.PropGetKit;
import top.qianxinyao.algorithms.RecommendAlgorithm;
import top.qianxinyao.algorithms.RecommendKit;
import top.qianxinyao.dbconnection.ConnectionFactory;
import top.qianxinyao.model.News;
/**
* @author qianxinyao
......@@ -50,7 +49,7 @@ public class ContentBasedRecommender implements RecommendAlgorithm
private static final int N = PropGetKit.getInt("CBRecNum");
@Override
public void recommend(List<String> users)
public void recommend(List<Long> users)
{
try
{
......@@ -59,27 +58,26 @@ public class ContentBasedRecommender implements RecommendAlgorithm
// 首先进行用户喜好关键词列表的衰减更新+用户当日历史浏览记录的更新
new UserPrefRefresher().refresh(users);
// 新闻及对应关键词列表的Map
HashMap<String, List<Keyword>> newsKeyWordsMap = new HashMap<String, List<Keyword>>();
HashMap<String, Integer> newsModuleMap = new HashMap<String, Integer>();
HashMap<Long, List<Keyword>> newsKeyWordsMap = new HashMap<Long, List<Keyword>>();
HashMap<Long, Integer> newsModuleMap = new HashMap<Long, Integer>();
// 用户喜好关键词列表
HashMap<String, CustomizedHashMap<Integer, CustomizedHashMap<String, Double>>> userPrefListMap = RecommendKit
HashMap<Long, CustomizedHashMap<Integer, CustomizedHashMap<String, Double>>> userPrefListMap = RecommendKit
.getUserPrefListMap(users);
ResultSet rs = ConnectionFactory.getStatement()
.executeQuery("select newsid,ntitle,ncontent,nmoduleid from news where ntime>"
+ RecommendKit.getInRecDate() + " and ncontent not like '<meta%'");
while (rs.next())
List<News> newsList=News.dao.find("select id,title,content,module_id from news where news_time>"
+ RecommendKit.getInRecDate());
for (News news:newsList)
{
newsKeyWordsMap.put(rs.getString(1), TFIDF.getTFIDE(rs.getString(2), rs.getString(3), KEY_WORDS_NUM));
newsModuleMap.put(rs.getString(1), rs.getInt(4));
newsKeyWordsMap.put(news.getId(), TFIDF.getTFIDE(news.getTitle(), news.getContent(), KEY_WORDS_NUM));
newsModuleMap.put(news.getId(), news.getModuleId());
}
for (String userId : users)
for (Long userId : users)
{
Map<String, Double> tempMatchMap = new HashMap<String, Double>();
Iterator<String> ite = newsKeyWordsMap.keySet().iterator();
Map<Long, Double> tempMatchMap = new HashMap<Long, Double>();
Iterator<Long> ite = newsKeyWordsMap.keySet().iterator();
while (ite.hasNext())
{
String newsId = ite.next();
Long newsId = ite.next();
int moduleId = newsModuleMap.get(newsId);
if (null != userPrefListMap.get(userId).get(moduleId))
tempMatchMap.put(newsId,
......@@ -92,7 +90,7 @@ public class ContentBasedRecommender implements RecommendAlgorithm
if (!(tempMatchMap.toString().equals("{}")))
{
tempMatchMap = sortMapByValue(tempMatchMap);
Set<String> toBeRecommended=tempMatchMap.keySet();
Set<Long> toBeRecommended=tempMatchMap.keySet();
//过滤掉已经推荐过的新闻
RecommendKit.filterReccedNews(toBeRecommended,userId);
//过滤掉用户已经看过的新闻
......@@ -133,19 +131,19 @@ public class ContentBasedRecommender implements RecommendAlgorithm
return matchValue;
}
private void removeZeroItem(Map<String, Double> map)
private void removeZeroItem(Map<Long, Double> map)
{
HashSet<String> toBeDeleteItemSet = new HashSet<String>();
Iterator<String> ite = map.keySet().iterator();
HashSet<Long> toBeDeleteItemSet = new HashSet<Long>();
Iterator<Long> ite = map.keySet().iterator();
while (ite.hasNext())
{
String newsId = ite.next();
Long newsId = ite.next();
if (map.get(newsId) <= 0)
{
toBeDeleteItemSet.add(newsId);
}
}
for (String item : toBeDeleteItemSet)
for (Long item : toBeDeleteItemSet)
{
map.remove(item);
}
......@@ -156,17 +154,17 @@ public class ContentBasedRecommender implements RecommendAlgorithm
* @param map
* @return
*/
public static Map<String, Double> sortMapByValue(Map<String, Double> oriMap) {
public static Map<Long, Double> sortMapByValue(Map<Long, Double> oriMap) {
if (oriMap == null || oriMap.isEmpty()) {
return null;
}
Map<String, Double> sortedMap = new LinkedHashMap<String, Double>();
List<Map.Entry<String, Double>> entryList = new ArrayList<Map.Entry<String, Double>>(
Map<Long, Double> sortedMap = new LinkedHashMap<Long, Double>();
List<Map.Entry<Long, Double>> entryList = new ArrayList<Map.Entry<Long, Double>>(
oriMap.entrySet());
Collections.sort(entryList, new MapValueComparator());
Iterator<Map.Entry<String, Double>> iter = entryList.iterator();
Map.Entry<String, Double> tmpEntry = null;
Iterator<Map.Entry<Long, Double>> iter = entryList.iterator();
Map.Entry<Long, Double> tmpEntry = null;
while (iter.hasNext()) {
tmpEntry = iter.next();
sortedMap.put(tmpEntry.getKey(), tmpEntry.getValue());
......
......@@ -13,10 +13,10 @@ import java.util.Map.Entry;
* @github https://github.com/bluemapleman
* @date 2016年12月2日
*/
class MapValueComparator implements Comparator<Map.Entry<String, Double>> {
class MapValueComparator implements Comparator<Map.Entry<Long, Double>> {
@Override
public int compare(Entry<String, Double> me1, Entry<String, Double> me2) {
public int compare(Entry<Long, Double> me1, Entry<Long, Double> me2) {
return me1.getValue().compareTo(me2.getValue());
}
......
......@@ -3,8 +3,6 @@
*/
package top.qianxinyao.contentbasedrecommend;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
......@@ -14,9 +12,13 @@ import java.util.List;
import org.ansj.app.keyword.Keyword;
import com.jfinal.plugin.activerecord.Db;
import top.qianxinyao.algorithms.JsonKit;
import top.qianxinyao.algorithms.RecommendKit;
import top.qianxinyao.dbconnection.ConnectionFactory;
import top.qianxinyao.model.News;
import top.qianxinyao.model.Newslogs;
import top.qianxinyao.model.Users;
/**
* @author qianxinyao
......@@ -25,9 +27,7 @@ import top.qianxinyao.dbconnection.ConnectionFactory;
* @date 2016年11月3日 每次用户浏览新的新闻时,用以更新用户的喜好关键词列表
*/
public class UserPrefRefresher
{
ResultSet rs=null;
{
//设置TFIDF提取的关键词数目
private static final int KEY_WORDS_NUM = 10;
......@@ -39,29 +39,30 @@ public class UserPrefRefresher
}
@SuppressWarnings("unchecked")
public void refresh(Collection<String> userIdsCol){
public void refresh(Collection<Long> userIdsCol){
//首先对用户的喜好关键词列表进行衰减更新
autoDecRefresh(userIdsCol);
//用户浏览新闻纪录:userBrowsexMap:<String(userid),ArrayList<String>(newsid List)>
HashMap<String,ArrayList<String>> userBrowsedMap=getBrowsedHistoryMap();
//用户浏览新闻纪录:userBrowsedMap:<Long(userid),ArrayList<Long>(newsid List)>
HashMap<Long,ArrayList<Long>> userBrowsedMap=getBrowsedHistoryMap();
//用户喜好关键词列表:userPrefListMap:<String(userid),String(json))>
HashMap<String,CustomizedHashMap<Integer,CustomizedHashMap<String,Double>>> userPrefListMap=RecommendKit.getUserPrefListMap(userBrowsedMap.keySet());
HashMap<Long,CustomizedHashMap<Integer,CustomizedHashMap<String,Double>>> userPrefListMap=RecommendKit.getUserPrefListMap(userBrowsedMap.keySet());
//新闻对应关键词列表与模块ID:newsTFIDFMap:<String(newsid),List<Keyword>>,<String(newsModuleId),Integer(moduleid)>
HashMap<String,Object> newsTFIDFMap=getNewsTFIDFMap();
//开始遍历用户浏览记录,更新用户喜好关键词列表
//对每个用户(外层循环),循环他所看过的每条新闻(内层循环),对每个新闻,更新它的关键词列表到用户的对应模块中
Iterator<String> ite=userBrowsedMap.keySet().iterator();
Iterator<Long> ite=userBrowsedMap.keySet().iterator();
while(ite.hasNext()){
String userId=ite.next();
ArrayList<String> newsList=userBrowsedMap.get(userId);
for(String news:newsList){
Long userId=ite.next();
ArrayList<Long> newsList=userBrowsedMap.get(userId);
for(Long news:newsList){
Integer moduleId=(Integer) newsTFIDFMap.get(news+"moduleid");
//获得对应模块的(关键词:喜好)map
CustomizedHashMap<String,Double> rateMap=userPrefListMap.get(userId).get(moduleId);
//获得新闻的(关键词:TFIDF值)map
@SuppressWarnings("unlikely-arg-type")
List<Keyword> keywordList=(List<Keyword>) newsTFIDFMap.get(news);
Iterator<Keyword> keywordIte=keywordList.iterator();
while(keywordIte.hasNext()){
......@@ -77,14 +78,14 @@ public class UserPrefRefresher
userPrefListMap.get(userId);
}
}
Iterator<String> iterator=userBrowsedMap.keySet().iterator();
Iterator<Long> iterator=userBrowsedMap.keySet().iterator();
while(iterator.hasNext()){
String userId=iterator.next();
Long userId=iterator.next();
try
{
ConnectionFactory.getStatement().executeUpdate("update users set upreflist='"+userPrefListMap.get(userId)+"' where userid='"+userId+"'");
Db.update("update users set upreflist='"+userPrefListMap.get(userId)+"' where userid=?",userId);
}
catch (SQLException e)
catch (Exception e)
{
e.printStackTrace();
}
......@@ -102,20 +103,20 @@ public class UserPrefRefresher
/**
* 所有用户的喜好关键词列表TFIDF值随时间进行自动衰减更新
*/
public void autoDecRefresh(Collection<String> userIdsCol){
public void autoDecRefresh(Collection<Long> userIdsCol){
try
{
String inQuery=RecommendKit.getInQueryStringWithSingleQuote(userIdsCol.iterator());
if(inQuery.equals("()")){
return;
}
ResultSet rs=ConnectionFactory.getStatement().executeQuery("select userid,upreflist from users where userid in "+inQuery);
List<Users> userList=Users.dao.find("select id,pref_list from users where id in "+inQuery);
//用以更新的用户喜好关键词map的json串
//用于删除喜好值过低的关键词
ArrayList<String> keywordToDelete=new ArrayList<String>();
while(rs.next()){
for(Users user:userList){
String newPrefList="{";
HashMap<Integer,CustomizedHashMap<String,Double>> map=JsonKit.jsonPrefListtoMap(rs.getString(2));
HashMap<Integer,CustomizedHashMap<String,Double>> map=JsonKit.jsonPrefListtoMap(user.getPrefList());
Iterator<Integer> ite=map.keySet().iterator();
while(ite.hasNext()){
//用户对应模块的喜好不为空
......@@ -142,10 +143,10 @@ public class UserPrefRefresher
newPrefList+=moduleMap.toString()+",";
}
newPrefList="'"+newPrefList.substring(0,newPrefList.length()-1)+"}'";
ConnectionFactory.getNewStatement().executeUpdate("update users set upreflist="+newPrefList+" where userid='"+rs.getString(1)+"'");
Db.update("update users set pref_list="+newPrefList+" where id=?",user.getId());
}
}
catch (SQLException e)
catch (Exception e)
{
e.printStackTrace();
}
......@@ -155,35 +156,40 @@ public class UserPrefRefresher
* 提取出当天所有用户浏览新闻纪录
* @return
*/
private HashMap<String,ArrayList<String>> getBrowsedHistoryMap(){
HashMap<String, ArrayList<String>> userBrowsedMap=null;
private HashMap<Long,ArrayList<Long>> getBrowsedHistoryMap(){
HashMap<Long, ArrayList<Long>> userBrowsedMap=null;
try
{
userBrowsedMap=new HashMap<String,ArrayList<String>>();
ResultSet rs = ConnectionFactory.getStatement().executeQuery("select * from newslogs where nltime>"+RecommendKit.getSpecificDayFormat(0));
while(rs.next()){
if(userBrowsedMap.containsKey(rs.getString(2))){
userBrowsedMap.get(rs.getString(2)).add(rs.getString(3));
userBrowsedMap=new HashMap<Long,ArrayList<Long>>();
List<Newslogs> newslogsList=Newslogs.dao.find("select * from newslogs where view_time>"+RecommendKit.getSpecificDayFormat(0));
for(Newslogs newslogs:newslogsList){
if(userBrowsedMap.containsKey(newslogs.getUserId())){
userBrowsedMap.get(newslogs.getUserId()).add(newslogs.getNewsId());
}
else{
userBrowsedMap.put(rs.getString(2), new ArrayList<String>());
userBrowsedMap.get(rs.getString(2)).add(rs.getString(3));
userBrowsedMap.put(newslogs.getUserId(), new ArrayList<Long>());
userBrowsedMap.get(newslogs.getUserId()).add(newslogs.getNewsId());
}
}
}
catch (SQLException e)
catch (Exception e)
{
e.printStackTrace();
}
return userBrowsedMap;
}
private HashSet<String> getBrowsedNewsSet(){
HashMap<String,ArrayList<String>> browsedMap=getBrowsedHistoryMap();
HashSet<String> newsIdSet=new HashSet<String>();
Iterator<String> ite=getBrowsedHistoryMap().keySet().iterator();
/**
* 获得浏览过的新闻的集合
* @return
*/
private HashSet<Long> getBrowsedNewsSet(){
HashMap<Long,ArrayList<Long>> browsedMap=getBrowsedHistoryMap();
HashSet<Long> newsIdSet=new HashSet<Long>();
Iterator<Long> ite=getBrowsedHistoryMap().keySet().iterator();
while(ite.hasNext()){
Iterator<String> inIte=browsedMap.get(ite.next()).iterator();
Iterator<Long> inIte=browsedMap.get(ite.next()).iterator();
while(inIte.hasNext()){
newsIdSet.add(inIte.next());
}
......@@ -199,7 +205,7 @@ public class UserPrefRefresher
HashMap<String,Object> newsTFIDFMap=null;
try
{
Iterator<String> ite=getBrowsedNewsSet().iterator();
Iterator<Long> ite=getBrowsedNewsSet().iterator();
String newsIdListQuery="(";
while(ite.hasNext()){
newsIdListQuery+=ite.next()+",";
......@@ -208,18 +214,18 @@ public class UserPrefRefresher
if(newsIdListQuery.length()>1){
newsIdListQuery=newsIdListQuery.substring(0, newsIdListQuery.length()-1)+")";
//提取出所有新闻的关键词列表及对应TF-IDf值,并放入一个map中
rs=ConnectionFactory.getStatement().executeQuery("select newsid,ntitle,ncontent,nmoduleid from news where newsid in "+newsIdListQuery);
List<News> newsList=News.dao.find("select id,title,content,module_id from news where newsid in "+newsIdListQuery);
newsTFIDFMap=new HashMap<String,Object>();
while(rs.next()){
newsTFIDFMap.put(rs.getString(1), TFIDF.getTFIDE(rs.getString(2), rs.getString(3),KEY_WORDS_NUM));
newsTFIDFMap.put(rs.getString(1)+"moduleid", rs.getInt(4));
for(News news:newsList){
newsTFIDFMap.put(String.valueOf(news.getId()), TFIDF.getTFIDE(news.getTitle(), news.getContent(),KEY_WORDS_NUM));
newsTFIDFMap.put(news.getId()+"moduleid", news.getModuleId());
}
}
else
return null;
}
catch (SQLException e)
catch (Exception e)
{
e.printStackTrace();
}
......
......@@ -14,6 +14,8 @@ import org.quartz.impl.JobDetailImpl;
import org.quartz.impl.StdSchedulerFactory;
import org.quartz.impl.triggers.CronTriggerImpl;
import top.qianxinyao.model.Users;
/**
* @author qianxinyao
* @email tomqianmaple@gmail.com
......@@ -22,7 +24,7 @@ import org.quartz.impl.triggers.CronTriggerImpl;
*/
public class CBCronTriggerRunner
{
public void task(List<String> users,String cronExpression) throws SchedulerException
public void task(List<Users> users,String cronExpression) throws SchedulerException
{
// Initiate a Schedule Factory
SchedulerFactory schedulerFactory = new StdSchedulerFactory();
......
......@@ -24,7 +24,7 @@ public class CBJob implements Job
@Override
public void execute(JobExecutionContext arg0) throws JobExecutionException
{
List<String> users=(List<String>) arg0.getJobDetail().getJobDataMap().get("users");
List<Long> users=(List<Long>) arg0.getJobDetail().getJobDataMap().get("users");
new ContentBasedRecommender().recommend(users);
}
......
package top.qianxinyao.dbconnection;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.HashMap;
import java.util.Properties;
import javax.sql.DataSource;
import org.apache.log4j.Logger;
import org.apache.mahout.cf.taste.impl.model.jdbc.MySQLBooleanPrefJDBCDataModel;
import com.jfinal.plugin.activerecord.ActiveRecordPlugin;
import com.jfinal.plugin.c3p0.C3p0Plugin;
import top.qianxinyao.model.News;
import top.qianxinyao.model.Newslogs;
import top.qianxinyao.model.Newsmodules;
import top.qianxinyao.model.Recommendations;
import top.qianxinyao.model.Users;
/**
 * Database bootstrap helper. Starts the c3p0 connection pool and the JFinal
 * ActiveRecord plugin with the project's model mappings, and exposes the
 * pooled DataSource plus a Mahout data model built on the browse-log table.
 */
public class DBKit{
	public static final Logger logger=Logger.getLogger(DBKit.class);

	// Name of the preference table
	public static final String PREF_TABLE="newslogs";
	// Column holding the user id
	public static final String PREF_TABLE_USERID="user_id";
	// Column holding the news id
	public static final String PREF_TABLE_NEWSID="news_id";
	// Column holding the preference value
	public static final String PREF_TABLE_PREFVALUE="prefer_degree";
	// Column holding the time at which the user viewed the news item
	public static final String PREF_TABLE_TIME="view_time";

	// Connection pool plugin; stays null until initalize() has read a valid configuration
	private static C3p0Plugin cp;

	/**
	 * Reads the connection settings, creates the c3p0 pool and registers the
	 * table-to-model mappings with ActiveRecord, then starts both plugins.
	 * Failures are logged and never propagated to the caller.
	 */
	public static void initalize()
	{
		try
		{
			HashMap<String, String> info = getDBInfo();
			if (info == null)
			{
				// getDBInfo() has already logged why the configuration could not be read
				logger.error("数据库连接初始化错误!");
				return;
			}
			cp = new C3p0Plugin(info.get("url"), info.get("user"), info.get("password"));

			ActiveRecordPlugin arp = new ActiveRecordPlugin(cp);
			arp.addMapping("users", Users.class);
			arp.addMapping("news", News.class);
			arp.addMapping("newsmodules", Newsmodules.class);
			arp.addMapping("newslogs", Newslogs.class);
			arp.addMapping("recommendations", Recommendations.class);

			if(cp.start() && arp.start())
				logger.info("数据库连接池插件启动成功......");
			else
				// A failed start is an error condition, not an informational message
				logger.error("c3p0插件启动失败!");

			logger.info("数据库初始化工作完毕!");
		}
		catch (Exception e)
		{
			logger.error("数据库连接初始化错误!", e);
		}
	}

	/**
	 * Loads url/user/password from res/dbconfig.properties under the current
	 * working directory (system property user.dir).
	 *
	 * @return a map with the keys "url", "user" and "password" (a value is null
	 *         when the property is absent), or null when the file cannot be read
	 */
	public static HashMap<String, String> getDBInfo()
	{
		HashMap<String, String> info = null;
		// try-with-resources guarantees the stream is closed on every path
		try (FileInputStream in = new FileInputStream(System.getProperty("user.dir") + "/res/dbconfig.properties"))
		{
			Properties p = new Properties();
			p.load(in);
			info = new HashMap<String, String>();
			info.put("url", p.getProperty("url"));
			info.put("user", p.getProperty("user"));
			info.put("password", p.getProperty("password"));
		}
		catch (FileNotFoundException e)
		{
			logger.error("读取属性文件--->失败!- 原因:文件路径错误或者文件不存在", e);
		}
		catch (IOException e)
		{
			logger.error("装载文件--->失败!", e);
		}
		return info;
	}

	/**
	 * Returns the pooled DataSource, initializing the pool on first use.
	 *
	 * @return the DataSource exposed by the c3p0 plugin
	 * @throws IllegalStateException when the pool could not be created
	 */
	public static DataSource getDataSource() {
		if(cp==null)
			initalize();
		if(cp==null)
			// Fail with an explicit message instead of a bare NullPointerException
			throw new IllegalStateException("Database connection pool is not initialized; check res/dbconfig.properties");
		return cp.getDataSource();
	}

	/**
	 * Builds a Mahout boolean-preference JDBC data model over the preference
	 * table, using the user id, news id and view time columns declared above.
	 *
	 * @return a new MySQLBooleanPrefJDBCDataModel backed by the pooled DataSource
	 */
	public static MySQLBooleanPrefJDBCDataModel getMySQLJDBCDataModel(){
		return new MySQLBooleanPrefJDBCDataModel(DBKit.getDataSource(), PREF_TABLE, PREF_TABLE_USERID,
				PREF_TABLE_NEWSID,PREF_TABLE_TIME);
	}
}
......@@ -22,7 +22,7 @@ public class StatementWatcher extends Thread
//防止还未开始
try
{
Thread.sleep(10000);
Thread.sleep(20000);
}
catch (InterruptedException e1)
{
......
......@@ -3,8 +3,6 @@
*/
package top.qianxinyao.hotrecommend;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Calendar;
......@@ -13,9 +11,12 @@ import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.log4j.Logger;
import top.qianxinyao.algorithms.RecommendAlgorithm;
import top.qianxinyao.algorithms.RecommendKit;
import top.qianxinyao.dbconnection.ConnectionFactory;
import top.qianxinyao.model.Newslogs;
import top.qianxinyao.model.Recommendations;
/**
* @author qianxinyao
......@@ -25,29 +26,33 @@ import top.qianxinyao.dbconnection.ConnectionFactory;
*/
public class HotRecommender implements RecommendAlgorithm
{
public static final Logger logger=Logger.getLogger(HotRecommender.class);
// 热点新闻的有效时间
public static int beforeDays = -10;
// 推荐系统每日为每位用户生成的推荐结果的总数,当CF与CB算法生成的推荐结果数不足此数时,由该算法补充
public static int TOTAL_REC_NUM = 20;
// 将每天生成的“热点新闻”ID,按照新闻的热点程度从高到低放入此List
private static ArrayList<String> topHotNewsList = new ArrayList<String>();
private static ArrayList<Long> topHotNewsList = new ArrayList<Long>();
@Override
public void recommend(List<String> users)
public void recommend(List<Long> users)
{
System.out.println("HR start at "+new Date());
int count=0;
Timestamp timestamp = getCertainTimestamp(0, 0, 0);
for (String userId : users)
for (Long userId : users)
{
try
{
ResultSet rs = ConnectionFactory.getNewStatement()
.executeQuery("select ruserid,count(*) as recnums from recommend where rrectime>'" + timestamp
+ "' and ruserid='" + userId + "' group by ruserid");
boolean flag=rs.next();
int delta=flag?TOTAL_REC_NUM - rs.getInt("recnums"):TOTAL_REC_NUM;
Set<String> toBeRecommended = new HashSet<String>();
Recommendations recommendation=Recommendations.dao.findFirst("select user_id,count(*) as recnums from recommendations where derive_time>'" + timestamp
+ "' and user_id='" + userId + "' group by user_id");
boolean flag=recommendation!=null;
int delta=flag?TOTAL_REC_NUM - recommendation.getInt("recnums"):TOTAL_REC_NUM;
Set<Long> toBeRecommended = new HashSet<Long>();
if (delta > 0)
{
int i = topHotNewsList.size() > delta ? delta : topHotNewsList.size();
......@@ -59,12 +64,12 @@ public class HotRecommender implements RecommendAlgorithm
RecommendKit.insertRecommend(userId, toBeRecommended.iterator(), RecommendAlgorithm.HR);
count+=toBeRecommended.size();
}
catch (SQLException e)
catch (Exception e)
{
e.printStackTrace();
}
}
System.out.println("HR has contributed " + (count/users.size()) + " recommending news on average");
System.out.println("HR has contributed " + (users.size()==0?0:count/users.size()) + " recommending news on average");
System.out.println("HR end at "+new Date());
}
......@@ -72,29 +77,27 @@ public class HotRecommender implements RecommendAlgorithm
public static void formTodayTopHotNewsList()
{
topHotNewsList.clear();
ArrayList<String> hotNewsTobeReccommended = new ArrayList<String>();
ArrayList<Long> hotNewsTobeReccommended = new ArrayList<Long>();
try
{
ResultSet rs = ConnectionFactory.getNewStatement()
.executeQuery("select nlnewsid,count(*) as visitNums from newslogs where nltime>"
+ RecommendKit.getInRecDate(beforeDays) + " group by nlnewsid order by visitNums desc");
while (rs.next())
List<Newslogs> newslogsList=Newslogs.dao.find("select news_id,count(*) as visitNums from newslogs where view_time>"
+ RecommendKit.getInRecDate(beforeDays) + " group by news_id order by visitNums desc");
for (Newslogs newslog:newslogsList)
{
hotNewsTobeReccommended.add(rs.getString(1));
hotNewsTobeReccommended.add(newslog.getNewsId());
}
for (String news : hotNewsTobeReccommended)
for (Long news : hotNewsTobeReccommended)
{
topHotNewsList.add(news);
}
System.out.println(topHotNewsList);
}
catch (SQLException e)
catch (Exception e)
{
e.printStackTrace();
}
}
public static List<String> getTopHotNewsList()
public static List<Long> getTopHotNewsList()
{
return topHotNewsList;
}
......
......@@ -14,6 +14,8 @@ import org.quartz.impl.JobDetailImpl;
import org.quartz.impl.StdSchedulerFactory;
import org.quartz.impl.triggers.CronTriggerImpl;
import top.qianxinyao.model.Users;
/**
* @author qianxinyao
* @email tomqianmaple@gmail.com
......@@ -22,7 +24,7 @@ import org.quartz.impl.triggers.CronTriggerImpl;
*/
public class HRCronTriggerRunner
{
public void task(List<String> users,String cronExpression) throws SchedulerException
public void task(List<Users> users,String cronExpression) throws SchedulerException
{
// Initiate a Schedule Factory
SchedulerFactory schedulerFactory = new StdSchedulerFactory();
......
/**
*
*/
package top.qianxinyao.model;
import com.jfinal.kit.PathKit;
import com.jfinal.plugin.activerecord.dialect.MysqlDialect;
import com.jfinal.plugin.activerecord.generator.Generator;
import top.qianxinyao.dbconnection.DBKit;
/**
* @author qianxinyao
*
*/
public class ModelGenerator
{
    /** Package name used for the generated base model classes. */
    private static final String BASE_MODEL_PACKAGE = "top.qianxinyao.model.base";

    /** Package name used for the generated model classes (the default package of MappingKit). */
    private static final String MODEL_PACKAGE = "top.qianxinyao.model";

    /**
     * Runs the JFinal code generator against the data source supplied by DBKit and
     * writes base models and models into the project's source tree.
     *
     * @param args command-line arguments; not read by this method
     */
    public static void main(String[] args)
    {
        // Output directory of the base models, resolved relative to the root class path.
        final String baseOutputDir = PathKit.getRootClassPath() + "/../../src/top/qianxinyao/model/base";
        System.out.println("rootclasspath:" + baseOutputDir);

        // Models (plus MappingKit and the data dictionary) go one directory above the base models.
        final String modelOutputDir = baseOutputDir + "/..";
        System.out.println(baseOutputDir);

        Generator modelGenerator = new Generator(
                DBKit.getDataSource(),
                BASE_MODEL_PACKAGE,
                baseOutputDir,
                MODEL_PACKAGE,
                modelOutputDir);
        modelGenerator.setDialect(new MysqlDialect());
        // Emit a static dao field inside every generated model.
        modelGenerator.setGenerateDaoInModel(true);
        // Skip generation of the data dictionary file.
        modelGenerator.setGenerateDataDictionary(false);
        modelGenerator.generate();
    }
}
package top.qianxinyao.model;
import top.qianxinyao.model.base.BaseNews;
/**
 * Model class for the "news" table (the mapping is registered in _MappingKit).
 * All column accessors are inherited from BaseNews; this class only adds the
 * shared {@code dao} instance.
 * Generated by JFinal.
 */
@SuppressWarnings("serial")
public class News extends BaseNews<News> {
// Shared static instance, obtained by calling dao() on a fresh News object.
public static final News dao = new News().dao();
}
package top.qianxinyao.model;
import top.qianxinyao.model.base.BaseNewslogs;
/**
 * Model class for the "newslogs" table (the mapping is registered in _MappingKit).
 * All column accessors are inherited from BaseNewslogs; this class only adds the
 * shared {@code dao} instance.
 * Generated by JFinal.
 */
@SuppressWarnings("serial")
public class Newslogs extends BaseNewslogs<Newslogs> {
// Shared static instance, obtained by calling dao() on a fresh Newslogs object.
public static final Newslogs dao = new Newslogs().dao();
}
package top.qianxinyao.model;
import top.qianxinyao.model.base.BaseNewsmodules;
/**
 * Model class for the "newsmodules" table (the mapping is registered in _MappingKit).
 * All column accessors are inherited from BaseNewsmodules; this class only adds the
 * shared {@code dao} instance.
 * Generated by JFinal.
 */
@SuppressWarnings("serial")
public class Newsmodules extends BaseNewsmodules<Newsmodules> {
// Shared static instance, obtained by calling dao() on a fresh Newsmodules object.
public static final Newsmodules dao = new Newsmodules().dao();
}
package top.qianxinyao.model;
import top.qianxinyao.model.base.BaseRecommendations;
/**
 * Model class for the "recommendations" table (the mapping is registered in _MappingKit).
 * All column accessors are inherited from BaseRecommendations; this class only adds the
 * shared {@code dao} instance.
 * Generated by JFinal.
 */
@SuppressWarnings("serial")
public class Recommendations extends BaseRecommendations<Recommendations> {
// Shared static instance, obtained by calling dao() on a fresh Recommendations object.
public static final Recommendations dao = new Recommendations().dao();
}
package top.qianxinyao.model;
import top.qianxinyao.model.base.BaseUsers;
/**
 * Model class for the "users" table (the mapping is registered in _MappingKit).
 * All column accessors are inherited from BaseUsers; this class only adds the
 * shared {@code dao} instance.
 * Generated by JFinal.
 */
@SuppressWarnings("serial")
public class Users extends BaseUsers<Users> {
// Shared static instance, obtained by calling dao() on a fresh Users object.
public static final Users dao = new Users().dao();
}
package top.qianxinyao.model;
import com.jfinal.plugin.activerecord.ActiveRecordPlugin;
/**
* Generated by JFinal, do not modify this file.
* <pre>
* Example:
* public void configPlugin(Plugins me) {
* ActiveRecordPlugin arp = new ActiveRecordPlugin(...);
* _MappingKit.mapping(arp);
* me.add(arp);
* }
* </pre>
*/
public class _MappingKit {
/**
 * Registers the five model classes of this package on the given plugin.
 * Every call passes a table name, the primary-key column "id" and the model class
 * (argument roles per JFinal's addMapping signature).
 *
 * @param arp the ActiveRecordPlugin that receives the mappings
 */
public static void mapping(ActiveRecordPlugin arp) {
arp.addMapping("news", "id", News.class);
arp.addMapping("newslogs", "id", Newslogs.class);
arp.addMapping("newsmodules", "id", Newsmodules.class);
arp.addMapping("recommendations", "id", Recommendations.class);
arp.addMapping("users", "id", Users.class);
}
}
package top.qianxinyao.model.base;
import com.jfinal.plugin.activerecord.Model;
import com.jfinal.plugin.activerecord.IBean;
/**
 * Base model with typed getters and setters for the attributes
 * id, content, news_time, title, module_id and url.
 * Every accessor delegates to the inherited get/set call, using the attribute name as key.
 * Generated by JFinal, do not modify this file.
 *
 * @param <M> the concrete model type that extends this base class
 */
@SuppressWarnings("serial")
public abstract class BaseNews<M extends BaseNews<M>> extends Model<M> implements IBean {
public void setId(java.lang.Long id) {
set("id", id);
}
public java.lang.Long getId() {
return get("id");
}
public void setContent(java.lang.String content) {
set("content", content);
}
public java.lang.String getContent() {
return get("content");
}
public void setNewsTime(java.util.Date newsTime) {
set("news_time", newsTime);
}
public java.util.Date getNewsTime() {
return get("news_time");
}
public void setTitle(java.lang.String title) {
set("title", title);
}
public java.lang.String getTitle() {
return get("title");
}
public void setModuleId(java.lang.Integer moduleId) {
set("module_id", moduleId);
}
public java.lang.Integer getModuleId() {
return get("module_id");
}
public void setUrl(java.lang.String url) {
set("url", url);
}
public java.lang.String getUrl() {
return get("url");
}
}
package top.qianxinyao.model.base;
import com.jfinal.plugin.activerecord.Model;
import com.jfinal.plugin.activerecord.IBean;
/**
 * Base model with typed getters and setters for the attributes
 * id, user_id, news_id, view_time and prefer_degree.
 * Every accessor delegates to the inherited get/set call, using the attribute name as key.
 * Generated by JFinal, do not modify this file.
 *
 * @param <M> the concrete model type that extends this base class
 */
@SuppressWarnings("serial")
public abstract class BaseNewslogs<M extends BaseNewslogs<M>> extends Model<M> implements IBean {
public void setId(java.lang.Long id) {
set("id", id);
}
public java.lang.Long getId() {
return get("id");
}
public void setUserId(java.lang.Long userId) {
set("user_id", userId);
}
public java.lang.Long getUserId() {
return get("user_id");
}
public void setNewsId(java.lang.Long newsId) {
set("news_id", newsId);
}
public java.lang.Long getNewsId() {
return get("news_id");
}
public void setViewTime(java.util.Date viewTime) {
set("view_time", viewTime);
}
public java.util.Date getViewTime() {
return get("view_time");
}
public void setPreferDegree(java.lang.Integer preferDegree) {
set("prefer_degree", preferDegree);
}
public java.lang.Integer getPreferDegree() {
return get("prefer_degree");
}
}
package top.qianxinyao.model.base;
import com.jfinal.plugin.activerecord.Model;
import com.jfinal.plugin.activerecord.IBean;
/**
 * Base model with typed getters and setters for the attributes id and name.
 * Every accessor delegates to the inherited get/set call, using the attribute name as key.
 * Note that id is an Integer here, whereas the other base models in this package use Long.
 * Generated by JFinal, do not modify this file.
 *
 * @param <M> the concrete model type that extends this base class
 */
@SuppressWarnings("serial")
public abstract class BaseNewsmodules<M extends BaseNewsmodules<M>> extends Model<M> implements IBean {
public void setId(java.lang.Integer id) {
set("id", id);
}
public java.lang.Integer getId() {
return get("id");
}
public void setName(java.lang.String name) {
set("name", name);
}
public java.lang.String getName() {
return get("name");
}
}
package top.qianxinyao.model.base;
import com.jfinal.plugin.activerecord.Model;
import com.jfinal.plugin.activerecord.IBean;
/**
 * Base model with typed getters and setters for the attributes
 * id, user_id, news_id, derive_time, feedback and derive_algorithm.
 * Every accessor delegates to the inherited get/set call, using the attribute name as key.
 * Generated by JFinal, do not modify this file.
 *
 * @param <M> the concrete model type that extends this base class
 */
@SuppressWarnings("serial")
public abstract class BaseRecommendations<M extends BaseRecommendations<M>> extends Model<M> implements IBean {
public void setId(java.lang.Long id) {
set("id", id);
}
public java.lang.Long getId() {
return get("id");
}
public void setUserId(java.lang.Long userId) {
set("user_id", userId);
}
public java.lang.Long getUserId() {
return get("user_id");
}
public void setNewsId(java.lang.Long newsId) {
set("news_id", newsId);
}
public java.lang.Long getNewsId() {
return get("news_id");
}
public void setDeriveTime(java.util.Date deriveTime) {
set("derive_time", deriveTime);
}
public java.util.Date getDeriveTime() {
return get("derive_time");
}
public void setFeedback(java.lang.Boolean feedback) {
set("feedback", feedback);
}
public java.lang.Boolean getFeedback() {
return get("feedback");
}
public void setDeriveAlgorithm(java.lang.Integer deriveAlgorithm) {
set("derive_algorithm", deriveAlgorithm);
}
public java.lang.Integer getDeriveAlgorithm() {
return get("derive_algorithm");
}
}
package top.qianxinyao.model.base;
import com.jfinal.plugin.activerecord.Model;
import com.jfinal.plugin.activerecord.IBean;
/**
 * Base model with typed getters and setters for the attributes
 * id, pref_list, latest_log_time and name.
 * Every accessor delegates to the inherited get/set call, using the attribute name as key.
 * Generated by JFinal, do not modify this file.
 *
 * @param <M> the concrete model type that extends this base class
 */
@SuppressWarnings("serial")
public abstract class BaseUsers<M extends BaseUsers<M>> extends Model<M> implements IBean {
public void setId(java.lang.Long id) {
set("id", id);
}
public java.lang.Long getId() {
return get("id");
}
public void setPrefList(java.lang.String prefList) {
set("pref_list", prefList);
}
public java.lang.String getPrefList() {
return get("pref_list");
}
public void setLatestLogTime(java.util.Date latestLogTime) {
set("latest_log_time", latestLogTime);
}
public java.util.Date getLatestLogTime() {
return get("latest_log_time");
}
public void setName(java.lang.String name) {
set("name", name);
}
public java.lang.String getName() {
return get("name");
}
}
///**
// *
// */
//package top.qianxinyao.Main;
//
//import java.io.File;
//import java.io.FileInputStream;
//import java.io.FileNotFoundException;
//import java.io.IOException;
//import java.text.DecimalFormat;
//import java.util.HashMap;
//import java.util.Iterator;
//
//import org.apache.log4j.Logger;
//import org.apache.poi.hssf.usermodel.HSSFCell;
//import org.apache.poi.hssf.usermodel.HSSFRow;
//import org.apache.poi.hssf.usermodel.HSSFSheet;
//import org.apache.poi.hssf.usermodel.HSSFWorkbook;
//import org.apache.poi.ss.usermodel.Cell;
//import org.apache.poi.ss.usermodel.Row;
//
//
///**
// * @author qianxinyao
// * @email tomqianmaple@gmail.com
// * @github https://github.com/bluemapleman
// * @date 2016年10月16日
// */
//
//public class DataProcessor
//{
//
// private static Logger logger = Logger.getLogger(DataProcessor.class);
//
// private static HashMap<String,HashMap<String,Integer>> userLikes;
//
// /**
// * 读取数据表,获得用户的喜好数据
// */
// public static void readDataTable(){
//
// }
//
// /**
// * 读取excel文件,获得用户的喜好数据
// *
// * @param file
// */
//
// public static void readExcelData(File file)
// {
// userLikes=new HashMap<String,HashMap<String,Integer>>();
//
// HSSFWorkbook wb=null;
//
// try
// {
// wb = new HSSFWorkbook(new FileInputStream(file));
//
// HSSFSheet sheet = wb.getSheetAt(0);
//
// // Iterate over each row in the sheet
//
// Iterator<Row> rows = sheet.rowIterator();
//
// while (rows.hasNext())
// {
// HashMap<String,Integer> likes=new HashMap<String,Integer>();
//
// HSSFRow row = (HSSFRow) rows.next();
//
// // Iterate over each cell in the row and print out the cell"s
//
// // content
//
// Iterator<Cell> cells = row.cellIterator();
//
//
// while (cells.hasNext())
// {
// Cell cell=cells.next();
//
// String cellValue=getCellValue((HSSFCell) cell);
//
//
// logger.info(cellValue);
//
// }
// }
// }
// catch (FileNotFoundException fe)
// {
// logger.error("Exception:" + fe.toString());
// }
// catch (Exception e)
// {
// logger.error("Exception:" + e.toString());
// }
// finally
// {
// if(null!=wb){
// try
// {
// wb.close();
// }
// catch (IOException e)
// {
// // TODO Auto-generated catch block
// logger.error("XSSFWorkbook close failed!");
// }
// }
// logger.info("程序出现异常!请检查!");
// }
// }
//
//
// /**
// * 返回各种类型单元格值的字符串形式的方法
// * @param cell
// * @return
// */
// private static String getCellValue(HSSFCell cell) {
// String cellValue = "";
// DecimalFormat df = new DecimalFormat("#");
// switch (cell.getCellType()) {
// case HSSFCell.CELL_TYPE_STRING:
// cellValue = cell.getRichStringCellValue().getString().trim();
// break;
// case HSSFCell.CELL_TYPE_NUMERIC:
// cellValue = df.format(cell.getNumericCellValue()).toString();
// break;
// case HSSFCell.CELL_TYPE_BOOLEAN:
// cellValue = String.valueOf(cell.getBooleanCellValue()).trim();
// break;
// case HSSFCell.CELL_TYPE_FORMULA:
// cellValue = cell.getCellFormula();
// break;
// default:
// cellValue = "";
// }
// return cellValue;
// }
//
//
//
//
//
// /**
// * 分隔用户所喜好新闻数据的方法,获得用户喜好的所有新闻信息。
// * @param likes
// * @param seperator 新闻数据分隔符
// * @return
// */
//// private static String[] seperateLikes(String likes,String seperator){
//// return likes.split(seperator);
//// }
//
//}
/**
*
*/
package top.qianxinyao.dbconnection;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.HashMap;
import java.util.Properties;
import javax.sql.DataSource;
import org.apache.log4j.Logger;
import org.apache.mahout.cf.taste.impl.model.jdbc.MySQLJDBCDataModel;
import org.apache.mahout.cf.taste.impl.model.jdbc.PostgreSQLBooleanPrefJDBCDataModel;
import org.postgresql.jdbc3.Jdbc3SimpleDataSource;
/**
* @author qianxinyao
* @email tomqianmaple@gmail.com
* @github https://github.com/bluemapleman
* @date 2016年10月21日
*/
public class ConnectionFactory
{
    public static final Logger logger = Logger.getLogger(ConnectionFactory.class);

    // NOTE(review): the column-name constants below use the legacy "nl*" naming, while the
    // generated BaseNewslogs model reads user_id / news_id / view_time / prefer_degree.
    // Confirm which schema the JDBC data models are meant to target before relying on them.

    // Name of the preference table
    public static final String PREF_TABLE="newslogs";
    // Name of the user id column
    public static final String PREF_TABLE_USERID="nlonguserid";
    // Name of the news id column
    public static final String PREF_TABLE_NEWSID="nlnewsid";
    // Name of the preference value column
    public static final String PREF_TABLE_PREFVALUE="nprefer";
    // Name of the column holding the time of the user's view
    public static final String PREF_TABLE_TIME="nltime";

    public static final String MYSQL="com.mysql.jdbc.Driver";
    public static final String POSTGRE="org.postgresql.Driver";

    // Lazily created connection and statement, shared by all callers of this class.
    public static Connection conn;
    public static Statement stmt;

    // Legacy DriverManager-based initializer, kept commented out as in the original source.
//  private static void initalize(String database)
//  {
//      try
//      {
//          Class.forName(database).newInstance();
//          HashMap<String, String> info = getDBInfo();
//          conn = (Connection) DriverManager.getConnection(info.get("url"), info.get("user"), info.get("password"));
//      }
//      catch (ClassNotFoundException e)
//      {
//          logger.error("找不到驱动程序类 ,加载驱动失败!");
//      }
//      catch (SQLException se)
//      {
//          logger.error("数据库连接失败!");
//      }
//      catch (Exception e)
//      {
//          logger.error("数据库连接初始化错误!");
//      }
//      return;
//  }

    /**
     * Returns the shared connection, opening it on first use.
     *
     * @return the shared connection, or null when opening it failed with an SQLException
     * @throws IllegalStateException if the database configuration cannot be loaded
     */
    public static Connection getConnection()
    {
        if (null == conn)
        {
            try
            {
                conn=getDataSource().getConnection();
            }
            catch (SQLException e)
            {
                // Use the class logger instead of printStackTrace so failures reach the log output.
                logger.error("Failed to open database connection", e);
            }
        }
        return conn;
    }

    /**
     * Returns the shared scroll-insensitive, updatable statement, creating it on first use.
     *
     * @return the shared statement, or null when no connection or statement could be created
     */
    public static Statement getStatement(){
        if(null==stmt){
            Connection connection=getConnection();
            if(null==connection){
                // getConnection already logged the cause; avoid a NullPointerException here.
                logger.error("Cannot create statement: no database connection available");
                return null;
            }
            try
            {
                stmt=connection.createStatement(ResultSet.TYPE_SCROLL_INSENSITIVE,
                        ResultSet.CONCUR_UPDATABLE);
            }
            catch (SQLException e)
            {
                logger.error("Failed to create shared statement", e);
            }
        }
        return stmt;
    }

    /**
     * Creates a fresh statement on the shared connection and starts a StatementWatcher for it.
     *
     * @return the new statement, or null when no connection or statement could be created
     */
    public static Statement getNewStatement(){
        Statement watchedStmt=null;
        Connection connection=getConnection();
        if(null==connection){
            // getConnection already logged the cause; avoid a NullPointerException here.
            logger.error("Cannot create statement: no database connection available");
            return null;
        }
        try
        {
            watchedStmt=connection.createStatement();
            new StatementWatcher(watchedStmt).start();
        }
        catch (SQLException e)
        {
            logger.error("Failed to create watched statement", e);
        }
        return watchedStmt;
    }

    /**
     * Builds a new PostgreSQL data source from the url, user and password entries of
     * res/dbconfig.properties. A new instance is created on every call.
     *
     * @return a configured data source
     * @throws IllegalStateException if the configuration file cannot be read
     */
    public static DataSource getDataSource()
    {
        HashMap<String,String> info=getDBInfo();
        if(null==info){
            // Previously this surfaced as an unexplained NullPointerException on info.get(...).
            throw new IllegalStateException("Database configuration could not be loaded from res/dbconfig.properties");
        }
        Jdbc3SimpleDataSource dataSource=new Jdbc3SimpleDataSource();
        dataSource.setUrl(info.get("url"));
        dataSource.setUser(info.get("user"));
        dataSource.setPassword(info.get("password"));
        return dataSource;
    }

    /**
     * Creates a Mahout boolean-preference data model over the preference table,
     * using the table and column names defined by the PREF_TABLE* constants.
     */
    public static PostgreSQLBooleanPrefJDBCDataModel getPostgreSQLBooleanPrefJDBCDataModel(){
        return new PostgreSQLBooleanPrefJDBCDataModel(ConnectionFactory.getDataSource(), PREF_TABLE, PREF_TABLE_USERID,
                PREF_TABLE_NEWSID,PREF_TABLE_TIME);
    }

    /**
     * Creates a Mahout MySQL data model over the "user_likes" table.
     * NOTE(review): the data source returned by getDataSource() is a PostgreSQL one;
     * confirm this combination is intended.
     */
    public static MySQLJDBCDataModel getMySQLJDBCDataModel(){
        return new MySQLJDBCDataModel(ConnectionFactory.getDataSource(), "user_likes", "uid",
                "nid", "likes", "recording_time");
    }

    /**
     * Reads url, user and password from res/dbconfig.properties under the working directory.
     *
     * @return the three settings keyed by "url", "user" and "password",
     *         or null when the file is missing or cannot be read
     */
    private static HashMap<String, String> getDBInfo()
    {
        HashMap<String, String> info = null;
        // try-with-resources closes the stream even when loading fails.
        try (FileInputStream in = new FileInputStream(System.getProperty("user.dir") + "/res/dbconfig.properties"))
        {
            Properties p = new Properties();
            p.load(in);
            info = new HashMap<String, String>();
            info.put("url", p.getProperty("url"));
            info.put("user", p.getProperty("user"));
            info.put("password", p.getProperty("password"));
        }
        catch (FileNotFoundException e)
        {
            logger.error("读取属性文件--->失败!- 原因:文件路径错误或者文件不存在", e);
        }
        catch (IOException e)
        {
            logger.error("装载文件--->失败!", e);
        }
        return info;
    }
}
Manifest-Version: 1.0
Built-By: qianxinyao
Build-Jdk: 1.8.0_77
Created-By: Maven Integration for Eclipse
#Generated by Maven Integration for Eclipse
#Mon Dec 11 21:12:02 CST 2017
version=0.0.1-SNAPSHOT
groupId=top.tomqian
m2e.projectName=Recommender System
m2e.projectLocation=/Users/hanbo/Desktop/\u8FB9\u57CE/work_space/Recommender System
artifactId=recommender-system
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>top.tomqian</groupId>
  <artifactId>recommender-system</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <properties>
    <!-- The sources contain non-ASCII string literals; pin the encoding so builds do not depend on the platform default. -->
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>
  <build>
    <sourceDirectory>src</sourceDirectory>
    <resources>
      <resource>
        <directory>src</directory>
        <excludes>
          <exclude>**/*.java</exclude>
        </excludes>
      </resource>
    </resources>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>3.7.0</version>
        <configuration>
          <source>1.8</source>
          <target>1.8</target>
        </configuration>
      </plugin>
    </plugins>
  </build>
  <dependencies>
    <!-- https://mvnrepository.com/artifact/com.jfinal/jfinal -->
    <dependency>
      <groupId>com.jfinal</groupId>
      <artifactId>jfinal</artifactId>
      <version>3.0</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.ansj/ansj_seg -->
    <dependency>
      <groupId>org.ansj</groupId>
      <artifactId>ansj_seg</artifactId>
      <version>5.0.3</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/com.mchange/c3p0 -->
    <!-- The legacy c3p0:c3p0:0.9.1.1 coordinates were dropped: they ship the same classes as this artifact under a different groupId, so Maven cannot mediate the two and both end up on the classpath. -->
    <dependency>
      <groupId>com.mchange</groupId>
      <artifactId>c3p0</artifactId>
      <version>0.9.5.1</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-core -->
    <dependency>
      <groupId>com.fasterxml.jackson.core</groupId>
      <artifactId>jackson-core</artifactId>
      <version>2.5.3</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/log4j/log4j -->
    <dependency>
      <groupId>log4j</groupId>
      <artifactId>log4j</artifactId>
      <version>1.2.16</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/mysql/mysql-connector-java -->
    <dependency>
      <groupId>mysql</groupId>
      <artifactId>mysql-connector-java</artifactId>
      <version>5.1.40</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.quartz-scheduler/quartz -->
    <dependency>
      <groupId>org.quartz-scheduler</groupId>
      <artifactId>quartz</artifactId>
      <version>2.2.2</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.postgresql/postgresql -->
    <dependency>
      <groupId>org.postgresql</groupId>
      <artifactId>postgresql</artifactId>
      <version>42.1.1</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.mahout/mahout-mr -->
    <!-- Replaces mahout-core 0.9: that artifact was renamed to mahout-mr, and mixing 0.9 with the 0.12.2 integration module puts two versions of the same classes on the classpath. -->
    <dependency>
      <groupId>org.apache.mahout</groupId>
      <artifactId>mahout-mr</artifactId>
      <version>0.12.2</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.mahout/mahout-integration -->
    <dependency>
      <groupId>org.apache.mahout</groupId>
      <artifactId>mahout-integration</artifactId>
      <version>0.12.2</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.jsoup/jsoup -->
    <dependency>
      <groupId>org.jsoup</groupId>
      <artifactId>jsoup</artifactId>
      <version>1.8.3</version>
    </dependency>
  </dependencies>
</project>
\ No newline at end of file
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册