diff --git a/.classpath b/.classpath new file mode 100644 index 0000000000000000000000000000000000000000..7144ddba94e3a5786ca6eb438e706451093402b6 --- /dev/null +++ b/.classpath @@ -0,0 +1,23 @@ + + + + + + + + + + + + + + + + + + + + + + + diff --git a/.project b/.project new file mode 100644 index 0000000000000000000000000000000000000000..5b5a27bc2c12803f5590fc39ccb2bc7fe06c0fc8 --- /dev/null +++ b/.project @@ -0,0 +1,17 @@ + + + RecommandSystem + + + + + + org.eclipse.jdt.core.javabuilder + + + + + + org.eclipse.jdt.core.javanature + + diff --git a/.settings/org.eclipse.core.resources.prefs b/.settings/org.eclipse.core.resources.prefs new file mode 100644 index 0000000000000000000000000000000000000000..6a1838d62b0cb137d3f7aa471dcb93fcdeb7e685 --- /dev/null +++ b/.settings/org.eclipse.core.resources.prefs @@ -0,0 +1,2 @@ +eclipse.preferences.version=1 +encoding//src/log4j.properties=UTF-8 diff --git a/.settings/org.eclipse.jdt.core.prefs b/.settings/org.eclipse.jdt.core.prefs new file mode 100644 index 0000000000000000000000000000000000000000..3a21537071bf4118b9e1ee864cb4bc258aa48211 --- /dev/null +++ b/.settings/org.eclipse.jdt.core.prefs @@ -0,0 +1,11 @@ +eclipse.preferences.version=1 +org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled +org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8 +org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve +org.eclipse.jdt.core.compiler.compliance=1.8 +org.eclipse.jdt.core.compiler.debug.lineNumber=generate +org.eclipse.jdt.core.compiler.debug.localVariable=generate +org.eclipse.jdt.core.compiler.debug.sourceFile=generate +org.eclipse.jdt.core.compiler.problem.assertIdentifier=error +org.eclipse.jdt.core.compiler.problem.enumIdentifier=error +org.eclipse.jdt.core.compiler.source=1.8 diff --git a/RecommandSystem.iml b/RecommandSystem.iml new file mode 100644 index 0000000000000000000000000000000000000000..1f89d77c07281d10d9718c8ecabbabb20c2b9146 --- /dev/null +++ b/RecommandSystem.iml @@ -0,0 +1,209 @@ + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/bin/.DS_Store b/bin/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..4de0ded2fb8099a07518498d5d313400c3820f3e Binary files /dev/null and b/bin/.DS_Store differ diff --git a/bin/log4j.properties b/bin/log4j.properties new file mode 100644 index 0000000000000000000000000000000000000000..505cb5d0bb1eea20455cc64ed134591b39e15838 --- /dev/null +++ b/bin/log4j.properties @@ -0,0 +1,24 @@ +###设置### +log4j.rootLogger = stdout,D,E + +### 输出信息到控制台### +log4j.appender.stdout = org.apache.log4j.ConsoleAppender +log4j.appender.stdout.Target = System.out +log4j.appender.stdout.layout = org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss,SSS} method:%l%n%m%n + +###输出DEBUG级别日志到/Users/hanbo/Desktop/debug.log ### +log4j.appender.D = org.apache.log4j.DailyRollingFileAppender +log4j.appender.D.File = /Users/hanbo/Desktop/debug.log +log4j.appender.D.Append = true +log4j.appender.D.Threshold = DEBUG +log4j.appender.D.layout = org.apache.log4j.PatternLayout +log4j.appender.D.layout.ConversionPattern = %-d{yyyy-MM-dd HH:mm:ss} [ %t:%r ] - [ %p ] %m%n + +###输出ERROR级别日志到/Users/hanbo/Desktop/error.log### +log4j.appender.E = org.apache.log4j.DailyRollingFileAppender +log4j.appender.E.File =/Users/hanbo/Desktop/error.log +log4j.appender.E.Append = true +log4j.appender.E.Threshold = ERROR +log4j.appender.E.layout = org.apache.log4j.PatternLayout +log4j.appender.E.layout.ConversionPattern = %-d{yyyy-MM-dd HH:mm:ss} [ %t:%r ] - [ %p ] %m%n \ No newline at end of 
file diff --git a/bin/top/.DS_Store b/bin/top/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5432b94bfaa006e420c8271aa2c68cbc5bee1a84 Binary files /dev/null and b/bin/top/.DS_Store differ diff --git a/bin/top/qianxinyao/.DS_Store b/bin/top/qianxinyao/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..54f170ec9ab16313960cb6d226e4a66673f4d88d Binary files /dev/null and b/bin/top/qianxinyao/.DS_Store differ diff --git a/bin/top/qianxinyao/Main/Main.class b/bin/top/qianxinyao/Main/Main.class new file mode 100644 index 0000000000000000000000000000000000000000..43a0ad88fd1c1473bb504448f0cb092c14a0adb5 Binary files /dev/null and b/bin/top/qianxinyao/Main/Main.class differ diff --git a/bin/top/qianxinyao/UserBasedCollaborativeRecommender/MahoutUserBasedCollaborativeRecommender.class b/bin/top/qianxinyao/UserBasedCollaborativeRecommender/MahoutUserBasedCollaborativeRecommender.class new file mode 100644 index 0000000000000000000000000000000000000000..efe137c7fa542978f39e11f343cfd9dd9eff6e1d Binary files /dev/null and b/bin/top/qianxinyao/UserBasedCollaborativeRecommender/MahoutUserBasedCollaborativeRecommender.class differ diff --git a/bin/top/qianxinyao/UserBasedCollaborativeRecommender/quartz/CFCronTriggerRunner.class b/bin/top/qianxinyao/UserBasedCollaborativeRecommender/quartz/CFCronTriggerRunner.class new file mode 100644 index 0000000000000000000000000000000000000000..22e7f49672343cefa6e6250cff07d8aa0dcb3e99 Binary files /dev/null and b/bin/top/qianxinyao/UserBasedCollaborativeRecommender/quartz/CFCronTriggerRunner.class differ diff --git a/bin/top/qianxinyao/UserBasedCollaborativeRecommender/quartz/CFJob.class b/bin/top/qianxinyao/UserBasedCollaborativeRecommender/quartz/CFJob.class new file mode 100644 index 0000000000000000000000000000000000000000..77bdc8eecd4fa283f284428cbbef2967661c9d9b Binary files /dev/null and b/bin/top/qianxinyao/UserBasedCollaborativeRecommender/quartz/CFJob.class differ diff --git 
a/bin/top/qianxinyao/algorithms/.DS_Store b/bin/top/qianxinyao/algorithms/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6 Binary files /dev/null and b/bin/top/qianxinyao/algorithms/.DS_Store differ diff --git a/bin/top/qianxinyao/algorithms/JsonKit$1.class b/bin/top/qianxinyao/algorithms/JsonKit$1.class new file mode 100644 index 0000000000000000000000000000000000000000..0c72a2bb92a0ae27df638363002a1a9e99bd07b4 Binary files /dev/null and b/bin/top/qianxinyao/algorithms/JsonKit$1.class differ diff --git a/bin/top/qianxinyao/algorithms/JsonKit$2.class b/bin/top/qianxinyao/algorithms/JsonKit$2.class new file mode 100644 index 0000000000000000000000000000000000000000..218bc3cbe852f2ce34173f17248f44279e6c8e23 Binary files /dev/null and b/bin/top/qianxinyao/algorithms/JsonKit$2.class differ diff --git a/bin/top/qianxinyao/algorithms/JsonKit$3.class b/bin/top/qianxinyao/algorithms/JsonKit$3.class new file mode 100644 index 0000000000000000000000000000000000000000..ad08cb1c036b421b62f8d46b74e35d067c6fdcf6 Binary files /dev/null and b/bin/top/qianxinyao/algorithms/JsonKit$3.class differ diff --git a/bin/top/qianxinyao/algorithms/JsonKit.class b/bin/top/qianxinyao/algorithms/JsonKit.class new file mode 100644 index 0000000000000000000000000000000000000000..f69612823a661e71ba4e36f8a5f0b31ce8f95047 Binary files /dev/null and b/bin/top/qianxinyao/algorithms/JsonKit.class differ diff --git a/bin/top/qianxinyao/algorithms/PropGetKit.class b/bin/top/qianxinyao/algorithms/PropGetKit.class new file mode 100644 index 0000000000000000000000000000000000000000..d1ca3458d8f748668f16b4496e11e3c296b21c56 Binary files /dev/null and b/bin/top/qianxinyao/algorithms/PropGetKit.class differ diff --git a/bin/top/qianxinyao/algorithms/RecommendAlgorithm.class b/bin/top/qianxinyao/algorithms/RecommendAlgorithm.class new file mode 100644 index 0000000000000000000000000000000000000000..2680cef64204c7488f67bcbde4ac5cdd11ee8677 
Binary files /dev/null and b/bin/top/qianxinyao/algorithms/RecommendAlgorithm.class differ diff --git a/bin/top/qianxinyao/algorithms/RecommendKit.class b/bin/top/qianxinyao/algorithms/RecommendKit.class new file mode 100644 index 0000000000000000000000000000000000000000..9c4f5da6529c39c6df30193ee26fe4f05bc9f2d8 Binary files /dev/null and b/bin/top/qianxinyao/algorithms/RecommendKit.class differ diff --git a/bin/top/qianxinyao/contentbasedrecommend/ContentBasedRecommender.class b/bin/top/qianxinyao/contentbasedrecommend/ContentBasedRecommender.class new file mode 100644 index 0000000000000000000000000000000000000000..1d1677a4e9ae6446a52c467589267158b86981c9 Binary files /dev/null and b/bin/top/qianxinyao/contentbasedrecommend/ContentBasedRecommender.class differ diff --git a/bin/top/qianxinyao/contentbasedrecommend/CustomizedComparator.class b/bin/top/qianxinyao/contentbasedrecommend/CustomizedComparator.class new file mode 100644 index 0000000000000000000000000000000000000000..662b201dc791eeb5a9fc4a780ff7cad29cc451bb Binary files /dev/null and b/bin/top/qianxinyao/contentbasedrecommend/CustomizedComparator.class differ diff --git a/bin/top/qianxinyao/contentbasedrecommend/CustomizedHashMap.class b/bin/top/qianxinyao/contentbasedrecommend/CustomizedHashMap.class new file mode 100644 index 0000000000000000000000000000000000000000..9a30b2fbb0b8541e49410a1ea2ee6ca31f1cf8ca Binary files /dev/null and b/bin/top/qianxinyao/contentbasedrecommend/CustomizedHashMap.class differ diff --git a/bin/top/qianxinyao/contentbasedrecommend/MapValueComparator.class b/bin/top/qianxinyao/contentbasedrecommend/MapValueComparator.class new file mode 100644 index 0000000000000000000000000000000000000000..654ee6033c01d1d96cb30a63b7a531fe2a1e39f1 Binary files /dev/null and b/bin/top/qianxinyao/contentbasedrecommend/MapValueComparator.class differ diff --git a/bin/top/qianxinyao/contentbasedrecommend/TFIDF.class b/bin/top/qianxinyao/contentbasedrecommend/TFIDF.class new file mode 100644 
index 0000000000000000000000000000000000000000..5a17fd5fc35f6a4fa72d0d1629a7f7bc9fe90627 Binary files /dev/null and b/bin/top/qianxinyao/contentbasedrecommend/TFIDF.class differ diff --git a/bin/top/qianxinyao/contentbasedrecommend/UserPrefRefresher.class b/bin/top/qianxinyao/contentbasedrecommend/UserPrefRefresher.class new file mode 100644 index 0000000000000000000000000000000000000000..9402cf4ebb0eb06cb5e24a401851457d5965181c Binary files /dev/null and b/bin/top/qianxinyao/contentbasedrecommend/UserPrefRefresher.class differ diff --git a/bin/top/qianxinyao/contentbasedrecommend/quartz/CBCronTriggerRunner.class b/bin/top/qianxinyao/contentbasedrecommend/quartz/CBCronTriggerRunner.class new file mode 100644 index 0000000000000000000000000000000000000000..04372d9a9b67af3150921d0502143042ad103068 Binary files /dev/null and b/bin/top/qianxinyao/contentbasedrecommend/quartz/CBCronTriggerRunner.class differ diff --git a/bin/top/qianxinyao/contentbasedrecommend/quartz/CBJob.class b/bin/top/qianxinyao/contentbasedrecommend/quartz/CBJob.class new file mode 100644 index 0000000000000000000000000000000000000000..20648ae4a7afd7a50c3692d1028208d7a79be3ce Binary files /dev/null and b/bin/top/qianxinyao/contentbasedrecommend/quartz/CBJob.class differ diff --git a/bin/top/qianxinyao/dbconnection/ConnectionFactory.class b/bin/top/qianxinyao/dbconnection/ConnectionFactory.class new file mode 100644 index 0000000000000000000000000000000000000000..2c10c4246fd6fcd5af2a93dd304c0ef94c2df5cf Binary files /dev/null and b/bin/top/qianxinyao/dbconnection/ConnectionFactory.class differ diff --git a/bin/top/qianxinyao/dbconnection/StatementWatcher.class b/bin/top/qianxinyao/dbconnection/StatementWatcher.class new file mode 100644 index 0000000000000000000000000000000000000000..122b69a6b67832de1fd21a8c8a1a2a912fe9da40 Binary files /dev/null and b/bin/top/qianxinyao/dbconnection/StatementWatcher.class differ diff --git a/bin/top/qianxinyao/hotrecommend/HotRecommender.class 
b/bin/top/qianxinyao/hotrecommend/HotRecommender.class new file mode 100644 index 0000000000000000000000000000000000000000..b94bc6bf59dcb9f6c8aea96994157d90af338943 Binary files /dev/null and b/bin/top/qianxinyao/hotrecommend/HotRecommender.class differ diff --git a/bin/top/qianxinyao/hotrecommend/quartz/HRCronTriggerRunner.class b/bin/top/qianxinyao/hotrecommend/quartz/HRCronTriggerRunner.class new file mode 100644 index 0000000000000000000000000000000000000000..f57443fa1e7791437b601a3012f27b740c5612ca Binary files /dev/null and b/bin/top/qianxinyao/hotrecommend/quartz/HRCronTriggerRunner.class differ diff --git a/bin/top/qianxinyao/hotrecommend/quartz/HRJob.class b/bin/top/qianxinyao/hotrecommend/quartz/HRJob.class new file mode 100644 index 0000000000000000000000000000000000000000..7c20160017cb40043e6b09cb1ea36250a66205e0 Binary files /dev/null and b/bin/top/qianxinyao/hotrecommend/quartz/HRJob.class differ diff --git a/bin/top/qianxinyao/performance/Judge.class b/bin/top/qianxinyao/performance/Judge.class new file mode 100644 index 0000000000000000000000000000000000000000..cf078395b9db17ca28e624c8b57f815dfabc265f Binary files /dev/null and b/bin/top/qianxinyao/performance/Judge.class differ diff --git a/res/dbconfig.properties b/res/dbconfig.properties new file mode 100644 index 0000000000000000000000000000000000000000..f5d27a17f7061155432d384c697db251bf4e1dc1 --- /dev/null +++ b/res/dbconfig.properties @@ -0,0 +1,16 @@ +#url=jdbc:mysql://121.42.36.199/recommand_system +#user=root +#password=123456 +#dbname=recommand_system +#ip=121.42.36.199 + + +url = jdbc:postgresql://202.114.234.171:25432/mcipdb +user = postgres +password = n749fj +dbname=mcipdb + +#url = jdbc:postgresql://115.28.212.146:5432/mcipdb +#user = postgres +#password = n749fj +#dbname=mcipdb \ No newline at end of file diff --git a/res/paraConfig.properties b/res/paraConfig.properties new file mode 100644 index 0000000000000000000000000000000000000000..28a46719fdc2cbd82ede09fd19699e33092f8af8 --- 
/dev/null +++ b/res/paraConfig.properties @@ -0,0 +1,29 @@ +#---Recommend System Parameter--- +#Recommend Executing Moment(Quartz's CronExpression) +startAt=0 0 0 ? * * + + +#---Specific Algorithm Parameter--- + +#-RecommendKit- +#Valid specific day the news published after which are still valuable +beforeDays=-30 +#Valid specific day after which those who has browsed news can be regarded "active" +activeDay=-30 + +#-Collaborative Filtering Recommendation- +#Recommend Num from CF(Collaborative Filtering) +CFRecNum=5 +#Valid specific day after which users' browsing history would be calculated +CFValidDay=-30 + +#-Content-Based Recommendation- +#Recommend Num from CB(Content-Based Recommend) +CBRecNum=5 +#Set a previous day after which the browsing history will be calculated with, namely, is still in time +previousDays=-30 +#TF-IDF's extracting numbers of keywords from every news +TFIDFKeywordsNum=10 + +#-Hot Recommendation- + diff --git a/src/.DS_Store b/src/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..4de0ded2fb8099a07518498d5d313400c3820f3e Binary files /dev/null and b/src/.DS_Store differ diff --git a/src/log4j.properties b/src/log4j.properties new file mode 100644 index 0000000000000000000000000000000000000000..505cb5d0bb1eea20455cc64ed134591b39e15838 --- /dev/null +++ b/src/log4j.properties @@ -0,0 +1,24 @@ +###设置### +log4j.rootLogger = stdout,D,E + +### 输出信息到控制台### +log4j.appender.stdout = org.apache.log4j.ConsoleAppender +log4j.appender.stdout.Target = System.out +log4j.appender.stdout.layout = org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss,SSS} method:%l%n%m%n + +###输出DEBUG级别日志到/Users/hanbo/Desktop/debug.log ### +log4j.appender.D = org.apache.log4j.DailyRollingFileAppender +log4j.appender.D.File = /Users/hanbo/Desktop/debug.log +log4j.appender.D.Append = true +log4j.appender.D.Threshold = DEBUG +log4j.appender.D.layout = org.apache.log4j.PatternLayout 
+log4j.appender.D.layout.ConversionPattern = %-d{yyyy-MM-dd HH:mm:ss} [ %t:%r ] - [ %p ] %m%n + +###输出ERROR级别日志到/Users/hanbo/Desktop/error.log### +log4j.appender.E = org.apache.log4j.DailyRollingFileAppender +log4j.appender.E.File =/Users/hanbo/Desktop/error.log +log4j.appender.E.Append = true +log4j.appender.E.Threshold = ERROR +log4j.appender.E.layout = org.apache.log4j.PatternLayout +log4j.appender.E.layout.ConversionPattern = %-d{yyyy-MM-dd HH:mm:ss} [ %t:%r ] - [ %p ] %m%n \ No newline at end of file diff --git a/src/top/.DS_Store b/src/top/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5432b94bfaa006e420c8271aa2c68cbc5bee1a84 Binary files /dev/null and b/src/top/.DS_Store differ diff --git a/src/top/qianxinyao/.DS_Store b/src/top/qianxinyao/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..54f170ec9ab16313960cb6d226e4a66673f4d88d Binary files /dev/null and b/src/top/qianxinyao/.DS_Store differ diff --git a/src/top/qianxinyao/Main/DataProcessor.java b/src/top/qianxinyao/Main/DataProcessor.java new file mode 100644 index 0000000000000000000000000000000000000000..4a3b0262ff594faf0d9e52e047d98772aad79663 --- /dev/null +++ b/src/top/qianxinyao/Main/DataProcessor.java @@ -0,0 +1,158 @@ +///** +// * +// */ +//package top.qianxinyao.Main; +// +//import java.io.File; +//import java.io.FileInputStream; +//import java.io.FileNotFoundException; +//import java.io.IOException; +//import java.text.DecimalFormat; +//import java.util.HashMap; +//import java.util.Iterator; +// +//import org.apache.log4j.Logger; +//import org.apache.poi.hssf.usermodel.HSSFCell; +//import org.apache.poi.hssf.usermodel.HSSFRow; +//import org.apache.poi.hssf.usermodel.HSSFSheet; +//import org.apache.poi.hssf.usermodel.HSSFWorkbook; +//import org.apache.poi.ss.usermodel.Cell; +//import org.apache.poi.ss.usermodel.Row; +// +// +///** +// * @author qianxinyao +// * @email tomqianmaple@gmail.com +// * @github https://github.com/bluemapleman 
+// * @date 2016年10月16日 +// */ +// +//public class DataProcessor +//{ +// +// private static Logger logger = Logger.getLogger(DataProcessor.class); +// +// private static HashMap> userLikes; +// +// /** +// * 读取数据表,获得用户的喜好数据 +// */ +// public static void readDataTable(){ +// +// } +// +// /** +// * 读取excel文件,获得用户的喜好数据 +// * +// * @param file +// */ +// +// public static void readExcelData(File file) +// { +// userLikes=new HashMap>(); +// +// HSSFWorkbook wb=null; +// +// try +// { +// wb = new HSSFWorkbook(new FileInputStream(file)); +// +// HSSFSheet sheet = wb.getSheetAt(0); +// +// // Iterate over each row in the sheet +// +// Iterator rows = sheet.rowIterator(); +// +// while (rows.hasNext()) +// { +// HashMap likes=new HashMap(); +// +// HSSFRow row = (HSSFRow) rows.next(); +// +// // Iterate over each cell in the row and print out the cell"s +// +// // content +// +// Iterator cells = row.cellIterator(); +// +// +// while (cells.hasNext()) +// { +// Cell cell=cells.next(); +// +// String cellValue=getCellValue((HSSFCell) cell); +// +// +// logger.info(cellValue); +// +// } +// } +// } +// catch (FileNotFoundException fe) +// { +// logger.error("Exception:" + fe.toString()); +// } +// catch (Exception e) +// { +// logger.error("Exception:" + e.toString()); +// } +// finally +// { +// if(null!=wb){ +// try +// { +// wb.close(); +// } +// catch (IOException e) +// { +// // TODO Auto-generated catch block +// logger.error("XSSFWorkbook close failed!"); +// } +// } +// logger.info("程序出现异常!请检查!"); +// } +// } +// +// +// /** +// * 返回各种类型单元格值的字符串形式的方法 +// * @param cell +// * @return +// */ +// private static String getCellValue(HSSFCell cell) { +// String cellValue = ""; +// DecimalFormat df = new DecimalFormat("#"); +// switch (cell.getCellType()) { +// case HSSFCell.CELL_TYPE_STRING: +// cellValue = cell.getRichStringCellValue().getString().trim(); +// break; +// case HSSFCell.CELL_TYPE_NUMERIC: +// cellValue = df.format(cell.getNumericCellValue()).toString(); 
+// break; +// case HSSFCell.CELL_TYPE_BOOLEAN: +// cellValue = String.valueOf(cell.getBooleanCellValue()).trim(); +// break; +// case HSSFCell.CELL_TYPE_FORMULA: +// cellValue = cell.getCellFormula(); +// break; +// default: +// cellValue = ""; +// } +// return cellValue; +// } +// +// +// +// +// +// /** +// * 分隔用户所喜好新闻数据的方法,获得用户喜好的所有新闻信息。 +// * @param likes +// * @param seperator 新闻数据分隔符 +// * @return +// */ +//// private static String[] seperateLikes(String likes,String seperator){ +//// return likes.split(seperator); +//// } +// +//} diff --git a/src/top/qianxinyao/Main/Main.java b/src/top/qianxinyao/Main/Main.java new file mode 100644 index 0000000000000000000000000000000000000000..53d8af1b689d556fae31ebf6adeffea4cc9b276f --- /dev/null +++ b/src/top/qianxinyao/Main/Main.java @@ -0,0 +1,60 @@ +/** + * + */ +package top.qianxinyao.Main; + +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.List; + +import top.qianxinyao.UserBasedCollaborativeRecommender.MahoutUserBasedCollaborativeRecommender; +import top.qianxinyao.algorithms.PropGetKit; +import top.qianxinyao.algorithms.RecommendKit; +import top.qianxinyao.contentbasedrecommend.ContentBasedRecommender; +import top.qianxinyao.hotrecommend.HotRecommender; + +/** + * @author qianxinyao + * @email tomqianmaple@gmail.com + * @github https://github.com/bluemapleman + * @date 2016年10月20日 + */ +public class Main +{ + static ResultSet rs=null; + + /** + * @param args + * @throws SQLException + */ + public static void main(String[] args) throws SQLException + { + //加载系统配置文件 + PropGetKit.loadProperties("paraConfig"); + //仅给最近一个月有活动的用户进行推荐动作 + List users=RecommendKit.getActiveUsers(); + //设定推荐任务每天的执行时间 +// String cronExpression=PropGetKit.getString("startAt"); +// try +// { +// new CFCronTriggerRunner().task(users,cronExpression); +// new CBCronTriggerRunner().task(users,cronExpression); +// new HRCronTriggerRunner().task(users,cronExpression); +// } +// catch (SchedulerException e) +// { +// // 
TODO Auto-generated catch block +// e.printStackTrace(); +// } + HotRecommender.formTodayTopHotNewsList(); +// new MahoutUserBasedCollaborativeRecommender().recommend(users); +// new ContentBasedRecommender().recommend(users); + new HotRecommender().recommend(users); + + + + + + } +} + diff --git a/src/top/qianxinyao/UserBasedCollaborativeRecommender/MahoutUserBasedCollaborativeRecommender.java b/src/top/qianxinyao/UserBasedCollaborativeRecommender/MahoutUserBasedCollaborativeRecommender.java new file mode 100644 index 0000000000000000000000000000000000000000..abc01971c21fbc1bac65cde70d590e138fafe09b --- /dev/null +++ b/src/top/qianxinyao/UserBasedCollaborativeRecommender/MahoutUserBasedCollaborativeRecommender.java @@ -0,0 +1,150 @@ +/** + * + */ +package top.qianxinyao.UserBasedCollaborativeRecommender; + +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.Date; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.apache.log4j.Logger; +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.model.jdbc.PostgreSQLBooleanPrefJDBCDataModel; +import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood; +import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender; +import org.apache.mahout.cf.taste.impl.similarity.LogLikelihoodSimilarity; +import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood; +import org.apache.mahout.cf.taste.recommender.RecommendedItem; +import org.apache.mahout.cf.taste.recommender.Recommender; +import org.apache.mahout.cf.taste.similarity.UserSimilarity; + +import top.qianxinyao.algorithms.PropGetKit; +import top.qianxinyao.algorithms.RecommendAlgorithm; +import top.qianxinyao.algorithms.RecommendKit; +import top.qianxinyao.dbconnection.ConnectionFactory; + +/** + * @author qianxinyao + * @email tomqianmaple@gmail.com + * @github https://github.com/bluemapleman + * @date 
2016年10月18日 协同过滤 + */ + +/** + * Collaborative-Based Filter 基于用户的协同过滤 + * + */ +public class MahoutUserBasedCollaborativeRecommender implements RecommendAlgorithm +{ + public static final Logger logger = Logger.getLogger(MahoutUserBasedCollaborativeRecommender.class); + + /** + * 对应计算相似度时的时效天数 + */ + private static int inRecDays = PropGetKit.getInt("CFValidDay"); + + /** + * 给每个用户推荐的新闻的条数 + */ + public static int N =PropGetKit.getInt("CFRecNum"); + + /** + * 给特定的一批用户进行新闻推荐 + * + * @param 目标用户的id列表 + */ + @SuppressWarnings("unused") + @Override + public void recommend(List users) + { + int count=0; + try + { + System.out.println("CF start at "+new Date()); + + PostgreSQLBooleanPrefJDBCDataModel dataModel = ConnectionFactory.getPostgreSQLBooleanPrefJDBCDataModel(); + + Statement stmt = ConnectionFactory.getNewStatement(); + + ResultSet rs1 = stmt.executeQuery("select " + ConnectionFactory.PREF_TABLE_USERID + "," + + ConnectionFactory.PREF_TABLE_NEWSID + "," + ConnectionFactory.PREF_TABLE_TIME + " from newslogs"); + + // 移除过期的用户浏览新闻行为,这些行为对计算用户相似度不再具有较大价值 + while (rs1.next()) + { + if (rs1.getTimestamp(3).before(RecommendKit.getInRecTimestamp(inRecDays))) + { + dataModel.removePreference(Long.parseLong(rs1.getString(1)), Long.parseLong(rs1.getString(2))); + } + } + + UserSimilarity similarity = new LogLikelihoodSimilarity(dataModel); + + // NearestNeighborhood的数量有待考察 + UserNeighborhood neighborhood = new NearestNUserNeighborhood(5, similarity, dataModel); + + Recommender recommender = new GenericUserBasedRecommender(dataModel, neighborhood, similarity); + + for (String user : users) + { + long start = System.currentTimeMillis(); + + Long userid = Long.parseLong(user); + + List recItems = recommender.recommend(userid, N); + + Set hs = new HashSet(); + + for (RecommendedItem recItem : recItems) + { + hs.add(String.valueOf(recItem.getItemID())); + } + + // 过滤掉已推荐新闻和已过期新闻 + RecommendKit.filterOutDateNews(hs, String.valueOf(userid)); + RecommendKit.filterReccedNews(hs, 
String.valueOf(userid)); + + // 无可推荐新闻 + if (hs == null) + { + continue; + } + + if(hs.size()>N){ + RecommendKit.removeOverNews(hs, N); + } + + RecommendKit.insertRecommend(String.valueOf(userid), hs.iterator(),RecommendAlgorithm.CF); + + count+=hs.size(); + } + } + catch (TasteException e) + { + logger.error("CB算法构造偏好对象失败!"); + e.printStackTrace(); + } + catch (SQLException e) + { + logger.error("CB算法数据库操作失败!"); + e.printStackTrace(); + } + System.out.println("CF has contributed " + (count/users.size()) + " recommending news on average"); + System.out.println("CF finish at "+new Date()); + return; + } + + public int getRecNums() + { + return N; + } + + public void setRecNums(int recNums) + { + N = recNums; + } +} diff --git a/src/top/qianxinyao/UserBasedCollaborativeRecommender/quartz/CFCronTriggerRunner.java b/src/top/qianxinyao/UserBasedCollaborativeRecommender/quartz/CFCronTriggerRunner.java new file mode 100644 index 0000000000000000000000000000000000000000..a490a90301222cc0b9a4ad3bf7c59ed380872d11 --- /dev/null +++ b/src/top/qianxinyao/UserBasedCollaborativeRecommender/quartz/CFCronTriggerRunner.java @@ -0,0 +1,56 @@ +/** + * + */ +package top.qianxinyao.UserBasedCollaborativeRecommender.quartz; + +import java.util.List; + +import org.quartz.CronExpression; +import org.quartz.JobKey; +import org.quartz.Scheduler; +import org.quartz.SchedulerException; +import org.quartz.SchedulerFactory; +import org.quartz.impl.JobDetailImpl; +import org.quartz.impl.StdSchedulerFactory; +import org.quartz.impl.triggers.CronTriggerImpl; + +/** + * @author qianxinyao + * @email tomqianmaple@gmail.com + * @github https://github.com/bluemapleman + * @date 2016年11月23日 + */ +public class CFCronTriggerRunner +{ + public void task(List users,String cronExpression) throws SchedulerException + { + // Initiate a Schedule Factory + SchedulerFactory schedulerFactory = new StdSchedulerFactory(); + // Retrieve a scheduler from schedule factory + Scheduler scheduler = 
schedulerFactory.getScheduler(); + + // Initiate JobDetail with job name, job group, and executable job class + JobDetailImpl jobDetailImpl = + new JobDetailImpl(); + jobDetailImpl.setJobClass(CFJob.class); + jobDetailImpl.setKey(new JobKey("CFJob1")); + jobDetailImpl.getJobDataMap().put("users", users); + // Initiate CronTrigger with its name and group name + CronTriggerImpl cronTriggerImpl = new CronTriggerImpl(); + cronTriggerImpl.setName("CFCronTrigger1"); + try { + // setup CronExpression + CronExpression cexp = new CronExpression(cronExpression); + // Assign the CronExpression to CronTrigger + cronTriggerImpl.setCronExpression(cexp); + } catch (Exception e) { + e.printStackTrace(); + } + // schedule a job with JobDetail and Trigger + scheduler.scheduleJob(jobDetailImpl, cronTriggerImpl); + + // start the scheduler + scheduler.start(); + } +} + diff --git a/src/top/qianxinyao/UserBasedCollaborativeRecommender/quartz/CFJob.java b/src/top/qianxinyao/UserBasedCollaborativeRecommender/quartz/CFJob.java new file mode 100644 index 0000000000000000000000000000000000000000..6f3dfaf49e127ce5ad06ebc84559041107a4561c --- /dev/null +++ b/src/top/qianxinyao/UserBasedCollaborativeRecommender/quartz/CFJob.java @@ -0,0 +1,32 @@ +/** + * + */ +package top.qianxinyao.UserBasedCollaborativeRecommender.quartz; + +import java.util.List; + +import org.quartz.Job; +import org.quartz.JobExecutionContext; +import org.quartz.JobExecutionException; + +import top.qianxinyao.UserBasedCollaborativeRecommender.MahoutUserBasedCollaborativeRecommender; + +/** + * @author qianxinyao + * @email tomqianmaple@gmail.com + * @github https://github.com/bluemapleman + * @date 2016年11月23日 + * 每天定时根据用户当日的新闻浏览记录来更新用户的喜好关键词列表 + */ +public class CFJob implements Job +{ + @SuppressWarnings("unchecked") + @Override + public void execute(JobExecutionContext arg0) throws JobExecutionException + { + List users=(List) arg0.getJobDetail().getJobDataMap().get("users"); + new 
MahoutUserBasedCollaborativeRecommender().recommend(users); + } + +} + diff --git a/src/top/qianxinyao/algorithms/.DS_Store b/src/top/qianxinyao/algorithms/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6 Binary files /dev/null and b/src/top/qianxinyao/algorithms/.DS_Store differ diff --git a/src/top/qianxinyao/algorithms/JsonKit.java b/src/top/qianxinyao/algorithms/JsonKit.java new file mode 100644 index 0000000000000000000000000000000000000000..423c53416e5ec100ca542c20d60245e7cfaca69e --- /dev/null +++ b/src/top/qianxinyao/algorithms/JsonKit.java @@ -0,0 +1,147 @@ +/** + * + */ +package top.qianxinyao.algorithms; + +import java.io.IOException; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Set; + +import org.codehaus.jackson.JsonParseException; +import org.codehaus.jackson.map.JsonMappingException; +import org.codehaus.jackson.map.ObjectMapper; +import org.codehaus.jackson.type.TypeReference; + +import top.qianxinyao.contentbasedrecommend.CustomizedHashMap; + +/** + * @author qianxinyao + * @email tomqianmaple@gmail.com + * @github https://github.com/bluemapleman + * @date 2016年11月21日 + */ +public class JsonKit +{ + public static String test(){ + String json=null; + try { + Map moduleidMap = new HashMap(); + Map keywordRateMap = new HashMap(); + keywordRateMap.put("政治", 123.1); + keywordRateMap.put("金融", 35.2); + moduleidMap.put(1,keywordRateMap); + keywordRateMap.put("电影", 351.1); + moduleidMap.put(2,keywordRateMap); + ObjectMapper objectMapper=new ObjectMapper(); + json=objectMapper.writeValueAsString(moduleidMap); + + String test="{\"1\":{},\"2\":{},\"3\":{},\"4\":{}}"; + return test; + } catch (IOException e) { + + e.printStackTrace(); + + } + return json; + } + + /** + * 获取用户所关注的模板的id的set + * @param srcJson + * @return + */ + public static Set getUserModuleIdSet(String srcJson){ + + 
//java的擦除机制不允许直接获取泛型类的class,但是这样会使得jackson的readValue自动将键转换为String,于是需要使用jackson提供的TypeReference来解决这个问题 + Map map=null; + try + { + ObjectMapper objectMapper=new ObjectMapper(); + map = objectMapper.readValue(srcJson, new TypeReference>(){}); + } + catch (JsonParseException e) + { + // TODO Auto-generated catch block + e.printStackTrace(); + } + catch (JsonMappingException e) + { + // TODO Auto-generated catch block + e.printStackTrace(); + } + catch (IOException e) + { + // TODO Auto-generated catch block + e.printStackTrace(); + } + return map.keySet(); + } + + /** + * 获得用户对应module的喜好关键词列表的map + * @param srcJson + * @param moduleId + * @return + */ + @SuppressWarnings("unchecked") + public static LinkedHashMap getModulePrefMap(String srcJson,int moduleId){ + + LinkedHashMap keyWordsRateMap=null; + try + { + ObjectMapper objectMapper=new ObjectMapper(); + //java的擦除机制不允许直接获取泛型类的class,但是这样会使得jackson的readValue自动将键转换为String,于是需要使用jackson提供的TypeReference来解决这个问题 + Map map=objectMapper.readValue(srcJson, new TypeReference>(){}); + keyWordsRateMap=(LinkedHashMap) map.get(moduleId); + } + catch (JsonParseException e) + { + // TODO Auto-generated catch block + e.printStackTrace(); + } + catch (JsonMappingException e) + { + // TODO Auto-generated catch block + e.printStackTrace(); + } + catch (IOException e) + { + // TODO Auto-generated catch block + e.printStackTrace(); + } + return keyWordsRateMap; + } + + /** + * 将用户的喜好关键词列表字符串转换为map + * @param srcJson + * @return + */ + public static CustomizedHashMap> jsonPrefListtoMap(String srcJson){ + ObjectMapper objectMapper=new ObjectMapper(); + CustomizedHashMap> map=null; + try + { + map=objectMapper.readValue(srcJson, new TypeReference>>(){}); + } + catch (JsonParseException e) + { + // TODO Auto-generated catch block + e.printStackTrace(); + } + catch (JsonMappingException e) + { + // TODO Auto-generated catch block + e.printStackTrace(); + } + catch (IOException e) + { + // TODO Auto-generated catch block + 
e.printStackTrace(); + } + return map; + } + +} diff --git a/src/top/qianxinyao/algorithms/PropGetKit.java b/src/top/qianxinyao/algorithms/PropGetKit.java new file mode 100644 index 0000000000000000000000000000000000000000..051d92e92043bcc3134b8b5a05422e932938cdea --- /dev/null +++ b/src/top/qianxinyao/algorithms/PropGetKit.java @@ -0,0 +1,51 @@ +/** + * + */ +package top.qianxinyao.algorithms; + +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.Properties; + +import org.apache.log4j.Logger; + +/** + * @author qianxinyao + * @email tomqianmaple@gmail.com + * @github https://github.com/bluemapleman + * @date 2016年11月30日 用以读取配置文件,获取对应属性 + */ +public class PropGetKit +{ + private static final Logger logger = Logger.getLogger(PropGetKit.class); + + public static Properties propGetKit = new Properties();; + + public static void loadProperties(String configFileName) + { + try + { + propGetKit.load(new FileInputStream(System.getProperty("user.dir") + "/res/" + configFileName + ".properties")); + } + catch (FileNotFoundException e) + { + logger.error("读取属性文件--->失败!- 原因:文件路径错误或者文件不存在"); + } + catch (IOException e) + { + logger.error("装载文件--->失败!"); + } + } + + public static String getString(String key) + { + return propGetKit.getProperty(key); + } + + public static int getInt(String key) + { + return Integer.valueOf(propGetKit.getProperty(key)); + } + +} diff --git a/src/top/qianxinyao/algorithms/RecommendAlgorithm.java b/src/top/qianxinyao/algorithms/RecommendAlgorithm.java new file mode 100644 index 0000000000000000000000000000000000000000..800bf7bc2ce9cf3b372923079b52676a64e6c77c --- /dev/null +++ b/src/top/qianxinyao/algorithms/RecommendAlgorithm.java @@ -0,0 +1,37 @@ +/** + * + */ +package top.qianxinyao.algorithms; + +import java.util.List; + +/** + * @author qianxinyao + * @email tomqianmaple@gmail.com + * @github https://github.com/bluemapleman + * @date 2016年10月20日 + */ +public interface 
RecommendAlgorithm +{ + /** + * 推荐算法的int表示 + */ + //协同过滤 + public static final int CF=0; + //基于内容的推荐 + public static final int CB=1; + //基于热点新闻的推荐 + public static final int HR=2; + /** + * 针对所有用户返回推荐结果 + */ + public default void recommend(){ + recommend(RecommendKit.getUserList()); + } + + /** + * 针对特定用户返回推荐结果 + */ + public void recommend(List users); +} + diff --git a/src/top/qianxinyao/algorithms/RecommendKit.java b/src/top/qianxinyao/algorithms/RecommendKit.java new file mode 100644 index 0000000000000000000000000000000000000000..7cebfec95edfde701c94c0e872c98abd7fa889fa --- /dev/null +++ b/src/top/qianxinyao/algorithms/RecommendKit.java @@ -0,0 +1,338 @@ +/** + * + */ +package top.qianxinyao.algorithms; + +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.sql.Timestamp; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.Collection; +import java.util.Date; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Set; + +import top.qianxinyao.contentbasedrecommend.CustomizedHashMap; +import top.qianxinyao.dbconnection.ConnectionFactory; + +/** + * @author qianxinyao + * @email tomqianmaple@gmail.com + * @github https://github.com/bluemapleman + * @date 2016年11月21日 提供推荐算法通用的一些方法 + */ +public class RecommendKit +{ + /** + * 推荐新闻的时效性天数,即从推荐当天开始到之前beforeDays天的新闻属于仍具有时效性的新闻,予以推荐。 + */ + private static int beforeDays = PropGetKit.getInt("beforeDays"); + + /** + * @return the inRecDate 返回时效时间的"year-month-day"的格式表示,方便数据库的查询 + */ + public static String getInRecDate() + { + return getSpecificDayFormat(beforeDays); + } + + /** + * @return the inRecDate 返回时效时间的"year-month-day"的格式表示,方便数据库的查询 + */ + public static String getInRecDate(int beforeDays) + { + return getSpecificDayFormat(beforeDays); + } + + /** + * @return the inRecDate 返回时效时间timestamp形式表示,方便其他推荐方法在比较时间先后时调用 + */ + public static Timestamp getInRecTimestamp(int 
before_Days) + { + Calendar calendar = Calendar.getInstance(); // 得到日历 + calendar.add(Calendar.DAY_OF_MONTH, before_Days); // 设置为前beforeNum天 + return new Timestamp(calendar.getTime().getTime()); + } + + /** + * 过滤方法filterOutDateNews() 过滤掉失去时效性的新闻(由beforeDays属性控制) + */ + public static void filterOutDateNews(Collection col, String userId) + { + try + { + String newsids = getInQueryString(col.iterator()); + if (!newsids.equals("()")) + { + ResultSet rs = ConnectionFactory.getStatement() + .executeQuery("select newsid,ntime from news where newsid in " + newsids); + while (rs.next()) + { + if (rs.getTimestamp(2).before(getInRecTimestamp(beforeDays))) + { + col.remove(rs.getString(1)); + } + } + } + } + catch (SQLException e) + { + e.printStackTrace(); + } + } + + /** + * 过滤方法filterBrowsedNews() 过滤掉已经用户已经看过的新闻 + */ + public static void filterBrowsedNews(Collection col, String userId) + { + try + { + Statement stmt = ConnectionFactory.getNewStatement(); + ResultSet rs; + rs = stmt.executeQuery("select nlnewsid from newslogs where nluserid='" + userId + "'"); + while (rs.next()) + { + if (col.contains(rs.getString(1))) + { + col.remove(rs.getString(1)); + } + } + } + catch (SQLException e) + { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + + /** + * 过滤方法filterReccedNews() 过滤掉已经推荐过的新闻(在recommend表中查找) + */ + public static void filterReccedNews(Collection col, String userId) + { + try + { + Statement stmt = ConnectionFactory.getNewStatement(); + ResultSet rs; + rs = stmt.executeQuery("select rnewsid from recommend where ruserid='" + userId + "' and rrectime>"+getInRecDate()); + while (rs.next()) + { + if (col.contains(rs.getString(1))) + { + col.remove(rs.getString(1)); + } + } + } + catch (SQLException e) + { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + + /** + * 获取所有用户的Id列表 + * + * @return + */ + public static ArrayList getUserList() + { + ArrayList users = new ArrayList(); + try + { + ResultSet rs = 
ConnectionFactory.getNewStatement().executeQuery("select userid from users"); + while (rs.next()) + { + users.add(rs.getString(1)); + } + } + catch (SQLException e) + { + e.printStackTrace(); + } + return users; + } + + public static int getbeforeDays() + { + return beforeDays; + } + + public static void setbeforeDays(int beforeDays) + { + RecommendKit.beforeDays = beforeDays; + } + + public static String getSpecificDayFormat(int before_Days) + { + SimpleDateFormat date_format = new SimpleDateFormat("yyyy-MM-dd"); + Calendar calendar = Calendar.getInstance(); // 得到日历 + calendar.add(Calendar.DAY_OF_MONTH, before_Days); // 设置为前beforeNum天 + Date d = calendar.getTime(); + return "'" + date_format.format(d) + "'"; + } + + /** + * 获取所有用户的喜好关键词列表 + * + * @return + */ + public static HashMap>> getUserPrefListMap( + Collection userSet) + { + ResultSet rs = null; + HashMap>> userPrefListMap = null; + try + { + String userPrefListQuery = getInQueryStringWithSingleQuote(userSet.iterator()); + if (!userPrefListQuery.equals("()")) + { + rs = ConnectionFactory.getNewStatement() + .executeQuery("select userid,upreflist from users where userid in " + userPrefListQuery); + userPrefListMap = new HashMap>>(); + while (rs.next()) + { + userPrefListMap.put(rs.getString(1), JsonKit.jsonPrefListtoMap(rs.getString(2))); + } + } + } + catch (SQLException e) + { + // TODO Auto-generated catch block + e.printStackTrace(); + } + return userPrefListMap; + } + + /** + * 用以select语句中使用in (n1,n2,n3...)范围查询的字符串拼接 + * + * @param ite + * 待查询对象集合的迭代器 + * @return 若迭代集合不为空:"(n1,n2,n3)",若为空:"()" + */ + public static String getInQueryString(Iterator ite) + { + String inQuery = "("; + while (ite.hasNext()) + { + inQuery += ite.next() + ","; + } + if (inQuery.length() > 1) + { + inQuery = inQuery.substring(0, inQuery.length() - 1); + } + inQuery += ")"; + return inQuery; + } + + public static String getInQueryStringWithSingleQuote(Iterator ite) + { + String inQuery = "("; + while (ite.hasNext()) + { + 
inQuery += "'" + ite.next() + "',"; + } + if (inQuery.length() > 1) + { + inQuery = inQuery.substring(0, inQuery.length() - 1); + } + inQuery += ")"; + return inQuery; + } + + /** + * 将推荐结果插入recommend表 + * + * @param userId + * 推荐目标用户id + * @param newsIte + * 待推荐新闻集合的迭代器 + * @param recAlgo + * 标明推荐结果来自哪个推荐算法(RecommendAlgorithm.XX) + */ + public static void insertRecommend(String userId, Iterator newsIte, int recAlgo) + { + try + { + String insertValues = ""; + while (newsIte.hasNext()) + { + insertValues += "(" + userId + "," + newsIte.next() + ",'" + new Timestamp(System.currentTimeMillis()) + + "'," + recAlgo + "),"; + } + if (insertValues.length() > 0) + { + insertValues = insertValues.substring(0, insertValues.length() - 1); + ConnectionFactory.getNewStatement() + .execute("insert into recommend (ruserid,rnewsid,rrectime,rrecalgo) values " + insertValues); + } + } + catch (SQLException e) + { + e.printStackTrace(); + } + } + + /** + * Acquire list of "active" users' ids + * "Active" means who use app recently(determined by method getInRecDate()) + * + * @return + */ + public static List getActiveUsers() + { + try + { + int activeDay=PropGetKit.getInt("activeDay"); + ResultSet rs1; + rs1 = ConnectionFactory.getNewStatement() + .executeQuery("select distinct nluserid from newslogs where nltime>" + getInRecDate(activeDay)); + List users = new ArrayList(); + while (rs1.next()) + { + users.add(rs1.getString(1)); + } + return users; + } + catch (SQLException e) + { + // TODO Auto-generated catch block + e.printStackTrace(); + } + System.out.println("return null"); + return null; + } + + /** + * 去除数量上超过为算法设置的推荐结果上限值的推荐结果 + * + * @param set + * @param N + * @return + */ + public static void removeOverNews(Set set, int N) + { + int i = 0; + Iterator ite = set.iterator(); + while (ite.hasNext()) + { + if (i >= N) + { + ite.remove(); + ite.next(); + } + else + { + ite.next(); + } + i++; + } + } +} diff --git 
a/src/top/qianxinyao/contentbasedrecommend/ContentBasedRecommender.java b/src/top/qianxinyao/contentbasedrecommend/ContentBasedRecommender.java new file mode 100644 index 0000000000000000000000000000000000000000..161fb28a4d0b180ec8e4c35cd40852c4ed386e08 --- /dev/null +++ b/src/top/qianxinyao/contentbasedrecommend/ContentBasedRecommender.java @@ -0,0 +1,176 @@ +/** + * + */ +package top.qianxinyao.contentbasedrecommend; + +import java.sql.ResultSet; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Date; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.ansj.app.keyword.Keyword; +import org.apache.log4j.Logger; + +import top.qianxinyao.algorithms.PropGetKit; +import top.qianxinyao.algorithms.RecommendAlgorithm; +import top.qianxinyao.algorithms.RecommendKit; +import top.qianxinyao.dbconnection.ConnectionFactory; + +/** + * @author qianxinyao + * @email tomqianmaple@gmail.com + * @github https://github.com/bluemapleman + * @date 2016年10月20日 基于内容的推荐 Content-Based + * + * 思路:提取抓取进来的新闻的关键词列表(tf-idf),与每个用户的喜好关键词列表,做关键词相似度计算,取最相似的N个新闻推荐给用户。 + * + * Procedure: 1、Every time that the recommendation is started up(according + * to quartz framework), the current day's coming in news will be + * processed by class TF-IDF's getTFIDF() method to obtain their key words + * list.And then the system go over every user and calculate the + * similarity between every news's key words list with user's preference + * list.After that, rank the news according to the similarities and + * recommend them to users. 
+ */ +public class ContentBasedRecommender implements RecommendAlgorithm +{ + public static final Logger logger = Logger.getLogger(ContentBasedRecommender.class); + + // TFIDF提取关键词数 + private static final int KEY_WORDS_NUM = PropGetKit.getInt("TFIDFKeywordsNum"); + + // 推荐新闻数 + private static final int N = PropGetKit.getInt("CBRecNum"); + + @Override + public void recommend(List users) + { + try + { + int count=0; + System.out.println("CB start at "+ new Date()); + // 首先进行用户喜好关键词列表的衰减更新+用户当日历史浏览记录的更新 + new UserPrefRefresher().refresh(users); + // 新闻及对应关键词列表的Map + HashMap> newsKeyWordsMap = new HashMap>(); + HashMap newsModuleMap = new HashMap(); + // 用户喜好关键词列表 + HashMap>> userPrefListMap = RecommendKit + .getUserPrefListMap(users); + ResultSet rs = ConnectionFactory.getStatement() + .executeQuery("select newsid,ntitle,ncontent,nmoduleid from news where ntime>" + + RecommendKit.getInRecDate() + " and ncontent not like ' tempMatchMap = new HashMap(); + Iterator ite = newsKeyWordsMap.keySet().iterator(); + while (ite.hasNext()) + { + String newsId = ite.next(); + int moduleId = newsModuleMap.get(newsId); + if (null != userPrefListMap.get(userId).get(moduleId)) + tempMatchMap.put(newsId, + getMatchValue(userPrefListMap.get(userId).get(moduleId), newsKeyWordsMap.get(newsId))); + else + continue; + } + // 去除匹配值为0的项目 + removeZeroItem(tempMatchMap); + if (!(tempMatchMap.toString().equals("{}"))) + { + tempMatchMap = sortMapByValue(tempMatchMap); + Set toBeRecommended=tempMatchMap.keySet(); + //过滤掉已经推荐过的新闻 + RecommendKit.filterReccedNews(toBeRecommended,userId); + //过滤掉用户已经看过的新闻 + RecommendKit.filterBrowsedNews(toBeRecommended, userId); + if(toBeRecommended.size()>N){ + RecommendKit.removeOverNews(toBeRecommended,N); + } + RecommendKit.insertRecommend(userId, toBeRecommended.iterator(),RecommendAlgorithm.CB); + count+=toBeRecommended.size(); + } + } + System.out.println("CB has contributed " + (count/users.size()) + " recommending news on average"); + System.out.println("CB 
finished at "+new Date()); + } + catch (Exception e) + { + e.printStackTrace(); + } + return; + } + + /** + * 获得用户的关键词列表和新闻关键词列表的匹配程度 + * + * @return + */ + private double getMatchValue(CustomizedHashMap map, List list) + { + Set keywordsSet = map.keySet(); + double matchValue = 0; + for (Keyword keyword : list) + { + if (keywordsSet.contains(keyword.getName())) + { + matchValue += keyword.getScore() * map.get(keyword.getName()); + } + } + return matchValue; + } + + private void removeZeroItem(Map map) + { + HashSet toBeDeleteItemSet = new HashSet(); + Iterator ite = map.keySet().iterator(); + while (ite.hasNext()) + { + String newsId = ite.next(); + if (map.get(newsId) <= 0) + { + toBeDeleteItemSet.add(newsId); + } + } + for (String item : toBeDeleteItemSet) + { + map.remove(item); + } + } + + /** + * 使用 Map按value进行排序 + * @param map + * @return + */ + public static Map sortMapByValue(Map oriMap) { + if (oriMap == null || oriMap.isEmpty()) { + return null; + } + Map sortedMap = new LinkedHashMap(); + List> entryList = new ArrayList>( + oriMap.entrySet()); + Collections.sort(entryList, new MapValueComparator()); + + Iterator> iter = entryList.iterator(); + Map.Entry tmpEntry = null; + while (iter.hasNext()) { + tmpEntry = iter.next(); + sortedMap.put(tmpEntry.getKey(), tmpEntry.getValue()); + } + return sortedMap; + } +} diff --git a/src/top/qianxinyao/contentbasedrecommend/CustomizedComparator.java b/src/top/qianxinyao/contentbasedrecommend/CustomizedComparator.java new file mode 100644 index 0000000000000000000000000000000000000000..e3e25d4a061bfcb86cc817e2cd3ea5874a7d3376 --- /dev/null +++ b/src/top/qianxinyao/contentbasedrecommend/CustomizedComparator.java @@ -0,0 +1,35 @@ +/** + * + */ +package top.qianxinyao.contentbasedrecommend; + +import java.util.Comparator; +import java.util.Map; + +/** + * @author qianxinyao + * @email tomqianmaple@gmail.com + * @github https://github.com/bluemapleman + * @date 2016年11月30日 + */ +public class CustomizedComparator 
/**
 * A {@link HashMap} whose {@link #toString()} renders the entries as
 * JSON-object-like text: every key is wrapped in double quotes and values are
 * written with their own {@code toString()}, e.g. {@code {"1":{"a":1.5},"2":{}}}.
 * Keys and values are not escaped.
 *
 * @author qianxinyao
 * @email tomqianmaple@gmail.com
 * @github https://github.com/bluemapleman
 * @date 2016-11-23
 */
public class CustomizedHashMap<K, V> extends HashMap<K, V>
{
    private static final long serialVersionUID = 1L;

    /**
     * @return {@code {"k1":v1,"k2":v2}} in this map's iteration order, or
     *         {@code {}} when the map is empty
     */
    @Override
    public String toString()
    {
        StringBuilder text = new StringBuilder("{");
        for (K key : keySet())
        {
            // Only the opening brace has been written before the first entry.
            if (text.length() > 1)
            {
                text.append(',');
            }
            text.append('"').append(key).append("\":").append(get(key));
        }
        return text.append('}').toString();
    }

    /**
     * Copies every mapping of {@code linkedHashMap} into this map.
     *
     * @param linkedHashMap source of the mappings
     * @return this map, to allow chaining
     */
    public CustomizedHashMap<K, V> copyFromLinkedHashMap(LinkedHashMap<K, V> linkedHashMap)
    {
        putAll(linkedHashMap);
        return this;
    }
}
+ */ +package top.qianxinyao.contentbasedrecommend; + +import java.util.Comparator; +import java.util.Map; +import java.util.Map.Entry; + +/** + * @author qianxinyao + * @email tomqianmaple@gmail.com + * @github https://github.com/bluemapleman + * @date 2016年12月2日 + */ +class MapValueComparator implements Comparator> { + + @Override + public int compare(Entry me1, Entry me2) { + + return me1.getValue().compareTo(me2.getValue()); + } +} \ No newline at end of file diff --git a/src/top/qianxinyao/contentbasedrecommend/TFIDF.java b/src/top/qianxinyao/contentbasedrecommend/TFIDF.java new file mode 100644 index 0000000000000000000000000000000000000000..0c4623dfe58cb0950976a568619145d9347aa276 --- /dev/null +++ b/src/top/qianxinyao/contentbasedrecommend/TFIDF.java @@ -0,0 +1,52 @@ +/** + * + */ +package top.qianxinyao.contentbasedrecommend; + +import java.util.List; + +import org.ansj.app.keyword.KeyWordComputer; +import org.ansj.app.keyword.Keyword; +import org.ansj.domain.Result; +import org.ansj.splitWord.analysis.ToAnalysis; + +/** + * @author qianxinyao + * @email tomqianmaple@gmail.com + * @github https://github.com/bluemapleman + * @date 2016年10月23日 + */ +public class TFIDF +{ + public static Result split(String text) + { + return ToAnalysis.parse(text); + } + + /** + * + * @param title 文本标题 + * @param content 文本内容 + * @param keyNums 返回的关键词数目 + * @return + */ + public static List getTFIDE(String title, String content,int keyNums) + { + // String + // sentence="我今天很开心,所以一口气买了好多东西。然而我一不小心把本月预算透支了,现在有很不开心了,因为后面的日子得吃土了!"; + KeyWordComputer kwc = new KeyWordComputer(keyNums); + return kwc.computeArticleTfidf(title, content); + } + + /** + * + * @param content 文本内容 + * @param keyNums 返回的关键词数目 + * @return + */ + public static List getTFIDE(String content,int keyNums) + { + KeyWordComputer kwc = new KeyWordComputer(keyNums); + return kwc.computeArticleTfidf(content); + } +} diff --git a/src/top/qianxinyao/contentbasedrecommend/UserPrefRefresher.java 
b/src/top/qianxinyao/contentbasedrecommend/UserPrefRefresher.java new file mode 100644 index 0000000000000000000000000000000000000000..dcb16e2a17fb35b7dc31976a0b77b2a31cd3c547 --- /dev/null +++ b/src/top/qianxinyao/contentbasedrecommend/UserPrefRefresher.java @@ -0,0 +1,228 @@ +/** + * + */ +package top.qianxinyao.contentbasedrecommend; + +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; + +import org.ansj.app.keyword.Keyword; + +import top.qianxinyao.algorithms.JsonKit; +import top.qianxinyao.algorithms.RecommendKit; +import top.qianxinyao.dbconnection.ConnectionFactory; + +/** + * @author qianxinyao + * @email tomqianmaple@gmail.com + * @github https://github.com/bluemapleman + * @date 2016年11月3日 每次用户浏览新的新闻时,用以更新用户的喜好关键词列表 + */ +public class UserPrefRefresher +{ + ResultSet rs=null; + + //设置TFIDF提取的关键词数目 + private static final int KEY_WORDS_NUM = 10; + + //每日衰减系数 + private static final double DEC_COEE=0.7; + + public void refresh(){ + refresh(RecommendKit.getUserList()); + } + + @SuppressWarnings("unchecked") + public void refresh(Collection userIdsCol){ + //首先对用户的喜好关键词列表进行衰减更新 + autoDecRefresh(userIdsCol); + //用户浏览新闻纪录:userBrowsexMap:(newsid List)> + HashMap> userBrowsedMap=getBrowsedHistoryMap(); + + //用户喜好关键词列表:userPrefListMap: + HashMap>> userPrefListMap=RecommendKit.getUserPrefListMap(userBrowsedMap.keySet()); + //新闻对应关键词列表与模块ID:newsTFIDFMap:>, + HashMap newsTFIDFMap=getNewsTFIDFMap(); + + //开始遍历用户浏览记录,更新用户喜好关键词列表 + //对每个用户(外层循环),循环他所看过的每条新闻(内层循环),对每个新闻,更新它的关键词列表到用户的对应模块中 + Iterator ite=userBrowsedMap.keySet().iterator(); + + while(ite.hasNext()){ + String userId=ite.next(); + ArrayList newsList=userBrowsedMap.get(userId); + for(String news:newsList){ + Integer moduleId=(Integer) newsTFIDFMap.get(news+"moduleid"); + //获得对应模块的(关键词:喜好)map + CustomizedHashMap 
rateMap=userPrefListMap.get(userId).get(moduleId); + //获得新闻的(关键词:TFIDF值)map + List keywordList=(List) newsTFIDFMap.get(news); + Iterator keywordIte=keywordList.iterator(); + while(keywordIte.hasNext()){ + Keyword keyword=keywordIte.next(); + String name=keyword.getName(); + if(rateMap.containsKey(name)){ + rateMap.put(name, rateMap.get(name)+keyword.getScore()); + } + else{ + rateMap.put(name,keyword.getScore()); + } + } + userPrefListMap.get(userId); + } + } + Iterator iterator=userBrowsedMap.keySet().iterator(); + while(iterator.hasNext()){ + String userId=iterator.next(); + try + { + ConnectionFactory.getStatement().executeUpdate("update users set upreflist='"+userPrefListMap.get(userId)+"' where userid='"+userId+"'"); + } + catch (SQLException e) + { + e.printStackTrace(); + } + } + + } + + /** + * 所有用户的喜好关键词列表TFIDF值随时间进行自动衰减更新 + */ + public void autoDecRefresh(){ + autoDecRefresh(RecommendKit.getUserList()); + } + + /** + * 所有用户的喜好关键词列表TFIDF值随时间进行自动衰减更新 + */ + public void autoDecRefresh(Collection userIdsCol){ + try + { + String inQuery=RecommendKit.getInQueryStringWithSingleQuote(userIdsCol.iterator()); + if(inQuery.equals("()")){ + return; + } + ResultSet rs=ConnectionFactory.getStatement().executeQuery("select userid,upreflist from users where userid in "+inQuery); + //用以更新的用户喜好关键词map的json串 + //用于删除喜好值过低的关键词 + ArrayList keywordToDelete=new ArrayList(); + while(rs.next()){ + String newPrefList="{"; + HashMap> map=JsonKit.jsonPrefListtoMap(rs.getString(2)); + Iterator ite=map.keySet().iterator(); + while(ite.hasNext()){ + //用户对应模块的喜好不为空 + Integer moduleId=ite.next(); + CustomizedHashMap moduleMap=map.get(moduleId); + newPrefList+="\""+moduleId+"\":"; + //N:{"X1":n1,"X2":n2,.....} + if(!(moduleMap.toString().equals("{}"))){ + Iterator inIte=moduleMap.keySet().iterator(); + while(inIte.hasNext()){ + String key=inIte.next(); + //累计TFIDF值乘以衰减系数 + double result=moduleMap.get(key)*DEC_COEE; + if(result<10){ + keywordToDelete.add(key); + } + 
moduleMap.put(key,result); + } + } + for(String deleteKey:keywordToDelete){ + moduleMap.remove(deleteKey); + } + keywordToDelete.clear(); + newPrefList+=moduleMap.toString()+","; + } + newPrefList="'"+newPrefList.substring(0,newPrefList.length()-1)+"}'"; + ConnectionFactory.getNewStatement().executeUpdate("update users set upreflist="+newPrefList+" where userid='"+rs.getString(1)+"'"); + } + } + catch (SQLException e) + { + e.printStackTrace(); + } + } + + /** + * 提取出当天所有用户浏览新闻纪录 + * @return + */ + private HashMap> getBrowsedHistoryMap(){ + HashMap> userBrowsedMap=null; + try + { + userBrowsedMap=new HashMap>(); + ResultSet rs = ConnectionFactory.getStatement().executeQuery("select * from newslogs where nltime>"+RecommendKit.getSpecificDayFormat(0)); + while(rs.next()){ + if(userBrowsedMap.containsKey(rs.getString(2))){ + userBrowsedMap.get(rs.getString(2)).add(rs.getString(3)); + } + else{ + userBrowsedMap.put(rs.getString(2), new ArrayList()); + userBrowsedMap.get(rs.getString(2)).add(rs.getString(3)); + } + } + } + catch (SQLException e) + { + e.printStackTrace(); + } + return userBrowsedMap; + } + + private HashSet getBrowsedNewsSet(){ + HashMap> browsedMap=getBrowsedHistoryMap(); + HashSet newsIdSet=new HashSet(); + Iterator ite=getBrowsedHistoryMap().keySet().iterator(); + while(ite.hasNext()){ + Iterator inIte=browsedMap.get(ite.next()).iterator(); + while(inIte.hasNext()){ + newsIdSet.add(inIte.next()); + } + } + return newsIdSet; + } + + /** + * 将所有当天被浏览过的新闻提取出来,以便进行TFIDF求值操作,以及对用户喜好关键词列表的更新。 + * @return + */ + private HashMap getNewsTFIDFMap(){ + HashMap newsTFIDFMap=null; + try + { + Iterator ite=getBrowsedNewsSet().iterator(); + String newsIdListQuery="("; + while(ite.hasNext()){ + newsIdListQuery+=ite.next()+","; + } + //用户如果当天没看新闻 + if(newsIdListQuery.length()>1){ + newsIdListQuery=newsIdListQuery.substring(0, newsIdListQuery.length()-1)+")"; + //提取出所有新闻的关键词列表及对应TF-IDf值,并放入一个map中 + rs=ConnectionFactory.getStatement().executeQuery("select 
newsid,ntitle,ncontent,nmoduleid from news where newsid in "+newsIdListQuery); + + newsTFIDFMap=new HashMap(); + while(rs.next()){ + newsTFIDFMap.put(rs.getString(1), TFIDF.getTFIDE(rs.getString(2), rs.getString(3),KEY_WORDS_NUM)); + newsTFIDFMap.put(rs.getString(1)+"moduleid", rs.getInt(4)); + } + } + else + return null; + } + catch (SQLException e) + { + e.printStackTrace(); + } + return newsTFIDFMap; + } +} diff --git a/src/top/qianxinyao/contentbasedrecommend/quartz/CBCronTriggerRunner.java b/src/top/qianxinyao/contentbasedrecommend/quartz/CBCronTriggerRunner.java new file mode 100644 index 0000000000000000000000000000000000000000..2a05fdaf851c934cfd52d6f098227c48d98d95b9 --- /dev/null +++ b/src/top/qianxinyao/contentbasedrecommend/quartz/CBCronTriggerRunner.java @@ -0,0 +1,57 @@ +/** + * + */ +package top.qianxinyao.contentbasedrecommend.quartz; + +import java.util.List; + +import org.quartz.CronExpression; +import org.quartz.JobKey; +import org.quartz.Scheduler; +import org.quartz.SchedulerException; +import org.quartz.SchedulerFactory; +import org.quartz.impl.JobDetailImpl; +import org.quartz.impl.StdSchedulerFactory; +import org.quartz.impl.triggers.CronTriggerImpl; + +/** + * @author qianxinyao + * @email tomqianmaple@gmail.com + * @github https://github.com/bluemapleman + * @date 2016年11月23日 + */ +public class CBCronTriggerRunner +{ + public void task(List users,String cronExpression) throws SchedulerException + { + // Initiate a Schedule Factory + SchedulerFactory schedulerFactory = new StdSchedulerFactory(); + // Retrieve a scheduler from schedule factory + Scheduler scheduler = schedulerFactory.getScheduler(); + + // Initiate JobDetail with job name, job group, and executable job class + JobDetailImpl jobDetailImpl = + new JobDetailImpl(); + jobDetailImpl.setJobClass(CBJob.class); + jobDetailImpl.setKey(new JobKey("CBJob1")); + jobDetailImpl.getJobDataMap().put("users", users); + // Initiate CronTrigger with its name and group name + CronTriggerImpl 
cronTriggerImpl = new CronTriggerImpl(); + cronTriggerImpl.setName("CBCronTrigger1"); + + try { + // setup CronExpression + CronExpression cexp = new CronExpression(cronExpression); + // Assign the CronExpression to CronTrigger + cronTriggerImpl.setCronExpression(cexp); + } catch (Exception e) { + e.printStackTrace(); + } + // schedule a job with JobDetail and Trigger + scheduler.scheduleJob(jobDetailImpl, cronTriggerImpl); + + // start the scheduler + scheduler.start(); + } +} + diff --git a/src/top/qianxinyao/contentbasedrecommend/quartz/CBJob.java b/src/top/qianxinyao/contentbasedrecommend/quartz/CBJob.java new file mode 100644 index 0000000000000000000000000000000000000000..78bd4f15bd713fc4637c3166078a027a78f491b3 --- /dev/null +++ b/src/top/qianxinyao/contentbasedrecommend/quartz/CBJob.java @@ -0,0 +1,32 @@ +/** + * + */ +package top.qianxinyao.contentbasedrecommend.quartz; + +import java.util.List; + +import org.quartz.Job; +import org.quartz.JobExecutionContext; +import org.quartz.JobExecutionException; + +import top.qianxinyao.contentbasedrecommend.ContentBasedRecommender; + +/** + * @author qianxinyao + * @email tomqianmaple@gmail.com + * @github https://github.com/bluemapleman + * @date 2016年11月23日 + * 每天定时根据用户当日的新闻浏览记录来更新用户的喜好关键词列表 + */ +public class CBJob implements Job +{ + @SuppressWarnings("unchecked") + @Override + public void execute(JobExecutionContext arg0) throws JobExecutionException + { + List users=(List) arg0.getJobDetail().getJobDataMap().get("users"); + new ContentBasedRecommender().recommend(users); + } + +} + diff --git a/src/top/qianxinyao/dbconnection/ConnectionFactory.java b/src/top/qianxinyao/dbconnection/ConnectionFactory.java new file mode 100644 index 0000000000000000000000000000000000000000..46a537cfa1ecc52b341c37e5b7ec3a7089ac0f27 --- /dev/null +++ b/src/top/qianxinyao/dbconnection/ConnectionFactory.java @@ -0,0 +1,163 @@ +/** + * + */ +package top.qianxinyao.dbconnection; + +import java.io.FileInputStream; +import 
java.io.FileNotFoundException; +import java.io.IOException; +import java.sql.Connection; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.HashMap; +import java.util.Properties; + +import javax.sql.DataSource; + +import org.apache.log4j.Logger; +import org.apache.mahout.cf.taste.impl.model.jdbc.MySQLJDBCDataModel; +import org.apache.mahout.cf.taste.impl.model.jdbc.PostgreSQLBooleanPrefJDBCDataModel; +import org.postgresql.jdbc3.Jdbc3SimpleDataSource; + + +/** + * @author qianxinyao + * @email tomqianmaple@gmail.com + * @github https://github.com/bluemapleman + * @date 2016年10月21日 + */ +public class ConnectionFactory +{ + public static final Logger logger = Logger.getLogger(ConnectionFactory.class); + //偏好表表名 + public static final String PREF_TABLE="newslogs"; + //用户id列名 + public static final String PREF_TABLE_USERID="nlonguserid"; + //新闻id列名 + public static final String PREF_TABLE_NEWSID="nlnewsid"; + //偏好值列名 + public static final String PREF_TABLE_PREFVALUE="nprefer"; + //用户浏览时间列名 + public static final String PREF_TABLE_TIME="nltime"; + + public static final String MYSQL="com.mysql.jdbc.Driver"; + public static final String POSTGRE="org.postgresql.Driver"; + + public static Connection conn; + + public static Statement stmt; + +// private static void initalize(String database) +// { +// try +// { +// Class.forName(database).newInstance(); +// HashMap info = getDBInfo(); +// conn = (Connection) DriverManager.getConnection(info.get("url"), info.get("user"), info.get("password")); +// } +// catch (ClassNotFoundException e) +// { +// logger.error("找不到驱动程序类 ,加载驱动失败!"); +// } +// catch (SQLException se) +// { +// logger.error("数据库连接失败!"); +// } +// catch (Exception e) +// { +// logger.error("数据库连接初始化错误!"); +// } +// return; +// } + + public static Connection getConnection() + { + if (null == conn) + { + try + { + conn=getDataSource().getConnection(); + } + catch (SQLException e) + { + e.printStackTrace(); + } + } + 
return conn; + } + + public static Statement getStatement(){ + if(null==stmt){ + try + { + stmt=getConnection().createStatement(ResultSet.TYPE_SCROLL_INSENSITIVE, + ResultSet.CONCUR_UPDATABLE); + } + catch (SQLException e) + { + e.printStackTrace(); + } + } + return stmt; + } + + public static Statement getNewStatement(){ + Statement watchedStmt=null; + try + { + watchedStmt=getConnection().createStatement(); + new StatementWatcher(watchedStmt).start(); + } + catch (SQLException e) + { + e.printStackTrace(); + } + return watchedStmt; + } + + public static DataSource getDataSource() + { + Jdbc3SimpleDataSource dataSource=new Jdbc3SimpleDataSource(); + + HashMap info=getDBInfo(); + dataSource.setUrl(info.get("url")); + dataSource.setUser(info.get("user")); + dataSource.setPassword(info.get("password")); + return dataSource; + } + + public static PostgreSQLBooleanPrefJDBCDataModel getPostgreSQLBooleanPrefJDBCDataModel(){ + return new PostgreSQLBooleanPrefJDBCDataModel(ConnectionFactory.getDataSource(), PREF_TABLE, PREF_TABLE_USERID, + PREF_TABLE_NEWSID,PREF_TABLE_TIME); + } + + public static MySQLJDBCDataModel getMySQLJDBCDataModel(){ + return new MySQLJDBCDataModel(ConnectionFactory.getDataSource(), "user_likes", "uid", + "nid", "likes", "recording_time"); + } + + private static HashMap getDBInfo() + { + HashMap info = null; + try + { + Properties p = new Properties(); + p.load(new FileInputStream(System.getProperty("user.dir") + "/res/dbconfig.properties")); + info = new HashMap(); + info.put("url", p.getProperty("url")); + info.put("user", p.getProperty("user")); + info.put("password", p.getProperty("password")); + } + catch (FileNotFoundException e) + { + logger.error("读取属性文件--->失败!- 原因:文件路径错误或者文件不存在"); + } + catch (IOException e) + { + logger.error("装载文件--->失败!"); + } + return info; + } + +} diff --git a/src/top/qianxinyao/dbconnection/StatementWatcher.java b/src/top/qianxinyao/dbconnection/StatementWatcher.java new file mode 100644 index 
/**
 * Watchdog thread that closes a JDBC {@link Statement} after a grace period,
 * so statements handed out without an explicit owner are eventually released.
 *
 * @author qianxinyao
 * @email tomqianmaple@gmail.com
 * @github https://github.com/bluemapleman
 * @date 2016-11-23
 */
public class StatementWatcher extends Thread
{
    /** Grace period, in milliseconds, used by the single-argument constructor. */
    public static final long DEFAULT_DELAY_MILLIS = 10000L;

    private final Statement watchedStmt;
    private final long delayMillis;

    /**
     * Watches {@code watchedStmt} with the default 10 second grace period.
     *
     * @param watchedStmt the statement to close once the grace period elapses
     */
    public StatementWatcher(Statement watchedStmt){
        this(watchedStmt, DEFAULT_DELAY_MILLIS);
    }

    /**
     * Watches {@code watchedStmt} with a caller-chosen grace period.
     *
     * @param watchedStmt the statement to close once the grace period elapses
     * @param delayMillis how long to wait before closing, in milliseconds
     * @throws IllegalArgumentException if {@code delayMillis} is negative
     */
    public StatementWatcher(Statement watchedStmt, long delayMillis){
        if (delayMillis < 0)
        {
            throw new IllegalArgumentException("delayMillis must be >= 0: " + delayMillis);
        }
        this.watchedStmt=watchedStmt;
        this.delayMillis=delayMillis;
    }

    /**
     * Sleeps for the grace period, then closes the statement if it is still
     * open. Makes exactly one close attempt: a failing driver can no longer
     * keep this thread spinning forever.
     */
    @Override
    public void run(){
        boolean interrupted = false;
        // Give the statement's user time to start (and usually finish) its work.
        try
        {
            Thread.sleep(delayMillis);
        }
        catch (InterruptedException e)
        {
            // Still release the statement, but remember the interruption.
            interrupted = true;
        }
        try
        {
            if (watchedStmt != null && !watchedStmt.isClosed())
            {
                watchedStmt.close();
            }
        }
        catch (SQLException e)
        {
            e.printStackTrace();
        }
        finally
        {
            if (interrupted)
            {
                // Restore the flag so the interruption is not silently lost.
                Thread.currentThread().interrupt();
            }
        }
    }
}
+ // 热点新闻的有效时间 + public static int beforeDays = -10; + // 推荐系统每日为每位用户生成的推荐结果的总数,当CF与CB算法生成的推荐结果数不足此数时,由该算法补充 + public static int TOTAL_REC_NUM = 20; + // 将每天生成的“热点新闻”ID,按照新闻的热点程度从高到低放入此List + private static ArrayList topHotNewsList = new ArrayList(); + + @Override + public void recommend(List users) + { + System.out.println("HR start at "+new Date()); + int count=0; + Timestamp timestamp = getCertainTimestamp(0, 0, 0); + for (String userId : users) + { + try + { + ResultSet rs = ConnectionFactory.getNewStatement() + .executeQuery("select ruserid,count(*) as recnums from recommend where rrectime>'" + timestamp + + "' and ruserid='" + userId + "' group by ruserid"); + boolean flag=rs.next(); + int delta=flag?TOTAL_REC_NUM - rs.getInt("recnums"):TOTAL_REC_NUM; + Set toBeRecommended = new HashSet(); + if (delta > 0) + { + int i = topHotNewsList.size() > delta ? delta : topHotNewsList.size(); + while (i-- > 0) + toBeRecommended.add(topHotNewsList.get(i)); + } + RecommendKit.filterBrowsedNews(toBeRecommended, userId); + RecommendKit.filterReccedNews(toBeRecommended, userId); + RecommendKit.insertRecommend(userId, toBeRecommended.iterator(), RecommendAlgorithm.HR); + count+=toBeRecommended.size(); + } + catch (SQLException e) + { + e.printStackTrace(); + } + } + System.out.println("HR has contributed " + (count/users.size()) + " recommending news on average"); + System.out.println("HR end at "+new Date()); + + } + + public static void formTodayTopHotNewsList() + { + topHotNewsList.clear(); + ArrayList hotNewsTobeReccommended = new ArrayList(); + try + { + ResultSet rs = ConnectionFactory.getNewStatement() + .executeQuery("select nlnewsid,count(*) as visitNums from newslogs where nltime>" + + RecommendKit.getInRecDate(beforeDays) + " group by nlnewsid order by visitNums desc"); + while (rs.next()) + { + hotNewsTobeReccommended.add(rs.getString(1)); + } + for (String news : hotNewsTobeReccommended) + { + topHotNewsList.add(news); + } + System.out.println(topHotNewsList); + 
} + catch (SQLException e) + { + e.printStackTrace(); + } + } + + public static List getTopHotNewsList() + { + return topHotNewsList; + } + + public static int getTopHopNewsListSize() + { + return topHotNewsList.size(); + } + + private Timestamp getCertainTimestamp(int hour, int minute, int second) + { + Calendar calendar = Calendar.getInstance(); // 得到日历 + calendar.set(Calendar.HOUR_OF_DAY, hour); // 设置为前beforeNum天 + calendar.set(Calendar.MINUTE, minute); + calendar.set(Calendar.SECOND, second); + return new Timestamp(calendar.getTime().getTime()); + } +} diff --git a/src/top/qianxinyao/hotrecommend/quartz/HRCronTriggerRunner.java b/src/top/qianxinyao/hotrecommend/quartz/HRCronTriggerRunner.java new file mode 100644 index 0000000000000000000000000000000000000000..40c4ccd99f6b2eecd085c37fa9c7f711d8c0ba3e --- /dev/null +++ b/src/top/qianxinyao/hotrecommend/quartz/HRCronTriggerRunner.java @@ -0,0 +1,57 @@ +/** + * + */ +package top.qianxinyao.hotrecommend.quartz; + +import java.util.List; + +import org.quartz.CronExpression; +import org.quartz.JobKey; +import org.quartz.Scheduler; +import org.quartz.SchedulerException; +import org.quartz.SchedulerFactory; +import org.quartz.impl.JobDetailImpl; +import org.quartz.impl.StdSchedulerFactory; +import org.quartz.impl.triggers.CronTriggerImpl; + +/** + * @author qianxinyao + * @email tomqianmaple@gmail.com + * @github https://github.com/bluemapleman + * @date 2016年11月23日 + */ +public class HRCronTriggerRunner +{ + public void task(List users,String cronExpression) throws SchedulerException + { + // Initiate a Schedule Factory + SchedulerFactory schedulerFactory = new StdSchedulerFactory(); + // Retrieve a scheduler from schedule factory + Scheduler scheduler = schedulerFactory.getScheduler(); + + // Initiate JobDetail with job name, job group, and executable job class + JobDetailImpl jobDetailImpl = + new JobDetailImpl(); + jobDetailImpl.setJobClass(HRJob.class); + jobDetailImpl.setKey(new JobKey("HRJob1")); + 
jobDetailImpl.getJobDataMap().put("users",users); + // Initiate CronTrigger with its name and group name + CronTriggerImpl cronTriggerImpl = new CronTriggerImpl(); + cronTriggerImpl.setName("HRCronTrigger1"); + + try { + // setup CronExpression + CronExpression cexp = new CronExpression(cronExpression); + // Assign the CronExpression to CronTrigger + cronTriggerImpl.setCronExpression(cexp); + } catch (Exception e) { + e.printStackTrace(); + } + // schedule a job with JobDetail and Trigger + scheduler.scheduleJob(jobDetailImpl, cronTriggerImpl); + + // start the scheduler + scheduler.start(); + } +} + diff --git a/src/top/qianxinyao/hotrecommend/quartz/HRJob.java b/src/top/qianxinyao/hotrecommend/quartz/HRJob.java new file mode 100644 index 0000000000000000000000000000000000000000..356f9a6fe7886a463da4de101a32f06d8820e213 --- /dev/null +++ b/src/top/qianxinyao/hotrecommend/quartz/HRJob.java @@ -0,0 +1,29 @@ +/** + * + */ +package top.qianxinyao.hotrecommend.quartz; + +import org.quartz.Job; +import org.quartz.JobExecutionContext; +import org.quartz.JobExecutionException; + +import top.qianxinyao.hotrecommend.HotRecommender; + +/** + * @author qianxinyao + * @email tomqianmaple@gmail.com + * @github https://github.com/bluemapleman + * @date 2016年11月23日 + * 每天定时生成热点新闻的列表 + */ +public class HRJob implements Job +{ + @Override + public void execute(JobExecutionContext arg0) throws JobExecutionException + { + HotRecommender.getTopHotNewsList().clear(); + HotRecommender.formTodayTopHotNewsList(); + } + +} + diff --git a/src/top/qianxinyao/performance/Judge.java b/src/top/qianxinyao/performance/Judge.java new file mode 100644 index 0000000000000000000000000000000000000000..330faa1e25185ebbfee590adbdbfd8e14d5cfc82 --- /dev/null +++ b/src/top/qianxinyao/performance/Judge.java @@ -0,0 +1,20 @@ +/** + * + */ +package top.qianxinyao.performance; + +/** + * @author qianxinyao + * @email tomqianmaple@gmail.com + * @github https://github.com/bluemapleman + * @date 2016年12月2日 + * 
/**
 * Evaluates recommendation quality.
 *
 * <p>Metrics listed by the original author: 1. precision, 2. recall.
 * Only a placeholder for precision exists so far.
 */
public class Judge
{
    /** Value reported while the precision metric is not yet implemented. */
    private static final double PLACEHOLDER_PRECISION = 0;

    /**
     * Returns the precision of the recommendations.
     *
     * @return always {@code 0}; the metric is not implemented yet
     */
    public double getPrecision()
    {
        return PLACEHOLDER_PRECISION;
    }
}