提交 b573474f 编写于 作者: WSKH0929's avatar WSKH0929 💬

last commit

上级 e9d40196
文件已添加
文件已添加
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
<component name="ProjectCodeStyleConfiguration">
<state>
<option name="PREFERRED_PROJECT_CODE_STYLE" value="Default" />
</state>
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CompilerConfiguration">
<annotationProcessing>
<profile default="true" name="Default" enabled="true" />
<profile name="Maven default annotation processors profile" enabled="true">
<sourceOutputDir name="target/generated-sources/annotations" />
<sourceTestOutputDir name="target/generated-test-sources/test-annotations" />
<outputRelativeToContentRoot value="true" />
<module name="CUMCM2022" />
</profile>
</annotationProcessing>
</component>
</project>
\ No newline at end of file
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="JavaDoc" enabled="true" level="WARNING" enabled_by_default="true">
<option name="TOP_LEVEL_CLASS_OPTIONS">
<value>
<option name="ACCESS_JAVADOC_REQUIRED_FOR" value="none" />
<option name="REQUIRED_TAGS" value="" />
</value>
</option>
<option name="INNER_CLASS_OPTIONS">
<value>
<option name="ACCESS_JAVADOC_REQUIRED_FOR" value="none" />
<option name="REQUIRED_TAGS" value="" />
</value>
</option>
<option name="METHOD_OPTIONS">
<value>
<option name="ACCESS_JAVADOC_REQUIRED_FOR" value="none" />
<option name="REQUIRED_TAGS" value="@return@param@throws or @exception" />
</value>
</option>
<option name="FIELD_OPTIONS">
<value>
<option name="ACCESS_JAVADOC_REQUIRED_FOR" value="none" />
<option name="REQUIRED_TAGS" value="" />
</value>
</option>
<option name="IGNORE_DEPRECATED" value="false" />
<option name="IGNORE_JAVADOC_PERIOD" value="true" />
<option name="IGNORE_DUPLICATED_THROWS" value="false" />
<option name="IGNORE_POINT_TO_ITSELF" value="false" />
<option name="myAdditionalJavadocTags" value="date" />
</inspection_tool>
</profile>
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="RemoteRepositoriesConfiguration">
<remote-repository>
<option name="id" value="central" />
<option name="name" value="Central Repository" />
<option name="url" value="https://repo.maven.apache.org/maven2" />
</remote-repository>
<remote-repository>
<option name="id" value="central" />
<option name="name" value="Maven Central repository" />
<option name="url" value="https://repo1.maven.org/maven2" />
</remote-repository>
<remote-repository>
<option name="id" value="jboss.community" />
<option name="name" value="JBoss Community repository" />
<option name="url" value="https://repository.jboss.org/nexus/content/repositories/public/" />
</remote-repository>
<remote-repository>
<option name="id" value="central" />
<option name="name" value="Central Repository" />
<option name="url" value="https://maven.aliyun.com/repository/public" />
</remote-repository>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ExternalStorageConfigurationManager" enabled="true" />
<component name="MavenProjectsManager">
<option name="originalFiles">
<list>
<option value="$PROJECT_DIR$/pom.xml" />
</list>
</option>
</component>
<component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" default="true" project-jdk-name="1.8" project-jdk-type="JavaSDK">
<output url="file://$PROJECT_DIR$/out" />
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$/../../.." vcs="Git" />
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.wskh</groupId>
<artifactId>CUMCM2022</artifactId>
<version>1.0-SNAPSHOT</version>
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
</properties>
<dependencies>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>1.18.22</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>4.0.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.0.0</version>
</dependency>
</dependencies>
</project>
\ No newline at end of file
package com.wskh;
import lombok.Data;
import java.util.*;
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Collectors;
/**
* @Author:WSKH
* @ClassName:ISODATA
* @ClassType:
* @Description:
* @Date:2022/2/13/14:23
* @Email:1187560563@qq.com
* @Blog:https://blog.csdn.net/weixin_51545953?type=blog
*/
@Data
public class ISODATA {
// 待分类的原始值
private List<List<Double>> dataList;
// 初始类别个数
private int K = 4;
// 最大迭代次数
private int maxClusterTimes = 2000;
// 聚类的结果
private List<List<List<Double>>> clusterList = new ArrayList<>();
// <种群索引,点集>
Map<Integer, List<List<Double>>> map;
// 每一聚类域中最少的样本数目,若少于此数即不作为一个独立的聚类
int minPopulationPointNum;
// 两个聚类中心间的最小距离,若小于此数,两个聚类需进行合并
double centerMinDistance;
// 在一次迭代运算中可以合并的聚类中心的最多对数
int maxCombineNum;
// 一个聚类域中样本距离分布的标准差
double maxMAE;
/**
* @param dataList 数据集
* @param centerMinDistance 两个聚类中心间的最小距离,若小于此数,两个聚类需进行合并
* @param minPopulationPointNum 每一聚类域中最少的样本数目,若少于此数即不作为一个独立的聚类;
* @param maxCombineNum 在一次迭代运算中可以合并的聚类中心的最多对数
* @param K 初始类别个数
* @param maxClusterTimes 最大迭代次数
* @return
* @Description
* @Author WSKH
*/
public ISODATA(List<List<Double>> dataList, double centerMinDistance, int minPopulationPointNum, int maxCombineNum, double maxMAE, int K, int maxClusterTimes) {
this.dataList = new ArrayList<>(dataList);
this.K = Math.min(K, dataList.size());
this.maxClusterTimes = maxClusterTimes;
this.centerMinDistance = centerMinDistance;
this.minPopulationPointNum = minPopulationPointNum;
this.maxCombineNum = maxCombineNum;
this.maxMAE = maxMAE;
}
/**
* @param
* @return 聚类的结果(簇心)
* @Description 聚类主方法
* @Author WSKH
*/
public List<List<Double>> clustering() throws Exception {
long start = System.currentTimeMillis();
int t = 0;
while (t < maxClusterTimes) {
if (t == 0) {
// 初始化簇心
clusterList.add(initCenterList(K));
} else {
// 获取当前簇心
List<List<Double>> centerList = clusterList.get(t - 1);
// 计算新的簇心
map = new HashMap<>();
int[] counterArr = new int[centerList.size()];
for (List<Double> data : dataList) {
// 当前点和簇心依次比较,找到最近的簇心
double minDis = computeDistance(data, centerList.get(0));
int minIndex = 0;
for (int i = 1; i < centerList.size(); i++) {
double distance = computeDistance(data, centerList.get(i));
if (minDis > distance) {
minDis = distance;
minIndex = i;
}
}
// 将当前点加入最近的种群
if (!map.containsKey(minIndex)) {
List<List<Double>> newPointList = new ArrayList<>();
newPointList.add(data);
map.put(minIndex, newPointList);
} else {
map.get(minIndex).add(data);
}
counterArr[minIndex]++;
}
// 根据均值,计算新的簇心
List<List<Double>> newCenterList = new ArrayList<>();
for (int i = 0; i < centerList.size(); i++) {
if (map.containsKey(i)) {
// 计算簇心
List<Double> newCenter = computeCenter(map.get(i));
newCenterList.add(newCenter);
} else {
throw new RuntimeException("发生了簇缺失");
}
}
// 适应性分裂与合并簇心
newCenterList = adaptiveSplittingAndMerging(newCenterList);
// 将新的簇心,加入集合
clusterList.add(newCenterList);
}
// 如果簇心没有改变,那么就跳出循环
if (t > 0 && !isCenterChange(clusterList.get(t - 1), clusterList.get(t))) {
break;
}
t++;
}
System.out.println("用时" + (System.currentTimeMillis() - start) + "ms");
return clusterList.get(clusterList.size() - 1);
}
/**
* @param centerList 簇心集合
* @return
* @Description 适应性分裂与合并簇心
* @Author WSKH
*/
private List<List<Double>> adaptiveSplittingAndMerging(List<List<Double>> centerList) throws Exception {
int c1 = centerList.size();
List<List<Double>> newCenterList = new ArrayList<>();
// 适应性分裂
map.forEach((k,v)->{
// 中心点
List<Double> center = centerList.get(k);
// 计算SSE 然后推导出MAE
AtomicReference<Double> SSE = new AtomicReference<>(0d);
v.forEach(point->{
for (int i = 0; i < point.size(); i++) {
int finalI = i;
SSE.updateAndGet(v1 -> (v1 + Math.pow(point.get(finalI) - center.get(finalI), 2)));
}
});
if(Math.sqrt(SSE.get()/v.size())>maxMAE){
// 说明可以分裂,找到一个点距离中心点超过
for (int i = 0; i < dataList.size(); i++) {
try {
if(computeDistance(dataList.get(i),center)>centerMinDistance){
newCenterList.add(dataList.get(i));
break;
}else{
if(i==dataList.size()-1){
// 如果没找到那就报错,说明又要分裂,分裂后又要合并,参数设置不合理
throw new Exception("参数centerMinDistance和maxMAE设置不合理");
}
}
} catch (Exception e) {
e.printStackTrace();
}
}
}
});
// 适应性合并
newCenterList.addAll(centerList);
int c2 = newCenterList.size();
int counter = 0; // 记录合并次数
for (int i = 0; i < newCenterList.size(); i++) {
if(counter > maxCombineNum){
break;
}
for (int j = i+1; j < newCenterList.size(); j++) {
if(computeDistance(newCenterList.get(i),newCenterList.get(j))<=centerMinDistance){
// 小于最小距离,合并
List<Double> remove1 = newCenterList.get(i);
List<Double> remove2 = newCenterList.get(j);
// 新中心就是合并点的中点
List<Double> newCenter = new ArrayList<>();
for (int k = 0; k < remove1.size(); k++) {
newCenter.add((remove1.get(k)+remove2.get(k))/2.0);
}
if(i>j){
newCenterList.remove(i);
newCenterList.remove(j);
}else{
newCenterList.remove(i);
newCenterList.remove(j-1);
}
newCenterList.add(newCenter);
i = -1; // 让i从头开始遍历
counter++;
break;
}
}
}
// 返回分裂与合并后的中心点集合
System.out.println("一开始:"+c1+",分裂后:"+c2+",合并后:"+newCenterList.size());
return newCenterList;
}
/**
* @param size 初始化簇心的数量
* @return List<List < Double>>
* @Description 初始化簇心(随机抽取size项作为簇心,尽可能使得初始簇心相互距离较远)
* @Author WSKH
*/
private List<List<Double>> initCenterList(int size) throws Exception {
List<List<Double>> initCenterList = new ArrayList<>();
Set<Integer> set = new HashSet<>();
Double[][] distanceMatrix = new Double[dataList.size()][dataList.size()];
// 随机选取第一个簇心
int r = new Random().nextInt(dataList.size());
set.add(r);
// 选择出其余的聚类中心
while (set.size()<size){
double maxDistance = -1d;
int maxIndex = -1;
for (int i = 0; i < dataList.size(); i++) {
List<Double> data = dataList.get(i);
if(!set.contains(i)){
// 计算当前点,距离最近的已有簇心
double minDistance = Double.MAX_VALUE;
int minIndex = -1;
for (Integer j : set) {
if(distanceMatrix[i][j]==null){
distanceMatrix[i][j] = computeDistance(data,dataList.get(j));
}
if(minDistance>distanceMatrix[i][j]){
minDistance = distanceMatrix[i][j];
minIndex = i;
}
}
// 获取最小距离中最大的那个(最大化点与簇心的最短距离)
if(maxDistance<minDistance){
maxDistance = minDistance;
maxIndex = minIndex;
}
}
}
set.add(maxIndex);
}
// set -> list
set.forEach(i->initCenterList.add(dataList.get(i)));
return initCenterList;
}
/**
* @param p1 点1
* @param p2 点2
* @return double
* @Description 计算两点间距离(欧式)
* @Author WSKH
*/
private double computeDistance(List<Double> p1, List<Double> p2) throws Exception {
if (p1.size() != p2.size()) {
throw new Exception("两点维度不一致");
}
double distance = 0d;
for (int i = 0; i < p1.size(); i++) {
distance += Math.pow((p1.get(i) - p2.get(i)), 2);
}
return Math.sqrt(distance);
}
/**
* @param pointList 点集合
* @return double
* @Description 计算簇心坐标
* @Author WSKH
*/
private List<Double> computeCenter(List<List<Double>> pointList) {
List<Double> result = new ArrayList<>(pointList.get(0));
for (int i = 1; i < pointList.size(); i++) {
for (int j = 0; j < pointList.get(i).size(); j++) {
result.set(j, result.get(j) + pointList.get(i).get(j));
}
}
return result.stream().map(item -> {
return item / pointList.size();
}).collect(Collectors.toList());
}
/**
* @param oldCenterList 上一轮迭代的簇心列表
* @param curCenterList 当前迭代的簇心列表
* @return boolean 改变返回true,没改变返回false
* @Description 判断簇心是否改变
* @Author WSKH
*/
private boolean isCenterChange(List<List<Double>> oldCenterList, List<List<Double>> curCenterList) throws Exception {
if (oldCenterList.size() != curCenterList.size()) {
return true;
}
for (int i = 0; i < oldCenterList.size(); i++) {
for (int j = 0; j < oldCenterList.get(i).size(); j++) {
if (!oldCenterList.get(i).get(j).equals(curCenterList.get(i).get(j))) {
return true;
}
}
}
return false;
}
}
package com.wskh;
import lombok.Data;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
 * Command-line entry point: reads the samples of one type from an Excel workbook, clusters them
 * with {@link ISODATA} and prints each sample together with its cluster label.
 *
 * @Author:WSKH
 * @ClassName:Run
 * @ClassType:
 * @Description:
 * @Date:2022/9/16/21:54
 * @Email:1187560563@qq.com
 * @Blog:https://blog.csdn.net/weixin_51545953?type=blog
 */
public class Run {
    // Workbook used when no path is supplied.
    private static final String DEFAULT_DATA_PATH = "D:\\2 BachelorDegree\\大四上比赛或项目资料\\数模国赛\\OurGit\\cumcm2022\\05 Wskh\\Python\\CUMCM2022\\src\\data\\data.xlsx";
    // Zero-based index of the column holding the type label that rows are filtered on.
    private static final int TYPE_COLUMN = 18;
    // Zero-based, inclusive range of the numeric columns that form one sample.
    private static final int FIRST_FEATURE_COLUMN = 1;
    private static final int LAST_FEATURE_COLUMN = 14;

    /**
     * Clusters the "高钾" and the "铅钡" samples one after the other.
     *
     * @param args optional; {@code args[0]} overrides the default workbook path
     */
    public static void main(String[] args) throws Exception {
        String path = args.length > 0 ? args[0] : DEFAULT_DATA_PATH;
        run("高钾", path);
        System.out.println("==================================================================");
        run("铅钡", path);
    }

    /**
     * Clusters the samples of the given type read from the default workbook.
     *
     * @param type type label to select
     */
    public static void run(String type) throws Exception {
        run(type, DEFAULT_DATA_PATH);
    }

    /**
     * Clusters the samples of the given type read from {@code path} and prints the labels and the
     * samples. Nothing is clustered when no sample could be read.
     *
     * @param type type label to select
     * @param path path of the .xlsx workbook
     */
    public static void run(String type, String path) throws Exception {
        List<List<Double>> dataList = readData(path, type);
        if (dataList == null || dataList.isEmpty()) {
            // readData already reported a read failure; without samples there is nothing to cluster.
            System.err.println("No samples of type \"" + type + "\" could be read from " + path);
            return;
        }
        System.out.println(dataList);
        ISODATA isodata = new ISODATA(
                dataList,
                5,
                2,
                2,
                0.5,
                2,
                4000);
        // The returned centers are not needed here; the point assignment is read from getMap().
        isodata.clustering();
        Map<Integer, List<List<Double>>> map = isodata.getMap();
        StringBuilder label = new StringBuilder();
        StringBuilder data = new StringBuilder();
        for (List<Double> doubleList : dataList) {
            Integer key = findLabel(map, doubleList);
            if (key != null) {
                data.append(doubleList.toString()).append(",");
                label.append(key).append(",");
            }
        }
        System.out.println("label = "+label);
        System.out.println("data = "+data);
    }

    /**
     * Looks up the cluster that contains a point equal to {@code point}.
     *
     * @param map   cluster index to member points
     * @param point sample to look for
     * @return the first cluster index whose members contain an equal point, or null if none does
     */
    private static Integer findLabel(Map<Integer, List<List<Double>>> map, List<Double> point) {
        for (Integer key : map.keySet()) {
            for (List<Double> list : map.get(key)) {
                if (list.equals(point)) {
                    return key;
                }
            }
        }
        return null;
    }

    /**
     * Reads the samples of one type from the first sheet of a workbook. The first row is skipped
     * (presumably a header row — confirm against the workbook); rows that are missing or have no
     * type cell are skipped as well.
     *
     * @param path path of the .xlsx workbook
     * @param type type label a row must carry to be included
     * @return one list of numeric values per matching row, or null if reading failed (the
     *         exception is printed to standard error)
     */
    public static List<List<Double>> readData(String path,String type) {
        // try-with-resources closes the workbook on the failure path as well
        try (XSSFWorkbook xssfWorkbook = new XSSFWorkbook(path)) {
            XSSFSheet sheet = xssfWorkbook.getSheetAt(0);
            List<List<Double>> dataList = new ArrayList<>();
            // getLastRowNum() is the index of the last row, so rows after a gap are still visited
            for (int i = 1; i <= sheet.getLastRowNum(); i++) {
                if (sheet.getRow(i) == null || sheet.getRow(i).getCell(TYPE_COLUMN) == null) {
                    continue;
                }
                if (!type.equals(sheet.getRow(i).getCell(TYPE_COLUMN).getStringCellValue())) {
                    continue;
                }
                List<Double> list = new ArrayList<>();
                for (int j = FIRST_FEATURE_COLUMN; j <= LAST_FEATURE_COLUMN; j++) {
                    list.add(sheet.getRow(i).getCell(j).getNumericCellValue());
                }
                dataList.add(list);
            }
            return dataList;
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }
}
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.8" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
\ No newline at end of file
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/CUMCM2022.iml" filepath="$PROJECT_DIR$/.idea/CUMCM2022.iml" />
</modules>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$/../../.." vcs="Git" />
</component>
</project>
\ No newline at end of file
# -*-coding:utf-8-*-
# Author: WSKH
# Blog: wskh0929.blog.csdn.net
# Time: 2022/9/15 21:50
import pandas as pd
import scipy
from sklearn.cluster import KMeans
# Load the worksheet named "Sheet1" from the competition workbook.
data = pd.read_excel(
    r'D:\2 BachelorDegree\大四上比赛或项目资料\数模国赛\OurGit\cumcm2022\05 Wskh\Python\CUMCM2022\src\data\data.xlsx',
    sheet_name="Sheet1")
columns = data.columns
# One feature vector per row, built from the columns at positions 1..14.
x = [list(data.iloc[row][columns[1:15]]) for row in range(data.shape[0])]
print(x)
# Split the samples into two clusters with a fixed seed and print the labels.
y_pred = KMeans(n_clusters=2, random_state=520).fit_predict(x)
print(y_pred)
import numpy as np
from sklearn import tree
from sklearn.datasets import load_wine #红酒数据集
from sklearn.model_selection import train_test_split
import pandas as pd
import graphviz
import matplotlib.pyplot as plt
# Decision-tree classification demo (DecisionTreeClassifier)
wine = load_wine()  # load the wine dataset
print(wine)
# wine.data           samples
# wine.target         class labels
# wine.feature_names  feature names
# wine.target_names   meaning of the class labels
# Turn the dataset into a single table with pandas so that it is easier to inspect.
table = pd.concat([pd.DataFrame(wine.data), pd.DataFrame(wine.target)], axis=1)
print(table)
# Split into training and test sets: 30% test, 70% training.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(wine.data, wine.target, test_size=0.3)
# Hyper-parameters of the first tree.
clf = tree.DecisionTreeClassifier(
    criterion="entropy",         # impurity measure: "entropy" or "gini"
    random_state=0,              # controls the randomness of the tree
    splitter='random',           # "best": split on the most informative feature; "random": random splits
    max_depth=3,                 # maximum depth of the tree
    min_samples_leaf=10,         # minimum number of samples in a leaf
    min_samples_split=10,        # minimum number of samples a node needs to be split
    max_features=8,              # number of features considered per split
    min_impurity_decrease=0.01,  # minimum impurity decrease required for a split
)
# Train
clf = clf.fit(Xtrain, Ytrain)
# Evaluate
score = clf.score(Xtest, Ytest)
print('决策树评估得分为:', score)
# list = [[1,2,3,4,5,6,7,8,9,10,11,12,13]]
# print(clf.predict(np.array(list)))
# print(clf.predict_proba(np.array(list)))
# Export the tree for drawing.
# Feature names used in the export.
feature_name = ['酒精', '苹果酸', '灰', '灰的碱性', '镁', '总酚', '类黄酮', '非黄烷类酚类', '花青素',
                '颜色强度', '色调', 'od280/od315稀释葡萄酒', '脯氨酸']
dot_data = tree.export_graphviz(
    clf,                                # the model trained above
    feature_names=feature_name,         # feature names defined above
    class_names=['啤酒', '红酒', '白酒'],  # class names
    filled=True,                        # fill the nodes with colour
    # rounded=True                      # round the corners of the boxes
)
# Render the exported tree.
# graph = graphviz.Source(dot_data)
# graph.view(cleanup=True)  # cleanup removes the old file before writing the new one; default is False
# clf.feature_importances_ shows which features the tree actually used.
print(clf.feature_importances_)
# Pair every feature name with its importance so that they are easy to read together.
print(list(zip(feature_name, clf.feature_importances_)))
# Sweep max_depth, record the test score of every tree and plot the scores to pick the best depth.
test = []
for depth in range(1, 11):
    clf = tree.DecisionTreeClassifier(criterion='gini', max_depth=depth).fit(Xtrain, Ytrain)
    score = clf.score(Xtest, Ytest)
    test.append(score)
# Plot
plt.plot(range(1, 11), test, 'r-', label="max_depth")
plt.legend()
plt.show()
# Important attributes and interfaces
clf.apply(Xtest)    # index of the leaf each test sample ends up in
clf.predict(Xtest)  # predicted class of each test sample
\ No newline at end of file
问题1:
**问题1:**
1. 玻璃文物的表面风化与其玻璃类型、纹饰和颜色的关系进行分析(灰色关联分析)
2. 结合玻璃的类型,分析文物样品表面有无风化,化学成分含量的统计规律(灰色关联分析)
1. 玻璃文物的表面风化与其玻璃类型、纹饰和颜色的关系进行分析(卡方分析)
2. 结合玻璃的类型,分析文物样品表面有无风化 化学成分含量的统计规律(描述性统计、
3. 根据风化点检测数据,预测其风化前的化学成分含量(没有思路)
问题2:
**问题2:**
1. 分析高钾玻璃、铅钡玻璃的分类规律
2. 对于每个类别选择合适的化学成分对其进行亚类划分,给出具体的划分方法及划分结果,并对分类结果的合理性和敏感性进行分析
1. 分析高钾玻璃、铅钡玻璃的分类规律(均值雷达图)
2. 对于每个类别选择合适的化学成分对其进行亚类划分,给出具体的划分方法及划分结果,并对分类结果的合理性和敏感性进行分析(聚类)
问题3:
**问题3:**
1. 对未知类型玻璃文物的化学成分进行分析,鉴别其所属类型,并对其分类结果的敏感性进行分析
1. 对未知类型玻璃文物的化学成分进行分析,鉴别其所属类型,并对其分类结果的敏感性进行分析(机器学习算法分类)
问题4:
**问题4:**
1.
\ No newline at end of file
1. 针对不同类型的玻璃文物样品,分析其化学成分之间的关联关系,并比较不同类别之间的化学成分关联关系的差异性(Pearson相关系数)
\ No newline at end of file
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
<component name="ProjectCodeStyleConfiguration">
<state>
<option name="PREFERRED_PROJECT_CODE_STYLE" value="Default" />
</state>
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CompilerConfiguration">
<annotationProcessing>
<profile default="true" name="Default" enabled="true" />
<profile name="Maven default annotation processors profile" enabled="true">
<sourceOutputDir name="target/generated-sources/annotations" />
<sourceTestOutputDir name="target/generated-test-sources/test-annotations" />
<outputRelativeToContentRoot value="true" />
<module name="CUMCM2022" />
</profile>
</annotationProcessing>
</component>
</project>
\ No newline at end of file
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="JavaDoc" enabled="true" level="WARNING" enabled_by_default="true">
<option name="TOP_LEVEL_CLASS_OPTIONS">
<value>
<option name="ACCESS_JAVADOC_REQUIRED_FOR" value="none" />
<option name="REQUIRED_TAGS" value="" />
</value>
</option>
<option name="INNER_CLASS_OPTIONS">
<value>
<option name="ACCESS_JAVADOC_REQUIRED_FOR" value="none" />
<option name="REQUIRED_TAGS" value="" />
</value>
</option>
<option name="METHOD_OPTIONS">
<value>
<option name="ACCESS_JAVADOC_REQUIRED_FOR" value="none" />
<option name="REQUIRED_TAGS" value="@return@param@throws or @exception" />
</value>
</option>
<option name="FIELD_OPTIONS">
<value>
<option name="ACCESS_JAVADOC_REQUIRED_FOR" value="none" />
<option name="REQUIRED_TAGS" value="" />
</value>
</option>
<option name="IGNORE_DEPRECATED" value="false" />
<option name="IGNORE_JAVADOC_PERIOD" value="true" />
<option name="IGNORE_DUPLICATED_THROWS" value="false" />
<option name="IGNORE_POINT_TO_ITSELF" value="false" />
<option name="myAdditionalJavadocTags" value="date" />
</inspection_tool>
</profile>
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="RemoteRepositoriesConfiguration">
<remote-repository>
<option name="id" value="central" />
<option name="name" value="Central Repository" />
<option name="url" value="https://repo.maven.apache.org/maven2" />
</remote-repository>
<remote-repository>
<option name="id" value="central" />
<option name="name" value="Maven Central repository" />
<option name="url" value="https://repo1.maven.org/maven2" />
</remote-repository>
<remote-repository>
<option name="id" value="jboss.community" />
<option name="name" value="JBoss Community repository" />
<option name="url" value="https://repository.jboss.org/nexus/content/repositories/public/" />
</remote-repository>
<remote-repository>
<option name="id" value="central" />
<option name="name" value="Central Repository" />
<option name="url" value="https://maven.aliyun.com/repository/public" />
</remote-repository>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ExternalStorageConfigurationManager" enabled="true" />
<component name="MavenProjectsManager">
<option name="originalFiles">
<list>
<option value="$PROJECT_DIR$/pom.xml" />
</list>
</option>
</component>
<component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" default="true" project-jdk-name="1.8" project-jdk-type="JavaSDK">
<output url="file://$PROJECT_DIR$/out" />
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$/../../.." vcs="Git" />
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.wskh</groupId>
<artifactId>CUMCM2022</artifactId>
<version>1.0-SNAPSHOT</version>
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
</properties>
<dependencies>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>1.18.22</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>4.0.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.0.0</version>
</dependency>
</dependencies>
</project>
\ No newline at end of file
package com.wskh;
import lombok.Data;
import java.util.*;
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Collectors;
/**
* @Author:WSKH
* @ClassName:ISODATA
* @ClassType:
* @Description:
* @Date:2022/2/13/14:23
* @Email:1187560563@qq.com
* @Blog:https://blog.csdn.net/weixin_51545953?type=blog
*/
@Data
public class ISODATA {
// 待分类的原始值
private List<List<Double>> dataList;
// 初始类别个数
private int K = 4;
// 最大迭代次数
private int maxClusterTimes = 2000;
// 聚类的结果
private List<List<List<Double>>> clusterList = new ArrayList<>();
// <种群索引,点集>
Map<Integer, List<List<Double>>> map;
// 每一聚类域中最少的样本数目,若少于此数即不作为一个独立的聚类
int minPopulationPointNum;
// 两个聚类中心间的最小距离,若小于此数,两个聚类需进行合并
double centerMinDistance;
// 在一次迭代运算中可以合并的聚类中心的最多对数
int maxCombineNum;
// 一个聚类域中样本距离分布的标准差
double maxMAE;
/**
* @param dataList 数据集
* @param centerMinDistance 两个聚类中心间的最小距离,若小于此数,两个聚类需进行合并
* @param minPopulationPointNum 每一聚类域中最少的样本数目,若少于此数即不作为一个独立的聚类;
* @param maxCombineNum 在一次迭代运算中可以合并的聚类中心的最多对数
* @param K 初始类别个数
* @param maxClusterTimes 最大迭代次数
* @return
* @Description
* @Author WSKH
*/
public ISODATA(List<List<Double>> dataList, double centerMinDistance, int minPopulationPointNum, int maxCombineNum, double maxMAE, int K, int maxClusterTimes) {
this.dataList = new ArrayList<>(dataList);
this.K = Math.min(K, dataList.size());
this.maxClusterTimes = maxClusterTimes;
this.centerMinDistance = centerMinDistance;
this.minPopulationPointNum = minPopulationPointNum;
this.maxCombineNum = maxCombineNum;
this.maxMAE = maxMAE;
}
/**
* @param
* @return 聚类的结果(簇心)
* @Description 聚类主方法
* @Author WSKH
*/
public List<List<Double>> clustering() throws Exception {
long start = System.currentTimeMillis();
int t = 0;
while (t < maxClusterTimes) {
if (t == 0) {
// 初始化簇心
clusterList.add(initCenterList(K));
} else {
// 获取当前簇心
List<List<Double>> centerList = clusterList.get(t - 1);
// 计算新的簇心
map = new HashMap<>();
int[] counterArr = new int[centerList.size()];
for (List<Double> data : dataList) {
// 当前点和簇心依次比较,找到最近的簇心
double minDis = computeDistance(data, centerList.get(0));
int minIndex = 0;
for (int i = 1; i < centerList.size(); i++) {
double distance = computeDistance(data, centerList.get(i));
if (minDis > distance) {
minDis = distance;
minIndex = i;
}
}
// 将当前点加入最近的种群
if (!map.containsKey(minIndex)) {
List<List<Double>> newPointList = new ArrayList<>();
newPointList.add(data);
map.put(minIndex, newPointList);
} else {
map.get(minIndex).add(data);
}
counterArr[minIndex]++;
}
// 根据均值,计算新的簇心
List<List<Double>> newCenterList = new ArrayList<>();
for (int i = 0; i < centerList.size(); i++) {
if (map.containsKey(i)) {
// 计算簇心
List<Double> newCenter = computeCenter(map.get(i));
newCenterList.add(newCenter);
} else {
throw new RuntimeException("发生了簇缺失");
}
}
// 适应性分裂与合并簇心
newCenterList = adaptiveSplittingAndMerging(newCenterList);
// 将新的簇心,加入集合
clusterList.add(newCenterList);
}
// 如果簇心没有改变,那么就跳出循环
if (t > 0 && !isCenterChange(clusterList.get(t - 1), clusterList.get(t))) {
break;
}
t++;
}
System.out.println("用时" + (System.currentTimeMillis() - start) + "ms");
return clusterList.get(clusterList.size() - 1);
}
/**
* @param centerList 簇心集合
* @return
* @Description 适应性分裂与合并簇心
* @Author WSKH
*/
private List<List<Double>> adaptiveSplittingAndMerging(List<List<Double>> centerList) throws Exception {
int c1 = centerList.size();
List<List<Double>> newCenterList = new ArrayList<>();
// 适应性分裂
map.forEach((k,v)->{
// 中心点
List<Double> center = centerList.get(k);
// 计算SSE 然后推导出MAE
AtomicReference<Double> SSE = new AtomicReference<>(0d);
v.forEach(point->{
for (int i = 0; i < point.size(); i++) {
int finalI = i;
SSE.updateAndGet(v1 -> (v1 + Math.pow(point.get(finalI) - center.get(finalI), 2)));
}
});
if(Math.sqrt(SSE.get()/v.size())>maxMAE){
// 说明可以分裂,找到一个点距离中心点超过
for (int i = 0; i < dataList.size(); i++) {
try {
if(computeDistance(dataList.get(i),center)>centerMinDistance){
newCenterList.add(dataList.get(i));
break;
}else{
if(i==dataList.size()-1){
// 如果没找到那就报错,说明又要分裂,分裂后又要合并,参数设置不合理
throw new Exception("参数centerMinDistance和maxMAE设置不合理");
}
}
} catch (Exception e) {
e.printStackTrace();
}
}
}
});
// 适应性合并
newCenterList.addAll(centerList);
int c2 = newCenterList.size();
int counter = 0; // 记录合并次数
for (int i = 0; i < newCenterList.size(); i++) {
if(counter > maxCombineNum){
break;
}
for (int j = i+1; j < newCenterList.size(); j++) {
if(computeDistance(newCenterList.get(i),newCenterList.get(j))<=centerMinDistance){
// 小于最小距离,合并
List<Double> remove1 = newCenterList.get(i);
List<Double> remove2 = newCenterList.get(j);
// 新中心就是合并点的中点
List<Double> newCenter = new ArrayList<>();
for (int k = 0; k < remove1.size(); k++) {
newCenter.add((remove1.get(k)+remove2.get(k))/2.0);
}
if(i>j){
newCenterList.remove(i);
newCenterList.remove(j);
}else{
newCenterList.remove(i);
newCenterList.remove(j-1);
}
newCenterList.add(newCenter);
i = -1; // 让i从头开始遍历
counter++;
break;
}
}
}
// 返回分裂与合并后的中心点集合
System.out.println("一开始:"+c1+",分裂后:"+c2+",合并后:"+newCenterList.size());
return newCenterList;
}
/**
* @param size 初始化簇心的数量
* @return List<List < Double>>
* @Description 初始化簇心(随机抽取size项作为簇心,尽可能使得初始簇心相互距离较远)
* @Author WSKH
*/
private List<List<Double>> initCenterList(int size) throws Exception {
List<List<Double>> initCenterList = new ArrayList<>();
Set<Integer> set = new HashSet<>();
Double[][] distanceMatrix = new Double[dataList.size()][dataList.size()];
// 随机选取第一个簇心
int r = new Random().nextInt(dataList.size());
set.add(r);
// 选择出其余的聚类中心
while (set.size()<size){
double maxDistance = -1d;
int maxIndex = -1;
for (int i = 0; i < dataList.size(); i++) {
List<Double> data = dataList.get(i);
if(!set.contains(i)){
// 计算当前点,距离最近的已有簇心
double minDistance = Double.MAX_VALUE;
int minIndex = -1;
for (Integer j : set) {
if(distanceMatrix[i][j]==null){
distanceMatrix[i][j] = computeDistance(data,dataList.get(j));
}
if(minDistance>distanceMatrix[i][j]){
minDistance = distanceMatrix[i][j];
minIndex = i;
}
}
// 获取最小距离中最大的那个(最大化点与簇心的最短距离)
if(maxDistance<minDistance){
maxDistance = minDistance;
maxIndex = minIndex;
}
}
}
set.add(maxIndex);
}
// set -> list
set.forEach(i->initCenterList.add(dataList.get(i)));
return initCenterList;
}
/**
* @param p1 点1
* @param p2 点2
* @return double
* @Description 计算两点间距离(欧式)
* @Author WSKH
*/
private double computeDistance(List<Double> p1, List<Double> p2) throws Exception {
if (p1.size() != p2.size()) {
throw new Exception("两点维度不一致");
}
double distance = 0d;
for (int i = 0; i < p1.size(); i++) {
distance += Math.pow((p1.get(i) - p2.get(i)), 2);
}
return Math.sqrt(distance);
}
/**
* @param pointList 点集合
* @return double
* @Description 计算簇心坐标
* @Author WSKH
*/
private List<Double> computeCenter(List<List<Double>> pointList) {
List<Double> result = new ArrayList<>(pointList.get(0));
for (int i = 1; i < pointList.size(); i++) {
for (int j = 0; j < pointList.get(i).size(); j++) {
result.set(j, result.get(j) + pointList.get(i).get(j));
}
}
return result.stream().map(item -> {
return item / pointList.size();
}).collect(Collectors.toList());
}
/**
* @param oldCenterList 上一轮迭代的簇心列表
* @param curCenterList 当前迭代的簇心列表
* @return boolean 改变返回true,没改变返回false
* @Description 判断簇心是否改变
* @Author WSKH
*/
private boolean isCenterChange(List<List<Double>> oldCenterList, List<List<Double>> curCenterList) throws Exception {
if (oldCenterList.size() != curCenterList.size()) {
return true;
}
for (int i = 0; i < oldCenterList.size(); i++) {
for (int j = 0; j < oldCenterList.get(i).size(); j++) {
if (!oldCenterList.get(i).get(j).equals(curCenterList.get(i).get(j))) {
return true;
}
}
}
return false;
}
}
package com.wskh;
import lombok.Data;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
 * Command-line entry point: reads the samples of one type from an Excel workbook, clusters them
 * with {@link ISODATA} and prints each sample together with its cluster label.
 *
 * @Author:WSKH
 * @ClassName:Run
 * @ClassType:
 * @Description:
 * @Date:2022/9/16/21:54
 * @Email:1187560563@qq.com
 * @Blog:https://blog.csdn.net/weixin_51545953?type=blog
 */
public class Run {
    // Workbook used when no path is supplied.
    private static final String DEFAULT_DATA_PATH = "D:\\2 BachelorDegree\\大四上比赛或项目资料\\数模国赛\\OurGit\\cumcm2022\\05 Wskh\\Python\\CUMCM2022\\src\\data\\data.xlsx";
    // Zero-based index of the column holding the type label that rows are filtered on.
    private static final int TYPE_COLUMN = 18;
    // Zero-based, inclusive range of the numeric columns that form one sample.
    private static final int FIRST_FEATURE_COLUMN = 1;
    private static final int LAST_FEATURE_COLUMN = 14;

    /**
     * Clusters the "高钾" and the "铅钡" samples one after the other.
     *
     * @param args optional; {@code args[0]} overrides the default workbook path
     */
    public static void main(String[] args) throws Exception {
        String path = args.length > 0 ? args[0] : DEFAULT_DATA_PATH;
        run("高钾", path);
        System.out.println("==================================================================");
        run("铅钡", path);
    }

    /**
     * Clusters the samples of the given type read from the default workbook.
     *
     * @param type type label to select
     */
    public static void run(String type) throws Exception {
        run(type, DEFAULT_DATA_PATH);
    }

    /**
     * Clusters the samples of the given type read from {@code path} and prints the labels and the
     * samples. Nothing is clustered when no sample could be read.
     *
     * @param type type label to select
     * @param path path of the .xlsx workbook
     */
    public static void run(String type, String path) throws Exception {
        List<List<Double>> dataList = readData(path, type);
        if (dataList == null || dataList.isEmpty()) {
            // readData already reported a read failure; without samples there is nothing to cluster.
            System.err.println("No samples of type \"" + type + "\" could be read from " + path);
            return;
        }
        System.out.println(dataList);
        ISODATA isodata = new ISODATA(
                dataList,
                5,
                2,
                2,
                0.5,
                2,
                4000);
        // The returned centers are not needed here; the point assignment is read from getMap().
        isodata.clustering();
        Map<Integer, List<List<Double>>> map = isodata.getMap();
        StringBuilder label = new StringBuilder();
        StringBuilder data = new StringBuilder();
        for (List<Double> doubleList : dataList) {
            Integer key = findLabel(map, doubleList);
            if (key != null) {
                data.append(doubleList.toString()).append(",");
                label.append(key).append(",");
            }
        }
        System.out.println("label = "+label);
        System.out.println("data = "+data);
    }

    /**
     * Looks up the cluster that contains a point equal to {@code point}.
     *
     * @param map   cluster index to member points
     * @param point sample to look for
     * @return the first cluster index whose members contain an equal point, or null if none does
     */
    private static Integer findLabel(Map<Integer, List<List<Double>>> map, List<Double> point) {
        for (Integer key : map.keySet()) {
            for (List<Double> list : map.get(key)) {
                if (list.equals(point)) {
                    return key;
                }
            }
        }
        return null;
    }

    /**
     * Reads the samples of one type from the first sheet of a workbook. The first row is skipped
     * (presumably a header row — confirm against the workbook); rows that are missing or have no
     * type cell are skipped as well.
     *
     * @param path path of the .xlsx workbook
     * @param type type label a row must carry to be included
     * @return one list of numeric values per matching row, or null if reading failed (the
     *         exception is printed to standard error)
     */
    public static List<List<Double>> readData(String path,String type) {
        // try-with-resources closes the workbook on the failure path as well
        try (XSSFWorkbook xssfWorkbook = new XSSFWorkbook(path)) {
            XSSFSheet sheet = xssfWorkbook.getSheetAt(0);
            List<List<Double>> dataList = new ArrayList<>();
            // getLastRowNum() is the index of the last row, so rows after a gap are still visited
            for (int i = 1; i <= sheet.getLastRowNum(); i++) {
                if (sheet.getRow(i) == null || sheet.getRow(i).getCell(TYPE_COLUMN) == null) {
                    continue;
                }
                if (!type.equals(sheet.getRow(i).getCell(TYPE_COLUMN).getStringCellValue())) {
                    continue;
                }
                List<Double> list = new ArrayList<>();
                for (int j = FIRST_FEATURE_COLUMN; j <= LAST_FEATURE_COLUMN; j++) {
                    list.add(sheet.getRow(i).getCell(j).getNumericCellValue());
                }
                dataList.add(list);
            }
            return dataList;
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }
}
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.8" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
\ No newline at end of file
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/CUMCM2022.iml" filepath="$PROJECT_DIR$/.idea/CUMCM2022.iml" />
</modules>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$/../../.." vcs="Git" />
</component>
</project>
\ No newline at end of file
# -*-coding:utf-8-*-
# Author: WSKH
# Blog: wskh0929.blog.csdn.net
# Time: 2022/9/15 21:50
import pandas as pd
import scipy
from sklearn.cluster import KMeans
# Load the "Sheet1" worksheet of the data workbook into a DataFrame.
data = pd.read_excel(
    r'D:\2 BachelorDegree\大四上比赛或项目资料\数模国赛\OurGit\cumcm2022\05 Wskh\Python\CUMCM2022\src\data\data.xlsx',
    sheet_name="Sheet1")
columns = data.columns
# Columns 1..14 (by position) are the values fed to the clustering.
feature_columns = columns[1:15]
# One plain list of feature values per row of the sheet.
x = [list(data.iloc[row][feature_columns]) for row in range(len(data))]
print(x)
# Two-cluster K-Means with a fixed seed; prints the cluster index of every row.
y_pred = KMeans(n_clusters=2, random_state=520).fit_predict(x)
print(y_pred)
import numpy as np
from sklearn import tree
from sklearn.datasets import load_wine #红酒数据集
from sklearn.model_selection import train_test_split
import pandas as pd
import graphviz
import matplotlib.pyplot as plt
# DecisionTreeClassifier (classification tree) walkthrough.
wine = load_wine()  # load the wine dataset
print(wine)
# Attributes of the loaded dataset:
#   wine.data           feature values
#   wine.target         class labels
#   wine.feature_names  feature names
#   wine.target_names   meaning of the class labels
# Put features and labels side by side in one pandas table for easier inspection.
features_frame = pd.DataFrame(wine.data)
labels_frame = pd.DataFrame(wine.target)
table = pd.concat([features_frame, labels_frame], axis=1)
print(table)
# Split into training and test sets: 30% test, 70% training.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(
    wine.data,      # features
    wine.target,    # labels
    test_size=0.3,
)
# Hyper-parameters of the demonstration tree.
tree_params = dict(
    criterion="entropy",         # impurity measure: "entropy" (information entropy) or "gini"
    random_state=0,              # controls the randomness of the tree
    splitter='random',           # "best": prefer the most informative feature; "random": random splits
    max_depth=3,                 # maximum depth of the tree
    min_samples_leaf=10,         # minimum number of samples a leaf must contain
    min_samples_split=10,        # minimum number of samples a node must contain to be split
    max_features=8,              # number of features considered when splitting
    min_impurity_decrease=0.01,  # minimum impurity decrease required for a split
)
clf = tree.DecisionTreeClassifier(**tree_params)
# Train on the training split.
clf = clf.fit(Xtrain, Ytrain)
# Evaluate on the test split.
score = clf.score(Xtest, Ytest)
print('决策树评估得分为:', score)
# Example of predicting a single hand-written sample (disabled):
# list = [[1,2,3,4,5,6,7,8,9,10,11,12,13]]
# print(clf.predict(np.array(list)))
# print(clf.predict_proba(np.array(list)))
# Export the trained tree in Graphviz DOT format.
# Display names for the 13 features, in dataset column order.
feature_name = [
    '酒精', '苹果酸', '灰', '灰的碱性', '镁', '总酚', '类黄酮', '非黄烷类酚类', '花青素',
    '颜色强度', '色调', 'od280/od315稀释葡萄酒', '脯氨酸',
]
dot_data = tree.export_graphviz(
    clf,                                  # the classifier trained above
    feature_names=feature_name,           # feature display names
    class_names=['啤酒', '红酒', '白酒'],  # class display names
    filled=True,                          # fill the nodes with colour
    # rounded=True                        # draw nodes with rounded corners
)
# Rendering the exported tree (disabled):
# graph = graphviz.Source(dot_data)
# graph.view(cleanup=True)  # cleanup=True removes the old file first; the default is False
# feature_importances_ shows which features the tree actually used.
importances = clf.feature_importances_
print(importances)
# Pair each feature name with its importance for easier reading.
print(list(zip(feature_name, importances)))
# Sweep max_depth from 1 to 10, scoring each tree on the test split, to pick a good value.
test = []
for depth in range(1, 11):
    clf = tree.DecisionTreeClassifier(criterion='gini', max_depth=depth)
    clf = clf.fit(Xtrain, Ytrain)
    score = clf.score(Xtest, Ytest)
    test.append(score)
# Plot test score against max_depth.
plt.plot(range(1, 11), test, 'r-', label="max_depth")
plt.legend()
plt.show()
# Other useful methods (results are not used here):
clf.apply(Xtest)    # index of the leaf each test sample ends up in
clf.predict(Xtest)  # predicted class for each test sample
\ No newline at end of file
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册