未验证 提交 10afb4a5 编写于 作者: rictjo's avatar rictjo 提交者: GitHub

update DBSCAN

上级 a726d8a3
......@@ -550,11 +550,11 @@ def backprojection_clustering ( analyte_df , bRanked=False , n_dimensions=2 ,
return ( cluster_coords_f,cluster_coords_s )
def dbscan ( data_frame = None , distance_matrix = None ,
eps = 0.11, minPts = 1 ) :
eps = None, minPts = None ) :
#
# FOR A DESCRIPTION OF THE CONNECTIVITY, READ PAGE 30 (PAGE 16 BY THE DOCUMENT'S INTERNAL NUMBERING) OF:
# https://kth.diva-portal.org/smash/get/diva2:748464/FULLTEXT01.pdf
from impetuous.clustering import connectivity , absolute_coordinates_to_distance_matrix
from impetuous.clustering import absolute_coordinates_to_distance_matrix
import operator
if not operator.xor( data_frame is None , distance_matrix is None ) :
print ( "ONLY SUPPLY A SINGE DATA FRAME OR A DISTANCE MATRIX" )
......@@ -564,17 +564,28 @@ def dbscan ( data_frame = None , distance_matrix = None ,
if not data_frame is None :
if not 'pandas' in str(type(data_frame)) :
print ( "ONLY SUPPLY A SINGE DATA FRAME WITH ABSOLUTE COORDINATES" )
print ( "DATA MATRICES NEEDS TO BE SPECIFIED WITH \" distance_matrix = ... \" " )
print ( "DATA MATRICES NEEDS TO BE SPECIFIED WITH \" data_frame = ... \" " )
print ( "dbscan FAILED" )
exit ( 1 )
if bVerbose :
print ( data_frame )
if not ( 'float' in str(type(eps)).lower() and 'int' in str(type(minPts)).lower() ) :
print ( "TO CALL THE dbscan PLEASE SPECIFY AT LEAST A DATA FRAME OR")
print ( "ITS CORRESPONDING DISTANCE MATRIX AS WELL AS THE DISTANCE CUTOFF PARAMETER" )
print ( "AND THE MINIMAL AMOUNT OF NEIGHBOUR POINTS TO CONSIDER IT A CLUSTER")
print ( "dbscan ( data_frame = None , distance_matrix = None , eps = None, minPts = None )" )
distance_matrix = absolute_coordinates_to_distance_matrix(data_frame.values)
clustercontent , clustercontacts = connectivity(distance_matrix,eps)
isNoise = np.sum(distance_matrix<eps,0)-1 < minPts
i_ = 0
for ib in isNoise :
if ib :
distance_matrix [ i_] = ( 1+eps )*10.0
distance_matrix.T[i_] = ( 1+eps )*10.0
distance_matrix[i_][i_] = 0.
i_ = i_+1
clustercontent , clustercontacts = connectivity(distance_matrix,eps)
return ( {'cluster content': clustercontent, 'clusterid-particleid' : clustercontacts, 'is noise':isNoise} )
if __name__ == '__main__' :
if False :
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册