From e1f6aedca69115df760f2f37763fd81957e3d612 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20Tj=C3=B6rnhammar?= Date: Thu, 19 Aug 2021 08:10:44 +0200 Subject: [PATCH] =?UTF-8?q?=CD=84ex=20dbs=20wrds++?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 4 ++-- setup.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 84d4f51..95e8c93 100755 --- a/README.md +++ b/README.md @@ -483,11 +483,11 @@ melanosome membrane -> mitochondrion full cell -> mitochondrial outer membrane full cell -> mitochondrial intermembrane space ``` -the definition for the mitochondrion is fully contained within the melanosome membrane definition and so testing that group should try and account for the mitochondrion. This can be done with the `HierarchicalEnrichment` routine exemplified above. We know that the melanosome membrane is associated with sight and that being diabetic is associated with mitochondrial dysfunction, but also that diabetic retinopathy affects diabetics and we also see that there is a knowledge based genetic connection relating these two spatially distinct regions of the cell. +the definition for the mitochondrion is fully contained within the melanosome membrane definition and so testing that group should try and account for the mitochondrion. This can be done with the `HierarchicalEnrichment` routine exemplified above. We know that the melanosome membrane is associated with sight and that being diabetic is associated with mitochondrial dysfunction, but also that diabetic retinopathy affects diabetics. We see here that there is a knowledge based genetic connection relating these two spatially distinct regions of the cell. 
# [Example 9](https://gist.githubusercontent.com/richardtjornhammar/e84056e0b10f8d550258a1e8944ee375/raw/45fb8322487ff3a384e7f56eb06ac1073aee4da1/example9.py): Impetuous [deterministic DBSCAN](https://github.com/richardtjornhammar/impetuous/blob/master/src/impetuous/clustering.py) (search for dbscan) -[DBSCAN](https://en.wikipedia.org/wiki/DBSCAN) is a clustering algorithm that can be seen as a way of rejecting points that are positioned in low dense regions of a point cloud. This introduces holes and may result in a larger segment, that would otherwise be connected via a non dense link to become disconnected and form two segments, or clusters, instead. The rejection criterion is simple. The central concern is to evaluate a distance matrix with an applied cutoff this turns the distances into true or false values depending if a pair distance between point i and j are within the distance cutoff. This new binary Neighbour matrix tells you wether or not two points are neighbours. The DBSCAN criterion states that a point is not part of any cluster if it has fewer than `minPts` neighbors. Once you've calculated the distance matrix you can immediately evaluate the number of neighbors each point has, via . If the rejection vector R of a point is True then all the pairwise distances in the distance matrix of that point is set to value larger than epsilon. This ensures that distance matrix search will reject these as Neighbours of any other for the choose epsilon. By tracing out all points that are neighbors and assessing the [connectivity](https://github.com/richardtjornhammar/impetuous/blob/master/src/impetuous/clustering.py) you can find all the clusters. +[DBSCAN](https://en.wikipedia.org/wiki/DBSCAN) is a clustering algorithm that can be seen as a way of rejecting points that are positioned in low dense regions of a point cloud. 
This introduces holes and may cause a larger segment, that would otherwise be connected via a non-dense link, to become disconnected and form two segments, or clusters, instead. The rejection criterion is simple. The central concern is to evaluate a distance matrix with an applied cutoff; this turns the distances into true or false values depending on whether the pair distance between point i and j is within the distance cutoff. This new binary Neighbour matrix tells you whether or not two points are neighbours. The DBSCAN criterion states that a point is not part of any cluster if it has fewer than `minPts` neighbors. Once you've calculated the distance matrix you can immediately evaluate the number of neighbors each point has, via the row sums of the Neighbour matrix. If the value of the rejection vector R for a point is True then all the pairwise distances of that point in the distance matrix are set to a value larger than epsilon. This ensures that a distance matrix search will reject these points as Neighbours of any other point for the chosen epsilon. By tracing out all points that are neighbors and assessing the [connectivity](https://github.com/richardtjornhammar/impetuous/blob/master/src/impetuous/clustering.py) you can find all the clusters. In this [example](https://gist.githubusercontent.com/richardtjornhammar/e84056e0b10f8d550258a1e8944ee375/raw/45fb8322487ff3a384e7f56eb06ac1073aee4da1/example9.py) we do exactly this for two Gaussian point clouds. 
The dbscan search is just a single line `dbscan ( data_frame = point_cloud_df , eps=0.45 , minPts=4 )`, while the last 27 lines are just there to plot the [results](https://bl.ocks.org/richardtjornhammar/raw/0cc0ff037e88c76a9d65387155674fd1/?raw=true) with [graph revision dates](https://gist.github.com/richardtjornhammar/0cc0ff037e88c76a9d65387155674fd1/revisions) diff --git a/setup.py b/setup.py index dbd8dd2..08deee5 100755 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ with open("README.md", "r") as fh: setuptools.setup( name = "impetuous-gfa", - version = "0.67.1", + version = "0.68.0", author = "Richard Tjörnhammar", author_email = "richard.tjornhammar@gmail.com", description = "Impetuous Quantification, a Statistical Learning library for Humans : Alignments, Clustering, Enrichments and Group Analysis", -- GitLab