Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
rictjo
impetuous
提交
5227c3be
I
impetuous
项目概览
rictjo
/
impetuous
10 个月 前同步成功
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
I
impetuous
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
5227c3be
编写于
3月 11, 2021
作者:
rictjo
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
hierarch pc gmt
上级
dae1aee3
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
95 addition
and
38 deletion
+95
-38
setup.py
setup.py
+1
-1
src/impetuous/hierarchical.py
src/impetuous/hierarchical.py
+94
-37
未找到文件。
setup.py
浏览文件 @
5227c3be
...
...
@@ -5,7 +5,7 @@ with open("README.md", "r") as fh:
setuptools
.
setup
(
name
=
"impetuous-gfa"
,
version
=
"0.42.
1
"
,
version
=
"0.42.
5
"
,
author
=
"Richard Tjörnhammar"
,
author_email
=
"richard.tjornhammar@gmail.com"
,
description
=
"Impetuous Quantification, a Statistical Learning library for Humans : Alignments, Clustering, Enrichments and Group Analysis"
,
...
...
src/impetuous/hierarchical.py
浏览文件 @
5227c3be
...
...
@@ -177,7 +177,12 @@ def calculate_hierarchy_matrix ( data_frame = None ,
def
parent_child_matrix_relationships
(
hierarchy_matrix
,
bVerbose
=
False
,
bRemoveRedundant
=
True
)
:
bRemoveRedundant
=
True
,
separators
=
[
'_'
,
'-'
]
)
:
print
(
'WARNING: DEVELOPMENTAL VERSION'
)
s_
=
separators
M
=
hierarchy_matrix
ns
,
ms
=
np
.
shape
(
M
)
if
not
'pandas'
in
str
(
type
(
M
)):
...
...
@@ -222,7 +227,7 @@ def parent_child_matrix_relationships ( hierarchy_matrix ,
children_level_name
,
cidx
]
,
index
=
[
'Parent level label'
,
'Parent level cluster index'
,
'Child level label'
,
'Child level cluster index'
]
,
name
=
str
(
I
)
+
'_'
+
str
(
pidx
)
+
'-'
+
str
(
J
)
+
'_'
+
str
(
cidx
)
)
name
=
str
(
I
)
+
s_
[
0
]
+
str
(
pidx
)
+
s_
[
1
]
+
str
(
J
)
+
s_
[
0
]
+
str
(
cidx
)
)
pcrel
.
append
(
pd
.
DataFrame
(
pcser
)
)
if
len
(
pcrel
)
>
0
:
if
pc_df
is
None
:
...
...
@@ -235,15 +240,15 @@ def parent_child_matrix_relationships ( hierarchy_matrix ,
idx_rename
=
{}
for
item
in
lookup
.
items
():
if
len
(
item
[
1
])
>
1
:
orig
=
str
(
item
[
1
][
0
][
2
])
+
'_'
+
str
(
item
[
1
][
0
][
1
])
+
'-'
+
\
str
(
item
[
1
][
0
][
-
1
])
+
'_'
+
str
(
item
[
1
][
0
][
-
2
])
new
=
str
(
item
[
1
][
0
][
2
])
+
'_'
+
str
(
item
[
1
][
0
][
1
])
+
'-'
+
\
str
(
item
[
1
][
-
1
][
-
1
])
+
'_'
+
str
(
item
[
1
][
-
1
][
-
2
])
orig
=
str
(
item
[
1
][
0
][
2
])
+
s_
[
0
]
+
str
(
item
[
1
][
0
][
1
])
+
s_
[
1
]
+
\
str
(
item
[
1
][
0
][
-
1
])
+
s_
[
0
]
+
str
(
item
[
1
][
0
][
-
2
])
new
=
str
(
item
[
1
][
0
][
2
])
+
s_
[
0
]
+
str
(
item
[
1
][
0
][
1
])
+
s_
[
1
]
+
\
str
(
item
[
1
][
-
1
][
-
1
])
+
s_
[
0
]
+
str
(
item
[
1
][
-
1
][
-
2
])
if
bVerbose
:
print
(
item
)
print
(
str
(
item
[
1
][
0
][
2
])
+
'_'
+
str
(
item
[
1
][
0
][
1
])
)
print
(
str
(
item
[
1
][
0
][
-
1
])
+
'_'
+
str
(
item
[
1
][
0
][
-
2
])
)
print
(
str
(
item
[
1
][
-
1
][
-
1
])
+
'_'
+
str
(
item
[
1
][
-
1
][
-
2
])
)
print
(
str
(
item
[
1
][
0
][
2
])
+
s_
[
0
]
+
str
(
item
[
1
][
0
][
1
])
)
print
(
str
(
item
[
1
][
0
][
-
1
])
+
s_
[
0
]
+
str
(
item
[
1
][
0
][
-
2
])
)
print
(
str
(
item
[
1
][
-
1
][
-
1
])
+
s_
[
0
]
+
str
(
item
[
1
][
-
1
][
-
2
])
)
print
(
orig
,
new
)
print
(
pc_df
.
loc
[
orig
,:])
pc_df
.
loc
[
orig
,
'Child level label'
]
=
item
[
1
][
-
1
][
-
3
]
...
...
@@ -252,37 +257,89 @@ def parent_child_matrix_relationships ( hierarchy_matrix ,
pc_df
=
pc_df
.
rename
(
index
=
idx_rename
)
return
(
pc_df
)
if
__name__
==
'__main__'
:
def
create_cpgmt_lookup
(
pcdf
,
separators
=
[
'_'
,
'-'
]
):
s_
=
separators
all_parents
=
list
(
set
([
v
.
split
(
s_
[
1
])[
0
]
for
v
in
pcdf
.
index
.
values
]))
lookup
=
{
'content'
:[
'children'
,
'descriptions'
,
'parts'
]}
children
,
descriptions
,
parts
=
[]
,
[]
,
[]
for
parent
in
all_parents
:
selected
=
pcdf
.
loc
[
[
idx
for
idx
in
pcdf
.
index
.
values
if
parent
==
idx
.
split
(
s_
[
1
])[
0
]],:]
for
i_
in
selected
.
index
:
a_level
=
pcdf
.
loc
[
i_
,
[
c
for
c
in
pcdf
.
columns
if
'label'
in
c
]
]
.
values
[
1
]
a_cluster
=
pcdf
.
loc
[
i_
,
[
c
for
c
in
pcdf
.
columns
if
'index'
in
c
]
]
.
values
[
1
]
collected_parts
=
M
.
columns
.
values
[
M
.
loc
[
a_level
].
values
==
a_cluster
]
p_
=
pcdf
.
loc
[
i_
,
[
c
for
c
in
pcdf
.
columns
if
'label'
in
c
]
]
.
values
[
0
]
+
s_
[
0
]
+
\
str
(
pcdf
.
loc
[
i_
,
[
c
for
c
in
pcdf
.
columns
if
'index'
in
c
]
]
.
values
[
0
])
children
.
append
(
'level'
+
i_
.
split
(
s_
[
1
])[
-
1
]
)
descriptions
.
append
(
p_
)
parts
.
append
(
collected_parts
)
lookup
[
'children'
]
=
children
lookup
[
'parts'
]
=
parts
lookup
[
'descriptions'
]
=
descriptions
return
(
lookup
)
print
(
"hierarchy matrix test"
)
R
=
np
.
random
.
rand
(
90
).
reshape
(
30
,
3
)
P
=
np
.
zeros
(
90
).
reshape
(
30
,
3
)
P
[
1
:
10
,:
]
+=
1
P
[
0
:
5
,:
]
+=
2
P
[
20
:
,:
]
+=
3
P
[
15
:
20
,:
]
+=
2
P
[
25
:
,:
]
+=
2
def
write_cpgmt
(
lookup
,
filename
=
'childparentparts.gmt'
,
bVerbose
=
False
)
:
if
bVerbose
:
print
(
"""
If you want to check specific level clusters
using traditional methods such as GSEA
then you'll need to create a gmt file """
)
if
'content'
in
lookup
:
with
open
(
filename
,
'w'
)
as
of
:
print
(
'
\n
'
.
join
(
[
'
\t
'
.
join
([
c
,
d
,
'
\t
'
.
join
(
p
)])
for
\
(
c
,
d
,
p
)
in
zip
(
*
[
lookup
[
c
]
for
c
in
lookup
[
'content'
]
])
])
,
file
=
of
)
if
__name__
==
'__main__'
:
pdf
=
pd
.
DataFrame
(
P
+
R
,
if
True
:
#
bVerbose
=
False
if
bVerbose
:
print
(
"For creating pc and gmt files"
)
print
(
"note that the description includes the parent as a reference"
)
print
(
"to create a pc file you should reorder i.e. using"
)
print
(
"$ cat childparentparts.gmt | awk '{print $2,$1}'"
)
print
(
"make sure to use the correct delimiter"
,
'-- the rich saiga'
)
#
pdf
=
pd
.
read_csv
(
'../data/genes.tsv'
,
'
\t
'
,
index_col
=
0
)
M
,
L
=
calculate_hierarchy_matrix
(
pdf
)
cpgl
=
create_cpgmt_lookup
(
parent_child_matrix_relationships
(
M
)
,
separators
=
[
'_'
,
'-'
]
)
write_cpgmt
(
cpgl
)
if
True
:
print
(
"hierarchy matrix test"
)
R
=
np
.
random
.
rand
(
90
).
reshape
(
30
,
3
)
P
=
np
.
zeros
(
90
).
reshape
(
30
,
3
)
P
[
1
:
10
,:
]
+=
1
P
[
0
:
5
,:
]
+=
2
P
[
20
:
,:
]
+=
3
P
[
15
:
20
,:
]
+=
2
P
[
25
:
,:
]
+=
2
pdf
=
pd
.
DataFrame
(
P
+
R
,
index
=
[
'pid'
+
str
(
i
)
for
i
in
range
(
30
)]
,
columns
=
[
'x'
,
'y'
,
'z'
]
)
M
,
L
=
calculate_hierarchy_matrix
(
pdf
)
print
(
M
)
parent_child_matrix_relationships
(
M
)
M
,
L
=
calculate_hierarchy_matrix
(
pdf
)
print
(
M
)
parent_child_matrix_relationships
(
M
)
from
impetuous.visualisation
import
*
X
,
Y
,
W
,
Z
=
[],[],[],[]
for
item
in
L
.
items
():
X
.
append
(
item
[
1
][
1
])
W
.
append
(
item
[
1
][
1
])
Z
.
append
(
item
[
1
][
2
])
Y
.
append
(
len
(
set
(
M
.
loc
[
item
[
0
]].
values
)))
from
bokeh.plotting
import
show
#
# SHOW THE COORDINATION AND SEGREGATION FUNCTIONS
# BOTH ARE WELL KNOWN FROM STATISTICAL PHYSICS
show
(
plotter
(
[
X
,
W
]
,
[
Y
,
Z
]
,
[
nice_colors
[
0
],
nice_colors
[
2
]]
,
legends
=
[
'segregation'
,
'coordination'
],
axis_labels
=
[
'distance'
,
'Number'
])
)
from
impetuous.visualisation
import
*
X
,
Y
,
W
,
Z
=
[],[],[],[]
for
item
in
L
.
items
():
X
.
append
(
item
[
1
][
1
])
W
.
append
(
item
[
1
][
1
])
Z
.
append
(
item
[
1
][
2
])
Y
.
append
(
len
(
set
(
M
.
loc
[
item
[
0
]].
values
)))
from
bokeh.plotting
import
show
#
# SHOW THE COORDINATION AND SEGREGATION FUNCTIONS
# BOTH ARE WELL KNOWN FROM STATISTICAL PHYSICS
#
show
(
plotter
(
[
X
,
W
]
,
[
Y
,
Z
]
,
[
nice_colors
[
0
],
nice_colors
[
2
]]
,
legends
=
[
'segregation'
,
'coordination'
],
axis_labels
=
[
'distance'
,
'Number'
])
)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录