Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
rictjo
impetuous
提交
728bef88
I
impetuous
项目概览
rictjo
/
impetuous
10 个月 前同步成功
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
I
impetuous
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
728bef88
编写于
8月 08, 2021
作者:
rictjo
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
word hierarchy pc lists for tests and validation stats
上级
6ebc8a96
变更
3
展开全部
隐藏空白更改
内联
并排
Showing
3 changed file
with
200 addition
and
3 deletion
+200
-3
new_compartment_genes.gmt
new_compartment_genes.gmt
+104
-0
setup.py
setup.py
+1
-1
src/impetuous/hierarchical.py
src/impetuous/hierarchical.py
+95
-2
未找到文件。
new_compartment_genes.gmt
0 → 100644
浏览文件 @
728bef88
此差异已折叠。
点击以展开。
setup.py
浏览文件 @
728bef88
...
...
@@ -5,7 +5,7 @@ with open("README.md", "r") as fh:
setuptools
.
setup
(
name
=
"impetuous-gfa"
,
version
=
"0.6
1
.0"
,
version
=
"0.6
2
.0"
,
author
=
"Richard Tjörnhammar"
,
author_email
=
"richard.tjornhammar@gmail.com"
,
description
=
"Impetuous Quantification, a Statistical Learning library for Humans : Alignments, Clustering, Enrichments and Group Analysis"
,
...
...
src/impetuous/hierarchical.py
浏览文件 @
728bef88
...
...
@@ -326,9 +326,90 @@ def write_cpgmt ( lookup ,
(
c
,
d
,
p
)
in
zip
(
*
[
lookup
[
c
]
for
c
in
lookup
[
'content'
]
])
])
,
file
=
of
)
# THE RICH SAIGA STRIKES AGAIN!!!!!!
# RICHARD TJÖRNHAMMAR IS THE BEST!!!
def ordered_remove(str, delete):
    """Return ``str`` with every substring listed in ``delete`` removed.

    The entries of ``delete`` are applied one after another, in the order
    given, each on the result of the previous removal; the order can
    therefore change the outcome.

    Parameters
    ----------
    str : the text to clean (the name shadows the builtin but is kept
        because it is part of the call signature).
    delete : iterable of substrings to strip out.

    Returns
    -------
    The cleaned text; the input unchanged when ``delete`` is empty.
    """
    cleaned = str
    for fragment in delete:
        # Every occurrence of the fragment is replaced by the empty string.
        cleaned = cleaned.replace(fragment, '')
    return cleaned
def build_pclist_word_hierarchy(filename='new_compartment_genes.gmt',
                                delete=['\n'],
                                group_id_prefix='COMP',
                                analyte_prefix='ENSG',
                                root_name='COMP0000000000',
                                bReturnList=False):
    """Return the parent-child list of the word hierarchy latently present
    in the analyte definitions of a GMT file.

    Each line of ``filename`` that contains ``analyte_prefix`` is cleaned
    with ``ordered_remove(line, delete)`` and split on tabs: field 0 is used
    as the group id, field 1 as its description and the remaining fields as
    the group's set of analytes ("words").  A group is listed under a parent
    when its word set is a subset of the parent's word set and it passes the
    relative check in the main loop below.  A synthetic root named
    ``root_name`` owns the union of all words.

    Parameters
    ----------
    filename : path of the GMT file to read.  A warning is printed (no
        exception is raised) when the name does not contain '.gmt'.
    delete : substrings removed from every line before splitting.  The
        default list is only iterated, never mutated.
    group_id_prefix : not used by this function; kept for interface
        compatibility.
    analyte_prefix : lines that do not contain this text are skipped.
    root_name : id given to the synthetic root group.
    bReturnList : when true, also return the group dictionary.

    Returns
    -------
    ``PClist``, a list of ``[parent_id, child_id]`` pairs, or
    ``[PClist, D_i]`` when ``bReturnList`` is true, where ``D_i`` maps each
    group id (and ``root_name``, described as 'full cell') to
    ``(description, word_set, len(word_set))``.
    """
    # Best-effort warning only.  The original test was inverted and printed
    # the message exactly when the name DID contain '.gmt'.
    if '.gmt' not in filename:
        print('MUST HAVE A VALID GMT FILE')

    S_M = set()   # union of the words of every group read from the file
    D_i = dict()  # group id -> (description, word set, number of words)

    with open(filename, 'r') as gmt_file:
        for line in gmt_file:
            # The prefix test is done on the raw line, as in the original.
            if analyte_prefix not in line:
                continue
            lsp = ordered_remove(line, delete).split('\t')
            S_i = set(lsp[2:])
            D_i[lsp[0]] = (lsp[1], S_i, len(S_i))
            S_M |= S_i

    def is_descendant(sj, sk):
        # True when every word of sj is also in sk.
        return sj <= sk

    def relative_idx(sj, sk):
        # Number of words of sk that are missing from sj.
        return len(sk - sj)

    # The root (owning all words) followed by every group read from the file.
    all_potential_parents = [[root_name, S_M]]
    all_potential_parents.extend(
        [group_id, entry[1]] for group_id, entry in D_i.items()
    )

    PClist = []
    for parent_id, parent_words in all_potential_parents:
        # Bucket every descendant of this parent by how many of the parent's
        # words it lacks.
        lookup = {}
        for group_id, entry in D_i.items():
            words = entry[1]
            if is_descendant(words, parent_words):
                level = relative_idx(words, parent_words)
                lookup.setdefault(level, []).append(group_id)
        # Levels in ascending order of missing words; keys are unique so the
        # member lists are never compared by the sort.
        ledger = sorted(lookup.items())

        for ie_, (l1, level_members) in enumerate(ledger):
            for potential_child in level_members:
                # A group is never its own child.  NOTE: this `break` (kept
                # from the original) also stops the scan of the members
                # listed after the parent at this same level.
                if potential_child == parent_id:
                    break
                pchild_words = D_i[potential_child][1]
                pchild_label = D_i[potential_child][0]
                bIsChild = True
                # Compare against relatives at this level and every level
                # closer to the parent, nearest level last.
                for je_ in range(ie_, -1, -1):
                    l2, relatives = ledger[je_]
                    for relative in relatives:
                        # Entries sharing the candidate's description are
                        # ignored (this includes the candidate itself).
                        if D_i[relative][0] == pchild_label:
                            continue
                        relative_words = D_i[relative][1]
                        # Same truth value as the original
                        # len(a ^ b) > 0 or (len(a ^ b) == 0 and l2 == l1).
                        bIsChild = (relative_words != pchild_words
                                    or l2 == l1)
                        if not bIsChild:
                            # Only leaves the scan of this level; the outer
                            # level loop continues, as in the original.
                            break
                if bIsChild:
                    PClist.append([parent_id, potential_child])

    # Register the synthetic root only after the scan so that it is never
    # treated as a candidate child.
    D_i[root_name] = ('full cell', S_M, len(S_M))
    if bReturnList:
        return [PClist, D_i]
    return PClist
if
__name__
==
'__main__'
:
if
Tru
e
:
if
Fals
e
:
#
bVerbose
=
False
if
bVerbose
:
...
...
@@ -342,7 +423,7 @@ if __name__ == '__main__' :
cpgl
=
create_cpgmt_lookup
(
parent_child_matrix_relationships
(
M
)
,
separators
=
[
'_'
,
'-'
]
)
write_cpgmt
(
cpgl
)
if
Tru
e
:
if
Fals
e
:
print
(
"hierarchy matrix test"
)
R
=
np
.
random
.
rand
(
90
).
reshape
(
30
,
3
)
P
=
np
.
zeros
(
90
).
reshape
(
30
,
3
)
...
...
@@ -376,3 +457,15 @@ if __name__ == '__main__' :
[
nice_colors
[
0
],
nice_colors
[
2
]]
,
legends
=
[
'segregation'
,
'coordination'
],
axis_labels
=
[
'distance'
,
'Number'
])
)
if
True
:
PClist
,
D_i
=
build_pclist_word_hierarchy
(
filename
=
'new_compartment_genes.gmt'
,
delete
=
[
'
\n
'
],
group_id_prefix
=
'COMP'
,
analyte_prefix
=
'ENSG'
,
root_name
=
'COMP0000000000'
,
bReturnList
=
True
)
for
pc
in
PClist
:
print
(
'
\t
'
.
join
(
pc
)
)
show_leftward_dependance
=
lambda
s1
,
s2
:[
len
(
s1
-
s2
),
len
(
s1
),
len
(
s2
)]
print
(
D_i
[
pc
[
0
]][
0
],
D_i
[
pc
[
1
]][
0
]
)
print
(
show_leftward_dependance
(
D_i
[
pc
[
0
]][
1
],
D_i
[
pc
[
1
]][
1
])
)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录