Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
rictjo
impetuous
提交
2e354a63
I
impetuous
项目概览
rictjo
/
impetuous
10 个月 前同步成功
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
I
impetuous
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
未验证
提交
2e354a63
编写于
2月 08, 2023
作者:
rictjo
提交者:
GitHub
2月 08, 2023
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
composition hierarchy++
write to disc alleviates mem usage
上级
cfb5275a
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
128 addition
and
26 deletion
+128
-26
src/impetuous/quantification.py
src/impetuous/quantification.py
+128
-26
未找到文件。
src/impetuous/quantification.py
浏览文件 @
2e354a63
...
...
@@ -1939,43 +1939,145 @@ def composition_create_contraction ( adf:pd.DataFrame , jdf:pd.DataFrame , labe
lid_to_label
=
dict
()
I
=
0
for
ul
in
unique_labels
:
lid_to_label
[
'lid.'
+
str
(
I
)
]
=
ul
label_to_lid
[
ul
]
=
'lid.'
+
str
(
I
)
lid_to_label
[
I
]
=
ul
label_to_lid
[
ul
]
=
I
I
+=
1
contracted_df
=
cumulative_cdf
.
T
.
apply
(
lambda
x
:
composition_assign_label_ids
(
x
,
label_to_lid
)
).
T
return
(
{
'contraction'
:
contracted_df
,
'all_level_values'
:
all_level_values
,
'id_to_label'
:
lid_to_label
,
'label_to_id'
:
label_to_lid
}
)
return
(
{
'contraction'
:
contracted_df
,
'all_level_values'
:
all_level_values
,
'id_to_label'
:
lid_to_label
,
'label_to_id'
:
label_to_lid
}
)
def composition_contraction_to_hierarchy_red(
        contracted_df: pd.DataFrame,
        TOL: float = 1E-10,
        levels: list[float] = [0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5,
                               0.6, 0.7, 0.8, 0.9, 0.95, 0.99],
        default_label: int = -1) -> pd.DataFrame:
    """Build a level-by-level label table from a contraction (reference version).

    Every cell of ``contracted_df`` must be indexable: ``cell[0]`` is compared
    against each level and ``cell[1]`` is the label reported for the row
    (presumably a cumulative composition value and a label id -- confirm
    against ``composition_create_contraction``).

    For each ``level`` greater than ``TOL`` the label of the FIRST cell in a
    row with ``cell[0] >= level`` is selected; rows without such a cell get
    ``default_label``.

    Parameters
    ----------
    contracted_df : frame whose cells are ``(value, label)`` pairs.
    TOL           : levels not strictly greater than this are skipped.
    levels        : thresholds to evaluate (read only, never mutated).
    default_label : label used when no cell in a row reaches the level.

    Returns
    -------
    DataFrame with one column per evaluated level (named '0', '1', ...),
    one row per row of ``contracted_df`` plus 'h.N' (number of distinct
    labels at that level) and 'h.lv' (the level itself).
    """
    solution = []
    column_id = 0
    row_names = contracted_df.index.values
    rows = contracted_df.values
    for level in levels:
        if not level > TOL:
            continue
        # A single qualifying cell is enough to label the row. The earlier
        # guard required more than one hit, which dropped valid labels and
        # disagreed with the serial implementation.
        level_segmentation = [
            next((cell[1] for cell in row if cell[0] >= level), default_label)
            for row in rows
        ]
        nlabels = len(set(level_segmentation))
        solution.append(
            pd.Series([*level_segmentation, nlabels, level],
                      index=[*row_names, 'h.N', 'h.lv'],
                      name=str(column_id)))
        column_id += 1
    return pd.DataFrame(solution).T
def composition_split_contraction(
        contracted_df: pd.DataFrame) -> tuple[np.ndarray, np.ndarray, int, int]:
    """Split a frame of pair-valued cells into two flat arrays plus its shape.

    Each cell of ``contracted_df`` must be indexable with ``[0]`` and ``[1]``.
    The cells are visited in row-major order (``values.reshape(-1)``).

    Returns
    -------
    (na1, na2, n, m) where ``na1`` holds every ``cell[0]``, ``na2`` holds
    every ``cell[1]`` and ``(n, m)`` is the shape of ``contracted_df.values``.
    """
    cells = contracted_df.values          # materialise the object array once
    n_rows, n_cols = np.shape(cells)
    flat = cells.reshape(-1)
    na1 = np.array([cell[0] for cell in flat])
    na2 = np.array([cell[1] for cell in flat])
    return (na1, na2, n_rows, n_cols)
#
#from numba import jit
#@jit( nopython=True ) # BROKEN AT THIS POINT
def composition_contraction_to_hierarchy_ser(
        na1: np.ndarray, na2: np.ndarray, n: int, m: int,
        index_values: list,
        bWriteToDisc: bool = True,
        output_directory: str = './',
        compression: str = 'gzip',
        TOL: float = 1E-10,
        levels: list[float] = [0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5,
                               0.6, 0.7, 0.8, 0.9, 0.95, 0.99],
        default_label: int = -1) -> pd.DataFrame:
    """Compute the per-level label table from the split contraction arrays.

    ``na1`` and ``na2`` are reshaped to ``(n, m)``. For every ``level``
    greater than ``TOL`` and every row, the entry of ``na2`` at the first
    column where ``na1 >= level`` is taken (cast to ``int``); rows where no
    column reaches the level get ``default_label``.

    Parameters
    ----------
    na1, na2         : flat arrays of length ``n*m`` (values and labels).
    n, m             : number of rows and columns to reshape to.
    index_values     : row names used for the in-memory result.
    bWriteToDisc     : when True every level is written to its own file
                       instead of being kept in memory.
    output_directory : string prefix prepended verbatim to the file name
                       (no path separator is inserted).
    compression      : forwarded to ``Series.to_csv``.
    TOL              : levels not strictly greater than this are skipped.
    levels           : thresholds to evaluate (read only, never mutated).
    default_label    : label for rows that never reach the level.

    Returns
    -------
    bWriteToDisc=False : DataFrame, one column per evaluated level ('0',
        '1', ...), rows ``index_values`` followed by 'h.N' (distinct label
        count) and 'h.lv' (the level).
    bWriteToDisc=True  : single-column DataFrame holding, in order,
        'files stored as', the file name template (with 'I' in place of the
        running number), the compression and the number of files written.
        The written series are indexed 0..n-1 rather than by ``index_values``.
    """
    fname = ['.compser.', '.tsv']
    solution = []
    I = 0
    NA1 = na1.reshape(n, m)
    NA2 = na2.reshape(n, m)
    for level in levels:
        if not level > TOL:
            continue
        # One array comparison per level instead of one np.where per row.
        reached = NA1 >= level
        has_hit = reached.any(axis=1)
        # argmax yields the first True per row; it cannot reduce an empty axis.
        first_hit = reached.argmax(axis=1) if m > 0 else np.zeros(n, dtype=int)
        level_segmentation = [
            int(NA2[row, col]) if hit else default_label
            for row, (col, hit) in enumerate(zip(first_hit, has_hit))
        ]
        nlabels = len(set(level_segmentation))
        values = [*level_segmentation, nlabels, level]
        if bWriteToDisc:
            # Only build the positional-index series when it is really written.
            pd.Series(values, index=[*range(n), 'h.N', 'h.lv'], name=str(I)).to_csv(
                output_directory + fname[0] + str(I) + fname[1],
                sep='\t', compression=compression)
        else:
            solution.append(
                pd.Series(values, index=[*index_values, 'h.N', 'h.lv'], name=str(I)))
        I += 1
    if bWriteToDisc:
        return pd.DataFrame(['files stored as',
                             output_directory + 'I'.join(fname),
                             compression, I])
    return pd.DataFrame(solution).T
def composition_contraction_to_hierarchy(
        contracted_df: pd.DataFrame,
        TOL: float = 1E-10,
        levels: list[float] = [0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5,
                               0.6, 0.7, 0.8, 0.9, 0.95, 0.99],
        default_label: int = -1,
        bWriteToDisc: bool = True,
        output_directory: str = './',
        compression: str = 'gzip') -> pd.DataFrame:
    """Turn a contraction frame into its per-level label table.

    Splits ``contracted_df`` into flat value/label arrays with
    ``composition_split_contraction`` and delegates the work to
    ``composition_contraction_to_hierarchy_ser``; the return value of that
    call is handed back unchanged (an in-memory table, or a description of
    the files written when ``bWriteToDisc`` is True).

    All keyword parameters are forwarded to the serial implementation.
    """
    # The reduced reference implementation can be used instead:
    # return ( composition_contraction_to_hierarchy_red ( contracted_df ,levels=levels) )
    na1, na2, n, m = composition_split_contraction(contracted_df)
    res_df = composition_contraction_to_hierarchy_ser(
        na1, na2, n, m,
        TOL=TOL,  # previously dropped, so a caller-supplied TOL had no effect
        levels=levels,
        default_label=default_label,
        index_values=contracted_df.index.values,
        bWriteToDisc=bWriteToDisc,
        output_directory=output_directory,
        compression=compression)
    return res_df
def composition_collect_df(res_df: pd.DataFrame, index_values: list) -> pd.DataFrame:
    """Reassemble the per-level files described by ``res_df`` into one frame.

    INTERNAL USE ONLY.

    ``res_df`` is read positionally from its last column: row 1 is the file
    name template, row 2 the compression and the last row the number of
    files. The template's final '.I.' is replaced by '.<k>.' for
    k = 0 .. Nfiles-1, each file is read as a tab separated table with its
    first column as index, and the tables are concatenated column-wise.

    Parameters
    ----------
    res_df       : description frame as laid out above.
    index_values : row names assigned to all rows except the last two,
                   which keep the names read from the files.

    Raises
    ------
    ValueError : when the description reports no files to read.
    """
    n_files = int(res_df.iloc[-1, -1])
    template_name = res_df.iloc[1, -1]
    compression = res_df.iloc[2, -1]
    if n_files < 1:
        raise ValueError('No stored files to collect (file count is '
                         + str(n_files) + ')')
    # Only the LAST '.I.' is the running-number placeholder; an earlier
    # occurrence belongs to the directory part and must be left alone.
    head, marker, tail = template_name.rpartition('.I.')
    collected = []
    for k in range(n_files):
        fname = head + '.' + str(k) + '.' + tail if marker else template_name
        collected.append(
            pd.read_csv(fname, sep='\t', index_col=0, compression=compression))
    res_df = pd.concat(collected, axis=1)
    res_df.index = [*index_values, *res_df.iloc[-2:].index.values.tolist()]
    return res_df
def composition_create_hierarchy(
        adf: pd.DataFrame, jdf: pd.DataFrame, label: str,
        levels: list[float] = None,
        bFull: bool = False,
        default_label: int = -1,
        bWriteToDisc: bool = True,
        output_directory: str = './',
        compression: str = 'gzip') -> dict:
    """Create a composition hierarchy for ``adf`` grouped by ``label``.

    SAIGA KNOWLEDGE :
    A COMPOSITION HIERARCHY IS DEFINED VIA ABSOLUTE QUANTIFICATIONS.
    IT IS NOT RELATED TO A HIERARCHY STEMMING FROM A DISTANCE MATRIX
    OF ALL THE RELATIVE DISTANCES, I.E. AS IN WHAT IS DONE FOR
    AGGLOMERATIVE HIERARCHICAL CLUSTERING DERIVED HIERARCHIES.

    Steps: build the contraction with ``composition_create_contraction``,
    convert it with ``composition_contraction_to_hierarchy`` and, when the
    levels were written to disc, read them back with
    ``composition_collect_df``. Unless ``bFull`` is set, the columns are then
    trimmed to start at the first level with the maximal 'h.N' and to end at
    the first level with the minimal 'h.N'.

    Parameters
    ----------
    adf, jdf, label  : forwarded to ``composition_create_contraction``.
    levels           : thresholds to evaluate; None uses the
                       'all_level_values' entry of the contraction.
    bFull            : return every level instead of the trimmed range.
    default_label, bWriteToDisc, output_directory, compression :
                       forwarded to ``composition_contraction_to_hierarchy``.

    Returns
    -------
    dict with 'composition hierarchy' (the level table) and 'id to label'
    (the contraction's 'id_to_label' lookup).
    """
    contr_d = composition_create_contraction(adf=adf, jdf=jdf, label=label)
    contracted_df = contr_d['contraction']
    lookup_i2l = contr_d['id_to_label']
    if levels is None:
        levels = contr_d['all_level_values']
    #
    res_df = composition_contraction_to_hierarchy(
        contracted_df, levels=levels,
        bWriteToDisc=bWriteToDisc,
        output_directory=output_directory,
        compression=compression,
        default_label=default_label)
    if bWriteToDisc:
        # Only a description of the written files came back; load the data.
        res_df = composition_collect_df(
            res_df, index_values=contracted_df.index.values.tolist())
    #
    label_counts = res_df.loc['h.N'].values
    lmax = int(np.max(label_counts))  # UNRELIABLE AT LOW VALUES
    lmin = int(np.min(label_counts))  # REDUNDANT AT HIGH VALUES
    iA = np.min(np.where(label_counts == lmax)[0])
    iB = np.min(np.where(label_counts == lmin)[0]) + 1
    if bFull:
        iA, iB = 0, None
    return {'composition hierarchy': res_df.iloc[:, iA:iB],
            'id to label': lookup_i2l}
def
multivariate_aligned_pca
(
analytes_df
,
journal_df
,
sample_label
=
'Sample ID'
,
align_to
=
'Modulating group'
,
n_components
=
None
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录