Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
rictjo
impetuous
提交
4d3a89bc
I
impetuous
项目概览
rictjo
/
impetuous
10 个月 前同步成功
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
I
impetuous
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
未验证
提交
4d3a89bc
编写于
3月 03, 2023
作者:
rictjo
提交者:
GitHub
3月 03, 2023
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Update quantification.py
group sig confusion lengths
上级
c6f1ab60
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
26 addition
and
12 deletion
+26
-12
src/impetuous/quantification.py
src/impetuous/quantification.py
+26
-12
未找到文件。
src/impetuous/quantification.py
浏览文件 @
4d3a89bc
...
...
@@ -1293,10 +1293,10 @@ def prune_journal ( journal_df , remove_units_on = '_' ) :
journal_df
=
pd
.
concat
(
[
nmr_journal
,
str_journal
]
)
return
(
journal_df
)
def
group_significance
(
subset
,
all_analytes_df
=
None
,
tolerance
=
0.05
,
significance_name
=
'pVal'
,
AllAnalytes
=
None
,
SigAnalytes
=
None
,
alternative
=
'two-sided'
)
:
def
group_significance
(
subset
:
pd
.
Series
,
all_analytes_df
:
pd
.
DataFrame
=
None
,
tolerance
:
float
=
0.05
,
significance_name
:
str
=
'pVal'
,
AllAnalytes
:
set
=
None
,
SigAnalytes
:
set
=
None
,
TestType
:
str
=
'fisher'
,
alternative
:
str
=
'two-sided'
,
AllAnnotated
:
set
=
None
)
:
# FISHER ODDS RATIO CHECK
# CHECK FOR ALTERNATIVE :
# 'greater' ( ENRICHMENT IN GROUP )
...
...
@@ -1309,13 +1309,26 @@ def group_significance( subset , all_analytes_df = None ,
if
all_analytes_df
is
None
:
SigAnalytes
=
set
(
all_analytes_df
.
iloc
[(
all_analytes_df
<
tolerance
).
loc
[:,
significance_name
]].
index
.
values
)
Analytes
=
set
(
subset
.
index
.
values
)
if
not
AllAnnotated
is
None
:
Analytes
=
Analytes
&
AllAnnotated
SigAnalytes
=
SigAnalytes
&
AllAnnotated
AllAnalytes
=
AllAnalytes
&
AllAnnotated
notAnalytes
=
AllAnalytes
-
Analytes
notSigAnalytes
=
AllAnalytes
-
SigAnalytes
AB
=
len
(
Analytes
&
SigAnalytes
)
;
nAB
=
len
(
notAnalytes
&
SigAnalytes
)
AnB
=
len
(
Analytes
&
notSigAnalytes
)
;
nAnB
=
len
(
notAnalytes
&
notSigAnalytes
)
oddsratio
,
pval
=
stats
.
fisher_exact
([[
AB
,
nAB
],
[
AnB
,
nAnB
]],
alternative
=
alternative
)
if
'fisher'
in
TestType
.
lower
()
:
oddsratio
,
pval
=
stats
.
fisher_exact
([[
AB
,
nAB
],
[
AnB
,
nAnB
]],
alternative
=
alternative
)
if
'hypergeom'
in
TestType
.
lower
():
x
=
AB
N
=
len
(
AllAnalytes
)
k
=
len
(
Analytes
)
m
=
len
(
SigAnalytes
)
pval
=
stats
.
hypergeom
(
M
=
N
,
n
=
m
,
N
=
k
)
.
sf
(
x
-
1
)
oddsratio
=
0
return
(
pval
,
oddsratio
)
def
quantify_groups_by_analyte_pvalues
(
analyte_df
,
grouping_file
,
delimiter
=
'
\t
'
,
tolerance
=
0.05
,
p_label
=
'C(Status),p'
,
group_prefix
=
''
,
alternative
=
'two-sided'
)
:
...
...
@@ -2222,16 +2235,17 @@ def confusion_matrix ( dict_row:dict , dict_col:dict , bSwitchKeyValues:bool=Fal
confusion
[
i
,
j
]
=
len
(
set
(
dict_row
[
all_interactions
[
i
]])
&
set
(
dict_col
[
all_interactions
[
j
]])
)
return
(
{
'confusion matrix'
:
confusion
,
'index names'
:
all_interactions
}
)
def
confusion_lengths
(
BCM
:
np
.
array
)
->
list
[
np
.
array
]
:
def
confusion_lengths
(
BCM
:
np
.
array
,
ranktype
:
str
=
'ordinal'
)
->
list
[
np
.
array
]
:
from
scipy.stats
import
rankdata
ND
=
len
(
np
.
shape
(
BCM
))
axshape
=
lambda
i
,
nm
:
np
.
array
([
nm
[
j
]
if
j
!=
i
else
1
for
j
in
range
(
len
(
nm
))
])
nm
=
np
.
shape
(
BCM
)
ND
=
len
(
nm
)
SAIGA
=
[]
for
i_
in
range
(
ND
)
:
j_
=
ND
-
1
-
i_
# NOT USED
rBCM
=
rankdata
(
BCM
,
'average'
,
axis
=
i_
)
rBCM
=
1
+
np
.
abs
(
np
.
max
(
rBCM
,
axis
=
i_
)
-
rBCM
)
Z
=
np
.
sum
(
BCM
,
axis
=
i_
)
SAIGA
.
append
(
np
.
sum
(
BCM
*
rBCM
/
Z
,
axis
=
i_
)
)
rBCM
=
rankdata
(
BCM
,
ranktype
,
axis
=
i_
)
rBCM
=
1
+
np
.
abs
(
np
.
max
(
rBCM
,
axis
=
i_
)
.
reshape
(
axshape
(
i_
,
nm
))
*
np
.
ones
(
np
.
prod
(
nm
)
).
reshape
(
nm
)
-
rBCM
)
Z
=
np
.
sum
(
BCM
,
axis
=
i_
)
SAIGA
.
append
(
np
.
sum
(
BCM
*
rBCM
,
axis
=
i_
)
/
Z
)
return
(
SAIGA
)
def
compare_labeling_solutions
(
df_
:
pd
.
DataFrame
,
lab1
:
str
,
lab2
:
str
,
nsamples
:
int
=
None
)
->
list
[
pd
.
DataFrame
]
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录