Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
rictjo
impetuous
提交
8467b62b
I
impetuous
项目概览
rictjo
/
impetuous
10 个月 前同步成功
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
I
impetuous
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
8467b62b
编写于
4月 13, 2019
作者:
rictjo
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
foo
上级
9c9a7549
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
34 addition
and
10 deletion
+34
-10
README.md
README.md
+2
-4
setup.py
setup.py
+1
-1
src/impetuous/convert.py
src/impetuous/convert.py
+4
-0
src/impetuous/quantification.py
src/impetuous/quantification.py
+27
-5
未找到文件。
README.md
浏览文件 @
8467b62b
...
...
@@ -13,9 +13,7 @@ Visit the active code via:
https://github.com/richardtjornhammar/impetuous
Visit the published code:
v 0.1.1 https://doi.org/10.5281/zenodo.2594691
v 0.2.6 https://doi.org/10.5281/zenodo.2636321
https://doi.org/10.5281/zenodo.2594691
Cite using
DOI: 10.5281/zenodo.2
63632
1
DOI: 10.5281/zenodo.2
59469
1
setup.py
浏览文件 @
8467b62b
...
...
@@ -5,7 +5,7 @@ with open("README.md", "r") as fh:
setuptools
.
setup
(
name
=
"impetuous-gfa"
,
version
=
"0.2.
6
"
,
version
=
"0.2.
8
"
,
author
=
"Richard Tjörnhammar"
,
author_email
=
"richard.tjornhammar@gmail.com"
,
description
=
"Impetuous Quantification, Enrichment and Group Factor Analysis"
,
...
...
src/impetuous/convert.py
浏览文件 @
8467b62b
...
...
@@ -18,6 +18,10 @@ import numpy as np
import
json
import
sys
def normalise_for_apples_and_oranges_stats(X, method='ordinal'):
    """Rank-transform *X* and scale the ranks by ``len(X)``.

    Args:
        X: Sequence of values to rank; passed unchanged to ``rankdata``.
        method: Tie-handling strategy forwarded to ``rankdata``
            (defaults to ``'ordinal'``).

    Returns:
        The result of ``rankdata(X, method=method)`` divided by ``len(X)``.

    NOTE(review): ``rankdata`` is not imported in the visible part of this
    file; presumably it is ``scipy.stats.rankdata`` -- confirm.
    """
    ranks = rankdata(X, method=method)
    n_entries = len(X)
    return ranks / n_entries
def
make_group_analytes_unique
(
grouping_file
,
delimiter
=
'
\t
'
):
uniqe_grouping_file
=
'unique_'
+
grouping_file
with
open
(
uniqe_grouping_file
,
'w'
)
as
of
:
...
...
src/impetuous/quantification.py
浏览文件 @
8467b62b
...
...
@@ -78,6 +78,21 @@ def anova_test ( formula, group_expression_df, journal_df, test_type = 'random'
table
=
sm
.
stats
.
anova_lm
(
model
,
typ
=
type_d
[
test_type
])
return
table
.
iloc
[
[(
idx
in
formula
)
for
idx
in
table
.
index
],
-
1
]
from
scipy.stats
import
ttest_rel
,
ttest_ind
,
mannwhitneyu
def t_test(df, endogen='expression', group='disease',
           pair_values=('Sick', 'Healthy'), test_type='independent',
           equal_var=False):
    """Compare the ``endogen`` values of two groups with a t-test.

    Rows of ``df`` are split on ``df[group]`` being equal to
    ``pair_values[0]`` and ``pair_values[1]``; the ``endogen`` column of
    each selection is cast to float and the two samples are compared.

    Args:
        df: Table holding at least the ``group`` and ``endogen`` columns.
        endogen: Name of the column with the measured values.
        group: Name of the column with the group labels.
        pair_values: The two labels to compare, in order.
        test_type: ``'independent'`` runs ``ttest_ind``; ``'related'`` runs
            the paired ``ttest_rel`` (samples are paired by row order).
        equal_var: Forwarded to ``ttest_ind``; ignored for ``'related'``.

    Returns:
        Tuple ``(pvalue, p_mannwhitney)``: the t-test p-value and the
        two-sided Mann-Whitney U p-value for the same two samples.

    Raises:
        ValueError: If ``test_type`` is neither ``'independent'`` nor
            ``'related'``. (The SciPy routines may raise their own errors,
            e.g. for paired samples of unequal length.)
    """
    # Plain arrays are handed to SciPy so that pandas index alignment can
    # never interfere with the element-wise pairing done by ttest_rel.
    group1 = df[df[group] == pair_values[0]][endogen].astype(float).values
    group2 = df[df[group] == pair_values[1]][endogen].astype(float).values

    if test_type == 'independent':
        pv = ttest_ind(group1, group2, equal_var=equal_var)
    elif test_type == 'related':
        # The paired test must be used here; ttest_ind would silently
        # treat matched samples as independent ones.
        pv = ttest_rel(group1, group2)
    else:
        raise ValueError(
            "test_type must be 'independent' or 'related', got %r"
            % (test_type,)
        )

    # Rank-based companion p-value, reported next to the t-test result.
    p_mannwhitney = mannwhitneyu(group1, group2, alternative="two-sided")[1]
    pvalue = pv[1]
    return (pvalue, p_mannwhitney)
def
prune_journal
(
journal_df
,
remove_units_on
=
'_'
)
:
journal_df
=
journal_df
.
loc
[
[
'label'
in
idx
.
lower
()
or
'['
in
idx
for
idx
in
journal_df
.
index
.
values
]
,
:
].
copy
()
bSel
=
[
(
'label'
in
idx
.
lower
()
)
for
idx
in
journal_df
.
index
.
values
]
...
...
@@ -98,17 +113,23 @@ def merge_significance ( significance_df , distance_type='euclidean' ) :
# GROUPS ALONG INDICES
# EX: pd.DataFrame( np.random.rand(20).reshape(5,4) , columns=['bio','cars','oil','money']).apply( lambda x: -1.*np.log10(x) ).T.apply( lambda x: np.sqrt(np.sum(x**2)) )
#
if
distance_type
==
'euclidean'
:
distance
=
lambda
x
:
np
.
sqrt
(
np
.
sum
(
x
**
2
))
else
:
# CURRENTLY ONLY ONE METHOD IMPLEMENTED
distance
=
lambda
x
:
np
.
sqrt
(
np
.
sum
(
x
**
2
))
if
distance_type
==
'euclidean'
:
# CONSERVATIVE ESTIMATE
distance
=
lambda
x
:
np
.
sqrt
(
np
.
sum
(
x
**
2
))
if
distance_type
==
'extreme'
:
# ANTI-CONSERVATIVE ESTIMATE
distance
=
lambda
x
:
np
.
max
(
x
)
get_pvalue
=
lambda
x
:
10
**
(
-
x
)
return
(
significance_df
.
apply
(
lambda
x
:
-
1.
*
np
.
log10
(
x
)
).
T
.
apply
(
distance
).
apply
(
get_pvalue
)
)
def
group_significance
(
subset
,
all_analytes_df
=
None
,
tolerance
=
0.05
,
significance_name
=
'pVal'
,
AllAnalytes
=
None
,
SigAnalytes
=
None
)
:
AllAnalytes
=
None
,
SigAnalytes
=
None
,
alternative
=
'greater'
)
:
# FISHER ODDS RATIO CHECK
# CHECK FOR ALTERNATIVE:
# 'greater' ( ENRICHMENT IN GROUP )
# 'two-sided' ( DIFFERENTIAL GROUP EXPRESSION )
# 'less' ( DEPLETION IN GROUP )
if
AllAnalytes
is
None
:
if
all_analytes_df
is
None
:
AllAnalytes
=
set
(
all_analytes_df
.
index
.
values
)
...
...
@@ -120,7 +141,7 @@ def group_significance( subset , all_analytes_df = None ,
notSigAnalytes
=
AllAnalytes
-
SigAnalytes
AB
=
len
(
Analytes
&
SigAnalytes
)
;
nAB
=
len
(
notAnalytes
&
SigAnalytes
)
AnB
=
len
(
Analytes
&
notSigAnalytes
)
;
nAnB
=
len
(
notAnalytes
&
notSigAnalytes
)
oddsratio
,
pval
=
stats
.
fisher_exact
([[
AB
,
nAB
],
[
AnB
,
nAnB
]])
oddsratio
,
pval
=
stats
.
fisher_exact
([[
AB
,
nAB
],
[
AnB
,
nAnB
]]
,
alternative
=
alternative
)
return
(
pval
,
oddsratio
)
def
quantify_groups_by_analyte_pvalues
(
analyte_df
,
grouping_file
,
delimiter
=
'
\t
'
,
...
...
@@ -146,6 +167,7 @@ def quantify_groups_by_analyte_pvalues( analyte_df, grouping_file, delimiter='\t
rdf
.
columns
=
[
col
+
',p'
if
',p'
not
in
col
else
col
for
col
in
rdf
.
columns
]
rdf
[
'description'
]
=
gdesc
+
','
+
str
(
L_
)
;
rdf
[
'analytes'
]
=
str_analytes
rdf
[
group_prefix
+
'NGroupAnalytes'
]
=
L_
rdf
[
group_prefix
+
'FracGroupFill'
]
=
L_
/
float
(
len
(
analytes_
))
ndf
=
rdf
if
eval_df
is
None
:
eval_df
=
ndf
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录