clr

8189a444 · rictjo · 0c05e032 · 8189a444 · 8189a444 · 8189a444
隐藏空白更改
内联并排

Showing 3 changed files with 10 additions and 73 deletions

README.md README.md +7 -5

setup.py setup.py +2 -2

src/impetuous/quantification.py src/impetuous/quantification.py +1 -66

未找到文件。
--- a/README.md
+++ b/README.md
-# Simple Group Factor Analysis
+# Simple Group Analysis
 Decomposes a set of expressions into a group expression.
-The expression regulation can be studied via an ANOVA that
-relates it to the observables in the journal file. The
-final p values are then FDR corrected and the resulting
-q values are produced.
+The toolkit currently offers enrichment analysis, hierarchical
+enrichment analysis, and a rudimentary factor analysis.
+The expression regulation can be studied via a statistical
+test that relates it to the observables in the journal file. 
+The final p values are then FDR corrected and the resulting
+adjusted p values are produced.

 The journal and analyte expression file must be ordered
 the same way with respect to the samples that are

--- a/setup.py
+++ b/setup.py
@@ -5,10 +5,10 @@ with open("README.md", "r") as fh:

 setuptools.setup(
    name = "impetuous-gfa",
-    version = "0.6.0",
+    version = "0.7.0",
    author = "Richard Tjörnhammar",
    author_email = "richard.tjornhammar@gmail.com",
-    description = "Impetuous Quantification, Enrichment and Group Factor Analysis",
+    description = "Impetuous Quantification, Enrichment and Group Variation Analysis",
    long_description = long_description,
    long_description_content_type = "text/markdown",
    url = "https://github.com/richardtjornhammar/impetuous",

--- a/src/impetuous/quantification.py
+++ b/src/impetuous/quantification.py
@@ -264,7 +264,7 @@ def group_significance( subset , all_analytes_df = None ,
    notSigAnalytes = AllAnalytes - SigAnalytes
    AB  = len(Analytes&SigAnalytes)    ; nAB  = len(notAnalytes&SigAnalytes)
    AnB = len(Analytes&notSigAnalytes) ; nAnB = len(notAnalytes&notSigAnalytes)
-    oddsratio , pval = stats.fisher_exact([[AB, nAB], [AnB, nAnB]], alternative=alternative)
+    oddsratio , pval = stats.fisher_exact([[AB, nAB], [AnB, nAnB]], alternative=alternative )
    return ( pval , oddsratio )

 def quantify_groups_by_analyte_pvalues( analyte_df, grouping_file, delimiter='\t',
@@ -346,71 +346,6 @@ def quantify_groups ( analyte_df , journal_df , formula , grouping_file , synony
            edf.loc[l] = q
    return ( edf.T )

-
-class RCA( object ) :
-    def __init__(self):
-        self.components_ = None
-        self.F_ = None
-        self.U_ , self.S_, self.V_ = None,None,None
-        self.evr_ = None
-        self.var_ = None
-
-    def fit_transform(self,X):
-        Xc = X - np.mean( X , 0 )
-        u, s, v = np.linalg.svd( Xc, full_matrices=False )
-        S = np.diag( s )
-        self.F_ = np.dot(u,S)
-        self.var_ = s ** 2 / Xc.shape[0]
-        self.explained_variance_ratio_ = self.var_/self.var_.sum()
-        self.U_, self.S_, self.V_ = u,s,v
-        self.components_ = self.V_
-        return(self.F_)
-
-def righteous ( analyte_df , journal_df , formula , grouping_file , synonyms = None ,
-                delimiter = '\t' , test_type = 'random' ,
-                split_id = None , skip_line_char = '#' 
-              ) :
-    dimred = RCA()
-    statistical_formula = formula
-    if not split_id is None :
-        nidx = [ idx.split(split_id)[-1].replace(' ','') for idx in analyte_df.index.values ]
-        analyte_df.index = nidx
-    sidx = set( analyte_df.index.values ) ; nidx=len(sidx)
-    eval_df = None
-    with open ( grouping_file ) as input:
-        for line in input:
-            if line[0] == skip_line_char :
-                continue
-            vline = line.replace('\n','').split(delimiter)
-            gid,gdesc,analytes_ = vline[0],vline[1],vline[2:]
-            if not synonyms is None :
-                [ analytes_.append(synonyms[a]) for a in analytes_ if a in synonyms ]
-            try :
-                group = analyte_df.loc[[a for a in analytes_ if a in sidx] ].dropna( axis=0, how='any', thresh=analyte_df.shape[1]/2 ).drop_duplicates()
-            except KeyError as e :
-                continue
-            L_ = len( group ); str_analytes=','.join(group.index.values)
-            if L_>0 :
-                Xnew = dimred.fit_transform(group.T.values)
-                group_expression_df = pd.DataFrame([Xnew.T[0]],columns=analyte_df.columns.values,index=[gid])
-                rdf = pd.DataFrame( parse_test( statistical_formula, group_expression_df , journal_df , test_type=test_type )).T
-                rdf .columns = [ col+',p' if (not ',s' in col) else col+',s' for col in rdf.columns ]
-                rdf['description'] = gdesc+','+str(L_)
-                rdf['analytes'] = str_analytes
-                rdf.index = [ gid ] ; ndf = pd.concat([rdf.T,group_expression_df.T]).T
-                if eval_df is None :
-                    eval_df = ndf
-                else :
-                    eval_df = pd.concat([eval_df,ndf])
-    edf = eval_df.T
-    for col in eval_df.columns :
-        if ',p' in col :
-            q = [q_[0] for q_ in qvalues(eval_df.loc[:,col].values)]; l=col.split(',')[0]+',q'
-            edf.loc[l] = q
-    return ( edf.T )
-
-
-
 def quantify_analytes( analyte_df , journal_df , formula ,
                       delimiter = '\t' , test_type = 'random',
                       verbose = True , only_include = None ) :