diff --git a/setup.py b/setup.py index f42b40a875d602a6914e84ebeee66f7ec5ad9b4d..476629c05304a97b4eeb89bc27c708696161bfd5 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ with open("README.md", "r") as fh: setuptools.setup( name = "impetuous-gfa", - version = "0.14.0", + version = "0.15.0", author = "Richard Tjörnhammar", author_email = "richard.tjornhammar@gmail.com", description = "Impetuous Quantification, Enrichment and Group Variation Analysis", diff --git a/src/impetuous/hierarchal.py b/src/impetuous/hierarchal.py index 2efde15ed65127d39c2e78c4473c4e1915ce8858..62e22d130d99f6ae1d16ffec09c0253aa691dded 100644 --- a/src/impetuous/hierarchal.py +++ b/src/impetuous/hierarchal.py @@ -46,9 +46,9 @@ def create_dag_representation_df ( pathway_file = '../data/GROUPDEFINITIONS.gmt' def HierarchalEnrichment ( analyte_df , dag_df , dag_level_label = 'DAG,l' , - ancestors_id_label = 'aid' , id_name = None , threshold=0.05 , - p_label = 'C(Status),p', analyte_name_label = 'analytes', - item_delimiter = ',' , alexa_elim=False + ancestors_id_label = 'aid' , id_name = None , threshold = 0.05 , + p_label = 'C(Status),p', analyte_name_label = 'analytes' , + item_delimiter = ',' , alexa_elim=False , alternative = 'two-sided' ) : # # NEEDS AN ANALYTE SIGNIFICANCE FRAME: @@ -83,7 +83,7 @@ def HierarchalEnrichment ( L_ = len( group ) ; str_analytes=','.join(group.index.values) if L_ > 0 : used_analytes[node] = ','.join( group.index.values ) - pv,odds = group_significance( group , AllAnalytes=AllAnalytes, SigAnalytes=SigAnalytes , tolerance = threshold ) + pv,odds = group_significance( group , AllAnalytes=AllAnalytes, SigAnalytes=SigAnalytes , tolerance = threshold , alternative=alternative ) node_sig[node] = pv ; marked_ = set( group.index.values ) ancestors = df.loc[node,ancestors_id_label].replace('\n','').replace(' ','').split(item_delimiter) if alexa_elim and pv > threshold : # USE ALEXAS ELIM ALGORITHM : IS NOT DEFAULT diff --git a/src/impetuous/quantification.py b/src/impetuous/quantification.py index a90426b8220d839b672d267161b0e1be8de69d8e..ee34c370b588c82d6073c918e73a440d0c80f553 100644 --- a/src/impetuous/quantification.py +++ b/src/impetuous/quantification.py @@ -1,5 +1,5 @@ """ -Copyright 2019 RICHARD TJÖRNHAMMAR +Copyright 2020 RICHARD TJÖRNHAMMAR Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -584,16 +584,16 @@ def quantify_groups_by_analyte_pvalues( analyte_df, grouping_file, delimiter='\t edf.loc[l] = q return ( edf.T ) -class sPCA( object ) : +class APCA ( object ) : # # THIS CLASS PERFORMS A SPARSE PCA # IT USES THE SPARSE SVD ALGORITHM # FOUND IN SCIPY # - def __init__ ( self,X,k=-1,fillna=None ) : + def __init__ ( self , X=None , k=-1 , fillna=None , transcending=True) : from scipy.sparse import csc_matrix from scipy.sparse.linalg import svds - self.svds_,self.smatrix_ = svds,csc_matrix + self.svds_ , self.smatrix_ = svds , csc_matrix self.components_ = None self.F_ = None self.U_ , self.S_, self.V_ = None,None,None @@ -602,6 +602,7 @@ class sPCA( object ) : self.fillna_ = fillna self.X_ = self.interpret_input(X) self.k_ = k + self.transcending_ = transcending def interpret_input ( self,X ) : if 'pandas' in str(type(X)) : @@ -612,29 +613,43 @@ class sPCA( object ) : self.X_ = X.values else : self.X_ = X - return( self.X_ ) + return ( self.X_ ) def fit ( self , X=None ) : self.fit_transform( X=X ) def fit_transform ( self , X=None ) : - X = self.X_ - if not X is None : # DID THE USER SUPPLY NEW DATA + if X is None: + X = self.X_ + if not X is None : X = self.interpret_input(X) Xc = X - np.mean( X , 0 ) - if self.k_<=0: - k_ = np.min(np.shape(X))-1 + if self.k_<=0 : + k_ = np.min( np.shape(Xc) ) - 1 else: k_ = self.k_ u, s, v = self.svds_ ( self.smatrix_(Xc, dtype=float) , k=k_ ) + if self.transcending_ : + u, s, v = self.transcending_order(u,s,v) S = np.diag( s ) self.F_ = np.dot(u,S) self.var_ = s ** 2 / Xc.shape[0] self.explained_variance_ratio_ = self.var_/self.var_.sum() self.U_ , self.S_ , self.V_ = u,s,v - self.components_ = self.V_ + self.components_ = self.V_ return ( self.F_ ) + def transcending_order(self,u,s,v) : + return ( u[:,::-1],s[::-1],v[::-1,:] ) + + def apply_matrix( self , R ) : + self.U_ = np.dot( self.U_,R.T ) + self.V_ = np.dot( self.V_.T,R.T ).T + self.F_ = np.dot( self.F_,R.T ) + self.components_ = self.V_ + return ( self.F_ ) + + dimred = PCA() def quantify_groups ( analyte_df , journal_df , formula , grouping_file , synonyms = None , @@ -685,10 +700,10 @@ def quantify_by_dictionary ( analyte_df , journal_df , formula , split_id=None, grouping_dictionary = dict() , synonyms = None , delimiter = ':' ,test_type = 'random', tolerance = 0.05, supress_q = False , analyte_formula = None, - use_sparse_pca=False , k=-1 ) : + use_loc_pca=False , k=-1 ) : - if use_sparse_pca : - dimred = sPCA(X=analyte_df,k=k) + if use_loc_pca : + dimred = APCA(X=analyte_df,k=k) if not 'dict' in str(type(grouping_dictionary)) : print ( 'INVALID GROUPING' )