""" Copyright 2023 RICHARD TJÖRNHAMMAR Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ desc__ = """ THIS MODULE WILL CONTAIN SINGLE FUNCTIONS THAT HAVE NO OTHER DEPENDENCIES EXCEPT NUMPY. SOME HAVE BEEN MOVED OVER FROM MODULES WITH COMPLICATED DEPENDENCY STRUCTURES AND WILL ALLEVIATE POTENTIALLY DIFFICULT GRAPH LOOPS AS WELL AS EASE DEBUGGING IN THE FUTURE. CURRENT: REDUNDANT DEFINITIONS (HERE VS REDUCER) EXISTS FOR reducer:: frac_procentile get_procentile smoothbinred smoothmax contrast e_flatness e_contrast confred padded_rolling_window quantification:: isItPrime # FIRST APPEARENCE: # https://gist.github.com/richardtjornhammar/ef1719ab0dc683c69d5a864cb05c5a90 Fibonacci F_truth clustering: lint2lstr unpack rem """ import numpy as np def sign ( x ) : return ( 2*(x>=0)-1 ) def abs ( x ): return(sign(x)*x) contrast = lambda A,B : ( A-B )/( A+B ) # # OTHER e_flatness = lambda x : np.exp(np.mean(np.log(x),0))/np.mean(x,0) e_contrast = lambda x : 1 - e_flatness(x) # # SPURIOUS LOW VALUE REMOVAL confred = lambda x,eta,varpi : 0.5*x*(1+np.tanh((x-eta)/varpi))*(np.sqrt(x*eta)/(0.5*(eta+x))) smoothbinred = lambda x,eta,varpi : 0.5*(1+np.tanh((x-eta)/varpi))*(np.sqrt(x*eta)/(0.5*(eta+x))) smoothmax = lambda x,eta,varpi : x * self.smoothbinred(x-np.min(x),eta-np.min(x),varpi) sabsmax = lambda x,eta,varpi : x * self.smoothbinred(np.abs(x),np.abs(eta),varpi) from collections import Counter def counts ( A:list ) -> list : return ( list(Counter(A).values()) ) def frac_procentile ( vals=[12.123, 1.2, 1000, 4] ): vals = np.array(vals).copy() N = len( vals ); for i,j in zip( np.argsort(vals),range(N)): vals[i]=(j+0.5)/N return ( vals ) def get_procentile ( vals, procentile = 50 ): fp_ = procentile/100.0 proc = frac_procentile(vals) ma,mi = np.max(proc),np.min(proc) if fp_ > ma: fp_ = ma if fp_ < mi: fp_ = mi idx = np.argwhere(proc==fp_) if len ( idx ) == 1 : return ( vals[idx[0][0]] ) else : i1 = np.argsort( np.abs(proc-fp_) )[:2] return ( sum([vals[i] for i in i1]) * 0.5 ) def padded_rolling_window ( ran, tau ) : if tau==1 : return ( [ (i,None) for i in range(len(ran)) ] ) if len(ran)0)*((i-w)%N),int(i+w=N)*(N-1)] idx = [ centered( jid(i,w,N) ) for i in range(N) ] return ( idx ) def factorial ( n ) : return ( 1 if n<=0 else factorial(n-1)*n ) def invfactorial ( n ) : if n<0 : return 0 m = factorial(n) return ( 1/m ) def zernicke ( r , theta , n , m ) : if ( not (r >= 0 and r <= 1)) or (m > n) : return ( 0 ) def zer_R ( n , m , r ) : ip,im = ( n+m )/2 , ( n-m )/2 z = 0 for k in range( int( im ) ) : f = factorial(n-k)*invfactorial(k)*invfactorial(ip-k)*invfactorial(im-k) if f > 0 : z = z + (-1)**k * f * r**( n-2*k ) return ( z ) Rnm = zer_R ( n,m,r ) return ( Rnm ) def error ( self , errstr , severity=0 ): print ( errstr ) if severity > 0 : exit(1) else : return def seqsum ( c , n = 1 ) : return ( c[n:] + c[:-n] ) def seqdiff ( c , n = 1 ) : return ( c[n:] - c[:-n] ) def arr_contrast ( c , n=1, ctyp='c' ) : if ctyp=='c': return ( np.append( (c[n:]-c[:-n])/(c[n:]+c[:-n]) , np.zeros(n) ) ) return ( np.append( np.zeros(n) , (c[n:]-c[:-n])/(c[n:]+c[:-n]) ) ) def all_conts ( c ) : s = c*0.0 inv = 1.0/len(c) for i in range(len(c)): s += arr_contrast(c,i+1)*inv return ( s ) def mse ( Fs,Fe ) : return ( np.mean( (Fs-Fe)**2 ) ) def coserr ( Fe , Fs ) : return ( np.dot( Fe,Fs )/np.sqrt(np.dot( Fe,Fe ))/np.sqrt(np.dot( Fs,Fs )) ) def z2error ( model_data , evidence_data , evidence_uncertainties = None ) : # FOR A DESCRIPTION READ PAGE 71 (57 INTERNAL NUMBERING) of: # https://kth.diva-portal.org/smash/get/diva2:748464/FULLTEXT01.pdf # EQUATIONS 6.3 AND 6.4 # Fe = evidence_data Fs = model_data N = np.min( [ len(evidence_data) , len(model_data) ] ) if not len(evidence_data) == len(model_data): error ( " THE MODEL AND THE EVIDENCE MUST BE PAIRED ON THEIR INDICES" , 0 ) Fe = evidence_data[:N] Fs = model_data[:N] dFe = np.array( [ 0.05 for d in range(N) ] ) if not evidence_uncertainties is None : if len(evidence_uncertainties)==N : dFe = evidence_uncertainties else : error ( " DATA UNCERTAINTIES MUST CORRESPOND TO THE TARGET DATA " , 0 ) def K ( Fs , Fe , dFe ) : return ( np.sum( np.abs(Fs)*np.abs(Fe)/dFe**2 ) / np.sum( (Fe/dFe)**2 ) ) k = K ( Fs,Fe,dFe ) z2e = np.sqrt( 1/(N-1) * np.sum( ( (np.abs(Fs) - k*np.abs(Fe))/(k*dFe) )**2 ) ) cer = coserr(Fe,Fs) qer = z2e/cer return ( qer, z2e , cer , N ) import math def isItPrime( N , M=None,p=None,lM05=None ) : if p is None : p = 1 if M is None : M = N if lM05 is None: lM05 = math.log(M)*0.5 if ((M%p)==0 and p>=2) : return ( N==2 ) else : if math.log(p) > lM05: return ( True ) return ( isItPrime(N-1,M=M,p=p+1,lM05=lM05) ) def lint2lstr ( seq:list[int] ) -> list[str] : if isinstance ( seq,(list,tuple,set)) : yield from ( str(x) for y in seq for x in lint2lstr(y) ) else : yield seq def rem ( a:list , H:list ) -> list : h0 = [] for h in H: hp = h - np.sum(h>np.array(h0)) h0 .append(h) a .pop(hp) return(a) def smom ( v:np.array , p:int ) -> np.array : n = len(v) X = [ np.mean(v) ] X .append( np.std(v) ) z = (v-X[0])/X[1] if p>2 : for q in range( 3 , p+1 ) : X.append( np.sum(z**q)/n ) return ( np.array(X)[:p] ) def unpack ( seq ) : if type(seq) == type( list()) or type(seq) == type(set()) or \ type(seq) == type(tuple()) or type(seq) == type(np.array([])) : yield from ( x for y in seq for x in unpack(y) ) elif type(seq) == type(dict()) : yield from ( x for item in seq.items() for y in item for x in unpack(y) ) else : yield seq def bagscore(s1:str,s2:str)->int: return len( set(s1.lower().split(' '))&set(s2.lower().split(' ')) ) def fuzzy_locate( w , fuzzy_s1, exact_loc ) : I = [] j = None for i in range(len(w)) : fuzzy_s2 = ' '.join(w.iloc[i,:].tolist()) I.append( tuple( (bagscore(fuzzy_s1,fuzzy_s2),i) ) ) if exact_loc in fuzzy_s2 : j = i break if j is None : j = sorted(I)[-1][1] return ( j ) def scale_free_geomav(A:np.array,B:np.array) -> np.array : return( A*B/((A+B)*(A+B)) * 4 ) def GEOMAV ( A:float , B:float ) -> float : return ( 2*A*B/(A+B) ) def geomav ( x:np.array ) -> float : return ( GEOMAV ( x[0] , x[1] ) ) def G0 ( x:np.array, N:int ) -> float: return ( geomav( x ) - N/(N+1) ) def rename_order(A:list) ->list : S = sorted(list(set(A))) a0 = None R = dict() for a in A : if a0 is None : R[a] = 0 a0 = 0 continue if not a in R : R[a] = a0+1 a0 = a0+1 return ( [R[a] for a in A] ) def zvals ( data:np.array , axis:int = 0 , bKeepRange=False ) -> dict : data = data.T if axis == 1 : data = data.T m = np.mean( data, axis = 0 ) s = np.std ( data, axis = 0 ) z = data - m if not bKeepRange : z = z / s z = z.T if axis == 1 : z = z.T return ( {'z':z , 'mean':m , 'std':s } ) def rename_order(A:list) ->list : S = sorted(list(set(A))) a0 = None R = dict() for a in A : if a0 is None : R[a] = 0 a0 = 0 continue if not a in R : R[a] = a0+1 a0 = a0+1 return ( [R[a] for a in A] ) def read_rds_matrix ( filename = 'matrix.rds', bIsSquare=False ) : import rpy2.robjects as robjects from rpy2.robjects import pandas2ri from scipy.spatial.distance import pdist,squareform pandas2ri.activate() readRDS = robjects.r['readRDS'] # if bIsSquare : from scipy.spatial.distance import pdist,squareform return ( squareform( readRDS(filename) ) ) else : return ( readRDS(filename) )