OH,TP

f0486de2 · rictjo · 798d2310 · f0486de2 · f0486de2
隐藏空白更改
内联并排

Showing with 39 addition and 41 deletion

setup.py setup.py +1 -1

src/impetuous/quantification.py src/impetuous/quantification.py +38 -40

未找到文件。
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@ with open("README.md", "r") as fh:

 setuptools.setup(
    name = "impetuous-gfa",
-    version = "0.37.1",
+    version = "0.38.0",
    author = "Richard Tjörnhammar",
    author_email = "richard.tjornhammar@gmail.com",
    description = "Impetuous Quantification, a Statistical Learning library for Humans : Alignments, Clustering, Enrichments and Group Analysis",

--- a/src/impetuous/quantification.py
+++ b/src/impetuous/quantification.py
@@ -1066,34 +1066,44 @@ def add_kendalltau( analyte_results_df , journal_df , what='M' , sample_names =
    return ( analyte_results_df )


-
-def calculate_rates( known_df , inferred_df ,
+def calculate_rates( journal_df , inferred_df ,
                     formula , inference_label = 'owner',
-                     interaction_sep = '-') :
-
-    bHaveInteraction = ':' in formula
-    known_set_values = set(known_df.values.reshape(-1))
+                     bVerbose=False ) :

-    inferred_labels = []
-    vals = inferred_df .loc[:,inference_label] .values
-    TP,FP,FN,TN = 0,0,0,0
+    check = []
+    for idx in journal_df.index.values:
+        if idx in formula :
+            check.append( idx )
+    check = list( set(check) )
+    if len( check ) == 0 :
+        print( 'Cannot assert quality' )
+        results_lookup = dict( )
+        return ( results_lookup )
+   
+    known_df = journal_df.loc[check,:]
    
-    for idx in inferred_df.index.values :
-
-        thisSet = set( known_df.loc[idx].values )
-        not_Set = known_set_values - thisSet
-        called = inferred_df.loc[ idx,inference_label ]
-
-        if bHaveInteraction and called.count(interaction_sep) == 1 :
-            called = set( called.split(interaction_sep) )
-        else :
-            called = set( [ called ] )
-        not_called = known_set_values - called
-        
-        TP += int( len ( called - thisSet  ) == 0 )
-        FP += int( len ( called - not_Set  ) == 0 )
-        TN += int( len ( not_Set - not_called ) < len(not_Set) )
-        FN += int( len ( thisSet - not_called ) < len(thisSet) )
+    known_OH = create_encoding_journal( known_df.index.values, known_df )
+    known_instances = known_OH.index
+    inferred_OH = known_OH*0
+    for o in inferred_df.iterrows():
+        for known_instance in known_instances:
+            inferred_OH.loc[known_instance,o[0]] = int( known_instance in o[1][inference_label] )
+            
+    OH_not = lambda df:df.apply( lambda x:1-x )
+
+    not_known_OH    = OH_not( known_OH )
+    not_inferred_OH = OH_not(inferred_OH)
+
+    if bVerbose:
+        print(known_OH.iloc[:6,:6])
+        print(not_known_OH.iloc[:6,:6])
+        print(inferred_OH.iloc[:6,:6])
+        print(np.sum(np.sum(inferred_OH.iloc[:6,:6] * known_OH.iloc[:6,:6] )) )
+
+    TP = np.sum( np.sum( inferred_OH     * known_OH     ) )
+    FP = np.sum( np.sum( inferred_OH     * not_known_OH ) )
+    TN = np.sum( np.sum( not_inferred_OH * not_known_OH ) )
+    FN = np.sum( np.sum( not_inferred_OH * known_OH     ) )

    results_lookup = {  'TP':TP , 'TN':TN ,
                'FN':FN , 'FP':FP ,
@@ -1108,30 +1118,18 @@ def calculate_rates( known_df , inferred_df ,
    return ( results_lookup )


-def assign_quality_measures( journal , result_dfs ,
+def assign_quality_measures( journal_df , result_dfs ,
                             formula , inference_label='owner' ,
                             plabel = ',p' , qlabel = ',q' ) :

    for label in [ col for col in result_dfs[0].columns if plabel in col[-2:] ] :
        result_dfs[0].loc[:, label[:-2]+',q'] = [ qvs[0] for qvs in qvalues( result_dfs[0].loc[:,label].values ) ]
-
-    check = []
-    for idx in journal_df.index.values:
-        if idx in formula :
-            check.append( idx )
-    check = list( set(check) )
-    
-    if len( check ) == 0 :
-        print( 'Cannot assert quality' )
-        results_lookup = dict( )
-    else :
-        results_lookup = calculate_rates ( journal_df.loc[check,:].T , res_df[1] ,
+   
+    results_lookup = calculate_rates ( journal_df , result_dfs[1] ,
                          formula , inference_label = inference_label )
    return( results_lookup )


-
-
 if __name__ == '__main__' :

    test_type = 'random'