提交 f0486de2 编写于 作者: rictjo's avatar rictjo

OH,TP

上级 798d2310
......@@ -5,7 +5,7 @@ with open("README.md", "r") as fh:
setuptools.setup(
name = "impetuous-gfa",
version = "0.37.1",
version = "0.38.0",
author = "Richard Tjörnhammar",
author_email = "richard.tjornhammar@gmail.com",
description = "Impetuous Quantification, a Statistical Learning library for Humans : Alignments, Clustering, Enrichments and Group Analysis",
......
......@@ -1066,34 +1066,44 @@ def add_kendalltau( analyte_results_df , journal_df , what='M' , sample_names =
return ( analyte_results_df )
def calculate_rates( known_df , inferred_df ,
def calculate_rates( journal_df , inferred_df ,
formula , inference_label = 'owner',
interaction_sep = '-') :
bHaveInteraction = ':' in formula
known_set_values = set(known_df.values.reshape(-1))
bVerbose=False ) :
inferred_labels = []
vals = inferred_df .loc[:,inference_label] .values
TP,FP,FN,TN = 0,0,0,0
check = []
for idx in journal_df.index.values:
if idx in formula :
check.append( idx )
check = list( set(check) )
if len( check ) == 0 :
print( 'Cannot assert quality' )
results_lookup = dict( )
return ( results_lookup )
known_df = journal_df.loc[check,:]
for idx in inferred_df.index.values :
thisSet = set( known_df.loc[idx].values )
not_Set = known_set_values - thisSet
called = inferred_df.loc[ idx,inference_label ]
if bHaveInteraction and called.count(interaction_sep) == 1 :
called = set( called.split(interaction_sep) )
else :
called = set( [ called ] )
not_called = known_set_values - called
TP += int( len ( called - thisSet ) == 0 )
FP += int( len ( called - not_Set ) == 0 )
TN += int( len ( not_Set - not_called ) < len(not_Set) )
FN += int( len ( thisSet - not_called ) < len(thisSet) )
known_OH = create_encoding_journal( known_df.index.values, known_df )
known_instances = known_OH.index
inferred_OH = known_OH*0
for o in inferred_df.iterrows():
for known_instance in known_instances:
inferred_OH.loc[known_instance,o[0]] = int( known_instance in o[1][inference_label] )
OH_not = lambda df:df.apply( lambda x:1-x )
not_known_OH = OH_not( known_OH )
not_inferred_OH = OH_not(inferred_OH)
if bVerbose:
print(known_OH.iloc[:6,:6])
print(not_known_OH.iloc[:6,:6])
print(inferred_OH.iloc[:6,:6])
print(np.sum(np.sum(inferred_OH.iloc[:6,:6] * known_OH.iloc[:6,:6] )) )
TP = np.sum( np.sum( inferred_OH * known_OH ) )
FP = np.sum( np.sum( inferred_OH * not_known_OH ) )
TN = np.sum( np.sum( not_inferred_OH * not_known_OH ) )
FN = np.sum( np.sum( not_inferred_OH * known_OH ) )
results_lookup = { 'TP':TP , 'TN':TN ,
'FN':FN , 'FP':FP ,
......@@ -1108,30 +1118,18 @@ def calculate_rates( known_df , inferred_df ,
return ( results_lookup )
def assign_quality_measures( journal , result_dfs ,
def assign_quality_measures( journal_df , result_dfs ,
formula , inference_label='owner' ,
plabel = ',p' , qlabel = ',q' ) :
for label in [ col for col in result_dfs[0].columns if plabel in col[-2:] ] :
result_dfs[0].loc[:, label[:-2]+',q'] = [ qvs[0] for qvs in qvalues( result_dfs[0].loc[:,label].values ) ]
check = []
for idx in journal_df.index.values:
if idx in formula :
check.append( idx )
check = list( set(check) )
if len( check ) == 0 :
print( 'Cannot assert quality' )
results_lookup = dict( )
else :
results_lookup = calculate_rates ( journal_df.loc[check,:].T , res_df[1] ,
results_lookup = calculate_rates ( journal_df , result_dfs[1] ,
formula , inference_label = inference_label )
return( results_lookup )
if __name__ == '__main__' :
test_type = 'random'
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册