Commit 1f73ea1b authored by Nicole Bussola
Browse files

single layer metrics integrated

parent 5da0d54c
#%%
import pandas as pd
import argparse
from itertools import combinations
import numpy as np
import bootstrapped.bootstrap as bs
import bootstrapped.stats_functions as bs_stats
from collections import Counter, OrderedDict
from pathlib import Path
import os
#%%
# Run configuration: these constants select which result files are read and
# where the summary table is written.
# Root directory that contains the results tree.
OUTFOLDER = '/Users/nicole/Desktop/inf_revamped/results'
# Path component under OUTFOLDER.
DATASET = 'tcga_breast'
# Path component under DATASET; the value 'subtypes' disables the
# sensitivity/specificity columns further down in this script.
TARGET ='ER'
# Path component under TARGET, also embedded in the log/metrics file names.
MODEL = 'LSVM'
# Prefix of the per-split log/metrics file names.
LAYERS = 'prot'
# Number of split directories (0 .. N_SPLITS-1) that are read.
N_SPLITS = 10
# Sub-directory of each split that holds the files to read.
MODE = 'single'
assert MODE in ['juxt', 'rSNF', 'rSNFi', 'single']
#%%
# Results directory for this dataset/target/model combination.
PATH = f'{OUTFOLDER}/{DATASET}/{TARGET}/{MODEL}'
# NOTE(review): LAYERS is a string here, so this is its character count
# (4 for 'prot'), not a number of layers -- confirm whether it is still needed.
N_LAYERS = len(LAYERS)
#%%
# Empty summary table. Each metric occupies three adjacent columns:
# the value itself followed by its `_min` and `_max` companions.
df_results = pd.DataFrame(
    columns=[
        'layers',
        'mcc_train', 'mcc_train_min', 'mcc_train_max',
        'auc_train', 'auc_train_min', 'auc_train_max',
        'sens_train', 'sens_train_min', 'sens_train_max',
        'spec_train', 'spec_train_min', 'spec_train_max',
        'mcc_test', 'mcc_test_min', 'mcc_test_max',
        'best_feat',
    ]
)

# Six independent accumulators, filled while iterating over the splits.
all_mccs, all_sens, all_spec, all_aucs, all_test_mccs, best_feat_steps = (
    [] for _ in range(6)
)
def _parse_kbest_log(log_path):
    """Read the selected feature count and the test MCC from a KBest log.

    The log is scanned from the top and the scan stops at the first line
    containing ``n_feats``. The last line containing ``mcc`` seen up to (and
    including) that line supplies the MCC. Both values are taken from the
    text that follows the first ``' = '`` on their line.

    Args:
        log_path: path of the ``*_KBest.log`` file to read.

    Returns:
        Tuple ``(best_feat, mcc_test)`` as ``(int, float)``.

    Raises:
        ValueError: if no ``n_feats`` line, or no ``mcc`` line at or before
            it, is present. Without this check a value left over from the
            previous split would be reused silently.
    """
    mcc_line = None
    feat_line = None
    with open(log_path) as log_file:
        for line in log_file:
            if 'mcc' in line:
                mcc_line = line
            if 'n_feats' in line:
                feat_line = line
                break
    if mcc_line is None or feat_line is None:
        raise ValueError(
            f'{log_path}: expected an "mcc" line followed by an "n_feats" line'
        )
    # strip() instead of [:-1]: a final line without a trailing newline must
    # not lose its last digit.
    best_feat = int(feat_line.split(' = ')[1].strip())
    mcc_test = float(mcc_line.split(' = ')[1].strip())
    return best_feat, mcc_test


# The file-name prefix is the same for every split, so it is set once.
layers_concat = LAYERS

for split_id in range(N_SPLITS):
    # Local name on purpose: the module-level PATH constant is left untouched.
    split_dir = f'{OUTFOLDER}/{DATASET}/{TARGET}/{MODEL}/{split_id}'
    file_log = os.path.join(split_dir, f'{MODE}/{layers_concat}_tr_{MODEL}_KBest.log')
    file_metrics = os.path.join(split_dir, f'{MODE}/{layers_concat}_tr_{MODEL}_KBest_allmetrics.txt')

    best_feat, mcc_test = _parse_kbest_log(file_log)
    best_feat_steps.append(best_feat)
    all_test_mccs.append(mcc_test)

    # Keep only the rows of the tab-separated metrics table whose `nf` value
    # equals the feature count reported in the log.
    all_metrics = pd.read_csv(file_metrics, sep='\t')
    best_rows = all_metrics.loc[all_metrics['nf'] == best_feat]
    # print(best_rows['mcc'].mean(), best_feat)
    all_mccs.extend(best_rows['mcc'])
    all_aucs.extend(best_rows['auc'])
    if TARGET != 'subtypes':
        # Sensitivity/specificity are not collected for the 'subtypes' target.
        all_sens.extend(best_rows['sens'])
        all_spec.extend(best_rows['spec'])
def _mean_ci(values):
    """Return ``(mean, (lower, upper))`` for *values*, rounded to 3 decimals.

    The bounds are the ``lower_bound`` / ``upper_bound`` attributes of the
    result of ``bs.bootstrap`` run with ``bs_stats.mean`` as the statistic.
    """
    ci = bs.bootstrap(values, stat_func=bs_stats.mean)
    return round(np.mean(values), 3), (round(ci.lower_bound, 3), round(ci.upper_bound, 3))


def _summary_columns(prefix, summary):
    """Map a ``(mean, (lower, upper))`` summary onto the three table columns
    ``prefix``, ``prefix_min`` and ``prefix_max``."""
    mean, (lower, upper) = summary
    return {prefix: mean, f'{prefix}_min': lower, f'{prefix}_max': upper}


all_mccs = np.array(all_mccs)
mcc_train = _mean_ci(all_mccs)
print('MCC train =', *mcc_train)

all_aucs = np.array(all_aucs)
auc_train = _mean_ci(all_aucs)
print('AUC train =', *auc_train)

all_test_mccs = np.array(all_test_mccs)
mcc_test_summary = _mean_ci(all_test_mccs)
print('MCC test =', *mcc_test_summary)

if TARGET != 'subtypes':
    all_sens = np.array(all_sens)
    all_spec = np.array(all_spec)
    sens_train = _mean_ci(all_sens)
    spec_train = _mean_ci(all_spec)
    print('SENS =', *sens_train)
    print('SPEC =', *spec_train)
else:
    # No sensitivity/specificity for the 'subtypes' target: leave the cells empty.
    sens_train = spec_train = (np.nan, (np.nan, np.nan))

# Single construction of the output row, in the column order of df_results.
row = OrderedDict([('layers', layers_concat)])
row.update(_summary_columns('mcc_train', mcc_train))
row.update(_summary_columns('auc_train', auc_train))
row.update(_summary_columns('sens_train', sens_train))
row.update(_summary_columns('spec_train', spec_train))
row.update(_summary_columns('mcc_test', mcc_test_summary))
row['best_feat'] = best_feat_steps

print(layers_concat, MODE, 'best_feats =', Counter(best_feat_steps))
print('\n')

# DataFrame.append was removed in pandas 2.0; build a one-row frame and
# concatenate instead. An empty table is replaced outright so that pandas
# does not have to merge dtypes with an all-empty frame.
new_row = pd.DataFrame([row], columns=df_results.columns)
if df_results.empty:
    df_results = new_row
else:
    df_results = pd.concat([df_results, new_row], ignore_index=True)
df_results.to_csv(f'{OUTFOLDER}/{DATASET}/{TARGET}/{MODEL}/metrics_allSplits_{MODE}_{layers_concat}.txt', sep='\t', index=False)
# %%
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment