list_distances.py 1.47 KB
Newer Older
Marco Chierici's avatar
Marco Chierici committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import argparse
import pandas as pd
import numpy as np
from mlpy import canberra_stability
from itertools import combinations

# Command-line interface: location of the results tree and the
# dataset / endpoint / model / layers whose Borda rankings are compared.
parser = argparse.ArgumentParser()
# The next arguments are required: they are interpolated into the input file
# path below, so a missing one would silently produce a path containing 'None'.
parser.add_argument('--resultsdir', type=str, required=True, help='Results folder')
parser.add_argument('--dataset', type=str, required=True, help='Dataset name')
# argparse interpolates defaults with %(default)s; the optparse-style
# "%default" placeholder makes help formatting fail.
parser.add_argument('--model', type=str, default='randomForest', help='Model (default: %(default)s)')
parser.add_argument('--target', type=str, required=True, help='Clinical endpoint')
# Required because len() is taken of the parsed list right below.
parser.add_argument('--layers', type=str, nargs='+', required=True,
                    help='Layer names to combine (at least two are needed to produce any output)')
args = parser.parse_args()

RESULTSDIR = args.resultsdir  # top-level results directory
DATASET = args.dataset  # e.g. 'tcga_breast'
TARGET = args.target  # e.g. 'ER'
MODEL = args.model
LAYERS = args.layers
N_LAYERS = len(LAYERS)
MODE = 'rSNF'  # tag used in the Borda file names and in the printed report

# For every combination of at least two layers, load the Borda table of the
# plain dataset and of its "<dataset>_SNFdap" counterpart, and report the
# Canberra stability between the two for several list lengths.
for subset_size in range(2, N_LAYERS + 1):
    for layer_subset in combinations(LAYERS, subset_size):
        layers_concat = '_'.join(layer_subset)

        # One table per data type, in the order: plain dataset, SNFdap variant.
        borda_tables = [
            pd.read_csv(
                f'{RESULTSDIR}/{datatype}/{TARGET}/{MODEL}/Borda_splits_50-60_{MODE}_{layers_concat}.txt',
                sep='\t',
                index_col=None,
            )
            for datatype in (DATASET, f'{DATASET}_SNFdap')
        ]

        # Build the input for canberra_stability: one row per table, holding
        # the argsort of that table's FEATURE_ID column.
        # NOTE(review): presumably the rows of each Borda file are ordered by
        # rank, so the argsort gives each feature's position in the ranking,
        # and both files list the same set of features -- confirm.
        id_orderings = [np.argsort(table['FEATURE_ID']) for table in borda_tables]
        rank_matrix = pd.concat(id_orderings, axis=1).T.values

        # Evaluate at list lengths 10, 20, 30, 40 and 50.
        for nf in np.arange(10, 60, 10):
            cs = canberra_stability(rank_matrix, nf)
            print(f'{MODE} - {layers_concat} - stability({nf}) = {cs:.3f}')
        print()