list_distances.py 2.42 KB
Newer Older
Marco Chierici's avatar
Marco Chierici committed
1
2
3
4
5
import argparse
import pandas as pd
import numpy as np
from mlpy import canberra_stability
from itertools import combinations
Marco Chierici's avatar
Marco Chierici committed
6
from pathlib import Path
Marco Chierici's avatar
Marco Chierici committed
7
8
9
10
11

# Command-line interface.
# Options that have no usable default are marked required so that a missing
# one is reported by argparse itself (usage message, exit code 2) instead of
# surfacing later as a TypeError on a None value.
parser = argparse.ArgumentParser()
parser.add_argument('--resultsdir', type=str, required=True, help='Results folder')
parser.add_argument('--dataset', type=str, required=True, help='Dataset name')
parser.add_argument('--target', type=str, required=True, help='Clinical endpoint')
parser.add_argument('--model', type=str, default='randomForest', help='Model (default: %(default)s)')
parser.add_argument('--nf_min', type=int, default=10, help='Min #feat (default: %(default)s)')
parser.add_argument('--nf_max', type=int, default=50, help='Max #feat (default: %(default)s)')
parser.add_argument('--nf_step', type=int, default=10, help='Increase by these many feat (default: %(default)s)')
# Optional: defaults to an empty list (rather than None) so that code
# iterating over the value simply does nothing when the option is omitted.
parser.add_argument('--nf_rsnf', type=int, nargs='+', default=[], help='One or more #feat for rSNF')
parser.add_argument(
    '--layers',
    type=str,
    nargs='+',
    required=True,
    help='Layer names; every combination of 2 or more layers is evaluated',
)
args = parser.parse_args()

# Unpack the parsed command-line options into module-level constants.
RESULTSDIR, DATASET, TARGET, MODEL = (
    args.resultsdir,  # top-level results directory
    args.dataset,  # e.g. 'tcga_breast'
    args.target,  # e.g. 'ER'
    args.model,
)

# Feature-count settings: a min/max/step grid plus the extra rSNF sizes.
NF_MIN, NF_MAX, NF_STEP = args.nf_min, args.nf_max, args.nf_step
NF_RSNF = args.nf_rsnf

# MODE is embedded in the Borda file names and in the printed report lines.
MODE = 'rSNF'
LAYERS = args.layers
N_LAYERS = len(LAYERS)

Marco Chierici's avatar
Marco Chierici committed
32
33
34
35
36
37
38
39
# Fail early if either results tree is missing.
# Explicit raises are used instead of `assert` because assertions are
# stripped when Python runs with -O, which would silently skip the check.
for _datatype in (DATASET, f"{DATASET}_SNFdap"):
    if not Path(RESULTSDIR, _datatype).expanduser().exists():
        raise FileNotFoundError(f"{RESULTSDIR}/{_datatype} not found")

Marco Chierici's avatar
Marco Chierici committed
40
41
42
43
44
45
46
47
48
49
# For every combination of two or more layers, load the Borda file from the
# DATASET tree and from the DATASET_SNFdap tree, turn the two feature lists
# into rank vectors, and print their Canberra stability at several list sizes.
for k in range(2, N_LAYERS + 1):
    for comb in combinations(LAYERS, k):
        layers_concat = '_'.join(comb)
        borda_name = f'Borda_splits_50-60_{MODE}_{layers_concat}.txt'
        # expanduser() keeps these paths consistent with the existence
        # checks performed on the results directories.
        bordas = [
            pd.read_csv(
                Path(RESULTSDIR, datatype, TARGET, MODEL, borda_name).expanduser(),
                sep='\t',
                index_col=None,
            )
            for datatype in (DATASET, f'{DATASET}_SNFdap')
        ]
        # prepare ranks for canberra_stability: argsort of FEATURE_ID gives,
        # for the features taken in sorted-ID order, their row position in
        # each Borda file; the result has one row per file.
        # NOTE(review): assumes both files list the same set of FEATURE_IDs
        # and that row order is the ranking order -- confirm.
        ranks = pd.concat(
            [np.argsort(borda['FEATURE_ID']) for borda in bordas],
            axis=1,
        ).transpose().values

        # NF_MAX is inclusive; range(..., NF_MAX + 1, ...) never steps past
        # it, whereas an end of NF_MAX + NF_STEP overshoots whenever
        # (NF_MAX - NF_MIN) is not a multiple of NF_STEP.
        for nf in range(NF_MIN, NF_MAX + 1, NF_STEP):
            cs = canberra_stability(ranks, nf)
            print(f'{MODE} - {layers_concat} - stability({nf}) = {cs:.3f}')
        # additional steps for NF_RSNF
        print()
        # NF_RSNF is None when --nf_rsnf was not given; treat that as empty.
        for nf in NF_RSNF or ():
            cs = canberra_stability(ranks, nf)
            print(f'{MODE} - {layers_concat} - stability({nf}) = {cs:.3f}')
        print()
        print()