"""Print the Canberra stability of rSNF Borda feature rankings.

For every combination of two or more ``--layers``, the Borda ranking files
stored under ``<resultsdir>/<dataset>`` and ``<resultsdir>/<dataset>_SNFdap``
are loaded, and ``mlpy.canberra_stability`` is evaluated on the pair of
rankings for a grid of feature counts (``--nf_min`` .. ``--nf_max`` in steps
of ``--nf_step``) plus any extra counts given with ``--nf_rsnf``.
Results are printed to stdout.
"""
import argparse
from itertools import combinations
from pathlib import Path

import numpy as np
import pandas as pd
from mlpy import canberra_stability

parser = argparse.ArgumentParser()
# These four options have no usable default: without them the script cannot
# build a single input path, so let argparse reject the call up front instead
# of failing later with a TypeError.
parser.add_argument('--resultsdir', type=str, required=True, help='Results folder')
parser.add_argument('--dataset', type=str, required=True, help='Dataset name')
parser.add_argument('--target', type=str, required=True, help='Clinical endpoint')
parser.add_argument('--model', type=str, default='randomForest', help='Model (default: %(default)s)')
parser.add_argument('--nf_min', type=int, default=10, help='Min #feat (default: %(default)s)')
parser.add_argument('--nf_max', type=int, default=50, help='Max #feat (default: %(default)s)')
parser.add_argument('--nf_step', type=int, default=10, help='Increase by these many feat (default: %(default)s)')
parser.add_argument('--nf_rsnf', type=int, nargs='+', help='One or more #feat for rSNF')
parser.add_argument('--layers', type=str, nargs='+', required=True, help='')

args = parser.parse_args()

RESULTSDIR = args.resultsdir  # top-level results directory
DATASET = args.dataset  # 'tcga_breast'
TARGET = args.target  # 'ER'
MODEL = args.model
NF_MIN = args.nf_min
NF_MAX = args.nf_max
NF_STEP = args.nf_step
# --nf_rsnf is optional; treat "not given" as "no additional feature counts".
NF_RSNF = args.nf_rsnf or []
LAYERS = args.layers
N_LAYERS = len(LAYERS)
MODE = 'rSNF'

# The two result trees whose Borda rankings are compared against each other.
DATATYPES = (DATASET, f'{DATASET}_SNFdap')

# Explicit check rather than `assert`, which is removed when running with -O.
for datatype in DATATYPES:
    if not Path(RESULTSDIR, datatype).expanduser().exists():
        raise FileNotFoundError(f"{RESULTSDIR}/{datatype} not found")

# Every combination of at least two layers.
for k in range(2, N_LAYERS + 1):
    for comb in combinations(LAYERS, k):
        layers_concat = '_'.join(comb)

        bordas = []
        for datatype in DATATYPES:
            bordaf = f'{RESULTSDIR}/{datatype}/{TARGET}/{MODEL}/Borda_splits_50-60_{MODE}_{layers_concat}.txt'
            bordas.append(pd.read_csv(bordaf, sep='\t', index_col=None))

        # prepare ranks for canberra_stability: one row per datatype, built
        # from the argsort of each file's FEATURE_ID column
        ranks = pd.concat(
            [np.argsort(borda['FEATURE_ID']) for borda in bordas],
            axis=1,
        ).transpose().values

        # Stop is NF_MAX + NF_STEP so that NF_MAX itself is reached when it
        # lies on the NF_MIN + i * NF_STEP grid.
        for nf in np.arange(NF_MIN, NF_MAX + NF_STEP, NF_STEP):
            cs = canberra_stability(ranks, nf)
            print(f'{MODE} - {layers_concat} - stability({nf}) = {cs:.3f}')

        # additional steps for NF_RSNF
        print()
        for nf in NF_RSNF:
            cs = canberra_stability(ranks, nf)
            print(f'{MODE} - {layers_concat} - stability({nf}) = {cs:.3f}')

        # NOTE(review): placement of the two trailing blank-line prints was
        # reconstructed (one per combination, one per combination size) --
        # confirm against the intended output layout.
        print()
    print()