Commit 1d24d86a authored by Nicole Bussola
Browse files

Compute Borda ranking among all splits

parent df7bb7ef
#%%
import os
import numpy as np
import pandas as pd
from mlpy import borda_count
from input_output import load_data
#%%
# Locations of the input data and of the per-split results.
DATA_PATH = 'data/tcga_breast/subtypes'
PATH = 'results/tcga_breast/subtypes'

# Number of splits to aggregate; CV_K and CV_N are defined here but are not
# read by the code visible in this script.
N_SPLIT = 10
CV_K = 5
CV_N = 10

mode = 'rSNF'
assert mode in ('juxt', 'rSNF', 'single')

N_LAYERS = 3
if N_LAYERS == 1:
    # With exactly one layer the mode has to be 'single'.
    assert mode == 'single'
    single_layer = 'prot'
else:
    layer1, layer2, layer3 = 'gene', 'cnv', 'prot'

# `layers` is the underscore-joined tag embedded in the data/result file names;
# each branch checks that the tag holds the expected number of parts.
if N_LAYERS == 3:
    layers = f'{layer1}_{layer2}_{layer3}'
    assert layers.count('_') == 2
elif N_LAYERS == 2:
    layers = f'{layer1}_{layer2}'
    assert layers.count('_') == 1
else:
    layers = f'{single_layer}'
    assert layers.count('_') == 0

# Feature names are taken from the training file of split 0.
_, var_names, _ = load_data(os.path.join(DATA_PATH, f'0/{layers}_tr.txt'))
rankings = []
#%%
# One ranking file per split: rSNF runs are stored under the "rankList" tag,
# every other mode under the "KBest" tag.
if mode == 'rSNF':
    ranking_files = [
        os.path.join(PATH, f'{i}/{mode}/{layers}_tr_RandomForest_rankList_ranking.csv.gz')
        for i in range(N_SPLIT)
    ]
else:
    ranking_files = [
        os.path.join(PATH, f'{i}/{mode}/{layers}_tr_RandomForest_KBest_ranking.csv.gz')
        for i in range(N_SPLIT)
    ]

# Each file is read as tab-separated with no header row; the resulting arrays
# are stacked vertically, in split order.
rankings = np.vstack([
    pd.read_csv(ranking_file, header=None, sep='\t').values
    for ranking_file in ranking_files
])
# %%
# Aggregate the stacked rankings with mlpy's borda_count. The first and third
# returned values are kept (used below as feature ids and positions); the
# second one is discarded.
BORDA_ID, _, BORDA_POS = borda_count(rankings)
# Bare expression with no effect in a plain script run; presumably left in for
# inspecting the values when the cell is executed interactively.
len(rankings),BORDA_ID
# %%
# Build the Borda table in a single constructor call. The previous row-by-row
# DataFrame.append was removed in pandas 2.0 and re-copied the whole frame on
# every iteration.
# Each row is (feature id, feature name, position + 1).
borda_rows = [(i, var_names[i], pos + 1) for i, pos in zip(BORDA_ID, BORDA_POS)]
borda_df = pd.DataFrame(borda_rows, columns=["FEATURE_ID", "FEATURE_NAME", "MEAN_POS"])
# NOTE(review): columns now carry inferred dtypes instead of the object dtype
# produced by appending to an empty frame, so `float_format` should apply to
# MEAN_POS -- confirm downstream readers are fine with 3-decimal values.
# NOTE(review): "Spilts" looks like a typo of "Splits"; the file name is left
# unchanged because it is part of the script's output.
borda_df.to_csv(f"{PATH}/Borda_allSpilts_{mode}_{layers}.txt", sep='\t', index=False, float_format="%.3f")
# %%
#%%
import os
import numpy as np
import pandas as pd
import argparse
from itertools import combinations
from mlpy import borda_count
from input_output import load_data
#%%
class myArgumentParser(argparse.ArgumentParser):
    """ArgumentParser with a custom line format for argument files.

    When arguments are read from a file (see ``fromfile_prefix_chars``), each
    line may hold several whitespace-separated arguments, and everything from
    the first token that starts with ``#`` to the end of the line is ignored.
    """

    def convert_arg_line_to_args(self, line):
        """Yield the arguments found on one line of an argument file.

        Args:
            line: A single line of text read from the argument file.

        Yields:
            Each whitespace-separated token, in order, stopping at the first
            token that begins with ``#``.
        """
        # str.split() without a separator never produces empty or
        # whitespace-only tokens, so no blank-token check is needed.
        for arg in line.split():
            if arg.startswith('#'):
                break
            yield arg
# Command-line interface. Arguments may also be supplied through a file
# referenced as "@path" on the command line.
parser = myArgumentParser(
    description='Compute Borda ranking among all splits', fromfile_prefix_chars='@'
)
# Every option is consumed unconditionally below (path building, len(), range()),
# so all of them are required: a missing one is reported by argparse instead of
# surfacing later as a TypeError on None.
parser.add_argument('--datafolder', type=str, required=True, help='Main data folder')
parser.add_argument('--outfolder', type=str, required=True, help='Results folder')
parser.add_argument('--dataset', type=str, required=True, help='Dataset name')
parser.add_argument('--target', type=str, required=True, help='Clinical endpoint')
parser.add_argument('--layers', type=str, nargs='+', required=True, help='Omic layer names to combine')
parser.add_argument('--n_splits', type=int, required=True, help='Number of splits to aggregate')
# The accepted values are the same set the former assert enforced. The old help
# text mentioned "rSNFi", which was never accepted, and omitted "juxt".
parser.add_argument(
    '--mode', type=str, required=True, choices=['juxt', 'rSNF', 'single'],
    help='Integration mode',
)
args = parser.parse_args()

DATAFOLDER = args.datafolder
DATASET = args.dataset
OUTFOLDER = args.outfolder
TARGET = args.target
LAYERS = args.layers
N_SPLITS = args.n_splits
MODE = args.mode
N_LAYERS = len(LAYERS)
#%%
# For every combination of at least two layers, stack the per-split
# RandomForest rankings, aggregate them with a Borda count, and write one
# tab-separated table per combination.
# NOTE(review): k starts at 2, so single-layer combinations are never processed
# even though 'single' is an accepted mode -- confirm this is intended.
for k in range(2, N_LAYERS + 1):
    for comb in combinations(LAYERS, k):
        layers_concat = '_'.join(comb)
        # Feature names are taken from the training file of split 0.
        _, var_names, _ = load_data(os.path.join(DATAFOLDER, DATASET, TARGET, f'0/{layers_concat}_tr.txt'))

        rankings = []
        for i in range(N_SPLITS):
            # rSNF runs store their ranking under the "rankList" tag, the other
            # modes under "KBest".
            if MODE == 'rSNF':
                file_ranking = os.path.join(OUTFOLDER, DATASET, TARGET, f'{i}/{MODE}/{layers_concat}_tr_RandomForest_rankList_ranking.csv.gz')
            else:
                file_ranking = os.path.join(OUTFOLDER, DATASET, TARGET, f'{i}/{MODE}/{layers_concat}_tr_RandomForest_KBest_ranking.csv.gz')
            # Ranking files are tab-separated and have no header row.
            rankings.append(pd.read_csv(file_ranking, header=None, sep='\t').values)
        rankings = np.vstack(rankings)

        # First and third return values are used as feature ids and positions.
        BORDA_ID, _, BORDA_POS = borda_count(rankings)

        # Build the table in one constructor call: row-by-row DataFrame.append
        # was removed in pandas 2.0 and re-copied the frame on every iteration.
        # Each row is (feature id, feature name, position + 1).
        borda_rows = [(i, var_names[i], pos + 1) for i, pos in zip(BORDA_ID, BORDA_POS)]
        borda_df = pd.DataFrame(borda_rows, columns=["FEATURE_ID", "FEATURE_NAME", "MEAN_POS"])
        # NOTE(review): columns now carry inferred dtypes instead of the object
        # dtype produced by appending to an empty frame, so `float_format`
        # should apply to MEAN_POS -- confirm downstream readers accept that.
        borda_df.to_csv(f"{OUTFOLDER}/{DATASET}/{TARGET}/Borda_allSpilts_{MODE}_{layers_concat}.txt", sep='\t', index=False, float_format="%.3f")
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment