Commit d36a1631 authored by Alessia Marcolini

Add concat_layers step to Snakefile

parent 5165c77e
SPLIT_ID = config['split_id']
LAYERS = [config[k] for k in config.keys() if k.startswith('layer')]
LAYERS_CONCAT = "_".join(LAYERS)
LAYERS_SPACED = " ".join(LAYERS)
rule all:
expand("{outfolder}/{dataset}/{target}/{model}/{split_id}/single/{layer}_tr_MCC_scores.txt",
outfolder=OUTFOLDER, dataset=DATASET, target=TARGET, model=MODEL, layer=LAYERS, split_id=SPLIT_ID)
# Build the concatenated multi-omics tables for one split: takes the single-layer
# train/test feature files and produces the merged <layer1>_<layer2>..._{tr,ts}.txt
# files by delegating to preprocessing/concat_layers.py.
# NOTE: the scraped source lost all indentation; structure restored per Snakemake syntax.
rule concat_layers:
    input:
        # One train and one test table per single layer.
        expand("{datafolder}/{dataset}/{target}/{split_id}/{layer}_tr.txt",
            datafolder=DATAFOLDER, dataset=DATASET, target=TARGET, split_id=SPLIT_ID, layer=LAYERS),
        expand("{datafolder}/{dataset}/{target}/{split_id}/{layer}_ts.txt",
            datafolder=DATAFOLDER, dataset=DATASET, target=TARGET, split_id=SPLIT_ID, layer=LAYERS)
    output:
        # The full concatenation (all layers joined with '_') for train and test.
        expand("{datafolder}/{dataset}/{target}/{split_id}/{layer}_tr.txt",
            datafolder=DATAFOLDER, dataset=DATASET, target=TARGET, split_id=SPLIT_ID, layer=LAYERS_CONCAT),
        expand("{datafolder}/{dataset}/{target}/{split_id}/{layer}_ts.txt",
            datafolder=DATAFOLDER, dataset=DATASET, target=TARGET, split_id=SPLIT_ID, layer=LAYERS_CONCAT)
    shell:
        # f-string is evaluated by Python before Snakemake runs the command, so the
        # config values are baked in directly.
        f"python preprocessing/concat_layers.py --datafolder {DATAFOLDER} --dataset {DATASET} --target {TARGET} --layers {LAYERS_SPACED} --split_id {SPLIT_ID}"
rule ml_juxt_tr:
input:
os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layers}_tr.txt"),
......@@ -42,6 +57,7 @@ rule ml_juxt_tr:
rule ml_juxt_val:
input:
"{outfolder}/{dataset}/{target}/{model}/{split_id}/juxt/{layers}_tr_{model}_KBest.log",
os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layers}_tr.txt"),
os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layers}_ts.txt"),
os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/labels_{target}_ts.txt")
output:
......
......@@ -25,8 +25,9 @@ parser = myArgumentParser(
# CLI arguments for the layer-concatenation preprocessing step.
# Defect fixed: --split_id, --layers and --n_splits had empty help strings,
# leaving the CLI undocumented; flags and types are unchanged.
parser.add_argument('--datafolder', type=str, help='Main data folder')
parser.add_argument('--dataset', type=str, help='Dataset name')
parser.add_argument('--target', type=str, help='Clinical endpoint')
parser.add_argument('--split_id', type=int, help='Index of the train/test split to process')
parser.add_argument('--layers', type=str, nargs='+', help='Omics layer names to concatenate')
parser.add_argument('--n_splits', type=int, help='Total number of train/test splits')
args = parser.parse_args()
DATAFOLDER = args.datafolder
# Unpack parsed CLI arguments into module-level constants used by the main loop.
DATASET = args.dataset
TARGET = args.target          # clinical endpoint name
LAYERS = args.layers          # list of omics layer names (nargs='+')
N_SPLITS = args.n_splits      # NOTE(review): appears superseded by --split_id in this commit — confirm
SPLIT_ID = args.split_id      # index of the single train/test split to process
print(LAYERS)                 # sanity echo of the requested layers
#%%
# Concatenate single-omics feature tables for one train/test split.
# Defect fixed: the scraped diff interleaved the pre-commit body (looping over
# range(N_SPLITS)) with the post-commit body (single SPLIT_ID), duplicating every
# statement and destroying indentation. This is the reconstructed post-commit
# version: for every combination of >= 2 layers, horizontally merge the per-layer
# train and test tables on the 'Sample' column and write the result as
# <layerA>_<layerB>[...]_tr.txt / _ts.txt next to the inputs.
PATH = f'{DATAFOLDER}/{DATASET}/{TARGET}/{SPLIT_ID}'

for k in range(2, len(LAYERS) + 1):
    for comb in combinations(LAYERS, k):
        single_dfs_tr = []
        single_dfs_ts = []
        for layer in comb:
            single_dfs_tr.append(pd.read_csv(f'{PATH}/{layer}_tr.txt', sep='\t'))
            single_dfs_ts.append(pd.read_csv(f'{PATH}/{layer}_ts.txt', sep='\t'))

        # Inner-join all tables of this combination on the sample identifier,
        # so only samples present in every layer survive.
        merged_tr = reduce(lambda x, y: pd.merge(x, y, on='Sample'), single_dfs_tr)
        merged_ts = reduce(lambda x, y: pd.merge(x, y, on='Sample'), single_dfs_ts)

        layers_concat = '_'.join(comb)
        merged_tr.to_csv(f'{PATH}/{layers_concat}_tr.txt', sep='\t', index=False)
        merged_ts.to_csv(f'{PATH}/{layers_concat}_ts.txt', sep='\t', index=False)
# %%
THREADS=12
# Pipeline driver: run the per-split Snakemake workflow, then aggregate metrics
# and Borda rankings across splits.
# Defects fixed: the scraped diff kept both pre- and post-commit lines, so
# DATASET/LAYER2/LAYER3/TARGET were each assigned twice and every snakemake /
# postprocessing command was issued twice (once with the stale 2-layer argument
# list); also removed a stale commented-out command with a '$MODEL--n_splits'
# spacing bug and fixed the '# go!cc' typo.
OUTFOLDER=results
DATAFOLDER=data
DATASET=tcga_aml

# Omics layers to integrate.
LAYER1=gene
LAYER2=meth
LAYER3=mirna

TARGET=OS      # clinical endpoint
MODEL=LSVM
N_SPLITS=10

# go!
for (( i=0; i<$N_SPLITS; i++ ))
do
    snakemake -s Snakefile_split --cores $THREADS --config datafolder=$DATAFOLDER outfolder=$OUTFOLDER dataset=$DATASET target=$TARGET layer1=$LAYER1 layer2=$LAYER2 layer3=$LAYER3 model=$MODEL split_id=$i -p
done

for MODE in juxt rSNF rSNFi
do
    python postprocessing/compute_all_metrics.py --outfolder $OUTFOLDER --dataset $DATASET --target $TARGET --layers $LAYER1 $LAYER2 $LAYER3 --model $MODEL --n_splits $N_SPLITS --mode $MODE
done

for MODE in juxt rSNF
do
    python postprocessing/borda_global_juxt_rSNF.py --datafolder $DATAFOLDER --outfolder $OUTFOLDER --dataset $DATASET --target $TARGET --layers $LAYER1 $LAYER2 $LAYER3 --model $MODEL --n_splits $N_SPLITS --mode $MODE
done

python postprocessing/borda_global_rSNFi.py --datafolder $DATAFOLDER --outfolder $OUTFOLDER --dataset $DATASET --target $TARGET --layers $LAYER1 $LAYER2 $LAYER3 --model $MODEL --n_splits $N_SPLITS --mode rSNFi
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment