Commit 43c00983 authored by Marco Chierici's avatar Marco Chierici
Browse files

Initial support for arbitrary number of omics layers

parent e153ab2d
import os
import subprocess
# These can be set at runtime, e.g.:
# snakemake --config datafolder="mydata" outfolder="out" dataset="breast" target="ER" layer1="gene" layer2="cnv" layer3="prot" (...)
DATAFOLDER = config['datafolder']
OUTFOLDER = config['outfolder']
DATASET = config['dataset']
TARGET = config['target']
# Legacy two-layer bindings, kept because some rules still reference them.
LAYER1 = config['layer1']
LAYER2 = config['layer2']


def _layer_sort_key(key):
    """Order 'layerN' config keys by numeric suffix; non-numeric suffixes last."""
    suffix = key[len('layer'):]
    return (0, int(suffix)) if suffix.isdigit() else (1, suffix)


# Arbitrary number of omics layers: every --config key named layer1, layer2, ...
# Sorted by the numeric suffix so LAYERS_CONCAT is deterministic regardless of
# the order the options were passed (and still correct past 'layer9', where
# plain lexicographic order would put 'layer10' before 'layer2').
LAYERS = [config[k]
          for k in sorted((k for k in config.keys() if k.startswith('layer')),
                          key=_layer_sort_key)]
LAYERS_CONCAT = "_".join(LAYERS)
# Target rule: requests the final train MCC score files for each of the 10
# train/test splits, for the integrated (rSNFi) run and for each single layer.
# NOTE(review): commit-diff residue — the old two-layer targets
# ({layer1}_{layer2}, LAYER1/LAYER2) and the new arbitrary-layer targets
# (LAYERS_CONCAT / LAYERS) are interleaved below, and the final kwargs line
# has lost its enclosing expand(...) call; only one variant of each pair
# belongs in a runnable Snakefile.
rule all:
input:
# old: fixed two-layer integrated targets
expand("{outfolder}/{dataset}/{target}/{split_id}/rSNFi/{layer1}_{layer2}_tr_MCC_scores.txt",
outfolder=OUTFOLDER, dataset=DATASET, target=TARGET, layer1=LAYER1, layer2=LAYER2, split_id=[i for i in range(10)]),
# old: single-layer targets for LAYER1 only
expand("{outfolder}/{dataset}/{target}/{split_id}/single/{layer}_tr_MCC_scores.txt",
outfolder=OUTFOLDER, dataset=DATASET, target=TARGET, layer=LAYER1, split_id=[i for i in range(10)]),
# new: integrated targets for the concatenated layer list
expand("{outfolder}/{dataset}/{target}/{split_id}/rSNFi/{layers}_tr_MCC_scores.txt",
outfolder=OUTFOLDER, dataset=DATASET, target=TARGET, layers=LAYERS_CONCAT, split_id=[i for i in range(10)]),
# old: single-layer targets for LAYER2 only
expand("{outfolder}/{dataset}/{target}/{split_id}/single/{layer}_tr_MCC_scores.txt",
outfolder=OUTFOLDER, dataset=DATASET, target=TARGET, layer=LAYER2, split_id=[i for i in range(10)])
# new (orphaned continuation): single-layer targets iterated over all LAYERS
outfolder=OUTFOLDER, dataset=DATASET, target=TARGET, layer=LAYERS, split_id=[i for i in range(10)])
# Train a Random Forest (with KBest feature ranking) on the juxtaposed
# (feature-concatenated) training matrix plus its training labels.
# NOTE(review): commit-diff residue — the old {layer1}_{layer2} and new
# {layers} input/output lines are both present, and the two output strings
# lack a separating comma; only one variant of each pair should survive.
rule ml_juxt_tr:
input:
os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layer1}_{layer2}_tr.txt"),
os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layers}_tr.txt"),
os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/labels_{target}_tr.txt")
output:
"{outfolder}/{dataset}/{target}/{split_id}/juxt/{layer1}_{layer2}_tr_RandomForest_KBest.log"
"{outfolder}/{dataset}/{target}/{split_id}/juxt/{layers}_tr_RandomForest_KBest.log"
shell:
# NOTE(review): the output directory here omits {wildcards.split_id},
# unlike ml_juxt_val's ".../{split_id}/juxt" — looks inconsistent with
# the declared output: paths above; confirm against the training script.
"python sklearn_rf_training_fixrank.py {input} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/juxt --ranking KBest"
# Validate the trained juxtaposed model on the test split and write the
# MCC score file consumed by rule all.
# NOTE(review): commit-diff residue — old {layer1}_{layer2} and new {layers}
# input/output lines are interleaved (and the two output strings lack a
# comma). With the duplicated input list, the {input[0]}/{input[1]}/{input[2]}
# indices in the shell command no longer line up with (model log, test
# matrix, test labels); only one variant of each pair should remain.
rule ml_juxt_val:
input:
"{outfolder}/{dataset}/{target}/{split_id}/juxt/{layer1}_{layer2}_tr_RandomForest_KBest.log",
os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layer1}_{layer2}_ts.txt"),
"{outfolder}/{dataset}/{target}/{split_id}/juxt/{layers}_tr_RandomForest_KBest.log",
os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layers}_ts.txt"),
os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/labels_{target}_ts.txt")
output:
"{outfolder}/{dataset}/{target}/{split_id}/juxt/{layer1}_{layer2}_tr_MCC_scores.txt"
"{outfolder}/{dataset}/{target}/{split_id}/juxt/{layers}_tr_MCC_scores.txt"
shell:
"python sklearn_rf_validation_writeperf.py {input[0]} {input[1]} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/juxt --tslab {input[2]}"
# Run Similarity Network Fusion (R script) over the per-layer training
# matrices to produce the integrated ranking used by the rSNF steps.
# NOTE(review): commit-diff residue — the old fixed two-layer variant
# (positional inputs + shell: with --d1/--d2) and the new arbitrary-layer
# variant (expand()-ed inputs + run: block via subprocess) are interleaved
# below. A rule cannot carry both shell: and run:, the old shell string's
# trailing backslash runs into the new expand() call, and the --outf line
# appears twice; only the new variant should survive.
rule snf:
input:
os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layer1}_tr.txt"),
os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layer2}_tr.txt"),
os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/labels_{target}_tr.txt")
expand("{datafolder}/{dataset}/{target}/{split_id}/{layer}_tr.txt",
datafolder=DATAFOLDER, dataset=DATASET, target=TARGET, layer=LAYERS, split_id=[i for i in range(10)]),
expand("{datafolder}/{dataset}/{target}/{split_id}/labels_{target}_tr.txt",
datafolder=DATAFOLDER, dataset=DATASET, target=TARGET, split_id=[i for i in range(10)])
threads: 8
output:
"{outfolder}/{dataset}/{target}/{split_id}/rSNF/INF_{layer1}_{layer2}_tr.txt"
shell:
"Rscript snf_integration.R --d1 {input[0]} --d2 {input[1]} --lab {input[2]} \
expand("{outfolder}/{dataset}/{target}/{split_id}/rSNF/INF_{layers}_tr.txt",
outfolder=OUTFOLDER, dataset=DATASET, target=TARGET, layers=LAYERS_CONCAT, split_id=[i for i in range(10)])
run:
# New variant: every input except the last is a data matrix; the last
# entry is the labels file.
all_input = [i[1] for i in input.allitems()]
inputfiles = " ".join(all_input[:-1])
labfile = all_input[-1]
# NOTE(review): shell=True with interpolated file paths — acceptable for
# trusted local paths, but a subprocess.run([...]) argument list would be
# safer and avoids quoting issues in paths with spaces.
subprocess.call(f"Rscript snf_integration.R --data {inputfiles} --lab {labfile} \
--scriptDir SNFtools/ --clust spectral --threads {threads} \
--outf {output}"
--outf {output}", shell=True)
rule ml_rsnf_tr:
......@@ -134,18 +139,3 @@ rule single_val:
"{outfolder}/{dataset}/{target}/{split_id}/single/{layer}_tr_MCC_scores.txt"
shell:
"python sklearn_rf_validation_writeperf.py {input[0]} {input[1]} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/single --tslab {input[2]}"
# rule single_layer:
# input:
# "data/{layer}_tr.txt",
# "data/labels_{target}_tr.txt",
# "data/{layer}_ts.txt",
# "data/labels_{target}_ts.txt"
# output:
# "out/{target}/single/{layer}_tr_MCC_scores.txt"
# shell:
# """
# python sklearn_rf_training_fixrank.py {input[0]} {input[1]} out/{wildcards.target}/single --ranking KBest
# python sklearn_rf_validation_writeperf.py out/{wildcards.target}/single/{wildcards.layer}_tr_RandomForest_KBest.log {input[2]} out/{wildcards.target}/single --tslab {input[3]}
# """
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment