Commit fa0ecd8d authored by Marco Chierici
Browse files

Added support for data splits to Snakefile

parent 14d2515f
......@@ -11,42 +11,42 @@ LAYER2 = config['layer2']
# Default target rule: requests the *_tr_MCC_scores.txt files for the rSNFi
# layer pair (LAYER1, LAYER2) and for each single layer (LAYER1 and LAYER2).
# NOTE(review): this is a rendered diff without +/- markers; the expand() calls
# lacking {split_id} look like the pre-commit targets and the ones with
# {split_id} their post-commit replacements — confirm against the real Snakefile.
# The post-commit targets are enumerated over split ids 0..9.
# NOTE(review): `[i for i in range(10)]` yields the same values as `range(10)`,
# and the split count 10 is hard-coded in three places.
rule all:
input:
expand("{outfolder}/{dataset}/{target}/rSNFi/{layer1}_{layer2}_tr_MCC_scores.txt",
outfolder=OUTFOLDER, dataset=DATASET, target=TARGET, layer1=LAYER1, layer2=LAYER2),
expand("{outfolder}/{dataset}/{target}/single/{layer}_tr_MCC_scores.txt",
outfolder=OUTFOLDER, dataset=DATASET, target=TARGET, layer=LAYER1),
expand("{outfolder}/{dataset}/{target}/single/{layer}_tr_MCC_scores.txt",
outfolder=OUTFOLDER, dataset=DATASET, target=TARGET, layer=LAYER2)
expand("{outfolder}/{dataset}/{target}/{split_id}/rSNFi/{layer1}_{layer2}_tr_MCC_scores.txt",
outfolder=OUTFOLDER, dataset=DATASET, target=TARGET, layer1=LAYER1, layer2=LAYER2, split_id=[i for i in range(10)]),
expand("{outfolder}/{dataset}/{target}/{split_id}/single/{layer}_tr_MCC_scores.txt",
outfolder=OUTFOLDER, dataset=DATASET, target=TARGET, layer=LAYER1, split_id=[i for i in range(10)]),
expand("{outfolder}/{dataset}/{target}/{split_id}/single/{layer}_tr_MCC_scores.txt",
outfolder=OUTFOLDER, dataset=DATASET, target=TARGET, layer=LAYER2, split_id=[i for i in range(10)])
# Runs sklearn_rf_training_fixrank.py (--ranking KBest) on the combined
# {layer1}_{layer2}_tr.txt file and the labels_{target}_tr.txt file, and
# declares the juxt/..._tr_RandomForest_KBest.log file as output.
# NOTE(review): this is a rendered diff without +/- markers; lines lacking
# {split_id} look like the pre-commit version and lines with {split_id} the
# post-commit version — confirm against the real Snakefile.
rule ml_juxt_tr:
input:
os.path.join(DATAFOLDER, "{dataset}/{target}/{layer1}_{layer2}_tr.txt"),
os.path.join(DATAFOLDER, "{dataset}/{target}/labels_{target}_tr.txt")
os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layer1}_{layer2}_tr.txt"),
os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/labels_{target}_tr.txt")
output:
"{outfolder}/{dataset}/{target}/juxt/{layer1}_{layer2}_tr_RandomForest_KBest.log"
"{outfolder}/{dataset}/{target}/{split_id}/juxt/{layer1}_{layer2}_tr_RandomForest_KBest.log"
shell:
"python sklearn_rf_training_fixrank.py {input} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/juxt --ranking KBest"
# Fix: the output directory handed to the script must contain the split id so
# that it matches the {split_id}/juxt/ output path declared above; without it
# every split would target the same juxt/ directory and the declared output
# would presumably never be produced. This mirrors the split-aware shell line
# of the other rules (e.g. ml_juxt_val, ml_rsnfi_tr).
"python sklearn_rf_training_fixrank.py {input} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/juxt --ranking KBest"
# Runs sklearn_rf_validation_writeperf.py with the juxt training log as
# input[0], the {layer1}_{layer2}_ts.txt file as input[1] and the
# labels_{target}_ts.txt file passed via --tslab; declares the juxt
# ..._tr_MCC_scores.txt file as output.
# NOTE(review): rendered diff without +/- markers; lines lacking {split_id}
# look like the pre-commit version, lines with it the post-commit one — confirm.
rule ml_juxt_val:
input:
"{outfolder}/{dataset}/{target}/juxt/{layer1}_{layer2}_tr_RandomForest_KBest.log",
os.path.join(DATAFOLDER, "{dataset}/{target}/{layer1}_{layer2}_ts.txt"),
os.path.join(DATAFOLDER, "{dataset}/{target}/labels_{target}_ts.txt")
"{outfolder}/{dataset}/{target}/{split_id}/juxt/{layer1}_{layer2}_tr_RandomForest_KBest.log",
os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layer1}_{layer2}_ts.txt"),
os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/labels_{target}_ts.txt")
output:
"{outfolder}/{dataset}/{target}/juxt/{layer1}_{layer2}_tr_MCC_scores.txt"
"{outfolder}/{dataset}/{target}/{split_id}/juxt/{layer1}_{layer2}_tr_MCC_scores.txt"
shell:
"python sklearn_rf_validation_writeperf.py {input[0]} {input[1]} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/juxt --tslab {input[2]}"
"python sklearn_rf_validation_writeperf.py {input[0]} {input[1]} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/juxt --tslab {input[2]}"
rule snf:
input:
os.path.join(DATAFOLDER, "{dataset}/{target}/{layer1}_tr.txt"),
os.path.join(DATAFOLDER, "{dataset}/{target}{layer2}_tr.txt"),
os.path.join(DATAFOLDER, "{dataset}/{target}/labels_{target}_tr.txt")
os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layer1}_tr.txt"),
os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layer2}_tr.txt"),
os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/labels_{target}_tr.txt")
threads: 8
output:
"{outfolder}/{dataset}/{target}/rSNF/INF_{layer1}_{layer2}_tr.txt"
"{outfolder}/{dataset}/{target}/{split_id}/rSNF/INF_{layer1}_{layer2}_tr.txt"
shell:
"Rscript snf_integration.R --d1 {input[0]} --d2 {input[1]} --lab {input[2]} \
--scriptDir SNFtools/ --clust spectral --threads {threads} \
......@@ -55,85 +55,85 @@ rule snf:
# Runs sklearn_rf_training_fixrank.py with --ranking rankList on the combined
# {layer1}_{layer2}_tr.txt file (input[0]) and training labels (input[1]),
# passing the rSNF/INF_{layer1}_{layer2}_tr.txt file (input[2]) via --rankFeats;
# declares the rSNF ..._tr_RandomForest_rankList.log file as output.
# NOTE(review): rendered diff without +/- markers; lines lacking {split_id}
# look like the pre-commit version, lines with it the post-commit one — confirm.
rule ml_rsnf_tr:
input:
os.path.join(DATAFOLDER, "{dataset}/{target}/{layer1}_{layer2}_tr.txt"),
os.path.join(DATAFOLDER, "{dataset}/{target}/labels_{target}_tr.txt"),
"{outfolder}/{dataset}/{target}/rSNF/INF_{layer1}_{layer2}_tr.txt"
os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layer1}_{layer2}_tr.txt"),
os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/labels_{target}_tr.txt"),
"{outfolder}/{dataset}/{target}/{split_id}/rSNF/INF_{layer1}_{layer2}_tr.txt"
output:
"{outfolder}/{dataset}/{target}/rSNF/{layer1}_{layer2}_tr_RandomForest_rankList.log"
"{outfolder}/{dataset}/{target}/{split_id}/rSNF/{layer1}_{layer2}_tr_RandomForest_rankList.log"
shell:
"python sklearn_rf_training_fixrank.py {input[0]} {input[1]} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/rSNF --ranking rankList --rankFeats {input[2]}"
"python sklearn_rf_training_fixrank.py {input[0]} {input[1]} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/rSNF --ranking rankList --rankFeats {input[2]}"
# Runs sklearn_rf_validation_writeperf.py with the rSNF training log as
# input[0], the {layer1}_{layer2}_ts.txt file as input[1] and the
# labels_{target}_ts.txt file passed via --tslab; declares the rSNF
# ..._tr_MCC_scores.txt file as output.
# NOTE(review): rendered diff without +/- markers; lines lacking {split_id}
# look like the pre-commit version, lines with it the post-commit one — confirm.
rule ml_rsnf_val:
input:
"{outfolder}/{dataset}/{target}/rSNF/{layer1}_{layer2}_tr_RandomForest_rankList.log",
os.path.join(DATAFOLDER, "{dataset}/{target}/{layer1}_{layer2}_ts.txt"),
os.path.join(DATAFOLDER, "{dataset}/{target}/labels_{target}_ts.txt")
"{outfolder}/{dataset}/{target}/{split_id}/rSNF/{layer1}_{layer2}_tr_RandomForest_rankList.log",
os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layer1}_{layer2}_ts.txt"),
os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/labels_{target}_ts.txt")
output:
"{outfolder}/{dataset}/{target}/rSNF/{layer1}_{layer2}_tr_MCC_scores.txt"
"{outfolder}/{dataset}/{target}/{split_id}/rSNF/{layer1}_{layer2}_tr_MCC_scores.txt"
shell:
"python sklearn_rf_validation_writeperf.py {input[0]} {input[1]} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/rSNF --tslab {input[2]}"
"python sklearn_rf_validation_writeperf.py {input[0]} {input[1]} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/rSNF --tslab {input[2]}"
# Runs intersect_biomarkers.py on the juxt KBest log and the rSNF rankList log,
# passing a venn_{layer1}_{layer2}_tr.png path under rSNFi/ followed by the
# declared output {layer1}_{layer2}_intersect_tr.txt, with the two layer names
# as --title1 / --title2.
# NOTE(review): rendered diff without +/- markers; lines lacking {split_id}
# look like the pre-commit version, lines with it the post-commit one — confirm.
# NOTE(review): the venn PNG path is passed to the script but is not listed
# under output:, so Snakemake does not track it.
rule myintersect:
input:
"{outfolder}/{dataset}/{target}/juxt/{layer1}_{layer2}_tr_RandomForest_KBest.log",
"{outfolder}/{dataset}/{target}/rSNF/{layer1}_{layer2}_tr_RandomForest_rankList.log"
"{outfolder}/{dataset}/{target}/{split_id}/juxt/{layer1}_{layer2}_tr_RandomForest_KBest.log",
"{outfolder}/{dataset}/{target}/{split_id}/rSNF/{layer1}_{layer2}_tr_RandomForest_rankList.log"
output:
"{outfolder}/{dataset}/{target}/rSNFi/{layer1}_{layer2}_intersect_tr.txt"
"{outfolder}/{dataset}/{target}/{split_id}/rSNFi/{layer1}_{layer2}_intersect_tr.txt"
shell:
"python intersect_biomarkers.py {input} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/rSNFi/venn_{wildcards.layer1}_{wildcards.layer2}_tr.png {output} --title1 {wildcards.layer1} --title2 {wildcards.layer2}"
"python intersect_biomarkers.py {input} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/rSNFi/venn_{wildcards.layer1}_{wildcards.layer2}_tr.png {output} --title1 {wildcards.layer1} --title2 {wildcards.layer2}"
# Runs extract_topfeats_onecol.py on the combined {layer1}_{layer2}_tr.txt file
# and the rSNFi intersect file, writing rSNFi/{layer1}_{layer2}_tr.txt.
# The shell command uses only {input} and {output}, so it follows whichever
# input/output paths are declared and needs no split-specific variant.
# NOTE(review): rendered diff without +/- markers; lines lacking {split_id}
# look like the pre-commit version, lines with it the post-commit one — confirm.
rule extract:
input:
os.path.join(DATAFOLDER, "{dataset}/{target}/{layer1}_{layer2}_tr.txt"),
"{outfolder}/{dataset}/{target}/rSNFi/{layer1}_{layer2}_intersect_tr.txt"
os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layer1}_{layer2}_tr.txt"),
"{outfolder}/{dataset}/{target}/{split_id}/rSNFi/{layer1}_{layer2}_intersect_tr.txt"
output:
"{outfolder}/{dataset}/{target}/rSNFi/{layer1}_{layer2}_tr.txt"
"{outfolder}/{dataset}/{target}/{split_id}/rSNFi/{layer1}_{layer2}_tr.txt"
shell:
"python extract_topfeats_onecol.py {input} {output}"
# Runs sklearn_rf_training_fixrank.py (--ranking KBest) on the
# rSNFi/{layer1}_{layer2}_tr.txt file and the labels_{target}_tr.txt file;
# declares the rSNFi ..._tr_RandomForest_KBest.log file as output.
# NOTE(review): rendered diff without +/- markers; lines lacking {split_id}
# look like the pre-commit version, lines with it the post-commit one — confirm.
rule ml_rsnfi_tr:
input:
"{outfolder}/{dataset}/{target}/rSNFi/{layer1}_{layer2}_tr.txt",
os.path.join(DATAFOLDER, "{dataset}/{target}/labels_{target}_tr.txt")
"{outfolder}/{dataset}/{target}/{split_id}/rSNFi/{layer1}_{layer2}_tr.txt",
os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/labels_{target}_tr.txt")
output:
"{outfolder}/{dataset}/{target}/rSNFi/{layer1}_{layer2}_tr_RandomForest_KBest.log"
"{outfolder}/{dataset}/{target}/{split_id}/rSNFi/{layer1}_{layer2}_tr_RandomForest_KBest.log"
shell:
"python sklearn_rf_training_fixrank.py {input} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/rSNFi --ranking KBest"
"python sklearn_rf_training_fixrank.py {input} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/rSNFi --ranking KBest"
# Runs sklearn_rf_validation_writeperf.py with the rSNFi training log as
# input[0], the {layer1}_{layer2}_ts.txt file as input[1] and the
# labels_{target}_ts.txt file passed via --tslab; declares the rSNFi
# ..._tr_MCC_scores.txt file (the rSNFi target requested by `rule all`) as output.
# NOTE(review): rendered diff without +/- markers; lines lacking {split_id}
# look like the pre-commit version, lines with it the post-commit one — confirm.
rule ml_rsnfi_val:
input:
"{outfolder}/{dataset}/{target}/rSNFi/{layer1}_{layer2}_tr_RandomForest_KBest.log",
os.path.join(DATAFOLDER, "{dataset}/{target}/{layer1}_{layer2}_ts.txt"),
os.path.join(DATAFOLDER, "{dataset}/{target}/labels_{target}_ts.txt")
"{outfolder}/{dataset}/{target}/{split_id}/rSNFi/{layer1}_{layer2}_tr_RandomForest_KBest.log",
os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layer1}_{layer2}_ts.txt"),
os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/labels_{target}_ts.txt")
output:
"{outfolder}/{dataset}/{target}/rSNFi/{layer1}_{layer2}_tr_MCC_scores.txt"
"{outfolder}/{dataset}/{target}/{split_id}/rSNFi/{layer1}_{layer2}_tr_MCC_scores.txt"
shell:
"python sklearn_rf_validation_writeperf.py {input[0]} {input[1]} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/rSNFi --tslab {input[2]}"
"python sklearn_rf_validation_writeperf.py {input[0]} {input[1]} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/rSNFi --tslab {input[2]}"
# Runs sklearn_rf_training_fixrank.py (--ranking KBest) on a single layer's
# {layer}_tr.txt file and the labels_{target}_tr.txt file; declares the
# single/{layer}_tr_RandomForest_KBest.log file as output.
# NOTE(review): rendered diff without +/- markers; lines lacking {split_id}
# look like the pre-commit version, lines with it the post-commit one — confirm.
rule single_tr:
input:
os.path.join(DATAFOLDER, "{dataset}/{target}/{layer}_tr.txt"),
os.path.join(DATAFOLDER, "{dataset}/{target}/labels_{target}_tr.txt")
os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layer}_tr.txt"),
os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/labels_{target}_tr.txt")
output:
"{outfolder}/{dataset}/{target}/single/{layer}_tr_RandomForest_KBest.log"
"{outfolder}/{dataset}/{target}/{split_id}/single/{layer}_tr_RandomForest_KBest.log"
shell:
"python sklearn_rf_training_fixrank.py {input} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/single --ranking KBest"
"python sklearn_rf_training_fixrank.py {input} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/single --ranking KBest"
# Runs sklearn_rf_validation_writeperf.py with the single-layer training log as
# input[0], the {layer}_ts.txt file as input[1] and the labels_{target}_ts.txt
# file passed via --tslab; declares the single/{layer}_tr_MCC_scores.txt file
# (the single-layer target requested by `rule all`) as output.
# NOTE(review): rendered diff without +/- markers; lines lacking {split_id}
# look like the pre-commit version, lines with it the post-commit one — confirm.
rule single_val:
input:
"{outfolder}/{dataset}/{target}/single/{layer}_tr_RandomForest_KBest.log",
os.path.join(DATAFOLDER, "{dataset}/{target}/{layer}_ts.txt"),
os.path.join(DATAFOLDER, "{dataset}/{target}/labels_{target}_ts.txt")
"{outfolder}/{dataset}/{target}/{split_id}/single/{layer}_tr_RandomForest_KBest.log",
os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layer}_ts.txt"),
os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/labels_{target}_ts.txt")
output:
"{outfolder}/{dataset}/{target}/single/{layer}_tr_MCC_scores.txt"
"{outfolder}/{dataset}/{target}/{split_id}/single/{layer}_tr_MCC_scores.txt"
shell:
"python sklearn_rf_validation_writeperf.py {input[0]} {input[1]} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/single --tslab {input[2]}"
"python sklearn_rf_validation_writeperf.py {input[0]} {input[1]} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/single --tslab {input[2]}"
# rule single_layer:
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment