# Snakefile
1
import os
2
import subprocess
3
4

# These values can be set at runtime, e.g.:
# snakemake --config datafolder="mydata" outfolder="out" dataset="breast" target="ER" layer1="gene" layer2="cnv" layer3="prot" (...)
6
7
8
9
# Mandatory workflow parameters, read from the Snakemake `config` mapping
# (a missing key raises KeyError when the Snakefile is parsed).
DATAFOLDER, OUTFOLDER, DATASET, TARGET = (
    config[name] for name in ('datafolder', 'outfolder', 'dataset', 'target')
)

# Every config entry whose key starts with "layer" contributes one layer name,
# in the order the config mapping yields its keys.
LAYERS = [value for key, value in config.items() if key.startswith('layer')]
# Layer names joined with "_", used as the file-name stem of the combined data.
LAYERS_CONCAT = "_".join(LAYERS)
13
14
15

# Split identifiers requested by the default target (0..9).
_ALL_SPLITS = list(range(10))

rule all:
    # Default target: for each of the ten splits, request the rSNFi MCC score
    # file of the concatenated layers and the "single" MCC score file of each
    # individual layer.
    input:
        expand(
            "{outfolder}/{dataset}/{target}/{split_id}/rSNFi/{layers}_tr_MCC_scores.txt",
            outfolder=OUTFOLDER,
            dataset=DATASET,
            target=TARGET,
            layers=LAYERS_CONCAT,
            split_id=_ALL_SPLITS
        ),
        expand(
            "{outfolder}/{dataset}/{target}/{split_id}/single/{layer}_tr_MCC_scores.txt",
            outfolder=OUTFOLDER,
            dataset=DATASET,
            target=TARGET,
            layer=LAYERS,
            split_id=_ALL_SPLITS
        )
20
21
22

rule ml_juxt_tr:
    # Runs sklearn_rf_training_fixrank.py with `--ranking KBest` on the
    # `{layers}` training matrix and the training labels of one split.
    #
    # The directory handed to the script must contain the split id: the
    # declared output lives under `{split_id}/juxt/`, and without it every
    # split would be sent to the same `juxt` directory, away from the path
    # Snakemake checks for after the job.
    input:
        os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layers}_tr.txt"),
        os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/labels_{target}_tr.txt")
    output:
        "{outfolder}/{dataset}/{target}/{split_id}/juxt/{layers}_tr_RandomForest_KBest.log"
    shell:
        "python sklearn_rf_training_fixrank.py {input} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/juxt --ranking KBest"


rule ml_juxt_val:
    # Runs sklearn_rf_validation_writeperf.py for one split: the first
    # argument is the KBest training log from the juxt directory, the second
    # the `{layers}` test matrix, and the test labels go to `--tslab`.
    input:
        trainlog="{outfolder}/{dataset}/{target}/{split_id}/juxt/{layers}_tr_RandomForest_KBest.log",
        data=os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layers}_ts.txt"),
        labels=os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/labels_{target}_ts.txt")
    output:
        "{outfolder}/{dataset}/{target}/{split_id}/juxt/{layers}_tr_MCC_scores.txt"
    shell:
        "python sklearn_rf_validation_writeperf.py {input[0]} {input[1]} "
        "{wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/juxt "
        "--tslab {input[2]}"
40

41

42
43
rule snf:
    # Runs snf_integration.R once per split on the training matrices of all
    # configured layers (`--data`) and that split's training labels (`--lab`),
    # writing the single INF_<layers>_tr.txt file of that split (`--outf`).
    #
    # dataset/target/split_id/outfolder are kept as wildcards (escaped with
    # double braces inside expand) so that each job only sees the files of its
    # own split; only the layer names are expanded from the configuration.
    # The `shell:` directive is used so that a failing Rscript call fails the
    # job instead of being silently ignored.
    input:
        data=expand(
            "{datafolder}/{{dataset}}/{{target}}/{{split_id}}/{layer}_tr.txt",
            datafolder=DATAFOLDER,
            layer=LAYERS
        ),
        labels=expand(
            "{datafolder}/{{dataset}}/{{target}}/{{split_id}}/labels_{{target}}_tr.txt",
            datafolder=DATAFOLDER
        )
    threads: 8
    output:
        expand(
            "{{outfolder}}/{{dataset}}/{{target}}/{{split_id}}/rSNF/INF_{layers}_tr.txt",
            layers=LAYERS_CONCAT
        )
    shell:
        "Rscript snf_integration.R --data {input.data} --lab {input.labels} "
        "--scriptDir SNFtools/ --clust spectral --threads {threads} "
        "--outf {output}"
59
60
61
62


rule ml_rsnf_tr:
    # Runs sklearn_rf_training_fixrank.py with `--ranking rankList` on the
    # `{layer1}_{layer2}` training matrix and labels of one split; the
    # INF_{layer1}_{layer2}_tr.txt file from the rSNF directory is passed as
    # `--rankFeats`.
    input:
        data=os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layer1}_{layer2}_tr.txt"),
        labels=os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/labels_{target}_tr.txt"),
        ranking="{outfolder}/{dataset}/{target}/{split_id}/rSNF/INF_{layer1}_{layer2}_tr.txt"
    output:
        "{outfolder}/{dataset}/{target}/{split_id}/rSNF/{layer1}_{layer2}_tr_RandomForest_rankList.log"
    shell:
        "python sklearn_rf_training_fixrank.py {input[0]} {input[1]} "
        "{wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/rSNF "
        "--ranking rankList --rankFeats {input[2]}"
70
71
72
73


rule ml_rsnf_val:
    # Runs sklearn_rf_validation_writeperf.py for one split: the first
    # argument is the rankList training log from the rSNF directory, the second
    # the `{layer1}_{layer2}` test matrix, and the test labels go to `--tslab`.
    input:
        trainlog="{outfolder}/{dataset}/{target}/{split_id}/rSNF/{layer1}_{layer2}_tr_RandomForest_rankList.log",
        data=os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layer1}_{layer2}_ts.txt"),
        labels=os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/labels_{target}_ts.txt")
    output:
        "{outfolder}/{dataset}/{target}/{split_id}/rSNF/{layer1}_{layer2}_tr_MCC_scores.txt"
    shell:
        "python sklearn_rf_validation_writeperf.py {input[0]} {input[1]} "
        "{wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/rSNF "
        "--tslab {input[2]}"
81
82
83
84


rule myintersect:
    # Runs intersect_biomarkers.py on the juxt KBest log and the rSNF rankList
    # log of one split. The script also receives the path of a venn_*.png file
    # in the rSNFi directory (not declared as a Snakemake output), the
    # intersect output file, and the two layer names as --title1/--title2.
    input:
        juxt="{outfolder}/{dataset}/{target}/{split_id}/juxt/{layer1}_{layer2}_tr_RandomForest_KBest.log",
        rsnf="{outfolder}/{dataset}/{target}/{split_id}/rSNF/{layer1}_{layer2}_tr_RandomForest_rankList.log"
    output:
        "{outfolder}/{dataset}/{target}/{split_id}/rSNFi/{layer1}_{layer2}_intersect_tr.txt"
    shell:
        "python intersect_biomarkers.py {input} "
        "{wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/rSNFi/venn_{wildcards.layer1}_{wildcards.layer2}_tr.png "
        "{output} --title1 {wildcards.layer1} --title2 {wildcards.layer2}"
91
92
93
94


rule extract:
    # Runs extract_topfeats_onecol.py with the `{layer1}_{layer2}` training
    # matrix and the intersect file of one split as inputs, producing the
    # rSNFi training matrix.
    input:
        data=os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layer1}_{layer2}_tr.txt"),
        intersect="{outfolder}/{dataset}/{target}/{split_id}/rSNFi/{layer1}_{layer2}_intersect_tr.txt"
    output:
        "{outfolder}/{dataset}/{target}/{split_id}/rSNFi/{layer1}_{layer2}_tr.txt"
    shell:
        "python extract_topfeats_onecol.py {input} {output}"


rule ml_rsnfi_tr:
    # Runs sklearn_rf_training_fixrank.py with `--ranking KBest` on the rSNFi
    # training matrix (output of rule `extract`) and the training labels of
    # one split.
    input:
        data="{outfolder}/{dataset}/{target}/{split_id}/rSNFi/{layer1}_{layer2}_tr.txt",
        labels=os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/labels_{target}_tr.txt")
    output:
        "{outfolder}/{dataset}/{target}/{split_id}/rSNFi/{layer1}_{layer2}_tr_RandomForest_KBest.log"
    shell:
        "python sklearn_rf_training_fixrank.py {input} "
        "{wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/rSNFi "
        "--ranking KBest"
111
112
113
114


rule ml_rsnfi_val:
    # Runs sklearn_rf_validation_writeperf.py for one split: the first
    # argument is the KBest training log from the rSNFi directory, the second
    # the `{layer1}_{layer2}` test matrix, and the test labels go to `--tslab`.
    input:
        trainlog="{outfolder}/{dataset}/{target}/{split_id}/rSNFi/{layer1}_{layer2}_tr_RandomForest_KBest.log",
        data=os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layer1}_{layer2}_ts.txt"),
        labels=os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/labels_{target}_ts.txt")
    output:
        "{outfolder}/{dataset}/{target}/{split_id}/rSNFi/{layer1}_{layer2}_tr_MCC_scores.txt"
    shell:
        "python sklearn_rf_validation_writeperf.py {input[0]} {input[1]} "
        "{wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/rSNFi "
        "--tslab {input[2]}"
122
123
124
125


rule single_tr:
    # Runs sklearn_rf_training_fixrank.py with `--ranking KBest` on the
    # training matrix of a single `{layer}` and the training labels of one
    # split.
    input:
        data=os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layer}_tr.txt"),
        labels=os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/labels_{target}_tr.txt")
    output:
        "{outfolder}/{dataset}/{target}/{split_id}/single/{layer}_tr_RandomForest_KBest.log"
    shell:
        "python sklearn_rf_training_fixrank.py {input} "
        "{wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/single "
        "--ranking KBest"
132
133
134

rule single_val:
    # Runs sklearn_rf_validation_writeperf.py for one split: the first
    # argument is the KBest training log of a single `{layer}`, the second that
    # layer's test matrix, and the test labels go to `--tslab`.
    input:
        trainlog="{outfolder}/{dataset}/{target}/{split_id}/single/{layer}_tr_RandomForest_KBest.log",
        data=os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layer}_ts.txt"),
        labels=os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/labels_{target}_ts.txt")
    output:
        "{outfolder}/{dataset}/{target}/{split_id}/single/{layer}_tr_MCC_scores.txt"
    shell:
        "python sklearn_rf_validation_writeperf.py {input[0]} {input[1]} "
        "{wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/single "
        "--tslab {input[2]}"