Snakefile 7.42 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
import os

# Workflow parameters. Every key below is read from the Snakemake `config`
# mapping with no default, so each one has to be supplied, e.g. at run time:
# snakemake --config datafolder="mydata" outfolder="out" dataset="breast" target="ER" (...)
DATAFOLDER, OUTFOLDER, DATASET, TARGET, LAYER1, LAYER2 = (
    config[key]
    for key in ("datafolder", "outfolder", "dataset", "target", "layer1", "layer2")
)

rule all:
    """Aggregate target: request the MCC score files for split_id 0-9.

    Three groups of files are requested for every split: the rSNFi score
    file of the LAYER1/LAYER2 pair, and the single-layer score file for
    LAYER1 and for LAYER2.
    """
    input:
        # rSNFi scores for the layer pair
        expand(
            "{outfolder}/{dataset}/{target}/{split_id}/rSNFi/{layer1}_{layer2}_tr_MCC_scores.txt",
            outfolder=OUTFOLDER,
            dataset=DATASET,
            target=TARGET,
            layer1=LAYER1,
            layer2=LAYER2,
            split_id=list(range(10)),
        ),
        # single-layer scores, first layer
        expand(
            "{outfolder}/{dataset}/{target}/{split_id}/single/{layer}_tr_MCC_scores.txt",
            outfolder=OUTFOLDER,
            dataset=DATASET,
            target=TARGET,
            layer=LAYER1,
            split_id=list(range(10)),
        ),
        # single-layer scores, second layer
        expand(
            "{outfolder}/{dataset}/{target}/{split_id}/single/{layer}_tr_MCC_scores.txt",
            outfolder=OUTFOLDER,
            dataset=DATASET,
            target=TARGET,
            layer=LAYER2,
            split_id=list(range(10)),
        ),
20
21
22
23


rule ml_juxt_tr:
    """Run the training script with KBest ranking on the {layer1}_{layer2} training file.

    Inputs are the combined-layer training file of one split and the
    matching training labels; both are passed positionally via {input}.
    The script receives the per-split `juxt` directory as its output
    directory and is expected to write the declared KBest log there.

    The output directory includes {split_id}, matching the declared output
    path and the other rules in this workflow; without it the log would be
    written to a directory shared by all splits and the declared output
    would never appear.
    """
    input:
        tr_data=os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layer1}_{layer2}_tr.txt"),
        tr_labels=os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/labels_{target}_tr.txt"),
    output:
        "{outfolder}/{dataset}/{target}/{split_id}/juxt/{layer1}_{layer2}_tr_RandomForest_KBest.log",
    shell:
        "python sklearn_rf_training_fixrank.py {input} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/juxt --ranking KBest"


rule ml_juxt_val:
    """Run the validation script for the `juxt` branch of one split.

    The script is given, in order: the KBest training log (input[0]), the
    {layer1}_{layer2} test file (input[1]) and the per-split `juxt`
    output directory; the test labels (input[2]) go in through --tslab.
    The MCC score file is the declared output.
    """
    input:
        tr_log="{outfolder}/{dataset}/{target}/{split_id}/juxt/{layer1}_{layer2}_tr_RandomForest_KBest.log",
        ts_data=os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layer1}_{layer2}_ts.txt"),
        ts_labels=os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/labels_{target}_ts.txt"),
    output:
        "{outfolder}/{dataset}/{target}/{split_id}/juxt/{layer1}_{layer2}_tr_MCC_scores.txt",
    shell:
        "python sklearn_rf_validation_writeperf.py {input[0]} {input[1]} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/juxt --tslab {input[2]}"
41
42
43

rule snf:
    """Run snf_integration.R on the two single-layer training files of one split.

    input[0] and input[1] are the {layer1} and {layer2} training files
    (--d1 / --d2), input[2] the training labels (--lab). The R script is
    called with --clust spectral, the SNFtools/ script directory and the
    thread count reserved below, and writes to the declared INF_* output
    (--outf).
    """
    input:
        d1=os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layer1}_tr.txt"),
        d2=os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layer2}_tr.txt"),
        lab=os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/labels_{target}_tr.txt"),
    output:
        "{outfolder}/{dataset}/{target}/{split_id}/rSNF/INF_{layer1}_{layer2}_tr.txt",
    threads: 8
    shell:
        # Adjacent literals are concatenated into a single command line; the
        # \t\t runs reproduce the indentation embedded in the command text.
        "Rscript snf_integration.R --d1 {input[0]} --d2 {input[1]} --lab {input[2]} "
        "\t\t--scriptDir SNFtools/ --clust spectral --threads {threads} "
        "\t\t--outf {output}"


rule ml_rsnf_tr:
    """Run the training script with a precomputed ranking (`rSNF` branch).

    input[0] is the {layer1}_{layer2} training file, input[1] the training
    labels, and input[2] the INF_* file declared as output of rule `snf`,
    which is handed to the script through --rankFeats together with
    --ranking rankList. The rankList log is the declared output.
    """
    input:
        tr_data=os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layer1}_{layer2}_tr.txt"),
        tr_labels=os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/labels_{target}_tr.txt"),
        rank_feats="{outfolder}/{dataset}/{target}/{split_id}/rSNF/INF_{layer1}_{layer2}_tr.txt",
    output:
        "{outfolder}/{dataset}/{target}/{split_id}/rSNF/{layer1}_{layer2}_tr_RandomForest_rankList.log",
    shell:
        "python sklearn_rf_training_fixrank.py {input[0]} {input[1]} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/rSNF --ranking rankList --rankFeats {input[2]}"
65
66
67
68


rule ml_rsnf_val:
    """Run the validation script for the `rSNF` branch of one split.

    The script is given, in order: the rankList training log (input[0]),
    the {layer1}_{layer2} test file (input[1]) and the per-split `rSNF`
    output directory; the test labels (input[2]) go in through --tslab.
    The MCC score file is the declared output.
    """
    input:
        tr_log="{outfolder}/{dataset}/{target}/{split_id}/rSNF/{layer1}_{layer2}_tr_RandomForest_rankList.log",
        ts_data=os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layer1}_{layer2}_ts.txt"),
        ts_labels=os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/labels_{target}_ts.txt"),
    output:
        "{outfolder}/{dataset}/{target}/{split_id}/rSNF/{layer1}_{layer2}_tr_MCC_scores.txt",
    shell:
        "python sklearn_rf_validation_writeperf.py {input[0]} {input[1]} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/rSNF --tslab {input[2]}"
76
77
78
79


rule myintersect:
    """Run intersect_biomarkers.py on the `juxt` and `rSNF` training logs.

    Both logs are passed positionally via {input}, followed by a
    venn_*.png path inside the per-split `rSNFi` directory and the declared
    intersect_tr.txt output; the two layer names are used as --title1 and
    --title2.

    NOTE: the venn_*.png path is only passed on the command line; it is not
    declared under `output`, so Snakemake does not track that file.
    """
    input:
        juxt_log="{outfolder}/{dataset}/{target}/{split_id}/juxt/{layer1}_{layer2}_tr_RandomForest_KBest.log",
        rsnf_log="{outfolder}/{dataset}/{target}/{split_id}/rSNF/{layer1}_{layer2}_tr_RandomForest_rankList.log",
    output:
        "{outfolder}/{dataset}/{target}/{split_id}/rSNFi/{layer1}_{layer2}_intersect_tr.txt",
    shell:
        "python intersect_biomarkers.py {input} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/rSNFi/venn_{wildcards.layer1}_{wildcards.layer2}_tr.png {output} --title1 {wildcards.layer1} --title2 {wildcards.layer2}"
86
87
88
89


rule extract:
    """Run extract_topfeats_onecol.py to build the `rSNFi` training file.

    The script receives the {layer1}_{layer2} training file and the
    intersect_tr.txt file declared as output of rule `myintersect` (both
    via {input}), followed by the declared output path.
    """
    input:
        tr_data=os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layer1}_{layer2}_tr.txt"),
        intersect="{outfolder}/{dataset}/{target}/{split_id}/rSNFi/{layer1}_{layer2}_intersect_tr.txt",
    output:
        "{outfolder}/{dataset}/{target}/{split_id}/rSNFi/{layer1}_{layer2}_tr.txt",
    shell:
        "python extract_topfeats_onecol.py {input} {output}"


rule ml_rsnfi_tr:
    """Run the training script with KBest ranking on the `rSNFi` training file.

    The first input is the file declared as output of rule `extract`, the
    second the training labels; both are passed positionally via {input},
    followed by the per-split `rSNFi` output directory. The KBest log is
    the declared output.
    """
    input:
        tr_data="{outfolder}/{dataset}/{target}/{split_id}/rSNFi/{layer1}_{layer2}_tr.txt",
        tr_labels=os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/labels_{target}_tr.txt"),
    output:
        "{outfolder}/{dataset}/{target}/{split_id}/rSNFi/{layer1}_{layer2}_tr_RandomForest_KBest.log",
    shell:
        "python sklearn_rf_training_fixrank.py {input} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/rSNFi --ranking KBest"
106
107
108
109


rule ml_rsnfi_val:
    """Run the validation script for the `rSNFi` branch of one split.

    The script is given, in order: the KBest training log (input[0]), the
    {layer1}_{layer2} test file (input[1]) and the per-split `rSNFi`
    output directory; the test labels (input[2]) go in through --tslab.
    The MCC score file is the declared output.
    """
    input:
        tr_log="{outfolder}/{dataset}/{target}/{split_id}/rSNFi/{layer1}_{layer2}_tr_RandomForest_KBest.log",
        ts_data=os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layer1}_{layer2}_ts.txt"),
        ts_labels=os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/labels_{target}_ts.txt"),
    output:
        "{outfolder}/{dataset}/{target}/{split_id}/rSNFi/{layer1}_{layer2}_tr_MCC_scores.txt",
    shell:
        "python sklearn_rf_validation_writeperf.py {input[0]} {input[1]} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/rSNFi --tslab {input[2]}"
117
118
119
120


rule single_tr:
    """Run the training script with KBest ranking on a single layer.

    The {layer} training file and the training labels of one split are
    passed positionally via {input}, followed by the per-split `single`
    output directory. The KBest log is the declared output.
    """
    input:
        tr_data=os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layer}_tr.txt"),
        tr_labels=os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/labels_{target}_tr.txt"),
    output:
        "{outfolder}/{dataset}/{target}/{split_id}/single/{layer}_tr_RandomForest_KBest.log",
    shell:
        "python sklearn_rf_training_fixrank.py {input} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/single --ranking KBest"
127
128
129

rule single_val:
    """Run the validation script for a single layer of one split.

    The script is given, in order: the KBest training log (input[0]), the
    {layer} test file (input[1]) and the per-split `single` output
    directory; the test labels (input[2]) go in through --tslab. The MCC
    score file is the declared output.
    """
    input:
        tr_log="{outfolder}/{dataset}/{target}/{split_id}/single/{layer}_tr_RandomForest_KBest.log",
        ts_data=os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layer}_ts.txt"),
        ts_labels=os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/labels_{target}_ts.txt"),
    output:
        "{outfolder}/{dataset}/{target}/{split_id}/single/{layer}_tr_MCC_scores.txt",
    shell:
        "python sklearn_rf_validation_writeperf.py {input[0]} {input[1]} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/single --tslab {input[2]}"
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151


# rule single_layer:
#     input:
#         "data/{layer}_tr.txt",
#         "data/labels_{target}_tr.txt",
#         "data/{layer}_ts.txt",
#         "data/labels_{target}_ts.txt"
#     output:
#         "out/{target}/single/{layer}_tr_MCC_scores.txt"
#     shell:
#         """
#         python sklearn_rf_training_fixrank.py {input[0]} {input[1]} out/{wildcards.target}/single --ranking KBest
#         python sklearn_rf_validation_writeperf.py out/{wildcards.target}/single/{wildcards.layer}_tr_RandomForest_KBest.log {input[2]} out/{wildcards.target}/single --tslab {input[3]}
#         """