Snakefile 8.04 KB
Newer Older
1
import os
2
import subprocess
3
4

# These can be set at runtime:
# snakemake --config datafolder="mydata" outfolder="out" dataset="breast" target="ER" layer1="gene" layer2="cnv" layer3="prot" split_id="1"(...)
6
7
8
9
# Folders and experiment identifiers, all read from the Snakemake config.
DATAFOLDER = config['datafolder']
OUTFOLDER = config['outfolder']
DATASET = config['dataset']
TARGET = config['target']
SPLIT_ID = config['split_id']

# Every config entry whose key starts with "layer" contributes one layer name;
# the underscore-joined form is used in the file names of the combined layers.
LAYERS = [value for key, value in config.items() if key.startswith('layer')]
LAYERS_CONCAT = "_".join(LAYERS)
14
15
16

# Default target: the MCC score files of the rSNFi model built on the combined
# layers and of the model built on each single layer.
rule all:
    input:
        expand(
            "{outfolder}/{dataset}/{target}/{split_id}/rSNFi/{layers}_tr_MCC_scores.txt",
            outfolder=OUTFOLDER,
            dataset=DATASET,
            target=TARGET,
            layers=LAYERS_CONCAT,
            split_id=SPLIT_ID,
        ),
        # `layer` is a list here, so one score file is requested per layer.
        expand(
            "{outfolder}/{dataset}/{target}/{split_id}/single/{layer}_tr_MCC_scores.txt",
            outfolder=OUTFOLDER,
            dataset=DATASET,
            target=TARGET,
            layer=LAYERS,
            split_id=SPLIT_ID,
        )
21
22
23

# Run the training script on the combined-layer training matrix and its
# labels, with KBest feature ranking; results go to the "juxt" folder.
#
# The directory handed to the script must contain the split id: the declared
# output lives under {split_id}/juxt, and Snakemake fails the job if the log
# is not found exactly there.
rule ml_juxt_tr:
    input:
        os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layers}_tr.txt"),
        os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/labels_{target}_tr.txt")
    output:
        "{outfolder}/{dataset}/{target}/{split_id}/juxt/{layers}_tr_RandomForest_KBest.log"
    shell:
        "python sklearn_rf_training_fixrank.py {input} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/juxt --ranking KBest"


# Run the validation script for the "juxt" model: it receives the training
# log, the test-split matrix and (via --tslab) the test-split labels, and is
# pointed at the juxt output folder where the MCC score file is expected.
rule ml_juxt_val:
    input:
        # input[0]: log written by the training rule
        "{outfolder}/{dataset}/{target}/{split_id}/juxt/{layers}_tr_RandomForest_KBest.log",
        # input[1]: test-split data
        os.path.join(
            DATAFOLDER,
            "{dataset}/{target}/{split_id}/{layers}_ts.txt",
        ),
        # input[2]: test-split labels
        os.path.join(
            DATAFOLDER,
            "{dataset}/{target}/{split_id}/labels_{target}_ts.txt",
        )
    output:
        "{outfolder}/{dataset}/{target}/{split_id}/juxt/{layers}_tr_MCC_scores.txt"
    shell:
        "python sklearn_rf_validation_writeperf.py {input[0]} {input[1]} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/juxt --tslab {input[2]}"
41

42

43
44
# Call snf_integration.R on the per-layer training matrices and the training
# labels; the script is asked to write its result to the INF_<layers>_tr.txt
# file under rSNF.
rule snf:
    input:
        expand("{datafolder}/{dataset}/{target}/{split_id}/{layer}_tr.txt",
            datafolder=DATAFOLDER, dataset=DATASET, target=TARGET, layer=LAYERS, split_id=SPLIT_ID),
        expand("{datafolder}/{dataset}/{target}/{split_id}/labels_{target}_tr.txt",
            datafolder=DATAFOLDER, dataset=DATASET, target=TARGET, split_id=SPLIT_ID)
    threads: 8
    output:
        expand("{outfolder}/{dataset}/{target}/{split_id}/rSNF/INF_{layers}_tr.txt",
        outfolder=OUTFOLDER, dataset=DATASET, target=TARGET, layers=LAYERS_CONCAT, split_id=SPLIT_ID)
    run:
        # The inputs are ordered: every entry but the last is a layer matrix,
        # the last entry is the labels file.
        input_paths = [str(path) for path in input]
        data_files = input_paths[:-1]
        label_file = input_paths[-1]

        # An argument list (no shell) keeps paths with spaces or shell
        # metacharacters intact.
        command = [
            "Rscript", "snf_integration.R",
            "--data", *data_files,
            "--lab", label_file,
            "--scriptDir", "SNFtools/",
            "--clust", "spectral",
            "--threads", str(threads),
            "--outf", *(str(path) for path in output),
        ]

        # check_call raises CalledProcessError on a non-zero exit status, so a
        # failing R script fails the job instead of being silently ignored.
        subprocess.check_call(command)
60
61
62
63


# Run the training script with the "rankList" ranking: the feature ranking is
# taken from the INF_<layers>_tr.txt file (passed via --rankFeats) and the
# results go to the rSNF folder.
rule ml_rsnf_tr:
    input:
        # input[0]: combined-layer training matrix
        expand(
            "{datafolder}/{dataset}/{target}/{split_id}/{layers}_tr.txt",
            datafolder=DATAFOLDER,
            dataset=DATASET,
            target=TARGET,
            layers=LAYERS_CONCAT,
            split_id=SPLIT_ID,
        ),
        # input[1]: training labels
        expand(
            "{datafolder}/{dataset}/{target}/{split_id}/labels_{target}_tr.txt",
            datafolder=DATAFOLDER,
            dataset=DATASET,
            target=TARGET,
            split_id=SPLIT_ID,
        ),
        # input[2]: ranking file, resolved through the output wildcards
        "{outfolder}/{dataset}/{target}/{split_id}/rSNF/INF_{layers}_tr.txt",
    output:
        "{outfolder}/{dataset}/{target}/{split_id}/rSNF/{layers}_tr_RandomForest_rankList.log",
    shell:
        "python sklearn_rf_training_fixrank.py {input[0]} {input[1]} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/rSNF --ranking rankList --rankFeats {input[2]}"
73
74
75
76


# Run the validation script for the rSNF model: it receives the training log,
# the test-split matrix and (via --tslab) the test-split labels, and is
# pointed at the rSNF output folder where the MCC score file is expected.
#
# The score file is declared as the rule's output (not as an input), and the
# data paths are anchored on DATAFOLDER: input wildcards must all be
# derivable from the output, and no output carries a {datafolder} wildcard.
rule ml_rsnf_val:
    input:
        "{outfolder}/{dataset}/{target}/{split_id}/rSNF/{layers}_tr_RandomForest_rankList.log",
        os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layers}_ts.txt"),
        os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/labels_{target}_ts.txt")
    output:
        "{outfolder}/{dataset}/{target}/{split_id}/rSNF/{layers}_tr_MCC_scores.txt"
    shell:
        "python sklearn_rf_validation_writeperf.py {input[0]} {input[1]} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/rSNF --tslab {input[2]}"
83
84
85
86


# Pass the juxt (KBest) and rSNF (rankList) training logs to
# intersect_biomarkers.py, which is given the rSNFi intersect file as its
# output path.
rule myintersect:
    input:
        expand(
            "{outfolder}/{dataset}/{target}/{split_id}/juxt/{layers}_tr_RandomForest_KBest.log",
            outfolder=OUTFOLDER,
            dataset=DATASET,
            target=TARGET,
            layers=LAYERS_CONCAT,
            split_id=SPLIT_ID,
        ),
        expand(
            "{outfolder}/{dataset}/{target}/{split_id}/rSNF/{layers}_tr_RandomForest_rankList.log",
            outfolder=OUTFOLDER,
            dataset=DATASET,
            target=TARGET,
            layers=LAYERS_CONCAT,
            split_id=SPLIT_ID,
        )
    output:
        expand(
            "{outfolder}/{dataset}/{target}/{split_id}/rSNFi/{layers}_intersect_tr.txt",
            outfolder=OUTFOLDER,
            dataset=DATASET,
            target=TARGET,
            layers=LAYERS_CONCAT,
            split_id=SPLIT_ID,
        )
    shell:
        "python intersect_biomarkers.py {input}  {output}"
96
97
98
99


# Pass the combined-layer training matrix and the rSNFi intersect file to
# extract_topfeats_onecol.py, which is given the rSNFi training matrix as its
# output path.
rule extract:
    input:
        expand(
            "{datafolder}/{dataset}/{target}/{split_id}/{layers}_tr.txt",
            datafolder=DATAFOLDER,
            dataset=DATASET,
            target=TARGET,
            layers=LAYERS_CONCAT,
            split_id=SPLIT_ID,
        ),
        expand(
            "{outfolder}/{dataset}/{target}/{split_id}/rSNFi/{layers}_intersect_tr.txt",
            outfolder=OUTFOLDER,
            dataset=DATASET,
            target=TARGET,
            layers=LAYERS_CONCAT,
            split_id=SPLIT_ID,
        )
    output:
        expand(
            "{outfolder}/{dataset}/{target}/{split_id}/rSNFi/{layers}_tr.txt",
            outfolder=OUTFOLDER,
            dataset=DATASET,
            target=TARGET,
            layers=LAYERS_CONCAT,
            split_id=SPLIT_ID,
        )
    shell:
        "python extract_topfeats_onecol.py {input} {output}"


# Run the training script on the rSNFi training matrix and the training
# labels, with KBest feature ranking; results go to the rSNFi folder.
rule ml_rsnfi_tr:
    input:
        expand(
            "{outfolder}/{dataset}/{target}/{split_id}/rSNFi/{layers}_tr.txt",
            outfolder=OUTFOLDER,
            dataset=DATASET,
            target=TARGET,
            split_id=SPLIT_ID,
            layers=LAYERS_CONCAT,
        ),
        expand(
            "{datafolder}/{dataset}/{target}/{split_id}/labels_{target}_tr.txt",
            datafolder=DATAFOLDER,
            dataset=DATASET,
            target=TARGET,
            split_id=SPLIT_ID,
            layers=LAYERS_CONCAT,
        ),
    output:
        "{outfolder}/{dataset}/{target}/{split_id}/rSNFi/{layers}_tr_RandomForest_KBest.log"
    shell:
        "python sklearn_rf_training_fixrank.py {input} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/rSNFi --ranking KBest"
121
122
123
124


# Run the validation script for the rSNFi model: it receives the training
# log, the test-split matrix and (via --tslab) the test-split labels, and is
# pointed at the rSNFi output folder where the MCC score file is expected.
rule ml_rsnfi_val:
    input:
        # input[0]: log written by the rSNFi training rule
        expand(
            "{outfolder}/{dataset}/{target}/{split_id}/rSNFi/{layers}_tr_RandomForest_KBest.log",
            outfolder=OUTFOLDER,
            dataset=DATASET,
            target=TARGET,
            split_id=SPLIT_ID,
            layers=LAYERS_CONCAT,
        ),
        # input[1]: test-split data
        expand(
            "{datafolder}/{dataset}/{target}/{split_id}/{layers}_ts.txt",
            datafolder=DATAFOLDER,
            dataset=DATASET,
            target=TARGET,
            split_id=SPLIT_ID,
            layers=LAYERS_CONCAT,
        ),
        # input[2]: test-split labels
        expand(
            "{datafolder}/{dataset}/{target}/{split_id}/labels_{target}_ts.txt",
            datafolder=DATAFOLDER,
            dataset=DATASET,
            target=TARGET,
            split_id=SPLIT_ID,
            layers=LAYERS_CONCAT,
        )
    output:
        "{outfolder}/{dataset}/{target}/{split_id}/rSNFi/{layers}_tr_MCC_scores.txt"
    shell:
        "python sklearn_rf_validation_writeperf.py {input[0]} {input[1]} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/rSNFi --tslab {input[2]}"
135
136
137
138


# Run the training script on one layer's training matrix and the training
# labels, with KBest feature ranking; results go to the "single" folder.
rule single_tr:
    input:
        os.path.join(
            DATAFOLDER,
            "{dataset}/{target}/{split_id}/{layer}_tr.txt",
        ),
        os.path.join(
            DATAFOLDER,
            "{dataset}/{target}/{split_id}/labels_{target}_tr.txt",
        )
    output:
        "{outfolder}/{dataset}/{target}/{split_id}/single/{layer}_tr_RandomForest_KBest.log"
    shell:
        "python sklearn_rf_training_fixrank.py {input} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/single --ranking KBest"
145
146
147

# Run the validation script for a single-layer model: it receives the
# training log, the layer's test-split matrix and (via --tslab) the test-split
# labels, and is pointed at the "single" output folder where the MCC score
# file is expected.
rule single_val:
    input:
        # input[0]: log written by the single-layer training rule
        "{outfolder}/{dataset}/{target}/{split_id}/single/{layer}_tr_RandomForest_KBest.log",
        # input[1]: test-split data for this layer
        os.path.join(
            DATAFOLDER,
            "{dataset}/{target}/{split_id}/{layer}_ts.txt",
        ),
        # input[2]: test-split labels
        os.path.join(
            DATAFOLDER,
            "{dataset}/{target}/{split_id}/labels_{target}_ts.txt",
        )
    output:
        "{outfolder}/{dataset}/{target}/{split_id}/single/{layer}_tr_MCC_scores.txt"
    shell:
        "python sklearn_rf_validation_writeperf.py {input[0]} {input[1]} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/single --tslab {input[2]}"