Snakefile 8.45 KB
Newer Older
Nicole Bussola's avatar
Nicole Bussola committed
1
#%%
2
import os
3
import subprocess
Nicole Bussola's avatar
Nicole Bussola committed
4
#%%
5
# These values can be set at runtime:
Alessia Marcolini's avatar
Alessia Marcolini committed
6
# snakemake --config datafolder="mydata" outfolder="out" dataset="breast" target="ER" layer1="gene" layer2="cnv" layer3="prot" split_id="1"(...)
7
8
9
10
# Pipeline parameters, all read from the Snakemake config (missing keys raise KeyError).
DATAFOLDER = config['datafolder']
OUTFOLDER = config['outfolder']
DATASET = config['dataset']
TARGET = config['target']
SPLIT_ID = config['split_id']

# Every config entry whose key starts with "layer" contributes one layer name;
# values are collected in config iteration order and joined with underscores
# to form the name of the combined-layers files.
LAYERS = [config[k] for k in config if k.startswith('layer')]
LAYERS_CONCAT = "_".join(LAYERS)
15
16
17

# Default target: requests the MCC score files of the juxt, rSNF and rSNFi
# branches (on the concatenated layers) and of the single branch (one file
# per entry of LAYERS).
rule all:
    input:
        expand("{outfolder}/{dataset}/{target}/{split_id}/juxt/{layers}_tr_MCC_scores.txt",
            outfolder=OUTFOLDER, dataset=DATASET, target=TARGET, layers=LAYERS_CONCAT, split_id=SPLIT_ID),
        expand("{outfolder}/{dataset}/{target}/{split_id}/rSNF/{layers}_tr_MCC_scores.txt",
            outfolder=OUTFOLDER, dataset=DATASET, target=TARGET, layers=LAYERS_CONCAT, split_id=SPLIT_ID),
        expand("{outfolder}/{dataset}/{target}/{split_id}/rSNFi/{layers}_tr_MCC_scores.txt",
            outfolder=OUTFOLDER, dataset=DATASET, target=TARGET, layers=LAYERS_CONCAT, split_id=SPLIT_ID),
        expand("{outfolder}/{dataset}/{target}/{split_id}/single/{layer}_tr_MCC_scores.txt",
            outfolder=OUTFOLDER, dataset=DATASET, target=TARGET, layer=LAYERS, split_id=SPLIT_ID)
26
27
28

# Training step of the juxt branch: calls sklearn_rf_training_fixrank.py with
# the combined-layers training file and the training labels (both under
# DATAFOLDER), writing into the juxt output directory with KBest ranking.
rule ml_juxt_tr:
    input:
        os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layers}_tr.txt"),
        os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/labels_{target}_tr.txt")
    output:
        "{outfolder}/{dataset}/{target}/{split_id}/juxt/{layers}_tr_RandomForest_KBest.log"
    shell:
        "python sklearn_rf_training_fixrank.py {input} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/juxt --ranking KBest"
35
36
37
38


# Validation step of the juxt branch: calls sklearn_rf_validation_writeperf.py
# with the training log (input[0]), the combined-layers test file (input[1])
# and the test labels (input[2], passed via --tslab).
rule ml_juxt_val:
    input:
        "{outfolder}/{dataset}/{target}/{split_id}/juxt/{layers}_tr_RandomForest_KBest.log",
        os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layers}_ts.txt"),
        os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/labels_{target}_ts.txt")
    output:
        "{outfolder}/{dataset}/{target}/{split_id}/juxt/{layers}_tr_MCC_scores.txt"
    shell:
        "python sklearn_rf_validation_writeperf.py {input[0]} {input[1]} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/juxt --tslab {input[2]}"
46

47

48
49
# Integration step: calls snf_integration.R (via Rscript) on the per-layer
# training files and the training labels, writing the rSNF/INF_* file.
rule snf:
    input:
        expand("{datafolder}/{dataset}/{target}/{split_id}/{layer}_tr.txt",
            datafolder=DATAFOLDER, dataset=DATASET, target=TARGET, layer=LAYERS, split_id=SPLIT_ID),
        expand("{datafolder}/{dataset}/{target}/{split_id}/labels_{target}_tr.txt",
            datafolder=DATAFOLDER, dataset=DATASET, target=TARGET, split_id=SPLIT_ID)
    threads: 8
    output:
        expand("{outfolder}/{dataset}/{target}/{split_id}/rSNF/INF_{layers}_tr.txt",
            outfolder=OUTFOLDER, dataset=DATASET, target=TARGET, layers=LAYERS_CONCAT, split_id=SPLIT_ID)
    run:
        # Every input but the last is a per-layer training file; the last one
        # is the label file (order fixed by the input directive above).
        *data_files, lab_file = (str(path) for path in input)
        command = [
            "Rscript", "snf_integration.R",
            "--data", *data_files,
            "--lab", lab_file,
            "--scriptDir", "SNFtools/",
            "--clust", "spectral",
            "--threads", str(threads),
            "--outf", *(str(path) for path in output),
        ]
        # check=True makes a non-zero Rscript exit status fail the rule
        # instead of being silently ignored; the argument list avoids
        # shell quoting issues with unusual paths.
        subprocess.run(command, check=True)
65
66
67
68


# Training step of the rSNF branch: calls sklearn_rf_training_fixrank.py with
# the combined-layers training file (input[0]), the training labels (input[1])
# and the rSNF/INF_* file (input[2], passed via --rankFeats), using the
# rankList ranking.
rule ml_rsnf_tr:
    input:
        expand("{datafolder}/{dataset}/{target}/{split_id}/{layers}_tr.txt",
            datafolder=DATAFOLDER, dataset=DATASET, target=TARGET, layers=LAYERS_CONCAT, split_id=SPLIT_ID),
        expand("{datafolder}/{dataset}/{target}/{split_id}/labels_{target}_tr.txt",
            datafolder=DATAFOLDER, dataset=DATASET, target=TARGET, split_id=SPLIT_ID),
        "{outfolder}/{dataset}/{target}/{split_id}/rSNF/INF_{layers}_tr.txt",
    output:
        "{outfolder}/{dataset}/{target}/{split_id}/rSNF/{layers}_tr_RandomForest_rankList.log",
    shell:
        "python sklearn_rf_training_fixrank.py {input[0]} {input[1]} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/rSNF --ranking rankList --rankFeats {input[2]}"
78
79
80
81


# Validation step of the rSNF branch: calls sklearn_rf_validation_writeperf.py
# with the training log (input[0]), the combined-layers test file (input[1])
# and the test labels (input[2], passed via --tslab).
#
# The MCC score file is declared as this rule's output so that the rSNF
# target requested by `rule all` has a producer. The test files are located
# through DATAFOLDER because a {datafolder} wildcard could not be resolved
# from the output path.
rule ml_rsnf_val:
    input:
        "{outfolder}/{dataset}/{target}/{split_id}/rSNF/{layers}_tr_RandomForest_rankList.log",
        os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layers}_ts.txt"),
        os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/labels_{target}_ts.txt")
    output:
        "{outfolder}/{dataset}/{target}/{split_id}/rSNF/{layers}_tr_MCC_scores.txt"
    shell:
        "python sklearn_rf_validation_writeperf.py {input[0]} {input[1]} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/rSNF --tslab {input[2]}"
88
89
90
91


# Calls intersect_biomarkers.py with the juxt KBest log and the rSNF rankList
# log as inputs and the rSNFi/*_intersect_tr.txt file as output.
rule myintersect:
    input:
        expand("{outfolder}/{dataset}/{target}/{split_id}/juxt/{layers}_tr_RandomForest_KBest.log",
            outfolder=OUTFOLDER, dataset=DATASET, target=TARGET, layers=LAYERS_CONCAT, split_id=SPLIT_ID),
        expand("{outfolder}/{dataset}/{target}/{split_id}/rSNF/{layers}_tr_RandomForest_rankList.log",
            outfolder=OUTFOLDER, dataset=DATASET, target=TARGET, layers=LAYERS_CONCAT, split_id=SPLIT_ID)
    output:
        expand("{outfolder}/{dataset}/{target}/{split_id}/rSNFi/{layers}_intersect_tr.txt",
            outfolder=OUTFOLDER, dataset=DATASET, target=TARGET, layers=LAYERS_CONCAT, split_id=SPLIT_ID)
    shell:
        "python intersect_biomarkers.py {input}  {output}"
101
102
103
104


# Calls extract_topfeats_onecol.py with the combined-layers training file and
# the rSNFi intersect file as inputs, producing the rSNFi training file.
rule extract:
    input:
        expand("{datafolder}/{dataset}/{target}/{split_id}/{layers}_tr.txt",
            datafolder=DATAFOLDER, dataset=DATASET, target=TARGET, layers=LAYERS_CONCAT, split_id=SPLIT_ID),
        expand("{outfolder}/{dataset}/{target}/{split_id}/rSNFi/{layers}_intersect_tr.txt",
            outfolder=OUTFOLDER, dataset=DATASET, target=TARGET, layers=LAYERS_CONCAT, split_id=SPLIT_ID)
    output:
        expand("{outfolder}/{dataset}/{target}/{split_id}/rSNFi/{layers}_tr.txt",
            outfolder=OUTFOLDER, dataset=DATASET, target=TARGET, layers=LAYERS_CONCAT, split_id=SPLIT_ID)
    shell:
        "python extract_topfeats_onecol.py {input} {output}"


# Training step of the rSNFi branch: calls sklearn_rf_training_fixrank.py with
# the rSNFi training file and the training labels, writing into the rSNFi
# output directory with KBest ranking.
rule ml_rsnfi_tr:
    input:
        expand("{outfolder}/{dataset}/{target}/{split_id}/rSNFi/{layers}_tr.txt",
            outfolder=OUTFOLDER, dataset=DATASET, target=TARGET, split_id=SPLIT_ID, layers=LAYERS_CONCAT),
        expand("{datafolder}/{dataset}/{target}/{split_id}/labels_{target}_tr.txt",
            datafolder=DATAFOLDER, dataset=DATASET, target=TARGET, split_id=SPLIT_ID, layers=LAYERS_CONCAT),
    output:
        "{outfolder}/{dataset}/{target}/{split_id}/rSNFi/{layers}_tr_RandomForest_KBest.log"
    shell:
        "python sklearn_rf_training_fixrank.py {input} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/rSNFi --ranking KBest"
126
127
128
129


# Validation step of the rSNFi branch: calls sklearn_rf_validation_writeperf.py
# with the rSNFi training log (input[0]), the combined-layers test file
# (input[1]) and the test labels (input[2], passed via --tslab).
rule ml_rsnfi_val:
    input:
        expand("{outfolder}/{dataset}/{target}/{split_id}/rSNFi/{layers}_tr_RandomForest_KBest.log",
            outfolder=OUTFOLDER, dataset=DATASET, target=TARGET, split_id=SPLIT_ID, layers=LAYERS_CONCAT),
        expand("{datafolder}/{dataset}/{target}/{split_id}/{layers}_ts.txt",
            datafolder=DATAFOLDER, dataset=DATASET, target=TARGET, split_id=SPLIT_ID, layers=LAYERS_CONCAT),
        expand("{datafolder}/{dataset}/{target}/{split_id}/labels_{target}_ts.txt",
            datafolder=DATAFOLDER, dataset=DATASET, target=TARGET, split_id=SPLIT_ID, layers=LAYERS_CONCAT)
    output:
        "{outfolder}/{dataset}/{target}/{split_id}/rSNFi/{layers}_tr_MCC_scores.txt"
    shell:
        "python sklearn_rf_validation_writeperf.py {input[0]} {input[1]} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/rSNFi --tslab {input[2]}"
140
141
142
143


# Training step of the single branch: calls sklearn_rf_training_fixrank.py
# with one layer's training file and the training labels (both under
# DATAFOLDER), writing into the single output directory with KBest ranking.
rule single_tr:
    input:
        os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layer}_tr.txt"),
        os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/labels_{target}_tr.txt")
    output:
        "{outfolder}/{dataset}/{target}/{split_id}/single/{layer}_tr_RandomForest_KBest.log"
    shell:
        "python sklearn_rf_training_fixrank.py {input} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/single --ranking KBest"
150
151
152

# Validation step of the single branch: calls sklearn_rf_validation_writeperf.py
# with the layer's training log (input[0]), the layer's test file (input[1])
# and the test labels (input[2], passed via --tslab).
rule single_val:
    input:
        "{outfolder}/{dataset}/{target}/{split_id}/single/{layer}_tr_RandomForest_KBest.log",
        os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layer}_ts.txt"),
        os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/labels_{target}_ts.txt")
    output:
        "{outfolder}/{dataset}/{target}/{split_id}/single/{layer}_tr_MCC_scores.txt"
    shell:
        "python sklearn_rf_validation_writeperf.py {input[0]} {input[1]} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.split_id}/single --tslab {input[2]}"