# Snakefile_split: Snakemake workflow that runs the juxt, rSNF, rSNFi and single-layer training/validation steps for one data split.
#%%
import os
import subprocess
#%%
# These can be set at runtime:
# snakemake --config datafolder="mydata" outfolder="out" dataset="breast" model="RandomForest" target="ER" layer1="gene" layer2="cnv" layer3="prot" split_id="1"(...)


# Workflow parameters, all read from the Snakemake config (e.g. --config).
DATAFOLDER = config['datafolder']
OUTFOLDER = config['outfolder']
DATASET = config['dataset']
MODEL = config['model']
TARGET = config['target']
SPLIT_ID = config['split_id']

# Every config entry whose key starts with 'layer' contributes one layer name,
# in config order; LAYERS_CONCAT is the '_'-joined name used in file names.
LAYERS = [config[k] for k in config if k.startswith('layer')]
LAYERS_CONCAT = "_".join(LAYERS)



# Default target: requests the MCC score files of the juxt, rSNF and rSNFi
# runs on the concatenated layers, plus one single-layer score file per layer.
rule all:
    input:
        expand("{outfolder}/{dataset}/{target}/{model}/{split_id}/juxt/{layers}_tr_MCC_scores.txt",
            outfolder=OUTFOLDER, dataset=DATASET, target=TARGET, model=MODEL, layers=LAYERS_CONCAT, split_id=SPLIT_ID),
        expand("{outfolder}/{dataset}/{target}/{model}/{split_id}/rSNF/{layers}_tr_MCC_scores.txt",
            outfolder=OUTFOLDER, dataset=DATASET, target=TARGET, model=MODEL, layers=LAYERS_CONCAT, split_id=SPLIT_ID),
        expand("{outfolder}/{dataset}/{target}/{model}/{split_id}/rSNFi/{layers}_tr_MCC_scores.txt",
            outfolder=OUTFOLDER, dataset=DATASET, target=TARGET, model=MODEL, layers=LAYERS_CONCAT, split_id=SPLIT_ID),
        expand("{outfolder}/{dataset}/{target}/{model}/{split_id}/single/{layer}_tr_MCC_scores.txt",
            outfolder=OUTFOLDER, dataset=DATASET, target=TARGET, model=MODEL, layer=LAYERS, split_id=SPLIT_ID)

# Runs sklearn_training.py with KBest ranking on the {layers}_tr.txt training
# matrix and its labels; the output folder passed to the script is juxt/.
rule ml_juxt_tr:
    input:
        os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layers}_tr.txt"),
        os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/labels_{target}_tr.txt")
    output:
        "{outfolder}/{dataset}/{target}/{model}/{split_id}/juxt/{layers}_tr_{model}_KBest.log"
    shell:
        "python sklearn_training.py {input} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.model}/{wildcards.split_id}/juxt --model {wildcards.model} --ranking KBest"


# Runs sklearn_validation.py on the juxt training log and the {layers}_ts.txt
# test matrix; the test labels are passed through --tslab.
rule ml_juxt_val:
    input:
        "{outfolder}/{dataset}/{target}/{model}/{split_id}/juxt/{layers}_tr_{model}_KBest.log",
        os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layers}_ts.txt"),
        os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/labels_{target}_ts.txt")
    output:
        "{outfolder}/{dataset}/{target}/{model}/{split_id}/juxt/{layers}_tr_MCC_scores.txt"
    shell:
        "python sklearn_validation.py {input[0]} {input[1]} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.model}/{wildcards.split_id}/juxt --tslab {input[2]}"


# Runs snf_integration.R (spectral clustering, helper scripts in SNFtools/)
# on the training matrices of all layers and writes rSNF/INF_<layers>_tr.txt.
#
# The inputs are named so the command can be a plain `shell:` directive:
# Snakemake then fails the job when Rscript exits non-zero, instead of the
# exit status being dropped, and no Namedlist internals are needed to
# separate the data files from the labels file.
rule snf:
    input:
        # one training matrix per layer, in LAYERS order
        data=expand("{datafolder}/{dataset}/{target}/{split_id}/{layer}_tr.txt",
            datafolder=DATAFOLDER, dataset=DATASET, target=TARGET, layer=LAYERS, split_id=SPLIT_ID),
        # training labels
        lab=expand("{datafolder}/{dataset}/{target}/{split_id}/labels_{target}_tr.txt",
            datafolder=DATAFOLDER, dataset=DATASET, target=TARGET, split_id=SPLIT_ID)
    threads: 8
    output:
        expand("{outfolder}/{dataset}/{target}/{model}/{split_id}/rSNF/INF_{layers}_tr.txt",
            outfolder=OUTFOLDER, dataset=DATASET, target=TARGET, model=MODEL, layers=LAYERS_CONCAT, split_id=SPLIT_ID)
    shell:
        "Rscript snf_integration.R --data {input.data} --lab {input.lab} "
        "--scriptDir SNFtools/ --clust spectral --threads {threads} "
        "--outf {output}"


# Runs sklearn_training.py with the rankList ranking on the concatenated
# training matrix and labels; the INF_<layers>_tr.txt file from the rSNF
# folder is passed as --rankFeats. The output folder passed is rSNF/.
rule ml_rsnf_tr:
    input:
        expand("{datafolder}/{dataset}/{target}/{split_id}/{layers}_tr.txt",
            datafolder=DATAFOLDER, dataset=DATASET, target=TARGET, layers=LAYERS_CONCAT, split_id=SPLIT_ID),
        expand("{datafolder}/{dataset}/{target}/{split_id}/labels_{target}_tr.txt",
            datafolder=DATAFOLDER, dataset=DATASET, target=TARGET, split_id=SPLIT_ID),
        "{outfolder}/{dataset}/{target}/{model}/{split_id}/rSNF/INF_{layers}_tr.txt"
    output:
        "{outfolder}/{dataset}/{target}/{model}/{split_id}/rSNF/{layers}_tr_{model}_rankList.log"
    shell:
        "python sklearn_training.py {input[0]} {input[1]} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.model}/{wildcards.split_id}/rSNF --model {wildcards.model} --ranking rankList --rankFeats {input[2]}"


# Runs sklearn_validation.py on the rSNF training log and the concatenated
# test matrix; the test labels are passed through --tslab.
rule ml_rsnf_val:
    input:
        expand("{outfolder}/{dataset}/{target}/{model}/{split_id}/rSNF/{layers}_tr_{model}_rankList.log",
            outfolder=OUTFOLDER, dataset=DATASET, target=TARGET, model=MODEL, layers=LAYERS_CONCAT, split_id=SPLIT_ID),
        expand("{datafolder}/{dataset}/{target}/{split_id}/{layers}_ts.txt",
            datafolder=DATAFOLDER, dataset=DATASET, target=TARGET, layers=LAYERS_CONCAT, split_id=SPLIT_ID),
        # the labels path has no {layers} placeholder, so none is supplied
        expand("{datafolder}/{dataset}/{target}/{split_id}/labels_{target}_ts.txt",
            datafolder=DATAFOLDER, dataset=DATASET, target=TARGET, split_id=SPLIT_ID)
    output:
        "{outfolder}/{dataset}/{target}/{model}/{split_id}/rSNF/{layers}_tr_MCC_scores.txt"
    shell:
        "python sklearn_validation.py {input[0]} {input[1]} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.model}/{wildcards.split_id}/rSNF --tslab {input[2]}"


# Runs intersect_biomarkers.py on the juxt (KBest) and rSNF (rankList)
# training logs and writes rSNFi/<layers>_intersect_tr.txt.
rule myintersect:
    input:
        expand("{outfolder}/{dataset}/{target}/{model}/{split_id}/juxt/{layers}_tr_{model}_KBest.log",
            outfolder=OUTFOLDER, dataset=DATASET, target=TARGET, model=MODEL, layers=LAYERS_CONCAT, split_id=SPLIT_ID),
        expand("{outfolder}/{dataset}/{target}/{model}/{split_id}/rSNF/{layers}_tr_{model}_rankList.log",
            outfolder=OUTFOLDER, dataset=DATASET, target=TARGET, model=MODEL, layers=LAYERS_CONCAT, split_id=SPLIT_ID)
    output:
        expand("{outfolder}/{dataset}/{target}/{model}/{split_id}/rSNFi/{layers}_intersect_tr.txt",
            outfolder=OUTFOLDER, dataset=DATASET, target=TARGET, model=MODEL, layers=LAYERS_CONCAT, split_id=SPLIT_ID)
    shell:
        "python intersect_biomarkers.py {input}  {output}"


# Runs extract_topfeats_onecol.py on the concatenated training matrix and the
# rSNFi intersection file, writing rSNFi/<layers>_tr.txt.
rule extract:
    input:
        expand("{datafolder}/{dataset}/{target}/{split_id}/{layers}_tr.txt",
            datafolder=DATAFOLDER, dataset=DATASET, target=TARGET, layers=LAYERS_CONCAT, split_id=SPLIT_ID),
        expand("{outfolder}/{dataset}/{target}/{model}/{split_id}/rSNFi/{layers}_intersect_tr.txt",
            outfolder=OUTFOLDER, dataset=DATASET, target=TARGET, model=MODEL, layers=LAYERS_CONCAT, split_id=SPLIT_ID)
    output:
        expand("{outfolder}/{dataset}/{target}/{model}/{split_id}/rSNFi/{layers}_tr.txt",
            outfolder=OUTFOLDER, dataset=DATASET, target=TARGET, model=MODEL, layers=LAYERS_CONCAT, split_id=SPLIT_ID)
    shell:
        "python extract_topfeats_onecol.py {input} {output}"


# Runs sklearn_training.py with KBest ranking on the rSNFi/<layers>_tr.txt
# matrix produced by the extract rule and the training labels; the output
# folder passed to the script is rSNFi/.
rule ml_rsnfi_tr:
    input:
        expand("{outfolder}/{dataset}/{target}/{model}/{split_id}/rSNFi/{layers}_tr.txt",
            outfolder=OUTFOLDER, dataset=DATASET, target=TARGET, model=MODEL, split_id=SPLIT_ID, layers=LAYERS_CONCAT),
        # the labels path has no {layers} placeholder, so none is supplied
        expand("{datafolder}/{dataset}/{target}/{split_id}/labels_{target}_tr.txt",
            datafolder=DATAFOLDER, dataset=DATASET, target=TARGET, split_id=SPLIT_ID)
    output:
        "{outfolder}/{dataset}/{target}/{model}/{split_id}/rSNFi/{layers}_tr_{model}_KBest.log"
    shell:
        "python sklearn_training.py {input} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.model}/{wildcards.split_id}/rSNFi --model {wildcards.model} --ranking KBest"


# Runs sklearn_validation.py on the rSNFi training log and the concatenated
# test matrix; the test labels are passed through --tslab.
rule ml_rsnfi_val:
    input:
        expand("{outfolder}/{dataset}/{target}/{model}/{split_id}/rSNFi/{layers}_tr_{model}_KBest.log",
            outfolder=OUTFOLDER, dataset=DATASET, target=TARGET, model=MODEL, split_id=SPLIT_ID, layers=LAYERS_CONCAT),
        expand("{datafolder}/{dataset}/{target}/{split_id}/{layers}_ts.txt",
            datafolder=DATAFOLDER, dataset=DATASET, target=TARGET, split_id=SPLIT_ID, layers=LAYERS_CONCAT),
        # the labels path has no {layers} placeholder, so none is supplied
        expand("{datafolder}/{dataset}/{target}/{split_id}/labels_{target}_ts.txt",
            datafolder=DATAFOLDER, dataset=DATASET, target=TARGET, split_id=SPLIT_ID)
    output:
        "{outfolder}/{dataset}/{target}/{model}/{split_id}/rSNFi/{layers}_tr_MCC_scores.txt"
    shell:
        "python sklearn_validation.py {input[0]} {input[1]} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.model}/{wildcards.split_id}/rSNFi --tslab {input[2]}"


# Runs sklearn_training.py with KBest ranking on a single layer's training
# matrix and the training labels; the output folder passed is single/.
#
# The model is taken from the {model} wildcard, as in the other training
# rules: the declared output path and log name both contain {model}, so a
# hard-coded model name would not match them for any other model.
# NOTE(review): assumes sklearn_training.py names its log after --model, as
# the other rules' outputs suggest; confirm against the script.
rule single_tr:
    input:
        os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layer}_tr.txt"),
        os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/labels_{target}_tr.txt")
    output:
        "{outfolder}/{dataset}/{target}/{model}/{split_id}/single/{layer}_tr_{model}_KBest.log"
    shell:
        "python sklearn_training.py {input} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.model}/{wildcards.split_id}/single --model {wildcards.model} --ranking KBest"

# Runs sklearn_validation.py on a single layer's training log and test
# matrix; the test labels are passed through --tslab.
rule single_val:
    input:
        "{outfolder}/{dataset}/{target}/{model}/{split_id}/single/{layer}_tr_{model}_KBest.log",
        os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/{layer}_ts.txt"),
        os.path.join(DATAFOLDER, "{dataset}/{target}/{split_id}/labels_{target}_ts.txt")
    output:
        "{outfolder}/{dataset}/{target}/{model}/{split_id}/single/{layer}_tr_MCC_scores.txt"
    shell:
        "python sklearn_validation.py {input[0]} {input[1]} {wildcards.outfolder}/{wildcards.dataset}/{wildcards.target}/{wildcards.model}/{wildcards.split_id}/single --tslab {input[2]}"