Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
MPBA
INF
Commits
e93424c3
Commit
e93424c3
authored
Mar 26, 2020
by
Alessia Marcolini
Browse files
Merge branch 'master' of gitlab.fbk.eu:MPBA/INF
parents
c2ef86cb
b422bb46
Changes
18
Show whitespace changes
Inline
Side-by-side
Makefile
deleted
100755 → 0
View file @
c2ef86cb
#!/usr/bin/make -f
# Makefile for running the INF pipeline
# Author: Marco Chierici <chierici@fbk.eu>
# Date: 2017-05-12
#
# Pipeline overview (all outputs live under $(OUTDIR)):
#   juxt/   random forest trained on the juxtaposed data file ($(FILE)), KBest ranking
#   rSNF/   random forest trained on $(FILE) using the feature ranking produced by
#           snf_integration.R (INF_<prefix>.txt)
#   rSNFi/  random forest trained on the features shared by the juxt and rSNF runs
# Each branch ends with a *_MCC_scores.txt file written by the validation script.

# `init` and `all` are commands, not files: declare both phony so that a stray
# file named "all" or "init" can never mask them.
.PHONY: init all

SHELL := /bin/bash

# input variables
# shown are examples, override on command line
FILE ?= data/AG1-G_MAV-G_498_LIT_ALL_tr.txt
LABEL ?= data/label_498_ALL-EFS_tr.lab
DATA1 ?= data/AG1-G_498_LIT_ALL_tr.txt
DATA2 ?= data/MAV-G_498_LIT_ALL_tr.txt
ENDPOINT ?= ALL-EFS
# added MF 20170710
THREADS ?= 4
OUTBASE ?= /path/to/out_tmp
BINDIR := .
OUTDIR := $(OUTBASE)/$(ENDPOINT)

# derived variables
# OUTPREFIX: file name of $(FILE) without directory and extension
OUTPREFIX = $(notdir $(basename $(FILE)))
# LEVEL1 / LEVEL2: first and second "_"-separated fields of OUTPREFIX,
# used as diagram titles by the intersection step
LEVEL1 = $(word 1,$(subst _, ,$(OUTPREFIX)))
LEVEL2 = $(word 2,$(subst _, ,$(OUTPREFIX)))

# Create the output directories. NOTE: `init` is the first target and therefore
# the default goal; run `make all` to execute the pipeline.
# juxt/ is created here as well because the juxt rules write into it; this is
# harmless if the Python scripts already create it themselves.
init:
	@mkdir -p $(OUTDIR)/rSNFi
	@mkdir -p $(OUTDIR)/rSNF
	@mkdir -p $(OUTDIR)/juxt

all: init $(OUTDIR)/rSNFi/$(OUTPREFIX)_MCC_scores.txt $(OUTDIR)/rSNF/$(OUTPREFIX)_MCC_scores.txt $(OUTDIR)/juxt/$(OUTPREFIX)_MCC_scores.txt

# --- juxt: training on the juxtaposed dataset --------------------------------
$(OUTDIR)/juxt/$(OUTPREFIX)_RandomForest_KBest.log: $(FILE) $(LABEL)
	python $(BINDIR)/sklearn_rf_training_fixrank.py $(word 1,$^) $(word 2,$^) $(OUTDIR)/juxt --ranking KBest

# Validation inputs are derived from the training ones by replacing _tr with _ts.
$(OUTDIR)/juxt/$(OUTPREFIX)_MCC_scores.txt: $(OUTDIR)/juxt/$(OUTPREFIX)_RandomForest_KBest.log $(subst _tr,_ts,$(FILE)) $(subst _tr,_ts,$(LABEL))
	python $(BINDIR)/sklearn_rf_validation_writeperf.py $(word 1,$^) $(word 2,$^) $(OUTDIR)/juxt --tslab $(word 3,$^)

# --- rSNF: SNF-based feature ranking, then training on that ranking ----------
$(OUTDIR)/rSNF/INF_$(OUTPREFIX).txt: $(DATA1) $(DATA2) $(LABEL)
	Rscript $(BINDIR)/snf_integration.R --d1 $(word 1,$^) --d2 $(word 2,$^) --lab $(word 3,$^) \
		--scriptDir $(BINDIR)/SNFtools/ --clust spectral --threads "$(THREADS)" \
		--outf $@

$(OUTDIR)/rSNF/$(OUTPREFIX)_RandomForest_rankList.log: $(FILE) $(LABEL) $(OUTDIR)/rSNF/INF_$(OUTPREFIX).txt
	python $(BINDIR)/sklearn_rf_training_fixrank.py $(word 1,$^) $(word 2,$^) $(OUTDIR)/rSNF \
		--ranking rankList --rankFeats $(word 3,$^)

$(OUTDIR)/rSNF/$(OUTPREFIX)_MCC_scores.txt: $(OUTDIR)/rSNF/$(OUTPREFIX)_RandomForest_rankList.log $(subst _tr,_ts,$(FILE)) $(subst _tr,_ts,$(LABEL))
	python $(BINDIR)/sklearn_rf_validation_writeperf.py $(word 1,$^) $(word 2,$^) $(OUTDIR)/rSNF --tslab $(word 3,$^)

# --- rSNFi: features shared by the juxt and rSNF runs -------------------------
# intersect_biomarkers.py takes its positionals in the order
#   CONFIGFILE1 CONFIGFILE2 OUTLIST OUTFILE
# where OUTLIST is the intersected feature list and OUTFILE the Venn plot.
# The target of this rule is the list, so $@ must come before the PNG path.
$(OUTDIR)/rSNFi/intersection_$(OUTPREFIX).txt: $(OUTDIR)/juxt/$(OUTPREFIX)_RandomForest_KBest.log $(OUTDIR)/rSNF/$(OUTPREFIX)_RandomForest_rankList.log
	python $(BINDIR)/intersect_biomarkers.py $(word 1,$^) $(word 2,$^) $@ $(OUTDIR)/rSNFi/venn_$(OUTPREFIX).png --title1 "$(LEVEL1)" --title2 "$(LEVEL2)"

# Reduce the juxtaposed table to the intersected features.
$(OUTDIR)/rSNFi/$(OUTPREFIX).txt: $(FILE) $(OUTDIR)/rSNFi/intersection_$(OUTPREFIX).txt
	python $(BINDIR)/extract_topfeats_onecol.py $(word 1,$^) $(word 2,$^) $@

$(OUTDIR)/rSNFi/$(OUTPREFIX)_RandomForest_KBest.log: $(OUTDIR)/rSNFi/$(OUTPREFIX).txt $(LABEL)
	python $(BINDIR)/sklearn_rf_training_fixrank.py $(word 1,$^) $(word 2,$^) $(OUTDIR)/rSNFi --ranking KBest

$(OUTDIR)/rSNFi/$(OUTPREFIX)_MCC_scores.txt: $(OUTDIR)/rSNFi/$(OUTPREFIX)_RandomForest_KBest.log $(subst _tr,_ts,$(FILE)) $(subst _tr,_ts,$(LABEL))
	python $(BINDIR)/sklearn_rf_validation_writeperf.py $(word 1,$^) $(word 2,$^) $(OUTDIR)/rSNFi --tslab $(word 3,$^)
README.md
View file @
e93424c3
##
# Integrative Network Fusion (INF)
# Integrative Network Fusion (INF)


**
Setup
**
##
Setup
```
bash
```
bash
git clone https://gitlab.fbk.eu/MPBA/INF
git clone https://gitlab.fbk.eu/MPBA/INF
cd
INF
cd
INF
...
@@ -9,19 +9,44 @@ conda env create -f env.yml -n inf
...
@@ -9,19 +9,44 @@ conda env create -f env.yml -n inf
conda activate inf
conda activate inf
```
```
### Additional dependencies
#### R dependencies
To install the R dependencies (not in conda channels), run the following command via the R prompt:
To install the R dependencies (not in conda channels), run the following command via the R prompt:
```
bash
```
bash
install.packages
(
"TunePareto"
)
install.packages
(
"TunePareto"
)
```
```
To install
`mlpy`
, follow the instructions
[
here
](
https://gitlab.fbk.eu/MPBA/mlpy
)
.
#### MLPY
To install
`mlpy`
follow these instructions:
`mlpy`
package is required for some operations included in the DAP procedure.
The
`mlpy`
package available on PyPI is outdated and not working on OSX platforms.
These are the steps to follow:
Let
`<ANACONDA>`
be your anaconda path (e.g.,
`/home/user/anaconda3`
).
Adjust these environment variables:
```
bash
export
LD_LIBRARY_PATH
=
<ANACONDA>/envs/<ENV>/lib:
${
LD_LIBRARY_PATH
}
export
CPATH
=
<ANACONDA>/envs/<ENV>/include:
${
CPATH
}
```
and then install
`mlpy`
from GitLab:
```
bash
pip
install
git+https://gitlab.fbk.eu/MPBA/mlpy.git
```
#### Other Python dependencies
To install
`bootstrapped`
:
To install
`bootstrapped`
:
```
bash
```
bash
pip
install
bootstrapped
pip
install
bootstrapped
```
```
## Usage
**Input files**
**Input files**
*
omics layer 1 data: samples x features, tab-separated, with row & column names
*
omics layer 1 data: samples x features, tab-separated, with row & column names
...
...
extract_topfeats.py
View file @
e93424c3
...
@@ -12,7 +12,7 @@ def extract_feats(datafile, rankedfile, nfeat, outfile):
...
@@ -12,7 +12,7 @@ def extract_feats(datafile, rankedfile, nfeat, outfile):
# feats abundances (no names of samples, no header)
# feats abundances (no names of samples, no header)
# data_ab = data_ab.astype(np.float)
# data_ab = data_ab.astype(np.float)
rank
=
np
.
loadtxt
(
rankedfile
,
delimiter
=
'
\t
'
,
skiprows
=
1
,
dtype
=
str
)
rank
=
np
.
loadtxt
(
rankedfile
,
delimiter
=
'
\t
'
,
skiprows
=
1
,
dtype
=
str
)
# number of features in the list
# number of features in the list
nf_list
=
rank
.
shape
nf_list
=
rank
.
shape
if
len
(
nf_list
)
>
1
:
if
len
(
nf_list
)
>
1
:
...
@@ -20,9 +20,7 @@ def extract_feats(datafile, rankedfile, nfeat, outfile):
...
@@ -20,9 +20,7 @@ def extract_feats(datafile, rankedfile, nfeat, outfile):
top_feats
=
feats
[
0
:
nfeat
]
top_feats
=
feats
[
0
:
nfeat
]
else
:
else
:
top_feats
=
rank
[
1
]
top_feats
=
rank
[
1
]
# print top_feats.shape
#print top_feats.shape
# extract top features from table with abundances of all features
# extract top features from table with abundances of all features
idx
=
[]
idx
=
[]
if
len
(
nf_list
)
==
1
:
if
len
(
nf_list
)
==
1
:
...
@@ -35,16 +33,15 @@ def extract_feats(datafile, rankedfile, nfeat, outfile):
...
@@ -35,16 +33,15 @@ def extract_feats(datafile, rankedfile, nfeat, outfile):
print
(
'###### MISSING %s ######'
%
top_feats
[
i
])
print
(
'###### MISSING %s ######'
%
top_feats
[
i
])
# considering samples names in the new table
# considering samples names in the new table
sel_feats
=
[
features
[
i
]
for
i
in
idx
]
sel_feats
=
[
features
[
i
]
for
i
in
idx
]
# write new table
# write new table
with
open
(
outfile
,
'w'
)
as
outw
:
with
open
(
outfile
,
'w'
)
as
outw
:
writer
=
csv
.
writer
(
outw
,
delimiter
=
'
\t
'
,
lineterminator
=
'
\n
'
)
writer
=
csv
.
writer
(
outw
,
delimiter
=
'
\t
'
,
lineterminator
=
'
\n
'
)
# header
# header
writer
.
writerow
([
'Samples'
]
+
sel_feats
)
writer
.
writerow
([
'Samples'
]
+
sel_feats
)
for
i
in
range
(
0
,
len
(
samples
)):
for
i
in
range
(
0
,
len
(
samples
)):
writer
.
writerow
([
samples
[
i
]]
+
data_ab
[
i
,
idx
].
tolist
())
writer
.
writerow
([
samples
[
i
]]
+
data_ab
[
i
,
idx
].
tolist
())
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
...
...
extract_topfeats_onecol.py
View file @
e93424c3
...
@@ -14,30 +14,30 @@ __date__ = '15 December 2016'
...
@@ -14,30 +14,30 @@ __date__ = '15 December 2016'
#### Extract features from a given dataset ####
#### Extract features from a given dataset ####
def
extract_feats
(
datafile
,
rankedfile
,
outfile
):
def
extract_feats
(
datafile
,
rankedfile
,
outfile
):
#print locals()
#
print locals()
# table with feats abundances
# table with feats abundances
data
=
np
.
loadtxt
(
datafile
,
delimiter
=
'
\t
'
,
dtype
=
str
)
data
=
np
.
loadtxt
(
datafile
,
delimiter
=
'
\t
'
,
dtype
=
str
)
# feats abundances (no names of samples, no header)
# feats abundances (no names of samples, no header)
data_ab
=
data
[
1
:,
1
:].
astype
(
np
.
float
)
data_ab
=
data
[
1
:,
1
:].
astype
(
np
.
float
)
rank
=
np
.
loadtxt
(
rankedfile
,
delimiter
=
'
\t
'
,
skiprows
=
1
,
dtype
=
str
)
rank
=
np
.
loadtxt
(
rankedfile
,
delimiter
=
'
\t
'
,
skiprows
=
1
,
dtype
=
str
)
# number of features in the list
# number of features in the list
nf_list
=
rank
.
shape
nf_list
=
rank
.
shape
if
len
(
nf_list
)
>
1
:
if
len
(
nf_list
)
>
1
:
feats
=
rank
[:,
0
]
feats
=
rank
[:,
0
]
top_feats
=
feats
#
[0:nfeat]
top_feats
=
feats
#
[0:nfeat]
else
:
else
:
top_feats
=
rank
top_feats
=
rank
# extract top features from table with abundances of all features
# extract top features from table with abundances of all features
idx
=
[]
idx
=
[]
nfeat
=
len
(
top_feats
)
nfeat
=
len
(
top_feats
)
for
i
in
range
(
nfeat
):
for
i
in
range
(
nfeat
):
if
top_feats
[
i
]
in
data
[
0
,:].
tolist
():
if
top_feats
[
i
]
in
data
[
0
,
:].
tolist
():
idx
.
append
(
data
[
0
,:].
tolist
().
index
(
top_feats
[
i
]))
idx
.
append
(
data
[
0
,
:].
tolist
().
index
(
top_feats
[
i
]))
else
:
else
:
print
(
top_feats
[
i
])
print
(
top_feats
[
i
])
...
@@ -48,8 +48,8 @@ def extract_feats(datafile, rankedfile, outfile):
...
@@ -48,8 +48,8 @@ def extract_feats(datafile, rankedfile, outfile):
# write new table
# write new table
with
open
(
outfile
,
'w'
)
as
outw
:
with
open
(
outfile
,
'w'
)
as
outw
:
writer
=
csv
.
writer
(
outw
,
delimiter
=
'
\t
'
,
lineterminator
=
'
\n
'
)
writer
=
csv
.
writer
(
outw
,
delimiter
=
'
\t
'
,
lineterminator
=
'
\n
'
)
for
i
in
range
(
len
(
sel_feats
[:,
0
])):
for
i
in
range
(
len
(
sel_feats
[:,
0
])):
writer
.
writerow
(
sel_feats
[
i
,:])
writer
.
writerow
(
sel_feats
[
i
,
:])
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
...
...
input_output.py
View file @
e93424c3
import
numpy
as
np
import
numpy
as
np
import
pandas
as
pd
import
pandas
as
pd
def
load_data
(
filename
):
def
load_data
(
filename
):
df
=
pd
.
read_csv
(
filename
,
sep
=
'
\t
'
,
header
=
0
,
index_col
=
0
)
df
=
pd
.
read_csv
(
filename
,
sep
=
'
\t
'
,
header
=
0
,
index_col
=
0
)
var_names
=
df
.
columns
.
tolist
()
var_names
=
df
.
columns
.
tolist
()
...
@@ -8,14 +9,18 @@ def load_data(filename):
...
@@ -8,14 +9,18 @@ def load_data(filename):
data
=
df
.
values
.
astype
(
dtype
=
np
.
float
)
data
=
df
.
values
.
astype
(
dtype
=
np
.
float
)
return
sample_names
,
var_names
,
data
return
sample_names
,
var_names
,
data
def
save_split
(
x
,
y
,
sample_names
,
var_names
,
basename
):
def
save_split
(
x
,
y
,
sample_names
,
var_names
,
basename
):
"""
"""
x, y: output of train_test_split
x, y: output of train_test_split
sample_names var_names: lists with samples and feature names (will be the DataFrame row and column names)
sample_names var_names: lists with samples and feature names (will be the DataFrame row and column names)
"""
"""
x_df
=
pd
.
DataFrame
(
x
,
index
=
sample_names
,
columns
=
var_names
)
x_df
=
pd
.
DataFrame
(
x
,
index
=
sample_names
,
columns
=
var_names
)
x_df
.
to_csv
(
f
"
{
basename
}
.txt"
,
sep
=
'
\t
'
,
index
=
True
,
header
=
True
,
index_label
=
"sampleID"
)
x_df
.
to_csv
(
f
"
{
basename
}
.txt"
,
sep
=
'
\t
'
,
index
=
True
,
header
=
True
,
index_label
=
"sampleID"
)
y_df
=
pd
.
DataFrame
(
y
,
index
=
sample_names
,
columns
=
[
'label'
])
y_df
=
pd
.
DataFrame
(
y
,
index
=
sample_names
,
columns
=
[
'label'
])
y_df
.
to_csv
(
f
"
{
basename
}
.lab"
,
sep
=
'
\t
'
,
index
=
True
,
header
=
True
,
index_label
=
"sampleID"
)
y_df
.
to_csv
(
f
"
{
basename
}
.lab"
,
sep
=
'
\t
'
,
index
=
True
,
header
=
True
,
index_label
=
"sampleID"
)
intersect_biomarkers.py
View file @
e93424c3
...
@@ -18,21 +18,59 @@ import numpy as np
...
@@ -18,21 +18,59 @@ import numpy as np
matplotlib
.
use
(
'Agg'
)
matplotlib
.
use
(
'Agg'
)
parser
=
argparse
.
ArgumentParser
(
description
=
'Find the intersection between feature lists and produce Venn diagrams.'
)
parser
=
argparse
.
ArgumentParser
(
parser
.
add_argument
(
'CONFIGFILE1'
,
type
=
str
,
help
=
'Training experiment configuration file 1 (with info about number of top discriminant features)'
)
description
=
'Find the intersection between feature lists and produce Venn diagrams.'
parser
.
add_argument
(
'CONFIGFILE2'
,
type
=
str
,
help
=
'Training experiment configuration file 2 (with info about number of top discriminant features)'
)
)
parser
.
add_argument
(
'OUTLIST'
,
type
=
str
,
help
=
'Output file for intersected feature list.'
)
parser
.
add_argument
(
parser
.
add_argument
(
'OUTFILE'
,
type
=
str
,
nargs
=
'?'
,
help
=
'Output file for Venn diagram plot.'
)
'CONFIGFILE1'
,
type
=
str
,
parser
.
add_argument
(
'--title1'
,
type
=
str
,
default
=
'List_1'
,
nargs
=
'?'
,
help
=
'Name for first diagram (default: %(default)s)'
)
help
=
'Training experiment configuration file 1 (with info about number of top discriminant features)'
,
parser
.
add_argument
(
'--title2'
,
type
=
str
,
default
=
'List_2'
,
nargs
=
'?'
,
help
=
'Name for second diagram (default: %(default)s)'
)
)
parser
.
add_argument
(
'--configFile3'
,
type
=
str
,
default
=
'NO'
,
nargs
=
'?'
,
help
=
'Third configuration file - optional (default: %(default)s)'
)
parser
.
add_argument
(
parser
.
add_argument
(
'--title3'
,
type
=
str
,
default
=
'List_3'
,
nargs
=
'?'
,
help
=
'Name for third diagram (default: %(default)s)'
)
'CONFIGFILE2'
,
type
=
str
,
help
=
'Training experiment configuration file 2 (with info about number of top discriminant features)'
,
)
parser
.
add_argument
(
'OUTLIST'
,
type
=
str
,
help
=
'Output file for intersected feature list.'
)
parser
.
add_argument
(
'OUTFILE'
,
type
=
str
,
nargs
=
'?'
,
help
=
'Output file for Venn diagram plot.'
)
parser
.
add_argument
(
'--title1'
,
type
=
str
,
default
=
'List_1'
,
nargs
=
'?'
,
help
=
'Name for first diagram (default: %(default)s)'
,
)
parser
.
add_argument
(
'--title2'
,
type
=
str
,
default
=
'List_2'
,
nargs
=
'?'
,
help
=
'Name for second diagram (default: %(default)s)'
,
)
parser
.
add_argument
(
'--configFile3'
,
type
=
str
,
default
=
'NO'
,
nargs
=
'?'
,
help
=
'Third configuration file - optional (default: %(default)s)'
,
)
parser
.
add_argument
(
'--title3'
,
type
=
str
,
default
=
'List_3'
,
nargs
=
'?'
,
help
=
'Name for third diagram (default: %(default)s)'
,
)
__author__
=
'Alessandro Zandona'
__author__
=
'Alessandro Zandona'
__date__
=
'15 December 2016'
__date__
=
'15 December 2016'
if
len
(
sys
.
argv
)
==
1
:
if
len
(
sys
.
argv
)
==
1
:
parser
.
print_help
()
parser
.
print_help
()
sys
.
exit
(
1
)
sys
.
exit
(
1
)
...
@@ -77,7 +115,7 @@ feats2 = fl_2[:NFEATS, 1]
...
@@ -77,7 +115,7 @@ feats2 = fl_2[:NFEATS, 1]
# Convert lists into sets
# Convert lists into sets
feats2_set
=
set
(
feats2
)
feats2_set
=
set
(
feats2
)
if
(
configfile3
!=
'NO'
)
:
if
configfile3
!=
'NO'
:
config
.
read
(
configfile3
)
config
.
read
(
configfile3
)
if
not
config
.
has_section
(
'INPUT'
):
if
not
config
.
has_section
(
'INPUT'
):
print
(
"%s is not a valid configuration file."
%
CONFIGFILE2
)
print
(
"%s is not a valid configuration file."
%
CONFIGFILE2
)
...
@@ -96,53 +134,60 @@ if (configfile3 != 'NO'):
...
@@ -96,53 +134,60 @@ if (configfile3 != 'NO'):
# Intersection between lists
# Intersection between lists
f1f2
=
feats1_set
.
intersection
(
feats2_set
)
f1f2
=
feats1_set
.
intersection
(
feats2_set
)
if
(
configfile3
!=
'NO'
)
:
if
configfile3
!=
'NO'
:
f1f3
=
feats1_set
.
intersection
(
feats3_set
)
f1f3
=
feats1_set
.
intersection
(
feats3_set
)
f2f3
=
feats2_set
.
intersection
(
feats3_set
)
f2f3
=
feats2_set
.
intersection
(
feats3_set
)
# associate to each common feature the position in each lists
# associate to each common feature the position in each lists
#outFile_f1f2=os.path.join(os.path.dirname(OUTFILE),'Intersection_%s_%s.txt' %(title1,title2))
#
outFile_f1f2=os.path.join(os.path.dirname(OUTFILE),'Intersection_%s_%s.txt' %(title1,title2))
#outw=open(outFile_f1f2, 'w')
#
outw=open(outFile_f1f2, 'w')
with
open
(
OUTLIST
,
'w'
)
as
outw
:
with
open
(
OUTLIST
,
'w'
)
as
outw
:
writer
=
csv
.
writer
(
outw
,
delimiter
=
'
\t
'
,
lineterminator
=
'
\n
'
)
writer
=
csv
.
writer
(
outw
,
delimiter
=
'
\t
'
,
lineterminator
=
'
\n
'
)
writer
.
writerow
([
'Feature'
,
'Position in %s'
%
title1
,
'Postition in %s'
%
title2
])
writer
.
writerow
([
'Feature'
,
'Position in %s'
%
title1
,
'Postition in %s'
%
title2
])
for
i
in
range
(
len
(
list
(
f1f2
))):
for
i
in
range
(
len
(
list
(
f1f2
))):
# current feature in intersection
# current feature in intersection
interF
=
list
(
f1f2
)[
i
]
interF
=
list
(
f1f2
)[
i
]
# position of current feature in first list
# position of current feature in first list
idx_list1
=
np
.
where
(
feats1
==
interF
)[
0
][
0
]
idx_list1
=
np
.
where
(
feats1
==
interF
)[
0
][
0
]
# position of current feature in second list
# position of current feature in second list
idx_list2
=
np
.
where
(
feats2
==
interF
)[
0
][
0
]
idx_list2
=
np
.
where
(
feats2
==
interF
)[
0
][
0
]
writer
.
writerow
([
list
(
f1f2
)[
i
],
idx_list1
+
1
,
idx_list2
+
1
])
writer
.
writerow
([
list
(
f1f2
)[
i
],
idx_list1
+
1
,
idx_list2
+
1
])
if
(
configfile3
!=
'NO'
)
:
if
configfile3
!=
'NO'
:
# associate to each common feature the position in each lists
# associate to each common feature the position in each lists
outFile_f1f3
=
os
.
path
.
join
(
os
.
path
.
dirname
(
OUTFILE
),
'Intersection_%s_%s.txt'
%
(
title1
,
title3
))
outFile_f1f3
=
os
.
path
.
join
(
os
.
path
.
dirname
(
OUTFILE
),
'Intersection_%s_%s.txt'
%
(
title1
,
title3
)
)
with
open
(
outFile_f1f3
,
'w'
)
as
outw
:
with
open
(
outFile_f1f3
,
'w'
)
as
outw
:
writer
=
csv
.
writer
(
outw
,
delimiter
=
'
\t
'
,
lineterminator
=
'
\n
'
)
writer
=
csv
.
writer
(
outw
,
delimiter
=
'
\t
'
,
lineterminator
=
'
\n
'
)
writer
.
writerow
([
'Feature'
,
'Position in %s '
%
title1
,
'Postition in %s '
%
title3
])
writer
.
writerow
(
[
'Feature'
,
'Position in %s '
%
title1
,
'Postition in %s '
%
title3
]
)
for
i
in
range
(
len
(
list
(
f1f3
))):
for
i
in
range
(
len
(
list
(
f1f3
))):
# current feature in intersection
# current feature in intersection
interF
=
list
(
f1f3
)[
i
]
interF
=
list
(
f1f3
)[
i
]
# position of current feature in first list
# position of current feature in first list
idx_list1
=
np
.
where
(
feats1
==
interF
)[
0
][
0
]
idx_list1
=
np
.
where
(
feats1
==
interF
)[
0
][
0
]
# position of current feature in second list
# position of current feature in second list
idx_list3
=
np
.
where
(
feats3
==
interF
)[
0
][
0
]
idx_list3
=
np
.
where
(
feats3
==
interF
)[
0
][
0
]
writer
.
writerow
([
list
(
f1f3
)[
i
],
idx_list1
+
1
,
idx_list3
+
1
])
writer
.
writerow
([
list
(
f1f3
)[
i
],
idx_list1
+
1
,
idx_list3
+
1
])
outFile_f2f3
=
os
.
path
.
join
(
os
.
path
.
dirname
(
OUTFILE
),
'Intersection_%s_%s.txt'
%
(
title2
,
title3
))
outFile_f2f3
=
os
.
path
.
join
(
os
.
path
.
dirname
(
OUTFILE
),
'Intersection_%s_%s.txt'
%
(
title2
,
title3
)
)
with
open
(
outFile_f2f3
,
'w'
)
as
outw
:
with
open
(
outFile_f2f3
,
'w'
)
as
outw
:
writer
=
csv
.
writer
(
outw
,
delimiter
=
'
\t
'
,
lineterminator
=
'
\n
'
)
writer
=
csv
.
writer
(
outw
,
delimiter
=
'
\t
'
,
lineterminator
=
'
\n
'
)
writer
.
writerow
([
'Feature'
,
'Position in %s '
%
title2
,
'Postition in %s '
%
title3
])
writer
.
writerow
(
[
'Feature'
,
'Position in %s '
%
title2
,
'Postition in %s '
%
title3
]
)
for
i
in
range
(
len
(
list
(
f2f3
))):
for
i
in
range
(
len
(
list
(
f2f3
))):
# current feature in intersection
# current feature in intersection
interF
=
list
(
f2f3
)[
i
]
interF
=
list
(
f2f3
)[
i
]
# position of current feature in first list
# position of current feature in first list
idx_list2
=
np
.
where
(
feats2
==
interF
)[
0
][
0
]
idx_list2
=
np
.
where
(
feats2
==
interF
)[
0
][
0
]
# position of current feature in second list
# position of current feature in second list
idx_list3
=
np
.
where
(
feats3
==
interF
)[
0
][
0
]
idx_list3
=
np
.
where
(
feats3
==
interF
)[
0
][
0
]
writer
.
writerow
([
list
(
f2f3
)[
i
],
idx_list2
+
1
,
idx_list3
+
1
])
writer
.
writerow
([
list
(
f2f3
)[
i
],
idx_list2
+
1
,
idx_list3
+
1
])
# # plot Venn diagrams
# # plot Venn diagrams
...
...
list_distances.py
0 → 100644
View file @
e93424c3
import
argparse
from
itertools
import
combinations
from
pathlib
import
Path
import
numpy
as
np
import
pandas
as
pd
from
mlpy
import
canberra_stability
# Script: for every combination of two or more layers, load the Borda ranking
# produced for the plain dataset and for its "<dataset>_SNFdap" counterpart and
# print the value returned by mlpy's canberra_stability for the two rankings at
# several feature counts.

parser = argparse.ArgumentParser()
# The three path components and the layer list are needed to build every input
# file name, so let argparse reject a call that omits them with a usage message
# instead of failing later with a TypeError.
parser.add_argument('--resultsdir', type=str, required=True, help='Results folder')
parser.add_argument('--dataset', type=str, required=True, help='Dataset name')
parser.add_argument('--target', type=str, required=True, help='Clinical endpoint')
parser.add_argument(
    '--model', type=str, default='randomForest', help='Model (default: %(default)s)'
)
parser.add_argument(
    '--nf_min', type=int, default=10, help='Min #feat (default: %(default)s)'
)
parser.add_argument(
    '--nf_max', type=int, default=50, help='Max #feat (default: %(default)s)'
)
parser.add_argument(
    '--nf_step',
    type=int,
    default=10,
    help='Increase by these many feat (default: %(default)s)',
)
# Optional: defaults to an empty list so the extra loop below is simply skipped
# when the option is not given (it used to iterate over None and crash).
parser.add_argument(
    '--nf_rsnf', type=int, nargs='+', default=[], help='One or more #feat for rSNF'
)
parser.add_argument(
    '--layers',
    type=str,
    nargs='+',
    required=True,
    help='Layer names; every combination of two or more layers is evaluated',
)

args = parser.parse_args()

RESULTSDIR = args.resultsdir  # top-level results directory
DATASET = args.dataset  # 'tcga_breast'
TARGET = args.target  # 'ER'
MODEL = args.model
NF_MIN = args.nf_min
NF_MAX = args.nf_max
NF_STEP = args.nf_step
NF_RSNF = args.nf_rsnf
LAYERS = args.layers
N_LAYERS = len(LAYERS)
MODE = 'rSNF'

# Both result trees must exist before any ranking is read. An explicit check is
# used instead of `assert`, which is skipped when Python runs with -O.
for results_subdir in (DATASET, f"{DATASET}_SNFdap"):
    if not Path(RESULTSDIR, results_subdir).expanduser().exists():
        raise FileNotFoundError(f"{RESULTSDIR}/{results_subdir} not found")

# NOTE(review): the indentation of this script was reconstructed from a
# flattened listing; in particular the nesting level of the blank-line print()
# calls should be confirmed against the repository.
for k in range(2, N_LAYERS + 1):
    for comb in combinations(LAYERS, k):
        layers_concat = '_'.join(comb)

        # One Borda table per result tree: index 0 = DATASET, 1 = DATASET_SNFdap.
        bordas = []
        for datatype in [DATASET, f'{DATASET}_SNFdap']:
            bordaf = f'{RESULTSDIR}/{datatype}/{TARGET}/{MODEL}/Borda_splits_50-60_{MODE}_{layers_concat}.txt'
            bordas.append(pd.read_csv(bordaf, sep='\t', index_col=None))

        # prepare ranks for canberra_stability:
        # one row per Borda table, holding the argsort of its FEATURE_ID column
        ranks = (
            pd.concat(
                [
                    np.argsort(bordas[0]['FEATURE_ID']),
                    np.argsort(bordas[1]['FEATURE_ID']),
                ],
                axis=1,
            )
            .transpose()
            .values
        )

        # Regular grid NF_MIN, NF_MIN + NF_STEP, ... ; the stop value is
        # NF_MAX + NF_STEP so that NF_MAX itself is included when it is on the grid.
        for nf in np.arange(NF_MIN, NF_MAX + NF_STEP, NF_STEP):
            cs = canberra_stability(ranks, nf)
            print(f'{MODE} - {layers_concat} - stability({nf}) = {cs:.3f}')

        # additional steps for NF_RSNF
        print()
        for nf in NF_RSNF:
            cs = canberra_stability(ranks, nf)
            print(f'{MODE} - {layers_concat} - stability({nf}) = {cs:.3f}')

        print()
    print()
performance.py
View file @
e93424c3
from
__future__
import
division
from
__future__
import
division
import
numpy
as
np
import
numpy
as
np
__author__
=
'Davide Albanese'
__author__
=
'Davide Albanese'