MPBA / INF · Commits

Commit e93424c3, authored Mar 26, 2020 by Alessia Marcolini

Merge branch 'master' of gitlab.fbk.eu:MPBA/INF

Parents: c2ef86cb, b422bb46
Changes: 18
Makefile · deleted (100755 → 0) · view file @ c2ef86cb
```makefile
#!/usr/bin/make -f
# Makefile for running the INF pipeline
# Author: Marco Chierici <chierici@fbk.eu>
# Date: 2017-05-12
#
.PHONY: init

SHELL := /bin/bash

# input variables
# shown are examples, override on command line
FILE ?= data/AG1-G_MAV-G_498_LIT_ALL_tr.txt
LABEL ?= data/label_498_ALL-EFS_tr.lab
DATA1 ?= data/AG1-G_498_LIT_ALL_tr.txt
DATA2 ?= data/MAV-G_498_LIT_ALL_tr.txt
ENDPOINT ?= ALL-EFS
# added MF 20170710
THREADS ?= 4
OUTBASE ?= /path/to/out_tmp
BINDIR := .
OUTDIR := $(OUTBASE)/$(ENDPOINT)

# derived variables
OUTPREFIX = $(notdir $(basename $(FILE)))
LEVEL1 = $(word 1,$(subst _, ,$(OUTPREFIX)))
LEVEL2 = $(word 2,$(subst _, ,$(OUTPREFIX)))

init:
	@mkdir -p $(OUTDIR)/rSNFi
	@mkdir -p $(OUTDIR)/rSNF

all: init $(OUTDIR)/rSNFi/$(OUTPREFIX)_MCC_scores.txt $(OUTDIR)/rSNF/$(OUTPREFIX)_MCC_scores.txt $(OUTDIR)/juxt/$(OUTPREFIX)_MCC_scores.txt

$(OUTDIR)/juxt/$(OUTPREFIX)_RandomForest_KBest.log: $(FILE) $(LABEL)
	python $(BINDIR)/sklearn_rf_training_fixrank.py $(word 1,$^) $(word 2,$^) $(OUTDIR)/juxt --ranking KBest

$(OUTDIR)/juxt/$(OUTPREFIX)_MCC_scores.txt: $(OUTDIR)/juxt/$(OUTPREFIX)_RandomForest_KBest.log $(subst _tr,_ts,$(FILE)) $(subst _tr,_ts,$(LABEL))
	python $(BINDIR)/sklearn_rf_validation_writeperf.py $(word 1,$^) $(word 2,$^) $(OUTDIR)/juxt --tslab $(word 3,$^)

$(OUTDIR)/rSNF/INF_$(OUTPREFIX).txt: $(DATA1) $(DATA2) $(LABEL)
	Rscript $(BINDIR)/snf_integration.R --d1 $(word 1,$^) --d2 $(word 2,$^) --lab $(word 3,$^) \
		--scriptDir $(BINDIR)/SNFtools/ --clust spectral --threads "$(THREADS)" \
		--outf $@

$(OUTDIR)/rSNF/$(OUTPREFIX)_RandomForest_rankList.log: $(FILE) $(LABEL) $(OUTDIR)/rSNF/INF_$(OUTPREFIX).txt
	python $(BINDIR)/sklearn_rf_training_fixrank.py $(word 1,$^) $(word 2,$^) $(OUTDIR)/rSNF \
		--ranking rankList --rankFeats $(word 3,$^)

$(OUTDIR)/rSNF/$(OUTPREFIX)_MCC_scores.txt: $(OUTDIR)/rSNF/$(OUTPREFIX)_RandomForest_rankList.log $(subst _tr,_ts,$(FILE)) $(subst _tr,_ts,$(LABEL))
	python $(BINDIR)/sklearn_rf_validation_writeperf.py $(word 1,$^) $(word 2,$^) $(OUTDIR)/rSNF --tslab $(word 3,$^)

$(OUTDIR)/rSNFi/intersection_$(OUTPREFIX).txt: $(OUTDIR)/juxt/$(OUTPREFIX)_RandomForest_KBest.log $(OUTDIR)/rSNF/$(OUTPREFIX)_RandomForest_rankList.log
	python $(BINDIR)/intersect_biomarkers.py $(word 1,$^) $(word 2,$^) $(OUTDIR)/rSNFi/venn_$(OUTPREFIX).png $@ --title1 "$(LEVEL1)" --title2 "$(LEVEL2)"

$(OUTDIR)/rSNFi/$(OUTPREFIX).txt: $(FILE) $(OUTDIR)/rSNFi/intersection_$(OUTPREFIX).txt
	python $(BINDIR)/extract_topfeats_onecol.py $(word 1,$^) $(word 2,$^) $@

$(OUTDIR)/rSNFi/$(OUTPREFIX)_RandomForest_KBest.log: $(OUTDIR)/rSNFi/$(OUTPREFIX).txt $(LABEL)
	python $(BINDIR)/sklearn_rf_training_fixrank.py $(word 1,$^) $(word 2,$^) $(OUTDIR)/rSNFi --ranking KBest

$(OUTDIR)/rSNFi/$(OUTPREFIX)_MCC_scores.txt: $(OUTDIR)/rSNFi/$(OUTPREFIX)_RandomForest_KBest.log $(subst _tr,_ts,$(FILE)) $(subst _tr,_ts,$(LABEL))
	python $(BINDIR)/sklearn_rf_validation_writeperf.py $(word 1,$^) $(word 2,$^) $(OUTDIR)/rSNFi --tslab $(word 3,$^)
```
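Every input variable above is assigned with `?=`, so a pipeline run is configured by overriding the defaults on the command line, as the header comment notes; for example, reusing the same placeholder paths as the defaults: `make all FILE=data/AG1-G_MAV-G_498_LIT_ALL_tr.txt LABEL=data/label_498_ALL-EFS_tr.lab DATA1=data/AG1-G_498_LIT_ALL_tr.txt DATA2=data/MAV-G_498_LIT_ALL_tr.txt ENDPOINT=ALL-EFS THREADS=8 OUTBASE=/tmp/inf_out`. Note that the `_tr`/`_ts` naming convention matters: the validation targets substitute `_tr` with `_ts` in `FILE` and `LABEL` to locate the matching test files.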
README.md · view file @ e93424c3
# Integrative Network Fusion (INF)

## Setup

```bash
git clone https://gitlab.fbk.eu/MPBA/INF
cd INF
...
conda env create -f env.yml -n inf
conda activate inf
```

### Additional dependencies

#### R dependencies

To install the R dependencies (not available in conda channels), run the following command via the R prompt:

```R
install.packages("TunePareto")
```

#### MLPY

The `mlpy` package is required for some operations included in the DAP procedure. The `mlpy` version available on PyPI is outdated and does not work on OSX platforms, so install it as follows.

Let `<ANACONDA>` be your anaconda path (e.g., `/home/user/anaconda3`) and `<ENV>` the name of the conda environment. Adjust these environment variables:

```bash
export LD_LIBRARY_PATH=<ANACONDA>/envs/<ENV>/lib:${LD_LIBRARY_PATH}
export CPATH=<ANACONDA>/envs/<ENV>/include:${CPATH}
```

and then install `mlpy` from GitLab:

```bash
pip install git+https://gitlab.fbk.eu/MPBA/mlpy.git
```

#### Other Python dependencies

To install `bootstrapped`:

```bash
pip install bootstrapped
```

## Usage

**Input files**

* omics layer 1 data: samples x features, tab-separated, with row & column names

...
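To make the expected layout concrete, here is a minimal sketch that writes a toy omics-layer file in the format described by the first bullet above (tab-separated, samples as rows, features as columns, both named); all sample and feature names are invented for illustration:

```python
import pandas as pd

# toy samples x features table; row and column names are both required
layer1 = pd.DataFrame(
    [[0.12, 1.30, 0.55], [0.98, 0.41, 2.10]],
    index=["sample_1", "sample_2"],          # sample IDs (row names)
    columns=["feat_A", "feat_B", "feat_C"],  # feature names (column names)
)
# tab-separated with header row and index column, matching what
# load_data() in input_output.py reads back
layer1.to_csv("layer1_tr.txt", sep="\t")
```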
extract_topfeats.py · view file @ e93424c3

```python
...
from input_output import load_data


def extract_feats(datafile, rankedfile, nfeat, outfile):
    # sample names, features names and table with features abundances
    samples, features, data_ab = load_data(datafile)
    # feats abundances (no names of samples, no header)
    # data_ab = data_ab.astype(np.float)
    rank = np.loadtxt(rankedfile, delimiter='\t', skiprows=1, dtype=str)
    # number of features in the list
    nf_list = rank.shape
    if len(nf_list) > 1:
        ...
        top_feats = feats[0:nfeat]
    else:
        top_feats = rank[1]
    #print top_feats.shape
    # extract top features from table with abundances of all features
    idx = []
    if len(nf_list) == 1:
        ...
        print('###### MISSING %s ######' % top_feats[i])
    # considering samples names in the new table
    sel_feats = [features[i] for i in idx]
    # write new table
    with open(outfile, 'w') as outw:
        writer = csv.writer(outw, delimiter='\t', lineterminator='\n')
        # header
        writer.writerow(['Samples'] + sel_feats)
        for i in range(0, len(samples)):
            writer.writerow([samples[i]] + data_ab[i, idx].tolist())


if __name__ == "__main__":
    ...
```
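For orientation, a hypothetical call of the function above; the ranked-list path is invented, the data path reuses one of the Makefile's example files, and `nfeat=50` is an arbitrary cutoff:

```python
from extract_topfeats import extract_feats

# hypothetical paths: writes the samples x top-50 submatrix to the output file
extract_feats(
    "data/AG1-G_498_LIT_ALL_tr.txt",           # data: samples x features, with names
    "results/ranked_feats.txt",                # ranking: tab-separated, one header row
    50,                                        # number of top-ranked features to keep
    "results/AG1-G_498_LIT_ALL_tr_top50.txt",  # output table
)
```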
extract_topfeats_onecol.py · view file @ e93424c3

```python
...
import sys

import numpy as np

__author__ = 'Marco Chierici, Alessandro Zandona'
__date__ = '15 December 2016'


#### Extract features from a given dataset ####
def extract_feats(datafile, rankedfile, outfile):
    # print locals()
    # table with feats abundances
    data = np.loadtxt(datafile, delimiter='\t', dtype=str)
    # feats abundances (no names of samples, no header)
    data_ab = data[1:, 1:].astype(np.float)
    rank = np.loadtxt(rankedfile, delimiter='\t', skiprows=1, dtype=str)
    # number of features in the list
    nf_list = rank.shape
    if len(nf_list) > 1:
        feats = rank[:, 0]
        top_feats = feats  # [0:nfeat]
    else:
        top_feats = rank
    # extract top features from table with abundances of all features
    idx = []
    nfeat = len(top_feats)
    for i in range(nfeat):
        if top_feats[i] in data[0, :].tolist():
            idx.append(data[0, :].tolist().index(top_feats[i]))
        else:
            print(top_feats[i])
    ...
    # write new table
    with open(outfile, 'w') as outw:
        writer = csv.writer(outw, delimiter='\t', lineterminator='\n')
        for i in range(len(sel_feats[:, 0])):
            writer.writerow(sel_feats[i, :])


if __name__ == "__main__":
    ...
```
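In the Makefile above, this script implements the rSNFi feature-extraction step; it is invoked as `python extract_topfeats_onecol.py <datafile> <intersection-list> <outfile>`, i.e. with the training table, the intersected feature list produced by `intersect_biomarkers.py`, and the output path, in that order.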
input_output.py · view file @ e93424c3

```python
import numpy as np
import pandas as pd


def load_data(filename):
    df = pd.read_csv(filename, sep='\t', header=0, index_col=0)
    var_names = df.columns.tolist()
    ...
    data = df.values.astype(dtype=np.float)
    return sample_names, var_names, data


def save_split(x, y, sample_names, var_names, basename):
    """
    x, y: output of train_test_split
    sample_names, var_names: lists with samples and feature names (will be the DataFrame row and column names)
    """
    x_df = pd.DataFrame(x, index=sample_names, columns=var_names)
    x_df.to_csv(f"{basename}.txt", sep='\t', index=True, header=True, index_label="sampleID")
    y_df = pd.DataFrame(y, index=sample_names, columns=['label'])
    y_df.to_csv(f"{basename}.lab", sep='\t', index=True, header=True, index_label="sampleID")
```
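A minimal sketch of how these helpers compose, assuming scikit-learn's `train_test_split` (which the docstring references) and a label file with the same layout `save_split` writes (a sampleID index plus one label column); all paths are placeholders echoing the Makefile's examples:

```python
import pandas as pd
from sklearn.model_selection import train_test_split
from input_output import load_data, save_split

# load a named samples x features table (placeholder path)
samples, var_names, X = load_data("data/AG1-G_MAV-G_498_LIT_ALL_tr.txt")
# assumed label layout: sampleID index column plus one label column
y = pd.read_csv("data/label_498_ALL-EFS_tr.lab", sep="\t", index_col=0).values.ravel()

# split the matrix, labels, and sample names together, then persist each part
X_tr, X_ts, y_tr, y_ts, s_tr, s_ts = train_test_split(X, y, samples, test_size=0.3)
save_split(X_tr, y_tr, s_tr, var_names, "out/inf_tr")  # writes out/inf_tr.txt + .lab
save_split(X_ts, y_ts, s_ts, var_names, "out/inf_ts")
```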
intersect_biomarkers.py · view file @ e93424c3

```python
...
import numpy as np

matplotlib.use('Agg')

parser = argparse.ArgumentParser(
    description='Find the intersection between feature lists and produce Venn diagrams.'
)
parser.add_argument(
    'CONFIGFILE1',
    type=str,
    help='Training experiment configuration file 1 (with info about number of top discriminant features)',
)
parser.add_argument(
    'CONFIGFILE2',
    type=str,
    help='Training experiment configuration file 2 (with info about number of top discriminant features)',
)
parser.add_argument('OUTLIST', type=str, help='Output file for intersected feature list.')
parser.add_argument('OUTFILE', type=str, nargs='?', help='Output file for Venn diagram plot.')
parser.add_argument(
    '--title1',
    type=str,
    default='List_1',
    nargs='?',
    help='Name for first diagram (default: %(default)s)',
)
parser.add_argument(
    '--title2',
    type=str,
    default='List_2',
    nargs='?',
    help='Name for second diagram (default: %(default)s)',
)
parser.add_argument(
    '--configFile3',
    type=str,
    default='NO',
    nargs='?',
    help='Third configuration file - optional (default: %(default)s)',
)
parser.add_argument(
    '--title3',
    type=str,
    default='List_3',
    nargs='?',
    help='Name for third diagram (default: %(default)s)',
)

__author__ = 'Alessandro Zandona'
__date__ = '15 December 2016'

if len(sys.argv) == 1:
    parser.print_help()
    sys.exit(1)

...
feats2 = fl_2[:NFEATS, 1]

# Convert lists into sets
feats2_set = set(feats2)

if configfile3 != 'NO':
    config.read(configfile3)
    if not config.has_section('INPUT'):
        print("%s is not a valid configuration file." % CONFIGFILE2)
        sys.exit(3)

    RANK = config.get("OUTPUT", "Borda")
    NFEATS = config.getint("OUTPUT", "N_feats")

    # Feature lists
    fl_3 = np.loadtxt(RANK, dtype=str, delimiter='\t', skiprows=1)
    # Features name
    feats3 = fl_3[:NFEATS, 1]
    # Convert lists into sets
    feats3_set = set(feats3)

# Intersection between lists
f1f2 = feats1_set.intersection(feats2_set)
if configfile3 != 'NO':
    f1f3 = feats1_set.intersection(feats3_set)
    f2f3 = feats2_set.intersection(feats3_set)

# associate to each common feature the position in each lists
# outFile_f1f2=os.path.join(os.path.dirname(OUTFILE),'Intersection_%s_%s.txt' %(title1,title2))
# outw=open(outFile_f1f2, 'w')
with open(OUTLIST, 'w') as outw:
    writer = csv.writer(outw, delimiter='\t', lineterminator='\n')
    writer.writerow(['Feature', 'Position in %s' % title1, 'Postition in %s' % title2])
    for i in range(len(list(f1f2))):
        # current feature in intersection
        interF = list(f1f2)[i]
        # position of current feature in first list
        idx_list1 = np.where(feats1 == interF)[0][0]
        # position of current feature in second list
        idx_list2 = np.where(feats2 == interF)[0][0]
        writer.writerow([list(f1f2)[i], idx_list1 + 1, idx_list2 + 1])

if configfile3 != 'NO':
    # associate to each common feature the position in each lists
    outFile_f1f3 = os.path.join(
        os.path.dirname(OUTFILE), 'Intersection_%s_%s.txt' % (title1, title3)
    )
    with open(outFile_f1f3, 'w') as outw:
        writer = csv.writer(outw, delimiter='\t', lineterminator='\n')
        writer.writerow(
            ['Feature', 'Position in %s ' % title1, 'Postition in %s ' % title3]
        )
        for i in range(len(list(f1f3))):
            # current feature in intersection
            interF = list(f1f3)[i]
            # position of current feature in first list
            idx_list1 = np.where(feats1 == interF)[0][0]
            # position of current feature in second list
            idx_list3 = np.where(feats3 == interF)[0][0]
            writer.writerow([list(f1f3)[i], idx_list1 + 1, idx_list3 + 1])

    outFile_f2f3 = os.path.join(
        os.path.dirname(OUTFILE), 'Intersection_%s_%s.txt' % (title2, title3)
    )
    with open(outFile_f2f3, 'w') as outw:
        writer = csv.writer(outw, delimiter='\t', lineterminator='\n')
        writer.writerow(
            ['Feature', 'Position in %s ' % title2, 'Postition in %s ' % title3]
        )
        for i in range(len(list(f2f3))):
            # current feature in intersection
            interF = list(f2f3)[i]
            # position of current feature in first list
            idx_list2 = np.where(feats2 == interF)[0][0]
            # position of current feature in second list
            idx_list3 = np.where(feats3 == interF)[0][0]
            writer.writerow([list(f2f3)[i], idx_list2 + 1, idx_list3 + 1])

# # plot Venn diagrams
...
```
list_distances.py · new file (0 → 100644) · view file @ e93424c3

```python
import argparse
from itertools import combinations
from pathlib import Path

import numpy as np
import pandas as pd
from mlpy import canberra_stability

parser = argparse.ArgumentParser()
parser.add_argument('--resultsdir', type=str, help='Results folder')
parser.add_argument('--dataset', type=str, help='Dataset name')
parser.add_argument('--target', type=str, help='Clinical endpoint')
parser.add_argument('--model', type=str, default='randomForest', help='Model (default: %(default)s)')
parser.add_argument('--nf_min', type=int, default=10, help='Min #feat (default: %(default)s)')
parser.add_argument('--nf_max', type=int, default=50, help='Max #feat (default: %(default)s)')
parser.add_argument(
    '--nf_step',
    type=int,
    default=10,
    help='Increase by these many feat (default: %(default)s)',
)
parser.add_argument('--nf_rsnf', type=int, nargs='+', help='One or more #feat for rSNF')
parser.add_argument('--layers', type=str, nargs='+', help='')
args = parser.parse_args()

RESULTSDIR = args.resultsdir  # top-level results directory
DATASET = args.dataset  # 'tcga_breast'
TARGET = args.target  # 'ER'
MODEL = args.model
NF_MIN = args.nf_min
NF_MAX = args.nf_max
NF_STEP = args.nf_step
NF_RSNF = args.nf_rsnf
LAYERS = args.layers
N_LAYERS = len(LAYERS)
MODE = 'rSNF'

assert (
    Path(RESULTSDIR, DATASET).expanduser().exists()
), f"{RESULTSDIR}/{DATASET} not found"
assert (
    Path(RESULTSDIR, f"{DATASET}_SNFdap").expanduser().exists()
), f"{RESULTSDIR}/{DATASET}_SNFdap not found"

for k in range(2, N_LAYERS + 1):
    for comb in combinations(LAYERS, k):
        ...
```
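Finally, a toy sketch of the stability measure this script builds on. It assumes, per the mlpy documentation, that `canberra_stability(x, k)` takes a 2D integer array whose rows are ranked lists of feature indices plus a top-k cutoff, and returns a scalar stability indicator; the rankings below are made up for illustration:

```python
import numpy as np
from mlpy import canberra_stability

# three hypothetical rankings of six features (each row is a ranked index list)
ranks = np.array(
    [
        [0, 1, 2, 3, 4, 5],
        [1, 0, 2, 4, 3, 5],
        [0, 2, 1, 3, 5, 4],
    ]
)
# stability of the top-3 sublists across the three rankings
print(canberra_stability(ranks, 3))
```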