Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
MPBA
INF
Commits
55d7a92d
Commit
55d7a92d
authored
Feb 04, 2020
by
Nicole Bussola
Browse files
manage custom splits
parent
610a7d3f
Changes
4
Hide whitespace changes
Inline
Side-by-side
postprocessing/borda_global_juxt_rSNF.py
View file @
55d7a92d
...
...
@@ -39,7 +39,8 @@ parser.add_argument('--dataset', type=str, help='Dataset name')
parser
.
add_argument
(
'--target'
,
type
=
str
,
help
=
'Clinical endpoint'
)
parser
.
add_argument
(
'--model'
,
type
=
str
,
help
=
'Classifiers implemented, randomForest or LSVM'
)
parser
.
add_argument
(
'--layers'
,
type
=
str
,
nargs
=
'+'
,
help
=
''
)
parser
.
add_argument
(
'--n_splits'
,
type
=
int
,
help
=
''
)
parser
.
add_argument
(
'--n_splits_end'
,
type
=
int
,
help
=
''
)
parser
.
add_argument
(
'--n_splits_start'
,
type
=
int
,
help
=
''
)
parser
.
add_argument
(
'--mode'
,
type
=
str
,
help
=
'rSNF, rSNFi, single'
)
args
=
parser
.
parse_args
()
...
...
@@ -51,7 +52,8 @@ OUTFOLDER = args.outfolder
TARGET
=
args
.
target
MODEL
=
args
.
model
LAYERS
=
args
.
layers
N_SPLITS
=
args
.
n_splits
N_SPLITS_START
=
args
.
n_splits_start
N_SPLITS_END
=
args
.
n_splits_end
MODE
=
args
.
mode
assert
MODE
in
[
'juxt'
,
'rSNF'
,
'single'
]
...
...
@@ -66,7 +68,7 @@ for k in range(2, N_LAYERS + 1):
_
,
var_names
,
_
=
load_data
(
os
.
path
.
join
(
DATAFOLDER
,
DATASET
,
TARGET
,
f
'0/
{
layers_concat
}
_tr.txt'
)
)
rankings
=
[]
for
i
in
range
(
N_SPLITS
):
for
i
in
range
(
N_SPLITS
_END
-
N_SPLITS_START
):
if
MODE
==
'rSNF'
:
file_ranking
=
os
.
path
.
join
(
OUTFOLDER
,
DATASET
,
TARGET
,
MODEL
,
f
'
{
i
}
/
{
MODE
}
/
{
layers_concat
}
_tr_
{
MODEL
}
_rankList_ranking.csv.gz'
)
...
...
@@ -83,4 +85,4 @@ for k in range(2, N_LAYERS + 1):
for
i
,
pos
in
zip
(
BORDA_ID
,
BORDA_POS
):
borda_df
=
borda_df
.
append
({
'FEATURE_ID'
:
i
,
'FEATURE_NAME'
:
var_names
[
i
],
'MEAN_POS'
:
pos
+
1
},
ignore_index
=
True
)
borda_df
.
to_csv
(
f
"
{
OUTFOLDER
}
/
{
DATASET
}
/
{
TARGET
}
/
{
MODEL
}
/Borda_
allS
plits_
{
MODE
}
_
{
layers_concat
}
.txt"
,
sep
=
'
\t
'
,
index
=
False
,
float_format
=
"%.3f"
)
borda_df
.
to_csv
(
f
"
{
OUTFOLDER
}
/
{
DATASET
}
/
{
TARGET
}
/
{
MODEL
}
/Borda_
s
plits_
{
N_SPLITS_START
}
-
{
N_SPLITS_END
}
_
{
MODE
}
_
{
layers_concat
}
.txt"
,
sep
=
'
\t
'
,
index
=
False
,
float_format
=
"%.3f"
)
postprocessing/borda_global_rSNFi.py
View file @
55d7a92d
...
...
@@ -29,7 +29,8 @@ parser.add_argument('--dataset', type=str, help='Dataset name')
parser
.
add_argument
(
'--target'
,
type
=
str
,
help
=
'Clinical endpoint'
)
parser
.
add_argument
(
'--model'
,
type
=
str
,
help
=
'Classifiers implemented, randomForest or LSVM'
)
parser
.
add_argument
(
'--layers'
,
type
=
str
,
nargs
=
'+'
,
help
=
''
)
parser
.
add_argument
(
'--n_splits'
,
type
=
int
,
help
=
''
)
parser
.
add_argument
(
'--n_splits_end'
,
type
=
int
,
help
=
''
)
parser
.
add_argument
(
'--n_splits_start'
,
type
=
int
,
help
=
''
)
parser
.
add_argument
(
'--mode'
,
type
=
str
,
help
=
'rSNFi custom Borda'
)
args
=
parser
.
parse_args
()
...
...
@@ -41,7 +42,8 @@ OUTFOLDER = args.outfolder
TARGET
=
args
.
target
MODEL
=
args
.
model
LAYERS
=
args
.
layers
N_SPLITS
=
args
.
n_splits
N_SPLITS_START
=
args
.
n_splits_start
N_SPLITS_END
=
args
.
n_splits_end
MODE
=
args
.
mode
assert
MODE
==
'rSNFi'
...
...
@@ -59,7 +61,7 @@ for k in range(2, N_LAYERS + 1):
all_feats
=
[]
for
i
in
range
(
N_SPLITS
):
for
i
in
range
(
N_SPLITS
_END
-
N_SPLITS_START
):
file_featureList
=
os
.
path
.
join
(
OUTFOLDER
,
DATASET
,
TARGET
,
MODEL
,
f
'
{
i
}
/
{
MODE
}
/
{
layers_concat
}
_tr_
{
MODEL
}
_KBest_featurelist.txt'
)
feats
=
pd
.
read_csv
(
file_featureList
,
sep
=
'
\t
'
)
all_feats
.
extend
(
list
(
feats
.
FEATURE_NAME
))
...
...
@@ -68,13 +70,13 @@ for k in range(2, N_LAYERS + 1):
positions
=
dict
()
means
=
dict
()
x
=
((
len
(
all_feats
)
-
1
)
*
np
.
ones
((
1
,
N_SPLITS
*
CV_K
*
CV_N
)))
x
=
((
len
(
all_feats
)
-
1
)
*
np
.
ones
((
1
,
(
N_SPLITS
_END
-
N_SPLITS_START
)
*
CV_K
*
CV_N
)))
for
i
in
all_feats
:
positions
[
i
]
=
x
.
tolist
()[
0
]
means
[
i
]
=
0.0
for
i
in
range
(
N_SPLITS
):
for
i
in
range
(
N_SPLITS
_END
-
N_SPLITS_START
):
file_featureList
=
os
.
path
.
join
(
OUTFOLDER
,
DATASET
,
TARGET
,
MODEL
,
f
'
{
i
}
/
{
MODE
}
/
{
layers_concat
}
_tr_
{
MODEL
}
_KBest_featurelist.txt'
)
feats
=
pd
.
read_csv
(
file_featureList
,
sep
=
'
\t
'
)
...
...
@@ -96,4 +98,4 @@ for k in range(2, N_LAYERS + 1):
sorted_means
=
sorted
(
means
.
items
(),
key
=
operator
.
itemgetter
(
1
))
borda_df
=
pd
.
DataFrame
(
sorted_means
,
columns
=
[
'FEATURE_NAME'
,
'MEAN_POS'
])
borda_df
.
to_csv
(
f
"
{
OUTFOLDER
}
/
{
DATASET
}
/
{
TARGET
}
/
{
MODEL
}
/Borda_allSplits_
{
MODE
}
_
{
layers_concat
}
.txt"
,
sep
=
'
\t
'
,
index
=
False
,
float_format
=
"%.3f"
)
\ No newline at end of file
borda_df
.
to_csv
(
f
"
{
OUTFOLDER
}
/
{
DATASET
}
/
{
TARGET
}
/
{
MODEL
}
/Borda_splits_
{
N_SPLITS_START
}
-
{
N_SPLITS_END
}
_
{
MODE
}
_
{
layers_concat
}
.txt"
,
sep
=
'
\t
'
,
index
=
False
,
float_format
=
"%.3f"
)
\ No newline at end of file
postprocessing/compute_all_metrics.py
View file @
55d7a92d
...
...
@@ -35,7 +35,8 @@ parser.add_argument(
'--model'
,
type
=
str
,
help
=
'classifiers implemented, randomForest or LSVM'
)
parser
.
add_argument
(
'--layers'
,
type
=
str
,
nargs
=
'+'
,
help
=
''
)
parser
.
add_argument
(
'--n_splits'
,
type
=
int
,
help
=
''
)
parser
.
add_argument
(
'--n_splits_end'
,
type
=
int
,
help
=
''
)
parser
.
add_argument
(
'--n_splits_start'
,
type
=
int
,
help
=
''
)
parser
.
add_argument
(
'--mode'
,
type
=
str
,
help
=
'juxt, rSNF, rSNFi, single'
)
args
=
parser
.
parse_args
()
...
...
@@ -46,7 +47,8 @@ DATASET = args.dataset
TARGET
=
args
.
target
MODEL
=
args
.
model
LAYERS
=
args
.
layers
N_SPLITS
=
args
.
n_splits
N_SPLITS_START
=
args
.
n_splits_start
N_SPLITS_END
=
args
.
n_splits_end
MODE
=
args
.
mode
assert
MODE
in
[
'juxt'
,
'rSNF'
,
'rSNFi'
,
'single'
]
...
...
@@ -98,7 +100,7 @@ for k in range_combinations:
all_test_mccs
=
[]
best_feat_steps
=
[]
for
split_id
in
range
(
N_SPLITS
):
for
split_id
in
range
(
N_SPLITS
_END
-
N_SPLITS_START
):
PATH
=
f
'
{
OUTFOLDER
}
/
{
DATASET
}
/
{
TARGET
}
/
{
MODEL
}
/
{
split_id
}
'
...
...
@@ -255,7 +257,7 @@ for k in range_combinations:
df_results
=
df_results
.
append
(
row
,
ignore_index
=
True
)
df_results
.
to_csv
(
f
'
{
OUTFOLDER
}
/
{
DATASET
}
/
{
TARGET
}
/
{
MODEL
}
/metrics_
allS
plits_
{
MODE
}
.txt'
,
f
'
{
OUTFOLDER
}
/
{
DATASET
}
/
{
TARGET
}
/
{
MODEL
}
/metrics_
s
plits_
{
N_SPLITS_START
}
-
{
N_SPLITS_END
}
_
{
MODE
}
.txt'
,
sep
=
'
\t
'
,
index
=
False
,
)
...
...
runner.sh
View file @
55d7a92d
...
...
@@ -5,33 +5,34 @@ THREADS=12
OUTFOLDER
=
results
DATAFOLDER
=
data
DATASET
=
tcga_
aml
LAYER1
=
gene
LAYER2
=
meth
LAYER3
=
mirna
TARGET
=
OS
DATASET
=
tcga_
breast
LAYER1
=
cnv
LAYER2
=
prot
LAYER3
=
prot
TARGET
=
ER
MODEL
=
LSVM
N_SPLITS
=
10
N_SPLITS_START
=
0
N_SPLITS_END
=
10
# go!
for
((
i
=
0
;
i<
$N_SPLITS
;
i++
))
for
((
i
=
$N_SPLITS_START
;
i<
$N_SPLITS
_END
;
i++
))
do
snakemake
-s
Snakefile_split
--cores
$THREADS
--config
datafolder
=
$DATAFOLDER
outfolder
=
$OUTFOLDER
dataset
=
$DATASET
target
=
$TARGET
layer1
=
$LAYER1
layer2
=
$LAYER2
layer3
=
$LAYER3
model
=
$MODEL
split_id
=
$i
-p
snakemake
-s
Snakefile_split
--cores
$THREADS
--config
datafolder
=
$DATAFOLDER
outfolder
=
$OUTFOLDER
dataset
=
$DATASET
target
=
$TARGET
layer1
=
$LAYER1
layer2
=
$LAYER2
model
=
$MODEL
split_id
=
$i
-p
done
for
MODE
in
juxt rSNF rSNFi single
do
python postprocessing/compute_all_metrics.py
--outfolder
$OUTFOLDER
--dataset
$DATASET
--target
$TARGET
--layers
$LAYER1
$LAYER2
$LAYER3
--model
$MODEL
--n_splits
$N_SPLITS
--mode
$MODE
python postprocessing/compute_all_metrics.py
--outfolder
$OUTFOLDER
--dataset
$DATASET
--target
$TARGET
--layers
$LAYER1
$LAYER2
--model
$MODEL
--n_splits
$N_SPLITS
_END
$N_SPLITS_START
--mode
$MODE
done
for
MODE
in
juxt rSNF
do
python postprocessing/borda_global_juxt_rSNF.py
--datafolder
$DATAFOLDER
--outfolder
$OUTFOLDER
--dataset
$DATASET
--target
$TARGET
--layers
$LAYER1
$LAYER2
$LAYER3
--model
$MODEL
--n_splits
$N_SPLITS
--mode
$MODE
python postprocessing/borda_global_juxt_rSNF.py
--datafolder
$DATAFOLDER
--outfolder
$OUTFOLDER
--dataset
$DATASET
--target
$TARGET
--layers
$LAYER1
$LAYER2
--model
$MODEL
--n_splits
$N_SPLITS
_END
$N_SPLITS_START
--mode
$MODE
done
python postprocessing/borda_global_rSNFi.py
--datafolder
$DATAFOLDER
--outfolder
$OUTFOLDER
--dataset
$DATASET
--target
$TARGET
--layers
$LAYER1
$LAYER2
$LAYER3
--model
$MODEL
--n_splits
$N_SPLITS
--mode
rSNFi
python postprocessing/borda_global_rSNFi.py
--datafolder
$DATAFOLDER
--outfolder
$OUTFOLDER
--dataset
$DATASET
--target
$TARGET
--layers
$LAYER1
$LAYER2
--model
$MODEL
--n_splits
$N_SPLITS
_END
$N_SPLITS_START
--mode
rSNFi
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment