Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
MPBA
INF
Commits
6bc146e9
Commit
6bc146e9
authored
Feb 05, 2020
by
Alessia Marcolini
Browse files
Fix split management + black formatting
parent
1b06cc55
Changes
2
Hide whitespace changes
Inline
Side-by-side
postprocessing/borda_global_juxt_rSNF.py
View file @
6bc146e9
...
...
@@ -37,7 +37,9 @@ parser.add_argument('--datafolder', type=str, help='Main data folder')
parser
.
add_argument
(
'--outfolder'
,
type
=
str
,
help
=
'Results folder'
)
parser
.
add_argument
(
'--dataset'
,
type
=
str
,
help
=
'Dataset name'
)
parser
.
add_argument
(
'--target'
,
type
=
str
,
help
=
'Clinical endpoint'
)
parser
.
add_argument
(
'--model'
,
type
=
str
,
help
=
'Classifiers implemented, randomForest or LSVM'
)
parser
.
add_argument
(
'--model'
,
type
=
str
,
help
=
'Classifiers implemented, randomForest or LSVM'
)
parser
.
add_argument
(
'--layers'
,
type
=
str
,
nargs
=
'+'
,
help
=
''
)
parser
.
add_argument
(
'--n_splits_end'
,
type
=
int
,
help
=
''
)
parser
.
add_argument
(
'--n_splits_start'
,
type
=
int
,
help
=
''
)
...
...
@@ -65,15 +67,31 @@ for k in range(2, N_LAYERS + 1):
for
comb
in
combinations
(
LAYERS
,
k
):
layers_concat
=
'_'
.
join
(
comb
)
_
,
var_names
,
_
=
load_data
(
os
.
path
.
join
(
DATAFOLDER
,
DATASET
,
TARGET
,
f
'0/
{
layers_concat
}
_tr.txt'
)
)
rankings
=
[]
for
i
in
range
(
N_SPLITS_START
,
N_SPLITS_END
-
N_SPLITS_START
):
if
MODE
==
'rSNF'
:
file_ranking
=
os
.
path
.
join
(
OUTFOLDER
,
DATASET
,
TARGET
,
MODEL
,
f
'
{
i
}
/
{
MODE
}
/
{
layers_concat
}
_tr_
{
MODEL
}
_rankList_ranking.csv.gz'
)
_
,
var_names
,
_
=
load_data
(
os
.
path
.
join
(
DATAFOLDER
,
DATASET
,
TARGET
,
f
'
{
N_SPLITS_START
}
/
{
layers_concat
}
_tr.txt'
)
)
rankings
=
[]
for
split_id
in
range
(
N_SPLITS_START
,
N_SPLITS_END
):
if
MODE
==
'rSNF'
:
file_ranking
=
os
.
path
.
join
(
OUTFOLDER
,
DATASET
,
TARGET
,
MODEL
,
f
'
{
split_id
}
/
{
MODE
}
/
{
layers_concat
}
_tr_
{
MODEL
}
_rankList_ranking.csv.gz'
,
)
else
:
file_ranking
=
os
.
path
.
join
(
OUTFOLDER
,
DATASET
,
TARGET
,
MODEL
,
f
'
{
i
}
/
{
MODE
}
/
{
layers_concat
}
_tr_
{
MODEL
}
_KBest_ranking.csv.gz'
)
file_ranking
=
os
.
path
.
join
(
OUTFOLDER
,
DATASET
,
TARGET
,
MODEL
,
f
'
{
split_id
}
/
{
MODE
}
/
{
layers_concat
}
_tr_
{
MODEL
}
_KBest_ranking.csv.gz'
,
)
rank
=
pd
.
read_csv
(
file_ranking
,
header
=
None
,
sep
=
'
\t
'
).
values
rankings
.
append
(
rank
)
...
...
@@ -83,6 +101,14 @@ for k in range(2, N_LAYERS + 1):
borda_df
=
pd
.
DataFrame
(
columns
=
[
"FEATURE_ID"
,
"FEATURE_NAME"
,
"MEAN_POS"
])
for
i
,
pos
in
zip
(
BORDA_ID
,
BORDA_POS
):
borda_df
=
borda_df
.
append
({
'FEATURE_ID'
:
i
,
'FEATURE_NAME'
:
var_names
[
i
],
'MEAN_POS'
:
pos
+
1
},
ignore_index
=
True
)
borda_df
.
to_csv
(
f
"
{
OUTFOLDER
}
/
{
DATASET
}
/
{
TARGET
}
/
{
MODEL
}
/Borda_splits_
{
N_SPLITS_START
}
-
{
N_SPLITS_END
}
_
{
MODE
}
_
{
layers_concat
}
.txt"
,
sep
=
'
\t
'
,
index
=
False
,
float_format
=
"%.3f"
)
borda_df
=
borda_df
.
append
(
{
'FEATURE_ID'
:
i
,
'FEATURE_NAME'
:
var_names
[
i
],
'MEAN_POS'
:
pos
+
1
},
ignore_index
=
True
,
)
borda_df
.
to_csv
(
f
"
{
OUTFOLDER
}
/
{
DATASET
}
/
{
TARGET
}
/
{
MODEL
}
/Borda_splits_
{
N_SPLITS_START
}
-
{
N_SPLITS_END
}
_
{
MODE
}
_
{
layers_concat
}
.txt"
,
sep
=
'
\t
'
,
index
=
False
,
float_format
=
"%.3f"
,
)
postprocessing/borda_global_rSNFi.py
View file @
6bc146e9
#%%
import
argparse
import
operator
import
os
from
itertools
import
combinations
import
numpy
as
np
import
pandas
as
pd
import
operator
import
argparse
from
itertools
import
combinations
#%%
class
myArgumentParser
(
argparse
.
ArgumentParser
):
...
...
@@ -27,7 +29,9 @@ parser.add_argument('--datafolder', type=str, help='Main data folder')
parser
.
add_argument
(
'--outfolder'
,
type
=
str
,
help
=
'Results folder'
)
parser
.
add_argument
(
'--dataset'
,
type
=
str
,
help
=
'Dataset name'
)
parser
.
add_argument
(
'--target'
,
type
=
str
,
help
=
'Clinical endpoint'
)
parser
.
add_argument
(
'--model'
,
type
=
str
,
help
=
'Classifiers implemented, randomForest or LSVM'
)
parser
.
add_argument
(
'--model'
,
type
=
str
,
help
=
'Classifiers implemented, randomForest or LSVM'
)
parser
.
add_argument
(
'--layers'
,
type
=
str
,
nargs
=
'+'
,
help
=
''
)
parser
.
add_argument
(
'--n_splits_end'
,
type
=
int
,
help
=
''
)
parser
.
add_argument
(
'--n_splits_start'
,
type
=
int
,
help
=
''
)
...
...
@@ -52,17 +56,23 @@ N_LAYERS = len(LAYERS)
CV_K
=
5
CV_N
=
10
#%%
#%%
for
k
in
range
(
2
,
N_LAYERS
+
1
):
for
comb
in
combinations
(
LAYERS
,
k
):
layers_concat
=
'_'
.
join
(
comb
)
all_feats
=
[]
all_feats
=
[]
for
i
in
range
(
N_SPLITS_START
,
N_SPLITS_END
-
N_SPLITS_START
):
file_featureList
=
os
.
path
.
join
(
OUTFOLDER
,
DATASET
,
TARGET
,
MODEL
,
f
'
{
i
}
/
{
MODE
}
/
{
layers_concat
}
_tr_
{
MODEL
}
_KBest_featurelist.txt'
)
for
i
in
range
(
N_SPLITS_START
,
N_SPLITS_END
):
file_featureList
=
os
.
path
.
join
(
OUTFOLDER
,
DATASET
,
TARGET
,
MODEL
,
f
'
{
i
}
/
{
MODE
}
/
{
layers_concat
}
_tr_
{
MODEL
}
_KBest_featurelist.txt'
,
)
feats
=
pd
.
read_csv
(
file_featureList
,
sep
=
'
\t
'
)
all_feats
.
extend
(
list
(
feats
.
FEATURE_NAME
))
...
...
@@ -70,32 +80,51 @@ for k in range(2, N_LAYERS + 1):
positions
=
dict
()
means
=
dict
()
x
=
((
len
(
all_feats
)
-
1
)
*
np
.
ones
((
1
,(
N_SPLITS_END
-
N_SPLITS_START
)
*
CV_K
*
CV_N
)))
x
=
(
len
(
all_feats
)
-
1
)
*
np
.
ones
(
(
1
,
(
N_SPLITS_END
-
N_SPLITS_START
)
*
CV_K
*
CV_N
)
)
for
i
in
all_feats
:
positions
[
i
]
=
x
.
tolist
()[
0
]
means
[
i
]
=
0.0
for
i
in
range
(
N_SPLITS_START
,
N_SPLITS_END
-
N_SPLITS_START
):
file_featureList
=
os
.
path
.
join
(
OUTFOLDER
,
DATASET
,
TARGET
,
MODEL
,
f
'
{
i
}
/
{
MODE
}
/
{
layers_concat
}
_tr_
{
MODEL
}
_KBest_featurelist.txt'
)
positions
[
i
]
=
x
.
tolist
()[
0
]
means
[
i
]
=
0.0
for
i
,
split_id
in
enumerate
(
range
(
N_SPLITS_START
,
N_SPLITS_END
)):
file_featureList
=
os
.
path
.
join
(
OUTFOLDER
,
DATASET
,
TARGET
,
MODEL
,
f
'
{
split_id
}
/
{
MODE
}
/
{
layers_concat
}
_tr_
{
MODEL
}
_KBest_featurelist.txt'
,
)
feats
=
pd
.
read_csv
(
file_featureList
,
sep
=
'
\t
'
)
z
=
[
None
]
*
len
(
feats
)
for
k
in
range
(
len
(
feats
)):
z
[
feats
.
FEATURE_ID
[
k
]]
=
feats
.
FEATURE_NAME
[
k
]
z
=
[
None
]
*
len
(
feats
)
file_ranking
=
os
.
path
.
join
(
OUTFOLDER
,
DATASET
,
TARGET
,
MODEL
,
f
'
{
i
}
/
{
MODE
}
/
{
layers_concat
}
_tr_
{
MODEL
}
_KBest_ranking.csv.gz'
)
for
k
in
range
(
len
(
feats
)):
z
[
feats
.
FEATURE_ID
[
k
]]
=
feats
.
FEATURE_NAME
[
k
]
file_ranking
=
os
.
path
.
join
(
OUTFOLDER
,
DATASET
,
TARGET
,
MODEL
,
f
'
{
split_id
}
/
{
MODE
}
/
{
layers_concat
}
_tr_
{
MODEL
}
_KBest_ranking.csv.gz'
,
)
rankings
=
pd
.
read_csv
(
file_ranking
,
header
=
None
,
sep
=
'
\t
'
)
for
j
in
range
(
CV_K
*
CV_N
):
for
j
in
range
(
CV_K
*
CV_N
):
for
k
in
range
(
rankings
.
shape
[
1
]):
positions
[
z
[
rankings
.
iloc
[
j
][
k
]]][
i
*
(
CV_K
*
CV_N
)
+
j
]
=
1.0
*
k
positions
[
z
[
rankings
.
iloc
[
j
][
k
]]][
i
*
(
CV_K
*
CV_N
)
+
j
]
=
1.0
*
k
for
i
in
all_feats
:
means
[
i
]
=
np
.
mean
(
positions
[
i
])
means
[
i
]
=
np
.
mean
(
positions
[
i
])
sorted_means
=
sorted
(
means
.
items
(),
key
=
operator
.
itemgetter
(
1
))
borda_df
=
pd
.
DataFrame
(
sorted_means
,
columns
=
[
'FEATURE_NAME'
,
'MEAN_POS'
])
borda_df
.
to_csv
(
f
"
{
OUTFOLDER
}
/
{
DATASET
}
/
{
TARGET
}
/
{
MODEL
}
/Borda_splits_
{
N_SPLITS_START
}
-
{
N_SPLITS_END
}
_
{
MODE
}
_
{
layers_concat
}
.txt"
,
sep
=
'
\t
'
,
index
=
False
,
float_format
=
"%.3f"
)
\ No newline at end of file
borda_df
.
to_csv
(
f
"
{
OUTFOLDER
}
/
{
DATASET
}
/
{
TARGET
}
/
{
MODEL
}
/Borda_splits_
{
N_SPLITS_START
}
-
{
N_SPLITS_END
}
_
{
MODE
}
_
{
layers_concat
}
.txt"
,
sep
=
'
\t
'
,
index
=
False
,
float_format
=
"%.3f"
,
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment