Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
MPBA
CDRP
Commits
579e65a9
Commit
579e65a9
authored
Jun 01, 2018
by
Valerio Maggio
Browse files
further experimental scripts
parent
260c416e
Changes
2
Hide whitespace changes
Inline
Side-by-side
code/deep_features_random_forest_runner.py
0 → 100644
View file @
579e65a9
#!/usr/bin/env python3
"""
Random Forest DAP Runner on Deep Features (OS-32 Layer)
as resulting from CDRP-N+A Multi-task Network for SEQC-NB
Dataset
"""
import
os
from
dap.runners
import
RandomForestRunnerDAP
from
dataset
import
load_nb_camda
,
OS_LAB
class RandomForestDeepFeaturesDAP(RandomForestRunnerDAP):
    """RandomForest DAP Runner predicting the OS endpoint (``OS_LAB``)
    on deep features from the CDRP-N+A multi-task network (OS-32 layer).
    """

    def __init__(self, experiment):
        """
        Parameters
        ----------
        experiment :
            Experiment/dataset object exposing ``nb_classes_targets``
            (mapping target name -> number of classes) plus the
            training/test data consumed by the base DAP runner.
        """
        # The prediction target is fixed to Overall Survival for this runner.
        # NOTE: set it *before* the super() call — the base DAP setup
        # (e.g. _set_training_data) reads _target_prediction_name.
        self._target_prediction_name = OS_LAB
        super().__init__(experiment=experiment)  # Py3 zero-arg super
        self.experiment_data.nb_classes = experiment.nb_classes_targets[self._target_prediction_name]

    @property
    def results_folder(self):
        """Compose (and create) the folder where reports and metrics are saved.

        The path encodes the model name, key hyper-parameters, feature
        scaling/ranking strategy and CV settings, so every configuration
        writes to its own directory.

        Returns
        -------
        str
            Absolute path to an existing results directory.
        """
        base_foldername = self._target_prediction_name.lower()
        folder_name = '_'.join([self.ml_model_name,
                                str(self._hyper_params['n_estimators']),
                                self._hyper_params['criterion'],
                                self._hyper_params['max_features'],
                                self.feature_scaling_name,
                                self.feature_ranking_name,
                                str(self.cv_n),
                                str(self.cv_k)])
        # Fall back to the dataset DESCR when no explicit name is available.
        ds_name = (self.experiment_data.dataset_name
                   if 'dataset_name' in self.experiment_data.keys()
                   else self.experiment_data.DESCR)
        output_folder_path = os.path.join(
            os.path.abspath(os.path.dirname(__file__)),
            'results', ds_name, base_foldername, folder_name)
        os.makedirs(output_folder_path, exist_ok=True)
        return output_folder_path

    @property
    def ml_model_name(self):
        """Short model identifier used in results-folder names."""
        return "RandomForest"

    # Override DAP Methods
    # --------------------
    def _set_training_data(self):
        """Default implementation for classic and quite standard DAP implementation.
        More complex implementation require overriding this method.
        """
        self.X = self.experiment_data.training_data
        self.y = self.experiment_data.targets[self._target_prediction_name]

    def _set_test_data(self):
        """Bind test data and test targets for the selected endpoint."""
        self.X_test = self.experiment_data.test_data
        self.y_test = self.experiment_data.test_targets[self._target_prediction_name]
def main():
    """Run the RandomForest DAP on the SEQC OS-32 deep-feature dataset."""
    # Load Dataset
    # ============
    here = os.path.abspath(os.path.dirname(__file__))

    def _data_fpath(filename):
        """Path to an OS-32 SEQC csv file, relative to this script.

        NOTE(review): the 'data', '..' segments cancel out, but they make
        the path resolve only when a 'data' directory exists next to the
        script — confirm whether that was intentional before simplifying.
        """
        return os.path.join(here, 'data', '..', 'OS32_SEQC', filename)

    training_data_fpath = _data_fpath('SEQC2_OS_32_training.csv')
    test_data_fpath = _data_fpath('SEQC2_OS_32_test.csv')
    dataset = load_nb_camda(dataset_name='SEQC2_OS32_HRONLY',
                            training_data_fpath=training_data_fpath,
                            test_data_fpath=test_data_fpath,
                            hr_only=True)
    print('RUNNING ON DATASET {}'.format(dataset.dataset_name.upper()))

    # ============
    # DAP Settings
    # ============
    from dap import settings as dap_settings
    dap_settings.to_categorical = False
    dap_settings.feature_ranges = [75, 90, 100]
    from dap.scaling import MinMaxScaler
    dap_settings.feature_scaler = MinMaxScaler(feature_range=(-1, 1), copy=False)

    # ============================
    # Model Settings (hyperparams)
    # ============================
    dap = RandomForestDeepFeaturesDAP(dataset)
    dap.hyper_params.criterion = 'entropy'
    dap.hyper_params.n_estimators = 500
    dap.hyper_params.max_features = 'log2'
    trained_model = dap.run(verbose=True)
    dap.predict_on_test(trained_model)
    print("Computation completed!")


if __name__ == '__main__':
    main()
code/targetnb_svm_runner_multiple_splits.py
0 → 100644
View file @
579e65a9
"""
Script to generate Random train/test split for the TARGET-NB Dataset
"""
import
os
from
dap.runners
import
SupportVectorRunnerDAP
from
dataset
import
generate_dataset_partitions
import
argparse
class SupportVectorMachineRunner(SupportVectorRunnerDAP):
    """Linear-SVM DAP Runner for the TARGET-NB dataset."""

    def __init__(self, experiment, target_prediction):
        """
        Parameters
        ----------
        experiment :
            Experiment/dataset object exposing ``nb_classes_targets``
            (mapping target name -> number of classes) plus the
            training/test data consumed by the base DAP runner.
        target_prediction : str
            Name of the endpoint to predict. One of: 'HR', 'EFS', 'OS'.
        """
        # NOTE: set _target_prediction_name *before* the super() call —
        # the base DAP setup (e.g. _set_training_data) reads it.
        self._target_prediction_name = target_prediction
        super().__init__(experiment=experiment)  # Py3 zero-arg super
        self.experiment_data.nb_classes = experiment.nb_classes_targets[self._target_prediction_name]

    @property
    def results_folder(self):
        """Compose (and create) the folder where reports and metrics are saved.

        The path encodes the model name, key hyper-parameters, feature
        scaling/ranking strategy and CV settings, so every configuration
        writes to its own directory.

        Returns
        -------
        str
            Absolute path to an existing results directory.
        """
        base_foldername = self._target_prediction_name.lower()
        folder_name = '_'.join([self.ml_model_name,
                                self._hyper_params['kernel'],
                                str(self._hyper_params['C']),
                                self.feature_scaling_name,
                                self.feature_ranking_name,
                                str(self.cv_n),
                                str(self.cv_k)])
        # Fall back to the dataset DESCR when no explicit name is available.
        ds_name = (self.experiment_data.dataset_name
                   if 'dataset_name' in self.experiment_data.keys()
                   else self.experiment_data.DESCR)
        output_folder_path = os.path.join(
            os.path.abspath(os.path.dirname(__file__)),
            'out_multi_split_25', ds_name, base_foldername, folder_name)
        os.makedirs(output_folder_path, exist_ok=True)
        return output_folder_path

    @property
    def ml_model_name(self):
        """Short model identifier used in results-folder names."""
        return "LSVM"

    # Override DAP Methods
    # --------------------
    def _set_training_data(self):
        """Default implementation for classic and quite standard DAP implementation.
        More complex implementation require overriding this method.
        """
        self.X = self.experiment_data.training_data
        self.y = self.experiment_data.targets[self._target_prediction_name]

    def _set_test_data(self):
        """Bind test data and test targets for the selected endpoint."""
        self.X_test = self.experiment_data.test_data
        self.y_test = self.experiment_data.test_targets[self._target_prediction_name]
def main():
    """Run the LSVM DAP over 100 random 75/25 train/test partitions of TARGET-NB."""
    # Parse CLI arguments and configure DAP settings only ONCE: the original
    # code rebuilt the ArgumentParser and re-applied the (invariant) settings
    # on every one of the 100 iterations. The valid --target choices come
    # from a partition object, so parsing happens lazily on the first one.
    args = None
    for targetNB_partition in generate_dataset_partitions(n_partitions=100,
                                                          test_size=0.25):
        print('RUNNING ON DATASET {}'.format(
            targetNB_partition.dataset_name.upper()))

        if args is None:
            parser = argparse.ArgumentParser()
            parser.add_argument("-t", "--target",
                                help="Target Endpoint prediction",
                                type=str,
                                choices=targetNB_partition.targets_names)
            args = parser.parse_args()

            # ============
            # DAP Settings (identical for every partition — set once)
            # ============
            from dap import settings as dap_settings
            dap_settings.to_categorical = False
            dap_settings.feature_ranges = [2, 5, 10, 15, 20, 25, 50, 75, 100]
            from dap.scaling import MinMaxScaler, StandardScaler
            dap_settings.feature_scaler = MinMaxScaler(feature_range=(-1, 1),
                                                       copy=False)
            # dap_settings.feature_scaler = StandardScaler(copy=False)

        # ============================
        # Model Settings (hyperparams)
        # ============================
        dap = SupportVectorMachineRunner(targetNB_partition,
                                         target_prediction=args.target)
        dap.hyper_params.C = 1.0
        dap.hyper_params.kernel = 'linear'
        trained_model = dap.run(verbose=True)
        dap.predict_on_test(trained_model)
        print("Computation completed!")


if __name__ == '__main__':
    main()
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment