Commit 579e65a9 authored by Valerio Maggio

further experimental scripts

parent 260c416e
#!/usr/bin/env python3
"""
Random Forest DAP Runner on Deep Features (OS-32 Layer)
as resulting from CDRP-N+A Multi-task Network for SEQC-NB
Dataset
"""
import os
from dap.runners import RandomForestRunnerDAP
from dataset import load_nb_camda, OS_LAB


class RandomForestDeepFeaturesDAP(RandomForestRunnerDAP):
    """
    RandomForest DAP Runner
    """

    def __init__(self, experiment):
        """Bind the OS prediction target before delegating to the base runner."""
        self._target_prediction_name = OS_LAB
        super(RandomForestDeepFeaturesDAP, self).__init__(experiment=experiment)
        self.experiment_data.nb_classes = experiment.nb_classes_targets[self._target_prediction_name]

    @property
    def results_folder(self):
        """
        Compose the path to the folder where reports and metrics will be saved.
        """
        base_foldername = self._target_prediction_name.lower()
        folder_name = '_'.join([self.ml_model_name, str(self._hyper_params['n_estimators']),
                                self._hyper_params['criterion'], self._hyper_params['max_features'],
                                self.feature_scaling_name, self.feature_ranking_name,
                                str(self.cv_n), str(self.cv_k)])
        ds_name = self.experiment_data.dataset_name \
            if 'dataset_name' in self.experiment_data.keys() else self.experiment_data.DESCR
        output_folder_path = os.path.join(os.path.abspath(os.path.dirname(__file__)),
                                          'results', ds_name, base_foldername, folder_name)
        os.makedirs(output_folder_path, exist_ok=True)
        return output_folder_path

    @property
    def ml_model_name(self):
        return "RandomForest"

    # Override DAP Methods
    # --------------------

    def _set_training_data(self):
        """Default implementation for a standard DAP run.
        More complex setups should override this method.
        """
        self.X = self.experiment_data.training_data
        self.y = self.experiment_data.targets[self._target_prediction_name]

    def _set_test_data(self):
        self.X_test = self.experiment_data.test_data
        self.y_test = self.experiment_data.test_targets[self._target_prediction_name]


def main():
    # Load Dataset
    # ============
    training_data_fpath = os.path.join(os.path.abspath(os.path.dirname(__file__)),
                                       'data', '..', 'OS32_SEQC', 'SEQC2_OS_32_training.csv')
    test_data_fpath = os.path.join(os.path.abspath(os.path.dirname(__file__)),
                                   'data', '..', 'OS32_SEQC', 'SEQC2_OS_32_test.csv')
    dataset = load_nb_camda(dataset_name='SEQC2_OS32_HRONLY',
                            training_data_fpath=training_data_fpath,
                            test_data_fpath=test_data_fpath, hr_only=True)
    print('RUNNING ON DATASET {}'.format(dataset.dataset_name.upper()))

    # ============
    # DAP Settings
    # ============
    from dap import settings as dap_settings
    dap_settings.to_categorical = False
    dap_settings.feature_ranges = [75, 90, 100]

    from dap.scaling import MinMaxScaler
    dap_settings.feature_scaler = MinMaxScaler(feature_range=(-1, 1), copy=False)

    # ============================
    # Model Settings (hyperparams)
    # ============================
    dap = RandomForestDeepFeaturesDAP(dataset)
    dap.hyper_params.criterion = 'entropy'
    dap.hyper_params.n_estimators = 500
    dap.hyper_params.max_features = 'log2'

    trained_model = dap.run(verbose=True)
    dap.predict_on_test(trained_model)
    print("Computation completed!")


if __name__ == '__main__':
    main()
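
# Usage sketch (assumed invocation; the script name below is hypothetical):
#
#     python rf_os32_deep_features_dap.py
#
# The script takes no CLI arguments: dataset paths and hyper-parameters are
# hard-coded in main(). Reports and metrics are written under
# ./results/<dataset_name>/<target>/<model_settings>/ as composed by the
# `results_folder` property.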
"""
Script to generate random train/test splits for the TARGET-NB Dataset
and run the Linear SVM DAP on each partition
"""
import os
from dap.runners import SupportVectorRunnerDAP
from dataset import generate_dataset_partitions
import argparse


class SupportVectorMachineRunner(SupportVectorRunnerDAP):
    """
    SVM DAP Runner
    """

    def __init__(self, experiment, target_prediction):
        """Bind the selected prediction target before delegating to the base runner."""
        # One of: 'HR', 'EFS', 'OS'
        self._target_prediction_name = target_prediction
        super(SupportVectorMachineRunner, self).__init__(experiment=experiment)
        self.experiment_data.nb_classes = experiment.nb_classes_targets[self._target_prediction_name]

    @property
    def results_folder(self):
        """
        Compose the path to the folder where reports and metrics will be saved.
        """
        base_foldername = self._target_prediction_name.lower()
        folder_name = '_'.join([self.ml_model_name,
                                self._hyper_params['kernel'], str(self._hyper_params['C']),
                                self.feature_scaling_name, self.feature_ranking_name,
                                str(self.cv_n), str(self.cv_k)])
        ds_name = self.experiment_data.dataset_name \
            if 'dataset_name' in self.experiment_data.keys() else self.experiment_data.DESCR
        output_folder_path = os.path.join(os.path.abspath(os.path.dirname(__file__)),
                                          'out_multi_split_25', ds_name, base_foldername, folder_name)
        os.makedirs(output_folder_path, exist_ok=True)
        return output_folder_path

    @property
    def ml_model_name(self):
        return "LSVM"

    # Override DAP Methods
    # --------------------

    def _set_training_data(self):
        """Default implementation for a standard DAP run.
        More complex setups should override this method.
        """
        self.X = self.experiment_data.training_data
        self.y = self.experiment_data.targets[self._target_prediction_name]

    def _set_test_data(self):
        self.X_test = self.experiment_data.test_data
        self.y_test = self.experiment_data.test_targets[self._target_prediction_name]


def main():
    for targetNB_partition in generate_dataset_partitions(n_partitions=100, test_size=.25):
        print('RUNNING ON DATASET {}'.format(targetNB_partition.dataset_name.upper()))

        parser = argparse.ArgumentParser()
        parser.add_argument("-t", "--target", help="Target Endpoint prediction",
                            type=str, choices=targetNB_partition.targets_names)
        args = parser.parse_args()

        # ============
        # DAP Settings
        # ============
        from dap import settings as dap_settings
        dap_settings.to_categorical = False
        dap_settings.feature_ranges = [2, 5, 10, 15, 20, 25, 50, 75, 100]

        from dap.scaling import MinMaxScaler, StandardScaler
        dap_settings.feature_scaler = MinMaxScaler(feature_range=(-1, 1), copy=False)
        # dap_settings.feature_scaler = StandardScaler(copy=False)

        # ============================
        # Model Settings (hyperparams)
        # ============================
        dap = SupportVectorMachineRunner(targetNB_partition, target_prediction=args.target)
        dap.hyper_params.C = 1.0
        dap.hyper_params.kernel = 'linear'

        trained_model = dap.run(verbose=True)
        dap.predict_on_test(trained_model)
        print("Computation completed!")


if __name__ == '__main__':
    main()
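
# Usage sketch (the script name below is hypothetical): the only CLI option
# defined above is -t/--target, whose admissible values come from
# targets_names on each partition (one of 'HR', 'EFS', 'OS'), e.g.:
#
#     python svm_targetnb_random_splits.py -t OS
#
# Each of the 100 random 75/25 train/test partitions is processed with the
# same linear-kernel SVM settings, and reports are written under
# ./out_multi_split_25/.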