Commit 1b3a50b9 authored by Alessia Marcolini
Browse files

Start transition from ipynb to py + SET SEED IN DAP

parent fdc265ae
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%HN_env"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"PATH = os.path.abspath(os.path.curdir)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# from dap_devel.dap import dap\n",
"import dap\n",
"from dap.datasets.featuredatasets import FeatureLabelCSVDataset\n",
"from dap.datasets.featuredatasets import CSVDataset\n",
"from dap.metrics import BinaryMetrics, MulticlassMetrics\n",
"from dap.models.featuresmodel import DAPSVMClassifier, DAPRandomForestClassifier\n",
"# from dap.models.sklearnmodel import SklearnModel\n",
"from sklearn import svm\n",
"\n",
"from dap.crossval import kfold_split\n",
"from dap.ranking import randomforest_ranking, kbest_ranking\n",
"# from dap_devel.dap.dap import DAP"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"SEED = 1234\n",
"LAB_COL = 'Locoregional'\n",
"FEATURESET = 'features_noTx_F_SVC_Locoregional'#'radiomics_features_F_SVC_Locoregional'#'features_LR' #radiomics_features_F_SVC_Locoregional\n",
"\n",
"#%%\n",
"LABELS_FILE = f'{PATH}/data/clinical_data.csv'\n",
"NETWORK = 'LR_noTx_branch_wise_free_aug_20191027-003918'\n",
"\n",
"FEATUREDIR = f'{PATH}/experiments/{NETWORK}/features' #f'{PATH}/data/' # ## \n",
"OUT_DIR = f'{PATH}/predictions'\n",
"\n",
"FEATURE_FILE = f'{FEATUREDIR}/{FEATURESET}.csv'\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dataset = FeatureLabelCSVDataset(FEATURE_FILE, LABELS_FILE, LAB_COL, problem='binary', name=FEATURESET)\n",
"\n",
"#%%\n",
"metrics = BinaryMetrics(reference='MCC')\n",
"\n",
"dap_settings = {'stratified_test': True,\n",
" 'ratio_test': 0.2,\n",
" 'stratified_valid': True,\n",
" 'ratio_valid': 0.2,\n",
" 'fold_method': kfold_split,\n",
" 'cv_n': 10}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## LSVM"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model = DAPSVMClassifier({'rank_features': False, 'scale_features': True, 'kernel':'linear', 'random_labels': False}, name='DAPSVMClassifier')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"DAP = dap.DAP(dap_settings, dataset, model, metrics, name='kfold')\n",
"\n",
"n_feat = [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1]\n",
"parameters = {'C':[0.001, 0.01, 0.1, 1, 10, 100, 1000], 'n_feat': n_feat} # [0.001, 0.01, 0.1, 1, 10, 100, 1000]\n",
"param_grid = dap.dap.create_param_grid(parameters)\n",
"\n",
"DAP.fit(param_grid)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"DAP.predict_on_test()\n",
"DAP.save(OUT_DIR)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Random Forest"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model = DAPRandomForestClassifier({'rank_features': False, 'scale_features': True, 'n_estimators': 500, 'random_labels': False}, name='DAPRFClassifier')\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"DAP = dap.DAP(dap_settings, dataset, model, metrics, name='kfold')\n",
"\n",
"\n",
"#%%\n",
"n_feat = [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1]\n",
"parameters = {'n_feat': n_feat} # [0.001, 0.01, 0.1, 1, 10, 100, 1000]\n",
"param_grid = dap.dap.create_param_grid(parameters)\n",
"\n",
"DAP.fit(param_grid)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"DAP.predict_on_test()\n",
"DAP.save(OUT_DIR)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python (dappertf)",
"language": "python",
"name": "dappertf"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
%% Cell type:code id: tags:
``` python
%HN_env
```
%% Cell type:code id: tags:
``` python
import os
PATH = os.path.abspath(os.path.curdir)
```
%% Cell type:code id: tags:
``` python
# from dap_devel.dap import dap
import dap
from dap.datasets.featuredatasets import FeatureLabelCSVDataset
from dap.datasets.featuredatasets import CSVDataset
from dap.metrics import BinaryMetrics, MulticlassMetrics
from dap.models.featuresmodel import DAPSVMClassifier, DAPRandomForestClassifier
# from dap.models.sklearnmodel import SklearnModel
from sklearn import svm
from dap.crossval import kfold_split
from dap.ranking import randomforest_ranking, kbest_ranking
# from dap_devel.dap.dap import DAP
```
%% Cell type:code id: tags:
``` python
SEED = 1234
LAB_COL = 'Locoregional'
FEATURESET = 'features_noTx_F_SVC_Locoregional'#'radiomics_features_F_SVC_Locoregional'#'features_LR' #radiomics_features_F_SVC_Locoregional
#%%
LABELS_FILE = f'{PATH}/data/clinical_data.csv'
NETWORK = 'LR_noTx_branch_wise_free_aug_20191027-003918'
FEATUREDIR = f'{PATH}/experiments/{NETWORK}/features' #f'{PATH}/data/' # ##
OUT_DIR = f'{PATH}/predictions'
FEATURE_FILE = f'{FEATUREDIR}/{FEATURESET}.csv'
```
%% Cell type:code id: tags:
``` python
dataset = FeatureLabelCSVDataset(FEATURE_FILE, LABELS_FILE, LAB_COL, problem='binary', name=FEATURESET)
#%%
metrics = BinaryMetrics(reference='MCC')
dap_settings = {'stratified_test': True,
'ratio_test': 0.2,
'stratified_valid': True,
'ratio_valid': 0.2,
'fold_method': kfold_split,
'cv_n': 10}
```
%% Cell type:markdown id: tags:
## LSVM
%% Cell type:code id: tags:
``` python
model = DAPSVMClassifier({'rank_features': False, 'scale_features': True, 'kernel':'linear', 'random_labels': False}, name='DAPSVMClassifier')
```
%% Cell type:code id: tags:
``` python
DAP = dap.DAP(dap_settings, dataset, model, metrics, name='kfold')
n_feat = [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1]
parameters = {'C':[0.001, 0.01, 0.1, 1, 10, 100, 1000], 'n_feat': n_feat} # [0.001, 0.01, 0.1, 1, 10, 100, 1000]
param_grid = dap.dap.create_param_grid(parameters)
DAP.fit(param_grid)
```
%% Cell type:code id: tags:
``` python
DAP.predict_on_test()
DAP.save(OUT_DIR)
```
%% Cell type:markdown id: tags:
## Random Forest
%% Cell type:code id: tags:
``` python
model = DAPRandomForestClassifier({'rank_features': False, 'scale_features': True, 'n_estimators': 500, 'random_labels': False}, name='DAPRFClassifier')
```
%% Cell type:code id: tags:
``` python
DAP = dap.DAP(dap_settings, dataset, model, metrics, name='kfold')
#%%
n_feat = [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1]
parameters = {'n_feat': n_feat} # [0.001, 0.01, 0.1, 1, 10, 100, 1000]
param_grid = dap.dap.create_param_grid(parameters)
DAP.fit(param_grid)
```
%% Cell type:code id: tags:
``` python
DAP.predict_on_test()
DAP.save(OUT_DIR)
```
%% Cell type:code id: tags:
``` python
```
# To add a new cell, type '# %%'
# To add a new markdown cell, type '# %% [markdown]'
# %%
#from IPython import get_ipython
# %%
#get_ipython().run_line_magic('HN_env', '')
# %%
import os
PATH = os.path.abspath(os.path.curdir)
# %%
# from dap_devel.dap import dap
import warnings
warnings.filterwarnings("ignore")
import dap
from dap.datasets.featuredatasets import FeatureLabelCSVDataset
from dap.datasets.featuredatasets import CSVDataset
from dap.metrics import BinaryMetrics, MulticlassMetrics
from dap.models.featuresmodel import DAPSVMClassifier, DAPRandomForestClassifier
# from dap.models.sklearnmodel import SklearnModel
from sklearn import svm
from dap.crossval import kfold_split
from dap.ranking import randomforest_ranking, kbest_ranking
# from dap_devel.dap.dap import DAP
# %%
# Experiment configuration: random seed, label column and input/output locations.
# SEED is handed to dap.DAP(..., seed=SEED) further down in this script.
SEED = 1234
# Passed to FeatureLabelCSVDataset right after LABELS_FILE; presumably the
# label column to read from that file — confirm against the dataset class.
LAB_COL = 'Locoregional'
# Base name of the feature CSV; also reused as the dataset name and as the
# sub-folder of the predictions directory (see OUT_DIR).
FEATURESET = 'radiomics_features_CT_F_SVC_Locoregional' #'features_LR' #radiomics_features_F_SVC_Locoregional
#%%
LABELS_FILE = f'{PATH}/data/clinical_data.csv'
# NOTE(review): NETWORK is only referenced by the commented-out FEATUREDIR
# alternative on the next assignment; it has no effect on the active paths.
NETWORK = 'LR_noTx_branch_wise_free_aug_CT_20191027-124913'
# NOTE(review): the trailing '/' here combined with the '/' in the
# FEATURE_FILE_* templates produces a double slash in the resulting paths.
FEATUREDIR = f'{PATH}/data/' #f'{PATH}/experiments/{NETWORK}/features' # # ##
OUT_DIR = f'{PATH}/predictions/{FEATURESET}'
# NOTE(review): both paths below are built from the same template and are
# therefore identical. Only FEATURE_FILE_RADIOMICS is read in the visible
# script — confirm whether FEATURE_FILE_DEEP was meant to point at the
# experiments/{NETWORK}/features directory instead.
FEATURE_FILE_DEEP = f'{FEATUREDIR}/{FEATURESET}.csv'
FEATURE_FILE_RADIOMICS = f'{FEATUREDIR}/{FEATURESET}.csv'
# %%
# Build the dataset from the feature CSV and the labels CSV, declared as a
# binary problem and named after the feature set.
dataset = FeatureLabelCSVDataset(
    FEATURE_FILE_RADIOMICS,
    LABELS_FILE,
    LAB_COL,
    problem='binary',
    name=FEATURESET,
)
#%%
# MCC is the reference metric handed to BinaryMetrics.
metrics = BinaryMetrics(reference='MCC')
# Settings shared by every DAP run in this script (same keys, same order).
dap_settings = dict(
    stratified_test=True,
    ratio_test=0.2,
    stratified_valid=True,
    ratio_valid=0.2,
    fold_method=kfold_split,
    cv_n=10,
)
# %% [markdown]
# ## LSVM
# %%
# Linear-kernel SVM wrapper: features are scaled, not ranked, and labels are
# not randomised.
model = DAPSVMClassifier(
    {
        'rank_features': False,
        'scale_features': True,
        'kernel': 'linear',
        'random_labels': False,
    },
    name='DAPSVMClassifier',
)
# %%
# Seeded DAP run over the (C, n_feat) grid.
DAP = dap.DAP(
    dap_settings,
    dataset,
    model,
    metrics,
    name='kfold',
    seed=SEED,
)
n_feat = [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1]
parameters = dict(
    C=[0.001, 0.01, 0.1, 1, 10, 100, 1000],
    n_feat=n_feat,
)
param_grid = dap.dap.create_param_grid(parameters)
DAP.fit(param_grid)
# %%
# Evaluate on the test split and write the results under OUT_DIR.
DAP.predict_on_test()
DAP.save(OUT_DIR)
# %% [markdown]
# ## Random Forest
# %%
# Random-forest wrapper with 500 trees: features are scaled, not ranked, and
# labels are not randomised.
model = DAPRandomForestClassifier(
    {
        'rank_features': False,
        'scale_features': True,
        'n_estimators': 500,
        'random_labels': False,
    },
    name='DAPRFClassifier',
)
# %%
# Pass the same SEED as the LSVM run above so that both experiments are
# seeded consistently; without it this run was not pinned to SEED.
DAP = dap.DAP(dap_settings, dataset, model, metrics, name='kfold', seed=SEED)
#%%
# Only n_feat is searched for the random forest.
n_feat = [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1]
parameters = {'n_feat': n_feat}
param_grid = dap.dap.create_param_grid(parameters)
DAP.fit(param_grid)
# %%
# Evaluate on the test split and write the results under OUT_DIR.
DAP.predict_on_test()
DAP.save(OUT_DIR)
# %%
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment