MPBA Radiomics / RADLER

Commit e1268943, authored Oct 29, 2019 by Alessia Marcolini
Decide between Valieres or 80/20 split
parent b5d5fd43

Changes: 2 files
trainingTstage.ipynb (view file @ e1268943)
...
...
@@ -16,17 +16,9 @@
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/home/utente/bussola/networks_dami\n"
]
}
],
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%reload_ext autoreload\n",
"%autoreload 2\n",
...
...
@@ -45,7 +37,7 @@
},
{
"cell_type": "code",
"execution_count":
2
,
"execution_count":
null
,
"metadata": {},
"outputs": [],
"source": [
...
...
@@ -70,7 +62,7 @@
},
{
"cell_type": "code",
"execution_count":
3
,
"execution_count":
null
,
"metadata": {},
"outputs": [],
"source": [
...
...
@@ -81,14 +73,15 @@
},
{
"cell_type": "code",
"execution_count":
4
,
"execution_count":
null
,
"metadata": {},
"outputs": [],
"source": [
"DATASET_DIR = f\"/thunderdisk/HN/processed/bbox_
64_augmented
/\" #Not augmented but already 64**3 (for faster loading)\n",
"DATASET_DIR = f\"/thunderdisk/HN/processed/bbox_
fixed2_64
/\" #Not augmented but already 64**3 (for faster loading)\n",
"EXPERIMENT_DIR = f\"{PATH}/experiments\"\n",
"\n",
"PRETRAINED_MED3D_WEIGHTS = '/thunderdisk/HN/MedicalNet_pytorch_files/pretrain/resnet_50.pth'"
"PRETRAINED_MED3D_WEIGHTS = '/thunderdisk/HN/MedicalNet_pytorch_files/pretrain/resnet_50.pth'\n",
"PRETRAINED_T_STAGE = f'{EXPERIMENT_DIR}/Tstage_binary_augmented_noTx_branch_wise_20191028-104101/checkpoint_40.pth'"
]
},
{
...
...
@@ -100,7 +93,7 @@
},
{
"cell_type": "code",
"execution_count":
5
,
"execution_count":
null
,
"metadata": {},
"outputs": [],
"source": [
...
...
@@ -108,25 +101,28 @@
"\n",
"settings = {\n",
" 'model': CiompiDO,\n",
" 'batch_size':
16
,\n",
" 'lr': 1e-
4
,\n",
" 'epochs':
1
,\n",
" 'batch_size':
32
,\n",
" 'lr': 1e-
5
,\n",
" 'epochs':
500
,\n",
" 'optim': torch.optim.Adam,\n",
" 'K': 0.
2
,\n",
" 'K': 0.
5
,\n",
" 'n_classes': 2, #TSTAGE\n",
" 'seed': 1234,\n",
" 'dropout': 0.5,\n",
" 'split': '8020',\n",
" 'size': 64,\n",
" 'pretrained': '
branch-wise
',\n",
" 'pretrained': '',\n",
" }\n",
"\n",
"assert settings['pretrained'] in ['Med3D', 'branch-wise', '']\n",
"assert settings['split'] in ['valeries', '8020']\n",
"assert settings['pretrained'] in ['Med3D', 'branch-wise', 'T-stage', '']\n",
"\n",
"os.makedirs(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}', exist_ok=False)"
]
},
{
"cell_type": "code",
"execution_count":
6
,
"execution_count":
null
,
"metadata": {},
"outputs": [],
"source": [
...
...
@@ -139,6 +135,7 @@
"N_CLASSES = settings['n_classes']\n",
"SEED = settings['seed']\n",
"DROPOUT = settings['dropout']\n",
"SPLIT = settings['split']\n",
"SIZE = settings['size']\n",
"PRETRAINED = settings['pretrained']"
]
...
...
@@ -152,17 +149,9 @@
},
{
"cell_type": "code",
"execution_count":
7
,
"execution_count":
null
,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tensorboard folder: /home/utente/bussola/networks_dami/tb-runs/prova20191022-182638\n"
]
}
],
"outputs": [],
"source": [
"def new_run_log_dir(experiment_name): \n",
" log_dir = os.path.join(PATH, 'tb-runs') \n",
...
...
@@ -184,16 +173,9 @@
"### Data Handlers"
]
},
- {
-  "cell_type": "markdown",
-  "metadata": {},
-  "source": [
-   "Train-Test split 80/20 indexes using sklearn StratifiedShuffleSplit"
-  ]
- },
{
"cell_type": "code",
"execution_count":
8
,
"execution_count":
null
,
"metadata": {},
"outputs": [],
"source": [
...
...
@@ -203,14 +185,13 @@
},
{
"cell_type": "code",
"execution_count":
9
,
"execution_count":
null
,
"metadata": {},
"outputs": [],
"source": [
"np.random.seed(SEED)\n",
"\n",
"dataset = NumpyCSVDataset(DATASET_DIR , clinical_data , target_column, SIZE , mode='train')\n",
"idx_train, idx_test = train_test_indexes_patient_wise(dataset, test_size=0.2, seed=SEED, stratify=True)"
"dataset = NumpyCSVDataset(DATASET_DIR, clinical_data, target_column, SIZE, seed=SEED)"
]
},
{
...
...
@@ -226,11 +207,28 @@
"metadata": {},
"outputs": [],
"source": [
"dataset_test = NumpyCSVDataset(DATASET_DIR, clinical_data, target_column, SIZE, mode='test', augmentation_function=augment_3D_HN)\n",
"dataset_test._indexes = idx_test\n",
"if SPLIT == 'valieres':\n",
" dataset_train = NumpyCSVDataset(DATASET_DIR, clinical_data, target_column, SIZE, mode='train', transforms=augment_3D_HN)\n",
" \n",
" # in this particular case getting `dataset_train._files_full` or `dataset_train.get_files()` is the same\n",
" idx_train = [i for i, f in enumerate(dataset_train.get_files()) if f.split('-')[1] in ['CHUS', 'HGJ']]\n",
" dataset_train.indexes = np.array(idx_train)\n",
" \n",
" dataset_test = NumpyCSVDataset(DATASET_DIR, clinical_data, target_column, SIZE, mode='test', transforms=augment_3D_HN)\n",
" \n",
" # in this particular case getting `dataset_train._files_full` or `dataset_train.get_files()` is the same\n",
" idx_test = [i for i, f in enumerate(dataset_test.get_files()) if f.split('-')[1] in ['HMR', 'CHUM']]\n",
" dataset_test.indexes = np.array(idx_test)\n",
" \n",
"\n",
"else:\n",
" idx_train, idx_test = train_test_indexes_patient_wise(dataset, test_size=K, stratify=True)\n",
" \n",
" dataset_test = NumpyCSVDataset(DATASET_DIR, clinical_data, target_column, SIZE, mode='test', transforms=augment_3D_HN)\n",
" dataset_test.indexes = np.array(idx_test)\n",
"\n",
"dataset_train = NumpyCSVDataset(DATASET_DIR, clinical_data, target_column, SIZE, mode='train',
augmentation_function
=augment_3D_HN)\n",
"dataset_train.
_
indexes = idx_train"
"
dataset_train = NumpyCSVDataset(DATASET_DIR, clinical_data, target_column, SIZE, mode='train',
transforms
=augment_3D_HN)\n",
"
dataset_train.indexes =
np.array(
idx_train
)\n
"
]
},
{
...
...
@@ -308,7 +306,7 @@
"metadata": {},
"outputs": [],
"source": [
"model = MODEL(n_classes=N_CLASSES, dropout=DROPOUT)\n",
"model = MODEL(n_classes=N_CLASSES,
n_channels=1, modality='CT',
dropout=DROPOUT)\n",
"\n",
"if multigpu:\n",
" model = nn.DataParallel(model.to(device))\n",
...
...
@@ -363,6 +361,19 @@
" if name_complete in pretrained_PT_dict.keys():\n",
" print(name)\n",
" model.PT_branch.state_dict()[name].copy_(pretrained_PT_dict[name_complete])\n",
"\n",
"elif PRETRAINED == 'T-stage':\n",
" pretrained_dict = torch.load(PRETRAINED_T_STAGE) \n",
" model_dict = model.state_dict()\n",
"\n",
" # discard layers not present in destination network or with different shape\n",
" pretrained_dict = {k: v for k, v in pretrained_dict.items() if\n",
" (k in model_dict) and (model_dict[k].shape == pretrained_dict[k].shape)}\n",
"\n",
" for name in model.state_dict().keys():\n",
" if name in pretrained_dict.keys():\n",
" #print(name)\n",
" model.state_dict()[name].copy_(pretrained_dict[name])\n",
" "
]
},
...
...
@@ -552,7 +563,7 @@
"# trues.append(label)\n",
" trues_tr.append(dictionary[label])\n",
" probs_tr.append(output.data.cpu().numpy())\n",
" filenames_tr.append(data['
sampl
e'])\n",
" filenames_tr.append(data['
filenam
e'])\n",
"\n",
"probs_tr = np.concatenate(probs_tr)\n",
"preds_tr = np.concatenate(preds_tr)\n",
...
...
@@ -599,7 +610,7 @@
" preds_ts.append(pred.data.cpu().numpy())\n",
" trues_ts.append(dictionary[label])\n",
" probs_ts.append(output.data.cpu().numpy())\n",
" filenames_ts.append(data['
sampl
e'])\n",
" filenames_ts.append(data['
filenam
e'])\n",
"\n",
"probs_ts = np.concatenate(probs_ts)\n",
"preds_ts = np.concatenate(preds_ts)\n",
...
...
@@ -735,6 +746,89 @@
"source": [
"torch.save(model.state_dict(), f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/weights.pth')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"clinical = pd.read_csv('data/clinical_data.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"clinical['patient'] = [('-').join((p.split('-')[0], p.split('-')[1])) for p in clinical['Patient #']]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"clinical.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"HGJ = clinical.loc[clinical['patient']=='HN-HGJ']\n",
"CHUS = clinical.loc[clinical['patient']=='HN-CHUS']\n",
"HMR = clinical.loc[clinical['patient']=='HN-HMR']\n",
"CHUM = clinical.loc[clinical['patient']=='HN-CHUM']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"np.mean(CHUM['Age']), np.std(CHUM['Age'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"CHUM.groupby('Sex').count()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"CHUM.groupby('Locoregional').count()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from mlpy import bootstrap_ci"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
...
...
%% Cell type:markdown id: tags:
## Training network for feature extraction
%% Cell type:markdown id: tags:
### Set Path
%% Cell type:code id: tags:
```python
%reload_ext autoreload
%autoreload 2

import os

PATH = os.getcwd()
print(PATH)
```
%%%% Output: stream
/home/utente/bussola/networks_dami
%% Cell type:markdown id: tags:
### Import packages
%% Cell type:code id: tags:
```python
import datetime
import gc
import pickle
import sys
import time

import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import matthews_corrcoef as mcor, accuracy_score as acc, recall_score as recall, precision_score as precision, confusion_matrix

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

from networks import CiompiDO, ResNet50_3d
from dataset import NumpyCSVDataset, augment_3D_HN
from split import train_test_indexes_patient_wise
```
%% Cell type:code id: tags:
```python
#os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
multigpu = True
```
%% Cell type:code id: tags:
```python
-DATASET_DIR = f"/thunderdisk/HN/processed/bbox_64_augmented/" #Not augmented but already 64**3 (for faster loading)
+DATASET_DIR = f"/thunderdisk/HN/processed/bbox_fixed2_64/" #Not augmented but already 64**3 (for faster loading)
EXPERIMENT_DIR = f"{PATH}/experiments"

PRETRAINED_MED3D_WEIGHTS = '/thunderdisk/HN/MedicalNet_pytorch_files/pretrain/resnet_50.pth'
+PRETRAINED_T_STAGE = f'{EXPERIMENT_DIR}/Tstage_binary_augmented_noTx_branch_wise_20191028-104101/checkpoint_40.pth'
```
%% Cell type:markdown id: tags:
### Settings
%% Cell type:code id: tags:
```python
EXPERIMENT_NAME = 'prova' + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

settings = {
    'model': CiompiDO,
-   'batch_size': 16,
-   'lr': 1e-4,
-   'epochs': 1,
+   'batch_size': 32,
+   'lr': 1e-5,
+   'epochs': 500,
    'optim': torch.optim.Adam,
-   'K': 0.2,
+   'K': 0.5,
    'n_classes': 2, #TSTAGE
    'seed': 1234,
    'dropout': 0.5,
+   'split': '8020',
    'size': 64,
-   'pretrained': 'branch-wise',
+   'pretrained': '',
    }

-assert settings['pretrained'] in ['Med3D', 'branch-wise', '']
+assert settings['split'] in ['valieres', '8020']
+assert settings['pretrained'] in ['Med3D', 'branch-wise', 'T-stage', '']

os.makedirs(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}', exist_ok=False)
```
%% Cell type:code id: tags:
```python
MODEL = settings['model']
BATCH_SIZE = settings['batch_size']
LR = settings['lr']
EPOCHS = settings['epochs']
OPTIMIZER = settings['optim']
K = settings['K']

N_CLASSES = settings['n_classes']
SEED = settings['seed']
DROPOUT = settings['dropout']
+SPLIT = settings['split']
SIZE = settings['size']
PRETRAINED = settings['pretrained']
```
%% Cell type:markdown id: tags:
### Tensorboard settings
%% Cell type:code id: tags:
```python
def new_run_log_dir(experiment_name):
    log_dir = os.path.join(PATH, 'tb-runs')
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    run_log_dir = os.path.join(log_dir, experiment_name)
    return run_log_dir

log_dir = new_run_log_dir(EXPERIMENT_NAME)
print(f'Tensorboard folder: {log_dir}')

writer = SummaryWriter(log_dir)
```
%%%% Output: stream
Tensorboard folder: /home/utente/bussola/networks_dami/tb-runs/prova20191022-182638
%% Cell type:markdown id: tags:
### Data Handlers
%% Cell type:markdown id: tags:
Train-Test split 80/20 indexes using sklearn StratifiedShuffleSplit
%% Cell type:code id: tags:
```python
clinical_data = f'{PATH}/data/clinical_data_noTx.csv'
target_column = 'T-stage_binary'
```
%% Cell type:code id: tags:
```python
np.random.seed(SEED)

-dataset = NumpyCSVDataset(DATASET_DIR , clinical_data , target_column, SIZE , mode='train')
-idx_train, idx_test = train_test_indexes_patient_wise(dataset, test_size=0.2, seed=SEED, stratify=True)
+dataset = NumpyCSVDataset(DATASET_DIR, clinical_data, target_column, SIZE, seed=SEED)
```
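The helper `train_test_indexes_patient_wise` is imported from `split` but its body is not part of this diff; per the markdown cell above, it draws 80/20 train/test indexes with sklearn's StratifiedShuffleSplit. A minimal sketch of what such a patient-wise split could look like, assuming each sample carries a patient ID and a label (the function name, signature, and internals here are assumptions, not the repository's code):

```python
# Hypothetical sketch of a patient-wise stratified split, NOT the helper
# defined in the repository's split.py.
import numpy as np
from sklearn.model_selection import StratifiedShuffleSplit

def split_indexes_patient_wise(patient_ids, labels, test_size=0.2, seed=1234):
    patient_ids = np.asarray(patient_ids)
    labels = np.asarray(labels)

    # one row per patient, labelled by that patient's first sample
    patients, first_idx = np.unique(patient_ids, return_index=True)
    patient_labels = labels[first_idx]

    # stratify at the patient level so class ratios survive the split
    sss = StratifiedShuffleSplit(n_splits=1, test_size=test_size, random_state=seed)
    train_p, test_p = next(sss.split(patients, patient_labels))

    # map the patient-level split back to sample-level indexes, so every
    # scan of a given patient lands on the same side of the split
    idx_train = np.where(np.isin(patient_ids, patients[train_p]))[0]
    idx_test = np.where(np.isin(patient_ids, patients[test_p]))[0]
    return idx_train, idx_test
```

For example, `split_indexes_patient_wise(['P1', 'P1', 'P2', 'P3', 'P4'], [0, 0, 0, 1, 1], test_size=0.4)` can never separate the two 'P1' samples.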
%% Cell type:markdown id: tags:
Create train-test datasets
%% Cell type:code id: tags:
```python
-dataset_test = NumpyCSVDataset(DATASET_DIR, clinical_data, target_column, SIZE, mode='test', augmentation_function=augment_3D_HN)
-dataset_test._indexes = idx_test
+if SPLIT == 'valieres':
+    dataset_train = NumpyCSVDataset(DATASET_DIR, clinical_data, target_column, SIZE, mode='train', transforms=augment_3D_HN)
+
+    # in this particular case getting `dataset_train._files_full` or `dataset_train.get_files()` is the same
+    idx_train = [i for i, f in enumerate(dataset_train.get_files()) if f.split('-')[1] in ['CHUS', 'HGJ']]
+    dataset_train.indexes = np.array(idx_train)
+
+    dataset_test = NumpyCSVDataset(DATASET_DIR, clinical_data, target_column, SIZE, mode='test', transforms=augment_3D_HN)
+
+    # in this particular case getting `dataset_test._files_full` or `dataset_test.get_files()` is the same
+    idx_test = [i for i, f in enumerate(dataset_test.get_files()) if f.split('-')[1] in ['HMR', 'CHUM']]
+    dataset_test.indexes = np.array(idx_test)
+
+else:
+    idx_train, idx_test = train_test_indexes_patient_wise(dataset, test_size=K, stratify=True)
+
+    dataset_test = NumpyCSVDataset(DATASET_DIR, clinical_data, target_column, SIZE, mode='test', transforms=augment_3D_HN)
+    dataset_test.indexes = np.array(idx_test)
+
-dataset_train = NumpyCSVDataset(DATASET_DIR, clinical_data, target_column, SIZE, mode='train', augmentation_function=augment_3D_HN)
-dataset_train._indexes = idx_train
+dataset_train = NumpyCSVDataset(DATASET_DIR, clinical_data, target_column, SIZE, mode='train', transforms=augment_3D_HN)
+dataset_train.indexes = np.array(idx_train)
```
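In the 'valieres' branch above, whole institutions go to train (CHUS, HGJ) and test (HMR, CHUM), following the Vallières et al. head-and-neck cohort, and the institution code is recovered by splitting each file name on '-'. A toy check of that parsing, assuming file names shaped like 'HN-CHUS-001.npy' (the actual naming scheme is not shown in this diff):

```python
# Assumed layout: '<cohort>-<institution>-<patient>.npy' (hypothetical names)
files = ['HN-CHUS-001.npy', 'HN-HGJ-013.npy', 'HN-HMR-007.npy', 'HN-CHUM-042.npy']

train_centers = {'CHUS', 'HGJ'}   # Vallieres training institutions
test_centers = {'HMR', 'CHUM'}    # held-out institutions

idx_train = [i for i, f in enumerate(files) if f.split('-')[1] in train_centers]
idx_test = [i for i, f in enumerate(files) if f.split('-')[1] in test_centers]

print(idx_train, idx_test)  # -> [0, 1] [2, 3]
```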
%% Cell type:markdown id: tags:
Check class balance
%% Cell type:code id: tags:
```python
labels_test = dataset_test.get_labels()
labels_train = dataset_train.get_labels()

c, n = np.unique(labels_test, return_counts=True)
print(np.c_[c, n/len(labels_test)])

c, n = np.unique(labels_train, return_counts=True)
print(np.c_[c, n/len(labels_train)])
```
%% Cell type:markdown id: tags:
Create loaders
%% Cell type:code id: tags:
```python
loader_test = DataLoader(dataset_test, batch_size=BATCH_SIZE//2, num_workers=12, shuffle=True)
loader_train = DataLoader(dataset_train, batch_size=BATCH_SIZE, num_workers=12, pin_memory=True, shuffle=True)
```
%% Cell type:markdown id: tags:
Compute weights
%% Cell type:code id: tags:
```python
labels = dataset_train.get_labels()

#class_sample_count = np.array([len(np.where( labels == t )[0]) for t in np.unique( labels )])
_, class_sample_count = np.unique(labels, return_counts=True)
n_min = np.min(class_sample_count)
weights = n_min / class_sample_count # proportional version: use n_min instead of 1 to get weights ~1
weights = torch.Tensor(weights).to(device)
```
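For intuition, a quick worked example of the `n_min / class_sample_count` weighting with made-up counts:

```python
import numpy as np

class_sample_count = np.array([70, 30])  # hypothetical: 70 vs 30 samples
n_min = np.min(class_sample_count)       # 30
weights = n_min / class_sample_count     # array([0.42857143, 1.0])

# The minority class keeps weight 1.0 and the majority class is scaled down,
# so both classes contribute comparably to the weighted cross-entropy.
print(weights)
```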
%% Cell type:markdown id: tags:
### Initialize Model
%% Cell type:code id: tags:
```python
-model = MODEL(n_classes=N_CLASSES, dropout=DROPOUT)
+model = MODEL(n_classes=N_CLASSES, n_channels=1, modality='CT', dropout=DROPOUT)

if multigpu:
    model = nn.DataParallel(model.to(device))
    model = model.module
```
%% Cell type:code id: tags:
```python
model.initialize_weights()

if PRETRAINED == 'Med3D':
    pretrained_dict = torch.load(PRETRAINED_MED3D_WEIGHTS)['state_dict']
    model_dict = model.state_dict()

    # discard layers not present in destination network or with different shape
    pretrained_dict = {k: v for k, v in pretrained_dict.items() if
                       (k in model_dict) and (model_dict[k].shape == pretrained_dict[k].shape)}

    for name in model.state_dict().keys():
        if name in pretrained_dict.keys():
            #print(name)
            model.state_dict()[name].copy_(pretrained_dict[name])

elif PRETRAINED == 'branch-wise':
    pretrained_CT_dict = torch.load(f'{EXPERIMENT_DIR}/Tstage_grouped_noTx_CT_20191021-143133/weights.pth')
    pretrained_PT_dict = torch.load(f'{EXPERIMENT_DIR}/Tstage_binary_PET_noTx_20191022-124046/weights.pth')
    model_dict = model.state_dict()

    pretrained_CT_dict = {k: v for k, v in pretrained_CT_dict.items() if
                          (k in model_dict) and (model_dict[k].shape == pretrained_CT_dict[k].shape)}
    pretrained_PT_dict = {k: v for k, v in pretrained_PT_dict.items() if
                          (k in model_dict) and (model_dict[k].shape == pretrained_PT_dict[k].shape)}

    to_add = 'module.' if multigpu else ''

    for name in model.CT_branch.state_dict().keys():
        name_complete = to_add + 'CT_branch.' + name
        #print(name_complete)
        if name_complete in pretrained_CT_dict.keys():
            print(name)
            model.CT_branch.state_dict()[name].copy_(pretrained_CT_dict[name_complete])

    for name in model.PT_branch.state_dict().keys():
        name_complete = to_add + 'PT_branch.' + name
        #print(name_complete)
        if name_complete in pretrained_PT_dict.keys():
            print(name)
            model.PT_branch.state_dict()[name].copy_(pretrained_PT_dict[name_complete])

+elif PRETRAINED == 'T-stage':
+    pretrained_dict = torch.load(PRETRAINED_T_STAGE)
+    model_dict = model.state_dict()
+
+    # discard layers not present in destination network or with different shape
+    pretrained_dict = {k: v for k, v in pretrained_dict.items() if
+                       (k in model_dict) and (model_dict[k].shape == pretrained_dict[k].shape)}
+
+    for name in model.state_dict().keys():
+        if name in pretrained_dict.keys():
+            #print(name)
+            model.state_dict()[name].copy_(pretrained_dict[name])
```
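The filtered-dict pattern above (drop keys that are absent from the target network or shape-mismatched, then copy tensor by tensor) is what the new 'T-stage' branch reuses. For reference, PyTorch's `load_state_dict(..., strict=False)` can express the same partial load more compactly; strict=False only tolerates missing or unexpected keys, not shape mismatches, so the explicit shape filter is still required. A hedged sketch, not the notebook's code:

```python
# Sketch of an equivalent partial load; variable names follow the cell above.
pretrained_dict = torch.load(PRETRAINED_T_STAGE)
model_dict = model.state_dict()

# keep only tensors that exist in the target network with the same shape
compatible = {k: v for k, v in pretrained_dict.items()
              if k in model_dict and model_dict[k].shape == v.shape}

# strict=False tolerates the keys we dropped and reports the leftovers
missing, unexpected = model.load_state_dict(compatible, strict=False)
print(f'{len(compatible)} tensors loaded, {len(missing)} left at random init')
```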
%% Cell type:markdown id: tags:
Optimizer
%% Cell type:code id: tags:
```python
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
```
%% Cell type:code id: tags:
```python
#[x.shape for x in model.parameters()]
```
%% Cell type:markdown id: tags:
Loss
%% Cell type:code id: tags:
```python
criterion = nn.CrossEntropyLoss(weight=weights)
```
%% Cell type:code id: tags:
```python
NEW_LABELS = list(range(len(list(np.unique(labels_train)))))
dictionary = dict(zip(list(np.unique(labels_train)), NEW_LABELS))
dictionary
```
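This cell remaps whatever label values appear in the CSV onto contiguous integer class ids, which is what `nn.CrossEntropyLoss` expects; a quick illustration with made-up labels:

```python
import numpy as np

labels_train = ['T1-T2', 'T3-T4', 'T1-T2', 'T3-T4']  # hypothetical values
NEW_LABELS = list(range(len(np.unique(labels_train))))
dictionary = dict(zip(list(np.unique(labels_train)), NEW_LABELS))
print(dictionary)  # -> {'T1-T2': 0, 'T3-T4': 1}
```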
%% Cell type:markdown id: tags:
### Train
%% Cell type:code id: tags:
```python
model.train() # Set model to training mode

global_i = 0
losses_tr = []
losses_ts = []

last_loss_test = -1
iteration = 0

start_time = time.time()
for epoch in range(EPOCHS):
    #print(epoch)
    if epoch % 10 == 0: #save checkpoint
        torch.save(model.state_dict(), f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/checkpoint_{epoch}.pth')

    for j, data in enumerate(loader_train):
        global_i += 1

        if j % 10 == 0:
            print(time.time() - start_time)
            start_time = time.time()

        optimizer.zero_grad()

        images_tr = data['data'].to(device)
        labels_tr = torch.LongTensor([dictionary[i] for i in data['target']]).to(device)
        outputs_tr = model(images_tr).to(device)

        # backward
        loss = criterion(outputs_tr, labels_tr)
        loss.backward()
        optimizer.step()

        # check test set
        if j % int(len(loader_train)/2) == 0 and j != 0:
            model.eval()
            with torch.no_grad():
                losses_sum = 0
                num_samples_test = 0

                for data_test in loader_test:
                    images_ts = data_test['data'].to(device)
                    labels_ts = torch.LongTensor([dictionary[i] for i in data_test['target']]).to(device)

                    outputs_ts = model.forward(images_ts)
                    loss_test_sum = criterion(outputs_ts, labels_ts).item()
                    losses_sum += loss_test_sum
                    num_samples_test += 1

                loss_test_avg = losses_sum / num_samples_test

                writer.add_scalar(f'{EXPERIMENT_NAME}/test_loss', loss_test_avg, global_i)
                writer.flush()

                #is_best = loss_val_avg < last_loss_val
                #if is_best:
                #    torch.save(model.state_dict(),
                #               f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/checkpoint_best_{epoch}.pth')

                last_loss_test = loss_test_avg

                losses_tr.append(loss.item())
                losses_ts.append(loss_test_avg)

                del images_ts, labels_ts

            iteration += 1

        del images_tr, labels_tr
        gc.collect()
        model.train()

        # sys.stdout.write
        writer.add_scalar(f'{EXPERIMENT_NAME}/train_loss', loss.item(), global_i)
        writer.flush()

        sys.stdout.write('\rEpoch {} of {} [{:.2f}%] - loss TR/TS: {:.4f} / {:.4f} - {}'.format(
            epoch + 1, EPOCHS, 100 * j / len(loader_train), loss.item(), last_loss_test,
            optimizer.param_groups[0]['lr']))
```
%% Cell type:markdown id: tags:
### Predict on Train
%% Cell type:code id: tags:
```python
model.eval()
dataset_train.mode = 'test' #no augmentation

preds_tr = []
trues_tr = []
probs_tr = []
filenames_tr = []

with torch.no_grad():
    for data in dataset_train:
        image = data["data"].unsqueeze(0).to(device)
        label = data["target"]

        output = model(image) #forward
        _, pred = torch.max(output, 1)

        preds_tr.append(pred.data.cpu().numpy())
#       trues.append(label)
        trues_tr.append(dictionary[label])
        probs_tr.append(output.data.cpu().numpy())
-       filenames_tr.append(data['sample'])
+       filenames_tr.append(data['filename'])

probs_tr = np.concatenate(probs_tr)
preds_tr = np.concatenate(preds_tr)
trues_tr = np.array(trues_tr)
filenames_tr = np.array(filenames_tr)

MCC_tr = mcor(trues_tr, preds_tr)
ACC_tr = acc(trues_tr, preds_tr)
prec_tr = precision(trues_tr, preds_tr, average='weighted')
rec_tr = recall(trues_tr, preds_tr, average='weighted')

print("MCC train", MCC_tr, "ACC train", ACC_tr)
print("precision train", prec_tr, "recall train", rec_tr)

train_metrics = np.array([MCC_tr, ACC_tr, prec_tr, rec_tr])
```
%% Cell type:markdown id: tags:
### Predict on Test
%% Cell type:code id: tags:
```python
model.eval()

preds_ts = []
trues_ts = []
probs_ts = []
filenames_ts = []

with torch.no_grad():
    for data in dataset_test:
        image = data["data"].unsqueeze(0).to(device)
        label = data["target"]

        output = model(image) #forward
        _, pred = torch.max(output, 1)

        preds_ts.append(pred.data.cpu().numpy())
        trues_ts.append(dictionary[label])
        probs_ts.append(output.data.cpu().numpy())
-       filenames_ts.append(data['sample'])
+       filenames_ts.append(data['filename'])

probs_ts = np.concatenate(probs_ts)
preds_ts = np.concatenate(preds_ts)
trues_ts = np.array(trues_ts)
filenames_ts = np.array(filenames_ts)

MCC_ts = mcor(trues_ts, preds_ts)
ACC_ts = acc(trues_ts, preds_ts)
prec_ts = precision(trues_ts, preds_ts, average='weighted')
rec_ts = recall(trues_ts, preds_ts, average='weighted')

print("MCC test", MCC_ts, "ACC test", ACC_ts)
print("precision test", prec_ts, "recall test", rec_ts)

test_metrics = np.array([MCC_ts, ACC_ts, prec_ts, rec_ts])
```
%% Cell type:markdown id: tags:
## Save results
%% Cell type:markdown id: tags:
Save settings
%% Cell type:code id: tags:
```python
with open(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/settings.pkl', 'wb') as f:
    pickle.dump(settings, f, pickle.HIGHEST_PROTOCOL)
```
%% Cell type:markdown id: tags:
Save losses
%% Cell type:code id: tags:
```python
losses_tr = np.array(losses_tr)
losses_vl = np.array(losses_ts)

np.save(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/losses_tr.npy', losses_tr)
np.save(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/losses_ts.npy', losses_vl)
```
%% Cell type:markdown id: tags:
Plot losses
%% Cell type:code id: tags:
```python
plt.figure(figsize=(20, 10))
plt.plot(losses_tr, color='blue')
plt.plot(losses_ts, color='orange')
plt.legend(['train', 'valid'])
plt.savefig(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/losses.png', close=True, verbose=True)
plt.close()
```
%% Cell type:markdown id: tags:
Save predictions, ground truth, probabilities and filenames
%% Cell type:code id: tags:
```python
np.save(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/preds_tr.npy', preds_tr)
np.save(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/trues_tr.npy', trues_tr)
np.save(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/probs_tr.npy', probs_tr)
np.save(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/filenames_tr.npy', filenames_tr)

np.save(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/preds_ts.npy', preds_ts)
np.save(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/trues_ts.npy', trues_ts)
np.save(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/probs_ts.npy', probs_ts)
np.save(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/filenames_ts.npy', filenames_ts)
```
%% Cell type:markdown id: tags:
Save metrics
%% Cell type:code id: tags:
```python
metrics_out = np.stack([train_metrics, test_metrics], 0)
np.savetxt(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/metrics_out.txt', metrics_out)
```
%% Cell type:markdown id: tags:
Save model weights
%% Cell type:code id: tags:
```python
torch.save(model.state_dict(), f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/weights.pth')
```
%% Cell type:code id: tags:
```python
import pandas as pd

clinical = pd.read_csv('data/clinical_data.csv')
```
%% Cell type:code id: tags:
```python
clinical['patient'] = [('-').join((p.split('-')[0], p.split('-')[1])) for p in clinical['Patient #']]
```
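The list comprehension keeps only the cohort-institution prefix of each identifier so rows can be grouped by treating center; a small illustration, assuming identifiers like 'HN-CHUS-001' (the real values live in the CSV, which this diff does not include):

```python
# Hypothetical identifiers; joining the first two '-' fields keeps 'HN-<center>'
ids = ['HN-CHUS-001', 'HN-CHUM-042']
patients = [('-').join((p.split('-')[0], p.split('-')[1])) for p in ids]
print(patients)  # -> ['HN-CHUS', 'HN-CHUM']
```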
%% Cell type:code id: tags:
```python
clinical.head()
```
%% Cell type:code id: tags:
```python
HGJ = clinical.loc[clinical['patient']=='HN-HGJ']
CHUS = clinical.loc[clinical['patient']=='HN-CHUS']
HMR = clinical.loc[clinical['patient']=='HN-HMR']
CHUM = clinical.loc[clinical['patient']=='HN-CHUM']
```
%% Cell type:code id: tags:
```python
np.mean(CHUM['Age']), np.std(CHUM['Age'])
```
%% Cell type:code id: tags:
```python
CHUM.groupby('Sex').count()
```
%% Cell type:code id: tags:
```python
CHUM.groupby('Locoregional').count()
```
%% Cell type:code id: tags:
```python
from mlpy import bootstrap_ci
```
%% Cell type:code id: tags:
```python
```
...
...
trainingTstage.py (view file @ e1268943)
#!/usr/bin/env python
# coding: utf-8
# ### NOTEBOOK INFO
# Train on T-stage
#