MPBA Radiomics / RADLER / Commits / 456b7009

Commit 456b7009, authored Mar 11, 2020 by Alessia Marcolini

    Use Path everywhere and update according to new dataset structure

parent 4b5d8d22 · 1 changed file

training.py (view file @ 456b7009)
#!/usr/bin/env python
# coding: utf-8

# %% [markdown]
# ## Training network for feature extraction

# %%
# ### Set Path
# get_ipython().run_line_magic('reload_ext', 'autoreload')
# get_ipython().run_line_magic('autoreload', '2')
# %%
# ### Import packages
import datetime
import gc
import os
import pickle
import sys
import time
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.metrics import (
    matthews_corrcoef as mcor,
    accuracy_score as acc,
    recall_score as recall,
    precision_score as precision,
    confusion_matrix,
)
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

from dataset import NumpyCSVDataset, augment_3D_HN
from networks import CiompiDO, ResNet50_3d
from split import train_test_indexes_patient_wise

PATH = Path(os.getcwd())
print(PATH)
# %%
# os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
multigpu = True
# %%
DATASET = 'HN_val'
BBOX_SUBDATASET = 'bbox_64'
# Not augmented but already 64**3 (for faster loading)
DATASET_DIR = PATH / 'data' / DATASET / 'processed' / 'bbox' / BBOX_SUBDATASET
EXPERIMENT_DIR = PATH / 'experiment'
PRETRAINED_MED3D_WEIGHTS = PATH / 'pretrained_weights' / 'resnet_50.pth'
PRETRAINED_T_STAGE = EXPERIMENT_DIR / 'Tstage_4_noTx_CT_20191114-163418' / 'weights.pth'
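# `Path` composes paths with the `/` operator, e.g. Path('/repo') / 'data' / 'HN_val'
# gives Path('/repo/data/HN_val'); this replaces the f-string concatenation used before.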
# %%
# ### Settings
EXPERIMENT_NAME = "prova" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

settings = {
    "model": CiompiDO,
...
@@ -94,15 +61,12 @@ settings = {
    "pretrained": "",
}
assert settings["split"] in ["vallieres", "8020"]
assert not settings["split"] == "vallieres" or DATASET == 'HN_val'
assert settings["pretrained"] in ["Med3D", "branch-wise", "T-stage", ""]
os.makedirs(EXPERIMENT_DIR / EXPERIMENT_NAME, exist_ok=False)
# %%
MODEL = settings["model"]
BATCH_SIZE = settings["batch_size"]
LR = settings["lr"]
...
@@ -117,16 +81,15 @@ SIZE = settings["size"]
PRETRAINED = settings["pretrained"]
# %%
# ### Tensorboard settings
def new_run_log_dir(experiment_name):
    log_dir = PATH / "tb-runs"
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    run_log_dir = log_dir / experiment_name
    return run_log_dir

...
@@ -134,152 +97,93 @@ log_dir = new_run_log_dir(EXPERIMENT_NAME)
print(f"Tensorboard folder: {log_dir}")
writer = SummaryWriter(log_dir)
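# The logged runs can then be inspected with the TensorBoard CLI, e.g.:
#   tensorboard --logdir ./tb-runs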
# %%
# ### Data Handlers
clinical_file = PATH / 'data' / DATASET / 'processed' / f'clinical_{DATASET}.csv'
target_column = "T-stage_grouped"

# %%
np.random.seed(SEED)
dataset_train = NumpyCSVDataset(
    data_dir=DATASET_DIR,
    clinical_file=clinical_file,
    label_col=target_column,
    size=SIZE,
    mode='train',
    seed=SEED,
)
dataset_test = NumpyCSVDataset(
    data_dir=DATASET_DIR,
    clinical_file=clinical_file,
    label_col=target_column,
    size=SIZE,
    mode='test',
    seed=SEED,
)
# %%
# Create train-test datasets
if SPLIT == "vallieres":
    # in this particular case getting `dataset_train._files_full` or `dataset_train.get_files()` is the same
    idx_train = [
        i
        for i, f in enumerate(dataset_train.patients)
        if f.split("-")[1] in ["CHUS", "HGJ"]
    ]
    idx_test = [
        i
        for i, f in enumerate(dataset_test.patients)
        if f.split("-")[1] in ["HMR", "CHUM"]
    ]
else:
    idx_train, idx_test = train_test_indexes_patient_wise(
        dataset_train, test_size=K, stratify=True
    )

dataset_train.indices = np.array(idx_train)
dataset_test.indices = np.array(idx_test)
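# The Vallieres split sends institutions CHUS/HGJ to train and HMR/CHUM to test;
# f.split("-")[1] extracts the institution code from each patient ID (presumably
# of the form <cohort>-<institution>-<number>).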
# %%
# Check class balance
labels_train = dataset_train.labels
labels_test = dataset_test.labels

c, n = np.unique(labels_train, return_counts=True)
print(np.c_[c, n / len(labels_train)])

c, n = np.unique(labels_test, return_counts=True)
print(np.c_[c, n / len(labels_test)])
# %%
# Create loaders
loader_train = DataLoader(
    dataset_train, batch_size=BATCH_SIZE, num_workers=12, pin_memory=True, shuffle=True
)
loader_test = DataLoader(dataset_test, batch_size=BATCH_SIZE, num_workers=12, shuffle=False)
# %%
# Compute weights
labels_train = dataset_train.labels
# class_sample_count = np.array([len(np.where(labels_train == t)[0]) for t in np.unique(labels_train)])
_, class_sample_count = np.unique(labels_train, return_counts=True)
n_min = np.min(class_sample_count)
weights = (
    n_min / class_sample_count
)  # proportional version; use n_min rather than 1 so the weights stay ~1
weights = torch.Tensor(weights).to(device)
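# Worked example (hypothetical counts): with class_sample_count = [30, 90, 60],
# n_min = 30 and weights = [1.0, 0.333, 0.5], down-weighting the majority classes.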
# %%
# ### Initialize Model
model = MODEL(n_classes=N_CLASSES, n_channels=2, modality="CT/PET", dropout=DROPOUT)

if multigpu:
    model = nn.DataParallel(model.to(device))
    model = model.module
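# `model.module` unwraps the DataParallel container, so the state_dict keys used
# below carry no "module." prefix.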
# %%
# model.initialize_weights()
if PRETRAINED == "Med3D":
    pretrained_dict = torch.load(PRETRAINED_MED3D_WEIGHTS)["state_dict"]
...
@@ -299,10 +203,14 @@ if PRETRAINED == "Med3D":
elif PRETRAINED == "branch-wise":
    pretrained_CT_dict = torch.load(
        EXPERIMENT_DIR / 'Tstage_grouped_noTx_CT_valieres_20191029-173736' / 'checkpoint_290.pth'
    )
    pretrained_PT_dict = torch.load(
        EXPERIMENT_DIR / 'Tstage_grouped_noTx_PET_valieres_20191029-195338' / 'checkpoint_290.pth'
    )
    model_dict = model.state_dict()
...
@@ -351,42 +259,13 @@ elif PRETRAINED == "T-stage":
        # print(name)
        model.state_dict()[name].copy_(pretrained_dict[name])
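# A sketch of the usual key-filtering step elided above (not the committed code):
# pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}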
# %%
# Optimizer and criterion
optimizer = OPTIMIZER(model.parameters(), lr=LR)
# [x.shape for x in model.parameters()]
criterion = nn.CrossEntropyLoss(weight=weights)
# %%
NEW_LABELS = list(range(len(list(np.unique(labels_train)))))
dictionary = dict(zip(list(np.unique(labels_train)), NEW_LABELS))
dictionary
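# e.g. if labels_train contains {'T1', 'T2', 'T3'} (hypothetical values), this yields
# {'T1': 0, 'T2': 1, 'T3': 2}, remapping the raw labels to contiguous integer classes.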
# %%
# ### Train
model.train()  # Set model to training mode

global_i = 0
...
@@ -403,7 +282,7 @@ for epoch in range(EPOCHS):
    if epoch % 10 == 0:  # save checkpoint
        torch.save(
            model.state_dict(),
            EXPERIMENT_DIR / EXPERIMENT_NAME / f'checkpoint_{epoch}.pth',
        )

    for j, data in enumerate(loader_train):
...
@@ -416,7 +295,7 @@ for epoch in range(EPOCHS):
        optimizer.zero_grad()

        images_tr = data["data"].to(device)
        labels_tr = torch.LongTensor([dictionary[i] for i in data["target"]]).to(device)
        outputs_tr = model(images_tr).to(device)

        # backward
...
@@ -436,9 +315,7 @@ for epoch in range(EPOCHS):
    for data_test in loader_test:
        images_ts = data_test["data"].to(device)
        labels_ts = torch.LongTensor([dictionary[i] for i in data_test["target"]]).to(device)
        outputs_ts = model.forward(images_ts)
...
@@ -453,6 +330,8 @@ for epoch in range(EPOCHS):
    )
    writer.flush()

    # TODO: fix best model check
    # is_best = loss_val_avg < last_loss_val
    # if is_best:
    #     torch.save(model.state_dict(),
...
@@ -484,12 +363,8 @@ for epoch in range(EPOCHS):
    )
# %%
# ### Predict on Train
model.eval()
dataset_train.mode = "test"  # no augmentation
...
@@ -507,7 +382,7 @@ with torch.no_grad():
        preds_tr.append(pred.data.cpu().numpy())
        # trues.append(label)
        trues_tr.append(dictionary[label])
        probs_tr.append(output.data.cpu().numpy())
        filenames_tr.append(data["filename"])
...
@@ -532,11 +407,8 @@ train_metrics = [
]
# %%
# ### Predict on Test
model.eval()

preds_ts = []
...
@@ -552,7 +424,7 @@ with torch.no_grad():
        _, pred = torch.max(output, 1)
        preds_ts.append(pred.data.cpu().numpy())
        trues_ts.append(dictionary[label])
        probs_ts.append(output.data.cpu().numpy())
        filenames_ts.append(data["filename"])
...
@@ -570,71 +442,48 @@ print("MCC test", round(MCC_ts, 3), "ACC test", round(ACC_ts, 3))
print("precision test", round(prec_ts, 3), "recall test", round(rec_ts, 3))

test_metrics = [round(MCC_ts, 3), round(ACC_ts, 3), round(prec_ts, 3), round(rec_ts, 3)]
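# mcor is the Matthews correlation coefficient (range -1..1, 0 = chance level),
# generally more informative than plain accuracy on imbalanced classes.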
# %%
# ## Save results

# Save settings
with open(EXPERIMENT_DIR / EXPERIMENT_NAME / 'settings.pkl', 'wb') as f:
    pickle.dump(settings, f, pickle.HIGHEST_PROTOCOL)

# Save losses
losses_tr = np.array(losses_tr)
losses_vl = np.array(losses_ts)
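# The saved settings can later be restored with, e.g.:
# with open(EXPERIMENT_DIR / EXPERIMENT_NAME / 'settings.pkl', 'rb') as f:
#     settings = pickle.load(f)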