MPBA Radiomics / RADLER / Commits

Commit 456b7009, authored Mar 11, 2020 by Alessia Marcolini

    Use Path everywhere and update according to new dataset structure

parent 4b5d8d22
Changes: 1 file

training.py (inline diff: lines prefixed with - are removed, + are added; view file @ 456b7009)
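The heart of the change is swapping string-built paths (f-strings and os.path.join) for pathlib.Path objects. A minimal sketch of the pattern, using illustrative names rather than the repository's:

# Minimal sketch of the os.path -> pathlib migration this commit applies.
# Paths and names here are illustrative, not taken from the repository.
import os
from pathlib import Path

PATH = Path(os.getcwd())  # or Path.cwd()

# Before: string concatenation via f-strings or os.path.join
experiment_dir_old = os.path.join(os.getcwd(), "experiments")

# After: the / operator builds platform-correct paths from Path objects
experiment_dir_new = PATH / "experiments"

# Path objects can be passed to open(), os.makedirs(), np.save(), etc.
print(experiment_dir_new / "weights.pth")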
#!/usr/bin/env python
# coding: utf-8

-# ## Training network for featture extraction
-# ### Set Path
-# In[ ]:
-# get_ipython().run_line_magic('reload_ext', 'autoreload')
-# get_ipython().run_line_magic('autoreload', '2')
-import os
-PATH = os.getcwd()
-print(PATH)
-# ### Import packages
-# In[ ]:
+#%% [markdown]
+# ## Training network for feature extraction

+# %%
import datetime
import gc
import os
import pickle
import sys
import time
+from pathlib import Path

# os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
-import matplotlib.pyplot as plt
-from sklearn.metrics import (
-    matthews_corrcoef as mcor,
-    accuracy_score as acc,
-    recall_score as recall,
-    precision_score as precision,
-    confusion_matrix,
-)
import torch
import torch.nn as nn
+from sklearn.metrics import accuracy_score as acc
+from sklearn.metrics import confusion_matrix
+from sklearn.metrics import matthews_corrcoef as mcor
+from sklearn.metrics import precision_score as precision
+from sklearn.metrics import recall_score as recall
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

-from networks import CiompiDO, ResNet50_3d
from dataset import NumpyCSVDataset, augment_3D_HN
+from networks import CiompiDO, ResNet50_3d
from split import train_test_indexes_patient_wise

-# In[ ]:
# os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
+PATH = Path(os.getcwd())
+print(PATH)

-#%%
+# %%
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
multigpu = True

-# In[ ]:
-DATASET_DIR = (
-    "/thunderdisk/HN/processed/bbox_fixed2_64"
-)  # Not augmented but already 64**3 (for faster loading)
-EXPERIMENT_DIR = f"{PATH}/experiments"
+DATASET = 'HN_val'
+BBOX_SUBDATASET = 'bbox_64'
+DATASET_DIR = PATH / 'data' / DATASET / 'processed' / 'bbox' / BBOX_SUBDATASET
+EXPERIMENT_DIR = PATH / 'experiment'

-PRETRAINED_MED3D_WEIGHTS = (
-    "/thunderdisk/HN/MedicalNet_pytorch_files/pretrain/resnet_50.pth"
-)
-PRETRAINED_T_STAGE = f"{EXPERIMENT_DIR}/Tstage_4_noTx_CT_20191114-163418/weights.pth"
+PRETRAINED_MED3D_WEIGHTS = PATH / 'pretrained_weights' / 'resnet_50.pth'
+PRETRAINED_T_STAGE = EXPERIMENT_DIR / 'Tstage_4_noTx_CT_20191114-163418' / 'weights.pth'

-# ### Settings
-# In[ ]:
+# %%
+### Settings
EXPERIMENT_NAME = "prova" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
settings = {
    "model": CiompiDO,
    ...

@@ -94,15 +61,12 @@ settings = {
    "pretrained": "",
}
-assert settings["split"] in ["valieres", "8020"]
+assert settings["split"] in ["vallieres", "8020"]
+assert not settings["split"] == "vallieres" or DATASET == 'HN_val'
assert settings["pretrained"] in ["Med3D", "branch-wise", "T-stage", ""]
-os.makedirs(f"{EXPERIMENT_DIR}/{EXPERIMENT_NAME}", exist_ok=False)
-# In[ ]:
+os.makedirs(EXPERIMENT_DIR / EXPERIMENT_NAME, exist_ok=False)

+# %%
MODEL = settings["model"]
BATCH_SIZE = settings["batch_size"]
LR = settings["lr"]
...

@@ -117,16 +81,15 @@ SIZE = settings["size"]
PRETRAINED = settings["pretrained"]
# %%
# ### Tensorboard settings
def new_run_log_dir(experiment_name):
-    log_dir = os.path.join(PATH, "tb-runs")
+    log_dir = PATH / "tb-runs"
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
-    run_log_dir = os.path.join(log_dir, experiment_name)
+    run_log_dir = log_dir / experiment_name
    return run_log_dir
...

@@ -134,152 +97,93 @@ log_dir = new_run_log_dir(EXPERIMENT_NAME)
print(f"Tensorboard folder: {log_dir}")
writer = SummaryWriter(log_dir)
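The training loop that consumes this writer is collapsed in the diff; a minimal sketch of the kind of SummaryWriter calls involved (tags, values, and the log directory are illustrative, not from the file):

# Illustrative sketch of logging train/validation loss with SummaryWriter.
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter("tb-runs/example")
for step, (loss_tr, loss_vl) in enumerate([(0.9, 1.0), (0.7, 0.8)]):
    writer.add_scalar("loss/train", loss_tr, global_step=step)
    writer.add_scalar("loss/validation", loss_vl, global_step=step)
writer.flush()
writer.close()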
# %%
# ### Data Handlers
-clinical_data = f"{PATH}/data/clinical_data_noTx.csv"
+clinical_file = PATH / 'data' / DATASET / 'processed' / f'clinical_{DATASET}.csv'
target_column = "T-stage_grouped"

# %%
np.random.seed(SEED)

-dataset = NumpyCSVDataset(DATASET_DIR, clinical_data, target_column, SIZE, seed=SEED)
+dataset_train = NumpyCSVDataset(
+    data_dir=DATASET_DIR,
+    clinical_file=clinical_file,
+    label_col=target_column,
+    size=SIZE,
+    mode='train',
+    seed=SEED,
+)
+dataset_test = NumpyCSVDataset(
+    data_dir=DATASET_DIR,
+    clinical_file=clinical_file,
+    label_col=target_column,
+    size=SIZE,
+    mode='test',
+    seed=SEED,
+)
# %%
# Create train-test datasets
-if SPLIT == "valieres":
-    dataset_train = NumpyCSVDataset(
-        DATASET_DIR, clinical_data, target_column, SIZE, mode="train", transforms=augment_3D_HN,
-    )
-    # in this particular case getting `dataset_train._files_full` or `dataset_train.get_files()` is the same
+if SPLIT == "vallieres":
    idx_train = [
        i
-        for i, f in enumerate(dataset_train.get_files())
+        for i, f in enumerate(dataset_train.patients)
        if f.split("-")[1] in ["CHUS", "HGJ"]
    ]
-    dataset_train.indexes = np.array(idx_train)

-    dataset_test = NumpyCSVDataset(
-        DATASET_DIR, clinical_data, target_column, SIZE, mode="test", transforms=augment_3D_HN,
-    )
-    # in this particular case getting `dataset_train._files_full` or `dataset_train.get_files()` is the same
    idx_test = [
        i
-        for i, f in enumerate(dataset_test.get_files())
+        for i, f in enumerate(dataset_test.patients)
        if f.split("-")[1] in ["HMR", "CHUM"]
    ]
-    dataset_test.indexes = np.array(idx_test)
else:
    idx_train, idx_test = train_test_indexes_patient_wise(
-        dataset, test_size=K, stratify=True
+        dataset_train, test_size=K, stratify=True
    )
-    dataset_test = NumpyCSVDataset(
-        DATASET_DIR, clinical_data, target_column, SIZE, mode="test", transforms=augment_3D_HN,
-    )
-    dataset_test.indexes = np.array(idx_test)
-    dataset_train = NumpyCSVDataset(
-        DATASET_DIR, clinical_data, target_column, SIZE, mode="train", transforms=augment_3D_HN,
-    )
-    dataset_train.indexes = np.array(idx_train)

+dataset_train.indices = np.array(idx_train)
+dataset_test.indices = np.array(idx_test)
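`train_test_indexes_patient_wise` is defined in the repository's `split` module and is not shown in this diff. As a rough illustration of the general technique (assumed behavior, not the actual implementation), a patient-wise split keeps every sample of a given patient on the same side of the split:

# Rough illustration (assumed behavior) of a patient-wise split. Not the
# repository's actual `train_test_indexes_patient_wise` implementation.
from sklearn.model_selection import GroupShuffleSplit

filenames = ["HN-CHUS-001_a", "HN-CHUS-001_b", "HN-HGJ-002_a", "HN-HMR-003_a"]
patients = [f.split("_")[0] for f in filenames]  # group key per sample

splitter = GroupShuffleSplit(n_splits=1, test_size=0.25, random_state=0)
idx_train, idx_test = next(splitter.split(filenames, groups=patients))
print(idx_train, idx_test)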
# %%
# Check class balance
-labels_test = dataset_test.get_labels()
-labels_train = dataset_train.get_labels()
-c, n = np.unique(labels_test, return_counts=True)
-print(np.c_[c, n / len(labels_test)])
+labels_train = dataset_train.labels
+labels_test = dataset_test.labels

+c, n = np.unique(labels_train, return_counts=True)
+print(np.c_[c, n / len(labels_train)])

c, n = np.unique(labels_test, return_counts=True)
print(np.c_[c, n / len(labels_test)])
# %%
# Create loaders
-loader_test = DataLoader(dataset_test, batch_size=BATCH_SIZE // 2, num_workers=12, shuffle=True)
loader_train = DataLoader(dataset_train, batch_size=BATCH_SIZE, num_workers=12, pin_memory=True, shuffle=True)
+loader_test = DataLoader(dataset_test, batch_size=BATCH_SIZE, num_workers=12, shuffle=False)
# %%
# Compute weights
-labels = dataset_train.get_labels()
+labels_train = dataset_train.labels

# class_sample_count = np.array([len(np.where(labels == t)[0]) for t in np.unique(labels)])
-_, class_sample_count = np.unique(labels, return_counts=True)
+_, class_sample_count = np.unique(labels_train, return_counts=True)

n_min = np.min(class_sample_count)
weights = n_min / class_sample_count  # proportional version; use n_min instead of 1 to get weights ~1
weights = torch.Tensor(weights).to(device)
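For reference, this weighting assigns each class the ratio between the rarest class's count and its own count, so the rarest class gets weight 1 and frequent classes are down-weighted. A self-contained toy version of the same computation (labels are made up):

# Toy version of the inverse-frequency class weights computed above.
import numpy as np
import torch
import torch.nn as nn

labels = np.array([0, 0, 0, 0, 1, 1, 2])           # toy label vector
_, counts = np.unique(labels, return_counts=True)  # counts per class: [4, 2, 1]
weights = np.min(counts) / counts                  # [0.25, 0.5, 1.0]; rarest class = 1

criterion = nn.CrossEntropyLoss(weight=torch.Tensor(weights))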
# %%
# ### Initialize Model
model = MODEL(n_classes=N_CLASSES, n_channels=2, modality="CT/PET", dropout=DROPOUT)

if multigpu:
    model = nn.DataParallel(model.to(device))
    model = model.module

-# In[ ]:
-#model.initialize_weights()
+# %%
+# model.initialize_weights()
if PRETRAINED == "Med3D":
    pretrained_dict = torch.load(PRETRAINED_MED3D_WEIGHTS)["state_dict"]
...

@@ -299,10 +203,14 @@ if PRETRAINED == "Med3D":
elif PRETRAINED == "branch-wise":
    pretrained_CT_dict = torch.load(
-        f"{EXPERIMENT_DIR}/Tstage_grouped_noTx_CT_valieres_20191029-173736/checkpoint_290.pth"
+        EXPERIMENT_DIR / 'Tstage_grouped_noTx_CT_valieres_20191029-173736' / 'checkpoint_290.pth'
    )
    pretrained_PT_dict = torch.load(
-        f"{EXPERIMENT_DIR}/Tstage_grouped_noTx_PET_valieres_20191029-195338/checkpoint_290.pth"
+        EXPERIMENT_DIR / 'Tstage_grouped_noTx_PET_valieres_20191029-195338' / 'checkpoint_290.pth'
    )
    model_dict = model.state_dict()
...

@@ -351,42 +259,13 @@ elif PRETRAINED == "T-stage":
        # print(name)
        model.state_dict()[name].copy_(pretrained_dict[name])
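The surrounding pretrained-loading logic is collapsed in the diff, but the visible line copies matching tensors from a pretrained state_dict into the model in place. A generic sketch of that technique (not the file's elided code; model and weights are toy stand-ins):

# Generic sketch of copying only matching entries from a pretrained
# state_dict into a model. state_dict() returns detached tensors that
# share storage with the parameters, so copy_() updates the model.
import torch
import torch.nn as nn

model = nn.Linear(4, 2)
pretrained_dict = {"weight": torch.zeros(2, 4), "bias": torch.zeros(2)}

model_dict = model.state_dict()
for name, tensor in pretrained_dict.items():
    # keep only keys that exist in the model and match in shape
    if name in model_dict and model_dict[name].shape == tensor.shape:
        model_dict[name].copy_(tensor)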
-# Optimizer
-# In[ ]:
-optimizer = torch.optim.Adam(model.parameters(), lr=LR)
-# In[ ]:
-# [x.shape for x in model.parameters()]
-# Loss
-# In[ ]:
+# %%
+# Optimizer and criterion
+optimizer = OPTIMIZER(model.parameters(), lr=LR)
criterion = nn.CrossEntropyLoss(weight=weights)
-# In[ ]:
-NEW_LABELS = list(range(len(list(np.unique(labels_train)))))
-dictionary = dict(zip(list(np.unique(labels_train)), NEW_LABELS))
-dictionary
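For reference, the removed `dictionary` remapped raw label values to contiguous integers starting at 0, as CrossEntropyLoss expects; the updated dataset class presumably returns already-remapped targets, which is why the lookups disappear below. A toy version of the mapping (labels are made up):

# Toy illustration of the label remapping the removed `dictionary` performed.
import numpy as np

labels_train = np.array([2, 4, 4, 7])                    # hypothetical raw labels
new_labels = list(range(len(np.unique(labels_train))))   # [0, 1, 2]
dictionary = dict(zip(np.unique(labels_train), new_labels))
print(dictionary)                                        # {2: 0, 4: 1, 7: 2}
print([dictionary[i] for i in labels_train])             # [0, 1, 1, 2]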
# %%
# ### Train
model.train()  # Set model to training mode

global_i = 0
...

@@ -403,7 +282,7 @@ for epoch in range(EPOCHS):
    if epoch % 10 == 0:
        # save checkpoint
        torch.save(
            model.state_dict(),
-            f"{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/checkpoint_{epoch}.pth",
+            EXPERIMENT_DIR / EXPERIMENT_NAME / f'checkpoint_{epoch}.pth',
        )
    for j, data in enumerate(loader_train):
...

@@ -416,7 +295,7 @@ for epoch in range(EPOCHS):
        optimizer.zero_grad()

        images_tr = data["data"].to(device)
-        labels_tr = torch.LongTensor([dictionary[i] for i in data["target"]]).to(device)
+        labels_tr = torch.LongTensor(data["target"]).to(device)
        outputs_tr = model(images_tr).to(device)

        # backward
...

@@ -436,9 +315,7 @@ for epoch in range(EPOCHS):
            for data_test in loader_test:
                images_ts = data_test["data"].to(device)
-                labels_ts = torch.LongTensor([dictionary[i] for i in data_test["target"]]).to(device)
+                labels_ts = torch.LongTensor(data_test["target"]).to(device)
                outputs_ts = model.forward(images_ts)
...

@@ -453,6 +330,8 @@ for epoch in range(EPOCHS):
            )
            writer.flush()

+# TODO: fix best model check
# is_best = loss_val_avg < last_loss_val
# if is_best:
#     torch.save(model.state_dict(),
...
@@ -484,12 +363,8 @@ for epoch in range(EPOCHS):
        )
    )

-# ### Predict on Train
-# In[ ]:
+# %%
+### Predict on Train
model.eval()
dataset_train.mode = "test"  # no augmentation
...

@@ -507,7 +382,7 @@ with torch.no_grad():
        preds_tr.append(pred.data.cpu().numpy())
        # trues.append(label)
-        trues_tr.append(dictionary[label])
+        trues_tr.append(label)
        probs_tr.append(output.data.cpu().numpy())
        filenames_tr.append(data["filename"])
...

@@ -532,11 +407,8 @@ train_metrics = [
]

# %%
# ### Predict on Test
-# In[ ]:
model.eval()

preds_ts = []
...

@@ -552,7 +424,7 @@ with torch.no_grad():
        _, pred = torch.max(output, 1)
        preds_ts.append(pred.data.cpu().numpy())
-        trues_ts.append(dictionary[label])
+        trues_ts.append(label)
        probs_ts.append(output.data.cpu().numpy())
        filenames_ts.append(data["filename"])
...

@@ -570,71 +442,48 @@ print("MCC test", round(MCC_ts, 3), "ACC test", round(ACC_ts, 3))
print("precision test", round(prec_ts, 3), "recall test", round(rec_ts, 3))

test_metrics = [round(MCC_ts, 3), round(ACC_ts, 3), round(prec_ts, 3), round(rec_ts, 3)]
# %%
# ## Save results

# Save settings
-with open(f"{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/settings.pkl", "wb") as f:
+with open(EXPERIMENT_DIR / EXPERIMENT_NAME / 'settings.pkl', 'wb') as f:
    pickle.dump(settings, f, pickle.HIGHEST_PROTOCOL)
# Save losses
losses_tr = np.array(losses_tr)
losses_vl = np.array(losses_ts)

-np.save(f"{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/losses_tr.npy", losses_tr)
-np.save(f"{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/losses_ts.npy", losses_vl)
+np.save(EXPERIMENT_DIR / EXPERIMENT_NAME / 'losses_tr.npy', losses_tr)
+np.save(EXPERIMENT_DIR / EXPERIMENT_NAME / 'losses_ts.npy', losses_vl)
# %%
# Plot losses
plt.figure(figsize=(20, 10))
plt.plot(losses_tr, color="blue")
plt.plot(losses_ts, color="orange")
plt.legend(["train", "valid"])
-plt.savefig(f"{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/losses.png", close=True, verbose=True)
+plt.savefig(EXPERIMENT_DIR / EXPERIMENT_NAME / 'losses.png', close=True, verbose=True)
plt.close()
# %%
# Save predictions, ground truth, probabilities and filenames
-np.save(f"{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/preds_tr.npy", preds_tr)
-np.save(f"{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/trues_tr.npy", trues_tr)
-np.save(f"{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/probs_tr.npy", probs_tr)
-np.save(f"{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/filenames_tr.npy", filenames_tr)
-np.save(f"{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/preds_ts.npy", preds_ts)
-np.save(f"{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/trues_ts.npy", trues_ts)
-np.save(f"{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/probs_ts.npy", probs_ts)
-np.save(f"{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/filenames_ts.npy", filenames_ts)
+np.save(EXPERIMENT_DIR / EXPERIMENT_NAME / 'preds_tr.npy', preds_tr)
+np.save(EXPERIMENT_DIR / EXPERIMENT_NAME / 'trues_tr.npy', trues_tr)
+np.save(EXPERIMENT_DIR / EXPERIMENT_NAME / 'probs_tr.npy', probs_tr)
+np.save(EXPERIMENT_DIR / EXPERIMENT_NAME / 'filenames_tr.npy', filenames_tr)
+np.save(EXPERIMENT_DIR / EXPERIMENT_NAME / 'preds_ts.npy', preds_ts)
+np.save(EXPERIMENT_DIR / EXPERIMENT_NAME / 'trues_ts.npy', trues_ts)
+np.save(EXPERIMENT_DIR / EXPERIMENT_NAME / 'probs_ts.npy', probs_ts)
+np.save(EXPERIMENT_DIR / EXPERIMENT_NAME / 'filenames_ts.npy', filenames_ts)
# %%
# Save metrics
metrics_out = pd.DataFrame(
    (train_metrics, test_metrics),
    columns=["MCC", "ACC", "prec", "rec"],
    index=["train", "test"],
)
-metrics_out.to_csv(f"{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/metrics_out.csv")
+metrics_out.to_csv(EXPERIMENT_DIR / EXPERIMENT_NAME / 'metrics_out.csv')
# Save model weights
-torch.save(model.state_dict(), f"{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/weights.pth")
+torch.save(model.state_dict(), EXPERIMENT_DIR / EXPERIMENT_NAME / 'weights.pth')