MPBA Radiomics / RADLER · Commits

Commit eb6c83a5, authored Mar 11, 2020 by Alessia Marcolini

Remove unused file

parent 6c5fe9b2
Changes 1

trainingTstage.ipynb deleted 100755 → 0
%% Cell type:markdown id: tags:
## Training network for feature extraction
%% Cell type:markdown id: tags:
### Set Path
%% Cell type:code id: tags:
``` python
%reload_ext autoreload
%autoreload 2

import os

PATH = os.getcwd()
print(PATH)
```
%% Cell type:markdown id: tags:
### Import packages
%% Cell type:code id: tags:
``` python
import datetime
import gc
import pickle
import sys
import time

import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import (
    matthews_corrcoef as mcor,
    accuracy_score as acc,
    recall_score as recall,
    precision_score as precision,
    confusion_matrix,
)

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

from networks import CiompiDO, ResNet50_3d
from dataset import NumpyCSVDataset, augment_3D_HN
from split import train_test_indexes_patient_wise
```
%% Cell type:code id: tags:
``` python
#os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
multigpu = True
```
%% Cell type:code id: tags:
``` python
DATASET_DIR = "/thunderdisk/HN/processed/bbox_fixed2_64/"  # not augmented but already 64**3 (for faster loading)
EXPERIMENT_DIR = f"{PATH}/experiments"

PRETRAINED_MED3D_WEIGHTS = '/thunderdisk/HN/MedicalNet_pytorch_files/pretrain/resnet_50.pth'
PRETRAINED_T_STAGE = f'{EXPERIMENT_DIR}/Tstage_binary_augmented_noTx_branch_wise_20191028-104101/checkpoint_40.pth'
```
%% Cell type:markdown id: tags:
### Settings
%% Cell type:code id: tags:
``` python
EXPERIMENT_NAME = 'Tstage_grouped_noTx_CT_valieres_' + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

settings = {
    'model': CiompiDO,
    'batch_size': 32,
    'lr': 1e-5,
    'epochs': 300,
    'optim': torch.optim.Adam,
    'K': 0.2,
    'n_classes': 2,  # T-stage
    'seed': 1234,
    'dropout': 0.5,
    'split': 'valieres',
    'size': 64,
    'pretrained': '',
}

assert settings['split'] in ['valieres', '8020']
assert settings['pretrained'] in ['Med3D', 'branch-wise', 'T-stage', '']

os.makedirs(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}', exist_ok=False)
```
%% Cell type:code id: tags:
``` python
MODEL = settings['model']
BATCH_SIZE = settings['batch_size']
LR = settings['lr']
EPOCHS = settings['epochs']
OPTIMIZER = settings['optim']
K = settings['K']
N_CLASSES = settings['n_classes']
SEED = settings['seed']
DROPOUT = settings['dropout']
SPLIT = settings['split']
SIZE = settings['size']
PRETRAINED = settings['pretrained']
```
%% Cell type:markdown id: tags:
### Tensorboard settings
%% Cell type:code id: tags:
``` python
def new_run_log_dir(experiment_name):
    log_dir = os.path.join(PATH, 'tb-runs')
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    run_log_dir = os.path.join(log_dir, experiment_name)
    return run_log_dir

log_dir = new_run_log_dir(EXPERIMENT_NAME)
print(f'Tensorboard folder: {log_dir}')

writer = SummaryWriter(log_dir)
```
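%% Cell type:markdown id: tags:
To inspect the logged scalars during or after training, TensorBoard can be pointed at the run folder created above (standard TensorBoard CLI usage, run from a shell; nothing here is specific to this repository):
%% Cell type:code id: tags:
``` python
# Run outside the notebook; standard TensorBoard CLI.
# The runs live under <PATH>/tb-runs/<EXPERIMENT_NAME>.
#   tensorboard --logdir tb-runs
```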
%% Cell type:markdown id: tags:
### Data Handlers
%% Cell type:code id: tags:
``` python
clinical_data = f'{PATH}/data/clinical_data_noTx.csv'
target_column = 'T-stage_binary'
```
%% Cell type:code id: tags:
``` python
np.random.seed(SEED)

dataset = NumpyCSVDataset(DATASET_DIR, clinical_data, target_column, SIZE, seed=SEED)
```
%% Cell type:markdown id: tags:
Create train-test datasets
%% Cell type:code id: tags:
``` python
if SPLIT == 'valieres':
    dataset_train = NumpyCSVDataset(DATASET_DIR, clinical_data, target_column, SIZE,
                                    mode='train', transforms=augment_3D_HN)
    # in this particular case `dataset_train._files_full` and `dataset_train.get_files()` are equivalent
    idx_train = [i for i, f in enumerate(dataset_train.get_files())
                 if f.split('-')[1] in ['CHUS', 'HGJ']]
    dataset_train.indexes = np.array(idx_train)

    dataset_test = NumpyCSVDataset(DATASET_DIR, clinical_data, target_column, SIZE,
                                   mode='test', transforms=augment_3D_HN)
    # same remark: `dataset_test._files_full` and `dataset_test.get_files()` are equivalent here
    idx_test = [i for i, f in enumerate(dataset_test.get_files())
                if f.split('-')[1] in ['HMR', 'CHUM']]
    dataset_test.indexes = np.array(idx_test)
else:
    idx_train, idx_test = train_test_indexes_patient_wise(dataset, test_size=K, stratify=True)

    dataset_test = NumpyCSVDataset(DATASET_DIR, clinical_data, target_column, SIZE,
                                   mode='test', transforms=augment_3D_HN)
    dataset_test.indexes = np.array(idx_test)

    dataset_train = NumpyCSVDataset(DATASET_DIR, clinical_data, target_column, SIZE,
                                    mode='train', transforms=augment_3D_HN)
    dataset_train.indexes = np.array(idx_train)
```
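%% Cell type:markdown id: tags:
As a hedged illustration of the split rule above: `f.split('-')[1]` is expected to pick the institution code out of each file name, so the 'valieres' split sends CHUS/HGJ patients to train and HMR/CHUM patients to test. The file names below are hypothetical; the real layout comes from `NumpyCSVDataset.get_files()`.
%% Cell type:code id: tags:
``` python
# Illustrative file names only, not taken from the dataset.
example_files = ['HN-CHUS-001.npy', 'HN-HGJ-017.npy', 'HN-HMR-042.npy', 'HN-CHUM-003.npy']
train_like = [i for i, f in enumerate(example_files) if f.split('-')[1] in ['CHUS', 'HGJ']]
test_like = [i for i, f in enumerate(example_files) if f.split('-')[1] in ['HMR', 'CHUM']]
print(train_like, test_like)  # [0, 1] [2, 3]
```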
%% Cell type:markdown id: tags:
Check class balance
%% Cell type:code id: tags:
``` python
labels_test = dataset_test.get_labels()
labels_train = dataset_train.get_labels()

c, n = np.unique(labels_test, return_counts=True)
print(np.c_[c, n / len(labels_test)])

c, n = np.unique(labels_train, return_counts=True)
print(np.c_[c, n / len(labels_train)])
```
%% Cell type:markdown id: tags:
Create loaders
%% Cell type:code id: tags:
``` python
loader_test = DataLoader(dataset_test, batch_size=BATCH_SIZE // 2, num_workers=12, shuffle=True)
loader_train = DataLoader(dataset_train, batch_size=BATCH_SIZE, num_workers=12, pin_memory=True, shuffle=True)
```
%% Cell type:markdown id: tags:
Compute weights
%% Cell type:code id: tags:
``` python
labels = dataset_train.get_labels()

#class_sample_count = np.array([len(np.where(labels == t)[0]) for t in np.unique(labels)])
_, class_sample_count = np.unique(labels, return_counts=True)
n_min = np.min(class_sample_count)
weights = n_min / class_sample_count  # proportional version: using n_min instead of 1 keeps the weights ~1
weights = torch.Tensor(weights).to(device)
```
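%% Cell type:markdown id: tags:
A quick sanity check of the weighting rule on made-up counts (illustrative numbers only, not the HN class counts): the smallest class gets weight 1 and larger classes get proportionally smaller weights, which is what lets the weighted loss below counterbalance class imbalance.
%% Cell type:code id: tags:
``` python
# Toy counts for two classes; values are illustrative only.
toy_counts = np.array([30, 90])
print(np.min(toy_counts) / toy_counts)  # [1.0  0.333...]: minority class weighted 3x the majority
```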
%% Cell type:markdown id: tags:
### Initialize Model
%% Cell type:code id: tags:
``` python
model = MODEL(n_classes=N_CLASSES, n_channels=1, modality='CT', dropout=DROPOUT)

if multigpu:
    model = nn.DataParallel(model.to(device))
    model = model.module  # keep a handle to the underlying module (already moved to the device above)
```
%% Cell type:code id: tags:
``` python
model.initialize_weights()

if PRETRAINED == 'Med3D':
    pretrained_dict = torch.load(PRETRAINED_MED3D_WEIGHTS)['state_dict']
    model_dict = model.state_dict()

    # discard layers not present in destination network or with different shape
    pretrained_dict = {k: v for k, v in pretrained_dict.items()
                       if (k in model_dict) and (model_dict[k].shape == pretrained_dict[k].shape)}

    for name in model.state_dict().keys():
        if name in pretrained_dict.keys():
            #print(name)
            model.state_dict()[name].copy_(pretrained_dict[name])

elif PRETRAINED == 'branch-wise':
    pretrained_CT_dict = torch.load(f'{EXPERIMENT_DIR}/Tstage_grouped_noTx_CT_20191021-143133/weights.pth')
    pretrained_PT_dict = torch.load(f'{EXPERIMENT_DIR}/Tstage_binary_PET_noTx_20191022-124046/weights.pth')
    model_dict = model.state_dict()

    # discard layers not present in destination network or with different shape
    pretrained_CT_dict = {k: v for k, v in pretrained_CT_dict.items()
                          if (k in model_dict) and (model_dict[k].shape == pretrained_CT_dict[k].shape)}
    pretrained_PT_dict = {k: v for k, v in pretrained_PT_dict.items()
                          if (k in model_dict) and (model_dict[k].shape == pretrained_PT_dict[k].shape)}

    to_add = 'module.' if multigpu else ''

    for name in model.CT_branch.state_dict().keys():
        name_complete = to_add + 'CT_branch.' + name
        #print(name_complete)
        if name_complete in pretrained_CT_dict.keys():
            print(name)
            model.CT_branch.state_dict()[name].copy_(pretrained_CT_dict[name_complete])

    for name in model.PT_branch.state_dict().keys():
        name_complete = to_add + 'PT_branch.' + name
        #print(name_complete)
        if name_complete in pretrained_PT_dict.keys():
            print(name)
            model.PT_branch.state_dict()[name].copy_(pretrained_PT_dict[name_complete])

elif PRETRAINED == 'T-stage':
    pretrained_dict = torch.load(PRETRAINED_T_STAGE)
    model_dict = model.state_dict()

    # discard layers not present in destination network or with different shape
    pretrained_dict = {k: v for k, v in pretrained_dict.items()
                       if (k in model_dict) and (model_dict[k].shape == pretrained_dict[k].shape)}

    for name in model.state_dict().keys():
        if name in pretrained_dict.keys():
            #print(name)
            model.state_dict()[name].copy_(pretrained_dict[name])
```
%% Cell type:markdown id: tags:
Optimizer
%% Cell type:code id: tags:
``` python
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
```
%% Cell type:code id: tags:
``` python
#[x.shape for x in model.parameters()]
```
%% Cell type:markdown id: tags:
Loss
%% Cell type:code id: tags:
``` python
criterion = nn.CrossEntropyLoss(weight=weights)
```
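%% Cell type:markdown id: tags:
A small hedged check of how the `weight` argument rescales per-class contributions in `CrossEntropyLoss` (toy logits and labels, not real model outputs):
%% Cell type:code id: tags:
``` python
# Two toy samples, two classes; values are illustrative only.
toy_logits = torch.tensor([[2.0, 0.5], [0.2, 1.5]])
toy_labels = torch.tensor([0, 1])
print(nn.CrossEntropyLoss()(toy_logits, toy_labels).item())
print(nn.CrossEntropyLoss(weight=torch.tensor([1.0, 0.5]))(toy_logits, toy_labels).item())
```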
%% Cell type:code id: tags:
``` python
NEW_LABELS = list(range(len(list(np.unique(labels_train)))))

dictionary = dict(zip(list(np.unique(labels_train)), NEW_LABELS))
dictionary
```
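%% Cell type:markdown id: tags:
For intuition, a toy example of this remapping (the label values below are hypothetical; the real ones come from the `T-stage_binary` column): whatever the original class labels are, they are mapped to consecutive integers starting at 0, which is what `CrossEntropyLoss` expects.
%% Cell type:code id: tags:
``` python
# Hypothetical labels, only to show the shape of `dictionary`.
toy_labels = np.array(['T1-T2', 'T3-T4', 'T1-T2'])
print(dict(zip(list(np.unique(toy_labels)), range(len(np.unique(toy_labels))))))  # {'T1-T2': 0, 'T3-T4': 1}
```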
%% Cell type:markdown id: tags:
### Train
%% Cell type:code id: tags:
``` python
model.train()  # set model to training mode

global_i = 0

losses_tr = []
losses_ts = []

last_loss_test = -1
iteration = 0
start_time = time.time()

for epoch in range(EPOCHS):
    #print(epoch)
    if epoch % 10 == 0:
        # save checkpoint
        torch.save(model.state_dict(),
                   f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/checkpoint_{epoch}.pth')

    for j, data in enumerate(loader_train):
        global_i += 1

        if j % 10 == 0:
            print(time.time() - start_time)
            start_time = time.time()

        optimizer.zero_grad()

        images_tr = data['data'].to(device)
        labels_tr = torch.LongTensor([dictionary[i] for i in data['target']]).to(device)

        outputs_tr = model(images_tr).to(device)

        # backward
        loss = criterion(outputs_tr, labels_tr)
        loss.backward()
        optimizer.step()

        # check test set
        if j % int(len(loader_train) / 2) == 0 and j != 0:
            model.eval()
            with torch.no_grad():
                losses_sum = 0
                num_samples_test = 0

                for data_test in loader_test:
                    images_ts = data_test['data'].to(device)
                    labels_ts = torch.LongTensor([dictionary[i] for i in data_test['target']]).to(device)

                    outputs_ts = model.forward(images_ts)

                    loss_test_sum = criterion(outputs_ts, labels_ts).item()
                    losses_sum += loss_test_sum
                    num_samples_test += 1

                loss_test_avg = losses_sum / num_samples_test

                writer.add_scalar(f'{EXPERIMENT_NAME}/test_loss', loss_test_avg, global_i)
                writer.flush()

                #is_best = loss_val_avg < last_loss_val
                #if is_best:
                #    torch.save(model.state_dict(),
                #               f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/checkpoint_best_{epoch}.pth')

                last_loss_test = loss_test_avg

                losses_tr.append(loss.item())
                losses_ts.append(loss_test_avg)

                del images_ts, labels_ts

            iteration += 1

        del images_tr, labels_tr
        gc.collect()

        model.train()

        # sys.stdout.write
        writer.add_scalar(f'{EXPERIMENT_NAME}/train_loss', loss.item(), global_i)
        writer.flush()

        sys.stdout.write(
            '\rEpoch {} of {} [{:.2f}%] - loss TR/TS: {:.4f} / {:.4f} - {}'.format(
                epoch + 1, EPOCHS, 100 * j / len(loader_train),
                loss.item(), last_loss_test, optimizer.param_groups[0]['lr']))
```
%% Cell type:markdown id: tags:
### Predict on Train
%% Cell type:code id: tags:
``` python
model.eval()
dataset_train.mode = 'test'  # no augmentation

preds_tr = []
trues_tr = []
probs_tr = []
filenames_tr = []

with torch.no_grad():
    for data in dataset_train:
        image = data["data"].unsqueeze(0).to(device)
        label = data["target"]

        output = model(image)  # forward
        _, pred = torch.max(output, 1)

        preds_tr.append(pred.data.cpu().numpy())
        # trues.append(label)
        trues_tr.append(dictionary[label])
        probs_tr.append(output.data.cpu().numpy())
        filenames_tr.append(data['filename'])

probs_tr = np.concatenate(probs_tr)
preds_tr = np.concatenate(preds_tr)
trues_tr = np.array(trues_tr)
filenames_tr = np.array(filenames_tr)

MCC_tr = mcor(trues_tr, preds_tr)
ACC_tr = acc(trues_tr, preds_tr)
prec_tr = precision(trues_tr, preds_tr, average='weighted')
rec_tr = recall(trues_tr, preds_tr, average='weighted')

print("MCC train", round(MCC_tr, 3), "ACC train", round(ACC_tr, 3))
print("precision train", round(prec_tr, 3), "recall train", round(rec_tr, 3))

train_metrics = [round(MCC_tr, 3), round(ACC_tr, 3), round(prec_tr, 3), round(rec_tr, 3)]
```
%% Cell type:markdown id: tags:
### Predict on Test
%% Cell type:code id: tags:
``` python
model.eval()

preds_ts = []
trues_ts = []
probs_ts = []
filenames_ts = []

with torch.no_grad():
    for data in dataset_test:
        image = data["data"].unsqueeze(0).to(device)
        label = data["target"]

        output = model(image)  # forward
        _, pred = torch.max(output, 1)

        preds_ts.append(pred.data.cpu().numpy())
        trues_ts.append(dictionary[label])
        probs_ts.append(output.data.cpu().numpy())
        filenames_ts.append(data['filename'])

probs_ts = np.concatenate(probs_ts)
preds_ts = np.concatenate(preds_ts)
trues_ts = np.array(trues_ts)
filenames_ts = np.array(filenames_ts)

MCC_ts = mcor(trues_ts, preds_ts)
ACC_ts = acc(trues_ts, preds_ts)
prec_ts = precision(trues_ts, preds_ts, average='weighted')
rec_ts = recall(trues_ts, preds_ts, average='weighted')

print("MCC test", round(MCC_ts, 3), "ACC test", round(ACC_ts, 3))
print("precision test", round(prec_ts, 3), "recall test", round(rec_ts, 3))

test_metrics = [round(MCC_ts, 3), round(ACC_ts, 3), round(prec_ts, 3), round(rec_ts, 3)]
```
%% Cell type:markdown id: tags:
## Save results
%% Cell type:markdown id: tags:
Save settings
%% Cell type:code id: tags:
``` python
with open(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/settings.pkl', 'wb') as f:
    pickle.dump(settings, f, pickle.HIGHEST_PROTOCOL)
```
%% Cell type:markdown id: tags:
Save losses
%% Cell type:code id: tags:
``` python
losses_tr = np.array(losses_tr)
losses_vl = np.array(losses_ts)

np.save(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/losses_tr.npy', losses_tr)
np.save(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/losses_ts.npy', losses_vl)
```
%% Cell type:markdown id: tags:
Plot losses
%% Cell type:code id: tags:
``` python
plt.figure(figsize=(20, 10))
plt.plot(losses_tr, color='blue')
plt.plot(losses_ts, color='orange')
plt.legend(['train', 'valid'])
# `plt.savefig` does not accept `close`/`verbose` arguments; the figure is closed explicitly below.
plt.savefig(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/losses.png')
plt.close()
```
%% Cell type:markdown id: tags:
Save predictions, ground truth, probabilities and filenames
%% Cell type:code id: tags:
``` python
np.save(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/preds_tr.npy', preds_tr)
np.save(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/trues_tr.npy', trues_tr)
np.save(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/probs_tr.npy', probs_tr)
np.save(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/filenames_tr.npy', filenames_tr)

np.save(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/preds_ts.npy', preds_ts)
np.save(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/trues_ts.npy', trues_ts)
np.save(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/probs_ts.npy', probs_ts)
np.save(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/filenames_ts.npy', filenames_ts)
```
%% Cell type:markdown id: tags:
Save metrics
%% Cell type:code id: tags:
``` python
import pandas as pd

metrics_out = pd.DataFrame((train_metrics, test_metrics),
                           columns=['MCC', 'ACC', 'prec', 'rec'],
                           index=['train', 'test'])
metrics_out.to_csv(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/metrics_out.csv', index=False)
%% Cell type:markdown id: tags:
Save model weights
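A minimal sketch of this step, assuming the final weights are stored next to the other experiment artifacts; the `weights.pth` name mirrors the files loaded by the 'branch-wise' pretraining option above and is otherwise an assumption.
%% Cell type:code id: tags:
``` python
# Hedged sketch: save the trained weights into the experiment folder.
torch.save(model.state_dict(), f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/weights.pth')
```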