Commit cc494d23 authored by Alessia Marcolini's avatar Alessia Marcolini
Browse files

save output metrics (rounded) in csv file

parent e1268943
......@@ -97,19 +97,19 @@
"metadata": {},
"outputs": [],
"source": [
"EXPERIMENT_NAME = 'prova' + datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\")\n",
"EXPERIMENT_NAME = 'Tstage_grouped_noTx_CT_valieres_' + datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\")\n",
"\n",
"settings = {\n",
" 'model': CiompiDO,\n",
" 'batch_size': 32,\n",
" 'lr': 1e-5,\n",
" 'epochs': 500,\n",
" 'epochs': 300,\n",
" 'optim': torch.optim.Adam,\n",
" 'K': 0.5,\n",
" 'K': 0.2,\n",
" 'n_classes': 2, #TSTAGE\n",
" 'seed': 1234,\n",
" 'dropout': 0.5,\n",
" 'split': '8020',\n",
" 'split': 'valieres',\n",
" 'size': 64,\n",
" 'pretrained': '',\n",
" }\n",
......@@ -575,9 +575,10 @@
"prec_tr = precision(trues_tr, preds_tr, average='weighted')\n",
"rec_tr = recall(trues_tr, preds_tr, average='weighted')\n",
"\n",
"print(\"MCC train\", MCC_tr, \"ACC train\", ACC_tr)\n",
"print(\"precision train\", prec_tr, \"recall train\", rec_tr )\n",
"train_metrics = np.array([MCC_tr, ACC_tr, prec_tr, rec_tr])"
"print(\"MCC train\", round(MCC_tr,3), \"ACC train\", round(ACC_tr, 3))\n",
"print(\"precision train\", round(prec_tr, 3), \"recall train\", round(rec_tr, 3))\n",
"\n",
"train_metrics = [round(MCC_tr ,3), round(ACC_tr,3), round(prec_tr, 3), round(rec_tr, 3)]"
]
},
{
......@@ -621,9 +622,10 @@
"ACC_ts = acc(trues_ts, preds_ts)\n",
"prec_ts = precision(trues_ts, preds_ts, average='weighted')\n",
"rec_ts = recall(trues_ts, preds_ts, average='weighted')\n",
"print(\"MCC test\", MCC_ts, \"ACC test\", ACC_ts)\n",
"print(\"precision test\", prec_ts, \"recall test\", rec_ts )\n",
"test_metrics = np.array([MCC_ts, ACC_ts, prec_ts, rec_ts])"
"\n",
"print(\"MCC test\", round(MCC_ts, 3), \"ACC test\", round(ACC_ts, 3))\n",
"print(\"precision test\", round(prec_ts, 3), \"recall test\", round(rec_ts, 3))\n",
"test_metrics = [round(MCC_ts, 3), round(ACC_ts, 3), round(prec_ts, 3), round(rec_ts, 3)]"
]
},
{
......@@ -727,8 +729,9 @@
"metadata": {},
"outputs": [],
"source": [
"metrics_out = np.stack([train_metrics, test_metrics], 0)\n",
"np.savetxt(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/metrics_out.txt', metrics_out)"
"import pandas as pd\n",
"metrics_out = pd.DataFrame((train_metrics, test_metrics), columns=['MCC', 'ACC', 'prec', 'rec'], index=['train', 'test'])\n",
"metrics_out.to_csv(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/metrics_out.csv')"
]
},
{
......@@ -737,98 +740,6 @@
"source": [
"Save model weights"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"torch.save(model.state_dict(), f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/weights.pth')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"clinical = pd.read_csv('data/clinical_data.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"clinical['patient'] = [('-').join((p.split('-')[0], p.split('-')[1])) for p in clinical['Patient #']]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"clinical.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"HGJ = clinical.loc[clinical['patient']=='HN-HGJ']\n",
"CHUS = clinical.loc[clinical['patient']=='HN-CHUS']\n",
"HMR = clinical.loc[clinical['patient']=='HN-HMR']\n",
"CHUM = clinical.loc[clinical['patient']=='HN-CHUM']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"np.mean(CHUM['Age']), np.std(CHUM['Age'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"CHUM.groupby('Sex').count()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"CHUM.groupby('Locoregional').count()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from mlpy import bootstrap_ci"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
......
%% Cell type:markdown id: tags:
## Training network for feature extraction
%% Cell type:markdown id: tags:
### Set Path
%% Cell type:code id: tags:
``` python
%reload_ext autoreload
%autoreload 2
import os
PATH = os.getcwd()
print(PATH)
```
%% Cell type:markdown id: tags:
### Import packages
%% Cell type:code id: tags:
``` python
import datetime
import gc
import pickle
import sys
import time
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import matthews_corrcoef as mcor, accuracy_score as acc, recall_score as recall, precision_score as precision, confusion_matrix
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from networks import CiompiDO, ResNet50_3d
from dataset import NumpyCSVDataset, augment_3D_HN
from split import train_test_indexes_patient_wise
```
%% Cell type:code id: tags:
``` python
#os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
multigpu = True
```
%% Cell type:code id: tags:
``` python
DATASET_DIR = f"/thunderdisk/HN/processed/bbox_fixed2_64/" #Not augmented but already 64**3 (for faster loading)
EXPERIMENT_DIR = f"{PATH}/experiments"
PRETRAINED_MED3D_WEIGHTS = '/thunderdisk/HN/MedicalNet_pytorch_files/pretrain/resnet_50.pth'
PRETRAINED_T_STAGE = f'{EXPERIMENT_DIR}/Tstage_binary_augmented_noTx_branch_wise_20191028-104101/checkpoint_40.pth'
```
%% Cell type:markdown id: tags:
### Settings
%% Cell type:code id: tags:
``` python
# Experiment identifier: descriptive tag + timestamp so every run gets a
# unique, non-clobbering output folder under EXPERIMENT_DIR.
EXPERIMENT_NAME = 'Tstage_grouped_noTx_CT_valieres_' + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

# Hyper-parameters and run configuration for this experiment.
settings = {
    'model': CiompiDO,
    'batch_size': 32,
    'lr': 1e-5,
    'epochs': 300,
    'optim': torch.optim.Adam,
    'K': 0.2,             # test fraction, used only by the '8020' patient-wise split
    'n_classes': 2,       # TSTAGE
    'seed': 1234,
    'dropout': 0.5,
    'split': 'valieres',  # 'valieres' = institution-wise split, '8020' = random patient-wise
    'size': 64,
    'pretrained': '',     # one of: 'Med3D', 'branch-wise', 'T-stage', ''
}

# BUG FIX: the allowed list previously read 'valeries' (typo), so the
# 'valieres' split configured above could never pass this assertion.
assert settings['split'] in ['valieres', '8020']
assert settings['pretrained'] in ['Med3D', 'branch-wise', 'T-stage', '']

# exist_ok=False on purpose: refuse to reuse/overwrite a previous run's folder.
os.makedirs(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}', exist_ok=False)
```
%% Cell type:code id: tags:
``` python
MODEL = settings['model']
BATCH_SIZE = settings['batch_size']
LR = settings['lr']
EPOCHS = settings['epochs']
OPTIMIZER = settings['optim']
K = settings['K']
N_CLASSES = settings['n_classes']
SEED = settings['seed']
DROPOUT = settings['dropout']
SPLIT = settings['split']
SIZE = settings['size']
PRETRAINED = settings['pretrained']
```
%% Cell type:markdown id: tags:
### Tensorboard settings
%% Cell type:code id: tags:
``` python
def new_run_log_dir(experiment_name):
    """Return the TensorBoard run directory path for *experiment_name*.

    Ensures the parent 'tb-runs' folder exists; the run folder itself is
    created later by SummaryWriter on first write.
    """
    log_dir = os.path.join(PATH, 'tb-runs')
    # exist_ok=True replaces the racy exists()-then-makedirs() pattern.
    os.makedirs(log_dir, exist_ok=True)
    return os.path.join(log_dir, experiment_name)
log_dir = new_run_log_dir(EXPERIMENT_NAME)
print(f'Tensorboard folder: {log_dir}')
writer = SummaryWriter(log_dir)
```
%% Cell type:markdown id: tags:
### Data Handlers
%% Cell type:code id: tags:
``` python
clinical_data = f'{PATH}/data/clinical_data_noTx.csv'
target_column = 'T-stage_binary'
```
%% Cell type:code id: tags:
``` python
np.random.seed(SEED)
dataset = NumpyCSVDataset(DATASET_DIR, clinical_data, target_column, SIZE, seed=SEED)
```
%% Cell type:markdown id: tags:
Create train-test datasets
%% Cell type:code id: tags:
``` python
# Institution-wise ('valieres') split: train on CHUS+HGJ patients, test on
# HMR+CHUM; otherwise fall back to a random patient-wise split with test
# fraction K. Institution code is the second '-' token of each filename.
if SPLIT == 'valieres':
dataset_train = NumpyCSVDataset(DATASET_DIR, clinical_data, target_column, SIZE, mode='train', transforms=augment_3D_HN)
# in this particular case getting `dataset_train._files_full` or `dataset_train.get_files()` is the same
idx_train = [i for i, f in enumerate(dataset_train.get_files()) if f.split('-')[1] in ['CHUS', 'HGJ']]
dataset_train.indexes = np.array(idx_train)
dataset_test = NumpyCSVDataset(DATASET_DIR, clinical_data, target_column, SIZE, mode='test', transforms=augment_3D_HN)
# likewise here for `dataset_test` (original comment said dataset_train; it is the test set)
idx_test = [i for i, f in enumerate(dataset_test.get_files()) if f.split('-')[1] in ['HMR', 'CHUM']]
dataset_test.indexes = np.array(idx_test)
else:
idx_train, idx_test = train_test_indexes_patient_wise(dataset, test_size=K, stratify=True)
dataset_test = NumpyCSVDataset(DATASET_DIR, clinical_data, target_column, SIZE, mode='test', transforms=augment_3D_HN)
dataset_test.indexes = np.array(idx_test)
dataset_train = NumpyCSVDataset(DATASET_DIR, clinical_data, target_column, SIZE, mode='train', transforms=augment_3D_HN)
dataset_train.indexes = np.array(idx_train)
```
%% Cell type:markdown id: tags:
Check class balance
%% Cell type:code id: tags:
``` python
labels_test = dataset_test.get_labels()
labels_train = dataset_train.get_labels()
c,n = np.unique(labels_test, return_counts=True)
print(np.c_[c,n/len(labels_test)])
c,n = np.unique(labels_train, return_counts=True)
print(np.c_[c,n/len(labels_train)])
```
%% Cell type:markdown id: tags:
Create loaders
%% Cell type:code id: tags:
``` python
loader_test = DataLoader(dataset_test, batch_size=BATCH_SIZE//2, num_workers=12, shuffle=True)
loader_train = DataLoader(dataset_train, batch_size=BATCH_SIZE, num_workers=12, pin_memory=True, shuffle=True)
```
%% Cell type:markdown id: tags:
Compute weights
%% Cell type:code id: tags:
``` python
# Inverse-frequency class weights for the loss: rarer classes get larger
# weights so the imbalanced training set does not bias the classifier.
labels = dataset_train.get_labels()
_, class_sample_count = np.unique(labels, return_counts=True)
n_min = np.min(class_sample_count)
weights = n_min / class_sample_count # proportional version: scaling by n_min (instead of 1) keeps the weights close to 1
weights = torch.Tensor(weights).to(device)
```
%% Cell type:markdown id: tags:
### Initialize Model
%% Cell type:code id: tags:
``` python
model = MODEL(n_classes=N_CLASSES, n_channels=1, modality='CT', dropout=DROPOUT)
if multigpu:
model = nn.DataParallel(model.to(device))
model = model.module
```
%% Cell type:code id: tags:
``` python
model.initialize_weights()
if PRETRAINED == 'Med3D':
pretrained_dict = torch.load(PRETRAINED_MED3D_WEIGHTS)['state_dict']
model_dict = model.state_dict()
# discard layers not present in destination network or with different shape
pretrained_dict = {k: v for k, v in pretrained_dict.items() if
(k in model_dict) and (model_dict[k].shape == pretrained_dict[k].shape)}
for name in model.state_dict().keys():
if name in pretrained_dict.keys():
#print(name)
model.state_dict()[name].copy_(pretrained_dict[name])
elif PRETRAINED == 'branch-wise':
pretrained_CT_dict = torch.load(f'{EXPERIMENT_DIR}/Tstage_grouped_noTx_CT_20191021-143133/weights.pth')
pretrained_PT_dict = torch.load(f'{EXPERIMENT_DIR}/Tstage_binary_PET_noTx_20191022-124046/weights.pth')
model_dict = model.state_dict()
pretrained_CT_dict = {k: v for k, v in pretrained_CT_dict.items() if
(k in model_dict) and (model_dict[k].shape == pretrained_CT_dict[k].shape)}
pretrained_PT_dict = {k: v for k, v in pretrained_PT_dict.items() if
(k in model_dict) and (model_dict[k].shape == pretrained_PT_dict[k].shape)}
to_add = 'module.' if multigpu else ''
for name in model.CT_branch.state_dict().keys():
name_complete = to_add + 'CT_branch.' + name
#print(name_complete)
if name_complete in pretrained_CT_dict.keys():
print(name)
model.CT_branch.state_dict()[name].copy_(pretrained_CT_dict[name_complete])
for name in model.PT_branch.state_dict().keys():
name_complete = to_add + 'PT_branch.' + name
#print(name_complete)
if name_complete in pretrained_PT_dict.keys():
print(name)
model.PT_branch.state_dict()[name].copy_(pretrained_PT_dict[name_complete])
elif PRETRAINED == 'T-stage':
pretrained_dict = torch.load(PRETRAINED_T_STAGE)
model_dict = model.state_dict()
# discard layers not present in destination network or with different shape
pretrained_dict = {k: v for k, v in pretrained_dict.items() if
(k in model_dict) and (model_dict[k].shape == pretrained_dict[k].shape)}
for name in model.state_dict().keys():
if name in pretrained_dict.keys():
#print(name)
model.state_dict()[name].copy_(pretrained_dict[name])
```
%% Cell type:markdown id: tags:
Optimizer
%% Cell type:code id: tags:
``` python
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
```
%% Cell type:code id: tags:
``` python
#[x.shape for x in model.parameters()]
```
%% Cell type:markdown id: tags:
Loss
%% Cell type:code id: tags:
``` python
criterion = nn.CrossEntropyLoss(weight=weights)
```
%% Cell type:code id: tags:
``` python
NEW_LABELS = list(range(len(list(np.unique(labels_train)))))
dictionary = dict(zip(list(np.unique(labels_train)), NEW_LABELS))
dictionary
```
%% Cell type:markdown id: tags:
### Train
%% Cell type:code id: tags:
``` python
model.train() # Set model to training mode
global_i = 0
losses_tr = []
losses_ts = []
last_loss_test = -1
iteration = 0
start_time = time.time()
for epoch in range(EPOCHS):
#print(epoch)
if epoch % 10 == 0: #save checkpoint
torch.save(model.state_dict(), f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/checkpoint_{epoch}.pth')
for j, data in enumerate(loader_train):
global_i += 1
if j%10 == 0:
print(time.time() - start_time)
start_time = time.time()
optimizer.zero_grad()
images_tr = data['data'].to(device)
labels_tr = torch.LongTensor([dictionary[i] for i in data['target']]).to(device)
outputs_tr = model(images_tr).to(device)
# backward
loss = criterion(outputs_tr, labels_tr)
loss.backward()
optimizer.step()
# check test set
if j % int(len(loader_train) / 2) == 0 and j != 0:
model.eval()
with torch.no_grad():
losses_sum = 0
num_samples_test = 0
for data_test in loader_test:
images_ts = data_test['data'].to(device)
labels_ts = torch.LongTensor([dictionary[i] for i in data_test['target']]).to(device)
outputs_ts = model.forward(images_ts)
loss_test_sum = criterion(outputs_ts, labels_ts).item()
losses_sum += loss_test_sum
num_samples_test += 1
loss_test_avg = losses_sum / num_samples_test
writer.add_scalar(f'{EXPERIMENT_NAME}/test_loss', loss_test_avg, global_i)
writer.flush()
#is_best = loss_val_avg < last_loss_val
#if is_best:
# torch.save(model.state_dict(),
# f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/checkpoint_best_{epoch}.pth')
last_loss_test = loss_test_avg
losses_tr.append(loss.item())
losses_ts.append(loss_test_avg)
del images_ts, labels_ts
iteration += 1
del images_tr, labels_tr
gc.collect()
model.train()
# sys.stdout.write
writer.add_scalar(f'{EXPERIMENT_NAME}/train_loss', loss.item(), global_i)
writer.flush()
sys.stdout.write('\r Epoch {} of {} [{:.2f}%] - loss TR/TS: {:.4f} / {:.4f} - {}'.format(epoch + 1, EPOCHS,
100 * j / len(
loader_train),
loss.item(),
last_loss_test,
optimizer.param_groups[
0]['lr']))
```
%% Cell type:markdown id: tags:
### Predict on Train
%% Cell type:code id: tags:
``` python
# Evaluate the trained model on the TRAINING set and collect per-sample
# predictions, ground truth, raw outputs and filenames.
model.eval()
dataset_train.mode = 'test'  # disable augmentation for evaluation
preds_tr = []
trues_tr = []
probs_tr = []
filenames_tr = []
with torch.no_grad():
    for data in dataset_train:
        image = data["data"].unsqueeze(0).to(device)
        label = data["target"]
        output = model(image)  # forward pass
        _, pred = torch.max(output, 1)
        preds_tr.append(pred.data.cpu().numpy())
        trues_tr.append(dictionary[label])
        probs_tr.append(output.data.cpu().numpy())
        filenames_tr.append(data['filename'])
probs_tr = np.concatenate(probs_tr)
preds_tr = np.concatenate(preds_tr)
trues_tr = np.array(trues_tr)
filenames_tr = np.array(filenames_tr)

MCC_tr = mcor(trues_tr, preds_tr)
ACC_tr = acc(trues_tr, preds_tr)
prec_tr = precision(trues_tr, preds_tr, average='weighted')
rec_tr = recall(trues_tr, preds_tr, average='weighted')

# Single, rounded report (the older unrounded prints and the np.array that
# was immediately overwritten were leftover diff residue and are removed).
print("MCC train", round(MCC_tr, 3), "ACC train", round(ACC_tr, 3))
print("precision train", round(prec_tr, 3), "recall train", round(rec_tr, 3))
train_metrics = [round(MCC_tr, 3), round(ACC_tr, 3), round(prec_tr, 3), round(rec_tr, 3)]
```
%% Cell type:markdown id: tags:
### Predict on Test
%% Cell type:code id: tags:
``` python
# Evaluate the trained model on the TEST set and collect per-sample
# predictions, ground truth, raw outputs and filenames.
model.eval()
preds_ts = []
trues_ts = []
probs_ts = []
filenames_ts = []
with torch.no_grad():
    for data in dataset_test:
        image = data["data"].unsqueeze(0).to(device)
        label = data["target"]
        output = model(image)  # forward pass
        _, pred = torch.max(output, 1)
        preds_ts.append(pred.data.cpu().numpy())
        trues_ts.append(dictionary[label])
        probs_ts.append(output.data.cpu().numpy())
        filenames_ts.append(data['filename'])
probs_ts = np.concatenate(probs_ts)
preds_ts = np.concatenate(preds_ts)
trues_ts = np.array(trues_ts)
filenames_ts = np.array(filenames_ts)

MCC_ts = mcor(trues_ts, preds_ts)
ACC_ts = acc(trues_ts, preds_ts)
prec_ts = precision(trues_ts, preds_ts, average='weighted')
rec_ts = recall(trues_ts, preds_ts, average='weighted')

# BUG FIX: the rounded prints were mislabelled "train" and the results were
# stored into train_metrics, clobbering the real train metrics. These are
# TEST metrics and must go into test_metrics (consumed by the save cell).
print("MCC test", round(MCC_ts, 3), "ACC test", round(ACC_ts, 3))
print("precision test", round(prec_ts, 3), "recall test", round(rec_ts, 3))
test_metrics = [round(MCC_ts, 3), round(ACC_ts, 3), round(prec_ts, 3), round(rec_ts, 3)]
```
%% Cell type:markdown id: tags:
## Save results
%% Cell type:markdown id: tags:
Save settings
%% Cell type:code id: tags:
``` python
with open(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/settings.pkl', 'wb') as f:
pickle.dump(settings, f, pickle.HIGHEST_PROTOCOL)
```
%% Cell type:markdown id: tags:
Save losses
%% Cell type:code id: tags:
``` python
losses_tr = np.array(losses_tr)
losses_vl = np.array(losses_ts)
np.save(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/losses_tr.npy', losses_tr)
np.save(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/losses_ts.npy', losses_vl)
```
%% Cell type:markdown id: tags:
Plot losses
%% Cell type:code id: tags:
``` python
plt.figure(figsize=(20,10))
plt.plot(losses_tr, color='blue')
plt.plot(losses_ts, color='orange')
plt.legend(['train', 'valid'])
plt.savefig(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/losses.png', close=True, verbose=True)
plt.close()
```
%% Cell type:markdown id: tags:
Save predictions, ground truth, probabilities and filenames
%% Cell type:code id: tags:
``` python
np.save(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/preds_tr.npy', preds_tr)
np.save(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/trues_tr.npy', trues_tr)
np.save(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/probs_tr.npy', probs_tr)
np.save(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/filenames_tr.npy', filenames_tr)
np.save(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/preds_ts.npy', preds_ts)
np.save(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/trues_ts.npy', trues_ts)
np.save(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/probs_ts.npy', probs_ts)
np.save(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/filenames_ts.npy', filenames_ts)
```
%% Cell type:markdown id: tags:
Save metrics
%% Cell type:code id: tags:
``` python
# Save train/test metrics as a labelled CSV table (replaces the older
# unlabelled np.savetxt text dump, which was leftover diff residue).
import pandas as pd

metrics_out = pd.DataFrame(
    (train_metrics, test_metrics),
    columns=['MCC', 'ACC', 'prec', 'rec'],
    index=['train', 'test'],
)
# BUG FIX: index=False dropped the 'train'/'test' row labels assigned above;
# writing the index keeps the rows identifiable in the CSV.
metrics_out.to_csv(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/metrics_out.csv')
```
%% Cell type:markdown id: tags:
Save model weights
%% Cell type:code id: tags:
``` python
torch.save(model.state_dict(), f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/weights.pth')
```
%% Cell type:code id: tags:
``` python
import pandas as pd
clinical = pd.read_csv('data/clinical_data.csv')
```
%% Cell type:code id: tags:
``` python
clinical['patient'] = [('-').join((p.split('-')[0], p.split('-')[1])) for p in clinical['Patient #']]
```
%% Cell type:code id: tags:
``` python
clinical.head()
```
%% Cell type:code id: tags:
``` python
HGJ = clinical.loc[clinical['patient']=='HN-HGJ']
CHUS = clinical.loc[clinical['patient']=='HN-CHUS']
HMR = clinical.loc[clinical['patient']=='HN-HMR']
CHUM = clinical.loc[clinical['patient']=='HN-CHUM']
```
%% Cell type:code id: tags:
``` python
np.mean(CHUM['Age']), np.std(CHUM['Age'])
```
%% Cell type:code id: tags:
``` python
CHUM.groupby('Sex').count()
```
%% Cell type:code id: tags:
``` python
CHUM.groupby('Locoregional').count()
```
%% Cell type:code id: tags:
``` python
from mlpy import bootstrap_ci
```
%% Cell type:code id: tags:
``` python
```
......
......@@ -63,19 +63,19 @@ PRETRAINED_T_STAGE = f'{EXPERIMENT_DIR}/Tstage_binary_augmented_noTx_branch_wise
# In[ ]:
EXPERIMENT_NAME = 'prova' + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
EXPERIMENT_NAME = 'Tstage_grouped_noTx_CT_valieres_' + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
settings = {
'model': CiompiDO,
'batch_size': 32,
'lr': 1e-5,
'epochs': 500,
'epochs': 300,
'optim': torch.optim.Adam,
'K': 0.5,
'K': 0.2,
'n_classes': 2, #TSTAGE
'seed': 1234,
'dropout': 0.5,
'split': '8020',
'split': 'valieres',
'size': 64,
'pretrained': '',
}
......@@ -438,9 +438,10 @@ ACC_tr = acc(trues_tr, preds_tr)
prec_tr = precision(trues_tr, preds_tr, average='weighted')
rec_tr = recall(trues_tr, preds_tr, average='weighted')
print("MCC train", MCC_tr, "ACC train", ACC_tr)
print("precision train", prec_tr, "recall train", rec_tr )
train_metrics = np.array([MCC_tr, ACC_tr, prec_tr, rec_tr])
print("MCC train", round(MCC_tr,3), "ACC train", round(ACC_tr, 3))
print("precision train", round(prec_tr, 3), "recall train", round(rec_tr, 3))
train_metrics = [round(MCC_tr ,3), round(ACC_tr,3), round(prec_tr, 3), round(rec_tr, 3)]
# ### Predict on Test
......@@ -476,9 +477,10 @@ MCC_ts = mcor(trues_ts, preds_ts)
ACC_ts = acc(trues_ts, preds_ts)
prec_ts = precision(trues_ts, preds_ts, average='weighted')
rec_ts = recall(trues_ts, preds_ts, average='weighted')
# BUG FIX: the rounded prints were mislabelled "train" and the result
# overwrote train_metrics; these are TEST metrics and belong in test_metrics.
print("MCC test", round(MCC_ts, 3), "ACC test", round(ACC_ts, 3))
print("precision test", round(prec_ts, 3), "recall test", round(rec_ts, 3))
test_metrics = [round(MCC_ts, 3), round(ACC_ts, 3), round(prec_ts, 3), round(rec_ts, 3)]
# ## Save results
......@@ -539,72 +541,9 @@ np.save(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/filenames_ts.npy', filenames_ts)
# In[ ]:
metrics_out = np.stack([train_metrics, test_metrics], 0)
np.savetxt(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/metrics_out.txt', metrics_out)
# Save model weights
# In[ ]:
torch.save(model.state_dict(), f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/weights.pth')
# In[ ]:
import pandas as pd
clinical = pd.read_csv('data/clinical_data.csv')
# In[ ]:
clinical['patient'] = [('-').join((p.split('-')[0], p.split('-')[1])) for p in clinical['Patient #']]
# In[ ]:
clinical.head()
# In[ ]:
HGJ = clinical.loc[clinical['patient']=='HN-HGJ']
CHUS = clinical.loc[clinical['patient']=='HN-CHUS']
HMR = clinical.loc[clinical['patient']=='HN-HMR']
CHUM = clinical.loc[clinical['patient']=='HN-CHUM']
# In[ ]:
np.mean(CHUM['Age']), np.std(CHUM['Age'])
# In[ ]:
CHUM.groupby('Sex').count()
# In[ ]:
CHUM.groupby('Locoregional').count()
# In[ ]:
from mlpy import bootstrap_ci
# In[ ]:
# BUG FIX: index=False discarded the 'train'/'test' row labels set just
# above; write the index so the CSV rows stay identifiable.
metrics_out = pd.DataFrame((train_metrics, test_metrics), columns=['MCC', 'ACC', 'prec', 'rec'], index=['train', 'test'])
metrics_out.to_csv(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/metrics_out.csv')
# Save model weights
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment