# %% [markdown]
# ## Training network for feature extraction

# %%
import datetime
import gc
import os
import pickle
import sys
import time
from pathlib import Path

# os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.metrics import accuracy_score as acc
from sklearn.metrics import confusion_matrix
from sklearn.metrics import matthews_corrcoef as mcor
from sklearn.metrics import precision_score as precision
from sklearn.metrics import recall_score as recall
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

from dataset import NumpyCSVDataset, augment_3D_HN
from networks import CiompiDO, ResNet50_3d
from split import train_test_indexes_patient_wise
from config import get_project_root

# %%
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
multigpu = True

PROJECT_ROOT = get_project_root()

DATASET = 'HN_val'
BBOX_SUBDATASET = 'bbox_64'
DATASET_DIR = PROJECT_ROOT / 'data' / DATASET / 'processed' / 'bbox' / BBOX_SUBDATASET
EXPERIMENT_DIR = PROJECT_ROOT / 'experiments'

PRETRAINED_MED3D_WEIGHTS = PROJECT_ROOT / 'pretrained_weights' / 'resnet_50.pth'
PRETRAINED_T_STAGE = EXPERIMENT_DIR / 'Tstage_4_noTx_CT_20191114-163418' / 'weights.pth'

# %%
# ### Settings
EXPERIMENT_NAME = "prova" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

settings = {
    "model": CiompiDO,
    "batch_size": 16,
    "lr": 1e-5,
    "epochs": 1,
    "optim": torch.optim.Adam,
    "K": 0.2,
    "n_classes": 4,  # T-stage groups
    "seed": 1234,
    "dropout": 0.5,
    "split": "8020",
    "size": 64,
    "pretrained": "",
}

assert settings["split"] in ["vallieres", "8020"]
# the Vallieres split is only defined for the HN_val dataset
assert settings["split"] != "vallieres" or DATASET == 'HN_val'
assert settings["pretrained"] in ["Med3D", "branch-wise", "T-stage", ""]

os.makedirs(EXPERIMENT_DIR / EXPERIMENT_NAME, exist_ok=False)

# %%
MODEL = settings["model"]
BATCH_SIZE = settings["batch_size"]
LR = settings["lr"]
EPOCHS = settings["epochs"]
OPTIMIZER = settings["optim"]
K = settings["K"]
N_CLASSES = settings["n_classes"]
SEED = settings["seed"]
DROPOUT = settings["dropout"]
SPLIT = settings["split"]
SIZE = settings["size"]
PRETRAINED = settings["pretrained"]

# %%
# ### Tensorboard settings
def new_run_log_dir(experiment_name):
    log_dir = PROJECT_ROOT / "tb-runs"
    os.makedirs(log_dir, exist_ok=True)
    return log_dir / experiment_name


log_dir = new_run_log_dir(EXPERIMENT_NAME)
print(f"Tensorboard folder: {log_dir}")
writer = SummaryWriter(log_dir)

# %%
# ### Data Handlers
clinical_file = (
    PROJECT_ROOT / 'data' / DATASET / 'processed' / f'clinical_{DATASET}.csv'
)
target_column = "T-stage_grouped"

# %%
np.random.seed(SEED)

dataset_train = NumpyCSVDataset(
    data_dir=DATASET_DIR,
    clinical_file=clinical_file,
    label_col=target_column,
    size=SIZE,
    mode='train',
    seed=SEED,
)
dataset_test = NumpyCSVDataset(
    data_dir=DATASET_DIR,
    clinical_file=clinical_file,
    label_col=target_column,
    size=SIZE,
    mode='test',
    seed=SEED,
)

# %%
# Create train-test split: either the fixed Vallieres center split
# (CHUS/HGJ for training, HMR/CHUM for testing) or a stratified,
# patient-wise 80/20 split.
if SPLIT == "vallieres":
    idx_train = [
        i
        for i, f in enumerate(dataset_train.patients)
        if f.split("-")[1] in ["CHUS", "HGJ"]
    ]
    idx_test = [
        i
        for i, f in enumerate(dataset_test.patients)
        if f.split("-")[1] in ["HMR", "CHUM"]
    ]
else:
    idx_train, idx_test = train_test_indexes_patient_wise(
        dataset_train, test_size=K, stratify=True
    )

dataset_train.indices = np.array(idx_train)
dataset_test.indices = np.array(idx_test)
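# %%
# Quick sanity check (illustrative addition, not required by the pipeline):
# the two index sets must be disjoint. Patient-wise separation itself is
# guaranteed by the center split / train_test_indexes_patient_wise above.
assert set(idx_train).isdisjoint(idx_test), "train/test indices overlap"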
# %%
# Create loaders
loader_train = DataLoader(
    dataset_train, batch_size=BATCH_SIZE, num_workers=12, pin_memory=True, shuffle=True
)
loader_test = DataLoader(
    dataset_test, batch_size=BATCH_SIZE, num_workers=12, shuffle=False
)

# %%
# Compute class weights on the training set only: each class is weighted by
# n_max / n_class, so the most frequent class gets weight 1 and rarer classes
# get proportionally larger weights in the loss.
labels_train = dataset_train.labels
_, class_sample_count = np.unique(labels_train, return_counts=True)
n_max = np.max(class_sample_count)
weights = n_max / class_sample_count
weights = torch.Tensor(weights).to(device)

# %%
# ### Initialize Model
model = MODEL(n_classes=N_CLASSES, n_channels=2, modality="CT/PET", dropout=DROPOUT)
model = model.to(device)

if multigpu:
    model = nn.DataParallel(model)
    # Unwrap immediately so the direct attribute access below (model.CT_branch,
    # model.PT_branch, state_dict() with unprefixed keys) keeps working. Note
    # that this also discards the data-parallel wrapper, so forward passes run
    # on a single device.
    model = model.module
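# %%
# The pretrained-loading branches below share one recipe: keep only the source
# tensors whose key exists in the destination with an identical shape, then
# copy them in. A reusable sketch of that recipe (our own helper, not called
# by this script) for reference:
def load_matching_weights(dst_model, src_state):
    """Copy into dst_model every tensor from src_state whose key and shape
    match the destination; return the sorted list of copied keys."""
    dst_state = dst_model.state_dict()
    matched = {
        k: v
        for k, v in src_state.items()
        if k in dst_state and dst_state[k].shape == v.shape
    }
    dst_state.update(matched)
    dst_model.load_state_dict(dst_state)
    return sorted(matched)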
# %%
# model.initialize_weights()
if PRETRAINED == "Med3D":
    pretrained_dict = torch.load(PRETRAINED_MED3D_WEIGHTS)["state_dict"]
    model_dict = model.state_dict()
    # discard layers not present in the destination network or with a different shape
    pretrained_dict = {
        k: v
        for k, v in pretrained_dict.items()
        if (k in model_dict) and (model_dict[k].shape == v.shape)
    }
    for name in model.state_dict().keys():
        if name in pretrained_dict.keys():
            # print(name)
            model.state_dict()[name].copy_(pretrained_dict[name])
elif PRETRAINED == "branch-wise":
    pretrained_CT_dict = torch.load(
        EXPERIMENT_DIR
        / 'Tstage_grouped_noTx_CT_valieres_20191029-173736'
        / 'checkpoint_290.pth'
    )
    pretrained_PT_dict = torch.load(
        EXPERIMENT_DIR
        / 'Tstage_grouped_noTx_PET_valieres_20191029-195338'
        / 'checkpoint_290.pth'
    )
    model_dict = model.state_dict()
    pretrained_CT_dict = {
        k: v
        for k, v in pretrained_CT_dict.items()
        if (k in model_dict) and (model_dict[k].shape == v.shape)
    }
    pretrained_PT_dict = {
        k: v
        for k, v in pretrained_PT_dict.items()
        if (k in model_dict) and (model_dict[k].shape == v.shape)
    }
    # checkpoints saved from a DataParallel model carry a "module." key prefix
    to_add = "module." if multigpu else ""
    for name in model.CT_branch.state_dict().keys():
        name_complete = to_add + "CT_branch." + name
        # print(name_complete)
        if name_complete in pretrained_CT_dict.keys():
            print(name)
            model.CT_branch.state_dict()[name].copy_(pretrained_CT_dict[name_complete])
    for name in model.PT_branch.state_dict().keys():
        name_complete = to_add + "PT_branch." + name
        # print(name_complete)
        if name_complete in pretrained_PT_dict.keys():
            print(name)
            model.PT_branch.state_dict()[name].copy_(pretrained_PT_dict[name_complete])
elif PRETRAINED == "T-stage":
    pretrained_dict = torch.load(PRETRAINED_T_STAGE)
    model_dict = model.state_dict()
    # discard layers not present in the destination network or with a different shape
    pretrained_dict = {
        k: v
        for k, v in pretrained_dict.items()
        if (k in model_dict) and (model_dict[k].shape == v.shape)
    }
    for name in model.state_dict().keys():
        if name in pretrained_dict.keys():
            # print(name)
            model.state_dict()[name].copy_(pretrained_dict[name])

# %%
# Optimizer and criterion
optimizer = OPTIMIZER(model.parameters(), lr=LR)
criterion = nn.CrossEntropyLoss(weight=weights)

# %%
# ### Train
model.train()  # set model to training mode

global_i = 0
losses_tr = []
losses_ts = []
last_loss_test = -1
iteration = 0
start_time = time.time()

for epoch in range(EPOCHS):
    if epoch % 10 == 0:
        # save checkpoint
        torch.save(
            model.state_dict(),
            EXPERIMENT_DIR / EXPERIMENT_NAME / f'checkpoint_{epoch}.pth',
        )

    for j, data in enumerate(loader_train):
        global_i += 1

        if j % 10 == 0:
            print(time.time() - start_time)
            start_time = time.time()

        optimizer.zero_grad()

        images_tr = data["data"].to(device)
        labels_tr = torch.LongTensor(data["target"]).to(device)
        outputs_tr = model(images_tr)

        # backward
        loss = criterion(outputs_tr, labels_tr)
        loss.backward()
        optimizer.step()

        # evaluate on the test set twice per epoch
        if j != 0 and j % max(1, len(loader_train) // 2) == 0:
            model.eval()
            with torch.no_grad():
                losses_sum = 0
                num_samples_test = 0
                for data_test in loader_test:
                    images_ts = data_test["data"].to(device)
                    labels_ts = torch.LongTensor(data_test["target"]).to(device)
                    outputs_ts = model(images_ts)
                    losses_sum += criterion(outputs_ts, labels_ts).item()
                    num_samples_test += 1

                loss_test_avg = losses_sum / num_samples_test

                writer.add_scalar(
                    f"{EXPERIMENT_NAME}/test_loss", loss_test_avg, global_i
                )
                writer.flush()

                # TODO: fix best model check
                # is_best = loss_test_avg < last_loss_test
                # if is_best:
                #     torch.save(model.state_dict(),
                #                EXPERIMENT_DIR / EXPERIMENT_NAME / f'checkpoint_best_{epoch}.pth')

                last_loss_test = loss_test_avg
                losses_tr.append(loss.item())
                losses_ts.append(loss_test_avg)
                del images_ts, labels_ts

            iteration += 1
            del images_tr, labels_tr
            gc.collect()
            model.train()

        writer.add_scalar(f"{EXPERIMENT_NAME}/train_loss", loss.item(), global_i)
        writer.flush()
        sys.stdout.write(
            "\r Epoch {} of {} [{:.2f}%] - loss TR/TS: {:.4f} / {:.4f} - lr: {}".format(
                epoch + 1,
                EPOCHS,
                100 * j / len(loader_train),
                loss.item(),
                last_loss_test,
                optimizer.param_groups[0]["lr"],
            )
        )

# %%
# ### Predict on Train
model.eval()
dataset_train.mode = "test"  # disable augmentation

preds_tr = []
trues_tr = []
probs_tr = []
filenames_tr = []
with torch.no_grad():
    for data in dataset_train:
        image = data["data"].unsqueeze(0).to(device)
        label = data["target"]
        output = model(image)  # forward pass
        _, pred = torch.max(output, 1)

        preds_tr.append(pred.data.cpu().numpy())
        trues_tr.append(label)
        probs_tr.append(output.data.cpu().numpy())
        filenames_tr.append(data["filename"])

probs_tr = np.concatenate(probs_tr)
preds_tr = np.concatenate(preds_tr)
trues_tr = np.array(trues_tr)
filenames_tr = np.array(filenames_tr)

MCC_tr = mcor(trues_tr, preds_tr)
ACC_tr = acc(trues_tr, preds_tr)
prec_tr = precision(trues_tr, preds_tr, average="weighted")
rec_tr = recall(trues_tr, preds_tr, average="weighted")

print("MCC train", round(MCC_tr, 3), "ACC train", round(ACC_tr, 3))
print("precision train", round(prec_tr, 3), "recall train", round(rec_tr, 3))

train_metrics = [
    round(MCC_tr, 3),
    round(ACC_tr, 3),
    round(prec_tr, 3),
    round(rec_tr, 3),
]
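# %%
# Illustrative addition: confusion_matrix is imported above but never used.
# A per-class breakdown complements the weighted precision/recall (rows are
# true classes, columns are predictions); the same call can be repeated for
# the test predictions computed in the next cell.
print(confusion_matrix(trues_tr, preds_tr))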
"ACC train", round(ACC_tr, 3)) print("precision train", round(prec_tr, 3), "recall train", round(rec_tr, 3)) train_metrics = [ round(MCC_tr, 3), round(ACC_tr, 3), round(prec_tr, 3), round(rec_tr, 3), ] # %% # ### Predict on Test model.eval() preds_ts = [] trues_ts = [] probs_ts = [] filenames_ts = [] with torch.no_grad(): for data in dataset_test: image = data["data"].unsqueeze(0).to(device) label = data["target"] output = model(image) # forward _, pred = torch.max(output, 1) preds_ts.append(pred.data.cpu().numpy()) trues_ts.append(label) probs_ts.append(output.data.cpu().numpy()) filenames_ts.append(data["filename"]) probs_ts = np.concatenate(probs_ts) preds_ts = np.concatenate(preds_ts) trues_ts = np.array(trues_ts) filenames_ts = np.array(filenames_ts) MCC_ts = mcor(trues_ts, preds_ts) ACC_ts = acc(trues_ts, preds_ts) prec_ts = precision(trues_ts, preds_ts, average="weighted") rec_ts = recall(trues_ts, preds_ts, average="weighted") print("MCC test", round(MCC_ts, 3), "ACC test", round(ACC_ts, 3)) print("precision test", round(prec_ts, 3), "recall test", round(rec_ts, 3)) test_metrics = [round(MCC_ts, 3), round(ACC_ts, 3), round(prec_ts, 3), round(rec_ts, 3)] # %% # ## Save results # Save settings with open(EXPERIMENT_DIR / EXPERIMENT_NAME / 'settings.pkl', 'wb') as f: pickle.dump(settings, f, pickle.HIGHEST_PROTOCOL) # Save losses losses_tr = np.array(losses_tr) losses_vl = np.array(losses_ts) np.save(EXPERIMENT_DIR / EXPERIMENT_NAME / 'losses_tr.npy', losses_tr) np.save(EXPERIMENT_DIR / EXPERIMENT_NAME / 'losses_ts.npy', losses_vl) # %% # Plot losses plt.figure(figsize=(20, 10)) plt.plot(losses_tr, color="blue") plt.plot(losses_ts, color="orange") plt.legend(["train", "valid"]) plt.savefig(EXPERIMENT_DIR / EXPERIMENT_NAME / 'losses.png', close=True, verbose=True) plt.close() # %% # Save predictions, ground truth, probabilities and filenames np.save(EXPERIMENT_DIR / EXPERIMENT_NAME / 'preds_tr.npy', preds_tr) np.save(EXPERIMENT_DIR / EXPERIMENT_NAME / 'trues_tr.npy', trues_tr) np.save(EXPERIMENT_DIR / EXPERIMENT_NAME / 'probs_tr.npy', probs_tr) np.save(EXPERIMENT_DIR / EXPERIMENT_NAME / 'filenames_tr.npy', filenames_tr) np.save(EXPERIMENT_DIR / EXPERIMENT_NAME / 'preds_ts.npy', preds_ts) np.save(EXPERIMENT_DIR / EXPERIMENT_NAME / 'trues_ts.npy', trues_ts) np.save(EXPERIMENT_DIR / EXPERIMENT_NAME / 'probs_ts.npy', probs_ts) np.save(EXPERIMENT_DIR / EXPERIMENT_NAME / 'filenames_ts.npy', filenames_ts) # %% # Save metrics metrics_out = pd.DataFrame( (train_metrics, test_metrics), columns=["MCC", "ACC", "prec", "rec"], index=["train", "test"], ) metrics_out.to_csv(EXPERIMENT_DIR / EXPERIMENT_NAME / 'metrics_out.csv') # Save model weights torch.save(model.state_dict(), EXPERIMENT_DIR / EXPERIMENT_NAME / 'weights.pth') # %%