Commit cdb08a2e authored by Alessia Marcolini's avatar Alessia Marcolini
Browse files

Remove unused files

parent 0bd7b229
# IPython line magic to enable import and executions of `utils` in notebooks
#
# To enable this magic, please copy or link this file in your **startup** IPython folder
# Default Path: $HOME/.ipython/profile_default/startup
from IPython.core.magic import register_line_magic
from IPython.display import HTML as html_print
import os
import sys
@register_line_magic
def HN_env(line):
    """Line magic that makes a project directory importable from a notebook.

    The argument is joined onto the current working directory and the result
    is added to ``sys.path``. An absolute argument is used as-is, because
    ``os.path.join`` discards the preceding components in that case.

    Args:
        line: Text typed after ``%HN_env``. When empty, ``'..'`` (the parent
            of the current working directory) is used.

    Returns:
        An ``IPython.display.HTML`` object rendering a green "Success" label.
    """
    def set_path(path):
        if not path:
            path = '..'
        curpath = os.path.abspath(os.path.curdir)
        env = os.path.join(curpath, path)
        # Re-running the magic (e.g. on "Run all") must not pile up
        # duplicate entries in sys.path.
        if env not in sys.path:
            sys.path.append(env)
        return '<text style="color: green;">Success</text>'

    return html_print(set_path(line))
preprocess.ipynb, Andrea, 10 Dec 2018
Errors:
['HN-CHUM-005', 'HN-HMR-017', 'HN-HMR-007']
Errors in SUV conversion
HN-CHUM-020
HN-CHUS-093
HN-HMR-001
\ No newline at end of file
%% Cell type:code id: tags:
``` python
#PATH = '/home/bizzego/UniTn/networks_dami'
PATH = '/media/damiana/DATA'
#PATH = '/media/damiana/Maxtor'
```
%% Cell type:code id: tags:
``` python
import os
import numpy as np
import SimpleITK as sitk
import pandas as pd
import dicom_utils.dicom_utils as du
import dicom_utils.dicom_utils.visualize as viz
import gc
def to_numpy(image):
    """Convert a SimpleITK image to an array via ``sitk.GetArrayFromImage``."""
    array = sitk.GetArrayFromImage(image)
    return array
```
%% Cell type:code id: tags:
``` python
#%%
# All locations are derived from PATH, which is set in the first cell.
DATADIR = f'{PATH}/data/original' # Original data
OUTDIR = f'{PATH}/data/processed/bbox_fixed2/' # Destination of processed data (one .npy file per subject is written here)
DIR_INFO_FILE = f'{PATH}/data/summary.csv' # Where to find the info about the original data folders to use
ROI_INFO_FILE = f'{PATH}/data/INFO_GTVcontours_HN.csv' # Where to find the name of the ROI for each patient
```
%% Cell type:code id: tags:
``` python
# Side of the cubic crop: millimetres, which equals voxels at 1 mm^3 voxel size.
BOX_SIZE = 128
VOXEL_SIZE = [1, 1, 1]
HALF_BOX = BOX_SIZE // 2

# Recursive Gaussian smoothing filter with sigma = 2.
gaussian = sitk.SmoothingRecursiveGaussianImageFilter()
gaussian.SetSigma(2)
#%%
# Load the folder summary and the ROI-name table, then list the subject folders.
dir_info, roi_info = (
    pd.read_csv(csv_path) for csv_path in (DIR_INFO_FILE, ROI_INFO_FILE)
)
subjects = os.listdir(DATADIR)
errors = list()
```
%% Cell type:code id: tags:
``` python
# First four subject folder names. Note that SUB is a list here, while the
# processing cell queries `subject == @SUB`; the loop cell rebinds SUB to a
# single name. NOTE(review): this looks like a debugging leftover — confirm.
SUB = subjects[:4]
```
%% Cell type:code id: tags:
``` python
#%%
# TODO: change the slice to process more than the first four subjects.
for SUB in subjects[:4]:
    #%%
    print(SUB)
#%%
```
%%%% Output: stream
HN-CHUM-001
%% Cell type:code id: tags:
``` python
# Display the subject folder names returned by os.listdir(DATADIR).
subjects
```
%%%% Output: execute_result
['HN-CHUM-001',
'HN-CHUM-002',
'HN-CHUM-003',
'HN-CHUM-004',
'HN-CHUM-005',
'HN-CHUM-006',
'HN-CHUM-007',
'HN-CHUM-008',
'HN-CHUM-009',
'HN-CHUM-010',
'HN-CHUM-011',
'HN-CHUM-012',
'HN-CHUM-013',
'HN-CHUM-014',
'HN-CHUM-015',
'HN-CHUM-016',
'HN-CHUM-017',
'HN-CHUM-018',
'HN-CHUM-019',
'HN-CHUM-021',
'HN-CHUM-022',
'HN-CHUM-023',
'HN-CHUM-024',
'HN-CHUM-025',
'HN-CHUM-026',
'HN-CHUM-027',
'HN-CHUM-028',
'HN-CHUM-029',
'HN-CHUM-030',
'HN-CHUM-031',
'HN-CHUM-032',
'HN-CHUM-033',
'HN-CHUM-034',
'HN-CHUM-035',
'HN-CHUM-036',
'HN-CHUM-037',
'HN-CHUM-038',
'HN-CHUM-039',
'HN-CHUM-041',
'HN-CHUM-042',
'HN-CHUM-043',
'HN-CHUM-044',
'HN-CHUM-045',
'HN-CHUM-046',
'HN-CHUM-047',
'HN-CHUM-048',
'HN-CHUM-049',
'HN-CHUM-050',
'HN-CHUM-051',
'HN-CHUM-052',
'HN-CHUM-053',
'HN-CHUM-054',
'HN-CHUM-055',
'HN-CHUM-056',
'HN-CHUM-057',
'HN-CHUM-058',
'HN-CHUM-059',
'HN-CHUM-061',
'HN-CHUM-062',
'HN-CHUM-063',
'HN-CHUM-064',
'HN-CHUM-065',
'HN-CHUS-001',
'HN-CHUS-002',
'HN-CHUS-003',
'HN-CHUS-004',
'HN-CHUS-005',
'HN-CHUS-006',
'HN-CHUS-007',
'HN-CHUS-008',
'HN-CHUS-009',
'HN-CHUS-010',
'HN-CHUS-011',
'HN-CHUS-012',
'HN-CHUS-013',
'HN-CHUS-014',
'HN-CHUM-020',
'HN-CHUM-040',
'HN-CHUM-060',
'HN-CHUS-015',
'HN-CHUS-016',
'HN-CHUS-017',
'HN-CHUS-018',
'HN-CHUS-019',
'HN-CHUS-020',
'HN-CHUS-021',
'HN-CHUS-022',
'HN-CHUS-023',
'HN-CHUS-024',
'HN-CHUS-025',
'HN-CHUS-026',
'HN-CHUS-027',
'HN-CHUS-028',
'HN-CHUS-029',
'HN-CHUS-030',
'HN-CHUS-031',
'HN-CHUS-032',
'HN-CHUS-033',
'HN-CHUS-034',
'HN-CHUS-035',
'HN-CHUS-036',
'HN-CHUS-037',
'HN-CHUS-038',
'HN-CHUS-039',
'HN-CHUS-040',
'HN-CHUS-041']
%% Cell type:code id: tags:
``` python
# Look up the series folders and the ROI name recorded for the current subject.
dir_info_sub = dir_info.query("subject == @SUB")
roi_name = roi_info.query('patient == @SUB')['roi_name'].values[0]
dir_CT = dir_info_sub.query("modality == 'CT'").dir.values[0]
dir_PT = dir_info_sub.query("modality == 'PT'").dir.values[0]
dir_RT = dir_info_sub.query("modality == 'RTSTRUCT'").dir.values[0]

scan_CT = du.load_series(os.path.join(DATADIR, dir_CT))
scan_PT = du.load_SUV(os.path.join(DATADIR, dir_PT))
#%%
# The ROI is loaded against the PET scan when the RTSTRUCT folder name
# contains 'PETPET', and against the CT scan otherwise.
rtstruct_file = os.path.join(DATADIR, dir_RT, '000000.dcm')
segmentation = du.load_roi(
    rtstruct_file,
    roi_name,
    scan_PT if 'PETPET' in dir_RT else scan_CT,
)
#%%
# Centre a box on the ROI bounding box; the extra margin on each side is
# there to allow a better interpolation and is removed again below.
margin = 5
bbox_start, bbox_stop = du.get_bbox_vertices(segmentation)
center_mm = (np.array(bbox_stop) + np.array(bbox_start))/2
start_mm = center_mm - HALF_BOX - margin
stop_mm = center_mm + HALF_BOX + margin
scan_CT_box = du.extract_volume(scan_CT, start_mm, stop_mm)
#%%
# Upsample the CT crop to VOXEL_SIZE spacing, then resample the PET scan
# using the CT crop as the reference image.
scan_CT_box = du.processing.resample(scan_CT_box, spacing=VOXEL_SIZE)
scan_PT_box = sitk.Resample(scan_PT, scan_CT_box)
# (Resampling/smoothing the segmentation as a third channel is currently disabled.)

# Remove the interpolation margin.
start_mm = start_mm + margin
stop_mm = stop_mm - margin
scan_CT_box = du.extract_volume(scan_CT_box, start_mm, stop_mm)
scan_PT_box = du.extract_volume(scan_PT_box, start_mm, stop_mm)

# Stack CT and PET along a new leading axis and save one file per subject.
out = np.stack([to_numpy(box) for box in (scan_CT_box, scan_PT_box)], axis=0)
np.save(f'{OUTDIR}/{SUB}.npy', out)

# Drop the large intermediates before moving on.
del scan_CT_box, scan_PT_box, segmentation, out
gc.collect()
```
%%%% Output: execute_result
0
%% Cell type:code id: tags:
``` python
# Record the current subject as failed and report it.
# NOTE(review): this looks like the body of an error handler that lost its
# try/except when the loop was split into cells; as written it runs
# unconditionally every time the cell is executed — confirm.
errors.append(SUB)
print(f'Error processing sub: {SUB}')
#%%
# Show every subject recorded so far.
print(errors)
```
%%%% Output: stream
Error processing sub: HN-CHUM-001
['HN-CHUM-001', 'HN-CHUM-002', 'HN-CHUM-003', 'HN-CHUM-001', 'HN-CHUM-001']
%% Cell type:code id: tags:
``` python
```
%% Cell type:markdown id: tags:
### Set Path
%% Cell type:code id: tags:
``` python
# Enable IPython's autoreload extension in mode 2 so edited modules are
# picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2
#PATH = '/home/dsalvalai/projects/networks_dami'
import os
# Use the notebook's current working directory as the project root.
PATH = os.getcwd()
print(PATH)
```
%% Cell type:markdown id: tags:
TRANSFER LEARNING EXPERIMENT RESPECTING COHORTS .
NO AUGMENTATION ON DATA .
"CLONING" POSITIVE SUBJECTS BY WeightedRandomSampler
TRAINING COHORTS : HGJ & CHUS
%% Cell type:markdown id: tags:
weight = [0.001, 0.8] --> MCC = 0 ACC = 1 PREC = 1 REC = 1
%% Cell type:markdown id: tags:
weight = [0.01, 0.6] --> MCC = 0 ACC = 0.97
%% Cell type:markdown id: tags:
weight = [0.1, 0.6] --> MCC = -0.01 ACC = 0.95 PREC = ? REC = ?
%% Cell type:markdown id: tags:
weight = [0.1, 0.4] --> MCC = ? ACC = ? PREC = ? REC = ?
%% Cell type:markdown id: tags:
### Import packages
%% Cell type:code id: tags:
``` python
import sys
import torch
import pickle
from torch.utils.data import DataLoader
from torch.utils.data import WeightedRandomSampler
import torch.nn as nn
import numpy as np
import os
from networks import Ciompi
from dataset import NumpyCSVDataset
from sklearn.metrics import matthews_corrcoef as mcor, accuracy_score as acc, recall_score as recall, precision_score as precision
```
%% Cell type:code id: tags:
``` python
#os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
# Expose only GPUs 2 and 3 to this process.
# NOTE(review): presumably this must run before CUDA is first initialised to
# take effect — confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "2,3"
# Fall back to the CPU when CUDA is not available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
```
%% Cell type:code id: tags:
``` python
# Folder with the preprocessed volumes; the TRAIN variant is left commented out.
#DATASETDIR = f"{PATH}/data/processed/bbox_fixed2_64_TRAIN"
DATASETDIR = f"{PATH}/data/processed/bbox_fixed2_64_TEST"
# Root folder under which each experiment gets its own sub-folder.
EXPERIMENT_DIR = f"{PATH}/experiments"
```
%% Cell type:markdown id: tags:
### Settings
%% Cell type:code id: tags:
``` python
# Name of the sub-folder of EXPERIMENT_DIR used for this run.
EXPERIMENT_NAME = 'cohortsWSampler'
# Hyper-parameters of the run; unpacked into module-level constants in the next cell.
settings = {
'model': Ciompi,
'batch_size': 32,
'lr': 1e-4,
'epochs': 100,
'optim': torch.optim.Adam,
'K': 0.5,  # fraction of samples assigned to the test split
'n_classes': 2,
'seed': 1234
}
# exist_ok=False: raises if the experiment folder already exists, so a
# previous run with the same name is not silently overwritten.
os.makedirs(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}', exist_ok=False)
```
%% Cell type:code id: tags:
``` python
# Unpack the experiment settings into module-level constants in one statement.
MODEL, BATCH_SIZE, LR, EPOCHS, OPTIMIZER, K, N_CLASSES, SEED = (
    settings[key]
    for key in (
        'model', 'batch_size', 'lr', 'epochs',
        'optim', 'K', 'n_classes', 'seed',
    )
)
```
%% Cell type:markdown id: tags:
### Data Handlers
%% Cell type:markdown id: tags:
Train-Test split indexes
%% Cell type:code id: tags:
``` python
# Seeded shuffle of the sample positions; the first K fraction becomes the
# test split and the remainder the training split.
np.random.seed(SEED)
n_samples = len(os.listdir(DATASETDIR))
indexes = np.arange(n_samples)
np.random.shuffle(indexes)
k_idx = int(K * n_samples)
idx_test, idx_train = np.split(indexes, [k_idx])
```
%% Cell type:markdown id: tags:
Create train-test datasets
%% Cell type:code id: tags:
``` python
# Both datasets read the same folder and label file ("Locoregional" column);
# they differ in `mode` and in the index subset assigned right after construction.
# NOTE(review): `_indexes` is set from outside the class — presumably it
# selects which samples the dataset exposes; verify against NumpyCSVDataset.
dataset_test = NumpyCSVDataset(DATASETDIR , f"{PATH}/data/labels.csv" , "Locoregional",64 , mode="test")
dataset_test._indexes = idx_test
dataset_train = NumpyCSVDataset(DATASETDIR , f"{PATH}/data/labels.csv" , "Locoregional", 64 , mode="train")
dataset_train._indexes = idx_train
```
%% Cell type:markdown id: tags:
Compute weights
%% Cell type:code id: tags:
``` python
# Loss weights: class 0 gets weight 1, class 1 gets the negative/positive
# ratio of the training labels. (The author's commented-out alternative was [1, 1].)
labels = dataset_train.get_labels()
n_positive = np.sum(labels)
n_negative = len(labels) - n_positive
weights = [1, n_negative / n_positive]
settings['weights'] = weights
weights = torch.Tensor(weights).to(device)
```
%% Cell type:markdown id: tags:
Compute weights for unbalanced class (clone)
%% Cell type:code id: tags:
``` python
# TRAIN
targets_TR = np.arange( len(idx_train) )
for i in range(len(idx_train)):
targets_TR[i] = dataset_train[i]['target']
```
%% Cell type:code id: tags:
``` python
# Number of training samples per class, in sorted class order.
class_sample_count = np.array(
    [np.count_nonzero(targets_TR == t) for t in np.unique(targets_TR)]
)
#weight = 1. / class_sample_count  # proportional version
weight = [0.1, 0.4]  # brute-force version: fixed per-class sampling weights
# Per-sample sampling weight, looked up by each sample's class.
weights_umb_train = np.array(weight)[targets_TR]
```
%% Cell type:code id: tags:
``` python
# TEST
targets_TS = np.arange( len(idx_test) )
for i in range(len(idx_test)):
targets_TS[i] = dataset_test[i]['target']
```
%% Cell type:code id: tags:
``` python
# Number of test samples per class, in sorted class order.
class_sample_count = np.array(
    [np.count_nonzero(targets_TS == t) for t in np.unique(targets_TS)]
)
#weight = 3. / class_sample_count  # proportional version
weight = [0.1, 0.4]  # brute-force version: fixed per-class sampling weights
# Per-sample sampling weight, looked up by each sample's class.
weights_umb_test = np.array(weight)[targets_TS]
```
%% Cell type:markdown id: tags:
Create loaders
%% Cell type:code id: tags:
``` python
# Test loader: draws len(idx_test) samples with replacement according to the
# per-sample weights, in batches of half the training batch size.
# NOTE(review): because the test set is resampled with replacement, anything
# computed from this loader is over a re-weighted sample rather than the
# original test set — confirm this is intended for evaluation.
umb_sampler_test = WeightedRandomSampler( weights_umb_test , num_samples=len(idx_test) ,replacement=True) #unbalanced class
loader_test = DataLoader(dataset_test, batch_size=int(BATCH_SIZE/2), num_workers=1,sampler = umb_sampler_test)
# Train loader: same weighted resampling over the training subset.
umb_sampler_train = WeightedRandomSampler( weights_umb_train , num_samples=len(idx_train) ,replacement=True) #unbalanced class
loader_train = DataLoader(dataset_train, batch_size=BATCH_SIZE, num_workers=24, pin_memory=True, sampler = umb_sampler_train)
```
%% Cell type:markdown id: tags:
### Initialize
%% Cell type:markdown id: tags:
Model
%% Cell type:code id: tags:
``` python
# Instantiate the network class chosen in settings['model'].
model = MODEL()
model.initialize_weights() #transfer learning step1
# Step 2 (disabled): start from the weights saved by the 'ciompi_TL' experiment.
#model.load_state_dict(torch.load(f'{EXPERIMENT_DIR}/ciompi_TL/weights.pth')) #transfer learning step2
# Move the parameters to the selected device.
model.to(device)
```
%% Cell type:markdown id: tags:
Optimizer
%% Cell type:code id: tags:
``` python
# Build the optimizer from the class configured in settings['optim'] (bound to
# OPTIMIZER) rather than hard-coding Adam, so changing the setting takes
# effect. With the current settings this is still torch.optim.Adam.
optimizer = OPTIMIZER(model.parameters(), lr=LR)
```
%% Cell type:code id: tags:
``` python
#[x.shape for x in model.parameters()]
```
%% Cell type:markdown id: tags:
Loss
%% Cell type:code id: tags:
``` python
# Cross-entropy loss weighted per class with the `weights` tensor computed above.
# NOTE(review): the training loader already over-samples by class through
# WeightedRandomSampler, so class imbalance is compensated twice — confirm
# this is intended.
criterion = nn.CrossEntropyLoss( weight= weights )
```
%% Cell type:markdown id: tags:
### Train
%% Cell type:code id: tags:
``` python
# Training loop.
#
# For every epoch: save a checkpoint every 10th epoch, then run one pass over
# loader_train. After every second batch the loss on one batch drawn from
# loader_test is computed and remembered. Both loss curves receive one entry
# per training batch (the validation curve repeats its latest value).
model.train()
losses_tr = []
losses_ts = []
last_loss_val = -1
iteration = 0
for epoch in range(EPOCHS):
    if epoch % 10 == 0:  # save checkpoint
        torch.save(model.state_dict(), f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/checkpoint_{epoch}.pth')

    for j, batch in enumerate(loader_train):
        optimizer.zero_grad()
        image = batch["data"].to(device)
        label = batch["target"].to(device)

        output = model(image)            # forward
        loss = criterion(output, label)  # compute loss
        loss.backward()                  # backward
        optimizer.step()                 # update weights

        # Check the loss on one validation batch.
        if j % 2 == 0:
            # Switch to evaluation mode for the validation forward pass so
            # that layers such as dropout and batch normalisation do not
            # behave as in training; torch.no_grad() alone does not do that.
            model.eval()
            with torch.no_grad():
                # A fresh iterator is created on purpose: each check draws a
                # newly sampled batch from the weighted test sampler.
                data_val = next(iter(loader_test))
                images_vl = data_val['data'].to(device)
                labels_vl = data_val['target'].to(device)
                outputs_vl = model(images_vl)
                loss_val = criterion(outputs_vl, labels_vl).item()
            model.train()  # back to training mode for the next batch
            last_loss_val = loss_val

        losses_tr.append(loss.item())
        losses_ts.append(last_loss_val)
        sys.stdout.write('\r Epoch {} of {} [{:.2f}%] - loss TR/TS: {:.4f} / {:.4f} - {}'.format(epoch+1, EPOCHS, 100*j/len(loader_train),
                         loss.item(), last_loss_val, optimizer.param_groups[0]['lr']))
```
%% Cell type:code id: tags:
``` python
# Render figures inline in the notebook.
%matplotlib inline
import matplotlib.pyplot as plt
# Training loss and validation loss, one point per training batch.
plt.plot(losses_tr)
plt.plot(losses_ts)
```
%% Cell type:markdown id: tags:
### Test
%% Cell type:code id: tags:
``` python
model.eval()
preds = []
trues = []
probs = []
filenames = []