Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
MPBA Radiomics
RADLER
Commits
55933f62
Commit
55933f62
authored
Nov 04, 2019
by
Alessia Marcolini
Browse files
Remove unused files
parent
da697be5
Changes
3
Expand all
Hide whitespace changes
Inline
Side-by-side
_trainingTstage-augm.ipynb
deleted
100755 → 0
View file @
da697be5
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Set Path"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/home/dsalvalai/projects/networks_dami\n"
]
}
],
"source": [
"%reload_ext autoreload\n",
"%autoreload 2\n",
"#PATH = '/home/dsalvalai/projects/networks_dami'\n",
"import os\n",
"PATH = os.getcwd()\n",
"print(PATH)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"TRANSFER LEARNING from Tstage to LOCOREGIONAL EXPERIMENT RESPECTING COHORTS.\n",
"\n",
"***\n",
" WATCH OUT: you also have to modify networks.py (set n_class=4 instead of 2)\n",
"***\n",
"\n",
"TRAINING COHORTS: HGJ & CHUS"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Import packages"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"import torch\n",
"import pickle\n",
"from torch.utils.data import DataLoader\n",
"import torch.nn as nn\n",
"import numpy as np\n",
"import os\n",
"\n",
"from networks import Ciompi \n",
"\n",
"from dataset import NumpyCSVDataset \n",
"from sklearn.metrics import matthews_corrcoef as mcor, accuracy_score as acc, recall_score as recall, precision_score as precision"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0,1\"\n",
"#os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"2,3\"\n",
"\n",
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"DATASETDIR = f\"{PATH}/data/processed/bbox_fixed2_augmented\"\n",
"\n",
"EXPERIMENT_DIR = f\"{PATH}/experiments\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Settings"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"EXPERIMENT_NAME = 'cohortsTstage-augm' \n",
"\n",
"settings = {\n",
" 'model': Ciompi, \n",
" 'batch_size': 32,\n",
" 'lr': 1e-4,\n",
" 'epochs': 100,\n",
" 'optim': torch.optim.Adam,\n",
" 'K': 0.25,\n",
" 'n_classes': 4, #TSTAGE\n",
" 'seed': 1234\n",
" }\n",
"\n",
"os.makedirs(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}', exist_ok=False)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"MODEL = settings['model']\n",
"BATCH_SIZE = settings['batch_size']\n",
"LR = settings['lr']\n",
"EPOCHS = settings['epochs']\n",
"OPTIMIZER = settings['optim']\n",
"K = settings['K']\n",
"N_CLASSES = settings['n_classes']\n",
"SEED = settings['seed']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Data Handlers"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Train-Test split indexes"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"np.random.seed(SEED)\n",
"\n",
"n_samples = len(os.listdir(DATASETDIR))\n",
"indexes = np.arange(n_samples)\n",
"np.random.shuffle(indexes)\n",
"\n",
"k_idx = int(K*n_samples)\n",
"idx_test = indexes[:k_idx]\n",
"idx_train = indexes[k_idx:]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Create train-test datasets"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"dataset_test = NumpyCSVDataset(DATASETDIR , f\"{PATH}/data/labels.csv\" , \"T-stage\",64 , mode=\"test\")\n",
"dataset_test._indexes = idx_test\n",
"\n",
"dataset_train = NumpyCSVDataset(DATASETDIR , f\"{PATH}/data/labels.csv\" , \"T-stage\", 64 , mode=\"train\")\n",
"dataset_train._indexes = idx_train"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Create loaders"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"loader_test = DataLoader(dataset_test, batch_size=int(BATCH_SIZE/2), num_workers=1, shuffle=True)\n",
"loader_train = DataLoader(dataset_train, batch_size=BATCH_SIZE, num_workers=24, pin_memory=True, shuffle=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Compute weights"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"labels = dataset_train.get_labels()\n",
"\n",
"class_sample_count = np.array([len(np.where( labels == t )[0]) for t in np.unique( labels )])\n",
"weights = 1. / class_sample_count # versione proporzionale\n",
"weights = torch.Tensor(weights).to(device)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Initialize"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model = MODEL()\n",
"\n",
"model.initialize_weights() \n",
"#model.load_state_dict(torch.load(f'{EXPERIMENT_DIR}/ ... /weights.pth')) \n",
"\n",
"model.to(device)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Optimizer"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Instantiate the optimizer class selected in settings['optim'] (bound to\n",
"# OPTIMIZER in the settings cell) rather than hard-coding Adam, so the pickled\n",
"# settings always describe the optimizer that was actually used.\n",
"optimizer = OPTIMIZER(model.parameters(), lr=LR)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# [x.shape for x in model.parameters()]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Loss"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"criterion = nn.CrossEntropyLoss( weight= weights )"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Train"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Training loop: one optimizer step per batch, a checkpoint every 10 epochs,\n",
"# and a validation loss measured on one batch of loader_test every other step.\n",
"# The model is switched to eval() for that validation batch and back to train()\n",
"# afterwards, so the validation forward pass does not run in training mode.\n",
"model.train()\n",
"\n",
"losses_tr = []\n",
"losses_ts = []\n",
"\n",
"last_loss_val = -1\n",
"iteration = 0\n",
"for epoch in range(EPOCHS):\n",
" if epoch % 10 == 0: #save checkpoint\n",
" torch.save(model.state_dict(), f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/checkpoint_{epoch}.pth')\n",
" \n",
" for j, batch in enumerate(loader_train):\n",
" optimizer.zero_grad()\n",
" \n",
" image = batch[\"data\"].to(device)\n",
" label = batch[\"target\"].to(device)\n",
" output = model(image) #forward\n",
" loss = criterion(output, label) #compute loss\n",
" loss.backward() #backward\n",
" optimizer.step() #update weights\n",
" \n",
" # check loss on valid\n",
" if j % 2 == 0:\n",
" model.eval() #evaluation mode while scoring the validation batch\n",
" with torch.no_grad():\n",
" data_val = next(iter(loader_test))\n",
" images_vl = data_val['data'].to(device)\n",
" labels_vl = data_val['target'].to(device)\n",
" outputs_vl = model.forward(images_vl)\n",
" loss_val = criterion(outputs_vl,labels_vl).item()\n",
" last_loss_val = loss_val\n",
" model.train() #back to training mode for the next optimizer step\n",
" \n",
" losses_tr.append(loss.item())\n",
" losses_ts.append(loss_val)\n",
" \n",
" sys.stdout.write('\\r Epoch {} of {} [{:.2f}%] - loss TR/TS: {:.4f} / {:.4f} - {}'.format(epoch+1, EPOCHS, 100*j/len(loader_train), \n",
" loss.item(), last_loss_val, optimizer.param_groups[0]['lr']))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import matplotlib.pyplot as plt\n",
"plt.plot(losses_tr)\n",
"plt.plot(losses_ts)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Test"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model.eval()\n",
"\n",
"preds = []\n",
"trues = []\n",
"probs = []\n",
"filenames = []\n",
"with torch.no_grad():\n",
" for j, batch in enumerate(loader_test):\n",
" image = batch[\"data\"].to(device)\n",
" label = batch[\"target\"].to(device)\n",
" output = model(image) #forward\n",
" _, pred = torch.max(output,1)\n",
" \n",
" preds.append(pred.data.cpu().numpy())\n",
" trues.append(label.data.cpu().numpy())\n",
" probs.append(output.data.cpu().numpy())\n",
" filenames.append(batch['sample'])\n",
"\n",
"probs = np.concatenate(probs)\n",
"preds = np.concatenate(preds)\n",
"trues = np.concatenate(trues)\n",
"filenames = np.concatenate(filenames)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Score the held-out predictions. The sklearn scorers are imported here under\n",
"# their own names because this cell binds its results to `precision` and\n",
"# `recall` (read later when the metrics are saved); calling the identically\n",
"# named import aliases would shadow them and make a second run of this cell\n",
"# fail with a TypeError.\n",
"from sklearn.metrics import precision_score, recall_score\n",
"\n",
"MCC = mcor(trues, preds)\n",
"ACC = acc(trues, preds)\n",
"precision = precision_score(trues, preds, average = 'weighted')\n",
"recall = recall_score(trues, preds ,average = 'weighted')\n",
"print(\"MCC\", MCC, \"ACC\", ACC)\n",
"print(\"precision\", precision, \"recall\", recall )"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"preds"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"trues"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Save results"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Save settings\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with open(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/settings.pkl', 'wb') as f:\n",
" pickle.dump(settings, f, pickle.HIGHEST_PROTOCOL)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Save losses\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"losses_tr = np.array(losses_tr)\n",
"losses_vl = np.array(losses_ts)\n",
"np.save(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/losses_tr.npy', losses_tr)\n",
"np.save(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/losses_ts.npy', losses_vl)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Plot losses"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Plot the recorded training and validation losses and write the figure to the\n",
"# experiment folder. savefig() defines no `close` or `verbose` options and\n",
"# recent matplotlib releases reject unknown keyword arguments, so only the\n",
"# output path is passed; the figure is closed explicitly afterwards.\n",
"plt.figure(figsize=(20,10))\n",
"plt.plot(losses_tr, color='blue')\n",
"plt.plot(losses_ts, color='orange')\n",
"plt.legend(['train', 'valid'])\n",
"plt.savefig(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/losses.png')\n",
"plt.close()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Save predictions, ground truth, probabilities and filenames"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"np.save(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/preds.npy', preds)\n",
"np.save(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/trues.npy', trues)\n",
"np.save(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/probs.npy', probs)\n",
"np.save(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/filenames.npy', filenames)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Save metrics"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"metrics = np.array([MCC, ACC, precision, recall])\n",
"np.save(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/metrics.npy', metrics) "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Save model weights"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"torch.save(model.state_dict(), f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}/weights.pth')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
%% Cell type:markdown id: tags:
### Set Path
%% Cell type:code id: tags:
```
python
%
reload_ext
autoreload
%
autoreload
2
#PATH = '/home/dsalvalai/projects/networks_dami'
import
os
PATH
=
os
.
getcwd
()
print
(
PATH
)
```
%%%% Output: stream
/home/dsalvalai/projects/networks_dami
%% Cell type:markdown id: tags:
TRANSFER LEARNING from Tstage to LOCOREGIONAL EXPERIMENT RESPECTING COHORTS.
***
WATCH OUT: you also have to modify networks.py (set n_class=4 instead of 2)
***
TRAINING COHORTS : HGJ & CHUS
%% Cell type:markdown id: tags:
### Import packages
%% Cell type:code id: tags:
```
python
import
sys
import
torch
import
pickle
from
torch.utils.data
import
DataLoader
import
torch.nn
as
nn
import
numpy
as
np
import
os
from
networks
import
Ciompi
from
dataset
import
NumpyCSVDataset
from
sklearn.metrics
import
matthews_corrcoef
as
mcor
,
accuracy_score
as
acc
,
recall_score
as
recall
,
precision_score
as
precision
```
%% Cell type:code id: tags:
```
python
os
.
environ
[
"CUDA_VISIBLE_DEVICES"
]
=
"0,1"
#os.environ["CUDA_VISIBLE_DEVICES"] = "2,3"
device
=
torch
.
device
(
"cuda"
if
torch
.
cuda
.
is_available
()
else
"cpu"
)
```
%% Cell type:code id: tags:
```
python
DATASETDIR
=
f
"
{
PATH
}
/data/processed/bbox_fixed2_augmented"
EXPERIMENT_DIR
=
f
"
{
PATH
}
/experiments"
```
%% Cell type:markdown id: tags:
### Settings
%% Cell type:code id: tags:
```
python
EXPERIMENT_NAME
=
'cohortsTstage-augm'
settings
=
{
'model'
:
Ciompi
,
'batch_size'
:
32
,
'lr'
:
1e-4
,
'epochs'
:
100
,
'optim'
:
torch
.
optim
.
Adam
,
'K'
:
0.25
,
'n_classes'
:
4
,
#TSTAGE
'seed'
:
1234
}
os
.
makedirs
(
f
'
{
EXPERIMENT_DIR
}
/
{
EXPERIMENT_NAME
}
'
,
exist_ok
=
False
)
```
%% Cell type:code id: tags:
```
python
MODEL
=
settings
[
'model'
]
BATCH_SIZE
=
settings
[
'batch_size'
]
LR
=
settings
[
'lr'
]
EPOCHS
=
settings
[
'epochs'
]
OPTIMIZER
=
settings
[
'optim'
]
K
=
settings
[
'K'
]
N_CLASSES
=
settings
[
'n_classes'
]
SEED
=
settings
[
'seed'
]
```
%% Cell type:markdown id: tags:
### Data Handlers
%% Cell type:markdown id: tags:
Train-Test split indexes