MPBA Radiomics / RADLER / Commits / 0bd7b229

Commit 0bd7b229 authored Jan 15, 2020 by Alessia Marcolini

Remove unused files

parent 8a2ddba5

Changes 20
cloning.ipynb deleted 100755 → 0
%% Cell type:code id: tags:
``` python
import os

PATH = os.getcwd()

import sys
import numpy as np
import pandas as pd
import SimpleITK as sitk

from dicom_utils import augmentation as aug, processing as dup
from dataset import NumpyCSVDataset

#%%
```
%% Cell type:code id: tags:
``` python
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
```
%% Cell type:code id: tags:
``` python
SIZE = 64
DATASETDIR = f"{PATH}/data/processed/bbox_fixed2_64_TRAIN"

# FIRST SOLUTION
#OUTPUT_DIR = f'{PATH}/data/processed/bbox_fixed2_64_TRAIN_cloned'
#os.makedirs(f'{PATH}/data/processed/bbox_fixed2_64_TRAIN_cloned', exist_ok=False)

# SECOND SOLUTION
OUTPUT_DIR = f'{PATH}/data/processed/bbox_fixed2_64_TRAIN_buildingclones'
#os.makedirs(f'{PATH}/data/processed/bbox_fixed2_64_TRAIN_buildingclones', exist_ok=False)

dataset = NumpyCSVDataset(DATASETDIR, f"{PATH}/data/labels.csv", "Locoregional", SIZE, mode="test")
```
%% Cell type:markdown id: tags:
### Solution
%% Cell type:code id: tags:
``` python
# The idea is to copy each negative file only once, and the "positive" files several times,
# so as to create multiple clones of the positive files.
# To do this:
# 1. when you find a positive file, repeat the cloning operation K times,
#    where K is the imbalance ratio n_NEGATIVES / n_POSITIVES
# 2. add a suffix to the subject name, because if you create many copies of the same file
#    each name must be different, otherwise you overwrite it and you are back to square one.
# Mind the difference between:
# > sample: a dict-like object returned by NumpyCSVDataset. It contains:
#       sample['sample']: the subject name
#       sample['data']:   the 3D numpy array holding the 3D image, i.e. the grey value of each pixel
#       sample['target']: the label (0 or 1)
# > file XXX.npy: a file that stores the 3D image (= sample['data']) in binary format
# > dataset: a NumpyCSVDataset object: essentially a list of dicts (see sample), one per file in DATASETDIR.
# In practice, the NumpyCSVDataset class takes care of loading the .npy files from DATASETDIR,
# deriving the subject name (from the file name),
# looking up the subject's label (in labels.csv)
# and bundling everything into a dict, which is then put into dataset.

# FIND K:
labels = dataset._labels.values
idx_positive = np.where(labels == 1)[0]
K = int((len(labels) - len(idx_positive)) / len(idx_positive))

for i in range(len(dataset)):
    sample = dataset[i]
    SUB = sample['sample']
    print(SUB)
    image_orig = sample['data']
    label = sample['target']
    ratio = 1 if label == 0 else K  # if negative copy once (ratio=1), otherwise copy K times (ratio=K)

    # CREATE A COUNTER TO USE AS A SUFFIX
    id_image = 0
    for j in range(ratio):
        # image_aug = augment_3D(image_orig, 'train', SIZE)  # skipped: no augmentation wanted here
        np.save(f'{OUTPUT_DIR}/{SUB}_{id_image}.npy', image_orig)  # the suffix is embedded in the file name
        id_image += 1
```
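%% Cell type:markdown id: tags:
As a sanity check after cloning, the output folder should hold one file per negative plus K files per positive, i.e. roughly twice the number of negatives. A minimal sketch of counting them (this check is not in the original notebook):
%% Cell type:code id: tags:
``` python
# count the .npy files written to OUTPUT_DIR; after cloning, positives
# are replicated K times, so the total should be close to 2 * n_negatives
n_files = len([f for f in os.listdir(OUTPUT_DIR) if f.endswith('.npy')])
print(n_files, 'files in', OUTPUT_DIR)
```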
%% Cell type:markdown id: tags:
## ================
%% Cell type:markdown id: tags:
FIRST SOLUTION
%% Cell type:markdown id: tags:
Copy each file from one folder to another
%% Cell type:code id: tags:
``` python
# this is wrong because 'sample' is a dictionary that contains, among other things, the numpy variable holding the image
# image = sample['data'] is what you should save: np.save(f'{OUTPUT_DIR}/{subject}.npy', IMAGE)
for i in range(len(dataset)):
    sample = dataset[i]
    subject = sample['sample']
    np.save(f'{OUTPUT_DIR}/{subject}.npy', sample)
```
%% Cell type:markdown id: tags:
Clone the positive samples only, one extra copy each
%% Cell type:code id: tags:
``` python
labels = dataset._labels.values
idx_positive = np.where(labels == 1)[0]

for j in range(len(idx_positive)):
    sample = dataset[idx_positive[j]]
    image_orig = sample['data']
    subject = sample['sample']
    sample['target'] = int(sample['target'])

    # this one is correct
    np.save(f'{OUTPUT_DIR}/{subject}_cl3.npy', image_orig)

    # this one is wrong (same mistake as above).
    # since it uses the same file name, the correct save above is simply overwritten
    np.save(f'{OUTPUT_DIR}/{subject}_cl3.npy', sample)
```
%% Cell type:markdown id: tags:
SECOND SOLUTION
%% Cell type:markdown id: tags:
"building" new files
%% Cell type:code id: tags:
``` python
for i in range(len(dataset)):
    it = dataset[i].items()
    it = list(it)
    sample = dataset[i]
    subject = sample['sample']
    data = it[0]
    target = it[1]
    sample = it[2]
    dic = {data, target, sample}
    # wrong: you are saving a dict-like object, not the image array, as above
    np.save(f'{OUTPUT_DIR}/{subject}.npy', dic)
```
%% Cell type:markdown id: tags:
CHECK THE SOLUTIONS
%% Cell type:code id: tags:
``` python
#newdataset = NumpyCSVDataset(f'{PATH}/data/processed/bbox_fixed2_64_TRAIN_cloned', f"{PATH}/data/labels.csv", "Locoregional", SIZE, mode="test")
newdataset = NumpyCSVDataset(f'{PATH}/data/processed/bbox_fixed2_64_TRAIN_buildingclones', f"{PATH}/data/labels.csv", "Locoregional", SIZE, mode="test")
```
%% Cell type:code id: tags:
``` python
idx_positive[0]  # 5
dataset[5]['target']
```
%% Cell type:code id: tags:
``` python
newdataset[5]['target']
```
%%%% Output: error
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-9-1c6cfa41156c> in <module>()
----> 1 newdataset[ 5 ]['target']
~/projects/networks_dami/dataset.py in __getitem__(self, idx, no_data)
46 data = np.load(data_file)
47
---> 48 data = self.augmentation(data, self.mode, self.size) #qst è numpy
49 data = torch.Tensor(data) #qst è tensor - la network vuole tensor
50 output = {'data': data, 'target': label, 'sample': sample}
~/projects/networks_dami/dataset.py in augment_3D(image, mode, size)
8
9 def augment_3D(image, mode, size):
---> 10 N_CHANNELS = image.shape[0]
11 image_seq = [sitk.GetImageFromArray(image[i,:,:,:]) for i in range(N_CHANNELS)]
12
IndexError: tuple index out of range
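%% Cell type:markdown id: tags:
The IndexError above is consistent with the saving mistake flagged earlier: calling `np.save` on a dict (or set) stores a 0-dimensional object array, so the loaded `data` has shape `()` and `image.shape[0]` inside `augment_3D` fails. A minimal way to confirm (the file name below is illustrative, assuming a subject saved by the second solution):
%% Cell type:code id: tags:
``` python
# a 0-d object array (shape (), dtype object) means a dict/set was saved instead of the image
arr = np.load(f'{OUTPUT_DIR}/HN-CHUS-003.npy', allow_pickle=True)
print(arr.shape, arr.dtype)
```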
%% Cell type:code id: tags:
``` python
cloned = NumpyCSVDataset(f'{PATH}/data/processed/bbox_fixed2_64_TRAIN_cloned/bla', f"{PATH}/data/labels.csv", "Locoregional", SIZE, mode="test")
```
%% Cell type:code id: tags:
``` python
cloned.size
```
%% Cell type:markdown id: tags:
FROM THE COMMAND LINE:
%% Cell type:code id: tags:
``` python
'''
dsalvalai@Starscream:~$ cp -r bbox_fixed2_64_TRAIN bbox_fixed2_64_TRAIN_cloned
dsalvalai@Starscream:~$ cp HN-CHUS-003.npy HN-CHUS-003_cl1.npy
'''
```
compute_metrics.py deleted 100755 → 0
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report, accuracy_score as acc, precision_score as prec, recall_score as rec, matthews_corrcoef as mcc
from sklearn.metrics import confusion_matrix

#%%
y = pd.read_csv('/home/bizzego/Downloads/TT/TT_merged_features_B_UNCORR_selected_SVMlinear/predictions_test.csv')

y_true = y['true'].values
y_pred = y['pred'].values

#%%
print(classification_report(y_true, y_pred))
print(acc(y_true, y_pred), mcc(y_true, y_pred))
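
#%%
# confusion_matrix is imported above but never used; a minimal sketch of
# printing it for the same predictions (this line is not in the original script)
print(confusion_matrix(y_true, y_pred))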
extract_features.py deleted 100644 → 0
# To add a new cell, type '# %%'
# To add a new markdown cell, type '# %% [markdown]'
# %%
from IPython import get_ipython

# %% [markdown]
# ## Deep features extraction

# %%
get_ipython().run_line_magic('HN_env', '')

# %%
import os

PATH = os.path.abspath(os.path.curdir)

# %%
get_ipython().run_line_magic('reload_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')

# %% [markdown]
# ### Import

# %%
import os
import sys

from tqdm import tqdm
import numpy as np
import pandas as pd

os.environ['CUDA_VISIBLE_DEVICES'] = '0'

import torch
import torch.nn as nn
from torch.utils.data import DataLoader

from networks import CiompiDO
from dataset import NumpyCSVDataset, augment_3D_HN

# %%
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
multigpu = True

# %%
DATASETDIR = '/thunderdisk/HN/processed/bbox_64_augmented_LR'
EXPERIMENT_DIR = f'{PATH}/experiments'

# %%
MODEL_NAME = 'LR_noTx_branch_wise_free_aug_CT_20191027-124913'
SIZE = 64
OUTDIR = f'{EXPERIMENT_DIR}/{MODEL_NAME}/features/'
OUTFILE = 'features_noTx_AUG.csv'

os.makedirs(OUTDIR, exist_ok=True)

# %%
dataset = NumpyCSVDataset(DATASETDIR, f'{PATH}/data/clinical_data_noTx.csv', 'Locoregional', SIZE, mode='test')
loader = DataLoader(dataset, batch_size=8, num_workers=12, pin_memory=True, shuffle=False, drop_last=False)

model_weights = f'{EXPERIMENT_DIR}/{MODEL_NAME}/weights.pth'

# %%
model = CiompiDO(n_classes=2, n_channels=1, modality='CT')

if multigpu:
    model = nn.DataParallel(model.to(device))
    model = model.module

model.load_state_dict(torch.load(model_weights))
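
# %%
# Switching to eval mode freezes BatchNorm statistics during feature extraction;
# a minimal addition (not in the original script), assuming inference-time behavior is wanted.
model.eval()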
# %%
deep_features = []
sample_names = []
labels = []
patients = []

with torch.no_grad():
    for batch in tqdm(loader):
        names_batch = [name.split('.')[0] for name in batch['filename']]
        images_batch = batch['data'].to(device)
        labels_batch = batch['target']
        patients_batch = batch['patient']

        out = model.extract_features(images_batch.cuda())

        deep_features.append(out.data.cpu().numpy())
        sample_names.append(names_batch)
        labels.append(labels_batch)
        patients.append(patients_batch)

# %%
deep_features = np.concatenate(deep_features)
sample_names = np.concatenate(sample_names)
labels = np.concatenate(labels)
patients = np.concatenate(patients)

# %%
len(labels)

# %%
print(deep_features.shape, len(sample_names), len(labels))

# %%
deep_features_pd = pd.DataFrame(deep_features, index=sample_names)
deep_features_pd['label'] = labels
deep_features_pd['patient'] = patients

#%% SAVE
print(deep_features_pd.shape)
deep_features_pd.to_csv(f'{OUTDIR}/{OUTFILE}')

# %%
keep_GPU_alive.ipynb deleted 100755 → 0
%% Cell type:code id: tags:
``` python
import numpy as np
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
```
%% Cell type:code id: tags:
``` python
n = torch.tensor([1]).to(device)
```
%% Cell type:code id: tags:
``` python
```
multiCto01_128px.ipynb deleted 100755 → 0
%% Cell type:markdown id: tags:
### Set Path
%% Cell type:code id: tags:
``` python
%reload_ext autoreload
%autoreload 2

#PATH = '/home/dsalvalai/projects/networks_dami'
import os

PATH = os.getcwd()
print(PATH)
```
%%%% Output: stream
/home/dsalvalai/projects/networks_dami
%% Cell type:markdown id: tags:
TRANSFER LEARNING EXPERIMENT RESPECTING COHORTS.
NO AUGMENTATION ON DATA AND NO CLONING OF POSITIVE DATA.
TRAINING COHORTS: HGJ & CHUS
%% Cell type:markdown id: tags:
### Import packages
%% Cell type:code id: tags:
``` python
import sys
import torch
import pickle
from torch.utils.data import DataLoader
import torch.nn as nn
import numpy as np
import os

from networks import Ciompi
from dataset import NumpyCSVDataset
from sklearn.metrics import matthews_corrcoef as mcor, accuracy_score as acc, recall_score as recall, precision_score as precision
```
%% Cell type:code id: tags:
``` python
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
#os.environ["CUDA_VISIBLE_DEVICES"] = "2,3"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
```
%% Cell type:code id: tags:
``` python
DATASETDIR = f"{PATH}/data/processed/bbox_fixed2_64_TRAIN"  # fine tuning
#DATASETDIR = f"{PATH}/data/processed/bbox_fixed2_64_TEST"  # prediction
EXPERIMENT_DIR = f"{PATH}/experiments"
```
%% Cell type:markdown id: tags:
### Settings
%% Cell type:code id: tags:
``` python
EXPERIMENT_NAME = 'multiCto01'

settings = {
    'model': Ciompi,
    'batch_size': 32,
    'lr': 1e-4,
    'epochs': 100,
    'optim': torch.optim.Adam,
    'K': 0.25,
    'n_classes': 2,
    'seed': 1234,
}

#os.makedirs(f'{EXPERIMENT_DIR}/{EXPERIMENT_NAME}', exist_ok=False)
```
%% Cell type:code id: tags:
``` python
MODEL = settings['model']
BATCH_SIZE = settings['batch_size']
LR = settings['lr']
EPOCHS = settings['epochs']
OPTIMIZER = settings['optim']
K = settings['K']
N_CLASSES = settings['n_classes']
SEED = settings['seed']
```
%% Cell type:markdown id: tags:
### Data Handlers
%% Cell type:markdown id: tags:
Train-Test split indexes
%% Cell type:code id: tags:
``` python
np.random.seed(SEED)

n_samples = len(os.listdir(DATASETDIR))
indexes = np.arange(n_samples)
np.random.shuffle(indexes)

k_idx = int(K * n_samples)
idx_test = indexes[:k_idx]
idx_train = indexes[k_idx:]
```
%% Cell type:markdown id: tags:
Create train-test datasets
%% Cell type:code id: tags:
``` python
dataset_test = NumpyCSVDataset(DATASETDIR, f"{PATH}/data/labels.csv", "Locoregional", 64, mode="test")
dataset_test._indexes = idx_test

dataset_train = NumpyCSVDataset(DATASETDIR, f"{PATH}/data/labels.csv", "Locoregional", 64, mode="train")
dataset_train._indexes = idx_train
```
%% Cell type:markdown id: tags:
Create loaders
%% Cell type:code id: tags:
``` python
loader_test = DataLoader(dataset_test, batch_size=int(BATCH_SIZE / 2), num_workers=1, shuffle=True)
loader_train = DataLoader(dataset_train, batch_size=BATCH_SIZE, num_workers=24, pin_memory=True, shuffle=True)
```
%% Cell type:markdown id: tags:
Compute weights
%% Cell type:code id: tags:
``` python
labels = dataset_train.get_labels()
weights = [1, (len(labels) - np.sum(labels)) / np.sum(labels)]  # [1, 1] #
print(weights)

settings['weights'] = weights
weights = torch.Tensor(weights).to(device)
```
%%%% Output: stream
[1, 5.545454545454546]
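%% Cell type:markdown id: tags:
These per-class weights (1 for the negative class, the imbalance ratio for the positive class) are typically passed to a weighted loss. A minimal sketch, assuming a standard weighted cross-entropy is used downstream (the training cell itself is not part of this diff):
%% Cell type:code id: tags:
``` python
# weight the rare positive class more heavily in the loss
criterion = nn.CrossEntropyLoss(weight=weights)
```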
%% Cell type:markdown id: tags:
### Initialize
%% Cell type:markdown id: tags:
Model
%% Cell type:code id: tags:
``` python
model = MODEL()

state_dict = torch.load(f'{EXPERIMENT_DIR}/cohortsTstage-augm/weights.pth')
state_dict['linear.2.weight'] = state_dict['linear.2.weight'][0:2]
state_dict['linear.2.bias'] = state_dict['linear.2.bias'][0:2]

model.load_state_dict(state_dict)  # fine tuning
model.to(device)
```
%%%% Output: execute_result
Ciompi(
(CT_branch): Sequential(
(0): BatchNorm3d(1, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): Conv3d(1, 32, kernel_size=(5, 5, 5), stride=(1, 1, 1))
(2): BatchNorm3d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(3): ReLU()
(4): MaxPool3d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(5): Conv3d(32, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1))
(6): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(7): Conv3d(64, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1))
(8): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(9): ReLU()
(10): MaxPool3d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(11): Conv3d(64, 128, kernel_size=(3, 3, 3), stride=(1, 1, 1))
(12): BatchNorm3d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(13): ReLU()
(14): MaxPool3d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(15): Conv3d(128, 256, kernel_size=(3, 3, 3), stride=(1, 1, 1))
(16): BatchNorm3d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(17): ReLU()
(18): MaxPool3d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(19): AdaptiveAvgPool3d(output_size=1)
)
(PT_branch): Sequential(
(0): BatchNorm3d(1, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): Conv3d(1, 32, kernel_size=(5, 5, 5), stride=(1, 1, 1))
(2): BatchNorm3d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(3): ReLU()
(4): MaxPool3d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(5): Conv3d(32, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1))
(6): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(7): Conv3d(64, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1))
(8): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(9): ReLU()
(10): MaxPool3d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(11): Conv3d(64, 128, kernel_size=(3, 3, 3), stride=(1, 1, 1))
(12): BatchNorm3d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)