Commit a714181f authored by Alessia Marcolini's avatar Alessia Marcolini
Browse files

Radiomics pipeline repo

parent 32ce2db1
.ipynb_checkpoints/
.project
.pydevproject
.spyproject
__pycache__
# Compiled python modules.
*.pyc
# Setuptools distribution folder.
/dist/
# Python egg metadata, regenerated from source files by setuptools.
/*.egg-info
data
data/
results
results/
experiments/
experiments
PATH = '/home/bizzego/projects/networks_dami/radiomics_pipeline'
import numpy as np
import pandas as pd
#%%
FEATDIR = f'{PATH}/data/HN/features'
FILENAME = 'merged_features_B_UNCORR_selected'
FEATURE_FILE = f'{FEATDIR}/selected/{FILENAME}.csv'
features = pd.read_csv(FEATURE_FILE, index_col=0)
#%%
subjects = features.index.values
is_train = [x.split('-')[1] in ['HGJ', 'CHUS'] for x in subjects]
is_test = [x.split('-')[1] in ['HMR', 'CHUM'] for x in subjects]
features_train = features.iloc[is_train, :]
features_test = features.iloc[is_test, :]
#%%
features_train.to_csv(f'{FEATDIR}/selected_TT/{FILENAME}_TRAIN.csv')
features_test.to_csv(f'{FEATDIR}/selected_TT/{FILENAME}_TEST.csv')
PATH = '/home/bizzego/projects/radiomics_pipeline'
import numpy as np
import pandas as pd
import torch
from dap.datasets.numpydatasets import NumpyCSVDataset, augment_3D
from networks import CNN3D_NET
#%%
DATASET = 'OPBG'
LAB_COL = 'group'
MODEL_NAME = f'DAP_bbox_T2-FLAIR_{LAB_COL}_CNN3D'
SIZE = 64
#%%
DATADIR = f'{PATH}/data/{DATASET}/bbox_T2-FLAIR_noseg'
LABELS_FILE = f'{PATH}/data/{DATASET}/labels.csv'
OUTDIR = f'{PATH}/data/{DATASET}/features'
OUTFILE = 'deep_features.csv'
#%%
dataset = NumpyCSVDataset(DATADIR, LABELS_FILE, LAB_COL, 'multi', SIZE, augmentation_function=augment_3D, name = f'bbox_{LAB_COL}')
model_file = f'{PATH}/data/{DATASET}/predictions/{MODEL_NAME}/components/torch_model.th'
model = torch.load(model_file)
#%%
deep_features = []
sample_names = []
with torch.autograd.no_grad():
for i,sample in enumerate(dataset):
name = sample['sample']
image = sample['data']
image = image.unsqueeze(0)
out = model.extract_features(image)
deep_features.append(out.numpy())
sample_names.append(name)
deep_features = np.array(deep_features)
deep_features_pd = pd.DataFrame(deep_features[:,0,:], index = sample_names)
#%% SAVE
print(deep_features_pd.shape)
deep_features_pd.to_csv(f'{OUTDIR}/{OUTFILE}')
PATH = '/home/bizzego/projects/radiomics_pipeline'
import numpy as np
import pandas as pd
import torch
from dap.datasets.numpydatasets import NumpyCSVDataset, augment_3D
from networks import CNN3D_NET
#%%
model_file = f'{PATH}/data/BraTS/predictions/DAP_bbox_lab_3classes_CNN3D/components/torch_model.th'
DATASET = 'OPBG'
LAB_COL = 'group'
SIZE = 64
#%%
DATADIR = f'{PATH}/data/{DATASET}/bbox_T2-FLAIR_noseg'
LABELS_FILE = f'{PATH}/data/{DATASET}/labels.csv'
OUTDIR = f'{PATH}/data/{DATASET}/features'
OUTFILE = 'transfer_features_FLAIR.csv'
#%%
dataset = NumpyCSVDataset(DATADIR, LABELS_FILE, LAB_COL, 'multi', SIZE, augmentation_function=augment_3D, name = f'bbox_{LAB_COL}')
model = torch.load(model_file)
'''
#%%
deep_features = []
sample_names = []
with torch.autograd.no_grad():
for i,sample in enumerate(dataset):
name = sample['sample']
image = sample['data']
image = image.unsqueeze(0)
out_t2 = model.features_1(image[:,0,:,:,:].unsqueeze(1)).view(1, -1).numpy()
out_flair = model.features_3(image[:,1,:,:,:].unsqueeze(1)).view(1, -1).numpy()
out = np.hstack([out_t2, out_flair])
deep_features.append(out)
sample_names.append(name)
'''
deep_features = []
sample_names = []
with torch.autograd.no_grad():
for i,sample in enumerate(dataset):
name = sample['sample']
image = sample['data']
image = image.unsqueeze(0)
#out_t2 = model.features_1(image[:,0,:,:,:].unsqueeze(1)).view(1, -1).numpy()
out_flair = model.features_3(image[:,1,:,:,:].unsqueeze(1)).view(1, -1).numpy()
#out = np.hstack([out_t2, out_flair])
deep_features.append(out_flair)
sample_names.append(name)
deep_features = np.array(deep_features)
deep_features_pd = pd.DataFrame(deep_features[:,0,:], index = sample_names)
#%% SAVE
print(deep_features_pd.shape)
deep_features_pd.to_csv(f'{OUTDIR}/{OUTFILE}')
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 5 11:31:52 2018
@author: andrea
"""
import torch.nn as nn
import torch
def get_conv3d_block(n_features, dropout):
block = nn.Sequential(nn.Conv3d(1, 20, 3, padding=1),
nn.BatchNorm3d(20),
nn.ReLU(True),
nn.MaxPool3d(2,stride=2),
nn.Dropout3d(dropout),
nn.Conv3d(20, 40, 3, padding=1),
nn.BatchNorm3d(40),
nn.ReLU(True),
nn.MaxPool3d(2,stride=2),
nn.Dropout3d(dropout/2),
nn.Conv3d(40, n_features, 3),
nn.BatchNorm3d(n_features),
nn.ReLU(True),
nn.MaxPool3d(2,stride=2),
nn.Dropout3d(dropout/4),
nn.AdaptiveAvgPool3d(1))
return(block)
def get_fully_connect(n_features, n_channels, n_classes):
block = nn.Sequential(nn.Linear(n_features*n_channels, 50),
nn.ReLU(),
nn.Dropout(0.1),
nn.Linear(50, n_classes),
nn.Softmax(1))
return(block)
class CNN3D_NET(nn.Module):
def __init__(self, **params):
super(CNN3D_NET, self).__init__()
self.n_classes = params.get('n_classes', 2)
self.n_channels = params.get('n_channels', 1)
self.dropout = params.get('dropout', 0.5)
self.n_features = params.get('n_features', 80)
for ID_CH in range(self.n_channels):
setattr(self, f'features_{ID_CH}', get_conv3d_block(self.n_features, self.dropout))
self.linear = get_fully_connect(self.n_features, self.n_channels, self.n_classes)
def forward(self, x):
out_features = []
for ID_CH in range(self.n_channels):
x_channel = x[:, ID_CH, :,:,:].unsqueeze(1)
conv_block = getattr(self, f'features_{ID_CH}')
out_channel = conv_block(x_channel).view(x.shape[0], -1)
out_features.append(out_channel)
out_merged = torch.cat(out_features, dim=1)
out = self.linear(out_merged)
return(out)
def extract_features(self, x):
out_features = []
for ID_CH in range(self.n_channels):
x_channel = x[:, ID_CH, :,:,:].unsqueeze(1)
conv_block = getattr(self, f'features_{ID_CH}')
out_channel = conv_block(x_channel).view(x.shape[0], -1)
out_features.append(out_channel)
out_merged = torch.cat(out_features, dim=1)
return(out_merged)
def initialize_weights(self):
for m in self.modules():
if isinstance(m, nn.Linear):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Conv3d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
elif isinstance(m, nn.BatchNorm3d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
'''
class FeatureNet(nn.Module):
def __init__(self, **params):
super(FeatureNet, self).__init__()
self.n_feat = params['n_feat'] if 'n_feat' in params else 1000
self.n_classes = params['n_classes'] if 'n_classes' in params else 2
self.n_hidden = params['n_hidden'] if 'n_hidden' in params else 1000
self.dropout = params['dropout'] if 'dropout' in params else 0.5
self.fc1 = nn.Sequential(nn.Linear(self.n_feat, self.n_hidden),
nn.Dropout(self.dropout),
nn.ReLU())
self.fc2 = nn.Sequential(nn.Linear(self.n_hidden, 100),
nn.Dropout(self.dropout/2),
nn.ReLU())
self.fc3 = nn.Sequential(nn.Linear(100, self.n_classes),
nn.Dropout(self.dropout/4),
nn.Softmax(dim=1))
def forward(self, x):
out = self.fc3(self.fc2(self.fc1(x)))
# print(self.get_params())
return(out)
def extract_features(self, x):
out = self.fc2(self.fc1(x))
return(out)
def get_params(self):
return({'n_classes':self.n_classes, 'n_hidden':self.n_hidden, 'dropout':self.dropout})
def initialize_weights(self):
for m in self.modules():
if isinstance(m, nn.Linear):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
nn.init.constant_(m.bias, 0)
'''
\ No newline at end of file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 5 11:31:52 2018
@author: andrea
"""
import torch.nn as nn
import torch
def get_conv3d_block(n_features, dropout):
block = nn.Sequential(nn.Conv3d(1, 20, 3),
nn.BatchNorm3d(20),
nn.ReLU(True),
nn.MaxPool3d(2,stride=2),
nn.Dropout3d(dropout),
nn.Conv3d(20, 40, 3),
nn.BatchNorm3d(40),
nn.ReLU(True),
nn.MaxPool3d(2,stride=2),
nn.Dropout3d(dropout/2),
nn.Conv3d(40, 80, 3),
nn.BatchNorm3d(80),
nn.ReLU(True),
nn.MaxPool3d(2,stride=2),
nn.Dropout3d(dropout/4),
nn.Conv3d(80, n_features, 3),
nn.BatchNorm3d(n_features),
nn.ReLU(True),
nn.MaxPool3d(2,stride=2),
nn.Dropout3d(dropout/4),
nn.AdaptiveAvgPool3d(1))
return(block)
def get_fully_connect(n_features, n_channels, n_classes):
block = nn.Sequential(nn.Linear(n_features*n_channels, 50),
nn.ReLU(),
nn.Dropout(0.1),
nn.Linear(50, n_classes),
nn.Softmax(1))
return(block)
class CNN3D_NET(nn.Module):
def __init__(self, **params):
super(CNN3D_NET, self).__init__()
self.n_classes = params.get('n_classes', 2)
self.n_channels = params.get('n_channels', 1)
self.dropout = params.get('dropout', 0.5)
self.n_features = params.get('n_features', 160)
for ID_CH in range(self.n_channels):
setattr(self, f'features_{ID_CH}', get_conv3d_block(self.n_features, self.dropout))
self.linear = get_fully_connect(self.n_features, self.n_channels, self.n_classes)
def forward(self, x):
out_features = []
for ID_CH in range(self.n_channels):
x_channel = x[:, ID_CH, :,:,:].unsqueeze(1)
conv_block = getattr(self, f'features_{ID_CH}')
out_channel = conv_block(x_channel).view(x.shape[0], -1)
out_features.append(out_channel)
out_merged = torch.cat(out_features, dim=1)
out = self.linear(out_merged)
return(out)
def extract_features(self, x):
out_features = []
for ID_CH in range(self.n_channels):
x_channel = x[:, ID_CH, :,:,:].unsqueeze(1)
conv_block = getattr(self, f'features_{ID_CH}')
out_channel = conv_block(x_channel).view(x.shape[0], -1)
out_features.append(out_channel)
out_merged = torch.cat(out_features, dim=1)
return(out_merged)
def initialize_weights(self):
for m in self.modules():
if isinstance(m, nn.Linear):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Conv3d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
elif isinstance(m, nn.BatchNorm3d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
'''
class FeatureNet(nn.Module):
def __init__(self, **params):
super(FeatureNet, self).__init__()
self.n_feat = params['n_feat'] if 'n_feat' in params else 1000
self.n_classes = params['n_classes'] if 'n_classes' in params else 2
self.n_hidden = params['n_hidden'] if 'n_hidden' in params else 1000
self.dropout = params['dropout'] if 'dropout' in params else 0.5
self.fc1 = nn.Sequential(nn.Linear(self.n_feat, self.n_hidden),
nn.Dropout(self.dropout),
nn.ReLU())
self.fc2 = nn.Sequential(nn.Linear(self.n_hidden, 100),
nn.Dropout(self.dropout/2),
nn.ReLU())
self.fc3 = nn.Sequential(nn.Linear(100, self.n_classes),
nn.Dropout(self.dropout/4),
nn.Softmax(dim=1))
def forward(self, x):
out = self.fc3(self.fc2(self.fc1(x)))
# print(self.get_params())
return(out)
def extract_features(self, x):
out = self.fc2(self.fc1(x))
return(out)
def get_params(self):
return({'n_classes':self.n_classes, 'n_hidden':self.n_hidden, 'dropout':self.dropout})
def initialize_weights(self):
for m in self.modules():
if isinstance(m, nn.Linear):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
nn.init.constant_(m.bias, 0)
'''
\ No newline at end of file
PATH = '/home/bizzego/projects/radiomics_pipeline'
import numpy as np
import dap
import os
import torch
torch.set_num_threads(20)
import SimpleITK as sitk
import dicom_utils.augmentation as aug
from dap.datasets.numpydatasets import NumpyCSVDataset, augment_3D
from dap.metrics import BinaryMetrics, MulticlassMetrics
from dap.models.deepmodel import DeepLearningModel
from dap.crossval import kfold_split
import dicom_utils.processing as dup
from networks_OPBG import CNN3D_NET
#%%
DATASET = 'OPBG'
LAB_COL = 'group'
N_CLASSES = 4
SIZE = 64
N_CHANNELS = 2
#%%
DATADIR = f'{PATH}/data/{DATASET}/bbox_T2-FLAIR_noseg'
OUT_DIR = f'{PATH}/data/{DATASET}/predictions'
LABELS_FILE = f'{PATH}/data/{DATASET}/labels.csv'
dataset = NumpyCSVDataset(DATADIR, LABELS_FILE, LAB_COL, 'multi', SIZE, augmentation_function=augment_3D, name = f'bbox_T2-FLAIR_{LAB_COL}')
##%
#dataset.INFO['mode'] = 'train'
#test_sample = dataset[100]
#image = sitk.GetImageFromArray(test_sample['data'].numpy()[0,:,:,:])
#viz.multi_slice_viewer(image)
#%
metrics = MulticlassMetrics(reference='MCC')
model = DeepLearningModel(CNN3D_NET, params={'device': 'cpu', 'n_classes':N_CLASSES, 'n_channels':N_CHANNELS, 'n_features':160}, name='CNN3D_OPBG')
dap_settings = {'stratified_test': True,
'ratio_test': 0.2,
'stratified_valid': True,
'ratio_valid': 0.2,
'fold_method': kfold_split,
'cv_n': 1,
'one_fold': True}
DAP = dap.DAP(dap_settings, dataset, model, metrics)
#%%
parameters = {'epochs':[1000], 'batch_size':[4, 8], 'dropout':[0.5, 0.1]}
param_grid = dap.dap.create_param_grid(parameters)
DAP.fit(param_grid)
#%%
DAP.predict_on_test()
#%%
DAP.save(OUT_DIR, lite=False)
PATH = '/home/bizzego/projects/networks_dami/radiomics_pipeline'
import numpy as np
import pandas as pd
import itertools
from sklearn.svm import SVC
#from sklearn.ensemble import RandomForestClassifier
#from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, Imputer
from sklearn.feature_selection import RFECV, mutual_info_classif, f_classif, SelectKBest
from sklearn.metrics import matthews_corrcoef as mcc, make_scorer, auc, precision_score
def create_param_grid(params):
keys, values = zip(*params.items())
experiments = [dict(zip(keys, v)) for v in itertools.product(*values)]
grid = dict([(i, d) for i, d in enumerate(experiments)])
return(grid)
#%%
DATASET = 'HN'
FEATURE_NAME = 'merged_features_B_UNCORR'
FEATUREFILE = f'selected/{FEATURE_NAME}.csv'
LAB_COL = 'Locoregional'
OUTFILE = f'selected/{FEATURE_NAME}_selected.csv'
#%%
PREPROCESS = True
REMOVE_CORRELATED = True
UNIVARIATE_RANKING = True
RFE_RANKING = True
UNIV_TESTER = f_classif
N_FEAT_UNIV = 1000
model = SVC()
model_params = {'C':[1], 'kernel':['linear']} # model_params = {'C':[0.001, 0.01, 0.1, 1, 10, 100, 1000], 'kernel':['linear']}
N_FEAT_RFE = 1000
#%%
FEATDIR = f'{PATH}/data/{DATASET}/features'
LABELFILE = f'{PATH}/data/{DATASET}/labels.csv'
#%%
features = pd.read_csv(f'{FEATDIR}/{FEATUREFILE}', index_col =0)
labels = pd.read_csv(LABELFILE, index_col =0)[LAB_COL].to_frame()
features = pd.merge(features, labels, left_index=True, right_index=True)
labels = features.pop(LAB_COL).to_frame()
X = features.values
y = labels.values[:,0]
print(X.shape)
#%% PREPROCESS
if PREPROCESS:
#% remove nan
imputer = Imputer()
X = imputer.fit_transform(X)
#% scale
scaler = StandardScaler()
X = scaler.fit_transform(X)
print('Done preprocessing')
print(X.shape, features.shape)
#%% REMOVE CORRELATED
if REMOVE_CORRELATED:
corr_matrix = abs(np.corrcoef(X, rowvar=False))
corr_matrix[np.where(np.tril(np.ones(corr_matrix.shape), k=1).astype(np.bool) == True)] = 0
idx_to_keep = [(corr_matrix[:,idx]<=0.95).all() for idx in range(corr_matrix.shape[1])]
X = X[:,idx_to_keep]
features = features.iloc[:,idx_to_keep]
print('Remove correlated')
print(X.shape, features.shape)
#%% UNIVARIATE RANKING
if UNIVARIATE_RANKING:
selector = SelectKBest(UNIV_TESTER, k='all')
selector.fit(X, y)
scores = selector.scores_
idx_sorted = np.argsort(scores)[::-1]
if N_FEAT_UNIV == 0:
X = X[:,idx_sorted]
features = features.iloc[:,idx_sorted]
else:
X = X[:,idx_sorted[:N_FEAT_UNIV]]
features = features.iloc[:,idx_sorted[:N_FEAT_UNIV]]
print('Done univariate')
print(X.shape, features.shape)