Commit b961f1bf authored by Alessia Marcolini

Add deep features extraction script

parent 2b1a6b2c
# %%
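# Extract deep features from bounding-box crops of the HN_val dataset using a
# trained CiompiDO network, and save them (together with sample metadata) to
# CSV files, one for the train partition and one for the test partition.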
import os
from pathlib import Path
PATH = Path(os.path.abspath(os.path.curdir))
# %%
import sys
from tqdm import tqdm
import numpy as np
import pandas as pd
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from networks import CiompiDO
from dataset import NumpyCSVDataset, augment_3D_HN
from split import train_test_indexes_patient_wise
# %%
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
multigpu = True
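# CUDA_VISIBLE_DEVICES is restricted to GPU 0 above, so even with
# multigpu=True, DataParallel only sees a single device here.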
# %%
DATASET = 'HN_val'
BBOX_SUBDATASET = 'bbox_64'
DATASET_DIR = PATH / 'data' / DATASET / 'processed' / 'bbox' / BBOX_SUBDATASET
EXPERIMENT_DIR = PATH / 'experiments'
# %%
MODEL_NAME = 'Tstage_binary_PET_noTx_20191022-124046'
SIZE = 64
SEED = 1234 # TODO: read from pickled settings instead
K = 0.2 # TODO: read from pickled settings instead
OUTDIR = EXPERIMENT_DIR / MODEL_NAME / 'features'
OUTFILE = 'features'
os.makedirs(OUTDIR, exist_ok=True)
clinical_file = PATH / 'data' / DATASET / 'processed' / f'clinical_{DATASET}.csv'
target_column = 'locoregional'
# %%
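# Two views of the same data directory, one in 'train' mode and one in 'test'
# mode; the actual sample assignment happens below by overriding the .indices
# attribute of each dataset.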
dataset_train = NumpyCSVDataset(
    data_dir=DATASET_DIR,
    clinical_file=clinical_file,
    label_col=target_column,
    size=SIZE,
    mode='train',
    seed=SEED,
)
dataset_test = NumpyCSVDataset(
    data_dir=DATASET_DIR,
    clinical_file=clinical_file,
    label_col=target_column,
    size=SIZE,
    mode='test',
    seed=SEED,
)
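# Patient-wise, label-stratified split, so that samples from the same patient
# are not shared between the train and test partitions.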
idx_train, idx_test = train_test_indexes_patient_wise(
    dataset_train, test_size=K, stratify=True, seed=SEED
)
dataset_train.indices = np.array(idx_train)
dataset_test.indices = np.array(idx_test)
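# The training loader shuffles; the test loader preserves sample order.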
loader_train = DataLoader(
    dataset_train, batch_size=8, num_workers=12, pin_memory=True, shuffle=True
)
loader_test = DataLoader(dataset_test, batch_size=8, num_workers=12, shuffle=False)
model_weights = EXPERIMENT_DIR / MODEL_NAME / 'weights.pth'
# %%
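# Single-channel PET network with two output classes; the trained weights are
# restored below layer by layer, skipping parameters whose name or shape does
# not match the current architecture.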
model = CiompiDO(n_classes=2, n_channels=1, modality='PET')
if multigpu:
    model = nn.DataParallel(model.to(device))
else:
    model = model.to(device)
# model.load_state_dict(torch.load(model_weights), strict=False)
pretrained_dict = torch.load(model_weights)
model_dict = model.state_dict()
to_add = "module." if multigpu else ""
# discard layers not present in destination network or with different shape
pretrained_dict = {
    k: v
    for k, v in pretrained_dict.items()
    if (k in model_dict) and (model_dict[k].shape == v.shape)
}
for name in model.state_dict().keys():
    if name in pretrained_dict:
        print(name)
        model.state_dict()[name].copy_(pretrained_dict[name])
    else:
        pass
        # print(name)
if multigpu:
    model = model.module

# %%
# Extract deep features from training dataset
deep_features_train = []
sample_names_train = []
labels_train = []
patients_train = []
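# Run the network without gradient tracking and accumulate per-batch feature
# tensors and metadata; everything is concatenated into flat arrays below.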
with torch.no_grad():
    for batch in tqdm(loader_train):
        filenames_batch = batch['filename']
        images_batch = batch['data'].to(device)
        labels_batch = batch['target']
        patients_batch = batch['patient']

        out = model.extract_features(images_batch)
        deep_features_train.append(out.data.cpu().numpy())
        sample_names_train.append(filenames_batch)
        labels_train.append(labels_batch)
        patients_train.append(patients_batch)
# %%
deep_features_train = np.concatenate(deep_features_train)
sample_names_train = np.concatenate(sample_names_train)
labels_train = np.concatenate(labels_train)
patients_train = np.concatenate(patients_train)
# %%
print(deep_features_train.shape, len(sample_names_train), len(labels_train))
# %%
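# One row per crop: the feature columns first, followed by metadata
# (filename, label, patient, dataset, bbox subdataset, train/test mode).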
deep_features_pd_train = pd.DataFrame(deep_features_train)
deep_features_pd_train['filename'] = sample_names_train
deep_features_pd_train[target_column] = labels_train
deep_features_pd_train['patient'] = patients_train
deep_features_pd_train['dataset'] = DATASET
deep_features_pd_train['bbox_subdataset'] = BBOX_SUBDATASET
deep_features_pd_train['mode'] = 'train'
# %% SAVE
print(deep_features_pd_train.shape)
deep_features_pd_train.to_csv(OUTDIR / f'{OUTFILE}_train.csv', index=False)
# %%
# Extract deep features from test dataset
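# Same procedure as for the training set, using the (unshuffled) test loader.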
deep_features_test = []
sample_names_test = []
labels_test = []
patients_test = []
with torch.no_grad():
    for batch in tqdm(loader_test):
        filenames_batch = batch['filename']
        images_batch = batch['data'].to(device)
        labels_batch = batch['target']
        patients_batch = batch['patient']

        out = model.extract_features(images_batch)
        deep_features_test.append(out.data.cpu().numpy())
        sample_names_test.append(filenames_batch)
        labels_test.append(labels_batch)
        patients_test.append(patients_batch)
# %%
deep_features_test = np.concatenate(deep_features_test)
sample_names_test = np.concatenate(sample_names_test)
labels_test = np.concatenate(labels_test)
patients_test = np.concatenate(patients_test)
# %%
print(deep_features_test.shape, len(sample_names_test), len(labels_test))
# %%
deep_features_pd_test = pd.DataFrame(deep_features_test)
deep_features_pd_test['filename'] = sample_names_test
deep_features_pd_test[target_column] = labels_test
deep_features_pd_test['patient'] = patients_test
deep_features_pd_test['dataset'] = DATASET
deep_features_pd_test['bbox_subdataset'] = BBOX_SUBDATASET
deep_features_pd_test['mode'] = 'test'
# %% SAVE
print(deep_features_pd_test.shape)
deep_features_pd_test.to_csv(OUTDIR / f'{OUTFILE}_test.csv', index=False)
# %%