Commit ad667c97 authored by Alessia Marcolini
Browse files

Store patients and labels along with the features

parent 55933f62
%% Cell type:markdown id: tags:
## Deep features extraction
%% Cell type:code id: tags:
``` python
# Custom IPython line magic; its definition is not visible in this notebook.
# NOTE(review): presumably prepares the project environment -- confirm where
# `HN_env` is registered before relying on it.
%HN_env
```
%% Cell type:code id: tags:
``` python
import os
# Absolute path of the directory the notebook is started from; the experiment
# and clinical-data paths further down are built relative to it.
PATH = os.path.abspath(os.path.curdir)
```
%% Cell type:code id: tags:
``` python
# (Re)load IPython's autoreload extension and select mode 2, which re-imports
# modules before each cell runs so edits to imported modules are picked up
# without restarting the kernel.
%reload_ext autoreload
%autoreload 2
```
%% Cell type:markdown id: tags:
### Import
%% Cell type:code id: tags:
``` python
import os
import sys
from tqdm import tqdm
import numpy as np
import pandas as pd
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from networks import CiompiDO
from dataset import NumpyCSVDataset, augment_3D_HN
```
%% Cell type:code id: tags:
``` python
# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Read by the model-loading cell to decide whether to move the model to `device`.
multigpu = True
```
%% Cell type:code id: tags:
``` python
# Passed as the first argument to NumpyCSVDataset below.
# NOTE(review): presumably the root folder of the preprocessed volumes -- confirm in dataset.py.
DATASETDIR = '/thunderdisk/HN/processed/bbox_fixed2_64'
# Parent folder of the per-model experiment directories (weights in, features out).
EXPERIMENT_DIR = f'{PATH}/experiments'
```
%% Cell type:code id: tags:
``` python
# Name of the experiment sub-folder under EXPERIMENT_DIR; its weights.pth is loaded below.
MODEL_NAME = 'Tstage_binary_augmented_noTx_branch_wise_20191028-104101'
# Forwarded to NumpyCSVDataset.
# NOTE(review): presumably the edge length of the input volume -- TODO confirm.
SIZE = 64
# Output folder for the extracted features. It already ends with a slash, so
# f'{OUTDIR}/{OUTFILE}' further down contains a doubled separator.
OUTDIR = f'{EXPERIMENT_DIR}/{MODEL_NAME}/features/'
# File name of the CSV written at the end of the notebook.
OUTFILE = 'features_noTx.csv'
# Create the output folder up front; exist_ok makes re-running the cell safe.
os.makedirs(OUTDIR, exist_ok=True)
```
%% Cell type:code id: tags:
``` python
# Dataset built from DATASETDIR and the clinical CSV, opened in 'test' mode.
clinical_csv = f'{PATH}/data/clinical_data_noTx.csv'
dataset = NumpyCSVDataset(DATASETDIR, clinical_csv, 'Locoregional', SIZE, mode='test')

# shuffle=False and drop_last=False: every sample is visited exactly once, in
# dataset order, so features and sample names stay aligned.
loader = DataLoader(
    dataset,
    batch_size=8,
    shuffle=False,
    drop_last=False,
    num_workers=12,
    pin_memory=True,
)

# Checkpoint restored by the model-loading cell.
model_weights = f'{EXPERIMENT_DIR}/{MODEL_NAME}/weights.pth'
```
%% Cell type:code id: tags:
``` python
# Build the two-channel CT/PET network and restore its trained weights.
model = CiompiDO(n_classes=2, n_channels=2, modality='CT/PET')

# map_location lets a checkpoint saved on a GPU be restored on whichever
# device is actually available (including CPU-only hosts).
model.load_state_dict(torch.load(model_weights, map_location=device))

# The previous code wrapped the model in nn.DataParallel and immediately
# unwrapped it through `.module`; the only lasting effect was moving the
# model to `device`, and only when `multigpu` was set. Move it unconditionally
# so the model always lives on the same device as the batches fed to it.
model = model.to(device)
```
%% Cell type:code id: tags:
``` python
#%%
# Per-batch accumulators; they are flattened with np.concatenate afterwards,
# so each one must receive exactly one entry per batch.
deep_features = []
sample_names = []
labels = []

# Switch to inference behaviour before extracting features.
# NOTE(review): matters only if CiompiDO contains dropout/normalisation
# layers; eval() is a harmless no-op otherwise.
model.eval()

# Gradients are never needed here, so skip building the autograd graph.
with torch.no_grad():
    for batch in tqdm(loader):
        # Sample name = file name up to its first dot.
        names = [name.split('.')[0] for name in batch['filename']]
        # Use the device chosen at the top of the notebook instead of a
        # hard-coded .cuda(), so the CPU fallback actually works.
        image = batch['data'].to(device)
        label = batch['target']

        out = model.extract_features(image)

        deep_features.append(out.detach().cpu().numpy())
        sample_names.append(names)
        labels.append(label)
```
%% Cell type:code id: tags:
``` python
# Collapse the per-batch entries into one array per quantity (first axis).
deep_features, sample_names, labels = (
    np.concatenate(chunks, axis=0)
    for chunks in (deep_features, sample_names, labels)
)
```
%% Cell type:code id: tags:
``` python
# Notebook-style check: displays how many labels were collected.
len(labels)
```
%% Cell type:code id: tags:
``` python
# Sanity check: the first dimension of the feature array should equal the
# number of sample names and the number of labels.
print(deep_features.shape, len(sample_names),len(labels))
```
%% Cell type:code id: tags:
``` python
# One row per sample, indexed by sample name; columns are the feature values.
# The labels collected above are not written to this table (the `class`
# column assignment is disabled in this version).
deep_features_pd = pd.DataFrame(data=deep_features, index=sample_names)
#%% SAVE
print(deep_features_pd.shape)
features_csv = f'{OUTDIR}/{OUTFILE}'
deep_features_pd.to_csv(features_csv)
```
%% Cell type:code id: tags:
``` python
```
# To add a new cell, type '# %%'
# To add a new markdown cell, type '# %% [markdown]'
# %%
from IPython import get_ipython
# %% [markdown]
# ## Deep features extraction
# %%
# Custom IPython line magic; its definition is not visible in this file.
# NOTE(review): presumably prepares the project environment -- confirm where
# `HN_env` is registered before relying on it.
get_ipython().run_line_magic('HN_env', '')
# %%
import os
# Absolute path of the directory the script is started from; the experiment
# and clinical-data paths further down are built relative to it.
PATH = os.path.abspath(os.path.curdir)
# %%
# (Re)load IPython's autoreload extension and select mode 2, which re-imports
# modules before each cell runs so edits to imported modules are picked up
# without restarting the kernel.
get_ipython().run_line_magic('reload_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')
# %% [markdown]
# ### Import
# %%
import os
import sys
from tqdm import tqdm
import numpy as np
import pandas as pd
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from networks import CiompiDO
from dataset import NumpyCSVDataset, augment_3D_HN
# %%
# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Read by the model-loading cell to decide whether to move the model to `device`.
multigpu = True
# %%
# Passed as the first argument to NumpyCSVDataset below.
# NOTE(review): presumably the root folder of the preprocessed volumes -- confirm in dataset.py.
DATASETDIR = '/thunderdisk/HN/processed/bbox_64_augmented_LR'
# Parent folder of the per-model experiment directories (weights in, features out).
EXPERIMENT_DIR = f'{PATH}/experiments'
# %%
# Name of the experiment sub-folder under EXPERIMENT_DIR; its weights.pth is loaded below.
MODEL_NAME = 'LR_noTx_branch_wise_free_aug_CT_20191027-124913'
# Forwarded to NumpyCSVDataset.
# NOTE(review): presumably the edge length of the input volume -- TODO confirm.
SIZE = 64
# Output folder for the extracted features. It already ends with a slash, so
# f'{OUTDIR}/{OUTFILE}' further down contains a doubled separator.
OUTDIR = f'{EXPERIMENT_DIR}/{MODEL_NAME}/features/'
# File name of the CSV written at the end of the script.
OUTFILE = 'features_noTx_AUG.csv'
# Create the output folder up front; exist_ok makes re-running the cell safe.
os.makedirs(OUTDIR, exist_ok=True)
# %%
# Dataset built from DATASETDIR and the clinical CSV, opened in 'test' mode.
clinical_csv = f'{PATH}/data/clinical_data_noTx.csv'
dataset = NumpyCSVDataset(DATASETDIR, clinical_csv, 'Locoregional', SIZE, mode='test')

# shuffle=False and drop_last=False: every sample is visited exactly once, in
# dataset order, so features, names, labels and patients stay aligned.
loader = DataLoader(
    dataset,
    batch_size=8,
    shuffle=False,
    drop_last=False,
    num_workers=12,
    pin_memory=True,
)

# Checkpoint restored by the model-loading cell.
model_weights = f'{EXPERIMENT_DIR}/{MODEL_NAME}/weights.pth'
# %%
# Build the single-channel CT network and restore its trained weights.
model = CiompiDO(n_classes=2, n_channels=1, modality='CT')

# map_location lets a checkpoint saved on a GPU be restored on whichever
# device is actually available (including CPU-only hosts).
model.load_state_dict(torch.load(model_weights, map_location=device))

# The previous code wrapped the model in nn.DataParallel and immediately
# unwrapped it through `.module`; the only lasting effect was moving the
# model to `device`, and only when `multigpu` was set. Move it unconditionally
# so the model always lives on the same device as the batches fed to it.
model = model.to(device)
# %%
#%%
# Per-batch accumulators; they are flattened with np.concatenate afterwards,
# so each one must receive exactly one entry per batch.
deep_features = []
sample_names = []
labels = []
patients = []

# Switch to inference behaviour before extracting features.
# NOTE(review): matters only if CiompiDO contains dropout/normalisation
# layers; eval() is a harmless no-op otherwise.
model.eval()

# Gradients are never needed here, so skip building the autograd graph.
with torch.no_grad():
    for batch in tqdm(loader):
        # Sample name = file name up to its first dot.
        names_batch = [name.split('.')[0] for name in batch['filename']]
        # Use the device chosen at the top of the script instead of a
        # hard-coded .cuda(), so the CPU fallback actually works.
        images_batch = batch['data'].to(device)
        labels_batch = batch['target']
        patients_batch = batch['patient']

        out = model.extract_features(images_batch)

        deep_features.append(out.detach().cpu().numpy())
        sample_names.append(names_batch)
        labels.append(labels_batch)
        patients.append(patients_batch)
# %%
# Collapse the per-batch entries into one array per quantity (first axis).
deep_features, sample_names, labels, patients = (
    np.concatenate(chunks, axis=0)
    for chunks in (deep_features, sample_names, labels, patients)
)
# %%
# Notebook-style check: displays the number of collected labels when run as a
# cell (a bare expression has no visible effect when run as a plain script).
len(labels)
# %%
# Sanity check: the first dimension of the feature array should equal the
# number of sample names and the number of labels.
print(deep_features.shape, len(sample_names),len(labels))
# %%
# One row per sample, indexed by sample name: the feature columns come first,
# followed by the sample's label and patient identifier.
deep_features_pd = pd.DataFrame(data=deep_features, index=sample_names)
deep_features_pd = deep_features_pd.assign(label=labels, patient=patients)
#%% SAVE
print(deep_features_pd.shape)
features_csv = f'{OUTDIR}/{OUTFILE}'
deep_features_pd.to_csv(features_csv)
# %%
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment