Commit 34b94221 authored by Alessia Marcolini's avatar Alessia Marcolini
Browse files

Extract ROI volume according to new dataset structure

parent f439ea47
#%%
import os
import numpy as np
import SimpleITK as sitk
import pandas as pd
from pathlib import Path
import sys
PATH = os.path.join(os.path.abspath(os.path.curdir), '..')
sys.path.append(PATH)
from tqdm import tqdm
import dicom_utils.dicom_utils as du
import dicom_utils.dicom_utils.visualize as viz
import gc
def to_numpy(image):
return(sitk.GetArrayFromImage(image))
from utils import sitk_to_numpy, filter_outbounds
# os.chdir(PATH)
#%%
DATASET_NAME = 'HN_BZ'
PROJECT_DATA_PATH = Path('data') / DATASET_NAME
PROCESSED_DATA_PATH = PROJECT_DATA_PATH / 'processed'
CLINICAL_DATA_FILENAME_CLEAN = f'clinical_{DATASET_NAME}.csv'
DCM_DIR = PROCESSED_DATA_PATH / 'dcm'
BBOX_OUT_DIR = PROCESSED_DATA_PATH / 'bbox'
BBOX_SUBDATASET = 'bbox_64'
#PATH = os.path.join(os.path.abspath(os.path.curdir), '..')
DATA_PATH = '/thunderdisk/HN/'
DATADIR = f'{DATA_PATH}/data/Head-Neck-PET-CT'
OUTDIR = f'{DATA_PATH}/processed/bbox_64/'# Destination of processed data (a folder for each patient will be created)
OUTDIR = BBOX_OUT_DIR / BBOX_SUBDATASET
os.makedirs(OUTDIR, exist_ok=False)
DIR_INFO_FILE = f'{PATH}/data/summary.csv' # where to find the info about the Original data folders to use
ROI_INFO_FILE = f'{PATH}/data/INFO_GTVcontours_HN.csv' # where to find the info about the name of the ROI
CT_ranges = [-1050, 3050]
PT_ranges = [0, 50]
BOX_SIZE = 64 # mm == pixels for voxel size of 1mm3
VOXEL_SIZE = [1,1,1]
HALF_BOX = BOX_SIZE//2
gaussian = sitk.SmoothingRecursiveGaussianImageFilter()
gaussian.SetSigma(2)
VOXEL_SIZE = [1, 1, 1]
HALF_BOX = BOX_SIZE // 2
# gaussian = sitk.SmoothingRecursiveGaussianImageFilter()
# gaussian.SetSigma(2)
#%%
dir_info = pd.read_csv(DIR_INFO_FILE)
roi_info = pd.read_csv(ROI_INFO_FILE)
clinical = pd.read_csv(PROCESSED_DATA_PATH / CLINICAL_DATA_FILENAME_CLEAN)
available_patients = patients = [
f for f in os.listdir(DCM_DIR) if os.path.isdir(DCM_DIR / f)
]
subjects = os.listdir(DATADIR)
clinical = clinical.loc[clinical['patient'].isin(available_patients)]
errors = []
#%%
for SUB in subjects:
#%%
print(SUB)
for i, row in tqdm(clinical.iterrows()):
try:
#%%
dir_info_sub = dir_info.query("subject == @SUB")
roi_name = roi_info.query('patient == @SUB')['roi_name'].values[0]
patient = row['patient']
roi_name = row['ROI_name']
roi_modality = row['ROI_modality']
scan_CT = du.load_series(DCM_DIR / patient / 'CT')
scan_PT = du.load_SUV(DCM_DIR / patient / 'PT')
dir_CT = dir_info_sub.query("modality == 'CT'").dir.values[0]
dir_PT = dir_info_sub.query("modality == 'PT'").dir.values[0]
dir_RT = dir_info_sub.query("modality == 'RTSTRUCT'").dir.values[0]
rs_filename = os.listdir(DCM_DIR / patient / 'RTSTRUCT')[0]
scan_CT = du.load_series(os.path.join(DATADIR, dir_CT))
scan_PT = du.load_SUV(os.path.join(DATADIR, dir_PT))
if roi_modality == 'CT':
#%%
if 'PETPET' in dir_RT:
segmentation = du.load_roi(os.path.join(DATADIR, dir_RT, '000000.dcm'), roi_name, scan_PT)
segmentation = du.load_roi(
DCM_DIR / patient / 'RTSTRUCT' / rs_filename, roi_name, scan_CT
)
elif roi_modality == 'PT':
segmentation = du.load_roi(
DCM_DIR / patient / 'RTSTRUCT' / rs_filename, roi_name, scan_PT
)
else:
segmentation = du.load_roi(os.path.join(DATADIR, dir_RT, '000000.dcm'), roi_name, scan_CT)
raise ValueError('ROI modality must be `CT` or `PT`.')
#%%
start_mm, stop_mm = du.get_bbox_vertices(segmentation)
center_mm = (np.array(stop_mm) + np.array(start_mm))/2
center_mm = (np.array(stop_mm) + np.array(start_mm)) / 2
start_mm = center_mm - HALF_BOX - 5 # add margin to allow a better interpolation
start_mm = (
center_mm - HALF_BOX - 5
) # add margin to allow a better interpolation
stop_mm = center_mm + HALF_BOX + 5
scan_CT_box = du.extract_volume(scan_CT, start_mm, stop_mm)
scan_CT_box = filter_outbounds(scan_CT_box, CT_ranges)
#%%
# upsample and register
scan_CT_box = du.processing.resample(scan_CT_box, spacing=VOXEL_SIZE)
scan_PT_box = sitk.Resample(scan_PT, scan_CT_box)
scan_PT_box = filter_outbounds(scan_PT_box, PT_ranges)
# segmentation_box = sitk.Resample(segmentation, scan_CT_box)
# segmentation_box = gaussian.Execute(segmentation_box)
# out = segmentation_box>0.5
......@@ -81,18 +102,19 @@ for SUB in subjects:
scan_CT_box = du.extract_volume(scan_CT_box, start_mm, stop_mm)
scan_PT_box = du.extract_volume(scan_PT_box, start_mm, stop_mm)
out = np.stack([to_numpy(scan_CT_box), to_numpy(scan_PT_box)], axis = 0)#, to_numpy(segmentation_box)], axis = 0)
out = np.stack([sitk_to_numpy(scan_CT_box), sitk_to_numpy(scan_PT_box)], axis=0)
#save
np.save(f'{OUTDIR}/{SUB}.npy', out)
# save
np.save(f'{OUTDIR}/{patient}.npy', out)
del scan_CT_box, scan_PT_box, segmentation, out
gc.collect()
except Exception as e:
print(e)
errors.append(SUB)
print(f'Error processing sub: {SUB}')
errors.append(row['patient'])
print(f'Error processing patient: {row["patient"]}')
#%%
print(errors)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment