extract_roi_volume.py 3.44 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#%%
import os
import numpy as np
import SimpleITK as sitk
import pandas as pd
from pathlib import Path
import sys

PATH = os.path.join(os.path.abspath(os.path.curdir), '..')
sys.path.append(PATH)
from tqdm import tqdm
import dicom_utils.dicom_utils as du
import dicom_utils.dicom_utils.visualize as viz
import gc

from utils import sitk_to_numpy, filter_outbounds

# os.chdir(PATH)
#%%
DATASET_NAME = 'HN_BZ'
PROJECT_DATA_PATH = Path('data') / DATASET_NAME
PROCESSED_DATA_PATH = PROJECT_DATA_PATH / 'processed'
CLINICAL_DATA_FILENAME_CLEAN = f'clinical_{DATASET_NAME}.csv'

DCM_DIR = PROCESSED_DATA_PATH / 'dcm'
BBOX_OUT_DIR = PROCESSED_DATA_PATH / 'bbox'
BBOX_SUBDATASET = 'bbox_64'

OUTDIR = BBOX_OUT_DIR / BBOX_SUBDATASET

os.makedirs(OUTDIR, exist_ok=False)


CT_ranges = [-1050, 3050]
PT_ranges = [0, 50]
BOX_SIZE = 64  # mm == pixels for voxel size of 1mm3
VOXEL_SIZE = [1, 1, 1]
HALF_BOX = BOX_SIZE // 2

# gaussian = sitk.SmoothingRecursiveGaussianImageFilter()
# gaussian.SetSigma(2)

#%%
clinical = pd.read_csv(PROCESSED_DATA_PATH / CLINICAL_DATA_FILENAME_CLEAN)

available_patients = patients = [
    f for f in os.listdir(DCM_DIR) if os.path.isdir(DCM_DIR / f)
]

clinical = clinical.loc[clinical['patient'].isin(available_patients)]

errors = []
#%%
for i, row in tqdm(clinical.iterrows()):

    try:
        patient = row['patient']
58
        filename_out = row['filename']
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
        roi_name = row['ROI_name']
        roi_modality = row['ROI_modality']

        scan_CT = du.load_series(DCM_DIR / patient / 'CT')
        scan_PT = du.load_SUV(DCM_DIR / patient / 'PT')

        rs_filename = os.listdir(DCM_DIR / patient / 'RTSTRUCT')[0]

        if roi_modality == 'CT':

            segmentation = du.load_roi(
                DCM_DIR / patient / 'RTSTRUCT' / rs_filename, roi_name, scan_CT
            )
        elif roi_modality == 'PT':
            segmentation = du.load_roi(
                DCM_DIR / patient / 'RTSTRUCT' / rs_filename, roi_name, scan_PT
            )
        else:
            raise ValueError('ROI modality must be `CT` or `PT`.')

        start_mm, stop_mm = du.get_bbox_vertices(segmentation)
        center_mm = (np.array(stop_mm) + np.array(start_mm)) / 2

        start_mm = (
            center_mm - HALF_BOX - 5
        )  # add margin to allow a better interpolation
        stop_mm = center_mm + HALF_BOX + 5

        scan_CT_box = du.extract_volume(scan_CT, start_mm, stop_mm)
        scan_CT_box = filter_outbounds(scan_CT_box, CT_ranges)

        # upsample and register
        scan_CT_box = du.processing.resample(scan_CT_box, spacing=VOXEL_SIZE)

        scan_PT_box = sitk.Resample(scan_PT, scan_CT_box)
        scan_PT_box = filter_outbounds(scan_PT_box, PT_ranges)

        # segmentation_box = sitk.Resample(segmentation, scan_CT_box)
        # segmentation_box = gaussian.Execute(segmentation_box)
        # out = segmentation_box>0.5

        start_mm = start_mm + 5  # remove margin
        stop_mm = stop_mm - 5  # remove margin

        scan_CT_box = du.extract_volume(scan_CT_box, start_mm, stop_mm)
        scan_PT_box = du.extract_volume(scan_PT_box, start_mm, stop_mm)

        out = np.stack([sitk_to_numpy(scan_CT_box), sitk_to_numpy(scan_PT_box)], axis=0)

        # save
109
        np.save(OUTDIR / filename_out, out)
110
111
112
113
114
115
116
117
118
119
120
121

        del scan_CT_box, scan_PT_box, segmentation, out
        gc.collect()

    except Exception as e:
        print(e)
        errors.append(row['patient'])
        print(f'Error processing patient: {row["patient"]}')

#%%
print(errors)