extract_features_radiomics.py 6.04 KB
Newer Older
1
2
#%%
import os
3
4
from pathlib import Path

5
6
7
8
9
import SimpleITK as sitk
import numpy as np
import pandas as pd

from tqdm import tqdm
10
from pyradiomics_radler import MyRadiomicsFeaturesExtractor as RadiomicsFeatureExtractor
11
from multiproc import ListMultiprocessing
12
import warnings
13

14
warnings.filterwarnings("ignore")
15
# %%
Alessia Marcolini's avatar
Alessia Marcolini committed
16
17

# os.chdir('..')
18
19
DATASET = Path('HN_BZ')
BBOX = 'bbox_64_rad'
20
VOXEL_SPACING = (1.0, 1.0, 1.0)
21
22
23
SCAN_NAMES = ['CT', 'PT']
CT_index = SCAN_NAMES.index('CT')
PT_index = SCAN_NAMES.index('PT')
24
25
26
27
28
29
30

N_JOBS = 32
N_BINS = [8, 16, 32, 64]
PIXEL_SPACING = [1.0, 2.0, 3.0, 4.0, 5.0]
#%%
DATADIR = Path('data') / DATASET / 'processed' / 'bbox' / BBOX
OUTDIR = Path('data') / DATASET / 'processed'
31
OUTFILE = f'radiomics_features_{BBOX}_1.csv'  # output file name
Alessia Marcolini's avatar
Alessia Marcolini committed
32

33
34
35
clinical = pd.read_csv(Path('data') / DATASET / 'processed' / f'clinical_{DATASET}.csv')

#%%
36
37
38
39
40
41
FEATURES_CONFIGURATION_FOLDER = Path(__file__).parent.resolve()

params_shape = (
    FEATURES_CONFIGURATION_FOLDER / 'shape.yaml'
)  # param file to use to create the extractor
extractor_shape = RadiomicsFeatureExtractor(str(params_shape))
42

43
44
45
46
params_intensity_CT = (
    FEATURES_CONFIGURATION_FOLDER / 'intensity_CT.yaml'
)  # param file to use to create the extractor
extractor_intensity_CT = RadiomicsFeatureExtractor(str(params_intensity_CT))
47

48
49
50
51
params_intensity_PET = (
    FEATURES_CONFIGURATION_FOLDER / 'intensity_PET.yaml'
)  # param file to use to create the extractor
extractor_intensity_PET = RadiomicsFeatureExtractor(str(params_intensity_PET))
52

53
54
55
56
params_texture = (
    FEATURES_CONFIGURATION_FOLDER / 'texture.yaml'
)  # param file to use to create the extractor
extractor_texture = RadiomicsFeatureExtractor(str(params_texture))
57

58
filenames = [f for f in os.listdir(DATADIR) if f.endswith('.npy')]
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
exclude_list = []

#%%
def process_file(filename):
    broken = False

    data = np.load(f'{DATADIR}/{filename}')

    file_no_ext = filename.split('.')[0]

    if file_no_ext not in exclude_list:
        n_modalities = data.shape[0]
        modalities = []
        for i in range(n_modalities):
            curr_modality = sitk.GetImageFromArray(data[i, :, :, :].astype(np.float32))
            curr_modality.SetSpacing(VOXEL_SPACING)
            modalities.append(curr_modality)

77
        has_label = np.max(data[-1, :, :, :]) > 0  #### fix
78
79
80
81
82

        if has_label:
            segmentation = sitk.GetImageFromArray(data[-1, :, :, :].astype(np.uint8))
            segmentation.SetSpacing(VOXEL_SPACING)
            feature = {}
Alessia Marcolini's avatar
Alessia Marcolini committed
83

84
85
            # shape features only for CT
            result = extractor_shape.execute(modalities[CT_index], segmentation)
86
87
            for key, value in result.items():
                if not key.startswith("general_"):
88
                    feature[f'{key}_{SCAN_NAMES[CT_index]}'] = result[key]
89
90
91

            for modality, name in zip(modalities, SCAN_NAMES):
                # intensity features
92
93
94
95
96
97
98
99
100
101
                if name == 'CT':
                    extractor = extractor_intensity_CT
                elif name == 'PT':
                    extractor = extractor_intensity_PET
                else:
                    raise ValueError(
                        f'No intensity extractor defined for {name} modality!'
                    )

                result = extractor.execute(modality, segmentation)
102
103
104
105
106
107
108
109
110
111
112
                for key, value in result.items():
                    if not key.startswith("general_"):
                        feature[f'{key}_{name}'] = result[key]

                # texture features
                for P in PIXEL_SPACING:
                    if not broken:
                        extractor_texture.settings['resampledPixelSpacing'] = [P, P, P]
                        for NB in N_BINS:
                            if not broken:
                                extractor_texture.settings['binCount'] = NB
Alessia Marcolini's avatar
Alessia Marcolini committed
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
                                try:
                                    # uniform binning
                                    extractor_texture.settings['binMode'] = 'uniform'

                                    result = extractor_texture.execute(
                                        modality, segmentation
                                    )
                                    for key, value in result.items():
                                        if not key.startswith("general_"):
                                            feature[
                                                f'{key}_{P}_{NB}_uniform_{name}'
                                            ] = result[key]

                                    # equalized binning
                                    extractor_texture.settings['binMode'] = 'equal'
                                    result = extractor_texture.execute(
                                        modality, segmentation
                                    )
                                    for key, value in result.items():
                                        if not key.startswith("general_"):
                                            feature[
                                                f'{key}_{P}_{NB}_equalized_{name}'
                                            ] = result[key]
                                except Exception as e:
                                    print(e)
                                    broken = True
139
140
141
142
143
144
145
146
147
148

            if not broken:
                feature['filename'] = filename
                patient = clinical.loc[clinical['filename'] == filename][
                    'patient'
                ].values[0]
                feature['patient'] = patient
                return feature

        else:
Alessia Marcolini's avatar
Alessia Marcolini committed
149
            return {'filename': filename, 'patient': patient}
150
151
152


#%% MULTIPROCESS
Alessia Marcolini's avatar
Alessia Marcolini committed
153
154
155
156
# result = []
# for file in filenames:
#    feat = process_file(file)
#    result.append(feat)
157
158


Alessia Marcolini's avatar
Alessia Marcolini committed
159
# %%
Alessia Marcolini's avatar
Alessia Marcolini committed
160
161
162
multiproc = ListMultiprocessing(process_file, N_JOBS)
multiproc.execute(filenames)
result = multiproc.get_result()
Alessia Marcolini's avatar
Alessia Marcolini committed
163
164

# print('done')
165
166
167
168
169
#%% Save features in a Pandas df
features = pd.DataFrame(result)
features.set_index('filename', inplace=True)

print('Number of files: ', features.shape[0])
Alessia Marcolini's avatar
Alessia Marcolini committed
170
print('Number of features: ', features.shape[1] - 1)
171
172
# %%
features.to_csv(OUTDIR / OUTFILE)
Alessia Marcolini's avatar
Alessia Marcolini committed
173
# %%