extract_features_radiomics.py 5.94 KB
Newer Older
1
2
#%%
import os
Alessia Marcolini's avatar
Alessia Marcolini committed
3
from pyradiomics_radler import MyRadiomicsFeaturesExtractor as RadiomicsFeatureExtractor
4
5
6
7
8
9
10
11
import SimpleITK as sitk
import numpy as np
import pandas as pd

from pathlib import Path
from tqdm import tqdm

from multiproc import ListMultiprocessing
12
import warnings
13

14
warnings.filterwarnings("ignore")
15
# %%
Alessia Marcolini's avatar
Alessia Marcolini committed
16
17

# os.chdir('..')
18
19
DATASET = Path('HN_BZ')
BBOX = 'bbox_64_rad'
20
VOXEL_SPACING = (1.0, 1.0, 1.0)
21
22
23
SCAN_NAMES = ['CT', 'PT']
CT_index = SCAN_NAMES.index('CT')
PT_index = SCAN_NAMES.index('PT')
24
25
26
27
28
29
30

N_JOBS = 32
N_BINS = [8, 16, 32, 64]
PIXEL_SPACING = [1.0, 2.0, 3.0, 4.0, 5.0]
#%%
DATADIR = Path('data') / DATASET / 'processed' / 'bbox' / BBOX
OUTDIR = Path('data') / DATASET / 'processed'
Alessia Marcolini's avatar
Alessia Marcolini committed
31
OUTFILE = f'radiomics_features_{BBOX}.csv'  # output file name
Alessia Marcolini's avatar
Alessia Marcolini committed
32

33
34
35
clinical = pd.read_csv(Path('data') / DATASET / 'processed' / f'clinical_{DATASET}.csv')

#%%
Alessia Marcolini's avatar
Alessia Marcolini committed
36
37
params_shape = '02_radiomics_features_extraction/shape_new.yaml'  # param file to use to create the extractor
extractor_shape = RadiomicsFeatureExtractor(params_shape)
38

39
40
41
42
43
params_intensity_CT = '02_radiomics_features_extraction/intensity_CT.yaml'  # param file to use to create the extractor
extractor_intensity_CT = RadiomicsFeatureExtractor(params_intensity_CT)

params_intensity_PET = '02_radiomics_features_extraction/intensity_PET.yaml'  # param file to use to create the extractor
extractor_intensity_PET = RadiomicsFeatureExtractor(params_intensity_PET)
44

Alessia Marcolini's avatar
Alessia Marcolini committed
45
46
params_texture = '02_radiomics_features_extraction/texture_new.yaml'  # param file to use to create the extractor
extractor_texture = RadiomicsFeatureExtractor(params_texture)
47

48
filenames = [f for f in os.listdir(DATADIR) if f.endswith('.npy')][:2]
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67

exclude_list = []

#%%
def process_file(filename):
    broken = False

    data = np.load(f'{DATADIR}/{filename}')

    file_no_ext = filename.split('.')[0]

    if file_no_ext not in exclude_list:
        n_modalities = data.shape[0]
        modalities = []
        for i in range(n_modalities):
            curr_modality = sitk.GetImageFromArray(data[i, :, :, :].astype(np.float32))
            curr_modality.SetSpacing(VOXEL_SPACING)
            modalities.append(curr_modality)

68
        has_label = np.max(data[-1, :, :, :]) > 0  #### fix
69
70
71
72
73

        if has_label:
            segmentation = sitk.GetImageFromArray(data[-1, :, :, :].astype(np.uint8))
            segmentation.SetSpacing(VOXEL_SPACING)
            feature = {}
Alessia Marcolini's avatar
Alessia Marcolini committed
74

75
76
            # shape features only for CT
            result = extractor_shape.execute(modalities[CT_index], segmentation)
77
78
            for key, value in result.items():
                if not key.startswith("general_"):
79
                    feature[f'{key}_{SCAN_NAMES[CT_index]}'] = result[key]
80
81
82

            for modality, name in zip(modalities, SCAN_NAMES):
                # intensity features
83
84
85
86
87
88
89
90
91
92
                if name == 'CT':
                    extractor = extractor_intensity_CT
                elif name == 'PT':
                    extractor = extractor_intensity_PET
                else:
                    raise ValueError(
                        f'No intensity extractor defined for {name} modality!'
                    )

                result = extractor.execute(modality, segmentation)
93
94
95
96
97
98
99
100
101
102
103
                for key, value in result.items():
                    if not key.startswith("general_"):
                        feature[f'{key}_{name}'] = result[key]

                # texture features
                for P in PIXEL_SPACING:
                    if not broken:
                        extractor_texture.settings['resampledPixelSpacing'] = [P, P, P]
                        for NB in N_BINS:
                            if not broken:
                                extractor_texture.settings['binCount'] = NB
Alessia Marcolini's avatar
Alessia Marcolini committed
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
                                try:
                                    # uniform binning
                                    extractor_texture.settings['binMode'] = 'uniform'

                                    result = extractor_texture.execute(
                                        modality, segmentation
                                    )
                                    for key, value in result.items():
                                        if not key.startswith("general_"):
                                            feature[
                                                f'{key}_{P}_{NB}_uniform_{name}'
                                            ] = result[key]

                                    # equalized binning
                                    extractor_texture.settings['binMode'] = 'equal'
                                    result = extractor_texture.execute(
                                        modality, segmentation
                                    )
                                    for key, value in result.items():
                                        if not key.startswith("general_"):
                                            feature[
                                                f'{key}_{P}_{NB}_equalized_{name}'
                                            ] = result[key]
                                except Exception as e:
                                    print(e)
                                    broken = True
130
131
132
133
134
135
136
137
138
139

            if not broken:
                feature['filename'] = filename
                patient = clinical.loc[clinical['filename'] == filename][
                    'patient'
                ].values[0]
                feature['patient'] = patient
                return feature

        else:
Alessia Marcolini's avatar
Alessia Marcolini committed
140
            return {'filename': filename, 'patient': patient}
141
142
143


#%% MULTIPROCESS
Alessia Marcolini's avatar
Alessia Marcolini committed
144
145
146
147
# result = []
# for file in filenames:
#    feat = process_file(file)
#    result.append(feat)
148
149


Alessia Marcolini's avatar
Alessia Marcolini committed
150
# %%
Alessia Marcolini's avatar
Alessia Marcolini committed
151
152
153
multiproc = ListMultiprocessing(process_file, N_JOBS)
multiproc.execute(filenames)
result = multiproc.get_result()
Alessia Marcolini's avatar
Alessia Marcolini committed
154
155

# print('done')
156
157
158
159
160
#%% Save features in a Pandas df
features = pd.DataFrame(result)
features.set_index('filename', inplace=True)

print('Number of files: ', features.shape[0])
Alessia Marcolini's avatar
Alessia Marcolini committed
161
print('Number of features: ', features.shape[1] - 1)
162
163
# %%
features.to_csv(OUTDIR / OUTFILE)
Alessia Marcolini's avatar
Alessia Marcolini committed
164
# %%