#%%
import os
import warnings
from pathlib import Path

import numpy as np
import pandas as pd
import SimpleITK as sitk
from tqdm import tqdm

from multiproc import ListMultiprocessing
from pyradiomics_radler import MyRadiomicsFeaturesExtractor as RadiomicsFeatureExtractor

warnings.filterwarnings("ignore")

# %%
# os.chdir('..')
DATASET = 'HN_val'
BBOX = 'bbox_64_rad'
VOXEL_SPACING = (1.0, 1.0, 1.0)
SCAN_NAMES = ['CT', 'PT']
CT_index = SCAN_NAMES.index('CT')
PT_index = SCAN_NAMES.index('PT')
N_JOBS = 32
N_BINS = [8, 16, 32, 64]  # bin counts for texture feature discretization
PIXEL_SPACING = [1.0, 2.0, 3.0, 4.0, 5.0]  # isotropic resampling spacings for texture features

#%%
DATADIR = Path('data') / DATASET / 'processed' / 'bbox' / BBOX
OUTDIR = Path('data') / DATASET / 'processed'
OUTFILE = f'radiomics_features_{BBOX}.csv'  # output file name

clinical = pd.read_csv(Path('data') / DATASET / 'processed' / f'clinical_{DATASET}.csv')

#%%
FEATURES_CONFIGURATION_FOLDER = Path(__file__).parent.resolve()

# one extractor per feature family, each configured by its own parameter file
params_shape = FEATURES_CONFIGURATION_FOLDER / 'shape.yaml'
extractor_shape = RadiomicsFeatureExtractor(str(params_shape))

params_intensity_CT = FEATURES_CONFIGURATION_FOLDER / 'intensity_CT.yaml'
extractor_intensity_CT = RadiomicsFeatureExtractor(str(params_intensity_CT))

params_intensity_PET = FEATURES_CONFIGURATION_FOLDER / 'intensity_PET.yaml'
extractor_intensity_PET = RadiomicsFeatureExtractor(str(params_intensity_PET))

params_texture = FEATURES_CONFIGURATION_FOLDER / 'texture.yaml'
extractor_texture = RadiomicsFeatureExtractor(str(params_texture))

filenames = [f for f in os.listdir(DATADIR) if f.endswith('.npy')]
exclude_list = []

#%%
def process_file(filename):
    """Extract shape, intensity and texture features for one bounding-box crop."""
    broken = False
    data = np.load(f'{DATADIR}/{filename}')
    file_no_ext = filename.split('.')[0]
    if file_no_ext not in exclude_list:
        # every channel is an image modality (CT, PT); the last channel is the segmentation mask
        n_modalities = data.shape[0]
        modalities = []
        for i in range(n_modalities):
            curr_modality = sitk.GetImageFromArray(data[i, :, :, :].astype(np.float32))
            curr_modality.SetSpacing(VOXEL_SPACING)
            modalities.append(curr_modality)
        has_label = np.max(data[-1, :, :, :]) > 0  # any nonzero voxel in the mask channel
        if has_label:
            segmentation = sitk.GetImageFromArray(data[-1, :, :, :].astype(np.uint8))
            segmentation.SetSpacing(VOXEL_SPACING)
            feature = {}

            # shape features, computed on the CT geometry only
            result = extractor_shape.execute(modalities[CT_index], segmentation)
            for key, value in result.items():
                if not key.startswith("general_"):
                    feature[f'{key}_{SCAN_NAMES[CT_index]}'] = value

            for modality, name in zip(modalities, SCAN_NAMES):
                # intensity features, with a modality-specific extractor
                if name == 'CT':
                    extractor = extractor_intensity_CT
                elif name == 'PT':
                    extractor = extractor_intensity_PET
                else:
                    raise ValueError(
                        f'No intensity extractor defined for {name} modality!'
                    )
                result = extractor.execute(modality, segmentation)
                for key, value in result.items():
                    if not key.startswith("general_"):
                        feature[f'{key}_{name}'] = value

                # texture features over every resampling spacing and bin count;
                # stop trying further configurations after the first failure
                for P in PIXEL_SPACING:
                    if not broken:
                        extractor_texture.settings['resampledPixelSpacing'] = [P, P, P]
                        for NB in N_BINS:
                            if not broken:
                                extractor_texture.settings['binCount'] = NB
                                try:
                                    # uniform binning
                                    extractor_texture.settings['binMode'] = 'uniform'
                                    result = extractor_texture.execute(modality, segmentation)
                                    for key, value in result.items():
                                        if not key.startswith("general_"):
                                            feature[f'{key}_{P}_{NB}_uniform_{name}'] = value
                                    # equalized binning
                                    extractor_texture.settings['binMode'] = 'equal'
                                    result = extractor_texture.execute(modality, segmentation)
                                    for key, value in result.items():
                                        if not key.startswith("general_"):
                                            feature[f'{key}_{P}_{NB}_equalized_{name}'] = value
                                except Exception as e:
                                    print(e)
                                    broken = True

            # look up the patient ID for this file in the clinical table
            patient = clinical.loc[clinical['filename'] == filename, 'patient'].values[0]
            if not broken:
                feature['filename'] = filename
                feature['patient'] = patient
                return feature
            else:
                # texture extraction failed: keep only the identifiers
                return {'filename': filename, 'patient': patient}
    # excluded files and crops without a segmentation label produce no features;
    # return the filename only so the collected results still form a valid DataFrame
    return {'filename': filename}


#%% MULTIPROCESS
# serial alternative, useful for debugging:
# result = []
# for file in filenames:
#     feat = process_file(file)
#     result.append(feat)

# %%
multiproc = ListMultiprocessing(process_file, N_JOBS)
multiproc.execute(filenames)
result = multiproc.get_result()
# print('done')

#%% Save features in a Pandas df
features = pd.DataFrame(result)
features.set_index('filename', inplace=True)
print('Number of files: ', features.shape[0])
print('Number of features: ', features.shape[1] - 1)  # exclude the 'patient' column

# %%
features.to_csv(OUTDIR / OUTFILE)

# %%
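# %% Optional sanity check (a sketch, assuming the CSV above was written successfully):
# reload the saved table and confirm its shape matches the counts printed above.
# reloaded = pd.read_csv(OUTDIR / OUTFILE, index_col='filename')
# print(reloaded.shape)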