#%% import os from pyradiomics_radler import MyRadiomicsFeaturesExtractor as RadiomicsFeatureExtractor import SimpleITK as sitk import numpy as np import pandas as pd from pathlib import Path from tqdm import tqdm from multiproc import ListMultiprocessing import warnings warnings.filterwarnings("ignore") # %% # os.chdir('..') DATASET = Path('HN_BZ') BBOX = 'bbox_64_rad' VOXEL_SPACING = (1.0, 1.0, 1.0) SCAN_NAMES = ['CT', 'PT'] CT_index = SCAN_NAMES.index('CT') PT_index = SCAN_NAMES.index('PT') N_JOBS = 32 N_BINS = [8, 16, 32, 64] PIXEL_SPACING = [1.0, 2.0, 3.0, 4.0, 5.0] #%% DATADIR = Path('data') / DATASET / 'processed' / 'bbox' / BBOX OUTDIR = Path('data') / DATASET / 'processed' OUTFILE = f'radiomics_features_{BBOX}.csv' # output file name clinical = pd.read_csv(Path('data') / DATASET / 'processed' / f'clinical_{DATASET}.csv') #%% params_shape = '02_radiomics_features_extraction/shape_new.yaml' # param file to use to create the extractor extractor_shape = RadiomicsFeatureExtractor(params_shape) params_intensity_CT = '02_radiomics_features_extraction/intensity_CT.yaml' # param file to use to create the extractor extractor_intensity_CT = RadiomicsFeatureExtractor(params_intensity_CT) params_intensity_PET = '02_radiomics_features_extraction/intensity_PET.yaml' # param file to use to create the extractor extractor_intensity_PET = RadiomicsFeatureExtractor(params_intensity_PET) params_texture = '02_radiomics_features_extraction/texture_new.yaml' # param file to use to create the extractor extractor_texture = RadiomicsFeatureExtractor(params_texture) filenames = [f for f in os.listdir(DATADIR) if f.endswith('.npy')][:2] exclude_list = [] #%% def process_file(filename): broken = False data = np.load(f'{DATADIR}/{filename}') file_no_ext = filename.split('.')[0] if file_no_ext not in exclude_list: n_modalities = data.shape[0] modalities = [] for i in range(n_modalities): curr_modality = sitk.GetImageFromArray(data[i, :, :, :].astype(np.float32)) curr_modality.SetSpacing(VOXEL_SPACING) modalities.append(curr_modality) has_label = np.max(data[-1, :, :, :]) > 0 #### fix if has_label: segmentation = sitk.GetImageFromArray(data[-1, :, :, :].astype(np.uint8)) segmentation.SetSpacing(VOXEL_SPACING) feature = {} # shape features only for CT result = extractor_shape.execute(modalities[CT_index], segmentation) for key, value in result.items(): if not key.startswith("general_"): feature[f'{key}_{SCAN_NAMES[CT_index]}'] = result[key] for modality, name in zip(modalities, SCAN_NAMES): # intensity features if name == 'CT': extractor = extractor_intensity_CT elif name == 'PT': extractor = extractor_intensity_PET else: raise ValueError( f'No intensity extractor defined for {name} modality!' ) result = extractor.execute(modality, segmentation) for key, value in result.items(): if not key.startswith("general_"): feature[f'{key}_{name}'] = result[key] # texture features for P in PIXEL_SPACING: if not broken: extractor_texture.settings['resampledPixelSpacing'] = [P, P, P] for NB in N_BINS: if not broken: extractor_texture.settings['binCount'] = NB try: # uniform binning extractor_texture.settings['binMode'] = 'uniform' result = extractor_texture.execute( modality, segmentation ) for key, value in result.items(): if not key.startswith("general_"): feature[ f'{key}_{P}_{NB}_uniform_{name}' ] = result[key] # equalized binning extractor_texture.settings['binMode'] = 'equal' result = extractor_texture.execute( modality, segmentation ) for key, value in result.items(): if not key.startswith("general_"): feature[ f'{key}_{P}_{NB}_equalized_{name}' ] = result[key] except Exception as e: print(e) broken = True if not broken: feature['filename'] = filename patient = clinical.loc[clinical['filename'] == filename][ 'patient' ].values[0] feature['patient'] = patient return feature else: return {'filename': filename, 'patient': patient} #%% MULTIPROCESS # result = [] # for file in filenames: # feat = process_file(file) # result.append(feat) # %% multiproc = ListMultiprocessing(process_file, N_JOBS) multiproc.execute(filenames) result = multiproc.get_result() # print('done') #%% Save features in a Pandas df features = pd.DataFrame(result) features.set_index('filename', inplace=True) print('Number of files: ', features.shape[0]) print('Number of features: ', features.shape[1] - 1) # %% features.to_csv(OUTDIR / OUTFILE) # %%