Commit b0bd174a authored by Alessia Marcolini's avatar Alessia Marcolini
Browse files

Merge branch 'pyradiomics_radler' into 'master'

PyRadiomics wrapper to implement custom functions and utilities

See merge request !2
parents aea99cc7 9694000b
#%%
import os
from radiomics import featureextractor
import SimpleITK as sitk
import warnings
from pathlib import Path
import numpy as np
import pandas as pd
import os
from pathlib import Path
import SimpleITK as sitk
from tqdm import tqdm
from multiproc import ListMultiprocessing
import warnings
from pyradiomics_radler import MyRadiomicsFeaturesExtractor as RadiomicsFeatureExtractor
warnings.filterwarnings("ignore")
# %%
# os.chdir('..')
DATASET = Path('HN_val')
BBOX = 'bbox_64'
BBOX = 'bbox_64_rad'
VOXEL_SPACING = (1.0, 1.0, 1.0)
SCAN_NAMES = ['CT', 'PT'] # you are lucky that CT scans are the first in the stack
SCAN_NAMES = ['CT', 'PT']
CT_index = SCAN_NAMES.index('CT')
PT_index = SCAN_NAMES.index('PT')
N_JOBS = 32
N_BINS = [8, 16, 32, 64]
......@@ -26,29 +29,34 @@ PIXEL_SPACING = [1.0, 2.0, 3.0, 4.0, 5.0]
DATADIR = Path('data') / DATASET / 'processed' / 'bbox' / BBOX
OUTDIR = Path('data') / DATASET / 'processed'
OUTFILE = f'radiomics_features_{BBOX}.csv' # output file name
clinical = pd.read_csv(Path('data') / DATASET / 'processed' / f'clinical_{DATASET}.csv')
#%%
params_shape = 'shape.yaml' # param file to use to create the extractor
extractor_shape = featureextractor.RadiomicsFeaturesExtractor(params_shape)
FEATURES_CONFIGURATION_FOLDER = Path(__file__).parent.resolve()
params_intensity = 'intensity.yaml' # param file to use to create the extractor
extractor_intensity = featureextractor.RadiomicsFeaturesExtractor(params_intensity)
params_shape = (
FEATURES_CONFIGURATION_FOLDER / 'shape.yaml'
) # param file to use to create the extractor
extractor_shape = RadiomicsFeatureExtractor(str(params_shape))
params_texture = 'texture.yaml' # param file to use to create the extractor
extractor_texture = featureextractor.RadiomicsFeaturesExtractor(params_texture)
params_intensity_CT = (
FEATURES_CONFIGURATION_FOLDER / 'intensity_CT.yaml'
) # param file to use to create the extractor
extractor_intensity_CT = RadiomicsFeatureExtractor(str(params_intensity_CT))
filenames = [f for f in os.listdir(DATADIR) if f.endswith('.npy')]
params_intensity_PET = (
FEATURES_CONFIGURATION_FOLDER / 'intensity_PET.yaml'
) # param file to use to create the extractor
extractor_intensity_PET = RadiomicsFeatureExtractor(str(params_intensity_PET))
exclude_list = []
# 'HN-CHUM-011',
# 'HN-HMR-008',
# 'HN-CHUS-043',
# 'HN-HMR-002',
# 'HN-CHUM-010',
# 'HN-HGJ-044',
# ] #'HN-CHUM-020', 'HN-CHUS-093', 'HN-HMR-001', 'HN-HMR-007', 'HN-HMR-017'] # BAD SUV conversion
params_texture = (
FEATURES_CONFIGURATION_FOLDER / 'texture.yaml'
) # param file to use to create the extractor
extractor_texture = RadiomicsFeatureExtractor(str(params_texture))
filenames = [f for f in os.listdir(DATADIR) if f.endswith('.npy')]
exclude_list = []
#%%
def process_file(filename):
......@@ -66,21 +74,31 @@ def process_file(filename):
curr_modality.SetSpacing(VOXEL_SPACING)
modalities.append(curr_modality)
has_label = np.max(data[-1, :, :, :]) > 0
has_label = np.max(data[-1, :, :, :]) > 0 #### fix
if has_label:
segmentation = sitk.GetImageFromArray(data[-1, :, :, :].astype(np.uint8))
segmentation.SetSpacing(VOXEL_SPACING)
feature = {}
# shape features
result = extractor_shape.execute(modalities[0], segmentation)
# shape features only for CT
result = extractor_shape.execute(modalities[CT_index], segmentation)
for key, value in result.items():
if not key.startswith("general_"):
feature[key] = result[key]
feature[f'{key}_{SCAN_NAMES[CT_index]}'] = result[key]
for modality, name in zip(modalities, SCAN_NAMES):
# intensity features
result = extractor_intensity.execute(modality, segmentation)
if name == 'CT':
extractor = extractor_intensity_CT
elif name == 'PT':
extractor = extractor_intensity_PET
else:
raise ValueError(
f'No intensity extractor defined for {name} modality!'
)
result = extractor.execute(modality, segmentation)
for key, value in result.items():
if not key.startswith("general_"):
feature[f'{key}_{name}'] = result[key]
......@@ -92,10 +110,10 @@ def process_file(filename):
for NB in N_BINS:
if not broken:
extractor_texture.settings['binCount'] = NB
# uniform binning
extractor_texture.settings['binMode'] = 'uniform'
try:
# uniform binning
extractor_texture.settings['binMode'] = 'uniform'
result = extractor_texture.execute(
modality, segmentation
)
......@@ -115,7 +133,8 @@ def process_file(filename):
feature[
f'{key}_{P}_{NB}_equalized_{name}'
] = result[key]
except:
except Exception as e:
print(e)
broken = True
if not broken:
......@@ -127,22 +146,28 @@ def process_file(filename):
return feature
else:
return {}
return {'filename': filename, 'patient': patient}
#%% MULTIPROCESS
# result = []
# for file in filenames:
# feat = process_file(file)
# result.append(feat)
# %%
multiproc = ListMultiprocessing(process_file, N_JOBS)
multiproc.execute(filenames)
result = multiproc.get_result()
print('done')
# print('done')
#%% Save features in a Pandas df
features = pd.DataFrame(result)
features.set_index('filename', inplace=True)
print('Number of files: ', features.shape[0])
print('Number of features: ', features.shape[1])
print('Number of features: ', features.shape[1] - 1)
# %%
features.to_csv(OUTDIR / OUTFILE)
# %%
......@@ -15,3 +15,6 @@ imageType:
# for that class. Otherwise, the specified features are calculated, or, if none are specified, all are calculated (excluding redundant/deprecated features).
featureClass:
firstorder:
- 'Variance'
- 'Skewness'
- 'Kurtosis'
#
# Damiana
#
setting:
label: 1
interpolator: 'sitkBSpline' # This is an enumerated value, here None is not allowed
weightingNorm: 'euclidean'
normalize: False
# Image types to use: "Original" for unfiltered image, for possible filters, see documentation.
imageType:
Original: {}
# Featureclasses, from which features must be calculated. If a featureclass is not mentioned, no features are calculated
# for that class. Otherwise, the specified features are calculated, or, if none are specified, all are calculated (excluding redundant/deprecated features).
featureClass:
firstorder:
- 'Variance'
- 'Skewness'
- 'Kurtosis'
myfirstorder:
- 'SuvMax'
- 'SuvPeak'
- 'SuvMean'
- 'AUCCSH'
- 'TLG'
- 'InactiveVolume' #TBC
- 'gETU'
\ No newline at end of file
......@@ -14,4 +14,10 @@ imageType:
# Featureclasses, from which features must be calculated. If a featureclass is not mentioned, no features are calculated
# for that class. Otherwise, the specified features are calculated, or, if none are specified, all are calculated (excluding redundant/deprecated features).
featureClass:
myshape:
- 'Compactness2' # 1. Compactness
- 'Eccentricity' # 4. Eccentricity
- 'Solidity' # 5. Solidity
shape:
- 'Volume' # 2. Volume
- 'Maximum3DDiameter' # 3. Size
......@@ -14,7 +14,7 @@ imageType:
# Featureclasses, from which features must be calculated. If a featureclass is not mentioned, no features are calculated
# for that class. Otherwise, the specified features are calculated, or, if none are specified, all are calculated (excluding redundant/deprecated features).
featureClass:
glcm:
myglcm:
- 'JointEnergy'
- 'Contrast'
- 'Correlation'
......@@ -23,7 +23,7 @@ featureClass:
- 'JointAverage'
- 'DifferenceVariance'
- 'Autocorrelation'
glrlm:
myglrlm:
- 'ShortRunEmphasis'
- 'LongRunEmphasis'
- 'GrayLevelNonUniformity'
......@@ -37,7 +37,7 @@ featureClass:
- 'LongRunHighGrayLevelEmphasis'
- 'GrayLevelVariance'
- 'RunVariance'
glszm:
myglszm:
- 'SmallAreaEmphasis'
- 'LargeAreaEmphasis'
- 'GrayLevelNonUniformity'
......@@ -52,7 +52,7 @@ featureClass:
- 'LargeAreaHighGrayLevelEmphasis'
- 'GrayLevelVariance'
- 'ZoneVariance'
ngtdm:
myngtdm:
- 'Coarseness'
- 'Contrast'
- 'Busyness'
......
......@@ -39,7 +39,30 @@ and then install `mlpy` from GitLab:
pip install git+https://gitlab.fbk.eu/MPBA/mlpy.git
```
## Pretrained models
We share the weights of the models, as described in the manuscript. See folder [pretrained_weights](pretrained_weights).
#### OCTAVE
`octave` is needed as some first order features are extracted using functions not yet implemented in Python.
To install `octave` run in a Terminal:
```bash
apt install octave
apt install liboctave-dev
```
Open the `octave` prompt:
```bash
octave
```
and run:
```octave
pkg install --forge image
```
and then in a Terminal:
```bash
conda install -c conda-forge oct2py
```
## Pretrained models
We share the weights of the models, as described in the manuscript. See folder [pretrained_weights](pretrained_weights).
\ No newline at end of file
import radiomics
import pkgutil
import os
import sys
import inspect
from .myfeatureextractor import MyRadiomicsFeaturesExtractor
def getFeatureClasses():
"""
Iterates over all modules of the radiomics package using pkgutil and subsequently imports those modules.
Return a dictionary of all modules containing featureClasses, with modulename as key, abstract
class object of the featureClass as value. Assumes only one featureClass per module
This is achieved by inspect.getmembers. Modules are added if it contains a member that is a class,
with name starting with 'Radiomics' and is inherited from :py:class:`radiomics.base.RadiomicsFeaturesBase`.
This iteration only runs once (at initialization of toolbox), subsequent calls return the dictionary created by the
first call.
"""
global _featureClasses
if (
_featureClasses is None
): # On first call, enumerate possible feature classes and import PyRadiomics modules
_featureClasses = {}
for _, mod, _ in pkgutil.iter_modules([os.path.dirname(__file__)]):
if str(mod).startswith(
'_'
): # Skip loading of 'private' classes, these don't contain feature classes
continue
__import__('pyradiomics_radler.' + mod)
module = sys.modules['pyradiomics_radler.' + mod]
attributes = inspect.getmembers(module, inspect.isclass)
for a in attributes:
if a[0].startswith('MyRadiomics'):
for parentClass in inspect.getmro(a[1])[
1:
]: # only include classes that inherit from RadiomicsFeaturesBase
if parentClass.__name__ == 'MyRadiomicsFeaturesBase':
_featureClasses[mod] = a[1]
break
return _featureClasses
_featureClasses = None
_featureClasses = getFeatureClasses()
newFeatureClasses = radiomics._featureClasses
for f, cl in _featureClasses.items():
newFeatureClasses[f] = cl
radiomics._featureClasses = newFeatureClasses
from radiomics.base import RadiomicsFeaturesBase
from . import myimageoperations as imageoperations
import numpy
class MyRadiomicsFeaturesBase(RadiomicsFeaturesBase):
def __init__(self, inputImage, inputMask, **kwargs):
self.binMode = kwargs.get('binMode', 'uniform')
super().__init__(inputImage, inputMask, **kwargs)
def _applyBinning(self):
self.matrix, self.binEdges = imageoperations.binImage(
self.binWidth,
self.imageArray,
self.maskArray,
self.settings.get('binCount', None),
self.binMode,
)
self.coefficients['grayLevels'] = numpy.unique(self.matrix[self.maskArray])
self.coefficients['Ng'] = int(
numpy.max(self.coefficients['grayLevels'])
) # max gray level in the ROI
from radiomics.featureextractor import RadiomicsFeaturesExtractor
from . import myimageoperations as imageoperations
import collections
import six
from itertools import chain
class MyRadiomicsFeaturesExtractor(RadiomicsFeaturesExtractor):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def execute(self, imageFilepath, maskFilepath, label=None, voxelBased=False):
"""
Compute radiomics signature for provide image and mask combination. It comprises of the following steps:
1. Image and mask are loaded and normalized/resampled if necessary.
2. Validity of ROI is checked using :py:func:`~imageoperations.checkMask`, which also computes and returns the
bounding box.
3. If enabled, provenance information is calculated and stored as part of the result. (Not available in voxel-based
extraction)
4. Shape features are calculated on a cropped (no padding) version of the original image. (Not available in
voxel-based extraction)
5. If enabled, resegment the mask based upon the range specified in ``resegmentRange`` (default None: resegmentation
disabled).
6. Other enabled feature classes are calculated using all specified image types in ``_enabledImageTypes``. Images
are cropped to tumor mask (no padding) after application of any filter and before being passed to the feature
class.
7. The calculated features is returned as ``collections.OrderedDict``.
:param imageFilepath: SimpleITK Image, or string pointing to image file location
:param maskFilepath: SimpleITK Image, or string pointing to labelmap file location
:param label: Integer, value of the label for which to extract features. If not specified, last specified label
is used. Default label is 1.
:returns: dictionary containing calculated signature ("<imageType>_<featureClass>_<featureName>":value).
"""
if self.geometryTolerance != self.settings.get('geometryTolerance'):
self._setTolerance()
if label is not None:
self.settings['label'] = label
self.settings['voxelBased'] = voxelBased
if voxelBased:
self.logger.info('Starting voxel based extraction')
self.logger.info('Calculating features with label: %d', self.settings['label'])
self.logger.debug('Enabled images types: %s', self._enabledImagetypes)
self.logger.debug('Enabled features: %s', self._enabledFeatures)
self.logger.debug('Current settings: %s', self.settings)
if self.settings.get('binCount', None) is not None:
self.logger.warning(
'Fixed bin Count enabled! However, we recommend using a fixed bin Width. See '
'http://pyradiomics.readthedocs.io/en/latest/faq.html#radiomics-fixed-bin-width for more '
'details'
)
# 1. Load the image and mask
featureVector = collections.OrderedDict()
image, mask = self.loadImage(imageFilepath, maskFilepath)
if image is None or mask is None:
# No features can be extracted, return the empty featureVector
return featureVector
# 2. Check whether loaded mask contains a valid ROI for feature extraction and get bounding box
boundingBox, correctedMask = imageoperations.checkMask(
image, mask, **self.settings
)
# Update the mask if it had to be resampled
if correctedMask is not None:
mask = correctedMask
if boundingBox is None:
# Mask checks failed, do not extract features and return the empty featureVector
return featureVector
self.logger.debug('Image and Mask loaded and valid, starting extraction')
if not voxelBased:
# 3. Add the additional information if enabled
if self.settings['additionalInfo']:
featureVector.update(
self.getProvenance(imageFilepath, maskFilepath, mask)
)
# 4. If shape descriptors should be calculated, handle it separately here
if 'shape' in self._enabledFeatures.keys():
croppedImage, croppedMask = imageoperations.cropToTumorMask(
image, mask, boundingBox
)
enabledFeatures = self._enabledFeatures['shape']
self.logger.info('Computing shape')
shapeClass = self.featureClasses['shape'](
croppedImage, croppedMask, **self.settings
)
if enabledFeatures is None or len(enabledFeatures) == 0:
shapeClass.enableAllFeatures()
else:
for feature in enabledFeatures:
shapeClass.enableFeatureByName(feature)
for (featureName, featureValue) in six.iteritems(shapeClass.execute()):
newFeatureName = 'original_shape_%s' % featureName
featureVector[newFeatureName] = featureValue
# 5. Resegment the mask if enabled (parameter regsegmentMask is not None)
resegmentRange = self.settings.get('resegmentRange', None)
if resegmentRange is not None:
resegmentedMask = imageoperations.resegmentMask(
image, mask, resegmentRange, self.settings['label']
)
# Recheck to see if the mask is still valid
boundingBox, correctedMask = imageoperations.checkMask(
image, resegmentedMask, **self.settings
)
# Update the mask if it had to be resampled
if correctedMask is not None:
resegmentedMask = correctedMask
if boundingBox is None:
# Mask checks failed, do not extract features and return the empty featureVector
return featureVector
# Resegmentation successful
mask = resegmentedMask
# 6. Calculate other enabled feature classes using enabled image types
# Make generators for all enabled image types
self.logger.debug('Creating image type iterator')
imageGenerators = []
for imageType, customKwargs in six.iteritems(self._enabledImagetypes):
args = self.settings.copy()
args.update(customKwargs)
self.logger.info(
'Adding image type "%s" with custom settings: %s'
% (imageType, str(customKwargs))
)
imageGenerators = chain(
imageGenerators,
getattr(imageoperations, 'get%sImage' % imageType)(image, mask, **args),
)
self.logger.debug('Extracting features')
# Calculate features for all (filtered) images in the generator
for inputImage, imageTypeName, inputKwargs in imageGenerators:
self.logger.info('Calculating features for %s image', imageTypeName)
inputImage, inputMask = imageoperations.cropToTumorMask(
inputImage, mask, boundingBox
)
featureVector.update(
self.computeFeatures(
inputImage, inputMask, imageTypeName, **inputKwargs
)
)
self.logger.debug('Features extracted')
return featureVector
import numpy
from oct2py import Oct2Py, Oct2PyError
from scipy import ndimage
from radiomics.firstorder import RadiomicsFirstOrder
from .mybase import MyRadiomicsFeaturesBase
class MyRadiomicsFirstOrder(MyRadiomicsFeaturesBase):
def __init__(self, inputImage, inputMask, **kwargs):
super().__init__(inputImage, inputMask, **kwargs)
self.pixelSpacing = inputImage.GetSpacing()
self.voxelArrayShift = kwargs.get('voxelArrayShift', 0)
def _initCalculation(self):
return super()._initSegmentBasedCalculation()
self.targetVoxelArray = self.imageArray[self.labelledVoxelCoordinates].astype(
'float'
)
self.discretizedTargetVoxelArray = None # Lazy instantiation
self.logger.debug('First order feature class initialized')
def _moment(self, a, moment=1, axis=0):
return RadiomicsFirstOrder._moment(self, a, moment, axis)
def _getDiscretizedTargetVoxelArray(self):
return RadiomicsFirstOrder._getDiscretizedTargetVoxelArray(self)
def getSuvMaxFeatureValue(self):
r"""
**20. SUV max**
Implemented as one of the intensity features extracted by Vallieres et al.
Maximum SUV of the tumour region.
.. note::
Extracted from PET scans and not used in the CT feature set.
"""
ROIPet = self.imageArray
mask = self.maskArray
ROIPet[~mask] = numpy.nan
return numpy.max(ROIPet[~numpy.isnan(ROIPet)])
def getSuvMeanFeatureValue(self):
r"""
**21. SUV mean**
Implemented as one of the intensity features extracted by valieres et al.
Average SUV of the tumour region.
.. note::
Extracted from PET scans and not used in the CT feature set.
"""
ROIPet = self.imageArray
mask = self.maskArray
ROIPet[~mask] = numpy