Commit 90f38f72 authored by Alessia Marcolini's avatar Alessia Marcolini
Browse files

PyRadiomics wrapper to implement custom functions and utilities

parent b961f1bf
import radiomics
import pkgutil
import os
import sys
import inspect
from .myfeatureextractor import MyRadiomicsFeaturesExtractor
def getFeatureClasses():
"""
Iterates over all modules of the radiomics package using pkgutil and subsequently imports those modules.
Return a dictionary of all modules containing featureClasses, with modulename as key, abstract
class object of the featureClass as value. Assumes only one featureClass per module
This is achieved by inspect.getmembers. Modules are added if it contains a member that is a class,
with name starting with 'Radiomics' and is inherited from :py:class:`radiomics.base.RadiomicsFeaturesBase`.
This iteration only runs once (at initialization of toolbox), subsequent calls return the dictionary created by the
first call.
"""
global _featureClasses
if (
_featureClasses is None
): # On first call, enumerate possible feature classes and import PyRadiomics modules
_featureClasses = {}
for _, mod, _ in pkgutil.iter_modules([os.path.dirname(__file__)]):
if str(mod).startswith(
'_'
): # Skip loading of 'private' classes, these don't contain feature classes
continue
__import__('pyradiomics_radler.' + mod)
module = sys.modules['pyradiomics_radler.' + mod]
attributes = inspect.getmembers(module, inspect.isclass)
for a in attributes:
if a[0].startswith('MyRadiomics'):
for parentClass in inspect.getmro(a[1])[
1:
]: # only include classes that inherit from RadiomicsFeaturesBase
if parentClass.__name__ == 'MyRadiomicsFeaturesBase':
_featureClasses[mod] = a[1]
break
return _featureClasses
_featureClasses = None
_featureClasses = getFeatureClasses()
newFeatureClasses = radiomics._featureClasses
for f, cl in _featureClasses.items():
newFeatureClasses[f] = cl
radiomics._featureClasses = newFeatureClasses
from radiomics.base import RadiomicsFeaturesBase
from . import myimageoperations as imageoperations
import numpy
class MyRadiomicsFeaturesBase(RadiomicsFeaturesBase):
def __init__(self, inputImage, inputMask, **kwargs):
self.binMode = kwargs.get('binMode', 'uniform')
super().__init__(inputImage, inputMask, **kwargs)
def _applyBinning(self):
self.matrix, self.binEdges = imageoperations.binImage(
self.binWidth,
self.imageArray,
self.maskArray,
self.settings.get('binCount', None),
self.binMode,
)
self.coefficients['grayLevels'] = numpy.unique(self.matrix[self.maskArray])
self.coefficients['Ng'] = int(
numpy.max(self.coefficients['grayLevels'])
) # max gray level in the ROI
from radiomics.featureextractor import RadiomicsFeaturesExtractor
from . import myimageoperations as imageoperations
import collections
import six
from itertools import chain
class MyRadiomicsFeaturesExtractor(RadiomicsFeaturesExtractor):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def execute(self, imageFilepath, maskFilepath, label=None, voxelBased=False):
"""
Compute radiomics signature for provide image and mask combination. It comprises of the following steps:
1. Image and mask are loaded and normalized/resampled if necessary.
2. Validity of ROI is checked using :py:func:`~imageoperations.checkMask`, which also computes and returns the
bounding box.
3. If enabled, provenance information is calculated and stored as part of the result. (Not available in voxel-based
extraction)
4. Shape features are calculated on a cropped (no padding) version of the original image. (Not available in
voxel-based extraction)
5. If enabled, resegment the mask based upon the range specified in ``resegmentRange`` (default None: resegmentation
disabled).
6. Other enabled feature classes are calculated using all specified image types in ``_enabledImageTypes``. Images
are cropped to tumor mask (no padding) after application of any filter and before being passed to the feature
class.
7. The calculated features is returned as ``collections.OrderedDict``.
:param imageFilepath: SimpleITK Image, or string pointing to image file location
:param maskFilepath: SimpleITK Image, or string pointing to labelmap file location
:param label: Integer, value of the label for which to extract features. If not specified, last specified label
is used. Default label is 1.
:returns: dictionary containing calculated signature ("<imageType>_<featureClass>_<featureName>":value).
"""
if self.geometryTolerance != self.settings.get('geometryTolerance'):
self._setTolerance()
if label is not None:
self.settings['label'] = label
self.settings['voxelBased'] = voxelBased
if voxelBased:
self.logger.info('Starting voxel based extraction')
self.logger.info('Calculating features with label: %d', self.settings['label'])
self.logger.debug('Enabled images types: %s', self._enabledImagetypes)
self.logger.debug('Enabled features: %s', self._enabledFeatures)
self.logger.debug('Current settings: %s', self.settings)
if self.settings.get('binCount', None) is not None:
self.logger.warning(
'Fixed bin Count enabled! However, we recommend using a fixed bin Width. See '
'http://pyradiomics.readthedocs.io/en/latest/faq.html#radiomics-fixed-bin-width for more '
'details'
)
# 1. Load the image and mask
featureVector = collections.OrderedDict()
image, mask = self.loadImage(imageFilepath, maskFilepath)
if image is None or mask is None:
# No features can be extracted, return the empty featureVector
return featureVector
# 2. Check whether loaded mask contains a valid ROI for feature extraction and get bounding box
boundingBox, correctedMask = imageoperations.checkMask(
image, mask, **self.settings
)
# Update the mask if it had to be resampled
if correctedMask is not None:
mask = correctedMask
if boundingBox is None:
# Mask checks failed, do not extract features and return the empty featureVector
return featureVector
self.logger.debug('Image and Mask loaded and valid, starting extraction')
if not voxelBased:
# 3. Add the additional information if enabled
if self.settings['additionalInfo']:
featureVector.update(
self.getProvenance(imageFilepath, maskFilepath, mask)
)
# 4. If shape descriptors should be calculated, handle it separately here
if 'shape' in self._enabledFeatures.keys():
croppedImage, croppedMask = imageoperations.cropToTumorMask(
image, mask, boundingBox
)
enabledFeatures = self._enabledFeatures['shape']
self.logger.info('Computing shape')
shapeClass = self.featureClasses['shape'](
croppedImage, croppedMask, **self.settings
)
if enabledFeatures is None or len(enabledFeatures) == 0:
shapeClass.enableAllFeatures()
else:
for feature in enabledFeatures:
shapeClass.enableFeatureByName(feature)
for (featureName, featureValue) in six.iteritems(shapeClass.execute()):
newFeatureName = 'original_shape_%s' % featureName
featureVector[newFeatureName] = featureValue
# 5. Resegment the mask if enabled (parameter regsegmentMask is not None)
resegmentRange = self.settings.get('resegmentRange', None)
if resegmentRange is not None:
resegmentedMask = imageoperations.resegmentMask(
image, mask, resegmentRange, self.settings['label']
)
# Recheck to see if the mask is still valid
boundingBox, correctedMask = imageoperations.checkMask(
image, resegmentedMask, **self.settings
)
# Update the mask if it had to be resampled
if correctedMask is not None:
resegmentedMask = correctedMask
if boundingBox is None:
# Mask checks failed, do not extract features and return the empty featureVector
return featureVector
# Resegmentation successful
mask = resegmentedMask
# 6. Calculate other enabled feature classes using enabled image types
# Make generators for all enabled image types
self.logger.debug('Creating image type iterator')
imageGenerators = []
for imageType, customKwargs in six.iteritems(self._enabledImagetypes):
args = self.settings.copy()
args.update(customKwargs)
self.logger.info(
'Adding image type "%s" with custom settings: %s'
% (imageType, str(customKwargs))
)
imageGenerators = chain(
imageGenerators,
getattr(imageoperations, 'get%sImage' % imageType)(image, mask, **args),
)
self.logger.debug('Extracting features')
# Calculate features for all (filtered) images in the generator
for inputImage, imageTypeName, inputKwargs in imageGenerators:
self.logger.info('Calculating features for %s image', imageTypeName)
inputImage, inputMask = imageoperations.cropToTumorMask(
inputImage, mask, boundingBox
)
featureVector.update(
self.computeFeatures(
inputImage, inputMask, imageTypeName, **inputKwargs
)
)
self.logger.debug('Features extracted')
return featureVector
from .mybase import MyRadiomicsFeaturesBase
from radiomics.glcm import RadiomicsGLCM
class MyRadiomicsGLCM(RadiomicsGLCM, MyRadiomicsFeaturesBase):
def __init__(self, inputImage, inputMask, **kwargs):
super().__init__(inputImage, inputMask, **kwargs)
from .mybase import MyRadiomicsFeaturesBase
from radiomics.gldm import RadiomicsGLDM
class MyRadiomicsGLDM(RadiomicsGLDM, MyRadiomicsFeaturesBase):
def __init__(self, inputImage, inputMask, **kwargs):
super().__init__(inputImage, inputMask, **kwargs)
from .mybase import MyRadiomicsFeaturesBase
from radiomics.glrlm import RadiomicsGLRLM
import numpy
class MyRadiomicsGLRLM(RadiomicsGLRLM, MyRadiomicsFeaturesBase):
def __init__(self, inputImage, inputMask, **kwargs):
super().__init__(inputImage, inputMask, **kwargs)
def getGrayLevelNonUniformityFeatureValue(self):
r"""
**3. Gray Level Non-Uniformity (GLN)**
.. math::
\textit{GLN} = \frac{\sum^{N_g}_{i=1}\left(\sum^{N_r}_{j=1}{\textbf{P}(i,j|\theta)}\right)^2}{N_z(\theta)}
GLN measures the similarity of gray-level intensity values in the image, where a lower GLN value correlates with a
greater similarity in intensity values.
"""
pg = self.coefficients['pg']
Nz = self.coefficients['Nz']
gln = numpy.sum(
(pg ** 2), 0
) # NB the division by Nz has been removed according to valieres et al
return gln.mean()
def getRunLengthNonUniformityFeatureValue(self):
r"""
**5. Run Length Non-Uniformity (RLN)**
.. math::
\textit{RLN} = \frac{\sum^{N_r}_{j=1}\left(\sum^{N_g}_{i=1}{\textbf{P}(i,j|\theta)}\right)^2}{N_z(\theta)}
RLN measures the similarity of run lengths throughout the image, with a lower value indicating more homogeneity
among run lengths in the image.
"""
pr = self.coefficients['pr']
Nz = self.coefficients['Nz']
# NB the division by Nz has been removed according to Vallieres et al
rln = numpy.sum((pr ** 2), 0) # / Nz
return rln.mean()
from .mybase import MyRadiomicsFeaturesBase
from radiomics.glszm import RadiomicsGLSZM
class MyRadiomicsGLSZM(RadiomicsGLSZM, MyRadiomicsFeaturesBase):
def __init__(self, inputImage, inputMask, **kwargs):
super().__init__(inputImage, inputMask, **kwargs)
import logging
from radiomics.imageoperations import *
logger = logging.getLogger(__name__)
def binImage(
binwidth,
parameterMatrix,
parameterMatrixCoordinates=None,
bincount=None,
mode='uniform',
):
r"""
Discretizes the parameterMatrix (matrix representation of the gray levels in the ROI) using the binEdges calculated
using :py:func:`getBinEdges`. Only voxels defined by parameterMatrixCoordinates (defining the segmentation) are used
for calculation of histogram and subsequently discretized. Voxels outside segmentation are left unchanged.
:math:`X_{b, i} = \lfloor \frac{X_{gl, i}}{W} \rfloor - \lfloor \frac {\min(X_{gl})}{W} \rfloor + 1`
Here, :math:`X_{gl, i}` and :math:`X_{b, i}` are gray level intensities before and after discretization, respectively.
:math:`{W}` is the bin width value (specfied in ``binWidth`` parameter). The first part of the formula ensures that
the bins are equally spaced from 0, whereas the second part ensures that the minimum gray level intensity inside the
ROI after binning is always 1.
If the range of gray level intensities is equally dividable by the binWidth, i.e. :math:`(\max(X_{gl})- \min(X_{gl}))
\mod W = 0`, the maximum intensity will be encoded as numBins + 1, therefore the maximum number of gray
level intensities in the ROI after binning is number of bins + 1.
.. warning::
This is different from the assignment of voxels to the bins by ``numpy.histogram`` , which has half-open bins, with
the exception of the rightmost bin, which means this maximum values are assigned to the topmost bin.
``numpy.digitize`` uses half-open bins, including the rightmost bin.
.. note::
This method is slightly different from the fixed bin size discretization method described by IBSI. The two most
notable differences are 1) that PyRadiomics uses a floor division (and adds 1), as opposed to a ceiling division and
2) that in PyRadiomics, bins are always equally spaced from 0, as opposed to equally spaced from the minimum
gray level intensity.
"""
global logger
logger.debug('Discretizing gray levels inside ROI')
if parameterMatrixCoordinates is None:
if bincount is not None:
if mode == 'uniform':
binEdges = numpy.histogram(parameterMatrix[:], bincount)[1]
elif mode == 'equal':
binEdges = numpy.percentile(
parameterMatrix[:], numpy.arange(0, 1, 1 / bincount)
)
binEdges = numpy.r_[binEdges, numpy.max(parameterMatrix[:])]
# check monotonicity
diff = numpy.diff(binEdges)
while not (diff >= 0).all():
idx_non_monotonical = numpy.where(diff < 0)[0]
binEdges = numpy.delete(binEdges, idx_non_monotonical)
diff = numpy.diff(binEdges)
else:
binEdges = getBinEdges(binwidth, parameterMatrix[:])
parameterMatrix = numpy.digitize(parameterMatrix, binEdges)
else:
if bincount is not None:
if mode == 'uniform':
binEdges = numpy.histogram(
parameterMatrix[parameterMatrixCoordinates], bincount
)[1]
elif mode == 'equal':
binEdges = numpy.percentile(
parameterMatrix[parameterMatrixCoordinates],
numpy.arange(0, 1, 1 / bincount),
)
binEdges = numpy.r_[binEdges, numpy.max(parameterMatrix[:])]
# check monotonicity
diff = numpy.diff(binEdges)
while not (diff >= 0).all():
idx_non_monotonical = numpy.where(diff < 0)[0]
binEdges = numpy.delete(binEdges, idx_non_monotonical)
diff = numpy.diff(binEdges)
else:
binEdges = getBinEdges(
binwidth, parameterMatrix[parameterMatrixCoordinates]
)
parameterMatrix[parameterMatrixCoordinates] = numpy.digitize(
parameterMatrix[parameterMatrixCoordinates], binEdges
)
return parameterMatrix, binEdges
def checkMask(imageNode, maskNode, **kwargs):
"""
Checks whether the Region of Interest (ROI) defined in the mask size and dimensions match constraints, specified in
settings. The following checks are performed.
1. Check whether the mask corresponds to the image (i.e. has a similar size, spacing, direction and origin). **N.B.
This check is performed by SimpleITK, if it fails, an error is logged, with additional error information from
SimpleITK logged with level DEBUG (i.e. logging-level has to be set to debug to store this information in the log
file).** The tolerance can be increased using the ``geometryTolerance`` parameter. Alternatively, if the
``correctMask`` parameter is ``True``, PyRadiomics will check if the mask contains a valid ROI (inside image
physical area) and if so, resample the mask to image geometry. See :ref:`radiomics-settings-label` for more info.
2. Check if the label is present in the mask
3. Count the number of dimensions in which the size of the ROI > 1 (i.e. does the ROI represent a single voxel (0), a
line (1), a surface (2) or a volume (3)) and compare this to the minimum number of dimension required (specified in
``minimumROIDimensions``).
4. Optional. Check if there are at least N voxels in the ROI. N is defined in ``minimumROISize``, this test is skipped
if ``minimumROISize = None``.
This function returns a tuple of two items. The first item (if not None) is the bounding box of the mask. The second
item is the mask that has been corrected by resampling to the input image geometry (if that resampling was successful).
If a check fails, an error is logged and a (None,None) tuple is returned. No features will be extracted for this mask.
If the mask passes all tests, this function returns the bounding box, which is used in the :py:func:`cropToTumorMask`
function.
The bounding box is calculated during (1.) and used for the subsequent checks. The bounding box is
calculated by SimpleITK.LabelStatisticsImageFilter() and returned as a tuple of indices: (L_x, U_x, L_y, U_y, L_z,
U_z), where 'L' and 'U' are lower and upper bound, respectively, and 'x', 'y' and 'z' the three image dimensions.
By reusing the bounding box calculated here, calls to SimpleITK.LabelStatisticsImageFilter() are reduced, improving
performance.
Uses the following settings:
- minimumROIDimensions [1]: Integer, range 1-3, specifies the minimum dimensions (1D, 2D or 3D, respectively).
Single-voxel segmentations are always excluded.
- minimumROISize [None]: Integer, > 0, specifies the minimum number of voxels required. Test is skipped if
this parameter is set to None.
.. note::
If the first check fails there are generally 2 possible causes:
1. The image and mask are matched, but there is a slight difference in origin, direction or spacing. The exact
cause, difference and used tolerance are stored with level DEBUG in a log (if enabled). For more information on
setting up logging, see ":ref:`setting up logging <radiomics-logging-label>`" and the helloRadiomics examples
(located in the ``pyradiomics/examples`` folder). This problem can be fixed by changing the global tolerance
(``geometryTolerance`` parameter) or enabling mask correction (``correctMask`` parameter).
2. The image and mask do not match, but the ROI contained within the mask does represent a physical volume
contained within the image. If this is the case, resampling is needed to ensure matching geometry between image
and mask before features can be extracted. This can be achieved by enabling mask correction using the
``correctMask`` parameter.
"""
global logger
boundingBox = None
correctedMask = None
label = kwargs.get('label', 1)
minDims = kwargs.get('minimumROIDimensions', 1)
minSize = kwargs.get('minimumROISize', None)
logger.debug('Checking mask with label %d', label)
logger.debug('Calculating bounding box')
# Determine bounds
lsif = sitk.LabelStatisticsImageFilter()
try:
lsif.Execute(imageNode, maskNode)
# If lsif fails, and mask is corrected, it includes a check whether the label is present. Therefore, perform
# this test here only if lsif does not fail on the first attempt.
if label not in lsif.GetLabels():
logger.error('Label (%g) not present in mask', label)
return (boundingBox, correctedMask)
except RuntimeError as e:
# If correctMask = True, try to resample the mask to the image geometry, otherwise return None ("fail")
if not kwargs.get('correctMask', False):
if (
"Both images for LabelStatisticsImageFilter don't match type or dimension!"
in e.args[0]
):
logger.error(
'Image/Mask datatype or size mismatch. Potential solution: enable correctMask, see '
'Documentation:Usage:Customizing the Extraction:Settings:correctMask for more information'
)
logger.debug('Additional information on error.', exc_info=True)
elif "Inputs do not occupy the same physical space!" in e.args[0]:
logger.error(
'Image/Mask geometry mismatch. Potential solution: increase tolerance using geometryTolerance, '
'see Documentation:Usage:Customizing the Extraction:Settings:geometryTolerance for more '
'information'
)
logger.debug('Additional information on error.', exc_info=True)
return (boundingBox, correctedMask)
logger.warning('Image/Mask geometry mismatch, attempting to correct Mask')
correctedMask = _correctMask(imageNode, maskNode, label)
if correctedMask is None: # Resampling failed (ROI outside image physical space
logger.error(
'Image/Mask correction failed, ROI invalid (not found or outside of physical image bounds)'
)
return boundingBox, correctedMask
# Resampling succesful, try to calculate boundingbox
try:
lsif.Execute(imageNode, correctedMask)
except RuntimeError:
logger.error(
'Calculation of bounding box failed, for more information run with DEBUG logging and check log'
)
logger.debug(
'Bounding box calculation with resampled mask failed', exc_info=True
)
return boundingBox, correctedMask
# LBound and UBound of the bounding box, as (L_X, U_X, L_Y, U_Y, L_Z, U_Z)
boundingBox = numpy.array(lsif.GetBoundingBox(label))
logger.debug('Checking minimum number of dimensions requirements (%d)', minDims)
ndims = numpy.sum(
(boundingBox[1::2] - boundingBox[0::2] + 1) > 1
) # UBound - LBound + 1 = Size
if ndims < minDims:
logger.error(
'mask has too few dimensions (number of dimensions %d, minimum required %d)',
ndims,
minDims,
)
return None, correctedMask
if minSize is not None:
logger.debug('Checking minimum size requirements (minimum size: %d)', minSize)
roiSize = lsif.GetCount(label)
if roiSize <= minSize:
logger.error(
'Size of the ROI is too small (minimum size: %g, ROI size: %g',
minSize,
roiSize,
)
return None, correctedMask
return boundingBox, correctedMask
from .mybase import MyRadiomicsFeaturesBase
from radiomics.ngtdm import RadiomicsNGTDM
class MyRadiomicsNGTDM(RadiomicsNGTDM, MyRadiomicsFeaturesBase):
def __init__(self, inputImage, inputMask, **kwargs):
super().__init__(inputImage, inputMask, **kwargs)
import numpy
from scipy.spatial import ConvexHull
from radiomics.shape import RadiomicsShape
from .mybase import MyRadiomicsFeaturesBase
class MyRadiomicsShape(MyRadiomicsFeaturesBase):
def __init__(self, inputImage, inputMask, **kwargs):
super().__init__(inputImage, inputMask, **kwargs)
def _initVoxelBasedCalculation(self):
RadiomicsShape._initVoxelBasedCalculation(self)
def _initSegmentBasedCalculation(self):
RadiomicsShape._initSegmentBasedCalculation(self)
def _calculateSurfaceArea(self):
self.SurfaceArea = RadiomicsShape._calculateSurfaceArea(self)
def calculateDiameters(self):
RadiomicsShape.calculateDiameters(self)
def _interpolate(self, grid, p1, p2):
RadiomicsShape._interpolate(self, grid, p1, p2)
def getCompactness2FeatureValue(self):
r"""
**6. Compactness 2**
.. math::
\textit{compactness 2} = 36 \pi \frac{V^2}{A^3}
Similar to Sphericity and Compactness 1, Compactness 2 is a measure of how compact the shape of the tumor is