Commit 7737f279 authored by Alessia Marcolini's avatar Alessia Marcolini
Browse files

Custom histology library

parent 50b49c41
from .augment import *
from .conversions import *
#from .loaders import *
from .tiles import *
#from .visualization import *
from .wsi import *
\ No newline at end of file
import numpy as np
from skimage import transform
from skimage import color
from skimage import draw
def rotate_img(img, masks=None, max_rotate = 90):
angle = np.random.uniform(-1,1)
angle = angle*max_rotate
rotate_angle = np.pi*angle/180
affine_tf = transform.AffineTransform(rotation=rotate_angle)
img_ = transform.warp(img, inverse_map=affine_tf,mode='wrap', preserve_range=True)
if masks is None:
return(img_)
else:
masks_ = []
for mask in masks:
mask_ = transform.warp(mask, inverse_map=affine_tf, mode='wrap', preserve_range=True)
masks_.append(mask_)
return(img_, masks_)
def shear_img(img, masks=None, max_shear = 10):
angle = np.random.uniform(-1,1)
angle = angle*max_shear
shear_angle = np.pi*angle/180
affine_tf = transform.AffineTransform(shear=shear_angle)
img_ = transform.warp(img, inverse_map=affine_tf, mode='wrap', preserve_range=True)
if masks is None:
return(img_)
else:
masks_ = []
for mask in masks:
mask_ = transform.warp(mask, inverse_map=affine_tf, mode='wrap', preserve_range=True)
# mask_[mask_<255] = 0
masks_.append(mask_)
return(img_, masks_)
def rescale_img(img, masks=None, max_scale = [0.5, 2]):
scale_h = np.random.uniform(max_scale[0], max_scale[1])
scale_w = np.random.uniform(max_scale[0], max_scale[1])
img_ = transform.rescale(img, (scale_h, scale_w), mode='wrap', preserve_range=True)
if masks is None:
return(img_)
else:
masks_ = []
for mask in masks:
mask_ = transform.rescale(mask, (scale_h, scale_w), mode='wrap', preserve_range=True)
# mask_[mask_<255] = 0
masks_.append(mask_)
return(img_, masks_)
def crop_img(img, masks=None, max_crop = [0.5, 0.5]):
img_ = np.copy(img)
h,w = img_.shape[0], img_.shape[1]
size_h = int(np.random.uniform(int((1-max_crop[0])*h), h))
size_w = int(np.random.uniform(int((1-max_crop[1])*w), w))
h0 = int(np.random.uniform(0, h - size_h))
w0 = int(np.random.uniform(0, w - size_w))
img_ = img_[h0 : h0+size_h, w0 : w0+size_w]
if masks is None:
return(img_)
else:
masks_ = []
for mask in masks:
mask_ = mask[h0 : h0+size_h, w0 : w0+size_w]
masks_.append(mask_)
return(img_, masks_)
def flip_img(img, masks=None, flip_probs = [0.5, 0.5]):
img_ = np.copy(img)
h_flip = np.random.uniform(0, 1)<flip_probs[0]
v_flip = np.random.uniform(0, 1)<flip_probs[1]
if h_flip:
img_ = img_[:,::-1]
if v_flip:
img_ = img_[::-1,:]
if masks is None:
return(img_)
else:
masks_ = []
for mask_ in masks:
if h_flip:
mask_ = mask_[:,::-1]
if v_flip:
mask_ = mask_[::-1,:]
masks_.append(mask_)
return(img_, masks_)
#%% greyscale augment
def bright_contrast(img, max_scale=0.01):
img_ = np.copy(img)
low = np.min(img_)
high = np.max(img_)
scale = np.random.uniform(max_scale, 1)
shift = np.random.uniform(0, 1-scale)
img_ = (img_ - low)/(high - low)
img_ = img_*scale + shift
img_[0,0] = 0
img_[-1,-1] = 1
return(img_)
def invert(img, inv_prob=0.5):
img_ = np.copy(img)
invert = np.random.random()<inv_prob
if invert:
img_ = -1*img_ + 1
return(img_)
#%% morphological augment
def random_circle(img, max_radius = 0.5, alpha=0.25):
img_ = np.copy(img)
h,w = img_.shape[0], img_.shape[1]
random_col = np.random.random()
random_r = np.random.randint(0, h)
random_c = np.random.randint(0, w)
random_radius = np.random.randint(0.5, int(np.max([h,w])*max_radius))
rr,cc = draw.circle(r = random_r, c = random_c, radius = random_radius, shape=(h, w))
img_h = color.rgb2hsv(img_)
img_h[rr,cc, 0] = random_col
img_h[rr,cc, 1] = 0.1
img_h[rr,cc, 2] = 1 - img_h[rr,cc, 2]*alpha
#
out = color.hsv2rgb(img_h)
return(out)
#%% color augment
def generate_3d():
"""Generate a 3D random rotation matrix.
Returns:
np.matrix: A 3D rotation matrix.
"""
x1, x2, x3 = np.random.rand(3)
R = np.matrix([[np.cos(2 * np.pi * x1), np.sin(2 * np.pi * x1), 0],
[-np.sin(2 * np.pi * x1), np.cos(2 * np.pi * x1), 0],
[0, 0, 1]])
v = np.matrix([[np.cos(2 * np.pi * x2) * np.sqrt(x3)],
[np.sin(2 * np.pi * x2) * np.sqrt(x3)],
[np.sqrt(1 - x3)]])
H = np.eye(3) - 2 * v * v.T
M = -H * R
return M
def rotate_colors(img):
rotation_matrix = np.array(generate_3d())
img_rotated = np.dot(img, rotation_matrix)
img_rotated = (img_rotated - np.min(img_rotated))/(np.max(img_rotated) - np.min(img_rotated))
return(img_rotated)
#%%
def data_augment(img, masks=None,
max_rotate=30,
max_shear=5,
max_scale = [0.8, 1.2],
max_crop=[0.2, 0.2],
flip_probs=[0.5, 0.5]):
img_, masks_ = rotate_img(img, masks, max_rotate=max_rotate)
img_, masks_ = shear_img(img_, masks_, max_shear=max_shear)
img_, masks_ = rescale_img(img_, masks_, max_scale)
img_, masks_ = flip_img(img_, masks_, flip_probs=flip_probs)
img_, masks_ = crop_img(img_, masks_, max_crop=max_crop)
return(img_, masks_)
import numpy as np
from PIL import Image
def numpy2PIL(img):
#rescale to 0-255
if((np.max(img) - np.min(img))) != 0:
img = (img - np.min(img))/(np.max(img) - np.min(img))*255
img = np.uint8(img)
img_PIL = Image.fromarray(img)
return(img_PIL)
def PIL2numpy(img):
return(np.array(img))
import numpy as np
import os
import pandas as pd
from PIL import Image
#import torch
from torch.utils.data import Dataset
import torchvision.transforms as transforms
import skimage
from .conversions import numpy2PIL
from .augment import data_augment
# TODO: use torchvision functional api for a unique transform for input (and target if it exists)
#%%
def __transform__(image, phase, size):
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
if phase == 'train':
transformations = transforms.Compose([transforms.RandomRotation((-180,180)),
transforms.RandomCrop(size),
transforms.RandomHorizontalFlip(),
transforms.RandomVerticalFlip(),
transforms.ToTensor(),
normalize])
image = transformations(image)
else:
transformations = transforms.Compose([transforms.CenterCrop(size),
transforms.ToTensor(),
normalize])
image = transformations(image)
return(image)
class CSVLabelsLoader(Dataset):
'''
Load images and labels. Use when images are in a unique folder and labels are in a csv file (see specs below).
image_dir: folder were all the images are stored
labels_file: csv file with two columns without header.
The first column should contain the filenames of the images WITH extension.
The second column should contain the label of each image.
'''
def __init__(self, image_dir, labels_file=None, transform=None, size=224, phase='train', seed=0):
np.random.seed(seed)
assert phase in ['train', 'valid', 'test'], "Phase should be either 'train' or 'valid' or 'test'."
self.phase = phase
self.size = size
self.image_dir = image_dir
if self.phase != 'test':
assert labels_file is not None, "Labels file should be provided"
label_data = pd.read_csv(labels_file, header=None)
samples = np.array(label_data.iloc[:,0]).astype(str)
labels = np.array(label_data.iloc[:,1])
else:
samples = np.array(os.listdir(self.image_dir))
labels = np.repeat(None, len(samples))
idx_shuffle = np.random.permutation(len(samples))
self.samples = samples[idx_shuffle]
self.labels = labels[idx_shuffle]
if transform is None:
self.transform = __transform__
else:
self.transform = transform
def __len__(self):
return(len(self.files))
def __getitem__(self, idx):
sample = self.samples[idx]
img_path = os.path.join(self.image_dir, sample)
image = Image.open(img_path).convert('RGB')
image = self.transform(image, self.phase, self.size)
label = self.labels[np.where(self.samples == sample)[0][0]]
item = {'image': image, 'label': label, 'sample': sample}
return(item)
class FolderLabelsLoader(Dataset):
'''
Load images and labels. Use when images are in a separate folders and each folder is a class.
image_dir: folder were all the images are stored
classes: selected classes
'''
def __init__(self, root_dir, classes=None, transform=None, size=224, phase='train', seed=0):
self.root_dir = root_dir
assert phase in ['train', 'valid', 'test'], "Phase should be either 'train' or 'valid' or 'test'."
self.phase = phase
self.size = size
if classes is None:
self.classes = np.array(sorted(os.listdir(root_dir)))
else:
self.classes = classes
if transform is None:
self.transform = __transform__
else:
self.transform = transform
np.random.seed(seed)
files=[]
labels=[]
original_classes = []
for i, CLASS in enumerate(self.classes):
os.chdir(self.root_dir)
os.chdir(CLASS)
files_class = np.array(os.listdir(os.getcwd()))
label_class = np.repeat(str(i), len(files_class))
original_class = np.repeat(CLASS, len(files_class))
files.append(['{}/{}'.format(CLASS, f) for f in files_class])
labels.append(label_class)
original_classes.append(original_class)
files = np.concatenate(files)
labels = np.concatenate(labels)
original_classes = np.concatenate(original_classes)
idx_shuffle = np.random.permutation(len(files))
self.samples = files[idx_shuffle]
self.labels = labels[idx_shuffle]
self.original_classes = original_classes[idx_shuffle]
def __len__(self):
return(self.dataset.shape[0])
def __getitem__(self, idx):
sample = self.samples[idx]
label = self.labels[idx]
original_class = self.original_classes[idx]
img_path = os.path.join(self.root_dir, sample)
image = Image.open(img_path).convert('RGB')
image = self.transform(image, self.phase, self.size)
item = {'image': image, 'label': label, 'sample': sample, 'class':original_class}
return(item)
def MaskLabelsLoader(Dataset):
def __init__(self, root_dir, size = 256, phase = 'train', seed=0):
self.ROOT = root_dir
self.size = size
self.phase = phase
np.random.seed(seed)
self.samples = os.listdir(root_dir)
def __len__(self):
return(len(self.samples))
def __getitem__(self, idx):
SAMPLE = self.samples[idx]
SAMPLE_PATH = self.ROOT + SAMPLE
img = skimage.io.imread(SAMPLE_PATH + '/images/' + SAMPLE + '.png')
if self.phase == 'test':
img = img[:,:,:3]
img = numpy2PIL(img)
img, size = self._to_tensor(img)
sample = {'image': img, 'sample':SAMPLE, 'size':size}
elif self.phase == 'valid':
labels = skimage.io.imread(SAMPLE_PATH + '/labels/' + SAMPLE + '.png')
img = numpy2PIL(img)
labels = numpy2PIL(labels)
img, size, labels = self._to_tensor(img, labels)
sample = {'image': img, 'mask': labels, 'sample':SAMPLE, 'size':size}
elif self.phase == 'train':
labels = skimage.io.imread(SAMPLE_PATH + '/labels/' + SAMPLE + '.png') #0-255
img, labels = data_augment(img, [labels],
max_rotate=10,
max_shear=0,
max_scale = [0.8,1.2],
max_crop=[0.1,0.1],
flip_probs=[0.5,0.5])
labels = labels[0]
th = np.max(labels)*0.9 #threshold_otsu(labels)
labels[labels<th] = 0
labels[labels>=th] = 1
img = numpy2PIL(img)
labels = numpy2PIL(labels)
img, size, labels = self._to_tensor(img, labels)
sample = {'image': img, 'mask': labels, 'sample':SAMPLE, 'size':size}
else:
print('Phase not implemented')
sample = {'image': None, 'mask': None, 'sample':None, 'size':None}
return(sample)
def _to_tensor(self, image, labels=None):
w,h = image.size
image = transforms.Compose([transforms.Resize((self.size, self.size), interpolation = Image.NEAREST), transforms.ToTensor()])(image)
if labels != None:
labels = transforms.ToTensor()(labels)
return(image, (h,w), labels)
else:
return(image, (h,w))
#%%
import matplotlib.pyplot as plt
loader = FolderLabelsLoader('.../GTEx_images_tissues/', phase='train')
sample = loader[4]
plt.imshow(sample['image'].numpy()[2,:,:], 'gray')
print(sample['label'])
print(sample['sample'])
print(sample['class'])
import numpy as np
import skimage.morphology as morph
from skimage.filters import threshold_otsu
from skimage import color
from scipy import ndimage, linalg
# import colorsys
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
# %% general utilities
def has_enough_tissue(image, threshold=0.8, near_zero_var_threshold=0.1):
image_gray = color.rgb2gray(image)
# Check if image is FULL-WHITE
if np.mean(image_gray.ravel()) > 0.9 and np.std(image_gray.ravel()) < 0.09: # full or almost white
return False
# Calculate the threshold of pixel-values corresponding to FOREGROUND
# using Threshold-Otsu Method
thresh = threshold_otsu(image_gray)
# Filter out the Background
image_bw = image_gray < thresh
# Generate a Disk shaped filter of radius=5
strel = morph.disk(5)
# Generate Morphological Dilation, i.e. enlarge dark regions, shrinks dark regions
image_bw_dilated = morph.dilation(image_bw, strel)
# Fill holes in brightness based on a (minimum) reference structure to look for
image_bw_filled = ndimage.binary_fill_holes(image_bw_dilated, structure=np.ones((5, 5))).astype(np.uint8)
# Near-zero variance threshold
# This also includes cases in which there is ALL TISSUE (too clear) or NO TISSUE (zeros)
if np.var(image_bw_filled) < near_zero_var_threshold:
return False
return np.mean(image_bw_filled) > threshold
def maxmin_norm(img):
return (img - np.min(img)) / (np.max(img) - np.min(img))
def invert_grays(img):
return -1 * (img - np.max(img))
def is_grayscale(img):
h_r = np.histogram(img[:, :, 0].ravel(), bins=np.arange(0, 256))
h_g = np.histogram(img[:, :, 1].ravel(), bins=np.arange(0, 256))
h_b = np.histogram(img[:, :, 2].ravel(), bins=np.arange(0, 256))
return np.all(h_r[0] == h_g[0]) and np.all(h_r[0] == h_b[0])
# %% color processing
# dummy white balance
def balance_white(img):
# simple white balance: the maximum value of each image should be white
img_balanced = np.zeros_like(img)
img_balanced[:, :, 0] = img[:, :, 0] / np.max(img[:, :, 0].ravel()) * 255.
img_balanced[:, :, 1] = img[:, :, 1] / np.max(img[:, :, 1].ravel()) * 255.
img_balanced[:, :, 2] = img[:, :, 2] / np.max(img[:, :, 2].ravel()) * 255.
return img_balanced
# deconvolution
def detect_colors(img, n_colors=3, method='kmeans'):
assert method in ['PCA', 'kmeans']
img = maxmin_norm(img)
img_l = img.reshape((img.shape[0] * img.shape[1], img.shape[2]))
if method == 'PCA':
assert n_colors <= 3, "Maximum 3 n_colors when using PCA"
clt = PCA(n_components=n_colors)
clt.fit(img_l)
colors = clt.components_
return colors
elif method == 'kmeans':
clt = KMeans(n_clusters=n_colors)
clt.fit(img_l)
colors = clt.cluster_centers_
return colors
def find_color_base(color_1, color_2=np.array([255, 255, 255])):
def normalize_vector(v):
norm = np.sqrt(np.sum(v ** 2))
return v / norm
color_1_n = normalize_vector(color_1)
color_2_n = normalize_vector(color_2)
color_3_n = normalize_vector(np.cross(color_1_n, color_2_n))
colors_norm = np.vstack([color_1_n, color_2_n, color_3_n])
return colors_norm
def colorize_image(img, color):
img_3 = np.stack([img, img, img], axis=2)
img_tinted = color * img_3
return img_tinted
def separate_colors(img, colors, colorize=True):
colors_new = np.vstack([colors, colors[0, :]])
stain_images = []
for id_col in range(colors.shape[0]):
color_1 = colors_new[id_col, :]
color_2 = colors_new[id_col + 1, :]
colors_base = find_color_base(color_1, color_2)
color_matrix = linalg.inv(colors_base)
separated = color.separate_stains(img, color_matrix)
separated_main = separated[:, :, 0]
separated_main = maxmin_norm(separated_main)
if colorize:
separated_main = colorize_image(separated_main, color_1)
stain_images.append(separated_main)
return stain_images
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from skimage import color
def plot_rgb_hist(img):
ax = plt.gca()
plt.hist(img[:,:,0].ravel(), bins = np.arange(0,256), color='red', alpha=0.8)
plt.hist(img[:,:,1].ravel(), bins = np.arange(0,256), color='green', alpha=0.8)
plt.hist(img[:,:,2].ravel(), bins = np.arange(0,256), color='blue', alpha=0.8)
plt.hist(255*color.rgb2gray(img).ravel(), bins = np.arange(0,256), color='white', alpha = 0.5)
ax.set_facecolor('black')
def plot_base(colors_base):
origin = [0,0,0]
X, Y, Z = zip(origin,origin,origin)
U,V,W = zip(colors_base)
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.quiver(X, Y, Z, U, V, W)
plt.show()
#%% plot torch tensor
#%% plot image
\ No newline at end of file
import numpy as np
import openslide
import gc
from .tiles import has_enough_tissue
from skimage import color
import skimage.morphology as morph
from skimage.measure import label, regionprops
from skimage.filters import threshold_otsu