Commit 433f429e authored by Alessia Marcolini
Browse files

Dataset augmentation script

parent 603710b7
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Augment dataset class-wise"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<text style=\"color: green;\">Success</text>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%HN_env"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import sys\n",
"\n",
"import numpy as np\n",
"import SimpleITK as sitk\n",
"from dataset import NumpyCSVDataset, augment_3D_HN\n",
"\n",
"# Parent of the current working directory; left un-normalised, so it ends in '..'.\n",
"PATH = os.path.join(os.path.abspath(os.curdir), os.pardir)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Locations of the source volumes and of the augmented output.\n",
"DATASETDIR = '/thunderdisk/HN/processed/bbox_fixed2_64/'\n",
"AUGMENT_DIR = '/thunderdisk/HN/processed/bbox_64_augmented_LR'\n",
"SIZE = 64  # forwarded unchanged to NumpyCSVDataset\n",
"\n",
"# PATH is defined in the imports cell (parent of the working directory).\n",
"dataset = NumpyCSVDataset(\n",
"    DATASETDIR, f'{PATH}/data/clinical_data.csv', 'Locoregional', SIZE, mode='test'\n",
")\n",
"\n",
"# Labels are compared as strings: entries equal to '1' form the positive class.\n",
"labels = dataset.get_labels()\n",
"idx_positive = np.where(labels == '1')[0]\n",
"\n",
"n_positive = len(idx_positive)\n",
"if n_positive == 0:\n",
"    # Fail with a clear message instead of a ZeroDivisionError further down.\n",
"    raise ValueError('no positive samples (label 1) found; cannot compute ratio_NP')\n",
"\n",
"# NOTE(review): presumably the number of augmented copies per sample - confirm against the augmentation loop.\n",
"augment_K = 5\n",
"# Whole number of non-positive samples per positive sample (floor division).\n",
"ratio_NP = (len(labels) - n_positive) // n_positive\n",
"\n",
"# Create the output directory last so that a failure above does not leave an\n",
"# empty directory behind. exist_ok=False still raises FileExistsError when the\n",
"# directory already exists, so a previous augmentation run is never reused silently.\n",
"os.makedirs(AUGMENT_DIR, exist_ok=False)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(294, 42)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(labels), len(idx_positive)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"HN-CHUM-001.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-002.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-003.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-004.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-005.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-006.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-007.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-008.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-009.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-010.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-011.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-012.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-013.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-014.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-015.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-016.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-017.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-018.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-019.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-021.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-022.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-023.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-024.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-025.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-026.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-027.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-028.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-029.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-030.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-031.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-032.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-033.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-034.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-035.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-036.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-037.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-038.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-039.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-040.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-041.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-042.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-043.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-044.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-045.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-046.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-047.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-048.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-049.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-050.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-051.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-052.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-053.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-054.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-055.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-056.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-057.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-058.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-059.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-060.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-061.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-062.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-063.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-064.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUM-065.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-001.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-002.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-003.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-004.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-005.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-006.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-007.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-008.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-009.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-010.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-011.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-012.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-013.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-014.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-015.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-016.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-017.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-018.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-019.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-020.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-021.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-022.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-023.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-024.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-025.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-026.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-027.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-028.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-029.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-030.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-031.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-032.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-033.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-034.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-035.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-036.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-037.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-038.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-039.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-040.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-041.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-042.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-043.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-044.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-045.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-046.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-047.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-048.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-049.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-050.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-051.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-052.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-053.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-054.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-055.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-056.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-057.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-058.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-059.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-060.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-061.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-062.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-063.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-064.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-065.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-066.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-067.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-068.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-069.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-070.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-071.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-072.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-073.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-074.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-075.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-076.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-077.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-078.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-080.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-081.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-082.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-083.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-084.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-085.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-086.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-087.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-088.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-089.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-090.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-091.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-092.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-094.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-095.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-096.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-097.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-098.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-099.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-100.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-101.npy\n",
"<class 'numpy.str_'>\n",
"HN-CHUS-102.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-001.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-002.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-003.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-004.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-005.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-006.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-007.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-008.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-009.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-010.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-011.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-012.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-013.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-014.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-015.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-016.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-017.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-018.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-019.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-020.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-021.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-022.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-023.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-024.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-025.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-026.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-027.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-028.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-029.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-030.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-031.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-032.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-033.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-034.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-035.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-036.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-037.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-038.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-039.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-040.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-041.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-042.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-043.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-044.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-045.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-046.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-047.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-048.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-049.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-050.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-051.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-052.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-053.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-054.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-055.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-056.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-057.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-058.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-059.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-060.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-061.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-062.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-063.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-064.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-065.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-066.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-067.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-069.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-070.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-071.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-072.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-073.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-074.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-075.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-076.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-077.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-078.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-079.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-080.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-081.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-082.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-083.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-084.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-085.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-086.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-087.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-088.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-089.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-090.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-091.npy\n",
"<class 'numpy.str_'>\n",
"HN-HGJ-092.npy\n",
"<class 'numpy.str_'>\n",
"HN-HMR-001.npy\n",
"<class 'numpy.str_'>\n",
"HN-HMR-002.npy\n",
"<class 'numpy.str_'>\n",
"HN-HMR-003.npy\n",
"<class 'numpy.str_'>\n",
"HN-HMR-004.npy\n",
"<class 'numpy.str_'>\n",
"HN-HMR-005.npy\n",
"<class 'numpy.str_'>\n",
"HN-HMR-006.npy\n",
"<class 'numpy.str_'>\n",
"HN-HMR-008.npy\n",
"<class 'numpy.str_'>\n",
"HN-HMR-009.npy\n",
"<class 'numpy.str_'>\n",
"HN-HMR-010.npy\n",
"<class 'numpy.str_'>\n",
"HN-HMR-011.npy\n",
"<class 'numpy.str_'>\n",
"HN-HMR-012.npy\n",
"<class 'numpy.str_'>\n",
"HN-HMR-013.npy\n",
"<class 'numpy.str_'>\n",
"HN-HMR-014.npy\n",
"<class 'numpy.str_'>\n",
"HN-HMR-015.npy\n",
"<class 'numpy.str_'>\n",
"HN-HMR-016.npy\n",
"<class 'numpy.str_'>\n",
"HN-HMR-018.npy\n",
"<class 'numpy.str_'>\n",
"HN-HMR-019.npy\n",
"<class 'numpy.str_'>\n",