Commit 63cbaf9c authored by Alessia Marcolini
Browse files

Select only available patients on filesystem and preprocess HN_BZ dataset

parent 668dee72
......@@ -30,7 +30,7 @@ import shutil
from tqdm import tqdm
# %%
DATASET_NAME = 'HN_val'
DATASET_NAME = 'HN_BZ'
PROJECT_DATA_PATH = Path('data') / DATASET_NAME
RAW_DATA_PATH = PROJECT_DATA_PATH / 'raw'
PROCESSED_DATA_PATH = PROJECT_DATA_PATH / 'processed'
......@@ -40,12 +40,15 @@ os.makedirs(PROCESSED_DCM_PATH, exist_ok=False)
if DATASET_NAME == 'HN_val':
# TODO: read path from path_original_data.csv and not from summary.csv
DATASET_DESCRIPTION_FILE = 'path_original_data.csv'
dataset_description = pd.read_csv(PROCESSED_DATA_PATH / DATASET_DESCRIPTION_FILE)
available_patients = patients = [
f for f in os.listdir(DCM_DIR) if os.path.isdir(DCM_DIR / f)
]
for i, row in tqdm(dataset_description.iterrows()):
patient = row['Subject ID']
old_files_dir = RAW_DATA_PATH / row['dicom_folder']
......@@ -65,5 +68,19 @@ if DATASET_NAME == 'HN_val':
shutil.copytree(old_files_dir, new_files_dir)
elif DATASET_NAME == 'HN_BZ':
patients = [
f for f in os.listdir(RAW_DATA_PATH) if os.path.isdir(RAW_DATA_PATH / f)
]
for patient in tqdm(patients):
old_files_dir = RAW_DATA_PATH / patient
new_files_dir = PROCESSED_DCM_PATH / patient
shutil.copytree(old_files_dir, new_files_dir)
os.rename(new_files_dir / 'RS', new_files_dir / 'RTSTRUCT')
os.rename(new_files_dir / 'PET', new_files_dir / 'PT')
# %%
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment