Commit c88bb9d0 authored by Alessia Marcolini
Browse files

Change dtype of labels to int

parent 41d68d4e
......@@ -15,7 +15,7 @@ from utils import remove_na, remove_constant_cols
# os.chdir('..')
# %%
# Dataset to process. The script branches on this value further down
# ('HN_val' and 'HN_BZ' are the two cases it handles).
DATASET_NAME = 'HN_val'

# Everything for a dataset lives under data/<DATASET_NAME>/:
# raw inputs in 'raw', cleaned outputs in 'processed'.
PROJECT_DATA_PATH = Path('data') / DATASET_NAME
RAW_DATA_PATH = PROJECT_DATA_PATH / 'raw'
PROCESSED_DATA_PATH = PROJECT_DATA_PATH / 'processed'
......@@ -35,22 +35,22 @@ os.makedirs(PROCESSED_DATA_PATH, exist_ok=True)
# Tx T-stages cluster with low T-stages in the UMAP projection
grading_dict_binary = {
'T1': '0',
'T2': '0',
'T3': '1',
'T4': '1',
'T4a': '1',
'T4b': '1',
'Tx': '0',
'T1': 0,
'T2': 0,
'T3': 1,
'T4': 1,
'T4a': 1,
'T4b': 1,
'Tx': 0,
}
grading_dict_grouped = {
'T1': '0',
'T2': '1',
'T3': '2',
'T4': '3',
'T4a': '3',
'T4b': '3',
'Tx': '0',
'T1': 0,
'T2': 1,
'T3': 2,
'T4': 3,
'T4a': 3,
'T4b': 3,
'Tx': 0,
}
if DATASET_NAME == 'HN_val':
......@@ -369,6 +369,13 @@ if DATASET_NAME == 'HN_val':
clinical
), f'Clinical file and metadata file differ in patients.'
for label in [
LABEL_COL_LOCOREGIONAL,
LABEL_COL_T_STAGE_BINARY,
LABEL_COL_T_STAGE_GROUPED,
]:
clinical[label] = clinical[label].astype(np.uint8)
clinical.to_csv(PROCESSED_DATA_PATH / CLINICAL_DATA_FILENAME_CLEAN, index=False)
elif DATASET_NAME == 'HN_BZ':
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment