Commit aea99cc7 authored by Alessia Marcolini
Browse files

Link files in bbox folders

parent 9d5a91fc
...@@ -4,7 +4,7 @@ import pandas as pd ...@@ -4,7 +4,7 @@ import pandas as pd
import numpy as np import numpy as np
from functools import reduce from functools import reduce
from pathlib import Path from pathlib import Path
import shutil import subprocess
from config import get_project_root from config import get_project_root
...@@ -12,7 +12,14 @@ from config import get_project_root ...@@ -12,7 +12,14 @@ from config import get_project_root
PROJECT_ROOT = get_project_root() PROJECT_ROOT = get_project_root()
DATASETS = ['HN_val', 'HN_BZ'] DATASETS = ['HN_val', 'HN_BZ']
BBOX_SUBDATASETS_NAMES = ['bbox_64', 'bbox_64']
BBOX_SUBDATASETS_PATHS = [
PROJECT_ROOT / 'data' / dataset / 'processed' / 'bbox' / bbox_name
for dataset, bbox_name in zip(DATASETS, BBOX_SUBDATASETS_NAMES)
]
MERGED_BBOX_SUBDATASET_NAME = 'bbox_64'
print('Copying from: ', '\t'.join([str(path) for path in BBOX_SUBDATASETS_PATHS]))
# %% # %%
# find union names in datasets # find union names in datasets
DATASETS_NAME_UNION = list( DATASETS_NAME_UNION = list(
...@@ -22,12 +29,20 @@ DATASETS_NAME_UNION = list( ...@@ -22,12 +29,20 @@ DATASETS_NAME_UNION = list(
# rearrange name order (HN should be first) # rearrange name order (HN should be first)
DATASETS_NAME_UNION.insert(0, DATASETS_NAME_UNION.pop(DATASETS_NAME_UNION.index('HN'))) DATASETS_NAME_UNION.insert(0, DATASETS_NAME_UNION.pop(DATASETS_NAME_UNION.index('HN')))
NEW_DATASET_NAME = '_'.join(DATASETS_NAME_UNION) NEW_DATASET_NAME = '_'.join(DATASETS_NAME_UNION)
os.makedirs(
PROJECT_ROOT / 'data' / NEW_DATASET_NAME / 'processed' / 'bbox', exist_ok=False NEW_DATASET_PATH = (
PROJECT_ROOT
/ 'data'
/ NEW_DATASET_NAME
/ 'processed'
/ 'bbox'
/ MERGED_BBOX_SUBDATASET_NAME
) )
os.makedirs(NEW_DATASET_PATH, exist_ok=False)
print('Copying into: ', str(NEW_DATASET_PATH))
#%% [markdown] #%% [markdown]
# Merge and create a new file clinical # # Merge and create a new clinical file
#%% #%%
clinicals = [] clinicals = []
dataset_name = [] dataset_name = []
...@@ -40,7 +55,9 @@ for dataset in DATASETS: ...@@ -40,7 +55,9 @@ for dataset in DATASETS:
clinicals.append(clinical) clinicals.append(clinical)
merged_clinical = pd.concat([i for i in clinicals], join='inner') merged_clinical = pd.concat([i for i in clinicals], join='inner')
merged_clinical['dataset'] = dataset_name merged_clinical['dataset'] = dataset_name
merged_clinical.to_csv( merged_clinical.to_csv(
PROJECT_ROOT PROJECT_ROOT
/ 'data' / 'data'
...@@ -49,4 +66,9 @@ merged_clinical.to_csv( ...@@ -49,4 +66,9 @@ merged_clinical.to_csv(
/ f'clinical_{NEW_DATASET_NAME}.csv', / f'clinical_{NEW_DATASET_NAME}.csv',
index=False, index=False,
) )
#%% [markdown]
# # Link files from respective folders
for old_path in BBOX_SUBDATASETS_PATHS:
subprocess.call(f'ln -s {old_path}/* {NEW_DATASET_PATH}', shell=True)
# %% # %%
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment