Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
MPBA Radiomics
RADLER
Commits
aea99cc7
Commit
aea99cc7
authored
Mar 18, 2020
by
Alessia Marcolini
Browse files
Link files in bbox folders
parent
9d5a91fc
Changes
1
Hide whitespace changes
Inline
Side-by-side
01_preprocessing/merge_datasets.py
View file @
aea99cc7
...
...
@@ -4,7 +4,7 @@ import pandas as pd
import
numpy
as
np
from
functools
import
reduce
from
pathlib
import
Path
import
s
hutil
import
s
ubprocess
from
config
import
get_project_root
...
...
@@ -12,7 +12,14 @@ from config import get_project_root
# Root directory that every data path below is built from.
PROJECT_ROOT = get_project_root()

# Source datasets, paired position-by-position with the bbox sub-dataset
# folder name that is read from each of them.
DATASETS = ['HN_val', 'HN_BZ']
BBOX_SUBDATASETS_NAMES = ['bbox_64', 'bbox_64']

# One source folder per (dataset, bbox sub-dataset) pair:
# <root>/data/<dataset>/processed/bbox/<bbox sub-dataset>
BBOX_SUBDATASETS_PATHS = [
    PROJECT_ROOT / 'data' / source_name / 'processed' / 'bbox' / subdataset
    for source_name, subdataset in zip(DATASETS, BBOX_SUBDATASETS_NAMES)
]

# Name of the bbox sub-dataset folder used for the merged dataset.
MERGED_BBOX_SUBDATASET_NAME = 'bbox_64'

source_listing = '\t'.join(map(str, BBOX_SUBDATASETS_PATHS))
print('Copying from: ', source_listing)
# %%
# find union names in datasets
DATASETS_NAME_UNION
=
list
(
...
...
@@ -22,12 +29,20 @@ DATASETS_NAME_UNION = list(
# rearrange name order (HN should be first)
DATASETS_NAME_UNION
.
insert
(
0
,
DATASETS_NAME_UNION
.
pop
(
DATASETS_NAME_UNION
.
index
(
'HN'
)))
NEW_DATASET_NAME
=
'_'
.
join
(
DATASETS_NAME_UNION
)
os
.
makedirs
(
PROJECT_ROOT
/
'data'
/
NEW_DATASET_NAME
/
'processed'
/
'bbox'
,
exist_ok
=
False
NEW_DATASET_PATH
=
(
PROJECT_ROOT
/
'data'
/
NEW_DATASET_NAME
/
'processed'
/
'bbox'
/
MERGED_BBOX_SUBDATASET_NAME
)
os
.
makedirs
(
NEW_DATASET_PATH
,
exist_ok
=
False
)
print
(
'Copying into: '
,
str
(
NEW_DATASET_PATH
))
#%% [markdown]
# Merge and create a new
file
clinical
#
# Merge and create a new clinical
file
#%%
# Accumulators that start empty: one for clinical tables, one for dataset names.
clinicals, dataset_name = [], []
...
...
@@ -40,7 +55,9 @@ for dataset in DATASETS:
clinicals
.
append
(
clinical
)
# Stack the per-dataset clinical tables; join='inner' keeps only the columns
# present in every table. `clinicals` is passed directly, since copying it
# through a comprehension first added nothing.
merged_clinical = pd.concat(clinicals, join='inner')

# Add the collected dataset names as a new 'dataset' column.
# NOTE(review): assumes dataset_name has one entry per merged row - confirm
# against the loop that fills it.
merged_clinical['dataset'] = dataset_name
merged_clinical
.
to_csv
(
PROJECT_ROOT
/
'data'
...
...
@@ -49,4 +66,9 @@ merged_clinical.to_csv(
/
f
'clinical_
{
NEW_DATASET_NAME
}
.csv'
,
index
=
False
,
)
#%% [markdown]
# # Link files from respective folders

#%%
# Symlink every entry of each source bbox folder into the merged folder.
# This uses pathlib instead of running `ln -s {old_path}/* ...` through a
# shell, so paths containing spaces or shell metacharacters are taken
# literally, and an empty source folder no longer produces a dangling link
# literally named '*'.
for old_path in BBOX_SUBDATASETS_PATHS:
    for source in sorted(Path(old_path).glob('*')):
        # Skip dotfiles to match what the shell's `*` expanded to.
        if source.name.startswith('.'):
            continue
        link = Path(NEW_DATASET_PATH) / source.name
        try:
            link.symlink_to(source)
        except FileExistsError:
            # The name is already taken in the merged folder: keep the
            # existing entry and report the clash instead of aborting, as
            # the unchecked `ln` call did.
            print(f'Skipping {source}: {link} already exists')
# %%
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment