Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
MPBA
INF
Commits
92addcf3
Commit
92addcf3
authored
Mar 30, 2020
by
Nicole Bussola
Browse files
minor fix
parent
1c24e8a0
Changes
1
Hide whitespace changes
Inline
Side-by-side
UMAP_INF_features.ipynb
View file @
92addcf3
%% Cell type:markdown id: tags:
%% Cell type:markdown id: tags:
# UMAP projection
# UMAP projection
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
import
os
import
os
import
pandas
as
pd
import
pandas
as
pd
from
pathlib
import
Path
from
pathlib
import
Path
import
numpy
as
np
import
numpy
as
np
from
bokeh.plotting
import
figure
,
output_file
,
show
,
save
from
bokeh.plotting
import
figure
,
output_file
,
show
,
save
from
bokeh.io
import
output_notebook
,
export_png
from
bokeh.io
import
output_notebook
,
export_png
from
bokeh.palettes
import
colorblind
from
bokeh.palettes
import
colorblind
from
bokeh.models
import
CategoricalColorMapper
,
ColumnDataSource
,
LassoSelectTool
,
WheelZoomTool
,
ZoomInTool
,
BoxZoomTool
,
ResetTool
from
bokeh.models
import
CategoricalColorMapper
,
ColumnDataSource
,
LassoSelectTool
,
WheelZoomTool
,
ZoomInTool
,
BoxZoomTool
,
ResetTool
from
bokeh.layouts
import
gridplot
from
bokeh.layouts
import
gridplot
from
bokeh.resources
import
CDN
from
bokeh.resources
import
CDN
from
bokeh.embed
import
file_html
from
bokeh.embed
import
file_html
import
numpy
as
np
import
numpy
as
np
import
umap
import
umap
```
```
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
output_notebook
()
output_notebook
()
```
```
%%%% Output: display_data
%%%% Output: display_data
%%%% Output: display_data
%%%% Output: display_data
%% Cell type:markdown id: tags:
%% Cell type:markdown id: tags:
## Load Features datasets
## Load Features datasets
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
TASK
=
'subtypes'
TASK
=
'subtypes'
DATASET
=
'tcga_breast'
DATASET
=
'tcga_breast'
MODEL
=
'randomForest'
MODEL
=
'randomForest'
layers
=
'gene_cnv_prot'
layers
=
'gene_cnv_prot'
PATH
=
Path
(
'data'
)
/
DATASET
/
TASK
PATH
=
Path
(
'data'
)
/
DATASET
/
TASK
PATH_RESULTS
=
Path
(
'results'
)
/
DATASET
/
TASK
/
MODEL
PATH_RESULTS
=
Path
(
'results'
)
/
DATASET
/
TASK
/
MODEL
SPLIT
=
2
# choose a random split for the train, test, and test2 files
SPLIT
=
2
# choose a random split for the train, test, and test2 files
```
```
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
file_tr
=
f
'
{
PATH
}
/
{
SPLIT
}
/
{
layers
}
_tr.txt'
# Fit UMAP
file_tr
=
f
'
{
PATH
}
/
{
SPLIT
}
/
{
layers
}
_tr.txt'
# Fit UMAP
file_test
=
f
'
{
PATH
}
/
{
SPLIT
}
/
{
layers
}
_ts.txt'
# test UMAP on TS
file_test
=
f
'
{
PATH
}
/
{
SPLIT
}
/
{
layers
}
_ts.txt'
# test UMAP on TS
file_test2
=
f
'
{
PATH
}
/
{
SPLIT
}
/
{
layers
}
_ts2.txt'
# test UMAP on TS2
file_test2
=
f
'
{
PATH
}
/
{
SPLIT
}
/
{
layers
}
_ts2.txt'
# test UMAP on TS2
features_train
=
pd
.
read_csv
(
file_tr
,
sep
=
'
\t
'
,
header
=
0
,
index_col
=
0
)
features_train
=
pd
.
read_csv
(
file_tr
,
sep
=
'
\t
'
,
header
=
0
,
index_col
=
0
)
features_test
=
pd
.
read_csv
(
file_test
,
sep
=
'
\t
'
,
header
=
0
,
index_col
=
0
)
features_test
=
pd
.
read_csv
(
file_test
,
sep
=
'
\t
'
,
header
=
0
,
index_col
=
0
)
features_test2
=
pd
.
read_csv
(
file_test2
,
sep
=
'
\t
'
,
header
=
0
,
index_col
=
0
)
features_test2
=
pd
.
read_csv
(
file_test2
,
sep
=
'
\t
'
,
header
=
0
,
index_col
=
0
)
```
```
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
BEST
=
False
# restrict the features to the INF signature
BEST
=
False
# restrict the features to the INF signature
INF_feats
=
pd
.
read_csv
(
f
'
{
PATH_RESULTS
}
/
{
SPLIT
}
/rSNFi/
{
layers
}
_ts_RandomForest_KBest_featurelist.txt'
,
sep
=
'
\t
'
)[
'FEATURE_NAME'
].
values
.
tolist
()
INF_feats
=
pd
.
read_csv
(
f
'
{
PATH_RESULTS
}
/
{
SPLIT
}
/rSNFi/
{
layers
}
_ts_RandomForest_KBest_featurelist.txt'
,
sep
=
'
\t
'
)[
'FEATURE_NAME'
].
values
.
tolist
()
best_train
=
features_train
[
INF_feats
]
best_train
=
features_train
[
INF_feats
]
best_test
=
features_test
[
INF_feats
]
best_test
=
features_test
[
INF_feats
]
best_test2
=
features_test2
[
INF_feats
]
best_test2
=
features_test2
[
INF_feats
]
```
```
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
if
BEST
:
if
BEST
:
features_train
=
best_train
features_train
=
best_train
features_test
=
best_test
features_test
=
best_test
features_test2
=
best_test2
features_test2
=
best_test2
```
```
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
samples_tr
=
features_train
.
index
samples_tr
=
features_train
.
index
labels_tr
=
pd
.
read_csv
(
f
'
{
PATH
}
/
{
SPLIT
}
/labels_
{
TASK
}
_tr.txt'
,
sep
=
'
\t
'
,
header
=
None
)[
0
].
tolist
()
labels_tr
=
pd
.
read_csv
(
f
'
{
PATH
}
/
{
SPLIT
}
/labels_
{
TASK
}
_tr.txt'
,
sep
=
'
\t
'
,
header
=
None
)[
0
].
tolist
()
features_train
[
'labels'
]
=
labels_tr
features_train
[
'labels'
]
=
labels_tr
labels_tr
=
features_train
[
'labels'
]
labels_tr
=
features_train
[
'labels'
]
features_tr
=
features_train
[
features_train
.
columns
[:
-
1
]].
values
features_tr
=
features_train
[
features_train
.
columns
[:
-
1
]].
values
samples_test
=
features_test
.
index
samples_test
=
features_test
.
index
labels_test
=
pd
.
read_csv
(
f
'
{
PATH
}
/
{
SPLIT
}
/labels_
{
TASK
}
_ts.txt'
,
sep
=
'
\t
'
,
header
=
None
)[
0
].
tolist
()
labels_test
=
pd
.
read_csv
(
f
'
{
PATH
}
/
{
SPLIT
}
/labels_
{
TASK
}
_ts.txt'
,
sep
=
'
\t
'
,
header
=
None
)[
0
].
tolist
()
features_test
[
'labels'
]
=
labels_test
features_test
[
'labels'
]
=
labels_test
labels_test
=
features_test
[
'labels'
]
labels_test
=
features_test
[
'labels'
]
features_ts
=
features_test
[
features_test
.
columns
[:
-
1
]].
values
features_ts
=
features_test
[
features_test
.
columns
[:
-
1
]].
values
samples_test2
=
features_test2
.
index
samples_test2
=
features_test2
.
index
labels_test2
=
pd
.
read_csv
(
f
'
{
PATH
}
/
{
SPLIT
}
/labels_
{
TASK
}
_ts2.txt'
,
sep
=
'
\t
'
,
header
=
None
)[
0
].
tolist
()
labels_test2
=
pd
.
read_csv
(
f
'
{
PATH
}
/
{
SPLIT
}
/labels_
{
TASK
}
_ts2.txt'
,
sep
=
'
\t
'
,
header
=
None
)[
0
].
tolist
()
features_test2
[
'labels'
]
=
labels_test2
features_test2
[
'labels'
]
=
labels_test2
labels_test2
=
features_test2
[
'labels'
]
labels_test2
=
features_test2
[
'labels'
]
features_ts2
=
features_test2
[
features_test2
.
columns
[:
-
1
]].
values
features_ts2
=
features_test2
[
features_test2
.
columns
[:
-
1
]].
values
```
```
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
train_data
=
features_tr
train_data
=
features_tr
test_data
=
features_ts
test_data
=
features_ts
test2_data
=
features_ts2
test2_data
=
features_ts2
```
```
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
# Check
# Check
print
(
len
(
features_tr
),
len
(
samples_tr
),
len
(
labels_tr
))
print
(
len
(
features_tr
),
len
(
samples_tr
),
len
(
labels_tr
))
print
(
len
(
features_ts
),
len
(
samples_test
),
len
(
labels_test
))
print
(
len
(
features_ts
),
len
(
samples_test
),
len
(
labels_test
))
print
(
len
(
features_ts2
),
len
(
samples_test2
),
len
(
labels_test2
))
print
(
len
(
features_ts2
),
len
(
samples_test2
),
len
(
labels_test2
))
```
```
%% Cell type:markdown id: tags:
%% Cell type:markdown id: tags:
## Fit on the training data and transform the test set into the learned space
## Fit on the training data and transform the test set into the learned space
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
mapper
=
umap
.
UMAP
(
n_neighbors
=
40
,
min_dist
=
0.01
,
n_components
=
2
,
mapper
=
umap
.
UMAP
(
n_neighbors
=
40
,
min_dist
=
0.01
,
n_components
=
2
,
metric
=
'euclidean'
).
fit
(
train_data
)
metric
=
'euclidean'
).
fit
(
train_data
)
test_embedding
=
mapper
.
transform
(
test_data
)
test_embedding
=
mapper
.
transform
(
test_data
)
test2_embedding
=
mapper
.
transform
(
test2_data
)
test2_embedding
=
mapper
.
transform
(
test2_data
)
```
```
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
# Check
# Check
len
(
mapper
.
embedding_
),
len
(
test_embedding
),
len
(
test2_embedding
)
len
(
mapper
.
embedding_
),
len
(
test_embedding
),
len
(
test2_embedding
)
```
```
%% Cell type:markdown id: tags:
%% Cell type:markdown id: tags:
## Plot UMAP 2D projection
## Plot UMAP 2D projection
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
TOOLTIPS
=
[
TOOLTIPS
=
[
(
"index"
,
"$index"
),
(
"index"
,
"$index"
),
(
"(x,y)"
,
"($x, $y)"
),
(
"(x,y)"
,
"($x, $y)"
),
(
"desc"
,
"@desc"
),
(
"desc"
,
"@desc"
),
]
]
mycols
=
colorblind
[
'Colorblind'
][
4
]
mycols
=
colorblind
[
'Colorblind'
][
4
]
myclasses
=
pd
.
unique
(
labels_tr
).
tolist
()
myclasses
=
pd
.
unique
(
labels_tr
).
tolist
()
p
=
figure
(
plot_width
=
1200
,
plot_height
=
1200
,
tooltips
=
TOOLTIPS
,
tools
=
'save'
,
toolbar_location
=
"left"
)
p
=
figure
(
plot_width
=
1200
,
plot_height
=
1200
,
tooltips
=
TOOLTIPS
,
tools
=
'save'
,
toolbar_location
=
"left"
)
p
.
title
.
align
=
"center"
p
.
title
.
align
=
"center"
p
.
title
.
text_color
=
"black"
p
.
title
.
text_color
=
"black"
p
.
title
.
text_font_size
=
"25px"
p
.
title
.
text_font_size
=
"25px"
size
=
12
size
=
12
for
col
,
theclass
in
zip
(
mycols
,
myclasses
):
for
col
,
theclass
in
zip
(
mycols
,
myclasses
):
idx_tr
=
np
.
where
(
np
.
array
(
labels_tr
)
==
theclass
)[
0
].
tolist
()
idx_tr
=
np
.
where
(
np
.
array
(
labels_tr
)
==
theclass
)[
0
].
tolist
()
samples_train
=
np
.
expand_dims
(
samples_tr
[
idx_tr
,],
axis
=
1
)
samples_train
=
np
.
expand_dims
(
samples_tr
[
idx_tr
,],
axis
=
1
)
data_tr
=
np
.
hstack
((
mapper
.
embedding_
[
idx_tr
,],
samples_train
))
data_tr
=
np
.
hstack
((
mapper
.
embedding_
[
idx_tr
,],
samples_train
))
df_tr
=
pd
.
DataFrame
(
data_tr
,
columns
=
[
'x'
,
'y'
,
'sample'
])
df_tr
=
pd
.
DataFrame
(
data_tr
,
columns
=
[
'x'
,
'y'
,
'sample'
])
source_tr
=
ColumnDataSource
(
data
=
dict
(
source_tr
=
ColumnDataSource
(
data
=
dict
(
x
=
df_tr
[
'x'
],
x
=
df_tr
[
'x'
],
y
=
df_tr
[
'y'
],
y
=
df_tr
[
'y'
],
desc
=
df_tr
[
'sample'
]))
desc
=
df_tr
[
'sample'
]))
p
.
circle
(
x
=
'x'
,
y
=
'y'
,
size
=
size
,
source
=
source_tr
,
color
=
col
,
alpha
=
0.8
,
legend
=
str
(
theclass
))
p
.
circle
(
x
=
'x'
,
y
=
'y'
,
size
=
size
,
source
=
source_tr
,
color
=
col
,
alpha
=
0.8
,
legend
=
str
(
theclass
))
idx_ts
=
np
.
where
(
np
.
array
(
labels_test
)
==
theclass
)[
0
].
tolist
()
idx_ts
=
np
.
where
(
np
.
array
(
labels_test
)
==
theclass
)[
0
].
tolist
()
samples_ts
=
np
.
expand_dims
(
samples_test
[
idx_ts
,],
axis
=
1
)
samples_ts
=
np
.
expand_dims
(
samples_test
[
idx_ts
,],
axis
=
1
)
data_ts
=
np
.
hstack
((
test_embedding
[
idx_ts
,],
samples_ts
))
data_ts
=
np
.
hstack
((
test_embedding
[
idx_ts
,],
samples_ts
))
df_ts
=
pd
.
DataFrame
(
data_ts
,
columns
=
[
'x'
,
'y'
,
'sample'
])
df_ts
=
pd
.
DataFrame
(
data_ts
,
columns
=
[
'x'
,
'y'
,
'sample'
])
source_ts
=
ColumnDataSource
(
data
=
dict
(
source_ts
=
ColumnDataSource
(
data
=
dict
(
x
=
df_ts
[
'x'
],
x
=
df_ts
[
'x'
],
y
=
df_ts
[
'y'
],
y
=
df_ts
[
'y'
],
desc
=
df_ts
[
'sample'
]))
desc
=
df_ts
[
'sample'
]))
p
.
triangle
(
x
=
'x'
,
y
=
'y'
,
size
=
size
,
source
=
source_ts
,
color
=
col
,
alpha
=
0.8
)
p
.
triangle
(
x
=
'x'
,
y
=
'y'
,
size
=
size
,
source
=
source_ts
,
color
=
col
,
alpha
=
0.8
)
idx_ts2
=
np
.
where
(
np
.
array
(
labels_test2
)
==
theclass
)[
0
].
tolist
()
idx_ts2
=
np
.
where
(
np
.
array
(
labels_test2
)
==
theclass
)[
0
].
tolist
()
samples_ts2
=
np
.
expand_dims
(
samples_test2
[
idx_ts2
,],
axis
=
1
)
samples_ts2
=
np
.
expand_dims
(
samples_test2
[
idx_ts2
,],
axis
=
1
)
data_ts2
=
np
.
hstack
((
test2_embedding
[
idx_ts2
,],
samples_ts2
))
data_ts2
=
np
.
hstack
((
test2_embedding
[
idx_ts2
,],
samples_ts2
))
df_ts2
=
pd
.
DataFrame
(
data_ts2
,
columns
=
[
'x'
,
'y'
,
'sample'
])
df_ts2
=
pd
.
DataFrame
(
data_ts2
,
columns
=
[
'x'
,
'y'
,
'sample'
])
source_ts2
=
ColumnDataSource
(
data
=
dict
(
source_ts2
=
ColumnDataSource
(
data
=
dict
(
x
=
df_ts2
[
'x'
],
x
=
df_ts2
[
'x'
],
y
=
df_ts2
[
'y'
],
y
=
df_ts2
[
'y'
],
desc
=
df_ts2
[
'sample'
]))
desc
=
df_ts2
[
'sample'
]))
p
.
diamond
(
x
=
'x'
,
y
=
'y'
,
size
=
size
,
source
=
source_ts2
,
color
=
col
,
alpha
=
0.8
)
p
.
diamond
(
x
=
'x'
,
y
=
'y'
,
size
=
size
,
source
=
source_ts2
,
color
=
col
,
alpha
=
0.8
)
p
.
add_tools
(
LassoSelectTool
())
p
.
add_tools
(
LassoSelectTool
())
p
.
add_tools
(
WheelZoomTool
())
p
.
add_tools
(
WheelZoomTool
())
p
.
legend
.
label_text_font_size
=
"20pt"
p
.
legend
.
label_text_font_size
=
"20pt"
p
.
yaxis
.
major_label_text_font_size
=
"15pt"
p
.
yaxis
.
major_label_text_font_size
=
"15pt"
p
.
xaxis
.
major_label_text_font_size
=
"15pt"
p
.
xaxis
.
major_label_text_font_size
=
"15pt"
p
.
add_tools
(
ZoomInTool
())
p
.
add_tools
(
ZoomInTool
())
p
.
add_tools
(
ResetTool
())
p
.
add_tools
(
ResetTool
())
p
.
add_tools
(
BoxZoomTool
())
p
.
add_tools
(
BoxZoomTool
())
p
.
legend
.
location
=
"top_left"
p
.
legend
.
location
=
"top_left"
p
.
legend
.
click_policy
=
'hide'
p
.
legend
.
click_policy
=
'hide'
# p.title()
# p.title()
if
BEST
:
if
BEST
:
export_png
(
p
,
filename
=
f
"subtypes_INF_split
{
SPLIT
}
.png"
)
#save the plot
export_png
(
p
,
filename
=
f
"subtypes_INF_split
{
SPLIT
}
.png"
)
#save the plot
else
:
else
:
export_png
(
p
,
filename
=
f
"subtypes_juXT_split
{
SPLIT
}
.png"
)
export_png
(
p
,
filename
=
f
"subtypes_juXT_split
{
SPLIT
}
.png"
)
show
(
p
)
show
(
p
)
```
```
%% Cell type:markdown id: tags:
%% Cell type:markdown id: tags:
## Grid plot for all other splits
## Grid plot for all other splits
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
def
range_with_ignore
(
start
,
stop
,
ignore
):
def
range_with_ignore
(
start
,
stop
,
ignore
):
return
np
.
concatenate
([
return
np
.
concatenate
([
np
.
arange
(
start
,
ignore
),
np
.
arange
(
start
,
ignore
),
np
.
arange
(
ignore
+
1
,
stop
)
np
.
arange
(
ignore
+
1
,
stop
)
])
])
```
```
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
BEST
=
False
BEST
=
False
mycols
=
colorblind
[
'Colorblind'
][
4
]
mycols
=
colorblind
[
'Colorblind'
][
4
]
plots
=
[]
plots
=
[]
size
=
12
size
=
12
for
split
in
range_with_ignore
(
0
,
9
,
SPLIT
).
tolist
():
for
split
in
range_with_ignore
(
0
,
10
,
SPLIT
).
tolist
():
file_tr
=
f
'
{
PATH
}
/
{
split
}
/
{
layers
}
_tr.txt'
# Fit UMAP
file_tr
=
f
'
{
PATH
}
/
{
split
}
/
{
layers
}
_tr.txt'
# Fit UMAP
file_test
=
f
'
{
PATH
}
/
{
split
}
/
{
layers
}
_ts.txt'
# test UMAP
file_test
=
f
'
{
PATH
}
/
{
split
}
/
{
layers
}
_ts.txt'
# test UMAP
file_test2
=
f
'
{
PATH
}
/
{
split
}
/
{
layers
}
_ts2.txt'
# test UMAP
file_test2
=
f
'
{
PATH
}
/
{
split
}
/
{
layers
}
_ts2.txt'
# test UMAP
features_train
=
pd
.
read_csv
(
file_tr
,
sep
=
'
\t
'
,
header
=
0
,
index_col
=
0
)
features_train
=
pd
.
read_csv
(
file_tr
,
sep
=
'
\t
'
,
header
=
0
,
index_col
=
0
)
features_test
=
pd
.
read_csv
(
file_test
,
sep
=
'
\t
'
,
header
=
0
,
index_col
=
0
)
features_test
=
pd
.
read_csv
(
file_test
,
sep
=
'
\t
'
,
header
=
0
,
index_col
=
0
)
features_test2
=
pd
.
read_csv
(
file_test2
,
sep
=
'
\t
'
,
header
=
0
,
index_col
=
0
)
features_test2
=
pd
.
read_csv
(
file_test2
,
sep
=
'
\t
'
,
header
=
0
,
index_col
=
0
)
INF_feats
=
pd
.
read_csv
(
f
'
{
PATH_RESULTS
}
/
{
split
}
/rSNFi/
{
layers
}
_ts_RandomForest_KBest_featurelist.txt'
,
sep
=
'
\t
'
)[
'FEATURE_NAME'
].
values
.
tolist
()
INF_feats
=
pd
.
read_csv
(
f
'
{
PATH_RESULTS
}
/
{
split
}
/rSNFi/
{
layers
}
_ts_RandomForest_KBest_featurelist.txt'
,
sep
=
'
\t
'
)[
'FEATURE_NAME'
].
values
.
tolist
()
best_train
=
features_train
[
INF_feats
]
best_train
=
features_train
[
INF_feats
]
best_test
=
features_test
[
INF_feats
]
best_test
=
features_test
[
INF_feats
]
best_test2
=
features_test2
[
INF_feats
]
best_test2
=
features_test2
[
INF_feats
]
if
BEST
:
if
BEST
:
features_train
=
best_train
features_train
=
best_train
features_test
=
best_test
features_test
=
best_test
features_test2
=
best_test2
features_test2
=
best_test2
samples_tr
=
features_train
.
index
samples_tr
=
features_train
.
index
labels_tr
=
pd
.
read_csv
(
f
'
{
PATH
}
/
{
split
}
/labels_
{
TASK
}
_tr.txt'
,
sep
=
'
\t
'
,
header
=
None
)[
0
].
tolist
()
labels_tr
=
pd
.
read_csv
(
f
'
{
PATH
}
/
{
split
}
/labels_
{
TASK
}
_tr.txt'
,
sep
=
'
\t
'
,
header
=
None
)[
0
].
tolist
()
features_train
[
'labels'
]
=
labels_tr
features_train
[
'labels'
]
=
labels_tr
labels_tr
=
features_train
[
'labels'
]
labels_tr
=
features_train
[
'labels'
]
features_tr
=
features_train
[
features_train
.
columns
[:
-
1
]].
values
features_tr
=
features_train
[
features_train
.
columns
[:
-
1
]].
values
samples_test
=
features_test
.
index
samples_test
=
features_test
.
index
labels_test
=
pd
.
read_csv
(
f
'
{
PATH
}
/
{
split
}
/labels_
{
TASK
}
_ts.txt'
,
sep
=
'
\t
'
,
header
=
None
)[
0
].
tolist
()
labels_test
=
pd
.
read_csv
(
f
'
{
PATH
}
/
{
split
}
/labels_
{
TASK
}
_ts.txt'
,
sep
=
'
\t
'
,
header
=
None
)[
0
].
tolist
()
features_test
[
'labels'
]
=
labels_test
features_test
[
'labels'
]
=
labels_test
labels_test
=
features_test
[
'labels'
]
labels_test
=
features_test
[
'labels'
]
features_ts
=
features_test
[
features_test
.
columns
[:
-
1
]].
values
features_ts
=
features_test
[
features_test
.
columns
[:
-
1
]].
values
samples_test2
=
features_test2
.
index
samples_test2
=
features_test2
.
index
labels_test2
=
pd
.
read_csv
(
f
'
{
PATH
}
/
{
split
}
/labels_
{
TASK
}
_ts2.txt'
,
sep
=
'
\t
'
,
header
=
None
)[
0
].
tolist
()
labels_test2
=
pd
.
read_csv
(
f
'
{
PATH
}
/
{
split
}
/labels_
{
TASK
}
_ts2.txt'
,
sep
=
'
\t
'
,
header
=
None
)[
0
].
tolist
()
features_test2
[
'labels'
]
=
labels_test2
features_test2
[
'labels'
]
=
labels_test2
labels_test2
=
features_test2
[
'labels'
]
labels_test2
=
features_test2
[
'labels'
]
features_ts2
=
features_test2
[
features_test2
.
columns
[:
-
1
]].
values
features_ts2
=
features_test2
[
features_test2
.
columns
[:
-
1
]].
values
train_data
=
features_tr
train_data
=
features_tr
test_data
=
features_ts
test_data
=
features_ts
test2_data
=
features_ts2
test2_data
=
features_ts2
mapper
=
umap
.
UMAP
(
n_neighbors
=
40
,
min_dist
=
0.01
,
n_components
=
2
,
metric
=
'euclidean'
).
fit
(
train_data
)
mapper
=
umap
.
UMAP
(
n_neighbors
=
40
,
min_dist
=
0.01
,
n_components
=
2
,
metric
=
'euclidean'
).
fit
(
train_data
)
test_embedding
=
mapper
.
transform
(
test_data
)
test_embedding
=
mapper
.
transform
(
test_data
)
test2_embedding
=
mapper
.
transform
(
test2_data
)
test2_embedding
=
mapper
.
transform
(
test2_data
)
myclasses
=
pd
.
unique
(
labels_tr
).
tolist
()
myclasses
=
pd
.
unique
(
labels_tr
).
tolist
()
p
=
figure
(
title
=
f
'split
{
split
}
'
)
p
=
figure
(
title
=
f
'split
{
split
}
'
)
p
.
title
.
text_font_size
=
'25pt'
p
.
title
.
text_font_size
=
'25pt'
p
.
title
.
align
=
"center"
p
.
title
.
align
=
"center"
p
.
title
.
text_color
=
"black"
p
.
title
.
text_color
=
"black"
p
.
title
.
text_font_size
=
"25px"
p
.
title
.
text_font_size
=
"25px"
for
col
,
theclass
in
zip
(
mycols
,
myclasses
):
for
col
,
theclass
in
zip
(
mycols
,
myclasses
):
idx_tr
=
np
.
where
(
np
.
array
(
labels_tr
)
==
theclass
)[
0
].
tolist
()
idx_tr
=
np
.
where
(
np
.
array
(
labels_tr
)
==
theclass
)[
0
].
tolist
()
samples_train
=
np
.
expand_dims
(
samples_tr
[
idx_tr
,],
axis
=
1
)
samples_train
=
np
.
expand_dims
(
samples_tr
[
idx_tr
,],
axis
=
1
)
data_tr
=
np
.
hstack
((
mapper
.
embedding_
[
idx_tr
,],
samples_train
))
<