Commit 1c24e8a0 authored by Nicole Bussola
Browse files

add function in the grid plot

parent ff38b502
......@@ -47,17 +47,18 @@
layers = 'gene_cnv_prot'
PATH = Path('data') / DATASET / TASK
PATH_RESULTS = Path('results') / DATASET / TASK / MODEL
SPLIT = 2# choose a random split for the train, test, and test2 files
```
%% Cell type:code id: tags:
``` python
SPLIT = 2# choose a random split for the train, test, and test2 files
file_tr = f'{PATH}/{SPLIT}/{layers}_tr.txt' # Fit UMAP
file_test = f'{PATH}/{SPLIT}/{layers}_ts.txt' # test UMAP on TS
file_test2 = f'{PATH}/{SPLIT}/{layers}_ts2.txt' # test UMAP on TS2
features_train = pd.read_csv(file_tr, sep='\t', header=0, index_col=0)
......@@ -67,11 +68,11 @@
```
%% Cell type:code id: tags:
``` python
BEST = True # restrict the features to the INF signature
BEST = False # restrict the features to the INF signature
INF_feats = pd.read_csv(f'{PATH_RESULTS}/{SPLIT}/rSNFi/{layers}_ts_RandomForest_KBest_featurelist.txt', sep='\t')['FEATURE_NAME'].values.tolist()
best_train = features_train[INF_feats]
best_test = features_test[INF_feats]
......@@ -221,11 +222,11 @@
p.add_tools(ZoomInTool())
p.add_tools(ResetTool())
p.add_tools(BoxZoomTool())
p.legend.location = "bottom_left"
p.legend.location = "top_left"
p.legend.click_policy='hide'
# p.title()
if BEST:
export_png(p, filename=f"subtypes_INF_split{SPLIT}.png") #save the plot
......@@ -240,28 +241,38 @@
## Grid plot for all other splits
%% Cell type:code id: tags:
``` python
BEST = True
def range_with_ignore(start, stop, ignore):
    """Return the values of ``np.arange(start, stop)`` with ``ignore`` removed.

    Parameters
    ----------
    start, stop
        Bounds forwarded to ``np.arange`` (``stop`` is exclusive).
    ignore
        Value to leave out of the result. If it does not occur in the
        range, the full range is returned unchanged.

    Returns
    -------
    numpy.ndarray
        The range values, in ascending order, without ``ignore``.
    """
    values = np.arange(start, stop)
    # Mask instead of concatenating the two halves around ``ignore``: the
    # split form returns values outside [start, stop) whenever ``ignore``
    # itself lies outside that interval (or the range is empty).
    return values[values != ignore]
```
%% Cell type:code id: tags:
``` python
BEST = False
mycols = colorblind['Colorblind'][4]
plots = []
size = 12
for SPLIT in np.arange(9).tolist():
for split in range_with_ignore(0,9,SPLIT).tolist():
file_tr = f'{PATH}/{SPLIT}/{layers}_tr.txt' # Fit UMAP
file_test = f'{PATH}/{SPLIT}/{layers}_ts.txt' # test UMAP
file_test2 = f'{PATH}/{SPLIT}/{layers}_ts2.txt' # test UMAP
file_tr = f'{PATH}/{split}/{layers}_tr.txt' # Fit UMAP
file_test = f'{PATH}/{split}/{layers}_ts.txt' # test UMAP
file_test2 = f'{PATH}/{split}/{layers}_ts2.txt' # test UMAP
features_train = pd.read_csv(file_tr, sep='\t', header=0, index_col=0)
features_test = pd.read_csv(file_test, sep='\t', header=0, index_col=0)
features_test2 = pd.read_csv(file_test2, sep='\t', header=0, index_col=0)
INF_feats = pd.read_csv(f'{PATH_RESULTS}/{SPLIT}/rSNFi/{layers}_ts_RandomForest_KBest_featurelist.txt', sep='\t')['FEATURE_NAME'].values.tolist()
INF_feats = pd.read_csv(f'{PATH_RESULTS}/{split}/rSNFi/{layers}_ts_RandomForest_KBest_featurelist.txt', sep='\t')['FEATURE_NAME'].values.tolist()
best_train = features_train[INF_feats]
best_test = features_test[INF_feats]
best_test2 = features_test2[INF_feats]
......@@ -272,27 +283,27 @@
features_test2 = best_test2
samples_tr = features_train.index
labels_tr = pd.read_csv(f'{PATH}/{SPLIT}/labels_{TASK}_tr.txt', sep='\t', header=None)[0].tolist()
labels_tr = pd.read_csv(f'{PATH}/{split}/labels_{TASK}_tr.txt', sep='\t', header=None)[0].tolist()
features_train['labels'] = labels_tr
labels_tr = features_train['labels']
features_tr = features_train[features_train.columns[:-1]].values
samples_test = features_test.index
labels_test = pd.read_csv(f'{PATH}/{SPLIT}/labels_{TASK}_ts.txt', sep='\t', header=None)[0].tolist()
labels_test = pd.read_csv(f'{PATH}/{split}/labels_{TASK}_ts.txt', sep='\t', header=None)[0].tolist()
features_test['labels'] = labels_test
labels_test = features_test['labels']
features_ts = features_test[features_test.columns[:-1]].values
samples_test2 = features_test2.index
labels_test2 = pd.read_csv(f'{PATH}/{SPLIT}/labels_{TASK}_ts2.txt', sep='\t', header=None)[0].tolist()
labels_test2 = pd.read_csv(f'{PATH}/{split}/labels_{TASK}_ts2.txt', sep='\t', header=None)[0].tolist()
features_test2['labels'] = labels_test2
labels_test2 = features_test2['labels']
features_ts2 = features_test2[features_test2.columns[:-1]].values
......@@ -309,11 +320,11 @@
myclasses = pd.unique(labels_tr).tolist()
p = figure(title=f'split {SPLIT}')
p = figure(title=f'split {split}')
p.title.text_font_size = '25pt'
p.title.align = "center"
p.title.text_color = "black"
......@@ -329,85 +340,41 @@
source_tr = ColumnDataSource(data=dict(
x=df_tr['x'],
y=df_tr['y'],
desc=df_tr['sample']))
p.circle(x='x',y='y',size=12,source=source_tr,color=col,alpha=0.8,legend=str(theclass))
p.circle(x='x',y='y',size=size,source=source_tr,color=col,alpha=0.8,legend=str(theclass))
idx_ts = np.where(np.array(labels_test)==theclass)[0].tolist()
samples_ts = np.expand_dims(samples_test[idx_ts,], axis=1)
data_ts = np.hstack((test_embedding[idx_ts,], samples_ts))
df_ts = pd.DataFrame(data_ts,columns=['x','y','sample'])
source_ts = ColumnDataSource(data=dict(
x=df_ts['x'],
y=df_ts['y'],
desc=df_ts['sample']))
p.triangle(x='x',y='y',size=12,source=source_ts,color=col,alpha=0.8)
p.triangle(x='x',y='y',size=size,source=source_ts,color=col,alpha=0.8)
idx_ts2 = np.where(np.array(labels_test2)==theclass)[0].tolist()
samples_ts2 = np.expand_dims(samples_test2[idx_ts2,], axis=1)
data_ts2 = np.hstack((test2_embedding[idx_ts2,], samples_ts2))
df_ts2 = pd.DataFrame(data_ts2,columns=['x','y','sample'])
source_ts2 = ColumnDataSource(data=dict(
x=df_ts2['x'],
y=df_ts2['y'],
desc=df_ts2['sample']))
p.diamond(x='x',y='y',size=12,source=source_ts2,color=col,alpha=0.8)
p.diamond(x='x',y='y',size=size,source=source_ts2,color=col,alpha=0.8)
# p.legend.location = "bottom_left"
p.legend.label_text_font_size = "20pt"
p.yaxis.major_label_text_font_size = "15pt"
p.xaxis.major_label_text_font_size = "15pt"
plots.append(p)
```
%%%% Output: error
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
<ipython-input-13-710083258612> in <module>
31
32 samples_tr = features_train.index
---> 33 labels_tr = pd.read_csv(f'{PATH}/{SPLIT}/labels_{TASK}_tr.txt', sep='\t', header=None)[0].tolist()
34
35 features_train['labels'] = labels_tr
~/anaconda3/envs/inf_env/lib/python3.6/site-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)
683 )
684
--> 685 return _read(filepath_or_buffer, kwds)
686
687 parser_f.__name__ = name
~/anaconda3/envs/inf_env/lib/python3.6/site-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
455
456 # Create the parser.
--> 457 parser = TextFileReader(fp_or_buf, **kwds)
458
459 if chunksize or iterator:
~/anaconda3/envs/inf_env/lib/python3.6/site-packages/pandas/io/parsers.py in __init__(self, f, engine, **kwds)
893 self.options["has_index_names"] = kwds["has_index_names"]
894
--> 895 self._make_engine(self.engine)
896
897 def close(self):
~/anaconda3/envs/inf_env/lib/python3.6/site-packages/pandas/io/parsers.py in _make_engine(self, engine)
1133 def _make_engine(self, engine="c"):
1134 if engine == "c":
-> 1135 self._engine = CParserWrapper(self.f, **self.options)
1136 else:
1137 if engine == "python":
~/anaconda3/envs/inf_env/lib/python3.6/site-packages/pandas/io/parsers.py in __init__(self, src, **kwds)
1915 kwds["usecols"] = self.usecols
1916
-> 1917 self._reader = parsers.TextReader(src, **kwds)
1918 self.unnamed_cols = self._reader.unnamed_cols
1919
pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader.__cinit__()
pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._setup_parser_source()
FileNotFoundError: [Errno 2] File b'data/tcga_breast/subtypes/0/labels_subtypes_tr.txt' does not exist: b'data/tcga_breast/subtypes/0/labels_subtypes_tr.txt'
%% Cell type:code id: tags:
``` python
# Lay the figures out three per row. Passing the flat list with ``ncols``
# avoids hard-coded indices, which raise IndexError whenever ``plots`` holds
# fewer than nine figures.
grid = gridplot(plots, ncols=3, plot_width=1200, plot_height=1200)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment