{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# UMAP projection" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "import pandas as pd\n", "from pathlib import Path\n", "import numpy as np\n", "\n", "from bokeh.plotting import figure, output_file, show, save\n", "from bokeh.io import output_notebook, export_png\n", "from bokeh.palettes import colorblind\n", "from bokeh.models import CategoricalColorMapper, ColumnDataSource, LassoSelectTool, WheelZoomTool, ZoomInTool, BoxZoomTool, ResetTool\n", "from bokeh.layouts import gridplot\n", "from bokeh.resources import CDN\n", "from bokeh.embed import file_html\n", "\n", "import numpy as np\n", "import umap" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", " \n", " Loading BokehJS ...\n", "
" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/javascript": [ "\n", "(function(root) {\n", " function now() {\n", " return new Date();\n", " }\n", "\n", " var force = true;\n", "\n", " if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n", " root._bokeh_onload_callbacks = [];\n", " root._bokeh_is_loading = undefined;\n", " }\n", "\n", " var JS_MIME_TYPE = 'application/javascript';\n", " var HTML_MIME_TYPE = 'text/html';\n", " var EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n", " var CLASS_NAME = 'output_bokeh rendered_html';\n", "\n", " /**\n", " * Render data to the DOM node\n", " */\n", " function render(props, node) {\n", " var script = document.createElement(\"script\");\n", " node.appendChild(script);\n", " }\n", "\n", " /**\n", " * Handle when an output is cleared or removed\n", " */\n", " function handleClearOutput(event, handle) {\n", " var cell = handle.cell;\n", "\n", " var id = cell.output_area._bokeh_element_id;\n", " var server_id = cell.output_area._bokeh_server_id;\n", " // Clean up Bokeh references\n", " if (id != null && id in Bokeh.index) {\n", " Bokeh.index[id].model.document.clear();\n", " delete Bokeh.index[id];\n", " }\n", "\n", " if (server_id !== undefined) {\n", " // Clean up Bokeh references\n", " var cmd = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n", " cell.notebook.kernel.execute(cmd, {\n", " iopub: {\n", " output: function(msg) {\n", " var id = msg.content.text.trim();\n", " if (id in Bokeh.index) {\n", " Bokeh.index[id].model.document.clear();\n", " delete Bokeh.index[id];\n", " }\n", " }\n", " }\n", " });\n", " // Destroy server and session\n", " var cmd = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n", " cell.notebook.kernel.execute(cmd);\n", " }\n", " }\n", "\n", " /**\n", " * Handle when a new output is added\n", " */\n", " 
function handleAddOutput(event, handle) {\n", " var output_area = handle.output_area;\n", " var output = handle.output;\n", "\n", " // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n", " if ((output.output_type != \"display_data\") || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n", " return\n", " }\n", "\n", " var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n", "\n", " if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n", " toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n", " // store reference to embed id on output_area\n", " output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n", " }\n", " if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n", " var bk_div = document.createElement(\"div\");\n", " bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n", " var script_attrs = bk_div.children[0].attributes;\n", " for (var i = 0; i < script_attrs.length; i++) {\n", " toinsert[toinsert.length - 1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n", " }\n", " // store reference to server id on output_area\n", " output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n", " }\n", " }\n", "\n", " function register_renderer(events, OutputArea) {\n", "\n", " function append_mime(data, metadata, element) {\n", " // create a DOM node to render to\n", " var toinsert = this.create_output_subarea(\n", " metadata,\n", " CLASS_NAME,\n", " EXEC_MIME_TYPE\n", " );\n", " this.keyboard_manager.register_events(toinsert);\n", " // Render to node\n", " var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n", " render(props, toinsert[toinsert.length - 1]);\n", " element.append(toinsert);\n", " return toinsert\n", " }\n", "\n", " /* Handle when an output is cleared or removed */\n", " events.on('clear_output.CodeCell', handleClearOutput);\n", " events.on('delete.Cell', handleClearOutput);\n", "\n", " /* 
Handle when a new output is added */\n", " events.on('output_added.OutputArea', handleAddOutput);\n", "\n", " /**\n", " * Register the mime type and append_mime function with output_area\n", " */\n", " OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n", " /* Is output safe? */\n", " safe: true,\n", " /* Index of renderer in `output_area.display_order` */\n", " index: 0\n", " });\n", " }\n", "\n", " // register the mime type if in Jupyter Notebook environment and previously unregistered\n", " if (root.Jupyter !== undefined) {\n", " var events = require('base/js/events');\n", " var OutputArea = require('notebook/js/outputarea').OutputArea;\n", "\n", " if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n", " register_renderer(events, OutputArea);\n", " }\n", " }\n", "\n", " \n", " if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n", " root._bokeh_timeout = Date.now() + 5000;\n", " root._bokeh_failed_load = false;\n", " }\n", "\n", " var NB_LOAD_WARNING = {'data': {'text/html':\n", " \"
\\n\"+\n", " \"

\\n\"+\n", " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n", " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n", " \"

\\n\"+\n", " \"\\n\"+\n", " \"\\n\"+\n", " \"from bokeh.resources import INLINE\\n\"+\n", " \"output_notebook(resources=INLINE)\\n\"+\n", " \"\\n\"+\n", " \"
\"}};\n", "\n", " function display_loaded() {\n", " var el = document.getElementById(\"1001\");\n", " if (el != null) {\n", " el.textContent = \"BokehJS is loading...\";\n", " }\n", " if (root.Bokeh !== undefined) {\n", " if (el != null) {\n", " el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n", " }\n", " } else if (Date.now() < root._bokeh_timeout) {\n", " setTimeout(display_loaded, 100)\n", " }\n", " }\n", "\n", "\n", " function run_callbacks() {\n", " try {\n", " root._bokeh_onload_callbacks.forEach(function(callback) {\n", " if (callback != null)\n", " callback();\n", " });\n", " } finally {\n", " delete root._bokeh_onload_callbacks\n", " }\n", " console.debug(\"Bokeh: all callbacks have finished\");\n", " }\n", "\n", " function load_libs(css_urls, js_urls, callback) {\n", " if (css_urls == null) css_urls = [];\n", " if (js_urls == null) js_urls = [];\n", "\n", " root._bokeh_onload_callbacks.push(callback);\n", " if (root._bokeh_is_loading > 0) {\n", " console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", " return null;\n", " }\n", " if (js_urls == null || js_urls.length === 0) {\n", " run_callbacks();\n", " return null;\n", " }\n", " console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", " root._bokeh_is_loading = css_urls.length + js_urls.length;\n", "\n", " function on_load() {\n", " root._bokeh_is_loading--;\n", " if (root._bokeh_is_loading === 0) {\n", " console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n", " run_callbacks()\n", " }\n", " }\n", "\n", " function on_error() {\n", " console.error(\"failed to load \" + url);\n", " }\n", "\n", " for (var i = 0; i < css_urls.length; i++) {\n", " var url = css_urls[i];\n", " const element = document.createElement(\"link\");\n", " element.onload = on_load;\n", " element.onerror = on_error;\n", " element.rel = \"stylesheet\";\n", " element.type = \"text/css\";\n", " element.href = url;\n", " 
console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n", " document.body.appendChild(element);\n", " }\n", "\n", " for (var i = 0; i < js_urls.length; i++) {\n", " var url = js_urls[i];\n", " var element = document.createElement('script');\n", " element.onload = on_load;\n", " element.onerror = on_error;\n", " element.async = false;\n", " element.src = url;\n", " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", " document.head.appendChild(element);\n", " }\n", " };var element = document.getElementById(\"1001\");\n", " if (element == null) {\n", " console.error(\"Bokeh: ERROR: autoload.js configured with elementid '1001' but no matching script tag was found. \")\n", " return false;\n", " }\n", "\n", " function inject_raw_css(css) {\n", " const element = document.createElement(\"style\");\n", " element.appendChild(document.createTextNode(css));\n", " document.body.appendChild(element);\n", " }\n", "\n", " \n", " var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.4.0.min.js\"];\n", " var css_urls = [];\n", " \n", "\n", " var inline_js = [\n", " function(Bokeh) {\n", " Bokeh.set_log_level(\"info\");\n", " },\n", " function(Bokeh) {\n", " \n", " \n", " }\n", " ];\n", "\n", " function run_inline_js() {\n", " \n", " if (root.Bokeh !== undefined || force === true) {\n", " \n", " for (var i = 0; i < inline_js.length; i++) {\n", " inline_js[i].call(root, root.Bokeh);\n", " }\n", " if (force === true) {\n", " display_loaded();\n", " }} else if (Date.now() < root._bokeh_timeout) {\n", " setTimeout(run_inline_js, 100);\n", " } else if (!root._bokeh_failed_load) {\n", " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n", " root._bokeh_failed_load = true;\n", " } else if (force !== true) {\n", 
" var cell = $(document.getElementById(\"1001\")).parents('.cell').data().cell;\n", " cell.output_area.append_execute_result(NB_LOAD_WARNING)\n", " }\n", "\n", " }\n", "\n", " if (root._bokeh_is_loading === 0) {\n", " console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n", " run_inline_js();\n", " } else {\n", " load_libs(css_urls, js_urls, function() {\n", " console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n", " run_inline_js();\n", " });\n", " }\n", "}(window));" ], "application/vnd.bokehjs_load.v0+json": "\n(function(root) {\n function now() {\n return new Date();\n }\n\n var force = true;\n\n if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\n \n\n \n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n var NB_LOAD_WARNING = {'data': {'text/html':\n \"
\\n\"+\n \"

\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"

\\n\"+\n \"\\n\"+\n \"\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"\\n\"+\n \"
\"}};\n\n function display_loaded() {\n var el = document.getElementById(\"1001\");\n if (el != null) {\n el.textContent = \"BokehJS is loading...\";\n }\n if (root.Bokeh !== undefined) {\n if (el != null) {\n el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(display_loaded, 100)\n }\n }\n\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls == null || js_urls.length === 0) {\n run_callbacks();\n return null;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n root._bokeh_is_loading = css_urls.length + js_urls.length;\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n\n function on_error() {\n console.error(\"failed to load \" + url);\n }\n\n for (var i = 0; i < css_urls.length; i++) {\n var url = css_urls[i];\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n }\n\n for (var i = 0; i < js_urls.length; i++) {\n var url = js_urls[i];\n var element = document.createElement('script');\n element.onload = 
on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n };var element = document.getElementById(\"1001\");\n if (element == null) {\n console.error(\"Bokeh: ERROR: autoload.js configured with elementid '1001' but no matching script tag was found. \")\n return false;\n }\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n \n var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.4.0.min.js\"];\n var css_urls = [];\n \n\n var inline_js = [\n function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\n function(Bokeh) {\n \n \n }\n ];\n\n function run_inline_js() {\n \n if (root.Bokeh !== undefined || force === true) {\n \n for (var i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }\n if (force === true) {\n display_loaded();\n }} else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n } else if (force !== true) {\n var cell = $(document.getElementById(\"1001\")).parents('.cell').data().cell;\n cell.output_area.append_execute_result(NB_LOAD_WARNING)\n }\n\n }\n\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n run_inline_js();\n } else {\n load_libs(css_urls, js_urls, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));" }, "metadata": {}, "output_type": 
# %%
output_notebook()

# %% [markdown]
# ## Load Features datasets

# %%
# Configuration: every value a reader might want to tune lives in this cell.
TASK = 'subtypes'
DATASET = 'tcga_breast'
MODEL = 'randomForest'

layers = 'gene_cnv_prot'

PATH = Path('data') / DATASET / TASK
PATH_RESULTS = Path('results') / DATASET / TASK / MODEL

SPLIT = 2  # choose a random split for the train, test, and test2 files
UMAP_SEED = 42  # fixed seed: without it UMAP yields a different layout on every run

# %%
file_tr = f'{PATH}/{SPLIT}/{layers}_tr.txt'  # Fit UMAP
file_test = f'{PATH}/{SPLIT}/{layers}_ts.txt'  # test UMAP on TS
file_test2 = f'{PATH}/{SPLIT}/{layers}_ts2.txt'  # test UMAP on TS2

# First row is the header, first column holds the sample names (used as index).
features_train = pd.read_csv(file_tr, sep='\t', header=0, index_col=0)
features_test = pd.read_csv(file_test, sep='\t', header=0, index_col=0)
features_test2 = pd.read_csv(file_test2, sep='\t', header=0, index_col=0)

# %%
BEST = False  # restrict the features to the INF signature

INF_feats = pd.read_csv(f'{PATH_RESULTS}/{SPLIT}/rSNFi/{layers}_ts_RandomForest_KBest_featurelist.txt', sep='\t')['FEATURE_NAME'].values.tolist()

best_train = features_train[INF_feats]
best_test = features_test[INF_feats]
best_test2 = features_test2[INF_feats]

# %%
if BEST:
    features_train = best_train
    features_test = best_test
    features_test2 = best_test2

# %%
def read_labels(subset, samples):
    """Read the label file of one subset as a Series aligned with `samples`.

    Parameters
    ----------
    subset : str
        File suffix of the subset: 'tr', 'ts' or 'ts2'.
    samples : pandas.Index
        Sample names of the matching feature table; used as the Series index.

    Returns
    -------
    pandas.Series
        One label per sample, named 'labels'.

    Raises
    ------
    ValueError
        Raised by the Series constructor when the label file and the feature
        table do not contain the same number of rows.
    """
    values = pd.read_csv(f'{PATH}/{SPLIT}/labels_{TASK}_{subset}.txt', sep='\t', header=None)[0].tolist()
    return pd.Series(values, index=samples, name='labels')

# %%
# The feature frames are left untouched: labels live in their own Series, so
# the feature matrix never depends on a 'labels' column being the last one.
samples_tr = features_train.index
labels_tr = read_labels('tr', samples_tr)
features_tr = features_train.values

samples_test = features_test.index
labels_test = read_labels('ts', samples_test)
features_ts = features_test.values

samples_test2 = features_test2.index
labels_test2 = read_labels('ts2', samples_test2)
features_ts2 = features_test2.values

# %%
train_data = features_tr
test_data = features_ts
test2_data = features_ts2

# %%
# Check
print(len(features_tr), len(samples_tr), len(labels_tr))
print(len(features_ts), len(samples_test), len(labels_test))
print(len(features_ts2), len(samples_test2), len(labels_test2))

# %% [markdown]
# ## Fit on the training data and transform the test set into the learned space

# %%
# The projection is learned on the training data only; both test sets are
# mapped into that space afterwards.
mapper = umap.UMAP(n_neighbors=40, min_dist=0.01, n_components=2,
                   metric='euclidean', random_state=UMAP_SEED).fit(train_data)
test_embedding = mapper.transform(test_data)
test2_embedding = mapper.transform(test2_data)

# %%
# Check
len(mapper.embedding_), len(test_embedding), len(test2_embedding)

# %% [markdown]
# ## Plot UMAP 2D projection
# %%
TOOLTIPS = [
    ("index", "$index"),
    ("(x,y)", "($x, $y)"),
    ("desc", "@desc"),
]

size = 12  # marker size shared by every glyph in every figure


def class_colors(labels):
    """Assign one colour of the Bokeh 'Colorblind' palette to each class.

    Classes are sorted before colours are assigned, so the mapping does not
    depend on the order in which classes first appear in `labels`; figures
    built from different splits use the same colour for the same class as
    long as the splits contain the same classes.

    Parameters
    ----------
    labels : array-like
        Class label of every sample.

    Returns
    -------
    dict
        Maps class label -> colour string, in sorted class order.

    Raises
    ------
    ValueError
        If there are more than 8 classes (the largest palette available).
    """
    classes = sorted(pd.unique(labels).tolist())
    # Keep the 4-colour palette for up to 4 classes and grow it beyond that,
    # so that no class is silently dropped from the figure.
    n_colors = max(4, len(classes))
    if n_colors > 8:
        raise ValueError(f'{len(classes)} classes found, but the Colorblind palette has at most 8 colours')
    return dict(zip(classes, colorblind['Colorblind'][n_colors]))


def draw_embeddings(fig, datasets, colors, size=12):
    """Draw several embedded data sets on `fig`, one glyph renderer per class.

    Parameters
    ----------
    fig : bokeh figure
        Figure to draw on.
    datasets : sequence of (marker, embedding, samples, labels)
        `marker` is the name of the figure glyph method to use ('circle',
        'triangle', 'diamond'); `embedding` is indexed as [row, 0] for x and
        [row, 1] for y; `samples` are the names shown by the '@desc' tooltip;
        `labels` holds the class of each row. Only the first data set adds
        legend entries.
    colors : dict
        Maps class label -> colour, e.g. the result of `class_colors`.
    size : int, optional
        Marker size.
    """
    prepared = [
        (getattr(fig, marker), np.asarray(embedding), np.asarray(samples), np.asarray(labels))
        for marker, embedding, samples, labels in datasets
    ]
    # Class is the outer loop so that the renderers of one class are added
    # together (train, test, test2) before moving on to the next class.
    for theclass, col in colors.items():
        for position, (draw, embedding, samples, labels) in enumerate(prepared):
            members = labels == theclass
            source = ColumnDataSource(data=dict(
                x=embedding[members, 0],
                y=embedding[members, 1],
                desc=samples[members],
            ))
            # `legend_label` replaces the `legend` keyword deprecated in Bokeh 1.4.
            legend = {'legend_label': str(theclass)} if position == 0 else {}
            draw(x='x', y='y', size=size, source=source, color=col, alpha=0.8, **legend)

# %%
# Train samples are circles, test samples triangles, test2 samples diamonds.
p = figure(plot_width=1200, plot_height=1200, tooltips=TOOLTIPS, tools='save', toolbar_location="left")

p.title.align = "center"
p.title.text_color = "black"
p.title.text_font_size = "25px"

draw_embeddings(
    p,
    [
        ('circle', mapper.embedding_, samples_tr, labels_tr),
        ('triangle', test_embedding, samples_test, labels_test),
        ('diamond', test2_embedding, samples_test2, labels_test2),
    ],
    class_colors(labels_tr),
    size=size,
)

p.add_tools(LassoSelectTool(), WheelZoomTool(), ZoomInTool(), ResetTool(), BoxZoomTool())

p.legend.label_text_font_size = "20pt"
p.legend.location = "top_left"
p.legend.click_policy = 'hide'
p.yaxis.major_label_text_font_size = "15pt"
p.xaxis.major_label_text_font_size = "15pt"

signature = 'INF' if BEST else 'juXT'
export_png(p, filename=f"subtypes_{signature}_split{SPLIT}.png")  # save the plot

show(p)

# %% [markdown]
# ## Grid plot for all other splits

# %%
def range_with_ignore(start, stop, ignore):
    """Return the integers of [start, stop) except `ignore`.

    Parameters
    ----------
    start, stop : int
        Bounds of the half-open range, as for `numpy.arange`.
    ignore : int
        Value to leave out. A value outside [start, stop) removes nothing.

    Returns
    -------
    numpy.ndarray
        1-D integer array in increasing order.
    """
    values = np.arange(start, stop)
    # Filtering (rather than concatenating two ranges around `ignore`) keeps
    # the result inside [start, stop) even when `ignore` lies outside of it.
    return values[values != ignore]


def load_split(split, best=False):
    """Read features, sample names and labels of the three subsets of a split.

    Parameters
    ----------
    split : int
        Split number; selects the sub-directory of PATH (and PATH_RESULTS).
    best : bool, optional
        When true, keep only the features listed in the split's
        KBest feature list (column 'FEATURE_NAME'). The list is read only
        in that case.

    Returns
    -------
    dict
        Maps 'tr', 'ts' and 'ts2' to a tuple
        (feature values, sample names, labels).

    Raises
    ------
    ValueError
        If a label file and its feature table differ in number of rows.
    """
    split_dir = PATH / str(split)

    selected = None
    if best:
        signature_file = PATH_RESULTS / str(split) / 'rSNFi' / f'{layers}_ts_RandomForest_KBest_featurelist.txt'
        selected = pd.read_csv(signature_file, sep='\t')['FEATURE_NAME'].values.tolist()

    subsets = {}
    for subset in ('tr', 'ts', 'ts2'):
        features = pd.read_csv(split_dir / f'{layers}_{subset}.txt', sep='\t', header=0, index_col=0)
        if selected is not None:
            features = features[selected]
        labels = pd.read_csv(split_dir / f'labels_{TASK}_{subset}.txt', sep='\t', header=None)[0]
        if len(labels) != len(features):
            raise ValueError(
                f'split {split}, subset {subset}: {len(labels)} labels for {len(features)} samples')
        subsets[subset] = (features.values, features.index, labels.values)
    return subsets


def project_split(subsets, random_state=42):
    """Fit UMAP on the training subset and project all three subsets.

    Parameters
    ----------
    subsets : dict
        Result of `load_split`.
    random_state : int, optional
        Seed passed to UMAP so that the projection is reproducible.

    Returns
    -------
    dict
        Maps 'tr', 'ts' and 'ts2' to their 2-D embedding.
    """
    mapper = umap.UMAP(n_neighbors=40, min_dist=0.01, n_components=2,
                       metric='euclidean', random_state=random_state).fit(subsets['tr'][0])
    return {
        'tr': mapper.embedding_,
        'ts': mapper.transform(subsets['ts'][0]),
        'ts2': mapper.transform(subsets['ts2'][0]),
    }

# %%
BEST = False

N_SPLITS = 10
MARKERS = (('circle', 'tr'), ('triangle', 'ts'), ('diamond', 'ts2'))

plots = []

# One panel per split, skipping the split already shown in the large figure.
# Everything split-specific stays local to the loop, so the variables of the
# single-split analysis above are not overwritten.
for split in range_with_ignore(0, N_SPLITS, SPLIT).tolist():
    subsets = load_split(split, best=BEST)
    embeddings = project_split(subsets)

    panel = figure(title=f'split {split}')
    panel.title.align = "center"
    panel.title.text_color = "black"
    panel.title.text_font_size = "25px"

    draw_embeddings(
        panel,
        [(marker, embeddings[name], subsets[name][1], subsets[name][2]) for marker, name in MARKERS],
        class_colors(subsets['tr'][2]),
        size=size,
    )

    panel.legend.label_text_font_size = "20pt"
    panel.yaxis.major_label_text_font_size = "15pt"
    panel.xaxis.major_label_text_font_size = "15pt"
    plots.append(panel)

# %%
# `ncols` lays the panels out three per row whatever their number.
grid = gridplot(plots, ncols=3, plot_width=1200, plot_height=1200)

signature = 'INF' if BEST else 'juXT'
export_png(grid, filename=f"subtypes_{signature}_suppl.png")

show(grid)
{}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3.6.9 64-bit ('inf_env': conda)", "language": "python", "name": "python36964bitinfenvconda17aeaf48b6154bb383fac738bc345b49" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.9" } }, "nbformat": 4, "nbformat_minor": 2 }