Commit 1c24e8a0 authored by Nicole Bussola

add function in the grid plot

parent ff38b502
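
The diff below adds a range_with_ignore helper and uses it in the grid-plot loop so that the split chosen earlier (SPLIT) is skipped when the remaining splits are plotted. A minimal standalone sketch of the helper's behaviour; the numpy import is assumed from the notebook's earlier cells:

import numpy as np

def range_with_ignore(start, stop, ignore):
    # np.arange(start, stop) with the single index `ignore` removed
    return np.concatenate([
        np.arange(start, ignore),
        np.arange(ignore + 1, stop)
    ])

print(range_with_ignore(0, 9, 2))  # -> [0 1 3 4 5 6 7 8]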
......@@ -364,7 +364,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
......@@ -375,7 +375,9 @@
"layers = 'gene_cnv_prot'\n",
"\n",
"PATH = Path('data') / DATASET / TASK\n",
"PATH_RESULTS = Path('results') / DATASET / TASK / MODEL"
"PATH_RESULTS = Path('results') / DATASET / TASK / MODEL\n",
"\n",
"SPLIT = 2# choose a random split for the train, test, and test2 files\n"
]
},
{
......@@ -384,8 +386,6 @@
"metadata": {},
"outputs": [],
"source": [
"SPLIT = 2# choose a random split for the train, test, and test2 files\n",
" \n",
"file_tr = f'{PATH}/{SPLIT}/{layers}_tr.txt' # Fit UMAP \n",
"file_test = f'{PATH}/{SPLIT}/{layers}_ts.txt' # test UMAP on TS\n",
"file_test2 = f'{PATH}/{SPLIT}/{layers}_ts2.txt' # test UMAP on TS2\n",
......@@ -401,7 +401,7 @@
"metadata": {},
"outputs": [],
"source": [
"BEST = True # restrict the features to the INF signature\n",
"BEST = False # restrict the features to the INF signature\n",
"\n",
"INF_feats = pd.read_csv(f'{PATH_RESULTS}/{SPLIT}/rSNFi/{layers}_ts_RandomForest_KBest_featurelist.txt', sep='\\t')['FEATURE_NAME'].values.tolist()\n",
"\n",
......@@ -582,7 +582,7 @@
"p.add_tools(ZoomInTool())\n",
"p.add_tools(ResetTool())\n",
"p.add_tools(BoxZoomTool())\n",
"p.legend.location = \"bottom_left\"\n",
"p.legend.location = \"top_left\"\n",
"p.legend.click_policy='hide'\n",
"# p.title()\n",
"\n",
......@@ -603,47 +603,52 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def range_with_ignore(start, stop, ignore):\n",
" return np.concatenate([\n",
" np.arange(start, ignore),\n",
" np.arange(ignore + 1, stop)\n",
" ])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"ename": "FileNotFoundError",
"evalue": "[Errno 2] File b'data/tcga_breast/subtypes/0/labels_subtypes_tr.txt' does not exist: b'data/tcga_breast/subtypes/0/labels_subtypes_tr.txt'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-13-710083258612>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 31\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[0msamples_tr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfeatures_train\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 33\u001b[0;31m \u001b[0mlabels_tr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf'{PATH}/{SPLIT}/labels_{TASK}_tr.txt'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msep\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'\\t'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheader\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtolist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 34\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 35\u001b[0m \u001b[0mfeatures_train\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'labels'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlabels_tr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/inf_env/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36mparser_f\u001b[0;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)\u001b[0m\n\u001b[1;32m 683\u001b[0m )\n\u001b[1;32m 684\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 685\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_read\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 686\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 687\u001b[0m \u001b[0mparser_f\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__name__\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/inf_env/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m 455\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 456\u001b[0m \u001b[0;31m# Create the parser.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 457\u001b[0;31m \u001b[0mparser\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mTextFileReader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfp_or_buf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 458\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 459\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mchunksize\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0miterator\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/inf_env/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[1;32m 893\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"has_index_names\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"has_index_names\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 894\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 895\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_make_engine\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mengine\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 896\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 897\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/inf_env/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m_make_engine\u001b[0;34m(self, engine)\u001b[0m\n\u001b[1;32m 1133\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_make_engine\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mengine\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"c\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1134\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mengine\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"c\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1135\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mCParserWrapper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1136\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1137\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mengine\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"python\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/envs/inf_env/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, src, **kwds)\u001b[0m\n\u001b[1;32m 1915\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"usecols\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0musecols\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1916\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1917\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_reader\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mparsers\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTextReader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msrc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1918\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munnamed_cols\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_reader\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munnamed_cols\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1919\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32mpandas/_libs/parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader.__cinit__\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader._setup_parser_source\u001b[0;34m()\u001b[0m\n",
"\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] File b'data/tcga_breast/subtypes/0/labels_subtypes_tr.txt' does not exist: b'data/tcga_breast/subtypes/0/labels_subtypes_tr.txt'"
"name": "stderr",
"output_type": "stream",
"text": [
"BokehDeprecationWarning: 'legend' keyword is deprecated, use explicit 'legend_label', 'legend_field', or 'legend_group' keywords instead\n",
"BokehDeprecationWarning: 'legend' keyword is deprecated, use explicit 'legend_label', 'legend_field', or 'legend_group' keywords instead\n",
"BokehDeprecationWarning: 'legend' keyword is deprecated, use explicit 'legend_label', 'legend_field', or 'legend_group' keywords instead\n",
"BokehDeprecationWarning: 'legend' keyword is deprecated, use explicit 'legend_label', 'legend_field', or 'legend_group' keywords instead\n"
]
}
],
"source": [
"BEST = True\n",
"BEST = False\n",
"\n",
"mycols = colorblind['Colorblind'][4]\n",
"plots = []\n",
"size = 12\n",
"\n",
"\n",
"for SPLIT in np.arange(9).tolist():\n",
"for split in range_with_ignore(0,9,SPLIT).tolist():\n",
" \n",
" file_tr = f'{PATH}/{SPLIT}/{layers}_tr.txt' # Fit UMAP \n",
" file_test = f'{PATH}/{SPLIT}/{layers}_ts.txt' # test UMAP \n",
" file_test2 = f'{PATH}/{SPLIT}/{layers}_ts2.txt' # test UMAP \n",
" file_tr = f'{PATH}/{split}/{layers}_tr.txt' # Fit UMAP \n",
" file_test = f'{PATH}/{split}/{layers}_ts.txt' # test UMAP \n",
" file_test2 = f'{PATH}/{split}/{layers}_ts2.txt' # test UMAP \n",
"\n",
" features_train = pd.read_csv(file_tr, sep='\\t', header=0, index_col=0)\n",
" features_test = pd.read_csv(file_test, sep='\\t', header=0, index_col=0)\n",
" features_test2 = pd.read_csv(file_test2, sep='\\t', header=0, index_col=0)\n",
"\n",
"\n",
" INF_feats = pd.read_csv(f'{PATH_RESULTS}/{SPLIT}/rSNFi/{layers}_ts_RandomForest_KBest_featurelist.txt', sep='\\t')['FEATURE_NAME'].values.tolist()\n",
" INF_feats = pd.read_csv(f'{PATH_RESULTS}/{split}/rSNFi/{layers}_ts_RandomForest_KBest_featurelist.txt', sep='\\t')['FEATURE_NAME'].values.tolist()\n",
"\n",
" best_train = features_train[INF_feats]\n",
" best_test = features_test[INF_feats]\n",
......@@ -658,7 +663,7 @@
"\n",
"\n",
" samples_tr = features_train.index\n",
" labels_tr = pd.read_csv(f'{PATH}/{SPLIT}/labels_{TASK}_tr.txt', sep='\\t', header=None)[0].tolist()\n",
" labels_tr = pd.read_csv(f'{PATH}/{split}/labels_{TASK}_tr.txt', sep='\\t', header=None)[0].tolist()\n",
"\n",
" features_train['labels'] = labels_tr\n",
" labels_tr = features_train['labels']\n",
......@@ -666,7 +671,7 @@
"\n",
"\n",
" samples_test = features_test.index\n",
" labels_test = pd.read_csv(f'{PATH}/{SPLIT}/labels_{TASK}_ts.txt', sep='\\t', header=None)[0].tolist()\n",
" labels_test = pd.read_csv(f'{PATH}/{split}/labels_{TASK}_ts.txt', sep='\\t', header=None)[0].tolist()\n",
"\n",
" features_test['labels'] = labels_test\n",
" labels_test = features_test['labels']\n",
......@@ -674,7 +679,7 @@
"\n",
"\n",
" samples_test2 = features_test2.index\n",
" labels_test2 = pd.read_csv(f'{PATH}/{SPLIT}/labels_{TASK}_ts2.txt', sep='\\t', header=None)[0].tolist()\n",
" labels_test2 = pd.read_csv(f'{PATH}/{split}/labels_{TASK}_ts2.txt', sep='\\t', header=None)[0].tolist()\n",
"\n",
"\n",
" features_test2['labels'] = labels_test2\n",
......@@ -695,7 +700,7 @@
"\n",
" myclasses = pd.unique(labels_tr).tolist()\n",
"\n",
" p = figure(title=f'split {SPLIT}')\n",
" p = figure(title=f'split {split}')\n",
" p.title.text_font_size = '25pt'\n",
"\n",
"\n",
......@@ -715,7 +720,7 @@
" x=df_tr['x'],\n",
" y=df_tr['y'],\n",
" desc=df_tr['sample']))\n",
" p.circle(x='x',y='y',size=12,source=source_tr,color=col,alpha=0.8,legend=str(theclass))\n",
" p.circle(x='x',y='y',size=size,source=source_tr,color=col,alpha=0.8,legend=str(theclass))\n",
" \n",
" idx_ts = np.where(np.array(labels_test)==theclass)[0].tolist()\n",
" samples_ts = np.expand_dims(samples_test[idx_ts,], axis=1)\n",
......@@ -726,7 +731,7 @@
" x=df_ts['x'],\n",
" y=df_ts['y'],\n",
" desc=df_ts['sample']))\n",
" p.triangle(x='x',y='y',size=12,source=source_ts,color=col,alpha=0.8)\n",
" p.triangle(x='x',y='y',size=size,source=source_ts,color=col,alpha=0.8)\n",
"\n",
" idx_ts2 = np.where(np.array(labels_test2)==theclass)[0].tolist()\n",
" samples_ts2 = np.expand_dims(samples_test2[idx_ts2,], axis=1)\n",
......@@ -737,7 +742,7 @@
" x=df_ts2['x'],\n",
" y=df_ts2['y'],\n",
" desc=df_ts2['sample']))\n",
" p.diamond(x='x',y='y',size=12,source=source_ts2,color=col,alpha=0.8)\n",
" p.diamond(x='x',y='y',size=size,source=source_ts2,color=col,alpha=0.8)\n",
" \n",
"# p.legend.location = \"bottom_left\"\n",
" p.legend.label_text_font_size = \"20pt\"\n",
......
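
The diff is truncated before the figures collected in plots are laid out. Assuming Bokeh's gridplot is used for the final layout (as the commit title suggests), that step might look like the sketch below; ncols=3 is an illustrative choice, not a value taken from the commit:

from bokeh.io import show
from bokeh.layouts import gridplot

# Arrange the per-split figures built in the loop above into a grid.
grid = gridplot(plots, ncols=3)
show(grid)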