Commit 8be3a823 authored by Marco Cristoforetti

new network

parent 7eb46cd9
@@ -281,7 +281,7 @@
},
{
"cell_type": "code",
"execution_count": 42,
"execution_count": 69,
"metadata": {},
"outputs": [],
"source": [
@@ -303,10 +303,11 @@
" self.ln1 = nn.LayerNorm(self.nhidden_o )\n",
" self.linear_o_2 = nn.Linear(self.nhidden_o, self.nhidden_o)\n",
" self.linear_o_3 = nn.Linear(self.nhidden_o, self.nhidden_o // 2)\n",
" self.linear_o_4 = nn.Linear(self.nhidden_o // 2, self.after)\n",
" self.linear_o_4_c = nn.Linear(self.nhidden_o // 2, self.after * 4)\n",
" \n",
" self.linear_o_4_c = nn.Linear(1, 2)\n",
" self.linear_o_5_c = nn.Linear(2, 4)\n",
" self.linear_o_4_r = nn.Linear(4, 2)\n",
"# self.linear_o_4b_r = nn.Linear(2, 2)\n",
" self.linear_o_5_r = nn.Linear(2, 1)\n",
" \n",
" \n",
" def init_hidden(self, batch_size):\n",
@@ -329,18 +330,19 @@
" \n",
" x = F.relu(self.linear_o_3(x))\n",
" x = F.dropout(x, 0.2, training=self.training)\n",
" x1 = self.linear_o_4(x)\n",
" x1 = self.linear_o_4_c(x)\n",
" x2 = self.linear_o_4_r(x1.view(-1, 4))\n",
"# x2 = self.linear_o_4b_r(x2)\n",
" x2 = self.linear_o_5_r(x2)\n",
" x2 = x2.reshape(x0.size(0), self.after)\n",
" x1 = x1.reshape(x0.size(0) * self.after, 4)\n",
" \n",
" x2 = self.linear_o_4_c(x1.view(-1, 1))\n",
" x2 = self.linear_o_5_c(x2)\n",
" x2 = x2.reshape(x0.size(0) * self.after, 4)\n",
" \n",
" return x1, x2"
" return x2, x1"
]
},
{
"cell_type": "code",
"execution_count": 43,
"execution_count": 70,
"metadata": {},
"outputs": [],
"source": [
@@ -350,7 +352,7 @@
},
{
"cell_type": "code",
"execution_count": 44,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -364,28 +366,10 @@
" (ln1): LayerNorm((96,), eps=1e-05, elementwise_affine=True)\n",
" (linear_o_2): Linear(in_features=96, out_features=96, bias=True)\n",
" (linear_o_3): Linear(in_features=96, out_features=48, bias=True)\n",
" (linear_o_4): Linear(in_features=48, out_features=12, bias=True)\n",
" (linear_o_4_c): Linear(in_features=1, out_features=4, bias=True)\n",
")\n",
"Epoch 0 time = 7.45, tr_rmse = 2241.50965, val_rmse = 2920.91749, ts_rmse = 2924.31052, tr_c = 1.09764, val_c = 0.86375, ts_c = 0.85440\n",
"Epoch 10 time = 8.34, tr_rmse = 1935.26607, val_rmse = 2358.63838, ts_rmse = 2364.55768, tr_c = 0.80118, val_c = 0.57136, ts_c = 0.57078\n",
"Epoch 20 time = 8.61, tr_rmse = 1724.70287, val_rmse = 2025.84303, ts_rmse = 2036.27356, tr_c = 0.71661, val_c = 0.50405, ts_c = 0.49882\n",
"Epoch 30 time = 8.96, tr_rmse = 2066.65466, val_rmse = 2203.72957, ts_rmse = 2219.25776, tr_c = 0.62792, val_c = 0.45197, ts_c = 0.45185\n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-44-ed204af52b92>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 46\u001b[0m \u001b[0mdst_net\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0meval\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 47\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 48\u001b[0;31m \u001b[0mout_r\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mout_c\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdst_net\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata_in_scaled\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mlast_train\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdevice\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfloat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 49\u001b[0m \u001b[0mloss_tr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msqrt\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mloss_mse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmm_scaler_out\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minverse_transform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mout_r\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcpu\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclone\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdevice\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata_out\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mlast_train\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdevice\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfloat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 50\u001b[0m \u001b[0mloss_mae_tr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mloss_f\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmm_scaler_out\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minverse_transform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mout_r\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcpu\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclone\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdevice\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata_out\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mlast_train\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdevice\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfloat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/miniconda3/envs/dst/lib/python3.7/site-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 725\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_slow_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 726\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 727\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 728\u001b[0m for hook in itertools.chain(\n\u001b[1;32m 729\u001b[0m \u001b[0m_global_forward_hooks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m<ipython-input-42-dccbb3f298dd>\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, x0)\u001b[0m\n\u001b[1;32m 31\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhidden\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minit_hidden\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx0\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 33\u001b[0;31m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlstm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhidden\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx0\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 34\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbn1\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 35\u001b[0m \u001b[0;31m# x = F.relu(x)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/miniconda3/envs/dst/lib/python3.7/site-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 725\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_slow_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 726\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 727\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 728\u001b[0m for hook in itertools.chain(\n\u001b[1;32m 729\u001b[0m \u001b[0m_global_forward_hooks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/miniconda3/envs/dst/lib/python3.7/site-packages/torch/nn/modules/rnn.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, input, hx)\u001b[0m\n\u001b[1;32m 580\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mbatch_sizes\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 581\u001b[0m result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,\n\u001b[0;32m--> 582\u001b[0;31m self.dropout, self.training, self.bidirectional, self.batch_first)\n\u001b[0m\u001b[1;32m 583\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 584\u001b[0m result = _VF.lstm(input, batch_sizes, hx, self._flat_weights, self.bias,\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
" (linear_o_4_c): Linear(in_features=48, out_features=48, bias=True)\n",
" (linear_o_4_r): Linear(in_features=4, out_features=2, bias=True)\n",
" (linear_o_5_r): Linear(in_features=2, out_features=1, bias=True)\n",
")\n"
]
}
],
@@ -457,7 +441,7 @@
" history_ts[epoch] = [loss_ts, loss_mae_ts, loss_c_ts]\n",
" epoch_time = time.time() - start_time\n",
"\n",
" if (epoch % 10 == 0):\n",
" if (epoch % 1 == 0):\n",
" print('Epoch %d time = %.2f, tr_rmse = %0.5f, val_rmse = %0.5f, ts_rmse = %0.5f, tr_c = %.5f, val_c = %.5f, ts_c = %.5f' % \n",
" (epoch, epoch_time, loss_tr, loss_val, loss_ts, loss_c_tr, loss_c_val, loss_c_ts)) \n",
" \n"
%% Cell type:code id: tags:
``` python
%matplotlib inline
import matplotlib.pyplot as plt
import sys; sys.path.append('../DST')
import os
from DST.config import data_path
import pandas as pd
import numpy as np
import seaborn as sns; sns.set(style="whitegrid", font_scale=1.3)
import torch
import torch.nn as nn
import time
import math
import torch.utils.data as utils_data
import torch.nn.functional as F
import datetime
```
%% Cell type:code id: tags:
``` python
torch.manual_seed(21894)
np.random.seed(21894)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
```
%% Cell type:code id: tags:
``` python
BEFORE = 12
AFTER = 12
```
%% Cell type:code id: tags:
``` python
dst_data = pd.read_pickle(os.path.join(data_path,'dst.pkl'))
dst_data['ora_round'] = dst_data.ora.apply(lambda x:int(x.split(':')[0]))
dati_agg = dst_data.groupby(['data','ora_round']).agg({
'BX': np.mean,
'BY': np.mean,
'BZ': np.mean,
'FLOW_SPEED': np.mean,
'PROTON_DENSITY': np.mean,
'TEMPERATURE': np.mean,
'PRESSION': np.mean,
'ELETTRIC': np.mean,
'y': np.mean})
dati_agg.reset_index(inplace=True)
dati_agg.sort_values(by = ['data','ora_round'],inplace=True)
dataset = dati_agg.drop(columns = ['data','ora_round']).values
dataset = torch.from_numpy(np.hstack([np.arange(len(dataset)).reshape([-1,1]),dataset]))
last_date_train = dati_agg[dati_agg.data <= datetime.datetime(2008,12,31)].index[-1]
len_valid_test = (len(dataset) - last_date_train)/2
last_date_train/len(dataset), len_valid_test/len(dataset)
data_in = dataset.unfold(0, BEFORE, 1).transpose(2,1)
data_out = dataset[BEFORE:].unfold(0, AFTER, 1).transpose(2,1)
data_in = data_in[:data_out.size(0)]
data_out = data_out[:,:,-1]
data_in.size(), data_out.size()
```
%%%% Output: execute_result
(torch.Size([261794, 12, 10]), torch.Size([261794, 12]))
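For orientation, a small standalone sketch (toy sizes, not part of the commit) of what `unfold(0, BEFORE, 1).transpose(2, 1)` produces:

``` python
import torch

series = torch.arange(12.).reshape(6, 2)          # 6 hourly steps, 2 channels
windows = series.unfold(0, 3, 1).transpose(2, 1)  # BEFORE = 3 here
print(windows.shape)                              # torch.Size([4, 3, 2])
print(windows[0])                                 # first window = rows 0..2
```

Each window holds `BEFORE` consecutive rows; the targets are built the same way, shifted by `BEFORE`, then reduced to the target column (`y`) with `[:, :, -1]`.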
%% Cell type:code id: tags:
``` python
where_not_nan_in = ~torch.isnan(data_in).any(2, keepdim=True).any(1, keepdim=True).reshape(-1)
data_in = data_in[where_not_nan_in]
data_out = data_out[where_not_nan_in]
where_not_nan_out = ~torch.isnan(data_out).any(1, keepdim=True).reshape(-1)
data_in = data_in[where_not_nan_out]
data_out = data_out[where_not_nan_out]
last_train = np.where(data_in[:,0,0] <= last_date_train)[0][-1] + 1
data_in = data_in[:, :, 1:]
#len_tr = int(data_in.size(0) * 0.6)
n_channels = data_in.size(2)
```
%% Cell type:code id: tags:
``` python
class MinMaxScaler():
"""
Transform features by scaling each feature to a given range.
Features are expected in the last dim.
The transformation is given by::
X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))
X_scaled = X_std * (max - min) + min
where min, max = feature_range.
"""
def __init__(self, feature_range=(0,1)):
self.feature_range = feature_range
def fit(self, X):
X_size = X.size()
X = X.reshape(-1, X_size[-1])
data_min = X.min(axis=0).values
data_max = X.max(axis=0).values
data_range = data_max - data_min
self.scale_ = ((self.feature_range[1] - self.feature_range[0]) / data_range)
self.min_ = self.feature_range[0] - data_min * self.scale_
self.data_min_ = data_min
self.data_max_ = data_max
self.data_range_ = data_range
X = X.reshape(X_size)
return self
def transform(self, X):
X *= self.scale_
X += self.min_
return X
def inverse_transform(self, X):
X -= self.min_
X /= self.scale_
return X
```
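One caveat worth flagging: `transform` and `inverse_transform` modify their argument in place (`*=`, `+=`), which is why the cells below call them on `.clone()`s. A minimal round-trip sketch using the class above:

``` python
x = torch.rand(5, 12, 9)                     # (samples, steps, features)
scaler = MinMaxScaler((0.1, 0.9)).fit(x)     # fit() returns self
x_scaled = scaler.transform(x.clone())       # clone() keeps x untouched
x_back = scaler.inverse_transform(x_scaled.clone())
print(torch.allclose(x_back, x, atol=1e-6))  # True: the scaling inverts cleanly
```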
%% Cell type:code id: tags:
``` python
mmScaler = MinMaxScaler((0.1, .9))
mmScaler.fit(data_in[:last_train])
data_in_scaled = data_in.clone()
data_in_scaled = mmScaler.transform(data_in_scaled)
mm_scaler_out = MinMaxScaler((0.1, .9))
mm_scaler_out.fit(data_in[:last_train, :, -1].reshape(-1, data_in.size(1), 1))
data_out_scaled = data_out.clone()
data_out_scaled = mm_scaler_out.transform(data_out_scaled)
```
%% Cell type:code id: tags:
``` python
dst_levels = [-20,-50,-100]
data_out_c = data_out.clone()
data_out_c[np.where(data_out_c >= dst_levels[0])] = 0
data_out_c[np.where((data_out_c < dst_levels[0]) & (data_out_c >= dst_levels[1]))] = 1
data_out_c[np.where((data_out_c < dst_levels[1]) & (data_out_c >= dst_levels[2]))] = 2
data_out_c[np.where((data_out_c < dst_levels[2]))] = 3
```
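The four masked assignments implement threshold binning on the storm levels; an equivalent one-liner sketch with `torch.bucketize` (`right=True` so that values exactly on a level land in the quieter class, matching the `>=` comparisons above):

``` python
boundaries = torch.tensor([-100., -50., -20.], dtype=data_out.dtype)  # dst_levels, ascending
labels = 3 - torch.bucketize(data_out, boundaries, right=True)
print(torch.equal(labels.to(data_out_c.dtype), data_out_c))  # expected: True
```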
%% Cell type:code id: tags:
``` python
class Dataset(utils_data.Dataset):
def __init__(self, dataset_in, dataset_out, dataset_out_c, weights):
self.dataset_in = dataset_in
self.dataset_out = dataset_out
self.dataset_out_c = dataset_out_c
self.weights = weights
def __len__(self):
return self.dataset_in.size(0)
def __getitem__(self, idx):
din_src = self.dataset_in[idx]
dout = self.dataset_out[idx]
dout_c = self.dataset_out_c[idx]
ww = self.weights[idx]
return din_src, dout, dout_c, ww
```
%% Cell type:code id: tags:
``` python
ixs_valid_test = np.arange(int(len_valid_test)) + last_train
np.random.shuffle(ixs_valid_test)
ixs_valid = ixs_valid_test[::2]
ixs_test = ixs_valid_test[1::2]
```
%% Cell type:code id: tags:
``` python
dst_min = data_out[:last_train].min(axis=1).values.flatten()
bins = [dst_min.min() - 10] + list(np.arange(-300, dst_min.max() + 10, 10))
h, b = np.histogram(dst_min, bins=bins)
if len(np.argwhere(h == 0)) > 0:
bins = np.delete(bins, np.argwhere(h == 0)[0] + 1)
h, b = np.histogram(dst_min, bins=bins)
w = h.max()/h
def fix_weight(dst_v):
pos = np.argwhere(np.abs(b - dst_v) == np.abs((b - dst_v)).min())[0,0]
if dst_v - b[pos] < 0:
pos = pos-1
# return w[pos]/h.max()
return np.sqrt(w[pos]/h.max())
fix_weight_v = np.vectorize(fix_weight)
weights = fix_weight_v(dst_min)
sampler = torch.utils.data.sampler.WeightedRandomSampler(weights, num_samples= len(dst_min))
BATCH_SIZE=256
dataset_tr = Dataset(data_in_scaled[:last_train], data_out_scaled[:last_train], data_out_c[:last_train], weights)
# data_loader_tr = utils_data.DataLoader(dataset_tr, batch_size=BATCH_SIZE, num_workers = 4, shuffle=False, sampler = sampler)
data_loader_tr = utils_data.DataLoader(dataset_tr, batch_size=BATCH_SIZE, num_workers = 4, shuffle=True)
```
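The histogram above assigns each training window an inverse-frequency weight from its minimum Dst; the weights are used per sample in the loss below, while the `sampler` route stays commented out. For reference, a sketch of that alternative, which oversamples storms instead of reweighting the loss:

``` python
# With replacement (the default), rare deep-storm windows are drawn more
# often per epoch, in proportion to their weights.
sampler = torch.utils.data.sampler.WeightedRandomSampler(
    weights, num_samples=len(dst_min), replacement=True)
data_loader_tr = utils_data.DataLoader(
    dataset_tr, batch_size=BATCH_SIZE, num_workers=4,
    shuffle=False, sampler=sampler)  # shuffle must stay False with a sampler
```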
%% Cell type:code id: tags:
``` python
aa = next(iter(data_loader_tr))#[0].size()
```
%% Cell type:code id: tags:
``` python
class DSTnet(nn.Module):
def __init__(self, nvars, nhidden_i, nhidden_o, n_out_i, before, after):
super().__init__()
self.nvars = nvars
self.nhidden_i = nhidden_i
self.nhidden_o = nhidden_o
self.before = before
self.after = after
self.n_out_i = n_out_i
self.lstm = nn.LSTM(self.nvars, self.n_out_i, self.nhidden_i, batch_first=True)
self.first_merged_layer = self.n_out_i * self.before
self.bn1 = nn.BatchNorm1d(num_features=self.first_merged_layer)
self.linear_o_1 = nn.Linear(self.first_merged_layer, self.nhidden_o)
self.ln1 = nn.LayerNorm(self.nhidden_o )
self.linear_o_2 = nn.Linear(self.nhidden_o, self.nhidden_o)
self.linear_o_3 = nn.Linear(self.nhidden_o, self.nhidden_o // 2)
self.linear_o_4 = nn.Linear(self.nhidden_o // 2, self.after)
self.linear_o_4_c = nn.Linear(self.nhidden_o // 2, self.after * 4)
self.linear_o_4_c = nn.Linear(1, 2)
self.linear_o_5_c = nn.Linear(2, 4)
self.linear_o_4_r = nn.Linear(4, 2)
# self.linear_o_4b_r = nn.Linear(2, 2)
self.linear_o_5_r = nn.Linear(2, 1)
def init_hidden(self, batch_size):
hidden = torch.randn(self.nhidden_i, batch_size, self.n_out_i).to(device)
cell = torch.randn(self.nhidden_i, batch_size, self.n_out_i).to(device)
return (hidden, cell)
def forward(self, x0):
self.hidden = self.init_hidden(x0.size(0))
x = self.lstm(x0, self.hidden)[0].reshape(x0.shape[0], -1)
x = self.bn1(x)
# x = F.relu(x)
x = F.relu(self.linear_o_1(x))
# x = self.ln1(x)
x = F.dropout(x, 0.2, training=self.training)
x = F.relu(self.linear_o_2(x))
x = F.dropout(x, 0.2, training=self.training)
x = F.relu(self.linear_o_3(x))
x = F.dropout(x, 0.2, training=self.training)
x1 = self.linear_o_4(x)
x1 = self.linear_o_4_c(x)
x2 = self.linear_o_4_r(x1.view(-1, 4))
# x2 = self.linear_o_4b_r(x2)
x2 = self.linear_o_5_r(x2)
x2 = x2.reshape(x0.size(0), self.after)
x1 = x1.reshape(x0.size(0) * self.after, 4)
x2 = self.linear_o_4_c(x1.view(-1, 1))
x2 = self.linear_o_5_c(x2)
x2 = x2.reshape(x0.size(0) * self.after, 4)
return x1, x2
return x2, x1
```
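Because the cell above shows both the removed and the added head layers side by side, the new wiring is easy to misread. A standalone sketch of one reading of the added lines (`linear_o_4` keeps the 12-step regression; each scalar step is then lifted to 4 class logits through `linear_o_4_c` and `linear_o_5_c`):

``` python
import torch
import torch.nn as nn

B, AFTER_, H = 5, 12, 48         # toy batch, horizon, hidden width
x = torch.rand(B, H)             # stand-in for the linear_o_3 output
linear_o_4 = nn.Linear(H, AFTER_)
linear_o_4_c = nn.Linear(1, 2)
linear_o_5_c = nn.Linear(2, 4)

x1 = linear_o_4(x)                               # (5, 12) regression head
x2 = linear_o_5_c(linear_o_4_c(x1.view(-1, 1)))  # (60, 4) class logits
print(x1.shape, x2.shape)  # torch.Size([5, 12]) torch.Size([60, 4])
```

This matches the loop below, which unpacks `out_r, out_c = dst_net(x)` and feeds `out_c` of shape `(batch * after, 4)` to `CrossEntropyLoss` against the flattened class targets.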
%% Cell type:code id: tags:
``` python
aa = data_out_c[:last_train]
weights_c = torch.tensor([len(aa[aa==0])/len(aa[aa==0]), len(aa[aa==0])/len(aa[aa==1]), len(aa[aa==0])/len(aa[aa==2]), len(aa[aa==0])/len(aa[aa==3])]).to(device).sqrt()
```
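A compact equivalent of the class-weight computation (a sketch; `torch.bincount` assumes integer labels 0..3):

``` python
counts = torch.bincount(data_out_c[:last_train].flatten().long(), minlength=4).float()
weights_c = (counts[0] / counts).sqrt().to(device)  # same n0/ni ratios as above
```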
%% Cell type:code id: tags:
``` python
loss_f = nn.L1Loss()
loss_mse = nn.MSELoss(reduction='none')
#loss_fc= nn.CrossEntropyLoss()
loss_fc= nn.CrossEntropyLoss(weight = weights_c)
nhidden_i = 2
nhidden_o = 96
n_out_i = 8
before = BEFORE
nvars = data_in_scaled.shape[-1]
dst_net = DSTnet(nvars, nhidden_i, nhidden_o, n_out_i, before, AFTER).to(device)
print(dst_net)
num_epochs = 2000
lr = 1e-4
optimizer = torch.optim.Adam(dst_net.parameters(), lr=lr)#, weight_decay=1e-5)
history_tr = np.zeros((num_epochs, 3))
history_valid = np.zeros((num_epochs, 3))
history_ts = np.zeros((num_epochs, 3))
for epoch in range(num_epochs):
start_time = time.time()
for i, batch in enumerate(data_loader_tr):
x = batch[0].float().to(device)
y_r = batch[1].float().to(device)
y_c = batch[2].flatten().long().to(device)
w = batch[3].to(device)
optimizer.zero_grad()
dst_net.train()
out_r, out_c = dst_net(x)
loss_r = loss_f(out_r, y_r)
loss_c = loss_fc(out_c, y_c)
loss = (loss_r * w).mean() + loss_c
loss.backward()
optimizer.step()
dst_net.eval()
out_r, out_c = dst_net(data_in_scaled[:last_train].to(device).float())
loss_tr = np.sqrt(loss_mse(mm_scaler_out.inverse_transform(out_r.cpu().clone()).to(device), data_out[:last_train].to(device).float()).mean().item())
loss_mae_tr = loss_f(mm_scaler_out.inverse_transform(out_r.cpu().clone()).to(device), data_out[:last_train].to(device).float()).item()
loss_c_tr = loss_fc(out_c, data_out_c[:last_train].flatten().long().to(device)).item()
out_r, out_c = dst_net(data_in_scaled[ixs_valid].to(device).float())
loss_val = np.sqrt(loss_mse(mm_scaler_out.inverse_transform(out_r.cpu().clone()).to(device), data_out[ixs_valid].to(device).float()).mean().item())
loss_mae_val = loss_f(mm_scaler_out.inverse_transform(out_r.cpu().clone()).to(device), data_out[ixs_valid].to(device).float()).item()
loss_c_val = loss_fc(out_c, data_out_c[ixs_valid].flatten().long().to(device)).item()
out_r, out_c = dst_net(data_in_scaled[ixs_test].to(device).float())
loss_ts = np.sqrt(loss_mse(mm_scaler_out.inverse_transform(out_r.cpu().clone()).to(device), data_out[ixs_test].to(device).float()).mean().item())
loss_mae_ts = loss_f(mm_scaler_out.inverse_transform(out_r.cpu().clone()).to(device), data_out[ixs_test].to(device).float()).item()
loss_c_ts = loss_fc(out_c, data_out_c[ixs_test].flatten().long().to(device)).item()
history_tr[epoch] = [loss_tr, loss_mae_tr, loss_c_tr]
history_valid[epoch] = [loss_val, loss_mae_val, loss_c_val]
history_ts[epoch] = [loss_ts, loss_mae_ts, loss_c_ts]
epoch_time = time.time() - start_time
if (epoch % 10 == 0):
if (epoch % 1 == 0):
print('Epoch %d time = %.2f, tr_rmse = %0.5f, val_rmse = %0.5f, ts_rmse = %0.5f, tr_c = %.5f, val_c = %.5f, ts_c = %.5f' %
(epoch, epoch_time, loss_tr, loss_val, loss_ts, loss_c_tr, loss_c_val, loss_c_ts))
```
%%%% Output: stream
DSTnet(
(lstm): LSTM(9, 8, num_layers=2, batch_first=True)
(bn1): BatchNorm1d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(linear_o_1): Linear(in_features=96, out_features=96, bias=True)
(ln1): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
(linear_o_2): Linear(in_features=96, out_features=96, bias=True)
(linear_o_3): Linear(in_features=96, out_features=48, bias=True)
(linear_o_4): Linear(in_features=48, out_features=12, bias=True)
(linear_o_4_c): Linear(in_features=1, out_features=4, bias=True)
(linear_o_4_c): Linear(in_features=48, out_features=48, bias=True)
(linear_o_4_r): Linear(in_features=4, out_features=2, bias=True)
(linear_o_5_r): Linear(in_features=2, out_features=1, bias=True)
)
Epoch 0 time = 7.45, tr_rmse = 2241.50965, val_rmse = 2920.91749, ts_rmse = 2924.31052, tr_c = 1.09764, val_c = 0.86375, ts_c = 0.85440
Epoch 10 time = 8.34, tr_rmse = 1935.26607, val_rmse = 2358.63838, ts_rmse = 2364.55768, tr_c = 0.80118, val_c = 0.57136, ts_c = 0.57078
Epoch 20 time = 8.61, tr_rmse = 1724.70287, val_rmse = 2025.84303, ts_rmse = 2036.27356, tr_c = 0.71661, val_c = 0.50405, ts_c = 0.49882
Epoch 30 time = 8.96, tr_rmse = 2066.65466, val_rmse = 2203.72957, ts_rmse = 2219.25776, tr_c = 0.62792, val_c = 0.45197, ts_c = 0.45185
%%%% Output: error
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
<ipython-input-44-ed204af52b92> in <module>
46 dst_net.eval()
47
---> 48 out_r, out_c = dst_net(data_in_scaled[:last_train].to(device).float())
49 loss_tr = np.sqrt(loss_mse(mm_scaler_out.inverse_transform(out_r.cpu().clone()).to(device), data_out[:last_train].to(device).float()).mean().item())
50 loss_mae_tr = loss_f(mm_scaler_out.inverse_transform(out_r.cpu().clone()).to(device), data_out[:last_train].to(device).float()).item()
~/miniconda3/envs/dst/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
<ipython-input-42-dccbb3f298dd> in forward(self, x0)
31
32 self.hidden = self.init_hidden(x0.size(0))
---> 33 x = self.lstm(x0, self.hidden)[0].reshape(x0.shape[0], -1)
34 x = self.bn1(x)
35 # x = F.relu(x)
~/miniconda3/envs/dst/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
725 result = self._slow_forward(*input, **kwargs)
726 else:
--> 727 result = self.forward(*input, **kwargs)
728 for hook in itertools.chain(
729 _global_forward_hooks.values(),
~/miniconda3/envs/dst/lib/python3.7/site-packages/torch/nn/modules/rnn.py in forward(self, input, hx)
580 if batch_sizes is None:
581 result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,
--> 582 self.dropout, self.training, self.bidirectional, self.batch_first)
583 else:
584 result = _VF.lstm(input, batch_sizes, hx, self._flat_weights, self.bias,
KeyboardInterrupt:
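One observation on the evaluation passes in the loop above (not part of this commit): the three full-dataset forward passes run with autograd enabled, building a graph that is never used. The usual guard, sketched for the training-set pass:

``` python
dst_net.eval()
with torch.no_grad():  # skip graph construction during metric computation
    out_r, out_c = dst_net(data_in_scaled[:last_train].to(device).float())
```

This cuts per-epoch memory and time without changing the reported numbers.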
%% Cell type:code id: tags:
``` python
```
@@ -281,7 +281,7 @@
},
{
"cell_type": "code",
"execution_count": 45,
"execution_count": 69,
"metadata": {},
"outputs": [],
"source": [
@@ -303,10 +303,11 @@
" self.ln1 = nn.LayerNorm(self.nhidden_o )\n",
" self.linear_o_2 = nn.Linear(self.nhidden_o, self.nhidden_o)\n",
" self.linear_o_3 = nn.Linear(self.nhidden_o, self.nhidden_o // 2)\n",
" self.linear_o_4 = nn.Linear(self.nhidden_o // 2, self.after)\n",
" self.linear_o_4_c = nn.Linear(self.nhidden_o // 2, self.after * 4)\n",
" \n",
" self.linear_o_4_c = nn.Linear(1, 2)\n",
" self.linear_o_5_c = nn.Linear(2, 4)\n",
" self.linear_o_4_r = nn.Linear(4, 2)\n",
"# self.linear_o_4b_r = nn.Linear(2, 2)\n",
" self.linear_o_5_r = nn.Linear(2, 1)\n",
" \n",
" \n",
" def init_hidden(self, batch_size):\n",
@@ -329,18 +330,19 @@
" \n",
" x = F.relu(self.linear_o_3(x))\n",
" x = F.dropout(x, 0.2, training=self.training)\n",
" x1 = self.linear_o_4(x)\n",
" x1 = self.linear_o_4_c(x)\n",
" x2 = self.linear_o_4_r(x1.view(-1, 4))\n",
"# x2 = self.linear_o_4b_r(x2)\n",
" x2 = self.linear_o_5_r(x2)\n",
" x2 = x2.reshape(x0.size(0), self.after)\n",
" x1 = x1.reshape(x0.size(0) * self.after, 4)\n",
" \n",
" x2 = self.linear_o_4_c(x1.view(-1, 1))\n",
" x2 = self.linear_o_5_c(x2)\n",
" x2 = x2.reshape(x0.size(0) * self.after, 4)\n",
" \n",
" return x1, x2"
" return x2, x1"
]
},
{
"cell_type": "code",
"execution_count": 46,
"execution_count": 70,
"metadata": {},
"outputs": [],
"source": [
@@ -364,25 +366,10 @@
" (ln1): LayerNorm((96,), eps=1e-05, elementwise_affine=True)\n",
" (linear_o_2): Linear(in_features=96, out_features=96, bias=True)\n",
" (linear_o_3): Linear(in_features=96, out_features=48, bias=True)\n",
" (linear_o_4): Linear(in_features=48, out_features=12, bias=True)\n",
" (linear_o_4_c): Linear(in_features=1, out_features=2, bias=True)\n",
" (linear_o_5_c): Linear(in_features=2, out_features=4, bias=True)\n",
")\n",
"Epoch 0 time = 7.89, tr_rmse = 3312.61694, val_rmse = 4039.37631, ts_rmse = 4041.35114, tr_c = 0.99143, val_c = 0.71103, ts_c = 0.71483\n",
"Epoch 10 time = 8.79, tr_rmse = 3083.35775, val_rmse = 3648.69607, ts_rmse = 3659.21959, tr_c = 0.69483, val_c = 0.51741, ts_c = 0.52159\n",
"Epoch 20 time = 8.70, tr_rmse = 2548.58157, val_rmse = 2916.96280, ts_rmse = 2926.98241, tr_c = 0.60299, val_c = 0.44207, ts_c = 0.44323\n",
"Epoch 30 time = 8.56, tr_rmse = 2105.74037, val_rmse = 2274.36277, ts_rmse = 2281.30029, tr_c = 0.57979, val_c = 0.42495, ts_c = 0.42557\n",
"Epoch 40 time = 8.52, tr_rmse = 1945.62355, val_rmse = 1995.75568, ts_rmse = 2001.10207, tr_c = 0.57197, val_c = 0.41847, ts_c = 0.42077\n",
"Epoch 50 time = 8.52, tr_rmse = 1828.50157, val_rmse = 1846.91134, ts_rmse = 1851.98265, tr_c = 0.56634, val_c = 0.41308, ts_c = 0.41552\n",
"Epoch 60 time = 8.51, tr_rmse = 1747.86348, val_rmse = 1718.97600, ts_rmse = 1722.82145, tr_c = 0.56212, val_c = 0.41132, ts_c = 0.41435\n",
"Epoch 70 time = 8.55, tr_rmse = 1686.30469, val_rmse = 1636.66330, ts_rmse = 1640.06905, tr_c = 0.55596, val_c = 0.40910, ts_c = 0.41027\n",
"Epoch 80 time = 8.32, tr_rmse = 1586.46076, val_rmse = 1508.65022, ts_rmse = 1508.27202, tr_c = 0.55525, val_c = 0.40960, ts_c = 0.41081\n",
"Epoch 90 time = 8.31, tr_rmse = 1531.40842, val_rmse = 1458.94705, ts_rmse = 1458.39355, tr_c = 0.55096, val_c = 0.40696, ts_c = 0.40922\n",
"Epoch 100 time = 8.28, tr_rmse = 1532.15053, val_rmse = 1429.54626, ts_rmse = 1428.39814, tr_c = 0.54678, val_c = 0.40646, ts_c = 0.40710\n",
"Epoch 110 time = 8.42, tr_rmse = 1346.41886, val_rmse = 1223.24180, ts_rmse = 1219.79358, tr_c = 0.56146, val_c = 0.42584, ts_c = 0.42524\n",
"Epoch 120 time = 8.38, tr_rmse = 1410.67568, val_rmse = 1330.91825, ts_rmse = 1328.96637, tr_c = 0.54528, val_c = 0.40766, ts_c = 0.40964\n",
"Epoch 130 time = 8.29, tr_rmse = 1378.11991, val_rmse = 1293.02499, ts_rmse = 1291.56015, tr_c = 0.53945, val_c = 0.40513, ts_c = 0.40642\n",
"Epoch 140 time = 9.14, tr_rmse = 1287.39082, val_rmse = 1187.76034, ts_rmse = 1185.95911, tr_c = 0.54060, val_c = 0.40944, ts_c = 0.41106\n"
" (linear_o_4_c): Linear(in_features=48, out_features=48, bias=True)\n",
" (linear_o_4_r): Linear(in_features=4, out_features=2, bias=True)\n",
" (linear_o_5_r): Linear(in_features=2, out_features=1, bias=True)\n",
")\n"
]
}
],
@@ -454,7 +441,7 @@
" history_ts[epoch] = [loss_ts, loss_mae_ts, loss_c_ts]\n",
" epoch_time = time.time() - start_time\n",
"\n",
" if (epoch % 10 == 0):\n",
" if (epoch % 1 == 0):\n",
" print('Epoch %d time = %.2f, tr_rmse = %0.5f, val_rmse = %0.5f, ts_rmse = %0.5f, tr_c = %.5f, val_c = %.5f, ts_c = %.5f' % \n",
" (epoch, epoch_time, loss_tr, loss_val, loss_ts, loss_c_tr, loss_c_val, loss_c_ts)) \n",
" \n"
%% Cell type:code id: tags:
``` python
%matplotlib inline
import matplotlib.pyplot as plt
import sys; sys.path.append('../DST')
import os
from DST.config import data_path
import pandas as pd
import numpy as np
import seaborn as sns; sns.set(style="whitegrid", font_scale=1.3)
import torch
import torch.nn as nn
import time
import math
import torch.utils.data as utils_data
import torch.nn.functional as F
import datetime
```
%% Cell type:code id: tags:
``` python
torch.manual_seed(21894)
np.random.seed(21894)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
```
%% Cell type:code id: tags:
``` python
BEFORE = 12
AFTER = 12
```
%% Cell type:code id: tags:
``` python
dst_data = pd.read_pickle(os.path.join(data_path,'dst.pkl'))
dst_data['ora_round'] = dst_data.ora.apply(lambda x:int(x.split(':')[0]))
dati_agg = dst_data.groupby(['data','ora_round']).agg({
'BX': np.mean,
'BY': np.mean,
'BZ': np.mean,
'FLOW_SPEED': np.mean,
'PROTON_DENSITY': np.mean,
'TEMPERATURE': np.mean,
'PRESSION': np.mean,
'ELETTRIC': np.mean,
'y': np.mean})
dati_agg.reset_index(inplace=True)
dati_agg.sort_values(by = ['data','ora_round'],inplace=True)
dataset = dati_agg.drop(columns = ['data','ora_round']).values
dataset = torch.from_numpy(np.hstack([np.arange(len(dataset)).reshape([-1,1]),dataset]))
last_date_train = dati_agg[dati_agg.data <= datetime.datetime(2008,12,31)].index[-1]
len_valid_test = (len(dataset) - last_date_train)/2
last_date_train/len(dataset), len_valid_test/len(dataset)
data_in = dataset.unfold(0, BEFORE, 1).transpose(2,1)
data_out = dataset[BEFORE:].unfold(0, AFTER, 1).transpose(2,1)
data_in = data_in[:data_out.size(0)]
data_out = data_out[:,:,-1]
data_in.size(), data_out.size()
```
%%%% Output: execute_result
(torch.Size([261794, 12, 10]), torch.Size([261794, 12]))
%% Cell type:code id: tags:
``` python
where_not_nan_in = ~torch.isnan(data_in).any(2, keepdim=True).any(1, keepdim=True).reshape(-1)
data_in = data_in[where_not_nan_in]
data_out = data_out[where_not_nan_in]
where_not_nan_out = ~torch.isnan(data_out).any(1, keepdim=True).reshape(-1)
data_in = data_in[where_not_nan_out]
data_out = data_out[where_not_nan_out]
last_train = np.where(data_in[:,0,0] <= last_date_train)[0][-1] + 1
data_in = data_in[:, :, 1:]
#len_tr = int(data_in.size(0) * 0.6)
n_channels = data_in.size(2)
```
%% Cell type:code id: tags:
``` python
class MinMaxScaler():
"""
Transform features by scaling each feature to a given range.
Features are expected in the last dim.
The transformation is given by::
X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))
X_scaled = X_std * (max - min) + min
where min, max = feature_range.
"""
def __init__(self, feature_range=(0,1)):
self.feature_range = feature_range
def fit(self, X):
X_size = X.size()
X = X.reshape(-1, X_size[-1])
data_min = X.min(axis=0).values
data_max = X.max(axis=0).values
data_range = data_max - data_min
self.scale_ = ((self.feature_range[1] - self.feature_range[0]) / data_range)
self.min_ = self.feature_range[0] - data_min * self.scale_
self.data_min_ = data_min
self.data_max_ = data_max
self.data_range_ = data_range
X = X.reshape(X_size)
return self
def transform(self, X):
X *= self.scale_
X += self.min_
return X
def inverse_transform(self, X):
X -= self.min_
X /= self.scale_
return X
```
%% Cell type:code id: tags:
``` python
mmScaler = MinMaxScaler((0.1, .9))
mmScaler.fit(data_in[:last_train])
data_in_scaled = data_in.clone()
data_in_scaled = mmScaler.transform(data_in_scaled)
mm_scaler_out = MinMaxScaler((0.1, .9))
mm_scaler_out.fit(data_in[:last_train, :, -1].reshape(-1, data_in.size(1), 1))
data_out_scaled = data_out.clone()
data_out_scaled = mm_scaler_out.transform(data_out_scaled)
```
%% Cell type:code id: tags:
``` python
dst_levels = [-20,-50,-100]
data_out_c = data_out.clone()
data_out_c[np.where(data_out_c >= dst_levels[0])] = 0
data_out_c[np.where((data_out_c < dst_levels[0]) & (data_out_c >= dst_levels[1]))] = 1
data_out_c[np.where((data_out_c < dst_levels[1]) & (data_out_c >= dst_levels[2]))] = 2
data_out_c[np.where((data_out_c < dst_levels[2]))] = 3
```
%% Cell type:code id: tags:
``` python
class Dataset(utils_data.Dataset):
def __init__(self, dataset_in, dataset_out, dataset_out_c, weights):
self.dataset_in = dataset_in
self.dataset_out = dataset_out
self.dataset_out_c = dataset_out_c
self.weights = weights
def __len__(self):
return self.dataset_in.size(0)
def __getitem__(self, idx):
din_src = self.dataset_in[idx]
dout = self.dataset_out[idx]
dout_c = self.dataset_out_c[idx]
ww = self.weights[idx]
return din_src, dout, dout_c, ww
```
%% Cell type:code id: tags:
``` python
ixs_valid_test = np.arange(int(len_valid_test)) + last_train
np.random.shuffle(ixs_valid_test)
ixs_valid = ixs_valid_test[::2]
ixs_test = ixs_valid_test[1::2]
```
%% Cell type:code id: tags:
``` python
dst_min = data_out[:last_train].min(axis=1).values.flatten()
bins = [dst_min.min() - 10] + list(np.arange(-300, dst_min.max() + 10, 10))
h, b = np.histogram(dst_min, bins=bins)
if len(np.argwhere(h == 0)) > 0:
bins = np.delete(bins, np.argwhere(h == 0)[0] + 1)
h, b = np.histogram(dst_min, bins=bins)
w = h.max()/h
def fix_weight(dst_v):
pos = np.argwhere(np.abs(b - dst_v) == np.abs((b - dst_v)).min())[0,0]
if dst_v - b[pos] < 0:
pos = pos-1
# return w[pos]/h.max()
return np.sqrt(w[pos]/h.max())
fix_weight_v = np.vectorize(fix_weight)
weights = fix_weight_v(dst_min)
sampler = torch.utils.data.sampler.WeightedRandomSampler(weights, num_samples= len(dst_min))
BATCH_SIZE=256
dataset_tr = Dataset(data_in_scaled[:last_train], data_out_scaled[:last_train], data_out_c[:last_train], weights)
# data_loader_tr = utils_data.DataLoader(dataset_tr, batch_size=BATCH_SIZE, num_workers = 4, shuffle=False, sampler = sampler)
data_loader_tr = utils_data.DataLoader(dataset_tr, batch_size=BATCH_SIZE, num_workers = 4, shuffle=True)
```
%% Cell type:code id: tags:
``` python
aa = next(iter(data_loader_tr))#[0].size()
```
%% Cell type:code id: tags:
``` python
class DSTnet(nn.Module):
def __init__(self, nvars, nhidden_i, nhidden_o, n_out_i, before, after):
super().__init__()
self.nvars = nvars
self.nhidden_i = nhidden_i
self.nhidden_o = nhidden_o
self.before = before
self.after = after
self.n_out_i = n_out_i
self.lstm = nn.LSTM(self.nvars, self.n_out_i, self.nhidden_i, batch_first=True)
self.first_merged_layer = self.n_out_i * self.before
self.bn1 = nn.BatchNorm1d(num_features=self.first_merged_layer)
self.linear_o_1 = nn.Linear(self.first_merged_layer, self.nhidden_o)
self.ln1 = nn.LayerNorm(self.nhidden_o )
self.linear_o_2 = nn.Linear(self.nhidden_o, self.nhidden_o)
self.linear_o_3 = nn.Linear(self.nhidden_o, self.nhidden_o // 2)
self.linear_o_4 = nn.Linear(self.nhidden_o // 2, self.after)
self.linear_o_4_c = nn.Linear(self.nhidden_o // 2, self.after * 4)
self.linear_o_4_c = nn.Linear(1, 2)
self.linear_o_5_c = nn.Linear(2, 4)
self.linear_o_4_r = nn.Linear(4, 2)
# self.linear_o_4b_r = nn.Linear(2, 2)
self.linear_o_5_r = nn.Linear(2, 1)
def init_hidden(self, batch_size):
hidden = torch.randn(self.nhidden_i, batch_size, self.n_out_i).to(device)
cell = torch.randn(self.nhidden_i, batch_size, self.n_out_i).to(device)
return (hidden, cell)
def forward(self, x0):
self.hidden = self.init_hidden(x0.size(0))
x = self.lstm(x0, self.hidden)[0].reshape(x0.shape[0], -1)
x = self.bn1(x)
# x = F.relu(x)
x = F.relu(self.linear_o_1(x))
# x = self.ln1(x)
x = F.dropout(x, 0.2, training=self.training)
x = F.relu(self.linear_o_2(x))
x = F.dropout(x, 0.2, training=self.training)
x = F.relu(self.linear_o_3(x))
x = F.dropout(x, 0.2, training=self.training)
x1 = self.linear_o_4(x)
x1 = self.linear_o_4_c(x)
x2 = self.linear_o_4_r(x1.view(-1, 4))
# x2 = self.linear_o_4b_r(x2)
x2 = self.linear_o_5_r(x2)
x2 = x2.reshape(x0.size(0), self.after)
x1 = x1.reshape(x0.size(0) * self.after, 4)
x2 = self.linear_o_4_c(x1.view(-1, 1))
x2 = self.linear_o_5_c(x2)
x2 = x2.reshape(x0.size(0) * self.after, 4)
return x1, x2
return x2, x1
```
%% Cell type:code id: tags:
``` python
aa = data_out_c[:last_train]
weights_c = torch.tensor([len(aa[aa==0])/len(aa[aa==0]), len(aa[aa==0])/len(aa[aa==1]), len(aa[aa==0])/len(aa[aa==2]), len(aa[aa==0])/len(aa[aa==3])]).to(device).sqrt()
```
%% Cell type:code id: tags:
``` python
loss_f = nn.L1Loss()
loss_mse = nn.MSELoss(reduction='none')
#loss_fc= nn.CrossEntropyLoss()
loss_fc= nn.CrossEntropyLoss(weight = weights_c)
nhidden_i = 2
nhidden_o = 96
n_out_i = 8
before = BEFORE
nvars = data_in_scaled.shape[-1]
dst_net = DSTnet(nvars, nhidden_i, nhidden_o, n_out_i, before, AFTER).to(device)
print(dst_net)
num_epochs = 2000
lr = 1e-4
optimizer = torch.optim.Adam(dst_net.parameters(), lr=lr)#, weight_decay=1e-5)
history_tr = np.zeros((num_epochs, 3))
history_valid = np.zeros((num_epochs, 3))
history_ts = np.zeros((num_epochs, 3))
for epoch in range(num_epochs):
start_time = time.time()
for i, batch in enumerate(data_loader_tr):
x = batch[0].float().to(device)
y_r = batch[1].float().to(device)
y_c = batch[2].flatten().long().to(device)
w = batch[3].to(device)
optimizer.zero_grad()
dst_net.train()
out_r, out_c = dst_net(x)
loss_r = loss_f(out_r, y_r)
loss_c = loss_fc(out_c, y_c)
loss = (loss_r * w).mean() + loss_c
loss.backward()
optimizer.step()
dst_net.eval()
out_r, out_c = dst_net(data_in_scaled[:last_train].to(device).float())
loss_tr = np.sqrt(loss_mse(mm_scaler_out.inverse_transform(out_r.cpu().clone()).to(device), data_out[:last_train].to(device).float()).mean().item())
loss_mae_tr = loss_f(mm_scaler_out.inverse_transform(out_r.cpu().clone()).to(device), data_out[:last_train].to(device).float()).item()
loss_c_tr = loss_fc(out_c, data_out_c[:last_train].flatten().long().to(device)).item()
out_r, out_c = dst_net(data_in_scaled[ixs_valid].to(device).float())
loss_val = np.sqrt(loss_mse(mm_scaler_out.inverse_transform(out_r.cpu().clone()).to(device), data_out[ixs_valid].to(device).float()).mean().item())
loss_mae_val = loss_f(mm_scaler_out.inverse_transform(out_r.cpu().clone()).to(device), data_out[ixs_valid].to(device).float()).item()
loss_c_val = loss_fc(out_c, data_out_c[ixs_valid].flatten().long().to(device)).item()
out_r, out_c = dst_net(data_in_scaled[ixs_test].to(device).float())
loss_ts = np.sqrt(loss_mse(mm_scaler_out.inverse_transform(out_r.cpu().clone()).to(device), data_out[ixs_test].to(device).float()).mean().item())
loss_mae_ts = loss_f(mm_scaler_out.inverse_transform(out_r.cpu().clone()).to(device), data_out[ixs_test].to(device).float()).item()
loss_c_ts = loss_fc(out_c, data_out_c[ixs_test].flatten().long().to(device)).item()
history_tr[epoch] = [loss_tr, loss_mae_tr, loss_c_tr]
history_valid[epoch] = [loss_val, loss_mae_val, loss_c_val]
history_ts[epoch] = [loss_ts, loss_mae_ts, loss_c_ts]
epoch_time = time.time() - start_time
if (epoch % 10 == 0):
if (epoch % 1 == 0):
print('Epoch %d time = %.2f, tr_rmse = %0.5f, val_rmse = %0.5f, ts_rmse = %0.5f, tr_c = %.5f, val_c = %.5f, ts_c = %.5f' %
(epoch, epoch_time, loss_tr, loss_val, loss_ts, loss_c_tr, loss_c_val, loss_c_ts))
```
%%%% Output: stream
DSTnet(
(lstm): LSTM(9, 8, num_layers=2, batch_first=True)
(bn1): BatchNorm1d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(linear_o_1): Linear(in_features=96, out_features=96, bias=True)
(ln1): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
(linear_o_2): Linear(in_features=96, out_features=96, bias=True)
(linear_o_3): Linear(in_features=96, out_features=48, bias=True)
(linear_o_4): Linear(in_features=48, out_features=12, bias=True)
(linear_o_4_c): Linear(in_features=1, out_features=2, bias=True)
(linear_o_5_c): Linear(in_features=2, out_features=4, bias=True)
(linear_o_4_c): Linear(in_features=48, out_features=48, bias=True)
(linear_o_4_r): Linear(in_features=4, out_features=2, bias=True)
(linear_o_5_r): Linear(in_features=2, out_features=1, bias=True)
)
Epoch 0 time = 7.89, tr_rmse = 3312.61694, val_rmse = 4039.37631, ts_rmse = 4041.35114, tr_c = 0.99143, val_c = 0.71103, ts_c = 0.71483
Epoch 10 time = 8.79, tr_rmse = 3083.35775, val_rmse = 3648.69607, ts_rmse = 3659.21959, tr_c = 0.69483, val_c = 0.51741, ts_c = 0.52159
Epoch 20 time = 8.70, tr_rmse = 2548.58157, val_rmse = 2916.96280, ts_rmse = 2926.98241, tr_c = 0.60299, val_c = 0.44207, ts_c = 0.44323
Epoch 30 time = 8.56, tr_rmse = 2105.74037, val_rmse = 2274.36277, ts_rmse = 2281.30029, tr_c = 0.57979, val_c = 0.42495, ts_c = 0.42557
Epoch 40 time = 8.52, tr_rmse = 1945.62355, val_rmse = 1995.75568, ts_rmse = 2001.10207, tr_c = 0.57197, val_c = 0.41847, ts_c = 0.42077
Epoch 50 time = 8.52, tr_rmse = 1828.50157, val_rmse = 1846.91134, ts_rmse = 1851.98265, tr_c = 0.56634, val_c = 0.41308, ts_c = 0.41552
Epoch 60 time = 8.51, tr_rmse = 1747.86348, val_rmse = 1718.97600, ts_rmse = 1722.82145, tr_c = 0.56212, val_c = 0.41132, ts_c = 0.41435
Epoch 70 time = 8.55, tr_rmse = 1686.30469, val_rmse = 1636.66330, ts_rmse = 1640.06905, tr_c = 0.55596, val_c = 0.40910, ts_c = 0.41027
Epoch 80 time = 8.32, tr_rmse = 1586.46076, val_rmse = 1508.65022, ts_rmse = 1508.27202, tr_c = 0.55525, val_c = 0.40960, ts_c = 0.41081
Epoch 90 time = 8.31, tr_rmse = 1531.40842, val_rmse = 1458.94705, ts_rmse = 1458.39355, tr_c = 0.55096, val_c = 0.40696, ts_c = 0.40922
Epoch 100 time = 8.28, tr_rmse = 1532.15053, val_rmse = 1429.54626, ts_rmse = 1428.39814, tr_c = 0.54678, val_c = 0.40646, ts_c = 0.40710
Epoch 110 time = 8.42, tr_rmse = 1346.41886, val_rmse = 1223.24180, ts_rmse = 1219.79358, tr_c = 0.56146, val_c = 0.42584, ts_c = 0.42524
Epoch 120 time = 8.38, tr_rmse = 1410.67568, val_rmse = 1330.91825, ts_rmse = 1328.96637, tr_c = 0.54528, val_c = 0.40766, ts_c = 0.40964
Epoch 130 time = 8.29, tr_rmse = 1378.11991, val_rmse = 1293.02499, ts_rmse = 1291.56015, tr_c = 0.53945, val_c = 0.40513, ts_c = 0.40642
Epoch 140 time = 9.14, tr_rmse = 1287.39082, val_rmse = 1187.76034, ts_rmse = 1185.95911, tr_c = 0.54060, val_c = 0.40944, ts_c = 0.41106
%% Cell type:code id: tags:
``` python
```
@@ -197,10 +197,11 @@ class DSTnet(nn.Module):
self.ln1 = nn.LayerNorm(self.nhidden_o )
self.linear_o_2 = nn.Linear(self.nhidden_o, self.nhidden_o)
self.linear_o_3 = nn.Linear(self.nhidden_o, self.nhidden_o // 2)
self.linear_o_4 = nn.Linear(self.nhidden_o // 2, self.after)
self.linear_o_4_c = nn.Linear(self.nhidden_o // 2, self.after * 4)
self.linear_o_4_c = nn.Linear(1, 2)
self.linear_o_5_c = nn.Linear(2, 4)
self.linear_o_4_r = nn.Linear(4, 2)
# self.linear_o_4b_r = nn.Linear(2, 2)
self.linear_o_5_r = nn.Linear(2, 1)
def init_hidden(self, batch_size):
@@ -223,13 +224,14 @@ class DSTnet(nn.Module):
x = F.relu(self.linear_o_3(x))
x = F.dropout(x, 0.2, training=self.training)
x1 = self.linear_o_4(x)
x1 = self.linear_o_4_c(x)
x2 = self.linear_o_4_r(x1.view(-1, 4))
# x2 = self.linear_o_4b_r(x2)
x2 = self.linear_o_5_r(x2)
x2 = x2.reshape(x0.size(0), self.after)
x1 = x1.reshape(x0.size(0) * self.after, 4)
x2 = self.linear_o_4_c(x1.view(-1, 1))
x2 = self.linear_o_5_c(x2)
x2 = x2.reshape(x0.size(0) * self.after, 4)
return x1, x2
return x2, x1
aa = data_out_c[:last_train]
weights_c = torch.tensor([len(aa[aa==0])/len(aa[aa==0]), len(aa[aa==1])/len(aa[<