205 lines
6.7 KiB
Plaintext
205 lines
6.7 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"id": "cd21c67a-e679-43e3-85a4-471a35522d42",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"\"\\nparam_mse = list()\\nfor param in parameter_space:\\n model = sm.tsa.statespace.SARIMAX(train_data['trip'], order=param, seasonal_order=(0, 1, 0, 24), freq='h').fit()\\n forecast = model.forecast(datetime(year=2023,month=10,day=31,hour=17))\\n forecast = forecast[datetime(year=2023,month=4,day=1):]\\n #test_data.index = pd.DatetimeIndex(forecast.index)\\n \\n final = pd.concat([forecast,test_data], axis=1)\\n errors = final['trip'] - final['predicted_mean']\\n mse = np.mean(errors**2)\\n\\n param_mse.append((mse, param))\\n\""
|
|
]
|
|
},
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"import numpy as np\n",
|
|
"from sklearn.model_selection import TimeSeriesSplit\n",
|
|
"from datetime import datetime\n",
|
|
"import statsmodels.api as sm\n",
|
|
"\n",
|
|
"# Use Kamppi as an example, could be generalized to all stations\n",
|
|
"df = pd.read_csv(\"datasets/Jämeräntaival_hourly_aggregate.csv\")\n",
|
|
"df['Departure'] = pd.to_datetime(df['Departure'], format='mixed')\n",
|
|
"df.set_index('Departure', inplace=True)\n",
|
|
"\n",
|
|
"train_end = datetime(year=2022, month=10, day=31)\n",
|
|
"train_start = datetime(year=2018,month=4,day=1)\n",
|
|
"train_data = df[:train_end]\n",
|
|
"test_end = datetime(year=2023, month=10, day = 31)\n",
|
|
"test_data = df[datetime(year=2023,month=4,day=1):]\n",
|
|
"\n",
|
|
"parameter_space = list()\n",
|
|
"\n",
|
|
"for i in range(0,3):\n",
|
|
" for j in range(0,3):\n",
|
|
" for k in range(0,3):\n",
|
|
" parameter_space.append((i,j,k))\n",
|
|
"\n",
|
|
"#parameter_space = [x for x in parameter_space if x[1] == 0]\n",
|
|
"parameter_space = [x for x in parameter_space if x != (0,0,0)]\n",
|
|
"'''\n",
|
|
"param_mse = list()\n",
|
|
"for param in parameter_space:\n",
|
|
" model = sm.tsa.statespace.SARIMAX(train_data['trip'], order=param, seasonal_order=(0, 1, 0, 24), freq='h').fit()\n",
|
|
" forecast = model.forecast(datetime(year=2023,month=10,day=31,hour=17))\n",
|
|
" forecast = forecast[datetime(year=2023,month=4,day=1):]\n",
|
|
" #test_data.index = pd.DatetimeIndex(forecast.index)\n",
|
|
" \n",
|
|
" final = pd.concat([forecast,test_data], axis=1)\n",
|
|
" errors = final['trip'] - final['predicted_mean']\n",
|
|
" mse = np.mean(errors**2)\n",
|
|
"\n",
|
|
" param_mse.append((mse, param))\n",
|
|
"'''\n",
|
|
"\n",
|
|
"\n",
|
|
"\n",
|
|
" \n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 23,
|
|
"id": "db9591dd-7bfd-46d3-aa50-8a757776aeb0",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"9.043665073451029 9.056463656418249\n",
|
|
"9.029642126494705 9.043665073451029\n",
|
|
"9.029317575211266 9.029642126494705\n",
|
|
"(np.float64(9.029317575211266), (1, 0, 2))\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"min_mse = param_mse[0][0]\n",
|
|
"\n",
|
|
"index = 0\n",
|
|
"for i in range(0,len(param_mse)):\n",
|
|
" if param_mse[i][0] < min_mse:\n",
|
|
" print(param_mse[i][0], min_mse)\n",
|
|
" min_mse = param_mse[i][0]\n",
|
|
" index = i\n",
|
|
"\n",
|
|
"\n",
|
|
"print(param_mse[index])\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"id": "5b01e8b2-19f2-4809-bee7-0e0589489841",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"/home/aleksi/venv/lib/python3.12/site-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency h will be used.\n",
|
|
" self._init_dates(dates, freq)\n",
|
|
" This problem is unconstrained.\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"RUNNING THE L-BFGS-B CODE\n",
|
|
"\n",
|
|
" * * *\n",
|
|
"\n",
|
|
"Machine precision = 2.220D-16\n",
|
|
" N = 3 M = 10\n",
|
|
"\n",
|
|
"At X0 0 variables are exactly at the bounds\n",
|
|
"\n",
|
|
"At iterate 0 f= 2.51523D+00 |proj g|= 8.46816D-02\n",
|
|
"\n",
|
|
"At iterate 5 f= 2.46808D+00 |proj g|= 9.28777D-03\n",
|
|
"\n",
|
|
"At iterate 10 f= 2.45763D+00 |proj g|= 8.55309D-03\n",
|
|
"\n",
|
|
"At iterate 15 f= 2.45509D+00 |proj g|= 5.19865D-03\n",
|
|
"\n",
|
|
"At iterate 20 f= 2.45471D+00 |proj g|= 1.64676D-03\n",
|
|
"\n",
|
|
"At iterate 25 f= 2.45467D+00 |proj g|= 4.36219D-04\n",
|
|
"\n",
|
|
"At iterate 30 f= 2.45467D+00 |proj g|= 2.78346D-06\n",
|
|
"\n",
|
|
" * * *\n",
|
|
"\n",
|
|
"Tit = total number of iterations\n",
|
|
"Tnf = total number of function evaluations\n",
|
|
"Tnint = total number of segments explored during Cauchy searches\n",
|
|
"Skip = number of BFGS updates skipped\n",
|
|
"Nact = number of active bounds at final generalized Cauchy point\n",
|
|
"Projg = norm of the final projected gradient\n",
|
|
"F = final function value\n",
|
|
"\n",
|
|
" * * *\n",
|
|
"\n",
|
|
" N Tit Tnf Tnint Skip Nact Projg F\n",
|
|
" 3 30 36 1 0 0 2.783D-06 2.455D+00\n",
|
|
" F = 2.4546707052577292 \n",
|
|
"\n",
|
|
"CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL \n",
|
|
"10.388099588499935\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"model = sm.tsa.statespace.SARIMAX(train_data['trip'], order=(1,1,1), seasonal_order=(0,1,0,24), freq='h').fit()\n",
|
|
"forecast = model.forecast(datetime(year=2023,month=10,day=31,hour=17))\n",
|
|
"forecast = forecast[datetime(year=2023,month=4,day=1):]\n",
|
|
"#test_data.index = pd.DatetimeIndex(forecast.index)\n",
|
|
"\n",
|
|
"final = pd.concat([forecast,test_data], axis=1)\n",
|
|
"errors = final['trip'] - final['predicted_mean']\n",
|
|
"mse = np.mean(errors**2)\n",
|
|
"\n",
|
|
"print(mse)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "726acc6f-ea29-4bb6-8252-a6556ae10ac4",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.12.3"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|