{ "cells": [ { "cell_type": "code", "execution_count": 2, "id": "cd21c67a-e679-43e3-85a4-471a35522d42", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "\"\\nparam_mse = list()\\nfor param in parameter_space:\\n model = sm.tsa.statespace.SARIMAX(train_data['trip'], order=param, seasonal_order=(0, 1, 0, 24), freq='h').fit()\\n forecast = model.forecast(datetime(year=2023,month=10,day=31,hour=17))\\n forecast = forecast[datetime(year=2023,month=4,day=1):]\\n #test_data.index = pd.DatetimeIndex(forecast.index)\\n \\n final = pd.concat([forecast,test_data], axis=1)\\n errors = final['trip'] - final['predicted_mean']\\n mse = np.mean(errors**2)\\n\\n param_mse.append((mse, param))\\n\"" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "import numpy as np\n", "from sklearn.model_selection import TimeSeriesSplit\n", "from datetime import datetime\n", "import statsmodels.api as sm\n", "\n", "# Use Kamppi as an example, could be generalized to all stations\n", "df = pd.read_csv(\"datasets/Jämeräntaival_hourly_aggregate.csv\")\n", "df['Departure'] = pd.to_datetime(df['Departure'], format='mixed')\n", "df.set_index('Departure', inplace=True)\n", "\n", "train_end = datetime(year=2022, month=10, day=31)\n", "train_start = datetime(year=2018,month=4,day=1)\n", "train_data = df[:train_end]\n", "test_end = datetime(year=2023, month=10, day = 31)\n", "test_data = df[datetime(year=2023,month=4,day=1):]\n", "\n", "parameter_space = list()\n", "\n", "for i in range(0,3):\n", " for j in range(0,3):\n", " for k in range(0,3):\n", " parameter_space.append((i,j,k))\n", "\n", "#parameter_space = [x for x in parameter_space if x[1] == 0]\n", "parameter_space = [x for x in parameter_space if x != (0,0,0)]\n", "'''\n", "param_mse = list()\n", "for param in parameter_space:\n", " model = sm.tsa.statespace.SARIMAX(train_data['trip'], order=param, seasonal_order=(0, 1, 0, 24), freq='h').fit()\n", " forecast = model.forecast(datetime(year=2023,month=10,day=31,hour=17))\n", " forecast = forecast[datetime(year=2023,month=4,day=1):]\n", " #test_data.index = pd.DatetimeIndex(forecast.index)\n", " \n", " final = pd.concat([forecast,test_data], axis=1)\n", " errors = final['trip'] - final['predicted_mean']\n", " mse = np.mean(errors**2)\n", "\n", " param_mse.append((mse, param))\n", "'''\n", "\n", "\n", "\n", " \n" ] }, { "cell_type": "code", "execution_count": 23, "id": "db9591dd-7bfd-46d3-aa50-8a757776aeb0", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "9.043665073451029 9.056463656418249\n", "9.029642126494705 9.043665073451029\n", "9.029317575211266 9.029642126494705\n", "(np.float64(9.029317575211266), (1, 0, 2))\n" ] } ], "source": [ "min_mse = param_mse[0][0]\n", "\n", "index = 0\n", "for i in range(0,len(param_mse)):\n", " if param_mse[i][0] < min_mse:\n", " print(param_mse[i][0], min_mse)\n", " min_mse = param_mse[i][0]\n", " index = i\n", "\n", "\n", "print(param_mse[index])\n" ] }, { "cell_type": "code", "execution_count": 5, "id": "5b01e8b2-19f2-4809-bee7-0e0589489841", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/aleksi/venv/lib/python3.12/site-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency h will be used.\n", " self._init_dates(dates, freq)\n", " This problem is unconstrained.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "RUNNING THE L-BFGS-B CODE\n", "\n", " * * *\n", "\n", "Machine precision = 2.220D-16\n", " N = 3 M = 10\n", "\n", "At X0 0 variables are exactly at the bounds\n", "\n", "At iterate 0 f= 2.51523D+00 |proj g|= 8.46816D-02\n", "\n", "At iterate 5 f= 2.46808D+00 |proj g|= 9.28777D-03\n", "\n", "At iterate 10 f= 2.45763D+00 |proj g|= 8.55309D-03\n", "\n", "At iterate 15 f= 2.45509D+00 |proj g|= 5.19865D-03\n", "\n", "At iterate 20 f= 2.45471D+00 |proj g|= 1.64676D-03\n", "\n", "At iterate 25 f= 2.45467D+00 |proj g|= 4.36219D-04\n", "\n", "At iterate 30 f= 2.45467D+00 |proj g|= 2.78346D-06\n", "\n", " * * *\n", "\n", "Tit = total number of iterations\n", "Tnf = total number of function evaluations\n", "Tnint = total number of segments explored during Cauchy searches\n", "Skip = number of BFGS updates skipped\n", "Nact = number of active bounds at final generalized Cauchy point\n", "Projg = norm of the final projected gradient\n", "F = final function value\n", "\n", " * * *\n", "\n", " N Tit Tnf Tnint Skip Nact Projg F\n", " 3 30 36 1 0 0 2.783D-06 2.455D+00\n", " F = 2.4546707052577292 \n", "\n", "CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL \n", "10.388099588499935\n" ] } ], "source": [ "model = sm.tsa.statespace.SARIMAX(train_data['trip'], order=(1,1,1), seasonal_order=(0,1,0,24), freq='h').fit()\n", "forecast = model.forecast(datetime(year=2023,month=10,day=31,hour=17))\n", "forecast = forecast[datetime(year=2023,month=4,day=1):]\n", "#test_data.index = pd.DatetimeIndex(forecast.index)\n", "\n", "final = pd.concat([forecast,test_data], axis=1)\n", "errors = final['trip'] - final['predicted_mean']\n", "mse = np.mean(errors**2)\n", "\n", "print(mse)" ] }, { "cell_type": "code", "execution_count": null, "id": "726acc6f-ea29-4bb6-8252-a6556ae10ac4", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.3" } }, "nbformat": 4, "nbformat_minor": 5 }