{ "cells": [ { "cell_type": "code", "execution_count": 56, "id": "cd21c67a-e679-43e3-85a4-471a35522d42", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "asd\n", "asd2\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/Users/otto/.pyenv/versions/3.11.2/lib/python3.11/site-packages/statsmodels/tsa/statespace/sarimax.py:966: UserWarning: Non-stationary starting autoregressive parameters found. Using zeros as starting parameters.\n", " warn('Non-stationary starting autoregressive parameters'\n", "/Users/otto/.pyenv/versions/3.11.2/lib/python3.11/site-packages/statsmodels/tsa/statespace/sarimax.py:978: UserWarning: Non-invertible starting MA parameters found. Using zeros as starting parameters.\n", " warn('Non-invertible starting MA parameters found.'\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "RUNNING THE L-BFGS-B CODE\n", "\n", " * * *\n", "\n", "Machine precision = 2.220D-16\n", " N = 8 M = 10\n", "\n", "At X0 0 variables are exactly at the bounds\n", "\n", "At iterate 0 f= 2.52687D+00 |proj g|= 3.92148D-01\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " This problem is unconstrained.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "At iterate 5 f= 2.24021D+00 |proj g|= 1.15085D-01\n", "\n", "At iterate 10 f= 2.19412D+00 |proj g|= 5.75993D-03\n", "\n", "At iterate 15 f= 2.19337D+00 |proj g|= 8.22902D-04\n", "\n", "At iterate 20 f= 2.19327D+00 |proj g|= 4.98625D-03\n", "\n", "At iterate 25 f= 2.19325D+00 |proj g|= 3.48694D-05\n", "\n", " * * *\n", "\n", "Tit = total number of iterations\n", "Tnf = total number of function evaluations\n", "Tnint = total number of segments explored during Cauchy searches\n", "Skip = number of BFGS updates skipped\n", "Nact = number of active bounds at final generalized Cauchy point\n", "Projg = norm of the final projected gradient\n", "F = final function value\n", "\n", " * * *\n", "\n", " N Tit Tnf Tnint Skip Nact Projg F\n", " 8 25 26 1 0 0 3.487D-05 2.193D+00\n", " F = 2.1932501470633761 \n", "\n", "CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/Users/otto/.pyenv/versions/3.11.2/lib/python3.11/site-packages/statsmodels/tsa/statespace/sarimax.py:966: UserWarning: Non-stationary starting autoregressive parameters found. Using zeros as starting parameters.\n", " warn('Non-stationary starting autoregressive parameters'\n", "/Users/otto/.pyenv/versions/3.11.2/lib/python3.11/site-packages/statsmodels/tsa/statespace/sarimax.py:978: UserWarning: Non-invertible starting MA parameters found. Using zeros as starting parameters.\n", " warn('Non-invertible starting MA parameters found.'\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "RUNNING THE L-BFGS-B CODE\n", "\n", " * * *\n", "\n", "Machine precision = 2.220D-16\n", " N = 10 M = 10\n", "\n", "At X0 0 variables are exactly at the bounds\n", "\n", "At iterate 0 f= 2.52689D+00 |proj g|= 3.92567D-01\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " This problem is unconstrained.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "At iterate 5 f= 2.23987D+00 |proj g|= 1.15352D-01\n", "\n", "At iterate 10 f= 2.19343D+00 |proj g|= 5.82758D-03\n", "\n", "At iterate 15 f= 2.19242D+00 |proj g|= 1.41481D-03\n", "\n", "At iterate 20 f= 2.19228D+00 |proj g|= 1.02376D-03\n", "\n", "At iterate 25 f= 2.19225D+00 |proj g|= 8.93884D-04\n", "\n", "At iterate 30 f= 2.19224D+00 |proj g|= 6.02699D-05\n", "\n", "At iterate 35 f= 2.19224D+00 |proj g|= 4.09553D-04\n", "\n", "At iterate 40 f= 2.19223D+00 |proj g|= 4.81752D-04\n", "\n", "At iterate 45 f= 2.19220D+00 |proj g|= 9.39974D-04\n", "\n", "At iterate 50 f= 2.19205D+00 |proj g|= 5.16353D-03\n", "\n", " * * *\n", "\n", "Tit = total number of iterations\n", "Tnf = total number of function evaluations\n", "Tnint = total number of segments explored during Cauchy searches\n", "Skip = number of BFGS updates skipped\n", "Nact = number of active bounds at final generalized Cauchy point\n", "Projg = norm of the final projected gradient\n", "F = final function value\n", "\n", " * * *\n", "\n", " N Tit Tnf Tnint Skip Nact Projg F\n", " 10 50 54 1 0 0 5.164D-03 2.192D+00\n", " F = 2.1920465431369465 \n", "\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/Users/otto/.pyenv/versions/3.11.2/lib/python3.11/site-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals\n", " warnings.warn(\"Maximum Likelihood optimization failed to \"\n" ] } ], "source": [ "import pandas as pd\n", "import numpy as np\n", "from sklearn.model_selection import TimeSeriesSplit\n", "from datetime import datetime\n", "import statsmodels.api as sm\n", "\n", "stations = ['Jämeräntaival']\n", "data = pd.read_csv('datasets/' + stations[0] + '_hourly_aggregate.csv')\n", "data['Departure'] = pd.to_datetime(data['Departure'], format='mixed')\n", "\n", "results = []\n", "\n", "weather_df = pd.read_csv('datasets/weather_hourly_helsinki.csv')\n", "weather_df = weather_df.loc[1:, :]\n", "weather_df.columns = weather_df.iloc[0]\n", "weather_df = weather_df.loc[2:, :]\n", "weather_df['time'] = pd.to_datetime(weather_df['time'], format='mixed')\n", "\n", "data = pd.merge(weather_df, data, how='inner', left_on='time', right_on='Departure')\n", "data = data.drop(['time'], axis=1)\n", "\n", "data['temperature_2m (°C)'] = pd.to_numeric(data['temperature_2m (°C)'], errors='coerce')\n", "data['rain (mm)'] = pd.to_numeric(data['rain (mm)'], errors='coerce')\n", "data['trip'] = pd.to_numeric(data['trip'], errors='coerce')\n", "\n", "\n", "# generation of weekday & hour series\n", "datedata = pd.DataFrame()\n", "datedata['date'] = data['Departure']\n", "datedata['weekday'] = data['Departure'].dt.weekday\n", "days = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat']\n", "for day in days:\n", " datedata[day] = 0\n", " datedata.loc[datedata['date'].dt.weekday == 0, day] = 1\n", "for i in range(0, 23):\n", " asd = 'hour' + str(i)\n", " days.append(asd)\n", " datedata[asd] = 0\n", " datedata.loc[datedata['date'].dt.hour == i, asd] = 1\n", "\n", "data = pd.merge(datedata, data, how='inner', left_on='date', right_on='Departure')\n", "data.set_index(data['Departure'], inplace=True)\n", "\n", "\n", "\n", "train_end = datetime(year=2022, month=10, day=31)\n", "train_start = datetime(year=2018,month=4,day=1)\n", "train_data = data[:train_end]\n", "test_end = datetime(year=2023, month=10, day = 31)\n", "test_data = data[datetime(year=2023,month=4,day=1):]\n", "\n", "\n", "def MASE(y_true, y_pred, y_train):\n", " forecast = y_pred.reset_index(drop=True)\n", " outsample = y_true[:].iloc[:len(y_pred)]\n", " insample = y_train.reset_index(drop=True).to_numpy()\n", " frequency=1\n", " return np.mean(np.abs(forecast - outsample)) / np.mean(np.abs(insample[:-frequency] - insample[frequency:]))\n", "\n", "\n", "# reset indexes as they do funky things\n", "test_data.reset_index(drop=True, inplace=True)\n", "train_data.reset_index(drop=True, inplace=True)\n", "\n", "print('')\n", "\n", "# no exogenous variables at all\n", "model = sm.tsa.statespace.SARIMAX(train_data['trip'], order=(3,1,2), seasonal_order=(1, 1, 1, 24)).fit()\n", "forecast = model.forecast(steps=24*30*7)\n", "forecast = forecast[:]\n", "results.append((MASE(test_data['trip'], forecast, train_data['trip']), 'no exog'))\n", "\n", "#just the weather as exogenous data\n", "exogenous = ['rain (mm)', 'temperature_2m (°C)']\n", "model = sm.tsa.statespace.SARIMAX(train_data['trip'], order=(3,1,2), seasonal_order=(1, 1, 1, 24), exog=train_data[exogenous]).fit()\n", "forecast = model.forecast(steps=24*30*7, exog=test_data[exogenous].iloc[:(24*30*7)])\n", "forecast = forecast[:]\n", "results.append((MASE(test_data['trip'], forecast, train_data['trip']), 'just weather'))\n", "\n", "#just weekday + time of the day as exogenous\n", "model = sm.tsa.statespace.SARIMAX(train_data['trip'], order=(3,1,2), seasonal_order=(1, 1, 1, 24), exog=train_data[days]).fit()\n", "forecast = model.forecast(steps=24*30*7, exog=test_data[days].iloc[:(24*30*7)])\n", "forecast = forecast[:]\n", "results.append((MASE(test_data['trip'], forecast, train_data['trip']), 'just timely stuff'))\n", "\n", "\n", "#all exogenous variables\n", "for asd in days:\n", " exogenous.append(asd)\n", "model = sm.tsa.statespace.SARIMAX(train_data['trip'], order=(3,1,2), seasonal_order=(1, 1, 1, 24), exog=train_data[exogenous]).fit()\n", "forecast = model.forecast(steps=24*30*7, exog=test_data[exogenous].iloc[:(24*30*7)])\n", "forecast = forecast[:]\n", "results.append((MASE(test_data['trip'], forecast, train_data['trip']), 'all exogs'))\n", "\n", "\n", "print(results)\n", " \n", "\n" ] }, { "cell_type": "code", "execution_count": null, "id": "5b01e8b2-19f2-4809-bee7-0e0589489841", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.2" } }, "nbformat": 4, "nbformat_minor": 5 }