{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "intro-md",
   "metadata": {},
   "source": [
    "# Hourly trip-count forecast for the Designmuseo station\n",
    "\n",
    "Loads the aggregated 2023 trip counts, places them on a complete hourly grid (hours without a recorded trip count as 0), fits an ARIMA(2, 1, 2) model and shows the one-step-ahead forecast."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "imports",
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd\n",
    "import statsmodels.api as sm\n",
    "from statsmodels.tsa.arima.model import ARIMA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "config",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Everything a reader might want to tune lives here.\n",
    "DATA_PATH = 'datasets/aggregated_2023_Designmuseo.csv'\n",
    "\n",
    "# Modelling window in ISO (year-month-day) form: 1 April to 31 October 2023.\n",
    "WINDOW_START = pd.Timestamp('2023-04-01 00:00')\n",
    "WINDOW_END = pd.Timestamp('2023-10-31 23:00')\n",
    "\n",
    "ARIMA_ORDER = (2, 1, 2)  # (p, d, q)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "load-md",
   "metadata": {},
   "source": [
    "## Load the data and build a regular hourly series"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "edadd89f-0fc1-4e07-a249-2a0d4052258c",
   "metadata": {},
   "outputs": [],
   "source": [
    "trips_raw = pd.read_csv(DATA_PATH)\n",
    "trips_raw['Departure'] = pd.to_datetime(trips_raw['Departure'], format='mixed')\n",
    "trips_raw['Trip count'] = pd.to_numeric(trips_raw['Trip count'])\n",
    "\n",
    "# Bucket every record into its hour, the resolution the model works at.\n",
    "departure_hour = trips_raw['Departure'].dt.floor('h')\n",
    "\n",
    "# Fail loudly rather than silently dropping records the window does not cover.\n",
    "outside_window = ~departure_hour.between(WINDOW_START, WINDOW_END)\n",
    "assert not outside_window.any(), (\n",
    "    f'{outside_window.sum()} rows fall outside the modelling window'\n",
    ")\n",
    "\n",
    "# One value per hour across the whole window. Reindexing (instead of\n",
    "# concatenating a zero-filled calendar onto the data) keeps observed hours\n",
    "# from being duplicated by a spurious zero row, so the index stays unique\n",
    "# and evenly spaced and forecasts are stamped with the correct hour.\n",
    "hourly_grid = pd.date_range(WINDOW_START, WINDOW_END, freq='h', name='Departure')\n",
    "trips_hourly = (\n",
    "    trips_raw['Trip count']\n",
    "    .groupby(departure_hour)\n",
    "    .sum()\n",
    "    .reindex(hourly_grid, fill_value=0)\n",
    "    .to_period('h')\n",
    ")\n",
    "\n",
    "assert trips_hourly.index.is_unique\n",
    "assert len(trips_hourly) == len(hourly_grid)\n",
    "trips_hourly.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fit-md",
   "metadata": {},
   "source": [
    "## Fit the ARIMA model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fit-arima",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional diagnostics for choosing the order (left disabled):\n",
    "# plt.acorr(trips_hourly, maxlags=1000); plt.grid(True)\n",
    "# sm.graphics.tsa.plot_pacf(trips_hourly, lags=24, method='ywm'); plt.show()\n",
    "\n",
    "model = ARIMA(trips_hourly, order=ARIMA_ORDER)\n",
    "model_fit = model.fit()\n",
    "model_fit.summary()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "forecast-md",
   "metadata": {},
   "source": [
    "## One-step-ahead forecast"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "forecast",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Forecast for the first hour after WINDOW_END.\n",
    "model_fit.forecast(steps=1)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}