109 lines
4.5 KiB
Plaintext
109 lines
4.5 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 65,
|
|
"id": "edadd89f-0fc1-4e07-a249-2a0d4052258c",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" SARIMAX Results \n",
|
|
"==============================================================================\n",
|
|
"Dep. Variable: Trip count No. Observations: 7999\n",
|
|
"Model: ARIMA(2, 1, 2) Log Likelihood -14325.727\n",
|
|
"Date: Thu, 26 Sep 2024 AIC 28661.454\n",
|
|
"Time: 17:35:05 BIC 28696.389\n",
|
|
"Sample: 04-01-2023 HQIC 28673.412\n",
|
|
" - 10-31-2023 \n",
|
|
"Covariance Type: opg \n",
|
|
"==============================================================================\n",
|
|
" coef std err z P>|z| [0.025 0.975]\n",
|
|
"------------------------------------------------------------------------------\n",
|
|
"ar.L1 0.4432 0.034 12.890 0.000 0.376 0.511\n",
|
|
"ar.L2 0.2343 0.009 26.703 0.000 0.217 0.251\n",
|
|
"ma.L1 -1.5914 0.035 -45.151 0.000 -1.660 -1.522\n",
|
|
"ma.L2 0.5926 0.035 16.913 0.000 0.524 0.661\n",
|
|
"sigma2 2.1040 0.015 136.783 0.000 2.074 2.134\n",
|
|
"===================================================================================\n",
|
|
"Ljung-Box (L1) (Q): 0.14 Jarque-Bera (JB): 29710.16\n",
|
|
"Prob(Q): 0.71 Prob(JB): 0.00\n",
|
|
"Heteroskedasticity (H): 0.77 Skew: 2.39\n",
|
|
"Prob(H) (two-sided): 0.00 Kurtosis: 11.15\n",
|
|
"===================================================================================\n",
|
|
"\n",
|
|
"Warnings:\n",
|
|
"[1] Covariance matrix calculated using the outer product of gradients (complex-step).\n",
|
|
"2024-02-28 07:00 0.438484\n",
|
|
"Freq: h, dtype: float64\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"import statsmodels.api as sm\n",
|
|
"from statsmodels.tsa.arima.model import ARIMA\n",
|
|
"\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"df = pd.read_csv('datasets/aggregated_2023_Designmuseo.csv')\n",
|
|
"df['Departure'] = pd.to_datetime(df['Departure'], format='mixed')\n",
|
|
"df['Trip count'] = pd.to_numeric(df['Trip count'])\n",
|
|
"\n",
|
|
"start_date = '2023-01-04 00:00'\n",
|
|
"end_date = '2023-31-10 23:00'\n",
|
|
"\n",
|
|
"start = pd.to_datetime(start_date, format = '%Y-%d-%m %H:%M')\n",
|
|
"end_date = pd.to_datetime(end_date, format = '%Y-%d-%m %H:%M')\n",
|
|
"dates = pd.DataFrame({'Departure':pd.date_range(start, end_date, freq='h')})\n",
|
|
"\n",
|
|
"\n",
|
|
"df3 = pd.concat([df, dates])\n",
|
|
"values = {'Trip count' : 0, 'Departure station id' : 7.0, 'Departure station name' : 'Designmuseo'}\n",
|
|
"df3.fillna(value=values, inplace = True)\n",
|
|
"df3 = df3.sort_values(by='Departure')\n",
|
|
"\n",
|
|
"df3.set_index('Departure', inplace=True)\n",
|
|
"df3.index = pd.DatetimeIndex(df3.index).to_period('h')\n",
|
|
"\n",
|
|
"model = ARIMA(df3['Trip count'], order=(2,1,2))\n",
|
|
"model_fit = model.fit()\n",
|
|
"# summary of fit model\n",
|
|
"print(model_fit.summary())\n",
|
|
"'''\n",
|
|
"print(df3.sort_values(by='Departure'))\n",
|
|
"plt.acorr(df3['Trip count'], maxlags = 1000)\n",
|
|
"plt.grid(True)\n",
|
|
"'''\n",
|
|
"'''\n",
|
|
"sm.graphics.tsa.plot_pacf(df3['Trip count'], lags =24, method='ywm')\n",
|
|
"plt.show()\n",
|
|
"'''\n",
|
|
"print(model_fit.forecast(1))\n"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.12.3"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|