---
This commit is contained in:
@@ -0,0 +1,6 @@
|
||||
env
|
||||
datasets/20*
|
||||
datasets/full_bike_data.csv
|
||||
main.ipynb
|
||||
deploy.sh
|
||||
.env
|
||||
@@ -0,0 +1,14 @@
|
||||
# Production image for the Django backend.
FROM python:3.13.0-bookworm

WORKDIR /usr/src/app

# Install dependencies before copying the source tree so this layer is served
# from the build cache whenever only application code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Application code, then the production environment file under the name the app loads.
COPY . .
COPY .env.production .env

EXPOSE 9000

ENV DJANGO_ENV=production

# NOTE(review): runserver is Django's development server; consider a dedicated
# WSGI/ASGI server before exposing this container publicly.
CMD ["python", "manage.py", "runserver", "0.0.0.0:9000", "--noreload"]
|
||||
@@ -0,0 +1 @@
|
||||
## This is the Django backend for our Data Science project.
|
||||
@@ -0,0 +1,3 @@
|
||||
from django.contrib import admin
|
||||
|
||||
# Register your models here.
|
||||
@@ -0,0 +1,6 @@
|
||||
from django.apps import AppConfig
|
||||
|
||||
|
||||
class AppConfig(AppConfig):
    """Django application configuration for the ``app`` package."""

    # NOTE(review): this class reuses the name of the imported
    # django.apps.AppConfig base class and shadows it inside this module.

    # Primary-key field type used for models that do not declare one.
    default_auto_field = 'django.db.models.BigAutoField'
    # Dotted Python path of the application this configuration belongs to.
    name = 'app'
|
||||
@@ -0,0 +1,3 @@
|
||||
from django.db import models
|
||||
|
||||
# Create your models here.
|
||||
@@ -0,0 +1,3 @@
|
||||
from django.test import TestCase
|
||||
|
||||
# Create your tests here.
|
||||
@@ -0,0 +1,8 @@
|
||||
from django.urls import path
|
||||
|
||||
from . import views
|
||||
|
||||
# Routes of this app; paths are relative to wherever this URLconf is included.
urlpatterns = [
    # Empty path -> views.index
    path('', views.index, name='index'),
    # 'predict' -> views.predict
    path('predict', views.predict, name='predict'),
]
|
||||
@@ -0,0 +1,80 @@
|
||||
from django.http import HttpResponse, JsonResponse
|
||||
from datetime import datetime
|
||||
import pandas as pd
|
||||
import statsmodels.api as sm
|
||||
|
||||
# Stations for which forecasting models are fitted when this module is imported.
stations = ['Kamppi (M)', 'Rautatientori - itä']
# Maps station name -> fitted models and the hourly history they were fitted on.
station_dict = {}

# Bike count a station is assumed to start from when computing the balance.
_DEFAULT_BIKE_COUNT = 28
# Value reported in the response when no count is available for the requested hour.
_UNKNOWN_COUNT = -1


def _load_hourly_trips(csv_path, time_column):
    """Load an hourly aggregate CSV and index it by its timestamp column.

    Args:
        csv_path: Path of the CSV file to read.
        time_column: Name of the timestamp column ('Departure' or 'Return').

    Returns:
        DataFrame indexed by the parsed timestamps, with 'trip' coerced to numeric.
    """
    frame = pd.read_csv(csv_path)
    frame[time_column] = pd.to_datetime(frame[time_column], format='mixed')
    # The timestamp stays available as a regular column as well as the index.
    frame.set_index(frame[time_column], inplace=True)
    frame['trip'] = pd.to_numeric(frame['trip'], errors='coerce')
    # NOTE(review): axis=1 drops whole columns that contain a NaN (unchanged from
    # the original code); confirm that dropping columns rather than rows is intended.
    return frame.dropna(axis=1)


def _fit_hourly_model(trips):
    """Fit the seasonal ARIMA model (24-step season) to an hourly trip series."""
    model = sm.tsa.statespace.SARIMAX(
        trips, order=(1, 1, 1), seasonal_order=(0, 1, 0, 24), freq='h'
    )
    return model.fit(disp=False, low_memory=True)


# Fit one departure model and one return model per station at import time.
# The history frames are stored next to the models so that predict() can look up
# observed counts for the station that was actually requested.
for station in stations:
    departure_data = _load_hourly_trips('datasets/' + station + '_hourly_aggregate.csv', 'Departure')
    return_data = _load_hourly_trips('datasets/' + station + '_return_hourly_aggregate.csv', 'Return')

    departure_mod = _fit_hourly_model(departure_data['trip'])
    return_mod = _fit_hourly_model(return_data['trip'])

    station_dict[station] = {
        'departure_mod': departure_mod,
        'return_mod': return_mod,
        'departure_data': departure_data,
        'return_data': return_data,
    }


def index(request):
    """Return a plain-text placeholder response for the app root."""
    return HttpResponse('You\'re at the app index.')


def _trip_count(model, history, when):
    """Return the trip count for the hour ``when``.

    Uses the observed value when ``when`` lies within the history and forecasts
    with ``model`` otherwise.

    Returns:
        The count, or None when ``when`` is within the history range but no row
        exists for that hour.
    """
    if history.index.max() >= when:
        observed = history.loc[history.index == when, 'trip'].tolist()
        return observed[0] if observed else None
    # forecast() accepts a date as its end point; the last element is the value at ``when``.
    forecast = model.forecast(when)
    return int(round(forecast.iloc[-1]))


def predict(request):
    """Predict departures, returns and the resulting bike balance for a station.

    Query parameters:
        timestamp: Date-time to predict for; defaults to the current time. It is
            floored to the full hour because the underlying series are hourly.
        station: Station name; defaults to 'Kamppi (M)'.

    Returns:
        JsonResponse with the keys 'timestamp', 'station', 'departingCount',
        'returningCount', 'bikeAtStationCount' and 'increasing'. A count of -1
        means no value was available for that hour; such a count is treated as
        zero when computing the balance. Errors are reported as JSON with
        status 405 (non-GET request), 404 (unknown station) or 400 (invalid
        timestamp).
    """
    if request.method != 'GET':
        return JsonResponse({'error': 'Only GET requests are supported.'}, status=405)

    current = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    timestamp = request.GET.get('timestamp', current)
    station = request.GET.get('station', 'Kamppi (M)')

    station_state = station_dict.get(station)
    if station_state is None:
        return JsonResponse({'error': f'Unknown station: {station}'}, status=404)

    try:
        when = pd.to_datetime(timestamp)
    except (ValueError, TypeError, OverflowError):
        when = pd.NaT
    # Timezone-aware values cannot be compared with the timestamps parsed from the CSVs.
    if pd.isna(when) or when.tzinfo is not None:
        return JsonResponse(
            {'error': 'timestamp must be a valid date-time without a UTC offset.'},
            status=400,
        )
    when = when.floor('h')

    departing = _trip_count(station_state['departure_mod'], station_state['departure_data'], when)
    returning = _trip_count(station_state['return_mod'], station_state['return_data'], when)

    # A missing count must not leak the sentinel value into the arithmetic.
    known_departing = 0 if departing is None else departing
    known_returning = 0 if returning is None else returning

    result = {
        'timestamp': timestamp,
        'station': station,
        'departingCount': _UNKNOWN_COUNT if departing is None else departing,
        'returningCount': _UNKNOWN_COUNT if returning is None else returning,
        'bikeAtStationCount': _DEFAULT_BIKE_COUNT - known_departing + known_returning,
        'increasing': known_returning > known_departing,
    }

    return JsonResponse(result)
|
||||
@@ -0,0 +1,204 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "cd21c67a-e679-43e3-85a4-471a35522d42",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"\\nparam_mse = list()\\nfor param in parameter_space:\\n model = sm.tsa.statespace.SARIMAX(train_data['trip'], order=param, seasonal_order=(0, 1, 0, 24), freq='h').fit()\\n forecast = model.forecast(datetime(year=2023,month=10,day=31,hour=17))\\n forecast = forecast[datetime(year=2023,month=4,day=1):]\\n #test_data.index = pd.DatetimeIndex(forecast.index)\\n \\n final = pd.concat([forecast,test_data], axis=1)\\n errors = final['trip'] - final['predicted_mean']\\n mse = np.mean(errors**2)\\n\\n param_mse.append((mse, param))\\n\""
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"from sklearn.model_selection import TimeSeriesSplit\n",
|
||||
"from datetime import datetime\n",
|
||||
"import statsmodels.api as sm\n",
|
||||
"\n",
|
||||
"# Use Jämeräntaival as an example, could be generalized to all stations\n",
|
||||
"df = pd.read_csv(\"datasets/Jämeräntaival_hourly_aggregate.csv\")\n",
|
||||
"df['Departure'] = pd.to_datetime(df['Departure'], format='mixed')\n",
|
||||
"df.set_index('Departure', inplace=True)\n",
|
||||
"\n",
|
||||
"train_end = datetime(year=2022, month=10, day=31)\n",
|
||||
"train_start = datetime(year=2018,month=4,day=1)\n",
|
||||
"train_data = df[:train_end]\n",
|
||||
"test_end = datetime(year=2023, month=10, day = 31)\n",
|
||||
"test_data = df[datetime(year=2023,month=4,day=1):]\n",
|
||||
"\n",
|
||||
"parameter_space = list()\n",
|
||||
"\n",
|
||||
"for i in range(0,3):\n",
|
||||
" for j in range(0,3):\n",
|
||||
" for k in range(0,3):\n",
|
||||
" parameter_space.append((i,j,k))\n",
|
||||
"\n",
|
||||
"#parameter_space = [x for x in parameter_space if x[1] == 0]\n",
|
||||
"parameter_space = [x for x in parameter_space if x != (0,0,0)]\n",
|
||||
"'''\n",
|
||||
"param_mse = list()\n",
|
||||
"for param in parameter_space:\n",
|
||||
" model = sm.tsa.statespace.SARIMAX(train_data['trip'], order=param, seasonal_order=(0, 1, 0, 24), freq='h').fit()\n",
|
||||
" forecast = model.forecast(datetime(year=2023,month=10,day=31,hour=17))\n",
|
||||
" forecast = forecast[datetime(year=2023,month=4,day=1):]\n",
|
||||
" #test_data.index = pd.DatetimeIndex(forecast.index)\n",
|
||||
" \n",
|
||||
" final = pd.concat([forecast,test_data], axis=1)\n",
|
||||
" errors = final['trip'] - final['predicted_mean']\n",
|
||||
" mse = np.mean(errors**2)\n",
|
||||
"\n",
|
||||
" param_mse.append((mse, param))\n",
|
||||
"'''\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"id": "db9591dd-7bfd-46d3-aa50-8a757776aeb0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"9.043665073451029 9.056463656418249\n",
|
||||
"9.029642126494705 9.043665073451029\n",
|
||||
"9.029317575211266 9.029642126494705\n",
|
||||
"(np.float64(9.029317575211266), (1, 0, 2))\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"min_mse = param_mse[0][0]\n",
|
||||
"\n",
|
||||
"index = 0\n",
|
||||
"for i in range(0,len(param_mse)):\n",
|
||||
" if param_mse[i][0] < min_mse:\n",
|
||||
" print(param_mse[i][0], min_mse)\n",
|
||||
" min_mse = param_mse[i][0]\n",
|
||||
" index = i\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"print(param_mse[index])\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "5b01e8b2-19f2-4809-bee7-0e0589489841",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/home/aleksi/venv/lib/python3.12/site-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency h will be used.\n",
|
||||
" self._init_dates(dates, freq)\n",
|
||||
" This problem is unconstrained.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"RUNNING THE L-BFGS-B CODE\n",
|
||||
"\n",
|
||||
" * * *\n",
|
||||
"\n",
|
||||
"Machine precision = 2.220D-16\n",
|
||||
" N = 3 M = 10\n",
|
||||
"\n",
|
||||
"At X0 0 variables are exactly at the bounds\n",
|
||||
"\n",
|
||||
"At iterate 0 f= 2.51523D+00 |proj g|= 8.46816D-02\n",
|
||||
"\n",
|
||||
"At iterate 5 f= 2.46808D+00 |proj g|= 9.28777D-03\n",
|
||||
"\n",
|
||||
"At iterate 10 f= 2.45763D+00 |proj g|= 8.55309D-03\n",
|
||||
"\n",
|
||||
"At iterate 15 f= 2.45509D+00 |proj g|= 5.19865D-03\n",
|
||||
"\n",
|
||||
"At iterate 20 f= 2.45471D+00 |proj g|= 1.64676D-03\n",
|
||||
"\n",
|
||||
"At iterate 25 f= 2.45467D+00 |proj g|= 4.36219D-04\n",
|
||||
"\n",
|
||||
"At iterate 30 f= 2.45467D+00 |proj g|= 2.78346D-06\n",
|
||||
"\n",
|
||||
" * * *\n",
|
||||
"\n",
|
||||
"Tit = total number of iterations\n",
|
||||
"Tnf = total number of function evaluations\n",
|
||||
"Tnint = total number of segments explored during Cauchy searches\n",
|
||||
"Skip = number of BFGS updates skipped\n",
|
||||
"Nact = number of active bounds at final generalized Cauchy point\n",
|
||||
"Projg = norm of the final projected gradient\n",
|
||||
"F = final function value\n",
|
||||
"\n",
|
||||
" * * *\n",
|
||||
"\n",
|
||||
" N Tit Tnf Tnint Skip Nact Projg F\n",
|
||||
" 3 30 36 1 0 0 2.783D-06 2.455D+00\n",
|
||||
" F = 2.4546707052577292 \n",
|
||||
"\n",
|
||||
"CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL \n",
|
||||
"10.388099588499935\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"model = sm.tsa.statespace.SARIMAX(train_data['trip'], order=(1,1,1), seasonal_order=(0,1,0,24), freq='h').fit()\n",
|
||||
"forecast = model.forecast(datetime(year=2023,month=10,day=31,hour=17))\n",
|
||||
"forecast = forecast[datetime(year=2023,month=4,day=1):]\n",
|
||||
"#test_data.index = pd.DatetimeIndex(forecast.index)\n",
|
||||
"\n",
|
||||
"final = pd.concat([forecast,test_data], axis=1)\n",
|
||||
"errors = final['trip'] - final['predicted_mean']\n",
|
||||
"mse = np.mean(errors**2)\n",
|
||||
"\n",
|
||||
"print(mse)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "726acc6f-ea29-4bb6-8252-a6556ae10ac4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Binary file not shown.
@@ -0,0 +1 @@
|
||||
[(np.float64(3.3274909833208963), 'no exogenous data'), (np.float64(3.158263780804383), 'just weather'), (np.float64(3.3220296790264277), 'just weekdays and hours'), (np.float64(3.1363872336562717), 'all exogenous')]
|
||||
@@ -0,0 +1,301 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 56,
|
||||
"id": "cd21c67a-e679-43e3-85a4-471a35522d42",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"asd\n",
|
||||
"asd2\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/otto/.pyenv/versions/3.11.2/lib/python3.11/site-packages/statsmodels/tsa/statespace/sarimax.py:966: UserWarning: Non-stationary starting autoregressive parameters found. Using zeros as starting parameters.\n",
|
||||
" warn('Non-stationary starting autoregressive parameters'\n",
|
||||
"/Users/otto/.pyenv/versions/3.11.2/lib/python3.11/site-packages/statsmodels/tsa/statespace/sarimax.py:978: UserWarning: Non-invertible starting MA parameters found. Using zeros as starting parameters.\n",
|
||||
" warn('Non-invertible starting MA parameters found.'\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"RUNNING THE L-BFGS-B CODE\n",
|
||||
"\n",
|
||||
" * * *\n",
|
||||
"\n",
|
||||
"Machine precision = 2.220D-16\n",
|
||||
" N = 8 M = 10\n",
|
||||
"\n",
|
||||
"At X0 0 variables are exactly at the bounds\n",
|
||||
"\n",
|
||||
"At iterate 0 f= 2.52687D+00 |proj g|= 3.92148D-01\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" This problem is unconstrained.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"At iterate 5 f= 2.24021D+00 |proj g|= 1.15085D-01\n",
|
||||
"\n",
|
||||
"At iterate 10 f= 2.19412D+00 |proj g|= 5.75993D-03\n",
|
||||
"\n",
|
||||
"At iterate 15 f= 2.19337D+00 |proj g|= 8.22902D-04\n",
|
||||
"\n",
|
||||
"At iterate 20 f= 2.19327D+00 |proj g|= 4.98625D-03\n",
|
||||
"\n",
|
||||
"At iterate 25 f= 2.19325D+00 |proj g|= 3.48694D-05\n",
|
||||
"\n",
|
||||
" * * *\n",
|
||||
"\n",
|
||||
"Tit = total number of iterations\n",
|
||||
"Tnf = total number of function evaluations\n",
|
||||
"Tnint = total number of segments explored during Cauchy searches\n",
|
||||
"Skip = number of BFGS updates skipped\n",
|
||||
"Nact = number of active bounds at final generalized Cauchy point\n",
|
||||
"Projg = norm of the final projected gradient\n",
|
||||
"F = final function value\n",
|
||||
"\n",
|
||||
" * * *\n",
|
||||
"\n",
|
||||
" N Tit Tnf Tnint Skip Nact Projg F\n",
|
||||
" 8 25 26 1 0 0 3.487D-05 2.193D+00\n",
|
||||
" F = 2.1932501470633761 \n",
|
||||
"\n",
|
||||
"CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/otto/.pyenv/versions/3.11.2/lib/python3.11/site-packages/statsmodels/tsa/statespace/sarimax.py:966: UserWarning: Non-stationary starting autoregressive parameters found. Using zeros as starting parameters.\n",
|
||||
" warn('Non-stationary starting autoregressive parameters'\n",
|
||||
"/Users/otto/.pyenv/versions/3.11.2/lib/python3.11/site-packages/statsmodels/tsa/statespace/sarimax.py:978: UserWarning: Non-invertible starting MA parameters found. Using zeros as starting parameters.\n",
|
||||
" warn('Non-invertible starting MA parameters found.'\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"RUNNING THE L-BFGS-B CODE\n",
|
||||
"\n",
|
||||
" * * *\n",
|
||||
"\n",
|
||||
"Machine precision = 2.220D-16\n",
|
||||
" N = 10 M = 10\n",
|
||||
"\n",
|
||||
"At X0 0 variables are exactly at the bounds\n",
|
||||
"\n",
|
||||
"At iterate 0 f= 2.52689D+00 |proj g|= 3.92567D-01\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" This problem is unconstrained.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"At iterate 5 f= 2.23987D+00 |proj g|= 1.15352D-01\n",
|
||||
"\n",
|
||||
"At iterate 10 f= 2.19343D+00 |proj g|= 5.82758D-03\n",
|
||||
"\n",
|
||||
"At iterate 15 f= 2.19242D+00 |proj g|= 1.41481D-03\n",
|
||||
"\n",
|
||||
"At iterate 20 f= 2.19228D+00 |proj g|= 1.02376D-03\n",
|
||||
"\n",
|
||||
"At iterate 25 f= 2.19225D+00 |proj g|= 8.93884D-04\n",
|
||||
"\n",
|
||||
"At iterate 30 f= 2.19224D+00 |proj g|= 6.02699D-05\n",
|
||||
"\n",
|
||||
"At iterate 35 f= 2.19224D+00 |proj g|= 4.09553D-04\n",
|
||||
"\n",
|
||||
"At iterate 40 f= 2.19223D+00 |proj g|= 4.81752D-04\n",
|
||||
"\n",
|
||||
"At iterate 45 f= 2.19220D+00 |proj g|= 9.39974D-04\n",
|
||||
"\n",
|
||||
"At iterate 50 f= 2.19205D+00 |proj g|= 5.16353D-03\n",
|
||||
"\n",
|
||||
" * * *\n",
|
||||
"\n",
|
||||
"Tit = total number of iterations\n",
|
||||
"Tnf = total number of function evaluations\n",
|
||||
"Tnint = total number of segments explored during Cauchy searches\n",
|
||||
"Skip = number of BFGS updates skipped\n",
|
||||
"Nact = number of active bounds at final generalized Cauchy point\n",
|
||||
"Projg = norm of the final projected gradient\n",
|
||||
"F = final function value\n",
|
||||
"\n",
|
||||
" * * *\n",
|
||||
"\n",
|
||||
" N Tit Tnf Tnint Skip Nact Projg F\n",
|
||||
" 10 50 54 1 0 0 5.164D-03 2.192D+00\n",
|
||||
" F = 2.1920465431369465 \n",
|
||||
"\n",
|
||||
"STOP: TOTAL NO. of ITERATIONS REACHED LIMIT \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/otto/.pyenv/versions/3.11.2/lib/python3.11/site-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals\n",
|
||||
" warnings.warn(\"Maximum Likelihood optimization failed to \"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"from sklearn.model_selection import TimeSeriesSplit\n",
|
||||
"from datetime import datetime\n",
|
||||
"import statsmodels.api as sm\n",
|
||||
"\n",
|
||||
"stations = ['Jämeräntaival']\n",
|
||||
"data = pd.read_csv('datasets/' + stations[0] + '_hourly_aggregate.csv')\n",
|
||||
"data['Departure'] = pd.to_datetime(data['Departure'], format='mixed')\n",
|
||||
"\n",
|
||||
"results = []\n",
|
||||
"\n",
|
||||
"weather_df = pd.read_csv('datasets/weather_hourly_helsinki.csv')\n",
|
||||
"weather_df = weather_df.loc[1:, :]\n",
|
||||
"weather_df.columns = weather_df.iloc[0]\n",
|
||||
"weather_df = weather_df.loc[2:, :]\n",
|
||||
"weather_df['time'] = pd.to_datetime(weather_df['time'], format='mixed')\n",
|
||||
"\n",
|
||||
"data = pd.merge(weather_df, data, how='inner', left_on='time', right_on='Departure')\n",
|
||||
"data = data.drop(['time'], axis=1)\n",
|
||||
"\n",
|
||||
"data['temperature_2m (°C)'] = pd.to_numeric(data['temperature_2m (°C)'], errors='coerce')\n",
|
||||
"data['rain (mm)'] = pd.to_numeric(data['rain (mm)'], errors='coerce')\n",
|
||||
"data['trip'] = pd.to_numeric(data['trip'], errors='coerce')\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# generation of weekday & hour series\n",
|
||||
"datedata = pd.DataFrame()\n",
|
||||
"datedata['date'] = data['Departure']\n",
|
||||
"datedata['weekday'] = data['Departure'].dt.weekday\n",
|
||||
"days = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat']\n",
|
||||
"# One indicator column per weekday name (Monday=0 ... Saturday=5); Sunday has no column.\n",
|
||||
"for weekday, day in enumerate(days):\n",
|
||||
"    datedata[day] = 0\n",
|
||||
"    datedata.loc[datedata['date'].dt.weekday == weekday, day] = 1\n",
|
||||
"for i in range(0, 23):\n",
|
||||
" asd = 'hour' + str(i)\n",
|
||||
" days.append(asd)\n",
|
||||
" datedata[asd] = 0\n",
|
||||
" datedata.loc[datedata['date'].dt.hour == i, asd] = 1\n",
|
||||
"\n",
|
||||
"data = pd.merge(datedata, data, how='inner', left_on='date', right_on='Departure')\n",
|
||||
"data.set_index(data['Departure'], inplace=True)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"train_end = datetime(year=2022, month=10, day=31)\n",
|
||||
"train_start = datetime(year=2018,month=4,day=1)\n",
|
||||
"train_data = data[:train_end]\n",
|
||||
"test_end = datetime(year=2023, month=10, day = 31)\n",
|
||||
"test_data = data[datetime(year=2023,month=4,day=1):]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def MASE(y_true, y_pred, y_train):\n",
|
||||
" forecast = y_pred.reset_index(drop=True)\n",
|
||||
" outsample = y_true[:].iloc[:len(y_pred)]\n",
|
||||
" insample = y_train.reset_index(drop=True).to_numpy()\n",
|
||||
" frequency=1\n",
|
||||
" return np.mean(np.abs(forecast - outsample)) / np.mean(np.abs(insample[:-frequency] - insample[frequency:]))\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# reset indexes as they do funky things\n",
|
||||
"test_data.reset_index(drop=True, inplace=True)\n",
|
||||
"train_data.reset_index(drop=True, inplace=True)\n",
|
||||
"\n",
|
||||
"print('')\n",
|
||||
"\n",
|
||||
"# no exogenous variables at all\n",
|
||||
"model = sm.tsa.statespace.SARIMAX(train_data['trip'], order=(3,1,2), seasonal_order=(1, 1, 1, 24)).fit()\n",
|
||||
"forecast = model.forecast(steps=24*30*7)\n",
|
||||
"forecast = forecast[:]\n",
|
||||
"results.append((MASE(test_data['trip'], forecast, train_data['trip']), 'no exog'))\n",
|
||||
"\n",
|
||||
"#just the weather as exogenous data\n",
|
||||
"exogenous = ['rain (mm)', 'temperature_2m (°C)']\n",
|
||||
"model = sm.tsa.statespace.SARIMAX(train_data['trip'], order=(3,1,2), seasonal_order=(1, 1, 1, 24), exog=train_data[exogenous]).fit()\n",
|
||||
"forecast = model.forecast(steps=24*30*7, exog=test_data[exogenous].iloc[:(24*30*7)])\n",
|
||||
"forecast = forecast[:]\n",
|
||||
"results.append((MASE(test_data['trip'], forecast, train_data['trip']), 'just weather'))\n",
|
||||
"\n",
|
||||
"#just weekday + time of the day as exogenous\n",
|
||||
"model = sm.tsa.statespace.SARIMAX(train_data['trip'], order=(3,1,2), seasonal_order=(1, 1, 1, 24), exog=train_data[days]).fit()\n",
|
||||
"forecast = model.forecast(steps=24*30*7, exog=test_data[days].iloc[:(24*30*7)])\n",
|
||||
"forecast = forecast[:]\n",
|
||||
"results.append((MASE(test_data['trip'], forecast, train_data['trip']), 'just timely stuff'))\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"#all exogenous variables\n",
|
||||
"for asd in days:\n",
|
||||
" exogenous.append(asd)\n",
|
||||
"model = sm.tsa.statespace.SARIMAX(train_data['trip'], order=(3,1,2), seasonal_order=(1, 1, 1, 24), exog=train_data[exogenous]).fit()\n",
|
||||
"forecast = model.forecast(steps=24*30*7, exog=test_data[exogenous].iloc[:(24*30*7)])\n",
|
||||
"forecast = forecast[:]\n",
|
||||
"results.append((MASE(test_data['trip'], forecast, train_data['trip']), 'all exogs'))\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"print(results)\n",
|
||||
" \n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5b01e8b2-19f2-4809-bee7-0e0589489841",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,108 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 65,
|
||||
"id": "edadd89f-0fc1-4e07-a249-2a0d4052258c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" SARIMAX Results \n",
|
||||
"==============================================================================\n",
|
||||
"Dep. Variable: Trip count No. Observations: 7999\n",
|
||||
"Model: ARIMA(2, 1, 2) Log Likelihood -14325.727\n",
|
||||
"Date: Thu, 26 Sep 2024 AIC 28661.454\n",
|
||||
"Time: 17:35:05 BIC 28696.389\n",
|
||||
"Sample: 04-01-2023 HQIC 28673.412\n",
|
||||
" - 10-31-2023 \n",
|
||||
"Covariance Type: opg \n",
|
||||
"==============================================================================\n",
|
||||
" coef std err z P>|z| [0.025 0.975]\n",
|
||||
"------------------------------------------------------------------------------\n",
|
||||
"ar.L1 0.4432 0.034 12.890 0.000 0.376 0.511\n",
|
||||
"ar.L2 0.2343 0.009 26.703 0.000 0.217 0.251\n",
|
||||
"ma.L1 -1.5914 0.035 -45.151 0.000 -1.660 -1.522\n",
|
||||
"ma.L2 0.5926 0.035 16.913 0.000 0.524 0.661\n",
|
||||
"sigma2 2.1040 0.015 136.783 0.000 2.074 2.134\n",
|
||||
"===================================================================================\n",
|
||||
"Ljung-Box (L1) (Q): 0.14 Jarque-Bera (JB): 29710.16\n",
|
||||
"Prob(Q): 0.71 Prob(JB): 0.00\n",
|
||||
"Heteroskedasticity (H): 0.77 Skew: 2.39\n",
|
||||
"Prob(H) (two-sided): 0.00 Kurtosis: 11.15\n",
|
||||
"===================================================================================\n",
|
||||
"\n",
|
||||
"Warnings:\n",
|
||||
"[1] Covariance matrix calculated using the outer product of gradients (complex-step).\n",
|
||||
"2024-02-28 07:00 0.438484\n",
|
||||
"Freq: h, dtype: float64\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import statsmodels.api as sm\n",
|
||||
"from statsmodels.tsa.arima.model import ARIMA\n",
|
||||
"\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"df = pd.read_csv('datasets/aggregated_2023_Designmuseo.csv')\n",
|
||||
"df['Departure'] = pd.to_datetime(df['Departure'], format='mixed')\n",
|
||||
"df['Trip count'] = pd.to_numeric(df['Trip count'])\n",
|
||||
"\n",
|
||||
"start_date = '2023-01-04 00:00'\n",
|
||||
"end_date = '2023-31-10 23:00'\n",
|
||||
"\n",
|
||||
"start = pd.to_datetime(start_date, format = '%Y-%d-%m %H:%M')\n",
|
||||
"end_date = pd.to_datetime(end_date, format = '%Y-%d-%m %H:%M')\n",
|
||||
"dates = pd.DataFrame({'Departure':pd.date_range(start, end_date, freq='h')})\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"df3 = pd.concat([df, dates])\n",
|
||||
"values = {'Trip count' : 0, 'Departure station id' : 7.0, 'Departure station name' : 'Designmuseo'}\n",
|
||||
"df3.fillna(value=values, inplace = True)\n",
|
||||
"df3 = df3.sort_values(by='Departure')\n",
|
||||
"\n",
|
||||
"df3.set_index('Departure', inplace=True)\n",
|
||||
"df3.index = pd.DatetimeIndex(df3.index).to_period('h')\n",
|
||||
"\n",
|
||||
"model = ARIMA(df3['Trip count'], order=(2,1,2))\n",
|
||||
"model_fit = model.fit()\n",
|
||||
"# summary of fit model\n",
|
||||
"print(model_fit.summary())\n",
|
||||
"'''\n",
|
||||
"print(df3.sort_values(by='Departure'))\n",
|
||||
"plt.acorr(df3['Trip count'], maxlags = 1000)\n",
|
||||
"plt.grid(True)\n",
|
||||
"'''\n",
|
||||
"'''\n",
|
||||
"sm.graphics.tsa.plot_pacf(df3['Trip count'], lags =24, method='ywm')\n",
|
||||
"plt.show()\n",
|
||||
"'''\n",
|
||||
"print(model_fit.forecast(1))\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
+1305
File diff suppressed because one or more lines are too long
Executable
+22
@@ -0,0 +1,22 @@
|
||||
#!/usr/bin/env python
|
||||
"""Django's command-line utility for administrative tasks."""
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def main():
    """Run administrative tasks."""
    # Use the project's settings module unless the environment already names one.
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'project.settings')
    try:
        from django.core import management
    except ImportError as exc:
        hint = (
            "Couldn't import Django. Are you sure it's installed and "
            "available on your PYTHONPATH environment variable? Did you "
            "forget to activate a virtual environment?"
        )
        raise ImportError(hint) from exc
    # Hand the raw command line over to Django's management dispatcher.
    management.execute_from_command_line(sys.argv)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -0,0 +1,16 @@
|
||||
"""
|
||||
ASGI config for project project.
|
||||
|
||||
It exposes the ASGI callable as a module-level variable named ``application``.
|
||||
|
||||
For more information on this file, see
|
||||
https://docs.djangoproject.com/en/5.1/howto/deployment/asgi/
|
||||
"""
|
||||
|
||||
import os
|
||||
|
||||
from django.core.asgi import get_asgi_application
|
||||
|
||||
# Use the project's settings module unless the environment already names one.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'project.settings')

# Module-level ASGI callable, as described in the module docstring.
application = get_asgi_application()
|
||||
@@ -0,0 +1,122 @@
|
||||
"""
|
||||
Django settings for project project.
|
||||
|
||||
Generated by 'django-admin startproject' using Django 5.1.1.
|
||||
|
||||
For more information on this file, see
|
||||
https://docs.djangoproject.com/en/5.1/topics/settings/
|
||||
|
||||
For the full list of settings and their values, see
|
||||
https://docs.djangoproject.com/en/5.1/ref/settings/
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
DJANGO_ENV = os.getenv('DJANGO_ENV', 'development')
|
||||
__prod__ = DJANGO_ENV == 'production'
|
||||
|
||||
# Build paths inside the project like this: BASE_DIR / 'subdir'.
|
||||
BASE_DIR = Path(__file__).resolve().parent.parent
|
||||
|
||||
# Quick-start development settings - unsuitable for production
|
||||
# See https://docs.djangoproject.com/en/5.1/howto/deployment/checklist/
|
||||
|
||||
# SECURITY WARNING: keep the secret key used in production secret!
|
||||
SECRET_KEY=os.getenv('SECRET_KEY') if __prod__ else 'django-insecure-tu6x-0wmq^rlhrlcd=k$p2ytx7jr7y0jp+*xhq8$(2g5(+ensv'
|
||||
|
||||
# SECURITY WARNING: don't run with debug turned on in production!
# Debug mode follows the environment: it is off when DJANGO_ENV is 'production'
# and on otherwise.
DEBUG = not __prod__
|
||||
|
||||
# ALLOWED_HOSTS = [os.getenv('BACKEND_ORIGIN')] if __prod__ else ['.localhost', '127.0.0.1', '[::1]', '127.0.0.1:3000', 'localhost:3000']
|
||||
ALLOWED_HOSTS = ['*']
|
||||
|
||||
# Application definition
|
||||
|
||||
INSTALLED_APPS = ['django.contrib.admin', 'django.contrib.auth', 'django.contrib.contenttypes', 'django.contrib.sessions', 'django.contrib.messages', 'django.contrib.staticfiles', 'corsheaders']
|
||||
|
||||
MIDDLEWARE = [
    'django.middleware.security.SecurityMiddleware',
    'django.contrib.sessions.middleware.SessionMiddleware',
    # CorsMiddleware has to run before any middleware that can generate a
    # response (CommonMiddleware in particular); otherwise those responses are
    # sent without CORS headers.
    'corsheaders.middleware.CorsMiddleware',
    'django.middleware.common.CommonMiddleware',
    'django.middleware.csrf.CsrfViewMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
    'django.contrib.messages.middleware.MessageMiddleware',
    'django.middleware.clickjacking.XFrameOptionsMiddleware',
]
|
||||
|
||||
ROOT_URLCONF = 'project.urls'
|
||||
|
||||
TEMPLATES = [
|
||||
{
|
||||
'BACKEND': 'django.template.backends.django.DjangoTemplates',
|
||||
'DIRS': [],
|
||||
'APP_DIRS': True,
|
||||
'OPTIONS': {
|
||||
'context_processors': [
|
||||
'django.template.context_processors.debug',
|
||||
'django.template.context_processors.request',
|
||||
'django.contrib.auth.context_processors.auth',
|
||||
'django.contrib.messages.context_processors.messages',
|
||||
],
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
WSGI_APPLICATION = 'project.wsgi.application'
|
||||
|
||||
# Database
|
||||
# https://docs.djangoproject.com/en/5.1/ref/settings/#databases
|
||||
|
||||
DATABASES = {
|
||||
'default': {
|
||||
'ENGINE': 'django.db.backends.sqlite3',
|
||||
'NAME': BASE_DIR / 'db.sqlite3',
|
||||
}
|
||||
}
|
||||
|
||||
# Password validation
|
||||
# https://docs.djangoproject.com/en/5.1/ref/settings/#auth-password-validators
|
||||
|
||||
AUTH_PASSWORD_VALIDATORS = [
|
||||
{
|
||||
'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
|
||||
},
|
||||
{
|
||||
'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
|
||||
},
|
||||
{
|
||||
'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
|
||||
},
|
||||
{
|
||||
'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
|
||||
},
|
||||
]
|
||||
|
||||
# Internationalization
|
||||
# https://docs.djangoproject.com/en/5.1/topics/i18n/
|
||||
|
||||
LANGUAGE_CODE = 'en-us'
|
||||
|
||||
TIME_ZONE = 'UTC'
|
||||
|
||||
USE_I18N = True
|
||||
|
||||
USE_TZ = True
|
||||
|
||||
# Static files (CSS, JavaScript, Images)
|
||||
# https://docs.djangoproject.com/en/5.1/howto/static-files/
|
||||
|
||||
STATIC_URL = 'static/'
|
||||
|
||||
# Default primary key field type
|
||||
# https://docs.djangoproject.com/en/5.1/ref/settings/#default-auto-field
|
||||
|
||||
DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'
|
||||
|
||||
CORS_ALLOW_ALL_ORIGINS = True
|
||||
@@ -0,0 +1,23 @@
|
||||
"""
|
||||
URL configuration for project project.
|
||||
|
||||
The `urlpatterns` list routes URLs to views. For more information please see:
|
||||
https://docs.djangoproject.com/en/5.1/topics/http/urls/
|
||||
Examples:
|
||||
Function views
|
||||
1. Add an import: from my_app import views
|
||||
2. Add a URL to urlpatterns: path('', views.home, name='home')
|
||||
Class-based views
|
||||
1. Add an import: from other_app.views import Home
|
||||
2. Add a URL to urlpatterns: path('', Home.as_view(), name='home')
|
||||
Including another URLconf
|
||||
1. Import the include() function: from django.urls import include, path
|
||||
2. Add a URL to urlpatterns: path('blog/', include('blog.urls'))
|
||||
"""
|
||||
from django.contrib import admin
|
||||
from django.urls import include, path
|
||||
|
||||
urlpatterns = [
|
||||
path('admin/', admin.site.urls),
|
||||
path('app/', include('app.urls'))
|
||||
]
|
||||
@@ -0,0 +1,16 @@
|
||||
"""
|
||||
WSGI config for project project.
|
||||
|
||||
It exposes the WSGI callable as a module-level variable named ``application``.
|
||||
|
||||
For more information on this file, see
|
||||
https://docs.djangoproject.com/en/5.1/howto/deployment/wsgi/
|
||||
"""
|
||||
|
||||
import os
|
||||
|
||||
from django.core.wsgi import get_wsgi_application
|
||||
|
||||
# Use the project's settings module unless the environment already names one.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'project.settings')

# Module-level WSGI callable, as described in the module docstring.
application = get_wsgi_application()
|
||||
@@ -0,0 +1,8 @@
|
||||
# Data handling, modelling and plotting
pandas
numpy
matplotlib
seaborn
statsmodels
# Web framework and its integrations. Django is listed explicitly instead of
# relying on django-cors-headers to pull it in. The former "datetime" entry was
# removed: the code uses the standard-library module, while that name on PyPI
# is the unrelated Zope DateTime package.
django
django-cors-headers
python-dotenv
|
||||
Reference in New Issue
Block a user