---

2026-06-24 16:52:08 +02:00
commit 5abeb4fd48
53 changed files with 276551 additions and 0 deletions
@@ -0,0 +1,6 @@
+env
+datasets/20*
+datasets/full_bike_data.csv
+main.ipynb
+deploy.sh
+.env
@@ -0,0 +1,14 @@
+FROM python:3.13.0-bookworm
+
+WORKDIR /usr/src/app
+
+# Install dependencies before copying the sources so this layer stays cached
+# until requirements.txt itself changes; --no-cache-dir keeps pip's download
+# cache out of the image.
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Application code, then the production environment file that settings.py
+# reads through python-dotenv.
+COPY . .
+COPY .env.production .env
+
+EXPOSE 9000
+
+ENV DJANGO_ENV=production
+
+# NOTE(review): `runserver` is Django's development server and is not meant for
+# production traffic; switching to a WSGI/ASGI server needs a new dependency,
+# so it is only flagged here.
+CMD ["python", "manage.py", "runserver", "0.0.0.0:9000", "--noreload"]
@@ -0,0 +1 @@
+## This is the Django backend for our Data Science project.  
@@ -0,0 +1,3 @@
+from django.contrib import admin
+
+# Register your models here.
@@ -0,0 +1,6 @@
+from django.apps import AppConfig
+
+
+class AppConfig(AppConfig):
+    default_auto_field = 'django.db.models.BigAutoField'
+    name = 'app'
@@ -0,0 +1,3 @@
+from django.db import models
+
+# Create your models here.
@@ -0,0 +1,3 @@
+from django.test import TestCase
+
+# Create your tests here.
@@ -0,0 +1,8 @@
+from django.urls import path
+
+from . import views
+
+# Routes of the `app` application, resolved relative to wherever this URLconf
+# is included.
+urlpatterns = [
+    # '' -> plain-text landing response.
+    path('', views.index, name='index'),
+    # 'predict' -> JSON trip-count prediction (reads `timestamp` and `station`
+    # from the query string).
+    path('predict', views.predict, name='predict'),
+]
@@ -0,0 +1,80 @@
+from django.http import HttpResponse, JsonResponse
+from datetime import datetime
+import pandas as pd
+import statsmodels.api as sm
+
+stations = ['Kamppi (M)', 'Rautatientori - itä']
+station_dict = {}
+
+for station in stations:
+  departure_data = pd.read_csv('datasets/' + station + '_hourly_aggregate.csv')
+  return_data = pd.read_csv('datasets/' + station + '_return_hourly_aggregate.csv')
+  departure_data['Departure'] = pd.to_datetime(departure_data['Departure'], format='mixed')
+  return_data['Return'] = pd.to_datetime(return_data['Return'], format='mixed')
+
+  departure_data.set_index(departure_data['Departure'], inplace=True)
+  return_data.set_index(return_data['Return'], inplace=True)
+
+  departure_data['trip'] = pd.to_numeric(departure_data['trip'], errors='coerce')
+  return_data['trip'] = pd.to_numeric(return_data['trip'], errors='coerce')
+
+  departure_data = departure_data.dropna(axis=1)
+  return_data = return_data.dropna(axis=1)
+
+  departure_mod = sm.tsa.statespace.SARIMAX(departure_data['trip'], order=(1, 1, 1), seasonal_order=(0, 1, 0, 24), freq='h').fit(disp=False, low_memory=True)
+  return_mod = sm.tsa.statespace.SARIMAX(return_data['trip'], order=(1, 1, 1), seasonal_order=(0, 1, 0, 24), freq='h').fit(disp=False, low_memory=True)
+
+  station_dict[station] = {} 
+  station_dict[station]['departure_mod'] = departure_mod 
+  station_dict[station]['return_mod'] = return_mod
+
+
+def index(request):
+  return HttpResponse('You\'re at the app index.')
+
+
+def predict(request):
+  if request.method == 'GET':
+    current = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+    timestamp = request.GET.get('timestamp', current)
+    station = request.GET.get('station', 'Kamppi (M)')
+    ts = timestamp
+
+    defaultBikeCount = 28
+
+    departingCount = -1
+    if departure_data.index.max() >= pd.to_datetime(timestamp):
+      ls = departure_data[departure_data['Departure'] == ts]['trip'].tolist()
+      if len(ls) != 0:
+        departingCount = ls[0]
+      else:
+        pass
+    else:
+      departure_mod = station_dict[station]['departure_mod']
+      departForecast = departure_mod.forecast(timestamp)
+      departingCount = round(departForecast[-1])
+
+    returningCount = -1
+    if return_data.index.max() >= pd.to_datetime(timestamp):
+      ls = return_data[return_data['Return'] == ts]['trip'].tolist()
+      if len(ls) != 0:
+        returningCount = ls[0]
+      else:
+        pass
+    else:
+      return_mod = station_dict[station]['return_mod']
+      returnForecast = return_mod.forecast(timestamp)
+      returningCount = round(returnForecast[-1])
+
+    bikeAtStationCount = defaultBikeCount - departingCount + returningCount
+
+    result = {
+        'timestamp': timestamp,
+        'station': station,
+        'departingCount': departingCount,
+        'returningCount': returningCount,
+        'bikeAtStationCount': bikeAtStationCount,
+        'increasing': returningCount > departingCount
+    }
+
+    return JsonResponse(result)
@@ -0,0 +1,204 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "cd21c67a-e679-43e3-85a4-471a35522d42",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "\"\\nparam_mse = list()\\nfor param in parameter_space:\\n    model = sm.tsa.statespace.SARIMAX(train_data['trip'], order=param, seasonal_order=(0, 1, 0, 24), freq='h').fit()\\n    forecast = model.forecast(datetime(year=2023,month=10,day=31,hour=17))\\n    forecast = forecast[datetime(year=2023,month=4,day=1):]\\n    #test_data.index = pd.DatetimeIndex(forecast.index)\\n    \\n    final = pd.concat([forecast,test_data], axis=1)\\n    errors = final['trip'] - final['predicted_mean']\\n    mse = np.mean(errors**2)\\n\\n    param_mse.append((mse, param))\\n\""
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "from sklearn.model_selection import TimeSeriesSplit\n",
+    "from datetime import datetime\n",
+    "import statsmodels.api as sm\n",
+    "\n",
+    "# Use Jämeräntaival as an example, could be generalized to all stations\n",
+    "df = pd.read_csv(\"datasets/Jämeräntaival_hourly_aggregate.csv\")\n",
+    "df['Departure'] = pd.to_datetime(df['Departure'], format='mixed')\n",
+    "df.set_index('Departure', inplace=True)\n",
+    "\n",
+    "train_end = datetime(year=2022, month=10, day=31)\n",
+    "train_start = datetime(year=2018,month=4,day=1)\n",
+    "train_data = df[:train_end]\n",
+    "test_end = datetime(year=2023, month=10, day = 31)\n",
+    "test_data  = df[datetime(year=2023,month=4,day=1):]\n",
+    "\n",
+    "parameter_space = list()\n",
+    "\n",
+    "for i in range(0,3):\n",
+    "    for j in range(0,3):\n",
+    "        for k in range(0,3):\n",
+    "            parameter_space.append((i,j,k))\n",
+    "\n",
+    "#parameter_space = [x for x in parameter_space if x[1] == 0]\n",
+    "parameter_space = [x for x in parameter_space if x != (0,0,0)]\n",
+    "'''\n",
+    "param_mse = list()\n",
+    "for param in parameter_space:\n",
+    "    model = sm.tsa.statespace.SARIMAX(train_data['trip'], order=param, seasonal_order=(0, 1, 0, 24), freq='h').fit()\n",
+    "    forecast = model.forecast(datetime(year=2023,month=10,day=31,hour=17))\n",
+    "    forecast = forecast[datetime(year=2023,month=4,day=1):]\n",
+    "    #test_data.index = pd.DatetimeIndex(forecast.index)\n",
+    "    \n",
+    "    final = pd.concat([forecast,test_data], axis=1)\n",
+    "    errors = final['trip'] - final['predicted_mean']\n",
+    "    mse = np.mean(errors**2)\n",
+    "\n",
+    "    param_mse.append((mse, param))\n",
+    "'''\n",
+    "\n",
+    "\n",
+    "\n",
+    "    \n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "id": "db9591dd-7bfd-46d3-aa50-8a757776aeb0",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "9.043665073451029 9.056463656418249\n",
+      "9.029642126494705 9.043665073451029\n",
+      "9.029317575211266 9.029642126494705\n",
+      "(np.float64(9.029317575211266), (1, 0, 2))\n"
+     ]
+    }
+   ],
+   "source": [
+    "min_mse = param_mse[0][0]\n",
+    "\n",
+    "index = 0\n",
+    "for i in range(0,len(param_mse)):\n",
+    "    if param_mse[i][0] < min_mse:\n",
+    "        print(param_mse[i][0], min_mse)\n",
+    "        min_mse = param_mse[i][0]\n",
+    "        index = i\n",
+    "\n",
+    "\n",
+    "print(param_mse[index])\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "5b01e8b2-19f2-4809-bee7-0e0589489841",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/aleksi/venv/lib/python3.12/site-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency h will be used.\n",
+      "  self._init_dates(dates, freq)\n",
+      " This problem is unconstrained.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "RUNNING THE L-BFGS-B CODE\n",
+      "\n",
+      "           * * *\n",
+      "\n",
+      "Machine precision = 2.220D-16\n",
+      " N =            3     M =           10\n",
+      "\n",
+      "At X0         0 variables are exactly at the bounds\n",
+      "\n",
+      "At iterate    0    f=  2.51523D+00    |proj g|=  8.46816D-02\n",
+      "\n",
+      "At iterate    5    f=  2.46808D+00    |proj g|=  9.28777D-03\n",
+      "\n",
+      "At iterate   10    f=  2.45763D+00    |proj g|=  8.55309D-03\n",
+      "\n",
+      "At iterate   15    f=  2.45509D+00    |proj g|=  5.19865D-03\n",
+      "\n",
+      "At iterate   20    f=  2.45471D+00    |proj g|=  1.64676D-03\n",
+      "\n",
+      "At iterate   25    f=  2.45467D+00    |proj g|=  4.36219D-04\n",
+      "\n",
+      "At iterate   30    f=  2.45467D+00    |proj g|=  2.78346D-06\n",
+      "\n",
+      "           * * *\n",
+      "\n",
+      "Tit   = total number of iterations\n",
+      "Tnf   = total number of function evaluations\n",
+      "Tnint = total number of segments explored during Cauchy searches\n",
+      "Skip  = number of BFGS updates skipped\n",
+      "Nact  = number of active bounds at final generalized Cauchy point\n",
+      "Projg = norm of the final projected gradient\n",
+      "F     = final function value\n",
+      "\n",
+      "           * * *\n",
+      "\n",
+      "   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F\n",
+      "    3     30     36      1     0     0   2.783D-06   2.455D+00\n",
+      "  F =   2.4546707052577292     \n",
+      "\n",
+      "CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL            \n",
+      "10.388099588499935\n"
+     ]
+    }
+   ],
+   "source": [
+    "model = sm.tsa.statespace.SARIMAX(train_data['trip'], order=(1,1,1), seasonal_order=(0,1,0,24), freq='h').fit()\n",
+    "forecast = model.forecast(datetime(year=2023,month=10,day=31,hour=17))\n",
+    "forecast = forecast[datetime(year=2023,month=4,day=1):]\n",
+    "#test_data.index = pd.DatetimeIndex(forecast.index)\n",
+    "\n",
+    "final = pd.concat([forecast,test_data], axis=1)\n",
+    "errors = final['trip'] - final['predicted_mean']\n",
+    "mse = np.mean(errors**2)\n",
+    "\n",
+    "print(mse)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "726acc6f-ea29-4bb6-8252-a6556ae10ac4",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
@@ -0,0 +1 @@
+[(np.float64(3.3274909833208963), 'no exogenous data'), (np.float64(3.158263780804383), 'just weather'), (np.float64(3.3220296790264277), 'just weekdays and hours'), (np.float64(3.1363872336562717), 'all exogenous')]
@@ -0,0 +1,301 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 56,
+   "id": "cd21c67a-e679-43e3-85a4-471a35522d42",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "asd\n",
+      "asd2\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/otto/.pyenv/versions/3.11.2/lib/python3.11/site-packages/statsmodels/tsa/statespace/sarimax.py:966: UserWarning: Non-stationary starting autoregressive parameters found. Using zeros as starting parameters.\n",
+      "  warn('Non-stationary starting autoregressive parameters'\n",
+      "/Users/otto/.pyenv/versions/3.11.2/lib/python3.11/site-packages/statsmodels/tsa/statespace/sarimax.py:978: UserWarning: Non-invertible starting MA parameters found. Using zeros as starting parameters.\n",
+      "  warn('Non-invertible starting MA parameters found.'\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "RUNNING THE L-BFGS-B CODE\n",
+      "\n",
+      "           * * *\n",
+      "\n",
+      "Machine precision = 2.220D-16\n",
+      " N =            8     M =           10\n",
+      "\n",
+      "At X0         0 variables are exactly at the bounds\n",
+      "\n",
+      "At iterate    0    f=  2.52687D+00    |proj g|=  3.92148D-01\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      " This problem is unconstrained.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "At iterate    5    f=  2.24021D+00    |proj g|=  1.15085D-01\n",
+      "\n",
+      "At iterate   10    f=  2.19412D+00    |proj g|=  5.75993D-03\n",
+      "\n",
+      "At iterate   15    f=  2.19337D+00    |proj g|=  8.22902D-04\n",
+      "\n",
+      "At iterate   20    f=  2.19327D+00    |proj g|=  4.98625D-03\n",
+      "\n",
+      "At iterate   25    f=  2.19325D+00    |proj g|=  3.48694D-05\n",
+      "\n",
+      "           * * *\n",
+      "\n",
+      "Tit   = total number of iterations\n",
+      "Tnf   = total number of function evaluations\n",
+      "Tnint = total number of segments explored during Cauchy searches\n",
+      "Skip  = number of BFGS updates skipped\n",
+      "Nact  = number of active bounds at final generalized Cauchy point\n",
+      "Projg = norm of the final projected gradient\n",
+      "F     = final function value\n",
+      "\n",
+      "           * * *\n",
+      "\n",
+      "   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F\n",
+      "    8     25     26      1     0     0   3.487D-05   2.193D+00\n",
+      "  F =   2.1932501470633761     \n",
+      "\n",
+      "CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             \n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/otto/.pyenv/versions/3.11.2/lib/python3.11/site-packages/statsmodels/tsa/statespace/sarimax.py:966: UserWarning: Non-stationary starting autoregressive parameters found. Using zeros as starting parameters.\n",
+      "  warn('Non-stationary starting autoregressive parameters'\n",
+      "/Users/otto/.pyenv/versions/3.11.2/lib/python3.11/site-packages/statsmodels/tsa/statespace/sarimax.py:978: UserWarning: Non-invertible starting MA parameters found. Using zeros as starting parameters.\n",
+      "  warn('Non-invertible starting MA parameters found.'\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "RUNNING THE L-BFGS-B CODE\n",
+      "\n",
+      "           * * *\n",
+      "\n",
+      "Machine precision = 2.220D-16\n",
+      " N =           10     M =           10\n",
+      "\n",
+      "At X0         0 variables are exactly at the bounds\n",
+      "\n",
+      "At iterate    0    f=  2.52689D+00    |proj g|=  3.92567D-01\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      " This problem is unconstrained.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "At iterate    5    f=  2.23987D+00    |proj g|=  1.15352D-01\n",
+      "\n",
+      "At iterate   10    f=  2.19343D+00    |proj g|=  5.82758D-03\n",
+      "\n",
+      "At iterate   15    f=  2.19242D+00    |proj g|=  1.41481D-03\n",
+      "\n",
+      "At iterate   20    f=  2.19228D+00    |proj g|=  1.02376D-03\n",
+      "\n",
+      "At iterate   25    f=  2.19225D+00    |proj g|=  8.93884D-04\n",
+      "\n",
+      "At iterate   30    f=  2.19224D+00    |proj g|=  6.02699D-05\n",
+      "\n",
+      "At iterate   35    f=  2.19224D+00    |proj g|=  4.09553D-04\n",
+      "\n",
+      "At iterate   40    f=  2.19223D+00    |proj g|=  4.81752D-04\n",
+      "\n",
+      "At iterate   45    f=  2.19220D+00    |proj g|=  9.39974D-04\n",
+      "\n",
+      "At iterate   50    f=  2.19205D+00    |proj g|=  5.16353D-03\n",
+      "\n",
+      "           * * *\n",
+      "\n",
+      "Tit   = total number of iterations\n",
+      "Tnf   = total number of function evaluations\n",
+      "Tnint = total number of segments explored during Cauchy searches\n",
+      "Skip  = number of BFGS updates skipped\n",
+      "Nact  = number of active bounds at final generalized Cauchy point\n",
+      "Projg = norm of the final projected gradient\n",
+      "F     = final function value\n",
+      "\n",
+      "           * * *\n",
+      "\n",
+      "   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F\n",
+      "   10     50     54      1     0     0   5.164D-03   2.192D+00\n",
+      "  F =   2.1920465431369465     \n",
+      "\n",
+      "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT                 \n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/otto/.pyenv/versions/3.11.2/lib/python3.11/site-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals\n",
+      "  warnings.warn(\"Maximum Likelihood optimization failed to \"\n"
+     ]
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "from sklearn.model_selection import TimeSeriesSplit\n",
+    "from datetime import datetime\n",
+    "import statsmodels.api as sm\n",
+    "\n",
+    "stations = ['Jämeräntaival']\n",
+    "data = pd.read_csv('datasets/' + stations[0] + '_hourly_aggregate.csv')\n",
+    "data['Departure'] = pd.to_datetime(data['Departure'], format='mixed')\n",
+    "\n",
+    "results = []\n",
+    "\n",
+    "weather_df = pd.read_csv('datasets/weather_hourly_helsinki.csv')\n",
+    "weather_df = weather_df.loc[1:, :]\n",
+    "weather_df.columns = weather_df.iloc[0]\n",
+    "weather_df = weather_df.loc[2:, :]\n",
+    "weather_df['time'] = pd.to_datetime(weather_df['time'], format='mixed')\n",
+    "\n",
+    "data = pd.merge(weather_df, data, how='inner', left_on='time', right_on='Departure')\n",
+    "data = data.drop(['time'], axis=1)\n",
+    "\n",
+    "data['temperature_2m (°C)'] = pd.to_numeric(data['temperature_2m (°C)'], errors='coerce')\n",
+    "data['rain (mm)'] = pd.to_numeric(data['rain (mm)'], errors='coerce')\n",
+    "data['trip'] = pd.to_numeric(data['trip'], errors='coerce')\n",
+    "\n",
+    "\n",
+    "# generation of weekday & hour series\n",
+    "datedata = pd.DataFrame()\n",
+    "datedata['date'] = data['Departure']\n",
+    "datedata['weekday'] = data['Departure'].dt.weekday\n",
+    "days = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat']\n",
+    "# one indicator column per weekday (Monday=0); Sunday is left out as the reference level\n",
+    "for weekday_number, day in enumerate(days):\n",
+    "    datedata[day] = 0\n",
+    "    datedata.loc[datedata['date'].dt.weekday == weekday_number, day] = 1\n",
+    "for i in range(0, 23):\n",
+    "    asd = 'hour' + str(i)\n",
+    "    days.append(asd)\n",
+    "    datedata[asd] = 0\n",
+    "    datedata.loc[datedata['date'].dt.hour == i, asd] = 1\n",
+    "\n",
+    "data = pd.merge(datedata, data, how='inner', left_on='date', right_on='Departure')\n",
+    "data.set_index(data['Departure'], inplace=True)\n",
+    "\n",
+    "\n",
+    "\n",
+    "train_end = datetime(year=2022, month=10, day=31)\n",
+    "train_start = datetime(year=2018,month=4,day=1)\n",
+    "train_data = data[:train_end]\n",
+    "test_end = datetime(year=2023, month=10, day = 31)\n",
+    "test_data  = data[datetime(year=2023,month=4,day=1):]\n",
+    "\n",
+    "\n",
+    "def MASE(y_true, y_pred, y_train):\n",
+    "    forecast = y_pred.reset_index(drop=True)\n",
+    "    outsample = y_true[:].iloc[:len(y_pred)]\n",
+    "    insample = y_train.reset_index(drop=True).to_numpy()\n",
+    "    frequency=1\n",
+    "    return np.mean(np.abs(forecast - outsample)) / np.mean(np.abs(insample[:-frequency] - insample[frequency:]))\n",
+    "\n",
+    "\n",
+    "# reset indexes as they do funky things\n",
+    "test_data.reset_index(drop=True, inplace=True)\n",
+    "train_data.reset_index(drop=True, inplace=True)\n",
+    "\n",
+    "print('')\n",
+    "\n",
+    "# no exogenous variables at all\n",
+    "model = sm.tsa.statespace.SARIMAX(train_data['trip'], order=(3,1,2), seasonal_order=(1, 1, 1, 24)).fit()\n",
+    "forecast = model.forecast(steps=24*30*7)\n",
+    "forecast = forecast[:]\n",
+    "results.append((MASE(test_data['trip'], forecast, train_data['trip']), 'no exog'))\n",
+    "\n",
+    "#just the weather as exogenous data\n",
+    "exogenous = ['rain (mm)', 'temperature_2m (°C)']\n",
+    "model = sm.tsa.statespace.SARIMAX(train_data['trip'], order=(3,1,2), seasonal_order=(1, 1, 1, 24), exog=train_data[exogenous]).fit()\n",
+    "forecast = model.forecast(steps=24*30*7, exog=test_data[exogenous].iloc[:(24*30*7)])\n",
+    "forecast = forecast[:]\n",
+    "results.append((MASE(test_data['trip'], forecast, train_data['trip']), 'just weather'))\n",
+    "\n",
+    "#just weekday + time of the day as exogenous\n",
+    "model = sm.tsa.statespace.SARIMAX(train_data['trip'], order=(3,1,2), seasonal_order=(1, 1, 1, 24), exog=train_data[days]).fit()\n",
+    "forecast = model.forecast(steps=24*30*7, exog=test_data[days].iloc[:(24*30*7)])\n",
+    "forecast = forecast[:]\n",
+    "results.append((MASE(test_data['trip'], forecast, train_data['trip']), 'just timely stuff'))\n",
+    "\n",
+    "\n",
+    "#all exogenous variables\n",
+    "for asd in days:\n",
+    "    exogenous.append(asd)\n",
+    "model = sm.tsa.statespace.SARIMAX(train_data['trip'], order=(3,1,2), seasonal_order=(1, 1, 1, 24), exog=train_data[exogenous]).fit()\n",
+    "forecast = model.forecast(steps=24*30*7, exog=test_data[exogenous].iloc[:(24*30*7)])\n",
+    "forecast = forecast[:]\n",
+    "results.append((MASE(test_data['trip'], forecast, train_data['trip']), 'all exogs'))\n",
+    "\n",
+    "\n",
+    "print(results)\n",
+    "    \n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5b01e8b2-19f2-4809-bee7-0e0589489841",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
@@ -0,0 +1,108 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 65,
+   "id": "edadd89f-0fc1-4e07-a249-2a0d4052258c",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "                               SARIMAX Results                                \n",
+      "==============================================================================\n",
+      "Dep. Variable:             Trip count   No. Observations:                 7999\n",
+      "Model:                 ARIMA(2, 1, 2)   Log Likelihood              -14325.727\n",
+      "Date:                Thu, 26 Sep 2024   AIC                          28661.454\n",
+      "Time:                        17:35:05   BIC                          28696.389\n",
+      "Sample:                    04-01-2023   HQIC                         28673.412\n",
+      "                         - 10-31-2023                                         \n",
+      "Covariance Type:                  opg                                         \n",
+      "==============================================================================\n",
+      "                 coef    std err          z      P>|z|      [0.025      0.975]\n",
+      "------------------------------------------------------------------------------\n",
+      "ar.L1          0.4432      0.034     12.890      0.000       0.376       0.511\n",
+      "ar.L2          0.2343      0.009     26.703      0.000       0.217       0.251\n",
+      "ma.L1         -1.5914      0.035    -45.151      0.000      -1.660      -1.522\n",
+      "ma.L2          0.5926      0.035     16.913      0.000       0.524       0.661\n",
+      "sigma2         2.1040      0.015    136.783      0.000       2.074       2.134\n",
+      "===================================================================================\n",
+      "Ljung-Box (L1) (Q):                   0.14   Jarque-Bera (JB):             29710.16\n",
+      "Prob(Q):                              0.71   Prob(JB):                         0.00\n",
+      "Heteroskedasticity (H):               0.77   Skew:                             2.39\n",
+      "Prob(H) (two-sided):                  0.00   Kurtosis:                        11.15\n",
+      "===================================================================================\n",
+      "\n",
+      "Warnings:\n",
+      "[1] Covariance matrix calculated using the outer product of gradients (complex-step).\n",
+      "2024-02-28 07:00    0.438484\n",
+      "Freq: h, dtype: float64\n"
+     ]
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "import statsmodels.api as sm\n",
+    "from statsmodels.tsa.arima.model import ARIMA\n",
+    "\n",
+    "import matplotlib.pyplot as plt\n",
+    "df = pd.read_csv('datasets/aggregated_2023_Designmuseo.csv')\n",
+    "df['Departure'] = pd.to_datetime(df['Departure'], format='mixed')\n",
+    "df['Trip count'] = pd.to_numeric(df['Trip count'])\n",
+    "\n",
+    "start_date = '2023-01-04 00:00'\n",
+    "end_date = '2023-31-10 23:00'\n",
+    "\n",
+    "start = pd.to_datetime(start_date, format = '%Y-%d-%m %H:%M')\n",
+    "end_date = pd.to_datetime(end_date, format = '%Y-%d-%m %H:%M')\n",
+    "dates = pd.DataFrame({'Departure':pd.date_range(start, end_date, freq='h')})\n",
+    "\n",
+    "\n",
+    "df3 = pd.concat([df, dates])\n",
+    "values = {'Trip count' : 0, 'Departure station id' : 7.0, 'Departure station name' : 'Designmuseo'}\n",
+    "df3.fillna(value=values, inplace = True)\n",
+    "df3 = df3.sort_values(by='Departure')\n",
+    "\n",
+    "df3.set_index('Departure', inplace=True)\n",
+    "df3.index = pd.DatetimeIndex(df3.index).to_period('h')\n",
+    "\n",
+    "model = ARIMA(df3['Trip count'], order=(2,1,2))\n",
+    "model_fit = model.fit()\n",
+    "# summary of fit model\n",
+    "print(model_fit.summary())\n",
+    "'''\n",
+    "print(df3.sort_values(by='Departure'))\n",
+    "plt.acorr(df3['Trip count'], maxlags = 1000)\n",
+    "plt.grid(True)\n",
+    "'''\n",
+    "'''\n",
+    "sm.graphics.tsa.plot_pacf(df3['Trip count'], lags =24, method='ywm')\n",
+    "plt.show()\n",
+    "'''\n",
+    "print(model_fit.forecast(1))\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
@@ -0,0 +1,22 @@
+#!/usr/bin/env python
+"""Django's command-line utility for administrative tasks."""
+import os
+import sys
+
+
+def main():
+    """Run administrative tasks."""
+    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'project.settings')
+    try:
+        from django.core.management import execute_from_command_line
+    except ImportError as exc:
+        raise ImportError(
+            "Couldn't import Django. Are you sure it's installed and "
+            "available on your PYTHONPATH environment variable? Did you "
+            "forget to activate a virtual environment?"
+        ) from exc
+    execute_from_command_line(sys.argv)
+
+
+if __name__ == '__main__':
+    main()
@@ -0,0 +1,16 @@
+"""
+ASGI config for project project.
+
+It exposes the ASGI callable as a module-level variable named ``application``.
+
+For more information on this file, see
+https://docs.djangoproject.com/en/5.1/howto/deployment/asgi/
+"""
+
+import os
+
+from django.core.asgi import get_asgi_application
+
+# Use the project's settings module unless DJANGO_SETTINGS_MODULE is already set.
+os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'project.settings')
+
+# Module-level ASGI callable that servers import as `project.asgi.application`.
+application = get_asgi_application()
@@ -0,0 +1,122 @@
+"""
+Django settings for project project.
+
+Generated by 'django-admin startproject' using Django 5.1.1.
+
+For more information on this file, see
+https://docs.djangoproject.com/en/5.1/topics/settings/
+
+For the full list of settings and their values, see
+https://docs.djangoproject.com/en/5.1/ref/settings/
+"""
+
+from pathlib import Path
+import os
+from dotenv import load_dotenv
+
+# Load variables from a .env file (python-dotenv) so os.getenv() below can see them.
+load_dotenv()
+# Deployment mode; falls back to 'development' when DJANGO_ENV is not set.
+DJANGO_ENV = os.getenv('DJANGO_ENV', 'development')
+# True only when DJANGO_ENV is exactly 'production'.
+__prod__ = DJANGO_ENV == 'production'
+
+# Build paths inside the project like this: BASE_DIR / 'subdir'.
+BASE_DIR = Path(__file__).resolve().parent.parent
+
+# Quick-start development settings - unsuitable for production
+# See https://docs.djangoproject.com/en/5.1/howto/deployment/checklist/
+
+# SECURITY WARNING: keep the secret key used in production secret!
+SECRET_KEY=os.getenv('SECRET_KEY') if __prod__ else 'django-insecure-tu6x-0wmq^rlhrlcd=k$p2ytx7jr7y0jp+*xhq8$(2g5(+ensv'
+
+# SECURITY WARNING: don't run with debug turned on in production!
+# DEBUG = False if __prod__ else True
+DEBUG = True
+
+# ALLOWED_HOSTS = [os.getenv('BACKEND_ORIGIN')] if __prod__ else ['.localhost', '127.0.0.1', '[::1]', '127.0.0.1:3000', 'localhost:3000']
+# NOTE(review): '*' accepts any Host header in every environment; the
+# restricted variant above is disabled -- confirm before deploying.
+ALLOWED_HOSTS = ['*']
+
+# Application definition
+
+INSTALLED_APPS = ['django.contrib.admin', 'django.contrib.auth', 'django.contrib.contenttypes', 'django.contrib.sessions', 'django.contrib.messages', 'django.contrib.staticfiles', 'corsheaders']
+
+MIDDLEWARE = [
+    'django.middleware.security.SecurityMiddleware',
+    'django.contrib.sessions.middleware.SessionMiddleware',
+    'django.middleware.common.CommonMiddleware',
+    'django.middleware.csrf.CsrfViewMiddleware',
+    'django.contrib.auth.middleware.AuthenticationMiddleware',
+    'django.contrib.messages.middleware.MessageMiddleware',
+    'django.middleware.clickjacking.XFrameOptionsMiddleware',
+    'corsheaders.middleware.CorsMiddleware',
+    'django.middleware.common.CommonMiddleware',
+]
+
+ROOT_URLCONF = 'project.urls'
+
+TEMPLATES = [
+    {
+        'BACKEND': 'django.template.backends.django.DjangoTemplates',
+        'DIRS': [],
+        'APP_DIRS': True,
+        'OPTIONS': {
+            'context_processors': [
+                'django.template.context_processors.debug',
+                'django.template.context_processors.request',
+                'django.contrib.auth.context_processors.auth',
+                'django.contrib.messages.context_processors.messages',
+            ],
+        },
+    },
+]
+
+WSGI_APPLICATION = 'project.wsgi.application'
+
+# Database
+# https://docs.djangoproject.com/en/5.1/ref/settings/#databases
+
+DATABASES = {
+    'default': {
+        'ENGINE': 'django.db.backends.sqlite3',
+        'NAME': BASE_DIR / 'db.sqlite3',
+    }
+}
+
+# Password validation
+# https://docs.djangoproject.com/en/5.1/ref/settings/#auth-password-validators
+
+AUTH_PASSWORD_VALIDATORS = [
+    {
+        'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
+    },
+    {
+        'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
+    },
+    {
+        'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
+    },
+    {
+        'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
+    },
+]
+
+# Internationalization
+# https://docs.djangoproject.com/en/5.1/topics/i18n/
+
+LANGUAGE_CODE = 'en-us'
+
+TIME_ZONE = 'UTC'
+
+USE_I18N = True
+
+USE_TZ = True
+
+# Static files (CSS, JavaScript, Images)
+# https://docs.djangoproject.com/en/5.1/howto/static-files/
+
+STATIC_URL = 'static/'
+
+# Default primary key field type
+# https://docs.djangoproject.com/en/5.1/ref/settings/#default-auto-field
+
+DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'
+
+CORS_ALLOW_ALL_ORIGINS = True
@@ -0,0 +1,23 @@
+"""
+URL configuration for project project.
+
+The `urlpatterns` list routes URLs to views. For more information please see:
+    https://docs.djangoproject.com/en/5.1/topics/http/urls/
+Examples:
+Function views
+    1. Add an import:  from my_app import views
+    2. Add a URL to urlpatterns:  path('', views.home, name='home')
+Class-based views
+    1. Add an import:  from other_app.views import Home
+    2. Add a URL to urlpatterns:  path('', Home.as_view(), name='home')
+Including another URLconf
+    1. Import the include() function: from django.urls import include, path
+    2. Add a URL to urlpatterns:  path('blog/', include('blog.urls'))
+"""
+from django.contrib import admin
+from django.urls import include, path
+
+# Project-level routing table.
+urlpatterns = [
+    # Django admin site.
+    path('admin/', admin.site.urls),
+    # Everything under 'app/' is delegated to the URLconf in app/urls.py.
+    path('app/', include('app.urls'))
+]
@@ -0,0 +1,16 @@
+"""
+WSGI config for project project.
+
+It exposes the WSGI callable as a module-level variable named ``application``.
+
+For more information on this file, see
+https://docs.djangoproject.com/en/5.1/howto/deployment/wsgi/
+"""
+
+import os
+
+from django.core.wsgi import get_wsgi_application
+
+# Use the project's settings module unless DJANGO_SETTINGS_MODULE is already set.
+os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'project.settings')
+
+# Module-level WSGI callable; settings.py refers to it as 'project.wsgi.application'.
+application = get_wsgi_application()
@@ -0,0 +1,8 @@
+pandas
+numpy
+matplotlib
+seaborn
+statsmodels
+# Django is imported directly by the project, so list it explicitly instead of
+# relying on django-cors-headers to pull it in.
+django
+django-cors-headers
+python-dotenv
				`@@ -0,0 +1 @@`
				`## This is the Django backend for our Data Science project.`
				`@@ -0,0 +1 @@`
				`[(np.float64(3.3274909833208963), 'no exogenous data'), (np.float64(3.158263780804383), 'just weather'), (np.float64(3.3220296790264277), 'just weekdays and hours'), (np.float64(3.1363872336562717), 'all exogenous')]`