# Source listing metadata: 198 lines, 6.9 KiB, R
# Loading Libraries
# Packages are attached before any other work so that a missing dependency
# fails fast, before the CSV files are read.
library(tidyverse)
library(ggplot2) # already attached by tidyverse; kept as an explicit dependency

# Loading and Merging Datasets
# NOTE(review): `encoding = "latin1"` only marks the strings that are read as
# Latin-1; it does not re-encode the file (`fileEncoding` does that). Confirm
# that non-ASCII column names such as "Pääomatulot" come through intact.
income <- read.csv("tulot2017.csv", encoding = "latin1")
str(income)

finland <- read.csv("ek2023.csv", encoding = "latin1")
str(finland)

# merge() defaults to all = FALSE (inner join): rows whose "Alue" value is
# missing from either file are dropped from `df`.
df <- merge(finland, income, by = "Alue")
colnames(df)
|
|
|
|
# Explorative Data Analysis

## Scatterplots

# One point per row of `df`: SDP support against the `Tulot` column.
ggplot(df, aes(Tulot, SDP)) +
  geom_point() +
  labs(x = "Average Taxable Income (euros)", y = "Support for the SDP (%)")
|
|
|
|
#' Scatterplot of party support against an income measure
#'
#' @param income_type Name (single string) of the income column shown on the
#'   x axis. "Tulot", "Mediaanitulot", "Ansiotulot", "Pääomatulot" and
#'   "Tulot_miinus_verot" are given an English label; any other column name is
#'   used verbatim as its own label instead of leaving the labels blank.
#' @param party Name (single string) of the party-support column shown on the
#'   y axis.
#' @param data Data frame containing both columns. Defaults to the global
#'   `df`, which is what the function used before this argument existed.
#' @return A ggplot object (points only, minimal theme).
plot_income_against_support <- function(income_type, party, data = df) {
  stopifnot(
    length(income_type) == 1L, !is.na(income_type),
    length(party) == 1L, !is.na(party)
  )

  english_labels <- c(
    "Tulot" = "Average Taxable Income",
    "Mediaanitulot" = "Median Taxable Income",
    "Ansiotulot" = "Average Earned Income",
    "Pääomatulot" = "Average Investment Income",
    "Tulot_miinus_verot" = "Average Income after Tax"
  )

  # Look the label up with match(); an unknown column falls back to its own
  # name so the title and the x-axis label are never left empty.
  label_index <- match(income_type, names(english_labels))
  income_type_english <- if (is.na(label_index)) {
    income_type
  } else {
    english_labels[[label_index]]
  }

  ggplot(
    data = data,
    mapping = aes(x = .data[[income_type]], y = .data[[party]])
  ) +
    geom_point() +
    labs(
      title = paste("Support for", party, "against", income_type_english),
      x = paste(income_type_english, "(euros)"),
      y = paste("Support for", party, "(%)")
    ) +
    theme_minimal()
}
|
|
|
|
# Draw the income-after-tax scatterplot once per party instead of repeating
# the call nine times. A ggplot object is not auto-printed inside a loop, so
# print() is called explicitly; this also makes the plots appear when the
# script is run through source().
for (party in c(
  "SDP", "PS", "KOK", "KESK", "VIHR", "VAS", "RKP", "KD", "LIIKE"
)) {
  print(plot_income_against_support("Tulot_miinus_verot", party))
}
|
|
|
|
## Heatmap

# Columns that enter the correlation matrix, and the label shown for each one.
# The two vectors are matched by position, so they must stay the same length
# and in the same order.
income_and_voting_columns <- c(
  "Tulot", "Mediaanitulot", "Ansiotulot", "Pääomatulot", "Tulot_miinus_verot",
  "SDP", "PS", "KOK", "KESK", "VIHR", "VAS", "RKP", "KD", "LIIKE"
)
english_names <- c(
  "Average Taxable Income", "Median Taxable Income", "Average Earned Income",
  "Average Investment Income", "Average Income after Tax",
  "SDP", "PS", "KOK", "KESK", "VIHR", "VAS", "RKP", "KD", "LIIKE"
)
stopifnot(length(income_and_voting_columns) == length(english_names))

# Pairwise correlations, reshaped to long form: one row per (Var1, Var2) pair
# with the coefficient in `Freq`.
cor_matrix <- cor(df[income_and_voting_columns])
cor_data <- as.data.frame(as.table(cor_matrix))

# Relabel both axes with the English names while keeping the column order.
cor_data$Var1 <- factor(
  cor_data$Var1,
  levels = income_and_voting_columns,
  labels = english_names
)
cor_data$Var2 <- factor(
  cor_data$Var2,
  levels = income_and_voting_columns,
  labels = english_names
)

ggplot(cor_data, aes(Var1, Var2, fill = Freq)) +
  geom_tile(color = "white") +
  geom_text(aes(label = round(Freq, 2)), color = "black", size = 4) +
  scale_fill_gradient2(
    low = "blue",
    high = "red",
    mid = "white",
    midpoint = 0,
    # Spelled out in full: `limit` only worked through partial argument
    # matching.
    limits = c(-1, 1),
    name = "Correlation"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  )
|
|
|
|
## Stacked bar chart

parties <- c("SDP", "PS", "KOK", "KESK", "VIHR", "VAS", "RKP", "KD", "LIIKE")

# Per electoral district (`Vaalipiiri`), average each party's support over the
# district's rows, weighting every row by its `Tulonsaajia` value
# (presumably the number of income recipients -- confirm).
weighted_districts <- df %>%
  group_by(Vaalipiiri) %>%
  summarize(across(all_of(parties), ~ weighted.mean(.x, w = Tulonsaajia)))

# Long form: one row per (district, party) pair for the stacked bars.
weighted_districts_long <- weighted_districts %>%
  pivot_longer(
    cols = all_of(parties),
    names_to = "Party",
    values_to = "Support"
  )

ggplot(
  weighted_districts_long,
  aes(x = Vaalipiiri, y = Support, fill = Party)
) +
  geom_bar(stat = "identity") +
  labs(
    x = "Electoral District",
    y = "Weighted Average Support (%)",
    fill = "Party"
  ) +
  # theme_minimal() is a complete theme, so it has to be added first: placed
  # after theme(), it would replace the axis-text rotation set below.
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
|
|
|
# Hypothesis 1

# Linear regression of KOK support on the `Tulot_miinus_verot` column.
model <- lm(formula = KOK ~ Tulot_miinus_verot, data = df)
summary(model)

# Same relationship as a scatterplot with the fitted regression line.
ggplot(df, aes(Tulot_miinus_verot, KOK)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(x = "Average Income after Tax (euros)", y = "Support for KOK (%)") +
  theme_minimal()
|
|
|
|
# Hypothesis 2

# Linear regression of VIHR support on the `Tulot_miinus_verot` column.
# Note: this overwrites the `model` object of the previous hypothesis.
model <- lm(formula = VIHR ~ Tulot_miinus_verot, data = df)
summary(model)

# Same relationship as a scatterplot with the fitted regression line.
ggplot(df, aes(Tulot_miinus_verot, VIHR)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(x = "Average Income after Tax (euros)", y = "Support for VIHR (%)") +
  theme_minimal()
|
|
|
|
# Hypothesis 3

# Linear regression of PS support on the `Tulot_miinus_verot` column
# (summary only; no plot is drawn for this hypothesis).
model <- lm(formula = PS ~ Tulot_miinus_verot, data = df)
summary(model)
|
|
|
|
# Hypothesis 4

# Linear regression of KESK support on the `Tulot_miinus_verot` column.
model <- lm(formula = KESK ~ Tulot_miinus_verot, data = df)
summary(model)

# Same relationship as a scatterplot with the fitted regression line.
ggplot(df, aes(Tulot_miinus_verot, KESK)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(x = "Average Income after Tax (euros)", y = "Support for KESK (%)") +
  theme_minimal()
|
|
|
|
# Hypothesis 5

# Names treated as urban; every row of `df` whose `Alue` is not in this list
# is classified as rural.
# NOTE(review): matching is by exact string, so entries that do not occur
# verbatim in `df$Alue` are silently ignored -- confirm the list against the
# data (e.g. with setdiff(urban_areas, df$Alue)).
urban_areas <- c(
  "Helsinki", "Tampere", "Turku", "Oulu", "Jyväskylä", "Lahti", "Kuopio", "Pori",
  "Joensuu", "Vaasa", "Lappeenranta", "Rovaniemi", "Seinäjoki", "Hämeenlinna",
  "Porvoo", "Kotka", "Kouvola", "Hyvinkää", "Mikkeli", "Kokkola", "Rauma", "Lohja",
  "Kajaani", "Salo", "Riihimäki", "Imatra", "Kemi", "Forssa", "Jakobstad",
  "Savonlinna", "Kirkkonummi", "Raahe", "Varkaus", "Valkeakoski", "Tornio",
  "Hamina", "Iisalmi", "Mariehamn", "Nummela", "Heinola", "Ilmajoki", "Kurikka",
  "Pieksämäki", "Ylivieska", "Jämsä", "Nastola", "Mäntsälä", "Siilinjärvi", "Lapua",
  "Uusikaupunki", "Vammala", "Söderkulla", "Pargas", "Orimattila", "Loimaa", "Ekenäs",
  "Kauhajoki", "Äänekoski", "Paimio", "Toijala", "Kuusamo", "Laukaa", "Karis",
  "Kankaanpää", "Nurmijärvi", "Turenki", "Mänttä", "Karkkila", "Hanko",
  "Rajamäki", "Muurame", "Muhos", "Loviisa", "Lieksa", "Joutseno", "Kyröskoski",
  "Parola", "Lauttakylä", "Laihia", "Kalajoki", "Iin Hamina", "Jokela", "Eura",
  "Orivesi", "Veikkola", "Kyläsaari", "Pihlava", "Vuokatti", "Keuruu", "Valkeala",
  "Myllykoski", "Kiiminki", "Laitila", "Toivala", "Vuorela", "Kauhava", "Vuores",
  "Nivala", "Oulainen", "Kuhmo", "Liminka", "Viiala", "Suonenjoki"
)

# Adds the urban/rural flag to `df`; the later hypotheses reuse it.
df[["Type"]] <- ifelse(df[["Alue"]] %in% urban_areas, "urban", "rural")

# One-sided two-sample t-test: is mean KOK support greater in urban rows
# than in rural rows?
urban_support <- with(df, KOK[Type == "urban"])
rural_support <- with(df, KOK[Type == "rural"])
result <- t.test(
  urban_support, rural_support,
  alternative = "greater",
  conf.level = 0.99
)
print(result)
|
|
|
|
# Hypothesis 6

# One-sided two-sample t-test: is mean KESK support lower in urban rows than
# in rural rows? Relies on the `Type` column already present in `df`.
urban_support <- with(df, KESK[Type == "urban"])
rural_support <- with(df, KESK[Type == "rural"])
result <- t.test(
  urban_support, rural_support,
  alternative = "less",
  conf.level = 0.99
)
print(result)
|
|
|
|
# Hypothesis 7

# One-sided two-sample t-test: is mean PS support lower in urban rows than
# in rural rows? Relies on the `Type` column already present in `df`.
urban_support <- with(df, PS[Type == "urban"])
rural_support <- with(df, PS[Type == "rural"])
result <- t.test(
  urban_support, rural_support,
  alternative = "less",
  conf.level = 0.99
)
print(result)
|
|
|
|
# Hypothesis 8

# SDP support per electoral district, weighting each row by `Tulonsaajia`.
# weighted.mean() replaces the hand-written sum(x * w) / sum(w) so this
# section computes district averages the same way as the stacked bar chart.
weighted <- df %>%
  group_by(Vaalipiiri) %>%
  summarise(weighted_sdp_support = weighted.mean(SDP, w = Tulonsaajia))

# Two-sided one-sample t-test of the district averages against 20 (%).
result <- t.test(weighted$weighted_sdp_support, mu = 20, conf.level = 0.99)
print(result)
|
|
|
|
# Hypothesis 9

# One-sided two-sample t-test: is mean RKP support greater in rows of the
# "Vaasa" electoral district than in all other rows?
# NOTE(review): assumes the district is spelled exactly "Vaasa" in
# `df$Vaalipiiri` -- confirm against the data.
vaasa <- with(df, RKP[Vaalipiiri == "Vaasa"])
other <- with(df, RKP[Vaalipiiri != "Vaasa"])
result <- t.test(vaasa, other, alternative = "greater", conf.level = 0.99)
print(result)
|