---
This commit is contained in:
@@ -0,0 +1,197 @@
|
||||
# Loading and Merging Datasets

# Both input files are read the same way: strings are marked as Latin-1.
read_latin1_csv <- function(path) {
  read.csv(path, encoding = "latin1")
}

# Income table.
income <- read_latin1_csv("tulot2017.csv")
str(income)

# Election results table.
finland <- read_latin1_csv("ek2023.csv")
str(finland)

# Join the two tables on the shared "Alue" key. merge() is called with its
# defaults, so only Alue values present in both tables are kept.
df <- merge(x = finland, y = income, by = "Alue")
names(df)
|
||||
|
||||
# Loading Libraries
|
||||
library(tidyverse)
|
||||
library(ggplot2)
|
||||
|
||||
# Explorative Data Analysis

## Scatterplots

# One point per row of df: the Tulot column on the x axis against the SDP
# column on the y axis.
ggplot(df, aes(x = Tulot, y = SDP)) +
  geom_point() +
  labs(x = "Average Taxable Income (euros)", y = "Support for the SDP (%)")
|
||||
|
||||
#' Scatterplot of a party's support against an income measure
#'
#' @param income_type Name (single string) of the income column plotted on the
#'   x axis. The known columns "Tulot", "Mediaanitulot", "Ansiotulot",
#'   "Pääomatulot" and "Tulot_miinus_verot" get an English label; any other
#'   column name is used verbatim as the label.
#' @param party Name (single string) of the party column plotted on the y axis.
#' @param data Data frame containing both columns. Defaults to the script-level
#'   `df`, so existing two-argument calls keep working.
#' @return A ggplot object (points, English title and axis labels,
#'   `theme_minimal()`).
plot_income_against_support <- function(income_type, party, data = df) {
  stopifnot(
    is.character(income_type), length(income_type) == 1L,
    is.character(party), length(party) == 1L
  )

  # Fail early with a readable message instead of a tidy-eval error that only
  # surfaces when the plot is rendered.
  missing_columns <- setdiff(c(income_type, party), names(data))
  if (length(missing_columns) > 0) {
    stop(
      "Column(s) not found in data: ",
      paste(missing_columns, collapse = ", "),
      call. = FALSE
    )
  }

  # Translate the Finnish column name to an English label. The final unnamed
  # argument is the fallback, so an unrecognised column no longer produces an
  # empty label and a truncated title.
  income_type_english <- switch(income_type,
    "Tulot" = "Average Taxable Income",
    "Mediaanitulot" = "Median Taxable Income",
    "Ansiotulot" = "Average Earned Income",
    "Pääomatulot" = "Average Investment Income",
    "Tulot_miinus_verot" = "Average Income after Tax",
    income_type
  )

  ggplot(
    data = data,
    mapping = aes(x = .data[[income_type]], y = .data[[party]])
  ) +
    geom_point() +
    labs(
      title = paste("Support for", party, "against", income_type_english),
      x = paste(income_type_english, "(euros)"),
      y = paste("Support for", party, "(%)")
    ) +
    theme_minimal()
}
|
||||
|
||||
# Income after tax against support, one plot per party column. The calls stay
# as separate top-level expressions so that each plot is auto-printed.
plot_income_against_support(income_type = "Tulot_miinus_verot", party = "SDP")
plot_income_against_support(income_type = "Tulot_miinus_verot", party = "PS")
plot_income_against_support(income_type = "Tulot_miinus_verot", party = "KOK")
plot_income_against_support(income_type = "Tulot_miinus_verot", party = "KESK")
plot_income_against_support(income_type = "Tulot_miinus_verot", party = "VIHR")
plot_income_against_support(income_type = "Tulot_miinus_verot", party = "VAS")
plot_income_against_support(income_type = "Tulot_miinus_verot", party = "RKP")
plot_income_against_support(income_type = "Tulot_miinus_verot", party = "KD")
plot_income_against_support(income_type = "Tulot_miinus_verot", party = "LIIKE")
|
||||
|
||||
## Heatmap

# Columns entering the correlation matrix: five income measures followed by
# nine party columns.
income_and_voting_columns <- c(
  "Tulot", "Mediaanitulot", "Ansiotulot", "Pääomatulot", "Tulot_miinus_verot",
  "SDP", "PS", "KOK", "KESK", "VIHR", "VAS", "RKP", "KD", "LIIKE"
)

# English display labels, matched to income_and_voting_columns by position.
english_names <- c(
  "Average Taxable Income", "Median Taxable Income", "Average Earned Income",
  "Average Investment Income", "Average Income after Tax",
  "SDP", "PS", "KOK", "KESK", "VIHR", "VAS", "RKP", "KD", "LIIKE"
)

# The two vectors are paired by position; catch an edit that desynchronises
# them before it silently mislabels the axes.
stopifnot(length(english_names) == length(income_and_voting_columns))

cor_matrix <- cor(df[income_and_voting_columns])

# Long format: one row per (Var1, Var2) pair with the correlation in Freq.
cor_data <- as.data.frame(as.table(cor_matrix))

# Relabel both axis columns identically, keeping the column order above as the
# factor level order (this fixes the order of the tiles on both axes).
axis_columns <- c("Var1", "Var2")
cor_data[axis_columns] <- lapply(
  cor_data[axis_columns],
  factor,
  levels = income_and_voting_columns,
  labels = english_names
)

ggplot(cor_data, aes(Var1, Var2, fill = Freq)) +
  geom_tile(color = "white") +
  geom_text(aes(label = round(Freq, 2)), color = "black", size = 4) +
  scale_fill_gradient2(
    low = "blue",
    high = "red",
    mid = "white",
    midpoint = 0,
    # Spelled out in full: the previous `limit` only worked through partial
    # argument matching.
    limits = c(-1, 1),
    name = "Correlation"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  )
|
||||
|
||||
## Stacked bar chart

parties <- c("SDP", "PS", "KOK", "KESK", "VIHR", "VAS", "RKP", "KD", "LIIKE")

# Per electoral district (Vaalipiiri): mean of each party column, weighted by
# the Tulonsaajia column.
weighted_districts <- df %>%
  group_by(Vaalipiiri) %>%
  summarize(across(all_of(parties), ~ weighted.mean(.x, w = Tulonsaajia)))

# One row per (district, party) pair so the parties can be stacked by fill.
weighted_districts_long <- weighted_districts %>%
  pivot_longer(
    cols = all_of(parties),
    names_to = "Party",
    values_to = "Support"
  )

ggplot(
  weighted_districts_long,
  aes(x = Vaalipiiri, y = Support, fill = Party)
) +
  geom_col() +
  labs(
    x = "Electoral District",
    y = "Weighted Average Support (%)",
    fill = "Party"
  ) +
  # theme_minimal() is a complete theme and replaces every theme element added
  # before it, so it must come first; otherwise the rotated x-axis labels
  # below are silently discarded.
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
||||
|
||||
# Hypothesis 1

# Simple linear regression of the KOK column on income after tax.
model <- lm(formula = KOK ~ Tulot_miinus_verot, data = df)
summary(model)

# Scatterplot of the same two columns with a fitted linear trend line.
ggplot(df, aes(x = Tulot_miinus_verot, y = KOK)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(x = "Average Income after Tax (euros)", y = "Support for KOK (%)") +
  theme_minimal()
|
||||
|
||||
# Hypothesis 2

# Simple linear regression of the VIHR column on income after tax.
model <- lm(formula = VIHR ~ Tulot_miinus_verot, data = df)
summary(model)

# Scatterplot of the same two columns with a fitted linear trend line.
ggplot(df, aes(x = Tulot_miinus_verot, y = VIHR)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(x = "Average Income after Tax (euros)", y = "Support for VIHR (%)") +
  theme_minimal()
|
||||
|
||||
# Hypothesis 3

# Simple linear regression of the PS column on income after tax
# (no accompanying plot for this hypothesis).
model <- lm(formula = PS ~ Tulot_miinus_verot, data = df)
summary(model)
|
||||
|
||||
# Hypothesis 4

# Simple linear regression of the KESK column on income after tax.
model <- lm(formula = KESK ~ Tulot_miinus_verot, data = df)
summary(model)

# Scatterplot of the same two columns with a fitted linear trend line.
ggplot(df, aes(x = Tulot_miinus_verot, y = KESK)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(x = "Average Income after Tax (euros)", y = "Support for KESK (%)") +
  theme_minimal()
|
||||
|
||||
# Hypothesis 5

# Area names treated as urban. Membership is tested by exact string match
# against df$Alue, so any name below that does not occur in Alue verbatim has
# no effect, and every Alue value not listed here is labelled "rural".
# NOTE(review): confirm these spellings match the Alue values in the data.
urban_areas <- c(
  "Helsinki", "Tampere", "Turku", "Oulu", "Jyväskylä", "Lahti", "Kuopio", "Pori",
  "Joensuu", "Vaasa", "Lappeenranta", "Rovaniemi", "Seinäjoki", "Hämeenlinna",
  "Porvoo", "Kotka", "Kouvola", "Hyvinkää", "Mikkeli", "Kokkola", "Rauma", "Lohja",
  "Kajaani", "Salo", "Riihimäki", "Imatra", "Kemi", "Forssa", "Jakobstad",
  "Savonlinna", "Kirkkonummi", "Raahe", "Varkaus", "Valkeakoski", "Tornio",
  "Hamina", "Iisalmi", "Mariehamn", "Nummela", "Heinola", "Ilmajoki", "Kurikka",
  "Pieksämäki", "Ylivieska", "Jämsä", "Nastola", "Mäntsälä", "Siilinjärvi", "Lapua",
  "Uusikaupunki", "Vammala", "Söderkulla", "Pargas", "Orimattila", "Loimaa", "Ekenäs",
  "Kauhajoki", "Äänekoski", "Paimio", "Toijala", "Kuusamo", "Laukaa", "Karis",
  "Kankaanpää", "Nurmijärvi", "Turenki", "Mänttä", "Karkkila", "Hanko",
  "Rajamäki", "Muurame", "Muhos", "Loviisa", "Lieksa", "Joutseno", "Kyröskoski",
  "Parola", "Lauttakylä", "Laihia", "Kalajoki", "Iin Hamina", "Jokela", "Eura",
  "Orivesi", "Veikkola", "Kyläsaari", "Pihlava", "Vuokatti", "Keuruu", "Valkeala",
  "Myllykoski", "Kiiminki", "Laitila", "Toivala", "Vuorela", "Kauhava", "Vuores",
  "Nivala", "Oulainen", "Kuhmo", "Liminka", "Viiala", "Suonenjoki"
)

# Tag every row as "urban" or "rural"; the t-tests for hypotheses 5-7 split on
# this column.
df$Type <- if_else(df$Alue %in% urban_areas, "urban", "rural")
|
||||
|
||||
# Two-sample t-test on the KOK column, urban rows against rural rows, with the
# one-sided alternative that the urban mean is greater.
urban_support <- df[df$Type == "urban", "KOK"]
rural_support <- df[df$Type == "rural", "KOK"]

result <- t.test(
  x = urban_support,
  y = rural_support,
  alternative = "greater",
  conf.level = 0.99
)
print(result)
|
||||
|
||||
# Hypothesis 6

# Two-sample t-test on the KESK column, urban rows against rural rows, with
# the one-sided alternative that the urban mean is less.
urban_support <- df[df$Type == "urban", "KESK"]
rural_support <- df[df$Type == "rural", "KESK"]

result <- t.test(
  x = urban_support,
  y = rural_support,
  alternative = "less",
  conf.level = 0.99
)
print(result)
|
||||
|
||||
# Hypothesis 7

# Two-sample t-test on the PS column, urban rows against rural rows, with the
# one-sided alternative that the urban mean is less.
urban_support <- df[df$Type == "urban", "PS"]
rural_support <- df[df$Type == "rural", "PS"]

result <- t.test(
  x = urban_support,
  y = rural_support,
  alternative = "less",
  conf.level = 0.99
)
print(result)
|
||||
|
||||
# Hypothesis 8

# Per electoral district (Vaalipiiri): SDP column averaged with Tulonsaajia as
# the weight.
weighted <- summarise(
  group_by(df, Vaalipiiri),
  weighted_sdp_support = sum(SDP * Tulonsaajia) / sum(Tulonsaajia)
)

# One-sample, two-sided t-test of the district-level values against mu = 20.
result <- t.test(
  x = weighted$weighted_sdp_support,
  mu = 20,
  conf.level = 0.99
)
print(result)
|
||||
|
||||
# Hypothesis 9

# Split the RKP column into rows whose Vaalipiiri is "Vaasa" and all others.
in_vaasa <- df$Vaalipiiri == "Vaasa"
vaasa <- df$RKP[in_vaasa]
other <- df$RKP[!in_vaasa]

# Two-sample t-test with the one-sided alternative that the Vaasa mean is
# greater.
result <- t.test(
  x = vaasa,
  y = other,
  alternative = "greater",
  conf.level = 0.99
)
print(result)
|
||||
Reference in New Issue
Block a user