# Download the course files into the project ----
# mode = "wb" requests a binary transfer explicitly, so the zip archive is
# written byte-for-byte regardless of platform defaults.
download.file(
  url = "https://rstats.kamapu.net/Ressourcen/Dateien/KursDateien.zip",
  destfile = "KursDateien.zip",
  mode = "wb"
)
unzip("KursDateien.zip", overwrite = TRUE)
# The archive is no longer needed once its contents are extracted.
unlink("KursDateien.zip")

# Restructure data ----
Bonn <- readRDS("BonnBevoelkerung.rds")
Bezirke <- readRDS("BonnBezirke.rds")

# No `by` is given, so merge() joins on the columns both tables share.
Bonn <- merge(Bonn, Bezirke)

# Base R version: keep the rows for 2021, keep three columns, and add the
# percentage of women among women + men.
Bonn2021 <- subset(Bonn, Jahr == 2021)
Bonn2021 <- Bonn2021[c("BezirkNr", "Frauen", "Maenner")]
Bonn2021$FrauenProzent <- with(Bonn2021, {
  Frauen / (Maenner + Frauen) * 100
})

# The same steps as a pipeline (here for 2019)
library(dplyr)

Bonn2019 <- Bonn %>%
  filter(Jahr == 2019) %>%
  select(BezirkNr, Frauen, Maenner) %>%
  mutate(FrauenProzent = Frauen / (Frauen + Maenner) * 100)

library(tidyr)

# Long format: one row per BezirkNr, Jahr and gender, with the counts
# converted to a percentage of the `Gesamt` column.
Gender <- Bonn %>%
  mutate(
    Frauen = Frauen / Gesamt * 100,
    Maenner = Maenner / Gesamt * 100
  ) %>%
  pivot_longer(
    cols = c("Maenner", "Frauen"),
    names_to = "Gender",
    values_to = "Prozent"
  ) %>%
  select(BezirkNr, Jahr, Gender, Prozent)

library(ggplot2)

# Boxplots of the percentages per gender, one panel per year.
ggplot(data = Gender, aes(x = Gender, y = Prozent, fill = Gender)) +
  geom_boxplot() +
  facet_wrap(~Jahr)

# Everything in one pipeline: the same reshaping, piped straight into the
# plot. This time the boxplots are per year, with one panel per gender.
Bonn %>%
  mutate(
    Frauen = Frauen / Gesamt * 100,
    Maenner = Maenner / Gesamt * 100
  ) %>%
  pivot_longer(
    cols = c("Maenner", "Frauen"),
    names_to = "Gender",
    values_to = "Prozent"
  ) %>%
  select(BezirkNr, Jahr, Gender, Prozent) %>%
  ggplot(aes(x = factor(Jahr), y = Prozent, fill = factor(Jahr))) +
  geom_boxplot() +
  facet_wrap(~Gender)

# Regressions -----
?airquality
data("airquality")
summary(airquality)

# Keep only the rows where both Ozone and Solar.R are observed.
air <- subset(airquality, !is.na(Ozone) & !is.na(Solar.R))

pairs(air)
plot(Ozone ~ Wind, data = air, pch = 16)

# Simple linear regression of Ozone on Wind.
model_1 <- lm(Ozone ~ Wind, data = air)
summary(model_1)

# Model in plots ----
plot(Ozone ~ Wind, data = air, pch = 16)
abline(model_1, lty = "dashed", col = "red")

# What is inside the object?
str(model_1)
str(summary(model_1))

# R squared is stored in the summary object; print it and write the rounded
# value into the current plot at x = 15, y = 150.
r_quadrat <- summary(model_1)$r.squared
r_quadrat
text(15, 150, labels = round(r_quadrat, 2), col = "blue")

# With accessor functions
coef(model_1)
residuals(model_1)
fitted.values(model_1)

# Predicted vs. observed, with the 1:1 line (intercept 0, slope 1) as
# reference.
plot(
  fitted.values(model_1) ~ Ozone,
  data = air,
  pch = 16,
  xlim = c(0, 200),
  ylim = c(0, 200)
)
abline(a = 0, b = 1, lty = "dashed", col = "red")

# Residuals against the observed values, with a zero reference line.
plot(resid(model_1) ~ Ozone, data = air, pch = 16)
abline(h = 0, lty = "dashed", col = "red")

# Q-Q plots
plot(model_1, which = 2)

# Histograms of Ozone: raw values, then standardized (centered on the mean
# and divided by the standard deviation).
hist(air$Ozone)
with(air, hist((Ozone - mean(Ozone)) / sd(Ozone)))

# The built-in Q-Q plot of the model, followed by one drawn by hand from the
# residuals.
plot(model_1, which = 2)
qqnorm(resid(model_1))
qqline(resid(model_1), lty = "dashed", col = "red")

# Model with several predictors ----
model_2 <- lm(Ozone ~ Wind + Temp, data = air)
summary(model_2)