.Rhistory

# Draw the example3 and example4 objects with the same point style and axis
# labels; both plots carry the title "Correlation = 0".
plot(
  example3,
  col = "steelblue", pch = 20,
  xlab = "X", ylab = "Y",
  main = "Correlation = 0"
)
plot(
  example4,
  col = "steelblue", pch = 20,
  xlab = "X", ylab = "Y",
  main = "Correlation = 0"
)
library(tidyverse)
library(readxl)

# Import the customer data from the swirl-class GitHub repository.
# URLs have to be quoted strings and must keep the "%20" escape for the space
# in the file name. The raw.githubusercontent.com host is used because the
# "blob" page URLs return an HTML page rather than the file.
repo_raw <- "https://raw.githubusercontent.com/lisamarie0509/swirl-class/main"
xlsx_url <- paste0(repo_raw, "/Customer%20data.xlsx")
csv_url <- paste0(repo_raw, "/Customer%20data.csv")

# read_excel() reads local files only, so download the workbook to a temporary
# file first. mode = "wb" keeps the binary file intact on Windows.
xlsx_path <- tempfile(fileext = ".xlsx")
download.file(xlsx_url, xlsx_path, mode = "wb")
serq <- read_excel(xlsx_path)
# The history assigned the same workbook to several names; keep all of them.
jjj <- serq
ccc <- serq
# The original called as_tibble(df), but no `df` had been created yet; show
# the imported workbook instead.
as_tibble(serq)

?read_csv
# read_csv() has no `delim` or `sep` argument. The file appears to be
# semicolon-separated (the attempts used ";"), which is what read_csv2()
# expects.
sss <- read_csv2(csv_url)
# Pass the object explicitly; problems() without an argument depends on the
# last value printed at the console.
problems(sss)
View(sss)
# Data generation for a regression exercise. This was pasted from an R Markdown
# chunk ({r data generation, echo = FALSE, results = "hide"}); the fence lines
# and the prose line are not R code and are kept here as comments.
d <- data.frame(x = runif(100, -1, 1))
a <- 0
# Slope: a random sign times one of 0, 0.6 or 0.9.
b <- sample(c(-1, 1), 1) * sample(c(0, 0.6, 0.9), 1)
d$y <- a + b * d$x + rnorm(100, sd = 0.25)
write.csv(d, "regression.csv", row.names = FALSE, quote = FALSE)
m <- lm(y ~ x, data = d)
# Estimated slope and its p-value (second coefficient row, fourth column).
bhat <- coef(m)[2]
bpvl <- summary(m)$coefficients[2, 4]
# Three flags: slope not significant at 5%, significantly positive,
# significantly negative.
bsol <- c(bpvl >= 0.05, (bpvl < 0.05) & (bhat > 0), (bpvl < 0.05) & (bhat < 0))
plot(y ~ x, data = d)

# To replicate the analysis in R:
## data
d <- read.csv("regression.csv")
## regression
m <- lm(y ~ x, data = d)
summary(m)
## visualization
plot(y ~ x, data = d)
abline(m)
# Help lookups and package installation for gt, exams and several GitHub
# packages.
?fmt
??fmt
install.packages("gt")
library("gt")
install.packages("exams")
?exams
??exams
devtools::install_github("koncina/unilur")
# Recycling demo: the shorter vector is reused, and R warns because 10 is not
# a multiple of 3.
1:3 + 1:10
devtools::install_github("jrnold/r4ds-exercise-solutions")
# Exercise text pasted into the console (not R code):
# Run `ggplot(data = mpg)`. What do you see?
devtools::install_github("jonotuke/examMarking")
??examMarking
## ----------------------------------------------------------------------------------------
## install.packages("tidyverse")
library(tidyverse)
## list of pre loaded data
data()
# the mpg data set
data(mpg)
?mpg
head(mpg)
tail(mpg)
## list of pre loaded data
data()
# seq(from, to, by): the third argument is the step width.
seq(1, 3)
seq(1, 3, 3)
seq(1, 12, 3)
seq(1, 120, 3)
# "112 115 118" was console output pasted back in (the last three values of
# the sequence above), and tail() was called without an object. Ask for the
# last values explicitly instead.
tail(seq(1, 120, 3), 3)
# NOTE(review): `deaths` and `covid` are not created anywhere in the visible
# history; these calls need those objects to exist.
tail(deaths)
tail(covid)
# install.packages("tidyverse")
# install.packages("ggpubr")
# install.packages("sjPlot")
library("tidyverse")
library("ggpubr")
library("sjPlot")
# load() restores the objects stored in forest.Rdata; the code below works on
# the data frame `df`.
load(url("https://github.com/hubchev/courses/raw/main/dta/forest.Rdata"))
head(df)
tail(df)
dim(df)
summary(df)
df <- rename(df, country = country.x)
df <- rename(df, year = date)
df <- df %>%
  mutate(gdp_pc = gdp / pop)
#15
plot(df)
#16
# Subset the rows before plotting: `df(...)` calls the F-density function
# stats::df(), it does not index the data frame.
plot(filter(df, year == 2015))
#18
ggplot(df, aes(x = gdp_growth, y = unemployment_dif)) +
  geom_point() +
  stat_smooth(formula = y ~ x, method = "lm", se = FALSE)
#19
# The lone "+" lines (console continuation prompts pasted with the code) broke
# the chain, and the titles named an income group that was never selected.
# Each plot now filters `df` to the group named in its title.
gg_high <- ggplot(
  filter(df, income == "High income"),
  aes(x = gdp_growth, y = unemployment_dif, label = income)
) +
  geom_point() +
  stat_smooth(formula = y ~ x, method = "lm", se = FALSE) +
  # geom_text(hjust = 0, vjust = 0) +
  ggtitle("high income")
gg_high
gg_low <- ggplot(
  filter(df, income == "Low income"),
  aes(x = gdp_growth, y = unemployment_dif, label = income)
) +
  geom_point() +
  stat_smooth(formula = y ~ x, method = "lm", se = FALSE) +
  # geom_text(hjust = 0, vjust = 0) +
  ggtitle("low income")
gg_low
#20
# Typing `ggarrange` alone only prints the function; call it on the two plots
# to place them side by side.
ggarrange(gg_high, gg_low)
#12
# The new column is defined inside mutate() as name = expression;
# `mutate(gdp_pc) = gdp/pop` is not valid R.
df <- df %>%
  mutate(gdp_pc = gdp / pop)
#13
df %>%
  group_by(income) %>%
  summarise(meanunemploy = mean(unemployment), meangdp_pc = mean(gdp_pc))
#14
# Keep only the rows for 2020 (group_by(year == 2020) merely groups by a
# TRUE/FALSE flag) and show the requested columns. The piped data frame must
# not be repeated as an argument, and the columns are named unemployment_dif
# and gdp_pc.
df %>%
  filter(year == 2020) %>%
  select(year, unemployment_dif, gdp_pc)
#15
# `data` has to be the data frame itself; data(gdp) does not return one.
pairs(~ gdp + gdp_pc + gdp_growth + unemployment + unemployment_dif,
  data = df
)
#18
# "#18" needs its own line, otherwise `df %>%` is part of the comment and the
# plot has no data. The column is unemployment_dif, not unemployment_diff.
df %>%
  ggplot(aes(gdp_growth, unemployment_dif)) +
  geom_point(size = 3) +
  geom_smooth(method = lm, se = FALSE)
#21
# The graphs show how unemployment_dif and gdp_growth relate to each other,
# once for high-income and once for low-income countries. For high-income
# countries the dispersion is relatively wide, unlike for low-income
# countries. Low-income countries show an unstable correlation between the
# two variables. For high-income countries, on the other hand, the graph
# suggests a relationship between unemployment_dif and gdp_growth (the
# original note does not state its direction).
#22
reg_base <- lm(unemployment_dif ~ gdp_growth, data = df)
# I tried this with the panel; there is a mistake.
#23
summary(reg_base)
# 13
# Mean unemployment and GDP per capita by income group, all years.
df %>%
  group_by(income) %>%
  summarise(
    m_unemployment = mean(unemployment),
    m_gdp_pc = mean(gdp_pc)
  )
# 14
# Same table for 2020 only (the first attempt used `year > 2020`). The bare
# unemployment_dif and gdp_growth columns are left out of summarise(): they
# are not aggregated, so they do not yield one row per group.
df %>%
  filter(year == 2020) %>%
  group_by(income) %>%
  summarise(
    m_unemployment = mean(unemployment),
    m_gdp_pc = mean(gdp_pc)
  )
# 15
pairs(~ gdp + gdp_pc + gdp_growth + unemployment + unemployment_dif,
  data = df,
  main = "Scatterplot Matrix"
)
# 16
# `year == 2015` written inside the formula becomes part of the plotted
# expression instead of selecting rows; filter the data instead.
pairs(~ gdp + gdp_pc + gdp_growth + unemployment + unemployment_dif,
  data = filter(df, year == 2015),
  main = "Scatterplot Matrix"
)
# 17
# The call sat on the comment line and therefore never ran.
cor(df$unemployment_dif, df$gdp_growth, method = "pearson")
# 18
ggplot(df, aes(x = gdp_growth, y = unemployment_dif)) +
  geom_point() +
  stat_smooth(
    formula = y ~ x, method = "lm", se = FALSE,
    colour = "blue", linetype = 1
  )
# 19
# aes(income = "High income") does not select rows (income is not an
# aesthetic of geom_point); filter the data to the group named in the title.
gg1 <- ggplot(
  filter(df, income == "High income"),
  aes(x = gdp_growth, y = unemployment_dif)
) +
  geom_point() +
  stat_smooth(
    formula = y ~ x, method = "lm", se = FALSE,
    colour = "blue", linetype = 1
  ) +
  ggtitle("High income")
gg1
gg2 <- ggplot(
  filter(df, income == "Low income"),
  aes(x = gdp_growth, y = unemployment_dif)
) +
  geom_point() +
  stat_smooth(
    formula = y ~ x, method = "lm", se = FALSE,
    colour = "blue", linetype = 1
  ) +
  ggtitle("Low income")
gg2
# 20
ggarrange(gg1, gg2)
# Regressions for all observations and for each income group. The row
# condition is passed as `subset =` by name rather than by position.
allobservation <- lm(unemployment_dif ~ gdp_growth, data = df)
summary(allobservation)
highincome <- lm(unemployment_dif ~ gdp_growth,
  data = df,
  subset = income == "High income"
)
summary(highincome)
lowincome <- lm(unemployment_dif ~ gdp_growth,
  data = df,
  subset = income == "Low income"
)
summary(lowincome)
# 23
# as.table() cannot build a table from object names given as strings;
# sjPlot::tab_model() shows the fitted models side by side.
sjPlot::tab_model(allobservation, highincome, lowincome)
# variables: country, date
#8 there are 11 observations and 6 variables
# Reload and prepare the data first, so that the derived column gdp_pc exists
# for every step below (the original used it before creating it).
load(url("https://github.com/hubchev/courses/raw/main/dta/forest.Rdata"))
head(df)
tail(df)
dim(df)
summary(df)
df <- rename(df, country = country.x)
df <- rename(df, year = date)
df <- df %>%
  mutate(gdp_pc = gdp / pop)
#9 summary statistics
summary(df)
#13
# table(df$unemployment, df$gdp_pc)
#14
# table() would cross-tabulate every distinct value of four numeric columns
# (and `gdppc` does not exist; the column is gdp_pc). Show the columns side
# by side instead.
df %>%
  select(unemployment, gdp_pc, unemployment_dif, gdp_growth)
#15
# Bare column names are not visible to plot(); use a formula with `data`.
pairs(~ gdp + gdp_pc + gdp_growth + unemployment + unemployment_dif,
  data = df
)
results <- lm(unemployment_dif ~ gdp_growth, data = df)
cor.test(df$unemployment_dif, df$gdp_growth)
summary(results)
#18
# The mapping must be wrapped in aes(); a layer is added so that the points
# and the fitted line are drawn.
ggplot(df, aes(x = gdp_growth, y = unemployment_dif)) +
  geom_point() +
  stat_smooth(formula = y ~ x, method = "lm", se = FALSE)
# Stray assignment kept from the history: it only creates a variable named
# conf.int and does not affect the calls around it.
conf.int <- TRUE
# Arguments need commas between them; the x variable is gdp_growth, and the
# y-axis label had a typo.
plot(unemployment_dif ~ gdp_growth,
  data = df,
  main = "scatterplot of unemployment_dif and growth",
  xlab = "growth (X)",
  ylab = "unemployment_dif (Y)"
)
#14
# Keep the 2020 rows and show the requested columns. The piped data frame is
# not repeated as an argument, and the column is gdp_pc (not gdppc).
df %>%
  filter(year == 2020) %>%
  select(year, unemployment_dif, gdp_pc)
#15
pairs(~ gdp + gdp_pc + gdp_growth + unemployment + unemployment_dif, data = df)
#16
# Scatterplot matrix for 2015 only. The result is NOT assigned back to `df`:
# pairs() returns NULL, so `df <- df %>% ... %>% pairs(...)` wiped the data.
# With `data = .` the filtered data frame is passed as the `data` argument.
df %>%
  filter(year == 2015) %>%
  pairs(~ gdp + gdp_pc + gdp_growth + unemployment + unemployment_dif, data = .)
#17
# The method name is "pearson"; "person" is rejected by cor().
cor(df$unemployment_dif, df$gdp_growth, method = "pearson")
#18
df %>%
  ggplot(aes(gdp_growth, unemployment_dif)) +
  geom_point(size = 3) +
  geom_smooth(method = lm, se = FALSE)
# Reload and prepare the data again, as in the history.
load(url("https://github.com/hubchev/courses/raw/main/dta/forest.Rdata"))
head(df)
tail(df)
dim(df)
summary(df)
df <- rename(df, country = country.x)
df <- rename(df, year = date)
df <- df %>%
  mutate(gdp_pc = gdp / pop)
#18
df %>%
  ggplot(aes(gdp_growth, unemployment_dif)) +
  geom_point(size = 3) +
  geom_smooth(method = lm, se = FALSE)
# Start a swirl session: attach swirl, list and then clear the workspace,
# install the "swirl-it" course from the hubchev GitHub account, and launch.
library(swirl)
ls()
rm(list = ls())
install_course_github("hubchev", "swirl-it")
swirl()
# Training calculation, run three times with a different first summand.
# Each run overwrites x, x2 and resultofxandx2.

# first run: x is 5
x2 <- 6
x <- 5
resultofxandx2 <- x2 + x

# second run: x is 99995
x2 <- 6
x <- 99995
resultofxandx2 <- x2 + x

# third run: x is 9.9995
x2 <- 6
x <- 9.9995
resultofxandx2 <- x2 + x
# Install swirl, load the "swirl-it" course and work through it.
install.packages("swirl")
library("swirl")
rm(list = ls())
install_course_github("hubchev", "swirl-it")
swirl()
# `sltjd()` was removed here: no function of that name is defined in the
# visible history (it looks like stray keystrokes), so the call can only fail.
skip()
swirl()
# Workspace exercise: create an object, clear the workspace, list it.
x <- 678
rm(list = ls())
ls()
x <- 678
ls()
?ls
?rm
# install.packages("tidyverse")
# install.packages("ggpubr")
# install.packages("sjPlot")
library("tidyverse")
detach("package:forcats", unload = TRUE)
library(swirl)
swirl()
source("~/Dropbox/hsf/github/courses/scr/exe_subset.R")
swirl()
nxt()
# Skip six questions in a row.
skip()
skip()
skip()
skip()
skip()
skip()
# Re-render the course documents in the order recorded in the history. Before
# every render the working directory is set to the rmd folder, and "all" is
# passed as the second argument of rmarkdown::render().
rmd_dir <- "/home/sthu/Dropbox/hsf/github/courses/rmd/"
render_queue <- c(
  rep("22-10_dsb-project-description.Rmd", 5),
  "checklist.Rmd",
  # NOTE(review): same file name with a lower-case extension; on a
  # case-sensitive file system only one of the two spellings can exist.
  "checklist.rmd",
  "22-10_dsb-project-description.Rmd"
)
for (rmd_file in render_queue) {
  setwd(rmd_dir)
  rmarkdown::render(rmd_file, "all")
}
?diff
# Three standard-normal samples and a data frame of their running sums.
x1 <- rnorm(100)
x2 <- rnorm(100)
x3 <- rnorm(100)
t <- data.frame(a = x1, b = x1 + x2, c = x1 + x2 + x3)

# First figure: column a as a line, column b as steps, then column c as
# semi-transparent points drawn repeatedly in sizes 4, 3, 2, 8 and 4.
plot(t$a, type = "l", ylim = range(t), lwd = 3, col = rgb(1, 0, 0, 0.3))
lines(t$b, type = "s", lwd = 2, col = rgb(0.3, 0.4, 0.3, 0.9))
invisible(lapply(c(4, 3, 2, 8, 4), function(point_size) {
  points(t$c, pch = 20, cex = point_size, col = rgb(0, 0, 1, 0.3))
}))

# Second figure: same layers, with the points drawn using symbols 20, 10, 20.
plot(t$a, type = "l", ylim = range(t), lwd = 3, col = rgb(1, 0, 0, 0.3))
lines(t$b, type = "s", lwd = 2, col = rgb(0.3, 0.4, 0.3, 0.9))
invisible(lapply(c(20, 10, 20), function(point_symbol) {
  points(t$c, pch = point_symbol, cex = 4, col = rgb(0, 0, 1, 0.3))
}))

# Draw fresh samples and rebuild the data frame.
x1 <- rnorm(100)
x2 <- rnorm(100)
x3 <- rnorm(100)
t <- data.frame(a = x1, b = x1 + x2, c = x1 + x2 + x3)

# Third figure: point size 4.
plot(t$a, type = "l", ylim = range(t), lwd = 3, col = rgb(1, 0, 0, 0.3))
lines(t$b, type = "s", lwd = 2, col = rgb(0.3, 0.4, 0.3, 0.9))
points(t$c, pch = 20, cex = 4, col = rgb(0, 0, 1, 0.3))

# Fourth figure: point size 2.
plot(t$a, type = "l", ylim = range(t), lwd = 3, col = rgb(1, 0, 0, 0.3))
lines(t$b, type = "s", lwd = 2, col = rgb(0.3, 0.4, 0.3, 0.9))
points(t$c, pch = 20, cex = 2, col = rgb(0, 0, 1, 0.3))
?plot

# Fifth figure: thin line (lwd = 1) with the step line and points.
plot(t$a, type = "l", ylim = range(t), lwd = 1, col = rgb(1, 0, 0, 0.3))
lines(t$b, type = "s", lwd = 2, col = rgb(0.3, 0.4, 0.3, 0.9))
points(t$c, pch = 20, cex = 2, col = rgb(0, 0, 1, 0.3))

# Line only, first very thick (lwd = 9), then lwd = 2.
plot(t$a, type = "l", ylim = range(t), lwd = 9, col = rgb(1, 0, 0, 0.3))
plot(t$a, type = "l", ylim = range(t), lwd = 2, col = rgb(1, 0, 0, 0.3))
# First look at the built-in mtcars data set.
# `load()` without a file name was removed: it stops with a missing-argument
# error. Built-in data sets are made available with data().
data()
data(mtcars)
force(mtcars)
# Open the spreadsheet viewer only in an interactive session so that the
# script can also run from the command line.
if (interactive()) {
  View(mtcars)
}
data()
head(mtcars, 6)
?mtcars
nrow(mtcars)
ncol(mtcars)
summary(mtcars)
# Distribution of the weight column and a histogram of horsepower.
quantile(mtcars$wt)
var(mtcars$wt)
hist(mtcars$hp)
# The help topic for the variance function is `var` (`?vars` was a typo).
?var
# clear the environment
rm(list = ls())
# set working directory
setwd("/home/sthu/Dropbox/hsf/github/courses/")
# Let us do the following:
# 1. check if a package is installed
# 2. if not installed the package should be installed and loaded
# 3. if installed the package should be loaded
# I like to do it with a function that is part of pacman package:
# requireNamespace() only checks for the package; p_load() is then called
# through pacman:: so pacman itself does not need to be attached.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}
pacman::p_load(
  haven, tidyverse, vtable, gtsummary, pastecs, Hmisc,
  sjlabelled, tis, ggpubr, sjPlot
)
# import data
# (A bare `convergence.dta` line was removed here: it is a file name, not an
# R object, and evaluating it stops with an error. The path is relative to the
# working directory set above.)
data <- read_dta("dta/convergence.dta")
View(data)
# inspect data
names(data)
str(data)
data
head(data)
tail(data)
summary(data)
view(data)
# library(vtable)
vtable(data, missing = TRUE)
# library(pastecs)
stat.desc(data)
# library(Hmisc)
describe(data)
# library(gtsummary)
tbl_summary(data)
View(data)
# check the assignments of countries to continents
data %>%
  select(country, africa, asia, weurope) %>%
  view()
# x_1 adds up the three continent columns; rows where the sum is 0 are
# inspected below.
data <- mutate(data, x_1 = africa + asia + weurope)
View(data)
data %>%
  filter(x_1 == 0) %>%
  select(africa, asia, weurope, country) %>%
  view()