# cleaning_wb.R
# Start
# title: "Cleaning economic indicators datasets"
# author: "Raquel Baeta"
# date: "2024-03-04"
# Install any required packages that are not already available, so that
# re-running the script does not reinstall everything each time.
required_packages <- c(
  "readxl", "tidyr", "tidyverse", "dplyr", "wbstats", "WDI"
)
missing_packages <- setdiff(
  required_packages, rownames(installed.packages())
)
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}
# Load libraries. library() attaches exactly one package per call, so each
# package gets its own call (library(c(...)) fails with an error).
library(readxl)
library(tidyr)
library(tidyverse)
library(dplyr)
library(wbstats)
library(WDI)
# Set the working directory
setwd("~/Desktop/working-sessions/cleaning_data")
# Download the economic indicators with wbstats::wb_data() for 1996-2019.
# The date arguments are spelled out in full (start_date / end_date) rather
# than relying on partial argument matching of `start` / `end`.
worldbank_data <- wb_data(
  indicator = c(
    "NY.GDP.PCAP.CD",    # GDP per capita (current US$)
    "NY.GDP.MKTP.CD",    # GDP (current US$)
    "NE.DAB.TOTL.ZS",    # Gross national expenditure (% of GDP)
    "NY.ADJ.NNTY.PC.CD", # Adjusted net national income per capita (current US$)
    "NE.IMP.GNFS.ZS",    # Imports of goods and services (% of GDP)
    "NE.EXP.GNFS.ZS"     # Exports of goods and services (% of GDP)
  ),
  start_date = 1996,
  end_date = 2019
)
# Check the column names of the download
names(worldbank_data)
# Download country metadata (regions and related information)
wb_countries <- wb_countries()
names(wb_countries)
# Attach the region columns to the indicator data, matching on the ISO3
# country code and keeping every indicator row (all.x = TRUE). Both inputs
# carry a `country` column, so merge() suffixes them as `country.x`
# (indicator data) and `country.y` (metadata).
merged_wb_data <- merge(
  x = worldbank_data,
  y = wb_countries[, c("iso3c", "country", "region", "region_iso3c")],
  by = "iso3c",
  all.x = TRUE
)
# Keep only rows whose region is known and is not "Aggregates"
merged_wb_data <- merged_wb_data[
  with(merged_wb_data, !is.na(region) & region != "Aggregates"),
]
# View colnames
colnames(merged_wb_data)
# Keep only the analysis columns and give them readable names in one step
# (select() renames with `new = old`). `country.x` is the country name that
# came from the indicator data in the merge.
merged_wb_data <- merged_wb_data %>%
  select(
    region,
    region_iso3c,
    country = country.x,
    code = iso3c,
    year = date,
    gdp_cap_dollar = "NY.GDP.PCAP.CD",
    gdp = "NY.GDP.MKTP.CD",
    gne_gdp = "NE.DAB.TOTL.ZS",
    exports_gdp = "NE.EXP.GNFS.ZS",
    imports_gdp = "NE.IMP.GNFS.ZS"
  )
# merged_wb_data now has the renamed columns
colnames(merged_wb_data)
# List each distinct country name once, in order of first appearance
# (this only inspects the data; no rows are removed from merged_wb_data)
unique_countries <- merged_wb_data %>%
  distinct(country) %>%
  pull(country)
# Display the unique country names
print(unique_countries)
#
# Creating Trade Ratio Variable
#
# Trade as a share of GDP: exports_gdp and imports_gdp are both already
# expressed as % of GDP, so their sum is total trade as % of GDP. No further
# division is applied, because dividing by GDP per capita (US$) would mix
# percentages with dollar amounts.
merged_wb_data$trade_ratio <-
  merged_wb_data$exports_gdp + merged_wb_data$imports_gdp
# Look
str(merged_wb_data)
head(merged_wb_data)
#
# US Deflator
#
# World Bank indicator code for the GDP deflator series
deflator_indicator_code <- "NY.GDP.DEFL.ZS.AD"
# Country code for the United States
country_code <- "USA"
# Fetch the US GDP deflator series
# NOTE(review): wbstats::wb() appears to be superseded by wb_data() in recent
# wbstats releases; confirm it is still available in the installed version.
us_deflator_data <- wbstats::wb(
  country = country_code,
  indicator = deflator_indicator_code
)
# Check
colnames(us_deflator_data)
# Preview the first rows of us_deflator_data
print(head(us_deflator_data))
# Drop the identifier columns that are not needed afterwards
us_deflator_data <- us_deflator_data %>%
  select(-c(iso3c, indicatorID, indicator, iso2c, country))
# Check
print(us_deflator_data)
# Extract the deflator value recorded for the year 2015
# (date is compared as the string "2015")
deflator_2015 <- us_deflator_data$value[
  which(us_deflator_data$date == "2015")
]
# Check
print(deflator_2015)
#
# Apply the deflator
#
# Convert current-US$ GDP per capita into constant 2015 US$:
#   adjusted = nominal / (deflator_year / deflator_2015)
# The US deflator is rebased so that 2015 equals 1 and matched to each row by
# year. (Dividing by a constant 100 would only rescale the series and leave
# price changes in place.)
stopifnot(length(deflator_2015) == 1, !is.na(deflator_2015))
us_deflator_by_year <- data.frame(
  year = as.numeric(us_deflator_data$date),
  deflator_index = us_deflator_data$value / deflator_2015
)
# Create a new column for the deflated GDP per capita; the helper index column
# is dropped again so only adjusted_gdp_cap is added.
merged_wb_data_deflated <- merged_wb_data %>%
  left_join(us_deflator_by_year, by = "year") %>%
  mutate(adjusted_gdp_cap = gdp_cap_dollar / deflator_index) %>%
  select(-deflator_index)
# Display the adjusted data
print(head(merged_wb_data_deflated$adjusted_gdp_cap))
str(merged_wb_data_deflated)
#
# Log transforming GDP Variables
#
# Add natural-log versions of adjusted GDP per capita and of total GDP
merged_wb_data_deflated <- merged_wb_data_deflated %>%
  mutate(
    log_adjusted_gdp_cap = log(adjusted_gdp_cap),
    log_gdp = log(gdp)
  )
# Look
str(merged_wb_data_deflated)
head(merged_wb_data_deflated)
# Keep the final analysis columns, in their output order.
# Note: this data frame shares its name with the wb_data() function used
# earlier for the download.
wb_data <- merged_wb_data_deflated %>%
  select(
    region, region_iso3c, country, code, year,
    log_adjusted_gdp_cap, log_gdp, trade_ratio, exports_gdp, imports_gdp
  )
str(wb_data)
summary(wb_data)
# Write the cleaned data to CSV, without a row-name column
write.csv(
  x = wb_data,
  file = "~/Desktop/working-sessions/cleaning_data/government_data_updated.csv",
  row.names = FALSE
)
# Write the same data frame as an RDS file
# NOTE(review): this path is one directory above the CSV's cleaning_data/
# folder; confirm that is intended.
saveRDS(
  object = wb_data,
  file = "~/Desktop/working-sessions/government_data_updated.csv.rds"
)
# End