-
Notifications
You must be signed in to change notification settings - Fork 0
/
Importing_data.R
87 lines (63 loc) · 3.13 KB
/
Importing_data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#loading tidyverse library
library(tidyverse)
# CSV import ----
# First pass: fetch the CSV from its URL using read_csv's default settings.
inspections <- read_csv(
  file = "http://594442.youcanlearnit.net/inspections.csv"
)
# Print a compact overview of the resulting tibble.
glimpse(inspections)
# Open the help page for read_csv.
?read_csv
# Replacement column names for the inspections data.
names <- c(
  "ID", "DBAName", "AKAName", "License", "FacilityType", "Risk",
  "Address", "City", "State", "ZIP", "InspectionDate", "InspectionType",
  "Results", "Violations", "Latitude", "Longitude", "Location"
)
# Second pass: apply our own column names. skip = 1 drops the first row of
# the file (the old column headers) so it is not read in as data.
inspections <- read_csv(
  file = "http://594442.youcanlearnit.net/inspections.csv",
  skip = 1,
  col_names = names
)
glimpse(inspections)
# TSV import ----
# Tab-separated value files are read with read_tsv. First pass: defaults only.
inpatient <- read_tsv(
  file = "http://594442.youcanlearnit.net/inpatient.tsv"
)
glimpse(inpatient)
# Second pass: replace the file's header row with our own column names.
names2 <- c(
  "DRGDefinition", "ProviderID", "Name", "Address", "City", "State",
  "ZIP", "Region", "Discharges", "AverageCharges", "AverageTotalPayments",
  "AverageMedicarePayments"
)
inpatient <- read_tsv(
  file = "http://594442.youcanlearnit.net/inpatient.tsv",
  skip = 1,
  col_names = names2
)
glimpse(inpatient)
# Third pass: also pin the column types, one letter per column. The first
# eight columns are character ("c"), Discharges is integer ("i"), and the
# last three are generic numbers ("n").
types <- "ccccccccinnn"
inpatient <- read_tsv(
  file = "http://594442.youcanlearnit.net/inpatient.tsv",
  skip = 1,
  col_names = names2,
  col_types = types
)
glimpse(inpatient)
# Delimited files ----
?read_delim
# read_delim can also read CSV and TSV files, provided the matching delimiter
# is passed; this file separates its fields with "^".
deaths <- read_delim(
  file = "http://594442.youcanlearnit.net/drugdeaths.txt",
  delim = "^"
)
# Fixed-width files ----
?read_fwf
# Column names and the character width of each field. Per the readr docs, an
# NA width for the last field is used when reading a ragged fixed-width file.
names3 <- c("Name", "Title", "Department", "Salary")
lengths <- c(32, 50, 24, NA)
widths <- fwf_widths(widths = lengths, col_names = names3)
# skip = 1 leaves out the first line of the file.
employees <- read_fwf(
  file = "http://594442.youcanlearnit.net/chicagoemployees.txt",
  col_positions = widths,
  skip = 1
)
glimpse(employees)
# Irregularly formatted Excel data ----
library(readxl)
# breakfast.xls must already be downloaded into the working directory.
# First pass: skip the three rows at the top of the sheet.
breakfast <- read_excel(path = "breakfast.xls", skip = 3)
glimpse(breakfast)
# Second pass: skip five rows instead and supply our own column names.
names4 <- c(
  "Year", "FreeStudents", "ReducedStudents", "PaidStudents",
  "TotalStudents", "MealsServed", "PercentFree"
)
breakfast <- read_excel(path = "breakfast.xls", col_names = names4, skip = 5)
glimpse(breakfast)
# The student counts and MealsServed are recorded in millions, so multiply
# them by 1,000,000 (1e6) to get the actual figures. The earlier factor of
# 10,000,000 overstated every value tenfold.
breakfast <- breakfast %>%
  mutate(
    FreeStudents = FreeStudents * 1e6,
    ReducedStudents = ReducedStudents * 1e6,
    PaidStudents = PaidStudents * 1e6,
    TotalStudents = TotalStudents * 1e6,
    MealsServed = MealsServed * 1e6
  )
glimpse(breakfast)
# Divide PercentFree by 100 so it is expressed as a decimal fraction rather
# than as a percentage.
breakfast <- breakfast %>%
  mutate(PercentFree = PercentFree / 100)
glimpse(breakfast)