-
Notifications
You must be signed in to change notification settings - Fork 0
/
jumpstart_cohort12datachallenge.rb
292 lines (232 loc) · 10.7 KB
/
jumpstart_cohort12datachallenge.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
require 'csv'
# Location of the land-use CSV, relative to the directory the script is run from.
data_file = './MajorLandUse.csv'
# Read the whole file into memory: one Hash per data row, keyed by the
# header-row column names. Every value is still a String at this point.
land_data = CSV.foreach(data_file, headers: true).map(&:to_h)
# print land_data
# Before answering any question, drop the rows that must be excluded:
#   1) rows whose "Region" column is "48 States" or "U.S. total"
#   2) rows whose "Region or State" name contains the letter G
# land_data.each do |region|
# # puts region["Region"]
# if region["Region"] == "48 States" || region["Region"] == "U.S. total"
# land_data.delete(region)
# end
# end
# Drop, in place, every row that the challenge rules exclude:
#   * nationwide total rows ("Region" is "48 States" or "U.S. total")
#   * rows whose "Region or State" name contains the letter G, in either case
# The block always returns an explicit true/false. `to_s` keeps a row with a
# missing "Region or State" value from raising; such a row is simply kept.
land_data.delete_if do |region|
  nationwide_total = ["48 States", "U.S. total"].include?(region["Region"])
  contains_letter_g = region["Region or State"].to_s.downcase.include?("g")
  nationwide_total || contains_letter_g
end
puts "Question 1: Which state had the most forest-use land grazed in 2012?"
# Approach: the user picks one of the forest-use columns and a year. The rows
# for that year are selected and the row with the largest value in the chosen
# column wins. All CSV values are Strings, so they are converted with
# to_i / to_f before comparing.
puts "What information do you want to select from below?"
# Menu number => CSV column name.
options_hash = {
  1 => "Forest-use land grazed",
  2 => "Forest-use land not grazed",
  3 => "Forest-use land (all)"
}
puts "Here are your options:"
options_hash.each do |key, value|
  puts "#{key}: most #{value}"
end
user_option = gets.chomp.to_i
# to_i turns non-numeric input into 0, so anything that is not a menu key
# (text, zero, negatives, numbers past the last option) is asked for again.
until options_hash.key?(user_option)
  puts "Select a valid option"
  user_option = gets.chomp.to_i
end
# Echo the choice only once it is known to be valid.
selected_column = options_hash[user_option]
puts selected_column
# Instead of hard-coding 2012, let the user choose the year.
puts "What year do you want to see the state with the most #{selected_column}?"
user_year = gets.chomp.to_i
while user_year <= 1900 # rejects text (to_i gives 0), negative years, and years up to 1900
  puts "Try again."
  user_year = gets.chomp.to_i
end
current_greatest_land = 0
current_greatest_state = ""
year_records = land_data.select { |record| record["Year"].to_i == user_year }
# NOTE(review): if the file also holds region-level rows (Question 3 filters
# "Northeast" / "Northern Plains" out of "Region or State", which suggests it
# does), such a row could win here - confirm against the data.
if year_records.empty?
  # No row for that year at all. A year whose values are all zero is still
  # real data and is reported in the else branch.
  puts "Data for #{user_year} does not exist"
else
  # max_by keeps the first row on ties, same as a strict ">" comparison.
  top_record = year_records.max_by { |record| record[selected_column].to_f }
  current_greatest_land = top_record[selected_column].to_f
  current_greatest_state = top_record["Region or State"]
  puts "#{current_greatest_state} has the most #{selected_column} of #{current_greatest_land}"
end
puts "Question 2: In the year 1945, amongst the states in the Mountain region, which state made the least contribution to its region's cropland use for pasture?"
# Approach: ask for a year, keep the rows for that year whose "Region" is
# "Mountain", and pick the row with the smallest "Cropland used for pasture".
# CSV values are Strings, so they are converted with to_i before comparing.
puts "What year do you want Cropland used for pasture information?"
user_crop_year = gets.chomp.to_i
# to_i gives 0 for text, so this also rejects non-numeric input.
while user_crop_year <= 1900
  puts "Enter a valid year"
  user_crop_year = gets.chomp.to_i
end
mountain_records = land_data.select do |record|
  record["Year"].to_i == user_crop_year && record["Region"] == "Mountain"
end
# min_by returns nil for an empty list and the first row on ties. Taking the
# record and its value from the same row guarantees the record is set even
# when the very first matching row is the smallest one.
least_state_record = mountain_records.min_by { |record| record["Cropland used for pasture"].to_i }
least_Cropland_used_for_pasture = least_state_record && least_state_record["Cropland used for pasture"].to_i
if least_state_record.nil?
  puts "No data exists"
else
  puts " #{least_state_record["Region or State"]} made the least contribution in Cropland used for pasture with a total of #{least_state_record["Cropland used for pasture"]}."
end
puts "Question 3: Among the states that begin with N, which state had less than 100 in land in urban areas " \
     "for at least 5 years between 1950 and 2000?"
# Approach: count, per state, how many rows satisfy all of the following:
#   * "Region or State" starts with "N" and is not one of the region names
#     "Northeast" / "Northern Plains"
#   * "Land in urban areas" is below 100
#   * "Year" is between 1950 and 2000 inclusive
# States whose count reaches 5 are the answer.
state_count_hash = {}
land_data.each do |record|
  name = record["Region or State"]
  year = record["Year"].to_i
  next unless name.start_with?("N")
  next if name == "Northeast" || name == "Northern Plains"
  next unless record["Land in urban areas"].to_i < 100
  next unless year >= 1950 && year <= 2000

  # First qualifying row for a state starts its count at 1.
  state_count_hash[name] = state_count_hash.fetch(name, 0) + 1
end
puts state_count_hash
# Keep only the states that qualified in at least 5 rows.
states = state_count_hash.select { |_state, count| count >= 5 }.keys
if states.empty?
  puts "No states exist"
else
  puts "States are: "
  puts states
end
# North Dakota, Nevada
# puts states = []
# Nevada
puts "Question 4: In the year 1969, amongst the states in the Northeast, Northern Plains, and Appalachian regions, what was the median value for grassland pasture and range?"
# Approach: collect "Grassland pasture and range" (as integers) for the 1969
# rows whose "Region" is one of the three target regions, sort the values,
# and take the middle one. With an even count the median is the average of
# the two middle values.
# NOTE(review): if the file contains region-level rows with these "Region"
# values, they are included in the median too - confirm against the data.
target_regions = ["Northeast", "Northern Plains", "Appalachian"]
grass_array = land_data
  .select { |record| record["Year"].to_i == 1969 && target_regions.include?(record["Region"]) }
  .map { |record| record["Grassland pasture and range"].to_i }
  .sort
median = nil
unless grass_array.empty?
  # Zero-based index of the middle element (odd count) or of the upper of
  # the two middle elements (even count).
  middle = grass_array.length / 2
  median =
    if grass_array.length.even?
      # Divide by 2.0 so a half value (e.g. 1.5) is not truncated.
      (grass_array[middle - 1] + grass_array[middle]) / 2.0
    else
      grass_array[middle]
    end
end
if median.nil?
  # No matching rows: there is no median to report.
  puts "No data exists"
else
  puts "The median for grassland pasture and range is #{median}."
end
#initialize array/hash
#access array/hash
#add or delete from array/hash
#zero based for math
puts "Question 5: Consider the top 10 best overall ranked states according to the U.S. News in the year 2017. Amongst these states, what was the average value of all special uses in land in these states in the year 2007."
# Approach: take the 2007 rows for the ten listed states, read
# "All special uses of land" as integers, and average them.
# NOTE(review): the exclusion step earlier in this script removes names that
# contain a "g", which would include "Washington". The mean divides by the
# number of rows actually found, not by 10.
top_states = [
  "Massachusetts", "New Hampshire", "Minnesota", "North Dakota", "Washington",
  "Iowa", "Utah", "Maryland", "Colorado", "Vermont"
]
special_land_array = land_data
  .select { |record| record["Year"].to_i == 2007 && top_states.include?(record["Region or State"]) }
  .map { |record| record["All special uses of land"].to_i }
puts special_land_array
# Total of the collected values.
total = special_land_array.sum
puts total
# Mean of the collected values; stays 0 when nothing matched.
mean = 0
if special_land_array.empty?
  # Avoid dividing by zero when no row matched.
  puts "No data exists"
else
  # fdiv performs floating-point division so the average is not truncated.
  mean = total.fdiv(special_land_array.length).round(2)
  puts "The average value of all special uses in land in 2007 for the top 10 states based on U.S. News is #{mean}."
end