-
Notifications
You must be signed in to change notification settings - Fork 1
/
6 Build Test Cases.R
153 lines (136 loc) · 6.17 KB
/
6 Build Test Cases.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
#Project: Zillow Tracker
#Code: 6 Build Test Cases
#Author: Scott Onestak
#Last Executed: 7/10/2022
#library
library(tidyverse)
# Read the cleaned dataset from disk (strings are kept as character, not factor).
theData <- read.csv(
  "Data/cleanedFinalDataset.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)

# Restrict the data to the suburbs the test cases are built for.
theData <- filter(
  theData,
  suburb %in% c(
    "Edgewood", "Highland Park", "Point Breeze", "Point Breeze North",
    "Regent Square", "Shadyside", "Squirrel Hill North", "Squirrel Hill South",
    "Swisshelm Park", "Swissvale"
  )
)
# One row per suburb holding the median (despite the "avg" in the name) walk,
# transit and bike scores, ignoring missing values.
avgSuburbScore <- theData %>%
  group_by(suburb) %>%
  summarise(
    across(
      c(walk_score, transit_score, bike_score),
      ~ median(.x, na.rm = TRUE)
    )
  )
# Per-listing ratio of living area to lot area, and its median rounded to two
# decimals. The median is used further down to derive a lot area for each
# living-area test value.
area_ratios <- theData$livingArea / theData$lotArea
median_area_ratio <- round(median(area_ratios, na.rm = TRUE), digits = 2)
# Observed minimum, maximum and median of the three size-related features.
# The min_*/max_* values are replaced by fixed bounds further down in this
# script; they are computed here from the data first.
# NOTE(review): the test grid built later names its bedroom column `beds`,
# while this block reads `theData$bed`. `$` on a data.frame partially matches
# column names, so confirm which name the input file actually uses.

# Bathrooms
min_baths <- min(theData$baths, na.rm = TRUE)
max_baths <- max(theData$baths, na.rm = TRUE)
med_baths <- median(theData$baths, na.rm = TRUE)

# Living area
min_livingArea <- min(theData$livingArea, na.rm = TRUE)
max_livingArea <- max(theData$livingArea, na.rm = TRUE)
med_livingArea <- median(theData$livingArea, na.rm = TRUE)

# Bedrooms
min_bed <- min(theData$bed, na.rm = TRUE)
max_bed <- max(theData$bed, na.rm = TRUE)
med_bed <- median(theData$bed, na.rm = TRUE)
# Medians of the same three features, restricted to listings that sold for
# between 150,000 and 250,000 (both bounds inclusive).
theDataPriceFilter <- filter(
  theData,
  soldPrice >= 150000,
  soldPrice <= 250000
)
med_baths_price <- median(theDataPriceFilter$baths, na.rm = TRUE)
med_livingArea_price <- median(theDataPriceFilter$livingArea, na.rm = TRUE)
med_bed_price <- median(theDataPriceFilter$bed, na.rm = TRUE)
# Fixed, hand-picked bounds for the test grid. These replace the data-derived
# minimums and maximums, some of which come from extreme or questionable
# records.

# Bathrooms
min_baths <- 1
max_baths <- 3

# Living area
min_livingArea <- 1000
max_livingArea <- 2500

# Bedrooms
min_bed <- 1
max_bed <- 4

# Rate range swept by the test grid (stored in the `avg_rate` column)
min_rate <- 3.00
max_rate <- 6.00
# Features that are held constant across every test case: each is fixed at its
# median over the filtered data (missing values ignored).

# Property attributes
yearBuilt <- median(theData$yearBuilt, na.rm = TRUE)
sunScore <- median(theData$sunScore, na.rm = TRUE)
parkingCapacity <- median(theData$parkingCapacity, na.rm = TRUE)
hasImage <- median(theData$hasImage, na.rm = TRUE)
view <- median(theData$view, na.rm = TRUE)
stories <- median(theData$stories, na.rm = TRUE)
climateFactor <- median(theData$climateFactor, na.rm = TRUE)

# Listing-description phrase indicators
phraseExclamation <- median(theData$phraseExclamation, na.rm = TRUE)
phraseLocation <- median(theData$phraseLocation, na.rm = TRUE)
phraseOld <- median(theData$phraseOld, na.rm = TRUE)
phraseOpen <- median(theData$phraseOpen, na.rm = TRUE)
phraseRenovated <- median(theData$phraseRenovated, na.rm = TRUE)
phraseCustom <- median(theData$phraseCustom, na.rm = TRUE)
phraseSmall <- median(theData$phraseSmall, na.rm = TRUE)
phraseStainless <- median(theData$phraseStainless, na.rm = TRUE)

# Remaining factor scores
buildFactor <- median(theData$buildFactor, na.rm = TRUE)
electricityFactor <- median(theData$electricityFactor, na.rm = TRUE)
solarFactor <- median(theData$solarFactor, na.rm = TRUE)
# Heating test cases: one row per heating type, with exactly one indicator set
# to 1 in each row (a 4 x 4 identity matrix).
heatingType <- setNames(
  as.data.frame(diag(4)),
  c("heatingGas", "heatingRadiant", "heatingElectric", "heatingForcedAir")
)
# Cooling test cases, one per row: no cooling, cooling with central air,
# cooling with window/wall air.
coolingType <- as.data.frame(
  matrix(
    c(
      0, 0, 0,
      1, 1, 0,
      1, 0, 1
    ),
    ncol = 3,
    byrow = TRUE,
    dimnames = list(NULL, c("hasCooling", "CentralAir", "WindowWallAir"))
  )
)
# Flooring test cases: one row per flooring type, with exactly one indicator
# set to 1 in each row (a 5 x 5 identity matrix).
flooringType <- setNames(
  as.data.frame(diag(5)),
  c(
    "flooringHardwood", "flooringTile", "flooringVinyl",
    "flooringCarpet", "flooringLaminate"
  )
)
# Parking test cases, one per row: street parking only, then off-street
# parking combined with each of garage, covered, attached and detached.
parkingType <- as.data.frame(
  matrix(
    c(
      1, 0, 0, 0, 0, 0,
      0, 1, 1, 0, 0, 0,
      0, 1, 0, 1, 0, 0,
      0, 1, 0, 0, 1, 0,
      0, 1, 0, 0, 0, 1
    ),
    ncol = 6,
    byrow = TRUE,
    dimnames = list(
      NULL,
      c(
        "parkingStreet", "parkingOffStreet",
        "parkingGarage", "parkingCovered", "parkingAttached", "parkingDetached"
      )
    )
  )
)
# Base table for the test cases: every combination of bathroom count, bedroom
# count, living area (steps of 250) and rate (steps of 0.50), repeated for each
# suburb row of avgSuburbScore. Lot area is derived from living area using the
# median living-to-lot ratio.
#
# Built in a single expand_grid() call rather than by growing a data frame
# with rbind() inside nested loops. expand_grid() varies its first argument
# slowest, so rows come out in the same order the loops produced them (baths,
# then beds, then livingArea, then avg_rate, with the suburb rows varying
# fastest). The final select() restores the original column order: suburb
# score columns first, then the grid columns.
finalDataset <- expand_grid(
  baths = seq(from = min_baths, to = max_baths, by = 1),
  beds = seq(from = min_bed, to = max_bed, by = 1),
  livingArea = seq(from = min_livingArea, to = max_livingArea, by = 250),
  avg_rate = seq(from = min_rate, to = max_rate, by = 0.50),
  avgSuburbScore
) %>%
  mutate(lotArea = livingArea / median_area_ratio) %>%
  select(
    all_of(names(avgSuburbScore)),
    baths, beds, livingArea, lotArea, avg_rate
  )
# Cross the base table with every cooling, heating, flooring and parking test
# case, then attach the static median values as constant columns. The `.env$`
# pronoun makes explicit that each right-hand side is the script-level value
# of that name rather than a column of the crossed table.
testFile <- crossing(
  finalDataset, coolingType, heatingType, flooringType, parkingType
) %>%
  mutate(
    yearBuilt = .env$yearBuilt,
    sunScore = .env$sunScore,
    parkingCapacity = .env$parkingCapacity,
    hasImage = .env$hasImage,
    view = .env$view,
    stories = .env$stories,
    climateFactor = .env$climateFactor,
    phraseExclamation = .env$phraseExclamation,
    phraseLocation = .env$phraseLocation,
    phraseOld = .env$phraseOld,
    phraseOpen = .env$phraseOpen,
    phraseRenovated = .env$phraseRenovated,
    phraseCustom = .env$phraseCustom,
    phraseSmall = .env$phraseSmall,
    phraseStainless = .env$phraseStainless,
    buildFactor = .env$buildFactor,
    electricityFactor = .env$electricityFactor,
    solarFactor = .env$solarFactor
  )

# Save the generated test cases as CSV, without a row-name column.
write.csv(testFile, file = "Data/TestCases.csv", row.names = FALSE)