-
Notifications
You must be signed in to change notification settings - Fork 4
/
7. fcs_sm.R
81 lines (56 loc) · 1.6 KB
/
7. fcs_sm.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
library(synthpop)
# Load the ACS dataset.
# The working directory must be the project folder so that the relative
# load/save paths used below resolve.
#load("satgpa.rda")
load("acs.rdata")
# `ACS` is expected to be the object restored by the load() call above.
acs <- ACS

# Columns that are converted to factors before synthesis.
cols <- c(
  "PUMA", "YEAR", "GQ", "SEX", "MARST", "RACE", "HISPAN", "CITIZEN",
  "SPEAKENG", "HCOVANY", "HCOVPRIV", "HINSEMP", "HINSCAID", "HINSCARE",
  "EDUC", "EMPSTAT", "LABFORCE", "WRKLSTWK", "ABSENT", "LOOKING",
  "AVAILBLE", "WRKRECAL", "WORKEDYR"
)
acs[cols] <- lapply(acs[cols], function(column) factor(column))

# Run synthpop's syn() on the prepared data with method = "cart".
x <- syn(data = acs, method = "cart")

# Keep the synthetic data frame under both names used by this script and
# persist it under the name stored in the .rda file.
sm_acs_fcs_cart <- x$syn
result <- sm_acs_fcs_cart
save(sm_acs_fcs_cart, file = "results/sm_acs_fcs_cart.rda")
###### Data for evaluation
# Synthetic records produced by syn(), compared below against the original
# `acs` data that was passed to syn().
synthetic_dataset <- result
# `x` (the object returned by syn()) is reused directly by the synthpop
# helpers that need the synthesis object rather than a plain data frame.
# synthpop itself is attached once at the top of the file.
original_dataset <- as.data.frame(acs)
######
# Evaluate privacy
#####
# Compare the synthetic data against the observed data it was generated
# from (`acs`); this script never defines an object called `sat`.
replicated.uniques(object = x, data = acs)
######
# Evaluate utility
#####
summary(synthetic_dataset)
summary(acs)
# NOTE(review): the former `sat_v + sat_m - sat_sum` consistency check was
# dropped because it reads satgpa column names; confirm that ACS has no
# equivalent derived-sum constraint that should be checked here instead.
utility.gen(object = as.data.frame(synthetic_dataset), data = original_dataset)
utility.tables(object = as.data.frame(synthetic_dataset), data = original_dataset)
# Grouped comparisons use columns taken from the factor list of this script.
multi.compare(object = x, data = original_dataset, var = "EDUC", by = "SEX")
multi.compare(object = x, data = original_dataset, var = "EMPSTAT", by = "SEX")
# The boxplot comparison needs a numeric variable; take the first numeric
# column of `acs` and skip the plot when there is none.
# NOTE(review): pick a specific numeric ACS variable here if one is preferred.
numeric_vars <- names(acs)[vapply(acs, is.numeric, logical(1))]
if (length(numeric_vars) > 0) {
  multi.compare(
    object = x,
    data = original_dataset,
    var = numeric_vars[1],
    by = "SEX",
    cont.type = "boxplot"
  )
}