-
Notifications
You must be signed in to change notification settings - Fork 0
/
17_missing_indicator_calculator.R
58 lines (45 loc) · 2.01 KB
/
17_missing_indicator_calculator.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# Missing indicator calculator (using hc57) ----
# For every dataset in `merged_datasets`, record TRUE when no column is
# named exactly `hc57`, FALSE otherwise (one logical per list element).
missing_hc57 <- lapply(merged_datasets, function(dataset) {
  !is.element("hc57", names(dataset))
})

# Positions (within `merged_datasets`) of the datasets lacking `hc57`.
missing_indices <- which(unlist(missing_hc57))

# The datasets themselves, in their original order.
missing_dataframes <- merged_datasets[missing_indices]

# Report the first SurveyId value of each dataset lacking `hc57`.
for (df in missing_dataframes) {
  print(paste("SurveyId with missing hc57:", df$SurveyId[1]))
}

# Optionally, preview the leading rows of each dataset lacking `hc57`.
for (i in seq_along(missing_dataframes)) {
  print(head(missing_dataframes[[i]]))
}
# For NA values ----
# For every dataset in `merged_datasets`, record TRUE when the `hc57`
# column exists but every one of its values is `NA`.
na_hc57 <- lapply(merged_datasets, function(dataset) {
  # Without the column there is nothing to inspect: not an "all NA" case.
  if (!is.element("hc57", names(dataset))) {
    return(FALSE)
  }
  all(is.na(dataset$hc57))
})

# Positions (within `merged_datasets`) of the datasets with all-`NA` `hc57`.
na_indices <- which(unlist(na_hc57))

# The datasets themselves, in their original order.
na_dataframes <- merged_datasets[na_indices]

# Report the first SurveyId value of each dataset whose `hc57` is all `NA`.
for (df in na_dataframes) {
  print(paste("SurveyId with all NAs in hc57:", df$SurveyId[1]))
}

# Optionally, preview the leading rows of each all-`NA` dataset.
for (i in seq_along(na_dataframes)) {
  print(head(na_dataframes[[i]]))
}
# Removing dataframes where `hc57` is missing or all `NA` ----
# A dataset is flagged when it has no column named `hc57`, or when that
# column contains no non-NA value. `[[` is used instead of `$` so the column
# lookup is always an exact-name match.
na_or_missing_hc57 <- lapply(merged_datasets, function(x) {
  !"hc57" %in% names(x) || all(is.na(x[["hc57"]]))
})

# Get the indices of dataframes where `hc57` is missing or all `NA`
na_or_missing_indices <- which(unlist(na_or_missing_hc57))

# Remove the flagged dataframes.
# A logical keep-mask is used on purpose: `merged_datasets[-integer(0)]`
# is `merged_datasets[integer(0)]`, i.e. an EMPTY list, so negative indexing
# would discard every dataset whenever nothing was flagged.
filtered_datasets <- merged_datasets[
  !seq_along(merged_datasets) %in% na_or_missing_indices
]

# Verify the removal by printing the SurveyId of the remaining datasets
for (i in seq_along(filtered_datasets)) {
  print(paste("Remaining SurveyId:", filtered_datasets[[i]]$SurveyId[1]))
}