-
Notifications
You must be signed in to change notification settings - Fork 5
/
preview_dataset.R
50 lines (46 loc) · 1.91 KB
/
preview_dataset.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#' Load a CSV file from the repository's data directory
#'
#' Builds the path `../data/<dataset_name>` (relative to the current working
#' directory) and reads it with `readr::read_csv()`.
#'
#' @param dataset_name File name of the CSV file inside `../data`.
#' @return The data frame produced by `readr::read_csv()`.
default_csv_loader <- function(dataset_name) {
  csv_path <- file.path("../data", dataset_name)

  # guess_max = Inf lets readr consider every row when guessing column types;
  # show_col_types = FALSE keeps the column specification message out of the
  # rendered page.
  readr::read_csv(
    csv_path,
    guess_max = Inf,
    show_col_types = FALSE
  )
}
#' Produce a nice preview table of the chosen dataset
#'
#' By default, reads the name of the data file from the `datafile` attribute of
#' the page metadata.
#'
#' @param n Number of rows of data to include in the table. This should not be
#'   so large that the web page contains megabytes of data, but should contain
#'   enough rows that readers can understand the format of the data.
#' @param dataset Either a data frame, or a single string naming a CSV file to
#'   load (via `default_csv_loader()`) and preview. Defaults to the `datafile`
#'   attribute of the page metadata.
#' @return A `paged_df` object that is automatically formatted by R Markdown as
#'   a nice table.
#' @details Signals an error if `dataset` is a file name without a `.csv`
#'   extension (compared case-insensitively), or if it is neither a data frame
#'   nor a single non-missing string (this includes `NULL`, e.g. when the page
#'   metadata has no `datafile` entry).
preview_dataset <- function(n = 20, dataset = rmarkdown::metadata$datafile) {
  if (is.data.frame(dataset)) {
    df <- dataset
  } else if (is.character(dataset) && length(dataset) == 1L &&
               !is.na(dataset)) {
    # Require exactly one non-missing file name: a longer vector or NA would
    # otherwise make the extension test below fail with an opaque
    # "condition has length > 1" / "missing value" error.
    if (tolower(tools::file_ext(dataset)) != "csv") {
      stop("preview by filename only implemented for csv files")
    }
    df <- default_csv_loader(dataset)
  } else {
    stop("invalid dataset type, must be a string or a data frame")
  }

  df |>
    head(n = n) |>
    rmarkdown::paged_table(options = list(rownames.print = FALSE))
}
#' Emit linked headings and preview tables for several datasets
#'
#' For each dataset, writes (with `cat()`) an `<h4>` heading that links to the
#' file under `https://cmustatistics.github.io/data-repository/data/`, followed
#' by the HTML of its paged preview table.
#' NOTE(review): because the HTML is written with `cat()`, this presumably has
#' to be called from a chunk with `results = "asis"` -- confirm against callers.
#'
#' @param n Number of rows to include in each preview table.
#' @param datasets Either a character vector of CSV file names, or a named list
#'   whose elements are data frames (or file names). The names are used both as
#'   the link text and as the file name in the link URL; an unnamed character
#'   vector is named by its own values. Elements that are already `paged_df`
#'   objects are printed as they are. Defaults to the `data$files` entry of the
#'   page metadata.
#' @return `NULL`, invisibly; called for its printed output.
preview_datasets <- function(n = 20,
                             datasets = rmarkdown::metadata$data$files) {
  if (!is.list(datasets)) {
    # A bare vector of file names: label each entry with its own file name
    # unless the caller already supplied names.
    if (is.character(datasets) && is.null(names(datasets))) {
      names(datasets) <- datasets
    }
    datasets <- as.list(datasets)
  }

  # Run every entry that is not yet a paged table through preview_dataset(),
  # so that `n` and the table options also apply to data frames handed in as a
  # list (they were previously printed without truncation to `n`).
  tables <- lapply(datasets, function(d) {
    if (inherits(d, "paged_df")) {
      d
    } else {
      preview_dataset(n = n, dataset = d)
    }
  })

  base_url <- "https://cmustatistics.github.io/data-repository/data/"
  for (name in names(tables)) {
    # Build the heading as one string: cat()'s default separator would put
    # spaces inside the href attribute and around the link text.
    cat(sprintf("<h4><a href=\"%s%s\">%s</a></h4>\n", base_url, name, name))
    cat(rmarkdown:::print.paged_df(tables[[name]]))
  }
  invisible(NULL)
}