forked from oharac/eds_211_parallel
-
Notifications
You must be signed in to change notification settings - Fork 0
/
parallel_markdown.Rmd
124 lines (98 loc) · 2.41 KB
/
parallel_markdown.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
---
title: "Parallel Playground"
author: "Elke Windschitl"
date: "2023-02-06"
output: html_document
---
```{r setup, echo = TRUE, message = FALSE, warning = TRUE}
# Default options for all later chunks: show the code, hide messages and warnings
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
# Packages used below. The attach order is left as-is because, when two
# packages export the same function name, the one attached later wins.
library(tidyverse) # pipe and data-frame verbs (mutate, group_by, summarize, ...)
library(parallel)  # mclapply()
library(furrr)     # future_map()
library(raster)    # raster()
library(tictoc)    # tic() / toc()
```
## Do a slow thing
```{r}
# Paths of every file found in the data/ directory
rast_fs <- list.files("data", full.names = TRUE)

# Load the first file as a raster and take a quick look at it
r <- raster::raster(rast_fs[1])
plot(r)

# One row per cell: coordinates (x, y) plus the cell value (z).
# Assumes the raster converts to exactly three columns -- TODO confirm
# if multi-layer files are ever placed in data/.
df <- as.data.frame(r, xy = TRUE)
df <- setNames(df, c("x", "y", "z"))
df <- drop_na(df)

# Bin the cell value into classes; the first matching condition wins
df <- mutate(
  df,
  type = case_when(
    z < .1 ~ "low",
    z < .25 ~ "medium",
    z < .5 ~ "high",
    TRUE ~ "vhi" # every remaining cell, i.e. z >= 0.5
  )
)

# Number of cells in each class
summary_df <- summarise(group_by(df, type), n_cells = n())
```
### For Loop
```{r}
# Time one sequential pass over all files
system.time({
  # Pre-allocate one slot per file, named by the file's path
  out_list_loop <- setNames(
    vector("list", length = length(rast_fs)),
    rast_fs
  )

  for (f in rast_fs) {
    # Read the raster and flatten it to one row per cell
    r <- raster::raster(f)
    df <- as.data.frame(r, xy = TRUE)
    df <- setNames(df, c("x", "y", "z"))
    df <- drop_na(df)

    # Classify each cell by its value; the first matching condition wins
    df <- mutate(
      df,
      type = case_when(
        z < .1 ~ "low",
        z < .25 ~ "medium",
        z < .5 ~ "high",
        TRUE ~ "vhi"
      )
    )

    # Cell count per class, tagged with the file's base name
    summary_df <- summarise(
      group_by(df, type),
      n_cells = n(),
      f = basename(f)
    )

    out_list_loop[[f]] <- summary_df
  }
})

# Stack the per-file summaries into one data frame
out_df_loop <- bind_rows(out_list_loop)
```
## Create a function
```{r}
#' Count the cells of one raster file by value class
#'
#' Reads the raster at `f`, converts it to one row per cell, drops rows
#' containing `NA`, labels each cell by its value (`"low"` below 0.1,
#' `"medium"` below 0.25, `"high"` below 0.5, `"vhi"` otherwise) and counts
#' the cells in each class.
#'
#' @param f Path to a raster file readable by `raster::raster()`.
#' @return A data frame with one row per class present in the file and the
#'   columns `type`, `n_cells` and `f` (the base name of the input path).
process_rast_file <- function(f) {
  # Read from the argument `f`, not from any `r` in the calling environment,
  # so each call (and each parallel worker) processes its own file.
  r <- raster::raster(f)

  df <- as.data.frame(r, xy = TRUE) %>%
    setNames(c("x", "y", "z")) %>%
    drop_na() %>%
    mutate(type = case_when(
      z < .1 ~ "low",
      z < .25 ~ "medium",
      z < .5 ~ "high",
      TRUE ~ "vhi"
    ))

  # `f` on the right-hand side is the function argument: the data has no
  # column of that name, so the lookup falls through to the function scope.
  df %>%
    group_by(type) %>%
    summarize(
      n_cells = n(),
      f = basename(f)
    )
}
```
### lapply
```{r}
# Sequential baseline with base R: one call per file, timed by tic()/toc()
tic()
out_list_lapply <- lapply(rast_fs, process_rast_file)
toc()
```
### mclapply
```{r}
# Same work through parallel::mclapply(), requesting 4 cores
tic()
out_list_mcapply <- parallel::mclapply(
  rast_fs,
  process_rast_file,
  mc.cores = 4
)
toc()
```
### purrr
```{r}
# Sequential again, this time through purrr::map()
tic()
out_list_purrr <- purrr::map(rast_fs, process_rast_file)
toc()
```
### furrr
```{r}
# Run futures in 4 background R sessions
plan(multisession, workers = 4)
tic()
# Stored under its own name so the purrr results are not overwritten
out_list_furrr <- furrr::future_map(.x = rast_fs, .f = process_rast_file)
toc()
```