generated from BYUIDSconsulting/template_project_repo
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Treys_Attempts.R
70 lines (60 loc) · 2.44 KB
/
Treys_Attempts.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
library(tidyverse)
library(dplyr)
library(stringr)
library(stringi)
# Load the raw page export ----
raw <- read_csv("https://raw.githubusercontent.com/BYUIDSconsulting/woodruff_stories/master/data/raw/2022-12-12-wwp-pages-export.csv")
# The spreadsheet viewer is only useful in an interactive session, and it can
# fail when the script is run from the command line without a display.
if (interactive()) {
  View(raw)
}
# Keep only the journal pages
all_journals <- raw %>%
  filter(`Document Type` == "Journals")
# Order the pages by `Internal ID` so they are more or less in the order in
# which they were written
sorted <- all_journals %>%
  arrange(`Internal ID`)
# Glue every page transcript into one long string so that entries spanning a
# page break stay together. Pages without a transcript are skipped first:
# paste() would otherwise insert the literal text "NA" into the journal text.
page_text <- sorted$`Text Only Transcript`
n_full_text <- paste(page_text[!is.na(page_text)], collapse = "")
# Split the journal text into one row per dated entry ----
# Regex for the date header that opens an entry, e.g. "January 1st, 1840 ~":
#   * one or two capitalised words whose first letter is a month initial
#     (2-8 lowercase letters after the capital, so "May" and three-letter
#     abbreviations are covered as well as "September"),
#   * a 1-2 digit day with an optional ordinal suffix and optional comma,
#   * an optional four-digit year,
#   * the literal " ~" marker that follows the date in the transcript.
# Inside [...] a "|" is a literal pipe, not "or", so the initials are listed
# without separators.
pattern_trey <- "(?:[JFMASOND][a-z]{2,8}\\s){1,2}\\d{1,2}(?:th|st|rd|nd)?,?(?:\\s\\d{4})? ~"
# Every date header found in the text, in order of appearance
matches <- str_extract_all(n_full_text, pattern = pattern_trey)[[1]]
# The text between the date headers. str_split() uses the same regex engine as
# str_extract_all() and keeps a trailing empty piece, so there is always
# exactly one more piece than there are dates.
text <- str_split(n_full_text, pattern = pattern_trey)[[1]]
stopifnot(length(text) == length(matches) + 1L)
# The first piece is whatever precedes the first date header, so it gets an
# NA date; every later piece is paired with the header that introduced it.
papers <- data.frame(
  date = c(NA_character_, matches),
  text = text
)
# Tag each entry with the people and places it mentions ----

# Turn a column of "|"-separated tags into a vector of unique, non-empty tags.
# Missing cells are ignored and surrounding whitespace is trimmed.
collect_entities <- function(tagged) {
  tagged[!is.na(tagged)] %>%
    str_split(fixed("|")) %>%
    unlist() %>%
    str_trim() %>%
    setdiff("")
}

# For every element of `text`, find all literal occurrences of any of `terms`
# and return them as one ", "-separated string ("" when nothing is found).
extract_known_terms <- function(text, terms) {
  if (length(terms) == 0L) {
    return(rep("", length(text)))
  }
  # Regex alternation takes the first alternative that matches, so try the
  # longest terms first; otherwise a short name would shadow a longer name
  # that starts with it.
  terms <- terms[order(nchar(terms), decreasing = TRUE)]
  # Escape regex metacharacters so that names containing ".", "(", "?" etc.
  # are matched literally instead of being interpreted as regex syntax.
  escaped <- str_replace_all(terms, "([.^$\\\\|*+?{}\\[\\]()])", "\\\\\\1")
  hits <- str_extract_all(text, paste(escaped, collapse = "|"))
  vapply(hits, paste, character(1), collapse = ", ")
}

# The tag lists are built from every row of the export, not only the journals
people <- collect_entities(raw$People)
places <- collect_entities(raw$Places)
message("Unique people: ", length(people), "; unique places: ", length(places))
if (interactive()) {
  view(places)
}

papers$names <- extract_known_terms(papers$text, people)
papers$places <- extract_known_terms(papers$text, places)
# Clean up the date column and export ----
# Each extracted date still ends with the " ~" marker from the transcript.
# Remove the tilde together with the whitespace in front of it, so no trailing
# space is left behind. NA dates stay NA.
papers$date <- sub("\\s*~$", "", papers$date)
view(papers)
# NOTE(review): with file = "" write.csv() prints the CSV to the console
# instead of saving it. Supply the intended output path here.
write.csv(papers, "", row.names = FALSE)