Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/judgelord/rulemaking
Browse files Browse the repository at this point in the history
  • Loading branch information
judgelord committed Mar 23, 2021
2 parents 5cb1ebe + ed746d1 commit b25ddf7
Show file tree
Hide file tree
Showing 4 changed files with 196 additions and 87 deletions.
6 changes: 3 additions & 3 deletions code/updateRdata.R
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,9 @@ save(ejcommentsnew, file = here::here("data", "ejcommentsnew.Rdata"))
# subset comments

load("comment_metadata.Rdata")
names(comments_all)
names(comment_metadata)

org_comments <- comments_all %>% filter(!is.na(organization)) %>% count(organization, docket_id, number_of_comments_received)
org_comments <- comment_metadata %>% filter(!is.na(organization)) %>% distinct(organization, docket_id, number_of_comments_received)

head(org_comments)
dim(org_comments)
Expand All @@ -103,7 +103,7 @@ save(org_comments, file = here::here("data", "org_comments.Rdata"))



comments_min <- comments_all %>% distinct(document_id, organization,submitter_name, number_of_comments_received)
comments_min <- comment_metadata %>% distinct(id, organization,submitter_name, number_of_comments_received)

head(comments_min)
dim(comments_min)
Expand Down
145 changes: 138 additions & 7 deletions data/mergecomments.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,20 @@
# It joins Rdata subsets of comment metadata from the regulations.gov API
# These data are created by "functions/regulations-gov-get-all-comments.R"
source(here::here("setup.R"))
directory <- "ascending2"

load(here("ascending2", "lastcomments.Rdata"))
# third pull
directory <- "ascending3"

load(here(directory, "lastcomments.Rdata"))
head(d$postedDate)
all <- d

load(here(directory, "14000comments.Rdata"))
all %<>% full_join(d)

load(here(directory, "13500comments.Rdata"))
all %<>% full_join(d)

load(here(directory, "13000comments.Rdata"))
all %<>% full_join(d)

Expand Down Expand Up @@ -88,21 +97,143 @@ all %<>% full_join(d)
dim(all)

#####################################
# second pull
#####################################
directory <- "ascending2"

# Files from the second pull, in the order they are merged: the final partial
# batch first, then the numbered batches from 13000 down to 500 in steps of 500.
# Integer arguments keep sprintf("%d") from ever producing scientific notation.
second_pull_files <- c(
  "lastcomments.Rdata",
  sprintf("%dcomments.Rdata", seq.int(13000L, 500L, by = -500L))
)

# Each .Rdata file restores an object named `d` into the workspace; join it
# onto the accumulated `all` before loading the next file.
for (second_pull_file in second_pull_files) {
  load(here(directory, second_pull_file))
  all %<>% full_join(d)
}

dim(all)

#####################################


all %>% count(openForComment)
all %>% count(documentStatus)
all %>% count(allowLateComment)
all %>% count(documentType)

dim(all)

all %<>% select(-openForComment,
-allowLateComment,
-documentType) %>%
distinct()

# save all
save(all,
file = here(directory, "allcomments.Rdata"))
file = here("data", "allcomments2020.Rdata"))

comment_metadata <- all %>% namingthings()
names(comment_metadata)
dim(comment_metadata)
head(comment_metadata)

# save all
save(comment_metadata,
file = here("data", "comment_metadata2020.Rdata"))

comment_meta_min <- comment_metadata %>%
distinct(docket_id, id, posted_date, organization, title, submitter_name,
attachment_count, number_of_comments_received)

# save minimal set
save(comment_meta_min,
file = here("data", "comment_meta_min.Rdata"))

#####################################
# save comments with non-attachment text only to data folder
textcomments <- filter(all, nchar(commentText)> 240)

textcomments <- filter(comment_metadata, nchar(comment_text)> 240)
dim(textcomments)
save(textcomments,
file = here(directory, "textcomments.Rdata"))
file = here("data", "textcomments.Rdata"))

#####################################
# save mass comment campaigns only to data folder
mass <- filter(all, numberOfCommentsReceived > 99)
mass <- filter(comment_metadata, number_of_comments_received > 99)
dim(mass)
save(mass,
file = "data/masscomments.Rdata")
file = here("data", "comment_meta_mass.Rdata"))



11 changes: 6 additions & 5 deletions functions/regulations-gov-get-all-comments.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
source("setup.R")

# I keep data I have already downloaded in ascending order in a directory called "ascending"
directory <- "ascending2"
directory <- "ascending3"
list.files(here::here(directory))
# The api call below loads data in ascending order.
# To start with the earliest comment, set page to 1
Expand All @@ -20,7 +20,7 @@ url <- "https://api.data.gov"
rpp <- 1000 # 1000 = max results per page
order <- "ASC" # DESC = Descending, ASC = Ascending
sortby <- "postedDate" #docketId (Docket ID) docId (Document ID) title (Title) postedDate (Posted Date) agency (Agency) documentType (Document Type) submitterName (Submitter Name) organization (Organization)
pages <- c(1, (seq(1000000)*rpp)+1) # values 1, rpp + 1, 2 * rpp + 1, ...; with rpp = 1000 the last value is 1,000,000,001 (presumably result offsets, one per page -- confirm against the loop)
pages <- c(1, (seq(100000000)*rpp)+1) # values 1, rpp + 1, 2 * rpp + 1, ...; with rpp = 1000 the last value is 100,000,000,001. NOTE(review): this builds a numeric vector of 100,000,001 elements and no longer matches the earlier "up to 100,000,000 results" note -- confirm the intended upper bound
documenttype <- "PS" # "N%2BPR%2BFR%2BPS%2BSR%2BO"
## N: Notice,
## PR: Proposed Rule,
Expand Down Expand Up @@ -101,7 +101,7 @@ while (error < 61) {
Sys.sleep(60)
}

# If call works, mege in new data
# If call works, merge in new data
if (raw.result$status_code == 200) {
# extract content to list
content <- fromJSON(rawToChar(raw.result$content))
Expand Down Expand Up @@ -139,7 +139,7 @@ while (error < 61) {
}# END LOOP

# Save last comments
load("lastcomments.Rdata")
# `file` must be passed by name: unnamed arguments to save() are taken as the
# names of objects to save, so a bare path raises an error instead of writing.
save(d, file = "lastcomments.Rdata")
save(d, page, skip, file = here::here(directory, "lastcomments.Rdata") )
save.image()

Expand All @@ -148,5 +148,6 @@ save(d, file = "data/recentcomments.Rdata")

tail(d %>% drop_na(postedDate) %>% .$postedDate)


# Largest and smallest postedDate in `d`, ignoring missing values.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
max(d$postedDate, na.rm = TRUE)
min(d$postedDate, na.rm = TRUE)

Loading

0 comments on commit b25ddf7

Please sign in to comment.