Skip to content

Commit

Permalink
Merge pull request #95 from yanlesin/issue-2024-Q3
Browse files Browse the repository at this point in the history
fix issue #94
  • Loading branch information
yanlesin authored Oct 9, 2024
2 parents 6f66740 + bcba4d7 commit 6881941
Show file tree
Hide file tree
Showing 14 changed files with 49 additions and 218 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: SEC13Flist
Title: Functions to Work with Official List of SEC Section 13(f) Securities and Security Identifiers
Version: 1.0.1
Version: 1.1.0
Authors@R:
person(given = "Yan",
family = "Lyesin",
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# SEC13Flist

## 1.1.0

* significant change: the `SEC_13F_list` function no longer attempts to determine the current year and quarter for the official list - it now produces an error when the year and quarter are not supplied in the function call

## 1.0.1

* fixed issue related to change in landing page of sec.gov for Official List of securities
Expand Down
35 changes: 17 additions & 18 deletions R/sec_13f_list.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,37 +20,36 @@
#' \dontrun{library(SEC13Flist)
#' SEC_13F_list_2018_Q3 <- SEC_13F_list(2018,3) #Parse list for Q3 2018 without progress indicator
#' SEC_13F_list_2018_Q3_ <- SEC_13F_list(2018,3,TRUE) #Parse list with progress indicator
#' SEC_13F_list_current <- SEC_13F_list() #Parse current list from SEC.gov
#' }
#' @useDynLib SEC13Flist, .registration = TRUE
#' @importFrom Rcpp sourceCpp

SEC_13F_list <- function(YEAR_,QUARTER_, show_progress = FALSE){

url_SEC <- "https://www.sec.gov/divisions/investment/13flists.htm"

html_page <- readLines(url_SEC)
html_line <- html_page[grep("Current List", html_page)]
url <- sub("<a href=\"(.*)\">.*", "\\1", html_line)
current_list_url <- sub("^.*https", "https", url)

current_year <- substr(current_list_url,nchar(current_list_url)-9,nchar(current_list_url)-6) |>
as.integer()

current_quarter <- substr(current_list_url,nchar(current_list_url)-4,nchar(current_list_url)-4) |>
as.integer()
# url_SEC <- "https://www.sec.gov/divisions/investment/13flists.htm"
#
# html_page <- readLines(url_SEC)
# html_line <- html_page[grep("Current List", html_page)]
# url <- sub("<a href=\"(.*)\">.*", "\\1", html_line)
# current_list_url <- sub("^.*https", "https", url)
#
# current_year <- substr(current_list_url,nchar(current_list_url)-9,nchar(current_list_url)-6) |>
# as.integer()
#
# current_quarter <- substr(current_list_url,nchar(current_list_url)-4,nchar(current_list_url)-4) |>
# as.integer()

error_msg <- "Error: sec_13f_list function requires Year and Quarter to read the list from website"

if (missing(YEAR_)) {
YEAR_ <- current_year
warning("Default year: ", YEAR_)
stop(error_msg)
}

if (missing(QUARTER_)) {
QUARTER_ <- current_quarter
warning("Default quarter: ", QUARTER_)
stop(error_msg)
}

url_file <- url_file_func(YEAR_,QUARTER_,current_year,current_quarter)
url_file <- url_file_func(YEAR_,QUARTER_)

text <- pdftools::pdf_text(url_file)

Expand Down
57 changes: 2 additions & 55 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,35 +16,8 @@ line_separator_func <- function(CR, LF, text)
}

url_file_func <- function(YEAR_,
QUARTER_,
current_year,
current_quarter)
QUARTER_)
{
if (missing(YEAR_) &
current_year == 0 |
missing(QUARTER_) &
current_quarter == 0)
stop(
"Error: Unable to determine current year or quarter. Please supply YEAR and QUARTER in function call and report this error"
)

if (missing(YEAR_)) {
YEAR_ <- current_year
warning("Default year: ", YEAR_)
}

if (missing(QUARTER_)) {
QUARTER_ <- current_quarter
warning("Default quarter: ", QUARTER_)
}

#0,0 supplied in function call
if (YEAR_ == 0 |
QUARTER_ == 0)
stop(
"Error: Please supply integer values for YEAR_ and QUARTER_ starting in 2004 Q1. Example: SEC_13F_list(2004, 1)"
)

#Validating inputs to the function
YEAR_ <- as.integer(YEAR_)
QUARTER_ <- as.integer(QUARTER_)
Expand All @@ -59,35 +32,9 @@ url_file_func <- function(YEAR_,
stop(
"Error: SEC_13F_list function only works with SEC list files starting at Q1 2004. Example: SEC_13F_list(2004, 1)"
)
if (QUARTER_ > 4)
if (QUARTER_ > 4 | QUARTER_ < 1)
stop("Error: Please, supply integer number for QUARTER_ in range between 1 and 4")

if (current_year != 0)
(if (YEAR_ > current_year)
stop (
paste0(
"Error: no list available for year ",
YEAR_,
". Please, use integer number in range 2004..",
current_year
)
))

if (current_quarter != 0)
(if (YEAR_ == current_year &
QUARTER_ > current_quarter)
stop (
paste0(
"Error: no list available for year ",
YEAR_,
" and quarter ",
QUARTER_,
". Last available quarter for current year - ",
current_quarter,
"."
)
))

if (YEAR_ == 2004 & QUARTER_ == 1)
{
file_name <- "13f-list.pdf"
Expand Down
7 changes: 2 additions & 5 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -64,16 +64,13 @@ library(tidyverse)
## Return list for Q3 2018
SEC13Flist_2018_Q3 <- SEC_13F_list(2018,3)
## Current list form SEC website
SEC13Flist_current <- SEC_13F_list() #Current list form SEC website
## Customizing
SEC13Flist_current <- SEC_13F_list() |>
SEC13Flist_current <- SEC_13F_list(2023, 3) |>
filter(STATUS!="DELETED") |> #Filter out records with STATUS "DELETED"
select(-YEAR,-QUARTER) #Remove YEAR and QUARTER columns
## Verifying CUSIP
verify_CUSIP <- SEC_13F_list() |>
verify_CUSIP <- SEC_13F_list(2023, 3) |>
rowwise() |> ##CUSIPs are not unique, isCusip function is not vectorized and requires single nine character CUSIP as input
mutate(VALID_CUSIP=isCusip(CUSIP)) ##validating CUSIP
Expand Down
8 changes: 3 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@

<!-- README.md is generated from README.Rmd. Please edit that file -->

<!-- badges: start -->

[![R build
Expand Down Expand Up @@ -82,16 +83,13 @@ library(tidyverse)
## Return list for Q3 2018
SEC13Flist_2018_Q3 <- SEC_13F_list(2018,3)

## Current list form SEC website
SEC13Flist_current <- SEC_13F_list() #Current list form SEC website

## Customizing
SEC13Flist_current <- SEC_13F_list() |>
SEC13Flist_current <- SEC_13F_list(2023, 3) |>
filter(STATUS!="DELETED") |> #Filter out records with STATUS "DELETED"
select(-YEAR,-QUARTER) #Remove YEAR and QUARTER columns

## Verifying CUSIP
verify_CUSIP <- SEC_13F_list() |>
verify_CUSIP <- SEC_13F_list(2023, 3) |>
rowwise() |> ##CUSIPs are not unique, isCusip function is not vectorized and requires single nine character CUSIP as input
mutate(VALID_CUSIP=isCusip(CUSIP)) ##validating CUSIP
```
Expand Down
1 change: 1 addition & 0 deletions SEC13Flist.Rproj
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
Version: 1.0
ProjectId: e84fe728-17c3-49d6-8543-eb5c867b5ed3

RestoreWorkspace: No
SaveWorkspace: No
Expand Down
1 change: 0 additions & 1 deletion man/SEC_13F_list.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions tests/testthat/test-Pass_0_0_into_function_call.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ test_that("0,0 and other into function call", {
expect_error(SEC13Flist::SEC_13F_list(0,1))
expect_error(SEC13Flist::SEC_13F_list(2004,"a"))
expect_error(SEC13Flist::SEC_13F_list("b","2"))
expect_error(SEC13Flist::SEC_13F_list())
})
59 changes: 3 additions & 56 deletions tests/testthat/test-SEC_row_count.R
Original file line number Diff line number Diff line change
@@ -1,71 +1,18 @@
test_that("Parsed row count equal to total row count per PDF list", {
options(HTTPUserAgent="Yan Lyesin, SEC13Flist Package for R, yan.lyesin@gmail.com ")
url_SEC <- "https://www.sec.gov/divisions/investment/13flists.htm"
YEAR_ <- 2024
QUARTER_ <- 3

html_page <- readLines(url_SEC)
html_line <- html_page[grep("Current List", html_page)]
url <- sub("<a href=\"(.*)\">.*", "\\1", html_line)
current_list_url <- sub("^.*https", "https", url)

current_year <- substr(current_list_url,nchar(current_list_url)-9,nchar(current_list_url)-6) |>
as.integer()

current_quarter <- substr(current_list_url,nchar(current_list_url)-4,nchar(current_list_url)-4) |>
as.integer()

#if (missing(YEAR_)&current_year==0|missing(QUARTER_)&current_quarter==0) stop("Error: Unable to determine current year or quarter. Please supply YEAR and QUARTER in function call and report this error")


YEAR_ <- current_year
# warning("Default year: ", YEAR_)

QUARTER_ <- current_quarter
# warning("Default quarter: ", QUARTER_)

#0,0 supplied in function call
if (YEAR_==0|QUARTER_==0) stop("Error: Please supply integer values for YEAR_ and QUARTER_ starting in 2004 Q1. Example: SEC_13F_list(2004, 1)")

#Validating inputs to the function
YEAR_ <- as.integer(YEAR_)
QUARTER_ <- as.integer(QUARTER_)

if (is.na(YEAR_)|is.na(QUARTER_)) stop("Error: Please supply integer values for YEAR_ and QUARTER_ starting in 2004 Q1. Example: SEC_13F_list(2004, 1)")

if (YEAR_<2004) stop("Error: SEC_13F_list function only works with SEC list files starting at Q1 2004. Example: SEC_13F_list(2004, 1)")
if (QUARTER_>4) stop("Error: Please, supply integer number for QUARTER_ in range between 1 and 4")

if(current_year!=0) (if (YEAR_>current_year) stop (paste0("Error: no list available for year ",
YEAR_, ". Please, use integer number in range 2004..", current_year))
)

if(current_quarter!=0) (if (YEAR_==current_year&QUARTER_>current_quarter) stop (paste0("Error: no list available for year ",
YEAR_, " and quarter ", QUARTER_, ". Last available quarter for current year - ", current_quarter, "."))
)

if (YEAR_ == 2004 & QUARTER_ == 1)
{
file_name <- "13f-list.pdf"
url_file <-
paste0("https://www.sec.gov/divisions/investment/", file_name)
}
else if (YEAR_ < 2021 | (YEAR_ == 2021 & QUARTER_ <= 1))
{
file_name <- paste0('13flist', YEAR_, 'q', QUARTER_, '.pdf')
url_file <-
paste0("https://www.sec.gov/divisions/investment/13f/",
file_name)
} else {
file_name <- paste0('13flist', YEAR_, 'q', QUARTER_, '.pdf')
url_file <-
paste0("https://www.sec.gov/files/investment/",
file_name)
}
text <- pdftools::pdf_text(url_file)
page_total_count <- min(which(!(regexpr("Total Count:", text)) == -1))
total_count <- as.integer(gsub("[^0-9.-]", "", substr(text[page_total_count],
regexpr("Total Count: ", text[page_total_count])[1]+1, nchar(text[page_total_count]))))

total_count_parse <- nrow(SEC13Flist::SEC_13F_list())
total_count_parse <- nrow(SEC13Flist::SEC_13F_list(YEAR_, QUARTER_))

expect_equal(total_count, total_count_parse)
})
61 changes: 3 additions & 58 deletions tests/testthat/test-SEC_row_count_2004.R
Original file line number Diff line number Diff line change
@@ -1,74 +1,19 @@
test_that("Parsed row count equal to total row count per PDF list for 2004 Q1", {
options(HTTPUserAgent="Yan Lyesin, SEC13Flist Package for R, yan.lyesin@gmail.com ")
url_SEC <- "https://www.sec.gov/divisions/investment/13flists.htm"

html_page <- readLines(url_SEC)
html_line <- html_page[grep("Current List", html_page)]
url <- sub("<a href=\"(.*)\">.*", "\\1", html_line)
current_list_url <- sub("^.*https", "https", url)

current_year <- substr(current_list_url,nchar(current_list_url)-9,nchar(current_list_url)-6) |>
as.integer()

current_quarter <- substr(current_list_url,nchar(current_list_url)-4,nchar(current_list_url)-4) |>
as.integer()

#if (missing(YEAR_)&current_year==0|missing(QUARTER_)&current_quarter==0) stop("Error: Unable to determine current year or quarter. Please supply YEAR and QUARTER in function call and report this error")


YEAR_ <- 2004
# warning("Default year: ", YEAR_)

QUARTER_ <- 1
# warning("Default quarter: ", QUARTER_)

#0,0 supplied in function call
if (YEAR_==0|QUARTER_==0) stop("Error: Please supply integer values for YEAR_ and QUARTER_ starting in 2004 Q1. Example: SEC_13F_list(2004, 1)")

#Validating inputs to the function
YEAR_ <- as.integer(YEAR_)
QUARTER_ <- as.integer(QUARTER_)

if (is.na(YEAR_)|is.na(QUARTER_)) stop("Error: Please supply integer values for YEAR_ and QUARTER_ starting in 2004 Q1. Example: SEC_13F_list(2004, 1)")

if (YEAR_<2004) stop("Error: SEC_13F_list function only works with SEC list files starting at Q1 2004. Example: SEC_13F_list(2004, 1)")
if (QUARTER_>4) stop("Error: Please, supply integer number for QUARTER_ in range between 1 and 4")

if(current_year!=0) (if (YEAR_>current_year) stop (paste0("Error: no list available for year ",
YEAR_, ". Please, use integer number in range 2004..", current_year))
)

if(current_quarter!=0) (if (YEAR_==current_year&QUARTER_>current_quarter) stop (paste0("Error: no list available for year ",
YEAR_, " and quarter ", QUARTER_, ". Last available quarter for current year - ", current_quarter, "."))
)

if (YEAR_ == 2004 & QUARTER_ == 1)
{
file_name <- "13f-list.pdf"

file_name <- "13f-list.pdf"
url_file <-
paste0("https://www.sec.gov/divisions/investment/", file_name)
}
else
{
if (YEAR_ >= 2021 & QUARTER_ >= 2) {
file_name <- paste0('13flist', YEAR_, 'q', QUARTER_, '.pdf')
url_file <-
paste0("https://www.sec.gov/files/investment/",
file_name)
} else {
file_name <- paste0('13flist', YEAR_, 'q', QUARTER_, '.pdf')
url_file <-
paste0("https://www.sec.gov/divisions/investment/13f/",
file_name)
}
}

text <- pdftools::pdf_text(url_file)
page_total_count <- min(which(!(regexpr("Total Count:", text)) == -1))
total_count <- as.integer(gsub("[^0-9.-]", "", substr(text[page_total_count],
regexpr("Total Count: ", text[page_total_count])[1]+1, nchar(text[page_total_count]))))

total_count_parse <- nrow(SEC13Flist::SEC_13F_list(2004,1))
total_count_parse <- nrow(SEC13Flist::SEC_13F_list(YEAR_, QUARTER_))

expect_equal(total_count, total_count_parse)
})
2 changes: 1 addition & 1 deletion tests/testthat/test-has_listed_options.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
test_that("list of possible values for HAS_LISTED_OPTION column", {
options(HTTPUserAgent="Yan Lyesin, SEC13Flist Package for R, yan.lyesin@gmail.com ")
list_all <- SEC13Flist::SEC_13F_list()
list_all <- SEC13Flist::SEC_13F_list(2023, 3)
list_all <- unique(list_all["HAS_LISTED_OPTION"])
list_HAS_LISTED_OPTION <- nrow(subset(list_all, !HAS_LISTED_OPTION %in% c("","*")))
expect_equal(list_HAS_LISTED_OPTION, 0)
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-status.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ test_that("list of possible values for STATUS column", {
STATUS_1 <- ""
STATUS_2 <- "ADDED"
STATUS_3 <- "DELETED"
list_all <- SEC13Flist::SEC_13F_list()
list_all <- SEC13Flist::SEC_13F_list(2024, 3)
list_all <- unique(list_all["STATUS"])
list_STATUS <- nrow(subset(list_all, !STATUS %in% c(STATUS_1,STATUS_2,STATUS_3)))
expect_equal(list_STATUS, 0)
Expand Down
Loading

0 comments on commit 6881941

Please sign in to comment.