Skip to content

Commit

Permalink
Merge pull request #23 from tbeason/newrules
Browse files Browse the repository at this point in the history
new reading heuristics, more tests, bump version 0.2
  • Loading branch information
tbeason authored Oct 11, 2020
2 parents c081f34 + 0d331f4 commit 7e2ca9a
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 24 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "FamaFrenchData"
uuid = "bd2a388e-9788-4ef7-9fc3-f4c919ffde82"
authors = ["Tyler Beason <tbeas12@gmail.com>"]
version = "0.1.5"
version = "0.2.0"

[deps]
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
Expand Down
76 changes: 53 additions & 23 deletions src/FamaFrenchData.jl
Original file line number Diff line number Diff line change
Expand Up @@ -120,31 +120,16 @@ Returns three pieces:
- `filenotes::String` - notes at the top of the file
"""
function parsefile(lines;kwargs...)
csvopt = (missingstrings = ["-99.99","-999"],normalizenames = true,kwargs...)
csvopt = (missingstrings = ["-99.99","-999"],normalizenames = false,kwargs...)

stringarray = readlines(lines,keep=true)
striparray = strip.(stringarray)
splits = split.(stringarray,",")
ncol = maximum(length(s) for s in splits)
ncols = length.(splits)

trows = findall(s->length(s)==ncol,splits)
breaks = findall(!isequal(1),diff(trows))
nbreaks = length(breaks)
ntables = nbreaks + 1
tranges = contiguousblocks(ncols)
ntables = length(tranges)

tranges = Vector{UnitRange{Int64}}(undef,ntables)

if ntables == 1
tranges[1] = trows[1]:trows[end]
else
tranges[1] = trows[1]:trows[breaks[1]]
tranges[ntables] = trows[breaks[end]+1]:trows[end]
if ntables > 2
for n in 2:ntables-1
tranges[n] = trows[breaks[n-1]+1]:trows[breaks[n]]
end
end
end
allempty = findall(isempty,striparray)
lastempty = [lastornothing(filter(x -> x < k,allempty)) for k in first.(tranges)]
nameranges = [isnothing(lastempty[i]) ? nothing : range(lastempty[i],first(tranges[i])-1,step=1) for i in 1:ntables]
Expand All @@ -157,18 +142,63 @@ function parsefile(lines;kwargs...)
close(ios)
end
if isnothing(first(nameranges))
notestop = trows[1]-1
tablenotes = [i==1 ? "" : string(striparray[nameranges[i]]...) for i in 1:ntables]
notestop = tranges[1][1]-1
tablenotes = [i==1 ? "" : strip_rn(string(striparray[nameranges[i]]...)) for i in 1:ntables]
else
tablenotes = [string(striparray[nameranges[i]]...) for i in 1:ntables]
tablenotes = [strip_rn(string(striparray[nameranges[i]]...)) for i in 1:ntables]
notestop = first(lastempty)
end
filenotes = string(lines.name," \n",striparray[1:notestop]...)
filenotes = strip_rn(string(lines.name," ",strip.(stringarray[1:notestop])...))

return dfvec,tablenotes,filenotes
end


"""
    strip_rn(s)

Return a copy of `s` with every carriage return (`\\r`) removed and every newline
(`\\n`) replaced by a single space.
"""
strip_rn(s) = replace(replace(s, "\r" => ""), "\n" => " ")



"""
    contiguousblocks(x; minlength=3)

Return the index ranges of `x` on which one value `v >= 2` is repeated at consecutive
positions (indices that increase by 1).

Only runs spanning at least `minlength` positions are kept (default 3). Elements smaller
than 2 never form a block. An empty `x` yields an empty result.

The ranges are ordered by value first (`v = 2, 3, ...`) and by position within each
value, so they are not necessarily sorted by their starting index.

# Examples
```julia
julia> contiguousblocks([1,2,3,1,1,1,2,2,2,2,1,2,2,2,2,2])
2-element Vector{UnitRange{Int64}}:
 7:10
 12:16
```
"""
function contiguousblocks(x::AbstractVector; minlength::Integer=3)
    tranges = Vector{UnitRange{Int64}}(undef, 0)
    # `maximum` throws on an empty collection; an empty input simply has no blocks.
    isempty(x) && return tranges
    for v in 2:maximum(x)
        hits = findall(==(v), x)
        isempty(hits) && continue
        runstart = runstop = first(hits)
        for idx in Iterators.drop(hits, 1)
            if idx - runstop != 1
                # Gap between matching indices: the current run is over, so record it
                # (if long enough) and start a new run at `idx`.
                length(runstart:runstop) >= minlength && push!(tranges, runstart:runstop)
                runstart = idx
            end
            runstop = idx
        end
        # Record the run that extends to the last matching index.
        length(runstart:runstop) >= minlength && push!(tranges, runstart:runstop)
    end
    return tranges
end



"""
pathtoFamaFrench(ffn)
Expand Down
28 changes: 28 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,30 @@ using Test
@test length(tablesff) == 2

@test ncol(tablesff[1]) == 5

@test tablesff[1].Date[1] == 192607

@test tablesff[2].Date[1] == 1927

tablesff, tablenotesff, filenotesff = readFamaFrench("F-F_Momentum_Factor")
@test length(tablesff) == 2

@test ncol(tablesff[1]) == 2

tablesff, tablenotesff, filenotesff = readFamaFrench("25_Portfolios_5x5")
@test length(tablesff) == 10

@test ncol(tablesff[1]) == 26

@test tablesff[1].Date[1] == 192607


tablesff, tablenotesff, filenotesff = readFamaFrench("ME_Breakpoints",header=false)
@test length(tablesff) == 1

@test ncol(tablesff[1]) == 22

@test tablesff[1].Date[1] == 192512
end

@testset "Download" begin
Expand All @@ -37,6 +61,10 @@ using Test
end

@testset "Unexported" begin
@test FamaFrenchData.strip_rn("abc 123\r\n") == "abc 123 "

@test FamaFrenchData.contiguousblocks([1,2,3,1,1,1,2,2,2,2,1,2,2,2,2,2]) == [7:10, 12:16]

@test FamaFrenchData.lastornothing([1;2]) == 2

@test FamaFrenchData.pathtoFamaFrench("F-F_Research_Data_Factors") == "http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_CSV.zip"
Expand Down

2 comments on commit 7e2ca9a

@tbeason
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register

Release notes:

New reading heuristics.
Column names no longer modified from original headings.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/22797

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the GitHub interface, or via:

git tag -a v0.2.0 -m "<description of version>" 7e2ca9a4847046f55a8d69aaf9ce8740aa8e3079
git push origin v0.2.0

Please sign in to comment.