Skip to content

Commit

Permalink
Fix: Readability.published_at and Readability.title work as expected
Browse files Browse the repository at this point in the history
  • Loading branch information
vkryukov authored Nov 12, 2024
1 parent e9a80fc commit e3c152b
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 23 deletions.
4 changes: 2 additions & 2 deletions lib/readability.ex
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ defmodule Readability do
# Title extraction for a raw HTML binary: the string is parsed with Floki and
# the parsed result is piped back into `title`.
# NOTE(review): this is a diff rendered without +/- markers. Given the
# "2 additions & 2 deletions" summary for this file, `Floki.parse_document()`
# looks like the removed line and `Floki.parse_document!()` its replacement
# (presumably because the non-bang variant returns a tagged tuple rather than
# the tree itself) — confirm against the commit.
@spec title(binary | html_tree) :: binary
def title(raw_html) when is_binary(raw_html) do
raw_html
|> Floki.parse_document()
|> Floki.parse_document!()
|> title
end

Expand Down Expand Up @@ -183,7 +183,7 @@ defmodule Readability do
# Publication-date extraction for a raw HTML binary: the string is parsed with
# Floki and the parsed result is piped back into `published_at/1`.
# NOTE(review): this is a diff rendered without +/- markers. Given the
# "2 additions & 2 deletions" summary for this file, `Floki.parse_document()`
# looks like the removed line and `Floki.parse_document!()` its replacement
# (presumably because the non-bang variant returns a tagged tuple rather than
# the tree itself) — confirm against the commit.
@spec published_at(binary | html_tree) :: %DateTime{} | %Date{} | nil
def published_at(raw_html) when is_binary(raw_html) do
raw_html
|> Floki.parse_document()
|> Floki.parse_document!()
|> published_at()
end

Expand Down
28 changes: 20 additions & 8 deletions test/readability/author_finder_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,35 @@ defmodule Readability.AuthoFinderTest do

alias Readability.AuthorFinder

# Asserts that author extraction yields `expected_authors` for the fixture
# `file_name` through both entry points: `Readability.authors/1` on the raw
# fixture contents and `AuthorFinder.find/1` on the pre-parsed fixture.
defp test_fixture(file_name, expected_authors) do
  authors_from_raw =
    file_name
    |> TestHelper.read_fixture()
    |> Readability.authors()

  assert authors_from_raw == expected_authors

  authors_from_tree =
    file_name
    |> TestHelper.read_parse_fixture()
    |> AuthorFinder.find()

  assert authors_from_tree == expected_authors
end

# BBC fixture: the expected author list is ["BBC News"].
# NOTE(review): diff rendered without +/- markers — the `html = ...` / `assert ...`
# pair looks like the removed body and the `test_fixture` call its replacement; confirm.
test "extracting bbc format author" do
html = TestHelper.read_parse_fixture("bbc.html")
assert AuthorFinder.find(html) == ["BBC News"]
test_fixture("bbc.html", ["BBC News"])
end

# BuzzFeed fixture: two authors are expected, in this order.
# NOTE(review): diff rendered without +/- markers — the `html = ...` / `assert ...`
# pair looks like the removed body and the `test_fixture` call its replacement; confirm.
test "extracting buzzfeed format author" do
html = TestHelper.read_parse_fixture("buzzfeed.html")
assert AuthorFinder.find(html) == ["Salvador Hernandez", "Hamza Shaban"]
test_fixture("buzzfeed.html", ["Salvador Hernandez", "Hamza Shaban"])
end

# Medium fixture: the expected author list is ["Ken Mazaika"].
# NOTE(review): diff rendered without +/- markers — the `html = ...` / `assert ...`
# pair looks like the removed body and the `test_fixture` call its replacement; confirm.
test "extracting medium format author" do
html = TestHelper.read_parse_fixture("medium.html")
assert AuthorFinder.find(html) == ["Ken Mazaika"]
test_fixture("medium.html", ["Ken Mazaika"])
end

# NYTimes fixture: the expected author list is ["Judith H. Dobrzynski"].
# NOTE(review): diff rendered without +/- markers — the `html = ...` / `assert ...`
# pair looks like the removed body and the `test_fixture` call its replacement; confirm.
test "extracting nytimes format author" do
html = TestHelper.read_parse_fixture("nytimes.html")
assert AuthorFinder.find(html) == ["Judith H. Dobrzynski"]
test_fixture("nytimes.html", ["Judith H. Dobrzynski"])
end

# PubMed fixture: checked through the shared `test_fixture` helper.
# The trailing space in "Meno H " is part of the expected value as written.
test "extracting pubmed format author" do
  expected_authors = ["Meno H ", "et al."]
  test_fixture("pubmed.html", expected_authors)
end

# test "extracting elixir format author" do
# test_fixture("elixir.html", ["José Valim"])
# end
end
25 changes: 12 additions & 13 deletions test/readability/published_at_finder_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -3,31 +3,30 @@ defmodule Readability.PublishedAtFinderTest do

alias Readability.PublishedAtFinder

test "extracting bbc format published at" do
html = TestHelper.read_parse_fixture("bbc.html")
# Asserts that publication-date extraction yields `expected_published_at` for
# the fixture `file_name` through both entry points: `Readability.published_at/1`
# on the raw fixture contents and `PublishedAtFinder.find/1` on the pre-parsed
# fixture.
defp test_fixture(file_name, expected_published_at) do
  published_from_raw =
    file_name
    |> TestHelper.read_fixture()
    |> Readability.published_at()

  assert published_from_raw == expected_published_at

  published_from_tree =
    file_name
    |> TestHelper.read_parse_fixture()
    |> PublishedAtFinder.find()

  assert published_from_tree == expected_published_at
end

assert PublishedAtFinder.find(html) == nil
# BBC fixture: the expected publication date is nil.
test "extracting bbc format published at", do: test_fixture("bbc.html", nil)

# BuzzFeed fixture: the expected publication date is nil.
# NOTE(review): diff rendered without +/- markers — the `html = ...` / `assert ...`
# lines look like the removed body and the `test_fixture` call its replacement; confirm.
test "extracting buzzfeed format published at" do
html = TestHelper.read_parse_fixture("buzzfeed.html")

assert PublishedAtFinder.find(html) == nil
test_fixture("buzzfeed.html", nil)
end

# Elixir fixture: the expected publication date is nil.
# NOTE(review): diff rendered without +/- markers — the `html = ...` / `assert ...`
# lines look like the removed body and the `test_fixture` call its replacement; confirm.
test "extracting elixir format published at" do
html = TestHelper.read_parse_fixture("elixir.html")

assert PublishedAtFinder.find(html) == nil
test_fixture("elixir.html", nil)
end

# Medium fixture: a full UTC DateTime (with millisecond precision) is expected.
# NOTE(review): diff rendered without +/- markers — the `html = ...` / `assert ...`
# pair looks like the removed body and the `test_fixture` call its replacement; confirm.
test "extracting medium format published at" do
html = TestHelper.read_parse_fixture("medium.html")
assert PublishedAtFinder.find(html) == ~U[2015-01-31 22:58:05.645Z]
test_fixture("medium.html", ~U[2015-01-31 22:58:05.645Z])
end

# NYTimes fixture: a Date (no time component) is expected.
# NOTE(review): diff rendered without +/- markers — the `html = ...` / `assert ...`
# pair looks like the removed body and the `test_fixture` call its replacement; confirm.
test "extracting nytimes format published at" do
html = TestHelper.read_parse_fixture("nytimes.html")
assert PublishedAtFinder.find(html) == ~D[2016-03-16]
test_fixture("nytimes.html", ~D[2016-03-16])
end
end
16 changes: 16 additions & 0 deletions test/readability/title_finder_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,22 @@ defmodule Readability.TitleFinderTest do
assert title == "og title"
end

# Calls the public `Readability.title/1` with the module's `@html` attribute
# and expects "og title".
test "extracts most proper title from an html string" do
  title = Readability.title(@html)
  assert title == "og title"
end

# Passes an HTML string containing only a <title> tag to `Readability.title/1`;
# the input title is "Tag title - test" and the expected result is "Tag title".
test "extracts regular title from an html string" do
  page = """
  <html>
  <head>
  <title>Tag title - test</title>
  </head>
  </html>
  """

  extracted = Readability.title(page)
  assert extracted == "Tag title"
end

test "extract og title" do
title = Readability.TitleFinder.og_title(@html_tree)
assert title == "og title"
Expand Down

0 comments on commit e3c152b

Please sign in to comment.