Skip to content

Commit

Permalink
Merge branch 'master' into parser
Browse files Browse the repository at this point in the history
Merged with the current master:

* Implemented behavior for Onigumo.Parser.
* Added Parser to CLI and environment.
  • Loading branch information
Glutexo committed May 10, 2024
2 parents 7223f36 + 0d890b9 commit 493e10f
Show file tree
Hide file tree
Showing 13 changed files with 162 additions and 41 deletions.
2 changes: 2 additions & 0 deletions config/dev.exs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import Config

# Collaborator modules are looked up through application config instead of
# being hard-coded, so another environment can substitute its own modules.
config(:onigumo, :http_client, HTTPoison)
# :downloader and :parser are the component modules Onigumo.CLI reads with
# Application.compile_env/2.
config(:onigumo, :downloader, Onigumo.Downloader)
config(:onigumo, :parser, Onigumo.Parser)
1 change: 1 addition & 0 deletions config/test.exs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import Config

# Test environment: the real collaborators are replaced with mocks
# (HTTPoisonMock and OnigumoDownloaderMock are defined in test/test_helper.exs).
config(:onigumo, :http_client, HTTPoisonMock)
config(:onigumo, :downloader, OnigumoDownloaderMock)
# NOTE(review): there is no :parser entry here although Onigumo.CLI also reads
# :parser at compile time — presumably it resolves to nil under test; confirm
# whether a parser mock should be configured as well.
38 changes: 34 additions & 4 deletions lib/cli.ex
Original file line number Diff line number Diff line change
@@ -1,7 +1,37 @@
defmodule Onigumo.CLI do
  @moduledoc """
  Command-line entry point of Onigumo.

  Parses the command line, resolves the requested component and runs it
  with the chosen working directory.
  """

  # Runnable components keyed by the name accepted on the command line.
  # The modules are resolved at compile time from application config, so each
  # environment can plug in its own implementation.
  @components %{
    "downloader" => Application.compile_env(:onigumo, :downloader),
    "parser" => Application.compile_env(:onigumo, :parser)
  }

  @doc """
  Runs the component named in `argv`.

  Accepts exactly one positional argument (the component name) and the
  optional `-C`/`--working-dir <dir>` switch; without the switch the current
  working directory is used. The component's `main/1` is called with that
  directory and its result is returned.

  Any other shape of `argv` (no component, several components, unknown
  switches or a switch without its value) prints the usage message instead.

  Raises `MatchError` when the component name is not one of the known
  components.
  """
  def main(argv) do
    case OptionParser.parse(
           argv,
           aliases: [C: :working_dir],
           strict: [working_dir: :string]
         ) do
      {parsed_switches, [component], []} ->
        # Keys are strings, so the user-supplied name is looked up as-is and
        # is never converted into an atom.
        {:ok, module} = Map.fetch(@components, component)
        # Only query the current directory when no working dir was given.
        working_dir = Keyword.get_lazy(parsed_switches, :working_dir, &File.cwd!/0)
        module.main(working_dir)

      _ ->
        usage_message()
    end
  end

  # Prints a short usage summary that lists the available component names.
  defp usage_message() do
    components = Enum.join(Map.keys(@components), ", ")

    IO.puts("""
    Usage: onigumo [OPTION]... [COMPONENT]
    Simple program that retrieves HTTP web content as structured data.
    COMPONENT\tOnigumo component to run, available: #{components}
    OPTIONS:
    -C, --working-dir <dir>\tChange working dir to <dir> before running
    """)
  end
end
4 changes: 4 additions & 0 deletions lib/onigumo/component.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
defmodule Onigumo.Component do
  @moduledoc """
  Behaviour shared by the runnable Onigumo components.

  A component exposes a single entry point, `main/1`, which receives the
  directory it should work in.
  """

  @doc """
  Runs the component.

  `root_path` is the directory the component operates on. Returns `:ok`.
  """
  @callback main(root_path :: String.t()) :: :ok
end
2 changes: 2 additions & 0 deletions lib/onigumo/downloader.ex
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ defmodule Onigumo.Downloader do
@moduledoc """
Web scraper
"""
@behaviour Onigumo.Component

@impl Onigumo.Component
def main(root_path) do
http_client().start()

Expand Down
4 changes: 3 additions & 1 deletion lib/onigumo/parser.ex
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ defmodule Onigumo.Parser do
@moduledoc """
Web scraper
"""
@behaviour Onigumo.Component

@impl Onigumo.Component
def main(root_path) do
root_path
|> list_downloaded()
Expand All @@ -17,6 +19,6 @@ defmodule Onigumo.Parser do

# Tells whether `path` is a downloaded file, i.e. whether its extension
# equals the configured :downloaded_suffix.
defp is_downloaded(path) do
  # Path.extname/1 keeps the leading dot, so the configured suffix is
  # presumably expected to include it as well — TODO confirm against config.
  suffix = Application.get_env(:onigumo, :downloaded_suffix)
  Path.extname(path) == suffix
end
end
13 changes: 11 additions & 2 deletions mix.exs
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,16 @@ defmodule Onigumo.MixProject do
use Mix.Project

# Mix project definition: application name, version, Elixir requirement,
# dependencies, escript settings and the directories to compile.
def project do
  # Read the Mix environment once; it decides both start_permanent and the
  # set of source directories.
  env = Mix.env()

  [
    app: :onigumo,
    version: "0.1.0",
    # NOTE(review): the test suite uses ExUnit's :tmp_dir tag, which
    # presumably needs a newer Elixir than 1.10 — confirm the requirement.
    elixir: "~> 1.10",
    start_permanent: env == :prod,
    deps: deps(),
    escript: escript(),
    elixirc_paths: elixirc_paths(env)
  ]
end

Expand Down Expand Up @@ -37,4 +40,10 @@ defmodule Onigumo.MixProject do
main_module: Onigumo.CLI
]
end

# Directories compiled in the :test environment: the regular sources plus
# the shared test helpers under test/support.
defp elixirc_paths(:test), do: elixirc_paths_default() ++ ["test/support"]

# Every other environment compiles only the regular sources.
defp elixirc_paths(_), do: elixirc_paths_default()

# Mix's documented default for :elixirc_paths. It is written out literally
# because these helpers run while project/0 is still being evaluated, i.e.
# before this project's own configuration exists, so Mix.Project.config/0
# cannot be relied on to describe this project at that point.
defp elixirc_paths_default(), do: ["lib"]
end
12 changes: 6 additions & 6 deletions mix.lock
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
%{
"certifi": {:hex, :certifi, "2.9.0", "6f2a475689dd47f19fb74334859d460a2dc4e3252a3324bd2111b8f0429e7e21", [:rebar3], [], "hexpm", "266da46bdb06d6c6d35fde799bcb28d36d985d424ad7c08b5bb48f5b5cdd4641"},
"floki": {:hex, :floki, "0.32.1", "dfe3b8db3b793939c264e6f785bca01753d17318d144bd44b407fb3493acaa87", [:mix], [{:html_entities, "~> 0.5.0", [hex: :html_entities, repo: "hexpm", optional: false]}], "hexpm", "d4b91c713e4a784a3f7b1e3cc016eefc619f6b1c3898464222867cafd3c681a3"},
"hackney": {:hex, :hackney, "1.18.1", "f48bf88f521f2a229fc7bae88cf4f85adc9cd9bcf23b5dc8eb6a1788c662c4f6", [:rebar3], [{:certifi, "~>2.9.0", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "~>6.1.0", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "~>1.0.0", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~>1.1", [hex: :mimerl, repo: "hexpm", optional: false]}, {:parse_trans, "3.3.1", [hex: :parse_trans, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "~>1.1.0", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}, {:unicode_util_compat, "~>0.7.0", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm", "a4ecdaff44297e9b5894ae499e9a070ea1888c84afdd1fd9b7b2bc384950128e"},
"certifi": {:hex, :certifi, "2.12.0", "2d1cca2ec95f59643862af91f001478c9863c2ac9cb6e2f89780bfd8de987329", [:rebar3], [], "hexpm", "ee68d85df22e554040cdb4be100f33873ac6051387baf6a8f6ce82272340ff1c"},
"floki": {:hex, :floki, "0.35.4", "cc947b446024732c07274ac656600c5c4dc014caa1f8fb2dfff93d275b83890d", [:mix], [], "hexpm", "27fa185d3469bd8fc5947ef0f8d5c4e47f0af02eb6b070b63c868f69e3af0204"},
"hackney": {:hex, :hackney, "1.20.1", "8d97aec62ddddd757d128bfd1df6c5861093419f8f7a4223823537bad5d064e2", [:rebar3], [{:certifi, "~>2.12.0", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "~>6.1.0", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "~>1.0.0", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~>1.1", [hex: :mimerl, repo: "hexpm", optional: false]}, {:parse_trans, "3.4.1", [hex: :parse_trans, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "~>1.1.0", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}, {:unicode_util_compat, "~>0.7.0", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm", "fe9094e5f1a2a2c0a7d10918fee36bfec0ec2a979994cff8cfe8058cd9af38e3"},
"html_entities": {:hex, :html_entities, "0.5.2", "9e47e70598da7de2a9ff6af8758399251db6dbb7eebe2b013f2bbd2515895c3c", [:mix], [], "hexpm", "c53ba390403485615623b9531e97696f076ed415e8d8058b1dbaa28181f4fdcc"},
"httpoison": {:hex, :httpoison, "1.8.2", "9eb9c63ae289296a544842ef816a85d881d4a31f518a0fec089aaa744beae290", [:mix], [{:hackney, "~> 1.17", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm", "2bb350d26972e30c96e2ca74a1aaf8293d61d0742ff17f01e0279fef11599921"},
"idna": {:hex, :idna, "6.1.1", "8a63070e9f7d0c62eb9d9fcb360a7de382448200fbbd1b106cc96d3d8099df8d", [:rebar3], [{:unicode_util_compat, "~>0.7.0", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm", "92376eb7894412ed19ac475e4a86f7b413c1b9fbb5bd16dccd57934157944cea"},
"metrics": {:hex, :metrics, "1.0.1", "25f094dea2cda98213cecc3aeff09e940299d950904393b2a29d191c346a8486", [:rebar3], [], "hexpm", "69b09adddc4f74a40716ae54d140f93beb0fb8978d8636eaded0c31b6f099f16"},
"mimerl": {:hex, :mimerl, "1.2.0", "67e2d3f571088d5cfd3e550c383094b47159f3eee8ffa08e64106cdf5e981be3", [:rebar3], [], "hexpm", "f278585650aa581986264638ebf698f8bb19df297f66ad91b18910dfc6e19323"},
"mox": {:hex, :mox, "1.0.2", "dc2057289ac478b35760ba74165b4b3f402f68803dd5aecd3bfd19c183815d64", [:mix], [], "hexpm", "f9864921b3aaf763c8741b5b8e6f908f44566f1e427b2630e89e9a73b981fef2"},
"parse_trans": {:hex, :parse_trans, "3.3.1", "16328ab840cc09919bd10dab29e431da3af9e9e7e7e6f0089dd5a2d2820011d8", [:rebar3], [], "hexpm", "07cd9577885f56362d414e8c4c4e6bdf10d43a8767abb92d24cbe8b24c54888b"},
"ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.6", "cf344f5692c82d2cd7554f5ec8fd961548d4fd09e7d22f5b62482e5aeaebd4b0", [:make, :mix, :rebar3], [], "hexpm", "bdb0d2471f453c88ff3908e7686f86f9be327d065cc1ec16fa4540197ea04680"},
"mox": {:hex, :mox, "1.1.0", "0f5e399649ce9ab7602f72e718305c0f9cdc351190f72844599545e4996af73c", [:mix], [], "hexpm", "d44474c50be02d5b72131070281a5d3895c0e7a95c780e90bc0cfe712f633a13"},
"parse_trans": {:hex, :parse_trans, "3.4.1", "6e6aa8167cb44cc8f39441d05193be6e6f4e7c2946cb2759f015f8c56b76e5ff", [:rebar3], [], "hexpm", "620a406ce75dada827b82e453c19cf06776be266f5a67cff34e1ef2cbb60e49a"},
"ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.7", "354c321cf377240c7b8716899e182ce4890c5938111a1296add3ec74cf1715df", [:make, :mix, :rebar3], [], "hexpm", "fe4c190e8f37401d30167c8c405eda19469f34577987c76dde613e838bbc67f8"},
"unicode_util_compat": {:hex, :unicode_util_compat, "0.7.0", "bc84380c9ab48177092f43ac89e4dfa2c6d62b40b8bd132b1059ecc7232f9a78", [:rebar3], [], "hexpm", "25eee6d67df61960cf6a794239566599b09e17e668d3700247bc498638152521"},
}
68 changes: 68 additions & 0 deletions test/onigumo_cli_test.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
defmodule OnigumoCLITest do
  # Not async: these tests change the process-wide working directory and
  # capture standard output.
  use ExUnit.Case
  import ExUnit.CaptureIO
  import Mox

  # Component names that are not registered in the CLI (names are
  # case-sensitive).
  @invalid_arguments [
    "Downloader",
    "uploader"
  ]

  # Switches the CLI does not define.
  @invalid_switches [
    "--invalid",
    "-c"
  ]

  # Both spellings of the working-directory switch.
  @working_dir_switches [
    "--working-dir",
    "-C"
  ]

  describe("Onigumo.CLI.main/1") do
    for argument <- @invalid_arguments do
      test("run CLI with invalid argument #{inspect(argument)}") do
        assert_raise(MatchError, fn -> Onigumo.CLI.main([unquote(argument)]) end)
      end
    end

    test("run CLI with no arguments") do
      assert usage_message_printed?(fn -> Onigumo.CLI.main([]) end)
    end

    test("run CLI with more than one argument") do
      assert usage_message_printed?(fn -> Onigumo.CLI.main(["Downloader", "Parser"]) end)
    end

    for switch <- @invalid_switches do
      test("run CLI with invalid switch #{inspect(switch)}") do
        assert usage_message_printed?(fn -> Onigumo.CLI.main([unquote(switch)]) end)
      end
    end

    @tag :tmp_dir
    test("run CLI with 'downloader' argument passing cwd", %{tmp_dir: tmp_dir}) do
      expect(OnigumoDownloaderMock, :main, fn working_dir -> working_dir end)

      # File.cd!/2 raises if the directory cannot be entered and restores the
      # previous working directory afterwards, so the change of directory
      # cannot leak into other tests.
      File.cd!(tmp_dir, fn ->
        assert Onigumo.CLI.main(["downloader"]) == tmp_dir
      end)
    end

    for switch <- @working_dir_switches do
      @tag :tmp_dir
      test("run CLI 'downloader' with #{inspect(switch)} switch", %{tmp_dir: tmp_dir}) do
        expect(OnigumoDownloaderMock, :main, fn working_dir -> working_dir end)

        assert Onigumo.CLI.main(["downloader", unquote(switch), tmp_dir]) == tmp_dir
      end

      test("run CLI 'downloader' with #{inspect(switch)} without any value") do
        assert usage_message_printed?(fn -> Onigumo.CLI.main(["downloader", unquote(switch)]) end)
      end
    end
  end

  # Runs `function` while capturing standard output and reports whether the
  # captured text begins with the CLI usage banner.
  defp usage_message_printed?(function) do
    output = capture_io(function)
    String.starts_with?(output, "Usage: onigumo ")
  end
end
40 changes: 12 additions & 28 deletions test/onigumo_downloader_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@ defmodule OnigumoDownloaderTest do
@tag :tmp_dir
test("run Downloader", %{tmp_dir: tmp_dir}) do
expect(HTTPoisonMock, :start, fn -> nil end)
expect(HTTPoisonMock, :get!, length(@urls), &prepare_response/1)
expect(HTTPoisonMock, :get!, length(@urls), &HttpSupport.response/1)

input_path_env = Application.get_env(:onigumo, :input_path)
input_path_tmp = Path.join(tmp_dir, input_path_env)
input_file_content = prepare_input(@urls)
input_file_content = InputSupport.url_list(@urls)
File.write!(input_path_tmp, input_file_content)

Onigumo.Downloader.main(tmp_dir)
Expand All @@ -30,11 +30,11 @@ defmodule OnigumoDownloaderTest do
describe("Onigumo.Downloader.create_download_stream/1") do
@tag :tmp_dir
test("download URLs from the input file with a created stream", %{tmp_dir: tmp_dir}) do
expect(HTTPoisonMock, :get!, length(@urls), &prepare_response/1)
expect(HTTPoisonMock, :get!, length(@urls), &HttpSupport.response/1)

input_path_env = Application.get_env(:onigumo, :input_path)
input_path_tmp = Path.join(tmp_dir, input_path_env)
input_file_content = prepare_input(@urls)
input_file_content = InputSupport.url_list(@urls)
File.write!(input_path_tmp, input_file_content)

Onigumo.Downloader.create_download_stream(tmp_dir) |> Stream.run()
Expand All @@ -46,36 +46,36 @@ defmodule OnigumoDownloaderTest do
describe("Onigumo.Downloader.download_url/2") do
@tag :tmp_dir
test("download a URL", %{tmp_dir: tmp_dir}) do
expect(HTTPoisonMock, :get!, &prepare_response/1)
expect(HTTPoisonMock, :get!, &HttpSupport.response/1)

input_url = Enum.at(@urls, 0)
Onigumo.Downloader.download_url(input_url, tmp_dir)

output_file_name = Onigumo.Downloader.create_file_name(input_url)
output_path = Path.join(tmp_dir, output_file_name)
read_output = File.read!(output_path)
expected_output = body(input_url)
expected_output = HttpSupport.body(input_url)
assert(read_output == expected_output)
end
end

describe("Onigumo.Downloader.get_url/1") do
test("get response by HTTP request") do
expect(HTTPoisonMock, :get!, &prepare_response/1)
expect(HTTPoisonMock, :get!, &HttpSupport.response/1)

url = Enum.at(@urls, 0)
get_response = Onigumo.Downloader.get_url(url)
expected_response = prepare_response(url)
expected_response = HttpSupport.response(url)
assert(get_response == expected_response)
end
end

describe("Onigumo.Downloader.get_body/1") do
test("extract body from URL response") do
url = Enum.at(@urls, 0)
response = prepare_response(url)
response = HttpSupport.response(url)
get_body = Onigumo.Downloader.get_body(response)
expected_body = body(url)
expected_body = HttpSupport.body(url)
assert(get_body == expected_body)
end
end
Expand All @@ -101,7 +101,7 @@ defmodule OnigumoDownloaderTest do

input_path_env = Application.get_env(:onigumo, :input_path)
input_path_tmp = Path.join(tmp_dir, input_path_env)
input_file_content = prepare_input(input_urls)
input_file_content = InputSupport.url_list(input_urls)
File.write!(input_path_tmp, input_file_content)

loaded_urls = Onigumo.Downloader.load_urls(tmp_dir) |> Enum.to_list()
Expand All @@ -124,27 +124,11 @@ defmodule OnigumoDownloaderTest do
end
end

defp prepare_response(url) do
%HTTPoison.Response{
status_code: 200,
body: body(url)
}
end

defp prepare_input(urls) do
Enum.map(urls, &(&1 <> "\n"))
|> Enum.join()
end

defp body(url) do
"Body from: #{url}\n"
end

defp assert_downloaded(url, tmp_dir) do
file_name = Onigumo.Downloader.create_file_name(url)
output_path = Path.join(tmp_dir, file_name)
read_output = File.read!(output_path)
expected_output = body(url)
expected_output = HttpSupport.body(url)
assert(read_output == expected_output)
end
end
12 changes: 12 additions & 0 deletions test/support/http.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
defmodule HttpSupport do
  @moduledoc """
  Test helpers that fabricate HTTP responses for the mocked HTTP client.
  """

  @doc """
  Returns the fake response body associated with `url`.
  """
  def body(url), do: "Body from: #{url}\n"

  @doc """
  Builds an `HTTPoison.Response` with status code 200 whose body is
  `body(url)`.
  """
  def response(url) do
    payload = body(url)
    %HTTPoison.Response{status_code: 200, body: payload}
  end
end
6 changes: 6 additions & 0 deletions test/support/input.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
defmodule InputSupport do
  @moduledoc """
  Test helpers for preparing input file content.
  """

  @doc """
  Renders `urls` as input-file content: every URL followed by a newline.

  Returns an empty string for an empty list.
  """
  def url_list(urls) do
    # A single pass with map_join avoids building the intermediate list that
    # a separate map + join would create.
    Enum.map_join(urls, &(&1 <> "\n"))
  end
end
1 change: 1 addition & 0 deletions test/test_helper.exs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
ExUnit.start()

# Mox mocks referenced by config/test.exs; each one is generated from the
# behaviour it stands in for.
Mox.defmock(HTTPoisonMock, for: HTTPoison.Base)
Mox.defmock(OnigumoDownloaderMock, for: Onigumo.Component)

0 comments on commit 493e10f

Please sign in to comment.