Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/add raw line on error tuple option #95

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions lib/csv.ex
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ defmodule CSV do
When set to `false` (default), will use no header values.
When set to anything but `false`, the resulting rows in the matrix will
be maps instead of lists.
* `:raw_line_on_error` – When set to true, raw csv line will be returned on
error tuples. Defaults to false.

## Examples

Expand Down Expand Up @@ -209,6 +211,13 @@ defmodule CSV do
line: index + 1,
escape_max_lines: escape_max_lines
end
defp yield_or_raise!({:error, EscapeSequenceError, escape_sequence, index, raw_line}, escape_max_lines) do
raise EscapeSequenceError,
escape_sequence: escape_sequence,
line: index + 1,
escape_max_lines: escape_max_lines,
raw_line: raw_line
end

defp yield_or_raise!({:error, StrayQuoteError, field, index}, _) do
raise StrayQuoteError,
Expand All @@ -219,6 +228,9 @@ defmodule CSV do
defp yield_or_raise!({:error, mod, message, index}, _) do
raise mod, message: message, line: index + 1
end
defp yield_or_raise!({:error, mod, message, index, raw_line}, _) do
raise mod, message: message, line: index + 1, raw_line: raw_line
end

defp yield_or_raise!({:ok, row}, _), do: row

Expand All @@ -236,6 +248,14 @@ defmodule CSV do
escape_max_lines: escape_max_lines
).message}
end
defp yield_or_inline!({:error, EscapeSequenceError, escape_sequence, index, raw_line}, escape_max_lines) do
{:error,
EscapeSequenceError.exception(
escape_sequence: escape_sequence,
line: index + 1,
escape_max_lines: escape_max_lines
).message, raw_line}
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is breaking backwards compatibility if I interpret it correctly. If someone has existing code that depends on the length of the tuple being returned, they will get a compilation error. We should either try to avoid it by adding the raw line to the message, or reserve this for a major version upgrade.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I included the :raw_line_on_error option (defauls to false) so that only when set to true the extra raw_line will return on the error tuple, so I'd say it won't break backwards compatibility.

end

defp yield_or_inline!({:error, StrayQuoteError, field, index}, _) do
{:error,
Expand All @@ -244,10 +264,20 @@ defmodule CSV do
line: index + 1
).message}
end
defp yield_or_inline!({:error, StrayQuoteError, field, index, raw_line}, _) do
{:error,
StrayQuoteError.exception(
field: field,
line: index + 1
).message, raw_line}
end

defp yield_or_inline!({:error, errormod, message, index}, _) do
{:error, errormod.exception(message: message, line: index + 1).message}
end
defp yield_or_inline!({:error, errormod, message, index, raw_line}, _) do
{:error, errormod.exception(message: message, line: index + 1).message, raw_line}
end

defp yield_or_inline!(value, _), do: value

Expand Down
22 changes: 14 additions & 8 deletions lib/csv/decoding/decoder.ex
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ defmodule CSV.Decoding.Decoder do
be maps instead of lists.
* `:replacement` – The replacement string to use where lines have bad
encoding. Defaults to `nil`, which disables replacement.
* `:raw_line_on_error` – When set to true, raw csv line will be returned on
error tuples. Defaults to false.

## Examples

Expand Down Expand Up @@ -131,13 +133,16 @@ defmodule CSV.Decoding.Decoder do

defp decode_row({line, index, headers, row_length}, options) do
with {:ok, parsed, _} <- parse_row({line, index}, options),
{:ok, _} <- validate_row_length({parsed, index}, row_length),
{:ok, _} <- validate_row_length({parsed, line, index}, row_length, options),
do: build_row(parsed, headers)
end

defp parse_row({line, index}, options) do
with {:ok, lex, _} <- Lexer.lex({line, index}, options),
do: Parser.parse({lex, index}, options)
with {:ok, lex, _} <- Lexer.lex({line, index}, options) do
Parser.parse({lex, line, index}, options)
else
error -> Parser.append_raw_line?(error, line, options)
end
end

defp build_row(data, headers) when is_list(headers) do
Expand Down Expand Up @@ -216,17 +221,18 @@ defmodule CSV.Decoding.Decoder do
{[{line, index, headers, row_length}], {row_length, options}}
end

defp validate_row_length({data, _}, false), do: {:ok, data}
defp validate_row_length({data, _}, nil), do: {:ok, data}
defp validate_row_length({data, _, _}, false, _), do: {:ok, data}
defp validate_row_length({data, _, _}, nil, _), do: {:ok, data}

defp validate_row_length({data, index}, expected_length) do
defp validate_row_length({data, line, index}, expected_length, options) do
case data |> Enum.count() do
^expected_length ->
{:ok, data}

actual_length ->
{:error, RowLengthError,
"Row has length #{actual_length} - expected length #{expected_length}", index}
message = "Row has length #{actual_length} - expected length #{expected_length}"
{:error, RowLengthError, message, index}
|> Parser.append_raw_line?(line, options)
end
end
end
20 changes: 18 additions & 2 deletions lib/csv/decoding/parser.ex
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,21 @@ defmodule CSV.Decoding.Parser do

* `:strip_fields` – When set to true, will strip whitespace from fields.
Defaults to false.
* `:raw_line_on_error` – When set to true, raw csv line will be returned on
error tuples. Defaults to false.
"""

def parse(message, options \\ [])

def parse({tokens, index}, options) do
def parse({tokens, index}, options),
do: parse({tokens, "", index}, options)

def parse({tokens, raw_line, index}, options) do
case parse([], "", tokens, :unescaped, options) do
{:ok, row} -> {:ok, row, index}
{:error, type, message} -> {:error, type, message, index}
{:error, type, message} ->
{:error, type, message, index}
|> append_raw_line?(raw_line, options)
end
end

Expand Down Expand Up @@ -124,4 +131,13 @@ defmodule CSV.Decoding.Parser do
_ -> field
end
end

@doc false
def append_raw_line?(error_tuple, raw_line, options) do
raw_line_on_error = options |> Keyword.get(:raw_line_on_error, false)
do_append_raw_line?(error_tuple, raw_line, raw_line_on_error)
end
defp do_append_raw_line?(error_tuple, raw_line, true = _raw_line_on_error),
do: Tuple.append(error_tuple, raw_line)
defp do_append_raw_line?(error_tuple, _raw_line, _raw_line_on_error), do: error_tuple
end
24 changes: 16 additions & 8 deletions lib/csv/exceptions.ex
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,17 @@ defmodule CSV.EncodingError do
Raised at runtime when the CSV encoding is invalid.
"""

defexception [:line, :message]
defexception [:line, :message, :raw_line]

def exception(options) do
line = options |> Keyword.fetch!(:line)
message = options |> Keyword.fetch!(:message)
raw_line = options |> Keyword.get(:raw_line)

%__MODULE__{
line: line,
message: message <> " on line " <> Integer.to_string(line)
message: message <> " on line " <> Integer.to_string(line),
raw_line: raw_line
}
end
end
Expand All @@ -21,15 +23,17 @@ defmodule CSV.RowLengthError do
Raised at runtime when the CSV has rows of variable length.
"""

defexception [:line, :message]
defexception [:line, :message, :raw_line]

def exception(options) do
line = options |> Keyword.fetch!(:line)
message = options |> Keyword.fetch!(:message)
raw_line = options |> Keyword.get(:raw_line)

%__MODULE__{
line: line,
message: message <> " on line " <> Integer.to_string(line)
message: message <> " on line " <> Integer.to_string(line),
raw_line: raw_line
}
end
end
Expand All @@ -39,18 +43,20 @@ defmodule CSV.StrayQuoteError do
Raised at runtime when the CSV row has stray quotes.
"""

defexception [:line, :message]
defexception [:line, :message, :raw_line]

def exception(options) do
line = options |> Keyword.fetch!(:line)
field = options |> Keyword.fetch!(:field)
raw_line = options |> Keyword.get(:raw_line)

message = "Stray quote on line " <>
Integer.to_string(line) <> " near \"" <> field <> "\""

%__MODULE__{
line: line,
message: message
message: message,
raw_line: raw_line
}
end
end
Expand All @@ -60,12 +66,13 @@ defmodule CSV.EscapeSequenceError do
Raised at runtime when the CSV stream ends with unfinished escape sequences
"""

defexception [:message]
defexception [:message, :raw_line]

def exception(options) do
line = options |> Keyword.fetch!(:line)
escape_sequence = options |> Keyword.fetch!(:escape_sequence)
escape_max_lines = options |> Keyword.fetch!(:escape_max_lines)
raw_line = options |> Keyword.get(:raw_line)

message =
"Escape sequence started on line #{line} " <>
Expand All @@ -76,7 +83,8 @@ defmodule CSV.EscapeSequenceError do
"it using the escape_max_lines option: https://hexdocs.pm/csv/CSV.html#decode/2"

%__MODULE__{
message: message
message: message,
raw_line: raw_line
}
end
end
54 changes: 54 additions & 0 deletions test/decoding/append_raw_line_test.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
defmodule DecodingTests.AppendRawLineTest do
use ExUnit.Case
import TestSupport.StreamHelpers

alias CSV.Decoding.Decoder
alias CSV.RowLengthError
alias CSV.EscapeSequenceError
alias CSV.StrayQuoteError
alias CSV.EncodingError

defp filter_errors(stream) do
stream
|> Stream.filter(fn
{:error, _, _, _} -> true
{:error, _, _, _, _} -> true
_ -> false
end)
end

test "produces meaningful errors for non-unicode files returning raw_file" do
stream = "../fixtures/broken-encoding.csv" |> Path.expand(__DIR__) |> File.stream!()

errors = stream |> Decoder.decode(raw_line_on_error: true) |> filter_errors |> Enum.to_list()

assert [
{:error, EncodingError, "Invalid encoding", 0, invalid_string}
] = errors
refute String.valid?(invalid_string)
end

test "includes an error for rows with variable length returning raw_file" do
stream = ["a,\"be\"", ",c,d", "e,f", "g,,h", "i,j", "k,l"] |> to_stream

errors = stream |> Decoder.decode(raw_line_on_error: true) |> filter_errors |> Enum.to_list()

assert [
{:error, RowLengthError, "Row has length 3 - expected length 2", 1, line1},
{:error, RowLengthError, "Row has length 3 - expected length 2", 3, line2}
] = errors
assert [line1, line2] |> Enum.all?(&String.valid?/1)
end

test "includes an error for rows with unescaped quotes returning raw_file" do
stream = ["a\",\"be", "\"c,d", "\"e,f\"g\",h"] |> to_stream
errors = stream |> Decoder.decode(raw_line_on_error: true) |> Enum.to_list()

assert [
{:error, StrayQuoteError, "a", 0, line1},
{:error, EscapeSequenceError, "c,d", 1, line2},
{:error, StrayQuoteError, "e,f", 2, line3}
] = errors
assert [line1, line2, line3] |> Enum.all?(&String.valid?/1)
end
end