Skip to content

Commit

Permalink
Merge pull request #4205 from nulib/5273-skip-bad-encodes
Browse files Browse the repository at this point in the history
Log and skip failed encodes when indexing
  • Loading branch information
mbklein authored Oct 14, 2024
2 parents 8e09175 + dc091f7 commit cb8b0fb
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 1 deletion.
17 changes: 16 additions & 1 deletion app/lib/meadow/data/indexer.ex
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,8 @@ defmodule Meadow.Data.Indexer do
Repo.transaction(
fn ->
stream
|> Stream.map(&SearchDocument.encode(&1, version))
|> Stream.map(&encode_document(&1, version))
|> Stream.reject(&(&1 == :skip))
|> Bulk.upload(index)

SearchIndex.refresh(index)
Expand All @@ -104,6 +105,20 @@ defmodule Meadow.Data.Indexer do
)
end

# Encodes one streamed item into a search document for the given index
# `version`, returning `:skip` instead of a document when the item cannot
# be encoded. The caller rejects `:skip` values before the bulk upload.

# A `nil` item has nothing to encode, so it is skipped outright.
defp encode_document(nil, _version), do: :skip

defp encode_document(item, version) do
  SearchDocument.encode(item, version)
rescue
  exception ->
    # Log the failure, tagged with this module and the item's id, and keep
    # going: one item that fails to encode should not abort the whole run.
    with_log_metadata module: __MODULE__, id: item.id do
      Logger.error(
        "Index encoding failed due to: #{Exception.format_banner(:error, exception, [])}"
      )
    end

    :skip
end

def stream(query, preloads) do
from(query)
|> Repo.stream()
Expand Down
23 changes: 23 additions & 0 deletions app/test/meadow/data/indexer_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,15 @@ defmodule Meadow.Data.IndexerTest do
# use Meadow.AuthorityCase
use Meadow.DataCase
use Meadow.IndexCase
alias Ecto.Adapters.SQL
alias Ecto.Adapters.SQL.Sandbox
alias Meadow.Data.{Collections, FileSets, Indexer, Works}
alias Meadow.Data.Schemas.{Collection, FileSet, Work}
alias Meadow.Ingest.{Projects, Sheets}
alias Meadow.{Config, Repo}

import ExUnit.CaptureLog

describe "indexing" do
setup do
{:ok, indexable_data()}
Expand All @@ -21,6 +24,26 @@ defmodule Meadow.Data.IndexerTest do
assert_doc_counts_match(context)
end

# Verifies that a file set which fails to encode is logged and skipped
# rather than aborting the whole index synchronization.
test "error_handling", context do
  assert_all_empty()
  %{file_sets: [broken | _] = all_file_sets} = context

  # Null out core_metadata with raw SQL so that encoding this one file set
  # raises during indexing.
  SQL.query!(
    Repo,
    "UPDATE file_sets SET core_metadata = NULL WHERE id = $1",
    [Ecto.UUID.dump!(broken.id)]
  )

  log_output = capture_log(fn -> Indexer.synchronize_index() end)

  # Every file set except the broken one should have been indexed.
  expected_count = length(all_file_sets) - 1
  assert {:ok, ^expected_count} = indexed_doc_count(FileSet, 2)

  # The failure must be logged with the offending record's id as metadata.
  assert log_output =~
           "id=#{broken.id} [error] Index encoding failed due to: ** (KeyError)"
end

test "reindex_all", context do
Indexer.synchronize_index()
assert_doc_counts_match(context)
Expand Down

0 comments on commit cb8b0fb

Please sign in to comment.