From dc091f7e2fab7120746af44a4431f5a56d33723d Mon Sep 17 00:00:00 2001
From: "Michael B. Klein"
Date: Mon, 14 Oct 2024 19:52:41 +0000
Subject: [PATCH] Log and skip failed encodes when indexing

---
Review notes (free-form text in this region is not part of the commit
message or of the diff):

* Indexer: the bulk-upload pipeline now maps each streamed item through the
  new private encode_document/2 instead of calling SearchDocument.encode/2
  directly, and rejects every :skip result before Bulk.upload/2.
* encode_document/2 returns :skip for a nil item. For any other item it
  returns the result of SearchDocument.encode/2; if that call raises, it logs
  "Index encoding failed due to: " followed by the exception banner, with
  module and id log metadata, and returns :skip.
* NOTE(review): the rescue clause matches every exception, and it reads
  item.id while handling the failure; confirm both are intended.
* Test "error_handling": sets core_metadata to NULL for the first file set
  with a raw SQL UPDATE, runs Indexer.synchronize_index/0 under capture_log,
  then asserts that the FileSet document count is one less than the number of
  file sets and that the captured log contains the KeyError message with that
  file set's id.

 app/lib/meadow/data/indexer.ex        | 17 ++++++++++++++++-
 app/test/meadow/data/indexer_test.exs | 23 +++++++++++++++++++++++
 2 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/app/lib/meadow/data/indexer.ex b/app/lib/meadow/data/indexer.ex
index 47fa7c276..200703f04 100644
--- a/app/lib/meadow/data/indexer.ex
+++ b/app/lib/meadow/data/indexer.ex
@@ -94,7 +94,8 @@ defmodule Meadow.Data.Indexer do
     Repo.transaction(
       fn ->
         stream
-        |> Stream.map(&SearchDocument.encode(&1, version))
+        |> Stream.map(&encode_document(&1, version))
+        |> Stream.reject(&(&1 == :skip))
         |> Bulk.upload(index)

         SearchIndex.refresh(index)
@@ -104,6 +105,20 @@ defmodule Meadow.Data.Indexer do
     )
   end

+  defp encode_document(nil, _), do: :skip
+
+  defp encode_document(item, version) do
+    SearchDocument.encode(item, version)
+  rescue
+    e ->
+      with_log_metadata module: __MODULE__, id: item.id do
+        ("Index encoding failed due to: " <> Exception.format_banner(:error, e, []))
+        |> Logger.error()
+      end
+
+      :skip
+  end
+
   def stream(query, preloads) do
     from(query)
     |> Repo.stream()
diff --git a/app/test/meadow/data/indexer_test.exs b/app/test/meadow/data/indexer_test.exs
index f2dbbbe59..a868cf9f2 100644
--- a/app/test/meadow/data/indexer_test.exs
+++ b/app/test/meadow/data/indexer_test.exs
@@ -4,12 +4,15 @@ defmodule Meadow.Data.IndexerTest do
   # use Meadow.AuthorityCase
   use Meadow.DataCase
   use Meadow.IndexCase
+  alias Ecto.Adapters.SQL
   alias Ecto.Adapters.SQL.Sandbox
   alias Meadow.Data.{Collections, FileSets, Indexer, Works}
   alias Meadow.Data.Schemas.{Collection, FileSet, Work}
   alias Meadow.Ingest.{Projects, Sheets}
   alias Meadow.{Config, Repo}

+  import ExUnit.CaptureLog
+
   describe "indexing" do
     setup do
       {:ok, indexable_data()}
@@ -21,6 +24,26 @@ defmodule Meadow.Data.IndexerTest do
       assert_doc_counts_match(context)
     end

+    test "error_handling", context do
+      assert_all_empty()
+      %{file_sets: [file_set | _]} = context
+
+      SQL.query!(
+        Repo,
+        "UPDATE file_sets SET core_metadata = NULL WHERE id = $1",
+        [Ecto.UUID.dump!(file_set.id)]
+      )
+
+      logged = capture_log(fn -> Indexer.synchronize_index() end)
+      assert {:ok, file_set_count} = indexed_doc_count(FileSet, 2)
+      assert file_set_count == length(context.file_sets) - 1
+
+      assert String.contains?(
+               logged,
+               "id=#{file_set.id} [error] Index encoding failed due to: ** (KeyError)"
+             )
+    end
+
     test "reindex_all", context do
       Indexer.synchronize_index()
       assert_doc_counts_match(context)