Skip to content

Commit

Permalink
Dynamically determine dimensionality of embedding field (#4073)
Browse files Browse the repository at this point in the history
* Dynamically determine dimensionality of embedding field

* Use secrets to manage model dimension config because of Sagemaker cold starts

---------

Co-authored-by: Brendan Quinn <brendan-quinn@northwestern.edu>
Co-authored-by: Karen Shaw <karendid@gmail.com>
  • Loading branch information
3 people authored Aug 1, 2024
1 parent c71f2df commit 98c80d4
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 4 deletions.
11 changes: 7 additions & 4 deletions app/config/config.exs
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,13 @@ config :meadow, Meadow.Search.Cluster,
bulk_page_size: 200,
bulk_wait_interval: 500,
json_encoder: Ecto.Jason,
embedding_model_id: aws_secret("meadow",
dig: ["search", "embedding_model_id"],
default: nil
)
embedding_model_id:
aws_secret("meadow",
dig: ["search", "embedding_model_id"],
default: nil
),
embedding_dimensions:
aws_secret("meadow", dig: ["search", "embedding_dimensions"], default: nil)

config :meadow,
ark: %{
Expand Down
24 changes: 24 additions & 0 deletions app/lib/meadow/search/config.ex
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ defmodule Meadow.Search.Config do
@moduledoc """
Convenience methods for retrieving search-specific configuration
"""
alias Meadow.Search.HTTP

require Logger

Expand Down Expand Up @@ -38,6 +39,7 @@ defmodule Meadow.Search.Config do
File.read!(settings)
|> Jason.decode!()
|> put_in(["settings", "default_pipeline"], pipeline)
|> add_embedding_dimension()

{%{settings: settings}, _} ->
File.read!(settings) |> Jason.decode!()
Expand All @@ -52,6 +54,28 @@ defmodule Meadow.Search.Config do
|> Keyword.get(:embedding_model_id)
end

@doc """
Applies the configured embedding dimension to a decoded index settings map.

When `settings` already declares
`mappings.properties.embedding.dimension` and `embedding_model_dimensions/0`
returns a non-nil value, the declared dimension is replaced through
`insert_embedding_dimension/1`. In every other case the settings are
returned unchanged.
"""
@spec add_embedding_dimension(map()) :: map()
def add_embedding_dimension(
      %{"mappings" => %{"properties" => %{"embedding" => %{"dimension" => _}}}} = settings
    ) do
  case embedding_model_dimensions() do
    # No dimension configured: keep whatever the settings file declares.
    nil -> settings
    _ -> insert_embedding_dimension(settings)
  end
end

# Settings without an embedding dimension have nothing to override; pass them
# through instead of raising FunctionClauseError.
def add_embedding_dimension(settings), do: settings

@doc """
Stores the value returned by `embedding_model_dimensions/0` at
`mappings.properties.embedding.dimension` in `settings` and returns the
updated map.
"""
def insert_embedding_dimension(settings) do
  dimension_path = ["mappings", "properties", "embedding", "dimension"]
  put_in(settings, dimension_path, embedding_model_dimensions())
end

@doc """
Reads the `:embedding_dimensions` entry from the `Meadow.Search.Cluster`
configuration of the `:meadow` application. Returns `nil` when the key is
not present.
"""
def embedding_model_dimensions do
  cluster_config = Application.get_env(:meadow, Meadow.Search.Cluster)
  Keyword.get(cluster_config, :embedding_dimensions)
end

def index_versions do
index_configs()
|> Enum.map(& &1.version)
Expand Down

0 comments on commit 98c80d4

Please sign in to comment.