From 41ac8ec7e7dd7e213a4a4e0b2e701957c44d19bd Mon Sep 17 00:00:00 2001 From: Mark Sturdevant Date: Fri, 9 Aug 2024 18:06:25 -0700 Subject: [PATCH 1/3] Update sentence-transformers and allow setting trust_remote_code sentence-transformers APIs changed. This update allows 3.x (so far). The update is required to support some models. Most of the update is adding new params to be in sync with the super SentenceTransformer class. Most of these params are not used. Support is added to allow passing the trust_remote_code parameter. This can be done in the runtime config or with EMBEDDING_TRUST_REMOTE_CODE=true (or =1) environment variable. Signed-off-by: Mark Sturdevant --- caikit_nlp/config/config.yml | 2 + .../modules/text_embedding/embedding.py | 42 ++++++++++++++++++- pyproject.toml | 2 +- runtime_config.yaml | 2 + 4 files changed, 45 insertions(+), 3 deletions(-) diff --git a/caikit_nlp/config/config.yml b/caikit_nlp/config/config.yml index 6f440a22..8eb5a500 100644 --- a/caikit_nlp/config/config.yml +++ b/caikit_nlp/config/config.yml @@ -38,6 +38,8 @@ training_data_limit: # Config used only in EmbeddingModule. Set here or use env vars like EMBEDDING_RETRIES=32 embedding: + # Allow models with remote code. + trust_remote_code: false # Number of times to retry on error. Most deployments should use 0 retries. 
retries: 0 # Batch size for encode() if <= 0 or invalid, the sentence-transformers default is used diff --git a/caikit_nlp/modules/text_embedding/embedding.py b/caikit_nlp/modules/text_embedding/embedding.py index efa97728..97fcb7ac 100644 --- a/caikit_nlp/modules/text_embedding/embedding.py +++ b/caikit_nlp/modules/text_embedding/embedding.py @@ -22,6 +22,7 @@ Dict, Iterable, List, + Literal, NamedTuple, Optional, TypeVar, @@ -82,6 +83,8 @@ sentence_transformers = importlib.import_module("sentence_transformers") # Third Party from sentence_transformers import SentenceTransformer + from sentence_transformers.model_card import SentenceTransformerModelCardData + from sentence_transformers.similarity_functions import SimilarityFunction from sentence_transformers.util import batch_to_device, cos_sim, dot_score from sentence_transformers.util import ( normalize_embeddings as normalize, # avoid parameter shadowing @@ -107,6 +110,7 @@ def __init__(self, *args, **kwargs): # pylint: disable=unused-argument val=embedding_cfg.get("implicit_truncation_errors", True) ) DEVICE = embedding_cfg.get("device", "") +TRUST_REMOTE_CODE = embedding_cfg.get("trust_remote_code") RT = TypeVar("RT") # return type @@ -183,7 +187,9 @@ def load(cls, model_path: str, *args, **kwargs) -> "EmbeddingModule": ipex = cls._get_ipex(IPEX) device = cls._select_device(ipex, DEVICE) model = SentenceTransformerWithTruncate( - model_name_or_path=artifacts_path, device=device + model_name_or_path=artifacts_path, + device=device, + trust_remote_code=TRUST_REMOTE_CODE, ) model.eval() # required for IPEX at least if device is not None: @@ -719,7 +725,12 @@ def bootstrap(cls, model_name_or_path: str) -> "EmbeddingModule": model_name_or_path: str Model name (Hugging Face hub) or path to model to load. 
""" - return cls(model=SentenceTransformer(model_name_or_path=model_name_or_path)) + return cls( + model=SentenceTransformer( + model_name_or_path=model_name_or_path, + trust_remote_code=TRUST_REMOTE_CODE, + ) + ) def save(self, model_path: str, *args, **kwargs): """Save model using config in model_path @@ -875,21 +886,39 @@ def __init__( model_name_or_path: Optional[str] = None, modules: Optional[Iterable[nn.Module]] = None, device: Optional[str] = None, + prompts: Optional[Dict[str, str]] = None, + default_prompt_name: Optional[str] = None, + similarity_fn_name: Optional[Union[str, SimilarityFunction]] = None, cache_folder: Optional[str] = None, trust_remote_code: bool = False, revision: Optional[str] = None, + local_files_only: bool = False, token: Optional[Union[bool, str]] = None, use_auth_token: Optional[Union[bool, str]] = None, + truncate_dim: Optional[int] = None, + model_kwargs: Optional[Dict[str, Any]] = None, + tokenizer_kwargs: Optional[Dict[str, Any]] = None, + config_kwargs: Optional[Dict[str, Any]] = None, + model_card_data: Optional[SentenceTransformerModelCardData] = None, ): super().__init__( model_name_or_path, modules, device, + prompts, + default_prompt_name, + similarity_fn_name, cache_folder, trust_remote_code, revision, + local_files_only, token, use_auth_token, + truncate_dim, + model_kwargs, + tokenizer_kwargs, + config_kwargs, + model_card_data, ) self.tokenizers = {} @@ -1014,9 +1043,12 @@ def _get_tokenized(self, texts): def encode( self, sentences: Union[str, List[str]], + prompt_name: Optional[str] = None, + prompt: Optional[str] = None, batch_size: int = 32, show_progress_bar: bool = None, output_value: str = "sentence_embedding", + precision: Literal["float32", "int8", "uint8", "binary", "ubinary"] = "float32", convert_to_numpy: bool = True, convert_to_tensor: bool = False, device: str = None, @@ -1029,9 +1061,12 @@ def encode( Computes sentence embeddings :param sentences: the sentences to embed + :param prompt_name: Ignored here. 
Added for compatibility with super API. + :param prompt: Ignored here. Added for compatibility with super API. :param batch_size: the batch size used for the computation :param show_progress_bar: Ignored here. Added for compatibility with super API. :param output_value: Ignored here. Added for compatibility with super API. + :param precision: Ignored here. Added for compatibility with super API. :param convert_to_numpy: If true, the output is a list of numpy vectors. Else, it is a list of pytorch tensors. :param convert_to_tensor: If true, you get one large tensor as return. Overwrites any @@ -1057,8 +1092,11 @@ def encode( # These args are for API compatability, but are currently ignored in our version of encode() _ = ( + prompt_name, + prompt, show_progress_bar, output_value, + precision, normalize_embeddings, ) diff --git a/pyproject.toml b/pyproject.toml index e688a4b5..cbf6e401 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ dependencies = [ "pandas>=1.5.0", "scikit-learn>=1.1", "scipy>=1.8.1", - "sentence-transformers>=2.3.1,<2.4.0", + "sentence-transformers>=3.0.0,<3.1.0", "tokenizers>=0.13.3", "torch>=2.3.1,<2.4.0", "tqdm>=4.65.0", diff --git a/runtime_config.yaml b/runtime_config.yaml index cbd27421..b88fb545 100644 --- a/runtime_config.yaml +++ b/runtime_config.yaml @@ -44,6 +44,8 @@ model_management: # Config used only in EmbeddingModule. Set here or use env vars like EMBEDDING_RETRIES=32 embedding: + # Allow models with remote code. + trust_remote_code: false # Number of times to retry on error. Most deployments should use 0 retries. retries: 0 # Batch size for encode() if <= 0 or invalid, the sentence-transformers default is used From 3343d8867f822f5566774c0a87b4b0e6357001dd Mon Sep 17 00:00:00 2001 From: Mark Sturdevant Date: Mon, 12 Aug 2024 11:33:54 -0700 Subject: [PATCH 2/3] tox.ini passenv PYTORCH_ENABLE_MPS_FALLBACK to run tests on Mac M3 Setting PYTORCH_ENABLE_MPS_FALLBACK=1 allows the tests to run on Mac MPS (M3). 
Signed-off-by: Mark Sturdevant --- tox.ini | 1 + 1 file changed, 1 insertion(+) diff --git a/tox.ini b/tox.ini index c220f022..df4f5d15 100644 --- a/tox.ini +++ b/tox.ini @@ -15,6 +15,7 @@ passenv = LOG_FORMATTER LOG_THREAD_ID LOG_CHANNEL_WIDTH + PYTORCH_ENABLE_MPS_FALLBACK commands = pytest --durations=42 --cov=caikit_nlp --cov-report=term --cov-report=html {posargs:tests} ; Unclear: We probably want to test wheel packaging From bcc1233221862280258c8d646ba15c08bdcd1428 Mon Sep 17 00:00:00 2001 From: Mark Sturdevant Date: Mon, 12 Aug 2024 11:35:01 -0700 Subject: [PATCH 3/3] Bump transformers and stop at < 4.44.0 * Use >= 4.38.0 because that is the sentence-transformers min anyway. * Add < 4.44.0 because 4.44.0 breaks our tests with errors like this: > `RuntimeError: The expanded size of the tensor (22) must match the existing size (30) at non-singleton dimension 2. Target sizes: [4, 22, 22]. Tensor sizes: [4, 1, 30]` Signed-off-by: Mark Sturdevant --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index cbf6e401..3a2464f7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,7 @@ dependencies = [ "tokenizers>=0.13.3", "torch>=2.3.1,<2.4.0", "tqdm>=4.65.0", - "transformers>=4.32.0", + "transformers>=4.38.0,<4.44.0", "peft==0.6.0", ]