From 36fd0aafffee3034b3bc8b06df2e63c690dd4a47 Mon Sep 17 00:00:00 2001 From: Alexey Taymanov Date: Tue, 14 Nov 2023 23:30:54 -0500 Subject: [PATCH] black --- src/learn_to_pick/base.py | 18 +- src/learn_to_pick/features.py | 13 +- src/learn_to_pick/pick_best.py | 66 +++-- tests/unit_tests/test_pick_best_call.py | 52 ++-- .../test_pick_best_text_embedder.py | 186 ++++++++------ .../unit_tests/test_rl_loop_base_embedder.py | 226 ++++++++++-------- 6 files changed, 334 insertions(+), 227 deletions(-) diff --git a/src/learn_to_pick/base.py b/src/learn_to_pick/base.py index d3a57d6..dab5e45 100644 --- a/src/learn_to_pick/base.py +++ b/src/learn_to_pick/base.py @@ -13,7 +13,7 @@ Type, TypeVar, Union, - Callable + Callable, ) from learn_to_pick.metrics import MetricsTrackerAverage, MetricsTrackerRollingWindow @@ -183,9 +183,7 @@ def predict(self, event: TEvent) -> Any: import vowpal_wabbit_next as vw text_parser = vw.TextFormatParser(self.workspace) - return self.workspace.predict_one( - _parse_lines(text_parser, self.format(event)) - ) + return self.workspace.predict_one(_parse_lines(text_parser, self.format(event))) def learn(self, event: TEvent) -> None: import vowpal_wabbit_next as vw @@ -489,18 +487,20 @@ def run(self, *args, **kwargs) -> Dict[str, Any]: def _embed_string_type( - item: Union[str, _Embed], model: Any, namespace: str) -> Featurized: + item: Union[str, _Embed], model: Any, namespace: str +) -> Featurized: """Helper function to embed a string or an _Embed object.""" import re + result = Featurized() if isinstance(item, _Embed): result[namespace] = DenseFeatures(model.encode(item.value)) if item.keep: keep_str = item.value.replace(" ", "_") - result[namespace] = {'raw': re.sub(r"[\t\n\r\f\v]+", " ", keep_str)} + result[namespace] = {"raw": re.sub(r"[\t\n\r\f\v]+", " ", keep_str)} elif isinstance(item, str): encoded = item.replace(" ", "_") - result[namespace] = {'raw': re.sub(r"[\t\n\r\f\v]+", " ", encoded)} + result[namespace] = {"raw": re.sub(r"[\t\n\r\f\v]+", " ", encoded)} else: raise ValueError(f"Unsupported type {type(item)} for embedding") @@ -513,7 +513,7 @@ def _embed_dict_type(item: Dict, model: Any) -> Featurized: for ns, embed_item in item.items(): if isinstance(embed_item, list): for idx, embed_list_item in enumerate(embed_item): - result.merge(_embed_string_type(embed_list_item, model, f'{ns}_{idx}')) + result.merge(_embed_string_type(embed_list_item, model, f"{ns}_{idx}")) else: result.merge(_embed_string_type(embed_item, model, ns)) return result @@ -529,7 +529,7 @@ def _embed_list_type( elif isinstance(embed_item, list): result.append(Featurized()) for idx, embed_list_item in enumerate(embed_item): - result[-1].merge(_embed_string_type(embed_list_item, model, f'{idx}')) + result[-1].merge(_embed_string_type(embed_list_item, model, f"{idx}")) else: result.append(_embed_string_type(embed_item, model, namespace)) return result diff --git a/src/learn_to_pick/features.py b/src/learn_to_pick/features.py index b03c808..d5ded1c 100644 --- a/src/learn_to_pick/features.py +++ b/src/learn_to_pick/features.py @@ -1,6 +1,7 @@ from typing import Union, Optional, Dict, List import numpy as np + class SparseFeatures(dict): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -12,7 +13,11 @@ def __init__(self, *args, **kwargs): class Featurized: - def __init__(self, sparse: Optional[Dict[str, SparseFeatures]] = None, dense: Optional[Dict[str, DenseFeatures]] = None): + def __init__( + self, + sparse: Optional[Dict[str, SparseFeatures]] = None, + dense: Optional[Dict[str, DenseFeatures]] = None, + ): self.sparse = sparse or {} self.dense = dense or {} @@ -22,8 +27,10 @@ def __setitem__(self, key, value): elif isinstance(value, List) or isinstance(value, np.ndarray): self.dense[key] = DenseFeatures(value) else: - raise ValueError(f'Cannot convert {type(value)} to either DenseFeatures or SparseFeatures') - + raise ValueError( + f"Cannot convert {type(value)} to either DenseFeatures or SparseFeatures" + ) + def merge(self, other): self.sparse.update(other.sparse) self.dense.update(other.dense) diff --git a/src/learn_to_pick/pick_best.py b/src/learn_to_pick/pick_best.py index 963487a..df8c6ac 100644 --- a/src/learn_to_pick/pick_best.py +++ b/src/learn_to_pick/pick_best.py @@ -30,6 +30,7 @@ def __init__( self.probability = probability self.score = score + class PickBestEvent(base.Event[PickBestSelected]): def __init__( self, @@ -65,6 +66,7 @@ def actions(self, model) -> List[base.Featurized]: ) return action_embs + class VwTxt: @staticmethod def _dense_2_str(values: base.DenseFeatures) -> str: @@ -74,15 +76,27 @@ def _dense_2_str(values: base.DenseFeatures) -> str: def _sparse_2_str(values: base.SparseFeatures) -> str: def _to_str(v): import numbers - return v if isinstance(v, numbers.Number) else f'={v}' + + return v if isinstance(v, numbers.Number) else f"={v}" return " ".join([f"{k}:{_to_str(v)}" for k, v in values.items()]) - + @staticmethod def featurized_2_str(obj: base.Featurized) -> str: - return " ".join(chain.from_iterable([ - map(lambda kv: f'|{kv[0]}_dense {VwTxt._dense_2_str(kv[1])}', obj.dense.items()), - map(lambda kv: f'|{kv[0]}_sparse {VwTxt._sparse_2_str(kv[1])}', obj.sparse.items())])) + return " ".join( + chain.from_iterable( + [ + map( + lambda kv: f"|{kv[0]}_dense {VwTxt._dense_2_str(kv[1])}", + obj.dense.items(), + ), + map( + lambda kv: f"|{kv[0]}_sparse {VwTxt._sparse_2_str(kv[1])}", + obj.sparse.items(), + ), + ] + ) + ) class PickBestFeaturizer(base.Featurizer[PickBestEvent]): @@ -109,54 +123,64 @@ def __init__( def _dotproducts(self, context, actions): _context_dense = base.Featurized() for ns in context.sparse.keys(): - if 'raw' in context.sparse[ns]: - _context_dense[ns] = self.model.encode(context.sparse[ns]['raw']) + if "raw" in context.sparse[ns]: + _context_dense[ns] = self.model.encode(context.sparse[ns]["raw"]) _actions_dense = [base.Featurized() for _ in range(len(actions))] for _action, action in zip(_actions_dense, actions): for ns in action.sparse.keys(): - if 'raw' in action.sparse[ns]: - _action[ns] = self.model.encode(action.sparse[ns]['raw']) + if "raw" in action.sparse[ns]: + _action[ns] = self.model.encode(action.sparse[ns]["raw"]) context_names = list(_context_dense.dense.keys()) context_matrix = np.stack(list(_context_dense.dense.values())) for _a, a in zip(_actions_dense, actions): action_names = list(_a.dense.keys()) product = np.dot(context_matrix, np.stack(list(_a.dense.values())).T) - a['dotprod'] = {f'{context_names[i]}_{action_names[j]}': product[i, j] for i in range(len(context_names)) for j in range(len(action_names))} + a["dotprod"] = { + f"{context_names[i]}_{action_names[j]}": product[i, j] + for i in range(len(context_names)) + for j in range(len(action_names)) + } def _generic_namespace(self, featurized): result = base.SparseFeatures() for ns in featurized.sparse.keys(): - if 'raw' in featurized.sparse[ns]: - result[ns] = featurized.sparse[ns]['raw'] + if "raw" in featurized.sparse[ns]: + result[ns] = featurized.sparse[ns]["raw"] return result def _generic_namespaces(self, context, actions): - context['@'] = self._generic_namespace(context) + context["@"] = self._generic_namespace(context) for a in actions: - a['#'] = self._generic_namespace(a) + a["#"] = self._generic_namespace(a) - def featurize(self, event: PickBestEvent) -> Tuple[base.Featurized, List[base.Featurized], PickBestSelected]: + def featurize( + self, event: PickBestEvent + ) -> Tuple[base.Featurized, List[base.Featurized], PickBestSelected]: context = event.context(self.model) actions = event.actions(self.model) if self.auto_embed: self._dotproducts(context, actions) self._generic_namespaces(context, actions) - + return context, actions, event.selected -def vw_cb_formatter(context: base.Featurized, actions: List[base.Featurized], selected: PickBestSelected) -> str: +def vw_cb_formatter( + context: base.Featurized, actions: List[base.Featurized], selected: PickBestSelected +) -> str: nactions = len(actions) context_str = f"shared {VwTxt.featurized_2_str(context)}" labels = ["" for _ in range(nactions)] if selected.score is not None: - labels[selected.index] = f"{selected.index}:{-selected.score}:{selected.probability} " + labels[ + selected.index + ] = f"{selected.index}:{-selected.score}:{selected.probability} " actions_str = [f"{l}{VwTxt.featurized_2_str(a)}" for a, l in zip(actions, labels)] return "\n".join([context_str] + actions_str) - + class PickBestRandomPolicy(base.Policy[PickBestEvent]): def __init__(self): @@ -235,7 +259,9 @@ def _call_after_predict_before_scoring( sampled_ap = prediction[sampled_index] sampled_action = sampled_ap[0] sampled_prob = sampled_ap[1] - event.selected = PickBestSelected(index=sampled_action, probability=sampled_prob) + event.selected = PickBestSelected( + index=sampled_action, probability=sampled_prob + ) next_inputs = inputs.copy() diff --git a/tests/unit_tests/test_pick_best_call.py b/tests/unit_tests/test_pick_best_call.py index a9056e9..24b374b 100644 --- a/tests/unit_tests/test_pick_best_call.py +++ b/tests/unit_tests/test_pick_best_call.py @@ -163,15 +163,18 @@ def test_everything_embedded() -> None: str2 = "1" str3 = "2" action_dense = "0:1.0 1:0.0" - + ctx_str_1 = "context1" encoded_ctx_str_1 = "0:8.0 1:0.0" - expected = "\n".join([ - f"shared |User_dense {encoded_ctx_str_1} |User_sparse raw:={ctx_str_1}", - f"|action_dense {action_dense} |action_sparse raw:={str1}", - f"|action_dense {action_dense} |action_sparse raw:={str2}", - f"|action_dense {action_dense} |action_sparse raw:={str3}"]) # noqa + expected = "\n".join( + [ + f"shared |User_dense {encoded_ctx_str_1} |User_sparse raw:={ctx_str_1}", + f"|action_dense {action_dense} |action_sparse raw:={str1}", + f"|action_dense {action_dense} |action_sparse raw:={str2}", + f"|action_dense {action_dense} |action_sparse raw:={str3}", + ] + ) # noqa actions = [str1, str2, str3] @@ -193,11 +196,14 @@ def test_default_auto_embedder_is_off() -> None: str3 = "2" ctx_str_1 = "context1" - expected = "\n".join([ - f"shared |User_sparse raw:={ctx_str_1}", - f"|action_sparse raw:={str1}", - f"|action_sparse raw:={str2}", - f"|action_sparse raw:={str3}"]) # noqa + expected = "\n".join( + [ + f"shared |User_sparse raw:={ctx_str_1}", + f"|action_sparse raw:={str1}", + f"|action_sparse raw:={str2}", + f"|action_sparse raw:={str3}", + ] + ) # noqa actions = [str1, str2, str3] @@ -219,11 +225,14 @@ def test_default_w_embeddings_off() -> None: str3 = "2" ctx_str_1 = "context1" - expected = "\n".join([ - f"shared |User_sparse raw:={ctx_str_1}", - f"|action_sparse raw:={str1}", - f"|action_sparse raw:={str2}", - f"|action_sparse raw:={str3}"]) # noqa + expected = "\n".join( + [ + f"shared |User_sparse raw:={ctx_str_1}", + f"|action_sparse raw:={str1}", + f"|action_sparse raw:={str2}", + f"|action_sparse raw:={str3}", + ] + ) # noqa actions = [str1, str2, str3] @@ -247,10 +256,13 @@ def test_default_w_embeddings_on() -> None: ctx_str_1 = "context1" dot_prod = "dotprod_sparse User_action:5.0" # dot prod of [1.0, 2.0] and [1.0, 2.0] - expected = "\n".join([ - f"shared |User_sparse raw:={ctx_str_1} |@_sparse User:={ctx_str_1}", - f"|action_sparse raw:={str1} |{dot_prod} |#_sparse action:={str1} ", - f"|action_sparse raw:={str2} |{dot_prod} |#_sparse action:={str2} "]) # noqa + expected = "\n".join( + [ + f"shared |User_sparse raw:={ctx_str_1} |@_sparse User:={ctx_str_1}", + f"|action_sparse raw:={str1} |{dot_prod} |#_sparse action:={str1} ", + f"|action_sparse raw:={str2} |{dot_prod} |#_sparse action:={str2} ", + ] + ) # noqa actions = [str1, str2] diff --git a/tests/unit_tests/test_pick_best_text_embedder.py b/tests/unit_tests/test_pick_best_text_embedder.py index feca4e8..414341a 100644 --- a/tests/unit_tests/test_pick_best_text_embedder.py +++ b/tests/unit_tests/test_pick_best_text_embedder.py @@ -33,12 +33,15 @@ def test_pickbest_textembedder_no_label_no_emb() -> None: auto_embed=False, model=MockEncoder() ) named_actions = {"action": ["0", "1", "2"]} - expected = "\n".join([ - "shared |context_sparse raw:=context", - "|action_sparse raw:=0", - "|action_sparse raw:=1", - "|action_sparse raw:=2"]) - + expected = "\n".join( + [ + "shared |context_sparse raw:=context", + "|action_sparse raw:=0", + "|action_sparse raw:=1", + "|action_sparse raw:=2", + ] + ) + event = pick_best_chain.PickBestEvent( inputs={}, to_select_from=named_actions, based_on={"context": "context"} ) @@ -51,11 +54,14 @@ def test_pickbest_textembedder_w_label_no_score_no_emb() -> None: auto_embed=False, model=MockEncoder() ) named_actions = {"action": ["0", "1", "2"]} - expected = "\n".join([ - "shared |context_sparse raw:=context", - "|action_sparse raw:=0", - "|action_sparse raw:=1", - "|action_sparse raw:=2"]) + expected = "\n".join( + [ + "shared |context_sparse raw:=context", + "|action_sparse raw:=0", + "|action_sparse raw:=1", + "|action_sparse raw:=2", + ] + ) selected = pick_best_chain.PickBestSelected(index=0, probability=1.0) event = pick_best_chain.PickBestEvent( inputs={}, @@ -72,11 +78,14 @@ def test_pickbest_textembedder_w_full_label_no_emb() -> None: auto_embed=False, model=MockEncoder() ) named_actions = {"action": ["0", "1", "2"]} - expected = "\n".join([ - "shared |context_sparse raw:=context", - "0:-0.0:1.0 |action_sparse raw:=0", - "|action_sparse raw:=1", - "|action_sparse raw:=2"]) + expected = "\n".join( + [ + "shared |context_sparse raw:=context", + "0:-0.0:1.0 |action_sparse raw:=0", + "|action_sparse raw:=1", + "|action_sparse raw:=2", + ] + ) selected = pick_best_chain.PickBestSelected(index=0, probability=1.0, score=0.0) event = pick_best_chain.PickBestEvent( @@ -102,11 +111,14 @@ def test_pickbest_textembedder_w_full_label_w_emb() -> None: named_actions = {"action": rl_chain.Embed([str1, str2, str3])} context = {"context": rl_chain.Embed(ctx_str)} - expected = "\n".join([ - f"shared |context_dense {encoded_ctx_str}", - "0:-0.0:1.0 |action_dense 0:1.0 1:0.0", - "|action_dense 0:1.0 1:0.0", - "|action_dense 0:1.0 1:0.0"]) # noqa: E501 + expected = "\n".join( + [ + f"shared |context_dense {encoded_ctx_str}", + "0:-0.0:1.0 |action_dense 0:1.0 1:0.0", + "|action_dense 0:1.0 1:0.0", + "|action_dense 0:1.0 1:0.0", + ] + ) # noqa: E501 selected = pick_best_chain.PickBestSelected(index=0, probability=1.0, score=0.0) event = pick_best_chain.PickBestEvent( inputs={}, to_select_from=named_actions, based_on=context, selected=selected @@ -128,11 +140,14 @@ def test_pickbest_textembedder_w_full_label_w_embed_and_keep() -> None: named_actions = {"action": rl_chain.EmbedAndKeep([str1, str2, str3])} context = {"context": rl_chain.EmbedAndKeep(ctx_str)} - expected = "\n".join([ - f"shared |context_dense {encoded_ctx_str} |context_sparse raw:={ctx_str}", - "0:-0.0:1.0 |action_dense 0:1.0 1:0.0 |action_sparse raw:=0", - "|action_dense 0:1.0 1:0.0 |action_sparse raw:=1", - "|action_dense 0:1.0 1:0.0 |action_sparse raw:=2"]) # noqa: E501 + expected = "\n".join( + [ + f"shared |context_dense {encoded_ctx_str} |context_sparse raw:={ctx_str}", + "0:-0.0:1.0 |action_dense 0:1.0 1:0.0 |action_sparse raw:=0", + "|action_dense 0:1.0 1:0.0 |action_sparse raw:=1", + "|action_dense 0:1.0 1:0.0 |action_sparse raw:=2", + ] + ) # noqa: E501 selected = pick_best_chain.PickBestSelected(index=0, probability=1.0, score=0.0) event = pick_best_chain.PickBestEvent( inputs={}, to_select_from=named_actions, based_on=context, selected=selected @@ -147,11 +162,14 @@ def test_pickbest_textembedder_more_namespaces_no_label_no_emb() -> None: ) named_actions = {"action1": [{"a": "0", "b": "0"}, "1", "2"]} context = {"context1": "context1", "context2": "context2"} - expected = "\n".join([ - "shared |context1_sparse raw:=context1 |context2_sparse raw:=context2 ", - "|a_sparse raw:=0 |b_sparse raw:=0", - "|action1_sparse raw:=1", - "|action1_sparse raw:=2"]) # noqa: E501 + expected = "\n".join( + [ + "shared |context1_sparse raw:=context1 |context2_sparse raw:=context2 ", + "|a_sparse raw:=0 |b_sparse raw:=0", + "|action1_sparse raw:=1", + "|action1_sparse raw:=2", + ] + ) # noqa: E501 event = pick_best_chain.PickBestEvent( inputs={}, to_select_from=named_actions, based_on=context ) @@ -165,11 +183,14 @@ def test_pickbest_textembedder_more_namespaces_w_label_no_emb() -> None: ) named_actions = {"action": [{"a": "0", "b": "0"}, "1", "2"]} context = {"context1": "context1", "context2": "context2"} - expected = "\n".join([ - "shared |context1_sparse raw:=context1 |context2_sparse raw:=context2", - "|a_sparse raw:=0 |b_sparse raw:=0", - "|action_sparse raw:=1", - "|action_sparse raw:=2"]) # noqa: E501 + expected = "\n".join( + [ + "shared |context1_sparse raw:=context1 |context2_sparse raw:=context2", + "|a_sparse raw:=0 |b_sparse raw:=0", + "|action_sparse raw:=1", + "|action_sparse raw:=2", + ] + ) # noqa: E501 selected = pick_best_chain.PickBestSelected(index=0, probability=1.0) event = pick_best_chain.PickBestEvent( inputs={}, to_select_from=named_actions, based_on=context, selected=selected @@ -184,11 +205,14 @@ def test_pickbest_textembedder_more_namespaces_w_full_label_no_emb() -> None: ) named_actions = {"action": [{"a": "0", "b": "0"}, "1", "2"]} context = {"context1": "context1", "context2": "context2"} - expected = "\n".join([ - "shared |context1_sparse raw:=context1 |context2_sparse raw:=context2", - "0:-0.0:1.0 |a_sparse raw:=0 |b_sparse raw:=0", - "|action_sparse raw:=1", - "|action_sparse raw:=2"]) # noqa: E501 + expected = "\n".join( + [ + "shared |context1_sparse raw:=context1 |context2_sparse raw:=context2", + "0:-0.0:1.0 |a_sparse raw:=0 |b_sparse raw:=0", + "|action_sparse raw:=1", + "|action_sparse raw:=2", + ] + ) # noqa: E501 selected = pick_best_chain.PickBestSelected(index=0, probability=1.0, score=0.0) event = pick_best_chain.PickBestEvent( inputs={}, to_select_from=named_actions, based_on=context, selected=selected @@ -216,11 +240,14 @@ def test_pickbest_textembedder_more_namespaces_w_full_label_w_full_emb() -> None "context1": rl_chain.Embed(ctx_str_1), "context2": rl_chain.Embed(ctx_str_2), } - expected = "\n".join([ - f"shared |context1_dense {encoded_ctx_str_1} |context2_dense {encoded_ctx_str_2}", - f"0:-0.0:1.0 |a_dense 0:1.0 1:0.0 |b_dense 0:1.0 1:0.0", - f"|action_dense 0:1.0 1:0.0", - f"|action_dense 0:1.0 1:0.0"]) # noqa: E501 + expected = "\n".join( + [ + f"shared |context1_dense {encoded_ctx_str_1} |context2_dense {encoded_ctx_str_2}", + f"0:-0.0:1.0 |a_dense 0:1.0 1:0.0 |b_dense 0:1.0 1:0.0", + f"|action_dense 0:1.0 1:0.0", + f"|action_dense 0:1.0 1:0.0", + ] + ) # noqa: E501 selected = pick_best_chain.PickBestSelected(index=0, probability=1.0, score=0.0) event = pick_best_chain.PickBestEvent( @@ -253,12 +280,15 @@ def test_pickbest_textembedder_more_namespaces_w_full_label_w_full_embed_and_kee "context1": rl_chain.EmbedAndKeep(ctx_str_1), "context2": rl_chain.EmbedAndKeep(ctx_str_2), } - expected = "\n".join([ - f"shared |context1_dense {encoded_ctx_str_1} |context2_dense {encoded_ctx_str_2} |context1_sparse raw:={ctx_str_1} |context2_sparse raw:={ctx_str_2}", - f"0:-0.0:1.0 |a_dense 0:1.0 1:0.0 |b_dense 0:1.0 1:0.0 |a_sparse raw:=0 |b_sparse raw:=0", - f"|action_dense 0:1.0 1:0.0 |action_sparse raw:=1", - f"|action_dense 0:1.0 1:0.0 |action_sparse raw:=2"]) # noqa: E501 - + expected = "\n".join( + [ + f"shared |context1_dense {encoded_ctx_str_1} |context2_dense {encoded_ctx_str_2} |context1_sparse raw:={ctx_str_1} |context2_sparse raw:={ctx_str_2}", + f"0:-0.0:1.0 |a_dense 0:1.0 1:0.0 |b_dense 0:1.0 1:0.0 |a_sparse raw:=0 |b_sparse raw:=0", + f"|action_dense 0:1.0 1:0.0 |action_sparse raw:=1", + f"|action_dense 0:1.0 1:0.0 |action_sparse raw:=2", + ] + ) # noqa: E501 + selected = pick_best_chain.PickBestSelected(index=0, probability=1.0, score=0.0) event = pick_best_chain.PickBestEvent( inputs={}, to_select_from=named_actions, based_on=context, selected=selected @@ -285,11 +315,14 @@ def test_pickbest_textembedder_more_namespaces_w_full_label_w_partial_emb() -> N } context = {"context1": ctx_str_1, "context2": rl_chain.Embed(ctx_str_2)} - expected = "\n".join([ - f"shared |context2_dense {encoded_ctx_str_2} |context1_sparse raw:={ctx_str_1}", - f"0:-0.0:1.0 |b_dense 0:1.0 1:0.0 |a_sparse raw:=0", - f"|action_sparse raw:=1", - f"|action_dense 0:1.0 1:0.0"]) # noqa: E501 + expected = "\n".join( + [ + f"shared |context2_dense {encoded_ctx_str_2} |context1_sparse raw:={ctx_str_1}", + f"0:-0.0:1.0 |b_dense 0:1.0 1:0.0 |a_sparse raw:=0", + f"|action_sparse raw:=1", + f"|action_dense 0:1.0 1:0.0", + ] + ) # noqa: E501 selected = pick_best_chain.PickBestSelected(index=0, probability=1.0, score=0.0) event = pick_best_chain.PickBestEvent( @@ -320,11 +353,14 @@ def test_pickbest_textembedder_more_namespaces_w_full_label_w_partial_emakeep() ] } context = {"context1": ctx_str_1, "context2": rl_chain.EmbedAndKeep(ctx_str_2)} - expected = "\n".join([ - f"shared |context2_dense {encoded_ctx_str_2} |context1_sparse raw:={ctx_str_1} |context2_sparse raw:={ctx_str_2}", - f"0:-0.0:1.0 |b_dense 0:1.0 1:0.0 |a_sparse raw:=0 |b_sparse raw:=0", - f"|action_sparse raw:=1", - f"|action_dense 0:1.0 1:0.0 |action_sparse raw:=2"]) # noqa: E501 + expected = "\n".join( + [ + f"shared |context2_dense {encoded_ctx_str_2} |context1_sparse raw:={ctx_str_1} |context2_sparse raw:={ctx_str_2}", + f"0:-0.0:1.0 |b_dense 0:1.0 1:0.0 |a_sparse raw:=0 |b_sparse raw:=0", + f"|action_sparse raw:=1", + f"|action_dense 0:1.0 1:0.0 |action_sparse raw:=2", + ] + ) # noqa: E501 selected = pick_best_chain.PickBestSelected(index=0, probability=1.0, score=0.0) event = pick_best_chain.PickBestEvent( inputs={}, to_select_from=named_actions, based_on=context, selected=selected @@ -348,10 +384,13 @@ def test_raw_features_underscored() -> None: # No embeddings named_actions = {"action": [str1]} context = {"context": ctx_str} - expected_no_embed = "\n".join([ - f"shared |context_sparse raw:={ctx_str_underscored}", - f"|action_sparse raw:={str1_underscored}"]) - + expected_no_embed = "\n".join( + [ + f"shared |context_sparse raw:={ctx_str_underscored}", + f"|action_sparse raw:={str1_underscored}", + ] + ) + event = pick_best_chain.PickBestEvent( inputs={}, to_select_from=named_actions, based_on=context ) @@ -361,9 +400,9 @@ def test_raw_features_underscored() -> None: # Just embeddings named_actions = {"action": rl_chain.Embed([str1])} context = {"context": rl_chain.Embed(ctx_str)} - expected_embed = "\n".join([ - f"shared |context_dense {encoded_ctx_str}", - f"|action_dense {encoded_str1}"]) + expected_embed = "\n".join( + [f"shared |context_dense {encoded_ctx_str}", f"|action_dense {encoded_str1}"] + ) event = pick_best_chain.PickBestEvent( inputs={}, to_select_from=named_actions, based_on=context ) @@ -373,9 +412,12 @@ def test_raw_features_underscored() -> None: # Embeddings and raw features named_actions = {"action": rl_chain.EmbedAndKeep([str1])} context = {"context": rl_chain.EmbedAndKeep(ctx_str)} - expected_embed_and_keep = "\n".join([ - f"shared |context_dense {encoded_ctx_str} |context_sparse raw:={ctx_str_underscored}", - f"|action_dense {encoded_str1} |action_sparse raw:={str1_underscored}"]) # noqa: E501 + expected_embed_and_keep = "\n".join( + [ + f"shared |context_dense {encoded_ctx_str} |context_sparse raw:={ctx_str_underscored}", + f"|action_dense {encoded_str1} |action_sparse raw:={str1_underscored}", + ] + ) # noqa: E501 event = pick_best_chain.PickBestEvent( inputs={}, to_select_from=named_actions, based_on=context ) diff --git a/tests/unit_tests/test_rl_loop_base_embedder.py b/tests/unit_tests/test_rl_loop_base_embedder.py index c93f2df..544d8c5 100644 --- a/tests/unit_tests/test_rl_loop_base_embedder.py +++ b/tests/unit_tests/test_rl_loop_base_embedder.py @@ -34,11 +34,15 @@ def test_simple_context_str_w_nested_emb() -> None: expected_dense = {"a_namespace": [4.0, 0.0]} expected_sparse = {"a_namespace": {"raw": str1}} - featurized = base.embed(base.EmbedAndKeep(base.Embed(str1)), MockEncoder(), "a_namespace") + featurized = base.embed( + base.EmbedAndKeep(base.Embed(str1)), MockEncoder(), "a_namespace" + ) assert featurized.dense == expected_dense assert featurized.sparse == {} - featurized = base.embed(base.Embed(base.EmbedAndKeep(str1)), MockEncoder(), "a_namespace") + featurized = base.embed( + base.Embed(base.EmbedAndKeep(str1)), MockEncoder(), "a_namespace" + ) assert featurized.sparse == expected_sparse assert featurized.dense == expected_dense @@ -49,6 +53,7 @@ def test_context_w_namespace_no_emb() -> None: assert featurized.sparse == expected_sparse assert featurized.dense == {} + def test_context_w_namespace_w_emb() -> None: str1 = "test" expected_sparse = {"test_namespace": {"raw": str1}} @@ -67,7 +72,7 @@ def test_context_w_namespace_w_emb2() -> None: str1 = "test" expected_sparse = {"test_namespace": {"raw": str1}} expected_dense = {"test_namespace": [4.0, 0.0]} - + featurized = base.embed(base.Embed({"test_namespace": str1}), MockEncoder()) assert featurized.sparse == {} assert featurized.dense == expected_dense @@ -83,16 +88,19 @@ def test_context_w_namespace_w_some_emb() -> None: expected_sparse = {"test_namespace": {"raw": str1}} expected_dense = {"test_namespace2": [5.0, 0.0]} featurized = base.embed( - {"test_namespace": str1, "test_namespace2": base.Embed(str2)}, MockEncoder() - ) + {"test_namespace": str1, "test_namespace2": base.Embed(str2)}, MockEncoder() + ) assert featurized.sparse == expected_sparse assert featurized.dense == expected_dense - expected_sparse = {"test_namespace": {"raw": str1}, "test_namespace2": {"raw": str2}} + expected_sparse = { + "test_namespace": {"raw": str1}, + "test_namespace2": {"raw": str2}, + } featurized = base.embed( - {"test_namespace": str1, "test_namespace2": base.EmbedAndKeep(str2)}, - MockEncoder(), - ) + {"test_namespace": str1, "test_namespace2": base.EmbedAndKeep(str2)}, + MockEncoder(), + ) assert featurized.sparse == expected_sparse assert featurized.dense == expected_dense @@ -104,7 +112,8 @@ def test_simple_action_strlist_no_emb() -> None: expected_sparse = [ {"a_namespace": {"raw": str1}}, {"a_namespace": {"raw": str2}}, - {"a_namespace": {"raw": str3}}] + {"a_namespace": {"raw": str3}}, + ] to_embed: List[Union[str, base._Embed]] = [str1, str2, str3] featurized = base.embed(to_embed, MockEncoder(), "a_namespace") @@ -121,18 +130,24 @@ def test_simple_action_strlist_w_emb() -> None: expected_sparse = [ {"a_namespace": {"raw": str1}}, {"a_namespace": {"raw": str2}}, - {"a_namespace": {"raw": str3}}] + {"a_namespace": {"raw": str3}}, + ] expected_dense = [ {"a_namespace": [4.0, 0.0]}, {"a_namespace": [5.0, 0.0]}, - {"a_namespace": [6.0, 0.0]}] - - featurized = base.embed(base.Embed([str1, str2, str3]), MockEncoder(), "a_namespace") + {"a_namespace": [6.0, 0.0]}, + ] + + featurized = base.embed( + base.Embed([str1, str2, str3]), MockEncoder(), "a_namespace" + ) for i in range(len(featurized)): assert featurized[i].sparse == {} assert featurized[i].dense == expected_dense[i] - featurized = base.embed(base.EmbedAndKeep([str1, str2, str3]), MockEncoder(), "a_namespace") + featurized = base.embed( + base.EmbedAndKeep([str1, str2, str3]), MockEncoder(), "a_namespace" + ) for i in range(len(featurized)): assert featurized[i].sparse == expected_sparse[i] assert featurized[i].dense == expected_dense[i] @@ -143,24 +158,25 @@ def test_simple_action_strlist_w_some_emb() -> None: str2 = "test_" str3 = "test__" - expected_sparse = [ - {"a_namespace": {"raw": str1}}, - {}, - {}] - expected_dense = [ - {}, - {"a_namespace": [5.0, 0.0]}, - {"a_namespace": [6.0, 0.0]}] - featurized = base.embed([str1, base.Embed(str2), base.Embed(str3)], MockEncoder(), "a_namespace") + expected_sparse = [{"a_namespace": {"raw": str1}}, {}, {}] + expected_dense = [{}, {"a_namespace": [5.0, 0.0]}, {"a_namespace": [6.0, 0.0]}] + featurized = base.embed( + [str1, base.Embed(str2), base.Embed(str3)], MockEncoder(), "a_namespace" + ) for i in range(len(featurized)): assert featurized[i].sparse == expected_sparse[i] assert featurized[i].dense == expected_dense[i] - featurized = base.embed([str1, base.EmbedAndKeep(str2), base.EmbedAndKeep(str3)], MockEncoder(), "a_namespace") + featurized = base.embed( + [str1, base.EmbedAndKeep(str2), base.EmbedAndKeep(str3)], + MockEncoder(), + "a_namespace", + ) expected_sparse = [ {"a_namespace": {"raw": str1}}, {"a_namespace": {"raw": str2}}, - {"a_namespace": {"raw": str3}}] + {"a_namespace": {"raw": str3}}, + ] for i in range(len(featurized)): assert featurized[i].sparse == expected_sparse[i] assert featurized[i].dense == expected_dense[i] @@ -177,13 +193,13 @@ def test_action_w_namespace_no_emb() -> None: ] featurized = base.embed( - [ - {"test_namespace": str1}, - {"test_namespace": str2}, - {"test_namespace": str3}, - ], - MockEncoder(), - ) + [ + {"test_namespace": str1}, + {"test_namespace": str2}, + {"test_namespace": str3}, + ], + MockEncoder(), + ) for i in range(len(featurized)): assert featurized[i].sparse == expected_sparse[i] assert featurized[i].dense == {} @@ -201,34 +217,34 @@ def test_action_w_namespace_w_emb() -> None: expected_dense = [ {"test_namespace": [4.0, 0.0]}, {"test_namespace": [5.0, 0.0]}, - {"test_namespace": [6.0, 0.0]}] + {"test_namespace": [6.0, 0.0]}, + ] featurized = base.embed( - [ - {"test_namespace": base.Embed(str1)}, - {"test_namespace": base.Embed(str2)}, - {"test_namespace": base.Embed(str3)}, - ], - MockEncoder(), - ) + [ + {"test_namespace": base.Embed(str1)}, + {"test_namespace": base.Embed(str2)}, + {"test_namespace": base.Embed(str3)}, + ], + MockEncoder(), + ) for i in range(len(featurized)): assert featurized[i].sparse == {} assert featurized[i].dense == expected_dense[i] featurized = base.embed( - [ - {"test_namespace": base.EmbedAndKeep(str1)}, - {"test_namespace": base.EmbedAndKeep(str2)}, - {"test_namespace": base.EmbedAndKeep(str3)}, - ], - MockEncoder(), - ) + [ + {"test_namespace": base.EmbedAndKeep(str1)}, + {"test_namespace": base.EmbedAndKeep(str2)}, + {"test_namespace": base.EmbedAndKeep(str3)}, + ], + MockEncoder(), + ) for i in range(len(featurized)): assert featurized[i].sparse == expected_sparse[i] assert featurized[i].dense == expected_dense[i] - def test_action_w_namespace_w_emb2() -> None: str1 = "test" str2 = "test_" @@ -241,32 +257,33 @@ def test_action_w_namespace_w_emb2() -> None: expected_dense = [ {"test_namespace1": [4.0, 0.0]}, {"test_namespace2": [5.0, 0.0]}, - {"test_namespace3": [6.0, 0.0]}] - + {"test_namespace3": [6.0, 0.0]}, + ] + featurized = base.embed( - base.Embed( - [ - {"test_namespace1": str1}, - {"test_namespace2": str2}, - {"test_namespace3": str3}, - ] - ), - MockEncoder(), - ) + base.Embed( + [ + {"test_namespace1": str1}, + {"test_namespace2": str2}, + {"test_namespace3": str3}, + ] + ), + MockEncoder(), + ) for i in range(len(featurized)): assert featurized[i].sparse == {} assert featurized[i].dense == expected_dense[i] featurized = base.embed( - base.EmbedAndKeep( - [ - {"test_namespace1": str1}, - {"test_namespace2": str2}, - {"test_namespace3": str3}, - ] - ), - MockEncoder(), - ) + base.EmbedAndKeep( + [ + {"test_namespace1": str1}, + {"test_namespace2": str2}, + {"test_namespace3": str3}, + ] + ), + MockEncoder(), + ) for i in range(len(featurized)): assert featurized[i].sparse == expected_sparse[i] assert featurized[i].dense == expected_dense[i] @@ -284,33 +301,34 @@ def test_action_w_namespace_w_some_emb() -> None: expected_dense = [ {}, {"test_namespace": [5.0, 0.0]}, - {"test_namespace": [6.0, 0.0]}] - + {"test_namespace": [6.0, 0.0]}, + ] + featurized = base.embed( - [ - {"test_namespace": str1}, - {"test_namespace": base.Embed(str2)}, - {"test_namespace": base.Embed(str3)}, - ], - MockEncoder(), - ) + [ + {"test_namespace": str1}, + {"test_namespace": base.Embed(str2)}, + {"test_namespace": base.Embed(str3)}, + ], + MockEncoder(), + ) for i in range(len(featurized)): assert featurized[i].sparse == expected_sparse[i] assert featurized[i].dense == expected_dense[i] - + expected_sparse = [ {"test_namespace": {"raw": str1}}, {"test_namespace": {"raw": str2}}, {"test_namespace": {"raw": str3}}, ] featurized = base.embed( - [ - {"test_namespace": str1}, - {"test_namespace": base.EmbedAndKeep(str2)}, - {"test_namespace": base.EmbedAndKeep(str3)}, - ], - MockEncoder(), - ) + [ + {"test_namespace": str1}, + {"test_namespace": base.EmbedAndKeep(str2)}, + {"test_namespace": base.EmbedAndKeep(str3)}, + ], + MockEncoder(), + ) for i in range(len(featurized)): assert featurized[i].sparse == expected_sparse[i] assert featurized[i].dense == expected_dense[i] @@ -328,16 +346,17 @@ def test_action_w_namespace_w_emb_w_more_than_one_item_in_first_dict() -> None: expected_dense = [ {"test_namespace": [4.0, 0.0]}, {"test_namespace": [5.0, 0.0]}, - {"test_namespace": [6.0, 0.0]}] - + {"test_namespace": [6.0, 0.0]}, + ] + featurized = base.embed( - [ - {"test_namespace": base.Embed(str1), "test_namespace2": str1}, - {"test_namespace": base.Embed(str2), "test_namespace2": str2}, - {"test_namespace": base.Embed(str3), "test_namespace2": str3}, - ], - MockEncoder(), - ) + [ + {"test_namespace": base.Embed(str1), "test_namespace2": str1}, + {"test_namespace": base.Embed(str2), "test_namespace2": str2}, + {"test_namespace": base.Embed(str3), "test_namespace2": str3}, + ], + MockEncoder(), + ) for i in range(len(featurized)): assert featurized[i].sparse == expected_sparse[i] assert featurized[i].dense == expected_dense[i] @@ -348,13 +367,13 @@ def test_action_w_namespace_w_emb_w_more_than_one_item_in_first_dict() -> None: {"test_namespace": {"raw": str3}, "test_namespace2": {"raw": str3}}, ] featurized = base.embed( - [ - {"test_namespace": base.EmbedAndKeep(str1), "test_namespace2": str1}, - {"test_namespace": base.EmbedAndKeep(str2), "test_namespace2": str2}, - {"test_namespace": base.EmbedAndKeep(str3), "test_namespace2": str3}, - ], - MockEncoder(), - ) + [ + {"test_namespace": base.EmbedAndKeep(str1), "test_namespace2": str1}, + {"test_namespace": base.EmbedAndKeep(str2), "test_namespace2": str2}, + {"test_namespace": base.EmbedAndKeep(str3), "test_namespace2": str3}, + ], + MockEncoder(), + ) for i in range(len(featurized)): assert featurized[i].sparse == expected_sparse[i] assert featurized[i].dense == expected_dense[i] @@ -365,7 +384,8 @@ def test_one_namespace_w_list_of_features_no_emb() -> None: str2 = "test2" expected_sparse = { "test_namespace_0": {"raw": str1}, - "test_namespace_1": {"raw": str2}} + "test_namespace_1": {"raw": str2}, + } featurized = base.embed({"test_namespace": [str1, str2]}, MockEncoder()) assert featurized.sparse == expected_sparse