diff --git a/contract-tests/client_entity.py b/contract-tests/client_entity.py index 4b5a717..9256409 100644 --- a/contract-tests/client_entity.py +++ b/contract-tests/client_entity.py @@ -50,6 +50,7 @@ def __init__(self, tag, config): opts["all_attributes_private"] = events.get("allAttributesPrivate", False) opts["private_attributes"] = events.get("globalPrivateAttributes", {}) _set_optional_time_prop(events, "flushIntervalMs", opts, "flush_interval") + opts["omit_anonymous_contexts"] = events.get("omitAnonymousContexts", False) else: opts["send_events"] = False diff --git a/contract-tests/service.py b/contract-tests/service.py index 1a069b7..888dfaf 100644 --- a/contract-tests/service.py +++ b/contract-tests/service.py @@ -76,7 +76,8 @@ def status(): 'polling-gzip', 'inline-context', 'anonymous-redaction', - 'evaluation-hooks' + 'evaluation-hooks', + 'omit-anonymous-contexts' ] } return (json.dumps(body), 200, {'Content-type': 'application/json'}) diff --git a/ldclient/config.py b/ldclient/config.py index 0c06e1d..c5c75a7 100644 --- a/ldclient/config.py +++ b/ldclient/config.py @@ -176,7 +176,8 @@ def __init__(self, big_segments: Optional[BigSegmentsConfig]=None, application: Optional[dict]=None, hooks: Optional[List[Hook]]=None, - enable_event_compression: bool=False): + enable_event_compression: bool=False, + omit_anonymous_contexts: bool=False): """ :param sdk_key: The SDK key for your LaunchDarkly account. This is always required. :param base_uri: The base URL for the LaunchDarkly server. Most users should use the default @@ -243,6 +244,7 @@ def __init__(self, :param application: Optional properties for setting application metadata. See :py:attr:`~application` :param hooks: Hooks provide entrypoints which allow for observation of SDK functions. :param enable_event_compression: Whether or not to enable GZIP compression for outgoing events. + :param omit_anonymous_contexts: Sets whether anonymous contexts should be omitted from index and identify events. """ self.__sdk_key = sdk_key @@ -277,6 +279,7 @@ def __init__(self, self.__application = validate_application_info(application or {}, log) self.__hooks = [hook for hook in hooks if isinstance(hook, Hook)] if hooks else [] self.__enable_event_compression = enable_event_compression + self.__omit_anonymous_contexts = omit_anonymous_contexts self._data_source_update_sink: Optional[DataSourceUpdateSink] = None def copy_with_new_sdk_key(self, new_sdk_key: str) -> 'Config': @@ -466,6 +469,13 @@ def hooks(self) -> List[Hook]: def enable_event_compression(self) -> bool: return self.__enable_event_compression + @property + def omit_anonymous_contexts(self) -> bool: + """ + Determines whether or not anonymous contexts will be omitted from index and identify events. + """ + return self.__omit_anonymous_contexts + @property def data_source_update_sink(self) -> Optional[DataSourceUpdateSink]: """ diff --git a/ldclient/context.py b/ldclient/context.py index 5add964..1e0a708 100644 --- a/ldclient/context.py +++ b/ldclient/context.py @@ -381,6 +381,28 @@ def anonymous(self) -> bool: """ return self.__anonymous + def without_anonymous_contexts(self) -> Context: + """ + For a multi-kind context: + + A multi-kind context is made up of two or more single-kind contexts. + This method will first discard any single-kind contexts which are + anonymous. It will then create a new multi-kind context from the + remaining single-kind contexts. This may result in an invalid context + (e.g. all single-kind contexts are anonymous). + + For a single-kind context: + + If the context is not anonymous, this method will return the current + context as is and unmodified. + + If the context is anonymous, this method will return an invalid context. + """ + contexts = self.__multi if self.__multi is not None else [self] + contexts = [c for c in contexts if not c.anonymous] + + return Context.create_multi(*contexts) + def get(self, attribute: str) -> Any: """ Looks up the value of any attribute of the context by name. diff --git a/ldclient/impl/events/event_processor.py b/ldclient/impl/events/event_processor.py index 7549027..216bf7d 100644 --- a/ldclient/impl/events/event_processor.py +++ b/ldclient/impl/events/event_processor.py @@ -7,14 +7,13 @@ from email.utils import parsedate import json from threading import Event, Lock, Thread -from typing import Any, List, Optional, Dict +from typing import Any, List, Optional, Dict, Callable import time import uuid import queue import urllib3 import gzip from ldclient.config import Config -from datetime import timedelta from random import Random from ldclient.context import Context @@ -341,6 +340,7 @@ def __init__(self, inbox, config, http_client, diagnostic_accumulator=None): self._deduplicated_contexts = 0 self._diagnostic_accumulator = None if config.diagnostic_opt_out else diagnostic_accumulator self._sampler = Sampler(Random()) + self._omit_anonymous_contexts = config.omit_anonymous_contexts self._flush_workers = FixedThreadPool(__MAX_FLUSH_THREADS__, "ldclient.flush") self._diagnostic_flush_workers = None if self._diagnostic_accumulator is None else FixedThreadPool(1, "ldclient.diag_flush") @@ -387,7 +387,6 @@ def _process_event(self, event: EventInput): # Decide whether to add the event to the payload. Feature events may be added twice, once for # the event (if tracked) and once for debugging. context = None # type: Optional[Context] - can_add_index = True full_event = None # type: Any debug_event = None # type: Optional[DebugEvent] sampling_ratio = 1 if event.sampling_ratio is None else event.sampling_ratio @@ -401,24 +400,21 @@ def _process_event(self, event: EventInput): if self._should_debug_event(event): debug_event = DebugEvent(event) elif isinstance(event, EventInputIdentify): - context = event.context + if self._omit_anonymous_contexts: + context = event.context.without_anonymous_contexts() + if not context.valid: + return + + event = EventInputIdentify(event.timestamp, context, event.sampling_ratio) + full_event = event - can_add_index = False # an index event would be redundant if there's an identify event elif isinstance(event, EventInputCustom): context = event.context full_event = event elif isinstance(event, MigrationOpEvent): full_event = event - # For each context we haven't seen before, we add an index event - unless this is already - # an identify event. - if context is not None: - already_seen = self._context_keys.put(context.fully_qualified_key, True) - if can_add_index: - if already_seen: - self._deduplicated_contexts += 1 - else: - self._outbox.add_event(IndexEvent(event.timestamp, context)) + self._get_indexable_context(event, lambda c: self._outbox.add_event(IndexEvent(event.timestamp, c))) if full_event and self._sampler.sample(sampling_ratio): self._outbox.add_event(full_event) @@ -426,6 +422,28 @@ def _process_event(self, event: EventInput): if debug_event and self._sampler.sample(sampling_ratio): self._outbox.add_event(debug_event) + def _get_indexable_context(self, event: EventInput, block: Callable[[Context], None]): + if event.context is None: + return + + context = event.context + if self._omit_anonymous_contexts: + context = context.without_anonymous_contexts() + + if not context.valid: + return + + already_seen = self._context_keys.put(context.fully_qualified_key, True) + if already_seen: + self._deduplicated_contexts += 1 + return + elif isinstance(event, EventInputIdentify) or isinstance(event, MigrationOpEvent): + return + + block(context) + + + def _should_debug_event(self, event: EventInputEvaluation): if event.flag is None: return False diff --git a/ldclient/testing/impl/events/test_event_processor.py b/ldclient/testing/impl/events/test_event_processor.py index e8893a5..abcc884 100644 --- a/ldclient/testing/impl/events/test_event_processor.py +++ b/ldclient/testing/impl/events/test_event_processor.py @@ -236,6 +236,37 @@ def test_context_is_filtered_in_identify_event(): assert len(output) == 1 check_identify_event(output[0], e, formatter.format_context(context)) + +def test_omit_anonymous_contexts_suppresses_identify_event(): + with DefaultTestProcessor(omit_anonymous_contexts=True) as ep: + anon_context = Context.builder('userkey').name('Red').anonymous(True).build() + e = EventInputIdentify(timestamp, anon_context) + ep.send_event(e) + + try: + flush_and_get_events(ep) + pytest.fail("Expected no events") + except AssertionError: + pass + + +def test_omit_anonymous_contexts_strips_anonymous_contexts_correctly(): + with DefaultTestProcessor(omit_anonymous_contexts=True) as ep: + a = Context.builder('a').kind('a').anonymous(True).build() + b = Context.builder('b').kind('b').anonymous(True).build() + c = Context.builder('c').kind('c').anonymous(False).build() + mc = Context.multi_builder().add(a).add(b).add(c).build() + + e = EventInputIdentify(timestamp, mc) + ep.send_event(e) + + output = flush_and_get_events(ep) + assert len(output) == 1 + + formatter = EventContextFormatter(True, []) + check_identify_event(output[0], e, formatter.format_context(c)) + + def test_individual_feature_event_is_queued_with_index_event(): with DefaultTestProcessor() as ep: e = EventInputEvaluation(timestamp, context, flag.key, flag, 1, 'value', None, 'default', None, True) @@ -248,6 +279,34 @@ def test_individual_feature_event_is_queued_with_index_event(): check_summary_event(output[2]) +def test_omit_anonymous_context_emits_feature_event_without_index(): + with DefaultTestProcessor(omit_anonymous_contexts=True) as ep: + anon = Context.builder('a').anonymous(True).build() + e = EventInputEvaluation(timestamp, anon, flag.key, flag, 1, 'value', None, 'default', None, True) + ep.send_event(e) + + output = flush_and_get_events(ep) + assert len(output) == 2 + check_feature_event(output[0], e) + check_summary_event(output[1]) + + +def test_omit_anonymous_context_strips_anonymous_from_index_event(): + with DefaultTestProcessor(omit_anonymous_contexts=True) as ep: + a = Context.builder('a').kind('a').anonymous(True).build() + b = Context.builder('b').kind('b').anonymous(True).build() + c = Context.builder('c').kind('c').anonymous(False).build() + mc = Context.multi_builder().add(a).add(b).add(c).build() + e = EventInputEvaluation(timestamp, mc, flag.key, flag, 1, 'value', None, 'default', None, True) + ep.send_event(e) + + output = flush_and_get_events(ep) + assert len(output) == 3 + check_index_event(output[0], e, c.to_dict()) # Should only contain non-anon context + check_feature_event(output[1], e) + check_summary_event(output[2]) + + def test_individual_feature_event_is_ignored_for_0_sampling_ratio(): with DefaultTestProcessor() as ep: e = EventInputEvaluation(timestamp, context, flag_with_0_sampling_ratio.key, flag_with_0_sampling_ratio, 1, 'value', None, 'default', None, True) diff --git a/ldclient/testing/test_context.py b/ldclient/testing/test_context.py index 37baa4c..1fe0fb7 100644 --- a/ldclient/testing/test_context.py +++ b/ldclient/testing/test_context.py @@ -110,22 +110,22 @@ def test_get_built_in_attribute_by_name(self): assert c.get('kind') == 'b' assert c.get('name') == 'c' assert c.get('anonymous') is True - + def test_get_unknown_attribute(self): c = Context.create('a') assert c.get('b') is None - + def test_private_attributes(self): assert list(Context.create('a').private_attributes) == [] c = Context.builder('a').private('b', '/c/d').private('e').build() assert list(c.private_attributes) == ['b', '/c/d', 'e'] - + def test_fully_qualified_key(self): assert Context.create('key1').fully_qualified_key == 'key1' assert Context.create('key1', 'kind1').fully_qualified_key == 'kind1:key1' assert Context.create('key%with:things', 'kind1').fully_qualified_key == 'kind1:key%25with%3Athings' - + def test_builder_from_context(self): c1 = Context.builder('a').kind('kind1').name('b').set('c', True).private('d').build() b = Context.builder_from_context(c1) @@ -167,7 +167,7 @@ def _assert_contexts_from_factory_equal(fn): Context.create_multi(Context.create('a', 'kind1'), Context.create('b', 'kind2')) assert Context.create_multi(Context.create('a', 'kind1'), Context.create('b', 'kind2')) != \ Context.create('a', 'kind1') - + _assert_contexts_from_factory_equal(lambda: Context.create('invalid', 'kind')) assert Context.create('invalid', 'kind') != Context.create_multi() # different errors @@ -195,10 +195,10 @@ def test_json_decoding(self): Context.builder('key1').kind('kind1').anonymous(True).build() assert Context.from_dict({'kind': 'kind1', 'key': 'key1', '_meta': {'privateAttributes': ['b']}}) == \ Context.builder('key1').kind('kind1').private('b').build() - + assert Context.from_dict({'kind': 'multi', 'kind1': {'key': 'key1'}, 'kind2': {'key': 'key2'}}) == \ Context.create_multi(Context.create('key1', 'kind1'), Context.create('key2', 'kind2')) - + assert_context_invalid(Context.from_dict({'kind': 'kind1'})) assert_context_invalid(Context.from_dict({'kind': 'kind1', 'key': 3})) assert_context_invalid(Context.from_dict({'kind': 'multi'})) @@ -256,15 +256,15 @@ class TestContextErrors: def test_key_empty_string(self): assert_context_invalid(Context.create('')) assert_context_invalid(Context.builder('').build()) - + @pytest.mark.parametrize('kind', ['kind', 'multi', 'b$c', '']) def test_kind_invalid_strings(self, kind): assert_context_invalid(Context.create('a', kind)) assert_context_invalid(Context.builder('a').kind(kind).build()) - + def test_create_multi_with_no_contexts(self): assert_context_invalid(Context.create_multi()) - + def test_multi_builder_with_no_contexts(self): assert_context_invalid(Context.multi_builder().build()) @@ -272,18 +272,54 @@ def test_create_multi_with_duplicate_kind(self): c1 = Context.create('a', 'kind1') c2 = Context.create('b', 'kind1') assert_context_invalid(Context.create_multi(c1, c2)) - + def test_multi_builder_with_duplicate_kind(self): c1 = Context.create('a', 'kind1') c2 = Context.create('b', 'kind1') assert_context_invalid(Context.multi_builder().add(c1).add(c2).build()) - + def test_create_multi_with_invalid_context(self): c1 = Context.create('a', 'kind1') c2 = Context.create('') assert_context_invalid(Context.create_multi(c1, c2)) - + def test_multi_builder_with_invalid_context(self): c1 = Context.create('a', 'kind1') c2 = Context.create('') assert_context_invalid(Context.multi_builder().add(c1).add(c2).build()) + + +class TestAnonymousRedaction: + def test_redacting_anonoymous_leads_to_invalid_context(self): + original = Context.builder('a').anonymous(True).build() + c = original.without_anonymous_contexts() + + assert_context_invalid(c) + + def test_redacting_non_anonymous_does_not_change_context(self): + original = Context.builder('a').anonymous(False).build() + c = original.without_anonymous_contexts() + + assert_context_valid(c) + assert c == original + + def test_can_find_non_anonymous_contexts_from_multi(self): + anon = Context.builder('a').anonymous(True).build() + nonanon = Context.create('b', 'kind2') + mc = Context.create_multi(anon, nonanon) + + filtered = mc.without_anonymous_contexts() + + assert_context_valid(filtered) + assert filtered.individual_context_count == 1 + assert filtered.key == 'b' + assert filtered.kind == 'kind2' + + def test_can_filter_all_from_multi(self): + a = Context.builder('a').anonymous(True).build() + b = Context.builder('b').anonymous(True).build() + mc = Context.create_multi(a, b) + + filtered = mc.without_anonymous_contexts() + + assert_context_invalid(filtered)