diff --git a/contract-tests/client_entity.py b/contract-tests/client_entity.py index 4b5a717..9256409 100644 --- a/contract-tests/client_entity.py +++ b/contract-tests/client_entity.py @@ -50,6 +50,7 @@ def __init__(self, tag, config): opts["all_attributes_private"] = events.get("allAttributesPrivate", False) opts["private_attributes"] = events.get("globalPrivateAttributes", {}) _set_optional_time_prop(events, "flushIntervalMs", opts, "flush_interval") + opts["omit_anonymous_contexts"] = events.get("omitAnonymousContexts", False) else: opts["send_events"] = False diff --git a/contract-tests/service.py b/contract-tests/service.py index 1a069b7..888dfaf 100644 --- a/contract-tests/service.py +++ b/contract-tests/service.py @@ -76,7 +76,8 @@ def status(): 'polling-gzip', 'inline-context', 'anonymous-redaction', - 'evaluation-hooks' + 'evaluation-hooks', + 'omit-anonymous-contexts' ] } return (json.dumps(body), 200, {'Content-type': 'application/json'}) diff --git a/ldclient/config.py b/ldclient/config.py index 0c06e1d..c5c75a7 100644 --- a/ldclient/config.py +++ b/ldclient/config.py @@ -176,7 +176,8 @@ def __init__(self, big_segments: Optional[BigSegmentsConfig]=None, application: Optional[dict]=None, hooks: Optional[List[Hook]]=None, - enable_event_compression: bool=False): + enable_event_compression: bool=False, + omit_anonymous_contexts: bool=False): """ :param sdk_key: The SDK key for your LaunchDarkly account. This is always required. :param base_uri: The base URL for the LaunchDarkly server. Most users should use the default @@ -243,6 +244,7 @@ def __init__(self, :param application: Optional properties for setting application metadata. See :py:attr:`~application` :param hooks: Hooks provide entrypoints which allow for observation of SDK functions. :param enable_event_compression: Whether or not to enable GZIP compression for outgoing events. + :param omit_anonymous_contexts: Sets whether anonymous contexts should be omitted from index and identify events. """ self.__sdk_key = sdk_key @@ -277,6 +279,7 @@ def __init__(self, self.__application = validate_application_info(application or {}, log) self.__hooks = [hook for hook in hooks if isinstance(hook, Hook)] if hooks else [] self.__enable_event_compression = enable_event_compression + self.__omit_anonymous_contexts = omit_anonymous_contexts self._data_source_update_sink: Optional[DataSourceUpdateSink] = None def copy_with_new_sdk_key(self, new_sdk_key: str) -> 'Config': @@ -466,6 +469,13 @@ def hooks(self) -> List[Hook]: def enable_event_compression(self) -> bool: return self.__enable_event_compression + @property + def omit_anonymous_contexts(self) -> bool: + """ + Determines whether or not anonymous contexts will be omitted from index and identify events. + """ + return self.__omit_anonymous_contexts + @property def data_source_update_sink(self) -> Optional[DataSourceUpdateSink]: """ diff --git a/ldclient/context.py b/ldclient/context.py index 5add964..1e0a708 100644 --- a/ldclient/context.py +++ b/ldclient/context.py @@ -381,6 +381,28 @@ def anonymous(self) -> bool: """ return self.__anonymous + def without_anonymous_contexts(self) -> Context: + """ + For a multi-kind context: + + A multi-kind context is made up of two or more single-kind contexts. + This method will first discard any single-kind contexts which are + anonymous. It will then create a new multi-kind context from the + remaining single-kind contexts. This may result in an invalid context + (e.g. all single-kind contexts are anonymous). + + For a single-kind context: + + If the context is not anonymous, this method will return the current + context as is and unmodified. + + If the context is anonymous, this method will return an invalid context. + """ + contexts = self.__multi if self.__multi is not None else [self] + contexts = [c for c in contexts if not c.anonymous] + + return Context.create_multi(*contexts) + def get(self, attribute: str) -> Any: """ Looks up the value of any attribute of the context by name. diff --git a/ldclient/impl/events/event_processor.py b/ldclient/impl/events/event_processor.py index 7549027..216bf7d 100644 --- a/ldclient/impl/events/event_processor.py +++ b/ldclient/impl/events/event_processor.py @@ -7,14 +7,13 @@ from email.utils import parsedate import json from threading import Event, Lock, Thread -from typing import Any, List, Optional, Dict +from typing import Any, List, Optional, Dict, Callable import time import uuid import queue import urllib3 import gzip from ldclient.config import Config -from datetime import timedelta from random import Random from ldclient.context import Context @@ -341,6 +340,7 @@ def __init__(self, inbox, config, http_client, diagnostic_accumulator=None): self._deduplicated_contexts = 0 self._diagnostic_accumulator = None if config.diagnostic_opt_out else diagnostic_accumulator self._sampler = Sampler(Random()) + self._omit_anonymous_contexts = config.omit_anonymous_contexts self._flush_workers = FixedThreadPool(__MAX_FLUSH_THREADS__, "ldclient.flush") self._diagnostic_flush_workers = None if self._diagnostic_accumulator is None else FixedThreadPool(1, "ldclient.diag_flush") @@ -387,7 +387,6 @@ def _process_event(self, event: EventInput): # Decide whether to add the event to the payload. Feature events may be added twice, once for # the event (if tracked) and once for debugging. context = None # type: Optional[Context] - can_add_index = True full_event = None # type: Any debug_event = None # type: Optional[DebugEvent] sampling_ratio = 1 if event.sampling_ratio is None else event.sampling_ratio @@ -401,24 +400,21 @@ def _process_event(self, event: EventInput): if self._should_debug_event(event): debug_event = DebugEvent(event) elif isinstance(event, EventInputIdentify): - context = event.context + if self._omit_anonymous_contexts: + context = event.context.without_anonymous_contexts() + if not context.valid: + return + + event = EventInputIdentify(event.timestamp, context, event.sampling_ratio) + full_event = event - can_add_index = False # an index event would be redundant if there's an identify event elif isinstance(event, EventInputCustom): context = event.context full_event = event elif isinstance(event, MigrationOpEvent): full_event = event - # For each context we haven't seen before, we add an index event - unless this is already - # an identify event. - if context is not None: - already_seen = self._context_keys.put(context.fully_qualified_key, True) - if can_add_index: - if already_seen: - self._deduplicated_contexts += 1 - else: - self._outbox.add_event(IndexEvent(event.timestamp, context)) + self._get_indexable_context(event, lambda c: self._outbox.add_event(IndexEvent(event.timestamp, c))) if full_event and self._sampler.sample(sampling_ratio): self._outbox.add_event(full_event) @@ -426,6 +422,28 @@ def _process_event(self, event: EventInput): if debug_event and self._sampler.sample(sampling_ratio): self._outbox.add_event(debug_event) + def _get_indexable_context(self, event: EventInput, block: Callable[[Context], None]): + if event.context is None: + return + + context = event.context + if self._omit_anonymous_contexts: + context = context.without_anonymous_contexts() + + if not context.valid: + return + + already_seen = self._context_keys.put(context.fully_qualified_key, True) + if already_seen: + self._deduplicated_contexts += 1 + return + elif isinstance(event, EventInputIdentify) or isinstance(event, MigrationOpEvent): + return + + block(context) + + + def _should_debug_event(self, event: EventInputEvaluation): if event.flag is None: return False diff --git a/ldclient/testing/impl/events/test_event_processor.py b/ldclient/testing/impl/events/test_event_processor.py index e8893a5..abcc884 100644 --- a/ldclient/testing/impl/events/test_event_processor.py +++ b/ldclient/testing/impl/events/test_event_processor.py @@ -236,6 +236,37 @@ def test_context_is_filtered_in_identify_event(): assert len(output) == 1 check_identify_event(output[0], e, formatter.format_context(context)) + +def test_omit_anonymous_contexts_suppresses_identify_event(): + with DefaultTestProcessor(omit_anonymous_contexts=True) as ep: + anon_context = Context.builder('userkey').name('Red').anonymous(True).build() + e = EventInputIdentify(timestamp, anon_context) + ep.send_event(e) + + try: + flush_and_get_events(ep) + pytest.fail("Expected no events") + except AssertionError: + pass + + +def test_omit_anonymous_contexts_strips_anonymous_contexts_correctly(): + with DefaultTestProcessor(omit_anonymous_contexts=True) as ep: + a = Context.builder('a').kind('a').anonymous(True).build() + b = Context.builder('b').kind('b').anonymous(True).build() + c = Context.builder('c').kind('c').anonymous(False).build() + mc = Context.multi_builder().add(a).add(b).add(c).build() + + e = EventInputIdentify(timestamp, mc) + ep.send_event(e) + + output = flush_and_get_events(ep) + assert len(output) == 1 + + formatter = EventContextFormatter(True, []) + check_identify_event(output[0], e, formatter.format_context(c)) + + def test_individual_feature_event_is_queued_with_index_event(): with DefaultTestProcessor() as ep: e = EventInputEvaluation(timestamp, context, flag.key, flag, 1, 'value', None, 'default', None, True) @@ -248,6 +279,34 @@ def test_individual_feature_event_is_queued_with_index_event(): check_summary_event(output[2]) +def test_omit_anonymous_context_emits_feature_event_without_index(): + with DefaultTestProcessor(omit_anonymous_contexts=True) as ep: + anon = Context.builder('a').anonymous(True).build() + e = EventInputEvaluation(timestamp, anon, flag.key, flag, 1, 'value', None, 'default', None, True) + ep.send_event(e) + + output = flush_and_get_events(ep) + assert len(output) == 2 + check_feature_event(output[0], e) + check_summary_event(output[1]) + + +def test_omit_anonymous_context_strips_anonymous_from_index_event(): + with DefaultTestProcessor(omit_anonymous_contexts=True) as ep: + a = Context.builder('a').kind('a').anonymous(True).build() + b = Context.builder('b').kind('b').anonymous(True).build() + c = Context.builder('c').kind('c').anonymous(False).build() + mc = Context.multi_builder().add(a).add(b).add(c).build() + e = EventInputEvaluation(timestamp, mc, flag.key, flag, 1, 'value', None, 'default', None, True) + ep.send_event(e) + + output = flush_and_get_events(ep) + assert len(output) == 3 + check_index_event(output[0], e, c.to_dict()) # Should only contain non-anon context + check_feature_event(output[1], e) + check_summary_event(output[2]) + + def test_individual_feature_event_is_ignored_for_0_sampling_ratio(): with DefaultTestProcessor() as ep: e = EventInputEvaluation(timestamp, context, flag_with_0_sampling_ratio.key, flag_with_0_sampling_ratio, 1, 'value', None, 'default', None, True) diff --git a/ldclient/testing/test_context.py b/ldclient/testing/test_context.py index e4806c1..1fe0fb7 100644 --- a/ldclient/testing/test_context.py +++ b/ldclient/testing/test_context.py @@ -287,3 +287,39 @@ def test_multi_builder_with_invalid_context(self): c1 = Context.create('a', 'kind1') c2 = Context.create('') assert_context_invalid(Context.multi_builder().add(c1).add(c2).build()) + + +class TestAnonymousRedaction: + def test_redacting_anonoymous_leads_to_invalid_context(self): + original = Context.builder('a').anonymous(True).build() + c = original.without_anonymous_contexts() + + assert_context_invalid(c) + + def test_redacting_non_anonymous_does_not_change_context(self): + original = Context.builder('a').anonymous(False).build() + c = original.without_anonymous_contexts() + + assert_context_valid(c) + assert c == original + + def test_can_find_non_anonymous_contexts_from_multi(self): + anon = Context.builder('a').anonymous(True).build() + nonanon = Context.create('b', 'kind2') + mc = Context.create_multi(anon, nonanon) + + filtered = mc.without_anonymous_contexts() + + assert_context_valid(filtered) + assert filtered.individual_context_count == 1 + assert filtered.key == 'b' + assert filtered.kind == 'kind2' + + def test_can_filter_all_from_multi(self): + a = Context.builder('a').anonymous(True).build() + b = Context.builder('b').anonymous(True).build() + mc = Context.create_multi(a, b) + + filtered = mc.without_anonymous_contexts() + + assert_context_invalid(filtered)