feat: Add option to omit anonymous users from index and identify events

launchdarkly · Aug 20, 2024 · 346afaa · 346afaa
1 parent 5bfdde6
commit 346afaa
Show file tree

Hide file tree

Showing 7 changed files with 163 additions and 16 deletions.
diff --git a/contract-tests/client_entity.py b/contract-tests/client_entity.py
@@ -50,6 +50,7 @@ def __init__(self, tag, config):
  opts["all_attributes_private"] = events.get("allAttributesPrivate", False)
  opts["private_attributes"] = events.get("globalPrivateAttributes", {})
  _set_optional_time_prop(events, "flushIntervalMs", opts, "flush_interval")
+ opts["omit_anonymous_contexts"] = events.get("omitAnonymousContexts", False)
  else:
  opts["send_events"] = False
 

diff --git a/contract-tests/service.py b/contract-tests/service.py
@@ -76,7 +76,8 @@ def status():
  'polling-gzip',
  'inline-context',
  'anonymous-redaction',
- 'evaluation-hooks'
+ 'evaluation-hooks',
+ 'omit-anonymous-contexts'
  ]
  }
  return (json.dumps(body), 200, {'Content-type': 'application/json'})

diff --git a/ldclient/config.py b/ldclient/config.py
@@ -176,7 +176,8 @@ def __init__(self,
  big_segments: Optional[BigSegmentsConfig]=None,
  application: Optional[dict]=None,
  hooks: Optional[List[Hook]]=None,
- enable_event_compression: bool=False):
+ enable_event_compression: bool=False,
+ omit_anonymous_contexts: bool=False):
  """
  :param sdk_key: The SDK key for your LaunchDarkly account. This is always required.
  :param base_uri: The base URL for the LaunchDarkly server. Most users should use the default
@@ -243,6 +244,7 @@ def __init__(self,
  :param application: Optional properties for setting application metadata. See :py:attr:`~application`
  :param hooks: Hooks provide entrypoints which allow for observation of SDK functions.
  :param enable_event_compression: Whether or not to enable GZIP compression for outgoing events.
+ :param omit_anonymous_contexts: Sets whether anonymous contexts should be omitted from index and identify events.
  """
  self.__sdk_key = sdk_key
 
@@ -277,6 +279,7 @@ def __init__(self,
  self.__application = validate_application_info(application or {}, log)
  self.__hooks = [hook for hook in hooks if isinstance(hook, Hook)] if hooks else []
  self.__enable_event_compression = enable_event_compression
+ self.__omit_anonymous_contexts = omit_anonymous_contexts
  self._data_source_update_sink: Optional[DataSourceUpdateSink] = None
 
  def copy_with_new_sdk_key(self, new_sdk_key: str) -> 'Config':
@@ -466,6 +469,13 @@ def hooks(self) -> List[Hook]:
  def enable_event_compression(self) -> bool:
  return self.__enable_event_compression
 
+ @property
+ def omit_anonymous_contexts(self) -> bool:
+ """
+ Determines whether or not anonymous contexts will be omitted from index and identify events.
+ """
+ # Stored once in __init__ from the `omit_anonymous_contexts` constructor argument, which defaults to False.
+ return self.__omit_anonymous_contexts
+
  @property
  def data_source_update_sink(self) -> Optional[DataSourceUpdateSink]:
  """

diff --git a/ldclient/context.py b/ldclient/context.py
@@ -381,6 +381,28 @@ def anonymous(self) -> bool:
  """
  return self.__anonymous
 
+ def without_anonymous_contexts(self) -> Context:
+     """
+     Build a context that contains only the non-anonymous parts of this one.
+
+     Multi-kind context:
+
+     Every anonymous single-kind member is dropped and a new multi-kind
+     context is created from whatever members remain. If every member is
+     anonymous nothing remains, and the result is an invalid context.
+
+     Single-kind context:
+
+     A non-anonymous context is returned as is and unmodified. An
+     anonymous context yields an invalid context.
+     """
+     # A single-kind context is treated as a one-element collection so that
+     # both cases go through the same filter.
+     members = [self] if self.__multi is None else self.__multi
+     retained = [member for member in members if not member.anonymous]
+
+     return Context.create_multi(*retained)
+
  def get(self, attribute: str) -> Any:
  """
  Looks up the value of any attribute of the context by name.

diff --git a/ldclient/impl/events/event_processor.py b/ldclient/impl/events/event_processor.py
@@ -7,14 +7,13 @@
 from email.utils import parsedate
 import json
 from threading import Event, Lock, Thread
-from typing import Any, List, Optional, Dict
+from typing import Any, List, Optional, Dict, Callable
 import time
 import uuid
 import queue
 import urllib3
 import gzip
 from ldclient.config import Config
-from datetime import timedelta
 from random import Random
 
 from ldclient.context import Context
@@ -341,6 +340,7 @@ def __init__(self, inbox, config, http_client, diagnostic_accumulator=None):
  self._deduplicated_contexts = 0
  self._diagnostic_accumulator = None if config.diagnostic_opt_out else diagnostic_accumulator
  self._sampler = Sampler(Random())
+ self._omit_anonymous_contexts = config.omit_anonymous_contexts
 
  self._flush_workers = FixedThreadPool(__MAX_FLUSH_THREADS__, "ldclient.flush")
  self._diagnostic_flush_workers = None if self._diagnostic_accumulator is None else FixedThreadPool(1, "ldclient.diag_flush")
@@ -387,7 +387,6 @@ def _process_event(self, event: EventInput):
  # Decide whether to add the event to the payload. Feature events may be added twice, once for
  # the event (if tracked) and once for debugging.
  context = None # type: Optional[Context]
- can_add_index = True
  full_event = None # type: Any
  debug_event = None # type: Optional[DebugEvent]
  sampling_ratio = 1 if event.sampling_ratio is None else event.sampling_ratio
@@ -401,31 +400,50 @@ def _process_event(self, event: EventInput):
  if self._should_debug_event(event):
  debug_event = DebugEvent(event)
  elif isinstance(event, EventInputIdentify):
- context = event.context
+ if self._omit_anonymous_contexts:
+ context = event.context.without_anonymous_contexts()
+ if not context.valid:
+ return
+
+ event = EventInputIdentify(event.timestamp, context, event.sampling_ratio)
+
  full_event = event
- can_add_index = False # an index event would be redundant if there's an identify event
  elif isinstance(event, EventInputCustom):
  context = event.context
  full_event = event
  elif isinstance(event, MigrationOpEvent):
  full_event = event
 
- # For each context we haven't seen before, we add an index event - unless this is already
- # an identify event.
- if context is not None:
- already_seen = self._context_keys.put(context.fully_qualified_key, True)
- if can_add_index:
- if already_seen:
- self._deduplicated_contexts += 1
- else:
- self._outbox.add_event(IndexEvent(event.timestamp, context))
+ self._get_indexable_context(event, lambda c: self._outbox.add_event(IndexEvent(event.timestamp, c)))
 
  if full_event and self._sampler.sample(sampling_ratio):
  self._outbox.add_event(full_event)
 
  if debug_event and self._sampler.sample(sampling_ratio):
  self._outbox.add_event(debug_event)
 
+ def _get_indexable_context(self, event: EventInput, block: Callable[[Context], None]):
+ # Invokes `block` with the event's context when that context's fully-qualified key has
+ # not been recorded in self._context_keys before. The caller in _process_event passes a
+ # callback that adds an IndexEvent to the outbox.
+ # NOTE(review): indentation was lost in this rendering of the diff, so the nesting of the
+ # statements below has to be inferred - confirm against the repository before relying on it.
+ if event.context is None:
+ return
+
+ context = event.context
+ # When the option is enabled, anonymous contexts are removed before the lookup, so the
+ # key recorded below is that of the filtered context.
+ if self._omit_anonymous_contexts:
+ context = context.without_anonymous_contexts()
+
+ if not context.valid:
+ return
+
+ # put() appears to report whether the key was already present - TODO confirm against
+ # the cache implementation.
+ already_seen = self._context_keys.put(context.fully_qualified_key, True)
+ if already_seen:
+ # NOTE(review): this branch runs before the event-type check, so the counter is also
+ # incremented for identify and migration-op events - confirm that is intended.
+ self._deduplicated_contexts += 1
+ return
+ elif isinstance(event, EventInputIdentify) or isinstance(event, MigrationOpEvent):
+ # The key has been recorded above, but `block` is not invoked for these event types.
+ return
+
+ block(context)
+
+
+
  def _should_debug_event(self, event: EventInputEvaluation):
  if event.flag is None:
  return False

diff --git a/ldclient/testing/impl/events/test_event_processor.py b/ldclient/testing/impl/events/test_event_processor.py
@@ -236,6 +236,37 @@ def test_context_is_filtered_in_identify_event():
  assert len(output) == 1
  check_identify_event(output[0], e, formatter.format_context(context))
 
+
+def test_omit_anonymous_contexts_suppresses_identify_event():
+    """An identify event for a solely anonymous context must not be delivered at all."""
+    with DefaultTestProcessor(omit_anonymous_contexts=True) as ep:
+        anon_context = Context.builder('userkey').name('Red').anonymous(True).build()
+        e = EventInputIdentify(timestamp, anon_context)
+        ep.send_event(e)
+
+        # The original try/pytest.fail/except AssertionError only worked because the
+        # exception raised by pytest.fail() is not an AssertionError. pytest.raises
+        # states the same expectation directly: flush_and_get_events must raise
+        # AssertionError, which the original treated as "no events were sent".
+        with pytest.raises(AssertionError):
+            flush_and_get_events(ep)
+
+
+def test_omit_anonymous_contexts_strips_anonymous_contexts_correctly():
+ # Identify event for a multi-kind context with two anonymous members (a, b) and one
+ # non-anonymous member (c); the option under test is enabled.
+ with DefaultTestProcessor(omit_anonymous_contexts=True) as ep:
+ a = Context.builder('a').kind('a').anonymous(True).build()
+ b = Context.builder('b').kind('b').anonymous(True).build()
+ c = Context.builder('c').kind('c').anonymous(False).build()
+ mc = Context.multi_builder().add(a).add(b).add(c).build()
+
+ e = EventInputIdentify(timestamp, mc)
+ ep.send_event(e)
+
+ output = flush_and_get_events(ep)
+ assert len(output) == 1
+
+ # The single identify event is compared against `c` alone, i.e. without a and b.
+ formatter = EventContextFormatter(True, [])
+ check_identify_event(output[0], e, formatter.format_context(c))
+
+
 def test_individual_feature_event_is_queued_with_index_event():
  with DefaultTestProcessor() as ep:
  e = EventInputEvaluation(timestamp, context, flag.key, flag, 1, 'value', None, 'default', None, True)
@@ -248,6 +279,34 @@ def test_individual_feature_event_is_queued_with_index_event():
  check_summary_event(output[2])
 
 
+def test_omit_anonymous_context_emits_feature_event_without_index():
+ # Evaluation event for a single anonymous context with the option enabled.
+ with DefaultTestProcessor(omit_anonymous_contexts=True) as ep:
+ anon = Context.builder('a').anonymous(True).build()
+ e = EventInputEvaluation(timestamp, anon, flag.key, flag, 1, 'value', None, 'default', None, True)
+ ep.send_event(e)
+
+ output = flush_and_get_events(ep)
+ # Exactly two events are expected - the feature event and the summary - so no index
+ # event precedes them.
+ assert len(output) == 2
+ check_feature_event(output[0], e)
+ check_summary_event(output[1])
+
+
+def test_omit_anonymous_context_strips_anonymous_from_index_event():
+ # Evaluation event for a multi-kind context with two anonymous members (a, b) and one
+ # non-anonymous member (c); the option under test is enabled.
+ with DefaultTestProcessor(omit_anonymous_contexts=True) as ep:
+ a = Context.builder('a').kind('a').anonymous(True).build()
+ b = Context.builder('b').kind('b').anonymous(True).build()
+ c = Context.builder('c').kind('c').anonymous(False).build()
+ mc = Context.multi_builder().add(a).add(b).add(c).build()
+ e = EventInputEvaluation(timestamp, mc, flag.key, flag, 1, 'value', None, 'default', None, True)
+ ep.send_event(e)
+
+ output = flush_and_get_events(ep)
+ # Three events are expected, in this order: index, feature, summary.
+ assert len(output) == 3
+ check_index_event(output[0], e, c.to_dict()) # Should only contain non-anon context
+ check_feature_event(output[1], e)
+ check_summary_event(output[2])
+
+
 def test_individual_feature_event_is_ignored_for_0_sampling_ratio():
  with DefaultTestProcessor() as ep:
  e = EventInputEvaluation(timestamp, context, flag_with_0_sampling_ratio.key, flag_with_0_sampling_ratio, 1, 'value', None, 'default', None, True)

diff --git a/ldclient/testing/test_context.py b/ldclient/testing/test_context.py
@@ -287,3 +287,39 @@ def test_multi_builder_with_invalid_context(self):
  c1 = Context.create('a', 'kind1')
  c2 = Context.create('')
  assert_context_invalid(Context.multi_builder().add(c1).add(c2).build())
+
+
+class TestAnonymousRedaction:
+    """Tests for ``Context.without_anonymous_contexts()``."""
+
+    def test_redacting_anonymous_leads_to_invalid_context(self):
+        # A single anonymous context has nothing left once it is filtered out.
+        original = Context.builder('a').anonymous(True).build()
+        c = original.without_anonymous_contexts()
+
+        assert_context_invalid(c)
+
+    def test_redacting_non_anonymous_does_not_change_context(self):
+        original = Context.builder('a').anonymous(False).build()
+        c = original.without_anonymous_contexts()
+
+        assert_context_valid(c)
+        assert c == original
+
+    def test_can_find_non_anonymous_contexts_from_multi(self):
+        anon = Context.builder('a').anonymous(True).build()
+        nonanon = Context.create('b', 'kind2')
+        mc = Context.create_multi(anon, nonanon)
+
+        filtered = mc.without_anonymous_contexts()
+
+        assert_context_valid(filtered)
+        assert filtered.individual_context_count == 1
+        assert filtered.key == 'b'
+        assert filtered.kind == 'kind2'
+
+    def test_can_filter_all_from_multi(self):
+        # Each member gets its own kind. Two members of the same (default) kind make
+        # the multi-kind context invalid before any filtering happens, so the final
+        # assertion would pass without exercising without_anonymous_contexts().
+        a = Context.builder('a').kind('kind1').anonymous(True).build()
+        b = Context.builder('b').kind('kind2').anonymous(True).build()
+        mc = Context.create_multi(a, b)
+        # Guard the premise: the input must be valid for the test to mean anything.
+        assert_context_valid(mc)
+
+        filtered = mc.without_anonymous_contexts()
+
+        assert_context_invalid(filtered)