Skip to content

Commit

Permalink
feat: Add option to omit anonymous users from index and identify events
Browse files Browse the repository at this point in the history
  • Loading branch information
keelerm84 committed Aug 20, 2024
1 parent 5bfdde6 commit 346afaa
Show file tree
Hide file tree
Showing 7 changed files with 163 additions and 16 deletions.
1 change: 1 addition & 0 deletions contract-tests/client_entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def __init__(self, tag, config):
opts["all_attributes_private"] = events.get("allAttributesPrivate", False)
opts["private_attributes"] = events.get("globalPrivateAttributes", {})
_set_optional_time_prop(events, "flushIntervalMs", opts, "flush_interval")
opts["omit_anonymous_contexts"] = events.get("omitAnonymousContexts", False)
else:
opts["send_events"] = False

Expand Down
3 changes: 2 additions & 1 deletion contract-tests/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ def status():
'polling-gzip',
'inline-context',
'anonymous-redaction',
'evaluation-hooks'
'evaluation-hooks',
'omit-anonymous-contexts'
]
}
return (json.dumps(body), 200, {'Content-type': 'application/json'})
Expand Down
12 changes: 11 additions & 1 deletion ldclient/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,8 @@ def __init__(self,
big_segments: Optional[BigSegmentsConfig]=None,
application: Optional[dict]=None,
hooks: Optional[List[Hook]]=None,
enable_event_compression: bool=False):
enable_event_compression: bool=False,
omit_anonymous_contexts: bool=False):
"""
:param sdk_key: The SDK key for your LaunchDarkly account. This is always required.
:param base_uri: The base URL for the LaunchDarkly server. Most users should use the default
Expand Down Expand Up @@ -243,6 +244,7 @@ def __init__(self,
:param application: Optional properties for setting application metadata. See :py:attr:`~application`
:param hooks: Hooks provide entrypoints which allow for observation of SDK functions.
:param enable_event_compression: Whether or not to enable GZIP compression for outgoing events.
:param omit_anonymous_contexts: Sets whether anonymous contexts should be omitted from index and identify events.
"""
self.__sdk_key = sdk_key

Expand Down Expand Up @@ -277,6 +279,7 @@ def __init__(self,
self.__application = validate_application_info(application or {}, log)
self.__hooks = [hook for hook in hooks if isinstance(hook, Hook)] if hooks else []
self.__enable_event_compression = enable_event_compression
self.__omit_anonymous_contexts = omit_anonymous_contexts
self._data_source_update_sink: Optional[DataSourceUpdateSink] = None

def copy_with_new_sdk_key(self, new_sdk_key: str) -> 'Config':
Expand Down Expand Up @@ -466,6 +469,13 @@ def hooks(self) -> List[Hook]:
def enable_event_compression(self) -> bool:
return self.__enable_event_compression

@property
def omit_anonymous_contexts(self) -> bool:
    """
    Whether anonymous contexts are left out of index and identify events.

    :return: the value supplied for ``omit_anonymous_contexts`` when this
        configuration was created
    """
    return self.__omit_anonymous_contexts

@property
def data_source_update_sink(self) -> Optional[DataSourceUpdateSink]:
"""
Expand Down
22 changes: 22 additions & 0 deletions ldclient/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,28 @@ def anonymous(self) -> bool:
"""
return self.__anonymous

def without_anonymous_contexts(self) -> Context:
    """
    Builds a context from this one with every anonymous context removed.

    Single-kind context:
        A non-anonymous context is returned as is and unmodified. An
        anonymous context yields an invalid context, because nothing is
        left once it has been discarded.

    Multi-kind context:
        Each anonymous single-kind member is discarded and a new context
        is created from the members that remain. If every member was
        anonymous the result is an invalid context.

    Callers should check ``valid`` on the result before using it.
    """
    # Treat a single-kind context as a one-element collection so that both
    # shapes go through the same filtering path.
    members = [self] if self.__multi is None else self.__multi
    retained = (member for member in members if not member.anonymous)

    return Context.create_multi(*retained)

def get(self, attribute: str) -> Any:
"""
Looks up the value of any attribute of the context by name.
Expand Down
46 changes: 32 additions & 14 deletions ldclient/impl/events/event_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,13 @@
from email.utils import parsedate
import json
from threading import Event, Lock, Thread
from typing import Any, List, Optional, Dict
from typing import Any, List, Optional, Dict, Callable
import time
import uuid
import queue
import urllib3
import gzip
from ldclient.config import Config
from datetime import timedelta
from random import Random

from ldclient.context import Context
Expand Down Expand Up @@ -341,6 +340,7 @@ def __init__(self, inbox, config, http_client, diagnostic_accumulator=None):
self._deduplicated_contexts = 0
self._diagnostic_accumulator = None if config.diagnostic_opt_out else diagnostic_accumulator
self._sampler = Sampler(Random())
self._omit_anonymous_contexts = config.omit_anonymous_contexts

self._flush_workers = FixedThreadPool(__MAX_FLUSH_THREADS__, "ldclient.flush")
self._diagnostic_flush_workers = None if self._diagnostic_accumulator is None else FixedThreadPool(1, "ldclient.diag_flush")
Expand Down Expand Up @@ -387,7 +387,6 @@ def _process_event(self, event: EventInput):
# Decide whether to add the event to the payload. Feature events may be added twice, once for
# the event (if tracked) and once for debugging.
context = None # type: Optional[Context]
can_add_index = True
full_event = None # type: Any
debug_event = None # type: Optional[DebugEvent]
sampling_ratio = 1 if event.sampling_ratio is None else event.sampling_ratio
Expand All @@ -401,31 +400,50 @@ def _process_event(self, event: EventInput):
if self._should_debug_event(event):
debug_event = DebugEvent(event)
elif isinstance(event, EventInputIdentify):
context = event.context
if self._omit_anonymous_contexts:
context = event.context.without_anonymous_contexts()
if not context.valid:
return

event = EventInputIdentify(event.timestamp, context, event.sampling_ratio)

full_event = event
can_add_index = False # an index event would be redundant if there's an identify event
elif isinstance(event, EventInputCustom):
context = event.context
full_event = event
elif isinstance(event, MigrationOpEvent):
full_event = event

# For each context we haven't seen before, we add an index event - unless this is already
# an identify event.
if context is not None:
already_seen = self._context_keys.put(context.fully_qualified_key, True)
if can_add_index:
if already_seen:
self._deduplicated_contexts += 1
else:
self._outbox.add_event(IndexEvent(event.timestamp, context))
self._get_indexable_context(event, lambda c: self._outbox.add_event(IndexEvent(event.timestamp, c)))

if full_event and self._sampler.sample(sampling_ratio):
self._outbox.add_event(full_event)

if debug_event and self._sampler.sample(sampling_ratio):
self._outbox.add_event(debug_event)

def _get_indexable_context(self, event: EventInput, block: Callable[[Context], None]):
    """
    Invokes ``block`` with the context that should be indexed for ``event``,
    if there is one.

    ``block`` is not called when any of the following holds:

    * the event has no context;
    * the context (after anonymous contexts are stripped, when
      ``omit_anonymous_contexts`` is enabled) is not valid;
    * the context's fully qualified key was already recorded in
      ``_context_keys``;
    * the event is an identify event or a migration op event.

    :param event: the input event being processed
    :param block: callback that receives the context to be indexed
    """
    candidate = event.context
    if candidate is None:
        return

    if self._omit_anonymous_contexts:
        candidate = candidate.without_anonymous_contexts()

    if not candidate.valid:
        return

    # The key is recorded, and the dedup counter bumped, before the event
    # type is examined, so identify and migration op events also take part
    # in both.
    if self._context_keys.put(candidate.fully_qualified_key, True):
        self._deduplicated_contexts += 1
        return

    if isinstance(event, (EventInputIdentify, MigrationOpEvent)):
        return

    block(candidate)



def _should_debug_event(self, event: EventInputEvaluation):
if event.flag is None:
return False
Expand Down
59 changes: 59 additions & 0 deletions ldclient/testing/impl/events/test_event_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,37 @@ def test_context_is_filtered_in_identify_event():
assert len(output) == 1
check_identify_event(output[0], e, formatter.format_context(context))


def test_omit_anonymous_contexts_suppresses_identify_event():
    """
    With omit_anonymous_contexts enabled, identifying a lone anonymous
    context is expected to deliver nothing: the flush helper raising
    AssertionError is what this test treats as "no events".
    """
    anonymous_user = Context.builder('userkey').name('Red').anonymous(True).build()

    with DefaultTestProcessor(omit_anonymous_contexts=True) as ep:
        identify = EventInputIdentify(timestamp, anonymous_user)
        ep.send_event(identify)

        # Only the flush itself is guarded; the explicit failure lives in
        # the else branch so it can never be mistaken for the expected error.
        try:
            flush_and_get_events(ep)
        except AssertionError:
            pass
        else:
            pytest.fail("Expected no events")


def test_omit_anonymous_contexts_strips_anonymous_contexts_correctly():
    """
    Identifying a multi-kind context with two anonymous members and one
    non-anonymous member should emit one identify event whose context is
    just the non-anonymous member.
    """
    anon_a = Context.builder('a').kind('a').anonymous(True).build()
    anon_b = Context.builder('b').kind('b').anonymous(True).build()
    known_c = Context.builder('c').kind('c').anonymous(False).build()
    combined = Context.multi_builder().add(anon_a).add(anon_b).add(known_c).build()

    with DefaultTestProcessor(omit_anonymous_contexts=True) as ep:
        identify = EventInputIdentify(timestamp, combined)
        ep.send_event(identify)

        delivered = flush_and_get_events(ep)
        assert len(delivered) == 1

        expected_context = EventContextFormatter(True, []).format_context(known_c)
        check_identify_event(delivered[0], identify, expected_context)


def test_individual_feature_event_is_queued_with_index_event():
with DefaultTestProcessor() as ep:
e = EventInputEvaluation(timestamp, context, flag.key, flag, 1, 'value', None, 'default', None, True)
Expand All @@ -248,6 +279,34 @@ def test_individual_feature_event_is_queued_with_index_event():
check_summary_event(output[2])


def test_omit_anonymous_context_emits_feature_event_without_index():
    """
    Evaluating a flag for a lone anonymous context should still deliver the
    feature and summary events, but no index event ahead of them.
    """
    anonymous_user = Context.builder('a').anonymous(True).build()

    with DefaultTestProcessor(omit_anonymous_contexts=True) as ep:
        evaluation = EventInputEvaluation(
            timestamp, anonymous_user, flag.key, flag, 1, 'value', None, 'default', None, True
        )
        ep.send_event(evaluation)

        delivered = flush_and_get_events(ep)
        assert len(delivered) == 2

        feature_payload, summary_payload = delivered[0], delivered[1]
        check_feature_event(feature_payload, evaluation)
        check_summary_event(summary_payload)


def test_omit_anonymous_context_strips_anonymous_from_index_event():
    """
    Evaluating a flag for a multi-kind context should produce an index event
    that carries only the non-anonymous member, followed by the feature and
    summary events.
    """
    anon_a = Context.builder('a').kind('a').anonymous(True).build()
    anon_b = Context.builder('b').kind('b').anonymous(True).build()
    known_c = Context.builder('c').kind('c').anonymous(False).build()
    combined = Context.multi_builder().add(anon_a).add(anon_b).add(known_c).build()

    with DefaultTestProcessor(omit_anonymous_contexts=True) as ep:
        evaluation = EventInputEvaluation(
            timestamp, combined, flag.key, flag, 1, 'value', None, 'default', None, True
        )
        ep.send_event(evaluation)

        delivered = flush_and_get_events(ep)
        assert len(delivered) == 3

        # The index event should only contain the non-anonymous context.
        check_index_event(delivered[0], evaluation, known_c.to_dict())
        check_feature_event(delivered[1], evaluation)
        check_summary_event(delivered[2])


def test_individual_feature_event_is_ignored_for_0_sampling_ratio():
with DefaultTestProcessor() as ep:
e = EventInputEvaluation(timestamp, context, flag_with_0_sampling_ratio.key, flag_with_0_sampling_ratio, 1, 'value', None, 'default', None, True)
Expand Down
36 changes: 36 additions & 0 deletions ldclient/testing/test_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,3 +287,39 @@ def test_multi_builder_with_invalid_context(self):
c1 = Context.create('a', 'kind1')
c2 = Context.create('')
assert_context_invalid(Context.multi_builder().add(c1).add(c2).build())


class TestAnonymousRedaction:
    """Tests for ``Context.without_anonymous_contexts()``."""

    def test_redacting_anonoymous_leads_to_invalid_context(self):
        # A lone anonymous context has nothing left once it is stripped.
        original = Context.builder('a').anonymous(True).build()
        c = original.without_anonymous_contexts()

        assert_context_invalid(c)

    def test_redacting_non_anonymous_does_not_change_context(self):
        # A non-anonymous single-kind context must come back unchanged.
        original = Context.builder('a').anonymous(False).build()
        c = original.without_anonymous_contexts()

        assert_context_valid(c)
        assert c == original

    def test_can_find_non_anonymous_contexts_from_multi(self):
        # Only the non-anonymous member should survive the filtering.
        anon = Context.builder('a').anonymous(True).build()
        nonanon = Context.create('b', 'kind2')
        mc = Context.create_multi(anon, nonanon)

        filtered = mc.without_anonymous_contexts()

        assert_context_valid(filtered)
        assert filtered.individual_context_count == 1
        assert filtered.key == 'b'
        assert filtered.kind == 'kind2'

    def test_can_filter_all_from_multi(self):
        # The members need distinct kinds: a multi-kind context may not hold
        # two contexts of the same kind, so leaving both on the default kind
        # would make `mc` invalid before any filtering takes place and the
        # final assertion would pass without exercising the filter.
        a = Context.builder('a').kind('kind1').anonymous(True).build()
        b = Context.builder('b').kind('kind2').anonymous(True).build()
        mc = Context.create_multi(a, b)

        # Guard: the input must be a usable multi-kind context, otherwise
        # this test proves nothing about filtering.
        assert_context_valid(mc)

        filtered = mc.without_anonymous_contexts()

        assert_context_invalid(filtered)

0 comments on commit 346afaa

Please sign in to comment.