From 2a91105ac9cf04ed407e355242c6c8bc046d792e Mon Sep 17 00:00:00 2001
From: Abram Booth
Date: Wed, 8 Feb 2023 13:20:36 -0500
Subject: [PATCH 1/2] monthly-use report

---
 api/metrics/views.py                     |  1 +
 osf/metrics/counted_usage.py             |  9 +---
 osf/metrics/reporters/__init__.py        |  2 +
 osf/metrics/reporters/monthly_use.py     | 26 +++++++++
 osf/metrics/reports.py                   | 56 ++++++++++++-------
 osf/metrics/utils.py                     |  7 +++
 osf_tests/metrics/test_daily_report.py   | 11 ++--
 osf_tests/metrics/test_monthly_report.py | 69 ++++++++++++++++++++++++
 8 files changed, 151 insertions(+), 30 deletions(-)
 create mode 100644 osf/metrics/reporters/monthly_use.py
 create mode 100644 osf_tests/metrics/test_monthly_report.py

diff --git a/api/metrics/views.py b/api/metrics/views.py
index 510c8a9cad3..784a3932942 100644
--- a/api/metrics/views.py
+++ b/api/metrics/views.py
@@ -409,6 +409,7 @@ def _get_session_id(self, request, client_session_id=None):
             session_id_parts = [
                 client_session_id,
                 current_date_str,
+                now.hour,
             ]
         elif user_is_authenticated:
             session_id_parts = [
diff --git a/osf/metrics/counted_usage.py b/osf/metrics/counted_usage.py
index 9f010ca1218..c0c4574a421 100644
--- a/osf/metrics/counted_usage.py
+++ b/osf/metrics/counted_usage.py
@@ -3,23 +3,18 @@
 import logging
 from urllib.parse import urlsplit
 
-from elasticsearch_dsl import InnerDoc, analyzer, tokenizer
+from elasticsearch_dsl import InnerDoc
 from elasticsearch_metrics import metrics
 from elasticsearch_metrics.signals import pre_save
 from django.dispatch import receiver
 import pytz
 
-from osf.metrics.utils import stable_key
+from osf.metrics.utils import stable_key, route_prefix_analyzer
 from osf.models import Guid
 
 logger = logging.getLogger(__name__)
 
-route_prefix_analyzer = analyzer(
-    'route_prefix_analyzer',
-    tokenizer=tokenizer('route_prefix_tokenizer', 'path_hierarchy', delimiter='.'),
-)
-
 
 class PageviewInfo(InnerDoc):
     """PageviewInfo
diff --git a/osf/metrics/reporters/__init__.py b/osf/metrics/reporters/__init__.py
index b7a0f5e5363..8cf8adf551f 100644
--- a/osf/metrics/reporters/__init__.py
+++ b/osf/metrics/reporters/__init__.py
@@ -8,6 +8,7 @@
 from .preprint_count import PreprintCountReporter
 from .user_count import UserCountReporter
 from .spam_count import SpamCountReporter
+from .monthly_use import MonthlyUseReporter
 
 
 DAILY_REPORTERS = (
@@ -24,4 +25,5 @@
 
 MONTHLY_REPORTERS = (
     SpamCountReporter,
+    MonthlyUseReporter,
 )
diff --git a/osf/metrics/reporters/monthly_use.py b/osf/metrics/reporters/monthly_use.py
new file mode 100644
index 00000000000..4fcaf0e9155
--- /dev/null
+++ b/osf/metrics/reporters/monthly_use.py
@@ -0,0 +1,26 @@
+from osf.metrics.counted_usage import CountedUsage
+from osf.metrics.reports import MonthlyUseReport
+from ._base import MonthlyReporter
+
+
+class MonthlyUseReporter(MonthlyReporter):
+    def report(self, report_yearmonth):
+        start = report_yearmonth.target_month()
+        end = report_yearmonth.next_month()
+        search = (
+            CountedUsage.search()
+            .filter('range', timestamp={'gte': start, 'lte': end})
+            [:0]  # just the aggregations, no hits
+        )
+        search.aggs.metric('total_session_hours', 'cardinality', field='session_id')
+        result = search.execute()
+        total_session_hours = result.aggs.total_session_hours.value
+        month_timedelta = (end - start)
+        month_hours = (24 * month_timedelta.days) + int(month_timedelta.seconds / (60 * 60))
+        average_sessions_per_hour = total_session_hours / month_hours
+        report = MonthlyUseReport(
+            report_yearmonth=report_yearmonth,
+            total_session_hours=total_session_hours,
+            average_sessions_per_hour=average_sessions_per_hour,
+        )
+        return [report]
diff --git a/osf/metrics/reports.py b/osf/metrics/reports.py
index 8410cebb019..1f9f2d3e090 100644
--- a/osf/metrics/reports.py
+++ b/osf/metrics/reports.py
@@ -5,7 +5,7 @@
 from elasticsearch_metrics import metrics
 from elasticsearch_metrics.signals import pre_save as metrics_pre_save
 
-from osf.metrics.utils import stable_key, YearMonth
+from osf.metrics.utils import stable_key, YearMonth, route_prefix_analyzer
 
 
 class ReportInvalid(Exception):
@@ -20,7 +20,7 @@ class DailyReport(metrics.Metric):
     There's something we'd like to know about every so often,
     so let's regularly run a report and stash the results here.
     """
-    DAILY_UNIQUE_FIELD = None  # set in subclasses that expect multiple reports per day
+    UNIQUE_TOGETHER = ('report_date',)  # override in subclasses that expect multiple reports per day
 
     report_date = metrics.Date(format='strict_date', required=True)
 
@@ -58,6 +58,7 @@ def serialize(self, data):
 class MonthlyReport(metrics.Metric):
     """MonthlyReport (abstract base for report-based metrics that run monthly)
     """
+    UNIQUE_TOGETHER = ('report_yearmonth',)  # override in subclasses that expect multiple reports per month
 
     report_yearmonth = YearmonthField()
 
@@ -74,18 +75,19 @@ def set_report_id(sender, instance, **kwargs):
     # "ON CONFLICT UPDATE" behavior -- if the document
     # already exists, it will be updated rather than duplicated.
    # Cannot detect/avoid conflicts this way, but that's ok.
-
-    if issubclass(sender, DailyReport):
-        duf_name = instance.DAILY_UNIQUE_FIELD
-        if duf_name is None:
-            instance.meta.id = stable_key(instance.report_date)
-        else:
-            duf_value = getattr(instance, duf_name)
-            if not duf_value or not isinstance(duf_value, str):
-                raise ReportInvalid(f'{sender.__name__}.{duf_name} MUST have a non-empty string value (got {duf_value})')
-            instance.meta.id = stable_key(instance.report_date, duf_value)
-    elif issubclass(sender, MonthlyReport):
-        instance.meta.id = stable_key(instance.report_yearmonth)
+    if issubclass(sender, (DailyReport, MonthlyReport)):
+        unique_together_fields = getattr(sender, 'UNIQUE_TOGETHER', None)
+        if not unique_together_fields:
+            raise ValueError(f'{sender.__name__}.UNIQUE_TOGETHER must be non-empty!')
+        unique_together_values = []
+        for field_name in unique_together_fields:
+            field_value = getattr(instance, field_name)
+            field_value_str = str(field_value)
+            if (field_value is None) or (not field_value_str):
+                raise ReportInvalid(f'{sender.__name__}.{field_name} must have a non-empty stringable value (got {field_value})')
+            unique_together_values.append(field_value_str)
+        assert len(unique_together_values) > 0
+        instance.meta.id = stable_key(*unique_together_values)
 
 
 #### BEGIN reusable inner objects #####
@@ -157,7 +159,7 @@ class DownloadCountReport(DailyReport):
 
 
 class InstitutionSummaryReport(DailyReport):
-    DAILY_UNIQUE_FIELD = 'institution_id'
+    UNIQUE_TOGETHER = ('report_date', 'institution_id',)
 
     institution_id = metrics.Keyword()
     institution_name = metrics.Keyword()
@@ -169,7 +171,7 @@ class NewUserDomainReport(DailyReport):
-    DAILY_UNIQUE_FIELD = 'domain_name'
+    UNIQUE_TOGETHER = ('report_date', 'domain_name',)
 
     domain_name = metrics.Keyword()
     new_user_count = metrics.Integer()
@@ -187,7 +189,7 @@ class OsfstorageFileCountReport(DailyReport):
 
 
 class PreprintSummaryReport(DailyReport):
-    DAILY_UNIQUE_FIELD = 'provider_key'
+    UNIQUE_TOGETHER = ('report_date', 'provider_key',)
 
     provider_key = metrics.Keyword()
     preprint_count = metrics.Integer()
@@ -212,5 +214,23 @@ class SpamSummaryReport(MonthlyReport):
     preprint_confirmed_spam = metrics.Integer()
     preprint_confirmed_ham = metrics.Integer()
     preprint_flagged = metrics.Integer()
-    user_marked_as_spam = metrics.Integer()
+    users_marked_as_spam = metrics.Integer()
     user_marked_as_ham = metrics.Integer()
+
+
+class MonthlyUseReport(MonthlyReport):
+    total_session_hours = metrics.Integer()
+    average_sessions_per_hour = metrics.Float()
+
+
+class MonthlyRouteUse(MonthlyReport):
+    UNIQUE_TOGETHER = ('report_yearmonth', 'route_name')
+    route_name = metrics.Keyword(
+        fields={
+            'by_prefix': metrics.Text(analyzer=route_prefix_analyzer),
+        },
+    )
+    use_count = metrics.Integer()
+    sessionhour_count = metrics.Integer()
+    use_count__with_subroutes = metrics.Integer()
+    sessionhour_count__with_subroutes = metrics.Integer()
diff --git a/osf/metrics/utils.py b/osf/metrics/utils.py
index 5ea397fef39..825ab218b67 100644
--- a/osf/metrics/utils.py
+++ b/osf/metrics/utils.py
@@ -4,6 +4,7 @@
 from hashlib import sha256
 
 import pytz
+from elasticsearch_dsl import analyzer, tokenizer
 
 
 def stable_key(*key_parts):
@@ -52,3 +53,9 @@ def next_month(self):
         if self.month == 12:
             return datetime.datetime(self.year + 1, 1, 1, tzinfo=pytz.utc)
         return datetime.datetime(self.year, self.month + 1, 1, tzinfo=pytz.utc)
+
+
+# for fields that represent dot-delimited paths to allow querying/aggregating by prefix
+# (e.g. 'root.to.leaf' yields terms ['root', 'root.to', 'root.to.leaf'])
+route_prefix_tokenizer = tokenizer('route_prefix_tokenizer', 'path_hierarchy', delimiter='.')
+route_prefix_analyzer = analyzer('route_prefix_analyzer', tokenizer=route_prefix_tokenizer)
diff --git a/osf_tests/metrics/test_daily_report.py b/osf_tests/metrics/test_daily_report.py
index ed803bea465..4b993954724 100644
--- a/osf_tests/metrics/test_daily_report.py
+++ b/osf_tests/metrics/test_daily_report.py
@@ -37,11 +37,12 @@ class Meta:
             assert report.meta.id == expected_key
             mock_save.reset_mock()
 
-    def test_with_duf(self, mock_save):
+    def test_with_unique_together(self, mock_save):
         # multiple reports of this type per day, unique by given field
         class UniqueByDateAndField(DailyReport):
-            DAILY_UNIQUE_FIELD = 'duf'
-            duf = metrics.Keyword()
+            UNIQUE_TOGETHER = ('report_date', 'my_uniq_field',)
+
+            my_uniq_field = metrics.Keyword()
 
             class Meta:
                 app_label = 'osf'
@@ -49,7 +50,7 @@ class Meta:
         today = date(2022, 5, 18)
 
         expected_blah = 'dca57e6cde89b19274ea24bc713971dab137a896b8e06d43a11a3f437cd1d151'
-        blah_report = UniqueByDateAndField(report_date=today, duf='blah')
+        blah_report = UniqueByDateAndField(report_date=today, my_uniq_field='blah')
         blah_report.save()
         assert mock_save.call_count == 1
         assert mock_save.call_args[0][0] is blah_report
@@ -57,7 +58,7 @@ class Meta:
         mock_save.reset_mock()
 
         expected_fleh = 'e7dd5ff6b087807efcfa958077dc713878f21c65af79b3ccdb5dc2409bf5ad99'
-        fleh_report = UniqueByDateAndField(report_date=today, duf='fleh')
+        fleh_report = UniqueByDateAndField(report_date=today, my_uniq_field='fleh')
         fleh_report.save()
         assert mock_save.call_count == 1
         assert mock_save.call_args[0][0] is fleh_report
diff --git a/osf_tests/metrics/test_monthly_report.py b/osf_tests/metrics/test_monthly_report.py
new file mode 100644
index 00000000000..d3c8bd10d49
--- /dev/null
+++ b/osf_tests/metrics/test_monthly_report.py
@@ -0,0 +1,69 @@
+from unittest import mock
+
+import pytest
+from elasticsearch_metrics import metrics
+
+from osf.metrics.reports import MonthlyReport, ReportInvalid
+from osf.metrics.utils import YearMonth
+
+
+class TestMonthlyReportKey:
+    @pytest.fixture
+    def mock_save(self):
+        with mock.patch('elasticsearch_dsl.Document.save', autospec=True) as mock_save:
+            yield mock_save
+
+    def test_default(self, mock_save):
+        # only one of this type of report per day
+        class UniqueByDate(MonthlyReport):
+            blah = metrics.Keyword()
+
+            class Meta:
+                app_label = 'osf'
+
+        this_month = YearMonth(2022, 5)
+
+        reports = [
+            UniqueByDate(report_yearmonth=this_month),
+            UniqueByDate(report_yearmonth=this_month, blah='blah'),
+            UniqueByDate(report_yearmonth=this_month, blah='fleh'),
+        ]
+        expected_key = '8463aac67c1e5a038049196781d8f100f069225352d1829651892cf3fbfc50e2'
+
+        for report in reports:
+            report.save()
+            assert mock_save.call_count == 1
+            assert mock_save.call_args[0][0] is report
+            assert report.meta.id == expected_key
+            mock_save.reset_mock()
+
+    def test_with_unique_together(self, mock_save):
+        # multiple reports of this type per day, unique by given field
+        class UniqueByDateAndField(MonthlyReport):
+            UNIQUE_TOGETHER = ('report_yearmonth', 'my_uniq_field',)
+            my_uniq_field = metrics.Keyword()
+
+            class Meta:
+                app_label = 'osf'
+
+        this_month = YearMonth(2022, 5)
+
+        expected_blah = '62ebf38317cd8402e27a50ce99f836d1734b3f545adf7d144d0e1cf37a0d9d08'
+        blah_report = UniqueByDateAndField(report_yearmonth=this_month, my_uniq_field='blah')
+        blah_report.save()
+        assert mock_save.call_count == 1
+        assert mock_save.call_args[0][0] is blah_report
+        assert blah_report.meta.id == expected_blah
+        mock_save.reset_mock()
+
+        expected_fleh = '385700db282f6d6089a0d21836db5ee8423f548615e515b6e034bcc90a14500f'
+        fleh_report = UniqueByDateAndField(report_yearmonth=this_month, my_uniq_field='fleh')
+        fleh_report.save()
+        assert mock_save.call_count == 1
+        assert mock_save.call_args[0][0] is fleh_report
+        assert fleh_report.meta.id == expected_fleh
+        mock_save.reset_mock()
+
+        bad_report = UniqueByDateAndField(report_yearmonth=this_month)
+        with pytest.raises(ReportInvalid):
+            bad_report.save()

From 00f63a4371e26da73c2142bf755ccb289da4aa94 Mon Sep 17 00:00:00 2001
From: Abram Booth
Date: Wed, 8 Feb 2023 16:38:29 -0500
Subject: [PATCH 2/2] sessionhour and route reports

---
 api/metrics/views.py                          |  4 +-
 osf/metrics/counted_usage.py                  |  2 +-
 osf/metrics/reporters/__init__.py             |  6 ++-
 osf/metrics/reporters/monthly_route_use.py    | 38 +++++++++++++++++++
 osf/metrics/reporters/monthly_sessionhours.py | 31 +++++++++++++++
 osf/metrics/reporters/monthly_use.py          | 26 -------------
 osf/metrics/reporters/spam_count.py           |  4 +-
 osf/metrics/reports.py                        | 13 +++----
 osf/metrics/utils.py                          | 25 +++++++-----
 9 files changed, 100 insertions(+), 49 deletions(-)
 create mode 100644 osf/metrics/reporters/monthly_route_use.py
 create mode 100644 osf/metrics/reporters/monthly_sessionhours.py
 delete mode 100644 osf/metrics/reporters/monthly_use.py

diff --git a/api/metrics/views.py b/api/metrics/views.py
index 784a3932942..7c06676b62a 100644
--- a/api/metrics/views.py
+++ b/api/metrics/views.py
@@ -273,6 +273,8 @@ def get(self, request, *args, **kwargs):
     'user_summary': reports.UserSummaryReport,
     'spam_summary': reports.SpamSummaryReport,
     'new_user_domains': reports.NewUserDomainReport,
+    'monthly_sessionhours': reports.MonthlySessionhoursReport,
+    'monthly_route_use': reports.MonthlyRouteUseReport,
 }
@@ -344,7 +346,7 @@ def get(self, request, *args, report_name):
             range_field_name = 'report_yearmonth'
             range_parser = parse_yearmonth_range
         else:
-            raise ValueError(f'report class must subclass DailyReport or MonthlyReport: {report_class}')
+            raise ValueError(f'VIEWABLE_REPORTS values should subclass DailyReport or MonthlyReport ("{report_name}": {report_class})')
         range_filter = range_parser(request.GET)
         search_recent = (
             report_class.search()
diff --git a/osf/metrics/counted_usage.py b/osf/metrics/counted_usage.py
index c0c4574a421..29649c74089 100644
--- a/osf/metrics/counted_usage.py
+++ b/osf/metrics/counted_usage.py
@@ -26,7 +26,7 @@ class PageviewInfo(InnerDoc):
     page_title = metrics.Keyword()
     route_name = metrics.Keyword(
         fields={
-            'by_prefix': metrics.Text(analyzer=route_prefix_analyzer),
+            'by_prefix': metrics.Text(analyzer=route_prefix_analyzer, fielddata=True),
         },
     )
diff --git a/osf/metrics/reporters/__init__.py b/osf/metrics/reporters/__init__.py
index 8cf8adf551f..179a5d8e943 100644
--- a/osf/metrics/reporters/__init__.py
+++ b/osf/metrics/reporters/__init__.py
@@ -8,7 +8,8 @@
 from .preprint_count import PreprintCountReporter
 from .user_count import UserCountReporter
 from .spam_count import SpamCountReporter
-from .monthly_use import MonthlyUseReporter
+from .monthly_sessionhours import MonthlySessionhoursReporter
+from .monthly_route_use import MonthlyRouteUseReporter
 
 
 DAILY_REPORTERS = (
@@ -25,5 +26,6 @@
 
 MONTHLY_REPORTERS = (
     SpamCountReporter,
-    MonthlyUseReporter,
+    MonthlySessionhoursReporter,
+    MonthlyRouteUseReporter,
 )
diff --git a/osf/metrics/reporters/monthly_route_use.py b/osf/metrics/reporters/monthly_route_use.py
new file mode 100644
index 00000000000..13de2a58cf4
--- /dev/null
+++ b/osf/metrics/reporters/monthly_route_use.py
@@ -0,0 +1,38 @@
+from osf.metrics.counted_usage import CountedUsage
+from osf.metrics.reports import MonthlyRouteUseReport
+from ._base import MonthlyReporter
+
+
+class MonthlyRouteUseReporter(MonthlyReporter):
+    def report(self, report_yearmonth):
+        start = report_yearmonth.as_datetime()
+        end = report_yearmonth.next().as_datetime()
+        search = (
+            CountedUsage.search()
+            .filter('range', timestamp={'gte': start, 'lte': end})
+            [:0]  # just the aggregations, no hits
+        )
+        route_agg = search.aggs.bucket(
+            'by_route',
+            'terms',
+            field='pageview_info.route_name',
+        )
+        route_agg.metric(
+            'total_sessions',
+            'cardinality',
+            field='session_id',
+            precision_threshold=40000,  # maximum precision
+        )
+
+        result = search.execute()
+
+        reports = []
+        for route_bucket in result.aggs.by_route.buckets:
+            report = MonthlyRouteUseReport(
+                report_yearmonth=report_yearmonth,
+                route_name=route_bucket.key,
+                use_count=route_bucket.doc_count,
+                sessionhour_count=route_bucket.total_sessions.value,
+            )
+            reports.append(report)
+        return reports
diff --git a/osf/metrics/reporters/monthly_sessionhours.py b/osf/metrics/reporters/monthly_sessionhours.py
new file mode 100644
index 00000000000..beff9d5effc
--- /dev/null
+++ b/osf/metrics/reporters/monthly_sessionhours.py
@@ -0,0 +1,31 @@
+from osf.metrics.counted_usage import CountedUsage
+from osf.metrics.reports import MonthlySessionhoursReport
+from ._base import MonthlyReporter
+
+
+class MonthlySessionhoursReporter(MonthlyReporter):
+    def report(self, report_yearmonth):
+        start = report_yearmonth.as_datetime()
+        end = report_yearmonth.next().as_datetime()
+        search = (
+            CountedUsage.search()
+            .filter('range', timestamp={'gte': start, 'lte': end})
+            [:0]  # just the aggregations, no hits
+        )
+        search.aggs.metric(
+            'total_sessionhour_count',
+            'cardinality',
+            field='session_id',
+            precision_threshold=40000,  # maximum precision
+        )
+        result = search.execute()
+        total_sessionhour_count = result.aggs.total_sessionhour_count.value
+        month_timedelta = (end - start)
+        month_hours = (24 * month_timedelta.days) + int(month_timedelta.seconds / (60 * 60))
+        average_sessions_per_hour = total_sessionhour_count / month_hours
+        report = MonthlySessionhoursReport(
+            report_yearmonth=report_yearmonth,
+            total_sessionhour_count=total_sessionhour_count,
+            average_sessions_per_hour=average_sessions_per_hour,
+        )
+        return [report]
diff --git a/osf/metrics/reporters/monthly_use.py b/osf/metrics/reporters/monthly_use.py
deleted file mode 100644
index 4fcaf0e9155..00000000000
--- a/osf/metrics/reporters/monthly_use.py
+++ /dev/null
@@ -1,26 +0,0 @@
-from osf.metrics.counted_usage import CountedUsage
-from osf.metrics.reports import MonthlyUseReport
-from ._base import MonthlyReporter
-
-
-class MonthlyUseReporter(MonthlyReporter):
-    def report(self, report_yearmonth):
-        start = report_yearmonth.target_month()
-        end = report_yearmonth.next_month()
-        search = (
-            CountedUsage.search()
-            .filter('range', timestamp={'gte': start, 'lte': end})
-            [:0]  # just the aggregations, no hits
-        )
-        search.aggs.metric('total_session_hours', 'cardinality', field='session_id')
-        result = search.execute()
-        total_session_hours = result.aggs.total_session_hours.value
-        month_timedelta = (end - start)
-        month_hours = (24 * month_timedelta.days) + int(month_timedelta.seconds / (60 * 60))
-        average_sessions_per_hour = total_session_hours / month_hours
-        report = MonthlyUseReport(
-            report_yearmonth=report_yearmonth,
-            total_session_hours=total_session_hours,
-            average_sessions_per_hour=average_sessions_per_hour,
-        )
-        return [report]
diff --git a/osf/metrics/reporters/spam_count.py b/osf/metrics/reporters/spam_count.py
index 54feae8bee5..a77ef33c0bd 100644
--- a/osf/metrics/reporters/spam_count.py
+++ b/osf/metrics/reporters/spam_count.py
@@ -9,8 +9,8 @@ class SpamCountReporter(MonthlyReporter):
 
     def report(self, report_yearmonth):
-        target_month = report_yearmonth.target_month()
-        next_month = report_yearmonth.next_month()
+        target_month = report_yearmonth.as_datetime()
+        next_month = report_yearmonth.next().as_datetime()
 
         report = SpamSummaryReport(
             report_yearmonth=str(report_yearmonth),
diff --git a/osf/metrics/reports.py b/osf/metrics/reports.py
index 1f9f2d3e090..7b4edac0c5d 100644
--- a/osf/metrics/reports.py
+++ b/osf/metrics/reports.py
@@ -218,19 +218,18 @@ class SpamSummaryReport(MonthlyReport):
     user_marked_as_ham = metrics.Integer()
 
 
-class MonthlyUseReport(MonthlyReport):
-    total_session_hours = metrics.Integer()
+class MonthlySessionhoursReport(MonthlyReport):
+    total_sessionhour_count = metrics.Integer()
     average_sessions_per_hour = metrics.Float()
 
 
-class MonthlyRouteUse(MonthlyReport):
-    UNIQUE_TOGETHER = ('report_yearmonth', 'route_name')
+class MonthlyRouteUseReport(MonthlyReport):
+    UNIQUE_TOGETHER = ('report_yearmonth', 'route_name',)
     route_name = metrics.Keyword(
         fields={
-            'by_prefix': metrics.Text(analyzer=route_prefix_analyzer),
+            # "route_name.by_prefix" subfield for aggregating subroutes
+            'by_prefix': metrics.Text(analyzer=route_prefix_analyzer, fielddata=True),
         },
     )
     use_count = metrics.Integer()
     sessionhour_count = metrics.Integer()
-    use_count__with_subroutes = metrics.Integer()
-    sessionhour_count__with_subroutes = metrics.Integer()
diff --git a/osf/metrics/utils.py b/osf/metrics/utils.py
index 825ab218b67..21851dc41af 100644
--- a/osf/metrics/utils.py
+++ b/osf/metrics/utils.py
@@ -3,7 +3,6 @@
 import typing
 from hashlib import sha256
 
-import pytz
 from elasticsearch_dsl import analyzer, tokenizer
 
 
 def stable_key(*key_parts):
@@ -22,8 +21,8 @@ class YearMonth(typing.NamedTuple):
-    year: int
-    month: int
+    year: int  # assumed >= 1000, < 10000
+    month: int  # assumed >= 1, <= 12
 
     YEARMONTH_RE = re.compile(r'(?P<year>\d{4})-(?P<month>\d{2})')
@@ -46,16 +45,22 @@ def from_str(cls, input_str):
     def __str__(self):
         return f'{self.year}-{self.month:0>2}'
 
-    def target_month(self):
-        return datetime.datetime(self.year, self.month, 1, tzinfo=pytz.utc)
+    def as_datetime(self) -> datetime.datetime:
+        return datetime.datetime(self.year, self.month, 1, tzinfo=datetime.timezone.utc)
 
-    def next_month(self):
+    def next(self):
         if self.month == 12:
-            return datetime.datetime(self.year + 1, 1, 1, tzinfo=pytz.utc)
-        return datetime.datetime(self.year, self.month + 1, 1, tzinfo=pytz.utc)
+            return YearMonth(self.year + 1, 1)
+        return YearMonth(self.year, self.month + 1)
+
+    def prior(self):
+        if self.month == 1:
+            return YearMonth(self.year - 1, 12)
+        return YearMonth(self.year, self.month - 1)
 
 
-# for fields that represent dot-delimited paths to allow querying/aggregating by prefix
-# (e.g. 'root.to.leaf' yields terms ['root', 'root.to', 'root.to.leaf'])
+# for elasticsearch fields that hold dot-delimited paths,
+# to allow querying/aggregating by prefix (e.g. 'root.to.leaf'
+# yields tokens ['root', 'root.to', 'root.to.leaf'])
 route_prefix_tokenizer = tokenizer('route_prefix_tokenizer', 'path_hierarchy', delimiter='.')
 route_prefix_analyzer = analyzer('route_prefix_analyzer', tokenizer=route_prefix_tokenizer)