feat(relocation): Implement SaaS -> SaaS export (#73689)
To perform a SaaS -> SaaS relocation, we must first successfully export
the serialized organization data from the source region to the target
region. This PR enables this capability, using a middleman proxy service
on the control silo to coordinate this work.
azaslavsky committed Jul 15, 2024
1 parent 45292e9 commit 9b818ce
Showing 21 changed files with 1,237 additions and 106 deletions.
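
Taken together, the commit message and the new RELOCATION_EXPORT_REQUEST / RELOCATION_EXPORT_REPLY handlers below describe a relay: the requesting (target) region asks for an export, the control silo proxies that request to the exporting (source) region, and the encrypted tarball travels back along the same path, being written to a relocation bucket at each hop. The following is a minimal, self-contained sketch of that relay; the class names, in-memory dicts, and direct method calls are illustrative stand-ins for Sentry's relocation RPC services, relocation GCS buckets, and outbox draining, and only the handler and task names mentioned in the comments come from this diff.

# Illustrative sketch only -- not Sentry's actual outbox, RPC, or storage machinery.
# The three classes stand in for the requesting region, the control-silo proxy, and the
# exporting region; in-memory dicts stand in for the relocation GCS buckets, and direct
# method calls stand in for outbox-drained RPC hops.

class ExportingRegion:
    """Source region that owns the organization being exported."""

    def __init__(self) -> None:
        self.relocation_bucket: dict[str, bytes] = {}  # stand-in for get_relocation_storage()
        self.control: "ControlProxy | None" = None

    def request_new_export(self, relocation_uuid: str, org_slug: str) -> None:
        # Run the export, write the encrypted tarball to the local relocation bucket, then
        # send the reply back through the control proxy.
        path = f"runs/{relocation_uuid}/saas_to_saas_export/{org_slug}.tar"
        self.relocation_bucket[path] = b"<encrypted export tarball>"
        assert self.control is not None
        self.control.reply_with_export(relocation_uuid, org_slug, self.relocation_bucket[path])


class ControlProxy:
    """Control-silo middleman that relays requests and replies between regions."""

    def __init__(self, exporting: ExportingRegion, requesting: "RequestingRegion") -> None:
        self.exporting = exporting
        self.requesting = requesting
        self.relocation_bucket: dict[str, bytes] = {}

    def request_new_export(self, relocation_uuid: str, org_slug: str) -> None:
        # Mirrors process_relocation_request_new_export: forward the request to the
        # exporting region once the RELOCATION_EXPORT_REQUEST outbox drains.
        self.exporting.request_new_export(relocation_uuid, org_slug)

    def reply_with_export(self, relocation_uuid: str, org_slug: str, tarball: bytes) -> None:
        # Mirrors the control-silo process_relocation_reply_with_export: persist the tarball
        # locally, then forward it to the requesting region.
        path = f"runs/{relocation_uuid}/saas_to_saas_export/{org_slug}.tar"
        self.relocation_bucket[path] = tarball
        self.requesting.receive_export(relocation_uuid, org_slug, tarball)


class RequestingRegion:
    """Target region that asked for the export."""

    def __init__(self) -> None:
        self.received: dict[str, bytes] = {}

    def start(self, control: ControlProxy, relocation_uuid: str, org_slug: str) -> None:
        # Stand-in for the uploading_start task that kicks off the relay on the requesting side.
        control.request_new_export(relocation_uuid, org_slug)

    def receive_export(self, relocation_uuid: str, org_slug: str, tarball: bytes) -> None:
        self.received[f"{relocation_uuid}/{org_slug}"] = tarball


requesting = RequestingRegion()
exporting = ExportingRegion()
control = ControlProxy(exporting, requesting)
exporting.control = control
requesting.start(control, "00000000-0000-0000-0000-000000000000", "acme")
print(sorted(requesting.received))  # the requesting region now holds the encrypted tarball
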
4 changes: 4 additions & 0 deletions fixtures/backup/model_dependencies/detailed.json
@@ -5099,6 +5099,10 @@
[
"file",
"relocation"
],
[
"kind",
"relocation"
]
]
},
2 changes: 1 addition & 1 deletion migrations_lockfile.txt
@@ -10,6 +10,6 @@ hybridcloud: 0016_add_control_cacheversion
nodestore: 0002_nodestore_no_dictfield
remote_subscriptions: 0003_drop_remote_subscription
replays: 0004_index_together
sentry: 0739_backfill_group_info_to_group_attributes
sentry: 0740_one_relocation_file_kind_per_relocation
social_auth: 0002_default_auto_field
uptime: 0004_projectuptimesubscription_mode
4 changes: 2 additions & 2 deletions src/sentry/api/endpoints/relocations/index.py
@@ -29,7 +29,7 @@
from sentry.search.utils import tokenize_query
from sentry.signals import relocation_link_promo_code
from sentry.slug.patterns import ORG_SLUG_PATTERN
from sentry.tasks.relocation import uploading_complete
from sentry.tasks.relocation import uploading_start
from sentry.users.services.user.model import RpcUser
from sentry.users.services.user.service import user_service
from sentry.utils.db import atomic_transaction
@@ -277,7 +277,7 @@ def post(self, request: Request) -> Response:
relocation_link_promo_code.send_robust(
relocation_uuid=relocation.uuid, promo_code=promo_code, sender=self.__class__
)
uploading_complete.delay(relocation.uuid)
uploading_start.delay(relocation.uuid)
try:
analytics.record(
"relocation.created",
4 changes: 2 additions & 2 deletions src/sentry/api/endpoints/relocations/retry.py
@@ -22,7 +22,7 @@
from sentry.models.files.file import File
from sentry.models.relocation import Relocation, RelocationFile
from sentry.signals import relocation_retry_link_promo_code
from sentry.tasks.relocation import uploading_complete
from sentry.tasks.relocation import uploading_start
from sentry.users.services.user.service import user_service
from sentry.utils.db import atomic_transaction

@@ -125,7 +125,7 @@ def post(self, request: Request, relocation_uuid: str) -> Response:
kind=RelocationFile.Kind.RAW_USER_DATA.value,
)

uploading_complete.delay(new_relocation.uuid)
uploading_start.delay(new_relocation.uuid)
try:
analytics.record(
"relocation.created",
32 changes: 32 additions & 0 deletions src/sentry/migrations/0740_one_relocation_file_kind_per_relocation.py
@@ -0,0 +1,32 @@
# Generated by Django 5.0.6 on 2024-07-08 22:33

from django.db import migrations

from sentry.new_migrations.migrations import CheckedMigration


class Migration(CheckedMigration):
# This flag is used to mark that a migration shouldn't be automatically run in production.
# This should only be used for operations where it's safe to run the migration after your
# code has deployed. So this should not be used for most operations that alter the schema
# of a table.
# Here are some things that make sense to mark as post deployment:
# - Large data migrations. Typically we want these to be run manually so that they can be
# monitored and not block the deploy for a long period of time while they run.
# - Adding indexes to large tables. Since this can take a long time, we'd generally prefer to
# run this outside deployments so that we don't block them. Note that while adding an index
# is a schema change, it's completely safe to run the operation after the code has deployed.
# Once deployed, run these manually via: https://develop.sentry.dev/database-migrations/#migration-deployment

is_post_deployment = False

dependencies = [
("sentry", "0739_backfill_group_info_to_group_attributes"),
]

operations = [
migrations.AlterUniqueTogether(
name="relocationfile",
unique_together={("relocation", "file"), ("relocation", "kind")},
),
]
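
The AlterUniqueTogether operation above adds ("relocation", "kind") alongside the existing ("relocation", "file") pairing, so each relocation can hold at most one RelocationFile of a given kind. A small stand-in (sqlite3 in place of the real Django/Postgres table, with illustrative column names) shows the invariant the new constraint enforces:

# Illustrative only: not the real sentry_relocationfile schema, just the shape of the
# uniqueness rule the migration adds.
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute(
    """
    CREATE TABLE relocationfile (
        id INTEGER PRIMARY KEY,
        relocation_id INTEGER NOT NULL,
        file_id INTEGER NOT NULL,
        kind INTEGER NOT NULL,
        UNIQUE (relocation_id, file_id),
        UNIQUE (relocation_id, kind)
    )
    """
)
conn.execute("INSERT INTO relocationfile (relocation_id, file_id, kind) VALUES (1, 10, 1)")
try:
    # A second file of the same kind for the same relocation now violates the constraint.
    conn.execute("INSERT INTO relocationfile (relocation_id, file_id, kind) VALUES (1, 11, 1)")
except sqlite3.IntegrityError as e:
    print("rejected duplicate (relocation, kind):", e)
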
6 changes: 6 additions & 0 deletions src/sentry/models/outbox.py
@@ -97,6 +97,9 @@ class OutboxCategory(IntEnum):
ISSUE_COMMENT_UPDATE = 34
EXTERNAL_ACTOR_UPDATE = 35

RELOCATION_EXPORT_REQUEST = 36
RELOCATION_EXPORT_REPLY = 37

@classmethod
def as_choices(cls):
return [(i.value, i.value) for i in cls]
@@ -343,6 +346,9 @@ class OutboxScope(IntEnum):
},
)
SUBSCRIPTION_SCOPE = scope_categories(9, {OutboxCategory.SUBSCRIPTION_UPDATE})
RELOCATION_SCOPE = scope_categories(
10, {OutboxCategory.RELOCATION_EXPORT_REQUEST, OutboxCategory.RELOCATION_EXPORT_REPLY}
)

def __str__(self):
return self.name
14 changes: 7 additions & 7 deletions src/sentry/models/relocation.py
@@ -201,18 +201,18 @@ class Kind(Enum):
#
# TODO(getsentry/team-ospo#216): Add a normalization step to the relocation flow
NORMALIZED_USER_DATA = 2
# (Deprecated) The global configuration we're going to validate against - pulled from the
# live Sentry instance, not supplied by the user.
# The global configuration we're going to validate against - pulled from the live Sentry
# instance, not supplied by the user.
#
# TODO(getsentry/team-ospo#216): Deprecated, since we no longer store these in main bucket.
# Remove in the future.
# Note: These files are only ever stored in the relocation-specific GCP bucket, never in the
# main filestore, so in practice no DB entry should have this value set.
BASELINE_CONFIG_VALIDATION_DATA = 3
# (Deprecated) The colliding users we're going to validate against - pulled from the live
# Sentry instance, not supplied by the user. However, to determine what is a "colliding
# user", we must inspect the user-provided data.
#
# TODO(getsentry/team-ospo#216): Deprecated, since we no longer store these in main bucket.
# Remove in the future.
# Note: These files are only ever stored in the relocation-specific GCP bucket, never in the
# main filestore, so in practice no DB entry should have this value set.
COLLIDING_USERS_VALIDATION_DATA = 4

# TODO(getsentry/team-ospo#190): Could we dedup this with a mixin in the future?
@@ -242,7 +242,7 @@ def to_filename(self, ext: str):
__repr__ = sane_repr("relocation", "file")

class Meta:
unique_together = (("relocation", "file"),)
unique_together = (("relocation", "file"), ("relocation", "kind"))
app_label = "sentry"
db_table = "sentry_relocationfile"

47 changes: 47 additions & 0 deletions src/sentry/receivers/outbox/control.py
@@ -5,6 +5,7 @@
are drained. Receivers are expected to make local state changes (tombstones)
and perform RPC calls to propagate changes to relevant region(s).
"""

from __future__ import annotations

import logging
@@ -17,13 +18,15 @@
from sentry.hybridcloud.rpc.caching import region_caching_service
from sentry.issues.services.issue import issue_service
from sentry.models.apiapplication import ApiApplication
from sentry.models.files.utils import get_relocation_storage
from sentry.models.integrations.integration import Integration
from sentry.models.integrations.sentry_app import SentryApp
from sentry.models.integrations.sentry_app_installation import SentryAppInstallation
from sentry.models.organizationmapping import OrganizationMapping
from sentry.models.outbox import OutboxCategory, process_control_outbox
from sentry.organizations.services.organization import RpcOrganizationSignal, organization_service
from sentry.receivers.outbox import maybe_process_tombstone
from sentry.relocation.services.relocation_export.service import region_relocation_export_service
from sentry.sentry_apps.services.app.service import get_by_application_id, get_installation

logger = logging.getLogger(__name__)
@@ -127,3 +130,47 @@ def process_issue_email_reply(shard_identifier: int, payload: Any, **kwds):
from_email=payload["from_email"],
text=payload["text"],
)


# See the comment on /src/sentry/tasks/relocation.py::uploading_start for a detailed description of
# how this outbox drain handler fits into the entire SAAS->SAAS relocation workflow.
@receiver(process_control_outbox, sender=OutboxCategory.RELOCATION_EXPORT_REQUEST)
def process_relocation_request_new_export(payload: Mapping[str, Any], **kwds):
encrypt_with_public_key = (
payload["encrypt_with_public_key"].encode("utf-8")
if isinstance(payload["encrypt_with_public_key"], str)
else payload["encrypt_with_public_key"]
)
region_relocation_export_service.request_new_export(
relocation_uuid=payload["relocation_uuid"],
requesting_region_name=payload["requesting_region_name"],
replying_region_name=payload["replying_region_name"],
org_slug=payload["org_slug"],
encrypt_with_public_key=encrypt_with_public_key,
)


# See the comment on /src/sentry/tasks/relocation.py::uploading_start for a detailed description of
# how this outbox drain handler fits into the entire SAAS->SAAS relocation workflow.
@receiver(process_control_outbox, sender=OutboxCategory.RELOCATION_EXPORT_REPLY)
def process_relocation_reply_with_export(payload: Mapping[str, Any], **kwds):
# We expect the `ProxyRelocationExportService::reply_with_export` implementation to have written
# the export data to the control silo's local relocation-specific GCS bucket. Here, we just read
# it into memory and attempt the RPC call back to the requesting region.
uuid = payload["relocation_uuid"]
slug = payload["org_slug"]
relocation_storage = get_relocation_storage()
path = f"runs/{uuid}/saas_to_saas_export/{slug}.tar"
try:
encrypted_contents = relocation_storage.open(path)
except Exception:
raise FileNotFoundError("Could not open SaaS -> SaaS export in proxy relocation bucket.")

with encrypted_contents:
region_relocation_export_service.reply_with_export(
relocation_uuid=payload["relocation_uuid"],
requesting_region_name=payload["requesting_region_name"],
replying_region_name=payload["replying_region_name"],
org_slug=payload["org_slug"],
encrypted_contents=encrypted_contents.read(),
)
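
For reference, the payload shape both handlers above consume can be read directly from their bodies; the dict below is an illustrative reconstruction, and every value in it (including the region names) is made up. The isinstance check in the request handler exists because encrypt_with_public_key may arrive JSON-decoded as a str rather than bytes, so it is re-encoded before the RPC call, while the reply handler carries no file contents in its payload and instead re-reads the tarball from the control silo's relocation bucket.

# Illustrative payload for a RELOCATION_EXPORT_REQUEST / RELOCATION_EXPORT_REPLY outbox
# message, reconstructed from the handlers above. All values are examples.
relocation_export_payload = {
    "relocation_uuid": "00000000-0000-0000-0000-000000000000",
    "requesting_region_name": "de",
    "replying_region_name": "us",
    "org_slug": "acme",
    # Read only by the request handler; the reply handler does not use this key. May be a
    # str (JSON-decoded) or bytes.
    "encrypt_with_public_key": "-----BEGIN PUBLIC KEY-----\n...\n-----END PUBLIC KEY-----\n",
}

# The reply handler re-derives the bucket path from the payload instead of shipping the
# tarball through the outbox row:
path = "runs/{relocation_uuid}/saas_to_saas_export/{org_slug}.tar".format(
    **relocation_export_payload
)
print(path)  # runs/00000000-0000-0000-0000-000000000000/saas_to_saas_export/acme.tar
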
28 changes: 28 additions & 0 deletions src/sentry/receivers/outbox/region.py
@@ -5,6 +5,7 @@
are drained. Receivers are expected to make local state changes (tombstones)
and perform RPC calls to propagate changes to Control Silo.
"""

from __future__ import annotations

from typing import Any
@@ -20,10 +21,12 @@
update_organization_mapping_from_instance,
)
from sentry.models.authproviderreplica import AuthProviderReplica
from sentry.models.files.utils import get_relocation_storage
from sentry.models.organization import Organization
from sentry.models.outbox import OutboxCategory, process_region_outbox
from sentry.models.project import Project
from sentry.receivers.outbox import maybe_process_tombstone
from sentry.relocation.services.relocation_export.service import control_relocation_export_service
from sentry.types.region import get_local_region


@@ -71,3 +74,28 @@ def process_disable_auth_provider(object_identifier: int, shard_identifier: int,
# Deprecated
auth_service.disable_provider(provider_id=object_identifier)
AuthProviderReplica.objects.filter(auth_provider_id=object_identifier).delete()


# See the comment on /src/sentry/tasks/relocation.py::uploading_start for a detailed description of
# how this outbox drain handler fits into the entire SAAS->SAAS relocation workflow.
@receiver(process_region_outbox, sender=OutboxCategory.RELOCATION_EXPORT_REPLY)
def process_relocation_reply_with_export(payload: Any, **kwds):
uuid = payload["relocation_uuid"]
slug = payload["org_slug"]
relocation_storage = get_relocation_storage()
path = f"runs/{uuid}/saas_to_saas_export/{slug}.tar"
try:
encrypted_contents = relocation_storage.open(path)
except Exception:
raise FileNotFoundError(
"Could not open SaaS -> SaaS export in export-side relocation bucket."
)

with encrypted_contents:
control_relocation_export_service.reply_with_export(
relocation_uuid=uuid,
requesting_region_name=payload["requesting_region_name"],
replying_region_name=payload["replying_region_name"],
org_slug=slug,
encrypted_contents=encrypted_contents.read(),
)
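
This region-side reply handler and its control-silo counterpart above share the same shape: re-read the encrypted tarball from the local relocation bucket at a path derived from the payload, then forward the bytes over RPC. A generic sketch of that shared pattern follows; the Storage protocol, FakeStorage, and forward callable are stand-ins for get_relocation_storage() and the relocation export RPC services.

from io import BytesIO
from typing import Callable, Protocol


class Storage(Protocol):
    def open(self, path: str): ...


def forward_export(
    storage: Storage,
    relocation_uuid: str,
    org_slug: str,
    forward: Callable[[bytes], None],
    bucket_description: str,
) -> None:
    # Derive the well-known tarball path, re-read it from the local relocation bucket, and
    # hand the bytes to the next hop; a missing file surfaces as FileNotFoundError, as in
    # the two handlers above.
    path = f"runs/{relocation_uuid}/saas_to_saas_export/{org_slug}.tar"
    try:
        encrypted_contents = storage.open(path)
    except Exception:
        raise FileNotFoundError(
            f"Could not open SaaS -> SaaS export in {bucket_description} relocation bucket."
        )
    with encrypted_contents:
        forward(encrypted_contents.read())


class FakeStorage:
    """In-memory stand-in for the relocation GCS bucket, for demonstration only."""

    def __init__(self, files: dict[str, bytes]) -> None:
        self.files = files

    def open(self, path: str) -> BytesIO:
        return BytesIO(self.files[path])


storage = FakeStorage({"runs/abc/saas_to_saas_export/acme.tar": b"tarball bytes"})
forward_export(storage, "abc", "acme", lambda data: print(len(data), "bytes forwarded"), "fake")
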
2 changes: 2 additions & 0 deletions src/sentry/relocation/services/relocation_export/__init__.py
@@ -0,0 +1,2 @@
from .model import * # noqa
from .service import * # noqa