Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(hybridcloud) Use control file for user/sentryapp avatars #54118

Merged
merged 7 commits into from
Aug 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -746,7 +746,6 @@ module = [
"sentry.tasks.deliver_from_outbox",
"sentry.tasks.derive_code_mappings",
"sentry.tasks.digests",
"sentry.tasks.files",
"sentry.tasks.groupowner",
"sentry.tasks.integrations",
"sentry.tasks.integrations.create_comment",
Expand Down
42 changes: 26 additions & 16 deletions src/sentry/models/avatars/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,11 @@ def save(self, *args, **kwargs):
return super().save(*args, **kwargs)

def get_file(self):
# Favor control_file_id if it exists and is set.
# Otherwise fallback to file_id. If still None, return.
# If we're getting a file, and the preferred write file
# type isn't present, move data over to new storage async.
file_id = getattr(self, self.file_write_fk(), None)
file_class = self.file_class()
file_id = getattr(self, self.file_fk())

if file_id is None:
file_id = self.file_id
file_class = File
Expand All @@ -70,9 +71,6 @@ def get_file(self):
self.update(**update)
return None

def get_file_id(self):
return self.file_id

def delete(self, *args, **kwargs):
file = self.get_file()
if file:
Expand Down Expand Up @@ -103,21 +101,32 @@ def get_cached_photo(self, size):
cache.set(cache_key, photo)
return photo

@classmethod
def file_class(cls):
from sentry.models import File

def file_class(self):
return File

@classmethod
def file_fk(cls) -> str:
def file_fk(self) -> str:
"""
Get the foreign key currently used by this record for blob storage.
Varies in ControlAvatarBase
"""
return "file_id"

def file_write_fk(self) -> str:
"""
Get the foreign key that should be used for writes.
Varies in ControlAvatarBase
"""
return "file_id"

@classmethod
def save_avatar(cls, relation, type, avatar=None, filename=None, color=None):
if avatar:
with atomic_transaction(using=router.db_for_write(cls.file_class())):
photo = cls.file_class().objects.create(name=filename, type=cls.FILE_TYPE)
# Create an instance of the current class so we can
# access where new files should be stored.
dummy = cls()
GabeVillalobos marked this conversation as resolved.
Show resolved Hide resolved
file_class = dummy.file_class()
with atomic_transaction(using=router.db_for_write(file_class)):
photo = file_class.objects.create(name=filename, type=cls.FILE_TYPE)
# XXX: Avatar may come in as a string instance in python2
# if it's not wrapped in BytesIO.
if isinstance(avatar, str):
Expand All @@ -141,11 +150,12 @@ def save_avatar(cls, relation, type, avatar=None, filename=None, color=None):
file.delete()

if photo:
setattr(instance, cls.file_fk(), photo.id)
if instance.file_fk() != instance.file_write_fk():
setattr(instance, instance.file_fk(), None)
GabeVillalobos marked this conversation as resolved.
Show resolved Hide resolved
setattr(instance, instance.file_write_fk(), photo.id)
instance.ident = uuid4().hex

instance.avatar_type = [i for i, n in cls.AVATAR_TYPES if n == type][0]

instance.save()

if photo and not created:
Expand Down
36 changes: 24 additions & 12 deletions src/sentry/models/avatars/control_base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from typing import Type, Union

from sentry.db.models import BoundedBigIntegerField
from sentry.models.avatars.base import AvatarBase
from sentry.models.files import ControlFileBlob, File
from sentry.models.files import ControlFile, File


class ControlAvatarBase(AvatarBase):
Expand All @@ -9,19 +11,29 @@ class ControlAvatarBase(AvatarBase):
class Meta:
abstract = True

@classmethod
def file_class(cls):
from sentry.models import ControlFile

if ControlFileBlob._storage_config():
def file_class(self) -> Union[Type[ControlFile], Type[File]]:
"""
Select the file class this avatar has used.
File classes can vary by the avatar as we have migrated
storage for saas, but self-hosted and single-tenant instances
did not have relations and storage migrated.
"""
if self.control_file_id:
return ControlFile
return File
if self.file_id:
return File
return ControlFile

@classmethod
def file_fk(cls) -> str:
if ControlFileBlob._storage_config():
def file_fk(self) -> str:
if self.control_file_id:
return "control_file_id"
return "file_id"
if self.file_id:
return "file_id"
return "control_file_id"
Comment on lines -22 to +32
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This behavior may be a bit problematic in split DB test contexts, since accessing the normal file_id in a fallback case will likely result in a cross-DB access in some transaction contexts (see test_doc_integration_avatar.py for example). Are we sure we don't want to just default to control_file_id at this point?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Non-saas environments never moved useravatar or sentryapp avatars to control file (as they don't have the separate bucket config). We need to continue using file_id for those environments as that is where their files are.

I'm considering following up this change with a backfill migration that copies all the file/fileblob/fileblobindex records for user & sentryapp avatars into ControlFile. If we do that backfill then all environments will become consistent and we can simplify this code as we don't need to split reads anymore.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ahh I didn't realize we hadn't backfilled the non-saas deploys 😞 Yeah that makes sense. Should we maybe do an env check for split db mode and fail fast in that scenario just to ensure we don't keep around these cross-db accesses?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah a split db check could work too. I'd like to get rid of the cross-db reads, but how to get there is the part I'm still trying to figure out. Migrations only have access to historical models which don't have the methods we need to move file records between backends. We could spawn the copy file task, but we'll need to account for the task changing signature/going missing in the future.


def file_write_fk(self) -> str:
"""Prefer controlfile as user/sentryapp avatars are stored in control silo"""
return "control_file_id"

def get_file_id(self):
def get_file_id(self) -> int:
return self.control_file_id or self.file_id
23 changes: 21 additions & 2 deletions src/sentry/models/avatars/user_avatar.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,44 @@
from __future__ import annotations

from enum import IntEnum

from django.db import models

from sentry.db.models import BaseManager, FlexibleForeignKey, control_silo_only_model

from . import ControlAvatarBase


class UserAvatarType(IntEnum):
LETTER_AVATAR = 0
UPLOAD = 1
GRAVATAR = 2

def api_name(self):
return self.name.lower()

@classmethod
def as_choices(cls) -> tuple[tuple[int, str], ...]:
return (
(cls.LETTER_AVATAR, cls.LETTER_AVATAR.api_name()),
(cls.UPLOAD, cls.UPLOAD.api_name()),
(cls.GRAVATAR, cls.GRAVATAR.api_name()),
)


@control_silo_only_model
class UserAvatar(ControlAvatarBase):
"""
A UserAvatar associates a User with their avatar photo File
and contains their preferences for avatar type.
"""

AVATAR_TYPES = ((0, "letter_avatar"), (1, "upload"), (2, "gravatar"))
AVATAR_TYPES = UserAvatarType.as_choices()

FILE_TYPE = "avatar.file"

user = FlexibleForeignKey("sentry.User", unique=True, related_name="avatar")
avatar_type = models.PositiveSmallIntegerField(default=0, choices=AVATAR_TYPES)
avatar_type = models.PositiveSmallIntegerField(default=0, choices=UserAvatarType.as_choices())

objects = BaseManager(cache_fields=["user"])

Expand Down
52 changes: 33 additions & 19 deletions src/sentry/models/files/control_fileblob.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,41 @@
from __future__ import annotations

from typing import Any, Dict

from sentry import options
from sentry.db.models import control_silo_only_model
from sentry.models.files.abstractfileblob import AbstractFileBlob
from sentry.models.files.control_fileblobowner import ControlFileBlobOwner
from sentry.options.manager import UnknownOption
from sentry.tasks.files import delete_file_control


def control_file_storage_config() -> Dict[str, Any] | None:
"""
When sentry is deployed in a siloed mode file relations
used by control silo models are stored separately from
region silo resources.

While we consistently write to the ControlFile django
model for control silo resources, we can't ensure
that each deployment has separate control + region storage
backends. We coalesce those options here. None means use the
global default storage options.
"""
try:
# If these options exist, use them. Otherwise fallback to default behavior
storage_backend = options.get("filestore.control.backend")
storage_options = options.get("filestore.control.options")
if storage_backend:
return {
"backend": storage_backend,
"options": storage_options,
}
except UnknownOption:
pass
return None


@control_silo_only_model
class ControlFileBlob(AbstractFileBlob):
class Meta:
Expand All @@ -14,23 +45,6 @@ class Meta:
FILE_BLOB_OWNER_MODEL = ControlFileBlobOwner
DELETE_FILE_TASK = delete_file_control

# TODO(hybrid-cloud): This is a temporary measure to allow concurrent production deployments of filestore
# We override the behavior of only the control silo file storage implementation to use
# the new control instance while production runs in monolith mode
@classmethod
def _storage_config(cls):
from sentry import options as options_store

config = None
try:
# If these options exist, use them. Otherwise fallback to default behavior
backend = options_store.get("filestore.control.backend")
options = options_store.get("filestore.control.options")
if backend:
config = {
"backend": backend,
"options": options,
}
except UnknownOption:
pass
return config
def _storage_config(cls) -> Dict[str, Any] | None:
return control_file_storage_config()
15 changes: 7 additions & 8 deletions src/sentry/tasks/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,17 +113,15 @@ def copy_file_to_control_and_update_model(
file_id: int,
**kwargs,
):
from sentry.models.files import ControlFile, ControlFileBlob, File

if ControlFileBlob._storage_config() is None:
return
from sentry.models.files import ControlFile, File

lock = f"copy-file-lock-{model_name}:{model_id}"

with locks.get(lock, duration=60, name="copy-file-lock").acquire():
# Short circuit duplicate copy calls
model = apps.get_app_config(app_name).get_model(model_name).objects.get(id=model_id)
if model.control_file_id:
model_class = apps.get_model(app_name, model_name)
instance = model_class.objects.get(id=model_id)
if instance.control_file_id:
return

file_model = File.objects.get(id=file_id)
Expand All @@ -139,5 +137,6 @@ def copy_file_to_control_and_update_model(
)
control_file.putfile(file_handle)

model.control_file_id = control_file.id
model.save()
instance.control_file_id = control_file.id
instance.file_id = None
instance.save()
48 changes: 46 additions & 2 deletions tests/sentry/api/endpoints/test_sentry_app_avatar.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from sentry import options as options_store
from sentry.models import SentryAppAvatar
from sentry.models.files.control_file import ControlFile
from sentry.testutils.cases import APITestCase
from sentry.testutils.silo import control_silo_test

Expand Down Expand Up @@ -37,7 +38,7 @@ def create_avatar(self, is_color):


@control_silo_test(stable=True)
class SentryAppAvatarTest(SentryAppAvatarTestBase):
class SentryAppAvatarGetTest(SentryAppAvatarTestBase):
def test_get(self):
response = self.get_success_response(self.unpublished_app.slug)

Expand All @@ -53,6 +54,30 @@ def test_get(self):
assert simple_avatar["color"] is False
assert response.data["uuid"] == str(self.unpublished_app.uuid)

def test_get_upload_control_file(self):
self.method = "put"
self.create_avatar(is_color=True)

self.method = "get"
self.create_avatar(is_color=True)
response = self.get_success_response(self.unpublished_app.slug)

assert response.status_code == 200, response.content
assert response.data["uuid"] == str(self.unpublished_app.uuid)
uploads = [
avatar for avatar in response.data["avatars"] if avatar["avatarType"] == "upload"
]
upload = uploads[0]
assert upload["avatarType"] == "upload"
assert upload["avatarUuid"]

avatar = SentryAppAvatar.objects.filter(sentry_app_id=self.unpublished_app.id).first()
assert avatar
assert avatar.get_file_id()
assert avatar.control_file_id
file = avatar.get_file()
assert isinstance(file, ControlFile)


@control_silo_test(stable=True)
class SentryAppAvatarPutTest(SentryAppAvatarTestBase):
Expand All @@ -65,11 +90,25 @@ def test_upload(self):
assert avatar.get_file_id()
assert avatar.get_avatar_type_display() == "upload"
color_avatar = self.get_avatar(resp)
assert color_avatar
assert color_avatar["avatarType"] == "upload"
assert color_avatar["avatarUuid"] is not None
assert color_avatar["color"] is True

def test_upload_control_file(self):
resp = self.create_avatar(is_color=True)

avatar = SentryAppAvatar.objects.get(sentry_app=self.unpublished_app, color=True)
assert avatar.get_file_id()
assert avatar.control_file_id
assert avatar.get_avatar_type_display() == "upload"
color_avatar = self.get_avatar(resp)
assert color_avatar
assert color_avatar["avatarType"] == "upload"
assert color_avatar["avatarUuid"] is not None
assert color_avatar["color"] is True

def test_upload_control(self):
def test_upload_control_with_storage_options(self):
with self.options(
{
"filestore.control.backend": options_store.get("filestore.backend"),
Expand All @@ -82,6 +121,7 @@ def test_upload_control(self):
assert avatar.get_file_id()
assert avatar.get_avatar_type_display() == "upload"
color_avatar = self.get_avatar(resp)
assert color_avatar
assert color_avatar["avatarType"] == "upload"
assert color_avatar["avatarUuid"] is not None
assert color_avatar["color"] is True
Expand All @@ -96,13 +136,15 @@ def test_upload_both(self):
assert avatars[0].get_file_id()
assert avatars[0].get_avatar_type_display() == "upload"
color_avatar = self.get_avatar(resp)
assert color_avatar
assert color_avatar["color"] is True
assert color_avatar["avatarType"] == "upload"
assert color_avatar["avatarUuid"] is not None

assert avatars[1].get_file_id()
assert avatars[1].get_avatar_type_display() == "upload"
simple_avatar = self.get_avatar(resp, False)
assert simple_avatar
assert simple_avatar["color"] is False
assert simple_avatar["avatarType"] == "upload"
assert simple_avatar["avatarUuid"] is not None
Expand All @@ -129,10 +171,12 @@ def test_revert_to_default(self):
color_avatar = self.get_avatar(response)
simple_avatar = self.get_avatar(response, False)

assert color_avatar
assert color_avatar["avatarType"] == "default"
assert color_avatar["avatarUuid"] is not None
assert color_avatar["color"] is True

assert simple_avatar
assert simple_avatar["avatarType"] == "default"
assert simple_avatar["avatarUuid"] is not None
assert simple_avatar["color"] is False
Expand Down
Loading
Loading