From 14d1eeba8e47b9a8103555ad87c6ed1012d29a17 Mon Sep 17 00:00:00 2001 From: Joas Schilling Date: Tue, 12 Mar 2024 13:13:01 +0100 Subject: [PATCH] fix(federation): Fix the handling of retrying OCM notifications when the remote was not available Signed-off-by: Joas Schilling --- appinfo/info.xml | 3 +- lib/BackgroundJob/RetryJob.php | 100 ---------------- lib/BackgroundJob/RetryNotificationsJob.php | 49 ++++++++ lib/Federation/BackendNotifier.php | 109 +++++++++++++----- .../Version19000Date20240312105627.php | 106 +++++++++++++++++ lib/Model/RetryNotification.php | 66 +++++++++++ lib/Model/RetryNotificationMapper.php | 62 ++++++++++ 7 files changed, 367 insertions(+), 128 deletions(-) delete mode 100644 lib/BackgroundJob/RetryJob.php create mode 100644 lib/BackgroundJob/RetryNotificationsJob.php create mode 100644 lib/Migration/Version19000Date20240312105627.php create mode 100644 lib/Model/RetryNotification.php create mode 100644 lib/Model/RetryNotificationMapper.php diff --git a/appinfo/info.xml b/appinfo/info.xml index e141bb015598..d0d84c689732 100644 --- a/appinfo/info.xml +++ b/appinfo/info.xml @@ -16,7 +16,7 @@ And in the works for the [coming versions](https://github.com/nextcloud/spreed/m ]]> - 19.0.0-beta.1 + 19.0.0-beta.1.1 agpl Daniel Calviño Sánchez @@ -65,6 +65,7 @@ And in the works for the [coming versions](https://github.com/nextcloud/spreed/m OCA\Talk\BackgroundJob\Reminder OCA\Talk\BackgroundJob\RemoveEmptyRooms OCA\Talk\BackgroundJob\ResetAssignedSignalingServer + OCA\Talk\BackgroundJob\RetryNotificationsJob diff --git a/lib/BackgroundJob/RetryJob.php b/lib/BackgroundJob/RetryJob.php deleted file mode 100644 index 49c088fcbcef..000000000000 --- a/lib/BackgroundJob/RetryJob.php +++ /dev/null @@ -1,100 +0,0 @@ - - * - * @author Bjoern Schiessle - * @author Björn Schießle - * @author Joas Schilling - * @author Lukas Reschke - * @author Morris Jobke - * @author Roeland Jago Douma - * @author Gary Kim - * - * @license AGPL-3.0 - * - * This code is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License, version 3, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License, version 3, - * along with this program. If not, see - * - */ -namespace OCA\Talk\BackgroundJob; - -use OCA\Talk\Federation\BackendNotifier; -use OCP\AppFramework\Utility\ITimeFactory; -use OCP\BackgroundJob\IJobList; -use OCP\BackgroundJob\Job; -use OCP\ILogger; - -/** - * Class RetryJob - * - * Background job to re-send update of federated re-shares to the remote server in - * case the server was not available on the first try - * - * @package OCA\Talk\BackgroundJob - */ -class RetryJob extends Job { - - /** @var int max number of attempts to send the request */ - private int $maxTry = 20; - - - public function __construct( - private BackendNotifier $backendNotifier, - ITimeFactory $timeFactory, - ) { - parent::__construct($timeFactory); - } - - /** - * run the job, then remove it from the jobList - * - * @param IJobList $jobList - * @param ILogger|null $logger - */ - public function start(IJobList $jobList): void { - if (((int)$this->argument['try']) > $this->maxTry) { - $jobList->remove($this, $this->argument); - return; - } - if ($this->shouldRun($this->argument)) { - parent::start($jobList); - $jobList->remove($this, $this->argument); - } - } - - protected function run($argument): void { - $remote = $argument['remote']; - $data = json_decode($argument['data'], true, flags: JSON_THROW_ON_ERROR); - $try = (int)$argument['try'] + 1; - - $this->backendNotifier->sendUpdateDataToRemote($remote, $data, $try); - } - - /** - * test if it is time for the next run - * - * @param array $argument - * @return bool - */ - protected function shouldRun(array $argument): bool { - $lastRun = (int)$argument['lastRun']; - $try = (int)$argument['try']; - return (($this->time->getTime() - $lastRun) > $this->nextRunBreak($try)); - } - - protected function nextRunBreak(int $try): int { - return min(($try + 1) * 300, 3600); - } -} diff --git a/lib/BackgroundJob/RetryNotificationsJob.php b/lib/BackgroundJob/RetryNotificationsJob.php new file mode 100644 index 000000000000..de0454a9a279 --- /dev/null +++ b/lib/BackgroundJob/RetryNotificationsJob.php @@ -0,0 +1,49 @@ + + * + * @author Joas Schilling + * + * @license GNU AGPL version 3 or any later version + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + */ +namespace OCA\Talk\BackgroundJob; + +use OCA\Talk\Federation\BackendNotifier; +use OCP\AppFramework\Utility\ITimeFactory; +use OCP\BackgroundJob\TimedJob; + +/** + * Retry to send OCM notifications + */ +class RetryNotificationsJob extends TimedJob { + public function __construct( + private BackendNotifier $backendNotifier, + ITimeFactory $timeFactory, + ) { + parent::__construct($timeFactory); + + // Every time the jobs run + $this->setInterval(1); + } + + protected function run($argument): void { + $this->backendNotifier->retrySendingFailedNotifications($this->time->getDateTime()); + } +} diff --git a/lib/Federation/BackendNotifier.php b/lib/Federation/BackendNotifier.php index ccb422b23bde..0b6346d18a78 100644 --- a/lib/Federation/BackendNotifier.php +++ b/lib/Federation/BackendNotifier.php @@ -31,10 +31,13 @@ use OCA\Talk\Config; use OCA\Talk\Exceptions\RoomHasNoModeratorException; use OCA\Talk\Model\Attendee; +use OCA\Talk\Model\RetryNotification; +use OCA\Talk\Model\RetryNotificationMapper; use OCA\Talk\Room; use OCP\App\IAppManager; use OCP\AppFramework\Http; use OCP\AppFramework\Services\IAppConfig; +use OCP\AppFramework\Utility\ITimeFactory; use OCP\BackgroundJob\IJobList; use OCP\DB\Exception; use OCP\Federation\ICloudFederationFactory; @@ -62,6 +65,8 @@ public function __construct( private IAppManager $appManager, private Config $talkConfig, private IAppConfig $appConfig, + private RetryNotificationMapper $retryNotificationMapper, + private ITimeFactory $timeFactory, ) { } @@ -344,29 +349,11 @@ public function sendMessageUpdate( $this->sendUpdateToRemote($remote, $notification); } - /** - * @param string $remote - * @param array{notificationType: string, resourceType: string, providerId: string, notification: array} $data - * @param int $try - * @return void - * @internal Used to send retries in background jobs - */ - public function sendUpdateDataToRemote(string $remote, array $data, int $try): void { - $notification = $this->cloudFederationFactory->getCloudFederationNotification(); - $notification->setMessage( - $data['notificationType'], - $data['resourceType'], - $data['providerId'], - $data['notification'] - ); - $this->sendUpdateToRemote($remote, $notification, $try); - } - - protected function sendUpdateToRemote(string $remote, ICloudFederationNotification $notification, int $try = 0): void { + protected function sendUpdateToRemote(string $remote, ICloudFederationNotification $notification, int $try = 0): bool { try { $response = $this->federationProviderManager->sendCloudNotification($remote, $notification); if ($response->getStatusCode() === Http::STATUS_CREATED) { - return; + return true; } $this->logger->warning("Failed to send notification for share from $remote, received status code {code}\n{body}", [ @@ -377,14 +364,82 @@ protected function sendUpdateToRemote(string $remote, ICloudFederationNotificati $this->logger->error("Failed to send notification for share from $remote, received OCMProviderException", ['exception' => $e]); } - $this->jobList->add( - RetryJob::class, - [ - 'remote' => $remote, - 'data' => json_encode($notification->getMessage(), JSON_THROW_ON_ERROR), - 'try' => $try, - ] + if ($try === 0) { + $now = $this->timeFactory->getTime(); + $now += $this->getRetryDelay(1); + + $retryNotification = new RetryNotification(); + $retryNotification->setRemoteServer($remote); + $retryNotification->setNumAttempts(1); + $retryNotification->setNextRetry($this->timeFactory->getDateTime('@' . $now)); + + $data = $notification->getMessage(); + $retryNotification->setNotificationType($data['notificationType']); + $retryNotification->setResourceType($data['resourceType']); + $retryNotification->setProviderId($data['providerId']); + $retryNotification->setNotification($data['notification']); + + $this->retryNotificationMapper->insert($retryNotification); + } + + return false; + } + + public function retrySendingFailedNotifications(\DateTimeInterface $dueDateTime): void { + $retryNotifications = $this->retryNotificationMapper->getAllDue($dueDateTime); + + foreach ($retryNotifications as $retryNotification) { + $this->retrySendingFailedNotification($retryNotification); + } + } + + protected function retrySendingFailedNotification(RetryNotification $retryNotification): void { + $notification = $this->cloudFederationFactory->getCloudFederationNotification(); + $notification->setMessage( + $retryNotification->getNotificationType(), + $retryNotification->getResourceType(), + $retryNotification->getProviderId(), + json_decode($retryNotification->getNotification(), true, flags: JSON_THROW_ON_ERROR), ); + + $success = $this->sendUpdateToRemote($retryNotification->getRemoteServer(), $notification, $retryNotification->getNumAttempts()); + + if ($success) { + $this->retryNotificationMapper->delete($retryNotification); + } elseif ($retryNotification->getNumAttempts() === RetryNotification::MAX_NUM_ATTEMPTS) { + $this->logger->error('Failed to send notification to ' . $retryNotification->getRemoteServer() . ' ' . RetryNotification::MAX_NUM_ATTEMPTS . ' times, giving up!'); + $this->retryNotificationMapper->delete($retryNotification); + } else { + $retryNotification->setNumAttempts($retryNotification->getNumAttempts() + 1); + + $now = $this->timeFactory->getTime(); + $now += $this->getRetryDelay($retryNotification->getNumAttempts()); + + $retryNotification->setNextRetry($this->timeFactory->getDateTime('@' . $now)); + $this->retryNotificationMapper->update($retryNotification); + } + } + + /** + * First 5 attempts are retried on the next cron run. + * Attempts 6-10 we back off to cover slightly longer maintenance/downtimes (5 minutes * per attempt) + * And the last tries 11-20 are retried with ~6 hours delay + * + * This means the last retry is after ~84 hours so a downtime from Friday to Monday would be covered + */ + protected function getRetryDelay(int $attempt): int { + if ($attempt < 5) { + // Retry after "attempt" minutes + return 5 * 60; + } + + if ($attempt > 10) { + // Retry after 8 hours + return 8 * 3600; + } + + // Retry after "attempt" * 5 minutes + return $attempt * 5 * 60; } protected function prepareRemoteUrl(string $remote): string { diff --git a/lib/Migration/Version19000Date20240312105627.php b/lib/Migration/Version19000Date20240312105627.php new file mode 100644 index 000000000000..e72253ae87f7 --- /dev/null +++ b/lib/Migration/Version19000Date20240312105627.php @@ -0,0 +1,106 @@ + + * + * @author Joas Schilling + * + * @license GNU AGPL version 3 or any later version + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + */ + +namespace OCA\Talk\Migration; + +use Closure; +use OCP\DB\ISchemaWrapper; +use OCP\DB\Types; +use OCP\IDBConnection; +use OCP\Migration\IOutput; +use OCP\Migration\SimpleMigrationStep; + +/** + * Add table to queue federation notifications to retry + */ +class Version19000Date20240312105627 extends SimpleMigrationStep { + public function __construct( + protected IDBConnection $connection, + ) { + } + + /** + * @param IOutput $output + * @param Closure(): ISchemaWrapper $schemaClosure + * @param array $options + * @return null|ISchemaWrapper + */ + public function changeSchema(IOutput $output, Closure $schemaClosure, array $options): ?ISchemaWrapper { + /** @var ISchemaWrapper $schema */ + $schema = $schemaClosure(); + + $table = $schema->createTable('talk_retry_ocm'); + $table->addColumn('id', Types::BIGINT, [ + 'autoincrement' => true, + 'notnull' => true, + ]); + $table->addColumn('remote_server', Types::STRING, [ + 'notnull' => true, + 'length' => 255, + ]); + $table->addColumn('num_attempt', Types::INTEGER, [ + 'default' => 0, + 'unsigned' => true, + ]); + $table->addColumn('next_retry', Types::DATETIME, [ + 'notnull' => false, + ]); + $table->addColumn('notification_type', Types::STRING, [ + 'notnull' => true, + 'length' => 64, + ]); + $table->addColumn('resource_type', Types::STRING, [ + 'notnull' => true, + 'length' => 64, + ]); + $table->addColumn('provider_id', Types::STRING, [ + 'notnull' => true, + 'length' => 64, + ]); + $table->addColumn('notification', Types::TEXT, [ + 'notnull' => true, + ]); + + + $table->setPrimaryKey(['id']); + $table->addIndex(['next_retry'], 'talk_retry_ocm_next'); + + return $schema; + } + + /** + * Remove legacy RetryJobs + */ + public function postSchemaChange(IOutput $output, \Closure $schemaClosure, array $options) { + /** @psalm-suppress UndefinedClass */ + $formerClassName = \OCA\Talk\BackgroundJob\RetryJob::class; + + $query = $this->connection->getQueryBuilder(); + $query->delete('jobs') + ->where($query->expr()->eq('class', $query->createNamedParameter($formerClassName))); + $query->executeStatement(); + } +} diff --git a/lib/Model/RetryNotification.php b/lib/Model/RetryNotification.php new file mode 100644 index 000000000000..d460f934c09f --- /dev/null +++ b/lib/Model/RetryNotification.php @@ -0,0 +1,66 @@ + + * + * @author Joas Schilling + * + * @license GNU AGPL version 3 or any later version + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + */ + +namespace OCA\Talk\Model; + +use OCP\AppFramework\Db\Entity; + +/** + * @method void setRemoteServer(string $remoteServer) + * @method string getRemoteServer() + * @method void setNumAttempts(int $numAttempts) + * @method int getNumAttempts() + * @method void setNextRetry(\DateTime $nextRetry) + * @method \DateTime getNextRetry() + * @method void setNotificationType(string $notificationType) + * @method string getNotificationType() + * @method void setResourceType(string $resourceType) + * @method string getResourceType() + * @method void setProviderId(string $providerId) + * @method string getProviderId() + * @method void setNotification(string $notification) + * @method string getNotification() + */ +class RetryNotification extends Entity { + public const MAX_NUM_ATTEMPTS = 20; + + protected string $remoteServer = ''; + protected int $numAttempts = 0; + protected ?\DateTime $nextRetry = null; + protected string $notificationType = ''; + protected string $resourceType = ''; + protected string $providerId = ''; + protected string $notification = ''; + + public function __construct() { + $this->addType('remoteServer', 'string'); + $this->addType('numAttempts', 'int'); + $this->addType('nextRetry', 'datetime'); + $this->addType('notificationType', 'string'); + $this->addType('resourceType', 'string'); + $this->addType('providerId', 'string'); + $this->addType('notification', 'string'); + } +} diff --git a/lib/Model/RetryNotificationMapper.php b/lib/Model/RetryNotificationMapper.php new file mode 100644 index 000000000000..fe518593a7c8 --- /dev/null +++ b/lib/Model/RetryNotificationMapper.php @@ -0,0 +1,62 @@ + + * + * @author Joas Schilling + * + * @license GNU AGPL version 3 or any later version + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + */ + +namespace OCA\Talk\Model; + +use OCP\AppFramework\Db\QBMapper; +use OCP\DB\QueryBuilder\IQueryBuilder; +use OCP\IDBConnection; + +/** + * @method RetryNotification mapRowToEntity(array $row) + * @method RetryNotification findEntity(IQueryBuilder $query) + * @method RetryNotification[] findEntities(IQueryBuilder $query) + * @template-extends QBMapper + */ +class RetryNotificationMapper extends QBMapper { + public function __construct( + IDBConnection $db, + ) { + parent::__construct($db, 'talk_retry_ocm', RetryNotification::class); + } + + /** + * @return RetryNotification[] + */ + public function getAllDue(\DateTimeInterface $dueDateTime, ?int $limit = 500): array { + $query = $this->db->getQueryBuilder(); + $query->select('*') + ->from($this->getTableName()) + ->where($query->expr()->lte('next_retry', $query->createNamedParameter($dueDateTime, IQueryBuilder::PARAM_DATE), IQueryBuilder::PARAM_DATE)); + + if ($limit !== null) { + $query->setMaxResults($limit) + ->orderBy('next_retry', 'ASC') + ->addOrderBy('id', 'ASC'); + } + + return $this->findEntities($query); + } +}