From fd19415994e7af78069823b04e51810e806c0300 Mon Sep 17 00:00:00 2001 From: Marien Fressinaud Date: Wed, 2 Oct 2024 17:47:22 +0200 Subject: [PATCH] tec: Improve the performance of news refreshing --- ...2AddCreatedAtIndexOnLinksToCollections.php | 28 ++++++++++++ src/models/dao/links/NewsQueries.php | 43 +++++++++++++++++-- src/schema.sql | 1 + src/services/NewsPicker.php | 21 +-------- tests/services/NewsPickerTest.php | 31 ++++++++----- 5 files changed, 90 insertions(+), 34 deletions(-) create mode 100644 src/migrations/Migration202410020002AddCreatedAtIndexOnLinksToCollections.php diff --git a/src/migrations/Migration202410020002AddCreatedAtIndexOnLinksToCollections.php b/src/migrations/Migration202410020002AddCreatedAtIndexOnLinksToCollections.php new file mode 100644 index 00000000..dffbc6ba --- /dev/null +++ b/src/migrations/Migration202410020002AddCreatedAtIndexOnLinksToCollections.php @@ -0,0 +1,28 @@ +exec(<<<'SQL' + CREATE INDEX idx_links_to_collections_created_at ON links_to_collections(created_at); + SQL); + + return true; + } + + public function rollback(): bool + { + $database = \Minz\Database::get(); + + $database->exec(<<<'SQL' + DROP INDEX idx_links_to_collections_created_at; + SQL); + + return true; + } +} diff --git a/src/models/dao/links/NewsQueries.php b/src/models/dao/links/NewsQueries.php index e3287f7f..e01c85d5 100644 --- a/src/models/dao/links/NewsQueries.php +++ b/src/models/dao/links/NewsQueries.php @@ -18,16 +18,22 @@ trait NewsQueries * * @return self[] */ - public static function listFromFollowedCollections(string $user_id): array + public static function listFromFollowedCollections(string $user_id, int $max): array { $values = [ ':user_id' => $user_id, + ':until_hard_limit' => \Minz\Time::ago(1, 'year')->format(Database\Column::DATETIME_FORMAT), ':until_strict' => \Minz\Time::ago(1, 'day')->format(Database\Column::DATETIME_FORMAT), ':until_normal' => \Minz\Time::ago(1, 'week')->format(Database\Column::DATETIME_FORMAT), ]; $sql = <<= :until_hard_limit AND ( (fc.time_filter = 'strict' AND lc.created_at >= :until_strict) OR (fc.time_filter = 'normal' AND lc.created_at >= :until_normal) OR (fc.time_filter = 'all' AND lc.created_at >= fc.created_at - INTERVAL '1 week') ) - ORDER BY published_at DESC, l.id + -- This order by is not very performant, but it gives a chance to + -- get links that wouldn't show up otherwise. Indeed, if the first + -- 1000 links were removed from the news, even if the next links + -- had to be returned, nothing would be returned from the method. + ORDER BY random() + + LIMIT 1000 SQL; $database = Database::get(); $statement = $database->prepare($sql); $statement->execute($values); - return self::fromDatabaseRows($statement->fetchAll()); + // Get the results indexed by the url_hash (i.e. the first column) + $results = $statement->fetchAll(\PDO::FETCH_UNIQUE); + + // Remove the links that must be excluded from the news + $excluded_hashes = self::listHashesExcludedFromNews($user_id); + $results = array_diff_key($results, $excluded_hashes); + + // Sort the links + usort($results, function ($db_link1, $db_link2) { + $comparison = $db_link2['published_at'] <=> $db_link1['published_at']; + + if ($comparison === 0) { + return $db_link1['id'] <=> $db_link2['id']; + } + + return $comparison; + }); + + // Limit the set of results + $results = array_slice($results, 0, $max); + + // Transform the raw results to Links + return self::fromDatabaseRows($results); } /** diff --git a/src/schema.sql b/src/schema.sql index f6a1b965..7aab2567 100644 --- a/src/schema.sql +++ b/src/schema.sql @@ -181,6 +181,7 @@ CREATE TABLE links_to_collections ( CREATE UNIQUE INDEX idx_links_to_collections ON links_to_collections(link_id, collection_id); CREATE INDEX idx_links_to_collections_collection_id ON links_to_collections(collection_id); +CREATE INDEX idx_links_to_collections_created_at ON links_to_collections(created_at); CREATE TABLE followed_collections ( id BIGSERIAL PRIMARY KEY, diff --git a/src/services/NewsPicker.php b/src/services/NewsPicker.php index be2a4316..1dd7daf3 100644 --- a/src/services/NewsPicker.php +++ b/src/services/NewsPicker.php @@ -27,25 +27,6 @@ public function __construct(models\User $user) */ public function pick(int $max = 25): array { - $excluded_hashes = models\Link::listHashesExcludedFromNews($this->user->id); - $links_from_followed = models\Link::listFromFollowedCollections($this->user->id); - - $links = []; - - foreach ($links_from_followed as $link) { - $hash = $link->url_hash; - - if (isset($excluded_hashes[$hash])) { - continue; - } - - $links[$hash] = $link; - - if (count($links) >= $max) { - break; - } - } - - return array_values($links); + return models\Link::listFromFollowedCollections($this->user->id, $max); } } diff --git a/tests/services/NewsPickerTest.php b/tests/services/NewsPickerTest.php index dda784b2..eb9dc896 100644 --- a/tests/services/NewsPickerTest.php +++ b/tests/services/NewsPickerTest.php @@ -31,11 +31,14 @@ public function testPickSelectsFromFollowed(): void /** @var \DateTimeImmutable */ $now = $this->fake('dateTime'); $this->freeze($now); - /** @var int */ - $days_ago = $this->fake('numberBetween', 0, 7); - $published_at = \Minz\Time::ago($days_ago, 'days'); + $published_at1 = \Minz\Time::ago(3, 'days'); + $published_at2 = \Minz\Time::ago(1, 'days'); $news_picker = new NewsPicker($this->user); - $link = LinkFactory::create([ + $link1 = LinkFactory::create([ + 'user_id' => $this->other_user->id, + 'is_hidden' => false, + ]); + $link2 = LinkFactory::create([ 'user_id' => $this->other_user->id, 'is_hidden' => false, ]); @@ -45,9 +48,14 @@ public function testPickSelectsFromFollowed(): void 'is_public' => true, ]); LinkToCollectionFactory::create([ - 'created_at' => $published_at, + 'created_at' => $published_at1, 'collection_id' => $collection->id, - 'link_id' => $link->id, + 'link_id' => $link1->id, + ]); + LinkToCollectionFactory::create([ + 'created_at' => $published_at2, + 'collection_id' => $collection->id, + 'link_id' => $link2->id, ]); FollowedCollectionFactory::create([ 'user_id' => $this->user->id, @@ -56,10 +64,13 @@ public function testPickSelectsFromFollowed(): void $links = $news_picker->pick(); - $this->assertSame(1, count($links)); - $this->assertSame($link->id, $links[0]->id); + $this->assertSame(2, count($links)); + $this->assertSame($link2->id, $links[0]->id); $this->assertSame('collection', $links[0]->source_news_type); $this->assertSame($collection->id, $links[0]->source_news_resource_id); + $this->assertSame($link1->id, $links[1]->id); + $this->assertSame('collection', $links[1]->source_news_type); + $this->assertSame($collection->id, $links[1]->source_news_resource_id); } public function testPickSelectsHiddenLinkIfCollectionIsShared(): void @@ -148,7 +159,7 @@ public function testPickRespectsFromFollowedIfOldLinksButWithTimeFilterAll(): vo $now = $this->fake('dateTime'); $this->freeze($now); /** @var int */ - $days_ago = $this->fake('numberBetween', 8, 9999); + $days_ago = $this->fake('numberBetween', 8, 180); $published_at = \Minz\Time::ago($days_ago, 'days'); // time_filter 'all' will search links until 7 days before the date // when the user started to follow the collection @@ -190,7 +201,7 @@ public function testPickDoesNotPickFromFollowedIfTooOld(): void $now = $this->fake('dateTime'); $this->freeze($now); /** @var int */ - $days_ago = $this->fake('numberBetween', 8, 9999); + $days_ago = $this->fake('numberBetween', 8, 180); $published_at = \Minz\Time::ago($days_ago, 'days'); $news_picker = new NewsPicker($this->user); $link = LinkFactory::create([