Skip to content

Commit

Permalink
tec: Improve the algorithm of news refreshing
Browse files Browse the repository at this point in the history
Part of the algorithm is now handled directly by PHP in order to avoid
a time-consuming JOIN.
  • Loading branch information
marienfressinaud committed Sep 19, 2024
1 parent c907dd6 commit fe59a92
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 56 deletions.
82 changes: 43 additions & 39 deletions src/models/dao/links/NewsQueries.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,51 +14,22 @@ trait NewsQueries
{
/**
* Return public links listed in followed collections of the given user,
* ordered by publication date. Links with a matching url in bookmarks or
* read list are not returned.
* ordered by publication date.
*
* @return self[]
*/
public static function listFromFollowedCollectionsForNews(string $user_id): array
public static function listFromFollowedCollections(string $user_id): array
{
$where_placeholder = '';
$values = [
':user_id' => $user_id,
':until_strict' => \Minz\Time::ago(1, 'day')->format(Database\Column::DATETIME_FORMAT),
':until_normal' => \Minz\Time::ago(1, 'week')->format(Database\Column::DATETIME_FORMAT),
];

$where_placeholder .= <<<'SQL'
AND (
(fc.time_filter = 'strict' AND lc.created_at >= :until_strict) OR
(fc.time_filter = 'normal' AND lc.created_at >= :until_normal) OR
(fc.time_filter = 'all' AND lc.created_at >= fc.created_at - INTERVAL '1 week')
)
SQL;
$values[':until_strict'] = \Minz\Time::ago(1, 'day')->format(Database\Column::DATETIME_FORMAT);
$values[':until_normal'] = \Minz\Time::ago(1, 'week')->format(Database\Column::DATETIME_FORMAT);

$sql = <<<SQL
WITH excluded_links AS (
SELECT l_exclude.id, l_exclude.url_hash
FROM links l_exclude, collections c_exclude, links_to_collections lc_exclude
WHERE c_exclude.user_id = :user_id
AND (
c_exclude.type = 'news'
OR c_exclude.type = 'bookmarks'
OR c_exclude.type = 'read'
OR c_exclude.type = 'never'
)
AND lc_exclude.link_id = l_exclude.id
AND lc_exclude.collection_id = c_exclude.id
)
SELECT l.*, lc.created_at AS published_at, 'collection' AS source_news_type, c.id AS source_news_resource_id
FROM collections c, links_to_collections lc, followed_collections fc, links l
LEFT JOIN excluded_links
ON excluded_links.url_hash = l.url_hash
WHERE fc.user_id = :user_id
AND fc.collection_id = lc.collection_id
Expand All @@ -76,21 +47,54 @@ public static function listFromFollowedCollectionsForNews(string $user_id): arra
AND l.user_id != :user_id
AND excluded_links.id IS NULL
AND (
(fc.time_filter = 'strict' AND lc.created_at >= :until_strict) OR
(fc.time_filter = 'normal' AND lc.created_at >= :until_normal) OR
(fc.time_filter = 'all' AND lc.created_at >= fc.created_at - INTERVAL '1 week')
)
ORDER BY published_at DESC, l.id
SQL;

$database = Database::get();
$statement = $database->prepare($sql);
$statement->execute($values);

return self::fromDatabaseRows($statement->fetchAll());
}

/**
* Return the url hashes of the links stored in the news, bookmarks, read
* or never collections owned by the given user.
*
* Each hash is a key of the returned array (mapped to true), so that
* callers can test whether a link must be excluded with isset().
*
* @param string $user_id The id of the user owning the collections.
*
* @return array<string, bool>
*/
public static function listHashesExcludedFromNews(string $user_id): array
{
$values = [
':user_id' => $user_id,
];

{$where_placeholder}
$sql = <<<SQL
SELECT l.url_hash, true
FROM links l, collections c, links_to_collections lc
GROUP BY l.id, lc.created_at, c.id
WHERE c.user_id = :user_id
AND (
c.type = 'news'
OR c.type = 'bookmarks'
OR c.type = 'read'
OR c.type = 'never'
)
ORDER BY lc.created_at DESC, l.id
LIMIT 100
AND lc.link_id = l.id
AND lc.collection_id = c.id
SQL;

$database = Database::get();
$statement = $database->prepare($sql);
$statement->execute($values);

return self::fromDatabaseRows($statement->fetchAll());
// FETCH_KEY_PAIR uses the first selected column (url_hash) as the key and
// the second one (true) as the value of each entry.
return $statement->fetchAll(\PDO::FETCH_KEY_PAIR);
}

/**
Expand Down
35 changes: 18 additions & 17 deletions src/services/NewsPicker.php
Original file line number Diff line number Diff line change
Expand Up @@ -44,24 +44,25 @@ public function __construct(models\User $user, array $options = [])
*/
public function pick(): array
{
$links = models\Link::listFromFollowedCollectionsForNews($this->user->id);
$links = $this->mergeByUrl($links);
return array_slice($links, 0, $this->options['number_links']);
}
// Hashes of the links that the user already has in their news, bookmarks,
// read or never lists; used below as a lookup table.
$excluded_hashes = models\Link::listHashesExcludedFromNews($this->user->id);
$links_from_followed = models\Link::listFromFollowedCollections($this->user->id);

/**
* Remove links sharing the same URL, keeping the last one seen for each URL.
*
* @param models\Link[] $links
*
* @return models\Link[]
*/
private function mergeByUrl(array $links): array
{
$by_url = [];
foreach ($links as $link) {
$by_url[$link->url] = $link;
$links = [];

foreach ($links_from_followed as $link) {
$hash = $link->url_hash;

// Skip the links that the user already has in one of the excluded lists.
if (isset($excluded_hashes[$hash])) {
continue;
}

// Index by url hash so that a link with an already seen hash replaces the
// previous entry instead of being added a second time.
$links[$hash] = $link;

// Stop iterating as soon as enough distinct links have been collected.
if (count($links) >= $this->options['number_links']) {
break;
}
}
return array_values($by_url);

// Drop the hash keys in order to return a plain list of links.
return array_values($links);
}
}

0 comments on commit fe59a92

Please sign in to comment.