Skip to content

Commit

Permalink
new: Allow to search for tags and URL parts
Browse files Browse the repository at this point in the history
  • Loading branch information
marienfressinaud committed Oct 4, 2024
1 parent 4cec5ba commit a42ec7e
Show file tree
Hide file tree
Showing 12 changed files with 1,171 additions and 17 deletions.
57 changes: 40 additions & 17 deletions src/controllers/Links.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
use Minz\Response;
use App\auth;
use App\models;
use App\search_engine;
use App\services;
use App\utils;

Expand Down Expand Up @@ -36,36 +37,58 @@ public function index(Request $request): Response
return Response::redirect('login', ['redirect_to' => \Minz\Url::for('links')]);
}

$beta_enabled = models\FeatureFlag::isEnabled('beta', $user->id);

$query = $request->param('q');
$pagination_page = $request->paramInteger('page', 1);

if ($query) {
$number_links = models\Link::countByQueryAndUserId(
$query,
$user->id,
[
'exclude_never_only' => true,
]
);
if ($beta_enabled) {
$search_query = search_engine\Query::fromString($query);

$number_links = search_engine\LinksSearcher::countLinks($user, $search_query);
} else {
$number_links = models\Link::countByQueryAndUserId(
$query,
$user->id,
[
'exclude_never_only' => true,
]
);
}

$number_per_page = 30;

$pagination = new utils\Pagination($number_links, $number_per_page, $pagination_page);

if ($pagination_page !== $pagination->currentPage()) {
return Response::redirect('links', [
'q' => $query,
'page' => $pagination->currentPage(),
]);
}

$links = models\Link::listComputedByQueryAndUserId(
$query,
$user->id,
['published_at', 'number_comments'],
[
'exclude_never_only' => true,
'offset' => $pagination->currentOffset(),
'limit' => $pagination->numberPerPage(),
]
);
if ($beta_enabled) {
$links = search_engine\LinksSearcher::getLinks(
$user,
$search_query,
pagination: [
'offset' => $pagination->currentOffset(),
'limit' => $pagination->numberPerPage(),
]
);
} else {
$links = models\Link::listComputedByQueryAndUserId(
$query,
$user->id,
['published_at', 'number_comments'],
[
'exclude_never_only' => true,
'offset' => $pagination->currentOffset(),
'limit' => $pagination->numberPerPage(),
]
);
}

return Response::ok('links/search.phtml', [
'links' => $links,
Expand Down
29 changes: 29 additions & 0 deletions src/migrations/Migration202410040001AddIndexLinksUrl.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
<?php

namespace App\migrations;

class Migration202410040001AddIndexLinksUrl
{
    /**
     * Make sure the pg_trgm extension is installed, then create a trigram
     * GIN index on the `url` column of the `links` table.
     *
     * @return bool True once the statements have been sent to the database.
     */
    public function migrate(): bool
    {
        $sql = <<<'SQL'
            CREATE EXTENSION IF NOT EXISTS pg_trgm;
            CREATE INDEX idx_links_url ON links USING gin (url gin_trgm_ops);
            SQL;

        \Minz\Database::get()->exec($sql);

        return true;
    }

    /**
     * Drop the index created by migrate().
     *
     * Only the index is removed: the pg_trgm extension is left installed.
     *
     * @return bool True once the statement has been sent to the database.
     */
    public function rollback(): bool
    {
        $sql = <<<'SQL'
            DROP INDEX idx_links_url;
            SQL;

        \Minz\Database::get()->exec($sql);

        return true;
    }
}
2 changes: 2 additions & 0 deletions src/schema.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
CREATE EXTENSION IF NOT EXISTS pgcrypto;
CREATE EXTENSION IF NOT EXISTS pg_trgm;

CREATE TABLE jobs (
id SERIAL PRIMARY KEY,
Expand Down Expand Up @@ -168,6 +169,7 @@ CREATE TABLE links (

-- Indexes on the links table.
-- Lookups by owner + URL hash, and by URL hash alone.
CREATE INDEX idx_links_user_id_url_hash ON links USING btree(user_id, url_hash);
CREATE INDEX idx_links_url_hash ON links USING hash(url_hash);
-- Trigram (pg_trgm) GIN index on the URL text itself.
CREATE INDEX idx_links_url ON links USING gin (url gin_trgm_ops);
-- Partial indexes: only rows matching the WHERE clause are indexed.
CREATE INDEX idx_links_to_be_fetched ON links(to_be_fetched) WHERE to_be_fetched = true;
CREATE INDEX idx_links_image_filename ON links(image_filename) WHERE image_filename IS NOT NULL;
-- GIN index on the search_index column.
CREATE INDEX idx_links_search ON links USING GIN (search_index);
Expand Down
190 changes: 190 additions & 0 deletions src/search_engine/LinksSearcher.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
<?php

namespace App\search_engine;

use App\models;
use Minz\Database;

/**
* @author Marien Fressinaud <dev@marienfressinaud.fr>
* @license http://www.gnu.org/licenses/agpl-3.0.en.html AGPL
*/
class LinksSearcher
{
    /**
     * SQL condition (to be appended to a WHERE clause on `links l`) which
     * excludes the links that are ONLY in the "never" collection.
     *
     * It is shared by getLinks() and countLinks() so that both queries always
     * apply the exact same filter.
     */
    private const EXCLUDE_NEVER_ONLY_SQL = <<<'SQL'
        -- Exclude the links that are ONLY in the "never" collection
        AND NOT EXISTS (
            SELECT 1
            FROM links_to_collections lc, collections c
            WHERE lc.link_id = l.id
            AND lc.collection_id = c.id
            HAVING COUNT(CASE WHEN c.type='never' THEN 1 END) = 1
            AND COUNT(c.*) = 1
        )
        SQL;

    /**
     * Return the links of the given user matching the query.
     *
     * Each row also exposes `published_at` (an alias of the link's
     * `created_at`) and `number_comments` (the number of messages attached to
     * the link). The links are sorted by `published_at` (most recent first),
     * then by id.
     *
     * @param array{
     *     'offset'?: int,
     *     'limit'?: int|'ALL',
     * } $pagination
     *     Defaults to offset 0 and no limit ('ALL').
     *
     * @return models\Link[]
     */
    public static function getLinks(
        models\User $user,
        Query $query,
        array $pagination = [],
    ): array {
        $pagination = array_merge([
            'offset' => 0,
            'limit' => 'ALL',
        ], $pagination);

        // ":query" is always referenced by the FROM clause, so it must always
        // be bound, even when the query has no text condition.
        $parameters = [
            ':query' => '',
            ':user_id' => $user->id,
            ':offset' => $pagination['offset'],
        ];

        $limit_statement = '';
        if ($pagination['limit'] !== 'ALL') {
            $limit_statement = 'LIMIT :limit';
            $parameters[':limit'] = $pagination['limit'];
        }

        [$query_statement, $query_parameters] = self::buildWhereQuery($query);
        $parameters = array_merge($parameters, $query_parameters);

        $exclude_never_only = self::EXCLUDE_NEVER_ONLY_SQL;

        $sql = <<<SQL
            SELECT
                l.*,
                l.created_at AS published_at,
                (
                    SELECT COUNT(*) FROM messages m
                    WHERE m.link_id = l.id
                ) AS number_comments
            FROM links l, plainto_tsquery('french', :query) AS query
            WHERE l.user_id = :user_id
            {$query_statement}
            {$exclude_never_only}
            ORDER BY published_at DESC, l.id
            OFFSET :offset
            {$limit_statement}
            SQL;

        $database = Database::get();
        $statement = $database->prepare($sql);
        $statement->execute($parameters);

        return models\Link::fromDatabaseRows($statement->fetchAll());
    }

    /**
     * Return the number of links of the given user matching the query.
     *
     * The same filters as in getLinks() are applied (without pagination).
     */
    public static function countLinks(models\User $user, Query $query): int
    {
        // ":query" is always referenced by the FROM clause, so it must always
        // be bound, even when the query has no text condition.
        $parameters = [
            ':query' => '',
            ':user_id' => $user->id,
        ];

        [$query_statement, $query_parameters] = self::buildWhereQuery($query);
        $parameters = array_merge($parameters, $query_parameters);

        $exclude_never_only = self::EXCLUDE_NEVER_ONLY_SQL;

        $sql = <<<SQL
            SELECT COUNT(l.id)
            FROM links l, plainto_tsquery('french', :query) AS query
            WHERE l.user_id = :user_id
            {$query_statement}
            {$exclude_never_only}
            SQL;

        $database = Database::get();
        $statement = $database->prepare($sql);
        $statement->execute($parameters);

        return intval($statement->fetchColumn());
    }

    /**
     * Build the SQL conditions (and their parameters) matching the query.
     *
     * The returned SQL is either empty or a sequence of " AND ..." conditions
     * meant to be appended to an existing WHERE clause on `links l`. It may
     * reference the `query` tsquery declared in the FROM clause of the caller.
     *
     * - text conditions are joined with spaces and matched against the
     *   `search_index` column;
     * - "url" qualifiers are matched as case-insensitive substrings of the
     *   URL (other qualifiers are ignored);
     * - tag conditions require the link to have all the (non-negated) tags
     *   and none of the negated ones.
     *
     * @return array{string, array<string, mixed>}
     */
    private static function buildWhereQuery(Query $query): array
    {
        $where_sql = '';
        $parameters = [];

        $text_values = array_map(
            static fn (Query\Condition $condition): string => $condition->getValue(),
            $query->getConditions('text'),
        );
        $text_query = implode(' ', $text_values);

        if ($text_query !== '') {
            $where_sql .= ' AND search_index @@ query';
            // Use the very same key as the callers' default (":query") so
            // that array_merge() replaces the default value instead of adding
            // a second binding for the same placeholder.
            $parameters[':query'] = $text_query;
        }

        foreach ($query->getConditions('qualifier') as $condition) {
            if ($condition->getQualifier() !== 'url') {
                continue;
            }

            // The number of parameters grows at each iteration, so the
            // generated placeholder names are unique.
            $parameter_name = ':url' . (count($parameters) + 1);

            // Escape the LIKE wildcards (and the escape character itself) so
            // that the value is searched literally: "_" and "%" are common in
            // URLs and must not match arbitrary characters.
            $escaped_value = addcslashes($condition->getValue(), '\\%_');

            $where_sql .= " AND l.url ILIKE {$parameter_name}";
            $parameters[$parameter_name] = "%{$escaped_value}%";
        }

        $tags_parameters = [];
        $not_tags_parameters = [];

        foreach ($query->getConditions('tag') as $condition) {
            $parameter_name = ':tag' . (count($parameters) + 1);

            $parameters[$parameter_name] = $condition->getValue();

            if ($condition->not()) {
                $not_tags_parameters[] = $parameter_name;
            } else {
                $tags_parameters[] = $parameter_name;
            }
        }

        // "??&" and "??|" are the PDO-escaped forms of the PostgreSQL jsonb
        // operators "?&" (contains all) and "?|" (contains any).
        if ($tags_parameters) {
            $tags_statement = implode(',', $tags_parameters);
            $where_sql .= " AND l.tags::jsonb ??& array[{$tags_statement}]";
        }

        if ($not_tags_parameters) {
            $not_tags_statement = implode(',', $not_tags_parameters);
            $where_sql .= " AND NOT (l.tags::jsonb ??| array[{$not_tags_statement}])";
        }

        return [$where_sql, $parameters];
    }
}
48 changes: 48 additions & 0 deletions src/search_engine/Query.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
<?php

namespace App\search_engine;

/**
* @author Marien Fressinaud <dev@marienfressinaud.fr>
* @license http://www.gnu.org/licenses/agpl-3.0.en.html AGPL
*/
class Query
{
    /** @var Query\Condition[] The conditions, in the order they were added. */
    private array $conditions = [];

    /**
     * Append a condition to the query.
     */
    public function addCondition(Query\Condition $condition): void
    {
        $this->conditions[] = $condition;
    }

    /**
     * Return the conditions of the query, optionally restricted to a type.
     *
     * With 'any' (the default), all the conditions are returned. With another
     * type, only the matching conditions are returned and they keep the keys
     * (i.e. the positions) they have in the full list. An unknown type gives
     * an empty array.
     *
     * @param 'text'|'qualifier'|'tag'|'any' $type
     *
     * @return Query\Condition[]
     */
    public function getConditions(string $type = 'any'): array
    {
        if ($type === 'any') {
            return $this->conditions;
        }

        $is_of_type = static fn ($condition) => match ($type) {
            'text' => $condition->isTextCondition(),
            'qualifier' => $condition->isQualifierCondition(),
            'tag' => $condition->isTagCondition(),
            default => false,
        };

        return array_filter($this->conditions, $is_of_type);
    }

    /**
     * Build a Query by tokenizing the given string, then parsing the tokens.
     */
    public static function fromString(string $queryString): Query
    {
        $tokens = (new Query\Tokenizer())->tokenize($queryString);

        return (new Query\Parser())->parse($tokens);
    }
}
Loading

0 comments on commit a42ec7e

Please sign in to comment.