From 24076dd1dccb1271f13f681c6b93cab2e30e4f34 Mon Sep 17 00:00:00 2001 From: Laurent Constantin Date: Mon, 9 Dec 2024 22:45:30 +0100 Subject: [PATCH] Custom typesense bundle --- .php-cs-fixer.dist.php | 1 + .../src/BibliotecaTypesenseBundle.php | 53 +++++ .../src/Client/ClientAdapter.php | 78 +++++++ .../src/Client/ClientFactory.php | 44 ++++ .../src/Client/ClientInterface.php | 37 +++ .../src/Command/TypesensePopulateCommand.php | 65 ++++++ .../src/Mapper/MapperInterface.php | 15 ++ .../src/Mapper/MapperLocator.php | 33 +++ .../src/Mapper/Mapping.php | 40 ++++ .../src/PopulateService.php | 60 +++++ .../src/Query/TypesenseQuery.php | 216 ++++++++++++++++++ .../src/Resources/config/services.yaml | 24 ++ composer.json | 3 +- config/bundles.php | 1 + config/packages/biblioteca_typesense.yaml | 4 + config/services.yaml | 3 + src/Mapper/BookMapper.php | 68 ++++++ 17 files changed, 744 insertions(+), 1 deletion(-) create mode 100644 BibliotecaTypesenseBundle/src/BibliotecaTypesenseBundle.php create mode 100644 BibliotecaTypesenseBundle/src/Client/ClientAdapter.php create mode 100644 BibliotecaTypesenseBundle/src/Client/ClientFactory.php create mode 100644 BibliotecaTypesenseBundle/src/Client/ClientInterface.php create mode 100644 BibliotecaTypesenseBundle/src/Command/TypesensePopulateCommand.php create mode 100644 BibliotecaTypesenseBundle/src/Mapper/MapperInterface.php create mode 100644 BibliotecaTypesenseBundle/src/Mapper/MapperLocator.php create mode 100644 BibliotecaTypesenseBundle/src/Mapper/Mapping.php create mode 100644 BibliotecaTypesenseBundle/src/PopulateService.php create mode 100644 BibliotecaTypesenseBundle/src/Query/TypesenseQuery.php create mode 100644 BibliotecaTypesenseBundle/src/Resources/config/services.yaml create mode 100644 config/packages/biblioteca_typesense.yaml create mode 100644 src/Mapper/BookMapper.php diff --git a/.php-cs-fixer.dist.php b/.php-cs-fixer.dist.php index a94870c7..2a55528f 100644 --- a/.php-cs-fixer.dist.php +++ 
b/.php-cs-fixer.dist.php @@ -7,6 +7,7 @@ ->in(__DIR__.'/src') ->in(__DIR__.'/tests') ->in(__DIR__.'/migrations') + ->in(__DIR__.'/BibliotecaTypesenseBundle') ; $config = new PhpCsFixer\Config('Biblioteca'); diff --git a/BibliotecaTypesenseBundle/src/BibliotecaTypesenseBundle.php b/BibliotecaTypesenseBundle/src/BibliotecaTypesenseBundle.php new file mode 100644 index 00000000..08a880a8 --- /dev/null +++ b/BibliotecaTypesenseBundle/src/BibliotecaTypesenseBundle.php @@ -0,0 +1,53 @@ +rootNode() + ->children() + ->arrayNode('typesense') + ->info('Typesense server configuration') + ->isRequired() + ->children() + ->scalarNode('uri') + ->info('The URL of the Typesense server') + ->isRequired() + ->cannotBeEmpty() + ->end() + ->scalarNode('key') + ->info('The API key for accessing the Typesense server') + ->isRequired() + ->cannotBeEmpty() + ->end() + ->scalarNode('connection_timeout_seconds') + ->defaultValue(5) + ->cannotBeEmpty() + ->end() + ->end() + ->end() + ->end(); + } + + /** + * Exposes each resolved "typesense" option as a "biblioteca_typesense.config.{key}" container parameter, tags and autowires services implementing MapperInterface with "typesense.mapper", then imports Resources/config/services.yaml. + */ + public function loadExtension(array $config, ContainerConfigurator $container, ContainerBuilder $builder): void + { + foreach ($config['typesense'] as $key => $value) { + $container->parameters()->set('biblioteca_typesense.config.'.$key, $value); + } + + $container->services() + ->instanceof(MapperInterface::class) + ->tag('typesense.mapper') + ->autowire(); + + $container->import(__DIR__.'/Resources/config/services.yaml'); + } +} diff --git a/BibliotecaTypesenseBundle/src/Client/ClientAdapter.php b/BibliotecaTypesenseBundle/src/Client/ClientAdapter.php new file mode 100644 index 00000000..a299705c --- /dev/null +++ b/BibliotecaTypesenseBundle/src/Client/ClientAdapter.php @@ -0,0 +1,78 @@ +client->$name(...$arguments); + } + + /** Delegates to getCollections() on the wrapped client. */ + public function getCollections(): Collections + { + return $this->client->getCollections(); + } + + /** Delegates to getAliases() on the wrapped client. */ + public function getAliases(): Aliases + { + return $this->client->getAliases(); + } + + /** Delegates to getKeys() on the wrapped client. */ + public function getKeys(): Keys + { + return $this->client->getKeys(); + } + + public
function getDebug(): Debug
+    {
+        return $this->client->getDebug();
+    }
+
+    public function getMetrics(): Metrics
+    {
+        return $this->client->getMetrics();
+    }
+
+    public function getHealth(): Health
+    {
+        return $this->client->getHealth();
+    }
+
+    public function getOperations(): Operations
+    {
+        return $this->client->getOperations();
+    }
+
+    public function getMultiSearch(): MultiSearch
+    {
+        return $this->client->getMultiSearch();
+    }
+
+    public function getPresets(): Presets
+    {
+        return $this->client->getPresets();
+    }
+
+    public function getAnalytics(): Analytics
+    {
+        return $this->client->getAnalytics();
+    }
+}
diff --git a/BibliotecaTypesenseBundle/src/Client/ClientFactory.php b/BibliotecaTypesenseBundle/src/Client/ClientFactory.php
new file mode 100644
index 00000000..07d1ee5e
--- /dev/null
+++ b/BibliotecaTypesenseBundle/src/Client/ClientFactory.php
@@ -0,0 +1,44 @@
+getConfiguration()));
+    }
+
+    /**
+     * Builds the configuration array for the Typesense client from the configured URI,
+     * API key and connection timeout.
+     *
+     * @return array<string, mixed>
+     *
+     * @throws \InvalidArgumentException when the URI cannot be parsed or contains no host
+     */
+    private function getConfiguration(): array
+    {
+        $urlParsed = parse_url($this->uri);
+        if ($urlParsed === false || !isset($urlParsed['host'])) {
+            throw new \InvalidArgumentException('Invalid URI');
+        }
+
+        // parse_url() only returns the components that are present in the URI, so
+        // "scheme" and "port" need explicit fallbacks instead of undefined-index reads.
+        $scheme = $urlParsed['scheme'] ?? 'http';
+        $port = $urlParsed['port'] ?? ($scheme === 'https' ? 443 : 80);
+
+        return [
+            'nodes' => [
+                [
+                    'host' => $urlParsed['host'],
+                    'port' => $port,
+                    'protocol' => $scheme,
+                ],
+            ],
+            'api_key' => $this->apiKey,
+            'connection_timeout_seconds' => $this->connectionTimeoutSeconds,
+        ];
+    }
+}
diff --git a/BibliotecaTypesenseBundle/src/Client/ClientInterface.php b/BibliotecaTypesenseBundle/src/Client/ClientInterface.php
new file mode 100644
index 00000000..bc6215de
--- /dev/null
+++ b/BibliotecaTypesenseBundle/src/Client/ClientInterface.php
@@ -0,0 +1,37 @@
+mapperLocator->count();
+        if ($count === 0) {
+            $io->warning('No mappers found. 
Declare at least one service implementing '.MapperInterface::class);
+
+            return Command::SUCCESS;
+        }
+
+        $progress = new ProgressBar($output, 0);
+
+        // Each collection is dropped, recreated and refilled from its mapper.
+        foreach ($this->mapperLocator->getMappers() as $mapper) {
+            $io->writeln('Deleting collection '.$mapper->getMapping()->getName());
+            $this->populateService->deleteCollection($mapper);
+
+            $io->writeln('Creating collection '.$mapper->getMapping()->getName());
+            $this->populateService->createCollection($mapper);
+
+            $io->writeln('Filling collection '.$mapper->getMapping()->getName());
+            $progress->start($mapper->getDataCount());
+            // fillCollection() yields once per indexed document; only the tick matters here.
+            foreach ($this->populateService->fillCollection($mapper) as $_) {
+                $progress->advance();
+            }
+            $progress->clear();
+        }
+        $progress->finish();
+
+        $io->success('Finished');
+
+        return Command::SUCCESS;
+    }
+}
diff --git a/BibliotecaTypesenseBundle/src/Mapper/MapperInterface.php b/BibliotecaTypesenseBundle/src/Mapper/MapperInterface.php
new file mode 100644
index 00000000..7c05bbe7
--- /dev/null
+++ b/BibliotecaTypesenseBundle/src/Mapper/MapperInterface.php
@@ -0,0 +1,15 @@
+>
+     */
+    public function getData(): \Generator;
+
+    public function getDataCount(): ?int;
+}
diff --git a/BibliotecaTypesenseBundle/src/Mapper/MapperLocator.php b/BibliotecaTypesenseBundle/src/Mapper/MapperLocator.php
new file mode 100644
index 00000000..0aec9bb1
--- /dev/null
+++ b/BibliotecaTypesenseBundle/src/Mapper/MapperLocator.php
@@ -0,0 +1,33 @@
+ $mappers
+     */
+    public function __construct(private iterable $mappers)
+    {
+    }
+
+    /**
+     * Yields every known mapper in the iteration order of the underlying collection.
+     *
+     * @return \Generator<int, MapperInterface>
+     */
+    public function getMappers(): \Generator
+    {
+        foreach ($this->mappers as $mapper) {
+            yield $mapper;
+        }
+    }
+
+    /**
+     * Appends a mapper to the collection.
+     *
+     * The injected iterable may be a Traversable (services.yaml passes a tagged
+     * iterator), which does not support the [] operator, so it is copied into a
+     * plain list before the first append.
+     */
+    public function addMapper(MapperInterface $mapper): void
+    {
+        if (!is_array($this->mappers)) {
+            $this->mappers = iterator_to_array($this->mappers, false);
+        }
+
+        $this->mappers[] = $mapper;
+    }
+
+    /**
+     * Returns the number of known mappers.
+     *
+     * A non-countable Traversable is copied into a list first, because count()
+     * throws a TypeError on it.
+     */
+    public function count(): int
+    {
+        if (!is_countable($this->mappers)) {
+            $this->mappers = iterator_to_array($this->mappers, false);
+        }
+
+        return count($this->mappers);
+    }
+}
diff --git a/BibliotecaTypesenseBundle/src/Mapper/Mapping.php b/BibliotecaTypesenseBundle/src/Mapper/Mapping.php
new file mode 100644
index 00000000..d41149a4
--- /dev/null
+++ 
b/BibliotecaTypesenseBundle/src/Mapper/Mapping.php
@@ -0,0 +1,40 @@
+fields;
+    }
+
+    /**
+     * Validates and stores the definition of one field, keyed by $name.
+     *
+     * "name" and "type" are required; "facet" and "optional" are accepted.
+     * NOTE(review): "optional" is removed before the definition is stored, so it
+     * never reaches the collection schema — confirm this is intended.
+     *
+     * @param array<string, mixed> $options
+     */
+    public function setField(string $name, array $options): self
+    {
+        $optionResolver = new OptionsResolver();
+        $optionResolver->setRequired(['name', 'type']);
+        $optionResolver->setDefined(['facet', 'optional']);
+
+        $data = $optionResolver->resolve($options);
+        unset($data['optional']);
+        $this->fields[$name] = $data;
+
+        return $this;
+    }
+
+    public function getName(): string
+    {
+        return $this->name;
+    }
+
+    /**
+     * Extra top-level options merged into the collection creation payload; none by default.
+     *
+     * @return array<string, mixed>
+     */
+    public function getCollectionOptions(): array
+    {
+        return [];
+    }
+}
diff --git a/BibliotecaTypesenseBundle/src/PopulateService.php b/BibliotecaTypesenseBundle/src/PopulateService.php
new file mode 100644
index 00000000..13f3ced9
--- /dev/null
+++ b/BibliotecaTypesenseBundle/src/PopulateService.php
@@ -0,0 +1,60 @@
+client->getCollections()->retrieve();
+        $names = array_column($list, 'name');
+        $name = $this->getMappingName($mapper->getMapping());
+        // Strict comparison: collection names are strings and must match exactly.
+        if (in_array($name, $names, true)) {
+            $this->client->getCollections()->offsetGet($name)->delete();
+        }
+    }
+
+    /**
+     * Creates the collection described by the mapper and returns its handle.
+     *
+     * The payload is the prefixed name, the list of field definitions and any
+     * extra collection options provided by the mapping.
+     */
+    public function createCollection(MapperInterface $mapper): Collection
+    {
+        $mapping = $mapper->getMapping();
+        $name = $this->getMappingName($mapping);
+
+        $payload = [
+            'name' => $name,
+            'fields' => array_values($mapping->getFields()),
+            ...$mapping->getCollectionOptions(),
+        ];
+
+        $this->client->getCollections()->create($payload);
+
+        return $this->client->getCollections()->offsetGet($name);
+    }
+
+    /**
+     * Indexes every item produced by the mapper, one document at a time, and
+     * yields each item after it has been sent so callers can report progress.
+     */
+    public function fillCollection(MapperInterface $mapper): \Generator
+    {
+        $mapping = $mapper->getMapping();
+        $name = $this->getMappingName($mapping);
+
+        $collection = $this->client->getCollections()->offsetGet($name);
+        $data = $mapper->getData();
+        foreach ($data as $item) {
+            $collection->documents->create($item);
+            yield $item;
+        }
+    }
+
+    /**
+     * Returns the collection name for a mapping: the configured prefix followed by the mapping name.
+     */
+    private function getMappingName(Mapper\Mapping $mapping): string
+    {
+        return $this->collectionPrefix.$mapping->getName();
+    }
+}
diff --git 
a/BibliotecaTypesenseBundle/src/Query/TypesenseQuery.php b/BibliotecaTypesenseBundle/src/Query/TypesenseQuery.php
new file mode 100644
index 00000000..3fd1ae79
--- /dev/null
+++ b/BibliotecaTypesenseBundle/src/Query/TypesenseQuery.php
@@ -0,0 +1,216 @@
+
+     */
+    private array $searchParameters;
+
+    /**
+     * Starts an empty parameter set and stores "q" and "query_by" when they are provided.
+     */
+    public function __construct(?string $q = null, ?string $queryBy = null)
+    {
+        $this->searchParameters = [];
+        if ($q !== null) {
+            $this->addParameter('q', $q);
+        }
+        if ($queryBy !== null) {
+            $this->addParameter('query_by', $queryBy);
+        }
+    }
+
+    /**
+     * Returns every parameter set so far, keyed by parameter name.
+     *
+     * @return array<string, mixed>
+     */
+    public function getParameters(): array
+    {
+        return $this->searchParameters;
+    }
+
+    /**
+     * Tells whether a parameter has been set, including one explicitly set to null.
+     */
+    public function hasParameter(string $key): bool
+    {
+        return array_key_exists($key, $this->searchParameters);
+    }
+
+    /**
+     * Maximum number of hits returned. Increasing this value might increase search latency. Use all to return all hits found.
+     *
+     * @param int|string $maxHits a number of hits, or the string "all"
+     */
+    public function maxHits(int|string $maxHits): self
+    {
+        return $this->addParameter('max_hits', $maxHits);
+    }
+
+    /**
+     * Boolean field to indicate that the last word in the query should be treated as a prefix, and not as a whole word. This is necessary for building autocomplete and instant search interfaces.
+     */
+    public function prefix(bool $prefix): self
+    {
+        return $this->addParameter('prefix', $prefix);
+    }
+
+    /**
+     * Filter conditions for refining your search results. A field can be matched against one or more values.
+     */
+    public function filterBy(string $filterBy): self
+    {
+        return $this->addParameter('filter_by', $filterBy);
+    }
+
+    /**
+     * A list of numerical fields and their corresponding sort orders that will be used for ordering your results. Separate multiple fields with a comma. Up to 3 sort fields can be specified.
+     */
+    public function sortBy(string $sortBy): self
+    {
+        return $this->addParameter('sort_by', $sortBy);
+    }
+
+    /**
+     * A list of fields that will be used for faceting your results on. 
Separate multiple fields with a comma. + */ + public function facetBy(string $facetBy): self + { + return $this->addParameter('facet_by', $facetBy); + } + + /** + * Maximum number of facet values to be returned. + */ + public function maxFacetValues(int $maxFacetValues): self + { + return $this->addParameter('max_facet_values', $maxFacetValues); + } + + /** + * Facet values that are returned can now be filtered via this parameter. The matching facet text is also highlighted. For example, when faceting by category, you can set facet_query=category:shoe to return only facet values that contain the prefix "shoe". + */ + public function facetQuery(string $facetQuery): self + { + return $this->addParameter('facet_query', $facetQuery); + } + + /** + * Number of typographical errors (1 or 2) that would be tolerated. + */ + public function numTypos(int $numTypos): self + { + return $this->addParameter('num_typos', $numTypos); + } + + /** + * Results from this specific page number would be fetched. + */ + public function page(int $page): self + { + return $this->addParameter('page', $page); + } + + /** + * Number of results to fetch per page. + */ + public function perPage(int $perPage): self + { + return $this->addParameter('per_page', $perPage); + } + + /** + * You can aggregate search results into groups or buckets by specifying one or more group_by fields. Separate multiple fields with a comma. + */ + public function groupBy(string $groupBy): self + { + return $this->addParameter('group_by', $groupBy); + } + + /** + * Maximum number of hits to be returned for every group. If the group_limit is set as K then only the top K hits in each group are returned in the response. + */ + public function groupLimit(int $groupLimit): self + { + return $this->addParameter('group_limit', $groupLimit); + } + + /** + * Comma-separated list of fields from the document to include in the search result. 
+ */ + public function includeFields(string $includeFields): self + { + return $this->addParameter('include_fields', $includeFields); + } + + /** + * Comma-separated list of fields from the document to exclude from the search result. + */ + public function excludeFields(string $excludeFields): self + { + return $this->addParameter('exclude_fields', $excludeFields); + } + + /** + * Comma-separated list of fields that should be highlighted fully without snippeting. + */ + public function highlightFullFields(string $highlightFullFields): self + { + return $this->addParameter('highlight_full_fields', $highlightFullFields); + } + + /** + * Field values under this length will be fully highlighted, instead of showing a snippet of the relevant portion. + */ + public function snippetThreshold(int $snippetThreshold): self + { + return $this->addParameter('snippet_threshold', $snippetThreshold); + } + + /** + * If the number of results found for a specific query is less than this number, Typesense will attempt to drop the tokens in the query until enough results are found. Tokens that have the least individual hits are dropped first. Set drop_tokens_threshold to 0 to disable dropping of tokens. + */ + public function dropTokensThreshold(int $dropTokensThreshold): self + { + return $this->addParameter('drop_tokens_threshold', $dropTokensThreshold); + } + + /** + * If the number of results found for a specific query is less than this number, Typesense will attempt to look for tokens with more typos until enough results are found. + */ + public function typoTokensThreshold(int $typoTokensThreshold): self + { + return $this->addParameter('typo_tokens_threshold', $typoTokensThreshold); + } + + /** + * A list of records to unconditionally include in the search results at specific positions. + * An example use case would be to feature or promote certain items on the top of search results. + * A comma separated list of record_id:hit_position. 
Eg: to include a record with ID 123 at Position 1 and another record with ID 456 at Position 5, you'd specify 123:1,456:5. + * You could also use the Overrides feature to override search results based on rules. Overrides are applied first, followed by pinned_hits and finally hidden_hits. + */ + public function pinnedHits(string $pinnedHits): self + { + return $this->addParameter('pinned_hits', $pinnedHits); + } + + /** + * A list of records to unconditionally hide from search results. + * A comma-separated list of record_ids to hide. Eg: to hide records with IDs 123 and 456, you'd specify 123,456. + * You could also use the Overrides feature to override search results based on rules. Overrides are applied first, followed by pinned_hits and finally hidden_hits. + */ + public function hiddenHits(string $hiddenHits): self + { + return $this->addParameter('hidden_hits', $hiddenHits); + } + + /** + * Generic method for adding any parameter to the TypesenseQuery; a value already stored under the same key is overwritten. + */ + public function addParameter(string $key, mixed $value): self + { + $this->searchParameters[$key] = $value; + + return $this; + } +} diff --git a/BibliotecaTypesenseBundle/src/Resources/config/services.yaml b/BibliotecaTypesenseBundle/src/Resources/config/services.yaml new file mode 100644 index 00000000..cb19d68e --- /dev/null +++ b/BibliotecaTypesenseBundle/src/Resources/config/services.yaml @@ -0,0 +1,24 @@ +services: + _defaults: + autowire: true + autoconfigure: true + public: false + + Biblioteca\TypesenseBundle\Client\ClientFactory: + arguments: + $uri: '%biblioteca_typesense.config.uri%' + $apiKey: '%biblioteca_typesense.config.key%' + $connectionTimeoutSeconds: '%biblioteca_typesense.config.connection_timeout_seconds%' + + Biblioteca\TypesenseBundle\Client\ClientInterface: + factory: '@Biblioteca\TypesenseBundle\Client\ClientFactory' + + Biblioteca\TypesenseBundle\PopulateService: ~ + + Biblioteca\TypesenseBundle\Mapper\MapperLocator: + arguments: + $mappers: !tagged typesense.mapper + 
Biblioteca\TypesenseBundle\Command\: + resource: '../../Command/' + tags: ['console.command'] \ No newline at end of file diff --git a/composer.json b/composer.json index e652491d..d2f1395a 100644 --- a/composer.json +++ b/composer.json @@ -83,7 +83,8 @@ "autoload": { "psr-4": { "App\\": "src/", - "DoctrineMigrations\\": "migrations/" + "DoctrineMigrations\\": "migrations/", + "Biblioteca\\TypesenseBundle\\": "BibliotecaTypesenseBundle/src" } }, "autoload-dev": { diff --git a/config/bundles.php b/config/bundles.php index 0d7a965a..78a8814b 100644 --- a/config/bundles.php +++ b/config/bundles.php @@ -22,5 +22,6 @@ Andante\PageFilterFormBundle\AndantePageFilterFormBundle::class => ['all' => true], Symandy\DatabaseBackupBundle\SymandyDatabaseBackupBundle::class => ['all' => true], ACSEO\TypesenseBundle\ACSEOTypesenseBundle::class => ['all' => true], + Biblioteca\TypesenseBundle\BibliotecaTypesenseBundle::class => ['all' => true], Doctrine\Bundle\FixturesBundle\DoctrineFixturesBundle::class => ['dev' => true, 'test' => true], ]; diff --git a/config/packages/biblioteca_typesense.yaml b/config/packages/biblioteca_typesense.yaml new file mode 100644 index 00000000..d7d168bc --- /dev/null +++ b/config/packages/biblioteca_typesense.yaml @@ -0,0 +1,4 @@ +biblioteca_typesense: + typesense: + uri: '%env(resolve:TYPESENSE_URL)%' + key: '%env(resolve:TYPESENSE_KEY)%' diff --git a/config/services.yaml b/config/services.yaml index 9dffd4e3..b6b39231 100644 --- a/config/services.yaml +++ b/config/services.yaml @@ -125,6 +125,9 @@ services: $accessTokenExtractor: '@App\Security\KoboTokenExtractor' $accessTokenHandler: '@App\Security\KoboTokenHandler' + App\Mapper\BookMapper: + tags: + - { name: typesense.mapper } when@dev: services: Symfony\Component\HttpKernel\Profiler\Profiler: '@profiler' \ No newline at end of file diff --git a/src/Mapper/BookMapper.php b/src/Mapper/BookMapper.php new file mode 100644 index 00000000..5714595e --- /dev/null +++ b/src/Mapper/BookMapper.php @@ -0,0 
+1,68 @@
+setField('id', ['name' => 'id', 'type' => 'string']);
+        $mapping->setField('title', ['name' => 'title', 'type' => 'string']);
+        $mapping->setField('sortable_id', ['name' => 'sortable_id', 'type' => 'int32']);
+        $mapping->setField('serie', ['name' => 'serie', 'type' => 'string', 'optional' => true, 'facet' => true]);
+        $mapping->setField('summary', ['name' => 'summary', 'type' => 'string', 'optional' => true]);
+        $mapping->setField('serieIndex', ['name' => 'serieIndex', 'type' => 'string', 'optional' => true]);
+        $mapping->setField('extension', ['name' => 'extension', 'type' => 'string', 'facet' => true]);
+        $mapping->setField('authors', ['name' => 'authors', 'type' => 'string[]', 'facet' => true]);
+        $mapping->setField('tags', ['name' => 'tags', 'type' => 'string[]', 'facet' => true, 'optional' => true]);
+
+        return $mapping;
+    }
+
+    /**
+     * Streams every book, ordered by ascending id, as a document array ready for indexing.
+     *
+     * @return \Generator<int, array<string, mixed>>
+     */
+    public function getData(): \Generator
+    {
+        $queryBuilder = $this->bookRepository->createQueryBuilder('book')
+            ->select('book')
+            ->orderBy('book.id', 'ASC');
+
+        $query = $queryBuilder->getQuery();
+
+        // toIterable() hydrates rows one at a time instead of loading the full result set.
+        foreach ($query->toIterable() as $data) {
+            yield $this->transform($data);
+        }
+    }
+
+    /**
+     * Returns the number of distinct books, used as the progress bar maximum.
+     */
+    public function getDataCount(): ?int
+    {
+        $queryBuilder = $this->bookRepository->createQueryBuilder('book')
+            ->select('COUNT(distinct book.id)');
+
+        return (int) $queryBuilder->getQuery()->getSingleScalarResult();
+    }
+
+    /**
+     * Converts a Book entity into the document array declared by the mapping.
+     *
+     * The id is sent as a string to match the "string" type declared for it;
+     * the numeric id is duplicated in "sortable_id" (declared int32).
+     *
+     * @return array<string, mixed>
+     */
+    private function transform(Book $book): array
+    {
+        return [
+            'id' => (string) $book->getId(),
+            'title' => $book->getTitle(),
+            'sortable_id' => $book->getId(),
+            'serie' => (string) $book->getSerie(),
+            'summary' => (string) $book->getSummary(),
+            'serieIndex' => (string) $book->getSerieIndex(),
+            'extension' => $book->getExtension(),
+            'authors' => $book->getAuthors(),
+            'tags' => $book->getTags(),
+        ];
+    }
+}