From b0df20ff66f78e0dcc29695e7b21b2c82b01c3cd Mon Sep 17 00:00:00 2001 From: Javier Torres Date: Wed, 18 Sep 2024 12:50:15 +0200 Subject: [PATCH] wip --- .../src/nucliadb_models/internal/shards.py | 1 + nucliadb_node/src/shards/shard_reader.rs | 1 + nucliadb_node/src/shards/shard_writer.rs | 1 + nucliadb_protos/noderesources.proto | 1 + .../src/nucliadb_protos/noderesources_pb2.py | 130 +++++------ .../src/nucliadb_protos/noderesources_pb2.pyi | 2 + nucliadb_protos/rust/src/noderesources.rs | 3 + nucliadb_texts3/src/reader.rs | 210 ++++++++---------- nucliadb_texts3/src/schema.rs | 6 +- nucliadb_texts3/src/search_query.rs | 18 +- nucliadb_texts3/src/writer.rs | 4 +- 11 files changed, 186 insertions(+), 191 deletions(-) diff --git a/nucliadb_models/src/nucliadb_models/internal/shards.py b/nucliadb_models/src/nucliadb_models/internal/shards.py index ceae4be23e..aecc35a4dc 100644 --- a/nucliadb_models/src/nucliadb_models/internal/shards.py +++ b/nucliadb_models/src/nucliadb_models/internal/shards.py @@ -33,6 +33,7 @@ class DocumentServiceEnum(str, Enum): DOCUMENT_V0 = "DOCUMENT_V0" DOCUMENT_V1 = "DOCUMENT_V1" DOCUMENT_V2 = "DOCUMENT_V2" + DOCUMENT_V3 = "DOCUMENT_V3" class ParagraphServiceEnum(str, Enum): diff --git a/nucliadb_node/src/shards/shard_reader.rs b/nucliadb_node/src/shards/shard_reader.rs index 8406f7395c..e2ddcd1da5 100644 --- a/nucliadb_node/src/shards/shard_reader.rs +++ b/nucliadb_node/src/shards/shard_reader.rs @@ -156,6 +156,7 @@ impl ShardReader { 0 => DocumentService::DocumentV0, 1 => DocumentService::DocumentV1, 2 => DocumentService::DocumentV2, + 3 => DocumentService::DocumentV3, i => panic!("Unknown document version {i}"), } } diff --git a/nucliadb_node/src/shards/shard_writer.rs b/nucliadb_node/src/shards/shard_writer.rs index e7147d4394..d9dd015bfd 100644 --- a/nucliadb_node/src/shards/shard_writer.rs +++ b/nucliadb_node/src/shards/shard_writer.rs @@ -72,6 +72,7 @@ impl ShardWriter { 0 => DocumentService::DocumentV0, 1 => DocumentService::DocumentV1, 2 => DocumentService::DocumentV2, + 3 => DocumentService::DocumentV3, i => panic!("Unknown document version {i}"), } } diff --git a/nucliadb_protos/noderesources.proto b/nucliadb_protos/noderesources.proto index 7eae30f924..85f45a6e20 100644 --- a/nucliadb_protos/noderesources.proto +++ b/nucliadb_protos/noderesources.proto @@ -29,6 +29,7 @@ message ShardCreated { DOCUMENT_V0 = 0; DOCUMENT_V1 = 1; DOCUMENT_V2 = 2; + DOCUMENT_V3 = 3; } DocumentService document_service = 2; enum ParagraphService { diff --git a/nucliadb_protos/python/src/nucliadb_protos/noderesources_pb2.py b/nucliadb_protos/python/src/nucliadb_protos/noderesources_pb2.py index 0b1f4ade84..2e1869591e 100644 --- a/nucliadb_protos/python/src/nucliadb_protos/noderesources_pb2.py +++ b/nucliadb_protos/python/src/nucliadb_protos/noderesources_pb2.py @@ -17,7 +17,7 @@ from nucliadb_protos.utils_pb2 import * -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n#nucliadb_protos/noderesources.proto\x12\rnoderesources\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x1bnucliadb_protos/utils.proto\"/\n\x0fTextInformation\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\x0e\n\x06labels\x18\x02 \x03(\t\"j\n\rIndexMetadata\x12,\n\x08modified\x18\x01 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12+\n\x07\x63reated\x18\x02 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\"\x15\n\x07ShardId\x12\n\n\x02id\x18\x01 \x01(\t\"/\n\x08ShardIds\x12#\n\x03ids\x18\x01 \x03(\x0b\x32\x16.noderesources.ShardId\"\xcb\x04\n\x0cShardCreated\x12\n\n\x02id\x18\x01 \x01(\t\x12\x45\n\x10\x64ocument_service\x18\x02 \x01(\x0e\x32+.noderesources.ShardCreated.DocumentService\x12G\n\x11paragraph_service\x18\x03 \x01(\x0e\x32,.noderesources.ShardCreated.ParagraphService\x12\x41\n\x0evector_service\x18\x04 \x01(\x0e\x32).noderesources.ShardCreated.VectorService\x12\x45\n\x10relation_service\x18\x05 \x01(\x0e\x32+.noderesources.ShardCreated.RelationService\"D\n\x0f\x44ocumentService\x12\x0f\n\x0b\x44OCUMENT_V0\x10\x00\x12\x0f\n\x0b\x44OCUMENT_V1\x10\x01\x12\x0f\n\x0b\x44OCUMENT_V2\x10\x02\"Z\n\x10ParagraphService\x12\x10\n\x0cPARAGRAPH_V0\x10\x00\x12\x10\n\x0cPARAGRAPH_V1\x10\x01\x12\x10\n\x0cPARAGRAPH_V2\x10\x02\x12\x10\n\x0cPARAGRAPH_V3\x10\x03\"-\n\rVectorService\x12\r\n\tVECTOR_V0\x10\x00\x12\r\n\tVECTOR_V1\x10\x01\"D\n\x0fRelationService\x12\x0f\n\x0bRELATION_V0\x10\x00\x12\x0f\n\x0bRELATION_V1\x10\x01\x12\x0f\n\x0bRELATION_V2\x10\x02\",\n\nResourceID\x12\x10\n\x08shard_id\x18\x01 \x01(\t\x12\x0c\n\x04uuid\x18\x02 \x01(\t\"\x80\x01\n\x05Shard\x12.\n\x08metadata\x18\x05 \x01(\x0b\x32\x1c.noderesources.ShardMetadata\x12\x10\n\x08shard_id\x18\x01 \x01(\t\x12\x0e\n\x06\x66ields\x18\x02 \x01(\x04\x12\x12\n\nparagraphs\x18\x03 \x01(\x04\x12\x11\n\tsentences\x18\x04 \x01(\x04\"\x0f\n\rEmptyResponse\"\x0c\n\nEmptyQuery\"\x87\x01\n\x08Position\x12\r\n\x05index\x18\x01 \x01(\x04\x12\r\n\x05start\x18\x02 \x01(\x04\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x04\x12\x13\n\x0bpage_number\x18\x04 \x01(\x04\x12\x0f\n\x07in_page\x18\x07 \x01(\x08\x12\x15\n\rstart_seconds\x18\x05 \x03(\r\x12\x13\n\x0b\x65nd_seconds\x18\x06 \x03(\r\"2\n\x0eRepresentation\x12\x12\n\nis_a_table\x18\x01 \x01(\x08\x12\x0c\n\x04\x66ile\x18\x02 \x01(\t\"\x8e\x01\n\x10SentenceMetadata\x12)\n\x08position\x18\x01 \x01(\x0b\x32\x17.noderesources.Position\x12\x18\n\x10page_with_visual\x18\x02 \x01(\x08\x12\x35\n\x0erepresentation\x18\x03 \x01(\x0b\x32\x1d.noderesources.Representation\"S\n\x0eVectorSentence\x12\x0e\n\x06vector\x18\x01 \x03(\x02\x12\x31\n\x08metadata\x18\t \x01(\x0b\x32\x1f.noderesources.SentenceMetadata\"\xaa\x01\n\x12VectorsetSentences\x12\x43\n\tsentences\x18\x01 \x03(\x0b\x32\x30.noderesources.VectorsetSentences.SentencesEntry\x1aO\n\x0eSentencesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12,\n\x05value\x18\x02 \x01(\x0b\x32\x1d.noderesources.VectorSentence:\x02\x38\x01\"\x8f\x01\n\x11ParagraphMetadata\x12)\n\x08position\x18\x01 \x01(\x0b\x32\x17.noderesources.Position\x12\x18\n\x10page_with_visual\x18\x02 \x01(\x08\x12\x35\n\x0erepresentation\x18\x03 \x01(\x0b\x32\x1d.noderesources.Representation\"\xff\x03\n\x0eIndexParagraph\x12\r\n\x05start\x18\x01 \x01(\x05\x12\x0b\n\x03\x65nd\x18\x02 \x01(\x05\x12\x0e\n\x06labels\x18\x03 \x03(\t\x12?\n\tsentences\x18\x04 \x03(\x0b\x32,.noderesources.IndexParagraph.SentencesEntry\x12T\n\x14vectorsets_sentences\x18\n \x03(\x0b\x32\x36.noderesources.IndexParagraph.VectorsetsSentencesEntry\x12\r\n\x05\x66ield\x18\x05 \x01(\t\x12\r\n\x05split\x18\x06 \x01(\t\x12\r\n\x05index\x18\x07 \x01(\x04\x12\x19\n\x11repeated_in_field\x18\x08 \x01(\x08\x12\x32\n\x08metadata\x18\t \x01(\x0b\x32 .noderesources.ParagraphMetadata\x1aO\n\x0eSentencesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12,\n\x05value\x18\x02 \x01(\x0b\x32\x1d.noderesources.VectorSentence:\x02\x38\x01\x1a]\n\x18VectorsetsSentencesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x30\n\x05value\x18\x02 \x01(\x0b\x32!.noderesources.VectorsetSentences:\x02\x38\x01\"G\n\x0bVectorSetID\x12%\n\x05shard\x18\x01 \x01(\x0b\x32\x16.noderesources.ShardId\x12\x11\n\tvectorset\x18\x02 \x01(\t\"J\n\rVectorSetList\x12%\n\x05shard\x18\x01 \x01(\x0b\x32\x16.noderesources.ShardId\x12\x12\n\nvectorsets\x18\x02 \x03(\t\"\xa7\x01\n\x0fIndexParagraphs\x12\x42\n\nparagraphs\x18\x01 \x03(\x0b\x32..noderesources.IndexParagraphs.ParagraphsEntry\x1aP\n\x0fParagraphsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12,\n\x05value\x18\x02 \x01(\x0b\x32\x1d.noderesources.IndexParagraph:\x02\x38\x01\"\xec\x07\n\x08Resource\x12+\n\x08resource\x18\x01 \x01(\x0b\x32\x19.noderesources.ResourceID\x12.\n\x08metadata\x18\x02 \x01(\x0b\x32\x1c.noderesources.IndexMetadata\x12\x31\n\x05texts\x18\x03 \x03(\x0b\x32\".noderesources.Resource.TextsEntry\x12\x0e\n\x06labels\x18\x04 \x03(\t\x12\x36\n\x06status\x18\x05 \x01(\x0e\x32&.noderesources.Resource.ResourceStatus\x12;\n\nparagraphs\x18\x06 \x03(\x0b\x32\'.noderesources.Resource.ParagraphsEntry\x12\x1c\n\x14paragraphs_to_delete\x18\x07 \x03(\t\x12\x1b\n\x13sentences_to_delete\x18\x08 \x03(\t\x12\"\n\trelations\x18\t \x03(\x0b\x32\x0f.utils.Relation\x12\x10\n\x08shard_id\x18\x0b \x01(\t\x12\x39\n\x07vectors\x18\x0c \x03(\x0b\x32$.noderesources.Resource.VectorsEntryB\x02\x18\x01\x12K\n\x11vectors_to_delete\x18\r \x03(\x0b\x32,.noderesources.Resource.VectorsToDeleteEntryB\x02\x18\x01\x12&\n\x08security\x18\x0e \x01(\x0b\x32\x0f.utils.SecurityH\x00\x88\x01\x01\x1aL\n\nTextsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12-\n\x05value\x18\x02 \x01(\x0b\x32\x1e.noderesources.TextInformation:\x02\x38\x01\x1aQ\n\x0fParagraphsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12-\n\x05value\x18\x02 \x01(\x0b\x32\x1e.noderesources.IndexParagraphs:\x02\x38\x01\x1a\x42\n\x0cVectorsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12!\n\x05value\x18\x02 \x01(\x0b\x32\x12.utils.UserVectors:\x02\x38\x01\x1aN\n\x14VectorsToDeleteEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12%\n\x05value\x18\x02 \x01(\x0b\x32\x16.utils.UserVectorsList:\x02\x38\x01\"h\n\x0eResourceStatus\x12\r\n\tPROCESSED\x10\x00\x12\t\n\x05\x45MPTY\x10\x01\x12\t\n\x05\x45RROR\x10\x02\x12\n\n\x06\x44\x45LETE\x10\x03\x12\x0b\n\x07PENDING\x10\x04\x12\x0b\n\x07\x42LOCKED\x10\x05\x12\x0b\n\x07\x45XPIRED\x10\x06\x42\x0b\n\t_security\"M\n\rShardMetadata\x12\x0c\n\x04kbid\x18\x01 \x01(\t\x12.\n\x0frelease_channel\x18\x02 \x01(\x0e\x32\x15.utils.ReleaseChannel\"\xf8\x02\n\x0cNodeMetadata\x12\x16\n\nload_score\x18\x01 \x01(\x02\x42\x02\x18\x01\x12\x13\n\x0bshard_count\x18\x02 \x01(\x04\x12;\n\x06shards\x18\x03 \x03(\x0b\x32\'.noderesources.NodeMetadata.ShardsEntryB\x02\x18\x01\x12\x0f\n\x07node_id\x18\x04 \x01(\t\x12\x1c\n\x0fprimary_node_id\x18\x05 \x01(\tH\x00\x88\x01\x01\x12\x16\n\x0e\x61vailable_disk\x18\x06 \x01(\x04\x12\x12\n\ntotal_disk\x18\x07 \x01(\x04\x1a\x35\n\rShardMetadata\x12\x0c\n\x04kbid\x18\x01 \x01(\t\x12\x16\n\nload_score\x18\x02 \x01(\x02\x42\x02\x18\x01\x1aX\n\x0bShardsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x38\n\x05value\x18\x02 \x01(\x0b\x32).noderesources.NodeMetadata.ShardMetadata:\x02\x38\x01\x42\x12\n\x10_primary_node_idP\x01\x62\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n#nucliadb_protos/noderesources.proto\x12\rnoderesources\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x1bnucliadb_protos/utils.proto\"/\n\x0fTextInformation\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\x0e\n\x06labels\x18\x02 \x03(\t\"j\n\rIndexMetadata\x12,\n\x08modified\x18\x01 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12+\n\x07\x63reated\x18\x02 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\"\x15\n\x07ShardId\x12\n\n\x02id\x18\x01 \x01(\t\"/\n\x08ShardIds\x12#\n\x03ids\x18\x01 \x03(\x0b\x32\x16.noderesources.ShardId\"\xdc\x04\n\x0cShardCreated\x12\n\n\x02id\x18\x01 \x01(\t\x12\x45\n\x10\x64ocument_service\x18\x02 \x01(\x0e\x32+.noderesources.ShardCreated.DocumentService\x12G\n\x11paragraph_service\x18\x03 \x01(\x0e\x32,.noderesources.ShardCreated.ParagraphService\x12\x41\n\x0evector_service\x18\x04 \x01(\x0e\x32).noderesources.ShardCreated.VectorService\x12\x45\n\x10relation_service\x18\x05 \x01(\x0e\x32+.noderesources.ShardCreated.RelationService\"U\n\x0f\x44ocumentService\x12\x0f\n\x0b\x44OCUMENT_V0\x10\x00\x12\x0f\n\x0b\x44OCUMENT_V1\x10\x01\x12\x0f\n\x0b\x44OCUMENT_V2\x10\x02\x12\x0f\n\x0b\x44OCUMENT_V3\x10\x03\"Z\n\x10ParagraphService\x12\x10\n\x0cPARAGRAPH_V0\x10\x00\x12\x10\n\x0cPARAGRAPH_V1\x10\x01\x12\x10\n\x0cPARAGRAPH_V2\x10\x02\x12\x10\n\x0cPARAGRAPH_V3\x10\x03\"-\n\rVectorService\x12\r\n\tVECTOR_V0\x10\x00\x12\r\n\tVECTOR_V1\x10\x01\"D\n\x0fRelationService\x12\x0f\n\x0bRELATION_V0\x10\x00\x12\x0f\n\x0bRELATION_V1\x10\x01\x12\x0f\n\x0bRELATION_V2\x10\x02\",\n\nResourceID\x12\x10\n\x08shard_id\x18\x01 \x01(\t\x12\x0c\n\x04uuid\x18\x02 \x01(\t\"\x80\x01\n\x05Shard\x12.\n\x08metadata\x18\x05 \x01(\x0b\x32\x1c.noderesources.ShardMetadata\x12\x10\n\x08shard_id\x18\x01 \x01(\t\x12\x0e\n\x06\x66ields\x18\x02 \x01(\x04\x12\x12\n\nparagraphs\x18\x03 \x01(\x04\x12\x11\n\tsentences\x18\x04 \x01(\x04\"\x0f\n\rEmptyResponse\"\x0c\n\nEmptyQuery\"\x87\x01\n\x08Position\x12\r\n\x05index\x18\x01 \x01(\x04\x12\r\n\x05start\x18\x02 \x01(\x04\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x04\x12\x13\n\x0bpage_number\x18\x04 \x01(\x04\x12\x0f\n\x07in_page\x18\x07 \x01(\x08\x12\x15\n\rstart_seconds\x18\x05 \x03(\r\x12\x13\n\x0b\x65nd_seconds\x18\x06 \x03(\r\"2\n\x0eRepresentation\x12\x12\n\nis_a_table\x18\x01 \x01(\x08\x12\x0c\n\x04\x66ile\x18\x02 \x01(\t\"\x8e\x01\n\x10SentenceMetadata\x12)\n\x08position\x18\x01 \x01(\x0b\x32\x17.noderesources.Position\x12\x18\n\x10page_with_visual\x18\x02 \x01(\x08\x12\x35\n\x0erepresentation\x18\x03 \x01(\x0b\x32\x1d.noderesources.Representation\"S\n\x0eVectorSentence\x12\x0e\n\x06vector\x18\x01 \x03(\x02\x12\x31\n\x08metadata\x18\t \x01(\x0b\x32\x1f.noderesources.SentenceMetadata\"\xaa\x01\n\x12VectorsetSentences\x12\x43\n\tsentences\x18\x01 \x03(\x0b\x32\x30.noderesources.VectorsetSentences.SentencesEntry\x1aO\n\x0eSentencesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12,\n\x05value\x18\x02 \x01(\x0b\x32\x1d.noderesources.VectorSentence:\x02\x38\x01\"\x8f\x01\n\x11ParagraphMetadata\x12)\n\x08position\x18\x01 \x01(\x0b\x32\x17.noderesources.Position\x12\x18\n\x10page_with_visual\x18\x02 \x01(\x08\x12\x35\n\x0erepresentation\x18\x03 \x01(\x0b\x32\x1d.noderesources.Representation\"\xff\x03\n\x0eIndexParagraph\x12\r\n\x05start\x18\x01 \x01(\x05\x12\x0b\n\x03\x65nd\x18\x02 \x01(\x05\x12\x0e\n\x06labels\x18\x03 \x03(\t\x12?\n\tsentences\x18\x04 \x03(\x0b\x32,.noderesources.IndexParagraph.SentencesEntry\x12T\n\x14vectorsets_sentences\x18\n \x03(\x0b\x32\x36.noderesources.IndexParagraph.VectorsetsSentencesEntry\x12\r\n\x05\x66ield\x18\x05 \x01(\t\x12\r\n\x05split\x18\x06 \x01(\t\x12\r\n\x05index\x18\x07 \x01(\x04\x12\x19\n\x11repeated_in_field\x18\x08 \x01(\x08\x12\x32\n\x08metadata\x18\t \x01(\x0b\x32 .noderesources.ParagraphMetadata\x1aO\n\x0eSentencesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12,\n\x05value\x18\x02 \x01(\x0b\x32\x1d.noderesources.VectorSentence:\x02\x38\x01\x1a]\n\x18VectorsetsSentencesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x30\n\x05value\x18\x02 \x01(\x0b\x32!.noderesources.VectorsetSentences:\x02\x38\x01\"G\n\x0bVectorSetID\x12%\n\x05shard\x18\x01 \x01(\x0b\x32\x16.noderesources.ShardId\x12\x11\n\tvectorset\x18\x02 \x01(\t\"J\n\rVectorSetList\x12%\n\x05shard\x18\x01 \x01(\x0b\x32\x16.noderesources.ShardId\x12\x12\n\nvectorsets\x18\x02 \x03(\t\"\xa7\x01\n\x0fIndexParagraphs\x12\x42\n\nparagraphs\x18\x01 \x03(\x0b\x32..noderesources.IndexParagraphs.ParagraphsEntry\x1aP\n\x0fParagraphsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12,\n\x05value\x18\x02 \x01(\x0b\x32\x1d.noderesources.IndexParagraph:\x02\x38\x01\"\xec\x07\n\x08Resource\x12+\n\x08resource\x18\x01 \x01(\x0b\x32\x19.noderesources.ResourceID\x12.\n\x08metadata\x18\x02 \x01(\x0b\x32\x1c.noderesources.IndexMetadata\x12\x31\n\x05texts\x18\x03 \x03(\x0b\x32\".noderesources.Resource.TextsEntry\x12\x0e\n\x06labels\x18\x04 \x03(\t\x12\x36\n\x06status\x18\x05 \x01(\x0e\x32&.noderesources.Resource.ResourceStatus\x12;\n\nparagraphs\x18\x06 \x03(\x0b\x32\'.noderesources.Resource.ParagraphsEntry\x12\x1c\n\x14paragraphs_to_delete\x18\x07 \x03(\t\x12\x1b\n\x13sentences_to_delete\x18\x08 \x03(\t\x12\"\n\trelations\x18\t \x03(\x0b\x32\x0f.utils.Relation\x12\x10\n\x08shard_id\x18\x0b \x01(\t\x12\x39\n\x07vectors\x18\x0c \x03(\x0b\x32$.noderesources.Resource.VectorsEntryB\x02\x18\x01\x12K\n\x11vectors_to_delete\x18\r \x03(\x0b\x32,.noderesources.Resource.VectorsToDeleteEntryB\x02\x18\x01\x12&\n\x08security\x18\x0e \x01(\x0b\x32\x0f.utils.SecurityH\x00\x88\x01\x01\x1aL\n\nTextsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12-\n\x05value\x18\x02 \x01(\x0b\x32\x1e.noderesources.TextInformation:\x02\x38\x01\x1aQ\n\x0fParagraphsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12-\n\x05value\x18\x02 \x01(\x0b\x32\x1e.noderesources.IndexParagraphs:\x02\x38\x01\x1a\x42\n\x0cVectorsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12!\n\x05value\x18\x02 \x01(\x0b\x32\x12.utils.UserVectors:\x02\x38\x01\x1aN\n\x14VectorsToDeleteEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12%\n\x05value\x18\x02 \x01(\x0b\x32\x16.utils.UserVectorsList:\x02\x38\x01\"h\n\x0eResourceStatus\x12\r\n\tPROCESSED\x10\x00\x12\t\n\x05\x45MPTY\x10\x01\x12\t\n\x05\x45RROR\x10\x02\x12\n\n\x06\x44\x45LETE\x10\x03\x12\x0b\n\x07PENDING\x10\x04\x12\x0b\n\x07\x42LOCKED\x10\x05\x12\x0b\n\x07\x45XPIRED\x10\x06\x42\x0b\n\t_security\"M\n\rShardMetadata\x12\x0c\n\x04kbid\x18\x01 \x01(\t\x12.\n\x0frelease_channel\x18\x02 \x01(\x0e\x32\x15.utils.ReleaseChannel\"\xf8\x02\n\x0cNodeMetadata\x12\x16\n\nload_score\x18\x01 \x01(\x02\x42\x02\x18\x01\x12\x13\n\x0bshard_count\x18\x02 \x01(\x04\x12;\n\x06shards\x18\x03 \x03(\x0b\x32\'.noderesources.NodeMetadata.ShardsEntryB\x02\x18\x01\x12\x0f\n\x07node_id\x18\x04 \x01(\t\x12\x1c\n\x0fprimary_node_id\x18\x05 \x01(\tH\x00\x88\x01\x01\x12\x16\n\x0e\x61vailable_disk\x18\x06 \x01(\x04\x12\x12\n\ntotal_disk\x18\x07 \x01(\x04\x1a\x35\n\rShardMetadata\x12\x0c\n\x04kbid\x18\x01 \x01(\t\x12\x16\n\nload_score\x18\x02 \x01(\x02\x42\x02\x18\x01\x1aX\n\x0bShardsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x38\n\x05value\x18\x02 \x01(\x0b\x32).noderesources.NodeMetadata.ShardMetadata:\x02\x38\x01\x42\x12\n\x10_primary_node_idP\x01\x62\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -61,69 +61,69 @@ _globals['_SHARDIDS']._serialized_start=296 _globals['_SHARDIDS']._serialized_end=343 _globals['_SHARDCREATED']._serialized_start=346 - _globals['_SHARDCREATED']._serialized_end=933 + _globals['_SHARDCREATED']._serialized_end=950 _globals['_SHARDCREATED_DOCUMENTSERVICE']._serialized_start=656 - _globals['_SHARDCREATED_DOCUMENTSERVICE']._serialized_end=724 - _globals['_SHARDCREATED_PARAGRAPHSERVICE']._serialized_start=726 - _globals['_SHARDCREATED_PARAGRAPHSERVICE']._serialized_end=816 - _globals['_SHARDCREATED_VECTORSERVICE']._serialized_start=818 - _globals['_SHARDCREATED_VECTORSERVICE']._serialized_end=863 - _globals['_SHARDCREATED_RELATIONSERVICE']._serialized_start=865 - _globals['_SHARDCREATED_RELATIONSERVICE']._serialized_end=933 - _globals['_RESOURCEID']._serialized_start=935 - _globals['_RESOURCEID']._serialized_end=979 - _globals['_SHARD']._serialized_start=982 - _globals['_SHARD']._serialized_end=1110 - _globals['_EMPTYRESPONSE']._serialized_start=1112 - _globals['_EMPTYRESPONSE']._serialized_end=1127 - _globals['_EMPTYQUERY']._serialized_start=1129 - _globals['_EMPTYQUERY']._serialized_end=1141 - _globals['_POSITION']._serialized_start=1144 - _globals['_POSITION']._serialized_end=1279 - _globals['_REPRESENTATION']._serialized_start=1281 - _globals['_REPRESENTATION']._serialized_end=1331 - _globals['_SENTENCEMETADATA']._serialized_start=1334 - _globals['_SENTENCEMETADATA']._serialized_end=1476 - _globals['_VECTORSENTENCE']._serialized_start=1478 - _globals['_VECTORSENTENCE']._serialized_end=1561 - _globals['_VECTORSETSENTENCES']._serialized_start=1564 - _globals['_VECTORSETSENTENCES']._serialized_end=1734 - _globals['_VECTORSETSENTENCES_SENTENCESENTRY']._serialized_start=1655 - _globals['_VECTORSETSENTENCES_SENTENCESENTRY']._serialized_end=1734 - _globals['_PARAGRAPHMETADATA']._serialized_start=1737 - _globals['_PARAGRAPHMETADATA']._serialized_end=1880 - _globals['_INDEXPARAGRAPH']._serialized_start=1883 - _globals['_INDEXPARAGRAPH']._serialized_end=2394 - _globals['_INDEXPARAGRAPH_SENTENCESENTRY']._serialized_start=1655 - _globals['_INDEXPARAGRAPH_SENTENCESENTRY']._serialized_end=1734 - _globals['_INDEXPARAGRAPH_VECTORSETSSENTENCESENTRY']._serialized_start=2301 - _globals['_INDEXPARAGRAPH_VECTORSETSSENTENCESENTRY']._serialized_end=2394 - _globals['_VECTORSETID']._serialized_start=2396 - _globals['_VECTORSETID']._serialized_end=2467 - _globals['_VECTORSETLIST']._serialized_start=2469 - _globals['_VECTORSETLIST']._serialized_end=2543 - _globals['_INDEXPARAGRAPHS']._serialized_start=2546 - _globals['_INDEXPARAGRAPHS']._serialized_end=2713 - _globals['_INDEXPARAGRAPHS_PARAGRAPHSENTRY']._serialized_start=2633 - _globals['_INDEXPARAGRAPHS_PARAGRAPHSENTRY']._serialized_end=2713 - _globals['_RESOURCE']._serialized_start=2716 - _globals['_RESOURCE']._serialized_end=3720 - _globals['_RESOURCE_TEXTSENTRY']._serialized_start=3294 - _globals['_RESOURCE_TEXTSENTRY']._serialized_end=3370 - _globals['_RESOURCE_PARAGRAPHSENTRY']._serialized_start=3372 - _globals['_RESOURCE_PARAGRAPHSENTRY']._serialized_end=3453 - _globals['_RESOURCE_VECTORSENTRY']._serialized_start=3455 - _globals['_RESOURCE_VECTORSENTRY']._serialized_end=3521 - _globals['_RESOURCE_VECTORSTODELETEENTRY']._serialized_start=3523 - _globals['_RESOURCE_VECTORSTODELETEENTRY']._serialized_end=3601 - _globals['_RESOURCE_RESOURCESTATUS']._serialized_start=3603 - _globals['_RESOURCE_RESOURCESTATUS']._serialized_end=3707 - _globals['_SHARDMETADATA']._serialized_start=3722 - _globals['_SHARDMETADATA']._serialized_end=3799 - _globals['_NODEMETADATA']._serialized_start=3802 - _globals['_NODEMETADATA']._serialized_end=4178 - _globals['_NODEMETADATA_SHARDMETADATA']._serialized_start=4015 - _globals['_NODEMETADATA_SHARDMETADATA']._serialized_end=4068 - _globals['_NODEMETADATA_SHARDSENTRY']._serialized_start=4070 - _globals['_NODEMETADATA_SHARDSENTRY']._serialized_end=4158 + _globals['_SHARDCREATED_DOCUMENTSERVICE']._serialized_end=741 + _globals['_SHARDCREATED_PARAGRAPHSERVICE']._serialized_start=743 + _globals['_SHARDCREATED_PARAGRAPHSERVICE']._serialized_end=833 + _globals['_SHARDCREATED_VECTORSERVICE']._serialized_start=835 + _globals['_SHARDCREATED_VECTORSERVICE']._serialized_end=880 + _globals['_SHARDCREATED_RELATIONSERVICE']._serialized_start=882 + _globals['_SHARDCREATED_RELATIONSERVICE']._serialized_end=950 + _globals['_RESOURCEID']._serialized_start=952 + _globals['_RESOURCEID']._serialized_end=996 + _globals['_SHARD']._serialized_start=999 + _globals['_SHARD']._serialized_end=1127 + _globals['_EMPTYRESPONSE']._serialized_start=1129 + _globals['_EMPTYRESPONSE']._serialized_end=1144 + _globals['_EMPTYQUERY']._serialized_start=1146 + _globals['_EMPTYQUERY']._serialized_end=1158 + _globals['_POSITION']._serialized_start=1161 + _globals['_POSITION']._serialized_end=1296 + _globals['_REPRESENTATION']._serialized_start=1298 + _globals['_REPRESENTATION']._serialized_end=1348 + _globals['_SENTENCEMETADATA']._serialized_start=1351 + _globals['_SENTENCEMETADATA']._serialized_end=1493 + _globals['_VECTORSENTENCE']._serialized_start=1495 + _globals['_VECTORSENTENCE']._serialized_end=1578 + _globals['_VECTORSETSENTENCES']._serialized_start=1581 + _globals['_VECTORSETSENTENCES']._serialized_end=1751 + _globals['_VECTORSETSENTENCES_SENTENCESENTRY']._serialized_start=1672 + _globals['_VECTORSETSENTENCES_SENTENCESENTRY']._serialized_end=1751 + _globals['_PARAGRAPHMETADATA']._serialized_start=1754 + _globals['_PARAGRAPHMETADATA']._serialized_end=1897 + _globals['_INDEXPARAGRAPH']._serialized_start=1900 + _globals['_INDEXPARAGRAPH']._serialized_end=2411 + _globals['_INDEXPARAGRAPH_SENTENCESENTRY']._serialized_start=1672 + _globals['_INDEXPARAGRAPH_SENTENCESENTRY']._serialized_end=1751 + _globals['_INDEXPARAGRAPH_VECTORSETSSENTENCESENTRY']._serialized_start=2318 + _globals['_INDEXPARAGRAPH_VECTORSETSSENTENCESENTRY']._serialized_end=2411 + _globals['_VECTORSETID']._serialized_start=2413 + _globals['_VECTORSETID']._serialized_end=2484 + _globals['_VECTORSETLIST']._serialized_start=2486 + _globals['_VECTORSETLIST']._serialized_end=2560 + _globals['_INDEXPARAGRAPHS']._serialized_start=2563 + _globals['_INDEXPARAGRAPHS']._serialized_end=2730 + _globals['_INDEXPARAGRAPHS_PARAGRAPHSENTRY']._serialized_start=2650 + _globals['_INDEXPARAGRAPHS_PARAGRAPHSENTRY']._serialized_end=2730 + _globals['_RESOURCE']._serialized_start=2733 + _globals['_RESOURCE']._serialized_end=3737 + _globals['_RESOURCE_TEXTSENTRY']._serialized_start=3311 + _globals['_RESOURCE_TEXTSENTRY']._serialized_end=3387 + _globals['_RESOURCE_PARAGRAPHSENTRY']._serialized_start=3389 + _globals['_RESOURCE_PARAGRAPHSENTRY']._serialized_end=3470 + _globals['_RESOURCE_VECTORSENTRY']._serialized_start=3472 + _globals['_RESOURCE_VECTORSENTRY']._serialized_end=3538 + _globals['_RESOURCE_VECTORSTODELETEENTRY']._serialized_start=3540 + _globals['_RESOURCE_VECTORSTODELETEENTRY']._serialized_end=3618 + _globals['_RESOURCE_RESOURCESTATUS']._serialized_start=3620 + _globals['_RESOURCE_RESOURCESTATUS']._serialized_end=3724 + _globals['_SHARDMETADATA']._serialized_start=3739 + _globals['_SHARDMETADATA']._serialized_end=3816 + _globals['_NODEMETADATA']._serialized_start=3819 + _globals['_NODEMETADATA']._serialized_end=4195 + _globals['_NODEMETADATA_SHARDMETADATA']._serialized_start=4032 + _globals['_NODEMETADATA_SHARDMETADATA']._serialized_end=4085 + _globals['_NODEMETADATA_SHARDSENTRY']._serialized_start=4087 + _globals['_NODEMETADATA_SHARDSENTRY']._serialized_end=4175 # @@protoc_insertion_point(module_scope) diff --git a/nucliadb_protos/python/src/nucliadb_protos/noderesources_pb2.pyi b/nucliadb_protos/python/src/nucliadb_protos/noderesources_pb2.pyi index f4281736b3..bf411e6aa3 100644 --- a/nucliadb_protos/python/src/nucliadb_protos/noderesources_pb2.pyi +++ b/nucliadb_protos/python/src/nucliadb_protos/noderesources_pb2.pyi @@ -129,11 +129,13 @@ class ShardCreated(google.protobuf.message.Message): DOCUMENT_V0: ShardCreated._DocumentService.ValueType # 0 DOCUMENT_V1: ShardCreated._DocumentService.ValueType # 1 DOCUMENT_V2: ShardCreated._DocumentService.ValueType # 2 + DOCUMENT_V3: ShardCreated._DocumentService.ValueType # 3 class DocumentService(_DocumentService, metaclass=_DocumentServiceEnumTypeWrapper): ... DOCUMENT_V0: ShardCreated.DocumentService.ValueType # 0 DOCUMENT_V1: ShardCreated.DocumentService.ValueType # 1 DOCUMENT_V2: ShardCreated.DocumentService.ValueType # 2 + DOCUMENT_V3: ShardCreated.DocumentService.ValueType # 3 class _ParagraphService: ValueType = typing.NewType("ValueType", builtins.int) diff --git a/nucliadb_protos/rust/src/noderesources.rs b/nucliadb_protos/rust/src/noderesources.rs index 3bae19c24c..541a54c764 100644 --- a/nucliadb_protos/rust/src/noderesources.rs +++ b/nucliadb_protos/rust/src/noderesources.rs @@ -61,6 +61,7 @@ pub mod shard_created { DocumentV0 = 0, DocumentV1 = 1, DocumentV2 = 2, + DocumentV3 = 3, } impl DocumentService { /// String value of the enum field names used in the ProtoBuf definition. @@ -72,6 +73,7 @@ pub mod shard_created { DocumentService::DocumentV0 => "DOCUMENT_V0", DocumentService::DocumentV1 => "DOCUMENT_V1", DocumentService::DocumentV2 => "DOCUMENT_V2", + DocumentService::DocumentV3 => "DOCUMENT_V3", } } /// Creates an enum from field names used in the ProtoBuf definition. @@ -80,6 +82,7 @@ pub mod shard_created { "DOCUMENT_V0" => Some(Self::DocumentV0), "DOCUMENT_V1" => Some(Self::DocumentV1), "DOCUMENT_V2" => Some(Self::DocumentV2), + "DOCUMENT_V3" => Some(Self::DocumentV3), _ => None, } } diff --git a/nucliadb_texts3/src/reader.rs b/nucliadb_texts3/src/reader.rs index d5fe0ff0da..435ec7ed8a 100644 --- a/nucliadb_texts3/src/reader.rs +++ b/nucliadb_texts3/src/reader.rs @@ -95,42 +95,50 @@ impl Debug for TextReaderService { } } -struct PrefilterSegmentCollector { - reader: BytesFastFieldReader, - uuids: Vec, +struct FieldUuidSegmentCollector { + uuid_reader: BytesFastFieldReader, + field_reader: BytesFastFieldReader, + results: Vec, } -impl SegmentCollector for PrefilterSegmentCollector { - type Fruit = Vec; +impl SegmentCollector for FieldUuidSegmentCollector { + type Fruit = Vec; - fn collect(&mut self, doc: tantivy::DocId, score: tantivy::Score) { - let bytes = self.reader.get_bytes(doc); - self.uuids.push(String::from_utf8_lossy(bytes).to_string()); + fn collect(&mut self, doc: tantivy::DocId, _score: tantivy::Score) { + let uuid = self.uuid_reader.get_bytes(doc); + let field = self.field_reader.get_bytes(doc); + self.results.push(ValidField { + resource_id: String::from_utf8_lossy(uuid).to_string(), + field_id: String::from_utf8_lossy(field).to_string(), + }); } fn harvest(self) -> Self::Fruit { - self.uuids + self.results } } -struct PrefilterCollector { +struct FieldUuidCollector { + uuid: Field, field: Field, } -impl Collector for PrefilterCollector { - type Fruit = Vec; +impl Collector for FieldUuidCollector { + type Fruit = Vec; - type Child = PrefilterSegmentCollector; + type Child = FieldUuidSegmentCollector; fn for_segment( &self, - segment_local_id: tantivy::SegmentOrdinal, + _segment_local_id: tantivy::SegmentOrdinal, segment: &tantivy::SegmentReader, ) -> tantivy::Result { - let reader = segment.fast_fields().bytes(self.field)?; - Ok(PrefilterSegmentCollector { - reader, - uuids: vec![], + let uuid_reader = segment.fast_fields().bytes(self.uuid)?; + let field_reader = segment.fast_fields().bytes(self.field)?; + Ok(FieldUuidSegmentCollector { + uuid_reader, + field_reader, + results: vec![], }) } @@ -224,65 +232,29 @@ impl FieldReader for TextReaderService { let prefilter_query: Box = Box::new(BooleanQuery::intersection(subqueries)); let searcher = self.reader.searcher(); - // let collector = PrefilterCollector { - // field: self.schema.uuid_field, - // }; - // let mut docs_fulfilled = searcher.search(&prefilter_query, &collector)?; - // let mut inverted = false; - - // // If none of the fields match the pre-filter, thats all the query planner needs to know. - // if docs_fulfilled.is_empty() { - // return Ok(PreFilterResponse { - // valid_fields: ValidFieldCollector::None, - // }); - // } - - // // If all the fields match the pre-filter, thats all the query planner needs to know - // if docs_fulfilled.len() as u64 == searcher.num_docs() { - // return Ok(PreFilterResponse { - // valid_fields: ValidFieldCollector::All, - // }); - // } - - // // More than half in results - // if docs_fulfilled.len() * 2 > searcher.num_docs() as usize { - // let inverted_prefilter_query: Box = - // Box::new(BooleanQuery::new(vec![(Occur::MustNot, prefilter_query)])); - // docs_fulfilled = searcher.search(&inverted_prefilter_query, &DocSetCollector)?; - // inverted = true; - // } - - // The fields matching the pre-filter are a non-empty subset of all the fields, so they are - // brought to memory - // let mut valid_fields = Vec::new(); - // for fulfilled_doc in docs_fulfilled { - // let fulfilled_field = ValidField { - // resource_id: fulfilled_doc, - // field_id: String::new(), - // }; - // valid_fields.push(fulfilled_field); - // } - // println!("FAST Prefiltered down to {} of {}", valid_fields.len(), searcher.num_docs()); - - let mut docs_fulfilled = searcher.search(&prefilter_query, &DocSetCollector)?; - println!("Prefiltered down to {} of {}", docs_fulfilled.len(), searcher.num_docs()); - let mut inverted = false; - - // The fields matching the pre-filter are a non-empty subset of all the fields, so they are - // brought to memory - let mut valid_fields = Vec::new(); - for fulfilled_doc in docs_fulfilled { - if let Ok(doc) = searcher.doc(fulfilled_doc) { - let resource_id = doc.get_first(self.schema.uuid).unwrap().as_text().unwrap().to_string(); - valid_fields.push(ValidField { - resource_id, - field_id: String::new(), - }); - } + let collector = FieldUuidCollector { + uuid: self.schema.uuid, + field: self.schema.field, + }; + let docs_fulfilled = searcher.search(&prefilter_query, &collector)?; + + // If none of the fields match the pre-filter, thats all the query planner needs to know. + if docs_fulfilled.is_empty() { + return Ok(PreFilterResponse { + valid_fields: ValidFieldCollector::None, + }); + } + + // If all the fields match the pre-filter, thats all the query planner needs to know + if docs_fulfilled.len() as u64 == searcher.num_docs() { + return Ok(PreFilterResponse { + valid_fields: ValidFieldCollector::All, + }); } + // The fields matching the pre-filter are a non-empty subset of all the fields Ok(PreFilterResponse { - valid_fields: ValidFieldCollector::Some(valid_fields), + valid_fields: ValidFieldCollector::Some(docs_fulfilled), }) } @@ -394,19 +366,23 @@ impl TextReaderService { bm25: 0.0, booster: id as f32, }); - let uuid = doc - .get_first(self.schema.uuid) - .expect("document doesn't appear to have uuid.") - .as_text() - .unwrap() - .to_string(); - - let field = doc - .get_first(self.schema.field) - .expect("document doesn't appear to have field.") - .as_facet() - .unwrap() - .to_path_string(); + let uuid = String::from_utf8( + doc.get_first(self.schema.uuid) + .expect("document doesn't appear to have uuid.") + .as_bytes() + .unwrap() + .to_vec(), + ) + .unwrap(); + + let field = String::from_utf8( + doc.get_first(self.schema.field) + .expect("document doesn't appear to have field.") + .as_bytes() + .unwrap() + .to_vec(), + ) + .unwrap(); let labels = doc .get_all(self.schema.facets) @@ -462,19 +438,23 @@ impl TextReaderService { bm25: score, booster: id as f32, }); - let uuid = doc - .get_first(self.schema.uuid) - .expect("document doesn't appear to have uuid.") - .as_text() - .unwrap() - .to_string(); - - let field = doc - .get_first(self.schema.field) - .expect("document doesn't appear to have field.") - .as_facet() - .unwrap() - .to_path_string(); + let uuid = String::from_utf8( + doc.get_first(self.schema.uuid) + .expect("document doesn't appear to have uuid.") + .as_bytes() + .unwrap() + .to_vec(), + ) + .unwrap(); + + let field = String::from_utf8( + doc.get_first(self.schema.field) + .expect("document doesn't appear to have field.") + .as_bytes() + .unwrap() + .to_vec(), + ) + .unwrap(); let labels = doc .get_all(self.schema.facets) @@ -642,19 +622,23 @@ impl Iterator for BatchProducer { let top_docs = self.searcher.search(&self.query, &top_docs).unwrap(); let mut items = vec![]; for doc in top_docs.into_iter().flat_map(|i| self.searcher.doc(i.1)) { - let uuid = doc - .get_first(self.uuid_field) - .expect("document doesn't appear to have uuid.") - .as_text() - .unwrap() - .to_string(); - - let field = doc - .get_first(self.field_field) - .expect("document doesn't appear to have field.") - .as_facet() - .unwrap() - .to_path_string(); + let uuid = String::from_utf8( + doc.get_first(self.uuid_field) + .expect("document doesn't appear to have uuid.") + .as_bytes() + .unwrap() + .to_vec(), + ) + .unwrap(); + + let field = String::from_utf8( + doc.get_first(self.field_field) + .expect("document doesn't appear to have field.") + .as_bytes() + .unwrap() + .to_vec(), + ) + .unwrap(); let labels = doc .get_all(self.facet_field) diff --git a/nucliadb_texts3/src/schema.rs b/nucliadb_texts3/src/schema.rs index 1d62a4acbd..abce9232bf 100644 --- a/nucliadb_texts3/src/schema.rs +++ b/nucliadb_texts3/src/schema.rs @@ -19,7 +19,7 @@ // use nucliadb_core::protos::*; use tantivy::chrono::{DateTime, Utc}; -use tantivy::schema::{Cardinality, FacetOptions, Field, NumericOptions, Schema, FAST, STORED, STRING, TEXT}; +use tantivy::schema::{Cardinality, FacetOptions, Field, NumericOptions, Schema, FAST, INDEXED, STORED, TEXT}; #[derive(Debug, Clone)] pub struct TextSchema { @@ -55,8 +55,8 @@ impl TextSchema { let facet_options = FacetOptions::default().set_stored(); - let uuid = sb.add_bytes_field("uuid", FAST); - let field = sb.add_bytes_field("field", FAST); + let uuid = sb.add_bytes_field("uuid", STORED | FAST | INDEXED); + let field = sb.add_bytes_field("field", STORED | FAST | INDEXED); let text = sb.add_text_field("text", TEXT); diff --git a/nucliadb_texts3/src/search_query.rs b/nucliadb_texts3/src/search_query.rs index f5341c35a1..5d25dc82ff 100644 --- a/nucliadb_texts3/src/search_query.rs +++ b/nucliadb_texts3/src/search_query.rs @@ -80,16 +80,18 @@ pub fn create_query( queries.push((Occur::Must, main_q)); // Field types filter - search + let field_filter: Vec<_> = search .fields .iter() - .map(|value| format!("/{}", value)) - .flat_map(|facet_key| Facet::from_text(facet_key.as_str()).ok().into_iter()) - .for_each(|facet| { - let facet_term = Term::from_facet(schema.field, &facet); - let facet_term_query = TermQuery::new(facet_term, IndexRecordOption::Basic); - queries.push((Occur::Must, Box::new(facet_term_query))); - }); + .map(|field_name| { + let term = Term::from_field_bytes(schema.field, field_name.as_bytes()); + let term_query: Box = Box::new(TermQuery::new(term, IndexRecordOption::Basic)); + (Occur::Should, term_query) + }) + .collect(); + if !field_filter.is_empty() { + queries.push((Occur::Must, Box::new(BooleanQuery::new(field_filter)))); + } if let Some(filter) = search.filter.as_ref() { let context = QueryContext { diff --git a/nucliadb_texts3/src/writer.rs b/nucliadb_texts3/src/writer.rs index c536413dc6..a2907919f5 100644 --- a/nucliadb_texts3/src/writer.rs +++ b/nucliadb_texts3/src/writer.rs @@ -81,7 +81,7 @@ impl FieldWriter for TextWriterService { debug!("{id:?} - Delete existing uuid: starts at {v} ms"); let uuid_field = self.schema.uuid; - let uuid_term = Term::from_field_text(uuid_field, &resource_id.uuid); + let uuid_term = Term::from_field_bytes(uuid_field, resource_id.uuid.as_bytes()); self.writer.delete_term(uuid_term); let v = time.elapsed().as_millis(); debug!("{id:?} - Delete existing uuid: ends at {v} ms"); @@ -227,7 +227,7 @@ impl TextWriterService { }; let mut base_doc = doc!( - self.schema.uuid => resource_id, + self.schema.uuid => resource_id.as_bytes(), self.schema.modified => timestamp_to_datetime_utc(modified), self.schema.created => timestamp_to_datetime_utc(created), self.schema.status => resource.status as u64,