Skip to content
This repository has been archived by the owner on Nov 13, 2024. It is now read-only.

Commit

Permalink
use fixtures for large documents
Browse files Browse the repository at this point in the history
  • Loading branch information
acatav committed Sep 28, 2023
1 parent c69dcdb commit 9e0c873
Showing 1 changed file with 23 additions and 20 deletions.
43 changes: 23 additions & 20 deletions tests/system/knowledge_base/test_knowledge_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,20 @@ def documents():
for i in range(5)]


@pytest.fixture
def documents_large():
    """Build the 1000-document list used by the large-batch tests.

    Each document gets the id ``doc_<i>_large``, the text
    ``Sample document <i>`` and the metadata ``{"test": <i>}`` for
    ``i`` in ``0..999``.
    """
    large_docs = []
    for i in range(1000):
        large_docs.append(
            Document(id=f"doc_{i}_large",
                     text=f"Sample document {i}",
                     metadata={"test": i})
        )
    return large_docs


@pytest.fixture
def encoded_chunks_large(documents_large, chunker, encoder):
    """Chunk the large document list and return the encoder's output for it.

    The documents are first passed through ``chunker.chunk_documents`` and
    the resulting chunks are then passed to ``encoder.encode_documents``.
    """
    return encoder.encode_documents(chunker.chunk_documents(documents_large))


@pytest.fixture
def encoded_chunks(documents, chunker, encoder):
chunks = chunker.chunk_documents(documents)
Expand Down Expand Up @@ -240,32 +254,21 @@ def test_update_documents(encoder, documents, encoded_chunks, knowledge_base):


def test_upsert_large_list_happy_path(knowledge_base,
                                      documents_large,
                                      encoded_chunks_large):
    """Upsert the large document list and spot-check the resulting chunk ids.

    The documents and their expected chunks come from the
    ``documents_large`` / ``encoded_chunks_large`` fixtures rather than
    being rebuilt inline.
    """
    knowledge_base.upsert(documents_large)

    # Only the first and last ten chunks are checked, not the whole set.
    chunks_for_validation = (encoded_chunks_large[:10]
                             + encoded_chunks_large[-10:])
    assert_ids_in_index(knowledge_base,
                        [chunk.id for chunk in chunks_for_validation])


def test_delete_large_df_happy_path(knowledge_base,
                                    documents_large,
                                    encoded_chunks_large):
    """Delete the large document list by id and spot-check the chunk ids.

    The documents and their chunks come from the ``documents_large`` /
    ``encoded_chunks_large`` fixtures rather than being rebuilt inline.
    """
    # NOTE(review): nothing is upserted here before deleting; presumably the
    # index was populated by an earlier test sharing `knowledge_base` --
    # confirm the fixture scope and test ordering.
    knowledge_base.delete([doc.id for doc in documents_large])

    # Only the first and last ten chunks are checked, not the whole set.
    chunks_for_validation = (encoded_chunks_large[:10]
                             + encoded_chunks_large[-10:])
    assert_ids_not_in_index(knowledge_base,
                            [chunk.id for chunk in chunks_for_validation])

Expand Down

0 comments on commit 9e0c873

Please sign in to comment.