Skip to content

Commit

Permalink
Add docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
Riccorl committed Aug 6, 2024
1 parent 9d94478 commit 5902ece
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 3 deletions.
23 changes: 23 additions & 0 deletions relik/retriever/indexers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,29 @@ def from_pretrained(
*args,
**kwargs,
) -> "BaseDocumentIndex":
"""
Loads a pre-trained document index from the specified location.
Args:
name_or_path (Union[str, os.PathLike]): The name or path of the pre-trained model.
device (str, optional): The device to load the model on. Defaults to "cpu".
precision (str | None, optional): The precision of the model. Defaults to None.
config_file_name (str | None, optional): The name of the configuration file. Defaults to None.
document_file_name (str | None, optional): The name of the document file. Defaults to None.
embedding_file_name (str | None, optional): The name of the embedding file. Defaults to None.
index_file_name (str | None, optional): The name of the index file. Defaults to None.
*args: Additional positional arguments.
**kwargs: Additional keyword arguments.
Returns:
BaseDocumentIndex: The loaded pre-trained document index.
Raises:
FileNotFoundError: If the model configuration file is not found.
ValueError: If the document file does not exist.
ImportError: If the `faiss` package is not installed when trying to load a FAISS index.
"""
cache_dir = kwargs.pop("cache_dir", None)
force_download = kwargs.pop("force_download", False)
skip_metadata = kwargs.pop("skip_metadata", False)
Expand Down
17 changes: 14 additions & 3 deletions relik/retriever/indexers/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,16 +310,27 @@ def from_dict(cls, d):
return cls([Document.from_dict(doc) for doc in d])

@classmethod
def from_file(cls, file_path: Union[str, Path], skip_metadata: bool = False, **kwargs):
def from_file(
    cls, file_path: Union[str, Path], skip_metadata: bool = False, **kwargs
):
    """
    Load documents from a JSON Lines file.

    Each non-blank line is parsed as one JSON object and converted to a
    document with `Document.from_dict`.

    Args:
        file_path (Union[str, Path]): The path to the file containing the
            documents, one JSON object per line.
        skip_metadata (bool, optional): If True, the "metadata" key is removed
            from each record before the document is built. Defaults to False.
        **kwargs: Accepted for call compatibility; not used by this method.

    Returns:
        cls: An instance of the class with the loaded documents.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    docs = []
    # JSON text is UTF-8 by specification; do not depend on the platform's
    # locale encoding when decoding the file.
    with open(file_path, "r", encoding="utf-8") as f:
        for line in f:
            # Tolerate blank lines (e.g. stray trailing newlines) instead of
            # failing inside json.loads.
            if not line.strip():
                continue
            doc = json.loads(line)
            if skip_metadata:
                doc.pop("metadata", None)
            docs.append(Document.from_dict(doc))
    return cls(docs)

@classmethod
Expand Down

0 comments on commit 5902ece

Please sign in to comment.