Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement file searching #31

Merged
merged 6 commits into from
Jan 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 99 additions & 4 deletions hera_librarian/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,15 @@
"""

import argparse
import datetime
import json
import os
import sys
import time
from pathlib import Path

import dateutil.parser

from . import LibrarianClient
from .exceptions import LibrarianClientRemovedFunctionality, LibrarianError
from .settings import client_settings
Expand Down Expand Up @@ -251,10 +254,75 @@ def search_files(args):
Search for files in the librarian.
"""

raise NotImplementedError(
"This needs to be implemented, but requires a change to the Librarian API."
if args.search is not None:
raise LibrarianClientRemovedFunctionality(
"search_files", "JSON search functionality is removed. See help."
)

# Create the search request

# Start with the most complex part, parsing dates...
create_time_window = None

if args.create_time_start is not None or args.create_time_end is not None:
create_time_window = []

if args.create_time_start is not None:
create_time_window.append(dateutil.parser.parse(args.create_time_start))
else:
create_time_window.append(datetime.datetime.min)

if args.create_time_end is not None:
create_time_window.append(dateutil.parser.parse(args.create_time_end))
else:
create_time_window.append(datetime.datetime.max)

create_time_window = tuple(create_time_window)

# Perform the search

client = LibrarianClient.from_info(client_settings.connections[args.conn_name])

search_response = client.search_files(
name=args.name,
create_time_window=create_time_window,
uploader=args.uploader,
source=args.source,
max_results=args.max_results,
)

if len(search_response) == 0:
print("No results found.")
return 1

# Print the results
for file in search_response:
print(
"\033[1m"
+ f"{file.name} ({sizeof_fmt(file.size)}) - {file.create_time} - {file.uploader} - {file.source}"
+ "\033[0m"
)

if len(file.instances) == 0:
print("No instances of this file found.")
else:
print("Instances:")

for instance in file.instances:
print(
f" {instance.path} - {'AVAILABLE' if instance.available else 'NOT AVAILABLE'}"
)

if len(file.remote_instances) == 0:
print("No remote instances of this file found.")
else:
print("Remote instances:")

for remote_instance in file.remote_instances:
print(f" {remote_instance.librarian_name}")

return 0


def set_file_deletion_policy(args):
"""
Expand Down Expand Up @@ -309,7 +377,6 @@ def upload(args):
local_path=Path(args.local_path),
dest_path=Path(args.dest_store_path),
deletion_policy=args.deletion,
null_obsid=args.null_obsid,
)
except ValueError as e:
die("Upload failed, check paths: {}".format(e))
Expand Down Expand Up @@ -674,10 +741,38 @@ def config_search_files_subparser(sub_parsers):
)
sp.add_argument("conn_name", metavar="CONNECTION-NAME", help=_conn_name_help)
sp.add_argument(
"search",
"--search",
metavar="JSON-SEARCH",
help="A JSON search specification; files that match will be displayed.",
required=False,
)
sp.add_argument(
"-n",
"--name",
default=None,
help="Only search for files with this name.",
)
sp.add_argument(
"--create-time-start",
help="Search for files who were created after this date and time. Use a parseable date string, if no timezone is specified, UTC is assumed.",
)
sp.add_argument(
"--create-time-end",
help="Search for files who were created before this date and time. Use a parseable date string, if no timezone is specified, UTC is assumed.",
)
sp.add_argument(
"-u", "--uploader", help="Search for files uploaded by this uploader."
)
sp.add_argument(
"-s", "--source", help="Search for files uploaded from this source."
)
sp.add_argument(
"--max-results",
type=int,
default=64,
help="Maximum number of results to return.",
)

sp.set_defaults(func=search_files)

return
Expand Down
61 changes: 59 additions & 2 deletions hera_librarian/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
The public-facing LibrarianClient object.
"""

from datetime import datetime
from pathlib import Path
from typing import TYPE_CHECKING, Optional

Expand All @@ -11,6 +12,8 @@
from .deletion import DeletionPolicy
from .exceptions import LibrarianError, LibrarianHTTPError
from .models.ping import PingRequest, PingResponse
from .models.search import (FileSearchRequest, FileSearchResponse,
FileSearchResponses)
from .models.uploads import (UploadCompletionRequest, UploadInitiationRequest,
UploadInitiationResponse)
from .settings import ClientInfo
Expand Down Expand Up @@ -206,9 +209,11 @@ def upload(
local_path : Path
Path of the file or directory to upload.
dest_path : Path
The destination 'path' on the librarian store (often the same as your filename, but may be under some root directory).
The destination 'path' on the librarian store (often the same as your
filename, but may be under some root directory).
deletion_policy : DeletionPolicy | str, optional
Whether or not this file may be deleted, by default DeletionPolicy.DISALLOWED
Whether or not this file may be deleted, by default
DeletionPolicy.DISALLOWED

Returns
-------
Expand Down Expand Up @@ -295,3 +300,55 @@ def upload(
)

return

def search_files(
    self,
    name: Optional[str] = None,
    create_time_window: Optional[tuple[datetime, ...]] = None,
    uploader: Optional[str] = None,
    source: Optional[str] = None,
    max_results: int = 64,
) -> list[FileSearchResponse]:
    """
    Search for files on this librarian.

    All filters are optional; each one is forwarded unchanged to the
    ``search/file`` endpoint inside a ``FileSearchRequest``.

    Parameters
    ----------
    name : Optional[str], optional
        The name of the files to search for, by default None
    create_time_window : Optional[tuple[datetime, ...]], optional
        A time window to search files within (make sure these are UTC
        times), by default None
    uploader : Optional[str], optional
        The person who uploaded this file, by default None
    source : Optional[str], optional
        The source of this file, could be another librarian, by default None
    max_results : int, optional
        The maximal number of results, by default 64. Note that the number
        returned can be lower as it is also limited by the server.

    Returns
    -------
    list[FileSearchResponse]
        A list of files that match the query. Empty when the server answers
        with a 404 whose reason is "No files found.".

    Raises
    ------
    LibrarianHTTPError
        Re-raised for any HTTP error other than the "no files found" 404.
    """

    try:
        query = FileSearchRequest(
            name=name,
            create_time_window=create_time_window,
            uploader=uploader,
            source=source,
            max_results=max_results,
        )

        matches: FileSearchResponses = self.post(
            endpoint="search/file",
            request=query,
            response=FileSearchResponses,
        )
    except LibrarianHTTPError as err:
        # The server reports an empty result set as a 404 with this exact
        # reason; that case is translated into an empty list for callers.
        nothing_found = err.status_code == 404 and err.reason == "No files found."

        if not nothing_found:
            raise err

        return []

    return matches.root
4 changes: 3 additions & 1 deletion hera_librarian/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pathlib import Path

from pydantic import BaseModel
from pydantic_settings import BaseSettings
from pydantic_settings import BaseSettings, SettingsConfigDict

from typing import TYPE_CHECKING

Expand All @@ -30,6 +30,8 @@ class ClientInfo(BaseModel):
class ClientSettings(BaseSettings):
connections: dict[str, ClientInfo] = {}

model_config = SettingsConfigDict(env_prefix='librarian_client_')

@classmethod
def from_file(cls, config_path: Path | str) -> "ClientSettings":
"""
Expand Down
2 changes: 1 addition & 1 deletion librarian_background/send_clone.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from schedule import CancelJob
from pathlib import Path

from librarian_server.database import get_session()
from librarian_server.database import get_session
from librarian_server.orm import (
StoreMetadata,
Instance,
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ dependencies = [
"requests",
"schedule",
"checksumdir",
"python-dateutil",
]
authors = [
{name = "HERA Team", email = "hera@lists.berkeley.edu"},
Expand Down
4 changes: 0 additions & 4 deletions tests/client_unit_test/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,8 @@

"""


import os

import pytest

import hera_librarian
from hera_librarian import cli


Expand Down
51 changes: 51 additions & 0 deletions tests/client_unit_test/test_cli_parse_search_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
"""
Tests the search-files parser.
"""

import datetime

import dateutil.parser

from hera_librarian import cli


def test_parser_simple_name():
    """The ``search-files`` subcommand stores ``--name`` on ``args.name``."""
    argv = ["search-files", "fake_connection", "--name=test_file"]

    parsed = cli.generate_parser().parse_args(argv)

    assert parsed.name == "test_file"


def test_parser_lots():
    """Every ``search-files`` option given at once is parsed onto ``args``."""
    argv = [
        "search-files",
        "fake_connection",
        "--name=test_file",
        "--create-time-start=2020-01-01",
        "--create-time-end=2020-01-02",
        "--uploader=uploader",
        "--source=source",
        "--max-results=10",
    ]

    parsed = cli.generate_parser().parse_args(argv)

    # The parser keeps the dates as raw strings, so they are run through
    # dateutil here before being compared with the expected datetimes.
    window_start = dateutil.parser.parse(parsed.create_time_start)
    window_end = dateutil.parser.parse(parsed.create_time_end)

    assert parsed.name == "test_file"
    assert window_start == datetime.datetime(year=2020, month=1, day=1)
    assert window_end == datetime.datetime(year=2020, month=1, day=2)
    assert parsed.uploader == "uploader"
    assert parsed.source == "source"
    assert parsed.max_results == 10
36 changes: 23 additions & 13 deletions tests/integration_test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,10 @@

import json
import os
import random
import shutil
import socket
import subprocess
import sys
from pathlib import Path
from socket import gethostname
from subprocess import run

import pytest
from pydantic import BaseModel
from xprocess import ProcessStarter

from hera_librarian import LibrarianClient
Expand Down Expand Up @@ -42,7 +35,7 @@ class Starter(ProcessStarter):
for label, key in setup.env.items():
if key is None:
raise ValueError(f"Environment variable {label} is None.")

xprocess.ensure("server", Starter)

setup.process = "server"
Expand Down Expand Up @@ -76,12 +69,29 @@ def librarian_client(server) -> LibrarianClient:
Returns a LibrarianClient connected to the server.
"""

client = LibrarianClient(
host="http://localhost",
port=server.id,
user="test-A"
)
client = LibrarianClient(host="http://localhost", port=server.id, user="test-A")

yield client

del client


@pytest.fixture
def librarian_client_command_line(server):
    """
    Sets up the required environment variables for the command line client.

    Exports ``LIBRARIAN_CLIENT_CONNECTIONS`` as a JSON document describing a
    single connection named ``test-A`` that points at ``http://localhost`` on
    port ``server.id``, then yields that connection name. When the fixture is
    torn down the environment variable is put back to whatever it was before
    (or removed if it was unset) so that it does not leak into other tests.

    Parameters
    ----------
    server
        The running test server; only its ``id`` attribute is read and it is
        used as the port number.

    Yields
    ------
    str
        The connection name, ``"test-A"``.
    """

    env_var = "LIBRARIAN_CLIENT_CONNECTIONS"

    # NOTE(review): the connection is named "test-A" but authenticates as
    # user "test-B" -- presumably intentional; confirm against the server
    # test configuration.
    connections = json.dumps(
        {
            "test-A": {
                "user": "test-B",
                "port": server.id,
                "host": "http://localhost",
            }
        }
    )

    # Remember the previous value so teardown can restore it exactly.
    previous = os.environ.get(env_var)
    os.environ[env_var] = connections

    try:
        yield "test-A"
    finally:
        if previous is None:
            os.environ.pop(env_var, None)
        else:
            os.environ[env_var] = previous
Loading