Skip to content

Commit

Permalink
Improve search (#250)
Browse files Browse the repository at this point in the history
* first round of adjustment to new search

* update doc for search

* ruff

* pylint

* Use ibridges 1.1.0

* renaming vars

* ruff and lint

* typo/bug

---------

Co-authored-by: Staiger, Christine <christine.staiger@wur.nl>
  • Loading branch information
chStaiger and Staiger, Christine authored Aug 22, 2024
1 parent 5c7660a commit 5e87796
Show file tree
Hide file tree
Showing 8 changed files with 412 additions and 303 deletions.
Binary file modified docs/screenshots/search.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
14 changes: 12 additions & 2 deletions docs/userdoc.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -109,10 +109,20 @@ In the case you want to download a large collection from iRODS, create the desti
`Search` provides a mask in which you can type in path information and iRODS metadata information. Click the `Search` button. If there are data objects or collections with these specifications they will be listed. A successful search will also open two more buttons: `Clear` to clear the search results and `Download`.

![](screenshots/search.png)


**Search fields**

The path at the top of the page shows the collection in which you are going to search. The collection needs to exist and by default your `home` collection will be used.

In the field next to `Obj/Coll name` you can specify which data objects or sub collections you are looking for. E.g. if you fill in `demo` the search will look for all collections and data objects called *demo*.

In the example above we are looking for all *.txt* data objects in a collection called `my_books` denoted by `my_books/%.txt`.

In the metadata mask we narrow the search results down to all *.txt* data objects which are labeled with the metadata key `author` and the metadata value containing the substring `Doyle` denoted by `%Doyle%`.

**Note that the wildcard is `%`!**

In the example search we are looking for data in a collection called *my_books* which can be located anywhere in the iRODS tree. This is denoted by `%` and we specify that we want to retrieve data which carries the metadata key *author* where the value ends with *Carroll*.
**Search results**

*Single click* on items in the list and click the `Download` button to open a dialog to select the destination for the data and start the download.

Expand Down
10 changes: 10 additions & 0 deletions ibridgesgui/gui_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,16 @@ def populate_table(table_widget, rows: int, data_by_row: list):
table_widget.resizeColumnsToContents()


def append_table(table_widget, curr_len_table, data_by_row):
    """Add the rows in data_by_row below the existing rows of a table widget.

    curr_len_table is the index of the first row that is written; the table
    is resized to curr_len_table + len(data_by_row) rows beforehand.
    Every cell value is converted with str() before it is stored.
    """
    table_widget.setRowCount(curr_len_table + len(data_by_row))
    for row, row_data in enumerate(data_by_row, start=curr_len_table):
        for col, cell in enumerate(row_data):
            cell_item = PyQt6.QtWidgets.QTableWidgetItem(str(cell))
            table_widget.setItem(row, col, cell_item)
    table_widget.resizeColumnsToContents()

def populate_textfield(text_widget, text_by_row: Union[str, list]):
"""Populate a text viewer or editor with text."""
text_widget.clear()
Expand Down
143 changes: 88 additions & 55 deletions ibridgesgui/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,12 @@

import PyQt6.uic
from ibridges import IrodsPath, download
from ibridges.search import MetaSearch
from PyQt6 import QtCore, QtGui, QtWidgets
from PyQt6.QtWidgets import QMessageBox

from ibridgesgui.config import get_last_ienv_path, is_session_from_config
from ibridgesgui.gui_utils import UI_FILE_DIR, combine_operations, populate_table
from ibridgesgui.gui_utils import UI_FILE_DIR, append_table, combine_operations
from ibridgesgui.threads import SearchThread, TransferDataThread
from ibridgesgui.ui_files.tabSearch import Ui_tabSearch

Expand Down Expand Up @@ -42,85 +43,107 @@ def __init__(self, session, app_name, browser):

self.logger = logging.getLogger(app_name)
self.session = session
self.results = None
self.current_batch_num = 0 # number of batches of 50; loading results
self.browser = browser
self.search_thread = None
self.download_thread = None

self.hide_result_elements()
self.load_more_button.clicked.connect(self.next_batch)
self.search_button.clicked.connect(self.search)
self.clear_button.clicked.connect(self.hide_result_elements)
self.download_button.clicked.connect(self.download)

# group textfields for gathering key, value, unit
self.keys = [self.key1, self.key2, self.key3, self.key4]
self.vals = [self.val1, self.val2, self.val3, self.val4]

self.meta_fields = [
(self.key1, self.val1, self.units1),
(self.key2, self.val2, self.units2),
(self.key3, self.val3, self.units3),
(self.key4, self.val4, self.units4),
]
self.search_path_field.setText(self.session.home)
self.search_table.doubleClicked.connect(self.send_to_browser)

def hide_result_elements(self):
    """Hide the GUI elements that show and manipulate search results.

    The error label is cleared as well, and all rows are removed from the
    search table.
    """
    self.error_label.clear()
    for widget in (
        self.search_table,
        self.download_button,
        self.load_more_button,
        self.clear_button,
        self.info_label,
    ):
        widget.hide()
    self.search_table.setRowCount(0)

def show_result_elements(self):
    """Show the GUI elements that show and manipulate search results."""
    for widget in (
        self.search_table,
        self.download_button,
        self.clear_button,
        self.info_label,
    ):
        widget.show()

def search(self):
    """Validate the search parameters and start the search.

    The result view and the batch bookkeeping of a previous search are reset
    first. When validation returns a message, it is shown in the error label,
    the cursor is restored and no search is started.
    """
    self.hide_result_elements()
    self.setCursor(QtGui.QCursor(QtCore.Qt.CursorShape.WaitCursor))
    self.error_label.clear()

    # Forget the results and the batch position of any previous search.
    self.current_batch_num = 0
    self.results = None

    # _validate_search_params returns five values; msg is None on success.
    msg, search_path, path_pattern, meta_searches, checksum = self._validate_search_params()
    self.logger.debug(
        "Search parameters %s, %s, %s, %s, %s",
        msg,
        str(search_path),
        path_pattern,
        str(meta_searches),
        checksum,
    )
    if msg is not None:
        self.error_label.setText(msg)
        self.setCursor(QtGui.QCursor(QtCore.Qt.CursorShape.ArrowCursor))
        return

    self._start_search(search_path, path_pattern, meta_searches, checksum)
def next_batch(self):
    """Load the next batch of results by calling load_results with a batch size of 25."""
    batch_size = 25
    self.load_results(batch_size=batch_size)

def load_results(self, batch_size=25):
    """Append the next batch of search results to the table view.

    Reads batch number self.current_batch_num (0-based) from self.results,
    appends one row per result to the search table and advances the batch
    counter. The "load more" button is shown only while results remain.

    batch_size: number of results to append per call.
    """
    self.error_label.clear()
    date_format = "%d-%m-%Y"
    table_data = []  # rows of (type flag, path, size, created, modified)

    # The window is derived from batch_size only; slicing clamps the end to
    # the length of the result list, so the last batch may be shorter.
    start = self.current_batch_num * batch_size
    for result in self.results[start : start + batch_size]:
        ipath = IrodsPath(self.session, str(result))
        if ipath.dataobject_exists():
            flag, size, item = "-d", ipath.size, ipath.dataobject
        else:
            # Everything that is not a data object is listed as a collection.
            flag, size, item = "-C", "", ipath.collection
        table_data.append(
            (
                flag,
                str(ipath),
                size,
                item.create_time.strftime(date_format),
                item.modify_time.strftime(date_format),
            )
        )
    self.current_batch_num = self.current_batch_num + 1
    append_table(self.search_table, self.search_table.rowCount(), table_data)

    if len(self.results) > batch_size * self.current_batch_num:
        self.load_more_button.show()
        self.load_more_button.setText(f"Load next {batch_size} results.")
    else:
        self.load_more_button.hide()

def download(self):
"""Determine iRODS paths, select destination and start download."""
Expand Down Expand Up @@ -172,24 +195,31 @@ def send_to_browser(self):
self.error_label.setText(f"Browser tab switched to {irods_path.parent}")
self.browser.load_browser_table()

def _validate_search_params(self) -> tuple[str, IrodsPath, str, list, str]:
    """Collect the search parameters from the form and validate them.

    Returns a tuple (msg, search_path, path_pattern, meta_searches, checksum).
    msg is None when the parameters can be used, otherwise a text for the user.
    path_pattern and checksum are None when their input field is empty.
    meta_searches holds one MetaSearch per metadata row with any field filled.
    """
    meta_searches = []
    for key_field, value_field, units_field in self.meta_fields:
        key, value, units = key_field.text(), value_field.text(), units_field.text()
        # Rows that are completely empty are skipped; in a partially filled
        # row every empty part is replaced by the wildcard "%".
        if key or value or units:
            meta_searches.append(MetaSearch(key or "%", value or "%", units or "%"))

    search_path = IrodsPath(self.session, self.search_path_field.text())
    path_pattern = self.path_pattern_field.text() or None
    checksum = self.checksum_field.text() or None

    if not search_path.collection_exists():
        msg = f"Search in {str(search_path)}: Collection does not exist."
    elif not meta_searches and path_pattern is None and checksum is None:
        msg = "Please provide some search criteria."
    else:
        msg = None
    return msg, search_path, path_pattern, meta_searches, checksum

def _retrieve_selected_paths(self) -> list[IrodsPath]:
"""Retrieve paths from all selected rows in search results table."""
Expand All @@ -215,7 +245,7 @@ def _start_download(self, irods_paths, folder, overwrite):
# get diff dictionary
single_ops = []
for ipath in irods_paths:
single_ops.append(download(self.session, ipath, folder, overwrite = True, dry_run=True))
single_ops.append(download(self.session, ipath, folder, overwrite=True, dry_run=True))
ops = combine_operations(single_ops)

self.error_label.setText(f"Downloading to {folder} ....")
Expand Down Expand Up @@ -246,14 +276,14 @@ def _download_status(self, state):
text = f"{obj_count} of {num_objs} files; failed: {obj_failed}."
self.error_label.setText(text)

def _download_fetch_result(self, thread: dict):
    """Report the outcome of a download and restore the cursor.

    thread: dict with an "error" entry; an empty string is reported as a
    finished download, anything else as a failure.
    """
    if thread["error"] == "":
        self.error_label.setText("Download finished.")
    else:
        self.error_label.setText("Errors occurred during download. Consult the logs.")
    self.setCursor(QtGui.QCursor(QtCore.Qt.CursorShape.ArrowCursor))

def _start_search(self, key_vals, path, checksum):
def _start_search(self, search_path, path_pattern, meta_searches, checksum):
self.search_button.setEnabled(False)
# check if session comes from env file in ibridges config
if is_session_from_config(self.session):
Expand All @@ -265,7 +295,9 @@ def _start_search(self, key_vals, path, checksum):
return
self.error_label.setText("Searching ...")
try:
self.search_thread = SearchThread(self.logger, env_path, path, checksum, key_vals)
self.search_thread = SearchThread(
self.logger, env_path, search_path, path_pattern, meta_searches, checksum
)
except Exception:
self.error_label.setText(
"Could not instantiate a new session from{env_path}.Check configuration"
Expand All @@ -280,12 +312,13 @@ def _finish_search(self):
self.setCursor(QtGui.QCursor(QtCore.Qt.CursorShape.ArrowCursor))
del self.search_thread

def _fetch_results(self, thread: dict):
    """Take over the output of the search thread and display it.

    thread: dict with either an "error" entry holding a message, or a
    "results" entry holding the list of found paths.
    """
    if "error" in thread:
        self.error_label.setText(thread["error"])
    elif len(thread["results"]) == 0:
        self.error_label.setText("No objects or collections found.")
    else:
        self.show_result_elements()
        # Keep all results; load_results appends them to the table in batches.
        self.results = thread["results"]
        self.load_results()
    # Restore the cursor in every case (enum name is CursorShape).
    self.setCursor(QtGui.QCursor(QtCore.Qt.CursorShape.ArrowCursor))
18 changes: 11 additions & 7 deletions ibridgesgui/threads.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from pathlib import Path

from ibridges import Session, search_data, sync
from ibridges import IrodsPath, Session, search_data, sync
from ibridges.executor import Operations, _obj_get, _obj_put
from irods.exception import CAT_NO_ACCESS_PERMISSION, NetworkException
from PyQt6.QtCore import QThread, pyqtSignal
Expand All @@ -13,16 +13,18 @@ class SearchThread(QThread):

result = pyqtSignal(dict)

def __init__(self, logger, ienv_path: Path, search_path: IrodsPath, path_pattern: str,
             meta_searches: list, checksum: str):
    """Store the search parameters and create a session for this thread.

    logger: logger used for debug output.
    ienv_path: passed to Session as irods_env to create the thread's session.
    search_path, path_pattern, meta_searches, checksum: search parameters
    that are stored on the instance.
    """
    super().__init__()
    self.logger = logger
    self.thread_session = Session(irods_env=ienv_path)
    self.logger.debug("Search thread: created new session")
    self.sync_thread = None
    self.search_path = search_path
    self.path_pattern = path_pattern
    self.checksum = checksum
    self.ms = meta_searches

def _delete_session(self):
self.thread_session.close()
Expand All @@ -35,16 +37,18 @@ def run(self):
"""Run the thread."""
search_out = {}
try:
search_out["results"] = search_data(
self.thread_session, path=self.path, checksum=self.checksum, key_vals=self.key_vals
res = search_data(
self.thread_session, path=self.search_path, path_pattern = self.path_pattern,
checksum=self.checksum, metadata=self.ms
)
# convert IrodsPaths to strings, the session will be destroyed at the end of the thread
search_out["results"] = [str(ipath) for ipath in res]
self._delete_session()
except NetworkException:
self._delete_session()
search_out["error"] = "Search takes too long. Please provide more parameters."
self.result.emit(search_out)


class TransferDataThread(QThread):
"""Transfer data between local and iRODS."""

Expand Down
Loading

0 comments on commit 5e87796

Please sign in to comment.