diff --git a/README.md b/README.md index a52b486..2298deb 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# CountESS 0.0.51 +# CountESS 0.0.57 This is CountESS, a modular, Python 3 reimplementation of Enrich2. @@ -10,7 +10,8 @@ Source code is available at [https://github.com/CountESS-Project/CountESS](https ## Installing -The latest version of CountESS can be installed from pypi: +The latest version of +[CountESS can be installed from pypi](https://pypi.org/project/countess/): ``` pip install CountESS ``` diff --git a/countess/__init__.py b/countess/__init__.py index 8518c87..1793d56 100644 --- a/countess/__init__.py +++ b/countess/__init__.py @@ -1,3 +1,3 @@ """CountESS Project""" -VERSION = "0.0.51" +VERSION = "0.0.57" diff --git a/countess/core/config.py b/countess/core/config.py index 2be6272..63d1ff3 100644 --- a/countess/core/config.py +++ b/countess/core/config.py @@ -1,4 +1,5 @@ import ast +import io import os.path import re import sys @@ -17,6 +18,45 @@ def default_output_callback(output): sys.stderr.write(repr(output)) +def read_config_dict(name: str, base_dir: str, config_dict: dict) -> PipelineNode: + if "_module" in config_dict: + module_name = config_dict["_module"] + class_name = config_dict["_class"] + # XXX version = config_dict.get("_version") + # XXX hash_digest = config_dict.get("_hash") + plugin = load_plugin(module_name, class_name) + else: + plugin = None + + position_str = config_dict.get("_position") + notes = config_dict.get("_notes") + + position = None + if position_str: + position_match = re.match(r"(\d+) (\d+)$", position_str) + if position_match: + position = ( + int(position_match.group(1)) / 1000, + int(position_match.group(2)) / 1000, + ) + + sort = config_dict.get("_sort", "0 0").split() + + # XXX check version and hash_digest and emit warnings. + + config = [(key, ast.literal_eval(val), base_dir) for key, val in config_dict.items() if not key.startswith("_")] + + return PipelineNode( + name=name, + plugin=plugin, + config=config, + position=position, + notes=notes, + sort_column=int(sort[0]), + sort_descending=bool(int(sort[1])), + ) + + def read_config( filename: str, logger: Logger = ConsoleLogger(), @@ -32,46 +72,14 @@ def read_config( nodes_by_name: dict[str, PipelineNode] = {} for section_name in cp.sections(): - config_dict = cp[section_name] - - if "_module" in config_dict: - module_name = config_dict["_module"] - class_name = config_dict["_class"] - # XXX version = config_dict.get("_version") - # XXX hash_digest = config_dict.get("_hash") - plugin = load_plugin(module_name, class_name) - else: - plugin = None - - position_str = config_dict.get("_position") - notes = config_dict.get("_notes") - - position = None - if position_str: - position_match = re.match(r"(\d+) (\d+)$", position_str) - if position_match: - position = ( - int(position_match.group(1)) / 1000, - int(position_match.group(2)) / 1000, - ) - - # XXX check version and hash_digest and emit warnings. - - config = [(key, ast.literal_eval(val), base_dir) for key, val in config_dict.items() if not key.startswith("_")] - - node = PipelineNode( - name=section_name, - plugin=plugin, - config=config, - position=position, - notes=notes, - ) - pipeline_graph.nodes.append(node) + config_dict = dict(cp[section_name]) + node = read_config_dict(section_name, base_dir, config_dict) for key, val in config_dict.items(): if key.startswith("_parent."): node.add_parent(nodes_by_name[val]) + pipeline_graph.nodes.append(node) nodes_by_name[section_name] = node return pipeline_graph @@ -80,47 +88,53 @@ def read_config( def write_config(pipeline_graph: PipelineGraph, filename: str): """Write `pipeline_graph`'s configuration out to `filename`""" + pipeline_graph.reset_node_names() + cp = ConfigParser() base_dir = os.path.dirname(filename) - node_names_seen = set() for node in pipeline_graph.traverse_nodes(): - while node.name in node_names_seen: - num = 0 - if match := re.match(r"(.*?)\s+(\d+)$", node.name): - node.name = match.group(1) - num = int(match.group(2)) - node.name += f" {num + 1}" - node_names_seen.add(node.name) - - cp.add_section(node.name) - if node.plugin: - cp[node.name].update( - { - "_module": node.plugin.__module__, - "_class": node.plugin.__class__.__name__, - "_version": node.plugin.version, - "_hash": node.plugin.hash(), - } - ) - if node.position: - xx, yy = node.position - cp[node.name]["_position"] = "%d %d" % (xx * 1000, yy * 1000) - if node.notes: - cp[node.name]["_notes"] = node.notes - for n, parent in enumerate(node.parent_nodes): - cp[node.name][f"_parent.{n}"] = parent.name - if node.config: - for k, v, _ in node.config: - cp[node.name][k] = repr(v) - elif node.plugin: - for k, v in node.plugin.get_parameters(base_dir): - cp[node.name][k] = repr(v) + write_config_node(node, cp, base_dir) with open(filename, "w", encoding="utf-8") as fh: cp.write(fh) +def write_config_node_string(node: PipelineNode, base_dir: str = ""): + cp = ConfigParser() + write_config_node(node, cp, base_dir) + buf = io.StringIO() + cp.write(buf) + return buf.getvalue() + + +def write_config_node(node: PipelineNode, cp: ConfigParser, base_dir: str): + cp.add_section(node.name) + if node.plugin: + cp[node.name].update( + { + "_module": node.plugin.__module__, + "_class": node.plugin.__class__.__name__, + "_version": node.plugin.version, + "_hash": node.plugin.hash(), + "_sort": "%d %d" % (node.sort_column, 1 if node.sort_descending else 0), + } + ) + if node.position: + xx, yy = node.position + cp[node.name]["_position"] = "%d %d" % (xx * 1000, yy * 1000) + if node.notes: + cp[node.name]["_notes"] = node.notes + for n, parent in enumerate(node.parent_nodes): + cp[node.name][f"_parent.{n}"] = parent.name + if node.config: + for k, v, _ in node.config: + cp[node.name][k] = repr(v) + elif node.plugin: + for k, v in node.plugin.get_parameters(base_dir): + cp[node.name][k] = repr(v) + + def export_config_graphviz(pipeline_graph: PipelineGraph, filename: str): with open(filename, "w", encoding="utf-8") as fh: fh.write("digraph {\n") diff --git a/countess/core/parameters.py b/countess/core/parameters.py index 3c1ca80..11173e3 100644 --- a/countess/core/parameters.py +++ b/countess/core/parameters.py @@ -158,29 +158,6 @@ def copy(self) -> "StringCharacterSetParam": return self.__class__(self.label, self.value, self.read_only, character_set=self.character_set) -def clean_file_types(file_types): - # MacOS in particular is crashy if file_types is not to it's liking. - # This leads to very confusing errors. Better to throw an assertion - # error here than bomb out later. See #27. - - # XXX MacOS also doesn't seem to handle multiple file - # extensions, eg: .csv.gz, whereas Linux can. - # So maybe this function could accept those and censor - # them if running MacOS. - - assert type(file_types) is list - for ft in file_types: - assert type(ft) in (tuple, list) - assert type(ft[0]) is str - assert type(ft[1]) in (tuple, list, str) - if type(ft[1]) in (tuple, list): - for ext in ft[1]: - assert type(ext) is str - assert ext == "*" or ext == "" or re.match(r"\.\w+$", ext), f"Invalid FileType Extension {ext}" - - return file_types - - class FileParam(StringParam): """A StringParam for holding a filename. Defaults to `read_only` because it really should be populated from a file dialog or simiar.""" @@ -192,7 +169,7 @@ class FileParam(StringParam): def __init__(self, label: str, value=None, read_only: bool = True, file_types=None): super().__init__(label, value, read_only) if file_types is not None: - self.file_types = clean_file_types(file_types) + self.file_types = file_types def get_file_hash(self): if not self.value: @@ -215,11 +192,14 @@ def get_file_hash(self): def get_parameters(self, key, base_dir="."): if self.value: - relpath = os.path.relpath(self.value, base_dir) + if base_dir: + path = os.path.relpath(self.value, base_dir) + else: + path = os.path.abspath(self.value) else: - relpath = None + path = None - return [(key, relpath)] + return [(key, path)] def copy(self) -> "FileParam": return self.__class__(self.label, self.value, self.read_only, file_types=self.file_types) diff --git a/countess/core/pipeline.py b/countess/core/pipeline.py index b70b78a..b689eb5 100644 --- a/countess/core/pipeline.py +++ b/countess/core/pipeline.py @@ -1,3 +1,4 @@ +import re import time from queue import Empty, Queue from threading import Thread @@ -59,6 +60,8 @@ class PipelineNode: name: str plugin: Optional[BasePlugin] = None position: Optional[tuple[float, float]] = None + sort_column: int = 0 + sort_descending: bool = False notes: Optional[str] = None parent_nodes: set["PipelineNode"] child_nodes: set["PipelineNode"] @@ -74,11 +77,13 @@ class PipelineNode: # at config load time, if it is present it is loaded the # first time the plugin is prerun. - def __init__(self, name, plugin=None, config=None, position=None, notes=None): + def __init__(self, name, plugin=None, config=None, position=None, notes=None, sort_column=0, sort_descending=0): self.name = name self.plugin = plugin self.config = config or [] self.position = position + self.sort_column = sort_column + self.sort_descending = sort_descending self.notes = notes self.parent_nodes = set() self.child_nodes = set() @@ -182,8 +187,10 @@ def load_config(self, logger: Logger): self.config = None def prerun(self, logger: Logger, row_limit=PRERUN_ROW_LIMIT): + if not self.plugin: + return self.load_config(logger) - if self.is_dirty and self.plugin: + if self.is_dirty: assert isinstance(self.plugin, (ProcessPlugin, FileInputPlugin)) self.result = [] self.plugin.prepare([node.name for node in self.parent_nodes], row_limit) @@ -205,9 +212,10 @@ def mark_dirty(self): child_node.mark_dirty() def add_parent(self, parent): - self.parent_nodes.add(parent) - parent.child_nodes.add(self) - self.mark_dirty() + if (not self.plugin or self.plugin.num_inputs) and (not parent.plugin or parent.plugin.num_outputs): + self.parent_nodes.add(parent) + parent.child_nodes.add(self) + self.mark_dirty() def del_parent(self, parent): self.parent_nodes.discard(parent) @@ -248,7 +256,17 @@ def __init__(self): self.plugin_classes = get_plugin_classes() self.nodes = [] + def reset_node_name(self, node): + node_names_seen = set(n.name for n in self.nodes if n != node) + while node.name in node_names_seen: + num = 1 + if match := re.match(r"(.*?)\s+(\d+)$", node.name): + node.name = match.group(1) + num = int(match.group(2)) + node.name += f" {num + 1}" + def add_node(self, node): + self.reset_node_name(node) self.nodes.append(node) def del_node(self, node): @@ -298,6 +316,17 @@ def reset(self): node.result = None node.is_dirty = True + def reset_node_names(self): + node_names_seen = set() + for node in self.traverse_nodes(): + while node.name in node_names_seen: + num = 0 + if match := re.match(r"(.*?)\s+(\d+)$", node.name): + node.name = match.group(1) + num = int(match.group(2)) + node.name += f" {num + 1}" + node_names_seen.add(node.name) + def tidy(self): """Tidies the graph (sets all the node positions)""" diff --git a/countess/core/plugins.py b/countess/core/plugins.py index 616869b..0d12099 100644 --- a/countess/core/plugins.py +++ b/countess/core/plugins.py @@ -19,8 +19,8 @@ import importlib.metadata import logging import os.path -from collections.abc import Mapping, MutableMapping -from typing import Dict, Iterable, List, Optional, Union +from collections.abc import MutableMapping +from typing import Dict, Iterable, List, Optional, Sequence, Union import numpy as np import pandas as pd @@ -81,6 +81,8 @@ class BasePlugin: description: str = "" additional: str = "" link: Optional[str] = None + num_inputs: int = 1 + num_outputs: int = 1 parameters: MutableMapping[str, BaseParam] = {} show_preview: bool = True @@ -182,9 +184,10 @@ class FileInputPlugin(BasePlugin): file_number = 0 name = "" row_limit = None + num_inputs = 0 # used by the GUI file dialog - file_types: List[tuple[str, Union[str, list[str]]]] = [("Any", "*")] + file_types: Sequence[tuple[str, Union[str, list[str]]]] = [("Any", "*")] file_params: MutableMapping[str, BaseParam] = {} def num_files(self) -> int: @@ -288,6 +291,7 @@ class PandasProductPlugin(PandasProcessPlugin): source2 = None mem1: Optional[List] = None mem2: Optional[List] = None + num_inputs = 2 def prepare(self, sources: list[str], row_limit: Optional[int] = None): if len(sources) != 2: @@ -655,17 +659,7 @@ def read_file_to_dataframe(self, file_params, logger, row_limit=None) -> pd.Data class PandasOutputPlugin(PandasProcessPlugin): - def process_inputs(self, inputs: Mapping[str, Iterable[pd.DataFrame]], logger: Logger, row_limit: Optional[int]): - iterators = set(iter(input) for input in inputs.values()) - - while iterators: - for it in list(iterators): - try: - df_in = next(it) - assert isinstance(df_in, pd.DataFrame) - self.output_dataframe(df_in, logger) - except StopIteration: - iterators.remove(it) - - def output_dataframe(self, dataframe: pd.DataFrame, logger: Logger): - raise NotImplementedError(f"{self.__class__}.output_dataframe") + num_outputs = 0 + + def process(self, data: pd.DataFrame, source: str, logger: Logger) -> Iterable[pd.DataFrame]: + raise NotImplementedError(f"{self.__class__}.process") diff --git a/countess/gui/config.py b/countess/gui/config.py index a9c6ede..0ac31bb 100644 --- a/countess/gui/config.py +++ b/countess/gui/config.py @@ -2,7 +2,7 @@ import math import tkinter as tk from functools import partial -from tkinter import filedialog, ttk +from tkinter import ttk from typing import Mapping, MutableMapping, Optional import numpy as np @@ -22,7 +22,14 @@ TextParam, ) from ..core.plugins import BasePlugin -from .widgets import BooleanCheckbox, add_button, delete_button +from .widgets import ( + BooleanCheckbox, + add_button, + ask_open_filename, + ask_open_filenames, + ask_saveas_filename, + delete_button, +) def is_nan(v): @@ -319,11 +326,11 @@ def add_row_callback(self, *_): if isinstance(self.parameter, FileSaveParam): file_types = self.parameter.file_types - filename = filedialog.asksaveasfilename(filetypes=file_types) + filename = ask_saveas_filename(self.parameter.value, file_types) self.parameter.value = filename if isinstance(self.parameter, FileArrayParam): file_types = self.parameter.file_types - filenames = filedialog.askopenfilenames(filetypes=file_types) + filenames = ask_open_filenames(file_types) self.parameter.add_files(filenames) else: self.parameter.add_row() @@ -347,7 +354,7 @@ def delete_row_callback(self, parameter_wrapper, row=None): def change_file_callback(self, *_): file_types = self.parameter.file_types - filename = filedialog.askopenfilename(filetypes=file_types) + filename = ask_open_filename(file_types) self.parameter.value = filename self.entry["text"] = self.parameter.value self.callback(self.parameter) diff --git a/countess/gui/icons/hbar.gif b/countess/gui/icons/hbar.gif new file mode 100644 index 0000000..9c36a0b Binary files /dev/null and b/countess/gui/icons/hbar.gif differ diff --git a/countess/gui/icons/redbar.gif b/countess/gui/icons/redbar.gif new file mode 100644 index 0000000..a2d1204 Binary files /dev/null and b/countess/gui/icons/redbar.gif differ diff --git a/countess/gui/icons/sort_dn.gif b/countess/gui/icons/sort_dn.gif new file mode 100644 index 0000000..8b3b511 Binary files /dev/null and b/countess/gui/icons/sort_dn.gif differ diff --git a/countess/gui/icons/sort_un.gif b/countess/gui/icons/sort_un.gif new file mode 100644 index 0000000..cfbf4fb Binary files /dev/null and b/countess/gui/icons/sort_un.gif differ diff --git a/countess/gui/icons/sort_up.gif b/countess/gui/icons/sort_up.gif new file mode 100644 index 0000000..de80012 Binary files /dev/null and b/countess/gui/icons/sort_up.gif differ diff --git a/countess/gui/icons/vbar.gif b/countess/gui/icons/vbar.gif new file mode 100644 index 0000000..a2d1204 Binary files /dev/null and b/countess/gui/icons/vbar.gif differ diff --git a/countess/gui/main.py b/countess/gui/main.py index d293901..cd46abf 100644 --- a/countess/gui/main.py +++ b/countess/gui/main.py @@ -3,8 +3,7 @@ import sys import threading import tkinter as tk -import webbrowser -from tkinter import filedialog, messagebox, ttk +from tkinter import messagebox, ttk from typing import Optional import psutil @@ -15,9 +14,10 @@ from countess.core.plugins import get_plugin_classes from countess.gui.config import PluginConfigurator from countess.gui.logger import LoggerFrame +from countess.gui.mini_browser import MiniBrowserFrame from countess.gui.tabular import TabularDataFrame from countess.gui.tree import FlippyCanvas, GraphWrapper -from countess.gui.widgets import info_button +from countess.gui.widgets import ask_open_filename, ask_saveas_filename, info_button from countess.utils.pandas import concat_dataframes # import faulthandler @@ -28,7 +28,7 @@ class PluginChooserFrame(tk.Frame): - def __init__(self, master, title, callback, *a, **k): + def __init__(self, master, title, callback, has_parents, has_children, *a, **k): super().__init__(master, *a, **k) self.columnconfigure(0, weight=1) @@ -37,6 +37,13 @@ def __init__(self, master, title, callback, *a, **k): label_frame.grid(row=1, column=0, sticky=tk.EW, padx=10, pady=10) for n, plugin_class in enumerate(plugin_classes): + if ( + (has_parents and plugin_class.num_inputs == 0) + or (not has_parents and plugin_class.num_inputs > 0) + or (has_children and plugin_class.num_outputs == 0) + ): + continue + label_text = plugin_class.description tk.Button( label_frame, @@ -57,6 +64,8 @@ class ConfiguratorWrapper: config_change_task = None notes_widget = None node_update_thread = None + info_toplevel = None + info_frame = None def __init__(self, frame, node, change_callback): self.frame = frame @@ -115,11 +124,14 @@ def show_config_subframe(self): # self.node.plugin.update() self.configurator = PluginConfigurator(self.config_canvas, self.node.plugin, self.config_change_callback) self.config_subframe = self.configurator.frame - self.frame.rowconfigure(3, weight=1) - self.frame.rowconfigure(4, weight=1) + self.frame.rowconfigure(3, weight=1, minsize=self.frame.winfo_height() / 3) else: - self.config_subframe = PluginChooserFrame(self.config_canvas, "Choose Plugin", self.choose_plugin) + has_parents = len(self.node.parent_nodes) > 0 + has_children = len(self.node.child_nodes) > 0 + self.config_subframe = PluginChooserFrame( + self.config_canvas, "Choose Plugin", self.choose_plugin, has_parents, has_children + ) self.config_subframe.grid(sticky=tk.NSEW) self.frame.rowconfigure(3, weight=1) self.frame.rowconfigure(4, weight=0) @@ -138,7 +150,17 @@ def on_label_configure(self, *_): self.label["wraplength"] = self.label.winfo_width() - 20 def on_info_button_press(self, *_): - webbrowser.open_new_tab(self.node.plugin.link) + if self.info_toplevel is None: + self.info_toplevel = tk.Toplevel() + self.info_toplevel.protocol("WM_DELETE_WINDOW", self.on_info_toplevel_close) + self.info_frame = MiniBrowserFrame(self.info_toplevel, self.node.plugin.link) + self.info_frame.pack(fill="both", expand=True) + else: + self.info_frame.load_url(self.node.plugin.link) + + def on_info_toplevel_close(self): + self.info_toplevel.destroy() + self.info_toplevel = None def on_add_notes(self, *_): self.notes_widget.destroy() @@ -177,6 +199,8 @@ def show_preview_subframe(self): df = concat_dataframes(self.node.result) self.preview_subframe = TabularDataFrame(self.frame, highlightbackground="black", highlightthickness=3) self.preview_subframe.set_dataframe(df) + self.preview_subframe.set_sort_order(self.node.sort_column or 0, self.node.sort_descending) + self.preview_subframe.set_callback(self.preview_changed_callback) except (TypeError, ValueError): self.preview_subframe = tk.Frame(self.frame) self.preview_subframe.columnconfigure(0, weight=1) @@ -184,6 +208,10 @@ def show_preview_subframe(self): self.preview_subframe.grid(row=4, columnspan=2, sticky=tk.NSEW) + def preview_changed_callback(self, offset: int, sort_col: int, sort_desc: bool) -> None: + self.node.sort_column = sort_col + self.node.sort_descending = sort_desc + def name_changed_callback(self, *_): name = self.name_var.get() self.node.name = name @@ -197,7 +225,7 @@ def config_change_callback(self, *_): self.node.mark_dirty() if self.config_change_task: self.frame.after_cancel(self.config_change_task) - self.config_change_task = self.frame.after(500, self.config_change_task_callback) + self.config_change_task = self.frame.after(1000, self.config_change_task_callback) def config_change_task_callback(self): self.config_change_task = None @@ -239,9 +267,6 @@ def choose_plugin(self, plugin_class): self.node.prerun(self.logger) self.node.is_dirty = True self.show_config_subframe() - if self.node.name.startswith("NEW "): - self.node.name = self.node.plugin.name + self.node.name.removeprefix("NEW") - self.name_var.set(self.node.name) self.change_callback(self.node) def destroy(self): @@ -390,11 +415,11 @@ def config_new(self): self.graph_wrapper.destroy() self.graph = PipelineGraph() self.graph_wrapper = GraphWrapper(self.canvas, self.graph, self.node_select) - self.graph_wrapper.add_new_node(select=True) + self.graph_wrapper.add_new_node() def config_load(self, filename=None): if not filename: - filename = filedialog.askopenfilename(filetypes=[(".INI Config File", "*.ini")]) + filename = ask_open_filename(file_types=[(".INI Config File", "*.ini")]) if not filename: return self.config_filename = filename @@ -406,9 +431,9 @@ def config_load(self, filename=None): def config_save(self, filename=None): if not filename: - filename = filedialog.asksaveasfilename( - initialfile=self.config_filename, - filetypes=[(".INI Config File", "*.ini")], + filename = ask_saveas_filename( + initial_file=self.config_filename, + file_types=[(".INI Config File", "*.ini")], ) if not filename: return @@ -425,7 +450,7 @@ def config_export(self, filename=None): initialfile = self.config_filename.removesuffix(".ini") + ".dot" else: initialfile = None - filename = filedialog.asksaveasfilename(initialfile=initialfile, filetypes=[("Graphviz File", "*.dot")]) + filename = ask_saveas_filename(initial_file=initialfile, file_types=[("Graphviz File", "*.dot")]) if not filename: return export_config_graphviz(self.graph, filename) diff --git a/countess/gui/mini_browser.py b/countess/gui/mini_browser.py new file mode 100644 index 0000000..f597be9 --- /dev/null +++ b/countess/gui/mini_browser.py @@ -0,0 +1,54 @@ +import tkinter as tk +import webbrowser +from urllib.parse import urlparse + +from tkinterweb import HtmlFrame # type: ignore + +MINI_CSS = """ + * { padding: 10px; line-height: 150% } + th, td { border: 1px solid #AAA; border-collapse: collapse; } + code { border: 1px solid #AAA; font-family: monospace; } + th code, td code { border: 0px; } +""" + + +class MiniBrowserFrame(tk.Frame): + def __init__(self, tk_parent, start_url, *a, **k): + super().__init__(tk_parent, *a, **k) + tk.Label(self, text="Documentation Preview").pack(fill="both") + + self.html_frame = HtmlFrame(self, messages_enabled=False) + self.html_frame.enable_stylesheets(False) + self.html_frame.enable_objects(False) + self.html_frame.enable_forms(False) + self.html_frame.on_done_loading(self.on_done_loading) + self.html_frame.on_link_click(self.on_link_click) + + self.html_frame.pack(fill="both", expand=True) + + tk.Button(self, text="Open in Browser", command=self.on_browser_button).pack() + + self.load_url(start_url) + + def load_url(self, link_url): + self.current_url = link_url + self.html_frame.load_url(link_url) + + def on_browser_button(self): + webbrowser.open_new_tab(self.current_url) + + def on_done_loading(self): + self.html_frame.add_css(MINI_CSS) + + def on_link_click(self, link_url): + if urlparse(self.current_url)[0:2] == urlparse(link_url)[0:2]: + self.load_url(link_url) + else: + webbrowser.open_new_tab(link_url) + + +if __name__ == "__main__": + root = tk.Tk() + url = "https://countess-project.github.io/CountESS/" + MiniBrowserFrame(root, url).pack(fill="both", expand=True) + root.mainloop() diff --git a/countess/gui/tabular.py b/countess/gui/tabular.py index f989a64..f218075 100644 --- a/countess/gui/tabular.py +++ b/countess/gui/tabular.py @@ -3,9 +3,13 @@ from functools import partial from math import ceil, floor, isinf, isnan from tkinter import ttk +from typing import Callable, Optional, Union +import pandas as pd from pandas.api.types import is_integer_dtype, is_numeric_dtype +from countess.gui.widgets import copy_to_clipboard, get_icon + # XXX columns should automatically resize based on information # from _column_xscrollcommand which can tell if they're # overflowing. Or maybe use @@ -13,7 +17,7 @@ # etc etc. -def column_format_for(df_column): +def column_format_for(df_column: Union[pd.Index, pd.Series]) -> str: if is_numeric_dtype(df_column.dtype): # Work out the maximum width required to represent the integer part in this # column, so we can pad values to that width. @@ -34,7 +38,7 @@ def column_format_for(df_column): return "%s" -def format_value(value, column_format): +def format_value(value: Optional[Union[int, float, str]], column_format: str) -> str: """Format value for display in a table: >>> format_value(None, "%s") '—' @@ -55,6 +59,7 @@ def format_value(value, column_format): return "—F" # remove trailing 0's from floats (%g doesn't align correctly) + # 100.0 => "100.000000000000" => "100." try: if column_format.endswith("f"): return (column_format % value).rstrip("0") @@ -65,14 +70,12 @@ def format_value(value, column_format): class TabularDataFrame(tk.Frame): - """A frame for displaying a pandas (or similar) - dataframe. Columns are displayed as individual tk.Text - widgets which seems to be relatively efficient as they - only hold the currently displayed rows. - Tested up to a million or so rows.""" - - subframe = None - dataframe = None + """A frame for displaying a pandas (or similar) dataframe. + Columns are displayed as individual tk.Text widgets which seems to be relatively efficient + as they only hold the currently displayed rows. Tested up to a million or so rows.""" + + subframe: Optional[tk.Frame] = None + dataframe: Optional[pd.DataFrame] = None offset = 0 height = 1000 length = 0 @@ -84,6 +87,7 @@ class TabularDataFrame(tk.Frame): index_cols = 0 sort_by_col = None sort_ascending = True + callback: Optional[Callable[[int, int, bool], None]] = None def reset(self): if self.subframe: @@ -96,8 +100,10 @@ def reset(self): self.subframe.rowconfigure(2, weight=1) self.subframe.grid(sticky=tk.NSEW) - def set_dataframe(self, dataframe): + def set_dataframe(self, dataframe: pd.DataFrame, offset: Optional[int] = 0): self.reset() + assert self.subframe + self.dataframe = dataframe self.length = len(dataframe) @@ -132,10 +138,12 @@ def set_dataframe(self, dataframe): return title = tk.Label(self.subframe, text=f"Dataframe Preview {len(self.dataframe)} rows") - title.grid(row=0, column=0, columnspan=len(column_names), sticky=tk.NSEW, pady=5) + title.grid(row=0, column=0, columnspan=len(column_names) * 2 + 1, sticky=tk.NSEW, pady=5) ### XXX add in proper handling for MultiIndexes here + # Even-numbered columns are the data columns + self.labels = [] for num, (name, dtype) in enumerate(zip(column_names, column_dtypes)): if type(name) is tuple: @@ -143,13 +151,25 @@ def set_dataframe(self, dataframe): else: name = str(name) is_index = " (index)" if num < self.index_cols else "" - label = tk.Label(self.subframe, text=f"{name}\n{dtype}{is_index}") - label.grid(row=1, column=num, sticky=tk.EW) + label = tk.Label( + self.subframe, + text=f"{name}\n{dtype}{is_index}", + image=get_icon(self, "sort_un"), + compound=tk.RIGHT, + ) + label.grid(row=1, column=num * 2, sticky=tk.EW) label.bind("", partial(self._label_button_1, num)) - label.bind("", partial(self._label_b1_motion, num)) - self.subframe.columnconfigure(num, minsize=10, weight=1) + self.subframe.columnconfigure(num * 2, minsize=10, weight=1) self.labels.append(label) + # Between them are blank columns which provide a handle for adjusting the column + # widths left and right + + for num in range(0, len(column_names) - 1): + adjuster = tk.Frame(self.subframe, width=3, cursor="sb_h_double_arrow") + adjuster.grid(row=1, rowspan=2, column=num * 2 + 1, sticky=tk.NSEW) + adjuster.bind("", partial(self._column_adjust, num)) + if len(self.dataframe) == 0: label = tk.Label(self.subframe, text="no data") label.grid(row=2, column=0, columnspan=len(column_names), sticky=tk.NSEW) @@ -157,7 +177,7 @@ def set_dataframe(self, dataframe): self.columns = [tk.Text(self.subframe) for _ in column_names] for num, column in enumerate(self.columns): - column.grid(sticky=tk.NSEW, row=2, column=num) + column.grid(sticky=tk.NSEW, row=2, column=num * 2) column["wrap"] = tk.NONE column["xscrollcommand"] = partial(self._column_xscrollcommand, num) column["yscrollcommand"] = self._column_yscrollcommand @@ -170,9 +190,9 @@ def set_dataframe(self, dataframe): self.columns[0].bind("", self._column_configure) self.scrollbar = ttk.Scrollbar(self.subframe, orient=tk.VERTICAL) - self.scrollbar.grid(sticky=tk.NS, row=2, column=len(self.columns)) + self.scrollbar.grid(sticky=tk.NS, row=2, column=len(self.columns) * 2 - 1) self.scrollbar["command"] = self._scrollbar_command - self.refresh() + self.refresh(offset) def refresh(self, new_offset=0): # Refreshes the column widgets. @@ -233,33 +253,45 @@ def refresh(self, new_offset=0): if self.length: self.scrollbar.set(self.offset / self.length, (self.offset + self.height) / self.length) - def scrollto(self, new_offset): - self.offset = min(max(int(new_offset), 0), self.length - self.height) - self.refresh() + if self.callback: + self.callback(self.offset, self.sort_by_col, not self.sort_ascending) - def _label_button_1(self, num, event): - label_width = self.labels[num].winfo_width() - if 2 * label_width / 5 < event.x < 3 * label_width / 5: - self.sort_ascending = (num != self.sort_by_col) or not self.sort_ascending - self.sort_by_col = num - if num < self.index_cols: - self.dataframe = self.dataframe.sort_index(level=num, ascending=self.sort_ascending) - else: - self.dataframe = self.dataframe.sort_values( - self.dataframe.columns[num - self.index_cols], ascending=self.sort_ascending - ) - self.refresh() + def set_callback(self, callback) -> None: + self.callback = callback + + def set_sort_order(self, column_num: int, descending: Optional[bool] = None): + assert self.dataframe is not None - def _label_b1_motion(self, num, event): - # Detect label drags left and right. - # XXX still not quite right - label = self.labels[num] - label_width = label.winfo_width() + if descending is None and column_num == self.sort_by_col: + self.sort_ascending = not self.sort_ascending + else: + self.sort_by_col = column_num + self.sort_ascending = not descending + if column_num < self.index_cols: + self.dataframe = self.dataframe.sort_index(level=column_num, ascending=self.sort_ascending) + elif column_num < self.index_cols + len(self.dataframe.columns): + self.dataframe = self.dataframe.sort_values( + self.dataframe.columns[column_num - self.index_cols], ascending=self.sort_ascending + ) + + for n, label in enumerate(self.labels): + icon = "sort_un" if n != column_num else "sort_up" if self.sort_ascending else "sort_dn" + label.configure(image=get_icon(self, icon)) + + self.refresh() - if event.x > label_width: - self.subframe.columnconfigure(num, minsize=event.x) - elif event.x < 0: - self.subframe.columnconfigure(num, minsize=label_width + event.x) + def _label_button_1(self, num, event): + """Click on column labels to set sort order""" + self.set_sort_order(num) + if self.callback: + self.callback(self.offset, self.sort_by_col, not self.sort_ascending) + + def _column_adjust(self, num, event): + """Adjust column widths left and right by dragging the dummy columns""" + w0 = self.labels[num].winfo_width() + w1 = self.labels[num + 1].winfo_width() + self.subframe.columnconfigure(num * 2, minsize=w0 + event.x) + self.subframe.columnconfigure(num * 2 + 2, minsize=w1 - event.x) def _scrollbar_command(self, command, *parameters): # Detect scrollbar movement and move self.offset @@ -334,18 +366,11 @@ def _column_copy(self, _): if not self.select_rows: return # not multi-row, keep it. + # Dump TSV into a StringIO ... r1, r2 = self.select_rows df = self.dataframe.iloc[self.offset + r1 - 1 : self.offset + r2] buf = io.StringIO() df.to_csv(buf, sep="\t") - # XXX very cheesy, but self.clipboard_append() etc didn't - # seem to work, so this is a terrible workaround ... dump the - # TSV into a new tk.Text, select the whole thing and copy it - # into the clipboard. - top = tk.Toplevel() - text = tk.Text(top) - text.insert(tk.END, buf.getvalue()) - text.tag_add("sel", "1.0", tk.END) - text.event_generate("<>") - top.destroy() + # ... and then push that onto the clipboard + copy_to_clipboard(buf.getvalue()) diff --git a/countess/gui/tree.py b/countess/gui/tree.py index 5705f0e..6439786 100644 --- a/countess/gui/tree.py +++ b/countess/gui/tree.py @@ -9,10 +9,13 @@ import random import re import tkinter as tk +from configparser import ConfigParser from enum import Enum, IntFlag from functools import partial +from countess.core.config import read_config_dict, write_config_node_string from countess.core.pipeline import PipelineNode +from countess.gui.widgets import copy_to_clipboard, get_icon def _limit(value, min_value, max_value): @@ -254,8 +257,20 @@ class DraggableLabel(DraggableMixin, FixedUnbindMixin, tk.Label): pass -class DraggableMessage(DraggableMixin, FixedUnbindMixin, tk.Message): - pass +class NodeWrapper(DraggableLabel): + def update_node(self, node, vertical=False): + input_bar = node.plugin and node.plugin.num_inputs == 0 + output_bar = node.plugin and node.plugin.num_outputs == 0 + if not input_bar and not output_bar: + image = None + compound = tk.NONE + elif vertical: + image = get_icon(self, "hbar") + compound = tk.TOP if input_bar else tk.BOTTOM + else: + image = get_icon(self, "vbar") + compound = tk.LEFT if input_bar else tk.RIGHT + self.configure(text=node.name, image=image, compound=compound) class GraphWrapper: @@ -282,9 +297,10 @@ def __init__(self, canvas, graph, node_select_callback): self.canvas.bind("", self.on_canvas_leave) self.canvas.bind("", self.on_canvas_delete) self.canvas.bind("", self.on_canvas_delete) + self.canvas.bind("<>", self.on_paste) def label_for_node(self, node): - label = DraggableLabel(self.canvas, text=node.name, wraplength=125, cursor="hand1", takefocus=True) + label = NodeWrapper(self.canvas, wraplength=125, cursor="hand1", takefocus=True) if not node.position: node.position = (random.random() * 0.8 + 0.1, random.random() * 0.8 + 0.1) # XXX should be more elegant way of answering the question "are we flipped?" @@ -300,6 +316,8 @@ def label_for_node(self, node): label.bind("", partial(self.on_delete, node), add=True) label.bind("", partial(self.on_enter, node), add=True) label.bind("", partial(self.on_leave, node), add=True) + label.bind("<>", partial(self.on_copy, node), add=True) + label.bind("<>", partial(self.on_cut, node), add=True) return label @@ -330,19 +348,21 @@ def on_mousedown(self, node, event): def on_configure(self, node, label, event): """Stores the updated position of the label in node.position""" - xx = float(label.place_info()["relx"]) * self.canvas.winfo_width() - yy = float(label.place_info()["rely"]) * self.canvas.winfo_height() + height = self.canvas.winfo_height() + width = self.canvas.winfo_width() + + xx = float(label.place_info()["relx"]) * width + yy = float(label.place_info()["rely"]) * height node.position = self.new_node_position(xx, yy) + label.update_node(node, width < height) # Adapt label sizes to suit the window size, as best we can ... # XXX very arbitrary and definitely open to tweaking - height = self.canvas.winfo_height() - width = self.canvas.winfo_width() if height > width: - label_max_width = max(width // 9, 25) + label_max_width = max(width // 6, 25) label_font_size = int(math.sqrt(width) / 3) else: - label_max_width = max(width // 20, 16) + label_max_width = max(width // 12, 16) label_font_size = int(math.sqrt(width) / 5) label["wraplength"] = label_max_width label["font"] = ("TkDefaultFont", label_font_size) @@ -366,6 +386,33 @@ def on_leave(self, node, event): self.highlight_rectangle = None self.canvas.focus_set() + def on_copy(self, node, event): + # Copy the selected node's config to the clipboard + copy_to_clipboard(write_config_node_string(node)) + + def on_cut(self, node, event): + self.on_copy(node, event) + self.on_delete(node, event) + + def on_paste(self, event): + # Try and interpret whatever is in the clipboard as a config + # file section and create it as a node! This lets users + # cut and paste between CountESS instances! + cp = ConfigParser() + cp.read_string(self.canvas.clipboard_get()) + + for section_name in cp.sections(): + config_dict = cp[section_name] + node = read_config_dict(section_name, "", config_dict) + node.position = self.new_node_position(event.x, event.y) + self.add_node(node) + + # try and reconnect parents if they exist + nodes_by_name = {n.name: n for n in self.graph.nodes} + for key, val in config_dict.items(): + if key.startswith("_parent.") and val in nodes_by_name: + self.add_parent(nodes_by_name[val], node) + def on_canvas_motion(self, event): """Show a preview of line selection when the cursor is over line(s)""" items = self.canvas.find_overlapping(event.x - 10, event.y - 10, event.x + 10, event.y + 10) @@ -403,7 +450,7 @@ def on_canvas_button(self, event): # return position = self.new_node_position(event.x, event.y) - new_node = self.add_new_node(position) + new_node = self.add_new_node(position, select=False) for item in items: _, child_node, parent_node = self.lines_lookup[item] @@ -411,6 +458,9 @@ def on_canvas_button(self, event): self.add_parent(new_node, child_node) self.add_parent(parent_node, new_node) + self.highlight_node(new_node) + self.node_select_callback(new_node) + def on_canvas_delete(self, event): """Delete key on canvas: delete line(s).""" items = self.canvas.find_overlapping(event.x - 10, event.y - 10, event.x + 10, event.y + 10) @@ -445,7 +495,7 @@ def on_delete(self, node, event): event.widget.destroy() if len(self.graph.nodes) == 0: - self.add_new_node(select=True) + new_node = self.add_new_node(select=True) elif node == self.selected_node: # arbitrarily pick another node to show new_node = parent_nodes[0] if parent_nodes else child_nodes[0] if child_nodes else list(self.graph.nodes)[0] @@ -459,8 +509,7 @@ def find_node_at_position(self, x, y): return node return None - def add_new_node(self, position=(0.5, 0.5), select=True): - new_node = PipelineNode(name=f"NEW {len(self.graph.nodes)+1}", position=position) + def add_node(self, new_node, select: bool = True): self.graph.add_node(new_node) self.labels[new_node] = self.label_for_node(new_node) self.labels[new_node].update() @@ -468,6 +517,10 @@ def add_new_node(self, position=(0.5, 0.5), select=True): if select: self.highlight_node(new_node) self.node_select_callback(new_node) + + def add_new_node(self, position=(0.5, 0.5), select: bool = True): + new_node = PipelineNode(name="NEW", position=position) + self.add_node(new_node, select) return new_node def on_ghost_release(self, start_node, event): @@ -476,7 +529,7 @@ def on_ghost_release(self, start_node, event): other_node = self.find_node_at_position(event.x + xl, event.y + yl) if other_node is None: position = self.new_node_position(event.x + xl, event.y + yl) - other_node = self.add_new_node(position) + other_node = self.add_new_node(position, select=False) elif other_node == start_node: return elif start_node in other_node.parent_nodes: @@ -499,7 +552,11 @@ def on_ghost_release(self, start_node, event): self.node_select_callback(other_node) def add_parent(self, parent_node, child_node): - if parent_node not in child_node.parent_nodes: + if ( + (not parent_node.plugin or parent_node.plugin.num_outputs) + and (not child_node.plugin or child_node.plugin.num_inputs) + and parent_node not in child_node.parent_nodes + ): child_node.add_parent(parent_node) connecting_line = ConnectingLine(self.canvas, self.labels[parent_node], self.labels[child_node]) self.lines[child_node][parent_node] = connecting_line @@ -518,7 +575,13 @@ def del_parent(self, parent_node, child_node): def node_changed(self, node): """Called when something external updates the node's name, status or configuration.""" - self.labels[node]["text"] = node.name + flipped = self.canvas.winfo_width() >= self.canvas.winfo_height() + + if node.name.startswith("NEW"): + node.name = node.plugin.name + self.graph.reset_node_name(node) + + self.labels[node].update_node(node, not flipped) def destroy(self): for node_lines in self.lines.values(): diff --git a/countess/gui/widgets.py b/countess/gui/widgets.py index 61f3109..833f88f 100644 --- a/countess/gui/widgets.py +++ b/countess/gui/widgets.py @@ -1,28 +1,37 @@ +import platform import tkinter as tk from functools import cache from importlib.resources import as_file, files -from typing import Optional +from tkinter import filedialog +from typing import List, Optional, Sequence, Tuple, Union + +# To keep the cache of bitmaps smaller, we always associate the image with +# the toplevel, not the individual widget it appears on. @cache -def get_bitmap_image(parent: tk.Widget, name: str) -> tk.PhotoImage: +def get_icon_toplevel(toplevel: tk.Toplevel, name: str) -> tk.Image: source = files("countess.gui").joinpath("icons").joinpath(f"{name}.gif") with as_file(source) as filepath: - return tk.PhotoImage(master=parent, file=filepath) + return tk.PhotoImage(master=toplevel, file=filepath) + + +def get_icon(widget: tk.Widget, name: str) -> tk.Image: + return get_icon_toplevel(widget.winfo_toplevel(), name) def info_button(parent: tk.Widget, *args, **kwargs) -> tk.Button: - kwargs["image"] = get_bitmap_image(parent.winfo_toplevel(), "info") + kwargs["image"] = get_icon(parent, "info") return tk.Button(parent, *args, **kwargs) def add_button(parent: tk.Widget, *args, **kwargs) -> tk.Button: - kwargs["image"] = get_bitmap_image(parent.winfo_toplevel(), "add") + kwargs["image"] = get_icon(parent, "add") return tk.Button(parent, *args, **kwargs) def delete_button(parent: tk.Widget, *args, **kwargs) -> tk.Button: - kwargs["image"] = get_bitmap_image(parent.winfo_toplevel(), "del") + kwargs["image"] = get_icon(parent, "del") return tk.Button(parent, *args, **kwargs) @@ -36,10 +45,62 @@ def set_value(self, value: Optional[bool]): self["state"] = tk.DISABLED self["bd"] = 0 elif value: - self["image"] = get_bitmap_image(self.winfo_toplevel(), "check") + self["image"] = get_icon(self, "check") self["state"] = tk.NORMAL self["bd"] = 1 else: - self["image"] = get_bitmap_image(self.winfo_toplevel(), "uncheck") + self["image"] = get_icon(self, "uncheck") self["state"] = tk.NORMAL self["bd"] = 1 + + +def copy_to_clipboard(s: str): + # XXX very cheesy, but self.clipboard_append() etc didn't + # seem to work, so this is a terrible workaround ... dump the + # string into a new tk.Text, select the whole thing and copy it + # into the clipboard. + top = tk.Toplevel() + text = tk.Text(top) + text.insert(tk.END, s) + text.tag_add("sel", "1.0", tk.END) + text.event_generate("<>") + top.destroy() + + +def _clean_filetype_extension(ext: str): + # See https://tcl.tk/man/tcl8.6/TkCmd/getOpenFile.htm#M16 + # for the rules of file type extensions + + if ext != "*" and ext.startswith("*"): + ext = ext[1:] + + if platform.system() == "Darwin": + # Mac OSX crashes if given a double-dotted extension like ".csv.gz" #27 + try: + return ext[ext.rindex(".") :] + except ValueError: + return "." + ext + else: + return ext + + +def _clean_filetype_extensions(extensions: Union[str, List[str]]): + if type(extensions) is str: + extensions = extensions.split() + return [_clean_filetype_extension(ext) for ext in extensions] + + +def _clean_filetypes(file_types: Sequence[Tuple[str, Union[str, List[str]]]]): + return [(label, _clean_filetype_extensions(extensions)) for label, extensions in file_types] + + +def ask_saveas_filename(initial_file: str, file_types: Sequence[Tuple[str, Union[str, List[str]]]]): + return filedialog.asksaveasfilename(initialfile=initial_file, filetypes=_clean_filetypes(file_types)) + + +def ask_open_filenames(file_types: Sequence[Tuple[str, Union[str, List[str]]]]): + return filedialog.askopenfilenames(filetypes=_clean_filetypes(file_types)) + + +def ask_open_filename(file_types: Sequence[Tuple[str, Union[str, List[str]]]]): + return filedialog.askopenfilename(filetypes=_clean_filetypes(file_types)) diff --git a/countess/plugins/csv.py b/countess/plugins/csv.py index 8bdc98d..c32f8d1 100644 --- a/countess/plugins/csv.py +++ b/countess/plugins/csv.py @@ -1,7 +1,7 @@ import csv import gzip from io import BufferedWriter, BytesIO -from typing import Optional, Union +from typing import List, Optional, Sequence, Tuple, Union import pandas as pd @@ -16,9 +16,15 @@ MultiParam, StringParam, ) -from countess.core.plugins import PandasInputFilesPlugin, PandasProcessPlugin +from countess.core.plugins import PandasInputFilesPlugin, PandasOutputPlugin from countess.utils.pandas import flatten_columns +CSV_FILE_TYPES: Sequence[Tuple[str, Union[str, List[str]]]] = [ + ("CSV", [".csv", ".csv.gz"]), + ("TSV", [".tsv", ".tsv.gz"]), + ("TXT", [".txt", ".txt.gz"]), +] + class LoadCsvPlugin(PandasInputFilesPlugin): """Load CSV files""" @@ -27,8 +33,7 @@ class LoadCsvPlugin(PandasInputFilesPlugin): description = "Loads data from CSV or similar delimited text files and assigns types to columns" link = "https://countess-project.github.io/CountESS/included-plugins/#csv-reader" version = VERSION - - file_types = [("CSV", [".csv", ".gz"]), ("TSV", [".tsv", ".gz"]), ("TXT", ".txt")] + file_types = CSV_FILE_TYPES parameters = { "delimiter": ChoiceParam("Delimiter", ",", choices=[",", ";", "TAB", "|", "WHITESPACE"]), @@ -119,13 +124,12 @@ def read_file_to_dataframe(self, file_params, logger, row_limit=None): return df -class SaveCsvPlugin(PandasProcessPlugin): +class SaveCsvPlugin(PandasOutputPlugin): name = "CSV Save" description = "Save data as CSV or similar delimited text files" link = "https://countess-project.github.io/CountESS/included-plugins/#csv-writer" version = VERSION - - file_types = [("CSV", [".csv", ".gz"]), ("TSV", [".tsv", ".gz"]), ("TXT", ".txt")] + file_types = CSV_FILE_TYPES parameters = { "header": BooleanParam("CSV header row?", True), diff --git a/countess/plugins/fastq.py b/countess/plugins/fastq.py index f898a04..c9f3d27 100644 --- a/countess/plugins/fastq.py +++ b/countess/plugins/fastq.py @@ -25,7 +25,7 @@ class LoadFastqPlugin(PandasInputFilesPlugin): link = "https://countess-project.github.io/CountESS/included-plugins/#fastq-load" version = VERSION - file_types = [("FASTQ", ".fastq"), ("FASTQ (gzipped)", ".gz")] + file_types = [("FASTQ", [".fastq", ".fastq.gz"])] parameters = { "min_avg_quality": FloatParam("Minimum Average Quality", 10), diff --git a/countess/plugins/pivot.py b/countess/plugins/pivot.py index 6f36f41..d34fbae 100644 --- a/countess/plugins/pivot.py +++ b/countess/plugins/pivot.py @@ -19,8 +19,8 @@ class PivotPlugin(PandasProcessPlugin): """Groups a Pandas Dataframe by an arbitrary column and rolls up rows""" name = "Pivot Tool" - description = """Groups a dataframe and pivots column values into columns. - Expanded column values are duplicated for each combination of pivot values. + description = "Groups a dataframe and pivots column values into columns." + additional = """Expanded column values are duplicated for each combination of pivot values. Missing values default to 0, and duplicate values are summed.""" version = VERSION link = "https://countess-project.github.io/CountESS/included-plugins/#pivot-tool" @@ -42,6 +42,8 @@ def process(self, data: pd.DataFrame, source: str, logger: Logger): assert isinstance(self.parameters["columns"], PerColumnArrayParam) self.input_columns.update(get_all_columns(data)) + data.reset_index(drop=data.index.names == [None], inplace=True) + column_parameters = list(zip(self.input_columns, self.parameters["columns"])) index_cols = [col for col, param in column_parameters if param.value == "Index"] pivot_cols = [col for col, param in column_parameters if param.value == "Pivot"] diff --git a/countess/utils/variant.py b/countess/utils/variant.py index 619e384..4cfa5cb 100644 --- a/countess/utils/variant.py +++ b/countess/utils/variant.py @@ -353,6 +353,9 @@ def find_variant_protein(ref_seq: str, var_seq: str, offset: Optional[int] = 0): >>> list(find_variant_protein("ATGGTTGGTTCA", "ATGGTTTAGACA")) ['Gly3Ter'] + >>> list(find_variant_protein("ATGGTTTGGTAG", "ATGGTTTAGTAG")) + ['Trp3Ter'] + Offset lets you set the frame offset (0, 1 or 2, practically) """ @@ -387,6 +390,11 @@ def _ref(ref_offset): if opcode.tag == "delete": assert dest_pro == "" + if ref_pro[src_end] == '*': + # if the codon just after this deletion is a terminator, + # consider this an early termination. + yield f"{_ref(src_start)}Ter" + return if len(src_pro) == 1: yield f"{_ref(src_start)}del" else: diff --git a/docs/_layouts/default.html b/docs/_layouts/default.html index 5d577a2..8cba9b2 100644 --- a/docs/_layouts/default.html +++ b/docs/_layouts/default.html @@ -2,9 +2,9 @@ {{ page.title }} - - - + + +