Skip to content

Commit

Permalink
Merge pull request #93 from maximz/dotplot-grid-improvement
Browse files Browse the repository at this point in the history
Fix dotplot bug
  • Loading branch information
maximz authored Oct 23, 2024
2 parents 3990836 + 7c7744f commit 14de2d5
Show file tree
Hide file tree
Showing 9 changed files with 126 additions and 53 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.7.4
current_version = 0.7.5
commit = False
tag = False

Expand Down
2 changes: 1 addition & 1 deletion genetools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

__author__ = """Maxim Zaslavsky"""
__email__ = "maxim@maximz.com"
__version__ = "0.7.4"
__version__ = "0.7.5"

# We want genetools.[submodule].[function] to be accessible simply by importing genetools, without having to import genetools.[submodule]
from . import helpers, plots, stats, arrays
Expand Down
119 changes: 73 additions & 46 deletions genetools/plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -794,19 +794,19 @@ def _common_dotplot(
size_vmax: Optional[float] = None,
size_vcenter: Optional[float] = None,
###
# Configure which entries will be displayed in size legend:
# n_legend_items_for_size: int = 5,
extend_size_legend_to_vmin_vmax: bool = False,
# Providing representative_sizes_for_legend will override n_legend_items_for_size and extend_size_legend_to_vmin_vmax
# Configure which entries will be displayed in size legend (note: this will override extend_size_legend_to_vmin_vmax)
representative_sizes_for_legend: Optional[List[float]] = None,
# If should_size_legend_items_be_colored is False, all size legend items will be uniform black
# If should_size_legend_items_be_colored is False, all size legend items will be uniform grey:
should_size_legend_items_be_colored: bool = False,
###
figsize: Optional[Tuple[float, float]] = None,
inverse_size: bool = False,
min_marker_size: int = 1,
marker_size_scale_factor: int = 100,
grid: bool = True,
# Control grid transparency:
grid_alpha: float = 0.3,
) -> Tuple[
matplotlib.figure.Figure,
matplotlib.axes.Axes,
Expand Down Expand Up @@ -839,63 +839,73 @@ def _generate_size_norm_input_data(
if size_vcenter is not None:
# Support a vcenter for size, similar to color_vcenter, to allow divergent size palettes so negative values can have big sizes.
# Example: size_vcenter=0 would make 0 have small size while 1 and -1 have large size.

# Just to reiterate how this affects sizes in diverging data: Values on both sides of the center are mapped to large sizes.
# (Using size_vcenter will make values equally distant from the center appear the same size. So if we have data like -5, -3, 0, 3, 5, with size_vcenter=0, the size of -3 and 3 will be the same, as will the size of -5 and 5.)

# Calculate the absolute difference from vcenter
abs_diff_from_vcenter = np.abs(clipped_size_data - size_vcenter)
clipped_size_data = abs_diff_from_vcenter

# Note we have to reshape/ravel the data because this scaler expects a 2d array
# Note we have to reshape/ravel the data because the MinMaxScaler expects a 2d array
return clipped_size_data.reshape(-1, 1)

# Fit the scaler to the clipped data
# Due to clip=True, we do not need to repeat manual np.clip before calling size_scaller.transform.
# Note we have to reshape/ravel the data because this scaler expects a 2d array
size_scaler = MinMaxScaler(clip=True).fit(
_generate_size_norm_input_data(data[size_key])
)

def size_norm(x: Union[np.ndarray, pd.Series, List[float]]) -> np.ndarray:
"""convert raw size data to plot marker size"""
# Note we have to reshape/ravel the data because this scaler expects a 2d array
transformed = size_scaler.transform(
_generate_size_norm_input_data(x)
).ravel()
if inverse_size:
transformed = 1 - transformed
size_data = data[size_key]
if size_data.nunique() < 2:
# Handle zero variance case (i.e. all values are identical)
def size_norm(x):
# Return constant size
return marker_size_scale_factor
else:
# Fit the scaler to the clipped data
# Due to clip=True, we do not need to repeat manual np.clip before calling size_scaller.transform.
# Note we have to reshape/ravel the data in _generate_size_norm_input_data because MinMaxScaler expects a 2d array
size_scaler = MinMaxScaler(clip=True).fit(
_generate_size_norm_input_data(size_data)
)

return min_marker_size + marker_size_scale_factor * transformed
def size_norm(x: Union[np.ndarray, pd.Series, List[float]]) -> np.ndarray:
"""convert raw size data to plot marker size"""
# Note we have to reshape/ravel the data in _generate_size_norm_input_data because MinMaxScaler expects a 2d array
transformed = size_scaler.transform(
_generate_size_norm_input_data(x)
).ravel()
if inverse_size:
transformed = 1 - transformed
return min_marker_size + marker_size_scale_factor * transformed

fig, ax = plt.subplots(figsize=figsize)

# Set up color normalization
if color_vcenter is not None:
# Create norm centered at color_vcenter.

# We have to also handle color_vmin and color_vmax if they are not None.
# They can't be passed alongside a norm to ax.scatter: "ValueError: Passing a Normalize instance simultaneously with vmin/vmax is not supported. Please pass vmin/vmax directly to the norm when creating it."
plot_vmin, plot_vmax = None, None
# They can't be passed alongside a norm to ax.scatter: "ValueError: Passing a Normalize instance simultaneously with vmin/vmax is not supported. Please pass vmin/vmax directly to the norm when creating it."

# Previously we used: norm = matplotlib.colors.CenteredNorm(vcenter=color_vcenter)
# But CenteredNorm does not accept vmin, vmax in its constructor, so we have to switch to TwoSlopeNorm
color_norm = matplotlib.colors.TwoSlopeNorm(
vcenter=color_vcenter, vmin=color_vmin, vmax=color_vmax
)
else:
plot_vmin, plot_vmax = color_vmin, color_vmax
# Create a norm even when vcenter is None
color_norm = matplotlib.colors.Normalize(vmin=plot_vmin, vmax=plot_vmax)
color_norm = matplotlib.colors.Normalize(vmin=color_vmin, vmax=color_vmax)

scatter = ax.scatter(
data[x_axis_key].values,
data[y_axis_key].values,
c=data[color_key].values,
s=size_norm(data[size_key]),
cmap=color_cmap,
norm=color_norm,
vmin=plot_vmin,
vmax=plot_vmax,
norm=color_norm, # includes the vmin, vmax, vcenter
alpha=1,
# Add outlines to the scatter points:
edgecolors="lightgrey",
linewidths=0.5,
zorder=3, # Ensure points are drawn above grid
)

ax.set_xlim(-0.5, max(ax.get_xticks()) + 0.5)
ax.set_ylim(-0.5, max(ax.get_yticks()) + 0.5)
ax.invert_yaxis() # respect initial ordering - go top to bottom
Expand All @@ -904,11 +914,17 @@ def size_norm(x: Union[np.ndarray, pd.Series, List[float]]) -> np.ndarray:
if grid:
ax.set_xticks(np.array(ax.get_xticks()) - 0.5, minor=True)
ax.set_yticks(np.array(ax.get_yticks()) - 0.5, minor=True)
ax.grid(which="minor")
ax.grid(
which="minor",
alpha=grid_alpha,
# Put grid behind points
zorder=1,
)

# Aspect ratio
ax.set_aspect("equal", "box")

# Handle tick labels
# At this point, ax.get_xticklabels() may return empty tick labels and emit UserWarning: FixedFormatter should only be used together with FixedLocator
# Must draw the canvas to position the ticks: https://stackoverflow.com/a/41124884/130164
# And must assign tick locations prior to assigning tick labels, i.e. set_ticks(get_ticks()): https://stackoverflow.com/a/68794383/130164
Expand All @@ -917,18 +933,17 @@ def size_norm(x: Union[np.ndarray, pd.Series, List[float]]) -> np.ndarray:
ax.set_xticklabels(ax.get_xticklabels(), rotation="vertical")

## Produce size legend entries containing a cross section of values.

# User may have passed representative_sizes_for_legend.
# Generate legend entries
if representative_sizes_for_legend is not None:
# User may have passed representative_sizes_for_legend.
representative_sizes_for_legend = np.array(representative_sizes_for_legend)
else:
# Generate evenly spaced values

# First, find the extents:
data_min_size_clipped, data_max_size_clipped = (
np.min(data[size_key]),
np.max(data[size_key]),
)
data_min_size_clipped = np.min(size_data)
data_max_size_clipped = np.max(size_data)

if size_vmin is not None or size_vmax is not None:
data_min_size_clipped = np.clip(
data_min_size_clipped, size_vmin, size_vmax
Expand All @@ -944,7 +959,7 @@ def size_norm(x: Union[np.ndarray, pd.Series, List[float]]) -> np.ndarray:
if size_vmax is not None:
data_max_size_clipped = size_vmax

# Generate values:
# Include center point in legend if specified:
if (
size_vcenter is not None
and data_min_size_clipped <= size_vcenter <= data_max_size_clipped
Expand All @@ -955,32 +970,37 @@ def size_norm(x: Union[np.ndarray, pd.Series, List[float]]) -> np.ndarray:
else:
add_extra = np.array([])

# Generate values:
# n_legend_items_for_size = 5
# representative_sizes_for_legend = np.linspace(data_min_size_clipped, data_max_size_clipped, n_legend_items_for_size)
# Instead of linspace, use a Locator as in legend_elements(): https://github.com/matplotlib/matplotlib/blob/eb02b108ea181930ab37717c75e07ba792e01f1d/lib/matplotlib/collections.py#L1143-L1152
# num = n_legend_items_for_size - len(add_extra)
# locator = matplotlib.ticker.MaxNLocator(nbins=num-1, min_n_ticks=num, steps=[1, 2, 2.5, 3, 5, 6, 8, 10])
# Actually, use AutoLocator for nice round numbers:
locator = matplotlib.ticker.AutoLocator()
representative_sizes_for_legend: np.ndarray = np.hstack(
[
locator.tick_values(data_min_size_clipped, data_max_size_clipped),
add_extra,
]
)

# Clip:
representative_sizes_for_legend = np.clip(
representative_sizes_for_legend,
data_min_size_clipped,
data_max_size_clipped,
)
representative_sizes_for_legend = np.unique(
representative_sizes_for_legend
) # remove any duplicates

# Remove any duplicates:
representative_sizes_for_legend = np.unique(representative_sizes_for_legend)

representative_sizes_for_legend.sort()

# Calculate corresponding plot sizes
# Calculate corresponding sizes for legend
representative_sizes_after_norm = size_norm(representative_sizes_for_legend)

# Calculate corresponding plot colors
# Calculate corresponding colors for legend
if should_size_legend_items_be_colored:
representative_colors = representative_sizes_for_legend
if color_vmin is not None or color_vmax is not None:
Expand Down Expand Up @@ -1018,9 +1038,11 @@ def size_norm(x: Union[np.ndarray, pd.Series, List[float]]) -> np.ndarray:
representative_sizes_after_norm, representative_colors
)
]
# Create legend labels with %g formatting to remove any trailing zeros when converting a float to string

# Format legend labels
# Create legend labels with %g formatting to remove any trailing zeros when converting a float to string:
# labels = [f"{value:g}" for value in representative_sizes_for_legend]
# Use this instead to set a maximum precision and avoid scientific notation:
# Actually, use this instead to set a maximum precision and avoid scientific notation:
labels = [
np.format_float_positional(val, trim="-", unique=True, precision=4)
for val in representative_sizes_for_legend
Expand All @@ -1046,12 +1068,14 @@ def plot_color_and_size_dotplot(
color_and_size_vcenter: Optional[float] = None,
figsize: Optional[Tuple[float, float]] = None,
legend_text: Optional[str] = None,
# n_legend_items: int = 5,
extend_legend_to_vmin_vmax: bool = False,
# Configure which entries will be displayed in legend (note: this will override extend_legend_to_vmin_vmax)
representative_values_for_legend: Optional[List[float]] = None,
min_marker_size: int = 1,
marker_size_scale_factor: int = 100,
grid: bool = True,
# Control grid transparency:
grid_alpha: float = 0.3,
) -> Tuple[matplotlib.figure.Figure, matplotlib.axes.Axes]:
"""
Plot dotplot heatmap showing a key as both color and size.
Expand All @@ -1071,7 +1095,6 @@ def plot_color_and_size_dotplot(
size_vmin=color_and_size_vmin,
size_vmax=color_and_size_vmax,
size_vcenter=color_and_size_vcenter,
# n_legend_items_for_size=n_legend_items,
extend_size_legend_to_vmin_vmax=extend_legend_to_vmin_vmax,
representative_sizes_for_legend=representative_values_for_legend,
# Size legend will represent both size_key and color_key:
Expand All @@ -1081,6 +1104,7 @@ def plot_color_and_size_dotplot(
min_marker_size=min_marker_size,
marker_size_scale_factor=marker_size_scale_factor,
grid=grid,
grid_alpha=grid_alpha,
)

def add_padding_to_multiline_string(text: str, padding: int):
Expand Down Expand Up @@ -1142,8 +1166,8 @@ def plot_two_key_color_and_size_dotplot(
size_vmin: Optional[float] = None,
size_vmax: Optional[float] = None,
size_vcenter: Optional[float] = None,
# n_legend_items_for_size: int = 5,
extend_size_legend_to_vmin_vmax: bool = False,
# Configure which entries will be displayed in size legend (note: this will override extend_size_legend_to_vmin_vmax)
representative_sizes_for_legend: Optional[List[float]] = None,
inverse_size: bool = False,
color_legend_text: Optional[str] = None,
Expand All @@ -1152,6 +1176,8 @@ def plot_two_key_color_and_size_dotplot(
min_marker_size: int = 1,
marker_size_scale_factor: int = 100,
grid: bool = True,
# Control grid transparency:
grid_alpha: float = 0.3,
) -> Tuple[matplotlib.figure.Figure, matplotlib.axes.Axes]:
"""
Plot dotplot heatmap showing two keys together.
Expand Down Expand Up @@ -1189,6 +1215,7 @@ def plot_two_key_color_and_size_dotplot(
min_marker_size=min_marker_size,
marker_size_scale_factor=marker_size_scale_factor,
grid=grid,
grid_alpha=grid_alpha,
)

# Make colorbar axes:
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
test_suite="tests",
tests_require=test_requirements,
url="https://github.com/maximz/genetools",
version="0.7.4",
version="0.7.5",
zip_safe=False,
extras_require={
# This will also install anndata and UMAP packages
Expand Down
Binary file modified tests/baseline/test_dotplot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified tests/baseline/test_dotplot_single_key.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
54 changes: 50 additions & 4 deletions tests/test_plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -725,9 +725,8 @@ def test_dotplot_single_key(dotplot_data):
return fig


@snapshot_image
def test_dotplot_confirm_legend_colors_match_dot_colors():
# Regression test to confirm that the colors in the legend match the colors of the dots, for data where the max value is a small positive float.
@pytest.fixture
def dotplot_data2():
df = pd.DataFrame(
[
{"feature": "feature1", "value": 0.12},
Expand All @@ -740,15 +739,62 @@ def test_dotplot_confirm_legend_colors_match_dot_colors():
{"feature": "feature8", "value": 0.0},
]
).assign(classname="class1")
return df


@snapshot_image
def test_dotplot_confirm_legend_colors_match_dot_colors(dotplot_data2):
# Regression test to confirm that the colors in the legend match the colors of the dots, for data where the max value is a small positive float.
with sns.plotting_context("paper"), sns.axes_style("white"):
fig, _ = genetools.plots.plot_color_and_size_dotplot(
data=dotplot_data2,
x_axis_key="classname",
y_axis_key="feature",
value_key="value",
color_cmap=sns.color_palette("magma_r", as_cmap=True),
figsize=(5, 6),
grid=False,
)
return fig


@snapshot_image
def test_dotplot_confirm_vmin_vmax_supported(dotplot_data2):
# Regression test to confirm that the colors in the legend match the colors of the dots, for data where the max value is a small positive float.
with sns.plotting_context("paper"), sns.axes_style("white"):
fig, _ = genetools.plots.plot_color_and_size_dotplot(
data=dotplot_data2,
x_axis_key="classname",
y_axis_key="feature",
value_key="value",
color_cmap=sns.color_palette("magma_r", as_cmap=True),
figsize=(5, 6),
grid=False,
# Testing these parameters:
color_and_size_vmin=0,
color_and_size_vmax=1,
)
return fig


@snapshot_image
def test_dotplot_confirm_vmin_vmax_supported_with_extend_legend_to_vmin_vmax(
dotplot_data2,
):
# Regression test to confirm that the colors in the legend match the colors of the dots, for data where the max value is a small positive float.
with sns.plotting_context("paper"), sns.axes_style("white"):
fig, _ = genetools.plots.plot_color_and_size_dotplot(
data=df,
data=dotplot_data2,
x_axis_key="classname",
y_axis_key="feature",
value_key="value",
color_cmap=sns.color_palette("magma_r", as_cmap=True),
figsize=(5, 6),
grid=False,
# Testing these parameters:
color_and_size_vmin=0,
color_and_size_vmax=1,
extend_legend_to_vmin_vmax=True,
)
return fig

Expand Down

0 comments on commit 14de2d5

Please sign in to comment.