Skip to content

Commit

Permalink
statistics visualize_annotation_duration : 引数--bin_width , `--tim…
Browse files Browse the repository at this point in the history
…e_unit`を追加 (#1190)

* 引数追加

* 可視化

* ラベルごとのヒストグラムの感さえい

* 属性のヒストグラムを修正

* タイトルを表示

* metadataの表示

* format

* 可視化

* update pylintrc

* update docs
  • Loading branch information
yuji38kwmt authored May 8, 2024
1 parent 8711957 commit 49072b5
Show file tree
Hide file tree
Showing 9 changed files with 524 additions and 136 deletions.
33 changes: 33 additions & 0 deletions annofabcli/common/bokeh.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from __future__ import annotations

import json
import math
from typing import Any, Optional

from bokeh.models import LayoutDOM
from bokeh.models.widgets.markups import PreText
from bokeh.plotting import figure


def create_pretext_from_metadata(metadata: dict[str, Any]) -> PreText:
    """
    Build a ``PreText`` widget that lists metadata as ``key = value`` lines.

    Each value is rendered as JSON so that strings, numbers, lists and dicts are
    distinguishable in the output. One line is emitted per entry, in the
    iteration order of ``metadata``.

    Args:
        metadata: Mapping of metadata names to JSON-serializable values.

    Returns:
        A ``PreText`` whose text contains one ``key = <json>`` line per entry.

    Raises:
        TypeError: If a value is not JSON-serializable (raised by ``json.dumps``).
    """
    # ensure_ascii=False keeps non-ASCII text (e.g. Japanese names) readable instead of
    # emitting `\uXXXX` escapes; this text is displayed to a human, not parsed by a machine.
    text = "\n".join(f"{key} = {json.dumps(value, ensure_ascii=False)}" for key, value in metadata.items())
    return PreText(text=text)


def convert_1d_figure_list_to_2d(figure_list: list[figure], *, ncols: int = 4) -> list[list[Optional[LayoutDOM]]]:
    """
    Convert a flat list of figures into a 2D list of rows for a grid layout.

    Every row holds exactly ``ncols`` entries. When the last row has fewer figures
    than ``ncols``, it is padded with ``None`` on the right.

    Args:
        figure_list: Figures in display order (left to right, top to bottom).
        ncols: Number of columns per row. Must be 1 or greater.

    Returns:
        A list of rows, each of length ``ncols``. Empty when ``figure_list`` is empty.

    Raises:
        ValueError: If ``ncols`` is less than 1.
    """
    # Without this guard, ncols=0 fails with an opaque ZeroDivisionError and a negative
    # ncols silently produces an empty grid, dropping every figure.
    if ncols < 1:
        raise ValueError(f"ncols must be 1 or greater, but got {ncols}")

    row_list: list[list[Optional[LayoutDOM]]] = []
    for start in range(0, len(figure_list), ncols):
        row: list[Optional[LayoutDOM]] = list(figure_list[start : start + ncols])
        # Pad the (possibly short) final row; a no-op for full rows.
        row.extend([None] * (ncols - len(row)))
        row_list.append(row)

    return row_list
68 changes: 68 additions & 0 deletions annofabcli/statistics/histogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,25 @@ def get_sub_title_from_series(ser: pandas.Series, decimals: int = 3) -> str:
return sub_title


def get_bin_edges(min_value: float, max_value: float, bin_width: float) -> numpy.ndarray:
    """
    Return bin edges suitable for the ``bins`` argument of ``numpy.histogram``.

    The edges start at ``min_value`` and advance by ``bin_width`` up to (but not
    including) ``max_value + bin_width * 2``. The result may therefore end with
    an extra, empty bin.

    * min_value=0, max_value=3, bin_width=2 returns [0, 2, 4, 6].
    * min_value=0, max_value=4, bin_width=2 returns [0, 2, 4, 6].

    Args:
        min_value: Minimum value of the data shown in the histogram.
        max_value: Maximum value of the data shown in the histogram.
        bin_width: Width of each histogram bin. Must be greater than 0.

    Returns:
        1D array of bin edges.

    Raises:
        ValueError: If ``bin_width`` is not greater than 0.
    """
    # `not (x > 0)` also rejects NaN. Without this check a zero width fails inside
    # numpy.arange with an unrelated-looking error, and a negative width returns no edges.
    if not bin_width > 0:
        raise ValueError(f"bin_width must be greater than 0, but got {bin_width}")

    # Why `bin_width * 2` is added to `stop`:
    # with non-integer arguments, floating-point rounding can make numpy.arange return a
    # different number of elements than expected, so the range is deliberately extended a little.
    # https://qiita.com/yuji38kwmt/items/ff00f3cb9083567d083f
    bin_edges = numpy.arange(start=min_value, stop=max_value + bin_width * 2, step=bin_width)
    return bin_edges


def get_histogram_figure(
ser: pandas.Series,
x_axis_label: str,
Expand Down Expand Up @@ -51,3 +70,52 @@ def get_histogram_figure(

fig.add_tools(hover)
return fig


def create_histogram_figure2(
    hist: numpy.ndarray,
    bin_edges: numpy.ndarray,
    *,
    x_axis_label: str,
    y_axis_label: str,
    title: Optional[str] = None,
    sub_title: Optional[str] = None,
    width: int = 400,
    height: int = 300,
) -> figure:
    """
    Create a bokeh figure that draws a histogram from precomputed counts.

    Args:
        hist: Bin counts; element 0 of the tuple returned by `numpy.histogram`.
        bin_edges: Bin edges; element 1 of the tuple returned by `numpy.histogram`.
        x_axis_label: Label of the X axis.
        y_axis_label: Label of the Y axis.
        title: Title of the graph. Omitted when None.
        sub_title: Subtitle of the graph. Omitted when None.
        width: Width of the graph.
        height: Height of the graph.

    Returns:
        A figure with one quad per bin and a hover tool showing the bin's
        interval, width and frequency.
    """
    # One row per bin: count plus the left/right edge of the bin.
    df_histogram = pandas.DataFrame(
        {
            "frequency": hist,
            "left": bin_edges[:-1],
            "right": bin_edges[1:],
        }
    )

    # Pre-formatted strings used only by the hover tooltips.
    edge_pairs = list(zip(df_histogram["left"], df_histogram["right"]))
    df_histogram["interval"] = [f"{lower:.1f} to {upper:.1f}" for lower, upper in edge_pairs]
    df_histogram["width"] = [f"{(upper - lower):.1f}" for lower, upper in edge_pairs]

    fig = figure(
        width=width,
        height=height,
        x_axis_label=x_axis_label,
        y_axis_label=y_axis_label,
    )

    # The subtitle is added before the title; both go into the "above" layout area.
    if sub_title is not None:
        fig.add_layout(Title(text=sub_title, text_font_size="11px"), "above")
    if title is not None:
        fig.add_layout(Title(text=title), "above")

    fig.quad(
        source=ColumnDataSource(df_histogram),
        top="frequency",
        bottom=0,
        left="left",
        right="right",
        line_color="white",
    )

    tooltips = [("interval", "@interval"), ("width", "@width"), ("frequency", "@frequency")]
    fig.add_tools(HoverTool(tooltips=tooltips))
    return fig
Loading

0 comments on commit 49072b5

Please sign in to comment.