From aa545e08bb63296391270842a4945a62f7975581 Mon Sep 17 00:00:00 2001 From: yuji38kwmt Date: Fri, 8 Nov 2024 15:27:38 +0900 Subject: [PATCH] =?UTF-8?q?`statistics=20visualize`=20:=20`--custom=5Fprod?= =?UTF-8?q?uction=5Fvolume`=E3=81=AB=E9=96=A2=E3=81=99=E3=82=8B=E8=AA=AC?= =?UTF-8?q?=E6=98=8E=E3=82=92=E8=BF=BD=E5=8A=A0=20(#1295)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * ドキュメントの生成 * `summarize_whole_performance_csv`の修正 * 生産量を追加 * format * format * format * QualityIndicatorの修正 * format --- .../mask_visualization_dir.py | 6 +- .../merge_visualization_dir.py | 7 +- .../summarize_whole_performance_csv.py | 36 ++++- annofabcli/stat_visualization/write_graph.py | 7 +- .../write_performance_rating_csv.py | 132 +++++++++++++----- .../dataframe/project_performance.py | 11 +- annofabcli/statistics/visualize_statistics.py | 25 +++- .../statistics/visualize.rst | 71 +++++++++- .../test_write_performance_rating_csv.py | 22 +-- 9 files changed, 251 insertions(+), 66 deletions(-) diff --git a/annofabcli/stat_visualization/mask_visualization_dir.py b/annofabcli/stat_visualization/mask_visualization_dir.py index 806ef2af7..87d3105cf 100755 --- a/annofabcli/stat_visualization/mask_visualization_dir.py +++ b/annofabcli/stat_visualization/mask_visualization_dir.py @@ -1,6 +1,7 @@ from __future__ import annotations import argparse +import json import logging from dataclasses import dataclass from pathlib import Path @@ -266,11 +267,14 @@ def parse_args(parser: argparse.ArgumentParser) -> None: action="store_true", help="必要最小限のファイルを出力します。", ) + custom_production_volume_sample = { + "column_list": [{"value": "video_duration_minute", "name": "動画長さ"}], + } parser.add_argument( "--custom_production_volume", type=str, - help=("プロジェクト独自の生産量の指標をJSON形式で指定します。"), + help=("プロジェクト独自の生産量をJSON形式で指定します。" f"(例) ``{json.dumps(custom_production_volume_sample, ensure_ascii=False)}`` \n"), ) parser.add_argument("-o", "--output_dir", type=Path, 
required=True, help="出力先ディレクトリ。") diff --git a/annofabcli/stat_visualization/merge_visualization_dir.py b/annofabcli/stat_visualization/merge_visualization_dir.py index 8810c197a..cd5b4add4 100755 --- a/annofabcli/stat_visualization/merge_visualization_dir.py +++ b/annofabcli/stat_visualization/merge_visualization_dir.py @@ -1,6 +1,7 @@ from __future__ import annotations import argparse +import json import logging import sys from pathlib import Path @@ -288,10 +289,14 @@ def parse_args(parser: argparse.ArgumentParser) -> None: help="必要最小限のファイルを出力します。", ) + custom_production_volume_sample = { + "column_list": [{"value": "video_duration_minute", "name": "動画長さ"}], + } + parser.add_argument( "--custom_production_volume", type=str, - help=("プロジェクト独自の生産量の指標をJSON形式で指定します。"), + help=("プロジェクト独自の生産量をJSON形式で指定します。" f"(例) ``{json.dumps(custom_production_volume_sample, ensure_ascii=False)}`` \n"), ) parser.set_defaults(subcommand_func=main) diff --git a/annofabcli/stat_visualization/summarize_whole_performance_csv.py b/annofabcli/stat_visualization/summarize_whole_performance_csv.py index b8ca2222e..a7926eefc 100644 --- a/annofabcli/stat_visualization/summarize_whole_performance_csv.py +++ b/annofabcli/stat_visualization/summarize_whole_performance_csv.py @@ -1,20 +1,42 @@ +from __future__ import annotations + import argparse +import json import logging from pathlib import Path from typing import Optional import annofabcli +from annofabcli.common.cli import ( + get_json_from_args, +) from annofabcli.statistics.visualization.dataframe.project_performance import ProjectPerformance +from annofabcli.statistics.visualization.model import ProductionVolumeColumn from annofabcli.statistics.visualization.project_dir import ProjectDir logger = logging.getLogger(__name__) +def create_custom_production_volume_list(cli_value: str) -> list[ProductionVolumeColumn]: + """ + コマンドラインから渡された文字列を元に、独自の生産量を表す列情報を生成します。 + """ + dict_data = get_json_from_args(cli_value) + + column_list = 
dict_data["column_list"] + custom_production_volume_list = [ProductionVolumeColumn(column["value"], column["name"]) for column in column_list] + + return custom_production_volume_list + + def main(args: argparse.Namespace) -> None: root_dir: Path = args.dir project_dir_list = [ProjectDir(elm) for elm in root_dir.iterdir() if elm.is_dir()] - project_performance = ProjectPerformance.from_project_dirs(project_dir_list) + custom_production_volume_list = ( + create_custom_production_volume_list(args.custom_production_volume) if args.custom_production_volume is not None else None + ) + project_performance = ProjectPerformance.from_project_dirs(project_dir_list, custom_production_volume_list=custom_production_volume_list) project_performance.to_csv(args.output) @@ -28,6 +50,18 @@ def parse_args(parser: argparse.ArgumentParser) -> None: parser.add_argument("-o", "--output", type=Path, required=True, help="出力先のファイルパスを指定します。") + custom_production_volume_sample = { + "column_list": [{"value": "video_duration_minute", "name": "動画長さ"}], + } + + parser.add_argument( + "--custom_production_volume", + type=str, + help=("プロジェクト独自の生産量をJSON形式で指定します。" f"(例) ``{json.dumps(custom_production_volume_sample, ensure_ascii=False)}`` \n"), + ) + + parser.set_defaults(subcommand_func=main) + parser.set_defaults(subcommand_func=main) diff --git a/annofabcli/stat_visualization/write_graph.py b/annofabcli/stat_visualization/write_graph.py index 910380af7..89192b7fb 100755 --- a/annofabcli/stat_visualization/write_graph.py +++ b/annofabcli/stat_visualization/write_graph.py @@ -1,6 +1,7 @@ from __future__ import annotations import argparse +import json import logging from pathlib import Path from typing import List, Optional @@ -163,10 +164,14 @@ def parse_args(parser: argparse.ArgumentParser) -> None: help="必要最小限のファイルを出力します。", ) + custom_production_volume_sample = { + "column_list": [{"value": "video_duration_minute", "name": "動画長さ"}], + } + parser.add_argument( "--custom_production_volume", 
type=str, - help=("プロジェクト独自の生産量の指標をJSON形式で指定します。"), + help=("プロジェクト独自の生産量をJSON形式で指定します。" f"(例) ``{json.dumps(custom_production_volume_sample, ensure_ascii=False)}`` \n"), ) parser.add_argument("-o", "--output_dir", type=Path, required=True, help="出力先ディレクトリ。配下にプロジェクトディレクトリが生成されます。") diff --git a/annofabcli/stat_visualization/write_performance_rating_csv.py b/annofabcli/stat_visualization/write_performance_rating_csv.py index e94b6b7c6..1696364ca 100755 --- a/annofabcli/stat_visualization/write_performance_rating_csv.py +++ b/annofabcli/stat_visualization/write_performance_rating_csv.py @@ -21,7 +21,7 @@ ProjectPerformance, ProjectWorktimePerMonth, ) -from annofabcli.statistics.visualization.model import WorktimeColumn +from annofabcli.statistics.visualization.model import ProductionVolumeColumn, WorktimeColumn from annofabcli.statistics.visualization.project_dir import ProjectDir logger = logging.getLogger(__name__) @@ -67,33 +67,50 @@ class WorktimeType(Enum): """計測作業時間""" -class ProductivityIndicator(Enum): +@dataclass(frozen=True) +class ProductivityIndicator: """ 生産性の指標 """ - MONITORED_WORKTIME_HOUR_PER_INPUT_DATA_COUNT = "monitored_worktime_hour/input_data_count" - ACTUAL_WORKTIME_HOUR_PER_INPUT_DATA_COUNT = "actual_worktime_hour/input_data_count" - MONITORED_WORKTIME_HOUR_PER_ANNOTATION_COUNT = "monitored_worktime_hour/annotation_count" - ACTUAL_WORKTIME_HOUR_PER_ANNOTATION_COUNT = "actual_worktime_hour/annotation_count" + column: str @property def worktime_type(self) -> WorktimeType: """ 作業時間の種類 """ - denominator = self.value.split("/")[0] + denominator = self.column.split("/")[0] return WorktimeType(denominator) + @property + def production_volume(self) -> str: + """ + 生産量(`annotation_count`など)を表す文字列 + """ + return self.column.split("/")[1] + -class QualityIndicator(Enum): +@dataclass(frozen=True) +class QualityIndicator: """ 品質の指標 """ - POINTED_OUT_INSPECTION_COMMENT_COUNT_PER_ANNOTATION_COUNT = "pointed_out_inspection_comment_count/annotation_count" - 
POINTED_OUT_INSPECTION_COMMENT_COUNT_PER_INPUT_DATA_COUNT = "pointed_out_inspection_comment_count/input_data_count" - REJECTED_COUNT_PER_TASK_COUNT = "rejected_count/task_count" + column: str + + def quality_penalty(self) -> str: + """ + 品質の悪さ(`pointed_out_inspection_comment_count`など)を表す文字列 + """ + return self.column.split("/")[0] + + @property + def production_volume(self) -> str: + """ + 生産量(`annotation_count`など)を表す文字列 + """ + return self.column.split("/")[1] class ProductivityType(Enum): @@ -145,15 +162,19 @@ class CollectingPerformanceInfo: def __init__( self, *, - productivity_indicator: ProductivityIndicator = ProductivityIndicator.ACTUAL_WORKTIME_HOUR_PER_ANNOTATION_COUNT, - quality_indicator: QualityIndicator = QualityIndicator.POINTED_OUT_INSPECTION_COMMENT_COUNT_PER_ANNOTATION_COUNT, + productivity_indicator: Optional[ProductivityIndicator] = None, + quality_indicator: Optional[QualityIndicator] = None, threshold_info: Optional[ThresholdInfo] = None, productivity_indicator_by_directory: Optional[ProductivityIndicatorByDirectory] = None, quality_indicator_by_directory: Optional[QualityIndicatorByDirectory] = None, threshold_infos_by_directory: Optional[ThresholdInfoSettings] = None, ) -> None: - self.quality_indicator = quality_indicator - self.productivity_indicator = productivity_indicator + self.quality_indicator = ( + quality_indicator if quality_indicator is not None else QualityIndicator("pointed_out_inspection_comment_count/annotation_count") + ) + self.productivity_indicator = ( + productivity_indicator if productivity_indicator is not None else ProductivityIndicator("actual_worktime_hour/annotation_count") + ) self.threshold_info = threshold_info if threshold_info is not None else ThresholdInfo() self.threshold_infos_by_directory = threshold_infos_by_directory if threshold_infos_by_directory is not None else {} self.productivity_indicator_by_directory = productivity_indicator_by_directory if productivity_indicator_by_directory is not None else {} 
@@ -207,8 +228,13 @@ def join_annotation_productivity(self, df: pandas.DataFrame, df_performance: pan df_joined = self.filter_df_with_threshold(df_joined, phase, project_title=project_title) productivity_indicator = self.productivity_indicator_by_directory.get(project_title, self.productivity_indicator) - df_tmp = df_joined[[(productivity_indicator.value, phase.value)]] - df_tmp.columns = pandas.MultiIndex.from_tuples([(project_title, f"{productivity_indicator.value}__{phase.value}")]) + column = (productivity_indicator.column, phase.value) + if column in df_joined.columns: + df_tmp = df_joined[[column]] + else: + logger.warning(f"'{project_title}'に生産性の指標である'{column}'の列が存在しませんでした。") + df_tmp = pandas.DataFrame(index=df_joined.index, columns=[column]) + df_tmp.columns = pandas.MultiIndex.from_tuples([(project_title, f"{productivity_indicator.column}__{phase.value}")]) return df.join(df_tmp) def join_inspection_acceptance_productivity(self, df: pandas.DataFrame, df_performance: pandas.DataFrame, project_title: str) -> pandas.DataFrame: @@ -223,29 +249,29 @@ def join_inspection_acceptance_productivity(self, df: pandas.DataFrame, df_perfo productivity_indicator = self.productivity_indicator_by_directory.get(project_title, self.productivity_indicator) - def _join_inspection(): # noqa: ANN202 + def _join_inspection() -> pandas.DataFrame: phase = TaskPhase.INSPECTION - if (self.productivity_indicator.value, phase.value) not in df_performance.columns: + if (self.productivity_indicator.column, phase.value) not in df_performance.columns: return df df_joined = df_performance df_joined = self.filter_df_with_threshold(df_joined, phase, project_title=project_title) - df_tmp = df_joined[[(productivity_indicator.value, phase.value)]] - df_tmp.columns = pandas.MultiIndex.from_tuples([(project_title, f"{productivity_indicator.value}__{phase.value}")]) + df_tmp = df_joined[[(productivity_indicator.column, phase.value)]] + df_tmp.columns = 
pandas.MultiIndex.from_tuples([(project_title, f"{productivity_indicator.column}__{phase.value}")]) return df.join(df_tmp) - def _join_acceptance(): # noqa: ANN202 + def _join_acceptance() -> pandas.DataFrame: phase = TaskPhase.ACCEPTANCE - if (productivity_indicator.value, phase.value) not in df_performance.columns: + if (productivity_indicator.column, phase.value) not in df_performance.columns: return df df_joined = df_performance df_joined = self.filter_df_with_threshold(df_joined, phase, project_title=project_title) - df_tmp = df_joined[[(productivity_indicator.value, phase.value)]] - df_tmp.columns = pandas.MultiIndex.from_tuples([(project_title, f"{productivity_indicator.value}__{phase.value}")]) + df_tmp = df_joined[[(productivity_indicator.column, phase.value)]] + df_tmp.columns = pandas.MultiIndex.from_tuples([(project_title, f"{productivity_indicator.column}__{phase.value}")]) return df.join(df_tmp) @@ -270,15 +296,21 @@ def join_annotation_quality(self, df: pandas.DataFrame, df_performance: pandas.D quality_indicator = self.quality_indicator_by_directory.get(project_title, self.quality_indicator) - df_tmp = df_joined[[(quality_indicator.value, phase.value)]] + column = (quality_indicator.column, phase.value) + if column in df_joined.columns: + df_tmp = df_joined[[column]] + else: + logger.warning(f"'{project_title}'に品質の指標である'{column}'の列が存在しませんでした。") + df_tmp = pandas.DataFrame(index=df_joined.index, columns=[column]) - df_tmp.columns = pandas.MultiIndex.from_tuples([(project_title, f"{quality_indicator.value}__{phase.value}")]) + df_tmp.columns = pandas.MultiIndex.from_tuples([(project_title, f"{quality_indicator.column}__{phase.value}")]) return df.join(df_tmp) def create_rating_df( self, df_user: pandas.DataFrame, target_dir: Path, + custom_production_volume_list_by_directory: Optional[dict[str, list[ProductionVolumeColumn]]], ) -> ResultDataframe: """対象ディレクトリから、評価対象の指標になる情報を取得します。""" df_annotation_productivity = df_user @@ -290,8 +322,11 @@ def 
create_rating_df( if not p_project_dir.is_dir(): continue + custom_production_volume_list = ( + custom_production_volume_list_by_directory.get(p_project_dir.name) if custom_production_volume_list_by_directory is not None else None + ) project_title = p_project_dir.name - project_dir = ProjectDir(p_project_dir) + project_dir = ProjectDir(p_project_dir, custom_production_volume_list=custom_production_volume_list) project_dir_list.append(project_dir) try: @@ -456,12 +491,23 @@ def create_user_df(target_dir: Path) -> pandas.DataFrame: return df_user.sort_values("user_id").set_index("user_id") +def create_custom_production_volume_by_directory(cli_value: str) -> dict[str, list[ProductionVolumeColumn]]: + """ + コマンドラインから渡された文字列を元に、独自の生産量を表す列情報を生成します。 + + Returns: + keyはディレクトリ名, valueは独自の生産量の列名 + """ + dict_data = get_json_from_args(cli_value) + return {dirname: [ProductionVolumeColumn(col, col) for col in column_list] for dirname, column_list in dict_data.items()} + + class WritingCsv: def __init__(self, threshold_deviation_user_count: Optional[int] = None, user_ids: Optional[Collection[str]] = None) -> None: self.threshold_deviation_user_count = threshold_deviation_user_count self.user_ids = user_ids - def write(self, df: pandas.DataFrame, csv_basename: str, output_dir: Path): # noqa: ANN201 + def write(self, df: pandas.DataFrame, csv_basename: str, output_dir: Path) -> None: print_csv(df, str(output_dir / f"{csv_basename}__original.csv")) # 偏差値のCSVを出力 @@ -537,7 +583,11 @@ def main(self) -> None: target_dir: Path = args.dir user_id_list = get_list_from_args(args.user_id) if args.user_id is not None else None df_user = create_user_df(target_dir) - + custom_production_volume_by_directory = ( + create_custom_production_volume_by_directory(args.custom_production_volume_by_directory) + if args.custom_production_volume_by_directory is not None + else None + ) result = CollectingPerformanceInfo( productivity_indicator=ProductivityIndicator(args.productivity_indicator), 
productivity_indicator_by_directory=create_productivity_indicator_by_directory(args.productivity_indicator_by_directory), @@ -548,10 +598,7 @@ def main(self) -> None: threshold_task_count=args.threshold_task_count, ), threshold_infos_by_directory=create_threshold_infos_per_project(args.threshold_settings), - ).create_rating_df( - df_user, - target_dir, - ) + ).create_rating_df(df_user, target_dir, custom_production_volume_by_directory) output_dir: Path = args.output_dir @@ -600,8 +647,7 @@ def parse_args(parser: argparse.ArgumentParser) -> None: parser.add_argument( "--productivity_indicator", type=str, - choices=[e.value for e in ProductivityIndicator], - default=ProductivityIndicator.ACTUAL_WORKTIME_HOUR_PER_ANNOTATION_COUNT.value, + default="actual_worktime_hour/annotation_count", help="生産性の指標", ) @@ -619,8 +665,7 @@ def parse_args(parser: argparse.ArgumentParser) -> None: parser.add_argument( "--quality_indicator", type=str, - choices=[e.value for e in QualityIndicator], - default=QualityIndicator.POINTED_OUT_INSPECTION_COMMENT_COUNT_PER_ANNOTATION_COUNT.value, + default="pointed_out_inspection_comment_count/annotation_count", help="品質の指標", ) @@ -662,6 +707,17 @@ def parse_args(parser: argparse.ArgumentParser) -> None: help=f"JSON形式で、ディレクトリ名ごとに閾値を指定してください。\n(ex) ``{json.dumps(THRESHOLD_SETTINGS_SAMPLE)}``", ) + custom_production_volume_sample = { + "dirname1": ["video_duration_minute"], + "dirname2": ["segment_area"], + } + + parser.add_argument( + "--custom_production_volume_by_directory", + type=str, + help=("プロジェクト独自の生産量をJSON形式で指定します。" f"(例) ``{json.dumps(custom_production_volume_sample, ensure_ascii=False)}`` \n"), + ) + parser.add_argument("-o", "--output_dir", required=True, type=Path, help="出力ディレクトリ") parser.set_defaults(subcommand_func=main) diff --git a/annofabcli/statistics/visualization/dataframe/project_performance.py b/annofabcli/statistics/visualization/dataframe/project_performance.py index 0fd3cf79c..9ddd38817 100644 --- 
a/annofabcli/statistics/visualization/dataframe/project_performance.py +++ b/annofabcli/statistics/visualization/dataframe/project_performance.py @@ -84,12 +84,11 @@ def _get_series_from_project_dir(cls, project_dir: ProjectDir) -> pandas.Series: return series @classmethod - def from_project_dirs(cls, project_dir_list: list[ProjectDir]) -> ProjectPerformance: - row_list: list[pandas.Series] = [] - for project_dir in project_dir_list: - row_list.append(cls._get_series_from_project_dir(project_dir)) # noqa: PERF401 - - return cls(pandas.DataFrame(row_list)) + def from_project_dirs( + cls, project_dir_list: list[ProjectDir], *, custom_production_volume_list: Optional[list[ProductionVolumeColumn]] = None + ) -> ProjectPerformance: + row_list: list[pandas.Series] = [cls._get_series_from_project_dir(project_dir) for project_dir in project_dir_list] + return cls(pandas.DataFrame(row_list), custom_production_volume_list=custom_production_volume_list) def to_csv(self, output_file: Path) -> None: """ diff --git a/annofabcli/statistics/visualize_statistics.py b/annofabcli/statistics/visualize_statistics.py index 5034d6710..b0d4b4e1a 100644 --- a/annofabcli/statistics/visualize_statistics.py +++ b/annofabcli/statistics/visualize_statistics.py @@ -2,6 +2,7 @@ import argparse import functools +import json import logging.handlers import re import sys @@ -412,12 +413,12 @@ def create_custom_production_volume(cli_value: str) -> CustomProductionVolume: コマンドラインから渡された文字列を元に、`CustomProductionVolume`インスタンスを生成します。 """ dict_data = get_json_from_args(cli_value) - csv_path = dict_data["csv_path"] - df = pandas.read_csv(csv_path) - column_list = dict_data["column_list"] custom_production_volume_list = [ProductionVolumeColumn(column["value"], column["name"]) for column in column_list] + csv_path = dict_data["csv_path"] + df = pandas.read_csv(csv_path) + return CustomProductionVolume(df=df, custom_production_volume_list=custom_production_volume_list) @@ -496,7 +497,12 @@ def 
visualize_statistics( # noqa: PLR0913 # pylint: disable=too-many-positiona if len(output_project_dir_list) > 0: project_dir_list = [ProjectDir(e) for e in output_project_dir_list] - project_performance = ProjectPerformance.from_project_dirs(project_dir_list) + custom_production_volume_list = ( + custom_production_volume.custom_production_volume_list if custom_production_volume is not None else None + ) + project_performance = ProjectPerformance.from_project_dirs( + project_dir_list, custom_production_volume_list=custom_production_volume_list + ) project_performance.to_csv(root_output_dir / "プロジェクトごとの生産性と品質.csv") project_actual_worktime = ProjectWorktimePerMonth.from_project_dirs(project_dir_list, WorktimeColumn.ACTUAL_WORKTIME_HOUR) @@ -671,10 +677,19 @@ def parse_args(parser: argparse.ArgumentParser) -> None: ), ) + custom_production_volume_sample = { + "csv_path": "custom_production_volume.csv", + "column_list": [{"value": "video_duration_minute", "name": "動画長さ"}], + } + parser.add_argument( "--custom_production_volume", type=str, - help=("プロジェクト独自の生産量の指標をJSON形式で指定します。"), + help=( + "プロジェクト独自の生産量をJSON形式で指定します。" + f"(例) ``{json.dumps(custom_production_volume_sample, ensure_ascii=False)}`` \n" + "詳細は https://annofab-cli.readthedocs.io/ja/latest/command_reference/statistics/visualize.html#custom-project-volume を参照してください。" # noqa: E501 + ), ) parser.add_argument( diff --git a/docs/command_reference/statistics/visualize.rst b/docs/command_reference/statistics/visualize.rst index 482fa428a..cc6deb46b 100644 --- a/docs/command_reference/statistics/visualize.rst +++ b/docs/command_reference/statistics/visualize.rst @@ -86,8 +86,6 @@ Examples - - 複数のプロジェクトをマージする ---------------------------------------------- ``--project_id`` に複数のproject_idを指定したときに ``--merge`` を指定すると、指定したプロジェクトをマージしたディレクトリも出力します。ディレクトリ名は ``merge`` です。 @@ -99,6 +97,8 @@ Examples + + 並列処理 ---------------------------------------------- @@ -111,6 +111,72 @@ Examples +生産量のカスタマイズ 
+================================= + +.. _annotation_count_csv: + +アノテーション数を変更する +---------------------------------------------- +デフォルトでは、アノテーションZIPからアノテーション数を算出します。 +しかし、プリアノテーションを用いたプロジェクトなどでは、実際に生産していないプリアノテーションも「アノテーション数」に含まれてしまい、正しい生産性が算出できない場合があります。 + +``--annotation_count_csv`` に実際に生産したアノテーションの個数が記載されたCSVファイルを指定することで、正しい生産量と生産性を算出できます。 + +以下はCSVファイルのサンプルです。 + +.. code-block:: + :caption: annotation_count.csv + + project_id,task_id,annotation_count + prj1,task1,10 + prj1,task2,20 + + +CSVには以下の列が存在している必要があります。 + +* ``project_id`` +* ``task_id`` +* ``annotation_count`` + + +.. _custom_project_volume: + +独自の生産量を指定する +---------------------------------------------- +デフォルトでは、入力データ数とアノテーション数を生産量としています。しかし、この生産量はプロジェクトによっては適切でない場合があります。 +たとえば、動画プロジェクトでは動画時間が生産量として適切かもしれません。また、セマンティックセグメンテーションプロジェクトでは塗りつぶしの面積や輪郭線の方が生産量として適切かもしれません。 + +``--custom_production_volume`` に以下のようなJSON文字列を指定することで、入力データ数とアノテーション数以外の生産量を指定することができます。 + +.. code-block:: json + + { + "csv_path": "custom_production_volume.csv", // 生産量が記載されたCSVファイルのパス + "column_list":[ // 生産量の情報 + { + "value": "video_duration_minute", // CSVの列名 + "name": "動画長さ" // CSVの列名を補足する内容。出力されるグラフなどに用いられる。 + } + ] + } + + +以下は、 ``csv_path`` キーに指定するCSVファイルのサンプルです。 + +.. 
code-block:: + :caption: custom_production_volume.csv + + project_id,task_id,video_duration_minute + prj1,task1,10 + prj1,task2,20 + +CSVには以下の列が存在している必要があります。 + +* ``project_id`` +* ``task_id`` +* ``column_list[].value`` で指定した列名 + 出力結果 @@ -240,3 +306,4 @@ Usage Details :prog: annofabcli statistics visualize :nosubcommands: :nodefaultconst: + diff --git a/tests/statistics/visualization/test_write_performance_rating_csv.py b/tests/statistics/visualization/test_write_performance_rating_csv.py index dfb957c50..d55d360d8 100644 --- a/tests/statistics/visualization/test_write_performance_rating_csv.py +++ b/tests/statistics/visualization/test_write_performance_rating_csv.py @@ -26,12 +26,12 @@ def test__create_threshold_infos_per_project(): def test__create_productivity_indicator_by_directory(): actual = create_productivity_indicator_by_directory('{"dirname": "monitored_worktime_hour/annotation_count"}') - assert actual == {"dirname": ProductivityIndicator.MONITORED_WORKTIME_HOUR_PER_ANNOTATION_COUNT} + assert actual == {"dirname": ProductivityIndicator("monitored_worktime_hour/annotation_count")} def test__create_quality_indicator_by_directory(): actual = create_quality_indicator_by_directory('{"dirname": "rejected_count/task_count"}') - assert actual == {"dirname": QualityIndicator.REJECTED_COUNT_PER_TASK_COUNT} + assert actual == {"dirname": QualityIndicator("rejected_count/task_count")} df_user = pandas.DataFrame( @@ -70,14 +70,14 @@ def test__join_annotation_productivity(self): assert df_actual.columns[3] == ("project1", "actual_worktime_hour/annotation_count__annotation") assert df_actual.iloc[0][("project1", "actual_worktime_hour/annotation_count__annotation")] == approx(0.00070, rel=1e-2) - obj2 = CollectingPerformanceInfo(productivity_indicator=ProductivityIndicator.MONITORED_WORKTIME_HOUR_PER_ANNOTATION_COUNT) + obj2 = CollectingPerformanceInfo(productivity_indicator=ProductivityIndicator("monitored_worktime_hour/annotation_count")) df_actual2 = 
obj2.join_annotation_productivity(df=df_user, df_performance=user_performance.df, project_title="project1") assert df_actual2.columns[3] == ("project1", "monitored_worktime_hour/annotation_count__annotation") # productivity_indicator_by_directoryが優先されることを確認 obj3 = CollectingPerformanceInfo( - productivity_indicator=ProductivityIndicator.ACTUAL_WORKTIME_HOUR_PER_ANNOTATION_COUNT, - productivity_indicator_by_directory={"project1": ProductivityIndicator.MONITORED_WORKTIME_HOUR_PER_ANNOTATION_COUNT}, + productivity_indicator=ProductivityIndicator("actual_worktime_hour/annotation_count"), + productivity_indicator_by_directory={"project1": ProductivityIndicator("monitored_worktime_hour/annotation_count")}, ) df_actual3 = obj3.join_annotation_productivity(df=df_user, df_performance=user_performance.df, project_title="project1") assert df_actual3.columns[3] == ("project1", "monitored_worktime_hour/annotation_count__annotation") @@ -88,14 +88,14 @@ def test__join_inspection_acceptance_productivity(self): assert df_actual.columns[3] == ("project1", "actual_worktime_hour/annotation_count__acceptance") assert df_actual.iloc[1][("project1", "actual_worktime_hour/annotation_count__acceptance")] == approx(0.000145, rel=1e-2) - obj2 = CollectingPerformanceInfo(productivity_indicator=ProductivityIndicator.MONITORED_WORKTIME_HOUR_PER_ANNOTATION_COUNT) + obj2 = CollectingPerformanceInfo(productivity_indicator=ProductivityIndicator("monitored_worktime_hour/annotation_count")) df_actual2 = obj2.join_inspection_acceptance_productivity(df=df_user, df_performance=user_performance.df, project_title="project1") assert df_actual2.columns[3] == ("project1", "monitored_worktime_hour/annotation_count__acceptance") # productivity_indicator_by_directoryが優先されることを確認 obj3 = CollectingPerformanceInfo( - productivity_indicator=ProductivityIndicator.ACTUAL_WORKTIME_HOUR_PER_ANNOTATION_COUNT, - productivity_indicator_by_directory={"project1": 
ProductivityIndicator.MONITORED_WORKTIME_HOUR_PER_ANNOTATION_COUNT}, + productivity_indicator=ProductivityIndicator("actual_worktime_hour/annotation_count"), + productivity_indicator_by_directory={"project1": ProductivityIndicator("monitored_worktime_hour/annotation_count")}, ) df_actual3 = obj3.join_inspection_acceptance_productivity(df=df_user, df_performance=user_performance.df, project_title="project1") assert df_actual3.columns[3] == ("project1", "monitored_worktime_hour/annotation_count__acceptance") @@ -106,14 +106,14 @@ def test__join_annotation_quality(self): assert df_actual.columns[3] == ("project1", "pointed_out_inspection_comment_count/annotation_count__annotation") assert df_actual.iloc[0][("project1", "pointed_out_inspection_comment_count/annotation_count__annotation")] == approx(0.000854, rel=1e-2) - obj2 = CollectingPerformanceInfo(quality_indicator=QualityIndicator.REJECTED_COUNT_PER_TASK_COUNT) + obj2 = CollectingPerformanceInfo(quality_indicator=QualityIndicator("rejected_count/task_count")) df_actual2 = obj2.join_annotation_quality(df=df_user, df_performance=user_performance.df, project_title="project1") assert df_actual2.columns[3] == ("project1", "rejected_count/task_count__annotation") # quality_indicator_by_directoryが優先されることを確認 obj3 = CollectingPerformanceInfo( - quality_indicator=QualityIndicator.POINTED_OUT_INSPECTION_COMMENT_COUNT_PER_INPUT_DATA_COUNT, - quality_indicator_by_directory={"project1": QualityIndicator.REJECTED_COUNT_PER_TASK_COUNT}, + quality_indicator=QualityIndicator("pointed_out_inspection_comment_count/input_data_count"), + quality_indicator_by_directory={"project1": QualityIndicator("rejected_count/task_count")}, ) df_actual3 = obj3.join_annotation_quality(df=df_user, df_performance=user_performance.df, project_title="project1") assert df_actual3.columns[3] == ("project1", "rejected_count/task_count__annotation")