Skip to content

Commit

Permalink
stat_visualization merge : bokehの3.0バージョンアップの対応漏れに対処 (#946)
Browse files Browse the repository at this point in the history
* bokehのupdateに対応

* docstring

* 全件ファイル取得APIをasyncを使わずに、同期的に実行するようにした。

* pylintの警告を無視する

* update pylint
  • Loading branch information
yuji38kwmt authored Feb 22, 2023
1 parent 8f536fa commit af9a3c1
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 42 deletions.
61 changes: 19 additions & 42 deletions annofabcli/statistics/database.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import asyncio
import datetime
import json
import logging
Expand Down Expand Up @@ -296,7 +295,7 @@ def update_task_json(self, project_id: str, should_update_task_json: bool = Fals
self.annofab_service.api.post_project_tasks_update(project_id)
self.wait_for_completion_updated_task_json(project_id)

def _write_task_histories_json(self):
def _write_task_histories_json_with_executing_api_one_of_each(self):
"""
タスク履歴取得APIを1個ずつ実行して、全タスクのタスク履歴が格納されたJSONを出力します。
事前に、タスク全件ファイルをダウンロードする必要がある。
Expand Down Expand Up @@ -339,59 +338,37 @@ def _download_db_file(self, is_latest: bool, is_get_task_histories_one_of_each:

wait_options = WaitOptions(interval=60, max_tries=360)

DOWNLOADED_FILE_COUNT = 4

TASK_JSON_INDEX = 0
ANNOTATION_ZIP_INDEX = 1
COMMENT_JSON_INDEX = 2
TASK_HISTORY_JSON_INDEX = 3

loop = asyncio.get_event_loop()
coroutines: List[Any] = [None] * DOWNLOADED_FILE_COUNT
coroutines[TASK_JSON_INDEX] = downloading_obj.download_task_json_with_async(
downloading_obj.download_task_json(
self.project_id, dest_path=str(self.tasks_json_path), is_latest=is_latest, wait_options=wait_options
)
coroutines[ANNOTATION_ZIP_INDEX] = downloading_obj.download_annotation_zip_with_async(
downloading_obj.download_annotation_zip(
self.project_id,
dest_path=str(self.annotations_zip_path),
is_latest=is_latest,
wait_options=wait_options,
)
coroutines[COMMENT_JSON_INDEX] = downloading_obj.download_comment_json_with_async(
self.project_id, dest_path=str(self.comment_json_path)
)

try:
downloading_obj.download_comment_json(self.project_id, dest_path=str(self.comment_json_path))
except DownloadingFileNotFoundError:
# プロジェクトを作成した日だと、検査コメントファイルが作成されていないので、DownloadingFileNotFoundErrorが発生する
# その場合でも、処理は継続できるので、空listのJSONファイルを作成しておく
self.comment_json_path.write_text("[]", encoding="utf-8")

if is_get_task_histories_one_of_each:
# タスク履歴APIを一つずつ実行して、JSONファイルを生成する
# 先にタスク全件ファイルをダウンロードする必要がある
coroutines.pop(DOWNLOADED_FILE_COUNT - 1)
gather = asyncio.gather(*coroutines, return_exceptions=True)
results = loop.run_until_complete(gather)
self._write_task_histories_json()
self._write_task_histories_json_with_executing_api_one_of_each()

else:
coroutines[TASK_HISTORY_JSON_INDEX] = downloading_obj.download_task_history_json_with_async(
self.project_id, dest_path=str(self.task_histories_json_path)
)
gather = asyncio.gather(*coroutines, return_exceptions=True)
results = loop.run_until_complete(gather)

if isinstance(results[COMMENT_JSON_INDEX], DownloadingFileNotFoundError):
# 空のJSONファイルを作り、検査コメント0件として処理する
self.comment_json_path.write_text("{}", encoding="utf-8")
elif isinstance(results[COMMENT_JSON_INDEX], Exception):
raise results[COMMENT_JSON_INDEX]

if not is_get_task_histories_one_of_each:
if isinstance(results[TASK_HISTORY_JSON_INDEX], DownloadingFileNotFoundError):
# タスク履歴APIを一つずつ実行して、JSONファイルを生成する
self._write_task_histories_json()
elif isinstance(results[TASK_HISTORY_JSON_INDEX], Exception):
raise results[TASK_HISTORY_JSON_INDEX]

for result in [results[TASK_JSON_INDEX], results[ANNOTATION_ZIP_INDEX]]:
if isinstance(result, Exception):
raise result
try:
downloading_obj.download_task_history_json(
self.project_id, dest_path=str(self.task_histories_json_path)
)
except DownloadingFileNotFoundError:
# プロジェクトを作成した日だと、タスク履歴全件ファイルが作成されていないので、DownloadingFileNotFoundErrorが発生する
# その場合でも、処理は継続できるので、タスク履歴APIを1個ずつ実行して、タスク履歴ファイルを作成する
self._write_task_histories_json_with_executing_api_one_of_each()

@staticmethod
def _to_datetime_with_tz(str_date: str) -> datetime.datetime:
Expand Down
2 changes: 2 additions & 0 deletions annofabcli/statistics/linegraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,9 @@ def __init__(

self.exists_secondary_y_axis = False
self.line_glyphs: dict[str, GlyphRenderer] = {}
"""key:凡例, value: 描画している折れ線"""
self.marker_glyphs: dict[str, GlyphRenderer] = {}
"""key:凡例, value: 描画しているマーカー"""

def add_secondary_y_axis(
self,
Expand Down
38 changes: 38 additions & 0 deletions annofabcli/statistics/visualization/dataframe/user_performance.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# pylint: disable=too-many-lines
"""
各ユーザの合計の生産性と品質
"""
Expand Down Expand Up @@ -598,6 +599,18 @@ def create_figure(title: str) -> figure:
df[(f"{worktime_type.value}_worktime_hour/annotation_count", phase)] * 60
)

# bokeh3.0.3では、string型の列を持つpandas.DataFrameを描画できないため、改めてobject型に戻す
# TODO この問題が解決されたら、削除する
# https://qiita.com/yuji38kwmt/items/b5da6ed521e827620186
df = df.astype(
{
("user_id", ""): "object",
("username", ""): "object",
("biography", ""): "object",
("last_working_date", ""): "object",
}
)

for biography_index, biography in enumerate(sorted(set(df["biography"]))):
for fig, phase in zip(figure_list, self.phase_list):
filtered_df = df[
Expand Down Expand Up @@ -703,6 +716,18 @@ def create_figure(title: str, x_axis_label: str, y_axis_label: str) -> figure:

phase = "annotation"

# bokeh3.0.3では、string型の列を持つpandas.DataFrameを描画できないため、改めてobject型に戻す
# TODO この問題が解決されたら、削除する
# https://qiita.com/yuji38kwmt/items/b5da6ed521e827620186
df = df.astype(
{
("user_id", ""): "object",
("username", ""): "object",
("biography", ""): "object",
("last_working_date", ""): "object",
}
)

df["biography"] = df["biography"].fillna("")
for biography_index, biography in enumerate(sorted(set(df["biography"]))):
for column_pair, fig in zip(column_pair_list, figure_list):
Expand Down Expand Up @@ -891,6 +916,19 @@ def set_tooltip():
]
phase = TaskPhase.ANNOTATION.value
df["biography"] = df["biography"].fillna("")

# bokeh3.0.3では、string型の列を持つpandas.DataFrameを描画できないため、改めてobject型に戻す
# TODO この問題が解決されたら、削除する
# https://qiita.com/yuji38kwmt/items/b5da6ed521e827620186
df = df.astype(
{
("user_id", ""): "object",
("username", ""): "object",
("biography", ""): "object",
("last_working_date", ""): "object",
}
)

for biography_index, biography in enumerate(sorted(set(df["biography"]))):
for fig, column_pair in zip(figure_list, column_pair_list):
x_column, y_column = column_pair
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,13 @@ def plot_cumulatively(
df_cumulative = self._get_cumulative_dataframe(df_continuous_date)
df_cumulative["dt_date"] = df_cumulative["date"].map(lambda e: datetime.datetime.fromisoformat(e).date())

# bokeh3.0.3では、string型の列を持つpandas.DataFrameを描画できないため、改めてobject型に戻す
# TODO この問題が解決されたら、削除する
# https://qiita.com/yuji38kwmt/items/b5da6ed521e827620186
df_cumulative = df_cumulative.astype(
{"date": "object", "user_id": "object", "username": "object", "biography": "object"}
)

line_count = 0
for user_index, user_id in enumerate(user_id_list):
df_subset = df_cumulative[df_cumulative["user_id"] == user_id]
Expand Down

0 comments on commit af9a3c1

Please sign in to comment.