OpenAdaptAI · abrichr · Jun 26, 2024 · Jun 26, 2024
diff --git a/openadapt/strategies/segment.py b/openadapt/strategies/segment.py
@@ -24,6 +24,7 @@
 INCLUDE_WINDOW_DATA = False
 FILTER_MASKS = True
 INCLUDE_CURRENT_SCREENSHOT = False
+INCLUDE_SEGMENTATIONS = True
 
 
 class SegmentReplayStrategy(strategies.base.BaseReplayStrategy):
@@ -80,6 +81,7 @@ def get_next_action_event(
  include_active_window_data: bool = INCLUDE_WINDOW_DATA,
  include_replay_instructions: bool = INCLUDE_REPLAY_INSTRUCTIONS,
  include_current_screenshot: bool = INCLUDE_CURRENT_SCREENSHOT,
+ include_segmentations: bool = INCLUDE_SEGMENTATIONS,
  ) -> models.ActionEvent | None:
  """Get the next ActionEvent for replay.
 
@@ -102,6 +104,8 @@ def get_next_action_event(
  in the prompt.
  include_current_screenshot (bool): Whether to include the current screenshot
  in the prompt.
+ include_segmentations (bool): Whether to include window segmentations
+ in the prompt.
 
  Returns:
  models.ActionEvent or None: The next ActionEvent for replay or None
@@ -148,7 +152,8 @@ def get_next_action_event(
  logger.info(f"{active_window=}")
 
  if (
- generated_action_event.name in common.MOUSE_EVENTS
+ include_segmentations
+ and generated_action_event.name in common.MOUSE_EVENTS
  and generated_action_event.active_segment_description
  ):
  generated_action_event.screenshot = active_screenshot
@@ -168,17 +173,18 @@ def get_next_action_event(
  exceptions.append(exc)
  # TODO XXX this does not update the prompts, even though it should
  logger.exception(exc)
- import ipdb
-
- ipdb.set_trace()
+ import ipdb; ipdb.set_trace()
  logger.warning(f"{exc=} {len(exceptions)=}")
  else:
  break
  target_centroid = active_window_segmentation.centroids[target_segment_idx]
+
  # <image space position> = scale_ratio * <window/action space position>
+ # TODO: move this scale-ratio coordinate conversion elsewhere
  width_ratio, height_ratio = utils.get_scale_ratios(generated_action_event)
  target_mouse_x = target_centroid[0] / width_ratio + active_window.left
  target_mouse_y = target_centroid[1] / height_ratio + active_window.top
+
  generated_action_event.mouse_x = target_mouse_x
  generated_action_event.mouse_y = target_mouse_y
  else:

diff --git a/openadapt/strategies/vanilla.py b/openadapt/strategies/vanilla.py
@@ -198,10 +198,11 @@ def generate_action_event(
  replay_instructions=replay_instructions,
  )
  prompt_adapter = adapters.get_default_prompt_adapter()
+ images = [current_image]
  content = prompt_adapter.prompt(
  prompt,
  system_prompt,
- [current_image],
+ images,
  )
  action_dict = utils.parse_code_snippet(content)
  logger.info(f"{action_dict=}")