import base64 import io import json import re from pathlib import Path from typing import Any from PIL import Image from agent.prompts import * from browser_env import ( Action, ActionTypes, ObservationMetadata, StateInfo, action2str, ) HTML_TEMPLATE = """
{body} """ def get_render_action( action: Action, observation_metadata: dict[str, ObservationMetadata], action_set_tag: str, ) -> str: """Parse the predicted actions for rendering purpose. More comprehensive information""" match action_set_tag: case "id_accessibility_tree": text_meta_data = observation_metadata["text"] if action["element_id"] in text_meta_data["obs_nodes_info"]: node_content = text_meta_data["obs_nodes_info"][ action["element_id"] ]["text"] else: node_content = "No match found" action_str = f"{action['raw_prediction']}{repr(action)}{action2str(action, action_set_tag, node_content)}{_config_str}\n"
task_id = _config["task_id"]
self.action_set_tag = action_set_tag
# Mode "a+" creates the file when it does not exist yet and opens it for
# both appending and reading.
# NOTE(review): the handle is stored on self and is not closed anywhere in
# the lines visible here -- confirm it is closed elsewhere.
self.render_file = open(
Path(result_dir) / f"render_{task_id}.html", "a+"
)
# Discard anything already in the file so the output starts out empty.
self.render_file.truncate(0)
# Write the initial page: HTML_TEMPLATE with _config_str as its body.
self.render_file.write(HTML_TEMPLATE.format(body=f"{_config_str}"))
# NOTE(review): the result of this read() is discarded; its purpose is not
# evident from the visible code -- confirm whether it is needed.
self.render_file.read()
# Push the buffered template text out to the file right away.
self.render_file.flush()
def render(
self,
action: Action,
state_info: StateInfo,
meta_data: dict[str, Any],
render_screenshot: bool = False,
) -> None:
"""Render the trajectory"""
# text observation
observation = state_info["observation"]
text_obs = observation["text"]
info = state_info["info"]
new_content = f"{text_obs}