mirrored 9 minutes ago
0
timothyxxx: Enhance PPTX comparison logic and update evaluation examples. - Introduced a new helper function `nonempty_runs` to filter out formatting-only runs in PPTX comparisons, improving accuracy in slide evaluations. - Updated the logic for comparing runs in paragraphs to handle cases where both paragraphs are empty, ensuring consistent evaluation results. - Revised JSON examples to clarify instructions and enhance evaluator configurations, including adding post-execution commands for script downloads and permissions. These changes improve the robustness of the PPTX file comparison and ensure that evaluation examples are more precise and functional. (commit 0e92af6)
{
  "id": "f0b971a1-6831-4b9b-a50e-22a6e47f45ba",
  "snapshot": "chrome",
  "instruction": "Please help me find the score record for the Super Bowl of the 2019 NFL season (played in 2020) in the NFL website.",
  "source": "Mind2Web",
  "config": [
    {
      "type": "launch",
      "parameters": {
        "command": [
          "google-chrome",
          "--remote-debugging-port=1337"
        ]
      }
    },
    {
      "type": "launch",
      "parameters": {
        "command": [
          "socat",
          "tcp-listen:9222,fork",
          "tcp:localhost:1337"
        ]
      }
    },
    {
      "type": "chrome_open_tabs",
      "parameters": {
        "urls_to_open": [
          "https://www.nfl.com/"
        ]
      }
    },
    {
      "type": "activate_window",
      "parameters": {
        "window_name": "Google Chrome"
      }
    }
  ],
  "trajectory": "trajectories/",
  "related_apps": [
    "chrome"
  ],
  "evaluator": {
    "func": "is_expected_active_tab",
    "result": {
      "type": "active_url_from_accessTree",
      "goto_prefix": "https://www."
    },
    "expected": {
      "type": "rule",
      "rules": {
        "type": "url",
        "url": "https://www.nfl.com/scores/2019/post4"
      }
    }
  },
  "proxy": true,
  "fixed_ip": false,
  "possibility_of_env_change": "low"
}