mirrored 9 minutes ago
0
timothyxxx: Update evaluation examples with clearer instructions and improved configurations - Revised instructions in multiple JSON examples to enhance clarity and specificity, ensuring users understand the tasks better. - Adjusted configurations for tasks involving Chrome, GIMP, LibreOffice, and VLC to reflect more precise requirements and expected behaviors. - These changes aim to improve the usability and effectiveness of the evaluation examples, making them more intuitive for users. 98b6fb6
{
  "id": "a96b564e-dbe9-42c3-9ccf-b4498073438a",
  "snapshot": "chrome",
  "instruction": "Find the community discussions and open the one with the most replies.",
  "source": "test_task_0",
  "config": [
    {
      "type": "launch",
      "parameters": {
        "command": [
          "google-chrome",
          "--remote-debugging-port=1337"
        ]
      }
    },
    {
      "type": "launch",
      "parameters": {
        "command": [
          "socat",
          "tcp-listen:9222,fork",
          "tcp:localhost:1337"
        ]
      }
    },
    {
      "type": "chrome_open_tabs",
      "parameters": {
        "urls_to_open": [
          "https://www.flightaware.com/"
        ]
      }
    },
    {
      "type": "activate_window",
      "parameters": {
        "window_name": "Google Chrome"
      }
    }
  ],
  "trajectory": "trajectories/",
  "related_apps": [
    "chrome"
  ],
  "evaluator": {
    "func": "is_expected_active_tab",
    "result": {
      "type": "active_tab_info",
      "goto_prefix": "https://"
    },
    "expected": {
      "type": "rule",
      "rules": {
        "type": "url",
        "url": "https://discussions.flightaware.com/t/the-banter-thread/4412"
      }
    }
  },
  "proxy": true,
  "fixed_ip": false,
  "possibility_of_env_change": "low"
}