From 8ffed8e8e180c713e85fea84a2a6b77568cc4e2d Mon Sep 17 00:00:00 2001 From: Anirudh Kamath Date: Thu, 6 Mar 2025 11:14:12 -0800 Subject: [PATCH 1/3] expose a11y --- examples/example.ts | 2 ++ lib/StagehandPage.ts | 14 +++++++++++++- lib/a11y/utils.ts | 3 +++ lib/handlers/observeHandler.ts | 7 ++----- types/page.ts | 6 ++++++ 5 files changed, 26 insertions(+), 6 deletions(-) diff --git a/examples/example.ts b/examples/example.ts index 372d72e5..e3d41ae9 100644 --- a/examples/example.ts +++ b/examples/example.ts @@ -17,6 +17,8 @@ async function example() { /** * Add your code here! */ + const tree = await stagehand.page.getAccessibilityTree(); + console.log(tree); await stagehand.close(); } diff --git a/lib/StagehandPage.ts b/lib/StagehandPage.ts index cf8d7a8e..c05862ec 100644 --- a/lib/StagehandPage.ts +++ b/lib/StagehandPage.ts @@ -16,8 +16,9 @@ import { StagehandObserveHandler } from "./handlers/observeHandler"; import { ActOptions, ActResult, GotoOptions, Stagehand } from "./index"; import { LLMClient } from "./llm/LLMClient"; import { StagehandContext } from "./StagehandContext"; -import { EnhancedContext } from "../types/context"; +import { EnhancedContext, TreeResult } from "../types/context"; import { clearOverlays } from "./utils"; +import { getAccessibilityTree } from "./a11y/utils"; const BROWSERBASE_REGION_DOMAIN = { "us-west-2": "wss://connect.usw2.browserbase.com", @@ -58,6 +59,7 @@ export class StagehandPage { (prop === ("act" as keyof Page) || prop === ("extract" as keyof Page) || prop === ("observe" as keyof Page) || + prop === ("getAccessibilityTree" as keyof Page) || prop === ("on" as keyof Page)) ) { return () => { @@ -248,6 +250,12 @@ export class StagehandPage { }; } + if (prop === "getAccessibilityTree") { + return async () => { + return await this.getAccessibilityTree(); + }; + } + // Handle goto specially if (prop === "goto") { return async (url: string, options: GotoOptions) => { @@ -829,4 +837,8 @@ export class StagehandPage { async disableCDP(domain: string): Promise { await this.sendCDP(`${domain}.disable`, {}); } + + async getAccessibilityTree(): Promise { + return getAccessibilityTree(this, this.stagehand.logger); + } } diff --git a/lib/a11y/utils.ts b/lib/a11y/utils.ts index 6c426ee9..02f2b432 100644 --- a/lib/a11y/utils.ts +++ b/lib/a11y/utils.ts @@ -244,6 +244,9 @@ export async function buildHierarchicalTree( /** * Retrieves the full accessibility tree via CDP and transforms it into a hierarchical structure. + * + * DO NOT USE THIS FUNCTION DIRECTLY. + * Instead, use `StagehandPage.getAccessibilityTree()` */ export async function getAccessibilityTree( page: StagehandPage, diff --git a/lib/handlers/observeHandler.ts b/lib/handlers/observeHandler.ts index 9b598b6e..6f50c691 100644 --- a/lib/handlers/observeHandler.ts +++ b/lib/handlers/observeHandler.ts @@ -4,10 +4,7 @@ import { observe } from "../inference"; import { LLMClient } from "../llm/LLMClient"; import { StagehandPage } from "../StagehandPage"; import { generateId, drawObserveOverlay } from "../utils"; -import { - getAccessibilityTree, - getXPathByResolvedObjectId, -} from "../a11y/utils"; +import { getXPathByResolvedObjectId } from "../a11y/utils"; import { AccessibilityNode } from "../../types/context"; export class StagehandObserveHandler { @@ -88,7 +85,7 @@ export class StagehandObserveHandler { const useAccessibilityTree = !onlyVisible; if (useAccessibilityTree) { await this.stagehandPage._waitForSettledDom(); - const tree = await getAccessibilityTree(this.stagehandPage, this.logger); + const tree = await this.stagehandPage.getAccessibilityTree(); this.logger({ category: "observation", message: "Getting accessibility tree data", diff --git a/types/page.ts b/types/page.ts index 3d570601..c5627a8f 100644 --- a/types/page.ts +++ b/types/page.ts @@ -12,6 +12,7 @@ import type { ObserveOptions, ObserveResult, } from "./stagehand"; +import { TreeResult as TreeResultType } from "./context"; export const defaultExtractSchema = z.object({ extraction: z.string(), @@ -21,6 +22,9 @@ export const pageTextSchema = z.object({ page_text: z.string(), }); +// Need to re-export TreeResult to make it into dist/types +export type TreeResult = TreeResultType; + export interface Page extends Omit { act(action: string): Promise; act(options: ActOptions): Promise; @@ -41,6 +45,8 @@ export interface Page extends Omit { on: { (event: "popup", listener: (page: Page) => unknown): Page; } & PlaywrightPage["on"]; + + getAccessibilityTree(): Promise; } // Empty type for now, but will be used in the future From 6f4f855bbdf17a94cf3ef7dc3e5fc6c7aa48f3a4 Mon Sep 17 00:00:00 2001 From: Anirudh Kamath Date: Thu, 6 Mar 2025 11:14:56 -0800 Subject: [PATCH 2/3] comment --- examples/example.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/example.ts b/examples/example.ts index e3d41ae9..164a0517 100644 --- a/examples/example.ts +++ b/examples/example.ts @@ -17,6 +17,7 @@ async function example() { /** * Add your code here! */ + // delete before pushing to main const tree = await stagehand.page.getAccessibilityTree(); console.log(tree); await stagehand.close(); From ef608c342190437e2f6366d71555f1b055947f76 Mon Sep 17 00:00:00 2001 From: miguel Date: Thu, 6 Mar 2025 12:20:01 -0800 Subject: [PATCH 3/3] empty observe returns getAccessibilityTree --- lib/StagehandPage.ts | 2 +- lib/handlers/observeHandler.ts | 7 ++++++- types/page.ts | 2 +- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/lib/StagehandPage.ts b/lib/StagehandPage.ts index c05862ec..720fca59 100644 --- a/lib/StagehandPage.ts +++ b/lib/StagehandPage.ts @@ -687,7 +687,7 @@ export class StagehandPage { async observe( instructionOrOptions?: string | ObserveOptions, - ): Promise { + ): Promise { if (!this.observeHandler) { throw new Error("Observe handler not initialized"); } diff --git a/lib/handlers/observeHandler.ts b/lib/handlers/observeHandler.ts index 6f50c691..77577b38 100644 --- a/lib/handlers/observeHandler.ts +++ b/lib/handlers/observeHandler.ts @@ -64,7 +64,12 @@ export class StagehandObserveHandler { drawOverlay?: boolean; }) { if (!instruction) { - instruction = `Find elements that can be used for any future actions in the page. These may be navigation links, related pages, section/subsection links, buttons, or other interactive elements. Be comprehensive: if there are multiple elements that may be relevant for future actions, return all of them.`; + this.logger({ + category: "observation", + message: "No instruction provided, returning hybrid tree", + level: 1, + }); + return await this.stagehandPage.getAccessibilityTree(); } this.logger({ diff --git a/types/page.ts b/types/page.ts index c5627a8f..01be88c3 100644 --- a/types/page.ts +++ b/types/page.ts @@ -38,7 +38,7 @@ export interface Page extends Omit { ): Promise>; extract(): Promise>; - observe(): Promise; + observe(): Promise; observe(instruction: string): Promise; observe(options?: ObserveOptions): Promise;