From 85a33d9e76d75e50968e03ca2e355e79abb59d11 Mon Sep 17 00:00:00 2001 From: Anirudh Kamath Date: Fri, 28 Feb 2025 20:17:23 -0800 Subject: [PATCH 1/9] working --- lib/llm/BraintrustClient.ts | 328 ++++++++++++++++++++++++++++++++++++ lib/llm/LLMClient.ts | 2 +- lib/llm/LLMProvider.ts | 29 ++-- stagehand.config.ts | 5 +- types/model.ts | 2 +- 5 files changed, 353 insertions(+), 13 deletions(-) create mode 100644 lib/llm/BraintrustClient.ts diff --git a/lib/llm/BraintrustClient.ts b/lib/llm/BraintrustClient.ts new file mode 100644 index 00000000..e885eff0 --- /dev/null +++ b/lib/llm/BraintrustClient.ts @@ -0,0 +1,328 @@ +import OpenAI from "openai"; +import type { ClientOptions } from "openai"; +import { zodToJsonSchema } from "zod-to-json-schema"; +import { LogLine } from "../../types/log"; +import { AvailableModel } from "../../types/model"; +import { LLMCache } from "../cache/LLMCache"; +import { + ChatMessage, + CreateChatCompletionOptions, + LLMClient, + LLMResponse, +} from "./LLMClient"; + +export class BraintrustClient extends LLMClient { + public type = "braintrust" as const; + private client: OpenAI; + private cache: LLMCache | undefined; + private enableCaching: boolean; + public clientOptions: ClientOptions; + public hasVision = false; + + constructor({ + enableCaching = false, + cache, + modelName, + clientOptions, + userProvidedInstructions, + }: { + logger: (message: LogLine) => void; + enableCaching?: boolean; + cache?: LLMCache; + modelName: AvailableModel; + clientOptions?: ClientOptions; + userProvidedInstructions?: string; + }) { + super(modelName, userProvidedInstructions); + + // Create OpenAI client with the base URL set to Braintrust API + this.client = new OpenAI({ + baseURL: "https://api.braintrust.dev/v1/proxy", + apiKey: clientOptions?.apiKey || process.env.BRAINTRUST_API_KEY, + ...clientOptions, + }); + + this.cache = cache; + this.enableCaching = enableCaching; + this.modelName = modelName; + this.clientOptions = clientOptions; + } + + async createChatCompletion({ + options, + retries, + logger, + }: CreateChatCompletionOptions): Promise { + const optionsWithoutImage = { ...options }; + delete optionsWithoutImage.image; + + logger({ + category: "braintrust", + message: "creating chat completion", + level: 1, + auxiliary: { + options: { + value: JSON.stringify(optionsWithoutImage), + type: "object", + }, + }, + }); + + // Try to get cached response + const cacheOptions = { + model: this.modelName, + messages: options.messages, + temperature: options.temperature, + response_model: options.response_model, + tools: options.tools, + retries: retries, + }; + + if (this.enableCaching) { + const cachedResponse = await this.cache.get( + cacheOptions, + options.requestId, + ); + if (cachedResponse) { + logger({ + category: "llm_cache", + message: "LLM cache hit - returning cached response", + level: 1, + auxiliary: { + cachedResponse: { + value: JSON.stringify(cachedResponse), + type: "object", + }, + requestId: { + value: options.requestId, + type: "string", + }, + cacheOptions: { + value: JSON.stringify(cacheOptions), + type: "object", + }, + }, + }); + return cachedResponse as T; + } + } + + // Format messages for Braintrust API (using OpenAI format) + const formattedMessages = options.messages.map((msg: ChatMessage) => { + const baseMessage = { + content: + typeof msg.content === "string" + ? msg.content + : Array.isArray(msg.content) && + msg.content.length > 0 && + "text" in msg.content[0] + ? msg.content[0].text + : "", + }; + + // Braintrust only supports system, user, and assistant roles + if (msg.role === "system") { + return { ...baseMessage, role: "system" as const }; + } else if (msg.role === "assistant") { + return { ...baseMessage, role: "assistant" as const }; + } else { + // Default to user for any other role + return { ...baseMessage, role: "user" as const }; + } + }); + + // Format tools if provided + let tools = options.tools?.map((tool) => ({ + type: "function" as const, + function: { + name: tool.name, + description: tool.description, + parameters: { + type: "object", + properties: tool.parameters.properties, + required: tool.parameters.required, + }, + }, + })); + + // Add response model as a tool if provided + if (options.response_model) { + const jsonSchema = zodToJsonSchema(options.response_model.schema) as { + properties?: Record; + required?: string[]; + }; + const schemaProperties = jsonSchema.properties || {}; + const schemaRequired = jsonSchema.required || []; + + const responseTool = { + type: "function" as const, + function: { + name: "print_extracted_data", + description: + "Prints the extracted data based on the provided schema.", + parameters: { + type: "object", + properties: schemaProperties, + required: schemaRequired, + }, + }, + }; + + tools = tools ? [...tools, responseTool] : [responseTool]; + } + + try { + // Use OpenAI client with Braintrust API + const apiResponse = await this.client.chat.completions.create({ + model: this.modelName, + messages: [ + ...formattedMessages, + // Add explicit instruction to return JSON if we have a response model + ...(options.response_model + ? [ + { + role: "system" as const, + content: `IMPORTANT: Your response must be valid JSON that matches this schema: ${JSON.stringify(options.response_model.schema)}`, + }, + ] + : []), + ], + temperature: options.temperature || 0.7, + max_tokens: options.maxTokens, + tools: tools, + tool_choice: options.tool_choice || "auto", + }); + + // Format the response to match the expected LLMResponse format + const response: LLMResponse = { + id: apiResponse.id, + object: "chat.completion", + created: Date.now(), + model: this.modelName, + choices: [ + { + index: 0, + message: { + role: "assistant", + content: apiResponse.choices[0]?.message?.content || null, + tool_calls: apiResponse.choices[0]?.message?.tool_calls || [], + }, + finish_reason: apiResponse.choices[0]?.finish_reason || "stop", + }, + ], + usage: { + prompt_tokens: apiResponse.usage?.prompt_tokens || 0, + completion_tokens: apiResponse.usage?.completion_tokens || 0, + total_tokens: apiResponse.usage?.total_tokens || 0, + }, + }; + + logger({ + category: "braintrust", + message: "response", + level: 1, + auxiliary: { + response: { + value: JSON.stringify(response), + type: "object", + }, + requestId: { + value: options.requestId, + type: "string", + }, + }, + }); + + if (options.response_model) { + // First try standard function calling format + const toolCall = response.choices[0]?.message?.tool_calls?.[0]; + if (toolCall?.function?.arguments) { + try { + const result = JSON.parse(toolCall.function.arguments); + if (this.enableCaching) { + this.cache.set(cacheOptions, result, options.requestId); + } + return result as T; + } catch (e) { + // If JSON parse fails, the model might be returning a different format + logger({ + category: "braintrust", + message: "failed to parse tool call arguments as JSON, retrying", + level: 1, + auxiliary: { + error: { + value: e.message, + type: "string", + }, + }, + }); + } + } + + // If we have content but no tool calls, try to parse the content as JSON + const content = response.choices[0]?.message?.content; + if (content) { + try { + // Try to extract JSON from the content + const jsonMatch = content.match(/\{[\s\S]*\}/); + if (jsonMatch) { + const result = JSON.parse(jsonMatch[0]); + if (this.enableCaching) { + this.cache.set(cacheOptions, result, options.requestId); + } + return result as T; + } + } catch (e) { + logger({ + category: "braintrust", + message: "failed to parse content as JSON", + level: 1, + auxiliary: { + error: { + value: e.message, + type: "string", + }, + }, + }); + } + } + + // If we still haven't found valid JSON and have retries left, try again + if (!retries || retries < 5) { + return this.createChatCompletion({ + options, + logger, + retries: (retries ?? 0) + 1, + }); + } + + throw new Error( + "Create Chat Completion Failed: Could not extract valid JSON from response", + ); + } + + if (this.enableCaching) { + this.cache.set(cacheOptions, response, options.requestId); + } + + return response as T; + } catch (error) { + logger({ + category: "braintrust", + message: "error creating chat completion", + level: 1, + auxiliary: { + error: { + value: error.message, + type: "string", + }, + requestId: { + value: options.requestId, + type: "string", + }, + }, + }); + throw error; + } + } +} diff --git a/lib/llm/LLMClient.ts b/lib/llm/LLMClient.ts index a23e9ee9..c0833d9b 100644 --- a/lib/llm/LLMClient.ts +++ b/lib/llm/LLMClient.ts @@ -81,7 +81,7 @@ export interface CreateChatCompletionOptions { } export abstract class LLMClient { - public type: "openai" | "anthropic" | "cerebras" | string; + public type: "openai" | "anthropic" | "cerebras" | "braintrust" | string; public modelName: AvailableModel; public hasVision: boolean; public clientOptions: ClientOptions; diff --git a/lib/llm/LLMProvider.ts b/lib/llm/LLMProvider.ts index 33d71bf5..462873d7 100644 --- a/lib/llm/LLMProvider.ts +++ b/lib/llm/LLMProvider.ts @@ -9,18 +9,19 @@ import { AnthropicClient } from "./AnthropicClient"; import { CerebrasClient } from "./CerebrasClient"; import { LLMClient } from "./LLMClient"; import { OpenAIClient } from "./OpenAIClient"; +import { BraintrustClient } from "./BraintrustClient"; const modelToProviderMap: { [key in AvailableModel]: ModelProvider } = { - "gpt-4o": "openai", - "gpt-4o-mini": "openai", - "gpt-4o-2024-08-06": "openai", - "o1-mini": "openai", - "o1-preview": "openai", - "o3-mini": "openai", - "claude-3-5-sonnet-latest": "anthropic", - "claude-3-5-sonnet-20240620": "anthropic", - "claude-3-5-sonnet-20241022": "anthropic", - "claude-3-7-sonnet-20250219": "anthropic", + "gpt-4o": "braintrust", + "gpt-4o-mini": "braintrust", + "gpt-4o-2024-08-06": "braintrust", + "o1-mini": "braintrust", + "o1-preview": "braintrust", + "o3-mini": "braintrust", + "claude-3-5-sonnet-latest": "braintrust", + "claude-3-5-sonnet-20240620": "braintrust", + "claude-3-5-sonnet-20241022": "braintrust", + "claude-3-7-sonnet-20250219": "braintrust", "cerebras-llama-3.3-70b": "cerebras", "cerebras-llama-3.1-8b": "cerebras", }; @@ -89,6 +90,14 @@ export class LLMProvider { modelName, clientOptions, }); + case "braintrust": + return new BraintrustClient({ + logger: this.logger, + enableCaching: this.enableCaching, + cache: this.cache, + modelName, + clientOptions, + }); default: throw new Error(`Unsupported provider: ${provider}`); } diff --git a/stagehand.config.ts b/stagehand.config.ts index ea951d1b..e1066d83 100644 --- a/stagehand.config.ts +++ b/stagehand.config.ts @@ -23,7 +23,10 @@ const StagehandConfig: ConstructorParams = { undefined /* Session ID for resuming Browserbase sessions */, modelName: "gpt-4o" /* Name of the model to use */, modelClientOptions: { - apiKey: process.env.OPENAI_API_KEY, + apiKey: process.env.BRAINTRUST_API_KEY, } /* Configuration options for the model client */, + selfHeal: true /* Enable self-healing functionality */, + waitForCaptchaSolves: + true /* Wait for captchas to be solved after navigation when using Browserbase environment */, }; export default StagehandConfig; diff --git a/types/model.ts b/types/model.ts index 8d1a6b49..4c0a8e04 100644 --- a/types/model.ts +++ b/types/model.ts @@ -19,7 +19,7 @@ export const AvailableModelSchema = z.enum([ export type AvailableModel = z.infer; -export type ModelProvider = "openai" | "anthropic" | "cerebras"; +export type ModelProvider = "openai" | "anthropic" | "cerebras" | "braintrust"; export type ClientOptions = OpenAIClientOptions | AnthropicClientOptions; From 8020598674047dfb8ab49e9ed8d81adf9f19cc07 Mon Sep 17 00:00:00 2001 From: Anirudh Kamath Date: Fri, 28 Feb 2025 20:37:31 -0800 Subject: [PATCH 2/9] support braintrust --- examples/example.ts | 7 ++++++- lib/llm/LLMProvider.ts | 39 +++++++++++++++++++++------------------ 2 files changed, 27 insertions(+), 19 deletions(-) diff --git a/examples/example.ts b/examples/example.ts index 41d8d861..760e2114 100644 --- a/examples/example.ts +++ b/examples/example.ts @@ -5,15 +5,20 @@ * npx create-browser-app@latest my-browser-app */ -import { Stagehand } from "@/dist"; +import { AvailableModel, Stagehand } from "@/dist"; import StagehandConfig from "@/stagehand.config"; async function example() { const stagehand = new Stagehand({ ...StagehandConfig, + modelName: "braintrust-gpt-4o" as AvailableModel, + modelClientOptions: { + apiKey: process.env.BRAINTRUST_API_KEY, + }, }); await stagehand.init(); await stagehand.page.goto("https://docs.stagehand.dev"); + await stagehand.page.act("click the quickstart"); } (async () => { diff --git a/lib/llm/LLMProvider.ts b/lib/llm/LLMProvider.ts index 462873d7..22ebcc57 100644 --- a/lib/llm/LLMProvider.ts +++ b/lib/llm/LLMProvider.ts @@ -12,16 +12,16 @@ import { OpenAIClient } from "./OpenAIClient"; import { BraintrustClient } from "./BraintrustClient"; const modelToProviderMap: { [key in AvailableModel]: ModelProvider } = { - "gpt-4o": "braintrust", - "gpt-4o-mini": "braintrust", - "gpt-4o-2024-08-06": "braintrust", - "o1-mini": "braintrust", - "o1-preview": "braintrust", - "o3-mini": "braintrust", - "claude-3-5-sonnet-latest": "braintrust", - "claude-3-5-sonnet-20240620": "braintrust", - "claude-3-5-sonnet-20241022": "braintrust", - "claude-3-7-sonnet-20250219": "braintrust", + "gpt-4o": "openai", + "gpt-4o-mini": "openai", + "gpt-4o-2024-08-06": "openai", + "o1-mini": "openai", + "o1-preview": "openai", + "o3-mini": "openai", + "claude-3-5-sonnet-latest": "anthropic", + "claude-3-5-sonnet-20240620": "anthropic", + "claude-3-5-sonnet-20241022": "anthropic", + "claude-3-7-sonnet-20250219": "anthropic", "cerebras-llama-3.3-70b": "cerebras", "cerebras-llama-3.1-8b": "cerebras", }; @@ -60,6 +60,17 @@ export class LLMProvider { modelName: AvailableModel, clientOptions?: ClientOptions, ): LLMClient { + // Handle braintrust models first + if (modelName.startsWith("braintrust-")) { + return new BraintrustClient({ + logger: this.logger, + enableCaching: this.enableCaching, + cache: this.cache, + modelName: modelName.split("braintrust-")[1] as AvailableModel, + clientOptions, + }); + } + const provider = modelToProviderMap[modelName]; if (!provider) { throw new Error(`Unsupported model: ${modelName}`); @@ -90,14 +101,6 @@ export class LLMProvider { modelName, clientOptions, }); - case "braintrust": - return new BraintrustClient({ - logger: this.logger, - enableCaching: this.enableCaching, - cache: this.cache, - modelName, - clientOptions, - }); default: throw new Error(`Unsupported provider: ${provider}`); } From a866680529accca86d443cc10a8c5606c628312c Mon Sep 17 00:00:00 2001 From: Anirudh Kamath Date: Sat, 1 Mar 2025 11:13:01 -0800 Subject: [PATCH 3/9] example --- examples/example.ts | 12 +++++++++++- package-lock.json | 4 ++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/examples/example.ts b/examples/example.ts index 760e2114..fef58924 100644 --- a/examples/example.ts +++ b/examples/example.ts @@ -7,6 +7,7 @@ import { AvailableModel, Stagehand } from "@/dist"; import StagehandConfig from "@/stagehand.config"; +import { z } from "zod"; async function example() { const stagehand = new Stagehand({ @@ -15,10 +16,19 @@ async function example() { modelClientOptions: { apiKey: process.env.BRAINTRUST_API_KEY, }, + env: "LOCAL", }); await stagehand.init(); await stagehand.page.goto("https://docs.stagehand.dev"); - await stagehand.page.act("click the quickstart"); + const result = await stagehand.page.extract({ + instruction: "get the heading", + schema: z.object({ + heading: z.string().describe("The heading of the page"), + }), + useTextExtract: true, + }); + console.log(result); + await stagehand.close(); } (async () => { diff --git a/package-lock.json b/package-lock.json index 1cce442f..dc1e7db0 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@browserbasehq/stagehand", - "version": "1.12.0", + "version": "1.13.1", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@browserbasehq/stagehand", - "version": "1.12.0", + "version": "1.13.1", "license": "MIT", "dependencies": { "@anthropic-ai/sdk": "^0.27.3", From a67eae79e08d78b43278e2dac7357f974622a134 Mon Sep 17 00:00:00 2001 From: Anirudh Kamath Date: Sat, 1 Mar 2025 16:52:04 -0800 Subject: [PATCH 4/9] add braintrust models to evals --- evals/initStagehand.ts | 5 +---- evals/taskConfig.ts | 2 +- examples/example.ts | 2 +- types/model.ts | 11 ++++++++++- 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/evals/initStagehand.ts b/evals/initStagehand.ts index f818ff5f..11aa972d 100644 --- a/evals/initStagehand.ts +++ b/evals/initStagehand.ts @@ -61,10 +61,7 @@ export const initStagehand = async ({ logger: EvalLogger; configOverrides?: Partial; }) => { - let chosenApiKey: string | undefined = process.env.OPENAI_API_KEY; - if (modelName.startsWith("claude")) { - chosenApiKey = process.env.ANTHROPIC_API_KEY; - } + const chosenApiKey = process.env.BRAINTRUST_API_KEY; const config = { ...StagehandConfig, diff --git a/evals/taskConfig.ts b/evals/taskConfig.ts index 0031b8a8..b29e8a50 100644 --- a/evals/taskConfig.ts +++ b/evals/taskConfig.ts @@ -49,7 +49,7 @@ if (filterByEvalName && !tasksByName[filterByEvalName]) { */ const DEFAULT_EVAL_MODELS = process.env.EVAL_MODELS ? process.env.EVAL_MODELS.split(",") - : ["gpt-4o", "claude-3-5-sonnet-latest"]; + : ["braintrust-gpt-4o", "braintrust-claude-3-5-sonnet-latest"]; /** * getModelList: diff --git a/examples/example.ts b/examples/example.ts index fef58924..d516b43a 100644 --- a/examples/example.ts +++ b/examples/example.ts @@ -12,7 +12,7 @@ import { z } from "zod"; async function example() { const stagehand = new Stagehand({ ...StagehandConfig, - modelName: "braintrust-gpt-4o" as AvailableModel, + modelName: "braintrust-claude-3-7-sonnet-latest" as AvailableModel, modelClientOptions: { apiKey: process.env.BRAINTRUST_API_KEY, }, diff --git a/types/model.ts b/types/model.ts index 4c0a8e04..5399e3e5 100644 --- a/types/model.ts +++ b/types/model.ts @@ -2,7 +2,8 @@ import type { ClientOptions as AnthropicClientOptions } from "@anthropic-ai/sdk" import type { ClientOptions as OpenAIClientOptions } from "openai"; import { z } from "zod"; -export const AvailableModelSchema = z.enum([ +// Create a base schema for specific known models +const BaseModelSchema = z.enum([ "gpt-4o", "gpt-4o-mini", "gpt-4o-2024-08-06", @@ -17,6 +18,14 @@ export const AvailableModelSchema = z.enum([ "cerebras-llama-3.1-8b", ]); +// Create a schema that also accepts any string starting with "braintrust-" +export const AvailableModelSchema = z.union([ + BaseModelSchema, + z.string().refine((val) => val.startsWith("braintrust-"), { + message: "Braintrust models must start with 'braintrust-'", + }), +]); + export type AvailableModel = z.infer; export type ModelProvider = "openai" | "anthropic" | "cerebras" | "braintrust"; From f8c1394c96bcd86b4d966eb60f5c4354184dbf2c Mon Sep 17 00:00:00 2001 From: Anirudh Kamath Date: Sun, 2 Mar 2025 15:41:31 -0800 Subject: [PATCH 5/9] changeset --- .changeset/shy-poets-run.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/shy-poets-run.md diff --git a/.changeset/shy-poets-run.md b/.changeset/shy-poets-run.md new file mode 100644 index 00000000..8387bade --- /dev/null +++ b/.changeset/shy-poets-run.md @@ -0,0 +1,5 @@ +--- +"@browserbasehq/stagehand": minor +--- + +add support for braintrust ai proxy From 69e02b41b0a6b3ac929e996af141392f5df2f1eb Mon Sep 17 00:00:00 2001 From: Anirudh Kamath Date: Sun, 2 Mar 2025 15:57:09 -0800 Subject: [PATCH 6/9] evals baby --- evals/taskConfig.ts | 12 +++++++++++- evals/tasks/allrecipes.ts | 2 +- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/evals/taskConfig.ts b/evals/taskConfig.ts index b29e8a50..2e194ff5 100644 --- a/evals/taskConfig.ts +++ b/evals/taskConfig.ts @@ -49,7 +49,17 @@ if (filterByEvalName && !tasksByName[filterByEvalName]) { */ const DEFAULT_EVAL_MODELS = process.env.EVAL_MODELS ? process.env.EVAL_MODELS.split(",") - : ["braintrust-gpt-4o", "braintrust-claude-3-5-sonnet-latest"]; + : [ + "braintrust-gpt-4o", + "braintrust-gpt-4.5-preview", + "braintrust-gpt-4o-mini", + "braintrust-claude-3-5-sonnet-latest", + "braintrust-claude-3-7-sonnet-latest", + "braintrust-gemini-2.0-flash", + "braintrust-llama-3.3-70b-versatile", + "braintrust-llama-3.1-8b-instant", + "braintrust-deepseek-r1-distill-llama-70b", + ]; /** * getModelList: diff --git a/evals/tasks/allrecipes.ts b/evals/tasks/allrecipes.ts index 6da2bf8d..aa79321a 100644 --- a/evals/tasks/allrecipes.ts +++ b/evals/tasks/allrecipes.ts @@ -22,7 +22,7 @@ export const allrecipes: EvalFunction = async ({ action: 'Type "chocolate chip cookies" in the search bar', }); await stagehand.page.act({ - action: "hit enter", + action: "click search", }); const recipeDetails = await stagehand.page.extract({ From b795865f677927e5b6a6b124c92af469c8f3cc88 Mon Sep 17 00:00:00 2001 From: Anirudh Kamath Date: Sun, 2 Mar 2025 16:53:01 -0800 Subject: [PATCH 7/9] pls --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index be50b0af..bb4ef32c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -9,7 +9,7 @@ on: - unlabeled env: - EVAL_MODELS: "gpt-4o,gpt-4o-mini,claude-3-5-sonnet-latest" + EVAL_MODELS: "braintrust-gpt-4o,braintrust-gpt-4.5-preview,braintrust-gpt-4o-mini,braintrust-claude-3-5-sonnet-latest,braintrust-claude-3-7-sonnet-latest,braintrust-gemini-2.0-flash,braintrust-llama-3.3-70b-versatile,braintrust-llama-3.1-8b-instant,braintrust-deepseek-r1-distill-llama-70b" EVAL_CATEGORIES: "observe,act,combination,extract,text_extract" concurrency: From 9cc46ee360da908e87f09a6541ae088560d4eb5e Mon Sep 17 00:00:00 2001 From: Anirudh Kamath Date: Sun, 2 Mar 2025 16:57:27 -0800 Subject: [PATCH 8/9] only use braintrust api key --- .github/workflows/ci.yml | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bb4ef32c..5a2e847b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -151,8 +151,7 @@ jobs: github.event_name == 'push' || (github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository) env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }} BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }} BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID }} HEADLESS: true @@ -185,8 +184,6 @@ jobs: runs-on: ubuntu-latest timeout-minutes: 40 env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }} BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }} BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID }} @@ -234,8 +231,6 @@ jobs: runs-on: ubuntu-latest timeout-minutes: 25 env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }} BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }} BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID }} @@ -302,8 +297,6 @@ jobs: runs-on: ubuntu-latest timeout-minutes: 50 env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }} BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }} BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID }} @@ -387,8 +380,6 @@ jobs: runs-on: ubuntu-latest timeout-minutes: 120 env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }} BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }} BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID }} @@ -456,8 +447,6 @@ jobs: runs-on: ubuntu-latest timeout-minutes: 60 env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }} BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }} BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID }} From 5b83692413d47623580d4f4cc575008287f38f62 Mon Sep 17 00:00:00 2001 From: Anirudh Kamath Date: Sun, 2 Mar 2025 17:05:50 -0800 Subject: [PATCH 9/9] working locally --- .../deterministic/tests/local/create.test.ts | 34 +++++++++---------- evals/initStagehand.ts | 4 +-- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/evals/deterministic/tests/local/create.test.ts b/evals/deterministic/tests/local/create.test.ts index 81f91c5a..fdcd1ba4 100644 --- a/evals/deterministic/tests/local/create.test.ts +++ b/evals/deterministic/tests/local/create.test.ts @@ -14,9 +14,9 @@ test.describe("Local browser launch options", () => { debugDom: true, domSettleTimeoutMs: 30_000, enableCaching: true, - modelName: "gpt-4o", + modelName: "braintrust-gpt-4o", modelClientOptions: { - apiKey: process.env.OPENAI_API_KEY, + apiKey: process.env.BRAINTRUST_API_KEY, }, }); await stagehand.init(); @@ -38,9 +38,9 @@ test.describe("Local browser launch options", () => { debugDom: true, domSettleTimeoutMs: 30_000, enableCaching: true, - modelName: "gpt-4o", + modelName: "braintrust-gpt-4o", modelClientOptions: { - apiKey: process.env.OPENAI_API_KEY, + apiKey: process.env.BRAINTRUST_API_KEY, }, localBrowserLaunchOptions: { userDataDir: customUserDataDir, @@ -66,9 +66,9 @@ test.describe("Local browser launch options", () => { debugDom: true, domSettleTimeoutMs: 30_000, enableCaching: true, - modelName: "gpt-4o", + modelName: "braintrust-gpt-4o", modelClientOptions: { - apiKey: process.env.OPENAI_API_KEY, + apiKey: process.env.BRAINTRUST_API_KEY, }, localBrowserLaunchOptions: { viewport: customViewport, @@ -105,9 +105,9 @@ test.describe("Local browser launch options", () => { debugDom: true, domSettleTimeoutMs: 30_000, enableCaching: true, - modelName: "gpt-4o", + modelName: "braintrust-gpt-4o", modelClientOptions: { - apiKey: process.env.OPENAI_API_KEY, + apiKey: process.env.BRAINTRUST_API_KEY, }, localBrowserLaunchOptions: { cookies: testCookies, @@ -139,9 +139,9 @@ test.describe("Local browser launch options", () => { debugDom: true, domSettleTimeoutMs: 30_000, enableCaching: true, - modelName: "gpt-4o", + modelName: "braintrust-gpt-4o", modelClientOptions: { - apiKey: process.env.OPENAI_API_KEY, + apiKey: process.env.BRAINTRUST_API_KEY, }, localBrowserLaunchOptions: { geolocation: customGeolocation, @@ -151,7 +151,7 @@ test.describe("Local browser launch options", () => { await stagehand.init(); const page = await stagehand.context.newPage(); - await page.goto("https://example.com"); + await page.goto("https://docs.stagehand.dev"); const location = await page.evaluate(() => { return new Promise((resolve) => { @@ -180,9 +180,9 @@ test.describe("Local browser launch options", () => { debugDom: true, domSettleTimeoutMs: 30_000, enableCaching: true, - modelName: "gpt-4o", + modelName: "braintrust-gpt-4o", modelClientOptions: { - apiKey: process.env.OPENAI_API_KEY, + apiKey: process.env.BRAINTRUST_API_KEY, }, localBrowserLaunchOptions: { locale: "ja-JP", @@ -192,7 +192,7 @@ test.describe("Local browser launch options", () => { await stagehand.init(); const page = await stagehand.context.newPage(); - await page.goto("https://example.com"); + await page.goto("https://docs.stagehand.dev"); const { locale, timezone } = await page.evaluate(() => ({ locale: navigator.language, @@ -216,9 +216,9 @@ test.describe("Local browser launch options", () => { debugDom: true, domSettleTimeoutMs: 30_000, enableCaching: true, - modelName: "gpt-4o", + modelName: "braintrust-gpt-4o", modelClientOptions: { - apiKey: process.env.OPENAI_API_KEY, + apiKey: process.env.BRAINTRUST_API_KEY, }, localBrowserLaunchOptions: { recordVideo: { @@ -230,7 +230,7 @@ test.describe("Local browser launch options", () => { await stagehand.init(); const page = await stagehand.context.newPage(); - await page.goto("https://example.com"); + await page.goto("https://docs.stagehand.dev"); await stagehand.close(); const videos = fs.readdirSync(videoDir); diff --git a/evals/initStagehand.ts b/evals/initStagehand.ts index 11aa972d..9e9f035c 100644 --- a/evals/initStagehand.ts +++ b/evals/initStagehand.ts @@ -31,9 +31,9 @@ const StagehandConfig = { headless: false, enableCaching, domSettleTimeoutMs: 30_000, - modelName: "gpt-4o", // default model, can be overridden by initStagehand arguments + modelName: "braintrust-gpt-4o", // default model, can be overridden by initStagehand arguments modelClientOptions: { - apiKey: process.env.OPENAI_API_KEY, + apiKey: process.env.BRAINTRUST_API_KEY, }, logger: (logLine: LogLine) => console.log(`[stagehand::${logLine.category}] ${logLine.message}`),