diff --git a/README.md b/README.md index d70fc2e..70b6150 100644 --- a/README.md +++ b/README.md @@ -1,30 +1,67 @@ -# Stagehand Java API Library - - - -[](https://central.sonatype.com/artifact/com.browserbase.api/stagehand-java/0.6.0) -[](https://javadoc.io/doc/com.browserbase.api/stagehand-java/0.6.0) - - - -The Stagehand Java SDK provides convenient access to the [Stagehand REST API](https://docs.stagehand.dev) from applications written in Java. - -It is generated with [Stainless](https://www.stainless.com/). - -## MCP Server - -Use the Stagehand MCP Server to enable AI assistants to interact with this API, allowing them to explore endpoints, make test requests, and use documentation to help integrate this SDK into your application. - -[](https://cursor.com/en-US/install-mcp?name=stagehand-mcp&config=eyJjb21tYW5kIjoibnB4IiwiYXJncyI6WyIteSIsInN0YWdlaGFuZC1tY3AiXX0) -[](https://vscode.stainless.com/mcp/%7B%22name%22%3A%22stagehand-mcp%22%2C%22command%22%3A%22npx%22%2C%22args%22%3A%5B%22-y%22%2C%22stagehand-mcp%22%5D%7D) - -> Note: You may need to set environment variables in your MCP client. - - - -The REST API documentation can be found on [docs.stagehand.dev](https://docs.stagehand.dev). Javadocs are available on [javadoc.io](https://javadoc.io/doc/com.browserbase.api/stagehand-java/0.6.0). - - +
+
+ The AI Browser Automation Framework
+ Read the Docs
+
+If you're looking for other languages, you can find them + here +
+ + + +## What is Stagehand? + +Stagehand is a browser automation framework used to control web browsers with natural language and code. By combining the power of AI with the precision of code, Stagehand makes web automation flexible, maintainable, and actually reliable. + +## Why Stagehand? + +Most existing browser automation tools either require you to write low-level code in a framework like Selenium, Playwright, or Puppeteer, or use high-level agents that can be unpredictable in production. By letting developers choose what to write in code vs. natural language (and bridging the gap between the two), Stagehand is the natural choice for browser automations in production. + +1. **Choose when to write code vs. natural language**: use AI when you want to navigate unfamiliar pages, and use code when you know exactly what you want to do. + +2. **Go from AI-driven to repeatable workflows**: Stagehand lets you preview AI actions before running them, and also helps you easily cache repeatable actions to save time and tokens. + +3. **Write once, run forever**: Stagehand's auto-caching combined with self-healing remembers previous actions, runs without LLM inference, and knows to involve AI whenever the website changes and your automation breaks. 
## Installation @@ -81,8 +118,6 @@ import java.util.Map; import java.util.Optional; public class Main { - private static final String SDK_VERSION = "3.0.6"; - public static void main(String[] args) { // Create client using environment variables: // BROWSERBASE_API_KEY, BROWSERBASE_PROJECT_ID, MODEL_API_KEY @@ -92,8 +127,6 @@ public class Main { SessionStartResponse startResponse = client.sessions().start( SessionStartParams.builder() .modelName("openai/gpt-5-nano") - .xLanguage(SessionStartParams.XLanguage.TYPESCRIPT) - .xSdkVersion(SDK_VERSION) .build() ); @@ -102,14 +135,10 @@ public class Main { try { // Navigate to a webpage - // frameId is required - use empty string for the main frame client.sessions().navigate( SessionNavigateParams.builder() .id(sessionId) .url("https://news.ycombinator.com") - .frameId("") - .xLanguage(SessionNavigateParams.XLanguage.TYPESCRIPT) - .xSdkVersion(SDK_VERSION) .build() ); System.out.println("Navigated to Hacker News"); @@ -119,8 +148,6 @@ public class Main { SessionObserveParams.builder() .id(sessionId) .instruction("find the link to view comments for the top post") - .xLanguage(SessionObserveParams.XLanguage.TYPESCRIPT) - .xSdkVersion(SDK_VERSION) .build() ); @@ -143,8 +170,6 @@ public class Main { SessionActParams.builder() .id(sessionId) .input(action) - .xLanguage(SessionActParams.XLanguage.TYPESCRIPT) - .xSdkVersion(SDK_VERSION) .build() ); System.out.println("Act completed: " + actResponse.data().result().message()); @@ -168,8 +193,6 @@ public class Main { ))) .putAdditionalProperty("required", JsonValue.from(List.of("commentText"))) .build()) - .xLanguage(SessionExtractParams.XLanguage.TYPESCRIPT) - .xSdkVersion(SDK_VERSION) .build() ); @@ -204,8 +227,6 @@ public class Main { )) .cua(false) .build()) - .xLanguage(SessionExecuteParams.XLanguage.TYPESCRIPT) - .xSdkVersion(SDK_VERSION) .build() ); @@ -217,8 +238,6 @@ public class Main { client.sessions().end( SessionEndParams.builder() .id(sessionId) - 
.xLanguage(SessionEndParams.XLanguage.TYPESCRIPT) - .xSdkVersion(SDK_VERSION) .build() ); System.out.println("Session ended"); diff --git a/stagehand-java-example/src/main/java/com/stagehand/api/example/Main.java b/stagehand-java-example/src/main/java/com/stagehand/api/example/Main.java index f5b98ae..e2c7a9f 100644 --- a/stagehand-java-example/src/main/java/com/stagehand/api/example/Main.java +++ b/stagehand-java-example/src/main/java/com/stagehand/api/example/Main.java @@ -29,10 +29,6 @@ * - MODEL_API_KEY: Your OpenAI API key */ public class Main { - - // SDK version for API compatibility (matches TypeScript SDK v3) - private static final String SDK_VERSION = "3.0.6"; - public static void main(String[] args) { // Create client using environment variables // BROWSERBASE_API_KEY, BROWSERBASE_PROJECT_ID, MODEL_API_KEY @@ -42,8 +38,6 @@ public static void main(String[] args) { SessionStartResponse startResponse = client.sessions().start( SessionStartParams.builder() .modelName("openai/gpt-5-nano") - .xLanguage(SessionStartParams.XLanguage.TYPESCRIPT) - .xSdkVersion(SDK_VERSION) .build() ); @@ -56,9 +50,6 @@ public static void main(String[] args) { SessionNavigateParams.builder() .id(sessionId) .url("https://news.ycombinator.com") - .frameId("") // Empty string for main frame - .xLanguage(SessionNavigateParams.XLanguage.TYPESCRIPT) - .xSdkVersion(SDK_VERSION) .build() ); System.out.println("Navigated to Hacker News"); @@ -68,8 +59,6 @@ public static void main(String[] args) { SessionObserveParams.builder() .id(sessionId) .instruction("find the link to view comments for the top post") - .xLanguage(SessionObserveParams.XLanguage.TYPESCRIPT) - .xSdkVersion(SDK_VERSION) .build() ); @@ -91,8 +80,6 @@ public static void main(String[] args) { SessionActParams.builder() .id(sessionId) .input(action) - .xLanguage(SessionActParams.XLanguage.TYPESCRIPT) - .xSdkVersion(SDK_VERSION) .build() ); System.out.println("Act completed: " + actResponse.data().result().message()); @@ 
-117,8 +104,6 @@ public static void main(String[] args) { ))) .putAdditionalProperty("required", JsonValue.from(List.of("commentText"))) .build()) - .xLanguage(SessionExtractParams.XLanguage.TYPESCRIPT) - .xSdkVersion(SDK_VERSION) .build() ); @@ -161,8 +146,6 @@ public static void main(String[] args) { )) .cua(false) .build()) - .xLanguage(SessionExecuteParams.XLanguage.TYPESCRIPT) - .xSdkVersion(SDK_VERSION) .build(), RequestOptions.builder().timeout(Duration.ofMinutes(5)).build() ); @@ -176,8 +159,6 @@ public static void main(String[] args) { client.sessions().end( SessionEndParams.builder() .id(sessionId) - .xLanguage(SessionEndParams.XLanguage.TYPESCRIPT) - .xSdkVersion(SDK_VERSION) .build() ); System.out.println("Session ended");