Skip to content

Commit

Permalink
video recording in browser (#988)
Browse files Browse the repository at this point in the history
* add context and page closing

* feat: ✨ add incognito mode and video recording options

* docs: ✏️ update incognito and video recording details

* feat: 🎥 add video recording support to trace functionality
  • Loading branch information
pelikhan authored Jan 8, 2025
1 parent 3d89b2b commit 6c294a1
Show file tree
Hide file tree
Showing 7 changed files with 129 additions and 12 deletions.
33 changes: 30 additions & 3 deletions docs/src/content/docs/reference/scripts/browser.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,41 @@ This function launches a new browser instance and optionally navigates to a page
const page = await host.browse(url)
```

You can configure a number of options for the browser instance:
### `incognito`

Setting `incognito: true` will create an isolated, non-persistent browser context. Non-persistent browser contexts don't write any browsing data to disk.

```js
const page = await host.browse(url, { incognito: true })
```

### `recordVideo`

Playwright can record a video of each page in the browser session. You can enable it by passing the `recordVideo` option.
Recording video also implies `incognito` mode as it requires creating a new browsing context.

```js
const page = await host.browse(url, { recordVideo: true })
```

By default, the video size is the page viewport scaled down to fit into 800x800 (800x450 when no viewport is configured), but you can change it by passing the dimensions as the `recordVideo` option.

```js
const page = await host.browse(url, {
recordVideo: { width: 500, height: 500 },
})
```

The video will be saved in a temporary directory under `.genaiscript/videos/<timestamp>/` once the page is closed.
**You need to close the page before accessing the video file.**

```js
await page.close()
const videoPath = await page.video().path()
```

The video file can be further processed using video tools.

## Locators

You can select elements on the page using the `page.get...` or `page.locator` method.
Expand Down Expand Up @@ -100,8 +129,6 @@ const screenshot = await page.screenshot() // returns a node.js Buffer
defImages(screenshot)
```

## Interacting with Elements

## (Advanced) Native Playwright APIs

The `page` instance returned is a native [Playwright Page](https://playwright.dev/docs/api/class-page) object.
Expand Down
50 changes: 45 additions & 5 deletions packages/cli/src/playwright.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
import type { Browser, Page } from "playwright"
import type {
Browser,
BrowserContext,
BrowserContextOptions,
Page,
} from "playwright"
import { TraceOptions } from "../../core/src/trace"
import { logError, logVerbose } from "../../core/src/util"
import { dotGenaiscriptPath, logError, logVerbose } from "../../core/src/util"
import { runtimeHost } from "../../core/src/host"
import { PLAYWRIGHT_VERSION } from "./version"
import { ellipseUri } from "../../core/src/url"
import { PLAYWRIGHT_DEFAULT_BROWSER } from "../../core/src/constants"
import { ensureDir } from "fs-extra"

/**
* Manages browser instances using Playwright, including launching,
Expand All @@ -13,6 +19,7 @@ import { PLAYWRIGHT_DEFAULT_BROWSER } from "../../core/src/constants"
*/
export class BrowserManager {
private _browsers: Browser[] = [] // Stores active browser instances
private _contexts: BrowserContext[] = [] // Stores active browser contexts
private _pages: Page[] = [] // Stores active pages

constructor() {}
Expand Down Expand Up @@ -77,9 +84,11 @@ export class BrowserManager {
*/
async stopAndRemove() {
const browsers = this._browsers.slice(0)
const contexts = this._contexts.slice(0)
const pages = this._pages.slice(0)

this._browsers = []
this._contexts = []
this._pages = []

// Close all active pages
Expand All @@ -94,6 +103,15 @@ export class BrowserManager {
}
}

for (const context of contexts) {
try {
logVerbose(`browsers: closing context`)
await context.close()
} catch (e) {
logError(e)
}
}

// Close all active browsers
for (const browser of browsers) {
try {
Expand All @@ -117,19 +135,41 @@ export class BrowserManager {
url: string,
options?: BrowseSessionOptions & TraceOptions
): Promise<BrowserPage> {
const { trace, incognito, timeout, ...rest } = options || {}
const { trace, incognito, timeout, recordVideo, ...rest } =
options || {}

logVerbose(`browsing ${ellipseUri(url)}`)
const browser = await this.launchBrowser(options)
let page: Page

// Open a new page in incognito mode if specified
if (incognito) {
const context = await browser.newContext(rest)
if (incognito || recordVideo) {
const options = { ...rest } as BrowserContextOptions
if (recordVideo) {
const dir = dotGenaiscriptPath(
"videos",
`${new Date().toISOString().replace(/[:.]/g, "-")}`
)
await ensureDir(dir)
trace?.itemValue(`video dir`, dir)
options.recordVideo = { dir }
if (typeof recordVideo === "object")
options.recordVideo.size = recordVideo
}
const context = await browser.newContext(options)
this._contexts.push(context)
page = await context.newPage()
} else {
page = await browser.newPage(rest)
}
page.on("close", async () => {
const video = page.video()
if (video) {
const p = await video.path()
if (p) trace?.video(`video recording of ${page.url()}`, p)
}
})
this._pages.push(page)

// Set page timeout if specified
if (timeout !== undefined) page.setDefaultTimeout(timeout)
Expand Down
16 changes: 16 additions & 0 deletions packages/core/src/trace.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ import { ellipse, toStringList } from "./util"
import { estimateTokens } from "./tokens"
import { renderWithPrecision } from "./precision"
import { fenceMD } from "./mkmd"
import { HTMLEscape } from "./html"
import { resolve } from "node:path"
import { pathToFileURL } from "node:url"
import { dedent } from "./indent"

export class TraceChunkEvent extends Event {
constructor(readonly chunk: string) {
Expand Down Expand Up @@ -122,6 +126,18 @@ ${this.toResultIcon(success, "")}${title}
}
}

/**
 * Appends a video entry to the trace: a markdown list item carrying `name`,
 * followed by an HTML `<video>` element whose `src` is the file URL of the
 * recording.
 * @param name label rendered as the list item text and, HTML-escaped, as the element `title`
 * @param filepath path to the video file; resolved to an absolute path before conversion to a `file:` URL
 * @param alt optional text for the `aria-label` attribute (HTML-escaped); falls back to `name` when omitted or empty
 */
video(name: string, filepath: string, alt?: string) {
// resolve() makes the path absolute, which pathToFileURL needs to build the file URL
const url = pathToFileURL(resolve(filepath))
this.appendContent(
dedent`
- ${name}
<video src="${url.href}" title="${HTMLEscape(name)}" aria-label="${HTMLEscape(alt || name)}" controls="true"></video>
`
)
}

details(
title: string,
body: string | object,
Expand Down
23 changes: 22 additions & 1 deletion packages/core/src/types/prompt_template.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2958,6 +2958,14 @@ interface BrowseSessionOptions extends BrowserOptions, TimeoutOptions {
* @link https://playwright.dev/docs/api/class-browser#browser-new-context-option-java-script-enabled
*/
javaScriptEnabled?: boolean

/**
* Enable recording video for all pages. Implies incognito mode.
*/
recordVideo?: boolean | {
width: number
height: number
}
}

interface TimeoutOptions {
Expand Down Expand Up @@ -3234,6 +3242,13 @@ interface BrowseResponse {

interface BrowserJSHandle {}

/**
 * Handle to the video recording of a browser page, as returned by
 * `BrowserPage.video()` when the `recordVideo` option is enabled.
 */
interface BrowserVideo {
/**
 * Returns the video path once the page is closed.
 * Close the page first; the video is saved when the page closes.
 */
path(): Promise<string>
}

/**
* A playwright Page instance
* @link https://playwright.dev/docs/api/class-page
Expand Down Expand Up @@ -3283,7 +3298,8 @@ interface BrowserPage extends BrowserLocatorSelector {
locator(selector: string): BrowserLocator

/**
* Closes the browser page, context and other resources
* Closes the browser page, context and other resources.
* If video recording is enabled, the video will be saved at this time.
*/
close(): Promise<void>

Expand All @@ -3305,6 +3321,11 @@ interface BrowserPage extends BrowserLocatorSelector {
selector: string,
arg?: any
): Promise<BrowserJSHandle>

/**
* Video object associated with this page, if `recordVideo` option is enabled.
*/
video(): BrowserVideo | null
}

interface ShellSelectOptions {}
Expand Down
4 changes: 2 additions & 2 deletions packages/sample/genaisrc/browse-text.genai.mts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
script({
model: "gpt-3.5-turbo",
model: "small",
group: "browser",
parameters: {
headless: {
Expand All @@ -9,7 +9,7 @@ script({
},
},
})
const { headless } = env.vars
const { headless, recordVideo } = env.vars
const page = await host.browse(
"https://github.com/microsoft/genaiscript/blob/main/packages/sample/src/penguins.csv",
{ headless }
Expand Down
13 changes: 13 additions & 0 deletions packages/sample/genaisrc/browse-video.genai.mts
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import { delay } from "genaiscript/runtime"

// Sample script: browse a page with video recording enabled, then print the
// path of the recorded video file.
script({
    model: "small",
    group: "browser",
})

// `recordVideo: true` enables recording for the page (it also implies an
// incognito browsing context).
const page = await host.browse("https://microsoft.github.io/genaiscript/", {
    headless: true,
    recordVideo: true,
})

// Pause before closing the page (presumably so the recording captures some footage).
await delay(1000)

// The video is saved when the page closes, so close before asking for the path.
await page.close()

// `video()` is declared as `BrowserVideo | null`; guard the access so a missing
// recording yields `undefined` instead of throwing a TypeError.
const video = await page.video()?.path()
console.log(`video ${video}`)
2 changes: 1 addition & 1 deletion packages/sample/genaisrc/browse-vision.genai.mts
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ const { error, fences } = await runPrompt(
_.defImages(screenshot)
_.$`Extract the text in the request image. Format the output as a CSV table. If you cannot find text in the image, return 'no data'.`
},
{ model: "openai:gpt-4o" }
{ model: "large" }
)
if (error) throw error
const csv = fences.find((f) => f.language == "csv")
Expand Down

0 comments on commit 6c294a1

Please sign in to comment.