Skip to content

Commit

Permalink
add cli to transcribe audio from videos
Browse files Browse the repository at this point in the history
  • Loading branch information
pelikhan committed Jan 9, 2025
1 parent 16499aa commit 2a0ad04
Show file tree
Hide file tree
Showing 4 changed files with 83 additions and 10 deletions.
29 changes: 29 additions & 0 deletions docs/src/content/docs/reference/cli/commands.md
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,35 @@ Options:
-h, --help display help for command
```

## `audio`

```
Usage: genaiscript audio [options] [command]
Audio tasks
Options:
-h, --help display help for command
Commands:
transcode <file> Transcode video/audio file
help [command] display help for command
```

### `audio transcode`

```
Usage: genaiscript audio transcode [options] <file>
Transcode video/audio file
Arguments:
file Audio or video file to transcode
Options:
-h, --help display help for command
```

## `retrieval`

```
Expand Down
11 changes: 11 additions & 0 deletions packages/cli/src/audio.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import { writeFile } from "node:fs/promises"
import { convertToAudioBlob } from "../../core/src/ffmpeg"

/**
 * Transcodes an audio or video file and writes the result next to the input
 * as `<file>.wav`.
 *
 * @param file - Path of the audio or video file to transcode.
 * @param options - Optional settings. `force` is forwarded to
 * `convertToAudioBlob` as `forceConversion`.
 */
export async function transcodeFile(
    file: string,
    options?: { force?: boolean }
) {
    // The CLI may invoke this without any option set; tolerate a missing object.
    const { force } = options || {}
    const res = await convertToAudioBlob(file, { forceConversion: force })

    const fn = file + ".wav"
    await writeFile(fn, Buffer.from(await res.arrayBuffer()))
    // Report success only once the file has actually been written, so a
    // failed write does not leave a misleading message in the output.
    console.log(`transcoded file to ${fn}`)
}
8 changes: 8 additions & 0 deletions packages/cli/src/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ import { CORE_VERSION, GITHUB_REPO } from "../../core/src/version" // Core versi
import { logVerbose } from "../../core/src/util" // Utility logging
import { semverSatisfies } from "../../core/src/semver" // Semantic version checking
import { convertFiles } from "./convert"
import { transcodeFile } from "./audio"

/**
* Main function to initialize and run the CLI.
Expand Down Expand Up @@ -329,6 +330,13 @@ export async function cli() {
.argument("[name]", "Name of the cache, tests")
.action(cacheClear) // Action to clear cache

const audio = program.command("audio").description("Audio tasks")
audio
.command("transcode")
.description("Transcode video/audio file")
.argument("<file>", "Audio or video file to transcode")
.action(transcodeFile)

// Define 'retrieval' command group for RAG support
const retrieval = program
.command("retrieval")
Expand Down
45 changes: 35 additions & 10 deletions packages/core/src/ffmpeg.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,16 @@ import { fileTypeFromBuffer } from "file-type"
import { PassThrough } from "stream"
import { logError, logVerbose } from "./util"
import { TraceOptions } from "./trace"
import Ffmpeg from "fluent-ffmpeg"
import { lookupMime } from "./mime"
import { host } from "./host"

async function importFfmpeg() {
const ffmpeg = await import("fluent-ffmpeg")
return ffmpeg.default
/**
 * Loads fluent-ffmpeg on demand and returns a fresh command with the shared
 * diagnostic listeners already attached:
 * - "start": logs the command line through `logVerbose`
 * - "progress": writes a dot to stderr
 * - "stderr": forwards each stderr line to `logVerbose`
 */
async function ffmpeg() {
    const { default: createCommand } = await import("fluent-ffmpeg")
    const command = createCommand()
    command.on("start", (commandLine) => logVerbose(commandLine))
    command.on("progress", () => process.stderr.write("."))
    command.on("stderr", (s) => logVerbose(s))
    return command
}

export async function convertToAudioBlob(
Expand All @@ -29,7 +32,6 @@ export async function convertToAudioBlob(
return new Promise<Blob>(async (resolve, reject) => {
const outputStream = new PassThrough()
const chunks: Buffer[] = []

outputStream.on("data", (chunk) => chunks.push(chunk))
outputStream.on("end", async () => {
const buffer = Buffer.concat(chunks)
Expand All @@ -41,14 +43,37 @@ export async function convertToAudioBlob(
logError(e)
reject(e)
})
const ffmpeg = await importFfmpeg()
ffmpeg(file)
.on("start", (commandLine) => logVerbose(commandLine))
.on("progress", () => process.stderr.write("."))
.on("stderr", (s) => logVerbose(s))
const cmd = await ffmpeg()
cmd.input(file)
.noVideo()
.input(file)
.toFormat("wav")
.on("error", reject)
.pipe(outputStream, { end: true })
})
}

/**
 * Runs ffmpeg on a media file and collects the raw video output it pipes back.
 *
 * @param file - Path of the media file to read.
 * @param timestamps - Positions passed to ffmpeg's `-ss` option. An empty list
 * resolves to an empty result without spawning ffmpeg.
 * @returns The buffers received from ffmpeg's output stream, in arrival order.
 * @throws Rejects if the ffmpeg command cannot be created or ffmpeg reports an
 * error.
 */
export async function renderVideoFrames(
    file: string,
    timestamps: number[]
): Promise<Buffer[]> {
    // Nothing requested: avoid launching ffmpeg with an empty `-ss` value.
    if (!timestamps?.length) return []

    // Create the command before entering the Promise constructor. Awaiting
    // inside an async executor would swallow a failure here and leave the
    // returned promise pending forever.
    const cmd = await ffmpeg()
    const frames: Buffer[] = []

    return new Promise<Buffer[]>((resolve, reject) => {
        cmd.input(file)
            .outputOptions([
                "-f image2pipe",
                "-pix_fmt rgb24",
                "-vcodec rawvideo",
                "-vf select='eq(pict_type,I)'",
                // NOTE(review): ffmpeg's `-ss` expects a single position; a
                // comma-joined list looks unintended — confirm desired behavior.
                `-ss ${timestamps.join(",")}`,
            ])
            .on("error", reject)
            .on("end", () => resolve(frames))
            .pipe(
                new PassThrough({
                    // Collect every chunk and emit nothing downstream.
                    // NOTE(review): stream chunks are not guaranteed to align
                    // with frame boundaries — confirm callers do not assume
                    // one Buffer per frame.
                    transform: (chunk, _, callback) => {
                        frames.push(chunk)
                        callback()
                    },
                })
            )
    })
}

0 comments on commit 2a0ad04

Please sign in to comment.