feat: add m4b-extractor package and script for audiobook chapter extraction
BCNelson · Jan 3, 2025 · 209dc67 · 209dc67
1 parent 8a5b2f8
commit 209dc67
Show file tree

Hide file tree

Showing 4 changed files with 100 additions and 0 deletions.
diff --git a/home-manager/bcnelson/golf.nix b/home-manager/bcnelson/golf.nix
@@ -7,5 +7,7 @@
 
   home.packages = [
     pkgs.yt-dlp
+    pkgs.libation
+    pkgs.mb4-extractor
   ];
 }
diff --git a/pkgs/default.nix b/pkgs/default.nix
@@ -5,4 +5,5 @@ pkgs: {
   mdns-reflector = pkgs.callPackage ./mdns-reflector.nix { };
   install-system = pkgs.callPackage ./install-system { };
   dolphin-shred = pkgs.callPackage ./dolphin-shred.nix { };
+  mb4-extractor = pkgs.callPackage ./m4b-extractor { };
 }
diff --git a/pkgs/m4b-extractor/default.nix b/pkgs/m4b-extractor/default.nix
@@ -0,0 +1,5 @@
+# Packages m4b-chapter-extractor.sh as the `m4b-extractor` command.
+{ writeShellApplication, ffmpeg, jq, atomicparsley, coreutils, gnugrep }: writeShellApplication {
+    name = "m4b-extractor";
+    # Tools the script invokes: ffmpeg (the script also calls ffprobe, presumably
+    # shipped by the same package - confirm), jq, AtomicParsley, coreutils
+    # (basename, mkdir, rm) and grep. gnugrep is listed so grep does not have to
+    # come from the caller's PATH.
+    runtimeInputs = [ ffmpeg jq atomicparsley coreutils gnugrep ];
+    text = builtins.readFile ./m4b-chapter-extractor.sh;
+}
diff --git a/pkgs/m4b-extractor/m4b-chapter-extractor.sh b/pkgs/m4b-extractor/m4b-chapter-extractor.sh
@@ -0,0 +1,92 @@
+#!/usr/bin/env bash
+set -u
+
+# Script to convert m4b (audiobook) files with embedded chapters (e.g. files
+# converted from Audible) into individual chapter files.
+#
+# Required: ffmpeg; jq (JSON interpreter) and AtomicParsley (to embed pictures
+# and add additional metadata to m4a/m4b AAC files).
+
+# The first argument is the input file; every later step reads "$1", so stop
+# here when it is missing or empty. "${1:-}" stays safe under `set -u`.
+if [ -z "${1:-}" ]; then
+  echo "No input file provided." >&2
+  exit 1
+fi
+
+ext=${1##*.}
+echo "extension: $ext"
+# all files / folders are named based on the "shortname" of the input file
+shortname=$(basename "$1" ".$ext")
+picture=$shortname.jpg
+chapterdata=$shortname.dat
+metadata=$shortname.tmp
+echo "shortname: $shortname"
+
+extension="${1##*.}"
+
+forcemp3=0
+
+if [ "$extension" == "mp3" ]; then
+  forcemp3=1
+fi
+
+# if an output type has been given on the command line, set parameters (used in ffmpeg command later)
+if [[  ${2+x} = "mp3"  ||  $forcemp3 = 1  ]] ; then
+  outputtype="mp3"
+  codec="libmp3lame"
+  echo mp3
+elif [[ ${2+x} = "m4a" ]]; then
+  outputtype="m4a"
+  codec="copy"
+else
+  outputtype="m4b"
+  codec="copy"
+fi
+echo "outputtype: |$outputtype|"
+
+# if it doesn't already exist, create a json file containing the chapter breaks (you can edit this file if you want chapters to be named rather than simply "Chapter 1", etc that Audible use)
+[ ! -e "$chapterdata" ] && ffprobe -loglevel error \
+            -i "$1" -print_format json -show_chapters -loglevel error -sexagesimal \
+            >"$chapterdata"
+read -rp "Now edit the file $chapterdata if required. Press ENTER to continue."
+# comment out above if you don't want the script to pause!
+
+# read the chapters into arrays for later processing
+readarray -t id <<< "$(jq -r '.chapters[].id' "$chapterdata")"
+readarray -t start <<< "$(jq -r '.chapters[].start_time' "$chapterdata")"
+readarray -t end <<< "$(jq -r '.chapters[].end_time' "$chapterdata")"
+readarray -t title <<< "$(jq -r '.chapters[].tags.title' "$chapterdata")"
+
+# Create an ffmpeg metadata file to recover additional metadata lost in
+# splitting files - deleted afterwards.
+ffmpeg -loglevel error -i "$1" -f ffmetadata "$metadata"
+# grep exits non-zero when a tag is absent. `|| true` leaves the variable empty
+# in that case instead of aborting the script when it runs with errexit, which
+# the writeShellApplication wrapper enables.
+artist_sort=$(grep -m 1 '^sort_artist' "$metadata" || true)
+# Keep only the value after the first "=".
+artist_sort=${artist_sort#*=}
+album_sort=$(grep -m 1 '^sort_album' "$metadata" || true)
+album_sort=${album_sort#*=}
+rm -f -- "$metadata"
+
+# Create directory for the output, named after the book.
+mkdir -p "$shortname"
+echo -e "\fID\tStart Time\tEnd Time\tTitle\t\tFilename"
+for i in "${!id[@]}"; do
+  # Track numbers start at 1 while array indices start at 0. This must be an
+  # arithmetic expansion: a quoted "(($i+1))" is just a literal string.
+  trackno=$((i + 1))
+  # A "/" inside a chapter title would be read as a directory separator.
+  file_title=${title[$i]//\//_}
+  # Set the name for output - currently in the format
+  # <bookname>/<track number>. <bookname> - <chapter title>.<type>
+  outname="$shortname/$(printf "%02d" "$trackno"). $shortname - $file_title.$outputtype"
+  #outname=$(sed -e 's/[^A-Za-z0-9._- ]/_/g' <<< $outname)
+  outname="${outname//:/_}"
+  echo -e "${id[$i]}\t${start[$i]}\t${end[$i]}\t${title[$i]}\n\t\t$(basename "$outname")"
+  # Cut this chapter's time range out of the input, dropping any video stream
+  # and carrying over the input's global metadata.
+  ffmpeg -loglevel error -i "$1" -vn -c "$codec" \
+            -ss "${start[$i]}" -to "${end[$i]}" \
+            -metadata title="${title[$i]}" \
+            -metadata track="$trackno" \
+            -map_metadata 0 -id3v2_version 3 \
+            "$outname"
+  # An `if` rather than `cond && cmd`, so a non-matching type does not leave a
+  # failing status as the loop's (and therefore the script's) final result.
+  if [[ $outputtype == m4* ]]; then
+    tagargs=()
+    # Nothing in this script creates "$picture"; only pass --artwork when the
+    # file is already present so AtomicParsley is not pointed at a missing file.
+    if [[ -e $picture ]]; then
+      tagargs+=(--artwork "$picture")
+    fi
+    tagargs+=(--overWrite
+              --sortOrder artist "$artist_sort"
+              --sortOrder album "$album_sort")
+    AtomicParsley "$outname" "${tagargs[@]}" > /dev/null
+  fi
+done