diff --git a/README.md b/README.md index 1a143f1..0855f0b 100644 --- a/README.md +++ b/README.md @@ -180,6 +180,10 @@ const DEFAULT_MARKDOWN_OPTIONS = { }; ``` +#### `--unsafe` + +Disables some of JSDOM's [name validations](https://github.com/jsdom/jsdom/blob/main/lib/jsdom/living/helpers/validate-names.js), which may throw an error when parsing invalid HTML pages (see [#177](https://github.com/danburzo/percollate/issues/177)). + ## Recipes ### Basic bundling diff --git a/cli.js b/cli.js index afdb152..32ab0c2 100755 --- a/cli.js +++ b/cli.js @@ -137,6 +137,9 @@ Commmon options: --inline Embed images inline with the content. Fetches and converts images to Base64 'data:' URLs. + --unsafe Disable some validations in JSDOM to suppress some + errors thrown for invalid HTML inputs. + Options to disable features: --no-amp Don't prefer the AMP version of the web page. diff --git a/index.js b/index.js index 2bf7ada..1ed4a2f 100755 --- a/index.js +++ b/index.js @@ -8,6 +8,8 @@ import pup from 'puppeteer'; import archiver from 'archiver'; import fetch from 'node-fetch'; import { JSDOM } from 'jsdom'; +import validateNames from 'jsdom/lib/jsdom/living/helpers/validate-names.js'; + import nunjucks from 'nunjucks'; import css from 'css'; import { Readability } from '@mozilla/readability'; @@ -229,6 +231,18 @@ async function cleanup(url, options) { ? url : 'file://' + path.resolve(url); + /* + Disable some validations in JSDOM to allow + some invalid HTML files to be processed correctly. + + Currently done via monkey-patching. See: + https://github.com/danburzo/percollate/issues/177 + */ + if (options.unsafe) { + // make no-op + validateNames.name = () => {}; + } + const dom = new JSDOM(buffer, { contentType, url: final_url