From 3bfc3cf1a19eadad0fcb54a36edd3bbb4d8f3cbe Mon Sep 17 00:00:00 2001 From: Andrew Seier Date: Sat, 23 Nov 2024 18:35:28 -0800 Subject: [PATCH] =?UTF-8?q?Introduce=20=E2=80=9CUnforgivingHtml=E2=80=9D?= =?UTF-8?q?=20parser.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Goals of the parser: * Tighten control over things like double-quotes & closing tags. * Improve error messaging for malformed markup. * Improve performance. --- test/forgiving.js | 275 +++++++ test/test-template-engine.js | 471 ++++++------ ts/x-template.d.ts.map | 2 +- x-template.js | 1343 +++++++++++++++++++++++++++------- 4 files changed, 1612 insertions(+), 479 deletions(-) create mode 100644 test/forgiving.js diff --git a/test/forgiving.js b/test/forgiving.js new file mode 100644 index 0000000..a1441bc --- /dev/null +++ b/test/forgiving.js @@ -0,0 +1,275 @@ +// This is just kept here as an example alternative to our more “unforgiving” +// parsing solution. In particular, it could be interesting to try and keep the +// interfaces to both “forgiving” and “unforgiving” as similar as possible to +// enable us to show performance-testing deltas in the future. +/** Forgiving HTML parser which leverages innerHTML. */ +export default class Forgiving { + // Special markers added to markup enabling discovery post-instantiation. + static #NEXT_MARKER = 'forgiving-next:'; // The ":" helps for debugging. + static #CONTENT_MARKER = 'forgiving-content'; + + // Types of bindings that we can have. + static #ATTRIBUTE = 'attribute'; + static #BOOLEAN = 'boolean'; + static #DEFINED = 'defined'; + static #PROPERTY = 'property'; + + // TODO: Could be more forgiving here! + // Patterns to find special edges in original html strings. 
+ static #OPEN_REGEX = /<[a-z][a-z0-9-]*(?=\s)/g; + static #STEP_REGEX = /(?:\s+[a-z][a-z0-9-]*(?=[\s>])|\s+[a-z][a-zA-Z0-9-]*="[^"]*")+/y; + static #ATTRIBUTE_OR_PROPERTY_REGEX = /\s+(?:(?\?{0,2})?(?([a-z][a-zA-Z0-9-]*))|\.(?[a-z][a-zA-Z0-9_]*))="$/y; + static #CLOSE_REGEX = />/g; + + // Walk through each string from our tagged template function “strings” array + // in a stateful way so that we know what kind of bindings are implied at + // each interpolated value. + static #exhaustString(string, state, context) { + if (!state.inside) { + // We're outside the opening tag. + Forgiving.#OPEN_REGEX.lastIndex = state.index; + const openMatch = Forgiving.#OPEN_REGEX.exec(string); + if (openMatch) { + state.inside = true; + state.index = Forgiving.#OPEN_REGEX.lastIndex; + state.lastOpenContext = context; + state.lastOpenIndex = openMatch.index; + Forgiving.#exhaustString(string, state, context); + } + } else { + // We're inside the opening tag. + Forgiving.#STEP_REGEX.lastIndex = state.index; + if (Forgiving.#STEP_REGEX.test(string)) { + state.index = Forgiving.#STEP_REGEX.lastIndex; + } + Forgiving.#CLOSE_REGEX.lastIndex = state.index; + if (Forgiving.#CLOSE_REGEX.test(string)) { + state.inside = false; + state.index = Forgiving.#CLOSE_REGEX.lastIndex; + Forgiving.#exhaustString(string, state, context); + } + } + } + + // Flesh out an html string from our tagged template function “strings” array + // and add special markers that we can detect later, after instantiation. + // + // E.g., the user might have passed this interpolation: + // + //
+ // ${content} + //
+ // + // … and we would instrument it as follows: + // + //
+ // + //
+ // + static #createHtml(language, strings) { + const keyToKeyState = new Map(); + const htmlStrings = []; + const state = { inside: false, index: 0, lastOpenContext: 0, lastOpenIndex: 0 }; + // We don’t have to test the last string since it is already on the other + // side of the last interpolation, by definition. Hence the “- 1” below. + // Note that this final string is added just after the loop completes. + for (let iii = 0; iii < strings.length - 1; iii++) { + // The index may be set to “1” here, which indicates we are slicing off a + // trailing quote character from a attribute-or-property match. After + // slicing, we reset the index to zero so regular expressions know to + // match from the start in “exhaustString”. + let string = strings[iii]; + if (state.index !== 0) { + string = string.slice(state.index); + state.index = 0; + } + Forgiving.#exhaustString(string, state, iii); + if (state.inside) { + Forgiving.#ATTRIBUTE_OR_PROPERTY_REGEX.lastIndex = state.index; + const match = Forgiving.#ATTRIBUTE_OR_PROPERTY_REGEX.exec(string); + if (match) { + const { questions, attribute, property } = match.groups; + if (attribute) { + // We found a match like this: html``. + // … or this: html`
`. + // … or this: html`
`. + // Syntax is 3-5 characters: `${questions}${attribute}="` + `"`. + let syntax = 3; + let kind = Forgiving.#ATTRIBUTE; + switch (questions) { + case '??': kind = Forgiving.#DEFINED; syntax = 5; break; + case '?': kind = Forgiving.#BOOLEAN; syntax = 4; break; + } + string = string.slice(0, -syntax - attribute.length); + const key = state.lastOpenContext; + const keyState = Forgiving.#setIfMissing(keyToKeyState, key, () => ({ index: state.lastOpenIndex, items: [] })); + keyState.items.push(`${kind}=${attribute}`); + } else { + // We found a match like this: html`
`. + // Syntax is 4 characters: `.${property}="` + `"`. + const syntax = 4; + const kind = Forgiving.#PROPERTY; + string = string.slice(0, -syntax - property.length); + const key = state.lastOpenContext; + const keyState = Forgiving.#setIfMissing(keyToKeyState, key, () => ({ index: state.lastOpenIndex, items: [] })); + keyState.items.push(`${kind}=${property}`); + } + state.index = 1; // Accounts for an expected quote character next. + } else { + // It’s “on or after” because interpolated JS can span multiple lines. + const handled = [...strings.slice(0, iii), string.slice(0, state.index)].join(''); + const lineCount = handled.split('\n').length; + throw new Error(`Found invalid template on or after line ${lineCount} in substring \`${string}\`. Failed to parse \`${string.slice(state.index)}\`.`); + } + } else { + // Assume it’s a match like this: html`
${value}
`. + string += ``; + state.index = 0; // No characters to account for. Reset to zero. + } + htmlStrings[iii] = string; + } + // Again, there might be a quote we need to slice off here still. + let lastString = strings.at(-1); + if (state.index > 0) { + lastString = lastString.slice(state.index); + } + htmlStrings.push(lastString); + for (const [iii, { index, items }] of keyToKeyState.entries()) { + const comment = ``; + const htmlString = htmlStrings[iii]; + htmlStrings[iii] = `${htmlString.slice(0, index)}${comment}${htmlString.slice(index)}`; + } + const html = htmlStrings.join(''); + return language === Forgiving.svg + ? `${html}` + : html; + } + + static #createFragment(language, strings) { + const template = document.createElement('template'); + const html = Forgiving.#createHtml(language, strings); + template.innerHTML = html; + return template.content; + } + + // Walk through our fragment that we added special markers to and notify + // integrator when we hit target “paths”. The integrator can use this with + // a subsequent clone of the fragment to establish “targets”. And, while we + // walk, clean up our bespoke markers. + // Note that we are always walking the interpolated strings and the resulting, + // instantiated DOM _in the same depth-first manner_. This means that the + // ordering is fairly reliable. + // + // For example, we walk this structure: + // + //
+ // + //
+ // + // And end up with this (which is ready to be injected into a container): + // + //
+ // + // + //
+ // + static #walkFragment( + onBoolean, + onDefined, + onAttribute, + onProperty, + onContent, + onText, + node, + nodeType = Node.DOCUMENT_FRAGMENT_NODE, + path = [], + ) { + // @ts-ignore — TypeScript doesn’t seem to understand the nodeType param. + if (nodeType === Node.ELEMENT_NODE) { + // Special case to handle elements which only allow text content (no comments). + const { localName } = node; + if ( + (localName === 'style' || localName === 'script') && + node.textContent.includes(Forgiving.#CONTENT_MARKER) + ) { + throw new Error(`Interpolation of <${localName}> tags is not allowed.`); + } else if (localName === 'textarea' || localName === 'title') { + if (node.textContent.includes(Forgiving.#CONTENT_MARKER)) { + if (node.textContent === ``) { + node.textContent = ''; + onText(path); + } else { + throw new Error(`Only basic interpolation of <${localName}> tags is allowed.`); + } + } + } + } + if (nodeType === Node.DOCUMENT_FRAGMENT_NODE || nodeType === Node.ELEMENT_NODE) { + // It’s expensive to make a copy of “childNodes”. Instead, we carefully + // manage our index as we iterate over the live collection. 
+ const childNodes = node.childNodes; + for (let iii = 0; iii < childNodes.length; iii++) { + const childNode = childNodes[iii]; + const childNodeType = childNode.nodeType; + if (childNodeType === Node.COMMENT_NODE) { + const textContent = childNode.textContent; + if (textContent.startsWith(Forgiving.#CONTENT_MARKER)) { + childNode.textContent = ''; + const startNode = document.createComment(''); + node.insertBefore(startNode, childNode); + iii++; + onContent([...path, iii]); + } else if (textContent.startsWith(Forgiving.#NEXT_MARKER)) { + const data = textContent.slice(Forgiving.#NEXT_MARKER.length); + const items = data.split(','); + for (const item of items) { + const [binding, name] = item.split('='); + switch (binding) { + case Forgiving.#ATTRIBUTE: onAttribute(name, [...path, iii]); break; + case Forgiving.#BOOLEAN: onBoolean(name, [...path, iii]); break; + case Forgiving.#DEFINED: onDefined(name, [...path, iii]); break; + case Forgiving.#PROPERTY: onProperty(name, [...path, iii]); break; + } + } + iii--; + node.removeChild(childNode); + } + } else if (childNodeType === Node.ELEMENT_NODE) { + Forgiving.#walkFragment( + onBoolean, + onDefined, + onAttribute, + onProperty, + onContent, + onText, + childNode, + childNodeType, + [...path, iii], + ); + } + } + } + } + + // TODO: Replace with Map.prototype.getOrInsert when TC39 proposal lands. + // https://github.com/tc39/proposal-upsert + static #setIfMissing(map, key, callback) { + // Values set in this file are ALL truthy, so "get" is used (versus "has"). + let value = map.get(key); + if (!value) { + value = callback(); + map.set(key, value); + } + return value; + } + + // Languages. 
+ static html = 'html'; + static svg = 'svg'; + + static parse(strings, onBoolean, onDefined, onAttribute, onProperty, onContent, onText, language) { + const fragment = Forgiving.#createFragment(language, strings); + Forgiving.#walkFragment(onBoolean, onDefined, onAttribute, onProperty, onContent, onText, fragment); + return fragment; + } +} diff --git a/test/test-template-engine.js b/test/test-template-engine.js index d257935..52520aa 100644 --- a/test/test-template-engine.js +++ b/test/test-template-engine.js @@ -19,6 +19,7 @@ const localMessages = [ 'Deprecated "unsafeSVG" from default templating engine interface.', 'Deprecated "repeat" from default templating engine interface.', 'Deprecated "map" from default templating engine interface.', + 'Support for the "style" tag is deprecated and will be removed in future versions.', ]; console.warn = (...args) => { // eslint-disable-line no-console if (!localMessages.includes(args[0]?.message)) { @@ -29,13 +30,17 @@ console.warn = (...args) => { // eslint-disable-line no-console }; // Simple helper for asserting thrown messages. -const assertThrows = (callback, expectedMessage) => { +const assertThrows = (callback, expectedMessage, options) => { let thrown = false; try { callback(); } catch (error) { thrown = true; - assert(error.message === expectedMessage, error.message); + if (options?.startsWith === true) { + assert(error.message.startsWith(expectedMessage), error.message); + } else { + assert(error.message === expectedMessage, error.message); + } } assert(thrown, 'no error was thrown'); }; @@ -80,6 +85,20 @@ describe('html rendering', () => { assert(container.children[0].getAttribute('foo') === `--{<&>'"}--`); }); + it('renders named html entities which require surrogate pairs', () => { + const container = document.createElement('div'); + render(container, html`
--𝕓𝕓--𝕓--
`); + assert(container.childElementCount === 1); + assert(container.children[0].textContent === `--\uD835\uDD53\uD835\uDD53--\uD835\uDD53--`); + }); + + it('renders malformed, named html entities', () => { + const container = document.createElement('div'); + render(container, html`
--&:^);--
`); + assert(container.childElementCount === 1); + assert(container.children[0].textContent === `--&:^);--`); + }); + it('renders surprisingly-accepted characters in text', () => { const container = document.createElement('div'); render(container, html`>'"&& & &
&`); @@ -654,16 +673,10 @@ describe('html rendering', () => { assert(container.querySelector('textarea').value === 'foo'); }); - it('title elements with no interpolation work', () => { + it('pre elements with optional, initial newline work', () => { const container = document.createElement('div'); - render(container, html`<em>this</em> is the “default” value`); - assert(container.querySelector('title').textContent === 'this is the “default” value'); - }); - - it('title elements with strict interpolation work', () => { - const container = document.createElement('div'); - render(container, html`${'foo'}`); - assert(container.querySelector('title').textContent === 'foo'); + render(container, html`
\n\nhi
`); + assert(container.querySelector('pre').textContent === '\nhi'); // first newline is removed }); it('renders instantiated elements as dumb text', () => { @@ -775,24 +788,12 @@ describe('html rendering', () => { #item = null; set item(value) { updates.push(`outer-${value}`); this.#item = value; } get item() { return this.#item; } - connectedCallback() { - // Prevent property shadowing by deleting before setting on connect. - const item = this.item ?? '???'; - Reflect.deleteProperty(this, 'item'); - Reflect.set(this, 'item', item); - } } customElements.define('test-depth-first-outer', TestDepthFirstOuter); class TestDepthFirstInner extends HTMLElement { #item = null; set item(value) { updates.push(`inner-${value}`); this.#item = value; } get item() { return this.#item; } - connectedCallback() { - // Prevent property shadowing by deleting before setting on connect. - const item = this.item ?? '???'; - Reflect.deleteProperty(this, 'item'); - Reflect.set(this, 'item', item); - } } customElements.define('test-depth-first-inner', TestDepthFirstInner); @@ -1099,91 +1100,62 @@ describe('html errors', () => { div { background-color: ${'red'}; } `; - const expectedMessage = 'Interpolation of + Unforgiving.#throughStyle.lastIndex = nextStringIndex; + if (Unforgiving.#throughStyle.test(string)) { + const content = string.slice(nextStringIndex, Unforgiving.#throughStyle.lastIndex - closeTagLength); + element.value.textContent = content; + } else { + const errorMessagesKey = Unforgiving.#namedErrorsToErrorMessagesKey.get('style-interpolation'); + const errorMessage = Unforgiving.#errorMessages.get(errorMessagesKey); + throw new Error(`[${errorMessagesKey}] ${errorMessage}`); } - const html = htmlStrings.join(''); - return language === Forgiving.svg - ? 
`${html}` - : html; - } - - static #createFragment(language, strings) { - const template = document.createElement('template'); - const html = Forgiving.#createHtml(language, strings); - template.innerHTML = html; - return template.content; - } - - // Walk through our fragment that we added special markers to and notify - // integrator when we hit target “paths”. The integrator can use this with - // a subsequent clone of the fragment to establish “targets”. And, while we - // walk, clean up our bespoke markers. - // Note that we are always walking the interpolated strings and the resulting, - // instantiated DOM _in the same depth-first manner_. This means that the - // ordering is fairly reliable. - // - // For example, we walk this structure: - // - //
- // - //
- // - // And end up with this (which is ready to be injected into a container): - // - //
- // - // - //
- // - static #walkFragment( - onBoolean, - onDefined, - onAttribute, - onProperty, - onContent, - onText, - node, - nodeType = Node.DOCUMENT_FRAGMENT_NODE, - path = [], - ) { - // @ts-ignore — TypeScript doesn’t seem to understand the nodeType param. - if (nodeType === Node.ELEMENT_NODE) { - // Special case to handle elements which only allow text content (no comments). - const { localName } = node; - if ( - (localName === 'style' || localName === 'script') && - node.textContent.includes(Forgiving.#CONTENT_MARKER) - ) { - throw new Error(`Interpolation of <${localName}> tags is not allowed.`); - } else if (localName === 'textarea' || localName === 'title') { - if (node.textContent.includes(Forgiving.#CONTENT_MARKER)) { - if (node.textContent === ``) { - node.textContent = ''; - onText(path); - } else { - throw new Error(`Only basic interpolation of <${localName}> tags is allowed.`); - } - } - } + childNodesIndex.value = path.pop(); + element.value = element.value[Unforgiving.#parentNode]; + Unforgiving.#closeTag.lastIndex = Unforgiving.#throughStyle.lastIndex; + return Unforgiving.#closeTag; + } + + static #addUnboundContent(string, stringIndex, element, childNodesIndex, nextStringIndex) { + const encoded = string.slice(stringIndex, nextStringIndex); + const decoded = Unforgiving.#replaceHtmlEntities(encoded); + element.value.appendChild(document.createTextNode(decoded)); + childNodesIndex.value += 1; + } + + static #addUnboundComment(string, stringIndex, element, childNodesIndex, nextStringIndex) { + const content = string.slice(stringIndex, nextStringIndex); + const data = content.slice(4, -3); + // https://w3c.github.io/html-reference/syntax.html#comments + if (data.startsWith('>') || data.startsWith('->') || data.includes('--') || data.endsWith('-')) { + const errorMessagesKey = Unforgiving.#namedErrorsToErrorMessagesKey.get('malformed-comment'); + const errorMessage = Unforgiving.#errorMessages.get(errorMessagesKey); + const substringMessage = `See substring 
\`${content}\`.`; + throw new Error(`[${errorMessagesKey}] ${errorMessage}\n${substringMessage}`); } - if (nodeType === Node.DOCUMENT_FRAGMENT_NODE || nodeType === Node.ELEMENT_NODE) { - // It’s expensive to make a copy of “childNodes”. Instead, we carefully - // manage our index as we iterate over the live collection. - const childNodes = node.childNodes; - for (let iii = 0; iii < childNodes.length; iii++) { - const childNode = childNodes[iii]; - const childNodeType = childNode.nodeType; - if (childNodeType === Node.COMMENT_NODE) { - const textContent = childNode.textContent; - if (textContent.startsWith(Forgiving.#CONTENT_MARKER)) { - childNode.textContent = ''; - const startNode = document.createComment(''); - node.insertBefore(startNode, childNode); - iii++; - onContent([...path, iii]); - } else if (textContent.startsWith(Forgiving.#NEXT_MARKER)) { - const data = textContent.slice(Forgiving.#NEXT_MARKER.length); - const items = data.split(','); - for (const item of items) { - const [binding, name] = item.split('='); - switch (binding) { - case Forgiving.#ATTRIBUTE: onAttribute(name, [...path, iii]); break; - case Forgiving.#BOOLEAN: onBoolean(name, [...path, iii]); break; - case Forgiving.#DEFINED: onDefined(name, [...path, iii]); break; - case Forgiving.#PROPERTY: onProperty(name, [...path, iii]); break; - } - } - iii--; - node.removeChild(childNode); - } - } else if (childNodeType === Node.ELEMENT_NODE) { - Forgiving.#walkFragment( - onBoolean, - onDefined, - onAttribute, - onProperty, - onContent, - onText, - childNode, - childNodeType, - [...path, iii], - ); - } + element.value.appendChild(document.createComment(data)); + childNodesIndex.value += 1; + } + + static #addBoundContent(onContent, path, element, childNodesIndex) { + element.value.append(document.createComment(''), document.createComment('')); + childNodesIndex.value += 2; + path.push(childNodesIndex.value); + onContent(path); + path.pop(); + } + + // This can only happen with a “textarea” 
element, currently. + static #addBoundText(onText, string, path, element, sloppyStartInterpolation) { + // If the prior match isn’t our opening tag… that’s a problem. If the next + // match isn’t our closing tag… that’s also a problem. + // Because we tightly control the end-tag format, we can predict what the + // next string’s prefix should be. + if (sloppyStartInterpolation || !string.startsWith(``)) { + const errorMessagesKey = Unforgiving.#namedErrorsToErrorMessagesKey.get('complex-textarea-interpolation'); + const errorMessage = Unforgiving.#errorMessages.get(errorMessagesKey); + throw new Error(`[${errorMessagesKey}] ${errorMessage}`); } + onText(path); + } + + static #addUnboundBoolean(string, stringIndex, element, nextStringIndex) { + const attributeName = string.slice(stringIndex, nextStringIndex); + element.value.setAttribute(attributeName, ''); + } + + static #addUnboundAttribute(string, stringIndex, element, nextStringIndex) { + const unboundAttribute = string.slice(stringIndex, nextStringIndex); + const equalsIndex = unboundAttribute.indexOf('='); + const attributeName = unboundAttribute.slice(0, equalsIndex); + const encoded = unboundAttribute.slice(equalsIndex + 2, -1); + const decoded = Unforgiving.#replaceHtmlEntities(encoded); + element.value.setAttribute(attributeName, decoded); + } + + static #addBoundBoolean(onBoolean, string, stringIndex, path, nextStringIndex) { + const boundBoolean = string.slice(stringIndex, nextStringIndex); + const equalsIndex = boundBoolean.indexOf('='); + const attributeName = boundBoolean.slice(1, equalsIndex); + onBoolean(attributeName, path); + } + + static #addBoundDefined(onDefined, string, stringIndex, path, nextStringIndex) { + const boundDefined = string.slice(stringIndex, nextStringIndex); + const equalsIndex = boundDefined.indexOf('='); + const attributeName = boundDefined.slice(2, equalsIndex); + onDefined(attributeName, path); + } + + static #addBoundAttribute(onAttribute, string, stringIndex, path, 
nextStringIndex) { + const boundAttribute = string.slice(stringIndex, nextStringIndex); + const equalsIndex = boundAttribute.indexOf('='); + const attributeName = boundAttribute.slice(0, equalsIndex); + onAttribute(attributeName, path); + } + + static #addBoundProperty(onProperty, string, stringIndex, path, nextStringIndex) { + const boundProperty = string.slice(stringIndex, nextStringIndex); + const equalsIndex = boundProperty.indexOf('='); + const propertyName = boundProperty.slice(1, equalsIndex); + onProperty(propertyName, path); + } + + static #validateTagName(namespace, tagName) { + switch (namespace) { + case Unforgiving.html: + if ( + tagName.indexOf('-') === -1 && + !Unforgiving.#allowedHtmlElements.has(tagName) + ) { + const errorMessagesKey = Unforgiving.#namedErrorsToErrorMessagesKey.get('forbidden-html-element'); + const errorMessage = Unforgiving.#errorMessages.get(errorMessagesKey); + const substringMessage = `The <${tagName}> html element is forbidden.`; + throw new Error(`[${errorMessagesKey}] ${errorMessage}\n${substringMessage}`); + } + break; + case Unforgiving.svg: + if (!Unforgiving.#allowedSvgElements.has(tagName)) { + const errorMessagesKey = Unforgiving.#namedErrorsToErrorMessagesKey.get('forbidden-svg-element'); + const errorMessage = Unforgiving.#errorMessages.get(errorMessagesKey); + const substringMessage = `The <${tagName}> svg element is forbidden.`; + throw new Error(`[${errorMessagesKey}] ${errorMessage}\n${substringMessage}`); + } + break; + case Unforgiving.math: + if (!Unforgiving.#allowedMathElements.has(tagName)) { + const errorMessagesKey = Unforgiving.#namedErrorsToErrorMessagesKey.get('forbidden-math-element'); + const errorMessage = Unforgiving.#errorMessages.get(errorMessagesKey); + const substringMessage = `The <${tagName}> math element is forbidden.`; + throw new Error(`[${errorMessagesKey}] ${errorMessage}\n${substringMessage}`); + } + break; } } - // TODO: Replace with Map.prototype.getOrInsert when TC39 proposal 
lands. - // https://github.com/tc39/proposal-upsert - static #setIfMissing(map, key, callback) { - // Values set in this file are ALL truthy, so "get" is used (versus "has"). - let value = map.get(key); - if (!value) { - value = callback(); - map.set(key, value); + static #addElement(string, stringIndex, path, element, childNodesIndex, nextStringIndex) { + const prefixedTagName = string.slice(stringIndex, nextStringIndex); + const tagName = prefixedTagName.slice(1); + const currentNamespace = element.value[Unforgiving.#namespace]; + Unforgiving.#validateTagName(currentNamespace, tagName); + let namespace; + switch (tagName) { + case 'svg': namespace = Unforgiving.svg; break; + case 'math': namespace = Unforgiving.math; break; + default: namespace = currentNamespace; break; } - return value; + const childNode = document.createElementNS(namespace, tagName); + element.value[Unforgiving.#localName] === 'template' + ? element.value.content.appendChild(childNode) + : element.value.appendChild(childNode); + childNode[Unforgiving.#localName] = tagName; + childNode[Unforgiving.#parentNode] = element.value; + childNode[Unforgiving.#namespace] = namespace; + element.value = childNode; + childNodesIndex.value += 1; + path.push(childNodesIndex.value); + } + + static #finalizeElement(strings, stringsIndex, string, stringIndex, path, element, childNodesIndex, nextStringIndex) { + const closeTag = string.slice(stringIndex, nextStringIndex); + const tagName = closeTag.slice(2, -1); + const expectedTagName = element.value[Unforgiving.#localName]; + if (tagName !== expectedTagName) { + const { parsed } = Unforgiving.#getErrorInfo(strings, stringsIndex, string, stringIndex); + const errorMessagesKey = Unforgiving.#namedErrorsToErrorMessagesKey.get('mismatched-closing-tag'); + const errorMessage = Unforgiving.#errorMessages.get(errorMessagesKey); + const substringMessage = `The closing tag does not match <${expectedTagName}>.`; + const parsedThroughMessage = `Your HTML was parsed 
through: \`${parsed}\`.`; + throw new Error(`[${errorMessagesKey}] ${errorMessage}\n${substringMessage}\n${parsedThroughMessage}`); + } + childNodesIndex.value = path.pop(); + element.value = element.value[Unforgiving.#parentNode]; } - // Languages. - static html = 'html'; - static svg = 'svg'; + static #styleDeprecationWarning() { + if (!Unforgiving.#hasWarnedAboutStyleDeprecation) { + Unforgiving.#hasWarnedAboutStyleDeprecation = true; + const error = new Error('Support for the "style" tag is deprecated and will be removed in future versions.'); + console.warn(error); // eslint-disable-line no-console + } + } + + ////////////////////////////////////////////////////////////////////////////// + // Public parsing interface ////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////////// + + static html = 'http://www.w3.org/1999/xhtml'; + static svg = 'http://www.w3.org/2000/svg'; + static math = 'http://www.w3.org/1998/Math/MathML'; + + static parse(strings, onBoolean, onDefined, onAttribute, onProperty, onContent, onText, namespace) { + const fragment = Unforgiving.#fragment.cloneNode(false); + fragment[Unforgiving.#namespace] = namespace ??= Unforgiving.html; + + const path = []; + const childNodesIndex = { value: -1 }; // Wrapper to allow better factoring. + const element = { value: fragment }; // Wrapper to allow better factoring. 
+ + const stringsLength = strings.length; + let stringsIndex = 0; + let string = null; + let stringLength = null; + let stringIndex = null; + let nextStringIndex = null; + let value = Unforgiving.#initial; + + while (stringsIndex < stringsLength) { + string = strings[stringsIndex]; + + Unforgiving.#validateRawString(strings.raw[stringsIndex]); + if (stringsIndex > 0) { + switch (value) { + case Unforgiving.#initial: + case Unforgiving.#boundContent: + case Unforgiving.#unboundContent: + case Unforgiving.#openTagEnd: + case Unforgiving.#closeTag: + if (element.value[Unforgiving.#localName] === 'textarea') { + // The textarea tag only accepts text, we restrict interpolation + // there. See note on “replaceable character data” in the + // following reference document: + // https://w3c.github.io/html-reference/syntax.html#text-syntax + const sloppyStartInterpolation = value !== Unforgiving.#openTagEnd; + Unforgiving.#addBoundText(onText, string, path, element, sloppyStartInterpolation); + } else { + Unforgiving.#addBoundContent(onContent, path, element, childNodesIndex); + } + value = Unforgiving.#boundContent; + nextStringIndex = value.lastIndex; + break; + } + } + + stringLength = string.length; + stringIndex = 0; + while (stringIndex < stringLength) { + // The string will be empty if we have a template like this `${…}${…}`. 
+ // See related logic at the end of the inner loop; + if (string.length > 0) { + const nextValue = Unforgiving.#validTransition(string, stringIndex, value); + if (!nextValue) { + Unforgiving.#throwTransitionError(strings, stringsIndex, string, stringIndex, value); + } + value = nextValue; + nextStringIndex = value.lastIndex; + } - static parse(strings, onBoolean, onDefined, onAttribute, onProperty, onContent, onText, language) { - const fragment = Forgiving.#createFragment(language, strings); - Forgiving.#walkFragment(onBoolean, onDefined, onAttribute, onProperty, onContent, onText, fragment); + // When we transition into certain values, we need to take action. + switch (value) { + case Unforgiving.#unboundContent: + Unforgiving.#addUnboundContent(string, stringIndex, element, childNodesIndex, nextStringIndex); + break; + case Unforgiving.#unboundComment: + Unforgiving.#addUnboundComment(string, stringIndex, element, childNodesIndex, nextStringIndex); + break; + case Unforgiving.#openTagStart: + Unforgiving.#addElement(string, stringIndex, path, element, childNodesIndex, nextStringIndex); + break; + case Unforgiving.#unboundBoolean: + Unforgiving.#addUnboundBoolean(string, stringIndex, element, nextStringIndex); + break; + case Unforgiving.#unboundAttribute: + Unforgiving.#addUnboundAttribute(string, stringIndex, element, nextStringIndex); + break; + case Unforgiving.#boundBoolean: + Unforgiving.#addBoundBoolean(onBoolean, string, stringIndex, path, nextStringIndex); + break; + case Unforgiving.#boundDefined: + Unforgiving.#addBoundDefined(onDefined, string, stringIndex, path, nextStringIndex); + break; + case Unforgiving.#boundAttribute: + Unforgiving.#addBoundAttribute(onAttribute, string, stringIndex, path, nextStringIndex); + break; + case Unforgiving.#boundProperty: + Unforgiving.#addBoundProperty(onProperty, string, stringIndex, path, nextStringIndex); + break; + case Unforgiving.#openTagEnd: + if (element.value[Unforgiving.#namespace] === Unforgiving.html) 
{ + const tagName = element.value[Unforgiving.#localName]; + if (Unforgiving.#voidHtmlElements.has(tagName)) { + value = Unforgiving.#finalizeVoidElement(path, element, childNodesIndex, nextStringIndex); + nextStringIndex = value.lastIndex; + } else if (tagName === 'style') { + Unforgiving.#styleDeprecationWarning(); + value = Unforgiving.#finalizeStyle(string, path, element, childNodesIndex, nextStringIndex); + nextStringIndex = value.lastIndex; + } else if ( + tagName === 'textarea' && + Unforgiving.#openTagEnd.lastIndex !== string.length + ) { + value = Unforgiving.#finalizeTextarea(string, path, element, childNodesIndex, nextStringIndex); + nextStringIndex = value.lastIndex; + } else if (tagName === 'pre' && string[value.lastIndex] === '\n') { + // An initial newline character is optional for
+                // <pre> tags.
+                //  https://html.spec.whatwg.org/multipage/syntax.html#element-restrictions
+                value.lastIndex++;
+                nextStringIndex = value.lastIndex;
+                // Assume we’re traversing into the new element and reset index.
+                childNodesIndex.value = -1;
+              } else if (
+                tagName === 'template' &&
+                // @ts-ignore — TypeScript doesn’t get that this is a “template”.
+                element.value.hasAttribute('shadowrootmode')
+              ) {
+                const errorMessagesKey = Unforgiving.#namedErrorsToErrorMessagesKey.get('declarative-shadow-root');
+                const errorMessage = Unforgiving.#errorMessages.get(errorMessagesKey);
+                throw new Error(`[${errorMessagesKey}] ${errorMessage}`);
+              } else {
+                // Assume we’re traversing into the new element and reset index.
+                childNodesIndex.value = -1;
+              }
+            } else {
+              // Assume we’re traversing into the new element and reset index.
+              childNodesIndex.value = -1;
+            }
+            break;
+          case Unforgiving.#closeTag:
+            Unforgiving.#finalizeElement(strings, stringsIndex, string, stringIndex, path, element, childNodesIndex, nextStringIndex);
+            break;
+        }
+        stringIndex = nextStringIndex; // Update our pointer from our pattern match.
+      }
+      stringsIndex++;
+    }
+    Unforgiving.#validateExit(fragment, element);
     return fragment;
   }
 }
@@ -796,6 +1518,77 @@ class TemplateEngine {
     }
   }
 
+  // TODO: Future state here — we’ll eventually just guard against value changes
+  //  at a higher level and will remove all updater logic.
+  // static #commitAttribute(node, name, value) {
+  //   node.setAttribute(name, value);
+  // }
+  // static #commitBoolean(node, name, value) {
+  //   value ? node.setAttribute(name, '') : node.removeAttribute(name);
+  // }
+  // static #commitDefined(node, name, value) {
+  //   value === undefined || value === null
+  //     ? node.removeAttribute(name)
+  //     : node.setAttribute(name, value);
+  // }
+  // static #commitProperty(node, name, value) {
+  //   node[name] = value;
+  // }
+  // static #commitContent(node, startNode, value, lastValue) {
+  //   const category = TemplateEngine.#getCategory(value);
+  //   const lastCategory = TemplateEngine.#getCategory(lastValue);
+  //   if (category !== lastCategory && lastValue !== TemplateEngine.#UNSET) {
+  //     // Reset content under certain conditions. E.g., `map(…)` >> `null`.
+  //     const state = TemplateEngine.#getState(node, TemplateEngine.#STATE);
+  //     const arrayState = TemplateEngine.#getState(startNode, TemplateEngine.#ARRAY_STATE);
+  //     TemplateEngine.#removeBetween(startNode, node);
+  //     TemplateEngine.#clearObject(state);
+  //     TemplateEngine.#clearObject(arrayState);
+  //   }
+  //   if (category === 'result') {
+  //     const state = TemplateEngine.#getState(node, TemplateEngine.#STATE);
+  //     const rawResult = value;
+  //     if (!TemplateEngine.#canReuseDom(state.preparedResult, rawResult)) {
+  //       TemplateEngine.#removeBetween(startNode, node);
+  //       TemplateEngine.#clearObject(state);
+  //       const preparedResult = TemplateEngine.#inject(rawResult, node, true);
+  //       state.preparedResult = preparedResult;
+  //     } else {
+  //       TemplateEngine.#update(state.preparedResult, rawResult);
+  //     }
+  //   } else if (category === 'array' || category === 'map') {
+  //     TemplateEngine.#list(node, startNode, value, category);
+  //   } else if (category === 'fragment') {
+  //     if (value.childElementCount === 0) {
+  //       throw new Error(`Unexpected child element count of zero for given DocumentFragment.`);
+  //     }
+  //     const previousSibling = node.previousSibling;
+  //     if (previousSibling !== startNode) {
+  //       TemplateEngine.#removeBetween(startNode, node);
+  //     }
+  //     node.parentNode.insertBefore(value, node);
+  //   } else {
+  //     // TODO: Is there a way to more-performantly skip this init step? E.g., if
+  //     //  the prior value here was not “unset” and we didn’t just reset? We
+  //     //  could cache the target node in these cases or something?
+  //     const previousSibling = node.previousSibling;
+  //     if (previousSibling === startNode) {
+  //       // The `?? ''` is a shortcut for creating a text node and then
+  //       //  setting its textContent. It’s exactly equivalent to the
+  //       //  following code, but faster.
+  //       // const textNode = document.createTextNode('');
+  //       // textNode.textContent = value;
+  //       const textNode = document.createTextNode(value ?? '');
+  //       node.parentNode.insertBefore(textNode, node);
+  //     } else {
+  //       previousSibling.textContent = value;
+  //     }
+  //   }
+  // }
+  // static #commitText(node, value) {
+  //   node.textContent = value;
+  // }
+
   static #commitContent(node, startNode, value, lastValue) {
     const introspection = TemplateEngine.#getValueIntrospection(value);
     const lastIntrospection = TemplateEngine.#getValueIntrospection(lastValue);
@@ -882,6 +1675,23 @@ class TemplateEngine {
     }
   }
 
+  // TODO: Future state — we’ll later do change-by-reference detection here.
+  // // Bind the current values from a result by walking through each target and
+  // //  updating the DOM if things have changed.
+  // static #commit(preparedResult) {
+  //   preparedResult.values ??= preparedResult.rawResult.values;
+  //   preparedResult.lastValues ??= preparedResult.values.map(() => TemplateEngine.#UNSET);
+  //   const { targets, values, lastValues } = preparedResult;
+  //   for (let iii = 0; iii < targets.length; iii++) {
+  //     const value = values[iii];
+  //     const lastValue = lastValues[iii];
+  //     if (value !== lastValue) {
+  //       const target = targets[iii];
+  //       target(value, lastValue);
+  //     }
+  //   }
+  // }
+
   // Bind the current values from a result by walking through each target and
   //  updating the DOM if things have changed.
   static #commit(preparedResult) {
@@ -935,7 +1745,7 @@ class TemplateEngine {
 
   // Inject a given result into a node for the first time.
   static #inject(rawResult, node, before) {
-    // Get fragment created from a tagged template function’s “strings”.
+    // Create and prepare a document fragment to be injected.
     const { [TemplateEngine.#ANALYSIS]: analysis } = rawResult;
     const fragment = analysis.fragment.cloneNode(true);
     const targets = TemplateEngine.#findTargets(fragment, analysis.lookups);
@@ -970,8 +1780,8 @@ class TemplateEngine {
       const onProperty =  TemplateEngine.#storeKeyLookup.bind(null, lookups, TemplateEngine.#PROPERTY);
       const onContent = TemplateEngine.#storeContentLookup.bind(null, lookups);
       const onText = TemplateEngine.#storeTextLookup.bind(null, lookups);
-      const forgivingLanguage = language === TemplateEngine.#SVG ? Forgiving.svg : Forgiving.html;
-      const fragment = Forgiving.parse(strings, onBoolean, onDefined, onAttribute, onProperty, onContent, onText, forgivingLanguage);
+      const namespace = language === TemplateEngine.#SVG ? Unforgiving.svg : Unforgiving.html;
+      const fragment = Unforgiving.parse(strings, onBoolean, onDefined, onAttribute, onProperty, onContent, onText, namespace);
       analysis.fragment = fragment;
       analysis.lookups = lookups;
       analysis.done = true;
@@ -1059,6 +1869,16 @@ class TemplateEngine {
     }
   }
 
+  // TODO: Future state — we may choose to iterate differently as an
+  //  optimization in later versions.
+  // static #removeWithin(node) {
+  //   let childNode = node.lastChild;
+  //   while (childNode) {
+  //     const nextChildNode = childNode.previousSibling;
+  //     node.removeChild(childNode);
+  //     childNode = nextChildNode;
+  //   }
+  // }
   static #removeWithin(node) {
     // Iterate backwards over the live node collection since we’re mutating it.
     const childNodes = node.childNodes;
@@ -1067,12 +1887,31 @@ class TemplateEngine {
     }
   }
 
+  // TODO: Future state — we may choose to iterate differently as an
+  //  optimization in later versions.
+  // static #removeBetween(startNode, node, parentNode) {
+  //   parentNode ??= node.parentNode;
+  //   let childNode = node.previousSibling;
+  //   while(childNode !== startNode) {
+  //     const nextChildNode = childNode.previousSibling;
+  //     parentNode.removeChild(childNode);
+  //     childNode = nextChildNode;
+  //   }
+  // }
   static #removeBetween(startNode, node) {
     while(node.previousSibling !== startNode) {
       node.previousSibling.remove();
     }
   }
 
+  // TODO: Future state — we may choose to iterate differently as an
+  //  optimization in later versions.
+  // static #removeThrough(startNode, node, parentNode) {
+  //   parentNode ??= node.parentNode;
+  //   TemplateEngine.#removeBetween(startNode, node, parentNode);
+  //   parentNode.removeChild(startNode);
+  //   parentNode.removeChild(node);
+  // }
   static #removeThrough(startNode, node) {
     TemplateEngine.#removeBetween(startNode, node);
     startNode.remove();