Skip to content

Commit

Permalink
[CVE-2024-53277] Sanitise form messages against XSS attacks
Browse files Browse the repository at this point in the history
Includes some new additional XSS protection inspired by Symfony
  • Loading branch information
GuySartorelli committed Jan 14, 2025
1 parent cd1d5de commit c233963
Show file tree
Hide file tree
Showing 6 changed files with 804 additions and 11 deletions.
213 changes: 213 additions & 0 deletions src/Core/XssSanitiser.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
<?php

namespace SilverStripe\Core;

use DOMAttr;
use DOMElement;
use SilverStripe\Core\Injector\Injectable;
use SilverStripe\View\Parsers\HTMLValue;

/**
* Sanitises HTML to prevent XSS attacks.
*/
class XssSanitiser
{
use Injectable;

/**
* Attributes which will be removed from any element.
* If an asterisk is at the start of the attribute name, all attributes ending with this name will be removed.
* If an asterisk is at the end of the attribute name, all attributes starting with this name will be removed.
* For example `on*` will remove `onerror`, `onmouseover`, etc
*/
private array $attributesToRemove = [
'on*',
'accesskey',
];

private array $elementsToRemove = [
'embed',
'object',
'script',
'style',
'svg',
];

private bool $keepInnerHtmlOnRemoveElement = true;

private bool $removeDataSvg = true;

private bool $removeSvgFile = true;

/**
* Remove XSS attack vectors from an HTML fragment string
*/
public function sanitiseString(string $html): string
{
$htmlValue = HTMLValue::create($html);
$this->sanitiseHtmlValue($htmlValue);
return $htmlValue->getContent();
}

/**
* Remove XSS attack vectors from HTMLValue content
*/
public function sanitiseHtmlValue(HTMLValue $html): void
{
foreach ($html->query('//*') as $element) {
if (!is_a($element, DOMElement::class)) {
continue;
}
$this->sanitiseElement($element);
}
}

/**
* Remove XSS attack vectors from a DOMElement
*/
public function sanitiseElement(DOMElement $element): void
{
// Remove elements first - if we remove the element, we don't have any attributes to check so exit early
$removed = $this->stripElement($element);
if ($removed) {
return;
}
$this->stripAttributes($element);
$this->stripAttributeContents($element);
}

/**
* Get the names of elements which will be removed.
*/
public function getElementsToRemove(): array
{
return $this->elementsToRemove;
}

/**
* Set the names of elements which will be removed.
* Note that allowing the elements which are included in the default list could result in XSS vulnerabilities.
*/
public function setElementsToRemove(array $elements): static
{
$this->elementsToRemove = $elements;
return $this;
}

/**
* Get the names of attributes which will be removed from any elements that have them.
*/
public function getAttributesToRemove(): array
{
return $this->attributesToRemove;
}

/**
* Set the names of attributes which will be removed from any elements that have them.
* Note that allowing the attributes which are included in the default list could result in XSS vulnerabilities.
*/
public function setAttributesToRemove(array $attributes): static
{
$this->attributesToRemove = $attributes;
return $this;
}

/**
* Get whether the inner contents of an element will be kept for elements that get removed.
*/
public function getKeepInnerHtmlOnRemoveElement(): bool
{
return $this->keepInnerHtmlOnRemoveElement;
}

/**
* Set whether to keep the inner contents of an element if it gets removed.
*/
public function setKeepInnerHtmlOnRemoveElement(bool $keep): static
{
$this->keepInnerHtmlOnRemoveElement = $keep;
return $this;
}

/**
* If $element is one of the elements in $elementsToRemove, replace it
* with a text node.
*/
private function stripElement(DOMElement $element): bool
{
if (!in_array($element->tagName, $this->getElementsToRemove())) {
return false;
}
// Make sure we don't remove any child nodes
$parentNode = $element->parentNode;
if ($this->getKeepInnerHtmlOnRemoveElement() && $parentNode && $element->hasChildNodes()) {
// We can't just iterate through $node->childNodes because that seems to skip some children
while ($element->hasChildNodes()) {
$parentNode->insertBefore($element->firstChild, $element);
}
}
$element->remove();
return true;
}

/**
* Remove all attributes in $attributesToRemove from the element.
*/
private function stripAttributes(DOMElement $element): void
{
$attributesToRemove = $this->getAttributesToRemove();
if (empty($attributesToRemove)) {
return;
}
$attributes = $element->attributes;
for ($i = count($attributes) - 1; $i >= 0; $i--) {
/** @var DOMAttr $attr */
$attr = $attributes->item($i);
foreach ($attributesToRemove as $toRemove) {
if (str_starts_with($toRemove, '*') && str_ends_with($attr->name, str_replace('*', '', $toRemove))) {
$element->removeAttributeNode($attr);
} elseif (str_ends_with($toRemove, '*') && str_starts_with($attr->name, str_replace('*', '', $toRemove))) {
$element->removeAttributeNode($attr);
} elseif (!str_contains($toRemove, '*') && $attr->name === $toRemove) {
$element->removeAttributeNode($attr);
}
}
}
}

/**
* Strip out attributes which have dangerous content which might otherwise execute javascript.
* This is content that we will always remove regardless of whether the attributes and elements in question
* are otherwise allowed, e.g. via WYSIWYG configuration.
*/
private function stripAttributeContents(DOMElement $element): void
{
$regex = $this->getStripAttributeContentsRegex();
foreach (['lowsrc', 'src', 'href', 'data'] as $dangerAttribute) {
if ($element->hasAttribute($dangerAttribute)) {
$attrContent = $element->getAttribute($dangerAttribute);
if (preg_match($regex, $attrContent)) {
$element->removeAttribute($dangerAttribute);
}
}
}
}

private function getStripAttributeContentsRegex(): string
{
$regexes = [
$this->splitWithWhitespaceRegex('javascript:'),
$this->splitWithWhitespaceRegex('data:text/html'),
$this->splitWithWhitespaceRegex('vbscript:'),
];
// Regex is "starts with any of these, with optional whitespace at the start, case insensitive"
return '#^\s*(' . implode('|', $regexes) . ')#iu';
}

private function splitWithWhitespaceRegex(string $string): string
{
// Note that `\s` explicitly includes ALL invisible characters when used with the `u` modifier.
// That includes unicode characters like the non-breaking space.
return implode('\s*', str_split($string));
}
}
8 changes: 7 additions & 1 deletion src/Forms/FormMessage.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
namespace SilverStripe\Forms;

use InvalidArgumentException;
use SilverStripe\Core\XssSanitiser;
use SilverStripe\ORM\ValidationResult;
use SilverStripe\View\ViewableData;

Expand Down Expand Up @@ -33,14 +34,19 @@ trait FormMessage

/**
* Returns the field message, used by form validation.
* If the current cast is ValidationResult::CAST_HTML, the message will be sanitised.
*
* Use {@link setError()} to set this property.
*
* @return string
*/
public function getMessage()
{
return $this->message;
$message = $this->message;
if ($this->getMessageCast() === ValidationResult::CAST_HTML) {
$message = XssSanitiser::create()->sanitiseString($message);
}
return $message;
}

/**
Expand Down
17 changes: 7 additions & 10 deletions src/Forms/HTMLEditor/HTMLEditorSanitiser.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
use DOMElement;
use SilverStripe\Core\Config\Configurable;
use SilverStripe\Core\Injector\Injectable;
use SilverStripe\Core\XssSanitiser;
use SilverStripe\View\Parsers\HTMLValue;
use stdClass;

Expand Down Expand Up @@ -289,6 +290,10 @@ public function sanitise(HTMLValue $html)
{
$linkRelValue = $this->config()->get('link_rel_value');
$doc = $html->getDocument();
// Get a sanitiser but don't deny any specific attributes or elements, since that's
// handled as part of the element rules.
$xssSanitiser = XssSanitiser::create();
$xssSanitiser->setElementsToRemove([])->setAttributesToRemove([]);

/** @var DOMElement $el */
foreach ($html->query('//body//*') as $el) {
Expand Down Expand Up @@ -342,16 +347,8 @@ public function sanitise(HTMLValue $html)
$el->setAttribute($attr, $forced);
}

// Matches "javascript:" with any arbitrary linebreaks inbetween the characters.
$regex = '#^\s*(' . implode('\s*', str_split('javascript:')) . '|' . implode('\s*', str_split('data:text/html;')) . ')#i';
// Strip out javascript execution in href or src attributes.
foreach (['src', 'href', 'data'] as $dangerAttribute) {
if ($el->hasAttribute($dangerAttribute)) {
if (preg_match($regex, $el->getAttribute($dangerAttribute))) {
$el->removeAttribute($dangerAttribute);
}
}
}
// Explicit XSS sanitisation for anything that there's really no sensible use case for in a WYSIWYG
$xssSanitiser->sanitiseElement($el);
}

if ($el->tagName === 'a' && $linkRelValue !== null) {
Expand Down
Loading

0 comments on commit c233963

Please sign in to comment.