From 10ebe53bd9c9f086efaa35eecd3656f1a201d84d Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Fri, 13 Feb 2026 14:14:28 +0000 Subject: [PATCH 01/16] Page Content: Added more complex & configurable content filtering - Added new option to control parts of the filter. - Added whitelist filtering pass via HTMLPurifier. --- app/Activity/Models/Comment.php | 4 +- app/Config/app.php | 12 ++ app/Entities/Tools/EntityHtmlDescription.php | 4 +- app/Entities/Tools/PageContent.php | 24 +++- app/Theming/CustomHtmlHeadContentProvider.php | 22 +-- app/Util/HtmlContentFilter.php | 129 ++++++++++++------ app/Util/HtmlContentFilterConfig.php | 31 +++++ composer.json | 4 +- composer.lock | 123 ++++++++++++++++- storage/purifier/.gitignore | 2 + 10 files changed, 292 insertions(+), 63 deletions(-) create mode 100644 app/Util/HtmlContentFilterConfig.php create mode 100644 storage/purifier/.gitignore diff --git a/app/Activity/Models/Comment.php b/app/Activity/Models/Comment.php index ce05e3df35b..ab7d917729c 100644 --- a/app/Activity/Models/Comment.php +++ b/app/Activity/Models/Comment.php @@ -8,6 +8,7 @@ use BookStack\Users\Models\HasCreatorAndUpdater; use BookStack\Users\Models\OwnableInterface; use BookStack\Util\HtmlContentFilter; +use BookStack\Util\HtmlContentFilterConfig; use Illuminate\Database\Eloquent\Builder; use Illuminate\Database\Eloquent\Factories\HasFactory; use Illuminate\Database\Eloquent\Relations\BelongsTo; @@ -82,7 +83,8 @@ public function logDescriptor(): string public function safeHtml(): string { - return HtmlContentFilter::removeActiveContentFromHtmlString($this->html ?? ''); + $filter = new HtmlContentFilter(new HtmlContentFilterConfig()); + return $filter->filterString($this->html ?? ''); } public function jointPermissions(): HasMany diff --git a/app/Config/app.php b/app/Config/app.php index 40e542d3e16..acd27e98c02 100644 --- a/app/Config/app.php +++ b/app/Config/app.php @@ -42,6 +42,18 @@ // Even when overridden the WYSIWYG editor may still escape script content. 'allow_content_scripts' => env('ALLOW_CONTENT_SCRIPTS', false), + // Control the behaviour of page content filtering. + // This setting is a collection of characters which represent different available filters: + // - j - Filter out JavaScript based content + // - h - Filter out unexpected, potentially dangerous, HTML elements + // - f - Filter out unexpected form elements + // - a - Run content through a more complex allow-list filter + // This defaults to using all filters, unless ALLOW_CONTENT_SCRIPTS is set to true in which case no filters are used. + // Note: These filters are a best attempt, and may not be 100% effective. They are typically a layer used in addition to other security measures. + // TODO - Add to example env + // TODO - Remove allow_content_scripts option above + 'content_filtering' => env('CONTENT_FILTERING', env('ALLOW_CONTENT_SCRIPTS', false) === true ? '' : 'jfha'), + // Allow server-side fetches to be performed to potentially unknown // and user-provided locations. Primarily used in exports when loading // in externally referenced assets. diff --git a/app/Entities/Tools/EntityHtmlDescription.php b/app/Entities/Tools/EntityHtmlDescription.php index b14deb257a7..6bbfb9b6651 100644 --- a/app/Entities/Tools/EntityHtmlDescription.php +++ b/app/Entities/Tools/EntityHtmlDescription.php @@ -6,6 +6,7 @@ use BookStack\Entities\Models\Bookshelf; use BookStack\Entities\Models\Chapter; use BookStack\Util\HtmlContentFilter; +use BookStack\Util\HtmlContentFilterConfig; class EntityHtmlDescription { @@ -50,7 +51,8 @@ public function getHtml(bool $raw = false): string return $html; } - return HtmlContentFilter::removeActiveContentFromHtmlString($html); + $filter = new HtmlContentFilter(new HtmlContentFilterConfig()); + return $filter->filterString($html); } public function getPlain(): string diff --git a/app/Entities/Tools/PageContent.php b/app/Entities/Tools/PageContent.php index 5358e8f0c5b..ca06e696185 100644 --- a/app/Entities/Tools/PageContent.php +++ b/app/Entities/Tools/PageContent.php @@ -13,6 +13,7 @@ use BookStack\Uploads\ImageService; use BookStack\Users\Models\User; use BookStack\Util\HtmlContentFilter; +use BookStack\Util\HtmlContentFilterConfig; use BookStack\Util\HtmlDocument; use BookStack\Util\WebSafeMimeSniffer; use Closure; @@ -317,11 +318,28 @@ public function render(bool $blankIncludes = false): string $this->updateIdsRecursively($doc->getBody(), 0, $idMap, $changeMap); } - if (!config('app.allow_content_scripts')) { - HtmlContentFilter::removeActiveContentFromDocument($doc); + $cacheKey = $this->getContentCacheKey($doc->getBodyInnerHtml()); + $cached = cache()->get($cacheKey, null); + if ($cached !== null) { + return $cached; } - return $doc->getBodyInnerHtml(); + $filterConfig = HtmlContentFilterConfig::fromConfigString(config('app.content_filtering')); + $filter = new HtmlContentFilter($filterConfig); + $filtered = $filter->filterDocument($doc); + + $cacheTime = 86400 * 7; // 1 week + cache()->put($cacheKey, $filtered, $cacheTime); + + return $filtered; + } + + protected function getContentCacheKey(string $html): string + { + $contentHash = md5($html); + $contentId = $this->page->id; + $contentTime = $this->page->updated_at->timestamp; + return "page-content-cache::{$contentId}::{$contentTime}::{$contentHash}"; } /** diff --git a/app/Theming/CustomHtmlHeadContentProvider.php b/app/Theming/CustomHtmlHeadContentProvider.php index e0cf5b3b5c7..dab30606c34 100644 --- a/app/Theming/CustomHtmlHeadContentProvider.php +++ b/app/Theming/CustomHtmlHeadContentProvider.php @@ -4,25 +4,16 @@ use BookStack\Util\CspService; use BookStack\Util\HtmlContentFilter; +use BookStack\Util\HtmlContentFilterConfig; use BookStack\Util\HtmlNonceApplicator; use Illuminate\Contracts\Cache\Repository as Cache; class CustomHtmlHeadContentProvider { - /** - * @var CspService - */ - protected $cspService; - - /** - * @var Cache - */ - protected $cache; - - public function __construct(CspService $cspService, Cache $cache) - { - $this->cspService = $cspService; - $this->cache = $cache; + public function __construct( + protected CspService $cspService, + protected Cache $cache + ) { } /** @@ -50,7 +41,8 @@ public function forExport(): string $hash = md5($content); return $this->cache->remember('custom-head-export:' . $hash, 86400, function () use ($content) { - return HtmlContentFilter::removeActiveContentFromHtmlString($content); + $config = new HtmlContentFilterConfig(filterOutNonContentElements: false); + return (new HtmlContentFilter($config))->filterString($content); }); } diff --git a/app/Util/HtmlContentFilter.php b/app/Util/HtmlContentFilter.php index ad5bf8c5fd3..842e4246736 100644 --- a/app/Util/HtmlContentFilter.php +++ b/app/Util/HtmlContentFilter.php @@ -5,15 +5,53 @@ use DOMAttr; use DOMElement; use DOMNodeList; +use HTMLPurifier; +use HTMLPurifier_HTML5Config; class HtmlContentFilter { - /** - * Remove all active content from the given HTML document. - * This aims to cover anything which can dynamically deal with, or send, data - * like any JavaScript actions or form content. - */ - public static function removeActiveContentFromDocument(HtmlDocument $doc): void + public function __construct( + protected HtmlContentFilterConfig $config + ) { + } + + public function filterDocument(HtmlDocument $doc): string + { + if ($this->config->filterOutJavaScript) { + $this->filterOutScriptsFromDocument($doc); + } + if ($this->config->filterOutFormElements) { + $this->filterOutFormElementsFromDocument($doc); + } + if ($this->config->filterOutBadHtmlElements) { + $this->filterOutBadHtmlElementsFromDocument($doc); + } + if ($this->config->filterOutNonContentElements) { + $this->filterOutNonContentElementsFromDocument($doc); + } + + $filtered = $doc->getBodyInnerHtml(); + if ($this->config->useAllowListFilter) { + $filtered = $this->applyAllowListFiltering($filtered); + } + + return $filtered; + } + + public function filterString(string $html): string + { + return $this->filterDocument(new HtmlDocument($html)); + } + + protected function applyAllowListFiltering(string $html): string + { + $config = HTMLPurifier_HTML5Config::createDefault(); + $config->set('Cache.SerializerPath', storage_path('purifier')); + $purifier = new HTMLPurifier($config); + return $purifier->purify($html); + } + + protected function filterOutScriptsFromDocument(HtmlDocument $doc): void { // Remove standard script tags $scriptElems = $doc->queryXPath('//script'); @@ -27,10 +65,6 @@ public static function removeActiveContentFromDocument(HtmlDocument $doc): void $badForms = $doc->queryXPath('//*[' . static::xpathContains('@action', 'javascript:') . '] | //*[' . static::xpathContains('@formaction', 'javascript:') . ']'); static::removeNodes($badForms); - // Remove meta tag to prevent external redirects - $metaTags = $doc->queryXPath('//meta[' . static::xpathContains('@content', 'url') . ']'); - static::removeNodes($metaTags); - // Remove data or JavaScript iFrames $badIframes = $doc->queryXPath('//*[' . static::xpathContains('@src', 'data:') . '] | //*[' . static::xpathContains('@src', 'javascript:') . '] | //*[@srcdoc]'); static::removeNodes($badIframes); @@ -49,7 +83,10 @@ public static function removeActiveContentFromDocument(HtmlDocument $doc): void // Remove 'on*' attributes $onAttributes = $doc->queryXPath('//@*[starts-with(name(), \'on\')]'); static::removeAttributes($onAttributes); + } + protected function filterOutFormElementsFromDocument(HtmlDocument $doc): void + { // Remove form elements $formElements = ['form', 'fieldset', 'button', 'textarea', 'select']; foreach ($formElements as $formElement) { @@ -75,41 +112,21 @@ public static function removeActiveContentFromDocument(HtmlDocument $doc): void } } - /** - * Remove active content from the given HTML string. - * This aims to cover anything which can dynamically deal with, or send, data - * like any JavaScript actions or form content. - */ - public static function removeActiveContentFromHtmlString(string $html): string + protected function filterOutBadHtmlElementsFromDocument(HtmlDocument $doc): void { - if (empty($html)) { - return $html; - } - - $doc = new HtmlDocument($html); - static::removeActiveContentFromDocument($doc); - - return $doc->getBodyInnerHtml(); - } - - /** - * Alias using the old method name to avoid potential compatibility breaks during patch release. - * To remove in future feature release. - * @deprecated Use removeActiveContentFromDocument instead. - */ - public static function removeScriptsFromDocument(HtmlDocument $doc): void - { - static::removeActiveContentFromDocument($doc); + // Remove meta tag to prevent external redirects + $metaTags = $doc->queryXPath('//meta[' . static::xpathContains('@content', 'url') . ']'); + static::removeNodes($metaTags); } - /** - * Alias using the old method name to avoid potential compatibility breaks during patch release. - * To remove in future feature release. - * @deprecated Use removeActiveContentFromHtmlString instead. - */ - public static function removeScriptsFromHtmlString(string $html): string + protected function filterOutNonContentElementsFromDocument(HtmlDocument $doc): void { - return static::removeActiveContentFromHtmlString($html); + // Remove non-content elements + $formElements = ['link', 'style', 'meta', 'title', 'template']; + foreach ($formElements as $formElement) { + $matchingFormElements = $doc->queryXPath('//' . $formElement); + static::removeNodes($matchingFormElements); + } } /** @@ -147,4 +164,34 @@ protected static function removeAttributes(DOMNodeList $attrs): void $parentNode->removeAttribute($attrName); } } + + /** + * Alias using the old method name to avoid potential compatibility breaks during patch release. + * To remove in future feature release. + * @deprecated Use filterDocument instead. + */ + public static function removeScriptsFromDocument(HtmlDocument $doc): void + { + $config = new HtmlContentFilterConfig( + filterOutNonContentElements: false, + useAllowListFilter: false, + ); + $filter = new static($config); + $filter->filterDocument($doc); + } + + /** + * Alias using the old method name to avoid potential compatibility breaks during patch release. + * To remove in future feature release. + * @deprecated Use filterString instead. + */ + public static function removeScriptsFromHtmlString(string $html): string + { + $config = new HtmlContentFilterConfig( + filterOutNonContentElements: false, + useAllowListFilter: false, + ); + $filter = new static($config); + return $filter->filterString($html); + } } diff --git a/app/Util/HtmlContentFilterConfig.php b/app/Util/HtmlContentFilterConfig.php new file mode 100644 index 00000000000..2cb77ea5815 --- /dev/null +++ b/app/Util/HtmlContentFilterConfig.php @@ -0,0 +1,31 @@ +=5.3" + }, + "require-dev": { + "masterminds/html5": "^2.7", + "php-coveralls/php-coveralls": "^1.1|^2.1", + "phpunit/phpunit": ">=4.7 <10.0" + }, + "suggest": { + "masterminds/html5": "Required to use HTMLPurifier_Lexer_HTML5" + }, + "type": "library", + "autoload": { + "classmap": [ + "library/HTMLPurifier/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "xemlock", + "email": "xemlock@gmail.com" + } + ], + "description": "HTML5 support for HTML Purifier", + "homepage": "https://github.com/xemlock/htmlpurifier-html5", + "keywords": [ + "HTML5", + "Purifier", + "html", + "htmlpurifier", + "security", + "tidy", + "validator", + "xss" + ], + "support": { + "issues": "https://github.com/xemlock/htmlpurifier-html5/issues", + "source": "https://github.com/xemlock/htmlpurifier-html5/tree/v0.1.12" + }, + "time": "2026-02-09T21:03:14+00:00" } ], "packages-dev": [ diff --git a/storage/purifier/.gitignore b/storage/purifier/.gitignore new file mode 100644 index 00000000000..c96a04f008e --- /dev/null +++ b/storage/purifier/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore \ No newline at end of file From 0f040fe8b1bbaaee4d088262aa4482a4b19b1c46 Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Sun, 15 Feb 2026 16:17:03 +0000 Subject: [PATCH 02/16] Content: Tuned HTML purifier for our use Tested it with a range of supported, including uncommon, content types and added support, or changed config, where needed. Been through docs for all HTMLPurifier options to assess what's relevant. --- app/Entities/Tools/PageContent.php | 3 +- app/Util/ConfiguredHtmlPurifier.php | 101 ++++++++++++++++++++++++++++ app/Util/HtmlContentFilter.php | 6 +- 3 files changed, 104 insertions(+), 6 deletions(-) create mode 100644 app/Util/ConfiguredHtmlPurifier.php diff --git a/app/Entities/Tools/PageContent.php b/app/Entities/Tools/PageContent.php index ca06e696185..67c6e4cf6e8 100644 --- a/app/Entities/Tools/PageContent.php +++ b/app/Entities/Tools/PageContent.php @@ -321,12 +321,13 @@ public function render(bool $blankIncludes = false): string $cacheKey = $this->getContentCacheKey($doc->getBodyInnerHtml()); $cached = cache()->get($cacheKey, null); if ($cached !== null) { - return $cached; +// return $cached; } $filterConfig = HtmlContentFilterConfig::fromConfigString(config('app.content_filtering')); $filter = new HtmlContentFilter($filterConfig); $filtered = $filter->filterDocument($doc); +// $filtered = $doc->getBodyInnerHtml(); $cacheTime = 86400 * 7; // 1 week cache()->put($cacheKey, $filtered, $cacheTime); diff --git a/app/Util/ConfiguredHtmlPurifier.php b/app/Util/ConfiguredHtmlPurifier.php new file mode 100644 index 00000000000..5aab25b4745 --- /dev/null +++ b/app/Util/ConfiguredHtmlPurifier.php @@ -0,0 +1,101 @@ +setConfig($config); + + $htmlDef = $config->getDefinition('HTML', true, true); + if ($htmlDef instanceof HTMLPurifier_HTMLDefinition) { + $this->configureDefinition($htmlDef); + } + + $this->purifier = new HTMLPurifier($config); + } + + protected function setConfig(HTMLPurifier_Config $config): void + { + $config->set('Cache.SerializerPath', storage_path('purifier')); + $config->set('CSS.AllowTricky', true); + $config->set('HTML.SafeIframe', true); + $config->set('Attr.EnableID', true); + $config->set('Attr.ID.HTML5', true); + $config->set('Output.FixInnerHTML', false); + $config->set('URI.SafeIframeRegexp', '%^(http://|https://)%'); + $config->set('URI.AllowedSchemes', [ + 'http' => true, + 'https' => true, + 'mailto' => true, + 'ftp' => true, + 'nntp' => true, + 'news' => true, + 'tel' => true, + 'file' => true, + ]); + + $config->set('Cache.DefinitionImpl', null); // Disable cache during testing + } + + public function configureDefinition(HTMLPurifier_HTMLDefinition $definition): void + { + // Allow the object element + $definition->addElement( + 'object', + 'Inline', + 'Flow', + 'Common', + [ + 'data' => 'URI', + 'type' => 'Text', + 'width' => 'Length', + 'height' => 'Length', + ] + ); + + // Allow the embed element + $definition->addElement( + 'embed', + 'Inline', + 'Empty', + 'Common', + [ + 'src' => 'URI', + 'type' => 'Text', + 'width' => 'Length', + 'height' => 'Length', + ] + ); + + // Allow checkbox inputs + $definition->addElement( + 'input', + 'Formctrl', + 'Empty', + 'Common', + [ + 'checked' => 'Bool#checked', + 'disabled' => 'Bool#disabled', + 'name' => 'Text', + 'readonly' => 'Bool#readonly', + 'type' => 'Enum#checkbox', + 'value' => 'Text', + ] + ); + } + + public function purify(string $html): string + { + return $this->purifier->purify($html); + } +} diff --git a/app/Util/HtmlContentFilter.php b/app/Util/HtmlContentFilter.php index 842e4246736..79b1cdc93c4 100644 --- a/app/Util/HtmlContentFilter.php +++ b/app/Util/HtmlContentFilter.php @@ -5,8 +5,6 @@ use DOMAttr; use DOMElement; use DOMNodeList; -use HTMLPurifier; -use HTMLPurifier_HTML5Config; class HtmlContentFilter { @@ -45,9 +43,7 @@ public function filterString(string $html): string protected function applyAllowListFiltering(string $html): string { - $config = HTMLPurifier_HTML5Config::createDefault(); - $config->set('Cache.SerializerPath', storage_path('purifier')); - $purifier = new HTMLPurifier($config); + $purifier = new ConfiguredHtmlPurifier(); return $purifier->purify($html); } From 227027fc4570270395fe5dd0aa2bb8201163752a Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Sun, 15 Feb 2026 16:46:09 +0000 Subject: [PATCH 03/16] Content: Updated purifier and content caching - Updated page content cache to use app version in cache key - Moved purifier cache into framework to better work with existing expected folders. - Added app version check to purifier so that it will reset its own cache on app version change. --- app/Entities/Tools/PageContent.php | 7 +++-- app/Util/ConfiguredHtmlPurifier.php | 34 +++++++++++++++++++-- storage/{ => framework}/purifier/.gitignore | 0 3 files changed, 36 insertions(+), 5 deletions(-) rename storage/{ => framework}/purifier/.gitignore (100%) diff --git a/app/Entities/Tools/PageContent.php b/app/Entities/Tools/PageContent.php index 67c6e4cf6e8..436c4f0bed8 100644 --- a/app/Entities/Tools/PageContent.php +++ b/app/Entities/Tools/PageContent.php @@ -2,6 +2,7 @@ namespace BookStack\Entities\Tools; +use BookStack\App\AppVersion; use BookStack\Entities\Models\Page; use BookStack\Entities\Queries\PageQueries; use BookStack\Entities\Tools\Markdown\MarkdownToHtml; @@ -321,13 +322,12 @@ public function render(bool $blankIncludes = false): string $cacheKey = $this->getContentCacheKey($doc->getBodyInnerHtml()); $cached = cache()->get($cacheKey, null); if ($cached !== null) { -// return $cached; + return $cached; } $filterConfig = HtmlContentFilterConfig::fromConfigString(config('app.content_filtering')); $filter = new HtmlContentFilter($filterConfig); $filtered = $filter->filterDocument($doc); -// $filtered = $doc->getBodyInnerHtml(); $cacheTime = 86400 * 7; // 1 week cache()->put($cacheKey, $filtered, $cacheTime); @@ -340,7 +340,8 @@ protected function getContentCacheKey(string $html): string $contentHash = md5($html); $contentId = $this->page->id; $contentTime = $this->page->updated_at->timestamp; - return "page-content-cache::{$contentId}::{$contentTime}::{$contentHash}"; + $appVersion = AppVersion::get(); + return "page-content-cache::{$appVersion}::{$contentId}::{$contentTime}::{$contentHash}"; } /** diff --git a/app/Util/ConfiguredHtmlPurifier.php b/app/Util/ConfiguredHtmlPurifier.php index 5aab25b4745..d63d2ad5f3c 100644 --- a/app/Util/ConfiguredHtmlPurifier.php +++ b/app/Util/ConfiguredHtmlPurifier.php @@ -2,19 +2,29 @@ namespace BookStack\Util; +use BookStack\App\AppVersion; use HTMLPurifier; use HTMLPurifier_Config; +use HTMLPurifier_DefinitionCache_Serializer; use HTMLPurifier_HTML5Config; use HTMLPurifier_HTMLDefinition; +/** + * Provides a configured HTML Purifier instance. + * https://github.com/ezyang/htmlpurifier + * Also uses this to extend support to HTML5 elements: + * https://github.com/xemlock/htmlpurifier-html5 + */ class ConfiguredHtmlPurifier { protected HTMLPurifier $purifier; + protected static bool $cachedChecked = false; public function __construct() { $config = HTMLPurifier_HTML5Config::createDefault(); $this->setConfig($config); + $this->resetCacheIfNeeded($config); $htmlDef = $config->getDefinition('HTML', true, true); if ($htmlDef instanceof HTMLPurifier_HTMLDefinition) { @@ -24,9 +34,29 @@ public function __construct() $this->purifier = new HTMLPurifier($config); } + protected function resetCacheIfNeeded(HTMLPurifier_Config $config): void + { + if (self::$cachedChecked) { + return; + } + + $cachedForVersion = cache('htmlpurifier::cache-version'); + $appVersion = AppVersion::get(); + if ($cachedForVersion !== $appVersion) { + foreach (['HTML', 'CSS', 'URI'] as $name) { + $cache = new HTMLPurifier_DefinitionCache_Serializer($name); + $cache->flush($config); + } + cache()->set('htmlpurifier::cache-version', $appVersion); + } + + self::$cachedChecked = true; + } + protected function setConfig(HTMLPurifier_Config $config): void { - $config->set('Cache.SerializerPath', storage_path('purifier')); + $config->set('Cache.SerializerPath', storage_path('framework/purifier')); + $config->set('Core.AllowHostnameUnderscore', true); $config->set('CSS.AllowTricky', true); $config->set('HTML.SafeIframe', true); $config->set('Attr.EnableID', true); @@ -44,7 +74,7 @@ protected function setConfig(HTMLPurifier_Config $config): void 'file' => true, ]); - $config->set('Cache.DefinitionImpl', null); // Disable cache during testing + // $config->set('Cache.DefinitionImpl', null); // Disable cache during testing } public function configureDefinition(HTMLPurifier_HTMLDefinition $definition): void diff --git a/storage/purifier/.gitignore b/storage/framework/purifier/.gitignore similarity index 100% rename from storage/purifier/.gitignore rename to storage/framework/purifier/.gitignore From 035be66ebc7d4a312b5a240283a5a13da9694779 Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Sun, 15 Feb 2026 18:44:14 +0000 Subject: [PATCH 04/16] Content: Updated tests and CSP usage of content script setting Updates CSP to use new content_filtering option. Splits out content filtering tests to their own class. Updated tests where needed to adapt to changes. --- app/Entities/Tools/PageContent.php | 2 +- app/Theming/CustomHtmlHeadContentProvider.php | 2 +- app/Util/ConfiguredHtmlPurifier.php | 2 +- app/Util/CspService.php | 9 +- tests/Entity/PageContentFilteringTest.php | 353 ++++++++++++++++++ tests/Entity/PageContentTest.php | 348 +---------------- tests/SecurityHeaderTest.php | 6 +- 7 files changed, 368 insertions(+), 354 deletions(-) create mode 100644 tests/Entity/PageContentFilteringTest.php diff --git a/app/Entities/Tools/PageContent.php b/app/Entities/Tools/PageContent.php index 436c4f0bed8..f8a0617395b 100644 --- a/app/Entities/Tools/PageContent.php +++ b/app/Entities/Tools/PageContent.php @@ -339,7 +339,7 @@ protected function getContentCacheKey(string $html): string { $contentHash = md5($html); $contentId = $this->page->id; - $contentTime = $this->page->updated_at->timestamp; + $contentTime = $this->page->updated_at?->timestamp ?? time(); $appVersion = AppVersion::get(); return "page-content-cache::{$appVersion}::{$contentId}::{$contentTime}::{$contentHash}"; } diff --git a/app/Theming/CustomHtmlHeadContentProvider.php b/app/Theming/CustomHtmlHeadContentProvider.php index dab30606c34..9f794a077ba 100644 --- a/app/Theming/CustomHtmlHeadContentProvider.php +++ b/app/Theming/CustomHtmlHeadContentProvider.php @@ -41,7 +41,7 @@ public function forExport(): string $hash = md5($content); return $this->cache->remember('custom-head-export:' . $hash, 86400, function () use ($content) { - $config = new HtmlContentFilterConfig(filterOutNonContentElements: false); + $config = new HtmlContentFilterConfig(filterOutNonContentElements: false, useAllowListFilter: false); return (new HtmlContentFilter($config))->filterString($content); }); } diff --git a/app/Util/ConfiguredHtmlPurifier.php b/app/Util/ConfiguredHtmlPurifier.php index d63d2ad5f3c..014b2a3bf2b 100644 --- a/app/Util/ConfiguredHtmlPurifier.php +++ b/app/Util/ConfiguredHtmlPurifier.php @@ -62,7 +62,7 @@ protected function setConfig(HTMLPurifier_Config $config): void $config->set('Attr.EnableID', true); $config->set('Attr.ID.HTML5', true); $config->set('Output.FixInnerHTML', false); - $config->set('URI.SafeIframeRegexp', '%^(http://|https://)%'); + $config->set('URI.SafeIframeRegexp', '%^(http://|https://|//)%'); $config->set('URI.AllowedSchemes', [ 'http' => true, 'https' => true, diff --git a/app/Util/CspService.php b/app/Util/CspService.php index 4262b5c98f8..466acb49148 100644 --- a/app/Util/CspService.php +++ b/app/Util/CspService.php @@ -65,7 +65,7 @@ public function allowedIFrameHostsConfigured(): bool */ protected function getScriptSrc(): string { - if (config('app.allow_content_scripts')) { + if ($this->scriptFilteringDisabled()) { return ''; } @@ -108,7 +108,7 @@ protected function getFrameSrc(): string */ protected function getObjectSrc(): string { - if (config('app.allow_content_scripts')) { + if ($this->scriptFilteringDisabled()) { return ''; } @@ -124,6 +124,11 @@ protected function getBaseUri(): string return "base-uri 'self'"; } + protected function scriptFilteringDisabled(): bool + { + return !HtmlContentFilterConfig::fromConfigString(config('app.content_filtering'))->filterOutJavaScript; + } + protected function getAllowedIframeHosts(): array { $hosts = config('app.iframe_hosts') ?? ''; diff --git a/tests/Entity/PageContentFilteringTest.php b/tests/Entity/PageContentFilteringTest.php new file mode 100644 index 00000000000..e1295034d68 --- /dev/null +++ b/tests/Entity/PageContentFilteringTest.php @@ -0,0 +1,353 @@ +asEditor(); + $page = $this->entities->page(); + $script = 'abc123abc123'; + $page->html = "escape {$script}"; + $page->save(); + + $pageView = $this->get($page->getUrl()); + $pageView->assertStatus(200); + $pageView->assertDontSee($script, false); + $pageView->assertSee('abc123abc123'); + } + + public function test_more_complex_content_script_escaping_scenarios() + { + $checks = [ + "

Some script

", + "

Some script

", + "

Some script

", + "

Some script

", + "

Some script

", + "

Some script

", + ]; + + $this->asEditor(); + $page = $this->entities->page(); + + foreach ($checks as $check) { + $page->html = $check; + $page->save(); + + $pageView = $this->get($page->getUrl()); + $pageView->assertStatus(200); + $this->withHtml($pageView)->assertElementNotContains('.page-content', ''); + } + } + + public function test_js_and_base64_src_urls_are_removed() + { + $checks = [ + '', + '', + '', + '', + '', + '', + '', + '', + '', + '', + '', + '', + '', + '', + '', + '', + '', + ]; + + $this->asEditor(); + $page = $this->entities->page(); + + foreach ($checks as $check) { + $page->html = $check; + $page->save(); + + $pageView = $this->get($page->getUrl()); + $pageView->assertStatus(200); + $html = $this->withHtml($pageView); + $html->assertElementNotContains('.page-content', ''); + $html->assertElementNotContains('.page-content', 'src='); + $html->assertElementNotContains('.page-content', 'javascript:'); + $html->assertElementNotContains('.page-content', 'data:'); + $html->assertElementNotContains('.page-content', 'base64'); + } + } + + public function test_javascript_uri_links_are_removed() + { + $checks = [ + 'withHtml($pageView)->assertElementNotContains('.page-content', 'href=javascript:'); + } + } + + public function test_form_actions_with_javascript_are_removed() + { + $checks = [ + '', + 'Click me', + 'Click me', + '', + '', + ]; + + $this->asEditor(); + $page = $this->entities->page(); + + foreach ($checks as $check) { + $page->html = $check; + $page->save(); + + $pageView = $this->get($page->getUrl()); + $pageView->assertStatus(200); + $pageView->assertDontSee('id="xss"', false); + $pageView->assertDontSee('action=javascript:', false); + $pageView->assertDontSee('action=JaVaScRiPt:', false); + $pageView->assertDontSee('formaction=javascript:', false); + $pageView->assertDontSee('formaction=JaVaScRiPt:', false); + } + } + + public function test_form_elements_are_removed() + { + $checks = [ + '

thisisacattofind

thisdogshouldnotbefound
', + '

thisisacattofind

', + '

thisisacattofind

', + '

thisisacattofind

', + '

thisisacattofind

thisdogshouldnotbefound
', + '

thisisacattofind

', + '

thisisacattofind

', + <<<'TESTCASE' + + + + +

thisisacattofind

+
+

thisdogshouldnotbefound

+
+ + + + +
+
+TESTCASE + + ]; + + $this->asEditor(); + $page = $this->entities->page(); + + foreach ($checks as $check) { + $page->html = $check; + $page->save(); + + $pageView = $this->get($page->getUrl()); + $pageView->assertStatus(200); + $pageView->assertSee('thisisacattofind'); + $pageView->assertDontSee('thisdogshouldnotbefound'); + } + } + + public function test_form_attributes_are_removed() + { + $withinSvgSample = <<<'TESTCASE' + + + + +

thisisacattofind

+

thisisacattofind

+ + +
+
+TESTCASE; + + $checks = [ + 'formaction' => '

thisisacattofind

', + 'form' => '

thisisacattofind

', + 'formmethod' => '

thisisacattofind

', + 'formtarget' => '

thisisacattofind

', + 'FORMTARGET' => '

thisisacattofind

', + ]; + + $this->asEditor(); + $page = $this->entities->page(); + + foreach ($checks as $attribute => $check) { + $page->html = $check; + $page->save(); + + $pageView = $this->get($page->getUrl()); + $pageView->assertStatus(200); + $pageView->assertSee('thisisacattofind'); + $this->withHtml($pageView)->assertElementNotExists(".page-content [{$attribute}]"); + } + + $page->html = $withinSvgSample; + $page->save(); + $pageView = $this->get($page->getUrl()); + $pageView->assertStatus(200); + $html = $this->withHtml($pageView); + foreach ($checks as $attribute => $check) { + $pageView->assertSee('thisisacattofind'); + $html->assertElementNotExists(".page-content [{$attribute}]"); + } + } + + public function test_metadata_redirects_are_removed() + { + $checks = [ + '', + '', + '', + ]; + + $this->asEditor(); + $page = $this->entities->page(); + + foreach ($checks as $check) { + $page->html = $check; + $page->save(); + + $pageView = $this->get($page->getUrl()); + $pageView->assertStatus(200); + $this->withHtml($pageView)->assertElementNotContains('.page-content', ''); + $this->withHtml($pageView)->assertElementNotContains('.page-content', ''); + $this->withHtml($pageView)->assertElementNotContains('.page-content', 'content='); + $this->withHtml($pageView)->assertElementNotContains('.page-content', 'external_url'); + } + } + + public function test_page_inline_on_attributes_removed_by_default() + { + $this->asEditor(); + $page = $this->entities->page(); + $script = '

Hello

'; + $page->html = "escape {$script}"; + $page->save(); + + $pageView = $this->get($page->getUrl()); + $pageView->assertStatus(200); + $pageView->assertDontSee($script, false); + $pageView->assertSee('

Hello

', false); + } + + public function test_more_complex_inline_on_attributes_escaping_scenarios() + { + $checks = [ + '

Hello

', + '

Hello

', + '
Lorem ipsum dolor sit amet.

Hello

', + '
Lorem ipsum dolor sit amet.

Hello

', + '
Lorem ipsum dolor sit amet.

Hello

', + '
Lorem ipsum dolor sit amet.

Hello

', + '
xss link\', + ]; + + $this->asEditor(); + $page = $this->entities->page(); + + foreach ($checks as $check) { + $page->html = $check; + $page->save(); + + $pageView = $this->get($page->getUrl()); + $pageView->assertStatus(200); + $this->withHtml($pageView)->assertElementNotContains('.page-content', 'onclick'); + } + } + + public function test_page_content_scripts_show_with_filters_disabled() + { + $this->asEditor(); + $page = $this->entities->page(); + config()->set('app.content_filtering', ''); + + $script = 'abc123abc123'; + $page->html = "no escape {$script}"; + $page->save(); + + $pageView = $this->get($page->getUrl()); + $pageView->assertSee($script, false); + $pageView->assertDontSee('abc123abc123'); + } + + public function test_svg_script_usage_is_removed() + { + $checks = [ + '', + '', + '', + '', + '', + 'XSS', + 'XSS', + '', + ]; + + $this->asEditor(); + $page = $this->entities->page(); + + foreach ($checks as $check) { + $page->html = $check; + $page->save(); + + $pageView = $this->get($page->getUrl()); + $pageView->assertStatus(200); + $html = $this->withHtml($pageView); + $html->assertElementNotContains('.page-content', 'alert'); + $html->assertElementNotContains('.page-content', 'xlink:href'); + $html->assertElementNotContains('.page-content', 'application/xml'); + $html->assertElementNotContains('.page-content', 'javascript'); + } + } + + public function test_page_inline_on_attributes_show_with_filters_disabled() + { + $this->asEditor(); + $page = $this->entities->page(); + config()->set('app.content_filtering', ''); + + $script = '

Hello

'; + $page->html = "escape {$script}"; + $page->save(); + + $pageView = $this->get($page->getUrl()); + $pageView->assertSee($script, false); + $pageView->assertDontSee('

Hello

', false); + } +} diff --git a/tests/Entity/PageContentTest.php b/tests/Entity/PageContentTest.php index 77026113012..deae153e192 100644 --- a/tests/Entity/PageContentTest.php +++ b/tests/Entity/PageContentTest.php @@ -101,351 +101,6 @@ public function test_page_includes_to_nonexisting_pages_does_not_error() $pageResp->assertSee('Hello Barry'); } - public function test_page_content_scripts_removed_by_default() - { - $this->asEditor(); - $page = $this->entities->page(); - $script = 'abc123abc123'; - $page->html = "escape {$script}"; - $page->save(); - - $pageView = $this->get($page->getUrl()); - $pageView->assertStatus(200); - $pageView->assertDontSee($script, false); - $pageView->assertSee('abc123abc123'); - } - - public function test_more_complex_content_script_escaping_scenarios() - { - $checks = [ - "

Some script

", - "

Some script

", - "

Some script

", - "

Some script

", - "

Some script

", - "

Some script

", - ]; - - $this->asEditor(); - $page = $this->entities->page(); - - foreach ($checks as $check) { - $page->html = $check; - $page->save(); - - $pageView = $this->get($page->getUrl()); - $pageView->assertStatus(200); - $this->withHtml($pageView)->assertElementNotContains('.page-content', ''); - } - } - - public function test_js_and_base64_src_urls_are_removed() - { - $checks = [ - '', - '', - '', - '', - '', - '', - '', - '', - '', - '', - '', - '', - '', - '', - '', - '', - '', - ]; - - $this->asEditor(); - $page = $this->entities->page(); - - foreach ($checks as $check) { - $page->html = $check; - $page->save(); - - $pageView = $this->get($page->getUrl()); - $pageView->assertStatus(200); - $html = $this->withHtml($pageView); - $html->assertElementNotContains('.page-content', ''); - $html->assertElementNotContains('.page-content', 'src='); - $html->assertElementNotContains('.page-content', 'javascript:'); - $html->assertElementNotContains('.page-content', 'data:'); - $html->assertElementNotContains('.page-content', 'base64'); - } - } - - public function test_javascript_uri_links_are_removed() - { - $checks = [ - '
withHtml($pageView)->assertElementNotContains('.page-content', 'href=javascript:'); - } - } - - public function test_form_actions_with_javascript_are_removed() - { - $checks = [ - '', - 'Click me', - 'Click me', - '', - '', - ]; - - $this->asEditor(); - $page = $this->entities->page(); - - foreach ($checks as $check) { - $page->html = $check; - $page->save(); - - $pageView = $this->get($page->getUrl()); - $pageView->assertStatus(200); - $pageView->assertDontSee('id="xss"', false); - $pageView->assertDontSee('action=javascript:', false); - $pageView->assertDontSee('action=JaVaScRiPt:', false); - $pageView->assertDontSee('formaction=javascript:', false); - $pageView->assertDontSee('formaction=JaVaScRiPt:', false); - } - } - - public function test_form_elements_are_removed() - { - $checks = [ - '

thisisacattofind

thisdogshouldnotbefound
', - '

thisisacattofind

', - '

thisisacattofind

', - '

thisisacattofind

', - '

thisisacattofind

thisdogshouldnotbefound
', - '

thisisacattofind

', - '

thisisacattofind

', - <<<'TESTCASE' - - - - -

thisisacattofind

-
-

thisdogshouldnotbefound

-
- - - - -
-
-TESTCASE - - ]; - - $this->asEditor(); - $page = $this->entities->page(); - - foreach ($checks as $check) { - $page->html = $check; - $page->save(); - - $pageView = $this->get($page->getUrl()); - $pageView->assertStatus(200); - $pageView->assertSee('thisisacattofind'); - $pageView->assertDontSee('thisdogshouldnotbefound'); - } - } - - public function test_form_attributes_are_removed() - { - $withinSvgSample = <<<'TESTCASE' - - - - -

thisisacattofind

-

thisisacattofind

- - -
-
-TESTCASE; - - $checks = [ - 'formaction' => '

thisisacattofind

', - 'form' => '

thisisacattofind

', - 'formmethod' => '

thisisacattofind

', - 'formtarget' => '

thisisacattofind

', - 'FORMTARGET' => '

thisisacattofind

', - ]; - - $this->asEditor(); - $page = $this->entities->page(); - - foreach ($checks as $attribute => $check) { - $page->html = $check; - $page->save(); - - $pageView = $this->get($page->getUrl()); - $pageView->assertStatus(200); - $pageView->assertSee('thisisacattofind'); - $this->withHtml($pageView)->assertElementNotExists(".page-content [{$attribute}]"); - } - - $page->html = $withinSvgSample; - $page->save(); - $pageView = $this->get($page->getUrl()); - $pageView->assertStatus(200); - $html = $this->withHtml($pageView); - foreach ($checks as $attribute => $check) { - $pageView->assertSee('thisisacattofind'); - $html->assertElementNotExists(".page-content [{$attribute}]"); - } - } - - public function test_metadata_redirects_are_removed() - { - $checks = [ - '', - '', - '', - ]; - - $this->asEditor(); - $page = $this->entities->page(); - - foreach ($checks as $check) { - $page->html = $check; - $page->save(); - - $pageView = $this->get($page->getUrl()); - $pageView->assertStatus(200); - $this->withHtml($pageView)->assertElementNotContains('.page-content', ''); - $this->withHtml($pageView)->assertElementNotContains('.page-content', ''); - $this->withHtml($pageView)->assertElementNotContains('.page-content', 'content='); - $this->withHtml($pageView)->assertElementNotContains('.page-content', 'external_url'); - } - } - - public function test_page_inline_on_attributes_removed_by_default() - { - $this->asEditor(); - $page = $this->entities->page(); - $script = '

Hello

'; - $page->html = "escape {$script}"; - $page->save(); - - $pageView = $this->get($page->getUrl()); - $pageView->assertStatus(200); - $pageView->assertDontSee($script, false); - $pageView->assertSee('

Hello

', false); - } - - public function test_more_complex_inline_on_attributes_escaping_scenarios() - { - $checks = [ - '

Hello

', - '

Hello

', - '
Lorem ipsum dolor sit amet.

Hello

', - '
Lorem ipsum dolor sit amet.

Hello

', - '
Lorem ipsum dolor sit amet.

Hello

', - '
Lorem ipsum dolor sit amet.

Hello

', - '
xss link\', - ]; - - $this->asEditor(); - $page = $this->entities->page(); - - foreach ($checks as $check) { - $page->html = $check; - $page->save(); - - $pageView = $this->get($page->getUrl()); - $pageView->assertStatus(200); - $this->withHtml($pageView)->assertElementNotContains('.page-content', 'onclick'); - } - } - - public function test_page_content_scripts_show_when_configured() - { - $this->asEditor(); - $page = $this->entities->page(); - config()->set('app.allow_content_scripts', 'true'); - - $script = 'abc123abc123'; - $page->html = "no escape {$script}"; - $page->save(); - - $pageView = $this->get($page->getUrl()); - $pageView->assertSee($script, false); - $pageView->assertDontSee('abc123abc123'); - } - - public function test_svg_script_usage_is_removed() - { - $checks = [ - '', - '', - '', - '', - '', - 'XSS', - 'XSS', - '', - ]; - - $this->asEditor(); - $page = $this->entities->page(); - - foreach ($checks as $check) { - $page->html = $check; - $page->save(); - - $pageView = $this->get($page->getUrl()); - $pageView->assertStatus(200); - $html = $this->withHtml($pageView); - $html->assertElementNotContains('.page-content', 'alert'); - $html->assertElementNotContains('.page-content', 'xlink:href'); - $html->assertElementNotContains('.page-content', 'application/xml'); - $html->assertElementNotContains('.page-content', 'javascript'); - } - } - - public function test_page_inline_on_attributes_show_if_configured() - { - $this->asEditor(); - $page = $this->entities->page(); - config()->set('app.allow_content_scripts', 'true'); - - $script = '

Hello

'; - $page->html = "escape {$script}"; - $page->save(); - - $pageView = $this->get($page->getUrl()); - $pageView->assertSee($script, false); - $pageView->assertDontSee('

Hello

', false); - } - public function test_duplicate_ids_does_not_break_page_render() { $this->asEditor(); @@ -649,6 +304,7 @@ public function test_page_markdown_strikethrough_rendering() public function test_page_markdown_single_html_comment_saving() { + config()->set('app.content_filtering', 'jfh'); $this->asEditor(); $page = $this->entities->page(); @@ -656,7 +312,7 @@ public function test_page_markdown_single_html_comment_saving() $this->put($page->getUrl(), [ 'name' => $page->name, 'markdown' => $content, 'html' => '', 'summary' => '', - ]); + ])->assertRedirect(); $page->refresh(); $this->assertStringMatchesFormat($content, $page->html); diff --git a/tests/SecurityHeaderTest.php b/tests/SecurityHeaderTest.php index fe98e32080b..3f4b7d193ce 100644 --- a/tests/SecurityHeaderTest.php +++ b/tests/SecurityHeaderTest.php @@ -93,14 +93,14 @@ public function test_script_csp_nonce_changes_per_request() $this->assertNotEquals($firstHeader, $secondHeader); } - public function test_allow_content_scripts_settings_controls_csp_script_headers() + public function test_content_filtering_config_controls_csp_script_headers() { - config()->set('app.allow_content_scripts', true); + config()->set('app.content_filtering', ''); $resp = $this->get('/'); $scriptHeader = $this->getCspHeader($resp, 'script-src'); $this->assertEmpty($scriptHeader); - config()->set('app.allow_content_scripts', false); + config()->set('app.content_filtering', 'j'); $resp = $this->get('/'); $scriptHeader = $this->getCspHeader($resp, 'script-src'); $this->assertNotEmpty($scriptHeader); From 8a221f64e4e1453966576a06b3bb82393ae735c3 Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Mon, 16 Feb 2026 10:11:48 +0000 Subject: [PATCH 05/16] Content Filtering: Covered new config options and filters with tests --- app/Config/app.php | 8 +-- app/Entities/Tools/PageContent.php | 3 +- phpunit.xml | 1 + tests/Entity/PageContentFilteringTest.php | 75 +++++++++++++++++++++++ tests/Unit/ConfigTest.php | 14 +++++ 5 files changed, 96 insertions(+), 5 deletions(-) diff --git a/app/Config/app.php b/app/Config/app.php index acd27e98c02..7aa94b4f2f9 100644 --- a/app/Config/app.php +++ b/app/Config/app.php @@ -42,17 +42,17 @@ // Even when overridden the WYSIWYG editor may still escape script content. 'allow_content_scripts' => env('ALLOW_CONTENT_SCRIPTS', false), - // Control the behaviour of page content filtering. + // Control the behaviour of content filtering, primarily used for page content. // This setting is a collection of characters which represent different available filters: - // - j - Filter out JavaScript based content - // - h - Filter out unexpected, potentially dangerous, HTML elements + // - j - Filter out JavaScript and unknown binary data based content + // - h - Filter out unexpected, and potentially dangerous, HTML elements // - f - Filter out unexpected form elements // - a - Run content through a more complex allow-list filter // This defaults to using all filters, unless ALLOW_CONTENT_SCRIPTS is set to true in which case no filters are used. // Note: These filters are a best attempt, and may not be 100% effective. They are typically a layer used in addition to other security measures. // TODO - Add to example env // TODO - Remove allow_content_scripts option above - 'content_filtering' => env('CONTENT_FILTERING', env('ALLOW_CONTENT_SCRIPTS', false) === true ? '' : 'jfha'), + 'content_filtering' => env('APP_CONTENT_FILTERING', env('ALLOW_CONTENT_SCRIPTS', false) === true ? '' : 'jhfa'), // Allow server-side fetches to be performed to potentially unknown // and user-provided locations. Primarily used in exports when loading diff --git a/app/Entities/Tools/PageContent.php b/app/Entities/Tools/PageContent.php index f8a0617395b..4f72e7c490d 100644 --- a/app/Entities/Tools/PageContent.php +++ b/app/Entities/Tools/PageContent.php @@ -341,7 +341,8 @@ protected function getContentCacheKey(string $html): string $contentId = $this->page->id; $contentTime = $this->page->updated_at?->timestamp ?? time(); $appVersion = AppVersion::get(); - return "page-content-cache::{$appVersion}::{$contentId}::{$contentTime}::{$contentHash}"; + $filterConfig = config('app.content_filtering') ?? ''; + return "page-content-cache::{$filterConfig}::{$appVersion}::{$contentId}::{$contentTime}::{$contentHash}"; } /** diff --git a/phpunit.xml b/phpunit.xml index 8a7ab9cb7a3..94fc002b704 100644 --- a/phpunit.xml +++ b/phpunit.xml @@ -34,6 +34,7 @@ + diff --git a/tests/Entity/PageContentFilteringTest.php b/tests/Entity/PageContentFilteringTest.php index e1295034d68..8103fae1d2b 100644 --- a/tests/Entity/PageContentFilteringTest.php +++ b/tests/Entity/PageContentFilteringTest.php @@ -22,6 +22,8 @@ public function test_page_content_scripts_removed_by_default() public function test_more_complex_content_script_escaping_scenarios() { + config()->set('app.content_filtering', 'j'); + $checks = [ "

Some script

", "

Some script

", @@ -47,6 +49,8 @@ public function test_more_complex_content_script_escaping_scenarios() public function test_js_and_base64_src_urls_are_removed() { + config()->set('app.content_filtering', 'j'); + $checks = [ '', '', @@ -89,6 +93,8 @@ public function test_js_and_base64_src_urls_are_removed() public function test_javascript_uri_links_are_removed() { + config()->set('app.content_filtering', 'j'); + $checks = [ '
'; + $page->save(); + + $this->asEditor()->get($page->getUrl())->assertSee('dont-see-this', false); + + config()->set('app.content_filtering', 'f'); + $this->get($page->getUrl())->assertDontSee('dont-see-this', false); + } + public function test_form_actions_with_javascript_are_removed() { + config()->set('app.content_filtering', 'j'); + $checks = [ '', 'Click me', @@ -139,6 +160,8 @@ public function test_form_actions_with_javascript_are_removed() public function test_form_elements_are_removed() { + config()->set('app.content_filtering', 'f'); + $checks = [ '

thisisacattofind

thisdogshouldnotbefound
', '

thisisacattofind

', @@ -182,6 +205,8 @@ public function test_form_elements_are_removed() public function test_form_attributes_are_removed() { + config()->set('app.content_filtering', 'f'); + $withinSvgSample = <<<'TESTCASE' @@ -229,6 +254,8 @@ public function test_form_attributes_are_removed() public function test_metadata_redirects_are_removed() { + config()->set('app.content_filtering', 'h'); + $checks = [ '', '', @@ -253,6 +280,8 @@ public function test_metadata_redirects_are_removed() public function test_page_inline_on_attributes_removed_by_default() { + config()->set('app.content_filtering', 'j'); + $this->asEditor(); $page = $this->entities->page(); $script = '

Hello

'; @@ -267,6 +296,8 @@ public function test_page_inline_on_attributes_removed_by_default() public function test_more_complex_inline_on_attributes_escaping_scenarios() { + config()->set('app.content_filtering', 'j'); + $checks = [ '

Hello

', '

Hello

', @@ -308,6 +339,8 @@ public function test_page_content_scripts_show_with_filters_disabled() public function test_svg_script_usage_is_removed() { + config()->set('app.content_filtering', 'j'); + $checks = [ '', '', @@ -350,4 +383,46 @@ public function test_page_inline_on_attributes_show_with_filters_disabled() $pageView->assertSee($script, false); $pageView->assertDontSee('

Hello

', false); } + + public function test_non_content_filtering_is_controlled_by_config() + { + config()->set('app.content_filtering', 'h'); + $page = $this->entities->page(); + $html = <<<'HTML' + +

inbetweenpsection

+ + +superbeans! + +HTML; + + $page->html = $html; + $page->save(); + + $resp = $this->asEditor()->get($page->getUrl()); + $resp->assertDontSee('superbeans', false); + $resp->assertSee('inbetweenpsection', false); + } + + public function test_non_content_filtering() + { + config()->set('app.content_filtering', 'h'); + } + + public function test_allow_list_filtering_is_controlled_by_config() + { + config()->set('app.content_filtering', ''); + $page = $this->entities->page(); + $page->html = '
Hello!
'; + $page->save(); + + $resp = $this->asEditor()->get($page->getUrl()); + $resp->assertSee('style="position: absolute; left: 0;color:#00FFEE;"', false); + + config()->set('app.content_filtering', 'a'); + $resp = $this->get($page->getUrl()); + $resp->assertDontSee('style="position: absolute; left: 0;color:#00FFEE;"', false); + $resp->assertSee('style="color:#00FFEE;"', false); + } } diff --git a/tests/Unit/ConfigTest.php b/tests/Unit/ConfigTest.php index 7795a861a0c..63dd04fb1d2 100644 --- a/tests/Unit/ConfigTest.php +++ b/tests/Unit/ConfigTest.php @@ -170,6 +170,20 @@ public function test_mysql_host_parsed_as_expected() } } + public function test_content_filtering_defaults_to_enabled() + { + $this->runWithEnv(['APP_CONTENT_FILTERING' => null, 'ALLOW_CONTENT_SCRIPTS' => null], function () { + $this->assertEquals('jhfa', config('app.content_filtering')); + }); + } + + public function test_allow_content_scripts_disables_content_filtering() + { + $this->runWithEnv(['APP_CONTENT_FILTERING' => null, 'ALLOW_CONTENT_SCRIPTS' => 'true'], function () { + $this->assertEquals('', config('app.content_filtering')); + }); + } + /** * Set an environment variable of the given name and value * then check the given config key to see if it matches the given result. From 50e8501027115a70983c6fcecc7c411474221cf1 Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Mon, 16 Feb 2026 13:02:24 +0000 Subject: [PATCH 06/16] Content Filter: Added extra object filtering Was blocked by CSP anyway, but best to have an extra layer. --- app/Util/HtmlContentFilter.php | 8 +++-- tests/Entity/PageContentFilteringTest.php | 37 +++++++++++++++++++++++ 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/app/Util/HtmlContentFilter.php b/app/Util/HtmlContentFilter.php index 79b1cdc93c4..45144a0e823 100644 --- a/app/Util/HtmlContentFilter.php +++ b/app/Util/HtmlContentFilter.php @@ -61,13 +61,17 @@ protected function filterOutScriptsFromDocument(HtmlDocument $doc): void $badForms = $doc->queryXPath('//*[' . static::xpathContains('@action', 'javascript:') . '] | //*[' . static::xpathContains('@formaction', 'javascript:') . ']'); static::removeNodes($badForms); - // Remove data or JavaScript iFrames + // Remove data or JavaScript iFrames & embeds $badIframes = $doc->queryXPath('//*[' . static::xpathContains('@src', 'data:') . '] | //*[' . static::xpathContains('@src', 'javascript:') . '] | //*[@srcdoc]'); static::removeNodes($badIframes); + // Remove data or JavaScript objects + $badObjects = $doc->queryXPath('//*[' . static::xpathContains('@data', 'data:') . '] | //*[' . static::xpathContains('@data', 'javascript:') . ']'); + static::removeNodes($badObjects); + // Remove attributes, within svg children, hiding JavaScript or data uris. // A bunch of svg element and attribute combinations expose xss possibilities. - // For example, SVG animate tag can exploit javascript in values. + // For example, SVG animate tag can exploit JavaScript in values. $badValuesAttrs = $doc->queryXPath('//svg//@*[' . static::xpathContains('.', 'data:') . '] | //svg//@*[' . static::xpathContains('.', 'javascript:') . ']'); static::removeAttributes($badValuesAttrs); diff --git a/tests/Entity/PageContentFilteringTest.php b/tests/Entity/PageContentFilteringTest.php index 8103fae1d2b..98297a093b8 100644 --- a/tests/Entity/PageContentFilteringTest.php +++ b/tests/Entity/PageContentFilteringTest.php @@ -69,6 +69,20 @@ public function test_js_and_base64_src_urls_are_removed() '', '', '', + '', + '', + '', + '', + '', + '', + '', + '', + '', + '', + '', + '', + '', + '', ]; $this->asEditor(); @@ -81,6 +95,8 @@ public function test_js_and_base64_src_urls_are_removed() $pageView = $this->get($page->getUrl()); $pageView->assertStatus(200); $html = $this->withHtml($pageView); + $html->assertElementNotContains('.page-content', 'assertElementNotContains('.page-content', 'data='); $html->assertElementNotContains('.page-content', ''); @@ -425,4 +441,25 @@ public function test_allow_list_filtering_is_controlled_by_config() $resp->assertDontSee('style="position: absolute; left: 0;color:#00FFEE;"', false); $resp->assertSee('style="color:#00FFEE;"', false); } + + public function test_allow_list_style_filtering() + { + $testCasesExpectedByInput = [ + '
Hello!
' => '
Hello!
', + '
Hello!
' => '
Hello!
', + '
Hello!
' => '
Hello!
', + ]; + + config()->set('app.content_filtering', 'a'); + $page = $this->entities->page(); + $this->asEditor(); + + foreach ($testCasesExpectedByInput as $input => $expected) { + $page->html = $input; + $page->save(); + $resp = $this->get($page->getUrl()); + + $resp->assertSee($expected, false); + } + } } From 3fa1174e7a879fe8d128e89709889628c496838d Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Mon, 16 Feb 2026 13:46:45 +0000 Subject: [PATCH 07/16] Content filtering: Updated config and readme attribution --- .env.example.complete | 17 ++++++++++++++++- app/Config/app.php | 13 +++---------- readme.md | 5 +++-- tests/Unit/ConfigTest.php | 7 +++++++ 4 files changed, 29 insertions(+), 13 deletions(-) diff --git a/.env.example.complete b/.env.example.complete index 18e7bd00d9c..ebebaf9e3e8 100644 --- a/.env.example.complete +++ b/.env.example.complete @@ -351,10 +351,25 @@ EXPORT_PDF_COMMAND_TIMEOUT=15 # Only used if 'ALLOW_UNTRUSTED_SERVER_FETCHING=true' which disables security protections. WKHTMLTOPDF=false -# Allow

'; + $page->save(); $this->getJson('/ajax/page/' . $page->id)->assertJson([ - 'html' => $page->html, + 'html' => '

test content

', ]); } From a2017ffa559d96669f207863ca202d2e50363622 Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Tue, 17 Feb 2026 18:22:13 +0000 Subject: [PATCH 12/16] Caching: Altered purifier cache folder to be server-created Moved from a static folder to a dynamically created folder in the framework/cache directory, to increase the chance that it's created with server-writable permissions. This is due to an issue where users had permission issues, since adding a new folder means it's created by the git user and often non-web-writable. --- app/Util/ConfiguredHtmlPurifier.php | 18 +++++++++++++++--- storage/framework/purifier/.gitignore | 2 -- 2 files changed, 15 insertions(+), 5 deletions(-) delete mode 100644 storage/framework/purifier/.gitignore diff --git a/app/Util/ConfiguredHtmlPurifier.php b/app/Util/ConfiguredHtmlPurifier.php index 014b2a3bf2b..87580da8bf1 100644 --- a/app/Util/ConfiguredHtmlPurifier.php +++ b/app/Util/ConfiguredHtmlPurifier.php @@ -22,8 +22,13 @@ class ConfiguredHtmlPurifier public function __construct() { + // This is done by the web-server at run-time, with the existing + // storage/framework/cache folder to ensure we're using a server-writable folder. + $cachePath = storage_path('framework/cache/purifier'); + $this->createCacheFolderIfNeeded($cachePath); + $config = HTMLPurifier_HTML5Config::createDefault(); - $this->setConfig($config); + $this->setConfig($config, $cachePath); $this->resetCacheIfNeeded($config); $htmlDef = $config->getDefinition('HTML', true, true); @@ -34,6 +39,13 @@ public function __construct() $this->purifier = new HTMLPurifier($config); } + protected function createCacheFolderIfNeeded(string $cachePath): void + { + if (!file_exists($cachePath)) { + mkdir($cachePath, 0777, true); + } + } + protected function resetCacheIfNeeded(HTMLPurifier_Config $config): void { if (self::$cachedChecked) { @@ -53,9 +65,9 @@ protected function resetCacheIfNeeded(HTMLPurifier_Config $config): void self::$cachedChecked = true; } - protected function setConfig(HTMLPurifier_Config $config): void + protected function setConfig(HTMLPurifier_Config $config, string $cachePath): void { - $config->set('Cache.SerializerPath', storage_path('framework/purifier')); + $config->set('Cache.SerializerPath', $cachePath); $config->set('Core.AllowHostnameUnderscore', true); $config->set('CSS.AllowTricky', true); $config->set('HTML.SafeIframe', true); diff --git a/storage/framework/purifier/.gitignore b/storage/framework/purifier/.gitignore deleted file mode 100644 index c96a04f008e..00000000000 --- a/storage/framework/purifier/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -* -!.gitignore \ No newline at end of file From e1de1f0583352a71161a1eea5ca3b54c847dc95f Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Tue, 17 Feb 2026 18:34:14 +0000 Subject: [PATCH 13/16] git: Added old purifier location to gitignore --- storage/framework/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/storage/framework/.gitignore b/storage/framework/.gitignore index 05c4471f2b5..8d89041780c 100755 --- a/storage/framework/.gitignore +++ b/storage/framework/.gitignore @@ -7,3 +7,4 @@ routes.php routes.scanned.php schedule-* services.json +purifier/ From 9d15c79feec450be21b44f6828d62f6feef01b6a Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Wed, 18 Feb 2026 19:24:06 +0000 Subject: [PATCH 14/16] Deps: Updated PHP package versions --- composer.lock | 54 +++++++++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/composer.lock b/composer.lock index 08a932a38a1..c72b6dd1bdb 100644 --- a/composer.lock +++ b/composer.lock @@ -62,16 +62,16 @@ }, { "name": "aws/aws-sdk-php", - "version": "3.369.35", + "version": "3.369.36", "source": { "type": "git", "url": "https://github.com/aws/aws-sdk-php.git", - "reference": "0f3e296342fe965271b5dd0bded4a18bdab8aba5" + "reference": "2a69e7df5e03be9e08f9f73fb6a8cc9dd63b59c0" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/aws/aws-sdk-php/zipball/0f3e296342fe965271b5dd0bded4a18bdab8aba5", - "reference": "0f3e296342fe965271b5dd0bded4a18bdab8aba5", + "url": "https://api.github.com/repos/aws/aws-sdk-php/zipball/2a69e7df5e03be9e08f9f73fb6a8cc9dd63b59c0", + "reference": "2a69e7df5e03be9e08f9f73fb6a8cc9dd63b59c0", "shasum": "" }, "require": { @@ -153,9 +153,9 @@ "support": { "forum": "https://github.com/aws/aws-sdk-php/discussions", "issues": "https://github.com/aws/aws-sdk-php/issues", - "source": "https://github.com/aws/aws-sdk-php/tree/3.369.35" + "source": "https://github.com/aws/aws-sdk-php/tree/3.369.36" }, - "time": "2026-02-16T19:15:41+00:00" + "time": "2026-02-17T19:45:01+00:00" }, { "name": "bacon/bacon-qr-code", @@ -1800,16 +1800,16 @@ }, { "name": "laravel/framework", - "version": "v12.51.0", + "version": "v12.52.0", "source": { "type": "git", "url": "https://github.com/laravel/framework.git", - "reference": "ce4de3feb211e47c4f959d309ccf8a2733b1bc16" + "reference": "d5511fa74f4608dbb99864198b1954042aa8d5a7" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/laravel/framework/zipball/ce4de3feb211e47c4f959d309ccf8a2733b1bc16", - "reference": "ce4de3feb211e47c4f959d309ccf8a2733b1bc16", + "url": "https://api.github.com/repos/laravel/framework/zipball/d5511fa74f4608dbb99864198b1954042aa8d5a7", + "reference": "d5511fa74f4608dbb99864198b1954042aa8d5a7", "shasum": "" }, "require": { @@ -2018,7 +2018,7 @@ "issues": "https://github.com/laravel/framework/issues", "source": "https://github.com/laravel/framework" }, - "time": "2026-02-10T18:20:19+00:00" + "time": "2026-02-17T17:07:04+00:00" }, { "name": "laravel/prompts", @@ -8946,36 +8946,36 @@ }, { "name": "nunomaduro/collision", - "version": "v8.9.0", + "version": "v8.9.1", "source": { "type": "git", "url": "https://github.com/nunomaduro/collision.git", - "reference": "f52cab234f37641bd759c0ad56de17f632851419" + "reference": "a1ed3fa530fd60bc515f9303e8520fcb7d4bd935" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/nunomaduro/collision/zipball/f52cab234f37641bd759c0ad56de17f632851419", - "reference": "f52cab234f37641bd759c0ad56de17f632851419", + "url": "https://api.github.com/repos/nunomaduro/collision/zipball/a1ed3fa530fd60bc515f9303e8520fcb7d4bd935", + "reference": "a1ed3fa530fd60bc515f9303e8520fcb7d4bd935", "shasum": "" }, "require": { "filp/whoops": "^2.18.4", - "nunomaduro/termwind": "^2.3.3", + "nunomaduro/termwind": "^2.4.0", "php": "^8.2.0", "symfony/console": "^7.4.4 || ^8.0.4" }, "conflict": { "laravel/framework": "<11.48.0 || >=14.0.0", - "phpunit/phpunit": "<11.5.50 || >=13.0.0" + "phpunit/phpunit": "<11.5.50 || >=14.0.0" }, "require-dev": { "brianium/paratest": "^7.8.5", "larastan/larastan": "^3.9.2", - "laravel/framework": "^11.48.0 || ^12.51.0", + "laravel/framework": "^11.48.0 || ^12.52.0", "laravel/pint": "^1.27.1", "orchestra/testbench-core": "^9.12.0 || ^10.9.0", - "pestphp/pest": "^3.8.5 || ^4.3.2", - "sebastian/environment": "^7.2.1 || ^8.0.3" + "pestphp/pest": "^3.8.5 || ^4.4.1 || ^5.0.0", + "sebastian/environment": "^7.2.1 || ^8.0.3 || ^9.0.0" }, "type": "library", "extra": { @@ -9038,7 +9038,7 @@ "type": "patreon" } ], - "time": "2026-02-16T23:05:52+00:00" + "time": "2026-02-17T17:33:08+00:00" }, { "name": "phar-io/manifest", @@ -9560,16 +9560,16 @@ }, { "name": "phpunit/phpunit", - "version": "11.5.53", + "version": "11.5.55", "source": { "type": "git", "url": "https://github.com/sebastianbergmann/phpunit.git", - "reference": "a997a653a82845f1240d73ee73a8a4e97e4b0607" + "reference": "adc7262fccc12de2b30f12a8aa0b33775d814f00" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/phpunit/zipball/a997a653a82845f1240d73ee73a8a4e97e4b0607", - "reference": "a997a653a82845f1240d73ee73a8a4e97e4b0607", + "url": "https://api.github.com/repos/sebastianbergmann/phpunit/zipball/adc7262fccc12de2b30f12a8aa0b33775d814f00", + "reference": "adc7262fccc12de2b30f12a8aa0b33775d814f00", "shasum": "" }, "require": { @@ -9642,7 +9642,7 @@ "support": { "issues": "https://github.com/sebastianbergmann/phpunit/issues", "security": "https://github.com/sebastianbergmann/phpunit/security/policy", - "source": "https://github.com/sebastianbergmann/phpunit/tree/11.5.53" + "source": "https://github.com/sebastianbergmann/phpunit/tree/11.5.55" }, "funding": [ { @@ -9666,7 +9666,7 @@ "type": "tidelift" } ], - "time": "2026-02-10T12:28:25+00:00" + "time": "2026-02-18T12:37:06+00:00" }, { "name": "sebastian/cli-parser", From a8d96fd3892c5f98136e93c7d50fa689f0869b3d Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Wed, 18 Feb 2026 19:33:35 +0000 Subject: [PATCH 15/16] Content filter: Allowed custom diagram attribute in allow-list For #6026 --- app/Util/ConfiguredHtmlPurifier.php | 7 +++++++ tests/Entity/PageContentFilteringTest.php | 1 + 2 files changed, 8 insertions(+) diff --git a/app/Util/ConfiguredHtmlPurifier.php b/app/Util/ConfiguredHtmlPurifier.php index 87580da8bf1..ab23333882a 100644 --- a/app/Util/ConfiguredHtmlPurifier.php +++ b/app/Util/ConfiguredHtmlPurifier.php @@ -134,6 +134,13 @@ public function configureDefinition(HTMLPurifier_HTMLDefinition $definition): vo 'value' => 'Text', ] ); + + // Allow the drawio-diagram attribute on div elements + $definition->addAttribute( + 'div', + 'drawio-diagram', + 'Number', + ); } public function purify(string $html): string diff --git a/tests/Entity/PageContentFilteringTest.php b/tests/Entity/PageContentFilteringTest.php index c048c09dc5f..4f77e063369 100644 --- a/tests/Entity/PageContentFilteringTest.php +++ b/tests/Entity/PageContentFilteringTest.php @@ -463,6 +463,7 @@ public function test_allow_list_style_filtering() '
Hello!
' => '
Hello!
', '
Hello!
' => '
Hello!
', '
Hello!
' => '
Hello!
', + '
Hello!
' => '
Hello!
', ]; config()->set('app.content_filtering', 'a'); From 80204518a25bb98beb0790f06496f19ec66d61ca Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Thu, 19 Feb 2026 23:25:00 +0000 Subject: [PATCH 16/16] Page Content: Better handling for empty content filtering For #6028 --- app/Util/HtmlDocument.php | 8 +++++++- tests/Entity/PageEditorTest.php | 19 +++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/app/Util/HtmlDocument.php b/app/Util/HtmlDocument.php index 7517955b9f6..c1ec2cf415b 100644 --- a/app/Util/HtmlDocument.php +++ b/app/Util/HtmlDocument.php @@ -103,7 +103,13 @@ public function getElementById(string $elementId): ?DOMElement */ public function getBody(): DOMNode { - return $this->document->getElementsByTagName('body')[0]; + $bodies = $this->document->getElementsByTagName('body'); + + if ($bodies->length === 0) { + return new DOMElement('body', ''); + } + + return $bodies[0]; } /** diff --git a/tests/Entity/PageEditorTest.php b/tests/Entity/PageEditorTest.php index 4cd3c1671c5..67283f70420 100644 --- a/tests/Entity/PageEditorTest.php +++ b/tests/Entity/PageEditorTest.php @@ -282,4 +282,23 @@ public function test_editor_html_content_is_filtered_if_loaded_by_a_different_us $resp->assertOk(); $resp->assertDontSee('hellotherethisisaturtlemonster', false); } + + public function test_editor_html_filtered_does_not_cause_error_if_empty() + { + $emptyExamples = ['', '

', '

 

', ' ', "\n"]; + $editor = $this->users->editor(); + $page = $this->entities->page(); + $page->updated_by = $editor->id; + + foreach ($emptyExamples as $emptyExample) { + $page->html = $emptyExample; + $page->save(); + + $resp = $this->asAdmin()->get($page->getUrl('edit')); + $resp->assertOk(); + + $resp = $this->asAdmin()->get("/ajax/page/{$page->id}"); + $resp->assertOk(); + } + } }