From 10ebe53bd9c9f086efaa35eecd3656f1a201d84d Mon Sep 17 00:00:00 2001
From: Dan Brown
Date: Fri, 13 Feb 2026 14:14:28 +0000
Subject: [PATCH 01/16] Page Content: Added more complex & configurable content
filtering
- Added new option to control parts of the filter.
- Added allow-list filtering pass via HTMLPurifier.
---
app/Activity/Models/Comment.php | 4 +-
app/Config/app.php | 12 ++
app/Entities/Tools/EntityHtmlDescription.php | 4 +-
app/Entities/Tools/PageContent.php | 24 +++-
app/Theming/CustomHtmlHeadContentProvider.php | 22 +--
app/Util/HtmlContentFilter.php | 129 ++++++++++++------
app/Util/HtmlContentFilterConfig.php | 31 +++++
composer.json | 4 +-
composer.lock | 123 ++++++++++++++++-
storage/purifier/.gitignore | 2 +
10 files changed, 292 insertions(+), 63 deletions(-)
create mode 100644 app/Util/HtmlContentFilterConfig.php
create mode 100644 storage/purifier/.gitignore
diff --git a/app/Activity/Models/Comment.php b/app/Activity/Models/Comment.php
index ce05e3df35b..ab7d917729c 100644
--- a/app/Activity/Models/Comment.php
+++ b/app/Activity/Models/Comment.php
@@ -8,6 +8,7 @@
use BookStack\Users\Models\HasCreatorAndUpdater;
use BookStack\Users\Models\OwnableInterface;
use BookStack\Util\HtmlContentFilter;
+use BookStack\Util\HtmlContentFilterConfig;
use Illuminate\Database\Eloquent\Builder;
use Illuminate\Database\Eloquent\Factories\HasFactory;
use Illuminate\Database\Eloquent\Relations\BelongsTo;
@@ -82,7 +83,8 @@ public function logDescriptor(): string
public function safeHtml(): string
{
- return HtmlContentFilter::removeActiveContentFromHtmlString($this->html ?? '');
+ $filter = new HtmlContentFilter(new HtmlContentFilterConfig());
+ return $filter->filterString($this->html ?? '');
}
public function jointPermissions(): HasMany
diff --git a/app/Config/app.php b/app/Config/app.php
index 40e542d3e16..acd27e98c02 100644
--- a/app/Config/app.php
+++ b/app/Config/app.php
@@ -42,6 +42,18 @@
// Even when overridden the WYSIWYG editor may still escape script content.
'allow_content_scripts' => env('ALLOW_CONTENT_SCRIPTS', false),
+ // Control the behaviour of page content filtering.
+ // This setting is a collection of characters which represent different available filters:
+ // - j - Filter out JavaScript based content
+ // - h - Filter out unexpected, potentially dangerous, HTML elements
+ // - f - Filter out unexpected form elements
+ // - a - Run content through a more complex allow-list filter
+ // This defaults to using all filters, unless ALLOW_CONTENT_SCRIPTS is set to true in which case no filters are used.
+ // Note: These filters are a best attempt, and may not be 100% effective. They are typically a layer used in addition to other security measures.
+ // TODO - Add to example env
+ // TODO - Remove allow_content_scripts option above
+ 'content_filtering' => env('CONTENT_FILTERING', env('ALLOW_CONTENT_SCRIPTS', false) === true ? '' : 'jfha'),
+
// Allow server-side fetches to be performed to potentially unknown
// and user-provided locations. Primarily used in exports when loading
// in externally referenced assets.
diff --git a/app/Entities/Tools/EntityHtmlDescription.php b/app/Entities/Tools/EntityHtmlDescription.php
index b14deb257a7..6bbfb9b6651 100644
--- a/app/Entities/Tools/EntityHtmlDescription.php
+++ b/app/Entities/Tools/EntityHtmlDescription.php
@@ -6,6 +6,7 @@
use BookStack\Entities\Models\Bookshelf;
use BookStack\Entities\Models\Chapter;
use BookStack\Util\HtmlContentFilter;
+use BookStack\Util\HtmlContentFilterConfig;
class EntityHtmlDescription
{
@@ -50,7 +51,8 @@ public function getHtml(bool $raw = false): string
return $html;
}
- return HtmlContentFilter::removeActiveContentFromHtmlString($html);
+ $filter = new HtmlContentFilter(new HtmlContentFilterConfig());
+ return $filter->filterString($html);
}
public function getPlain(): string
diff --git a/app/Entities/Tools/PageContent.php b/app/Entities/Tools/PageContent.php
index 5358e8f0c5b..ca06e696185 100644
--- a/app/Entities/Tools/PageContent.php
+++ b/app/Entities/Tools/PageContent.php
@@ -13,6 +13,7 @@
use BookStack\Uploads\ImageService;
use BookStack\Users\Models\User;
use BookStack\Util\HtmlContentFilter;
+use BookStack\Util\HtmlContentFilterConfig;
use BookStack\Util\HtmlDocument;
use BookStack\Util\WebSafeMimeSniffer;
use Closure;
@@ -317,11 +318,28 @@ public function render(bool $blankIncludes = false): string
$this->updateIdsRecursively($doc->getBody(), 0, $idMap, $changeMap);
}
- if (!config('app.allow_content_scripts')) {
- HtmlContentFilter::removeActiveContentFromDocument($doc);
+ $cacheKey = $this->getContentCacheKey($doc->getBodyInnerHtml());
+ $cached = cache()->get($cacheKey, null);
+ if ($cached !== null) {
+ return $cached;
}
- return $doc->getBodyInnerHtml();
+ $filterConfig = HtmlContentFilterConfig::fromConfigString(config('app.content_filtering'));
+ $filter = new HtmlContentFilter($filterConfig);
+ $filtered = $filter->filterDocument($doc);
+
+ $cacheTime = 86400 * 7; // 1 week
+ cache()->put($cacheKey, $filtered, $cacheTime);
+
+ return $filtered;
+ }
+
+ protected function getContentCacheKey(string $html): string
+ {
+ $contentHash = md5($html);
+ $contentId = $this->page->id;
+ $contentTime = $this->page->updated_at->timestamp;
+ return "page-content-cache::{$contentId}::{$contentTime}::{$contentHash}";
}
/**
diff --git a/app/Theming/CustomHtmlHeadContentProvider.php b/app/Theming/CustomHtmlHeadContentProvider.php
index e0cf5b3b5c7..dab30606c34 100644
--- a/app/Theming/CustomHtmlHeadContentProvider.php
+++ b/app/Theming/CustomHtmlHeadContentProvider.php
@@ -4,25 +4,16 @@
use BookStack\Util\CspService;
use BookStack\Util\HtmlContentFilter;
+use BookStack\Util\HtmlContentFilterConfig;
use BookStack\Util\HtmlNonceApplicator;
use Illuminate\Contracts\Cache\Repository as Cache;
class CustomHtmlHeadContentProvider
{
- /**
- * @var CspService
- */
- protected $cspService;
-
- /**
- * @var Cache
- */
- protected $cache;
-
- public function __construct(CspService $cspService, Cache $cache)
- {
- $this->cspService = $cspService;
- $this->cache = $cache;
+ public function __construct(
+ protected CspService $cspService,
+ protected Cache $cache
+ ) {
}
/**
@@ -50,7 +41,8 @@ public function forExport(): string
$hash = md5($content);
return $this->cache->remember('custom-head-export:' . $hash, 86400, function () use ($content) {
- return HtmlContentFilter::removeActiveContentFromHtmlString($content);
+ $config = new HtmlContentFilterConfig(filterOutNonContentElements: false);
+ return (new HtmlContentFilter($config))->filterString($content);
});
}
diff --git a/app/Util/HtmlContentFilter.php b/app/Util/HtmlContentFilter.php
index ad5bf8c5fd3..842e4246736 100644
--- a/app/Util/HtmlContentFilter.php
+++ b/app/Util/HtmlContentFilter.php
@@ -5,15 +5,53 @@
use DOMAttr;
use DOMElement;
use DOMNodeList;
+use HTMLPurifier;
+use HTMLPurifier_HTML5Config;
class HtmlContentFilter
{
- /**
- * Remove all active content from the given HTML document.
- * This aims to cover anything which can dynamically deal with, or send, data
- * like any JavaScript actions or form content.
- */
- public static function removeActiveContentFromDocument(HtmlDocument $doc): void
+ public function __construct(
+ protected HtmlContentFilterConfig $config
+ ) {
+ }
+
+ public function filterDocument(HtmlDocument $doc): string
+ {
+ if ($this->config->filterOutJavaScript) {
+ $this->filterOutScriptsFromDocument($doc);
+ }
+ if ($this->config->filterOutFormElements) {
+ $this->filterOutFormElementsFromDocument($doc);
+ }
+ if ($this->config->filterOutBadHtmlElements) {
+ $this->filterOutBadHtmlElementsFromDocument($doc);
+ }
+ if ($this->config->filterOutNonContentElements) {
+ $this->filterOutNonContentElementsFromDocument($doc);
+ }
+
+ $filtered = $doc->getBodyInnerHtml();
+ if ($this->config->useAllowListFilter) {
+ $filtered = $this->applyAllowListFiltering($filtered);
+ }
+
+ return $filtered;
+ }
+
+ public function filterString(string $html): string
+ {
+ return $this->filterDocument(new HtmlDocument($html));
+ }
+
+ protected function applyAllowListFiltering(string $html): string
+ {
+ $config = HTMLPurifier_HTML5Config::createDefault();
+ $config->set('Cache.SerializerPath', storage_path('purifier'));
+ $purifier = new HTMLPurifier($config);
+ return $purifier->purify($html);
+ }
+
+ protected function filterOutScriptsFromDocument(HtmlDocument $doc): void
{
// Remove standard script tags
$scriptElems = $doc->queryXPath('//script');
@@ -27,10 +65,6 @@ public static function removeActiveContentFromDocument(HtmlDocument $doc): void
$badForms = $doc->queryXPath('//*[' . static::xpathContains('@action', 'javascript:') . '] | //*[' . static::xpathContains('@formaction', 'javascript:') . ']');
static::removeNodes($badForms);
- // Remove meta tag to prevent external redirects
- $metaTags = $doc->queryXPath('//meta[' . static::xpathContains('@content', 'url') . ']');
- static::removeNodes($metaTags);
-
// Remove data or JavaScript iFrames
$badIframes = $doc->queryXPath('//*[' . static::xpathContains('@src', 'data:') . '] | //*[' . static::xpathContains('@src', 'javascript:') . '] | //*[@srcdoc]');
static::removeNodes($badIframes);
@@ -49,7 +83,10 @@ public static function removeActiveContentFromDocument(HtmlDocument $doc): void
// Remove 'on*' attributes
$onAttributes = $doc->queryXPath('//@*[starts-with(name(), \'on\')]');
static::removeAttributes($onAttributes);
+ }
+ protected function filterOutFormElementsFromDocument(HtmlDocument $doc): void
+ {
// Remove form elements
$formElements = ['form', 'fieldset', 'button', 'textarea', 'select'];
foreach ($formElements as $formElement) {
@@ -75,41 +112,21 @@ public static function removeActiveContentFromDocument(HtmlDocument $doc): void
}
}
- /**
- * Remove active content from the given HTML string.
- * This aims to cover anything which can dynamically deal with, or send, data
- * like any JavaScript actions or form content.
- */
- public static function removeActiveContentFromHtmlString(string $html): string
+ protected function filterOutBadHtmlElementsFromDocument(HtmlDocument $doc): void
{
- if (empty($html)) {
- return $html;
- }
-
- $doc = new HtmlDocument($html);
- static::removeActiveContentFromDocument($doc);
-
- return $doc->getBodyInnerHtml();
- }
-
- /**
- * Alias using the old method name to avoid potential compatibility breaks during patch release.
- * To remove in future feature release.
- * @deprecated Use removeActiveContentFromDocument instead.
- */
- public static function removeScriptsFromDocument(HtmlDocument $doc): void
- {
- static::removeActiveContentFromDocument($doc);
+ // Remove meta tag to prevent external redirects
+ $metaTags = $doc->queryXPath('//meta[' . static::xpathContains('@content', 'url') . ']');
+ static::removeNodes($metaTags);
}
- /**
- * Alias using the old method name to avoid potential compatibility breaks during patch release.
- * To remove in future feature release.
- * @deprecated Use removeActiveContentFromHtmlString instead.
- */
- public static function removeScriptsFromHtmlString(string $html): string
+ protected function filterOutNonContentElementsFromDocument(HtmlDocument $doc): void
{
- return static::removeActiveContentFromHtmlString($html);
+ // Remove non-content elements
+ $formElements = ['link', 'style', 'meta', 'title', 'template'];
+ foreach ($formElements as $formElement) {
+ $matchingFormElements = $doc->queryXPath('//' . $formElement);
+ static::removeNodes($matchingFormElements);
+ }
}
/**
@@ -147,4 +164,34 @@ protected static function removeAttributes(DOMNodeList $attrs): void
$parentNode->removeAttribute($attrName);
}
}
+
+ /**
+ * Alias using the old method name to avoid potential compatibility breaks during patch release.
+ * To remove in future feature release.
+ * @deprecated Use filterDocument instead.
+ */
+ public static function removeScriptsFromDocument(HtmlDocument $doc): void
+ {
+ $config = new HtmlContentFilterConfig(
+ filterOutNonContentElements: false,
+ useAllowListFilter: false,
+ );
+ $filter = new static($config);
+ $filter->filterDocument($doc);
+ }
+
+ /**
+ * Alias using the old method name to avoid potential compatibility breaks during patch release.
+ * To remove in future feature release.
+ * @deprecated Use filterString instead.
+ */
+ public static function removeScriptsFromHtmlString(string $html): string
+ {
+ $config = new HtmlContentFilterConfig(
+ filterOutNonContentElements: false,
+ useAllowListFilter: false,
+ );
+ $filter = new static($config);
+ return $filter->filterString($html);
+ }
}
diff --git a/app/Util/HtmlContentFilterConfig.php b/app/Util/HtmlContentFilterConfig.php
new file mode 100644
index 00000000000..2cb77ea5815
--- /dev/null
+++ b/app/Util/HtmlContentFilterConfig.php
@@ -0,0 +1,31 @@
+=5.3"
+ },
+ "require-dev": {
+ "masterminds/html5": "^2.7",
+ "php-coveralls/php-coveralls": "^1.1|^2.1",
+ "phpunit/phpunit": ">=4.7 <10.0"
+ },
+ "suggest": {
+ "masterminds/html5": "Required to use HTMLPurifier_Lexer_HTML5"
+ },
+ "type": "library",
+ "autoload": {
+ "classmap": [
+ "library/HTMLPurifier/"
+ ]
+ },
+ "notification-url": "https://packagist.org/downloads/",
+ "license": [
+ "MIT"
+ ],
+ "authors": [
+ {
+ "name": "xemlock",
+ "email": "xemlock@gmail.com"
+ }
+ ],
+ "description": "HTML5 support for HTML Purifier",
+ "homepage": "https://github.com/xemlock/htmlpurifier-html5",
+ "keywords": [
+ "HTML5",
+ "Purifier",
+ "html",
+ "htmlpurifier",
+ "security",
+ "tidy",
+ "validator",
+ "xss"
+ ],
+ "support": {
+ "issues": "https://github.com/xemlock/htmlpurifier-html5/issues",
+ "source": "https://github.com/xemlock/htmlpurifier-html5/tree/v0.1.12"
+ },
+ "time": "2026-02-09T21:03:14+00:00"
}
],
"packages-dev": [
diff --git a/storage/purifier/.gitignore b/storage/purifier/.gitignore
new file mode 100644
index 00000000000..c96a04f008e
--- /dev/null
+++ b/storage/purifier/.gitignore
@@ -0,0 +1,2 @@
+*
+!.gitignore
\ No newline at end of file
From 0f040fe8b1bbaaee4d088262aa4482a4b19b1c46 Mon Sep 17 00:00:00 2001
From: Dan Brown
Date: Sun, 15 Feb 2026 16:17:03 +0000
Subject: [PATCH 02/16] Content: Tuned HTML purifier for our use
Tested with a range of supported content types, including uncommon
ones, and added support or changed config where needed.
Also reviewed the docs for all HTMLPurifier options to assess which
are relevant.
---
app/Entities/Tools/PageContent.php | 3 +-
app/Util/ConfiguredHtmlPurifier.php | 101 ++++++++++++++++++++++++++++
app/Util/HtmlContentFilter.php | 6 +-
3 files changed, 104 insertions(+), 6 deletions(-)
create mode 100644 app/Util/ConfiguredHtmlPurifier.php
diff --git a/app/Entities/Tools/PageContent.php b/app/Entities/Tools/PageContent.php
index ca06e696185..67c6e4cf6e8 100644
--- a/app/Entities/Tools/PageContent.php
+++ b/app/Entities/Tools/PageContent.php
@@ -321,12 +321,13 @@ public function render(bool $blankIncludes = false): string
$cacheKey = $this->getContentCacheKey($doc->getBodyInnerHtml());
$cached = cache()->get($cacheKey, null);
if ($cached !== null) {
- return $cached;
+// return $cached;
}
$filterConfig = HtmlContentFilterConfig::fromConfigString(config('app.content_filtering'));
$filter = new HtmlContentFilter($filterConfig);
$filtered = $filter->filterDocument($doc);
+// $filtered = $doc->getBodyInnerHtml();
$cacheTime = 86400 * 7; // 1 week
cache()->put($cacheKey, $filtered, $cacheTime);
diff --git a/app/Util/ConfiguredHtmlPurifier.php b/app/Util/ConfiguredHtmlPurifier.php
new file mode 100644
index 00000000000..5aab25b4745
--- /dev/null
+++ b/app/Util/ConfiguredHtmlPurifier.php
@@ -0,0 +1,101 @@
+setConfig($config);
+
+ $htmlDef = $config->getDefinition('HTML', true, true);
+ if ($htmlDef instanceof HTMLPurifier_HTMLDefinition) {
+ $this->configureDefinition($htmlDef);
+ }
+
+ $this->purifier = new HTMLPurifier($config);
+ }
+
+ protected function setConfig(HTMLPurifier_Config $config): void
+ {
+ $config->set('Cache.SerializerPath', storage_path('purifier'));
+ $config->set('CSS.AllowTricky', true);
+ $config->set('HTML.SafeIframe', true);
+ $config->set('Attr.EnableID', true);
+ $config->set('Attr.ID.HTML5', true);
+ $config->set('Output.FixInnerHTML', false);
+ $config->set('URI.SafeIframeRegexp', '%^(http://|https://)%');
+ $config->set('URI.AllowedSchemes', [
+ 'http' => true,
+ 'https' => true,
+ 'mailto' => true,
+ 'ftp' => true,
+ 'nntp' => true,
+ 'news' => true,
+ 'tel' => true,
+ 'file' => true,
+ ]);
+
+ $config->set('Cache.DefinitionImpl', null); // Disable cache during testing
+ }
+
+ public function configureDefinition(HTMLPurifier_HTMLDefinition $definition): void
+ {
+ // Allow the object element
+ $definition->addElement(
+ 'object',
+ 'Inline',
+ 'Flow',
+ 'Common',
+ [
+ 'data' => 'URI',
+ 'type' => 'Text',
+ 'width' => 'Length',
+ 'height' => 'Length',
+ ]
+ );
+
+ // Allow the embed element
+ $definition->addElement(
+ 'embed',
+ 'Inline',
+ 'Empty',
+ 'Common',
+ [
+ 'src' => 'URI',
+ 'type' => 'Text',
+ 'width' => 'Length',
+ 'height' => 'Length',
+ ]
+ );
+
+ // Allow checkbox inputs
+ $definition->addElement(
+ 'input',
+ 'Formctrl',
+ 'Empty',
+ 'Common',
+ [
+ 'checked' => 'Bool#checked',
+ 'disabled' => 'Bool#disabled',
+ 'name' => 'Text',
+ 'readonly' => 'Bool#readonly',
+ 'type' => 'Enum#checkbox',
+ 'value' => 'Text',
+ ]
+ );
+ }
+
+ public function purify(string $html): string
+ {
+ return $this->purifier->purify($html);
+ }
+}
diff --git a/app/Util/HtmlContentFilter.php b/app/Util/HtmlContentFilter.php
index 842e4246736..79b1cdc93c4 100644
--- a/app/Util/HtmlContentFilter.php
+++ b/app/Util/HtmlContentFilter.php
@@ -5,8 +5,6 @@
use DOMAttr;
use DOMElement;
use DOMNodeList;
-use HTMLPurifier;
-use HTMLPurifier_HTML5Config;
class HtmlContentFilter
{
@@ -45,9 +43,7 @@ public function filterString(string $html): string
protected function applyAllowListFiltering(string $html): string
{
- $config = HTMLPurifier_HTML5Config::createDefault();
- $config->set('Cache.SerializerPath', storage_path('purifier'));
- $purifier = new HTMLPurifier($config);
+ $purifier = new ConfiguredHtmlPurifier();
return $purifier->purify($html);
}
From 227027fc4570270395fe5dd0aa2bb8201163752a Mon Sep 17 00:00:00 2001
From: Dan Brown
Date: Sun, 15 Feb 2026 16:46:09 +0000
Subject: [PATCH 03/16] Content: Updated purifier and content caching
- Updated page content cache to use app version in cache key
- Moved purifier cache into framework to better work with existing
expected folders.
- Added app version check to purifier so that it will reset its own
cache on app version change.
---
app/Entities/Tools/PageContent.php | 7 +++--
app/Util/ConfiguredHtmlPurifier.php | 34 +++++++++++++++++++--
storage/{ => framework}/purifier/.gitignore | 0
3 files changed, 36 insertions(+), 5 deletions(-)
rename storage/{ => framework}/purifier/.gitignore (100%)
diff --git a/app/Entities/Tools/PageContent.php b/app/Entities/Tools/PageContent.php
index 67c6e4cf6e8..436c4f0bed8 100644
--- a/app/Entities/Tools/PageContent.php
+++ b/app/Entities/Tools/PageContent.php
@@ -2,6 +2,7 @@
namespace BookStack\Entities\Tools;
+use BookStack\App\AppVersion;
use BookStack\Entities\Models\Page;
use BookStack\Entities\Queries\PageQueries;
use BookStack\Entities\Tools\Markdown\MarkdownToHtml;
@@ -321,13 +322,12 @@ public function render(bool $blankIncludes = false): string
$cacheKey = $this->getContentCacheKey($doc->getBodyInnerHtml());
$cached = cache()->get($cacheKey, null);
if ($cached !== null) {
-// return $cached;
+ return $cached;
}
$filterConfig = HtmlContentFilterConfig::fromConfigString(config('app.content_filtering'));
$filter = new HtmlContentFilter($filterConfig);
$filtered = $filter->filterDocument($doc);
-// $filtered = $doc->getBodyInnerHtml();
$cacheTime = 86400 * 7; // 1 week
cache()->put($cacheKey, $filtered, $cacheTime);
@@ -340,7 +340,8 @@ protected function getContentCacheKey(string $html): string
$contentHash = md5($html);
$contentId = $this->page->id;
$contentTime = $this->page->updated_at->timestamp;
- return "page-content-cache::{$contentId}::{$contentTime}::{$contentHash}";
+ $appVersion = AppVersion::get();
+ return "page-content-cache::{$appVersion}::{$contentId}::{$contentTime}::{$contentHash}";
}
/**
diff --git a/app/Util/ConfiguredHtmlPurifier.php b/app/Util/ConfiguredHtmlPurifier.php
index 5aab25b4745..d63d2ad5f3c 100644
--- a/app/Util/ConfiguredHtmlPurifier.php
+++ b/app/Util/ConfiguredHtmlPurifier.php
@@ -2,19 +2,29 @@
namespace BookStack\Util;
+use BookStack\App\AppVersion;
use HTMLPurifier;
use HTMLPurifier_Config;
+use HTMLPurifier_DefinitionCache_Serializer;
use HTMLPurifier_HTML5Config;
use HTMLPurifier_HTMLDefinition;
+/**
+ * Provides a configured HTML Purifier instance.
+ * https://github.com/ezyang/htmlpurifier
+ * Also uses this to extend support to HTML5 elements:
+ * https://github.com/xemlock/htmlpurifier-html5
+ */
class ConfiguredHtmlPurifier
{
protected HTMLPurifier $purifier;
+ protected static bool $cachedChecked = false;
public function __construct()
{
$config = HTMLPurifier_HTML5Config::createDefault();
$this->setConfig($config);
+ $this->resetCacheIfNeeded($config);
$htmlDef = $config->getDefinition('HTML', true, true);
if ($htmlDef instanceof HTMLPurifier_HTMLDefinition) {
@@ -24,9 +34,29 @@ public function __construct()
$this->purifier = new HTMLPurifier($config);
}
+ protected function resetCacheIfNeeded(HTMLPurifier_Config $config): void
+ {
+ if (self::$cachedChecked) {
+ return;
+ }
+
+ $cachedForVersion = cache('htmlpurifier::cache-version');
+ $appVersion = AppVersion::get();
+ if ($cachedForVersion !== $appVersion) {
+ foreach (['HTML', 'CSS', 'URI'] as $name) {
+ $cache = new HTMLPurifier_DefinitionCache_Serializer($name);
+ $cache->flush($config);
+ }
+ cache()->set('htmlpurifier::cache-version', $appVersion);
+ }
+
+ self::$cachedChecked = true;
+ }
+
protected function setConfig(HTMLPurifier_Config $config): void
{
- $config->set('Cache.SerializerPath', storage_path('purifier'));
+ $config->set('Cache.SerializerPath', storage_path('framework/purifier'));
+ $config->set('Core.AllowHostnameUnderscore', true);
$config->set('CSS.AllowTricky', true);
$config->set('HTML.SafeIframe', true);
$config->set('Attr.EnableID', true);
@@ -44,7 +74,7 @@ protected function setConfig(HTMLPurifier_Config $config): void
'file' => true,
]);
- $config->set('Cache.DefinitionImpl', null); // Disable cache during testing
+ // $config->set('Cache.DefinitionImpl', null); // Disable cache during testing
}
public function configureDefinition(HTMLPurifier_HTMLDefinition $definition): void
diff --git a/storage/purifier/.gitignore b/storage/framework/purifier/.gitignore
similarity index 100%
rename from storage/purifier/.gitignore
rename to storage/framework/purifier/.gitignore
From 035be66ebc7d4a312b5a240283a5a13da9694779 Mon Sep 17 00:00:00 2001
From: Dan Brown
Date: Sun, 15 Feb 2026 18:44:14 +0000
Subject: [PATCH 04/16] Content: Updated tests and CSP usage of content script
setting
Updated CSP to use the new content_filtering option.
Split out content filtering tests into their own class.
Updated tests where needed to adapt to the changes.
---
app/Entities/Tools/PageContent.php | 2 +-
app/Theming/CustomHtmlHeadContentProvider.php | 2 +-
app/Util/ConfiguredHtmlPurifier.php | 2 +-
app/Util/CspService.php | 9 +-
tests/Entity/PageContentFilteringTest.php | 353 ++++++++++++++++++
tests/Entity/PageContentTest.php | 348 +----------------
tests/SecurityHeaderTest.php | 6 +-
7 files changed, 368 insertions(+), 354 deletions(-)
create mode 100644 tests/Entity/PageContentFilteringTest.php
diff --git a/app/Entities/Tools/PageContent.php b/app/Entities/Tools/PageContent.php
index 436c4f0bed8..f8a0617395b 100644
--- a/app/Entities/Tools/PageContent.php
+++ b/app/Entities/Tools/PageContent.php
@@ -339,7 +339,7 @@ protected function getContentCacheKey(string $html): string
{
$contentHash = md5($html);
$contentId = $this->page->id;
- $contentTime = $this->page->updated_at->timestamp;
+ $contentTime = $this->page->updated_at?->timestamp ?? time();
$appVersion = AppVersion::get();
return "page-content-cache::{$appVersion}::{$contentId}::{$contentTime}::{$contentHash}";
}
diff --git a/app/Theming/CustomHtmlHeadContentProvider.php b/app/Theming/CustomHtmlHeadContentProvider.php
index dab30606c34..9f794a077ba 100644
--- a/app/Theming/CustomHtmlHeadContentProvider.php
+++ b/app/Theming/CustomHtmlHeadContentProvider.php
@@ -41,7 +41,7 @@ public function forExport(): string
$hash = md5($content);
return $this->cache->remember('custom-head-export:' . $hash, 86400, function () use ($content) {
- $config = new HtmlContentFilterConfig(filterOutNonContentElements: false);
+ $config = new HtmlContentFilterConfig(filterOutNonContentElements: false, useAllowListFilter: false);
return (new HtmlContentFilter($config))->filterString($content);
});
}
diff --git a/app/Util/ConfiguredHtmlPurifier.php b/app/Util/ConfiguredHtmlPurifier.php
index d63d2ad5f3c..014b2a3bf2b 100644
--- a/app/Util/ConfiguredHtmlPurifier.php
+++ b/app/Util/ConfiguredHtmlPurifier.php
@@ -62,7 +62,7 @@ protected function setConfig(HTMLPurifier_Config $config): void
$config->set('Attr.EnableID', true);
$config->set('Attr.ID.HTML5', true);
$config->set('Output.FixInnerHTML', false);
- $config->set('URI.SafeIframeRegexp', '%^(http://|https://)%');
+ $config->set('URI.SafeIframeRegexp', '%^(http://|https://|//)%');
$config->set('URI.AllowedSchemes', [
'http' => true,
'https' => true,
diff --git a/app/Util/CspService.php b/app/Util/CspService.php
index 4262b5c98f8..466acb49148 100644
--- a/app/Util/CspService.php
+++ b/app/Util/CspService.php
@@ -65,7 +65,7 @@ public function allowedIFrameHostsConfigured(): bool
*/
protected function getScriptSrc(): string
{
- if (config('app.allow_content_scripts')) {
+ if ($this->scriptFilteringDisabled()) {
return '';
}
@@ -108,7 +108,7 @@ protected function getFrameSrc(): string
*/
protected function getObjectSrc(): string
{
- if (config('app.allow_content_scripts')) {
+ if ($this->scriptFilteringDisabled()) {
return '';
}
@@ -124,6 +124,11 @@ protected function getBaseUri(): string
return "base-uri 'self'";
}
+ protected function scriptFilteringDisabled(): bool
+ {
+ return !HtmlContentFilterConfig::fromConfigString(config('app.content_filtering'))->filterOutJavaScript;
+ }
+
protected function getAllowedIframeHosts(): array
{
$hosts = config('app.iframe_hosts') ?? '';
diff --git a/tests/Entity/PageContentFilteringTest.php b/tests/Entity/PageContentFilteringTest.php
new file mode 100644
index 00000000000..e1295034d68
--- /dev/null
+++ b/tests/Entity/PageContentFilteringTest.php
@@ -0,0 +1,353 @@
+asEditor();
+ $page = $this->entities->page();
+ $script = 'abc123abc123';
+ $page->html = "escape {$script}";
+ $page->save();
+
+ $pageView = $this->get($page->getUrl());
+ $pageView->assertStatus(200);
+ $pageView->assertDontSee($script, false);
+ $pageView->assertSee('abc123abc123');
+ }
+
+ public function test_more_complex_content_script_escaping_scenarios()
+ {
+ $checks = [
+ "Some script
",
+ "",
+ "Some script
",
+ "Some script
",
+ "Some script
",
+ "Some script
",
+ ];
+
+ $this->asEditor();
+ $page = $this->entities->page();
+
+ foreach ($checks as $check) {
+ $page->html = $check;
+ $page->save();
+
+ $pageView = $this->get($page->getUrl());
+ $pageView->assertStatus(200);
+ $this->withHtml($pageView)->assertElementNotContains('.page-content', '');
+ }
+ }
+
+ public function test_js_and_base64_src_urls_are_removed()
+ {
+ $checks = [
+ '',
+ '',
+ '',
+ '',
+ '',
+ '',
+ '',
+ '
',
+ '
',
+ '
',
+ '
',
+ '
',
+ '
',
+ '
',
+ '',
+ '',
+ '
',
+ ];
+
+ $this->asEditor();
+ $page = $this->entities->page();
+
+ foreach ($checks as $check) {
+ $page->html = $check;
+ $page->save();
+
+ $pageView = $this->get($page->getUrl());
+ $pageView->assertStatus(200);
+ $html = $this->withHtml($pageView);
+ $html->assertElementNotContains('.page-content', '');
+ $html->assertElementNotContains('.page-content', 'src=');
+ $html->assertElementNotContains('.page-content', 'javascript:');
+ $html->assertElementNotContains('.page-content', 'data:');
+ $html->assertElementNotContains('.page-content', 'base64');
+ }
+ }
+
+ public function test_javascript_uri_links_are_removed()
+ {
+ $checks = [
+ 'withHtml($pageView)->assertElementNotContains('.page-content', 'href=javascript:');
+ }
+ }
+
+ public function test_form_actions_with_javascript_are_removed()
+ {
+ $checks = [
+ '',
+ 'Click me',
+ 'Click me',
+ '',
+ '',
+ ];
+
+ $this->asEditor();
+ $page = $this->entities->page();
+
+ foreach ($checks as $check) {
+ $page->html = $check;
+ $page->save();
+
+ $pageView = $this->get($page->getUrl());
+ $pageView->assertStatus(200);
+ $pageView->assertDontSee('id="xss"', false);
+ $pageView->assertDontSee('action=javascript:', false);
+ $pageView->assertDontSee('action=JaVaScRiPt:', false);
+ $pageView->assertDontSee('formaction=javascript:', false);
+ $pageView->assertDontSee('formaction=JaVaScRiPt:', false);
+ }
+ }
+
+ public function test_form_elements_are_removed()
+ {
+ $checks = [
+ 'thisisacattofind
',
+ 'thisisacattofind
',
+ 'thisisacattofind
',
+ 'thisisacattofind
',
+ 'thisisacattofind
',
+ 'thisisacattofind
',
+ 'thisisacattofind
',
+ <<<'TESTCASE'
+
+TESTCASE
+
+ ];
+
+ $this->asEditor();
+ $page = $this->entities->page();
+
+ foreach ($checks as $check) {
+ $page->html = $check;
+ $page->save();
+
+ $pageView = $this->get($page->getUrl());
+ $pageView->assertStatus(200);
+ $pageView->assertSee('thisisacattofind');
+ $pageView->assertDontSee('thisdogshouldnotbefound');
+ }
+ }
+
+ public function test_form_attributes_are_removed()
+ {
+ $withinSvgSample = <<<'TESTCASE'
+
+TESTCASE;
+
+ $checks = [
+ 'formaction' => 'thisisacattofind
',
+ 'form' => 'thisisacattofind
',
+ 'formmethod' => 'thisisacattofind
',
+ 'formtarget' => 'thisisacattofind
',
+ 'FORMTARGET' => 'thisisacattofind
',
+ ];
+
+ $this->asEditor();
+ $page = $this->entities->page();
+
+ foreach ($checks as $attribute => $check) {
+ $page->html = $check;
+ $page->save();
+
+ $pageView = $this->get($page->getUrl());
+ $pageView->assertStatus(200);
+ $pageView->assertSee('thisisacattofind');
+ $this->withHtml($pageView)->assertElementNotExists(".page-content [{$attribute}]");
+ }
+
+ $page->html = $withinSvgSample;
+ $page->save();
+ $pageView = $this->get($page->getUrl());
+ $pageView->assertStatus(200);
+ $html = $this->withHtml($pageView);
+ foreach ($checks as $attribute => $check) {
+ $pageView->assertSee('thisisacattofind');
+ $html->assertElementNotExists(".page-content [{$attribute}]");
+ }
+ }
+
+ public function test_metadata_redirects_are_removed()
+ {
+ $checks = [
+ '',
+ '',
+ '',
+ ];
+
+ $this->asEditor();
+ $page = $this->entities->page();
+
+ foreach ($checks as $check) {
+ $page->html = $check;
+ $page->save();
+
+ $pageView = $this->get($page->getUrl());
+ $pageView->assertStatus(200);
+ $this->withHtml($pageView)->assertElementNotContains('.page-content', '');
+ $this->withHtml($pageView)->assertElementNotContains('.page-content', '');
+ $this->withHtml($pageView)->assertElementNotContains('.page-content', 'content=');
+ $this->withHtml($pageView)->assertElementNotContains('.page-content', 'external_url');
+ }
+ }
+
+ public function test_page_inline_on_attributes_removed_by_default()
+ {
+ $this->asEditor();
+ $page = $this->entities->page();
+ $script = 'Hello
';
+ $page->html = "escape {$script}";
+ $page->save();
+
+ $pageView = $this->get($page->getUrl());
+ $pageView->assertStatus(200);
+ $pageView->assertDontSee($script, false);
+ $pageView->assertSee('Hello
', false);
+ }
+
+ public function test_more_complex_inline_on_attributes_escaping_scenarios()
+ {
+ $checks = [
+ 'Hello
',
+ 'Hello
',
+ 'Lorem ipsum dolor sit amet.
Hello
',
+ 'Lorem ipsum dolor sit amet.
Hello
',
+ 'Lorem ipsum dolor sit amet.
Hello
',
+ 'Lorem ipsum dolor sit amet.
Hello
',
+ 'xss link\',
+ ];
+
+ $this->asEditor();
+ $page = $this->entities->page();
+
+ foreach ($checks as $check) {
+ $page->html = $check;
+ $page->save();
+
+ $pageView = $this->get($page->getUrl());
+ $pageView->assertStatus(200);
+ $this->withHtml($pageView)->assertElementNotContains('.page-content', 'onclick');
+ }
+ }
+
+ public function test_page_content_scripts_show_with_filters_disabled()
+ {
+ $this->asEditor();
+ $page = $this->entities->page();
+ config()->set('app.content_filtering', '');
+
+ $script = 'abc123abc123';
+ $page->html = "no escape {$script}";
+ $page->save();
+
+ $pageView = $this->get($page->getUrl());
+ $pageView->assertSee($script, false);
+ $pageView->assertDontSee('abc123abc123');
+ }
+
+ public function test_svg_script_usage_is_removed()
+ {
+ $checks = [
+ '',
+ '',
+ '',
+ '',
+ '',
+ '