From 3f7f182a3ca1033fec0a42805b45c59d00a41d7e Mon Sep 17 00:00:00 2001 From: egeekial Date: Wed, 3 Jun 2026 15:59:25 -0400 Subject: [PATCH 1/2] [newsfeed] add allowBasicHtmlTags option for basic emphasis Render a strict allowlist of basic formatting tags (b, strong, i, em, u) in news titles and descriptions, while neutralizing all other HTML. Feeds such as The Atlantic encode emphasis as entities (<em>), which html-to-text decoded to a literal string that the template then auto-escaped, so the raw tag was shown on screen. The new opt-in allowBasicHtmlTags option (default false) sanitizes both fields by escaping everything and restoring only the exact, attribute-free allowlisted tags, so the result is safe to render and arbitrary HTML/script injection is impossible. Adds unit tests for the sanitizer and an e2e test covering rendering and an injection attempt. Co-Authored-By: Claude Opus 4.8 --- defaultmodules/newsfeed/newsfeed.js | 3 +- defaultmodules/newsfeed/newsfeed.njk | 12 +-- defaultmodules/newsfeed/newsfeedfetcher.js | 80 ++++++++++++++++--- defaultmodules/newsfeed/node_helper.js | 2 +- tests/configs/modules/newsfeed/basic_html.js | 28 +++++++ tests/e2e/modules/newsfeed_spec.js | 26 ++++++ tests/mocks/newsfeed_basic_html.xml | 15 ++++ .../default/newsfeed/newsfeedfetcher_spec.js | 52 ++++++++++++ 8 files changed, 199 insertions(+), 19 deletions(-) create mode 100644 tests/configs/modules/newsfeed/basic_html.js create mode 100644 tests/mocks/newsfeed_basic_html.xml create mode 100644 tests/unit/modules/default/newsfeed/newsfeedfetcher_spec.js diff --git a/defaultmodules/newsfeed/newsfeed.js b/defaultmodules/newsfeed/newsfeed.js index fa1476faf5..d593b4c086 100644 --- a/defaultmodules/newsfeed/newsfeed.js +++ b/defaultmodules/newsfeed/newsfeed.js @@ -33,7 +33,8 @@ Module.register("newsfeed", { prohibitedWords: [], scrollLength: 500, logFeedWarnings: false, - dangerouslyDisableAutoEscaping: false + dangerouslyDisableAutoEscaping: false, + allowBasicHtmlTags: false }, getUrlPrefix (item) { diff --git a/defaultmodules/newsfeed/newsfeed.njk b/defaultmodules/newsfeed/newsfeed.njk index 9eacc261c2..57592c5971 100644 --- a/defaultmodules/newsfeed/newsfeed.njk +++ b/defaultmodules/newsfeed/newsfeed.njk @@ -45,13 +45,13 @@ {% if config.showPublishDate %}{{ item.publishDate }}:{% endif %} {% endif %} -
{{ escapeTitle(item.title, item.url, config.dangerouslyDisableAutoEscaping, config.showTitleAsUrl) }}
+
{{ escapeTitle(item.title, item.url, config.dangerouslyDisableAutoEscaping or config.allowBasicHtmlTags, config.showTitleAsUrl) }}
{% if config.showDescription %}
{% if config.truncDescription %} - {{ escapeText(item.description | truncate(config.lengthDescription) , config.dangerouslyDisableAutoEscaping) }} + {{ escapeText(item.description | truncate(config.lengthDescription) , config.dangerouslyDisableAutoEscaping or config.allowBasicHtmlTags) }} {% else %} - {{ escapeText(item.description, config.dangerouslyDisableAutoEscaping) }} + {{ escapeText(item.description, config.dangerouslyDisableAutoEscaping or config.allowBasicHtmlTags) }} {% endif %}
{% endif %} @@ -68,13 +68,13 @@ {% if config.showPublishDate %}{{ publishDate }}:{% endif %} {% endif %} -
{{ escapeTitle(title, url, config.dangerouslyDisableAutoEscaping, config.showTitleAsUrl) }}
+
{{ escapeTitle(title, url, config.dangerouslyDisableAutoEscaping or config.allowBasicHtmlTags, config.showTitleAsUrl) }}
{% if config.showDescription %}
{% if config.truncDescription %} - {{ escapeText(description | truncate(config.lengthDescription) , config.dangerouslyDisableAutoEscaping) }} + {{ escapeText(description | truncate(config.lengthDescription) , config.dangerouslyDisableAutoEscaping or config.allowBasicHtmlTags) }} {% else %} - {{ escapeText(description, config.dangerouslyDisableAutoEscaping) }} + {{ escapeText(description, config.dangerouslyDisableAutoEscaping or config.allowBasicHtmlTags) }} {% endif %}
{% endif %} diff --git a/defaultmodules/newsfeed/newsfeedfetcher.js b/defaultmodules/newsfeed/newsfeedfetcher.js index 1a5f421ab8..1942bba64b 100644 --- a/defaultmodules/newsfeed/newsfeedfetcher.js +++ b/defaultmodules/newsfeed/newsfeedfetcher.js @@ -6,6 +6,23 @@ const { htmlToText } = require("html-to-text"); const Log = require("logger"); const HTTPFetcher = require("#http_fetcher"); +// Inline formatting tags that are safe to render: bold, italic and underline. +// These never carry attributes once sanitized, so they cannot be used for injection. +const ALLOWED_TAGS = ["b", "strong", "i", "em", "u"]; + +// html-to-text formatter that re-emits an allowed inline tag around its content, +// so feeds that send real / elements keep their emphasis. +const keepTagFormatter = (elem, walk, builder, formatOptions) => { + builder.addLiteral(`<${formatOptions.tagName}>`); + walk(elem.children, builder); + builder.addLiteral(``); +}; + +const allowedTagSelectors = ALLOWED_TAGS.map((tagName) => ({ selector: tagName, format: "keepTag", options: { tagName } })); + +// Matches only the exact, attribute-free opening/closing allowlisted tags after escaping. +const restoreAllowedTags = new RegExp(`<(/?(?:${ALLOWED_TAGS.join("|")}))>`, "g"); + /** * NewsfeedFetcher - Fetches and parses RSS/Atom feed data * Uses HTTPFetcher for HTTP handling with intelligent error handling @@ -20,12 +37,14 @@ class NewsfeedFetcher { * @param {string} encoding - Encoding of the feed (e.g., 'UTF-8') * @param {boolean} logFeedWarnings - If true log warnings when there is an error parsing a news article * @param {boolean} useCorsProxy - If true cors proxy is used for article url's + * @param {boolean} allowBasicHtmlTags - If true keep basic formatting tags (bold/italic/underline) in title and description */ - constructor (url, reloadInterval, encoding, logFeedWarnings, useCorsProxy) { + constructor (url, reloadInterval, encoding, logFeedWarnings, useCorsProxy, allowBasicHtmlTags = false) { this.url = url; this.encoding = encoding; this.logFeedWarnings = logFeedWarnings; this.useCorsProxy = useCorsProxy; + this.allowBasicHtmlTags = allowBasicHtmlTags; this.items = []; this.fetchFailedCallback = () => {}; this.itemsReceivedCallback = () => {}; @@ -44,6 +63,37 @@ class NewsfeedFetcher { this.httpFetcher.on("error", (errorInfo) => this.fetchFailedCallback(this, errorInfo)); } + /** + * Sanitizes a feed string, keeping only a strict allowlist of basic + * formatting tags (bold, italic, underline) and neutralizing everything else. + * + * The approach is allowlist-only and therefore safe to render unescaped: + * html-to-text first strips all real markup (scripts, links, images, …) and + * decodes entities to text, then EVERYTHING is HTML-escaped and ONLY the exact, + * attribute-free allowlisted tags are restored. No attributes, event handlers, + * or other tags can survive, so arbitrary HTML/script injection is impossible. + * @param {string} html - The raw title or description from the feed. + * @returns {string} Safe HTML containing at most the allowlisted formatting tags. + */ + static sanitizeBasicHtml (html) { + const text = htmlToText(html, { + wordwrap: false, + formatters: { keepTag: keepTagFormatter }, + selectors: [ + { selector: "a", options: { ignoreHref: true, noAnchorUrl: true } }, + { selector: "br", format: "inlineSurround", options: { prefix: " " } }, + { selector: "img", format: "skip" }, + ...allowedTagSelectors + ] + }); + + return text + .replaceAll("&", "&") + .replaceAll("<", "<") + .replaceAll(">", ">") + .replace(restoreAllowedTags, "<$1>"); + } + /** * Creates a parse error info object * @param {string} message - Error message @@ -78,22 +128,30 @@ class NewsfeedFetcher { const url = item.url || item.link || ""; if (title && pubdate) { - // Convert HTML entities, codes and tag - description = htmlToText(description, { - wordwrap: false, - selectors: [ - { selector: "a", options: { ignoreHref: true, noAnchorUrl: true } }, - { selector: "br", format: "inlineSurround", options: { prefix: " " } }, - { selector: "img", format: "skip" } - ] - }); + let displayTitle = title; + if (this.allowBasicHtmlTags) { + // Keep basic formatting (bold/italic/underline) in both fields, strip everything else + description = NewsfeedFetcher.sanitizeBasicHtml(description); + displayTitle = NewsfeedFetcher.sanitizeBasicHtml(title); + } else { + // Convert HTML entities, codes and tag + description = htmlToText(description, { + wordwrap: false, + selectors: [ + { selector: "a", options: { ignoreHref: true, noAnchorUrl: true } }, + { selector: "br", format: "inlineSurround", options: { prefix: " " } }, + { selector: "img", format: "skip" } + ] + }); + } this.items.push({ - title, + title: displayTitle, description, pubdate, url, useCorsProxy: this.useCorsProxy, + // Hash on the original title so the dedup identity is stable regardless of allowBasicHtmlTags hash: crypto.createHash("sha256").update(`${pubdate} :: ${title} :: ${url}`).digest("hex") }); } else if (this.logFeedWarnings) { diff --git a/defaultmodules/newsfeed/node_helper.js b/defaultmodules/newsfeed/node_helper.js index cede93f698..665da970fd 100644 --- a/defaultmodules/newsfeed/node_helper.js +++ b/defaultmodules/newsfeed/node_helper.js @@ -61,7 +61,7 @@ module.exports = NodeHelper.create({ let fetcher; if (typeof this.fetchers[url] === "undefined") { Log.log(`Create new newsfetcher for url: ${url} - Interval: ${reloadInterval}`); - fetcher = new NewsfeedFetcher(url, reloadInterval, encoding, config.logFeedWarnings, useCorsProxy); + fetcher = new NewsfeedFetcher(url, reloadInterval, encoding, config.logFeedWarnings, useCorsProxy, config.allowBasicHtmlTags); fetcher.onReceive(() => { this.broadcastFeeds(); diff --git a/tests/configs/modules/newsfeed/basic_html.js b/tests/configs/modules/newsfeed/basic_html.js new file mode 100644 index 0000000000..a1edac4980 --- /dev/null +++ b/tests/configs/modules/newsfeed/basic_html.js @@ -0,0 +1,28 @@ +let config = { + address: "0.0.0.0", + ipWhitelist: [], + timeFormat: 12, + + modules: [ + { + module: "newsfeed", + position: "bottom_bar", + config: { + feeds: [ + { + title: "Formatting Feed", + url: "http://localhost:8080/tests/mocks/newsfeed_basic_html.xml" + } + ], + showDescription: true, + truncDescription: false, + allowBasicHtmlTags: true + } + } + ] +}; + +/*************** DO NOT EDIT THE LINE BELOW ***************/ +if (typeof module !== "undefined") { + module.exports = config; +} diff --git a/tests/e2e/modules/newsfeed_spec.js b/tests/e2e/modules/newsfeed_spec.js index b68b6ea5a6..88a6205bfd 100644 --- a/tests/e2e/modules/newsfeed_spec.js +++ b/tests/e2e/modules/newsfeed_spec.js @@ -45,6 +45,32 @@ const runTests = () => { }); }); + describe("Basic HTML tags", () => { + beforeAll(async () => { + await helpers.startApplication("tests/configs/modules/newsfeed/basic_html.js"); + await helpers.getDocument(); + page = helpers.getPage(); + }); + + it("should render allowlisted formatting tags in title and description", async () => { + await expect(page.locator(".newsfeed .newsfeed-desc")).toBeVisible(); + const descHtml = await page.locator(".newsfeed .newsfeed-desc").innerHTML(); + expect(descHtml).toContain(""); + expect(descHtml).toContain(""); + expect(descHtml).toContain(""); + const titleHtml = await page.locator(".newsfeed .newsfeed-title").innerHTML(); + expect(titleHtml).toContain(""); + }); + + it("should strip disallowed HTML and not execute injected scripts", async () => { + const descHtml = await page.locator(".newsfeed .newsfeed-desc").innerHTML(); + expect(descHtml).not.toContain(" window.__newsfeedXss); + expect(xss).toBeUndefined(); + }); + }); + describe("Invalid configuration", () => { beforeAll(async () => { await helpers.startApplication("tests/configs/modules/newsfeed/incorrect_url.js"); diff --git a/tests/mocks/newsfeed_basic_html.xml b/tests/mocks/newsfeed_basic_html.xml new file mode 100644 index 0000000000..342564879e --- /dev/null +++ b/tests/mocks/newsfeed_basic_html.xml @@ -0,0 +1,15 @@ + + + + Formatting Feed + http://localhost:8080 + Feed used to test the allowBasicHtmlTags option. + + News <em>Flash</em> + http://localhost:8080/article + Tue, 20 Sep 2016 11:16:08 +0000 + http://localhost:8080/?p=1 + Italic and Bold and Underlined text.

]]>
+
+
+
diff --git a/tests/unit/modules/default/newsfeed/newsfeedfetcher_spec.js b/tests/unit/modules/default/newsfeed/newsfeedfetcher_spec.js new file mode 100644 index 0000000000..0379bc32be --- /dev/null +++ b/tests/unit/modules/default/newsfeed/newsfeedfetcher_spec.js @@ -0,0 +1,52 @@ +const defaults = require("../../../../../js/defaults"); + +const NewsfeedFetcher = require(`../../../../../${defaults.defaultModulesDir}/newsfeed/newsfeedfetcher`); + +const { sanitizeBasicHtml } = NewsfeedFetcher; + +describe("NewsfeedFetcher.sanitizeBasicHtml", () => { + it("keeps real basic formatting tags", () => { + expect(sanitizeBasicHtml("a b c d e")) + .toBe("a b c d e"); + }); + + it("renders entity-encoded formatting tags (e.g. The Atlantic feed)", () => { + // Feeds like theatlantic.com ship emphasis as escaped entities + expect(sanitizeBasicHtml("the <em>Atlantic</em> ocean")).toBe("the Atlantic ocean"); + }); + + it("handles emphasis inside titles regardless of how the parser delivers it", () => { + // The Atlantic uses in titles, e.g. "That's Enough, Euphoria" + const expected = "That’s Enough, Euphoria"; + expect(sanitizeBasicHtml("That’s Enough, Euphoria")).toBe(expected); + expect(sanitizeBasicHtml("That’s Enough, <em>Euphoria</em>")).toBe(expected); + }); + + it("strips attributes from allowed tags", () => { + const result = sanitizeBasicHtml("bold"); + expect(result).toBe("bold"); + expect(result).not.toContain("onclick"); + expect(result).not.toContain("class"); + }); + + it("neutralizes script tags", () => { + expect(sanitizeBasicHtml("hello")).not.toContain(" { + const result = sanitizeBasicHtml("link

title

"); + expect(result).not.toContain("onerror"); + expect(result).not.toContain("href"); + expect(result).not.toContain("

"); + expect(result).toContain("link"); + expect(result.toLowerCase()).toContain("title"); + }); + + it("escapes bare HTML special characters in plain text", () => { + expect(sanitizeBasicHtml("Fish & Chips for < 5")).toBe("Fish & Chips for < 5"); + }); +}); From 872876bf1ec477c0d23913c6871bd7df60514e71 Mon Sep 17 00:00:00 2001 From: egeekial Date: Sun, 7 Jun 2026 20:05:06 -0400 Subject: [PATCH 2/2] [newsfeed] replace allowBasicHtmlTags boolean with allowedBasicHtmlTags allowlist Per PR feedback, expose a single configurable array option instead of a boolean. Users opt into individual formatting tags chosen from a hardcoded safe list (b, strong, i, em, u, br, code, s, sub, sup); the default is [] so current behavior is unchanged. Any requested tag outside the safe list is ignored with a warning, keeping the strict allowlist-only security model. sanitizeBasicHtml now takes the validated allowlist as an argument and builds its selectors/restore-regex from it. br is handled as a void element (emitted as a single
) when allowed, and otherwise keeps collapsing to a space. The Nunjucks template uses a length check since an empty array is truthy. Updates the unit and e2e tests/config to the new option, adding cases for per-tag allowlisting, the empty-list default, the new safe tags, and br. Co-Authored-By: Claude Opus 4.8 --- defaultmodules/newsfeed/newsfeed.js | 2 +- defaultmodules/newsfeed/newsfeed.njk | 12 +-- defaultmodules/newsfeed/newsfeedfetcher.js | 76 ++++++++++++------- defaultmodules/newsfeed/node_helper.js | 2 +- tests/configs/modules/newsfeed/basic_html.js | 2 +- .../default/newsfeed/newsfeedfetcher_spec.js | 51 ++++++++++--- 6 files changed, 99 insertions(+), 46 deletions(-) diff --git a/defaultmodules/newsfeed/newsfeed.js b/defaultmodules/newsfeed/newsfeed.js index d593b4c086..9a12a4b8ed 100644 --- a/defaultmodules/newsfeed/newsfeed.js +++ b/defaultmodules/newsfeed/newsfeed.js @@ -34,7 +34,7 @@ Module.register("newsfeed", { scrollLength: 500, logFeedWarnings: false, dangerouslyDisableAutoEscaping: false, - allowBasicHtmlTags: false + allowedBasicHtmlTags: [] }, getUrlPrefix (item) { diff --git a/defaultmodules/newsfeed/newsfeed.njk b/defaultmodules/newsfeed/newsfeed.njk index 57592c5971..033bf20966 100644 --- a/defaultmodules/newsfeed/newsfeed.njk +++ b/defaultmodules/newsfeed/newsfeed.njk @@ -45,13 +45,13 @@ {% if config.showPublishDate %}{{ item.publishDate }}:{% endif %} {% endif %} -
{{ escapeTitle(item.title, item.url, config.dangerouslyDisableAutoEscaping or config.allowBasicHtmlTags, config.showTitleAsUrl) }}
+
{{ escapeTitle(item.title, item.url, config.dangerouslyDisableAutoEscaping or (config.allowedBasicHtmlTags | length), config.showTitleAsUrl) }}
{% if config.showDescription %}
{% if config.truncDescription %} - {{ escapeText(item.description | truncate(config.lengthDescription) , config.dangerouslyDisableAutoEscaping or config.allowBasicHtmlTags) }} + {{ escapeText(item.description | truncate(config.lengthDescription) , config.dangerouslyDisableAutoEscaping or (config.allowedBasicHtmlTags | length)) }} {% else %} - {{ escapeText(item.description, config.dangerouslyDisableAutoEscaping or config.allowBasicHtmlTags) }} + {{ escapeText(item.description, config.dangerouslyDisableAutoEscaping or (config.allowedBasicHtmlTags | length)) }} {% endif %}
{% endif %} @@ -68,13 +68,13 @@ {% if config.showPublishDate %}{{ publishDate }}:{% endif %} {% endif %} -
{{ escapeTitle(title, url, config.dangerouslyDisableAutoEscaping or config.allowBasicHtmlTags, config.showTitleAsUrl) }}
+
{{ escapeTitle(title, url, config.dangerouslyDisableAutoEscaping or (config.allowedBasicHtmlTags | length), config.showTitleAsUrl) }}
{% if config.showDescription %}
{% if config.truncDescription %} - {{ escapeText(description | truncate(config.lengthDescription) , config.dangerouslyDisableAutoEscaping or config.allowBasicHtmlTags) }} + {{ escapeText(description | truncate(config.lengthDescription) , config.dangerouslyDisableAutoEscaping or (config.allowedBasicHtmlTags | length)) }} {% else %} - {{ escapeText(description, config.dangerouslyDisableAutoEscaping or config.allowBasicHtmlTags) }} + {{ escapeText(description, config.dangerouslyDisableAutoEscaping or (config.allowedBasicHtmlTags | length)) }} {% endif %}
{% endif %} diff --git a/defaultmodules/newsfeed/newsfeedfetcher.js b/defaultmodules/newsfeed/newsfeedfetcher.js index 1942bba64b..8fcd6c9cf8 100644 --- a/defaultmodules/newsfeed/newsfeedfetcher.js +++ b/defaultmodules/newsfeed/newsfeedfetcher.js @@ -6,23 +6,26 @@ const { htmlToText } = require("html-to-text"); const Log = require("logger"); const HTTPFetcher = require("#http_fetcher"); -// Inline formatting tags that are safe to render: bold, italic and underline. -// These never carry attributes once sanitized, so they cannot be used for injection. -const ALLOWED_TAGS = ["b", "strong", "i", "em", "u"]; +// The complete set of basic formatting tags users are allowed to opt into via the +// `allowedBasicHtmlTags` config option. These are inline emphasis / line-break tags that +// never carry attributes once sanitized, so they cannot be used for injection. Anything +// requested outside this list is ignored (see the constructor). +const SAFE_HTML_TAGS = ["b", "strong", "i", "em", "u", "br", "code", "s", "sub", "sup"]; // html-to-text formatter that re-emits an allowed inline tag around its content, -// so feeds that send real / elements keep their emphasis. +// so feeds that send real / elements keep their emphasis. `br` is a void +// element, so it is emitted as a single self-contained tag with no children/closing tag. const keepTagFormatter = (elem, walk, builder, formatOptions) => { - builder.addLiteral(`<${formatOptions.tagName}>`); + const { tagName } = formatOptions; + if (tagName === "br") { + builder.addLiteral("
"); + return; + } + builder.addLiteral(`<${tagName}>`); walk(elem.children, builder); - builder.addLiteral(``); + builder.addLiteral(``); }; -const allowedTagSelectors = ALLOWED_TAGS.map((tagName) => ({ selector: tagName, format: "keepTag", options: { tagName } })); - -// Matches only the exact, attribute-free opening/closing allowlisted tags after escaping. -const restoreAllowedTags = new RegExp(`<(/?(?:${ALLOWED_TAGS.join("|")}))>`, "g"); - /** * NewsfeedFetcher - Fetches and parses RSS/Atom feed data * Uses HTTPFetcher for HTTP handling with intelligent error handling @@ -37,14 +40,22 @@ class NewsfeedFetcher { * @param {string} encoding - Encoding of the feed (e.g., 'UTF-8') * @param {boolean} logFeedWarnings - If true log warnings when there is an error parsing a news article * @param {boolean} useCorsProxy - If true cors proxy is used for article url's - * @param {boolean} allowBasicHtmlTags - If true keep basic formatting tags (bold/italic/underline) in title and description + * @param {string[]} allowedBasicHtmlTags - Basic formatting tags to keep in title and description. Only tags from the safe list are honored; anything else is ignored. */ - constructor (url, reloadInterval, encoding, logFeedWarnings, useCorsProxy, allowBasicHtmlTags = false) { + constructor (url, reloadInterval, encoding, logFeedWarnings, useCorsProxy, allowedBasicHtmlTags = []) { this.url = url; this.encoding = encoding; this.logFeedWarnings = logFeedWarnings; this.useCorsProxy = useCorsProxy; - this.allowBasicHtmlTags = allowBasicHtmlTags; + + // Keep only tags from the hardcoded safe list; warn about (and ignore) anything else. + const requestedTags = (Array.isArray(allowedBasicHtmlTags) ? allowedBasicHtmlTags : []).map((tag) => String(tag).trim().toLowerCase()); + this.allowedBasicHtmlTags = requestedTags.filter((tag) => SAFE_HTML_TAGS.includes(tag)); + const ignoredTags = requestedTags.filter((tag) => !SAFE_HTML_TAGS.includes(tag)); + if (ignoredTags.length > 0) { + Log.warn(`Ignoring unsupported allowedBasicHtmlTags [${ignoredTags.join(", ")}] for url ${url}. Allowed tags are: ${SAFE_HTML_TAGS.join(", ")}`); + } + this.items = []; this.fetchFailedCallback = () => {}; this.itemsReceivedCallback = () => {}; @@ -64,8 +75,8 @@ class NewsfeedFetcher { } /** - * Sanitizes a feed string, keeping only a strict allowlist of basic - * formatting tags (bold, italic, underline) and neutralizing everything else. + * Sanitizes a feed string, keeping only the given allowlist of basic + * formatting tags and neutralizing everything else. * * The approach is allowlist-only and therefore safe to render unescaped: * html-to-text first strips all real markup (scripts, links, images, …) and @@ -73,9 +84,13 @@ class NewsfeedFetcher { * attribute-free allowlisted tags are restored. No attributes, event handlers, * or other tags can survive, so arbitrary HTML/script injection is impossible. * @param {string} html - The raw title or description from the feed. - * @returns {string} Safe HTML containing at most the allowlisted formatting tags. + * @param {string[]} [allowedTags] - Tags to keep. Callers pass an already-validated subset of SAFE_HTML_TAGS. + * @returns {string} Safe HTML containing at most the allowed formatting tags. */ - static sanitizeBasicHtml (html) { + static sanitizeBasicHtml (html, allowedTags = []) { + // `br` keeps its default "collapse to a space" behavior unless explicitly allowed. + const keepTagSelectors = allowedTags.map((tagName) => ({ selector: tagName, format: "keepTag", options: { tagName } })); + const text = htmlToText(html, { wordwrap: false, formatters: { keepTag: keepTagFormatter }, @@ -83,15 +98,22 @@ class NewsfeedFetcher { { selector: "a", options: { ignoreHref: true, noAnchorUrl: true } }, { selector: "br", format: "inlineSurround", options: { prefix: " " } }, { selector: "img", format: "skip" }, - ...allowedTagSelectors + ...keepTagSelectors ] }); - return text + const escaped = text .replaceAll("&", "&") .replaceAll("<", "<") - .replaceAll(">", ">") - .replace(restoreAllowedTags, "<$1>"); + .replaceAll(">", ">"); + + if (allowedTags.length === 0) { + return escaped; + } + + // Restore only the exact, attribute-free allowed opening/closing tags after escaping. + const restoreAllowedTags = new RegExp(`<(/?(?:${allowedTags.join("|")}))>`, "g"); + return escaped.replace(restoreAllowedTags, "<$1>"); } /** @@ -129,10 +151,10 @@ class NewsfeedFetcher { if (title && pubdate) { let displayTitle = title; - if (this.allowBasicHtmlTags) { - // Keep basic formatting (bold/italic/underline) in both fields, strip everything else - description = NewsfeedFetcher.sanitizeBasicHtml(description); - displayTitle = NewsfeedFetcher.sanitizeBasicHtml(title); + if (this.allowedBasicHtmlTags.length > 0) { + // Keep the configured basic formatting tags in both fields, strip everything else + description = NewsfeedFetcher.sanitizeBasicHtml(description, this.allowedBasicHtmlTags); + displayTitle = NewsfeedFetcher.sanitizeBasicHtml(title, this.allowedBasicHtmlTags); } else { // Convert HTML entities, codes and tag description = htmlToText(description, { @@ -151,7 +173,7 @@ class NewsfeedFetcher { pubdate, url, useCorsProxy: this.useCorsProxy, - // Hash on the original title so the dedup identity is stable regardless of allowBasicHtmlTags + // Hash on the original title so the dedup identity is stable regardless of allowedBasicHtmlTags hash: crypto.createHash("sha256").update(`${pubdate} :: ${title} :: ${url}`).digest("hex") }); } else if (this.logFeedWarnings) { diff --git a/defaultmodules/newsfeed/node_helper.js b/defaultmodules/newsfeed/node_helper.js index 665da970fd..ad391a518d 100644 --- a/defaultmodules/newsfeed/node_helper.js +++ b/defaultmodules/newsfeed/node_helper.js @@ -61,7 +61,7 @@ module.exports = NodeHelper.create({ let fetcher; if (typeof this.fetchers[url] === "undefined") { Log.log(`Create new newsfetcher for url: ${url} - Interval: ${reloadInterval}`); - fetcher = new NewsfeedFetcher(url, reloadInterval, encoding, config.logFeedWarnings, useCorsProxy, config.allowBasicHtmlTags); + fetcher = new NewsfeedFetcher(url, reloadInterval, encoding, config.logFeedWarnings, useCorsProxy, config.allowedBasicHtmlTags); fetcher.onReceive(() => { this.broadcastFeeds(); diff --git a/tests/configs/modules/newsfeed/basic_html.js b/tests/configs/modules/newsfeed/basic_html.js index a1edac4980..4c4b6bc0b5 100644 --- a/tests/configs/modules/newsfeed/basic_html.js +++ b/tests/configs/modules/newsfeed/basic_html.js @@ -16,7 +16,7 @@ let config = { ], showDescription: true, truncDescription: false, - allowBasicHtmlTags: true + allowedBasicHtmlTags: ["b", "strong", "i", "em", "u", "br", "code", "s", "sub", "sup"] } } ] diff --git a/tests/unit/modules/default/newsfeed/newsfeedfetcher_spec.js b/tests/unit/modules/default/newsfeed/newsfeedfetcher_spec.js index 0379bc32be..7d195840cd 100644 --- a/tests/unit/modules/default/newsfeed/newsfeedfetcher_spec.js +++ b/tests/unit/modules/default/newsfeed/newsfeedfetcher_spec.js @@ -2,43 +2,50 @@ const defaults = require("../../../../../js/defaults"); const NewsfeedFetcher = require(`../../../../../${defaults.defaultModulesDir}/newsfeed/newsfeedfetcher`); -const { sanitizeBasicHtml } = NewsfeedFetcher; +// The full safe list users may opt into; most tests run with it enabled. +const ALL_TAGS = ["b", "strong", "i", "em", "u", "br", "code", "s", "sub", "sup"]; +const sanitize = (html, allowedTags = ALL_TAGS) => NewsfeedFetcher.sanitizeBasicHtml(html, allowedTags); describe("NewsfeedFetcher.sanitizeBasicHtml", () => { it("keeps real basic formatting tags", () => { - expect(sanitizeBasicHtml("a b c d e")) + expect(sanitize("a b c d e")) .toBe("a b c d e"); }); + it("keeps the additional safe tags (code, s, sub, sup)", () => { + expect(sanitize("x y z w")) + .toBe("x y z w"); + }); + it("renders entity-encoded formatting tags (e.g. The Atlantic feed)", () => { // Feeds like theatlantic.com ship emphasis as escaped entities - expect(sanitizeBasicHtml("the <em>Atlantic</em> ocean")).toBe("the Atlantic ocean"); + expect(sanitize("the <em>Atlantic</em> ocean")).toBe("the Atlantic ocean"); }); it("handles emphasis inside titles regardless of how the parser delivers it", () => { // The Atlantic uses in titles, e.g. "That's Enough, Euphoria" const expected = "That’s Enough, Euphoria"; - expect(sanitizeBasicHtml("That’s Enough, Euphoria")).toBe(expected); - expect(sanitizeBasicHtml("That’s Enough, <em>Euphoria</em>")).toBe(expected); + expect(sanitize("That’s Enough, Euphoria")).toBe(expected); + expect(sanitize("That’s Enough, <em>Euphoria</em>")).toBe(expected); }); it("strips attributes from allowed tags", () => { - const result = sanitizeBasicHtml("bold"); + const result = sanitize("bold"); expect(result).toBe("bold"); expect(result).not.toContain("onclick"); expect(result).not.toContain("class"); }); it("neutralizes script tags", () => { - expect(sanitizeBasicHtml("hello")).not.toContain("alert(1)hello")).not.toContain(" { - const result = sanitizeBasicHtml("link

title

"); + const result = sanitize("link

title

"); expect(result).not.toContain("onerror"); expect(result).not.toContain("href"); expect(result).not.toContain("

"); @@ -47,6 +54,30 @@ describe("NewsfeedFetcher.sanitizeBasicHtml", () => { }); it("escapes bare HTML special characters in plain text", () => { - expect(sanitizeBasicHtml("Fish & Chips for < 5")).toBe("Fish & Chips for < 5"); + expect(sanitize("Fish & Chips for < 5")).toBe("Fish & Chips for < 5"); + }); + + it("only keeps tags present in the supplied allowlist", () => { + // Allow just : a safe-but-not-allowed must become plain text. + const result = sanitize("kept dropped", ["em"]); + expect(result).toBe("kept dropped"); + expect(result).not.toContain(""); + }); + + it("escapes everything when the allowlist is empty", () => { + expect(sanitize("hi & bye", [])).toBe("hi & bye"); + }); + + it("renders
as a single self-closing tag when allowed", () => { + const result = sanitize("a
b", ["br"]); + expect(result).toContain("
"); + expect(result).not.toContain("

"); + expect(result).not.toContain("<br>"); + }); + + it("collapses
to a space when not allowed", () => { + const result = sanitize("a
b", ["em"]); + expect(result).not.toContain("
"); + expect(result).toBe("a b"); }); });