diff --git a/defaultmodules/newsfeed/newsfeed.js b/defaultmodules/newsfeed/newsfeed.js index fa1476faf5..9a12a4b8ed 100644 --- a/defaultmodules/newsfeed/newsfeed.js +++ b/defaultmodules/newsfeed/newsfeed.js @@ -33,7 +33,8 @@ Module.register("newsfeed", { prohibitedWords: [], scrollLength: 500, logFeedWarnings: false, - dangerouslyDisableAutoEscaping: false + dangerouslyDisableAutoEscaping: false, + allowedBasicHtmlTags: [] }, getUrlPrefix (item) { diff --git a/defaultmodules/newsfeed/newsfeed.njk b/defaultmodules/newsfeed/newsfeed.njk index 9eacc261c2..033bf20966 100644 --- a/defaultmodules/newsfeed/newsfeed.njk +++ b/defaultmodules/newsfeed/newsfeed.njk @@ -45,13 +45,13 @@ {% if config.showPublishDate %}{{ item.publishDate }}:{% endif %} {% endif %} -
{{ escapeTitle(item.title, item.url, config.dangerouslyDisableAutoEscaping, config.showTitleAsUrl) }}
+
{{ escapeTitle(item.title, item.url, config.dangerouslyDisableAutoEscaping or (config.allowedBasicHtmlTags | length), config.showTitleAsUrl) }}
{% if config.showDescription %}
{% if config.truncDescription %} - {{ escapeText(item.description | truncate(config.lengthDescription) , config.dangerouslyDisableAutoEscaping) }} + {{ escapeText(item.description | truncate(config.lengthDescription) , config.dangerouslyDisableAutoEscaping or (config.allowedBasicHtmlTags | length)) }} {% else %} - {{ escapeText(item.description, config.dangerouslyDisableAutoEscaping) }} + {{ escapeText(item.description, config.dangerouslyDisableAutoEscaping or (config.allowedBasicHtmlTags | length)) }} {% endif %}
{% endif %} @@ -68,13 +68,13 @@ {% if config.showPublishDate %}{{ publishDate }}:{% endif %} {% endif %} -
{{ escapeTitle(title, url, config.dangerouslyDisableAutoEscaping, config.showTitleAsUrl) }}
+
{{ escapeTitle(title, url, config.dangerouslyDisableAutoEscaping or (config.allowedBasicHtmlTags | length), config.showTitleAsUrl) }}
{% if config.showDescription %}
{% if config.truncDescription %} - {{ escapeText(description | truncate(config.lengthDescription) , config.dangerouslyDisableAutoEscaping) }} + {{ escapeText(description | truncate(config.lengthDescription) , config.dangerouslyDisableAutoEscaping or (config.allowedBasicHtmlTags | length)) }} {% else %} - {{ escapeText(description, config.dangerouslyDisableAutoEscaping) }} + {{ escapeText(description, config.dangerouslyDisableAutoEscaping or (config.allowedBasicHtmlTags | length)) }} {% endif %}
{% endif %} diff --git a/defaultmodules/newsfeed/newsfeedfetcher.js b/defaultmodules/newsfeed/newsfeedfetcher.js index 1a5f421ab8..8fcd6c9cf8 100644 --- a/defaultmodules/newsfeed/newsfeedfetcher.js +++ b/defaultmodules/newsfeed/newsfeedfetcher.js @@ -6,6 +6,26 @@ const { htmlToText } = require("html-to-text"); const Log = require("logger"); const HTTPFetcher = require("#http_fetcher"); +// The complete set of basic formatting tags users are allowed to opt into via the +// `allowedBasicHtmlTags` config option. These are inline emphasis / line-break tags that +// never carry attributes once sanitized, so they cannot be used for injection. Anything +// requested outside this list is ignored (see the constructor). +const SAFE_HTML_TAGS = ["b", "strong", "i", "em", "u", "br", "code", "s", "sub", "sup"]; + +// html-to-text formatter that re-emits an allowed inline tag around its content, +// so feeds that send real / elements keep their emphasis. `br` is a void +// element, so it is emitted as a single self-contained tag with no children/closing tag. +const keepTagFormatter = (elem, walk, builder, formatOptions) => { + const { tagName } = formatOptions; + if (tagName === "br") { + builder.addLiteral("
"); + return; + } + builder.addLiteral(`<${tagName}>`); + walk(elem.children, builder); + builder.addLiteral(``); +}; + /** * NewsfeedFetcher - Fetches and parses RSS/Atom feed data * Uses HTTPFetcher for HTTP handling with intelligent error handling @@ -20,12 +40,22 @@ class NewsfeedFetcher { * @param {string} encoding - Encoding of the feed (e.g., 'UTF-8') * @param {boolean} logFeedWarnings - If true log warnings when there is an error parsing a news article * @param {boolean} useCorsProxy - If true cors proxy is used for article url's + * @param {string[]} allowedBasicHtmlTags - Basic formatting tags to keep in title and description. Only tags from the safe list are honored; anything else is ignored. */ - constructor (url, reloadInterval, encoding, logFeedWarnings, useCorsProxy) { + constructor (url, reloadInterval, encoding, logFeedWarnings, useCorsProxy, allowedBasicHtmlTags = []) { this.url = url; this.encoding = encoding; this.logFeedWarnings = logFeedWarnings; this.useCorsProxy = useCorsProxy; + + // Keep only tags from the hardcoded safe list; warn about (and ignore) anything else. + const requestedTags = (Array.isArray(allowedBasicHtmlTags) ? allowedBasicHtmlTags : []).map((tag) => String(tag).trim().toLowerCase()); + this.allowedBasicHtmlTags = requestedTags.filter((tag) => SAFE_HTML_TAGS.includes(tag)); + const ignoredTags = requestedTags.filter((tag) => !SAFE_HTML_TAGS.includes(tag)); + if (ignoredTags.length > 0) { + Log.warn(`Ignoring unsupported allowedBasicHtmlTags [${ignoredTags.join(", ")}] for url ${url}. Allowed tags are: ${SAFE_HTML_TAGS.join(", ")}`); + } + this.items = []; this.fetchFailedCallback = () => {}; this.itemsReceivedCallback = () => {}; @@ -44,6 +74,48 @@ class NewsfeedFetcher { this.httpFetcher.on("error", (errorInfo) => this.fetchFailedCallback(this, errorInfo)); } + /** + * Sanitizes a feed string, keeping only the given allowlist of basic + * formatting tags and neutralizing everything else. + * + * The approach is allowlist-only and therefore safe to render unescaped: + * html-to-text first strips all real markup (scripts, links, images, …) and + * decodes entities to text, then EVERYTHING is HTML-escaped and ONLY the exact, + * attribute-free allowlisted tags are restored. No attributes, event handlers, + * or other tags can survive, so arbitrary HTML/script injection is impossible. + * @param {string} html - The raw title or description from the feed. + * @param {string[]} [allowedTags] - Tags to keep. Callers pass an already-validated subset of SAFE_HTML_TAGS. + * @returns {string} Safe HTML containing at most the allowed formatting tags. + */ + static sanitizeBasicHtml (html, allowedTags = []) { + // `br` keeps its default "collapse to a space" behavior unless explicitly allowed. + const keepTagSelectors = allowedTags.map((tagName) => ({ selector: tagName, format: "keepTag", options: { tagName } })); + + const text = htmlToText(html, { + wordwrap: false, + formatters: { keepTag: keepTagFormatter }, + selectors: [ + { selector: "a", options: { ignoreHref: true, noAnchorUrl: true } }, + { selector: "br", format: "inlineSurround", options: { prefix: " " } }, + { selector: "img", format: "skip" }, + ...keepTagSelectors + ] + }); + + const escaped = text + .replaceAll("&", "&") + .replaceAll("<", "<") + .replaceAll(">", ">"); + + if (allowedTags.length === 0) { + return escaped; + } + + // Restore only the exact, attribute-free allowed opening/closing tags after escaping. + const restoreAllowedTags = new RegExp(`<(/?(?:${allowedTags.join("|")}))>`, "g"); + return escaped.replace(restoreAllowedTags, "<$1>"); + } + /** * Creates a parse error info object * @param {string} message - Error message @@ -78,22 +150,30 @@ class NewsfeedFetcher { const url = item.url || item.link || ""; if (title && pubdate) { - // Convert HTML entities, codes and tag - description = htmlToText(description, { - wordwrap: false, - selectors: [ - { selector: "a", options: { ignoreHref: true, noAnchorUrl: true } }, - { selector: "br", format: "inlineSurround", options: { prefix: " " } }, - { selector: "img", format: "skip" } - ] - }); + let displayTitle = title; + if (this.allowedBasicHtmlTags.length > 0) { + // Keep the configured basic formatting tags in both fields, strip everything else + description = NewsfeedFetcher.sanitizeBasicHtml(description, this.allowedBasicHtmlTags); + displayTitle = NewsfeedFetcher.sanitizeBasicHtml(title, this.allowedBasicHtmlTags); + } else { + // Convert HTML entities, codes and tag + description = htmlToText(description, { + wordwrap: false, + selectors: [ + { selector: "a", options: { ignoreHref: true, noAnchorUrl: true } }, + { selector: "br", format: "inlineSurround", options: { prefix: " " } }, + { selector: "img", format: "skip" } + ] + }); + } this.items.push({ - title, + title: displayTitle, description, pubdate, url, useCorsProxy: this.useCorsProxy, + // Hash on the original title so the dedup identity is stable regardless of allowedBasicHtmlTags hash: crypto.createHash("sha256").update(`${pubdate} :: ${title} :: ${url}`).digest("hex") }); } else if (this.logFeedWarnings) { diff --git a/defaultmodules/newsfeed/node_helper.js b/defaultmodules/newsfeed/node_helper.js index cede93f698..ad391a518d 100644 --- a/defaultmodules/newsfeed/node_helper.js +++ b/defaultmodules/newsfeed/node_helper.js @@ -61,7 +61,7 @@ module.exports = NodeHelper.create({ let fetcher; if (typeof this.fetchers[url] === "undefined") { Log.log(`Create new newsfetcher for url: ${url} - Interval: ${reloadInterval}`); - fetcher = new NewsfeedFetcher(url, reloadInterval, encoding, config.logFeedWarnings, useCorsProxy); + fetcher = new NewsfeedFetcher(url, reloadInterval, encoding, config.logFeedWarnings, useCorsProxy, config.allowedBasicHtmlTags); fetcher.onReceive(() => { this.broadcastFeeds(); diff --git a/tests/configs/modules/newsfeed/basic_html.js b/tests/configs/modules/newsfeed/basic_html.js new file mode 100644 index 0000000000..4c4b6bc0b5 --- /dev/null +++ b/tests/configs/modules/newsfeed/basic_html.js @@ -0,0 +1,28 @@ +let config = { + address: "0.0.0.0", + ipWhitelist: [], + timeFormat: 12, + + modules: [ + { + module: "newsfeed", + position: "bottom_bar", + config: { + feeds: [ + { + title: "Formatting Feed", + url: "http://localhost:8080/tests/mocks/newsfeed_basic_html.xml" + } + ], + showDescription: true, + truncDescription: false, + allowedBasicHtmlTags: ["b", "strong", "i", "em", "u", "br", "code", "s", "sub", "sup"] + } + } + ] +}; + +/*************** DO NOT EDIT THE LINE BELOW ***************/ +if (typeof module !== "undefined") { + module.exports = config; +} diff --git a/tests/e2e/modules/newsfeed_spec.js b/tests/e2e/modules/newsfeed_spec.js index b68b6ea5a6..88a6205bfd 100644 --- a/tests/e2e/modules/newsfeed_spec.js +++ b/tests/e2e/modules/newsfeed_spec.js @@ -45,6 +45,32 @@ const runTests = () => { }); }); + describe("Basic HTML tags", () => { + beforeAll(async () => { + await helpers.startApplication("tests/configs/modules/newsfeed/basic_html.js"); + await helpers.getDocument(); + page = helpers.getPage(); + }); + + it("should render allowlisted formatting tags in title and description", async () => { + await expect(page.locator(".newsfeed .newsfeed-desc")).toBeVisible(); + const descHtml = await page.locator(".newsfeed .newsfeed-desc").innerHTML(); + expect(descHtml).toContain(""); + expect(descHtml).toContain(""); + expect(descHtml).toContain(""); + const titleHtml = await page.locator(".newsfeed .newsfeed-title").innerHTML(); + expect(titleHtml).toContain(""); + }); + + it("should strip disallowed HTML and not execute injected scripts", async () => { + const descHtml = await page.locator(".newsfeed .newsfeed-desc").innerHTML(); + expect(descHtml).not.toContain(" window.__newsfeedXss); + expect(xss).toBeUndefined(); + }); + }); + describe("Invalid configuration", () => { beforeAll(async () => { await helpers.startApplication("tests/configs/modules/newsfeed/incorrect_url.js"); diff --git a/tests/mocks/newsfeed_basic_html.xml b/tests/mocks/newsfeed_basic_html.xml new file mode 100644 index 0000000000..342564879e --- /dev/null +++ b/tests/mocks/newsfeed_basic_html.xml @@ -0,0 +1,15 @@ + + + + Formatting Feed + http://localhost:8080 + Feed used to test the allowBasicHtmlTags option. + + News <em>Flash</em> + http://localhost:8080/article + Tue, 20 Sep 2016 11:16:08 +0000 + http://localhost:8080/?p=1 + Italic and Bold and Underlined text.

]]>
+
+
+
diff --git a/tests/unit/modules/default/newsfeed/newsfeedfetcher_spec.js b/tests/unit/modules/default/newsfeed/newsfeedfetcher_spec.js new file mode 100644 index 0000000000..7d195840cd --- /dev/null +++ b/tests/unit/modules/default/newsfeed/newsfeedfetcher_spec.js @@ -0,0 +1,83 @@ +const defaults = require("../../../../../js/defaults"); + +const NewsfeedFetcher = require(`../../../../../${defaults.defaultModulesDir}/newsfeed/newsfeedfetcher`); + +// The full safe list users may opt into; most tests run with it enabled. +const ALL_TAGS = ["b", "strong", "i", "em", "u", "br", "code", "s", "sub", "sup"]; +const sanitize = (html, allowedTags = ALL_TAGS) => NewsfeedFetcher.sanitizeBasicHtml(html, allowedTags); + +describe("NewsfeedFetcher.sanitizeBasicHtml", () => { + it("keeps real basic formatting tags", () => { + expect(sanitize("a b c d e")) + .toBe("a b c d e"); + }); + + it("keeps the additional safe tags (code, s, sub, sup)", () => { + expect(sanitize("x y z w")) + .toBe("x y z w"); + }); + + it("renders entity-encoded formatting tags (e.g. The Atlantic feed)", () => { + // Feeds like theatlantic.com ship emphasis as escaped entities + expect(sanitize("the <em>Atlantic</em> ocean")).toBe("the Atlantic ocean"); + }); + + it("handles emphasis inside titles regardless of how the parser delivers it", () => { + // The Atlantic uses in titles, e.g. "That's Enough, Euphoria" + const expected = "That’s Enough, Euphoria"; + expect(sanitize("That’s Enough, Euphoria")).toBe(expected); + expect(sanitize("That’s Enough, <em>Euphoria</em>")).toBe(expected); + }); + + it("strips attributes from allowed tags", () => { + const result = sanitize("bold"); + expect(result).toBe("bold"); + expect(result).not.toContain("onclick"); + expect(result).not.toContain("class"); + }); + + it("neutralizes script tags", () => { + expect(sanitize("hello")).not.toContain(" { + const result = sanitize("link

title

"); + expect(result).not.toContain("onerror"); + expect(result).not.toContain("href"); + expect(result).not.toContain("

"); + expect(result).toContain("link"); + expect(result.toLowerCase()).toContain("title"); + }); + + it("escapes bare HTML special characters in plain text", () => { + expect(sanitize("Fish & Chips for < 5")).toBe("Fish & Chips for < 5"); + }); + + it("only keeps tags present in the supplied allowlist", () => { + // Allow just : a safe-but-not-allowed must become plain text. + const result = sanitize("kept dropped", ["em"]); + expect(result).toBe("kept dropped"); + expect(result).not.toContain(""); + }); + + it("escapes everything when the allowlist is empty", () => { + expect(sanitize("hi & bye", [])).toBe("hi & bye"); + }); + + it("renders
as a single self-closing tag when allowed", () => { + const result = sanitize("a
b", ["br"]); + expect(result).toContain("
"); + expect(result).not.toContain("

"); + expect(result).not.toContain("<br>"); + }); + + it("collapses
to a space when not allowed", () => { + const result = sanitize("a
b", ["em"]); + expect(result).not.toContain("
"); + expect(result).toBe("a b"); + }); +});