diff --git a/defaultmodules/newsfeed/newsfeed.js b/defaultmodules/newsfeed/newsfeed.js
index fa1476faf5..9a12a4b8ed 100644
--- a/defaultmodules/newsfeed/newsfeed.js
+++ b/defaultmodules/newsfeed/newsfeed.js
@@ -33,7 +33,8 @@ Module.register("newsfeed", {
prohibitedWords: [],
scrollLength: 500,
logFeedWarnings: false,
- dangerouslyDisableAutoEscaping: false
+ dangerouslyDisableAutoEscaping: false,
+ allowedBasicHtmlTags: []
},
getUrlPrefix (item) {
diff --git a/defaultmodules/newsfeed/newsfeed.njk b/defaultmodules/newsfeed/newsfeed.njk
index 9eacc261c2..033bf20966 100644
--- a/defaultmodules/newsfeed/newsfeed.njk
+++ b/defaultmodules/newsfeed/newsfeed.njk
@@ -45,13 +45,13 @@
{% if config.showPublishDate %}{{ item.publishDate }}:{% endif %}
{% endif %}
-
{{ escapeTitle(item.title, item.url, config.dangerouslyDisableAutoEscaping, config.showTitleAsUrl) }}
+ {{ escapeTitle(item.title, item.url, config.dangerouslyDisableAutoEscaping or (config.allowedBasicHtmlTags | length), config.showTitleAsUrl) }}
{% if config.showDescription %}
{% if config.truncDescription %}
- {{ escapeText(item.description | truncate(config.lengthDescription) , config.dangerouslyDisableAutoEscaping) }}
+ {{ escapeText(item.description | truncate(config.lengthDescription) , config.dangerouslyDisableAutoEscaping or (config.allowedBasicHtmlTags | length)) }}
{% else %}
- {{ escapeText(item.description, config.dangerouslyDisableAutoEscaping) }}
+ {{ escapeText(item.description, config.dangerouslyDisableAutoEscaping or (config.allowedBasicHtmlTags | length)) }}
{% endif %}
{% endif %}
@@ -68,13 +68,13 @@
{% if config.showPublishDate %}{{ publishDate }}:{% endif %}
{% endif %}
- {{ escapeTitle(title, url, config.dangerouslyDisableAutoEscaping, config.showTitleAsUrl) }}
+ {{ escapeTitle(title, url, config.dangerouslyDisableAutoEscaping or (config.allowedBasicHtmlTags | length), config.showTitleAsUrl) }}
{% if config.showDescription %}
{% if config.truncDescription %}
- {{ escapeText(description | truncate(config.lengthDescription) , config.dangerouslyDisableAutoEscaping) }}
+ {{ escapeText(description | truncate(config.lengthDescription) , config.dangerouslyDisableAutoEscaping or (config.allowedBasicHtmlTags | length)) }}
{% else %}
- {{ escapeText(description, config.dangerouslyDisableAutoEscaping) }}
+ {{ escapeText(description, config.dangerouslyDisableAutoEscaping or (config.allowedBasicHtmlTags | length)) }}
{% endif %}
{% endif %}
diff --git a/defaultmodules/newsfeed/newsfeedfetcher.js b/defaultmodules/newsfeed/newsfeedfetcher.js
index 1a5f421ab8..8fcd6c9cf8 100644
--- a/defaultmodules/newsfeed/newsfeedfetcher.js
+++ b/defaultmodules/newsfeed/newsfeedfetcher.js
@@ -6,6 +6,26 @@ const { htmlToText } = require("html-to-text");
const Log = require("logger");
const HTTPFetcher = require("#http_fetcher");
+// The complete set of basic formatting tags users are allowed to opt into via the
+// `allowedBasicHtmlTags` config option. These are inline emphasis / line-break tags that
+// never carry attributes once sanitized, so they cannot be used for injection. Anything
+// requested outside this list is ignored (see the constructor).
+const SAFE_HTML_TAGS = ["b", "strong", "i", "em", "u", "br", "code", "s", "sub", "sup"];
+
+// html-to-text formatter that re-emits an allowed inline tag around its content,
+// so feeds that send real / elements keep their emphasis. `br` is a void
+// element, so it is emitted as a single self-contained tag with no children/closing tag.
+const keepTagFormatter = (elem, walk, builder, formatOptions) => {
+ const { tagName } = formatOptions;
+ if (tagName === "br") {
+ builder.addLiteral("
");
+ return;
+ }
+ builder.addLiteral(`<${tagName}>`);
+ walk(elem.children, builder);
+ builder.addLiteral(`${tagName}>`);
+};
+
/**
* NewsfeedFetcher - Fetches and parses RSS/Atom feed data
* Uses HTTPFetcher for HTTP handling with intelligent error handling
@@ -20,12 +40,22 @@ class NewsfeedFetcher {
* @param {string} encoding - Encoding of the feed (e.g., 'UTF-8')
* @param {boolean} logFeedWarnings - If true log warnings when there is an error parsing a news article
* @param {boolean} useCorsProxy - If true cors proxy is used for article url's
+ * @param {string[]} allowedBasicHtmlTags - Basic formatting tags to keep in title and description. Only tags from the safe list are honored; anything else is ignored.
*/
- constructor (url, reloadInterval, encoding, logFeedWarnings, useCorsProxy) {
+ constructor (url, reloadInterval, encoding, logFeedWarnings, useCorsProxy, allowedBasicHtmlTags = []) {
this.url = url;
this.encoding = encoding;
this.logFeedWarnings = logFeedWarnings;
this.useCorsProxy = useCorsProxy;
+
+ // Keep only tags from the hardcoded safe list; warn about (and ignore) anything else.
+ const requestedTags = (Array.isArray(allowedBasicHtmlTags) ? allowedBasicHtmlTags : []).map((tag) => String(tag).trim().toLowerCase());
+ this.allowedBasicHtmlTags = requestedTags.filter((tag) => SAFE_HTML_TAGS.includes(tag));
+ const ignoredTags = requestedTags.filter((tag) => !SAFE_HTML_TAGS.includes(tag));
+ if (ignoredTags.length > 0) {
+ Log.warn(`Ignoring unsupported allowedBasicHtmlTags [${ignoredTags.join(", ")}] for url ${url}. Allowed tags are: ${SAFE_HTML_TAGS.join(", ")}`);
+ }
+
this.items = [];
this.fetchFailedCallback = () => {};
this.itemsReceivedCallback = () => {};
@@ -44,6 +74,48 @@ class NewsfeedFetcher {
this.httpFetcher.on("error", (errorInfo) => this.fetchFailedCallback(this, errorInfo));
}
+ /**
+ * Sanitizes a feed string, keeping only the given allowlist of basic
+ * formatting tags and neutralizing everything else.
+ *
+ * The approach is allowlist-only and therefore safe to render unescaped:
+ * html-to-text first strips all real markup (scripts, links, images, …) and
+ * decodes entities to text, then EVERYTHING is HTML-escaped and ONLY the exact,
+ * attribute-free allowlisted tags are restored. No attributes, event handlers,
+ * or other tags can survive, so arbitrary HTML/script injection is impossible.
+ * @param {string} html - The raw title or description from the feed.
+ * @param {string[]} [allowedTags] - Tags to keep. Callers pass an already-validated subset of SAFE_HTML_TAGS.
+ * @returns {string} Safe HTML containing at most the allowed formatting tags.
+ */
+ static sanitizeBasicHtml (html, allowedTags = []) {
+ // `br` keeps its default "collapse to a space" behavior unless explicitly allowed.
+ const keepTagSelectors = allowedTags.map((tagName) => ({ selector: tagName, format: "keepTag", options: { tagName } }));
+
+ const text = htmlToText(html, {
+ wordwrap: false,
+ formatters: { keepTag: keepTagFormatter },
+ selectors: [
+ { selector: "a", options: { ignoreHref: true, noAnchorUrl: true } },
+ { selector: "br", format: "inlineSurround", options: { prefix: " " } },
+ { selector: "img", format: "skip" },
+ ...keepTagSelectors
+ ]
+ });
+
+ const escaped = text
+ .replaceAll("&", "&")
+ .replaceAll("<", "<")
+ .replaceAll(">", ">");
+
+ if (allowedTags.length === 0) {
+ return escaped;
+ }
+
+ // Restore only the exact, attribute-free allowed opening/closing tags after escaping.
+ const restoreAllowedTags = new RegExp(`<(/?(?:${allowedTags.join("|")}))>`, "g");
+ return escaped.replace(restoreAllowedTags, "<$1>");
+ }
+
/**
* Creates a parse error info object
* @param {string} message - Error message
@@ -78,22 +150,30 @@ class NewsfeedFetcher {
const url = item.url || item.link || "";
if (title && pubdate) {
- // Convert HTML entities, codes and tag
- description = htmlToText(description, {
- wordwrap: false,
- selectors: [
- { selector: "a", options: { ignoreHref: true, noAnchorUrl: true } },
- { selector: "br", format: "inlineSurround", options: { prefix: " " } },
- { selector: "img", format: "skip" }
- ]
- });
+ let displayTitle = title;
+ if (this.allowedBasicHtmlTags.length > 0) {
+ // Keep the configured basic formatting tags in both fields, strip everything else
+ description = NewsfeedFetcher.sanitizeBasicHtml(description, this.allowedBasicHtmlTags);
+ displayTitle = NewsfeedFetcher.sanitizeBasicHtml(title, this.allowedBasicHtmlTags);
+ } else {
+ // Convert HTML entities, codes and tag
+ description = htmlToText(description, {
+ wordwrap: false,
+ selectors: [
+ { selector: "a", options: { ignoreHref: true, noAnchorUrl: true } },
+ { selector: "br", format: "inlineSurround", options: { prefix: " " } },
+ { selector: "img", format: "skip" }
+ ]
+ });
+ }
this.items.push({
- title,
+ title: displayTitle,
description,
pubdate,
url,
useCorsProxy: this.useCorsProxy,
+ // Hash on the original title so the dedup identity is stable regardless of allowedBasicHtmlTags
hash: crypto.createHash("sha256").update(`${pubdate} :: ${title} :: ${url}`).digest("hex")
});
} else if (this.logFeedWarnings) {
diff --git a/defaultmodules/newsfeed/node_helper.js b/defaultmodules/newsfeed/node_helper.js
index cede93f698..ad391a518d 100644
--- a/defaultmodules/newsfeed/node_helper.js
+++ b/defaultmodules/newsfeed/node_helper.js
@@ -61,7 +61,7 @@ module.exports = NodeHelper.create({
let fetcher;
if (typeof this.fetchers[url] === "undefined") {
Log.log(`Create new newsfetcher for url: ${url} - Interval: ${reloadInterval}`);
- fetcher = new NewsfeedFetcher(url, reloadInterval, encoding, config.logFeedWarnings, useCorsProxy);
+ fetcher = new NewsfeedFetcher(url, reloadInterval, encoding, config.logFeedWarnings, useCorsProxy, config.allowedBasicHtmlTags);
fetcher.onReceive(() => {
this.broadcastFeeds();
diff --git a/tests/configs/modules/newsfeed/basic_html.js b/tests/configs/modules/newsfeed/basic_html.js
new file mode 100644
index 0000000000..4c4b6bc0b5
--- /dev/null
+++ b/tests/configs/modules/newsfeed/basic_html.js
@@ -0,0 +1,28 @@
+let config = {
+ address: "0.0.0.0",
+ ipWhitelist: [],
+ timeFormat: 12,
+
+ modules: [
+ {
+ module: "newsfeed",
+ position: "bottom_bar",
+ config: {
+ feeds: [
+ {
+ title: "Formatting Feed",
+ url: "http://localhost:8080/tests/mocks/newsfeed_basic_html.xml"
+ }
+ ],
+ showDescription: true,
+ truncDescription: false,
+ allowedBasicHtmlTags: ["b", "strong", "i", "em", "u", "br", "code", "s", "sub", "sup"]
+ }
+ }
+ ]
+};
+
+/*************** DO NOT EDIT THE LINE BELOW ***************/
+if (typeof module !== "undefined") {
+ module.exports = config;
+}
diff --git a/tests/e2e/modules/newsfeed_spec.js b/tests/e2e/modules/newsfeed_spec.js
index b68b6ea5a6..88a6205bfd 100644
--- a/tests/e2e/modules/newsfeed_spec.js
+++ b/tests/e2e/modules/newsfeed_spec.js
@@ -45,6 +45,32 @@ const runTests = () => {
});
});
+ describe("Basic HTML tags", () => {
+ beforeAll(async () => {
+ await helpers.startApplication("tests/configs/modules/newsfeed/basic_html.js");
+ await helpers.getDocument();
+ page = helpers.getPage();
+ });
+
+ it("should render allowlisted formatting tags in title and description", async () => {
+ await expect(page.locator(".newsfeed .newsfeed-desc")).toBeVisible();
+ const descHtml = await page.locator(".newsfeed .newsfeed-desc").innerHTML();
+ expect(descHtml).toContain("");
+ expect(descHtml).toContain("");
+ expect(descHtml).toContain("");
+ const titleHtml = await page.locator(".newsfeed .newsfeed-title").innerHTML();
+ expect(titleHtml).toContain("");
+ });
+
+ it("should strip disallowed HTML and not execute injected scripts", async () => {
+ const descHtml = await page.locator(".newsfeed .newsfeed-desc").innerHTML();
+ expect(descHtml).not.toContain("
]]>
+
+
+
diff --git a/tests/unit/modules/default/newsfeed/newsfeedfetcher_spec.js b/tests/unit/modules/default/newsfeed/newsfeedfetcher_spec.js
new file mode 100644
index 0000000000..7d195840cd
--- /dev/null
+++ b/tests/unit/modules/default/newsfeed/newsfeedfetcher_spec.js
@@ -0,0 +1,83 @@
+const defaults = require("../../../../../js/defaults");
+
+const NewsfeedFetcher = require(`../../../../../${defaults.defaultModulesDir}/newsfeed/newsfeedfetcher`);
+
+// The full safe list users may opt into; most tests run with it enabled.
+const ALL_TAGS = ["b", "strong", "i", "em", "u", "br", "code", "s", "sub", "sup"];
+const sanitize = (html, allowedTags = ALL_TAGS) => NewsfeedFetcher.sanitizeBasicHtml(html, allowedTags);
+
+describe("NewsfeedFetcher.sanitizeBasicHtml", () => {
+ it("keeps real basic formatting tags", () => {
+ expect(sanitize("a b c d e"))
+ .toBe("a b c d e");
+ });
+
+ it("keeps the additional safe tags (code, s, sub, sup)", () => {
+ expect(sanitize("x y z w"))
+ .toBe("x y z w");
+ });
+
+ it("renders entity-encoded formatting tags (e.g. The Atlantic feed)", () => {
+ // Feeds like theatlantic.com ship emphasis as escaped entities
+ expect(sanitize("the <em>Atlantic</em> ocean")).toBe("the Atlantic ocean");
+ });
+
+ it("handles emphasis inside titles regardless of how the parser delivers it", () => {
+ // The Atlantic uses in titles, e.g. "That's Enough, Euphoria"
+ const expected = "That’s Enough, Euphoria";
+ expect(sanitize("That’s Enough, Euphoria")).toBe(expected);
+ expect(sanitize("That’s Enough, <em>Euphoria</em>")).toBe(expected);
+ });
+
+ it("strips attributes from allowed tags", () => {
+ const result = sanitize("bold");
+ expect(result).toBe("bold");
+ expect(result).not.toContain("onclick");
+ expect(result).not.toContain("class");
+ });
+
+ it("neutralizes script tags", () => {
+ expect(sanitize("hello")).not.toContain("