Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion defaultmodules/newsfeed/newsfeed.js
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ Module.register("newsfeed", {
prohibitedWords: [],
scrollLength: 500,
logFeedWarnings: false,
dangerouslyDisableAutoEscaping: false
dangerouslyDisableAutoEscaping: false,
allowedBasicHtmlTags: []
},

getUrlPrefix (item) {
Expand Down
12 changes: 6 additions & 6 deletions defaultmodules/newsfeed/newsfeed.njk
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,13 @@
{% if config.showPublishDate %}{{ item.publishDate }}:{% endif %}
</div>
{% endif %}
<div class="newsfeed-title bright medium light{{ ' no-wrap' if not config.wrapTitle }}">{{ escapeTitle(item.title, item.url, config.dangerouslyDisableAutoEscaping, config.showTitleAsUrl) }}</div>
<div class="newsfeed-title bright medium light{{ ' no-wrap' if not config.wrapTitle }}">{{ escapeTitle(item.title, item.url, config.dangerouslyDisableAutoEscaping or (config.allowedBasicHtmlTags | length), config.showTitleAsUrl) }}</div>
{% if config.showDescription %}
<div class="newsfeed-desc small light{{ ' no-wrap' if not config.wrapDescription }}">
{% if config.truncDescription %}
{{ escapeText(item.description | truncate(config.lengthDescription) , config.dangerouslyDisableAutoEscaping) }}
{{ escapeText(item.description | truncate(config.lengthDescription) , config.dangerouslyDisableAutoEscaping or (config.allowedBasicHtmlTags | length)) }}
{% else %}
{{ escapeText(item.description, config.dangerouslyDisableAutoEscaping) }}
{{ escapeText(item.description, config.dangerouslyDisableAutoEscaping or (config.allowedBasicHtmlTags | length)) }}
{% endif %}
</div>
{% endif %}
Expand All @@ -68,13 +68,13 @@
{% if config.showPublishDate %}{{ publishDate }}:{% endif %}
</div>
{% endif %}
<div class="newsfeed-title bright medium light{{ ' no-wrap' if not config.wrapTitle }}">{{ escapeTitle(title, url, config.dangerouslyDisableAutoEscaping, config.showTitleAsUrl) }}</div>
<div class="newsfeed-title bright medium light{{ ' no-wrap' if not config.wrapTitle }}">{{ escapeTitle(title, url, config.dangerouslyDisableAutoEscaping or (config.allowedBasicHtmlTags | length), config.showTitleAsUrl) }}</div>
{% if config.showDescription %}
<div class="newsfeed-desc small light{{ ' no-wrap' if not config.wrapDescription }}">
{% if config.truncDescription %}
{{ escapeText(description | truncate(config.lengthDescription) , config.dangerouslyDisableAutoEscaping) }}
{{ escapeText(description | truncate(config.lengthDescription) , config.dangerouslyDisableAutoEscaping or (config.allowedBasicHtmlTags | length)) }}
{% else %}
{{ escapeText(description, config.dangerouslyDisableAutoEscaping) }}
{{ escapeText(description, config.dangerouslyDisableAutoEscaping or (config.allowedBasicHtmlTags | length)) }}
{% endif %}
</div>
{% endif %}
Expand Down
102 changes: 91 additions & 11 deletions defaultmodules/newsfeed/newsfeedfetcher.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,26 @@ const { htmlToText } = require("html-to-text");
const Log = require("logger");
const HTTPFetcher = require("#http_fetcher");

// The complete set of basic formatting tags users are allowed to opt into via the
// `allowedBasicHtmlTags` config option. These are inline emphasis / line-break tags that
// never carry attributes once sanitized, so they cannot be used for injection. Anything
// requested outside this list is ignored (see the constructor).
const SAFE_HTML_TAGS = ["b", "strong", "i", "em", "u", "br", "code", "s", "sub", "sup"];

// html-to-text formatter that re-emits an allowed inline tag around its content,
// so feeds that send real <em>/<strong> elements keep their emphasis. `br` is a void
// element, so it is emitted as a single self-contained tag with no children/closing tag.
const keepTagFormatter = (elem, walk, builder, formatOptions) => {
const { tagName } = formatOptions;
if (tagName === "br") {
builder.addLiteral("<br>");
return;
}
builder.addLiteral(`<${tagName}>`);
walk(elem.children, builder);
builder.addLiteral(`</${tagName}>`);
};

/**
* NewsfeedFetcher - Fetches and parses RSS/Atom feed data
* Uses HTTPFetcher for HTTP handling with intelligent error handling
Expand All @@ -20,12 +40,22 @@ class NewsfeedFetcher {
* @param {string} encoding - Encoding of the feed (e.g., 'UTF-8')
* @param {boolean} logFeedWarnings - If true log warnings when there is an error parsing a news article
* @param {boolean} useCorsProxy - If true cors proxy is used for article url's
* @param {string[]} allowedBasicHtmlTags - Basic formatting tags to keep in title and description. Only tags from the safe list are honored; anything else is ignored.
*/
constructor (url, reloadInterval, encoding, logFeedWarnings, useCorsProxy) {
constructor (url, reloadInterval, encoding, logFeedWarnings, useCorsProxy, allowedBasicHtmlTags = []) {
this.url = url;
this.encoding = encoding;
this.logFeedWarnings = logFeedWarnings;
this.useCorsProxy = useCorsProxy;

// Keep only tags from the hardcoded safe list; warn about (and ignore) anything else.
const requestedTags = (Array.isArray(allowedBasicHtmlTags) ? allowedBasicHtmlTags : []).map((tag) => String(tag).trim().toLowerCase());
this.allowedBasicHtmlTags = requestedTags.filter((tag) => SAFE_HTML_TAGS.includes(tag));
const ignoredTags = requestedTags.filter((tag) => !SAFE_HTML_TAGS.includes(tag));
if (ignoredTags.length > 0) {
Log.warn(`Ignoring unsupported allowedBasicHtmlTags [${ignoredTags.join(", ")}] for url ${url}. Allowed tags are: ${SAFE_HTML_TAGS.join(", ")}`);
}

this.items = [];
this.fetchFailedCallback = () => {};
this.itemsReceivedCallback = () => {};
Expand All @@ -44,6 +74,48 @@ class NewsfeedFetcher {
this.httpFetcher.on("error", (errorInfo) => this.fetchFailedCallback(this, errorInfo));
}

/**
* Sanitizes a feed string, keeping only the given allowlist of basic
* formatting tags and neutralizing everything else.
*
* The approach is allowlist-only and therefore safe to render unescaped:
* html-to-text first strips all real markup (scripts, links, images, …) and
* decodes entities to text, then EVERYTHING is HTML-escaped and ONLY the exact,
* attribute-free allowlisted tags are restored. No attributes, event handlers,
* or other tags can survive, so arbitrary HTML/script injection is impossible.
* @param {string} html - The raw title or description from the feed.
* @param {string[]} [allowedTags] - Tags to keep. Callers pass an already-validated subset of SAFE_HTML_TAGS.
* @returns {string} Safe HTML containing at most the allowed formatting tags.
*/
static sanitizeBasicHtml (html, allowedTags = []) {
// `br` keeps its default "collapse to a space" behavior unless explicitly allowed.
const keepTagSelectors = allowedTags.map((tagName) => ({ selector: tagName, format: "keepTag", options: { tagName } }));

const text = htmlToText(html, {
wordwrap: false,
formatters: { keepTag: keepTagFormatter },
selectors: [
{ selector: "a", options: { ignoreHref: true, noAnchorUrl: true } },
{ selector: "br", format: "inlineSurround", options: { prefix: " " } },
{ selector: "img", format: "skip" },
...keepTagSelectors
]
});

const escaped = text
.replaceAll("&", "&amp;")
.replaceAll("<", "&lt;")
.replaceAll(">", "&gt;");

if (allowedTags.length === 0) {
return escaped;
}

// Restore only the exact, attribute-free allowed opening/closing tags after escaping.
const restoreAllowedTags = new RegExp(`&lt;(/?(?:${allowedTags.join("|")}))&gt;`, "g");
return escaped.replace(restoreAllowedTags, "<$1>");
}

/**
* Creates a parse error info object
* @param {string} message - Error message
Expand Down Expand Up @@ -78,22 +150,30 @@ class NewsfeedFetcher {
const url = item.url || item.link || "";

if (title && pubdate) {
// Convert HTML entities, codes and tag
description = htmlToText(description, {
wordwrap: false,
selectors: [
{ selector: "a", options: { ignoreHref: true, noAnchorUrl: true } },
{ selector: "br", format: "inlineSurround", options: { prefix: " " } },
{ selector: "img", format: "skip" }
]
});
let displayTitle = title;
if (this.allowedBasicHtmlTags.length > 0) {
// Keep the configured basic formatting tags in both fields, strip everything else
description = NewsfeedFetcher.sanitizeBasicHtml(description, this.allowedBasicHtmlTags);
displayTitle = NewsfeedFetcher.sanitizeBasicHtml(title, this.allowedBasicHtmlTags);
} else {
// Convert HTML entities, codes and tag
description = htmlToText(description, {
wordwrap: false,
selectors: [
{ selector: "a", options: { ignoreHref: true, noAnchorUrl: true } },
{ selector: "br", format: "inlineSurround", options: { prefix: " " } },
{ selector: "img", format: "skip" }
]
});
}

this.items.push({
title,
title: displayTitle,
description,
pubdate,
url,
useCorsProxy: this.useCorsProxy,
// Hash on the original title so the dedup identity is stable regardless of allowedBasicHtmlTags
hash: crypto.createHash("sha256").update(`${pubdate} :: ${title} :: ${url}`).digest("hex")
});
} else if (this.logFeedWarnings) {
Expand Down
2 changes: 1 addition & 1 deletion defaultmodules/newsfeed/node_helper.js
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ module.exports = NodeHelper.create({
let fetcher;
if (typeof this.fetchers[url] === "undefined") {
Log.log(`Create new newsfetcher for url: ${url} - Interval: ${reloadInterval}`);
fetcher = new NewsfeedFetcher(url, reloadInterval, encoding, config.logFeedWarnings, useCorsProxy);
fetcher = new NewsfeedFetcher(url, reloadInterval, encoding, config.logFeedWarnings, useCorsProxy, config.allowedBasicHtmlTags);

fetcher.onReceive(() => {
this.broadcastFeeds();
Expand Down
28 changes: 28 additions & 0 deletions tests/configs/modules/newsfeed/basic_html.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
let config = {
address: "0.0.0.0",
ipWhitelist: [],
timeFormat: 12,

modules: [
{
module: "newsfeed",
position: "bottom_bar",
config: {
feeds: [
{
title: "Formatting Feed",
url: "http://localhost:8080/tests/mocks/newsfeed_basic_html.xml"
}
],
showDescription: true,
truncDescription: false,
allowedBasicHtmlTags: ["b", "strong", "i", "em", "u", "br", "code", "s", "sub", "sup"]
}
}
]
};

/*************** DO NOT EDIT THE LINE BELOW ***************/
if (typeof module !== "undefined") {
module.exports = config;
}
26 changes: 26 additions & 0 deletions tests/e2e/modules/newsfeed_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,32 @@ const runTests = () => {
});
});

describe("Basic HTML tags", () => {
beforeAll(async () => {
await helpers.startApplication("tests/configs/modules/newsfeed/basic_html.js");
await helpers.getDocument();
page = helpers.getPage();
});

it("should render allowlisted formatting tags in title and description", async () => {
await expect(page.locator(".newsfeed .newsfeed-desc")).toBeVisible();
const descHtml = await page.locator(".newsfeed .newsfeed-desc").innerHTML();
expect(descHtml).toContain("<em>");
expect(descHtml).toContain("<strong>");
expect(descHtml).toContain("<u>");
const titleHtml = await page.locator(".newsfeed .newsfeed-title").innerHTML();
expect(titleHtml).toContain("<em>");
});

it("should strip disallowed HTML and not execute injected scripts", async () => {
const descHtml = await page.locator(".newsfeed .newsfeed-desc").innerHTML();
expect(descHtml).not.toContain("<script");
expect(descHtml).not.toContain("onerror");
const xss = await page.evaluate(() => window.__newsfeedXss);
expect(xss).toBeUndefined();
});
});

describe("Invalid configuration", () => {
beforeAll(async () => {
await helpers.startApplication("tests/configs/modules/newsfeed/incorrect_url.js");
Expand Down
15 changes: 15 additions & 0 deletions tests/mocks/newsfeed_basic_html.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>Formatting Feed</title>
<link>http://localhost:8080</link>
<description>Feed used to test the allowBasicHtmlTags option.</description>
<item>
<title>News &lt;em&gt;Flash&lt;/em&gt;</title>
<link>http://localhost:8080/article</link>
<pubDate>Tue, 20 Sep 2016 11:16:08 +0000</pubDate>
<guid isPermaLink="false">http://localhost:8080/?p=1</guid>
<description><![CDATA[<p><em>Italic</em> and <strong>Bold</strong> and <u>Underlined</u> text.</p><script>window.__newsfeedXss = true;</script><img src="x" onerror="window.__newsfeedXss = true">]]></description>
</item>
</channel>
</rss>
83 changes: 83 additions & 0 deletions tests/unit/modules/default/newsfeed/newsfeedfetcher_spec.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
const defaults = require("../../../../../js/defaults");

const NewsfeedFetcher = require(`../../../../../${defaults.defaultModulesDir}/newsfeed/newsfeedfetcher`);

// The full safe list users may opt into; most tests run with it enabled.
const ALL_TAGS = ["b", "strong", "i", "em", "u", "br", "code", "s", "sub", "sup"];
const sanitize = (html, allowedTags = ALL_TAGS) => NewsfeedFetcher.sanitizeBasicHtml(html, allowedTags);

describe("NewsfeedFetcher.sanitizeBasicHtml", () => {
it("keeps real basic formatting tags", () => {
expect(sanitize("<b>a</b> <strong>b</strong> <i>c</i> <em>d</em> <u>e</u>"))
.toBe("<b>a</b> <strong>b</strong> <i>c</i> <em>d</em> <u>e</u>");
});

it("keeps the additional safe tags (code, s, sub, sup)", () => {
expect(sanitize("<code>x</code> <s>y</s> <sub>z</sub> <sup>w</sup>"))
.toBe("<code>x</code> <s>y</s> <sub>z</sub> <sup>w</sup>");
});

it("renders entity-encoded formatting tags (e.g. The Atlantic feed)", () => {
// Feeds like theatlantic.com ship emphasis as escaped entities
expect(sanitize("the &lt;em&gt;Atlantic&lt;/em&gt; ocean")).toBe("the <em>Atlantic</em> ocean");
});

it("handles emphasis inside titles regardless of how the parser delivers it", () => {
// The Atlantic uses <em> in titles, e.g. "That's Enough, <em>Euphoria</em>"
const expected = "That’s Enough, <em>Euphoria</em>";
expect(sanitize("That’s Enough, <em>Euphoria</em>")).toBe(expected);
expect(sanitize("That’s Enough, &lt;em&gt;Euphoria&lt;/em&gt;")).toBe(expected);
});

it("strips attributes from allowed tags", () => {
const result = sanitize("<b onclick=\"steal()\" class=\"x\">bold</b>");
expect(result).toBe("<b>bold</b>");
expect(result).not.toContain("onclick");
expect(result).not.toContain("class");
});

it("neutralizes script tags", () => {
expect(sanitize("<script>alert(1)</script>hello")).not.toContain("<script");
// Entity-encoded scripts must stay inert text, never become live markup
const encoded = sanitize("&lt;script&gt;alert(1)&lt;/script&gt;");
expect(encoded).not.toContain("<script");
expect(encoded).toContain("&lt;script&gt;");
});

it("drops images and link hrefs but keeps disallowed-tag text", () => {
const result = sanitize("<img src=\"x\" onerror=\"alert(1)\"><a href=\"https://evil.example\">link</a><h1>title</h1>");
expect(result).not.toContain("onerror");
expect(result).not.toContain("href");
expect(result).not.toContain("<h1>");
expect(result).toContain("link");
expect(result.toLowerCase()).toContain("title");
});

it("escapes bare HTML special characters in plain text", () => {
expect(sanitize("Fish &amp; Chips for &lt; 5")).toBe("Fish &amp; Chips for &lt; 5");
});

it("only keeps tags present in the supplied allowlist", () => {
// Allow just <em>: a safe-but-not-allowed <strong> must become plain text.
const result = sanitize("<em>kept</em> <strong>dropped</strong>", ["em"]);
expect(result).toBe("<em>kept</em> dropped");
expect(result).not.toContain("<strong>");
});

it("escapes everything when the allowlist is empty", () => {
expect(sanitize("<em>hi</em> &amp; <b>bye</b>", [])).toBe("hi &amp; bye");
});

it("renders <br> as a single self-closing tag when allowed", () => {
const result = sanitize("a<br>b", ["br"]);
expect(result).toContain("<br>");
expect(result).not.toContain("<br></br>");
expect(result).not.toContain("&lt;br&gt;");
});

it("collapses <br> to a space when not allowed", () => {
const result = sanitize("a<br>b", ["em"]);
expect(result).not.toContain("<br>");
expect(result).toBe("a b");
});
});