From f3fb94b4a75347d2593281193e8704d7cafdbb38 Mon Sep 17 00:00:00 2001
From: T9knauf <93224614+T9knauf@users.noreply.github.com>
Date: Sun, 10 May 2026 19:17:07 -0400
Subject: [PATCH] Preserve images when pasting HTML
---
src/cloud/lib/editor/htmlToMarkdown.spec.ts | 28 ++++
src/cloud/lib/editor/htmlToMarkdown.ts | 139 ++++++++++++++++++++
src/cloud/lib/editor/plugins/fileHandler.ts | 12 ++
3 files changed, 179 insertions(+)
create mode 100644 src/cloud/lib/editor/htmlToMarkdown.spec.ts
create mode 100644 src/cloud/lib/editor/htmlToMarkdown.ts
diff --git a/src/cloud/lib/editor/htmlToMarkdown.spec.ts b/src/cloud/lib/editor/htmlToMarkdown.spec.ts
new file mode 100644
index 0000000000..15715d96ba
--- /dev/null
+++ b/src/cloud/lib/editor/htmlToMarkdown.spec.ts
@@ -0,0 +1,28 @@
+import { htmlContainsImage, htmlToMarkdown } from './htmlToMarkdown'
+
+describe('htmlToMarkdown', () => {
+  it('ignores html without images so normal paste behavior is preserved', () => {
+    expect(htmlContainsImage('<p>Hello world</p>')).toBe(false)
+    expect(htmlToMarkdown('<p>Hello world</p>')).toBe(null)
+  })
+
+  it('converts web clipboard html with images to markdown', () => {
+    expect(
+      htmlToMarkdown(
+        '<p>Hello <strong>world</strong></p><img src="https://example.com/image.png" alt="Example image">'
+      )
+    ).toBe(
+      'Hello **world**\n\n![Example image](https://example.com/image.png)'
+    )
+  })
+
+  it('preserves links around pasted web content', () => {
+    expect(
+      htmlToMarkdown(
+        '<a href="https://example.com/post">Read post</a><br><img src="https://example.com/image.png" alt="Example image">'
+      )
+    ).toBe(
+      '[Read post](https://example.com/post)\n![Example image](https://example.com/image.png)'
+    )
+  })
+})
diff --git a/src/cloud/lib/editor/htmlToMarkdown.ts b/src/cloud/lib/editor/htmlToMarkdown.ts
new file mode 100644
index 0000000000..e02b034a3d
--- /dev/null
+++ b/src/cloud/lib/editor/htmlToMarkdown.ts
@@ -0,0 +1,139 @@
+const blockTags = new Set([ // tag names (matched against element.tagName in nodeToMarkdown) rendered as blank-line-separated blocks
+ 'ADDRESS',
+ 'ARTICLE',
+ 'ASIDE',
+ 'BLOCKQUOTE',
+ 'DIV',
+ 'FIGURE',
+ 'FOOTER',
+ 'FORM',
+ 'H1',
+ 'H2',
+ 'H3',
+ 'H4',
+ 'H5',
+ 'H6',
+ 'HEADER',
+ 'HR',
+ 'MAIN',
+ 'P',
+ 'PRE',
+ 'SECTION',
+ 'TABLE',
+])
+
+// Backslash-escapes every character Markdown could read as syntax in plain text.
+const escapeMarkdown = (value: string) =>
+  value.replace(/([\\`*_{}[\]()#+\-.!|>])/g, '\\$1')
+
+// Escapes backslashes and double quotes so the text stays inside a quoted title.
+const escapeLinkTitle = (value: string) =>
+  value.replace(/[\\"]/g, '\\$&')
+
+// Collapses each run of whitespace (spaces, tabs, newlines) into a single space.
+const normalizeInlineWhitespace = (value: string) =>
+  value.replace(/\s+/g, ' ')
+
+// Trims the content and surrounds it with blank lines so it forms its own
+// Markdown block; content that is empty or whitespace-only yields ''.
+const joinBlock = (value: string) =>
+  value.trim() === '' ? '' : `\n\n${value.trim()}\n\n`
+
+// Renders trimmed content as one "- " bullet line ending in a newline;
+// content that is empty or whitespace-only yields ''.
+const listItem = (value: string) =>
+  value.trim() === '' ? '' : `- ${value.trim()}\n`
+
+// Recursively converts a DOM node to Markdown. Text nodes are normalized and
+// escaped; elements without a dedicated rule contribute only their children.
+const nodeToMarkdown = (node: Node): string => {
+  if (node.nodeType === Node.TEXT_NODE) {
+    return escapeMarkdown(normalizeInlineWhitespace(node.textContent || ''))
+  }
+  // Comments and other non-element nodes produce no output.
+  if (node.nodeType !== Node.ELEMENT_NODE) {
+    return ''
+  }
+
+  const element = node as HTMLElement
+  const tagName = element.tagName
+
+  if (tagName === 'BR') {
+    return '\n'
+  }
+
+  if (tagName === 'IMG') {
+    const src = (element.getAttribute('src') || '').trim()
+    if (src === '') {
+      return ''
+    }
+    const alt = escapeMarkdown(element.getAttribute('alt') || '')
+    const title = (element.getAttribute('title') || '').trim()
+    const titlePart = title === '' ? '' : ` "${escapeLinkTitle(title)}"`
+    // Emit the complete image syntax: ![alt](src "title").
+    return `![${alt}](${src}${titlePart})`
+  }
+
+  if (tagName === 'CODE') {
+    // Escapes are literal in code spans: use raw text and a long-enough fence.
+    const code = normalizeInlineWhitespace(element.textContent || '').trim()
+    const runs: string[] = code.match(/`+/g) || []
+    const fence = '`'.repeat(Math.max(0, ...runs.map((run) => run.length)) + 1)
+    const pad = code.startsWith('`') || code.endsWith('`') ? ' ' : ''
+    return `${fence}${pad}${code}${pad}${fence}`
+  }
+
+  const children = Array.from(element.childNodes)
+    .map((child) => nodeToMarkdown(child))
+    .join('')
+
+  if (tagName === 'A') {
+    const href = element.getAttribute('href')
+    if (href == null || href.trim() === '') {
+      return children
+    }
+    const label = children.trim() || href.trim()
+    return `[${label}](${href.trim()})`
+  }
+
+  if (tagName === 'STRONG' || tagName === 'B') {
+    const trimmed = children.trim()
+    return trimmed.length > 0 ? `**${trimmed}**` : ''
+  }
+
+  if (tagName === 'EM' || tagName === 'I') {
+    const trimmed = children.trim()
+    return trimmed.length > 0 ? `_${trimmed}_` : ''
+  }
+
+  if (tagName === 'LI') {
+    return listItem(children)
+  }
+  // Lists and block-level containers are set apart by blank lines.
+  if (tagName === 'UL' || tagName === 'OL' || blockTags.has(tagName)) {
+    return joinBlock(children)
+  }
+
+  return children
+}
+
+// Regex pre-check: true when the HTML has an <img> tag carrying a src attribute.
+export const htmlContainsImage = (html: string) =>
+  /<img\b[^>]*\bsrc\s*=/i.test(html)
+
+// Converts clipboard HTML to Markdown. Returns null when the HTML has no
+// <img src=...> or converts to nothing, so callers can fall back to a plain paste.
+export const htmlToMarkdown = (html: string): string | null => {
+  if (!htmlContainsImage(html)) {
+    return null
+  }
+  const { body } = new DOMParser().parseFromString(html, 'text/html')
+  let markdown = ''
+  for (const child of Array.from(body.childNodes)) {
+    markdown += nodeToMarkdown(child)
+  }
+  // Strip blanks before newlines, cap newline runs at two, then trim the ends.
+  markdown = markdown.replace(/[ \t]+\n/g, '\n').replace(/\n{3,}/g, '\n\n')
+  markdown = markdown.trim()
+  return markdown.length > 0 ? markdown : null
+}
diff --git a/src/cloud/lib/editor/plugins/fileHandler.ts b/src/cloud/lib/editor/plugins/fileHandler.ts
index 63812c1367..617e16b28e 100644
--- a/src/cloud/lib/editor/plugins/fileHandler.ts
+++ b/src/cloud/lib/editor/plugins/fileHandler.ts
@@ -1,4 +1,5 @@
import { boostHubBaseUrl } from '../../consts'
+import { htmlToMarkdown } from '../htmlToMarkdown'
export type OnFileCallback = (file: File) => Promise
@@ -64,6 +65,17 @@ const attachFileHandlerToCodeMirrorEditor = (
for (let i = 0; i < files.length; i++) {
await handler(i > 0 ? instance.getCursor() : pos, files[i])
}
+ return
+ }
+
+ const html = event.clipboardData?.getData('text/html')
+ if (html != null && html !== '') {
+ const markdown = htmlToMarkdown(html)
+ if (markdown != null) {
+ event.stopPropagation()
+ event.preventDefault()
+ instance.replaceSelection(markdown, 'end')
+ }
}
}
)