/**
 * Converts HTML clipboard fragments that contain images into Markdown.
 *
 * The editor paste handler (`plugins/fileHandler.ts`) reads the clipboard's
 * `text/html` payload, passes it to `htmlToMarkdown`, and only when the result
 * is non-null cancels the default paste and inserts the Markdown at the
 * selection. Returning `null` therefore means "leave normal paste alone".
 */

/** Upper-cased tag names whose content is emitted as its own paragraph. */
const blockTags = new Set([
  'ADDRESS',
  'ARTICLE',
  'ASIDE',
  'BLOCKQUOTE',
  'DIV',
  'FIGURE',
  'FOOTER',
  'FORM',
  'H1',
  'H2',
  'H3',
  'H4',
  'H5',
  'H6',
  'HEADER',
  'HR',
  'MAIN',
  'P',
  'PRE',
  'SECTION',
  'TABLE',
])

/**
 * Backslash-escapes characters that Markdown would otherwise interpret.
 * `<` is included so that literal text such as `<b>` is not re-read as HTML.
 */
const escapeMarkdown = (value: string): string => {
  return value.replace(/([\\`*_{}[\]()#+\-.!|<>])/g, '\\$1')
}

/**
 * Escapes a string for use inside a double-quoted link/image title.
 * Backslashes are escaped too; otherwise a trailing `\` would swallow the
 * closing quote.
 */
const escapeLinkTitle = (value: string): string => {
  return value.replace(/[\\"]/g, '\\$&')
}

/**
 * Makes a URL safe to place between `](` and `)`: whitespace, parentheses and
 * angle brackets are percent-encoded so they cannot terminate the destination.
 */
const escapeLinkDestination = (value: string): string => {
  return value.replace(/[\s()<>]/g, (char) => {
    if (char === '(') {
      return '%28'
    }
    if (char === ')') {
      return '%29'
    }
    return encodeURIComponent(char)
  })
}

/** Collapses every run of whitespace (including newlines) to a single space. */
const normalizeInlineWhitespace = (value: string): string => {
  return value.replace(/\s+/g, ' ')
}

/** Wraps non-empty content in blank lines; empty content yields `''`. */
const joinBlock = (value: string): string => {
  const trimmed = value.trim()
  return trimmed.length > 0 ? `\n\n${trimmed}\n\n` : ''
}

/**
 * Renders one list item.
 *
 * @param value - Already-converted Markdown content of the item.
 * @param marker - List marker without trailing space (`-`, `1.`, ...).
 * @returns The item line terminated by `\n`, or `''` when the item is empty.
 */
const listItem = (value: string, marker = '-'): string => {
  const trimmed = value.trim()
  if (trimmed.length === 0) {
    return ''
  }
  // Indent continuation lines to the content column so nested blocks (for
  // example a nested list) stay attached to this item instead of flattening.
  const indent = ' '.repeat(marker.length + 1)
  return `${marker} ${trimmed.replace(/\n(?!\n)/g, `\n${indent}`)}\n`
}

/**
 * Renders raw text as an inline code span.
 *
 * Backslash escapes are literal inside code spans, so the content is left
 * unescaped and the delimiter is made one backtick longer than the longest
 * backtick run it contains.
 */
const inlineCode = (raw: string): string => {
  const content = normalizeInlineWhitespace(raw).trim()
  if (content === '') {
    return ''
  }
  const runs: string[] = content.match(/`+/g) ?? []
  const longestRun = runs.reduce((longest, run) => {
    return Math.max(longest, run.length)
  }, 0)
  const fence = '`'.repeat(longestRun + 1)
  // A space keeps a leading/trailing backtick from merging with the fence.
  const pad = content.startsWith('`') || content.endsWith('`') ? ' ' : ''
  return `${fence}${pad}${content}${pad}${fence}`
}

/** Converts and concatenates all child nodes of `parent`. */
const childrenToMarkdown = (parent: Node): string => {
  return Array.from(parent.childNodes)
    .map((child) => nodeToMarkdown(child))
    .join('')
}

/**
 * Renders a `<ul>`/`<ol>` element. Ordered lists are numbered from 1;
 * whitespace-only text between items is dropped; empty items are skipped and
 * do not consume a number.
 */
const listToMarkdown = (list: HTMLElement, ordered: boolean): string => {
  const parts: string[] = []
  let rendered = 0
  for (const child of Array.from(list.childNodes)) {
    if (
      child.nodeType === Node.TEXT_NODE &&
      (child.textContent ?? '').trim() === ''
    ) {
      continue
    }
    if (
      child.nodeType === Node.ELEMENT_NODE &&
      (child as HTMLElement).tagName === 'LI'
    ) {
      const marker = ordered ? `${rendered + 1}.` : '-'
      const item = listItem(childrenToMarkdown(child), marker)
      if (item !== '') {
        rendered += 1
        parts.push(item)
      }
      continue
    }
    parts.push(nodeToMarkdown(child))
  }
  return joinBlock(parts.join(''))
}

/**
 * Recursively converts a DOM node to Markdown.
 *
 * Text nodes are whitespace-normalized and escaped; nodes that are neither
 * text nor elements produce `''`; unknown elements are transparent (only
 * their children are rendered).
 */
const nodeToMarkdown = (node: Node): string => {
  if (node.nodeType === Node.TEXT_NODE) {
    return escapeMarkdown(normalizeInlineWhitespace(node.textContent ?? ''))
  }

  if (node.nodeType !== Node.ELEMENT_NODE) {
    return ''
  }

  const element = node as HTMLElement
  const tagName = element.tagName

  if (tagName === 'BR') {
    return '\n'
  }

  if (tagName === 'IMG') {
    const src = (element.getAttribute('src') ?? '').trim()
    if (src === '') {
      return ''
    }
    const alt = escapeMarkdown(element.getAttribute('alt') ?? '')
    const title = (element.getAttribute('title') ?? '').trim()
    const titlePart = title !== '' ? ` "${escapeLinkTitle(title)}"` : ''
    return `![${alt}](${escapeLinkDestination(src)}${titlePart})`
  }

  if (tagName === 'CODE') {
    // Use the raw text, not the escaped children: escapes would show up
    // verbatim inside the code span.
    return inlineCode(element.textContent ?? '')
  }

  if (tagName === 'UL' || tagName === 'OL') {
    return listToMarkdown(element, tagName === 'OL')
  }

  const children = childrenToMarkdown(element)

  if (tagName === 'A') {
    const href = (element.getAttribute('href') ?? '').trim()
    if (href === '') {
      return children
    }
    const label = children.trim() || href
    return `[${label}](${escapeLinkDestination(href)})`
  }

  if (tagName === 'STRONG' || tagName === 'B') {
    const trimmed = children.trim()
    return trimmed.length > 0 ? `**${trimmed}**` : ''
  }

  if (tagName === 'EM' || tagName === 'I') {
    const trimmed = children.trim()
    return trimmed.length > 0 ? `_${trimmed}_` : ''
  }

  if (tagName === 'LI') {
    // An <li> outside of a list container is rendered as a plain bullet.
    return listItem(children)
  }

  if (blockTags.has(tagName)) {
    return joinBlock(children)
  }

  return children
}

/**
 * Cheap textual pre-check: does `html` contain an `<img>` tag carrying a
 * `src` attribute? Other `src`-bearing tags (`<script>`, `<iframe>`, ...) do
 * not count.
 */
export const htmlContainsImage = (html: string): boolean => {
  return /<img\b[^>]*\bsrc\s*=/i.test(html)
}

/**
 * Converts an HTML fragment to Markdown, but only when it contains an image.
 *
 * @param html - HTML markup, e.g. the clipboard's `text/html` payload.
 * @returns The Markdown text, or `null` when `html` has no `<img src=...>`
 *   or the conversion yields nothing, so the caller can fall back to the
 *   default paste behavior.
 *
 * @example
 * htmlToMarkdown('<p>Hello world</p>') // null
 * @example
 * // A paragraph with bold text followed by a titled image yields:
 * // 'Hello **world**\n\n![Cat photo](https://example.com/cat.png "Source")'
 * @example
 * // A link, a line break and an image yield:
 * // '[Read post](https://example.com/post)\n![](https://example.com/post.jpg)'
 */
export const htmlToMarkdown = (html: string): string | null => {
  if (!htmlContainsImage(html)) {
    return null
  }

  const parser = new DOMParser()
  const doc = parser.parseFromString(html, 'text/html')
  const markdown = childrenToMarkdown(doc.body)
    // Drop trailing spaces, then cap consecutive newlines at one blank line.
    .replace(/[ \t]+\n/g, '\n')
    .replace(/\n{3,}/g, '\n\n')
    .trim()

  return markdown.length > 0 ? markdown : null
}