diff --git a/CHANGELOG.md b/CHANGELOG.md index 69a5f3fc9..d49dd422c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ### Unreleased +- Add COLR/CPAL and CBDT/CBLC color emoji support (#1692) - Fix garbled text copying in Chrome/Edge for PDFs with >256 unique characters (#1659) - Fix Link accessibility issues - Fix Table Accessibility Issue: Operator CS/cs not allowed in this current state diff --git a/docs/text.md b/docs/text.md index 411505ae9..d4cf97528 100644 --- a/docs/text.md +++ b/docs/text.md @@ -260,5 +260,95 @@ every time you want to use it. doc.font('Heading Font') .text('This is a heading.'); -That's about all there is too it for text in PDFKit. Let's move on now to +## Color Emoji + +PDFKit supports rendering color emoji when an emoji font is registered. +Emoji are automatically detected in your text and rendered using the glyphs +from the emoji font, while surrounding text continues to use the current +document font. Three color emoji formats are supported: + +* **`sbix`** (Standard Bitmap Graphics) — bitmap emoji, used by Apple Color Emoji +* **`COLR`/`CPAL`** (Color Layers) — vector emoji rendered as colored glyph layers, used by fonts like Twemoji Mozilla +* **`CBDT`/`CBLC`** (Color Bitmap Data) — bitmap emoji, used by Google Noto Color Emoji + +### Registering an emoji font + +To enable emoji support, register an emoji font that contains one of the +supported color tables (`sbix`, `COLR`/`CPAL`, or `CBDT`/`CBLC`). On macOS, +the built-in Apple Color Emoji font (sbix) works out of the box. 
+ +You can register the emoji font via constructor options: + +```javascript +const doc = new PDFDocument({ + emojiFont: '/System/Library/Fonts/Apple Color Emoji.ttc', + emojiFontFamily: 'AppleColorEmoji', +}); +``` + +Or register it at any time using the `registerEmojiFont` method: + +```javascript +doc.registerEmojiFont('/System/Library/Fonts/Apple Color Emoji.ttc', 'AppleColorEmoji'); +``` + +The `emojiFontFamily` (or second argument) is the PostScript name or family +name used to select the correct font from a TrueType Collection (`.ttc`) file. +If the font file contains only a single font, this parameter can be omitted. + +### Using emoji in text + +Once an emoji font is registered, you can use emoji characters directly in +any `text` call. PDFKit automatically segments the string into text and emoji +runs, rendering each with the appropriate font. + +```javascript +doc.font('Helvetica') + .fontSize(18) + .text('Hello 😀 World 🎉 PDFKit 🚀'); +``` + +All standard text options (alignment, line wrapping, `continued`, columns, +etc.) work with emoji. The `widthOfString` method is also emoji-aware, so +layout calculations account for emoji width correctly. + +### Supported emoji types + +The emoji segmenter handles the full range of modern emoji sequences: + +* **Simple emoji** — single code point emoji like 😀, 🎉, 🚀 +* **ZWJ sequences** — composite emoji joined with Zero-Width Joiner, such as + family groups (👨‍👩‍👧‍👦) and profession emoji (👩‍💻) +* **Flag emoji** — regional indicator pairs like 🇺🇸, 🇯🇵, 🇫🇷 +* **Skin tone modifiers** — emoji with Fitzpatrick skin tone modifiers (👋🏻 👋🏿) +* **Variation selectors** — text vs emoji presentation (❤️) +* **Keycap sequences** — digit + variation selector + combining enclosing + keycap (1️⃣ 2️⃣ 3️⃣) + +### Different font sizes + +Emoji scale to match the current font size. 
For bitmap fonts (sbix, CBDT), +PDFKit selects the closest available bitmap size and scales it to fit. +COLR/CPAL vector emoji scale smoothly to any size. + +```javascript +doc.font('Helvetica'); + +doc.fontSize(12).text('Small emoji: 🎉'); +doc.fontSize(24).text('Medium emoji: 🎉'); +doc.fontSize(48).text('Large emoji: 🎉'); +``` + +### Limitations + +* The emoji font file must be accessible on the system where the PDF is + generated. Apple Color Emoji is included with macOS but is not + redistributable. +* `sbix` and `CBDT`/`CBLC` emoji are rendered as raster images (PNG), so they + may appear slightly less sharp than vector text at very large sizes. + `COLR`/`CPAL` emoji are fully vector and scale cleanly. +* `COLR` v1 (gradient-based) emoji are not currently supported — only `COLR` + v0 (flat color layers) is handled. + +That's about all there is to it for text in PDFKit. Let's move on now to images. diff --git a/lib/document.js b/lib/document.js index b3296f4cd..746bb00de 100644 --- a/lib/document.js +++ b/lib/document.js @@ -102,6 +102,11 @@ class PDFDocument extends stream.Readable { this.initTables(); this.initSubset(options); + // Register emoji font if provided in options + if (options.emojiFont) { + this.registerEmojiFont(options.emojiFont, options.emojiFontFamily); + } + // Initialize the metadata this.info = { Producer: 'PDFKit', diff --git a/lib/emoji/segmenter.js b/lib/emoji/segmenter.js new file mode 100644 index 000000000..d0dccd2ae --- /dev/null +++ b/lib/emoji/segmenter.js @@ -0,0 +1,250 @@ +/** + * Emoji Segmenter — splits text into plain-text vs emoji runs. + * + * Uses Unicode ranges as a heuristic to detect emoji code points, + * then groups consecutive emoji (including ZWJ sequences, variation + * selectors, skin-tone modifiers, and regional indicators) into + * single segments. 
/**
 * Emoji segmentation helpers: classify Unicode code points and split a
 * string into alternating plain-text and emoji runs.
 */

// Zero-Width Joiner
const ZWJ = 0x200d;

// Variation Selector 16 (emoji presentation)
const VS16 = 0xfe0f;

// Variation Selector 15 (text presentation)
const VS15 = 0xfe0e;

// Combining Enclosing Keycap
const KEYCAP = 0x20e3;

/**
 * Check if a code point is in an emoji-related Unicode range.
 * This is a heuristic — it catches the vast majority of emoji
 * but may include some non-emoji symbols. The font's glyph
 * lookup is the final authority at render time.
 *
 * @param {number} cp - Unicode code point
 * @returns {boolean} true when the code point should start/continue an emoji run
 */
function isEmojiCodePoint(cp) {
  return (
    // Miscellaneous Symbols and Dingbats
    (cp >= 0x2600 && cp <= 0x27bf) ||
    // Supplemental arrows (curved arrows ⤴ ⤵)
    (cp >= 0x2934 && cp <= 0x2935) ||
    // Misc Symbols and Arrows (⬅ ⬆ ⬇, ⬛ ⬜, ⭐ … ⭕)
    (cp >= 0x2b05 && cp <= 0x2b07) ||
    (cp >= 0x2b1b && cp <= 0x2b1c) ||
    (cp >= 0x2b50 && cp <= 0x2b55) ||
    // CJK Symbols (wavy dash, etc.)
    cp === 0x3030 ||
    cp === 0x303d ||
    // Enclosed CJK Letters
    cp === 0x3297 ||
    cp === 0x3299 ||
    // Enclosed Alphanumeric Supplement (circled letters, etc.); this range
    // also contains the Regional Indicator Symbols (U+1F1E6–U+1F1FF)
    (cp >= 0x1f100 && cp <= 0x1f1ff) ||
    // Miscellaneous Symbols and Pictographs; this range also contains the
    // Fitzpatrick skin tone modifiers (U+1F3FB–U+1F3FF)
    (cp >= 0x1f300 && cp <= 0x1f5ff) ||
    // Emoticons
    (cp >= 0x1f600 && cp <= 0x1f64f) ||
    // Transport and Map Symbols
    (cp >= 0x1f680 && cp <= 0x1f6ff) ||
    // Geometric Shapes Extended (colored circles/squares, heavy equals sign)
    (cp >= 0x1f7e0 && cp <= 0x1f7f0) ||
    // Supplemental Symbols and Pictographs
    (cp >= 0x1f900 && cp <= 0x1f9ff) ||
    // Symbols and Pictographs Extended-A
    (cp >= 0x1fa00 && cp <= 0x1fa6f) ||
    // Symbols and Pictographs Extended-B
    (cp >= 0x1fa70 && cp <= 0x1faff) ||
    // Miscellaneous Symbols (copyright, registered, etc.)
    cp === 0x00a9 ||
    cp === 0x00ae ||
    // General Punctuation (double exclamation, exclamation question mark)
    cp === 0x203c ||
    cp === 0x2049 ||
    // Letterlike Symbols (trade mark, information source)
    cp === 0x2122 ||
    cp === 0x2139 ||
    // Arrows
    (cp >= 0x2190 && cp <= 0x21aa) ||
    // Misc Technical (phone, hourglass, etc.)
    (cp >= 0x2300 && cp <= 0x23ff) ||
    // Enclosed Alphanumerics (circled M)
    cp === 0x24c2 ||
    // Geometric Shapes
    (cp >= 0x25aa && cp <= 0x25fe) ||
    // Playing cards, mahjong
    (cp >= 0x1f004 && cp <= 0x1f0cf) ||
    // Tags block (used in flag sequences like England, Scotland)
    (cp >= 0xe0020 && cp <= 0xe007f)
  );
}

/**
 * Check if a code point is a modifier/joiner that extends an emoji sequence.
 *
 * @param {number} cp - Unicode code point
 * @returns {boolean}
 */
function isEmojiModifier(cp) {
  return (
    cp === ZWJ ||
    cp === VS16 ||
    cp === KEYCAP ||
    // Skin tone modifiers
    (cp >= 0x1f3fb && cp <= 0x1f3ff) ||
    // Tags block (flag sub-sequences)
    (cp >= 0xe0020 && cp <= 0xe007f)
  );
}

/**
 * Check if a code point is a keycap base character (0-9, #, *).
 * These become emoji when followed by VS16 and/or Combining Enclosing Keycap.
 *
 * @param {number} cp - Unicode code point
 * @returns {boolean}
 */
function isKeycapBase(cp) {
  return (cp >= 0x30 && cp <= 0x39) || cp === 0x23 || cp === 0x2a;
}

/**
 * Check if a code point is a Regional Indicator Symbol letter.
 *
 * @param {number} cp - Unicode code point
 * @returns {boolean}
 */
function isRegionalIndicator(cp) {
  return cp >= 0x1f1e6 && cp <= 0x1f1ff;
}

/**
 * Extract code points from a string, handling UTF-16 surrogate pairs.
 *
 * @param {string} str - Input string
 * @returns {Array<{cp: number, index: number, length: number}>} One entry per
 *   code point, where `index` is the position in the original string and
 *   `length` is the number of UTF-16 code units consumed (1 or 2).
 */
function codePoints(str) {
  const result = [];
  for (let i = 0; i < str.length; ) {
    const code = str.codePointAt(i);
    const len = code > 0xffff ? 2 : 1;
    result.push({ cp: code, index: i, length: len });
    i += len;
  }
  return result;
}

/**
 * Segment a text string into runs of plain text and emoji.
 *
 * Adjacent emoji (including ZWJ sequences, flags, skin-tone and keycap
 * sequences) are merged into a single 'emoji' segment; everything else is
 * merged into 'text' segments. Concatenating the `text` of all returned
 * segments yields the input string.
 *
 * @param {string} text - The input text
 * @returns {Array<{type: 'text'|'emoji', text: string}>} Segments in order
 *   (empty array for empty/falsy input)
 */
function segmentEmojiText(text) {
  if (!text) return [];

  const cps = codePoints(text);
  const segments = [];
  let currentType = null;
  let currentStart = 0;

  // Start a run of `type` at code point `at`, flushing the run in progress
  // first. A no-op when the current run already has that type.
  const switchTo = (type, at) => {
    if (currentType === type) return;
    const boundary = cps[at].index;
    if (currentType !== null && boundary > currentStart) {
      segments.push({
        type: currentType,
        text: text.slice(currentStart, boundary),
      });
    }
    currentType = type;
    currentStart = boundary;
  };

  let i = 0;
  while (i < cps.length) {
    const { cp } = cps[i];
    const lookahead = i + 1 < cps.length ? cps[i + 1].cp : -1;

    if (isKeycapBase(cp) && (lookahead === VS16 || lookahead === KEYCAP)) {
      // Keycap sequences: digit/#/* + VS16 + KEYCAP (U+20E3).
      // The base character is NOT in the emoji ranges, so the sequence can
      // only be recognised by looking ahead.
      switchTo('emoji', i);
      i++; // base character
      if (i < cps.length && cps[i].cp === VS16) i++;
      if (i < cps.length && cps[i].cp === KEYCAP) i++;
    } else if (isEmojiCodePoint(cp)) {
      switchTo('emoji', i);
      i++;

      // Regional indicator pairs form a single flag
      if (
        isRegionalIndicator(cp) &&
        i < cps.length &&
        isRegionalIndicator(cps[i].cp)
      ) {
        i++;
      }

      // Trailing modifiers: VS16, ZWJ, keycap, skin tones, tag characters.
      // The emoji following a ZWJ is picked up by the next outer iteration
      // and stays in the same run because the run type is still 'emoji'.
      while (i < cps.length && isEmojiModifier(cps[i].cp)) {
        i++;
      }
    } else if (cp === VS15) {
      // Text presentation selector: kept inside whatever run is in progress;
      // it only opens a text run when no emoji run is active.
      if (currentType !== 'emoji') switchTo('text', i);
      i++;
    } else {
      // Plain text code point
      switchTo('text', i);
      i++;
    }
  }

  // Flush final segment
  if (currentType && currentStart < text.length) {
    segments.push({ type: currentType, text: text.slice(currentStart) });
  }

  return segments;
}

export { segmentEmojiText, isEmojiCodePoint, codePoints };
+ * + * @param {string|Uint8Array|ArrayBuffer} src - Path or buffer + * @param {string} [family] - Font name within a TTC collection + * @returns {object} fontkit font object + */ + static openEmoji(src, family) { + if (typeof src === 'string') { + src = fs.readFileSync(src); + } + let font; + + if (src instanceof Uint8Array) { + font = fontkit.create(src); + } else if (src instanceof ArrayBuffer) { + font = fontkit.create(new Uint8Array(src)); + } + + if (font == null) { + throw new Error('Not a supported emoji font format.'); + } + + // If fontkit returned a TrueTypeCollection, find the right font + if (typeof font.fonts !== 'undefined' && Array.isArray(font.fonts)) { + if (!family) { + font = font.fonts[0]; + } else { + // Match by postscriptName or familyName + const match = font.fonts.find( + (f) => f.postscriptName === family || f.familyName === family, + ); + font = match || font.fonts[0]; + } + } + + if (font == null) { + throw new Error( + `Could not find font "${family}" in the emoji font collection.`, + ); + } + + return font; + } } export default PDFFontFactory; diff --git a/lib/mixins/fonts.js b/lib/mixins/fonts.js index 9b8d915d0..2a6aa172c 100644 --- a/lib/mixins/fonts.js +++ b/lib/mixins/fonts.js @@ -1,5 +1,6 @@ import PDFFontFactory from '../font_factory'; import { CM_TO_IN, IN_TO_PT, MM_TO_CM, PC_TO_PT, PX_TO_IN } from '../utils'; +import { segmentEmojiText } from '../emoji/segmenter'; const isEqualFont = (font1, font2) => { // compare font checksum @@ -44,6 +45,10 @@ export default { this._registeredFonts = {}; + // Emoji font state + this._emojiFont = null; // fontkit font object for emoji + this._emojiImageCache = new Map(); // glyphId+ppem โ†’ PDFImage object + // Set the default font if (defaultFont) { this.font(defaultFont, defaultFontFamily); @@ -124,6 +129,72 @@ export default { return this; }, + /** + * Register an emoji font for color emoji rendering. + * The font is loaded via fontkit and stored on the document. 
+ * Emoji glyphs from this font are rendered as image XObjects (SBIX) + * rather than embedded CIDFont text. + * + * @param {string|Uint8Array|ArrayBuffer} src - Path or buffer of the emoji font + * @param {string} [family] - Font name within a TTC collection (e.g. 'AppleColorEmoji') + * @returns {this} + */ + registerEmojiFont(src, family) { + this._emojiFont = PDFFontFactory.openEmoji(src, family); + return this; + }, + + /** + * Segment text into plain-text and emoji runs. + * Returns null if no emoji font is registered (fast path). + * + * @param {string} text + * @returns {Array<{type: 'text'|'emoji', text: string}>|null} + */ + _segmentEmojiText(text) { + if (!this._emojiFont) return null; + const segments = segmentEmojiText(text); + // If there's only one text segment covering the whole string, no emoji detected + if ( + segments.length === 1 && + segments[0].type === 'text' && + segments[0].text === text + ) { + return null; + } + // If there are any emoji segments, return the segmentation + if (segments.some((s) => s.type === 'emoji')) { + return segments; + } + return null; + }, + + /** + * Calculate the width of an emoji string using the emoji font metrics. + * Uses font shaping (layout) to get the advance width of the full sequence, + * which handles ZWJ sequences, regional indicators, etc. + * + * @param {string} text - The emoji text (a single emoji sequence) + * @param {number} fontSize - The font size in points + * @returns {number} Width in points + */ + _emojiWidthOfString(text, fontSize) { + if (!this._emojiFont) return 0; + try { + const run = this._emojiFont.layout(text); + let totalAdvance = 0; + for (let i = 0; i < run.glyphs.length; i++) { + totalAdvance += run.positions[i].xAdvance; + } + return (totalAdvance / this._emojiFont.unitsPerEm) * fontSize; + } catch { + // Some fonts (e.g. NotoColorEmoji CBDT) don't support layout() because + // fontkit cannot create glyph objects (no glyf/CFF table). 
+ // Fall back to fontSize per emoji โ€” emoji are typically square. + return fontSize; + } + }, + /** * Convert a {@link Size} into a point measurement * diff --git a/lib/mixins/text.js b/lib/mixins/text.js index 829065319..8dc81ea17 100644 --- a/lib/mixins/text.js +++ b/lib/mixins/text.js @@ -1,5 +1,6 @@ import LineWrapper from '../line_wrapper'; import PDFObject from '../object'; +import PDFImage from '../image'; import { cosine, sine } from '../utils'; const { number } = PDFObject; @@ -112,6 +113,27 @@ export default { widthOfString(string, options = {}) { const horizontalScaling = options.horizontalScaling || 100; + + // If we have an emoji font, check for emoji segments + const segments = this._segmentEmojiText(string); + if (segments) { + let totalWidth = 0; + for (const seg of segments) { + if (seg.type === 'emoji') { + totalWidth += this._emojiWidthOfString(seg.text, this._fontSize); + } else { + totalWidth += + this._font.widthOfString( + seg.text, + this._fontSize, + options.features, + ) + + (options.characterSpacing || 0) * (seg.text.length - 1); + } + } + return (totalWidth * horizontalScaling) / 100; + } + return ( ((this._font.widthOfString(string, this._fontSize, options.features) + (options.characterSpacing || 0) * (string.length - 1)) * @@ -440,13 +462,51 @@ export default { }, _line(text, options = {}, wrapper) { - this._fragment(text, this.x, this.y, options); + const segments = this._segmentEmojiText(text); - if (wrapper) { - const lineGap = options.lineGap || this._lineGap || 0; - this.y += this.currentLineHeight(true) + lineGap; + if (segments) { + // Mixed text/emoji line โ€” render each segment sequentially + let curX = this.x; + const curY = this.y; + + for (const seg of segments) { + if (seg.type === 'emoji') { + this._emojiFragment(seg.text, curX, curY, options); + curX += this._emojiWidthOfString(seg.text, this._fontSize); + } else { + // Build sub-options with correct textWidth/wordCount for this sub-fragment + const subOpts = 
Object.assign({}, options, { + textWidth: this._font.widthOfString( + seg.text, + this._fontSize, + options.features, + ), + wordCount: seg.text.trim().split(/\s+/).length, + // Disable alignment for sub-fragments โ€” alignment was already + // applied at the line level by the LineWrapper + width: undefined, + }); + this._fragment(seg.text, curX, curY, subOpts); + curX += this.widthOfString(seg.text, options); + } + } + + if (wrapper) { + const lineGap = options.lineGap || this._lineGap || 0; + this.y += this.currentLineHeight(true) + lineGap; + } else { + this.x = curX; + } } else { - this.x += this.widthOfString(text, options); + // No emoji โ€” original path + this._fragment(text, this.x, this.y, options); + + if (wrapper) { + const lineGap = options.lineGap || this._lineGap || 0; + this.y += this.currentLineHeight(true) + lineGap; + } else { + this.x += this.widthOfString(text, options); + } } }, @@ -742,4 +802,589 @@ export default { // restore flipped coordinate system this.restore(); }, + + /** + * Render an emoji sequence using the appropriate renderer based on glyph type. + * Supports SBIX (bitmap), COLR/CPAL (vector), and CBDT/CBLC (bitmap) formats. + * Uses the emoji font's shaping to get the correct glyph for + * ZWJ sequences, then dispatches to the appropriate renderer. + * + * @param {string} text - The emoji text (single emoji sequence) + * @param {number} x - X position + * @param {number} y - Y position + */ + _emojiFragment(text, x, y) { + if (!this._emojiFont) return; + + const font = this._emojiFont; + const fontSize = this._fontSize; + + // Use font shaping to resolve ZWJ sequences, flags, etc. + let run; + try { + run = font.layout(text); + } catch { + // Some fonts (e.g. NotoColorEmoji CBDT) don't support layout() because + // fontkit cannot create glyph objects (no glyf/CFF table). + // Fall back to direct cmap lookup + CBDT extraction. 
+ this._emojiFragmentFallback(text, x, y); + return; + } + + let curX = x; + for (let i = 0; i < run.glyphs.length; i++) { + const glyph = run.glyphs[i]; + const position = run.positions[i]; + const advancePt = (position.xAdvance / font.unitsPerEm) * fontSize; + const glyphX = curX + (position.xOffset / font.unitsPerEm) * fontSize; + + if (glyph.type === 'COLR' && glyph.layers) { + // COLR/CPAL vector emoji โ€” render colored layers as PDF paths + this._renderCOLREmoji(glyph, glyphX, y, fontSize, font); + } else { + // SBIX bitmap, CBDT bitmap, or COLR glyph with missing layers โ€” render as image XObject + const img = this._getEmojiImage(glyph, fontSize, font); + if (img) { + const imgW = advancePt; + const imgH = fontSize; + this._placeEmojiImage(img, glyphX, y, imgW, imgH); + } + } + // If no image/path (e.g. unsupported sequence), silently skip + + curX += advancePt; + } + }, + + /** + * Fallback emoji renderer for fonts where layout() fails (e.g. CBDT-only + * fonts like NotoColorEmoji where fontkit cannot create glyph objects). + * Uses cmap to resolve codepoints โ†’ glyph IDs, then extracts bitmaps + * directly from the CBDT table. + * + * @param {string} text - The emoji text (single emoji sequence) + * @param {number} x - X position + * @param {number} y - Y position + */ + _emojiFragmentFallback(text, x, y) { + const font = this._emojiFont; + const fontSize = this._fontSize; + const ppem = Math.round(fontSize); + + // Extract base emoji codepoints, filtering out joiners and modifiers + // that only make sense when processed by GSUB shaping (which we can't use) + const codepoints = []; + for (let i = 0; i < text.length; ) { + const cp = text.codePointAt(i); + i += cp > 0xffff ? 
2 : 1; + if ( + cp === 0x200d || // ZWJ + cp === 0xfe0f || // VS16 + cp === 0xfe0e || // VS15 + cp === 0x20e3 || // Combining Enclosing Keycap + (cp >= 0x1f3fb && cp <= 0x1f3ff) || // Skin tone modifiers + (cp >= 0xe0020 && cp <= 0xe007f) // Tag characters (flag sub-sequences) + ) { + continue; + } + codepoints.push(cp); + } + + // Look up each base codepoint individually via cmap + let curX = x; + for (const cp of codepoints) { + let glyphId = null; + if (font._cmapProcessor) { + glyphId = font._cmapProcessor.lookup(cp); + } + if (!glyphId) continue; + + // Try to extract CBDT bitmap directly + const cbdtData = this._extractCBDTBitmap(font, glyphId, ppem); + if (cbdtData) { + const imgData = { type: 'png', data: cbdtData }; + + // Create PDFImage from the bitmap + const cacheKey = `${glyphId}:${ppem}`; + let image = this._emojiImageCache.get(cacheKey); + if (image === undefined) { + const label = `EI${++this._imageCount}`; + try { + image = PDFImage.open(imgData.data, label); + this._emojiImageCache.set(cacheKey, image); + } catch { + this._emojiImageCache.set(cacheKey, null); + image = null; + } + } + + if (image) { + this._placeEmojiImage(image, curX, y, fontSize, fontSize); + } + } + + curX += fontSize; + } + }, + + /** + * Render a COLR/CPAL vector emoji by iterating its colored layers and + * converting each layer's path to PDF content stream operators. 
+ * + * @param {object} glyph - fontkit COLRGlyph object + * @param {number} x - X position (PDFKit coordinates) + * @param {number} y - Y position (top of text line, PDFKit coordinates) + * @param {number} fontSize - Font size in points + * @param {object} font - fontkit font object + */ + _renderCOLREmoji(glyph, x, y, fontSize, font) { + const scale = fontSize / font.unitsPerEm; + const ascent = font.ascent; + + // Save the current fill color so we can restore it after rendering + const savedFillColor = this._fillColor; + + if (!glyph.layers) return; + + for (const layer of glyph.layers) { + const { glyph: layerGlyph, color } = layer; + const path = layerGlyph.path; + + if (!path || !path.commands || path.commands.length === 0) continue; + + this.save(); + + // Set fill color from CPAL palette (0-255 values) + this.fillColor([color.red, color.green, color.blue]); + if (color.alpha < 255) { + this.fillOpacity(color.alpha / 255); + } + + // Track current point for quadraticโ†’cubic conversion + let cx = 0; + let cy = 0; + + for (const cmd of path.commands) { + switch (cmd.command) { + case 'moveTo': { + const [fx, fy] = cmd.args; + this.moveTo(x + fx * scale, y + (ascent - fy) * scale); + cx = fx; + cy = fy; + break; + } + case 'lineTo': { + const [fx, fy] = cmd.args; + this.lineTo(x + fx * scale, y + (ascent - fy) * scale); + cx = fx; + cy = fy; + break; + } + case 'quadraticCurveTo': { + // Convert quadratic bezier to cubic for PDF + // Q(p0, cp, p2) โ†’ C(p0, cp1, cp2, p2) + // cp1 = p0 + 2/3*(cp-p0), cp2 = p2 + 2/3*(cp-p2) + const [qpx, qpy, fx, fy] = cmd.args; + const cp1x = cx + (2 / 3) * (qpx - cx); + const cp1y = cy + (2 / 3) * (qpy - cy); + const cp2x = fx + (2 / 3) * (qpx - fx); + const cp2y = fy + (2 / 3) * (qpy - fy); + this.bezierCurveTo( + x + cp1x * scale, + y + (ascent - cp1y) * scale, + x + cp2x * scale, + y + (ascent - cp2y) * scale, + x + fx * scale, + y + (ascent - fy) * scale, + ); + cx = fx; + cy = fy; + break; + } + case 'bezierCurveTo': { + 
const [c1x, c1y, c2x, c2y, fx, fy] = cmd.args; + this.bezierCurveTo( + x + c1x * scale, + y + (ascent - c1y) * scale, + x + c2x * scale, + y + (ascent - c2y) * scale, + x + fx * scale, + y + (ascent - fy) * scale, + ); + cx = fx; + cy = fy; + break; + } + case 'closePath': + this.closePath(); + break; + } + } + + // Fill the path (no arguments โ€” color already set) + this.addContent('f'); + + this.restore(); + } + + // Restore the document's fill color. + // Always restore explicitly โ€” some PDF renderers (e.g., pdfjs-dist) may + // not fully restore the color space/state on Q (graphics state pop). + if (savedFillColor) { + this.fillColor(...savedFillColor); + } else { + // No previous fill color was set; reset to default black. + this.fillColor('black'); + } + }, + + /** + * Get or create a cached PDFImage for an emoji glyph at a given size. + * Supports SBIX glyphs (via getImageForSize) and CBDT glyphs (via manual + * table extraction). COLR glyphs should be handled by _renderCOLREmoji instead. 
+ * + * @param {object} glyph - fontkit glyph object (SBIXGlyph or TTFGlyph for CBDT) + * @param {number} fontSize - Target font size in points + * @param {object} font - fontkit font object (needed for CBDT extraction) + * @returns {object|null} PDFImage object or null if no bitmap available + */ + _getEmojiImage(glyph, fontSize, font) { + const ppem = Math.round(fontSize); + + const cacheKey = `${glyph.id}:${ppem}`; + if (this._emojiImageCache.has(cacheKey)) { + return this._emojiImageCache.get(cacheKey); + } + + // Try SBIX first (glyph.getImageForSize) + let imgData; + if (typeof glyph.getImageForSize === 'function') { + try { + imgData = glyph.getImageForSize(ppem); + } catch (e) { + // Not an SBIX glyph or no image available + console.warn( + `Error getting SBIX image for glyph ${glyph.id}: ${e.message}`, + ); + } + } + + // Fallback: try CBDT/CBLC extraction if SBIX didn't work + if ((!imgData || !imgData.data || imgData.data.length === 0) && font) { + const cbdtData = this._extractCBDTBitmap(font, glyph.id, ppem); + if (cbdtData) { + imgData = { type: 'png', data: cbdtData }; + } + } + + if (!imgData?.data?.length) { + this._emojiImageCache.set(cacheKey, null); + return null; + } + + // The imgType has a trailing space (e.g. "png ") โ€” trim it + const imgType = (imgData.type || '').trim().toLowerCase(); + if (imgType !== 'png' && imgType !== 'jpg' && imgType !== 'jpeg') { + this._emojiImageCache.set(cacheKey, null); + return null; + } + + // Create a PDFImage from the raw bitmap buffer + const label = `EI${++this._imageCount}`; + let image; + try { + image = PDFImage.open(imgData.data, label); + } catch (e) { + console.warn( + `Error creating PDFImage for glyph ${glyph.id}: ${e.message}`, + ); + this._emojiImageCache.set(cacheKey, null); + return null; + } + + this._emojiImageCache.set(cacheKey, image); + return image; + }, + + /** + * Extract bitmap data from a CBDT/CBLC font for a specific glyph. 
+   * fontkit does not have a CBDTGlyph class, so we manually navigate
+   * the CBLC index tables and read raw PNG data from the CBDT table.
+   * Only image formats 17, 18 and 19 are read; any other format, a glyph
+   * that no strike covers, or image data running past the end of the font
+   * buffer yields null.
+   *
+   * @param {object} font - fontkit font object
+   * @param {number} glyphId - Glyph ID to extract
+   * @param {number} ppem - Target pixels-per-em
+   * @returns {Buffer|null} PNG image data buffer, or null
+   */
+  _extractCBDTBitmap(font, glyphId, ppem) {
+    const tables = font.directory?.tables;
+    if (!tables?.CBDT || !tables?.CBLC) {
+      return null;
+    }
+
+    let cblc;
+    try {
+      cblc = font.CBLC;
+    } catch (e) {
+      console.warn(`Error parsing CBLC table: ${e?.message}`);
+      return null;
+    }
+    if (!cblc?.sizes?.length) return null;
+
+    // Among the strikes whose glyph range contains this glyph, keep the one
+    // whose ppemX is closest to the target (the first one wins on ties).
+    let bestStrike = null;
+    let bestStrikeIdx = -1;
+    let bestDiff = Infinity;
+    for (let i = 0; i < cblc.sizes.length; i++) {
+      const size = cblc.sizes[i];
+      if (glyphId < size.startGlyphIndex || glyphId > size.endGlyphIndex) {
+        continue;
+      }
+      const diff = Math.abs(size.ppemX - ppem);
+      if (diff < bestDiff) {
+        bestDiff = diff;
+        bestStrike = size;
+        bestStrikeIdx = i;
+      }
+    }
+    if (!bestStrike) return null;
+
+    // Try fontkit's parsed IndexSubtableArray entries first
+    // (fontkit only parses the first entry per BitmapSizeTable)
+    let imageFormat = null;
+    let glyphBitmapOffset = null; // offset from CBDT table start
+
+    if (bestStrike.indexSubTableArray) {
+      for (const entry of bestStrike.indexSubTableArray) {
+        if (
+          glyphId >= entry.firstGlyphIndex &&
+          glyphId <= entry.lastGlyphIndex &&
+          entry.subtable
+        ) {
+          const sub = entry.subtable;
+          const off = this._calcGlyphOffsetInCBDT(
+            sub,
+            entry.firstGlyphIndex,
+            glyphId,
+          );
+          if (off !== null) {
+            glyphBitmapOffset = sub.imageDataOffset + off;
+            imageFormat = sub.imageFormat;
+          }
+          break;
+        }
+      }
+    }
+
+    // If not found and there are more index subtables, manually parse from raw buffer
+    if (
+      glyphBitmapOffset === null &&
+      bestStrike.numberOfIndexSubTables >
+        (bestStrike.indexSubTableArray?.length || 0)
+    ) {
+      const result = this._manualCBLCLookup(
+        font,
+        bestStrikeIdx,
+        bestStrike.numberOfIndexSubTables,
+        glyphId,
+      );
+      if (result) {
+        glyphBitmapOffset = result.offset;
+        imageFormat = result.imageFormat;
+      }
+    }
+
+    if (glyphBitmapOffset === null) return null;
+
+    // Every supported format is laid out as: glyph metrics (possibly none),
+    // a uint32 dataLen, then dataLen bytes of PNG data. The map gives the
+    // metrics size: 17 = SmallMetrics, 18 = BigMetrics, 19 = no metrics.
+    const metricsSize = new Map([
+      [17, 5],
+      [18, 8],
+      [19, 0],
+    ]).get(imageFormat);
+    if (metricsSize === undefined) return null;
+
+    // Read bitmap data from CBDT table
+    const buf = font.stream.buffer;
+    const lenPos = tables.CBDT.offset + glyphBitmapOffset + metricsSize;
+
+    try {
+      const dataStart = lenPos + 4;
+      const dataEnd = dataStart + buf.readUInt32BE(lenPos);
+      // A corrupt dataLen would otherwise be clamped silently and hand a
+      // truncated PNG to the image decoder.
+      if (dataEnd > buf.length) {
+        throw new RangeError(
+          `image data ends at ${dataEnd}, past buffer length ${buf.length}`,
+        );
+      }
+      return buf.subarray(dataStart, dataEnd);
+    } catch (e) {
+      console.warn(
+        `Error reading CBDT data for glyph ${glyphId}: ${e?.message}`,
+      );
+      return null;
+    }
+  },
+
+  /**
+   * Calculate a glyph's offset within CBDT relative to imageDataOffset,
+   * based on the IndexSubtable version and data.
+   *
+   * @param {object} subtable - Parsed IndexSubtable from fontkit
+   * @param {number} firstGlyphIndex - First glyph index in the range
+   * @param {number} glyphId - Target glyph ID
+   * @returns {number|null} Offset relative to imageDataOffset, or null
+   */
+  _calcGlyphOffsetInCBDT(subtable, firstGlyphIndex, glyphId) {
+    const idx = glyphId - firstGlyphIndex;
+    switch (subtable.version) {
+      case 1: // uint32 offset array
+      case 3: // uint16 offset array
+        return subtable.offsetArray?.[idx] ?? null;
+      case 2: // fixed-size images
+        return idx * subtable.imageSize;
+      case 4: {
+        // sparse glyphCode/offset pairs
+        const pairs = subtable.glyphArray;
+        if (!pairs) return null;
+        // The on-disk array holds one terminator pair after the numGlyphs
+        // real ones; skip it when the parsed subtable exposes numGlyphs
+        // (assumed field name - TODO confirm against fontkit's EBLC struct).
+        const count = Math.min(
+          subtable.numGlyphs ?? pairs.length,
+          pairs.length,
+        );
+        for (let i = 0; i < count; i++) {
+          if (pairs[i].glyphCode === glyphId) return pairs[i].offset;
+        }
+        return null;
+      }
+      case 5: {
+        // fixed-size with explicit glyph list
+        if (!subtable.glyphCodeArray) return null;
+        const i = subtable.glyphCodeArray.indexOf(glyphId);
+        return i >= 0 ? i * subtable.imageSize : null;
+      }
+      default:
+        return null;
+    }
+  },
+
+  /**
+   * Manually read IndexSubtableArray entries from raw CBLC data when
+   * fontkit's parser (which only reads 1 entry) doesn't cover the glyph.
+   * Any out-of-range read on a truncated table is logged and reported
+   * as null rather than thrown.
+   *
+   * @param {object} font - fontkit font object
+   * @param {number} strikeIdx - Index of the BitmapSizeTable in CBLC.sizes
+   * @param {number} numSubTables - Total number of IndexSubtableArray entries
+   * @param {number} glyphId - Target glyph ID
+   * @returns {{offset: number, imageFormat: number}|null}
+   */
+  _manualCBLCLookup(font, strikeIdx, numSubTables, glyphId) {
+    const buf = font.stream.buffer;
+    const cblcStart = font.directory.tables.CBLC.offset;
+
+    try {
+      // BitmapSizeTable is 48 bytes; array starts at cblcStart + 8
+      const bstOffset = cblcStart + 8 + strikeIdx * 48;
+      // First uint32 in BitmapSizeTable is indexSubTableArrayOffset (from cblcStart)
+      const istArrayStart = cblcStart + buf.readUInt32BE(bstOffset);
+
+      for (let i = 0; i < numSubTables; i++) {
+        const entryPos = istArrayStart + i * 8;
+        const firstGlyph = buf.readUInt16BE(entryPos);
+        const lastGlyph = buf.readUInt16BE(entryPos + 2);
+        const additionalOffset = buf.readUInt32BE(entryPos + 4);
+
+        if (glyphId < firstGlyph || glyphId > lastGlyph) continue;
+
+        // Parse IndexSubtable header at istArrayStart + additionalOffset
+        const subStart = istArrayStart + additionalOffset;
+        const indexFormat = buf.readUInt16BE(subStart);
+        const imageFormat = buf.readUInt16BE(subStart + 2);
+        const imageDataOffset = buf.readUInt32BE(subStart + 4);
+
+        const idx = glyphId - firstGlyph;
+        let relOffset = null;
+
+        switch (indexFormat) {
+          case 1: // uint32 offset array after 8-byte header
+            relOffset = buf.readUInt32BE(subStart + 8 + idx * 4);
+            break;
+          case 2: {
+            // fixed-size after 8-byte header
+            const imageSize = buf.readUInt32BE(subStart + 8);
+            relOffset = idx * imageSize;
+            break;
+          }
+          case 3: // uint16 offset array after 8-byte header
+            relOffset = buf.readUInt16BE(subStart + 8 + idx * 2);
+            break;
+          case 4: {
+            // sparse: numGlyphs (uint32) at +8, then CodeOffsetPair array.
+            // The array has numGlyphs + 1 pairs; the last one is only a
+            // terminator for size calculation, so it is never matched.
+            const numGlyphs = buf.readUInt32BE(subStart + 8);
+            for (let j = 0; j < numGlyphs; j++) {
+              const pairPos = subStart + 12 + j * 4;
+              if (buf.readUInt16BE(pairPos) === glyphId) {
+                relOffset = buf.readUInt16BE(pairPos + 2);
+                break;
+              }
+            }
+            break;
+          }
+          case 5: {
+            // fixed-size with explicit glyph list
+            const imageSize = buf.readUInt32BE(subStart + 8);
+            // BigMetrics (8 bytes) at +12, numGlyphs at +20, glyphCodeArray at +24
+            const numGlyphs = buf.readUInt32BE(subStart + 20);
+            for (let j = 0; j < numGlyphs; j++) {
+              if (buf.readUInt16BE(subStart + 24 + j * 2) === glyphId) {
+                relOffset = j * imageSize;
+                break;
+              }
+            }
+            break;
+          }
+        }
+
+        // Unknown index formats and sparse misses leave relOffset null, in
+        // which case the remaining subtables are still tried.
+        if (relOffset !== null) {
+          return { offset: imageDataOffset + relOffset, imageFormat };
+        }
+      }
+    } catch (e) {
+      console.warn(
+        `Error manually parsing CBLC/IndexSubtableArray: ${e?.message}`,
+      );
+      return null;
+    }
+
+    return null;
+  },
+
+  /**
+   * Place an emoji image XObject at the specified position and size.
+   * Handles the coordinate system flip that PDFKit applies.
+   *
+   * @param {object} image - PDFImage object
+   * @param {number} x - X position (PDFKit coordinates, top-left origin)
+   * @param {number} y - Y position (PDFKit coordinates, top-left origin)
+   * @param {number} w - Width in points
+   * @param {number} h - Height in points
+   */
+  _placeEmojiImage(image, x, y, w, h) {
+    // Ensure the image is embedded
+    if (!image.obj) {
+      image.embed(this);
+    }
+
+    // Register the XObject on the current page
+    if (this.page.xobjects[image.label] == null) {
+      this.page.xobjects[image.label] = image.obj;
+    }
+
+    // PDFKit has a flipped coordinate system (origin top-left).
+    // The default page transform is (1, 0, 0, -1, 0, pageHeight).
+    // To place an image, we need to flip it back:
+    //   y_pdf = pageHeight - y_pdfkit
+    // And images are drawn bottom-up, so we add h to y.
+    this.save();
+    this.transform(w, 0, 0, -h, x, y + h);
+    this.addContent(`/${image.label} Do`);
+    this.restore();
+  },
 };
diff --git a/scripts/probe-emoji-fonts.mjs b/scripts/probe-emoji-fonts.mjs
new file mode 100644
index 000000000..44d92fa98
--- /dev/null
+++ b/scripts/probe-emoji-fonts.mjs
@@ -0,0 +1,143 @@
+/**
+ * Phase 1 – fontkit color-emoji API probe
+ *
+ * Usage: node scripts/probe-emoji-fonts.mjs
+ *
+ * Tests SBIX (Apple Color Emoji) and COLR/CPAL APIs exposed by fontkit 2.x,
+ * and reports which color tables (sbix, COLR, CPAL, CBDT, CBLC, SVG) exist.
+ */
+
+import * as fontkit from 'fontkit';
+import fs from 'fs';
+
+// Code points looked up in every probed font.
+const TEST_EMOJI = [
+  { label: 'grinning face', cp: 0x1F600 },
+  { label: 'thumbs up', cp: 0x1F44D },
+  { label: 'red heart', cp: 0x2764 },
+  { label: 'flag: US (regional indicator U)', cp: 0x1F1FA }, // first cp of 🇺🇸
+];
+
+/**
+ * Load one font file and print its color-emoji capabilities to stdout.
+ *
+ * @param {string} path - Font file to read
+ * @param {string} [family] - Name used to pick a face from a TTC collection
+ */
+function probeFont(path, family) {
+  console.log(`\n${'='.repeat(60)}`);
+  console.log(`Font: ${path}${family ? ` [${family}]` : ''}`);
+  console.log('='.repeat(60));
+
+  let font;
+  try {
+    const buf = fs.readFileSync(path);
+    const top = fontkit.create(buf);
+
+    // For TTC collections, enumerate fonts then pick the right one
+    if (top.type === 'TTC') {
+      const psNames = top.fonts.map(f => f.postscriptName);
+      console.log(` TTC fonts (postscriptName): ${JSON.stringify(psNames)}`);
+      // Pick by exact family match or first that includes the family string
+      const match = family
+        ? (psNames.find(n => n === family) ||
+           psNames.find(n => n && n.toLowerCase().includes(family.toLowerCase().replace(/\s/g, ''))))
+        : psNames[0];
+      if (!match) {
+        console.error(` ERROR: no font matching "${family}" in TTC`);
+        return;
+      }
+      console.log(` Using postscriptName: ${match}`);
+      font = top.getFont(match);
+    } else {
+      font = top;
+    }
+  } catch (e) {
+    console.error(' ERROR loading font:', e.message);
+    return;
+  }
+
+  console.log(` postscriptName : ${font.postscriptName}`);
+  console.log(` unitsPerEm : ${font.unitsPerEm}`);
+  console.log(` ascent : ${font.ascent}`);
+  console.log(` descent : ${font.descent}`);
+
+  // Detect color table presence
+  const dir = font.directory.tables;
+  console.log(`\n Table presence:`);
+  console.log(` sbix : ${!!dir.sbix}`);
+  console.log(` COLR : ${!!dir.COLR}`);
+  console.log(` CPAL : ${!!dir.CPAL}`);
+  console.log(` CBDT : ${!!dir.CBDT}`);
+  console.log(` CBLC : ${!!dir.CBLC}`);
+  console.log(` SVG : ${!!(dir['SVG '] || dir.SVG)}`);
+
+  console.log(`\n Per-emoji glyph audit:`);
+  for (const { label, cp } of TEST_EMOJI) {
+    const has = font.hasGlyphForCodePoint(cp);
+    process.stdout.write(` U+${cp.toString(16).toUpperCase().padStart(4,'0')} ${label}: has=${has}`);
+    if (!has) { console.log(); continue; }
+
+    const glyph = font.glyphForCodePoint(cp);
+    if (!glyph) {
+      console.log(' glyph=null');
+      continue;
+    }
+    process.stdout.write(` type=${glyph.type} id=${glyph.id} advW=${glyph.advanceWidth}`);
+
+    if (glyph.type === 'SBIX') {
+      const img = glyph.getImageForSize(64);
+      if (img) {
+        process.stdout.write(` imgType=${JSON.stringify(img.type)} dataLen=${img.data.length} origin=(${img.originX},${img.originY})`);
+      } else {
+        process.stdout.write(' img=null');
+      }
+    }
+
+    if (glyph.type === 'COLR') {
+      const layers = glyph.layers;
+      process.stdout.write(` layers=${layers.length}`);
+      for (const { glyph: lg, color } of layers.slice(0, 2)) {
+        const cmds = lg.path.commands.length;
+        process.stdout.write(` [pathCmds=${cmds} rgba=(${color.red},${color.green},${color.blue},${color.alpha})]`);
+      }
+    }
+
+    console.log();
+  }
+
+  // Probe SBIX sizes if table exists
+  if (dir.sbix) {
+    try {
+      const sbix = font.sbix;
+      const ppems = sbix.imageTables.map(t => t.ppem).join(', ');
+      console.log(`\n SBIX available ppem sizes: [${ppems}]`);
+    } catch(e) {
+      console.log(`\n SBIX read error: ${e.message}`);
+    }
+  }
+
+  // Probe COLR version if table exists
+  if (dir.COLR) {
+    try {
+      const colr = font.COLR;
+      console.log(`\n COLR version: ${colr.version} baseGlyphRecords: ${colr.numBaseGlyphRecords}`);
+    } catch(e) {
+      console.log(`\n COLR read error: ${e.message}`);
+    }
+  }
+}
+
+// ── Probe fonts ─────────────────────────────────────────────────────────────
+
+probeFont('/System/Library/Fonts/Apple Color Emoji.ttc', 'Apple Color Emoji');
+
+// If a Noto Color Emoji is present locally or was downloaded
+const notoPath = 'tests/fonts/NotoColorEmoji.ttf';
+if (fs.existsSync(notoPath)) {
+  probeFont(notoPath);
+} else {
+  console.log(`\nNoto Color Emoji not found at ${notoPath} – skipping CBDT probe.`);
+  console.log('Download from: https://github.com/googlefonts/noto-emoji/releases');
+}
+
diff --git a/tests/fonts/NotoColorEmoji.ttf b/tests/fonts/NotoColorEmoji.ttf
new file mode 100644
index 000000000..943741df1
Binary files /dev/null and b/tests/fonts/NotoColorEmoji.ttf differ
diff --git
a/tests/fonts/Twemoji.Mozilla.ttf b/tests/fonts/Twemoji.Mozilla.ttf new file mode 100644 index 000000000..9f45178e9 Binary files /dev/null and b/tests/fonts/Twemoji.Mozilla.ttf differ diff --git a/tests/unit/emoji.spec.js b/tests/unit/emoji.spec.js new file mode 100644 index 000000000..469c53d6b --- /dev/null +++ b/tests/unit/emoji.spec.js @@ -0,0 +1,169 @@ +import { + segmentEmojiText, + isEmojiCodePoint, + codePoints, +} from '../../lib/emoji/segmenter'; + +describe('Emoji Segmenter', () => { + describe('segmentEmojiText', () => { + test('empty string returns empty array', () => { + expect(segmentEmojiText('')).toEqual([]); + }); + + test('null/undefined returns empty array', () => { + expect(segmentEmojiText(null)).toEqual([]); + expect(segmentEmojiText(undefined)).toEqual([]); + }); + + test('plain text only returns single text segment', () => { + const result = segmentEmojiText('Hello World'); + expect(result).toEqual([{ type: 'text', text: 'Hello World' }]); + }); + + test('text with no emoji returns single text segment matching input', () => { + const input = 'The quick brown fox jumps over the lazy dog.'; + const result = segmentEmojiText(input); + expect(result).toHaveLength(1); + expect(result[0]).toEqual({ type: 'text', text: input }); + }); + + test('emoji only returns single emoji segment', () => { + const result = segmentEmojiText('๐Ÿ˜€'); + expect(result).toEqual([{ type: 'emoji', text: '๐Ÿ˜€' }]); + }); + + test('mixed text and emoji returns alternating segments', () => { + const result = segmentEmojiText('Hello ๐Ÿ˜€ World'); + expect(result).toHaveLength(3); + expect(result[0]).toEqual({ type: 'text', text: 'Hello ' }); + expect(result[1]).toEqual({ type: 'emoji', text: '๐Ÿ˜€' }); + expect(result[2]).toEqual({ type: 'text', text: ' World' }); + }); + + test('ZWJ sequences grouped as single emoji segment', () => { + // Family: man, woman, girl, boy (ZWJ sequence) + const family = '๐Ÿ‘จ\u200D๐Ÿ‘ฉ\u200D๐Ÿ‘ง\u200D๐Ÿ‘ฆ'; + const result = 
segmentEmojiText(family); + expect(result).toHaveLength(1); + expect(result[0].type).toBe('emoji'); + expect(result[0].text).toBe(family); + }); + + test('flag emoji (regional indicator pairs) as single segment', () => { + // US flag: regional indicator U + regional indicator S + const usFlag = '๐Ÿ‡บ๐Ÿ‡ธ'; + const result = segmentEmojiText(usFlag); + expect(result).toHaveLength(1); + expect(result[0].type).toBe('emoji'); + expect(result[0].text).toBe(usFlag); + }); + + test('skin tone modifiers grouped with base emoji', () => { + // Waving hand + medium skin tone + const wave = '๐Ÿ‘‹๐Ÿฝ'; + const result = segmentEmojiText(wave); + expect(result).toHaveLength(1); + expect(result[0].type).toBe('emoji'); + expect(result[0].text).toBe(wave); + }); + + test('keycap sequences (digit + VS16 + keycap) as emoji', () => { + // 1๏ธโƒฃ = "1" + VS16 + Combining Enclosing Keycap + const keycap = '1\uFE0F\u20E3'; + const result = segmentEmojiText(keycap); + expect(result).toHaveLength(1); + expect(result[0].type).toBe('emoji'); + expect(result[0].text).toBe(keycap); + }); + + test('variation selector 16 (heart with VS16)', () => { + // โค๏ธ = โค (U+2764) + VS16 (U+FE0F) + const heart = '\u2764\uFE0F'; + const result = segmentEmojiText('I ' + heart + ' PDFKit'); + expect(result).toHaveLength(3); + expect(result[0]).toEqual({ type: 'text', text: 'I ' }); + expect(result[1].type).toBe('emoji'); + expect(result[1].text).toBe(heart); + expect(result[2]).toEqual({ type: 'text', text: ' PDFKit' }); + }); + + test('multiple emoji in a row form consecutive emoji segments', () => { + const result = segmentEmojiText('๐ŸŽ๐ŸŠ๐Ÿ‹'); + // All consecutive emoji should be in emoji segment(s) + for (const seg of result) { + expect(seg.type).toBe('emoji'); + } + const combined = result.map((s) => s.text).join(''); + expect(combined).toBe('๐ŸŽ๐ŸŠ๐Ÿ‹'); + }); + + test('multiple separate emoji with text between them', () => { + const result = segmentEmojiText('Hello ๐Ÿ˜€ World ๐ŸŽ‰ Test 
๐Ÿš€'); + expect(result.length).toBeGreaterThanOrEqual(5); + expect(result[0]).toEqual({ type: 'text', text: 'Hello ' }); + expect(result[1].type).toBe('emoji'); + expect(result[2].type).toBe('text'); + expect(result[3].type).toBe('emoji'); + expect(result[4].type).toBe('text'); + }); + }); + + describe('isEmojiCodePoint', () => { + test('returns true for common emoji code points', () => { + // ๐Ÿ˜€ = U+1F600 + expect(isEmojiCodePoint(0x1f600)).toBe(true); + // ๐ŸŽ‰ = U+1F389 + expect(isEmojiCodePoint(0x1f389)).toBe(true); + // ๐Ÿš€ = U+1F680 + expect(isEmojiCodePoint(0x1f680)).toBe(true); + // โค = U+2764 + expect(isEmojiCodePoint(0x2764)).toBe(true); + // โ˜€ = U+2600 + expect(isEmojiCodePoint(0x2600)).toBe(true); + }); + + test('returns false for ASCII letters and digits', () => { + // 'A' = 0x41 + expect(isEmojiCodePoint(0x41)).toBe(false); + // 'z' = 0x7A + expect(isEmojiCodePoint(0x7a)).toBe(false); + // '0' = 0x30 + expect(isEmojiCodePoint(0x30)).toBe(false); + // '9' = 0x39 + expect(isEmojiCodePoint(0x39)).toBe(false); + // space = 0x20 + expect(isEmojiCodePoint(0x20)).toBe(false); + }); + }); + + describe('codePoints', () => { + test('correctly handles surrogate pairs (emoji > U+FFFF)', () => { + // ๐Ÿ˜€ = U+1F600, encoded as surrogate pair in UTF-16 + const result = codePoints('๐Ÿ˜€'); + expect(result).toHaveLength(1); + expect(result[0].cp).toBe(0x1f600); + expect(result[0].index).toBe(0); + expect(result[0].length).toBe(2); // surrogate pair = 2 UTF-16 code units + }); + + test('handles basic ASCII', () => { + const result = codePoints('ABC'); + expect(result).toHaveLength(3); + expect(result[0]).toEqual({ cp: 0x41, index: 0, length: 1 }); + expect(result[1]).toEqual({ cp: 0x42, index: 1, length: 1 }); + expect(result[2]).toEqual({ cp: 0x43, index: 2, length: 1 }); + }); + + test('handles mixed ASCII and emoji', () => { + const result = codePoints('A๐Ÿ˜€B'); + expect(result).toHaveLength(3); + expect(result[0]).toEqual({ cp: 0x41, index: 0, length: 1 
}); + expect(result[1]).toEqual({ cp: 0x1f600, index: 1, length: 2 }); + expect(result[2]).toEqual({ cp: 0x42, index: 3, length: 1 }); + }); + + test('empty string returns empty array', () => { + expect(codePoints('')).toEqual([]); + }); + }); +}); diff --git a/tests/unit/text.spec.js b/tests/unit/text.spec.js index 8d6cc7e71..f58e080d8 100644 --- a/tests/unit/text.spec.js +++ b/tests/unit/text.spec.js @@ -1,5 +1,28 @@ import PDFDocument from '../../lib/document'; import { logData } from './helpers'; +import fs from 'fs'; +import path from 'path'; +import fontkit from 'fontkit'; + +/** + * Create a mock fontkit font object for emoji testing. + */ +function createMockFont(overrides = {}) { + return { + unitsPerEm: 2048, + ascent: 1900, + layout() { + return { + glyphs: overrides.glyphs || [], + positions: overrides.positions || [], + }; + }, + CBLC: overrides.CBLC || null, + directory: overrides.directory || { tables: {} }, + stream: overrides.stream || { buffer: Buffer.alloc(0) }, + ...overrides, + }; +} describe('Text', () => { let document; @@ -279,4 +302,933 @@ Q expect(dataStr).toContain('/S /Link'); }); }); + + describe('_calcGlyphOffsetInCBDT', () => { + test('version 1: uint32 offset array lookup', () => { + const subtable = { version: 1, offsetArray: [0, 100, 200, 300] }; + // glyphId=12, firstGlyphIndex=10 โ†’ idx=2 โ†’ offsetArray[2] = 200 + expect(document._calcGlyphOffsetInCBDT(subtable, 10, 12)).toBe(200); + }); + + test('version 1: returns null for out-of-range index', () => { + const subtable = { version: 1, offsetArray: [0, 100] }; + expect(document._calcGlyphOffsetInCBDT(subtable, 10, 20)).toBeNull(); + }); + + test('version 2: fixed-size calculation', () => { + const subtable = { version: 2, imageSize: 256 }; + // glyphId=15, firstGlyphIndex=10 โ†’ idx=5 โ†’ 5*256 = 1280 + expect(document._calcGlyphOffsetInCBDT(subtable, 10, 15)).toBe(1280); + }); + + test('version 3: uint16 offset array lookup', () => { + const subtable = { version: 3, 
offsetArray: [0, 50, 100, 150] }; + expect(document._calcGlyphOffsetInCBDT(subtable, 5, 7)).toBe(100); + }); + + test('version 4: sparse glyphCode/offset pairs', () => { + const subtable = { + version: 4, + glyphArray: [ + { glyphCode: 10, offset: 0 }, + { glyphCode: 20, offset: 500 }, + { glyphCode: 30, offset: 1000 }, + ], + }; + expect(document._calcGlyphOffsetInCBDT(subtable, 10, 20)).toBe(500); + }); + + test('version 4: returns null for missing glyph', () => { + const subtable = { + version: 4, + glyphArray: [{ glyphCode: 10, offset: 0 }], + }; + expect(document._calcGlyphOffsetInCBDT(subtable, 10, 99)).toBeNull(); + }); + + test('version 5: fixed-size with glyph code array', () => { + const subtable = { + version: 5, + imageSize: 128, + glyphCodeArray: [10, 20, 30, 40], + }; + // glyphId=30 is at index 2 โ†’ 2*128 = 256 + expect(document._calcGlyphOffsetInCBDT(subtable, 10, 30)).toBe(256); + }); + + test('version 5: returns null for missing glyph', () => { + const subtable = { + version: 5, + imageSize: 128, + glyphCodeArray: [10, 20, 30], + }; + expect(document._calcGlyphOffsetInCBDT(subtable, 10, 99)).toBeNull(); + }); + + test('unknown version returns null', () => { + const subtable = { version: 99 }; + expect(document._calcGlyphOffsetInCBDT(subtable, 0, 0)).toBeNull(); + }); + }); + + describe('_extractCBDTBitmap', () => { + test('returns null when CBDT table is missing', () => { + const font = createMockFont({ directory: { tables: {} } }); + expect(document._extractCBDTBitmap(font, 10, 20)).toBeNull(); + }); + + test('returns null when CBLC table is missing', () => { + const font = createMockFont({ + directory: { tables: { CBDT: { offset: 0, length: 100 } } }, + }); + expect(document._extractCBDTBitmap(font, 10, 20)).toBeNull(); + }); + + test('returns null when CBLC has no sizes', () => { + const font = createMockFont({ + directory: { + tables: { + CBDT: { offset: 0, length: 100 }, + CBLC: { offset: 0, length: 100 }, + }, + }, + CBLC: { sizes: [] }, + 
}); + expect(document._extractCBDTBitmap(font, 10, 20)).toBeNull(); + }); + + test('returns null when glyph is outside all strike ranges', () => { + const font = createMockFont({ + directory: { + tables: { + CBDT: { offset: 0, length: 100 }, + CBLC: { offset: 0, length: 100 }, + }, + }, + CBLC: { + sizes: [ + { + ppemX: 20, + startGlyphIndex: 100, + endGlyphIndex: 200, + numberOfIndexSubTables: 0, + indexSubTableArray: [], + }, + ], + }, + }); + // glyphId=50 is outside [100,200] + expect(document._extractCBDTBitmap(font, 50, 20)).toBeNull(); + }); + + test('reads format 17 (SmallMetrics) bitmap correctly', () => { + // Build a minimal CBDT buffer with format 17: + // SmallMetrics (5 bytes) + uint32 dataLen + PNG data + const pngData = Buffer.from([0x89, 0x50, 0x4e, 0x47, 0xaa, 0xbb]); + const cbdtOffset = 100; + const imageDataOffset = 0; // relative to CBDT + const glyphOffset = 0; + + // Build buffer: [5 bytes metrics][4 bytes dataLen][png data] + const buf = Buffer.alloc(cbdtOffset + 9 + pngData.length); + // SmallMetrics at cbdtOffset + 0 (5 bytes, contents don't matter for this test) + buf.writeUInt32BE(pngData.length, cbdtOffset + 5); // dataLen + pngData.copy(buf, cbdtOffset + 9); + + const font = createMockFont({ + directory: { + tables: { + CBDT: { offset: cbdtOffset, length: buf.length - cbdtOffset }, + CBLC: { offset: 0, length: 100 }, + }, + }, + stream: { buffer: buf }, + CBLC: { + sizes: [ + { + ppemX: 20, + startGlyphIndex: 10, + endGlyphIndex: 50, + numberOfIndexSubTables: 1, + indexSubTableArray: [ + { + firstGlyphIndex: 10, + lastGlyphIndex: 50, + subtable: { + version: 1, + imageFormat: 17, + imageDataOffset: imageDataOffset, + offsetArray: [glyphOffset], + }, + }, + ], + }, + ], + }, + }); + + const result = document._extractCBDTBitmap(font, 10, 20); + expect(result).not.toBeNull(); + expect(Buffer.compare(result, pngData)).toBe(0); + }); + + test('reads format 18 (BigMetrics) bitmap correctly', () => { + const pngData = Buffer.from([0x89, 
0x50, 0x4e, 0x47, 0xcc]); + const cbdtOffset = 50; + + const buf = Buffer.alloc(cbdtOffset + 12 + pngData.length); + // BigMetrics (8 bytes) + uint32 dataLen + buf.writeUInt32BE(pngData.length, cbdtOffset + 8); + pngData.copy(buf, cbdtOffset + 12); + + const font = createMockFont({ + directory: { + tables: { + CBDT: { offset: cbdtOffset, length: buf.length - cbdtOffset }, + CBLC: { offset: 0, length: 100 }, + }, + }, + stream: { buffer: buf }, + CBLC: { + sizes: [ + { + ppemX: 20, + startGlyphIndex: 10, + endGlyphIndex: 50, + numberOfIndexSubTables: 1, + indexSubTableArray: [ + { + firstGlyphIndex: 10, + lastGlyphIndex: 50, + subtable: { + version: 2, + imageFormat: 18, + imageDataOffset: 0, + imageSize: 12 + pngData.length, + }, + }, + ], + }, + ], + }, + }); + + const result = document._extractCBDTBitmap(font, 10, 20); + expect(result).not.toBeNull(); + expect(Buffer.compare(result, pngData)).toBe(0); + }); + + test('reads format 19 (no metrics) bitmap correctly', () => { + const pngData = Buffer.from([0x89, 0x50, 0x4e, 0x47, 0xdd, 0xee]); + const cbdtOffset = 30; + + const buf = Buffer.alloc(cbdtOffset + 4 + pngData.length); + // Format 19: uint32 dataLen + PNG data (no metrics) + buf.writeUInt32BE(pngData.length, cbdtOffset); + pngData.copy(buf, cbdtOffset + 4); + + const font = createMockFont({ + directory: { + tables: { + CBDT: { offset: cbdtOffset, length: buf.length - cbdtOffset }, + CBLC: { offset: 0, length: 100 }, + }, + }, + stream: { buffer: buf }, + CBLC: { + sizes: [ + { + ppemX: 20, + startGlyphIndex: 5, + endGlyphIndex: 50, + numberOfIndexSubTables: 1, + indexSubTableArray: [ + { + firstGlyphIndex: 5, + lastGlyphIndex: 50, + subtable: { + version: 1, + imageFormat: 19, + imageDataOffset: 0, + offsetArray: [0], + }, + }, + ], + }, + ], + }, + }); + + const result = document._extractCBDTBitmap(font, 5, 20); + expect(result).not.toBeNull(); + expect(Buffer.compare(result, pngData)).toBe(0); + }); + + test('selects closest ppem strike', () => { + const 
pngData = Buffer.from([0x89, 0x50, 0x4e, 0x47]); + const cbdtOffset = 10; + + const buf = Buffer.alloc(cbdtOffset + 9 + pngData.length); + buf.writeUInt32BE(pngData.length, cbdtOffset + 5); + pngData.copy(buf, cbdtOffset + 9); + + const makeStrike = (ppem) => ({ + ppemX: ppem, + startGlyphIndex: 1, + endGlyphIndex: 100, + numberOfIndexSubTables: 1, + indexSubTableArray: [ + { + firstGlyphIndex: 1, + lastGlyphIndex: 100, + subtable: { + version: 1, + imageFormat: 17, + imageDataOffset: 0, + offsetArray: [0], + }, + }, + ], + }); + + const font = createMockFont({ + directory: { + tables: { + CBDT: { offset: cbdtOffset, length: buf.length - cbdtOffset }, + CBLC: { offset: 0, length: 100 }, + }, + }, + stream: { buffer: buf }, + CBLC: { + sizes: [makeStrike(20), makeStrike(40), makeStrike(96)], + }, + }); + + // Target ppem=38 should select strike with ppemX=40 (closest) + const result = document._extractCBDTBitmap(font, 1, 38); + expect(result).not.toBeNull(); + }); + + test('returns null for unsupported image format', () => { + const cbdtOffset = 10; + const buf = Buffer.alloc(cbdtOffset + 100); + + const font = createMockFont({ + directory: { + tables: { + CBDT: { offset: cbdtOffset, length: 100 }, + CBLC: { offset: 0, length: 100 }, + }, + }, + stream: { buffer: buf }, + CBLC: { + sizes: [ + { + ppemX: 20, + startGlyphIndex: 1, + endGlyphIndex: 100, + numberOfIndexSubTables: 1, + indexSubTableArray: [ + { + firstGlyphIndex: 1, + lastGlyphIndex: 100, + subtable: { + version: 1, + imageFormat: 99, + imageDataOffset: 0, + offsetArray: [0], + }, + }, + ], + }, + ], + }, + }); + + const result = document._extractCBDTBitmap(font, 1, 20); + expect(result).toBeNull(); + }); + }); + + describe('_manualCBLCLookup', () => { + test('finds glyph in manually-parsed index subtable (format 1)', () => { + // Build a minimal CBLC buffer with one BitmapSizeTable and one IndexSubtableArray entry + const cblcOffset = 0; + // Header (8 bytes) + BitmapSizeTable (48 bytes) + const 
totalSize = 8 + 48 + 8 + 8 + 8; // header + BST + IST array entry + subtable header + offset array + const buf = Buffer.alloc(cblcOffset + totalSize + 100); + + // BitmapSizeTable at offset 8, first uint32 = indexSubTableArrayOffset + const istArrayOffset = 56; // 8 + 48 = after BST + buf.writeUInt32BE(istArrayOffset, cblcOffset + 8); + + // IndexSubtableArray entry at istArrayOffset: + // firstGlyph (uint16), lastGlyph (uint16), additionalOffset (uint32) + const entryPos = cblcOffset + istArrayOffset; + buf.writeUInt16BE(10, entryPos); // firstGlyph + buf.writeUInt16BE(20, entryPos + 2); // lastGlyph + buf.writeUInt32BE(8, entryPos + 4); // additionalOffset (relative to istArrayStart) + + // IndexSubtable header at istArrayStart + additionalOffset: + const subStart = cblcOffset + istArrayOffset + 8; + buf.writeUInt16BE(1, subStart); // indexFormat = 1 + buf.writeUInt16BE(17, subStart + 2); // imageFormat = 17 + buf.writeUInt32BE(500, subStart + 4); // imageDataOffset + + // Offset array: uint32 per glyph (for glyphId=15, idx=5) + buf.writeUInt32BE(100, subStart + 8 + 5 * 4); // offset for glyph 15 + + const font = { + stream: { buffer: buf }, + directory: { tables: { CBLC: { offset: cblcOffset } } }, + }; + + const result = document._manualCBLCLookup(font, 0, 1, 15); + expect(result).not.toBeNull(); + expect(result.offset).toBe(500 + 100); // imageDataOffset + relOffset + expect(result.imageFormat).toBe(17); + }); + + test('returns null when glyph is not found in any subtable', () => { + const cblcOffset = 0; + const buf = Buffer.alloc(200); + + const istArrayOffset = 56; + buf.writeUInt32BE(istArrayOffset, cblcOffset + 8); + + const entryPos = cblcOffset + istArrayOffset; + buf.writeUInt16BE(10, entryPos); // firstGlyph + buf.writeUInt16BE(20, entryPos + 2); // lastGlyph + buf.writeUInt32BE(8, entryPos + 4); + + const subStart = cblcOffset + istArrayOffset + 8; + buf.writeUInt16BE(1, subStart); + buf.writeUInt16BE(17, subStart + 2); + buf.writeUInt32BE(0, 
subStart + 4); + + const font = { + stream: { buffer: buf }, + directory: { tables: { CBLC: { offset: cblcOffset } } }, + }; + + // glyphId=99 is outside [10,20] + const result = document._manualCBLCLookup(font, 0, 1, 99); + expect(result).toBeNull(); + }); + + test('handles format 2 (fixed-size images)', () => { + const cblcOffset = 0; + const buf = Buffer.alloc(200); + + const istArrayOffset = 56; + buf.writeUInt32BE(istArrayOffset, cblcOffset + 8); + + const entryPos = cblcOffset + istArrayOffset; + buf.writeUInt16BE(5, entryPos); + buf.writeUInt16BE(15, entryPos + 2); + buf.writeUInt32BE(8, entryPos + 4); + + const subStart = cblcOffset + istArrayOffset + 8; + buf.writeUInt16BE(2, subStart); // indexFormat = 2 + buf.writeUInt16BE(18, subStart + 2); // imageFormat = 18 + buf.writeUInt32BE(1000, subStart + 4); // imageDataOffset + buf.writeUInt32BE(256, subStart + 8); // imageSize + + const font = { + stream: { buffer: buf }, + directory: { tables: { CBLC: { offset: cblcOffset } } }, + }; + + // glyphId=8, firstGlyph=5 โ†’ idx=3 โ†’ offset = 3*256 = 768 + const result = document._manualCBLCLookup(font, 0, 1, 8); + expect(result).not.toBeNull(); + expect(result.offset).toBe(1000 + 768); + expect(result.imageFormat).toBe(18); + }); + }); + + describe('_getEmojiImage', () => { + test('returns null when SBIX has no image and no CBDT tables', () => { + const glyph = { + id: 42, + getImageForSize: jest.fn().mockReturnValue(null), + }; + const font = createMockFont(); + + const result = document._getEmojiImage(glyph, 20, font); + expect(result).toBeNull(); + }); + + test('returns cached result on second call', () => { + const glyph = { + id: 42, + getImageForSize: jest.fn().mockReturnValue(null), + }; + const font = createMockFont(); + + document._getEmojiImage(glyph, 20, font); + const result = document._getEmojiImage(glyph, 20, font); + // getImageForSize should only be called once (second call uses cache) + expect(glyph.getImageForSize).toHaveBeenCalledTimes(1); + 
expect(result).toBeNull(); + }); + + test('handles getImageForSize throwing errors gracefully', () => { + const glyph = { + id: 50, + getImageForSize: jest.fn().mockImplementation(() => { + throw new Error('No sbix data'); + }), + }; + const font = createMockFont(); + + const result = document._getEmojiImage(glyph, 20, font); + expect(result).toBeNull(); + }); + + test('returns null for unsupported image types', () => { + const glyph = { + id: 60, + getImageForSize: jest.fn().mockReturnValue({ + type: 'tiff', + data: Buffer.from([0x00, 0x01]), + }), + }; + const font = createMockFont(); + + const result = document._getEmojiImage(glyph, 20, font); + expect(result).toBeNull(); + }); + + test('trims trailing space from image type', () => { + // SBIX type field often has trailing space: "png " โ†’ should be treated as "png" + const pngHeader = Buffer.from([ + 0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a, + ]); + // Create a minimal valid PNG buffer (just header + IHDR will fail but + // PDFImage.open handles it). We test that the type trimming works. 
+ const glyph = { + id: 70, + getImageForSize: jest.fn().mockReturnValue({ + type: 'png ', + data: pngHeader, + }), + }; + const font = createMockFont(); + + // This may throw because pngHeader isn't a valid full PNG, + // but the type check should pass (it won't return null for unsupported type) + document._getEmojiImage(glyph, 20, font); + // Either returns an image or null due to invalid PNG data โ€” but NOT null due to type check + // The key assertion is that "png " is accepted as a valid type + expect(glyph.getImageForSize).toHaveBeenCalledWith(20); + }); + }); + + describe('_emojiFragment', () => { + test('returns early when no emoji font is set', () => { + document._emojiFont = null; + // Should not throw + expect(() => { + document._emojiFragment('๐Ÿ˜€', 0, 0, {}); + }).not.toThrow(); + }); + + test('routes COLR glyphs to _renderCOLREmoji', () => { + const mockGlyph = { id: 1, type: 'COLR', layers: [] }; + document._emojiFont = createMockFont({ + glyphs: [mockGlyph], + positions: [{ xAdvance: 2048, xOffset: 0 }], + }); + document._fontSize = 16; + + const spy = jest + .spyOn(document, '_renderCOLREmoji') + .mockImplementation(() => {}); + + document._emojiFragment('๐Ÿ˜€', 100, 200, {}); + expect(spy).toHaveBeenCalledWith( + mockGlyph, + 100, + 200, + 16, + document._emojiFont, + ); + + spy.mockRestore(); + }); + + test('routes non-COLR glyphs to _getEmojiImage and _placeEmojiImage', () => { + const mockGlyph = { id: 1, type: 'SBIX' }; + document._emojiFont = createMockFont({ + glyphs: [mockGlyph], + positions: [{ xAdvance: 2048, xOffset: 0 }], + }); + document._fontSize = 16; + + const mockImage = { label: 'EI1', obj: {} }; + const imgSpy = jest + .spyOn(document, '_getEmojiImage') + .mockReturnValue(mockImage); + const placeSpy = jest + .spyOn(document, '_placeEmojiImage') + .mockImplementation(() => {}); + + document._emojiFragment('๐Ÿ˜€', 100, 200, {}); + + expect(imgSpy).toHaveBeenCalledWith(mockGlyph, 16, document._emojiFont); + 
expect(placeSpy).toHaveBeenCalledWith(mockImage, 100, 200, 16, 16); + + imgSpy.mockRestore(); + placeSpy.mockRestore(); + }); + + test('skips glyph silently when _getEmojiImage returns null', () => { + const mockGlyph = { id: 1, type: 'TTF' }; + document._emojiFont = createMockFont({ + glyphs: [mockGlyph], + positions: [{ xAdvance: 2048, xOffset: 0 }], + }); + document._fontSize = 16; + + jest.spyOn(document, '_getEmojiImage').mockReturnValue(null); + const placeSpy = jest + .spyOn(document, '_placeEmojiImage') + .mockImplementation(() => {}); + + document._emojiFragment('๐Ÿ˜€', 100, 200, {}); + expect(placeSpy).not.toHaveBeenCalled(); + + placeSpy.mockRestore(); + }); + }); + + describe('_renderCOLREmoji', () => { + test('renders layers with correct fill colors', () => { + // addContent writes to page content stream โ€” spy on it directly + const contentCalls = []; + const origAddContent = document.addContent.bind(document); + jest.spyOn(document, 'addContent').mockImplementation((str) => { + contentCalls.push(str); + return origAddContent(str); + }); + + const mockGlyph = { + layers: [ + { + glyph: { + path: { + commands: [ + { command: 'moveTo', args: [0, 2048] }, + { command: 'lineTo', args: [2048, 2048] }, + { command: 'lineTo', args: [2048, 0] }, + { command: 'closePath', args: [] }, + ], + }, + }, + color: { red: 255, green: 0, blue: 0, alpha: 255 }, + }, + ], + }; + + const font = createMockFont(); + document._renderCOLREmoji(mockGlyph, 100, 200, 16, font); + + const dataStr = contentCalls.join('\n'); + // Should have save/restore (q/Q) and fill (f) operators + expect(dataStr).toContain('q'); + expect(dataStr).toContain('f'); + expect(dataStr).toContain('Q'); + document.addContent.mockRestore(); + }); + + test('skips layers with empty paths', () => { + const contentCalls = []; + const origAddContent = document.addContent.bind(document); + jest.spyOn(document, 'addContent').mockImplementation((str) => { + contentCalls.push(str); + return origAddContent(str); 
+ }); + + const mockGlyph = { + layers: [ + { + glyph: { path: { commands: [] } }, + color: { red: 255, green: 0, blue: 0, alpha: 255 }, + }, + { + glyph: { path: null }, + color: { red: 0, green: 255, blue: 0, alpha: 255 }, + }, + ], + }; + + const font = createMockFont(); + document._renderCOLREmoji(mockGlyph, 100, 200, 16, font); + + const dataStr = contentCalls.join('\n'); + // No fill operator should be emitted since both layers have empty paths + expect(dataStr).not.toContain('f'); + document.addContent.mockRestore(); + }); + + test('handles alpha opacity less than 255', () => { + const mockGlyph = { + layers: [ + { + glyph: { + path: { + commands: [ + { command: 'moveTo', args: [0, 0] }, + { command: 'lineTo', args: [100, 0] }, + { command: 'closePath', args: [] }, + ], + }, + }, + color: { red: 0, green: 0, blue: 255, alpha: 128 }, + }, + ], + }; + + const font = createMockFont(); + const opacitySpy = jest.spyOn(document, 'fillOpacity'); + + document._renderCOLREmoji(mockGlyph, 0, 0, 16, font); + + expect(opacitySpy).toHaveBeenCalledWith(128 / 255); + opacitySpy.mockRestore(); + }); + + test('converts quadratic bezier to cubic', () => { + const contentCalls = []; + const origAddContent = document.addContent.bind(document); + jest.spyOn(document, 'addContent').mockImplementation((str) => { + contentCalls.push(str); + return origAddContent(str); + }); + + const mockGlyph = { + layers: [ + { + glyph: { + path: { + commands: [ + { command: 'moveTo', args: [0, 0] }, + { command: 'quadraticCurveTo', args: [1024, 2048, 2048, 0] }, + { command: 'closePath', args: [] }, + ], + }, + }, + color: { red: 0, green: 0, blue: 0, alpha: 255 }, + }, + ], + }; + + const font = createMockFont(); + document._renderCOLREmoji(mockGlyph, 0, 0, 16, font); + + const dataStr = contentCalls.join('\n'); + // Should contain a bezier curve ('c' operator) not a quadratic ('v' or 'y') + expect(dataStr).toMatch( + /[0-9.]+ [0-9.]+ [0-9.]+ [0-9.]+ [0-9.]+ [0-9.]+ c/, + ); + 
document.addContent.mockRestore(); + }); + }); + + // โ”€โ”€โ”€ Real font integration tests โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + // These tests use actual downloadable emoji fonts instead of mocks. + // They are skipped if the font files are not present in tests/fonts/. + + const twemojiPath = path.join(__dirname, '../fonts/Twemoji.Mozilla.ttf'); + const notoPath = path.join(__dirname, '../fonts/NotoColorEmoji.ttf'); + const hasTwemoji = fs.existsSync(twemojiPath); + const hasNoto = fs.existsSync(notoPath); + + const describeIfTwemoji = hasTwemoji ? describe : describe.skip; + const describeIfNoto = hasNoto ? describe : describe.skip; + + describeIfTwemoji('Twemoji.Mozilla COLR/CPAL (real font)', () => { + let twFont; + + beforeAll(() => { + const data = fs.readFileSync(twemojiPath); + twFont = fontkit.create(data); + }); + + test('font has COLR and CPAL tables', () => { + expect(twFont.directory.tables.COLR).toBeTruthy(); + expect(twFont.directory.tables.CPAL).toBeTruthy(); + }); + + test('glyphForCodePoint returns COLR glyph for U+1F600', () => { + const glyph = twFont.glyphForCodePoint(0x1f600); + expect(glyph).toBeTruthy(); + expect(glyph.type).toBe('COLR'); + expect(glyph.layers).toBeTruthy(); + expect(glyph.layers.length).toBeGreaterThan(0); + }); + + test('COLR layers have valid color and path data', () => { + const glyph = twFont.glyphForCodePoint(0x1f600); + for (const layer of glyph.layers) { + expect(layer.color).toHaveProperty('red'); + expect(layer.color).toHaveProperty('green'); + expect(layer.color).toHaveProperty('blue'); + expect(layer.color).toHaveProperty('alpha'); + expect(layer.color.red).toBeGreaterThanOrEqual(0); + expect(layer.color.red).toBeLessThanOrEqual(255); + expect(layer.glyph.path.commands.length).toBeGreaterThan(0); + } + }); + + test('_renderCOLREmoji produces PDF operators from real glyph', () => { + const glyph = twFont.glyphForCodePoint(0x1f600); + + const 
contentCalls = []; + const origAddContent = document.addContent.bind(document); + jest.spyOn(document, 'addContent').mockImplementation((str) => { + contentCalls.push(str); + return origAddContent(str); + }); + + document._renderCOLREmoji(glyph, 100, 200, 16, twFont); + + const dataStr = contentCalls.join('\n'); + // save/restore for each layer + expect((dataStr.match(/\bq\b/g) || []).length).toBe(glyph.layers.length); + expect((dataStr.match(/\bQ\b/g) || []).length).toBe(glyph.layers.length); + // fill operator for each non-empty layer + expect((dataStr.match(/\bf\b/g) || []).length).toBe(glyph.layers.length); + // RGB color (via fillColor โ†’ /DeviceRGB cs + scn) + path moveTo operators + expect(dataStr).toMatch(/[0-9.]+ [0-9.]+ [0-9.]+ scn/); + expect(dataStr).toMatch(/[0-9.]+ [0-9.]+ m/); + + document.addContent.mockRestore(); + }); + + test('multiple emoji codepoints produce COLR glyphs', () => { + // Test a few different emoji codepoints + const codepoints = [0x2764, 0x1f44d, 0x1f680]; // heart, thumbs up, rocket + for (const cp of codepoints) { + const glyph = twFont.glyphForCodePoint(cp); + if (glyph && glyph.type === 'COLR') { + expect(glyph.layers.length).toBeGreaterThan(0); + } + } + }); + }); + + describeIfNoto('NotoColorEmoji CBDT/CBLC (real font)', () => { + let notoFont; + + beforeAll(() => { + const data = fs.readFileSync(notoPath); + notoFont = fontkit.create(data); + }); + + test('font has CBDT and CBLC tables', () => { + expect(notoFont.directory.tables.CBDT).toBeTruthy(); + expect(notoFont.directory.tables.CBLC).toBeTruthy(); + }); + + test('CBLC has parsed strike data with valid ranges', () => { + const cblc = notoFont.CBLC; + expect(cblc).toBeTruthy(); + expect(cblc.numSizes).toBeGreaterThan(0); + expect(cblc.sizes).toBeTruthy(); + const strike = cblc.sizes[0]; + expect(strike.ppemX).toBeGreaterThan(0); + expect(strike.startGlyphIndex).toBeLessThan(strike.endGlyphIndex); + expect(strike.numberOfIndexSubTables).toBeGreaterThan(0); + }); + + 
test('raw CBDT data has correct version header', () => { + const buf = notoFont.stream.buffer; + const cbdtStart = notoFont.directory.tables.CBDT.offset; + // CBDT version 3.0 = major=3, minor=0 + expect(buf.readUInt16BE(cbdtStart)).toBe(3); + expect(buf.readUInt16BE(cbdtStart + 2)).toBe(0); + }); + + test('_extractCBDTBitmap returns valid PNG for glyph in first subtable range', () => { + const cblc = notoFont.CBLC; + const strike = cblc.sizes[0]; + // Use a glyph ID in the first parsed subtable range + const glyphId = strike.indexSubTableArray[0].firstGlyphIndex; + const result = document._extractCBDTBitmap( + notoFont, + glyphId, + strike.ppemX, + ); + // If fontkit's parsed subtable works, we get PNG data + if (result) { + // Verify PNG signature: 89 50 4E 47 + expect(result[0]).toBe(0x89); + expect(result[1]).toBe(0x50); + expect(result[2]).toBe(0x4e); + expect(result[3]).toBe(0x47); + expect(result.length).toBeGreaterThan(100); + } + }); + + test('_extractCBDTBitmap returns valid PNG via manual CBLC lookup', () => { + const cblc = notoFont.CBLC; + const strike = cblc.sizes[0]; + // Use a glyph ID OUTSIDE the first subtable (4-17) but inside the + // overall strike range โ€” forces manual CBLC binary parsing + const glyphId = Math.min(500, strike.endGlyphIndex); + const result = document._extractCBDTBitmap( + notoFont, + glyphId, + strike.ppemX, + ); + expect(result).toBeTruthy(); + // PNG signature + expect(result[0]).toBe(0x89); + expect(result[1]).toBe(0x50); + expect(result[2]).toBe(0x4e); + expect(result[3]).toBe(0x47); + expect(result.length).toBeGreaterThan(100); + }); + + test('_extractCBDTBitmap returns null for glyph outside all strikes', () => { + const result = document._extractCBDTBitmap( + notoFont, + notoFont.numGlyphs + 10, + 109, + ); + expect(result).toBeNull(); + }); + + test('_manualCBLCLookup finds glyph in unparsed subtable', () => { + const cblc = notoFont.CBLC; + const strike = cblc.sizes[0]; + // Pick a glyph beyond first parsed 
subtable range + const lastParsed = strike.indexSubTableArray[0].lastGlyphIndex; + const glyphId = lastParsed + 10; + + const result = document._manualCBLCLookup( + notoFont, + 0, + strike.numberOfIndexSubTables, + glyphId, + ); + expect(result).toBeTruthy(); + expect(result).toHaveProperty('offset'); + expect(result).toHaveProperty('imageFormat'); + expect([17, 18, 19]).toContain(result.imageFormat); + }); + + test('extracted bitmaps for different glyphs are distinct', () => { + const cblc = notoFont.CBLC; + const ppem = cblc.sizes[0].ppemX; + const buf1 = document._extractCBDTBitmap(notoFont, 100, ppem); + const buf2 = document._extractCBDTBitmap(notoFont, 200, ppem); + expect(buf1).toBeTruthy(); + expect(buf2).toBeTruthy(); + // Different glyphs should produce different bitmap data + expect(buf1.equals(buf2)).toBe(false); + }); + }); }); diff --git a/tests/visual/__image_snapshots__/emoji-spec-js-emoji-apple-color-emoji-1-snap.png b/tests/visual/__image_snapshots__/emoji-spec-js-emoji-apple-color-emoji-1-snap.png new file mode 100644 index 000000000..ba268a71b Binary files /dev/null and b/tests/visual/__image_snapshots__/emoji-spec-js-emoji-apple-color-emoji-1-snap.png differ diff --git a/tests/visual/__image_snapshots__/emoji-spec-js-emoji-simple-emoji-mixed-with-text-1-snap.png b/tests/visual/__image_snapshots__/emoji-spec-js-emoji-simple-emoji-mixed-with-text-1-snap.png new file mode 100644 index 000000000..21b258534 Binary files /dev/null and b/tests/visual/__image_snapshots__/emoji-spec-js-emoji-simple-emoji-mixed-with-text-1-snap.png differ diff --git a/tests/visual/__image_snapshots__/emoji-spec-js-emoji-simple-emoji-mixed-with-text-multiple-fonts-1-snap.png b/tests/visual/__image_snapshots__/emoji-spec-js-emoji-simple-emoji-mixed-with-text-multiple-fonts-1-snap.png new file mode 100644 index 000000000..0afd37c85 Binary files /dev/null and b/tests/visual/__image_snapshots__/emoji-spec-js-emoji-simple-emoji-mixed-with-text-multiple-fonts-1-snap.png differ 
diff --git a/tests/visual/__image_snapshots__/emoji-spec-js-emoji-simple-emoji-mixed-with-text-multiple-fonts-2-snap.png b/tests/visual/__image_snapshots__/emoji-spec-js-emoji-simple-emoji-mixed-with-text-multiple-fonts-2-snap.png
new file mode 100644
index 000000000..9a4ac2b31
Binary files /dev/null and b/tests/visual/__image_snapshots__/emoji-spec-js-emoji-simple-emoji-mixed-with-text-multiple-fonts-2-snap.png differ
diff --git a/tests/visual/__image_snapshots__/emoji-spec-js-emoji-simple-emoji-mixed-with-text-multiple-fonts-3-snap.png b/tests/visual/__image_snapshots__/emoji-spec-js-emoji-simple-emoji-mixed-with-text-multiple-fonts-3-snap.png
new file mode 100644
index 000000000..9a4ac2b31
Binary files /dev/null and b/tests/visual/__image_snapshots__/emoji-spec-js-emoji-simple-emoji-mixed-with-text-multiple-fonts-3-snap.png differ
diff --git a/tests/visual/__image_snapshots__/fonts-spec-js-fonts-default-helvetica-1-snap.png b/tests/visual/__image_snapshots__/fonts-spec-js-fonts-default-helvetica-1-snap.png
deleted file mode 100644
index 1c4f75324..000000000
Binary files a/tests/visual/__image_snapshots__/fonts-spec-js-fonts-default-helvetica-1-snap.png and /dev/null differ
diff --git a/tests/visual/__image_snapshots__/fonts-spec-js-fonts-helvetica-bold-1-snap.png b/tests/visual/__image_snapshots__/fonts-spec-js-fonts-helvetica-bold-1-snap.png
deleted file mode 100644
index ea3f81936..000000000
Binary files a/tests/visual/__image_snapshots__/fonts-spec-js-fonts-helvetica-bold-1-snap.png and /dev/null differ
diff --git a/tests/visual/emoji.spec.js b/tests/visual/emoji.spec.js
new file mode 100644
index 000000000..f0915f37d
--- /dev/null
+++ b/tests/visual/emoji.spec.js
@@ -0,0 +1,170 @@
+import fs from 'fs';
+import { runDocTest } from './helpers';
+
+const APPLE_EMOJI_PATH = '/System/Library/Fonts/Apple Color Emoji.ttc';
+
+// Visual tests for color emoji: the same sample sheet is rendered once per
+// emoji font through runDocTest (see ./helpers).
+describe('emoji', function () {
+  // Document options for the emoji fonts expected under tests/fonts/.
+  const docOptions = [
+    {
+      compress: false,
+      emojiFont: 'tests/fonts/Twemoji.Mozilla.ttf',
+      emojiFontFamily: 'TwemojiMozilla',
+    },
+    {
+      compress: false,
+      emojiFont: 'tests/fonts/NotoColorEmoji.ttf',
+      emojiFontFamily: 'NotoColorEmoji',
+    },
+  ];
+
+  // Document options for the macOS system emoji font.
+  const appleTest = {
+    compress: false,
+    emojiFont: APPLE_EMOJI_PATH,
+    emojiFontFamily: 'AppleColorEmoji',
+  };
+
+  /**
+   * Picks `test` when every listed font file exists and `test.skip` otherwise,
+   * so a checkout without the emoji fonts skips instead of failing.
+   *
+   * @param {string[]} fontPaths - font file paths; relative paths are checked
+   *   against the current working directory.
+   * @returns {Function} `test` or `test.skip`
+   */
+  const testIfFontsExist = (fontPaths) =>
+    fontPaths.every((fontPath) => fs.existsSync(fontPath)) ? test : test.skip;
+
+  /**
+   * Draws the emoji sample sheet with the given document options.
+   *
+   * @param {object} options - document options handed to runDocTest;
+   *   `emojiFontFamily` is also printed in the first line of the sheet.
+   * @returns {*} whatever runDocTest returns (the tests below return/await
+   *   it, so presumably a promise - confirm in ./helpers).
+   */
+  const fontTest = (options) =>
+    runDocTest(options, function (doc) {
+      doc.font('tests/fonts/Roboto-Regular.ttf');
+      let y = 30;
+      const gap = 2;
+
+      // --- Basic emoji mixed with text ---
+      doc.fontSize(18);
+      doc.text(
+        `Hello ๐Ÿ˜€ World ๐ŸŽ‰ Test ๐Ÿš€ from ${options.emojiFontFamily} font`,
+        50,
+        y,
+      );
+      y += 24 + gap;
+
+      // --- Emoji-only line (no surrounding text) ---
+      doc.text('๐Ÿ˜€๐ŸŽ‰๐Ÿš€๐ŸŒˆโญ๐Ÿ”ฅ๐Ÿ’ฏ', 50, y);
+      y += 24 + gap;
+
+      // --- Text-only line (no emoji) ---
+      doc.text('No emoji here, just plain text.', 50, y);
+      y += 24 + gap;
+
+      // --- ZWJ family sequences ---
+      doc.fontSize(24);
+      doc.text('Family: ๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆ Couple: ๐Ÿ‘ฉโ€โค๏ธโ€๐Ÿ‘จ', 50, y);
+      y += 30 + gap;
+
+      // --- Skin tone modifiers ---
+      doc.fontSize(18);
+      doc.text('Thumbs: ๐Ÿ‘ ๐Ÿ‘๐Ÿป ๐Ÿ‘๐Ÿผ ๐Ÿ‘๐Ÿฝ ๐Ÿ‘๐Ÿพ ๐Ÿ‘๐Ÿฟ', 50, y);
+      y += 24 + gap;
+
+      // --- Flag sequences (regional indicators) ---
+      doc.text('Flags: ๐Ÿ‡บ๐Ÿ‡ธ ๐Ÿ‡ฌ๐Ÿ‡ง ๐Ÿ‡ฏ๐Ÿ‡ต ๐Ÿ‡ซ๐Ÿ‡ท ๐Ÿ‡ฉ๐Ÿ‡ช ๐Ÿ‡ง๐Ÿ‡ท', 50, y);
+      y += 24 + gap;
+
+      // --- Keycap sequences ---
+      doc.text('Keycaps: 1๏ธโƒฃ 2๏ธโƒฃ 3๏ธโƒฃ #๏ธโƒฃ *๏ธโƒฃ', 50, y);
+      y += 24 + gap;
+
+      // --- Emoji at start and end of line ---
+      doc.text('๐Ÿ”ฅ Fire at start', 50, y);
+      y += 24 + gap;
+      doc.text('Fire at end ๐Ÿ”ฅ', 50, y);
+      y += 24 + gap;
+
+      // --- Multiple emoji adjacent with no spaces ---
+      doc.text('No spaces:๐ŸŽ๐ŸŠ๐Ÿ‹๐Ÿ‡๐Ÿ‰๐Ÿ“', 50, y);
+      y += 24 + gap;
+
+      // --- Different font sizes ---
+      doc.fontSize(10);
+      doc.text('Small 10pt: Hello ๐ŸŒ World', 50, y);
+      y += 16 + gap;
+
+      doc.fontSize(36);
+      doc.text('Large 36pt: ๐ŸŽจ๐Ÿ–Œ๏ธ', 50, y);
+      y += 42 + gap;
+
+      // --- Emoji with variation selector (text vs emoji presentation) ---
+      doc.fontSize(18);
+      doc.text('Heart: โค๏ธ vs โค๏ธŽ Star: โญ vs โœฉ', 50, y);
+      y += 24 + gap;
+
+      // --- Mixed scripts with emoji ---
+      doc.text('ๆ—ฅๆœฌ่ชžใƒ†ใ‚นใƒˆ ๐Ÿ—พ ไธญๆ–‡ๆต‹่ฏ• ๐Ÿ‰', 50, y);
+      y += 24 + gap;
+
+      // --- Emoji in the middle of a long sentence ---
+      doc.fontSize(14);
+      doc.text(
+        'The quick brown ๐ŸฆŠ jumps over the lazy ๐Ÿถ and runs through the ๐ŸŒฒ๐ŸŒฒ๐ŸŒฒ forest.',
+        50,
+        y,
+        { width: 450 },
+      );
+      y += 40 + gap;
+
+      // --- Multiple lines of emoji text ---
+      doc.fontSize(16);
+      doc.text('Line 1: Good morning โ˜€๏ธ', 50, y);
+      y += 22 + gap;
+      doc.text('Line 2: Good night ๐ŸŒ™', 50, y);
+      y += 22 + gap;
+      doc.text('Line 3: Weather ๐ŸŒง๏ธโ›ˆ๏ธ๐ŸŒค๏ธ', 50, y);
+      y += 22 + gap;
+
+      // --- Animals and nature ---
+      doc.fontSize(20);
+      doc.text('๐Ÿฑ๐Ÿถ๐Ÿญ๐Ÿน๐Ÿฐ๐ŸฆŠ๐Ÿป๐Ÿผ๐Ÿจ๐Ÿฏ๐Ÿฆ๐Ÿฎ', 50, y);
+      y += 26 + gap;
+
+      // --- Food emoji ---
+      doc.text('๐Ÿ•๐Ÿ”๐ŸŒฎ๐Ÿฃ๐Ÿœ๐Ÿฉ๐Ÿช๐ŸŽ‚๐Ÿฐ๐Ÿง', 50, y);
+      y += 26 + gap;
+
+      // --- Sports and activities ---
+      doc.text('โšฝ๐Ÿ€๐Ÿˆโšพ๐ŸŽพ๐Ÿ๐Ÿ‰๐ŸŽฑ๐Ÿ“๐Ÿธ', 50, y);
+      y += 26 + gap;
+
+      // --- Profession ZWJ sequences ---
+      doc.fontSize(24);
+      doc.text('๐Ÿ‘จโ€๐Ÿš€ ๐Ÿ‘ฉโ€๐Ÿ”ฌ ๐Ÿ‘จโ€๐Ÿณ ๐Ÿ‘ฉโ€๐ŸŽค', 50, y);
+    });
+
+  // The bundled fonts share one test, so their snapshots form one numbered
+  // series (-1-snap.png, -2-snap.png, ...). Render the documents one after
+  // another so that numbering cannot depend on which document finishes first.
+  testIfFontsExist(docOptions.map(({ emojiFont }) => emojiFont))(
+    'simple emoji mixed with text multiple fonts',
+    async function () {
+      for (const options of docOptions) {
+        await fontTest(options);
+      }
+    },
+  );
+
+  testIfFontsExist([appleTest.emojiFont])('Apple Color Emoji', () =>
+    fontTest(appleTest),
+  );
+});