From 0a8f93e443a05ea87be5e9af0d0d7a52c9680e51 Mon Sep 17 00:00:00 2001 From: Tadeu Tupinamba Date: Mon, 18 May 2026 16:00:25 -0300 Subject: [PATCH] fix(document-api): anchor pure-insert word-diff groups to preceding EQUAL token (SD-3044) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In tracked `text.rewrite`, when the word-level diff produces multiple delete/insert groups separated by EQUAL tokens, pure-insert groups were anchoring to the previous result op's end (`oldTo` / `insertAt`) instead of the EQUAL token that precedes the group. `prevStep = steps[i - 1]` read the last step of the just-processed group (always delete/insert), so the `prevStep.type === 'equal'` branch was dead code. For inputs like `[insert] of [insert` → `John James Smith of [insert address`, the diff produced three groups with EQUAL tokens between them (`of`, ` `, `[insert`). Both pure-insert groups fell back to `insertAt = 8`, piling all granular insertions on the first deletion site and producing strings like `JohnJames Smith address of [insert]...` after accept. Capture `groupStart` before the inner while loop and read `steps[groupStart - 1]` so the EQUAL-anchor branch actually fires. Adds unit coverage for `getWordChanges` and two integration tests mirroring the Lighthouse SAFE-document repro shape. --- .../tracked-rewrite.integration.test.ts | 66 ++++++++++++++ .../plan-engine/word-diff.test.ts | 88 +++++++++++++++++++ .../plan-engine/word-diff.ts | 9 +- 3 files changed, 162 insertions(+), 1 deletion(-) create mode 100644 packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/word-diff.test.ts diff --git a/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/tracked-rewrite.integration.test.ts b/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/tracked-rewrite.integration.test.ts index 4059bc1c23..6f1aadbb5f 100644 --- a/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/tracked-rewrite.integration.test.ts +++ b/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/tracked-rewrite.integration.test.ts @@ -310,4 +310,70 @@ describe('doc.replace multi-paragraph integration', () => { expect(insertedTexts).toEqual(expect.arrayContaining(['Alpha'])); expect(deletedTexts.join('')).toContain('hello world'); }); + + // SD-3044: when the word-diff produces multiple groups with EQUAL tokens + // between them, inserted text used to anchor on the previous result op's + // end instead of the EQUAL token's end, piling all granular insertions on + // the first deletion site. + it('SD-3044: tracked rewrite with shared suffix anchors inserts correctly', () => { + editor = makeEditor(['[insert] of [insert], [insert] ("Investor")']); + const receipt = editor.doc.replace( + { + ref: getFirstMatchRef(editor, '[insert] of [insert], [insert] ("Investor")'), + text: 'John James Smith of [insert address], [insert] ("Investor")', + }, + { changeMode: 'tracked' }, + ); + + expect(receipt.success).toBe(true); + + // Accepted view: drop trackDelete marks, keep everything else. + const acceptedParts: string[] = []; + editor.state.doc.descendants((node: any) => { + if (!node.isText || !node.text) return; + const isDeleted = node.marks.some((mark: any) => mark.type.name === TrackDeleteMarkName); + if (!isDeleted) acceptedParts.push(node.text); + }); + + expect(acceptedParts.join('')).toBe('John James Smith of [insert address], [insert] ("Investor")'); + + // Specifically guard against the buggy strings reported in the ticket. + const accepted = acceptedParts.join(''); + expect(accepted).not.toContain('JohnJames'); + expect(accepted).not.toContain('Smith address'); + }); + + it('SD-3044: tracked rewrite of long block preserves spacing across multiple equal anchors', () => { + editor = makeEditor([ + '[insert] Pty Limited a company incorporated in Australia having its registered office at [insert] (ACN [insert])("Company")', + ]); + const target = + 'Working Title Group Limited a company incorporated in New Zealand having its registered office at 29 Park Hill Road, Birkenhead, Auckland, 0626, NZ (NZBN 9429050880331)("Company")'; + + const receipt = editor.doc.replace( + { + ref: getFirstMatchRef( + editor, + '[insert] Pty Limited a company incorporated in Australia having its registered office at [insert] (ACN [insert])("Company")', + ), + text: target, + }, + { changeMode: 'tracked' }, + ); + + expect(receipt.success).toBe(true); + + const acceptedParts: string[] = []; + editor.state.doc.descendants((node: any) => { + if (!node.isText || !node.text) return; + const isDeleted = node.marks.some((mark: any) => mark.type.name === TrackDeleteMarkName); + if (!isDeleted) acceptedParts.push(node.text); + }); + + const accepted = acceptedParts.join(''); + expect(accepted).toBe(target); + expect(accepted).not.toContain('PtyTitle'); + expect(accepted).not.toContain('AustraliaNew'); + expect(accepted).not.toContain('(ACNPark'); + }); }); diff --git a/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/word-diff.test.ts b/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/word-diff.test.ts new file mode 100644 index 0000000000..4444376994 --- /dev/null +++ b/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/word-diff.test.ts @@ -0,0 +1,88 @@ +import { describe, expect, it } from 'vitest'; +import { getWordChanges, type WordDiffOp } from './word-diff.ts'; + +function applyOps(oldText: string, ops: WordDiffOp[]): string { + // Apply word ops to oldText to produce the expected new text. Ops anchor on + // oldText offsets and are applied left-to-right with cumulative offset. + let result = ''; + let cursor = 0; + for (const op of ops) { + if (op.type === 'insert') { + // Copy unchanged text up to the insertion point, then insert. + result += oldText.slice(cursor, op.insertAt); + cursor = op.insertAt; + result += op.newText; + } else if (op.type === 'delete') { + result += oldText.slice(cursor, op.oldFrom); + cursor = op.oldTo; + } else { + result += oldText.slice(cursor, op.oldFrom); + cursor = op.oldTo; + result += op.newText; + } + } + result += oldText.slice(cursor); + return result; +} + +describe('getWordChanges', () => { + it('returns empty for identical text', () => { + expect(getWordChanges('hello world', 'hello world')).toEqual([]); + }); + + it('returns single insert when old is empty', () => { + expect(getWordChanges('', 'hello')).toEqual([{ type: 'insert', insertAt: 0, newText: 'hello' }]); + }); + + it('returns single delete when new is empty', () => { + expect(getWordChanges('hello', '')).toEqual([{ type: 'delete', oldFrom: 0, oldTo: 5 }]); + }); + + it('produces correct REPLACE for a single word change', () => { + const ops = getWordChanges('hello world', 'goodbye world'); + expect(applyOps('hello world', ops)).toBe('goodbye world'); + }); + + it('produces correct ops when one word is replaced and the trailing one is kept', () => { + const ops = getWordChanges('foo bar', 'baz bar'); + expect(applyOps('foo bar', ops)).toBe('baz bar'); + }); + + // SD-3044: regression — insert-only groups between EQUAL tokens must anchor + // to the preceding EQUAL token's end, not to the previous result op's end. + it('SD-3044: insert between EQUAL tokens uses correct anchor', () => { + // Pattern: old has an EQUAL token that lands between two insert groups. + const ops = getWordChanges('a b c', 'x a y b c'); + expect(applyOps('a b c', ops)).toBe('x a y b c'); + }); + + it('SD-3044: regression with the exact suffix-trim shape from the Lighthouse fixture', () => { + // After prefix/suffix trim, the parties-investor block reduces to these + // strings. The trailing `]` of the second `[insert]` is in the suffix, so + // `[insert` (without the `]`) becomes a token that matches between old and + // new. Myers then produces three groups separated by EQUAL tokens — the + // bug was that the two pure-INSERT groups both anchored to char 8. + const oldTrimmed = '[insert] of [insert'; + const newTrimmed = 'John James Smith of [insert address'; + const ops = getWordChanges(oldTrimmed, newTrimmed); + expect(applyOps(oldTrimmed, ops)).toBe(newTrimmed); + + // Specifically the bug produced two inserts at insertAt=8; with the fix, + // the second insert anchors past the preserved ` of [insert` (offset 19). + const inserts = ops.filter((o): o is Extract => o.type === 'insert'); + expect(inserts).toHaveLength(2); + const insertAts = inserts.map((o) => o.insertAt).sort((a, b) => a - b); + expect(insertAts[0]).toBe(9); // after the equal space (old[1]) + expect(insertAts[1]).toBe(19); // after the equal `[insert` (old[4]) + }); + + it('SD-3044: prefix-only equal anchors first insert past the prefix', () => { + const ops = getWordChanges('foo', 'foo bar'); + expect(applyOps('foo', ops)).toBe('foo bar'); + // After EQUAL `foo` (length 3), insert anchor must be 3 not 0. + const inserts = ops.filter((o): o is Extract => o.type === 'insert'); + if (inserts.length > 0) { + expect(inserts[0].insertAt).toBe(3); + } + }); +}); diff --git a/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/word-diff.ts b/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/word-diff.ts index 6c94df3daa..afaf10311c 100644 --- a/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/word-diff.ts +++ b/packages/super-editor/src/editors/v1/document-api-adapters/plan-engine/word-diff.ts @@ -87,6 +87,13 @@ export function getWordChanges(oldText: string, newText: string): WordDiffOp[] { continue; } + // SD-3044: capture the index where this delete/insert group starts so we + // can inspect the step immediately preceding the group (typically an + // 'equal' that anchors a pure-insert group's position). After the inner + // while loop runs, `i` points past the group, so `steps[i - 1]` is the + // last delete/insert in this group and never reflects the prior anchor. + const groupStart = i; + let deleteStart = -1; let deleteEnd = -1; let insertText = ''; @@ -108,7 +115,7 @@ export function getWordChanges(oldText: string, newText: string): WordDiffOp[] { } else if (deleteStart !== -1) { result.push({ type: 'delete', oldFrom: deleteStart, oldTo: deleteEnd }); } else if (insertText.length > 0) { - const prevStep = i > 0 ? steps[i - 1] : null; + const prevStep = groupStart > 0 ? steps[groupStart - 1] : null; let insertAt = 0; if (prevStep && prevStep.type === 'equal') { const prevToken = oldTokens[prevStep.oldIdx];