superdoc-dev · tupizz · May 18, 2026
@@ -310,4 +310,70 @@ describe('doc.replace multi-paragraph integration', () => {
     expect(insertedTexts).toEqual(expect.arrayContaining(['Alpha']));
     expect(deletedTexts.join('')).toContain('hello world');
   });
+
+  // SD-3044: when the word-diff produces multiple groups with EQUAL tokens
+  // between them, inserted text used to anchor on the previous result op's
+  // end instead of the EQUAL token's end, piling all granular insertions on
+  // the first deletion site.
+  it('SD-3044: tracked rewrite with shared suffix anchors inserts correctly', () => {
+    editor = makeEditor(['[insert] of [insert], [insert] ("Investor")']);
+    const receipt = editor.doc.replace(
+      {
+        ref: getFirstMatchRef(editor, '[insert] of [insert], [insert] ("Investor")'),
+        text: 'John James Smith of [insert address], [insert] ("Investor")',
+      },
+      { changeMode: 'tracked' },
+    );
+
+    expect(receipt.success).toBe(true);
+
+    // Accepted view: drop trackDelete marks, keep everything else.
+    const acceptedParts: string[] = [];
+    editor.state.doc.descendants((node: any) => {
+      if (!node.isText || !node.text) return;
+      const isDeleted = node.marks.some((mark: any) => mark.type.name === TrackDeleteMarkName);
+      if (!isDeleted) acceptedParts.push(node.text);
+    });
+
+    expect(acceptedParts.join('')).toBe('John James Smith of [insert address], [insert] ("Investor")');
+
+    // Specifically guard against the buggy strings reported in the ticket.
+    const accepted = acceptedParts.join('');
+    expect(accepted).not.toContain('JohnJames');
+    expect(accepted).not.toContain('Smith  address');
+  });
+
+  it('SD-3044: tracked rewrite of long block preserves spacing across multiple equal anchors', () => {
+    editor = makeEditor([
+      '[insert] Pty Limited a company incorporated in Australia having its registered office at [insert] (ACN [insert])("Company")',
+    ]);
+    const target =
+      'Working Title Group Limited a company incorporated in New Zealand having its registered office at 29 Park Hill Road, Birkenhead, Auckland, 0626, NZ (NZBN 9429050880331)("Company")';
+
+    const receipt = editor.doc.replace(
+      {
+        ref: getFirstMatchRef(
+          editor,
+          '[insert] Pty Limited a company incorporated in Australia having its registered office at [insert] (ACN [insert])("Company")',
+        ),
+        text: target,
+      },
+      { changeMode: 'tracked' },
+    );
+
+    expect(receipt.success).toBe(true);
+
+    const acceptedParts: string[] = [];
+    editor.state.doc.descendants((node: any) => {
+      if (!node.isText || !node.text) return;
+      const isDeleted = node.marks.some((mark: any) => mark.type.name === TrackDeleteMarkName);
+      if (!isDeleted) acceptedParts.push(node.text);
+    });
+
+    const accepted = acceptedParts.join('');
+    expect(accepted).toBe(target);
+    expect(accepted).not.toContain('PtyTitle');
+    expect(accepted).not.toContain('AustraliaNew');
+    expect(accepted).not.toContain('(ACNPark');
+  });
 });
@@ -0,0 +1,88 @@
+import { describe, expect, it } from 'vitest';
+import { getWordChanges, type WordDiffOp } from './word-diff.ts';
+
+function applyOps(oldText: string, ops: WordDiffOp[]): string {
+  // Apply word ops to oldText to produce the expected new text. Ops anchor on
+  // oldText offsets and are applied left-to-right with cumulative offset.
+  let result = '';
+  let cursor = 0;
+  for (const op of ops) {
+    if (op.type === 'insert') {
+      // Copy unchanged text up to the insertion point, then insert.
+      result += oldText.slice(cursor, op.insertAt);
+      cursor = op.insertAt;
+      result += op.newText;
+    } else if (op.type === 'delete') {
+      result += oldText.slice(cursor, op.oldFrom);
+      cursor = op.oldTo;
+    } else {
+      result += oldText.slice(cursor, op.oldFrom);
+      cursor = op.oldTo;
+      result += op.newText;
+    }
+  }
+  result += oldText.slice(cursor);
+  return result;
+}
+
+describe('getWordChanges', () => {
+  it('returns empty for identical text', () => {
+    expect(getWordChanges('hello world', 'hello world')).toEqual([]);
+  });
+
+  it('returns single insert when old is empty', () => {
+    expect(getWordChanges('', 'hello')).toEqual([{ type: 'insert', insertAt: 0, newText: 'hello' }]);
+  });
+
+  it('returns single delete when new is empty', () => {
+    expect(getWordChanges('hello', '')).toEqual([{ type: 'delete', oldFrom: 0, oldTo: 5 }]);
+  });
+
+  it('produces correct REPLACE for a single word change', () => {
+    const ops = getWordChanges('hello world', 'goodbye world');
+    expect(applyOps('hello world', ops)).toBe('goodbye world');
+  });
+
+  it('produces correct ops when one word is replaced and the trailing one is kept', () => {
+    const ops = getWordChanges('foo bar', 'baz bar');
+    expect(applyOps('foo bar', ops)).toBe('baz bar');
+  });
+
+  // SD-3044: regression — insert-only groups between EQUAL tokens must anchor
+  // to the preceding EQUAL token's end, not to the previous result op's end.
+  it('SD-3044: insert between EQUAL tokens uses correct anchor', () => {
+    // Pattern: old has an EQUAL token that lands between two insert groups.
+    const ops = getWordChanges('a b c', 'x a y b c');
+    expect(applyOps('a b c', ops)).toBe('x a y b c');
+  });
+
+  it('SD-3044: regression with the exact suffix-trim shape from the Lighthouse fixture', () => {
+    // After prefix/suffix trim, the parties-investor block reduces to these
+    // strings. The trailing `]` of the second `[insert]` is in the suffix, so
+    // `[insert` (without the `]`) becomes a token that matches between old and
+    // new. Myers then produces three groups separated by EQUAL tokens — the
+    // bug was that the two pure-INSERT groups both anchored to char 8.
+    const oldTrimmed = '[insert] of [insert';
+    const newTrimmed = 'John James Smith of [insert address';
+    const ops = getWordChanges(oldTrimmed, newTrimmed);
+    expect(applyOps(oldTrimmed, ops)).toBe(newTrimmed);
+
+    // Specifically the bug produced two inserts at insertAt=8; with the fix,
+    // the second insert anchors past the preserved ` of [insert` (offset 19).
+    const inserts = ops.filter((o): o is Extract<WordDiffOp, { type: 'insert' }> => o.type === 'insert');
+    expect(inserts).toHaveLength(2);
+    const insertAts = inserts.map((o) => o.insertAt).sort((a, b) => a - b);
+    expect(insertAts[0]).toBe(9); // after the equal space (old[1])
+    expect(insertAts[1]).toBe(19); // after the equal `[insert` (old[4])
+  });
+
+  it('SD-3044: prefix-only equal anchors first insert past the prefix', () => {
+    const ops = getWordChanges('foo', 'foo bar');
+    expect(applyOps('foo', ops)).toBe('foo bar');
+    // After EQUAL `foo` (length 3), insert anchor must be 3 not 0.
+    const inserts = ops.filter((o): o is Extract<WordDiffOp, { type: 'insert' }> => o.type === 'insert');
+    if (inserts.length > 0) {
+      expect(inserts[0].insertAt).toBe(3);
+    }
+  });
+});
@@ -87,6 +87,13 @@ export function getWordChanges(oldText: string, newText: string): WordDiffOp[] {
       continue;
     }
 
+    // SD-3044: capture the index where this delete/insert group starts so we
+    // can inspect the step immediately preceding the group (typically an
+    // 'equal' that anchors a pure-insert group's position). After the inner
+    // while loop runs, `i` points past the group, so `steps[i - 1]` is the
+    // last delete/insert in this group and never reflects the prior anchor.
+    const groupStart = i;
+
     let deleteStart = -1;
     let deleteEnd = -1;
     let insertText = '';
@@ -108,7 +115,7 @@ export function getWordChanges(oldText: string, newText: string): WordDiffOp[] {
     } else if (deleteStart !== -1) {
       result.push({ type: 'delete', oldFrom: deleteStart, oldTo: deleteEnd });
     } else if (insertText.length > 0) {
-      const prevStep = i > 0 ? steps[i - 1] : null;
+      const prevStep = groupStart > 0 ? steps[groupStart - 1] : null;
       let insertAt = 0;
       if (prevStep && prevStep.type === 'equal') {
         const prevToken = oldTokens[prevStep.oldIdx];