Skip to content

Commit c25647c

Browse files
committed
diff: add long-running diff process via diff.<driver>.process
Add support for external diff processes that communicate via the long-running process protocol (pkt-line over stdin/stdout). A diff process is configured per userdiff driver: [diff "cdiff"] process = /path/to/diff-tool The tool provides custom line-matching: it receives file pairs and returns hunks that reference original line numbers. Unlike textconv, which transforms the displayed content, the diff output shows the actual file while the tool controls which lines are marked as changed. The handshake negotiates version=1 and capability=hunks. Per-file requests send command=hunks, pathname, and both file contents as packetized data. The tool responds with hunk lines and a status packet. On error, git falls back to the builtin diff algorithm with a warning. Zero hunks with status=success means the tool considers the files equivalent. Git skips diff output for that file. Signed-off-by: Michael Montalbo <mmontalbo@gmail.com>
1 parent de6d85f commit c25647c

8 files changed

Lines changed: 645 additions & 0 deletions

File tree

Documentation/config/diff.adoc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,14 @@ endif::git-diff[]
218218
Set this option to `true` to make the diff driver cache the text
219219
conversion outputs. See linkgit:gitattributes[5] for details.
220220

221+
`diff.<driver>.process`::
222+
The command to run as a long-running diff process.
223+
The tool communicates via the pkt-line protocol and returns
224+
hunks that are fed into Git's diff and blame pipelines.
225+
If the tool returns zero hunks, the file is treated as
226+
unchanged for both diff output and blame attribution.
227+
See linkgit:gitattributes[5] for details.
228+
221229
`diff.indentHeuristic`::
222230
Set this option to `false` to disable the default heuristics
223231
that shift diff hunk boundaries to make patches easier to read.

Documentation/gitattributes.adoc

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -821,6 +821,46 @@ NOTE: If `diff.<name>.command` is defined for path with the
821821
(see above), and adding `diff.<name>.algorithm` has no effect, as the
822822
algorithm is not passed to the external diff driver.
823823

824+
Using an external diff process
825+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
826+
827+
An external tool can provide content-aware line matching by
828+
setting `diff.<name>.process` to the command that runs
829+
the tool. The tool is a long-running process that communicates via
830+
the pkt-line protocol (described in
831+
Documentation/technical/long-running-process-protocol.adoc).
832+
833+
------------------------
834+
*.c diff=cdiff
835+
------------------------
836+
837+
----------------------------------------------------------------
838+
[diff "cdiff"]
839+
process = /path/to/diff-process-tool
840+
----------------------------------------------------------------
841+
842+
The tool receives file pairs and returns hunk descriptors indicating
843+
which lines changed. Git feeds these hunks into its standard diff
844+
pipeline, so all output features (word diff, function context,
845+
color) work normally.
846+
847+
If the tool fails or returns an error, Git emits a warning and falls back to
848+
the builtin diff algorithm. If the tool returns invalid hunks
849+
(out of bounds, overlapping), Git also falls back silently.
850+
851+
The handshake negotiates `version=1` and `capability=hunks`.
852+
Per-file requests send `command=hunks` and `pathname=<path>`,
853+
followed by the old and new file content as packetized data.
854+
The tool responds with lines of the form
855+
`hunk <old_start> <old_count> <new_start> <new_count>`
856+
(1-based line numbers), a flush packet, and `status=success`.
857+
858+
If the tool returns zero hunks with `status=success`, Git treats
859+
the file as having no changes and produces no diff output.
860+
861+
Tools should ignore unknown keys in the per-file request to
862+
remain forward-compatible.
863+
824864
Defining a custom hunk-header
825865
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
826866

Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1142,6 +1142,7 @@ LIB_OBJS += diff-delta.o
11421142
LIB_OBJS += diff-merges.o
11431143
LIB_OBJS += diff-lib.o
11441144
LIB_OBJS += diff-no-index.o
1145+
LIB_OBJS += diff-process.o
11451146
LIB_OBJS += diff.o
11461147
LIB_OBJS += diffcore-break.o
11471148
LIB_OBJS += diffcore-delta.o

diff-process.c

Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
/*
2+
* Diff process backend: communicates with a long-running external
3+
* tool via the pkt-line protocol to obtain custom line-matching
4+
* results. Unlike textconv, which transforms the displayed content,
5+
* hunks from a diff process reference original line numbers and
6+
* the display shows the actual file content.
7+
*
8+
* Protocol: pkt-line over stdin/stdout, following the pattern of
9+
* the long-running filter process protocol (see convert.c).
10+
*
11+
* Handshake:
12+
* git> git-diff-client / version=1 / flush
13+
* tool< git-diff-server / version=1 / flush
14+
* git> capability=hunks / flush
15+
* tool< capability=hunks / flush
16+
*
17+
* Per-file:
18+
* git> command=hunks / pathname=<path> / flush
19+
* git> <old content packetized> / flush
20+
* git> <new content packetized> / flush
21+
* tool< hunk <old_start> <old_count> <new_start> <new_count>
22+
* tool< ... / flush
23+
* tool< status=success / flush
24+
*
25+
* Zero hunks with status=success means the tool considers the
26+
* files equivalent. Git will skip the diff for that file.
27+
*/
28+
29+
#include "git-compat-util.h"
30+
#include "diff-process.h"
31+
#include "userdiff.h"
32+
#include "sub-process.h"
33+
#include "pkt-line.h"
34+
#include "strbuf.h"
35+
#include "xdiff/xdiff.h"
36+
37+
#define CAP_HUNKS (1u << 0)
38+
39+
/*
 * State kept for one running diff process, looked up by its command
 * line in subprocess_map.
 */
struct diff_subprocess {
40+
/*
 * Generic sub-process state. This is the first member because the
 * code in this file casts between struct subprocess_entry * and
 * struct diff_subprocess *.
 */
struct subprocess_entry subprocess;
41+
/* Bitmask of CAP_* flags filled in by the handshake. */
unsigned int supported_capabilities;
42+
};
43+
44+
static int subprocess_map_initialized;
45+
static struct hashmap subprocess_map;
46+
47+
static int start_diff_process_fn(struct subprocess_entry *subprocess)
48+
{
49+
static int versions[] = { 1, 0 };
50+
static struct subprocess_capability capabilities[] = {
51+
{ "hunks", CAP_HUNKS },
52+
{ NULL, 0 }
53+
};
54+
struct diff_subprocess *entry =
55+
(struct diff_subprocess *)subprocess;
56+
57+
/* Uses dying pkt-line variant, same as convert.c filters. */
58+
return subprocess_handshake(subprocess, "git-diff",
59+
versions, NULL,
60+
capabilities,
61+
&entry->supported_capabilities);
62+
}
63+
64+
static struct diff_subprocess *find_or_start_process(const char *cmd)
65+
{
66+
struct diff_subprocess *entry;
67+
68+
if (!subprocess_map_initialized) {
69+
subprocess_map_initialized = 1;
70+
hashmap_init(&subprocess_map, cmd2process_cmp, NULL, 0);
71+
}
72+
73+
entry = (struct diff_subprocess *)
74+
subprocess_find_entry(&subprocess_map, cmd);
75+
if (entry)
76+
return entry;
77+
78+
entry = xcalloc(1, sizeof(*entry));
79+
if (subprocess_start(&subprocess_map, &entry->subprocess,
80+
cmd, start_diff_process_fn)) {
81+
free(entry);
82+
return NULL;
83+
}
84+
85+
return entry;
86+
}
87+
88+
/*
 * Write `size` bytes of `buf` to `fd` as packetized data followed by
 * a flush packet. When `size` is not positive only the flush packet
 * is sent.
 *
 * Returns 0 on success, or the non-zero result of the failing write.
 */
static int send_file_content(int fd, const char *buf, long size)
{
	if (size > 0) {
		int err = write_packetized_from_buf_no_flush(buf, size, fd);

		if (err)
			return err;
	}

	return packet_flush_gently(fd);
}
100+
101+
static int parse_hunk_line(const char *line, struct xdl_hunk *hunk)
102+
{
103+
char *end;
104+
105+
/* Format: "hunk <old_start> <old_count> <new_start> <new_count>" */
106+
if (!skip_prefix(line, "hunk ", &line))
107+
return -1;
108+
109+
hunk->old_start = strtol(line, &end, 10);
110+
if (end == line || *end != ' ')
111+
return -1;
112+
line = end;
113+
114+
hunk->old_count = strtol(line, &end, 10);
115+
if (end == line || *end != ' ')
116+
return -1;
117+
line = end;
118+
119+
hunk->new_start = strtol(line, &end, 10);
120+
if (end == line || *end != ' ')
121+
return -1;
122+
line = end;
123+
124+
hunk->new_count = strtol(line, &end, 10);
125+
if (end == line || *end != '\0')
126+
return -1;
127+
128+
return 0;
129+
}
130+
131+
int diff_process_get_hunks(struct userdiff_driver *drv,
132+
const char *path,
133+
const char *old_buf, long old_size,
134+
const char *new_buf, long new_size,
135+
struct xdl_hunk **hunks_out,
136+
size_t *nr_hunks_out)
137+
{
138+
struct diff_subprocess *backend;
139+
struct child_process *process;
140+
int fd_in, fd_out;
141+
struct strbuf status = STRBUF_INIT;
142+
struct xdl_hunk *hunks = NULL;
143+
struct xdl_hunk hunk;
144+
size_t nr_hunks = 0, alloc_hunks = 0;
145+
int len;
146+
char *line;
147+
148+
if (!drv || !drv->process)
149+
return -1;
150+
151+
backend = find_or_start_process(drv->process);
152+
if (!backend)
153+
return -1;
154+
155+
if (!(backend->supported_capabilities & CAP_HUNKS))
156+
return -1;
157+
158+
process = subprocess_get_child_process(&backend->subprocess);
159+
fd_in = process->in;
160+
fd_out = process->out;
161+
162+
/* Send request */
163+
if (packet_write_fmt_gently(fd_in, "command=hunks\n") ||
164+
packet_write_fmt_gently(fd_in, "pathname=%s\n", path) ||
165+
packet_flush_gently(fd_in))
166+
goto error;
167+
168+
/* Send old file content */
169+
if (send_file_content(fd_in, old_buf, old_size))
170+
goto error;
171+
172+
/* Send new file content */
173+
if (send_file_content(fd_in, new_buf, new_size))
174+
goto error;
175+
176+
/* Read hunks until flush packet */
177+
while ((len = packet_read_line_gently(fd_out, NULL, &line)) >= 0 &&
178+
line) {
179+
if (parse_hunk_line(line, &hunk) < 0)
180+
goto error;
181+
ALLOC_GROW(hunks, nr_hunks + 1, alloc_hunks);
182+
hunks[nr_hunks++] = hunk;
183+
}
184+
if (len < 0)
185+
goto error;
186+
187+
/* Read status */
188+
if (subprocess_read_status(fd_out, &status))
189+
goto error;
190+
191+
if (strcmp(status.buf, "success")) {
192+
if (!strcmp(status.buf, "abort"))
193+
backend->supported_capabilities &= ~CAP_HUNKS;
194+
goto error;
195+
}
196+
197+
*hunks_out = hunks;
198+
*nr_hunks_out = nr_hunks;
199+
strbuf_release(&status);
200+
return 0;
201+
202+
error:
203+
free(hunks);
204+
strbuf_release(&status);
205+
return -1;
206+
}

diff-process.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#ifndef DIFF_PROCESS_H
2+
#define DIFF_PROCESS_H
3+
4+
struct userdiff_driver;
5+
struct xdl_hunk;
6+
7+
/*
8+
* Query a diff process for hunks describing the changes
9+
* between old_buf and new_buf.
10+
*
11+
* The backend is a long-running subprocess configured via
12+
* diff.<driver>.process. It receives file content via
13+
* pkt-line and returns hunks with 1-based line numbers.
14+
*
15+
* On success, sets *hunks_out and *nr_hunks_out to a newly allocated
16+
* array (caller must free) and returns 0.
17+
*
18+
* On failure, returns -1. The caller should fall back to the
19+
* builtin diff algorithm.
20+
*/
21+
int diff_process_get_hunks(struct userdiff_driver *drv,
22+
const char *path,
23+
const char *old_buf, long old_size,
24+
const char *new_buf, long new_size,
25+
struct xdl_hunk **hunks_out,
26+
size_t *nr_hunks_out);
27+
28+
#endif /* DIFF_PROCESS_H */

diff.c

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "utf8.h"
2626
#include "odb.h"
2727
#include "userdiff.h"
28+
#include "diff-process.h"
2829
#include "submodule.h"
2930
#include "hashmap.h"
3031
#include "mem-pool.h"
@@ -3991,6 +3992,7 @@ static void builtin_diff(const char *name_a,
39913992
xpparam_t xpp;
39923993
xdemitconf_t xecfg;
39933994
struct emit_callback ecbdata;
3995+
struct xdl_hunk *ext_hunks = NULL;
39943996
unsigned ws_rule;
39953997
const struct userdiff_funcname *pe;
39963998

@@ -4031,6 +4033,26 @@ static void builtin_diff(const char *name_a,
40314033
xpp.ignore_regex_nr = o->ignore_regex_nr;
40324034
xpp.anchors = o->anchors;
40334035
xpp.anchors_nr = o->anchors_nr;
4036+
4037+
if (!o->ignore_driver_algorithm &&
4038+
one->driver && one->driver->process) {
4039+
size_t ext_hunks_nr = 0;
4040+
if (!diff_process_get_hunks(
4041+
one->driver, name_a,
4042+
mf1.ptr, mf1.size,
4043+
mf2.ptr, mf2.size,
4044+
&ext_hunks, &ext_hunks_nr)) {
4045+
if (!ext_hunks_nr)
4046+
goto free_ab_and_return;
4047+
xpp.external_hunks = ext_hunks;
4048+
xpp.external_hunks_nr = ext_hunks_nr;
4049+
} else {
4050+
warning(_("diff process failed for '%s',"
4051+
" falling back to builtin diff"),
4052+
name_a);
4053+
}
4054+
}
4055+
40344056
xecfg.ctxlen = o->context;
40354057
xecfg.interhunkctxlen = o->interhunkcontext;
40364058
xecfg.flags = XDL_EMIT_FUNCNAMES;
@@ -4111,6 +4133,7 @@ static void builtin_diff(const char *name_a,
41114133
} else if (xdi_diff_outf(&mf1, &mf2, NULL, fn_out_consume,
41124134
&ecbdata, &xpp, &xecfg))
41134135
die("unable to generate diff for %s", one->path);
4136+
free(ext_hunks);
41144137
if (o->word_diff)
41154138
free_diff_words_data(&ecbdata);
41164139
if (textconv_one)

t/.gitattributes

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,4 @@ t[0-9][0-9][0-9][0-9]/* -whitespace
2323
/t8005/*.txt eol=lf
2424
/t9*/*.dump eol=lf
2525
/t0040*.sh whitespace=-indent-with-non-tab
26+
/t4080-diff-process.sh whitespace=-indent-with-non-tab

0 commit comments

Comments
 (0)