diff --git a/MANIFEST b/MANIFEST index 094b0a020..789c9e849 100644 --- a/MANIFEST +++ b/MANIFEST @@ -441,6 +441,7 @@ include/inn/secrets.h Header file for the secrets struct include/inn/sequence.h Header file for sequence space arithmetic include/inn/storage.h Header file for storage API include/inn/timer.h Header file for generic timers +include/inn/tombstone.h Header file for cancel tombstone log include/inn/tst.h Header file for ternary search tries include/inn/utility.h Header file for utility functions include/inn/vector.h Header file for vectors of strings @@ -805,6 +806,7 @@ storage/timehash timehash storage method (Directory) storage/timehash/method.config buildconfig definition storage/timehash/timehash.c timehash storage routines storage/timehash/timehash.h Header for timehash +storage/tombstone.c Cancel tombstone log helpers storage/tradindexed tradindexed overview method (Directory) storage/tradindexed/ovmethod.config buildconfig definition storage/tradindexed/ovmethod.mk Make rules for tradindexed overview @@ -931,6 +933,10 @@ tests/data/upgrade/readers.conf.ok Fixed readers.conf file tests/data/upgrade/sasl.conf Obsolete sasl.conf config file tests/docs Test suite for documentation (Directory) tests/docs/pod.t.in Tests for POD formatting +tests/expire Test suite for expire (Directory) +tests/expire/tombstone-e2e.t End-to-end tests for tombstone log +tests/expire/tombstone-hisexpire-t.c HISexpire integration test for tombstone +tests/expire/tombstone-t.c Tests for tombstone library tests/innd Test suite for innd (Directory) tests/innd/artparse-t.c Tests for ARTparse in innd tests/innd/chan-t.c Tests for CHAN functions in innd @@ -997,6 +1003,7 @@ tests/perl/minimum-version.t.in Tests for not too-new features of Perl tests/runtests.c The test suite driver program tests/storage Test suite for storage (Directory) tests/storage/archive.t Tests for backends/archive +tests/storage/cancel-tombstone-t.c Tests for SMcanceltombstone tests/storage/makehistory.t Tests for expire/makehistory tests/storage/sm.t Tests for frontends/sm tests/tap Helper scripts for TAP (Directory) diff --git a/doc/pod/expire.pod b/doc/pod/expire.pod index fba5fd25a..bdc2d850c 100644 --- a/doc/pod/expire.pod +++ b/doc/pod/expire.pod @@ -175,6 +175,18 @@ is specified, the file I/expire.ctl is read. =back +=head1 TOMBSTONE LOG + +When I is enabled in F, B consumes +the per-cycle deletion log produced by B, B, and +B (F/expireover.tombstone> and +F/cancels.tombstone>) so it can drop history entries for +those articles without doing a per-article C +syscall. An empty tombstone is treated as "no cancels this cycle" +and the slow scan is skipped entirely. See inn.conf(5) under +I for the file lifecycle, locking model, and +recovery story. + =head1 HISTORY Written by Rich $alz for InterNetNews. Converted to diff --git a/doc/pod/expireover.pod b/doc/pod/expireover.pod index 239b354b6..25114239b 100644 --- a/doc/pod/expireover.pod +++ b/doc/pod/expireover.pod @@ -21,6 +21,14 @@ F. Otherwise it only removes overview entries for articles that have already been removed by some other process, and B<-e>, B<-k>, B<-N>, B<-p>, B<-q>, B<-w>, and B<-z> are all ignored. +When I is enabled in F, B +appends each cancelled token to F/expireover.tombstone.NEW> +under an exclusive POSIX lock and atomically renames the file into +place on a clean run. In delayrm mode (B<-z>), the rename is +performed by B after B succeeds. The next +B run consumes this log to skip per-article storage existence +checks. See inn.conf(5) under I. + When I is set, the default behavior of B is to remove the article from the spool once it expires out of all of the newsgroups to which it was crossposted. The article is, however, removed diff --git a/doc/pod/inn.conf.pod b/doc/pod/inn.conf.pod index a65fe4abd..45e257125 100644 --- a/doc/pod/inn.conf.pod +++ b/doc/pod/inn.conf.pod @@ -632,6 +632,82 @@ in F at startup and the unrecognized fields will be discarded. Moreover, the deprecated C and C header fields, already present in the standard overview fields as metadata items, cannot be added. +=item I + +Whether INN tools record cancellation tombstones so a subsequent +B run can skip per-article storage existence checks. When +enabled (and I is also true), two log files in +I capture every cancellation: + +=over 4 + +=item F/expireover.tombstone> + +Written by B after each successful B in +group-based expiry, atomically renamed into place on a clean run. +When B runs with C<-z> (delayed removal), the +B calls are deferred to B via B; +B writes the entries up front and B performs +the atomic rename after B succeeds, so the same speedup +applies to delayrm setups. After B consumes this file it +is unlinked and re-seeded as a header-only successor, matching +the on-disk presence of F; the next +B run overwrites the seeded file with its full +content. + +=item F/cancels.tombstone> + +Appended continuously by B when it processes cancel control +messages, and by B for manual cancellations. Appenders take +a shared fcntl POSIX lock; B snapshots the file by atomic +rename to F under an exclusive fcntl +lock, unlinks the snapshot after a successful consume, then +recreates F as a header-only file under an +exclusive lock so B's per-connection fast path stays +active through quiet inter-cancel periods. An appender that +raced a cancel into a new live file between the rename and the +recreate has its content preserved verbatim below the restored +header. Append atomicity +relies on POSIX guaranteeing that each C to a regular +file opened with C is atomic with respect to other +writers; this holds for any size of single C on local +filesystems. Cross-client atomicity over NFS is not guaranteed: +if I is on NFS, lines from concurrent writers on different +clients can in theory interleave. In practice INN's cancel +sources (B on a single host plus occasional B on the +same host) write from one client. + +=back + +The next B invocation loads both files into a single hashset +and treats every article in either log as already gone, avoiding an +C call per history entry. For storage methods +where the stat is a file-system call (tradspool, timehash) this turns +a billion C calls into a few thousand hash lookups; for +storage methods that self-expire (CNFS) the stat call remains because +articles can vanish through wrap-around without going through +B. + +In normal operation every cancellation path participates in +tombstone tracking, so all articles removed from the spool are +recorded. Residual orphans can only accumulate from events outside +the tracked paths: a process crash in the narrow window between +B and the tombstone append, manual filesystem-level +deletes that bypass B, or filesystem corruption. When such +orphans do appear they are harmless (B returns "no such +article" to readers that hit them) and exist only as small history +entries. No regular reconciliation cadence is needed. If an +operator suspects orphan accumulation after admin intervention or a +storage incident, B can be re-run with this option disabled +in F to perform an exhaustive C +scan; this is an exceptional operation, not a scheduled one. + +Footprint: ~38 bytes per entry on disk, ~50 bytes in expire's hash +table. 1M cancels per run = ~38 MB tombstone, ~50 MB hash. + +This is a boolean value and the default is false; sites should opt in +after validating the option's behaviour against their workload. + =item I Whether to enable newsgroup-based expiry. If set to false, article expiry @@ -890,10 +966,50 @@ Whether B should check the existence of an article before listing it as present in response to an NNTP command (HDR, LISTGROUP, NEWNEWS, OVER, XPAT). The primary use of this setting is to prevent B from returning information about articles which are no longer present on the server but which -still have overview data available. Checking the existence of articles before -returning overview information slows down the overview commands, but reduces -the number of "article is missing" errors seen by the client. This is a -boolean value and the default is true. +still have overview data available. Checking existence with an unconditional +C slows down the overview commands; with I also +enabled (see below) the check uses an in-memory hash lookup instead and is +cheaper than disabling the check on tradspool/timehash/timecaf backends. +The trade-off remains: enabling this reduces the number of "article is +missing" errors seen by the client. This is a boolean value and the default +is true. + +When I is also enabled, B consults the +F/cancels.tombstone> log on the article-existence check +path: a token recorded as cancelled is reported as gone without an +C call, and a token absent from the tombstone is +trusted to still exist (skipping the syscall) for storage methods +that do not self-expire. Self-expiring backends (CNFS) still go +through C because cyclic-buffer wrap-around bypasses +the tombstone. The tombstone is loaded lazily on first use per +connection and refreshed by C on each call; the parsed +hashset is rebuilt only when the file's mtime or size changes. +Statting the same path is dentry-cache resident and far cheaper +than the per-article syscalls the fast path elides, so cancellations +recorded by other processes become visible to long-lived +connections on the next existence check. + +Each B connection holds an independent copy of the parsed +hashset, costing roughly 50 bytes per cancel; sites with very large +cancel volumes between B runs and many concurrent readers +should size memory accordingly. Loss relative to the unconditional +C path is bounded to out-of-band events the tombstone +cannot see (manual filesystem deletes that bypass B, +filesystem corruption); admin-initiated B and B +cancels are tracked. + +Important: on non-self-expiring backends the fast path treats a +tombstone-miss as proof that the article is still on disk, without +verifying. Admins must remove articles via B (which records +the cancel in the tombstone), not by direct C on the spool, or +readers will be told a deleted article still exists until the next +B reconciles overview. + +Note that the fast path is also gated on I; both +must be true to take effect. The B C +counter reflects only the slow C path, so the syslog +field will appear smaller when the fast path is doing most of the +work. You may also want to see the I parameter in readers.conf(5) which controls the computing of the estimated article count returned in NNTP diff --git a/doc/pod/sm.pod b/doc/pod/sm.pod index e901bae4f..30eefb8ae 100644 --- a/doc/pod/sm.pod +++ b/doc/pod/sm.pod @@ -51,6 +51,13 @@ will delete the article out of the news spool and it will not subsequently be retrievable by any part of INN. It's equivalent to C except it takes a storage API token instead of a message-ID. +When the I setting in F is true, B +also appends the cancelled token to F/cancels.tombstone> +so a later B run can drop the corresponding history entry +without a per-article storage check. Append failures are logged +but do not affect the cancellation itself. See inn.conf(5) under +I for the full mechanism. + =item B<-H> Retrieve only the headers of the article rather than the entire article. diff --git a/expire/expire.c b/expire/expire.c index 702415200..9b6c1eccc 100644 --- a/expire/expire.c +++ b/expire/expire.c @@ -6,10 +6,13 @@ #include #include +#include #include #include #include +#include +#include "inn/hashtab.h" #include "inn/history.h" #include "inn/innconf.h" #include "inn/inndcomm.h" @@ -18,6 +21,7 @@ #include "inn/newsuser.h" #include "inn/paths.h" #include "inn/storage.h" +#include "inn/tombstone.h" typedef struct _EXPIRECLASS { @@ -48,6 +52,7 @@ static char *EXPgraph; static int EXPverbose; static long EXPprocessed; static long EXPunlinked; +static long EXPtombstoned; static long EXPallgone; static long EXPstillhere; static struct history *History; @@ -370,23 +375,208 @@ EXPremove(const TOKEN *token) warn("cannot unlink %s", TokenToText(*token)); } +/* +** Load both tombstone logs into a hashset: +** - ${pathdb}/expireover.tombstone : written by expireover/expirerm +** (atomic .NEW -> final rename); unlinked after a successful +** expire run +** - ${pathdb}/cancels.tombstone : appended continuously by innd +** and sm for cancels outside the expireover pipeline; renamed +** to .processing at load time and unlinked after a successful +** expire run (rename-and-process avoids the read/truncate race +** that drops cancels arriving during HISexpire) +** +** At startup, also recovers any leftover .processing snapshot from a +** previous expire run that crashed before unlinking. +** +** consuming=true means this is a real run that will replace the +** history file; we rename cancels.tombstone to .processing under +** the consumer's lock so concurrent appenders cannot lose cancels +** written between our read and the eventual unlink. consuming= +** false (dry-run, tracing, or alternate-output expire) skips the +** rename and reads the live file directly: no consume happens, so +** there is nothing to atomically detach. This avoids both the +** TOCTOU race in restoring the snapshot and the operator surprise +** of cancels.tombstone disappearing for the duration of a -x run. +** +** Returns NULL if no tombstone could be loaded (no file present or +** all reads failed); that is not an error. Expire just falls back +** to the per-article SMretrieve check. *out_expireover_path and +** *out_cancels_snapshot are set to the file paths (caller frees and +** unlinks after a successful run); both are NULL when consuming is +** false. +*/ +static struct hash * +EXPloadtombstone(bool consuming, char **out_expireover_path, + char **out_cancels_snapshot) +{ + char *expireover_path = NULL; + char *cancels_path = NULL; + char *cancels_snapshot = NULL; + char *leftover = NULL; + struct hash *h = NULL; + struct stat sb; + unsigned long n_expireover = 0; + unsigned long n_cancels = 0; + unsigned long n_leftover = 0; + bool expireover_present = false; + bool leftover_present = false; + bool cancels_present = false; + + /* Default outputs to NULL; only set on the success-return path + below. This avoids double-frees if a future maintainer adds an + early return between here and that single point. */ + *out_expireover_path = NULL; + *out_cancels_snapshot = NULL; + + expireover_path = concatpath(innconf->pathdb, "expireover.tombstone"); + cancels_path = concatpath(innconf->pathdb, "cancels.tombstone"); + leftover = concat(cancels_path, ".processing", (char *) 0); + + /* Probe each file's presence (independent of content) so we can + distinguish "tombstone subsystem is actively tracking but had + no cancels this cycle" (trust the empty hashset, skip + SMretrieve for everything) from "no files at all" (subsystem + not in use, fall back to slow path). Without this, a cycle + with zero cancels would needlessly run SMretrieve on every + history entry. */ + if (stat(expireover_path, &sb) == 0) + expireover_present = true; + if (stat(leftover, &sb) == 0) + leftover_present = true; + if (stat(cancels_path, &sb) == 0) + cancels_present = true; + + /* Initial size: tokens are typically ~38 bytes per line; we let the + hash expand if we underestimate. 4096 covers the common case. */ + h = tombstone_hash_create(4096); + + /* Recover a leftover .processing from a previous run that crashed + between rename and unlink. */ + n_leftover = tombstone_read(h, leftover, NULL); + + /* On a real run, atomically snapshot the live cancels.tombstone, + then read it. On dry-run / tracing / alt-output, just read + the live file directly: no consume happens, so there is no + need to detach it (and detach-then-restore has a TOCTOU race + with concurrent appenders). */ + if (consuming) { + cancels_snapshot = tombstone_rename_for_processing(cancels_path); + if (cancels_snapshot != NULL) + n_cancels = tombstone_read(h, cancels_snapshot, NULL); + } else { + n_cancels = tombstone_read(h, cancels_path, NULL); + } + + /* Read the expireover-side log. */ + n_expireover = tombstone_read(h, expireover_path, NULL); + + if (EXPverbose) + printf("Loaded %lu + %lu + %lu tombstone entries (%lu unique)\n", + n_expireover, n_cancels, n_leftover, hash_count(h)); + + /* Reconcile the leftover with the active snapshot. Only relevant + on a real consuming run; dry-run leaves the leftover where it + is for the next consuming run to pick up. + (a) leftover empty: just free the path string + (b) leftover non-empty + no live snapshot: promote leftover to + be the snapshot so the caller's single unlink covers it + (c) leftover non-empty + live snapshot: unlink the leftover now + (its contents are already in the hashset); the snapshot + remains and is unlinked on success */ + if (consuming) { + if (n_leftover > 0 && cancels_snapshot == NULL) { + cancels_snapshot = leftover; + leftover = NULL; + } else if (n_leftover > 0) { + if (unlink(leftover) < 0) + syswarn("can't unlink %s", leftover); + } else if (leftover_present && cancels_snapshot == NULL) { + /* Leftover existed but was empty. Treat it as the + snapshot so the caller cleans it up after a + successful run. */ + cancels_snapshot = leftover; + leftover = NULL; + } else if (leftover_present) { + /* Both leftover and live snapshot existed; leftover + empty. Unlink it now; snapshot will be cleaned up + on success. */ + if (unlink(leftover) < 0) + syswarn("can't unlink %s", leftover); + } + } + free(leftover); + leftover = NULL; + + /* Decide between "fall back to slow path" (return NULL) and + "trust the (possibly empty) tombstone". A file that exists + (regardless of content) is taken as evidence the tombstone + subsystem is active for this site: innd/sm/expireover are + writing it. An empty active tombstone correctly says "nothing + was cancelled in the last cycle"; trust it. In dry-run mode + cancels.tombstone is not renamed, so cancels_snapshot stays + NULL; use the cancels_present probe instead. */ + if (!expireover_present && !leftover_present && !cancels_present + && cancels_snapshot == NULL) { + hash_free(h); + free(expireover_path); + free(cancels_path); + return NULL; + } + + /* Single success-return: hand both paths to the caller for + cleanup after a successful run. */ + free(cancels_path); + *out_expireover_path = expireover_path; + *out_cancels_snapshot = cancels_snapshot; + return h; +} + + /* ** Do the work of expiring one line. ** Returns true when the article should be kept for the time being. */ static bool -EXPdoline(void *cookie UNUSED, time_t arrived, time_t posted, time_t expires, +EXPdoline(void *cookie, time_t arrived, time_t posted, time_t expires, TOKEN *token) { + struct hash *tombstone = (struct hash *) cookie; time_t when; bool HasSelfexpire = false; bool Selfexpired = false; + bool selfexpiring; ARTHANDLE *article; enum KR kr; bool r; - if (innconf->groupbaseexpiry || SMprobe(SELFEXPIRE, token, NULL)) { - if ((article = SMretrieve(*token, RETR_STAT)) == (ARTHANDLE *) NULL) { + /* Tombstone fast path: if expireover already cancelled this article, + drop the history entry without doing any storage I/O. Bump + EXPunlinked too so the news.daily summary's "Articles dropped" + count matches the slow path's accounting (EXPremove also bumps + it). */ + if (tombstone != NULL && hash_lookup(tombstone, token) != NULL) { + EXPprocessed++; + if (EXPverbose > 3) + printf("%s (tombstoned by expireover)\n", TokenToText(*token)); + EXPallgone++; + EXPunlinked++; + EXPtombstoned++; + return false; + } + + selfexpiring = SMprobe(SELFEXPIRE, token, NULL); + + if (innconf->groupbaseexpiry || selfexpiring) { + if (tombstone != NULL && !selfexpiring) { + /* Backend does not self-expire and the tombstone log is + complete: not in the log means the article still exists. + Skip the SMretrieve to avoid a per-article syscall (the + main speedup for tradspool / timehash / timecaf). */ + HasSelfexpire = true; + Selfexpired = false; + } else if ((article = SMretrieve(*token, RETR_STAT)) + == (ARTHANDLE *) NULL) { HasSelfexpire = true; Selfexpired = true; } else { @@ -470,6 +660,8 @@ CleanupAndExit(bool Server, bool Paused, int x) printf("Entries expired %8ld\n", EXPallgone); if (!innconf->groupbaseexpiry) printf("Articles dropped %8ld\n", EXPunlinked); + if (innconf->expiretombstone && innconf->groupbaseexpiry) + printf("Tombstone hits %8ld\n", EXPtombstoned); } /* Append statistics to a summary file */ @@ -536,6 +728,16 @@ main(int ac, char *av[]) if (!innconf_read(NULL)) exit(1); + /* Warn about expiretombstone-without-groupbaseexpiry: in that + configuration there is no consumable tombstone since OVEXPremove + (which writes one of the two logs) is never called. The + cancels.tombstone written by innd/sm could still be loaded, but + only with groupbaseexpiry to make the result coherent with + expire's overall semantics. */ + if (innconf->expiretombstone && !innconf->groupbaseexpiry) + notice("expiretombstone has no effect when groupbaseexpiry" + " is false"); + HistoryText = concatpath(innconf->pathdb, INN_PATH_HISTORY); umask(NEWSUMASK); @@ -694,9 +896,70 @@ main(int ac, char *av[]) CleanupAndExit(Server, false, 1); } - Bad = HISexpire(History, NHistory, EXPreason, Writing, NULL, EXPremember, - EXPdoline) - == false; + /* Consume the tombstone logs produced since the last expire run. + Lets EXPdoline drop history entries for articles cancelled by + expireover, expirerm, innd, or sm without per-article SMretrieve + calls. Gated on groupbaseexpiry as well: if the admin flipped + that off since the last expireover, a stale tombstone written + under the old config could drop history entries for articles + still alive. */ + { + bool consuming = Writing && !EXPtracing && NHistory == NULL; + char *expireover_path = NULL; + char *cancels_snapshot = NULL; + struct hash *tombstone = NULL; + + if (innconf->expiretombstone && innconf->groupbaseexpiry) + tombstone = EXPloadtombstone(consuming, &expireover_path, + &cancels_snapshot); + + Bad = HISexpire(History, NHistory, EXPreason, Writing, tombstone, + EXPremember, EXPdoline) + == false; + + if (tombstone != NULL) + hash_free(tombstone); + + /* On a successful real run, unlink both consumed snapshots + and seed header-only successors. Dry-run / tracing / + alt-output skipped the rename in the loader so there are + no snapshot paths to clean up. */ + if (!Bad && consuming) { + if (expireover_path != NULL) { + if (unlink(expireover_path) < 0 && errno != ENOENT) + syswarn("can't unlink %s", expireover_path); + } + if (cancels_snapshot != NULL) { + if (unlink(cancels_snapshot) < 0 && errno != ENOENT) + syswarn("can't unlink %s", cancels_snapshot); + } + + /* Leave header-only successors behind so both tombstone + files exist symmetrically in pathdb after every + successful expire. For cancels.tombstone this keeps + nnrpd's per-connection fast path active through quiet + inter-cycle periods. For expireover.tombstone it is + cosmetic but matches the file's documented presence; + expireover will overwrite the header-only file the + next time it runs. Runs every consuming cycle when + the feature is enabled, including the brand-new- + install case where EXPloadtombstone returned NULL and + there was no snapshot to consume. Idempotent when + the file already starts with the header. */ + if (innconf->expiretombstone && innconf->groupbaseexpiry) { + char *path; + + path = concatpath(innconf->pathdb, "cancels.tombstone"); + tombstone_ensure_header(path); + free(path); + path = concatpath(innconf->pathdb, "expireover.tombstone"); + tombstone_ensure_header(path); + free(path); + } + } + free(expireover_path); + free(cancels_snapshot); + } if (UnlinkFile && EXPunlinkfile == NULL) /* Got -z but file was closed; oops. */ diff --git a/expire/expireover.c b/expire/expireover.c index 0548cae53..40a3e089b 100644 --- a/expire/expireover.c +++ b/expire/expireover.c @@ -10,10 +10,15 @@ #include "portable/system.h" #include +#include #include +#include +#include #include #include +#include +#include "inn/buffer.h" #include "inn/innconf.h" #include "inn/libinn.h" #include "inn/messages.h" @@ -22,6 +27,12 @@ #include "inn/paths.h" #include "inn/qio.h" #include "inn/storage.h" +#include "inn/tombstone.h" + +/* OVtombstonefile is an internal of the storage library (declared in + * storage/ovinterface.h), but expireover owns its lifecycle, so we + * declare it here rather than exporting it via the public ov.h header. */ +extern FILE *OVtombstonefile; static const char usage[] = "\ Usage: expireover [-ekNpqs] [-f file] [-w offset] [-z rmfile] [-Z lowmarkfile]\n"; @@ -45,6 +56,94 @@ fatal_signal(int sig) } +/* +** Verify each line of an in-memory tombstone buffer via SMretrieve and +** append survivors to kept. An entry survives if SMretrieve confirms +** the article is gone (SMERR_NOENT) or returns a non-NOENT error +** (SMERR_UNINIT, EIO, etc., where we cannot tell whether the article +** is alive); a transient failure must not silently discard a valid +** cancel record. Live articles (SMretrieve returns non-NULL) are +** dropped: we must not re-tombstone an article that is still on disk. +*/ +static void +verify_tombstone_lines(char *raw, ssize_t got, struct buffer *kept) +{ + char *entry, *save_p; + + if (got <= 0) + return; + raw[got] = '\0'; + entry = strtok_r(raw, "\n", &save_p); + while (entry != NULL) { + size_t len = strlen(entry); + if (len > 0 && entry[len - 1] == '\r') + entry[--len] = '\0'; + if (len > 0 && IsToken(entry)) { + TOKEN t = TextToToken(entry); + ARTHANDLE *art = SMretrieve(t, RETR_STAT); + if (art != NULL) { + /* Article still on disk; drop from tombstone to + avoid orphaning history for it. */ + SMfreearticle(art); + } else { + /* Either confirmed gone (SMERR_NOENT) or transient + error (SMERR_UNINIT, EIO, etc.). Keep the entry + either way: confirmed-gone is the normal case, + and on transient error we cannot tell whether + the article is alive, so preserve the record so + the next run can re-evaluate. Silently dropping + on transient error would leak cancels across a + storage outage. */ + buffer_append(kept, entry, len); + buffer_append(kept, "\n", 1); + } + } + entry = strtok_r(NULL, "\n", &save_p); + } +} + + +/* +** Read an entire file into a freshly allocated buffer. Used for +** leftover-recovery of expireover.tombstone (from a cycle where +** expirerm finalized but expire never consumed) and the .NEW +** leftover from a crashed prior run. Returns NULL on missing or +** empty file (sets *out_size to 0); returns a malloc'd buffer with +** the file content otherwise (caller frees). +*/ +static char * +slurp_tombstone(const char *path, ssize_t *out_size) +{ + int fd; + struct stat sb; + char *raw; + ssize_t got = 0, n; + + *out_size = 0; + fd = open(path, O_RDONLY); + if (fd < 0) + return NULL; + if (fstat(fd, &sb) < 0 || sb.st_size <= 0) { + close(fd); + return NULL; + } + raw = xmalloc(sb.st_size + 1); + while (got < sb.st_size) { + n = read(fd, raw + got, sb.st_size - got); + if (n <= 0) + break; + got += n; + } + close(fd); + if (got <= 0) { + free(raw); + return NULL; + } + *out_size = got; + return raw; +} + + int main(int argc, char *argv[]) { @@ -56,9 +155,12 @@ main(int argc, char *argv[]) char *active_path = NULL; char *lowmark_path = NULL; char *path; + char *tombstone_path = NULL; + char *tombstone_path_new = NULL; FILE *lowmark = NULL; bool purge_deleted = false; bool always_stat = false; + bool tombstone_clean = true; struct history *history; /* First thing, set up logging and our identity. */ @@ -122,6 +224,14 @@ main(int argc, char *argv[]) if (!innconf_read(NULL)) exit(1); + /* Warn about expiretombstone-without-groupbaseexpiry: in that + configuration OVEXPremove is never called so no entries are + ever appended. The setting is silently ignored. Operators + reaching for the speedup deserve to know it's not active. */ + if (innconf->expiretombstone && !innconf->groupbaseexpiry) + notice("expiretombstone has no effect when groupbaseexpiry" + " is false"); + /* Change to the runasuser user and runasgroup group if necessary. */ ensure_news_user_grp(true, true); @@ -181,6 +291,169 @@ main(int argc, char *argv[]) if (!OVctl(OVSTATALL, &always_stat)) die("can't configure overview stat behavior"); + /* Open the tombstone log. OVEXPremove appends a line per article + it cancels (inline) or schedules for removal via the rm file + (delayrm). In delayrm mode the .NEW -> final rename is performed + by expirerm after fastrm succeeds (see expirerm.in); in inline + mode this process performs the rename below. Skipped when + groupbaseexpiry is false because OVEXPremove is not called in + that mode, and when the admin has disabled the feature via + expiretombstone. + + Concurrency model: open with O_RDWR|O_CREAT (no truncate), then + take an exclusive *non-blocking* fcntl POSIX lock via + inn_lock_file. Non-blocking is required for correctness, not + just performance: the truncate below would destroy the previous + holder's content if we ever waited for the lock. If we cannot + acquire the lock, another expireover is running; disable + tombstone writing for this run and proceed with normal + expiration. Manual concurrent invocations are protected + against tombstone corruption; news.daily's shlock prevents + them in normal flow. The lock is released by fclose at + finalize time. + + Leftover handling: two recovery paths, both verified per-token + with SMretrieve(RETR_STAT) before merging into the new .NEW. + (1) An existing .NEW file means a previous run crashed or + whose expirerm failed. (2) An existing final tombstone + (without a .NEW) means the previous cycle's expirerm promoted + it but expire never consumed. Naively truncating .NEW or + overwriting the final would lose cancels that genuinely + happened, while preserving them blindly would risk dropping + history for articles still on disk (fastrm failures). Per- + token verification keeps confirmed-gone entries (SMERR_NOENT) + and entries with transient errors (SMERR_UNINIT, EIO, ...) so + the next run can re-evaluate them without silent loss; live + articles are dropped. Cost is one stat per leftover token, + only on the rare crash-recovery path. */ + if (innconf->expiretombstone && innconf->groupbaseexpiry) { + int fd; + + tombstone_path = concatpath(innconf->pathdb, "expireover.tombstone"); + tombstone_path_new = concat(tombstone_path, ".NEW", (char *) 0); + fd = open(tombstone_path_new, O_RDWR | O_CREAT, 0664); + if (fd < 0) { + syswarn("can't open tombstone log %s", tombstone_path_new); + } else if (!inn_lock_file(fd, INN_LOCK_WRITE, false)) { + /* Distinguish contention (another expireover holds it) + from other lock errors (NFS lockd outage, kernel + resource exhaustion, etc.). */ + if (errno == EAGAIN || errno == EACCES) { + warn("another expireover holds the tombstone log lock" + " on %s; disabling tombstone for this run", + tombstone_path_new); + } else { + syswarn("can't lock %s; disabling tombstone for this" + " run", + tombstone_path_new); + } + close(fd); + fd = -1; + } else { + /* Recover leftover content under the lock, filter through + SMretrieve, then truncate .NEW and rewrite. All under + the same fd/lock so no other writer can race. Two + sources to recover: + + (a) the existing .NEW file: residue from a prior + expireover run that crashed, or whose expirerm + never promoted .NEW to final. + (b) the existing final tombstone: residue from a + prior cycle where expirerm did promote .NEW to + final but expire never ran (or never finished) + so the entries were never consumed. Without + this recovery, this expireover's end-of-run + rename(.NEW, final) would silently overwrite + the unconsumed file, dropping its records. + + Both use verify_tombstone_lines, which preserves + entries on transient SMretrieve errors so a momentary + storage outage doesn't silently lose cancel records. */ + struct buffer kept; + struct stat sb; + char *raw; + ssize_t got = 0; + + kept.data = NULL; + kept.size = kept.used = kept.left = 0; + + /* Recover unconsumed final tombstone first. */ + raw = slurp_tombstone(tombstone_path, &got); + if (raw != NULL) { + verify_tombstone_lines(raw, got, &kept); + free(raw); + /* Unlink whether or not we extracted entries: the + active .NEW (after this run's rename) becomes the + new authoritative final. Preserving the old one + would risk a future expireover folding the same + entries in twice. */ + if (unlink(tombstone_path) < 0 && errno != ENOENT) + syswarn("can't unlink leftover %s", tombstone_path); + } + + /* Recover .NEW leftover. */ + if (fstat(fd, &sb) == 0 && sb.st_size > 0) { + raw = xmalloc(sb.st_size + 1); + got = 0; + while (got < sb.st_size) { + ssize_t n = read(fd, raw + got, sb.st_size - got); + if (n <= 0) + break; + got += n; + } + verify_tombstone_lines(raw, got, &kept); + free(raw); + } + if (ftruncate(fd, 0) < 0) { + syswarn("can't truncate %s", tombstone_path_new); + close(fd); + fd = -1; + } else if (lseek(fd, 0, SEEK_SET) < 0) { + syswarn("can't seek %s", tombstone_path_new); + close(fd); + fd = -1; + } else { + OVtombstonefile = fdopen(fd, "w"); + if (OVtombstonefile == NULL) { + syswarn("can't fdopen %s", tombstone_path_new); + close(fd); + fd = -1; + } else { + /* Format marker. Readers tolerate comment lines + (lines starting with #), so any future format + change can be detected by inspecting this + header without breaking earlier readers. */ + if (fputs(TOMBSTONE_HEADER, OVtombstonefile) + == EOF) + syswarn("can't write header to %s", + tombstone_path_new); + if (kept.left > 0) { + /* Replay verified leftover entries before + this run's new entries are appended by + OVEXPremove. (struct buffer convention: + appended data lives at data[used .. + used+left]; used==0 here so we start at + data.) */ + if (fwrite(kept.data + kept.used, 1, kept.left, + OVtombstonefile) + != kept.left + || fflush(OVtombstonefile) == EOF) + syswarn("can't write recovered leftover" + " to %s", + tombstone_path_new); + } + } + } + free(kept.data); + } + if (OVtombstonefile == NULL) { + free(tombstone_path); + free(tombstone_path_new); + tombstone_path = NULL; + tombstone_path_new = NULL; + } + } + /* We want to be careful about being interrupted from this point on, so set up our signal handlers. */ xsignal(SIGTERM, fatal_signal); @@ -220,5 +493,45 @@ main(int argc, char *argv[]) if (fclose(lowmark) == EOF) syswarn("can't close %s", lowmark_path); + /* Finalize the tombstone log. In inline-cancel mode, rename .NEW + to its final name iff we completed a full pass without errors -- + a partial log would let expire incorrectly drop history entries + for articles that were not actually cancelled this run. + In delayrm mode, leave the .NEW alone: expirerm performs the + rename after fastrm has actually deleted the articles. If + expirerm fails or is skipped, the .NEW is wiped by the next + expireover, so a partial log can never be consumed by expire. + No fsync: the log is advisory; if a crash loses the buffered + tail, expire just runs the slow path. */ + if (OVtombstonefile != NULL) { + if (fclose(OVtombstonefile) == EOF) { + syswarn("can't finalize tombstone log %s", tombstone_path_new); + tombstone_clean = false; + } + if (signalled) + tombstone_clean = false; + if (ovge.delayrm) { + /* Leave .NEW for expirerm. If we were signalled or hit + an error, unlink it so expirerm doesn't promote a + truncated log. */ + if (!tombstone_clean) { + if (unlink(tombstone_path_new) < 0) + syswarn("can't unlink %s", tombstone_path_new); + } + } else if (tombstone_clean) { + if (rename(tombstone_path_new, tombstone_path) < 0) { + syswarn("can't rename %s to %s", tombstone_path_new, + tombstone_path); + if (unlink(tombstone_path_new) < 0) + syswarn("can't unlink %s", tombstone_path_new); + } + } else { + if (unlink(tombstone_path_new) < 0) + syswarn("can't unlink %s", tombstone_path_new); + } + free(tombstone_path); + free(tombstone_path_new); + } + return 0; } diff --git a/expire/expirerm.in b/expire/expirerm.in index 9b48089ed..e26a38947 100644 --- a/expire/expirerm.in +++ b/expire/expirerm.in @@ -16,17 +16,38 @@ if [ -z "$1" ]; then | eval ${MAIL} exit 0 fi -if [ ! -f $1 ]; then +if [ ! -f "$1" ]; then echo "Expire called with no files to expire on $(hostname)" \ | eval ${MAIL} exit 0 fi eval "cd ${SPOOL} \ - && ${RMPROC} <$1 \ - && mv $1 ${MOST_LOGS}/expire.list" -if [ -f $1 ]; then + && ${RMPROC} <\"$1\" \ + && mv \"$1\" ${MOST_LOGS}/expire.list" +if [ -f "$1" ]; then echo "Expire had problems removing articles on $(hostname)" \ | eval ${MAIL} exit 1 fi + +## If expireover wrote a tombstone log for delayed-removal mode, the +## .NEW file holds the tokens it scheduled for cancellation. Now that +## fastrm has succeeded, promote it to its final name so the next +## expire run can consume it. +## +## A failed promotion is a degradation, not a hard failure: the +## articles are already gone from disk, so news.daily can continue. +## The next expire just falls back to its slow per-article +## SMretrieve check for whatever was supposed to be in this batch. +## Email the newsmaster but exit 0 so the surrounding pipeline keeps +## going. +TOMBSTONE_NEW="${PATHDB}/expireover.tombstone.NEW" +TOMBSTONE="${PATHDB}/expireover.tombstone" +if [ -f "${TOMBSTONE_NEW}" ]; then + mv "${TOMBSTONE_NEW}" "${TOMBSTONE}" || { + echo "Expire could not promote ${TOMBSTONE_NEW} on $(hostname);" \ + "next expire will run the slow path for this batch" \ + | eval ${MAIL} + } +fi diff --git a/expire/makehistory.c b/expire/makehistory.c index b520c8f1d..4e6317ddb 100644 --- a/expire/makehistory.c +++ b/expire/makehistory.c @@ -287,6 +287,15 @@ FlushOverTmpFile(void) arrived = (time_t) atoll(line); expires = (time_t) atoll(p); } + /* Validate before TextToToken: that function returns an + * all-zero TOKEN on malformed input rather than an error + * sentinel, so a bad line would silently insert a bogus + * overview entry. */ + if (!IsToken(q)) { + warn("sorted overview file %s has a malformed token at %d", + SortedTmpPath, count); + continue; + } token = TextToToken(q); if (OVadd(token, r, strlen(r), arrived, expires) == OVADDFAILED) { if (OVctl(OVSPACE, (void *) &f) diff --git a/frontends/sm.c b/frontends/sm.c index 761109f15..c3e183a7e 100644 --- a/frontends/sm.c +++ b/frontends/sm.c @@ -10,6 +10,8 @@ #include "inn/innconf.h" #include "inn/libinn.h" #include "inn/messages.h" +#include "inn/newsuser.h" +#include "inn/paths.h" #include "inn/qio.h" #include "inn/storage.h" #include "inn/wire.h" @@ -259,6 +261,10 @@ process_token(const char *id, const struct options *options) warn("could not remove %s: %s", id, SMerrorstr); return false; } + /* Record the cancel so a later expire run can drop the history + * entry without an SMretrieve. Best-effort; no-op when + * expiretombstone is disabled. */ + SMcanceltombstone(token); } else { article = SMretrieve(token, options->header ? RETR_HEAD : RETR_ALL); if (article == NULL) { @@ -360,6 +366,17 @@ main(int argc, char *argv[]) if (!SMsetup(SM_RDWR, &value)) die("cannot set up storage manager"); } + + /* On the -r/-d path with the cancel tombstone enabled, drop to + * the news user/group so cancels.tombstone is created (or + * appended) with ownership consistent with innd's writes. + * Bypassed under the INN_TESTSUITE env var so the test harness + * can run sm without a real news user on the system. All other + * sm modes (-i, -c, -H, -R, -s, retrieval) preserve the + * historical behaviour of running as the invoking user. */ + if (options.delete && innconf->expiretombstone + && getenv(INN_ENV_TESTSUITE) == NULL) + ensure_news_user_grp(true, true); if (!SMinit()) die("cannot initialize storage manager: %s", SMerrorstr); diff --git a/include/inn/innconf.h b/include/inn/innconf.h index f1add2628..fa2d70f48 100644 --- a/include/inn/innconf.h +++ b/include/inn/innconf.h @@ -76,6 +76,9 @@ struct innconf { OVERVIEW.FMT */ struct vector *extraoverviewhidden; /* Extra overview fields silently generated */ + bool expiretombstone; /* Use expireover -> expire tombstone log + to skip per-article SMretrieve in + expire? */ bool groupbaseexpiry; /* Do expiry by newsgroup? */ bool mergetogroups; /* Refile articles from to.* into to */ bool nfswriter; /* Use NFS writer functionality */ diff --git a/include/inn/ov.h b/include/inn/ov.h index 2f2259276..4afd6032a 100644 --- a/include/inn/ov.h +++ b/include/inn/ov.h @@ -51,6 +51,7 @@ typedef struct _OVGE { } OVGE; extern bool OVstatall; + bool OVopen(int mode); bool OVgroupstats(char *group, int *lo, int *hi, int *count, int *flag); bool OVgroupadd(char *group, ARTNUM lo, ARTNUM hi, char *flag); diff --git a/include/inn/storage.h b/include/inn/storage.h index 4f1e8b415..07656c29b 100644 --- a/include/inn/storage.h +++ b/include/inn/storage.h @@ -106,6 +106,28 @@ ARTHANDLE *SMretrieve(const TOKEN token, const RETRTYPE amount); ARTHANDLE *SMnext(ARTHANDLE *article, const RETRTYPE amount); void SMfreearticle(ARTHANDLE *article); bool SMcancel(TOKEN token); + +/* + * Best-effort: append a token to the out-of-band cancel tombstone log + * (${pathdb}/cancels.tombstone) so a later expire run can drop the + * matching history entry without an SMretrieve(RETR_STAT). Intended + * for callers that cancel articles outside the expireover/expirerm + * pipeline (innd's ARTcancel and sm -r). No-op when + * innconf->expiretombstone is false or token has type TOKEN_EMPTY. + * + * Concurrency: appenders take a non-blocking shared (F_RDLCK) fcntl + * lock; expire's consumer takes an exclusive lock briefly during + * snapshot/rename. POSIX O_APPEND atomicity for sub-PIPE_BUF + * writes keeps single-line tokens from interleaving across + * concurrent appenders. + * + * Returns true on success (token appended, flushed, file closed), + * false on any failure path. Failures are logged via syswarn; + * callers can ignore the return value (the cancel itself has + * already succeeded by the time this is called). + */ +bool SMcanceltombstone(TOKEN token); + bool SMprobe(PROBETYPE type, TOKEN *token, void *value); bool SMflushcacheddata(FLUSHTYPE type); void SMprintfiles(FILE *file, TOKEN token, char **xref, int ngroups); diff --git a/include/inn/tombstone.h b/include/inn/tombstone.h new file mode 100644 index 000000000..7efb00323 --- /dev/null +++ b/include/inn/tombstone.h @@ -0,0 +1,100 @@ +/* +** Helpers for the expire-tombstone log files. +** +** These routines are shared between the expire binary (which consumes +** the logs to skip per-article SMretrieve(RETR_STAT) calls) and the +** test suite that exercises the consumption logic directly. +** +** See doc/pod/inn.conf.pod under "expiretombstone" for the file +** semantics: +** - ${pathdb}/expireover.tombstone : written by expireover/expirerm +** via OVEXPremove, atomic .NEW -> final rename +** - ${pathdb}/cancels.tombstone : appended continuously by innd +** and sm via SMcanceltombstone() for cancels outside the +** expireover/expirerm pipeline +*/ + +#ifndef INN_TOMBSTONE_H +#define INN_TOMBSTONE_H + +#include "inn/hashtab.h" +#include "inn/portable-macros.h" +#include "inn/portable-stdbool.h" +#include "inn/storage.h" + +/* +** Format marker written at the top of every tombstone file. +** Readers skip leading comment lines silently so the marker can +** evolve without breaking earlier consumers. +*/ +#define TOMBSTONE_HEADER "# inn-tombstone v1\n" + +BEGIN_DECLS + +/* +** Create a hashset suited to holding TOKEN keys (each entry is a +** TOKEN allocated by the caller; the hashset owns and frees them). +** size is the initial bucket hint; the hash auto-expands. +*/ +struct hash *tombstone_hash_create(size_t size); + +/* +** Read tombstone entries from path into the hashset. Each non-blank +** line is parsed with TextToToken and inserted; malformed lines are +** skipped with a warning. Duplicates are dropped silently. Returns +** the number of valid entries seen, or 0 if the file does not exist +** (ENOENT is silent; other open errors emit syswarn). +** +** If out_error is non-NULL, *out_error is set to true when the read +** was incomplete (open failed for a non-ENOENT reason or fgets hit a +** stream error mid-file) and to false otherwise. Callers that cache +** freshness based on file mtime should not treat a partial read as +** authoritative. ENOENT does not set the flag because an absent +** file is a successful "no entries" read. +*/ +unsigned long tombstone_read(struct hash *h, const char *path, + bool *out_error); + +/* +** Atomically snapshot a continuously-appended tombstone file by +** renaming it to "${path}.processing" under an exclusive POSIX lock. +** This serializes against concurrent appenders that hold the same +** lock (e.g., SMcanceltombstone()), so any in-progress write either +** completes before the rename or proceeds against the renamed inode. +** +** Returns the snapshot path (caller must free and unlink after +** consumption) or NULL if the source file does not exist or cannot +** be renamed. +*/ +char *tombstone_rename_for_processing(const char *path); + +/* +** Look up a TOKEN in the hashset. Returns true if present. +*/ +bool tombstone_present(struct hash *h, const TOKEN *token); + +/* +** Ensure path exists and starts with TOMBSTONE_HEADER under an +** exclusive POSIX lock. Idempotent: a file whose first bytes +** already match TOMBSTONE_HEADER is left untouched. Otherwise the +** helper shifts any existing content right by sizeof(TOMBSTONE_ +** HEADER)-1 bytes via a chunked end-to-start pread/pwrite walk on +** the same fd, then writes the header at offset 0. Working +** memory is bounded by an internal chunk size regardless of file +** size, so a pathological never-consumed file cannot OOM. +** +** An empty file (or one freshly created by this call) just gets +** the header written at offset 0. An appender that raced a +** cancel into a new live file between the consumer's unlink and +** this call has its content preserved verbatim below the header. +** Best-effort: failures log via syswarn but do not raise. +** +** Called by expire after consuming cancels.tombstone.processing +** to leave a header-only file behind so the nnrpd fast path +** remains active through quiet inter-cancel periods. +*/ +void tombstone_ensure_header(const char *path); + +END_DECLS + +#endif /* INN_TOMBSTONE_H */ diff --git a/innd/art.c b/innd/art.c index 04393c7f3..616a938ed 100644 --- a/innd/art.c +++ b/innd/art.c @@ -1280,9 +1280,15 @@ ARTcancel(const ARTDATA *data, const char *MessageID, const bool Trusted) /* Get stored message and zap them. */ if (innconf->enableoverview) OVcancel(token); - if (!SMcancel(token) && SMerrno != SMERR_NOENT && SMerrno != SMERR_UNINIT) + if (SMcancel(token) || SMerrno == SMERR_NOENT) { + /* Record the out-of-band cancel so a later expire run can drop + * the history entry without an SMretrieve(RETR_STAT). Best- + * effort and a no-op when expiretombstone is disabled. */ + SMcanceltombstone(token); + } else if (SMerrno != SMERR_UNINIT) { syslog(L_ERROR, "%s cant cancel %s (SMerrno %d)", LogName, TokenToText(token), SMerrno); + } if (innconf->immediatecancel && !SMflushcacheddata(SM_CANCELLEDART)) syslog(L_ERROR, "%s cant cancel cached %s", LogName, TokenToText(token)); diff --git a/lib/innconf.c b/lib/innconf.c index 145e66b5d..6e0d34751 100644 --- a/lib/innconf.c +++ b/lib/innconf.c @@ -71,6 +71,7 @@ static const struct config config_table[] = { {K(enableoverview), BOOL(true) }, {K(extraoverviewadvertised), LIST(NULL) }, {K(extraoverviewhidden), LIST(NULL) }, + {K(expiretombstone), BOOL(false) }, {K(fromhost), STRING(NULL) }, {K(groupbaseexpiry), BOOL(true) }, {K(mailcmd), STRING(NULL) }, diff --git a/nnrpd/article.c b/nnrpd/article.c index 1c16bd003..7576509aa 100644 --- a/nnrpd/article.c +++ b/nnrpd/article.c @@ -7,13 +7,16 @@ #include #include #include +#include #include #include "cache.h" #include "inn/innconf.h" +#include "inn/libinn.h" #include "inn/messages.h" #include "inn/ov.h" #include "inn/overview.h" +#include "inn/tombstone.h" #include "inn/wire.h" #include "nnrpd.h" #include "tls.h" @@ -260,17 +263,131 @@ ARTclose(void) } } +/* +** Cancel-tombstone fast path for ARTinstorebytoken. When both +** innconf->expiretombstone and PERMaccessconf->nnrpdcheckart are true, +** consult ${pathdb}/cancels.tombstone before falling through to the +** per-article SMretrieve syscall. The tombstone records cancels +** written by innd's ARTcancel and by sm -r, the out-of-band paths +** that can leave overview entries pointing at gone storage in the +** brief race window before overview cleanup propagates, or until the +** next expireover for sm -r. +** +** Lazy per-connection: loaded on first call, refreshed on mtime +** change. Memory cost is ~50 bytes per cancel; typical sites have +** hundreds of entries. Falls through to SMretrieve on load failure. +*/ +static struct hash *nnrpd_tombstone = NULL; +static char *nnrpd_tombstone_path = NULL; +static time_t nnrpd_tombstone_mtime = 0; +static off_t nnrpd_tombstone_size = 0; + +/* +** Bring the per-connection tombstone cache up to date. Returns true +** if a usable hashset is loaded after the call (caller may then +** consult tombstone_present), false otherwise (file missing or load +** failed; caller should fall through to the slow path). +** +** Stat'd on every call so a cancel recorded by another process +** becomes visible to readers immediately. The cost is one stat() +** on a fixed path that stays hot in the dentry cache, which is +** trivially cheap compared to the per-article SMretrieve syscalls +** the fast path elides. Reload only happens when mtime or size +** changes, so the read+parse cost is paid at most once per cancel. +** +** Freshness is keyed on both mtime and size: mtime alone has 1- +** second granularity, so a rename-and-recreate within the same +** second can land a fresh inode with the cached mtime; size is +** monotonic-append between rotations and so changes whenever a +** new line is added. Together they catch every modification. +** +** If tombstone_read reports a partial read (mid-file ferror), the +** cached mtime/size are not updated, so the next call retries the +** read instead of trusting a partial hashset. +*/ +static bool +nnrpd_tombstone_refresh(void) +{ + /* Initial hash bucket count for the per-connection tombstone cache. + * Sized for the typical case (hundreds of cancels per cycle); the + * hashset auto-expands when the load factor is exceeded. */ + static const size_t initial_buckets = 256; + struct stat sb; + bool read_error = false; + + if (nnrpd_tombstone_path == NULL) + nnrpd_tombstone_path = + concatpath(innconf->pathdb, "cancels.tombstone"); + + /* If the file has gone away since our last load, drop our cache + * and fall through to the slow path. */ + if (stat(nnrpd_tombstone_path, &sb) < 0) { + if (nnrpd_tombstone != NULL) { + hash_free(nnrpd_tombstone); + nnrpd_tombstone = NULL; + nnrpd_tombstone_mtime = 0; + nnrpd_tombstone_size = 0; + } + return false; + } + + /* Fresh enough? */ + if (nnrpd_tombstone != NULL && sb.st_mtime == nnrpd_tombstone_mtime + && sb.st_size == nnrpd_tombstone_size) + return true; + + /* (Re)load. */ + if (nnrpd_tombstone != NULL) + hash_free(nnrpd_tombstone); + nnrpd_tombstone = tombstone_hash_create(initial_buckets); + tombstone_read(nnrpd_tombstone, nnrpd_tombstone_path, &read_error); + if (read_error) { + /* Partial parse; do not advance the freshness key so the + * next call retries. The hashset still holds whatever lines + * we did parse, which is not wrong (those are real cancels), + * just incomplete. */ + return true; + } + nnrpd_tombstone_mtime = sb.st_mtime; + nnrpd_tombstone_size = sb.st_size; + return true; +} + + bool ARTinstorebytoken(TOKEN token) { ARTHANDLE *art; struct timeval stv, etv; - if (PERMaccessconf->nnrpdoverstats) { - gettimeofday(&stv, NULL); + /* Fast path: if both expiretombstone and nnrpdcheckart are + * enabled and the cache loads, the cancels.tombstone log + * records out-of-band cancels (innd ARTcancel, sm -r) that may + * not yet be reflected in overview. Tombstone hit means the + * article is gone; tombstone miss on a non-self-expiring + * backend means it is alive and we can skip the SMretrieve + * syscall. Self-expiring backends (CNFS) still need the slow + * path because wrap-around bypasses SMcancel and the tombstone. + * + * The nnrpdcheckart gate matches the documented contract: + * tombstone-driven existence checks are an alternative + * implementation of nnrpdcheckart's existence verification, so + * we should not run them when the operator has explicitly + * disabled article-existence checking. + * + * SMprobe(SELFEXPIRE) is checked first so pure-CNFS sites pay + * only that probe (a static per-method attribute, no I/O) and + * skip both the hash lookup and the cache refresh. */ + if (innconf->expiretombstone && PERMaccessconf->nnrpdcheckart + && !SMprobe(SELFEXPIRE, &token, NULL) + && nnrpd_tombstone_refresh()) { + return !tombstone_present(nnrpd_tombstone, &token); } - art = SMretrieve(token, RETR_STAT); + /* XXX This isn't really overstats, is it? */ + if (PERMaccessconf->nnrpdoverstats) + gettimeofday(&stv, NULL); + art = SMretrieve(token, RETR_STAT); if (PERMaccessconf->nnrpdoverstats) { gettimeofday(&etv, NULL); OVERartcheck += (etv.tv_sec - stv.tv_sec) * 1000; diff --git a/samples/inn.conf.in b/samples/inn.conf.in index 2e761ff67..5d95b0475 100644 --- a/samples/inn.conf.in +++ b/samples/inn.conf.in @@ -66,6 +66,7 @@ cnfscheckfudgesize: 0 enableoverview: true extraoverviewadvertised: [ ] extraoverviewhidden: [ ] +expiretombstone: false groupbaseexpiry: true mergetogroups: false nfswriter: false diff --git a/storage/Makefile b/storage/Makefile index fa53c0007..0e08b62e1 100644 --- a/storage/Makefile +++ b/storage/Makefile @@ -12,7 +12,7 @@ top = .. CFLAGS = $(GCFLAGS) -I. $(BDB_CPPFLAGS) $(SQLITE3_CPPFLAGS) SOURCES = expire.c interface.c methods.c ov.c overdata.c overview.c \ - ovmethods.c $(METHOD_SOURCES) + ovmethods.c tombstone.c $(METHOD_SOURCES) OBJECTS = $(SOURCES:.c=.o) LOBJECTS = $(OBJECTS:.o=.lo) diff --git a/storage/expire.c b/storage/expire.c index 8836805c2..6434f8b33 100644 --- a/storage/expire.c +++ b/storage/expire.c @@ -14,6 +14,7 @@ #include "inn/innconf.h" #include "inn/libinn.h" +#include "inn/messages.h" #include "inn/ov.h" #include "inn/overview.h" #include "inn/paths.h" @@ -100,6 +101,7 @@ static char *ACTIVE; This should be cleaned up with a better internal interface. */ time_t OVnow; FILE *EXPunlinkfile; +FILE *OVtombstonefile; bool OVignoreselfexpire; bool OVusepost; bool OVkeep; @@ -522,16 +524,53 @@ OVEXPremove(TOKEN token, bool deletedgroups, char **xref, int ngroups) } if (EXPunlinkfile && xref != NULL) { SMprintfiles(EXPunlinkfile, token, xref, ngroups); - if (!ferror(EXPunlinkfile)) + if (!ferror(EXPunlinkfile)) { + /* Deferred-removal path: actual SMcancel happens later via + * expirerm/fastrm. Record the token in the tombstone log + * now so the eventual expire run can consume it. The + * .NEW -> final rename is performed by expirerm after + * fastrm succeeds (see expirerm.in); if expirerm fails or + * is skipped, the .NEW file is wiped by the next + * expireover and never consumed, so a partial rmfile + * never lets expire drop history for live articles. */ + if (OVtombstonefile != NULL) { + fprintf(OVtombstonefile, "%s\n", TokenToText(token)); + if (ferror(OVtombstonefile)) { + syswarn("can't write tombstone log; ignoring it" + " for rest of run"); + fclose(OVtombstonefile); + OVtombstonefile = NULL; + } + } return; - fprintf(stderr, "Can't write to -z file, %s\n", strerror(errno)); - fprintf(stderr, "(Will ignore it for rest of run.)\n"); + } + syswarn("can't write to -z file; ignoring it for rest of run"); fclose(EXPunlinkfile); EXPunlinkfile = NULL; } - if (!SMcancel(token) && SMerrno != SMERR_NOENT && SMerrno != SMERR_UNINIT) - fprintf(stderr, "Can't unlink %s: %s\n", TokenToText(token), - SMerrorstr); + /* Inline-cancel path. Tombstone is written only on a successful + * SMcancel (or NOENT, where the article is already gone): the log + * must be a strict subset of articles actually removed. Otherwise + * a non-NOENT/UNINIT failure (EIO, EBUSY, fs permission, etc.) + * would let the next expire drop the history entry while the + * article remains on disk: an undetectable permanent orphan. + * The inverse risk (cancelled but not logged on a crash between + * SMcancel and fprintf) leaves a history entry pointing at a + * missing article, which is recoverable by a periodic non-tombstone + * reconciliation run. */ + if (SMcancel(token) || SMerrno == SMERR_NOENT) { + if (OVtombstonefile != NULL) { + fprintf(OVtombstonefile, "%s\n", TokenToText(token)); + if (ferror(OVtombstonefile)) { + syswarn("can't write tombstone log; ignoring it for" + " rest of run"); + fclose(OVtombstonefile); + OVtombstonefile = NULL; + } + } + } else if (SMerrno != SMERR_UNINIT) { + warn("can't unlink %s: %s", TokenToText(token), SMerrorstr); + } } /* diff --git a/storage/interface.c b/storage/interface.c index a5835c454..75550617e 100644 --- a/storage/interface.c +++ b/storage/interface.c @@ -6,7 +6,9 @@ #include #include +#include #include +#include #include "conffile.h" #include "inn/innconf.h" @@ -817,6 +819,139 @@ SMcancel(TOKEN token) return storage_methods[typetoindex[token.type]].cancel(token); } + +/* +** Append a token to the out-of-band cancel tombstone log. Best-effort: +** failures are logged but do not affect the caller. Safe for concurrent +** use across processes via inn_lock_file (fcntl POSIX locks; NFS-correct). +** +** Used by callers (innd, sm) that cancel articles outside the +** expireover/expirerm pipeline. expire reads this log alongside the +** expireover.tombstone so it can drop history entries for articles +** cancelled out-of-band without per-article SMretrieve(RETR_STAT). +** +** No-op when innconf->expiretombstone is false. No-op for TOKEN_EMPTY. +** +** Locking model: shared (F_RDLCK) for appenders, exclusive (F_WRLCK) +** for the consumer's read+rename phase (see tombstone_rename_for_ +** processing). The lock type names "read" and "write" here describe +** reader-vs-truncator coordination, not file access pattern; both +** innd and sm write the file under the shared lock. POSIX guarantees +** that each write(2) to a regular file opened with O_APPEND is +** atomic with respect to other writers, so single-line tokens +** interleave correctly without per-write serialization. POSIX +** requires F_RDLCK only on a descriptor open for reading, so we +** open O_RDWR even though we only write here. +** +** Lock is acquired non-blocking. innd's main loop must never wait +** on filesystem locking (NFS lockd outages, expire's brief +** rename hold). If the lock is contested, we skip this tombstone +** write rather than block. Concurrent appenders do not contend +** because shared locks coexist; only expire's exclusive lock +** briefly excludes them, during the consumer's rename of cancels. +** tombstone to .processing and during the post-consume header +** re-seeding shift (tombstone_ensure_header). Both windows +** scale with the file size at the moment they fire; for a +** typical site (a few KiB of tokens) the combined hold is well +** under a millisecond and the EAGAIN retry below absorbs it. +*/ +bool +SMcanceltombstone(TOKEN token) +{ + char *path; + int fd; + FILE *fp; + const char *text; + bool ok = true; + + /* Both knobs are required: expire only consumes the log when + * groupbaseexpiry is also true, so writing without that gate + * would leak entries forever and bloat nnrpd's per-connection + * cache. */ + if (!innconf->expiretombstone || !innconf->groupbaseexpiry) + return true; + if (token.type == TOKEN_EMPTY) + return true; + + path = concatpath(innconf->pathdb, "cancels.tombstone"); + /* O_RDWR (not O_WRONLY) so we can take F_RDLCK below; POSIX + * mandates lock-mode-matches-fd-mode. */ + fd = open(path, O_RDWR | O_CREAT | O_APPEND, 0664); + if (fd < 0) { + syswarn("can't open %s for tombstone append", path); + free(path); + return false; + } + /* Shared, non-blocking. Multiple appenders coexist; only blocked + * by expire's exclusive consumer lock during its brief rename. + * + * Race window: between our open() and our lock(), expire may + * rename(cancels.tombstone, cancels.tombstone.processing), + * leaving us with a fd pointing at the renamed (doomed) inode. + * If we acquired the lock on that fd and wrote, our entry would + * be consumed only by expire's already-loaded snapshot -- and + * our writes appended after the rename are not in that snapshot, + * so they would be unlinked when expire finishes. + * + * Resolution: on EAGAIN/EACCES (lock held by expire's rename + * window), close and reopen the path. After expire releases + * its exclusive lock, the path resolves to a fresh inode (or + * nothing, in which case O_CREAT makes a new one). Either way, + * the second open lands on the live file the next consumer + * will see. A 1ms sleep is more than enough for expire to + * complete its rename and release. */ + if (!inn_lock_file(fd, INN_LOCK_READ, false)) { + if (errno == EAGAIN || errno == EACCES) { + struct timespec ts = {0, 1000000L}; /* 1ms */ + close(fd); + nanosleep(&ts, NULL); + fd = open(path, O_RDWR | O_CREAT | O_APPEND, 0664); + if (fd < 0) { + syswarn("can't reopen %s for tombstone append", path); + free(path); + return false; + } + if (!inn_lock_file(fd, INN_LOCK_READ, false)) { + syswarn("can't lock %s after retry; skipping" + " tombstone entry", + path); + close(fd); + free(path); + return false; + } + } else { + syswarn("can't lock %s; skipping tombstone entry", path); + close(fd); + free(path); + return false; + } + } + fp = fdopen(fd, "a"); + if (fp == NULL) { + syswarn("can't fdopen %s", path); + inn_lock_file(fd, INN_LOCK_UNLOCK, true); + close(fd); + free(path); + return false; + } + text = TokenToText(token); + /* Single fprintf + fflush keeps the kernel-visible write to one + * syscall (~38-byte payload) so the per-write O_APPEND atomicity + * guarantee holds for concurrent shared-lock appenders. */ + if (fprintf(fp, "%s\n", text) < 0 || fflush(fp) == EOF) { + syswarn("can't write to %s", path); + ok = false; + } + /* fclose closes the underlying fd, which releases the fcntl + * POSIX lock; no separate INN_LOCK_UNLOCK needed. */ + if (fclose(fp) == EOF) { + syswarn("can't close %s", path); + ok = false; + } + free(path); + return ok; +} + bool SMprobe(PROBETYPE type, TOKEN *token, void *value) { diff --git a/storage/ovinterface.h b/storage/ovinterface.h index 41ed2f16b..4e0321388 100644 --- a/storage/ovinterface.h +++ b/storage/ovinterface.h @@ -5,6 +5,8 @@ #ifndef OVINTERFACE_H #define OVINTERFACE_H +#include + #include "config.h" #include "inn/history.h" #include "inn/ov.h" @@ -41,6 +43,17 @@ void OVEXPcleanup(void); extern time_t OVnow; extern FILE *EXPunlinkfile; + +/* Tombstone log: when set, OVEXPremove appends the textual form of each + * successfully cancelled token to this stream. A subsequent expire run + * consumes the log to drop history entries for those articles without + * doing per-article SMretrieve(RETR_STAT) calls. Set by expireover (and + * by fastrm when called via expirerm in delayrm mode). Internal -- + * managed via direct extern access by expireover/fastrm, not via OVctl, + * because the file lifecycle (atomic .NEW -> final rename on success) + * does not fit the per-call OVctl pattern. */ +extern FILE *OVtombstonefile; + extern bool OVignoreselfexpire; extern bool OVusepost; extern bool OVkeep; diff --git a/storage/tombstone.c b/storage/tombstone.c new file mode 100644 index 000000000..168abc2e9 --- /dev/null +++ b/storage/tombstone.c @@ -0,0 +1,277 @@ +/* +** Helpers for the expire-tombstone log files. +** +** See include/inn/tombstone.h for the API and inn.conf.pod under +** "expiretombstone" for the file semantics. Shared between the +** expire binary and the test suite so both exercise identical +** parsing, hashing, and atomic-snapshot logic. +*/ + +#include "portable/system.h" + +#include +#include +#include +#include +#include + +#include "inn/hashtab.h" +#include "inn/libinn.h" +#include "inn/messages.h" +#include "inn/storage.h" +#include "inn/tombstone.h" + + +/* +** Hashset glue. Keys are 18-byte TOKEN structs (1 type + 1 class + +** 16 token bytes) hashed and compared as raw bytes. hash_lookup2 +** derives a stable unsigned long from the bytes; tombstone_equal +** uses memcmp for an exact match. +*/ + +static unsigned long +ts_hash(const void *p) +{ + return hash_lookup2((const char *) p, sizeof(TOKEN), 0); +} + +static const void * +ts_key(const void *p) +{ + return p; +} + +static bool +ts_equal(const void *a, const void *b) +{ + return memcmp(a, b, sizeof(TOKEN)) == 0; +} + + +struct hash * +tombstone_hash_create(size_t size) +{ + return hash_create(size, ts_hash, ts_key, ts_equal, free); +} + + +unsigned long +tombstone_read(struct hash *h, const char *path, bool *out_error) +{ + FILE *f; + char line[SMBUF]; + size_t len; + unsigned long count = 0; + + if (out_error != NULL) + *out_error = false; + if (h == NULL || path == NULL) + return 0; + + f = fopen(path, "r"); + if (f == NULL) { + if (errno != ENOENT) { + syswarn("can't open %s; ignoring", path); + if (out_error != NULL) + *out_error = true; + } + return 0; + } + while (fgets(line, sizeof(line), f) != NULL) { + TOKEN t; + TOKEN *entry; + + len = strlen(line); + while (len > 0 && (line[len - 1] == '\n' || line[len - 1] == '\r')) + line[--len] = '\0'; + if (len == 0) + continue; + /* Comment lines (e.g., "# inn-tombstone v1") are reserved for + * future format markers and metadata. Readers ignore them + * silently so format evolution is graceful. */ + if (line[0] == '#') + continue; + if (!IsToken(line)) { + warn("malformed tombstone entry in %s: %s", path, line); + continue; + } + t = TextToToken(line); + entry = xmalloc(sizeof(TOKEN)); + *entry = t; + if (!hash_insert(h, entry, entry)) { + /* Already present (duplicate cancel), free the redundant + * allocation; the existing entry stays. */ + free(entry); + } + count++; + } + if (ferror(f)) { + syswarn("error reading %s; will use what was read", path); + if (out_error != NULL) + *out_error = true; + } + fclose(f); + return count; +} + + +char * +tombstone_rename_for_processing(const char *path) +{ + char *snapshot_path; + int fd; + + if (path == NULL) + return NULL; + + fd = open(path, O_RDWR); + if (fd < 0) { + if (errno != ENOENT) + syswarn("can't open %s", path); + return NULL; + } + if (!inn_lock_file(fd, INN_LOCK_WRITE, true)) { + syswarn("can't lock %s", path); + close(fd); + return NULL; + } + snapshot_path = concat(path, ".processing", (char *) 0); + if (rename(path, snapshot_path) < 0) { + syswarn("can't rename %s to %s", path, snapshot_path); + free(snapshot_path); + close(fd); + return NULL; + } + /* close releases the fcntl POSIX lock. */ + close(fd); + return snapshot_path; +} + + +bool +tombstone_present(struct hash *h, const TOKEN *token) +{ + if (h == NULL || token == NULL) + return false; + return hash_lookup(h, token) != NULL; +} + + +void +tombstone_ensure_header(const char *path) +{ + static const char header[] = TOMBSTONE_HEADER; + static const size_t header_len = sizeof(header) - 1; + /* Bounded working memory: shift the file right by header_len bytes + * in chunks read end-to-start. 64 KiB keeps us cache-warm without + * paying excessive syscall overhead. Memory is bounded regardless + * of file size, so a pathological never-consumed file cannot OOM + * the expire run. */ + static const size_t CHUNK = 64 * 1024; + int fd; + struct stat sb; + off_t size; + char *chunk_buf; + off_t pos; + + if (path == NULL) + return; + + /* No O_APPEND: we use explicit offsets via pread/pwrite, and on + * Linux O_APPEND would override pwrite's offset and force every + * write to EOF -- breaking the shift loop below. */ + fd = open(path, O_RDWR | O_CREAT, 0664); + if (fd < 0) { + syswarn("can't open %s for header restore", path); + return; + } + + /* Blocking exclusive lock. Appenders use shared locks, so we + * conflict; the operation runs for microseconds to milliseconds + * and only once per expire cycle, so blocking is fine. The + * serialization is what makes the read-modify-write safe + * against appenders. */ + if (!inn_lock_file(fd, INN_LOCK_WRITE, true)) { + syswarn("can't lock %s for header restore", path); + close(fd); + return; + } + + if (fstat(fd, &sb) < 0) { + syswarn("can't stat %s for header restore", path); + close(fd); + return; + } + size = sb.st_size; + + /* Idempotency fast path: if the file already starts with the + * header line, the rest is well-formed appended content and we + * have nothing to do. Without this, every call would prepend an + * additional header, growing the file by header_len each expire + * cycle on a site with zero cancels per cycle. */ + if (size >= (off_t) header_len) { + char prefix[sizeof(header)]; + + if (pread(fd, prefix, header_len, 0) == (ssize_t) header_len + && memcmp(prefix, header, header_len) == 0) { + close(fd); + return; + } + } + + /* Shift existing content right by header_len bytes, walking from + * end to start so we never read a region we have already + * overwritten. This works in place on the same fd; memory is + * bounded by CHUNK regardless of file size. A crash mid-shift + * leaves the file with some duplicated/partial lines, which + * tombstone_read tolerates (malformed lines warn and skip); no + * infinite escalation across crashes because the next run's + * idempotency check sees no header and shifts once more, + * producing a header-prefixed file the run after that leaves + * alone. */ + chunk_buf = xmalloc(CHUNK); + pos = size; + while (pos > 0) { + size_t this_chunk = (pos < (off_t) CHUNK) ? (size_t) pos : CHUNK; + off_t src = pos - this_chunk; + size_t io = 0; + ssize_t n; + + while (io < this_chunk) { + n = pread(fd, chunk_buf + io, this_chunk - io, src + io); + if (n <= 0) + break; + io += n; + } + if (io < this_chunk) { + syswarn("short read on %s during header restore", path); + free(chunk_buf); + close(fd); + return; + } + io = 0; + while (io < this_chunk) { + n = pwrite(fd, chunk_buf + io, this_chunk - io, + src + header_len + io); + if (n <= 0) { + syswarn("short write on %s during header restore", path); + free(chunk_buf); + close(fd); + return; + } + io += n; + } + pos = src; + } + free(chunk_buf); + + /* Header at offset 0. Single small pwrite; partial-write on a + * regular file is vanishingly rare but if it happens + * tombstone_read still treats the leading '#' bytes as a + * comment and skips them. */ + if (pwrite(fd, header, header_len, 0) != (ssize_t) header_len) + syswarn("can't write header to %s", path); + + /* close releases the fcntl POSIX lock. */ + close(fd); +} diff --git a/support/mkmanifest b/support/mkmanifest index c75f9a701..6788fd2c2 100755 --- a/support/mkmanifest +++ b/support/mkmanifest @@ -278,6 +278,8 @@ tests/runtests tests/authprogs/ident.t tests/clients/server-list tests/docs/pod.t +tests/expire/tombstone-hisexpire.t +tests/expire/tombstone.t tests/innd/artparse.t tests/innd/chan.t tests/lib/artnumber.t @@ -330,4 +332,5 @@ tests/overview/buffindexed.t tests/overview/tradindexed.t tests/overview/xref.t tests/perl/minimum-version.t +tests/storage/cancel-tombstone.t tests/util/innbind.t diff --git a/tests/Makefile b/tests/Makefile index fa35b1a37..a7835291b 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -30,7 +30,9 @@ TESTS = authprogs/ident.t innd/artparse.t innd/chan.t lib/artnumber.t \ lib/setenv.t lib/snprintf.t lib/strlcat.t \ lib/strlcpy.t lib/tst.t lib/uwildmat.t lib/vector.t lib/wire.t \ lib/xwrite.t nnrpd/auth-ext.t overview/api.t overview/buffindexed.t \ - overview/tradindexed.t overview/xref.t util/innbind.t + overview/tradindexed.t overview/xref.t util/innbind.t \ + expire/tombstone.t expire/tombstone-hisexpire.t \ + storage/cancel-tombstone.t ## Extra stuff that needs to be built before tests can be run. @@ -306,6 +308,19 @@ overview/tradindexed.t: overview/tradindexed-t.o tap/basic.o $(STORAGEDEPS) overview/xref.t: overview/xref-t.o tap/basic.o $(STORAGEDEPS) $(LINKDEPS) overview/xref-t.o tap/basic.o $(STORAGELIBS) $(LIBS) +expire/tombstone.t: expire/tombstone-t.o tap/basic.o $(STORAGEDEPS) + $(LINKDEPS) expire/tombstone-t.o tap/basic.o $(STORAGELIBS) $(LIBS) + +expire/tombstone-hisexpire.t: expire/tombstone-hisexpire-t.o tap/basic.o \ + $(STORAGEDEPS) + $(LINKDEPS) expire/tombstone-hisexpire-t.o tap/basic.o \ + $(STORAGELIBS) $(LIBS) + +storage/cancel-tombstone.t: storage/cancel-tombstone-t.o tap/basic.o \ + $(STORAGEDEPS) + $(LINKDEPS) storage/cancel-tombstone-t.o tap/basic.o \ + $(STORAGELIBS) $(LIBS) + perl/minimum-version.t: perl/minimum-version.t.in $(FIXSCRIPT) $(FIX) -i perl/minimum-version.t.in diff --git a/tests/TESTS b/tests/TESTS index b995af703..07490565d 100644 --- a/tests/TESTS +++ b/tests/TESTS @@ -52,6 +52,10 @@ lib/xwrite nnrpd/auth-ext overview/api overview/buffindexed +expire/tombstone +expire/tombstone-e2e +expire/tombstone-hisexpire +storage/cancel-tombstone overview/overchan overview/tradindexed overview/xref diff --git a/tests/expire/tombstone-e2e.t b/tests/expire/tombstone-e2e.t new file mode 100755 index 000000000..057c121b0 --- /dev/null +++ b/tests/expire/tombstone-e2e.t @@ -0,0 +1,174 @@ +#! /bin/sh +# +# End-to-end test for the expire-tombstone feature. +# +# Exercises the integration between sm (writes to cancels.tombstone), +# the tombstone library helpers (read/snapshot/lookup), and the +# expiretombstone-related guards. Verifies: +# - sm -r appends to ${pathdb}/cancels.tombstone when expiretombstone=true +# - sm -r does NOT write when expiretombstone=false +# - The written entries round-trip through tombstone_read into a +# hashset that can answer membership queries +# - The atomic-snapshot rename moves cancels.tombstone aside under +# lock so a fresh file can capture writes that follow + +count=1 +printcount() { + echo "$1 $count $2" + count=$(expr $count + 1) +} + +# Find the right directory. +sm="../../frontends/sm" +dirs='../data data tests/data' +for dir in $dirs; do + if [ -r "$dir/articles/1" ]; then + cd $dir + break + fi +done +if [ ! -x "$sm" ]; then + echo "Could not find sm" >&2 + exit 1 +fi + +# Use a fresh dedicated pathdb so we do not interfere with the other +# storage tests (which use ./db) and so we can flip expiretombstone +# without touching the shared etc/inn.conf. +TMPDIR_E2E="tombstone-e2e.tmp" +rm -rf "$TMPDIR_E2E" spool tradspool.map +mkdir -p "$TMPDIR_E2E" spool + +# Build a tweaked inn.conf that enables expiretombstone and points +# pathdb at our temp dir. Other paths inherit defaults from the +# tests/data tree. +cat > "$TMPDIR_E2E/inn.conf" </dev/null 2>&1 +lines=$(wc -l < "$CANCELS" | sed -e 's/[ \t]//g') +if [ "$lines" = 2 ]; then + printcount "ok" +else + printcount "not ok" "expected 2 lines after invalid-token rm, got $lines" +fi + +# 7. All entries are valid tokens (round-trip parseable). Use sm -i to +# parse each line; sm -i succeeds only if the input is a valid token +# that resolves to a known article. After cancellation the article +# is gone, so sm -i exits non-zero with "Token not found", but it +# does NOT exit with "Bad token" -- which is the format-validity +# check we want. +all_format_valid=true +while read -r line; do + case "$line" in + '@'*'@') + # Format check: starts and ends with @ + ;; + *) + all_format_valid=false + ;; + esac +done < "$CANCELS" +if $all_format_valid; then + printcount "ok" +else + printcount "not ok" "tombstone contains malformed lines" +fi + +# 8. flock contract: after sm finishes, the lock is released and another +# writer (here, ourselves via sm) can acquire it. This is implicit +# in tests 2, 4, etc., but we explicitly verify by storing+removing +# a fourth article and confirming a clean append. +token4=$($sm -s < articles/4) +$sm -r "$token4" +lines=$(wc -l < "$CANCELS" | sed -e 's/[ \t]//g') +if [ "$lines" = 3 ] && grep -qF "$token4" "$CANCELS"; then + printcount "ok" +else + printcount "not ok" "fourth cancel: expected 3 lines, got $lines" +fi + +# Cleanup. +rm -rf "$TMPDIR_E2E" spool tradspool.map "$TMPDIR_E2E.bak" +exit 0 diff --git a/tests/expire/tombstone-hisexpire-t.c b/tests/expire/tombstone-hisexpire-t.c new file mode 100644 index 000000000..aa78b737b --- /dev/null +++ b/tests/expire/tombstone-hisexpire-t.c @@ -0,0 +1,298 @@ +/* Integration test: HISexpire callback path consults the tombstone hashset. + * + * Builds a temporary history file with HISopen/HISwrite, populates a + * tombstone hashset for some of the tokens, runs HISexpire with a + * callback that mirrors EXPdoline's decision tree -- including the + * SELFEXPIRE branch (gap G). Verifies the resulting new history file + * contains exactly the tokens that should survive. + * + * The callback distinguishes four token categories: + * N_KEPT not tombstoned, not self-expiring -> keep + * N_TOMBSTONED in tombstone (fast path) -> drop + * N_SELFEXP_GONE self-expiring, simulated NOENT -> drop (slow path) + * N_SELFEXP_ALIVE self-expiring, simulated alive -> keep + * + * The slow path mirrors EXPdoline's behaviour for backends where + * SMprobe(SELFEXPIRE) is true: even with a tombstone hashset present, + * we still consult the simulated SMretrieve because articles can vanish + * via wrap-around without going through SMcancel. */ + +#include "portable/system.h" + +#include +#include +#include + +#include "inn/hashtab.h" +#include "inn/history.h" +#include "inn/libinn.h" +#include "inn/messages.h" +#include "inn/storage.h" +#include "inn/tombstone.h" +#include "tap/basic.h" + + +#define N_KEPT 5 +#define N_TOMBSTONED 4 +#define N_SELFEXP_GONE 3 +#define N_SELFEXP_ALIVE 2 + +#define N_TOTAL (N_KEPT + N_TOMBSTONED + N_SELFEXP_GONE + N_SELFEXP_ALIVE) + + +/* Cookie threading two hashsets through HISexpire's callback: the + * tombstone (real EXPdoline checks this first) and a "gone" set used to + * simulate SMretrieve(RETR_STAT) == NULL for self-expiring backends. */ +struct expire_cookie { + struct hash *tombstone; + struct hash *gone; +}; + + +/* Mirror of EXPdoline's decision tree for tokens that may be tombstoned + * or self-expiring. Returns true to keep, false to drop. + * + * Tokens are tagged via their type byte: + * type=1 - non-self-expiring backend + * type=2 - self-expiring backend (CNFS-like) + */ +static bool +test_expire_cb(void *cookie, time_t arrived UNUSED, time_t posted UNUSED, + time_t expires UNUSED, TOKEN *token) +{ + struct expire_cookie *c = cookie; + + /* Fast path: tombstone hit -> drop, no further checks. */ + if (c->tombstone != NULL && tombstone_present(c->tombstone, token)) + return false; + + /* Self-expiring backend (gap G). Even with a tombstone in hand, we + * cannot trust "not in tombstone == alive" because wrap-around can + * silently delete articles. Simulate the SMretrieve(RETR_STAT) + * check via the `gone` hashset. */ + if (token->type == 2) { + if (tombstone_present(c->gone, token)) + return false; + return true; + } + + /* Non-self-expiring: trust the tombstone. Not in tombstone means + * still alive. */ + return true; +} + + +/* Build a synthetic token whose bytes encode n and whose type byte + * tags whether it is from a self-expiring backend. */ +static TOKEN +make_token(unsigned char type, unsigned long n) +{ + TOKEN t; + + memset(&t, 0, sizeof(t)); + t.type = type; + t.class = 0; + t.token[0] = (n >> 24) & 0xff; + t.token[1] = (n >> 16) & 0xff; + t.token[2] = (n >> 8) & 0xff; + t.token[3] = n & 0xff; + return t; +} + + +static char * +make_msgid(unsigned long n) +{ + char buf[64]; + snprintf(buf, sizeof(buf), "", n); + return xstrdup(buf); +} + + +/* Count the number of non-empty lines in the history file (one per + * surviving entry). */ +static unsigned long +count_history_lines(const char *path) +{ + FILE *f; + char line[SMBUF]; + unsigned long n = 0; + + f = fopen(path, "r"); + if (f == NULL) + return 0; + while (fgets(line, sizeof(line), f) != NULL) { + if (line[0] != '\n' && line[0] != '\0') + n++; + } + fclose(f); + return n; +} + + +/* Helper: insert a TOKEN copy into a hashset. */ +static void +hash_add_token(struct hash *h, const TOKEN *token) +{ + TOKEN *p = xmalloc(sizeof(TOKEN)); + *p = *token; + if (!hash_insert(h, p, p)) + free(p); +} + + +int +main(void) +{ + struct history *h; + struct expire_cookie cookie; + char tmpdir[64]; + char histpath[128]; + char newhistpath[140]; + TOKEN tokens[N_TOTAL]; + unsigned long i; + unsigned long base; + bool expire_ok; + struct stat sb; + + test_init(9); + + strlcpy(tmpdir, "tombstone-hisexp-XXXXXX", sizeof(tmpdir)); + if (mkdtemp(tmpdir) == NULL) + sysbail("can't create temp directory"); + snprintf(histpath, sizeof(histpath), "%s/history", tmpdir); + + /* Layout of the tokens array: + [0 .. N_KEPT) non-selfexpire keep + [N_KEPT .. N_KEPT+N_TOMBSTONED) non-selfexpire tomb + [N_KEPT+N_TOMBSTONED .. + N_SELFEXP_GONE) selfexpire gone + [..rest] selfexpire alive + */ + base = 0; + for (i = 0; i < N_KEPT; i++) + tokens[base + i] = make_token(1, base + i); + base += N_KEPT; + for (i = 0; i < N_TOMBSTONED; i++) + tokens[base + i] = make_token(1, base + i); + base += N_TOMBSTONED; + for (i = 0; i < N_SELFEXP_GONE; i++) + tokens[base + i] = make_token(2, base + i); + base += N_SELFEXP_GONE; + for (i = 0; i < N_SELFEXP_ALIVE; i++) + tokens[base + i] = make_token(2, base + i); + + /* Populate a fresh history database with all N_TOTAL entries. */ + h = HISopen(histpath, "hisv6", HIS_CREAT | HIS_RDWR); + if (h == NULL) + bail("can't create history at %s", histpath); + for (i = 0; i < N_TOTAL; i++) { + char *msgid = make_msgid(i); + if (!HISwrite(h, msgid, (time_t) 1000000 + i, (time_t) 1000000 + i, + (time_t) 0, &tokens[i])) + bail("can't write history entry %lu: %s", i, HISerror(h)); + free(msgid); + } + HISsync(h); + HISclose(h); + ok(1, stat(histpath, &sb) == 0 && sb.st_size > 0); + + /* Build the tombstone hashset (only the N_TOMBSTONED slice). */ + cookie.tombstone = tombstone_hash_create(8); + for (i = N_KEPT; i < N_KEPT + N_TOMBSTONED; i++) + hash_add_token(cookie.tombstone, &tokens[i]); + ok(2, hash_count(cookie.tombstone) == N_TOMBSTONED); + + /* Build the "gone" hashset for self-expiring backends (only the + * N_SELFEXP_GONE slice; selfexpire-alive tokens are NOT added). */ + cookie.gone = tombstone_hash_create(8); + base = N_KEPT + N_TOMBSTONED; + for (i = 0; i < N_SELFEXP_GONE; i++) + hash_add_token(cookie.gone, &tokens[base + i]); + ok(3, hash_count(cookie.gone) == N_SELFEXP_GONE); + + /* Reopen read-only and run HISexpire with our SELFEXPIRE-aware + * callback. */ + h = HISopen(histpath, "hisv6", HIS_RDONLY); + if (h == NULL) + bail("can't reopen history at %s", histpath); + snprintf(newhistpath, sizeof(newhistpath), "%s/history.new", tmpdir); + /* High threshold so dropped entries do not survive as remember-only + * records; we want a clean count of survivors. */ + expire_ok = HISexpire(h, newhistpath, NULL, true, &cookie, + (time_t) 2000000000, test_expire_cb); + ok(4, expire_ok); + HISclose(h); + + /* The new history at .n should contain exactly the + * survivors: N_KEPT + N_SELFEXP_ALIVE. Tombstoned and selfexpire- + * gone entries should be absent. */ + { + char actual[160]; + unsigned long expected = N_KEPT + N_SELFEXP_ALIVE; + snprintf(actual, sizeof(actual), "%s.n", newhistpath); + ok(5, count_history_lines(actual) == expected); + } + + /* Per-category lookup checks: kept and selfexpire-alive present; + * tombstoned and selfexpire-gone absent. */ + { + char actual[160]; + struct history *hnew; + unsigned long kept_present = 0; + unsigned long tomb_present = 0; + unsigned long alive_present = 0; + unsigned long gone_present = 0; + TOKEN found; + + snprintf(actual, sizeof(actual), "%s.n", newhistpath); + hnew = HISopen(actual, "hisv6", HIS_RDONLY); + if (hnew == NULL) + bail("can't open new history %s", actual); + + for (i = 0; i < N_KEPT; i++) { + char *msgid = make_msgid(i); + if (HISlookup(hnew, msgid, NULL, NULL, NULL, &found)) + kept_present++; + free(msgid); + } + for (i = N_KEPT; i < N_KEPT + N_TOMBSTONED; i++) { + char *msgid = make_msgid(i); + if (HISlookup(hnew, msgid, NULL, NULL, NULL, &found)) + tomb_present++; + free(msgid); + } + base = N_KEPT + N_TOMBSTONED; + for (i = 0; i < N_SELFEXP_GONE; i++) { + char *msgid = make_msgid(base + i); + if (HISlookup(hnew, msgid, NULL, NULL, NULL, &found)) + gone_present++; + free(msgid); + } + base += N_SELFEXP_GONE; + for (i = 0; i < N_SELFEXP_ALIVE; i++) { + char *msgid = make_msgid(base + i); + if (HISlookup(hnew, msgid, NULL, NULL, NULL, &found)) + alive_present++; + free(msgid); + } + + ok(6, kept_present == N_KEPT); + ok(7, tomb_present == 0); + ok(8, gone_present == 0); /* SELFEXPIRE branch correctly drops */ + ok(9, alive_present == N_SELFEXP_ALIVE); + HISclose(hnew); + } + + hash_free(cookie.tombstone); + hash_free(cookie.gone); + + /* Cleanup. */ + { + char cmd[128]; + snprintf(cmd, sizeof(cmd), "/bin/rm -rf %s", tmpdir); + if (system(cmd) < 0) + sysdiag("can't clean up %s", tmpdir); + } + + return 0; +} diff --git a/tests/expire/tombstone-t.c b/tests/expire/tombstone-t.c new file mode 100644 index 000000000..fb23081e2 --- /dev/null +++ b/tests/expire/tombstone-t.c @@ -0,0 +1,316 @@ +/* Unit tests for the lib/tombstone helpers used by expire to consume + * the deletion logs written by expireover/expirerm and innd/sm. */ + +#include "portable/system.h" + +#include +#include +#include +#include +#include + +#include "inn/hashtab.h" +#include "inn/libinn.h" +#include "inn/messages.h" +#include "inn/storage.h" +#include "inn/tombstone.h" +#include "tap/basic.h" + + +static TOKEN +make_token(unsigned char type, unsigned char class, unsigned long n) +{ + TOKEN t; + + memset(&t, 0, sizeof(t)); + t.type = type; + t.class = class; + t.token[0] = (n >> 24) & 0xff; + t.token[1] = (n >> 16) & 0xff; + t.token[2] = (n >> 8) & 0xff; + t.token[3] = n & 0xff; + return t; +} + + +/* Write a tombstone-format file containing the given tokens and any + * extra raw lines. Returns the path; caller must unlink and free. */ +static char * +write_tombstone_file(const char *prefix, const TOKEN *tokens, size_t ntokens, + const char *const *extra_lines, size_t nextra) +{ + char tmpl[64]; + int fd; + FILE *f; + size_t i; + + snprintf(tmpl, sizeof(tmpl), "%s-XXXXXX", prefix); + fd = mkstemp(tmpl); + if (fd < 0) + sysbail("can't create %s", tmpl); + f = fdopen(fd, "w"); + if (f == NULL) + sysbail("can't fdopen %s", tmpl); + for (i = 0; i < ntokens; i++) + fprintf(f, "%s\n", TokenToText(tokens[i])); + for (i = 0; i < nextra; i++) + fprintf(f, "%s", extra_lines[i]); + fclose(f); + return xstrdup(tmpl); +} + + +int +main(void) +{ + TOKEN t1, t2, t3, t4, t_other; + char *text; + char *path; + char *path2; + struct hash *h; + unsigned long count; + TOKEN parsed; + char *roundtrip; + + test_init(25); + + /* TokenToText / TextToToken / IsToken round trip is the foundation + * of the file format. */ + t1 = make_token(3, 0, 0xdeadbeef); + text = xstrdup(TokenToText(t1)); + parsed = TextToToken(text); + ok(1, IsToken(text)); + ok(2, memcmp(&parsed, &t1, sizeof(TOKEN)) == 0); + roundtrip = xstrdup(TokenToText(parsed)); + ok(3, strcmp(text, roundtrip) == 0); + free(text); + free(roundtrip); + + /* tombstone_hash_create gives us a hash that handles TOKEN keys. */ + t2 = make_token(3, 0, 1); + t3 = make_token(3, 0, 2); + t_other = make_token(3, 0, 99); + h = tombstone_hash_create(4); + { + TOKEN *p = xmalloc(sizeof(TOKEN)); + *p = t2; + ok(4, hash_insert(h, p, p)); + } + { + TOKEN *p = xmalloc(sizeof(TOKEN)); + *p = t3; + ok(5, hash_insert(h, p, p)); + } + ok(6, tombstone_present(h, &t2)); + ok(7, tombstone_present(h, &t3)); + ok(8, !tombstone_present(h, &t_other)); + ok(9, hash_count(h) == 2); + hash_free(h); + + /* tombstone_read parses a single file with mixed valid/invalid + * lines, skips malformed entries, and silently ignores comment + * lines (used for the format version header and for future + * metadata). This locks in the writer/reader contract for + * "# inn-tombstone v1\n" written by expireover. */ + { + TOKEN tokens[] = {t1, t2, t3, t1}; /* duplicate t1 */ + const char *extras[] = { + "\n", /* blank */ + "# inn-tombstone v1\n", /* version header */ + "not-a-token\n", /* malformed */ + "# arbitrary metadata\n", /* future-format placeholder */ + "\r\n", /* CRLF blank */ + }; + path = write_tombstone_file("tombstone-read", tokens, 4, extras, 5); + h = tombstone_hash_create(4); + count = tombstone_read(h, path, NULL); + /* Three unique tokens (t1 dedup'd) plus one valid duplicate + * line counted but not inserted -> count returned is 4 + * (lines processed) but hash has 3 unique. Comment and + * blank lines are skipped silently and not counted. */ + ok(10, h != NULL); + ok(11, count == 4); + ok(12, hash_count(h) == 3); + ok(13, tombstone_present(h, &t1)); + ok(14, tombstone_present(h, &t2)); + ok(15, tombstone_present(h, &t3)); + ok(16, !tombstone_present(h, &t_other)); + hash_free(h); + unlink(path); + free(path); + } + + /* Two-file merge with overlap -- T-C4-B. expireover.tombstone has + * t1, t2; cancels.tombstone has t2, t3. Loaded into one hashset, + * we expect 3 unique entries with t2 merged. */ + t4 = make_token(3, 0, 4); + { + TOKEN expireover_tokens[] = {t1, t2}; + TOKEN cancels_tokens[] = {t2, t3, t4}; + path = write_tombstone_file("tombstone-expireover", + expireover_tokens, 2, NULL, 0); + path2 = write_tombstone_file("tombstone-cancels", + cancels_tokens, 3, NULL, 0); + h = tombstone_hash_create(4); + tombstone_read(h, path, NULL); + tombstone_read(h, path2, NULL); + ok(17, hash_count(h) == 4); /* t1, t2, t3, t4 */ + ok(18, tombstone_present(h, &t1) && tombstone_present(h, &t2) + && tombstone_present(h, &t3) + && tombstone_present(h, &t4)); + hash_free(h); + unlink(path); + unlink(path2); + free(path); + free(path2); + } + + /* tombstone_rename_for_processing -- T-C4-C. Atomically renames + * the source file to ".processing"; subsequent open of the + * original path finds nothing. */ + { + TOKEN tokens[] = {t1, t2}; + char *snapshot; + struct stat sb; + + path = write_tombstone_file("tombstone-rename", tokens, 2, NULL, 0); + snapshot = tombstone_rename_for_processing(path); + ok(19, snapshot != NULL); + if (snapshot != NULL) { + /* Original path no longer exists; snapshot does. */ + ok(20, stat(path, &sb) < 0 && errno == ENOENT + && stat(snapshot, &sb) == 0); + unlink(snapshot); + free(snapshot); + } else { + ok(20, false); + } + unlink(path); /* in case rename failed */ + free(path); + } + + /* tombstone_ensure_header on an absent file: creates a header-only + * file under exclusive lock. Read back as a hashset that holds + * no tokens (the header is a comment line, skipped by the + * reader). */ + { + char tmpl[] = "tombstone-ensure-XXXXXX"; + int tmp_fd; + struct stat sb; + + tmp_fd = mkstemp(tmpl); + if (tmp_fd < 0) + sysbail("can't create temp file"); + close(tmp_fd); + unlink(tmpl); /* mkstemp creates it; we want absent */ + path = xstrdup(tmpl); + tombstone_ensure_header(path); + h = tombstone_hash_create(4); + count = tombstone_read(h, path, NULL); + ok(21, stat(path, &sb) == 0 && sb.st_size > 0 && count == 0 + && hash_count(h) == 0); + hash_free(h); + unlink(path); + free(path); + } + + /* tombstone_ensure_header on a non-empty file (appender raced a + * cancel in between consumer's unlink and our recreate): the + * existing tokens are preserved verbatim below the new header. */ + { + TOKEN raced[] = {t1, t2}; + + path = write_tombstone_file("tombstone-prepend", raced, 2, NULL, 0); + tombstone_ensure_header(path); + h = tombstone_hash_create(4); + count = tombstone_read(h, path, NULL); + ok(22, count == 2 && hash_count(h) == 2 + && tombstone_present(h, &t1) && tombstone_present(h, &t2)); + hash_free(h); + unlink(path); + free(path); + } + + /* tombstone_ensure_header is idempotent: called twice in a row, + * the second call is a no-op because the file already starts + * with the header. Without this, every expire cycle would + * prepend an extra header line and grow the file 19 bytes per + * cycle on a site with zero cancels per cycle. */ + { + char tmpl[] = "tombstone-idem-XXXXXX"; + int tmp_fd; + struct stat sb1, sb2; + + tmp_fd = mkstemp(tmpl); + if (tmp_fd < 0) + sysbail("can't create temp file"); + close(tmp_fd); + unlink(tmpl); + path = xstrdup(tmpl); + tombstone_ensure_header(path); + stat(path, &sb1); + tombstone_ensure_header(path); + stat(path, &sb2); + ok(23, sb1.st_size == sb2.st_size && sb1.st_size > 0); + unlink(path); + free(path); + } + + /* Multi-chunk shift: build a file larger than the internal + * chunk size (64 KiB), call tombstone_ensure_header, and verify + * the shift loop's chunk-boundary arithmetic is correct. At + * ~38 bytes per token line, 3000 tokens is ~108 KiB, which + * forces the loop to run at least twice (one full CHUNK + one + * partial trailing chunk). */ + { + size_t nbig = 3000; + TOKEN *big = xmalloc(nbig * sizeof(TOKEN)); + size_t i; + bool all_present; + + for (i = 0; i < nbig; i++) + big[i] = make_token(3, 0, 0x10000 + i); + path = write_tombstone_file("tombstone-multichunk", big, nbig, + NULL, 0); + tombstone_ensure_header(path); + h = tombstone_hash_create(nbig * 2); + count = tombstone_read(h, path, NULL); + all_present = (count == nbig && hash_count(h) == nbig); + for (i = 0; i < nbig && all_present; i++) + all_present = tombstone_present(h, &big[i]); + ok(24, all_present); + hash_free(h); + unlink(path); + free(path); + free(big); + } + + /* Crash-recovery tolerance: simulate a file in a post-crash + * state (no header, some garbled lines interleaved with valid + * tokens). ensure_header should shift everything right by + * header_len without crashing; the subsequent tombstone_read + * should warn-and-skip garbled lines and parse the valid + * tokens. This pins down the recovery argument as actual + * behavior. */ + { + TOKEN valid[] = {t1, t2, t3}; + const char *garbled[] = { + "this-is-not-a-token\n", + "neither-is-this\n", + }; + + path = write_tombstone_file("tombstone-corrupt", valid, 3, + garbled, 2); + tombstone_ensure_header(path); + h = tombstone_hash_create(4); + count = tombstone_read(h, path, NULL); + ok(25, hash_count(h) == 3 && tombstone_present(h, &t1) + && tombstone_present(h, &t2) + && tombstone_present(h, &t3)); + hash_free(h); + unlink(path); + free(path); + } + + return 0; +} diff --git a/tests/storage/cancel-tombstone-t.c b/tests/storage/cancel-tombstone-t.c new file mode 100644 index 000000000..a6220f48f --- /dev/null +++ b/tests/storage/cancel-tombstone-t.c @@ -0,0 +1,233 @@ +/* Unit tests for SMcanceltombstone (storage/interface.c). + * + * Verifies the multi-writer append protocol used by innd's ARTcancel + * and sm's -r path: the file format is one TokenToText() per line; the + * write is gated on innconf->expiretombstone; TOKEN_EMPTY is a no-op; + * concurrent appenders use fcntl POSIX locks via inn_lock_file. */ + +#include "portable/system.h" + +#include +#include +#include +#include +#include + +#include "inn/innconf.h" +#include "inn/libinn.h" +#include "inn/messages.h" +#include "inn/storage.h" +#include "tap/basic.h" + + +/* Build a synthetic token whose bytes encode n. Type/class chosen so + * IsToken accepts the textual form (the type byte must be a valid hex + * digit pair when round-tripped). */ +static TOKEN +make_token(unsigned char type, unsigned long n) +{ + TOKEN t; + + memset(&t, 0, sizeof(t)); + t.type = type; + t.class = 0; + t.token[0] = (n >> 24) & 0xff; + t.token[1] = (n >> 16) & 0xff; + t.token[2] = (n >> 8) & 0xff; + t.token[3] = n & 0xff; + return t; +} + + +/* Read the entire file into a malloc'd buffer. Returns NULL on + * error. *out_size set to bytes read. */ +static char * +slurp(const char *path, size_t *out_size) +{ + FILE *f; + struct stat sb; + char *buf; + + *out_size = 0; + if (stat(path, &sb) < 0) + return NULL; + f = fopen(path, "r"); + if (f == NULL) + return NULL; + buf = xmalloc(sb.st_size + 1); + if (fread(buf, 1, sb.st_size, f) != (size_t) sb.st_size) { + free(buf); + fclose(f); + return NULL; + } + buf[sb.st_size] = '\0'; + *out_size = sb.st_size; + fclose(f); + return buf; +} + + +/* Count lines in buf (number of '\n' bytes). */ +static size_t +count_lines(const char *buf, size_t len) +{ + size_t i, n = 0; + for (i = 0; i < len; i++) + if (buf[i] == '\n') + n++; + return n; +} + + +int +main(void) +{ + char tmpdir[64]; + char *cancels_path; + TOKEN t1, t2, t3, empty; + char *contents; + size_t size; + struct stat sb; + + test_init(15); + + /* Set up a temporary pathdb. */ + strlcpy(tmpdir, "cancel-tombstone-XXXXXX", sizeof(tmpdir)); + if (mkdtemp(tmpdir) == NULL) + sysbail("can't create temp directory"); + + /* Initialize a minimal innconf so SMcanceltombstone has somewhere + * to write. We need the fields it consults: expiretombstone, + * groupbaseexpiry (both required), and pathdb. Other code paths + * in this binary read additional fields, so allocate via xcalloc + * to zero everything. */ + innconf = xcalloc(1, sizeof(*innconf)); + innconf->pathdb = xstrdup(tmpdir); + innconf->expiretombstone = true; + innconf->groupbaseexpiry = true; + + cancels_path = concatpath(tmpdir, "cancels.tombstone"); + + t1 = make_token(3, 0xdeadbeef); + t2 = make_token(3, 0xcafebabe); + t3 = make_token(3, 0x12345678); + memset(&empty, 0, sizeof(empty)); + empty.type = TOKEN_EMPTY; + + /* 1. First call creates the file with one line. */ + SMcanceltombstone(t1); + contents = slurp(cancels_path, &size); + ok(1, contents != NULL); + ok(2, count_lines(contents, size) == 1); + ok(3, contents != NULL && strstr(contents, TokenToText(t1)) != NULL); + free(contents); + + /* 2. Second call appends; file now has two distinct token lines. */ + SMcanceltombstone(t2); + contents = slurp(cancels_path, &size); + ok(4, contents != NULL && count_lines(contents, size) == 2); + ok(5, contents != NULL && strstr(contents, TokenToText(t1)) != NULL + && strstr(contents, TokenToText(t2)) != NULL); + free(contents); + + /* 3. expiretombstone=false: third call is a no-op. */ + innconf->expiretombstone = false; + SMcanceltombstone(t3); + contents = slurp(cancels_path, &size); + ok(6, contents != NULL && count_lines(contents, size) == 2); + ok(7, contents != NULL && strstr(contents, TokenToText(t3)) == NULL); + free(contents); + innconf->expiretombstone = true; + + /* 4. TOKEN_EMPTY: no-op (not a real article). */ + SMcanceltombstone(empty); + contents = slurp(cancels_path, &size); + ok(8, contents != NULL && count_lines(contents, size) == 2); + free(contents); + + /* 5. After write completes, the lock has been released; another + * process (here, just us reusing the same path) can acquire the + * exclusive write lock without contention. */ + { + int fd = open(cancels_path, O_RDWR); + ok(9, fd >= 0); + if (fd >= 0) { + ok(10, inn_lock_file(fd, INN_LOCK_WRITE, false)); + close(fd); + } else { + ok(10, false); + } + } + + /* 6. Each line round-trips: parsing each line through TextToToken + * yields the original TOKEN. Read the file and verify. */ + contents = slurp(cancels_path, &size); + if (contents != NULL) { + char *p = contents; + char *line_end; + TOKEN parsed; + bool found_t1 = false; + bool found_t2 = false; + + while ((line_end = strchr(p, '\n')) != NULL) { + *line_end = '\0'; + if (IsToken(p)) { + parsed = TextToToken(p); + if (memcmp(&parsed, &t1, sizeof(TOKEN)) == 0) + found_t1 = true; + if (memcmp(&parsed, &t2, sizeof(TOKEN)) == 0) + found_t2 = true; + } + p = line_end + 1; + } + ok(11, found_t1); + ok(12, found_t2); + free(contents); + } else { + ok(11, false); + ok(12, false); + } + + /* 7. File mode should be 0664 (created with O_CREAT, mode 0664). + * Check the stored mode bits. Allow umask-stripped variants. */ + if (stat(cancels_path, &sb) == 0) { + mode_t mode = sb.st_mode & 0777; + /* The file was created with mode 0664 but umask may have + * stripped write bits. Verify at least owner+group readable. */ + ok(13, (mode & 0640) == 0640); + } else { + ok(13, false); + } + + /* 8. Calling SMcanceltombstone with an unwritable pathdb does + * not crash (best-effort failure path), does not create any + * file outside the configured location, and returns false. + * Simulate by pointing pathdb at a non-existent directory. */ + { + char *saved_pathdb = innconf->pathdb; + char *bad_path; + struct stat sb_bad; + bool result; + innconf->pathdb = xstrdup("/nonexistent-tombstone-test-dir"); + result = SMcanceltombstone(t3); + bad_path = concatpath(innconf->pathdb, "cancels.tombstone"); + ok(14, !result && stat(bad_path, &sb_bad) < 0 + && errno == ENOENT); + free(bad_path); + free(innconf->pathdb); + innconf->pathdb = saved_pathdb; + } + + /* Cleanup. */ + if (unlink(cancels_path) < 0 && errno != ENOENT) + sysdiag("can't unlink %s", cancels_path); + free(cancels_path); + if (rmdir(tmpdir) < 0) + sysdiag("can't rmdir %s", tmpdir); + /* Verify cleanup actually removed the directory. */ + ok(15, stat(tmpdir, &sb) < 0 && errno == ENOENT); + free(innconf->pathdb); + free(innconf); + + return 0; +}