Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions lab/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@ cd ../lab/bench && make BDB=../../build_unix # build the drivers
drives a shared environment from N threads and reports ops/sec plus
region-contention signals.

- **`lock_bench`** — direct lock-manager probe. Each thread allocates its own
locker and calls `lock_get`/`lock_put` in a tight loop on `distinct`
(per-thread, no-conflict) or `shared` (read-lock the same objects) keys,
bypassing the access methods and buffer pool so the lock subsystem's own
scaling is measured in isolation.

- **`tproc_c` / `tproc_b` / `tproc_h`** — HammerDB-style workloads
(independently implemented; **not** the TPC benchmarks and not comparable to
TPC results):
Expand All @@ -38,6 +44,7 @@ individual protections removed, to measure their cost:
| `-d sync\|wnosync\|nosync` | commit durability (default `nosync`) |
| `-m` | MVCC / snapshot isolation (`DB_MULTIVERSION`) |
| `-C` | Concurrent Data Store (`DB_INIT_CDB`) instead of full txns |
| `-D N` | deadlock detection: `0` (default) detects on every conflict; `N>0` runs a background detector every `N` ms and leaves the hot path free of detection |
| `-c` `-t` `-S` `-s` `-i` | cache bytes, threads, scale, seconds, init |

Example:
Expand Down
5 changes: 4 additions & 1 deletion lab/bench/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,16 @@ CFLAGS ?= -O2 -pthread
INCLUDES = -I$(BDB)
LIBS = -L$(BDB)/.libs -ldb-5.3

BENCHES = scale_bench tproc_c tproc_b tproc_h
BENCHES = scale_bench tproc_c tproc_b tproc_h lock_bench

all: $(BENCHES)

scale_bench: scale_bench.c
$(CC) $(CFLAGS) $(INCLUDES) scale_bench.c $(LIBS) -o $@

lock_bench: lock_bench.c
$(CC) $(CFLAGS) $(INCLUDES) lock_bench.c $(LIBS) -o $@

tproc_c: tproc_c.c bdb_bench.h
$(CC) $(CFLAGS) $(INCLUDES) tproc_c.c $(LIBS) -o $@

Expand Down
69 changes: 67 additions & 2 deletions lab/bench/bdb_bench.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ typedef struct {
int use_log; /* DB_INIT_LOG */
int use_mvcc; /* DB_MULTIVERSION + DB_TXN_SNAPSHOT readers */
int use_cdb; /* DB_INIT_CDB (concurrent data store) */
int dd_periodic; /* deadlock detect: 0=on every conflict (default),
* else run a background detector every N ms and
* set_lk_detect(NONE) on the hot path */
enum bb_durability durability;
} bb_config;

Expand Down Expand Up @@ -95,7 +98,7 @@ bb_getopt(int argc, char **argv, bb_config *c)
int ch;
extern char *optarg;

while ((ch = getopt(argc, argv, "h:c:t:S:s:imCd:X:R:")) != EOF)
while ((ch = getopt(argc, argv, "h:c:t:S:s:imCd:X:R:D:")) != EOF)
switch (ch) {
case 'h': c->home = optarg; break;
case 'c': c->cachebytes = strtoull(optarg, NULL, 10); break;
Expand All @@ -106,6 +109,7 @@ bb_getopt(int argc, char **argv, bb_config *c)
case 'm': c->use_mvcc = 1; break;
case 'C': c->use_cdb = 1; break;
case 'R': c->seed = (unsigned)strtoul(optarg, NULL, 10); break;
case 'D': c->dd_periodic = atoi(optarg); break; /* dd interval ms */
case 'd':
if (strcmp(optarg, "sync") == 0) c->durability = BB_SYNC;
else if (strcmp(optarg, "wnosync") == 0)
Expand Down Expand Up @@ -173,9 +177,37 @@ bb_env_open(bb_config *c, DB_ENV **envp)
* victim and returns DB_LOCK_DEADLOCK) instead of blocking forever --
* the workloads here intentionally contend on shared rows.
*/
if (c->use_lock || c->use_txn)
/*
* Deadlock detection. By default BDB can run the detector on every
* lock conflict (set_lk_detect) -- correct, but every blocked acquire
* pays the detector's cost inline. With -D N we instead leave the hot
* path free of detection and run a background detector every N ms
* (started by bb_start_dd after open); a victim is chosen at the next
* sweep rather than immediately. This A/B isolates the detector cost.
*/
if ((c->use_lock || c->use_txn) && c->dd_periodic == 0)
(void)env->set_lk_detect(env, DB_LOCK_DEFAULT);

/*
* Size the lock subsystem generously. The default region holds only
* ~1000 locks/lockers/objects; a batched bulk load or a many-thread
* run needs far more (each held lock and each active transaction
* consumes entries), and exhausting them returns ENOMEM mid-run.
*/
if (c->use_lock || c->use_txn) {
(void)env->set_lk_max_locks(env, 200000);
(void)env->set_lk_max_objects(env, 200000);
(void)env->set_lk_max_lockers(env, 200000);
}

/*
* Size the in-memory log buffer so a write-heavy run does not stall
* rolling tiny (default) log segments. Durability is governed
* separately by the -d toggle below.
*/
if (c->use_log || c->use_txn)
(void)env->set_lg_bsize(env, 16 * 1024 * 1024);

if (c->use_txn) {
if (c->durability == BB_NOSYNC)
(void)env->set_flags(env, DB_TXN_NOSYNC, 1);
Expand Down Expand Up @@ -287,4 +319,37 @@ bb_print_config(const bb_config *c, const char *name)
c->durability == BB_WRITE_NOSYNC ? "wnosync" : "nosync");
}

/* ---- background deadlock detector (for -D N) -------------------- */
/*
 * Argument block handed to bb_dd_thread via pthread_create.
 *   env         - environment whose lock_detect method the thread calls
 *   interval_ms - pause between detector sweeps, in milliseconds
 *   stop        - flag polled by the thread; it leaves its loop once the
 *                 pointed-to int is nonzero
 * The thread keeps a pointer to this struct, so it must stay valid until
 * the thread has been joined.
 */
struct bb_dd_arg { DB_ENV *env; int interval_ms; volatile int *stop; };

static void *
bb_dd_thread(void *a)
{
struct bb_dd_arg *arg = a;
int rejected;

while (!*arg->stop) {
usleep((useconds_t)arg->interval_ms * 1000);
(void)arg->env->lock_detect(arg->env, 0,
DB_LOCK_YOUNGEST, &rejected);
}
return NULL;
}

/*
 * bb_start_dd -- launch the periodic deadlock detector if one was requested.
 *
 * When c->dd_periodic is positive, fills in *arg (environment, interval in
 * milliseconds, stop flag) and starts bb_dd_thread on it, storing the new
 * thread id in *tid.  When c->dd_periodic is zero or negative nothing is
 * started and *tid / *arg are left untouched.
 *
 * Returns 0 if no detector was requested, otherwise the pthread_create
 * result (0 on success, an error number on failure).
 *
 * Both *arg and *stop are read by the running thread and therefore must
 * outlive it.
 */
static int
bb_start_dd(const bb_config *c, DB_ENV *env, pthread_t *tid,
    struct bb_dd_arg *arg, volatile int *stop)
{
	int rc = 0;

	if (c->dd_periodic > 0) {
		arg->stop = stop;
		arg->interval_ms = c->dd_periodic;
		arg->env = env;
		rc = pthread_create(tid, NULL, bb_dd_thread, arg);
	}
	return rc;
}

#endif /* BDB_BENCH_H */
164 changes: 164 additions & 0 deletions lab/bench/lock_bench.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
/*-
* See the file LICENSE for redistribution information.
*
* lock_bench -- direct lock-manager throughput/scaling probe.
*
* Bypasses the access methods and the buffer pool entirely: each thread
* allocates its own locker id, then in a tight loop calls DB_ENV->lock_get
* followed by DB_ENV->lock_put on a chosen object. This isolates the lock
* manager's own machinery (partition mutexes, object hash, lock/object free
* lists, locker lookup, per-op counters) from B-tree search and page-pin
* cache misses that dominate a real DB->get and mask the lock layer.
*
* ./lock_bench <secs> <nobj> <mode> <t1> [t2 ...]
*
* mode:
* distinct - each thread locks objects from its own disjoint key range
* (no conflicts; pure throughput / partition scaling)
* shared - all threads lock READ over the SAME small set of nobj objects
* (read locks don't conflict, but they share object hash slots
* + lock-object refcount cache lines -> measures true sharing)
*
* Prints ops/sec per thread count.
*/
#include <sys/types.h>
#include <errno.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <unistd.h>

#include <db.h>

/* Environment handle created and opened in main; every worker locks through it. */
static DB_ENV *g_env;
/*
 * Run parameters taken from the command line in main:
 *   g_secs       - how long each thread-count run lasts, in seconds
 *   g_nobj       - number of lock objects each worker cycles through
 *   g_shared     - nonzero when the mode argument is "shared"
 *   g_maxthreads - largest thread count given on the command line
 */
static int g_secs, g_nobj, g_shared, g_maxthreads;
/* Set to 1 by main to make the workers leave their loop; reset to 0 per run. */
static volatile int g_stop;

/* Per-thread benchmark state; main allocates one slot per thread of a run. */
typedef struct {
int tid;		/* 0-based thread index within the current run */
u_int32_t locker;	/* locker id obtained from lock_id for this thread */
uint64_t ops;		/* completed lock_get/lock_put pairs */
} worker;

/*
 * now_ms -- current wall-clock time from gettimeofday, expressed in
 * milliseconds as a double.  The gettimeofday return value is ignored.
 */
static double
now_ms(void)
{
	struct timeval stamp;
	double from_secs, from_usecs;

	(void)gettimeofday(&stamp, NULL);
	from_secs = (double)stamp.tv_sec * 1000.0;
	from_usecs = (double)stamp.tv_usec / 1000.0;
	return from_secs + from_usecs;
}

/*
 * worker_main -- benchmark thread body.
 *
 * `arg` is this thread's `worker` slot.  Until g_stop becomes nonzero the
 * thread cycles through g_nobj keys, taking a lock on each with lock_get
 * (DB_LOCK_READ in shared mode, DB_LOCK_WRITE otherwise) and releasing it
 * straight away with lock_put.  A failing lock_get or lock_put is reported
 * on stderr and ends the thread early.
 *
 * On every exit path the number of completed get/put pairs is added to
 * w->ops.  Always returns NULL.
 */
static void *
worker_main(void *arg)
{
	worker *w = arg;
	DBT obj;
	DB_LOCK lock;
	uint32_t key, base;
	uint64_t i = 0;
	/*
	 * Count in a local rather than bumping w->ops each iteration: the
	 * worker slots are small and contiguous, so per-op stores from
	 * neighbouring threads would bounce the same cache lines and skew
	 * the very scaling numbers this probe exists to measure.
	 */
	uint64_t ops = 0;
	int ret;

	/* distinct: thread t owns keys [t*nobj, (t+1)*nobj).  shared: [0,nobj). */
	base = g_shared ? 0 : (uint32_t)w->tid * (uint32_t)g_nobj;
	memset(&obj, 0, sizeof(obj));
	obj.size = sizeof(key);
	obj.data = &key;	/* the DBT aliases `key`; updating key retargets it */

	while (!g_stop) {
		key = base + (uint32_t)(i++ % (uint64_t)g_nobj);
		ret = g_env->lock_get(g_env, w->locker, 0, &obj,
		    g_shared ? DB_LOCK_READ : DB_LOCK_WRITE, &lock);
		if (ret != 0) {
			fprintf(stderr, "lock_get: %s\n", db_strerror(ret));
			break;
		}
		if ((ret = g_env->lock_put(g_env, &lock)) != 0) {
			fprintf(stderr, "lock_put: %s\n", db_strerror(ret));
			break;
		}
		ops++;
	}

	/* Publish once; main reads w->ops only after joining this thread. */
	w->ops += ops;
	return NULL;
}

/*
 * lb_parse_int -- strict base-10 parse of a command-line integer.
 * Returns 0 and stores the value in *out on success; returns -1 when `s`
 * is empty, carries trailing characters, or does not fit in an int.
 */
static int
lb_parse_int(const char *s, int *out)
{
	char *end;
	long v;

	errno = 0;
	v = strtol(s, &end, 10);
	if (end == s || *end != '\0' || errno == ERANGE || (long)(int)v != v)
		return -1;
	*out = (int)v;
	return 0;
}

/*
 * main -- lock_bench <secs> <nobj> <distinct|shared> <t1> [t2 ...]
 *
 * Opens a private, lock-only environment in LOCKBENCHDIR and, for each
 * thread count on the command line, runs that many worker_main threads for
 * <secs> seconds and prints the aggregate lock_get/lock_put pairs per
 * second.
 *
 * Arguments are validated up front: secs and every thread count must be
 * positive integers, nobj must be an integer (values below 1 are raised to
 * 1), and the mode must be exactly "distinct" or "shared".
 *
 * Returns 0 when every run completed, 1 on a usage error or any failure.
 */
int
main(int argc, char **argv)
{
	pthread_t *tids = NULL;
	worker *workers = NULL;
	const char *home = "LOCKBENCHDIR";
	double t0, elapsed;
	uint64_t total;
	int ai, t, nthreads = 0, nids, nstarted, failed, ret;
	int usage_ok, rc = 1;

	usage_ok = argc >= 5 &&
	    lb_parse_int(argv[1], &g_secs) == 0 && g_secs >= 1 &&
	    lb_parse_int(argv[2], &g_nobj) == 0 &&
	    (strcmp(argv[3], "shared") == 0 ||
	    strcmp(argv[3], "distinct") == 0);
	/* Reject bad thread counts now rather than part-way through the runs. */
	for (ai = 4; usage_ok && ai < argc; ai++) {
		if (lb_parse_int(argv[ai], &nthreads) != 0 || nthreads < 1)
			usage_ok = 0;
		else if (nthreads > g_maxthreads)
			g_maxthreads = nthreads;
	}
	if (!usage_ok) {
		fprintf(stderr,
		    "usage: %s <secs> <nobj> <distinct|shared> <t1> [t2 ...]\n",
		    argv[0]);
		return 1;
	}
	g_shared = strcmp(argv[3], "shared") == 0;
	if (g_nobj < 1)
		g_nobj = 1;

	if ((ret = db_env_create(&g_env, 0)) != 0) {
		fprintf(stderr, "env_create: %s\n", db_strerror(ret));
		return 1;
	}
	g_env->set_errfile(g_env, stderr);
	/* Size the lock subsystem for many lockers/objects/locks. */
	(void)g_env->set_lk_max_locks(g_env, 500000);
	(void)g_env->set_lk_max_objects(g_env, 500000);
	(void)g_env->set_lk_max_lockers(g_env, 500000);
	if ((ret = g_env->open(g_env, home,
	    DB_CREATE | DB_INIT_LOCK | DB_THREAD | DB_PRIVATE, 0)) != 0) {
		g_env->err(g_env, ret, "env open (mkdir %s first)", home);
		/* The handle must still be discarded after a failed open. */
		goto done;
	}

	printf("# lock_bench mode=%s nobj=%d secs=%d\n",
	    g_shared ? "shared" : "distinct", g_nobj, g_secs);
	printf("# threads ops/sec\n");

	for (ai = 4; ai < argc; ai++) {
		(void)lb_parse_int(argv[ai], &nthreads);	/* validated above */
		tids = calloc((size_t)nthreads, sizeof(*tids));
		workers = calloc((size_t)nthreads, sizeof(*workers));
		if (tids == NULL || workers == NULL) {
			fprintf(stderr, "calloc: %s\n", strerror(ENOMEM));
			goto done;
		}

		failed = 0;
		for (t = 0; t < nthreads; t++) {
			workers[t].tid = t;
			if ((ret = g_env->lock_id(g_env,
			    &workers[t].locker)) != 0) {
				fprintf(stderr, "lock_id: %s\n",
				    db_strerror(ret));
				failed = 1;
				break;
			}
		}
		nids = t;		/* locker ids that must be freed */

		g_stop = 0;
		t0 = now_ms();
		for (t = 0; !failed && t < nthreads; t++) {
			if ((ret = pthread_create(&tids[t], NULL,
			    worker_main, &workers[t])) != 0) {
				fprintf(stderr, "pthread_create: %s\n",
				    strerror(ret));
				failed = 1;
				break;
			}
		}
		nstarted = failed ? (nids < nthreads ? 0 : t) : nthreads;

		/*
		 * sleep() rather than usleep(): POSIX allows usleep to reject
		 * one second or more, and secs * 1000000 can wrap useconds_t.
		 */
		if (!failed)
			(void)sleep((unsigned int)g_secs);
		g_stop = 1;
		/* Join only threads that were actually created. */
		for (t = 0; t < nstarted; t++)
			pthread_join(tids[t], NULL);
		elapsed = (now_ms() - t0) / 1000.0;

		total = 0;
		for (t = 0; t < nids; t++) {
			total += workers[t].ops;
			(void)g_env->lock_id_free(g_env, workers[t].locker);
		}
		free(tids);
		free(workers);
		tids = NULL;
		workers = NULL;
		if (failed)
			goto done;

		printf("%-12d %12.0f\n", nthreads,
		    (double)total / elapsed);
	}
	rc = 0;

done:
	free(tids);
	free(workers);
	(void)g_env->close(g_env, 0);
	return rc;
}
21 changes: 14 additions & 7 deletions lab/bench/tproc_b.c
Original file line number Diff line number Diff line change
Expand Up @@ -235,13 +235,20 @@ main(int argc, char **argv)
}

g_stop = 0;
t0 = bb_now_ms();
for (t = 0; t < g_cfg.threads; t++)
pthread_create(&tids[t], NULL, worker_main, &workers[t]);
usleep((useconds_t)g_cfg.seconds * 1000000);
g_stop = 1;
for (t = 0; t < g_cfg.threads; t++)
pthread_join(tids[t], NULL);
{
pthread_t ddtid; struct bb_dd_arg ddarg; int dd_on;
dd_on = (bb_start_dd(&g_cfg, g_env, &ddtid, &ddarg, &g_stop) == 0
&& g_cfg.dd_periodic > 0);
t0 = bb_now_ms();
for (t = 0; t < g_cfg.threads; t++)
pthread_create(&tids[t], NULL, worker_main, &workers[t]);
usleep((useconds_t)g_cfg.seconds * 1000000);
g_stop = 1;
for (t = 0; t < g_cfg.threads; t++)
pthread_join(tids[t], NULL);
if (dd_on)
pthread_join(ddtid, NULL);
}
elapsed = (bb_now_ms() - t0) / 1000.0;

okall = retryall = 0;
Expand Down
25 changes: 16 additions & 9 deletions lab/bench/tproc_c.c
Original file line number Diff line number Diff line change
Expand Up @@ -534,15 +534,22 @@ main(int argc, char **argv)
}

g_stop = 0;
t0 = bb_now_ms();
for (t = 0; t < g_cfg.threads; t++)
pthread_create(&tids[t], NULL, worker_main, &workers[t]);

/* Run for the requested wall-clock, then signal stop. */
usleep((useconds_t)g_cfg.seconds * 1000000);
g_stop = 1;
for (t = 0; t < g_cfg.threads; t++)
pthread_join(tids[t], NULL);
{
pthread_t ddtid; struct bb_dd_arg ddarg; int dd_on;
dd_on = (bb_start_dd(&g_cfg, g_env, &ddtid, &ddarg, &g_stop) == 0
&& g_cfg.dd_periodic > 0);
t0 = bb_now_ms();
for (t = 0; t < g_cfg.threads; t++)
pthread_create(&tids[t], NULL, worker_main, &workers[t]);

/* Run for the requested wall-clock, then signal stop. */
usleep((useconds_t)g_cfg.seconds * 1000000);
g_stop = 1;
for (t = 0; t < g_cfg.threads; t++)
pthread_join(tids[t], NULL);
if (dd_on)
pthread_join(ddtid, NULL);
}
elapsed = (bb_now_ms() - t0) / 1000.0;

memset(total, 0, sizeof(total));
Expand Down
21 changes: 14 additions & 7 deletions lab/bench/tproc_h.c
Original file line number Diff line number Diff line change
Expand Up @@ -399,13 +399,20 @@ main(int argc, char **argv)
}

g_stop = 0;
t0 = bb_now_ms();
for (t = 0; t < nthreads; t++)
pthread_create(&tids[t], NULL, worker_main, &workers[t]);
usleep((useconds_t)g_cfg.seconds * 1000000);
g_stop = 1;
for (t = 0; t < nthreads; t++)
pthread_join(tids[t], NULL);
{
pthread_t ddtid; struct bb_dd_arg ddarg; int dd_on;
dd_on = (bb_start_dd(&g_cfg, g_env, &ddtid, &ddarg, &g_stop) == 0
&& g_cfg.dd_periodic > 0);
t0 = bb_now_ms();
for (t = 0; t < nthreads; t++)
pthread_create(&tids[t], NULL, worker_main, &workers[t]);
usleep((useconds_t)g_cfg.seconds * 1000000);
g_stop = 1;
for (t = 0; t < nthreads; t++)
pthread_join(tids[t], NULL);
if (dd_on)
pthread_join(ddtid, NULL);
}
elapsed = (bb_now_ms() - t0) / 1000.0;

memset(total, 0, sizeof(total));
Expand Down
Loading
Loading