Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion umbra/benchmark.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
# Thin shim — actual flow is in lib/benchmark-common.sh.
export BENCH_DOWNLOAD_SCRIPT="download-hits-tsv"
export BENCH_DOWNLOAD_SCRIPT="download-hits-parquet-single"
export BENCH_DURABLE=yes
exec ../lib/benchmark-common.sh
124 changes: 120 additions & 4 deletions umbra/create.sql
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,123 @@ create table hits (
hasgclid smallint not null,
refererhash bigint not null,
urlhash bigint not null,
clid integer not null,
primary key (counterid, eventdate, userid, eventtime, watchid)
);
copy hits from '/data/hits.tsv' with (format text);
clid integer not null
) with (compression=zstd);

-- Ingest from the Athena parquet rather than the TSV. Umbra's COPY-from-
-- parquet path deadlocks on the second bulk op and the row-group reader had
-- crashes (both worked around / fixed), so we load via the umbra.parquetview
-- table function in a single INSERT — one bulk op, inline type conversion, no
-- staging. The function lives only in the Umbra-mode function table (hence the
-- umbra. qualifier) and surfaces columns CamelCased, so each must be double-
-- quoted. EventTime/ClientEventTime/LocalEventTime are unix-second int64s;
-- EventDate is a uint16 day count from the epoch. Path is the container's
-- '/data' bind mount.
-- Bulk-load hits in a single statement from the parquet file exposed by
-- umbra.parquetview. The insert has no target column list, so the select
-- expressions below are matched to the columns of hits purely by position:
-- keep them in the table's declared column order.
insert into hits
select
"WatchID",
"JavaEnable",
"Title",
"GoodEvent",
-- EventTime is stored as unix seconds in the parquet; convert to a timestamp.
-- NOTE(review): in PostgreSQL to_timestamp() yields a timestamptz, so the
-- ::timestamp cast depends on the session time zone — confirm Umbra runs in
-- UTC here (applies to all three to_timestamp() conversions in this list).
to_timestamp("EventTime")::timestamp,
-- EventDate is a day count since the epoch; add it to the epoch date.
(DATE '1970-01-01' + "EventDate"::int),
"CounterID",
"ClientIP",
"RegionID",
"UserID",
"CounterClass",
"OS",
"UserAgent",
"URL",
"Referer",
"IsRefresh",
"RefererCategoryID",
"RefererRegionID",
"URLCategoryID",
"URLRegionID",
"ResolutionWidth",
"ResolutionHeight",
"ResolutionDepth",
"FlashMajor",
"FlashMinor",
"FlashMinor2",
"NetMajor",
"NetMinor",
"UserAgentMajor",
"UserAgentMinor",
"CookieEnable",
"JavascriptEnable",
"IsMobile",
"MobilePhone",
"MobilePhoneModel",
"Params",
"IPNetworkID",
"TraficSourceID",
"SearchEngineID",
"SearchPhrase",
"AdvEngineID",
"IsArtifical",
"WindowClientWidth",
"WindowClientHeight",
"ClientTimeZone",
-- ClientEventTime: unix seconds, converted the same way as EventTime.
to_timestamp("ClientEventTime")::timestamp,
"SilverlightVersion1",
"SilverlightVersion2",
"SilverlightVersion3",
"SilverlightVersion4",
"PageCharset",
"CodeVersion",
"IsLink",
"IsDownload",
"IsNotBounce",
"FUniqID",
"OriginalURL",
"HID",
"IsOldCounter",
"IsEvent",
"IsParameter",
"DontCountHits",
"WithHash",
"HitColor",
-- LocalEventTime: unix seconds, converted the same way as EventTime.
to_timestamp("LocalEventTime")::timestamp,
"Age",
"Sex",
"Income",
"Interests",
"Robotness",
"RemoteIP",
"WindowName",
"OpenerName",
"HistoryLength",
"BrowserLanguage",
"BrowserCountry",
"SocialNetwork",
"SocialAction",
"HTTPError",
"SendTiming",
"DNSTiming",
"ConnectTiming",
"ResponseStartTiming",
"ResponseEndTiming",
"FetchTiming",
"SocialSourceNetworkID",
"SocialSourcePage",
"ParamPrice",
"ParamOrderID",
"ParamCurrency",
"ParamCurrencyID",
"OpenstatServiceName",
"OpenstatCampaignID",
"OpenstatAdID",
"OpenstatSourceID",
"UTMSource",
"UTMMedium",
"UTMCampaign",
"UTMContent",
"UTMTerm",
"FromTag",
"HasGCLID",
"RefererHash",
"URLHash",
"CLID"
-- Source path is inside the container (the '/data' bind mount).
from umbra.parquetview('/data/hits.parquet');
10 changes: 5 additions & 5 deletions umbra/load
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
set -eu

mkdir -p data
mv hits.tsv data/
mv hits.parquet data/
chmod -R 777 data

# create.sql for umbra both creates the table and ingests the data (via an
# INSERT from umbra.parquetview). Use
Expand All @@ -16,14 +16,14 @@ PGPASSWORD=postgres psql -p 5432 -h 127.0.0.1 -U postgres \
# partial table on memory-constrained hosts (16 GB c6a.4xlarge can't
# hold the full mmap working set), letting the benchmark proceed and
# producing implausibly fast warm timings on the surviving subset.
# ClickBench's hits dataset is 99,997,497 rows; allow a small margin.
# ClickBench's hits dataset is exactly 99,997,497 rows.
expected=99997497
got=$(PGPASSWORD=postgres psql -p 5432 -h 127.0.0.1 -U postgres -tAq \
-c 'SELECT count(*) FROM hits')
if [ "$got" -lt $((expected - 100)) ]; then
echo "umbra/load: hits has $got rows, expected ~$expected — partial load" >&2
if [ "$got" -ne "$expected" ]; then
echo "umbra/load: hits has $got rows, expected $expected — partial load" >&2
exit 1
fi

rm -f data/hits.tsv
rm -f data/hits.parquet
sync
235 changes: 235 additions & 0 deletions umbra/results/20260619/c6a.4xlarge.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,235 @@
{
"system": "Umbra",
"date": "2026-06-19",
"machine": "c6a.4xlarge",
"cluster_size": 1,
"proprietary": "yes",
"hardware": "cpu",
"tuned": "no",
"tags": [
"C++",
"column-oriented",
"PostgreSQL compatible"
],
"load_time": 118.635,
"data_size": 9642782263,
"concurrent_qps": 7.443,
"concurrent_error_ratio": 0.325,
"result": [
[
0.149,
0.008,
0.008
],
[
0.22,
0.004,
0.004
],
[
0.221,
0.027,
0.026
],
[
0.296,
0.028,
0.027
],
[
0.348,
0.134,
0.134
],
[
0.498,
0.171,
0.171
],
[
0.194,
0.024,
0.025
],
[
0.244,
0.005,
0.005
],
[
0.46,
0.161,
0.16
],
[
0.603,
0.231,
0.227
],
[
0.392,
0.026,
0.026
],
[
0.271,
0.028,
0.028
],
[
0.523,
0.161,
0.16
],
[
1.144,
0.306,
0.307
],
[
0.547,
0.179,
0.178
],
[
0.393,
0.169,
0.171
],
[
1.055,
0.361,
0.362
],
[
0.973,
0.222,
0.222
],
[
3.378,
0.866,
0.829
],
[
0.348,
0.002,
0.002
],
[
4.185,
0.273,
0.273
],
[
5.07,
0.082,
0.081
],
[
9.086,
0.147,
0.147
],
[
1.833,
0.011,
0.011
],
[
0.236,
0.018,
0.006
],
[
0.256,
0.01,
0.01
],
[
0.248,
0.006,
0.006
],
[
4.202,
0.287,
0.289
],
[
4.2,
1.51,
1.503
],
[
0.167,
0.028,
0.03
],
[
0.785,
0.083,
0.083
],
[
3.813,
0.131,
0.129
],
[
3.974,
1.315,
1.314
],
[
4.468,
0.926,
0.779
],
[
4.462,
0.863,
0.779
],
[
0.299,
0.127,
0.124
],
[
0.252,
0.01,
0.011
],
[
0.233,
0.006,
0.006
],
[
0.27,
0.003,
0.003
],
[
0.306,
0.026,
0.022
],
[
0.224,
0.004,
0.003
],
[
0.212,
0.003,
0.003
],
[
0.217,
0.004,
0.004
]
]
}
Loading