Skip to content

Commit 6b14bb5

Browse files
arseny114 and Arseny Kositsyn
authored
[PGPRO-11599] Fix wrong results returned when order_by_attach=TRUE. (#164)
With order_by_attach=TRUE, the RUM index stores the entries in the posting lists of keys that have additional information sorted by that additional information. For the remaining keys, the entries are sorted by TID. The scanGetItemRegular() function assumes that all posting lists are sorted the same way, so it does not work correctly when a scan mixes both kinds of keys. To fix this, the matching TIDs for keys that are ordered by TID are collected in advance into a bitmap. For keys that use the alternative order, a regular scan is performed, and each TID it produces is then checked against the bitmap. 1. Static functions from TIDBitmap were added to RUM as RumTIDBitmap. A function has been added to search for a TID inside a RumTIDBitmap. 2. Added a test that scans with keys that are ordered differently. 3. Fixed CI crashes at hardcore level. --------- Co-authored-by: Arseny Kositsyn <a.kositsyn@postgrespro.ru>
1 parent d666401 commit 6b14bb5

File tree

18 files changed

+11709
-7
lines changed

18 files changed

+11709
-7
lines changed

Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ EXTENSION = rum
55
EXTVERSION = 1.3
66
PGFILEDESC = "RUM index access method"
77

8-
OBJS = src/rumsort.o src/rum_ts_utils.o src/rumtsquery.o \
8+
OBJS = src/rumtidbitmap.o src/rumsort.o src/rum_ts_utils.o src/rumtsquery.o \
99
src/rumbtree.o src/rumbulk.o src/rumdatapage.o \
1010
src/rumentrypage.o src/rumget.o src/ruminsert.o \
1111
src/rumscan.o src/rumutil.o src/rumvacuum.o src/rumvalidate.o \
@@ -16,7 +16,7 @@ DATA = rum--1.0--1.1.sql rum--1.1--1.2.sql \
1616

1717
DATA_built = $(EXTENSION)--$(EXTVERSION).sql
1818

19-
INCLUDES = rum.h rumsort.h
19+
INCLUDES = rum.h rumsort.h rumtidbitmap.h
2020
RELATIVE_INCLUDES = $(addprefix src/, $(INCLUDES))
2121

2222
LDFLAGS_SL += $(filter -lm, $(LIBS))

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,9 @@ This module is available under the [license](LICENSE) similar to
4545

4646
Before building and installing **rum**, you should ensure that the following are installed:
4747

48-
* PostgreSQL version is 9.6+.
48+
* PostgreSQL version 12 or later.
49+
50+
* PostgreSQL 9.6 - 11 (but you need to copy `src/backend/nodes/tidbitmap.c` from the required PostgreSQL version to `contrib/rum/src/tidbitmap/tidbitmapXX.c` and include it in `contrib/rum/src/rumtidbitmap.c`)
4951

5052
Typical installation procedure may look like this:
5153

expected/altorder.out

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -573,3 +573,49 @@ SELECT id, d FROM atsts WHERE t @@ 'wr&q:*' AND d >= '2016-05-16 14:21:25' ORDE
573573
506 | Sun May 22 21:21:22.326724 2016
574574
(112 rows)
575575

576+
CREATE TABLE test_table (id bigint, folder bigint, time bigint, tsv tsvector);
577+
CREATE INDEX test_idx ON test_table USING rum(folder, tsv rum_tsvector_addon_ops, time) with (attach = 'time', to = 'tsv', order_by_attach=TRUE);
578+
INSERT INTO test_table (id, folder, time, tsv) VALUES
579+
(1, 10, 100, to_tsvector('wordA')),
580+
(2, 20, 200, to_tsvector('wordB')),
581+
(3, 10, 300, to_tsvector('wordA')),
582+
(4, 20, 400, to_tsvector('wordB')),
583+
(5, 20, 60, to_tsvector('wordB')),
584+
(6, 10, 40, to_tsvector('wordA')),
585+
(7, 20, 50, to_tsvector('wordB')),
586+
(8, 10, 30, to_tsvector('wordA'));
587+
EXPLAIN (costs off)
588+
SELECT * FROM test_table WHERE tsv @@ (to_tsquery('wordA')) AND (folder = 10::bigint);
589+
QUERY PLAN
590+
--------------------------------------------------------------------------------
591+
Index Scan using test_idx on test_table
592+
Index Cond: ((folder = '10'::bigint) AND (tsv @@ to_tsquery('wordA'::text)))
593+
(2 rows)
594+
595+
SELECT * FROM test_table WHERE tsv @@ (to_tsquery('wordA')) AND (folder = 10::bigint);
596+
id | folder | time | tsv
597+
----+--------+------+-----------
598+
8 | 10 | 30 | 'worda':1
599+
6 | 10 | 40 | 'worda':1
600+
1 | 10 | 100 | 'worda':1
601+
3 | 10 | 300 | 'worda':1
602+
(4 rows)
603+
604+
EXPLAIN (costs off)
605+
SELECT * FROM test_table WHERE tsv @@ (to_tsquery('wordA')) AND (folder = 10::bigint) ORDER BY time <=| 500::bigint;
606+
QUERY PLAN
607+
--------------------------------------------------------------------------------
608+
Index Scan using test_idx on test_table
609+
Index Cond: ((folder = '10'::bigint) AND (tsv @@ to_tsquery('wordA'::text)))
610+
Order By: ("time" <=| '500'::bigint)
611+
(3 rows)
612+
613+
SELECT * FROM test_table WHERE tsv @@ (to_tsquery('wordA')) AND (folder = 10::bigint) ORDER BY time <=| 500::bigint;
614+
id | folder | time | tsv
615+
----+--------+------+-----------
616+
3 | 10 | 300 | 'worda':1
617+
1 | 10 | 100 | 'worda':1
618+
6 | 10 | 40 | 'worda':1
619+
8 | 10 | 30 | 'worda':1
620+
(4 rows)
621+

expected/altorder_1.out

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -572,3 +572,51 @@ SELECT id, d FROM atsts WHERE t @@ 'wr&q:*' AND d >= '2016-05-16 14:21:25' ORDE
572572
506 | Sun May 22 21:21:22.326724 2016
573573
(112 rows)
574574

575+
CREATE TABLE test_table (id bigint, folder bigint, time bigint, tsv tsvector);
576+
CREATE INDEX test_idx ON test_table USING rum(folder, tsv rum_tsvector_addon_ops, time) with (attach = 'time', to = 'tsv', order_by_attach=TRUE);
577+
ERROR: doesn't support order index over pass-by-reference column
578+
INSERT INTO test_table (id, folder, time, tsv) VALUES
579+
(1, 10, 100, to_tsvector('wordA')),
580+
(2, 20, 200, to_tsvector('wordB')),
581+
(3, 10, 300, to_tsvector('wordA')),
582+
(4, 20, 400, to_tsvector('wordB')),
583+
(5, 20, 60, to_tsvector('wordB')),
584+
(6, 10, 40, to_tsvector('wordA')),
585+
(7, 20, 50, to_tsvector('wordB')),
586+
(8, 10, 30, to_tsvector('wordA'));
587+
EXPLAIN (costs off)
588+
SELECT * FROM test_table WHERE tsv @@ (to_tsquery('wordA')) AND (folder = 10::bigint);
589+
QUERY PLAN
590+
----------------------------------------------------------------------------
591+
Seq Scan on test_table
592+
Filter: ((folder = '10'::bigint) AND (tsv @@ to_tsquery('wordA'::text)))
593+
(2 rows)
594+
595+
SELECT * FROM test_table WHERE tsv @@ (to_tsquery('wordA')) AND (folder = 10::bigint);
596+
id | folder | time | tsv
597+
----+--------+------+-----------
598+
1 | 10 | 100 | 'worda':1
599+
3 | 10 | 300 | 'worda':1
600+
6 | 10 | 40 | 'worda':1
601+
8 | 10 | 30 | 'worda':1
602+
(4 rows)
603+
604+
EXPLAIN (costs off)
605+
SELECT * FROM test_table WHERE tsv @@ (to_tsquery('wordA')) AND (folder = 10::bigint) ORDER BY time <=| 500::bigint;
606+
QUERY PLAN
607+
----------------------------------------------------------------------------------
608+
Sort
609+
Sort Key: (("time" <=| '500'::bigint))
610+
-> Seq Scan on test_table
611+
Filter: ((folder = '10'::bigint) AND (tsv @@ to_tsquery('wordA'::text)))
612+
(4 rows)
613+
614+
SELECT * FROM test_table WHERE tsv @@ (to_tsquery('wordA')) AND (folder = 10::bigint) ORDER BY time <=| 500::bigint;
615+
id | folder | time | tsv
616+
----+--------+------+-----------
617+
3 | 10 | 300 | 'worda':1
618+
1 | 10 | 100 | 'worda':1
619+
6 | 10 | 40 | 'worda':1
620+
8 | 10 | 30 | 'worda':1
621+
(4 rows)
622+

expected/altorder_2.out

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -582,3 +582,54 @@ SELECT id, d FROM atsts WHERE t @@ 'wr&q:*' AND d >= '2016-05-16 14:21:25' ORDE
582582
506 | Sun May 22 21:21:22.326724 2016
583583
(112 rows)
584584

585+
CREATE TABLE test_table (id bigint, folder bigint, time bigint, tsv tsvector);
586+
CREATE INDEX test_idx ON test_table USING rum(folder, tsv rum_tsvector_addon_ops, time) with (attach = 'time', to = 'tsv', order_by_attach=TRUE);
587+
ERROR: doesn't support order index over pass-by-reference column
588+
INSERT INTO test_table (id, folder, time, tsv) VALUES
589+
(1, 10, 100, to_tsvector('wordA')),
590+
(2, 20, 200, to_tsvector('wordB')),
591+
(3, 10, 300, to_tsvector('wordA')),
592+
(4, 20, 400, to_tsvector('wordB')),
593+
(5, 20, 60, to_tsvector('wordB')),
594+
(6, 10, 40, to_tsvector('wordA')),
595+
(7, 20, 50, to_tsvector('wordB')),
596+
(8, 10, 30, to_tsvector('wordA'));
597+
EXPLAIN (costs off)
598+
SELECT * FROM test_table WHERE tsv @@ (to_tsquery('wordA')) AND (folder = 10::bigint);
599+
QUERY PLAN
600+
----------------------------------------------------------------------------
601+
Seq Scan on test_table
602+
Disabled Nodes: 1
603+
Filter: ((folder = '10'::bigint) AND (tsv @@ to_tsquery('wordA'::text)))
604+
(3 rows)
605+
606+
SELECT * FROM test_table WHERE tsv @@ (to_tsquery('wordA')) AND (folder = 10::bigint);
607+
id | folder | time | tsv
608+
----+--------+------+-----------
609+
1 | 10 | 100 | 'worda':1
610+
3 | 10 | 300 | 'worda':1
611+
6 | 10 | 40 | 'worda':1
612+
8 | 10 | 30 | 'worda':1
613+
(4 rows)
614+
615+
EXPLAIN (costs off)
616+
SELECT * FROM test_table WHERE tsv @@ (to_tsquery('wordA')) AND (folder = 10::bigint) ORDER BY time <=| 500::bigint;
617+
QUERY PLAN
618+
----------------------------------------------------------------------------------
619+
Sort
620+
Disabled Nodes: 1
621+
Sort Key: (("time" <=| '500'::bigint))
622+
-> Seq Scan on test_table
623+
Disabled Nodes: 1
624+
Filter: ((folder = '10'::bigint) AND (tsv @@ to_tsquery('wordA'::text)))
625+
(6 rows)
626+
627+
SELECT * FROM test_table WHERE tsv @@ (to_tsquery('wordA')) AND (folder = 10::bigint) ORDER BY time <=| 500::bigint;
628+
id | folder | time | tsv
629+
----+--------+------+-----------
630+
3 | 10 | 300 | 'worda':1
631+
1 | 10 | 100 | 'worda':1
632+
6 | 10 | 40 | 'worda':1
633+
8 | 10 | 30 | 'worda':1
634+
(4 rows)
635+

sql/altorder.sql

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,3 +98,25 @@ SELECT id, d FROM atsts WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER
9898
EXPLAIN (costs off)
9999
SELECT id, d FROM atsts WHERE t @@ 'wr&q:*' AND d >= '2016-05-16 14:21:25' ORDER BY d;
100100
SELECT id, d FROM atsts WHERE t @@ 'wr&q:*' AND d >= '2016-05-16 14:21:25' ORDER BY d;
101+
102+
CREATE TABLE test_table (id bigint, folder bigint, time bigint, tsv tsvector);
103+
CREATE INDEX test_idx ON test_table USING rum(folder, tsv rum_tsvector_addon_ops, time) with (attach = 'time', to = 'tsv', order_by_attach=TRUE);
104+
105+
INSERT INTO test_table (id, folder, time, tsv) VALUES
106+
(1, 10, 100, to_tsvector('wordA')),
107+
(2, 20, 200, to_tsvector('wordB')),
108+
(3, 10, 300, to_tsvector('wordA')),
109+
(4, 20, 400, to_tsvector('wordB')),
110+
(5, 20, 60, to_tsvector('wordB')),
111+
(6, 10, 40, to_tsvector('wordA')),
112+
(7, 20, 50, to_tsvector('wordB')),
113+
(8, 10, 30, to_tsvector('wordA'));
114+
115+
EXPLAIN (costs off)
116+
SELECT * FROM test_table WHERE tsv @@ (to_tsquery('wordA')) AND (folder = 10::bigint);
117+
SELECT * FROM test_table WHERE tsv @@ (to_tsquery('wordA')) AND (folder = 10::bigint);
118+
119+
EXPLAIN (costs off)
120+
SELECT * FROM test_table WHERE tsv @@ (to_tsquery('wordA')) AND (folder = 10::bigint) ORDER BY time <=| 500::bigint;
121+
SELECT * FROM test_table WHERE tsv @@ (to_tsquery('wordA')) AND (folder = 10::bigint) ORDER BY time <=| 500::bigint;
122+

src/rum.h

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "utils/memutils.h"
2424

2525
#include "rumsort.h"
26+
#include "rumtidbitmap.h"
2627

2728
/* RUM distance strategies */
2829
#define RUM_DISTANCE 20
@@ -758,8 +759,24 @@ typedef struct RumScanOpaqueData
758759
bool willSort; /* is there any columns in ordering */
759760
RumScanType scanType;
760761

761-
ScanDirection naturalOrder;
762-
bool secondPass;
762+
ScanDirection naturalOrder;
763+
bool secondPass;
764+
765+
/*
766+
* scanWithAltOrderKeys = true if there are several keys in the query
767+
* that are sorted in different orders (the entryGetItem() function for
768+
* different RumScanEntry will return the results to curItem in
769+
* different order).
770+
*
771+
* This happens when a multi-column RUM index is scanned and one of its
772+
* keys is ordered by additional information.
773+
*
774+
* In this case, all matching tids for keys that are ordered by tid will
775+
* be placed in the tbm. Keys ordered by additional information will be
776+
* scanned as usual.
777+
*/
778+
bool scanWithAltOrderKeys;
779+
RumTIDBitmap *tbm;
763780
} RumScanOpaqueData;
764781

765782
typedef RumScanOpaqueData *RumScanOpaque;

0 commit comments

Comments
 (0)