Skip to content

Commit

Permalink
Fix NULL handling for in-memory tuple filtering
Browse files Browse the repository at this point in the history
During DML decompression, we filter out batches that don't
need decompression using in-memory filtering which filters
compressed tuples using scankeys. Those scankeys were not
detecting NULL values correctly for unique constraints
which use NULLS NOT DISTINCT setting. This change uses
existing codebase to properly detect those tuples when
using SK_ISNULL flag.
  • Loading branch information
antekresic committed Dec 20, 2024
1 parent b01d832 commit afdceb5
Show file tree
Hide file tree
Showing 6 changed files with 61 additions and 33 deletions.
22 changes: 16 additions & 6 deletions tsl/src/compression/compression_dml.c
Original file line number Diff line number Diff line change
Expand Up @@ -574,6 +574,21 @@ decompress_batches_scan(Relation in_rel, Relation out_rel, Relation index_rel, S
return stats;
}

static bool
slot_keys_test(TupleTableSlot *slot, int nkeys, ScanKey keys)
{
int cur_nkeys = nkeys;
ScanKey cur_key = keys;

for (; cur_nkeys--; cur_key++)
{
if (!slot_key_test(slot, cur_key))
return false;
}

return true;
}

static bool
batch_matches(RowDecompressor *decompressor, ScanKeyData *scankeys, int num_scankeys,
tuple_filtering_constraints *constraints, bool *skip_current_tuple)
Expand All @@ -585,12 +600,7 @@ batch_matches(RowDecompressor *decompressor, ScanKeyData *scankeys, int num_scan
for (int row = 0; row < num_tuples; row++)
{
TupleTableSlot *decompressed_slot = decompressor->decompressed_slots[row];
HeapTuple tuple = decompressed_slot->tts_ops->get_heap_tuple(decompressed_slot);
#if PG16_LT
HeapKeyTest(tuple, decompressor->out_desc, num_scankeys, scankeys, valid);
#else
valid = HeapKeyTest(tuple, decompressor->out_desc, num_scankeys, scankeys);
#endif
valid = slot_keys_test(decompressed_slot, num_scankeys, scankeys);
if (valid)
{
if (constraints)
Expand Down
2 changes: 2 additions & 0 deletions tsl/src/compression/compression_dml.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ typedef struct tuple_filtering_constraints
bool nullsnotdistinct;
} tuple_filtering_constraints;

bool slot_key_test(TupleTableSlot *slot, ScanKey skey);

ScanKeyData *build_mem_scankeys_from_slot(Oid ht_relid, CompressionSettings *settings,
Relation out_rel,
tuple_filtering_constraints *constraints,
Expand Down
23 changes: 23 additions & 0 deletions tsl/src/compression/compression_scankey.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,29 @@ static int create_segment_filter_scankey(Relation in_rel, char *segment_filter_c
Bitmapset **null_columns, Datum value, bool is_null_check,
bool is_array_op);

/*
* Test ScanKey against a slot.
*
* Unlike HeapKeyTest, this function takes into account SK_ISNULL
* and works correctly when looking for null values.
*/
bool
slot_key_test(TupleTableSlot *compressed_slot, ScanKey key)
{
Datum val;
bool is_null;
val = slot_getattr(compressed_slot, key->sk_attno, &is_null);

/* NULL values only match the value is NULL */
if (key->sk_flags & SK_ISNULL)
return is_null;

if (is_null)
return false;

return DatumGetBool(FunctionCall2Coll(&key->sk_func, key->sk_collation, val, key->sk_argument));
}

/*
* Build scankeys for decompressed tuple to check if it is part of the batch.
*
Expand Down
15 changes: 1 addition & 14 deletions tsl/src/compression/recompress.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <utils/typcache.h>

#include "compression.h"
#include "compression_dml.h"
#include "create.h"
#include "guc.h"
#include "hypercore/hypercore_handler.h"
Expand Down Expand Up @@ -517,20 +518,6 @@ update_orderby_scankeys(TupleTableSlot *uncompressed_slot, CompressedSegmentInfo
}
}

static bool
slot_key_test(TupleTableSlot *compressed_slot, ScanKey key)
{
Datum val;
bool is_null;
val = slot_getattr(compressed_slot, key->sk_attno, &is_null);

/* NULL values only match the value is NULL */
if (key->sk_flags & SK_ISNULL)
return is_null;

return DatumGetBool(FunctionCall2Coll(&key->sk_func, key->sk_collation, val, key->sk_argument));
}

static enum Batch_match_result
handle_null_scan(int key_flags, bool nulls_first, enum Batch_match_result result)
{
Expand Down
20 changes: 12 additions & 8 deletions tsl/test/shared/expected/compression_nulls_not_distinct.out
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,18 @@
-- only exists since then. once we drop support for PG14, this test
-- can be moved to non-shared compression_conflicts test
set timescaledb.debug_compression_path_info to on;
CREATE TABLE nulls_not_distinct(time timestamptz not null, device text, value float);
CREATE UNIQUE INDEX ON nulls_not_distinct (time, device) NULLS NOT DISTINCT;
CREATE TABLE nulls_not_distinct(time timestamptz not null, device text, label text, value float);
CREATE UNIQUE INDEX ON nulls_not_distinct (time, device, label) NULLS NOT DISTINCT;
SELECT table_name FROM create_hypertable('nulls_not_distinct', 'time');
table_name
nulls_not_distinct
(1 row)

ALTER TABLE nulls_not_distinct SET (timescaledb.compress, timescaledb.compress_segmentby = 'device');
NOTICE: default order by for hypertable "nulls_not_distinct" is set to ""time" DESC"
INSERT INTO nulls_not_distinct SELECT '2024-01-01'::timestamptz + format('%s',i)::interval, 'd1', i FROM generate_series(1,6000) g(i);
INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.5', NULL, 1);
NOTICE: default order by for hypertable "nulls_not_distinct" is set to ""time" DESC, label"
INSERT INTO nulls_not_distinct SELECT '2024-01-01'::timestamptz + format('%s',i)::interval, 'd1', 'l1', i FROM generate_series(1,6000) g(i);
INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.5', NULL, 'l1', 1);
INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.5', 'd2', NULL, 1);
SELECT count(compress_chunk(c)) FROM show_chunks('nulls_not_distinct') c;
INFO: using tuplesort to scan rows from "_hyper_X_X_chunk" for compression
count
Expand All @@ -25,9 +26,12 @@ INFO: using tuplesort to scan rows from "_hyper_X_X_chunk" for compression

-- shouldn't succeed because nulls are not distinct
\set ON_ERROR_STOP 0
INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.5', NULL, 1);
INFO: Using index scan with scan keys: index 1, heap 2, memory 1.
ERROR: duplicate key value violates unique constraint "_hyper_X_X_chunk_nulls_not_distinct_time_device_idx"
INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.5', NULL, 'l1', 1);
INFO: Using index scan with scan keys: index 1, heap 4, memory 2.
ERROR: duplicate key value violates unique constraint "_hyper_X_X_chunk_nulls_not_distinct_time_device_label_idx"
INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.5', 'd2', NULL, 1);
INFO: Using index scan with scan keys: index 1, heap 2, memory 2.
ERROR: duplicate key value violates unique constraint "_hyper_X_X_chunk_nulls_not_distinct_time_device_label_idx"
\set ON_ERROR_STOP 1
RESET timescaledb.debug_compression_path_info;
DROP TABLE nulls_not_distinct;
12 changes: 7 additions & 5 deletions tsl/test/shared/sql/compression_nulls_not_distinct.sql
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,21 @@
-- can be moved to non-shared compression_conflicts test

set timescaledb.debug_compression_path_info to on;
CREATE TABLE nulls_not_distinct(time timestamptz not null, device text, value float);
CREATE UNIQUE INDEX ON nulls_not_distinct (time, device) NULLS NOT DISTINCT;
CREATE TABLE nulls_not_distinct(time timestamptz not null, device text, label text, value float);
CREATE UNIQUE INDEX ON nulls_not_distinct (time, device, label) NULLS NOT DISTINCT;
SELECT table_name FROM create_hypertable('nulls_not_distinct', 'time');
ALTER TABLE nulls_not_distinct SET (timescaledb.compress, timescaledb.compress_segmentby = 'device');

INSERT INTO nulls_not_distinct SELECT '2024-01-01'::timestamptz + format('%s',i)::interval, 'd1', i FROM generate_series(1,6000) g(i);
INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.5', NULL, 1);
INSERT INTO nulls_not_distinct SELECT '2024-01-01'::timestamptz + format('%s',i)::interval, 'd1', 'l1', i FROM generate_series(1,6000) g(i);
INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.5', NULL, 'l1', 1);
INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.5', 'd2', NULL, 1);

SELECT count(compress_chunk(c)) FROM show_chunks('nulls_not_distinct') c;

-- shouldn't succeed because nulls are not distinct
\set ON_ERROR_STOP 0
INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.5', NULL, 1);
INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.5', NULL, 'l1', 1);
INSERT INTO nulls_not_distinct VALUES ('2024-01-01 0:00:00.5', 'd2', NULL, 1);
\set ON_ERROR_STOP 1

RESET timescaledb.debug_compression_path_info;
Expand Down

0 comments on commit afdceb5

Please sign in to comment.