Skip to content

Commit

Permalink
Add index creation for chunks without segmentby
Browse files Browse the repository at this point in the history
Previously, for chunks without a segmentby column specified in the
compression settings, no indexes were created. This change allows
indexes to be created using only the orderby columns in this scenario.

Since there is no segmentby in the compression settings for these chunks,
we also disable segmentwise recompression for them, as it is more optimal
to do a full recompression in this case.
  • Loading branch information
kpan2034 committed Jan 10, 2025
1 parent b181aaa commit 3383294
Show file tree
Hide file tree
Showing 18 changed files with 532 additions and 532 deletions.
1 change: 1 addition & 0 deletions .unreleased/pr_7436
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Implements: #7436 Add index creation on orderby columns
8 changes: 8 additions & 0 deletions tsl/src/compression/api.c
Original file line number Diff line number Diff line change
Expand Up @@ -1051,6 +1051,14 @@ get_compressed_chunk_index_for_recompression(Chunk *uncompressed_chunk)

CompressionSettings *settings = ts_compression_settings_get(compressed_chunk->table_id);

int num_segmentby = ts_array_length(settings->fd.segmentby);

if(num_segmentby == 0) {
table_close(compressed_chunk_rel, NoLock);
table_close(uncompressed_chunk_rel, NoLock);
return InvalidOid;
}

CatalogIndexState indstate = CatalogOpenIndexes(compressed_chunk_rel);
Oid index_oid = get_compressed_chunk_index(indstate, settings);
CatalogCloseIndexes(indstate);
Expand Down
5 changes: 0 additions & 5 deletions tsl/src/compression/compression_storage.c
Original file line number Diff line number Diff line change
Expand Up @@ -308,11 +308,6 @@ create_compressed_chunk_indexes(Chunk *chunk, CompressionSettings *settings)
}
}

if (list_length(indexcols) == 0)
{
return;
}

SortByDir ordering;
SortByNulls nulls_ordering;

Expand Down
12 changes: 6 additions & 6 deletions tsl/test/expected/compression_bgw.out
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,8 @@ WHERE compression_status LIKE 'Compressed'
ORDER BY chunk_name;
chunk_name | before_compression_total_bytes | after_compression_total_bytes
------------------+--------------------------------+-------------------------------
_hyper_3_5_chunk | 24576 | 24576
_hyper_3_6_chunk | 24576 | 24576
_hyper_3_5_chunk | 24576 | 40960
_hyper_3_6_chunk | 24576 | 40960
(2 rows)

--integer tests
Expand Down Expand Up @@ -215,8 +215,8 @@ WHERE compression_status LIKE 'Compressed'
ORDER BY chunk_name;
chunk_name | before_compression_total_bytes | after_compression_total_bytes
-------------------+--------------------------------+-------------------------------
_hyper_5_12_chunk | 24576 | 24576
_hyper_5_13_chunk | 24576 | 24576
_hyper_5_12_chunk | 24576 | 40960
_hyper_5_13_chunk | 24576 | 40960
(2 rows)

--bigint test
Expand Down Expand Up @@ -255,8 +255,8 @@ WHERE compression_status LIKE 'Compressed'
ORDER BY chunk_name;
chunk_name | before_compression_total_bytes | after_compression_total_bytes
-------------------+--------------------------------+-------------------------------
_hyper_7_19_chunk | 24576 | 24576
_hyper_7_20_chunk | 24576 | 24576
_hyper_7_19_chunk | 24576 | 40960
_hyper_7_20_chunk | 24576 | 40960
(2 rows)

--TEST 8
Expand Down
210 changes: 69 additions & 141 deletions tsl/test/expected/compression_ddl.out

Large diffs are not rendered by default.

124 changes: 124 additions & 0 deletions tsl/test/expected/compression_indexcreate.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
-- This file and its contents are licensed under the Timescale License.
-- Please see the included NOTICE for copyright information and
-- LICENSE-TIMESCALE for a copy of the license.
-- force index scan to be used when possible
set enable_seqscan to false;
\set PREFIX 'EXPLAIN (analyze, costs off, summary off, timing off) '
create table segind(time timestamptz, a int, b int);
select create_hypertable('segind', by_range('time'));
NOTICE: adding not-null constraint to column "time"
create_hypertable
-------------------
(1,t)
(1 row)

-- enable compression on hypertable with no segment by column
alter table segind set (timescaledb.compress, timescaledb.compress_segmentby='', timescaledb.compress_orderby='time, b');
insert into segind values('2024-11-08 10:31:28.436014-07', 1, 1), ('2024-11-08 10:32:28.436014-07', 2, 1), ('2024-11-08 10:33:28.436014-07', 3, 1), ('2024-11-08 10:34:28.436014-07', 2, 1), ('2024-11-08 10:35:28.436014-07', 1, 2), ('2024-11-08 10:36:28.436014-07', 4, 1);
-- compress chunk
-- this should create an index using orderby columns
select compress_chunk(show_chunks('segind'));
compress_chunk
----------------------------------------
_timescaledb_internal._hyper_1_1_chunk
(1 row)

-- query using orderby columns should use the index
:PREFIX select * from segind where b = 1;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------
Custom Scan (DecompressChunk) on _hyper_1_1_chunk (actual rows=5 loops=1)
Vectorized Filter: (b = 1)
Rows Removed by Filter: 1
-> Index Scan using compress_hyper_2_2_chunk__ts_meta_min_1__ts_meta_max_1__ts__idx on compress_hyper_2_2_chunk (actual rows=1 loops=1)
Index Cond: ((_ts_meta_min_2 <= 1) AND (_ts_meta_max_2 >= 1))
(5 rows)

:PREFIX select * from segind where time = '2024-11-08 10:32:28.436014-07';
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Custom Scan (DecompressChunk) on _hyper_1_1_chunk (actual rows=1 loops=1)
Vectorized Filter: ("time" = 'Fri Nov 08 09:32:28.436014 2024 PST'::timestamp with time zone)
Rows Removed by Filter: 5
-> Index Scan using compress_hyper_2_2_chunk__ts_meta_min_1__ts_meta_max_1__ts__idx on compress_hyper_2_2_chunk (actual rows=1 loops=1)
Index Cond: ((_ts_meta_min_1 <= 'Fri Nov 08 09:32:28.436014 2024 PST'::timestamp with time zone) AND (_ts_meta_max_1 >= 'Fri Nov 08 09:32:28.436014 2024 PST'::timestamp with time zone))
(5 rows)

:PREFIX select * from segind where b = 1 and time = '2024-11-08 10:32:28.436014-07';
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Custom Scan (DecompressChunk) on _hyper_1_1_chunk (actual rows=1 loops=1)
Vectorized Filter: ((b = 1) AND ("time" = 'Fri Nov 08 09:32:28.436014 2024 PST'::timestamp with time zone))
Rows Removed by Filter: 5
-> Index Scan using compress_hyper_2_2_chunk__ts_meta_min_1__ts_meta_max_1__ts__idx on compress_hyper_2_2_chunk (actual rows=1 loops=1)
Index Cond: ((_ts_meta_min_1 <= 'Fri Nov 08 09:32:28.436014 2024 PST'::timestamp with time zone) AND (_ts_meta_max_1 >= 'Fri Nov 08 09:32:28.436014 2024 PST'::timestamp with time zone) AND (_ts_meta_min_2 <= 1) AND (_ts_meta_max_2 >= 1))
(5 rows)

-- a query on another column should perform a seq scan since there is no index on it
:PREFIX select * from segind where a = 1;
QUERY PLAN
---------------------------------------------------------------------------
Custom Scan (DecompressChunk) on _hyper_1_1_chunk (actual rows=2 loops=1)
Vectorized Filter: (a = 1)
Rows Removed by Filter: 4
-> Seq Scan on compress_hyper_2_2_chunk (actual rows=1 loops=1)
(4 rows)

-- decompress the chunk to drop the index
select decompress_chunk(show_chunks('segind'));
decompress_chunk
----------------------------------------
_timescaledb_internal._hyper_1_1_chunk
(1 row)

-- change compression settings to use segmentby column
alter table segind set (timescaledb.compress, timescaledb.compress_segmentby='a', timescaledb.compress_orderby='time, b');
-- compress chunk
-- this should create an index using segmentby and orderby columns
select compress_chunk(show_chunks('segind'));
compress_chunk
----------------------------------------
_timescaledb_internal._hyper_1_1_chunk
(1 row)

-- queries using segmentby or orderby columns should use the index
:PREFIX select * from segind where b = 1;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------
Custom Scan (DecompressChunk) on _hyper_1_1_chunk (actual rows=5 loops=1)
Vectorized Filter: (b = 1)
Rows Removed by Filter: 1
-> Index Scan using compress_hyper_2_3_chunk_a__ts_meta_min_1__ts_meta_max_1__t_idx on compress_hyper_2_3_chunk (actual rows=4 loops=1)
Index Cond: ((_ts_meta_min_2 <= 1) AND (_ts_meta_max_2 >= 1))
(5 rows)

:PREFIX select * from segind where time = '2024-11-08 10:32:28.436014-07';
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Custom Scan (DecompressChunk) on _hyper_1_1_chunk (actual rows=1 loops=1)
Vectorized Filter: ("time" = 'Fri Nov 08 09:32:28.436014 2024 PST'::timestamp with time zone)
Rows Removed by Filter: 3
-> Index Scan using compress_hyper_2_3_chunk_a__ts_meta_min_1__ts_meta_max_1__t_idx on compress_hyper_2_3_chunk (actual rows=2 loops=1)
Index Cond: ((_ts_meta_min_1 <= 'Fri Nov 08 09:32:28.436014 2024 PST'::timestamp with time zone) AND (_ts_meta_max_1 >= 'Fri Nov 08 09:32:28.436014 2024 PST'::timestamp with time zone))
(5 rows)

:PREFIX select * from segind where b = 1 and time = '2024-11-08 10:32:28.436014-07';
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Custom Scan (DecompressChunk) on _hyper_1_1_chunk (actual rows=1 loops=1)
Vectorized Filter: ((b = 1) AND ("time" = 'Fri Nov 08 09:32:28.436014 2024 PST'::timestamp with time zone))
Rows Removed by Filter: 3
-> Index Scan using compress_hyper_2_3_chunk_a__ts_meta_min_1__ts_meta_max_1__t_idx on compress_hyper_2_3_chunk (actual rows=2 loops=1)
Index Cond: ((_ts_meta_min_1 <= 'Fri Nov 08 09:32:28.436014 2024 PST'::timestamp with time zone) AND (_ts_meta_max_1 >= 'Fri Nov 08 09:32:28.436014 2024 PST'::timestamp with time zone) AND (_ts_meta_min_2 <= 1) AND (_ts_meta_max_2 >= 1))
(5 rows)

:PREFIX select * from segind where a = 1;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------
Custom Scan (DecompressChunk) on _hyper_1_1_chunk (actual rows=2 loops=1)
-> Index Scan using compress_hyper_2_3_chunk_a__ts_meta_min_1__ts_meta_max_1__t_idx on compress_hyper_2_3_chunk (actual rows=1 loops=1)
Index Cond: (a = 1)
(3 rows)

-- cleanup
RESET enable_seqscan;
44 changes: 17 additions & 27 deletions tsl/test/expected/compression_insert.out
Original file line number Diff line number Diff line change
Expand Up @@ -790,13 +790,11 @@ SELECT compress_chunk(format('%I.%I',chunk_schema,chunk_name), true) FROM timesc

-- should be ordered append
:PREFIX SELECT * FROM test_ordering ORDER BY 1;
QUERY PLAN
--------------------------------------------------------------------------------------------------------
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------
Custom Scan (DecompressChunk) on _hyper_13_20_chunk
-> Sort
Sort Key: compress_hyper_14_21_chunk._ts_meta_min_1, compress_hyper_14_21_chunk._ts_meta_max_1
-> Seq Scan on compress_hyper_14_21_chunk
(4 rows)
-> Index Scan Backward using compress_hyper_14_21_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_14_21_chunk
(2 rows)

INSERT INTO test_ordering SELECT 1;
-- should not be ordered append
Expand All @@ -807,39 +805,35 @@ INSERT INTO test_ordering SELECT 1;
-- It was hard to include a path without pushed down sort for consideration, as `add_path` would reject
-- the path with sort pushdown, which is desirable in most cases
:PREFIX SELECT * FROM test_ordering ORDER BY 1;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------
Custom Scan (ChunkAppend) on test_ordering
Order: test_ordering."time"
-> Merge Append
Sort Key: _hyper_13_20_chunk."time"
-> Custom Scan (DecompressChunk) on _hyper_13_20_chunk
-> Sort
Sort Key: compress_hyper_14_21_chunk._ts_meta_min_1, compress_hyper_14_21_chunk._ts_meta_max_1
-> Seq Scan on compress_hyper_14_21_chunk
-> Index Scan Backward using compress_hyper_14_21_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_14_21_chunk
-> Sort
Sort Key: _hyper_13_20_chunk."time"
-> Seq Scan on _hyper_13_20_chunk
(11 rows)
(9 rows)

INSERT INTO test_ordering VALUES (105),(104),(103);
-- should be ordered append
:PREFIX SELECT * FROM test_ordering ORDER BY 1;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------
Custom Scan (ChunkAppend) on test_ordering
Order: test_ordering."time"
-> Merge Append
Sort Key: _hyper_13_20_chunk."time"
-> Custom Scan (DecompressChunk) on _hyper_13_20_chunk
-> Sort
Sort Key: compress_hyper_14_21_chunk._ts_meta_min_1, compress_hyper_14_21_chunk._ts_meta_max_1
-> Seq Scan on compress_hyper_14_21_chunk
-> Index Scan Backward using compress_hyper_14_21_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_14_21_chunk
-> Sort
Sort Key: _hyper_13_20_chunk."time"
-> Seq Scan on _hyper_13_20_chunk
-> Index Only Scan Backward using _hyper_13_22_chunk_test_ordering_time_idx on _hyper_13_22_chunk
(12 rows)
(10 rows)

--insert into compressed + uncompressed chunk
INSERT INTO test_ordering VALUES (21), (22),(113);
Expand Down Expand Up @@ -881,19 +875,15 @@ SELECT compress_chunk(format('%I.%I',chunk_schema,chunk_name), true) FROM timesc

-- should be ordered append
:PREFIX SELECT * FROM test_ordering ORDER BY 1;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------
Custom Scan (ChunkAppend) on test_ordering
Order: test_ordering."time"
-> Custom Scan (DecompressChunk) on _hyper_13_20_chunk
-> Sort
Sort Key: compress_hyper_14_23_chunk._ts_meta_min_1, compress_hyper_14_23_chunk._ts_meta_max_1
-> Seq Scan on compress_hyper_14_23_chunk
-> Index Scan Backward using compress_hyper_14_23_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_14_23_chunk
-> Custom Scan (DecompressChunk) on _hyper_13_22_chunk
-> Sort
Sort Key: compress_hyper_14_24_chunk._ts_meta_min_1, compress_hyper_14_24_chunk._ts_meta_max_1
-> Seq Scan on compress_hyper_14_24_chunk
(10 rows)
-> Index Scan Backward using compress_hyper_14_24_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_14_24_chunk
(6 rows)

SET timescaledb.enable_decompression_sorted_merge = 1;
-- TEST cagg triggers with insert into compressed chunk
Expand Down
4 changes: 2 additions & 2 deletions tsl/test/expected/compression_qualpushdown.out
Original file line number Diff line number Diff line change
Expand Up @@ -135,9 +135,9 @@ order by factorid, end_dt;
Output: _hyper_3_4_chunk.factorid, _hyper_3_4_chunk.end_dt, _hyper_3_4_chunk.logret
Filter: ((_hyper_3_4_chunk.end_dt >= '12-10-2012'::date) AND (_hyper_3_4_chunk.end_dt <= '12-11-2012'::date))
Vectorized Filter: (_hyper_3_4_chunk.fmid = 56)
-> Seq Scan on _timescaledb_internal.compress_hyper_4_5_chunk
-> Index Scan using compress_hyper_4_5_chunk__ts_meta_min_1__ts_meta_max_1_idx on _timescaledb_internal.compress_hyper_4_5_chunk
Output: compress_hyper_4_5_chunk._ts_meta_count, compress_hyper_4_5_chunk.fmid, compress_hyper_4_5_chunk.factorid, compress_hyper_4_5_chunk.start_dt, compress_hyper_4_5_chunk._ts_meta_min_1, compress_hyper_4_5_chunk._ts_meta_max_1, compress_hyper_4_5_chunk.end_dt, compress_hyper_4_5_chunk.interval_number, compress_hyper_4_5_chunk.logret, compress_hyper_4_5_chunk.knowledge_date
Filter: ((compress_hyper_4_5_chunk._ts_meta_max_1 >= '12-10-2012'::date) AND (compress_hyper_4_5_chunk._ts_meta_min_1 <= '12-11-2012'::date))
Index Cond: ((compress_hyper_4_5_chunk._ts_meta_min_1 <= '12-11-2012'::date) AND (compress_hyper_4_5_chunk._ts_meta_max_1 >= '12-10-2012'::date))
(10 rows)

--no pushdown here
Expand Down
Loading

0 comments on commit 3383294

Please sign in to comment.