Skip to content

Commit

Permalink
Fix NaN-handling for vectorized aggregation
Browse files Browse the repository at this point in the history
The vector agg functions didn't handle NaN-floats properly for min/max
functions, which produced the wrong `min()` output when NaN values
were presents (e.g., picking NaN over -Inf). NaN-checks are different
for min and max so the checks are moved to the predicate macro instead
of being defined in the template function.

The previously erroneous handling of NaN is evident by some of the
changes in the test output. However, some queries didn't run any
actual vectorized agg plans when they should have, thus "accidentally"
producing the correct min result in the test file. In those cases,
instead of the vectorized plan, the test ran an init plan doing a sort
with a limit of 1 to find the min. Disabling sort in the test ensures
the plan is vectorized, and thus producing the erroneous result when
the fix is not present.
  • Loading branch information
erimatnor committed Jan 9, 2025
1 parent a03b43a commit 92007ca
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 7 deletions.
1 change: 1 addition & 0 deletions .unreleased/pr_7584
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fixes: #7584 Fix NaN-handling for vectorized aggregation
3 changes: 1 addition & 2 deletions tsl/src/nodes/vector_agg/function/minmax_arithmetic_single.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,7 @@ FUNCTION_NAME(vector_impl)(void *agg_state, int n, const CTYPE *values, const ui
* Note that we have to properly handle NaNs and Infinities for floats.
*/
const bool do_replace =
new_value_ok && (unlikely(!outer_isvalid) || PREDICATE(outer_result, new_value) ||
isnan((double) new_value));
new_value_ok && (unlikely(!outer_isvalid) || PREDICATE(outer_result, new_value));

outer_result = do_replace ? new_value : outer_result;
outer_isvalid = outer_isvalid || do_replace;
Expand Down
8 changes: 6 additions & 2 deletions tsl/src/nodes/vector_agg/function/minmax_templates.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,17 @@ minmax_emit(void *agg_state, Datum *out_result, bool *out_isnull)

/*
* Templated parts for vectorized min(), max().
*
* NaN handled similar to equivalent PG functions.
*/
#define AGG_NAME MIN
#define PREDICATE(CURRENT, NEW) ((CURRENT) > (NEW))
#define PREDICATE(CURRENT, NEW) \
unlikely(!isnan((double) NEW) && (isnan((double) CURRENT) || (CURRENT) > (NEW)))
#include "minmax_arithmetic_types.c"

#define AGG_NAME MAX
#define PREDICATE(CURRENT, NEW) ((CURRENT) < (NEW))
#define PREDICATE(CURRENT, NEW) \
unlikely(!isnan((double) CURRENT) && (isnan((double) NEW) || (CURRENT) < (NEW)))
#include "minmax_arithmetic_types.c"

#undef AGG_NAME
10 changes: 7 additions & 3 deletions tsl/test/expected/vector_agg_functions.out
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,10 @@ set timescaledb.debug_require_vector_agg = :'guc_value';
---- on float4 due to different numeric stability in our and PG implementations.
--set timescaledb.enable_chunkwise_aggregation to off; set timescaledb.enable_vectorized_aggregation to off; set timescaledb.debug_require_vector_agg = 'forbid';
set max_parallel_workers_per_gather = 0;
-- Disable sorting to force vectorized agg plans for min and max,
-- which otherwise can produce a non-vectorized init-plan that does a
-- sort with limit 1.
set enable_sort = false;
select
format('%sselect %s%s(%s) from aggfns%s%s%s;',
explain,
Expand Down Expand Up @@ -378,13 +382,13 @@ select ss, min(cfloat4) from aggfns group by ss order by min(cfloat4), ss limit
3 | -Infinity
4 | -49.9999
6 | -49.9995
11 | -49.9991
7 | -49.9984
8 | -49.9969
0 | -49.9949
5 | -49.9942
9 | -49.9911
| -45.4083
11 | NaN
(10 rows)

select stddev(cfloat4) from aggfns;
Expand Down Expand Up @@ -1997,14 +2001,14 @@ select ss, min(cfloat4) from aggfns where cfloat8 > 0 group by ss order by min(c
----+-----------
3 | -Infinity
4 | -49.9993
11 | -49.9974
8 | -49.9969
7 | -49.9969
0 | -49.9915
9 | -49.9911
5 | -49.9892
6 | -49.9891
| -41.6131
11 | NaN
(10 rows)

select stddev(cfloat4) from aggfns where cfloat8 > 0;
Expand Down Expand Up @@ -5193,13 +5197,13 @@ select ss, min(cfloat4) from aggfns where cfloat8 < 1000 group by ss order by mi
3 | -Infinity
4 | -49.9999
6 | -49.9995
11 | -49.9991
7 | -49.9984
8 | -49.9969
0 | -49.9949
5 | -49.9942
9 | -49.9911
| -45.4083
11 | NaN
(10 rows)

select stddev(cfloat4) from aggfns where cfloat8 < 1000;
Expand Down
4 changes: 4 additions & 0 deletions tsl/test/sql/vector_agg_functions.sql
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,10 @@ set timescaledb.debug_require_vector_agg = :'guc_value';
--set timescaledb.enable_chunkwise_aggregation to off; set timescaledb.enable_vectorized_aggregation to off; set timescaledb.debug_require_vector_agg = 'forbid';

set max_parallel_workers_per_gather = 0;
-- Disable sorting to force vectorized agg plans for min and max,
-- which otherwise can produce a non-vectorized init-plan that does a
-- sort with limit 1.
set enable_sort = false;

select
format('%sselect %s%s(%s) from aggfns%s%s%s;',
Expand Down

0 comments on commit 92007ca

Please sign in to comment.