Skip to content

Commit

Permalink
fix bug with data_present
Browse files — browse the repository at this point in the history
  • Loading branch information
davidtsuk committed Jan 8, 2025
1 parent 857a9fe commit 0417e33
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 34 deletions.
14 changes: 5 additions & 9 deletions snuba/web/rpc/common/aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ class ExtrapolationContext(ABC):
sample_count: int

@property
def extrapolated_data_present(self) -> bool:
def data_present(self) -> bool:
return self.sample_count > 0

@property
Expand Down Expand Up @@ -108,16 +108,12 @@ def from_row(
elif custom_column_information.custom_column_id == "count":
sample_count = col_value

if (
confidence_interval is None
or average_sample_rate is None
or sample_count is None
):
if confidence_interval is None:
return GenericExtrapolationContext(
value=value,
confidence_interval=None,
average_sample_rate=0,
sample_count=0,
sample_count=sample_count if sample_count is not None else 0,
)

if is_percentile:
Expand Down Expand Up @@ -150,7 +146,7 @@ def is_extrapolated(self) -> bool:

@cached_property
def reliability(self) -> Reliability.ValueType:
if not self.is_extrapolated or not self.extrapolated_data_present:
if not self.is_extrapolated or not self.data_present:
return Reliability.RELIABILITY_UNSPECIFIED

relative_confidence = (
Expand Down Expand Up @@ -183,7 +179,7 @@ def is_extrapolated(self) -> bool:

@cached_property
def reliability(self) -> Reliability.ValueType:
if not self.is_extrapolated or not self.extrapolated_data_present:
if not self.is_extrapolated or not self.data_present:
return Reliability.RELIABILITY_UNSPECIFIED

lower_bound, upper_bound = _calculate_approximate_ci_percentile_levels(
Expand Down
26 changes: 10 additions & 16 deletions snuba/web/rpc/v1/endpoint_time_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,14 +173,7 @@ def _convert_result_timeseries(
extrapolation_context = ExtrapolationContext.from_row(
timeseries.label, row_data
)
if (
# This isn't quite right but all non extrapolated aggregates
# are assumed to be present.
not extrapolation_context.is_extrapolated
# This checks if the extrapolated aggregate is present by
# inspecting the sample count
or extrapolation_context.extrapolated_data_present
):
if extrapolation_context.data_present:
timeseries.data_points.append(
DataPoint(
data=row_data[timeseries.label],
Expand Down Expand Up @@ -209,32 +202,33 @@ def _build_query(request: TimeSeriesRequest) -> Query:
for aggregation in request.aggregations
]

extrapolation_columns = []
additional_context_columns = []
for aggregation in request.aggregations:
if (
aggregation.extrapolation_mode
== ExtrapolationMode.EXTRAPOLATION_MODE_SAMPLE_WEIGHTED
):
confidence_interval_column = get_confidence_interval_column(aggregation)
if confidence_interval_column is not None:
extrapolation_columns.append(
additional_context_columns.append(
SelectedExpression(
name=confidence_interval_column.alias,
expression=confidence_interval_column,
)
)

average_sample_rate_column = get_average_sample_rate_column(aggregation)
count_column = get_count_column(aggregation)
extrapolation_columns.append(
additional_context_columns.append(
SelectedExpression(
name=average_sample_rate_column.alias,
expression=average_sample_rate_column,
)
)
extrapolation_columns.append(
SelectedExpression(name=count_column.alias, expression=count_column)
)

count_column = get_count_column(aggregation)
additional_context_columns.append(
SelectedExpression(name=count_column.alias, expression=count_column)
)

groupby_columns = [
SelectedExpression(
Expand Down Expand Up @@ -275,7 +269,7 @@ def _build_query(request: TimeSeriesRequest) -> Query:
),
*aggregation_columns,
*groupby_columns,
*extrapolation_columns,
*additional_context_columns,
],
granularity=request.granularity_secs,
condition=base_conditions_and(
Expand Down
6 changes: 3 additions & 3 deletions tests/web/rpc/test_aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def test_get_confidence_interval_column_for_non_extrapolatable_column() -> None:
"column_name",
"average_sample_rate",
"reliability",
"extrapolated_data_present",
"data_present",
"is_extrapolated",
),
[
Expand Down Expand Up @@ -185,13 +185,13 @@ def test_get_extrapolation_meta(
column_name: str,
average_sample_rate: float,
reliability: Reliability.ValueType,
extrapolated_data_present: bool,
data_present: bool,
is_extrapolated: bool,
) -> None:
extrapolation_context = ExtrapolationContext.from_row(column_name, row_data)
assert extrapolation_context.average_sample_rate == average_sample_rate
assert extrapolation_context.reliability == reliability
assert extrapolation_context.extrapolated_data_present == extrapolated_data_present
assert extrapolation_context.data_present == data_present
assert extrapolation_context.is_extrapolated == is_extrapolated


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -396,17 +396,13 @@ def test_with_no_data_present(self) -> None:
aggregations=[
AttributeAggregation(
aggregate=Function.FUNCTION_SUM,
key=AttributeKey(
type=AttributeKey.TYPE_FLOAT, name="sparse_metric"
),
key=AttributeKey(type=AttributeKey.TYPE_FLOAT, name="test_metric"),
label="sum",
extrapolation_mode=ExtrapolationMode.EXTRAPOLATION_MODE_NONE,
),
AttributeAggregation(
aggregate=Function.FUNCTION_AVG,
key=AttributeKey(
type=AttributeKey.TYPE_FLOAT, name="sparse_metric"
),
key=AttributeKey(type=AttributeKey.TYPE_FLOAT, name="test_metric"),
label="avg",
extrapolation_mode=ExtrapolationMode.EXTRAPOLATION_MODE_NONE,
),
Expand Down Expand Up @@ -626,6 +622,54 @@ def test_start_time_not_divisible_by_time_buckets_returns_valid_data(self) -> No
assert len(ts.data_points) == 1
assert ts.data_points[0].data == 300

def test_with_non_existent_attribute(self) -> None:
    """Querying an attribute that was never stored yields only empty points.

    The store is seeded with an unrelated metric ("test_metric"), then a SUM
    over "non_existent_metric" is requested; every bucket in the window must
    come back as a DataPoint with data_present=False.
    """
    # Seed the store with a metric unrelated to the one queried below, so
    # the aggregation target attribute is entirely absent from the data.
    store_timeseries(
        BASE_TIME,
        1,
        3600,
        metrics=[DummyMetric("test_metric", get_value=lambda x: 1)],
    )

    start = int(BASE_TIME.timestamp())
    window_secs = 60 * 30
    granularity = 300

    request = TimeSeriesRequest(
        meta=RequestMeta(
            project_ids=[1, 2, 3],
            organization_id=1,
            cogs_category="something",
            referrer="something",
            start_timestamp=Timestamp(seconds=start),
            end_timestamp=Timestamp(seconds=start + window_secs),
        ),
        aggregations=[
            AttributeAggregation(
                aggregate=Function.FUNCTION_SUM,
                key=AttributeKey(
                    type=AttributeKey.TYPE_FLOAT, name="non_existent_metric"
                ),
                label="sum",
                extrapolation_mode=ExtrapolationMode.EXTRAPOLATION_MODE_NONE,
            ),
        ],
        granularity_secs=granularity,
    )

    response = EndpointTimeSeries().execute(request)

    # One bucket per granularity step across the whole requested window.
    buckets = [
        Timestamp(seconds=start + offset)
        for offset in range(0, window_secs, granularity)
    ]

    # The endpoint must still emit the full bucket series, but with every
    # point explicitly marked as having no data.
    assert response.result_timeseries == [
        TimeSeries(
            label="sum",
            buckets=buckets,
            data_points=[DataPoint(data_present=False) for _ in buckets],
        )
    ]


class TestUtils:
def test_no_duplicate_labels(self) -> None:
Expand Down

0 comments on commit 0417e33

Please sign in to comment.