Skip to content

Commit

Permalink
fix bug with data_present
Browse files — browse the repository at this point in the history
  • Loading branch information
davidtsuk committed Jan 8, 2025
1 parent 857a9fe commit 0417e33
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 34 deletions.
14 changes: 5 additions & 9 deletions snuba/web/rpc/common/aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ class ExtrapolationContext(ABC):
sample_count: int

@property
def extrapolated_data_present(self) -> bool:
def data_present(self) -> bool:
return self.sample_count > 0

@property
Expand Down Expand Up @@ -108,16 +108,12 @@ def from_row(
elif custom_column_information.custom_column_id == "count":
sample_count = col_value

if (
confidence_interval is None
or average_sample_rate is None
or sample_count is None
):
if confidence_interval is None:
return GenericExtrapolationContext(
value=value,
confidence_interval=None,
average_sample_rate=0,
sample_count=0,
sample_count=sample_count if sample_count is not None else 0,
)

if is_percentile:
Expand Down Expand Up @@ -150,7 +146,7 @@ def is_extrapolated(self) -> bool:

@cached_property
def reliability(self) -> Reliability.ValueType:
if not self.is_extrapolated or not self.extrapolated_data_present:
if not self.is_extrapolated or not self.data_present:
return Reliability.RELIABILITY_UNSPECIFIED

relative_confidence = (
Expand Down Expand Up @@ -183,7 +179,7 @@ def is_extrapolated(self) -> bool:

@cached_property
def reliability(self) -> Reliability.ValueType:
if not self.is_extrapolated or not self.extrapolated_data_present:
if not self.is_extrapolated or not self.data_present:
return Reliability.RELIABILITY_UNSPECIFIED

lower_bound, upper_bound = _calculate_approximate_ci_percentile_levels(
Expand Down
26 changes: 10 additions & 16 deletions snuba/web/rpc/v1/endpoint_time_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,14 +173,7 @@ def _convert_result_timeseries(
extrapolation_context = ExtrapolationContext.from_row(
timeseries.label, row_data
)
if (
# This isn't quite right but all non extrapolated aggregates
# are assumed to be present.
not extrapolation_context.is_extrapolated
# This checks if the extrapolated aggregate is present by
# inspecting the sample count
or extrapolation_context.extrapolated_data_present
):
if extrapolation_context.data_present:
timeseries.data_points.append(
DataPoint(
data=row_data[timeseries.label],
Expand Down Expand Up @@ -209,32 +202,33 @@ def _build_query(request: TimeSeriesRequest) -> Query:
for aggregation in request.aggregations
]

extrapolation_columns = []
additional_context_columns = []
for aggregation in request.aggregations:
if (
aggregation.extrapolation_mode
== ExtrapolationMode.EXTRAPOLATION_MODE_SAMPLE_WEIGHTED
):
confidence_interval_column = get_confidence_interval_column(aggregation)
if confidence_interval_column is not None:
extrapolation_columns.append(
additional_context_columns.append(
SelectedExpression(
name=confidence_interval_column.alias,
expression=confidence_interval_column,
)
)

average_sample_rate_column = get_average_sample_rate_column(aggregation)
count_column = get_count_column(aggregation)
extrapolation_columns.append(
additional_context_columns.append(
SelectedExpression(
name=average_sample_rate_column.alias,
expression=average_sample_rate_column,
)
)
extrapolation_columns.append(
SelectedExpression(name=count_column.alias, expression=count_column)
)

count_column = get_count_column(aggregation)
additional_context_columns.append(
SelectedExpression(name=count_column.alias, expression=count_column)
)

groupby_columns = [
SelectedExpression(
Expand Down Expand Up @@ -275,7 +269,7 @@ def _build_query(request: TimeSeriesRequest) -> Query:
),
*aggregation_columns,
*groupby_columns,
*extrapolation_columns,
*additional_context_columns,
],
granularity=request.granularity_secs,
condition=base_conditions_and(
Expand Down
6 changes: 3 additions & 3 deletions tests/web/rpc/test_aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def test_get_confidence_interval_column_for_non_extrapolatable_column() -> None:
"column_name",
"average_sample_rate",
"reliability",
"extrapolated_data_present",
"data_present",
"is_extrapolated",
),
[
Expand Down Expand Up @@ -185,13 +185,13 @@ def test_get_extrapolation_meta(
column_name: str,
average_sample_rate: float,
reliability: Reliability.ValueType,
extrapolated_data_present: bool,
data_present: bool,
is_extrapolated: bool,
) -> None:
extrapolation_context = ExtrapolationContext.from_row(column_name, row_data)
assert extrapolation_context.average_sample_rate == average_sample_rate
assert extrapolation_context.reliability == reliability
assert extrapolation_context.extrapolated_data_present == extrapolated_data_present
assert extrapolation_context.data_present == data_present
assert extrapolation_context.is_extrapolated == is_extrapolated


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -396,17 +396,13 @@ def test_with_no_data_present(self) -> None:
aggregations=[
AttributeAggregation(
aggregate=Function.FUNCTION_SUM,
key=AttributeKey(
type=AttributeKey.TYPE_FLOAT, name="sparse_metric"
),
key=AttributeKey(type=AttributeKey.TYPE_FLOAT, name="test_metric"),
label="sum",
extrapolation_mode=ExtrapolationMode.EXTRAPOLATION_MODE_NONE,
),
AttributeAggregation(
aggregate=Function.FUNCTION_AVG,
key=AttributeKey(
type=AttributeKey.TYPE_FLOAT, name="sparse_metric"
),
key=AttributeKey(type=AttributeKey.TYPE_FLOAT, name="test_metric"),
label="avg",
extrapolation_mode=ExtrapolationMode.EXTRAPOLATION_MODE_NONE,
),
Expand Down Expand Up @@ -626,6 +622,54 @@ def test_start_time_not_divisible_by_time_buckets_returns_valid_data(self) -> No
assert len(ts.data_points) == 1
assert ts.data_points[0].data == 300

def test_with_non_existent_attribute(self) -> None:
    """Querying an attribute that was never stored yields only empty points.

    The store is seeded with an unrelated metric ("test_metric"), then a SUM
    over "non_existent_metric" is requested; every bucket in the window must
    come back as a DataPoint with data_present=False.
    """
    # Seed the store with a metric unrelated to the one queried below, so
    # the aggregation target attribute is entirely absent from the data.
    store_timeseries(
        BASE_TIME,
        1,
        3600,
        metrics=[DummyMetric("test_metric", get_value=lambda x: 1)],
    )

    start = int(BASE_TIME.timestamp())
    window_secs = 60 * 30
    granularity = 300

    request = TimeSeriesRequest(
        meta=RequestMeta(
            project_ids=[1, 2, 3],
            organization_id=1,
            cogs_category="something",
            referrer="something",
            start_timestamp=Timestamp(seconds=start),
            end_timestamp=Timestamp(seconds=start + window_secs),
        ),
        aggregations=[
            AttributeAggregation(
                aggregate=Function.FUNCTION_SUM,
                key=AttributeKey(
                    type=AttributeKey.TYPE_FLOAT, name="non_existent_metric"
                ),
                label="sum",
                extrapolation_mode=ExtrapolationMode.EXTRAPOLATION_MODE_NONE,
            ),
        ],
        granularity_secs=granularity,
    )

    response = EndpointTimeSeries().execute(request)

    # One bucket per granularity step across the whole requested window.
    buckets = [
        Timestamp(seconds=start + offset)
        for offset in range(0, window_secs, granularity)
    ]

    # The endpoint must still emit the full bucket series, but with every
    # point explicitly marked as having no data.
    assert response.result_timeseries == [
        TimeSeries(
            label="sum",
            buckets=buckets,
            data_points=[DataPoint(data_present=False) for _ in buckets],
        )
    ]


class TestUtils:
def test_no_duplicate_labels(self) -> None:
Expand Down

0 comments on commit 0417e33

Please sign in to comment.