-
-
Notifications
You must be signed in to change notification settings - Fork 61
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
big refactor: simplify a lot of the remapping logic
- Loading branch information
1 parent
005de34
commit 5eb4974
Showing
7 changed files
with
231 additions
and
288 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
106 changes: 30 additions & 76 deletions
106
snuba/query/processors/logical/eap_map_access_remapper.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,102 +1,56 @@ | ||
from typing import Sequence | ||
from typing import Mapping | ||
|
||
from snuba.query.expressions import Column, Expression, FunctionCall, Literal | ||
from snuba.query.dsl import column, literal | ||
from snuba.query.expressions import Expression, FunctionCall, SubscriptableReference | ||
from snuba.query.logical import Query | ||
from snuba.query.processors.logical import LogicalQueryProcessor | ||
from snuba.query.query_settings import QuerySettings | ||
from snuba.utils.constants import ATTRIBUTE_BUCKETS | ||
from snuba.utils.hashes import fnv_1a | ||
|
||
|
||
class HashBucketFunctionTransformer(LogicalQueryProcessor): | ||
class EAPClickhouseColumnRemapper(LogicalQueryProcessor): | ||
""" | ||
In eap_spans, we split up map columns for better performance. | ||
In the entity, attr_str Map(String, String) becomes | ||
attr_str_0 Map(String, String), | ||
attr_str_1 Map(String, String), | ||
etc. | ||
In EAP entities, all attributes are hidden behind some virtual maps: attr_str, attr_i64, etc. | ||
This transformer converts mapKeys(attr_str) to arrayConcat(mapKeys(attr_str_0), mapKeys(attr_str_1), ...) | ||
and the same for mapValues | ||
Sometimes a map access should refer to a 'real' column. | ||
For example, you can use this processor to convert | ||
attr_i64[duration_ms] to CAST(duration_ms, 'Int64') | ||
It converts mapContains(attr_str, 'blah') to mapContains(attr_str_{fnv_1a('blah') % ATTRIBUTE_BUCKETS}, 'blah') | ||
If data_type is the special value 'hex', the result is converted with the 'hex' function instead. | ||
If there is no matching column, the map access remains as-is: | ||
attr_str[derp] remains attr_str[derp] | ||
""" | ||
|
||
def __init__(self, hash_bucket_names: Sequence[str]): | ||
def __init__(self, hash_bucket_name: str, keys: Mapping[str, str], data_type: str): | ||
super().__init__() | ||
self.hash_bucket_names = set(hash_bucket_names) | ||
self.hash_bucket_name = hash_bucket_name | ||
self.keys = keys | ||
self.data_type = data_type | ||
|
||
def process_query(self, query: Query, query_settings: QuerySettings) -> None: | ||
def transform_map_keys_and_values_expression(exp: Expression) -> Expression: | ||
if not isinstance(exp, FunctionCall): | ||
return exp | ||
|
||
if len(exp.parameters) != 1: | ||
return exp | ||
|
||
param = exp.parameters[0] | ||
if not isinstance(param, Column): | ||
return exp | ||
|
||
if param.column_name not in self.hash_bucket_names: | ||
return exp | ||
|
||
if exp.function_name not in ("mapKeys", "mapValues"): | ||
return exp | ||
|
||
return FunctionCall( | ||
alias=exp.alias, | ||
function_name="arrayConcat", | ||
parameters=tuple( | ||
FunctionCall( | ||
None, | ||
function_name=exp.function_name, | ||
parameters=( | ||
Column( | ||
None, | ||
column_name=f"{param.column_name}_{i}", | ||
table_name=param.table_name, | ||
), | ||
), | ||
) | ||
for i in range(ATTRIBUTE_BUCKETS) | ||
), | ||
) | ||
|
||
def transform_map_contains_expression(exp: Expression) -> Expression: | ||
if not isinstance(exp, FunctionCall): | ||
return exp | ||
|
||
if len(exp.parameters) != 2: | ||
return exp | ||
|
||
column = exp.parameters[0] | ||
if not isinstance(column, Column): | ||
return exp | ||
|
||
if column.column_name not in self.hash_bucket_names: | ||
def transform(exp: Expression) -> Expression: | ||
if not isinstance(exp, SubscriptableReference): | ||
return exp | ||
|
||
if exp.function_name != "mapContains": | ||
if exp.column.column_name != self.hash_bucket_name: | ||
return exp | ||
|
||
key = exp.parameters[1] | ||
if not isinstance(key, Literal) or not isinstance(key.value, str): | ||
if exp.key.value not in self.keys: | ||
return exp | ||
|
||
bucket_idx = fnv_1a(key.value.encode("utf-8")) % ATTRIBUTE_BUCKETS | ||
if self.data_type == "hex": | ||
return FunctionCall( | ||
alias=exp.alias, | ||
function_name="hex", | ||
parameters=(column(self.keys[exp.key.value]),), | ||
) | ||
return FunctionCall( | ||
alias=exp.alias, | ||
function_name=exp.function_name, | ||
function_name="CAST", | ||
parameters=( | ||
Column( | ||
None, | ||
None, | ||
f"{column.column_name}_{bucket_idx}", | ||
), | ||
key, | ||
column(self.keys[exp.key.value]), | ||
literal(self.data_type), | ||
), | ||
) | ||
|
||
query.transform_expressions(transform_map_keys_and_values_expression) | ||
query.transform_expressions(transform_map_contains_expression) | ||
query.transform_expressions(transform) |
Oops, something went wrong.