From e043b0712b7da39109217edc1663a85cb1aa1b18 Mon Sep 17 00:00:00 2001 From: Leonard Herold <92177433+LeonardHd@users.noreply.github.com> Date: Sat, 25 Nov 2023 08:10:20 +0100 Subject: [PATCH] feat: replace naive evaluation with lark * fixing expression by enforcing @ expression * adding array support --- .../expression_evaluation_value_error.py | 2 + .../functions/_expression_transformer.py | 217 +++--- .../functions/expression_evaluator.py | 132 ++-- tests/test_test_framework.py | 2 +- .../functions/test_expression_evaluator.py | 679 ++++++++++++++++++ .../unit/functions/test_function_argument.py | 224 ------ tests/unit/functions/test_function_parser.py | 49 -- .../models/activities/base/test_activity.py | 2 +- .../test_for_each_activity.py | 2 +- .../test_switch_activity.py | 2 +- .../models/activities/test_fail_activity.py | 2 +- 11 files changed, 899 insertions(+), 414 deletions(-) create mode 100644 src/azure_data_factory_testing_framework/exceptions/expression_evaluation_value_error.py create mode 100644 tests/unit/functions/test_expression_evaluator.py delete mode 100644 tests/unit/functions/test_function_argument.py delete mode 100644 tests/unit/functions/test_function_parser.py diff --git a/src/azure_data_factory_testing_framework/exceptions/expression_evaluation_value_error.py b/src/azure_data_factory_testing_framework/exceptions/expression_evaluation_value_error.py new file mode 100644 index 00000000..20cb1101 --- /dev/null +++ b/src/azure_data_factory_testing_framework/exceptions/expression_evaluation_value_error.py @@ -0,0 +1,2 @@ +class ExpressionEvaluationValueError(Exception): + pass diff --git a/src/azure_data_factory_testing_framework/functions/_expression_transformer.py b/src/azure_data_factory_testing_framework/functions/_expression_transformer.py index fdf74ec6..47f60b93 100644 --- a/src/azure_data_factory_testing_framework/functions/_expression_transformer.py +++ b/src/azure_data_factory_testing_framework/functions/_expression_transformer.py @@ -2,8 +2,25 @@ import inspect from typing import Callable -from lark import Token, Transformer, Tree - +from lark import Discard, Token, Transformer + +from azure_data_factory_testing_framework.exceptions.activity_not_found_error import ActivityNotFoundError +from azure_data_factory_testing_framework.exceptions.dataset_parameter_not_found_error import ( + DatasetParameterNotFoundError, +) +from azure_data_factory_testing_framework.exceptions.expression_evaluation_value_error import ( + ExpressionEvaluationValueError, +) +from azure_data_factory_testing_framework.exceptions.expression_parameter_not_found_error import ( + ExpressionParameterNotFoundError, +) +from azure_data_factory_testing_framework.exceptions.linked_service_parameter_not_found_error import ( + LinkedServiceParameterNotFoundError, +) +from azure_data_factory_testing_framework.exceptions.state_iteration_item_not_set_error import ( + StateIterationItemNotSetError, +) +from azure_data_factory_testing_framework.exceptions.variable_not_found_error import VariableNotFoundError from azure_data_factory_testing_framework.functions.functions_repository import FunctionsRepository from azure_data_factory_testing_framework.state.pipeline_run_state import PipelineRunState from azure_data_factory_testing_framework.state.run_parameter import RunParameter @@ -16,17 +33,72 @@ def __init__(self, state: PipelineRunState) -> None: self.state: PipelineRunState = state super().__init__() - def literal(self, value: list[Token]): + def LITERAL_LETTER(self, token: Token): # noqa: N802 + return str(token.value) + + def LITERAL_INT(self, token: Token): # noqa: N802 + return int(token.value) + + def LITERAL_FLOAT(self, token: Token): # noqa: N802 + return float(token.value) + + def LITERAL_SINGLE_QUOTED_STRING(self, token: Token): # noqa: N802 + return str(token.value) + + def LITERAL_BOOLEAN(self, token: Token): # noqa: N802 + return bool(token.value) + + def LITERAL_NULL(self, token: Token): # noqa: N802 + return None + + def literal_evaluation(self, value: list[Token, str, int, float, bool]): + if len(value) != 1: + raise ExpressionEvaluationValueError() return value[0] - def parameter_name(self, value: list[Token]): - return value[0].value + def EXPRESSION_NULL(self, token: Token): # noqa: N802 + return None + + def EXPRESSION_STRING(self, token: Token): # noqa: N802 + string = str(token.value) + string = string.replace("''", "'") # replace escaped single quotes + string = string[1:-1] + + return string - def pipeline_reference(self, value: list[Token]): - pipeline_reference_property = value[0] - pipeline_reference_property_parameter = value[1] + def EXPRESSION_INTEGER(self, token: Token): # noqa: N802 + return int(token.value) + + def EXPRESSION_FLOAT(self, token: Token): # noqa: N802 + return float(token.value) + + def EXPRESSION_BOOLEAN(self, token: Token): # noqa: N802 + return bool(token.value) + + def EXPRESSION_WS(self, token: Token): # noqa: N802 + # Discard whitespaces in expressions + return Discard + + def EXPRESSION_ARRAY_INDEX(self, token: Token): # noqa: N802 + token.value = int(token.value[1:-1]) + return token + + def expression_pipeline_reference(self, value: list[Token, str, int, float, bool]): + if not isinstance(value[0], Token): + raise ExpressionEvaluationValueError() + + if not isinstance(value[1], Token): + raise ExpressionEvaluationValueError() + + pipeline_reference_property: Token = value[0] + pipeline_reference_property_parameter: Token = value[1] + + if not ( + pipeline_reference_property.type == "EXPRESSION_PIPELINE_PROPERTY" + and pipeline_reference_property_parameter.type == "EXPRESSION_PARAMETER_NAME" + ): + raise ExpressionEvaluationValueError() - # TODO: need to improve this global_parameters: list[RunParameter] = list( filter(lambda p: p.type == RunParameterType.Global, self.state.parameters) ) @@ -39,108 +111,86 @@ def pipeline_reference(self, value: list[Token]): first = list(filter(lambda p: p.name == pipeline_reference_property_parameter, global_parameters)) if len(first) == 0: - raise Exception("Parameter not found") + raise ExpressionParameterNotFoundError(pipeline_reference_property_parameter) return first[0].value - def variable_reference(self, value: list[Token]): + def expression_variable_reference(self, value: list[Token, str, int, float, bool]): variable_name = value[0].value variable_name = variable_name[1:-1] # remove quotes - - # variable_property = value[1].value - variable = list(filter(lambda p: p.name == variable_name, self.state.variables)) if len(variable) == 0: - raise Exception("Variable not found") + raise VariableNotFoundError(variable_name) return variable[0].value - def dataset_reference(self, value: list[Token]): + def expression_dataset_reference(self, value: list[Token, str, int, float, bool]): dataset_name = value[0].value - dataset_property = value[1].value - - dataset = list(filter(lambda p: p.name == dataset_name, self.state.datasets)) + dataset_name = dataset_name[1:-1] # remove quotes + datasets = list(filter(lambda p: p.type == RunParameterType.Dataset, self.state.parameters)) + dataset = list(filter(lambda p: p.name == dataset_name, datasets)) if len(dataset) == 0: - raise Exception("Dataset not found") - return dataset[0].value[dataset_property] + raise DatasetParameterNotFoundError(dataset_name) + return dataset[0].value - def linked_service_reference(self, value: list[Token]): + def expression_linked_service_reference(self, value: list[Token, str, int, float, bool]): linked_service_name = value[0].value - linked_service_property = value[1].value - - linked_service = list(filter(lambda p: p.name == linked_service_name, self.state.linked_services)) + linked_service_name = linked_service_name[1:-1] # remove quotes + linked_services = list(filter(lambda p: p.type == RunParameterType.LinkedService, self.state.parameters)) + linked_service = list(filter(lambda p: p.name == linked_service_name, linked_services)) if len(linked_service) == 0: - raise Exception("Linked service not found") - return linked_service[0].value[linked_service_property] + raise LinkedServiceParameterNotFoundError(linked_service_name) + return linked_service[0].value - def activity_reference(self, value: list[Token]): + def expression_activity_reference(self, value: list[Token, str, int, float, bool]): activity_name = value[0].value activity_name = activity_name[1:-1] # remove quotes - # activity_property = value[1].value activity_property = value[1] - # activity_property_parameter = value[2].value - activity_property_parameter = value[2] + property_fields = value[2:] activity = self.state.try_get_scoped_activity_result_by_name(activity_name) if activity is None: - raise Exception("Activity not found") - return activity[activity_property][activity_property_parameter] + raise ActivityNotFoundError(activity_name) - def item_reference(self, value: list[Token]): + activity_property_parameter = activity[activity_property] + for field in property_fields: + field_value = field.value + activity_property_parameter = activity_property_parameter[field_value] + return activity_property_parameter + + def expression_item_reference(self, value: list[Token, str, int, float, bool]): item = self.state.iteration_item if item is None: - raise Exception("Item not found") + raise StateIterationItemNotSetError() return item - def boolean(self, value: list[Token]): # noqa: ANN401, ANN201, ANN001 - return bool(value[0].value) - - def integer(self, value: list[Token]): - return int(value[0].value) - - def float(self, value: Token): # noqa: A003 - return float(value[0]) - - def single_quoted_string(self, value: list[Token]): - result: str = value[0].value - result = result[1:-1] # remove quotes - return result - - def string(self, value: list[Token]): - if isinstance(value[0], Token): - return value[0].value - else: - return value[0] - - def function_parameters(self, value): - parameters = [] - - if isinstance(value, list): - for v in value: - if isinstance(v, Tree): - result = self.transform(v) - parameters.append(result) - else: - result = v - parameters.append(result) - else: - raise Exception("Unexpected value type") - return parameters - - def parameter(self, value): - return value[1] - - def expression(self, value): - # TODO: need to improve this (array support) - return value[0] - - def function_call(self, expression): - fn = expression[0] - fn_parameters = expression[1] - array_index = expression[2] if len(expression) > 2 else None - + def expression_function_parameters(self, values: list[Token, str, int, float, bool]): + return values + + def expression_parameter(self, values: list[Token, str, int, float, bool]): + if len(values) != 1: + raise ExpressionEvaluationValueError + return values[0] + + def expression_evaluation(self, values: list[Token, str, int, float, bool, list]): + eval_value = values[0] + array_indices = values[1] + for array_index in array_indices: + if array_index is None: + continue + if not isinstance(array_index, Token) or array_index.type != "EXPRESSION_ARRAY_INDEX": + raise ExpressionEvaluationValueError() + eval_value = eval_value[array_index.value] + return eval_value + + def expression_array_indices(self, values: list[Token, str, int, float, bool]): + return values + + def expression_function_call(self, values: list[Token, str, int, float, bool]): + fn = values[0] + fn_parameters = values[1] function: Callable = FunctionsRepository.functions.get(fn.value) pos_or_keyword_parameters = [] @@ -156,7 +206,4 @@ def function_call(self, expression): var_positional_values = fn_parameters[len(pos_or_keyword_parameters) :] # should be 0 or 1 result = function(*pos_or_keyword_values, *var_positional_values) - if array_index is not None: - result = result[array_index] - return result diff --git a/src/azure_data_factory_testing_framework/functions/expression_evaluator.py b/src/azure_data_factory_testing_framework/functions/expression_evaluator.py index daea5ff1..a2f10e73 100644 --- a/src/azure_data_factory_testing_framework/functions/expression_evaluator.py +++ b/src/azure_data_factory_testing_framework/functions/expression_evaluator.py @@ -1,6 +1,7 @@ from typing import Union from lark import Lark, Token, Tree +from lark.exceptions import VisitError from azure_data_factory_testing_framework.functions._expression_transformer import ExpressionTransformer from azure_data_factory_testing_framework.functions.functions_repository import FunctionsRepository @@ -9,66 +10,92 @@ class ExpressionEvaluator: def __init__(self) -> None: - """Parser for the ADF expression language.""" - grammar = f""" - ?start: ["@@"] literal | ["@"] expression - - expression: evaluation array_index? - array_index: "[" INT "]" - - literal: integer | literal_letter | single_quoted_string | boolean | float | null - ?literal_letter: /[a-zA-Z0-9_]+/ - - ?evaluation : function_call - | pipeline_reference - | variable_reference - | activity_reference - | dataset_reference - | linked_service_reference - | item_reference - - # any character except a @ (but can be escaped with @@) - literal_string: ESCAPED_STRING - - - pipeline_reference: "pipeline" "()" "." PIPELINE_PROPERTY "." parameter_name - PIPELINE_PROPERTY: "parameters" | "globalParameters" - activity_reference: "activity" "(" SINGLE_QUOTED_STRING ")" "." parameter_name "." parameter_name - variable_reference: "variables" "(" SINGLE_QUOTED_STRING ")" - dataset_reference: "dataset" "(" SINGLE_QUOTED_STRING ")" "." parameter_name - linked_service_reference: "linkedService" "(" SINGLE_QUOTED_STRING ")" "." parameter_name - item_reference: "item()" - - parameter_name: /[a-zA-Z0-9_]+/ - - function_call: FUNCTION_NAME "(" function_parameters ")" - function_parameters: parameter ("," parameter)* - - ?parameter: WS* (null | integer | float | boolean | string | expression ) WS* + """Evaluator for the expression language.""" + literal_grammer = """ + + // literal rules + ?literal_start: literal_evaluation + literal_evaluation: LITERAL_INT + | LITERAL_LETTER + | LITERAL_SINGLE_QUOTED_STRING + | LITERAL_BOOLEAN + | LITERAL_FLOAT + | LITERAL_NULL + | literal_array + literal_array: "[" literal_evaluation ("," literal_evaluation)* "]" + + // literal terminals: + LITERAL_LETTER: /[^@]+/ + LITERAL_INT: SIGNED_INT + LITERAL_FLOAT: SIGNED_FLOAT + LITERAL_SINGLE_QUOTED_STRING: SINGLE_QUOTED_STRING + LITERAL_BOOLEAN: BOOLEAN + LITERAL_NULL: NULL + """ - null: "null" - string: single_quoted_string + expression_grammer = f""" + // TODO: add support for array index + ?expression_start: expression_evaluation + expression_evaluation: expression_call [expression_array_indices] + ?expression_call: expression_function_call + | expression_pipeline_reference + | expression_variable_reference + | expression_activity_reference + | expression_dataset_reference + | expression_linked_service_reference + | expression_item_reference + expression_array_indices: [EXPRESSION_ARRAY_INDEX]* + + // reference rules: + expression_pipeline_reference: "pipeline" "()" "." EXPRESSION_PIPELINE_PROPERTY "." EXPRESSION_PARAMETER_NAME + expression_variable_reference: "variables" "(" EXPRESSION_VARIABLE_NAME ")" + expression_activity_reference: "activity" "(" EXPRESSION_ACTIVITY_NAME ")" ("." EXPRESSION_PARAMETER_NAME)+ + expression_dataset_reference: "dataset" "(" EXPRESSION_DATASET_NAME ")" + expression_linked_service_reference: "linkedService" "(" EXPRESSION_LINKED_SERVICE_NAME ")" + expression_item_reference: "item()" + + // function call rules + expression_function_call: EXPRESSION_FUNCTION_NAME "(" expression_function_parameters ")" + expression_function_parameters: expression_parameter ("," expression_parameter )* + expression_parameter: EXPRESSION_WS* (EXPRESSION_NULL | EXPRESSION_INTEGER | EXPRESSION_FLOAT | EXPRESSION_BOOLEAN | EXPRESSION_STRING | expression_start) EXPRESSION_WS* - single_quoted_string: SINGLE_QUOTED_STRING - integer: SIGNED_INT - float: SIGNED_FLOAT - boolean: "true" | "false" + // expression terminals + EXPRESSION_PIPELINE_PROPERTY: "parameters" | "globalParameters" + EXPRESSION_PARAMETER_NAME: /[a-zA-Z0-9_]+/ + EXPRESSION_VARIABLE_NAME: "'" /[^']*/ "'" + EXPRESSION_ACTIVITY_NAME: "'" /[^']*/ "'" + EXPRESSION_DATASET_NAME: "'" /[^']*/ "'" + EXPRESSION_LINKED_SERVICE_NAME: "'" /[^']*/ "'" + EXPRESSION_FUNCTION_NAME: {self._supported_functions()} + EXPRESSION_NULL: NULL + EXPRESSION_STRING: SINGLE_QUOTED_STRING + EXPRESSION_INTEGER: SIGNED_INT + EXPRESSION_FLOAT: SIGNED_FLOAT + EXPRESSION_BOOLEAN: BOOLEAN + EXPRESSION_WS: WS + EXPRESSION_ARRAY_INDEX: ARRAY_INDEX + """ # noqa: E501 + + base_grammar = """ + ?start: ("@" expression_start) | (["@@"] literal_start) - FUNCTION_NAME: {self._supported_functions()} + // shared rules + ARRAY_INDEX: "[" /[0-9]+/ "]" - EOL : /\\n/ - - SINGLE_QUOTED_STRING: "'" /[^']*/ "'" - %import common.WORD + // shared custom basic data type rules: + NULL: "null" + BOOLEAN: "true" | "false" + SINGLE_QUOTED_STRING: "'" /([^']|'')*/ "'" + + // imported lark terminals: %import common.SIGNED_INT %import common.SIGNED_FLOAT %import common.INT %import common.WS - %import common.ESCAPED_STRING - """ - self.lark_parser = Lark(grammar, start="start") + grammer = base_grammar + literal_grammer + expression_grammer + self.lark_parser = Lark(grammer, start="start") def _supported_functions(self) -> str: functions = list(FunctionsRepository.functions.keys()) @@ -82,5 +109,8 @@ def parse(self, expression: str) -> Tree[Token]: def evaluate(self, expression: str, state: PipelineRunState) -> Union[str, int, float, bool]: tree: Tree = self.parse(expression) transformer = ExpressionTransformer(state) - result: Tree = transformer.transform(tree) + try: + result: Tree = transformer.transform(tree) + except VisitError as ve: + raise ve.orig_exc from ve return result diff --git a/tests/test_test_framework.py b/tests/test_test_framework.py index 573316fc..f2d80240 100644 --- a/tests/test_test_framework.py +++ b/tests/test_test_framework.py @@ -85,7 +85,7 @@ def test_fail_activity_halts_further_evaluation() -> None: FailActivity( name="failActivity", typeProperties={ - "message": DataFactoryElement("concat('Error code: ', '500')"), + "message": DataFactoryElement("@concat('Error code: ', '500')"), "errorCode": "500", }, dependsOn=[], diff --git a/tests/unit/functions/test_expression_evaluator.py b/tests/unit/functions/test_expression_evaluator.py new file mode 100644 index 00000000..ac9cac65 --- /dev/null +++ b/tests/unit/functions/test_expression_evaluator.py @@ -0,0 +1,679 @@ +from typing import Union + +import pytest +from azure_data_factory_testing_framework.exceptions.activity_not_found_error import ActivityNotFoundError +from azure_data_factory_testing_framework.exceptions.dataset_parameter_not_found_error import ( + DatasetParameterNotFoundError, +) +from azure_data_factory_testing_framework.exceptions.expression_parameter_not_found_error import ( + ExpressionParameterNotFoundError, +) +from azure_data_factory_testing_framework.exceptions.linked_service_parameter_not_found_error import ( + LinkedServiceParameterNotFoundError, +) +from azure_data_factory_testing_framework.exceptions.state_iteration_item_not_set_error import ( + StateIterationItemNotSetError, +) +from azure_data_factory_testing_framework.exceptions.variable_not_found_error import VariableNotFoundError +from azure_data_factory_testing_framework.functions.expression_evaluator import ExpressionEvaluator +from azure_data_factory_testing_framework.state.dependency_condition import DependencyCondition +from azure_data_factory_testing_framework.state.pipeline_run_state import PipelineRunState +from azure_data_factory_testing_framework.state.pipeline_run_variable import PipelineRunVariable +from azure_data_factory_testing_framework.state.run_parameter import RunParameter +from azure_data_factory_testing_framework.state.run_parameter_type import RunParameterType +from lark import Token, Tree +from pytest import param as p + + +@pytest.mark.parametrize( + ["expression", "expected"], + [ + p("value", Tree(Token("RULE", "literal_evaluation"), [Token("LITERAL_LETTER", "value")]), id="string_literal"), + p( + " value ", + Tree(Token("RULE", "literal_evaluation"), [Token("LITERAL_LETTER", "value")]), + id="string_with_ws_literal", + marks=pytest.mark.skip(""), + ), + p("11", Tree(Token("RULE", "literal_evaluation"), [Token("LITERAL_INT", "11")]), id="integer_literal"), + p( + "@pipeline().parameters.parameter", + Tree( + Token("RULE", "expression_evaluation"), + [ + Tree( + Token("RULE", "expression_pipeline_reference"), + [ + Token("EXPRESSION_PIPELINE_PROPERTY", "parameters"), + Token("EXPRESSION_PARAMETER_NAME", "parameter"), + ], + ), + Tree(Token("RULE", "expression_array_indices"), [None]), + ], + ), + id="pipeline_parameters_reference", + ), + p( + "@pipeline().globalParameters.parameter", + Tree( + Token("RULE", "expression_evaluation"), + [ + Tree( + Token("RULE", "expression_pipeline_reference"), + [ + Token("EXPRESSION_PIPELINE_PROPERTY", "globalParameters"), + Token("EXPRESSION_PARAMETER_NAME", "parameter"), + ], + ), + Tree(Token("RULE", "expression_array_indices"), [None]), + ], + ), + id="pipeline_global_parameters_reference", + ), + p( + "@variables('variable')", + Tree( + Token("RULE", "expression_evaluation"), + [ + Tree( + Token("RULE", "expression_variable_reference"), + [Token("EXPRESSION_VARIABLE_NAME", "'variable'")], + ), + Tree(Token("RULE", "expression_array_indices"), [None]), + ], + ), + id="variables_reference", + ), + p( + "@activity('activityName').output.outputName", + Tree( + Token("RULE", "expression_evaluation"), + [ + Tree( + Token("RULE", "expression_activity_reference"), + [ + Token("EXPRESSION_ACTIVITY_NAME", "'activityName'"), + Token("EXPRESSION_PARAMETER_NAME", "output"), + Token("EXPRESSION_PARAMETER_NAME", "outputName"), + ], + ), + Tree(Token("RULE", "expression_array_indices"), [None]), + ], + ), + id="activity_reference", + ), + p( + "@dataset('datasetName')", + Tree( + Token("RULE", "expression_evaluation"), + [ + Tree( + Token("RULE", "expression_dataset_reference"), + [Token("EXPRESSION_DATASET_NAME", "'datasetName'")], + ), + Tree(Token("RULE", "expression_array_indices"), [None]), + ], + ), + id="dataset_reference", + ), + p( + "@linkedService('linkedServiceName')", + Tree( + Token("RULE", "expression_evaluation"), + [ + Tree( + Token("RULE", "expression_linked_service_reference"), + [Token("EXPRESSION_LINKED_SERVICE_NAME", "'linkedServiceName'")], + ), + Tree(Token("RULE", "expression_array_indices"), [None]), + ], + ), + id="linked_service_reference", + ), + p( + "@item()", + Tree( + Token("RULE", "expression_evaluation"), + [ + Tree(Token("RULE", "expression_item_reference"), []), + Tree(Token("RULE", "expression_array_indices"), [None]), + ], + ), + id="item_reference", + ), + p( + "@concat('a', 'b' )", + Tree( + Token("RULE", "expression_evaluation"), + [ + Tree( + Token("RULE", "expression_function_call"), + [ + Token("EXPRESSION_FUNCTION_NAME", "concat"), + Tree( + Token("RULE", "expression_function_parameters"), + [ + Tree(Token("RULE", "expression_parameter"), [Token("EXPRESSION_STRING", "'a'")]), + Tree( + Token("RULE", "expression_parameter"), + [ + Token("EXPRESSION_WS", " "), + Token("EXPRESSION_STRING", "'b'"), + Token("EXPRESSION_WS", " "), + ], + ), + ], + ), + ], + ), + Tree(Token("RULE", "expression_array_indices"), [None]), + ], + ), + id="function_call", + ), + p( + "@concat('a', 'b' )", + Tree( + Token("RULE", "expression_evaluation"), + [ + Tree( + Token("RULE", "expression_function_call"), + [ + Token("EXPRESSION_FUNCTION_NAME", "concat"), + Tree( + Token("RULE", "expression_function_parameters"), + [ + Tree(Token("RULE", "expression_parameter"), [Token("EXPRESSION_STRING", "'a'")]), + Tree( + Token("RULE", "expression_parameter"), + [ + Token("EXPRESSION_WS", " "), + Token("EXPRESSION_STRING", "'b'"), + Token("EXPRESSION_WS", " "), + ], + ), + ], + ), + ], + ), + Tree(Token("RULE", "expression_array_indices"), [None]), + ], + ), + id="function_call", + ), + p( + "@concat('https://example.com/jobs/', '123''', concat('&', 'abc,'))", + Tree( + Token("RULE", "expression_evaluation"), + [ + Tree( + Token("RULE", "expression_function_call"), + [ + Token("EXPRESSION_FUNCTION_NAME", "concat"), + Tree( + Token("RULE", "expression_function_parameters"), + [ + Tree( + Token("RULE", "expression_parameter"), + [Token("EXPRESSION_STRING", "'https://example.com/jobs/'")], + ), + Tree( + Token("RULE", "expression_parameter"), + [Token("EXPRESSION_WS", " "), Token("EXPRESSION_STRING", "'123'''")], + ), + Tree( + Token("RULE", "expression_parameter"), + [ + Token("EXPRESSION_WS", " "), + Tree( + Token("RULE", "expression_evaluation"), + [ + Tree( + Token("RULE", "expression_function_call"), + [ + Token("EXPRESSION_FUNCTION_NAME", "concat"), + Tree( + Token("RULE", "expression_function_parameters"), + [ + Tree( + Token("RULE", "expression_parameter"), + [Token("EXPRESSION_STRING", "'&'")], + ), + Tree( + Token("RULE", "expression_parameter"), + [ + Token("EXPRESSION_WS", " "), + Token("EXPRESSION_STRING", "'abc,'"), + ], + ), + ], + ), + ], + ), + Tree(Token("RULE", "expression_array_indices"), [None]), + ], + ), + ], + ), + ], + ), + ], + ), + Tree(Token("RULE", "expression_array_indices"), [None]), + ], + ), + id="function_call_with_nested_function_and_single_quote", + ), + p( + "concat('https://example.com/jobs/', '123''', variables('abc'), pipeline().parameters.abc, activity('abc').output.abc)", + Tree( + Token("RULE", "literal_evaluation"), + [ + Token( + "LITERAL_LETTER", + "concat('https://example.com/jobs/', '123''', variables('abc'), pipeline().parameters.abc, activity('abc').output.abc)", + ) + ], + ), + id="literal_function_call_with_nested_function_and_single_quote", + ), + p( + "@concat('https://example.com/jobs/', '123''', variables('abc'), pipeline().parameters.abc, activity('abc').output.abc)", + Tree( + Token("RULE", "expression_evaluation"), + [ + Tree( + Token("RULE", "expression_function_call"), + [ + Token("EXPRESSION_FUNCTION_NAME", "concat"), + Tree( + Token("RULE", "expression_function_parameters"), + [ + Tree( + Token("RULE", "expression_parameter"), + [Token("EXPRESSION_STRING", "'https://example.com/jobs/'")], + ), + Tree( + Token("RULE", "expression_parameter"), + [Token("EXPRESSION_WS", " "), Token("EXPRESSION_STRING", "'123'''")], + ), + Tree( + Token("RULE", "expression_parameter"), + [ + Token("EXPRESSION_WS", " "), + Tree( + Token("RULE", "expression_evaluation"), + [ + Tree( + Token("RULE", "expression_variable_reference"), + [Token("EXPRESSION_VARIABLE_NAME", "'abc'")], + ), + Tree(Token("RULE", "expression_array_indices"), [None]), + ], + ), + ], + ), + Tree( + Token("RULE", "expression_parameter"), + [ + Token("EXPRESSION_WS", " "), + Tree( + Token("RULE", "expression_evaluation"), + [ + Tree( + Token("RULE", "expression_pipeline_reference"), + [ + Token("EXPRESSION_PIPELINE_PROPERTY", "parameters"), + Token("EXPRESSION_PARAMETER_NAME", "abc"), + ], + ), + Tree(Token("RULE", "expression_array_indices"), [None]), + ], + ), + ], + ), + Tree( + Token("RULE", "expression_parameter"), + [ + Token("EXPRESSION_WS", " "), + Tree( + Token("RULE", "expression_evaluation"), + [ + Tree( + Token("RULE", "expression_activity_reference"), + [ + Token("EXPRESSION_ACTIVITY_NAME", "'abc'"), + Token("EXPRESSION_PARAMETER_NAME", "output"), + Token("EXPRESSION_PARAMETER_NAME", "abc"), + ], + ), + Tree(Token("RULE", "expression_array_indices"), [None]), + ], + ), + ], + ), + ], + ), + ], + ), + Tree(Token("RULE", "expression_array_indices"), [None]), + ], + ), + id="function_call_with_adf_native_functions", + ), + p( + "@createArray('a', createArray('a', 'b'))[1][1]", + Tree( + Token("RULE", "expression_evaluation"), + [ + Tree( + Token("RULE", "expression_function_call"), + [ + Token("EXPRESSION_FUNCTION_NAME", "createArray"), + Tree( + Token("RULE", "expression_function_parameters"), + [ + Tree(Token("RULE", "expression_parameter"), [Token("EXPRESSION_STRING", "'a'")]), + Tree( + Token("RULE", "expression_parameter"), + [ + Token("EXPRESSION_WS", " "), + Tree( + Token("RULE", "expression_evaluation"), + [ + Tree( + Token("RULE", "expression_function_call"), + [ + Token("EXPRESSION_FUNCTION_NAME", "createArray"), + Tree( + Token("RULE", "expression_function_parameters"), + [ + Tree( + Token("RULE", "expression_parameter"), + [Token("EXPRESSION_STRING", "'a'")], + ), + Tree( + Token("RULE", "expression_parameter"), + [ + Token("EXPRESSION_WS", " "), + Token("EXPRESSION_STRING", "'b'"), + ], + ), + ], + ), + ], + ), + Tree(Token("RULE", "expression_array_indices"), [None]), + ], + ), + ], + ), + ], + ), + ], + ), + Tree( + Token("RULE", "expression_array_indices"), + [Token("EXPRESSION_ARRAY_INDEX", "[1]"), Token("EXPRESSION_ARRAY_INDEX", "[1]")], + ), + ], + ), + id="function_call_with_nested_array_index", + ), + ], +) +def test_parse(expression: str, expected: Tree[Token]) -> None: + # Arrange + evaluator = ExpressionEvaluator() + + # Act + actual = evaluator.parse(expression) + + # Assert + assert actual == expected + + +@pytest.mark.parametrize( + ["expression", "state", "expected"], + [ + p("value", PipelineRunState(), "value", id="string_literal"), + p(" value ", PipelineRunState(), " value ", id="string_with_ws_literal"), + p("11", PipelineRunState(), 11, id="integer_literal"), + p( + "@pipeline().parameters.parameter", + PipelineRunState( + parameters=[ + RunParameter(RunParameterType.Pipeline, "parameter", "value"), + ] + ), + "value", + id="pipeline_parameters_reference", + ), + p( + "@pipeline().parameters.parameter", + PipelineRunState( + parameters=[ + RunParameter(RunParameterType.Pipeline, "parameter", 1), + ] + ), + 1, + id="pipeline_parameters_reference", + ), + p( + "@pipeline().globalParameters.parameter", + PipelineRunState( + parameters=[ + RunParameter(RunParameterType.Global, "parameter", "value"), + ] + ), + "value", + id="pipeline_global_parameters_reference", + ), + p( + "@variables('variable')", + PipelineRunState( + variables=[ + PipelineRunVariable(name="variable", default_value="value"), + ] + ), + "value", + id="variables_reference", + ), + p( + "@activity('activityName').output.outputName", + PipelineRunState( + pipeline_activity_results={ + "activityName": { + "output": { + "outputName": "value", + }, + "status": DependencyCondition.SUCCEEDED, + } + } + ), + "value", + id="activity_reference", + ), + p( + "@dataset('datasetName')", + PipelineRunState(parameters=[RunParameter(RunParameterType.Dataset, "datasetName", "datasetNameValue")]), + "datasetNameValue", + id="dataset_reference", + ), + p( + "@linkedService('linkedServiceName')", + PipelineRunState( + parameters=[RunParameter(RunParameterType.LinkedService, "linkedServiceName", "linkedServiceNameValue")] + ), + "linkedServiceNameValue", + id="linked_service_reference", + ), + p("@item()", PipelineRunState(iteration_item="value"), "value", id="item_reference"), + p("@concat('a', 'b' )", PipelineRunState(), "ab", id="function_call"), + p( + "concat('https://example.com/jobs/', '123''', concat('&', 'abc,'))", + PipelineRunState(), + "concat('https://example.com/jobs/', '123''', concat('&', 'abc,'))", + id="literal_function_call_with_nested_function_and_single_quote", + ), + p( + "@concat('https://example.com/jobs/', '123''', concat('&', 'abc,'))", + PipelineRunState(), + "https://example.com/jobs/123'&abc,", + id="function_call_with_nested_function_and_single_quote", + ), + p( + "@activity('activityName').output.outputName", + PipelineRunState( + pipeline_activity_results={ + "activityName": { + "output": { + "outputName": 1, + }, + "status": DependencyCondition.SUCCEEDED, + } + } + ), + 1, + id="activity_reference", + ), + p( + "@activity('activityName').output.pipelineReturnValue.test", + PipelineRunState( + pipeline_activity_results={ + "activityName": { + "output": { + "pipelineReturnValue": { + "test": "value", + }, + }, + "status": DependencyCondition.SUCCEEDED, + } + } + ), + "value", + id="activity_reference_with_nested_property", + ), + p("@createArray('a', 'b')", PipelineRunState(), ["a", "b"], id="function_call_array_result"), + p("@createArray('a', 'b')[1]", PipelineRunState(), "b", id="function_call_with_array_index"), + p( + "@createArray('a', createArray('b', 'c'))[1][0]", + PipelineRunState(), + "b", + id="function_call_with_nested_array_index", + ), + p( + "@concat( 'x1' , \n 'x2','x3' )", + PipelineRunState(), + "x1x2x3", + id="function_call_with_ws_and_newline", + ), + ], +) +def test_evaluate(expression: str, state: PipelineRunState, expected: Union[str, int, bool, float]) -> None: + # Arrange + evaluator = ExpressionEvaluator() + + # Act + actual = evaluator.evaluate(expression, state) + + # Assert + assert actual == expected + + +def test_evaluate_raises_exception_when_pipeline_parameter_not_found() -> None: + # Arrange + expression = "@pipeline().parameters.parameter" + evaluator = ExpressionEvaluator() + state = PipelineRunState() + + # Act + with pytest.raises(ExpressionParameterNotFoundError) as exinfo: + evaluator.evaluate(expression, state) + + # Assert + assert str(exinfo.value) == "Parameter 'parameter' not found" + + +def test_evaluate_raises_exception_when_pipeline_global_parameter_not_found() -> None: + # Arrange + expression = "@pipeline().globalParameters.parameter" + evaluator = ExpressionEvaluator() + state = PipelineRunState() + + # Act + with pytest.raises(ExpressionParameterNotFoundError) as exinfo: + evaluator.evaluate(expression, state) + + # Assert + assert str(exinfo.value) == "Parameter 'parameter' not found" + + +def test_evaluate_raises_exception_when_variable_not_found() -> None: + # Arrange + expression = "@variables('variable')" + evaluator = ExpressionEvaluator() + state = PipelineRunState() + + # Act + with pytest.raises(VariableNotFoundError) as exinfo: + evaluator.evaluate(expression, state) + + # Assert + assert str(exinfo.value) == "Variable 'variable' not found" + + +def test_evaluate_raises_exception_when_dataset_not_found() -> None: + # Arrange + expression = "@dataset('datasetName')" + evaluator = ExpressionEvaluator() + state = PipelineRunState() + + # Act + with pytest.raises(DatasetParameterNotFoundError) as exinfo: + evaluator.evaluate(expression, state) + + # Assert + assert str(exinfo.value) == "Dataset parameter: 'datasetName' not found" + + +def test_evaluate_raises_exception_when_linked_service_not_found() -> None: + # Arrange + expression = "@linkedService('linkedServiceName')" + evaluator = ExpressionEvaluator() + state = PipelineRunState() + + # Act + with pytest.raises(LinkedServiceParameterNotFoundError) as exinfo: + evaluator.evaluate(expression, state) + + # Assert + assert str(exinfo.value) == "LinkedService parameter: 'linkedServiceName' not found" + + +def test_evaluate_raises_exception_when_activity_not_found() -> None: + # Arrange + expression = "@activity('activityName').output.outputName" + evaluator = ExpressionEvaluator() + state = PipelineRunState() + + # Act + with pytest.raises(ActivityNotFoundError) as exinfo: + evaluator.evaluate(expression, state) + + # Assert + assert str(exinfo.value) == "Activity with name activityName not found" + + +def test_evaluate_raises_exception_when_state_iteration_item_not_set() -> None: + # Arrange + expression = "@item()" + evaluator = ExpressionEvaluator() + state = PipelineRunState() + + # Act + with pytest.raises(StateIterationItemNotSetError) as exinfo: + evaluator.evaluate(expression, state) + + # Assert + assert str(exinfo.value) == "Iteration item not set." diff --git a/tests/unit/functions/test_function_argument.py b/tests/unit/functions/test_function_argument.py deleted file mode 100644 index b6358b1e..00000000 --- a/tests/unit/functions/test_function_argument.py +++ /dev/null @@ -1,224 +0,0 @@ -# ruff: noqa - -import pytest -from azure_data_factory_testing_framework.exceptions.dataset_parameter_not_found_error import ( - DatasetParameterNotFoundError, -) -from azure_data_factory_testing_framework.exceptions.expression_parameter_not_found_error import ( - ExpressionParameterNotFoundError, -) -from azure_data_factory_testing_framework.exceptions.linked_service_parameter_not_found_error import ( - LinkedServiceParameterNotFoundError, -) -from azure_data_factory_testing_framework.exceptions.variable_not_found_error import VariableNotFoundError -from azure_data_factory_testing_framework.state import PipelineRunState, PipelineRunVariable, RunParameterType -from azure_data_factory_testing_framework.state.dependency_condition import DependencyCondition -from azure_data_factory_testing_framework.state.run_parameter import RunParameter - -pytest.skip("Refactor tests to use new expression evaluator", allow_module_level=True) - - -def test_evaluate_parameter_expression() -> None: - # Arrange - expression = "pipeline().parameters.parameterName" - argument = FunctionArgument(expression) - state = PipelineRunState() - state.parameters.append(RunParameter(RunParameterType.Pipeline, "parameterName", "parameterValue")) - - # Act - evaluated = argument.evaluate(state) - - # Assert - assert evaluated == "parameterValue" - - -def test_evaluate_parameter_returning_int_expression() -> None: - # Arrange - expression = "pipeline().parameters.parameterName" - argument = FunctionArgument(expression) - state = PipelineRunState() - state.parameters.append(RunParameter(RunParameterType.Pipeline, "parameterName", 1)) - - # Act - evaluated = argument.evaluate(state) - - # Assert - assert evaluated == 1 - - -def test_evaluate_global_parameter_expression() -> None: - # Arrange - expression = "pipeline().globalParameters.parameterName" - argument = FunctionArgument(expression) - state = PipelineRunState() - state.parameters.append(RunParameter(RunParameterType.Global, "parameterName", "parameterValue")) - - # Act - evaluated = argument.evaluate(state) - - # Assert - assert evaluated == "parameterValue" - - -def test_evaluate_variable_string_expression() -> None: - # Arrange - expression = "variables('variableName')" - argument = FunctionArgument(expression) - state = PipelineRunState( - variables=[ - PipelineRunVariable(name="variableName", default_value="variableValue"), - ], - ) - - # Act - evaluated = argument.evaluate(state) - - # Assert - assert evaluated == "variableValue" - - -def test_evaluate_linked_service_string_expression() -> None: - # Arrange - expression = "@linkedService('linkedServiceName')" - argument = FunctionArgument(expression) - state = PipelineRunState() - state.parameters.append(RunParameter(RunParameterType.LinkedService, "linkedServiceName", "linkedServiceNameValue")) - - # Act - evaluated = argument.evaluate(state) - - # Assert - assert evaluated == "linkedServiceNameValue" - - -def test_evaluate_dataset_string_expression() -> None: - # Arrange - expression = "dataset('datasetName')" - argument = FunctionArgument(expression) - state = PipelineRunState() - state.parameters.append(RunParameter(RunParameterType.Dataset, "datasetName", "datasetNameValue")) - - # Act - evaluated = argument.evaluate(state) - - # Assert - assert evaluated == "datasetNameValue" - - -def test_evaluate_iteration_item_string_expression() -> None: - # Arrange - expression = "item()" - argument = FunctionArgument(expression) - state = PipelineRunState() - state.iteration_item = "iterationItemValue" - - # Act - evaluated = argument.evaluate(state) - - # Assert - assert evaluated == "iterationItemValue" - - -def test_evaluate_unknown_pipeline_parameter() -> None: - # Arrange - expression = "pipeline().parameters.parameterName" - argument = FunctionArgument(expression) - state = PipelineRunState() - - # Act - with pytest.raises(ExpressionParameterNotFoundError): - argument.evaluate(state) - print("hi") - - -def test_evaluate_unknown_global_pipeline_parameter() -> None: - # Arrange - expression = "pipeline().globalParameters.parameterName" - argument = FunctionArgument(expression) - state = PipelineRunState() - - # Act - with pytest.raises(ExpressionParameterNotFoundError): - argument.evaluate(state) - - -def test_evaluate_unknown_variable() -> None: - # Arrange - expression = "variables('variableName')" - argument = FunctionArgument(expression) - state = PipelineRunState() - - # Act - with pytest.raises(VariableNotFoundError): - argument.evaluate(state) - - -def test_evaluate_unknown_dataset() -> None: - # Arrange - expression = "dataset('datasetName')" - argument = FunctionArgument(expression) - state = PipelineRunState() - - # Act - with pytest.raises(DatasetParameterNotFoundError): - argument.evaluate(state) - - -def test_evaluate_unknown_linked_service() -> None: - # Arrange - expression = "linkedService('linkedServiceName')" - argument = FunctionArgument(expression) - state = PipelineRunState() - - # Act - with pytest.raises(LinkedServiceParameterNotFoundError): - argument.evaluate(state) - - -def test_evaluate_activity_output_expression() -> None: - # Arrange - expression = "activity('activityName').output.outputName" - argument = FunctionArgument(expression) - state = PipelineRunState() - state.add_activity_result("activityName", DependencyCondition.SUCCEEDED, {"outputName": "outputValue"}) - - # Act - evaluated = argument.evaluate(state) - - # Assert - assert evaluated == "outputValue" - - -def test_evaluate_activity_output_nested_expression() -> None: - # Arrange - expression = "activity('activityName').output.nestedOutput.nestedField" - argument = FunctionArgument(expression) - state = PipelineRunState() - state.add_activity_result( - "activityName", - DependencyCondition.SUCCEEDED, - {"nestedOutput": {"nestedField": "outputValue"}}, - ) - - # Act - evaluated = argument.evaluate(state) - - # Assert - assert evaluated == "outputValue" - - -def test_evaluate_complex_json_expression() -> None: - # Arrange - expression = ( - '" { "command": "@pipeline().globalParameters.command", "argument": @pipeline().parameters.argument } "' - ) - argument = FunctionArgument(expression) - state = PipelineRunState() - state.parameters.append(RunParameter(RunParameterType.Global, "command", "commandValue")) - state.parameters.append(RunParameter(RunParameterType.Pipeline, "argument", "argumentValue")) - - # Act - evaluated = argument.evaluate(state) - - # Assert - assert evaluated == '" { "command": "commandValue", "argument": argumentValue } "' diff --git a/tests/unit/functions/test_function_parser.py b/tests/unit/functions/test_function_parser.py deleted file mode 100644 index f3147b55..00000000 --- a/tests/unit/functions/test_function_parser.py +++ /dev/null @@ -1,49 +0,0 @@ -# ruff: noqa -import pytest -from azure_data_factory_testing_framework.state import PipelineRunState - -pytest.skip("Refactor tests to use new expression evaluator", allow_module_level=True) - - -def test_parse_expression_with_nested_function_and_single_quote() -> None: - # Arrange - PipelineRunState() - raw_expression = "concat('https://example.com/jobs/', '123''', concat('&', 'abc,'))" - - # Act - expression = parse_expression(raw_expression) - - # Assert - function = expression - assert isinstance(expression, FunctionCall) - assert function is not None - assert function.name == "concat" - assert len(function.arguments) == 3 - assert function.arguments[0].expression == "'https://example.com/jobs/'" - assert function.arguments[1].expression == "'123''" - - inner_function = function.arguments[2] - assert isinstance(inner_function, FunctionCall) - assert inner_function.name == "concat" - assert len(inner_function.arguments) == 2 - assert inner_function.arguments[0].expression == "'&'" - assert inner_function.arguments[1].expression == "'abc,'" - - -def test_parse_expression_with_adf_native_functions() -> None: - # Arrange - PipelineRunState() - raw_expression = "concat('https://example.com/jobs/', '123''', variables('abc'), pipeline().parameters.abc, activity('abc').output.abc)" - - # Act - expression = parse_expression(raw_expression) - - # Assert - function = expression - assert function.name == "concat" - assert len(function.arguments) == 5 - assert function.arguments[0].expression == "'https://example.com/jobs/'" - assert function.arguments[1].expression == "'123''" - assert function.arguments[2].expression == "variables('abc')" - assert function.arguments[3].expression == "pipeline().parameters.abc" - assert function.arguments[4].expression == "activity('abc').output.abc" diff --git a/tests/unit/models/activities/base/test_activity.py b/tests/unit/models/activities/base/test_activity.py index a8254af8..412ed187 100644 --- a/tests/unit/models/activities/base/test_activity.py +++ b/tests/unit/models/activities/base/test_activity.py @@ -71,7 +71,7 @@ def test_evaluate_is_evaluating_expressions_inside_dict() -> None: typeProperties={ "pipeline": {"referenceName": "dummy"}, "parameters": { - "url": DataFactoryElement("pipeline().parameters.url"), + "url": DataFactoryElement("@pipeline().parameters.url"), }, }, depends_on=[], diff --git a/tests/unit/models/activities/control_activities/test_for_each_activity.py b/tests/unit/models/activities/control_activities/test_for_each_activity.py index 484ad936..5249f618 100644 --- a/tests/unit/models/activities/control_activities/test_for_each_activity.py +++ b/tests/unit/models/activities/control_activities/test_for_each_activity.py @@ -19,7 +19,7 @@ def test_when_evaluate_child_activities_then_should_return_the_activity_with_ite name="setVariable", typeProperties={ "variableName": "variable", - "value": DataFactoryElement[str]("item()"), + "value": DataFactoryElement[str]("@item()"), }, depends_on=[], ), diff --git a/tests/unit/models/activities/control_activities/test_switch_activity.py b/tests/unit/models/activities/control_activities/test_switch_activity.py index 8af35f81..18e6cae0 100644 --- a/tests/unit/models/activities/control_activities/test_switch_activity.py +++ b/tests/unit/models/activities/control_activities/test_switch_activity.py @@ -13,7 +13,7 @@ def test_when_evaluated_should_evaluate_expression() -> None: name="SwitchActivity", default_activities=[], cases_activities={}, - typeProperties={"on": DataFactoryElement("concat('case_', '1')")}, + typeProperties={"on": DataFactoryElement("@concat('case_', '1')")}, ) # Act diff --git a/tests/unit/models/activities/test_fail_activity.py b/tests/unit/models/activities/test_fail_activity.py index 118903ae..5f05bdad 100644 --- a/tests/unit/models/activities/test_fail_activity.py +++ b/tests/unit/models/activities/test_fail_activity.py @@ -9,7 +9,7 @@ def test_fail_activity_evaluates_to_failed_result() -> None: fail_activity = FailActivity( name="FailActivity", typeProperties={ - "message": DataFactoryElement("concat('Error code: ', '500')"), + "message": DataFactoryElement("@concat('Error code: ', '500')"), "errorCode": "500", }, depends_on=[],