feat: read JSON-based test definition from *_configs.py
This commit reads the JSON-based test definitions from the JSON files
generated by the previous commit into the variables where they were
previously defined in the `*_configs.py` files. As a consequence, there
is (almost) no visible change to the outside world; further refactoring
will happen in subsequent PRs. A small change was necessary in the test
case parametrization logic, which relied on the order of the dictionary
keys in the test definitions; that order changed during the conversion.
ingomueller-net committed Dec 11, 2024
1 parent 702a0f7 commit d386903
Showing 13 changed files with 87 additions and 747 deletions.
24 changes: 9 additions & 15 deletions substrait_consumer/functional/approximation_configs.py
@@ -1,16 +1,10 @@
-from substrait_consumer.functional.queries.sql.approximation_functions_sql import SQL_AGGREGATE
+from pathlib import Path
 
-AGGREGATE_FUNCTIONS = (
-    {
-        "test_name": "approx_count_distinct",
-        "local_files": {},
-        "named_tables": {"lineitem": "lineitem.parquet"},
-        "sql_query": SQL_AGGREGATE["approx_count_distinct"],
-    },
-    {
-        "test_name": "approx_distinct",
-        "local_files": {},
-        "named_tables": {"lineitem": "lineitem.parquet"},
-        "sql_query": SQL_AGGREGATE["approx_distinct"],
-    },
-)
+from substrait_consumer.functional.utils import load_json
+
+CONFIG_DIR = Path(__file__).parent
+SCALAR_DIR = CONFIG_DIR / "functions" / "approximation" / "scalar"
+AGGREGATE_DIR = CONFIG_DIR / "functions" / "approximation" / "aggregate"
+
+SCALAR_FUNCTIONS = tuple(load_json(file) for file in SCALAR_DIR.glob("*.json"))
+AGGREGATE_FUNCTIONS = tuple(load_json(file) for file in AGGREGATE_DIR.glob("*.json"))
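
All of the converted config modules follow the pattern shown above: find the JSON files that sit next to the module and load each one into a dict. The `load_json` helper is imported from `substrait_consumer.functional.utils`, but its body is not part of this diff; the following is only a minimal sketch of what such a helper could look like, not the repository's actual implementation.

import json
from pathlib import Path


def load_json(path: Path) -> dict:
    """Read a single JSON test definition file into a plain dict (sketch only)."""
    with path.open(encoding="utf-8") as handle:
        return json.load(handle)
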
193 changes: 7 additions & 186 deletions substrait_consumer/functional/arithmetic_configs.py
@@ -1,189 +1,10 @@
-from substrait_consumer.functional.queries.sql.arithmetic_functions_sql import (
-    SQL_AGGREGATE, SQL_SCALAR)
+from pathlib import Path
 
-SCALAR_FUNCTIONS = (
-    {
-        "test_name": "add",
-        "local_files": {},
-        "named_tables": {"partsupp": "partsupp.parquet"},
-        "sql_query": SQL_SCALAR["add"],
-    },
-    {
-        "test_name": "subtract",
-        "local_files": {},
-        "named_tables": {"partsupp": "partsupp.parquet"},
-        "sql_query": SQL_SCALAR["subtract"],
-    },
-    {
-        "test_name": "multiply",
-        "local_files": {},
-        "named_tables": {"partsupp": "partsupp.parquet"},
-        "sql_query": SQL_SCALAR["multiply"],
-    },
-    {
-        "test_name": "divide",
-        "local_files": {},
-        "named_tables": {"partsupp": "partsupp.parquet"},
-        "sql_query": SQL_SCALAR["divide"],
-    },
-    {
-        "test_name": "modulus",
-        "local_files": {},
-        "named_tables": {"partsupp": "partsupp.parquet"},
-        "sql_query": SQL_SCALAR["modulus"],
-    },
-    {
-        "test_name": "factorial",
-        "local_files": {},
-        "named_tables": {"nation": "nation.parquet"},
-        "sql_query": SQL_SCALAR["factorial"],
-    },
-    {
-        "test_name": "power",
-        "local_files": {},
-        "named_tables": {"partsupp": "partsupp.parquet"},
-        "sql_query": SQL_SCALAR["power"],
-    },
-    {
-        "test_name": "sqrt",
-        "local_files": {},
-        "named_tables": {"partsupp": "partsupp.parquet"},
-        "sql_query": SQL_SCALAR["sqrt"],
-    },
-    {
-        "test_name": "exp",
-        "local_files": {},
-        "named_tables": {"partsupp": "partsupp.parquet"},
-        "sql_query": SQL_SCALAR["exp"],
-    },
-    {
-        "test_name": "negate",
-        "local_files": {},
-        "named_tables": {"partsupp": "partsupp.parquet"},
-        "sql_query": SQL_SCALAR["negate"],
-    },
-    {
-        "test_name": "cos",
-        "local_files": {},
-        "named_tables": {"partsupp": "partsupp.parquet"},
-        "sql_query": SQL_SCALAR["cos"],
-    },
-    {
-        "test_name": "acos",
-        "local_files": {},
-        "named_tables": {"lineitem": "lineitem.parquet"},
-        "sql_query": SQL_SCALAR["acos"],
-    },
-    {
-        "test_name": "sin",
-        "local_files": {},
-        "named_tables": {"partsupp": "partsupp.parquet"},
-        "sql_query": SQL_SCALAR["sin"],
-    },
-    {
-        "test_name": "asin",
-        "local_files": {},
-        "named_tables": {"lineitem": "lineitem.parquet"},
-        "sql_query": SQL_SCALAR["asin"],
-    },
-    {
-        "test_name": "tan",
-        "local_files": {},
-        "named_tables": {"partsupp": "partsupp.parquet"},
-        "sql_query": SQL_SCALAR["tan"],
-    },
-    {
-        "test_name": "atan",
-        "local_files": {},
-        "named_tables": {"lineitem": "lineitem.parquet"},
-        "sql_query": SQL_SCALAR["atan"],
-    },
-    {
-        "test_name": "atan2",
-        "local_files": {},
-        "named_tables": {"lineitem": "lineitem.parquet"},
-        "sql_query": SQL_SCALAR["atan2"],
-    },
-    {
-        "test_name": "abs",
-        "local_files": {},
-        "named_tables": {},
-        "sql_query": SQL_SCALAR["abs"],
-    },
-    {
-        "test_name": "sign",
-        "local_files": {},
-        "named_tables": {},
-        "sql_query": SQL_SCALAR["sign"],
-    },
-)
+from substrait_consumer.functional.utils import load_json
 
+CONFIG_DIR = Path(__file__).parent
+SCALAR_DIR = CONFIG_DIR / "functions" / "arithmetic" / "scalar"
+AGGREGATE_DIR = CONFIG_DIR / "functions" / "arithmetic" / "aggregate"
 
-AGGREGATE_FUNCTIONS = (
-    {
-        "test_name": "sum",
-        "local_files": {},
-        "named_tables": {"partsupp": "partsupp.parquet"},
-        "sql_query": SQL_AGGREGATE["sum"],
-    },
-    {
-        "test_name": "count",
-        "local_files": {},
-        "named_tables": {"partsupp": "partsupp.parquet"},
-        "sql_query": SQL_AGGREGATE["count"],
-    },
-    {
-        "test_name": "count_star",
-        "local_files": {},
-        "named_tables": {"partsupp": "partsupp.parquet"},
-        "sql_query": SQL_AGGREGATE["count_star"],
-    },
-    {
-        "test_name": "avg",
-        "local_files": {},
-        "named_tables": {"partsupp": "partsupp.parquet"},
-        "sql_query": SQL_AGGREGATE["avg"],
-    },
-    {
-        "test_name": "min",
-        "local_files": {},
-        "named_tables": {"partsupp": "partsupp.parquet"},
-        "sql_query": SQL_AGGREGATE["min"],
-    },
-    {
-        "test_name": "max",
-        "local_files": {},
-        "named_tables": {"partsupp": "partsupp.parquet"},
-        "sql_query": SQL_AGGREGATE["max"],
-    },
-    {
-        "test_name": "median",
-        "local_files": {},
-        "named_tables": {"partsupp": "partsupp.parquet"},
-        "sql_query": SQL_AGGREGATE["median"],
-    },
-    {
-        "test_name": "mode",
-        "local_files": {},
-        "named_tables": {"partsupp": "partsupp.parquet"},
-        "sql_query": SQL_AGGREGATE["mode"],
-    },
-    {
-        "test_name": "product",
-        "local_files": {},
-        "named_tables": {"partsupp": "partsupp.parquet"},
-        "sql_query": SQL_AGGREGATE["product"],
-    },
-    {
-        "test_name": "std_dev",
-        "local_files": {},
-        "named_tables": {"partsupp": "partsupp.parquet"},
-        "sql_query": SQL_AGGREGATE["std_dev"],
-    },
-    {
-        "test_name": "variance",
-        "local_files": {},
-        "named_tables": {"partsupp": "partsupp.parquet"},
-        "sql_query": SQL_AGGREGATE["variance"],
-    },
-)
+SCALAR_FUNCTIONS = tuple(load_json(file) for file in SCALAR_DIR.glob("*.json"))
+AGGREGATE_FUNCTIONS = tuple(load_json(file) for file in AGGREGATE_DIR.glob("*.json"))
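
Assuming the JSON files generated by the previous commit mirror the removed dictionaries field for field (an assumption; this commit does not show the JSON files themselves), loading a hypothetical functions/arithmetic/scalar/add.json would yield a dict equivalent to the first removed scalar entry:

add_definition = {
    "test_name": "add",
    "local_files": {},
    "named_tables": {"partsupp": "partsupp.parquet"},
    # "sql_query" would carry the query previously referenced as
    # SQL_SCALAR["add"]; its exact JSON representation is not shown here.
}
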
70 changes: 8 additions & 62 deletions substrait_consumer/functional/arithmetic_decimal_configs.py
@@ -1,64 +1,10 @@
-from substrait_consumer.functional.queries.sql.arithmetic_demical_functions_sql import (
-    SQL_SCALAR,
-    SQL_AGGREGATE,
-)
+from pathlib import Path
 
-SCALAR_FUNCTIONS = (
-    {
-        "test_name": "add",
-        "local_files": {},
-        "named_tables": {"lineitem": "lineitem.parquet"},
-        "sql_query": SQL_SCALAR["add"],
-    },
-    {
-        "test_name": "subtract",
-        "local_files": {},
-        "named_tables": {"lineitem": "lineitem.parquet"},
-        "sql_query": SQL_SCALAR["subtract"],
-    },
-    {
-        "test_name": "multiply",
-        "local_files": {},
-        "named_tables": {"lineitem": "lineitem.parquet"},
-        "sql_query": SQL_SCALAR["multiply"],
-    },
-    {
-        "test_name": "divide",
-        "local_files": {},
-        "named_tables": {"lineitem": "lineitem.parquet"},
-        "sql_query": SQL_SCALAR["divide"],
-    },
-    {
-        "test_name": "modulus",
-        "local_files": {},
-        "named_tables": {"lineitem": "lineitem.parquet"},
-        "sql_query": SQL_SCALAR["modulus"],
-    },
-)
+from substrait_consumer.functional.utils import load_json
 
-AGGREGATE_FUNCTIONS = (
-    {
-        "test_name": "sum",
-        "local_files": {},
-        "named_tables": {"lineitem": "lineitem.parquet"},
-        "sql_query": SQL_AGGREGATE["sum"],
-    },
-    {
-        "test_name": "avg",
-        "local_files": {},
-        "named_tables": {"lineitem": "lineitem.parquet"},
-        "sql_query": SQL_AGGREGATE["avg"],
-    },
-    {
-        "test_name": "min",
-        "local_files": {},
-        "named_tables": {"lineitem": "lineitem.parquet"},
-        "sql_query": SQL_AGGREGATE["min"],
-    },
-    {
-        "test_name": "max",
-        "local_files": {},
-        "named_tables": {"lineitem": "lineitem.parquet"},
-        "sql_query": SQL_AGGREGATE["max"],
-    },
-)
+CONFIG_DIR = Path(__file__).parent
+SCALAR_DIR = CONFIG_DIR / "functions" / "arithmetic_decimal" / "scalar"
+AGGREGATE_DIR = CONFIG_DIR / "functions" / "arithmetic_decimal" / "aggregate"
+
+SCALAR_FUNCTIONS = tuple(load_json(file) for file in SCALAR_DIR.glob("*.json"))
+AGGREGATE_FUNCTIONS = tuple(load_json(file) for file in AGGREGATE_DIR.glob("*.json"))
49 changes: 8 additions & 41 deletions substrait_consumer/functional/boolean_configs.py
@@ -1,43 +1,10 @@
-from substrait_consumer.functional.queries.sql.boolean_functions_sql import SQL_SCALAR, SQL_AGGREGATE
+from pathlib import Path
 
-SCALAR_FUNCTIONS = (
-    {
-        "test_name": "or",
-        "local_files": {},
-        "named_tables": {},
-        "sql_query": SQL_SCALAR["or"],
-    },
-    {
-        "test_name": "and",
-        "local_files": {},
-        "named_tables": {},
-        "sql_query": SQL_SCALAR["and"],
-    },
-    {
-        "test_name": "not",
-        "local_files": {},
-        "named_tables": {},
-        "sql_query": SQL_SCALAR["not"],
-    },
-    {
-        "test_name": "xor",
-        "local_files": {},
-        "named_tables": {},
-        "sql_query": SQL_SCALAR["xor"],
-    },
-)
+from substrait_consumer.functional.utils import load_json
 
-AGGREGATE_FUNCTIONS = (
-    {
-        "test_name": "bool_and",
-        "local_files": {},
-        "named_tables": {},
-        "sql_query": SQL_AGGREGATE["bool_and"],
-    },
-    {
-        "test_name": "bool_or",
-        "local_files": {},
-        "named_tables": {},
-        "sql_query": SQL_AGGREGATE["bool_or"],
-    },
-)
+CONFIG_DIR = Path(__file__).parent
+SCALAR_DIR = CONFIG_DIR / "functions" / "boolean" / "scalar"
+AGGREGATE_DIR = CONFIG_DIR / "functions" / "boolean" / "aggregate"
+
+SCALAR_FUNCTIONS = tuple(load_json(file) for file in SCALAR_DIR.glob("*.json"))
+AGGREGATE_FUNCTIONS = tuple(load_json(file) for file in AGGREGATE_DIR.glob("*.json"))
2 changes: 1 addition & 1 deletion substrait_consumer/functional/common.py
@@ -264,7 +264,7 @@ def substrait_producer_sql_test(
 
     sql_query, supported_producers = sql_query
 
-    if not type(producer) in supported_producers:
+    if not producer.name() in supported_producers:
         pytest.xfail(
             f"{producer.name()} does not support the following SQL: {sql_query}"
         )
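
The one behavioral change outside the config modules is the membership check above: it now compares the producer's name rather than its Python type, presumably because the supported-producer information coming out of the JSON-based definitions is a list of strings rather than a list of classes. A self-contained illustration of the difference (FakeProducer is a made-up stand-in, not one of the suite's real producer classes):

class FakeProducer:
    """Stand-in producer; only the name() method matters for this check."""

    def name(self) -> str:
        return "FakeProducer"


producer = FakeProducer()

# Old check: supported_producers held classes, so the producer's type was tested.
assert type(producer) in [FakeProducer]

# New check: string-based definitions hold names, so the producer's name is tested.
assert producer.name() in ["FakeProducer"]
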