Skip to content

Commit

Permalink
feat: express TPC-H plan generation with existing infra (#172)
Browse files Browse the repository at this point in the history
This PR uses `substrait_producer_sql_test` from `common.py` to express
the TPC-H test targets that regenerate the Substrait plans using
Isthmus. This replaces test-specific code with generic code and is,
thus, a step towards reducing the complexity of the tests. As a
consequence, the paths of some snapshots change; however, note in the
corresponding git commit that all files are renamed without content
changes.

Signed-off-by: Ingo Müller <[email protected]>
  • Loading branch information
ingomueller-net authored Dec 17, 2024
1 parent 6659d6f commit 9aa6cdf
Show file tree
Hide file tree
Showing 47 changed files with 46 additions and 30 deletions.
2 changes: 2 additions & 0 deletions substrait_consumer/functional/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,11 @@
RELATION_SNAPSHOT_DIR = (
Path(__file__).parent.parent / "tests" / "functional" / "relations"
)
TPCH_SNAPSHOT_DIR = Path(__file__).parent.parent / "tests" / "functional" / "tpch"
SNAPSHOT_DIR = {
"function": FUNCTION_SNAPSHOT_DIR,
"relation": RELATION_SNAPSHOT_DIR,
"tpch": TPCH_SNAPSHOT_DIR,
}


Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
True
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
True
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
True
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
True
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
True
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
True
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
True
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
True
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
True
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
True
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
True
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
True
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
True
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
True
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<java class 'org.apache.calcite.runtime.CalciteContextException'>
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
True
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
True
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
True
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
True
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
True
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
True
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
True
9 changes: 7 additions & 2 deletions substrait_consumer/tests/integration/test_isthmus_on_acero.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from substrait_consumer.functional.utils import load_json
from substrait_consumer.producers.duckdb_producer import DuckDBProducer

PLAN_DIR = Path(__file__).parent / "queries" / "tpch_substrait_plans"
PLAN_DIR = Path(__file__).parent.parent / "functional" / "tpch"

CONFIG_DIR = Path(__file__).parent.parent / "integration"
TPCH_CONFIG_DIR = CONFIG_DIR / "tpch"
Expand Down Expand Up @@ -68,7 +68,12 @@ def test_isthmus_substrait_plan(
outcome_path = f"query_{tpch_num:02d}_outcome.txt"

# Load Isthmus plan from file.
substrait_plan_path = PLAN_DIR / f"query_{tpch_num:02d}_plan.json"
substrait_plan_path = (
PLAN_DIR
/ f"q{tpch_num:02d}_snapshots"
/ "IsthmusProducer"
/ f"q{tpch_num:02d}_plan.json"
)
with open(substrait_plan_path, "r") as f:
proto_bytes = f.read()

Expand Down
43 changes: 15 additions & 28 deletions substrait_consumer/tests/integration/test_tpch_plans_valid.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,11 @@
import pytest
from pytest_snapshot.plugin import Snapshot

from substrait_consumer.functional.common import check_match
from substrait_consumer.functional.common import substrait_producer_sql_test
from substrait_consumer.functional.utils import load_json
from substrait_consumer.producers.duckdb_producer import DuckDBProducer
from substrait_consumer.producers.isthmus_producer import IsthmusProducer

PLAN_SNAPSHOT_DIR = (
Path(__file__).parent / "queries" / "tpch_substrait_plans"
)

CONFIG_DIR = Path(__file__).parent.parent / "integration"
TPCH_CONFIG_DIR = CONFIG_DIR / "tpch"
TEST_CASE_PATHS = list(
Expand All @@ -23,38 +19,29 @@

@pytest.mark.parametrize(["path"], TEST_CASE_PATHS, ids=IDS)
@pytest.mark.usefixtures("prepare_tpch_parquet_data")
@pytest.mark.produce_substrait_snapshot
def test_isthmus_substrait_plan_generation(
path: Path,
snapshot: Snapshot,
record_property,
db_con: duckdb.DuckDBPyConnection,
) -> None:
"""
Generate the substrait plans using Isthmus.
"""
):
test_case = load_json(CONFIG_DIR / path)
test_name = test_case["test_name"]
local_files = test_case["local_files"]
named_tables = test_case["named_tables"]
sql_query, supported_producers = test_case["sql_query"]

assert "isthmus" in supported_producers

tpch_num = test_name.split("_")[-1].zfill(2)
snapshot.snapshot_dir = PLAN_SNAPSHOT_DIR

sql_query = test_case["sql_query"]
producer = IsthmusProducer()
producer.setup(db_con, local_files, named_tables)

try:
substrait_query = producer.produce_substrait(sql_query)
except BaseException as e:
snapshot.assert_match(str(type(e)), f"query_{tpch_num}_outcome.txt")
return

match_result = check_match(
snapshot, str(substrait_query), f"query_{tpch_num}_plan.json"
substrait_producer_sql_test(
path,
snapshot,
record_property,
db_con,
local_files,
named_tables,
sql_query,
producer,
validate=False,
)
snapshot.assert_match(str(match_result), f"query_{tpch_num}_outcome.txt")


@pytest.mark.parametrize(["path"], TEST_CASE_PATHS, ids=IDS)
Expand Down

0 comments on commit 9aa6cdf

Please sign in to comment.