Skip to content

Commit

Permalink
feat: reorganize TPC-H test scripts to make them more similar (#171)
Browse files Browse the repository at this point in the history
This PR splits the `test_*_tpch.py` scripts into several
`test_X_on_Y.py` scripts that have minimal differences among them. More
precisely, `test_duckdb_on_acero.py` and `test_duckdb_on_duckdb.py` now
only differ on which consumer is being built and
`test_duckdb_on_acero.py` and `test_isthmus_on_acero.py` differ only on
which plan is used (the former produces it on the fly, the latter loads
it from a file). The splitting essentially consisted of moving/copying
entire functions from one file to another, changing the order of lines,
renaming variables, factoring out common expressions into variables, and
changing comments. This is a preparatory step for reusing test
functionality in `common.py` in all three scripts.

Signed-off-by: Ingo Müller <[email protected]>
  • Loading branch information
ingomueller-net authored Dec 17, 2024
1 parent 525f26e commit 6659d6f
Show file tree
Hide file tree
Showing 70 changed files with 191 additions and 177 deletions.
176 changes: 0 additions & 176 deletions substrait_consumer/tests/integration/test_acero_tpch.py

This file was deleted.

93 changes: 93 additions & 0 deletions substrait_consumer/tests/integration/test_duckdb_on_acero.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
from pathlib import Path

import duckdb
import pytest
from pytest_snapshot.plugin import Snapshot

from substrait_consumer.consumers.acero_consumer import AceroConsumer
from substrait_consumer.functional.utils import load_json
from substrait_consumer.producers.duckdb_producer import DuckDBProducer

# Directory containing the per-query JSON test-case configurations.
CONFIG_DIR = Path(__file__).parent.parent / "integration"
TPCH_CONFIG_DIR = CONFIG_DIR / "tpch"
# One-element tuples so that each path binds to the single "path" parameter
# of the parametrized test below.
TEST_CASE_PATHS = [
    (path.relative_to(CONFIG_DIR),) for path in TPCH_CONFIG_DIR.rglob("*.json")
]
# Human-readable pytest IDs: the config path without its ".json" suffix.
IDS = [str(path[0]).removesuffix(".json") for path in TEST_CASE_PATHS]


@pytest.mark.parametrize(["path"], TEST_CASE_PATHS, ids=IDS)
@pytest.mark.usefixtures("prepare_tpch_parquet_data")
def test_substrait_query(
    path: Path,
    snapshot: Snapshot,
    db_con: duckdb.DuckDBPyConnection,
) -> None:
    """
    Produce a Substrait plan from SQL with DuckDB, run it on Acero, and
    compare the result against running the same SQL directly on DuckDB.

    Steps:
    1. Load the JSON test-case configuration addressed by ``path``.
    2. Set up the Acero consumer and the DuckDB producer with the test
       case's local files and named tables.
    3. Produce a Substrait plan from the SQL query via DuckDB.
    4. Run the Substrait plan on Acero.
    5. Run the SQL directly on DuckDB and compare both results.
    The comparison outcome (or the exception type of the first failing
    step) is recorded as a snapshot.

    Parameters:
        path:
            Path to the JSON test-case configuration, relative to
            ``CONFIG_DIR``.
        snapshot:
            Snapshot fixture used to record the test outcome.
        db_con:
            DuckDB connection shared by producer and consumer.
    """
    test_case = load_json(CONFIG_DIR / path)
    test_name = test_case["test_name"]
    local_files = test_case["local_files"]
    named_tables = test_case["named_tables"]
    sql_query, supported_producers = test_case["sql_query"]

    # This test can only produce plans with DuckDB; skip-proof the config.
    assert "duckdb" in supported_producers

    # Test names end in the TPC-H query number, e.g. "..._07".
    tpch_num = int(test_name.split("_")[-1])

    # Reuse the snapshot directory of the corresponding SQL test.
    snapshot.snapshot_dir = snapshot.snapshot_dir.parent / f"test_tpch_sql_{tpch_num}"

    consumer = AceroConsumer()
    producer = DuckDBProducer()

    consumer.setup(db_con, local_files, named_tables)
    producer.setup(db_con, local_files, named_tables)

    outcome_path = f"query_{tpch_num:02d}_outcome.txt"

    # Produce DuckDB plan from SQL query.
    try:
        proto_bytes = producer.produce_substrait(sql_query)
    except BaseException as e:
        snapshot.assert_match(str(type(e)), outcome_path)
        return

    # Run the Substrait plan on Acero.
    try:
        substrait_query_result_tb = consumer.run_substrait_query(proto_bytes)
    except BaseException as e:
        snapshot.assert_match(str(type(e)), outcome_path)
        return

    # Calculate results to verify against by running the SQL query on DuckDB
    try:
        duckdb_sql_result_tb = producer.run_sql_query(sql_query)
    except BaseException as e:
        snapshot.assert_match(str(type(e)), outcome_path)
        return

    # Compare column names case-insensitively: the engines may differ in
    # the casing they report.
    col_names = [x.lower() for x in substrait_query_result_tb.column_names]
    exp_col_names = [x.lower() for x in duckdb_sql_result_tb.column_names]

    # Verify results between substrait plan query and sql running against
    # duckdb are equal.
    outcome = {
        "column_names": col_names == exp_col_names,
        "table": substrait_query_result_tb == duckdb_sql_result_tb,
    }
    snapshot.assert_match(str(outcome), outcome_path)
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def test_substrait_query(

outcome_path = f"query_{tpch_num:02d}_outcome.txt"

# Convert the SQL into a substrait query plan and run the plan.
# Produce DuckDB plan from SQL query.
try:
proto_bytes = producer.produce_substrait(sql_query)
except BaseException as e:
Expand Down
97 changes: 97 additions & 0 deletions substrait_consumer/tests/integration/test_isthmus_on_acero.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
from pathlib import Path

import duckdb
import pytest
from pytest_snapshot.plugin import Snapshot

from substrait_consumer.consumers.acero_consumer import AceroConsumer
from substrait_consumer.functional.utils import load_json
from substrait_consumer.producers.duckdb_producer import DuckDBProducer

# Directory containing the pre-generated (Isthmus) Substrait plan files.
PLAN_DIR = Path(__file__).parent / "queries" / "tpch_substrait_plans"

# Directory containing the per-query JSON test-case configurations.
CONFIG_DIR = Path(__file__).parent.parent / "integration"
TPCH_CONFIG_DIR = CONFIG_DIR / "tpch"
# One-element tuples so that each path binds to the single "path" parameter
# of the parametrized test below.
TEST_CASE_PATHS = [
    (path.relative_to(CONFIG_DIR),) for path in TPCH_CONFIG_DIR.rglob("*.json")
]
# Human-readable pytest IDs: the config path without its ".json" suffix.
IDS = [str(path[0]).removesuffix(".json") for path in TEST_CASE_PATHS]


@pytest.mark.parametrize(["path"], TEST_CASE_PATHS, ids=IDS)
@pytest.mark.usefixtures("prepare_tpch_parquet_data")
def test_isthmus_substrait_plan(
    path: Path,
    snapshot: Snapshot,
    db_con: duckdb.DuckDBPyConnection,
) -> None:
    """
    Run a pre-generated Isthmus Substrait plan on Acero and compare the
    result against running the corresponding SQL directly on DuckDB.

    Steps:
    1. Load the JSON test-case configuration addressed by ``path``.
    2. Set up the Acero consumer and the DuckDB producer with the test
       case's local files and named tables.
    3. Load the Isthmus-produced Substrait plan from ``PLAN_DIR``.
    4. Run the Substrait plan on Acero.
    5. Run the SQL directly on DuckDB and compare both results.
    The comparison outcome (or the exception type of the first failing
    step) is recorded as a snapshot.

    Parameters:
        path:
            Path to the JSON test-case configuration, relative to
            ``CONFIG_DIR``.
        snapshot:
            Snapshot fixture used to record the test outcome.
        db_con:
            DuckDB connection shared by producer and consumer.
    """
    test_case = load_json(CONFIG_DIR / path)
    test_name = test_case["test_name"]
    local_files = test_case["local_files"]
    named_tables = test_case["named_tables"]
    sql_query, supported_producers = test_case["sql_query"]

    # The reference results are produced by DuckDB; the config must allow it.
    assert "duckdb" in supported_producers

    # Test names end in the TPC-H query number, e.g. "..._07".
    tpch_num = int(test_name.split("_")[-1])

    # Reuse the snapshot directory of the corresponding SQL test.
    snapshot.snapshot_dir = snapshot.snapshot_dir.parent / f"test_tpch_sql_{tpch_num}"

    consumer = AceroConsumer()
    producer = DuckDBProducer()

    consumer.setup(db_con, local_files, named_tables)
    producer.setup(db_con, local_files, named_tables)

    outcome_path = f"query_{tpch_num:02d}_outcome.txt"

    # Load Isthmus plan from file. NOTE(review): this is the JSON text of
    # the plan, not serialized protobuf bytes; run_substrait_query appears
    # to accept it as-is.
    substrait_plan_path = PLAN_DIR / f"query_{tpch_num:02d}_plan.json"
    with open(substrait_plan_path, "r") as f:
        plan_json = f.read()

    # Run the Substrait plan on Acero.
    try:
        substrait_query_result_tb = consumer.run_substrait_query(plan_json)
    except BaseException as e:
        snapshot.assert_match(str(type(e)), outcome_path)
        return

    # Calculate results to verify against by running the SQL query on DuckDB
    try:
        duckdb_sql_result_tb = producer.run_sql_query(sql_query)
    except BaseException as e:
        snapshot.assert_match(str(type(e)), outcome_path)
        return

    # Compare column names case-insensitively: the engines may differ in
    # the casing they report.
    col_names = [x.lower() for x in substrait_query_result_tb.column_names]
    exp_col_names = [x.lower() for x in duckdb_sql_result_tb.column_names]

    # Verify results between substrait plan query and sql running against
    # duckdb are equal.
    outcome = {
        "column_names": col_names == exp_col_names,
        "table": substrait_query_result_tb == duckdb_sql_result_tb,
    }
    snapshot.assert_match(str(outcome), outcome_path)

0 comments on commit 6659d6f

Please sign in to comment.