Skip to content

Commit

Permalink
feat: remove unused sort_results feature (#165)
Browse files Browse the repository at this point in the history
Signed-off-by: Ingo Müller <[email protected]>
  • Loading branch information
ingomueller-net authored Dec 16, 2024
1 parent d4b92c1 commit 888adc7
Show file tree
Hide file tree
Showing 3 changed files with 0 additions and 40 deletions.
36 changes: 0 additions & 36 deletions substrait_consumer/tests/integration/test_acero_tpch.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ def test_isthmus_substrait_plan(
named_tables: dict[str, str],
sql_query: str,
substrait_query: str,
sort_results: bool = False,
) -> None:
"""
1. Format the substrait_query by replacing the 'local_files' 'uri_file'
Expand All @@ -66,8 +65,6 @@ def test_isthmus_substrait_plan(
SQL query.
substrait_query:
Substrait query.
sort_results:
Whether to sort the results before comparison.
"""
tpch_num = test_name.split("_")[-1].zfill(2)

Expand Down Expand Up @@ -95,17 +92,6 @@ def test_isthmus_substrait_plan(
col_names = [x.lower() for x in subtrait_query_result_tb.column_names]
exp_col_names = [x.lower() for x in duckdb_query_result_tb.column_names]

# Sort results by specified column names
if sort_results:
subtrait_sort_col = subtrait_query_result_tb.column_names[0]
subtrait_query_result_tb = arrow_sort_tb_values(
subtrait_query_result_tb, sortby=[subtrait_sort_col]
)
duckdb_sort_col = duckdb_query_result_tb.column_names[0]
duckdb_query_result_tb = arrow_sort_tb_values(
duckdb_query_result_tb, sortby=[duckdb_sort_col]
)

# Verify results between substrait plan query and sql running against
# duckdb are equal.
outcome = {
Expand All @@ -123,7 +109,6 @@ def test_duckdb_substrait_plan(
named_tables: dict[str, str],
sql_query: str,
substrait_query: str,
sort_results: bool = False,
) -> None:
"""
1. Load all the parquet files into DuckDB as separate named_tables.
Expand Down Expand Up @@ -182,24 +167,3 @@ def test_duckdb_substrait_plan(
"table": subtrait_query_result_tb == duckdb_sql_result_tb,
}
snapshot.assert_match(str(outcome), f"query_{tpch_num}_outcome.txt")


def arrow_sort_tb_values(table: pa.Table, sortby: Iterable[str]) -> pa.Table:
    """
    Return a copy of the pyarrow table with its rows reordered by the
    given sort columns.

    The ordering is obtained by asking for the "bottom k" rows with k set
    to the full row count, so every row is selected.
    NOTE(review): the kernel is the *unstable* variant, so the relative
    order of rows with equal sort keys is presumably not guaranteed.

    Parameters:
        table:
            Original pyarrow Table.
        sortby:
            Columns to sort the results by.

    Returns:
        Pyarrow Table sorted by given columns.
    """
    # k == len(table) turns the top-k selection into a full ordering.
    row_order = pa.compute.bottom_k_unstable(table, sort_keys=sortby, k=len(table))
    return table.take(row_order)
1 change: 0 additions & 1 deletion substrait_consumer/tests/integration/test_duckdb_tpch.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ def test_substrait_query(
named_tables: dict[str, str],
sql_query: str,
substrait_query: str,
sort_results: bool = False,
) -> None:
"""
1. Load all the parquet files into DuckDB as separate named_tables.
Expand Down
3 changes: 0 additions & 3 deletions substrait_consumer/tests/integration/test_tpch_plans_valid.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ def test_isthmus_substrait_plan_generation(
named_tables: dict[str, str],
sql_query: str,
substrait_query: str,
sort_results: bool = False,
) -> None:
"""
Generate the substrait plans using Isthmus.
Expand Down Expand Up @@ -77,7 +76,6 @@ def test_isthmus_substrait_plans_valid(
named_tables: dict[str, str],
sql_query: str,
substrait_query: str,
sort_results: bool = False,
) -> None:
"""
Run the Isthmus generated substrait plans through the substrait validator.
Expand Down Expand Up @@ -106,7 +104,6 @@ def test_duckdb_substrait_plans_valid(
named_tables: dict[str, str],
sql_query: str,
substrait_query: str,
sort_results: bool = False,
) -> None:
"""
Run the Duckdb generated substrait plans through the substrait validator.
Expand Down

0 comments on commit 888adc7

Please sign in to comment.