
Commit: Fixed merge conflicts
jprakash-db committed Dec 26, 2024
2 parents 3fc4e01 + f9d6ef1, commit a63ece8
Showing 6 changed files with 23 additions and 9 deletions.
11 changes: 9 additions & 2 deletions CHANGELOG.md
@@ -1,11 +1,18 @@
# Release History


-# 4.0.0
+# 4.0.0 (TBD)

- Split the connector into two separate packages: `databricks-sql-connector` and `databricks-sqlalchemy`. The `databricks-sql-connector` package contains the core functionality of the connector, while the `databricks-sqlalchemy` package contains the SQLAlchemy dialect for the connector.
- The `pyarrow` dependency is now optional in `databricks-sql-connector`. Users who need Arrow must install `pyarrow` explicitly.
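
A note on the optional `pyarrow` dependency above: in practice it means Arrow-dependent code paths guard the import. Below is a minimal sketch of such a guard; the `require_pyarrow` helper is illustrative, not the connector's actual internals.

```python
# Minimal sketch of an optional-dependency guard; require_pyarrow is
# illustrative and not part of the connector's actual API.
try:
    import pyarrow
except ImportError:
    pyarrow = None

def require_pyarrow():
    """Raise an actionable error only when Arrow functionality is used."""
    if pyarrow is None:
        raise ImportError(
            "pyarrow is not installed, but this code path returns Arrow "
            "results; install it with `pip install pyarrow`."
        )
    return pyarrow
```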

+# 3.7.0 (2024-12-23)
+
+- Fix: Incorrect number of rows fetched in inline results when fetching results with FETCH_NEXT orientation (databricks/databricks-sql-python#479 by @jprakash-db)
+- Updated the documentation to specify that native parameters are not supported in the PUT operation (databricks/databricks-sql-python#477 by @jprakash-db)
+- Relax the `pyarrow` and `numpy` pins (databricks/databricks-sql-python#452 by @arredond)
+- Feature: Added support for async query execution (databricks/databricks-sql-python#463 by @jprakash-db)
+- Updated the HTTP retry logic to be similar to the other Databricks drivers (databricks/databricks-sql-python#467 by @jprakash-db)
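
For the async execute feature above, the flow looks roughly like the sketch below. This is a hedged sketch based on the cursor methods added in PR #463 (`execute_async`, `get_query_state`, `get_async_execution_result`); `is_query_pending` and the exact return types are assumptions, and the connection parameters are placeholders.

```python
import time

from databricks import sql

# Hedged sketch of the 3.7.0 async execution flow (PR #463).
# is_query_pending() is an assumed helper; polling get_query_state()
# until a terminal state would work as well.
with sql.connect(
    server_hostname="<workspace-host>",
    http_path="<warehouse-http-path>",
    access_token="<token>",
) as connection:
    with connection.cursor() as cursor:
        cursor.execute_async("SELECT 1")
        while cursor.is_query_pending():
            time.sleep(2)  # the query keeps running server-side meanwhile
        cursor.get_async_execution_result()
        print(cursor.fetchall())
```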

# 3.6.0 (2024-10-25)

- Support encryption headers in the cloud fetch request (https://github.com/databricks/databricks-sql-python/pull/460 by @jackyhu-db)
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "databricks-sql-connector"
-version = "4.0.0.b4"
+version = "4.0.0"
description = "Databricks SQL Connector for Python"
authors = ["Databricks <[email protected]>"]
license = "Apache-2.0"
2 changes: 1 addition & 1 deletion src/databricks/sql/__init__.py
@@ -68,7 +68,7 @@ def __repr__(self):
DATE = DBAPITypeObject("date")
ROWID = DBAPITypeObject()

-__version__ = "3.6.0"
+__version__ = "3.7.0"
USER_AGENT_NAME = "PyDatabricksSqlConnector"

# These two functions are pyhive legacy
4 changes: 4 additions & 0 deletions src/databricks/sql/client.py
@@ -815,6 +815,7 @@ def execute(
self.thrift_backend,
self.buffer_size_bytes,
self.arraysize,
+self.connection.use_cloud_fetch,
)

if execute_response.is_staging_operation:
@@ -1209,6 +1210,7 @@ def __init__(
thrift_backend: ThriftBackend,
result_buffer_size_bytes: int = DEFAULT_RESULT_BUFFER_SIZE_BYTES,
arraysize: int = 10000,
+use_cloud_fetch: bool = True,
):
"""
A ResultSet manages the results of a single command.
@@ -1230,6 +1232,7 @@ def __init__(
self.description = execute_response.description
self._arrow_schema_bytes = execute_response.arrow_schema_bytes
self._next_row_index = 0
+self._use_cloud_fetch = use_cloud_fetch

if execute_response.arrow_queue:
# In this case the server has taken the fast path and returned an initial batch of
@@ -1257,6 +1260,7 @@ def _fill_results_buffer(self):
lz4_compressed=self.lz4_compressed,
arrow_schema_bytes=self._arrow_schema_bytes,
description=self.description,
+use_cloud_fetch=self._use_cloud_fetch,
)
self.results = results
self.has_more_rows = has_more_rows
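
Taken together, the client.py hunks above thread the connection-level `use_cloud_fetch` setting from `Cursor.execute` into `ResultSet` and down to every buffer refill. A condensed sketch of that plumbing, with signatures simplified and unrelated arguments elided:

```python
# Condensed sketch of the use_cloud_fetch plumbing; signatures are
# simplified and most arguments are elided.
class ResultSet:
    def __init__(self, connection, execute_response, thrift_backend,
                 result_buffer_size_bytes=10 * 1024 * 1024,  # placeholder default
                 arraysize=10000, use_cloud_fetch=True):
        self.thrift_backend = thrift_backend
        # Remember the flag so every later fetch uses the same transport mode.
        self._use_cloud_fetch = use_cloud_fetch

    def _fill_results_buffer(self):
        # Forward the flag so the backend can decide whether a retry is safe.
        results, has_more_rows = self.thrift_backend.fetch_results(
            use_cloud_fetch=self._use_cloud_fetch,  # other arguments elided
        )
        self.results = results
        self.has_more_rows = has_more_rows
```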
12 changes: 7 additions & 5 deletions src/databricks/sql/thrift_backend.py
@@ -321,7 +321,7 @@ def _handle_request_error(self, error_info, attempt, elapsed):

# FUTURE: Consider moving to https://github.com/litl/backoff or
# https://github.com/jd/tenacity for retry logic.
-def make_request(self, method, request):
+def make_request(self, method, request, retryable=True):
"""Execute given request, attempting retries when
1. Receiving HTTP 429/503 from server
2. OSError is raised during a GetOperationStatus
@@ -460,7 +460,7 @@ def attempt_request(attempt):
# return on success
# if available: bounded delay and retry
# if not: raise error
-max_attempts = self._retry_stop_after_attempts_count
+max_attempts = self._retry_stop_after_attempts_count if retryable else 1

# use index-1 counting for logging/human consistency
for attempt in range(1, max_attempts + 1):
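
The effect of `retryable=False` is to collapse this bounded retry loop to a single attempt. A self-contained sketch of the pattern, with a simple linear backoff standing in for the driver's actual retry policy:

```python
import time

def make_request_sketch(send, request, retryable=True,
                        stop_after_attempts_count=5, backoff_seconds=1.0):
    """Sketch of a bounded retry loop: non-idempotent requests get one attempt."""
    max_attempts = stop_after_attempts_count if retryable else 1
    last_error = None
    for attempt in range(1, max_attempts + 1):  # index-1 counting, as above
        try:
            return send(request)
        except OSError as error:
            last_error = error
            if attempt < max_attempts:
                time.sleep(backoff_seconds * attempt)  # stand-in backoff
    raise last_error
```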
@@ -1028,6 +1028,7 @@ def fetch_results(
lz4_compressed,
arrow_schema_bytes,
description,
+use_cloud_fetch=True,
):
assert op_handle is not None

@@ -1044,10 +1045,11 @@
includeResultSetMetadata=True,
)

-resp = self.make_request(self._client.FetchResults, req)
+# Inline-mode fetches with FETCH_NEXT orientation are not idempotent and hence are not retried
+resp = self.make_request(self._client.FetchResults, req, use_cloud_fetch)
if resp.results.startRowOffset > expected_row_start_offset:
-logger.warning(
-"Expected results to start from {} but they instead start at {}".format(
+raise DataError(
+"fetch_results failed due to an inconsistency in state between the client and the server. Expected results to start from {} but they instead start at {}; some result batches must have been skipped".format(
expected_row_start_offset, resp.results.startRowOffset
)
)
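
Since a row-offset mismatch now raises instead of merely logging a warning, callers should treat it as a failed fetch. A hedged usage sketch, assuming `DataError` is exposed in `databricks.sql.exc` like the connector's other DB-API exceptions:

```python
from databricks.sql.exc import DataError  # assumed import path

def fetch_all_or_rerun(cursor, query):
    """Re-run the query once if client and server row offsets diverge."""
    cursor.execute(query)
    try:
        return cursor.fetchall()
    except DataError:
        # Some result batches were skipped; resuming would silently return
        # partial data, so re-execute the query from scratch instead.
        cursor.execute(query)
        return cursor.fetchall()
```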
1 change: 1 addition & 0 deletions tests/unit/test_fetches.py
@@ -70,6 +70,7 @@ def fetch_results(
lz4_compressed,
arrow_schema_bytes,
description,
+use_cloud_fetch=True,
):
nonlocal batch_index
results = FetchTests.make_arrow_queue(batch_list[batch_index])
