
Commit: Fixed merge conflicts
jprakash-db committed Dec 26, 2024
2 parents 3fc4e01 + f9d6ef1, commit a63ece8
Showing 6 changed files with 23 additions and 9 deletions.
11 changes: 9 additions & 2 deletions CHANGELOG.md
@@ -1,11 +1,18 @@
# Release History


-# 4.0.0
+# 4.0.0 (TBD)

- Split the connector into two separate packages: `databricks-sql-connector` and `databricks-sqlalchemy`. The `databricks-sql-connector` package contains the core functionality of the connector, while the `databricks-sqlalchemy` package contains the SQLAlchemy dialect for the connector.
- The `pyarrow` dependency is now optional in `databricks-sql-connector`. Users who need Arrow must install `pyarrow` explicitly.
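
A note on the optional `pyarrow` dependency above: in practice it means Arrow-dependent code paths guard the import. Below is a minimal sketch of such a guard; the `require_pyarrow` helper is illustrative, not the connector's actual internals.

```python
# Minimal sketch of an optional-dependency guard; require_pyarrow is
# illustrative and not part of the connector's actual API.
try:
    import pyarrow
except ImportError:
    pyarrow = None

def require_pyarrow():
    """Raise an actionable error only when Arrow functionality is used."""
    if pyarrow is None:
        raise ImportError(
            "pyarrow is not installed, but this code path returns Arrow "
            "results; install it with `pip install pyarrow`."
        )
    return pyarrow
```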

+# 3.7.0 (2024-12-23)
+
+- Fix: Incorrect number of rows fetched in inline results when fetching results with FETCH_NEXT orientation (databricks/databricks-sql-python#479 by @jprakash-db)
+- Updated the documentation to specify that native parameters are not supported in the PUT operation (databricks/databricks-sql-python#477 by @jprakash-db)
+- Relax the `pyarrow` and `numpy` pins (databricks/databricks-sql-python#452 by @arredond)
+- Feature: Added support for async query execution (databricks/databricks-sql-python#463 by @jprakash-db)
+- Updated the HTTP retry logic to be similar to the other Databricks drivers (databricks/databricks-sql-python#467 by @jprakash-db)
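
For the async execute feature above, the flow looks roughly like the sketch below. This is a hedged sketch based on the cursor methods added in PR #463 (`execute_async`, `get_query_state`, `get_async_execution_result`); `is_query_pending` and the exact return types are assumptions, and the connection parameters are placeholders.

```python
import time

from databricks import sql

# Hedged sketch of the 3.7.0 async execution flow (PR #463).
# is_query_pending() is an assumed helper; polling get_query_state()
# until a terminal state would work as well.
with sql.connect(
    server_hostname="<workspace-host>",
    http_path="<warehouse-http-path>",
    access_token="<token>",
) as connection:
    with connection.cursor() as cursor:
        cursor.execute_async("SELECT 1")
        while cursor.is_query_pending():
            time.sleep(2)  # the query keeps running server-side meanwhile
        cursor.get_async_execution_result()
        print(cursor.fetchall())
```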

# 3.6.0 (2024-10-25)

- Support encryption headers in the cloud fetch request (https://github.com/databricks/databricks-sql-python/pull/460 by @jackyhu-db)
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "databricks-sql-connector"
-version = "4.0.0.b4"
+version = "4.0.0"
description = "Databricks SQL Connector for Python"
authors = ["Databricks <[email protected]>"]
license = "Apache-2.0"
2 changes: 1 addition & 1 deletion src/databricks/sql/__init__.py
@@ -68,7 +68,7 @@ def __repr__(self):
DATE = DBAPITypeObject("date")
ROWID = DBAPITypeObject()

-__version__ = "3.6.0"
+__version__ = "3.7.0"
USER_AGENT_NAME = "PyDatabricksSqlConnector"

# These two functions are pyhive legacy
4 changes: 4 additions & 0 deletions src/databricks/sql/client.py
@@ -815,6 +815,7 @@ def execute(
self.thrift_backend,
self.buffer_size_bytes,
self.arraysize,
+self.connection.use_cloud_fetch,
)

if execute_response.is_staging_operation:
@@ -1209,6 +1210,7 @@ def __init__(
thrift_backend: ThriftBackend,
result_buffer_size_bytes: int = DEFAULT_RESULT_BUFFER_SIZE_BYTES,
arraysize: int = 10000,
+use_cloud_fetch: bool = True,
):
"""
A ResultSet manages the results of a single command.
@@ -1230,6 +1232,7 @@ def __init__(
self.description = execute_response.description
self._arrow_schema_bytes = execute_response.arrow_schema_bytes
self._next_row_index = 0
+self._use_cloud_fetch = use_cloud_fetch

if execute_response.arrow_queue:
# In this case the server has taken the fast path and returned an initial batch of
@@ -1257,6 +1260,7 @@ def _fill_results_buffer(self):
lz4_compressed=self.lz4_compressed,
arrow_schema_bytes=self._arrow_schema_bytes,
description=self.description,
+use_cloud_fetch=self._use_cloud_fetch,
)
self.results = results
self.has_more_rows = has_more_rows
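
Taken together, the client.py hunks above thread the connection-level `use_cloud_fetch` setting from `Cursor.execute` into `ResultSet` and down to every buffer refill. A condensed sketch of that plumbing, with signatures simplified and unrelated arguments elided:

```python
# Condensed sketch of the use_cloud_fetch plumbing; signatures are
# simplified and most arguments are elided.
class ResultSet:
    def __init__(self, connection, execute_response, thrift_backend,
                 result_buffer_size_bytes=10 * 1024 * 1024,  # placeholder default
                 arraysize=10000, use_cloud_fetch=True):
        self.thrift_backend = thrift_backend
        # Remember the flag so every later fetch uses the same transport mode.
        self._use_cloud_fetch = use_cloud_fetch

    def _fill_results_buffer(self):
        # Forward the flag so the backend can decide whether a retry is safe.
        results, has_more_rows = self.thrift_backend.fetch_results(
            use_cloud_fetch=self._use_cloud_fetch,  # other arguments elided
        )
        self.results = results
        self.has_more_rows = has_more_rows
```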
12 changes: 7 additions & 5 deletions src/databricks/sql/thrift_backend.py
@@ -321,7 +321,7 @@ def _handle_request_error(self, error_info, attempt, elapsed):

# FUTURE: Consider moving to https://github.com/litl/backoff or
# https://github.com/jd/tenacity for retry logic.
-def make_request(self, method, request):
+def make_request(self, method, request, retryable=True):
"""Execute given request, attempting retries when
1. Receiving HTTP 429/503 from server
2. OSError is raised during a GetOperationStatus
@@ -460,7 +460,7 @@ def attempt_request(attempt):
# return on success
# if available: bounded delay and retry
# if not: raise error
-max_attempts = self._retry_stop_after_attempts_count
+max_attempts = self._retry_stop_after_attempts_count if retryable else 1

# use index-1 counting for logging/human consistency
for attempt in range(1, max_attempts + 1):
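
The effect of `retryable=False` is to collapse this bounded retry loop to a single attempt. A self-contained sketch of the pattern, with a simple linear backoff standing in for the driver's actual retry policy:

```python
import time

def make_request_sketch(send, request, retryable=True,
                        stop_after_attempts_count=5, backoff_seconds=1.0):
    """Sketch of a bounded retry loop: non-idempotent requests get one attempt."""
    max_attempts = stop_after_attempts_count if retryable else 1
    last_error = None
    for attempt in range(1, max_attempts + 1):  # index-1 counting, as above
        try:
            return send(request)
        except OSError as error:
            last_error = error
            if attempt < max_attempts:
                time.sleep(backoff_seconds * attempt)  # stand-in backoff
    raise last_error
```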
@@ -1028,6 +1028,7 @@ def fetch_results(
lz4_compressed,
arrow_schema_bytes,
description,
+use_cloud_fetch=True,
):
assert op_handle is not None

@@ -1044,10 +1045,11 @@
includeResultSetMetadata=True,
)

-resp = self.make_request(self._client.FetchResults, req)
+# Inline-mode fetches with FETCH_NEXT orientation are not idempotent and hence are not retried
+resp = self.make_request(self._client.FetchResults, req, use_cloud_fetch)
if resp.results.startRowOffset > expected_row_start_offset:
-logger.warning(
-"Expected results to start from {} but they instead start at {}".format(
+raise DataError(
+"fetch_results failed due to an inconsistency in state between the client and the server. Expected results to start from {} but they instead start at {}; some result batches must have been skipped".format(
expected_row_start_offset, resp.results.startRowOffset
)
)
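
Since a row-offset mismatch now raises instead of merely logging a warning, callers should treat it as a failed fetch. A hedged usage sketch, assuming `DataError` is exposed in `databricks.sql.exc` like the connector's other DB-API exceptions:

```python
from databricks.sql.exc import DataError  # assumed import path

def fetch_all_or_rerun(cursor, query):
    """Re-run the query once if client and server row offsets diverge."""
    cursor.execute(query)
    try:
        return cursor.fetchall()
    except DataError:
        # Some result batches were skipped; resuming would silently return
        # partial data, so re-execute the query from scratch instead.
        cursor.execute(query)
        return cursor.fetchall()
```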
1 change: 1 addition & 0 deletions tests/unit/test_fetches.py
@@ -70,6 +70,7 @@ def fetch_results(
lz4_compressed,
arrow_schema_bytes,
description,
+use_cloud_fetch=True,
):
nonlocal batch_index
results = FetchTests.make_arrow_queue(batch_list[batch_index])
