From cf239d6d87500a7e1863bfa30ba27d3342b0e927 Mon Sep 17 00:00:00 2001
From: Vikrant Puppala <vikrant.puppala@databricks.com>
Date: Tue, 19 May 2026 09:53:10 +0000
Subject: [PATCH 1/2] feat(backend/kernel): match Thrift backend's user-visible
 surface
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three small parity fixes that close 34 of the 66 diffs the Thrift
vs Kernel comparator surfaced against Dogfood today (see
databricks-driver-test docs/comparator-thrift-vs-kernel-diffs.md).
None of these are user-data correctness — they're contract-level
differences that affect code reading the cursor's description,
catching server errors by class, or calling get_columns with no
catalog.

1. PEP-249 description.null_ok always None
   description_from_arrow_schema previously took null_ok from the
   Arrow field.nullable bit. The Thrift backend has always
   reported None for this slot (PEP 249 permits either). Hardcode
   None so the kernel backend is a drop-in for code that reads
   cursor.description[i][6].

2. SqlError maps to ServerOperationError
   Server-side SQL failures (syntax error, missing object, etc.)
   used to wrap into the generic DatabaseError on the kernel
   backend. The Thrift backend raises ServerOperationError for the
   same shape. ServerOperationError is a DatabaseError subclass,
   so existing catches of the base class are unaffected — but code
   that catches the specific subclass now works equivalently.

3. get_columns accepts catalog_name=None
   Previously rejected at the connector layer with a
   ProgrammingError. The kernel's list_columns now issues
   SHOW COLUMNS IN ALL CATALOGS server-side when catalog is None;
   the response carries catalogName per row so TABLE_CAT is
   correctly attributed without client-side enumeration. Matches
   Thrift's getColumns(null, ...) behaviour.

Two other comparator-surfaced gaps are out of scope:
- INTERVAL columns crashing on fetch (kernel-side fix; intervals
  need to be stringified post-decode).
- _use_arrow_native_complex_types=False not honoured (pre-existing
  gap shared with the native SEA backend; needs separate work).

Signed-off-by: Vikrant Puppala <vikrant.puppala@databricks.com>
---
 src/databricks/sql/backend/kernel/_errors.py  |  9 ++-
 src/databricks/sql/backend/kernel/client.py   | 13 ++---
 .../sql/backend/kernel/type_mapping.py        | 10 ++--
 tests/unit/test_kernel_client.py              | 58 +++++++++++++------
 tests/unit/test_kernel_result_set.py          | 11 +++-
 tests/unit/test_kernel_type_mapping.py        | 25 +++++---
 6 files changed, 83 insertions(+), 43 deletions(-)

diff --git a/src/databricks/sql/backend/kernel/_errors.py b/src/databricks/sql/backend/kernel/_errors.py
index a844ff716..78b542300 100644
--- a/src/databricks/sql/backend/kernel/_errors.py
+++ b/src/databricks/sql/backend/kernel/_errors.py
@@ -43,9 +43,9 @@
     Error,
     OperationalError,
     ProgrammingError,
+    ServerOperationError,
 )
 
-
 try:
     import databricks_sql_kernel as _kernel  # type: ignore[import-not-found]
 except ImportError as exc:  # pragma: no cover - same hint as client.py
@@ -76,7 +76,12 @@
     "Internal": DatabaseError,
     "InvalidStatementHandle": ProgrammingError,
     "NetworkError": OperationalError,
-    "SqlError": DatabaseError,
+    # `SqlError` is a server-side query failure (syntax error, missing
+    # object, etc.) — exactly what the Thrift backend surfaces as
+    # `ServerOperationError`. Match Thrift's contract so user code that
+    # catches `ServerOperationError` (a subclass of `DatabaseError`)
+    # works equivalently with `use_kernel=True`.
+    "SqlError": ServerOperationError,
     "Unknown": DatabaseError,
 }
 
diff --git a/src/databricks/sql/backend/kernel/client.py b/src/databricks/sql/backend/kernel/client.py
index b9ff6a738..f10293311 100644
--- a/src/databricks/sql/backend/kernel/client.py
+++ b/src/databricks/sql/backend/kernel/client.py
@@ -517,14 +517,13 @@ def get_columns(
     ) -> "ResultSet":
         if self._kernel_session is None:
             raise InterfaceError("get_columns requires an open session.")
-        if not catalog_name:
-            # Kernel's list_columns requires a catalog (SEA `SHOW
-            # COLUMNS` cannot span catalogs). Surface the constraint
-            # explicitly rather than letting the kernel error.
-            raise ProgrammingError(
-                "get_columns requires catalog_name on the kernel backend."
-            )
         try:
+            # `catalog_name=None` is supported: the kernel issues
+            # `SHOW COLUMNS IN ALL CATALOGS` server-side and the
+            # response carries `catalogName` per row, so each result
+            # row's `TABLE_CAT` is correctly attributed. Matches the
+            # Thrift backend's `getColumns(null, …)` behaviour from
+            # the user's perspective.
             stream = self._kernel_session.metadata().list_columns(
                 catalog=catalog_name,
                 schema_pattern=schema_name,
diff --git a/src/databricks/sql/backend/kernel/type_mapping.py b/src/databricks/sql/backend/kernel/type_mapping.py
index 4bd38e621..19e0f15f4 100644
--- a/src/databricks/sql/backend/kernel/type_mapping.py
+++ b/src/databricks/sql/backend/kernel/type_mapping.py
@@ -85,9 +85,11 @@ def description_from_arrow_schema(schema: pyarrow.Schema) -> List[Tuple]:
     """Build a PEP 249 ``description`` list from a pyarrow Schema.
 
     Each tuple is ``(name, type_code, display_size, internal_size,
-    precision, scale, null_ok)``. ``null_ok`` is taken from
-    ``field.nullable``; the other four are not reported by the
-    kernel today.
+    precision, scale, null_ok)``. PEP 249 allows ``null_ok`` to be
+    either a bool or ``None``; the Thrift backend always reports
+    ``None``, so we match that here for drop-in parity. The actual
+    nullability bit is still available via ``schema.field(i).nullable``
+    for callers that want it from the Arrow schema directly.
     """
     return [
         (
@@ -97,7 +99,7 @@ def description_from_arrow_schema(schema: pyarrow.Schema) -> List[Tuple]:
             None,
             None,
             None,
-            field.nullable,
+            None,
         )
         for field in schema
     ]
diff --git a/tests/unit/test_kernel_client.py b/tests/unit/test_kernel_client.py
index a9e9c9090..0e2948284 100644
--- a/tests/unit/test_kernel_client.py
+++ b/tests/unit/test_kernel_client.py
@@ -70,9 +70,9 @@ def __init__(
     NotSupportedError,
     OperationalError,
     ProgrammingError,
+    ServerOperationError,
 )
 
-
 # ---------------------------------------------------------------------------
 # Error mapping
 # ---------------------------------------------------------------------------
@@ -93,7 +93,14 @@ def __init__(
         ("Internal", DatabaseError),
         ("InvalidStatementHandle", ProgrammingError),
         ("NetworkError", OperationalError),
-        ("SqlError", DatabaseError),
+        # `SqlError` is the kernel's slug for server-side query
+        # failures (syntax error, missing object, etc.) — exactly the
+        # case Thrift's backend surfaces as ``ServerOperationError``.
+        # Match Thrift so user code that catches the specific class
+        # works equivalently. ``ServerOperationError`` is itself a
+        # ``DatabaseError`` subclass, so existing catches of the base
+        # class are unaffected.
+        ("SqlError", ServerOperationError),
         ("Unknown", DatabaseError),
     ],
 )
@@ -309,20 +316,37 @@ def test_execute_command_rejects_query_tags():
         )
 
 
-def test_get_columns_requires_catalog():
+def test_get_columns_accepts_none_catalog():
+    """The kernel's `list_columns` honours `catalog=None` by issuing
+    `SHOW COLUMNS IN ALL CATALOGS` server-side. The connector should
+    pass `None` through rather than rejecting it, matching the Thrift
+    backend's `getColumns(null, …)` behaviour."""
     c = _make_client()
     c._kernel_session = MagicMock()
     cursor = MagicMock()
     cursor.arraysize = 100
     cursor.buffer_size_bytes = 1024
-    with pytest.raises(ProgrammingError, match="catalog_name"):
-        c.get_columns(
-            session_id=MagicMock(),
-            max_rows=1,
-            max_bytes=1,
-            cursor=cursor,
-            catalog_name=None,
-        )
+    cursor.connection = MagicMock()
+    # `list_columns` returns a stream the result-set wrapper will try
+    # to call `arrow_schema()` on; give it a minimal fake.
+    fake_stream = MagicMock()
+    fake_stream.arrow_schema.return_value = MagicMock(__iter__=lambda self: iter([]))
+    c._kernel_session.metadata.return_value.list_columns.return_value = fake_stream
+
+    c.get_columns(
+        session_id=MagicMock(),
+        max_rows=1,
+        max_bytes=1,
+        cursor=cursor,
+        catalog_name=None,
+    )
+    # `list_columns` should be called with catalog=None, not rejected.
+    c._kernel_session.metadata.return_value.list_columns.assert_called_once_with(
+        catalog=None,
+        schema_pattern=None,
+        table_pattern=None,
+        column_pattern=None,
+    )
 
 
 # ---------------------------------------------------------------------------
@@ -521,7 +545,9 @@ def test_pyo3_native_exception_wrapped_as_operational_error():
     stmt = MagicMock()
     stmt.execute.side_effect = TypeError("argument 'foo' must be str, not int")
     c._kernel_session.statement.return_value = stmt
-    with pytest.raises(OperationalError, match="Unexpected error from databricks_sql_kernel"):
+    with pytest.raises(
+        OperationalError, match="Unexpected error from databricks_sql_kernel"
+    ):
         c.execute_command(
             operation="SELECT 1",
             session_id=MagicMock(),
@@ -546,9 +572,7 @@ def test_pyo3_native_exception_wrapped_for_metadata_calls():
     cursor.arraysize = 100
     cursor.buffer_size_bytes = 1024
     with pytest.raises(OperationalError):
-        c.get_catalogs(
-            session_id=MagicMock(), max_rows=1, max_bytes=1, cursor=cursor
-        )
+        c.get_catalogs(session_id=MagicMock(), max_rows=1, max_bytes=1, cursor=cursor)
 
 
 # ---------------------------------------------------------------------------
@@ -574,9 +598,7 @@ def test_kernel_error_during_result_set_construction_is_mapped():
     cursor.arraysize = 100
     cursor.buffer_size_bytes = 1024
     with pytest.raises(DatabaseError, match="schema unavailable"):
-        c.get_catalogs(
-            session_id=MagicMock(), max_rows=1, max_bytes=1, cursor=cursor
-        )
+        c.get_catalogs(session_id=MagicMock(), max_rows=1, max_bytes=1, cursor=cursor)
 
 
 # ---------------------------------------------------------------------------
diff --git a/tests/unit/test_kernel_result_set.py b/tests/unit/test_kernel_result_set.py
index 87fa4f0d2..9ec69380a 100644
--- a/tests/unit/test_kernel_result_set.py
+++ b/tests/unit/test_kernel_result_set.py
@@ -77,8 +77,9 @@ def int_schema():
 def test_description_built_from_kernel_schema(int_schema):
     handle = _FakeKernelHandle(int_schema, [])
     rs = _make_rs(handle)
-    # null_ok slot reflects pyarrow.Field.nullable (True by default).
-    assert rs.description == [("n", "bigint", None, None, None, None, True)]
+    # `null_ok` slot is always `None` to match the Thrift backend's
+    # PEP-249 description shape.
+    assert rs.description == [("n", "bigint", None, None, None, None, None)]
 
 
 def test_fetchall_arrow_drains_all_batches(int_schema):
@@ -107,7 +108,11 @@ def test_fetchmany_arrow_slices_within_batch(int_schema):
 def test_fetchmany_arrow_spans_batch_boundary(int_schema):
     handle = _FakeKernelHandle(
         int_schema,
-        [_batch(int_schema, [1, 2]), _batch(int_schema, [3, 4]), _batch(int_schema, [5, 6])],
+        [
+            _batch(int_schema, [1, 2]),
+            _batch(int_schema, [3, 4]),
+            _batch(int_schema, [5, 6]),
+        ],
     )
     rs = _make_rs(handle)
     t = rs.fetchmany_arrow(5)
diff --git a/tests/unit/test_kernel_type_mapping.py b/tests/unit/test_kernel_type_mapping.py
index bee1ee7be..a39cf366a 100644
--- a/tests/unit/test_kernel_type_mapping.py
+++ b/tests/unit/test_kernel_type_mapping.py
@@ -65,16 +65,21 @@ def test_description_from_schema_preserves_field_names_and_order():
         ("name", "string"),
         ("created_at", "timestamp"),
     ]
-    # PEP 249 says 7-tuples. We don't report display_size /
-    # internal_size / precision / scale (all None); ``null_ok`` is
-    # taken from ``pyarrow.Field.nullable`` — True by default for
-    # schemas built from (name, type) pairs.
+    # PEP 249 7-tuples; we report column name and type only. PEP 249
+    # allows ``null_ok`` to be ``None`` and that's what the Thrift
+    # backend has always returned; match it so kernel-backed cursors
+    # are drop-in compatible. The Arrow ``field.nullable`` bit is still
+    # available via ``schema.field(i).nullable`` for callers that need
+    # the real value.
     for d in desc:
         assert len(d) == 7
-        assert d[2:] == (None, None, None, None, True)
+        assert d[2:] == (None, None, None, None, None)
 
 
-def test_description_from_schema_reports_non_nullable_fields():
+def test_description_null_ok_always_none_regardless_of_field_nullable():
+    # Match Thrift backend's behaviour: ``null_ok`` is ``None`` for
+    # every column even when the Arrow ``field.nullable`` bit is
+    # meaningful.
     schema = pa.schema(
         [
             pa.field("id", pa.int64(), nullable=False),
@@ -82,8 +87,8 @@ def test_description_from_schema_reports_non_nullable_fields():
         ]
     )
     desc = description_from_arrow_schema(schema)
-    assert desc[0][6] is False
-    assert desc[1][6] is True
+    assert desc[0][6] is None
+    assert desc[1][6] is None
 
 
 # ─── bind_tspark_params ──────────────────────────────────────────────────
@@ -94,7 +99,9 @@ def _mk_param(*, type, value, ordinal=True, name=None):
     from databricks.sql.thrift_api.TCLIService import ttypes
 
     p = ttypes.TSparkParameter(ordinal=ordinal, name=name, type=type)
-    p.value = ttypes.TSparkParameterValue(stringValue=value) if value is not None else None
+    p.value = (
+        ttypes.TSparkParameterValue(stringValue=value) if value is not None else None
+    )
     return p
 
 

From 85c522ffd9c0e3eab93ffce22584ed6d0e192a89 Mon Sep 17 00:00:00 2001
From: Vikrant Puppala <vikrant.puppala@databricks.com>
Date: Tue, 19 May 2026 10:11:30 +0000
Subject: [PATCH 2/2] fix(backend/kernel): expose VARIANT columns as 'variant'
 in description
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The kernel writes the server-reported type name into Arrow field
metadata under `databricks.type_name` (see
`databricks_sql_kernel::reader::metadata_keys::TYPE_NAME`).
`description_from_arrow_schema` now consults it and emits 'variant'
when the metadata says so, matching the Thrift backend.

Today only VARIANT needs this remap — every other precise type
either lands on a dedicated Arrow shape (INT, DECIMAL, …) or
collapses on both backends (INTERVAL_*, GEOMETRY, GEOGRAPHY).
Closes 3 PREPARED_STATEMENT_TYPES / COMPLEX_TYPES diffs in the
comparator.

Signed-off-by: Vikrant Puppala <vikrant.puppala@databricks.com>
---
 .../sql/backend/kernel/type_mapping.py        | 32 ++++++++++++++++++-
 tests/unit/test_kernel_type_mapping.py        | 23 +++++++++++++
 2 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/src/databricks/sql/backend/kernel/type_mapping.py b/src/databricks/sql/backend/kernel/type_mapping.py
index 19e0f15f4..cc6ccc7b9 100644
--- a/src/databricks/sql/backend/kernel/type_mapping.py
+++ b/src/databricks/sql/backend/kernel/type_mapping.py
@@ -90,11 +90,23 @@ def description_from_arrow_schema(schema: pyarrow.Schema) -> List[Tuple]:
     ``None``, so we match that here for drop-in parity. The actual
     nullability bit is still available via ``schema.field(i).nullable``
     for callers that want it from the Arrow schema directly.
+
+    ``type_code`` normally comes from the Arrow ``DataType`` via
+    ``_arrow_type_to_dbapi_string``, which collapses
+    Databricks-specific types into their nearest Arrow shape (e.g.
+    ``VARIANT`` → ``Utf8``). To recover the precise Databricks type
+    name, we consult the field's metadata first — the kernel writes
+    the server-reported type into ``databricks.type_name`` (see
+    ``databricks_sql_kernel::reader::metadata_keys``). Today only
+    ``VARIANT`` is special-cased here for parity with the Thrift
+    backend's behaviour; other precise types (``INTERVAL_*``,
+    ``GEOMETRY``, ``GEOGRAPHY``) collapse to their Arrow shape on
+    both backends and don't need a remap.
     """
     return [
         (
             field.name,
-            _arrow_type_to_dbapi_string(field.type),
+            _databricks_type_for_field(field),
             None,
             None,
             None,
@@ -105,6 +117,24 @@ def description_from_arrow_schema(schema: pyarrow.Schema) -> List[Tuple]:
     ]
 
 
+def _databricks_type_for_field(field: pyarrow.Field) -> str:
+    """Pick the PEP 249 type code for a single field.
+
+    Consults the field's Arrow metadata under
+    ``databricks.type_name`` (written by the kernel from the SEA
+    response's column type) so types that collapse onto a generic
+    Arrow shape can still be distinguished. Today only ``VARIANT``
+    is mapped; everything else delegates to
+    ``_arrow_type_to_dbapi_string``.
+    """
+    md = field.metadata or {}
+    # `databricks.type_name` is bytes (Arrow metadata is always
+    # bytes); compare against bytes to avoid one encode per field.
+    if md.get(b"databricks.type_name") == b"VARIANT":
+        return "variant"
+    return _arrow_type_to_dbapi_string(field.type)
+
+
 def _tspark_param_value_str(param: ttypes.TSparkParameter) -> Any:
     """Extract the string-encoded value from a ``TSparkParameter``,
     or ``None`` for SQL NULL.
diff --git a/tests/unit/test_kernel_type_mapping.py b/tests/unit/test_kernel_type_mapping.py
index a39cf366a..7d1cce546 100644
--- a/tests/unit/test_kernel_type_mapping.py
+++ b/tests/unit/test_kernel_type_mapping.py
@@ -91,6 +91,29 @@ def test_description_null_ok_always_none_regardless_of_field_nullable():
     assert desc[1][6] is None
 
 
+def test_description_uses_databricks_type_name_for_variant():
+    """VARIANT columns arrive over SEA as Arrow ``Utf8``; the kernel
+    annotates them with ``databricks.type_name=VARIANT`` so the
+    connector can recover the precise type for PEP-249 description.
+    Matches the Thrift backend, which exposes the same column as
+    ``variant``."""
+    schema = pa.schema(
+        [
+            pa.field(
+                "v",
+                pa.string(),
+                metadata={b"databricks.type_name": b"VARIANT"},
+            ),
+            # Plain Utf8 column without the metadata stays ``string``
+            # so we don't claim "variant" for everything.
+            pa.field("s", pa.string()),
+        ]
+    )
+    desc = description_from_arrow_schema(schema)
+    assert desc[0][1] == "variant"
+    assert desc[1][1] == "string"
+
+
 # ─── bind_tspark_params ──────────────────────────────────────────────────