databricks · vikrantpuppala · May 19, 2026 · May 19, 2026 · May 19, 2026
diff --git a/src/databricks/sql/backend/kernel/_errors.py b/src/databricks/sql/backend/kernel/_errors.py
@@ -43,9 +43,9 @@
     Error,
     OperationalError,
     ProgrammingError,
+    ServerOperationError,
 )
 
-
 try:
     import databricks_sql_kernel as _kernel  # type: ignore[import-not-found]
 except ImportError as exc:  # pragma: no cover - same hint as client.py
@@ -76,7 +76,12 @@
     "Internal": DatabaseError,
     "InvalidStatementHandle": ProgrammingError,
     "NetworkError": OperationalError,
-    "SqlError": DatabaseError,
+    # `SqlError` is a server-side query failure (syntax error, missing
+    # object, etc.) — exactly what the Thrift backend surfaces as
+    # `ServerOperationError`. Match Thrift's contract so user code that
+    # catches `ServerOperationError` (a subclass of `DatabaseError`)
+    # works equivalently with `use_kernel=True`.
+    "SqlError": ServerOperationError,
     "Unknown": DatabaseError,
 }
 

diff --git a/src/databricks/sql/backend/kernel/client.py b/src/databricks/sql/backend/kernel/client.py
@@ -517,14 +517,13 @@ def get_columns(
     ) -> "ResultSet":
         if self._kernel_session is None:
             raise InterfaceError("get_columns requires an open session.")
-        if not catalog_name:
-            # Kernel's list_columns requires a catalog (SEA `SHOW
-            # COLUMNS` cannot span catalogs). Surface the constraint
-            # explicitly rather than letting the kernel error.
-            raise ProgrammingError(
-                "get_columns requires catalog_name on the kernel backend."
-            )
         try:
+            # `catalog_name=None` is supported: the kernel issues
+            # `SHOW COLUMNS IN ALL CATALOGS` server-side and the
+            # response carries `catalogName` per row, so each result
+            # row's `TABLE_CAT` is correctly attributed. Matches the
+            # Thrift backend's `getColumns(null, …)` behaviour from
+            # the user's perspective.
             stream = self._kernel_session.metadata().list_columns(
                 catalog=catalog_name,
                 schema_pattern=schema_name,

diff --git a/src/databricks/sql/backend/kernel/type_mapping.py b/src/databricks/sql/backend/kernel/type_mapping.py
@@ -85,24 +85,56 @@ def description_from_arrow_schema(schema: pyarrow.Schema) -> List[Tuple]:
     """Build a PEP 249 ``description`` list from a pyarrow Schema.
 
     Each tuple is ``(name, type_code, display_size, internal_size,
-    precision, scale, null_ok)``. ``null_ok`` is taken from
-    ``field.nullable``; the other four are not reported by the
-    kernel today.
+    precision, scale, null_ok)``. PEP 249 allows ``null_ok`` to be
+    either a bool or ``None``; the Thrift backend always reports
+    ``None``, so we match that here for drop-in parity. The actual
+    nullability bit is still available via ``schema.field(i).nullable``
+    for callers that want it from the Arrow schema directly.
+
+    ``type_code`` normally comes from the Arrow ``DataType`` via
+    ``_arrow_type_to_dbapi_string``, which collapses
+    Databricks-specific types into their nearest Arrow shape (e.g.
+    ``VARIANT`` → ``Utf8``). To recover the precise Databricks type
+    name, we consult the field's metadata first — the kernel writes
+    the server-reported type into ``databricks.type_name`` (see
+    ``databricks_sql_kernel::reader::metadata_keys``). Today only
+    ``VARIANT`` is special-cased here for parity with the Thrift
+    backend's behaviour; other precise types (``INTERVAL_*``,
+    ``GEOMETRY``, ``GEOGRAPHY``) collapse to their Arrow shape on
+    both backends and don't need a remap.
     """
     return [
         (
             field.name,
-            _arrow_type_to_dbapi_string(field.type),
+            _databricks_type_for_field(field),
+            None,
             None,
             None,
             None,
             None,
-            field.nullable,
         )
         for field in schema
     ]
 
 
+def _databricks_type_for_field(field: pyarrow.Field) -> str:
+    """Pick the PEP 249 type code for a single field.
+
+    Consults the field's Arrow metadata under
+    ``databricks.type_name`` (written by the kernel from the SEA
+    response's column type) so types that collapse onto a generic
+    Arrow shape can still be distinguished. Today only ``VARIANT``
+    is mapped; everything else delegates to
+    ``_arrow_type_to_dbapi_string``.
+    """
+    md = field.metadata or {}
+    # `databricks.type_name` is bytes (Arrow metadata is always
+    # bytes); compare against bytes to avoid one encode per field.
+    if md.get(b"databricks.type_name") == b"VARIANT":
+        return "variant"
+    return _arrow_type_to_dbapi_string(field.type)
+
+
 def _tspark_param_value_str(param: ttypes.TSparkParameter) -> Any:
     """Extract the string-encoded value from a ``TSparkParameter``,
     or ``None`` for SQL NULL.

diff --git a/tests/unit/test_kernel_client.py b/tests/unit/test_kernel_client.py
@@ -70,9 +70,9 @@ def __init__(
     NotSupportedError,
     OperationalError,
     ProgrammingError,
+    ServerOperationError,
 )
 
-
 # ---------------------------------------------------------------------------
 # Error mapping
 # ---------------------------------------------------------------------------
@@ -93,7 +93,14 @@ def __init__(
         ("Internal", DatabaseError),
         ("InvalidStatementHandle", ProgrammingError),
         ("NetworkError", OperationalError),
-        ("SqlError", DatabaseError),
+        # `SqlError` is the kernel's slug for server-side query
+        # failures (syntax error, missing object, etc.) — exactly the
+        # case Thrift's backend surfaces as ``ServerOperationError``.
+        # Match Thrift so user code that catches the specific class
+        # works equivalently. ``ServerOperationError`` is itself a
+        # ``DatabaseError`` subclass, so existing catches of the base
+        # class are unaffected.
+        ("SqlError", ServerOperationError),
         ("Unknown", DatabaseError),
     ],
 )
@@ -309,20 +316,37 @@ def test_execute_command_rejects_query_tags():
         )
 
 
-def test_get_columns_requires_catalog():
+def test_get_columns_accepts_none_catalog():
+    """The kernel's `list_columns` honours `catalog=None` by issuing
+    `SHOW COLUMNS IN ALL CATALOGS` server-side. The connector should
+    pass `None` through rather than rejecting it, matching the Thrift
+    backend's `getColumns(null, …)` behaviour."""
     c = _make_client()
     c._kernel_session = MagicMock()
     cursor = MagicMock()
     cursor.arraysize = 100
     cursor.buffer_size_bytes = 1024
-    with pytest.raises(ProgrammingError, match="catalog_name"):
-        c.get_columns(
-            session_id=MagicMock(),
-            max_rows=1,
-            max_bytes=1,
-            cursor=cursor,
-            catalog_name=None,
-        )
+    cursor.connection = MagicMock()
+    # `list_columns` returns a stream the result-set wrapper will try
+    # to call `arrow_schema()` on; give it a minimal fake.
+    fake_stream = MagicMock()
+    fake_stream.arrow_schema.return_value = MagicMock(__iter__=lambda self: iter([]))
+    c._kernel_session.metadata.return_value.list_columns.return_value = fake_stream
+
+    c.get_columns(
+        session_id=MagicMock(),
+        max_rows=1,
+        max_bytes=1,
+        cursor=cursor,
+        catalog_name=None,
+    )
+    # `list_columns` should be called with catalog=None, not rejected.
+    c._kernel_session.metadata.return_value.list_columns.assert_called_once_with(
+        catalog=None,
+        schema_pattern=None,
+        table_pattern=None,
+        column_pattern=None,
+    )
 
 
 # ---------------------------------------------------------------------------
@@ -521,7 +545,9 @@ def test_pyo3_native_exception_wrapped_as_operational_error():
     stmt = MagicMock()
     stmt.execute.side_effect = TypeError("argument 'foo' must be str, not int")
     c._kernel_session.statement.return_value = stmt
-    with pytest.raises(OperationalError, match="Unexpected error from databricks_sql_kernel"):
+    with pytest.raises(
+        OperationalError, match="Unexpected error from databricks_sql_kernel"
+    ):
         c.execute_command(
             operation="SELECT 1",
             session_id=MagicMock(),
@@ -546,9 +572,7 @@ def test_pyo3_native_exception_wrapped_for_metadata_calls():
     cursor.arraysize = 100
     cursor.buffer_size_bytes = 1024
     with pytest.raises(OperationalError):
-        c.get_catalogs(
-            session_id=MagicMock(), max_rows=1, max_bytes=1, cursor=cursor
-        )
+        c.get_catalogs(session_id=MagicMock(), max_rows=1, max_bytes=1, cursor=cursor)
 
 
 # ---------------------------------------------------------------------------
@@ -574,9 +598,7 @@ def test_kernel_error_during_result_set_construction_is_mapped():
     cursor.arraysize = 100
     cursor.buffer_size_bytes = 1024
     with pytest.raises(DatabaseError, match="schema unavailable"):
-        c.get_catalogs(
-            session_id=MagicMock(), max_rows=1, max_bytes=1, cursor=cursor
-        )
+        c.get_catalogs(session_id=MagicMock(), max_rows=1, max_bytes=1, cursor=cursor)
 
 
 # ---------------------------------------------------------------------------

diff --git a/tests/unit/test_kernel_result_set.py b/tests/unit/test_kernel_result_set.py
@@ -77,8 +77,9 @@ def int_schema():
 def test_description_built_from_kernel_schema(int_schema):
     handle = _FakeKernelHandle(int_schema, [])
     rs = _make_rs(handle)
-    # null_ok slot reflects pyarrow.Field.nullable (True by default).
-    assert rs.description == [("n", "bigint", None, None, None, None, True)]
+    # `null_ok` slot is always `None` to match the Thrift backend's
+    # PEP-249 description shape.
+    assert rs.description == [("n", "bigint", None, None, None, None, None)]
 
 
 def test_fetchall_arrow_drains_all_batches(int_schema):
@@ -107,7 +108,11 @@ def test_fetchmany_arrow_slices_within_batch(int_schema):
 def test_fetchmany_arrow_spans_batch_boundary(int_schema):
     handle = _FakeKernelHandle(
         int_schema,
-        [_batch(int_schema, [1, 2]), _batch(int_schema, [3, 4]), _batch(int_schema, [5, 6])],
+        [
+            _batch(int_schema, [1, 2]),
+            _batch(int_schema, [3, 4]),
+            _batch(int_schema, [5, 6]),
+        ],
     )
     rs = _make_rs(handle)
     t = rs.fetchmany_arrow(5)

diff --git a/tests/unit/test_kernel_type_mapping.py b/tests/unit/test_kernel_type_mapping.py
@@ -65,25 +65,53 @@ def test_description_from_schema_preserves_field_names_and_order():
         ("name", "string"),
         ("created_at", "timestamp"),
     ]
-    # PEP 249 says 7-tuples. We don't report display_size /
-    # internal_size / precision / scale (all None); ``null_ok`` is
-    # taken from ``pyarrow.Field.nullable`` — True by default for
-    # schemas built from (name, type) pairs.
+    # PEP 249 7-tuples; we report column name and type only. PEP 249
+    # allows ``null_ok`` to be ``None`` and that's what the Thrift
+    # backend has always returned; match it so kernel-backed cursors
+    # are drop-in compatible. The Arrow ``field.nullable`` bit is still
+    # available via ``schema.field(i).nullable`` for callers that need
+    # the real value.
     for d in desc:
         assert len(d) == 7
-        assert d[2:] == (None, None, None, None, True)
+        assert d[2:] == (None, None, None, None, None)
 
 
-def test_description_from_schema_reports_non_nullable_fields():
+def test_description_null_ok_always_none_regardless_of_field_nullable():
+    # Match Thrift backend's behaviour: ``null_ok`` is ``None`` for
+    # every column even when the Arrow ``field.nullable`` bit is
+    # meaningful.
     schema = pa.schema(
         [
             pa.field("id", pa.int64(), nullable=False),
             pa.field("name", pa.string(), nullable=True),
         ]
     )
     desc = description_from_arrow_schema(schema)
-    assert desc[0][6] is False
-    assert desc[1][6] is True
+    assert desc[0][6] is None
+    assert desc[1][6] is None
+
+
+def test_description_uses_databricks_type_name_for_variant():
+    """VARIANT columns arrive over SEA as Arrow ``Utf8``; the kernel
+    annotates them with ``databricks.type_name=VARIANT`` so the
+    connector can recover the precise type for PEP-249 description.
+    Matches the Thrift backend, which exposes the same column as
+    ``variant``."""
+    schema = pa.schema(
+        [
+            pa.field(
+                "v",
+                pa.string(),
+                metadata={b"databricks.type_name": b"VARIANT"},
+            ),
+            # Plain Utf8 column without the metadata stays ``string``
+            # so we don't claim "variant" for everything.
+            pa.field("s", pa.string()),
+        ]
+    )
+    desc = description_from_arrow_schema(schema)
+    assert desc[0][1] == "variant"
+    assert desc[1][1] == "string"
 
 
 # ─── bind_tspark_params ──────────────────────────────────────────────────
@@ -94,7 +122,9 @@ def _mk_param(*, type, value, ordinal=True, name=None):
     from databricks.sql.thrift_api.TCLIService import ttypes
 
     p = ttypes.TSparkParameter(ordinal=ordinal, name=name, type=type)
-    p.value = ttypes.TSparkParameterValue(stringValue=value) if value is not None else None
+    p.value = (
+        ttypes.TSparkParameterValue(stringValue=value) if value is not None else None
+    )
     return p