Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions src/databricks/sql/backend/kernel/_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,9 @@
Error,
OperationalError,
ProgrammingError,
ServerOperationError,
)


try:
import databricks_sql_kernel as _kernel # type: ignore[import-not-found]
except ImportError as exc: # pragma: no cover - same hint as client.py
Expand Down Expand Up @@ -76,7 +76,12 @@
"Internal": DatabaseError,
"InvalidStatementHandle": ProgrammingError,
"NetworkError": OperationalError,
"SqlError": DatabaseError,
# `SqlError` is a server-side query failure (syntax error, missing
# object, etc.) — exactly what the Thrift backend surfaces as
# `ServerOperationError`. Match Thrift's contract so user code that
# catches `ServerOperationError` (a subclass of `DatabaseError`)
# works equivalently with `use_kernel=True`.
"SqlError": ServerOperationError,
"Unknown": DatabaseError,
}

Expand Down
13 changes: 6 additions & 7 deletions src/databricks/sql/backend/kernel/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -517,14 +517,13 @@ def get_columns(
) -> "ResultSet":
if self._kernel_session is None:
raise InterfaceError("get_columns requires an open session.")
if not catalog_name:
# Kernel's list_columns requires a catalog (SEA `SHOW
# COLUMNS` cannot span catalogs). Surface the constraint
Comment thread
vikrantpuppala marked this conversation as resolved.
# explicitly rather than letting the kernel error.
raise ProgrammingError(
"get_columns requires catalog_name on the kernel backend."
)
try:
# `catalog_name=None` is supported: the kernel issues
# `SHOW COLUMNS IN ALL CATALOGS` server-side and the
# response carries `catalogName` per row, so each result
# row's `TABLE_CAT` is correctly attributed. Matches the
# Thrift backend's `getColumns(null, …)` behaviour from
# the user's perspective.
stream = self._kernel_session.metadata().list_columns(
catalog=catalog_name,
schema_pattern=schema_name,
Expand Down
42 changes: 37 additions & 5 deletions src/databricks/sql/backend/kernel/type_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,24 +85,56 @@ def description_from_arrow_schema(schema: pyarrow.Schema) -> List[Tuple]:
"""Build a PEP 249 ``description`` list from a pyarrow Schema.

Each tuple is ``(name, type_code, display_size, internal_size,
precision, scale, null_ok)``. ``null_ok`` is taken from
``field.nullable``; the other four are not reported by the
kernel today.
precision, scale, null_ok)``. PEP 249 allows ``null_ok`` to be
Comment thread
vikrantpuppala marked this conversation as resolved.
either a bool or ``None``; the Thrift backend always reports
``None``, so we match that here for drop-in parity. The actual
nullability bit is still available via ``schema.field(i).nullable``
for callers that want it from the Arrow schema directly.

``type_code`` normally comes from the Arrow ``DataType`` via
``_arrow_type_to_dbapi_string``, which collapses
Databricks-specific types into their nearest Arrow shape (e.g.
``VARIANT`` → ``Utf8``). To recover the precise Databricks type
name, we consult the field's metadata first — the kernel writes
the server-reported type into ``databricks.type_name`` (see
``databricks_sql_kernel::reader::metadata_keys``). Today only
``VARIANT`` is special-cased here for parity with the Thrift
backend's behaviour; other precise types (``INTERVAL_*``,
``GEOMETRY``, ``GEOGRAPHY``) collapse to their Arrow shape on
both backends and don't need a remap.
"""
return [
(
field.name,
_arrow_type_to_dbapi_string(field.type),
_databricks_type_for_field(field),
None,
None,
None,
None,
None,
field.nullable,
)
for field in schema
]


def _databricks_type_for_field(field: pyarrow.Field) -> str:
"""Pick the PEP 249 type code for a single field.

Consults the field's Arrow metadata under
``databricks.type_name`` (written by the kernel from the SEA
response's column type) so types that collapse onto a generic
Arrow shape can still be distinguished. Today only ``VARIANT``
is mapped; everything else delegates to
``_arrow_type_to_dbapi_string``.
"""
md = field.metadata or {}
# `databricks.type_name` is bytes (Arrow metadata is always
# bytes); compare against bytes to avoid one encode per field.
if md.get(b"databricks.type_name") == b"VARIANT":
return "variant"
return _arrow_type_to_dbapi_string(field.type)


def _tspark_param_value_str(param: ttypes.TSparkParameter) -> Any:
"""Extract the string-encoded value from a ``TSparkParameter``,
or ``None`` for SQL NULL.
Expand Down
58 changes: 40 additions & 18 deletions tests/unit/test_kernel_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,9 @@ def __init__(
NotSupportedError,
OperationalError,
ProgrammingError,
ServerOperationError,
)


# ---------------------------------------------------------------------------
# Error mapping
# ---------------------------------------------------------------------------
Expand All @@ -93,7 +93,14 @@ def __init__(
("Internal", DatabaseError),
("InvalidStatementHandle", ProgrammingError),
("NetworkError", OperationalError),
("SqlError", DatabaseError),
# `SqlError` is the kernel's slug for server-side query
# failures (syntax error, missing object, etc.) — exactly the
# case Thrift's backend surfaces as ``ServerOperationError``.
# Match Thrift so user code that catches the specific class
# works equivalently. ``ServerOperationError`` is itself a
# ``DatabaseError`` subclass, so existing catches of the base
# class are unaffected.
("SqlError", ServerOperationError),
("Unknown", DatabaseError),
],
)
Expand Down Expand Up @@ -309,20 +316,37 @@ def test_execute_command_rejects_query_tags():
)


def test_get_columns_requires_catalog():
def test_get_columns_accepts_none_catalog():
"""The kernel's `list_columns` honours `catalog=None` by issuing
`SHOW COLUMNS IN ALL CATALOGS` server-side. The connector should
pass `None` through rather than rejecting it, matching the Thrift
backend's `getColumns(null, …)` behaviour."""
c = _make_client()
c._kernel_session = MagicMock()
cursor = MagicMock()
cursor.arraysize = 100
cursor.buffer_size_bytes = 1024
with pytest.raises(ProgrammingError, match="catalog_name"):
c.get_columns(
session_id=MagicMock(),
max_rows=1,
max_bytes=1,
cursor=cursor,
catalog_name=None,
)
cursor.connection = MagicMock()
# `list_columns` returns a stream the result-set wrapper will try
# to call `arrow_schema()` on; give it a minimal fake.
fake_stream = MagicMock()
fake_stream.arrow_schema.return_value = MagicMock(__iter__=lambda self: iter([]))
c._kernel_session.metadata.return_value.list_columns.return_value = fake_stream

c.get_columns(
session_id=MagicMock(),
max_rows=1,
max_bytes=1,
cursor=cursor,
catalog_name=None,
)
# `list_columns` should be called with catalog=None, not rejected.
c._kernel_session.metadata.return_value.list_columns.assert_called_once_with(
catalog=None,
schema_pattern=None,
table_pattern=None,
column_pattern=None,
)


# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -521,7 +545,9 @@ def test_pyo3_native_exception_wrapped_as_operational_error():
stmt = MagicMock()
stmt.execute.side_effect = TypeError("argument 'foo' must be str, not int")
c._kernel_session.statement.return_value = stmt
with pytest.raises(OperationalError, match="Unexpected error from databricks_sql_kernel"):
with pytest.raises(
OperationalError, match="Unexpected error from databricks_sql_kernel"
):
c.execute_command(
operation="SELECT 1",
session_id=MagicMock(),
Expand All @@ -546,9 +572,7 @@ def test_pyo3_native_exception_wrapped_for_metadata_calls():
cursor.arraysize = 100
cursor.buffer_size_bytes = 1024
with pytest.raises(OperationalError):
c.get_catalogs(
session_id=MagicMock(), max_rows=1, max_bytes=1, cursor=cursor
)
c.get_catalogs(session_id=MagicMock(), max_rows=1, max_bytes=1, cursor=cursor)


# ---------------------------------------------------------------------------
Expand All @@ -574,9 +598,7 @@ def test_kernel_error_during_result_set_construction_is_mapped():
cursor.arraysize = 100
cursor.buffer_size_bytes = 1024
with pytest.raises(DatabaseError, match="schema unavailable"):
c.get_catalogs(
session_id=MagicMock(), max_rows=1, max_bytes=1, cursor=cursor
)
c.get_catalogs(session_id=MagicMock(), max_rows=1, max_bytes=1, cursor=cursor)


# ---------------------------------------------------------------------------
Expand Down
11 changes: 8 additions & 3 deletions tests/unit/test_kernel_result_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,9 @@ def int_schema():
def test_description_built_from_kernel_schema(int_schema):
handle = _FakeKernelHandle(int_schema, [])
rs = _make_rs(handle)
# null_ok slot reflects pyarrow.Field.nullable (True by default).
assert rs.description == [("n", "bigint", None, None, None, None, True)]
# `null_ok` slot is always `None` to match the Thrift backend's
# PEP-249 description shape.
assert rs.description == [("n", "bigint", None, None, None, None, None)]


def test_fetchall_arrow_drains_all_batches(int_schema):
Expand Down Expand Up @@ -107,7 +108,11 @@ def test_fetchmany_arrow_slices_within_batch(int_schema):
def test_fetchmany_arrow_spans_batch_boundary(int_schema):
handle = _FakeKernelHandle(
int_schema,
[_batch(int_schema, [1, 2]), _batch(int_schema, [3, 4]), _batch(int_schema, [5, 6])],
[
_batch(int_schema, [1, 2]),
_batch(int_schema, [3, 4]),
_batch(int_schema, [5, 6]),
],
)
rs = _make_rs(handle)
t = rs.fetchmany_arrow(5)
Expand Down
48 changes: 39 additions & 9 deletions tests/unit/test_kernel_type_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,25 +65,53 @@ def test_description_from_schema_preserves_field_names_and_order():
("name", "string"),
("created_at", "timestamp"),
]
# PEP 249 says 7-tuples. We don't report display_size /
# internal_size / precision / scale (all None); ``null_ok`` is
# taken from ``pyarrow.Field.nullable`` — True by default for
# schemas built from (name, type) pairs.
# PEP 249 7-tuples; we report column name and type only. PEP 249
# allows ``null_ok`` to be ``None`` and that's what the Thrift
# backend has always returned; match it so kernel-backed cursors
# are drop-in compatible. The Arrow ``field.nullable`` bit is still
# available via ``schema.field(i).nullable`` for callers that need
# the real value.
for d in desc:
assert len(d) == 7
assert d[2:] == (None, None, None, None, True)
assert d[2:] == (None, None, None, None, None)


def test_description_from_schema_reports_non_nullable_fields():
def test_description_null_ok_always_none_regardless_of_field_nullable():
# Match Thrift backend's behaviour: ``null_ok`` is ``None`` for
# every column even when the Arrow ``field.nullable`` bit is
# meaningful.
schema = pa.schema(
[
pa.field("id", pa.int64(), nullable=False),
pa.field("name", pa.string(), nullable=True),
]
)
desc = description_from_arrow_schema(schema)
assert desc[0][6] is False
assert desc[1][6] is True
assert desc[0][6] is None
assert desc[1][6] is None


def test_description_uses_databricks_type_name_for_variant():
"""VARIANT columns arrive over SEA as Arrow ``Utf8``; the kernel
annotates them with ``databricks.type_name=VARIANT`` so the
connector can recover the precise type for PEP-249 description.
Matches the Thrift backend, which exposes the same column as
``variant``."""
schema = pa.schema(
[
pa.field(
"v",
pa.string(),
metadata={b"databricks.type_name": b"VARIANT"},
),
# Plain Utf8 column without the metadata stays ``string``
# so we don't claim "variant" for everything.
pa.field("s", pa.string()),
]
)
desc = description_from_arrow_schema(schema)
assert desc[0][1] == "variant"
assert desc[1][1] == "string"


# ─── bind_tspark_params ──────────────────────────────────────────────────
Expand All @@ -94,7 +122,9 @@ def _mk_param(*, type, value, ordinal=True, name=None):
from databricks.sql.thrift_api.TCLIService import ttypes

p = ttypes.TSparkParameter(ordinal=ordinal, name=name, type=type)
p.value = ttypes.TSparkParameterValue(stringValue=value) if value is not None else None
p.value = (
ttypes.TSparkParameterValue(stringValue=value) if value is not None else None
)
return p


Expand Down
Loading