Skip to content

Commit 4d11e22

Browse files
committed
cleanup in parser and docstring refactor, cleanup of the remaining TODOs
1 parent 1fd9d36 commit 4d11e22

12 files changed

Lines changed: 445 additions & 160 deletions

sql_metadata/ast_parser.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ def dialect(self) -> DialectType:
5555
Set as a side-effect of :attr:`ast` access. May be ``None``
5656
(default dialect), a string like ``"mysql"``, or a custom
5757
:class:`Dialect` subclass such as :class:`HashVarDialect`.
58+
59+
:rtype: DialectType
5860
"""
5961
_ = self.ast
6062
return self._dialect
@@ -67,6 +69,8 @@ def is_replace(self) -> bool:
6769
(sqlglot otherwise produces an opaque ``Command`` node). This
6870
flag allows :attr:`Parser.query_type` to restore the correct
6971
:class:`QueryType.REPLACE` value.
72+
73+
:rtype: bool
7074
"""
7175
_ = self.ast
7276
return self._is_replace
@@ -77,6 +81,8 @@ def cte_name_map(self) -> dict[str, str]:
7781
7882
Keys are underscore-separated placeholders (``db__DOT__name``),
7983
values are the original dotted names (``db.name``).
84+
85+
:rtype: dict[str, str]
8086
"""
8187
_ = self.ast
8288
return self._cte_name_map

sql_metadata/column_extractor.py

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,15 @@
2828
class ExtractionResult:
2929
"""Immutable container for column extraction results.
3030
31-
Replaces the earlier 7-tuple return value with named fields.
31+
Returned by :meth:`ColumnExtractor.extract` and consumed by
32+
:class:`Parser` to populate its column/alias/CTE properties.
33+
Each field corresponds to a public ``Parser`` property.
3234
"""
3335

3436
columns: UniqueList
3537
columns_dict: dict[str, UniqueList]
3638
alias_names: UniqueList
37-
alias_dict: dict[str, UniqueList] | None
39+
alias_dict: dict[str, UniqueList]
3840
alias_map: dict[str, str | list[str]]
3941
cte_names: UniqueList
4042
subquery_names: UniqueList
@@ -119,7 +121,17 @@ def _dfs(node: exp.Expression) -> Any:
119121

120122

121123
def _is_date_part_unit(node: exp.Column) -> bool:
122-
"""Return True if *node* is the first arg of a date-part function."""
124+
"""Return ``True`` if *node* is the date-part unit argument of a function.
125+
126+
Functions like ``DATEADD``, ``DATEDIFF``, and ``DATE_TRUNC`` accept a
127+
date-part keyword (``DAY``, ``MONTH``, …) as their first argument.
128+
sqlglot parses these keywords as ``exp.Column`` nodes, but they are not
129+
real columns and must be skipped during extraction.
130+
131+
:param node: A column AST node to inspect.
132+
:type node: exp.Column
133+
:rtype: bool
134+
"""
123135
parent = node.parent
124136
if (
125137
isinstance(parent, exp.Anonymous)
@@ -167,13 +179,28 @@ def __init__(self, table_aliases: dict[str, str]):
167179
self.output_columns: list[str] = []
168180

169181
def add_column(self, name: str, clause: str) -> None:
170-
"""Record a column name, filing it into the appropriate section."""
182+
"""Record a column name, filing it into the appropriate clause section.
183+
184+
:param name: The column name to record.
185+
:type name: str
186+
:param clause: The SQL clause section (e.g. ``"select"``, ``"where"``).
187+
:type clause: str
188+
"""
171189
self.columns.append(name)
172190
if clause:
173191
self.columns_dict.setdefault(clause, UniqueList()).append(name)
174192

175193
def add_alias(self, name: str, target: Any, clause: str) -> None:
176-
"""Record a column alias and its target expression."""
194+
"""Record a column alias and its target expression.
195+
196+
:param name: The alias name.
197+
:type name: str
198+
:param target: The source column name or expression the alias refers
199+
to, or ``None`` if not determinable.
200+
:type target: Any
201+
:param clause: The SQL clause section where the alias was defined.
202+
:type clause: str
203+
"""
177204
self.alias_names.append(name)
178205
if clause:
179206
self.alias_dict.setdefault(clause, UniqueList()).append(name)
@@ -267,7 +294,7 @@ def extract(self) -> ExtractionResult:
267294
for name in c.cte_names:
268295
final_cte.append(self._reverse_cte_map.get(name, name))
269296

270-
alias_dict = c.alias_dict if c.alias_dict else None
297+
alias_dict = c.alias_dict
271298
return ExtractionResult(
272299
columns=c.columns,
273300
columns_dict=c.columns_dict,

sql_metadata/comments.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import re
2222
from typing import Any
2323

24+
from sqlglot.errors import TokenError
2425
from sqlglot.tokens import Tokenizer
2526

2627

@@ -90,8 +91,7 @@ def extract_comments(sql: str) -> list[str]:
9091
return []
9192
try:
9293
tokens = list(_choose_tokenizer(sql).tokenize(sql))
93-
# TODO: revisit if sqlglot tokenizer starts raising on specific inputs
94-
except Exception: # pragma: no cover
94+
except TokenError:
9595
return []
9696
comments: list[str] = []
9797
prev_end = -1
@@ -119,7 +119,19 @@ def _scan_gap(sql: str, start: int, end: int, out: list[str]) -> None:
119119

120120

121121
def _reconstruct_from_tokens(sql: str, tokens: list[Any]) -> str:
122-
"""Rebuild SQL from token spans, collapsing gaps to single spaces."""
122+
"""Rebuild SQL from token spans, collapsing gaps to single spaces.
123+
124+
Concatenates the text of each token using its ``start`` / ``end``
125+
positions. Any gap between consecutive tokens (where comments or
126+
extra whitespace lived) is replaced by a single space.
127+
128+
:param sql: The original SQL string.
129+
:type sql: str
130+
:param tokens: Sqlglot token objects with ``start`` and ``end`` attrs.
131+
:type tokens: list[Any]
132+
:returns: Reconstructed SQL with comments removed.
133+
:rtype: str
134+
"""
123135
if not tokens:
124136
return ""
125137
parts = [sql[tokens[0].start : tokens[0].end + 1]]
@@ -158,7 +170,7 @@ def strip_comments_for_parsing(sql: str) -> str:
158170
tokenizer = MySQL.Tokenizer()
159171
try:
160172
tokens = list(tokenizer.tokenize(sql))
161-
except Exception:
173+
except TokenError:
162174
return sql.strip()
163175
return _reconstruct_from_tokens(sql, tokens)
164176

@@ -183,6 +195,6 @@ def strip_comments(sql: str) -> str:
183195
return sql or ""
184196
try:
185197
tokens = list(_choose_tokenizer(sql).tokenize(sql))
186-
except Exception:
198+
except TokenError:
187199
return sql.strip()
188200
return _reconstruct_from_tokens(sql, tokens)

sql_metadata/dialect_parser.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,17 +43,32 @@ class HashVarDialect(Dialect):
4343
"""
4444

4545
class Tokenizer(BaseTokenizer):
46-
"""Tokenizer subclass that includes ``#`` in variable tokens."""
46+
"""Tokenizer subclass that includes ``#`` in variable tokens.
47+
48+
Removes ``#`` from ``SINGLE_TOKENS`` and adds it to
49+
``VAR_SINGLE_TOKENS`` so that ``#temp`` is lexed as a single
50+
``VAR`` token instead of ``#`` + ``temp``.
51+
"""
4752

4853
SINGLE_TOKENS = {**BaseTokenizer.SINGLE_TOKENS}
4954
SINGLE_TOKENS.pop("#", None)
5055
VAR_SINGLE_TOKENS = {*BaseTokenizer.VAR_SINGLE_TOKENS, "#"}
5156

5257

5358
class _RedshiftAppendParser(RedshiftParser):
54-
"""Redshift parser extended with ``ALTER TABLE ... APPEND FROM``."""
59+
"""Redshift parser extended with ``ALTER TABLE … APPEND FROM``.
60+
61+
Adds an ``APPEND`` entry to ``ALTER_PARSERS`` so that the Redshift-
62+
specific ``ALTER TABLE t APPEND FROM src`` syntax produces a proper
63+
``exp.Alter`` node instead of degrading to ``exp.Command``.
64+
"""
5565

5666
def _parse_alter_table_append(self) -> "exp.Expr | None":
67+
"""Parse the ``FROM <table>`` portion of an ``APPEND FROM`` clause.
68+
69+
:returns: The parsed table expression, or ``None``.
70+
:rtype: exp.Expr | None
71+
"""
5772
self._match_text_seq("FROM")
5873
return self._parse_table()
5974

sql_metadata/nested_resolver.py

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,25 +46,53 @@ class _PreservingGenerator(Generator):
4646
}
4747

4848
def coalesce_sql(self, expression: exp.Expression) -> str:
49+
"""Render ``COALESCE`` back to ``IFNULL`` for two-argument calls.
50+
51+
:param expression: The ``exp.Coalesce`` AST node.
52+
:type expression: exp.Expression
53+
:returns: SQL string using ``IFNULL`` (2 args) or ``COALESCE``.
54+
:rtype: str
55+
"""
4956
args = [expression.this] + expression.expressions
5057
if len(args) == 2:
5158
return f"IFNULL({self.sql(args[0])}, {self.sql(args[1])})"
5259
args_sql = ", ".join(self.sql(a) for a in args)
5360
return f"COALESCE({args_sql})"
5461

5562
def dateadd_sql(self, expression: exp.Expression) -> str:
63+
"""Render ``exp.DateAdd`` back to ``DATE_ADD(…)`` syntax.
64+
65+
:param expression: The ``exp.DateAdd`` AST node.
66+
:type expression: exp.Expression
67+
:rtype: str
68+
"""
5669
return (
5770
f"DATE_ADD({self.sql(expression, 'this')}, "
5871
f"{self.sql(expression, 'expression')})"
5972
)
6073

6174
def datesub_sql(self, expression: exp.Expression) -> str:
75+
"""Render ``exp.DateSub`` back to ``DATE_SUB(…)`` syntax.
76+
77+
:param expression: The ``exp.DateSub`` AST node.
78+
:type expression: exp.Expression
79+
:rtype: str
80+
"""
6281
return (
6382
f"DATE_SUB({self.sql(expression, 'this')}, "
6483
f"{self.sql(expression, 'expression')})"
6584
)
6685

6786
def tsordsadd_sql(self, expression: exp.Expression) -> str:
87+
"""Render ``exp.TsOrDsAdd`` as ``DATE_ADD`` or ``DATE_SUB``.
88+
89+
When the interval multiplier is ``-1`` the expression is rendered
90+
as ``DATE_SUB`` instead, preserving the original SQL intent.
91+
92+
:param expression: The ``exp.TsOrDsAdd`` AST node.
93+
:type expression: exp.Expression
94+
:rtype: str
95+
"""
6896
this = self.sql(expression, "this")
6997
expr_node = expression.expression
7098
if isinstance(expr_node, exp.Mul):
@@ -79,6 +107,16 @@ def tsordsadd_sql(self, expression: exp.Expression) -> str:
79107
return f"DATE_ADD({this}, {self.sql(expression, 'expression')})"
80108

81109
def not_sql(self, expression: exp.Expression) -> str:
110+
"""Render ``NOT`` expressions preserving ``IS NOT NULL`` and ``NOT IN``.
111+
112+
sqlglot normalises ``IS NOT NULL`` to ``NOT (x IS NULL)`` and
113+
``NOT IN`` to ``NOT (x IN (...))``; this override renders them
114+
back to their original idiomatic forms.
115+
116+
:param expression: The ``exp.Not`` AST node.
117+
:type expression: exp.Expression
118+
:rtype: str
119+
"""
82120
child = expression.this
83121
if isinstance(child, exp.Is) and isinstance(child.expression, exp.Null):
84122
return f"{self.sql(child, 'this')} IS NOT NULL"
@@ -96,14 +134,26 @@ def not_sql(self, expression: exp.Expression) -> str:
96134

97135

98136
def _is_qualified_reference(result: list[str]) -> bool:
99-
"""Check if result is a single dotted reference like ``['cte.col']``."""
137+
"""Check if *result* is a single dotted reference like ``['cte.col']``.
138+
139+
:param result: Resolved column list to inspect.
140+
:type result: list[str]
141+
:rtype: bool
142+
"""
100143
return len(result) == 1 and "." in result[0]
101144

102145

103146
def _is_not_already_resolved_qualified_reference(
104147
result: list[str], column: str
105148
) -> bool:
106-
"""Check if result is a qualified reference that changed from the input."""
149+
"""Check if *result* is a qualified reference that differs from *column*.
150+
151+
:param result: Resolved column list to inspect.
152+
:type result: list[str]
153+
:param column: The original column name before resolution.
154+
:type column: str
155+
:rtype: bool
156+
"""
107157
return _is_qualified_reference(result) and result != [column]
108158

109159

0 commit comments

Comments
 (0)