Skip to content

Commit b3dd9cc

Browse files
committed
docs(_ext): Add pretty_argparse extension with CLI usage highlighting
A comprehensive Sphinx extension that enhances sphinx-argparse output: Features: - Strip ANSI escape codes from argparse help text (FORCE_COLOR support) - Transform "examples:" definition lists into proper doc sections - Nest category-specific examples under parent Examples section - Custom Pygments lexer (cli-usage) for usage block syntax highlighting - Reorder sections so usage appears before examples CLI usage lexer token types: - Generic.Heading: "usage:" prefix - Name.Tag: long options (--verbose, --no-color) - Name.Attribute: short options (-h, -S) - Name.Variable: option values (socket-path, COMMAND) - Name.Constant: uppercase metavars (FILE, PATH) - Name.Label: positional args and command names - Punctuation: brackets [], parentheses () - Operator: pipe | and equals = Files added: - docs/_ext/pretty_argparse.py: Main extension (680 lines) - docs/_ext/cli_usage_lexer.py: Pygments lexer (116 lines) - tests/docs/_ext/test_pretty_argparse.py: 66 tests (854 lines) - tests/docs/_ext/test_cli_usage_lexer.py: 22 tests (357 lines)
1 parent 19915ae commit b3dd9cc

9 files changed

Lines changed: 2021 additions & 0 deletions

File tree

docs/_ext/cli_usage_lexer.py

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
"""Pygments lexer for CLI usage/help output.
2+
3+
This module provides a custom Pygments lexer for highlighting command-line
4+
usage text typically generated by argparse, getopt, or similar libraries.
5+
"""
6+
7+
from __future__ import annotations
8+
9+
from pygments.lexer import RegexLexer, bygroups, include
10+
from pygments.token import Generic, Name, Operator, Punctuation, Text, Whitespace
11+
12+
13+
class CLIUsageLexer(RegexLexer):
    """Lexer for CLI usage/help text (argparse, etc.).

    Highlights usage patterns including options, arguments, and meta-variables.

    Examples
    --------
    >>> from pygments.token import Token
    >>> lexer = CLIUsageLexer()
    >>> tokens = list(lexer.get_tokens("usage: cmd [-h]"))
    >>> tokens[0]
    (Token.Generic.Heading, 'usage:')
    >>> tokens[2]
    (Token.Name.Label, 'cmd')
    """

    # Pygments lexer registration metadata: `aliases` are the names usable in
    # a ``.. code-block:: cli-usage`` directive; no file extensions map here.
    name = "CLI Usage"
    aliases = ["cli-usage", "usage"]  # noqa: RUF012
    filenames: list[str] = []  # noqa: RUF012
    mimetypes = ["text/x-cli-usage"]  # noqa: RUF012

    # NOTE: rule order within each state is significant — RegexLexer tries
    # rules top to bottom, so the more specific "option with value" patterns
    # must precede the standalone option patterns below them.
    tokens = {  # noqa: RUF012
        "root": [
            # "usage:" at start of line
            (r"^(usage:)(\s+)", bygroups(Generic.Heading, Whitespace)),  # type: ignore[no-untyped-call]
            # Continuation lines (leading whitespace for wrapped usage)
            (r"^(\s+)(?=\S)", Whitespace),
            include("inline"),
        ],
        "inline": [
            # Whitespace
            (r"\s+", Whitespace),
            # Long options with = value (e.g., --log-level=VALUE)
            (
                r"(--[a-zA-Z0-9][-a-zA-Z0-9]*)(=)([A-Z][A-Z0-9_]*|[a-z][-a-z0-9]*)",
                bygroups(Name.Tag, Operator, Name.Variable),  # type: ignore[no-untyped-call]
            ),
            # Long options standalone
            (r"--[a-zA-Z0-9][-a-zA-Z0-9]*", Name.Tag),
            # Short options with space-separated value (e.g., -S socket-path)
            (
                r"(-[a-zA-Z0-9])(\s+)([A-Z][A-Z0-9_]*|[a-z][-a-z0-9]*)",
                bygroups(Name.Attribute, Whitespace, Name.Variable),  # type: ignore[no-untyped-call]
            ),
            # Short options standalone
            (r"-[a-zA-Z0-9]", Name.Attribute),
            # UPPERCASE meta-variables (COMMAND, FILE, PATH)
            # NOTE: requires at least two uppercase chars; a single capital
            # letter falls through to the catch-all Text rule.
            (r"\b[A-Z][A-Z0-9_]+\b", Name.Constant),
            # Opening bracket - enter optional state
            (r"\[", Punctuation, "optional"),
            # Closing bracket (fallback for unmatched)
            (r"\]", Punctuation),
            # Choice separator (pipe)
            (r"\|", Operator),
            # Parentheses for grouping
            (r"[()]", Punctuation),
            # Positional/command names (lowercase with dashes)
            (r"\b[a-z][-a-z0-9]*\b", Name.Label),
            # Catch-all for any other text
            (r"[^\s\[\]|()]+", Text),
        ],
        "optional": [
            # Nested optional bracket: "#push" re-enters this state so
            # arbitrarily nested [a [b]] groups balance correctly.
            (r"\[", Punctuation, "#push"),
            # End optional: "#pop" returns to the enclosing state
            (r"\]", Punctuation, "#pop"),
            # Contents use inline rules
            include("inline"),
        ],
    }
83+
84+
85+
def tokenize_usage(text: str) -> list[tuple[str, str]]:
    """Flatten CLI usage *text* into ``(token_type_name, value)`` pairs.

    Each Pygments token emitted by :class:`CLIUsageLexer` is converted to a
    plain tuple whose first element is the stringified token type, which is
    convenient for tests and debugging.

    Parameters
    ----------
    text : str
        CLI usage text to tokenize.

    Returns
    -------
    list[tuple[str, str]]
        One ``(token_type_name, text_value)`` tuple per lexed token.

    Examples
    --------
    >>> pairs = tokenize_usage("usage: cmd [-h]")
    >>> pairs[0]
    ('Token.Generic.Heading', 'usage:')
    >>> pairs[2]
    ('Token.Name.Label', 'cmd')
    >>> pairs[4]
    ('Token.Punctuation', '[')
    >>> pairs[5]
    ('Token.Name.Attribute', '-h')
    >>> pairs[6]
    ('Token.Punctuation', ']')
    """
    lexed = CLIUsageLexer().get_tokens(text)
    return [(str(token_type), value) for token_type, value in lexed]

0 commit comments

Comments
 (0)