11# Parser based on RFC 5228, especially the grammar as defined in section 8. All
22# references are to sections in RFC 5228 unless stated otherwise.
33
4+ from typing import (
5+ TYPE_CHECKING ,
6+ Any ,
7+ Optional
8+ )
9+
410import math
511import ply .lex # type: ignore
612
13+ if TYPE_CHECKING :
14+ from ply .lex import Lexer , LexToken
15+
716__all__ = ('lexer' , 'tokens' ,)
817
918
def lexer(**kwargs: Any) -> 'Lexer':
    """Build and return a lexer via ``ply.lex.lex``.

    Every keyword argument is forwarded unchanged to ``ply.lex.lex``.
    """
    # NOTE(review): ply.lex.lex() presumably discovers the t_* rules by
    # inspecting the module it is called from, so this call should stay
    # inside this module -- confirm against the PLY documentation.
    instance = ply.lex.lex(**kwargs)
    return instance
1221
1322
@@ -22,22 +31,23 @@ def lexer(**kwargs):
2231
2332
2433# section 2.3
def t_HASH_COMMENT(t: 'LexToken') -> Optional['LexToken']:
    r'\#.*\r\n'
    # The raw string above is the token pattern (PLY reads it from the
    # function's docstring slot), so it must stay the first statement and
    # must not be replaced by a prose docstring.
    #
    # A hash comment runs from "#" through the terminating CRLF; the match
    # therefore consumes exactly one line ending.
    scanner = t.lexer
    scanner.lineno = scanner.lineno + 1
    # Returning None discards the comment instead of emitting a token.
    return None
2838
2939
3040# section 2.3
def t_BRACKET_COMMENT(t: 'LexToken') -> Optional['LexToken']:
    r'/\*[\s\S]*?\*/'
    # The raw string above is the token pattern (PLY reads it from the
    # function's docstring slot); keep it as the first statement.
    #
    # Bracketed comments begin with "/*" and end with the *first* following
    # "*/"; they may span multiple lines and do not nest. The non-greedy
    # [\s\S]*? implements both properties: it crosses line breaks (which "."
    # does not) and stops at the earliest terminator, so code between two
    # comments on the same line is no longer swallowed.
    #
    # NOTE(review): the "outside of a string" requirement is assumed to be
    # satisfied by the quoted-string rule consuming the whole string before
    # this rule is tried inside it -- confirm with a lexer-level test.
    #
    # Keep line tracking accurate for comments that span several lines.
    t.lexer.lineno += t.value.count('\n')
    # Returning None discards the comment instead of emitting a token.
    return None
3747
3848
3949# section 2.4.2
40- def t_MULTILINE_STRING (t ) :
50+ def t_MULTILINE_STRING (t : 'LexToken' ) -> Optional [ 'LexToken' ] :
4151 r'"@@@@@@@@@@@@@@@"'
4252 # TODO: For entering larger amounts of text, such as an email message,
4353 # a multi-line form is allowed. It starts with the keyword "text:",
@@ -52,11 +62,11 @@ def t_MULTILINE_STRING(t):
5262 # that is, ".foo" is interpreted as ".foo". However, because this is
5363 # potentially ambiguous, scripts SHOULD be properly dot-stuffed so such
5464 # lines do not appear.
55- pass
65+ return None
5666
5767
5868# section 2.4.2
59- def t_QUOTED_STRING (t ) :
69+ def t_QUOTED_STRING (t : 'LexToken' ) -> Optional [ 'LexToken' ] :
6070 r'"([^"\\]|\\["\\])*"'
6171 # TODO: Add support for:
6272 # - An undefined escape sequence (such as "\a" in a context where "a"
@@ -70,20 +80,20 @@ def t_QUOTED_STRING(t):
7080 return t
7181
7282
def t_TAG(t: 'LexToken') -> Optional['LexToken']:
    r':[a-zA-Z_][a-zA-Z0-9_]*'
    # The raw string above is the token pattern (PLY reads it from the
    # function's docstring slot); keep it as the first statement.
    #
    # Strip the leading ":" and upper-case the remainder so the token value
    # is the bare tag name in a single canonical case.
    bare_name = t.value[1:]
    t.value = bare_name.upper()
    return t
7787
7888
def t_IDENTIFIER(t: 'LexToken') -> Optional['LexToken']:
    r'[a-zA-Z_][a-zA-Z0-9_]*'
    # The raw string above is the token pattern (PLY reads it from the
    # function's docstring slot); keep it as the first statement.
    #
    # Upper-case the matched text so the token value has a single canonical
    # case regardless of how the script spelled the identifier.
    canonical = t.value.upper()
    t.value = canonical
    return t
8393
8494
8595# section 2.4.1
86- def t_NUMBER (t ) :
96+ def t_NUMBER (t : 'LexToken' ) -> Optional [ 'LexToken' ] :
8797 r'[0-9]+[KkMmGg]?'
8898 exponents = {
8999 'G' : 30 , 'g' : 30 ,
@@ -97,13 +107,15 @@ def t_NUMBER(t):
97107 return t
98108
99109
def t_newline(t: 'LexToken') -> Optional['LexToken']:
    r'(\r\n)+'
    # The raw string above is the token pattern (PLY reads it from the
    # function's docstring slot); keep it as the first statement.
    #
    # One match may cover several consecutive CRLF line endings, so advance
    # the line counter by the number of line feeds actually consumed. The
    # needle must be exactly '\n': any extra character (such as a trailing
    # space) can never occur in text matched by the pattern above and would
    # leave lineno permanently stuck.
    t.lexer.lineno += t.value.count('\n')
    # Returning None discards the line endings instead of emitting a token.
    return None
103114
104115
def t_error(t: 'LexToken') -> Optional['LexToken']:
    # Error hook (by PLY's naming convention, invoked for input that no
    # token rule matches). Recovery is best-effort: step past one character
    # and let scanning resume from the next one. Nothing is reported and no
    # token is produced.
    advance = t.lexer.skip
    advance(1)
    return None
107119
108120
109121if __name__ == '__main__' :
0 commit comments