11from __future__ import annotations
22
3+ import codecs
34import datetime
45import email .message
56import json as jsonlib
7+ import re
68import typing
79import urllib .request
810from collections .abc import Mapping
4446 SyncByteStream ,
4547)
4648from ._urls import URL
47- from ._utils import (
48- is_known_encoding ,
49- obfuscate_sensitive_headers ,
50- parse_content_type_charset ,
51- parse_header_links ,
52- )
49+ from ._utils import to_bytes_or_str , to_str
5350
__all__ = ["Cookies", "Headers", "Request", "Response"]

# Lower-cased names of headers whose values are replaced with a "[secure]"
# placeholder by `_obfuscate_sensitive_headers()` (used when building the
# `Headers` repr), so that credentials are not shown in that output.
SENSITIVE_HEADERS = {"authorization", "proxy-authorization"}
54+
55+
56+ def _is_known_encoding (encoding : str ) -> bool :
57+ """
58+ Return `True` if `encoding` is a known codec.
59+ """
60+ try :
61+ codecs .lookup (encoding )
62+ except LookupError :
63+ return False
64+ return True
65+
5666
5767def _normalize_header_key (key : str | bytes , encoding : str | None = None ) -> bytes :
5868 """
@@ -72,6 +82,60 @@ def _normalize_header_value(value: str | bytes, encoding: str | None = None) ->
7282 return value .encode (encoding or "ascii" )
7383
7484
85+ def _parse_content_type_charset (content_type : str ) -> str | None :
86+ # We used to use `cgi.parse_header()` here, but `cgi` became a dead battery.
87+ # See: https://peps.python.org/pep-0594/#cgi
88+ msg = email .message .Message ()
89+ msg ["content-type" ] = content_type
90+ return msg .get_content_charset (failobj = None )
91+
92+
93+ def _parse_header_links (value : str ) -> list [dict [str , str ]]:
94+ """
95+ Returns a list of parsed link headers, for more info see:
96+ https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Link
97+ The generic syntax of those is:
98+ Link: < uri-reference >; param1=value1; param2="value2"
99+ So for instance:
100+ Link; '<http:/.../front.jpeg>; type="image/jpeg",<http://.../back.jpeg>;'
101+ would return
102+ [
103+ {"url": "http:/.../front.jpeg", "type": "image/jpeg"},
104+ {"url": "http://.../back.jpeg"},
105+ ]
106+ :param value: HTTP Link entity-header field
107+ :return: list of parsed link headers
108+ """
109+ links : list [dict [str , str ]] = []
110+ replace_chars = " '\" "
111+ value = value .strip (replace_chars )
112+ if not value :
113+ return links
114+ for val in re .split (", *<" , value ):
115+ try :
116+ url , params = val .split (";" , 1 )
117+ except ValueError :
118+ url , params = val , ""
119+ link = {"url" : url .strip ("<> '\" " )}
120+ for param in params .split (";" ):
121+ try :
122+ key , value = param .split ("=" )
123+ except ValueError :
124+ break
125+ link [key .strip (replace_chars )] = value .strip (replace_chars )
126+ links .append (link )
127+ return links
128+
129+
def _obfuscate_sensitive_headers(
    items: typing.Iterable[tuple[typing.AnyStr, typing.AnyStr]],
) -> typing.Iterator[tuple[typing.AnyStr, typing.AnyStr]]:
    """
    Yield each `(key, value)` pair from `items`, masking sensitive values.

    A pair whose lower-cased key (passed through `to_str`) is listed in
    `SENSITIVE_HEADERS` has its value replaced by "[secure]", produced by
    `to_bytes_or_str(..., match_type_of=<original value>)`. All other pairs
    are yielded unchanged.
    """
    for name, raw_value in items:
        is_sensitive = to_str(name.lower()) in SENSITIVE_HEADERS
        if is_sensitive:
            yield name, to_bytes_or_str("[secure]", match_type_of=raw_value)
        else:
            yield name, raw_value
137+
138+
75139class Headers (typing .MutableMapping [str , str ]):
76140 """
77141 HTTP headers, as a case-insensitive multi-dict.
@@ -306,7 +370,7 @@ def __repr__(self) -> str:
306370 if self .encoding != "ascii" :
307371 encoding_str = f", encoding={ self .encoding !r} "
308372
309- as_list = list (obfuscate_sensitive_headers (self .multi_items ()))
373+ as_list = list (_obfuscate_sensitive_headers (self .multi_items ()))
310374 as_dict = dict (as_list )
311375
312376 no_duplicate_keys = len (as_dict ) == len (as_list )
@@ -599,7 +663,7 @@ def encoding(self) -> str | None:
599663 """
600664 if not hasattr (self , "_encoding" ):
601665 encoding = self .charset_encoding
602- if encoding is None or not is_known_encoding (encoding ):
666+ if encoding is None or not _is_known_encoding (encoding ):
603667 if isinstance (self .default_encoding , str ):
604668 encoding = self .default_encoding
605669 elif hasattr (self , "_content" ):
@@ -630,7 +694,7 @@ def charset_encoding(self) -> str | None:
630694 if content_type is None :
631695 return None
632696
633- return parse_content_type_charset (content_type )
697+ return _parse_content_type_charset (content_type )
634698
635699 def _get_content_decoder (self ) -> ContentDecoder :
636700 """
@@ -785,7 +849,7 @@ def links(self) -> dict[str | None, dict[str, str]]:
785849
786850 return {
787851 (link .get ("rel" ) or link .get ("url" )): link
788- for link in parse_header_links (header )
852+ for link in _parse_header_links (header )
789853 }
790854
791855 @property
0 commit comments