@@ -76,12 +76,21 @@ def domain(
7676 if not value :
7777 return False
7878
79- if consider_tld and not _IanaTLD .check (value .rstrip ("." ).rsplit ("." , 1 )[- 1 ].upper ()):
80- return False
81-
8279 try :
80+ ascii_domain = value .encode ("idna" ).decode ("utf-8" )
81+ domain_without_trailing_dot = ascii_domain .rstrip ("." )
82+
83+ if not domain_without_trailing_dot or len (domain_without_trailing_dot ) > 253 :
84+ return False
85+
86+ if consider_tld and not _IanaTLD .check (
87+ domain_without_trailing_dot .rsplit ("." , 1 )[- 1 ].upper ()
88+ ):
89+ return False
90+
8391 service_record = r"_" if rfc_2782 else ""
8492 trailing_dot = r"\.?$" if rfc_1034 else r"$"
93+ tld = r"(?:[a-z]{2,63}|xn--[a-z0-9](?:[a-z0-9-]{0,57}[a-z0-9])?)"
8594
8695 return not re .search (r"\s|__+" , value ) and re .match (
8796 # First character of the domain
@@ -90,12 +99,10 @@ def domain(
9099 + rf"(?:[a-z0-9-{ service_record } ]{{0,61}}"
91100 # Hostname
92101 + rf"[a-z0-9{ service_record } ])?\.)"
93- # First 61 characters of the gTLD
94- + r"+[a-z0-9][a-z0-9-_]{0,61}"
95- # Last character of the gTLD
96- + rf"[a-z]{ trailing_dot } " ,
97- value .encode ("idna" ).decode ("utf-8" ),
102+ # Top-level domain
103+ + rf"+{ tld } { trailing_dot } " ,
104+ ascii_domain ,
98105 re .IGNORECASE ,
99106 )
100107 except UnicodeError as err :
101- raise UnicodeError (f"Unable to encode/decode { value } " ) from err
108+ raise UnicodeError (f"Unable to encode/decode { value } " ) from err
0 commit comments