1212
1313"""bencode.py - bencode encoder + decoder."""
1414
15+ from collections import deque
16+
1517from bencode .BTL import BTFailure
1618from bencode .exceptions import BencodeDecodeError
1719
2931
def decode_int(x, f):
    """
    Decode a bencoded integer (``i<digits>e``) found in ``x`` at index ``f``.

    :param x: Bencoded data
    :type x: bytes
    :param f: Index of the leading ``i`` marker
    :type f: int

    :return: The decoded integer and the index just past the closing ``e``
    :rtype: tuple
    :raises ValueError: If the closing ``e`` is missing, or the digits are
        empty, non-numeric, zero-padded, or spell negative zero
    """
    f += 1
    newf = x.index(b'e', f)
    digits = x[f:newf]

    # int() alone is too lenient: it accepts '+5', surrounding whitespace and
    # '1_0'. Check the raw bytes so only canonical encodings are accepted.
    negative = digits[:1] == b'-'
    magnitude = digits[1:] if negative else digits

    if not magnitude.isdigit():
        raise ValueError

    # A leading zero is only legal for the single value "0" (never "-0").
    if magnitude[:1] == b'0' and (negative or len(magnitude) > 1):
        raise ValueError

    return int(digits), newf + 1
4244
4345
def decode_string(x, f, try_decode_utf8=True, force_decode_utf8=False):
    """
    Decode a bencoded string (``<length>:<data>``) found in ``x`` at index ``f``.

    Text and arbitrary binary data are intermixed in the torrent bencoding
    standard, so whether a byte sequence is text has to be guessed. By
    default the payload is decoded as UTF-8 and, if that fails, returned
    unchanged as bytes. Where the data must be text, set
    ``force_decode_utf8=True`` so that a failed decode raises
    ``UnicodeDecodeError`` instead. Where the data is known to be binary,
    set ``try_decode_utf8=False`` to skip the decoding attempt; in that case
    ``force_decode_utf8`` has no effect.

    :param x: Bencoded data
    :type x: bytes
    :param f: Index of the first digit of the length prefix
    :type f: int
    :param try_decode_utf8: Attempt to decode the payload as UTF-8
    :type try_decode_utf8: bool
    :param force_decode_utf8: Re-raise when the UTF-8 decode fails
    :type force_decode_utf8: bool

    :return: The decoded payload (``str`` or ``bytes``) and the index just
        past it
    :rtype: tuple
    :raises ValueError: If the ``:`` separator is missing, the length prefix
        is not a canonical non-negative integer, or the payload is shorter
        than the declared length
    :raises UnicodeDecodeError: If ``force_decode_utf8`` is set and the
        payload is not valid UTF-8 (a subclass of ``ValueError``)
    """
    colon = x.index(b':', f)
    length = x[f:colon]

    # Digits only: rejects signs, whitespace and '_' that int() would accept.
    if not length.isdigit():
        raise ValueError

    # No zero padding; "0" itself (the empty string) is fine.
    if length[:1] == b'0' and len(length) > 1:
        raise ValueError

    start = colon + 1
    end = start + int(length)

    # Slicing past the end would silently hand back a short payload.
    if end > len(x):
        raise ValueError

    s = x[start:end]
    if try_decode_utf8:
        try:
            s = s.decode('utf-8')
        except UnicodeDecodeError:
            if force_decode_utf8:
                raise

    return s, end
5477
5578
def decode_list(x, f):
    """
    Decode a bencoded list (``l<items>e``) found in ``x`` at index ``f``.

    :param x: Bencoded data
    :type x: bytes
    :param f: Index of the leading ``l`` marker
    :type f: int

    :return: The decoded list and the index just past the closing ``e``
    :rtype: tuple
    :raises KeyError: If an item starts with a byte that has no decoder,
        including running off the end of ``x`` before the closing ``e``
    """
    r, f = [], f + 1

    # One-byte slices are used instead of x[f]: indexing bytes yields an int,
    # and a slice past the end yields b'' (no decoder) rather than IndexError.
    while x[f:f + 1] != b'e':
        v, f = decode_func[x[f:f + 1]](x, f)
        r.append(v)

    return r, f + 1
@@ -66,28 +89,28 @@ def decode_list(x, f):
def decode_dict(x, f):
    """
    Decode a bencoded dictionary (``d<key><value>...e``) found in ``x`` at
    index ``f``.

    Keys are read with ``decode_string`` using its defaults, so a key that
    is valid UTF-8 becomes ``str`` and any other key stays ``bytes``.

    :param x: Bencoded data
    :type x: bytes
    :param f: Index of the leading ``d`` marker
    :type f: int

    :return: The decoded dictionary and the index just past the closing ``e``
    :rtype: tuple
    :raises ValueError: If a key is not a well-formed bencoded string
    :raises KeyError: If a value starts with a byte that has no decoder
    """
    r, f = {}, f + 1

    # One-byte slices keep the comparison bytes-to-bytes (x[f] would be an int).
    while x[f:f + 1] != b'e':
        k, f = decode_string(x, f)
        r[k], f = decode_func[x[f:f + 1]](x, f)

    return r, f + 1
7497
7598
# Dispatch table: the first byte of a bencoded value selects its decoder.
# Keys are one-byte ``bytes`` objects, matching the x[f:f + 1] slices that
# the decoders use for lookup.
decode_func = {
    b'l': decode_list,
    b'd': decode_dict,
    b'i': decode_int,
}
# Strings start with their length, so every ASCII digit maps to decode_string.
decode_func.update(
    (bytes((digit,)), decode_string) for digit in b'0123456789'
)
91114
92115
93116def bdecode (value ):
@@ -101,8 +124,8 @@ def bdecode(value):
101124 :rtype: object
102125 """
103126 try :
104- r , l = decode_func [value [0 ]](value , 0 )
105- except (IndexError , KeyError , ValueError ):
127+ r , l = decode_func [value [0 : 1 ]](value , 0 )
128+ except (IndexError , KeyError , TypeError , ValueError ):
106129 raise BencodeDecodeError ("not a valid bencoded string" )
107130
108131 if l != len (value ):
@@ -123,7 +146,7 @@ def encode_bencached(x, r):
123146
124147
def encode_int(x, r):
    """
    Append the bencoded form of the integer ``x`` (``i<digits>e``) to ``r``.

    :param x: Integer to encode
    :type x: int
    :param r: Accumulator of output fragments; extended in place
    :type r: list
    """
    r.extend((b'i', str(x).encode('utf-8'), b'e'))
127150
128151
129152def encode_bool (x , r ):
@@ -134,28 +157,34 @@ def encode_bool(x, r):
134157
135158
def encode_string(x, r):
    """
    Append the bencoded form of the text ``x`` (``<length>:<data>``) to ``r``.

    The text is encoded as UTF-8 first, so the length prefix counts bytes
    rather than characters.

    :param x: Text to encode
    :type x: str
    :param r: Accumulator of output fragments; extended in place
    :type r: list
    """
    s = x.encode('utf-8')
    r.extend((str(len(s)).encode('utf-8'), b':', s))
162+
163+
def encode_bytes(x, r):
    """
    Append the bencoded form of the binary data ``x`` (``<length>:<data>``)
    to ``r``.

    :param x: Raw data to encode; written out unchanged
    :type x: bytes
    :param r: Accumulator of output fragments; extended in place
    :type r: list
    """
    r.extend((str(len(x)).encode('utf-8'), b':', x))
138166
139167
def encode_list(x, r):
    """
    Append the bencoded form of the sequence ``x`` (``l<items>e``) to ``r``.

    Each item is encoded by the ``encode_func`` entry for its exact type.

    :param x: Items to encode, in order
    :type x: list or tuple
    :param r: Accumulator of output fragments; extended in place
    :type r: list
    :raises KeyError: If an item's type has no entry in ``encode_func``
    """
    r.append(b'l')

    for i in x:
        encode_func[type(i)](i, r)

    r.append(b'e')
147175
148176
def encode_dict(x, r):
    """
    Append the bencoded form of the mapping ``x`` (``d<key><value>...e``)
    to ``r``.

    Keys may be ``str`` (encoded as UTF-8) or ``bytes`` (written unchanged)
    and are emitted in ascending order of their encoded bytes. Each value is
    encoded by the ``encode_func`` entry for its exact type.

    :param x: Mapping to encode
    :type x: dict
    :param r: Accumulator of output fragments; extended in place
    :type r: list
    :raises KeyError: If a value's type has no entry in ``encode_func``
    """
    r.append(b'd')

    # Encode the keys before sorting: bytes keys have no .encode(), and a mix
    # of str and bytes keys cannot be ordered against each other.
    ilist = [
        (k if isinstance(k, bytes) else k.encode('utf-8'), v)
        for k, v in x.items()
    ]
    # Sort on the key alone so values never need to be comparable.
    ilist.sort(key=lambda item: item[0])

    for k, v in ilist:
        r.extend((str(len(k)).encode('utf-8'), b':', k))
        encode_func[type(v)](v, r)

    r.append(b'e')
159188
160189
161190# noinspection PyDictCreation
@@ -184,6 +213,7 @@ def encode_dict(x, r):
184213 encode_func [list ] = encode_list
185214 encode_func [str ] = encode_string
186215 encode_func [tuple ] = encode_list
216+ encode_func [bytes ] = encode_bytes
187217
188218
def bencode(value):
    """
    Encode ``value`` into the bencode format.

    :param value: Value to encode; its exact type must have an entry in
        ``encode_func``
    :type value: object

    :return: Bencode formatted data
    :rtype: bytes
    :raises KeyError: If the type of ``value`` has no entry in ``encode_func``
    """
    # A plain list is enough here: fragments are only appended at the end
    # and then joined once.
    r = []
    encode_func[type(value)](value, r)
    return b''.join(r)
202232
203233
204234# Method proxies (for compatibility with other libraries)
0 commit comments