@@ -35,15 +35,15 @@ class Item(object):
3535 or another Item.
3636 """
3737
38- def __init__ (self , itemtype = None , itemid = None , url = "" ):
38+ def __init__ (self , itemtype = None , itemid = None , domain = "" ):
3939 """Create an Item, with an optional itemtype and/or itemid.
4040 """
4141 # itemtype can be a space delimited list
4242 if itemtype :
43- self .itemtype = [URI (i , domain = url ) for i in itemtype .split (" " )]
43+ self .itemtype = [URI (i , domain = domain ) for i in itemtype .split (" " )]
4444
4545 if itemid :
46- self .itemid = URI (itemid , domain = url )
46+ self .itemid = URI (itemid , domain = domain )
4747
4848 self .props = {}
4949
@@ -110,14 +110,12 @@ def json_dict(self):
110110
111111class URI (object ):
112112
113- def __init__ (self , string , domain ):
113+ def __init__ (self , string , domain = "" ):
114114 if string .startswith ("http://" ) or string .startswith ("https://" ):
115115 self .string = string
116116 else :
117117 self .string = "http://" + domain + string
118118
119- print "URI created with string" , self .string
120-
121119 def __eq__ (self , other ):
122120 if isinstance (other , URI ):
123121 return self .string == other .string
@@ -151,23 +149,23 @@ def get_domain(url_string):
151149}
152150
153151
154- def _find_items (e , url = "" ):
152+ def _find_items (e , domain = "" ):
155153 items = []
156154 unlinked = []
157155 if _is_element (e ) and _is_itemscope (e ):
158- item = _make_item (e , url = url )
159- unlinked = _extract (e , item , url = url )
156+ item = _make_item (e , domain = domain )
157+ unlinked = _extract (e , item , domain = domain )
160158 items .append (item )
161159 for unlinked_element in unlinked :
162- items .extend (_find_items (unlinked_element , url = url ))
160+ items .extend (_find_items (unlinked_element , domain = domain ))
163161 else :
164162 for child in e .childNodes :
165- items .extend (_find_items (child , url = url ))
163+ items .extend (_find_items (child , domain = domain ))
166164
167165 return items
168166
169167
170- def _extract (e , item , url = "" ):
168+ def _extract (e , item , domain = "" ):
171169 # looks in a DOM element for microdata to assign to an Item
172170 # _extract returns a list of elements which appeared to have microdata
173171 # but which were not directly related to the Item that was passed in
@@ -177,19 +175,19 @@ def _extract(e, item, url=""):
177175 itemprop = _attr (child , "itemprop" )
178176 itemscope = _is_itemscope (child )
179177 if itemprop and itemscope :
180- nested_item = _make_item (child , url = url )
181- unlinked .extend (_extract (child , nested_item , url = url ))
178+ nested_item = _make_item (child , domain = domain )
179+ unlinked .extend (_extract (child , nested_item , domain = domain ))
182180 item .set (itemprop , nested_item )
183181 elif itemprop :
184- value = _property_value (child , domain = url )
182+ value = _property_value (child , domain = domain )
185183 # itemprops may also be in a space delimited list
186184 for i in itemprop .split (" " ):
187185 item .set (i , value )
188- unlinked .extend (_extract (child , item , url = url ))
186+ unlinked .extend (_extract (child , item , domain = domain ))
189187 elif itemscope :
190188 unlinked .append (child )
191189 else :
192- unlinked .extend (_extract (child , item , url = url ))
190+ unlinked .extend (_extract (child , item , domain = domain ))
193191
194192 return unlinked
195193
@@ -233,12 +231,12 @@ def _text(e):
233231 return '' .join (chunks )
234232
235233
236- def _make_item (e , url = "" ):
234+ def _make_item (e , domain = "" ):
237235 if not _is_itemscope (e ):
238236 raise Exception ("element is not an Item" )
239237 itemtype = _attr (e , "itemtype" )
240238 itemid = _attr (e , "itemid" )
241- return Item (itemtype , itemid , url = url )
239+ return Item (itemtype , itemid , domain = domain )
242240
243241
244242if __name__ == "__main__" :
0 commit comments