Skip to content

Commit d457566

Browse files
committed
Adapt tests.
1 parent b1a7342 commit d457566

2 files changed

Lines changed: 21 additions & 23 deletions

File tree

microdata.py

Lines changed: 17 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -35,15 +35,15 @@ class Item(object):
3535
or another Item.
3636
"""
3737

38-
def __init__(self, itemtype=None, itemid=None, url=""):
38+
def __init__(self, itemtype=None, itemid=None, domain=""):
3939
"""Create an Item, with an optional itemtype and/or itemid.
4040
"""
4141
# itemtype can be a space delimited list
4242
if itemtype:
43-
self.itemtype = [URI(i, domain=url) for i in itemtype.split(" ")]
43+
self.itemtype = [URI(i, domain=domain) for i in itemtype.split(" ")]
4444

4545
if itemid:
46-
self.itemid = URI(itemid, domain=url)
46+
self.itemid = URI(itemid, domain=domain)
4747

4848
self.props = {}
4949

@@ -110,14 +110,12 @@ def json_dict(self):
110110

111111
class URI(object):
112112

113-
def __init__(self, string, domain):
113+
def __init__(self, string, domain=""):
114114
if string.startswith("http://") or string.startswith("https://"):
115115
self.string = string
116116
else:
117117
self.string = "http://" + domain + string
118118

119-
print "URI created with string", self.string
120-
121119
def __eq__(self, other):
122120
if isinstance(other, URI):
123121
return self.string == other.string
@@ -151,23 +149,23 @@ def get_domain(url_string):
151149
}
152150

153151

154-
def _find_items(e, url=""):
152+
def _find_items(e, domain=""):
155153
items = []
156154
unlinked = []
157155
if _is_element(e) and _is_itemscope(e):
158-
item = _make_item(e, url=url)
159-
unlinked = _extract(e, item, url=url)
156+
item = _make_item(e, domain=domain)
157+
unlinked = _extract(e, item, domain=domain)
160158
items.append(item)
161159
for unlinked_element in unlinked:
162-
items.extend(_find_items(unlinked_element, url=url))
160+
items.extend(_find_items(unlinked_element, domain=domain))
163161
else:
164162
for child in e.childNodes:
165-
items.extend(_find_items(child, url=url))
163+
items.extend(_find_items(child, domain=domain))
166164

167165
return items
168166

169167

170-
def _extract(e, item, url=""):
168+
def _extract(e, item, domain=""):
171169
# looks in a DOM element for microdata to assign to an Item
172170
# _extract returns a list of elements which appeared to have microdata
173171
# but which were not directly related to the Item that was passed in
@@ -177,19 +175,19 @@ def _extract(e, item, url=""):
177175
itemprop = _attr(child, "itemprop")
178176
itemscope = _is_itemscope(child)
179177
if itemprop and itemscope:
180-
nested_item = _make_item(child, url=url)
181-
unlinked.extend(_extract(child, nested_item, url=url))
178+
nested_item = _make_item(child, domain=domain)
179+
unlinked.extend(_extract(child, nested_item, domain=domain))
182180
item.set(itemprop, nested_item)
183181
elif itemprop:
184-
value = _property_value(child, domain=url)
182+
value = _property_value(child, domain=domain)
185183
# itemprops may also be in a space delimited list
186184
for i in itemprop.split(" "):
187185
item.set(i, value)
188-
unlinked.extend(_extract(child, item, url=url))
186+
unlinked.extend(_extract(child, item, domain=domain))
189187
elif itemscope:
190188
unlinked.append(child)
191189
else:
192-
unlinked.extend(_extract(child, item, url=url))
190+
unlinked.extend(_extract(child, item, domain=domain))
193191

194192
return unlinked
195193

@@ -233,12 +231,12 @@ def _text(e):
233231
return ''.join(chunks)
234232

235233

236-
def _make_item(e, url=""):
234+
def _make_item(e, domain=""):
237235
if not _is_itemscope(e):
238236
raise Exception("element is not an Item")
239237
itemtype = _attr(e, "itemtype")
240238
itemid = _attr(e, "itemid")
241-
return Item(itemtype, itemid, url=url)
239+
return Item(itemtype, itemid, domain=domain)
242240

243241

244242
if __name__ == "__main__":

test.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ class MicrodataParserTest(unittest.TestCase):
1212
def test_parse(self):
1313

1414
# parse the html for microdata
15-
items = get_items(open("test-data/example.html"))
15+
items = get_items("test-data/example.html")
1616

1717
# this html should have just one main item
1818
self.assertTrue(len(items), 1)
@@ -55,7 +55,7 @@ def test_parse(self):
5555
def test_parse_nested(self):
5656

5757
# parse the html for microdata
58-
items = get_items(open("test-data/example-nested.html"))
58+
items = get_items("test-data/example-nested.html")
5959

6060
# this html should have just one main item
6161
self.assertTrue(len(items), 1)
@@ -89,7 +89,7 @@ def test_parse_nested(self):
8989
self.assertEqual(i["properties"]["location"][0]["properties"]["address"][0]["properties"]["addressLocality"][0], "Philadelphia")
9090

9191
def test_parse_unlinked(self):
92-
items = get_items(open("test-data/unlinked.html"))
92+
items = get_items("test-data/unlinked.html")
9393
self.assertEqual(len(items), 2)
9494

9595
i = items[0]
@@ -108,7 +108,7 @@ def test_parse_unlinked(self):
108108
self.assertTrue('Whitworth' in i.streetAddress)
109109

110110
def test_skip_level(self):
111-
items = get_items(open("test-data/skip-level.html"))
111+
items = get_items("test-data/skip-level.html")
112112
self.assertEqual(len(items), 1)
113113
self.assertEqual(items[0].name, "Jane Doe")
114114

0 commit comments

Comments
 (0)