Skip to content

Commit 402c267

Browse files
Shikhar-395 and RayBB authored
Remove unused SubjectProcessor and stale engine test (#12315)
* Remove unused SubjectProcessor and stale engine test
* Update openlibrary/tests/core/test_lists_engine.py

---------

Co-authored-by: Raymond Berger <RayBB@users.noreply.github.com>
1 parent 7200c38 commit 402c267

2 files changed

Lines changed: 30 additions & 59 deletions

File tree

openlibrary/core/lists/engine.py

Lines changed: 0 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
"""Utility functions for processing lists."""
22

3-
import collections
43
import re
54

65
RE_SUBJECT = re.compile("[, _]+")
@@ -37,47 +36,3 @@ def get(work):
3736
yield s['key']
3837

3938
return list(get(work))
40-
41-
42-
class SubjectProcessor:
43-
"""Processor to take a dict of subjects, places, people and times and build a list of ranked subjects."""
44-
45-
def __init__(self):
46-
self.subjects = collections.defaultdict(list)
47-
48-
def add_subjects(self, subjects):
49-
for s in subjects.get("subjects", []):
50-
self._add_subject('subject:', s)
51-
52-
for s in subjects.get("people", []):
53-
self._add_subject('person:', s)
54-
55-
for s in subjects.get("places", []):
56-
self._add_subject('place:', s)
57-
58-
for s in subjects.get("times", []):
59-
self._add_subject('time:', s)
60-
61-
def _add_subject(self, prefix, name):
62-
if s := self._get_subject(prefix, name):
63-
self.subjects[s['key']].append(s['name'])
64-
65-
def _get_subject(self, prefix, subject_name):
66-
if isinstance(subject_name, str):
67-
key = prefix + RE_SUBJECT.sub("_", subject_name.lower()).strip("_")
68-
return {"key": key, "name": subject_name}
69-
70-
def _most_used(self, seq):
71-
d = collections.defaultdict(lambda: 0)
72-
for x in seq:
73-
d[x] += 1
74-
75-
return sorted(d, key=lambda k: d[k], reverse=True)[0]
76-
77-
def top_subjects(self, limit=100):
78-
subjects = [
79-
{"key": key, "name": self._most_used(names), "count": len(names)}
80-
for key, names in self.subjects.items()
81-
]
82-
subjects.sort(key=lambda s: s['count'], reverse=True)
83-
return subjects[:limit]
openlibrary/tests/core/test_lists_engine.py

Lines changed: 30 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,34 @@
11
from openlibrary.core.lists import engine
22

33

4-
def test_reduce():
5-
def test_reduce(self):
6-
d1 = [1, 2, 1, "2010-11-11 10:20:30", {"subjects": ["Love", "Hate"]}]
4+
def test_get_seeds():
5+
work = {
6+
"key": "/works/OL1W",
7+
"authors": [
8+
{"author": {"key": "/authors/OL1A"}},
9+
{"author": {"key": "/authors/OL2A"}},
10+
{"not_author": {"key": "/authors/ignored"}},
11+
],
12+
"editions": [
13+
{"key": "/books/OL1M"},
14+
{"key": "/books/OL2M"},
15+
],
16+
"subjects": ["Love Story", "love_story", 123],
17+
"subject_places": ["New York", None],
18+
"subject_people": ["Jane Doe", {"name": "ignored"}],
19+
"subject_times": ["2000-2099"],
20+
}
721

8-
d2 = [1, 1, 0, "2009-01-02 10:20:30", {"subjects": ["Love"]}]
9-
assert engine.reduce([d1, d2]) == {
10-
"works": 2,
11-
"editions": 3,
12-
"ebooks": 1,
13-
"last_modified": "2010-11-11 10:20:30",
14-
"subjects": [
15-
{"name": "Love", "key": "subject:love", "count": 2},
16-
{"name": "Hate", "key": "subject:hate", "count": 1},
17-
],
18-
}
22+
seeds = engine.get_seeds(work)
23+
24+
assert seeds == [
25+
"/works/OL1W",
26+
"/authors/OL1A",
27+
"/authors/OL2A",
28+
"/books/OL1M",
29+
"/books/OL2M",
30+
"subject:love_story",
31+
"place:new_york",
32+
"person:jane_doe",
33+
"time:2000-2099",
34+
]

0 commit comments

Comments (0)