Remove unused SubjectProcessor and stale engine test (#12315)

Shikhar-395 · RayBB · web-flow · commit 402c2676a35f · 2026-04-07T16:41:50.000-07:00
* Remove unused SubjectProcessor and stale engine test

* Update openlibrary/tests/core/test_lists_engine.py

---------

Co-authored-by: Raymond Berger <RayBB@users.noreply.github.com>
diff --git a/openlibrary/core/lists/engine.py b/openlibrary/core/lists/engine.py
@@ -1,6 +1,5 @@
 """Utility functions for processing lists."""
 
-import collections
 import re
 
 RE_SUBJECT = re.compile("[, _]+")
@@ -37,47 +36,3 @@ def get(work):
             yield s['key']
 
     return list(get(work))
-
-
-class SubjectProcessor:
-    """Processor to take a dict of subjects, places, people and times and build a list of ranked subjects."""
-
-    def __init__(self):
-        self.subjects = collections.defaultdict(list)
-
-    def add_subjects(self, subjects):
-        for s in subjects.get("subjects", []):
-            self._add_subject('subject:', s)
-
-        for s in subjects.get("people", []):
-            self._add_subject('person:', s)
-
-        for s in subjects.get("places", []):
-            self._add_subject('place:', s)
-
-        for s in subjects.get("times", []):
-            self._add_subject('time:', s)
-
-    def _add_subject(self, prefix, name):
-        if s := self._get_subject(prefix, name):
-            self.subjects[s['key']].append(s['name'])
-
-    def _get_subject(self, prefix, subject_name):
-        if isinstance(subject_name, str):
-            key = prefix + RE_SUBJECT.sub("_", subject_name.lower()).strip("_")
-            return {"key": key, "name": subject_name}
-
-    def _most_used(self, seq):
-        d = collections.defaultdict(lambda: 0)
-        for x in seq:
-            d[x] += 1
-
-        return sorted(d, key=lambda k: d[k], reverse=True)[0]
-
-    def top_subjects(self, limit=100):
-        subjects = [
-            {"key": key, "name": self._most_used(names), "count": len(names)}
-            for key, names in self.subjects.items()
-        ]
-        subjects.sort(key=lambda s: s['count'], reverse=True)
-        return subjects[:limit]
diff --git a/openlibrary/tests/core/test_lists_engine.py b/openlibrary/tests/core/test_lists_engine.py
@@ -1,18 +1,34 @@
 from openlibrary.core.lists import engine
 
 
-def test_reduce():
-    def test_reduce(self):
-        d1 = [1, 2, 1, "2010-11-11 10:20:30", {"subjects": ["Love", "Hate"]}]
+def test_get_seeds():
+    work = {
+        "key": "/works/OL1W",
+        "authors": [
+            {"author": {"key": "/authors/OL1A"}},
+            {"author": {"key": "/authors/OL2A"}},
+            {"not_author": {"key": "/authors/ignored"}},
+        ],
+        "editions": [
+            {"key": "/books/OL1M"},
+            {"key": "/books/OL2M"},
+        ],
+        "subjects": ["Love Story", "love_story", 123],
+        "subject_places": ["New York", None],
+        "subject_people": ["Jane Doe", {"name": "ignored"}],
+        "subject_times": ["2000-2099"],
+    }
 
-        d2 = [1, 1, 0, "2009-01-02 10:20:30", {"subjects": ["Love"]}]
-        assert engine.reduce([d1, d2]) == {
-            "works": 2,
-            "editions": 3,
-            "ebooks": 1,
-            "last_modified": "2010-11-11 10:20:30",
-            "subjects": [
-                {"name": "Love", "key": "subject:love", "count": 2},
-                {"name": "Hate", "key": "subject:hate", "count": 1},
-            ],
-        }
+    seeds = engine.get_seeds(work)
+
+    assert seeds == [
+        "/works/OL1W",
+        "/authors/OL1A",
+        "/authors/OL2A",
+        "/books/OL1M",
+        "/books/OL2M",
+        "subject:love_story",
+        "place:new_york",
+        "person:jane_doe",
+        "time:2000-2099",
+    ]