33import logging
44from share import Harvester
55
6- logger = logging .getLogger ('com.peerj' )
6+ logger = logging .getLogger (__name__ )
77
88
99class PeerJXMLHarvester (Harvester ):
@@ -14,12 +14,16 @@ def do_harvest(self, start_date: pendulum.Pendulum, end_date: pendulum.Pendulum)
1414 while True :
1515 logger .debug ('Fetching page %s' , url )
1616 resp = self .requests .get (url )
17+ resp .raise_for_status ()
18+ resp_data = resp .json ()
1719
18- for record in resp . json () ['_items' ]:
20+ for record in resp_data ['_items' ]:
1921 if pendulum .parse (record ['date' ]) < start_date :
22+ logger .info ('%s is before %s, ending harvest' , record ['date' ], start_date )
2023 return
2124
2225 if pendulum .parse (record ['date' ]) > end_date :
26+ logger .debug ('%s is after %s, skipping' , record ['date' ], end_date )
2327 continue
2428
2529 logger .debug ('Fetching article %s' , record ['_links' ]['alternate' ]['xml' ]['href' ])
@@ -28,6 +32,7 @@ def do_harvest(self, start_date: pendulum.Pendulum, end_date: pendulum.Pendulum)
2832
2933 yield record ['@id' ], details .content
3034
31- if 'next' not in record ['_links' ]:
35+ if 'next' not in resp_data ['_links' ]:
36+ logger .info ('No "next" key found, ending harvest' )
3237 return
33- url = record ['_links' ]['next' ]['href' ]
38+ url = resp_data ['_links' ]['next' ]['href' ]
0 commit comments