Skip to content

Commit 03d51be

Browse files
Return the resolved url
1 parent 576c923 commit 03d51be

2 files changed

Lines changed: 47 additions & 3 deletions

File tree

src/doi/__init__.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ def validate_doi(doi):
4343
4444
:param doi: Doi identificator
4545
:type doi: str
46+
:returns: It returns the url assigned to the doi if everything went right
47+
:rtype: str
4648
4749
:raises ValueError: Whenever the doi is not valid
4850
"""
@@ -56,6 +58,10 @@ def validate_doi(doi):
5658

5759
try:
5860
result = json.loads(urllib.request.urlopen(request).read().decode())
61+
if 'values' in result:
62+
url = [v['data']['value']
63+
for v in result['values'] if v.get('type') == 'URL']
64+
return url[0] if url else None
5965
except HTTPError:
6066
raise ValueError('HTTP 404: DOI not found')
6167
except URLError as e:
@@ -117,3 +123,15 @@ def find_doi_in_text(text):
117123
except StopIteration:
118124
pass
119125
return None
126+
127+
128+
def get_real_url_from_doi(doi):
    """
    Resolve a doi to the url where the document can be reached.

    The doi is first resolved through :func:`validate_doi`. If the resulting
    url is an Elsevier ``linkinghub`` address of the form ``.../pii/<PII>``,
    it is rewritten to the matching ScienceDirect abstract page. Any other
    url is returned untouched.

    :param doi: Doi identifier
    :type  doi: str
    :returns: The (possibly rewritten) url, or the falsy value handed back by
        ``validate_doi`` when it could not provide a url
    :rtype:  str

    Exceptions raised by ``validate_doi`` are not caught here and therefore
    propagate to the caller.
    """
    url = validate_doi(doi)
    if not url:
        # Nothing to rewrite: hand back whatever falsy value we were given.
        return url

    # Raw string: '\.' in a plain literal is an invalid escape sequence and
    # triggers a SyntaxWarning on recent Python versions.
    m = re.match(r'.*linkinghub\.elsevier.*/pii/([A-Z0-9]+).*', url, re.I)
    if m:
        return ('https://www.sciencedirect.com/science/article/abs/pii/{pii}'
                .format(pii=m.group(1)))
    return url

tests/test_doi.py

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,10 @@
33
import os
44
from pkg_resources import parse_version
55

6-
from doi import validate_doi, find_doi_in_text, __version__, pdf_to_doi
6+
from doi import (
7+
validate_doi, find_doi_in_text, __version__, pdf_to_doi,
8+
get_real_url_from_doi
9+
)
710

811

912
def test_valid_version():
@@ -12,14 +15,37 @@ def test_valid_version():
1215

1316

1417
def test_validate_doi():
    """Known dois resolve to their expected urls; bad ones report a 404."""
    expected_urls = {
        '10.1063/1.5081715':
            'http://aip.scitation.org/doi/10.1063/1.5081715',
        '10.1007%2FBF01451751':
            'http://link.springer.com/10.1007/BF01451751',
        '10.1103/PhysRevLett.49.57':
            'https://link.aps.org/doi/10.1103/PhysRevLett.49.57',
        '10.1080/14786442408634457':
            'https://www.tandfonline.com/doi/full/10.1080/14786442408634457',
        '10.1021/jp003647e':
            'https://pubs.acs.org/doi/10.1021/jp003647e',
        '10.1016/S0009-2614(97)04014-1':
            'http://linkinghub.elsevier.com/retrieve/pii/S0009261497040141',
    }
    for doi, expected in expected_urls.items():
        assert validate_doi(doi) == expected

    # NOTE(review): if validate_doi does not raise for one of these inputs,
    # the loop passes silently -- confirm whether that is intended.
    for bad_doi in ('', 'asdf'):
        try:
            validate_doi(bad_doi)
        except ValueError as err:
            assert str(err) == 'HTTP 404: DOI not found'
2239

40+
def test_get_real_url_from_doi():
    """An Elsevier doi must resolve to its ScienceDirect abstract page."""
    cases = (
        (
            '10.1016/S0009-2614(97)04014-1',
            'https://www.sciencedirect.com/science/'
            'article/abs/pii/S0009261497040141',
        ),
    )
    for doi, expected in cases:
        resolved = get_real_url_from_doi(doi)
        assert expected == resolved
48+
2349

2450
def test_find_doi_in_line():
2551
test_data = [

0 commit comments

Comments
 (0)