Skip to content

Commit e03ef53

Browse files
committed
Use urlparse in get_domain.
1 parent 744e466 commit e03ef53

File tree

1 file changed

+4
-2
lines changed

1 file changed

+4
-2
lines changed

microdata.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import html5lib
55

66
from collections import defaultdict
7+
from urlparse import urlparse
78

89

910
try:
@@ -137,8 +138,9 @@ def get_domain(url_string):
137138
"""
138139
Get the domain _including_ the protocol specified, if any.
139140
"""
140-
if "://" in url_string:
141-
return "/".join(url_string.split("/")[0:3])
141+
parsed = urlparse(url_string)
142+
if parsed.scheme:
143+
return "/".join((parsed.scheme, "", parsed.netloc))
142144
else:
143145
return url_string.split("/")[0]
144146

0 commit comments

Comments
 (0)