1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
|
import gzip
import urllib.parse
import urllib.request
class Url:
    """Small helper around a URL string: fetch its content or derive its root."""

    def __init__(self, url):
        """Remember the URL to operate on.

        Args:
            url: URL string, e.g. ``http://test.com/python``.
        """
        self._url = url

    def get_data(self):
        """Fetch the resource behind the URL and return its body as bytes.

        If the response declares a gzip ``Content-Encoding`` the body is
        transparently decompressed before being returned.

        Returns:
            The (decompressed) response body as ``bytes``.

        Raises:
            urllib.error.URLError: if the URL cannot be opened.
            OSError: if a body declared as gzip is not valid gzip data.
        """
        # The context manager guarantees the connection is released even if
        # reading the body fails.
        with urllib.request.urlopen(self._url) as response:
            encoding = response.info().get('Content-Encoding') or ''
            body = response.read()

        # Content-coding tokens are case-insensitive and "x-gzip" is an
        # alias of "gzip", so normalise before comparing.
        if encoding.strip().lower() in ('gzip', 'x-gzip'):
            body = gzip.decompress(body)
        return body

    def get_root(self):
        """Return the root (scheme plus network location) of the URL.

        e.g. http://test.com/python => http://test.com
        """
        parts = urllib.parse.urlparse(self._url)
        return parts.scheme + '://' + parts.netloc
|