summaryrefslogtreecommitdiffstats
path: root/pym/portage/util/_urlopen.py
blob: bcd8f7c89e9652a657b222e7cfd9bf2257b9ecaa (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# Copyright 2012 Gentoo Foundation
# Distributed under the terms of the GNU General Public License v2

import io
import sys
from datetime import datetime
from time import mktime
from email.utils import formatdate, parsedate

try:
	from urllib.request import urlopen as _urlopen
	import urllib.parse as urllib_parse
	import urllib.request as urllib_request
	from urllib.parse import splituser as urllib_parse_splituser
except ImportError:
	from urllib import urlopen as _urlopen
	import urlparse as urllib_parse
	import urllib2 as urllib_request
	from urllib import splituser as urllib_parse_splituser

# Python 3 has no separate long type; alias it to int so that code in this
# module can call long() on either major version.
if sys.version_info[0] >= 3:
	long = int

# Slack, in seconds, that accounts for the difference between the TIMESTAMP
# recorded in the index contents and the mtime of the index file.
TIMESTAMP_TOLERANCE = 5

def urlopen(url, if_modified_since=None):
	"""Open url and return the response handle.

	URLs whose scheme is not http/https are handed straight to the stock
	urllib opener. http(s) requests are sent with a 'Gentoo Portage'
	User-Agent, an optional If-Modified-Since header built from
	if_modified_since, and CompressedResponseProcessor installed. When the
	reply carries a Last-Modified header, its converted value is added to
	the response headers under the name 'timestamp'.

	On Python 3, a failed http(s) request whose URL embeds a username is
	retried through _new_urlopen(); every other failure, including an
	HTTP 304 (not modified), is re-raised to the caller.
	"""
	parsed = urllib_parse.urlparse(url)
	is_http = parsed.scheme in ("http", "https")
	try:
		if not is_http:
			return _urlopen(url)

		request = urllib_request.Request(url)
		request.add_header('User-Agent', 'Gentoo Portage')
		if if_modified_since:
			http_date = _timestamp_to_http(if_modified_since)
			request.add_header('If-Modified-Since', http_date)

		handle = urllib_request.build_opener(CompressedResponseProcessor).open(request)
		headers = handle.headers
		if headers.get('last-modified', ''):
			# The header container spells this add_header() on Python 3
			# and addheader() on Python 2.
			try:
				set_header = headers.add_header
			except AttributeError:
				set_header = headers.addheader
			set_header('timestamp', _http_to_timestamp(headers.get('last-modified')))
		return handle
	except Exception as err:
		# SystemExit does not derive from Exception, so it is never caught
		# here and keeps propagating on its own.
		not_modified = hasattr(err, 'code') and err.code == 304 # HTTPError 304: not modified
		retry_with_auth = (
			not not_modified
			and sys.hexversion >= 0x3000000
			and is_http
			and parsed.username
		)
		if not retry_with_auth:
			raise

	return _new_urlopen(url)

def _new_urlopen(url):
	# This is experimental code for bug #413983.
	parse_result = urllib_parse.urlparse(url)
	netloc = urllib_parse_splituser(parse_result.netloc)[1]
	url = urllib_parse.urlunparse((parse_result.scheme, netloc, parse_result.path, parse_result.params, parse_result.query, parse_result.fragment))
	password_manager = urllib_request.HTTPPasswordMgrWithDefaultRealm()
	if parse_result.username is not None:
		password_manager.add_password(None, url, parse_result.username, parse_result.password)
	auth_handler = urllib_request.HTTPBasicAuthHandler(password_manager)
	opener = urllib_request.build_opener(auth_handler)
	return opener.open(url)

def _timestamp_to_http(timestamp):
	"""Format an epoch timestamp as an HTTP date string in GMT.

	TIMESTAMP_TOLERANCE seconds are added to the timestamp before it is
	formatted.

	timestamp -- seconds since the epoch, in any form accepted by long()
	Returns a string such as 'Thu, 01 Jan 1970 00:00:05 GMT'.
	"""
	# formatdate() takes epoch seconds directly. Converting to a naive local
	# datetime and back through mktime() is redundant, and it is ambiguous
	# during the repeated hour at the end of DST (timetuple() reports
	# tm_isdst=-1), which could shift the result by an hour.
	stamp = float(long(timestamp) + TIMESTAMP_TOLERANCE)
	return formatdate(timeval=stamp, localtime=False, usegmt=True)

def _http_to_timestamp(http_datetime_string):
	tuple = parsedate(http_datetime_string)
	timestamp = mktime(tuple)
	return str(long(timestamp))

class CompressedResponseProcessor(urllib_request.BaseHandler):
	"""urllib handler that requests and transparently decodes compressed bodies.

	Outgoing http(s) requests get an Accept-Encoding header listing bzip2,
	gzip and deflate. Incoming responses whose Content-Encoding is one of
	those are replaced by an addinfourl object that yields the decompressed
	data while keeping the original headers, url, code and msg.
	"""

	def http_request(self, req):
		"""Add the Accept-Encoding header to req and return it."""
		req.add_header('Accept-Encoding', 'bzip2,gzip,deflate')
		return req
	https_request = http_request

	def http_response(self, req, response):
		"""Return response, decompressed if its Content-Encoding is supported.

		Responses without a recognised Content-Encoding are returned
		unchanged. The whole body is read into memory to decompress it.
		"""
		# Content-coding names are case-insensitive; a missing header is
		# treated like an unrecognised one.
		encoding = (response.headers.get('content-encoding') or '').strip().lower()
		decompressed = None
		if encoding == 'bzip2':
			import bz2
			decompressed = io.BytesIO(bz2.decompress(response.read()))
		elif encoding == 'gzip':
			from gzip import GzipFile
			decompressed = GzipFile(fileobj=io.BytesIO(response.read()), mode='r')
		elif encoding == 'deflate':
			import zlib
			# Read the body once: the stream is drained after the first
			# read(), so the fallback must reuse the same bytes.
			raw = response.read()
			try:
				decompressed = io.BytesIO(zlib.decompress(raw))
			except zlib.error: # they ignored RFC1950
				# Raw deflate stream without the zlib header/trailer.
				decompressed = io.BytesIO(zlib.decompress(raw, -zlib.MAX_WBITS))
		if decompressed is not None:
			wrapped = urllib_request.addinfourl(decompressed, response.headers, response.url, response.code)
			wrapped.msg = response.msg
			response = wrapped
		return response
	https_response = http_response