author     Marius Mauch <genone@gentoo.org>  2007-01-25 15:49:26 +0000
committer  Marius Mauch <genone@gentoo.org>  2007-01-25 15:49:26 +0000
commit     3b08c21101b0801d7c5d6c145a27bef5cd42078c
tree       2eea73b311d67b567410670630335796bf0a272c
parent     b4eed9540e19ee7038ac875f0e084f8256675580

Namespace sanitizing, step 1
svn path=/main/trunk/; revision=5778
Diffstat (limited to 'pym/getbinpkg.py')
 pym/getbinpkg.py (-rw-r--r-- -> l---------) | 573 +-
 1 file changed, 1 insertion(+), 572 deletions(-)
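
The mode column above (-rw-r--r-- -> l---------, git modes 100644 -> 120000
in the diff header below) records that pym/getbinpkg.py stops being a
regular file and becomes a symbolic link. As a minimal sketch, this is what
the result looks like in a checkout at this revision; the link path and
target come straight from the diff, while running from the repository root
is an assumption:

    # Assumes the current directory is the root of a portage checkout
    # at this revision.
    import os

    link = "pym/getbinpkg.py"
    assert os.path.islink(link)                         # git mode 120000
    assert os.readlink(link) == "portage/getbinpkg.py"  # the one inserted line
    # A symlink target resolves relative to the link's directory, so the
    # real module now lives at pym/portage/getbinpkg.py:
    real = os.path.realpath(link)
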
diff --git a/pym/getbinpkg.py b/pym/getbinpkg.py
index 462da429d..89c090948 100644..120000
--- a/pym/getbinpkg.py
+++ b/pym/getbinpkg.py
@@ -1,572 +1 @@
-# getbinpkg.py -- Portage binary-package helper functions
-# Copyright 2003-2004 Gentoo Foundation
-# Distributed under the terms of the GNU General Public License v2
-# $Id$
-
-if not hasattr(__builtins__, "set"):
- from sets import Set as set
-
-from output import red, yellow, green
-import htmllib,HTMLParser,formatter,sys,os,xpak,time,tempfile,base64,urllib2
-
-try:
- import cPickle
-except ImportError:
- import pickle as cPickle
-
-try:
- import ftplib
-except SystemExit, e:
- raise
-except Exception, e:
- sys.stderr.write(red("!!! CANNOT IMPORT FTPLIB: ")+str(e)+"\n")
-
-try:
- import httplib
-except SystemExit, e:
- raise
-except Exception, e:
- sys.stderr.write(red("!!! CANNOT IMPORT HTTPLIB: ")+str(e)+"\n")
-
-def make_metadata_dict(data):
- myid,myglob = data
-
- mydict = {}
- for x in xpak.getindex_mem(myid):
- mydict[x] = xpak.getitem(data,x)
-
- return mydict
-
-class ParseLinks(HTMLParser.HTMLParser):
- """Parser class that overrides HTMLParser to grab all anchors from an html
- page and provide suffix and prefix limitors"""
- def __init__(self):
- self.PL_anchors = []
- HTMLParser.HTMLParser.__init__(self)
-
- def get_anchors(self):
- return self.PL_anchors
-
- def get_anchors_by_prefix(self,prefix):
- newlist = []
- for x in self.PL_anchors:
- if x.startswith(prefix):
- if x not in newlist:
- newlist.append(x[:])
- return newlist
-
- def get_anchors_by_suffix(self,suffix):
- newlist = []
- for x in self.PL_anchors:
- if x.endswith(suffix):
- if x not in newlist:
- newlist.append(x[:])
- return newlist
-
- def handle_endtag(self,tag):
- pass
-
- def handle_starttag(self,tag,attrs):
- if tag == "a":
- for x in attrs:
- if x[0] == 'href':
- if x[1] not in self.PL_anchors:
- self.PL_anchors.append(urllib2.unquote(x[1]))
-
-
-def create_conn(baseurl,conn=None):
- """(baseurl,conn) --- Takes a protocol://site:port/address url, and an
- optional connection. If connection is already active, it is passed on.
- baseurl is reduced to address and is returned in tuple (conn,address)"""
- parts = baseurl.split("://",1)
- if len(parts) != 2:
- raise ValueError, "Provided URL does not contain protocol identifier. '%s'" % baseurl
- protocol,url_parts = parts
- del parts
- host,address = url_parts.split("/",1)
- del url_parts
- address = "/"+address
-
- userpass_host = host.split("@",1)
- if len(userpass_host) == 1:
- host = userpass_host[0]
- userpass = ["anonymous"]
- else:
- host = userpass_host[1]
- userpass = userpass_host[0].split(":")
- del userpass_host
-
- if len(userpass) > 2:
- raise ValueError, "Unable to interpret username/password provided."
- elif len(userpass) == 2:
- username = userpass[0]
- password = userpass[1]
- elif len(userpass) == 1:
- username = userpass[0]
- password = None
- del userpass
-
- http_headers = {}
- http_params = {}
- if username and password:
- http_headers = {
- "Authorization": "Basic %s" %
- base64.encodestring("%s:%s" % (username, password)).replace(
- "\012",
- ""
- ),
- }
-
- if not conn:
- if protocol == "https":
- conn = httplib.HTTPSConnection(host)
- elif protocol == "http":
- conn = httplib.HTTPConnection(host)
- elif protocol == "ftp":
- passive = 1
- if(host[-1] == "*"):
- passive = 0
- host = host[:-1]
- conn = ftplib.FTP(host)
- if password:
- conn.login(username,password)
- else:
- sys.stderr.write(yellow(" * No password provided for username")+" '"+str(username)+"'\n\n")
- conn.login(username)
- conn.set_pasv(passive)
- conn.set_debuglevel(0)
- else:
- raise NotImplementedError, "%s is not a supported protocol." % protocol
-
- return (conn,protocol,address, http_params, http_headers)
-
-def make_ftp_request(conn, address, rest=None, dest=None):
- """(conn,address,rest) --- uses the conn object to request the data
- from address and issuing a rest if it is passed."""
- try:
-
- if dest:
- fstart_pos = dest.tell()
-
- conn.voidcmd("TYPE I")
- fsize = conn.size(address)
-
- if (rest != None) and (rest < 0):
- rest = fsize+int(rest)
- if rest < 0:
- rest = 0
-
- if rest != None:
- mysocket = conn.transfercmd("RETR "+str(address), rest)
- else:
- mysocket = conn.transfercmd("RETR "+str(address))
-
- mydata = ""
- while 1:
- somedata = mysocket.recv(8192)
- if somedata:
- if dest:
- dest.write(somedata)
- else:
- mydata = mydata + somedata
- else:
- break
-
- if dest:
- data_size = fstart_pos - dest.tell()
- else:
- data_size = len(mydata)
-
- mysocket.close()
- conn.voidresp()
- conn.voidcmd("TYPE A")
-
- return mydata,not (fsize==data_size),""
-
- except ValueError, e:
- return None,int(str(e)[:4]),str(e)
-
-
-def make_http_request(conn, address, params={}, headers={}, dest=None):
- """(conn,address,params,headers) --- uses the conn object to request
- the data from address, performing Location forwarding and using the
- optional params and headers."""
-
- rc = 0
- response = None
- while (rc == 0) or (rc == 301) or (rc == 302):
- try:
- if (rc != 0):
- conn,ignore,ignore,ignore,ignore = create_conn(address)
- conn.request("GET", address, params, headers)
- except SystemExit, e:
- raise
- except Exception, e:
- return None,None,"Server request failed: "+str(e)
- response = conn.getresponse()
- rc = response.status
-
- # 301 means that the page address is wrong.
- if ((rc == 301) or (rc == 302)):
- ignored_data = response.read()
- del ignored_data
- for x in str(response.msg).split("\n"):
- parts = x.split(": ",1)
- if parts[0] == "Location":
- if (rc == 301):
- sys.stderr.write(red("Location has moved: ")+str(parts[1])+"\n")
- if (rc == 302):
- sys.stderr.write(red("Location has temporarily moved: ")+str(parts[1])+"\n")
- address = parts[1]
- break
-
- if (rc != 200) and (rc != 206):
- sys.stderr.write(str(response.msg)+"\n")
- sys.stderr.write(response.read()+"\n")
- sys.stderr.write("address: "+address+"\n")
- return None,rc,"Server did not respond successfully ("+str(response.status)+": "+str(response.reason)+")"
-
- if dest:
- dest.write(response.read())
- return "",0,""
-
- return response.read(),0,""
-
-
-def match_in_array(array, prefix="", suffix="", match_both=1, allow_overlap=0):
- myarray = []
-
- if not (prefix and suffix):
- match_both = 0
-
- for x in array:
- add_p = 0
- if prefix and (len(x) >= len(prefix)) and (x[:len(prefix)] == prefix):
- add_p = 1
-
- if match_both:
- if prefix and not add_p: # Require both, but don't have first one.
- continue
- else:
- if add_p: # Only need one, and we have it.
- myarray.append(x[:])
- continue
-
- if not allow_overlap: # Not allow to overlap prefix and suffix
- if len(x) >= (len(prefix)+len(suffix)):
- y = x[len(prefix):]
- else:
- continue # Too short to match.
- else:
- y = x # Do whatever... We're overlapping.
-
- if suffix and (len(x) >= len(suffix)) and (x[-len(suffix):] == suffix):
- myarray.append(x) # It matches
- else:
- continue # Doesn't match.
-
- return myarray
-
-
-
-def dir_get_list(baseurl,conn=None):
- """(baseurl[,connection]) -- Takes a base url to connect to and read from.
- URL should be in the for <proto>://<site>[:port]<path>
- Connection is used for persistent connection instances."""
-
- if not conn:
- keepconnection = 0
- else:
- keepconnection = 1
-
- conn,protocol,address,params,headers = create_conn(baseurl, conn)
-
- listing = None
- if protocol in ["http","https"]:
- page,rc,msg = make_http_request(conn,address,params,headers)
-
- if page:
- parser = ParseLinks()
- parser.feed(page)
- del page
- listing = parser.get_anchors()
- else:
- raise Exception, "Unable to get listing: %s %s" % (rc,msg)
- elif protocol in ["ftp"]:
- if address[-1] == '/':
- olddir = conn.pwd()
- conn.cwd(address)
- listing = conn.nlst()
- conn.cwd(olddir)
- del olddir
- else:
- listing = conn.nlst(address)
- else:
- raise TypeError, "Unknown protocol. '%s'" % protocol
-
- if not keepconnection:
- conn.close()
-
- return listing
-
-def file_get_metadata(baseurl,conn=None, chunk_size=3000):
- """(baseurl[,connection]) -- Takes a base url to connect to and read from.
- URL should be in the for <proto>://<site>[:port]<path>
- Connection is used for persistent connection instances."""
-
- if not conn:
- keepconnection = 0
- else:
- keepconnection = 1
-
- conn,protocol,address,params,headers = create_conn(baseurl, conn)
-
- if protocol in ["http","https"]:
- headers["Range"] = "bytes=-"+str(chunk_size)
- data,rc,msg = make_http_request(conn, address, params, headers)
- elif protocol in ["ftp"]:
- data,rc,msg = make_ftp_request(conn, address, -chunk_size)
- else:
- raise TypeError, "Unknown protocol. '%s'" % protocol
-
- if data:
- xpaksize = xpak.decodeint(data[-8:-4])
- if (xpaksize+8) > chunk_size:
- myid = file_get_metadata(baseurl, conn, (xpaksize+8))
- if not keepconnection:
- conn.close()
- return myid
- else:
- xpak_data = data[len(data)-(xpaksize+8):-8]
- del data
-
- myid = xpak.xsplit_mem(xpak_data)
- if not myid:
- myid = None,None
- del xpak_data
- else:
- myid = None,None
-
- if not keepconnection:
- conn.close()
-
- return myid
-
-
-def file_get(baseurl,dest,conn=None,fcmd=None):
- """(baseurl,dest,fcmd=) -- Takes a base url to connect to and read from.
- URL should be in the for <proto>://[user[:pass]@]<site>[:port]<path>"""
-
- if not fcmd:
- return file_get_lib(baseurl,dest,conn)
-
- fcmd = fcmd.replace("${DISTDIR}",dest)
- fcmd = fcmd.replace("${URI}", baseurl)
- fcmd = fcmd.replace("${FILE}", os.path.basename(baseurl))
- mysplit = fcmd.split()
- mycmd = mysplit[0]
- myargs = [os.path.basename(mycmd)]+mysplit[1:]
- mypid=os.fork()
- if mypid == 0:
- try:
- os.execv(mycmd,myargs)
- except OSError:
- pass
- sys.stderr.write("!!! Failed to spawn fetcher.\n")
- sys.stderr.flush()
- os._exit(1)
- retval=os.waitpid(mypid,0)[1]
- if (retval & 0xff) == 0:
- retval = retval >> 8
- else:
- sys.stderr.write("Spawned processes caught a signal.\n")
- sys.exit(1)
- if retval != 0:
- sys.stderr.write("Fetcher exited with a failure condition.\n")
- return 0
- return 1
-
-def file_get_lib(baseurl,dest,conn=None):
- """(baseurl[,connection]) -- Takes a base url to connect to and read from.
- URL should be in the for <proto>://<site>[:port]<path>
- Connection is used for persistent connection instances."""
-
- if not conn:
- keepconnection = 0
- else:
- keepconnection = 1
-
- conn,protocol,address,params,headers = create_conn(baseurl, conn)
-
- sys.stderr.write("Fetching '"+str(os.path.basename(address)+"'\n"))
- if protocol in ["http","https"]:
- data,rc,msg = make_http_request(conn, address, params, headers, dest=dest)
- elif protocol in ["ftp"]:
- data,rc,msg = make_ftp_request(conn, address, dest=dest)
- else:
- raise TypeError, "Unknown protocol. '%s'" % protocol
-
- if not keepconnection:
- conn.close()
-
- return rc
-
-
-def dir_get_metadata(baseurl, conn=None, chunk_size=3000, verbose=1, usingcache=1, makepickle=None):
- """(baseurl,conn,chunk_size,verbose) --
- """
- if not conn:
- keepconnection = 0
- else:
- keepconnection = 1
-
- if makepickle is None:
- makepickle = "/var/cache/edb/metadata.idx.most_recent"
-
- conn,protocol,address,params,headers = create_conn(baseurl, conn)
-
- filedict = {}
-
- try:
- metadatafile = open("/var/cache/edb/remote_metadata.pickle")
- metadata = cPickle.load(metadatafile)
- sys.stderr.write("Loaded metadata pickle.\n")
- metadatafile.close()
- except (cPickle.UnpicklingError, OSError, IOError, EOFError):
- metadata = {}
- if not metadata.has_key(baseurl):
- metadata[baseurl]={}
- if not metadata[baseurl].has_key("indexname"):
- metadata[baseurl]["indexname"]=""
- if not metadata[baseurl].has_key("timestamp"):
- metadata[baseurl]["timestamp"]=0
- if not metadata[baseurl].has_key("unmodified"):
- metadata[baseurl]["unmodified"]=0
- if not metadata[baseurl].has_key("data"):
- metadata[baseurl]["data"]={}
-
- filelist = dir_get_list(baseurl, conn)
- tbz2list = match_in_array(filelist, suffix=".tbz2")
- metalist = match_in_array(filelist, prefix="metadata.idx")
- del filelist
-
- # Determine if our metadata file is current.
- metalist.sort()
- metalist.reverse() # makes the order new-to-old.
- havecache=0
- for mfile in metalist:
- if usingcache and \
- ((metadata[baseurl]["indexname"] != mfile) or \
- (metadata[baseurl]["timestamp"] < int(time.time()-(60*60*24)))):
- # Try to download new cache until we succeed on one.
- data=""
- for trynum in [1,2,3]:
- mytempfile = tempfile.TemporaryFile()
- try:
- file_get(baseurl+"/"+mfile, mytempfile, conn)
- if mytempfile.tell() > len(data):
- mytempfile.seek(0)
- data = mytempfile.read()
- except ValueError, e:
- sys.stderr.write("--- "+str(e)+"\n")
- if trynum < 3:
- sys.stderr.write("Retrying...\n")
- mytempfile.close()
- continue
- if match_in_array([mfile],suffix=".gz"):
- sys.stderr.write("gzip'd\n")
- try:
- import gzip
- mytempfile.seek(0)
- gzindex = gzip.GzipFile(mfile[:-3],'rb',9,mytempfile)
- data = gzindex.read()
- except SystemExit, e:
- raise
- except Exception, e:
- mytempfile.close()
- sys.stderr.write("!!! Failed to use gzip: "+str(e)+"\n")
- mytempfile.close()
- try:
- metadata[baseurl]["data"] = cPickle.loads(data)
- del data
- metadata[baseurl]["indexname"] = mfile
- metadata[baseurl]["timestamp"] = int(time.time())
- metadata[baseurl]["modified"] = 0 # It's not, right after download.
- sys.stderr.write("Pickle loaded.\n")
- break
- except SystemExit, e:
- raise
- except Exception, e:
- sys.stderr.write("!!! Failed to read data from index: "+str(mfile)+"\n")
- sys.stderr.write("!!! "+str(e)+"\n")
- try:
- metadatafile = open("/var/cache/edb/remote_metadata.pickle", "w+")
- cPickle.dump(metadata,metadatafile)
- metadatafile.close()
- except SystemExit, e:
- raise
- except Exception, e:
- sys.stderr.write("!!! Failed to write binary metadata to disk!\n")
- sys.stderr.write("!!! "+str(e)+"\n")
- break
- # We may have metadata... now we run through the tbz2 list and check.
- sys.stderr.write(yellow("cache miss: 'x'")+" --- "+green("cache hit: 'o'")+"\n")
- binpkg_filenames = set()
- for x in tbz2list:
- x = os.path.basename(x)
- binpkg_filenames.add(x)
- if ((not metadata[baseurl]["data"].has_key(x)) or \
- (x not in metadata[baseurl]["data"].keys())):
- sys.stderr.write(yellow("x"))
- metadata[baseurl]["modified"] = 1
- myid = None
- for retry in xrange(3):
- try:
- myid = file_get_metadata(
- "/".join((baseurl.rstrip("/"), x.lstrip("/"))),
- conn, chunk_size)
- break
- except httplib.BadStatusLine:
- # Sometimes this error is thrown from conn.getresponse() in
- # make_http_request(). The docstring for this error in
- # httplib.py says "Presumably, the server closed the
- # connection before sending a valid response".
- conn, protocol, address, params, headers = create_conn(
- baseurl)
-
- if myid and myid[0]:
- metadata[baseurl]["data"][x] = make_metadata_dict(myid)
- elif verbose:
- sys.stderr.write(red("!!! Failed to retrieve metadata on: ")+str(x)+"\n")
- else:
- sys.stderr.write(green("o"))
- # Cleanse stale cache for files that don't exist on the server anymore.
- stale_cache = set(metadata[baseurl]["data"]).difference(binpkg_filenames)
- if stale_cache:
- for x in stale_cache:
- del metadata[baseurl]["data"][x]
- metadata[baseurl]["modified"] = 1
- del stale_cache
- del binpkg_filenames
- sys.stderr.write("\n")
-
- try:
- if metadata[baseurl].has_key("modified") and metadata[baseurl]["modified"]:
- metadata[baseurl]["timestamp"] = int(time.time())
- metadatafile = open("/var/cache/edb/remote_metadata.pickle", "w+")
- cPickle.dump(metadata,metadatafile)
- metadatafile.close()
- if makepickle:
- metadatafile = open(makepickle, "w")
- cPickle.dump(metadata[baseurl]["data"],metadatafile)
- metadatafile.close()
- except SystemExit, e:
- raise
- except Exception, e:
- sys.stderr.write("!!! Failed to write binary metadata to disk!\n")
- sys.stderr.write("!!! "+str(e)+"\n")
-
- if not keepconnection:
- conn.close()
-
- return metadata[baseurl]["data"]
+portage/getbinpkg.py
\ No newline at end of file
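
With the symlink in place, the legacy flat import keeps working while the
module's real home moves under the portage package, which is the point of
the "namespace sanitizing" series. A rough sketch of the intended effect,
assuming pym/ is on sys.path (how Portage located its modules at the time)
and that pym/portage/ is already an importable package at this step:

    import sys
    sys.path.insert(0, "pym")

    import getbinpkg            # legacy flat name, resolved via the symlink
    import portage.getbinpkg    # new namespaced home of the same code

    # Both sys.modules entries are backed by one source file:
    # pym/getbinpkg.py is now just a symlink to pym/portage/getbinpkg.py.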