From d9fc4acc572c6647a4f27b838d35d27d805d190e Mon Sep 17 00:00:00 2001
From: Jason Stubbs
Date: Sun, 28 Aug 2005 08:37:44 +0000
Subject: Migration (without history) of the current stable line to subversion.

svn path=/main/branches/2.0/; revision=1941
---
 pym/getbinpkg.py | 541 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 541 insertions(+)
 create mode 100644 pym/getbinpkg.py

diff --git a/pym/getbinpkg.py b/pym/getbinpkg.py
new file mode 100644
index 000000000..7145d3adb
--- /dev/null
+++ b/pym/getbinpkg.py
@@ -0,0 +1,541 @@
+# getbinpkg.py -- Portage binary-package helper functions
+# Copyright 2003-2004 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+# $Header: /var/cvsroot/gentoo-src/portage/pym/getbinpkg.py,v 1.12.2.3 2005/01/16 02:35:33 carpaski Exp $
+cvs_id_string="$Id: getbinpkg.py,v 1.12.2.3 2005/01/16 02:35:33 carpaski Exp $"[5:-2]
+
+from output import *
+import htmllib,HTMLParser,string,formatter,sys,os,xpak,time,tempfile,cPickle,base64
+
+try:
+	import ftplib
+except SystemExit, e:
+	raise
+except Exception, e:
+	sys.stderr.write(red("!!! CANNOT IMPORT FTPLIB: ")+str(e)+"\n")
+
+try:
+	import httplib
+except SystemExit, e:
+	raise
+except Exception, e:
+	sys.stderr.write(red("!!! CANNOT IMPORT HTTPLIB: ")+str(e)+"\n")
+
+def make_metadata_dict(data):
+	"""Convert an xpak (index, data) pair into a plain metadata dict."""
+	myid,myglob = data
+
+	mydict = {}
+	for x in xpak.getindex_mem(myid):
+		mydict[x] = xpak.getitem(data,x)
+
+	return mydict
+
+class ParseLinks(HTMLParser.HTMLParser):
+	"""Parser class that overrides HTMLParser to grab all anchors from an html
+	page and provide suffix and prefix limiters"""
+	def __init__(self):
+		self.PL_anchors = []
+		HTMLParser.HTMLParser.__init__(self)
+
+	def get_anchors(self):
+		return self.PL_anchors
+
+	def get_anchors_by_prefix(self,prefix):
+		newlist = []
+		for x in self.PL_anchors:
+			if (len(x) >= len(prefix)) and (x[:len(prefix)] == prefix):
+				if x not in newlist:
+					newlist.append(x[:])
+		return newlist
+
+	def get_anchors_by_suffix(self,suffix):
+		newlist = []
+		for x in self.PL_anchors:
+			if (len(x) >= len(suffix)) and (x[-len(suffix):] == suffix):
+				if x not in newlist:
+					newlist.append(x[:])
+		return newlist
+
+	def handle_endtag(self,tag):
+		pass
+
+	def handle_starttag(self,tag,attrs):
+		if tag == "a":
+			for x in attrs:
+				if x[0] == 'href':
+					if x[1] not in self.PL_anchors:
+						self.PL_anchors.append(x[1])
+
+
+def create_conn(baseurl,conn=None):
+	"""(baseurl,conn) --- Takes a protocol://site:port/address url, and an
+	optional connection. If the connection is already active, it is passed on.
+	baseurl is reduced to address, and the result is returned as the tuple
+	(conn,protocol,address,http_params,http_headers)"""
+	parts = string.split(baseurl, "://", 1)
+	if len(parts) != 2:
+		raise ValueError, "Provided URL does not contain protocol identifier. '%s'" % baseurl
+	protocol,url_parts = parts
+	del parts
+	host,address = string.split(url_parts, "/", 1)
+	del url_parts
+	address = "/"+address
+
+	userpass_host = string.split(host, "@", 1)
+	if len(userpass_host) == 1:
+		host = userpass_host[0]
+		userpass = ["anonymous"]
+	else:
+		host = userpass_host[1]
+		userpass = string.split(userpass_host[0], ":")
+	del userpass_host
+
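+	# Illustrative example of the parsing above: "ftp://user:pass@host/dir/"
+	# yields protocol="ftp", host="host", address="/dir/" and
+	# userpass=["user","pass"]; with no "@" in the URL, the login
+	# defaults to "anonymous".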
+	if len(userpass) > 2:
+		raise ValueError, "Unable to interpret username/password provided."
+	elif len(userpass) == 2:
+		username = userpass[0]
+		password = userpass[1]
+	elif len(userpass) == 1:
+		username = userpass[0]
+		password = None
+	del userpass
+
+	http_headers = {}
+	http_params = {}
+	if username and password:
+		http_headers = {
+			"Authorization": "Basic %s" %
+				string.replace(
+					base64.encodestring("%s:%s" % (username, password)),
+					"\012",
+					""
+				),
+		}
+
+	if not conn:
+		if protocol == "https":
+			conn = httplib.HTTPSConnection(host)
+		elif protocol == "http":
+			conn = httplib.HTTPConnection(host)
+		elif protocol == "ftp":
+			passive = 1
+			if host[-1] == "*":
+				passive = 0
+				host = host[:-1]
+			conn = ftplib.FTP(host)
+			if password:
+				conn.login(username,password)
+			else:
+				sys.stderr.write(yellow(" * No password provided for username")+" '"+str(username)+"'\n\n")
+				conn.login(username)
+			conn.set_pasv(passive)
+			conn.set_debuglevel(0)
+		else:
+			raise NotImplementedError, "%s is not a supported protocol." % protocol
+
+	return (conn,protocol,address, http_params, http_headers)
+
+def make_ftp_request(conn, address, rest=None, dest=None):
+	"""(conn,address,rest) --- uses the conn object to request the data
+	from address, issuing a REST command first if rest is passed."""
+	try:
+
+		if dest:
+			fstart_pos = dest.tell()
+
+		conn.voidcmd("TYPE I")
+		fsize = conn.size(address)
+
+		if (rest != None) and (rest < 0):
+			rest = fsize+int(rest)
+			if rest < 0:
+				rest = 0
+
+		if rest != None:
+			mysocket = conn.transfercmd("RETR "+str(address), rest)
+		else:
+			mysocket = conn.transfercmd("RETR "+str(address))
+
+		mydata = ""
+		while 1:
+			somedata = mysocket.recv(8192)
+			if somedata:
+				if dest:
+					dest.write(somedata)
+				else:
+					mydata = mydata + somedata
+			else:
+				break
+
+		if dest:
+			data_size = dest.tell() - fstart_pos
+		else:
+			data_size = len(mydata)
+
+		mysocket.close()
+		conn.voidresp()
+		conn.voidcmd("TYPE A")
+
+		return mydata,not (fsize==data_size),""
+
+	except ValueError, e:
+		return None,int(str(e)[:4]),str(e)
+
+
+def make_http_request(conn, address, params={}, headers={}, dest=None):
+	"""(conn,address,params,headers) --- uses the conn object to request
+	the data from address, performing Location forwarding and using the
+	optional params and headers."""
+
+	rc = 0
+	response = None
+	while (rc == 0) or (rc == 301) or (rc == 302):
+		try:
+			if (rc != 0):
+				conn,ignore,ignore,ignore,ignore = create_conn(address)
+			conn.request("GET", address, params, headers)
+		except SystemExit, e:
+			raise
+		except Exception, e:
+			return None,None,"Server request failed: "+str(e)
+		response = conn.getresponse()
+		rc = response.status
+
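+		# Worked example (assumed server behavior): a 302 reply carrying
+		# "Location: http://mirror.example/packages/" causes a reconnect
+		# to that address and another GET, looping until a non-redirect
+		# status arrives.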
+		# 301 means the address has moved permanently; 302 temporarily.
+		if ((rc == 301) or (rc == 302)):
+			ignored_data = response.read()
+			del ignored_data
+			for x in string.split(str(response.msg), "\n"):
+				parts = string.split(x, ": ", 1)
+				if parts[0] == "Location":
+					if (rc == 301):
+						sys.stderr.write(red("Location has moved: ")+str(parts[1])+"\n")
+					if (rc == 302):
+						sys.stderr.write(red("Location has temporarily moved: ")+str(parts[1])+"\n")
+					address = parts[1]
+					break
+
+	if (rc != 200) and (rc != 206):
+		sys.stderr.write(str(response.msg)+"\n")
+		sys.stderr.write(response.read()+"\n")
+		sys.stderr.write("address: "+address+"\n")
+		return None,rc,"Server did not respond successfully ("+str(response.status)+": "+str(response.reason)+")"
+
+	if dest:
+		dest.write(response.read())
+		return "",0,""
+
+	return response.read(),0,""
+
+
+def match_in_array(array, prefix="", suffix="", match_both=1, allow_overlap=0):
+	myarray = []
+
+	if not (prefix and suffix):
+		match_both = 0
+
+	for x in array:
+		add_p = 0
+		if prefix and (len(x) >= len(prefix)) and (x[:len(prefix)] == prefix):
+			add_p = 1
+
+		if match_both:
+			if prefix and not add_p: # Require both, but don't have first one.
+				continue
+		else:
+			if add_p: # Only need one, and we have it.
+				myarray.append(x[:])
+				continue
+
+		if not allow_overlap: # Don't allow the prefix and suffix to overlap.
+			if len(x) >= (len(prefix)+len(suffix)):
+				y = x[len(prefix):]
+			else:
+				continue # Too short to match.
+		else:
+			y = x # Do whatever... We're overlapping.
+
+		if suffix and (len(y) >= len(suffix)) and (y[-len(suffix):] == suffix):
+			myarray.append(x) # It matches
+		else:
+			continue # Doesn't match.
+
+	return myarray
+
+
+
+def dir_get_list(baseurl,conn=None):
+	"""(baseurl[,connection]) -- Takes a base url to connect to and read from.
+	URL should be in the form <proto>://<site>[:port]<path>
+	Connection is used for persistent connection instances."""
+
+	if not conn:
+		keepconnection = 0
+	else:
+		keepconnection = 1
+
+	conn,protocol,address,params,headers = create_conn(baseurl, conn)
+
+	listing = None
+	if protocol in ["http","https"]:
+		page,rc,msg = make_http_request(conn,address,params,headers)
+
+		if page:
+			parser = ParseLinks()
+			parser.feed(page)
+			del page
+			listing = parser.get_anchors()
+		else:
+			raise Exception, "Unable to get listing: %s %s" % (rc,msg)
+	elif protocol in ["ftp"]:
+		if address[-1] == '/':
+			olddir = conn.pwd()
+			conn.cwd(address)
+			listing = conn.nlst()
+			conn.cwd(olddir)
+			del olddir
+		else:
+			listing = conn.nlst(address)
+	else:
+		raise TypeError, "Unknown protocol. '%s'" % protocol
+
+	if not keepconnection:
+		conn.close()
+
+	return listing
+
+def file_get_metadata(baseurl,conn=None, chunk_size=3000):
+	"""(baseurl[,connection]) -- Takes a base url to connect to and read from.
+	URL should be in the form <proto>://<site>[:port]<path>
+	Connection is used for persistent connection instances."""
+
+	if not conn:
+		keepconnection = 0
+	else:
+		keepconnection = 1
+
+	conn,protocol,address,params,headers = create_conn(baseurl, conn)
+
+	if protocol in ["http","https"]:
+		headers["Range"] = "bytes=-"+str(chunk_size)
+		data,rc,msg = make_http_request(conn, address, params, headers)
+	elif protocol in ["ftp"]:
+		data,rc,msg = make_ftp_request(conn, address, -chunk_size)
+	else:
+		raise TypeError, "Unknown protocol. '%s'" % protocol
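+	# Trailer layout assumed by the code below:
+	#   <tar.bz2 archive><xpak segment><4-byte xpak length>"STOP"
+	# decodeint(data[-8:-4]) recovers the xpak segment's length; when the
+	# whole trailer (xpaksize+8 bytes) exceeds the chunk just fetched,
+	# the request is retried with a larger chunk_size.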
'%s'" % protocol + + if data: + xpaksize = xpak.decodeint(data[-8:-4]) + if (xpaksize+8) > chunk_size: + myid = file_get_metadata(baseurl, conn, (xpaksize+8)) + if not keepconnection: + conn.close() + return myid + else: + xpak_data = data[len(data)-(xpaksize+8):-8] + del data + + myid = xpak.xsplit_mem(xpak_data) + if not myid: + myid = None,None + del xpak_data + else: + myid = None,None + + if not keepconnection: + conn.close() + + return myid + + +def file_get(baseurl,dest,conn=None,fcmd=None): + """(baseurl,dest,fcmd=) -- Takes a base url to connect to and read from. + URL should be in the for ://[user[:pass]@][:port]""" + + if not fcmd: + return file_get_lib(baseurl,dest,conn) + + fcmd = string.replace(fcmd, "${DISTDIR}", dest) + fcmd = string.replace(fcmd, "${URI}", baseurl) + fcmd = string.replace(fcmd, "${FILE}", os.path.basename(baseurl)) + mysplit = string.split(fcmd) + mycmd = mysplit[0] + myargs = [os.path.basename(mycmd)]+mysplit[1:] + mypid=os.fork() + if mypid == 0: + os.execv(mycmd,myargs) + sys.stderr.write("!!! Failed to spawn fetcher.\n") + sys.exit(1) + retval=os.waitpid(mypid,0)[1] + if (retval & 0xff) == 0: + retval = retval >> 8 + else: + sys.stderr.write("Spawned processes caught a signal.\n") + sys.exit(1) + if retval != 0: + sys.stderr.write("Fetcher exited with a failure condition.\n") + return 0 + return 1 + +def file_get_lib(baseurl,dest,conn=None): + """(baseurl[,connection]) -- Takes a base url to connect to and read from. + URL should be in the for ://[:port] + Connection is used for persistent connection instances.""" + + if not conn: + keepconnection = 0 + else: + keepconnection = 1 + + conn,protocol,address,params,headers = create_conn(baseurl, conn) + + sys.stderr.write("Fetching '"+str(os.path.basename(address)+"'\n")) + if protocol in ["http","https"]: + data,rc,msg = make_http_request(conn, address, params, headers, dest=dest) + elif protocol in ["ftp"]: + data,rc,msg = make_ftp_request(conn, address, dest=dest) + else: + raise TypeError, "Unknown protocol. '%s'" % protocol + + if not keepconnection: + conn.close() + + return rc + + +def dir_get_metadata(baseurl, conn=None, chunk_size=3000, verbose=1, usingcache=1, makepickle=None): + """(baseurl,conn,chunk_size,verbose) -- + """ + if not conn: + keepconnection = 0 + else: + keepconnection = 1 + + if makepickle == None: + makepickle = "/var/cache/edb/metadata.idx.most_recent" + + conn,protocol,address,params,headers = create_conn(baseurl, conn) + + filedict = {} + + try: + metadatafile = open("/var/cache/edb/remote_metadata.pickle") + metadata = cPickle.load(metadatafile) + sys.stderr.write("Loaded metadata pickle.\n") + metadatafile.close() + except SystemExit, e: + raise + except: + metadata = {} + if not metadata.has_key(baseurl): + metadata[baseurl]={} + if not metadata[baseurl].has_key("indexname"): + metadata[baseurl]["indexname"]="" + if not metadata[baseurl].has_key("timestamp"): + metadata[baseurl]["timestamp"]=0 + if not metadata[baseurl].has_key("unmodified"): + metadata[baseurl]["unmodified"]=0 + if not metadata[baseurl].has_key("data"): + metadata[baseurl]["data"]={} + + filelist = dir_get_list(baseurl, conn) + tbz2list = match_in_array(filelist, suffix=".tbz2") + metalist = match_in_array(filelist, prefix="metadata.idx") + del filelist + + # Determine if our metadata file is current. + metalist.sort() + metalist.reverse() # makes the order new-to-old. 
+	havecache=0
+	for mfile in metalist:
+		if usingcache and \
+		   ((metadata[baseurl]["indexname"] != mfile) or \
+		   (metadata[baseurl]["timestamp"] < int(time.time()-(60*60*24)))):
+			# Try to download a new cache until we succeed on one.
+			data=""
+			for trynum in [1,2,3]:
+				mytempfile = tempfile.TemporaryFile()
+				try:
+					file_get(baseurl+"/"+mfile, mytempfile, conn)
+					if mytempfile.tell() > len(data):
+						mytempfile.seek(0)
+						data = mytempfile.read()
+				except ValueError, e:
+					sys.stderr.write("--- "+str(e)+"\n")
+					if trynum < 3:
+						sys.stderr.write("Retrying...\n")
+					mytempfile.close()
+					continue
+				if match_in_array([mfile],suffix=".gz"):
+					sys.stderr.write("gzip'd\n")
+					try:
+						import gzip
+						mytempfile.seek(0)
+						gzindex = gzip.GzipFile(mfile[:-3],'rb',9,mytempfile)
+						data = gzindex.read()
+					except SystemExit, e:
+						raise
+					except Exception, e:
+						mytempfile.close()
+						sys.stderr.write("!!! Failed to use gzip: "+str(e)+"\n")
+				mytempfile.close()
+				try:
+					metadata[baseurl]["data"] = cPickle.loads(data)
+					del data
+					metadata[baseurl]["indexname"] = mfile
+					metadata[baseurl]["timestamp"] = int(time.time())
+					metadata[baseurl]["modified"] = 0 # It's not, right after download.
+					sys.stderr.write("Pickle loaded.\n")
+					break
+				except SystemExit, e:
+					raise
+				except Exception, e:
+					sys.stderr.write("!!! Failed to read data from index: "+str(mfile)+"\n")
+					sys.stderr.write("!!! "+str(e)+"\n")
+			try:
+				metadatafile = open("/var/cache/edb/remote_metadata.pickle", "w+")
+				cPickle.dump(metadata,metadatafile)
+				metadatafile.close()
+			except SystemExit, e:
+				raise
+			except Exception, e:
+				sys.stderr.write("!!! Failed to write binary metadata to disk!\n")
+				sys.stderr.write("!!! "+str(e)+"\n")
+		break
+
+	# We may have metadata... now we run through the tbz2 list and check.
+	sys.stderr.write(yellow("cache miss: 'x'")+" --- "+green("cache hit: 'o'")+"\n")
+	for x in tbz2list:
+		x = os.path.basename(x)
+		if not metadata[baseurl]["data"].has_key(x):
+			sys.stderr.write(yellow("x"))
+			metadata[baseurl]["modified"] = 1
+			myid = file_get_metadata(baseurl+"/"+x, conn, chunk_size)
+
+			if myid[0]:
+				metadata[baseurl]["data"][x] = make_metadata_dict(myid)
+			elif verbose:
+				sys.stderr.write(red("!!! Failed to retrieve metadata on: ")+str(x)+"\n")
+		else:
+			sys.stderr.write(green("o"))
+	sys.stderr.write("\n")
+
+	try:
+		if metadata[baseurl].has_key("modified") and metadata[baseurl]["modified"]:
+			metadata[baseurl]["timestamp"] = int(time.time())
+			metadatafile = open("/var/cache/edb/remote_metadata.pickle", "w+")
+			cPickle.dump(metadata,metadatafile)
+			metadatafile.close()
+		if makepickle:
+			metadatafile = open(makepickle, "w")
+			cPickle.dump(metadata[baseurl]["data"],metadatafile)
+			metadatafile.close()
+	except SystemExit, e:
+		raise
+	except Exception, e:
+		sys.stderr.write("!!! Failed to write binary metadata to disk!\n")
+		sys.stderr.write("!!! "+str(e)+"\n")
+
+	if not keepconnection:
+		conn.close()
+
+	return metadata[baseurl]["data"]
--
cgit v1.2.3-1-g7c22