diff options
author | Sean B. Palmer <http://inamidst.com/sbp/> | 2008-02-21 12:06:33 +0000 |
---|---|---|
committer | Sean B. Palmer <http://inamidst.com/sbp/> | 2008-02-21 12:06:33 +0000 |
commit | 7931fab14599b739c18c8f1ebcc24b75688dbc09 (patch) | |
tree | bf4df9757f10c155e3b6f78aed48f15884ebbbe6 /modules/head.py | |
download | bot-7931fab14599b739c18c8f1ebcc24b75688dbc09.tar.gz bot-7931fab14599b739c18c8f1ebcc24b75688dbc09.tar.bz2 bot-7931fab14599b739c18c8f1ebcc24b75688dbc09.zip |
Phenny2, now being tested on Freenode as the main phenny.
Diffstat (limited to 'modules/head.py')
-rwxr-xr-x | modules/head.py | 126 |
1 files changed, 126 insertions, 0 deletions
diff --git a/modules/head.py b/modules/head.py new file mode 100755 index 0000000..4b75cb4 --- /dev/null +++ b/modules/head.py @@ -0,0 +1,126 @@ +#!/usr/bin/env python +""" +head.py - Phenny HTTP Metadata Utilities +Copyright 2008, Sean B. Palmer, inamidst.com +Licensed under the Eiffel Forum License 2. + +http://inamidst.com/phenny/ +""" + +import re, urllib +from htmlentitydefs import name2codepoint +import web +from tools import deprecated + +@deprecated +def f_httphead(self, origin, match, args): + """.head <URI> <FieldName>? - Perform an HTTP HEAD on URI.""" + if origin.sender == '#talis': return + uri = match.group(2) + header = match.group(3) + + try: info = web.head(uri) + except IOError: + self.msg(origin.sender, "Can't connect to %s" % uri) + return + + if not isinstance(info, list): + info = dict(info) + info['Status'] = '200' + else: + newInfo = dict(info[0]) + newInfo['Status'] = str(info[1]) + info = newInfo + + if header is None: + msg = 'Status: %s (for more, try ".head uri header")' % info['Status'] + self.msg(origin.sender, msg) + else: + headerlower = header.lower() + if info.has_key(headerlower): + self.msg(origin.sender, header + ': ' + info.get(headerlower)) + else: + msg = 'There was no %s header in the response.' % header + self.msg(origin.sender, msg) +f_httphead.rule = (['head'], r'(\S+)(?: +(\S+))?') +f_httphead.thread = True + +r_title = re.compile(r'(?ims)<title[^>]*>(.*?)</title\s*>') +r_entity = re.compile(r'&[A-Za-z0-9#]+;') + +@deprecated +def f_title(self, origin, match, args): + """.title <URI> - Return the title of URI.""" + uri = match.group(2) + if not ':' in uri: + uri = 'http://' + uri + + try: + redirects = 0 + while True: + info = web.head(uri) + + if not isinstance(info, list): + status = '200' + else: + status = str(info[1]) + info = info[0] + if status.startswith('3'): + uri = info['Location'] + else: break + + redirects += 1 + if redirects >= 25: + self.msg(origin.sender, origin.nick + ": Too many redirects") + return + + try: mtype = info['Content-Type'] + except: + self.msg(origin.sender, origin.nick + ": Document isn't HTML") + return + if not (('/html' in mtype) or ('/xhtml' in mtype)): + self.msg(origin.sender, origin.nick + ": Document isn't HTML") + return + + u = urllib.urlopen(uri) + bytes = u.read(32768) + u.close() + + except IOError: + self.msg(origin.sender, "Can't connect to %s" % uri) + return + + m = r_title.search(bytes) + if m: + title = m.group(1) + title = title.strip() + title = title.replace('\t', ' ') + title = title.replace('\r', ' ') + title = title.replace('\n', ' ') + while ' ' in title: + title = title.replace(' ', ' ') + if len(title) > 200: + title = title[:200] + '[...]' + + def e(m): + entity = m.group(0) + if entity.startswith('&#x'): + cp = int(entity[3:-1], 16) + return unichr(cp).encode('utf-8') + elif entity.startswith('&#'): + cp = int(entity[2:-1]) + return unichr(cp).encode('utf-8') + else: + char = name2codepoint[entity[1:-1]] + return unichr(char).encode('utf-8') + title = r_entity.sub(e, title) + + if not title: + title = '[Title is the empty document, "".]' + self.msg(origin.sender, origin.nick + ': ' + title) + else: self.msg(origin.sender, origin.nick + ': No title found') +f_title.rule = (['title'], r'(\S+)') +f_title.thread = True + +if __name__ == '__main__': + print __doc__ |