summaryrefslogtreecommitdiffstats
path: root/modules/head.py
diff options
context:
space:
mode:
author: Sean B. Palmer <http://inamidst.com/sbp/> 2008-02-21 12:06:33 +0000
committer: Sean B. Palmer <http://inamidst.com/sbp/> 2008-02-21 12:06:33 +0000
commit: 7931fab14599b739c18c8f1ebcc24b75688dbc09 (patch)
tree: bf4df9757f10c155e3b6f78aed48f15884ebbbe6 /modules/head.py
download: bot-7931fab14599b739c18c8f1ebcc24b75688dbc09.tar.gz
bot-7931fab14599b739c18c8f1ebcc24b75688dbc09.tar.bz2
bot-7931fab14599b739c18c8f1ebcc24b75688dbc09.zip
Phenny2, now being tested on Freenode as the main phenny.
Diffstat (limited to 'modules/head.py')
-rwxr-xr-x modules/head.py 126
1 files changed, 126 insertions, 0 deletions
diff --git a/modules/head.py b/modules/head.py
new file mode 100755
index 0000000..4b75cb4
--- /dev/null
+++ b/modules/head.py
@@ -0,0 +1,126 @@
+#!/usr/bin/env python
+"""
+head.py - Phenny HTTP Metadata Utilities
+Copyright 2008, Sean B. Palmer, inamidst.com
+Licensed under the Eiffel Forum License 2.
+
+http://inamidst.com/phenny/
+"""
+
+import re, urllib
+from htmlentitydefs import name2codepoint
+import web
+from tools import deprecated
+
+@deprecated
+def f_httphead(self, origin, match, args):
+ """.head <URI> <FieldName>? - Perform an HTTP HEAD on URI."""
+ if origin.sender == '#talis': return
+ uri = match.group(2)
+ header = match.group(3)
+
+ try: info = web.head(uri)
+ except IOError:
+ self.msg(origin.sender, "Can't connect to %s" % uri)
+ return
+
+ if not isinstance(info, list):
+ info = dict(info)
+ info['Status'] = '200'
+ else:
+ newInfo = dict(info[0])
+ newInfo['Status'] = str(info[1])
+ info = newInfo
+
+ if header is None:
+ msg = 'Status: %s (for more, try ".head uri header")' % info['Status']
+ self.msg(origin.sender, msg)
+ else:
+ headerlower = header.lower()
+ if info.has_key(headerlower):
+ self.msg(origin.sender, header + ': ' + info.get(headerlower))
+ else:
+ msg = 'There was no %s header in the response.' % header
+ self.msg(origin.sender, msg)
+f_httphead.rule = (['head'], r'(\S+)(?: +(\S+))?')
+f_httphead.thread = True
+
+# Matches a <title> element and captures its contents (non-greedy). The
+# inline flags make the match case-insensitive and multi-line, and let '.'
+# span newlines.
+r_title = re.compile(r'(?ims)<title[^>]*>(.*?)</title\s*>')
+# Matches anything shaped like an HTML entity: '&', then letters, digits
+# or '#', then ';'. It does not check that the entity is actually valid.
+r_entity = re.compile(r'&[A-Za-z0-9#]+;')
+
+@deprecated
+def f_title(self, origin, match, args):
+ """.title <URI> - Return the title of URI."""
+ uri = match.group(2)
+ if not ':' in uri:
+ uri = 'http://' + uri
+
+ try:
+ redirects = 0
+ while True:
+ info = web.head(uri)
+
+ if not isinstance(info, list):
+ status = '200'
+ else:
+ status = str(info[1])
+ info = info[0]
+ if status.startswith('3'):
+ uri = info['Location']
+ else: break
+
+ redirects += 1
+ if redirects >= 25:
+ self.msg(origin.sender, origin.nick + ": Too many redirects")
+ return
+
+ try: mtype = info['Content-Type']
+ except:
+ self.msg(origin.sender, origin.nick + ": Document isn't HTML")
+ return
+ if not (('/html' in mtype) or ('/xhtml' in mtype)):
+ self.msg(origin.sender, origin.nick + ": Document isn't HTML")
+ return
+
+ u = urllib.urlopen(uri)
+ bytes = u.read(32768)
+ u.close()
+
+ except IOError:
+ self.msg(origin.sender, "Can't connect to %s" % uri)
+ return
+
+ m = r_title.search(bytes)
+ if m:
+ title = m.group(1)
+ title = title.strip()
+ title = title.replace('\t', ' ')
+ title = title.replace('\r', ' ')
+ title = title.replace('\n', ' ')
+ while ' ' in title:
+ title = title.replace(' ', ' ')
+ if len(title) > 200:
+ title = title[:200] + '[...]'
+
+ def e(m):
+ entity = m.group(0)
+ if entity.startswith('&#x'):
+ cp = int(entity[3:-1], 16)
+ return unichr(cp).encode('utf-8')
+ elif entity.startswith('&#'):
+ cp = int(entity[2:-1])
+ return unichr(cp).encode('utf-8')
+ else:
+ char = name2codepoint[entity[1:-1]]
+ return unichr(char).encode('utf-8')
+ title = r_entity.sub(e, title)
+
+ if not title:
+ title = '[Title is the empty document, "".]'
+ self.msg(origin.sender, origin.nick + ': ' + title)
+ else: self.msg(origin.sender, origin.nick + ': No title found')
+f_title.rule = (['title'], r'(\S+)')
+f_title.thread = True
+
+if __name__ == '__main__':
+ print __doc__