1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
|
#!/usr/bin/env python
"""
codepoints.py - Phenny Codepoints Module
Copyright 2008, Sean B. Palmer, inamidst.com
Licensed under the Eiffel Forum License 2.
http://inamidst.com/phenny/
"""
import re, unicodedata
from itertools import islice
def about(u, cp=None, name=None):
    """Describe one character as ``U+XXXX NAME (char)``.

    u    -- the character to describe (a one-character unicode string).
    cp   -- its codepoint; computed with ord(u) when omitted.
    name -- its Unicode name; looked up with unicodedata.name(u) when
            omitted.

    Returns the formatted description. When the name has to be looked up
    and the character has none, returns ``U+XXXX (No name found)``.
    """
    codepoint = ord(u) if cp is None else cp

    label = name
    if label is None:
        try:
            label = unicodedata.name(u)
        except ValueError:
            # unicodedata.name() raises ValueError for unnamed characters.
            return 'U+%04X (No name found)' % codepoint

    if unicodedata.combining(u):
        # Combining marks are shown after a placeholder; the escaped bytes
        # are the UTF-8 encoding of U+25CC DOTTED CIRCLE.
        layout = 'U+%04X %s (\xe2\x97\x8c%s)'
    else:
        layout = 'U+%04X %s (%s)'
    return layout % (codepoint, label, u.encode('utf-8'))
def codepoint_simple(arg):
    """Find the best-matching character for a plain-word query.

    The query is upper-cased and each space becomes ``.*\\b``, so the words
    must appear in order in the character name. Codepoints U+0000 through
    U+FFFE are scanned, first requiring a word boundary at the end of the
    query and, only if that finds nothing, again without that requirement.

    Returns the about() description of the match with the shortest name
    (ties resolved by the remaining tuple fields), or None if neither
    pass matches anything.
    """
    words = arg.upper().replace(' ', '.*\\b')

    # Strict pattern first, relaxed pattern as a fallback. Each pattern is
    # compiled only when its pass is actually reached.
    for pattern in ('\\b' + words + '\\b', '\\b' + words):
        matcher = re.compile(pattern)
        candidates = []
        for cp in xrange(0xFFFF):
            char = unichr(cp)
            try:
                name = unicodedata.name(char)
            except ValueError:
                # Unnamed codepoint: nothing to match against.
                continue
            if matcher.search(name):
                candidates.append((len(name), char, cp, name))
        if candidates:
            length, char, cp, name = min(candidates)
            return about(char, cp, name)

    return None
def codepoint_extended(arg):
    """Yield about() descriptions of codepoints whose name matches a regexp.

    arg -- a regular expression; it is upper-cased before compiling and
           searched against each character name. Characters without a
           name are matched against the placeholder name ``-``.

    This is a generator, so nothing (including regexp compilation) happens
    until the first item is requested.

    Raises ValueError('Broken regexp: ...') on first iteration if the
    pattern cannot be compiled.
    """
    # Local import: only this function needs the interpreter's codepoint
    # ceiling.
    import sys

    pattern = arg.upper()
    try:
        r_search = re.compile(pattern)
    except Exception:
        # Not a bare except: KeyboardInterrupt/SystemExit must not be
        # reported as a broken pattern. Any ordinary compile failure is
        # still surfaced as ValueError, as before.
        raise ValueError('Broken regexp: %r' % pattern)

    # Scan U+0001 up to the highest codepoint this build can represent.
    # On narrow builds unichr() rejects anything above U+FFFF, so a fixed
    # 0x10FFFF bound would abort the search with a ValueError; on wide
    # builds this also includes U+10FFFF itself.
    for cp in xrange(1, sys.maxunicode + 1):
        char = unichr(cp)
        name = unicodedata.name(char, '-')
        if r_search.search(name):
            yield about(char, cp, name)
def u(phenny, input):
    """Handle the ``u`` command: look up Unicode codepoints.

    The argument is input.bytes with its first three bytes dropped
    (presumably the command prefix such as ".u " -- confirm against the
    dispatcher). It is interpreted as follows:

    * Empty: reply with an error.
    * Contains anything other than ASCII letters, digits and the query
      punctuation ``- .?+*{}[]\\/^$``: treated as literal UTF-8 text. Up to
      three characters are described individually, four to ten are listed
      as bare ``U+XXXX`` values, and longer input is rejected.
    * Exactly four characters that parse as hexadecimal: describe that
      codepoint.
    * Contains a regexp metacharacter: regexp search over character names,
      showing at most three results (the third is suffixed with ``[...]``
      when there are more).
    * Otherwise: plain-word search for the single best match.
    """
    arg = input.bytes[3:]
    # phenny.msg('#inamidst', '%r' % arg)
    if not arg:
        return phenny.reply('You gave me zero length input.')

    # @@ space
    query_chars = set(
        'ABCDEFGHIJKLMNOPQRSTUVWYXYZ0123456789- .?+*{}[]\\/^$')

    if set(arg.upper()) - query_chars:
        # Something outside the query alphabet: the user pasted characters.
        try:
            text = arg.decode('utf-8')
        except UnicodeDecodeError:
            # Invalid byte sequences would otherwise crash the handler.
            return phenny.reply('Sorry, your input is not valid UTF-8.')

        if len(text) <= 3:
            # Up to three codepoints: describe each one in full.
            for char in text:
                phenny.say(about(char))
        elif len(text) <= 10:
            # Four to ten codepoints: list just the codepoint numbers.
            phenny.reply(' '.join('U+%04X' % ord(c) for c in text))
        else:
            phenny.reply('Sorry, your input is too long!')
        return

    if len(arg) == 4:
        # Four characters that parse as hex are taken as a codepoint number.
        try:
            char = unichr(int(arg, 16))
        except ValueError:
            pass
        else:
            return phenny.say(about(char))

    if any(c in arg for c in '.?+*{}[]\\/^$'):
        # Look up codepoints with a regexp. Fetch one more result than is
        # shown so we can tell whether the output was truncated.
        try:
            results = list(islice(codepoint_extended(arg), 4))
        except ValueError as err:
            # codepoint_extended raises ValueError for a broken regexp.
            return phenny.reply('Sorry, %s' % err)

        shown = results[:3]
        if len(results) > 3:
            shown[2] += ' [...]'
        for line in shown:
            phenny.say(line)
        if not results:
            phenny.reply('Sorry, no results')
    else:
        # Look up a codepoint freely, by the words in its name.
        result = codepoint_simple(arg)
        if result is not None:
            phenny.say(result)
        else:
            phenny.reply("Sorry, no results for %r." % arg)
u.commands = ['u']
def bytes(phenny, input):
    """Handle the ``bytes`` command: echo the repr of the raw argument.

    Replies with the repr of everything after the first space in
    input.bytes, or of the whole value when it contains no space.
    """
    raw = input.bytes
    # split(' ', 1)[-1] is the text after the first space, or the whole
    # string when there is no space at all.
    payload = raw.split(' ', 1)[-1]
    phenny.reply('%r' % (payload,))
bytes.commands = ['bytes']
if __name__ == '__main__':
    # Run as a script: just show the module banner.
    print(__doc__.strip())
|