modules/codepoints.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134

#!/usr/bin/env python
"""
codepoints.py - Phenny Codepoints Module
Copyright 2008, Sean B. Palmer, inamidst.com
Licensed under the Eiffel Forum License 2.

http://inamidst.com/phenny/
"""

import re, unicodedata
from itertools import islice

def about(u, cp=None, name=None):
    """Describe a single character as 'U+XXXX NAME (char)'.

    u    -- the character to describe (passed to ord(), unicodedata.name()
            and unicodedata.combining(), so a one-character unicode string)
    cp   -- its codepoint; computed with ord(u) when omitted
    name -- its Unicode name; looked up with unicodedata.name(u) when omitted

    Returns the formatted description. When the name has to be looked up
    and the character has none, 'U+XXXX (No name found)' is returned
    instead.
    """
    codepoint = cp
    if codepoint is None:
        codepoint = ord(u)

    label = name
    if label is None:
        try:
            label = unicodedata.name(u)
        except ValueError:
            # unicodedata.name() raises ValueError for unnamed characters
            return 'U+%04X (No name found)' % codepoint

    if unicodedata.combining(u):
        # Combining marks get a carrier in front of them: the escaped bytes
        # \xe2\x97\x8c are the UTF-8 encoding of U+25CC DOTTED CIRCLE.
        template = 'U+%04X %s (\xe2\x97\x8c%s)'
    else:
        template = 'U+%04X %s (%s)'
    return template % (codepoint, label, u.encode('utf-8'))

def _named_bmp_matches(r_label):
    """Collect every named codepoint below U+FFFF whose name matches r_label.

    Returns a list of (len(name), character, codepoint, name) tuples, so the
    smallest tuple is the match with the shortest name.
    """
    matches = []
    for cp in xrange(0xFFFF):
        char = unichr(cp)
        try: name = unicodedata.name(char)
        except ValueError: continue  # codepoint has no name

        if r_label.search(name):
            matches.append((len(name), char, cp, name))
    return matches

def codepoint_simple(arg):
    """Find the best-matching codepoint for a plain-text name query.

    arg -- words of a Unicode character name, separated by single spaces;
           case is ignored (the query is uppercased before matching).

    The words must appear in the name in order, each starting at a word
    boundary, with anything in between. A first pass also requires the
    last word to end at a word boundary; only if that finds nothing is a
    second pass made that lets the last word be a prefix.

    Returns the about() description of the match with the shortest name
    (ties fall back to ordering by the character itself), or None when no
    name matches.
    """
    # Escape each word so the query is matched literally: regexp
    # metacharacters in arg can neither break re.compile() nor widen the
    # search. Splitting before escaping keeps the spaces as word gaps.
    words = '.*\\b'.join(re.escape(word) for word in arg.upper().split(' '))

    results = _named_bmp_matches(re.compile('\\b' + words + '\\b'))
    if not results:
        # Nothing ended on a word boundary: accept a prefix of the last word
        results = _named_bmp_matches(re.compile('\\b' + words))

    if not results:
        return None

    length, u, cp, name = min(results)
    return about(u, cp, name)

def codepoint_extended(arg):
    """Generate descriptions of all codepoints whose name matches a regexp.

    arg -- a regular expression, matched case-insensitively with search()
           against each codepoint's Unicode name. Codepoints without a
           name are matched against the placeholder name '-'.

    Yields one about() description per matching codepoint, in codepoint
    order starting at U+0001.

    Raises ValueError('Broken regexp: ...') if arg cannot be compiled.
    Because this is a generator, the error surfaces when the first item is
    requested, not when the function is called.
    """
    import sys  # local import: only needed for the build's codepoint ceiling

    # Match case-insensitively instead of uppercasing the pattern:
    # uppercasing also rewrites escapes (\b -> \B, \d -> \D, (?i) -> (?I))
    # and so changes or breaks what the user asked for.
    try: r_search = re.compile(arg, re.IGNORECASE)
    except Exception:
        # Any compile failure is reported as ValueError; unlike a bare
        # except this lets KeyboardInterrupt/SystemExit propagate.
        raise ValueError('Broken regexp: %r' % arg)

    # unichr() raises ValueError above sys.maxunicode, which is only 0xFFFF
    # on narrow builds, so never scan past what this interpreter supports.
    limit = min(0x10FFFF, sys.maxunicode + 1)
    for cp in xrange(1, limit):
        u = unichr(cp)
        name = unicodedata.name(u, '-')

        if r_search.search(name):
            yield about(u, cp, name)

def u(phenny, input):
    """Look up unicode information."""
    # NOTE(review): the docstring above is left untouched on purpose; the
    # bot framework may expose it as help text - confirm before expanding.
    #
    # Dispatch, in order:
    #   * empty / all-space query    -> fixed replies
    #   * ASCII "name-like" query    -> 4-digit hex lookup, then regexp
    #     search (if it contains regexp characters) or plain name search
    #   * anything else              -> describe the characters themselves

    # Drop the first three bytes of the message (presumably the '.u '
    # command prefix - verify against the caller).
    query = input.bytes[3:]
    if not query:
        return phenny.reply('You gave me zero length input.')
    if not query.strip(' '):
        # The query is made of spaces only: report how many
        count = len(query)
        if count > 1:
            return phenny.reply('%s SPACEs (U+0020)' % count)
        return phenny.reply('1 SPACE (U+0020)')

    # @@ space
    # A query is treated as a name/regexp search only if it is longer than
    # one character and built entirely from these characters.
    searchable = set('ABCDEFGHIJKLMNOPQRSTUVWYXYZ0123456789- .?+*{}[]\\/^$')
    printable = (not (set(query.upper()) - searchable)) and len(query) > 1

    if not printable:
        text = query.decode('utf-8')
        if len(text) <= 3:
            # up to three codepoints: full description of each
            for char in text:
                phenny.say(about(char))
        elif len(text) <= 10:
            # four to ten codepoints: just list the codepoint numbers
            phenny.reply(' '.join('U+%04X' % ord(c) for c in text))
        else:
            phenny.reply('Sorry, your input is too long!')
        return

    if len(query) == 4:
        # Four characters that parse as hex are taken to be a codepoint
        try:
            char = unichr(int(query, 16))
        except ValueError:
            pass
        else:
            return phenny.say(about(char))

    if set(query) & set('.?+*{}[]\\/^$'):
        # look up a codepoint with regexp; fetch a fourth match only to
        # learn whether the third one shown should be marked as truncated
        results = list(islice(codepoint_extended(query), 4))
        if not results:
            phenny.reply('Sorry, no results')
        else:
            for line in results[:2]:
                phenny.say(line)
            if len(results) > 3:
                phenny.say(results[2] + ' [...]')
            elif len(results) == 3:
                phenny.say(results[2])
    else:
        # look up a codepoint freely
        found = codepoint_simple(query)
        if found is None:
            phenny.reply("Sorry, no results for %r." % query)
        else:
            phenny.say(found)
u.commands = ['u']
u.example = '.u 203D'

def bytes(phenny, input):
    """Show the input as pretty printed bytes."""
    # Reply with the repr() of everything after the first space in
    # input.bytes. When there is no space, find() returns -1, the slice
    # starts at 0 and the whole message is shown.
    raw = input.bytes
    start = raw.find(' ') + 1
    payload = raw[start:]
    phenny.reply('%r' % payload)
bytes.commands = ['bytes']
bytes.example = '.bytes \xe3\x8b\xa1'

if __name__ == '__main__':
    # Run directly: show the module docstring without surrounding whitespace.
    # The parenthesised form prints the same text under the print statement.
    print(__doc__.strip())