summaryrefslogtreecommitdiffstats
path: root/askbot/utils/markup.py
blob: 8347811a28f51dd9d3bc3d2d8d8b9e375132c161 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import re
from askbot import const
from askbot.conf import settings as askbot_settings
from markdown2 import Markdown

#url taken from http://regexlib.com/REDetails.aspx?regexp_id=501 by Brian Bothwell
URL_RE = re.compile("(?<!href=['\"])((http|https|ftp)\://([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&amp;%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|[a-zA-Z]{2}))(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&amp;%\$#\=~_\-]+))*)")

LINK_PATTERNS = [
    (URL_RE, r'\1'),
]

def get_parser():
    extras = ['link-patterns',]  
    if askbot_settings.ENABLE_MATHJAX or \
        askbot_settings.MARKUP_CODE_FRIENDLY:
        extras.append('code-friendly')

    return Markdown(
                html4tags=True,
                extras=extras,
                link_patterns = LINK_PATTERNS
            )


def format_mention_in_html(mentioned_user):
    url = mentioned_user.get_profile_url()
    username = mentioned_user.username
    return '<a href="%s">@%s</a>' % (url, username)

def extract_first_matching_mentioned_author(text, anticipated_authors):

    if len(text) == 0:
        return None, ''

    for a in anticipated_authors:
        if text.startswith(a.username):
            ulen = len(a.username)
            if len(text) == ulen:
                text = ''
            elif text[ulen] in const.TWITTER_STYLE_MENTION_TERMINATION_CHARS:
                text = text[ulen:]
            else:
                #near miss, here we could insert a warning that perhaps
                #a termination character is needed
                continue
            return a, text
    return None, text

def extract_mentioned_name_seeds(text):
    extra_name_seeds = set()
    while '@' in text:
        pos = text.index('@')
        text = text[pos+1:]#chop off prefix
        name_seed = ''
        for c in text:
            if c in const.TWITTER_STYLE_MENTION_TERMINATION_CHARS:
                extra_name_seeds.add(name_seed)
                name_seed = ''
                break
            if len(name_seed) > 10:
                extra_name_seeds.add(name_seed)
                name_seed = ''
                break
            if c == '@':
                if len(name_seed) > 0:
                    extra_name_seeds.add(name_seed)
                    name_seed = ''
                break
            name_seed += c
        if len(name_seed) > 0:
            #in case we run off the end of text
            extra_name_seeds.add(name_seed)

    return extra_name_seeds

def mentionize_text(text, anticipated_authors):
    output = ''
    mentioned_authors = list()
    while '@' in text:
        #the purpose of this loop is to convert any occurance of 
        #'@mention ' syntax
        #to user account links leading space is required unless @ is the first
        #character in whole text, also, either a punctuation or 
        #a ' ' char is required after the name
        pos = text.index('@')

        #save stuff before @mention to the output
        output += text[:pos]#this works for pos == 0 too

        if len(text) == pos + 1:
            #finish up if the found @ is the last symbol
            output += '@'
            text = ''
            break

        if pos > 0:

            if text[pos-1] in const.TWITTER_STYLE_MENTION_TERMINATION_CHARS:
                #if there is a termination character before @mention
                #indeed try to find a matching person
                text = text[pos+1:]
                mentioned_author, text = \
                                    extract_first_matching_mentioned_author(
                                                            text, 
                                                            anticipated_authors
                                                        )
                if mentioned_author:
                    mentioned_authors.append(mentioned_author)
                    output += format_mention_in_html(mentioned_author)
                else:
                    output += '@'

            else:
                #if there isn't, i.e. text goes like something@mention,
                #do not look up people
                output += '@'
                text = text[pos+1:]
        else:
            #do this if @ is the first character
            text = text[1:]
            mentioned_author, text = \
                                extract_first_matching_mentioned_author(
                                                    text, 
                                                    anticipated_authors
                                                )
            if mentioned_author:
                mentioned_authors.append(mentioned_author)
                output += format_mention_in_html(mentioned_author)
            else:
                output += '@'

    #append the rest of text that did not have @ symbols
    output += text
    return mentioned_authors, output