1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
|
import re
from askbot import const
from askbot.conf import settings as askbot_settings
from markdown2 import Markdown
#url taken from http://regexlib.com/REDetails.aspx?regexp_id=501 by Brian Bothwell
URL_RE = re.compile("(?<!href=['\"])((http|https|ftp)\://([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|[a-zA-Z]{2}))(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)")
LINK_PATTERNS = [
(URL_RE, r'\1'),
]
def get_parser():
extras = ['link-patterns',]
if askbot_settings.ENABLE_MATHJAX or \
askbot_settings.MARKUP_CODE_FRIENDLY:
extras.append('code-friendly')
return Markdown(
html4tags=True,
extras=extras,
link_patterns = LINK_PATTERNS
)
def format_mention_in_html(mentioned_user):
url = mentioned_user.get_profile_url()
username = mentioned_user.username
return '<a href="%s">@%s</a>' % (url, username)
def extract_first_matching_mentioned_author(text, anticipated_authors):
if len(text) == 0:
return None, ''
for a in anticipated_authors:
if text.startswith(a.username):
ulen = len(a.username)
if len(text) == ulen:
text = ''
elif text[ulen] in const.TWITTER_STYLE_MENTION_TERMINATION_CHARS:
text = text[ulen:]
else:
#near miss, here we could insert a warning that perhaps
#a termination character is needed
continue
return a, text
return None, text
def extract_mentioned_name_seeds(text):
extra_name_seeds = set()
while '@' in text:
pos = text.index('@')
text = text[pos+1:]#chop off prefix
name_seed = ''
for c in text:
if c in const.TWITTER_STYLE_MENTION_TERMINATION_CHARS:
extra_name_seeds.add(name_seed)
name_seed = ''
break
if len(name_seed) > 10:
extra_name_seeds.add(name_seed)
name_seed = ''
break
if c == '@':
if len(name_seed) > 0:
extra_name_seeds.add(name_seed)
name_seed = ''
break
name_seed += c
if len(name_seed) > 0:
#in case we run off the end of text
extra_name_seeds.add(name_seed)
return extra_name_seeds
def mentionize_text(text, anticipated_authors):
output = ''
mentioned_authors = list()
while '@' in text:
#the purpose of this loop is to convert any occurance of
#'@mention ' syntax
#to user account links leading space is required unless @ is the first
#character in whole text, also, either a punctuation or
#a ' ' char is required after the name
pos = text.index('@')
#save stuff before @mention to the output
output += text[:pos]#this works for pos == 0 too
if len(text) == pos + 1:
#finish up if the found @ is the last symbol
output += '@'
text = ''
break
if pos > 0:
if text[pos-1] in const.TWITTER_STYLE_MENTION_TERMINATION_CHARS:
#if there is a termination character before @mention
#indeed try to find a matching person
text = text[pos+1:]
mentioned_author, text = \
extract_first_matching_mentioned_author(
text,
anticipated_authors
)
if mentioned_author:
mentioned_authors.append(mentioned_author)
output += format_mention_in_html(mentioned_author)
else:
output += '@'
else:
#if there isn't, i.e. text goes like something@mention,
#do not look up people
output += '@'
text = text[pos+1:]
else:
#do this if @ is the first character
text = text[1:]
mentioned_author, text = \
extract_first_matching_mentioned_author(
text,
anticipated_authors
)
if mentioned_author:
mentioned_authors.append(mentioned_author)
output += format_mention_in_html(mentioned_author)
else:
output += '@'
#append the rest of text that did not have @ symbols
output += text
return mentioned_authors, output
|