"""methods that make parsing of post inputs possible,
handling of markdown and additional syntax rules -
such as optional link patterns, video embedding and
Twitter-style @mentions"""

import re
import logging
from askbot import const
from askbot.conf import settings as askbot_settings
from askbot.utils.html import sanitize_html, strip_tags
from django.utils.html import urlize
from markdown2 import Markdown

#url taken from http://regexlib.com/REDetails.aspx?regexp_id=501 by Brian Bothwell
# NOTE(review): the reviewed copy of this file is damaged at this point.
# Everything from just after the start of the URL_RE pattern up to the tail
# of ``format_mention_in_html`` is missing (it looks like an HTML-tag
# stripper removed the span between a "<" and the next ">").
# The surviving fragment of the assignment is kept below verbatim; the
# full pattern must be restored from version control - it is NOT recreated
# here because it cannot be recovered from the visible text.
#
#     URL_RE = re.compile("((?   ...pattern truncated...
#
# NOTE(review): ``get_parser()``, which ``markdown_input_converter`` calls,
# is not defined anywhere in the visible text either; presumably it sat in
# the same missing span and must be restored together with URL_RE.


def format_mention_in_html(mentioned_user):
    """Return an html snippet that renders ``mentioned_user`` as an
    ``@username`` mention.

    NOTE(review): only the final ``...@%s' % (url, username)`` part of
    this function survives in the reviewed copy. The signature is taken
    from the call sites in ``mentionize_text``; the anchor markup and the
    way ``url`` is obtained are reconstructed - confirm both against
    version control before merging.
    """
    url = mentioned_user.get_profile_url()  # presumably; accessor name not visible
    username = mentioned_user.username
    return '<a href="%s">@%s</a>' % (url, username)


def extract_first_matching_mentioned_author(text, anticipated_authors):
    """Match the beginning of ``text`` against the usernames of
    ``anticipated_authors`` - an iterable of user objects.

    The comparison is case-insensitive. A username only counts as
    matched when it is followed by the end of ``text`` or by one of
    ``const.TWITTER_STYLE_MENTION_TERMINATION_CHARS``.

    Returns a tuple ``(author, remainder)``:

    * on the first match - the matched user object and the part of
      ``text`` that follows the username (starting with the
      termination character, or ``''`` if the name ends the text)
    * when nothing matches - ``None`` and the unchanged ``text``
    * for empty ``text`` - ``None`` and ``''``
    """
    if not text:
        return None, ''

    #lowercase once instead of once per candidate author
    lowered_text = text.lower()
    terminators = const.TWITTER_STYLE_MENTION_TERMINATION_CHARS

    for author in anticipated_authors:
        username = author.username
        if not lowered_text.startswith(username.lower()):
            continue
        ulen = len(username)
        if len(text) == ulen:
            #the username is all that is left of the text
            return author, ''
        if text[ulen] in terminators:
            return author, text[ulen:]
        #near miss, here we could insert a warning that perhaps
        #a termination character is needed

    return None, text


def extract_mentioned_name_seeds(text):
    """Return the set of strings that follow the '@' symbols in ``text``.

    Each seed runs from the character after an '@' up to (not including)
    the first termination character
    (``const.TWITTER_STYLE_MENTION_TERMINATION_CHARS``), the next '@',
    or the end of the text. Seeds are cut off at 11 characters: the
    length check happens before the next character is appended, so one
    character more than the nominal limit of 10 gets through.

    Empty seeds (an '@' immediately followed by a termination character,
    another '@', or the end of text) are never returned.
    """
    name_seeds = set()
    terminators = const.TWITTER_STYLE_MENTION_TERMINATION_CHARS

    while '@' in text:
        text = text[text.index('@') + 1:]#chop off prefix
        name_seed = ''
        for char in text:
            if char in terminators or char == '@' or len(name_seed) > 10:
                break
            name_seed += char
        #one guard for every way of leaving the loop, including
        #running off the end of text - an empty seed carries no information
        if name_seed:
            name_seeds.add(name_seed)

    return name_seeds


def mentionize_text(text, anticipated_authors):
    """Replace @mentions in ``text`` with links to user profiles.

    Returns a tuple of two items, in this order:

    * list of users whose names matched the @mentions (in order of
      appearance; a user mentioned twice is listed twice)
    * modified text where the matched @mentions are replaced with the
      output of ``format_mention_in_html``
    """
    output_parts = []
    mentioned_authors = list()
    terminators = const.TWITTER_STYLE_MENTION_TERMINATION_CHARS

    while '@' in text:
        #the purpose of this loop is to convert any occurrence of
        #'@mention ' syntax to user account links.
        #A termination character is required before the @ unless the @
        #is the first character of the text still to be processed; also,
        #either a termination character or the end of text is required
        #after the name
        pos = text.index('@')

        #save stuff before @mention to the output
        output_parts.append(text[:pos])#this works for pos == 0 too

        if len(text) == pos + 1:
            #finish up if the found @ is the last symbol
            output_parts.append('@')
            text = ''
            break

        #text like something@mention must not trigger a people lookup
        lookup_allowed = (pos == 0 or text[pos - 1] in terminators)
        text = text[pos + 1:]

        mentioned_author = None
        if lookup_allowed:
            mentioned_author, text = \
                extract_first_matching_mentioned_author(
                    text,
                    anticipated_authors
                )

        if mentioned_author:
            mentioned_authors.append(mentioned_author)
            output_parts.append(format_mention_in_html(mentioned_author))
        else:
            #no lookup or no match - keep the @ as plain text
            output_parts.append('@')

    #append the rest of text that did not have @ symbols
    output_parts.append(text)
    return mentioned_authors, ''.join(output_parts)


def plain_text_input_converter(text):
    """plain text to html converter

    NOTE(review): in the reviewed copy the two string literals around
    ``text`` were broken across blank lines (a syntax error), which is
    what stripped paragraph tags would leave behind; the ``<p>`` /
    ``</p>`` wrappers are reconstructed on that basis - confirm against
    version control.
    """
    return sanitize_html(urlize('<p>' + text + '</p>'))


def markdown_input_converter(text):
    """markdown to html converter

    Urlizes the raw input, runs it through the parser returned by
    ``get_parser()`` and sanitizes the resulting html.
    """
    text = urlize(text)
    text = get_parser().convert(text)
    return sanitize_html(text)


def tinymce_input_converter(text):
    """tinymce input to production html converter

    Urlizes the input and passes it to ``strip_tags`` together with the
    tag names 'script', 'style' and 'link'.
    """
    text = urlize(text)
    return strip_tags(text, ['script', 'style', 'link'])