From 675118869f07a78ffe380b2b925ca30ab050fc98 Mon Sep 17 00:00:00 2001 From: Evgeny Fadeev Date: Wed, 8 May 2013 23:19:42 -0400 Subject: require html5lib for bs4 for better compatibility with apache --- askbot/mail/__init__.py | 2 +- askbot/templatetags/extra_filters_jinja.py | 2 +- askbot/utils/html.py | 4 ++-- askbot/views/commands.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/askbot/mail/__init__.py b/askbot/mail/__init__.py index a2f53017..8b999534 100644 --- a/askbot/mail/__init__.py +++ b/askbot/mail/__init__.py @@ -90,7 +90,7 @@ def clean_html_email(email_body): todo: needs more clenup might not work for other email templates that do not use table layout """ - soup = BeautifulSoup(email_body) + soup = BeautifulSoup(email_body, 'html5lib') body_element = soup.find('body') filter_func = lambda s: bool(s.strip()) phrases = map( diff --git a/askbot/templatetags/extra_filters_jinja.py b/askbot/templatetags/extra_filters_jinja.py index 19462da6..dccd9a2a 100644 --- a/askbot/templatetags/extra_filters_jinja.py +++ b/askbot/templatetags/extra_filters_jinja.py @@ -63,7 +63,7 @@ def is_empty_editor_value(value): return True #tinymce uses a weird sentinel placeholder if askbot_settings.EDITOR_TYPE == 'tinymce': - soup = BeautifulSoup(value) + soup = BeautifulSoup(value, 'html5lib') return soup.getText().strip() == '' return False diff --git a/askbot/utils/html.py b/askbot/utils/html.py index 29e8bd70..d7b321da 100644 --- a/askbot/utils/html.py +++ b/askbot/utils/html.py @@ -73,7 +73,7 @@ def replace_links_with_text(html): return '%s (%s)' % (url, text) return url or text or '' - soup = BeautifulSoup(html) + soup = BeautifulSoup(html, 'html5lib') abs_url_re = r'^http(s)?://' images = soup.find_all('img') @@ -103,7 +103,7 @@ def strip_tags(html, tags=None): assert(tags != None) - soup = BeautifulSoup(html) + soup = BeautifulSoup(html, 'html5lib') for tag in tags: tag_matches = soup.find_all(tag) map(lambda v: v.replaceWith(''), tag_matches) diff --git a/askbot/views/commands.py b/askbot/views/commands.py index dee8d1e4..b2c8a788 100644 --- a/askbot/views/commands.py +++ b/askbot/views/commands.py @@ -1489,7 +1489,7 @@ def get_editor(request): ) #parse out javascript and dom, and return them separately #we need that, because js needs to be added in a special way - html_soup = BeautifulSoup(editor_html) + html_soup = BeautifulSoup(editor_html, 'html5lib') parsed_scripts = list() for script in html_soup.find_all('script'): -- cgit v1.2.3-1-g7c22