From a1578c654f26cab07309bc9cbddd3c95c0c205b5 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Thu, 24 May 2012 00:05:30 -0400
Subject: repoman: unroll escaped lines so we can check them in their entirety

Sometimes people wrap long lines in their ebuilds to make it easier to
read, but this causes us issues when doing line-by-line checking.  So
automatically unroll those lines before passing the full content down
to our checkers.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
---
 pym/repoman/checks.py | 65 +++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 53 insertions(+), 12 deletions(-)

(limited to 'pym/repoman')

diff --git a/pym/repoman/checks.py b/pym/repoman/checks.py
index 77df603a2..a413968e8 100644
--- a/pym/repoman/checks.py
+++ b/pym/repoman/checks.py
@@ -5,6 +5,7 @@
 """This module contains functions used in Repoman to ascertain the quality
 and correctness of an ebuild."""
 
+import codecs
 import re
 import time
 import repoman.errors as errors
@@ -692,8 +693,11 @@ _here_doc_re = re.compile(r'.*\s<<[-]?(\w+)$')
 _ignore_comment_re = re.compile(r'^\s*#')
 
 def run_checks(contents, pkg):
+	unicode_escape_codec = codecs.lookup('unicode_escape')
+	unicode_escape = lambda x: unicode_escape_codec.decode(x)[0]
 	checks = _constant_checks
 	here_doc_delim = None
+	multiline = None
 
 	for lc in checks:
 		lc.new(pkg)
@@ -707,19 +711,56 @@ def run_checks(contents, pkg):
 			here_doc = _here_doc_re.match(line)
 			if here_doc is not None:
 				here_doc_delim = re.compile(r'^\s*%s$' % here_doc.group(1))
+		if here_doc_delim is not None:
+			continue
+
+		# Unroll multiline escaped strings so that we can check things:
+		#		inherit foo bar \
+		#			moo \
+		#			cow
+		# This will merge these lines like so:
+		#		inherit foo bar 	moo 	cow
+		try:
+			# A continued line will end in the two bytes: <\> <\n>.  Decoding
+			# that would make python treat the <\n> as being escaped and eat
+			# the single <\>, which makes it hard for us to detect.
+			# Instead, strip the newline (which we know all lines have), and
+			# append a <0>.  Then when python decodes it, if the line ended
+			# in a <\>, we'll end up with a <\0> marker to key off of.  This
+			# shouldn't be a problem with any valid ebuild ...
+			line_escaped = unicode_escape(line.rstrip('\n') + '0')
+		except SystemExit:
+			raise
+		except:
+			# Who knows what kind of crazy crap an ebuild will have
+			# in it -- don't allow it to kill us.
+			line_escaped = line
+		if multiline:
+			# Chop off the \ and \n bytes from the previous line.
+			multiline = multiline[:-2] + line
+			if not line_escaped.endswith('\0'):
+				line = multiline
+				num = multinum
+				multiline = None
+			else:
+				continue
+		else:
+			if line_escaped.endswith('\0'):
+				multinum = num
+				multiline = line
+				continue
 
-		if here_doc_delim is None:
-			# We're not in a here-document.
-			is_comment = _ignore_comment_re.match(line) is not None
-			for lc in checks:
-				if is_comment and lc.ignore_comment:
-					continue
-				if lc.check_eapi(pkg.metadata['EAPI']):
-					ignore = lc.ignore_line
-					if not ignore or not ignore.match(line):
-						e = lc.check(num, line)
-						if e:
-							yield lc.repoman_check_name, e % (num + 1)
+		# Finally we have a full line to parse.
+		is_comment = _ignore_comment_re.match(line) is not None
+		for lc in checks:
+			if is_comment and lc.ignore_comment:
+				continue
+			if lc.check_eapi(pkg.metadata['EAPI']):
+				ignore = lc.ignore_line
+				if not ignore or not ignore.match(line):
+					e = lc.check(num, line)
+					if e:
+						yield lc.repoman_check_name, e % (num + 1)
 
 	for lc in checks:
 		i = lc.end()
-- 
cgit v1.2.3-1-g7c22