diff options
Diffstat (limited to 'trunk/etherpad/src/etherpad/pad/exporthtml.js')
-rw-r--r-- | trunk/etherpad/src/etherpad/pad/exporthtml.js | 383 |
1 files changed, 383 insertions, 0 deletions
/**
 * Copyright 2009 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS-IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import("etherpad.collab.ace.easysync2.Changeset");

/**
 * Renders a pad revision as plain text.
 *
 * List lines are indented by (2 * level - 1) spaces and prefixed with "* ";
 * every other line is emitted verbatim.  Each output line ends with '\n'.
 *
 * @param pad     pad object; must provide atext(), pool() and, when revNum
 *                is given, getInternalRevisionAText(revNum).
 * @param revNum  optional revision number; when undefined the current
 *                pad.atext() is used.
 * @returns {string} the plain-text rendering.
 */
function getPadPlainText(pad, revNum) {
  var atext = ((revNum !== undefined) ? pad.getInternalRevisionAText(revNum) :
               pad.atext());
  // slice(0,-1) drops the last character of the text before splitting
  // (presumably the pad's trailing newline -- confirm against the atext format).
  var textLines = atext.text.slice(0,-1).split('\n');
  var attribLines = Changeset.splitAttributionLines(atext.attribs, atext.text);
  var apool = pad.pool();

  var pieces = [];
  for(var i=0;i<textLines.length;i++) {
    var line = _analyzeLine(textLines[i], attribLines[i], apool);
    if (line.listLevel) {
      var numSpaces = line.listLevel*2-1;
      var bullet = '*';
      pieces.push(new Array(numSpaces+1).join(' '), bullet, ' ', line.text, '\n');
    }
    else {
      pieces.push(line.text, '\n');
    }
  }

  return pieces.join('');
}

/**
 * Renders a pad revision as an HTML fragment (no <html>/<body> wrapper).
 *
 * Character attributes listed in `props` become the corresponding `tags`,
 * URLs found by _findURLs() are wrapped in <a href="...">, list lines become
 * nested <ul><li> structures and all other lines are terminated by <br/>.
 *
 * @param pad     pad object; must provide atext(), pool() and, when revNum
 *                is given, getInternalRevisionAText(revNum).
 * @param revNum  optional revision number; when undefined the current
 *                pad.atext() is used.
 * @returns {string} the HTML fragment.
 */
function getPadHTML(pad, revNum) {
  var atext = ((revNum !== undefined) ? pad.getInternalRevisionAText(revNum) :
               pad.atext());
  var textLines = atext.text.slice(0,-1).split('\n');
  var attribLines = Changeset.splitAttributionLines(atext.attribs, atext.text);

  var apool = pad.pool();

  // tags[i] is the HTML element emitted for the attribute named props[i].
  var tags = ['b','i','u','s','h1','h2','h3','h4','h5','h6'];
  var props = ['bold','italic','underline','strikethrough','h1','h2','h3','h4','h5','h6'];
  // Maps attribute number in the pool -> index into tags/props.
  var anumMap = {};
  props.forEach(function(propName, i) {
    // NOTE(review): the second argument presumably means "don't add if
    // absent", with a negative result for unknown attributes -- confirm
    // against the attribute pool implementation.
    var propTrueNum = apool.putAttrib([propName,true], true);
    if (propTrueNum >= 0) {
      anumMap[propTrueNum] = i;
    }
  });

  /**
   * Converts one line of text plus its attribution string into HTML.
   */
  function getLineHTML(text, attribs) {
    // One state slot per entry of `tags`.  Between spans every slot is
    // either true (tag currently open) or false (tag closed).
    var propVals = [];
    for(var t=0;t<tags.length;t++) {
      propVals.push(false);
    }
    var ENTER = 1;
    var STAY = 2;
    var LEAVE = 0;

    // Use order of tags (b/i/u) as order of nesting, for simplicity
    // and decent nesting.  For example,
    // <b>Just bold</b> <b><i>Bold and italics</i></b> <i>Just italics</i>
    // becomes
    // <b>Just bold <i>Bold and italics</i></b> <i>Just italics</i>

    var taker = Changeset.stringIterator(text);
    var assem = Changeset.stringAssembler();

    function emitOpenTag(i) {
      assem.append('<');
      assem.append(tags[i]);
      assem.append('>');
    }
    function emitCloseTag(i) {
      assem.append('</');
      assem.append(tags[i]);
      assem.append('>');
    }

    var urls = _findURLs(text);

    // Number of characters of `text` consumed so far.
    var idx = 0;
    function processNextChars(numChars) {
      if (numChars <= 0) {
        return;
      }

      var iter = Changeset.opIterator(Changeset.subattribution(attribs,
        idx, idx+numChars));
      idx += numChars;

      while (iter.hasNext()) {
        var o = iter.next();
        var propChanged = false;
        Changeset.eachAttribNumber(o.attribs, function(a) {
          if (a in anumMap) {
            var i = anumMap[a]; // i = 0 => bold, etc.
            if (! propVals[i]) {
              propVals[i] = ENTER;
              propChanged = true;
            }
            else {
              propVals[i] = STAY;
            }
          }
        });
        // Any slot still `true` was not mentioned by this span, so the
        // property ends here.
        for(var i=0;i<propVals.length;i++) {
          if (propVals[i] === true) {
            propVals[i] = LEAVE;
            propChanged = true;
          }
          else if (propVals[i] === STAY) {
            propVals[i] = true; // set it back
          }
        }
        // now each member of propVal is in {false,LEAVE,ENTER,true}
        // according to what happens at start of span

        if (propChanged) {
          // leaving bold (e.g.) also leaves italics, etc.
          var left = false;
          for(var i=0;i<propVals.length;i++) {
            var v = propVals[i];
            if (! left) {
              if (v === LEAVE) {
                left = true;
              }
            }
            else {
              if (v === true) {
                propVals[i] = STAY; // tag will be closed and re-opened
              }
            }
          }

          // Close innermost-first so the emitted tags nest properly.
          for(var i=propVals.length-1; i>=0; i--) {
            if (propVals[i] === LEAVE) {
              emitCloseTag(i);
              propVals[i] = false;
            }
            else if (propVals[i] === STAY) {
              emitCloseTag(i);
            }
          }
          for(var i=0; i<propVals.length; i++) {
            if (propVals[i] === ENTER || propVals[i] === STAY) {
              emitOpenTag(i);
              propVals[i] = true;
            }
          }
          // propVals is now all {true,false} again
        } // end if (propChanged)

        var chars = o.chars;
        if (o.lines) {
          chars--; // exclude newline at end of line, if present
        }
        var s = taker.take(chars);

        assem.append(_escapeHTML(s));
      } // end iteration over spans in line

      // Close everything still open so that each processNextChars() call
      // produces balanced markup (needed because <a> wraps whole calls).
      for(var i=propVals.length-1; i>=0; i--) {
        if (propVals[i]) {
          emitCloseTag(i);
          propVals[i] = false;
        }
      }
    } // end processNextChars

    if (urls) {
      urls.forEach(function(urlData) {
        var startIndex = urlData[0];
        var url = urlData[1];
        var urlLength = url.length;
        processNextChars(startIndex - idx);
        // Escape the URL for use inside a double-quoted attribute value:
        // '&', '<', '>' via _escapeHTML and '"' as &quot;.
        assem.append('<a href="'+_escapeHTML(url).replace(/"/g, '&quot;')+'">');
        processNextChars(urlLength);
        assem.append('</a>');
      });
    }
    processNextChars(text.length - idx);

    return _processSpaces(assem.toString());
  } // end getLineHTML

  var pieces = [];

  // Need to deal with constraints imposed on HTML lists; can
  // only gain one level of nesting at once, can't change type
  // mid-list, etc.
  // People might use weird indenting, e.g. skip a level,
  // so we want to do something reasonable there.  We also
  // want to deal gracefully with blank lines.
  var lists = []; // e.g. [[1,'bullet'], [3,'bullet'], ...]
  for(var i=0;i<textLines.length;i++) {
    var line = _analyzeLine(textLines[i], attribLines[i], apool);
    var lineContent = getLineHTML(line.text, line.aline);

    if (line.listLevel || lists.length > 0) {
      // do list stuff
      var whichList = -1; // index into lists or -1
      if (line.listLevel) {
        // Pick the outermost open list whose level is >= this line's
        // level; lists.length means "open a new, deeper list".
        whichList = lists.length;
        for(var j=lists.length-1;j>=0;j--) {
          if (line.listLevel <= lists[j][0]) {
            whichList = j;
          }
        }
      }

      if (whichList >= lists.length) {
        lists.push([line.listLevel, line.listTypeName]);
        pieces.push('<ul><li>', lineContent || '<br/>');
      }
      else if (whichList == -1) {
        if (line.text) {
          // non-blank line, end all lists
          pieces.push(new Array(lists.length+1).join('</li></ul\n>'));
          lists.length = 0;
          pieces.push(lineContent, '<br\n/>');
        }
        else {
          pieces.push('<br/><br\n/>');
        }
      }
      else {
        while (whichList < lists.length-1) {
          pieces.push('</li></ul\n>');
          lists.pop();
        }
        pieces.push('</li\n><li>', lineContent || '<br/>');
      }
    }
    else {
      pieces.push(lineContent, '<br\n/>');
    }
  }
  // Close whatever lists are still open at the end of the pad.
  pieces.push(new Array(lists.length+1).join('</li></ul\n>'));

  return pieces.join('');
}

/**
 * Splits a raw pad line into its list information and visible content.
 *
 * @param text   the line's text, without the trailing newline.
 * @param aline  the line's attribution string; may be empty/undefined.
 * @param apool  attribute pool used to look up the 'list' attribute.
 * @returns {Object} an object with
 *   - listLevel:    0, or the digit 1-8 parsed from the 'list' attribute;
 *   - listTypeName: the lowercase-letter prefix of the 'list' attribute
 *                   (only set when it matches <letters><digit>);
 *   - text, aline:  the line content; when the first op carries a 'list'
 *                   attribute the first character and its attribution are
 *                   stripped (it is treated as a line marker).
 */
function _analyzeLine(text, aline, apool) {
  var line = {};

  // identify list
  var lineMarker = 0;
  line.listLevel = 0;
  if (aline) {
    var opIter = Changeset.opIterator(aline);
    if (opIter.hasNext()) {
      var listType = Changeset.opAttributeValue(opIter.next(), 'list', apool);
      if (listType) {
        lineMarker = 1;
        listType = /([a-z]+)([12345678])/.exec(listType);
        if (listType) {
          line.listTypeName = listType[1];
          line.listLevel = Number(listType[2]);
        }
      }
    }
  }
  if (lineMarker) {
    line.text = text.substring(1);
    line.aline = Changeset.subattribution(aline, 1);
  }
  else {
    line.text = text;
    line.aline = aline;
  }

  return line;
}

/**
 * Wraps getPadHTML() output in a complete HTML document.
 *
 * @param pad        pad object (see getPadHTML); getId() is used for <title>.
 * @param revNum     optional revision number, passed through to getPadHTML.
 * @param noDocType  when truthy, both the DOCTYPE declaration and the whole
 *                   <head> section are omitted.
 * @returns {string} the document text.
 */
function getPadHTMLDocument(pad, revNum, noDocType) {
  var head = '';
  if (! noDocType) {
    head += '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" '+
      '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n';
  }
  head += '<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">\n';
  if (! noDocType) {
    head += '<head>\n'+
      '<meta http-equiv="Content-type" content="text/html; charset=utf-8" />\n'+
      '<meta http-equiv="Content-Language" content="en-us" />\n'+
      // The pad id is escaped so it cannot inject markup into the title.
      '<title>'+'/'+_escapeHTML(String(pad.getId()))+'</title>\n'+
      '<style type="text/css">h1,h2,h3,h4,h5,h6 { display: inline; }</style>\n' +
      '</head>\n';
  }
  head += '<body>';

  var foot = '</body>\n</html>\n';

  return head + getPadHTML(pad, revNum) + foot;
}

// Characters that must be replaced by entities in HTML text content.
var _HTML_ESCAPE_MAP = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;'
};

/**
 * Escapes '&', '<' and '>' in a string for use as HTML text.
 *
 * @param {string} s  raw text.
 * @returns {string} the text with those three characters replaced by
 *                   &amp;, &lt; and &gt;.
 */
function _escapeHTML(s) {
  return s.replace(/[&<>]/g, function(c) { return _HTML_ESCAPE_MAP[c]; });
}

// copied from ACE
/**
 * Replaces selected spaces in an HTML string with &nbsp; so that runs of
 * spaces, leading spaces and trailing spaces survive HTML whitespace
 * collapsing.  Anything starting with '<' is treated as a tag and left
 * untouched.
 *
 * @param {string} s  HTML for one line.
 * @returns {string} the HTML with spaces converted as described.
 */
function _processSpaces(s) {
  var doesWrap = true;
  if (s.indexOf("<") < 0 && ! doesWrap) {
    // short-cut
    return s.replace(/ /g, '&nbsp;');
  }
  // Tokenize into tags, single spaces, and runs of other characters.
  var parts = [];
  s.replace(/<[^>]*>?| |[^ <]+/g, function(m) { parts.push(m); });
  if (doesWrap) {
    var endOfLine = true;
    var beforeSpace = false;
    // last space in a run is normal, others are nbsp,
    // end of line is nbsp
    for(var i=parts.length-1;i>=0;i--) {
      var p = parts[i];
      if (p == " ") {
        if (endOfLine || beforeSpace)
          parts[i] = '&nbsp;';
        endOfLine = false;
        beforeSpace = true;
      }
      else if (p.charAt(0) != "<") {
        endOfLine = false;
        beforeSpace = false;
      }
    }
    // beginning of line is nbsp
    for(var i=0;i<parts.length;i++) {
      var p = parts[i];
      if (p == " ") {
        parts[i] = '&nbsp;';
        break;
      }
      else if (p.charAt(0) != "<") {
        break;
      }
    }
  }
  else {
    for(var i=0;i<parts.length;i++) {
      var p = parts[i];
      if (p == " ") {
        parts[i] = '&nbsp;';
      }
    }
  }
  return parts.join('');
}


// copied from ACE
var _REGEX_WORDCHAR = /[\u0030-\u0039\u0041-\u005A\u0061-\u007A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u1FFF\u3040-\u9FFF\uF900-\uFDFF\uFE70-\uFEFE\uFF10-\uFF19\uFF21-\uFF3A\uFF41-\uFF5A\uFF66-\uFFDC]/;
var _REGEX_SPACE = /\s/;
var _REGEX_URLCHAR = new RegExp('('+/[-:@a-zA-Z0-9_.,~%+\/\\?=&#;()$]/.source+'|'+_REGEX_WORDCHAR.source+')');
// A scheme prefix followed by URL characters; the final character may not be
// one of ":.,;" so trailing punctuation is not swallowed into the link.
var _REGEX_URL = new RegExp(/(?:(?:https?|s?ftp|ftps|file|smb|afp|nfs|(x-)?man|gopher|txmt):\/\/|mailto:)/.source+_REGEX_URLCHAR.source+'*(?![:.,;])'+_REGEX_URLCHAR.source, 'g');

/**
 * Finds all URLs in a line of text.
 *
 * @param {string} text  the text to scan.
 * @returns null if no URLs, or [[startIndex1, url1], [startIndex2, url2], ...]
 */
function _findURLs(text) {
  // _REGEX_URL is global (stateful); always restart from the beginning.
  _REGEX_URL.lastIndex = 0;
  var urls = null;
  var execResult;
  while ((execResult = _REGEX_URL.exec(text))) {
    urls = (urls || []);
    var startIndex = execResult.index;
    var url = execResult[0];
    urls.push([startIndex, url]);
  }

  return urls;
}