source: trunk/modules/GetHtml/DOMwalk.js @ 1185

Last change on this file since 1185 was 1185, checked in by ray, 10 years ago
  • #1424 [IE8][DOMwalk] style attribute put in twice, once empty
  • #1423 [IE] inline style converted to lower case, which can break background images


  • Property svn:keywords set to LastChangedDate LastChangedRevision LastChangedBy HeadURL Id
File size: 12.1 KB
Line 
1
  /*--------------------------------------:noTabs=true:tabSize=2:indentSize=2:--
    --  Xinha (is not htmlArea) - http://xinha.gogo.co.nz/
    --
    --  Use of Xinha is granted by the terms of the htmlArea License (based on
    --  BSD license)  please read license.txt in this package for details.
    --
    --  Xinha was originally based on work by Mihai Bazon which is:
    --      Copyright (c) 2003-2004 dynarch.com.
    --      Copyright (c) 2002-2003 interactivetools.com, inc.
    --      This copyright notice MUST stay intact for use.
    --
    --  This is the standard implementation of the method for rendering HTML code from the DOM
    --
    --  The file is loaded by the Xinha Core when no alternative method (plugin) is loaded.
    --
    --
    --  $HeadURL$
    --  $LastChangedDate$
    --  $LastChangedRevision$
    --  $LastChangedBy$
    --------------------------------------------------------------------------*/
/**
 * Constructor for the default "get HTML" implementation (DOM walking).
 *
 * @param {Object} editor  The editor instance this implementation is attached
 *                         to; it is stored unchanged on the `editor` property.
 */
function GetHtmlImplementation(editor) {
  this.editor = editor;
}
26
// Plugin metadata record attached to the constructor.
//
// `version` and `developer_url` are derived from SVN keyword strings.  When a
// keyword has been expanded ("$Keyword: value $") the regular expression
// keeps only the value; an unexpanded keyword ("$Keyword$") contains no colon,
// does not match, and is therefore left exactly as written.
// The capture group is lazy so that the blank SVN puts before the closing "$"
// is not included in the extracted value.
GetHtmlImplementation._pluginInfo = {
  name          : "GetHtmlImplementation DOMwalk",
  origin        : "Xinha Core",
  version       : "$LastChangedRevision$".replace(/^[^:]*:\s*(.*?)\s*\$$/, '$1'),
  developer     : "The Xinha Core Developer Team",
  developer_url : "$HeadURL$".replace(/^[^:]*:\s*(.*?)\s*\$$/, '$1'),
  sponsor       : "",
  sponsor_url   : "",
  license       : "htmlArea"
};
37
/**
 * Retrieves the HTML code from the given node.  This is a replacement for
 * reading innerHTML that uses standard DOM calls instead.
 *
 * Kept as a thin legacy wrapper (see ticket #442): it forwards its three
 * arguments to Xinha.getHTMLWrapper and returns that function's result.
 *
 * @param {Node}    root        Node to serialise.
 * @param {Boolean} outputRoot  Passed through to Xinha.getHTMLWrapper.
 * @param {Object}  editor      Passed through to Xinha.getHTMLWrapper.
 * @returns {String} Whatever Xinha.getHTMLWrapper returns for these arguments.
 */
Xinha.getHTML = function(root, outputRoot, editor)
{
  return Xinha.getHTMLWrapper(root, outputRoot, editor);
};
45
// Space-delimited list of attribute names; membership is tested by searching
// for " " + name + " ", hence the leading and trailing blank.
Xinha.emptyAttributes = " checked disabled ismap readonly nowrap compact declare selected defer multiple noresize noshade ";
47
/**
 * Serialises a DOM node, and recursively its children, into an HTML string by
 * walking the DOM instead of reading innerHTML.
 *
 * @param {Node}    root        Node to serialise.
 * @param {Boolean} outputRoot  When true the tags of `root` itself are emitted,
 *                              otherwise only its content.  Only element nodes
 *                              have tags, so the flag is ignored for document
 *                              and document-fragment roots.  Children are
 *                              always serialised with their own tags.
 * @param {Object}  editor      Editor instance.  Read here:
 *                              editor.config.stripScripts,
 *                              editor.config.htmlRemoveTags (object with a
 *                              test() method, or falsy),
 *                              editor.config.only7BitPrintablesInURLs and
 *                              editor.stripBaseURL().
 * @param {String}  [indent]    Indentation prefix for this nesting level;
 *                              defaults to ''.  Each recursion adds two blanks.
 * @returns {String} The generated markup ("" for node types that are skipped).
 */
Xinha.getHTMLWrapper = function(root, outputRoot, editor, indent)
{
  var html = "";
  if ( !indent )
  {
    indent = '';
  }

  switch ( root.nodeType )
  {
    case 10:// Node.DOCUMENT_TYPE_NODE
    case 6: // Node.ENTITY_NODE
    case 12:// Node.NOTATION_NODE
      // these all belong to the document type, probably not necessary
    break;

    case 2: // Node.ATTRIBUTE_NODE
      // Never get here, this has to be handled in the ELEMENT case because
      // IE requires that some attributes are grabbed directly from
      // the attribute (nodeValue doesn't return correct values), see
      //http://groups.google.com/groups?hl=en&lr=&ie=UTF-8&oe=UTF-8&safe=off&selm=3porgu4mc4ofcoa1uqkf7u8kvv064kjjb4%404ax.com
      // for information
    break;

    case 4: // Node.CDATA_SECTION_NODE
      // Mozilla seems to convert CDATA into a comment when going into wysiwyg mode,
      //  don't know about IE
      html += (Xinha.is_ie ? ('\n' + indent) : '') + '<![CDATA[' + root.data + ']]>';
    break;

    case 5: // Node.ENTITY_REFERENCE_NODE
      // The entity name is carried by nodeName; nodeValue is null for this
      // node type and would serialise as "&null;".
      html += '&' + root.nodeName + ';';
    break;

    case 7: // Node.PROCESSING_INSTRUCTION_NODE
      // PI's don't seem to survive going into the wysiwyg mode, (at least in moz)
      // so this is purely academic
      html += (Xinha.is_ie ? ('\n' + indent) : '') + '<'+'?' + root.target + ' ' + root.data + ' ?>';
    break;

    case 1: // Node.ELEMENT_NODE
    case 11: // Node.DOCUMENT_FRAGMENT_NODE
    case 9: // Node.DOCUMENT_NODE
      var closed;
      var i;
      var root_tag = (root.nodeType == 1) ? root.tagName.toLowerCase() : '';
      if ( ( root_tag == "script" || root_tag == "noscript" ) && editor.config.stripScripts )
      {
        break;
      }
      if ( root.nodeType != 1 )
      {
        // Documents and fragments have neither a tag name nor attributes, so
        // there is nothing to output for the root itself.
        outputRoot = false;
      }
      else if ( outputRoot )
      {
        outputRoot = !(editor.config.htmlRemoveTags && editor.config.htmlRemoveTags.test(root_tag));
      }
      if ( Xinha.is_ie && root_tag == "head" )
      {
        // In IE the head content is taken from innerHTML and normalised with
        // regular expressions instead of being walked.
        if ( outputRoot )
        {
          html += '\n' + indent + "<head>";
        }

        var save_multiline = RegExp.multiline;
        RegExp.multiline = true;
        var txt =
        root.innerHTML
        .replace(Xinha.RE_tagName, function(str, p1, p2) { return p1 + p2.toLowerCase(); }) // lowercasize
        .replace(/\s*=\s*(([^'"][^>\s]*)([>\s])|"([^"]+)"|'([^']+)')/g, '="$2$4$5"$3') //add attribute quotes
        .replace(/<(link|meta)((\s*\S*="[^"]*")*)>([\n\r]*)/g, '<$1$2 />\n'); //terminate singlet tags
        RegExp.multiline = save_multiline;
        html += txt + '\n';
        if ( outputRoot )
        {
          html += '\n' + indent + "</head>";
        }
        break;
      }
      else if ( outputRoot )
      {
        closed = (!(root.hasChildNodes() || Xinha.needsClosingTag(root)));
        html += ((Xinha.isBlockElement(root)) ? ('\n' + indent) : '') + "<" + root_tag;
        var attrs = root.attributes;

        for ( i = 0; i < attrs.length; ++i )
        {
          var a = attrs.item(i);
          var value;
          // In certain browsers (*cough* firefox) the dom node loses
          // information if the image is currently broken.  In order to prevent
          // corrupting the height and width of image tags, we strip height and
          // width from the image rather than reporting bad information.
          if (Xinha.is_real_gecko && (root_tag == 'img') &&
              ((a.nodeName.toLowerCase() == 'height') || (a.nodeName.toLowerCase() == 'width')))
          {
            if (!root.complete || root.naturalWidth === 0)
            {
              // This means that firefox has been unable to read the dimensions from the actual image
              continue;
            }
          }
          if (typeof a.nodeValue == 'object' ) continue; // see #684
          if (root_tag == "input"
              && root.type.toLowerCase() == "checkbox"
              && a.nodeName.toLowerCase() == "value"
              && a.nodeValue.toLowerCase() == "on")
          {
            continue;
          }
          if ( !a.specified
            // IE claims these are !a.specified even though they are.  Perhaps others too?
            // (tag tests are anchored so that e.g. "textarea" is not taken for "area")
            && !(/^(input|option)$/.test(root_tag) && a.nodeName == 'value')
            && !(/^area$/.test(root_tag) && a.nodeName.match(/shape|coords/i))
          )
          {
            continue;
          }
          var name = a.nodeName.toLowerCase();
          if ( /_moz_editor_bogus_node/.test(name) || ( name == 'class' && a.nodeValue == 'webkit-block-placeholder') )
          {
            // Browser-inserted placeholder element: discard what was built
            // for its opening tag.
            html = "";
            break;
          }
          if ( /(_moz)|(contenteditable)|(_msh)/.test(name) )
          {
            // avoid certain attributes
            continue;
          }
          if ( Xinha.emptyAttributes.indexOf(" "+name+" ") != -1)
          {
            value = name;
          }
          else if ( name != "style" )
          {
            // IE5.5 reports 25 when cellSpacing is 1; other values might be
            // doomed too.  For this reason we extract the values directly
            // from the root node.
            //
            // Using Gecko the values of href and src are converted to absolute links
            // unless we get them using nodeValue()
            if ( typeof root[a.nodeName] != "undefined" && name != "href" && name != "src" && !(/^on/.test(name)) )
            {
              value = root[a.nodeName];
            }
            else
            {
              value = a.nodeValue;
              if (name == 'class')
              {
                value = value.replace(/Apple-style-span/,'');
                if (!value) continue;
              }
              // IE seems not willing to return the original values - it converts to absolute
              // links using a.nodeValue, a.value, a.stringValue, root.getAttribute("href")
              // So we have to strip the baseurl manually :-/
              if ( Xinha.is_ie && (name == "href" || name == "src") )
              {
                value = editor.stripBaseURL(value);
              }

              // High-ascii (8bit) characters in links seem to cause problems for some sites,
              // while this seems to be consistent with RFC 3986 Section 2.4
              // because these are not "reserved" characters, it does seem to
              // cause links to international resources not to work.  See ticket:167

              // IE always returns high-ascii characters un-encoded in links even if they
              // were supplied as % codes (it unescapes them when we pull the value from the link).

              // Using encodeURI / encodeURIComponent here was observed to give
              // wrong encodings, whereas escape() works in both browsers.

              if ( editor.config.only7BitPrintablesInURLs && ( name == "href" || name == "src" ) )
              {
                value = value.replace(/([^!-~]+)/g, function(match) { return escape(match); });
              }
            }
          }
          else if ( !Xinha.is_ie )
          {
            value = root.style.cssText.replace(/rgb\(.*?\)/ig,function(rgb){ return Xinha._colorToRgb(rgb); });
          }
          else
          {
            // IE8 lists style among the attributes, but for IE the style is
            // written once after this loop (see below).  Always skip it here:
            // `value` is function-scoped and would otherwise still hold the
            // previous attribute's value.
            continue;
          }
          if ( /^(_moz)?$/.test(value) )
          {
            // Mozilla reports some special tags
            // here; we don't need them.
            continue;
          }
          html += " " + name + '="' + Xinha.htmlEncode(value) + '"';
        }
        //IE fails to put style in attributes list & cssText is UPPERCASE
        if ( Xinha.is_ie && root.style.cssText )
        {
          html += ' style="' + root.style.cssText.replace(/(^)?([^:]*):(.*?)(;|$)/g, function(m0, m1,m2,m3, m4){return m2.toLowerCase() + ':' + m3 + m4;}) + '"';
        }
        if ( Xinha.is_ie && root_tag == "option" && root.selected )
        {
          html += ' selected="selected"';
        }
        if ( html !== "" )
        {
          if ( closed && root_tag=="p" )
          {
            //never use <p /> as empty paragraphs won't be visible
            html += ">&nbsp;</p>";
          }
          else if ( closed )
          {
            html += " />";
          }
          else
          {
            html += ">";
          }
        }
      }
      var containsBlock = false;
      if ( root_tag == "script" || root_tag == "noscript" )
      {
        if ( !editor.config.stripScripts )
        {
          var innerText;
          if (Xinha.is_ie)
          {
            innerText = "\n" + root.innerHTML.replace(/^[\n\r]*/,'').replace(/\s+$/,'') + '\n' + indent;
          }
          else
          {
            innerText = (root.hasChildNodes()) ? root.firstChild.nodeValue : '';
          }
          html += innerText + '</'+root_tag+'>' + ((Xinha.is_ie) ? '\n' : '');
        }
      }
      else if (root_tag == "pre")
      {
        // Content of <pre> is copied from innerHTML with line breaks turned
        // into newlines.  The match is case-insensitive and accepts <br />
        // because innerHTML may report the tag as <BR> or <br/>.
        html += ((Xinha.is_ie) ? '\n' : '') + root.innerHTML.replace(/<br\s*\/?>/gi,'\n') + '</'+root_tag+'>';
      }
      else
      {
        for ( i = root.firstChild; i; i = i.nextSibling )
        {
          if ( !containsBlock && i.nodeType == 1 && Xinha.isBlockElement(i) )
          {
            containsBlock = true;
          }
          html += Xinha.getHTMLWrapper(i, true, editor, indent + '  ');
        }
        if ( outputRoot && !closed )
        {
          html += (((Xinha.isBlockElement(root) && containsBlock) || root_tag == 'head' || root_tag == 'html') ? ('\n' + indent) : '') + "</" + root_tag + ">";
        }
      }
    break;

    case 3: // Node.TEXT_NODE
      // Text directly inside script/noscript/style is emitted verbatim.  The
      // alternation is grouped so the anchors apply to every tag name.
      var parentTag = root.parentNode ? root.parentNode.tagName : '';
      if ( /^(script|noscript|style)$/i.test(parentTag) )
      {
        html = root.data;
      }
      else if(root.data.trim() == '')
      {
        // Whitespace-only text collapses to one blank; empty text to nothing.
        if(root.data)
        {
          html = ' ';
        }
        else
        {
          html = '';
        }
      }
      else
      {
        html = Xinha.htmlEncode(root.data);
      }
    break;

    case 8: // Node.COMMENT_NODE
      html = "<!--" + root.data + "-->";
    break;
  }
  return html;
};
337
338
339
Note: See TracBrowser for help on using the repository browser.