3 * Original code by Erik John Resig (ejohn.org)
\r
4 * http://ejohn.org/blog/pure-javascript-html-parser/
\r
// Letters accepted as the first character of a tag or attribute name;
// the tag scanners test membership with alphabets.indexOf( chr ).
9 alphabets : 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz',
\r
// Characters the tag scanners treat as separators: tab, CR, LF, form feed, backspace and space.
10 whiteSpace : '\t\r\n\f\b ',
\r
12 // Empty Elements - HTML 4.01
\r
// parseStartTag reports a tag as unary (and does not push it on the stack) when empty[ tagLower ] === 1.
13 empty : X.Dom.DTD.EMPTY,
\r
15 // Block Elements - HTML 4.01
\r
// When a tag listed here opens, parseStartTag first closes every inline element on top of the stack.
16 block : {address:1,applet:1,blockquote:1,button:1,center:1,dd:1,del:1,dir:1,div:1,dl:1,dt:1,fieldset:1,form:1,frameset:1,hr:1,iframe:1,ins:1,isindex:1,li:1,map:1,menu:1,noframes:1,noscript:1,object:1,ol:1,p:1,pre:1,script:1,table:1,tbody:1,td:1,tfoot:1,th:1,thead:1,tr:1,ul:1},
\r
18 // Inline Elements - HTML 4.01
\r
// Consulted by parseStartTag with the lower-cased name of the innermost open tag.
19 inline : {a:1,abbr:1,acronym:1,applet:1,b:1,basefont:1,bdo:1,big:1,br:1,button:1,cite:1,code:1,del:1,dfn:1,em:1,font:1,i:1,iframe:1,img:1,input:1,ins:1,kbd:1,label:1,map:1,object:1,q:1,s:1,samp:1,script:1,select:1,small:1,span:1,strike:1,strong:1,sub:1,sup:1,textarea:1,tt:1,u:1,'var':1},
\r
21 // Elements that you can, intentionally, leave open
\r
22 // (and which close themselves)
\r
23 closeSelf : {colgroup:1,dd:1,dt:1,li:1,options:1,p:1,tbody:1,td:1,tfoot:1,th:1,thead:1,tr:1}, // add tbody
\r
// Tag-name set that exec copies into a local variable; how exec consults it is
// not visible in this excerpt.
26 plainText : { plaintext : 1, xmp : 1, textarea : 1 },
\r
// NOTE(review): the key line that opens this nested table is not visible here; it is
// presumably the `sisters` table that parseStartTag reads - confirm.
// In parseStartTag the outer key is the (lower-cased) tag being opened and the inner
// keys are the open tag names that get closed when one of them is on top of the stack.
33 colgroup : { caption : 1 },
\r
34 thead : { caption : 1, colgroup : 1 },
\r
35 tfoot : { caption : 1, colgroup : 1, thead : 1, tbody : 1 },
\r
36 tbody : { caption : 1, colgroup : 1, thead : 1, tfoot : 1 }
\r
39 * http://www.tohoho-web.com/html/tbody.htm
\r
40 * HTML4.01では、ヘッダとフッタを先読みして表示するために、<tbody> よりも <tfoot> の方を先に記述しなくてはならないと定義されています。
\r
41 * IE5.0 などでは HEAD → BODY → FOOT の順に表示するのですが、
\r
42 * <tfoot> に未対応の古いブラウザでは、HEAD → FOOT → BODY の順に表示されてしまいます。
\r
43 * また、HTML5 では、<tfoot> と <tbody> の順番はどちらでもよいことになりました。
\r
46 // Attributes that have their values filled in disabled="disabled"
\r
// saveAttr replaces the parsed value with the attribute name when fillAttrs[ name ] === 1.
47 fillAttrs : X.Dom.Attr.noValue, //{checked:1,compact:1,declare:1,defer:1,disabled:1,ismap:1,multiple:1,nohref:1,noresize:1,noshade:1,nowrap:1,readonly:1,selected:1};
\r
49 // Special Elements (can contain anything)
\r
// While the innermost open tag is listed here, exec does not tokenise markup: it searches
// for the matching '</tag' and passes the text before it to handler.chars.
50 special : {script:1,style:1,plaintext : 1, xmp : 1, textarea : 1},
\r
// Tokenises an HTML string and reports the pieces through the handler callbacks.
//   html    : markup still to be parsed; it is shortened from the front as tokens are consumed.
//   handler : object whose chars, comment and err methods are called here; progress and
//             complete are called only when async is given. Start/end tags are forwarded
//             through _parseStartTag / _parseEndTag.
//   async   : optional array. async[ 0 ] is the divisor used for progress reporting and
//             async[ 1 ] is the open-tag stack that is carried over between invocations.
//             When omitted a fresh stack is used and the parse is synchronous.
// NOTE(review): several lines of this function (loop header, some branches, the
// declaration of lastHtml) are not visible in this excerpt.
52 exec : function( html, handler, async ){
\r
53 var special = X.Dom.Parser.special,
\r
54 plainText = X.Dom.Parser.plainText,
\r
// Start time is only recorded for asynchronous parsing.
55 startTime = async && X.getTime(),
\r
56 _parseStartTag = X.Dom.Parser._parseStartTag,
\r
57 _parseEndTag = X.Dom.Parser._parseEndTag,
\r
58 stack = async ? async[ 1 ] : [],
\r
60 chars, last, text, index;
\r
// Innermost tag that is still open.
64 last = stack[ stack.length - 1 ];
\r
66 // Make sure we're not in a script or style element
\r
67 if ( last && special[ last.toLowerCase() ] === 1 ) {
\r
// Case-insensitive search for the closing tag of the special element.
68 if( 0 <= ( index = html.toLowerCase().indexOf( '</' + last.toLowerCase() ) ) ){
\r
// Everything before the closing tag is reported as raw text.
69 handler.chars( html.substring( 0, index ) );
\r
// NOTE(review): html is passed unsliced, so _parseEndTag appears to look for the first
// '</' from the start of the text rather than from index - confirm that an earlier '</'
// inside the raw text cannot be picked up instead.
70 if( index = _parseEndTag( stack, handler, html ) ){
\r
71 html = html.substring( index );
\r
// Fallback branches (conditions not visible here) report the remaining html as text.
73 handler.chars( html );
\r
77 handler.chars( html );
\r
// Comment: needs a terminating "-->" at an index greater than 0.
82 if ( html.indexOf("<!--") === 0 ) {
\r
83 if ( 0 < ( index = html.indexOf("-->") ) ) {
\r
// Comment body without the 4-character "<!--" opener.
84 handler.comment( html.substring( 4, index ) );
\r
// Skip past the 3-character "-->".
85 html = html.substring( index + 3 );
\r
// End tag: accepted only when _parseEndTag returns an index greater than 2.
90 } else if ( html.indexOf("</") === 0 ) {
\r
91 if ( 2 < ( index = _parseEndTag( stack, handler, html ) ) ) {
\r
92 html = html.substring( index );
\r
// Start tag: a truthy result is used as the number of characters to drop.
97 } else if ( html.indexOf("<") === 0 ) {
\r
98 if( index = _parseStartTag( stack, last, handler, html ) ){
\r
99 html = html.substring( index );
\r
// _parseStartTag returns false when X.Dom.Parser.parseStartTag (i.e. handler.start) returned false.
102 if( index === false ){
\r
// Plain text: everything up to the next '<', or the whole rest when there is none.
108 index = html.indexOf("<");
\r
110 text = index < 0 ? html : html.substring( 0, index );
\r
111 html = index < 0 ? '' : html.substring( index );
\r
113 handler.chars( text );
\r
// Nothing was consumed compared with lastHtml: report the unparsed rest as an error.
118 if( html === lastHtml ){
\r
119 handler.err( html );
\r
// Asynchronous mode: once X.getTime() has advanced by 15 (presumably milliseconds) and input
// remains, report progress and schedule another exec call with the remaining html.
123 if( async && startTime + 15 <= X.getTime() && html ){
\r
124 handler.progress( 1 - html.length / async[ 0 ] );
\r
125 X.Timer.once( 0, X.Dom.Parser.exec, [ html, handler, async ] );
\r
132 // Clean up any remaining tags
\r
133 X.Dom.Parser.parseEndTag( stack, handler );
\r
// complete is only signalled for asynchronous parsing.
135 async && handler.complete();
\r
// Character-by-character scanner for one start tag at the front of html.
// `phase` is the scanner state; the loop stops when phase reaches 9 or the input ends.
// Attributes are collected in attrs: valueless attributes are stored as plain name
// strings, attributes with a value are stored through saveAttr.
//   stack, last, handler : forwarded to X.Dom.Parser.parseStartTag.
//   html                 : text that starts with the tag to scan.
// Returns false when X.Dom.Parser.parseStartTag returns false. The other return paths
// are not visible in this excerpt; exec uses a truthy result as the number of
// characters consumed.
138 _parseStartTag : function( stack, last, handler, html ){
\r
139 var alphabets = X.Dom.Parser.alphabets,
\r
140 whiteSpace = X.Dom.Parser.whiteSpace,
\r
141 saveAttr = X.Dom.Parser.saveAttr,
\r
142 uri = X.Dom.DTD.ATTR_VAL_IS_URI,
\r
148 chr, start, attrName, quot, escape;
\r
150 while( i < l && phase < 9 ){
\r
151 chr = html.charAt( i );
\r
// Leaves the initial phase when the opening '<' is seen.
154 chr === '<' && ( ++phase );
\r
156 case 1 : // wait for the tag name to start
\r
157 alphabets.indexOf( chr ) !== -1 && ( ++phase && ( start = i ) );
\r
159 case 2 : // wait for the whitespace that ends the tag name
\r
160 whiteSpace.indexOf( chr ) !== -1 ?
\r
161 ( ++phase && ( tagName = html.substring( start, i ) ) ) :
\r
// '>' or '/>' directly after the name ends the tag; `empty` records the '/>' form.
162 ( chr === '>' || ( empty = html.substr( i, 2 ) === '/>' ) ) &&
\r
163 ( ( tagName = html.substring( start, i ) ) && ( phase = 9 ) );
\r
165 case 3 : // wait for an attribute name to start
\r
166 alphabets.indexOf( chr ) !== -1 ?
\r
167 ( ++phase && ( start = i ) ) :
\r
168 ( chr === '>' || ( empty = html.substr( i, 2 ) === '/>' ) ) &&
\r
171 case 4 : // wait for the attribute name to end
\r
173 ( ( phase = 6 ) && ( attrName = html.substring( start, i ) ) ) :
\r
174 whiteSpace.indexOf( chr ) !== -1 &&
\r
175 ( ( phase = 5 ) && ( attrName = html.substring( start, i ) ) );
\r
177 case 5 : // expect the '=' of the attribute, the next attribute, or the end of the tag
\r
178 whiteSpace.indexOf( chr ) !== -1 ?// for attributes it does not support, IE4 produces e.g. cite = http://
\r
180 alphabets.indexOf( chr ) !== -1 ?
\r
// A new name starts: the pending attribute had no value and is stored as a bare string.
181 ( ( phase = 4 ) && ( attrs[ attrs.length ] = attrName ) && ( start = i ) ) :
\r
184 ( chr === '>' || ( empty = html.substr( i, 2 ) === '/>' ) ) &&
\r
185 ( ( phase = 9 ) && ( attrs[ attrs.length ] = attrName ) );
\r
187 case 6 : // wait for the opening quote of the attribute value
\r
188 ( chr === '"' || chr === "'" ) ?
\r
// Remember which quote character opened the value; the value starts after it.
189 ( ( phase = 7 ) && ( quot = chr ) && ( start = i + 1 ) ):
\r
190 whiteSpace.indexOf( chr ) === -1 &&
\r
191 ( ( phase = 8 ) && ( start = i ) ); // no quot
\r
193 case 7 : // wait for the closing quote of the attribute value
\r
// A quote preceded by an active backslash escape does not close the value.
194 !escape && ( chr === quot ) && ( phase = 3 ) && saveAttr( attrs, attrName, html.substring( start, i ) );
\r
196 case 8 : // attribute value without quotes
\r
197 whiteSpace.indexOf( chr ) !== -1 ?
\r
198 ( ( phase = 3 ) && saveAttr( attrs, attrName, html.substring( start, i ) ) ) :
\r
200 ( ( phase = 9 ) && saveAttr( attrs, attrName, html.substring( start, i ) ) ) :
\r
201 ( !escape && uri.indexOf( attrName ) === -1 && html.substr( i, 2 ) === '\/>' ) && // the case where the attribute value is a URI ending in '/' and the attribute is an unsupported one
\r
// Toggle: a backslash that follows an escaping backslash is not itself an escape.
205 escape = chr === '\\' && !escape; // \\\\ is not escape for "
\r
// Hand the scanned tag over; a false result (from handler.start) is propagated to exec.
209 if( X.Dom.Parser.parseStartTag( stack, last, handler, tagName, attrs, empty, i ) === false ) return false;
\r
// Character-by-character scanner for one end tag in html.
// `phase` is the scanner state; the loop stops when phase reaches 9 or the input ends.
// The scanned name is then passed to X.Dom.Parser.parseEndTag.
// NOTE(review): the return statement is not visible in this excerpt; exec uses the
// result as the number of characters consumed.
215 _parseEndTag : function( stack, handler, html ){
\r
216 var alphabets = X.Dom.Parser.alphabets,
\r
217 whiteSpace = X.Dom.Parser.whiteSpace,
\r
224 while( i < l && phase < 9 ){
\r
225 chr = html.charAt( i );
\r
// Leaves the initial phase at the first '</' and steps over its first character.
228 html.substr( i, 2 ) === '</' && ( ++phase && ++i );
\r
230 case 1 : // wait for the tag name to start
\r
231 alphabets.indexOf( chr ) !== -1 && ( ++phase && ( start = i ) );
\r
233 case 2 : // wait for the whitespace that ends the tag name
\r
234 whiteSpace.indexOf( chr ) !== -1 && ( ++phase );
\r
235 ( chr === '>' ) && ( phase = 9 );
\r
// The name is captured as soon as either of the two checks above changed the phase.
236 ( phase !== 2 ) && ( tagName = html.substring( start, i ) );
\r
238 case 3 : // wait for the end of the tag
\r
239 chr === '>' && ( phase = 9 );
\r
245 X.Dom.Parser.parseEndTag( stack, handler, tagName );
\r
// Appends one attribute record to attrs.
//   attrs : array the record is appended to.
//   name  : attribute name as written in the markup; stored lower-cased.
//   value : raw attribute value.
// NOTE(review): the property names of the record are not visible in this excerpt;
// the handler's start method reads an `escaped` property, presumably set from the
// expression below - confirm.
251 saveAttr : function( attrs, name, value ){
\r
252 name = name.toLowerCase();
\r
// Attributes listed in fillAttrs take their own name as value.
253 value = X.Dom.Parser.fillAttrs[ name ] === 1 ? name : value;
\r
254 attrs[ attrs.length ] = {
\r
258 value.indexOf( '"' ) !== -1 ?
\r
// Prefix every double quote with a backslash, then collapse backslash-backslash-quote
// back to backslash-quote so quotes that were already escaped are not escaped twice.
259 value.split( '"' ).join( '\\"' ).split( '\\\\"' ).join( '\\"' ) :
\r
// Applies the implicit-close rules for a newly opened tag, updates the open-tag
// stack and forwards the tag to handler.start.
//   stack   : array of currently open tag names (as written in the markup).
//   last    : innermost open tag name, or undefined when the stack is empty.
//   handler : object providing start (and end, via parseEndTag).
//   tagName : name of the tag being opened.
//   attrs   : attribute list collected by _parseStartTag.
//   unary   : truthy when the tag was written in the self-closing '/>' form.
//   index   : passed through unchanged as the fourth argument of handler.start.
// Returns whatever handler.start returns.
264 parseStartTag : function( stack, last, handler, tagName, attrs, unary, index ) {
\r
265 var tagLower = tagName.toLowerCase(),
\r
266 inline = X.Dom.Parser.inline,
\r
267 parseEndTag = X.Dom.Parser.parseEndTag,
\r
268 sisters = X.Dom.Parser.sisters;
\r
// A block-level tag closes every inline element sitting on top of the stack.
269 if ( X.Dom.Parser.block[ tagLower ] === 1 ) {
\r
270 while ( last && inline[ last.toLowerCase() ] === 1 ) {
\r
271 parseEndTag( stack, handler, last );
\r
272 last = stack[ stack.length - 1 ];
\r
// A self-closing-capable tag closes the innermost open tag when that tag has the same
// name or is listed for it in the sisters table.
// NOTE(review): `last === tagName` compares the names as written (case-sensitive), unlike
// the lower-cased lookups around it - confirm this is intended.
// NOTE(review): when sisters[ tagLower ] exists and last is undefined (empty stack),
// last.toLowerCase() looks like it would throw - confirm no guard exists in lines not shown here.
275 X.Dom.Parser.closeSelf[ tagLower ] === 1 && ( last === tagName || ( sisters[ tagLower ] && sisters[ tagLower ][ last.toLowerCase() ] === 1 ) ) && parseEndTag( stack, handler, last );
\r
// Tags listed in `empty` are always treated as unary.
276 unary = X.Dom.Parser.empty[ tagLower ] === 1 || !!unary;
\r
// Only tags that can have children stay on the stack.
277 !unary && ( stack[ stack.length ] = tagName );
\r
279 return handler.start( tagName, attrs, unary, index );
\r
// Closes open elements and truncates the open-tag stack.
//   stack   : array of currently open tag names; shortened in place.
//   handler : object whose end method is called with each tag name that is closed.
//   tagName : name of the tag to close; exec calls this function without it to
//             close whatever is still open.
// NOTE(review): the conditions that connect the statements below are not visible in
// this excerpt.
282 parseEndTag : function( stack, handler, tagName ) {
\r
283 var pos = 0, i = stack.length;
\r
284 // If no tag name is provided, clean shop
\r
286 // Find the closest opened tag of the same type
\r
// Walks from the top of the stack downwards; names are compared with ===, i.e. exactly as stored.
288 for ( pos = i; 0 <= pos; )
\r
289 if ( stack[ --pos ] === tagName )
\r
293 // Close all the open elements, up the stack
\r
295 handler.end( stack[ --i ] );
\r
297 // Remove the open elements from the stack
\r
298 stack.length = pos;
\r
// Handler object for X.Dom.Parser.exec that builds nodes from the parser callbacks.
// It keeps its working state on itself: `flat` collects the nodes created without a
// parent, `nest` is the chain of currently open nodes, `ignoreError` silences the warning.
// NOTE(review): several lines (including some property headers and branch conditions)
// are not visible in this excerpt.
304 X.Dom._htmlStringToXNode = {
\r
// Called by exec with the html it could not consume: drops the collected nodes and,
// unless ignoreError is exactly true, emits a warning.
307 err : function( html ){
\r
308 X.Dom._htmlStringToXNode.flat.length = 0;
\r
309 X.Dom._htmlStringToXNode.ignoreError !== true && X.Notification.warn( 'X.Dom.Parser() error ' + html );
\r
// Called for every start tag. noChild is the unary flag computed by parseStartTag.
311 start : function( tagName, attrs, noChild, length ){
\r
313 nest = X.Dom._htmlStringToXNode.nest,
\r
314 flat = X.Dom._htmlStringToXNode.flat,
\r
316 attr, name, i, _attrs; //, toIndex;
\r
// Created as a child of the innermost open node ...
318 xnode = nest[ l - 1 ].create( tagName );
\r
// ... or as a new parentless node appended to flat (branch condition not visible here).
320 xnode = flat[ flat.length ] = X.Dom.Node.create( tagName );
\r
// Elements that can have children become the new innermost open node.
322 if( !noChild ) nest[ l ] = xnode;
\r
323 if( i = attrs.length ){
\r
326 if( attr = attrs[ --i ] ){
\r
// Bare strings are valueless attributes and are stored as true.
327 if( typeof attr === 'string' ){
\r
329 _attrs[ name ] = true;
\r
332 _attrs[ name ] = attr.escaped;
\r
336 xnode.attr( _attrs );
\r
// Drops the innermost open node, if any (presumably the body of the `end` callback - its
// header line is not visible here).
340 0 < X.Dom._htmlStringToXNode.nest.length && ( --X.Dom._htmlStringToXNode.nest.length );
\r
// Called for text: added to the innermost open node, or stored as a parentless text node.
342 chars : function( text ){
\r
343 if( X.Dom._htmlStringToXNode.nest.length ){
\r
344 X.Dom._htmlStringToXNode.nest[ X.Dom._htmlStringToXNode.nest.length - 1 ].createText( text );
\r
346 X.Dom._htmlStringToXNode.flat[ X.Dom._htmlStringToXNode.flat.length ] = X.Dom.Node.createText( text );
\r
// Comments are passed to X.emptyFunction.
349 comment : X.emptyFunction
\r
// Synchronous entry point: runs X.Dom.Parser.exec over html with the shared
// X.Dom._htmlStringToXNode handler.
//   html        : markup to parse.
//   ignoreError : stored on the handler; when exactly true the handler's err does not warn.
// NOTE(review): the lines that set up worker.flat and return the result are not
// visible in this excerpt.
352 X.Dom.parse = function( html, ignoreError ){
\r
353 var worker = X.Dom._htmlStringToXNode, ret;
\r
// Reset the chain of open nodes left over from any earlier parse.
355 worker.nest.length = 0;
\r
356 worker.ignoreError = ignoreError;
\r
// No third argument, so exec does not take its asynchronous branch.
357 X.Dom.Parser.exec( html, worker );
\r
// Detach the result list from the shared handler.
359 delete worker.flat;
\r
// Handler members for asynchronous parsing. asyncParse overrides an X.EventDispatcher
// instance with them, so `this.asyncDispatch` below is called on that dispatcher.
// Node building is delegated to the shared X.Dom._htmlStringToXNode handler.
363 X.Dom._asyncHtmlStringToXNode = {
\r
// Runs the shared error handling, then dispatches an X.Event.ERROR event.
364 err : function( html ){
\r
365 X.Dom._htmlStringToXNode.err( html );
\r
366 this.asyncDispatch( 0, { type : X.Event.ERROR } );
\r
368 start : X.Dom._htmlStringToXNode.start,
\r
369 end : X.Dom._htmlStringToXNode.end,
\r
370 chars : X.Dom._htmlStringToXNode.chars,
\r
371 comment : X.emptyFunction,
\r
// pct is the value exec computes as 1 - remaining length / async[ 0 ].
373 progress : function( pct ){
\r
374 this.asyncDispatch( 0, { type : X.Event.PROGRESS, progress : pct } );
\r
// Called by exec when an asynchronous parse finishes: detaches the collected node list
// from the shared handler and delivers it as `xnodes` on an X.Event.SUCCESS event.
376 complete : function(){
\r
377 var ret = X.Dom._htmlStringToXNode.flat;
\r
378 delete X.Dom._htmlStringToXNode.flat;
\r
379 this.asyncDispatch( 0, { type : X.Event.SUCCESS, xnodes : ret } );
\r
// Asynchronous entry point: creates an event dispatcher overridden with the
// X.Dom._asyncHtmlStringToXNode members and starts X.Dom.Parser.exec in async mode.
//   html        : markup to parse.
//   ignoreError : stored on the shared handler; when exactly true its err does not warn.
// NOTE(review): the lines that set up worker.flat and return a value are not visible
// in this excerpt.
// NOTE(review): node-building state (nest / flat) lives on the single shared
// X.Dom._htmlStringToXNode object, so a parse started while an asynchronous one is
// still pending looks like it would reset that state - confirm callers never overlap.
383 X.Dom.asyncParse = function( html, ignoreError ){
\r
384 var dispatcher = X.Class._override( new X.EventDispatcher(), X.Dom._asyncHtmlStringToXNode ),
\r
385 worker = X.Dom._htmlStringToXNode;
\r
// The dispatcher's kill method is registered as a one-time SUCCESS listener.
386 dispatcher.listenOnce( X.Event.SUCCESS, dispatcher, dispatcher.kill );
\r
388 worker.nest.length = 0;
\r
389 worker.ignoreError = ignoreError;
\r
// async = [ total length used for progress, fresh open-tag stack ].
390 X.Dom.Parser.exec( html, dispatcher, [ html.length, [] ] );
\r