/*
 * Source: pettanr/clientJs, 0.6.x/js/02_dom/09_XDomParser.js
 *
 * Original code by Erik John Resig (ejohn.org)
 * http://ejohn.org/blog/pure-javascript-html-parser/
 *
 */

/*
 * NOTE(review): this file was recovered from a diff dump in which every span
 * between a '<' and the next '>' had been stripped. The affected regions
 * (parts of exec, the head of _parseStartTag, phases 0-2 of _parseEndTag)
 * were rebuilt from the surviving fragments and are marked NOTE(review)
 * below. Confirm them against the repository before relying on them.
 */

/**
 * Regex-free, character-scanning HTML tokenizer / tree builder driver.
 *
 * The parser reports what it finds to a "handler" object:
 *   handler.start( tagName, attrs, unary, index )  - start tag (tagName is upper case);
 *                                                    returning false aborts the parse
 *   handler.end( tagName )                         - end tag (explicit or implied)
 *   handler.chars( text )                          - text run
 *   handler.comment( text )                        - comment body
 *   handler.err( html )                            - no progress could be made on `html`
 *   handler.progress( pct ), handler.complete()    - async mode only
 */
X_Dom_Parser = {
	// Character classes as bit flags: 1 = upper-case letter, 2 = lower-case letter,
	// 4 = digit, 16 = white space. `CHARS[ chr ] & 3` therefore tests "is a letter".
	CHARS : {
		A:1,B:1,C:1,D:1,E:1,F:1,G:1,H:1,I:1,J:1,K:1,L:1,M:1,N:1,O:1,P:1,Q:1,R:1,S:1,T:1,U:1,V:1,W:1,X:1,Y:1,Z:1,
		a:2,b:2,c:2,d:2,e:2,f:2,g:2,h:2,i:2,j:2,k:2,l:2,m:2,n:2,o:2,p:2,q:2,r:2,s:2,t:2,u:2,v:2,w:2,x:2,y:2,z:2,
		'0' : 4, '1' : 4, '2' : 4, '3' : 4, '4' : 4, '5' : 4, '6' : 4, '7' : 4, '8' : 4, '9' : 4,

		'\t' : 16, '\r\n' : 16, '\r' : 16, '\n' : 16, '\f' : 16, '\b' : 16, ' ' : 16
	},
	alphabets  : 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz',
	whiteSpace : '\t\r\n\f\b ',

	// Empty Elements - HTML 4.01
	empty : X.Dom.DTD.EMPTY,

	// Block Elements - HTML 4.01
	block : {ADDRESS:1,APPLET:1,BLOCKQUOTE:1,BUTTON:1,CENTER:1,DD:1,DEL:1,DIR:1,DIV:1,DL:1,DT:1,FIELDSET:1,FORM:1,FRAMESET:1,HR:1,IFRAME:1,INS:1,
		ISINDEX:1,LI:1,MAP:1,MENU:1,NOFRAMES:1,NOSCRIPT:1,OBJECT:1,OL:1,P:1,PRE:1,SCRIPT:1,TABLE:1,TBODY:1,TD:1,TFOOT:1,TH:1,THEAD:1,TR:1,UL:1 },

	// Inline Elements - HTML 4.01
	inline : {A:1,ABBR:1,ACRONYM:1,APPLET:1,B:1,BASEFONT:1,BDO:1,BIG:1,BR:1,BUTTON:1,CITE:1,CODE:1,DEL:1,DFN:1,EM:1,FONT:1,I:1,IFRAME:1,IMG:1,
		INPUT:1,INS:1,KBD:1,LABEL:1,MAP:1,OBJECT:1,Q:1,S:1,SAMP:1,SCRIPT:1,SELECT:1,SMALL:1,SPAN:1,STRIKE:1,STRONG:1,SUB:1,SUP:1,TEXTAREA:1,TT:1,U:1,VAR:1},

	// Elements that you can, intentionally, leave open
	// (and which close themselves).
	// COLGROUP was spelled "OLGROUP", which made the COLGROUP entry of `sisters`
	// unreachable. OPTION is the real HTML element; OPTIONS is kept only so that
	// existing behavior for that (non-standard) name does not change.
	closeSelf : {COLGROUP:1,DD:1,DT:1,LI:1,OPTION:1,OPTIONS:1,P:1,TBODY:1,TD:1,TFOOT:1,TH:1,THEAD:1,TR:1}, // add tbody

	// sisters[ NEW ][ OPEN ] === 1 : starting NEW implicitly closes a currently open OPEN.
	sisters : {
		TH       : { TD : 1 },
		TD       : { TH : 1 },
		DT       : { DD : 1 },
		DD       : { DT : 1 },
		COLGROUP : { CAPTION : 1 },
		THEAD    : { CAPTION : 1, COLGROUP : 1 },
		TFOOT    : { CAPTION : 1, COLGROUP : 1, THEAD : 1, TBODY : 1 },
		TBODY    : { CAPTION : 1, COLGROUP : 1, THEAD : 1, TFOOT : 1 }
	},
	/*
	 * http://www.tohoho-web.com/html/tbody.htm
	 * (Translated; the element names were stripped from the recovered text and
	 * are presumed to be TFOOT / TBODY - NOTE(review): confirm.)
	 * HTML 4.01 requires TFOOT to be written before TBODY so that header and
	 * footer can be rendered ahead of the body. IE5.0 and similar render
	 * HEAD -> BODY -> FOOT, but older browsers without support for these elements
	 * render HEAD -> FOOT -> BODY. In HTML5 either order is allowed.
	 */

	// Attributes that have their values filled in disabled="disabled"
	fillAttrs : X.Dom.Attr.noValue, //{checked:1,compact:1,declare:1,defer:1,disabled:1,ismap:1,multiple:1,nohref:1,noresize:1,noshade:1,nowrap:1,readonly:1,selected:1};

	// Special Elements (can contain anything)
	special : { SCRIPT : 1, STYLE : 1, PLAINTEXT : 1, XMP : 1, TEXTAREA : 1 },

	/**
	 * Parse `html`, reporting tokens to `handler`.
	 *
	 * @param {string} html     Markup still to be parsed.
	 * @param {Object} handler  Callback object (see the description above X_Dom_Parser).
	 * @param {Array}  [async]  When given, [ totalLength, stack ]: the parse is
	 *                          time-sliced, `stack` carries the open-element stack
	 *                          across slices and handler.progress / handler.complete
	 *                          are called.
	 */
	exec : function( html, handler, async ){
		var special        = X_Dom_Parser.special,
			startTime      = async && X.getTime(),
			_parseStartTag = X_Dom_Parser._parseStartTag,
			_parseEndTag   = X_Dom_Parser._parseEndTag,
			stack          = async ? async[ 1 ] : [],
			lastHtml       = html,
			chars, last, text, index;

		while ( html ) {
			chars = true;
			last  = stack[ stack.length - 1 ];

			// Make sure we're not in a script or style element
			if ( last && special[ last ] === 1 ) {
				// NOTE(review): the body of this branch was lost in extraction and is
				// reconstructed: everything up to the matching end tag is raw text.
				index = html.toUpperCase().indexOf( '</' + last );
				if( 0 <= index ){
					index && handler.chars( html.substring( 0, index ) );
					html  = html.substring( index );
					index = _parseEndTag( stack, handler, html );
					if( index ){
						html = html.substring( index );
					} else {
						// Unterminated end tag: treat the remainder as text.
						handler.chars( html );
						html = '';
					}
				} else {
					handler.chars( html );
					html = '';
				}
			} else {
				// Comment
				if ( html.indexOf( '<!--' ) === 0 ) {
					if ( 0 < ( index = html.indexOf( '-->' ) ) ) {
						handler.comment( html.substring( 4, index ) );
						html  = html.substring( index + 3 );
						chars = false;
					}

				// end tag
				} else if ( html.indexOf( '</' ) === 0 ) {
					// NOTE(review): from here to the end of exec is reconstructed.
					if ( index = _parseEndTag( stack, handler, html ) ) {
						html  = html.substring( index );
						chars = false;
					}

				// start tag
				} else if ( html.indexOf( '<' ) === 0 ) {
					index = _parseStartTag( stack, last, handler, html );
					if( index === false ) return; // handler.start() asked to stop
					if( index ){
						html  = html.substring( index );
						chars = false;
					}
				}

				// Nothing was recognised as markup: emit text up to the next '<'.
				if ( chars ) {
					index = html.indexOf( '<' );
					text  = index < 0 ? html : html.substring( 0, index );
					html  = index < 0 ? ''   : html.substring( index );
					handler.chars( text );
				}
			}

			// No characters were consumed in this pass: report and stop instead of looping forever.
			if( html === lastHtml ){
				handler.err( html );
				return;
			}

			// NOTE(review): slice length (15 ms) and the X.Timer.once( delay, fn, args )
			// call shape are presumed - confirm against X.Timer.
			if( async && html && startTime + 15 <= X.getTime() ){
				handler.progress( 1 - html.length / async[ 0 ] );
				X.Timer.once( 0, X_Dom_Parser.exec, [ html, handler, async ] );
				return;
			}

			lastHtml = html;
		}

		// Clean up any remaining tags
		X_Dom_Parser.parseEndTag( stack, handler );

		async && handler.complete();
	},

	/**
	 * Scan one start tag at the beginning of `html` with a small state machine.
	 *
	 * Phases: 0 '<' expected, 1 wait for tag name, 2 inside tag name,
	 * 3 wait for attribute name, 4 inside attribute name, 5 after attribute name,
	 * 6 wait for attribute value, 7 inside quoted value, 8 inside unquoted value,
	 * 9 tag complete.
	 *
	 * @return {number|boolean} Number of characters consumed, 0 when `html` does
	 *         not start with a complete start tag, or false when handler.start()
	 *         returned false.
	 */
	_parseStartTag : function( stack, last, handler, html ){
		// NOTE(review): the declarations and phases 0-2 (up to the '>' test) were
		// lost in extraction and are reconstructed from how the names are used below.
		var alphabets  = X_Dom_Parser.CHARS,
			whiteSpace = X_Dom_Parser.CHARS,
			saveAttr   = X_Dom_Parser.saveAttr,
			// NOTE(review): presumed source of the "attribute value is a URI" map.
			uri        = X.Dom.DTD.ATTR_VAL_IS_URI || {},
			phase      = 0,
			l          = html.length,
			i          = 0,
			attrs      = [],
			tagName, empty, chr, start, attrName, quot, escape;

		while( i < l && phase < 9 ){
			chr = html.charAt( i );
			switch( phase ){
				case 0 :
					chr === '<' && ( phase = 1 );
					break;
				case 1 : // wait for the start of the tag name
					if( alphabets[ chr ] & 3 ){
						phase = 2;
						start = i;
					}
					break;
				case 2 : // wait for the white space (or tag close) ending the tag name
					if( whiteSpace[ chr ] & 16 ){
						tagName = html.substring( start, i );
						phase   = 3;
					} else if( chr === '>' || ( empty = html.substr( i, 2 ) === '/>' ) ){
						tagName = html.substring( start, i );
						phase   = 9;
					}
					break;
				case 3 : // wait for the start of an attribute name
					if( alphabets[ chr ] & 3 ){
						phase = 4;
						start = i;
					} else if( chr === '>' || ( empty = html.substr( i, 2 ) === '/>' ) ){
						phase = 9;
					}
					break;
				case 4 : // wait for the end of the attribute name
					if( chr === '=' ){
						attrName = html.substring( start, i );
						phase    = 6;
					} else if( whiteSpace[ chr ] & 16 ){
						attrName = html.substring( start, i );
						phase    = 5;
					} else if( chr === '>' || ( empty = html.substr( i, 2 ) === '/>' ) ){
						// Fix: a tag closing right after a valueless attribute
						// (<input disabled>) used to be ignored here, so the scan
						// ran on into the following markup.
						attrs[ attrs.length ] = html.substring( start, i );
						phase = 9;
					}
					break;
				case 5 : // '=' of the attribute, the next attribute, or the tag close
					if( whiteSpace[ chr ] & 16 ){
						// keep waiting; IE4 serialises unsupported attributes as `cite = http://`
					} else if( alphabets[ chr ] & 3 ){
						// The previous attribute had no value; a new one starts here.
						attrs[ attrs.length ] = attrName;
						start = i;
						phase = 4;
					} else if( chr === '=' ){
						phase = 6;
					} else if( chr === '>' || ( empty = html.substr( i, 2 ) === '/>' ) ){
						attrs[ attrs.length ] = attrName;
						phase = 9;
					}
					break;
				case 6 : // wait for the opening quote of the attribute value
					if( chr === '"' || chr === "'" ){
						quot  = chr;
						start = i + 1;
						phase = 7;
					} else if( !( whiteSpace[ chr ] & 16 ) ){
						start = i; // no quote
						phase = 8;
					}
					break;
				case 7 : // wait for the closing quote of the attribute value
					if( !escape && chr === quot ){
						phase = 3;
						saveAttr( attrs, attrName, html.substring( start, i ) );
					}
					break;
				case 8 : // unquoted attribute value
					if( whiteSpace[ chr ] & 16 ){
						phase = 3;
						saveAttr( attrs, attrName, html.substring( start, i ) );
					} else if( chr === '>' ){
						phase = 9;
						saveAttr( attrs, attrName, html.substring( start, i ) );
					} else if( !escape && !uri[ attrName ] && ( empty = html.substr( i, 2 ) === '/>' ) ){
						// For URI-valued attributes a trailing '/' belongs to the value
						// (original note: value is a URI ending in '/' on an unsupported attribute).
						// Fix: the attribute used to be dropped when '/>' ended the value.
						saveAttr( attrs, attrName, html.substring( start, i ) );
						phase = 9;
					}
					break;
			}
			escape = chr === '\\' && !escape; // \\\\ is not escape for "
			++i;
		}
		if( phase === 9 ){
			if( empty ) ++i; // step over the '>' of '/>'
			if( X_Dom_Parser.parseStartTag( stack, last, handler, tagName.toUpperCase(), attrs, empty, i ) === false ) return false;
			return i;
		}
		return 0; // error
	},

	/**
	 * Scan one end tag at the beginning of `html`.
	 *
	 * @return {number} Number of characters consumed, or 0 when `html` does not
	 *         start with a complete end tag.
	 */
	_parseEndTag : function( stack, handler, html ){
		var alphabets  = X_Dom_Parser.CHARS,
			whiteSpace = X_Dom_Parser.CHARS,
			phase      = 0,
			l          = html.length,
			i          = 0,
			tagName,
			chr, start;

		while( i < l && phase < 9 ){
			chr = html.charAt( i );
			switch( phase ){
				// NOTE(review): phases 0-2 were partly lost in extraction; phases 0 and 1
				// and the white-space test of phase 2 are reconstructed.
				case 0 :
					if( html.substr( i, 2 ) === '</' ){
						phase = 1;
						++i; // skip the '/' as well
					}
					break;
				case 1 : // wait for the start of the tag name
					if( alphabets[ chr ] & 3 ){
						phase = 2;
						start = i;
					}
					break;
				case 2 : // wait for the white space (or '>') ending the tag name
					( whiteSpace[ chr ] & 16 ) && ( phase = 3 );
					( chr === '>' ) && ( phase = 9 );
					( phase !== 2 ) && ( tagName = html.substring( start, i ) );
					break;
				case 3 : // wait for the end of the tag
					chr === '>' && ( phase = 9 );
					break;
			}
			++i;
		}
		if( phase === 9 ){
			X_Dom_Parser.parseEndTag( stack, handler, tagName.toUpperCase() );
			return i;
		}
		return 0; // error
	},

	/**
	 * Append { name, value, escaped } to `attrs`.
	 * The name is lower-cased; attributes listed in fillAttrs get their own name
	 * as value; `escaped` is the value with every '"' written as '\"' (quotes that
	 * were already escaped are not escaped twice).
	 */
	saveAttr : function( attrs, name, value ){
		name  = name.toLowerCase();
		value = X_Dom_Parser.fillAttrs[ name ] === 1 ? name : value;
		attrs[ attrs.length ] = {
			name    : name,
			value   : value,
			escaped :
				value.indexOf( '"' ) !== -1 ?
					value.split( '"' ).join( '\\"' ).split( '\\\\"' ).join( '\\"' ) :
					value
		};
	},

	/**
	 * Apply the implied-end-tag rules for a new start tag, push it on `stack`
	 * unless it is unary, and report it.
	 *
	 * @return {*} Whatever handler.start() returns.
	 */
	parseStartTag : function( stack, last, handler, tagName, attrs, unary, index ) {
		var inline      = X_Dom_Parser.inline,
			parseEndTag = X_Dom_Parser.parseEndTag,
			sisters     = X_Dom_Parser.sisters;

		// A block element closes every inline element open at the top of the stack.
		if ( X_Dom_Parser.block[ tagName ] === 1 ) {
			while ( last && inline[ last ] === 1 ) {
				parseEndTag( stack, handler, last );
				last = stack[ stack.length - 1 ];
			}
		}

		// Self-closing elements: <li> after <li>, <td> after <th>, ...
		last && X_Dom_Parser.closeSelf[ tagName ] === 1 &&
			( last === tagName || ( sisters[ tagName ] && sisters[ tagName ][ last ] === 1 ) ) &&
			parseEndTag( stack, handler, last );

		unary = unary || X_Dom_Parser.empty[ tagName ];
		!unary && ( stack[ stack.length ] = tagName );

		return handler.start( tagName, attrs, unary, index );
	},

	/**
	 * Close `tagName` and everything opened after it, calling handler.end() for
	 * each closed element. Without `tagName` every open element is closed; an
	 * end tag with no matching open element is ignored.
	 */
	parseEndTag : function( stack, handler, tagName ) {
		var pos = 0, i = stack.length;
		// If no tag name is provided, clean shop

		// Find the closest opened tag of the same type (pos ends at -1 when there is none)
		if ( tagName )
			for ( pos = i; 0 <= pos; )
				if ( stack[ --pos ] === tagName )
					break;

		if ( 0 <= pos ) {
			// Close all the open elements, up the stack
			for ( ; pos < i; )
				handler.end( stack[ --i ] );

			// Remove the open elements from the stack
			stack.length = pos;
		}
	}

};

/**
 * Handler that builds X.Dom.Node trees.
 *   flat - top-level nodes created so far (null / absent outside a parse)
 *   nest - chain of currently open elements, innermost last
 * NOTE(review): this state is shared by X.Dom.parse and X.Dom.asyncParse, so
 * overlapping async parses would write into the same arrays - confirm callers
 * never overlap.
 */
X.Dom._htmlStringToXNode = {
	flat : null,
	nest : [],

	// Discard everything built so far and, unless ignoreError is true, log a warning.
	err : function( html ){
		X.Dom._htmlStringToXNode.flat.length = 0;
		X.Dom._htmlStringToXNode.ignoreError !== true && X.Logger.warn( 'X_Dom_Parser() error ' + html );
	},

	// Create the element under the innermost open element (or at top level) and apply its attributes.
	start : function( tagName, attrs, noChild, length ){
		var xnode,
			nest = X.Dom._htmlStringToXNode.nest,
			flat = X.Dom._htmlStringToXNode.flat,
			l    = nest.length,
			attr, name, i, _attrs;

		if( l ){
			xnode = nest[ l - 1 ].create( tagName );
		} else {
			xnode = flat[ flat.length ] = X.Dom.Node.create( tagName );
		}
		if( !noChild ) nest[ l ] = xnode;

		if( i = attrs.length ){
			_attrs = {};
			for( ; i; ){
				if( attr = attrs[ --i ] ){
					if( typeof attr === 'string' ){
						// valueless attribute
						name = attr;
						_attrs[ name ] = true;
					} else {
						name = attr.name;
						_attrs[ name ] = attr.escaped;
					}
				}
			}
			xnode.attr( _attrs );
		}
	},

	// Leave the innermost open element.
	end : function(){
		0 < X.Dom._htmlStringToXNode.nest.length && ( --X.Dom._htmlStringToXNode.nest.length );
	},

	// Create a text node under the innermost open element (or at top level).
	chars : function( text ){
		if( X.Dom._htmlStringToXNode.nest.length ){
			X.Dom._htmlStringToXNode.nest[ X.Dom._htmlStringToXNode.nest.length - 1 ].createText( text );
		} else {
			X.Dom._htmlStringToXNode.flat[ X.Dom._htmlStringToXNode.flat.length ] = X.Dom.Node.createText( text );
		}
	},

	// Comments are dropped.
	comment : X.emptyFunction
};

/**
 * Synchronously parse `html`.
 *
 * @param {string}  html
 * @param {boolean} [ignoreError]  true suppresses the warning logged on a parse error.
 * @return {Array} Top-level nodes; empty when a parse error occurred.
 */
X.Dom.parse = function( html, ignoreError ){
	var worker = X.Dom._htmlStringToXNode, ret;
	worker.flat        = [];
	worker.nest.length = 0;
	worker.ignoreError = ignoreError;
	X_Dom_Parser.exec( html, worker );
	ret = worker.flat;
	delete worker.flat;
	return ret;
};

/**
 * Handler methods mixed into the dispatcher returned by X.Dom.asyncParse.
 * Node building is delegated to X.Dom._htmlStringToXNode; results are reported
 * as X.Event.PROGRESS / SUCCESS / ERROR events.
 */
X.Dom._asyncHtmlStringToXNode = {
	err : function( html ){
		X.Dom._htmlStringToXNode.err( html );
		this.asyncDispatch( 0, { type : X.Event.ERROR } );
	},
	start   : X.Dom._htmlStringToXNode.start,
	end     : X.Dom._htmlStringToXNode.end,
	chars   : X.Dom._htmlStringToXNode.chars,
	comment : X.emptyFunction,

	progress : function( pct ){
		this.asyncDispatch( 0, { type : X.Event.PROGRESS, percent : pct } );
	},
	complete : function(){
		var ret = X.Dom._htmlStringToXNode.flat;
		delete X.Dom._htmlStringToXNode.flat;
		this.asyncDispatch( 0, { type : X.Event.SUCCESS, xnodes : ret } );
	}
};

/**
 * Parse `html` in time slices.
 *
 * @param {string}  html
 * @param {boolean} [ignoreError]  true suppresses the warning logged on a parse error.
 * @return {X.EventDispatcher} Dispatcher that fires X.Event.PROGRESS ({ percent }),
 *         then X.Event.SUCCESS ({ xnodes }) or X.Event.ERROR. It kills itself
 *         after SUCCESS.
 */
X.Dom.asyncParse = function( html, ignoreError ){
	var dispatcher = X.Class._override( new X.EventDispatcher(), X.Dom._asyncHtmlStringToXNode ),
		worker     = X.Dom._htmlStringToXNode;
	dispatcher.listenOnce( X.Event.SUCCESS, dispatcher, dispatcher.kill );
	worker.flat        = [];
	worker.nest.length = 0;
	worker.ignoreError = ignoreError;
	// async = [ total length (for the progress ratio), open-element stack kept across slices ]
	X_Dom_Parser.exec( html, dispatcher, [ html.length, [] ] );
	return dispatcher;
};