OSDN Git Service

AIのDB連携機能を実装中。
[chnosproject/AI004.git] / aihtml.js
/**
 * Small HTML-to-text extractor.
 *
 * loadText() strips tags from an HTML string, keeps the visible text in
 * `mainString` and records every `<a>` element in `linkList`.
 *
 * Relies on two helpers that are defined outside this file section:
 * String.prototype.splitByArraySeparatorSeparatedLong and
 * Array.prototype.search2DObject (NOTE(review): their exact contracts are
 * not visible here; this code only assumes the splitter returns the
 * separators as tokens of their own, in document order).
 *
 * @param {*} env  Environment object; stored as-is on `this.env`.
 */
function AI_HTMLParser(env){
    this.env = env;
    // Text extracted by the most recent loadText() call.
    this.mainString = "";
    // Links found by the most recent loadText() call:
    // [[linkURL, dispString], ...]
    this.linkList = [];
}
AI_HTMLParser.prototype = {
    // Separators used to extract tags. "</" is listed before "<" so that a
    // closing-tag opener is not cut into "<" plus "/".
    keywordList0: [
        "</",
        "<",
        ">"
    ],
    // Separators used to extract tag attributes. The backslash-escaped
    // quote is listed before the bare quote for the same reason as above.
    keywordList1: [
        "\\\"",
        "\"",
        "'",
        "=",
        " "
    ],
    /**
     * Extracts the visible text and the links from an HTML string.
     *
     * - `<br>` becomes a newline in the text.
     * - The content of `<script>` and `<style>` is skipped.
     * - Each `<a>...</a>` adds [href, link text] to `linkList`.
     *
     * @param {string} src  HTML source. A falsy value leaves the parser
     *                      state untouched.
     * @returns {string|undefined}  The extracted text (also stored in
     *                      `mainString`), or undefined when `src` is falsy.
     */
    loadText: function(src){
        if(!src){
            return;
        }
        this.mainString = "";
        this.linkList = [];

        // Strings are immutable, so the stripped copy has to be kept.
        var text = src.split("\n").join("");
        var a = text.splitByArraySeparatorSeparatedLong(this.keywordList0);
        // mode 0: anything may come next
        // mode 1: inside a tag, waiting for the closing bracket
        var mode = 0;
        // Name of the script/style element whose content is being skipped,
        // or null when text is collected normally.
        var rawTextTag = null;
        var linkInfo = null;
        var head;
        var attr;
        var s;
        for(var i = 0, iLen = a.length; i < iLen; i++){
            s = a[i];
            if(mode === 1){
                if(s === ">"){
                    mode = 0;
                }
                continue;
            }
            if(rawTextTag !== null){
                // Only the matching closing tag ends a script/style body.
                // A "<" inside it (e.g. "a<b") must not start a tag,
                // otherwise the closing tag itself would be swallowed.
                if(s === "</" && this.parseTagHead(a[i + 1]).name === rawTextTag){
                    rawTextTag = null;
                    mode = 1;
                }
                continue;
            }
            if(s === "<"){
                // Start of an opening tag.
                mode = 1;
                head = this.parseTagHead(a[i + 1]);
                if(head.name === "br"){
                    this.mainString += "\n";
                } else if(head.name === "script" || head.name === "style"){
                    // Tags whose content is ignored.
                    rawTextTag = head.name;
                } else if(head.name === "a"){
                    // Link: remember the target, collect the text until </a>.
                    attr = this.getAttributesFromTagString(head.rest);
                    linkInfo = [attr.search2DObject(0, 1, "href"), ""];
                }
            } else if(s === "</"){
                // Start of a closing tag.
                mode = 1;
                head = this.parseTagHead(a[i + 1]);
                if(head.name === "a" && linkInfo){
                    this.linkList.push(linkInfo);
                    linkInfo = null;
                }
            } else{
                this.mainString += s;
                if(linkInfo){
                    linkInfo[1] += s;
                }
            }
        }
        return this.mainString;
    },
    /**
     * Splits the text between "<" (or "</") and ">" into the tag name and
     * the remainder that holds the attributes.
     *
     * @param {string|undefined} tagBody  Token following the tag opener;
     *        undefined when the input ends right after the opener.
     * @returns {{name: string, rest: string}}  `name` is lower-cased and
     *        stops at whitespace or "/", so "BR/" yields "br"; `rest` is
     *        everything after the name.
     */
    parseTagHead: function(tagBody){
        var body = (typeof tagBody === "string") ? tagBody.trim() : "";
        var match = /^[^\s\/]+/.exec(body);
        var rawName = match ? match[0] : "";
        return {
            name: rawName.toLowerCase(),
            rest: body.substring(rawName.length)
        };
    },
    /**
     * Parses the attribute part of a tag into name/value pairs.
     *
     * Double-quoted, single-quoted and unquoted values are supported.
     * Attribute names are lower-cased. Attributes without a value are not
     * reported.
     *
     * @param {string} tagStr  Tag text after the tag name.
     * @returns {Array}  [[name, value], ...] in document order.
     */
    getAttributesFromTagString: function(tagStr){
        var tokens = tagStr.splitByArraySeparatorSeparatedLong(this.keywordList1);
        var retArray = [];
        var t = [undefined, ""];
        // mode 0: reading the left-hand side (name)
        // mode 1: reading the right-hand side (value)
        var mode = 0;
        // Quote character that opened the current literal, or null.
        var quote = null;
        var s;
        var flush = function(){
            retArray.push(t);
            t = [undefined, ""];
            mode = 0;
            quote = null;
        };
        for(var i = 0, iLen = tokens.length; i < iLen; i++){
            s = tokens[i];
            if(s === ""){
                // Guard in case the splitter emits empty tokens between
                // adjacent separators; they must not overwrite the name.
                continue;
            }
            if(quote === "'"){
                // Inside a single-quoted value everything is literal.
                if(s === "'"){
                    flush();
                } else{
                    t[1] += s;
                }
                continue;
            }
            if(s === "\""){
                if(quote === "\""){
                    flush();
                } else{
                    quote = "\"";
                }
            } else if(s === "'"){
                if(quote === "\""){
                    if(mode === 1){
                        t[1] += s;
                    }
                } else if(mode === 1 && t[1] === ""){
                    // Directly after "=": start of a single-quoted value.
                    quote = "'";
                }
            } else if(s === " "){
                if(quote === "\""){
                    if(mode === 1){
                        t[1] += s;
                    }
                } else if(mode === 1 && t[1] !== ""){
                    // A space ends an unquoted value.
                    flush();
                }
            } else if(s === "\\\""){
                if(quote === "\"" && mode === 1){
                    t[1] += s;
                }
            } else if(s === "="){
                // "=" belongs to the value inside a literal or inside an
                // unquoted value that has already started (e.g. ?a=b).
                if(mode === 1 && (quote === "\"" || t[1] !== "")){
                    t[1] += s;
                }
                mode = 1;
            } else{
                if(mode === 0){
                    // Left-hand side.
                    t[0] = s.toLowerCase();
                } else{
                    // Right-hand side.
                    t[1] += s;
                }
            }
        }
        if(quote === null && mode === 1 && t[1] !== ""){
            // Unquoted value that runs to the end of the tag.
            retArray.push(t);
        }
        return retArray;
    }
};