OSDN Git Service

027c130263e2b06d7aeeef66de010f19d389441b
[chnosproject/AI004.git] / aiwrcgnz.js
/**
 * Word-recognition helper. Extracts candidate words from an input string by
 * comparing it against the input history, maintains the candidate word list
 * stored in env.memory, and splits strings using the known word list.
 *
 * @param {Object} env - Owning environment. This class reads env.input,
 *     env.memory, env.debug and env.UUID_Meaning_UndefinedString.
 */
function AI_WordRecognition(env){
	this.env = env;
	// Lazily built by splitByWord() from env.memory.wordList.
	this.wordListCache = null;
	// Compared against env.memory.wordListLastModifiedDate to detect a stale cache.
	this.wordListCacheLastModifiedDate = new Date();
}
AI_WordRecognition.prototype = {
	/**
	 * Scans every suffix of input against every suffix of every history
	 * entry, collects the longest partial match per input position as a
	 * candidate word, filters the candidates and merges the survivors into
	 * the candidate word list in memory.
	 *
	 * @param {string} input - Text to search for candidate words.
	 */
	slideLookUpCandidateWordByHistory: function(input){
		var h = this.env.input.historyList;
		var cList = new Array();
		for(var i = 0, iLen = input.length; i < iLen; i++){
			// Input character loop: iStr is the input from position i onward.
			var iStr = input.substr(i);
			var cLen = 0;
			for(var j = 0, jLen = h.length; j < jLen; j++){
				// History entry loop.
				var hStrBase = h[j];
				for(var k = 0, kLen = hStrBase.length; k < kLen; k++){
					// History character loop.
					var hStr = hStrBase.substr(k);
					// NOTE(review): compareLeftHand is defined elsewhere; presumably it
					// returns the length of the common leading part - confirm.
					var m = hStr.compareLeftHand(iStr);
					// Keep the longest match, ignoring matches that cover all of iStr.
					if(m > cLen && m != iStr.length){
						cLen = m;
					}
				}
			}
			if(cLen > 0){
				// NOTE(review): pushUnique is assumed to return the stored tag
				// (new or already present) so its wordCount can be bumped - confirm.
				cList.pushUnique(new AI_CandidateWordTag(iStr.substr(0, cLen).trim())).wordCount++;
			}
		}
		// Filters
		this.filterCandidateWordList00(cList);
		this.filterCandidateWordList01(cList, 2);
		this.filterCandidateWordList03(cList);
		// Append the remaining candidates to memory
		this.mergeCandidateWordList(cList);
	},
	/**
	 * Registers one candidate tag in memory: if a tag with the same str is
	 * already in the candidate list its wordCount is incremented, otherwise
	 * strTag is stored with wordCount reset to 1.
	 *
	 * @param {Object} strTag - Candidate tag (has at least a str property).
	 */
	appendCandidateWordList: function(strTag){
		var s = this.env.memory.candidateWordList.isIncluded(strTag, function(a, b){ return (a.str == b.str); });
		if(s){
			s.wordCount++;
		} else{
			strTag.wordCount = 1;
			this.env.memory.appendMemoryTag(strTag);
		}
	},
	/**
	 * Looks up a candidate tag by its string.
	 *
	 * @param {string} str - String to look for.
	 * @returns {*} Whatever isIncluded returns for the match; the caller in
	 *     appendCandidateWordList treats a truthy result as the found tag.
	 */
	getCandidateWordTagByString: function(str){
		return this.env.memory.candidateWordList.isIncluded(str, function(a, b){ return (a.str == b); });
	},
	/**
	 * Registers every tag of strTagList through appendCandidateWordList.
	 *
	 * @param {Array} strTagList - Candidate tags to merge into memory.
	 */
	mergeCandidateWordList: function(strTagList){
		for(var i = 0, iLen = strTagList.length; i < iLen; i++){
			this.appendCandidateWordList(strTagList[i]);
		}
	},
	/**
	 * Removes unneeded candidate words from memory:
	 * - candidates with a low occurrence count (wordCount < 10)
	 * - candidates whose word level is below 1 (provisional rule)
	 * and records the time of the cleanup in
	 * env.memory.candidateWordListLastCleanedDate.
	 */
	cleanCandidateWordList: function(){
		var iLen = this.env.memory.candidateWordList.length;
		for(var i = 0; i < iLen; i++){
			// Re-read the list every iteration; removal goes through env.memory.
			var tag = this.env.memory.candidateWordList[i];
			var reason = null;
			if(tag.wordCount < 10){
				reason = "Too small wordCount of candidateWord [";
			} else if(tag.wordLevel < 1){
				reason = "Too small wordLevel of candidateWord [";
			}
			if(reason !== null){
				this.env.debug(reason + tag.str + "]. Removed.\n");
				this.env.memory.removeMemoryTagByObject(tag);
				// The following elements shifted down by one: revisit index i.
				i--;
				iLen--;
			}
		}
		this.env.memory.candidateWordListLastCleanedDate = new Date();
	},
	/**
	 * Writes the candidate word list to env.debug, in reverse list order,
	 * one line per candidate: index, wordCount, wordLevel and str.
	 */
	debugShowCandidateWordList: function(){
		var c = this.env.memory.candidateWordList.copy();
		c.reverse();
		this.env.debug("candidateWordList:" + c.length + "\n #:wCount:level:str\n");

		for(var i = 0, iLen = c.length; i < iLen; i++){
			// String() instead of .toString(): a tag whose wordCount/wordLevel has
			// not been set yet must not make a debug dump throw.
			this.env.debug((i + 1) + ":\t" + String(c[i].wordCount) + ":\t" + String(c[i].wordLevel) + ":\t" + c[i].str + "\n");
		}
		this.env.debug("candidateWordList end\n");
	},
	/**
	 * Helper shared by the filters: removes, in place, every element of list
	 * at index >= firstIndex for which shouldRemove(element) is truthy.
	 *
	 * @param {Array} list - List to filter; must provide removeByIndex(i).
	 * @param {function(Object): boolean} shouldRemove - Removal predicate.
	 * @param {number} [firstIndex=0] - First index that may be removed.
	 */
	removeCandidatesWhere: function(list, shouldRemove, firstIndex){
		var len = list.length;
		for(var i = firstIndex || 0; i < len; i++){
			if(shouldRemove(list[i])){
				list.removeByIndex(i);
				// Stay on the same index: the next element has moved into it.
				i--;
				len--;
			}
		}
	},
	/**
	 * Filter 00: removes candidates that are contained in a longer candidate
	 * and occur exactly as often as that longer candidate.
	 * Each candidate is compared with the most recent preceding candidate
	 * that was kept (initially cList[0]).
	 *
	 * @param {Array} cList - Candidate tags; filtered in place.
	 */
	filterCandidateWordList00:function(cList){
		var iLen = cList.length;
		if(iLen < 1){
			return;
		}
		var baseStrTag = cList[0];
		for(var i = 1; i < iLen; i++){
			var c = cList[i];
			if(baseStrTag.str.indexOf(c.str) != -1){
				// c.str is contained in baseStrTag.str
				if(baseStrTag.wordCount == c.wordCount){
					// ...and the counts are equal, so c is redundant.
					// Mark it with wordCount 0; it is removed below.
					c.wordCount = 0;
				}
			}
			if(c.wordCount > 0){
				// c was kept, i.e. it is a distinct word: it becomes the new base.
				baseStrTag = c;
			}
		}
		// Removal pass (cList[0] is never marked, so start at 1).
		this.removeCandidatesWhere(cList, function(tag){
			return tag.wordCount == 0;
		}, 1);
	},
	/**
	 * Filter 01: removes candidates whose string is shorter than minLen.
	 *
	 * @param {Array} cList - Candidate tags; filtered in place.
	 * @param {number} minLen - Minimum string length to keep.
	 */
	filterCandidateWordList01:function(cList, minLen){
		this.removeCandidatesWhere(cList, function(tag){
			return tag.str.length < minLen;
		});
	},
	/**
	 * Filter 02: removes candidates that occurred fewer than minCount times.
	 *
	 * @param {Array} cList - Candidate tags; filtered in place.
	 * @param {number} minCount - Minimum wordCount to keep.
	 */
	filterCandidateWordList02:function(cList, minCount){
		this.removeCandidatesWhere(cList, function(tag){
			return tag.wordCount < minCount;
		});
	},
	/**
	 * Filter 03: removes candidates that are already known words, i.e. for
	 * which env.memory.getUUIDFromWord returns something other than
	 * env.UUID_Meaning_UndefinedString.
	 *
	 * @param {Array} cList - Candidate tags; filtered in place.
	 */
	filterCandidateWordList03: function(cList){
		var env = this.env;
		this.removeCandidatesWhere(cList, function(tag){
			return env.memory.getUUIDFromWord(tag.str) != env.UUID_Meaning_UndefinedString;
		});
	},
	/** Sorts the candidate word list in memory by ascending wordCount. */
	sortCandidateWordListByWordCount: function(){
		this.env.memory.candidateWordList.stableSort(function(a, b){
			return a.wordCount - b.wordCount;
		});
	},
	/** Sorts the candidate word list in memory by ascending wordLevel. */
	sortCandidateWordListByWordLevel: function(){
		this.env.memory.candidateWordList.stableSort(function(a, b){
			return a.wordLevel - b.wordLevel;
		});
	},
	/** Sorts the word list in memory so that longer strings come first. */
	sortWordListByLength: function(){
		this.env.memory.wordList.stableSort(function(a, b){
			return b.str.length - a.str.length;
		});
	},
	/**
	 * Computes strTag.wordLevel as 1 / (number of distinct character kinds
	 * in strTag.str). The kinds are hiragana, kanji, katakana, half-width
	 * kana and "anything else". A string of a single kind gets 1, two kinds
	 * 0.5, and so on. An empty string has no kind and gets 0 (instead of the
	 * Infinity a plain 1 / 0 would give), so it ranks below every real word.
	 *
	 * @param {Object} strTag - Tag with a str property; wordLevel is written.
	 */
	computeWordLevel: function(strTag){
		var s = strTag.str;
		var f = 0;
		// Collect one flag bit per character kind present in the string.
		for(var i = 0, iLen = s.length; i < iLen; i++){
			if(s.isHiraganaAt(i)){
				f |= 0x01;
			} else if(s.isKanjiAt(i)){
				f |= 0x02;
			} else if(s.isKatakanaAt(i)){
				f |= 0x04;
			} else if(s.isHankakuKanaAt(i)){
				f |= 0x08;
			} else{
				f |= 0x10;
			}
		}
		// Count the flag bits that are set.
		var kindCount = 0;
		for(var bit = 0; bit < 5; bit++){
			if((f & 0x01) != 0){
				kindCount++;
			}
			f >>>= 1;
		}
		strTag.wordLevel = (kindCount > 0) ? 1 / kindCount : 0;
	},
	/** Recomputes wordLevel for every tag of the candidate word list in memory. */
	computeEachWordLevel: function(){
		var iLen = this.env.memory.candidateWordList.length;
		for(var i = 0; i < iLen; i++){
			this.computeWordLevel(this.env.memory.candidateWordList[i]);
		}
	},
	/**
	 * Splits s using the known words as separators.
	 * The separator list is cached; when the cache does not exist or is older
	 * than env.memory.wordListLastModifiedDate, the word list is sorted
	 * (longest first) and the cache is rebuilt from it.
	 *
	 * @param {string} s - String to split.
	 * @returns {*} Result of s.splitByArraySeparatorSeparatedLong(cache).
	 */
	splitByWord: function(s){
		if(!this.wordListCache || this.wordListCacheLastModifiedDate < this.env.memory.wordListLastModifiedDate){
			this.sortWordListByLength();
			this.wordListCache = this.env.memory.wordList.propertiesNamed("str");
			this.wordListCacheLastModifiedDate = new Date();
		}
		return s.splitByArraySeparatorSeparatedLong(this.wordListCache);
	},
	/**
	 * Maps every string of separated to the value returned by
	 * env.memory.getUUIDFromWord for it.
	 *
	 * @param {Array<string>} separated - Strings to look up.
	 * @returns {Array} Lookup results, in the same order.
	 */
	getUUIDListFromSeparatedString: function(separated){
		var retv = new Array();
		for(var i = 0, iLen = separated.length; i < iLen; i++){
			retv.push(this.env.memory.getUUIDFromWord(separated[i]));
		}
		return retv;
	}
};