// Constructor for the word-recognition component.
// env: environment object; the prototype methods read this.env.memory, this.env.input and this.env.debug.
// NOTE(review): the assignment of env to this.env is not among the lines visible here — presumably on an omitted line; confirm.
1 function AI_WordRecognition(env){
// Cache of the "str" values of env.memory.wordList; stays null until splitByWord builds it.
3 this.wordListCache = null;
// Starts at construction time; splitByWord compares it with env.memory.wordListLastModifiedDate
// to decide whether the cache is stale, and resets it after each rebuild.
4 this.wordListCacheLastModifiedDate = new Date();
// Prototype object literal that holds the AI_WordRecognition methods defined below.
6 AI_WordRecognition.prototype = {
// Collects candidate words from `input` by comparing every suffix of `input` with every
// suffix of the history entries, filters the collected candidates, and merges the survivors
// into the memory's candidate word list.
// input: string to scan (its length and substr are used).
// NOTE(review): hStrBase and cLen are declared on lines not visible in this view; nothing is
// asserted here about how they are initialised or updated.
7 slideLookUpCandidateWordByHistory: function(input){
8 var h = this.env.input.historyList;
// Local list of candidate tags found during this call.
9 var cList = new Array();
10 for(var i = 0, iLen = input.length; i < iLen; i++){
11 //input character loop
// iStr is the suffix of input that starts at offset i.
12 var iStr = input.substr(i);
// Loop over every history entry.
15 for(var j = 0, jLen = h.length; j < jLen; j++){
18 for(var k = 0, kLen = hStrBase.length; k < kLen; k++){
19 //history character loop
// hStr is the suffix of hStrBase that starts at offset k.
20 var hStr = hStrBase.substr(k);
// presumably the length of the shared leading run of hStr and iStr — compareLeftHand is not shown here; confirm
21 var m = hStr.compareLeftHand(iStr);
// Only a value above the current cLen that differs from the full length of iStr passes.
22 if(m > cLen && m != iStr.length){
// Registers the first cLen characters of iStr (trimmed) as a candidate tag and increments
// wordCount on whatever pushUnique returns — presumably the stored tag, existing or new; confirm.
28 cList.pushUnique(new AI_CandidateWordTag(iStr.substr(0, cLen).trim())).wordCount++;
// Filter 00: drop candidates contained in a longer candidate with the same wordCount.
32 this.filterCandidateWordList00(cList);
// Filter 01: drop candidates shorter than 2 characters.
33 this.filterCandidateWordList01(cList, 2);
// Filter 03: drop candidates that memory already resolves to a known word.
34 this.filterCandidateWordList03(cList);
// Hand the remaining candidates to the memory's candidate word list.
36 this.mergeCandidateWordList(cList);
// Adds one candidate tag to the memory's candidate word list.
// strTag: candidate tag object; its .str is used for the duplicate lookup.
// NOTE(review): the handling of the lookup result `s` is on lines not visible here;
// presumably appendMemoryTag only runs when no entry with the same str exists — confirm.
39 appendCandidateWordList: function(strTag){
// Looks for an existing entry whose str equals strTag.str.
40 var s = this.env.memory.candidateWordList.isIncluded(strTag, function(a, b){ return (a.str == b.str); });
45 this.env.memory.appendMemoryTag(strTag);
// Looks up a candidate tag in the memory's candidate word list by its string.
// str: plain string compared against each entry's .str.
// Returns whatever isIncluded returns — presumably the matching tag or a falsy value;
// isIncluded is not shown here, confirm.
48 getCandidateWordTagByString: function(str){
49 return this.env.memory.candidateWordList.isIncluded(str, function(a, b){ return (a.str == b); });
// Passes every element of strTagList, in order, to appendCandidateWordList.
// strTagList: array-like list of candidate tags.
51 mergeCandidateWordList: function(strTagList){
52 for(var i = 0, iLen = strTagList.length; i < iLen; i++){
53 this.appendCandidateWordList(strTagList[i]);
// Removes weak entries from the memory's candidate word list and records the time of the cleanup.
// An entry is removed, with a debug message, when wordCount < 10 or wordLevel < 1.
// NOTE(review): entries are removed while the list is being walked and iLen is captured up front;
// any index/length adjustment would be on lines not visible here — confirm it exists.
56 cleanCandidateWordList: function(){
58 var iLen = this.env.memory.candidateWordList.length;
59 for(var i = 0; i < iLen; i++){
// Occurrence-count threshold.
62 if(this.env.memory.candidateWordList[i].wordCount < 10){
63 this.env.debug("Too small wordCount of candidateWord [" + this.env.memory.candidateWordList[i].str + "]. Removed.\n");
64 this.env.memory.removeMemoryTagByObject(this.env.memory.candidateWordList[i]);
// Word-level threshold.
71 if(this.env.memory.candidateWordList[i].wordLevel < 1){
72 this.env.debug("Too small wordLevel of candidateWord [" + this.env.memory.candidateWordList[i].str + "]. Removed.\n");
73 this.env.memory.removeMemoryTagByObject(this.env.memory.candidateWordList[i]);
// Timestamp of this cleanup, stored on the memory object.
79 this.env.memory.candidateWordListLastCleanedDate = new Date();
// Writes the candidate word list to the debug output: a header with the entry count,
// one line per entry (1-based index, wordCount, wordLevel, str), and a footer.
81 debugShowCandidateWordList: function(){
// Operates on the result of copy(), not on the memory's list object directly
// (presumably a shallow copy — copy() is not shown here; confirm).
82 var c = this.env.memory.candidateWordList.copy();
84 this.env.debug("candidateWordList:" + c.length + "\n #:wCount:level:str\n");
86 for(var i = 0, iLen = c.length; i < iLen; i++){
87 this.env.debug((i + 1) + ":\t" + c[i].wordCount.toString() + ":\t" + c[i].wordLevel.toString() + ":\t" + c[i].str + "\n");
89 this.env.debug("candidateWordList end\n");
// Filter 00, applied to cList: first pass compares entries against a base tag and marks
// redundant ones, second pass removes the entries marked with wordCount == 0.
// cList: list of candidate tags (str, wordCount).
// NOTE(review): the declaration of `c` in both loops and the statements that mark an entry or
// update baseStrTag are on lines not visible here.
91 filterCandidateWordList00:function(cList){
92 //00: Remove words that are contained in a longer word and whose occurrence count equals that of the longer word
93 //Applies the filter to the candidate words in cList.
94 var iLen = cList.length;
// The first entry is the initial base; comparison starts from index 1.
98 var baseStrTag = cList[0];
99 for(var i = 1; i < iLen; i++){
101 if(baseStrTag.str.indexOf(c.str) != -1){
102 //c.str is contained in baseStrTag.str
103 if(baseStrTag.wordCount == c.wordCount){
105 //Remove it later. Mark it by setting its occurrence count to 0.
110 //The word was not removed, i.e. it is a different word, so update baseStrTag
// Second pass: remove the marked entries.
115 for(var i = 1; i < iLen; i++){
117 if(c.wordCount == 0){
118 cList.removeByIndex(i);
// Filter 01: removes from cList every candidate whose str is shorter than minLen.
// cList: list of candidate tags; minLen: minimum allowed str.length.
// NOTE(review): removal happens inside an index loop with iLen captured up front; any
// index/length adjustment after removeByIndex would be on lines not visible here — confirm.
124 filterCandidateWordList01:function(cList, minLen){
125 //01: Remove candidates whose character count is below minLen
126 var iLen = cList.length;
127 for(var i = 0; i < iLen; i++){
128 if(cList[i].str.length < minLen){
129 cList.removeByIndex(i);
// Filter 02: removes from cList every candidate whose wordCount is below minCount.
// cList: list of candidate tags; minCount: minimum allowed wordCount.
// NOTE(review): removal happens inside an index loop with iLen captured up front; any
// index/length adjustment after removeByIndex would be on lines not visible here — confirm.
135 filterCandidateWordList02:function(cList, minCount){
136 //02: Remove candidates whose occurrence count is below minCount
137 var iLen = cList.length;
138 for(var i = 0; i < iLen; i++){
139 if(cList[i].wordCount < minCount){
140 cList.removeByIndex(i);
// Filter 03: removes from cList every candidate for which env.memory.getUUIDFromWord returns
// something other than env.UUID_Meaning_UndefinedString, i.e. strings memory already knows.
// cList: list of candidate tags.
// NOTE(review): removal happens inside an index loop with iLen captured up front; any
// index/length adjustment after removeByIndex would be on lines not visible here — confirm.
146 filterCandidateWordList03: function(cList){
147 //03: Remove candidates already known to be words
148 var iLen = cList.length;
149 for(var i = 0; i < iLen; i++){
150 if(this.env.memory.getUUIDFromWord(cList[i].str) != this.env.UUID_Meaning_UndefinedString){
151 cList.removeByIndex(i);
// Sorts the memory's candidate word list with stableSort, comparing entries by wordCount.
// The comparator is a.wordCount - b.wordCount — ascending order if stableSort follows the
// usual comparator contract (stableSort is not shown here; confirm).
157 sortCandidateWordListByWordCount: function(){
158 this.env.memory.candidateWordList.stableSort(function(a, b){
159 return a.wordCount - b.wordCount;
// Sorts the memory's candidate word list with stableSort, comparing entries by wordLevel.
// The comparator is a.wordLevel - b.wordLevel — ascending order if stableSort follows the
// usual comparator contract (stableSort is not shown here; confirm).
162 sortCandidateWordListByWordLevel: function(){
163 this.env.memory.candidateWordList.stableSort(function(a, b){
164 return a.wordLevel - b.wordLevel;
// Sorts the memory's wordList with stableSort, comparing entries by the length of their str.
// The comparator is b.str.length - a.str.length, i.e. the reverse of the candidate-list sorts above.
167 sortWordListByLength: function(){
168 //Make the entries with more characters come first in the list.
169 this.env.memory.wordList.stableSort(function(a, b){
170 return b.str.length - a.str.length;
// Computes strTag.wordLevel for a single candidate tag and stores it on the tag.
// strTag: candidate tag object; its wordLevel property is overwritten.
// NOTE(review): the declarations of `s` and `iLen` and the bodies of the branches below are on
// lines not visible here, so the exact scoring rule cannot be stated from this view.
173 computeWordLevel: function(strTag){
// Reset before accumulating.
177 strTag.wordLevel = 0;
// Walks the characters of s and branches on the script class of each character
// (hiragana, kanji, katakana, half-width kana).
179 for(var i = 0; i < iLen; i++){
180 if(s.isHiraganaAt(i)){
182 } else if(s.isKanjiAt(i)){
184 } else if(s.isKatakanaAt(i)){
186 } else if(s.isHankakuKanaAt(i)){
// Fixed five-iteration loop; its body is not visible here.
192 for(var i = 0; i < 5; i++){
// The stored result is the reciprocal of the accumulated value.
// NOTE(review): this yields Infinity if the accumulated value is still 0 — confirm whether that can happen.
198 strTag.wordLevel = 1 / strTag.wordLevel;
// Runs computeWordLevel on every entry of the memory's candidate word list.
201 computeEachWordLevel: function(){
202 var iLen = this.env.memory.candidateWordList.length;
203 for(var i = 0; i < iLen; i++){
204 this.computeWordLevel(this.env.memory.candidateWordList[i]);
// Splits the string s using the known words as separators.
// s: string to split.
// Returns the result of s.splitByArraySeparatorSeparatedLong(cache) — presumably an array of
// pieces; that helper is not shown here, confirm.
// The separator list is cached on this instance and rebuilt when it is missing or older than
// env.memory.wordListLastModifiedDate.
207 splitByWord: function(s){
208 if(!this.wordListCache || this.wordListCacheLastModifiedDate < this.env.memory.wordListLastModifiedDate){
209 //If the cache does not exist or is stale, sort the original list and then build the cache
// Side effect: this sorts env.memory.wordList itself (by str length, via sortWordListByLength).
210 this.sortWordListByLength();
// The cache holds the "str" property of each wordList entry.
211 this.wordListCache = this.env.memory.wordList.propertiesNamed("str");
212 this.wordListCacheLastModifiedDate = new Date();
214 return s.splitByArraySeparatorSeparatedLong(this.wordListCache);
// Maps each element of `separated` to the value env.memory.getUUIDFromWord returns for it,
// collecting the results in order.
// separated: array-like list of strings (e.g. the output of splitByWord — presumably; confirm against callers).
// NOTE(review): the rest of this function, including its return statement, lies beyond the lines visible here.
216 getUUIDListFromSeparatedString: function(separated){
217 var retv = new Array();
218 for(var i = 0, iLen = separated.length; i < iLen; i++){
219 retv.push(this.env.memory.getUUIDFromWord(separated[i]));