// Constructor: initialises the word-list cache state of a recognizer instance.
// env: environment object supplied by the caller. The methods on the prototype
//      read this.env, but no assignment of it is visible in this excerpt
//      (a line appears to be elided) — TODO confirm.
1 function AI_WordRecognition(env){
// No cache yet; splitByWord rebuilds the cache whenever this is falsy.
3 this.wordListCache = null;
// Timestamp that splitByWord compares with env.memory.wordListLastModifiedDate.
4 this.wordListCacheLastModifiedDate = new Date();
6 AI_WordRecognition.prototype = {
// Collects candidate words by sliding `input` against the input history:
// each suffix of `input` is compared with each suffix of a history string,
// hits are accumulated in a local list, the list is filtered, and the result
// is merged into the shared candidate word list.
// input: the string to analyse (its length and substr() are used).
// NOTE(review): hStrBase and cLen are used below but their declarations are
// not visible in this excerpt — several lines appear to be elided.
7 slideLookUpCandidateWordByHistory: function(input){
8 var h = this.env.input.historyList;
9 var cList = new Array();
10 for(var i = 0, iLen = input.length; i < iLen; i++){
11 //input character loop
// iStr is the suffix of input that starts at offset i.
12 var iStr = input.substr(i);
// One pass per history entry.
15 for(var j = 0, jLen = h.length; j < jLen; j++){
18 for(var k = 0, kLen = hStrBase.length; k < kLen; k++){
19 //history character loop
// hStr is the suffix of hStrBase that starts at offset k.
20 var hStr = hStrBase.substr(k);
// Presumably the length of the shared leading part of hStr and iStr — TODO confirm against compareLeftHand.
21 var m = hStr.compareLeftHand(iStr);
// Passes only when m exceeds cLen and differs from iStr.length.
22 if(m > cLen && m != iStr.length){
// Builds a tag from the trimmed first cLen characters of iStr, hands it to
// pushUnique, and increments wordCount on whatever pushUnique returns.
28 cList.pushUnique(new AI_CandidateWordTag(iStr.substr(0, cLen).trim())).wordCount++;
// Prune the local list: filter 00, filter 01 with a minimum length of 2, filter 03.
32 this.filterCandidateWordList00(cList);
33 this.filterCandidateWordList01(cList, 2);
34 this.filterCandidateWordList03(cList);
// Hand the surviving candidates to the shared candidate word list.
36 this.mergeCandidateWordList(cList);
// Adds one candidate tag to the candidate word list held in env.memory.
// strTag: tag object exposing a `str` property.
// NOTE(review): the lines between the lookup and the append are not visible
// in this excerpt, so how `s` gates the append cannot be confirmed from here.
39 appendCandidateWordList: function(strTag){
// Look for an existing entry whose str equals strTag.str.
40 var s = this.env.memory.candidateWordList.isIncluded(strTag, function(a, b){ return (a.str == b.str); });
45 this.env.memory.appendMemoryTag(strTag);
// Looks up the candidate word list entry whose `str` equals the given string.
// str: the string to search for.
// Returns whatever candidateWordList.isIncluded returns for that comparison.
48 getCandidateWordTagByString: function(str){
49 return this.env.memory.candidateWordList.isIncluded(str, function(a, b){ return (a.str == b); });
// Passes every element of strTagList, in index order, to appendCandidateWordList.
// strTagList: indexable list of candidate tags with a `length`.
51 mergeCandidateWordList: function(strTagList){
52 for(var i = 0, iLen = strTagList.length; i < iLen; i++){
53 this.appendCandidateWordList(strTagList[i]);
// Prunes the shared candidate word list: entries with wordCount below 10 and
// entries with wordLevel below 1 are reported through env.debug and removed via
// env.memory.removeMemoryTagByObject. The time of the clean-up is then recorded.
// NOTE(review): lines are elided in this excerpt, so whether both checks share
// one loop and how the index is adjusted after a removal is not visible.
56 cleanCandidateWordList: function(){
60 var iLen = this.env.memory.candidateWordList.length;
61 for(var i = 0; i < iLen; i++){
// Drop candidates that occurred fewer than 10 times.
62 if(this.env.memory.candidateWordList[i].wordCount < 10){
63 this.env.debug("Too small wordCount of candidateWord [" + this.env.memory.candidateWordList[i].str + "]. Removed.\n");
64 this.env.memory.removeMemoryTagByObject(this.env.memory.candidateWordList[i]);
// Drop candidates whose wordLevel is below 1.
69 if(this.env.memory.candidateWordList[i].wordLevel < 1){
70 this.env.debug("Too small wordLevel of candidateWord [" + this.env.memory.candidateWordList[i].str + "]. Removed.\n");
71 this.env.memory.removeMemoryTagByObject(this.env.memory.candidateWordList[i]);
// Remember when the list was last cleaned.
77 this.env.memory.candidateWordListLastCleanedDate = new Date();
// Dumps the candidate word list through env.debug: a header with the entry
// count, one tab-separated line per entry (1-based index, wordCount,
// wordLevel, str), and a closing line.
79 debugShowCandidateWordList: function(){
// Work on the result of copy() rather than on the shared list itself.
80 var c = this.env.memory.candidateWordList.copy();
82 this.env.debug("candidateWordList:" + c.length + "\n #:wCount:level:str\n");
84 for(var i = 0, iLen = c.length; i < iLen; i++){
85 this.env.debug((i + 1) + ":\t" + c[i].wordCount.toString() + ":\t" + c[i].wordLevel.toString() + ":\t" + c[i].str + "\n");
87 this.env.debug("candidateWordList end\n");
// Filter 00 over a candidate list; entries are removed from cList itself
// through removeByIndex.
// cList: indexable list of candidate tags exposing `str` and `wordCount`.
// NOTE(review): `c` is used below but its assignment is not visible in this
// excerpt, and the lines following removeByIndex are elided as well.
89 filterCandidateWordList00:function(cList){
90 //00: Remove words that are contained in a longer word and whose occurrence count equals that of the longer word.
91 //Applies the filter to the candidate words in cList.
92 var iLen = cList.length;
// Start with the first entry as the reference word.
96 var baseStrTag = cList[0];
// First pass, starting at index 1.
97 for(var i = 1; i < iLen; i++){
99 if(baseStrTag.str.indexOf(c.str) != -1){
100 //c.str is contained in baseStrTag.str
101 if(baseStrTag.wordCount == c.wordCount){
103 //Delete it later; mark it by setting its occurrence count to 0.
108 //The word was not deleted, i.e. it is a different word, so update baseStrTag.
// Second pass: remove the entries whose wordCount is 0.
113 for(var i = 1; i < iLen; i++){
115 if(c.wordCount == 0){
116 cList.removeByIndex(i);
// Filter 01 over a candidate list; entries are removed from cList itself
// through removeByIndex.
// cList:  indexable list of candidate tags exposing `str`.
// minLen: minimum str.length a candidate must have to be kept.
// NOTE(review): the lines following removeByIndex are not visible in this
// excerpt — confirm that the index and length are adjusted after a removal.
122 filterCandidateWordList01:function(cList, minLen){
123 //01: Remove candidates whose character count is less than minLen.
124 var iLen = cList.length;
125 for(var i = 0; i < iLen; i++){
126 if(cList[i].str.length < minLen){
127 cList.removeByIndex(i);
// Filter 02 over a candidate list; entries are removed from cList itself
// through removeByIndex.
// cList:    indexable list of candidate tags exposing `wordCount`.
// minCount: minimum wordCount a candidate must have to be kept.
// NOTE(review): the lines following removeByIndex are not visible in this
// excerpt — confirm that the index and length are adjusted after a removal.
133 filterCandidateWordList02:function(cList, minCount){
134 //02: Remove candidates whose occurrence count is less than minCount.
135 var iLen = cList.length;
136 for(var i = 0; i < iLen; i++){
137 if(cList[i].wordCount < minCount){
138 cList.removeByIndex(i);
// Filter 03 over a candidate list; entries are removed from cList itself
// through removeByIndex.
// cList: indexable list of candidate tags exposing `str`.
// NOTE(review): the lines following removeByIndex are not visible in this
// excerpt — confirm that the index and length are adjusted after a removal.
144 filterCandidateWordList03: function(cList){
145 //03: Remove candidates that are already known to be words.
146 var iLen = cList.length;
147 for(var i = 0; i < iLen; i++){
// A candidate is dropped when getUUIDFromWord returns anything other than
// env.UUID_Meaning_UndefinedString for its str.
148 if(this.env.memory.getUUIDFromWord(cList[i].str) != this.env.UUID_Meaning_UndefinedString){
149 cList.removeByIndex(i);
// Runs stableSort on env.memory.candidateWordList, comparing entries by
// a.wordCount - b.wordCount (ascending wordCount if stableSort follows the
// Array.prototype.sort comparator convention — TODO confirm).
155 sortCandidateWordListByWordCount: function(){
156 this.env.memory.candidateWordList.stableSort(function(a, b){
157 return a.wordCount - b.wordCount;
// Runs stableSort on env.memory.candidateWordList, comparing entries by
// a.wordLevel - b.wordLevel (ascending wordLevel if stableSort follows the
// Array.prototype.sort comparator convention — TODO confirm).
160 sortCandidateWordListByWordLevel: function(){
161 this.env.memory.candidateWordList.stableSort(function(a, b){
162 return a.wordLevel - b.wordLevel;
// Runs stableSort on env.memory.wordList, comparing entries by the length of
// their `str` with the operands reversed (b before a).
165 sortWordListByLength: function(){
166 //Make the entries with more characters come first in the list.
167 this.env.memory.wordList.stableSort(function(a, b){
168 return b.str.length - a.str.length;
// Computes strTag.wordLevel and stores it on the tag.
// strTag: candidate tag whose `wordLevel` property is overwritten.
// Visible steps: the level is reset to 0, each position of `s` is classified
// by script (hiragana / kanji / katakana / half-width kana), a fixed
// five-iteration loop runs, and the final value is the reciprocal of what
// was accumulated.
// NOTE(review): the declarations of `s` and `iLen`, the branch bodies and the
// body of the five-iteration loop are not visible in this excerpt.
171 computeWordLevel: function(strTag){
175 strTag.wordLevel = 0;
177 for(var i = 0; i < iLen; i++){
178 if(s.isHiraganaAt(i)){
180 } else if(s.isKanjiAt(i)){
182 } else if(s.isKatakanaAt(i)){
184 } else if(s.isHankakuKanaAt(i)){
190 for(var i = 0; i < 5; i++){
// Store the reciprocal of the accumulated value as the final level.
196 strTag.wordLevel = 1 / strTag.wordLevel;
// Calls computeWordLevel on every entry of env.memory.candidateWordList,
// in index order.
199 computeEachWordLevel: function(){
200 var iLen = this.env.memory.candidateWordList.length;
201 for(var i = 0; i < iLen; i++){
202 this.computeWordLevel(this.env.memory.candidateWordList[i]);
// Splits `s` by passing the cached word list to
// s.splitByArraySeparatorSeparatedLong and returns that call's result.
// s: the string to split.
// The cache is rebuilt first when it is missing or when its timestamp is
// earlier than env.memory.wordListLastModifiedDate.
205 splitByWord: function(s){
206 if(!this.wordListCache || this.wordListCacheLastModifiedDate < this.env.memory.wordListLastModifiedDate){
207 //If the cache does not exist or is stale, sort the source list and then build the cache.
208 this.sortWordListByLength();
// Presumably collects the `str` property of every wordList entry — TODO confirm against propertiesNamed.
209 this.wordListCache = this.env.memory.wordList.propertiesNamed("str");
210 this.wordListCacheLastModifiedDate = new Date();
212 return s.splitByArraySeparatorSeparatedLong(this.wordListCache);
// Looks up each element of `separated` with env.memory.getUUIDFromWord and
// pushes the results, in order, onto a new array.
// separated: indexable list of strings with a `length`.
// NOTE(review): the end of this method, including any return statement, lies
// beyond the visible excerpt.
214 getUUIDListFromSeparatedString: function(separated){
215 var retv = new Array();
216 for(var i = 0, iLen = separated.length; i < iLen; i++){
217 retv.push(this.env.memory.getUUIDFromWord(separated[i]));