Ejemplo n.º 1
0
 /**
  * Crawl loop for this worker.
  *
  * <p>While {@code is_run} is true, takes one URL from {@code urlQueue}, points a
  * {@code PageAnalyzer} at it and adds the links the analyzer reports back to the queue. When the
  * queue returns an empty string, the loop sleeps for one second before asking again.
  *
  * <p>The loop ends when {@code is_run} becomes false or when an {@link InterruptedException} is
  * raised (for example during the sleep). In the latter case the interrupt status of the current
  * thread is restored so that the code owning the thread can observe the interruption.
  */
 @Override
 public void run() {
   // Only referenced by the disabled content-extraction step below. They are still constructed
   // here because their constructors are not visible from this method and may have side effects.
   SentenceSplitor sentenceSplitor = new SentenceSplitor();
   CorpusWords cache_chars_unnormal = new CorpusWords("char_unnormal");
   try {
     PageAnalyzer pageAnalyzer = new PageAnalyzer();
     while (is_run) {
       String target_url = urlQueue.get_one_url();
       if (target_url.length() > 0) {
         // Debug output, disabled:
         // System.out.printf("%s\n", target_url);
         pageAnalyzer.set_taget_url(target_url);

         // Content extraction, disabled: split each content block into sentences, skip empty
         // sentences and those longer than 35 characters, and store the ones that pass the
         // isMessyCode check.
         // ArrayList<String> content_datas = pageAnalyzer.getContentDatas();
         // for (int i = 0; i < content_datas.size(); i++) {
         //   ArrayList<String> string_sentences =
         //       sentenceSplitor.split_sentence(content_datas.get(i));
         //   for (int k = 0; k < string_sentences.size(); k++) {
         //     String string_sentence = string_sentences.get(k);
         //     if (string_sentence.length() > 35 || string_sentence.length() == 0)
         //       continue;
         //     if (!isMessyCode(cache_chars_unnormal, string_sentence)) {
         //       db.add_web_content(string_sentence);
         //     }
         //   }
         // }

         // Feed the links found on this page back into the queue.
         urlQueue.add_urls(pageAnalyzer.getLinks());
       } else {
         // Nothing to crawl right now; wait before polling the queue again.
         Thread.sleep(1000);
       }
     }
   } catch (InterruptedException e) {
     e.printStackTrace();
     // Catching InterruptedException clears the interrupt status; set it again so whoever
     // manages this thread can still detect that it was interrupted.
     Thread.currentThread().interrupt();
   }
 }