@Override public void run() { SentenceSplitor sentenceSplitor = new SentenceSplitor(); CorpusWords cache_chars_unnormal = new CorpusWords("char_unnormal"); try { PageAnalyzer pageAnalyzer = new PageAnalyzer(); while (is_run) { String target_url = urlQueue.get_one_url(); if (target_url.length() > 0) { // System.out.printf("%s // \n", target_url); pageAnalyzer.set_taget_url(target_url); // // Content Data // ArrayList<String> content_datas = pageAnalyzer.getContentDatas(); // for (int i = 0; i < content_datas.size(); i ++) { // ArrayList<String> string_sentences = // sentenceSplitor.split_sentence(content_datas.get(i)); // for (int k = 0; k < string_sentences.size(); k++) { // String string_sentence = string_sentences.get(k); // if (string_sentence.length() > 35 || // string_sentence.length() == 0) // continue; // if (!isMessyCode(cache_chars_unnormal, string_sentence)) { // db.add_web_content(string_sentence); // } // } // } // Links urlQueue.add_urls(pageAnalyzer.getLinks()); } else { Thread.sleep(1000); } } } catch (InterruptedException e) { e.printStackTrace(); } }