@Override public void constructIndex() throws IOException { String corpusFile = _options._corpusPrefix + "/"; System.out.println("Construct index from: " + corpusFile); chooseFiles cf = new chooseFiles(_options); int times = cf.writeTimes(); System.out.println(times); FileOps filewriter = new FileOps(_options._indexPrefix + "/"); for (int i = 0; i < times; i++) { Vector<String> files = cf.loadFile(i); for (String name : files) { String filepath = corpusFile + name; File file = new File(filepath); time = new Date().getTime(); String content = ProcessHtml.process(file); totalTime += new Date().getTime() - time; if (content != null) processDocument(content, name); } // System.out.println("Times here : " + i); // System.out.println("processes:" + totalTime); String name = "temp" + i + ".txt"; Map<String, String> content = new HashMap<String, String>(); for (String term : _index.keySet()) { StringBuilder builder = new StringBuilder(); for (Integer x : _index.get(term)) { builder.append(x + "|"); } builder.deleteCharAt(builder.length() - 1); content.put(term, builder.toString()); } filewriter.write(name, content); _index.clear(); _terms.clear(); } String corpus_statistics = _options._indexPrefix + "/" + "statistics"; BufferedWriter outsta = new BufferedWriter(new FileWriter(corpus_statistics)); // the first line in the corpus_statistics is the number of docs in the corpus outsta.write(_numDocs + "\n"); outsta.write(String.valueOf(_totalTermFrequency) + "\n"); outsta.close(); String[] files = new String[times]; for (int count = 0; count < times; count++) { files[count] = "temp" + count + ".txt"; } filewriter.merge(files, "index.txt", "|"); }