public boolean AddTFcollection() throws IOException { HashMap CTF = new HashMap(); termEum = reader.terms(); while (termEum.next()) { term = termEum.term(); termDocs = reader.termDocs(term); if (termDocs == null) { return false; } next(); tf = 0.0d; while (doc < Integer.MAX_VALUE) { // for docs in window int f = freqs[pointer]; tf += f * 1.0d; if (++pointer >= pointerMax) { pointerMax = termDocs.read(docs, freqs); // refill buffers if (pointerMax != 0) { pointer = 0; } else { termDocs.close(); // close stream doc = Integer.MAX_VALUE; // set to sentinel value break; } } doc = docs[pointer]; } // System.out.println(term.text()+" "+ tf); CTF.put(term.text(), tf); } termEum.close(); FileOutputStream result = new FileOutputStream(new File(indexDir + "ExtendedCTF")); ByteArrayOutputStream bo = new ByteArrayOutputStream(); ObjectOutputStream oo = new ObjectOutputStream(bo); oo.writeObject(CTF); result.write(bo.toByteArray()); result.close(); return true; }
public double totalWords() throws CorruptIndexException, IOException { byte[] fieldNorms = reader.norms(field); double collectionLength = 0; for (int i = 0; i < fieldNorms.length; i++) { collectionLength += 1.0d * Normalise.decodeNorm(fieldNorms[i]); // System.out.println(field); } CL = collectionLength; avgDL = collectionLength / fieldNorms.length * 1.0d; return collectionLength; }
/**
 * Returns the number of documents in the index containing {@code term}.
 * Thin delegate to the underlying {@code IndexReader}.
 *
 * @param term the term to look up
 * @return the document frequency of the term
 * @throws IOException on a low-level index read error
 */
public int docFreq(Term term) throws IOException {
    return this.reader.docFreq(term);
}
/**
 * Opens the index at {@code index} and remembers the directory path and
 * the field whose statistics this instance will report on.
 *
 * @param index path to the Lucene index directory
 * @param field name of the indexed field to analyse
 * @throws IOException if the index cannot be opened
 */
public ExtraInformation(String index, String field) throws IOException {
    this.indexDir = index;
    this.field = field;
    this.reader = IndexReader.open(index);
}
/**
 * Fetches the stored fields of the i-th document from the index.
 * Thin delegate to the underlying {@code IndexReader}.
 *
 * @param i internal document number
 * @return the stored {@code Document} for that doc number
 * @throws CorruptIndexException if the index is corrupt
 * @throws IOException on a low-level read error
 */
public Document doc(int i) throws CorruptIndexException, IOException {
    return this.reader.document(i);
}