public void seek(TermEnum terms) throws IOException { original.seek(terms); docFreq = terms.docFreq(); pointer = -1; if (docFreq > postingMaps.length) { // grow postingsMap PostingMap[] newMap = new PostingMap[docFreq]; System.arraycopy(postingMaps, 0, newMap, 0, postingMaps.length); for (int i = postingMaps.length; i < docFreq; i++) { newMap[i] = new PostingMap(); } postingMaps = newMap; } out.reset(); int i = 0; while (original.next()) { PostingMap map = postingMaps[i++]; map.newDoc = oldToNew[original.doc()]; // remap the newDoc id map.offset = out.getFilePointer(); // save pointer to buffer final int tf = original.freq(); // buffer tf & positions out.writeVInt(tf); int prevPosition = 0; for (int j = tf; j > 0; j--) { // delta encode positions int p = original.nextPosition(); out.writeVInt(p - prevPosition); prevPosition = p; } } out.flush(); docFreq = i; // allow for deletions Arrays.sort(postingMaps, 0, docFreq); // resort by mapped doc ids // HeapSorter.sort(postingMaps,docFreq); // TODO MC - due to the lack of space // NOTE: this might be substantially faster if RAMInputStream were public // and supported a reset() operation. in = tempDir.openInput(TEMP_FILE); }
private static class SortedTermPositions implements TermPositions { private TermPositions original; private int[] oldToNew; private int docFreq; private PostingMap[] postingMaps = new PostingMap[0]; private int pointer; private int freq; private int position; private static final String TEMP_FILE = "temp"; private final RAMDirectory tempDir = new RAMDirectory(); private final RAMOutputStream out = (RAMOutputStream) tempDir.createOutput(TEMP_FILE); private IndexInput in; public SortedTermPositions(TermPositions original, int[] oldToNew) { this.original = original; this.oldToNew = oldToNew; } public void seek(Term term) throws IOException { throw new UnsupportedOperationException(); } public void seek(TermEnum terms) throws IOException { original.seek(terms); docFreq = terms.docFreq(); pointer = -1; if (docFreq > postingMaps.length) { // grow postingsMap PostingMap[] newMap = new PostingMap[docFreq]; System.arraycopy(postingMaps, 0, newMap, 0, postingMaps.length); for (int i = postingMaps.length; i < docFreq; i++) { newMap[i] = new PostingMap(); } postingMaps = newMap; } out.reset(); int i = 0; while (original.next()) { PostingMap map = postingMaps[i++]; map.newDoc = oldToNew[original.doc()]; // remap the newDoc id map.offset = out.getFilePointer(); // save pointer to buffer final int tf = original.freq(); // buffer tf & positions out.writeVInt(tf); int prevPosition = 0; for (int j = tf; j > 0; j--) { // delta encode positions int p = original.nextPosition(); out.writeVInt(p - prevPosition); prevPosition = p; } } out.flush(); docFreq = i; // allow for deletions Arrays.sort(postingMaps, 0, docFreq); // resort by mapped doc ids // HeapSorter.sort(postingMaps,docFreq); // TODO MC - due to the lack of space // NOTE: this might be substantially faster if RAMInputStream were public // and supported a reset() operation. in = tempDir.openInput(TEMP_FILE); } public boolean next() throws IOException { pointer++; if (pointer < docFreq) { in.seek(postingMaps[pointer].offset); freq = in.readVInt(); position = 0; return true; } return false; } public int doc() { return postingMaps[pointer].newDoc; } public int freq() { return freq; } public int nextPosition() throws IOException { int positionIncrement = in.readVInt(); position += positionIncrement; return position; } public int read(int[] docs, int[] freqs) { throw new UnsupportedOperationException(); } public boolean skipTo(int target) { throw new UnsupportedOperationException(); } public void close() throws IOException { original.close(); } }