public static void writeStringBTree(String directory, String stringField, File btreeDir) throws IOException { final String termsFilename = Files.buildPath(directory, SimpleStringFieldWriter.getTermsFilename(stringField)); if (!new File(termsFilename).exists() || new File(termsFilename).length() == 0L) return; final CountingInputStream termsList = new CountingInputStream(new BufferedInputStream(new FileInputStream(termsFilename), 65536)); try { ImmutableBTreeIndex.Writer.write( btreeDir, new AbstractIterator<Generation.Entry<String, LongPair>>() { private String key; private LongPair value; private byte[] lastTerm = new byte[10]; private int lastTermLen = 0; private long lastTermDocOffset = 0L; private long lastTermFileOffset = 0L; @Override public Generation.Entry<String, LongPair> computeNext() { try { if (!nextTerm()) return endOfData(); key = new String(lastTerm, 0, lastTermLen, Charsets.UTF_8); value = new LongPair(lastTermFileOffset, lastTermDocOffset); for (int i = 0; i < BLOCK_SIZE - 1; ++i) { if (!nextTerm()) { break; } } return Generation.Entry.create(key, value); } catch (IOException e) { throw new RuntimeException(e); } } private boolean nextTerm() throws IOException { final int removeLen; // sorry try { removeLen = (int) FlamdexUtils.readVLong(termsList); } catch (EOFException e) { return false; } final int newLen = (int) FlamdexUtils.readVLong(termsList); lastTerm = ensureCapacity(lastTerm, lastTermLen - removeLen + newLen); ByteStreams.readFully(termsList, lastTerm, lastTermLen - removeLen, newLen); lastTermLen = lastTermLen - removeLen + newLen; final long offsetDelta = FlamdexUtils.readVLong(termsList); lastTermDocOffset += offsetDelta; lastTermFileOffset = termsList.getCount(); FlamdexUtils.readVLong(termsList); // termDocFreq return true; } }, new StringSerializer(), new LongPairSerializer(), 65536, false); } finally { termsList.close(); } }
public static void writeIntBTree(String directory, String intField, File btreeDir) throws IOException { final String termsFilename = Files.buildPath(directory, SimpleIntFieldWriter.getTermsFilename(intField)); if (!new File(termsFilename).exists() || new File(termsFilename).length() == 0L) return; final CountingInputStream termsList = new CountingInputStream(new BufferedInputStream(new FileInputStream(termsFilename), 65536)); try { ImmutableBTreeIndex.Writer.write( btreeDir, new AbstractIterator<Generation.Entry<Long, LongPair>>() { private long lastTerm = 0; private long lastTermDocOffset = 0L; private long lastTermFileOffset = 0L; private long key; private LongPair value; @Override protected Generation.Entry<Long, LongPair> computeNext() { try { if (!nextTerm()) return endOfData(); key = lastTerm; value = new LongPair(lastTermFileOffset, lastTermDocOffset); for (int i = 0; i < BLOCK_SIZE - 1; ++i) { if (!nextTerm()) { break; } } return Generation.Entry.create(key, value); } catch (IOException e) { throw new RuntimeException(e); } } private boolean nextTerm() throws IOException { final long termDelta; // sorry try { termDelta = FlamdexUtils.readVLong(termsList); } catch (EOFException e) { return false; } lastTerm += termDelta; final long offsetDelta = FlamdexUtils.readVLong(termsList); lastTermDocOffset += offsetDelta; lastTermFileOffset = termsList.getCount(); FlamdexUtils.readVLong(termsList); // termDocFreq return true; } }, new LongSerializer(), new LongPairSerializer(), 65536, false); } finally { termsList.close(); } }