Exemplo n.º 1
0
  /**
   * Builds a sparse B-tree index over the terms file of a string field and writes it through
   * {@code ImmutableBTreeIndex.Writer.write} into {@code btreeDir}.
   *
   * <p>The terms file is resolved inside {@code directory} from
   * {@code SimpleStringFieldWriter.getTermsFilename(stringField)}. When that file does not exist
   * or has zero length, the method returns without writing anything.
   *
   * <p>Only the first term of each run of {@code BLOCK_SIZE} terms gets an entry. The entry maps
   * the term (decoded as UTF-8) to a {@code LongPair} holding, in order, the number of bytes
   * consumed from the terms file right after that term's offset delta was read, and the running
   * sum of offset deltas up to and including that term.
   *
   * @param directory directory that contains the field's terms file
   * @param stringField name of the string field whose terms are indexed
   * @param btreeDir destination passed to the B-tree writer
   * @throws IOException if the terms file cannot be opened or closed, or if one is propagated
   *     from the B-tree writer
   */
  public static void writeStringBTree(String directory, String stringField, File btreeDir)
      throws IOException {
    final String termsFilename =
        Files.buildPath(directory, SimpleStringFieldWriter.getTermsFilename(stringField));
    final File termsFile = new File(termsFilename);
    final boolean hasTerms = termsFile.exists() && termsFile.length() != 0L;
    if (!hasTerms) {
      return;
    }

    final FileInputStream rawInput = new FileInputStream(termsFilename);
    final CountingInputStream termsIn =
        new CountingInputStream(new BufferedInputStream(rawInput, 65536));
    try {
      final AbstractIterator<Generation.Entry<String, LongPair>> blockHeads =
          new AbstractIterator<Generation.Entry<String, LongPair>>() {
            // Cursor state describing the most recently decoded term record.
            private byte[] termBytes = new byte[10];
            private int termLength = 0;
            private long docOffset = 0L;
            private long fileOffset = 0L;

            @Override
            public Generation.Entry<String, LongPair> computeNext() {
              try {
                if (!readTermRecord()) {
                  return endOfData();
                }

                // Capture the block's first term before the skip loop below overwrites the cursor.
                final String headTerm = new String(termBytes, 0, termLength, Charsets.UTF_8);
                final LongPair headOffsets = new LongPair(fileOffset, docOffset);

                // Consume the remaining terms of this block; stop early if the file runs out.
                int skipped = 0;
                while (skipped < BLOCK_SIZE - 1 && readTermRecord()) {
                  skipped++;
                }

                return Generation.Entry.create(headTerm, headOffsets);
              } catch (IOException e) {
                throw new RuntimeException(e);
              }
            }

            /**
             * Decodes one term record from the stream into the cursor fields.
             *
             * <p>As read here, a record is: a vlong count of trailing bytes to drop from the
             * previous term, a vlong count of new bytes, the new bytes themselves, a vlong offset
             * delta, and a vlong term doc frequency.
             *
             * @return {@code false} if an {@code EOFException} occurred while reading the first
             *     vlong of the record, {@code true} once a full record has been consumed
             */
            private boolean readTermRecord() throws IOException {
              final int droppedSuffixLen;
              // An EOFException on the first field of a record is how the end of the terms file
              // is detected; an EOF on any later field is not caught here and propagates.
              try {
                droppedSuffixLen = (int) FlamdexUtils.readVLong(termsIn);
              } catch (EOFException e) {
                return false;
              }

              final int appendedLen = (int) FlamdexUtils.readVLong(termsIn);

              // The new term keeps a prefix of the previous one and appends the bytes that follow.
              final int keptPrefixLen = termLength - droppedSuffixLen;
              final int updatedLen = keptPrefixLen + appendedLen;
              termBytes = ensureCapacity(termBytes, updatedLen);
              ByteStreams.readFully(termsIn, termBytes, keptPrefixLen, appendedLen);
              termLength = updatedLen;

              final long offsetDelta = FlamdexUtils.readVLong(termsIn);
              docOffset += offsetDelta;

              // Position is sampled after the offset delta and before the doc frequency.
              fileOffset = termsIn.getCount();

              FlamdexUtils.readVLong(termsIn); // termDocFreq: read only to move past it

              return true;
            }
          };

      ImmutableBTreeIndex.Writer.write(
          btreeDir, blockHeads, new StringSerializer(), new LongPairSerializer(), 65536, false);
    } finally {
      termsIn.close();
    }
  }
Exemplo n.º 2
0
  /**
   * Builds a sparse B-tree index over the terms file of an int field and writes it through
   * {@code ImmutableBTreeIndex.Writer.write} into {@code btreeDir}.
   *
   * <p>The terms file is resolved inside {@code directory} from
   * {@code SimpleIntFieldWriter.getTermsFilename(intField)}. When that file does not exist or has
   * zero length, the method returns without writing anything.
   *
   * <p>Only the first term of each run of {@code BLOCK_SIZE} terms gets an entry. The entry maps
   * the term value (the running sum of term deltas) to a {@code LongPair} holding, in order, the
   * number of bytes consumed from the terms file right after that term's offset delta was read,
   * and the running sum of offset deltas up to and including that term.
   *
   * @param directory directory that contains the field's terms file
   * @param intField name of the int field whose terms are indexed
   * @param btreeDir destination passed to the B-tree writer
   * @throws IOException if the terms file cannot be opened or closed, or if one is propagated
   *     from the B-tree writer
   */
  public static void writeIntBTree(String directory, String intField, File btreeDir)
      throws IOException {
    final String termsFilename =
        Files.buildPath(directory, SimpleIntFieldWriter.getTermsFilename(intField));
    final File termsFile = new File(termsFilename);
    final boolean hasTerms = termsFile.exists() && termsFile.length() != 0L;
    if (!hasTerms) {
      return;
    }

    final FileInputStream rawInput = new FileInputStream(termsFilename);
    final CountingInputStream termsIn =
        new CountingInputStream(new BufferedInputStream(rawInput, 65536));
    try {
      final AbstractIterator<Generation.Entry<Long, LongPair>> blockHeads =
          new AbstractIterator<Generation.Entry<Long, LongPair>>() {
            // Cursor state describing the most recently decoded term record.
            private long termValue = 0;
            private long docOffset = 0L;
            private long fileOffset = 0L;

            @Override
            protected Generation.Entry<Long, LongPair> computeNext() {
              try {
                if (!readTermRecord()) {
                  return endOfData();
                }

                // Capture the block's first term before the skip loop below overwrites the cursor.
                final long headTerm = termValue;
                final LongPair headOffsets = new LongPair(fileOffset, docOffset);

                // Consume the remaining terms of this block; stop early if the file runs out.
                int skipped = 0;
                while (skipped < BLOCK_SIZE - 1 && readTermRecord()) {
                  skipped++;
                }

                return Generation.Entry.create(headTerm, headOffsets);
              } catch (IOException e) {
                throw new RuntimeException(e);
              }
            }

            /**
             * Decodes one term record from the stream into the cursor fields.
             *
             * <p>As read here, a record is three vlongs: a term delta, an offset delta, and a
             * term doc frequency.
             *
             * @return {@code false} if an {@code EOFException} occurred while reading the first
             *     vlong of the record, {@code true} once a full record has been consumed
             */
            private boolean readTermRecord() throws IOException {
              final long termDelta;
              // An EOFException on the first field of a record is how the end of the terms file
              // is detected; an EOF on any later field is not caught here and propagates.
              try {
                termDelta = FlamdexUtils.readVLong(termsIn);
              } catch (EOFException e) {
                return false;
              }

              termValue += termDelta;

              final long offsetDelta = FlamdexUtils.readVLong(termsIn);
              docOffset += offsetDelta;

              // Position is sampled after the offset delta and before the doc frequency.
              fileOffset = termsIn.getCount();

              FlamdexUtils.readVLong(termsIn); // termDocFreq: read only to move past it

              return true;
            }
          };

      ImmutableBTreeIndex.Writer.write(
          btreeDir, blockHeads, new LongSerializer(), new LongPairSerializer(), 65536, false);
    } finally {
      termsIn.close();
    }
  }