Beispiel #1
0
 /**
  * Creates a fresh UTF-8 decoder whose error handling depends on {@code LENIENT_CODING}.
  *
  * <p>When {@code LENIENT_CODING} is true, malformed input and unmappable characters are
  * replaced; otherwise both are reported, so decoding fails with an exception.
  *
  * @return a new, independently configured UTF-8 {@link CharsetDecoder}
  */
 public static CharsetDecoder createUtf8Decoder() {
   // The same action applies to both error categories; only leniency selects which one.
   final CodingErrorAction onError =
       LENIENT_CODING ? CodingErrorAction.REPLACE : CodingErrorAction.REPORT;
   final CharsetDecoder utf8 = Charsets.UTF_8.newDecoder();
   return utf8.onMalformedInput(onError).onUnmappableCharacter(onError);
 }
  /**
   * Resolves the comparator name for the component at position {@code i}.
   *
   * <p>Resolution order:
   *
   * <ol>
   *   <li>a comparator configured for the position via {@code comparatorForPosition(i)};
   *   <li>for non-dynamic composites, the {@code BYTESTYPE} type name while {@code bb} still has
   *       data, otherwise {@code null};
   *   <li>for dynamic composites, the name encoded in the buffer: a 16-bit header whose high bit
   *       ({@code 0x8000}) selects between an inline UTF-8 name of {@code header} bytes and a
   *       one-byte alias looked up in {@code aliasToComparatorMapping}.
   * </ol>
   *
   * @param i position of the component
   * @param bb buffer positioned at the component header; it is consumed by the helper reads
   * @return the comparator name, or {@code null} if none could be determined (an empty name is
   *     also reported as {@code null})
   * @throws RuntimeException wrapping a {@link CharacterCodingException} if the inline name is not
   *     valid UTF-8
   */
  private String getComparator(int i, ByteBuffer bb) {
    final String configured = comparatorForPosition(i);
    if (configured != null) {
      return configured;
    }
    if (!dynamic) {
      return bb.hasRemaining() ? BYTESTYPE.getTypeName() : null;
    }
    if (!bb.hasRemaining()) {
      return null;
    }

    String resolved;
    try {
      final int header = getShortLength(bb);
      if ((header & 0x8000) != 0) {
        // Alias form: the low byte of the header is the alias character.
        byte alias = (byte) (header & 0xFF);
        resolved = aliasToComparatorMapping.get(alias);
        if (resolved == null) {
          // Unknown alias: retry with its lower-case form and, if that one is known,
          // mark the comparator as reversed.
          alias = (byte) Character.toLowerCase((char) alias);
          final String base = aliasToComparatorMapping.get(alias);
          resolved = (base == null) ? null : base + "(reversed=true)";
        }
      } else {
        // Inline form: the header is the byte length of the UTF-8 encoded name.
        resolved = Charsets.UTF_8.newDecoder().decode(getBytes(bb, header).duplicate()).toString();
      }
    } catch (CharacterCodingException e) {
      throw new RuntimeException(e);
    }

    // An empty name carries no information; report it the same way as "not found".
    return (resolved == null || resolved.length() == 0) ? null : resolved;
  }
/**
 * Iterates over the string terms stored in a memory-mapped, prefix-compressed term file.
 *
 * <p>Each entry in the term file is read as: a vlong count of bytes to drop from the end of the
 * previous term, a vlong count of new bytes, the new bytes themselves, a vlong delta that is added
 * to the running offset reported by {@link #getOffset()}, and a vlong document frequency.
 *
 * <p>If an index file exists, {@link #reset(String)} uses it to jump close to the requested term
 * before scanning; otherwise it scans from the start of the term file.
 *
 * <p>Instances hold mutable cursor state and a reusable decoder; nothing here synchronizes access.
 *
 * @author jsgroth
 */
final class SimpleStringTermIteratorImpl implements SimpleStringTermIterator {
  private static final Logger log = Logger.getLogger(SimpleStringTermIteratorImpl.class);

  /** Size in bytes of the window copied out of the mapped file on each refill. */
  private static final int BUFFER_SIZE = 8192;

  // Read window over the mapped file: buffer[0..bufferLen) mirrors file bytes starting at
  // bufferOffset, and bufferPtr is the next unread index within the window.
  private final byte[] buffer;
  private int bufferLen;
  private long bufferOffset;
  private int bufferPtr;

  private final String docsFilename;
  // Opened lazily on the first reset() that can use it; stays null when there is no index file.
  private ImmutableBTreeIndex.Reader<String, LongPair> index;
  private final File indexFile;

  private final CharsetDecoder decoder = Charsets.UTF_8.newDecoder();

  private final SharedReference<MMapBuffer> file;
  private final DirectMemory memory;

  // Current term: only the first lastTermLength bytes of lastTermBytes are meaningful.
  // lastTermByteBuffer always wraps the current lastTermBytes array.
  private byte[] lastTermBytes = new byte[100];
  private ByteBuffer lastTermByteBuffer = ByteBuffer.wrap(lastTermBytes);
  private int lastTermLength = 0;
  private long lastTermOffset = 0L;
  private int lastTermDocFreq = 0;
  // Cached decoded form of the current term; null until term() is called after a change.
  private String lastString = null;

  private boolean done = false;
  // True when reset() has already positioned on the term the next call to next() must report.
  private boolean bufferNext = false;
  private boolean closed = false;

  /**
   * Opens the term file and records where the optional index lives.
   *
   * @param mapCache cache used to obtain the memory-mapped term file
   * @param filename path of the term file
   * @param docsFilename value returned verbatim by {@link #getFilename()}
   * @param indexFilename path of the optional index; ignored if it does not exist
   * @throws IOException if the term file or index directory cannot be opened
   */
  SimpleStringTermIteratorImpl(
      MapCache mapCache, String filename, String docsFilename, String indexFilename)
      throws IOException {
    buffer = new byte[BUFFER_SIZE];

    this.docsFilename = docsFilename;
    final CachedFile cf = CachedFile.create(indexFilename);
    if (cf.exists()) {
      indexFile = cf.loadDirectory();
    } else {
      indexFile = null;
    }

    file = mapCache.copyOrOpen(filename);
    memory = file.get().memory();
    done = false;
    bufferLen = 0;
    bufferOffset = 0L;
    bufferPtr = 0;
  }

  /**
   * Positions the iterator so that the following {@link #next()} reports the first term that is
   * greater than or equal to {@code term}.
   *
   * @param term the term to seek to
   * @throws RuntimeException wrapping the {@link IOException} if reading fails; the iterator is
   *     closed before the exception is thrown
   */
  @Override
  public void reset(String term) {
    try {
      internalReset(term);
    } catch (IOException e) {
      close();
      throw new RuntimeException(e);
    }
  }

  private void internalReset(String term) throws IOException {
    // Drop any term buffered by an earlier reset() that was never consumed. Otherwise the first
    // next() of the scan below would return that stale flag without reading anything.
    bufferNext = false;

    if (indexFile != null) {
      if (index == null) {
        index =
            new ImmutableBTreeIndex.Reader<String, LongPair>(
                indexFile, new StringSerializer(), new LongPairSerializer(), false);
      }
      Generation.Entry<String, LongPair> e = index.floor(term);
      if (e == null) {
        // NOTE(review): assumes the index is non-empty so first() yields an entry -- confirm.
        e = index.first();
      }
      lastTermBytes = e.getKey().getBytes(Charsets.UTF_8);
      lastTermByteBuffer = ByteBuffer.wrap(lastTermBytes);
      lastTermLength = lastTermBytes.length;
      lastString = null;
      final LongPair p = e.getValue();
      refillBuffer(p.getFirst());
      lastTermOffset = p.getSecond();
      lastTermDocFreq = (int) readVLong();
      done = false;

      // Scan forward from the indexed term until reaching one that is >= the requested term.
      while (decodeLastTerm().compareTo(term) < 0 && next()) {}
      bufferNext = true;
    } else {
      lastTermLength = 0;
      lastTermOffset = 0L;
      lastTermDocFreq = 0;
      lastString = null;

      bufferLen = 0;
      bufferOffset = 0L;
      bufferPtr = 0;

      done = false;

      // No index: scan from the beginning of the file.
      while (next()
          && new String(lastTermBytes, 0, lastTermLength, Charsets.UTF_8).compareTo(term) < 0) {}
      bufferNext = true;
    }
  }

  /**
   * Returns the current term as a string, decoding it on first use and caching the result until
   * the term changes.
   *
   * @throws RuntimeException wrapping a {@link CharacterCodingException} if decoding fails
   */
  @Override
  public String term() {
    if (lastString == null) {
      try {
        lastString = decodeLastTerm();
      } catch (CharacterCodingException e) {
        throw new RuntimeException(e);
      }
    }
    return lastString;
  }

  /** Decodes the first {@code lastTermLength} bytes of the current term buffer as UTF-8. */
  private String decodeLastTerm() throws CharacterCodingException {
    // The cast keeps this compiling where position()/limit() are declared to return Buffer.
    return decoder
        .decode((ByteBuffer) lastTermByteBuffer.position(0).limit(lastTermLength))
        .toString();
  }

  /**
   * Returns the internal array holding the current term's bytes. Only the first {@link
   * #termStringLength()} bytes are valid; the array is reused and may be replaced by later calls.
   */
  @Override
  public byte[] termStringBytes() {
    return lastTermBytes;
  }

  /** Returns the number of valid bytes in {@link #termStringBytes()}. */
  @Override
  public int termStringLength() {
    return lastTermLength;
  }

  /**
   * Advances to the next term.
   *
   * @return {@code true} if a term is available, {@code false} once the file is exhausted
   * @throws RuntimeException wrapping the {@link IOException} if reading fails; the iterator is
   *     closed before the exception is thrown
   */
  @Override
  public boolean next() {
    try {
      return internalNext();
    } catch (IOException e) {
      close();
      throw new RuntimeException(e);
    }
  }

  private boolean internalNext() throws IOException {
    if (done) {
      return false;
    }
    if (bufferNext) {
      // reset() already positioned on the term to report; hand it out without reading.
      bufferNext = false;
      return true;
    }

    // End of file is only legitimate at an entry boundary, i.e. before its first byte.
    final int firstByte = read();
    if (firstByte == -1) {
      done = true;
      return false;
    }

    final int removeLen = (int) readVLong(firstByte);
    final int newLen = (int) readVLong();

    // Keep the shared prefix of the previous term and append the new suffix after it.
    ensureCapacity(lastTermLength - removeLen + newLen);
    readFully(lastTermBytes, lastTermLength - removeLen, newLen);
    lastTermLength = lastTermLength - removeLen + newLen;
    lastString = null;

    final long offsetDelta = readVLong();
    lastTermOffset += offsetDelta;

    lastTermDocFreq = (int) readVLong();

    return true;
  }

  /** Grows the term buffer (at least doubling it) so that it can hold {@code len} bytes. */
  private void ensureCapacity(int len) {
    // '>=' grows one step earlier than strictly required (writes only reach index len - 1);
    // kept as-is so the returned array's sizing behaviour does not change.
    if (len >= lastTermBytes.length) {
      lastTermBytes = Arrays.copyOf(lastTermBytes, Math.max(len, 2 * lastTermBytes.length));
      lastTermByteBuffer = ByteBuffer.wrap(lastTermBytes);
    }
  }

  /** Returns the document frequency read for the current term. */
  @Override
  public int docFreq() {
    return lastTermDocFreq;
  }

  /**
   * Closes the index reader (if one was opened) and the mapped file. Failures are logged rather
   * than thrown, and repeated calls are no-ops.
   */
  @Override
  public void close() {
    if (!closed) {
      try {
        if (index != null) {
          index.close();
        }
      } catch (IOException e) {
        log.error("error closing index", e);
      }
      try {
        file.close();
      } catch (IOException e) {
        log.error("error closing file", e);
      }
      closed = true;
    }
  }

  /** Returns the docs filename supplied at construction. */
  @Override
  public String getFilename() {
    return docsFilename;
  }

  /** Returns the running offset accumulated for the current term. */
  @Override
  public long getOffset() {
    return lastTermOffset;
  }

  /** Reads one unsigned byte, refilling the window as needed; returns -1 at end of file. */
  private int read() throws IOException {
    if (bufferPtr == bufferLen) {
      refillBuffer(bufferOffset + bufferLen);
      if (bufferLen <= 0) {
        return -1;
      }
    }
    return buffer[bufferPtr++] & 0xFF;
  }

  /**
   * Copies exactly {@code len} bytes into {@code b} starting at {@code off}.
   *
   * @throws IOException if the file ends before {@code len} bytes could be read
   */
  private void readFully(final byte[] b, int off, int len) throws IOException {
    while (true) {
      final int available = bufferLen - bufferPtr;
      if (available >= len) {
        System.arraycopy(buffer, bufferPtr, b, off, len);
        bufferPtr += len;
        return;
      } else {
        System.arraycopy(buffer, bufferPtr, b, off, available);
        off += available;
        len -= available;
        refillBuffer(bufferOffset + bufferLen);
        if (bufferLen <= 0) {
          // Without this check a truncated file would spin here forever making no progress.
          throw new IOException(
              "unexpected end of file: " + len + " more byte(s) expected at offset " + bufferOffset);
        }
      }
    }
  }

  /** Repositions the read window at {@code offset}; leaves it empty at or past end of file. */
  private void refillBuffer(final long offset) throws IOException {
    // Clamp to zero so an offset beyond the end yields an empty window, not a negative length.
    bufferLen = (int) Math.max(0, Math.min(buffer.length, memory.length() - offset));
    if (bufferLen > 0) {
      memory.getBytes(offset, buffer, 0, bufferLen);
    }
    bufferOffset = offset;
    bufferPtr = 0;
  }

  /**
   * Decodes a vlong whose first byte {@code b} has already been read. Each byte contributes its
   * low seven bits, least-significant group first; a byte below 0x80 terminates the value.
   *
   * @throws IOException if the file ends mid-value or the value is longer than ten bytes
   */
  private long readVLong(int b) throws IOException {
    long ret = 0L;
    int shift = 0;
    while (true) {
      if (shift > 63) {
        throw new IOException("malformed vlong: more than 10 bytes");
      }
      // Widen before shifting: an int shift would wrap the distance at 32 bits.
      ret |= ((long) (b & 0x7F)) << shift;
      if (b < 0x80) {
        return ret;
      }
      shift += 7;
      b = read();
      if (b == -1) {
        throw new IOException("unexpected end of file while reading vlong");
      }
    }
  }

  /**
   * Decodes a vlong starting at the current read position.
   *
   * @throws IOException if the file ends before or during the value
   */
  private long readVLong() throws IOException {
    final int first = read();
    if (first == -1) {
      throw new IOException("unexpected end of file while reading vlong");
    }
    return readVLong(first);
  }
}