Ejemplo n.º 1
0
 /**
  * Reads a 24-bit forward-link address from {@code mDictBuffer} and, when it lies inside the
  * buffer, moves the buffer there.
  *
  * @return {@code true} if the address was within {@code [0, limit)} and the buffer was
  *     repositioned to it; {@code false} otherwise, in which case the buffer stays wherever the
  *     read left it
  */
 @Override
 public boolean readAndFollowForwardLink() {
   final int forwardLinkAddress = mDictBuffer.readUnsignedInt24();
   final boolean outOfRange =
       forwardLinkAddress < 0 || forwardLinkAddress >= mDictBuffer.limit();
   if (outOfRange) {
     return false;
   }
   mDictBuffer.position(forwardLinkAddress);
   return true;
 }
 /**
  * Unit test for CharEncoding.readString and CharEncoding.writeString: every word in
  * {@code sWords} is written into a scratch array, read back through a {@code DictBuffer}
  * wrapping that array, and compared with the original.
  */
 public void testCharEncoding() {
   // The max length of a word in sWords is less than 50 (see generateWords); the scratch
   // array is sized at 3 bytes for each of those 50 slots.
   final int scratchSize = 50 * 3;
   final byte[] scratch = new byte[scratchSize];
   final DictBuffer reader = new ByteArrayDictBuffer(scratch);
   for (final String expected : sWords) {
     // Wipe whatever the previous word left behind before encoding the next one.
     Arrays.fill(scratch, (byte) 0);
     CharEncoding.writeString(scratch, 0, expected);
     reader.position(0);
     assertEquals(expected, CharEncoding.readString(reader));
   }
 }
Ejemplo n.º 3
0
  /**
   * Reads the list of shortcut targets stored for a terminal.
   *
   * <p>Each entry in the shortcut buffer is a flags byte followed by an encoded string; the
   * flags carry both the entry's frequency bits and a "has next" bit that chains entries.
   *
   * @param terminalId id used as the lookup key into the shortcut address table
   * @return the shortcut targets in the order they are stored, or {@code null} when the address
   *     table yields {@code SparseTable.NOT_EXIST} for this id
   */
  private ArrayList<WeightedString> readShortcuts(final int terminalId) {
    final boolean hasNoShortcuts =
        mShortcutAddressTable.get(0, terminalId) == SparseTable.NOT_EXIST;
    if (hasNoShortcuts) {
      return null;
    }

    final ArrayList<WeightedString> shortcuts = CollectionUtils.newArrayList();
    final int shortcutListPos =
        mShortcutAddressTable.get(FormatSpec.SHORTCUT_CONTENT_INDEX, terminalId);
    mShortcutBuffer.position(shortcutListPos);

    // At least one entry is always read; the loop continues while the entry just read is
    // flagged as having a successor.
    int entryFlags;
    do {
      entryFlags = mShortcutBuffer.readUnsignedByte();
      final String target = CharEncoding.readString(mShortcutBuffer);
      shortcuts.add(
          new WeightedString(target, entryFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY));
    } while (0 != (entryFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT));
    return shortcuts;
  }
Ejemplo n.º 4
0
 /**
  * Tells whether another PtNode array follows, judged solely from where {@code mDictBuffer}
  * currently points.
  *
  * @return {@code true} unless the buffer's current position equals
  *     {@code FormatSpec.NO_FORWARD_LINK_ADDRESS}
  */
 @Override
 public boolean hasNextPtNodeArray() {
   final int currentPos = mDictBuffer.position();
   return FormatSpec.NO_FORWARD_LINK_ADDRESS != currentPos;
 }
Ejemplo n.º 5
0
 /**
  * Returns the current position of the underlying dictionary buffer.
  *
  * @return whatever {@code mDictBuffer.position()} reports
  */
 @Override
 public int getPosition() {
   return mDictBuffer.position();
 }
Ejemplo n.º 6
0
 /**
  * Moves the underlying dictionary buffer to the given position.
  *
  * @param newPos position handed straight to {@code mDictBuffer}; no range check is done here
  */
 @Override
 public void setPosition(int newPos) {
   mDictBuffer.position(newPos);
 }
Ejemplo n.º 7
0
  /**
   * Decodes one PtNode from the dictionary buffers and returns its fields as a
   * {@code PtNodeInfo}.
   *
   * <p>Reading starts at the current position of {@code mDictBuffer}. This method never seeks
   * that buffer to {@code ptNodePos}, so it presumably expects the caller to have positioned it
   * there already (TODO confirm against callers). {@code ptNodePos} only seeds the running
   * address bookkeeping and is passed through to the result.
   *
   * @param ptNodePos address of the PtNode; initial value of the running {@code addressPointer}
   * @param options format options forwarded to the parent-address and children-address helpers
   * @return the decoded node: origin address, end address, flags, characters, frequency, parent
   *     address, children address, shortcut targets, and bigrams ({@code null} when the
   *     bigram flag is not set)
   * @throws RuntimeException if the bigram list collected for this node reaches
   *     {@code FormatSpec.MAX_BIGRAMS_IN_A_PTNODE} entries
   */
  @Override
  public PtNodeInfo readPtNode(int ptNodePos, FormatOptions options) {
    // Running address: advanced by the size of each field as that field is read.
    int addressPointer = ptNodePos;
    final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer);
    addressPointer += FormatSpec.PTNODE_FLAGS_SIZE;

    // readParentAddress is always called, but its size is only counted when the format
    // supports dynamic updates.
    final int parentAddress = PtNodeReader.readParentAddress(mDictBuffer, options);
    if (BinaryDictIOUtils.supportsDynamicUpdate(options)) {
      addressPointer += FormatSpec.PARENT_ADDRESS_SIZE;
    }

    final int characters[];
    if (0 != (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS)) {
      // Multi-character node: keep reading until INVALID_CHARACTER is seen or
      // MAX_WORD_LENGTH characters have been stored. The size of the character that ends
      // the loop is added to addressPointer as well, even though it is not stored.
      int index = 0;
      int character = CharEncoding.readChar(mDictBuffer);
      addressPointer += CharEncoding.getCharSize(character);
      while (FormatSpec.INVALID_CHARACTER != character && index < FormatSpec.MAX_WORD_LENGTH) {
        mCharacterBuffer[index++] = character;
        character = CharEncoding.readChar(mDictBuffer);
        addressPointer += CharEncoding.getCharSize(character);
      }
      // Copy out of the shared scratch buffer so the result does not alias it.
      characters = Arrays.copyOfRange(mCharacterBuffer, 0, index);
    } else {
      // Single-character node: exactly one character is read.
      final int character = CharEncoding.readChar(mDictBuffer);
      addressPointer += CharEncoding.getCharSize(character);
      characters = new int[] {character};
    }
    // Only terminal nodes carry a terminal id in mDictBuffer; others get NOT_A_TERMINAL.
    final int terminalId;
    if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) {
      terminalId = PtNodeReader.readTerminalId(mDictBuffer);
      addressPointer += FormatSpec.PTNODE_TERMINAL_ID_SIZE;
    } else {
      terminalId = PtNode.NOT_A_TERMINAL;
    }

    // The frequency comes from the separate frequency buffer, keyed by terminal id, so it
    // does not advance addressPointer. Non-terminals get NOT_A_TERMINAL here too.
    final int frequency;
    if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) {
      frequency = PtNodeReader.readFrequency(mFrequencyBuffer, terminalId);
    } else {
      frequency = PtNode.NOT_A_TERMINAL;
    }
    // A real children address is rebased by adding addressPointer as it stands before the
    // children-address field's own size is counted (presumably the stored value is an offset
    // relative to that field -- TODO confirm against the format spec).
    int childrenAddress = PtNodeReader.readChildrenAddress(mDictBuffer, flags, options);
    if (childrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) {
      childrenAddress += addressPointer;
    }
    addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags, options);
    // Called for every node, including non-terminals (terminalId == NOT_A_TERMINAL).
    final ArrayList<WeightedString> shortcutTargets = readShortcuts(terminalId);

    final ArrayList<PendingAttribute> bigrams;
    if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) {
      bigrams = new ArrayList<PendingAttribute>();
      final int posOfBigrams = mBigramAddressTable.get(0 /* contentTableIndex */, terminalId);
      mBigramBuffer.position(posOfBigrams);
      while (bigrams.size() < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
        // If bigrams.size() reaches FormatSpec.MAX_BIGRAMS_IN_A_PTNODE,
        // remaining bigram entries are ignored.
        // Each entry is a flags byte followed by a 24-bit target terminal id; the id is
        // turned into an address by reading the terminal address table at
        // id * TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE.
        final int bigramFlags = mBigramBuffer.readUnsignedByte();
        final int targetTerminalId = mBigramBuffer.readUnsignedInt24();
        mTerminalAddressTableBuffer.position(
            targetTerminalId * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE);
        final int targetAddress = mTerminalAddressTableBuffer.readUnsignedInt24();
        bigrams.add(
            new PendingAttribute(
                bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY, targetAddress));
        if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break;
      }
      // NOTE(review): the loop above can never collect more than MAX_BIGRAMS_IN_A_PTNODE
      // entries, so this check fires exactly when the count equals the maximum -- including
      // when the last entry read had no "has next" flag and nothing was dropped. That
      // conflicts with the "remaining bigram entries are ignored" comment inside the loop;
      // confirm whether the limit is meant to be exclusive.
      if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
        throw new RuntimeException(
            "Too many bigrams in a PtNode ("
                + bigrams.size()
                + " but max is "
                + FormatSpec.MAX_BIGRAMS_IN_A_PTNODE
                + ")");
      }
    } else {
      bigrams = null;
    }
    // addressPointer now holds ptNodePos plus the sizes of all fields counted above.
    return new PtNodeInfo(
        ptNodePos,
        addressPointer,
        flags,
        characters,
        frequency,
        parentAddress,
        childrenAddress,
        shortcutTargets,
        bigrams);
  }
Ejemplo n.º 8
0
 /**
  * Reads a terminal id from the given buffer.
  *
  * @param dictBuffer buffer to read from
  * @return the value produced by a single {@code readInt()} call on {@code dictBuffer}
  */
 protected static int readTerminalId(final DictBuffer dictBuffer) {
   return dictBuffer.readInt();
 }
Ejemplo n.º 9
0
 /**
  * Looks up the frequency stored for a terminal in the frequency buffer.
  *
  * <p>The buffer is repositioned as a side effect of this call.
  *
  * @param frequencyBuffer buffer addressed in records of
  *     {@code FormatSpec.FREQUENCY_AND_FLAGS_SIZE} bytes, one per terminal id
  * @param terminalId index of the record to read
  * @return the unsigned byte found one byte past the start of that record
  */
 protected static int readFrequency(final DictBuffer frequencyBuffer, final int terminalId) {
   final int recordStart = terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE;
   // The frequency sits at offset 1 of the record (presumably after a flags byte -- verify
   // against the format spec).
   frequencyBuffer.position(recordStart + 1);
   return frequencyBuffer.readUnsignedByte();
 }