// Unit test for CharEncoding.readString and CharEncoding.writeString. public void testCharEncoding() { // the max length of a word in sWords is less than 50. // See generateWords. final byte[] buffer = new byte[50 * 3]; final DictBuffer dictBuffer = new ByteArrayDictBuffer(buffer); for (final String word : sWords) { Arrays.fill(buffer, (byte) 0); CharEncoding.writeString(buffer, 0, word); dictBuffer.position(0); final String str = CharEncoding.readString(dictBuffer); assertEquals(word, str); } }
/**
 * Reads the list of shortcut targets stored for a terminal.
 *
 * Entries are read from mShortcutBuffer one after another; each entry is a flags byte
 * followed by an encoded string, and reading stops after the first entry whose flags do
 * not have FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT set.
 *
 * @param terminalId the terminal id used as the key into mShortcutAddressTable.
 * @return the shortcut targets in stored order, or null when the address table reports
 *         SparseTable.NOT_EXIST for this terminal.
 */
private ArrayList<WeightedString> readShortcuts(final int terminalId) {
    // NOTE(review): the existence check uses content index 0 while the position lookup
    // uses FormatSpec.SHORTCUT_CONTENT_INDEX -- presumably the same table; confirm.
    if (SparseTable.NOT_EXIST == mShortcutAddressTable.get(0, terminalId)) {
        return null;
    }

    final ArrayList<WeightedString> shortcuts = CollectionUtils.newArrayList();
    mShortcutBuffer.position(
            mShortcutAddressTable.get(FormatSpec.SHORTCUT_CONTENT_INDEX, terminalId));

    int entryFlags;
    do {
        entryFlags = mShortcutBuffer.readUnsignedByte();
        final String target = CharEncoding.readString(mShortcutBuffer);
        // The frequency bits of the flags byte are passed as the second argument.
        final int frequency = entryFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY;
        shortcuts.add(new WeightedString(target, frequency));
    } while (0 != (entryFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT));

    return shortcuts;
}
@Override public PtNodeInfo readPtNode(int ptNodePos, FormatOptions options) { int addressPointer = ptNodePos; final int flags = PtNodeReader.readPtNodeOptionFlags(mDictBuffer); addressPointer += FormatSpec.PTNODE_FLAGS_SIZE; final int parentAddress = PtNodeReader.readParentAddress(mDictBuffer, options); if (BinaryDictIOUtils.supportsDynamicUpdate(options)) { addressPointer += FormatSpec.PARENT_ADDRESS_SIZE; } final int characters[]; if (0 != (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS)) { int index = 0; int character = CharEncoding.readChar(mDictBuffer); addressPointer += CharEncoding.getCharSize(character); while (FormatSpec.INVALID_CHARACTER != character && index < FormatSpec.MAX_WORD_LENGTH) { mCharacterBuffer[index++] = character; character = CharEncoding.readChar(mDictBuffer); addressPointer += CharEncoding.getCharSize(character); } characters = Arrays.copyOfRange(mCharacterBuffer, 0, index); } else { final int character = CharEncoding.readChar(mDictBuffer); addressPointer += CharEncoding.getCharSize(character); characters = new int[] {character}; } final int terminalId; if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) { terminalId = PtNodeReader.readTerminalId(mDictBuffer); addressPointer += FormatSpec.PTNODE_TERMINAL_ID_SIZE; } else { terminalId = PtNode.NOT_A_TERMINAL; } final int frequency; if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) { frequency = PtNodeReader.readFrequency(mFrequencyBuffer, terminalId); } else { frequency = PtNode.NOT_A_TERMINAL; } int childrenAddress = PtNodeReader.readChildrenAddress(mDictBuffer, flags, options); if (childrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) { childrenAddress += addressPointer; } addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags, options); final ArrayList<WeightedString> shortcutTargets = readShortcuts(terminalId); final ArrayList<PendingAttribute> bigrams; if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) { bigrams = new ArrayList<PendingAttribute>(); final int posOfBigrams = 
mBigramAddressTable.get(0 /* contentTableIndex */, terminalId); mBigramBuffer.position(posOfBigrams); while (bigrams.size() < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { // If bigrams.size() reaches FormatSpec.MAX_BIGRAMS_IN_A_PTNODE, // remaining bigram entries are ignored. final int bigramFlags = mBigramBuffer.readUnsignedByte(); final int targetTerminalId = mBigramBuffer.readUnsignedInt24(); mTerminalAddressTableBuffer.position( targetTerminalId * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE); final int targetAddress = mTerminalAddressTableBuffer.readUnsignedInt24(); bigrams.add( new PendingAttribute( bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY, targetAddress)); if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break; } if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) { throw new RuntimeException( "Too many bigrams in a PtNode (" + bigrams.size() + " but max is " + FormatSpec.MAX_BIGRAMS_IN_A_PTNODE + ")"); } } else { bigrams = null; } return new PtNodeInfo( ptNodePos, addressPointer, flags, characters, frequency, parentAddress, childrenAddress, shortcutTargets, bigrams); }