// The #bytes required to serialize the count map. // Here let us assume to use 4 bytes for each of the int items. Normally it will consume lesser // bytes as we will use vints. // TODO Do we need to consider 5 as the number of bytes for each of the int field? Else there is // a chance of ArrayIndexOutOfBoundsException when all the int fields are having very large // values. Will that ever occur? private int countMapSerializationSize() { int size = Bytes.SIZEOF_INT; // Write the number of entries in the Map for (ImmutableBytesPtr key : this.valueVsCount.keySet()) { // Add up the key and key's lengths (Int) and the value size += key.getLength() + Bytes.SIZEOF_INT + Bytes.SIZEOF_INT; } return size; }
// The heap size which will be taken by the count map. private int countMapHeapSize() { int size = 0; if (this.valueVsCount.size() > 0) { for (ImmutableBytesPtr key : this.valueVsCount.keySet()) { size += SizedUtil.MAP_ENTRY_SIZE + // entry Bytes.SIZEOF_INT + // key size key.getLength() + SizedUtil.ARRAY_SIZE; // value size } } else { // Initially when the getSize() is called, we dont have any entries in the map so as to // tell the exact heap need. Let us approximate the #entries SizedUtil.sizeOfMap( DEFAULT_ESTIMATED_DISTINCT_VALUES, SizedUtil.IMMUTABLE_BYTES_PTR_SIZE, Bytes.SIZEOF_INT); } return size; }
@Override public boolean evaluate(Tuple tuple, ImmutableBytesWritable ptr) { // This serializes the Map. The format is as follows // Map size(VInt ie. 1 to 5 bytes) + // ( key length [VInt ie. 1 to 5 bytes] + key bytes + value [VInt ie. 1 to 5 bytes] )* buffer = new byte[countMapSerializationSize()]; int offset = 0; offset += ByteUtil.vintToBytes(buffer, offset, this.valueVsCount.size()); for (Entry<ImmutableBytesPtr, Integer> entry : this.valueVsCount.entrySet()) { ImmutableBytesPtr key = entry.getKey(); offset += ByteUtil.vintToBytes(buffer, offset, key.getLength()); System.arraycopy(key.get(), key.getOffset(), buffer, offset, key.getLength()); offset += key.getLength(); offset += ByteUtil.vintToBytes(buffer, offset, entry.getValue().intValue()); } ptr.set(buffer, 0, offset); return true; }