/**
 * Computes the exact serialized size (in bytes) of this column's sub-columns,
 * calculated on the fly rather than cached.
 *
 * @return the sum of {@code serializedSize()} over all sub-columns; 0 if there are none
 */
int getSizeOfAllColumns() {
  int total = 0;
  for (IColumn sub : getSubColumns()) {
    total += sub.serializedSize();
  }
  return total;
}
/**
 * Generates the column-name index for the given columns of a Column Family and
 * writes it to the provided stream.
 *
 * <p>Rather than loading every column into memory, this walks the columns once,
 * tracking the running serialized offset, and emits one {@link IndexHelper.IndexInfo}
 * entry each time the accumulated block reaches the configured column-index size.
 *
 * @param comparator comparator used to order/compare column names
 * @param columns columns for which the name index is generated
 * @param dos stream the serialized name index is written to
 * @throws IOException if writing to the stream fails
 */
private static void doIndexing(
    AbstractType comparator, Collection<IColumn> columns, DataOutput dos) throws IOException {
  // An empty column set serializes as an index of total size zero.
  if (columns.isEmpty()) {
    dos.writeInt(0);
    return;
  }

  // Index entries built so far; offsets are relative to the start of the column list.
  List<IndexHelper.IndexInfo> entries = new ArrayList<IndexHelper.IndexInfo>();
  int indexSizeInBytes = 0;

  int endOffset = 0;       // running offset just past the current column
  int blockStart = -1;     // offset where the current index block began
  IColumn lastColumn = null;
  IColumn blockFirstColumn = null; // first column of the in-progress block; null => block closed

  for (IColumn current : columns) {
    if (blockFirstColumn == null) {
      // Open a new index block at the current offset.
      blockFirstColumn = current;
      blockStart = endOffset;
    }
    endOffset += current.serializedSize();
    lastColumn = current;

    // Once the block reaches the configured threshold, emit its index entry.
    if (endOffset - blockStart >= DatabaseDescriptor.getColumnIndexSize()) {
      IndexHelper.IndexInfo entry =
          new IndexHelper.IndexInfo(
              blockFirstColumn.name(), current.name(), blockStart, endOffset - blockStart);
      entries.add(entry);
      indexSizeInBytes += entry.serializedSize();
      blockFirstColumn = null;
    }
  }

  // The final column may have landed exactly on an index boundary; if it did not,
  // index the trailing partial block explicitly.
  IndexHelper.IndexInfo tail = entries.isEmpty() ? null : entries.get(entries.size() - 1);
  if (tail == null || comparator.compare(tail.lastName, lastColumn.name()) != 0) {
    IndexHelper.IndexInfo entry =
        new IndexHelper.IndexInfo(
            blockFirstColumn.name(), lastColumn.name(), blockStart, endOffset - blockStart);
    entries.add(entry);
    indexSizeInBytes += entry.serializedSize();
  }

  assert indexSizeInBytes > 0;
  dos.writeInt(indexSizeInBytes);
  for (IndexHelper.IndexInfo entry : entries) {
    entry.serialize(dos);
  }
}
/**
 * !BIGDATA: New serialized ColumnFamily row with the column index written at the end.
 * Writes a column family row (compressed column blocks followed by the block index)
 * to an SSTable output buffer.
 *
 * <p>Row layout produced by this method:
 * <pre>
 * | bloom filter     |  int length + content
 * | deletion meta    |  localDeletionTime (int), markedForDeleteAt (long)
 * | column count     |  int
 * | column block 0   |  compressed   &lt;-- column blocks start position (CBSP);
 * | column block 1   |  compressed       index offsets are relative to CBSP
 * | ...              |
 * | column index     |  serialized IndexInfo entries for the blocks above
 * | index size       |  int; lets a reader seek back to the start of the index
 * </pre>
 *
 * @param columnFamily the column family row to serialize
 * @param dos destination buffer; {@code getLength()} is used to record block positions
 * @param compressAlgo compression algorithm used for the column blocks
 */
public void bigdataSerializeWithIndexesAtEnd(
    ColumnFamily columnFamily, DataOutputBuffer dos, Compression.Algorithm compressAlgo) {
  // get the sorted columns from column family row
  Collection<IColumn> columns = columnFamily.getSortedColumns();
  // create and serialize bloom filter
  BigdataColumnIndexer.createAndSerializeBloomFiliter(columns, dos);
  /*
   * Maintains a list of Column IndexInfo objects for the columns in this
   * column family row. The key is the column name and the position is the
   * relative offset of that column name from the start of the list.
   * We do this so that we don't read all the columns into memory.
   */
  List<IndexHelper.IndexInfo> indexList = new ArrayList<IndexHelper.IndexInfo>();
  // different column family use different compression algorithm.
  CompressionContext context = CompressionContext.getInstance(compressAlgo);
  try {
    // deletion meta information
    dos.writeInt(columnFamily.localDeletionTime.get());
    dos.writeLong(columnFamily.markedForDeleteAt.get());
    // column count
    dos.writeInt(columns.size());
    // the current column
    IColumn column = null;
    // the size of serialized column index, computed up front
    int indexSizeInBytes = 0;
    // the position of first block, at where the column blocks start (CBSP).
    int firstBlockPos = dos.getLength();
    // the first column of current block
    IColumn blockFirstColumn = null;
    // the start position of current block;
    // the column index will store the offset from firstBlockPos
    int blockStartPos = firstBlockPos;
    // uncompressed current block size
    int blockSize = 0;
    // compressed output stream of current block; null when no block is open
    DataOutputStream blockOut = null;
    for (Iterator<IColumn> it = columns.iterator(); it.hasNext(); ) {
      column = it.next();
      if ((blockFirstColumn == null) && (blockOut == null)) {
        // start a new block
        blockFirstColumn = column;
        blockStartPos = dos.getLength();
        blockSize = 0;
        // get a new block output stream (compressed, layered over dos)
        blockOut = getBlockOutputStream(dos, context);
      }
      // serialize column into the (compressed) block stream
      columnFamily.getColumnSerializer().serialize(column, blockOut);
      // track the UNcompressed block size; the index threshold applies to it
      blockSize += column.serializedSize();
      // if we hit the block size that we have to index after, go ahead and index it.
      if (blockSize >= DatabaseDescriptor.getColumnIndexSize()) {
        // close the block: flush the compressor and get the uncompressed width back
        int blockWidth = releaseBlockOutputStream(blockOut, context);
        assert blockWidth == blockSize;
        blockOut = null;
        int blockEndPos = dos.getLength();
        // index entry: names, offset from CBSP, uncompressed width, compressed size
        IndexHelper.IndexInfo cIndexInfo =
            new IndexHelper.IndexInfo(
                blockFirstColumn.name(),
                column.name(),
                blockStartPos - firstBlockPos,
                blockWidth,
                blockEndPos - blockStartPos);
        indexList.add(cIndexInfo);
        indexSizeInBytes += cIndexInfo.serializedSize();
        // to next block
        blockFirstColumn = null;
      }
    }
    // a trailing partial block remains open if the last column did not hit the threshold
    if (blockOut != null) {
      int blockWidth = releaseBlockOutputStream(blockOut, context);
      assert blockWidth == blockSize;
      blockOut = null;
      int blockEndPos = dos.getLength();
      IndexHelper.IndexInfo cIndexInfo =
          new IndexHelper.IndexInfo(
              blockFirstColumn.name(),
              column.name(),
              blockStartPos - firstBlockPos,
              blockWidth,
              blockEndPos - blockStartPos);
      indexList.add(cIndexInfo);
      indexSizeInBytes += cIndexInfo.serializedSize();
    }
    // the start position of column index
    int indexStartPos = dos.getLength();
    // serialize column index
    BigdataColumnIndexer.serialize(indexList, indexSizeInBytes, dos);
    // write out the size of index, so a reader can seek back to the index start.
    dos.writeInt(dos.getLength() - indexStartPos);
  } catch (IOException e) {
    // NOTE(review): e.toString() drops the stack trace; rethrown below with cause preserved.
    logger.error(e.toString());
    throw new RuntimeException(e);
  } finally {
    // always release the per-call compressor back to the context
    context.releaseCompressor();
  }
}