/** * Add key/value to file. Keys must be added in an order that agrees with the Comparator passed on * construction. * * @param kv KeyValue to add. Cannot be empty nor null. * @throws IOException */ @Override public void append(final KeyValue kv) throws IOException { byte[] key = kv.getBuffer(); int koffset = kv.getKeyOffset(); int klength = kv.getKeyLength(); byte[] value = kv.getValueArray(); int voffset = kv.getValueOffset(); int vlength = kv.getValueLength(); boolean dupKey = checkKey(key, koffset, klength); checkValue(value, voffset, vlength); if (!dupKey) { checkBlockBoundary(); } if (!fsBlockWriter.isWriting()) newBlock(); fsBlockWriter.write(kv); totalKeyLength += klength; totalValueLength += vlength; // Are we the first key in this block? if (firstKeyInBlock == null) { // Copy the key. firstKeyInBlock = new byte[klength]; System.arraycopy(key, koffset, firstKeyInBlock, 0, klength); } lastKeyBuffer = key; lastKeyOffset = koffset; lastKeyLength = klength; entryCount++; this.maxMemstoreTS = Math.max(this.maxMemstoreTS, kv.getMvccVersion()); }
/** * Collect all column values for the same Row. RowKey may be different if indexes are involved, so * it writes a separate record for each unique RowKey * * @param context Current mapper context * @param tableName Table index in tableNames list * @param lkv List of KV values that will be combined in a single ImmutableBytesWritable * @throws IOException * @throws InterruptedException */ private void writeAggregatedRow(Context context, String tableName, List<KeyValue> lkv) throws IOException, InterruptedException { ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); DataOutputStream outputStream = new DataOutputStream(bos); ImmutableBytesWritable outputKey = null; if (!lkv.isEmpty()) { for (KeyValue cell : lkv) { if (outputKey == null || Bytes.compareTo( outputKey.get(), outputKey.getOffset(), outputKey.getLength(), cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()) != 0) { // This a the first RowKey or a different from previous if (outputKey != null) { // It's a different RowKey, so we need to write it ImmutableBytesWritable aggregatedArray = new ImmutableBytesWritable(bos.toByteArray()); outputStream.close(); context.write(new TableRowkeyPair(tableName, outputKey), aggregatedArray); } outputKey = new ImmutableBytesWritable( cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()); bos = new ByteArrayOutputStream(1024); outputStream = new DataOutputStream(bos); } /* The order of aggregation: type, index of column, length of value, value itself */ int i = findIndex(cell); if (i == -1) { // That may happen when we load only local indexes. Since KV pairs for both // table and local index are going to the same physical table at that point // we skip those KVs that are not belongs to loca index continue; } outputStream.writeByte(cell.getTypeByte()); WritableUtils.writeVInt(outputStream, i); WritableUtils.writeVInt(outputStream, cell.getValueLength()); outputStream.write(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()); } ImmutableBytesWritable aggregatedArray = new ImmutableBytesWritable(bos.toByteArray()); outputStream.close(); context.write(new TableRowkeyPair(tableName, outputKey), aggregatedArray); } }
@Test(timeOut = 10_000) public void testCorrectMapingOfCellsToShadowCells() throws IOException { // Create the required data final byte[] validShadowCellQualifier = com.google.common.primitives.Bytes.concat(qualifier, SHADOW_CELL_SUFFIX); final byte[] qualifier2 = Bytes.toBytes("test-qual2"); final byte[] validShadowCellQualifier2 = com.google.common.primitives.Bytes.concat(qualifier2, SHADOW_CELL_SUFFIX); final byte[] qualifier3 = Bytes.toBytes("test-qual3"); Cell cell1 = new KeyValue(row, family, qualifier, 1, Bytes.toBytes("value")); // Default type is Put Cell dupCell1 = new KeyValue(row, family, qualifier, 1, Bytes.toBytes("value")); // Default type is Put Cell dupCell1WithAnotherValue = new KeyValue(row, family, qualifier, 1, Bytes.toBytes("other-value")); Cell delCell1 = new KeyValue(row, family, qualifier, 1, Type.Delete, Bytes.toBytes("value")); Cell shadowCell1 = new KeyValue(row, family, validShadowCellQualifier, 1, Bytes.toBytes("sc-value")); Cell cell2 = new KeyValue(row, family, qualifier2, 1, Bytes.toBytes("value2")); Cell shadowCell2 = new KeyValue(row, family, validShadowCellQualifier2, 1, Bytes.toBytes("sc-value2")); Cell cell3 = new KeyValue(row, family, qualifier3, 1, Bytes.toBytes("value3")); // Check a list of cells with duplicate values List<Cell> badListWithDups = new ArrayList<>(); badListWithDups.add(cell1); badListWithDups.add(dupCell1WithAnotherValue); // Check dup shadow cell with same MVCC is ignored SortedMap<Cell, Optional<Cell>> cellsToShadowCells = CellUtils.mapCellsToShadowCells(badListWithDups); assertEquals(cellsToShadowCells.size(), 1, "There should be only 1 key-value maps"); assertTrue(cellsToShadowCells.containsKey(cell1)); KeyValue firstKey = (KeyValue) cellsToShadowCells.firstKey(); KeyValue lastKey = (KeyValue) cellsToShadowCells.lastKey(); assertTrue(firstKey.equals(lastKey)); assertTrue( 0 == Bytes.compareTo( firstKey.getValueArray(), firstKey.getValueOffset(), firstKey.getValueLength(), cell1.getValueArray(), cell1.getValueOffset(), cell1.getValueLength()), "Should be equal"); // Modify dup shadow cell to have a greater MVCC and check that is replaced HBaseShims.setKeyValueSequenceId((KeyValue) dupCell1WithAnotherValue, 1); cellsToShadowCells = CellUtils.mapCellsToShadowCells(badListWithDups); assertEquals(cellsToShadowCells.size(), 1, "There should be only 1 key-value maps"); assertTrue(cellsToShadowCells.containsKey(dupCell1WithAnotherValue)); firstKey = (KeyValue) cellsToShadowCells.firstKey(); lastKey = (KeyValue) cellsToShadowCells.lastKey(); assertTrue(firstKey.equals(lastKey)); assertTrue( 0 == Bytes.compareTo( firstKey.getValueArray(), firstKey.getValueOffset(), firstKey.getValueLength(), dupCell1WithAnotherValue.getValueArray(), dupCell1WithAnotherValue.getValueOffset(), dupCell1WithAnotherValue.getValueLength()), "Should be equal"); // Check a list of cells with duplicate values List<Cell> cellListWithDups = new ArrayList<>(); cellListWithDups.add(cell1); cellListWithDups.add(shadowCell1); cellListWithDups.add(dupCell1); // Dup cell cellListWithDups.add(delCell1); // Another Dup cell but with different type cellListWithDups.add(cell2); cellListWithDups.add(cell3); cellListWithDups.add(shadowCell2); cellsToShadowCells = CellUtils.mapCellsToShadowCells(cellListWithDups); assertEquals(cellsToShadowCells.size(), 3, "There should be only 3 key-value maps"); assertTrue(cellsToShadowCells.get(cell1).get().equals(shadowCell1)); assertTrue(cellsToShadowCells.get(dupCell1).get().equals(shadowCell1)); assertFalse( cellsToShadowCells.containsKey(delCell1)); // TODO This is strange and needs to be solved. // The current algo avoids to put the delete cell // as key after the put cell with same value was added assertTrue(cellsToShadowCells.get(cell2).get().equals(shadowCell2)); assertTrue(cellsToShadowCells.get(cell3).equals(Optional.absent())); }