Beispiel #1
0
  /**
   * Appends one key/value to the file. Keys must arrive in an order that agrees with the
   * Comparator passed on construction.
   *
   * <p>The key and value are validated first; unless the key is reported as a duplicate of the
   * previous one, the block boundary check runs before the entry is written. Running totals, the
   * first key of the current block, the last-key reference and the entry count are updated
   * afterwards.
   *
   * @param kv KeyValue to add. Cannot be empty nor null.
   * @throws IOException propagated from the validation, block-handling or write calls made here
   */
  @Override
  public void append(final KeyValue kv) throws IOException {
    final byte[] keyBytes = kv.getBuffer();
    final int keyOffset = kv.getKeyOffset();
    final int keyLength = kv.getKeyLength();
    final byte[] valueBytes = kv.getValueArray();
    final int valueOffset = kv.getValueOffset();
    final int valueLength = kv.getValueLength();

    // Validate the key before the value; checkKey also reports whether the key is a duplicate.
    final boolean isDuplicateKey = checkKey(keyBytes, keyOffset, keyLength);
    checkValue(valueBytes, valueOffset, valueLength);

    // The block boundary check is skipped for duplicate keys.
    if (!isDuplicateKey) {
      checkBlockBoundary();
    }

    if (!fsBlockWriter.isWriting()) {
      newBlock();
    }

    fsBlockWriter.write(kv);

    totalKeyLength += keyLength;
    totalValueLength += valueLength;

    // No first key recorded yet for this block: keep a private copy of this one.
    if (firstKeyInBlock == null) {
      firstKeyInBlock = new byte[keyLength];
      System.arraycopy(keyBytes, keyOffset, firstKeyInBlock, 0, keyLength);
    }

    // The last key is tracked by reference into the KeyValue's backing array (not copied).
    lastKeyBuffer = keyBytes;
    lastKeyOffset = keyOffset;
    lastKeyLength = keyLength;
    entryCount++;

    final long mvccVersion = kv.getMvccVersion();
    this.maxMemstoreTS = Math.max(this.maxMemstoreTS, mvccVersion);
  }
 /**
  * Aggregates the column values of consecutive cells that share a RowKey and emits one record per
  * run of equal RowKeys. RowKeys may differ within the list if indexes are involved, so a
  * separate record is written each time the RowKey changes.
  *
  * <p>Each retained cell is serialized in this order: type byte, column index (VInt), value
  * length (VInt), value bytes. An empty list produces no output.
  *
  * @param context Current mapper context; receives one write per run of equal RowKeys
  * @param tableName table identifier placed in the emitted {@code TableRowkeyPair}
  * @param lkv List of KV values that will be combined in a single ImmutableBytesWritable
  * @throws IOException
  * @throws InterruptedException
  */
 private void writeAggregatedRow(Context context, String tableName, List<KeyValue> lkv)
     throws IOException, InterruptedException {
   if (lkv.isEmpty()) {
     // Nothing to aggregate, nothing to emit.
     return;
   }

   // These initial streams are replaced as soon as the first RowKey is seen.
   ByteArrayOutputStream rowBuffer = new ByteArrayOutputStream(1024);
   DataOutputStream rowOut = new DataOutputStream(rowBuffer);
   ImmutableBytesWritable currentRowKey = null;

   for (KeyValue kv : lkv) {
     final boolean continuesCurrentRow =
         currentRowKey != null
             && Bytes.compareTo(
                     currentRowKey.get(),
                     currentRowKey.getOffset(),
                     currentRowKey.getLength(),
                     kv.getRowArray(),
                     kv.getRowOffset(),
                     kv.getRowLength())
                 == 0;

     if (!continuesCurrentRow) {
       // Either this is the first RowKey or it differs from the previous one.
       if (currentRowKey != null) {
         // A different RowKey: emit what was collected for the previous one.
         final ImmutableBytesWritable finishedRow =
             new ImmutableBytesWritable(rowBuffer.toByteArray());
         rowOut.close();
         context.write(new TableRowkeyPair(tableName, currentRowKey), finishedRow);
       }
       currentRowKey =
           new ImmutableBytesWritable(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength());
       rowBuffer = new ByteArrayOutputStream(1024);
       rowOut = new DataOutputStream(rowBuffer);
     }

     // An index of -1 may happen when only local indexes are loaded. Since KV pairs for both the
     // table and the local index go to the same physical table at that point, the KVs that do
     // not belong to the local index are skipped.
     final int columnIndex = findIndex(kv);
     if (columnIndex != -1) {
       // Order of aggregation: type, index of column, length of value, value itself.
       final int valueLength = kv.getValueLength();
       rowOut.writeByte(kv.getTypeByte());
       WritableUtils.writeVInt(rowOut, columnIndex);
       WritableUtils.writeVInt(rowOut, valueLength);
       rowOut.write(kv.getValueArray(), kv.getValueOffset(), kv.getValueLength());
     }
   }

   // Emit the record for the last RowKey in the list.
   final ImmutableBytesWritable lastRow = new ImmutableBytesWritable(rowBuffer.toByteArray());
   rowOut.close();
   context.write(new TableRowkeyPair(tableName, currentRowKey), lastRow);
 }
Beispiel #3
0
  /**
   * Exercises {@code CellUtils.mapCellsToShadowCells} in three scenarios: two cells with the same
   * coordinates but different values, the same pair after the second cell's sequence id is set
   * via {@code HBaseShims}, and a mixed list containing duplicates, a delete cell and shadow
   * cells.
   */
  @Test(timeOut = 10_000)
  public void testCorrectMapingOfCellsToShadowCells() throws IOException {
    // ---- Scenario 1: same coordinates, different values ----
    final Cell putCell1 =
        new KeyValue(row, family, qualifier, 1, Bytes.toBytes("value")); // Default type is Put
    final Cell putCell1OtherValue =
        new KeyValue(row, family, qualifier, 1, Bytes.toBytes("other-value"));

    final List<Cell> conflictingPuts = new ArrayList<>();
    conflictingPuts.add(putCell1);
    conflictingPuts.add(putCell1OtherValue);

    // The single resulting key is expected to carry the value of the first cell.
    SortedMap<Cell, Optional<Cell>> mapping = CellUtils.mapCellsToShadowCells(conflictingPuts);
    assertEquals(mapping.size(), 1, "There should be only 1 key-value maps");
    assertTrue(mapping.containsKey(putCell1));
    KeyValue lowestKey = (KeyValue) mapping.firstKey();
    KeyValue highestKey = (KeyValue) mapping.lastKey();
    assertTrue(lowestKey.equals(highestKey));
    int valueComparison =
        Bytes.compareTo(
            lowestKey.getValueArray(),
            lowestKey.getValueOffset(),
            lowestKey.getValueLength(),
            putCell1.getValueArray(),
            putCell1.getValueOffset(),
            putCell1.getValueLength());
    assertTrue(0 == valueComparison, "Should be equal");

    // ---- Scenario 2: set the sequence id of the second cell; it should now be the one kept ----
    HBaseShims.setKeyValueSequenceId((KeyValue) putCell1OtherValue, 1);
    mapping = CellUtils.mapCellsToShadowCells(conflictingPuts);
    assertEquals(mapping.size(), 1, "There should be only 1 key-value maps");
    assertTrue(mapping.containsKey(putCell1OtherValue));
    lowestKey = (KeyValue) mapping.firstKey();
    highestKey = (KeyValue) mapping.lastKey();
    assertTrue(lowestKey.equals(highestKey));
    valueComparison =
        Bytes.compareTo(
            lowestKey.getValueArray(),
            lowestKey.getValueOffset(),
            lowestKey.getValueLength(),
            putCell1OtherValue.getValueArray(),
            putCell1OtherValue.getValueOffset(),
            putCell1OtherValue.getValueLength());
    assertTrue(0 == valueComparison, "Should be equal");

    // ---- Scenario 3: mixed list with duplicates, a delete cell and shadow cells ----
    final byte[] secondQualifier = Bytes.toBytes("test-qual2");
    final byte[] thirdQualifier = Bytes.toBytes("test-qual3");
    final byte[] shadowQualifier1 =
        com.google.common.primitives.Bytes.concat(qualifier, SHADOW_CELL_SUFFIX);
    final byte[] shadowQualifier2 =
        com.google.common.primitives.Bytes.concat(secondQualifier, SHADOW_CELL_SUFFIX);

    final Cell putCell1Copy =
        new KeyValue(row, family, qualifier, 1, Bytes.toBytes("value")); // Default type is Put
    final Cell deleteCell1 =
        new KeyValue(row, family, qualifier, 1, Type.Delete, Bytes.toBytes("value"));
    final Cell shadowOfCell1 =
        new KeyValue(row, family, shadowQualifier1, 1, Bytes.toBytes("sc-value"));
    final Cell putCell2 = new KeyValue(row, family, secondQualifier, 1, Bytes.toBytes("value2"));
    final Cell shadowOfCell2 =
        new KeyValue(row, family, shadowQualifier2, 1, Bytes.toBytes("sc-value2"));
    final Cell putCell3 = new KeyValue(row, family, thirdQualifier, 1, Bytes.toBytes("value3"));

    final List<Cell> mixedCells = new ArrayList<>();
    mixedCells.add(putCell1);
    mixedCells.add(shadowOfCell1);
    mixedCells.add(putCell1Copy); // Dup cell
    mixedCells.add(deleteCell1); // Another Dup cell but with different type
    mixedCells.add(putCell2);
    mixedCells.add(putCell3);
    mixedCells.add(shadowOfCell2);

    mapping = CellUtils.mapCellsToShadowCells(mixedCells);
    assertEquals(mapping.size(), 3, "There should be only 3 key-value maps");

    final Cell mappedShadowForCell1 = mapping.get(putCell1).get();
    assertTrue(mappedShadowForCell1.equals(shadowOfCell1));
    final Cell mappedShadowForCell1Copy = mapping.get(putCell1Copy).get();
    assertTrue(mappedShadowForCell1Copy.equals(shadowOfCell1));

    // TODO This is strange and needs to be solved.
    // The current algo avoids to put the delete cell
    // as key after the put cell with same value was added
    assertFalse(mapping.containsKey(deleteCell1));

    final Cell mappedShadowForCell2 = mapping.get(putCell2).get();
    assertTrue(mappedShadowForCell2.equals(shadowOfCell2));
    // The third cell has no shadow cell in the list, so it maps to an absent Optional.
    assertTrue(mapping.get(putCell3).equals(Optional.absent()));
  }