/** * Collect all column values for the same Row. RowKey may be different if indexes are involved, so * it writes a separate record for each unique RowKey * * @param context Current mapper context * @param tableName Table index in tableNames list * @param lkv List of KV values that will be combined in a single ImmutableBytesWritable * @throws IOException * @throws InterruptedException */ private void writeAggregatedRow(Context context, String tableName, List<KeyValue> lkv) throws IOException, InterruptedException { ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); DataOutputStream outputStream = new DataOutputStream(bos); ImmutableBytesWritable outputKey = null; if (!lkv.isEmpty()) { for (KeyValue cell : lkv) { if (outputKey == null || Bytes.compareTo( outputKey.get(), outputKey.getOffset(), outputKey.getLength(), cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()) != 0) { // This a the first RowKey or a different from previous if (outputKey != null) { // It's a different RowKey, so we need to write it ImmutableBytesWritable aggregatedArray = new ImmutableBytesWritable(bos.toByteArray()); outputStream.close(); context.write(new TableRowkeyPair(tableName, outputKey), aggregatedArray); } outputKey = new ImmutableBytesWritable( cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()); bos = new ByteArrayOutputStream(1024); outputStream = new DataOutputStream(bos); } /* The order of aggregation: type, index of column, length of value, value itself */ int i = findIndex(cell); if (i == -1) { // That may happen when we load only local indexes. 
Since KV pairs for both // table and local index are going to the same physical table at that point // we skip those KVs that are not belongs to loca index continue; } outputStream.writeByte(cell.getTypeByte()); WritableUtils.writeVInt(outputStream, i); WritableUtils.writeVInt(outputStream, cell.getValueLength()); outputStream.write(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()); } ImmutableBytesWritable aggregatedArray = new ImmutableBytesWritable(bos.toByteArray()); outputStream.close(); context.write(new TableRowkeyPair(tableName, outputKey), aggregatedArray); } }
/**
 * Flushes the snapshot of the memstore. Flushes the mob data to a new mob file, and writes a
 * reference to that mob file back to HBase for every cell in the snapshot.
 *
 * <p>Does nothing when the snapshot contains no cells. Both snapshot scanners are closed even
 * when appending, mutating or flushing fails.
 *
 * @param snapshot The snapshot of the memstore.
 * @throws IOException if writing the mob file, committing it, or updating the table fails
 */
private void internalFlushCache(final MemStoreSnapshot snapshot) throws IOException {
  if (snapshot.getCellsCount() == 0) {
    return;
  }

  // generate the files into a temp directory.
  String tempPathString = context.getConfiguration().get(SweepJob.WORKING_FILES_DIR_KEY);
  StoreFile.Writer mobFileWriter =
      MobUtils.createWriter(
          conf,
          fs,
          hcd,
          partitionId.getDate(),
          new Path(tempPathString),
          snapshot.getCellsCount(),
          hcd.getCompactionCompression(),
          partitionId.getStartKey(),
          cacheConfig,
          cryptoContext);

  String relativePath = mobFileWriter.getPath().getName();
  LOG.info("Create files under a temp directory " + mobFileWriter.getPath().toString());

  // The reference cells written back to HBase carry the mob file name as their value.
  byte[] referenceValue = Bytes.toBytes(relativePath);

  // First pass: copy every cell of the snapshot into the mob file.
  KeyValueScanner scanner = snapshot.getScanner();
  try {
    Cell cell;
    while (null != (cell = scanner.next())) {
      mobFileWriter.append(cell);
    }
  } finally {
    // Release the scanner even if an append fails.
    scanner.close();
  }

  // Write out the log sequence number that corresponds to this output
  // hfile. Long.MAX_VALUE is passed as that value here.
  mobFileWriter.appendMetadata(Long.MAX_VALUE, false, snapshot.getCellsCount());
  mobFileWriter.close();

  MobUtils.commitFile(conf, fs, mobFileWriter.getPath(), mobFamilyDir, cacheConfig);
  context.getCounter(SweepCounter.FILE_AFTER_MERGE_OR_CLEAN).increment(1);

  // Second pass: write reference/fileName back to the store files of HBase.
  scanner = snapshot.getScanner();
  try {
    scanner.seek(KeyValueUtil.createFirstOnRow(HConstants.EMPTY_START_ROW));
    Tag tableNameTag =
        new ArrayBackedTag(
            TagType.MOB_TABLE_NAME_TAG_TYPE, Bytes.toBytes(this.table.getName().toString()));
    long updatedCount = 0;
    Cell cell;
    while (null != (cell = scanner.next())) {
      KeyValue reference = MobUtils.createMobRefKeyValue(cell, referenceValue, tableNameTag);
      Put put =
          new Put(reference.getRowArray(), reference.getRowOffset(), reference.getRowLength());
      put.add(reference);
      table.mutate(put);
      updatedCount++;
    }
    table.flush();
    context.getCounter(SweepCounter.RECORDS_UPDATED).increment(updatedCount);
  } finally {
    // Release the scanner even if a mutation or the flush fails.
    scanner.close();
  }
}
/**
 * Creates table {@code T} from the given column and primary-key definitions, upserts a single
 * row built from {@code values}, and verifies that iterating the resulting row key with the
 * table's {@code RowKeySchema} yields those values, first forwards and then backwards.
 *
 * <p>The connection stays open for the whole check and is closed in a {@code finally} block, so
 * no statement is ever prepared on an already-closed connection and the connection is released
 * even when an assertion or SQL call fails.
 *
 * @param dataColumns column definitions placed inside the {@code CREATE TABLE} parentheses
 * @param pk comma-separated primary key column list
 * @param values one bind value per upserted column; trailing {@code null}s are not expected to
 *     appear in the row key
 * @param dataProps table properties appended to the DDL; may be empty
 * @throws Exception if any SQL call or row key operation fails
 */
private void assertIteration(String dataColumns, String pk, Object[] values, String dataProps)
    throws Exception {
  String schemaName = "";
  String tableName = "T";
  Connection conn = DriverManager.getConnection(getUrl());
  try {
    String fullTableName =
        SchemaUtil.getTableName(
            SchemaUtil.normalizeIdentifier(schemaName),
            SchemaUtil.normalizeIdentifier(tableName));
    conn.createStatement()
        .execute(
            "CREATE TABLE "
                + fullTableName
                + "("
                + dataColumns
                + " CONSTRAINT pk PRIMARY KEY ("
                + pk
                + ")) "
                + (dataProps.isEmpty() ? "" : dataProps));
    PhoenixConnection pconn = conn.unwrap(PhoenixConnection.class);
    PTable table = pconn.getTable(new PTableKey(pconn.getTenantId(), fullTableName));

    // Build "UPSERT INTO <table> VALUES(?,?,...)" with one placeholder per value; the trailing
    // comma is overwritten with the closing parenthesis.
    StringBuilder buf = new StringBuilder("UPSERT INTO " + fullTableName + " VALUES(");
    for (int i = 0; i < values.length; i++) {
      buf.append("?,");
    }
    buf.setCharAt(buf.length() - 1, ')');

    PreparedStatement stmt = conn.prepareStatement(buf.toString());
    try {
      for (int i = 0; i < values.length; i++) {
        stmt.setObject(i + 1, values[i]);
      }
      stmt.execute();
    } finally {
      stmt.close();
    }

    // The upsert is not committed; read the pending key values straight from the connection.
    Iterator<Pair<byte[], List<KeyValue>>> iterator =
        PhoenixRuntime.getUncommittedDataIterator(conn);
    List<KeyValue> dataKeyValues = iterator.next().getSecond();
    KeyValue keyValue = dataKeyValues.get(0);

    List<SortOrder> sortOrders = Lists.newArrayListWithExpectedSize(table.getPKColumns().size());
    for (PColumn col : table.getPKColumns()) {
      sortOrders.add(col.getSortOrder());
    }

    RowKeySchema schema = table.getRowKeySchema();
    int minOffset = keyValue.getRowOffset();
    ImmutableBytesWritable ptr = new ImmutableBytesWritable();

    // Trailing null values are not counted as fields the forward iteration should return.
    int nExpectedValues = values.length;
    for (int i = values.length - 1; i >= 0; i--) {
      if (values[i] == null) {
        nExpectedValues--;
      } else {
        break;
      }
    }

    // Forward pass: every field up to the end of the key must decode to the bound value.
    int i = 0;
    int maxOffset =
        schema.iterator(keyValue.getRowArray(), minOffset, keyValue.getRowLength(), ptr);
    for (i = 0; i < schema.getFieldCount(); i++) {
      Boolean hasValue = schema.next(ptr, i, maxOffset);
      if (hasValue == null) {
        break;
      }
      assertTrue(hasValue);
      PDataType type = PDataType.fromLiteral(values[i]);
      SortOrder sortOrder = sortOrders.get(i);
      Object value = type.toObject(ptr, schema.getField(i).getDataType(), sortOrder);
      assertEquals(values[i], value);
    }
    assertEquals(nExpectedValues, i);
    assertNull(schema.next(ptr, i, maxOffset));

    // Backward pass: walk from the last returned field back to the first.
    for (i--; i >= 0; i--) {
      Boolean hasValue = schema.previous(ptr, i, minOffset);
      if (hasValue == null) {
        break;
      }
      assertTrue(hasValue);
      PDataType type = PDataType.fromLiteral(values[i]);
      SortOrder sortOrder = sortOrders.get(i);
      Object value = type.toObject(ptr, schema.getField(i).getDataType(), sortOrder);
      assertEquals(values[i], value);
    }
    assertEquals(-1, i);
    assertNull(schema.previous(ptr, i, minOffset));
  } finally {
    conn.close();
  }
}