/** * Override the preAppend for checkAndPut and checkAndDelete, as we need the ability to a) set the * TimeRange for the Get being done and b) return something back to the client to indicate * success/failure */ @SuppressWarnings("deprecation") @Override public Result preAppend( final ObserverContext<RegionCoprocessorEnvironment> e, final Append append) throws IOException { byte[] opBuf = append.getAttribute(OPERATION_ATTRIB); if (opBuf == null) { return null; } Op op = Op.values()[opBuf[0]]; long clientTimestamp = HConstants.LATEST_TIMESTAMP; byte[] clientTimestampBuf = append.getAttribute(MAX_TIMERANGE_ATTRIB); if (clientTimestampBuf != null) { clientTimestamp = Bytes.toLong(clientTimestampBuf); } boolean hadClientTimestamp = (clientTimestamp != HConstants.LATEST_TIMESTAMP); if (hadClientTimestamp) { // Prevent race condition of creating two sequences at the same timestamp // by looking for a sequence at or after the timestamp at which it'll be // created. if (op == Op.CREATE_SEQUENCE) { clientTimestamp++; } } else { clientTimestamp = EnvironmentEdgeManager.currentTimeMillis(); clientTimestampBuf = Bytes.toBytes(clientTimestamp); } RegionCoprocessorEnvironment env = e.getEnvironment(); // We need to set this to prevent region.append from being called e.bypass(); e.complete(); HRegion region = env.getRegion(); byte[] row = append.getRow(); region.startRegionOperation(); try { Integer lid = region.getLock(null, row, true); try { KeyValue keyValue = append.getFamilyMap().values().iterator().next().iterator().next(); byte[] family = keyValue.getFamily(); byte[] qualifier = keyValue.getQualifier(); Get get = new Get(row); get.setTimeRange(MetaDataProtocol.MIN_TABLE_TIMESTAMP, clientTimestamp); get.addColumn(family, qualifier); Result result = region.get(get); if (result.isEmpty()) { if (op == Op.DROP_SEQUENCE || op == Op.RESET_SEQUENCE) { return getErrorResult( row, clientTimestamp, SQLExceptionCode.SEQUENCE_UNDEFINED.getErrorCode()); } } else { if (op == Op.CREATE_SEQUENCE) { return getErrorResult( row, clientTimestamp, SQLExceptionCode.SEQUENCE_ALREADY_EXIST.getErrorCode()); } } Mutation m = null; switch (op) { case RESET_SEQUENCE: KeyValue currentValueKV = result.raw()[0]; long expectedValue = PDataType.LONG .getCodec() .decodeLong(append.getAttribute(CURRENT_VALUE_ATTRIB), 0, null); long value = PDataType.LONG .getCodec() .decodeLong(currentValueKV.getBuffer(), currentValueKV.getValueOffset(), null); // Timestamp should match exactly, or we may have the wrong sequence if (expectedValue != value || currentValueKV.getTimestamp() != clientTimestamp) { return new Result( Collections.singletonList( KeyValueUtil.newKeyValue( row, PhoenixDatabaseMetaData.SEQUENCE_FAMILY_BYTES, QueryConstants.EMPTY_COLUMN_BYTES, currentValueKV.getTimestamp(), ByteUtil.EMPTY_BYTE_ARRAY))); } m = new Put(row, currentValueKV.getTimestamp()); m.getFamilyMap().putAll(append.getFamilyMap()); break; case DROP_SEQUENCE: m = new Delete(row, clientTimestamp, null); break; case CREATE_SEQUENCE: m = new Put(row, clientTimestamp); m.getFamilyMap().putAll(append.getFamilyMap()); break; } if (!hadClientTimestamp) { for (List<KeyValue> kvs : m.getFamilyMap().values()) { for (KeyValue kv : kvs) { kv.updateLatestStamp(clientTimestampBuf); } } } @SuppressWarnings("unchecked") Pair<Mutation, Integer>[] mutations = new Pair[1]; mutations[0] = new Pair<Mutation, Integer>(m, lid); region.batchMutate(mutations); long serverTimestamp = MetaDataUtil.getClientTimeStamp(m); // Return result with single KeyValue. The only piece of information // the client cares about is the timestamp, which is the timestamp of // when the mutation was actually performed (useful in the case of . return new Result( Collections.singletonList( KeyValueUtil.newKeyValue( row, PhoenixDatabaseMetaData.SEQUENCE_FAMILY_BYTES, QueryConstants.EMPTY_COLUMN_BYTES, serverTimestamp, SUCCESS_VALUE))); } finally { region.releaseRowLock(lid); } } catch (Throwable t) { ServerUtil.throwIOException("Increment of sequence " + Bytes.toStringBinary(row), t); return null; // Impossible } finally { region.closeRegionOperation(); } }
/** * Use PreIncrement hook of BaseRegionObserver to overcome deficiencies in Increment * implementation (HBASE-10254): 1) Lack of recognition and identification of when the key value * to increment doesn't exist 2) Lack of the ability to set the timestamp of the updated key * value. Works the same as existing region.increment(), except assumes there is a single column * to increment and uses Phoenix LONG encoding. * * @author jtaylor * @since 3.0.0 */ @Override public Result preIncrement( final ObserverContext<RegionCoprocessorEnvironment> e, final Increment increment) throws IOException { RegionCoprocessorEnvironment env = e.getEnvironment(); // We need to set this to prevent region.increment from being called e.bypass(); e.complete(); HRegion region = env.getRegion(); byte[] row = increment.getRow(); TimeRange tr = increment.getTimeRange(); region.startRegionOperation(); try { Integer lid = region.getLock(null, row, true); try { long maxTimestamp = tr.getMax(); if (maxTimestamp == HConstants.LATEST_TIMESTAMP) { maxTimestamp = EnvironmentEdgeManager.currentTimeMillis(); tr = new TimeRange(tr.getMin(), maxTimestamp); } Get get = new Get(row); get.setTimeRange(tr.getMin(), tr.getMax()); for (Map.Entry<byte[], NavigableMap<byte[], Long>> entry : increment.getFamilyMap().entrySet()) { byte[] cf = entry.getKey(); for (byte[] cq : entry.getValue().keySet()) { get.addColumn(cf, cq); } } Result result = region.get(get); if (result.isEmpty()) { return getErrorResult( row, maxTimestamp, SQLExceptionCode.SEQUENCE_UNDEFINED.getErrorCode()); } KeyValue currentValueKV = Sequence.getCurrentValueKV(result); KeyValue incrementByKV = Sequence.getIncrementByKV(result); KeyValue cacheSizeKV = Sequence.getCacheSizeKV(result); long value = PDataType.LONG .getCodec() .decodeLong(currentValueKV.getBuffer(), currentValueKV.getValueOffset(), null); long incrementBy = PDataType.LONG .getCodec() .decodeLong(incrementByKV.getBuffer(), incrementByKV.getValueOffset(), null); int cacheSize = PDataType.INTEGER .getCodec() .decodeInt(cacheSizeKV.getBuffer(), cacheSizeKV.getValueOffset(), null); value += incrementBy * cacheSize; byte[] valueBuffer = new byte[PDataType.LONG.getByteSize()]; PDataType.LONG.getCodec().encodeLong(value, valueBuffer, 0); Put put = new Put(row, currentValueKV.getTimestamp()); // Hold timestamp constant for sequences, so that clients always only see the latest value // regardless of when they connect. KeyValue newCurrentValueKV = KeyValueUtil.newKeyValue( row, currentValueKV.getFamily(), currentValueKV.getQualifier(), currentValueKV.getTimestamp(), valueBuffer); put.add(newCurrentValueKV); @SuppressWarnings("unchecked") Pair<Mutation, Integer>[] mutations = new Pair[1]; mutations[0] = new Pair<Mutation, Integer>(put, lid); region.batchMutate(mutations); return Sequence.replaceCurrentValueKV(result, newCurrentValueKV); } finally { region.releaseRowLock(lid); } } catch (Throwable t) { ServerUtil.throwIOException("Increment of sequence " + Bytes.toStringBinary(row), t); return null; // Impossible } finally { region.closeRegionOperation(); } }
@Override protected RegionScanner doPostScannerOpen( final ObserverContext<RegionCoprocessorEnvironment> c, final Scan scan, final RegionScanner s) throws IOException { byte[] isUngroupedAgg = scan.getAttribute(BaseScannerRegionObserver.UNGROUPED_AGG); if (isUngroupedAgg == null) { return s; } final ScanProjector p = ScanProjector.deserializeProjectorFromScan(scan); final HashJoinInfo j = HashJoinInfo.deserializeHashJoinFromScan(scan); RegionScanner theScanner = s; if (p != null || j != null) { theScanner = new HashJoinRegionScanner(s, p, j, ScanUtil.getTenantId(scan), c.getEnvironment()); } final RegionScanner innerScanner = theScanner; byte[] indexUUID = scan.getAttribute(PhoenixIndexCodec.INDEX_UUID); PTable projectedTable = null; List<Expression> selectExpressions = null; byte[] upsertSelectTable = scan.getAttribute(BaseScannerRegionObserver.UPSERT_SELECT_TABLE); boolean isUpsert = false; boolean isDelete = false; byte[] deleteCQ = null; byte[] deleteCF = null; byte[][] values = null; byte[] emptyCF = null; ImmutableBytesWritable ptr = null; if (upsertSelectTable != null) { isUpsert = true; projectedTable = deserializeTable(upsertSelectTable); selectExpressions = deserializeExpressions(scan.getAttribute(BaseScannerRegionObserver.UPSERT_SELECT_EXPRS)); values = new byte[projectedTable.getPKColumns().size()][]; ptr = new ImmutableBytesWritable(); } else { byte[] isDeleteAgg = scan.getAttribute(BaseScannerRegionObserver.DELETE_AGG); isDelete = isDeleteAgg != null && Bytes.compareTo(PDataType.TRUE_BYTES, isDeleteAgg) == 0; if (!isDelete) { deleteCF = scan.getAttribute(BaseScannerRegionObserver.DELETE_CF); deleteCQ = scan.getAttribute(BaseScannerRegionObserver.DELETE_CQ); } emptyCF = scan.getAttribute(BaseScannerRegionObserver.EMPTY_CF); } int batchSize = 0; long ts = scan.getTimeRange().getMax(); HRegion region = c.getEnvironment().getRegion(); List<Mutation> mutations = Collections.emptyList(); if (isDelete || isUpsert || (deleteCQ != null && deleteCF != null) || emptyCF != null) { // TODO: size better mutations = Lists.newArrayListWithExpectedSize(1024); batchSize = c.getEnvironment() .getConfiguration() .getInt(MUTATE_BATCH_SIZE_ATTRIB, QueryServicesOptions.DEFAULT_MUTATE_BATCH_SIZE); } Aggregators aggregators = ServerAggregators.deserialize( scan.getAttribute(BaseScannerRegionObserver.AGGREGATORS), c.getEnvironment().getConfiguration()); Aggregator[] rowAggregators = aggregators.getAggregators(); boolean hasMore; boolean hasAny = false; MultiKeyValueTuple result = new MultiKeyValueTuple(); if (logger.isInfoEnabled()) { logger.info("Starting ungrouped coprocessor scan " + scan); } long rowCount = 0; region.startRegionOperation(); try { do { List<Cell> results = new ArrayList<Cell>(); // Results are potentially returned even when the return value of s.next is false // since this is an indication of whether or not there are more values after the // ones returned hasMore = innerScanner.nextRaw(results); if (!results.isEmpty()) { rowCount++; result.setKeyValues(results); try { if (isDelete) { // FIXME: the version of the Delete constructor without the lock args was introduced // in 0.94.4, thus if we try to use it here we can no longer use the 0.94.2 version // of the client. Cell firstKV = results.get(0); Delete delete = new Delete( firstKV.getRowArray(), firstKV.getRowOffset(), firstKV.getRowLength(), ts); mutations.add(delete); } else if (isUpsert) { Arrays.fill(values, null); int i = 0; List<PColumn> projectedColumns = projectedTable.getColumns(); for (; i < projectedTable.getPKColumns().size(); i++) { Expression expression = selectExpressions.get(i); if (expression.evaluate(result, ptr)) { values[i] = ptr.copyBytes(); // If SortOrder from expression in SELECT doesn't match the // column being projected into then invert the bits. if (expression.getSortOrder() != projectedColumns.get(i).getSortOrder()) { SortOrder.invert(values[i], 0, values[i], 0, values[i].length); } } } projectedTable.newKey(ptr, values); PRow row = projectedTable.newRow(kvBuilder, ts, ptr); for (; i < projectedColumns.size(); i++) { Expression expression = selectExpressions.get(i); if (expression.evaluate(result, ptr)) { PColumn column = projectedColumns.get(i); Object value = expression.getDataType().toObject(ptr, column.getSortOrder()); // We are guaranteed that the two column will have the same type. if (!column .getDataType() .isSizeCompatible( ptr, value, column.getDataType(), expression.getMaxLength(), expression.getScale(), column.getMaxLength(), column.getScale())) { throw new ValueTypeIncompatibleException( column.getDataType(), column.getMaxLength(), column.getScale()); } column .getDataType() .coerceBytes( ptr, value, expression.getDataType(), expression.getMaxLength(), expression.getScale(), expression.getSortOrder(), column.getMaxLength(), column.getScale(), column.getSortOrder()); byte[] bytes = ByteUtil.copyKeyBytesIfNecessary(ptr); row.setValue(column, bytes); } } for (Mutation mutation : row.toRowMutations()) { mutations.add(mutation); } } else if (deleteCF != null && deleteCQ != null) { // No need to search for delete column, since we project only it // if no empty key value is being set if (emptyCF == null || result.getValue(deleteCF, deleteCQ) != null) { Delete delete = new Delete( results.get(0).getRowArray(), results.get(0).getRowOffset(), results.get(0).getRowLength()); delete.deleteColumns(deleteCF, deleteCQ, ts); mutations.add(delete); } } if (emptyCF != null) { /* * If we've specified an emptyCF, then we need to insert an empty * key value "retroactively" for any key value that is visible at * the timestamp that the DDL was issued. Key values that are not * visible at this timestamp will not ever be projected up to * scans past this timestamp, so don't need to be considered. * We insert one empty key value per row per timestamp. */ Set<Long> timeStamps = Sets.newHashSetWithExpectedSize(results.size()); for (Cell kv : results) { long kvts = kv.getTimestamp(); if (!timeStamps.contains(kvts)) { Put put = new Put(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength()); put.add( emptyCF, QueryConstants.EMPTY_COLUMN_BYTES, kvts, ByteUtil.EMPTY_BYTE_ARRAY); mutations.add(put); } } } // Commit in batches based on UPSERT_BATCH_SIZE_ATTRIB in config if (!mutations.isEmpty() && batchSize > 0 && mutations.size() % batchSize == 0) { commitBatch(region, mutations, indexUUID); mutations.clear(); } } catch (ConstraintViolationException e) { // Log and ignore in count logger.error( "Failed to create row in " + region.getRegionNameAsString() + " with values " + SchemaUtil.toString(values), e); continue; } aggregators.aggregate(rowAggregators, result); hasAny = true; } } while (hasMore); } finally { innerScanner.close(); region.closeRegionOperation(); } if (logger.isInfoEnabled()) { logger.info("Finished scanning " + rowCount + " rows for ungrouped coprocessor scan " + scan); } if (!mutations.isEmpty()) { commitBatch(region, mutations, indexUUID); } final boolean hadAny = hasAny; KeyValue keyValue = null; if (hadAny) { byte[] value = aggregators.toBytes(rowAggregators); keyValue = KeyValueUtil.newKeyValue( UNGROUPED_AGG_ROW_KEY, SINGLE_COLUMN_FAMILY, SINGLE_COLUMN, AGG_TIMESTAMP, value, 0, value.length); } final KeyValue aggKeyValue = keyValue; RegionScanner scanner = new BaseRegionScanner() { private boolean done = !hadAny; @Override public HRegionInfo getRegionInfo() { return innerScanner.getRegionInfo(); } @Override public boolean isFilterDone() { return done; } @Override public void close() throws IOException { innerScanner.close(); } @Override public boolean next(List<Cell> results) throws IOException { if (done) return false; done = true; results.add(aggKeyValue); return false; } @Override public long getMaxResultSize() { return scan.getMaxResultSize(); } }; return scanner; }