Ejemplo n.º 1
0
  /**
   * Override the preAppend for checkAndPut and checkAndDelete, as we need the ability to a) set the
   * TimeRange for the Get being done and b) return something back to the client to indicate
   * success/failure
   */
  @SuppressWarnings("deprecation")
  @Override
  public Result preAppend(
      final ObserverContext<RegionCoprocessorEnvironment> e, final Append append)
      throws IOException {
    byte[] opBuf = append.getAttribute(OPERATION_ATTRIB);
    if (opBuf == null) {
      return null;
    }
    Op op = Op.values()[opBuf[0]];

    long clientTimestamp = HConstants.LATEST_TIMESTAMP;
    byte[] clientTimestampBuf = append.getAttribute(MAX_TIMERANGE_ATTRIB);
    if (clientTimestampBuf != null) {
      clientTimestamp = Bytes.toLong(clientTimestampBuf);
    }
    boolean hadClientTimestamp = (clientTimestamp != HConstants.LATEST_TIMESTAMP);
    if (hadClientTimestamp) {
      // Prevent race condition of creating two sequences at the same timestamp
      // by looking for a sequence at or after the timestamp at which it'll be
      // created.
      if (op == Op.CREATE_SEQUENCE) {
        clientTimestamp++;
      }
    } else {
      clientTimestamp = EnvironmentEdgeManager.currentTimeMillis();
      clientTimestampBuf = Bytes.toBytes(clientTimestamp);
    }

    RegionCoprocessorEnvironment env = e.getEnvironment();
    // We need to set this to prevent region.append from being called
    e.bypass();
    e.complete();
    HRegion region = env.getRegion();
    byte[] row = append.getRow();
    region.startRegionOperation();
    try {
      Integer lid = region.getLock(null, row, true);
      try {
        KeyValue keyValue = append.getFamilyMap().values().iterator().next().iterator().next();
        byte[] family = keyValue.getFamily();
        byte[] qualifier = keyValue.getQualifier();

        Get get = new Get(row);
        get.setTimeRange(MetaDataProtocol.MIN_TABLE_TIMESTAMP, clientTimestamp);
        get.addColumn(family, qualifier);
        Result result = region.get(get);
        if (result.isEmpty()) {
          if (op == Op.DROP_SEQUENCE || op == Op.RESET_SEQUENCE) {
            return getErrorResult(
                row, clientTimestamp, SQLExceptionCode.SEQUENCE_UNDEFINED.getErrorCode());
          }
        } else {
          if (op == Op.CREATE_SEQUENCE) {
            return getErrorResult(
                row, clientTimestamp, SQLExceptionCode.SEQUENCE_ALREADY_EXIST.getErrorCode());
          }
        }
        Mutation m = null;
        switch (op) {
          case RESET_SEQUENCE:
            KeyValue currentValueKV = result.raw()[0];
            long expectedValue =
                PDataType.LONG
                    .getCodec()
                    .decodeLong(append.getAttribute(CURRENT_VALUE_ATTRIB), 0, null);
            long value =
                PDataType.LONG
                    .getCodec()
                    .decodeLong(currentValueKV.getBuffer(), currentValueKV.getValueOffset(), null);
            // Timestamp should match exactly, or we may have the wrong sequence
            if (expectedValue != value || currentValueKV.getTimestamp() != clientTimestamp) {
              return new Result(
                  Collections.singletonList(
                      KeyValueUtil.newKeyValue(
                          row,
                          PhoenixDatabaseMetaData.SEQUENCE_FAMILY_BYTES,
                          QueryConstants.EMPTY_COLUMN_BYTES,
                          currentValueKV.getTimestamp(),
                          ByteUtil.EMPTY_BYTE_ARRAY)));
            }
            m = new Put(row, currentValueKV.getTimestamp());
            m.getFamilyMap().putAll(append.getFamilyMap());
            break;
          case DROP_SEQUENCE:
            m = new Delete(row, clientTimestamp, null);
            break;
          case CREATE_SEQUENCE:
            m = new Put(row, clientTimestamp);
            m.getFamilyMap().putAll(append.getFamilyMap());
            break;
        }
        if (!hadClientTimestamp) {
          for (List<KeyValue> kvs : m.getFamilyMap().values()) {
            for (KeyValue kv : kvs) {
              kv.updateLatestStamp(clientTimestampBuf);
            }
          }
        }
        @SuppressWarnings("unchecked")
        Pair<Mutation, Integer>[] mutations = new Pair[1];
        mutations[0] = new Pair<Mutation, Integer>(m, lid);
        region.batchMutate(mutations);
        long serverTimestamp = MetaDataUtil.getClientTimeStamp(m);
        // Return result with single KeyValue. The only piece of information
        // the client cares about is the timestamp, which is the timestamp of
        // when the mutation was actually performed (useful in the case of .
        return new Result(
            Collections.singletonList(
                KeyValueUtil.newKeyValue(
                    row,
                    PhoenixDatabaseMetaData.SEQUENCE_FAMILY_BYTES,
                    QueryConstants.EMPTY_COLUMN_BYTES,
                    serverTimestamp,
                    SUCCESS_VALUE)));
      } finally {
        region.releaseRowLock(lid);
      }
    } catch (Throwable t) {
      ServerUtil.throwIOException("Increment of sequence " + Bytes.toStringBinary(row), t);
      return null; // Impossible
    } finally {
      region.closeRegionOperation();
    }
  }
Ejemplo n.º 2
0
 /**
  * Use PreIncrement hook of BaseRegionObserver to overcome deficiencies in Increment
  * implementation (HBASE-10254): 1) Lack of recognition and identification of when the key value
  * to increment doesn't exist 2) Lack of the ability to set the timestamp of the updated key
  * value. Works the same as existing region.increment(), except assumes there is a single column
  * to increment and uses Phoenix LONG encoding.
  *
  * @author jtaylor
  * @since 3.0.0
  */
 @Override
 public Result preIncrement(
     final ObserverContext<RegionCoprocessorEnvironment> e, final Increment increment)
     throws IOException {
   RegionCoprocessorEnvironment env = e.getEnvironment();
   // We need to set this to prevent region.increment from being called
   e.bypass();
   e.complete();
   HRegion region = env.getRegion();
   byte[] row = increment.getRow();
   TimeRange tr = increment.getTimeRange();
   region.startRegionOperation();
   try {
     Integer lid = region.getLock(null, row, true);
     try {
       long maxTimestamp = tr.getMax();
       if (maxTimestamp == HConstants.LATEST_TIMESTAMP) {
         maxTimestamp = EnvironmentEdgeManager.currentTimeMillis();
         tr = new TimeRange(tr.getMin(), maxTimestamp);
       }
       Get get = new Get(row);
       get.setTimeRange(tr.getMin(), tr.getMax());
       for (Map.Entry<byte[], NavigableMap<byte[], Long>> entry :
           increment.getFamilyMap().entrySet()) {
         byte[] cf = entry.getKey();
         for (byte[] cq : entry.getValue().keySet()) {
           get.addColumn(cf, cq);
         }
       }
       Result result = region.get(get);
       if (result.isEmpty()) {
         return getErrorResult(
             row, maxTimestamp, SQLExceptionCode.SEQUENCE_UNDEFINED.getErrorCode());
       }
       KeyValue currentValueKV = Sequence.getCurrentValueKV(result);
       KeyValue incrementByKV = Sequence.getIncrementByKV(result);
       KeyValue cacheSizeKV = Sequence.getCacheSizeKV(result);
       long value =
           PDataType.LONG
               .getCodec()
               .decodeLong(currentValueKV.getBuffer(), currentValueKV.getValueOffset(), null);
       long incrementBy =
           PDataType.LONG
               .getCodec()
               .decodeLong(incrementByKV.getBuffer(), incrementByKV.getValueOffset(), null);
       int cacheSize =
           PDataType.INTEGER
               .getCodec()
               .decodeInt(cacheSizeKV.getBuffer(), cacheSizeKV.getValueOffset(), null);
       value += incrementBy * cacheSize;
       byte[] valueBuffer = new byte[PDataType.LONG.getByteSize()];
       PDataType.LONG.getCodec().encodeLong(value, valueBuffer, 0);
       Put put = new Put(row, currentValueKV.getTimestamp());
       // Hold timestamp constant for sequences, so that clients always only see the latest value
       // regardless of when they connect.
       KeyValue newCurrentValueKV =
           KeyValueUtil.newKeyValue(
               row,
               currentValueKV.getFamily(),
               currentValueKV.getQualifier(),
               currentValueKV.getTimestamp(),
               valueBuffer);
       put.add(newCurrentValueKV);
       @SuppressWarnings("unchecked")
       Pair<Mutation, Integer>[] mutations = new Pair[1];
       mutations[0] = new Pair<Mutation, Integer>(put, lid);
       region.batchMutate(mutations);
       return Sequence.replaceCurrentValueKV(result, newCurrentValueKV);
     } finally {
       region.releaseRowLock(lid);
     }
   } catch (Throwable t) {
     ServerUtil.throwIOException("Increment of sequence " + Bytes.toStringBinary(row), t);
     return null; // Impossible
   } finally {
     region.closeRegionOperation();
   }
 }
  @Override
  protected RegionScanner doPostScannerOpen(
      final ObserverContext<RegionCoprocessorEnvironment> c, final Scan scan, final RegionScanner s)
      throws IOException {
    byte[] isUngroupedAgg = scan.getAttribute(BaseScannerRegionObserver.UNGROUPED_AGG);
    if (isUngroupedAgg == null) {
      return s;
    }

    final ScanProjector p = ScanProjector.deserializeProjectorFromScan(scan);
    final HashJoinInfo j = HashJoinInfo.deserializeHashJoinFromScan(scan);
    RegionScanner theScanner = s;
    if (p != null || j != null) {
      theScanner =
          new HashJoinRegionScanner(s, p, j, ScanUtil.getTenantId(scan), c.getEnvironment());
    }
    final RegionScanner innerScanner = theScanner;

    byte[] indexUUID = scan.getAttribute(PhoenixIndexCodec.INDEX_UUID);
    PTable projectedTable = null;
    List<Expression> selectExpressions = null;
    byte[] upsertSelectTable = scan.getAttribute(BaseScannerRegionObserver.UPSERT_SELECT_TABLE);
    boolean isUpsert = false;
    boolean isDelete = false;
    byte[] deleteCQ = null;
    byte[] deleteCF = null;
    byte[][] values = null;
    byte[] emptyCF = null;
    ImmutableBytesWritable ptr = null;
    if (upsertSelectTable != null) {
      isUpsert = true;
      projectedTable = deserializeTable(upsertSelectTable);
      selectExpressions =
          deserializeExpressions(scan.getAttribute(BaseScannerRegionObserver.UPSERT_SELECT_EXPRS));
      values = new byte[projectedTable.getPKColumns().size()][];
      ptr = new ImmutableBytesWritable();
    } else {
      byte[] isDeleteAgg = scan.getAttribute(BaseScannerRegionObserver.DELETE_AGG);
      isDelete = isDeleteAgg != null && Bytes.compareTo(PDataType.TRUE_BYTES, isDeleteAgg) == 0;
      if (!isDelete) {
        deleteCF = scan.getAttribute(BaseScannerRegionObserver.DELETE_CF);
        deleteCQ = scan.getAttribute(BaseScannerRegionObserver.DELETE_CQ);
      }
      emptyCF = scan.getAttribute(BaseScannerRegionObserver.EMPTY_CF);
    }

    int batchSize = 0;
    long ts = scan.getTimeRange().getMax();
    HRegion region = c.getEnvironment().getRegion();
    List<Mutation> mutations = Collections.emptyList();
    if (isDelete || isUpsert || (deleteCQ != null && deleteCF != null) || emptyCF != null) {
      // TODO: size better
      mutations = Lists.newArrayListWithExpectedSize(1024);
      batchSize =
          c.getEnvironment()
              .getConfiguration()
              .getInt(MUTATE_BATCH_SIZE_ATTRIB, QueryServicesOptions.DEFAULT_MUTATE_BATCH_SIZE);
    }
    Aggregators aggregators =
        ServerAggregators.deserialize(
            scan.getAttribute(BaseScannerRegionObserver.AGGREGATORS),
            c.getEnvironment().getConfiguration());
    Aggregator[] rowAggregators = aggregators.getAggregators();
    boolean hasMore;
    boolean hasAny = false;
    MultiKeyValueTuple result = new MultiKeyValueTuple();
    if (logger.isInfoEnabled()) {
      logger.info("Starting ungrouped coprocessor scan " + scan);
    }
    long rowCount = 0;
    region.startRegionOperation();
    try {
      do {
        List<Cell> results = new ArrayList<Cell>();
        // Results are potentially returned even when the return value of s.next is false
        // since this is an indication of whether or not there are more values after the
        // ones returned
        hasMore = innerScanner.nextRaw(results);
        if (!results.isEmpty()) {
          rowCount++;
          result.setKeyValues(results);
          try {
            if (isDelete) {
              // FIXME: the version of the Delete constructor without the lock args was introduced
              // in 0.94.4, thus if we try to use it here we can no longer use the 0.94.2 version
              // of the client.
              Cell firstKV = results.get(0);
              Delete delete =
                  new Delete(
                      firstKV.getRowArray(), firstKV.getRowOffset(), firstKV.getRowLength(), ts);
              mutations.add(delete);
            } else if (isUpsert) {
              Arrays.fill(values, null);
              int i = 0;
              List<PColumn> projectedColumns = projectedTable.getColumns();
              for (; i < projectedTable.getPKColumns().size(); i++) {
                Expression expression = selectExpressions.get(i);
                if (expression.evaluate(result, ptr)) {
                  values[i] = ptr.copyBytes();
                  // If SortOrder from expression in SELECT doesn't match the
                  // column being projected into then invert the bits.
                  if (expression.getSortOrder() != projectedColumns.get(i).getSortOrder()) {
                    SortOrder.invert(values[i], 0, values[i], 0, values[i].length);
                  }
                }
              }
              projectedTable.newKey(ptr, values);
              PRow row = projectedTable.newRow(kvBuilder, ts, ptr);
              for (; i < projectedColumns.size(); i++) {
                Expression expression = selectExpressions.get(i);
                if (expression.evaluate(result, ptr)) {
                  PColumn column = projectedColumns.get(i);
                  Object value = expression.getDataType().toObject(ptr, column.getSortOrder());
                  // We are guaranteed that the two column will have the same type.
                  if (!column
                      .getDataType()
                      .isSizeCompatible(
                          ptr,
                          value,
                          column.getDataType(),
                          expression.getMaxLength(),
                          expression.getScale(),
                          column.getMaxLength(),
                          column.getScale())) {
                    throw new ValueTypeIncompatibleException(
                        column.getDataType(), column.getMaxLength(), column.getScale());
                  }
                  column
                      .getDataType()
                      .coerceBytes(
                          ptr,
                          value,
                          expression.getDataType(),
                          expression.getMaxLength(),
                          expression.getScale(),
                          expression.getSortOrder(),
                          column.getMaxLength(),
                          column.getScale(),
                          column.getSortOrder());
                  byte[] bytes = ByteUtil.copyKeyBytesIfNecessary(ptr);
                  row.setValue(column, bytes);
                }
              }
              for (Mutation mutation : row.toRowMutations()) {
                mutations.add(mutation);
              }
            } else if (deleteCF != null && deleteCQ != null) {
              // No need to search for delete column, since we project only it
              // if no empty key value is being set
              if (emptyCF == null || result.getValue(deleteCF, deleteCQ) != null) {
                Delete delete =
                    new Delete(
                        results.get(0).getRowArray(),
                        results.get(0).getRowOffset(),
                        results.get(0).getRowLength());
                delete.deleteColumns(deleteCF, deleteCQ, ts);
                mutations.add(delete);
              }
            }
            if (emptyCF != null) {
              /*
               * If we've specified an emptyCF, then we need to insert an empty
               * key value "retroactively" for any key value that is visible at
               * the timestamp that the DDL was issued. Key values that are not
               * visible at this timestamp will not ever be projected up to
               * scans past this timestamp, so don't need to be considered.
               * We insert one empty key value per row per timestamp.
               */
              Set<Long> timeStamps = Sets.newHashSetWithExpectedSize(results.size());
              for (Cell kv : results) {
                long kvts = kv.getTimestamp();
                if (!timeStamps.contains(kvts)) {
                  Put put = new Put(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength());
                  put.add(
                      emptyCF, QueryConstants.EMPTY_COLUMN_BYTES, kvts, ByteUtil.EMPTY_BYTE_ARRAY);
                  mutations.add(put);
                }
              }
            }
            // Commit in batches based on UPSERT_BATCH_SIZE_ATTRIB in config
            if (!mutations.isEmpty() && batchSize > 0 && mutations.size() % batchSize == 0) {
              commitBatch(region, mutations, indexUUID);
              mutations.clear();
            }
          } catch (ConstraintViolationException e) {
            // Log and ignore in count
            logger.error(
                "Failed to create row in "
                    + region.getRegionNameAsString()
                    + " with values "
                    + SchemaUtil.toString(values),
                e);
            continue;
          }
          aggregators.aggregate(rowAggregators, result);
          hasAny = true;
        }
      } while (hasMore);
    } finally {
      innerScanner.close();
      region.closeRegionOperation();
    }

    if (logger.isInfoEnabled()) {
      logger.info("Finished scanning " + rowCount + " rows for ungrouped coprocessor scan " + scan);
    }

    if (!mutations.isEmpty()) {
      commitBatch(region, mutations, indexUUID);
    }

    final boolean hadAny = hasAny;
    KeyValue keyValue = null;
    if (hadAny) {
      byte[] value = aggregators.toBytes(rowAggregators);
      keyValue =
          KeyValueUtil.newKeyValue(
              UNGROUPED_AGG_ROW_KEY,
              SINGLE_COLUMN_FAMILY,
              SINGLE_COLUMN,
              AGG_TIMESTAMP,
              value,
              0,
              value.length);
    }
    final KeyValue aggKeyValue = keyValue;

    RegionScanner scanner =
        new BaseRegionScanner() {
          private boolean done = !hadAny;

          @Override
          public HRegionInfo getRegionInfo() {
            return innerScanner.getRegionInfo();
          }

          @Override
          public boolean isFilterDone() {
            return done;
          }

          @Override
          public void close() throws IOException {
            innerScanner.close();
          }

          @Override
          public boolean next(List<Cell> results) throws IOException {
            if (done) return false;
            done = true;
            results.add(aggKeyValue);
            return false;
          }

          @Override
          public long getMaxResultSize() {
            return scan.getMaxResultSize();
          }
        };
    return scanner;
  }