protected void doReconstructionLog(
      final Path oldCoreLogFile,
      final long minSeqId,
      final long maxSeqId,
      final Progressable reporter)
      throws IOException {

    Path trxPath = new Path(oldCoreLogFile.getParent(), THLog.HREGION_OLD_THLOGFILE_NAME);

    // We can skip the Trx Log table itself; it is not transactional.
    if (super.getTableDesc().getNameAsString().equals(HBaseBackedTransactionLogger.TABLE_NAME)) {
      return;
    }

    THLogRecoveryManager recoveryManager = new THLogRecoveryManager(this);
    Map<Long, WALEdit> committedTransactionsById =
        recoveryManager.getCommitsFromLog(trxPath, minSeqId, reporter);

    if (committedTransactionsById != null && !committedTransactionsById.isEmpty()) {
      LOG.debug("found " + committedTransactionsById.size() + " COMMITTED transactions to recover.");

      for (Entry<Long, WALEdit> entry : committedTransactionsById.entrySet()) {
        LOG.debug(
            "Writing " + entry.getValue().size() + " updates for transaction " + entry.getKey());
        WALEdit edit = entry.getValue();

        for (KeyValue kv : edit.getKeyValues()) {
          // FIXME need to convert these into puts and deletes. Not sure this is
          // the right way.
          // Could probably combine multiple KVs into a single put/delete.
          // Also timestamps?
          if (kv.getType() == KeyValue.Type.Put.getCode()) {
            // Give the Put its row up front; the no-arg Put constructor is
            // intended only for Writable deserialization.
            Put put = new Put(kv.getRow());
            put.add(kv);
            super.put(put);
          } else if (kv.isDelete()) {
            Delete del = new Delete(kv.getRow());
            if (kv.isDeleteFamily()) {
              del.deleteFamily(kv.getFamily());
            } else if (kv.isDeleteType()) {
              del.deleteColumn(kv.getFamily(), kv.getQualifier());
            }
            // The original snippet built the Delete but never issued it;
            // apply it here (assumes the HRegion#delete(Delete, Integer,
            // boolean) signature of this era).
            super.delete(del, null, true);
          }
        }
      }

      LOG.debug("Flushing cache"); // We must trigger a cache flush,
      // otherwise we will would ignore the log on subsequent failure
      if (!super.flushcache()) {
        LOG.warn("Did not flush cache");
      }
    }
  }
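
The FIXME in the loop above suggests combining multiple KVs into a single put/delete. A minimal sketch of that grouping, using the same old-style KeyValue/Put APIs as this example; the helper name groupPutsByRow and the TreeMap-based grouping are assumptions, not part of the original code:

  // Sketch: collapse the Put-type KeyValues of one recovered transaction into
  // a single Put per row. Assumes java.util.Map and java.util.TreeMap are
  // imported; groupPutsByRow is a hypothetical helper.
  private Map<byte[], Put> groupPutsByRow(final WALEdit edit) throws IOException {
    Map<byte[], Put> putsByRow = new TreeMap<byte[], Put>(Bytes.BYTES_COMPARATOR);
    for (KeyValue kv : edit.getKeyValues()) {
      if (kv.getType() != KeyValue.Type.Put.getCode()) {
        continue; // deletes are replayed separately
      }
      byte[] row = kv.getRow();
      Put put = putsByRow.get(row);
      if (put == null) {
        put = new Put(row);
        putsByRow.put(row, put);
      }
      put.add(kv); // keeps each KeyValue's own timestamp
    }
    return putsByRow;
  }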
  /**
   * Check if the specified KeyValue has been deleted by a previously seen delete.
   *
   * @param kv the KeyValue to check
   * @param ds the deletes already seen while scanning, sorted by {@link KeyValue#COMPARATOR}
   * @return True if the specified KeyValue is deleted, false if not
   */
  public boolean isDeleted(final KeyValue kv, final NavigableSet<KeyValue> ds) {
    // Guard against the set that was passed in (the original checked an
    // unrelated "deletes" field).
    if (ds == null || ds.isEmpty()) return false;
    for (KeyValue d : ds) {
      long kvts = kv.getTimestamp();
      long dts = d.getTimestamp();
      if (d.isDeleteFamily()) {
        if (kvts <= dts) return true;
        continue;
      }
      // Check column
      int ret =
          Bytes.compareTo(
              kv.getBuffer(),
              kv.getQualifierOffset(),
              kv.getQualifierLength(),
              d.getBuffer(),
              d.getQualifierOffset(),
              d.getQualifierLength());
      if (ret <= -1) {
        // This delete is for an earlier column.
        continue;
      } else if (ret >= 1) {
        // Beyond this kv.
        break;
      }
      // Check Timestamp
      if (kvts > dts) return false;

      // Check Type
      switch (KeyValue.Type.codeToType(d.getType())) {
        case Delete:
          return kvts == dts;
        case DeleteColumn:
          return true;
        default:
          continue;
      }
    }
    return false;
  }
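
To make the masking rules concrete, a small usage sketch with hypothetical values: a DeleteFamily marker at timestamp 100 hides any KeyValue in that family at timestamp 100 or older, while a newer put survives. This assumes the old KeyValue constructor that takes an explicit Type, plus java.util.TreeSet and java.util.NavigableSet:

    NavigableSet<KeyValue> ds = new TreeSet<KeyValue>(KeyValue.COMPARATOR);
    byte[] row = Bytes.toBytes("r1");
    byte[] fam = Bytes.toBytes("f");
    ds.add(new KeyValue(row, fam, null, 100L, KeyValue.Type.DeleteFamily));

    KeyValue older = new KeyValue(row, fam, Bytes.toBytes("q"), 90L, KeyValue.Type.Put);
    KeyValue newer = new KeyValue(row, fam, Bytes.toBytes("q"), 110L, KeyValue.Type.Put);
    assert isDeleted(older, ds);  // 90 <= 100: masked by the family delete
    assert !isDeleted(newer, ds); // 110 > 100: still visible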
Example #3
  /**
   * Pretend we have done a seek but don't do it yet, if possible. The hope is that we find
   * requested columns in more recent files and won't have to seek in older files. Creates a fake
   * key/value with the given row/column and the highest (most recent) possible timestamp we might
   * get from this file. When users of such "lazy scanner" need to know the next KV precisely (e.g.
   * when this scanner is at the top of the heap), they run {@link #enforceSeek()}.
   *
   * <p>Note that this function does guarantee that the current KV of this scanner will be advanced
   * to at least the given KV. Because of this, it does have to do a real seek in cases when the
   * seek timestamp is older than the highest timestamp of the file, e.g. when we are trying to seek
   * to the next row/column and use OLDEST_TIMESTAMP in the seek key.
   */
  @Override
  public boolean requestSeek(KeyValue kv, boolean forward, boolean useBloom) throws IOException {
    if (kv.getFamilyLength() == 0) {
      useBloom = false;
    }

    boolean haveToSeek = true;
    if (useBloom) {
      // check ROWCOL Bloom filter first.
      if (reader.getBloomFilterType() == StoreFile.BloomType.ROWCOL) {
        haveToSeek =
            reader.passesGeneralBloomFilter(
                kv.getBuffer(),
                kv.getRowOffset(),
                kv.getRowLength(),
                kv.getBuffer(),
                kv.getQualifierOffset(),
                kv.getQualifierLength());
      } else if (this.matcher != null && !matcher.hasNullColumnInQuery() && kv.isDeleteFamily()) {
        // if there is no such delete family kv in the store file,
        // then no need to seek.
        haveToSeek =
            reader.passesDeleteFamilyBloomFilter(
                kv.getBuffer(), kv.getRowOffset(), kv.getRowLength());
      }
    }

    delayedReseek = forward;
    delayedSeekKV = kv;

    if (haveToSeek) {
      // This row/column might be in this store file (or we did not use the
      // Bloom filter), so we still need to seek.
      realSeekDone = false;
      long maxTimestampInFile = reader.getMaxTimestamp();
      long seekTimestamp = kv.getTimestamp();
      if (seekTimestamp > maxTimestampInFile) {
        // Create a fake key that is not greater than the real next key.
        // (Lower timestamps correspond to higher KVs.)
        // To understand this better, consider that we are asked to seek to
        // a higher timestamp than the max timestamp in this file. We know that
        // the next point when we have to consider this file again is when we
        // pass the max timestamp of this file (with the same row/column).
        cur = kv.createFirstOnRowColTS(maxTimestampInFile);
      } else {
        // This will be the case e.g. when we need to seek to the next
        // row/column, and we don't know exactly what they are, so we set the
        // seek key's timestamp to OLDEST_TIMESTAMP to skip the rest of this
        // row/column.
        enforceSeek();
      }
      return cur != null;
    }

    // Multi-column Bloom filter optimization.
    // Create a fake key/value, so that this scanner only bubbles up to the top
    // of the KeyValueHeap in StoreScanner after we scanned this row/column in
    // all other store files. The query matcher will then just skip this fake
    // key/value and the store scanner will progress to the next column. This
    // is obviously not a "real real" seek, but unlike the fake KV earlier in
    // this method, we want this to be propagated to ScanQueryMatcher.
    cur = kv.createLastOnRowCol();

    realSeekDone = true;
    return true;
  }
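
A sketch of the calling pattern this method enables, with a hypothetical driver: the caller requests a lazy seek, and only forces the real seek via enforceSeek() once the scanner's (possibly fake) current KV must be exact, e.g. when it reaches the top of the KeyValueHeap. realSeekDone(), enforceSeek() and peek() are the KeyValueScanner methods this class implements:

    KeyValue seekKv = KeyValue.createFirstOnRow(Bytes.toBytes("r1"));
    if (scanner.requestSeek(seekKv, true /* forward */, true /* useBloom */)) {
      if (!scanner.realSeekDone()) {
        scanner.enforceSeek(); // resolve the fake current KV into a real position
      }
      KeyValue cur = scanner.peek(); // now exact
    }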