示例#1
0
 /**
  * Returns the journal's lag (irregular) partition in an opened state.
  *
  * <p>If the journal already has an irregular partition, that instance is opened and returned.
  * Otherwise a new temporary partition is created, registered as the journal's irregular
  * partition, and then opened.
  *
  * @return the opened lag partition
  * @throws JournalException if obtaining, creating or opening the partition fails
  */
 public Partition<T> openOrCreateLagPartition() throws JournalException {
   final Partition<T> existing = getIrregularPartition();
   if (existing != null) {
     return existing.open();
   }

   final Partition<T> created = createTempPartition();
   setIrregularPartition(created);
   return created.open();
 }
示例#2
0
  /**
   * Rolls the journal back to the transaction stored at {@code address} in the transaction log.
   *
   * <p>The transaction record is read into {@code tx}; regular partitions, the lag partition and
   * the symbol tables are then brought back to the sizes recorded in it. Finally the cached append
   * state is cleared, the transaction address is written to the log and the active-transaction
   * flag is dropped.
   *
   * @param address transaction log address to roll back to; {@code -1} is rejected as incompatible
   * @param writeDiscard when {@code true}, the rollback is logged and {@code writeDiscardFile} is
   *     called with the transaction's max row id before anything is truncated
   * @throws JournalException if the address is {@code -1} (as {@code IncompatibleJournalException})
   *     or the transaction that was read has address {@code 0}
   */
  private void rollback0(long address, boolean writeDiscard) throws JournalException {
    if (address == -1L) {
      notifyTxError();
      throw new IncompatibleJournalException(
          "Server txn is not compatible with %s", this.getLocation());
    }

    txLog.read(address, tx);
    if (tx.address == 0) {
      throw new JournalException("Invalid transaction address");
    }

    if (writeDiscard) {
      LOGGER.info(
          "Journal %s is rolling back to transaction #%d, timestamp %s",
          metadata.getLocation(), tx.txn, Dates.toString(tx.timestamp));
      writeDiscardFile(tx.journalMaxRowID);
    }

    // Regular partitions must be rolled back before the lag is touched so that a newly created
    // lag partition is assigned a correct partitionIndex.
    rollbackPartitions(tx);

    final Partition<T> currentLag = getIrregularPartition();
    final boolean txNamesLag = tx.lagName != null && tx.lagName.length() > 0;
    if (txNamesLag && (currentLag == null || !tx.lagName.equals(currentLag.getName()))) {
      // The transaction refers to a lag partition other than the current one (or there is none).
      final Partition<T> restoredLag = createTempPartition(tx.lagName);
      setIrregularPartition(restoredLag);
      restoredLag.applyTx(tx.lagSize, tx.lagIndexPointers);
    } else if (currentLag != null) {
      if (tx.lagName == null) {
        // The transaction had no lag at all.
        removeIrregularPartitionInternal();
      } else {
        currentLag.truncate(tx.lagSize);
      }
    }

    // With no recorded sizes every symbol table is truncated completely; otherwise each table is
    // truncated to its recorded size.
    final boolean noRecordedSizes = tx.symbolTableSizes.length == 0;
    for (int i = 0, n = getSymbolTableCount(); i < n; i++) {
      if (noRecordedSizes) {
        getSymbolTable(i).truncate();
      } else {
        getSymbolTable(i).truncate(tx.symbolTableSizes[i]);
      }
    }

    // Drop cached append state before the address is persisted.
    appendPartition = null;
    appendTimestampHi = -1;
    appendTimestampLo = -1;
    txLog.writeTxAddress(tx.address);
    txActive = false;
  }
示例#3
0
  /**
   * Appends a single object to the end of the journal, starting a transaction first if none is
   * active.
   *
   * <p>When order checking is enabled, the object's timestamp selects the append partition
   * (switching partitions when it exceeds the current upper bound) and must not be lower than the
   * timestamp of the previously appended object. When order checking is disabled, the object goes
   * to the partition returned by {@code getAppendPartition()}.
   *
   * @param obj the object to add; must not be {@code null}
   * @throws com.nfsdb.exceptions.JournalException if {@code obj} is {@code null} or, with order
   *     checking enabled, its timestamp is lower than the current lower bound
   */
  public void append(T obj) throws JournalException {
    if (obj == null) {
      throw new JournalException("Cannot append NULL to %s", this);
    }

    if (!txActive) {
      beginTx();
    }

    if (!checkOrder) {
      getAppendPartition().append(obj);
      return;
    }

    final long ts = getTimestamp(obj);

    // Past the upper bound of the current append partition: move to the right one.
    if (ts > appendTimestampHi) {
      switchAppendPartition(ts);
    }

    if (ts < appendTimestampLo) {
      throw new JournalException(
          "Cannot insert records out of order. maxHardTimestamp=%d (%s), timestamp=%d (%s): %s",
          appendTimestampLo,
          Dates.toString(appendTimestampLo),
          ts,
          Dates.toString(ts),
          this);
    }

    appendPartition.append(obj);
    // The timestamp just written becomes the new lower bound for subsequent appends.
    appendTimestampLo = ts;
  }
示例#4
0
 /**
  * Returns the partition that a record with the given timestamp is appended to.
  *
  * <p>With no partitions present, a new partition is created at index 0. Otherwise the last
  * partition is opened and returned if it has no interval or its interval contains the
  * timestamp; if {@code isBefore(timestamp)} holds for its interval, a new partition is created
  * after it; any other case is rejected.
  *
  * @param timestamp timestamp of the record about to be appended
  * @return the partition to append to
  * @throws JournalException if the timestamp is neither contained in the last partition's
  *     interval nor accepted by {@code isBefore}
  */
 public Partition<T> getAppendPartition(long timestamp) throws JournalException {
   final int count = partitions.size();
   if (count == 0) {
     return createPartition(new Interval(timestamp, getMetadata().getPartitionType()), 0);
   }

   final Partition<T> last = partitions.getQuick(count - 1);
   final Interval range = last.getInterval();
   if (range == null || range.contains(timestamp)) {
     return last.open().access();
   }

   if (!range.isBefore(timestamp)) {
     throw new JournalException("%s cannot be appended to %s", Dates.toString(timestamp), this);
   }

   return createPartition(new Interval(timestamp, getMetadata().getPartitionType()), count);
 }
示例#5
0
  /**
   * Removes all data from the journal.
   *
   * <p>Begins a transaction, then truncates, closes and deletes the directory of every partition,
   * closes the partition list, truncates every symbol table, resets the cached append state and
   * finishes with {@code commitDurable()}.
   *
   * @throws JournalException if truncating a partition, deleting its directory or committing fails
   */
  public void truncate() throws JournalException {
    beginTx();
    int partitionCount = getPartitionCount();
    for (int i = 0; i < partitionCount; i++) {
      Partition<T> partition = getPartition(i, true);
      partition.truncate(0);
      partition.close();
      Files.deleteOrException(partition.getPartitionDir());
    }

    closePartitions();

    for (int i = 0, sz = getSymbolTableCount(); i < sz; i++) {
      getSymbolTable(i).truncate();
    }

    // Reset the whole cached append state, the same way rollback0 does. Leaving appendPartition
    // and appendTimestampHi untouched would let a later append() skip switchAppendPartition() and
    // write to a partition that has just been closed and deleted.
    appendTimestampLo = -1;
    appendTimestampHi = -1;
    appendPartition = null;
    commitDurable();
  }
示例#6
0
  /**
   * Selects the append partition for {@code timestamp} and refreshes the cached timestamp bounds.
   *
   * <p>The upper bound becomes the high end of the partition's interval, or {@code Long.MAX_VALUE}
   * when the partition has no interval. The lower bound depends on whether an append partition
   * was already cached before this call: if not, it is taken from the last value of the
   * partition's timestamp column (and left unchanged when that column is empty); if so, it is the
   * low end of the new partition's interval.
   *
   * @param timestamp timestamp of the record about to be appended
   * @throws JournalException if the append partition cannot be obtained
   */
  private void switchAppendPartition(long timestamp) throws JournalException {
    final boolean firstSwitch = appendPartition == null;

    appendPartition = getAppendPartition(timestamp);

    final Interval range = appendPartition.getInterval();
    appendTimestampHi = range == null ? Long.MAX_VALUE : range.getHi();

    if (!firstSwitch) {
      // NOTE(review): the interval is dereferenced here without the null check applied above.
      appendTimestampLo = appendPartition.getInterval().getLo();
      return;
    }

    final FixedColumn timestamps = appendPartition.getTimestampColumn();
    final long rows = timestamps.size();
    if (rows > 0) {
      appendTimestampLo = timestamps.getLong(rows - 1);
    }
  }
示例#7
0
  private void splitAppend(Iterator<T> it, long hard, long soft, Partition<T> partition)
      throws JournalException {
    while (it.hasNext()) {
      T obj = it.next();
      if (doDiscard && getTimestamp(obj) < hard) {
        // discard
        continue;
      } else if (doDiscard) {
        doDiscard = false;
      }

      if (doJournal && getTimestamp(obj) < soft) {
        append(obj);
        continue;
      } else if (doJournal) {
        doJournal = false;
      }

      partition.append(obj);
    }
  }
示例#8
0
  /**
   * Rolls regular partitions back to the state described by {@code tx.journalMaxRowID}.
   *
   * <p>Working from the last partition backwards: partitions whose index is greater than the
   * target index are closed, their directories deleted, and they are removed from the list. The
   * partition with the target index is opened and truncated to the target local row, which ends
   * the rollback. A max row id of {@code -1} is treated as partition 0, row 0.
   *
   * @param tx transaction to roll back to
   * @throws JournalException if truncating a partition or deleting its directory fails
   */
  private void rollbackPartitions(Tx tx) throws JournalException {
    final boolean noRowId = tx.journalMaxRowID == -1;
    final int keepIndex = noRowId ? 0 : Rows.toPartitionIndex(tx.journalMaxRowID);

    for (Partition<T> tail = partitions.getLast(); tail != null; tail = partitions.getLast()) {
      final int tailIndex = tail.getPartitionIndex();

      if (tailIndex < keepIndex) {
        // Everything that remains precedes the target partition; nothing left to do.
        return;
      }

      if (tailIndex == keepIndex) {
        tail.open();
        tail.truncate(noRowId ? 0 : Rows.toLocalRowID(tx.journalMaxRowID));
        return;
      }

      // Partition lies entirely after the rollback point: remove it.
      tail.close();
      Files.deleteOrException(tail.getPartitionDir());
      partitions.remove(partitions.size() - 1);
    }
  }
示例#9
0
  /**
   * Appends a text rendering of the rows located at or after {@code rowid} to the discard file.
   *
   * <p>On first use the discard file is opened for read/write and a sink is positioned at its
   * end, so output from successive calls accumulates. Rows are then written partition by
   * partition: the partition addressed by {@code rowid} is written starting at the local row
   * encoded in {@code rowid}, every following partition is written from row 0. A {@code rowid}
   * of {@code -1} is treated as partition 0, row 0, matching {@code rollbackPartitions}.
   *
   * <p>NOTE(review): column values are written back to back, without any column or row
   * separator; confirm whether a delimiter is expected by consumers of the discard file.
   *
   * <p>NOTE(review): the upper bound is {@code getPartitionCount()}, as in the previous
   * implementation; if that count includes the lag partition, the whole lag is written as well —
   * confirm this is intended.
   *
   * @param rowid global row id of the first row to write, or {@code -1} to write everything
   * @throws JournalException if the discard file cannot be opened or a partition cannot be read
   */
  private void writeDiscardFile(long rowid) throws JournalException {

    if (discardTxtRaf == null) {
      RandomAccessFile raf = null;
      try {
        raf = new RandomAccessFile(discardTxt, "rw");
        // Position the channel at the end of the file so that new output is appended.
        discardSink =
            new FlexBufferSink(raf.getChannel().position(raf.getChannel().size()), 1024 * 1024);
        // Publish the file only once the sink exists, so a failed initialisation is retried on
        // the next call instead of leaving discardSink null behind a non-null discardTxtRaf.
        discardTxtRaf = raf;
      } catch (IOException e) {
        if (raf != null) {
          try {
            raf.close();
          } catch (IOException ignored) {
            // the original failure is the one reported to the caller
          }
        }
        throw new JournalException(e);
      }
    }

    JournalMetadata m = getMetadata();
    final boolean fromStart = rowid == -1;
    final int firstPartition = fromStart ? 0 : Rows.toPartitionIndex(rowid);
    final long firstRow = fromStart ? 0 : Rows.toLocalRowID(rowid);
    long rowCount = 0;

    try {
      // partitions: from the one addressed by rowid up to and including the last one
      for (int p = firstPartition, n = getPartitionCount(); p < n; p++) {
        final Partition<T> partition = getPartition(p, true);
        // only the first partition starts mid-way; the following ones are written in full
        final long startRow = p == firstPartition ? firstRow : 0;
        // partition rows
        for (long r = startRow, psz = partition.size(); r < psz; r++) {
          // partition columns
          for (int c = 0, cc = m.getColumnCount(); c < cc; c++) {
            switch (m.getColumnQuick(c).type) {
              case DATE:
                Dates.appendDateTime(discardSink, partition.getLong(r, c));
                break;
              case DOUBLE:
                Numbers.append(discardSink, partition.getDouble(r, c), 12);
                break;
              case FLOAT:
                Numbers.append(discardSink, partition.getFloat(r, c), 4);
                break;
              case INT:
                Numbers.append(discardSink, partition.getInt(r, c));
                break;
              case STRING:
                partition.getStr(r, c, discardSink);
                break;
              case SYMBOL:
                discardSink.put(partition.getSym(r, c));
                break;
              case SHORT:
                Numbers.append(discardSink, partition.getShort(r, c));
                break;
              case LONG:
                Numbers.append(discardSink, partition.getLong(r, c));
                break;
              case BYTE:
                Numbers.append(discardSink, partition.getByte(r, c));
                break;
              case BOOLEAN:
                discardSink.put(partition.getBool(r, c) ? "true" : "false");
                break;
            }

            // The counter advances once per column value; the sink is flushed on every eighth.
            if (((++rowCount) & 7) == 0) {
              discardSink.flush();
            }
          }
        }
      }
    } finally {
      discardSink.flush();
    }
  }
示例#10
0
  /**
   * Fills the shared {@code tx} record with the current journal state, commits symbol tables,
   * partitions and the lag partition, and writes the record to the transaction log.
   *
   * <p>When {@code command} equals {@code Tx.TX_FORCE}, every committed symbol table, partition,
   * the lag partition and the transaction log itself are additionally forced.
   *
   * @param command transaction command stored in the record
   * @param txn transaction number stored in the record; the second argument passed to
   *     {@code txLog.write} is {@code txn != -1}
   * @param txPin transaction pin stored in the record
   * @throws JournalException if committing a component or writing the log fails
   */
  private void commit(byte command, long txn, long txPin) throws JournalException {
    final boolean durable = command == Tx.TX_FORCE;
    final Partition<T> lastPartition = lastNonEmptyNonLag();
    final Partition<T> lagPartition = getIrregularPartition();

    tx.command = command;
    tx.txn = txn;
    tx.txPin = txPin;
    tx.prevTxAddress = txLog.getCurrentTxAddress();

    // Position of the last non-empty regular partition; -1 / 0 when there is none.
    if (lastPartition == null) {
      tx.journalMaxRowID = -1;
      tx.lastPartitionTimestamp = 0;
    } else {
      tx.journalMaxRowID =
          Rows.toRowID(lastPartition.getPartitionIndex(), lastPartition.size());
      tx.lastPartitionTimestamp =
          lastPartition.getInterval() == null ? 0 : lastPartition.getInterval().getLo();
    }

    if (lagPartition == null) {
      tx.lagSize = 0;
      tx.lagName = null;
    } else {
      tx.lagSize = lagPartition.open().size();
      tx.lagName = lagPartition.getName();
    }

    // Symbol tables are committed first; size and index address are read after each commit.
    final int symbolTableCount = getSymbolTableCount();
    tx.symbolTableSizes = new int[symbolTableCount];
    tx.symbolTableIndexPointers = new long[symbolTableCount];
    for (int i = 0; i < symbolTableCount; i++) {
      final SymbolTable table = getSymbolTable(i);
      table.commit();
      if (durable) {
        table.force();
      }
      tx.symbolTableSizes[i] = table.size();
      tx.symbolTableIndexPointers[i] = table.getIndexTxAddress();
    }

    final int columnCount = getMetadata().getColumnCount();
    tx.indexPointers = new long[columnCount];

    // Commit regular partitions starting at txPartitionIndex (clamped to 0).
    final int from = Math.max(txPartitionIndex, 0);
    final int nonLagCount = nonLagPartitionCount();
    for (int i = from; i < nonLagCount; i++) {
      final Partition<T> current = getPartition(i, true);
      current.commit();
      if (durable) {
        current.force();
      }
    }

    // Index pointers are collected only after the partitions have been committed.
    if (lastPartition != null) {
      lastPartition.getIndexPointers(tx.indexPointers);
    }

    tx.lagIndexPointers = new long[columnCount];
    if (lagPartition != null) {
      lagPartition.commit();
      if (durable) {
        lagPartition.force();
      }
      lagPartition.getIndexPointers(tx.lagIndexPointers);
    }

    txLog.write(tx, txn != -1);
    if (durable) {
      txLog.force();
    }
  }
示例#11
0
  /**
   * Merges a batch of records into the journal through its lag partition.
   *
   * <p>A transaction is begun, then the batch is compared with the current lag partition by
   * timestamp range. If the batch lies entirely after the lag, it is either appended to the lag
   * directly or, when the resulting span would exceed {@code lagSwellMillis}, lag and batch are
   * re-split into a new temporary partition. If the ranges overlap, one of five overlap scenarios
   * is selected and lag and batch are written into a new temporary partition, which then replaces
   * the lag partition.
   *
   * <p>Two thresholds are passed to the split helpers: {@code hard}, the value of
   * {@code getAppendTimestampLo()}, and {@code soft}, the greater of the batch and lag maximum
   * timestamps minus {@code lagMillis}. In {@code splitAppend}, records below {@code hard} are
   * discarded and records below {@code soft} are appended to the journal proper; the rest go to
   * the supplied partition.
   *
   * <p>The method returns without merging when {@code data} is {@code null} or empty, or when the
   * last record of {@code data} is older than {@code hard}. Note that the transaction has already
   * been begun at that point.
   *
   * @param data records to merge; first and last elements are read via {@code peekFirst()} and
   *     {@code peekLast()} to obtain the batch's timestamp range (presumably the iterator is
   *     ordered by timestamp — confirm against callers)
   * @throws JournalException if the journal has {@code lagMillis == 0}, or if an underlying
   *     partition operation fails
   */
  public void mergeAppend(PeekingIterator<T> data) throws JournalException {

    if (lagMillis == 0) {
      throw new JournalException("This journal is not configured to have lag partition");
    }

    beginTx();

    if (data == null || data.isEmpty()) {
      return;
    }

    long dataMaxTimestamp = getTimestamp(data.peekLast());
    long hard = getAppendTimestampLo();

    // the whole batch is older than the hard threshold: nothing to merge
    if (dataMaxTimestamp < hard) {
      return;
    }

    final Partition<T> lagPartition = openOrCreateLagPartition();
    // arm both routing flags read by splitAppend; they are cleared there as records pass the
    // respective thresholds
    this.doDiscard = true;
    this.doJournal = true;

    long dataMinTimestamp = getTimestamp(data.peekFirst());
    long lagMaxTimestamp = getMaxTimestamp();
    // 0 stands for "lag is empty"
    long lagMinTimestamp = lagPartition.size() == 0L ? 0 : getTimestamp(lagPartition.read(0));
    long soft = Math.max(dataMaxTimestamp, lagMaxTimestamp) - lagMillis;

    if (dataMinTimestamp > lagMaxTimestamp) {
      // this could be as simple as just appending data to lag
      // the only complication is that after adding records to lag it could swell beyond
      // the allocated "lagSwellTimestamp"
      // we should check if this is going to happen and optimise copying of data

      // time span the lag would cover after the append, measured from the hard threshold when
      // there is one, otherwise from the first lag record, otherwise 0
      long lagSizeMillis;
      if (hard > 0L) {
        lagSizeMillis = dataMaxTimestamp - hard;
      } else if (lagMinTimestamp > 0L) {
        lagSizeMillis = dataMaxTimestamp - lagMinTimestamp;
      } else {
        lagSizeMillis = 0L;
      }

      if (lagSizeMillis > lagSwellMillis) {
        // data would be too big and would stretch outside of swell timestamp
        // this is when lag partition should be split, but it is still a straight split without
        // re-order

        Partition<T> tempPartition = createTempPartition().open();
        splitAppend(lagPartition.bufferedIterator(), hard, soft, tempPartition);
        splitAppend(data, hard, soft, tempPartition);
        replaceIrregularPartition(tempPartition);
      } else {
        // simplest case, just append to lag
        lagPartition.append(data);
      }
    } else {

      // data and lag overlap: the result is rebuilt in a fresh temporary partition
      Partition<T> tempPartition = createTempPartition().open();
      if (dataMinTimestamp > lagMinTimestamp && dataMaxTimestamp < lagMaxTimestamp) {
        //
        // overlap scenario 1: data is fully inside of lag
        //

        // calc boundaries of lag that intersects with data
        long lagMid1 = lagPartition.indexOf(dataMinTimestamp, BSearchType.OLDER_OR_SAME);
        long lagMid2 = lagPartition.indexOf(dataMaxTimestamp, BSearchType.NEWER_OR_SAME);

        // copy part of lag above data
        splitAppend(lagPartition.bufferedIterator(0, lagMid1), hard, soft, tempPartition);

        // merge lag with data and copy result to temp partition
        splitAppendMerge(
            data,
            lagPartition.bufferedIterator(lagMid1 + 1, lagMid2 - 1),
            hard,
            soft,
            tempPartition);

        // copy part of lag below data
        splitAppend(
            lagPartition.bufferedIterator(lagMid2, lagPartition.size() - 1),
            hard,
            soft,
            tempPartition);

        // NOTE(review): the second comparison below is implied by the first and has no effect
      } else if (dataMaxTimestamp < lagMinTimestamp && dataMaxTimestamp <= lagMinTimestamp) {
        //
        // overlap scenario 2: data sits directly above lag
        //
        splitAppend(data, hard, soft, tempPartition);
        splitAppend(lagPartition.bufferedIterator(), hard, soft, tempPartition);
      } else if (dataMinTimestamp <= lagMinTimestamp && dataMaxTimestamp < lagMaxTimestamp) {
        //
        // overlap scenario 3: bottom part of data overlaps top part of lag
        //

        // calc overlap line
        long split = lagPartition.indexOf(dataMaxTimestamp, BSearchType.NEWER_OR_SAME);

        // merge lag with data and copy result to temp partition
        splitAppendMerge(
            data, lagPartition.bufferedIterator(0, split - 1), hard, soft, tempPartition);

        // copy part of lag below data
        splitAppend(
            lagPartition.bufferedIterator(split, lagPartition.size() - 1),
            hard,
            soft,
            tempPartition);
      } else if (dataMinTimestamp > lagMinTimestamp && dataMaxTimestamp >= lagMaxTimestamp) {
        //
        // overlap scenario 4: top part of data overlaps with bottom part of lag
        //
        long split = lagPartition.indexOf(dataMinTimestamp, BSearchType.OLDER_OR_SAME);

        // copy part of lag above overlap
        splitAppend(lagPartition.bufferedIterator(0, split), hard, soft, tempPartition);

        // merge lag with data and copy result to temp partition
        splitAppendMerge(
            data,
            lagPartition.bufferedIterator(split + 1, lagPartition.size() - 1),
            hard,
            soft,
            tempPartition);
      } else if (dataMinTimestamp <= lagMinTimestamp && dataMaxTimestamp >= lagMaxTimestamp) {
        //
        // overlap scenario 5: lag is fully inside of data
        //

        // merge lag with data and copy result to temp partition
        splitAppendMerge(data, lagPartition.bufferedIterator(), hard, soft, tempPartition);
      } else {
        // the five conditions above cover every min/max combination, so this is a safety net
        throw new JournalRuntimeException(
            "Unsupported overlap type: lag min/max [%s/%s] data min/max: [%s/%s]",
            Dates.toString(lagMinTimestamp),
            Dates.toString(lagMaxTimestamp),
            Dates.toString(dataMinTimestamp),
            Dates.toString(dataMaxTimestamp));
      }

      // the rebuilt partition takes over as the journal's lag partition
      replaceIrregularPartition(tempPartition);
    }
  }