/**
 * Returns the opened lag (irregular) partition of this journal, creating a temporary
 * partition and registering it as the lag partition when none is currently assigned.
 *
 * @return the lag partition, as returned by its {@code open()} call.
 * @throws com.nfsdb.exceptions.JournalException if the partition cannot be created or opened
 */
public Partition<T> openOrCreateLagPartition() throws JournalException {
    Partition<T> lag = getIrregularPartition();
    if (lag != null) {
        // a lag partition is already assigned, just make sure it is open
        return lag.open();
    }

    lag = createTempPartition();
    setIrregularPartition(lag);
    return lag.open();
}
private void rollback0(long address, boolean writeDiscard) throws JournalException { if (address == -1L) { notifyTxError(); throw new IncompatibleJournalException( "Server txn is not compatible with %s", this.getLocation()); } txLog.read(address, tx); if (tx.address == 0) { throw new JournalException("Invalid transaction address"); } if (writeDiscard) { LOGGER.info( "Journal %s is rolling back to transaction #%d, timestamp %s", metadata.getLocation(), tx.txn, Dates.toString(tx.timestamp)); writeDiscardFile(tx.journalMaxRowID); } // partitions need to be dealt with first to make sure new lag is assigned a correct // partitionIndex rollbackPartitions(tx); Partition<T> lag = getIrregularPartition(); if (tx.lagName != null && tx.lagName.length() > 0 && (lag == null || !tx.lagName.equals(lag.getName()))) { Partition<T> newLag = createTempPartition(tx.lagName); setIrregularPartition(newLag); newLag.applyTx(tx.lagSize, tx.lagIndexPointers); } else if (lag != null && tx.lagName == null) { removeIrregularPartitionInternal(); } else if (lag != null) { lag.truncate(tx.lagSize); } if (tx.symbolTableSizes.length == 0) { for (int i = 0, sz = getSymbolTableCount(); i < sz; i++) { getSymbolTable(i).truncate(); } } else { for (int i = 0, sz = getSymbolTableCount(); i < sz; i++) { getSymbolTable(i).truncate(tx.symbolTableSizes[i]); } } appendTimestampLo = -1; appendTimestampHi = -1; appendPartition = null; txLog.writeTxAddress(tx.address); txActive = false; }
/**
 * Appends a single object to the journal, starting a transaction first if none is active.
 * <p>
 * When order checking is enabled the object's timestamp is compared against the current
 * append window: a timestamp above the upper bound switches the append partition, a
 * timestamp below the lower bound is rejected.
 *
 * @param obj the object to add
 * @throws com.nfsdb.exceptions.JournalException if {@code obj} is {@code null}, if the object
 *                                               is out of timestamp order, or if the
 *                                               underlying partition fails
 */
public void append(T obj) throws JournalException {
    if (obj == null) {
        throw new JournalException("Cannot append NULL to %s", this);
    }

    if (!txActive) {
        beginTx();
    }

    if (!checkOrder) {
        // no ordering guarantees requested
        getAppendPartition().append(obj);
        return;
    }

    final long ts = getTimestamp(obj);

    if (ts > appendTimestampHi) {
        switchAppendPartition(ts);
    }

    if (ts < appendTimestampLo) {
        throw new JournalException(
                "Cannot insert records out of order. maxHardTimestamp=%d (%s), timestamp=%d (%s): %s",
                appendTimestampLo,
                Dates.toString(appendTimestampLo),
                ts,
                Dates.toString(ts),
                this);
    }

    appendPartition.append(obj);
    // the record just written becomes the new lower bound for subsequent appends
    appendTimestampLo = ts;
}
/**
 * Resolves the partition that a record with the given timestamp has to be appended to.
 * <p>
 * Only the last partition in the list is considered: it is returned if it has no interval
 * or its interval contains the timestamp, and a new partition is created after it if its
 * interval lies before the timestamp. A journal without partitions gets its first partition
 * created.
 *
 * @param timestamp timestamp of the record about to be appended
 * @return partition to append to
 * @throws JournalException if the timestamp falls before the interval of the last partition,
 *                          or if opening/creating the partition fails
 */
public Partition<T> getAppendPartition(long timestamp) throws JournalException {
    final int count = partitions.size();

    if (count == 0) {
        return createPartition(new Interval(timestamp, getMetadata().getPartitionType()), 0);
    }

    final Partition<T> last = partitions.getQuick(count - 1);
    final Interval lastInterval = last.getInterval();

    if (lastInterval == null || lastInterval.contains(timestamp)) {
        return last.open().access();
    }

    if (lastInterval.isBefore(timestamp)) {
        return createPartition(new Interval(timestamp, getMetadata().getPartitionType()), count);
    }

    throw new JournalException("%s cannot be appended to %s", Dates.toString(timestamp), this);
}
/**
 * Removes all data from the journal.
 * <p>
 * Every partition reported by {@code getPartitionCount()} is truncated to zero rows, closed
 * and has its directory deleted; the partition list is then closed, all symbol tables are
 * truncated and the result is committed via {@code commitDurable()}.
 * <p>
 * The complete append cursor ({@code appendTimestampLo}, {@code appendTimestampHi} and
 * {@code appendPartition}) is reset, the same way {@code rollback0} does it. Resetting only
 * the lower bound would leave {@code appendPartition} pointing at a partition that has just
 * been closed and deleted, and a stale upper bound would let {@code append} write into it
 * without ever calling {@code switchAppendPartition}.
 *
 * @throws JournalException if a partition cannot be truncated or deleted, or the commit fails
 */
public void truncate() throws JournalException {
    beginTx();

    for (int i = 0, partitionCount = getPartitionCount(); i < partitionCount; i++) {
        Partition<T> partition = getPartition(i, true);
        partition.truncate(0);
        partition.close();
        Files.deleteOrException(partition.getPartitionDir());
    }

    closePartitions();

    for (int i = 0, sz = getSymbolTableCount(); i < sz; i++) {
        getSymbolTable(i).truncate();
    }

    // forget everything about the previous append position, the partitions it referred
    // to no longer exist
    appendTimestampLo = -1;
    appendTimestampHi = -1;
    appendPartition = null;

    commitDurable();
}
/**
 * Re-targets the append cursor to the partition that accepts the given timestamp and
 * recalculates the timestamp window of that partition.
 * <p>
 * The upper bound is the high end of the partition interval, or {@code Long.MAX_VALUE} when
 * the partition has no interval. The lower bound is the low end of the interval when a
 * partition was already selected before this call; otherwise it is the last value of the
 * partition's timestamp column, and is left untouched if that column is empty.
 *
 * @param timestamp timestamp of the record that triggered the switch
 * @throws JournalException if the append partition cannot be resolved
 */
private void switchAppendPartition(long timestamp) throws JournalException {
    final boolean hadNoPartition = appendPartition == null;

    appendPartition = getAppendPartition(timestamp);

    final Interval bounds = appendPartition.getInterval();
    appendTimestampHi = bounds == null ? Long.MAX_VALUE : bounds.getHi();

    if (!hadNoPartition) {
        // moving on from a previous partition: anything inside the new interval is acceptable
        appendTimestampLo = appendPartition.getInterval().getLo();
        return;
    }

    // first selection: continue after the last timestamp already stored, if there is one
    final FixedColumn timestampColumn = appendPartition.getTimestampColumn();
    final long rowCount = timestampColumn.size();
    if (rowCount > 0) {
        appendTimestampLo = timestampColumn.getLong(rowCount - 1);
    }
}
private void splitAppend(Iterator<T> it, long hard, long soft, Partition<T> partition) throws JournalException { while (it.hasNext()) { T obj = it.next(); if (doDiscard && getTimestamp(obj) < hard) { // discard continue; } else if (doDiscard) { doDiscard = false; } if (doJournal && getTimestamp(obj) < soft) { append(obj); continue; } else if (doJournal) { doJournal = false; } partition.append(obj); } }
/**
 * Brings the list of regular partitions back to the state recorded in the transaction.
 * <p>
 * Partitions whose index is greater than the partition index encoded in
 * {@code tx.journalMaxRowID} are closed, deleted from disk and removed from the end of the
 * list. If the remaining last partition has exactly that index it is opened and truncated to
 * the local row id encoded in {@code tx.journalMaxRowID}. A {@code journalMaxRowID} of
 * {@code -1} is treated as partition 0, row 0.
 *
 * @param tx transaction to roll back to
 * @throws JournalException if a partition cannot be deleted, opened or truncated
 */
private void rollbackPartitions(Tx tx) throws JournalException {
    final boolean emptyJournalTx = tx.journalMaxRowID == -1;
    final int targetIndex = emptyJournalTx ? 0 : Rows.toPartitionIndex(tx.journalMaxRowID);

    // drop every trailing partition that was created after the transaction
    Partition<T> last;
    while ((last = partitions.getLast()) != null && last.getPartitionIndex() > targetIndex) {
        last.close();
        Files.deleteOrException(last.getPartitionDir());
        partitions.remove(partitions.size() - 1);
    }

    // cut the partition the transaction ended in back to its recorded size
    if (last != null && last.getPartitionIndex() == targetIndex) {
        last.open();
        last.truncate(emptyJournalTx ? 0 : Rows.toLocalRowID(tx.journalMaxRowID));
    }
}
private void writeDiscardFile(long rowid) throws JournalException { if (discardTxtRaf == null) { try { discardTxtRaf = new RandomAccessFile(discardTxt, "rw"); discardTxtRaf.getChannel(); discardSink = new FlexBufferSink( discardTxtRaf.getChannel().position(discardTxtRaf.getChannel().size()), 1024 * 1024); } catch (IOException e) { throw new JournalException(e); } } JournalMetadata m = getMetadata(); int p = Rows.toPartitionIndex(rowid); long row = Rows.toLocalRowID(rowid); long rowCount = 0; try { // partitions for (int n = getPartitionCount() - 1; p < n; p++) { final Partition partition = getPartition(n, true); // partition rows for (long r = row, psz = partition.size(); r < psz; r++) { // partition columns for (int c = 0, cc = m.getColumnCount(); c < cc; c++) { switch (m.getColumnQuick(c).type) { case DATE: Dates.appendDateTime(discardSink, partition.getLong(r, c)); break; case DOUBLE: Numbers.append(discardSink, partition.getDouble(r, c), 12); break; case FLOAT: Numbers.append(discardSink, partition.getFloat(r, c), 4); break; case INT: Numbers.append(discardSink, partition.getInt(r, c)); break; case STRING: partition.getStr(r, c, discardSink); break; case SYMBOL: discardSink.put(partition.getSym(r, c)); break; case SHORT: Numbers.append(discardSink, partition.getShort(r, c)); break; case LONG: Numbers.append(discardSink, partition.getLong(r, c)); break; case BYTE: Numbers.append(discardSink, partition.getByte(r, c)); break; case BOOLEAN: discardSink.put(partition.getBool(r, c) ? "true" : "false"); break; } if (((++rowCount) & 7) == 0) { discardSink.flush(); } } } } } finally { discardSink.flush(); } }
/**
 * Fills the transaction record from the current journal state, commits symbol tables,
 * partitions and the lag partition, and writes the record to the transaction log.
 * <p>
 * When {@code command} equals {@code Tx.TX_FORCE}, {@code force()} is additionally called on
 * every symbol table, every committed partition, the lag partition and the transaction log.
 *
 * @param command transaction command stored in the record
 * @param txn     transaction number stored in the record; the log is written with its
 *                boolean argument set to {@code txn != -1}
 * @param txPin   transaction pin stored in the record
 * @throws JournalException if any of the commit, force or write operations fails
 */
private void commit(byte command, long txn, long txPin) throws JournalException {
    final boolean force = command == Tx.TX_FORCE;
    final Partition<T> lastPartition = lastNonEmptyNonLag();
    final Partition<T> lagPartition = getIrregularPartition();

    tx.command = command;
    tx.txn = txn;
    tx.txPin = txPin;
    tx.prevTxAddress = txLog.getCurrentTxAddress();

    // position of the last regular record
    if (lastPartition == null) {
        tx.journalMaxRowID = -1;
        tx.lastPartitionTimestamp = 0;
    } else {
        tx.journalMaxRowID =
                Rows.toRowID(lastPartition.getPartitionIndex(), lastPartition.size());
        tx.lastPartitionTimestamp =
                lastPartition.getInterval() == null ? 0 : lastPartition.getInterval().getLo();
    }

    // lag partition identity and size
    if (lagPartition == null) {
        tx.lagSize = 0;
        tx.lagName = null;
    } else {
        tx.lagSize = lagPartition.open().size();
        tx.lagName = lagPartition.getName();
    }

    // symbol tables
    final int symbolTableCount = getSymbolTableCount();
    tx.symbolTableSizes = new int[symbolTableCount];
    tx.symbolTableIndexPointers = new long[symbolTableCount];
    for (int i = 0; i < symbolTableCount; i++) {
        final SymbolTable symbolTable = getSymbolTable(i);
        symbolTable.commit();
        if (force) {
            symbolTable.force();
        }
        tx.symbolTableSizes[i] = symbolTable.size();
        tx.symbolTableIndexPointers[i] = symbolTable.getIndexTxAddress();
    }

    // regular partitions, starting from txPartitionIndex (or 0 when it is negative)
    tx.indexPointers = new long[getMetadata().getColumnCount()];
    final int firstPartition = Math.max(txPartitionIndex, 0);
    final int regularPartitionCount = nonLagPartitionCount();
    for (int i = firstPartition; i < regularPartitionCount; i++) {
        final Partition<T> each = getPartition(i, true);
        each.commit();
        if (force) {
            each.force();
        }
    }

    if (lastPartition != null) {
        lastPartition.getIndexPointers(tx.indexPointers);
    }

    // lag partition
    tx.lagIndexPointers = new long[tx.indexPointers.length];
    if (lagPartition != null) {
        lagPartition.commit();
        if (force) {
            lagPartition.force();
        }
        lagPartition.getIndexPointers(tx.lagIndexPointers);
    }

    txLog.write(tx, txn != -1);
    if (force) {
        txLog.force();
    }
}
/**
 * Merges a batch of records into the journal through the lag partition.
 * <p>
 * The first element of {@code data} is used as the minimum timestamp of the batch and the
 * last element as the maximum, i.e. the code assumes the batch is ordered by timestamp.
 * <p>
 * Two thresholds drive the routing of records (see {@code splitAppend}):
 * <ul>
 * <li>{@code hard} - value of {@code getAppendTimestampLo()}; leading records below it are
 * discarded;</li>
 * <li>{@code soft} - the greater of the batch maximum and {@code getMaxTimestamp()}, minus
 * {@code lagMillis}; leading records below it are appended to the journal itself.</li>
 * </ul>
 * Remaining records end up in the lag partition. Unless the batch can simply be appended to
 * the existing lag partition, a temporary partition is built from lag and batch contents and
 * then installed via {@code replaceIrregularPartition}.
 * <p>
 * A transaction is started before the batch is inspected. The method returns without writing
 * anything when {@code data} is {@code null} or empty, or when the whole batch is older than
 * {@code hard}.
 *
 * @param data batch of records to merge
 * @throws JournalException        if {@code lagMillis} is 0 (journal has no lag configured)
 *                                 or any partition operation fails
 * @throws JournalRuntimeException if the relation between batch and lag timestamps matches
 *                                 none of the handled overlap scenarios
 */
public void mergeAppend(PeekingIterator<T> data) throws JournalException {

    if (lagMillis == 0) {
        throw new JournalException("This journal is not configured to have lag partition");
    }

    beginTx();

    if (data == null || data.isEmpty()) {
        return;
    }

    long dataMaxTimestamp = getTimestamp(data.peekLast());
    long hard = getAppendTimestampLo();

    // the entire batch is older than what has already been appended, nothing to keep
    if (dataMaxTimestamp < hard) {
        return;
    }

    final Partition<T> lagPartition = openOrCreateLagPartition();
    // both flags are consumed (and cleared) by the splitAppend calls below
    this.doDiscard = true;
    this.doJournal = true;

    long dataMinTimestamp = getTimestamp(data.peekFirst());
    long lagMaxTimestamp = getMaxTimestamp();
    // 0 is used as "no minimum" when lag partition is empty
    long lagMinTimestamp = lagPartition.size() == 0L ? 0 : getTimestamp(lagPartition.read(0));
    long soft = Math.max(dataMaxTimestamp, lagMaxTimestamp) - lagMillis;

    if (dataMinTimestamp > lagMaxTimestamp) {
        // this could be as simple as just appending data to lag
        // the only complication is that after adding records to lag it could swell beyond
        // the allocated "lagSwellTimestamp"
        // we should check if this is going to happen and optimise copying of data

        long lagSizeMillis;
        if (hard > 0L) {
            lagSizeMillis = dataMaxTimestamp - hard;
        } else if (lagMinTimestamp > 0L) {
            lagSizeMillis = dataMaxTimestamp - lagMinTimestamp;
        } else {
            lagSizeMillis = 0L;
        }

        if (lagSizeMillis > lagSwellMillis) {
            // data would be too big and would stretch outside of swell timestamp
            // this is when lag partition should be split, but it is still a straight split without
            // re-order

            Partition<T> tempPartition = createTempPartition().open();
            splitAppend(lagPartition.bufferedIterator(), hard, soft, tempPartition);
            splitAppend(data, hard, soft, tempPartition);
            replaceIrregularPartition(tempPartition);
        } else {
            // simplest case, just append to lag
            lagPartition.append(data);
        }
    } else {

        // batch and lag overlap (or batch precedes lag): rebuild lag in a temporary partition
        Partition<T> tempPartition = createTempPartition().open();
        if (dataMinTimestamp > lagMinTimestamp && dataMaxTimestamp < lagMaxTimestamp) {
            //
            // overlap scenario 1: data is fully inside of lag
            //

            // calc boundaries of lag that intersects with data
            long lagMid1 = lagPartition.indexOf(dataMinTimestamp, BSearchType.OLDER_OR_SAME);
            long lagMid2 = lagPartition.indexOf(dataMaxTimestamp, BSearchType.NEWER_OR_SAME);

            // copy part of lag above data
            splitAppend(lagPartition.bufferedIterator(0, lagMid1), hard, soft, tempPartition);

            // merge lag with data and copy result to temp partition
            splitAppendMerge(
                    data,
                    lagPartition.bufferedIterator(lagMid1 + 1, lagMid2 - 1),
                    hard,
                    soft,
                    tempPartition);

            // copy part of lag below data
            splitAppend(
                    lagPartition.bufferedIterator(lagMid2, lagPartition.size() - 1),
                    hard,
                    soft,
                    tempPartition);

        // NOTE(review): the second comparison is implied by the first one, so the condition
        // is effectively "dataMaxTimestamp < lagMinTimestamp" - confirm nothing else was meant
        } else if (dataMaxTimestamp < lagMinTimestamp && dataMaxTimestamp <= lagMinTimestamp) {
            //
            // overlap scenario 2: data sits directly above lag
            //
            splitAppend(data, hard, soft, tempPartition);
            splitAppend(lagPartition.bufferedIterator(), hard, soft, tempPartition);
        } else if (dataMinTimestamp <= lagMinTimestamp && dataMaxTimestamp < lagMaxTimestamp) {
            //
            // overlap scenario 3: bottom part of data overlaps top part of lag
            //

            // calc overlap line
            long split = lagPartition.indexOf(dataMaxTimestamp, BSearchType.NEWER_OR_SAME);

            // merge lag with data and copy result to temp partition
            splitAppendMerge(
                    data, lagPartition.bufferedIterator(0, split - 1), hard, soft, tempPartition);

            // copy part of lag below data
            splitAppend(
                    lagPartition.bufferedIterator(split, lagPartition.size() - 1),
                    hard,
                    soft,
                    tempPartition);
        } else if (dataMinTimestamp > lagMinTimestamp && dataMaxTimestamp >= lagMaxTimestamp) {
            //
            // overlap scenario 4: top part of data overlaps with bottom part of lag
            //
            long split = lagPartition.indexOf(dataMinTimestamp, BSearchType.OLDER_OR_SAME);

            // copy part of lag above overlap
            splitAppend(lagPartition.bufferedIterator(0, split), hard, soft, tempPartition);

            // merge lag with data and copy result to temp partition
            splitAppendMerge(
                    data,
                    lagPartition.bufferedIterator(split + 1, lagPartition.size() - 1),
                    hard,
                    soft,
                    tempPartition);
        } else if (dataMinTimestamp <= lagMinTimestamp && dataMaxTimestamp >= lagMaxTimestamp) {
            //
            // overlap scenario 5: lag is fully inside of data
            //

            // merge lag with data and copy result to temp partition
            splitAppendMerge(data, lagPartition.bufferedIterator(), hard, soft, tempPartition);
        } else {
            throw new JournalRuntimeException(
                    "Unsupported overlap type: lag min/max [%s/%s] data min/max: [%s/%s]",
                    Dates.toString(lagMinTimestamp),
                    Dates.toString(lagMaxTimestamp),
                    Dates.toString(dataMinTimestamp),
                    Dates.toString(dataMaxTimestamp));
        }

        // the rebuilt partition becomes the new lag
        replaceIrregularPartition(tempPartition);
    }
}