public void commitChanges(WriteAheadLog wal) throws IOException {
  if (!dirty) {
    return;
  }
  for (PageDiff diff : diffList) {
    wal.addPageDiff(getId(), diff);
  }
  wal.addMetaData(getId(), getNextPageId(), getCurrentFill());
}
@Test
public void lazy_file_create() {
  File f = TT.tempDbFile();
  f.delete();
  File f2 = new File(f.getPath() + ".wal.0");

  WriteAheadLog wal = new WriteAheadLog(f.getPath());
  wal.open(WriteAheadLog.NOREPLAY);
  // WAL file is created lazily, only on first write
  assertTrue(!f2.exists());

  wal.walPutLong(111L, 111L);
  assertTrue(f2.exists());

  wal.close();
  f2.delete();
}
@Override
protected byte[] loadLongStackPage(long pageOffset, boolean willBeModified) {
  if (CC.ASSERT && !structuralLock.isHeldByCurrentThread())
    throw new AssertionError();
  // if(CC.ASSERT && compactionInProgress)
  //     throw new AssertionError();

  // first try to get it from dirty pages in current TX
  byte[] page = uncommittedStackPages.get(pageOffset);
  if (page != null) {
    return page;
  }

  // try to get it from previous TX stored in WAL, but not yet replayed
  long walval = committedPageLongStack.get(pageOffset);
  if (walval != 0) {
    byte[] b = wal.walGetByteArray2(walval);
    // page is going to be modified, so put it back into uncommittedStackPages
    if (willBeModified) {
      uncommittedStackPages.put(pageOffset, b);
    }
    return b;
  }

  // and finally read it from main store; the page size is stored in the
  // upper two bytes of the parity-protected page header
  int pageSize = (int) (parity4Get(vol.getLong(pageOffset)) >>> 48);
  page = new byte[pageSize];
  vol.getData(pageOffset, page, 0, pageSize);
  if (willBeModified) {
    uncommittedStackPages.put(pageOffset, page);
  }
  return page;
}
@Test
public void skip_rollback_last_rollback() {
  WriteAheadLog wal = new WriteAheadLog(null);
  wal.walPutLong(1L, 11L);
  wal.commit();
  long o1 = wal.fileOffset;
  wal.walPutLong(2L, 33L);
  wal.commit();
  long o2 = wal.fileOffset;
  wal.walPutLong(3L, 33L);
  wal.rollback();
  wal.seal();

  assertEquals(o1, wal.skipRollbacks(o1));
  assertEquals(0, wal.skipRollbacks(o2));
}
@Override
public void close() {
  commitLock.lock();
  try {
    if (closed) {
      return;
    }

    if (hasUncommitedData()) {
      LOG.warning("Closing storage with uncommitted data, this data will be discarded.");
    }

    headVol.putData(0, headVolBackup, 0, headVolBackup.length);

    if (!readonly) {
      replaySoft();
      wal.destroyWalFiles();
    }
    wal.close();

    vol.close();
    vol = null;

    headVol.close();
    headVol = null;
    headVolBackup = null;

    uncommittedStackPages.clear();

    if (caches != null) {
      for (Cache c : caches) {
        c.close();
      }
      Arrays.fill(caches, null);
    }

    if (fileLockHeartbeat != null) {
      fileLockHeartbeat.unlock();
      fileLockHeartbeat = null;
    }
    closed = true;
  } finally {
    commitLock.unlock();
  }
}
@Test
public void overflow_record() {
  File f = TT.tempDbFile();
  f.delete();
  File f0 = new File(f.getPath() + ".wal.0");
  File f1 = new File(f.getPath() + ".wal.1");

  WriteAheadLog wal = new WriteAheadLog(f.getPath());
  wal.open(WriteAheadLog.NOREPLAY);

  // keep writing until the log rolls over into a second WAL file
  long lastPos = 0;
  while (!f1.exists()) {
    lastPos = wal.fileOffset;
    wal.walPutRecord(111L, new byte[100], 0, 100);
    assertTrue(f0.exists());
  }
  // rollover must happen close to MAX_FILE_SIZE
  assertTrue(WriteAheadLog.MAX_FILE_SIZE - 1000 < lastPos);
  assertTrue(WriteAheadLog.MAX_FILE_SIZE + 120 > lastPos);

  wal.destroyWalFiles();
}
@Test
public void replay_commit_over_file_edge() {
  String f = TT.tempDbFile().getPath();
  WriteAheadLog wal = new WriteAheadLog(f);
  byte[] b = TT.randomByteArray(20 * 1024 * 1024);
  // two 20MB records force the commit to span a WAL file boundary
  wal.walPutRecord(11L, b, 0, b.length);
  wal.walPutRecord(33L, b, 0, b.length);
  wal.commit();
  wal.close();

  wal = new WriteAheadLog(f);
  wal.open(
      new WALSequence(
          new Object[] {WALSequence.beforeReplayStart},
          new Object[] {WALSequence.writeRecord, 11L, 16L, b},
          // 4294967312L == (1L << 32) + 16, apparently offset 16 in the second WAL file
          new Object[] {WALSequence.writeRecord, 33L, 4294967312L, b},
          new Object[] {WALSequence.commit}));
}
protected DataInput walGetData(long offset, int segment) {
  if (CC.ASSERT && offset % 16 != 0)
    throw new DBException.DataCorruption();
  long longval = uncommittedDataLongs[segment].get(offset);
  if (longval == 0) {
    longval = committedDataLongs[segment].get(offset);
  }
  if (longval == 0)
    return null;
  return wal.walGetByteArray(longval);
}
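// A hedged sketch of the WAL pointer layout, inferred from the decoding hints
// preserved in the commented-out code inside offsetsGet() below; the exact bit
// widths are an assumption, not something this file confirms, and the helper
// name is hypothetical.
static long[] walPointerDecode(long walPointer) {
  int walFileNum = (int) ((walPointer >>> 32) & 0xFFFFL); // WAL file sequence number
  long walOffset = walPointer & 0xFFFFFFFFL; // byte offset inside that WAL file
  return new long[] {walFileNum, walOffset};
}
// walGetByteArray()/walGetByteArray2() encapsulate this decoding, which is why
// a zero pointer can double as the "not present" marker in walGetData() above.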
@Override
protected void putDataSingleWithoutLink(
    int segment, long offset, byte[] buf, int bufPos, int size) {
  if (CC.ASSERT && offset < PAGE_SIZE)
    throw new DBException.DataCorruption("offset too small");
  if (CC.ASSERT && (size <= 0 || size > MAX_REC_SIZE))
    throw new DBException.DataCorruption("wrong length");
  if (CC.ASSERT && segment >= 0)
    assertWriteLocked(segment);

  long val = wal.walPutByteArray(offset, buf, bufPos, size);
  uncommittedDataLongs[segment].put(offset, val);
}
@Test
public void preallocate() {
  WriteAheadLog wal = new WriteAheadLog(null);
  wal.open(WriteAheadLog.NOREPLAY);
  wal.startNextFile();

  wal.walPutPreallocate(111111L);
  wal.seal();

  final AtomicInteger c = new AtomicInteger();
  wal.replayWAL(
      new WriteAheadLog.WALReplay() {
        @Override
        public void beforeReplayStart() {}

        @Override
        public void afterReplayFinished() {}

        @Override
        public void writeLong(long offset, long value) {
          fail();
        }

        @Override
        public void writeRecord(long recid, long walId, Volume vol, long volOffset, int length) {
          fail();
        }

        @Override
        public void writeByteArray(long offset, long walId, Volume vol, long volOffset, int length) {
          fail();
        }

        @Override
        public void commit() {
          fail();
        }

        @Override
        public void rollback() {
          fail();
        }

        @Override
        public void writeTombstone(long recid) {
          fail();
        }

        @Override
        public void writePreallocate(long recid) {
          c.incrementAndGet();
          assertEquals(111111L, recid);
        }
      });

  assertEquals(1, c.get());
}
@Test
public void cut_broken_end_rollback() {
  String f = TT.tempDbFile().getPath();
  WriteAheadLog wal = new WriteAheadLog(f);
  wal.walPutLong(1L, 11L);
  wal.commit();
  wal.walPutLong(2L, 22L);
  wal.commit();
  wal.walPutLong(3L, 33L);
  wal.rollback();
  wal.walPutLong(4L, 44L); // broken tail: never committed or rolled back
  wal.curVol.sync();
  wal.close();

  wal = new WriteAheadLog(f);
  // replay stops at the last commit: the rolled-back section and the broken
  // tail are both cut off
  wal.open(
      new WALSequence(
          new Object[] {WALSequence.beforeReplayStart},
          new Object[] {WALSequence.writeLong, 1L, 11L},
          new Object[] {WALSequence.commit},
          new Object[] {WALSequence.writeLong, 2L, 22L},
          new Object[] {WALSequence.commit}));
}
@Override
public void rollback() throws UnsupportedOperationException {
  commitLock.lock();
  try {
    // discard modified records in write caches
    for (int segment = 0; segment < locks.length; segment++) {
      Lock lock = locks[segment].writeLock();
      lock.lock();
      try {
        writeCache[segment].clear();
        if (caches != null) {
          caches[segment].clear();
        }
        uncommittedDataLongs[segment].clear();
        uncommittedIndexTable[segment].clear();
      } finally {
        lock.unlock();
      }
    }

    structuralLock.lock();
    try {
      uncommittedStackPages.clear();

      // restore headVol from backup
      headVol.putData(0, headVolBackup, 0, headVolBackup.length);
      indexPages = indexPagesBackup.clone();

      wal.rollback();
      wal.sync();
    } finally {
      structuralLock.unlock();
    }
  } finally {
    commitLock.unlock();
  }
}
/** return positions of (possibly) linked record */
@Override
protected long[] offsetsGet(int segment, long indexVal) {
  if (indexVal >>> 48 == 0) {
    return ((indexVal & MLINKED) != 0) ? null : StoreDirect.EMPTY_LONGS;
  }

  long[] ret = new long[] {indexVal};
  while ((ret[ret.length - 1] & MLINKED) != 0) {
    ret = Arrays.copyOf(ret, ret.length + 1);
    long oldLink = ret[ret.length - 2] & MOFFSET;

    // get WAL position from current transaction, or previous (not yet fully replayed)
    // transactions
    long val = uncommittedDataLongs[segment].get(oldLink);
    if (val == 0)
      val = committedDataLongs[segment].get(oldLink);
    if (val != 0) {
      //  was found in previous position, read link from WAL
      //  int file = (int) ((val>>>32) & 0xFFFFL); // get WAL file number
      //  val = val & 0xFFFFFFFFL; // convert to WAL offset
      //  val = volumes.get(file).getLong(val);
      try {
        val = wal.walGetByteArray(val).readLong();
      } catch (IOException e) {
        throw new DBException.VolumeIOError(e);
      }
    } else {
      // was not found in any transaction, read from main store
      val = vol.getLong(oldLink);
    }
    ret[ret.length - 1] = parity3Get(val);
  }

  if (CC.ASSERT) {
    offsetsVerify(ret);
  }

  if (CC.LOG_STORE && LOG.isLoggable(Level.FINEST)) {
    LOG.log(
        Level.FINEST,
        "indexVal={0}, ret={1}",
        new Object[] {Long.toHexString(indexVal), Arrays.toString(ret)});
  }
  return ret;
}
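// Hedged reading of the index-value layout as used by offsetsGet() above. The
// top-16-bit size field follows directly from `indexVal >>> 48`; the exact bit
// positions behind MLINKED and MOFFSET are masks defined elsewhere (presumably
// StoreDirect) and are an assumption here:
//
//   bits 48..63   record size (zero means an empty/placeholder record)
//   MOFFSET mask  store offset of this chunk (or of the next link)
//   MLINKED flag  set while the record continues in a further chunk
//
// offsetsGet() walks the chain until it hits a chunk whose MLINKED bit is
// clear, resolving each link first against the uncommitted map, then the
// committed map, and finally the main store.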
@Test
public void rollback() {
  WriteAheadLog wal = new WriteAheadLog(null);
  wal.open(WriteAheadLog.NOREPLAY);
  wal.startNextFile();

  wal.walPutLong(111L, 1000);
  wal.rollback();
  wal.seal();

  wal.replayWAL(
      new WALSequence(
          new Object[] {WALSequence.beforeReplayStart},
          new Object[] {WALSequence.writeLong, 111L, 1000L},
          new Object[] {WALSequence.rollback}));
}
@Test
public void commit() {
  WriteAheadLog wal = new WriteAheadLog(null);
  wal.open(WriteAheadLog.NOREPLAY);

  wal.walPutLong(111L, 1111L);
  wal.commit();
  wal.seal();

  wal.replayWAL(
      new WALSequence(
          new Object[] {WALSequence.beforeReplayStart},
          new Object[] {WALSequence.writeLong, 111L, 1111L},
          new Object[] {WALSequence.commit}));
}
@Test
public void commitChecksum() {
  WriteAheadLog wal = new WriteAheadLog(null);
  wal.open(WriteAheadLog.NOREPLAY);
  wal.startNextFile();

  wal.walPutLong(111L, 1000);
  wal.commit();
  long offset1 = wal.fileOffset - 5;
  int checksum1 = DataIO.longHash(wal.curVol.hash(16, offset1 - 16, 111L));
  assertEquals(checksum1, wal.curVol.getInt(offset1 + 1));

  wal.walPutLong(111L, 1000);
  wal.commit();
  long offset2 = wal.fileOffset - 5;
  int checksum2 =
      checksum1 + DataIO.longHash(wal.curVol.hash(offset1 + 5, offset2 - offset1 - 5, 111L));
  assertEquals(checksum2, wal.curVol.getInt(offset2 + 1));
}
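// A hedged reading of the checksum scheme exercised above, derived from the
// test rather than from WriteAheadLog itself: each commit entry appears to be
// 5 bytes (1-byte instruction + 4-byte checksum), and the int checksum stored
// one byte past the commit marker chains over everything written since the
// previous commit, roughly:
//
//   checksumN = checksumPrev
//       + DataIO.longHash(curVol.hash(prevCommitEnd, lenSincePrevCommit, 111L));
//
// with the first hashed region starting after the 16-byte file header. Because
// the values chain, a torn or corrupted region invalidates the checksum of
// every later commit, which is what lets replay cut the log at the last good
// commit.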
@Test
public void empty_commit() {
  String f = TT.tempDbFile().getPath();
  WriteAheadLog wal = new WriteAheadLog(f);
  byte[] b = TT.randomByteArray(1024);
  wal.walPutRecord(33L, b, 0, b.length);
  wal.commit();
  wal.commit(); // second commit carries no data, but is still replayed
  wal.seal();
  wal.close();

  wal = new WriteAheadLog(f);
  wal.open(
      new WALSequence(
          new Object[] {WALSequence.beforeReplayStart},
          new Object[] {WALSequence.writeRecord, 33L, 16L, b},
          new Object[] {WALSequence.commit},
          new Object[] {WALSequence.commit}));
}
@Test
public void test() {
  File f = TT.tempDbFile();
  WriteAheadLog wal = new WriteAheadLog(f.getPath());
  for (int i = 0; i < commitNum; i++) {
    for (int j = 0; j < 6; j++) {
      wal.walPutLong(111L, i);
    }
    wal.commit();
  }

  // truncate the WAL at a pseudo-random cut point
  int cutPoint = new Random(cutPointSeed).nextInt((int) wal.curVol.length());
  wal.curVol.sync();
  wal.curVol.clear(cutPoint, wal.curVol.length());
  File f2 = wal.curVol.getFile();
  wal.close();

  // replay must deliver only complete commits: six writeLong entries per
  // commit, and nothing after the cut point
  wal = new WriteAheadLog(f.getPath());
  final AtomicLong i = new AtomicLong();
  final AtomicLong c = new AtomicLong();
  wal.open(
      new WriteAheadLog.WALReplay() {
        @Override
        public void beforeReplayStart() {}

        @Override
        public void afterReplayFinished() {}

        @Override
        public void writeLong(long offset, long value) {
          assertEquals(111L, offset);
          assertEquals(i.get(), value);
          assertTrue(c.getAndIncrement() < 6);
        }

        @Override
        public void writeRecord(long recid, long walId, Volume vol, long volOffset, int length) {
          fail();
        }

        @Override
        public void writeByteArray(long offset, long walId, Volume vol, long volOffset, int length) {
          fail();
        }

        @Override
        public void commit() {
          assertEquals(6, c.get());
          c.set(0);
          i.incrementAndGet();
        }

        @Override
        public void rollback() {
          fail();
        }

        @Override
        public void writeTombstone(long recid) {
          fail();
        }

        @Override
        public void writePreallocate(long recid) {
          fail();
        }
      });
  assertEquals(0, c.get());

  f.delete();
  f2.delete();
}
@Override
protected void initFailedCloseFiles() {
  wal.initFailedCloseFiles();
}
@Override
protected void indexLongPut(long offset, long val) {
  if (CC.ASSERT && !structuralLock.isHeldByCurrentThread())
    throw new AssertionError();
  wal.walPutLong(offset, val);
}
@Test
public void open_ignores_rollback() {
  File f = TT.tempDbFile();
  WriteAheadLog wal = new WriteAheadLog(f.getPath());
  wal.walPutLong(1L, 11L);
  wal.commit();
  wal.walPutLong(2L, 33L);
  wal.rollback();
  wal.walPutLong(3L, 33L);
  wal.commit();
  wal.seal();
  wal.close();

  wal = new WriteAheadLog(f.getPath());
  wal.open(
      new WALSequence(
          new Object[] {WALSequence.beforeReplayStart},
          new Object[] {WALSequence.writeLong, 1L, 11L},
          new Object[] {WALSequence.commit},
          // 2L is ignored, rollback section is skipped on hard replay
          new Object[] {WALSequence.writeLong, 3L, 33L},
          new Object[] {WALSequence.commit}));

  wal.destroyWalFiles();
  wal.close();
  f.delete();
}
void testRecord(final long recid, final byte[] data) {
  WriteAheadLog wal = new WriteAheadLog(null);
  wal.open(WriteAheadLog.NOREPLAY);
  wal.startNextFile();

  final AtomicBoolean called = new AtomicBoolean();
  final long pointer = wal.walPutRecord(recid, data, 0, data == null ? 0 : data.length);

  byte[] val = wal.walGetRecord(pointer, recid);
  if (data == null)
    assertNull(val);
  else
    assertTrue(Arrays.equals(data, val));
  wal.seal();

  WriteAheadLog.WALReplay r =
      new WriteAheadLog.WALReplay() {
        @Override
        public void beforeReplayStart() {}

        @Override
        public void afterReplayFinished() {}

        @Override
        public void writeLong(long offset, long value) {
          fail();
        }

        @Override
        public void writeRecord(long recid2, long walId, Volume vol, long volOffset, int length) {
          assertFalse(called.getAndSet(true));
          assertEquals(recid, recid2);
          if (data == null) {
            assertNull(vol);
            assertEquals(0, walId);
            assertEquals(0, volOffset);
            assertEquals(0, length);
          } else {
            // read the record back from the WAL volume and compare with the original
            byte[] data2 = new byte[length];
            vol.getData(volOffset, data2, 0, data2.length);
            assertTrue(Arrays.equals(data, data2));
            assertEquals(pointer, walId);
          }
        }

        @Override
        public void writeByteArray(long offset2, long walId, Volume vol, long volOffset, int length) {
          fail();
        }

        @Override
        public void commit() {
          fail();
        }

        @Override
        public void rollback() {
          fail();
        }

        @Override
        public void writeTombstone(long recid) {
          fail();
        }

        @Override
        public void writePreallocate(long recid) {
          fail();
        }
      };
  wal.replayWAL(r);
  assertTrue(called.get());
}
@Override
public void initOpen() {
  // TODO disable readonly feature for this store
  realVol = vol;

  if (readonly && !Volume.isEmptyFile(fileName + ".wal.0"))
    throw new DBException.WrongConfig(
        "There is dirty WAL file, but storage is read-only. Can not replay file");

  wal.open(
      new WriteAheadLog.WALReplay() {
        @Override
        public void beforeReplayStart() {}

        @Override
        public void writeLong(long offset, long value) {
          if (CC.ASSERT && offset % 8 != 0)
            throw new AssertionError();
          realVol.ensureAvailable(Fun.roundUp(offset + 8, StoreDirect.PAGE_SIZE));
          realVol.putLong(offset, value);
        }

        @Override
        public void writeRecord(long recid, long walId, Volume vol, long volOffset, int length) {
          throw new DBException.DataCorruption();
        }

        @Override
        public void writeByteArray(long offset, long walId, Volume vol, long volOffset, int length) {
          if (CC.ASSERT && offset % 8 != 0)
            throw new AssertionError();
          realVol.ensureAvailable(Fun.roundUp(offset + length, StoreDirect.PAGE_SIZE));
          vol.transferInto(volOffset, realVol, offset, length);
        }

        @Override
        public void beforeDestroyWAL() {}

        @Override
        public void commit() {}

        @Override
        public void rollback() {
          throw new DBException.DataCorruption();
        }

        @Override
        public void writeTombstone(long recid) {
          throw new DBException.DataCorruption();
        }

        @Override
        public void writePreallocate(long recid) {
          throw new DBException.DataCorruption();
        }
      });

  realVol.sync();
  wal.destroyWalFiles();
  initOpenPost();

  // TODO reenable this assertion
  // if(CC.PARANOID)
  //     storeCheck();
}
@Override
public void commit() {
  commitLock.lock();
  try {
    // flush write caches into write ahead log
    flushWriteCache();

    // move uncommitted data to committed
    for (int segment = 0; segment < locks.length; segment++) {
      locks[segment].writeLock().lock();
      try {
        // dump index vals into WAL
        long[] table = uncommittedIndexTable[segment].table;
        for (int i = 0; i < table.length; ) {
          long offset = table[i++];
          long val = table[i++];
          if (offset == 0)
            continue;
          wal.walPutLong(offset, val);
        }

        moveAndClear(uncommittedIndexTable[segment], committedIndexTable[segment]);
        moveAndClear(uncommittedDataLongs[segment], committedDataLongs[segment]);
      } finally {
        locks[segment].writeLock().unlock();
      }
    }

    structuralLock.lock();
    try {
      // flush modified Long Stack pages into WAL
      long[] set = uncommittedStackPages.set;
      longStackPagesLoop:
      for (int i = 0; i < set.length; i++) {
        long offset = set[i];
        if (offset == 0)
          continue longStackPagesLoop;
        byte[] val = (byte[]) uncommittedStackPages.values[i];

        if (val == LONG_STACK_PAGE_TOMBSTONE)
          committedPageLongStack.put(offset, -1);
        else {
          if (CC.ASSERT)
            assertLongStackPage(offset, val);
          long walPointer = wal.walPutByteArray(offset, val, 0, val.length);
          committedPageLongStack.put(offset, walPointer);
        }
      }
      uncommittedStackPages.clear();

      // update checksum
      headVol.putInt(HEAD_CHECKSUM, headChecksum(headVol));
      // take backup of headVol
      headVol.getData(0, headVolBackup, 0, headVolBackup.length);
      wal.walPutByteArray(0, headVolBackup, 0, headVolBackup.length);

      wal.commit();
      wal.seal();
      replaySoft();
      realVol.sync();
      wal.destroyWalFiles();
    } finally {
      structuralLock.unlock();
    }
  } finally {
    commitLock.unlock();
  }
}
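// Hedged sketch of moveAndClear(), which commit() calls above but which is not
// defined in this section: assuming the same open-addressed long[] table
// layout that commit() and replaySoft() iterate, it presumably copies every
// live entry from `from` into `to` and then resets `from`. Method and type
// names follow that assumption, not a confirmed signature.
protected static void moveAndClear(LongLongMap from, LongLongMap to) {
  long[] table = from.table;
  for (int i = 0; i < table.length; ) {
    long key = table[i++];
    long val = table[i++];
    if (key == 0)
      continue; // empty slot
    to.put(key, val);
  }
  from.clear();
}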
protected void replaySoft() {
  if (CC.ASSERT && !commitLock.isHeldByCurrentThread())
    throw new AssertionError();
  LongList written = CC.PARANOID ? new LongList() : null;

  for (int lockPos = 0; lockPos < locks.length; lockPos++) {
    locks[lockPos].writeLock().lock();
    try {
      // update index table
      long[] table = committedIndexTable[lockPos].table;
      indexValLoop:
      for (int pos = 0; pos < table.length; ) {
        long recidOffset = table[pos++];
        long val = table[pos++];
        if (recidOffset == 0 || val == -1)
          continue indexValLoop;

        realVol.ensureAvailable(Fun.roundUp(recidOffset + 8, StoreDirect.PAGE_SIZE));
        realVol.putLong(recidOffset, val);

        if (CC.PARANOID) {
          // check this is an index page
          if (!Fun.arrayContains(indexPages, Fun.roundDown(recidOffset, PAGE_SIZE))) {
            throw new AssertionError("not index page");
          }
        }
      }
      committedIndexTable[lockPos].clear();

      // write data
      table = committedDataLongs[lockPos].table;
      dataLoop:
      for (int pos = 0; pos < table.length; ) {
        long volOffset = table[pos++];
        long walPointer = table[pos++];
        if (volOffset == 0 || walPointer == -1)
          continue dataLoop;

        byte[] b = wal.walGetByteArray2(walPointer);
        if (CC.ASSERT)
          assertRecord(volOffset, b);

        realVol.ensureAvailable(Fun.roundUp(volOffset + b.length, StoreDirect.PAGE_SIZE));
        realVol.putData(volOffset, b, 0, b.length);

        if (CC.ASSERT && b.length > MAX_REC_SIZE)
          throw new AssertionError();
        if (CC.PARANOID)
          written.add((volOffset << 16) | b.length);
      }
      committedDataLongs[lockPos].clear();
    } finally {
      locks[lockPos].writeLock().unlock();
    }
  }

  structuralLock.lock();
  try {
    // flush modified Long Stack pages
    dataLoop:
    for (int pos = 0; pos < committedPageLongStack.table.length; ) {
      long volOffset = committedPageLongStack.table[pos++];
      long walPointer = committedPageLongStack.table[pos++];
      if (volOffset == 0 || walPointer == -1)
        continue dataLoop;

      byte[] b = wal.walGetByteArray2(walPointer);
      if (CC.ASSERT)
        assertLongStackPage(volOffset, b);
      realVol.ensureAvailable(Fun.roundUp(volOffset + b.length, StoreDirect.PAGE_SIZE));
      realVol.putData(volOffset, b, 0, b.length);

      if (CC.PARANOID)
        written.add((volOffset << 16) | b.length);
    }
    committedPageLongStack.clear();

    if (CC.PARANOID) {
      byte[] headVolBuf = new byte[headVolBackup.length];
      headVol.getData(0, headVolBuf, 0, headVolBuf.length);
      if (!Arrays.equals(headVolBuf, headVolBackup))
        throw new AssertionError();
    }

    // update page header
    realVol.putData(0, headVolBackup, 0, headVolBackup.length);
  } finally {
    structuralLock.unlock();
  }

  if (CC.PARANOID) {
    // check written regions for overlaps; entries encode (offset << 16 | length),
    // so the length mask must cover the full 16 bits (0xFFFF, not 0xFF)
    long[] w = Arrays.copyOf(written.array, written.size);
    Arrays.sort(w);
    for (int i = 0; i < w.length - 1; i++) {
      long offset1 = w[i] >>> 16;
      long size1 = w[i] & 0xFFFF;
      long offset2 = w[i + 1] >>> 16;
      long size2 = w[i + 1] & 0xFFFF;
      if (offset1 + size1 > offset2) {
        throw new AssertionError(
            "write overlap conflict at: "
                + offset1 + " + " + size1
                + " > " + offset2 + " (" + size2 + ")");
      }
    }
  }
}