@Override
protected byte[] loadLongStackPage(long pageOffset, boolean willBeModified) {
  // Returns the freshest copy of a Long Stack page, in priority order:
  // current-TX dirty pages, then WAL entries from committed-but-not-yet-replayed
  // transactions, then the main store volume.
  // Caller must hold structuralLock.
  if (CC.ASSERT && !structuralLock.isHeldByCurrentThread()) throw new AssertionError();
  //        if(CC.ASSERT && compactionInProgress)
  //            throw new AssertionError();

  // first try to get it from dirty pages in current TX
  byte[] page = uncommittedStackPages.get(pageOffset);
  if (page != null) {
    return page;
  }

  // try to get it from previous TX stored in WAL, but not yet replayed
  long walval = committedPageLongStack.get(pageOffset);
  if (walval != 0) {
    byte[] b = wal.walGetByteArray2(walval);
    // page is going to be modified, so put it back into uncommittedStackPages
    if (willBeModified) {
      uncommittedStackPages.put(pageOffset, b);
    }
    return b;
  }

  // and finally read it from main store;
  // page size lives in the top 16 bits of the parity-protected page header
  int pageSize = (int) (parity4Get(vol.getLong(pageOffset)) >>> 48);
  page = new byte[pageSize];
  vol.getData(pageOffset, page, 0, pageSize);
  if (willBeModified) {
    uncommittedStackPages.put(pageOffset, page);
  }
  return page;
}
@Override
public void close() {
  // Shuts the store down. Uncommitted data is discarded (with a warning);
  // committed-but-unreplayed WAL content is flushed into the main volume
  // first (unless read-only). Idempotent: a second call returns immediately.
  commitLock.lock();
  try {
    if (closed) {
      return;
    }

    if (hasUncommitedData()) {
      LOG.warning("Closing storage with uncommited data, this data will be discarded.");
    }

    // discard uncommitted header changes: restore from backup taken at last commit
    headVol.putData(0, headVolBackup, 0, headVolBackup.length);

    if (!readonly) {
      // push committed WAL content into the main volume, then the WAL
      // files are no longer needed
      replaySoft();
      wal.destroyWalFiles();
    }
    wal.close();

    vol.close();
    vol = null;

    headVol.close();
    headVol = null;
    headVolBackup = null;

    uncommittedStackPages.clear();

    if (caches != null) {
      for (Cache c : caches) {
        c.close();
      }
      // null out entries so cached objects become collectable
      Arrays.fill(caches, null);
    }
    if (fileLockHeartbeat != null) {
      fileLockHeartbeat.unlock();
      fileLockHeartbeat = null;
    }
    closed = true;
  } finally {
    commitLock.unlock();
  }
}
/**
 * Resolves a data record from the WAL, preferring the current transaction's
 * entry over entries from previous (committed but not yet replayed)
 * transactions.
 *
 * @param offset record offset in the main store; must be 16-byte aligned
 * @param segment segment whose WAL pointer tables are consulted
 * @return input over the WAL bytes, or {@code null} when the WAL holds no
 *     entry for this offset
 */
protected DataInput walGetData(long offset, int segment) {
  if (CC.ASSERT && offset % 16 != 0) {
    throw new DBException.DataCorruption();
  }
  long walPointer = uncommittedDataLongs[segment].get(offset);
  if (walPointer == 0) {
    walPointer = committedDataLongs[segment].get(offset);
  }
  return walPointer == 0 ? null : wal.walGetByteArray(walPointer);
}
@Override
protected void putDataSingleWithoutLink(int segment, long offset, byte[] buf, int bufPos, int size) {
  // Appends one (unlinked) record payload to the WAL and remembers its WAL
  // pointer for this offset; the main volume is only touched at replay time.
  if (CC.ASSERT && offset < PAGE_SIZE)
    throw new DBException.DataCorruption("offset to small");
  // BUGFIX: original condition `CC.ASSERT && size <= 0 || size > MAX_REC_SIZE`
  // parsed as `(CC.ASSERT && size <= 0) || (size > MAX_REC_SIZE)` because &&
  // binds tighter than ||, so the upper-bound check ran even with assertions
  // disabled — inconsistent with every other CC.ASSERT-gated check here.
  if (CC.ASSERT && (size <= 0 || size > MAX_REC_SIZE))
    throw new DBException.DataCorruption("wrong length");
  // segment < 0 is used by callers that do not hold a segment lock
  if (CC.ASSERT && segment >= 0) assertWriteLocked(segment);

  long val = wal.walPutByteArray(offset, buf, bufPos, size);
  uncommittedDataLongs[segment].put(offset, val);
}
@Override
public void rollback() throws UnsupportedOperationException {
  // Discards all uncommitted changes: per-segment write caches and WAL
  // pointer tables, dirty Long Stack pages, index-page list and header
  // modifications. Committed state is untouched.
  commitLock.lock();
  try {
    // flush modified records
    for (int segment = 0; segment < locks.length; segment++) {
      Lock lock = locks[segment].writeLock();
      lock.lock();
      try {
        writeCache[segment].clear();
        if (caches != null) {
          caches[segment].clear();
        }
        uncommittedDataLongs[segment].clear();
        uncommittedIndexTable[segment].clear();
      } finally {
        lock.unlock();
      }
    }
    structuralLock.lock();
    try {
      uncommittedStackPages.clear();

      // restore headVol from backup
      headVol.putData(0, headVolBackup, 0, headVolBackup.length);
      indexPages = indexPagesBackup.clone();

      wal.rollback();
      wal.sync();
    } finally {
      structuralLock.unlock();
    }
  } finally {
    commitLock.unlock();
  }
}
/** return positions of (possibly) linked record */ @Override protected long[] offsetsGet(int segment, long indexVal) { ; if (indexVal >>> 48 == 0) { return ((indexVal & MLINKED) != 0) ? null : StoreDirect.EMPTY_LONGS; } long[] ret = new long[] {indexVal}; while ((ret[ret.length - 1] & MLINKED) != 0) { ret = Arrays.copyOf(ret, ret.length + 1); long oldLink = ret[ret.length - 2] & MOFFSET; // get WAL position from current transaction, or previous (not yet fully replayed) // transactions long val = uncommittedDataLongs[segment].get(oldLink); if (val == 0) val = committedDataLongs[segment].get(oldLink); if (val != 0) { // //was found in previous position, read link from WAL // int file = (int) ((val>>>32) & 0xFFFFL); // get WAL file number // val = val & 0xFFFFFFFFL; // convert to WAL offset; // val = volumes.get(file).getLong(val); try { val = wal.walGetByteArray(val).readLong(); } catch (IOException e) { throw new DBException.VolumeIOError(e); } } else { // was not found in any transaction, read from main store val = vol.getLong(oldLink); } ret[ret.length - 1] = parity3Get(val); } if (CC.ASSERT) { offsetsVerify(ret); } if (CC.LOG_STORE && LOG.isLoggable(Level.FINEST)) { LOG.log( Level.FINEST, "indexVal={0}, ret={1}", new Object[] {Long.toHexString(indexVal), Arrays.toString(ret)}); } return ret; }
protected void replaySoft() { if (CC.ASSERT && !commitLock.isHeldByCurrentThread()) throw new AssertionError(); LongList written = CC.PARANOID ? new LongList() : null; for (int lockPos = 0; lockPos < locks.length; lockPos++) { locks[lockPos].writeLock().lock(); try { // update index table long[] table = committedIndexTable[lockPos].table; indexValLoop: for (int pos = 0; pos < table.length; ) { long recidOffset = table[pos++]; long val = table[pos++]; if (recidOffset == 0 || val == -1) continue indexValLoop; realVol.ensureAvailable(Fun.roundUp(recidOffset + 8, StoreDirect.PAGE_SIZE)); realVol.putLong(recidOffset, val); if (CC.PARANOID) { // check this is index page if (!Fun.arrayContains(indexPages, Fun.roundDown(recidOffset, PAGE_SIZE))) { throw new AssertionError("not index page"); } } } committedIndexTable[lockPos].clear(); // write data table = committedDataLongs[lockPos].table; dataLoop: for (int pos = 0; pos < table.length; ) { long volOffset = table[pos++]; long walPointer = table[pos++]; if (volOffset == 0 || walPointer == -1) continue dataLoop; byte[] b = wal.walGetByteArray2(walPointer); if (CC.ASSERT) assertRecord(volOffset, b); realVol.ensureAvailable(Fun.roundUp(volOffset + b.length, StoreDirect.PAGE_SIZE)); realVol.putData(volOffset, b, 0, b.length); if (CC.ASSERT && b.length > MAX_REC_SIZE) throw new AssertionError(); if (CC.PARANOID) written.add((volOffset << 16) | b.length); } committedDataLongs[lockPos].clear(); } finally { locks[lockPos].writeLock().unlock(); } } structuralLock.lock(); try { // flush modified Long Stack pages dataLoop: for (int pos = 0; pos < committedPageLongStack.table.length; ) { long volOffset = committedPageLongStack.table[pos++]; long walPointer = committedPageLongStack.table[pos++]; if (volOffset == 0 || walPointer == -1) continue dataLoop; byte[] b = wal.walGetByteArray2(walPointer); if (CC.ASSERT) assertLongStackPage(volOffset, b); realVol.ensureAvailable(Fun.roundUp(volOffset + b.length, StoreDirect.PAGE_SIZE)); 
realVol.putData(volOffset, b, 0, b.length); if (CC.PARANOID) written.add((volOffset << 16) | b.length); } committedPageLongStack.clear(); if (CC.PARANOID) { byte[] headVolBuf = new byte[headVolBackup.length]; headVol.getData(0, headVolBuf, 0, headVolBuf.length); if (!Arrays.equals(headVolBuf, headVolBackup)) throw new AssertionError(); } // update page header realVol.putData(0, headVolBackup, 0, headVolBackup.length); } finally { structuralLock.unlock(); } if (CC.PARANOID) { // check for overlaps long[] w = Arrays.copyOf(written.array, written.size); Arrays.sort(w); for (int i = 0; i < w.length - 1; i++) { long offset1 = w[i] >>> 16; long size1 = w[i] & 0xFF; long offset2 = w[i + 1] >>> 16; long size2 = w[i + 1] & 0xFF; if (offset1 + size1 > offset2) { throw new AssertionError( "write overlap conflict at: " + offset1 + " + " + size1 + " > " + offset2 + " (" + size2 + ")"); } } } }
@Override
public void commit() {
  // Commit protocol: flush write caches into the WAL, promote per-segment
  // uncommitted tables to committed, dump dirty Long Stack pages and the
  // header backup into the WAL, seal the WAL, replay it into the real
  // volume, sync, and finally destroy the now-redundant WAL files.
  commitLock.lock();
  try {
    // flush write caches into write ahead log
    flushWriteCache();

    // move uncommited data to committed
    for (int segment = 0; segment < locks.length; segment++) {
      locks[segment].writeLock().lock();
      try {
        // dump index vals into WAL
        long[] table = uncommittedIndexTable[segment].table;
        for (int i = 0; i < table.length; ) {
          long offset = table[i++];
          long val = table[i++];
          if (offset == 0) continue;
          wal.walPutLong(offset, val);
        }

        moveAndClear(uncommittedIndexTable[segment], committedIndexTable[segment]);
        moveAndClear(uncommittedDataLongs[segment], committedDataLongs[segment]);
      } finally {
        locks[segment].writeLock().unlock();
      }
    }

    structuralLock.lock();
    try {
      // flush modified Long Stack pages into WAL
      long[] set = uncommittedStackPages.set;
      longStackPagesLoop:
      for (int i = 0; i < set.length; i++) {
        long offset = set[i];
        if (offset == 0) continue longStackPagesLoop;
        byte[] val = (byte[]) uncommittedStackPages.values[i];
        // tombstone marks a deleted page; -1 tells replaySoft to skip it
        if (val == LONG_STACK_PAGE_TOMBSTONE) committedPageLongStack.put(offset, -1);
        else {
          if (CC.ASSERT) assertLongStackPage(offset, val);
          long walPointer = wal.walPutByteArray(offset, val, 0, val.length);
          committedPageLongStack.put(offset, walPointer);
        }
      }
      uncommittedStackPages.clear();

      // update checksum
      headVol.putInt(HEAD_CHECKSUM, headChecksum(headVol));
      // take backup of headVol
      headVol.getData(0, headVolBackup, 0, headVolBackup.length);
      wal.walPutByteArray(0, headVolBackup, 0, headVolBackup.length);
      wal.commit();
      wal.seal();

      replaySoft();
      realVol.sync();
      wal.destroyWalFiles();
    } finally {
      structuralLock.unlock();
    }
  } finally {
    commitLock.unlock();
  }
}
/**
 * Writes an index value into the WAL only; the real volume is updated later
 * during replay. Caller must hold {@code structuralLock}.
 *
 * @param offset index offset in the main store
 * @param val index value to record
 */
@Override
protected void indexLongPut(long offset, long val) {
  if (CC.ASSERT && !structuralLock.isHeldByCurrentThread()) {
    throw new AssertionError();
  }
  wal.walPutLong(offset, val);
}
@Override
protected void initFailedCloseFiles() {
  // Pure delegation: if opening the store failed, let the WAL component
  // close/clean up any files it partially opened.
  wal.initFailedCloseFiles();
}
@Override
public void initOpen() {
  // Opens an existing store: replays any leftover WAL files directly into
  // the real volume, syncs, and destroys them. Only writeLong/writeByteArray
  // replay events are legal here; the others indicate a corrupted WAL.
  // TODO disable readonly feature for this store
  realVol = vol;

  if (readonly && !Volume.isEmptyFile(fileName + ".wal.0"))
    throw new DBException.WrongConfig(
        "There is dirty WAL file, but storage is read-only. Can not replay file");

  wal.open(
      new WriteAheadLog.WALReplay() {
        @Override
        public void beforeReplayStart() {}

        @Override
        public void writeLong(long offset, long value) {
          if (CC.ASSERT && offset % 8 != 0) throw new AssertionError();
          realVol.ensureAvailable(Fun.roundUp(offset + 8, StoreDirect.PAGE_SIZE));
          realVol.putLong(offset, value);
        }

        @Override
        public void writeRecord(long recid, long walId, Volume vol, long volOffset, int length) {
          // record-level events never appear in this store's WAL
          throw new DBException.DataCorruption();
        }

        @Override
        public void writeByteArray(
            long offset, long walId, Volume vol, long volOffset, int length) {
          if (CC.ASSERT && offset % 8 != 0) throw new AssertionError();
          realVol.ensureAvailable(Fun.roundUp(offset + length, StoreDirect.PAGE_SIZE));
          // copy bytes straight from the WAL volume into the real volume
          vol.transferInto(volOffset, realVol, offset, length);
        }

        @Override
        public void beforeDestroyWAL() {}

        @Override
        public void commit() {}

        @Override
        public void rollback() {
          // sealed WAL files contain only committed data; a rollback marker
          // means corruption
          throw new DBException.DataCorruption();
        }

        @Override
        public void writeTombstone(long recid) {
          throw new DBException.DataCorruption();
        }

        @Override
        public void writePreallocate(long recid) {
          throw new DBException.DataCorruption();
        }
      });

  realVol.sync();
  wal.destroyWalFiles();

  initOpenPost();

  // TODO reenable this assertion
  //        if(CC.PARANOID)
  //            storeCheck();
}