Beispiel #1
0
  /**
   * Returns the content of the Long Stack page stored at {@code pageOffset}.
   *
   * <p>Sources are consulted from newest to oldest: pages touched by the current transaction,
   * then pages recorded in the WAL by an earlier commit that has not been replayed yet, and
   * finally the main volume.
   *
   * @param pageOffset offset of the page
   * @param willBeModified when {@code true}, a page that had to be loaded from the WAL or from
   *     the main volume is registered in {@code uncommittedStackPages} before it is returned
   * @return the page bytes
   */
  @Override
  protected byte[] loadLongStackPage(long pageOffset, boolean willBeModified) {
    if (CC.ASSERT && !structuralLock.isHeldByCurrentThread()) {
      throw new AssertionError();
    }

    // newest copy: page already touched by the current transaction
    final byte[] dirtyPage = uncommittedStackPages.get(pageOffset);
    if (dirtyPage != null) {
      return dirtyPage;
    }

    final long walPointer = committedPageLongStack.get(pageOffset);
    final byte[] loaded;
    if (walPointer != 0) {
      // committed by an earlier transaction, still sitting in the WAL
      loaded = wal.walGetByteArray2(walPointer);
    } else {
      // not in any transaction: the page size is kept in the top 16 bits of the page header
      final int pageSize = (int) (parity4Get(vol.getLong(pageOffset)) >>> 48);
      loaded = new byte[pageSize];
      vol.getData(pageOffset, loaded, 0, pageSize);
    }

    // caller intends to modify the page, so track it as part of the current transaction
    if (willBeModified) {
      uncommittedStackPages.put(pageOffset, loaded);
    }
    return loaded;
  }
Beispiel #2
0
  /**
   * Closes this store while holding {@code commitLock}.
   *
   * <p>Does nothing if the store is already closed. Uncommitted data is not written; a warning is
   * logged and {@code headVol} is reset from {@code headVolBackup}. Unless the store is
   * read-only, committed changes are replayed via {@code replaySoft()} and the WAL files are
   * destroyed. Afterwards the WAL, both volumes, the caches and the file-lock heartbeat are
   * closed or released and the store is marked closed.
   */
  @Override
  public void close() {
    commitLock.lock();
    try {

      // closing twice is a no-op
      if (closed) {
        return;
      }

      if (hasUncommitedData()) {
        LOG.warning("Closing storage with uncommited data, this data will be discarded.");
      }

      // overwrite the head volume with the backup copy, dropping uncommitted header changes
      headVol.putData(0, headVolBackup, 0, headVolBackup.length);

      // apply committed changes before the WAL files are removed
      if (!readonly) {
        replaySoft();
        wal.destroyWalFiles();
      }
      wal.close();

      vol.close();
      vol = null;

      headVol.close();
      headVol = null;
      headVolBackup = null;

      uncommittedStackPages.clear();

      // close every cache and drop the references held in the array
      if (caches != null) {
        for (Cache c : caches) {
          c.close();
        }
        Arrays.fill(caches, null);
      }
      if (fileLockHeartbeat != null) {
        fileLockHeartbeat.unlock();
        fileLockHeartbeat = null;
      }
      // only reached when every step above completed without throwing
      closed = true;
    } finally {
      commitLock.unlock();
    }
  }
Beispiel #3
0
  /**
   * Looks up record data for {@code offset} in the write-ahead log.
   *
   * <p>The current transaction is checked first, then data committed earlier but not yet
   * replayed.
   *
   * @param offset record offset; expected to be a multiple of 16
   * @param segment index of the segment whose tables are searched
   * @return the data read from the WAL, or {@code null} when neither table has an entry
   */
  protected DataInput walGetData(long offset, int segment) {
    if (CC.ASSERT && offset % 16 != 0) {
      throw new DBException.DataCorruption();
    }

    long walPointer = uncommittedDataLongs[segment].get(offset);
    if (walPointer == 0) {
      // nothing in the current transaction; fall back to committed data
      walPointer = committedDataLongs[segment].get(offset);
      if (walPointer == 0) {
        return null;
      }
    }
    return wal.walGetByteArray(walPointer);
  }
Beispiel #4
0
  /**
   * Appends a single (non-linked) chunk of record data to the WAL and remembers its WAL pointer
   * for {@code offset} in the current transaction of {@code segment}.
   *
   * @param segment segment index; the write-lock assertion is only made when it is
   *     non-negative
   * @param offset target offset of the data; must not be below {@code PAGE_SIZE}
   * @param buf source buffer
   * @param bufPos position in {@code buf} where the data starts
   * @param size number of bytes to write; must be in {@code 1..MAX_REC_SIZE}
   * @throws DBException.DataCorruption if assertions are enabled and {@code offset} or
   *     {@code size} is out of range
   */
  @Override
  protected void putDataSingleWithoutLink(
      int segment, long offset, byte[] buf, int bufPos, int size) {
    if (CC.ASSERT && offset < PAGE_SIZE) {
      throw new DBException.DataCorruption("offset to small");
    }
    // Parenthesised on purpose: without the parentheses `&&` binds tighter than `||`, so the
    // lower bound was assert-gated while the upper bound was not.
    if (CC.ASSERT && (size <= 0 || size > MAX_REC_SIZE)) {
      throw new DBException.DataCorruption("wrong length");
    }

    if (CC.ASSERT && segment >= 0) {
      assertWriteLocked(segment);
    }

    long val = wal.walPutByteArray(offset, buf, bufPos, size);
    uncommittedDataLongs[segment].put(offset, val);
  }
Beispiel #5
0
  /**
   * Discards all changes made since the last commit.
   *
   * <p>Under {@code commitLock}, the per-segment write cache, instance cache (if any) and
   * uncommitted tables are cleared while holding that segment's write lock. Then, under
   * {@code structuralLock}, uncommitted Long Stack pages are dropped, the head volume and index
   * page list are restored from their backups, and the WAL is rolled back and synced.
   */
  @Override
  public void rollback() throws UnsupportedOperationException {
    commitLock.lock();
    try {
      // throw away per-segment state belonging to the current transaction
      for (int seg = 0; seg < locks.length; seg++) {
        final Lock segmentWriteLock = locks[seg].writeLock();
        segmentWriteLock.lock();
        try {
          writeCache[seg].clear();
          if (caches != null) {
            caches[seg].clear();
          }
          uncommittedDataLongs[seg].clear();
          uncommittedIndexTable[seg].clear();
        } finally {
          segmentWriteLock.unlock();
        }
      }

      structuralLock.lock();
      try {
        uncommittedStackPages.clear();

        // bring the header and the index page list back to their last committed state
        headVol.putData(0, headVolBackup, 0, headVolBackup.length);
        indexPages = indexPagesBackup.clone();

        wal.rollback();
        wal.sync();
      } finally {
        structuralLock.unlock();
      }
    } finally {
      commitLock.unlock();
    }
  }
Beispiel #6
0
  /**
   * Returns the positions of a (possibly linked) record.
   *
   * <p>Starting from {@code indexVal}, the link chain is followed for as long as the last entry
   * has the {@code MLINKED} bit set. Each link value is read from the WAL when the current or a
   * not-yet-replayed transaction has an entry for it, otherwise from the main volume.
   *
   * @param segment index of the segment whose WAL tables are consulted
   * @param indexVal index value of the record
   * @return the chain of position values; {@code StoreDirect.EMPTY_LONGS} when the upper 16 bits
   *     of {@code indexVal} are zero and it is not linked; {@code null} when they are zero but
   *     the linked bit is set
   */
  @Override
  protected long[] offsetsGet(int segment, long indexVal) {
    final boolean upperBitsEmpty = (indexVal >>> 48) == 0;
    if (upperBitsEmpty) {
      if ((indexVal & MLINKED) != 0) {
        return null;
      }
      return StoreDirect.EMPTY_LONGS;
    }

    long[] chain = {indexVal};
    while ((chain[chain.length - 1] & MLINKED) != 0) {
      final long linkOffset = chain[chain.length - 1] & MOFFSET;

      // WAL pointer from the current transaction, else from earlier, not yet replayed ones
      long walPointer = uncommittedDataLongs[segment].get(linkOffset);
      if (walPointer == 0) {
        walPointer = committedDataLongs[segment].get(linkOffset);
      }

      long rawLink;
      if (walPointer == 0) {
        // unknown to every transaction, so the link lives in the main store
        rawLink = vol.getLong(linkOffset);
      } else {
        try {
          rawLink = wal.walGetByteArray(walPointer).readLong();
        } catch (IOException e) {
          throw new DBException.VolumeIOError(e);
        }
      }

      // append the decoded link as the new last element
      chain = Arrays.copyOf(chain, chain.length + 1);
      chain[chain.length - 1] = parity3Get(rawLink);
    }

    if (CC.ASSERT) {
      offsetsVerify(chain);
    }

    if (CC.LOG_STORE && LOG.isLoggable(Level.FINEST)) {
      final Object[] logArgs = {Long.toHexString(indexVal), Arrays.toString(chain)};
      LOG.log(Level.FINEST, "indexVal={0}, ret={1}", logArgs);
    }

    return chain;
  }
Beispiel #7
0
  /**
   * Applies committed changes that are still held in the in-memory WAL tables to
   * {@code realVol}.
   *
   * <p>For every segment, under that segment's write lock, committed index values and then
   * committed record data are written and the corresponding tables are cleared. Afterwards,
   * under {@code structuralLock}, committed Long Stack pages are written and the header is
   * overwritten from {@code headVolBackup}.
   *
   * <p>The caller must hold {@code commitLock}. When {@code CC.PARANOID} is set, every data and
   * Long Stack write is recorded and checked for overlapping ranges at the end.
   *
   * @throws AssertionError if an enabled assertion or paranoid check fails
   */
  protected void replaySoft() {
    if (CC.ASSERT && !commitLock.isHeldByCurrentThread()) throw new AssertionError();

    // paranoid mode: every write is remembered as (offset << 16) | length
    LongList written = CC.PARANOID ? new LongList() : null;

    for (int lockPos = 0; lockPos < locks.length; lockPos++) {
      locks[lockPos].writeLock().lock();
      try {
        // update index table; keys and values sit in alternating slots of the backing array
        long[] table = committedIndexTable[lockPos].table;
        for (int pos = 0; pos < table.length; ) {
          long recidOffset = table[pos++];
          long val = table[pos++];
          // key 0 marks an unused slot; value -1 is skipped as well
          if (recidOffset == 0 || val == -1) continue;

          realVol.ensureAvailable(Fun.roundUp(recidOffset + 8, StoreDirect.PAGE_SIZE));
          realVol.putLong(recidOffset, val);

          if (CC.PARANOID) {
            // check this is index page
            if (!Fun.arrayContains(indexPages, Fun.roundDown(recidOffset, PAGE_SIZE))) {
              throw new AssertionError("not index page");
            }
          }
        }
        committedIndexTable[lockPos].clear();

        // write data
        table = committedDataLongs[lockPos].table;
        for (int pos = 0; pos < table.length; ) {
          long volOffset = table[pos++];
          long walPointer = table[pos++];
          if (volOffset == 0 || walPointer == -1) continue;

          byte[] b = wal.walGetByteArray2(walPointer);
          if (CC.ASSERT) assertRecord(volOffset, b);
          // validate the size before anything is written to the volume
          if (CC.ASSERT && b.length > MAX_REC_SIZE) throw new AssertionError();

          realVol.ensureAvailable(Fun.roundUp(volOffset + b.length, StoreDirect.PAGE_SIZE));
          realVol.putData(volOffset, b, 0, b.length);

          if (CC.PARANOID) written.add((volOffset << 16) | b.length);
        }
        committedDataLongs[lockPos].clear();
      } finally {
        locks[lockPos].writeLock().unlock();
      }
    }
    structuralLock.lock();
    try {
      // flush modified Long Stack pages
      for (int pos = 0; pos < committedPageLongStack.table.length; ) {
        long volOffset = committedPageLongStack.table[pos++];
        long walPointer = committedPageLongStack.table[pos++];
        if (volOffset == 0 || walPointer == -1) continue;

        byte[] b = wal.walGetByteArray2(walPointer);
        if (CC.ASSERT) assertLongStackPage(volOffset, b);

        realVol.ensureAvailable(Fun.roundUp(volOffset + b.length, StoreDirect.PAGE_SIZE));
        realVol.putData(volOffset, b, 0, b.length);

        if (CC.PARANOID) written.add((volOffset << 16) | b.length);
      }
      committedPageLongStack.clear();

      if (CC.PARANOID) {
        // the live header must still match the backup taken at commit time
        byte[] headVolBuf = new byte[headVolBackup.length];
        headVol.getData(0, headVolBuf, 0, headVolBuf.length);
        if (!Arrays.equals(headVolBuf, headVolBackup)) throw new AssertionError();
      }

      // update page header
      realVol.putData(0, headVolBackup, 0, headVolBackup.length);
    } finally {
      structuralLock.unlock();
    }

    if (CC.PARANOID) {
      // check for overlaps: after sorting, neighbouring writes must not intersect
      long[] w = Arrays.copyOf(written.array, written.size);
      Arrays.sort(w);
      for (int i = 0; i < w.length - 1; i++) {
        // the length occupies the low 16 bits, matching the (offset << 16) | length encoding
        long offset1 = w[i] >>> 16;
        long size1 = w[i] & 0xFFFF;
        long offset2 = w[i + 1] >>> 16;
        long size2 = w[i + 1] & 0xFFFF;

        if (offset1 + size1 > offset2) {
          throw new AssertionError(
              "write overlap conflict at: "
                  + offset1
                  + " + "
                  + size1
                  + " > "
                  + offset2
                  + " ("
                  + size2
                  + ")");
        }
      }
    }
  }
Beispiel #8
0
  /**
   * Commits the current transaction while holding {@code commitLock}.
   *
   * <p>Steps, in order: flush the write caches; for every segment (under its write lock) write
   * the uncommitted index values into the WAL and hand the uncommitted tables over to the
   * committed ones; then, under {@code structuralLock}, write modified Long Stack pages and the
   * head volume into the WAL, commit and seal the WAL, replay it into the real volume via
   * {@code replaySoft()}, sync the real volume and destroy the WAL files.
   */
  @Override
  public void commit() {
    commitLock.lock();
    try {
      // flush write caches into write ahead log
      flushWriteCache();

      // move uncommited data to committed
      for (int segment = 0; segment < locks.length; segment++) {
        locks[segment].writeLock().lock();
        try {
          // dump index vals into WAL; the table holds offset/value pairs in alternating slots
          long[] table = uncommittedIndexTable[segment].table;
          for (int i = 0; i < table.length; ) {
            long offset = table[i++];
            long val = table[i++];
            // offset 0 marks an unused slot
            if (offset == 0) continue;
            wal.walPutLong(offset, val);
          }

          // presumably copies all entries into the committed table and empties the source;
          // verify against moveAndClear
          moveAndClear(uncommittedIndexTable[segment], committedIndexTable[segment]);
          moveAndClear(uncommittedDataLongs[segment], committedDataLongs[segment]);

        } finally {
          locks[segment].writeLock().unlock();
        }
      }

      structuralLock.lock();
      try {
        // flush modified Long Stack pages into WAL
        long[] set = uncommittedStackPages.set;
        longStackPagesLoop:
        for (int i = 0; i < set.length; i++) {
          long offset = set[i];
          if (offset == 0) continue longStackPagesLoop;
          byte[] val = (byte[]) uncommittedStackPages.values[i];

          // a tombstone page is recorded with pointer -1 instead of being written to the WAL
          if (val == LONG_STACK_PAGE_TOMBSTONE) committedPageLongStack.put(offset, -1);
          else {
            if (CC.ASSERT) assertLongStackPage(offset, val);

            long walPointer = wal.walPutByteArray(offset, val, 0, val.length);
            committedPageLongStack.put(offset, walPointer);
          }
        }
        uncommittedStackPages.clear();

        // update checksum
        headVol.putInt(HEAD_CHECKSUM, headChecksum(headVol));
        // take backup of headVol
        headVol.getData(0, headVolBackup, 0, headVolBackup.length);
        wal.walPutByteArray(0, headVolBackup, 0, headVolBackup.length);
        wal.commit();
        wal.seal();
        // NOTE(review): replaySoft() acquires the per-segment write locks while structuralLock
        // is still held here - confirm this agrees with the lock ordering used elsewhere.
        replaySoft();
        realVol.sync();
        wal.destroyWalFiles();
      } finally {
        structuralLock.unlock();
      }
    } finally {
      commitLock.unlock();
    }
  }
Beispiel #9
0
 /**
  * Records a long value destined for {@code offset} by appending it to the WAL.
  *
  * <p>When assertions are enabled the caller must hold {@code structuralLock}.
  *
  * @param offset target offset of the value
  * @param val value to record
  */
 @Override
 protected void indexLongPut(long offset, long val) {
   if (CC.ASSERT) {
     if (!structuralLock.isHeldByCurrentThread()) {
       throw new AssertionError();
     }
   }
   wal.walPutLong(offset, val);
 }
Beispiel #10
0
 /**
  * Delegates to {@code wal.initFailedCloseFiles()}.
  *
  * <p>NOTE(review): judging by the name this is the cleanup hook for a failed initialization;
  * confirm against the superclass contract.
  */
 @Override
 protected void initFailedCloseFiles() {
   wal.initFailedCloseFiles();
 }
Beispiel #11
0
  /**
   * Opens the store: replays any existing WAL content into the real volume, syncs it, destroys
   * the WAL files and then runs {@code initOpenPost()}.
   *
   * <p>Only long and byte-array WAL entries are applied. The record, tombstone, preallocate and
   * rollback callbacks throw {@code DBException.DataCorruption}.
   *
   * @throws DBException.WrongConfig if the store is read-only and the file
   *     {@code fileName + ".wal.0"} is not empty
   */
  @Override
  public void initOpen() {
    // TODO disable readonly feature for this store

    realVol = vol;

    // a read-only store cannot replay a WAL, so a non-empty WAL file is a configuration error
    if (readonly && !Volume.isEmptyFile(fileName + ".wal.0"))
      throw new DBException.WrongConfig(
          "There is dirty WAL file, but storage is read-only. Can not replay file");

    wal.open(
        new WriteAheadLog.WALReplay() {

          @Override
          public void beforeReplayStart() {}

          // applies a single long value to the real volume, growing it when needed
          @Override
          public void writeLong(long offset, long value) {
            if (CC.ASSERT && offset % 8 != 0) throw new AssertionError();
            realVol.ensureAvailable(Fun.roundUp(offset + 8, StoreDirect.PAGE_SIZE));
            realVol.putLong(offset, value);
          }

          // treated as corruption when encountered during replay
          @Override
          public void writeRecord(long recid, long walId, Volume vol, long volOffset, int length) {
            throw new DBException.DataCorruption();
          }

          // copies length bytes from the given volume at volOffset into the real volume at offset
          @Override
          public void writeByteArray(
              long offset, long walId, Volume vol, long volOffset, int length) {
            if (CC.ASSERT && offset % 8 != 0) throw new AssertionError();
            realVol.ensureAvailable(Fun.roundUp(offset + length, StoreDirect.PAGE_SIZE));
            vol.transferInto(volOffset, realVol, offset, length);
          }

          @Override
          public void beforeDestroyWAL() {}

          @Override
          public void commit() {}

          // treated as corruption when encountered during replay
          @Override
          public void rollback() {
            throw new DBException.DataCorruption();
          }

          // treated as corruption when encountered during replay
          @Override
          public void writeTombstone(long recid) {
            throw new DBException.DataCorruption();
          }

          // treated as corruption when encountered during replay
          @Override
          public void writePreallocate(long recid) {
            throw new DBException.DataCorruption();
          }
        });
    // sync the real volume before the WAL files are removed
    realVol.sync();
    wal.destroyWalFiles();

    initOpenPost();

    // TODO reenable this assertion
    //        if(CC.PARANOID)
    //            storeCheck();
  }