예제 #1
0
 /**
  * Transfers every occupied entry of {@code from} into {@code to} and then empties {@code from}.
  *
  * <p>The backing {@code table} is walked as consecutive (key, value) pairs; a pair whose key is
  * zero is treated as an unused slot and skipped.
  *
  * @param from map whose entries are copied; it is cleared before this method returns
  * @param to map that receives each entry through {@code put}
  */
 private void moveAndClear(LongLongMap from, LongLongMap to) {
   final long[] entries = from.table;
   for (int slot = 0; slot < entries.length; slot += 2) {
     final long entryKey = entries[slot];
     final long entryVal = entries[slot + 1];
     if (entryKey != 0) {
       to.put(entryKey, entryVal);
     }
   }
   from.clear();
 }
예제 #2
0
  /**
   * Commits pending modifications.
   *
   * <ul>
   *   <li>On a snapshot this is a no-op.
   *   <li>Without transactions the volume is simply synced.
   *   <li>Otherwise, under {@code commitLock}, each segment is processed under its write lock:
   *       every entry of {@code modified[segment]} is written into {@code indexTable} at {@code
   *       recid * 8}, and the value it replaces is copied into each open snapshot's {@code
   *       modified[segment]} map unless that map already returns a non-zero value for the recid.
   *       Finally a single byte holding {@code I_TX_VALID} is appended and the volume is synced.
   * </ul>
   */
  @Override
  public void commit() {
    if (isSnapshot) {
      return;
    }
    if (!tx) {
      vol.sync();
      return;
    }

    commitLock.lock();
    try {
      final StoreAppend[] openSnapshots;
      if (snapshots == null) {
        openSnapshots = STORE_APPENDS_ZERO_ARRAY;
      } else {
        openSnapshots = snapshots.toArray(STORE_APPENDS_ZERO_ARRAY);
      }

      for (int segment = 0; segment < locks.length; segment++) {
        final Lock segmentLock = locks[segment].writeLock();
        segmentLock.lock();
        try {
          final long[] pending = modified[segment].table;
          for (int slot = 0; slot < pending.length; slot += 2) {
            final long recid = pending[slot];
            final long indexOffset = recid * 8;
            // a zero key marks an unused slot in the table
            if (indexOffset != 0) {
              indexTable.ensureAvailable(indexOffset + 8);
              final long previous = indexTable.getLong(indexOffset);
              indexTable.putLong(indexOffset, pending[slot + 1]);

              // let every open snapshot remember the value it could still see
              for (StoreAppend snapshot : openSnapshots) {
                final LongLongMap snapshotModified = snapshot.modified[segment];
                if (snapshotModified.get(recid) == 0) {
                  snapshotModified.put(recid, previous);
                }
              }
            }
          }
          modified[segment].clear();
        } finally {
          segmentLock.unlock();
        }
      }

      // append the marker that makes the transaction valid, then flush
      final long markerOffset = alloc(1, 1);
      vol.putUnsignedByte(markerOffset, I_TX_VALID);
      vol.sync();
    } finally {
      commitLock.unlock();
    }
  }
예제 #3
0
  /**
   * Returns the content of the Long Stack page stored at {@code pageOffset}.
   *
   * <p>Sources are consulted in this order: the uncommitted pages of the current transaction, the
   * committed-but-not-yet-replayed pages held in the WAL, and finally the main volume, where the
   * page size is taken from the upper 16 bits of the parity-checked long at {@code pageOffset}.
   *
   * <p>The caller must hold {@code structuralLock}; this is asserted when {@code CC.ASSERT} is on.
   *
   * @param pageOffset offset of the page
   * @param willBeModified when {@code true}, a page that was not already among the uncommitted
   *     pages is registered there before being returned
   * @return the page bytes
   */
  @Override
  protected byte[] loadLongStackPage(long pageOffset, boolean willBeModified) {
    if (CC.ASSERT && !structuralLock.isHeldByCurrentThread()) {
      throw new AssertionError();
    }

    // a dirty page from the current TX always wins
    final byte[] dirty = uncommittedStackPages.get(pageOffset);
    if (dirty != null) {
      return dirty;
    }

    final byte[] loaded;
    final long walPointer = committedPageLongStack.get(pageOffset);
    if (walPointer != 0) {
      // page was written by a previous TX and still lives only in the WAL
      loaded = wal.walGetByteArray2(walPointer);
    } else {
      // fall back to the main store
      final int pageSize = (int) (parity4Get(vol.getLong(pageOffset)) >>> 48);
      loaded = new byte[pageSize];
      vol.getData(pageOffset, loaded, 0, pageSize);
    }

    // the page is about to change, so track it among the uncommitted pages
    if (willBeModified) {
      uncommittedStackPages.put(pageOffset, loaded);
    }
    return loaded;
  }
예제 #4
0
  /**
   * Opens an existing store by replaying the log found in {@code vol}.
   *
   * <p>Replay starts at {@code headerSize} and reads one instruction byte at a time until the end
   * of the volume, a zero instruction byte, an unknown instruction, or a {@link RuntimeException}
   * raised while reading. Insert/update records map their recid to the position of the
   * instruction; delete records map their recid to {@code -1}.
   *
   * <p>With transactions enabled, these mappings are buffered and only written to {@code
   * indexTable} when an {@code I_TX_VALID} instruction is reached; they are discarded on {@code
   * I_TX_ROLLBACK}, on a zero instruction, or when replay fails. With transactions disabled there
   * is no buffer, so each mapping is written to {@code indexTable} as soon as its record has been
   * parsed. (Previously the null buffer was dereferenced, and the resulting {@code
   * NullPointerException} was swallowed by the catch block, ending replay at the first record.)
   *
   * <p>On exit {@code eof} is set to the start of the last instruction that was not fully
   * processed, and {@code highestRecid} to the largest recid seen (at least {@code
   * RECID_LAST_RESERVED}).
   */
  protected void initOpen() {
    checkFeaturesBitmap(vol.getLong(HEAD_FEATURES));

    // replay log
    long pos = headerSize;
    final long volumeSize = vol.length();
    long lastValidPos = pos;
    long highestRecid2 = RECID_LAST_RESERVED;
    // index changes of the transaction currently being replayed; null when transactions are off
    final LongLongMap commitData = tx ? new LongLongMap() : null;

    try {

      while (true) {
        lastValidPos = pos;
        if (pos >= volumeSize) break;
        final long instPos = pos;
        final int inst = vol.getUnsignedByte(pos++);

        if (inst == I_INSERT || inst == I_UPDATE) {

          // packed long: the top 4 bits carry the number of bytes consumed
          long recid = vol.getPackedLong(pos);
          pos += recid >>> 60;
          recid = longParityGet(recid & DataIO.PACK_LONG_RESULT_MASK);

          highestRecid2 = Math.max(highestRecid2, recid);

          // skip rest of the record
          long size = vol.getPackedLong(pos);
          long dataLen = longParityGet(size & DataIO.PACK_LONG_RESULT_MASK) - 1;
          dataLen = Math.max(0, dataLen);
          pos = pos + (size >>> 60) + dataLen;

          // record only after the header parsed cleanly, so a truncated record is never indexed
          applyReplayedIndexValue(commitData, recid, instPos);
        } else if (inst == I_DELETE) {
          long recid = vol.getPackedLong(pos);
          pos += recid >>> 60;
          recid = longParityGet(recid & DataIO.PACK_LONG_RESULT_MASK);

          highestRecid2 = Math.max(highestRecid2, recid);

          applyReplayedIndexValue(commitData, recid, -1);

          // NOTE(review): a second, unreachable `inst == I_DELETE` branch that stored -2 used to
          // follow here. It was presumably meant for a different instruction code — confirm
          // which one and restore it under the right constant.
        } else if (inst == I_SKIP_SINGLE_BYTE) {
          // do nothing, just skip single byte
        } else if (inst == I_SKIP_MULTI_BYTE) {
          // read size and skip it
          // skip rest of the record
          long size = vol.getPackedLong(pos);
          pos += (size >>> 60) + longParityGet(size & DataIO.PACK_LONG_RESULT_MASK);
        } else if (inst == I_TX_VALID) {
          if (tx) {
            // apply changes from commitData to indexTable
            for (int i = 0; i < commitData.table.length; i += 2) {
              long recidOffset = commitData.table[i] * 8;
              if (recidOffset == 0) continue;
              indexTable.ensureAvailable(recidOffset + 8);
              indexTable.putLong(recidOffset, commitData.table[i + 1]);
            }
            commitData.clear();
          }
        } else if (inst == I_TX_ROLLBACK) {
          if (tx) {
            commitData.clear();
          }
        } else if (inst == 0) {
          // rollback last changes if that is necessary
          if (tx) {
            // rollback changes in index table since last valid tx
            commitData.clear();
          }

          break;
        } else {
          // TODO log here?
          LOG.warning("Unknown instruction " + inst);
          break;
        }
      }
    } catch (RuntimeException e) {
      // log replay finished
      // TODO log here?
      LOG.log(Level.WARNING, "Log replay finished", e);
      if (tx) {
        // rollback changes in index table since last valid tx
        commitData.clear();
      }
    }
    eof = lastValidPos;

    highestRecid.set(highestRecid2);
  }

  /**
   * Records one index value produced by log replay: buffered in {@code commitData} when a
   * transaction buffer exists, otherwise written straight into {@code indexTable} at {@code
   * recid * 8}.
   *
   * @param commitData transaction buffer, or {@code null} when transactions are disabled
   * @param recid record id the value belongs to
   * @param indexVal value to store for {@code recid}
   */
  private void applyReplayedIndexValue(LongLongMap commitData, long recid, long indexVal) {
    if (commitData != null) {
      commitData.put(recid, indexVal);
      return;
    }
    final long recidOffset = recid * 8;
    indexTable.ensureAvailable(recidOffset + 8);
    indexTable.putLong(recidOffset, indexVal);
  }
예제 #5
0
  /**
   * Copies the committed, not-yet-replayed changes into {@code realVol}.
   *
   * <p>For every segment, under that segment's write lock, the committed index values are written
   * with {@code putLong} and the committed record data is fetched from the WAL and written with
   * {@code putData}; both maps are cleared afterwards. Then, under {@code structuralLock}, the
   * committed Long Stack pages are written the same way and {@code headVolBackup} is copied to
   * offset 0 of {@code realVol}. Table slots whose key is zero, or whose value is {@code -1}, are
   * skipped.
   *
   * <p>The caller must hold {@code commitLock}; this is asserted when {@code CC.ASSERT} is on.
   * When {@code CC.PARANOID} is on, every data and Long Stack page write is recorded as {@code
   * (offset << 16) | length} and the writes are checked for overlaps at the end.
   *
   * @throws AssertionError if one of the enabled {@code CC.ASSERT} / {@code CC.PARANOID} checks
   *     fails
   */
  protected void replaySoft() {
    if (CC.ASSERT && !commitLock.isHeldByCurrentThread()) throw new AssertionError();

    // (offset << 16 | length) of every data write; only collected in paranoid mode
    LongList written = CC.PARANOID ? new LongList() : null;

    for (int lockPos = 0; lockPos < locks.length; lockPos++) {
      locks[lockPos].writeLock().lock();
      try {
        // update index table
        long[] table = committedIndexTable[lockPos].table;
        indexValLoop:
        for (int pos = 0; pos < table.length; ) {
          long recidOffset = table[pos++];
          long val = table[pos++];
          if (recidOffset == 0 || val == -1) continue indexValLoop;

          realVol.ensureAvailable(Fun.roundUp(recidOffset + 8, StoreDirect.PAGE_SIZE));
          realVol.putLong(recidOffset, val);

          if (CC.PARANOID) {
            // check this is index page
            if (!Fun.arrayContains(indexPages, Fun.roundDown(recidOffset, PAGE_SIZE))) {
              throw new AssertionError("not index page");
            }
          }
        }
        committedIndexTable[lockPos].clear();

        // write data
        table = committedDataLongs[lockPos].table;
        dataLoop:
        for (int pos = 0; pos < table.length; ) {
          long volOffset = table[pos++];
          long walPointer = table[pos++];
          if (volOffset == 0 || walPointer == -1) continue dataLoop;

          byte[] b = wal.walGetByteArray2(walPointer);
          if (CC.ASSERT) assertRecord(volOffset, b);

          realVol.ensureAvailable(Fun.roundUp(volOffset + b.length, StoreDirect.PAGE_SIZE));
          realVol.putData(volOffset, b, 0, b.length);
          if (CC.ASSERT && b.length > MAX_REC_SIZE) throw new AssertionError();

          if (CC.PARANOID) written.add((volOffset << 16) | b.length);
        }
        committedDataLongs[lockPos].clear();
      } finally {
        locks[lockPos].writeLock().unlock();
      }
    }
    structuralLock.lock();
    try {
      // flush modified Long Stack pages
      dataLoop:
      for (int pos = 0; pos < committedPageLongStack.table.length; ) {
        long volOffset = committedPageLongStack.table[pos++];
        long walPointer = committedPageLongStack.table[pos++];
        if (volOffset == 0 || walPointer == -1) continue dataLoop;

        byte[] b = wal.walGetByteArray2(walPointer);
        if (CC.ASSERT) assertLongStackPage(volOffset, b);

        realVol.ensureAvailable(Fun.roundUp(volOffset + b.length, StoreDirect.PAGE_SIZE));
        realVol.putData(volOffset, b, 0, b.length);

        if (CC.PARANOID) written.add((volOffset << 16) | b.length);
      }
      committedPageLongStack.clear();

      if (CC.PARANOID) {
        // the backup taken at commit time must still match the live header
        byte[] headVolBuf = new byte[headVolBackup.length];
        headVol.getData(0, headVolBuf, 0, headVolBuf.length);
        if (!Arrays.equals(headVolBuf, headVolBackup)) throw new AssertionError();
      }

      // update page header
      realVol.putData(0, headVolBackup, 0, headVolBackup.length);
    } finally {
      structuralLock.unlock();
    }

    if (CC.PARANOID) {
      // check for overlaps
      long[] w = Arrays.copyOf(written.array, written.size);
      Arrays.sort(w);
      for (int i = 0; i < w.length - 1; i++) {
        // entries were packed as (offset << 16) | length, so the length occupies the low
        // 16 bits; masking with 0xFF would truncate lengths >= 256 and hide real overlaps
        long offset1 = w[i] >>> 16;
        long size1 = w[i] & 0xFFFF;
        long offset2 = w[i + 1] >>> 16;
        long size2 = w[i + 1] & 0xFFFF;

        if (offset1 + size1 > offset2) {
          throw new AssertionError(
              "write overlap conflict at: "
                  + offset1
                  + " + "
                  + size1
                  + " > "
                  + offset2
                  + " ("
                  + size2
                  + ")");
        }
      }
    }
  }
예제 #6
0
  /**
   * Commits the current transaction through the write ahead log.
   *
   * <p>Under {@code commitLock}: the write caches are flushed; for each segment (under its write
   * lock) the uncommitted index values are written to the WAL and the uncommitted index and data
   * maps are moved into their committed counterparts. Then, under {@code structuralLock}, each
   * uncommitted Long Stack page is either recorded as {@code -1} (when it is the tombstone page)
   * or written to the WAL and recorded by its WAL pointer; the header checksum is refreshed, the
   * header is backed up and written to the WAL, the WAL is committed and sealed, the changes are
   * replayed into the real volume, the real volume is synced and the WAL files are destroyed.
   */
  @Override
  public void commit() {
    commitLock.lock();
    try {
      // push write caches into the write ahead log
      flushWriteCache();

      // promote uncommitted data of every segment to committed
      for (int seg = 0; seg < locks.length; seg++) {
        locks[seg].writeLock().lock();
        try {
          // dump index values into the WAL; a zero offset marks an unused slot
          final long[] indexVals = uncommittedIndexTable[seg].table;
          for (int slot = 0; slot < indexVals.length; slot += 2) {
            final long indexOffset = indexVals[slot];
            final long indexVal = indexVals[slot + 1];
            if (indexOffset != 0) {
              wal.walPutLong(indexOffset, indexVal);
            }
          }

          moveAndClear(uncommittedIndexTable[seg], committedIndexTable[seg]);
          moveAndClear(uncommittedDataLongs[seg], committedDataLongs[seg]);
        } finally {
          locks[seg].writeLock().unlock();
        }
      }

      structuralLock.lock();
      try {
        // write modified Long Stack pages into the WAL
        final long[] pageOffsets = uncommittedStackPages.set;
        for (int slot = 0; slot < pageOffsets.length; slot++) {
          final long pageOffset = pageOffsets[slot];
          if (pageOffset == 0) {
            continue;
          }
          final byte[] page = (byte[]) uncommittedStackPages.values[slot];

          if (page != LONG_STACK_PAGE_TOMBSTONE) {
            if (CC.ASSERT) {
              assertLongStackPage(pageOffset, page);
            }
            final long walPointer = wal.walPutByteArray(pageOffset, page, 0, page.length);
            committedPageLongStack.put(pageOffset, walPointer);
          } else {
            committedPageLongStack.put(pageOffset, -1);
          }
        }
        uncommittedStackPages.clear();

        // refresh the header checksum
        headVol.putInt(HEAD_CHECKSUM, headChecksum(headVol));
        // snapshot the header and log it
        headVol.getData(0, headVolBackup, 0, headVolBackup.length);
        wal.walPutByteArray(0, headVolBackup, 0, headVolBackup.length);

        wal.commit();
        wal.seal();
        replaySoft();
        realVol.sync();
        wal.destroyWalFiles();
      } finally {
        structuralLock.unlock();
      }
    } finally {
      commitLock.unlock();
    }
  }