public static void main(String[] args) {

    // Name under which the sample index is registered and later resolved.
    final String indexName = "testIndex";

    // Backing store for the sample; it is destroyed in the finally clause.
    final TemporaryStore tmpStore = new TemporaryStore();

    try {

      /*
       * Register the index. The B+Tree has many options, but only the
       * index name and the index UUID are required. A store can hold
       * multiple named indices.
       */
      tmpStore.registerIndex(new IndexMetadata(indexName, UUID.randomUUID()));

      /*
       * Resolve the unisolated B+Tree, which is the mutable view.
       *
       * The temporary store does not differentiate between mutable and
       * read-only views, but the Journal and the scale-out architecture
       * do.
       */
      final BTree unisolated = tmpStore.getIndex(indexName, ITx.UNISOLATED);

      // nothing has been written under the key yet (not found).
      System.err.println("tuple: " + unisolated.lookup("hello"));

      // write a tuple and read it back.
      unisolated.insert("hello", "world");
      System.err.println("tuple: " + unisolated.lookup("hello"));

      // overwrite the tuple and read back the new value.
      unisolated.insert("hello", "again");
      System.err.println("tuple: " + unisolated.lookup("hello"));

    } finally {

      // release the backing store whether or not the sample succeeded.
      tmpStore.destroy();
    }
  }
Beispiel #2
0
  /**
   * Resolve a named index for this transaction, isolating it on first use.
   *
   * <p>The index will be isolated at the same level as this transaction. Changes on the index will
   * be made restart-safe iff the transaction successfully commits. The view created by the first
   * request for a name is cached so that later calls within the same transaction return the same
   * instance.
   *
   * @param name The name of the index.
   * @return The named index or <code>null</code> if no index is registered under that name.
   * @exception IllegalArgumentException if <i>name</i> is <code>null</code>.
   * @exception UnsupportedOperationException if this is a read-only transaction.
   * @exception IllegalStateException if the transaction is not active.
   * @exception RuntimeException if the named index is not isolatable.
   */
  public ILocalBTreeView getIndex(final String name) {

    if (name == null) {
      throw new IllegalArgumentException();
    }

    if (readOnly) {
      /*
       * Note: Access to the indices currently goes through the
       * IResourceManager interface for a read-only transaction.
       */
      throw new UnsupportedOperationException();
    }

    /*
     * @todo lock could be per index for higher concurrency rather than for
     * all indices which you might access through this tx.
     */
    lock.lock();

    try {

      if (!isActive()) {
        throw new IllegalStateException(NOT_ACTIVE);
      }

      /*
       * Consult the cache first so that the same instance is handed back
       * on each call within the same transaction.
       */
      if (indices.containsKey(name)) {
        return indices.get(name);
      }

      /*
       * See if the index was registered as of the ground state used by
       * this transaction to isolate indices.
       *
       * Note: IResourceManager#getIndex(String name,long timestamp) calls
       * us when the timestamp identifies an active transaction so we MUST
       * NOT call that method ourselves! Hence there is some replication
       * of logic between that method and this one.
       */
      final AbstractBTree[] groundState =
          resourceManager.getIndexSources(name, readsOnCommitTime);

      if (groundState == null) {

        // The named index was not registered as of the transaction ground state.
        if (log.isInfoEnabled()) {
          log.info("No such index: " + name + ", startTime=" + startTime);
        }

        return null;
      }

      if (!groundState[0].getIndexMetadata().isIsolatable()) {
        throw new RuntimeException("Not isolatable: " + name);
      }

      /*
       * Define the isolated view. The write set is always the first
       * element, followed by the ground state sources in their original
       * order.
       */
      final AbstractBTree[] viewSources = new AbstractBTree[groundState.length + 1];

      /*
       * The write set lives on a temporary store.
       *
       * Note: The BTree is NOT registered under a name so it can not be
       * discovered on the temporary store. This is fine since we hold
       * onto a hard reference to the BTree in [indices].
       */
      viewSources[0] =
          BTree.create(getTemporaryStore(), groundState[0].getIndexMetadata().clone());

      System.arraycopy(groundState, 0, viewSources, 1, groundState.length);

      // the fused view with the isolated write set on top.
      final ILocalBTreeView isolatedView = new IsolatedFusedView(-startTime, viewSources);

      // report event.
      ResourceManager.isolateIndex(startTime, name);

      // remember the view for subsequent requests.
      indices.put(name, isolatedView);

      return isolatedView;

    } finally {

      lock.unlock();
    }
  }
    /**
     * Atomic update. Redefines the view of the live index partition so that it consists of the live
     * journal followed by the newly built index segment, and then increments the index partition
     * merge counter.
     *
     * @return <code>null</code>
     * @throws IllegalStateException if overflow is currently allowed, if the live index has no
     *     partition metadata, or if the live journal is not the first resource of the current
     *     view.
     * @throws RuntimeException if the view resolved for the resource is a simple {@link BTree}.
     */
    @Override
    protected Void doTask() throws Exception {

      updateEvent.start();

      try {

        // overflow must be disallowed while the view is being redefined.
        if (resourceManager.isOverflowAllowed()) throw new IllegalStateException();

        final SegmentMetadata segmentMetadata = buildResult.segmentMetadata;

        if (INFO) log.info("Begin: name=" + getOnlyResource() + ", newSegment=" + segmentMetadata);

        /*
         * Open the unisolated B+Tree on the live journal that is
         * absorbing writes. We are going to update its index metadata.
         *
         * Note: I am using AbstractTask#getIndex(String name) so that
         * the concurrency control logic will notice the changes to the
         * BTree and cause it to be checkpointed if this task succeeds
         * normally.
         */
        final ILocalBTreeView view = (ILocalBTreeView) getIndex(getOnlyResource());

        // make sure that this is the same scale-out index.
        assertSameIndex(indexUUID, view.getMutableBTree());

        if (view instanceof BTree) {

          /*
           * Note: there is an expectation that this is not a simple
           * BTree because the build task is supposed to be invoked
           * after an overflow event, and that event should have
           * re-defined the view to include the BTree on the new
           * journal plus the historical view.
           *
           * One explanation for finding a simple view here is that
           * the view was a simple BTree on the old journal and the
           * data was copied from the old journal into the new journal
           * and then someone decided to do a build even though a
           * copy had already been done. However, this is not a very
           * good explanation since we try to avoid doing a build if
           * we have already done a copy!
           */

          throw new RuntimeException(
              "View is only a B+Tree: name="
                  + buildResult.name
                  + ", pmd="
                  + view.getIndexMetadata().getPartitionMetadata());
        }

        // The live B+Tree.
        final BTree btree = view.getMutableBTree();

        // Checked before the first dereference of [btree] below.
        assert btree != null : "Expecting index: " + getOnlyResource();

        if (INFO)
          log.info(
              "src="
                  + getOnlyResource()
                  + ",counter="
                  + view.getCounter().get()
                  + ",checkpoint="
                  + btree.getCheckpoint());

        // clone the current metadata record for the live index.
        final IndexMetadata indexMetadata = btree.getIndexMetadata().clone();

        /*
         * This is the index partition definition on the live index -
         * the one that will be replaced with a new view as the result
         * of this atomic update.
         */
        final LocalPartitionMetadata currentpmd = indexMetadata.getPartitionMetadata();

        /*
         * Check pre-conditions.
         *
         * Note: the null test MUST come before the partition metadata is
         * dereferenced, otherwise an index which is not an index
         * partition is reported as a NullPointerException rather than
         * the intended IllegalStateException.
         */
        if (currentpmd == null) {

          throw new IllegalStateException("Not an index partition: " + getOnlyResource());
        }

        final IResourceMetadata[] currentResources = currentpmd.getResources();

        if (!currentResources[0].getUUID().equals(getJournal().getRootBlockView().getUUID())) {

          // render the array contents rather than the array's identity.
          throw new IllegalStateException(
              "Expecting live journal to be the first resource: "
                  + java.util.Arrays.toString(currentResources));
        }

        /*
         * Note: Stronger pre-conditions on the old journal and the old
         * index segment are deliberately NOT tested here since they are
         * not valid for histories such as:
         *
         * history=create() register(0) split(0)
         * copy(entryCount=314)
         *
         * This case arises when there are not enough index entries
         * written on the journal after a split to warrant a build
         * so the buffered writes are just copied to the new
         * journal. The resources in the view are:
         *
         * 1. journal 2. segment
         *
         * And this update will replace the segment.
         */

        // new index segment built from a view that did not include
        // data from the live journal.
        assert segmentMetadata.getCreateTime()
                < getJournal().getRootBlockView().getFirstCommitTime()
            : "segment createTime LT journal 1st commit time"
                + ": segmentMetadata="
                + segmentMetadata
                + ", journal: "
                + getJournal().getRootBlockView();

        // new view definition.
        final IResourceMetadata[] newResources =
            new IResourceMetadata[] {
              // the live journal.
              getJournal().getResourceMetadata(),
              // the newly built index segment.
              segmentMetadata
            };

        // describe the index partition: same identity and key range, new resources.
        indexMetadata.setPartitionMetadata(
            new LocalPartitionMetadata(
                currentpmd.getPartitionId(),
                currentpmd.getSourcePartitionId(),
                currentpmd.getLeftSeparatorKey(),
                currentpmd.getRightSeparatorKey(),
                newResources,
                currentpmd.getIndexPartitionCause()));

        // update the metadata associated with the btree
        btree.setIndexMetadata(indexMetadata);

        if (INFO)
          log.info(
              "Updated view: name="
                  + getOnlyResource()
                  + ", pmd="
                  + indexMetadata.getPartitionMetadata());

        /*
         * Verify that the btree recognizes that it needs to be
         * checkpointed.
         *
         * Note: The atomic commit point is when this task commits.
         */
        assert btree.needsCheckpoint();

        // record the successful index partition merge.
        resourceManager.overflowCounters.indexPartitionMergeCounter.incrementAndGet();

        return null;

      } finally {

        updateEvent.end();
      }
    } // doTask()
  /**
   * Test generates an {@link IndexSegment} from a (typically historical) fused view of an index
   * partition. The resulting {@link IndexSegment} is a complete replacement for the historical view
   * but does not possess any deleted index entries. Typically the {@link IndexSegment} will be used
   * to replace the current index partition definition such that the resources that were the inputs
   * to the view from which the {@link IndexSegment} was built are no longer required to read on
   * that view. This change needs to be recorded in the {@link MetadataIndex} before clients will
   * begin reading from the new view using the new {@link IndexSegment}.
   *
   * @throws IOException
   * @throws ExecutionException
   * @throws InterruptedException
   * @todo test more complex merges.
   */
  public void test_mergeWithOverflow()
      throws IOException, InterruptedException, ExecutionException {

    /*
     * Register the index.
     */
    final String name = "testIndex";
    final UUID indexUUID = UUID.randomUUID();
    final IndexMetadata indexMetadata = new IndexMetadata(name, indexUUID);
    {

      // must support delete markers
      indexMetadata.setDeleteMarkers(true);

      // must be an index partition.
      indexMetadata.setPartitionMetadata(
          new LocalPartitionMetadata(
              0, // partitionId.
              -1, // not a move.
              new byte[] {}, // leftSeparator
              null, // rightSeparator
              new IResourceMetadata[] {
                resourceManager.getLiveJournal().getResourceMetadata(),
              },
              IndexPartitionCause.register(resourceManager)));

      // submit task to register the index and wait for it to complete.
      concurrencyManager
          .submit(new RegisterIndexTask(concurrencyManager, name, indexMetadata))
          .get();
    }

    /*
     * Populate the index with some data. The same tuples are written on a
     * ground truth B+Tree which is used for the comparisons below.
     */
    final BTree groundTruth =
        BTree.create(new SimpleMemoryRawStore(), new IndexMetadata(indexUUID));
    {
      final int nentries = 10;

      final byte[][] keys = new byte[nentries][];
      final byte[][] vals = new byte[nentries][];

      final Random r = new Random();

      for (int i = 0; i < nentries; i++) {

        keys[i] = TestKeyBuilder.asSortKey(i);

        vals[i] = new byte[4];

        r.nextBytes(vals[i]);

        groundTruth.insert(keys[i], vals[i]);
      }

      final IIndexProcedure proc =
          BatchInsertConstructor.RETURN_NO_VALUES.newInstance(
              indexMetadata, 0 /* fromIndex */, nentries /*toIndex*/, keys, vals);

      // submit the task and wait for it to complete.
      concurrencyManager
          .submit(new IndexProcedureTask(concurrencyManager, ITx.UNISOLATED, name, proc))
          .get();
    }

    /*
     * Force overflow causing an empty btree to be created for that index on
     * a new journal and the view definition in the new btree to be updated.
     */

    // createTime of the old journal.
    final long createTime0 = resourceManager.getLiveJournal().getRootBlockView().getCreateTime();

    // uuid of the old journal.
    final UUID uuid0 = resourceManager.getLiveJournal().getRootBlockView().getUUID();

    // force overflow onto a new journal.
    final OverflowMetadata overflowMetadata = resourceManager.doSynchronousOverflow();

    // nothing should have been copied to the new journal.
    assertEquals(0, overflowMetadata.getActionCount(OverflowActionEnum.Copy));

    // lookup the old journal again using its createTime.
    final AbstractJournal oldJournal = resourceManager.getJournal(createTime0);
    assertEquals("uuid", uuid0, oldJournal.getRootBlockView().getUUID());

    /*
     * The old journal must have a non-zero closeTime.
     *
     * Note: this is a value comparison on the primitive. An identity test
     * on the autoboxed values would depend on how the values are boxed.
     */
    assertTrue("closeTime", oldJournal.getRootBlockView().getCloseTime() != 0L);

    // run merge task.
    final BuildResult result;
    {

      /*
       * Note: The task start time is a historical read on the final
       * committed state of the old journal. This means that the generated
       * index segment will have a createTime EQ to the lastCommitTime on
       * the old journal. This also means that it will have been generated
       * from a fused view of all data as of the final commit state of the
       * old journal.
       */
      final ViewMetadata vmd = overflowMetadata.getViewMetadata(name);

      // task to run.
      final CompactingMergeTask task = new CompactingMergeTask(vmd);

      try {

        // overflow must be disallowed as a task pre-condition.
        resourceManager.overflowAllowed.compareAndSet(true, false);

        /*
         * Submit task and await result (metadata describing the new
         * index segment).
         */
        result = concurrencyManager.submit(task).get();

      } finally {

        // re-enable overflow processing.
        resourceManager.overflowAllowed.set(true);
      }

      final IResourceMetadata segmentMetadata = result.segmentMetadata;

      if (log.isInfoEnabled()) log.info(segmentMetadata.toString());

      // verify index segment can be opened.
      resourceManager.openStore(segmentMetadata.getUUID());

      // Note: asserting that the segment file exists would only work if we
      // stored the file path rather than its basename.

      // verify createTime == lastCommitTime on the old journal.
      assertEquals(
          "createTime",
          oldJournal.getRootBlockView().getLastCommitTime(),
          segmentMetadata.getCreateTime());

      // verify segment has all data in the groundTruth btree.
      {
        final IndexSegmentStore segStore =
            (IndexSegmentStore) resourceManager.openStore(segmentMetadata.getUUID());

        final IndexSegment seg = segStore.loadIndexSegment();

        AbstractBTreeTestCase.assertSameBTree(groundTruth, seg);
      }
    }

    /*
     * verify same data from ground truth and the new view (using btree
     * helper classes for this).
     */
    {
      final IIndex actual = resourceManager.getIndex(name, ITx.UNISOLATED);

      AbstractBTreeTestCase.assertSameBTree(groundTruth, actual);
    }
  }
  /**
   * Verifies that a write-write conflict is detected and then resolved. An index carrying an
   * {@link IConflictResolver} is registered and the journal is committed. Two transactions (tx1,
   * tx2) are started and each writes a value under the same key. tx1 commits first. When tx2
   * commits, the test expects the conflict resolver to have been applied such that validation
   * succeeds and the value chosen by the resolver is what is found on the unisolated index.
   */
  public void test_writeWriteConflict_conflictIsResolved() {

    final Journal journal = new Journal(getProperties());

    try {

      final String indexName = "abc";

      // the single key written by both transactions.
      final byte[] key = new byte[] {1};

      // value written by tx1, value written by tx2, and the value the resolver is set up with.
      final byte[] tx1Value = new byte[] {1};
      final byte[] tx2Value = new byte[] {2};
      final byte[] resolvedValue = new byte[] {3};

      /*
       * Register an isolatable index with a conflict resolver and commit
       * the journal.
       */
      final IndexMetadata md = new IndexMetadata(indexName, UUID.randomUUID());
      md.setIsolatable(true);
      md.setConflictResolver(new SingleValueConflictResolver(key, resolvedValue));
      journal.registerIndex(indexName, BTree.create(journal, md));
      journal.commit();

      // Start both transactions before either one writes.
      final long tx1 = journal.newTx(ITx.UNISOLATED);
      final long tx2 = journal.newTx(ITx.UNISOLATED);

      // Each transaction writes its own value under the same key on the same index.
      journal.getIndex(indexName, tx1).insert(key, tx1Value);
      journal.getIndex(indexName, tx2).insert(key, tx2Value);

      // tx1 commits first: its value must be visible on the unisolated index.
      journal.commit(tx1);
      final byte[] afterTx1 = (byte[]) journal.getIndex(indexName).lookup(key);
      assertEquals(tx1Value, afterTx1);

      // tx2 commits second: the resolved value must be visible on the unisolated index.
      journal.commit(tx2);
      final byte[] afterTx2 = (byte[]) journal.getIndex(indexName).lookup(key);
      assertEquals(resolvedValue, afterTx2);

    } finally {

      journal.destroy();
    }
  }
  /**
   * Verifies that a write-write conflict is detected. An index without a conflict resolver is
   * registered and the journal is committed. Two transactions (tx1, tx2) are started and each
   * writes a value under the same key. tx1 commits first. The commit of tx2 is then expected to
   * report a {@link ValidationError}.
   */
  public void test_writeWriteConflict_correctDetection() {

    final Journal journal = new Journal(getProperties());

    try {

      final String indexName = "abc";

      // the single key written by both transactions.
      final byte[] key = new byte[] {1};

      // value written by tx1 and value written by tx2.
      final byte[] tx1Value = new byte[] {1};
      final byte[] tx2Value = new byte[] {2};

      /*
       * Register an isolatable index and commit the journal.
       *
       * Note: No conflict resolver.
       */
      final IndexMetadata md = new IndexMetadata(indexName, UUID.randomUUID());
      md.setIsolatable(true);
      journal.registerIndex(indexName, BTree.create(journal, md));
      journal.commit();

      // Start both transactions before either one writes.
      final long tx1 = journal.newTx(ITx.UNISOLATED);
      final long tx2 = journal.newTx(ITx.UNISOLATED);

      // Each transaction writes its own value under the same key on the same index.
      journal.getIndex(indexName, tx1).insert(key, tx1Value);
      journal.getIndex(indexName, tx2).insert(key, tx2Value);

      // tx1 commits first: its value must be visible on the unisolated index.
      journal.commit(tx1);
      final byte[] afterTx1 = (byte[]) journal.getIndex(indexName).lookup(key);
      assertEquals(tx1Value, afterTx1);

      // tx2 must not be able to commit.
      try {
        journal.commit(tx2);
        fail("Expecting: " + ValidationError.class);
      } catch (ValidationError ex) {
        if (log.isInfoEnabled()) {
          log.info("Ignoring expected exception: " + ex);
        }
      }

    } finally {

      journal.destroy();
    }
  }