Example #1
0
    /**
     * Atomic update.
     *
     * @return <code>null</code>
     */
    @Override
    protected Void doTask() throws Exception {

      updateEvent.start();

      try {

        if (resourceManager.isOverflowAllowed()) throw new IllegalStateException();

        final SegmentMetadata segmentMetadata = buildResult.segmentMetadata;

        if (INFO) log.info("Begin: name=" + getOnlyResource() + ", newSegment=" + segmentMetadata);

        /*
         * Open the unisolated B+Tree on the live journal that is
         * absorbing writes. We are going to update its index metadata.
         *
         * Note: I am using AbstractTask#getIndex(String name) so that
         * the concurrency control logic will notice the changes to the
         * BTree and cause it to be checkpointed if this task succeeds
         * normally.
         */
        final ILocalBTreeView view = (ILocalBTreeView) getIndex(getOnlyResource());

        // make sure that this is the same scale-out index.
        assertSameIndex(indexUUID, view.getMutableBTree());

        if (view instanceof BTree) {

          /*
           * Note: there is an expectation that this is not a simple
           * BTree because this the build task is supposed to be
           * invoked after an overflow event, and that event should
           * have re-defined the view to include the BTree on the new
           * journal plus the historical view.
           *
           * One explanation for finding a simple view here is that
           * the view was a simple BTree on the old journal and the
           * data was copied from the old journal into the new journal
           * and then someone decided to do a build even through a
           * copy had already been done. However, this is not a very
           * good explanation since we try to avoid doing a build if
           * we have already done a copy!
           */

          throw new RuntimeException(
              "View is only a B+Tree: name="
                  + buildResult.name
                  + ", pmd="
                  + view.getIndexMetadata().getPartitionMetadata());
        }

        // The live B+Tree.
        final BTree btree = view.getMutableBTree();

        if (INFO)
          log.info(
              "src="
                  + getOnlyResource()
                  + ",counter="
                  + view.getCounter().get()
                  + ",checkpoint="
                  + btree.getCheckpoint());

        assert btree != null : "Expecting index: " + getOnlyResource();

        // clone the current metadata record for the live index.
        final IndexMetadata indexMetadata = btree.getIndexMetadata().clone();

        /*
         * This is the index partition definition on the live index -
         * the one that will be replaced with a new view as the result
         * of this atomic update.
         */
        final LocalPartitionMetadata currentpmd = indexMetadata.getPartitionMetadata();

        // Check pre-conditions.
        final IResourceMetadata[] currentResources = currentpmd.getResources();
        {
          if (currentpmd == null) {

            throw new IllegalStateException("Not an index partition: " + getOnlyResource());
          }

          if (!currentResources[0].getUUID().equals(getJournal().getRootBlockView().getUUID())) {

            throw new IllegalStateException(
                "Expecting live journal to be the first resource: " + currentResources);
          }

          /*
           * Note: I have commented out a bunch of pre-condition tests
           * that are not valid for histories such as:
           *
           * history=create() register(0) split(0)
           * copy(entryCount=314)
           *
           * This case arises when there are not enough index entries
           * written on the journal after a split to warrant a build
           * so the buffered writes are just copied to the new
           * journal. The resources in the view are:
           *
           * 1. journal 2. segment
           *
           * And this update will replace the segment.
           */

          // // the old journal's resource metadata.
          // final IResourceMetadata oldJournalMetadata =
          // oldResources[1];
          // assert oldJournalMetadata != null;
          // assert oldJournalMetadata instanceof JournalMetadata :
          // "name="
          // + getOnlyResource() + ", old pmd=" + oldpmd
          // + ", segmentMetadata=" + buildResult.segmentMetadata;
          //
          // // live journal must be newer.
          // assert journal.getRootBlockView().getCreateTime() >
          // oldJournalMetadata
          // .getCreateTime();
          // new index segment build from a view that did not include
          // data from the live journal.
          assert segmentMetadata.getCreateTime()
                  < getJournal().getRootBlockView().getFirstCommitTime()
              : "segment createTime LT journal 1st commit time"
                  + ": segmentMetadata="
                  + segmentMetadata
                  + ", journal: "
                  + getJournal().getRootBlockView();

          // if (oldResources.length == 3) {
          //
          // // the old index segment's resource metadata.
          // final IResourceMetadata oldSegmentMetadata =
          // oldResources[2];
          // assert oldSegmentMetadata != null;
          // assert oldSegmentMetadata instanceof SegmentMetadata;
          //
          // assert oldSegmentMetadata.getCreateTime() <=
          // oldJournalMetadata
          // .getCreateTime();
          //
          // }

        }

        // new view definition.
        final IResourceMetadata[] newResources =
            new IResourceMetadata[] {
              // the live journal.
              getJournal().getResourceMetadata(),
              // the newly built index segment.
              segmentMetadata
            };

        // describe the index partition.
        indexMetadata.setPartitionMetadata(
            new LocalPartitionMetadata( //
                currentpmd.getPartitionId(), //
                currentpmd.getSourcePartitionId(), //
                currentpmd.getLeftSeparatorKey(), //
                currentpmd.getRightSeparatorKey(), //
                newResources, //
                currentpmd.getIndexPartitionCause()
                //                        currentpmd.getHistory()
                //                                + OverflowActionEnum.Merge//
                //                                + "(lastCommitTime="
                //                                + segmentMetadata.getCreateTime()//
                //                                + ",btreeEntryCount="
                //                                + btree.getEntryCount()//
                //                                + ",segmentEntryCount="
                //                                + buildResult.builder.getCheckpoint().nentries//
                //                                + ",segment="
                //                                + segmentMetadata.getUUID()//
                //                                + ",counter="
                //                                + btree.getCounter().get()//
                //                                + ",oldResources="
                //                                + Arrays.toString(currentResources) + ") "
                ));

        // update the metadata associated with the btree
        btree.setIndexMetadata(indexMetadata);

        if (INFO)
          log.info(
              "Updated view: name="
                  + getOnlyResource()
                  + ", pmd="
                  + indexMetadata.getPartitionMetadata());

        /*
         * Verify that the btree recognizes that it needs to be
         * checkpointed.
         *
         * Note: The atomic commit point is when this task commits.
         */
        assert btree.needsCheckpoint();
        //            btree.writeCheckpoint();
        //            {
        //                final long id0 = btree.getCounter().get();
        //                final long pid = id0 >> 32;
        //                final long mask = 0xffffffffL;
        //                final int ctr = (int) (id0 & mask);
        //                log.warn("name="+getOnlyResource()+", counter="+id0+", pid="+pid+",
        // ctr="+ctr);
        //            }

        // notify successful index partition build.
        resourceManager.overflowCounters.indexPartitionMergeCounter.incrementAndGet();

        return null;

      } finally {

        updateEvent.end();
      }
    } // doTask()
  /**
   * Test generates an {@link IndexSegment} from a (typically historical) fused view of an index
   * partition. The resulting {@link IndexSegment} is a complete replacement for the historical view
   * but does not possess any deleted index entries. Typically the {@link IndexSegment} will be used
   * to replace the current index partition definition such that the resources that were the inputs
   * to the view from which the {@link IndexSegment} was built are no longer required to read on
   * that view. This change needs to be recorded in the {@link MetadataIndex} before clients will
   * being reading from the new view using the new {@link IndexSegment}.
   *
   * @throws IOException
   * @throws ExecutionException
   * @throws InterruptedException
   * @todo test more complex merges.
   */
  public void test_mergeWithOverflow()
      throws IOException, InterruptedException, ExecutionException {

    /*
     * Register the index.
     */
    final String name = "testIndex";
    final UUID indexUUID = UUID.randomUUID();
    final IndexMetadata indexMetadata = new IndexMetadata(name, indexUUID);
    {

      // must support delete markers
      indexMetadata.setDeleteMarkers(true);

      // must be an index partition.
      indexMetadata.setPartitionMetadata(
          new LocalPartitionMetadata(
              0, // partitionId.
              -1, // not a move.
              new byte[] {}, // leftSeparator
              null, // rightSeparator
              new IResourceMetadata[] { //
                resourceManager.getLiveJournal().getResourceMetadata(), //
              }, //
              IndexPartitionCause.register(resourceManager)
              //                    ,"" // history
              ));

      // submit task to register the index and wait for it to complete.
      concurrencyManager
          .submit(new RegisterIndexTask(concurrencyManager, name, indexMetadata))
          .get();
    }

    /*
     * Populate the index with some data.
     */
    final BTree groundTruth =
        BTree.create(new SimpleMemoryRawStore(), new IndexMetadata(indexUUID));
    {
      final int nentries = 10;

      final byte[][] keys = new byte[nentries][];
      final byte[][] vals = new byte[nentries][];

      final Random r = new Random();

      for (int i = 0; i < nentries; i++) {

        keys[i] = TestKeyBuilder.asSortKey(i);

        vals[i] = new byte[4];

        r.nextBytes(vals[i]);

        groundTruth.insert(keys[i], vals[i]);
      }

      final IIndexProcedure proc =
          BatchInsertConstructor.RETURN_NO_VALUES.newInstance(
              indexMetadata, 0 /* fromIndex */, nentries /*toIndex*/, keys, vals);

      // submit the task and wait for it to complete.
      concurrencyManager
          .submit(new IndexProcedureTask(concurrencyManager, ITx.UNISOLATED, name, proc))
          .get();
    }

    /*
     * Force overflow causing an empty btree to be created for that index on
     * a new journal and the view definition in the new btree to be updated.
     */

    // createTime of the old journal.
    final long createTime0 = resourceManager.getLiveJournal().getRootBlockView().getCreateTime();

    // uuid of the old journal.
    final UUID uuid0 = resourceManager.getLiveJournal().getRootBlockView().getUUID();

    // force overflow onto a new journal.
    final OverflowMetadata overflowMetadata = resourceManager.doSynchronousOverflow();

    // nothing should have been copied to the new journal.
    assertEquals(0, overflowMetadata.getActionCount(OverflowActionEnum.Copy));

    // lookup the old journal again using its createTime.
    final AbstractJournal oldJournal = resourceManager.getJournal(createTime0);
    assertEquals("uuid", uuid0, oldJournal.getRootBlockView().getUUID());
    assertNotSame("closeTime", 0L, oldJournal.getRootBlockView().getCloseTime());

    // run merge task.
    final BuildResult result;
    {

      /*
       * Note: The task start time is a historical read on the final
       * committed state of the old journal. This means that the generated
       * index segment will have a createTime EQ to the lastCommitTime on
       * the old journal. This also means that it will have been generated
       * from a fused view of all data as of the final commit state of the
       * old journal.
       */
      //            final OverflowMetadata omd = new OverflowMetadata(resourceManager);

      final ViewMetadata vmd = overflowMetadata.getViewMetadata(name);

      // task to run.
      final CompactingMergeTask task = new CompactingMergeTask(vmd);

      try {

        // overflow must be disallowed as a task pre-condition.
        resourceManager.overflowAllowed.compareAndSet(true, false);

        /*
         * Submit task and await result (metadata describing the new
         * index segment).
         */
        result = concurrencyManager.submit(task).get();

      } finally {

        // re-enable overflow processing.
        resourceManager.overflowAllowed.set(true);
      }

      final IResourceMetadata segmentMetadata = result.segmentMetadata;

      if (log.isInfoEnabled()) log.info(segmentMetadata.toString());

      // verify index segment can be opened.
      resourceManager.openStore(segmentMetadata.getUUID());

      // Note: this assertion only works if we store the file path vs its basename.
      //            assertTrue(new File(segmentMetadata.getFile()).exists());

      // verify createTime == lastCommitTime on the old journal.
      assertEquals(
          "createTime",
          oldJournal.getRootBlockView().getLastCommitTime(),
          segmentMetadata.getCreateTime());

      // verify segment has all data in the groundTruth btree.
      {
        final IndexSegmentStore segStore =
            (IndexSegmentStore) resourceManager.openStore(segmentMetadata.getUUID());

        final IndexSegment seg = segStore.loadIndexSegment();

        AbstractBTreeTestCase.assertSameBTree(groundTruth, seg);
      }
    }

    /*
     * verify same data from ground truth and the new view (using btree
     * helper classes for this).
     */
    {
      final IIndex actual = resourceManager.getIndex(name, ITx.UNISOLATED);

      AbstractBTreeTestCase.assertSameBTree(groundTruth, actual);
    }
  }