/** * Atomic update. * * @return <code>null</code> */ @Override protected Void doTask() throws Exception { updateEvent.start(); try { if (resourceManager.isOverflowAllowed()) throw new IllegalStateException(); final SegmentMetadata segmentMetadata = buildResult.segmentMetadata; if (INFO) log.info("Begin: name=" + getOnlyResource() + ", newSegment=" + segmentMetadata); /* * Open the unisolated B+Tree on the live journal that is * absorbing writes. We are going to update its index metadata. * * Note: I am using AbstractTask#getIndex(String name) so that * the concurrency control logic will notice the changes to the * BTree and cause it to be checkpointed if this task succeeds * normally. */ final ILocalBTreeView view = (ILocalBTreeView) getIndex(getOnlyResource()); // make sure that this is the same scale-out index. assertSameIndex(indexUUID, view.getMutableBTree()); if (view instanceof BTree) { /* * Note: there is an expectation that this is not a simple * BTree because this the build task is supposed to be * invoked after an overflow event, and that event should * have re-defined the view to include the BTree on the new * journal plus the historical view. * * One explanation for finding a simple view here is that * the view was a simple BTree on the old journal and the * data was copied from the old journal into the new journal * and then someone decided to do a build even through a * copy had already been done. However, this is not a very * good explanation since we try to avoid doing a build if * we have already done a copy! */ throw new RuntimeException( "View is only a B+Tree: name=" + buildResult.name + ", pmd=" + view.getIndexMetadata().getPartitionMetadata()); } // The live B+Tree. final BTree btree = view.getMutableBTree(); if (INFO) log.info( "src=" + getOnlyResource() + ",counter=" + view.getCounter().get() + ",checkpoint=" + btree.getCheckpoint()); assert btree != null : "Expecting index: " + getOnlyResource(); // clone the current metadata record for the live index. final IndexMetadata indexMetadata = btree.getIndexMetadata().clone(); /* * This is the index partition definition on the live index - * the one that will be replaced with a new view as the result * of this atomic update. */ final LocalPartitionMetadata currentpmd = indexMetadata.getPartitionMetadata(); // Check pre-conditions. final IResourceMetadata[] currentResources = currentpmd.getResources(); { if (currentpmd == null) { throw new IllegalStateException("Not an index partition: " + getOnlyResource()); } if (!currentResources[0].getUUID().equals(getJournal().getRootBlockView().getUUID())) { throw new IllegalStateException( "Expecting live journal to be the first resource: " + currentResources); } /* * Note: I have commented out a bunch of pre-condition tests * that are not valid for histories such as: * * history=create() register(0) split(0) * copy(entryCount=314) * * This case arises when there are not enough index entries * written on the journal after a split to warrant a build * so the buffered writes are just copied to the new * journal. The resources in the view are: * * 1. journal 2. segment * * And this update will replace the segment. */ // // the old journal's resource metadata. // final IResourceMetadata oldJournalMetadata = // oldResources[1]; // assert oldJournalMetadata != null; // assert oldJournalMetadata instanceof JournalMetadata : // "name=" // + getOnlyResource() + ", old pmd=" + oldpmd // + ", segmentMetadata=" + buildResult.segmentMetadata; // // // live journal must be newer. // assert journal.getRootBlockView().getCreateTime() > // oldJournalMetadata // .getCreateTime(); // new index segment build from a view that did not include // data from the live journal. assert segmentMetadata.getCreateTime() < getJournal().getRootBlockView().getFirstCommitTime() : "segment createTime LT journal 1st commit time" + ": segmentMetadata=" + segmentMetadata + ", journal: " + getJournal().getRootBlockView(); // if (oldResources.length == 3) { // // // the old index segment's resource metadata. // final IResourceMetadata oldSegmentMetadata = // oldResources[2]; // assert oldSegmentMetadata != null; // assert oldSegmentMetadata instanceof SegmentMetadata; // // assert oldSegmentMetadata.getCreateTime() <= // oldJournalMetadata // .getCreateTime(); // // } } // new view definition. final IResourceMetadata[] newResources = new IResourceMetadata[] { // the live journal. getJournal().getResourceMetadata(), // the newly built index segment. segmentMetadata }; // describe the index partition. indexMetadata.setPartitionMetadata( new LocalPartitionMetadata( // currentpmd.getPartitionId(), // currentpmd.getSourcePartitionId(), // currentpmd.getLeftSeparatorKey(), // currentpmd.getRightSeparatorKey(), // newResources, // currentpmd.getIndexPartitionCause() // currentpmd.getHistory() // + OverflowActionEnum.Merge// // + "(lastCommitTime=" // + segmentMetadata.getCreateTime()// // + ",btreeEntryCount=" // + btree.getEntryCount()// // + ",segmentEntryCount=" // + buildResult.builder.getCheckpoint().nentries// // + ",segment=" // + segmentMetadata.getUUID()// // + ",counter=" // + btree.getCounter().get()// // + ",oldResources=" // + Arrays.toString(currentResources) + ") " )); // update the metadata associated with the btree btree.setIndexMetadata(indexMetadata); if (INFO) log.info( "Updated view: name=" + getOnlyResource() + ", pmd=" + indexMetadata.getPartitionMetadata()); /* * Verify that the btree recognizes that it needs to be * checkpointed. * * Note: The atomic commit point is when this task commits. */ assert btree.needsCheckpoint(); // btree.writeCheckpoint(); // { // final long id0 = btree.getCounter().get(); // final long pid = id0 >> 32; // final long mask = 0xffffffffL; // final int ctr = (int) (id0 & mask); // log.warn("name="+getOnlyResource()+", counter="+id0+", pid="+pid+", // ctr="+ctr); // } // notify successful index partition build. resourceManager.overflowCounters.indexPartitionMergeCounter.incrementAndGet(); return null; } finally { updateEvent.end(); } } // doTask()
/** * Test generates an {@link IndexSegment} from a (typically historical) fused view of an index * partition. The resulting {@link IndexSegment} is a complete replacement for the historical view * but does not possess any deleted index entries. Typically the {@link IndexSegment} will be used * to replace the current index partition definition such that the resources that were the inputs * to the view from which the {@link IndexSegment} was built are no longer required to read on * that view. This change needs to be recorded in the {@link MetadataIndex} before clients will * being reading from the new view using the new {@link IndexSegment}. * * @throws IOException * @throws ExecutionException * @throws InterruptedException * @todo test more complex merges. */ public void test_mergeWithOverflow() throws IOException, InterruptedException, ExecutionException { /* * Register the index. */ final String name = "testIndex"; final UUID indexUUID = UUID.randomUUID(); final IndexMetadata indexMetadata = new IndexMetadata(name, indexUUID); { // must support delete markers indexMetadata.setDeleteMarkers(true); // must be an index partition. indexMetadata.setPartitionMetadata( new LocalPartitionMetadata( 0, // partitionId. -1, // not a move. new byte[] {}, // leftSeparator null, // rightSeparator new IResourceMetadata[] { // resourceManager.getLiveJournal().getResourceMetadata(), // }, // IndexPartitionCause.register(resourceManager) // ,"" // history )); // submit task to register the index and wait for it to complete. concurrencyManager .submit(new RegisterIndexTask(concurrencyManager, name, indexMetadata)) .get(); } /* * Populate the index with some data. */ final BTree groundTruth = BTree.create(new SimpleMemoryRawStore(), new IndexMetadata(indexUUID)); { final int nentries = 10; final byte[][] keys = new byte[nentries][]; final byte[][] vals = new byte[nentries][]; final Random r = new Random(); for (int i = 0; i < nentries; i++) { keys[i] = TestKeyBuilder.asSortKey(i); vals[i] = new byte[4]; r.nextBytes(vals[i]); groundTruth.insert(keys[i], vals[i]); } final IIndexProcedure proc = BatchInsertConstructor.RETURN_NO_VALUES.newInstance( indexMetadata, 0 /* fromIndex */, nentries /*toIndex*/, keys, vals); // submit the task and wait for it to complete. concurrencyManager .submit(new IndexProcedureTask(concurrencyManager, ITx.UNISOLATED, name, proc)) .get(); } /* * Force overflow causing an empty btree to be created for that index on * a new journal and the view definition in the new btree to be updated. */ // createTime of the old journal. final long createTime0 = resourceManager.getLiveJournal().getRootBlockView().getCreateTime(); // uuid of the old journal. final UUID uuid0 = resourceManager.getLiveJournal().getRootBlockView().getUUID(); // force overflow onto a new journal. final OverflowMetadata overflowMetadata = resourceManager.doSynchronousOverflow(); // nothing should have been copied to the new journal. assertEquals(0, overflowMetadata.getActionCount(OverflowActionEnum.Copy)); // lookup the old journal again using its createTime. final AbstractJournal oldJournal = resourceManager.getJournal(createTime0); assertEquals("uuid", uuid0, oldJournal.getRootBlockView().getUUID()); assertNotSame("closeTime", 0L, oldJournal.getRootBlockView().getCloseTime()); // run merge task. final BuildResult result; { /* * Note: The task start time is a historical read on the final * committed state of the old journal. This means that the generated * index segment will have a createTime EQ to the lastCommitTime on * the old journal. This also means that it will have been generated * from a fused view of all data as of the final commit state of the * old journal. */ // final OverflowMetadata omd = new OverflowMetadata(resourceManager); final ViewMetadata vmd = overflowMetadata.getViewMetadata(name); // task to run. final CompactingMergeTask task = new CompactingMergeTask(vmd); try { // overflow must be disallowed as a task pre-condition. resourceManager.overflowAllowed.compareAndSet(true, false); /* * Submit task and await result (metadata describing the new * index segment). */ result = concurrencyManager.submit(task).get(); } finally { // re-enable overflow processing. resourceManager.overflowAllowed.set(true); } final IResourceMetadata segmentMetadata = result.segmentMetadata; if (log.isInfoEnabled()) log.info(segmentMetadata.toString()); // verify index segment can be opened. resourceManager.openStore(segmentMetadata.getUUID()); // Note: this assertion only works if we store the file path vs its basename. // assertTrue(new File(segmentMetadata.getFile()).exists()); // verify createTime == lastCommitTime on the old journal. assertEquals( "createTime", oldJournal.getRootBlockView().getLastCommitTime(), segmentMetadata.getCreateTime()); // verify segment has all data in the groundTruth btree. { final IndexSegmentStore segStore = (IndexSegmentStore) resourceManager.openStore(segmentMetadata.getUUID()); final IndexSegment seg = segStore.loadIndexSegment(); AbstractBTreeTestCase.assertSameBTree(groundTruth, seg); } } /* * verify same data from ground truth and the new view (using btree * helper classes for this). */ { final IIndex actual = resourceManager.getIndex(name, ITx.UNISOLATED); AbstractBTreeTestCase.assertSameBTree(groundTruth, actual); } }