public static void main(String[] args) { final TemporaryStore store = new TemporaryStore(); try { /* * Register the index. There are a lot of options for the B+Tree, * but you only need to specify the index name and the UUID for the * index. Each store can hold multiple named indices. */ { final IndexMetadata indexMetadata = new IndexMetadata("testIndex", UUID.randomUUID()); store.registerIndex(indexMetadata); } /* * Lookup the unisolated B+Tree. This is the mutable B+Tree view. * * While the temporary store does not differentiate between mutable * and read-only views, the Journal and the scale-out architecture * do. */ { final BTree btree = store.getIndex("testIndex", ITx.UNISOLATED); // lookup the tuple (not found). System.err.println("tuple: " + btree.lookup("hello")); // add a tuple btree.insert("hello", "world"); // lookup the tuple System.err.println("tuple: " + btree.lookup("hello")); // update the tuple btree.insert("hello", "again"); // lookup the new value System.err.println("tuple: " + btree.lookup("hello")); } } finally { // destroy the backing store. store.destroy(); } }
/** * Return a named index. The index will be isolated at the same level as this transaction. Changes * on the index will be made restart-safe iff the transaction successfully commits. * * @param name The name of the index. * @return The named index or <code>null</code> if no index is registered under that name. * @exception IllegalStateException if the transaction is not active. */ public ILocalBTreeView getIndex(final String name) { if (name == null) throw new IllegalArgumentException(); if (readOnly) { /* * Note: Access to the indices currently goes through the * IResourceManager interface for a read-only transaction. */ throw new UnsupportedOperationException(); } /* * @todo lock could be per index for higher concurrency rather than for * all indices which you might access through this tx. */ lock.lock(); try { if (!isActive()) { throw new IllegalStateException(NOT_ACTIVE); } /* * Test the cache - this is used so that we can recover the same * instance on each call within the same transaction. */ if (indices.containsKey(name)) { // Already defined. return indices.get(name); } final ILocalBTreeView index; /* * See if the index was registered as of the ground state used by * this transaction to isolated indices. * * Note: IResourceManager#getIndex(String name,long timestamp) calls * us when the timestamp identifies an active transaction so we MUST * NOT call that method ourselves! Hence there is some replication * of logic between that method and this one. */ final AbstractBTree[] sources = resourceManager.getIndexSources(name, readsOnCommitTime); // startTime); if (sources == null) { /* * The named index was not registered as of the transaction * ground state. */ if (log.isInfoEnabled()) log.info("No such index: " + name + ", startTime=" + startTime); return null; } if (!sources[0].getIndexMetadata().isIsolatable()) { throw new RuntimeException("Not isolatable: " + name); } /* * Isolate the named btree. 
*/ // if (readOnly) { // // assert sources[0].isReadOnly(); // // if (sources.length == 1) { // // index = sources[0]; // // } else { // // index = new FusedView(sources); // // } // // } else { /* * Setup the view. The write set is always the first element in * the view. */ // the view definition. final AbstractBTree[] b = new AbstractBTree[sources.length + 1]; /* * Create the write set on a temporary store. * * Note: The BTree is NOT registered under a name so it can not * be discovered on the temporary store. This is fine since we * hold onto a hard reference to the BTree in [indices]. */ b[0] = BTree.create(getTemporaryStore(), sources[0].getIndexMetadata().clone()); System.arraycopy(sources, 0, b, 1, sources.length); // create view with isolated write set. index = new IsolatedFusedView(-startTime, b); // report event. ResourceManager.isolateIndex(startTime, name); // } indices.put(name, index); return index; } finally { lock.unlock(); } }
/** * Atomic update. * * @return <code>null</code> */ @Override protected Void doTask() throws Exception { updateEvent.start(); try { if (resourceManager.isOverflowAllowed()) throw new IllegalStateException(); final SegmentMetadata segmentMetadata = buildResult.segmentMetadata; if (INFO) log.info("Begin: name=" + getOnlyResource() + ", newSegment=" + segmentMetadata); /* * Open the unisolated B+Tree on the live journal that is * absorbing writes. We are going to update its index metadata. * * Note: I am using AbstractTask#getIndex(String name) so that * the concurrency control logic will notice the changes to the * BTree and cause it to be checkpointed if this task succeeds * normally. */ final ILocalBTreeView view = (ILocalBTreeView) getIndex(getOnlyResource()); // make sure that this is the same scale-out index. assertSameIndex(indexUUID, view.getMutableBTree()); if (view instanceof BTree) { /* * Note: there is an expectation that this is not a simple * BTree because this the build task is supposed to be * invoked after an overflow event, and that event should * have re-defined the view to include the BTree on the new * journal plus the historical view. * * One explanation for finding a simple view here is that * the view was a simple BTree on the old journal and the * data was copied from the old journal into the new journal * and then someone decided to do a build even through a * copy had already been done. However, this is not a very * good explanation since we try to avoid doing a build if * we have already done a copy! */ throw new RuntimeException( "View is only a B+Tree: name=" + buildResult.name + ", pmd=" + view.getIndexMetadata().getPartitionMetadata()); } // The live B+Tree. 
final BTree btree = view.getMutableBTree(); if (INFO) log.info( "src=" + getOnlyResource() + ",counter=" + view.getCounter().get() + ",checkpoint=" + btree.getCheckpoint()); assert btree != null : "Expecting index: " + getOnlyResource(); // clone the current metadata record for the live index. final IndexMetadata indexMetadata = btree.getIndexMetadata().clone(); /* * This is the index partition definition on the live index - * the one that will be replaced with a new view as the result * of this atomic update. */ final LocalPartitionMetadata currentpmd = indexMetadata.getPartitionMetadata(); // Check pre-conditions. final IResourceMetadata[] currentResources = currentpmd.getResources(); { if (currentpmd == null) { throw new IllegalStateException("Not an index partition: " + getOnlyResource()); } if (!currentResources[0].getUUID().equals(getJournal().getRootBlockView().getUUID())) { throw new IllegalStateException( "Expecting live journal to be the first resource: " + currentResources); } /* * Note: I have commented out a bunch of pre-condition tests * that are not valid for histories such as: * * history=create() register(0) split(0) * copy(entryCount=314) * * This case arises when there are not enough index entries * written on the journal after a split to warrant a build * so the buffered writes are just copied to the new * journal. The resources in the view are: * * 1. journal 2. segment * * And this update will replace the segment. */ // // the old journal's resource metadata. // final IResourceMetadata oldJournalMetadata = // oldResources[1]; // assert oldJournalMetadata != null; // assert oldJournalMetadata instanceof JournalMetadata : // "name=" // + getOnlyResource() + ", old pmd=" + oldpmd // + ", segmentMetadata=" + buildResult.segmentMetadata; // // // live journal must be newer. 
// assert journal.getRootBlockView().getCreateTime() > // oldJournalMetadata // .getCreateTime(); // new index segment build from a view that did not include // data from the live journal. assert segmentMetadata.getCreateTime() < getJournal().getRootBlockView().getFirstCommitTime() : "segment createTime LT journal 1st commit time" + ": segmentMetadata=" + segmentMetadata + ", journal: " + getJournal().getRootBlockView(); // if (oldResources.length == 3) { // // // the old index segment's resource metadata. // final IResourceMetadata oldSegmentMetadata = // oldResources[2]; // assert oldSegmentMetadata != null; // assert oldSegmentMetadata instanceof SegmentMetadata; // // assert oldSegmentMetadata.getCreateTime() <= // oldJournalMetadata // .getCreateTime(); // // } } // new view definition. final IResourceMetadata[] newResources = new IResourceMetadata[] { // the live journal. getJournal().getResourceMetadata(), // the newly built index segment. segmentMetadata }; // describe the index partition. indexMetadata.setPartitionMetadata( new LocalPartitionMetadata( // currentpmd.getPartitionId(), // currentpmd.getSourcePartitionId(), // currentpmd.getLeftSeparatorKey(), // currentpmd.getRightSeparatorKey(), // newResources, // currentpmd.getIndexPartitionCause() // currentpmd.getHistory() // + OverflowActionEnum.Merge// // + "(lastCommitTime=" // + segmentMetadata.getCreateTime()// // + ",btreeEntryCount=" // + btree.getEntryCount()// // + ",segmentEntryCount=" // + buildResult.builder.getCheckpoint().nentries// // + ",segment=" // + segmentMetadata.getUUID()// // + ",counter=" // + btree.getCounter().get()// // + ",oldResources=" // + Arrays.toString(currentResources) + ") " )); // update the metadata associated with the btree btree.setIndexMetadata(indexMetadata); if (INFO) log.info( "Updated view: name=" + getOnlyResource() + ", pmd=" + indexMetadata.getPartitionMetadata()); /* * Verify that the btree recognizes that it needs to be * checkpointed. 
* * Note: The atomic commit point is when this task commits. */ assert btree.needsCheckpoint(); // btree.writeCheckpoint(); // { // final long id0 = btree.getCounter().get(); // final long pid = id0 >> 32; // final long mask = 0xffffffffL; // final int ctr = (int) (id0 & mask); // log.warn("name="+getOnlyResource()+", counter="+id0+", pid="+pid+", // ctr="+ctr); // } // notify successful index partition build. resourceManager.overflowCounters.indexPartitionMergeCounter.incrementAndGet(); return null; } finally { updateEvent.end(); } } // doTask()
/** * Test generates an {@link IndexSegment} from a (typically historical) fused view of an index * partition. The resulting {@link IndexSegment} is a complete replacement for the historical view * but does not possess any deleted index entries. Typically the {@link IndexSegment} will be used * to replace the current index partition definition such that the resources that were the inputs * to the view from which the {@link IndexSegment} was built are no longer required to read on * that view. This change needs to be recorded in the {@link MetadataIndex} before clients will * being reading from the new view using the new {@link IndexSegment}. * * @throws IOException * @throws ExecutionException * @throws InterruptedException * @todo test more complex merges. */ public void test_mergeWithOverflow() throws IOException, InterruptedException, ExecutionException { /* * Register the index. */ final String name = "testIndex"; final UUID indexUUID = UUID.randomUUID(); final IndexMetadata indexMetadata = new IndexMetadata(name, indexUUID); { // must support delete markers indexMetadata.setDeleteMarkers(true); // must be an index partition. indexMetadata.setPartitionMetadata( new LocalPartitionMetadata( 0, // partitionId. -1, // not a move. new byte[] {}, // leftSeparator null, // rightSeparator new IResourceMetadata[] { // resourceManager.getLiveJournal().getResourceMetadata(), // }, // IndexPartitionCause.register(resourceManager) // ,"" // history )); // submit task to register the index and wait for it to complete. concurrencyManager .submit(new RegisterIndexTask(concurrencyManager, name, indexMetadata)) .get(); } /* * Populate the index with some data. 
*/ final BTree groundTruth = BTree.create(new SimpleMemoryRawStore(), new IndexMetadata(indexUUID)); { final int nentries = 10; final byte[][] keys = new byte[nentries][]; final byte[][] vals = new byte[nentries][]; final Random r = new Random(); for (int i = 0; i < nentries; i++) { keys[i] = TestKeyBuilder.asSortKey(i); vals[i] = new byte[4]; r.nextBytes(vals[i]); groundTruth.insert(keys[i], vals[i]); } final IIndexProcedure proc = BatchInsertConstructor.RETURN_NO_VALUES.newInstance( indexMetadata, 0 /* fromIndex */, nentries /*toIndex*/, keys, vals); // submit the task and wait for it to complete. concurrencyManager .submit(new IndexProcedureTask(concurrencyManager, ITx.UNISOLATED, name, proc)) .get(); } /* * Force overflow causing an empty btree to be created for that index on * a new journal and the view definition in the new btree to be updated. */ // createTime of the old journal. final long createTime0 = resourceManager.getLiveJournal().getRootBlockView().getCreateTime(); // uuid of the old journal. final UUID uuid0 = resourceManager.getLiveJournal().getRootBlockView().getUUID(); // force overflow onto a new journal. final OverflowMetadata overflowMetadata = resourceManager.doSynchronousOverflow(); // nothing should have been copied to the new journal. assertEquals(0, overflowMetadata.getActionCount(OverflowActionEnum.Copy)); // lookup the old journal again using its createTime. final AbstractJournal oldJournal = resourceManager.getJournal(createTime0); assertEquals("uuid", uuid0, oldJournal.getRootBlockView().getUUID()); assertNotSame("closeTime", 0L, oldJournal.getRootBlockView().getCloseTime()); // run merge task. final BuildResult result; { /* * Note: The task start time is a historical read on the final * committed state of the old journal. This means that the generated * index segment will have a createTime EQ to the lastCommitTime on * the old journal. 
This also means that it will have been generated * from a fused view of all data as of the final commit state of the * old journal. */ // final OverflowMetadata omd = new OverflowMetadata(resourceManager); final ViewMetadata vmd = overflowMetadata.getViewMetadata(name); // task to run. final CompactingMergeTask task = new CompactingMergeTask(vmd); try { // overflow must be disallowed as a task pre-condition. resourceManager.overflowAllowed.compareAndSet(true, false); /* * Submit task and await result (metadata describing the new * index segment). */ result = concurrencyManager.submit(task).get(); } finally { // re-enable overflow processing. resourceManager.overflowAllowed.set(true); } final IResourceMetadata segmentMetadata = result.segmentMetadata; if (log.isInfoEnabled()) log.info(segmentMetadata.toString()); // verify index segment can be opened. resourceManager.openStore(segmentMetadata.getUUID()); // Note: this assertion only works if we store the file path vs its basename. // assertTrue(new File(segmentMetadata.getFile()).exists()); // verify createTime == lastCommitTime on the old journal. assertEquals( "createTime", oldJournal.getRootBlockView().getLastCommitTime(), segmentMetadata.getCreateTime()); // verify segment has all data in the groundTruth btree. { final IndexSegmentStore segStore = (IndexSegmentStore) resourceManager.openStore(segmentMetadata.getUUID()); final IndexSegment seg = segStore.loadIndexSegment(); AbstractBTreeTestCase.assertSameBTree(groundTruth, seg); } } /* * verify same data from ground truth and the new view (using btree * helper classes for this). */ { final IIndex actual = resourceManager.getIndex(name, ITx.UNISOLATED); AbstractBTreeTestCase.assertSameBTree(groundTruth, actual); } }
/**
 * Verifies that a write-write conflict is detected and then resolved. An index is registered with
 * an {@link IConflictResolver} and the journal is committed. Two transactions (tx1, tx2) are
 * started and each writes a different value under the same key. tx1 commits first, so its value
 * must be visible on the unisolated index. tx2 then commits, and the test verifies that the
 * commit succeeds and that the value chosen by the conflict resolver is what ends up on the
 * unisolated index.
 */
public void test_writeWriteConflict_conflictIsResolved() {

    final Journal store = new Journal(getProperties());

    try {

        final String indexName = "abc";

        // the contended key.
        final byte[] key = new byte[] {1};

        // value written by tx1.
        final byte[] valueTx1 = new byte[] {1};

        // value written by tx2.
        final byte[] valueTx2 = new byte[] {2};

        // value imposed by the conflict resolver.
        final byte[] valueResolved = new byte[] {3};

        /*
         * Register an isolatable index with a conflict resolver and commit
         * the journal.
         */
        final IndexMetadata md = new IndexMetadata(indexName, UUID.randomUUID());
        md.setIsolatable(true);
        md.setConflictResolver(new SingleValueConflictResolver(key, valueResolved));
        store.registerIndex(indexName, BTree.create(store, md));
        store.commit();

        /*
         * Start both transactions before either one writes.
         */
        final long tx1 = store.newTx(ITx.UNISOLATED);
        final long tx2 = store.newTx(ITx.UNISOLATED);

        /*
         * Each transaction writes its own value under the same key on the
         * same index.
         */
        store.getIndex(indexName, tx1).insert(key, valueTx1);
        store.getIndex(indexName, tx2).insert(key, valueTx2);

        // the first committer wins outright.
        store.commit(tx1);
        assertEquals(valueTx1, (byte[]) store.getIndex(indexName).lookup(key));

        // the second committer conflicts; the resolver decides the value.
        store.commit(tx2);
        assertEquals(valueResolved, (byte[]) store.getIndex(indexName).lookup(key));

    } finally {

        store.destroy();

    }

}
/** * Test correct detection of a write-write conflict. An index is registered and the journal is * committed. Two transactions (tx1, tx2) are then started. Both transactions write a value under * the same key. tx1 prepares and commits. tx2 attempts to prepare, and the test verifies that a * {@link ValidationError} is reported. */ public void test_writeWriteConflict_correctDetection() { final Journal journal = new Journal(getProperties()); try { String name = "abc"; final byte[] k1 = new byte[] {1}; final byte[] v1a = new byte[] {1}; final byte[] v1b = new byte[] {2}; { /* * register an index and commit the journal. */ IndexMetadata metadata = new IndexMetadata(name, UUID.randomUUID()); metadata.setIsolatable(true); // Note: No conflict resolver. journal.registerIndex(name, BTree.create(journal, metadata)); journal.commit(); } /* * Create two transactions. */ final long tx1 = journal.newTx(ITx.UNISOLATED); final long tx2 = journal.newTx(ITx.UNISOLATED); /* * Write a value under the same key on the same index in both * transactions. */ journal.getIndex(name, tx1).insert(k1, v1a); journal.getIndex(name, tx2).insert(k1, v1b); journal.commit(tx1); /* * verify that the value from tx1 is found under the key on the * unisolated index. */ assertEquals(v1a, (byte[]) journal.getIndex(name).lookup(k1)); // final ITx tmp = journal.getTx(tx2); try { journal.commit(tx2); fail("Expecting: " + ValidationError.class); } catch (ValidationError ex) { if (log.isInfoEnabled()) log.info("Ignoring expected exception: " + ex); // assertTrue(tmp.isAborted()); } } finally { journal.destroy(); } }