public void processGetVersions(ResponseBuilder rb) throws IOException { SolrQueryRequest req = rb.req; SolrQueryResponse rsp = rb.rsp; SolrParams params = req.getParams(); if (!params.getBool(COMPONENT_NAME, true)) { return; } int nVersions = params.getInt("getVersions", -1); if (nVersions == -1) return; String sync = params.get("sync"); if (sync != null) { processSync(rb, nVersions, sync); return; } UpdateLog ulog = req.getCore().getUpdateHandler().getUpdateLog(); if (ulog == null) return; UpdateLog.RecentUpdates recentUpdates = ulog.getRecentUpdates(); try { rb.rsp.add("versions", recentUpdates.getVersions(nVersions)); } finally { recentUpdates.close(); // cache this somehow? } }
public void processGetUpdates(ResponseBuilder rb) throws IOException { SolrQueryRequest req = rb.req; SolrQueryResponse rsp = rb.rsp; SolrParams params = req.getParams(); if (!params.getBool(COMPONENT_NAME, true)) { return; } String versionsStr = params.get("getUpdates"); if (versionsStr == null) return; UpdateLog ulog = req.getCore().getUpdateHandler().getUpdateLog(); if (ulog == null) return; List<String> versions = StrUtils.splitSmart(versionsStr, ",", true); List<Object> updates = new ArrayList<Object>(versions.size()); long minVersion = Long.MAX_VALUE; // TODO: get this from cache instead of rebuilding? UpdateLog.RecentUpdates recentUpdates = ulog.getRecentUpdates(); try { for (String versionStr : versions) { long version = Long.parseLong(versionStr); try { Object o = recentUpdates.lookup(version); if (o == null) continue; if (version > 0) { minVersion = Math.min(minVersion, version); } // TODO: do any kind of validation here? updates.add(o); } catch (SolrException e) { log.warn("Exception reading log for updates", e); } catch (ClassCastException e) { log.warn("Exception reading log for updates", e); } } // Must return all delete-by-query commands that occur after the first add requested // since they may apply. updates.addAll(recentUpdates.getDeleteByQuery(minVersion)); rb.rsp.add("updates", updates); } finally { recentUpdates.close(); // cache this somehow? } }
@Override public void processCommit(CommitUpdateCommand cmd) throws IOException { if (zkEnabled) { zkCheck(); } if (vinfo != null) { vinfo.lockForUpdate(); } try { if (ulog == null || ulog.getState() == UpdateLog.State.ACTIVE || (cmd.getFlags() & UpdateCommand.REPLAY) != 0) { super.processCommit(cmd); } else { log.info( "Ignoring commit while not ACTIVE - state: " + ulog.getState() + " replay:" + (cmd.getFlags() & UpdateCommand.REPLAY)); } } finally { if (vinfo != null) { vinfo.unlockForUpdate(); } } // TODO: we should consider this? commit everyone in the current collection if (zkEnabled) { ModifiableSolrParams params = new ModifiableSolrParams(filterParams(req.getParams())); if (!req.getParams().getBool(COMMIT_END_POINT, false)) { params.set(COMMIT_END_POINT, true); String nodeName = req.getCore().getCoreDescriptor().getCoreContainer().getZkController().getNodeName(); String shardZkNodeName = nodeName + "_" + req.getCore().getName(); List<Node> nodes = getCollectionUrls( req, req.getCore().getCoreDescriptor().getCloudDescriptor().getCollectionName(), shardZkNodeName); if (nodes != null) { cmdDistrib.distribCommit(cmd, nodes, params); finish(); } } } }
public static SolrInputDocument getInputDocument(SolrCore core, BytesRef idBytes) throws IOException { SolrInputDocument sid = null; RefCounted<SolrIndexSearcher> searcherHolder = null; try { SolrIndexSearcher searcher = null; UpdateLog ulog = core.getUpdateHandler().getUpdateLog(); if (ulog != null) { Object o = ulog.lookup(idBytes); if (o != null) { // should currently be a List<Oper,Ver,Doc/Id> List entry = (List) o; assert entry.size() >= 3; int oper = (Integer) entry.get(0) & UpdateLog.OPERATION_MASK; switch (oper) { case UpdateLog.ADD: sid = (SolrInputDocument) entry.get(entry.size() - 1); break; case UpdateLog.DELETE: return null; default: throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, "Unknown Operation! " + oper); } } } if (sid == null) { // didn't find it in the update log, so it should be in the newest searcher opened if (searcher == null) { searcherHolder = core.getRealtimeSearcher(); searcher = searcherHolder.get(); } // SolrCore.verbose("RealTimeGet using searcher ", searcher); SchemaField idField = core.getLatestSchema().getUniqueKeyField(); int docid = searcher.getFirstMatch(new Term(idField.getName(), idBytes)); if (docid < 0) return null; StoredDocument luceneDocument = searcher.doc(docid); sid = toSolrInputDocument(luceneDocument, core.getLatestSchema()); } } finally { if (searcherHolder != null) { searcherHolder.decref(); } } return sid; }
public DistributedUpdateProcessor( SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) { super(next); this.rsp = rsp; this.next = next; this.idField = req.getSchema().getUniqueKeyField(); // version init this.updateHandler = req.getCore().getUpdateHandler(); this.ulog = updateHandler.getUpdateLog(); this.vinfo = ulog == null ? null : ulog.getVersionInfo(); versionsStored = this.vinfo != null && this.vinfo.getVersionField() != null; returnVersions = req.getParams().getBool(UpdateParams.VERSIONS, false); // TODO: better way to get the response, or pass back info to it? SolrRequestInfo reqInfo = returnVersions ? SolrRequestInfo.getRequestInfo() : null; this.req = req; CoreDescriptor coreDesc = req.getCore().getCoreDescriptor(); this.zkEnabled = coreDesc.getCoreContainer().isZooKeeperAware(); zkController = req.getCore().getCoreDescriptor().getCoreContainer().getZkController(); if (zkEnabled) { numNodes = zkController.getZkStateReader().getClusterState().getLiveNodes().size(); cmdDistrib = new SolrCmdDistributor( numNodes, coreDesc.getCoreContainer().getZkController().getCmdDistribExecutor()); } // this.rsp = reqInfo != null ? reqInfo.getRsp() : null; cloudDesc = coreDesc.getCloudDescriptor(); if (cloudDesc != null) { collection = cloudDesc.getCollectionName(); } }
@Test public void testBufferingFlags() throws Exception { DirectUpdateHandler2.commitOnClose = false; final Semaphore logReplayFinish = new Semaphore(0); UpdateLog.testing_logReplayFinishHook = new Runnable() { @Override public void run() { logReplayFinish.release(); } }; SolrQueryRequest req = req(); UpdateHandler uhandler = req.getCore().getUpdateHandler(); UpdateLog ulog = uhandler.getUpdateLog(); try { clearIndex(); assertU(commit()); assertEquals(UpdateLog.State.ACTIVE, ulog.getState()); ulog.bufferUpdates(); // simulate updates from a leader updateJ( jsonAdd(sdoc("id", "Q1", "_version_", "101")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER)); updateJ( jsonAdd(sdoc("id", "Q2", "_version_", "102")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER)); updateJ( jsonAdd(sdoc("id", "Q3", "_version_", "103")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER)); assertEquals(UpdateLog.State.BUFFERING, ulog.getState()); req.close(); h.close(); createCore(); req = req(); uhandler = req.getCore().getUpdateHandler(); ulog = uhandler.getUpdateLog(); logReplayFinish.acquire(); // wait for replay to finish assertTrue( (ulog.getStartingOperation() & UpdateLog.FLAG_GAP) != 0); // since we died while buffering, we should see this last // // Try again to ensure that the previous log replay didn't wipe out our flags // req.close(); h.close(); createCore(); req = req(); uhandler = req.getCore().getUpdateHandler(); ulog = uhandler.getUpdateLog(); assertTrue((ulog.getStartingOperation() & UpdateLog.FLAG_GAP) != 0); // now do some normal non-buffered adds updateJ( jsonAdd(sdoc("id", "Q4", "_version_", "114")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER)); updateJ( jsonAdd(sdoc("id", "Q5", "_version_", "115")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER)); updateJ( jsonAdd(sdoc("id", "Q6", "_version_", "116")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER)); assertU(commit()); req.close(); h.close(); createCore(); req = req(); uhandler = req.getCore().getUpdateHandler(); ulog = uhandler.getUpdateLog(); assertTrue((ulog.getStartingOperation() & UpdateLog.FLAG_GAP) == 0); ulog.bufferUpdates(); // simulate receiving no updates ulog.applyBufferedUpdates(); updateJ( jsonAdd(sdoc("id", "Q7", "_version_", "117")), params( DISTRIB_UPDATE_PARAM, FROM_LEADER)); // do another add to make sure flags are back to normal req.close(); h.close(); createCore(); req = req(); uhandler = req.getCore().getUpdateHandler(); ulog = uhandler.getUpdateLog(); assertTrue((ulog.getStartingOperation() & UpdateLog.FLAG_GAP) == 0); // check flags on Q7 logReplayFinish.acquire(); assertEquals( UpdateLog.State.ACTIVE, ulog.getState()); // leave each test method in a good state } finally { DirectUpdateHandler2.commitOnClose = true; UpdateLog.testing_logReplayHook = null; UpdateLog.testing_logReplayFinishHook = null; req().close(); } }
@Test @Ignore("HDFS-3107: no truncate support yet") public void testDropBuffered() throws Exception { DirectUpdateHandler2.commitOnClose = false; final Semaphore logReplay = new Semaphore(0); final Semaphore logReplayFinish = new Semaphore(0); UpdateLog.testing_logReplayHook = new Runnable() { @Override public void run() { try { assertTrue(logReplay.tryAcquire(timeout, TimeUnit.SECONDS)); } catch (Exception e) { throw new RuntimeException(e); } } }; UpdateLog.testing_logReplayFinishHook = new Runnable() { @Override public void run() { logReplayFinish.release(); } }; SolrQueryRequest req = req(); UpdateHandler uhandler = req.getCore().getUpdateHandler(); UpdateLog ulog = uhandler.getUpdateLog(); try { clearIndex(); assertU(commit()); assertEquals(UpdateLog.State.ACTIVE, ulog.getState()); ulog.bufferUpdates(); assertEquals(UpdateLog.State.BUFFERING, ulog.getState()); Future<UpdateLog.RecoveryInfo> rinfoFuture = ulog.applyBufferedUpdates(); assertTrue(rinfoFuture == null); assertEquals(UpdateLog.State.ACTIVE, ulog.getState()); ulog.bufferUpdates(); assertEquals(UpdateLog.State.BUFFERING, ulog.getState()); // simulate updates from a leader updateJ( jsonAdd(sdoc("id", "C1", "_version_", "101")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER)); updateJ( jsonAdd(sdoc("id", "C2", "_version_", "102")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER)); updateJ( jsonAdd(sdoc("id", "C3", "_version_", "103")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER)); assertTrue(ulog.dropBufferedUpdates()); ulog.bufferUpdates(); updateJ( jsonAdd(sdoc("id", "C4", "_version_", "104")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER)); updateJ( jsonAdd(sdoc("id", "C5", "_version_", "105")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER)); logReplay.release(1000); rinfoFuture = ulog.applyBufferedUpdates(); UpdateLog.RecoveryInfo rinfo = rinfoFuture.get(); assertEquals(2, rinfo.adds); assertJQ(req("qt", "/get", "getVersions", "2"), "=={'versions':[105,104]}"); // this time add some docs first before buffering starts (so tlog won't be at pos 0) updateJ( jsonAdd(sdoc("id", "C100", "_version_", "200")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER)); updateJ( jsonAdd(sdoc("id", "C101", "_version_", "201")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER)); ulog.bufferUpdates(); updateJ( jsonAdd(sdoc("id", "C103", "_version_", "203")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER)); updateJ( jsonAdd(sdoc("id", "C104", "_version_", "204")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER)); assertTrue(ulog.dropBufferedUpdates()); ulog.bufferUpdates(); updateJ( jsonAdd(sdoc("id", "C105", "_version_", "205")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER)); updateJ( jsonAdd(sdoc("id", "C106", "_version_", "206")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER)); rinfoFuture = ulog.applyBufferedUpdates(); rinfo = rinfoFuture.get(); assertEquals(2, rinfo.adds); assertJQ( req("q", "*:*", "sort", "_version_ asc", "fl", "id,_version_"), "/response/docs==[" + "{'id':'C4','_version_':104}" + ",{'id':'C5','_version_':105}" + ",{'id':'C100','_version_':200}" + ",{'id':'C101','_version_':201}" + ",{'id':'C105','_version_':205}" + ",{'id':'C106','_version_':206}" + "]"); assertJQ(req("qt", "/get", "getVersions", "6"), "=={'versions':[206,205,201,200,105,104]}"); ulog.bufferUpdates(); assertEquals(UpdateLog.State.BUFFERING, ulog.getState()); updateJ( jsonAdd(sdoc("id", "C301", "_version_", "998")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER)); updateJ( jsonAdd(sdoc("id", "C302", "_version_", "999")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER)); assertTrue(ulog.dropBufferedUpdates()); // make sure we can overwrite with a lower version // TODO: is this functionality needed? updateJ( jsonAdd(sdoc("id", "C301", "_version_", "301")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER)); updateJ( jsonAdd(sdoc("id", "C302", "_version_", "302")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER)); assertU(commit()); assertJQ(req("qt", "/get", "getVersions", "2"), "=={'versions':[302,301]}"); assertJQ( req("q", "*:*", "sort", "_version_ desc", "fl", "id,_version_", "rows", "2"), "/response/docs==[" + "{'id':'C302','_version_':302}" + ",{'id':'C301','_version_':301}" + "]"); updateJ( jsonAdd(sdoc("id", "C2", "_version_", "302")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER)); assertEquals( UpdateLog.State.ACTIVE, ulog.getState()); // leave each test method in a good state } finally { DirectUpdateHandler2.commitOnClose = true; UpdateLog.testing_logReplayHook = null; UpdateLog.testing_logReplayFinishHook = null; req().close(); } }
@Test public void testBuffering() throws Exception { DirectUpdateHandler2.commitOnClose = false; final Semaphore logReplay = new Semaphore(0); final Semaphore logReplayFinish = new Semaphore(0); UpdateLog.testing_logReplayHook = new Runnable() { @Override public void run() { try { assertTrue(logReplay.tryAcquire(timeout, TimeUnit.SECONDS)); } catch (Exception e) { throw new RuntimeException(e); } } }; UpdateLog.testing_logReplayFinishHook = new Runnable() { @Override public void run() { logReplayFinish.release(); } }; SolrQueryRequest req = req(); UpdateHandler uhandler = req.getCore().getUpdateHandler(); UpdateLog ulog = uhandler.getUpdateLog(); try { clearIndex(); assertU(commit()); assertEquals(UpdateLog.State.ACTIVE, ulog.getState()); ulog.bufferUpdates(); assertEquals(UpdateLog.State.BUFFERING, ulog.getState()); Future<UpdateLog.RecoveryInfo> rinfoFuture = ulog.applyBufferedUpdates(); assertTrue(rinfoFuture == null); assertEquals(UpdateLog.State.ACTIVE, ulog.getState()); ulog.bufferUpdates(); assertEquals(UpdateLog.State.BUFFERING, ulog.getState()); // simulate updates from a leader updateJ( jsonAdd(sdoc("id", "B1", "_version_", "1010")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER)); updateJ( jsonAdd(sdoc("id", "B11", "_version_", "1015")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER)); updateJ( jsonDelQ("id:B1 id:B11 id:B2 id:B3"), params(DISTRIB_UPDATE_PARAM, FROM_LEADER, "_version_", "-1017")); updateJ( jsonAdd(sdoc("id", "B2", "_version_", "1020")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER)); updateJ( jsonAdd(sdoc("id", "B3", "_version_", "1030")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER)); deleteAndGetVersion("B1", params(DISTRIB_UPDATE_PARAM, FROM_LEADER, "_version_", "-2010")); assertJQ( req("qt", "/get", "getVersions", "6"), "=={'versions':[-2010,1030,1020,-1017,1015,1010]}"); assertU(commit()); assertJQ( req("qt", "/get", "getVersions", "6"), "=={'versions':[-2010,1030,1020,-1017,1015,1010]}"); // updates should be buffered, so we should not see any results yet. assertJQ(req("q", "*:*"), "/response/numFound==0"); // real-time get should also not show anything (this could change in the future, // but it's currently used for validating version numbers too, so it would // be bad for updates to be visible if we're just buffering. assertJQ(req("qt", "/get", "id", "B3"), "=={'doc':null}"); rinfoFuture = ulog.applyBufferedUpdates(); assertTrue(rinfoFuture != null); assertEquals(UpdateLog.State.APPLYING_BUFFERED, ulog.getState()); logReplay.release(1000); UpdateLog.RecoveryInfo rinfo = rinfoFuture.get(); assertEquals(UpdateLog.State.ACTIVE, ulog.getState()); assertJQ( req("qt", "/get", "getVersions", "6"), "=={'versions':[-2010,1030,1020,-1017,1015,1010]}"); assertJQ(req("q", "*:*"), "/response/numFound==2"); // move back to recovering ulog.bufferUpdates(); assertEquals(UpdateLog.State.BUFFERING, ulog.getState()); Long ver = getVer(req("qt", "/get", "id", "B3")); assertEquals(1030L, ver.longValue()); // add a reordered doc that shouldn't overwrite one in the index updateJ( jsonAdd(sdoc("id", "B3", "_version_", "3")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER)); // reorder two buffered updates updateJ( jsonAdd(sdoc("id", "B4", "_version_", "1040")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER)); deleteAndGetVersion( "B4", params( DISTRIB_UPDATE_PARAM, FROM_LEADER, "_version_", "-940")); // this update should not take affect updateJ( jsonAdd(sdoc("id", "B6", "_version_", "1060")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER)); updateJ( jsonAdd(sdoc("id", "B5", "_version_", "1050")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER)); updateJ( jsonAdd(sdoc("id", "B8", "_version_", "1080")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER)); // test that delete by query is at least buffered along with everything else so it will delete // the // currently buffered id:8 (even if it doesn't currently support versioning) updateJ( "{\"delete\": { \"query\":\"id:B2 OR id:B8\" }}", params(DISTRIB_UPDATE_PARAM, FROM_LEADER, "_version_", "-3000")); assertJQ( req("qt", "/get", "getVersions", "13"), "=={'versions':[-3000,1080,1050,1060,-940,1040,3,-2010,1030,1020,-1017,1015,1010]}" // the // "3" // appears because versions aren't checked while buffering ); logReplay.drainPermits(); rinfoFuture = ulog.applyBufferedUpdates(); assertTrue(rinfoFuture != null); assertEquals(UpdateLog.State.APPLYING_BUFFERED, ulog.getState()); // apply a single update logReplay.release(1); // now add another update updateJ( jsonAdd(sdoc("id", "B7", "_version_", "1070")), params(DISTRIB_UPDATE_PARAM, FROM_LEADER)); // a reordered update that should be dropped deleteAndGetVersion("B5", params(DISTRIB_UPDATE_PARAM, FROM_LEADER, "_version_", "-950")); deleteAndGetVersion("B6", params(DISTRIB_UPDATE_PARAM, FROM_LEADER, "_version_", "-2060")); logReplay.release(1000); UpdateLog.RecoveryInfo recInfo = rinfoFuture.get(); assertJQ( req("q", "*:*", "sort", "id asc", "fl", "id,_version_"), "/response/docs==[" + "{'id':'B3','_version_':1030}" + ",{'id':'B4','_version_':1040}" + ",{'id':'B5','_version_':1050}" + ",{'id':'B7','_version_':1070}" + "]"); assertEquals(1, recInfo.deleteByQuery); assertEquals( UpdateLog.State.ACTIVE, ulog.getState()); // leave each test method in a good state } finally { DirectUpdateHandler2.commitOnClose = true; UpdateLog.testing_logReplayHook = null; UpdateLog.testing_logReplayFinishHook = null; req().close(); } }
private boolean versionDelete(DeleteUpdateCommand cmd) throws IOException { BytesRef idBytes = cmd.getIndexedId(); if (vinfo == null || idBytes == null) { super.processDelete(cmd); return false; } // This is only the hash for the bucket, and must be based only on the uniqueKey (i.e. do not // use a pluggable hash here) int bucketHash = Hash.murmurhash3_x86_32(idBytes.bytes, idBytes.offset, idBytes.length, 0); // at this point, there is an update we need to try and apply. // we may or may not be the leader. // Find the version long versionOnUpdate = cmd.getVersion(); if (versionOnUpdate == 0) { String versionOnUpdateS = req.getParams().get(VERSION_FIELD); versionOnUpdate = versionOnUpdateS == null ? 0 : Long.parseLong(versionOnUpdateS); } long signedVersionOnUpdate = versionOnUpdate; versionOnUpdate = Math.abs(versionOnUpdate); // normalize to positive version boolean isReplay = (cmd.getFlags() & UpdateCommand.REPLAY) != 0; boolean leaderLogic = isLeader && !isReplay; if (!leaderLogic && versionOnUpdate == 0) { throw new SolrException(ErrorCode.BAD_REQUEST, "missing _version_ on update from leader"); } VersionBucket bucket = vinfo.bucket(bucketHash); vinfo.lockForUpdate(); try { synchronized (bucket) { if (versionsStored) { long bucketVersion = bucket.highest; if (leaderLogic) { if (signedVersionOnUpdate != 0) { Long lastVersion = vinfo.lookupVersion(cmd.getIndexedId()); long foundVersion = lastVersion == null ? -1 : lastVersion; if ((signedVersionOnUpdate == foundVersion) || (signedVersionOnUpdate < 0 && foundVersion < 0) || (signedVersionOnUpdate == 1 && foundVersion > 0)) { // we're ok if versions match, or if both are negative (all missing docs are equal), // or if cmd // specified it must exist (versionOnUpdate==1) and it does. } else { throw new SolrException( ErrorCode.CONFLICT, "version conflict for " + cmd.getId() + " expected=" + signedVersionOnUpdate + " actual=" + foundVersion); } } long version = vinfo.getNewClock(); cmd.setVersion(-version); bucket.updateHighest(version); } else { cmd.setVersion(-versionOnUpdate); if (ulog.getState() != UpdateLog.State.ACTIVE && (cmd.getFlags() & UpdateCommand.REPLAY) == 0) { // we're not in an active state, and this update isn't from a replay, so buffer it. cmd.setFlags(cmd.getFlags() | UpdateCommand.BUFFERING); ulog.delete(cmd); return true; } // if we aren't the leader, then we need to check that updates were not re-ordered if (bucketVersion != 0 && bucketVersion < versionOnUpdate) { // we're OK... this update has a version higher than anything we've seen // in this bucket so far, so we know that no reordering has yet occured. bucket.updateHighest(versionOnUpdate); } else { // there have been updates higher than the current update. we need to check // the specific version for this id. Long lastVersion = vinfo.lookupVersion(cmd.getIndexedId()); if (lastVersion != null && Math.abs(lastVersion) >= versionOnUpdate) { // This update is a repeat, or was reordered. We need to drop this update. return true; } } } } doLocalDelete(cmd); return false; } // end synchronized (bucket) } finally { vinfo.unlockForUpdate(); } }
public void doDeleteByQuery(DeleteUpdateCommand cmd) throws IOException { // even in non zk mode, tests simulate updates from a leader if (!zkEnabled) { isLeader = getNonZkLeaderAssumption(req); } else { zkCheck(); } // NONE: we are the first to receive this deleteByQuery // - it must be forwarded to the leader of every shard // TO: we are a leader receiving a forwarded deleteByQuery... we must: // - block all updates (use VersionInfo) // - flush *all* updates going to our replicas // - forward the DBQ to our replicas and wait for the response // - log + execute the local DBQ // FROM: we are a replica receiving a DBQ from our leader // - log + execute the local DBQ DistribPhase phase = DistribPhase.parseParam(req.getParams().get(DISTRIB_UPDATE_PARAM)); if (zkEnabled && DistribPhase.NONE == phase) { boolean leaderForAnyShard = false; // start off by assuming we are not a leader for any shard Map<String, Slice> slices = zkController.getClusterState().getSlices(collection); if (slices == null) { throw new SolrException( ErrorCode.BAD_REQUEST, "Cannot find collection:" + collection + " in " + zkController.getClusterState().getCollections()); } ModifiableSolrParams params = new ModifiableSolrParams(filterParams(req.getParams())); params.set(DISTRIB_UPDATE_PARAM, DistribPhase.TOLEADER.toString()); List<Node> leaders = new ArrayList<Node>(slices.size()); for (Map.Entry<String, Slice> sliceEntry : slices.entrySet()) { String sliceName = sliceEntry.getKey(); ZkNodeProps leaderProps; try { leaderProps = zkController.getZkStateReader().getLeaderProps(collection, sliceName); } catch (InterruptedException e) { throw new SolrException( ErrorCode.SERVICE_UNAVAILABLE, "Exception finding leader for shard " + sliceName, e); } // TODO: What if leaders changed in the meantime? // should we send out slice-at-a-time and if a node returns "hey, I'm not a leader" (or we // get an error because it went down) then look up the new leader? // Am I the leader for this slice? ZkCoreNodeProps coreLeaderProps = new ZkCoreNodeProps(leaderProps); String leaderNodeName = coreLeaderProps.getCoreNodeName(); String coreName = req.getCore().getName(); String coreNodeName = zkController.getNodeName() + "_" + coreName; isLeader = coreNodeName.equals(leaderNodeName); if (isLeader) { // don't forward to ourself leaderForAnyShard = true; } else { leaders.add(new StdNode(coreLeaderProps)); } } params.remove("commit"); // this will be distributed from the local commit cmdDistrib.distribDelete(cmd, leaders, params); if (!leaderForAnyShard) { return; } // change the phase to TOLEADER so we look up and forward to our own replicas (if any) phase = DistribPhase.TOLEADER; } List<Node> replicas = null; if (zkEnabled && DistribPhase.TOLEADER == phase) { // This core should be a leader isLeader = true; replicas = setupRequest(); } else if (DistribPhase.FROMLEADER == phase) { isLeader = false; } if (vinfo == null) { super.processDelete(cmd); return; } // at this point, there is an update we need to try and apply. // we may or may not be the leader. // Find the version long versionOnUpdate = cmd.getVersion(); if (versionOnUpdate == 0) { String versionOnUpdateS = req.getParams().get(VERSION_FIELD); versionOnUpdate = versionOnUpdateS == null ? 0 : Long.parseLong(versionOnUpdateS); } versionOnUpdate = Math.abs(versionOnUpdate); // normalize to positive version boolean isReplay = (cmd.getFlags() & UpdateCommand.REPLAY) != 0; boolean leaderLogic = isLeader && !isReplay; if (!leaderLogic && versionOnUpdate == 0) { throw new SolrException(ErrorCode.BAD_REQUEST, "missing _version_ on update from leader"); } vinfo.blockUpdates(); try { if (versionsStored) { if (leaderLogic) { long version = vinfo.getNewClock(); cmd.setVersion(-version); // TODO update versions in all buckets doLocalDelete(cmd); } else { cmd.setVersion(-versionOnUpdate); if (ulog.getState() != UpdateLog.State.ACTIVE && (cmd.getFlags() & UpdateCommand.REPLAY) == 0) { // we're not in an active state, and this update isn't from a replay, so buffer it. cmd.setFlags(cmd.getFlags() | UpdateCommand.BUFFERING); ulog.deleteByQuery(cmd); return; } doLocalDelete(cmd); } } // since we don't know which documents were deleted, the easiest thing to do is to invalidate // all real-time caches (i.e. UpdateLog) which involves also getting a new version of the // IndexReader // (so cache misses will see up-to-date data) } finally { vinfo.unblockUpdates(); } // forward to all replicas if (leaderLogic && replicas != null) { ModifiableSolrParams params = new ModifiableSolrParams(filterParams(req.getParams())); params.set(VERSION_FIELD, Long.toString(cmd.getVersion())); params.set(DISTRIB_UPDATE_PARAM, DistribPhase.FROMLEADER.toString()); params.set( "update.from", ZkCoreNodeProps.getCoreUrl(zkController.getBaseUrl(), req.getCore().getName())); cmdDistrib.distribDelete(cmd, replicas, params); cmdDistrib.finish(); } if (returnVersions && rsp != null) { if (deleteByQueryResponse == null) { deleteByQueryResponse = new NamedList<String>(); rsp.add("deleteByQuery", deleteByQueryResponse); } deleteByQueryResponse.add(cmd.getQuery(), cmd.getVersion()); } }
/** * @return whether or not to drop this cmd * @throws IOException If there is a low-level I/O error. */ private boolean versionAdd(AddUpdateCommand cmd) throws IOException { BytesRef idBytes = cmd.getIndexedId(); if (vinfo == null || idBytes == null) { super.processAdd(cmd); return false; } // This is only the hash for the bucket, and must be based only on the uniqueKey (i.e. do not // use a pluggable hash here) int bucketHash = Hash.murmurhash3_x86_32(idBytes.bytes, idBytes.offset, idBytes.length, 0); // at this point, there is an update we need to try and apply. // we may or may not be the leader. // Find any existing version in the document // TODO: don't reuse update commands any more! long versionOnUpdate = cmd.getVersion(); if (versionOnUpdate == 0) { SolrInputField versionField = cmd.getSolrInputDocument().getField(VersionInfo.VERSION_FIELD); if (versionField != null) { Object o = versionField.getValue(); versionOnUpdate = o instanceof Number ? ((Number) o).longValue() : Long.parseLong(o.toString()); } else { // Find the version String versionOnUpdateS = req.getParams().get(VERSION_FIELD); versionOnUpdate = versionOnUpdateS == null ? 0 : Long.parseLong(versionOnUpdateS); } } boolean isReplay = (cmd.getFlags() & UpdateCommand.REPLAY) != 0; boolean leaderLogic = isLeader && !isReplay; VersionBucket bucket = vinfo.bucket(bucketHash); vinfo.lockForUpdate(); try { synchronized (bucket) { // we obtain the version when synchronized and then do the add so we can ensure that // if version1 < version2 then version1 is actually added before version2. // even if we don't store the version field, synchronizing on the bucket // will enable us to know what version happened first, and thus enable // realtime-get to work reliably. // TODO: if versions aren't stored, do we need to set on the cmd anyway for some reason? // there may be other reasons in the future for a version on the commands boolean checkDeleteByQueries = false; if (versionsStored) { long bucketVersion = bucket.highest; if (leaderLogic) { boolean updated = getUpdatedDocument(cmd, versionOnUpdate); if (versionOnUpdate != 0) { Long lastVersion = vinfo.lookupVersion(cmd.getIndexedId()); long foundVersion = lastVersion == null ? -1 : lastVersion; if (versionOnUpdate == foundVersion || (versionOnUpdate < 0 && foundVersion < 0) || (versionOnUpdate == 1 && foundVersion > 0)) { // we're ok if versions match, or if both are negative (all missing docs are equal), // or if cmd // specified it must exist (versionOnUpdate==1) and it does. } else { throw new SolrException( ErrorCode.CONFLICT, "version conflict for " + cmd.getPrintableId() + " expected=" + versionOnUpdate + " actual=" + foundVersion); } } long version = vinfo.getNewClock(); cmd.setVersion(version); cmd.getSolrInputDocument().setField(VersionInfo.VERSION_FIELD, version); bucket.updateHighest(version); } else { // The leader forwarded us this update. cmd.setVersion(versionOnUpdate); if (ulog.getState() != UpdateLog.State.ACTIVE && (cmd.getFlags() & UpdateCommand.REPLAY) == 0) { // we're not in an active state, and this update isn't from a replay, so buffer it. cmd.setFlags(cmd.getFlags() | UpdateCommand.BUFFERING); ulog.add(cmd); return true; } // if we aren't the leader, then we need to check that updates were not re-ordered if (bucketVersion != 0 && bucketVersion < versionOnUpdate) { // we're OK... this update has a version higher than anything we've seen // in this bucket so far, so we know that no reordering has yet occurred. bucket.updateHighest(versionOnUpdate); } else { // there have been updates higher than the current update. we need to check // the specific version for this id. Long lastVersion = vinfo.lookupVersion(cmd.getIndexedId()); if (lastVersion != null && Math.abs(lastVersion) >= versionOnUpdate) { // This update is a repeat, or was reordered. We need to drop this update. return true; } // also need to re-apply newer deleteByQuery commands checkDeleteByQueries = true; } } } boolean willDistrib = isLeader && nodes != null && nodes.size() > 0; SolrInputDocument clonedDoc = null; if (willDistrib) { clonedDoc = cmd.solrDoc.deepCopy(); } // TODO: possibly set checkDeleteByQueries as a flag on the command? doLocalAdd(cmd); if (willDistrib) { cmd.solrDoc = clonedDoc; } } // end synchronized (bucket) } finally { vinfo.unlockForUpdate(); } return false; }
@Override public void process(ResponseBuilder rb) throws IOException { SolrQueryRequest req = rb.req; SolrQueryResponse rsp = rb.rsp; SolrParams params = req.getParams(); if (!params.getBool(COMPONENT_NAME, true)) { return; } String val = params.get("getVersions"); if (val != null) { processGetVersions(rb); return; } val = params.get("getUpdates"); if (val != null) { processGetUpdates(rb); return; } String id[] = params.getParams("id"); String ids[] = params.getParams("ids"); if (id == null && ids == null) { return; } String[] allIds = id == null ? new String[0] : id; if (ids != null) { List<String> lst = new ArrayList<String>(); for (String s : allIds) { lst.add(s); } for (String idList : ids) { lst.addAll(StrUtils.splitSmart(idList, ",", true)); } allIds = lst.toArray(new String[lst.size()]); } SolrCore core = req.getCore(); SchemaField idField = core.getLatestSchema().getUniqueKeyField(); FieldType fieldType = idField.getType(); SolrDocumentList docList = new SolrDocumentList(); UpdateLog ulog = core.getUpdateHandler().getUpdateLog(); RefCounted<SolrIndexSearcher> searcherHolder = null; DocTransformer transformer = rsp.getReturnFields().getTransformer(); if (transformer != null) { TransformContext context = new TransformContext(); context.req = req; transformer.setContext(context); } try { SolrIndexSearcher searcher = null; BytesRef idBytes = new BytesRef(); for (String idStr : allIds) { fieldType.readableToIndexed(idStr, idBytes); if (ulog != null) { Object o = ulog.lookup(idBytes); if (o != null) { // should currently be a List<Oper,Ver,Doc/Id> List entry = (List) o; assert entry.size() >= 3; int oper = (Integer) entry.get(0) & UpdateLog.OPERATION_MASK; switch (oper) { case UpdateLog.ADD: SolrDocument doc = toSolrDoc( (SolrInputDocument) entry.get(entry.size() - 1), core.getLatestSchema()); if (transformer != null) { transformer.transform(doc, -1); // unknown docID } docList.add(doc); break; case UpdateLog.DELETE: break; default: throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, "Unknown Operation! " + oper); } continue; } } // didn't find it in the update log, so it should be in the newest searcher opened if (searcher == null) { searcherHolder = core.getRealtimeSearcher(); searcher = searcherHolder.get(); } // SolrCore.verbose("RealTimeGet using searcher ", searcher); int docid = searcher.getFirstMatch(new Term(idField.getName(), idBytes)); if (docid < 0) continue; StoredDocument luceneDocument = searcher.doc(docid); SolrDocument doc = toSolrDoc(luceneDocument, core.getLatestSchema()); if (transformer != null) { transformer.transform(doc, docid); } docList.add(doc); } } finally { if (searcherHolder != null) { searcherHolder.decref(); } } // if the client specified a single id=foo, then use "doc":{ // otherwise use a standard doclist if (ids == null && allIds.length <= 1) { // if the doc was not found, then use a value of null. rsp.add("doc", docList.size() > 0 ? docList.get(0) : null); } else { docList.setNumFound(docList.size()); rsp.add("response", docList); } }