/*
 * (non-Javadoc)
 * @see com.gemstone.gemfire.distributed.internal.DistributionMessage#process(com.gemstone.gemfire.distributed.internal.DistributionManager)
 */
@Override
protected void process(DistributionManager dm) {
  Throwable thr = null;
  JmxManagerProfile p = null;
  try {
    final GemFireCacheImpl cache = GemFireCacheImpl.getInstance();
    if (cache != null && !cache.isClosed()) {
      final JmxManagerAdvisor adv = cache.getJmxManagerAdvisor();
      p = this.profile;
      if (p != null) {
        adv.putProfile(p);
      }
    } else {
      if (logger.isDebugEnabled()) {
        logger.debug("No cache {}", this);
      }
    }
  } catch (CancelException e) {
    if (logger.isDebugEnabled()) {
      logger.debug("Cache closed: {}", this);
    }
  } catch (VirtualMachineError err) {
    SystemFailure.initiateFailure(err);
    // If this ever returns, rethrow the error. We're poisoned
    // now, so don't let this thread continue.
    throw err;
  } catch (Throwable t) {
    // Whenever you catch Error or Throwable, you must also
    // catch VirtualMachineError (see above). However, there is
    // _still_ a possibility that you are dealing with a cascading
    // error condition, so you also need to check to see if the JVM
    // is still usable:
    SystemFailure.checkFailure();
    thr = t;
  } finally {
    if (thr != null) {
      dm.getCancelCriterion().checkCancelInProgress(null);
      logger.info(
          LocalizedMessage.create(
              LocalizedStrings.ResourceAdvisor_MEMBER_CAUGHT_EXCEPTION_PROCESSING_PROFILE,
              new Object[] {p, toString()},
              thr));
    }
  }
}
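// The catch cascade above is the standard error-handling idiom in these
// handlers: a VirtualMachineError is reported to SystemFailure and rethrown,
// and any broader Throwable handler first asks SystemFailure whether the JVM
// is still usable before doing anything else. The skeleton isolated as a
// sketch (handleMessage is a hypothetical placeholder for the real work):
private void processSafely() {
  try {
    handleMessage();
  } catch (VirtualMachineError err) {
    SystemFailure.initiateFailure(err); // record the fatal error first
    throw err; // never swallow a VM error; this thread must not continue
  } catch (Throwable t) {
    SystemFailure.checkFailure(); // rethrows if a VM error was seen elsewhere
    // only now is it safe to record or log t for ordinary handling
  }
}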
private DistributedRegion getRegion(DistributionManager dm) {
  if (region != null) {
    return region;
  }
  // set the init level requirement so that we don't hang in
  // CacheFactory.getInstance() (bug 36175)
  int oldLevel = LocalRegion.setThreadInitLevelRequirement(LocalRegion.BEFORE_INITIAL_IMAGE);
  try {
    GemFireCacheImpl gfc = (GemFireCacheImpl) CacheFactory.getInstance(dm.getSystem());
    Region r = gfc.getRegionByPathForProcessing(this.regionPath);
    if (r instanceof DistributedRegion) {
      region = (DistributedRegion) r;
    }
  } finally {
    LocalRegion.setThreadInitLevelRequirement(oldLevel);
  }
  return region;
}
/** Returns the set of all DistributedRegions for allRegions processing. */
private Set<DistributedRegion> getAllRegions(DistributionManager dm) {
  // set the init level requirement so that we don't hang in
  // CacheFactory.getInstance() (bug 36175)
  int oldLevel = LocalRegion.setThreadInitLevelRequirement(LocalRegion.BEFORE_INITIAL_IMAGE);
  try {
    GemFireCacheImpl gfc = (GemFireCacheImpl) CacheFactory.getInstance(dm.getSystem());
    Set<DistributedRegion> result = new HashSet<DistributedRegion>();
    for (LocalRegion r : gfc.getAllRegions()) {
      // it's important not to check whether the cache is closing, so access
      // the isDestroyed boolean directly
      if (r instanceof DistributedRegion && !r.isDestroyed) {
        result.add((DistributedRegion) r);
      }
    }
    return result;
  } finally {
    LocalRegion.setThreadInitLevelRequirement(oldLevel);
  }
}
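// Both region lookups above share the same guard: temporarily lower this
// thread's init-level requirement so CacheFactory.getInstance() cannot hang
// waiting for a region's initial image (bug 36175), then restore the previous
// level unconditionally. The pattern isolated as a sketch, using only calls
// that appear above (lookupRegions is a hypothetical placeholder):
private void withRelaxedInitLevel(DistributionManager dm) {
  int oldLevel = LocalRegion.setThreadInitLevelRequirement(LocalRegion.BEFORE_INITIAL_IMAGE);
  try {
    GemFireCacheImpl gfc = (GemFireCacheImpl) CacheFactory.getInstance(dm.getSystem());
    lookupRegions(gfc); // any lookups that must not block on initialization
  } finally {
    // restore in finally: leaking the lowered level would silently change the
    // blocking behavior of unrelated work later run on this thread
    LocalRegion.setThreadInitLevelRequirement(oldLevel);
  }
}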
/**
 * Upon receipt of the message, both process the message and send an acknowledgement, not
 * necessarily in that order. Note: Any hang in this message may cause a distributed deadlock for
 * those threads waiting for an acknowledgement.
 *
 * @throws PartitionedRegionException if the region does not exist (typically, if it has been
 *         destroyed)
 */
@Override
public void process(final DistributionManager dm) {
  Throwable thr = null;
  boolean sendReply = true;
  LocalRegion r = null;
  long startTime = 0;
  try {
    if (checkCacheClosing(dm) || checkDSClosing(dm)) {
      thr = new CacheClosedException(
          LocalizedStrings.PartitionMessage_REMOTE_CACHE_IS_CLOSED_0.toLocalizedString(
              dm.getId()));
      return;
    }
    GemFireCacheImpl gfc = (GemFireCacheImpl) CacheFactory.getInstance(dm.getSystem());
    r = gfc.getRegionByPathForProcessing(this.regionPath);
    if (r == null && failIfRegionMissing()) {
      // if the distributed system is disconnecting, don't send a reply saying
      // the partitioned region can't be found (bug 36585)
      thr = new RegionDestroyedException(
          LocalizedStrings.RemoteOperationMessage_0_COULD_NOT_FIND_REGION_1.toLocalizedString(
              new Object[] {dm.getDistributionManagerId(), regionPath}),
          regionPath);
      return; // reply sent in finally block below
    }

    thr = UNHANDLED_EXCEPTION;

    // [bruce] r might be null here, so we have to go to the cache instance to get the txmgr
    TXManagerImpl txMgr = GemFireCacheImpl.getInstance().getTxManager();
    TXStateProxy tx = null;
    try {
      tx = txMgr.masqueradeAs(this);
      sendReply = operateOnRegion(dm, r, startTime);
    } finally {
      txMgr.unmasquerade(tx);
    }
    thr = null;
  } catch (RemoteOperationException fre) {
    thr = fre;
  } catch (DistributedSystemDisconnectedException se) {
    // bug 37026: this is too noisy...
    // throw new CacheClosedException("remote system shutting down");
    // thr = se; cache is closed, no point trying to send a reply
    thr = null;
    sendReply = false;
    if (logger.isDebugEnabled()) {
      logger.debug("shutdown caught, abandoning message: {}", se.getMessage(), se);
    }
  } catch (RegionDestroyedException rde) {
    // [bruce] RDE does not always mean that the sender's region is also
    // destroyed, so we must send back an exception. If the sender's
    // region is also destroyed, who cares if we send it an exception
    // if (pr != null && pr.isClosed) {
    thr = new ForceReattemptException(
        LocalizedStrings.PartitionMessage_REGION_IS_DESTROYED_IN_0.toLocalizedString(
            dm.getDistributionManagerId()),
        rde);
    // }
  } catch (VirtualMachineError err) {
    SystemFailure.initiateFailure(err);
    // If this ever returns, rethrow the error. We're poisoned
    // now, so don't let this thread continue.
    throw err;
  } catch (Throwable t) {
    // Whenever you catch Error or Throwable, you must also
    // catch VirtualMachineError (see above). However, there is
    // _still_ a possibility that you are dealing with a cascading
    // error condition, so you also need to check to see if the JVM
    // is still usable:
    SystemFailure.checkFailure();
    // log the exception at fine level if there is no reply to the message
    thr = null;
    if (sendReply) {
      if (!checkDSClosing(dm)) {
        thr = t;
      } else {
        // don't pass arbitrary runtime exceptions and errors back if this
        // cache/vm is closing
        thr = new ForceReattemptException(
            LocalizedStrings.PartitionMessage_DISTRIBUTED_SYSTEM_IS_DISCONNECTING
                .toLocalizedString());
      }
    }
    if (logger.isTraceEnabled(LogMarker.DM) && (t instanceof RuntimeException)) {
      logger.trace(LogMarker.DM, "Exception caught while processing message", t);
    }
  } finally {
    if (sendReply) {
      ReplyException rex = null;
      if (thr != null) {
        // don't transmit the exception if this message was to a listener
        // and this listener is shutting down
        rex = new ReplyException(thr);
      }
      // Send the reply if operateOnRegion returned true
      sendReply(getSender(), this.processorId, dm, rex, r, startTime);
    }
  }
}
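// process() above is a template method: the real region work comes from the
// operateOnRegion() hook, and failIfRegionMissing() decides whether a missing
// region should be reported as an error. A hedged sketch of a minimal
// subclass; the parent class name is inferred from the localized strings used
// above, and the hook signatures are inferred from their call sites, so treat
// every detail here as an assumption rather than repository code:
class ExampleOperationMessage extends RemoteOperationMessage {
  @Override
  protected boolean operateOnRegion(DistributionManager dm, LocalRegion r, long startTime)
      throws RemoteOperationException {
    // do the real work here, under the sender's TX identity (see masqueradeAs
    // above); returning true tells process() to send the reply in its finally
    return true;
  }

  @Override
  protected boolean failIfRegionMissing() {
    return true; // missing region => RegionDestroyedException reply to sender
  }
}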
/**
 * Checks whether the distributed system is closing.
 *
 * @return true if the distributed system is closing
 */
public final boolean checkDSClosing(DistributionManager dm) {
  InternalDistributedSystem ds = dm.getSystem();
  return (ds == null || ds.isDisconnecting());
}
/** Returns the JGroupMembershipManager for the given distributed system. */
public static JGroupMembershipManager getMembershipManager(DistributedSystem sys) {
  InternalDistributedSystem isys = (InternalDistributedSystem) sys;
  DistributionManager dm = (DistributionManager) isys.getDistributionManager();
  return (JGroupMembershipManager) dm.getMembershipManager();
}
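// A hedged sketch of how this helper relates to the state-flush messages
// below: the sending side snapshots per-channel message state with
// getMessageState(), and the receiving side blocks in waitForMessageState()
// until that state has been reached. Both calls appear verbatim in the
// process() methods below; putting them in one method here is purely
// illustrative, and the parameters are hypothetical inputs.
static void sketchMessageStateHandoff(
    DistributedSystem sys, InternalDistributedMember member, boolean useMulticast)
    throws InterruptedException {
  JGroupMembershipManager mgr = getMembershipManager(sys);
  Map channelState = mgr.getMessageState(member, useMulticast); // record outbound state
  mgr.waitForMessageState(member, channelState); // wait until that state is reached
}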
/** Creates a <code>BridgeServerResponse</code> in response to the given request. */
static BridgeServerResponse create(DistributionManager dm, BridgeServerRequest request) {
  BridgeServerResponse m = new BridgeServerResponse();
  m.setRecipient(request.getSender());
  try {
    GemFireCacheImpl cache = (GemFireCacheImpl) CacheFactory.getInstanceCloseOk(dm.getSystem());
    if (request.getCacheId() != System.identityHashCode(cache)) {
      m.bridgeInfo = null;
    } else {
      int operation = request.getOperation();
      switch (operation) {
        case BridgeServerRequest.ADD_OPERATION: {
          BridgeServerImpl bridge = (BridgeServerImpl) cache.addBridgeServer();
          m.bridgeInfo = new RemoteBridgeServer(bridge);
          break;
        }
        case BridgeServerRequest.INFO_OPERATION: {
          int id = request.getBridgeId();
          // Note that since this is only an informational request,
          // it is not necessary to synchronize on allBridgeServersLock
          for (Iterator iter = cache.getBridgeServers().iterator(); iter.hasNext();) {
            BridgeServerImpl bridge = (BridgeServerImpl) iter.next();
            if (System.identityHashCode(bridge) == id) {
              m.bridgeInfo = new RemoteBridgeServer(bridge);
              break;
            } else {
              m.bridgeInfo = null;
            }
          }
          break;
        }
        case BridgeServerRequest.START_OPERATION: {
          RemoteBridgeServer config = request.getBridgeInfo();
          for (Iterator iter = cache.getBridgeServers().iterator(); iter.hasNext();) {
            BridgeServerImpl bridge = (BridgeServerImpl) iter.next();
            if (System.identityHashCode(bridge) == config.getId()) {
              bridge.configureFrom(config);
              bridge.start();
              m.bridgeInfo = new RemoteBridgeServer(bridge);
              break;
            } else {
              m.bridgeInfo = null;
            }
          }
          break;
        }
        case BridgeServerRequest.STOP_OPERATION: {
          RemoteBridgeServer config = request.getBridgeInfo();
          for (Iterator iter = cache.getBridgeServers().iterator(); iter.hasNext();) {
            BridgeServerImpl bridge = (BridgeServerImpl) iter.next();
            if (System.identityHashCode(bridge) == config.getId()) {
              bridge.stop();
              m.bridgeInfo = new RemoteBridgeServer(bridge);
              break;
            } else {
              m.bridgeInfo = null;
            }
          }
          break;
        }
        default:
          Assert.assertTrue(false, "Unknown bridge server operation: " + operation);
      }
    }
  } catch (CancelException ex) {
    m.bridgeInfo = null;
  } catch (Exception ex) {
    m.exception = ex;
    m.bridgeInfo = null;
  }
  return m;
}
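// The admin protocol above identifies a bridge server by its identity hash
// code rather than by a stable id field, so the INFO/START/STOP branches all
// repeat the same scan. The duplicated loops could be factored into a single
// lookup; a hedged sketch (findBridgeServer is a hypothetical helper, not
// part of this class):
private static BridgeServerImpl findBridgeServer(GemFireCacheImpl cache, int id) {
  for (Iterator iter = cache.getBridgeServers().iterator(); iter.hasNext();) {
    BridgeServerImpl bridge = (BridgeServerImpl) iter.next();
    if (System.identityHashCode(bridge) == id) {
      return bridge;
    }
  }
  return null; // no bridge server with that identity in this cache
}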
public void testSimpleOutOfOffHeapMemoryMemberDisconnects() {
  final DistributedSystem system = getSystem();
  final Cache cache = getCache();
  final DistributionManager dm =
      (DistributionManager) ((InternalDistributedSystem) system).getDistributionManager();

  Region<Object, Object> region =
      cache.createRegionFactory(getRegionShortcut()).setOffHeap(true).create(getRegionName());

  OutOfOffHeapMemoryException ooohme;
  try {
    Object value = new byte[1024];
    for (int i = 0; true; i++) {
      region.put("key-" + i, value);
    }
  } catch (OutOfOffHeapMemoryException e) {
    ooohme = e;
  }
  assertNotNull(ooohme);

  with()
      .pollInterval(100, TimeUnit.MILLISECONDS)
      .await()
      .atMost(10, TimeUnit.SECONDS)
      .until(() -> cache.isClosed() && !system.isConnected() && dm.isClosed());

  // wait for cache instance to be nulled out
  with()
      .pollInterval(100, TimeUnit.MILLISECONDS)
      .await()
      .atMost(10, TimeUnit.SECONDS)
      .until(
          () -> GemFireCacheImpl.getInstance() == null
              && InternalDistributedSystem.getAnyInstance() == null);
  assertNull(GemFireCacheImpl.getInstance());

  // verify system was closed out due to OutOfOffHeapMemoryException
  assertFalse(system.isConnected());
  InternalDistributedSystem ids = (InternalDistributedSystem) system;
  try {
    ids.getDistributionManager();
    fail(
        "InternalDistributedSystem.getDistributionManager() should throw DistributedSystemDisconnectedException");
  } catch (DistributedSystemDisconnectedException expected) {
    assertRootCause(expected, OutOfOffHeapMemoryException.class);
  }

  // verify dm was closed out due to OutOfOffHeapMemoryException
  assertTrue(dm.isClosed());
  try {
    dm.throwIfDistributionStopped();
    fail(
        "DistributionManager.throwIfDistributionStopped() should throw DistributedSystemDisconnectedException");
  } catch (DistributedSystemDisconnectedException expected) {
    assertRootCause(expected, OutOfOffHeapMemoryException.class);
  }

  // verify cache was closed out due to OutOfOffHeapMemoryException
  assertTrue(cache.isClosed());
  try {
    cache.getCancelCriterion().checkCancelInProgress(null);
    fail(
        "GemFireCacheImpl.getCancelCriterion().checkCancelInProgress should throw DistributedSystemDisconnectedException");
  } catch (DistributedSystemDisconnectedException expected) {
    assertRootCause(expected, OutOfOffHeapMemoryException.class);
  }
}
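// assertRootCause is used above but not defined in this excerpt. A plausible
// minimal implementation (an assumption, not necessarily the repository's
// actual helper) walks the cause chain to its end and checks the type:
private void assertRootCause(Throwable throwable, Class<?> expectedRootCause) {
  Throwable cause = throwable;
  while (cause.getCause() != null) {
    cause = cause.getCause();
  }
  assertTrue(
      "expected root cause " + expectedRootCause.getName() + " but was " + cause,
      expectedRootCause.isInstance(cause));
}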
@Override
protected void process(final DistributionManager dm) {
  // though this message must be transmitted on an ordered connection to
  // ensure that datagram channels are flushed, we need to execute
  // in the waiting pool to avoid blocking those connections
  dm.getWaitingThreadPool().execute(new Runnable() {
    public void run() {
      if (logger.isTraceEnabled(LogMarker.STATE_FLUSH_OP)) {
        logger.trace(LogMarker.STATE_FLUSH_OP, "Processing {}", this);
      }
      try {
        if (channelState != null) {
          if (logger.isTraceEnabled(LogMarker.STATE_FLUSH_OP) && channelState.size() > 0) {
            logger.trace(
                LogMarker.STATE_FLUSH_OP,
                "Waiting for channel states: {}",
                channelStateDescription(channelState));
          }
          for (;;) {
            dm.getCancelCriterion().checkCancelInProgress(null);
            boolean interrupted = Thread.interrupted();
            try {
              dm.getMembershipManager().waitForMessageState(getSender(), channelState);
              break;
            } catch (InterruptedException e) {
              interrupted = true;
            } finally {
              if (interrupted) {
                Thread.currentThread().interrupt();
              }
            }
          } // for
        }
      } catch (ThreadDeath td) {
        throw td;
      } catch (VirtualMachineError err) {
        SystemFailure.initiateFailure(err);
        // If this ever returns, rethrow the error. We're poisoned
        // now, so don't let this thread continue.
        throw err;
      } catch (Throwable e) {
        // Whenever you catch Error or Throwable, you must also
        // catch VirtualMachineError (see above). However, there is
        // _still_ a possibility that you are dealing with a cascading
        // error condition, so you also need to check to see if the JVM
        // is still usable:
        SystemFailure.checkFailure();
        logger.fatal(
            LocalizedMessage.create(
                LocalizedStrings
                    .StateFlushOperation_EXCEPTION_CAUGHT_WHILE_WAITING_FOR_CHANNEL_STATE),
            e);
      } finally {
        StateStabilizedMessage ga = new StateStabilizedMessage();
        ga.setRecipient((InternalDistributedMember) requestingMember);
        if (isSingleFlushTo) {
          // not a proxied message but a simple request-response
          ga.sendingMember = dm.getDistributionManagerId();
        } else {
          ga.sendingMember = getSender();
        }
        ga.setProcessorId(processorId);
        if (logger.isTraceEnabled(LogMarker.STATE_FLUSH_OP)) {
          logger.trace(LogMarker.STATE_FLUSH_OP, "Sending {}", ga);
        }
        if (requestingMember.equals(dm.getDistributionManagerId())) {
          ga.dmProcess(dm);
        } else {
          dm.putOutgoing(ga);
        }
      }
    }
  });
}
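// The wait loop above is an interrupt-preserving retry: Thread.interrupted()
// clears and remembers the flag, an InterruptedException merely marks it and
// retries, and the finally block re-asserts the interrupt so code further up
// the stack still observes it. The skeleton isolated as a sketch (awaitState
// is a hypothetical stand-in for the blocking waitForMessageState call):
private void waitPreservingInterrupt(DistributionManager dm) {
  for (;;) {
    dm.getCancelCriterion().checkCancelInProgress(null); // stop retrying on shutdown
    boolean interrupted = Thread.interrupted(); // clear and remember interrupt status
    try {
      awaitState(); // blocking call; may throw InterruptedException
      break; // state reached; done waiting
    } catch (InterruptedException e) {
      interrupted = true; // remember, then loop and wait again
    } finally {
      if (interrupted) {
        Thread.currentThread().interrupt(); // restore for callers upstream
      }
    }
  }
}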
@Override
protected void process(DistributionManager dm) {
  logger.trace(LogMarker.STATE_FLUSH_OP, "Processing {}", this);
  if (dm.getDistributionManagerId().equals(relayRecipient)) {
    // no need to send a relay request to this process - just send the
    // ack back to the sender
    StateStabilizedMessage ga = new StateStabilizedMessage();
    ga.sendingMember = relayRecipient;
    ga.setRecipient(this.getSender());
    ga.setProcessorId(processorId);
    dm.putOutgoing(ga);
  } else {
    // 1) wait for all messages based on the membership version (or older)
    //    at which the sender "joined" this region to be put on the pipe
    // 2) record the state of all communication channels from this process
    //    to the relay point
    // 3) send a stabilization message to the relay point that holds the
    //    communication channel state information
    StateStabilizationMessage gr = new StateStabilizationMessage();
    gr.setRecipient((InternalDistributedMember) relayRecipient);
    gr.requestingMember = this.getSender();
    gr.processorId = processorId;
    try {
      Set<DistributedRegion> regions;
      if (this.allRegions) {
        regions = getAllRegions(dm);
      } else {
        regions = Collections.singleton(this.getRegion(dm));
      }
      for (DistributedRegion r : regions) {
        if (r == null) {
          if (logger.isTraceEnabled(LogMarker.DM)) {
            logger.trace(LogMarker.DM, "Region not found - skipping channel state assessment");
          }
          continue;
        }
        if (this.allRegions && r.doesNotDistribute()) {
          // no need to flush a region that does no distribution
          continue;
        }
        boolean initialized = r.isInitialized();
        if (initialized) {
          if (this.flushNewOps) {
            // force a new "view" so we can track current ops
            r.getDistributionAdvisor().forceNewMembershipVersion();
          }
          try {
            r.getDistributionAdvisor().waitForCurrentOperations();
          } catch (RegionDestroyedException e) {
            // continue with the next region
          }
        }
        boolean useMulticast =
            r.getMulticastEnabled() && r.getSystem().getConfig().getMcastPort() != 0;
        if (initialized) {
          Map channelStates =
              dm.getMembershipManager().getMessageState(relayRecipient, useMulticast);
          if (gr.channelState != null) {
            gr.channelState.putAll(channelStates);
          } else {
            gr.channelState = channelStates;
          }
          if (logger.isTraceEnabled(LogMarker.STATE_FLUSH_OP)
              && gr.channelState != null
              && gr.channelState.size() > 0) {
            logger.trace(
                LogMarker.STATE_FLUSH_OP,
                "channel states: {}",
                gr.channelStateDescription(gr.channelState));
          }
        }
      }
    } catch (CancelException cce) {
      // cache is closed - no distribution advisor available for the region,
      // so nothing to do but send the stabilization message
    } catch (Exception e) {
      logger.fatal(
          LocalizedMessage.create(
              LocalizedStrings
                  .StateFlushOperation_0__EXCEPTION_CAUGHT_WHILE_DETERMINING_CHANNEL_STATE,
              this),
          e);
    } catch (ThreadDeath td) {
      throw td;
    } catch (VirtualMachineError err) {
      SystemFailure.initiateFailure(err);
      // If this ever returns, rethrow the error. We're poisoned
      // now, so don't let this thread continue.
      throw err;
    } catch (Throwable t) {
      // Whenever you catch Error or Throwable, you must also
      // catch VirtualMachineError (see above). However, there is
      // _still_ a possibility that you are dealing with a cascading
      // error condition, so you also need to check to see if the JVM
      // is still usable:
      SystemFailure.checkFailure();
      logger.fatal(
          LocalizedMessage.create(
              LocalizedStrings
                  .StateFlushOperation_0__THROWABLE_CAUGHT_WHILE_DETERMINING_CHANNEL_STATE,
              this),
          t);
    } finally {
      if (logger.isTraceEnabled(LogMarker.STATE_FLUSH_OP)) {
        logger.trace(LogMarker.STATE_FLUSH_OP, "Sending {}", gr);
      }
      dm.putOutgoing(gr);
    }
  }
}