/** * Forces recovery by revoking the members the system is waiting for. It is the responsibility of * the caller to know that the correct members are being waited for since this will revoke all * waiting members. * * @param doValidation If true, then validate the missing disk stores (used for serial tests or * tests that are in a silent phase), if false then don't do validation (used for concurrent * tests). */ public static void forceRecovery(boolean doValidation) { AdminDistributedSystem adminDS = AdminHelper.getAdminDistributedSystem(); Set<PersistentID> waitingForMembers; try { waitingForMembers = adminDS.getMissingPersistentMembers(); if (doValidation) { verifyMissingDiskStoresCommandLineTool(waitingForMembers); } for (PersistentID persistId : waitingForMembers) { boolean revokeWithCommandLineTool = TestConfig.tab().getRandGen().nextBoolean(); // workaround for bug 42432; when this bug is fixed remove the following if if (HostHelper.isWindows()) { revokeWithCommandLineTool = false; // force usage of API } if (CliHelperPrms.getUseCli()) { revokeWithCommandLineTool = true; } if (revokeWithCommandLineTool) { // revoke with command line tool PersistenceUtil.runRevokeMissingDiskStore(persistId); } else { Log.getLogWriter().info("Revoking PersistentID " + persistId); adminDS.revokePersistentMember(persistId.getUUID()); } } } catch (AdminException e) { throw new TestException(TestHelper.getStackTrace(e)); } }
/** * Verify that the system is waiting for member(s) to come up before recovering. * * @param expectedWaitingForMember This specifies the vmID of the member the system should be * waiting for. * @param membersWaiting This is a List of the members that should be waiting. */ protected static void verifyWaiting( ClientVmInfo expectedWaitingForMember, List<ClientVmInfo> membersWaiting) { AdminDistributedSystem adminDS = AdminHelper.getAdminDistributedSystem(); boolean conditionPreviouslyMet = false; do { try { Set<PersistentID> waitingForMembers = adminDS.getMissingPersistentMembers(); Log.getLogWriter() .info( "System is waiting for " + waitingForMembers.size() + " member(s); " + waitingForMembers + ", test is expecting the system to be waiting for member with vmID " + expectedWaitingForMember.getVmid()); Set vmsWaitingFor = new TreeSet(); for (PersistentID id : waitingForMembers) { String diskDirWaitingFor = id.getDirectory(); String searchStr = "vm_"; int index = diskDirWaitingFor.indexOf(searchStr) + searchStr.length(); int index2 = diskDirWaitingFor.indexOf("_", index); String vmIdStr = diskDirWaitingFor.substring(index, index2); vmsWaitingFor.add(vmIdStr); } if (vmsWaitingFor.size() == 1) { // waiting for correct number of members String singleVmWaitingFor = (String) vmsWaitingFor.iterator().next(); if (singleVmWaitingFor.equals( expectedWaitingForMember .getVmid() .toString())) { // waiting for 1 member only; it is the correct member // now we found what we were waiting for, but to make sure the product sticks with this // over a period of time; we will sleep and check again Log.getLogWriter().info("System is waiting for expected member " + waitingForMembers); if (conditionPreviouslyMet) { // this is the second time the condition has been met verifyMissingDiskStoresCommandLineTool(waitingForMembers); Log.getLogWriter() .info( "Verified that the system is waiting on vm ID " + expectedWaitingForMember.getVmid() + "; system is waiting for vm with ID " + singleVmWaitingFor); return; } else { // this if the first time the correct condition has been met conditionPreviouslyMet = true; Log.getLogWriter() .info( "Sleeping for 180 seconds to make sure the member the system is waiting for does not change"); MasterController.sleepForMs(180000); } } else { Log.getLogWriter() .info( "System is not waiting for the expected member " + expectedWaitingForMember.getVmid()); conditionPreviouslyMet = false; } } else { Log.getLogWriter() .info( "System is not waiting for the expected member " + expectedWaitingForMember.getVmid()); conditionPreviouslyMet = false; } } catch (AdminException e) { throw new TestException(TestHelper.getStackTrace(e)); } MasterController.sleepForMs(2000); } while (true); }
/** * Invokes AdminDistributedSystem.shutDownAllMembers() which disconnects all members but leaves * the vms up (because hydra threads remain) including the possibility of this vm being * disconnected, then this actually stops those vms (except this vm if it was targeted in the * shutDownAllMembers...this vm will remain up but disconnect). Stopped vms are stopped with * ON_DEMAND restart. This returns when the vms disconnected by shutDownAllMembers() (other than * this one) are all stopped . * * @param adminDS The admin distributed system instance to use to call shutdownAllMembers. * @param stopModes The stop modes to choose from. * @return An Array [0] List of {@link ClientVmInfo} instances describing the VMs that were * stopped. [1] Set, the return from shutdownAllMembers() * @throws AdminException if the shutDownAllMembers call throws this exception. */ public static Object[] shutDownAllMembers( AdminDistributedSystem adminDS, List<String> stopModes) { if (adminDS == null) { throw new HydraRuntimeException("AdminDistributedSystem cannot be null"); } // Invoke shutDownAllMembers Log.getLogWriter().info("AdminDS " + adminDS + " is shutting down all members..."); Set<DistributedMember> memberSet; try { long startTime = System.currentTimeMillis(); memberSet = adminDS.shutDownAllMembers(); long duration = System.currentTimeMillis() - startTime; Log.getLogWriter() .info( "AdminDS " + adminDS + " shut down (disconnected) the following members " + "(vms remain up): " + memberSet + "; shutDownAll duration " + duration + "ms"); } catch (AdminException e1) { throw new TestException(TestHelper.getStackTrace(e1)); } // Now actually stop the vms. // First get the ClientVmInfos for the members that shutDownAllMembers // disconnected. List<ClientVmInfo> allClientInfos = new ArrayList(); // all members that were shutdown List<ClientVmInfo> allOtherClientInfos = new ArrayList(); // all members that were shutdown except this member ClientVmInfo thisClientInfo = null; // this member, or will remain null if this member was not shutdown List<String> stopModesToUse = new ArrayList(); for (DistributedMember aMember : memberSet) { Integer vmId = null; try { vmId = new Integer(RemoteTestModule.Master.getVmid(aMember.getHost(), aMember.getProcessId())); } catch (java.rmi.RemoteException e) { throw new HydraRuntimeException("Unable to get vmID for " + aMember + ": " + e); } ClientVmInfo infoFromBB = (ClientVmInfo) StopStartBB.getBB().getSharedMap().get("StopStartVMInfo_for_vmid_" + vmId); String clientName = null; if (infoFromBB != null) { clientName = infoFromBB.getClientName(); } ClientVmInfo info = new ClientVmInfo(vmId, clientName, null); allClientInfos.add(info); if (vmId == RemoteTestModule.getMyVmid()) { // shutdownAll disconnected this vm thisClientInfo = info; } else { // aMember is not the current vm allOtherClientInfos.add(info); } stopModesToUse.add( stopModes.get(TestConfig.tab().getRandGen().nextInt(0, stopModes.size() - 1))); } // now actually stop the vms; if this vm is included, do it last Object[] returnArr = new Object[2]; if (thisClientInfo == null) { // shutDownAllMembers did not disconnect this vm // we can just stop all of them now and this vm lives on StopStartVMs.stopVMs(allClientInfos, stopModesToUse); // restart is ON_DEMAND returnArr[0] = allClientInfos; } else { // this vm was disconnected by shutDownAllMembers // first shutdown all other members except this one StopStartVMs.stopVMs(allOtherClientInfos, stopModesToUse.subList(0, stopModesToUse.size())); returnArr[0] = allOtherClientInfos; } returnArr[1] = memberSet; return returnArr; }