/**
 * Runs the segment status checker against a table whose external view lacks partitions that
 * the ideal state lists ({@code myTable_2} and {@code myTable_3} appear only in the ideal
 * state), and checks the gauges it publishes: one segment in error state and zero replicas.
 */
@Test
public void missingEVPartitionTest() throws Exception {
  final String tableName = "myTable";
  List<String> allTableNames = new ArrayList<String>();
  allTableNames.add(tableName);

  // Ideal state: segments 0 and 1 are ONLINE on all three servers; segments 2 and 3 live on
  // pinot3 only (OFFLINE and ONLINE respectively).
  IdealState idealState = new IdealState(tableName);
  for (String segment : new String[] {"myTable_0", "myTable_1"}) {
    for (String server : new String[] {"pinot1", "pinot2", "pinot3"}) {
      idealState.setPartitionState(segment, server, "ONLINE");
    }
  }
  idealState.setPartitionState("myTable_2", "pinot3", "OFFLINE");
  idealState.setPartitionState("myTable_3", "pinot3", "ONLINE");
  idealState.setReplicas("2");
  idealState.setRebalanceMode(IdealState.RebalanceMode.CUSTOMIZED);

  // External view: only segments 0 and 1 are reported, with one replica of segment 1 in ERROR.
  ExternalView externalView = new ExternalView(tableName);
  externalView.setState("myTable_0", "pinot1", "ONLINE");
  externalView.setState("myTable_0", "pinot2", "ONLINE");
  externalView.setState("myTable_1", "pinot1", "ERROR");
  externalView.setState("myTable_1", "pinot2", "ONLINE");

  // Helix admin stub serving the two views built above.
  HelixAdmin admin = mock(HelixAdmin.class);
  when(admin.getResourceIdealState("StatusChecker", "myTable")).thenReturn(idealState);
  when(admin.getResourceExternalView("StatusChecker", "myTable")).thenReturn(externalView);

  // Resource manager stub: this controller is the leader of cluster "StatusChecker".
  helixResourceManager = mock(PinotHelixResourceManager.class);
  when(helixResourceManager.isLeader()).thenReturn(true);
  when(helixResourceManager.getAllPinotTableNames()).thenReturn(allTableNames);
  when(helixResourceManager.getHelixClusterName()).thenReturn("StatusChecker");
  when(helixResourceManager.getHelixAdmin()).thenReturn(admin);

  config = mock(ControllerConf.class);
  when(config.getStatusControllerFrequencyInSeconds()).thenReturn(300);

  metricsRegistry = new MetricsRegistry();
  controllerMetrics = new ControllerMetrics(metricsRegistry);
  segmentStatusChecker = new SegmentStatusChecker(helixResourceManager, config);
  segmentStatusChecker.setMetricsRegistry(controllerMetrics);
  segmentStatusChecker.runSegmentMetrics();

  final String gaugeTable = externalView.getId();
  Assert.assertEquals(
      controllerMetrics.getValueOfTableGauge(gaugeTable, ControllerGauge.SEGMENTS_IN_ERROR_STATE),
      1);
  Assert.assertEquals(
      controllerMetrics.getValueOfTableGauge(gaugeTable, ControllerGauge.NUMBER_OF_REPLICAS),
      0);
  segmentStatusChecker.stop();
}
/**
 * Recomputes the MASTER/SLAVE assignment of the {@code MySQLConstants.MASTER_SLAVE_RESOURCE_NAME}
 * resource and writes it back to Helix when it differs from the current ideal state.
 *
 * <p>For every partition the current master and slaves are read from the ideal state. If the
 * current master is not among the live instances, or its instance config reports it as
 * disabled, {@code findNewMaster} is asked for a replacement. The instance matching the chosen
 * master (case-insensitively) is assigned {@code MASTER}; every other instance of the partition
 * is assigned {@code SLAVE}.
 *
 * <p>The callback returns without doing anything on a {@code FINALIZE} notification or when the
 * resource's ideal state does not exist yet.
 *
 * @param context the Helix notification context that triggered this callback
 */
@Override
public void onCallback(NotificationContext context) {
  LOG.info(
      "START: MasterSlaveRebalancer.onCallback running at "
          + _context.getHelixManager().getInstanceName());
  if (context.getType().equals(NotificationContext.Type.FINALIZE)) {
    LOG.info(
        "END: MasterSlaveRebalancer.onCallback FINALIZE callback invoked. Likely lost connection to Helix");
    return;
  }

  HelixManager manager = context.getManager();
  String clusterName = manager.getClusterName();
  HelixAdmin helixAdmin = manager.getClusterManagmentTool();
  IdealState idealState =
      helixAdmin.getResourceIdealState(clusterName, MySQLConstants.MASTER_SLAVE_RESOURCE_NAME);
  if (idealState == null) {
    LOG.info(
        "END: MasterSlaveRebalancer.onCallback. "
            + MySQLConstants.MASTER_SLAVE_RESOURCE_NAME
            + " is not yet created");
    // Nothing to rebalance yet. Without this return the code below would dereference the
    // null ideal state and throw a NullPointerException.
    return;
  }

  PropertyKey.Builder builder = new PropertyKey.Builder(clusterName);
  Map<String, LiveInstance> liveInstancesMap =
      manager.getHelixDataAccessor().getChildValuesMap(builder.liveInstances());
  Map<String, InstanceConfig> instanceConfigs =
      manager.getHelixDataAccessor().getChildValuesMap(builder.instanceConfigs());

  // Start from a copy of the simple and list fields; the per-partition states are rebuilt below.
  IdealState newIdealState = new IdealState(idealState.getId());
  newIdealState.getRecord().setSimpleFields(idealState.getRecord().getSimpleFields());
  newIdealState.getRecord().setListFields(idealState.getRecord().getListFields());

  for (String partition : idealState.getPartitionSet()) {
    Map<String, String> instanceStateMap = idealState.getInstanceStateMap(partition);

    // Find the current master (if any) and the set of slaves for this partition.
    String currMaster = null;
    Set<String> slaveSet = new TreeSet<String>();
    for (Map.Entry<String, String> entry : instanceStateMap.entrySet()) {
      String state = entry.getValue();
      if ("MASTER".equalsIgnoreCase(state)) {
        currMaster = entry.getKey();
      }
      if ("SLAVE".equalsIgnoreCase(state)) {
        slaveSet.add(entry.getKey());
      }
    }

    String newMaster = currMaster;
    if (!liveInstancesMap.containsKey(currMaster)
        || !instanceConfigs.get(currMaster).getInstanceEnabled()) {
      // need to find a new master.
      newMaster = findNewMaster(liveInstancesMap, instanceConfigs, currMaster, slaveSet);
    }

    // Exactly the instance matching newMaster becomes MASTER; if none matches, all are SLAVE.
    for (String instance : instanceStateMap.keySet()) {
      if (instance.equalsIgnoreCase(newMaster)) {
        newIdealState.setPartitionState(partition, instance, "MASTER");
      } else {
        newIdealState.setPartitionState(partition, instance, "SLAVE");
      }
    }
  }

  if (!idealState.equals(newIdealState)) {
    LOG.info("New idealstate computed.");
    LOG.info(newIdealState.toString());
    helixAdmin.setResourceIdealState(
        clusterName, MySQLConstants.MASTER_SLAVE_RESOURCE_NAME, newIdealState);
  } else {
    LOG.info("No change in IdealState");
  }
  LOG.info("END: MasterSlaveRebalancer.onCallback");
}
@Test public void testZKReconnect() throws Exception { final AtomicReference<ZkServer> zkServerRef = new AtomicReference<ZkServer>(); final int zkPort = TestHelper.getRandomPort(); final String zkAddr = String.format("localhost:%d", zkPort); ZkServer zkServer = TestHelper.startZkServer(zkAddr); zkServerRef.set(zkServer); String className = TestHelper.getTestClassName(); String methodName = TestHelper.getTestMethodName(); String clusterName = className + "_" + methodName; // Setup cluster LOG.info("Setup clusters"); ClusterSetup clusterSetup = new ClusterSetup(zkAddr); clusterSetup.addCluster(clusterName, true); // Registers and starts controller LOG.info("Starts controller"); HelixManager controller = HelixManagerFactory.getZKHelixManager(clusterName, null, InstanceType.CONTROLLER, zkAddr); controller.connect(); // Registers and starts participant LOG.info("Starts participant"); String hostname = "localhost"; String instanceId = String.format("%s_%d", hostname, 1); clusterSetup.addInstanceToCluster(clusterName, instanceId); HelixManager participant = HelixManagerFactory.getZKHelixManager( clusterName, instanceId, InstanceType.PARTICIPANT, zkAddr); participant.connect(); LOG.info("Register state machine"); final CountDownLatch latch = new CountDownLatch(1); participant .getStateMachineEngine() .registerStateModelFactory( "OnlineOffline", new StateModelFactory<StateModel>() { @Override public StateModel createNewStateModel(String stateUnitKey) { return new SimpleStateModel(latch); } }, "test"); String resourceName = "test-resource"; LOG.info("Ideal state assignment"); HelixAdmin helixAdmin = participant.getClusterManagmentTool(); helixAdmin.addResource( clusterName, resourceName, 1, "OnlineOffline", IdealState.RebalanceMode.CUSTOMIZED.toString()); IdealState idealState = helixAdmin.getResourceIdealState(clusterName, resourceName); idealState.setReplicas("1"); idealState.setStateModelFactoryName("test"); idealState.setPartitionState(resourceName + "_0", instanceId, 
"ONLINE"); LOG.info("Shutdown ZK server"); TestHelper.stopZkServer(zkServerRef.get()); Executors.newSingleThreadScheduledExecutor() .schedule( new Runnable() { @Override public void run() { try { LOG.info("Restart ZK server"); // zkServer.set(TestUtils.startZookeeper(zkDir, zkPort)); zkServerRef.set(TestHelper.startZkServer(zkAddr, null, false)); } catch (Exception e) { LOG.error(e.getMessage(), e); } } }, 2L, TimeUnit.SECONDS); // future.get(); LOG.info("Before update ideal state"); helixAdmin.setResourceIdealState(clusterName, resourceName, idealState); LOG.info("After update ideal state"); LOG.info("Wait for OFFLINE->ONLINE state transition"); try { Assert.assertTrue(latch.await(10, TimeUnit.SECONDS)); // wait until stable state boolean result = ClusterStateVerifier.verifyByZkCallback( new BestPossAndExtViewZkVerifier(zkAddr, clusterName)); Assert.assertTrue(result); } finally { participant.disconnect(); zkServerRef.get().shutdown(); } }