@TransactionAttribute(TransactionAttributeType.NEVER)
public MeasurementBaseline calculateAutoBaseline(Subject subject, int groupId, int definitionId, long startDate,
    long endDate, boolean save) throws BaselineCreationException, MeasurementNotFoundException {

    MeasurementBaseline result = measurementBaselineManager.calculateAutoBaselineForGroupInNewTransaction(subject,
        groupId, definitionId, startDate, endDate, save);

    if (save) {
        // note, this executes in a new transaction so the baseline must already be committed to the database
        agentStatusManager.updateByMeasurementBaseline(result.getId());
    }

    return result;
}
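/*
 * Illustrative aside (not part of the original bean): the NEVER attribute above forces this facade to run
 * outside any transaction, while the ...InNewTransaction delegate runs with REQUIRES_NEW, so its transaction
 * has already committed by the time updateByMeasurementBaseline reads the new baseline row. A minimal
 * self-contained sketch of that split follows, assuming an EJB 3.1 container with a no-interface view; the
 * bean and method names here are hypothetical, only the annotations are the standard idiom.
 */
import javax.annotation.Resource;
import javax.ejb.SessionContext;
import javax.ejb.Stateless;
import javax.ejb.TransactionAttribute;
import javax.ejb.TransactionAttributeType;

@Stateless
public class TxSplitSketchBean {
    @Resource
    private SessionContext ctx;

    @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
    public int writeInNewTransaction() {
        // persist something here; the transaction commits when this method returns
        return 42; // stand-in for the newly committed baseline id
    }

    @TransactionAttribute(TransactionAttributeType.NEVER)
    public void facade() {
        // call through the container proxy, not "this", so the REQUIRES_NEW
        // interceptor actually fires for the nested call
        int id = ctx.getBusinessObject(TxSplitSketchBean.class).writeInNewTransaction();
        // the write is committed at this point and visible to any follow-up
        // transaction that is handed "id" (compare updateByMeasurementBaseline above)
    }
}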
@TransactionAttribute(TransactionAttributeType.NEVER)
public void calculateAutoBaselines() {
    Properties conf = systemManager.getSystemConfiguration(subjectManager.getOverlord());

    // frequency is how often the baselines are recalculated
    // data set is how far back a scheduled measurement's data is included in the baseline calcs
    // A frequency of 3 days and a data set of 10 days means "every 3 days, recalculate baselines
    // automatically. For each scheduled measurement, take its last 10 days worth of data and use
    // that data set as the portion that will be used to get the min/max/average".
    String baselineFrequencyString = conf.getProperty(RHQConstants.BaselineFrequency);
    String baselineDataSetString = conf.getProperty(RHQConstants.BaselineDataSet);

    log.debug("Found baseline defaults: " + "frequency=" + baselineFrequencyString + " dataset="
        + baselineDataSetString);

    // It's time to auto-calculate the baselines again.
    // Determine how much data we need to calculate baselines for by determining the oldest and
    // youngest measurement data to include in the calculations.
    long amountOfData = Long.parseLong(baselineDataSetString);
    long baselinesOlderThanTime = System.currentTimeMillis() - Long.parseLong(baselineFrequencyString);

    measurementBaselineManager.calculateAutoBaselines(amountOfData, baselinesOlderThanTime);

    // everything was calculated successfully, remember this time
    conf = systemManager.getSystemConfiguration(subjectManager.getOverlord()); // reload the config in case it was changed since we started
    try {
        systemManager.setSystemConfiguration(subjectManager.getOverlord(), conf, true);
    } catch (Exception e) {
        log.error("Failed to remember the time when we just calculated baselines - it may recalculate again soon.",
            e);
    }
}
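/*
 * Illustrative aside (not part of the original bean): the frequency/data-set arithmetic above, shown as a
 * runnable standalone sketch. It assumes both system-config properties store millisecond counts as strings,
 * matching the parseLong calls above; the 3-day / 10-day values mirror the example in the comment.
 */
public class BaselineConfigMathSketch {
    public static void main(String[] args) {
        String baselineFrequencyString = String.valueOf(3L * 24 * 60 * 60 * 1000);  // recalculate every 3 days
        String baselineDataSetString = String.valueOf(10L * 24 * 60 * 60 * 1000);   // use the last 10 days of data

        // how far back the measurement data set reaches for each baseline
        long amountOfData = Long.parseLong(baselineDataSetString);

        // baselines computed before this cutoff are due for recalculation
        long baselinesOlderThanTime = System.currentTimeMillis() - Long.parseLong(baselineFrequencyString);

        System.out.println("data set = " + (amountOfData / (24 * 60 * 60 * 1000L)) + " days");
        System.out.println("recalculate baselines computed before " + new java.util.Date(baselinesOlderThanTime));
    }
}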
@TransactionAttribute(TransactionAttributeType.NEVER)
public long calculateAutoBaselines(long amountOfData, long baselinesOlderThanTime) {
    try {
        log.info("Calculating auto baselines");
        log.info("Deleting baseline computations older than " + new Date(baselinesOlderThanTime));
        log.info("Inserting new baselines using last " + (amountOfData / (24 * 60 * 60 * 1000L))
            + " days of 1H data");
        long now = System.currentTimeMillis();
        long computeTime = now;

        log.debug("computeTime = " + computeTime);

        int deleted = measurementBaselineManager._calculateAutoBaselinesDELETE(baselinesOlderThanTime);
        log.info("Removed [" + deleted + "] old baselines - they will now be recalculated ("
            + (System.currentTimeMillis() - now) + ")ms");

        now = System.currentTimeMillis();
        int totalInserted = 0;
        while (true) {
            /*
             * Each call is done in a separate transaction of at most 100K inserted rows; this helps to keep
             * the transaction shorter to avoid timeouts in scenarios where baseline calculations bunch
             * together. The idea was that by basing a batch of baseline calculations off of the import time
             * of the resource into inventory, the total work would naturally be staggered throughout the day.
             * In practice, this didn't always work as intended, for one of several reasons:
             *
             * 1) all servers in the cloud were down for a few days (maybe a slow product upgrade, maybe a
             *    cold data center relocation)
             * 2) issues with running the job itself, if quartz had locking issues under severe load and
             *    somehow this job wasn't getting executed for a few hours / days
             * 3) the user tended to import all new resources / platforms at the same time of day, thus
             *    bypassing the implicit optimization of trying to stagger the calculations by resource
             *    commit time
             *
             * 2/18/2010 NOTE: Limits weren't / aren't actually achieving the effect we want. The baseline
             * query follows the general form of "insert into...select from <big query> having <subquery>
             * limit X". In this case, the limit was reducing the number of rows inserted, but it was still
             * taking the full cost of calculating everything that should have been inserted. The limit was
             * intended as a cheap method of chunking or partitioning the work, but wasn't properly chunking
             * the expensive part - the "big query". What we actually want to do is come up with a strategy
             * that lessens the amount of data we need to select, thereby reducing the amount of time it
             * takes to calculate the insertion list.
             *
             * One proposed strategy for this would be to chunk on the scheduleId. So if there were, say,
             * 5M scheduleIds in the system, we might take 500K of them at a time and then execute the
             * baseline insertion job 10 times against a much smaller set of data each time. But the
             * complication here is how to calculate precise groups of 500K schedules at a time, and then
             * walk that chunked list.
             *
             * Another strategy would be to divvy things up by resource type. Since a measurementSchedule is
             * linked to a measurementDefinition which is linked to a resourceType, we could very easily
             * chunk the insertion based on the schedules that belong to each resourceType. This would create
             * one insert statement for each type of resource in the system. The complication here, however,
             * is that you may have millions of resources of one type, but hardly any resources of another.
             * So there's still a chance that some insertions proceed slowly (in the worst case).
             *
             * In any event, an appropriate chunking solution needs to be found, and that partitioning
             * strategy needs to replace the limits in the query today (a rough sketch of the scheduleId
             * approach follows this method).
             */
            int inserted = measurementBaselineManager._calculateAutoBaselinesINSERT(amountOfData);
            totalInserted += inserted;
            // since we're batching 100K inserts at a time, we're done if we didn't have that many to insert
            if (inserted < 100000) {
                break;
            }
        }

        log.info("Calculated and inserted [" + totalInserted + "] new baselines. ("
            + (System.currentTimeMillis() - now) + ")ms");

        MeasurementMonitor.getMBean().incrementBaselineCalculationTime(System.currentTimeMillis() - computeTime);

        agentStatusManager.updateByAutoBaselineCalculationJob();

        return computeTime;
    } catch (Exception e) {
        log.error("Failed to auto-calculate baselines", e);
        throw new RuntimeException("Auto-calculation failure", e);
    }
}
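/*
 * Illustrative aside (not part of the original bean): one shape the scheduleId-chunking strategy proposed
 * in the comment above could take. Everything here is hypothetical - getMinScheduleId/getMaxScheduleId and
 * the range-bounded _calculateAutoBaselinesINSERT_forSchedules variant do not exist in the current
 * managers; the point is only the loop structure, where each INSERT's "big query" is bounded by a
 * scheduleId range so it scans one slice of the data instead of everything.
 */
private static final int SCHEDULE_CHUNK_SIZE = 500000; // e.g. 500K of 5M schedule ids per transaction

public int calculateAutoBaselinesChunked(long amountOfData) {
    // hypothetical queries returning the bounds of the schedule id space
    int minId = measurementScheduleManager.getMinScheduleId();
    int maxId = measurementScheduleManager.getMaxScheduleId();

    int totalInserted = 0;
    // iterate with a long so the chunk arithmetic can't overflow near Integer.MAX_VALUE
    for (long lowId = minId; lowId <= maxId; lowId += SCHEDULE_CHUNK_SIZE) {
        long highId = Math.min(lowId + SCHEDULE_CHUNK_SIZE - 1, maxId);
        // hypothetical INSERT variant whose WHERE clause adds
        // "scheduleId BETWEEN :lowId AND :highId", chunking the expensive select
        totalInserted += measurementBaselineManager._calculateAutoBaselinesINSERT_forSchedules(amountOfData,
            (int) lowId, (int) highId);
    }
    return totalInserted;
}

/*
 * Note the trade-off the original comment anticipates: ranges over the raw id space are only as even as
 * the ids are dense, so sparse ids yield uneven chunks - that is exactly the "how to calculate precise
 * groups of 500K schedules" complication it mentions.
 */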