/** * A base class for {@link JobGenerator} implementations. It is recommended to extend this class to * implement a new job generator. * * <p>The base algorithm iterates over domain configurations within the harvest definition, and * according to the configuration ({@link HarvesterSettings#JOBGEN_DOMAIN_CONFIG_SUBSET_SIZE}, * constitutes a subset of domain configurations from which one or more jobs will be generated. */ abstract class AbstractJobGenerator implements JobGenerator { /** Logger for this class. */ private static Log log = LogFactory.getLog(AbstractJobGenerator.class); /** How many domain configurations to process in one go. */ private final long DOMAIN_CONFIG_SUBSET_SIZE = Settings.getLong(HarvesterSettings.JOBGEN_DOMAIN_CONFIG_SUBSET_SIZE); /** Is deduplication enabled or disabled. * */ private final boolean DEDUPLICATION_ENABLED = Settings.getBoolean(HarvesterSettings.DEDUPLICATION_ENABLED); @Override public int generateJobs(HarvestDefinition harvest) { log.info("Generating jobs for harvestdefinition # " + harvest.getOid()); int jobsMade = 0; final Iterator<DomainConfiguration> domainConfigurations = harvest.getDomainConfigurations(); while (domainConfigurations.hasNext()) { List<DomainConfiguration> subset = new ArrayList<DomainConfiguration>(); while (domainConfigurations.hasNext() && subset.size() < DOMAIN_CONFIG_SUBSET_SIZE) { subset.add(domainConfigurations.next()); } Collections.sort(subset, getDomainConfigurationSubsetComparator(harvest)); if (log.isTraceEnabled()) { log.trace( subset.size() + " domainconfigs now sorted and ready to processing " + "for harvest #" + harvest.getOid()); } jobsMade += processDomainConfigurationSubset(harvest, subset.iterator()); } harvest.setNumEvents(harvest.getNumEvents() + 1); if (!harvest.isSnapShot()) { PartialHarvest focused = (PartialHarvest) harvest; Schedule schedule = focused.getSchedule(); int numEvents = harvest.getNumEvents(); // Calculate next event Date now = new Date(); Date nextEvent = schedule.getNextEvent(focused.getNextDate(), numEvents); // Refuse to schedule event in the past if (nextEvent != null && nextEvent.before(now)) { int eventsSkipped = 0; while (nextEvent != null && nextEvent.before(now)) { nextEvent = schedule.getNextEvent(nextEvent, numEvents); eventsSkipped++; } if (log.isWarnEnabled()) { log.warn( "Refusing to schedule harvest definition '" + harvest.getName() + "' in the past. Skipped " + eventsSkipped + " events. Old nextDate was " + focused.getNextDate() + " new nextDate is " + nextEvent); } } // Set next event focused.setNextDate(nextEvent); if (log.isTraceEnabled()) { log.trace( "Next event for harvest definition " + harvest.getName() + " happens: " + (nextEvent == null ? "Never" : nextEvent.toString())); } } log.info( "Finished generating " + jobsMade + " jobs for harvestdefinition # " + harvest.getOid()); return jobsMade; } /** * Instantiates a new job. * * @param cfg the {@link DomainConfiguration} being processed * @param harvest the {@link HarvestDefinition} being processed * @return an instance of {@link Job} */ public static Job getNewJob(HarvestDefinition harvest, DomainConfiguration cfg) { HarvestChannelDAO harvestChannelDao = HarvestChannelDAO.getInstance(); HarvestChannel channel = harvestChannelDao.getChannelForHarvestDefinition(harvest.getOid()); if (channel == null) { log.info( "No channel mapping registered for harvest id " + harvest.getOid() + ", will use default."); channel = harvestChannelDao.getDefaultChannel(harvest.isSnapShot()); } if (harvest.isSnapShot()) { return Job.createSnapShotJob( harvest.getOid(), channel, cfg, harvest.getMaxCountObjects(), harvest.getMaxBytes(), ((FullHarvest) harvest).getMaxJobRunningTime(), harvest.getNumEvents()); } return Job.createJob(harvest.getOid(), channel, cfg, harvest.getNumEvents()); } /** * Returns a comparator used to sort the subset of {@link #DOMAIN_CONFIG_SUBSET_SIZE} * configurations that are scanned at each iteration. * * @param harvest the {@link HarvestDefinition} being processed. * @return a comparator */ protected abstract Comparator<DomainConfiguration> getDomainConfigurationSubsetComparator( HarvestDefinition harvest); /** * Create new jobs from a collection of configurations. All configurations must use the same * order.xml file.Jobs * * @param harvest the {@link HarvestDefinition} being processed. * @param domainConfSubset the configurations to use to create the jobs * @return The number of jobs created * @throws ArgumentNotValid if any of the parameters is null or if the cfglist does not contain * any configurations */ protected abstract int processDomainConfigurationSubset( HarvestDefinition harvest, Iterator<DomainConfiguration> domainConfSubset); @Override public boolean canAccept(Job job, DomainConfiguration cfg) { if (!checkAddDomainConfInvariant(job, cfg)) { return false; } return checkSpecificAcceptConditions(job, cfg); } /** * Called by {@link #canAccept(Job, DomainConfiguration)}. Tests the implementation-specific * conditions to accept the given {@link DomainConfiguration} in the given {@link Job}. It is * assumed that {@link #checkAddDomainConfInvariant(Job, DomainConfiguration)} has already passed. * * @param job the {@link Job} n=being built * @param cfg the {@link DomainConfiguration} to test * @return true if the configuration passes the conditions. */ protected abstract boolean checkSpecificAcceptConditions(Job job, DomainConfiguration cfg); /** * Once the job has been filled with {@link DomainConfiguration}s, performs the following * operations: * * <ol> * <li>Edit the harvest template to add/remove deduplicator configuration. * <li> * </ol> * * @param job the job */ protected void editJobOrderXml(Job job) { Document doc = job.getOrderXMLdoc(); if (DEDUPLICATION_ENABLED) { // Check that the Deduplicator element is present in the // OrderXMl and enabled. If missing or disabled log a warning if (!HeritrixTemplate.isDeduplicationEnabledInTemplate(doc)) { if (log.isWarnEnabled()) { log.warn( "Unable to perform deduplication for this job" + " as the required DeDuplicator element is " + "disabled or missing from template"); } } } else { // Remove deduplicator Element from OrderXML if present Node xpathNode = doc.selectSingleNode(HeritrixTemplate.DEDUPLICATOR_XPATH); if (xpathNode != null) { xpathNode.detach(); job.setOrderXMLDoc(doc); if (log.isInfoEnabled()) { log.info("Removed DeDuplicator element because " + "Deduplication is disabled"); } } } } /** * Tests that: * * <ol> * <li>The given domain configuration and job are not null. * <li>The job does not already contain the given domain configuration. * <li>The domain configuration has the same order xml name as the first inserted domain config. * </ol> * * @param job a given Job * @param cfg a given DomainConfiguration * @return true, if the given DomainConfiguration can be inserted into the given job */ private boolean checkAddDomainConfInvariant(Job job, DomainConfiguration cfg) { ArgumentNotValid.checkNotNull(job, "job"); ArgumentNotValid.checkNotNull(cfg, "cfg"); // check if domain in DomainConfiguration cfg is not already in this job // domainName is used as key in domainConfigurationMap if (job.getDomainConfigurationMap().containsKey(cfg.getDomainName())) { if (log.isDebugEnabled()) { log.debug("Job already has a configuration for Domain '" + cfg.getDomainName() + "'."); } return false; } // check if template is same as this job. String orderXMLname = job.getOrderXMLName(); if (!orderXMLname.equals(cfg.getOrderXmlName())) { if (log.isDebugEnabled()) { log.debug( "This Job only accept configurations " + "using the harvest template '" + orderXMLname + "'. This configuration uses the harvest template '" + cfg.getOrderXmlName() + "'."); } return false; } return true; } }
/** * This class implements a generator for an history chart of a running job. The chart traces the * progress percentage and the queued URI count over the crawl time. Charts are rendered in a PNG * image file, generated in the webapp directory. */ class StartedJobHistoryChartGen { /** Time units used to scale the crawl time values and generate the chart's time axis ticks. */ protected static enum TimeAxisResolution { /** One second. Tick step is 10s. */ second(1, 1, 10), /** One minute. Tick step is 5m. */ minute(60, 60, 5), /** One hour. Tick step is 1h. */ hour(60 * minute.seconds, 60 * minute.seconds, 1), /** Twelve hours. Tick step is 2h. */ half_day(12 * 60 * minute.seconds, 60 * minute.seconds, 2), /** One day. Tick step is 0.5d. */ day(24 * hour.seconds, 24 * hour.seconds, 0.5), /** One week. Tick step is 1w. */ week(7 * day.seconds, 7 * day.seconds, 1); /** The time unit in seconds. */ private final int seconds; /** The scale in seconds. */ private final int scaleSeconds; /** The step between two tick units. */ private final double tickStep; /** * Builds a time axis resolution. * * @param seconds the actual resolution in seconds * @param scaleSeconds the actual "scale" of ticks * @param tickStep the number of ticks in one step. */ TimeAxisResolution(int seconds, int scaleSeconds, double tickStep) { this.seconds = seconds; this.scaleSeconds = scaleSeconds; this.tickStep = tickStep; } /** * Scale down an array of seconds. * * @param timeInSeconds An array of seconds * @return a scaled down version of the given array of seconds */ double[] scale(double[] timeInSeconds) { double[] scaledTime = new double[timeInSeconds.length]; for (int i = 0; i < timeInSeconds.length; i++) { scaledTime[i] = timeInSeconds[i] / this.scaleSeconds; } return scaledTime; } /** * @param seconds the seconds * @return the proper timeUnit for the given argument */ static TimeAxisResolution findTimeUnit(double seconds) { TimeAxisResolution[] allTus = values(); for (int i = 0; i < allTus.length - 1; i++) { TimeAxisResolution nextGreater = allTus[i + 1]; if (seconds < nextGreater.seconds) { return allTus[i]; } } return week; // largest unit } } /** A chart generation task. Generates a PNG image for a job progress history. */ private static class ChartGen implements Runnable { /** The process that generates the Charts. */ private final StartedJobHistoryChartGen gen; /** * Constructor of a ChartGen objector. * * @param gen the process that generates the charts. */ ChartGen(StartedJobHistoryChartGen gen) { super(); this.gen = gen; } @Override public void run() { synchronized (gen) { gen.chartFile = null; } long jobId = gen.jobId; StartedJobInfo[] fullHistory = RunningJobsInfoDAO.getInstance().getFullJobHistory(jobId); LinkedList<Double> timeValues = new LinkedList<Double>(); LinkedList<Double> progressValues = new LinkedList<Double>(); LinkedList<Double> urlValues = new LinkedList<Double>(); for (StartedJobInfo sji : fullHistory) { timeValues.add((double) sji.getElapsedSeconds()); progressValues.add(sji.getProgress()); urlValues.add((double) sji.getQueuedFilesCount()); } // Refresh the history png image for the job. File pngFile = new File(gen.outputFolder, jobId + "-history.png"); File newPngFile; try { newPngFile = File.createTempFile(jobId + "-history", "." + System.currentTimeMillis() + ".png"); } catch (IOException e) { LOG.warn("Failed to create temp PNG file for job " + jobId); return; } long startTime = System.currentTimeMillis(); gen.generatePngChart( newPngFile, CHART_RESOLUTION[0], CHART_RESOLUTION[1], null, // no chart title I18N.getString(gen.locale, "running.job.details.chart.legend.crawlTime"), new String[] { I18N.getString(gen.locale, "running.job.details.chart.legend.progress"), I18N.getString(gen.locale, "running.job.details.chart.legend.queuedUris") }, NumberUtils.toPrimitiveArray(timeValues), new double[][] {new double[] {0, 100}, null}, new double[][] { NumberUtils.toPrimitiveArray(progressValues), NumberUtils.toPrimitiveArray(urlValues) }, new Color[] {Color.blue, Color.green.darker()}, new String[] {"%", ""}, false, Color.lightGray.brighter().brighter()); long genTime = System.currentTimeMillis() - startTime; LOG.info( "Generated history chart for job " + jobId + " in " + (genTime < TimeUtils.SECOND_IN_MILLIS ? genTime + " ms" : StringUtils.formatDuration(genTime / TimeUtils.SECOND_IN_MILLIS)) + "."); synchronized (gen) { // Overwrite old file, then delete temp file try { FileUtils.copyFile(newPngFile, pngFile); FileUtils.remove(newPngFile); } catch (IOFailure iof) { LOG.error("IOFailure while copying PNG file", iof); } gen.chartFile = pngFile; } } } /** The class logger. */ static final Log LOG = LogFactory.getLog(StartedJobHistoryChartGen.class); /** Internationalisation object. */ private static final I18n I18N = new I18n(dk.netarkivet.harvester.Constants.TRANSLATIONS_BUNDLE); /** Rate in seconds at which history charts should be generated. */ private static final long GEN_INTERVAL = Settings.getLong(HarvesterSettings.HARVEST_MONITOR_HISTORY_CHART_GEN_INTERVAL); /** The chart image resolution. */ private static final int[] CHART_RESOLUTION = new int[] {600, 450}; /** The dimension of the chart axis. */ private static final double CHART_AXIS_DIMENSION = 10.0; /** The relative path of the output. */ private static final String OUTPUT_REL_PATH = "History" + File.separator + "webapp"; /** The job id. */ private final long jobId; /** The folder where image files are output. */ private final File outputFolder; /** The chart image file. */ private File chartFile = null; /** The locale for internationalizing the chart. The locale is set to the system default. */ private final Locale locale; /** The process controlling the cyclic regeneration of charts. */ private PeriodicTaskExecutor genExec = null; /** * Constructor. Start generating charts for data belonging to the given job. * * @param jobId a job id. */ StartedJobHistoryChartGen(long jobId) { super(); this.outputFolder = new File(FileUtils.getTempDir() + File.separator + OUTPUT_REL_PATH); this.jobId = jobId; // Set the locale to the system default this.locale = Locale.getDefault(); genExec = new PeriodicTaskExecutor("ChartGen", new ChartGen(this), 0, GEN_INTERVAL); } /** * Returns the image file. * * @return the image file. Might return null if no file is currently available. */ public synchronized File getChartFile() { return chartFile; } /** Deletes the chart image if it exists and stops the generation schedule. */ public void cleanup() { if (chartFile != null && chartFile.exists()) { if (!chartFile.delete()) { chartFile.deleteOnExit(); } } if (genExec != null) { genExec.shutdown(); } } /** * Generates a chart in PNG format. * * @param outputFile the output file, it should exist. * @param pxWidth the image width in pixels. * @param pxHeight the image height in pixels. * @param chartTitle the chart title, may be null. * @param xAxisTitle the x axis title * @param yDataSeriesRange the axis range (null for auto) * @param yDataSeriesTitles the Y axis titles. * @param timeValuesInSeconds the time values in seconds * @param yDataSeries the Y axis value series. * @param yDataSeriesColors the Y axis value series drawing colors. * @param yDataSeriesTickSuffix TODO explain argument yDataSeriesTickSuffix * @param drawBorder draw, or not, the border. * @param backgroundColor the chart background color. */ final void generatePngChart( File outputFile, int pxWidth, int pxHeight, String chartTitle, String xAxisTitle, String[] yDataSeriesTitles, double[] timeValuesInSeconds, double[][] yDataSeriesRange, double[][] yDataSeries, Color[] yDataSeriesColors, String[] yDataSeriesTickSuffix, boolean drawBorder, Color backgroundColor) { // Domain axis NumberAxis xAxis = new NumberAxis(xAxisTitle); xAxis.setFixedDimension(CHART_AXIS_DIMENSION); xAxis.setLabelPaint(Color.black); xAxis.setTickLabelPaint(Color.black); double maxSeconds = getMaxValue(timeValuesInSeconds); TimeAxisResolution xAxisRes = TimeAxisResolution.findTimeUnit(maxSeconds); xAxis.setTickUnit(new NumberTickUnit(xAxisRes.tickStep)); double[] scaledTimeValues = xAxisRes.scale(timeValuesInSeconds); String tickSymbol = I18N.getString(locale, "running.job.details.chart.timeunit.symbol." + xAxisRes.name()); xAxis.setNumberFormatOverride(new DecimalFormat("###.##'" + tickSymbol + "'")); // First dataset String firstDataSetTitle = yDataSeriesTitles[0]; XYDataset firstDataSet = createXYDataSet(firstDataSetTitle, scaledTimeValues, yDataSeries[0]); Color firstDataSetColor = yDataSeriesColors[0]; // First range axis NumberAxis firstYAxis = new NumberAxis(firstDataSetTitle); firstYAxis.setFixedDimension(CHART_AXIS_DIMENSION); setAxisRange(firstYAxis, yDataSeriesRange[0]); firstYAxis.setLabelPaint(firstDataSetColor); firstYAxis.setTickLabelPaint(firstDataSetColor); String firstAxisTickSuffix = yDataSeriesTickSuffix[0]; if (firstAxisTickSuffix != null && !firstAxisTickSuffix.isEmpty()) { firstYAxis.setNumberFormatOverride(new DecimalFormat("###.##'" + firstAxisTickSuffix + "'")); } // Create the plot with domain axis and first range axis XYPlot plot = new XYPlot(firstDataSet, xAxis, firstYAxis, null); XYLineAndShapeRenderer firstRenderer = new XYLineAndShapeRenderer(true, false); plot.setRenderer(firstRenderer); plot.setOrientation(PlotOrientation.VERTICAL); plot.setBackgroundPaint(Color.lightGray); plot.setDomainGridlinePaint(Color.white); plot.setRangeGridlinePaint(Color.white); plot.setAxisOffset(new RectangleInsets(5.0, 5.0, 5.0, 5.0)); firstRenderer.setSeriesPaint(0, firstDataSetColor); // Now iterate on next axes for (int i = 1; i < yDataSeries.length; i++) { // Create axis String seriesTitle = yDataSeriesTitles[i]; Color seriesColor = yDataSeriesColors[i]; NumberAxis yAxis = new NumberAxis(seriesTitle); yAxis.setFixedDimension(CHART_AXIS_DIMENSION); setAxisRange(yAxis, yDataSeriesRange[i]); yAxis.setLabelPaint(seriesColor); yAxis.setTickLabelPaint(seriesColor); String yAxisTickSuffix = yDataSeriesTickSuffix[i]; if (yAxisTickSuffix != null && !yAxisTickSuffix.isEmpty()) { yAxis.setNumberFormatOverride(new DecimalFormat("###.##'" + yAxisTickSuffix + "'")); } // Create dataset and add axis to plot plot.setRangeAxis(i, yAxis); plot.setRangeAxisLocation(i, AxisLocation.BOTTOM_OR_LEFT); plot.setDataset(i, createXYDataSet(seriesTitle, scaledTimeValues, yDataSeries[i])); plot.mapDatasetToRangeAxis(i, i); XYItemRenderer renderer = new StandardXYItemRenderer(); renderer.setSeriesPaint(0, seriesColor); plot.setRenderer(i, renderer); } // Create the chart JFreeChart chart = new JFreeChart(chartTitle, JFreeChart.DEFAULT_TITLE_FONT, plot, false); // Customize rendering chart.setBackgroundPaint(Color.white); chart.setBorderVisible(true); chart.setBorderPaint(Color.BLACK); // Render image try { ChartUtilities.saveChartAsPNG(outputFile, chart, pxWidth, pxHeight); } catch (IOException e) { LOG.error("Chart export failed", e); } } /** * Create a XYDataset based on the given arguments. * * @param name The name * @param timeValues The timevalues * @param values the values * @return a DefaultXYDataset. */ private XYDataset createXYDataSet(String name, double[] timeValues, double[] values) { DefaultXYDataset ds = new DefaultXYDataset(); ds.addSeries(name, new double[][] {timeValues, values}); return ds; } /** * Find the maximum of the values given. If this maximum is less than {@link Double#MIN_VALUE} * then {@link Double#MIN_VALUE} is returned. * * @param values an array of doubles * @return the maximum of the values given */ private double getMaxValue(double[] values) { double max = Double.MIN_VALUE; for (double v : values) { max = Math.max(v, max); } return max; } /** * Set the axis range. * * @param axis a numberAxis * @param range a range */ private void setAxisRange(NumberAxis axis, double[] range) { if (range == null || range.length != 2) { axis.setAutoRange(true); } else { double lower = range[0]; double upper = range[1]; ArgumentNotValid.checkTrue(lower < upper, "Incorrect range"); axis.setAutoRange(false); axis.setRange(new Range(lower, upper)); } } }