/** * Once the job has been filled with {@link DomainConfiguration}s, performs the following * operations: * * <ol> * <li>Edit the harvest template to add/remove deduplicator configuration. * <li> * </ol> * * @param job the job */ protected void editJobOrderXml(Job job) { Document doc = job.getOrderXMLdoc(); if (DEDUPLICATION_ENABLED) { // Check that the Deduplicator element is present in the // OrderXMl and enabled. If missing or disabled log a warning if (!HeritrixTemplate.isDeduplicationEnabledInTemplate(doc)) { if (log.isWarnEnabled()) { log.warn( "Unable to perform deduplication for this job" + " as the required DeDuplicator element is " + "disabled or missing from template"); } } } else { // Remove deduplicator Element from OrderXML if present Node xpathNode = doc.selectSingleNode(HeritrixTemplate.DEDUPLICATOR_XPATH); if (xpathNode != null) { xpathNode.detach(); job.setOrderXMLDoc(doc); if (log.isInfoEnabled()) { log.info("Removed DeDuplicator element because " + "Deduplication is disabled"); } } } }
/** * Tests that: * * <ol> * <li>The given domain configuration and job are not null. * <li>The job does not already contain the given domain configuration. * <li>The domain configuration has the same order xml name as the first inserted domain config. * </ol> * * @param job a given Job * @param cfg a given DomainConfiguration * @return true, if the given DomainConfiguration can be inserted into the given job */ private boolean checkAddDomainConfInvariant(Job job, DomainConfiguration cfg) { ArgumentNotValid.checkNotNull(job, "job"); ArgumentNotValid.checkNotNull(cfg, "cfg"); // check if domain in DomainConfiguration cfg is not already in this job // domainName is used as key in domainConfigurationMap if (job.getDomainConfigurationMap().containsKey(cfg.getDomainName())) { if (log.isDebugEnabled()) { log.debug("Job already has a configuration for Domain '" + cfg.getDomainName() + "'."); } return false; } // check if template is same as this job. String orderXMLname = job.getOrderXMLName(); if (!orderXMLname.equals(cfg.getOrderXmlName())) { if (log.isDebugEnabled()) { log.debug( "This Job only accept configurations " + "using the harvest template '" + orderXMLname + "'. This configuration uses the harvest template '" + cfg.getOrderXmlName() + "'."); } return false; } return true; }
/**
 * Creates a new job for the given harvest definition and domain configuration.
 *
 * <p>The channel is looked up through {@link HarvestChannelDAO} using the harvest's id. When no
 * channel is returned, this is logged at info level and the DAO's default channel for the
 * harvest's snapshot flag is used instead. A harvest reporting itself as a snapshot is cast to
 * {@link FullHarvest} and yields a job from {@code Job.createSnapShotJob}; any other harvest
 * yields a job from {@code Job.createJob}.
 *
 * @param harvest the {@link HarvestDefinition} being processed
 * @param cfg the {@link DomainConfiguration} being processed
 * @return an instance of {@link Job}
 */
public static Job getNewJob(HarvestDefinition harvest, DomainConfiguration cfg) {
    HarvestChannelDAO channelDao = HarvestChannelDAO.getInstance();

    HarvestChannel jobChannel = channelDao.getChannelForHarvestDefinition(harvest.getOid());
    if (jobChannel == null) {
        log.info(
                "No channel mapping registered for harvest id "
                        + harvest.getOid()
                        + ", will use default.");
        jobChannel = channelDao.getDefaultChannel(harvest.isSnapShot());
    }

    if (!harvest.isSnapShot()) {
        return Job.createJob(harvest.getOid(), jobChannel, cfg, harvest.getNumEvents());
    }

    // Only the snapshot branch needs the FullHarvest-specific running time limit.
    FullHarvest snapshotHarvest = (FullHarvest) harvest;
    return Job.createSnapShotJob(
            harvest.getOid(),
            jobChannel,
            cfg,
            harvest.getMaxCountObjects(),
            harvest.getMaxBytes(),
            snapshotHarvest.getMaxJobRunningTime(),
            harvest.getNumEvents());
}