/**
 * Applies a new per-domain byte limit to this job.
 *
 * <p>The limit is stored on the job and forwarded to {@code orderXMLdoc}. When the new byte limit is
 * zero and the current per-domain object limit is not zero, the object limit is set to zero as well.
 *
 * @param maxBytesPerDomain the byte limit to apply per domain, or -1 for no limit
 * @throws IllegalState if the job is no longer under construction
 */
protected void setMaxBytesPerDomain(long maxBytesPerDomain) {
    // Limits may only be changed while the job is still being built.
    if (!underConstruction) {
        final String message = "Cannot modify job " + this + " as it is no longer under construction";
        log.debug(message);
        throw new IllegalState(message);
    }

    forceMaxBytesPerDomain = maxBytesPerDomain;
    orderXMLdoc.setMaxBytesPerDomain(maxBytesPerDomain);

    // A zero byte limit is mirrored onto the object limit, unless that limit is zero already.
    final boolean zeroByteLimit = (maxBytesPerDomain == 0L);
    final boolean objectLimitNotZero = (forceMaxObjectsPerDomain != 0L);
    if (zeroByteLimit && objectLimitNotZero) {
        setMaxObjectsPerDomain(0L);
    }
}
/** * Package private constructor for common initialisation. * * @param harvestID the id of the harvestdefinition * @param cfg the configuration to base the Job on * @param orderXMLdoc * @param channel the channel on which the job will be submitted. * @param forceMaxObjectsPerDomain the maximum number of objects harvested from a domain, * overrides individual configuration settings. -1 means no limit * @param forceMaxBytesPerDomain The maximum number of objects harvested from a domain, or -1 for * no limit. * @param forceMaxJobRunningTime The max time in seconds given to the harvester for this job * @param harvestNum the run number of the harvest definition * @throws ArgumentNotValid if cfg or priority is null or harvestID is invalid, or if any limit < * -1 */ public Job( Long harvestID, DomainConfiguration cfg, HeritrixTemplate orderXMLdoc, HarvestChannel channel, long forceMaxObjectsPerDomain, long forceMaxBytesPerDomain, long forceMaxJobRunningTime, int harvestNum) throws ArgumentNotValid { ArgumentNotValid.checkNotNull(cfg, "cfg"); ArgumentNotValid.checkNotNull(harvestID, "harvestID"); ArgumentNotValid.checkNotNegative(harvestID, "harvestID"); ArgumentNotValid.checkNotNull(channel, "channel"); if (forceMaxObjectsPerDomain < -1) { String msg = "forceMaxObjectsPerDomain must be either -1 or positive"; log.debug(msg); throw new ArgumentNotValid(msg); } if (forceMaxBytesPerDomain < -1) { String msg = "forceMaxBytesPerDomain must be either -1 or positive"; log.debug(msg); throw new ArgumentNotValid(msg); } if (forceMaxBytesPerDomain == 0L) { log.warn( "forceMaxBytesPerDomain should probably not be 0.Means 0 bytes downloaded per domain"); } if (forceMaxObjectsPerDomain == 0L) { log.warn( "forceMaxObjectsPerDomain should probably not be 0.Means 0 objects downloaded per domain"); } // setup initial members domainConfigurationMap = new HashMap<>(); origHarvestDefinitionID = harvestID; orderXMLname = cfg.getOrderXmlName(); this.orderXMLdoc = orderXMLdoc; 
setHarvestChannel(channel); long maxObjects = NumberUtils.minInf(forceMaxObjectsPerDomain, cfg.getMaxObjects()); setMaxObjectsPerDomain(maxObjects); configurationSetsObjectLimit = (maxObjects != forceMaxObjectsPerDomain); long maxBytes = NumberUtils.minInf(forceMaxBytesPerDomain, cfg.getMaxBytes()); setMaxBytesPerDomain(maxBytes); configurationSetsByteLimit = (maxBytes != forceMaxBytesPerDomain); long expectation = cfg.getExpectedNumberOfObjects(forceMaxObjectsPerDomain, forceMaxBytesPerDomain); maxCountObjects = expectation; minCountObjects = expectation; this.harvestNum = harvestNum; addConfiguration(cfg); setMaxJobRunningTime(forceMaxJobRunningTime); setArchiveFormatInTemplate(Settings.get(HarvesterSettings.HERITRIX_ARCHIVE_FORMAT)); setAttributes(cfg.getAttributesAndTypes()); orderXMLdoc.enableOrDisableDeduplication( Settings.getBoolean(HarvesterSettings.DEDUPLICATION_ENABLED)); status = JobStatus.NEW; }