/**
   * Create a bitarchive with access denied to the location of the admin data, and verify that the
   * expected exception is thrown.
   */
  @Test
  public void testAccessDenied() {
    // Make sure archive exists
    assertTrue("Inaccessible archive dir must exist", NOACCESS_ARCHIVE_DIR.exists());

    if (NOACCESS_ARCHIVE_DIR.canWrite()) {
      NOACCESS_ARCHIVE_DIR.setReadOnly();
    }

    // and that admin file is inaccessible
    assertFalse(
        "Must not be able to write to inaccessible admin file", NOACCESS_ARCHIVE_DIR.canWrite());

    try {
      Settings.set(
          ArchiveSettings.BITARCHIVE_SERVER_FILEDIR, NOACCESS_ARCHIVE_DIR.getAbsolutePath());
      Bitarchive ba = Bitarchive.getInstance();
      ba.close();
      fail("Accessing read-only archive should throw exception"); // do not come here
    } catch (PermissionDenied e) {
      // Expected case
      StringAsserts.assertStringContains(
          "Should mention noaccess dir", "noaccess/filedir", e.getMessage());
    }
  }
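  /*
   * Teardown sketch (an assumption, not part of the original test class): since
   * testAccessDenied() flips NOACCESS_ARCHIVE_DIR to read-only, later runs and
   * cleanup code may fail to delete it unless writability is restored.
   * java.io.File#setWritable(boolean) is the standard way to undo setReadOnly().
   */
  @After
  public void restoreNoAccessDirPermissions() {
    if (NOACCESS_ARCHIVE_DIR.exists() && !NOACCESS_ARCHIVE_DIR.canWrite()) {
      NOACCESS_ARCHIVE_DIR.setWritable(true); // best effort; returns false on failure
    }
  }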
  /** FIXME: Fails in Hudson. */
  public void failingTestExecute() {
    File arcFile = new File(TestInfo.BATCH_DIR, "MimeUrlSearch.jar");
    assertTrue(arcFile.isFile());

    Settings.set(
        "settings.common.batch.batchjobs.batchjob.class",
        "dk.netarkivet.common.utils.batch.UrlSearch");
    Settings.set("settings.common.batch.batchjobs.batchjob.jarfile", arcFile.getAbsolutePath());

    MockHttpServletRequest request =
        new MockHttpServletRequest() {
          @Override
          public Locale getLocale() {
            return new Locale("en");
          }

          @Override
          public int getRemotePort() {
            return 0; // stub value; not used by this test
          }

          @Override
          public String getLocalName() {
            return null; // stub value; not used by this test
          }

          @Override
          public String getLocalAddr() {
            return null; // stub value; not used by this test
          }

          @Override
          public int getLocalPort() {
            return 0; // stub value; not used by this test
          }
        };
    request.setupAddParameter(Constants.FILETYPE_PARAMETER, BatchFileType.Metadata.toString());
    request.setupAddParameter(Constants.JOB_ID_PARAMETER, "1234567890");
    request.setupAddParameter(
        Constants.BATCHJOB_PARAMETER, "dk.netarkivet.common.utils.batch.UrlSearch");
    request.setupAddParameter(Constants.REPLICA_PARAMETER, "BarOne");
    request.setupAddParameter("arg1", "DUMMY-ARG");
    request.setupAddParameter("arg2", "url");
    request.setupAddParameter("arg3", "mimetype");

    Locale l = new Locale("en");
    JspWriterMockup out = new JspWriterMockup();

    PageContext context = new WebinterfaceTestCase.TestPageContext(request, out, l);
    BatchGUI.execute(context);
  }
 @Before
 public void setUp() {
   rs.setUp();
   mjms.setUp();
   listener = new CreateIndexListener();
   JMSConnectionFactory.getInstance().setListener(Channels.getTheIndexServer(), listener);
   Settings.set(
       CommonSettings.REMOTE_FILE_CLASS, "dk.netarkivet.common.distribute.NullRemoteFile");
   Settings.set(CommonSettings.CACHE_DIR, TestInfo.CACHE_DIR.getPath());
   mtf.setUp();
   pss.setUp();
   pse.setUp();
 }
  public void setUp() throws Exception {
    ChannelsTester.resetChannels();
    rs.setUp();
    mtf.setUp();
    utrf.setUp();

    JMSConnectionMockupMQ.useJMSConnectionMockupMQ();

    DatabaseTestUtils.takeDatabase(TestInfo.DATABASE_FILE, TestInfo.DATABASE_DIR);

    // define the settings for accessing the database
    Settings.set(ArchiveSettings.BASEURL_ARCREPOSITORY_ADMIN_DATABASE, TestInfo.DATABASE_URL);
    Settings.set(ArchiveSettings.MACHINE_ARCREPOSITORY_ADMIN_DATABASE, "");
    Settings.set(ArchiveSettings.PORT_ARCREPOSITORY_ADMIN_DATABASE, "");
    Settings.set(ArchiveSettings.DIR_ARCREPOSITORY_ADMIN_DATABASE, "");

    Settings.set(CommonSettings.NOTIFICATIONS_CLASS, PrintNotifications.class.getName());
  }
  /**
   * Calls the Unix sort command with the options <code>$fileNames -o $outputFile -T
   * WaybackSettings#WAYBACK_AGGREGATOR_TEMP_DIR</code>.
   *
   * <p>Sets the LC_ALL environment variable before making the call.
   *
   * @param files The files to merge and sort
   * @param outputFile The resulting sorted file
   * @param additionalArgs A list of extra arguments, which (if different from null) are added to
   *     the sort call. Note: If any of the args contain whitespace the call will fail.
   */
  private void processFiles(File[] files, File outputFile, List<String> additionalArgs) {
    if (files.length == 0) {
      // Empty file list will cause sort to wait for further input,
      // and the call will therefore never return
      return;
    }

    Process p = null;

    try {
      List<String> inputFileList = new LinkedList<String>();
      for (int i = 0; i < files.length; i++) {
        if (files[i].exists() && files[i].isFile()) {
          inputFileList.add(files[i].getCanonicalPath());
        } else {
          log.warn(
              "File "
                  + files[i]
                  + " doesn't exist or isn't a regular file, "
                  + "dropping from list of files to "
                  + "sort and merge");
        }
      }
      List<String> cmd = new LinkedList<String>();
      // Prepare to run the unix sort command, see sort manual page for
      // details
      cmd.add("sort");
      cmd.addAll(inputFileList);
      cmd.add("-o");
      cmd.add(outputFile.getCanonicalPath());
      cmd.add("-T");
      cmd.add(Settings.get(WaybackSettings.WAYBACK_AGGREGATOR_TEMP_DIR));
      if (additionalArgs != null && !additionalArgs.isEmpty()) {
        for (String argument : additionalArgs) {
          ArgumentNotValid.checkTrue(
              argument.indexOf(' ') == -1,
              "The argument '" + argument + "' contains spaces, this isn't allowed ");
        }
        cmd.addAll(additionalArgs);
      }
      ProcessBuilder pb = new ProcessBuilder(cmd);
      // Reset all locale definitions
      pb.environment().put("LC_ALL", "C");
      // Run the command in the user.dir directory
      pb.directory(new File(System.getProperty("user.dir")));
      p = pb.start();
      p.waitFor();
      if (p.exitValue() != 0) {
        log.error("Failed to sort index files, sort exited with " + "return code " + p.exitValue());
      }
    } catch (Exception e) {
      log.error("Failed to aggregate indexes ", e);
    }
  }
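  /*
   * A minimal, isolated sketch (an assumption, not original source) of the
   * pattern processFiles relies on: invoking GNU sort via ProcessBuilder with
   * LC_ALL=C so collation is byte-order stable regardless of the user's locale.
   * The helper name is hypothetical.
   */
  private static int sortWithStableCollation(List<String> inputs, File output) throws Exception {
    List<String> cmd = new LinkedList<String>();
    cmd.add("sort");
    cmd.addAll(inputs);
    cmd.add("-o");
    cmd.add(output.getCanonicalPath());
    ProcessBuilder pb = new ProcessBuilder(cmd);
    pb.environment().put("LC_ALL", "C"); // byte-wise collation, locale-independent
    Process p = pb.start();
    return p.waitFor(); // waitFor() returns the process exit value
  }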
 @Before
 public void setUp() {
   rs.setUp();
   ulrf.setUp();
   mjms.setUp();
   mtf.setUp();
   pss.setUp();
   pse.setUp();
   marc.setUp();
   Settings.set(CommonSettings.NOTIFICATIONS_CLASS, RememberNotifications.class.getName());
 }
 /** Create a bitarchive from scratch, where no admin data or log files exist. */
 @Test
 public void testFromScratch() {
   LogbackRecorder lr = LogbackRecorder.startRecorder();
   assertFalse("No bitarchive should exist before creating it", NEW_ARCHIVE_DIR.exists());
   // Create new test archive and close it
   Settings.set(ArchiveSettings.BITARCHIVE_SERVER_FILEDIR, NEW_ARCHIVE_DIR.getAbsolutePath());
   Bitarchive ba = Bitarchive.getInstance();
   ba.close();
   // verify that the directory, admin and log files are created
   assertTrue("The archive dir should exist after creation", NEW_ARCHIVE_DIR.exists());
   assertTrue("Log file should exist after creation", !lr.isEmpty());
   lr.stopRecorder();
 }
  @Before
  public void setUp() throws Exception {
    rs.setUp();
    mtf.setUp();
    JMSConnectionMockupMQ.useJMSConnectionMockupMQ();

    Settings.set(ArchiveSettings.DIRS_ARCREPOSITORY_ADMIN, TestInfo.WORKING_DIR.getAbsolutePath());

    if (!Replica.isKnownReplicaId("TWO") || !Replica.isKnownReplicaId("ONE")) {
      fail(
          "These tests assume that ONE and TWO are known replica ids. Only known replicas are: "
              + StringUtils.conjoin(", ", Arrays.asList(Replica.getKnownIds())));
    }
    // super.setUp();
  }
public class IndexerTestCase {

  private String oldClient = System.getProperty(CommonSettings.ARC_REPOSITORY_CLIENT);
  private String oldFileDir = System.getProperty("settings.common.arcrepositoryClient.fileDir");
  protected static File tempdir = new File(Settings.get(WaybackSettings.WAYBACK_INDEX_TEMPDIR));

  ReloadSettings rs = new ReloadSettings();

  @Before
  public void setUp() {
    rs.setUp();
    System.setProperty(WaybackSettings.HIBERNATE_HBM2DDL_AUTO, "create-drop");
    HibernateUtil.getSession().getSessionFactory().close();
    FileUtils.removeRecursively(TestInfo.WORKING_DIR);
    TestFileUtils.copyDirectoryNonCVS(TestInfo.ORIGINALS_DIR, TestInfo.WORKING_DIR);
    System.setProperty(
        CommonSettings.ARC_REPOSITORY_CLIENT,
        "dk.netarkivet.common.distribute.arcrepository.LocalArcRepositoryClient");
    System.setProperty(
        "settings.common.arcrepositoryClient.fileDir", TestInfo.FILE_DIR.getAbsolutePath());
    System.setProperty(
        CommonSettings.REMOTE_FILE_CLASS, "dk.netarkivet.common.distribute.TestRemoteFile");
    assertTrue(
        ArcRepositoryClientFactory.getPreservationInstance() instanceof LocalArcRepositoryClient);
  }

  @After
  public void tearDown() {
    HibernateUtil.getSession().getSessionFactory().close();
    FileUtils.removeRecursively(TestInfo.WORKING_DIR);
    if (oldClient != null) {
      System.setProperty(CommonSettings.ARC_REPOSITORY_CLIENT, oldClient);
    } else {
      System.setProperty(CommonSettings.ARC_REPOSITORY_CLIENT, "");
    }
    if (oldFileDir != null) {
      System.setProperty("settings.common.arcrepositoryClient.fileDir", oldFileDir);
    } else {
      System.setProperty("settings.common.arcrepositoryClient.fileDir", "");
    }
    rs.tearDown();
  }
}
  public void setUp() {
    mtf.setUp();
    rs.setUp();

    Settings.set(CommonSettings.BATCHJOBS_BASEDIR, TestInfo.BATCH_DIR.getAbsolutePath());
  }
/**
 * Batchjob that extracts lines matching a regular expression from a crawl log. The batch job
 * should be restricted to run on metadata files for a specific job only, using the {@link
 * #processOnlyFilesMatching(String)} construct.
 */
@SuppressWarnings({"serial"})
public class CrawlLogLinesMatchingRegexp extends ArchiveBatchJob {

  /** The logger. */
  // private final Log log = LogFactory.getLog(getClass().getName());
  private static final Logger log = LoggerFactory.getLogger(CrawlLogLinesMatchingRegexp.class);

  /** Metadata URL for crawl logs. */
  private static final String SETUP_URL_FORMAT =
      String.format(
          "metadata://%s/crawl/logs/crawl.log", Settings.get(CommonSettings.ORGANIZATION));

  /** The regular expression to match in the crawl.log line. */
  private final String regexp;

  /**
   * Initialise the batch job.
   *
   * @param regexp The regexp to match in the crawl.log lines.
   */
  public CrawlLogLinesMatchingRegexp(String regexp) {
    ArgumentNotValid.checkNotNullOrEmpty(regexp, "regexp");
    this.regexp = regexp;

    // One week in milliseconds.
    batchJobTimeout = 7 * Constants.ONE_DAY_IN_MILLIES;
  }

  /**
   * Does nothing, no initialisation is needed.
   *
   * @param os Not used.
   */
  @Override
  public void initialize(OutputStream os) {}

  @Override
  public ArchiveBatchFilter getFilter() {
    return new ArchiveBatchFilter("OnlyCrawlLog") {
      @Override
      public boolean accept(ArchiveRecordBase record) {
        String url = record.getHeader().getUrl();
        if (url == null) {
          return false;
        }
        return url.startsWith(SETUP_URL_FORMAT);
      }
    };
  }

  /**
   * Process a crawl log record, writing the lines that match the regexp to the output stream.
   *
   * @param record The record to process.
   * @param os The output stream for the result.
   * @throws ArgumentNotValid on null parameters
   * @throws IOFailure on trouble processing the record.
   */
  @Override
  public void processRecord(ArchiveRecordBase record, OutputStream os) {
    ArgumentNotValid.checkNotNull(record, "ArchiveRecordBase record");
    ArgumentNotValid.checkNotNull(os, "OutputStream os");
    BufferedReader arcreader = new BufferedReader(new InputStreamReader(record.getInputStream()));
    try {
      for (String line = arcreader.readLine(); line != null; line = arcreader.readLine()) {
        if (line.matches(regexp)) {
          os.write(line.getBytes("UTF-8"));
          os.write('\n');
        }
      }
    } catch (IOException e) {
      throw new IOFailure("Unable to process (w)arc record", e);
    } finally {
      try {
        arcreader.close();
      } catch (IOException e) {
        log.warn("unable to close arcreader probably", e);
      }
    }
  }

  /**
   * Does nothing, no finishing is needed.
   *
   * @param os Not used.
   */
  @Override
  public void finish(OutputStream os) {}

  @Override
  public String toString() {
    return getClass().getName()
        + ", with arguments: Regexp = "
        + regexp
        + ", Filter = "
        + getFilter();
  }
}
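/*
 * Hedged usage sketch (not from the original source): restrict the job to one
 * harvest job's metadata files via processOnlyFilesMatching(), then run it over
 * local files. BatchLocalFiles#run(FileBatchJob, OutputStream), the file-name
 * pattern, and the example class itself are assumptions for illustration.
 */
class CrawlLogGrepExample {
  static void run(File[] metadataFiles) throws IOException {
    CrawlLogLinesMatchingRegexp job = new CrawlLogLinesMatchingRegexp(".*\\.pdf.*");
    // Only look at metadata files belonging to harvest job 42 (illustrative pattern).
    job.processOnlyFilesMatching("42-metadata-[0-9]+\\.(w)?arc(\\.gz)?");
    OutputStream os = new FileOutputStream("matching-crawl-log-lines.txt");
    new BatchLocalFiles(metadataFiles).run(job, os);
    os.close();
  }
}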
  @Test
  public void testProcessMissingRequest() throws Exception {

    Settings.set(
        ArchiveSettings.DIR_ARCREPOSITORY_BITPRESERVATION, TestInfo.WORKING_DIR.getAbsolutePath());
    Settings.set(ArchiveSettings.DIRS_ARCREPOSITORY_ADMIN, TestInfo.WORKING_DIR.getAbsolutePath());

    // Ensure that admin data exists before we start.
    AdminData.getUpdateableInstance();

    MockFileBasedActiveBitPreservation mockabp = new MockFileBasedActiveBitPreservation();
    MockHttpServletRequest request = new MockHttpServletRequest();
    String replicaID1 = "ONE";
    String replicaID2 = "TWO";
    String filename1 = "foo";
    String filename2 = "bar";
    Locale defaultLocale = new Locale("da");

    // First test a working set of params
    Map<String, String[]> args = new HashMap<String, String[]>();
    args.put(
        ADD_COMMAND,
        new String[] {
          Replica.getReplicaFromId(replicaID1).getName() + STRING_FILENAME_SEPARATOR + filename1
        });
    request.setupAddParameter(
        ADD_COMMAND,
        new String[] {
          Replica.getReplicaFromId(replicaID1).getName() + STRING_FILENAME_SEPARATOR + filename1
        });
    args.put(GET_INFO_COMMAND, new String[] {filename1});
    request.setupAddParameter(GET_INFO_COMMAND, new String[] {filename1});
    args.put(BITARCHIVE_NAME_PARAM, new String[] {Replica.getReplicaFromId(replicaID1).getName()});
    request.setupAddParameter(
        BITARCHIVE_NAME_PARAM, new String[] {Replica.getReplicaFromId(replicaID1).getName()});
    request.setupGetParameterMap(args);
    request.setupGetParameterNames(new Vector<String>(args.keySet()).elements());
    Map<String, PreservationState> status =
        BitpreserveFileState.processMissingRequest(
            WebinterfaceTestCase.getDummyPageContext(defaultLocale, request), new StringBuilder());
    assertEquals("Should have one call to reestablish", 1, mockabp.getCallCount(ADD_METHOD));
    assertEquals(
        "Should have one call to getFilePreservationStatus",
        1,
        mockabp.getCallCount(GET_INFO_METHOD));
    assertEquals("Should have one info element (with mock results)", null, status.get(filename1));

    // Check that we can call without any params
    mockabp.calls.clear();
    request = new MockHttpServletRequest();
    args.clear();
    args.put(BITARCHIVE_NAME_PARAM, new String[] {Replica.getReplicaFromId(replicaID1).getName()});
    request.setupAddParameter(
        BITARCHIVE_NAME_PARAM, new String[] {Replica.getReplicaFromId(replicaID1).getName()});
    request.setupGetParameterMap(args);
    status =
        BitpreserveFileState.processMissingRequest(
            WebinterfaceTestCase.getDummyPageContext(defaultLocale, request), new StringBuilder());
    assertEquals("Should have no call to restablish", 0, mockabp.getCallCount(ADD_METHOD));
    assertEquals(
        "Should have no call to getFilePreservationStatus",
        0,
        mockabp.getCallCount(GET_INFO_METHOD));
    assertEquals("Should have no status", 0, status.size());

    // Check that we can handle more than one call to each and that the
    // args are correct.
    mockabp.calls.clear();
    request = new MockHttpServletRequest();
    args.clear();
    args.put(BITARCHIVE_NAME_PARAM, new String[] {Replica.getReplicaFromId(replicaID2).getName()});
    request.setupAddParameter(
        BITARCHIVE_NAME_PARAM, new String[] {Replica.getReplicaFromId(replicaID2).getName()});
    request.setupAddParameter(
        ADD_COMMAND,
        new String[] {
          Replica.getReplicaFromId(replicaID2).getName() + STRING_FILENAME_SEPARATOR + filename1,
          Replica.getReplicaFromId(replicaID2).getName() + STRING_FILENAME_SEPARATOR + filename1
        });
    args.put(
        ADD_COMMAND,
        new String[] {
          Replica.getReplicaFromId(replicaID2).getName() + STRING_FILENAME_SEPARATOR + filename1,
          Replica.getReplicaFromId(replicaID2).getName() + STRING_FILENAME_SEPARATOR + filename1
        });
    request.setupAddParameter(GET_INFO_COMMAND, new String[] {filename1, filename2, filename1});
    args.put(GET_INFO_COMMAND, new String[] {filename1, filename2, filename1});
    request.setupGetParameterMap(args);
    status =
        BitpreserveFileState.processMissingRequest(
            WebinterfaceTestCase.getDummyPageContext(defaultLocale, request), new StringBuilder());
    assertEquals("Should have two calls to restablish", 2, mockabp.getCallCount(ADD_METHOD));
    assertEquals(
        "Should have three calls to getFilePreservationStatus",
        3,
        mockabp.getCallCount(GET_INFO_METHOD));
    assertEquals("Should have two info elements", 2, status.size());
    assertEquals("Should have info for filename1", null, status.get(filename1));
    assertEquals("Should have info for filename2", null, status.get(filename2));


    CollectionAsserts.assertIteratorEquals(
        "Should have the args given add",
        Arrays.asList(new String[] {filename1 + "," + replicaID2, filename1 + "," + replicaID2})
            .iterator(),
        mockabp.calls.get(ADD_METHOD).iterator());

    CollectionAsserts.assertIteratorEquals(
        "Should have the args given info",
        Arrays.asList(new String[] {filename1, filename2, filename1}).iterator(),
        mockabp.calls.get(GET_INFO_METHOD).iterator());
  }
/**
 * A base class for {@link JobGenerator} implementations. It is recommended to extend this class to
 * implement a new job generator.
 *
 * <p>The base algorithm iterates over the domain configurations within the harvest definition and,
 * according to the setting {@link HarvesterSettings#JOBGEN_DOMAIN_CONFIG_SUBSET_SIZE}, builds
 * subsets of domain configurations from which one or more jobs will be generated.
 */
abstract class AbstractJobGenerator implements JobGenerator {

  /** Logger for this class. */
  private static Log log = LogFactory.getLog(AbstractJobGenerator.class);

  /** How many domain configurations to process in one go. */
  private final long DOMAIN_CONFIG_SUBSET_SIZE =
      Settings.getLong(HarvesterSettings.JOBGEN_DOMAIN_CONFIG_SUBSET_SIZE);

  /** Is deduplication enabled or disabled. */
  private final boolean DEDUPLICATION_ENABLED =
      Settings.getBoolean(HarvesterSettings.DEDUPLICATION_ENABLED);

  @Override
  public int generateJobs(HarvestDefinition harvest) {
    log.info("Generating jobs for harvestdefinition # " + harvest.getOid());
    int jobsMade = 0;
    final Iterator<DomainConfiguration> domainConfigurations = harvest.getDomainConfigurations();

    while (domainConfigurations.hasNext()) {
      List<DomainConfiguration> subset = new ArrayList<DomainConfiguration>();
      while (domainConfigurations.hasNext() && subset.size() < DOMAIN_CONFIG_SUBSET_SIZE) {
        subset.add(domainConfigurations.next());
      }

      Collections.sort(subset, getDomainConfigurationSubsetComparator(harvest));
      if (log.isTraceEnabled()) {
        log.trace(
            subset.size()
                + " domainconfigs now sorted and ready to processing "
                + "for harvest #"
                + harvest.getOid());
      }
      jobsMade += processDomainConfigurationSubset(harvest, subset.iterator());
    }
    harvest.setNumEvents(harvest.getNumEvents() + 1);

    if (!harvest.isSnapShot()) {
      PartialHarvest focused = (PartialHarvest) harvest;
      Schedule schedule = focused.getSchedule();
      int numEvents = harvest.getNumEvents();

      // Calculate next event
      Date now = new Date();
      Date nextEvent = schedule.getNextEvent(focused.getNextDate(), numEvents);

      // Refuse to schedule event in the past
      if (nextEvent != null && nextEvent.before(now)) {
        int eventsSkipped = 0;
        while (nextEvent != null && nextEvent.before(now)) {
          nextEvent = schedule.getNextEvent(nextEvent, numEvents);
          eventsSkipped++;
        }
        if (log.isWarnEnabled()) {
          log.warn(
              "Refusing to schedule harvest definition '"
                  + harvest.getName()
                  + "' in the past. Skipped "
                  + eventsSkipped
                  + " events. Old nextDate was "
                  + focused.getNextDate()
                  + " new nextDate is "
                  + nextEvent);
        }
      }

      // Set next event
      focused.setNextDate(nextEvent);
      if (log.isTraceEnabled()) {
        log.trace(
            "Next event for harvest definition "
                + harvest.getName()
                + " happens: "
                + (nextEvent == null ? "Never" : nextEvent.toString()));
      }
    }

    log.info(
        "Finished generating " + jobsMade + " jobs for harvestdefinition # " + harvest.getOid());
    return jobsMade;
  }

  /**
   * Instantiates a new job.
   *
   * @param cfg the {@link DomainConfiguration} being processed
   * @param harvest the {@link HarvestDefinition} being processed
   * @return an instance of {@link Job}
   */
  public static Job getNewJob(HarvestDefinition harvest, DomainConfiguration cfg) {
    HarvestChannelDAO harvestChannelDao = HarvestChannelDAO.getInstance();
    HarvestChannel channel = harvestChannelDao.getChannelForHarvestDefinition(harvest.getOid());
    if (channel == null) {
      log.info(
          "No channel mapping registered for harvest id "
              + harvest.getOid()
              + ", will use default.");
      channel = harvestChannelDao.getDefaultChannel(harvest.isSnapShot());
    }
    if (harvest.isSnapShot()) {
      return Job.createSnapShotJob(
          harvest.getOid(),
          channel,
          cfg,
          harvest.getMaxCountObjects(),
          harvest.getMaxBytes(),
          ((FullHarvest) harvest).getMaxJobRunningTime(),
          harvest.getNumEvents());
    }
    return Job.createJob(harvest.getOid(), channel, cfg, harvest.getNumEvents());
  }

  /**
   * Returns a comparator used to sort the subset of {@link #DOMAIN_CONFIG_SUBSET_SIZE}
   * configurations that are scanned at each iteration.
   *
   * @param harvest the {@link HarvestDefinition} being processed.
   * @return a comparator
   */
  protected abstract Comparator<DomainConfiguration> getDomainConfigurationSubsetComparator(
      HarvestDefinition harvest);

  /**
   * Create new jobs from a collection of configurations. All configurations must use the same
   * order.xml file.
   *
   * @param harvest the {@link HarvestDefinition} being processed.
   * @param domainConfSubset the configurations to use to create the jobs
   * @return The number of jobs created
   * @throws ArgumentNotValid if any of the parameters is null or if the cfglist does not contain
   *     any configurations
   */
  protected abstract int processDomainConfigurationSubset(
      HarvestDefinition harvest, Iterator<DomainConfiguration> domainConfSubset);

  @Override
  public boolean canAccept(Job job, DomainConfiguration cfg) {
    if (!checkAddDomainConfInvariant(job, cfg)) {
      return false;
    }
    return checkSpecificAcceptConditions(job, cfg);
  }

  /**
   * Called by {@link #canAccept(Job, DomainConfiguration)}. Tests the implementation-specific
   * conditions to accept the given {@link DomainConfiguration} in the given {@link Job}. It is
   * assumed that {@link #checkAddDomainConfInvariant(Job, DomainConfiguration)} has already passed.
   *
   * @param job the {@link Job} being built
   * @param cfg the {@link DomainConfiguration} to test
   * @return true if the configuration passes the conditions.
   */
  protected abstract boolean checkSpecificAcceptConditions(Job job, DomainConfiguration cfg);

  /**
   * Once the job has been filled with {@link DomainConfiguration}s, performs the following
   * operations:
   *
   * <ol>
   *   <li>Edit the harvest template to add/remove the deduplicator configuration.
   * </ol>
   *
   * @param job the job
   */
  protected void editJobOrderXml(Job job) {
    Document doc = job.getOrderXMLdoc();
    if (DEDUPLICATION_ENABLED) {
      // Check that the Deduplicator element is present in the
      // OrderXMl and enabled. If missing or disabled log a warning
      if (!HeritrixTemplate.isDeduplicationEnabledInTemplate(doc)) {
        if (log.isWarnEnabled()) {
          log.warn(
              "Unable to perform deduplication for this job"
                  + " as the required DeDuplicator element is "
                  + "disabled or missing from template");
        }
      }
    } else {
      // Remove deduplicator Element from OrderXML if present
      Node xpathNode = doc.selectSingleNode(HeritrixTemplate.DEDUPLICATOR_XPATH);
      if (xpathNode != null) {
        xpathNode.detach();
        job.setOrderXMLDoc(doc);
        if (log.isInfoEnabled()) {
          log.info("Removed DeDuplicator element because " + "Deduplication is disabled");
        }
      }
    }
  }

  /**
   * Tests that:
   *
   * <ol>
   *   <li>The given domain configuration and job are not null.
   *   <li>The job does not already contain the given domain configuration.
   *   <li>The domain configuration has the same order xml name as the first inserted domain config.
   * </ol>
   *
   * @param job a given Job
   * @param cfg a given DomainConfiguration
   * @return true, if the given DomainConfiguration can be inserted into the given job
   */
  private boolean checkAddDomainConfInvariant(Job job, DomainConfiguration cfg) {
    ArgumentNotValid.checkNotNull(job, "job");
    ArgumentNotValid.checkNotNull(cfg, "cfg");

    // check if domain in DomainConfiguration cfg is not already in this job
    // domainName is used as key in domainConfigurationMap
    if (job.getDomainConfigurationMap().containsKey(cfg.getDomainName())) {
      if (log.isDebugEnabled()) {
        log.debug("Job already has a configuration for Domain '" + cfg.getDomainName() + "'.");
      }
      return false;
    }

    // check if template is same as this job.
    String orderXMLname = job.getOrderXMLName();
    if (!orderXMLname.equals(cfg.getOrderXmlName())) {
      if (log.isDebugEnabled()) {
        log.debug(
            "This Job only accept configurations "
                + "using the harvest template '"
                + orderXMLname
                + "'. This configuration uses the harvest template '"
                + cfg.getOrderXmlName()
                + "'.");
      }
      return false;
    }

    return true;
  }
}
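/*
 * Hedged sketch (not from the original source) of a minimal concrete generator,
 * just to show which members a subclass supplies. The byte-limit comparator and
 * the "one job per subset" packing policy are illustrative choices, not the
 * behaviour of any shipped implementation.
 */
class SingleJobPerSubsetGenerator extends AbstractJobGenerator {

  @Override
  protected Comparator<DomainConfiguration> getDomainConfigurationSubsetComparator(
      HarvestDefinition harvest) {
    // Order configurations by byte limit so similarly sized ones meet in a job.
    return new Comparator<DomainConfiguration>() {
      @Override
      public int compare(DomainConfiguration c1, DomainConfiguration c2) {
        return Long.compare(c1.getMaxBytes(), c2.getMaxBytes());
      }
    };
  }

  @Override
  protected boolean checkSpecificAcceptConditions(Job job, DomainConfiguration cfg) {
    return true; // accept anything that passed the base invariant checks
  }

  @Override
  protected int processDomainConfigurationSubset(
      HarvestDefinition harvest, Iterator<DomainConfiguration> domainConfSubset) {
    // Pack the whole subset into one job, relying on canAccept() to filter.
    Job job = null;
    while (domainConfSubset.hasNext()) {
      DomainConfiguration cfg = domainConfSubset.next();
      if (job == null) {
        job = getNewJob(harvest, cfg); // the constructor adds cfg itself
      } else if (canAccept(job, cfg)) {
        job.addConfiguration(cfg);
      }
    }
    // Persisting the job (e.g. via a DAO) is omitted in this sketch.
    return job == null ? 0 : 1;
  }
}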
/**
 * This class implements a generator for a history chart of a running job. The chart traces the
 * progress percentage and the queued URI count over the crawl time. Charts are rendered in a PNG
 * image file, generated in the webapp directory.
 */
class StartedJobHistoryChartGen {

  /** Time units used to scale the crawl time values and generate the chart's time axis ticks. */
  protected static enum TimeAxisResolution {
    /** One second. Tick step is 10s. */
    second(1, 1, 10),
    /** One minute. Tick step is 5m. */
    minute(60, 60, 5),
    /** One hour. Tick step is 1h. */
    hour(60 * minute.seconds, 60 * minute.seconds, 1),
    /** Twelve hours. Tick step is 2h. */
    half_day(12 * 60 * minute.seconds, 60 * minute.seconds, 2),
    /** One day. Tick step is 0.5d. */
    day(24 * hour.seconds, 24 * hour.seconds, 0.5),
    /** One week. Tick step is 1w. */
    week(7 * day.seconds, 7 * day.seconds, 1);

    /** The time unit in seconds. */
    private final int seconds;

    /** The scale in seconds. */
    private final int scaleSeconds;

    /** The step between two tick units. */
    private final double tickStep;

    /**
     * Builds a time axis resolution.
     *
     * @param seconds the actual resolution in seconds
     * @param scaleSeconds the actual "scale" of ticks
     * @param tickStep the number of ticks in one step.
     */
    TimeAxisResolution(int seconds, int scaleSeconds, double tickStep) {
      this.seconds = seconds;
      this.scaleSeconds = scaleSeconds;
      this.tickStep = tickStep;
    }

    /**
     * Scale down an array of seconds.
     *
     * @param timeInSeconds An array of seconds
     * @return a scaled down version of the given array of seconds
     */
    double[] scale(double[] timeInSeconds) {
      double[] scaledTime = new double[timeInSeconds.length];
      for (int i = 0; i < timeInSeconds.length; i++) {
        scaledTime[i] = timeInSeconds[i] / this.scaleSeconds;
      }
      return scaledTime;
    }

    /**
     * @param seconds the seconds
     * @return the proper timeUnit for the given argument
     */
    static TimeAxisResolution findTimeUnit(double seconds) {

      TimeAxisResolution[] allTus = values();
      for (int i = 0; i < allTus.length - 1; i++) {
        TimeAxisResolution nextGreater = allTus[i + 1];
        if (seconds < nextGreater.seconds) {
          return allTus[i];
        }
      }
      return week; // largest unit
    }
  }

  /** A chart generation task. Generates a PNG image for a job progress history. */
  private static class ChartGen implements Runnable {
    /** The process that generates the Charts. */
    private final StartedJobHistoryChartGen gen;

    /**
     * Constructor of a ChartGen object.
     *
     * @param gen the process that generates the charts.
     */
    ChartGen(StartedJobHistoryChartGen gen) {
      super();
      this.gen = gen;
    }

    @Override
    public void run() {

      synchronized (gen) {
        gen.chartFile = null;
      }

      long jobId = gen.jobId;

      StartedJobInfo[] fullHistory = RunningJobsInfoDAO.getInstance().getFullJobHistory(jobId);

      LinkedList<Double> timeValues = new LinkedList<Double>();
      LinkedList<Double> progressValues = new LinkedList<Double>();
      LinkedList<Double> urlValues = new LinkedList<Double>();

      for (StartedJobInfo sji : fullHistory) {
        timeValues.add((double) sji.getElapsedSeconds());
        progressValues.add(sji.getProgress());
        urlValues.add((double) sji.getQueuedFilesCount());
      }

      // Refresh the history png image for the job.
      File pngFile = new File(gen.outputFolder, jobId + "-history.png");

      File newPngFile;
      try {
        newPngFile =
            File.createTempFile(jobId + "-history", "." + System.currentTimeMillis() + ".png");
      } catch (IOException e) {
        LOG.warn("Failed to create temp PNG file for job " + jobId);
        return;
      }

      long startTime = System.currentTimeMillis();
      gen.generatePngChart(
          newPngFile,
          CHART_RESOLUTION[0],
          CHART_RESOLUTION[1],
          null, // no chart title
          I18N.getString(gen.locale, "running.job.details.chart.legend.crawlTime"),
          new String[] {
            I18N.getString(gen.locale, "running.job.details.chart.legend.progress"),
            I18N.getString(gen.locale, "running.job.details.chart.legend.queuedUris")
          },
          NumberUtils.toPrimitiveArray(timeValues),
          new double[][] {new double[] {0, 100}, null},
          new double[][] {
            NumberUtils.toPrimitiveArray(progressValues), NumberUtils.toPrimitiveArray(urlValues)
          },
          new Color[] {Color.blue, Color.green.darker()},
          new String[] {"%", ""},
          false,
          Color.lightGray.brighter().brighter());

      long genTime = System.currentTimeMillis() - startTime;
      LOG.info(
          "Generated history chart for job "
              + jobId
              + " in "
              + (genTime < TimeUtils.SECOND_IN_MILLIS
                  ? genTime + " ms"
                  : StringUtils.formatDuration(genTime / TimeUtils.SECOND_IN_MILLIS))
              + ".");

      synchronized (gen) {
        // Overwrite old file, then delete temp file
        try {
          FileUtils.copyFile(newPngFile, pngFile);
          FileUtils.remove(newPngFile);
        } catch (IOFailure iof) {
          LOG.error("IOFailure while copying PNG file", iof);
        }
        gen.chartFile = pngFile;
      }
    }
  }

  /** The class logger. */
  static final Log LOG = LogFactory.getLog(StartedJobHistoryChartGen.class);

  /** Internationalisation object. */
  private static final I18n I18N = new I18n(dk.netarkivet.harvester.Constants.TRANSLATIONS_BUNDLE);

  /** Rate in seconds at which history charts should be generated. */
  private static final long GEN_INTERVAL =
      Settings.getLong(HarvesterSettings.HARVEST_MONITOR_HISTORY_CHART_GEN_INTERVAL);

  /** The chart image resolution. */
  private static final int[] CHART_RESOLUTION = new int[] {600, 450};
  /** The dimension of the chart axis. */
  private static final double CHART_AXIS_DIMENSION = 10.0;
  /** The relative path of the output. */
  private static final String OUTPUT_REL_PATH = "History" + File.separator + "webapp";

  /** The job id. */
  private final long jobId;

  /** The folder where image files are output. */
  private final File outputFolder;

  /** The chart image file. */
  private File chartFile = null;

  /** The locale for internationalizing the chart. The locale is set to the system default. */
  private final Locale locale;
  /** The process controlling the cyclic regeneration of charts. */
  private PeriodicTaskExecutor genExec = null;

  /**
   * Constructor. Start generating charts for data belonging to the given job.
   *
   * @param jobId a job id.
   */
  StartedJobHistoryChartGen(long jobId) {
    super();

    this.outputFolder = new File(FileUtils.getTempDir() + File.separator + OUTPUT_REL_PATH);

    this.jobId = jobId;

    // Set the locale to the system default
    this.locale = Locale.getDefault();

    genExec = new PeriodicTaskExecutor("ChartGen", new ChartGen(this), 0, GEN_INTERVAL);
  }

  /**
   * Returns the image file.
   *
   * @return the image file. Might return null if no file is currently available.
   */
  public synchronized File getChartFile() {
    return chartFile;
  }

  /** Deletes the chart image if it exists and stops the generation schedule. */
  public void cleanup() {

    if (chartFile != null && chartFile.exists()) {
      if (!chartFile.delete()) {
        chartFile.deleteOnExit();
      }
    }

    if (genExec != null) {
      genExec.shutdown();
    }
  }

  /**
   * Generates a chart in PNG format.
   *
   * @param outputFile the output file; it should already exist.
   * @param pxWidth the image width in pixels.
   * @param pxHeight the image height in pixels.
   * @param chartTitle the chart title, may be null.
   * @param xAxisTitle the x axis title
   * @param yDataSeriesRange the axis range (null for auto)
   * @param yDataSeriesTitles the Y axis titles.
   * @param timeValuesInSeconds the time values in seconds
   * @param yDataSeries the Y axis value series.
   * @param yDataSeriesColors the Y axis value series drawing colors.
   * @param yDataSeriesTickSuffix the tick label suffix for each Y axis value series (may be empty)
   * @param drawBorder draw, or not, the border.
   * @param backgroundColor the chart background color.
   */
  final void generatePngChart(
      File outputFile,
      int pxWidth,
      int pxHeight,
      String chartTitle,
      String xAxisTitle,
      String[] yDataSeriesTitles,
      double[] timeValuesInSeconds,
      double[][] yDataSeriesRange,
      double[][] yDataSeries,
      Color[] yDataSeriesColors,
      String[] yDataSeriesTickSuffix,
      boolean drawBorder,
      Color backgroundColor) {

    // Domain axis
    NumberAxis xAxis = new NumberAxis(xAxisTitle);
    xAxis.setFixedDimension(CHART_AXIS_DIMENSION);
    xAxis.setLabelPaint(Color.black);
    xAxis.setTickLabelPaint(Color.black);

    double maxSeconds = getMaxValue(timeValuesInSeconds);
    TimeAxisResolution xAxisRes = TimeAxisResolution.findTimeUnit(maxSeconds);
    xAxis.setTickUnit(new NumberTickUnit(xAxisRes.tickStep));
    double[] scaledTimeValues = xAxisRes.scale(timeValuesInSeconds);

    String tickSymbol =
        I18N.getString(locale, "running.job.details.chart.timeunit.symbol." + xAxisRes.name());
    xAxis.setNumberFormatOverride(new DecimalFormat("###.##'" + tickSymbol + "'"));

    // First dataset
    String firstDataSetTitle = yDataSeriesTitles[0];
    XYDataset firstDataSet = createXYDataSet(firstDataSetTitle, scaledTimeValues, yDataSeries[0]);
    Color firstDataSetColor = yDataSeriesColors[0];

    // First range axis
    NumberAxis firstYAxis = new NumberAxis(firstDataSetTitle);

    firstYAxis.setFixedDimension(CHART_AXIS_DIMENSION);
    setAxisRange(firstYAxis, yDataSeriesRange[0]);
    firstYAxis.setLabelPaint(firstDataSetColor);
    firstYAxis.setTickLabelPaint(firstDataSetColor);
    String firstAxisTickSuffix = yDataSeriesTickSuffix[0];
    if (firstAxisTickSuffix != null && !firstAxisTickSuffix.isEmpty()) {
      firstYAxis.setNumberFormatOverride(new DecimalFormat("###.##'" + firstAxisTickSuffix + "'"));
    }

    // Create the plot with domain axis and first range axis
    XYPlot plot = new XYPlot(firstDataSet, xAxis, firstYAxis, null);

    XYLineAndShapeRenderer firstRenderer = new XYLineAndShapeRenderer(true, false);
    plot.setRenderer(firstRenderer);

    plot.setOrientation(PlotOrientation.VERTICAL);
    plot.setBackgroundPaint(Color.lightGray);
    plot.setDomainGridlinePaint(Color.white);
    plot.setRangeGridlinePaint(Color.white);

    plot.setAxisOffset(new RectangleInsets(5.0, 5.0, 5.0, 5.0));
    firstRenderer.setSeriesPaint(0, firstDataSetColor);

    // Now iterate on next axes
    for (int i = 1; i < yDataSeries.length; i++) {
      // Create axis
      String seriesTitle = yDataSeriesTitles[i];
      Color seriesColor = yDataSeriesColors[i];
      NumberAxis yAxis = new NumberAxis(seriesTitle);

      yAxis.setFixedDimension(CHART_AXIS_DIMENSION);
      setAxisRange(yAxis, yDataSeriesRange[i]);

      yAxis.setLabelPaint(seriesColor);
      yAxis.setTickLabelPaint(seriesColor);

      String yAxisTickSuffix = yDataSeriesTickSuffix[i];
      if (yAxisTickSuffix != null && !yAxisTickSuffix.isEmpty()) {
        yAxis.setNumberFormatOverride(new DecimalFormat("###.##'" + yAxisTickSuffix + "'"));
      }

      // Create dataset and add axis to plot
      plot.setRangeAxis(i, yAxis);
      plot.setRangeAxisLocation(i, AxisLocation.BOTTOM_OR_LEFT);
      plot.setDataset(i, createXYDataSet(seriesTitle, scaledTimeValues, yDataSeries[i]));
      plot.mapDatasetToRangeAxis(i, i);
      XYItemRenderer renderer = new StandardXYItemRenderer();
      renderer.setSeriesPaint(0, seriesColor);
      plot.setRenderer(i, renderer);
    }

    // Create the chart
    JFreeChart chart = new JFreeChart(chartTitle, JFreeChart.DEFAULT_TITLE_FONT, plot, false);

    // Customize rendering
    chart.setBackgroundPaint(Color.white);
    chart.setBorderVisible(true);
    chart.setBorderPaint(Color.BLACK);

    // Render image
    try {
      ChartUtilities.saveChartAsPNG(outputFile, chart, pxWidth, pxHeight);
    } catch (IOException e) {
      LOG.error("Chart export failed", e);
    }
  }

  /**
   * Create a XYDataset based on the given arguments.
   *
   * @param name The name
   * @param timeValues The timevalues
   * @param values the values
   * @return a DefaultXYDataset.
   */
  private XYDataset createXYDataSet(String name, double[] timeValues, double[] values) {

    DefaultXYDataset ds = new DefaultXYDataset();
    ds.addSeries(name, new double[][] {timeValues, values});

    return ds;
  }

  /**
   * Find the maximum of the given values. Since the maximum is accumulated starting from {@link
   * Double#MIN_VALUE}, that value is returned if all inputs are smaller than it.
   *
   * @param values an array of doubles
   * @return the maximum of the values given
   */
  private double getMaxValue(double[] values) {
    double max = Double.MIN_VALUE;
    for (double v : values) {
      max = Math.max(v, max);
    }
    return max;
  }

  /**
   * Set the axis range.
   *
   * @param axis a numberAxis
   * @param range a range
   */
  private void setAxisRange(NumberAxis axis, double[] range) {
    if (range == null || range.length != 2) {
      axis.setAutoRange(true);
    } else {
      double lower = range[0];
      double upper = range[1];
      ArgumentNotValid.checkTrue(lower < upper, "Incorrect range");
      axis.setAutoRange(false);
      axis.setRange(new Range(lower, upper));
    }
  }
}
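/*
 * Hedged usage sketch (not from the original source): the generator's lifecycle
 * as seen from monitoring code. The job id and the example class are illustrative.
 */
class ChartGenUsageExample {
  static void show() {
    StartedJobHistoryChartGen chartGen = new StartedJobHistoryChartGen(42L);
    // Charts are regenerated periodically in the background; getChartFile()
    // returns null until the first rendering has completed.
    File chart = chartGen.getChartFile();
    if (chart != null) {
      System.out.println("History chart at " + chart.getAbsolutePath());
    }
    chartGen.cleanup(); // stop the periodic task and delete the image
  }
}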
/**
 * This class represents one job to run by Heritrix. It's based on a number of configurations all
 * based on the same order.xml and at most one configuration for each domain. Each job consists of
 * configurations of approximately the same size; that is, the difference in expectation from the
 * smallest configuration to the largest is within a factor defined as limMaxRelSize (although
 * differences smaller than limMinAbsSize are ignored). There is a limit, limMaxTotalSize, on the
 * total size of the job in objects.
 *
 * <p>A job may also be limited on bytes or objects, defined either by the configurations in the job
 * or the harvest definition the job is generated by.
 *
 * <p>The job contains the order file, the seedlist and the current status of the job, as well as
 * the ID of the harvest definition that defined it and names of all the configurations it is based
 * on.
 */
@SuppressWarnings({"serial"})
public class Job implements Serializable, JobInfo {
  private static final transient Logger log = LoggerFactory.getLogger(Job.class);

  // Persistent fields stored in and read from DAO
  /** The persistent ID of this job. */
  private Long jobID;
  /** The Id of the harvestdefinition, that generated this job. */
  protected Long origHarvestDefinitionID;
  /** The status of the job. See the JobStatus class for the possible states. */
  protected JobStatus status;
  /** The name of the {@link HarvestChannel} on which this job will be posted. */
  private String channel;

  /** Whether the job belongs to a snapshot or partial harvest. */
  private boolean isSnapshot;
  /**
   * Overrides the individual configurations maximum setting for objects retrieved from a domain
   * when set to a positive value.
   */
  private long forceMaxObjectsPerDomain = Constants.HERITRIX_MAXOBJECTS_INFINITY;
  /**
   * Overrides the individual configurations maximum setting for bytes retrieved from a domain when
   * set to other than -1.
   */
  private long forceMaxBytesPerDomain = Constants.HERITRIX_MAXBYTES_INFINITY;
  /** The name of the harvest template used by the job. */
  private String orderXMLname;
  /** The harvest template used by the job. */
  private HeritrixTemplate orderXMLdoc;
  /** The list of Heritrix settings files. */
  private File[] settingsXMLfiles;

  /** The corresponding Dom4j Documents for these files. */
  // private Document[] settingsXMLdocs;

  /**
   * A set of seeds involved in this job. Outside the SetSeedList() method, the set of seeds is
   * updated in the addConfiguration() method.
   */
  private Set<String> seedListSet = new HashSet<String>();
  /** Which run of the harvest definition this is. */
  private int harvestNum;
  /** Errors during harvesting. */
  private String harvestErrors;
  /** Details about errors during harvesting. */
  private String harvestErrorDetails;
  /** Errors during upload of the harvested data. */
  private String uploadErrors;
  /** Details about errors during upload of the harvested data. */
  private String uploadErrorDetails;
  /** The starting point of the job. */
  private Date actualStart;
  /** The ending point of the job. */
  private Date actualStop;
  /** The time when this job was submitted. */
  private Date submittedDate;
  /** The time when this job was created. */
  private Date creationDate;

  /** Edition is used by the DAO to keep track of changes. */
  private long edition = -1;

  /** Resubmitted as the Job with this ID. If null, this job has not been resubmitted. */
  private Long resubmittedAsJobWithID;

  /** Continuation of this job. */
  private Long continuationOF;

  /**
   * A map (domainName, domainConfigurationName), must be accessible in order to update job
   * information (see Ass. 2.4.3)
   */
  private Map<String, String> domainConfigurationMap;
  /**
   * A hint to the DAO that configurations have changed. Since configurations are large, the DAO can
   * use that this is false to avoid updating the config list. The DAO can set it to false after
   * saving configurations.
   */
  boolean configsChanged = false;

  // Intermediate fields, non-persistent and only used while building objects

  /**
   * Whether the maxObjects field was defined by the harvest definition or the configuration limit.
   * This is deciding for whether we accept smaller configurations or not when building jobs. True
   * means the limit is defined by the configuration, false means that it is defined by the harvest
   * definition.
   */
  private boolean configurationSetsObjectLimit;

  /**
   * Whether the maxBytes field was defined by the harvest definition or the configuration limit.
   * This is deciding for whether we accept smaller configurations or not when building jobs. True
   * means the limit is defined by the configuration, false means by the harvest definition.
   */
  private boolean configurationSetsByteLimit;

  /** The lowest number of objects expected by a configuration. */
  private long minCountObjects;

  /** The highest number of objects expected by a configuration. */
  private long maxCountObjects;

  /** The total number of objects expected by all added configurations. */
  private long totalCountObjects;

  /** The max time in seconds given to the harvester for this job. 0 is unlimited. */
  private long forceMaxRunningTime;

  /**
   * If true, this job object is still undergoing changes due to having more configurations added.
   * When set to false, the object is no longer considered immutable except for updating status.
   *
   * <p>Jobs loaded from the DAO are never under construction anymore.
   */
  private boolean underConstruction = true;

  // Constants

  // Note: The following constants are intentionally left non-static for easy
  // unit testing

  private boolean maxObjectsIsSetByQuotaEnforcer =
      Settings.getBoolean(HarvesterSettings.OBJECT_LIMIT_SET_BY_QUOTA_ENFORCER);

  /**
   * The harvestname prefix used in the files generated by Heritrix. Is set using an
   * ArchiveFileNaming class when the jobID is available.
   */
  private String harvestnamePrefix;

  /** This variable is right now the same as harvestdefinitions.audience field. */
  private String harvestAudience;

  protected Job() {
    this.status = JobStatus.NEW;
  }

  /**
   * Package private constructor for common initialisation.
   *
   * @param harvestID the id of the harvestdefinition
   * @param cfg the configuration to base the Job on
   * @param orderXMLdoc the harvest template for the job
   * @param channel the channel on which the job will be submitted.
   * @param forceMaxObjectsPerDomain the maximum number of objects harvested from a domain,
   *     overrides individual configuration settings. -1 means no limit
   * @param forceMaxBytesPerDomain The maximum number of bytes harvested from a domain, or -1 for
   *     no limit.
   * @param forceMaxJobRunningTime The max time in seconds given to the harvester for this job
   * @param harvestNum the run number of the harvest definition
   * @throws ArgumentNotValid if cfg, harvestID or channel is null, harvestID is invalid, or if
   *     any limit < -1
   */
  public Job(
      Long harvestID,
      DomainConfiguration cfg,
      HeritrixTemplate orderXMLdoc,
      HarvestChannel channel,
      long forceMaxObjectsPerDomain,
      long forceMaxBytesPerDomain,
      long forceMaxJobRunningTime,
      int harvestNum)
      throws ArgumentNotValid {
    ArgumentNotValid.checkNotNull(cfg, "cfg");
    ArgumentNotValid.checkNotNull(harvestID, "harvestID");
    ArgumentNotValid.checkNotNegative(harvestID, "harvestID");
    ArgumentNotValid.checkNotNull(channel, "channel");

    if (forceMaxObjectsPerDomain < -1) {
      String msg = "forceMaxObjectsPerDomain must be either -1 or positive";
      log.debug(msg);
      throw new ArgumentNotValid(msg);
    }
    if (forceMaxBytesPerDomain < -1) {
      String msg = "forceMaxBytesPerDomain must be either -1 or positive";
      log.debug(msg);
      throw new ArgumentNotValid(msg);
    }

    if (forceMaxBytesPerDomain == 0L) {
      log.warn(
          "forceMaxBytesPerDomain should probably not be 0.Means 0 bytes downloaded per domain");
    }

    if (forceMaxObjectsPerDomain == 0L) {
      log.warn(
          "forceMaxObjectsPerDomain should probably not be 0.Means 0 objects downloaded per domain");
    }

    // setup initial members
    domainConfigurationMap = new HashMap<>();
    origHarvestDefinitionID = harvestID;
    orderXMLname = cfg.getOrderXmlName();
    this.orderXMLdoc = orderXMLdoc;

    setHarvestChannel(channel);

    long maxObjects = NumberUtils.minInf(forceMaxObjectsPerDomain, cfg.getMaxObjects());
    setMaxObjectsPerDomain(maxObjects);
    configurationSetsObjectLimit = (maxObjects != forceMaxObjectsPerDomain);

    long maxBytes = NumberUtils.minInf(forceMaxBytesPerDomain, cfg.getMaxBytes());
    setMaxBytesPerDomain(maxBytes);
    configurationSetsByteLimit = (maxBytes != forceMaxBytesPerDomain);

    long expectation =
        cfg.getExpectedNumberOfObjects(forceMaxObjectsPerDomain, forceMaxBytesPerDomain);
    maxCountObjects = expectation;
    minCountObjects = expectation;
    this.harvestNum = harvestNum;

    addConfiguration(cfg);

    setMaxJobRunningTime(forceMaxJobRunningTime);

    setArchiveFormatInTemplate(Settings.get(HarvesterSettings.HERITRIX_ARCHIVE_FORMAT));

    setAttributes(cfg.getAttributesAndTypes());

    orderXMLdoc.enableOrDisableDeduplication(
        Settings.getBoolean(HarvesterSettings.DEDUPLICATION_ENABLED));

    status = JobStatus.NEW;
  }

  public void setAttributes(List<AttributeAndType> attributesAndTypes) {
    orderXMLdoc.insertAttributes(attributesAndTypes);
  }

  /** Update the order template according to the chosen archive format (arc/warc). */
  private void setArchiveFormatInTemplate(String archiveFormat) {
    if (!underConstruction) {
      final String msg = "Cannot modify job " + this + " as it is no longer under construction";
      log.debug(msg);
      throw new IllegalState(msg);
    }
    orderXMLdoc.setArchiveFormat(archiveFormat);
  }

  /**
   * Create a new Job object from basic information stored in the DAO.
   *
   * @param harvestID the id of the harvestdefinition
   * @param configurations the configurations to base the Job on
   * @param channel the name of the channel on which the job will be submitted.
   * @param snapshot whether the job belongs to a snapshot harvest
   * @param forceMaxObjectsPerDomain the maximum number of objects harvested from a domain,
   *     overrides individual configuration settings. 0 means no limit.
   * @param forceMaxBytesPerDomain The maximum number of objects harvested from a domain, or -1 for
   *     no limit.
   * @param forceMaxJobRunningTime The max time in seconds given to the harvester for this job
   * @param status the current status of the job.
   * @param orderXMLname the name of the order template used.
   * @param orderXMLdoc the (possibly modified) template
   * @param seedlist the combined seedlist from all configs.
   * @param harvestNum the run number of the harvest definition
   */
  Job(
      Long harvestID,
      Map<String, String> configurations,
      String channel,
      boolean snapshot,
      long forceMaxObjectsPerDomain,
      long forceMaxBytesPerDomain,
      long forceMaxJobRunningTime,
      JobStatus status,
      String orderXMLname,
      HeritrixTemplate orderXMLdoc,
      String seedlist,
      int harvestNum,
      Long continuationOf) {
    origHarvestDefinitionID = harvestID;
    domainConfigurationMap = configurations;
    this.channel = channel;
    this.isSnapshot = snapshot;
    this.forceMaxBytesPerDomain = forceMaxBytesPerDomain;
    this.forceMaxObjectsPerDomain = forceMaxObjectsPerDomain;
    this.forceMaxRunningTime = forceMaxJobRunningTime;
    this.status = status;
    this.orderXMLname = orderXMLname;
    this.orderXMLdoc = orderXMLdoc;
    this.setSeedList(seedlist);
    this.harvestNum = harvestNum;
    this.continuationOF = continuationOf;

    underConstruction = false;
  }

  /**
   * Adds a configuration to this Job. Seedlists and settings are updated accordingly.
   *
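   * <p>A minimal usage sketch (the {@code job} and {@code cfg} references are illustrative; in
   * production both come from the harvest definition DAO):
   *
   * <pre>{@code
   * // Only add a configuration that uses the job's order template and whose
   * // domain is not yet covered by the job:
   * if (!job.getDomainConfigurationMap().containsKey(cfg.getDomainName())
   *     && cfg.getOrderXmlName().equals(job.getOrderXMLName())) {
   *   job.addConfiguration(cfg); // merges cfg's seeds into the job's seedlist
   * }
   * }</pre>
   *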
   * @param cfg the configuration to add
   * @throws ArgumentNotValid if cfg is null, if cfg uses a different orderxml than this job, or if
   *     this job already contains a configuration for the domain of configuration cfg.
   */
  public void addConfiguration(DomainConfiguration cfg) {
    ArgumentNotValid.checkNotNull(cfg, "cfg");
    if (domainConfigurationMap.containsKey(cfg.getDomainName())) {
      throw new ArgumentNotValid(
          "Job already has a configuration for Domain " + cfg.getDomainName());
    }

    if (log.isTraceEnabled()) {
      log.trace("Adding configuration '{}' to job '{}'", cfg, cfg.getName());
    }

    if (!underConstruction) {
      final String msg = "Cannot modify job " + this + " as it is no longer under construction";
      log.debug(msg);
      throw new IllegalState(msg);
    }

    if (!cfg.getOrderXmlName().equals(getOrderXMLName())) {
      throw new ArgumentNotValid(
          "Job requires the orderxml file:'"
              + getOrderXMLName()
              + "' not:'"
              + cfg.getOrderXmlName()
              + "' used by the configuration:'"
              + cfg.getName()
              + "'");
    }

    domainConfigurationMap.put(cfg.getDomainName(), cfg.getName());

    // Add the seeds from the configuration to the Job seeds.
    // Take care of duplicates.
    for (Iterator<SeedList> itt = cfg.getSeedLists(); itt.hasNext(); ) {
      SeedList seed = itt.next();
      List<String> seeds = seed.getSeeds();
      for (String seedUrl : seeds) {
        seedListSet.add(seedUrl); // duplicates are silently ignored

        // TODO remove when heritrix implements this functionality
        // try to convert a seed into an Internationalized Domain Name
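        // (e.g. the host in "http://bücher.example/" would be rewritten to
        // "xn--bcher-kva.example", leaving the rest of the URL untouched)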
        try {
          String seedASCII = seedUrl;
          // Seeds with an explicit protocol or path are rare, but their
          // domain part still needs to be correctly IDNA-encoded
          if (seedUrl.contains(":") || seedUrl.contains("/")) {
            String normalizedUrl = seedUrl;
            if (!normalizedUrl.matches("^[a-zA-Z]+:.*")) {
              // If no protocol is given, assume http
              normalizedUrl = "http://" + normalizedUrl;
            }
            URL url = new URL(normalizedUrl);
            String domainName = url.getHost();
            String domainNameASCII = IDNA.toASCII(domainName);
            if (!domainName.equals(domainNameASCII)) {
              // If the domain name changed, replace that in the
              // seed.
              seedASCII = seedUrl.replaceFirst(Pattern.quote(domainName), domainNameASCII);
            }
          } else {
            seedASCII = IDNA.toASCII(seedUrl);
          }
          if (!seedASCII.equals(seedUrl)) {
            log.trace("Converted {} to {}", seedUrl, seedASCII);
            // Note that duplicates are silently ignored
            seedListSet.add(seedASCII);
          }
        } catch (IDNAException | MalformedURLException e) {
          log.trace("Cannot convert seed {} to ASCII", seedUrl, e);
        }
      }
    }

    orderXMLdoc.editOrderXMLAddPerDomainCrawlerTraps(cfg);

    // TODO update limits in settings files - see also bug 269

    // Update estimates of job size
    long expectation =
        cfg.getExpectedNumberOfObjects(forceMaxObjectsPerDomain, forceMaxBytesPerDomain);
    maxCountObjects = Math.max(expectation, maxCountObjects);
    minCountObjects = Math.min(expectation, minCountObjects);
    totalCountObjects += expectation;

    configsChanged = true;

    assert (maxCountObjects >= minCountObjects) : "basic invariant";
  }

  /**
   * Get the name of the order XML file used by this Job.
   *
   * @return the name of the orderXML file
   */
  public String getOrderXMLName() {
    return orderXMLname;
  }

  /**
   * Get the actual time when this job was stopped/completed.
   *
   * @return the time as Date
   */
  public Date getActualStop() {
    return actualStop;
  }

  /**
   * Get the actual time when this job was started.
   *
   * @return the time as Date
   */
  public Date getActualStart() {
    return actualStart;
  }

  /**
   * Get the time when this job was submitted.
   *
   * @return the time as Date
   */
  public Date getSubmittedDate() {
    return submittedDate;
  }

  /**
   * Get the time when this job was created.
   *
   * @return the creation time as a <code>Date</code>
   */
  public Date getCreationDate() {
    return creationDate;
  }

  /**
   * Get a list of Heritrix settings.xml files. Note that these files have nothing to do with
   * NetarchiveSuite settings files. They are files that supplement the Heritrix order.xml files,
   * and contain overrides for specific domains.
   *
   * @return the list of Files as an array
   */
  public File[] getSettingsXMLfiles() {
    return settingsXMLfiles;
  }

  /**
   * Get the id of the HarvestDefinition from which this job originates.
   *
   * @return the id as a Long
   */
  public Long getOrigHarvestDefinitionID() {
    return origHarvestDefinitionID;
  }

  /**
   * Get the id of this Job.
   *
   * @return the id as a Long
   */
  public Long getJobID() {
    return jobID;
  }

  /**
   * Set the id of this Job.
   *
   * @param id The Id for this job.
   */
  public void setJobID(Long id) {
    jobID = id;
  }

  /**
   * Gets the total number of different domains harvested by this job.
   *
   * @return the number of domains with a configuration added to this job
   */
  public int getCountDomains() {
    return domainConfigurationMap.size();
  }

  /**
   * Set the actual time when this job was started.
   *
   * <p>Logs a warning if actualStart is set to a time after actualStop.
   *
   * @param actualStart A Date object representing the time when this job was started.
   */
  public void setActualStart(Date actualStart) {
    ArgumentNotValid.checkNotNull(actualStart, "actualStart");
    if (actualStop != null && actualStop.before(actualStart)) {
      log.warn(
          "Job("
              + getJobID()
              + "): Start time ("
              + actualStart
              + ") is after end time: "
              + actualStop);
    }
    this.actualStart = (Date) actualStart.clone();
  }

  /**
   * Set the actual time when this job was stopped/completed. Logs a warning if actualStop is set
   * to a time before actualStart.
   *
   * @param actualStop A Date object representing the time when this job was stopped.
   * @throws ArgumentNotValid if actualStop is null
   */
  public void setActualStop(Date actualStop) throws ArgumentNotValid {
    ArgumentNotValid.checkNotNull(actualStop, "actualStop");
    if (actualStart == null) {
      log.warn("Job(" + getJobID() + "): actualStart should be defined before setting actualStop");
    } else if (actualStop.before(actualStart)) {
      log.warn(
          "Job("
              + getJobID()
              + "): actualStop ("
              + actualStop
              + ") is before actualStart: "
              + actualStart);
    }
    this.actualStop = (Date) actualStop.clone();
  }

  /**
   * Set the orderxml for this job.
   *
   * @param doc An orderxml to be used by this job
   */
  public void setOrderXMLDoc(HeritrixTemplate doc) {
    ArgumentNotValid.checkNotNull(doc, "doc");
    this.orderXMLdoc = doc;
  }

  /**
   * Gets the template representation of the order.xml associated with this Job.
   *
   * @return the template as a HeritrixTemplate
   */
  public HeritrixTemplate getOrderXMLdoc() {
    return orderXMLdoc;
  }

  /**
   * Set the seedlist of the job from the seedList argument. Individual seeds are separated by a
   * '\n' character. Duplicate seeds are removed.
   *
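   * <p>A minimal sketch (seed values are illustrative):
   *
   * <pre>{@code
   * job.setSeedList("http://www.example.com/\nhttp://www.example.org/\nhttp://www.example.com/");
   * // The duplicate seed is silently dropped; getSeedListAsString() then
   * // returns only the two distinct seeds.
   * }</pre>
   *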
   * @param seedList List of seeds as one String
   */
  public void setSeedList(String seedList) {
    ArgumentNotValid.checkNotNullOrEmpty(seedList, "seedList");
    seedListSet = new HashSet<>();
    BufferedReader reader = new BufferedReader(new StringReader(seedList));
    String seed;
    try {
      while ((seed = reader.readLine()) != null) {
        seedListSet.add(seed); // add to seedlist if not already there
      }
    } catch (IOException e) {
      // This never happens, as we're reading from a string!
      throw new IOFailure("IOException reading from seed string", e);
    } finally {
      IOUtils.closeQuietly(reader);
    }
  }

  /**
   * Get the seedlist as a String. The individual seeds are separated by the character '\n'. The
   * order of the seeds is unknown.
   *
   * @return the seedlist as a String
   */
  public String getSeedListAsString() {
    return StringUtils.conjoin("\n", seedListSet);
  }

  /**
   * Get the current status of this Job.
   *
   * @return the status as a JobStatus
   */
  public JobStatus getStatus() {
    return status;
  }

  /**
   * Sets status of this job.
   *
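   * <p>A minimal sketch of the normal lifecycle (assuming {@code job} was just created, so its
   * status is NEW):
   *
   * <pre>{@code
   * job.setStatus(JobStatus.SUBMITTED); // legal from NEW; also configures the quota enforcer
   * job.setStatus(JobStatus.STARTED);   // legal from SUBMITTED; records the actual start time
   * job.setStatus(JobStatus.DONE);      // legal from STARTED; records the actual stop time
   * }</pre>
   *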
   * @param newStatus the new status; must be a legal change from the current status
   * @throws ArgumentNotValid in case of invalid status argument or invalid status change
   */
  public void setStatus(JobStatus newStatus) {
    ArgumentNotValid.checkNotNull(newStatus, "newStatus");
    if (!status.legalChange(newStatus)) {
      final String message =
          "Status change from " + status + " to " + newStatus + " is not allowed";
      log.debug(message);
      throw new ArgumentNotValid(message);
    }

    if ((this.status == JobStatus.NEW || this.status == JobStatus.RESUBMITTED)
        && newStatus == JobStatus.SUBMITTED) {
      orderXMLdoc.configureQuotaEnforcer(
          maxObjectsIsSetByQuotaEnforcer, forceMaxBytesPerDomain, forceMaxObjectsPerDomain);
    }

    if (this.status == JobStatus.SUBMITTED && newStatus == JobStatus.STARTED) {
      setActualStart(new Date());
    }
    if (this.status == JobStatus.STARTED
        && (newStatus == JobStatus.DONE || newStatus == JobStatus.FAILED)) {
      setActualStop(new Date());
    }
    status = newStatus;
  }

  /**
   * Returns a map of domain names and name of their corresponding configuration.
   *
   * <p>The returned Map cannot be changed.
   *
   * @return a read-only Map from domain names (String) to configuration names (String)
   */
  public Map<String, String> getDomainConfigurationMap() {
    return Collections.unmodifiableMap(domainConfigurationMap);
  }

  /**
   * Gets the maximum number of objects harvested per domain.
   *
   * @return The maximum number of objects harvested per domain. 0 means no limit.
   */
  public long getMaxObjectsPerDomain() {
    return forceMaxObjectsPerDomain;
  }

  /**
   * Gets the maximum number of bytes harvested per domain.
   *
   * @return The maximum number of bytes harvested per domain. -1 means no limit.
   */
  public long getMaxBytesPerDomain() {
    return forceMaxBytesPerDomain;
  }

  /**
   * Get the edition number.
   *
   * @return The edition number
   */
  long getEdition() {
    return edition;
  }

  /**
   * Set the edition number.
   *
   * @param edition the new edition number
   */
  void setEdition(long edition) {
    this.edition = edition;
  }

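  /**
   * Set the harvest channel for this job. Both the channel name and the snapshot flag are derived
   * from the given channel.
   *
   * @param harvestChannel the harvest channel on which this job will be submitted
   */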
  public void setHarvestChannel(HarvestChannel harvestChannel) {
    this.channel = harvestChannel.getName();
    this.isSnapshot = harvestChannel.isSnapshot();
  }

  /** @return the associated {@link HarvestChannel} name. */
  public String getChannel() {
    return channel;
  }

  /**
   * Sets the associated {@link HarvestChannel} name.
   *
   * @param channel the channel name
   */
  public void setChannel(String channel) {
    this.channel = channel;
  }

  /**
   * @return true if the job belongs to a snapshot harvest, false if it belongs to a focused
   *     harvest.
   */
  public boolean isSnapshot() {
    return isSnapshot;
  }

  /**
   * Sets whether job belongs to a snapshot or focused harvest.
   *
   * @param isSnapshot true if the job belongs to a snapshot harvest, false if it belongs to a
   *     focused harvest.
   */
  public void setSnapshot(boolean isSnapshot) {
    this.isSnapshot = isSnapshot;
  }

  @Override
  public String toString() {
    return "Job "
        + getJobID()
        + " (state = "
        + getStatus()
        + ", HD = "
        + getOrigHarvestDefinitionID()
        + ", channel = "
        + getChannel()
        + ", snapshot = "
        + isSnapshot()
        + ", forcemaxcount = "
        + getForceMaxObjectsPerDomain()
        + ", forcemaxbytes = "
        + getMaxBytesPerDomain()
        + ", forcemaxrunningtime = "
        + forceMaxRunningTime
        + ", orderxml = "
        + getOrderXMLName()
        + ", numconfigs = "
        + getDomainConfigurationMap().size()
        + ", created = "
        + getCreationDate()
        + (getSubmittedDate() != null ? ", submitted = " + getSubmittedDate() : "")
        + (getActualStart() != null ? ", started = " + getActualStart() : "")
        + (getActualStop() != null ? ", stopped = " + getActualStop() : "")
        + ")";
  }

  /** @return the forceMaxObjectsPerDomain value; 0 means no limit. */
  public long getForceMaxObjectsPerDomain() {
    return forceMaxObjectsPerDomain;
  }

  /**
   * Sets the maxObjectsPerDomain value.
   *
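   * <p>Note that object and byte limits are coupled: forcing the object limit to zero also forces
   * the byte limit to zero (and vice versa), since a zero limit means nothing may be harvested. A
   * sketch (package-internal use, while the job is still under construction):
   *
   * <pre>{@code
   * job.setMaxObjectsPerDomain(0L);
   * assert job.getMaxBytesPerDomain() == 0L; // zeroed along with the object limit
   * }</pre>
   *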
   * @param maxObjectsPerDomain The forceMaxObjectsPerDomain to set. 0 means no limit.
   * @throws IllegalState if the job is no longer under construction.
   */
  protected void setMaxObjectsPerDomain(long maxObjectsPerDomain) {
    if (!underConstruction) {
      final String msg = "Cannot modify job " + this + " as it is no longer under construction";
      log.debug(msg);
      throw new IllegalState(msg);
    }

    this.forceMaxObjectsPerDomain = maxObjectsPerDomain;
    // FIXME? pass maxObjectsIsSetByQuotaEnforcer as an argument to setMaxObjectsPerDomain
    orderXMLdoc.setMaxObjectsPerDomain(maxObjectsPerDomain);

    if (0L == maxObjectsPerDomain && 0L != forceMaxBytesPerDomain) {
      setMaxBytesPerDomain(0L);
    }
  }

  /**
   * Set the maxbytes per domain value.
   *
   * @param maxBytesPerDomain The maxBytesPerDomain to set, or -1 for no limit.
   */
  protected void setMaxBytesPerDomain(long maxBytesPerDomain) {
    if (!underConstruction) {
      final String msg = "Cannot modify job " + this + " as it is no longer under construction";
      log.debug(msg);
      throw new IllegalState(msg);
    }
    this.forceMaxBytesPerDomain = maxBytesPerDomain;
    orderXMLdoc.setMaxBytesPerDomain(maxBytesPerDomain);

    if (0L == maxBytesPerDomain && 0L != forceMaxObjectsPerDomain) {
      setMaxObjectsPerDomain(0L);
    }
  }

  /**
   * Set the maxJobRunningTime value.
   *
   * @param maxJobRunningTime The maxJobRunningTime in seconds to set, or 0 for no limit.
   */
  protected void setMaxJobRunningTime(long maxJobRunningTime) {
    if (!underConstruction) {
      final String msg = "Cannot modify job " + this + " as it is no longer under construction";
      log.debug(msg);
      throw new IllegalState(msg);
    }
    this.forceMaxRunningTime = maxJobRunningTime;
    orderXMLdoc.setMaxJobRunningTime(maxJobRunningTime);
  }

  /** @return the maxJobRunningTime value; 0 means no limit. */
  public long getMaxJobRunningTime() {
    return forceMaxRunningTime;
  }

  /**
   * Get the harvestNum for this job. The number reflects which run of the harvest definition this
   * is.
   *
   * @return the harvestNum for this job.
   */
  public int getHarvestNum() {
    return harvestNum;
  }

  /**
   * Set the harvestNum for this job. The number reflects which run of the harvest definition this
   * is. ONLY TO BE USED IN THE CONSTRUCTION PHASE.
   *
   * @param harvestNum a given harvestNum
   */
  public void setHarvestNum(int harvestNum) {
    if (!underConstruction) {
      final String msg = "Cannot modify job " + this + " as it is no longer under construction";
      log.debug(msg);
      throw new IllegalState(msg);
    }
    this.harvestNum = harvestNum;
  }

  /**
   * Get the list of harvest errors for this job. If there are no harvest errors, null is
   * returned. This value is not meaningful until the job is finished (FAILED, DONE, RESUBMITTED).
   *
   * @return the harvest errors for this job or null if no harvest errors.
   */
  public String getHarvestErrors() {
    return harvestErrors;
  }

  /**
   * Append to the list of harvest errors for this job. Nothing happens, if argument harvestErrors
   * is null.
   *
   * @param harvestErrors a string containing harvest errors (may be null)
   */
  public void appendHarvestErrors(String harvestErrors) {
    if (harvestErrors != null) {
      if (this.harvestErrors == null) {
        this.harvestErrors = harvestErrors;
      } else {
        this.harvestErrors += "\n" + harvestErrors;
      }
    }
  }

  /**
   * Get the list of harvest error details for this job. If there are no harvest error details,
   * null is returned. This value is not meaningful until the job is finished (FAILED, DONE,
   * RESUBMITTED).
   *
   * @return the list of harvest error details for this job or null if no harvest error details.
   */
  public String getHarvestErrorDetails() {
    return harvestErrorDetails;
  }

  /**
   * Append to the list of harvest error details for this job. Nothing happens, if argument
   * harvestErrorDetails is null.
   *
   * @param harvestErrorDetails a string containing harvest error details.
   */
  public void appendHarvestErrorDetails(String harvestErrorDetails) {
    if (harvestErrorDetails != null) {
      if (this.harvestErrorDetails == null) {
        this.harvestErrorDetails = harvestErrorDetails;
      } else {
        this.harvestErrorDetails += "\n" + harvestErrorDetails;
      }
    }
  }

  /**
   * Get the list of upload errors. If there are no upload errors, null is returned. This value is
   * not meaningful until the job is finished (FAILED, DONE, RESUBMITTED).
   *
   * @return the list of upload errors as String, or null if no upload errors.
   */
  public String getUploadErrors() {
    return uploadErrors;
  }

  /**
   * Append to the list of upload errors. Nothing happens, if argument uploadErrors is null.
   *
   * @param uploadErrors a string containing upload errors.
   */
  public void appendUploadErrors(String uploadErrors) {
    if (uploadErrors != null) {
      if (this.uploadErrors == null) {
        this.uploadErrors = uploadErrors;
      } else {
        this.uploadErrors += "\n" + uploadErrors;
      }
    }
  }

  /**
   * Get the list of upload error details. If there are no upload error details, null is returned.
   * This value is not meaningful until the job is finished (FAILED, DONE, RESUBMITTED).
   *
   * @return the list of upload error details as String, or null if no upload error details
   */
  public String getUploadErrorDetails() {
    return uploadErrorDetails;
  }

  /**
   * Append to the list of upload error details. Nothing happens, if argument uploadErrorDetails is
   * null.
   *
   * @param uploadErrorDetails a string containing upload error details.
   */
  public void appendUploadErrorDetails(String uploadErrorDetails) {
    if (uploadErrorDetails != null) {
      if (this.uploadErrorDetails == null) {
        this.uploadErrorDetails = uploadErrorDetails;
      } else {
        this.uploadErrorDetails += "\n" + uploadErrorDetails;
      }
    }
  }

  /**
   * Get the ID for the job which this job was resubmitted as. If null, this job has not been
   * resubmitted.
   *
   * @return this ID.
   */
  public Long getResubmittedAsJob() {
    return resubmittedAsJobWithID;
  }

  /**
   * Set the Date for when this job was submitted. If null, this job has not been submitted.
   *
   * @param submittedDate The date when this was submitted
   */
  public void setSubmittedDate(Date submittedDate) {
    this.submittedDate = submittedDate;
  }

  /**
   * Set the Date for when this job was created. If null, this job has not been created.
   *
   * @param creationDate The date when this was created
   */
  public void setCreationDate(Date creationDate) {
    this.creationDate = creationDate;
  }

  /**
   * Set the ID for the job which this job was resubmitted as.
   *
   * @param resubmittedAsJob An Id for a new job.
   */
  public void setResubmittedAsJob(Long resubmittedAsJob) {
    this.resubmittedAsJobWithID = resubmittedAsJob;
  }

  /**
   * @return id of the job that this job is supposed to continue using Heritrix recover-log or null
   *     if it starts from scratch.
   */
  public Long getContinuationOf() {
    return this.continuationOF;
  }

  @Override
  public String getHarvestFilenamePrefix() {
    if (this.harvestnamePrefix == null) {
      log.warn(
          "HarvestnamePrefix not yet set for job {}. Set it by using the naming scheme. "
              + "This should only happen for old jobs being read",
          this.jobID);
      setDefaultHarvestNamePrefix();
    }
    return this.harvestnamePrefix;
  }

  /** @param prefix the harvest filename prefix to use for this job. */
  public void setHarvestFilenamePrefix(String prefix) {
    this.harvestnamePrefix = prefix;
  }

  /** @return the forceMaxBytesPerDomain */
  public long getForceMaxBytesPerDomain() {
    return forceMaxBytesPerDomain;
  }

  /** @return the configurationSetsObjectLimit */
  public boolean isConfigurationSetsObjectLimit() {
    return configurationSetsObjectLimit;
  }

  /** @return the configurationSetsByteLimit */
  public boolean isConfigurationSetsByteLimit() {
    return configurationSetsByteLimit;
  }

  /** @return the minCountObjects */
  public long getMinCountObjects() {
    return minCountObjects;
  }

  /** @return the maxCountObjects */
  public long getMaxCountObjects() {
    return maxCountObjects;
  }

  /** @return the totalCountObjects */
  public long getTotalCountObjects() {
    return totalCountObjects;
  }

  void setDefaultHarvestNamePrefix() {
    if (getJobID() != null) {
      ArchiveFileNaming naming = ArchiveFileNamingFactory.getInstance();
      log.debug("Applying the default ArchiveFileNaming class '{}'.", naming.getClass().getName());
      final String prefix = naming.getPrefix(this);
      setHarvestFilenamePrefix(prefix);
      log.debug("The harvestPrefix of this job is: {}", prefix);
    } else {
      log.warn(
          "The harvestnamePrefix is not set now, as it depends on the JobID, which is not set yet");
    }
  }

  /** @return the harvest-audience. */
  public String getHarvestAudience() {
    return harvestAudience;
  }

  /**
   * Set the harvest audience for this job. Taken from the harvestdefinition that generated this
   * job.
   *
   * @param theAudience the harvest-audience.
   */
  public void setHarvestAudience(String theAudience) {
    this.harvestAudience = theAudience;
  }

  // The following two methods are needed by harvestStatus-jobdetails.jsp.
  /**
   * Returns a list of sorted seeds for this job. The seeds are sorted by domain, and within each
   * domain, sorted by URL.
   *
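   * <p>A sketch of the grouping (seed values are illustrative):
   *
   * <pre>{@code
   * // With seeds "http://b.example.com/x", "http://a.example.com/y" and "http://example.org/",
   * // the two example.com seeds end up next to each other, sorted by URL within the group.
   * List<String> sorted = job.getSortedSeedList();
   * }</pre>
   *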
   * @return a list of sorted seeds for this job.
   */
  public List<String> getSortedSeedList() {
    // A TreeMap keeps the domains in sorted order, matching the documented contract
    Map<String, Set<String>> urlMap = new TreeMap<>();
    for (String seed : seedListSet) {
      String url;
      // Assume the protocol is http://, if it is missing
      if (!seed.matches(Constants.PROTOCOL_REGEXP)) {
        url = "http://" + seed;
      } else {
        url = seed;
      }
      String domain = getDomain(url);
      if (domain == null) {
        // stop processing this url, and continue to the next seed
        continue;
      }
      Set<String> set;
      if (urlMap.containsKey(domain)) {
        set = urlMap.get(domain);
      } else {
        set = new TreeSet<>();
        urlMap.put(domain, set);
      }
      set.add(seed);
    }
    List<String> result = new ArrayList<>();
    for (Set<String> set : urlMap.values()) {
      result.addAll(set);
    }
    return result;
  }
  /**
   * Get the domain, that the given URL belongs to.
   *
   * @param url a URL
   * @return the domain, that the given URL belongs to, or null if unable to do so.
   */
  private String getDomain(String url) {
    try {
      URL parsedUrl = new URL(url);
      return DomainUtils.domainNameFromHostname(parsedUrl.getHost());
    } catch (MalformedURLException e) {
      log.warn("The string '{}' is not a valid URL", url);
      return null;
    }
  }
}
/** Unit test for the archive upload tool. */
public class UploadTester {
  private UseTestRemoteFile ulrf = new UseTestRemoteFile();
  private PreventSystemExit pse = new PreventSystemExit();
  private PreserveStdStreams pss = new PreserveStdStreams(true);
  private MoveTestFiles mtf = new MoveTestFiles(TestInfo.DATA_DIR, TestInfo.WORKING_DIR);
  private MockupJMS mjms = new MockupJMS();
  private MockupArcRepositoryClient marc = new MockupArcRepositoryClient();
  ReloadSettings rs = new ReloadSettings();

  /** Max number of store retries. */
  private final int storeRetries =
      Settings.getInt(JMSArcRepositoryClient.ARCREPOSITORY_STORE_RETRIES);

  @Before
  public void setUp() {
    rs.setUp();
    ulrf.setUp();
    mjms.setUp();
    mtf.setUp();
    pss.setUp();
    pse.setUp();
    marc.setUp();
    Settings.set(CommonSettings.NOTIFICATIONS_CLASS, RememberNotifications.class.getName());
  }

  @After
  public void tearDown() {
    marc.tearDown();
    pse.tearDown();
    pss.tearDown();
    mtf.tearDown();
    mjms.tearDown();
    ulrf.tearDown();
    RememberNotifications.resetSingleton();
    rs.tearDown();
  }

  @Test
  public void testConstructor() {
    ReflectUtils.testUtilityConstructor(Upload.class);
  }

  /** Verify that uploading a single ARC file works as expected and deletes the file locally. */
  @Test
  public void testMainOneFile() {
    Upload.main(new String[] {TestInfo.ARC1.getAbsolutePath()});
    assertMsgCount(1, 0);
    assertStoreStatus(0, TestInfo.ARC1, true);
  }

  /**
   * Verify that uploading more than one ARC file works as expected and deletes the files locally.
   */
  @Test
  public void testMainSeveralFiles() {
    Upload.main(new String[] {TestInfo.ARC1.getAbsolutePath(), TestInfo.ARC2.getAbsolutePath()});
    assertMsgCount(2, 0);
    assertStoreStatus(0, TestInfo.ARC1, true);
    assertStoreStatus(1, TestInfo.ARC2, true);
  }

  /**
   * Verify that non-ARC files are rejected and execution fails. Also verify that nothing is
   * stored in that case.
   */
  @Test
  public void testMainNonArc() {
    try {
      Upload.main(
          new String[] {TestInfo.ARC1.getAbsolutePath(), TestInfo.INDEX_DIR.getAbsolutePath()});
      fail("Calling Upload with non-arc file should System.exit");
    } catch (SecurityException e) {
      // Expected
      assertMsgCount(0, 0);
    }
  }

  /** Verify that uploading a single WARC file works as expected and deletes the file locally. */
  @Test
  public void testMainOneWarcFile() {
    Upload.main(new String[] {TestInfo.WARC1.getAbsolutePath()});
    assertMsgCount(1, 0);
    assertStoreStatus(0, TestInfo.WARC1, true);
  }

  /**
   * Verify that the system fails as expected when the store operation fails on the server side.
   * (Local files must NOT be deleted).
   */
  @Test
  public void testMainStoreFails1() {
    marc.failOnFile(TestInfo.ARC1.getName());

    Upload.main(
        new String[] {
          TestInfo.ARC1.getAbsolutePath(),
          TestInfo.ARC2.getAbsolutePath(),
          TestInfo.ARC3.getAbsolutePath()
        });
    assertMsgCount(2, 1);
    int index = 0;
    for (int i = 0; i < storeRetries; i++) {
      assertStoreStatus(index, TestInfo.ARC1, false);
      index++;
    }
    assertStoreStatus(index, TestInfo.ARC2, true);
    index++;
    assertStoreStatus(index, TestInfo.ARC3, true);
  }

  /**
   * Verify that the system fails as expected when the store operation fails on the server side.
   * (Local files must NOT be deleted).
   */
  @Test
  public void testMainStoreFails2() {
    marc.failOnFile(TestInfo.ARC2.getName());

    Upload.main(
        new String[] {
          TestInfo.ARC1.getAbsolutePath(),
          TestInfo.ARC2.getAbsolutePath(),
          TestInfo.ARC3.getAbsolutePath()
        });
    assertMsgCount(2, 1);
    int index = 0;
    assertStoreStatus(index, TestInfo.ARC1, true);
    index++;
    for (int i = 0; i < storeRetries; i++) {
      assertStoreStatus(index, TestInfo.ARC2, false);
      index++;
    }
    assertStoreStatus(index, TestInfo.ARC3, true);
  }

  /**
   * Verify that the system fails as expected when the store operation fails on the server side.
   * (Local files must NOT be deleted).
   */
  @Test
  public void testMainStoreFails3() {
    marc.failOnFile(TestInfo.ARC3.getName());

    Upload.main(
        new String[] {
          TestInfo.ARC1.getAbsolutePath(),
          TestInfo.ARC2.getAbsolutePath(),
          TestInfo.ARC3.getAbsolutePath()
        });
    assertMsgCount(2, 1);
    int index = 0;
    assertStoreStatus(index, TestInfo.ARC1, true);
    index++;
    assertStoreStatus(index, TestInfo.ARC2, true);
    index++;
    for (int i = 0; i < storeRetries; i++) {
      assertStoreStatus(index, TestInfo.ARC3, false);
      index++;
    }
  }

  /**
   * Verifies that calling Upload without arguments fails. Also verifies that nothing is stored in
   * that case.
   */
  @Test
  public void testNoArguments() {
    try {
      Upload.main(new String[] {});
      fail("Calling Upload without arguments should System.exit");
    } catch (SecurityException e) {
      // Expected
      assertMsgCount(0, 0);
    }
  }

  /**
   * Asserts that we got the expected number of StoreMessages.
   *
   * @param succeeded Number of files successfully stored
   * @param failed Number of files that never got stored
   */
  private void assertMsgCount(int succeeded, int failed) {
    int expected = succeeded + failed * storeRetries;
    assertEquals(
        "Upload should generate exactly 1 StoreMessage "
            + "per succeeded arc file and "
            + storeRetries
            + " per failed store",
        expected,
        marc.getMsgCount());
  }

  /**
   * Asserts that the nth StoreMessage is regarding the given arc file and that the arc file is
   * deleted if and only if store succeeded.
   *
   * @param n The relevant index to marc.getStoreMsgs()
   * @param arcFile The arc file that was stored
   * @param shouldSucceed Whether store was supposed to succeed
   */
  private void assertStoreStatus(int n, File arcFile, boolean shouldSucceed) {
    StoreMessage sm = marc.getStoreMsgs().get(n);
    assertEquals(
        "Upload should attempt to upload the specified files",
        arcFile.getName(),
        sm.getArcfileName());
    if (shouldSucceed) {
      assertFalse("Upload should delete a properly uploaded file", arcFile.exists());
    } else {
      assertTrue(
          "Upload should not delete a file that wasn't properly uploaded", arcFile.exists());
    }
  }
}