  public void processDocuments(
      int harvestType,
      List<DocumentPojo> toAdd,
      List<DocumentPojo> toUpdate_subsetOfAdd,
      List<DocumentPojo> toDelete) {
    PropertiesManager props = new PropertiesManager();

    // Note: toAdd = toAdd(old) + toUpdate
    // Updates need to be treated as follows:
    // - Delete the existing doc (including children, e.g. events) but keep selected fields
    //   (currently _id and created; in the future comments etc)

    // Delete toDelete plus toUpdate (the updated docs are then re-added below via toAdd, which
    // overwrites "created" for updated docs... well, all fields actually)
    toDelete.addAll(toUpdate_subsetOfAdd);
    StoreAndIndexManager storageManager = new StoreAndIndexManager();
    storageManager.removeFromDatastore_byURL(toDelete, (harvestType != InfiniteEnums.DATABASE));
    // (note: expands toDelete if any sourceUrl "docs" are present, see FileHarvester)

    // (Storing docs messes up the doc/event/entity objects, so don't do that just yet...)

    // Aggregation:
    // 1+2. Create aggregate entities/events ("features") and write them to the DB
    // (then can store feeds - doesn't matter that the entities/events have been modified by the
    // aggregation)
    // 3. (Scheduled for efficiency) Update all documents' frequencies based on new entities and
    // events
    // 4. (Scheduled for efficiency) Synchronize with index [after this, queries can find them - so
    // (2) must have happened]
    // (Synchronization currently "corrupts" the entities so needs to be run last)

    AggregationManager perSourceAggregation = null;

    if (!props.getAggregationDisabled()) {
      perSourceAggregation = new AggregationManager();
    }

    // 1+2]
    if (null != perSourceAggregation) {
      perSourceAggregation.doAggregation(toAdd, toDelete);
      perSourceAggregation.createOrUpdateFeatureEntries();
    }

    // Save feeds to feeds collection in MongoDb
    // (second field determines if content gets saved)
    if (null != perSourceAggregation) {
      perSourceAggregation.applyAggregationToDocs(toAdd);
      // (First save aggregated statistics back to the docs' entity/event instances)
    }
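    // (Persisting is only safe at this point: addToDatastore modifies the in-memory
    // doc/entity/event objects - see the note above - and applyAggregationToDocs has already
    // written the aggregated stats back into them)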
    storeFeeds(toAdd, (harvestType != InfiniteEnums.DATABASE));

    // Then finish aggregation:

    if (null != perSourceAggregation) {
      // 3]
      perSourceAggregation.runScheduledDocumentUpdates();

      // 4] This needs to happen last because it "corrupts" the entities and events
      perSourceAggregation.runScheduledSynchronization();
    }
  } // TESTED (by eye - logic is very simple)
  /**
   * Writes the documents ("feeds") to the DB and index
   *
   * @param docs list of documents to be added to the DB and index
   * @param bSaveContent whether the raw document content is also saved
   */
  private void storeFeeds(List<DocumentPojo> docs, boolean bSaveContent) {
    if (null != docs && docs.size() > 0) {
      StoreAndIndexManager store = new StoreAndIndexManager();
      store.addToDatastore(docs, bSaveContent);
    }
  } // TESTED (by eye)
  /**
   * Used to start the harvest service
   *
   * @param sources the list of sources to harvest
   * @throws IOException
   * @throws InterruptedException
   */
  public void startService(LinkedList<SourcePojo> sources)
      throws IOException, InterruptedException {
    // Let the client know the server is starting
    System.out.println("[SERVER] Harvest server is coming online");

    // Initialize/update the generic process controller (do this here so that it blocks before the
    // threading fun starts)
    new GenericProcessingController().Initialize();

    // Start the background aggregation thread (will do nothing if disabled)
    EntityBackgroundAggregationManager.startThread();
    AssociationBackgroundAggregationManager.startThread();

    _mainThread = Thread.currentThread();

    String hostname = "unknown.host";
    try {
      hostname = java.net.InetAddress.getLocalHost().getHostName();
    } catch (Exception e) {
      // (ignore - fall back to the "unknown.host" default, used only for logging)
    }

    // Add the shutdown hook
    ShutdownHook shutdownHook = new ShutdownHook();
    Runtime.getRuntime().addShutdownHook(shutdownHook);

    Date startDate = new Date();
    _logger.info("Starting harvest process at: " + startDate + ", host=" + hostname);

    // Perform processing

    PropertiesManager threadConfig = new PropertiesManager();
    String sThreadConfig = threadConfig.getHarvestThreadConfig();
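    // (sThreadConfig is either a single number - one thread pool covering all source types - or a
    // comma-separated list of <type>:<threads> entries, handled in the NumberFormatException
    // branch below)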

    HashSet<String> types = new HashSet<String>();
    try {
      String harvestTypes =
          new com.ikanow.infinit.e.harvest.utils.PropertiesManager().getHarvesterTypes();
      for (String s : harvestTypes.split("\\s*,\\s*")) {
        types.add(s.toLowerCase());
      }
    } catch (Exception e) {
      _logger.error(
          Globals.populateStackTrace(new StringBuffer("Failed to register all harvest types"), e));
    } // TESTED (by hand)

    // Max time for harvester (defaults to 25 mins)

    long maxTime_secs = threadConfig.getMaximumHarvestTime();
    if (maxTime_secs > 0) {
      new Timer().schedule(new InternalShutdown(), maxTime_secs * 1000); // (arg in ms)
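      // (InternalShutdown is assumed to request a graceful stop - e.g. by setting _bStopHarvest -
      // rather than exiting outright, so in-flight sources can finish)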
    } // TOTEST

    try {
      // All source types in a single thread

      int nThreads = Integer.parseInt(sThreadConfig);
      SourceTypeHarvesterRunnable allTypes = new SourceTypeHarvesterRunnable(sources, nThreads);
      _logger.info("(Launching " + nThreads + " threads for all source types)");
      allTypes.run();
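      // (note: run() is invoked directly, so this executes synchronously on the current thread and
      // returns only once all sources have been processed)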
    } catch (NumberFormatException e) {

      // The thread config must be a comma-separated list of type:threads entries

      // (step over each type and launch that number of threads for that type)

      String[] sConfigBlocks = sThreadConfig.split("\\s*,\\s*");
      ExecutorService exec = Executors.newFixedThreadPool(sConfigBlocks.length);
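      // (one executor slot per configured type; each SourceTypeHarvesterRunnable is assumed to
      // manage its own pool of nThreads workers internally)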
      for (String sConfigBlock : sConfigBlocks) {
        String[] sTypeOrNumThreads = sConfigBlock.split("\\s*:\\s*");
        if (2 == sTypeOrNumThreads.length) {
          try {
            int nThreads = Integer.parseInt(sTypeOrNumThreads[1]);
            types.remove(sTypeOrNumThreads[0].toLowerCase());
            SourceTypeHarvesterRunnable typeRunner =
                new SourceTypeHarvesterRunnable(sources, nThreads, sTypeOrNumThreads[0]);
            _logger.info(
                "(Launching "
                    + nThreads
                    + " threads for "
                    + sTypeOrNumThreads[0]
                    + " source types)");
            exec.submit(typeRunner);
          } catch (NumberFormatException e2) {
            _logger.error("Error in harvester thread configuration: " + sThreadConfig);
          }
        } else {
          _logger.error("Error in harvester thread configuration: " + sThreadConfig);
        }
      } // (end loop over different file types)

      // (generate one thread for everything else)
      for (String unusedType : types) { // (note case unimportant)
        SourceTypeHarvesterRunnable typeRunner =
            new SourceTypeHarvesterRunnable(sources, 1, unusedType);
        _logger.info("(Launching 1 thread for " + unusedType + " source types)");
        exec.submit(typeRunner);
      } // TESTED (by hand)

      exec.shutdown();
      int i = 0;
      while (!exec.isTerminated()) {
        try {
          Thread.sleep(1000);
        } catch (InterruptedException e3) {
          // (ignore - just loop round and re-check the executor state and stop flag)
        }
        if (_bStopHarvest) i++;
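        // (i only advances once a stop has been requested, so at ~1s per iteration this gives the
        // harvest threads roughly 4 hours to wind down before the hard exit below)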
        if (i > 14400) { // emergency shutdown time...
          _logger.error("Emergency shutdown after 4 hours of waiting for manual shutdown");
          System.exit(0);
        }
      }
    }
    com.ikanow.infinit.e.processing.generic.utils.PropertiesManager aggProps =
        new com.ikanow.infinit.e.processing.generic.utils.PropertiesManager();
    boolean bAggDisabled = aggProps.getAggregationDisabled();
    StoreAndIndexManager dataStore = new StoreAndIndexManager();
    boolean bResizedDB = dataStore.resizeDB();
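    // (resizeDB is assumed to trim the document collection back down to its configured cap;
    // if it did, the new size is logged below)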
    boolean deletedDocs = true;
    if (!bAggDisabled) {
      deletedDocs = AggregationManager.updateEntitiesFromDeletedDocuments(dataStore.getUUID());
    }
    if (deletedDocs) { // (also true if aggregation is disabled, in which case we don't know either way)
      dataStore.removeSoftDeletedDocuments();
      if (!bAggDisabled) {
        AggregationManager.updateDocEntitiesFromDeletedDocuments(dataStore.getUUID());
      }
    }
    if (bResizedDB) {
      _logger.info("(resized DB, now " + dataStore.getDatabaseSize() + " documents)");
    }

    HarvestController.logHarvesterStats();
    _logger.info("Completed harvest process at: " + new Date().toString());

    Date endDate = new Date();
    // Not allowed to cycle harvester runs too quickly, so sleep out the remainder of the
    // configured minimum harvest time:
    long nDiff = endDate.getTime() - startDate.getTime();
    long nToSleep = threadConfig.getMinimumHarvestTimeMs() - nDiff;
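    // (e.g. if this run took 2 minutes and the configured minimum is 5 minutes, sleep for the
    // remaining 3 before shutting down)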
    if ((nToSleep > 0) && !_bCurrentlySleepingBeforeExit) {
      try {
        _bCurrentlySleepingBeforeExit =
            true; // (don't really care there's a minor race condition here)
        Thread.sleep(nToSleep);
      } catch (InterruptedException e) {
        // Do nothing, probably got a signal
      }
    } // TESTED (cut and paste from tested Beta code)

    // Stop background aggregation
    EntityBackgroundAggregationManager.stopThreadAndWait();
    AssociationBackgroundAggregationManager.stopThreadAndWait();

    _logger.info("Harvest server is going offline");
    _bStopHarvest = true;
    _bReadyToTerminate =
        true; // (if we were terminated manually tell the shutdown hook it can stop)
    System.exit(0);
  } // TESTED