/**
 * Cleans up after a test: deletes every repository, authority, and output connection, then
 * unregisters the connector classes reported by {@code getConnectorClasses()}, {@code
 * getAuthorityClasses()} and {@code getOutputClasses()}, and finally delegates to {@code
 * super.localCleanUp()}.
 *
 * <p>All three phases are always attempted, even when an earlier one fails. The first exception
 * encountered is rethrown once every phase has run; later exceptions are dropped so that the
 * root cause is the one reported.
 *
 * @throws Exception the first exception raised by any of the clean-up phases
 */
protected void localCleanUp() throws Exception {
    IThreadContext tc = ThreadContextFactory.make();

    // Holds the FIRST failure seen; later failures must not overwrite it.
    Exception currentException = null;

    // Phase 1: delete all repository connections, authority connections, and output connections.
    try {
      IRepositoryConnectionManager connMgr = RepositoryConnectionManagerFactory.make(tc);
      IAuthorityConnectionManager authConnMgr = AuthorityConnectionManagerFactory.make(tc);
      IOutputConnectionManager outputMgr = OutputConnectionManagerFactory.make(tc);

      // Repository connections first
      for (IRepositoryConnection connection : connMgr.getAllConnections()) {
        connMgr.delete(connection.getName());
      }

      // Then authority connections
      for (IAuthorityConnection authority : authConnMgr.getAllConnections()) {
        authConnMgr.delete(authority.getName());
      }

      // Finally, output connections
      for (IOutputConnection output : outputMgr.getAllConnections()) {
        outputMgr.delete(output.getName());
      }
    } catch (Exception e) {
      currentException = e;
    }

    // Phase 2: unregister the connectors, inside a single database transaction.
    try {
      IDBInterface database =
          DBInterfaceFactory.make(
              tc,
              ManifoldCF.getMasterDatabaseName(),
              ManifoldCF.getMasterDatabaseUsername(),
              ManifoldCF.getMasterDatabasePassword());

      IConnectorManager mgr = ConnectorManagerFactory.make(tc);
      IAuthorityConnectorManager authMgr = AuthorityConnectorManagerFactory.make(tc);
      IOutputConnectorManager outputMgr = OutputConnectorManagerFactory.make(tc);
      IOutputConnectionManager outputConnManager = OutputConnectionManagerFactory.make(tc);
      IJobManager jobManager = JobManagerFactory.make(tc);
      IRepositoryConnectionManager connManager = RepositoryConnectionManagerFactory.make(tc);

      // Deregistration should be done in a transaction
      database.beginTransaction();
      try {
        for (String connectorClass : getConnectorClasses()) {
          // Find the connection names that come with this class
          String[] connectionNames = connManager.findConnectionsForConnector(connectorClass);
          // Let the jobs know the connector is no longer installed...
          jobManager.noteConnectorDeregistration(connectionNames);
          // ...and only then do the deregistration itself.
          mgr.unregisterConnector(connectorClass);
        }

        for (String authorityClass : getAuthorityClasses()) {
          authMgr.unregisterConnector(authorityClass);
        }

        for (String outputClass : getOutputClasses()) {
          // Find the connection names that come with this class
          String[] connectionNames = outputConnManager.findConnectionsForConnector(outputClass);
          // Notify all agents of the deregistration...
          AgentManagerFactory.noteOutputConnectorDeregistration(tc, connectionNames);
          // ...and only then do the deregistration itself.
          outputMgr.unregisterConnector(outputClass);
        }
      } catch (ManifoldCFException e) {
        database.signalRollback();
        throw e;
      } catch (Error e) {
        database.signalRollback();
        throw e;
      } finally {
        database.endTransaction();
      }
    } catch (Exception e) {
      // Record this failure only if nothing failed earlier.
      if (currentException == null) {
        currentException = e;
      }
    }

    // Phase 3: let the superclass clean up as well.
    try {
      super.localCleanUp();
    } catch (Exception e) {
      // Record this failure only if nothing failed earlier.
      if (currentException == null) {
        currentException = e;
      }
    }

    if (currentException != null) {
      throw currentException;
    }
  }
  /**
   * Runs the superclass set-up and then registers, inside one database transaction, every
   * repository connector, authority connector, and output connector supplied by the {@code
   * get*Classes()} / {@code get*Names()} methods.
   *
   * <p>The name and class arrays of each kind are read pairwise by index. After a repository or
   * output connector is registered, the connections that use its class are looked up and the job
   * manager (respectively the agents) are told about the registration.
   *
   * @throws Exception if the superclass set-up or any registration step fails
   */
  protected void localSetUp() throws Exception {
    super.localSetUp();

    IThreadContext threadContext = ThreadContextFactory.make();

    IDBInterface db =
        DBInterfaceFactory.make(
            threadContext,
            ManifoldCF.getMasterDatabaseName(),
            ManifoldCF.getMasterDatabaseUsername(),
            ManifoldCF.getMasterDatabasePassword());

    IConnectorManager repositoryConnectors = ConnectorManagerFactory.make(threadContext);
    IAuthorityConnectorManager authorityConnectors =
        AuthorityConnectorManagerFactory.make(threadContext);
    IJobManager jobs = JobManagerFactory.make(threadContext);
    IRepositoryConnectionManager repositoryConnections =
        RepositoryConnectionManagerFactory.make(threadContext);
    IOutputConnectorManager outputConnectors = OutputConnectorManagerFactory.make(threadContext);
    IOutputConnectionManager outputConnections =
        OutputConnectionManagerFactory.make(threadContext);

    // Registration should be done in a transaction
    db.beginTransaction();
    try {
      // Repository connectors
      String[] connectorClasses = getConnectorClasses();
      String[] connectorNames = getConnectorNames();
      for (int idx = 0; idx < connectorClasses.length; idx++) {
        String className = connectorClasses[idx];
        // Register the connector first...
        repositoryConnectors.registerConnector(connectorNames[idx], className);
        // ...then tell the jobs behind each affected connection that the connector is installed.
        String[] affectedConnections = repositoryConnections.findConnectionsForConnector(className);
        jobs.noteConnectorRegistration(affectedConnections);
      }

      // Authority connectors
      String[] authorityClasses = getAuthorityClasses();
      String[] authorityNames = getAuthorityNames();
      for (int idx = 0; idx < authorityClasses.length; idx++) {
        authorityConnectors.registerConnector(authorityNames[idx], authorityClasses[idx]);
      }

      // Output connectors
      String[] outputClasses = getOutputClasses();
      String[] outputNames = getOutputNames();
      for (int idx = 0; idx < outputClasses.length; idx++) {
        String className = outputClasses[idx];
        // Register the connector first...
        outputConnectors.registerConnector(outputNames[idx], className);
        // ...then notify all agents about the connections that use it.
        String[] affectedConnections = outputConnections.findConnectionsForConnector(className);
        AgentManagerFactory.noteOutputConnectorRegistration(threadContext, affectedConnections);
      }
    } catch (ManifoldCFException e) {
      db.signalRollback();
      throw e;
    } catch (Error e) {
      db.signalRollback();
      throw e;
    } finally {
      db.endTransaction();
    }
  }
Example #3
0
  /**
   * Thread body: repeatedly asks the job manager for jobs that are ready for delete and moves
   * each one into its "delete started" state.
   *
   * <p>Each pass waits on the reset manager, fetches the ready jobs, and for every job calls
   * {@code prepareDeleteScan} followed by {@code noteJobDeleteStarted}. Any job that was fetched
   * but not marked started is handed back through {@code resetStartDeleteJob}. The loop then
   * sleeps for 10 seconds and repeats.
   *
   * <p>The loop ends on interruption (an {@code InterruptedException}, or a {@code
   * ManifoldCFException} carrying the {@code INTERRUPTED} code). The process exits with status 1
   * on a {@code SETUP_ERROR}, -200 on {@code OutOfMemoryError}, and -300 if anything is thrown
   * outside the inner per-pass try block (i.e. during initialization).
   */
  public void run() {
    resetManager.registerMe();

    try {
      // Create a thread context object.
      IThreadContext threadContext = ThreadContextFactory.make();
      IJobManager jobManager = JobManagerFactory.make(threadContext);
      // NOTE(review): connectionMgr and database are created here but never referenced again in
      // this method; a failure while creating them still lands in the outer catch below.
      IRepositoryConnectionManager connectionMgr =
          RepositoryConnectionManagerFactory.make(threadContext);

      IDBInterface database =
          DBInterfaceFactory.make(
              threadContext,
              ManifoldCF.getMasterDatabaseName(),
              ManifoldCF.getMasterDatabaseUsername(),
              ManifoldCF.getMasterDatabasePassword());

      // Loop
      while (true) {
        // Do another try/catch around everything in the loop
        try {
          // Before we begin, conditionally reset
          resetManager.waitForReset(threadContext);

          // Wait between checks, in milliseconds (10 seconds).
          // NOTE(review): the value is never adjusted anywhere in this method, so every sleep
          // below uses the full 10 seconds.
          long waitTime = 10000L;

          if (Logging.threads.isDebugEnabled()) Logging.threads.debug("Checking for deleting jobs");

          // See if there are any jobs ready to be deleted.
          // Note: Since this following call changes the job state, we must be careful to reset it
          // on any kind of failure.
          JobDeleteRecord[] deleteJobs = jobManager.getJobsReadyForDelete(processID);
          try {

            if (deleteJobs.length == 0) {
              // Nothing to do: sleep, then start the next pass. The finally block below still
              // runs, but iterates over an empty array.
              ManifoldCF.sleep(waitTime);
              continue;
            }

            if (Logging.threads.isDebugEnabled())
              Logging.threads.debug(
                  "Found " + Integer.toString(deleteJobs.length) + " jobs ready to be deleted");

            // One timestamp is shared by every job started in this pass.
            long currentTime = System.currentTimeMillis();

            // Loop through jobs
            int i = 0;
            while (i < deleteJobs.length) {
              JobDeleteRecord jsr = deleteJobs[i++];
              Long jobID = jsr.getJobID();
              try {
                jobManager.prepareDeleteScan(jobID);
                // Start deleting this job!
                jobManager.noteJobDeleteStarted(jobID, currentTime);
                // Mark the record so the clean-up in the finally block leaves it alone.
                jsr.noteStarted();
              } catch (ManifoldCFException e) {
                // Interruption and database connection errors abandon the whole pass; they are
                // handled by the outer catch clauses.
                if (e.getErrorCode() == ManifoldCFException.INTERRUPTED)
                  throw new InterruptedException();
                if (e.getErrorCode() == ManifoldCFException.DATABASE_CONNECTION_ERROR) throw e;
                // We cannot abort the delete startup, but if we fall through, we'll put the job
                // back into
                // the state whence it came.  So, fall through.
                Logging.threads.error("Exception tossed: " + e.getMessage(), e);
              }
            }
          } finally {
            // Clean up all jobs that did not start
            // Every unstarted job is attempted; if several resets fail, only the last exception
            // is kept and thrown once the loop finishes.
            ManifoldCFException exception = null;
            int i = 0;
            while (i < deleteJobs.length) {
              JobDeleteRecord jsr = deleteJobs[i++];
              if (!jsr.wasStarted()) {
                // Clean up from failed start.
                try {
                  jobManager.resetStartDeleteJob(jsr.getJobID());
                } catch (ManifoldCFException e) {
                  exception = e;
                }
              }
            }
            if (exception != null) throw exception;
          }

          // Sleep for the retry interval.
          ManifoldCF.sleep(waitTime);
        } catch (ManifoldCFException e) {
          // Interrupted: leave the loop and let the thread end.
          if (e.getErrorCode() == ManifoldCFException.INTERRUPTED) break;

          if (e.getErrorCode() == ManifoldCFException.DATABASE_CONNECTION_ERROR) {
            // Report the event to the reset manager, then back off before the next pass.
            resetManager.noteEvent();

            Logging.threads.error(
                "Start delete thread aborting and restarting due to database connection reset: "
                    + e.getMessage(),
                e);
            try {
              // Give the database a chance to catch up/wake up
              ManifoldCF.sleep(10000L);
            } catch (InterruptedException se) {
              break;
            }
            continue;
          }

          // Log it, but keep the thread alive
          Logging.threads.error("Exception tossed: " + e.getMessage(), e);

          if (e.getErrorCode() == ManifoldCFException.SETUP_ERROR) {
            // Shut the whole system down!
            System.exit(1);
          }

        } catch (InterruptedException e) {
          // We're supposed to quit
          break;
        } catch (OutOfMemoryError e) {
          // Out of memory is treated as unrecoverable: report on stderr and exit with -200.
          System.err.println("agents process ran out of memory - shutting down");
          e.printStackTrace(System.err);
          System.exit(-200);
        } catch (Throwable e) {
          // A more severe error - but stay alive
          Logging.threads.fatal("Error tossed: " + e.getMessage(), e);
        }
      }
    } catch (Throwable e) {
      // Severe error on initialization
      // Reached only for failures outside the per-pass try block; the process exits with -300.
      System.err.println("agents process could not start - shutting down");
      Logging.threads.fatal("StartDeleteThread initialization error tossed: " + e.getMessage(), e);
      System.exit(-300);
    }
  }
Example #4
0
  /**
   * End-to-end crawl test against the local file system.
   *
   * <p>Creates a file-system repository connection, a null output connection, and a job that
   * crawls the {@code testdata} directory, then runs the job five times and checks the processed
   * document count after each run:
   *
   * <ol>
   *   <li>full crawl of the initial data - expects 5
   *   <li>minimal crawl after adding a file - expects 6
   *   <li>minimal crawl after changing a file - expects 6
   *   <li>minimal crawl after deleting a file - expects 6 (the delete is not picked up)
   *   <li>full crawl - expects 5 (the delete is picked up)
   * </ol>
   *
   * Finally the job is deleted. Connections are left in place for the base class to clean up.
   *
   * @throws Exception if set-up fails, a job does not finish within the wait timeout, or a
   *     document count is wrong
   */
  public void executeTest() throws Exception {
    // Hey, we were able to install the file system connector etc.
    // Now, create a local test job and run it.
    IThreadContext tc = ThreadContextFactory.make();

    // Create a basic file system connection, and save it.
    IRepositoryConnectionManager mgr = RepositoryConnectionManagerFactory.make(tc);
    IRepositoryConnection conn = mgr.create();
    conn.setName("File Connection");
    conn.setDescription("File Connection");
    conn.setClassName("org.apache.manifoldcf.crawler.connectors.filesystem.FileConnector");
    conn.setMaxConnections(100);
    // Now, save
    mgr.save(conn);

    // Create a basic null output connection, and save it.
    IOutputConnectionManager outputMgr = OutputConnectionManagerFactory.make(tc);
    IOutputConnection outputConn = outputMgr.create();
    outputConn.setName("Null Connection");
    outputConn.setDescription("Null Connection");
    outputConn.setClassName("org.apache.manifoldcf.agents.output.nullconnector.NullConnector");
    outputConn.setMaxConnections(100);
    // Now, save
    outputMgr.save(outputConn);

    // Create a job.
    IJobManager jobManager = JobManagerFactory.make(tc);
    IJobDescription job = jobManager.createJob();
    job.setDescription("Test Job");
    job.setConnectionName("File Connection");
    job.setOutputConnectionName("Null Connection");
    // Static constants are referenced through the declaring type, not through the instance.
    job.setType(IJobDescription.TYPE_SPECIFIED);
    job.setStartMethod(IJobDescription.START_DISABLE);
    job.setHopcountMode(IJobDescription.HOPCOUNT_ACCURATE);

    // Now, set up the document specification.
    DocumentSpecification ds = job.getSpecification();
    // Crawl everything underneath the 'testdata' area
    File testDataFile = new File("testdata").getCanonicalFile();
    if (!testDataFile.exists())
      throw new ManifoldCFException(
          "Test data area not found!  Looking in " + testDataFile.toString());
    if (!testDataFile.isDirectory())
      throw new ManifoldCFException(
          "Test data area not a directory!  Looking in " + testDataFile.toString());
    SpecificationNode sn = new SpecificationNode("startpoint");
    sn.setAttribute("path", testDataFile.toString());
    SpecificationNode n = new SpecificationNode("include");
    n.setAttribute("type", "file");
    n.setAttribute("match", "*");
    sn.addChild(sn.getChildCount(), n);
    n = new SpecificationNode("include");
    n.setAttribute("type", "directory");
    n.setAttribute("match", "*");
    sn.addChild(sn.getChildCount(), n);
    ds.addChild(ds.getChildCount(), sn);

    // Set up the output specification.
    OutputSpecification os = job.getOutputSpecification();
    // Null output connections have no output specification, so this is a no-op.

    // Save the job.
    jobManager.save(job);

    // Create the test data files.
    FileHelper.createFile(new File("testdata/test1.txt"), "This is a test file");
    FileHelper.createFile(new File("testdata/test2.txt"), "This is another test file");
    FileHelper.createDirectory(new File("testdata/testdir"));
    FileHelper.createFile(new File("testdata/testdir/test3.txt"), "This is yet another test file");

    // Full crawl. The test data area has 3 documents and one directory, and we have to count the
    // root directory too.
    crawlAndVerify(jobManager, job, false, 5, "");

    // Add a file and recrawl using minimal crawl: 4 documents + directory + root.
    FileHelper.createFile(new File("testdata/testdir/test4.txt"), "Added file");
    crawlAndVerify(jobManager, job, true, 6, " after add");

    // Change a file, and recrawl, once again using minimal: the count is unchanged.
    FileHelper.changeFile(new File("testdata/test1.txt"), "Modified contents");
    crawlAndVerify(jobManager, job, true, 6, " after change");
    // We also need to make sure the new document was indexed.  Have to think about how to do this
    // though.
    // MHL

    // Delete a file, and recrawl.
    FileHelper.removeFile(new File("testdata/test2.txt"));

    // Do a minimal recrawl first; the delete should not be picked up.
    crawlAndVerify(jobManager, job, true, 6, " after delete with minimal crawl");

    // Now, do a complete crawl - the delete should be found now.
    crawlAndVerify(jobManager, job, false, 5, " after delete");

    // Now, delete the job.
    jobManager.deleteJob(job.getID());
    instance.waitJobDeletedNative(jobManager, job.getID(), 120000L);

    // Cleanup is automatic by the base class, so we can feel free to leave jobs and connections
    // lying around.
  }

  /**
   * Starts the job, waits for it to become inactive, and checks the processed document count.
   *
   * @param jobManager job manager used to start the job and read its status
   * @param job the job to run
   * @param minimal {@code true} to start with {@code manualStart(id, true)} (minimal crawl),
   *     {@code false} to start with {@code manualStart(id)}
   * @param expectedCount the number of processed documents the job status must report
   * @param phase text inserted into the failure message after "processed" (for example {@code
   *     " after add"}); empty for the initial crawl
   * @throws Exception if the job cannot be run or the count differs from {@code expectedCount}
   */
  private void crawlAndVerify(
      IJobManager jobManager,
      IJobDescription job,
      boolean minimal,
      long expectedCount,
      String phase)
      throws Exception {
    // Now, start the job, and wait until it completes.
    if (minimal) {
      jobManager.manualStart(job.getID(), true);
    } else {
      jobManager.manualStart(job.getID());
    }
    instance.waitJobInactiveNative(jobManager, job.getID(), 120000L);

    // Check to be sure we actually processed the right number of documents.
    JobStatus status = jobManager.getStatus(job.getID());
    long processed = status.getDocumentsProcessed();
    if (processed != expectedCount) {
      throw new ManifoldCFException(
          "Wrong number of documents processed"
              + phase
              + " - expected "
              + expectedCount
              + ", saw "
              + processed);
    }
  }
Example #5
0
  /**
   * Command-line entry point that defines a new job and prints its identifier to standard output.
   *
   * <p>Exactly 13 arguments are required; see the usage text printed below for their meaning.
   * Empty strings for the recrawl, expiration, and reseed intervals leave those settings
   * untouched, and an empty output specification is skipped.
   *
   * <p>Hop filters are a comma-separated list of {@code linktype=maxhops} entries. An entry with
   * an empty right-hand side ({@code linktype=}) adds a filter with a {@code null} hop limit.
   * Entries that do not contain exactly one {@code =} are ignored.
   *
   * <p>Exit status: -1 on wrong argument count, -2 if anything fails while building or saving
   * the job (the stack trace is printed first).
   *
   * @param args the 13 command-line arguments
   */
  public static void main(String[] args) {
    if (args.length != 13) {
      System.err.println(
          "Usage: DefineJob <description> <connection_name> <output_name> <type> <start_method> <hopcount_method> <recrawl_interval> <expiration_interval> <reseed_interval> <job_priority> <hop_filters> <filespec_xml> <outputspec_xml>");
      System.err.println("<type> is one of: continuous or specified");
      System.err.println("<start_method> is one of: windowbegin, windowinside, disable");
      System.err.println("<hopcount_method> is one of: accurate, nodelete, neverdelete");
      System.err.println("<recrawl_interval> is the default document recrawl interval in minutes");
      System.err.println(
          "<expiration_interval> is the default document expiration interval in minutes");
      System.err.println("<reseed_interval> is the default document reseed interval in minutes");
      System.err.println("<job_priority> is the job priority (and integer between 0 and 10)");
      System.err.println(
          "<hop_filters> is a comma-separated list of tuples, of the form 'linktype=maxhops'");
      System.err.println(
          "<filespec_xml> is the document specification XML, its form dependent on the connection type");
      System.err.println(
          "<outputspec_xml> is the output specification XML, its form dependent on the output connection type");
      System.exit(-1);
    }

    String description = args[0];
    String connectionName = args[1];
    String outputConnectionName = args[2];
    String typeString = args[3];
    String startString = args[4];
    String hopcountString = args[5];
    String recrawlInterval = args[6];
    String expirationInterval = args[7];
    String reseedInterval = args[8];
    String jobPriority = args[9];
    String hopFilters = args[10];
    String filespecXML = args[11];
    String outputspecXML = args[12];

    try {
      IThreadContext tc = ThreadContextFactory.make();
      ManifoldCF.initializeEnvironment(tc);
      IJobManager jobManager = JobManagerFactory.make(tc);
      IJobDescription desc = jobManager.createJob();

      desc.setDescription(description);
      desc.setConnectionName(connectionName);
      desc.setOutputConnectionName(outputConnectionName);

      if (typeString.equals("continuous")) desc.setType(IJobDescription.TYPE_CONTINUOUS);
      else if (typeString.equals("specified")) desc.setType(IJobDescription.TYPE_SPECIFIED);
      else throw new ManifoldCFException("Unknown type: '" + typeString + "'");

      if (startString.equals("windowbegin")) desc.setStartMethod(IJobDescription.START_WINDOWBEGIN);
      else if (startString.equals("windowinside"))
        desc.setStartMethod(IJobDescription.START_WINDOWINSIDE);
      else if (startString.equals("disable")) desc.setStartMethod(IJobDescription.START_DISABLE);
      else throw new ManifoldCFException("Unknown start method: '" + startString + "'");

      if (hopcountString.equals("accurate"))
        desc.setHopcountMode(IJobDescription.HOPCOUNT_ACCURATE);
      else if (hopcountString.equals("nodelete"))
        desc.setHopcountMode(IJobDescription.HOPCOUNT_NODELETE);
      else if (hopcountString.equals("neverdelete"))
        desc.setHopcountMode(IJobDescription.HOPCOUNT_NEVERDELETE);
      else throw new ManifoldCFException("Unknown hopcount mode: '" + hopcountString + "'");

      // Long.valueOf replaces the deprecated Long(String) constructor; it rejects malformed
      // input with the same NumberFormatException.
      if (recrawlInterval.length() > 0) desc.setInterval(Long.valueOf(recrawlInterval));
      if (expirationInterval.length() > 0) desc.setExpiration(Long.valueOf(expirationInterval));
      if (reseedInterval.length() > 0) desc.setReseedInterval(Long.valueOf(reseedInterval));
      desc.setPriority(Integer.parseInt(jobPriority));

      // String.split never returns null or null elements, so no null checks are needed.
      for (String hopFilter : hopFilters.split(",")) {
        if (hopFilter.length() == 0) {
          continue;
        }
        // A negative limit keeps trailing empty strings, so "linktype=" yields
        // {"linktype", ""} rather than {"linktype"} and the empty-limit case is reachable.
        String[] parts = hopFilter.trim().split("=", -1);
        if (parts.length == 2) {
          Long maxHops = (parts[1].length() > 0) ? Long.valueOf(parts[1]) : null;
          desc.addHopCountFilter(parts[0], maxHops);
        }
      }

      desc.getSpecification().fromXML(filespecXML);
      if (outputspecXML.length() > 0) desc.getOutputSpecification().fromXML(outputspecXML);

      // Now, save
      jobManager.save(desc);

      // Emit the new job's identifier (no trailing newline) so callers can capture it.
      System.out.print(desc.getID().toString());
    } catch (Exception e) {
      e.printStackTrace();
      System.exit(-2);
    }
  }