Example No. 1
 public Vector runningJobs() {
   Vector v = new Vector();
   for (Iterator it = jobs.values().iterator(); it.hasNext(); ) {
     JobInProgress jip = (JobInProgress) it.next();
     JobStatus status = jip.getStatus();
     if (status.getRunState() == JobStatus.RUNNING) {
       v.add(jip);
     }
   }
   return v;
 }
Example No. 2
 public Vector completedJobs() {
   Vector v = new Vector();
   for (Iterator it = jobs.values().iterator(); it.hasNext(); ) {
     JobInProgress jip = (JobInProgress) it.next();
     JobStatus status = jip.getStatus();
     if (status.getRunState() == JobStatus.SUCCEEDED) {
       v.add(jip);
     }
   }
   return v;
 }
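The two methods above are identical except for the run state they test (JobStatus.RUNNING versus JobStatus.SUCCEEDED). Below is a minimal sketch of how the shared loop could be factored out; the jobsWithState helper name is hypothetical, and the jobs map, JobInProgress, and JobStatus are assumed to be the same types used in the examples.

 // Sketch only: a hypothetical helper backing both runningJobs() and completedJobs().
 private Vector jobsWithState(int runState) {
   Vector v = new Vector();
   for (Iterator it = jobs.values().iterator(); it.hasNext(); ) {
     JobInProgress jip = (JobInProgress) it.next();
     if (jip.getStatus().getRunState() == runState) {
       v.add(jip);
     }
   }
   return v;
 }

 public Vector runningJobs() {
   return jobsWithState(JobStatus.RUNNING);
 }

 public Vector completedJobs() {
   return jobsWithState(JobStatus.SUCCEEDED);
 }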
Example No. 3
  /**
   * Updates the status PID file for the job.
   *
   * @param conf The {@link org.apache.hadoop.conf.Configuration} for the map reduce job
   * @param status The {@link com._42six.amino.api.framework.FrameworkDriver.JobStatus} to change to
   * @param pidDir The {@link org.apache.hadoop.fs.Path} to the PID file
   * @throws IOException if the status file cannot be created or written
   */
  public static void updateStatus(Configuration conf, JobStatus status, Path pidDir)
      throws IOException {
    final Path pidFile = new Path(pidDir, STATUS_FILE);

    // Create the file if it doesn't exist and overwrite whatever might have been in it
    try (FileSystem fs = FileSystem.get(conf);
        FSDataOutputStream os = fs.create(pidFile, true)) {
      os.writeUTF(status.toString());
    }
  }
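A short usage sketch for the method above, assuming it is called from within the same class (so the imports are in scope); the recordStatus wrapper and the /tmp/amino/pids path are illustrative assumptions, not part of the original framework.

  // Sketch only: record a status change for a job whose PID directory lives
  // under a hypothetical base path.
  public static void recordStatus(Configuration conf, JobStatus status) throws IOException {
    // Illustrative location; in practice the PID directory is supplied by the caller.
    final Path pidDir = new Path("/tmp/amino/pids/my-job");
    updateStatus(conf, status, pidDir);
  }

Note that FileSystem.get(conf) normally returns a cached, shared instance, so closing it in the try-with-resources block above also closes it for any other code holding the same FileSystem object; callers that reuse the filesystem elsewhere may want to disable the cache or manage the instance themselves.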
Example No. 4
  public void executeTest() throws Exception {
    // The file system connector and its prerequisites are installed.
    // Now, create a local test job and run it.
    IThreadContext tc = ThreadContextFactory.make();

    // Create a basic file system connection, and save it.
    IRepositoryConnectionManager mgr = RepositoryConnectionManagerFactory.make(tc);
    IRepositoryConnection conn = mgr.create();
    conn.setName("File Connection");
    conn.setDescription("File Connection");
    conn.setClassName("org.apache.manifoldcf.crawler.connectors.filesystem.FileConnector");
    conn.setMaxConnections(100);
    // Now, save
    mgr.save(conn);

    // Create a basic null output connection, and save it.
    IOutputConnectionManager outputMgr = OutputConnectionManagerFactory.make(tc);
    IOutputConnection outputConn = outputMgr.create();
    outputConn.setName("Null Connection");
    outputConn.setDescription("Null Connection");
    outputConn.setClassName("org.apache.manifoldcf.agents.output.nullconnector.NullConnector");
    outputConn.setMaxConnections(100);
    // Now, save
    outputMgr.save(outputConn);

    // Create a job.
    IJobManager jobManager = JobManagerFactory.make(tc);
    IJobDescription job = jobManager.createJob();
    job.setDescription("Test Job");
    job.setConnectionName("File Connection");
    job.setOutputConnectionName("Null Connection");
    job.setType(IJobDescription.TYPE_SPECIFIED);
    job.setStartMethod(IJobDescription.START_DISABLE);
    job.setHopcountMode(IJobDescription.HOPCOUNT_ACCURATE);

    // Now, set up the document specification.
    DocumentSpecification ds = job.getSpecification();
    // Crawl everything underneath the 'testdata' area
    File testDataFile = new File("testdata").getCanonicalFile();
    if (!testDataFile.exists())
      throw new ManifoldCFException(
          "Test data area not found!  Looking in " + testDataFile.toString());
    if (!testDataFile.isDirectory())
      throw new ManifoldCFException(
          "Test data area not a directory!  Looking in " + testDataFile.toString());
    SpecificationNode sn = new SpecificationNode("startpoint");
    sn.setAttribute("path", testDataFile.toString());
    SpecificationNode n = new SpecificationNode("include");
    n.setAttribute("type", "file");
    n.setAttribute("match", "*");
    sn.addChild(sn.getChildCount(), n);
    n = new SpecificationNode("include");
    n.setAttribute("type", "directory");
    n.setAttribute("match", "*");
    sn.addChild(sn.getChildCount(), n);
    ds.addChild(ds.getChildCount(), sn);

    // Set up the output specification.
    OutputSpecification os = job.getOutputSpecification();
    // Null output connections have no output specification, so this is a no-op.

    // Save the job.
    jobManager.save(job);

    // Create the test data files.
    FileHelper.createFile(new File("testdata/test1.txt"), "This is a test file");
    FileHelper.createFile(new File("testdata/test2.txt"), "This is another test file");
    FileHelper.createDirectory(new File("testdata/testdir"));
    FileHelper.createFile(new File("testdata/testdir/test3.txt"), "This is yet another test file");

    // Now, start the job, and wait until it completes.
    jobManager.manualStart(job.getID());
    instance.waitJobInactiveNative(jobManager, job.getID(), 120000L);

    // Check to be sure we actually processed the right number of documents.
    JobStatus status = jobManager.getStatus(job.getID());
    // The test data area has 3 documents and one directory, and we have to count the root directory
    // too.
    if (status.getDocumentsProcessed() != 5)
      throw new ManifoldCFException(
          "Wrong number of documents processed - expected 5, saw "
              + status.getDocumentsProcessed());

    // Add a file and recrawl using minimal crawl
    FileHelper.createFile(new File("testdata/testdir/test4.txt"), "Added file");

    // Now, start the job, and wait until it completes.
    jobManager.manualStart(job.getID(), true);
    instance.waitJobInactiveNative(jobManager, job.getID(), 120000L);

    status = jobManager.getStatus(job.getID());
    // The test data area has 4 documents and one directory, and we have to count the root directory
    // too.
    if (status.getDocumentsProcessed() != 6)
      throw new ManifoldCFException(
          "Wrong number of documents processed after add - expected 6, saw "
              + status.getDocumentsProcessed());

    // Change a file, and recrawl, once again using minimal
    FileHelper.changeFile(new File("testdata/test1.txt"), "Modified contents");

    // Now, start the job, and wait until it completes.
    jobManager.manualStart(job.getID(), true);
    instance.waitJobInactiveNative(jobManager, job.getID(), 120000L);

    status = jobManager.getStatus(job.getID());
    // The test data area has 4 documents and one directory, and we have to count the root directory
    // too.
    if (status.getDocumentsProcessed() != 6)
      throw new ManifoldCFException(
          "Wrong number of documents processed after change - expected 6, saw "
              + status.getDocumentsProcessed());
    // We also need to make sure the new document was indexed.  Have to think about how to do this
    // though.
    // MHL

    // Delete a file, and recrawl
    FileHelper.removeFile(new File("testdata/test2.txt"));

    // Do a minimal recrawl first; the delete should not be picked up.
    jobManager.manualStart(job.getID(), true);
    instance.waitJobInactiveNative(jobManager, job.getID(), 120000L);

    status = jobManager.getStatus(job.getID());
    // The test data area has 4 documents and one directory, and we have to count the root directory
    // too.
    if (status.getDocumentsProcessed() != 6)
      throw new ManifoldCFException(
          "Wrong number of documents processed after delete with minimal crawl - expected 6, saw "
              + status.getDocumentsProcessed());

    // Now, do a complete crawl - the delete should be found now.
    jobManager.manualStart(job.getID());
    instance.waitJobInactiveNative(jobManager, job.getID(), 120000L);

    // Check to be sure we actually processed the right number of documents.
    status = jobManager.getStatus(job.getID());
    // The test data area has 3 documents and one directory, and we have to count the root directory
    // too.
    if (status.getDocumentsProcessed() != 5)
      throw new ManifoldCFException(
          "Wrong number of documents processed after delete - expected 5, saw "
              + status.getDocumentsProcessed());

    // Now, delete the job.
    jobManager.deleteJob(job.getID());
    instance.waitJobDeletedNative(jobManager, job.getID(), 120000L);

    // Cleanup is automatic by the base class, so we can feel free to leave jobs and connections
    // lying around.
  }
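The test above relies on a FileHelper utility whose implementation is not shown. Below is a minimal sketch of what those calls are assumed to do, using plain java.nio; the real helper in the ManifoldCF test framework may differ.

  // Sketch only: assumed behavior of the FileHelper calls used in the test above.
  import java.io.File;
  import java.io.IOException;
  import java.nio.charset.StandardCharsets;
  import java.nio.file.Files;

  public class FileHelper {
    // Write a small text file, overwriting it if it already exists.
    public static void createFile(File f, String contents) throws IOException {
      Files.write(f.toPath(), contents.getBytes(StandardCharsets.UTF_8));
    }

    // Replace the contents of an existing file.
    public static void changeFile(File f, String contents) throws IOException {
      createFile(f, contents);
    }

    // Create a directory, including any missing parents.
    public static void createDirectory(File dir) throws IOException {
      Files.createDirectories(dir.toPath());
    }

    // Delete a single file if it exists.
    public static void removeFile(File f) throws IOException {
      Files.deleteIfExists(f.toPath());
    }
  }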