/**
   * Prints a multi-line, bracketed status report for this thread.
   *
   * <p>The report lists the thread name, the CrawlURI currently held (if any) together with its
   * fetch-attempt count and the current processor name, how long the thread has been WAITING or
   * ACTIVE, the current step and how long it has lasted, and finally whatever {@code
   * reportThread} contributes.
   *
   * @param name Report name; ignored for now because only one kind of report exists.
   * @param pw Where to print; flushed before this method returns.
   */
  public void reportTo(String name, PrintWriter pw) {
    final String indent = "    ";

    pw.println("[" + getName());

    // Work from a snapshot of the currentCuri reference: it may be nulled
    // while we report. Synchronizing instead is deliberately avoided because
    // the controller can then wait on this thread's lock indefinitely under
    // the NPTL threading model.
    // See [ 994946 ] Pause/Terminate ignored on 2.6 kernel 1.5 JVM.
    final CrawlURI curi = currentCuri;
    if (curi == null) {
      pw.print(" -no CrawlURI- ");
    } else {
      pw.print(" ");
      curi.shortReportLineTo(pw);
      pw.print(indent + curi.getFetchAttempts() + " attempts");
      pw.println();
      pw.print(indent + "in processor: " + currentProcessorName);
    }
    pw.println();

    final long now = System.currentTimeMillis();
    long elapsed = 0;

    pw.print(indent);
    if (lastFinishTime > lastStartTime) {
      // Last finish is more recent than last start: nothing is in progress.
      pw.print("WAITING for ");
      elapsed = now - lastFinishTime;
    } else if (lastStartTime > 0) {
      // Something was started and has not finished yet.
      pw.print("ACTIVE for ");
      elapsed = now - lastStartTime;
    }
    pw.println(ArchiveUtils.formatMillisecondsToConventional(elapsed));

    pw.print(indent + "step: " + step + " for ");
    final long stepElapsed = System.currentTimeMillis() - atStepSince;
    pw.println(ArchiveUtils.formatMillisecondsToConventional(stepElapsed));

    reportThread(this, pw);
    pw.println("]");

    pw.flush();
  }
  /**
   * Builds an insertion-ordered map summarising this thread's state.
   *
   * <p>Keys written: {@code serialNumber}, {@code currentURI} (null when no CrawlURI is held),
   * {@code currentProcessor} and {@code fetchAttempts} (only when a CrawlURI is held), {@code
   * status} (only when the thread is WAITING or ACTIVE), {@code
   * currentStatusElapsedMilliseconds}, {@code currentStatusElapsedPretty} and {@code step}.
   *
   * @return a new map holding the values described above
   */
  public Map<String, Object> shortReportMap() {
    final Map<String, Object> report = new LinkedHashMap<String, Object>();
    report.put("serialNumber", serialNumber);

    // snapshot the reference so it cannot turn null between the check and the reads
    final CrawlURI curi = currentCuri;
    if (curi == null) {
      report.put("currentURI", null);
    } else {
      report.put("currentURI", curi.toString());
      report.put("currentProcessor", currentProcessorName);
      report.put("fetchAttempts", curi.getFetchAttempts());
    }

    final long now = System.currentTimeMillis();
    String status = null;
    long elapsed = 0;
    if (lastFinishTime > lastStartTime) {
      status = "WAITING";
      elapsed = now - lastFinishTime;
    } else if (lastStartTime > 0) {
      status = "ACTIVE";
      elapsed = now - lastStartTime;
    }
    if (status != null) {
      report.put("status", status);
    }

    report.put("currentStatusElapsedMilliseconds", elapsed);
    report.put(
        "currentStatusElapsedPretty", ArchiveUtils.formatMillisecondsToConventional(elapsed));
    report.put("step", step);
    return report;
  }
 // Builds the gzip fixtures from a fixed-seed Random so their content is reproducible:
 // 1 KiB and 32 KiB of noise, the strings "a" and "hello", and two concatenations of those
 // gzip members.
 {
   Random rand = new Random(1);
   try {
     byte[] buf = new byte[1024];
     rand.nextBytes(buf);
     noise1k_gz = ArchiveUtils.gzip(buf);
     buf = new byte[32 * 1024];
     rand.nextBytes(buf);
     noise32k_gz = ArchiveUtils.gzip(buf);
     a_gz = ArchiveUtils.gzip("a".getBytes("ASCII"));
     hello_gz = ArchiveUtils.gzip("hello".getBytes("ASCII"));
     allfour_gz = Bytes.concat(noise1k_gz, noise32k_gz, a_gz, hello_gz);
     sixsmall_gz = Bytes.concat(a_gz, hello_gz, a_gz, hello_gz, a_gz, hello_gz);
   } catch (IOException e) {
     // Should not happen for in-memory data. If it does, fail here with the cause attached
     // instead of leaving the fixtures null and failing later with an unrelated error.
     throw new IllegalStateException("unable to build gzip fixtures", e);
   }
 }
  /**
   * Prints a one-line summary of this thread: serial number, current processor and CrawlURI with
   * its fetch-attempt count (or a placeholder when none is held), how long the thread has been
   * WAITING or ACTIVE, and the current step with its duration.
   *
   * @param w PrintWriter to write to; flushed before this method returns.
   */
  public void shortReportLineTo(PrintWriter w) {
    w.print("#" + this.serialNumber);

    // Work from a snapshot of the currentCuri reference: it may be nulled
    // while we report. Synchronizing instead is deliberately avoided because
    // the controller can then wait on this thread's lock indefinitely under
    // the NPTL threading model.
    // See [ 994946 ] Pause/Terminate ignored on 2.6 kernel 1.5 JVM.
    final CrawlURI curi = currentCuri;
    if (curi == null) {
      w.print(" [no CrawlURI] ");
    } else {
      w.print(" " + currentProcessorName + " " + curi.toString());
      w.print(" (" + curi.getFetchAttempts() + ") ");
    }

    final long now = System.currentTimeMillis();
    final String stateLabel;
    final long elapsed;
    if (lastFinishTime > lastStartTime) {
      // Last finish is more recent than last start: nothing is in progress.
      stateLabel = "WAITING for ";
      elapsed = now - lastFinishTime;
    } else if (lastStartTime > 0) {
      // Something was started and has not finished yet.
      stateLabel = "ACTIVE for ";
      elapsed = now - lastStartTime;
    } else {
      // Never started anything: no label, zero elapsed time.
      stateLabel = "";
      elapsed = 0;
    }

    w.print(stateLabel);
    w.print(ArchiveUtils.formatMillisecondsToConventional(elapsed));
    w.print(" at " + step + " for ");
    w.print(ArchiveUtils.formatMillisecondsToConventional(now - atStepSince));
    w.print("\n");
    w.flush();
  }
Beispiel #5
0
  /**
   * Looks up the item at the head of this queue without removing it.
   *
   * <p>The lookup starts from this queue's {@code origin} key. A database failure or a result
   * whose key does not start with {@code origin} counts as a miss; after a miss the lookup is
   * retried until four attempts in total have been made.
   *
   * @param frontier frontier owning the work queues; must be a {@code BdbFrontier}
   * @return the head CrawlURI, or null if none could be obtained
   * @throws IOException declared by the signature; not thrown by this body directly
   */
  protected CrawlURI peekItem(final WorkQueueFrontier frontier) throws IOException {
    final BdbMultipleWorkQueues workQueues = ((BdbFrontier) frontier).getWorkQueues();
    final DatabaseEntry lookupKey = new DatabaseEntry(origin);
    CrawlURI found = null;
    int attempt = 1;
    for (; ; ) {
      try {
        found = workQueues.get(lookupKey);
      } catch (DatabaseException e) {
        LOGGER.log(Level.SEVERE, "peekItem failure; retrying", e);
      }

      // the returned key must still carry this queue's prefix
      final boolean inRange = ArchiveUtils.startsWith(lookupKey.getData(), origin);
      if (!inRange) {
        LOGGER.severe(
            "inconsistency: "
                + classKey
                + "("
                + getPrefixClassKey(origin)
                + ") with "
                + getCount()
                + " items gave "
                + found
                + "("
                + getPrefixClassKey(lookupKey.getData()));
        // discard the out-of-range result and restore the key for the retry
        found = null;
        lookupKey.setData(origin);
      }

      if (found != null) {
        return found;
      }

      if (attempt > 3) {
        LOGGER.severe("no item where expected in queue " + classKey);
        return null;
      }

      attempt++;
      LOGGER.severe(
          "Trying get #"
              + attempt
              + " in queue "
              + classKey
              + " with "
              + getCount()
              + " items using key "
              + getPrefixClassKey(lookupKey.getData()));
    }
  }
Beispiel #6
0
 /**
  * Fills a history map from previously persisted entries. When no map is supplied the entries
  * are only logged (dry run).
  *
  * <p>A directory is handed to {@code copyPersistEnv}; anything else is opened as a persist log
  * and handed to {@code populatePersistEnvFromLog}.
  *
  * @param sourceFile source of old entries: a directory, or a persist log file
  * @param historyMap map to populate (or null for a dry run)
  * @return number of records
  * @throws DatabaseException
  * @throws IOException
  */
 public static int copyPersistSourceToHistoryMap(
     File sourceFile, StoredSortedMap<String, Map> historyMap)
     throws DatabaseException, IOException {
   if (!sourceFile.isDirectory()) {
     // plain file: read it as a persist log
     return populatePersistEnvFromLog(ArchiveUtils.getBufferedReader(sourceFile), historyMap);
   }
   return copyPersistEnv(sourceFile, historyMap);
 }
Beispiel #7
0
  /**
   * Populates a history map from a persist log. If historyMap is not provided, only logs the
   * entries that would have been populated.
   *
   * <p>Each non-empty line must consist of two space-separated fields: a key and a Base64-encoded
   * serialized {@code Map}. Lines with a different field count, or whose second field cannot be
   * deserialized, are logged and skipped. If storing an entry fails, loading stops and the count
   * reached so far is returned.
   *
   * <p>The reader is always closed before this method returns, including when iteration fails
   * with an unchecked exception.
   *
   * @param persistLogReader persist log; closed by this method
   * @param historyMap map to populate (or null for a dry run)
   * @return number of records
   * @throws UnsupportedEncodingException
   * @throws DatabaseException
   */
  private static int populatePersistEnvFromLog(
      BufferedReader persistLogReader, StoredSortedMap<String, Map> historyMap)
      throws UnsupportedEncodingException, DatabaseException {
    int count = 0;

    try {
      Iterator<String> iter = new LineReadingIterator(persistLogReader);
      while (iter.hasNext()) {
        String line = iter.next();
        if (line.length() == 0) {
          continue;
        }
        String[] splits = line.split(" ");
        if (splits.length != 2) {
          logger.severe("bad line has " + splits.length + " fields (should be 2): " + line);
          continue;
        }

        Map alist;
        try {
          // NOTE(review): this is Java-native deserialization of log content; confirm that
          // persist logs only ever come from trusted sources.
          alist =
              (Map)
                  SerializationUtils.deserialize(Base64.decodeBase64(splits[1].getBytes("UTF-8")));
        } catch (Exception e) {
          logger.severe("caught exception " + e + " deserializing line: " + line);
          continue;
        }

        if (logger.isLoggable(Level.FINE)) {
          logger.fine(splits[0] + " " + ArchiveUtils.prettyString(alist));
        }

        if (historyMap != null) {
          try {
            historyMap.put(splits[0], alist);
          } catch (Exception e) {
            logger.log(
                Level.SEVERE,
                "caught exception after loading "
                    + count
                    + " urls from the persist log (perhaps crawl was stopped by user?)",
                e);

            // seems to finish most cleanly when we return rather than throw something
            return count;
          }
        }

        count++;
      }

      return count;
    } finally {
      // single close point covering normal completion, early return and unchecked failures
      IOUtils.closeQuietly(persistLogReader);
    }
  }
 /**
  * Computes the insert key that positions a CrawlURI within its queue.
  *
  * <p>Layout: the UTF-8 bytes of the classKey (usu. host), which groups entries by host; a single
  * zero byte as terminator; then 8 bytes that order entries inside that 'queue'. Of those 8
  * bytes the first carries the scheduling directive (priority), so 'immediate' and 'soon' items
  * sort above regular ones; the second carries the precedence (capped at 127); the remaining 6
  * carry the ordinal serial number, so earlier-discovered URIs sort before later ones.
  *
  * <p>NOTE: Dangers here are: (1) priorities or precedences over 2^7 (signed byte comparison) (2)
  * ordinals over 2^48
  *
  * <p>Package access and static for testing purposes.
  *
  * @param curi CrawlURI to compute the key for
  * @return a DatabaseEntry key for the CrawlURI
  */
 static DatabaseEntry calculateInsertKey(CrawlURI curi) {
   final byte[] prefix = curi.getClassKey().getBytes(Charsets.UTF_8);
   final int prefixLength = prefix.length;

   // prefix + 1 terminator byte + 8 ordering bytes
   final byte[] keyData = new byte[prefixLength + 9];
   System.arraycopy(prefix, 0, keyData, 0, prefixLength);
   keyData[prefixLength] = 0;

   // low 48 bits: ordinal; top byte: scheduling directive; next byte: capped precedence
   final long ordinalBits = curi.getOrdinal() & 0x0000FFFFFFFFFFFFL;
   final long directiveBits = (long) curi.getSchedulingDirective() << 56;
   final long cappedPrecedence = Math.min(curi.getPrecedence(), 127);
   final long precedenceBits = (cappedPrecedence & 0xFFL) << 48;

   final long ordering = precedenceBits | directiveBits | ordinalBits;
   ArchiveUtils.longIntoByteArray(ordering, keyData, prefixLength + 1);
   return new DatabaseEntry(keyData);
 }
  /**
   * Reacts to a serious {@link Error} (OutOfMemory etc.) raised while processing.
   *
   * <p>In order: raises this thread's priority, asks the controller (if any) to free reserve
   * memory and pause the crawl and notifies the frontier journal (if any), dumps diagnostics to
   * {@code System.err}, marks the current CrawlURI (if any) with {@code S_SERIOUS_ERROR} while
   * keeping its previous status as an annotation, logs at SEVERE, and restores the priority.
   *
   * @param err the error being handled
   */
  private void seriousError(Error err) {
    // try to prevent timeslicing until we have a chance to deal with OOM
    // Note that modern-day JVM priority indifference with native threads
    // may make this priority-jumbling pointless
    setPriority(DEFAULT_PRIORITY + 1);

    if (controller != null) {
      // hold all ToeThreads from proceeding to next processor
      controller.freeReserveMemory();
      controller.requestCrawlPause();
      if (controller.getFrontier().getFrontierJournal() != null) {
        final String journalEntry = getName() + err.getMessage();
        controller.getFrontier().getFrontierJournal().seriousError(journalEntry);
      }
    }

    // diagnostics block on stderr, delimited by <<< and >>>
    final String extraInfo = DevUtils.extraInfo();
    System.err.println("<<<");
    System.err.println(ArchiveUtils.getLog17Date());
    System.err.println(err);
    System.err.println(extraInfo);
    err.printStackTrace(System.err);

    if (controller != null) {
      final PrintWriter errWriter = new PrintWriter(System.err);
      controller.getToePool().compactReportTo(errWriter);
      errWriter.flush();
    }
    System.err.println(">>>");

    final String context;
    if (currentCuri == null) {
      context = "unknown";
    } else {
      // update fetch-status, saving original as annotation
      currentCuri.getAnnotations().add("err=" + err.getClass().getName());
      currentCuri.getAnnotations().add("os" + currentCuri.getFetchStatus());
      currentCuri.setFetchStatus(S_SERIOUS_ERROR);
      context = currentCuri.shortReportLine() + " in " + currentProcessorName;
    }

    logger.log(
        Level.SEVERE,
        "Serious error occured trying to process '" + context + "'\n" + extraInfo,
        err);
    setPriority(DEFAULT_PRIORITY);
  }
  /**
   * Explode the archive into its constituent elements.
   *
   * <p>Wraps {@code arcStream} in an {@code ArchiveReader}, skips the first record, and for every
   * following record whose URL passes {@code au.shouldBeCached} and starts with {@code "http:"}
   * builds an {@code ArchiveEntry}, hands it to the helper, and stores it when the helper has
   * filled in the base URL, rest-of-URL and header fields. Records for which the helper leaves
   * the base URL unset are counted as bad entries.
   *
   * <p>Both the reader and the underlying stream are closed before returning, even when closing
   * the reader itself fails.
   *
   * @throws CacheException an {@code ExploderException} if no reader could be created, the helper
   *     fails, or an I/O error occurs (including on closing the reader); an {@code
   *     UnretryableException} if at least one entry could not be mapped
   */
  public void explode() throws CacheException {
    int goodEntries = 0;
    int badEntries = 0;
    int entriesBetweenSleep = 0;
    ArchiveReader arcReader = null;

    logger.debug(
        (storeArchive ? "Storing" : "Fetching") + " WARC file: " + origUrl + " will explode");
    try {
      // Wrap it in an ArchiveReader
      logger.debug3("About to wrap stream");
      arcReader = wrapStream(fetchUrl, arcStream);
      logger.debug3("wrapStream() returns " + (arcReader == null ? "null" : "non-null"));
      // Explode it
      if (arcReader == null) {
        throw new CacheException.ExploderException("no WarcReader for " + origUrl);
      }
      ArchivalUnit au = crawlFacade.getAu();
      logger.debug("Exploding " + fetchUrl);
      // Iterate through the elements in the WARC file, except the first
      Iterator<ArchiveRecord> iter = arcReader.iterator();
      // Skip first record
      if (iter.hasNext()) {
        iter.next();
      }
      while (iter.hasNext()) {
        helper.pokeWDog();
        // check need to pause
        handlePause(++entriesBetweenSleep);
        // handle each element in the archive
        ArchiveRecord element = iter.next();
        // Each element is a URL to be cached in our AU
        ArchiveRecordHeader elementHeader = element.getHeader();
        String elementUrl = elementHeader.getUrl();
        String elementMimeType = elementHeader.getMimetype();
        long elementLength = elementHeader.getLength();
        long elementDate;
        try {
          elementDate = ArchiveUtils.parse14DigitDate(elementHeader.getDate()).getTime();
        } catch (ParseException e) {
          // unparseable record date: fall back to 0
          elementDate = 0;
        }
        logger.debug2("WARC url " + elementUrl + " mime " + elementMimeType);
        // add check to determine if this is a url which should be cached
        if (au.shouldBeCached(elementUrl) && elementUrl.startsWith("http:")) {
          ArchiveEntry ae =
              new ArchiveEntry(
                  elementUrl,
                  elementLength,
                  elementDate,
                  element, // ArchiveRecord extends InputStream
                  this,
                  fetchUrl);
          ae.setHeaderFields(makeCIProperties(elementHeader));
          long bytesStored = elementLength;
          logger.debug3("ArchiveEntry: " + ae.getName() + " bytes " + bytesStored);
          try {
            helper.process(ae);
          } catch (PluginException ex) {
            throw new CacheException.ExploderException("helper.process() threw", ex);
          }
          if (ae.getBaseUrl() != null) {
            if (ae.getRestOfUrl() != null && ae.getHeaderFields() != null) {
              storeEntry(ae);
              handleAddText(ae);
              goodEntries++;
              // this needs to use the correct depth ? how
              CrawlUrlData cud = new CrawlUrlData(elementUrl, 0);
              crawlFacade.addToParseQueue(cud);
              crawlFacade.getCrawlerStatus().addContentBytesFetched(bytesStored);
            }
          } else {
            badEntries++;
            logger.debug2("Can't map " + elementUrl + " from " + archiveUrl);
          }
        }
      }
    } catch (IOException ex) {
      throw new CacheException.ExploderException(ex);
    } finally {
      try {
        if (arcReader != null) {
          arcReader.close();
        }
      } catch (IOException ex) {
        throw new CacheException.ExploderException(ex);
      } finally {
        // must run even when closing the reader throws, otherwise the stream leaks
        IOUtil.safeClose(arcStream);
      }
    }
    // report failed fetches
    if (badEntries != 0) {
      String msg = archiveUrl + ": " + badEntries + "/" + goodEntries + " bad entries";
      throw new CacheException.UnretryableException(msg);
    }
  }
 /**
  * Returns the short report line for this object, as produced by {@code
  * ArchiveUtils.shortReportLine}.
  *
  * @return the short report line
  */
 public String shortReportLine() {
   final String line = ArchiveUtils.shortReportLine(this);
   return line;
 }
  /**
   * Main work loop of this thread: repeatedly takes the next CrawlURI from the controller's
   * frontier, runs it through the fetch chain and the disposition chain, and reports it as
   * finished, until {@code shouldRetire} is set or the loop is left by an exception.
   *
   * <p>Once the loop has ended the current CrawlURI is cleared, the HTTP recorder is closed and
   * released, the step is set to FINISHED and the controller reference is dropped.
   *
   * @see java.lang.Thread#run()
   */
  public void run() {
    // job name is only used in the start/finish log messages
    String name = controller.getMetadata().getJobName();
    logger.fine(getName() + " started for order '" + name + "'");
    Recorder.setHttpRecorder(httpRecorder);

    try {
      while (true) {
        ArchiveUtils.continueCheck();

        setStep(Step.ABOUT_TO_GET_URI, null);

        CrawlURI curi = controller.getFrontier().next();

        // bind the new CrawlURI to this thread while holding this thread's monitor
        synchronized (this) {
          ArchiveUtils.continueCheck();
          setCurrentCuri(curi);
          currentCuri.setThreadNumber(this.serialNumber);
          lastStartTime = System.currentTimeMillis();
          currentCuri.setRecorder(httpRecorder);
        }

        try {
          KeyedProperties.loadOverridesFrom(curi);

          controller.getFetchChain().process(curi, this);

          controller.getFrontier().beginDisposition(curi);

          controller.getDispositionChain().process(curi, this);

        } catch (RuntimeExceptionWrapper e) {
          // Workaround to get cause from BDB
          // NOTE(review): initCause is only reached when getCause() is null and is passed that
          // same getCause() value -- confirm this has the intended effect.
          if (e.getCause() == null) {
            e.initCause(e.getCause());
          }
          recoverableProblem(e);
        } catch (AssertionError ae) {
          // This risks leaving crawl in fatally inconsistent state,
          // but is often reasonable for per-Processor assertion problems
          recoverableProblem(ae);
        } catch (RuntimeException e) {
          recoverableProblem(e);
        } catch (InterruptedException e) {
          // with a CrawlURI in hand, treat the interrupt as a recoverable problem and keep
          // looping; otherwise propagate it to the outer handler, which ends the loop
          if (currentCuri != null) {
            recoverableProblem(e);
            Thread.interrupted(); // clear interrupt status
          } else {
            throw e;
          }
        } catch (StackOverflowError err) {
          recoverableProblem(err);
        } catch (Error err) {
          // OutOfMemory and any others
          seriousError(err);
        } finally {
          // always undo the overrides loaded at the top of this try
          KeyedProperties.clearOverridesFrom(curi);
        }

        setStep(Step.ABOUT_TO_RETURN_URI, null);
        ArchiveUtils.continueCheck();

        // hand the CrawlURI back to the frontier and release it, again under this monitor
        synchronized (this) {
          controller.getFrontier().finished(currentCuri);
          controller.getFrontier().endDisposition();
          setCurrentCuri(null);
        }

        setStep(Step.FINISHING_PROCESS, null);
        lastFinishTime = System.currentTimeMillis();
        if (shouldRetire) {
          break; // from while(true)
        }
      }
    } catch (InterruptedException e) {
      // an interrupt with a CrawlURI still held means that URI was never returned
      if (currentCuri != null) {
        logger.log(
            Level.SEVERE,
            "Interrupt leaving unfinished CrawlURI " + getName() + " - job may hang",
            e);
      }
      // thread interrupted, ok to end
      logger.log(Level.FINE, this.getName() + " ended with Interruption");
    } catch (Exception e) {
      // every other exception (InterruptedException is handled by the clause above)
      logger.log(Level.SEVERE, "Fatal exception in " + getName(), e);
    } catch (OutOfMemoryError err) {
      seriousError(err);
    } finally {
      // runs on every exit from the loop, normal or exceptional
      controller.getFrontier().endDisposition();
    }

    setCurrentCuri(null);
    // Do cleanup so that objects can be GC.
    this.httpRecorder.closeRecorders();
    this.httpRecorder = null;

    logger.fine(getName() + " finished for order '" + name + "'");
    setStep(Step.FINISHED, null);
    controller = null;
  }
Beispiel #13
0
/**
 * One independent queue of items with the same 'classKey' (eg host).
 *
 * <p>Items live in a shared {@code BdbMultipleWorkQueues} store obtained from the frontier; this
 * class keeps only the key prefix ('origin') that delimits its own range of entries.
 *
 * @author gojomo
 */
public class BdbWorkQueue extends WorkQueue implements Comparable, Serializable {
  private static final Logger LOGGER = Logger.getLogger(BdbWorkQueue.class.getName());

  // be robust against trivial implementation changes
  private static final long serialVersionUID =
      ArchiveUtils.classnameBasedUID(BdbWorkQueue.class, 1);

  /** All items in this queue have this same 'origin' prefix to their keys. */
  private byte[] origin;

  /**
   * Create a virtual queue inside the given frontier's BdbMultipleWorkQueues.
   *
   * @param classKey key shared by all items of this queue
   * @param frontier frontier whose work queues receive this queue's 'cap' entry
   */
  public BdbWorkQueue(String classKey, BdbFrontier frontier) {
    super(classKey);
    this.origin = BdbMultipleWorkQueues.calculateOriginKey(classKey);
    if (LOGGER.isLoggable(Level.FINE)) {
      LOGGER.fine(getPrefixClassKey(this.origin) + " " + classKey);
    }
    // add the queue-front 'cap' entry; see...
    // http://sourceforge.net/tracker/index.php?func=detail&aid=1262665&group_id=73833&atid=539102
    frontier.getWorkQueues().addCap(origin);
  }

  /**
   * Deletes the items of this queue that match the given expression.
   *
   * @param frontier frontier owning the work queues; must be a {@code BdbFrontier}
   * @param match expression passed through to the underlying store
   * @return the value returned by the underlying store's delete call
   * @throws IOException wrapping any DatabaseException from the store
   */
  protected long deleteMatchingFromQueue(final WorkQueueFrontier frontier, final String match)
      throws IOException {
    try {
      final BdbMultipleWorkQueues queues = ((BdbFrontier) frontier).getWorkQueues();
      return queues.deleteMatchingFromQueue(match, classKey, new DatabaseEntry(origin));
    } catch (DatabaseException e) {
      throw IoUtils.wrapAsIOException(e);
    }
  }

  /**
   * Deletes the given item from the underlying store.
   *
   * @param frontier frontier owning the work queues; must be a {@code BdbFrontier}
   * @param peekItem item to delete
   * @throws IOException wrapping any DatabaseException from the store
   */
  protected void deleteItem(final WorkQueueFrontier frontier, final CrawlURI peekItem)
      throws IOException {
    try {
      final BdbMultipleWorkQueues queues = ((BdbFrontier) frontier).getWorkQueues();
      queues.delete(peekItem);
    } catch (DatabaseException e) {
      // report through the class logger rather than a raw stack trace on stderr
      LOGGER.log(Level.SEVERE, "deleteItem failure in queue " + classKey, e);
      throw IoUtils.wrapAsIOException(e);
    }
  }

  /**
   * Looks up the item at the head of this queue without removing it.
   *
   * <p>The lookup starts from this queue's {@code origin} key. A database failure or a result
   * whose key does not start with {@code origin} counts as a miss; after a miss the lookup is
   * retried until four attempts in total have been made.
   *
   * @param frontier frontier owning the work queues; must be a {@code BdbFrontier}
   * @return the head CrawlURI, or null if none could be obtained
   * @throws IOException declared by the signature; not thrown by this body directly
   */
  protected CrawlURI peekItem(final WorkQueueFrontier frontier) throws IOException {
    final BdbMultipleWorkQueues queues = ((BdbFrontier) frontier).getWorkQueues();
    DatabaseEntry key = new DatabaseEntry(origin);
    CrawlURI curi = null;
    int tries = 1;
    while (true) {
      try {
        curi = queues.get(key);
      } catch (DatabaseException e) {
        LOGGER.log(Level.SEVERE, "peekItem failure; retrying", e);
      }

      // ensure CrawlURI, if any,  came from acceptable range:
      if (!ArchiveUtils.startsWith(key.getData(), origin)) {
        LOGGER.severe(
            "inconsistency: "
                + classKey
                + "("
                + getPrefixClassKey(origin)
                + ") with "
                + getCount()
                + " items gave "
                + curi
                + "("
                + getPrefixClassKey(key.getData())
                + ")");
        // clear curi to allow retry
        curi = null;
        // reset key to original origin for retry
        key.setData(origin);
      }

      if (curi != null) {
        // success
        break;
      }

      if (tries > 3) {
        LOGGER.severe("no item where expected in queue " + classKey);
        break;
      }
      tries++;
      LOGGER.severe(
          "Trying get #"
              + Integer.toString(tries)
              + " in queue "
              + classKey
              + " with "
              + getCount()
              + " items using key "
              + getPrefixClassKey(key.getData()));
    }

    return curi;
  }

  /**
   * Inserts the given item into the underlying store.
   *
   * @param frontier frontier owning the work queues; must be a {@code BdbFrontier}
   * @param curi item to insert
   * @param overwriteIfPresent passed through to the underlying store's put call
   * @throws IOException wrapping any DatabaseException from the store
   */
  protected void insertItem(
      final WorkQueueFrontier frontier, final CrawlURI curi, boolean overwriteIfPresent)
      throws IOException {
    try {
      final BdbMultipleWorkQueues queues = ((BdbFrontier) frontier).getWorkQueues();
      queues.put(curi, overwriteIfPresent);
      if (LOGGER.isLoggable(Level.FINE)) {
        LOGGER.fine(
            "Inserted into "
                + getPrefixClassKey(this.origin)
                + " (count "
                + Long.toString(getCount())
                + "): "
                + curi.toString());
      }
    } catch (DatabaseException e) {
      throw IoUtils.wrapAsIOException(e);
    }
  }

  /**
   * Decodes the leading bytes of a key, up to but excluding the first zero byte, as a UTF-8
   * string. If the array contains no zero byte the whole array is decoded.
   *
   * @param byteArray Byte array holding a key.
   * @return String form of the key's prefix (used when logging key-prefixes).
   */
  protected static String getPrefixClassKey(final byte[] byteArray) {
    int zeroIndex = 0;
    // bounded scan: a key lacking the zero terminator must not break the logging that calls this
    while (zeroIndex < byteArray.length && byteArray[zeroIndex] != 0) {
      zeroIndex++;
    }
    try {
      return new String(byteArray, 0, zeroIndex, "UTF-8");
    } catch (UnsupportedEncodingException e) {
      // should be impossible; UTF-8 always available
      LOGGER.log(Level.SEVERE, "UTF-8 unavailable while decoding key prefix", e);
      return e.getMessage();
    }
  }
}
Beispiel #14
0
 /**
  * Fills a history map from an old persist log fetched from a URL. When no map is supplied the
  * entries are only logged (dry run).
  *
  * @param sourceUrl url of source persist log
  * @param historyMap map to populate (or null for a dry run)
  * @return number of records
  * @throws DatabaseException
  * @throws IOException
  */
 public static int copyPersistSourceToHistoryMap(
     URL sourceUrl, StoredSortedMap<String, Map> historyMap)
     throws DatabaseException, IOException {
   // open the log and delegate the actual loading
   return populatePersistEnvFromLog(ArchiveUtils.getBufferedReader(sourceUrl), historyMap);
 }
  /**
   * Turns one archive record into an MDX metadata record and emits it.
   *
   * <p>The MDX record carries the timestamp, URL, digest, several header-derived fields, every
   * archive header (prefixed {@code HEADER-}), and any HTTP headers that could be read (prefixed
   * {@code HTTP-}). For WARC records the HTTP status line and headers are read from the record
   * body; for ARC records they are taken from the record object. The output key is {@code
   * recordType:hash}, or {@code recordType:mimetype} when no usable hash is present.
   *
   * @param key name of the source file the record came from
   * @param value wrapper holding the archive record
   * @param output collector receiving the (key, MDX text) pair
   * @param reporter not used by this method
   * @throws IOException if reading the record or collecting the output fails
   */
  @Override
  public void map(
      Text key, WritableArchiveRecord value, OutputCollector<Text, Text> output, Reporter reporter)
      throws IOException {
    ArchiveRecord record = value.getRecord();
    ArchiveRecordHeader header = record.getHeader();

    // Logging for debug info:
    log.debug(
        "Processing @"
            + header.getOffset()
            + "+"
            + record.available()
            + ","
            + header.getLength()
            + ": "
            + header.getUrl());
    for (String h : header.getHeaderFields().keySet()) {
      log.debug("ArchiveHeader: " + h + " -> " + header.getHeaderValue(h));
    }

    try {
      MDX mdx = new MDX();
      Date crawl_date = ArchiveUtils.parse14DigitISODate(header.getDate(), null);
      if (crawl_date != null) {
        mdx.setTs(ArchiveUtils.get14DigitDate(crawl_date));
      } else {
        // date could not be parsed: keep the raw header value
        mdx.setTs(header.getDate());
      }
      mdx.setUrl(header.getUrl());
      mdx.setHash(header.getDigest());

      // Data from WARC record:
      mdx.put("source-file", key.toString());
      mdx.put("content-type", header.getMimetype());
      mdx.put("content-length", "" + header.getContentLength());
      mdx.put("length", "" + header.getLength());
      mdx.put("source-offset", "" + header.getOffset());
      mdx.put("record-identifier", header.getRecordIdentifier());
      for (String k : header.getHeaderFieldKeys()) {
        mdx.put("HEADER-" + k, "" + header.getHeaderValue(k));
      }

      // check record type and look for HTTP data:
      Header[] httpHeaders = null;
      if (record instanceof WARCRecord) {
        mdx.setRecordType("warc." + header.getHeaderValue(HEADER_KEY_TYPE));
        mdx.setHash("" + header.getHeaderValue(WARCConstants.HEADER_KEY_PAYLOAD_DIGEST));
        // There are not always headers! The code should check first.
        String statusLine = HttpParser.readLine(record, "UTF-8");
        if (statusLine != null && statusLine.startsWith("HTTP")) {
          String firstLine[] = statusLine.split(" ");
          if (firstLine.length > 1) {
            String statusCode = firstLine[1].trim();
            mdx.put("status-code", statusCode);
            try {
              httpHeaders = HttpParser.parseHeaders(record, "UTF-8");
            } catch (ProtocolException p) {
              log.error(
                  "ProtocolException ["
                      + statusCode
                      + "]: "
                      + header.getHeaderValue(WARCConstants.HEADER_KEY_FILENAME)
                      + "@"
                      + header.getHeaderValue(WARCConstants.ABSOLUTE_OFFSET_KEY),
                  p);
            }
          } else {
            log.warn("Could not parse status line: " + statusLine);
          }
        } else {
          log.warn(
              "Invalid status line: "
                  + header.getHeaderValue(WARCConstants.HEADER_KEY_FILENAME)
                  + "@"
                  + header.getHeaderValue(WARCConstants.ABSOLUTE_OFFSET_KEY));
        }

      } else if (record instanceof ARCRecord) {
        mdx.setRecordType("arc");
        ARCRecord arcr = (ARCRecord) record;
        mdx.put("status-code", "" + arcr.getStatusCode());
        httpHeaders = arcr.getHttpHeaders();

      } else {
        mdx.setRecordType("unknown");
      }

      // Add in http headers
      if (httpHeaders != null) {
        for (Header h : httpHeaders) {
          mdx.put("HTTP-" + h.getName(), h.getValue());
        }
      }

      // URL:
      String uri = header.getUrl();
      if (uri != null) {
        UsableURI uuri = UsableURIFactory.getInstance(uri);
        // Hosts: only recorded for http/https URLs. Compare the scheme exactly; a substring test
        // against "https" would also accept fragments such as "tt" and fails on a null scheme.
        String scheme = uuri.getScheme();
        if ("http".equals(scheme) || "https".equals(scheme)) {
          mdx.put("host", uuri.getAuthority());
        }
      } else {
        mdx.put("errors", "malformed-url");
      }

      // Year
      String date = header.getDate();
      if (date != null && date.length() > 4) {
        mdx.put("year", date.substring(0, 4));
      } else {
        mdx.put("errors", "malformed-date");
      }

      // And collect:
      String outKey = mdx.getHash();
      // compare by content: '==' on strings only tests reference identity
      if (outKey == null || outKey.isEmpty() || "null".equals(outKey)) {
        outKey = mdx.getRecordType() + ":" + header.getMimetype();
      } else {
        outKey = mdx.getRecordType() + ":" + outKey;
      }

      output.collect(new Text(outKey), new Text(mdx.toString()));
    } catch (JSONException e) {
      // the record is skipped; report through the logger instead of a raw stack trace
      log.error("JSONException while building MDX record for " + header.getUrl(), e);
    }
  }
Beispiel #16
0
  public void _jspService(HttpServletRequest request, HttpServletResponse response)
      throws java.io.IOException, ServletException {

    JspFactory _jspxFactory = null;
    javax.servlet.jsp.PageContext pageContext = null;
    HttpSession session = null;
    ServletContext application = null;
    ServletConfig config = null;
    JspWriter out = null;
    Object page = this;
    JspWriter _jspx_out = null;

    try {
      _jspxFactory = JspFactory.getDefaultFactory();
      response.setContentType("text/html; charset=UTF-8");
      pageContext =
          _jspxFactory.getPageContext(this, request, response, "/error.jsp", true, 8192, true);
      application = pageContext.getServletContext();
      config = pageContext.getServletConfig();
      session = pageContext.getSession();
      out = pageContext.getOut();
      _jspx_out = out;

      out.write("\n");
      out.write("\n");
      out.write("\n");
      out.write("\n");

      /** This include page ensures that the handler exists and is ready to be accessed. */
      CrawlJobHandler handler = (CrawlJobHandler) application.getAttribute("handler");
      Heritrix heritrix = (Heritrix) application.getAttribute("heritrix");

      // If handler is empty then this is the first time this bit of code is
      // being run since the server came online. In that case get or create the
      // handler.
      if (handler == null) {
        if (Heritrix.isSingleInstance()) {
          heritrix = Heritrix.getSingleInstance();
          handler = heritrix.getJobHandler();
          application.setAttribute("heritrix", heritrix);
          application.setAttribute("handler", handler);
        } else {
          // TODO:
          // If we get here, then there are multiple heritrix instances
          // and we have to put up a screen allowing the user choose between.
          // Otherwise, there is no Heritrix instance.  Thats a problem.
          throw new RuntimeException(
              "No heritrix instance (or multiple "
                  + "to choose from and we haven't implemented this yet)");
        }
      }

      // ensure controller's settingsHandler is always thread-installed
      // in web ui threads
      if (handler != null) {
        CrawlJob job = handler.getCurrentJob();
        if (job != null) {
          CrawlController controller = job.getController();
          if (controller != null) {
            controller.installThreadContextSettingsHandler();
          }
        }
      }

      out.write("\n");
      out.write("\n\n");

      String title = "Help";
      int tab = 6;

      out.write("\n\n");
      out.write("\n");
      out.write("\n");
      out.write("\n");
      out.write("\n");
      out.write("\n");
      out.write("\n");
      out.write("\n");
      out.write("\n");
      out.write("\n");
      out.write("\n");
      out.write("\n");

      String currentHeritrixName =
          (heritrix == null)
              ? "No current Heritrix instance"
              : (heritrix.getMBeanName() == null)
                  ? heritrix.getInstances().keySet().iterator().next().toString()
                  : heritrix.getMBeanName().toString();

      /**
       * An include file that handles the "look" and navigation of a web page. Include at top (where
       * you would normally begin the HTML code). If used, the include "foot.jsp" should be included
       * at the end of the HTML code. It will close any table, body and html tags left open in this
       * one. Any custom HTML code is thus placed between the two.
       *
       * <p>The following variables must exist prior to this file being included:
       *
       * <p>String title - Title of the web page.
       *
       * <p>int tab - Which tab to display as 'selected': 0 - Console, 1 - Jobs, 2 - Profiles,
       * 3 - Logs, 4 - Reports, 5 - Setup, 6 - Help.
       *
       * <p>SimpleHandler handler - In general this is provided by the include page 'handler.jsp'
       * which should be included prior to this one.
       *
       * @author Kristinn Sigurdsson
       */
      String shortJobStatus = null;
      if (handler.getCurrentJob() != null) {
        shortJobStatus = TextUtils.getFirstWord(handler.getCurrentJob().getStatus());
      }
      String favicon = System.getProperties().getProperty("heritrix.favicon", "h.ico");

      out.write("\n");

      StatisticsTracker stats = null;
      if (handler.getCurrentJob() != null) {
        // Assume that StatisticsTracker is being used.
        stats = (StatisticsTracker) handler.getCurrentJob().getStatisticsTracking();
      }

      out.write("\n");
      out.write("\n\n");
      out.write("<html>\n    ");
      out.write("<head>\n    \t");
      out.write(
          "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\"/>\n        ");
      out.write("<title>Heritrix: ");
      out.print(title);
      out.write("</title>\n        ");
      out.write("<link rel=\"stylesheet\" \n            href=\"");
      out.print(request.getContextPath());
      out.write("/css/heritrix.css\">\n        ");
      out.write("<link rel=\"icon\" href=\"");
      out.print(request.getContextPath());
      out.write("/images/");
      out.print(favicon);
      out.write("\" type=\"image/x-icon\" />\n        ");
      out.write("<link rel=\"shortcut icon\" href=\"");
      out.print(request.getContextPath());
      out.write("/images/");
      out.print(favicon);
      out.write("\" type=\"image/x-icon\" />\n        ");
      out.write("<script src=\"/js/util.js\">\n        ");
      out.write("</script>\n    ");
      out.write("</head>\n\n    ");
      out.write("<body>\n        ");
      out.write(
          "<table border=\"0\" cellspacing=\"0\" cellpadding=\"0\" width=\"100%\">\n            ");
      out.write("<tr>\n                ");
      out.write("<td>\n                    ");
      out.write(
          "<table border=\"0\" cellspacing=\"0\" cellpadding=\"0\" height=\"100%\">\n                        ");
      out.write("<tr>\n                            ");
      out.write(
          "<td height=\"60\" width=\"155\" valign=\"top\" nowrap>\n                                ");
      out.write(
          "<table border=\"0\" width=\"155\" cellspacing=\"0\" cellpadding=\"0\" height=\"60\">\n                                    ");
      out.write("<tr>\n                                        ");
      out.write(
          "<td align=\"center\" height=\"40\" valign=\"bottom\">\n                                            ");
      out.write("<a border=\"0\" \n                                            href=\"");
      out.print(request.getContextPath());
      out.write("/index.jsp\">");
      out.write("<img border=\"0\" src=\"");
      out.print(request.getContextPath());
      out.write("/images/logo.gif\" height=\"37\" width=\"145\">");
      out.write("</a>\n                                        ");
      out.write("</td>\n                                    ");
      out.write("</tr>\n                                    ");
      out.write("<tr>\n                                        ");
      out.write("<td class=\"subheading\">\n                                            ");
      out.print(title);
      out.write("\n                                        ");
      out.write("</td>\n                                    ");
      out.write("</tr>\n                                ");
      out.write("</table>\n                            ");
      out.write("</td>\n                            ");
      out.write(
          "<td width=\"5\" nowrap>\n                                &nbsp;&nbsp;\n                            ");
      out.write("</td>\n                            ");
      out.write("<td width=\"460\" align=\"left\" nowrap>\n                                ");
      out.write(
          "<table border=\"0\" cellspacing=\"0\" cellpadding=\"0\" height=\"60\">\n                                    ");
      out.write("<tr>\n                                        ");
      out.write("<td colspan=\"2\" nowrap>\n                                            ");

      SimpleDateFormat sdf = new SimpleDateFormat("MMM. d, yyyy HH:mm:ss");
      sdf.setTimeZone(java.util.TimeZone.getTimeZone("GMT"));

      out.write("\n                                            ");
      out.write("<b>\n                                                Status as of ");
      out.write("<a style=\"color: #000000\" href=\"");
      out.print(request.getRequestURL());
      out.write("\">");
      out.print(sdf.format(new java.util.Date()));
      out.write(" GMT");
      out.write("</a>\n                                            ");
      out.write(
          "</b>\n                                            &nbsp;&nbsp;\n                                            ");
      out.write("<span style=\"text-align:right\">\n                                            ");
      out.write(
          "<b>\n                                                Alerts: \n                                            ");
      out.write("</b>\n                                            ");
      if (heritrix.getAlertsCount() == 0) {
        out.write("\n                                                ");
        out.write("<a style=\"color: #000000; text-decoration: none\" href=\"");
        out.print(request.getContextPath());
        out.write("/console/alerts.jsp\">no alerts");
        out.write("</a>\n                                            ");
      } else if (heritrix.getNewAlertsCount() > 0) {
        out.write("\n                                                ");
        out.write("<b>");
        out.write("<a href=\"");
        out.print(request.getContextPath());
        out.write("/console/alerts.jsp\">");
        out.print(heritrix.getAlerts().size());
        out.write(" (");
        out.print(heritrix.getNewAlertsCount());
        out.write(" new)");
        out.write("</a>");
        out.write("</b>\n                                            ");
      } else {
        out.write("\n                                                ");
        out.write("<a style=\"color: #000000\" href=\"");
        out.print(request.getContextPath());
        out.write("/console/alerts.jsp\">");
        out.print(heritrix.getAlertsCount());
        out.write(" (");
        out.print(heritrix.getNewAlertsCount());
        out.write(" new)");
        out.write("</a>\n                                            ");
      }
      out.write("\n                                            ");
      out.write("</span>\n                                        ");
      out.write("</td>\n                                    ");
      out.write("</tr>\n                                    ");
      out.write("<tr>\n                                        ");
      out.write("<td valign=\"top\" nowrap>\n\t\t\t\t\t\t\t\t\t\t");
      out.print(
          handler.isRunning()
              ? "<span class='status'>Crawling Jobs</span>"
              : "<span class='status'>Holding Jobs</span>");
      out.write("<i>&nbsp;");
      out.write("</i>\n\t\t\t\t\t\t\t\t\t\t");
      out.write("</td>\n\t\t\t\t\t\t\t\t\t\t");
      out.write("<td valign=\"top\" align=\"right\" nowrap>\n\t\t\t\t\t\t\t\t\t\t");

      if (handler.isRunning() || handler.isCrawling()) {
        if (handler.getCurrentJob() != null) {
          out.write("\n\t\t\t\t\t\t\t\t\t\t");
          out.write("<span class='status'>\n\t\t\t\t\t\t\t\t\t\t");
          out.print(shortJobStatus);
          out.write("</span> job:\n\t\t\t\t\t\t\t\t\t\t");
          out.write("<i>");
          out.print(handler.getCurrentJob().getJobName());
          out.write("</i>\n\t\t\t\t\t\t\t\t\t\t");

        } else {
          out.println("No job ready <a href=\"");
          out.println(request.getContextPath());
          out.println("/jobs.jsp\" style='color: #000000'>(create new)</a>");
        }
      }

      out.write("\n\t\t\t\t\t\t\t\t\t\t");
      out.write("</td>\n                                    ");
      out.write("</tr>\n                                    ");
      out.write("<tr>\n                                        ");
      out.write("<td nowrap>\n                                            ");
      out.print(handler.getPendingJobs().size());
      out.write(
          "\n                                            jobs\n                                            ");
      out.write("<a style=\"color: #000000\" href=\"");
      out.print(request.getContextPath());
      out.write("/jobs.jsp#pending\">pending");
      out.write("</a>,\n                                            ");
      out.print(handler.getCompletedJobs().size());
      out.write("\n                                            ");
      out.write("<a style=\"color: #000000\" href=\"");
      out.print(request.getContextPath());
      out.write("/jobs.jsp#completed\">completed");
      out.write(
          "</a>\n                                            &nbsp;\n                                        ");
      out.write("</td>\n                                        ");
      out.write("<td nowrap align=\"right\">\n                                            ");
      if (handler.isCrawling()) {
        out.write("\n                                                    ");
        out.print((stats != null) ? stats.successfullyFetchedCount() : 0);
        out.write(" URIs in \n\t\t                                            ");
        out.print(
            ArchiveUtils.formatMillisecondsToConventional(
                ((stats != null) ? (stats.getCrawlerTotalElapsedTime()) : 0), false));
        out.write("\n\t\t                                            (");
        out.print(
            ArchiveUtils.doubleToString(
                ((stats != null) ? stats.currentProcessedDocsPerSec() : 0), 2));
        out.write("/sec)\n                                            ");
      }
      out.write("\n                                        ");
      out.write("</td>\n                                    ");
      out.write("</tr>\n                                ");
      out.write("</table>\n                            ");
      out.write("</td>\n                        ");
      out.write("</tr>\n                    ");
      out.write("</table>\n                ");
      out.write("</td>\n                ");
      out.write("<td width=\"100%\" nowrap>\n                    &nbsp;\n                ");
      out.write("</td>\n            ");
      out.write("</tr>\n            ");
      out.write("<tr>\n                ");
      out.write("<td bgcolor=\"#0000FF\" height=\"1\" colspan=\"4\">\n                ");
      out.write("</td>\n            ");
      out.write("</tr>\n            ");
      out.write("<tr>\n                ");
      out.write("<td colspan=\"4\" height=\"20\">\n                    ");
      out.write(
          "<table border=\"0\" cellspacing=\"0\" cellpadding=\"0\" width=\"100%\" height=\"20\">\n                        ");
      out.write("<tr>\n                            ");
      out.write("<td class=\"tab_seperator\">&nbsp;");
      out.write("</td>\n                            ");
      out.write("<td class=\"tab");
      out.print(tab == 0 ? "_selected" : "");
      out.write("\">\n                                ");
      out.write("<a href=\"");
      out.print(request.getContextPath());
      out.write("/index.jsp\" class=\"tab_text");
      out.print(tab == 0 ? "_selected" : "");
      out.write("\">Console");
      out.write("</a>\n                            ");
      out.write("</td>\n                            ");
      out.write("<td class=\"tab_seperator\">&nbsp;");
      out.write("</td>\n                            ");
      out.write("<td class=\"tab");
      out.print(tab == 1 ? "_selected" : "");
      out.write("\">\n                                ");
      out.write("<a href=\"");
      out.print(request.getContextPath());
      out.write("/jobs.jsp\" class=\"tab_text");
      out.print(tab == 1 ? "_selected" : "");
      out.write("\">Jobs");
      out.write("</a>\n                            ");
      out.write("</td>\n                            ");
      out.write("<td class=\"tab_seperator\">&nbsp;");
      out.write("</td>\n                            ");
      out.write("<td class=\"tab");
      out.print(tab == 2 ? "_selected" : "");
      out.write("\">\n                                ");
      out.write("<a href=\"");
      out.print(request.getContextPath());
      out.write("/profiles.jsp\" class=\"tab_text");
      out.print(tab == 2 ? "_selected" : "");
      out.write("\">Profiles");
      out.write("</a>\n                            ");
      out.write("</td>\n                            ");
      out.write("<td class=\"tab_seperator\">&nbsp;");
      out.write("</td>\n                            ");
      out.write("<td class=\"tab");
      out.print(tab == 3 ? "_selected" : "");
      out.write("\">\n                                ");
      out.write("<a href=\"");
      out.print(request.getContextPath());
      out.write("/logs.jsp\" class=\"tab_text");
      out.print(tab == 3 ? "_selected" : "");
      out.write("\">Logs");
      out.write("</a>\n                            ");
      out.write("</td>\n                            ");
      out.write("<td class=\"tab_seperator\">&nbsp;");
      out.write("</td>\n                            ");
      out.write("<td class=\"tab");
      out.print(tab == 4 ? "_selected" : "");
      out.write("\">\n                                ");
      out.write("<a href=\"");
      out.print(request.getContextPath());
      out.write("/reports.jsp\" class=\"tab_text");
      out.print(tab == 4 ? "_selected" : "");
      out.write("\">Reports");
      out.write("</a>\n                            ");
      out.write("</td>\n                            ");
      out.write("<td class=\"tab_seperator\">&nbsp;");
      out.write("</td>\n                            ");
      out.write("<td class=\"tab");
      out.print(tab == 5 ? "_selected" : "");
      out.write("\">\n                                ");
      out.write("<a href=\"");
      out.print(request.getContextPath());
      out.write("/setup.jsp\" class=\"tab_text");
      out.print(tab == 5 ? "_selected" : "");
      out.write("\">Setup");
      out.write("</a>\n                            ");
      out.write("</td>\n                            ");
      out.write("<td class=\"tab_seperator\">&nbsp;");
      out.write("</td>\n                            ");
      out.write("<td class=\"tab");
      out.print(tab == 6 ? "_selected" : "");
      out.write("\">\n                                ");
      out.write("<a href=\"");
      out.print(request.getContextPath());
      out.write("/help.jsp\" class=\"tab_text");
      out.print(tab == 6 ? "_selected" : "");
      out.write("\">Help");
      out.write("</a>\n                             ");
      out.write("</td>\n                            ");
      out.write("<td width=\"100%\">\n                            ");
      out.write("</td>\n                        ");
      out.write("</tr>\n                    ");
      out.write("</table>\n                ");
      out.write("</td>\n            ");
      out.write("</tr>\n            ");
      out.write("<tr>\n                ");
      out.write("<td bgcolor=\"#0000FF\" height=\"1\" colspan=\"4\">");
      out.write("</td>\n            ");
      out.write("</tr>\n         ");
      out.write("</table>\n                    ");
      out.write("<!-- MAIN BODY -->\n");
      out.write("\n\n");
      out.write("<div class=\"margined\">\n    ");
      out.write("<h1>Heritrix online help");
      out.write("</h1>\n");
      out.write("<p>\n    ");
      out.write("<b>");
      out.write("<a href=\"");
      out.print(request.getContextPath());
      out.write("/about.jsp\">About Heritrix");
      out.write("</a>");
      out.write("</b>");
      out.write("</br>\n    Includes license and current environment information.\n");
      out.write("</p>\n");
      out.write("<p>\n    ");
      out.write("<b>");
      out.write("<a target=\"_blank\" \n    href=\"");
      out.print(request.getContextPath());
      out.write("/docs/articles/user_manual/index.html\">User\n        Manual");
      out.write("</a>");
      out.write("</b>");
      out.write(
          "<br> Covers creating, configuring, launching,\n        monitoring and analysing crawl jobs. For all users.\n");
      out.write("</p>\n");
      out.write("<p>\n    ");
      out.write("<b>");
      out.write("<a target=\"_blank\" \n        href=\"");
      out.print(request.getContextPath());
      out.write("/docs/articles/developer_manual/index.html\">Developer Manual");
      out.write("</a>");
      out.write("</b>");
      out.write(
          "<br> Covers how to write add on modules for Heritrix\n        and provides in depth coverage of Heritrix's architecture. For\n        advanced users.\n");
      out.write("</p>\n");
      out.write("<p>\n    ");
      out.write("<b>");
      out.write("<a target=\"_blank\" \n        href=\"");
      out.print(request.getContextPath());
      out.write("/docs/articles/releasenotes/index.html\">Release Notes");
      out.write("</a>");
      out.write("</b>");
      out.write("<br>\n");
      out.write("</p>\n");
      out.write("<p>\n\t");
      out.write("<b>");
      out.write(
          "<a href=\"http://crawler.archive.org/issue-tracking.html\" target=\"_blank\">Issue Tracking");
      out.write("</a>");
      out.write("</b>");
      out.write(
          "<br />\n\tIf you have found a bug or would like to see new features in Heritrix, check the following links:\n\t");
      out.write("<ul>\n\t\t");
      out.write("<li>");
      out.write(
          "<a href=\"http://sourceforge.net/tracker/?atid=539099&amp;group_id=73833&amp;func=browse\" target=\"_blank\">Bugs");
      out.write("</a>");
      out.write("</li>\n\t\t");
      out.write("<li>");
      out.write(
          "<a href=\"http://sourceforge.net/tracker/?atid=539102&amp;group_id=73833&amp;func=browse\" target=\"_blank\">Feature Requests");
      out.write("</a>");
      out.write("</li>\n\t");
      out.write("</ul>\n");
      out.write("</p>\n");
      out.write("<p>\n    ");
      out.write("<b>");
      out.write(
          "<a href=\"http://crawler.archive.org/mail-lists.html\" target=\"_blank\">Mailing Lists");
      out.write("</a>");
      out.write("</b>");
      out.write("<br />\n    For general discussion on Heritrix, use our ");
      out.write(
          "<a href=\"http://groups.yahoo.com/group/archive-crawler/\" target=\"_blank\">Crawler Discussion List");
      out.write("</a>.\n");
      out.write("</p>\n");
      out.write("<p>\n    ");
      out.write("<b>");
      out.write("<a href=\"");
      out.print(request.getContextPath());
      out.write("/help/regexpr.jsp\">Regular Expressions");
      out.write("</a>");
      out.write("</b>");
      out.write(
          "<br />\n    Information about the regular expressions used in Heritrix and a tool to double check that your regular expressions are valid and that they correctly identify the desired strings.\n");
      out.write("</p>\n");
      out.write("<p>\n    ");
      out.write("<b>");
      out.write("<a href=\"");
      out.print(request.getContextPath());
      out.write("/help/codes.jsp\">URI Fetch Status Codes");
      out.write("</a>");
      out.write("</b>");
      out.write(
          "<br />\n    This reference details what each of the fetch status codes assigned to URIs means.\n");
      out.write("</p>\n");
      out.write("<hr />\n");
      out.write("<font size=\"-1\">Heritrix version @VERSION@");
      out.write("</font>\n");
      out.write("</div>\n");

      /**
       * An include file that handles the "look" and navigation of a web page. Wraps up things
       * begun in the "head.jsp" include file. See it for more details.
       *
       * @author Kristinn Sigurdsson
       */
      out.write("\n");
      out.write("<br/>\n");
      out.write("<br/>\n        ");
      out.write(
          "<table border=\"0\" cellspacing=\"0\" cellpadding=\"0\" width=\"100%\">\n            ");
      out.write("<tr>\n            ");
      out.write("<td bgcolor=\"#0000FF\" height=\"1\" colspan=\"4\">");
      out.write("</td>\n            ");
      out.write("</tr>\n            ");
      out.write("<tr>\n            ");
      out.write("<td class=\"instance_name\">Identifier: ");
      out.print(currentHeritrixName);
      out.write("</td>\n            ");
      out.write("</tr>\n        ");
      out.write("</table>\n                    ");
      out.write("<!-- END MAIN BODY -->\n    ");
      out.write("</body>\n");
      out.write("</html>");
      out.write("\n");
    } catch (Throwable t) {
      out = _jspx_out;
      if (out != null && out.getBufferSize() != 0) out.clearBuffer();
      if (pageContext != null) pageContext.handlePageException(t);
    } finally {
      if (_jspxFactory != null) _jspxFactory.releasePageContext(pageContext);
    }
  }