Exemplo n.º 1
0
  /**
   * Parses a page by trying, in order, each {@link Parser} that the parser factory returns for
   * the page's content type and URL. The first {@link Parse} that is non-null and whose status
   * {@link ParseStatusUtils} reports as successful is returned immediately.
   *
   * <p>When {@code maxParseTime} is anything other than {@code -1}, each attempt goes through
   * {@code runParser} (presumably to enforce a time limit — confirm against that method);
   * otherwise the parser is invoked directly.
   *
   * <p>If no parser yields a successful result, a message is logged at {@code WARN} level and an
   * empty parse wrapping a {@link ParseException} is returned instead of throwing.
   *
   * @param url the URL of the content being parsed; also used to select parsers
   * @param page the page whose content type selects the parsers and which is handed to them
   * @return the first successful parse, or an empty parse if every parser failed
   * @throws ParserNotFound If there is no suitable parser found.
   * @throws ParseException If there is an error parsing.
   */
  public Parse parse(String url, WebPage page) throws ParserNotFound, ParseException {
    final String contentType = TableUtil.toString(page.getContentType());
    final Parser[] candidates = this.parserFactory.getParsers(contentType, url);

    for (Parser candidate : candidates) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Parsing [" + url + "] with [" + candidate + "]");
      }

      // -1 disables the runParser path; any other value routes the attempt through it.
      final Parse result =
          (maxParseTime != -1) ? runParser(candidate, url, page) : candidate.getParse(url, page);

      // A null result counts as a failed attempt; move on to the next parser.
      if (result == null) {
        continue;
      }
      if (ParseStatusUtils.isSuccess(result.getParseStatus())) {
        return result;
      }
    }

    // Every candidate failed (or there were none): report it and hand back an empty parse.
    LOG.warn("Unable to successfully parse content " + url + " of type " + contentType);
    final ParseException failure = new ParseException("Unable to successfully parse content");
    return ParseStatusUtils.getEmptyParse(failure, null);
  }