Exemplo n.º 1
0
  /**
   * Parses the document at the given URL with a parser chosen for its resource type.
   *
   * <p>If {@code EdocIndexMetadataFetcherTool.isEDocIndex(url)} reports an eDoc index, the eDoc
   * parser is used and the extension is not consulted. Otherwise the parser is picked by comparing
   * {@code getExtension(url)} against the known extension constants (PDF, DOC, ODT, XML,
   * HTM/HTML/XHTML, TXT).
   *
   * @param url the URL to the document.
   * @return an {@link IDocument} containing the fulltext and maybe metadata for the parsed
   *     document.
   * @throws ApplicationException if there's no parser available for the type of resource.
   */
  public IDocument parse(final String url) throws ApplicationException {
    final ResourceParser parser;

    if (EdocIndexMetadataFetcherTool.isEDocIndex(url)) {
      parser = EdocParserImpl.getInstance();
    } else {
      // Looked up only after the eDoc check, so eDoc URLs never go through getExtension.
      final String extension = getExtension(url);

      if (extension.equals(EXT_PDF)) {
        parser = PdfParserImpl.getInstance();
      } else if (extension.equals(EXT_DOC)) {
        parser = DocParserImpl.getInstance();
      } else if (extension.equals(EXT_ODT)) {
        parser = OdfParserImpl.getInstance();
      } else if (extension.equals(EXT_XML)) {
        parser = XmlParserImpl.getInstance();
      } else if (extension.equals(EXT_HTM)
          || extension.equals(EXT_HTML)
          || extension.equals(EXT_XHTML)) {
        // All three HTML flavours share one parser.
        parser = HtmlParserImpl.getInstance();
      } else if (extension.equals(EXT_TXT)) {
        parser = TxtParserImpl.getInstance();
      } else {
        parser = null;
      }
    }

    // Guard clause: nothing matched, so report the unsupported extension to the caller.
    if (parser == null) {
      throw new ApplicationException(
          "There's no parser available for this type of resource: " + getExtension(url));
    }

    return (IDocument) parser.parse("", url);
  }
Exemplo n.º 2
0
  /**
   * Creates an {@link XmlParser} for the given XML file and opens it before returning.
   *
   * <p>The parser is built with a fresh document builder and transformer obtained from this
   * object's {@code documentBuilderFactory} and {@code transformerFactory}.
   *
   * @param xmlFile the XML file to parse; must exist on disk.
   * @param xmlProperties properties handed to the parser implementation unchanged.
   * @return the opened parser.
   * @throws IllegalStateException if {@code xmlFile} does not exist.
   * @throws TransformerConfigurationException presumably if the transformer cannot be created
   *     (TODO confirm against the factory type).
   * @throws ParserConfigurationException presumably if the document builder cannot be created
   *     (TODO confirm against the factory type).
   * @throws SAXException declared by this method; the raising call is not visible here
   *     (presumably the constructor or {@code open()}).
   * @throws IOException declared by this method; the raising call is not visible here
   *     (presumably the constructor or {@code open()}).
   */
  public XmlParser newXmlParser(final File xmlFile, final Properties xmlProperties)
      throws TransformerConfigurationException, ParserConfigurationException, SAXException,
          IOException {
    // Fail fast before any factory work is done.
    if (!xmlFile.exists()) {
      final String message =
          String.format("The given xml file %s does not exist.", xmlFile.getAbsolutePath());
      throw new IllegalStateException(message);
    }

    // Declared as the concrete type because open() is invoked on it before it is returned.
    final XmlParserImpl parser =
        new XmlParserImpl(
            xmlFile,
            xmlProperties,
            documentBuilderFactory.newDocumentBuilder(),
            transformerFactory.newTransformer());

    parser.open();
    return parser;
  }