Ejemplo n.º 1
0
  /**
   * Applies the JAXP 1.2 schema-validation properties to the given parser.
   *
   * <p>The {@code schemaLocation} and {@code schemaLanguage} entries are read from
   * {@code properties}. Nothing is configured unless a schema location is present. If the parser
   * does not recognize one of the JAXP properties, the failure is logged at info level and
   * otherwise ignored.
   *
   * @param parser the SAX parser to configure
   * @param properties optional parser settings; may hold the schema location and schema language
   * @throws ParserConfigurationException declared for callers; not raised by the visible code
   * @throws SAXNotSupportedException if the parser recognizes a property but rejects its value
   */
  private static void configureOldXerces(SAXParser parser, Properties properties)
      throws ParserConfigurationException, SAXNotSupportedException {

    final String location = (String) properties.get("schemaLocation");
    final String language = (String) properties.get("schemaLanguage");

    // Without a schema location there is nothing to validate against.
    if (location == null) {
      return;
    }

    try {
      parser.setProperty(JAXP_SCHEMA_LANGUAGE, language);
      parser.setProperty(JAXP_SCHEMA_SOURCE, location);
    } catch (SAXNotRecognizedException unrecognized) {
      final String parserName = parser.getClass().getName();
      log.info(parserName + ": " + unrecognized.getMessage() + " not supported.");
    }
  }
  /**
   * Parses an HTML document and returns the content collected by the handler.
   *
   * <p>The markup is fed through a {@code Parser} configured with an {@code HTMLSchema} and with
   * namespace processing switched off. SAX events are delivered to a
   * {@code RejuvenationResearchParserHandler}, whose {@code getContent()} result is returned.
   *
   * @param html the HTML markup to parse
   * @return the handler's content, or {@code null} if {@code judge(html)} rejects the input, the
   *     parser cannot be configured, or parsing fails
   */
  @Override
  public String parseFulltext(String html) {
    if (!judge(html)) {
      return null;
    }

    XMLReader r = new Parser();
    try {
      r.setProperty(Parser.schemaProperty, new HTMLSchema());
      r.setFeature(Parser.namespacesFeature, false);
    } catch (SAXNotRecognizedException e) {
      logger.fatal(e.getMessage());
      return null;
    } catch (SAXNotSupportedException e) {
      logger.fatal(e.getMessage());
      return null;
    }

    RejuvenationResearchParserHandler h = new RejuvenationResearchParserHandler();
    r.setContentHandler(h);
    // Encode with the same charset the InputSource declares below. The no-arg getBytes() uses
    // the platform default, which corrupts non-ASCII text wherever that default is not UTF-8.
    InputStream fin =
        new ByteArrayInputStream(html.getBytes(java.nio.charset.StandardCharsets.UTF_8));
    InputSource s = new InputSource(fin);
    s.setEncoding("utf8");
    try {
      r.parse(s);
      fin.close();
      return h.getContent();
    } catch (IOException e) {
      logger.fatal(e.getMessage());
    } catch (SAXException e) {
      logger.fatal(e.getMessage());
    }

    return null;
  }
Ejemplo n.º 3
0
  /**
   * Retrieves a cached XMLReader for this thread, or creates a new XMLReader, if the existing
   * reader is in use. When the caller no longer needs the reader, it must release it with a call to
   * {@link #releaseXMLReader}.
   *
   * <p>The per-thread cache ({@code m_readers}) and the in-use map ({@code m_inUse}) are created
   * lazily on the first call. A new reader is obtained from {@code XMLReaderFactory} first; if
   * that fails with any {@code Exception}, a reader is obtained from a JAXP
   * {@code SAXParserFactory} instead. The external-DTD access property and the security-manager
   * limits are applied on every call, whether the reader is cached or new; failures there only
   * print a warning to {@code System.err}.
   *
   * @return the reader handed out to the caller
   * @throws SAXException if the JAXP fallback fails with a {@code ParserConfigurationException}
   *     or {@code FactoryConfigurationError} (both are wrapped), or if a {@code SAXException}
   *     escapes from the fallback itself
   */
  public synchronized XMLReader getXMLReader() throws SAXException {
    XMLReader reader;

    if (m_readers == null) {
      // When the m_readers.get() method is called for the first time
      // on a thread, a new XMLReader will automatically be created.
      // NOTE(review): a plain ThreadLocal returns null on first get(); the reader is actually
      // created further down in this method — confirm the comment above is still accurate.
      m_readers = new ThreadLocal();
    }

    if (m_inUse == null) {
      m_inUse = new HashMap();
    }

    // If the cached reader for this thread is in use, construct a new
    // one; otherwise, return the cached reader unless it isn't an
    // instance of the class set in the 'org.xml.sax.driver' property
    reader = (XMLReader) m_readers.get();
    boolean threadHasReader = (reader != null);
    String factory = SecuritySupport.getSystemProperty(property);
    if (threadHasReader
        && m_inUse.get(reader) != Boolean.TRUE
        && (factory == null || reader.getClass().getName().equals(factory))) {
      // Reuse the cached reader and mark it as handed out.
      m_inUse.put(reader, Boolean.TRUE);
    } else {
      try {
        try {
          // According to JAXP 1.2 specification, if a SAXSource
          // is created using a SAX InputSource the Transformer or
          // TransformerFactory creates a reader via the
          // XMLReaderFactory if setXMLReader is not used
          reader = XMLReaderFactory.createXMLReader();
          try {
            reader.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, _secureProcessing);
          } catch (SAXNotRecognizedException e) {
            // Only "not recognized" is tolerated here; any other exception from setFeature
            // falls through to the catch (Exception) below and triggers the JAXP fallback.
            System.err.println("Warning:  " + reader.getClass().getName() + ": " + e.getMessage());
          }
        } catch (Exception e) {
          try {
            // If unable to create an instance, let's try to use
            // the XMLReader from JAXP
            if (m_parserFactory == null) {
              m_parserFactory = FactoryImpl.getSAXFactory(m_useServicesMechanism);
              m_parserFactory.setNamespaceAware(true);
            }

            reader = m_parserFactory.newSAXParser().getXMLReader();
          } catch (ParserConfigurationException pce) {
            throw pce; // pass along pce
          }
        }
        try {
          reader.setFeature(NAMESPACES_FEATURE, true);
          reader.setFeature(NAMESPACE_PREFIXES_FEATURE, false);
        } catch (SAXException se) {
          // Try to carry on if we've got a parser that
          // doesn't know about namespace prefixes.
        }
      } catch (ParserConfigurationException ex) {
        throw new SAXException(ex);
      } catch (FactoryConfigurationError ex1) {
        throw new SAXException(ex1.toString());
      } catch (NoSuchMethodError ex2) {
        // NOTE(review): swallowed silently. If reader creation failed this way, 'reader' still
        // holds its earlier value (possibly null) and is used below — confirm this is intended.
      } catch (AbstractMethodError ame) {
        // NOTE(review): swallowed silently, same caveat as NoSuchMethodError above.
      }

      // Cache the XMLReader if this is the first time we've created
      // a reader for this thread.
      // A reader created because the cached one was busy (or of the wrong class) is neither
      // cached nor recorded in m_inUse.
      if (!threadHasReader) {
        m_readers.set(reader);
        m_inUse.put(reader, Boolean.TRUE);
      }
    }

    try {
      // reader is cached, but this property might have been reset
      reader.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, _accessExternalDTD);
    } catch (SAXException se) {
      System.err.println("Warning:  " + reader.getClass().getName() + ": " + se.getMessage());
    }

    try {
      if (_xmlSecurityManager != null) {
        // Push every configured limit onto the reader. The first SAXException aborts the
        // remaining limits and is reported as a warning below.
        for (XMLSecurityManager.Limit limit : XMLSecurityManager.Limit.values()) {
          reader.setProperty(limit.apiProperty(), _xmlSecurityManager.getLimitValueAsString(limit));
        }
        if (_xmlSecurityManager.printEntityCountInfo()) {
          reader.setProperty(XalanConstants.JDK_ENTITY_COUNT_INFO, XalanConstants.JDK_YES);
        }
      }
    } catch (SAXException se) {
      System.err.println("Warning:  " + reader.getClass().getName() + ": " + se.getMessage());
    }

    return reader;
  }
Ejemplo n.º 4
0
 /**
  * Parses the configured source with a SAX parser and returns the phylogenies collected by a
  * {@code TolXmlHandler}.
  *
  * <p>Schema validation is enabled only when {@code getSchemaLocation()} is non-empty. The source
  * returned by {@code getSource()} may be a {@code File} (plain or {@code .zip}), an
  * {@code InputSource}, an {@code InputStream} (plain or zipped), a {@code String} file path, or
  * a {@code StringBuffer} holding the XML itself.
  *
  * <p>Readers that this method opens over files are closed before it returns. Streams and
  * sources supplied by the caller are left open.
  *
  * @return the parsed phylogenies, in the order the handler reports them
  * @throws IOException declared for callers; I/O failures inside this method are reported as
  *     {@code PhylogenyParserException}
  * @throws PhylogenyParserException if the parser cannot be configured, the source type is
  *     unsupported, a zipped source is empty, or parsing fails
  */
 public Phylogeny[] parse() throws IOException, PhylogenyParserException {
   reset();
   final TolXmlHandler handler = new TolXmlHandler();
   final SAXParserFactory factory = SAXParserFactory.newInstance();
   factory.setNamespaceAware(true);
   try {
     if (!ForesterUtil.isEmpty(getSchemaLocation())) {
       factory.setFeature(SAX_FEATURES_VALIDATION, true);
       factory.setFeature(APACHE_FEATURES_VALIDATION_SCHEMA, true);
       factory.setFeature(APACHE_FEATURES_VALIDATION_SCHEMA_FULL, true);
     }
   } catch (final SAXNotRecognizedException e) {
     e.printStackTrace();
     throw new PhylogenyParserException("sax not recognized exception: " + e.getMessage());
   } catch (final SAXNotSupportedException e) {
     e.printStackTrace();
     throw new PhylogenyParserException("sax not supported exception: " + e.getMessage());
   } catch (final ParserConfigurationException e) {
     e.printStackTrace();
     throw new PhylogenyParserException("parser _configuration exception: " + e.getMessage());
   } catch (final Exception e) {
     e.printStackTrace();
     throw new PhylogenyParserException("error while configuring sax parser: " + e.getMessage());
   }
   // Tracks a reader opened here over a file, so it can be closed in the finally block.
   // Readers wrapping caller-supplied streams are deliberately not tracked.
   Reader owned_reader = null;
   try {
     final SAXParser parser = factory.newSAXParser();
     if (!ForesterUtil.isEmpty(getSchemaLocation())) {
       parser.setProperty(JAXP_SCHEMA_LANGUAGE, W3C_XML_SCHEMA);
       parser.setProperty(JAXP_SCHEMA_SOURCE, getSchemaLocation());
       parser.setProperty(APACHE_PROPERTIES_SCHEMA_EXTERNAL_LOCATION, getSchemaLocation());
     }
     final XMLReader xml_reader = parser.getXMLReader();
     xml_reader.setContentHandler(handler);
     xml_reader.setErrorHandler(new TolParserErrorHandler());
     if (getSource() instanceof File) {
       if (!getSource().toString().toLowerCase().endsWith(".zip")) {
         owned_reader = new FileReader((File) getSource());
         xml_reader.parse(new InputSource(owned_reader));
       } else {
         final Reader reader = getReaderFromZipFile();
         if (reader == null) {
           throw new PhylogenyParserException(
               "Zip file \"" + getSource() + "\" appears not to contain any entries");
         }
         xml_reader.parse(new InputSource(reader));
       }
     } else if (getSource() instanceof InputSource) {
       xml_reader.parse((InputSource) getSource());
     } else if (getSource() instanceof InputStream) {
       if (!isZippedInputstream()) {
         final InputStream is = (InputStream) getSource();
         final Reader reader = new InputStreamReader(is);
         xml_reader.parse(new InputSource(reader));
       } else {
         final ZipInputStream zip_is = new ZipInputStream((InputStream) getSource());
         // getNextEntry() returns null when the archive has no entries. A freshly constructed
         // InputStreamReader is never null, so the entry itself is what must be checked.
         if (zip_is.getNextEntry() == null) {
           throw new PhylogenyParserException(
               "Zip input stream \"" + getSource() + "\" appears not to contain any data");
         }
         final Reader reader = new InputStreamReader(zip_is);
         xml_reader.parse(new InputSource(reader));
       }
     } else if (getSource() instanceof String) {
       final File file = new File(getSource().toString());
       owned_reader = new FileReader(file);
       xml_reader.parse(new InputSource(owned_reader));
     } else if (getSource() instanceof StringBuffer) {
       final StringReader string_reader = new StringReader(getSource().toString());
       xml_reader.parse(new InputSource(string_reader));
     } else {
       throw new PhylogenyParserException(
           "attempt to parse object of unsupported type: \"" + getSource().getClass() + "\"");
     }
   } catch (final SAXException sax_exception) {
     throw new PhylogenyParserException(
         "Failed to parse [" + getSource() + "]: " + sax_exception.getMessage());
   } catch (final ParserConfigurationException parser_config_exception) {
     throw new PhylogenyParserException(
         "Failed to parse ["
             + getSource()
             + "] Problem with xml parser _configuration: "
             + parser_config_exception.getMessage());
   } catch (final IOException e) {
     throw new PhylogenyParserException(
         "Problem with input source [" + getSource() + "]: \n" + e.getMessage());
   } catch (final Exception e) {
     e.printStackTrace();
     throw new PhylogenyParserException(
         "Failed to parse [" + getSource() + "]: " + e.getMessage());
   } catch (final Error err) {
     err.printStackTrace();
     throw new PhylogenyParserException("Severe error: " + err.getMessage());
   } finally {
     if (owned_reader != null) {
       try {
         owned_reader.close();
       } catch (final IOException ignored) {
         // A failure to close must not mask the parse result or the original exception.
       }
     }
   }
   final Phylogeny[] ps = new Phylogeny[handler.getPhylogenies().size()];
   int i = 0;
   for (final Phylogeny phylogeny : handler.getPhylogenies()) {
     ps[i++] = phylogeny;
   }
   return ps;
 }