/**
 * Applies the JAXP 1.2 schema-validation properties to the given parser.
 *
 * <p>The {@code "schemaLocation"} and {@code "schemaLanguage"} entries are read from
 * {@code properties}. Nothing is set on the parser unless a schema location is present;
 * when it is, the schema language is set first and the schema source second. A parser
 * that does not recognise one of the properties is tolerated: the condition is logged
 * at info level and the method returns normally.
 *
 * @param parser the parser to configure
 * @param properties optional parser settings; both entries are cast to {@code String}
 * @throws ParserConfigurationException declared for callers; not raised by this body directly
 * @throws SAXNotSupportedException if the parser recognises a property but rejects its value
 */
private static void configureOldXerces(SAXParser parser, Properties properties)
        throws ParserConfigurationException, SAXNotSupportedException {
    // Both lookups happen up front so a non-String value fails the cast
    // regardless of whether a schema location was supplied.
    final String location = (String) properties.get("schemaLocation");
    final String language = (String) properties.get("schemaLanguage");

    if (location == null) {
        return;
    }

    try {
        parser.setProperty(JAXP_SCHEMA_LANGUAGE, language);
        parser.setProperty(JAXP_SCHEMA_SOURCE, location);
    } catch (SAXNotRecognizedException e) {
        final String parserName = parser.getClass().getName();
        log.info(parserName + ": " + e.getMessage() + " not supported.");
    }
}
/**
 * Parses an HTML document and returns the content collected by a
 * {@code RejuvenationResearchParserHandler}.
 *
 * <p>The document is fed through a {@code Parser} configured with an {@code HTMLSchema}
 * and with namespace processing switched off (NOTE(review): these look like the TagSoup
 * classes - confirm against the file's imports).
 *
 * @param html the HTML markup to parse
 * @return the handler's content, or {@code null} when {@code judge(html)} rejects the
 *     input, the parser cannot be configured, or parsing fails
 */
@Override
public String parseFulltext(String html) {
    if (!judge(html)) {
        return null;
    }

    XMLReader r = new Parser();
    HTMLSchema theSchema = new HTMLSchema();
    try {
        r.setProperty(Parser.schemaProperty, theSchema);
        r.setFeature(Parser.namespacesFeature, false);
    } catch (SAXNotRecognizedException e) {
        logger.fatal(e.getMessage());
        return null;
    } catch (SAXNotSupportedException e) {
        logger.fatal(e.getMessage());
        return null;
    }

    RejuvenationResearchParserHandler h = new RejuvenationResearchParserHandler();
    r.setContentHandler(h);

    // Encode with the same charset that is declared on the InputSource below.
    // The no-arg getBytes() uses the platform default charset, which corrupts
    // non-ASCII text whenever that default is not UTF-8.
    InputStream fin =
            new ByteArrayInputStream(html.getBytes(java.nio.charset.StandardCharsets.UTF_8));
    InputSource s = new InputSource(fin);
    s.setEncoding("utf8");
    try {
        r.parse(s);
        fin.close();
        return h.getContent();
    } catch (IOException e) {
        logger.fatal(e.getMessage());
    } catch (SAXException e) {
        logger.fatal(e.getMessage());
    }
    return null;
}
/** * Retrieves a cached XMLReader for this thread, or creates a new XMLReader, if the existing * reader is in use. When the caller no longer needs the reader, it must release it with a call to * {@link #releaseXMLReader}. */ public synchronized XMLReader getXMLReader() throws SAXException { XMLReader reader; if (m_readers == null) { // When the m_readers.get() method is called for the first time // on a thread, a new XMLReader will automatically be created. m_readers = new ThreadLocal(); } if (m_inUse == null) { m_inUse = new HashMap(); } // If the cached reader for this thread is in use, construct a new // one; otherwise, return the cached reader unless it isn't an // instance of the class set in the 'org.xml.sax.driver' property reader = (XMLReader) m_readers.get(); boolean threadHasReader = (reader != null); String factory = SecuritySupport.getSystemProperty(property); if (threadHasReader && m_inUse.get(reader) != Boolean.TRUE && (factory == null || reader.getClass().getName().equals(factory))) { m_inUse.put(reader, Boolean.TRUE); } else { try { try { // According to JAXP 1.2 specification, if a SAXSource // is created using a SAX InputSource the Transformer or // TransformerFactory creates a reader via the // XMLReaderFactory if setXMLReader is not used reader = XMLReaderFactory.createXMLReader(); try { reader.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, _secureProcessing); } catch (SAXNotRecognizedException e) { System.err.println("Warning: " + reader.getClass().getName() + ": " + e.getMessage()); } } catch (Exception e) { try { // If unable to create an instance, let's try to use // the XMLReader from JAXP if (m_parserFactory == null) { m_parserFactory = FactoryImpl.getSAXFactory(m_useServicesMechanism); m_parserFactory.setNamespaceAware(true); } reader = m_parserFactory.newSAXParser().getXMLReader(); } catch (ParserConfigurationException pce) { throw pce; // pass along pce } } try { reader.setFeature(NAMESPACES_FEATURE, true); 
reader.setFeature(NAMESPACE_PREFIXES_FEATURE, false); } catch (SAXException se) { // Try to carry on if we've got a parser that // doesn't know about namespace prefixes. } } catch (ParserConfigurationException ex) { throw new SAXException(ex); } catch (FactoryConfigurationError ex1) { throw new SAXException(ex1.toString()); } catch (NoSuchMethodError ex2) { } catch (AbstractMethodError ame) { } // Cache the XMLReader if this is the first time we've created // a reader for this thread. if (!threadHasReader) { m_readers.set(reader); m_inUse.put(reader, Boolean.TRUE); } } try { // reader is cached, but this property might have been reset reader.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, _accessExternalDTD); } catch (SAXException se) { System.err.println("Warning: " + reader.getClass().getName() + ": " + se.getMessage()); } try { if (_xmlSecurityManager != null) { for (XMLSecurityManager.Limit limit : XMLSecurityManager.Limit.values()) { reader.setProperty(limit.apiProperty(), _xmlSecurityManager.getLimitValueAsString(limit)); } if (_xmlSecurityManager.printEntityCountInfo()) { reader.setProperty(XalanConstants.JDK_ENTITY_COUNT_INFO, XalanConstants.JDK_YES); } } } catch (SAXException se) { System.err.println("Warning: " + reader.getClass().getName() + ": " + se.getMessage()); } return reader; }
/**
 * Parses the configured source with a namespace-aware SAX parser and returns the
 * phylogenies collected by a {@code TolXmlHandler}.
 *
 * <p>When {@code getSchemaLocation()} is non-empty, schema validation is enabled on the
 * factory and the schema language/source properties are set on the parser. The source
 * returned by {@code getSource()} may be a {@code File} (plain, or a zip archive when its
 * name ends in ".zip"), an {@code InputSource}, an {@code InputStream} (plain or zipped),
 * a {@code String} file path, or a {@code StringBuffer} holding the document text.
 *
 * <p>File readers opened by this method are closed before it returns. Streams and input
 * sources supplied by the caller are left open.
 *
 * @return the parsed phylogenies, in the order the handler reports them
 * @throws IOException declared for callers; I/O failures inside parsing are rewrapped
 *     as {@code PhylogenyParserException}
 * @throws PhylogenyParserException if the parser cannot be configured, the source is of
 *     an unsupported type, a zipped source has no entries, or parsing fails
 */
public Phylogeny[] parse() throws IOException, PhylogenyParserException {
    reset();
    final TolXmlHandler handler = new TolXmlHandler();
    final SAXParserFactory factory = SAXParserFactory.newInstance();
    factory.setNamespaceAware(true);
    try {
        if (!ForesterUtil.isEmpty(getSchemaLocation())) {
            factory.setFeature(SAX_FEATURES_VALIDATION, true);
            factory.setFeature(APACHE_FEATURES_VALIDATION_SCHEMA, true);
            factory.setFeature(APACHE_FEATURES_VALIDATION_SCHEMA_FULL, true);
        }
    } catch (final SAXNotRecognizedException e) {
        e.printStackTrace();
        throw new PhylogenyParserException("sax not recognized exception: " + e.getMessage());
    } catch (final SAXNotSupportedException e) {
        e.printStackTrace();
        throw new PhylogenyParserException("sax not supported exception: " + e.getMessage());
    } catch (final ParserConfigurationException e) {
        e.printStackTrace();
        throw new PhylogenyParserException("parser _configuration exception: " + e.getMessage());
    } catch (final Exception e) {
        e.printStackTrace();
        throw new PhylogenyParserException("error while configuring sax parser: " + e.getMessage());
    }

    // Tracks a reader that this method opened itself, so it can be closed in 'finally'.
    Reader owned_reader = null;
    try {
        final SAXParser parser = factory.newSAXParser();
        if (!ForesterUtil.isEmpty(getSchemaLocation())) {
            parser.setProperty(JAXP_SCHEMA_LANGUAGE, W3C_XML_SCHEMA);
            parser.setProperty(JAXP_SCHEMA_SOURCE, getSchemaLocation());
            parser.setProperty(APACHE_PROPERTIES_SCHEMA_EXTERNAL_LOCATION, getSchemaLocation());
        }
        final XMLReader xml_reader = parser.getXMLReader();
        xml_reader.setContentHandler(handler);
        xml_reader.setErrorHandler(new TolParserErrorHandler());
        if (getSource() instanceof File) {
            // Locale.ROOT keeps the suffix test independent of the default locale
            // (e.g. the Turkish dotless i would otherwise break ".ZIP").
            if (!getSource().toString().toLowerCase(java.util.Locale.ROOT).endsWith(".zip")) {
                owned_reader = new FileReader((File) getSource());
                xml_reader.parse(new InputSource(owned_reader));
            } else {
                // NOTE(review): ownership of this reader is not visible from here,
                // so it is left for getReaderFromZipFile()'s contract to define.
                final Reader reader = getReaderFromZipFile();
                if (reader == null) {
                    throw new PhylogenyParserException(
                            "Zip file \"" + getSource() + "\" appears not to contain any entries");
                }
                xml_reader.parse(new InputSource(reader));
            }
        } else if (getSource() instanceof InputSource) {
            xml_reader.parse((InputSource) getSource());
        } else if (getSource() instanceof InputStream) {
            if (!isZippedInputstream()) {
                final InputStream is = (InputStream) getSource();
                final Reader reader = new InputStreamReader(is);
                xml_reader.parse(new InputSource(reader));
            } else {
                final ZipInputStream zip_is = new ZipInputStream((InputStream) getSource());
                // getNextEntry() returns null when the archive has no entries; that is
                // the condition to report. A freshly constructed reader is never null.
                if (zip_is.getNextEntry() == null) {
                    throw new PhylogenyParserException(
                            "Zip input stream \"" + getSource() + "\" appears not to contain any data");
                }
                final Reader reader = new InputStreamReader(zip_is);
                xml_reader.parse(new InputSource(reader));
            }
        } else if (getSource() instanceof String) {
            final File file = new File(getSource().toString());
            owned_reader = new FileReader(file);
            xml_reader.parse(new InputSource(owned_reader));
        } else if (getSource() instanceof StringBuffer) {
            final StringReader string_reader = new StringReader(getSource().toString());
            xml_reader.parse(new InputSource(string_reader));
        } else {
            throw new PhylogenyParserException(
                    "attempt to parse object of unsupported type: \"" + getSource().getClass() + "\"");
        }
    } catch (final SAXException sax_exception) {
        throw new PhylogenyParserException(
                "Failed to parse [" + getSource() + "]: " + sax_exception.getMessage());
    } catch (final ParserConfigurationException parser_config_exception) {
        throw new PhylogenyParserException(
                "Failed to parse [" + getSource() + "] Problem with xml parser _configuration: "
                        + parser_config_exception.getMessage());
    } catch (final IOException e) {
        throw new PhylogenyParserException(
                "Problem with input source [" + getSource() + "]: \n" + e.getMessage());
    } catch (final Exception e) {
        e.printStackTrace();
        throw new PhylogenyParserException(
                "Failed to parse [" + getSource() + "]: " + e.getMessage());
    } catch (final Error err) {
        err.printStackTrace();
        throw new PhylogenyParserException("Severe error: " + err.getMessage());
    } finally {
        if (owned_reader != null) {
            try {
                owned_reader.close();
            } catch (final IOException ignored) {
                // Parsing has already succeeded or failed; a close failure must not
                // replace that outcome.
            }
        }
    }

    final Phylogeny[] ps = new Phylogeny[handler.getPhylogenies().size()];
    int i = 0;
    for (final Phylogeny phylogeny : handler.getPhylogenies()) {
        ps[i++] = phylogeny;
    }
    return ps;
}