/**
 * Extracts the text of a TEI document by streaming it through a {@link TeiToTxtSaxHandler}.
 *
 * <p>The SAX parser is namespace-aware and non-validating, and loading of DTD grammars and
 * external DTDs is switched off. When the parser cannot be configured or the document cannot be
 * parsed, the failure is logged with its cause, the failed-file counter is incremented and an
 * empty string is returned so that the caller can carry on with the next document.
 *
 * @param absolutePath location of the TEI document; handed to the parser as the system id of the
 *     input source
 * @param encoding not used by this implementation
 * @return the text collected by the handler, or an empty string when parsing failed
 * @throws IOException if the parser fails to read the document
 */
@Override
protected String getDocumentText(String absolutePath, String encoding) throws IOException {
  try {
    SAXParserFactory spf = SAXParserFactory.newInstance();
    spf.setValidating(false);
    spf.setNamespaceAware(true);
    spf.setFeature("http://xml.org/sax/features/namespaces", true);
    spf.setFeature("http://xml.org/sax/features/validation", false);
    spf.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false);
    spf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);

    SAXParser saxParser = spf.newSAXParser();
    XMLReader xmlReader = saxParser.getXMLReader();
    TeiToTxtSaxHandler handler = new TeiToTxtSaxHandler();
    xmlReader.setContentHandler(handler);
    xmlReader.parse(new InputSource(absolutePath));
    return handler.getText();
  } catch (ParserConfigurationException | SAXException e) {
    // Pass the exception as the trailing argument so the log keeps the root cause.
    logger.error("Could not read TEI document: {}", absolutePath, e);
    logger.warn("Ignoring TEI document {}", absolutePath);
    this.failedFileCounter++;
    return "";
  }
}
/**
 * Unmarshals the request entity into an instance of {@code type} using JAXB on top of a SAX
 * reader configured from this reader's settings (XInclude awareness, DTD validation, secure
 * processing and external entity expansion).
 *
 * @param type the class to unmarshal to; also used to look up the JAXB context
 * @param genericType not used by this implementation
 * @param annotations not used by this implementation
 * @param mediaType not used by this implementation
 * @param httpHeaders not used by this implementation
 * @param entityStream the stream holding the XML entity
 * @return the unmarshalled object
 * @throws IOException if the parser cannot be configured or unmarshalling fails; the original
 *     exception is attached as the cause
 * @see MessageBodyReader#readFrom(Class, Type, MediaType, Annotation[], MultivaluedMap,
 *     InputStream)
 */
@Override
public Object readFrom(
    Class<Object> type,
    Type genericType,
    Annotation[] annotations,
    MediaType mediaType,
    MultivaluedMap<String, String> httpHeaders,
    InputStream entityStream)
    throws IOException {
  try {
    SAXParserFactory spf = SAXParserFactory.newInstance();
    spf.setXIncludeAware(isXIncludeAware());
    spf.setNamespaceAware(true);
    spf.setValidating(isValidatingDtd());
    spf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, isSecureProcessing());
    spf.setFeature(
        "http://xml.org/sax/features/external-general-entities", isExpandingEntityRefs());
    spf.setFeature(
        "http://xml.org/sax/features/external-parameter-entities", isExpandingEntityRefs());

    XMLReader reader = spf.newSAXParser().getXMLReader();
    JAXBContext jaxbContext = getJaxbContext(type);
    Unmarshaller um = jaxbContext.createUnmarshaller();
    return um.unmarshal(new SAXSource(reader, new InputSource(entityStream)));
  } catch (Exception e) {
    // Keep the underlying failure as the cause so callers can diagnose it.
    throw new IOException("Could not unmarshal to " + type.getName(), e);
  }
}
/** * constructor, sets up SAX parser, turns off validation, turns on namespaces, sets up content * handler and error handler as this object. sax exceptions go to System.err */ public XBASEXMLParser() { try { SAXParserFactory factory = SAXParserFactory.newInstance(); factory.setFeature("http://xml.org/sax/features/namespaces", false); factory.setFeature("http://xml.org/sax/features/validation", true); parser = factory.newSAXParser(); } catch (Exception e1) { e1.printStackTrace(); } }
/**
 * Prepares this object for use: creates a namespace-aware, non-validating SAX parser factory
 * (namespace prefixes are not reported as attributes) and clears the current target.
 *
 * @throws LagoonException declared by the signature; not thrown by this implementation
 * @throws Error if the XML parser cannot be configured; the underlying exception is attached as
 *     the cause
 */
public void init() throws LagoonException {
  try {
    spf = SAXParserFactory.newInstance();
    spf.setNamespaceAware(true);
    spf.setValidating(false);
    spf.setFeature("http://xml.org/sax/features/namespaces", true);
    spf.setFeature("http://xml.org/sax/features/namespace-prefixes", false);
    spf.setFeature("http://xml.org/sax/features/validation", false);
  } catch (ParserConfigurationException e) {
    // Attach the cause: without it the reason for the failure is lost.
    throw new Error("Unable to configure XML parser", e);
  } catch (SAXException e) {
    throw new Error("Unable to configure XML parser", e);
  }
  target = null;
}
/**
 * Unmarshals an XML payload into a {@link Service} using JAXB on top of a SAX reader that has
 * the JAXP secure-processing feature enabled.
 *
 * @param payload the XML text; it is encoded as UTF-8 before being parsed
 * @return the unmarshalled service
 * @throws ResponseParseException if the JAXB context, the parser or the unmarshalling fails
 */
private static Service parseServicePayload(String payload) throws ResponseParseException {
  try {
    Unmarshaller unmarshaller = JAXBContext.newInstance(Service.class).createUnmarshaller();
    byte[] utf8Payload = payload.getBytes(Charset.forName("UTF-8"));

    // NOTE(review): only secure processing is enabled here; DTDs and external entities are not
    // explicitly disabled - confirm that is sufficient for the payloads this receives.
    SAXParserFactory parserFactory = SAXParserFactory.newInstance();
    parserFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
    XMLReader reader = parserFactory.newSAXParser().getXMLReader();

    SAXSource source = new SAXSource(reader, new InputSource(new ByteArrayInputStream(utf8Payload)));
    return (Service) unmarshaller.unmarshal(source);
  } catch (JAXBException
      | FactoryConfigurationError
      | ParserConfigurationException
      | SAXException e) {
    throw new ResponseParseException(e);
  }
}
/**
 * Returns the shared SAX parser factory, creating and configuring it on first use.
 *
 * <p>Three features are requested: {@code namespaces} on, {@code validation} off and
 * {@code load-external-dtd} off. A feature the parser rejects is skipped silently. If at least
 * one feature was rejected, a fresh factory is created and only the accepted features are applied
 * to it again. Finally the factory is made namespace-aware and non-validating.
 *
 * @return the shared factory
 * @throws FactoryConfigurationError if no factory implementation can be instantiated
 */
private static synchronized SAXParserFactory getSAXParserFactory()
    throws FactoryConfigurationError {
  if (ourSAXParserFactory != null) {
    return ourSAXParserFactory;
  }

  final String[] featureNames = {
    "http://xml.org/sax/features/namespaces",
    "http://xml.org/sax/features/validation",
    "http://apache.org/xml/features/nonvalidating/load-external-dtd"
  };
  final Boolean[] featureValues = {Boolean.TRUE, Boolean.FALSE, Boolean.FALSE};

  ourSAXParserFactory = SAXParserFactory.newInstance();
  Map accepted = new SVNHashMap();
  for (int i = 0; i < featureNames.length; i++) {
    try {
      ourSAXParserFactory.setFeature(featureNames[i], featureValues[i] == Boolean.TRUE);
      accepted.put(featureNames[i], featureValues[i]);
    } catch (SAXNotRecognizedException e) {
      // best effort: feature unknown to this parser, leave it out
    } catch (SAXNotSupportedException e) {
      // best effort: feature cannot be set on this parser, leave it out
    } catch (ParserConfigurationException e) {
      // best effort: leave the feature out
    }
  }

  if (accepted.size() < 3) {
    // Start over with a fresh factory and apply only what was accepted above.
    ourSAXParserFactory = SAXParserFactory.newInstance();
    Iterator keys = accepted.keySet().iterator();
    while (keys.hasNext()) {
      String feature = (String) keys.next();
      try {
        ourSAXParserFactory.setFeature(feature, accepted.get(feature) == Boolean.TRUE);
      } catch (SAXNotRecognizedException e) {
        // ignored, see above
      } catch (SAXNotSupportedException e) {
        // ignored, see above
      } catch (ParserConfigurationException e) {
        // ignored, see above
      }
    }
  }

  ourSAXParserFactory.setNamespaceAware(true);
  ourSAXParserFactory.setValidating(false);
  return ourSAXParserFactory;
}
/** * Parse the xml in the main index to read fields for this object * * @param is InputStream for main index file * @throws org.xml.sax.SAXException * @throws java.io.IOException */ private void parse(InputStream is) throws SAXException, IOException { SAXParserFactory factory = SAXParserFactory.newInstance(); // Logger.normal(this, "Parsing main index"); try { factory.setNamespaceAware(true); factory.setFeature("http://xml.org/sax/features/namespaces", true); factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); SAXParser saxParser = factory.newSAXParser(); MainIndexParser parser = new MainIndexParser(); saxParser.parse(is, parser); version = parser.getVersion(); if (version == 1) { indexMeta.put("title", parser.getHeader("title")); indexMeta.put("ownerName", parser.getHeader("owner")); indexMeta.put("ownerEmail", parser.getHeader("email")); subIndiceList = new ArrayList<String>(); subIndice = new TreeMap<String, SubIndex>(); for (String key : parser.getSubIndice()) { subIndiceList.add(key); String stillbase; try { FreenetURI furi = new FreenetURI(indexuri); stillbase = (furi.isUSK() ? furi.sskForUSK() : furi).toString(); } catch (MalformedURLException e) { stillbase = indexuri; } subIndice.put(key, new SubIndex(stillbase, "index_" + key + ".xml")); } Collections.sort(subIndiceList); } } catch (ParserConfigurationException e) { Logger.error(this, "SAX ParserConfigurationException", e); throw new SAXException(e); } }
/** * Returns a Capabilities object created from parsing the XML document * * @param document */ public SosCapabilities parseCapabilities(String document) { // check if there is a valid XML declaration // TODO: needs to be case insensitive if (!document.trim().startsWith("<?xml")) { log.warn("The Capabilities document is not a valid XML document"); return null; } // get a factory SAXParserFactory spf = SAXParserFactory.newInstance(); try { spf.setFeature("http://xml.org/sax/features/namespaces", true); // get a new instance of parser SAXParser sp = spf.newSAXParser(); ByteArrayInputStream bs = new ByteArrayInputStream(document.getBytes()); log.info("Starting to parse Capabilities document..."); // parse the document and also register this class for call backs sp.parse(bs, this); log.info("Done parsing Capabilities document."); // return the capabilities object return capabilities; } catch (SAXException e) { log.error("Error parsing Capabilities document: " + e.getMessage()); } catch (ParserConfigurationException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } catch (Exception e) { e.printStackTrace(); } log.error("Something went wrong when parsing Capabilities document"); // default in case of errors return null; }
public Document parseDocument(String fileName) { SAXParserFactory factory = SAXParserFactory.newInstance(); SAXParser parser; try { factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); parser = factory.newSAXParser(); parser.parse(new File(fileName), this); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (ParserConfigurationException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (SAXException e) { // TODO Auto-generated catch block e.printStackTrace(); } return document; }
public void importXML( String path, InputStream inputStream, int uuidBehavior, int rootBehavior, Map<String, String> replacements) throws IOException, InvalidSerializedDataException, RepositoryException { JCRNodeWrapper node = getNode(path); try { if (!node.isCheckedOut()) { checkout(node); } } catch (UnsupportedRepositoryOperationException ex) { // versioning not supported } DocumentViewImportHandler documentViewImportHandler = new DocumentViewImportHandler(this, path); documentViewImportHandler.setRootBehavior(rootBehavior); documentViewImportHandler.setUuidBehavior(uuidBehavior); documentViewImportHandler.setReplacements(replacements); try { SAXParserFactory factory; factory = new SAXParserFactoryImpl(); factory.setNamespaceAware(true); factory.setValidating(false); factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); SAXParser parser = factory.newSAXParser(); parser.parse(inputStream, documentViewImportHandler); } catch (SAXParseException e) { logger.error("Cannot import - File is not a valid XML", e); } catch (Exception e) { logger.error("Cannot import", e); } }
/** * Create the set of training and evaluation sets from the annotated examples with extraction of * citations in the patent description body. * * @param rank rank associated to the set for n-fold data generation * @param type type of data to be created, 0 is training data, 1 is evaluation data */ public void createDataSet( String setName, String rank, String corpusPath, String outputPath, int type) { int nbFiles = 0; int nbNPLRef = 0; int nbPatentRef = 0; int maxRef = 0; try { // PATENT REF. textual data // we use a SAX parser on the patent XML files MarecSaxParser sax = new MarecSaxParser(); sax.patentReferences = true; sax.nplReferences = false; int srCitations = 0; int previousSrCitations = 0; int withSR = 0; List<OffsetPosition> journalsPositions = null; List<OffsetPosition> abbrevJournalsPositions = null; List<OffsetPosition> conferencesPositions = null; List<OffsetPosition> publishersPositions = null; if (type == 0) { // training set sax.setN(trainWindow); } else { // for the test set we enlarge the focus window to include all the document. sax.setN(-1); } // get a factory /*SAXParserFactory spf = SAXParserFactory.newInstance(); spf.setValidating(false); spf.setFeature("http://xml.org/sax/features/namespaces", false); spf.setFeature("http://xml.org/sax/features/validation", false); LinkedList<File> fileList = new LinkedList<File>(); if (setName == null) { fileList.add(new File(corpusPath)); } else if (rank == null) { fileList.add(new File(corpusPath)); } else { // n-fold evaluation fileList.add(new File(corpusPath + File.separator + setName + "ing" + rank + File.separator)); } Writer writer = null; if ((setName == null) || (setName.length() == 0)) { writer = new OutputStreamWriter(new FileOutputStream( new File(outputPath + "/patent.train"), false), "UTF-8"); } else if (rank == null) { writer = new OutputStreamWriter(new FileOutputStream( new File(outputPath + "/patent." 
+ setName), false), "UTF-8"); } else { writer = new OutputStreamWriter(new FileOutputStream( new File(outputPath + setName + "ing" + rank + "/patent." + setName), false), "UTF-8"); } while (fileList.size() > 0) { File file = fileList.removeFirst(); if (file.isDirectory()) { for (File subFile : file.listFiles()) fileList.addLast(subFile); } else { if (file.getName().endsWith(".xml")) { nbFiles++; System.out.println(file.getAbsolutePath()); try { //get a new instance of parser SAXParser p = spf.newSAXParser(); FileInputStream in = new FileInputStream(file); sax.setFileName(file.getName()); p.parse(in, sax); //writer1.write("\n"); nbPatentRef += sax.getNbPatentRef(); if (sax.citations != null) { if (sax.citations.size() > previousSrCitations) { previousSrCitations = sax.citations.size(); withSR++; } } journalsPositions = sax.journalsPositions; abbrevJournalsPositions = sax.abbrevJournalsPositions; conferencesPositions = sax.conferencesPositions; publishersPositions = sax.publishersPositions; if (sax.accumulatedText != null) { String text = sax.accumulatedText.toString(); if (text.trim().length() > 0) { // add features for the patent tokens addFeatures(text, writer, journalsPositions, abbrevJournalsPositions, conferencesPositions, publishersPositions); writer.write("\n \n"); } } } catch (Exception e) { throw new GrobidException("An exception occured while running Grobid.", e); } } } }*/ // NPL REF. textual data /*sax = new MarecSaxParser(); sax.patentReferences = false; sax.nplReferences = true; if (type == 0) { // training set sax.setN(trainWindow); } else { // for the test set we enlarge the focus window to include all the document. 
sax.setN(-1); } // get a factory spf = SAXParserFactory.newInstance(); spf.setValidating(false); spf.setFeature("http://xml.org/sax/features/namespaces", false); spf.setFeature("http://xml.org/sax/features/validation", false); fileList = new LinkedList<File>(); if (setName == null) { fileList.add(new File(corpusPath)); } else if (rank == null) { fileList.add(new File(corpusPath)); } else { fileList.add(new File(corpusPath + File.separator + setName + "ing" + rank + File.separator)); } if ((setName == null) || (setName.length() == 0)) { writer = new OutputStreamWriter(new FileOutputStream( new File(outputPath + "/npl.train"), false), "UTF-8"); } else if (rank == null) { writer = new OutputStreamWriter(new FileOutputStream( new File(outputPath + "/npl." + setName), false), "UTF-8"); } else { writer = new OutputStreamWriter(new FileOutputStream( new File(outputPath + File.separator + setName + "ing" + rank + File.separator + "npl." + setName), false), "UTF-8"); } while (fileList.size() > 0) { File file = fileList.removeFirst(); if (file.isDirectory()) { for (File subFile : file.listFiles()) fileList.addLast(subFile); } else { if (file.getName().endsWith(".xml")) { //nbFiles++; //String text = Files.readFromFile(file,"UTF-8"); try { //get a new instance of parser SAXParser p = spf.newSAXParser(); FileInputStream in = new FileInputStream(file); sax.setFileName(file.toString()); p.parse(in, sax); //writer2.write("\n"); nbNPLRef += sax.getNbNPLRef(); if (sax.nbAllRef > maxRef) { maxRef = sax.nbAllRef; } if (sax.citations != null) { if (sax.citations.size() > previousSrCitations) { previousSrCitations = sax.citations.size(); withSR++; } } journalsPositions = sax.journalsPositions; abbrevJournalsPositions = sax.abbrevJournalsPositions; conferencesPositions = sax.conferencesPositions; publishersPositions = sax.publishersPositions; //totalLength += sax.totalLength; if (sax.accumulatedText != null) { String text = sax.accumulatedText.toString(); // add features for NPL 
addFeatures(text, writer, journalsPositions, abbrevJournalsPositions, conferencesPositions, publishersPositions); writer.write("\n"); } } catch (Exception e) { throw new GrobidException("An exception occured while running Grobid.", e); } } } } if (sax.citations != null) srCitations += sax.citations.size();*/ // Patent + NPL REF. textual data (the "all" model) sax = new MarecSaxParser(); sax.patentReferences = true; sax.nplReferences = true; if (type == 0) { // training set sax.setN(trainWindow); } else { // for the test set we enlarge the focus window to include all the document. sax.setN(-1); } // get a factory SAXParserFactory spf = SAXParserFactory.newInstance(); spf.setValidating(false); spf.setFeature("http://xml.org/sax/features/namespaces", false); spf.setFeature("http://xml.org/sax/features/validation", false); LinkedList<File> fileList = new LinkedList<File>(); if (setName == null) { fileList.add(new File(corpusPath)); } else if (rank == null) { fileList.add(new File(corpusPath)); } else { fileList.add( new File(corpusPath + File.separator + setName + "ing" + rank + File.separator)); } Writer writer = null; if ((setName == null) || (setName.length() == 0)) { writer = new OutputStreamWriter( new FileOutputStream(new File(outputPath + File.separator + "all.train"), false), "UTF-8"); } else if (rank == null) { writer = new OutputStreamWriter( new FileOutputStream( new File(outputPath + File.separator + "all." + setName), false), "UTF-8"); } else { writer = new OutputStreamWriter( new FileOutputStream( new File( outputPath + File.separator + setName + "ing" + rank + File.separator + "all." 
+ setName), false), "UTF-8"); } // int totalLength = 0; while (fileList.size() > 0) { File file = fileList.removeFirst(); if (file.isDirectory()) { for (File subFile : file.listFiles()) { fileList.addLast(subFile); } } else { if (file.getName().endsWith(".xml")) { nbFiles++; try { // get a new instance of parser SAXParser p = spf.newSAXParser(); FileInputStream in = new FileInputStream(file); sax.setFileName(file.toString()); p.parse(in, sax); // writer3.write("\n"); nbNPLRef += sax.getNbNPLRef(); nbPatentRef += sax.getNbPatentRef(); if (sax.nbAllRef > maxRef) { maxRef = sax.nbAllRef; } if (sax.citations != null) { if (sax.citations.size() > previousSrCitations) { previousSrCitations = sax.citations.size(); withSR++; } } journalsPositions = sax.journalsPositions; abbrevJournalsPositions = sax.abbrevJournalsPositions; conferencesPositions = sax.conferencesPositions; publishersPositions = sax.publishersPositions; // totalLength += sax.totalLength; if (sax.accumulatedText != null) { String text = sax.accumulatedText.toString(); // add features for patent+NPL addFeatures( text, writer, journalsPositions, abbrevJournalsPositions, conferencesPositions, publishersPositions); writer.write("\n"); } } catch (Exception e) { throw new GrobidException("An exception occured while running Grobid.", e); } } } } if (sax.citations != null) { srCitations += sax.citations.size(); } if (setName != null) { System.out.println(setName + "ing on " + nbFiles + " files"); } else { System.out.println("training on " + nbFiles + " files"); } // System.out.println("Number of file with search report: " + withSR); System.out.println("Number of references: " + (nbNPLRef + nbPatentRef)); System.out.println("Number of patent references: " + nbPatentRef); System.out.println("Number of NPL references: " + nbNPLRef); // System.out.println("Number of search report citations: " + srCitations); System.out.println( "Average number of references: " + TextUtilities.formatTwoDecimals((double) (nbNPLRef + 
nbPatentRef) / nbFiles)); System.out.println("Max number of references in file: " + maxRef); /*if ((setName == null) || (setName.length() == 0)) { System.out.println("patent data set under: " + outputPath + "/patent.train"); } else { System.out.println("patent data set under: " + outputPath + "/patent." + setName); } if ((setName == null) || (setName.length() == 0)) { System.out.println("npl data set under: " + outputPath + "/npl.train"); } else { System.out.println("npl data set under: " + outputPath + "/npl." + setName); }*/ if ((setName == null) || (setName.length() == 0)) { System.out.println("common data set under: " + outputPath + "/all.train"); } else { System.out.println("common data set under: " + outputPath + "/all." + setName); } } catch (Exception e) { throw new GrobidException("An exception occurred while running Grobid.", e); } }
/**
 * Parses the fetched subindex on behalf of every request currently waiting on it.
 *
 * <p>The work is done in three stages while holding the lock on {@code parsingSubindex}:
 *
 * <ol>
 *   <li>the raw XML is split byte by byte into two temporary buckets: one with everything
 *       outside the {@code <files>} element and one with the {@code <files>} element itself;
 *       both start with a copy of the leading declaration tag;
 *   <li>the first bucket is parsed with a {@code StageTwoHandler};
 *   <li>the second bucket is parsed with a {@code StageThreeHandler}.
 * </ol>
 *
 * Afterwards every request in {@code parsingSubindex} is marked finished and the list is cleared.
 *
 * @throws TaskAbortException if the XML cannot be split (declaration longer than 100 bytes, no
 *     complete {@code <files>} element followed by further data, or an I/O error) or if either
 *     parse fails
 */
public void parseSubIndex() throws TaskAbortException {
  synchronized (parsingSubindex) {
    // Transfer all requests waiting on this subindex to the parsing list
    synchronized (waitingOnSubindex) {
      parsingSubindex.addAll(waitingOnSubindex);
      waitingOnSubindex.removeAll(parsingSubindex);
    }
    // Set status of all those about to be parsed to PARSE
    for (FindRequest r : parsingSubindex) r.setStage(FindRequest.Stages.PARSE);
    // Multi-stage parse to minimise memory usage.
    // Stage 1: Extract the declaration (first tag), copy everything before "<files " to one
    // bucket, plus everything after "</files>".
    // Copy the declaration, plus everything between the two (inclusive) to another bucket.
    Bucket mainBucket, filesBucket;
    try {
      // NOTE(review): none of the streams opened in this try block are closed when an exception
      // is thrown part-way through; the close() calls sit at the end of the happy path only, and
      // the input stream is never closed here at all.
      InputStream is = bucket.getInputStream();
      mainBucket = pr.getNode().clientCore.tempBucketFactory.makeBucket(-1);
      filesBucket = pr.getNode().clientCore.tempBucketFactory.makeBucket(-1);
      OutputStream mainOS = new BufferedOutputStream(mainBucket.getOutputStream());
      OutputStream filesOS = new BufferedOutputStream(filesBucket.getOutputStream());
      // OutputStream mainOS = new BufferedOutputStream(new FileOutputStream("main.tmp"));
      // OutputStream filesOS = new BufferedOutputStream(new FileOutputStream("files.tmp"));
      BufferedInputStream bis = new BufferedInputStream(is);
      byte greaterThan = ">".getBytes("UTF-8")[0];
      // The opening tag may be followed by attributes ("<files ") or closed at once ("<files>");
      // both markers have the same length so a single sliding window can test for either.
      byte[] filesPrefix = "<files ".getBytes("UTF-8");
      byte[] filesPrefixAlt = "<files>".getBytes("UTF-8");
      assert (filesPrefix.length == filesPrefixAlt.length);
      byte[] filesEnd = "</files>".getBytes("UTF-8");
      // States of the splitter, entered in this order.
      final int MODE_SEEKING_DECLARATION = 1;
      final int MODE_SEEKING_FILES = 2;
      final int MODE_COPYING_FILES = 3;
      final int MODE_COPYING_REST = 4;
      int mode = MODE_SEEKING_DECLARATION;
      int b;
      // The declaration bytes are recorded here; the buffer size caps the declaration at 100
      // bytes.
      byte[] declarationBuf = new byte[100];
      int declarationPtr = 0;
      // Sliding windows holding the most recent bytes, compared against the start/end markers.
      byte[] prefixBuffer = new byte[filesPrefix.length];
      int prefixPtr = 0;
      byte[] endBuffer = new byte[filesEnd.length];
      int endPtr = 0;
      while ((b = bis.read()) != -1) {
        if (mode == MODE_SEEKING_DECLARATION) {
          // Copy bytes to BOTH outputs up to and including the first '>'.
          if (declarationPtr == declarationBuf.length)
            throw new TaskAbortException("Could not split up XML: declaration too long", null);
          declarationBuf[declarationPtr++] = (byte) b;
          mainOS.write(b);
          filesOS.write(b);
          if (b == greaterThan) {
            mode = MODE_SEEKING_FILES;
          }
        } else if (mode == MODE_SEEKING_FILES) {
          if (prefixPtr != prefixBuffer.length) {
            // Still filling the window for the first time.
            prefixBuffer[prefixPtr++] = (byte) b;
          } else {
            // Window is full: it is tested when the NEXT byte arrives.
            if (Fields.byteArrayEqual(filesPrefix, prefixBuffer)
                || Fields.byteArrayEqual(filesPrefixAlt, prefixBuffer)) {
              // Start marker found: it and the current byte belong to the files bucket.
              mode = MODE_COPYING_FILES;
              filesOS.write(prefixBuffer);
              filesOS.write(b);
            } else {
              // No match: the oldest byte goes to the main bucket and the window slides by one.
              mainOS.write(prefixBuffer[0]);
              System.arraycopy(prefixBuffer, 1, prefixBuffer, 0, prefixBuffer.length - 1);
              prefixBuffer[prefixBuffer.length - 1] = (byte) b;
            }
          }
        } else if (mode == MODE_COPYING_FILES) {
          if (endPtr != endBuffer.length) {
            endBuffer[endPtr++] = (byte) b;
          } else {
            // NOTE(review): as above, the window is only tested when a further byte is read, so
            // input that ends immediately after "</files>" never reaches MODE_COPYING_REST and
            // is rejected below.
            if (Fields.byteArrayEqual(filesEnd, endBuffer)) {
              // End marker found: it closes the files bucket; the current byte is the first
              // byte of the remainder, which goes to the main bucket.
              mode = MODE_COPYING_REST;
              filesOS.write(endBuffer);
              mainOS.write(b);
            } else {
              // No match: the oldest byte goes to the files bucket and the window slides by one.
              filesOS.write(endBuffer[0]);
              System.arraycopy(endBuffer, 1, endBuffer, 0, endBuffer.length - 1);
              endBuffer[endBuffer.length - 1] = (byte) b;
            }
          }
        } else if (mode == MODE_COPYING_REST) {
          mainOS.write(b);
        }
      }
      // Anything other than the final state means the expected structure was not found.
      if (mode != MODE_COPYING_REST)
        throw new TaskAbortException("Could not split up XML: Last mode was " + mode, null);
      mainOS.close();
      filesOS.close();
    } catch (IOException e) {
      throw new TaskAbortException("Could not split XML: ", e);
    }
    if (logMINOR) Logger.minor(this, "Finished splitting XML");
    try {
      SAXParserFactory factory = SAXParserFactory.newInstance();
      // Reject any document that carries a DOCTYPE declaration.
      factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
      SAXParser saxParser = factory.newSAXParser();
      // Stage 2: Parse the first bucket, find the keyword we want, find the file id's.
      InputStream is = mainBucket.getInputStream();
      StageTwoHandler stageTwoHandler = new StageTwoHandler();
      saxParser.parse(is, stageTwoHandler);
      if (logMINOR) Logger.minor(this, "Finished stage two XML parse");
      is.close();
      // Stage 3: Parse the second bucket, extract the <file>'s for the specific ID's.
      is = filesBucket.getInputStream();
      StageThreeHandler stageThreeHandler = new StageThreeHandler();
      saxParser.parse(is, stageThreeHandler);
      if (logMINOR) Logger.minor(this, "Finished stage three XML parse");
      is.close();
      Logger.minor(this, "parsing finished " + parsingSubindex.toString());
      for (FindRequest findRequest : parsingSubindex) {
        findRequest.setFinished();
      }
      parsingSubindex.clear();
    } catch (Exception err) {
      // Any failure in stage 2 or 3 (configuration, I/O, parse) is logged and aborts the task.
      Logger.error(this, "Error parsing " + filename, err);
      throw new TaskAbortException("Could not parse XML: ", err);
    }
  }
}
/**
 * Loads the translet class named by {@code _className}, builds a DOM for the document at
 * {@code _fileName} and runs the transformation on it.
 *
 * <p>When {@code _iterations} is -1 the transformation runs once. When it is positive, the
 * transformation is repeated that many times and the average time per run and the throughput are
 * printed to {@code System.err}. All failures are reported on {@code System.err}; stack traces
 * are only printed when {@code _debug} is set.
 */
private void doTransform() {
  try {
    Class clazz = ObjectFactory.findProviderClass(this._className, true);
    AbstractTranslet translet = (AbstractTranslet) clazz.newInstance();
    translet.postInitialization();

    // Create a namespace-aware SAX parser; fall back to the JAXP switch if the feature is
    // rejected.
    SAXParserFactory factory = SAXParserFactory.newInstance();
    try {
      factory.setFeature("http://xml.org/sax/features/namespaces", true);
    } catch (Exception e) {
      factory.setNamespaceAware(true);
    }
    SAXParser parser = factory.newSAXParser();
    XMLReader reader = parser.getXMLReader();

    XSLTCDTMManager dtmManager =
        (XSLTCDTMManager) XSLTCDTMManager.getDTMManagerClass().newInstance();

    // Only translets that implement StripFilter get a whitespace filter.
    DTMWSFilter wsfilter;
    if (translet instanceof StripFilter) {
      wsfilter = new DOMWSFilter(translet);
    } else {
      wsfilter = null;
    }

    DOMEnhancedForDTM dom =
        (DOMEnhancedForDTM)
            dtmManager.getDTM(
                new SAXSource(reader, new InputSource(this._fileName)),
                false,
                wsfilter,
                true,
                false,
                translet.hasIdCall());

    dom.setDocumentURI(this._fileName);
    translet.prepassDocument(dom);

    // Pass any command-line parameters on to the translet.
    int n = this._params.size();
    for (int i = 0; i < n; i++) {
      Parameter param = (Parameter) this._params.elementAt(i);
      translet.addParameter(param._name, param._value);
    }

    TransletOutputHandlerFactory tohFactory = TransletOutputHandlerFactory.newInstance();
    tohFactory.setOutputType(0);
    tohFactory.setEncoding(translet._encoding);
    tohFactory.setOutputMethod(translet._method);

    if (this._iterations == -1) {
      translet.transform(dom, tohFactory.getSerializationHandler());
    } else if (this._iterations > 0) {
      long mm = System.currentTimeMillis();
      for (int i = 0; i < this._iterations; i++) {
        translet.transform(dom, tohFactory.getSerializationHandler());
      }
      mm = System.currentTimeMillis() - mm;

      // Floating-point division: integer division would truncate the average and could turn
      // the throughput into a division by zero for fast runs.
      double msPerTransform = (double) mm / (double) this._iterations;
      System.err.println("\n<!--");
      System.err.println(" transform = " + msPerTransform + " ms");
      System.err.println(" throughput = " + 1000.0D / msPerTransform + " tps");
      System.err.println("-->");
    }
  } catch (TransletException e) {
    if (this._debug) e.printStackTrace();
    System.err.println(new ErrorMsg("RUNTIME_ERROR_KEY") + e.getMessage());
  } catch (RuntimeException e) {
    if (this._debug) e.printStackTrace();
    System.err.println(new ErrorMsg("RUNTIME_ERROR_KEY") + e.getMessage());
  } catch (FileNotFoundException e) {
    if (this._debug) e.printStackTrace();
    ErrorMsg err = new ErrorMsg("FILE_NOT_FOUND_ERR", this._fileName);
    System.err.println(new ErrorMsg("RUNTIME_ERROR_KEY") + err.toString());
  } catch (MalformedURLException e) {
    if (this._debug) e.printStackTrace();
    ErrorMsg err = new ErrorMsg("INVALID_URI_ERR", this._fileName);
    System.err.println(new ErrorMsg("RUNTIME_ERROR_KEY") + err.toString());
  } catch (ClassNotFoundException e) {
    if (this._debug) e.printStackTrace();
    ErrorMsg err = new ErrorMsg("CLASS_NOT_FOUND_ERR", this._className);
    System.err.println(new ErrorMsg("RUNTIME_ERROR_KEY") + err.toString());
  } catch (UnknownHostException e) {
    if (this._debug) e.printStackTrace();
    ErrorMsg err = new ErrorMsg("INVALID_URI_ERR", this._fileName);
    System.err.println(new ErrorMsg("RUNTIME_ERROR_KEY") + err.toString());
  } catch (SAXException e) {
    // Prefer the message of the wrapped exception when there is one.
    Exception ex = e.getException();
    if (this._debug) {
      if (ex != null) ex.printStackTrace();
      e.printStackTrace();
    }
    System.err.print(new ErrorMsg("RUNTIME_ERROR_KEY"));
    if (ex != null) {
      System.err.println(ex.getMessage());
    } else {
      System.err.println(e.getMessage());
    }
  } catch (Exception e) {
    if (this._debug) e.printStackTrace();
    System.err.println(new ErrorMsg("RUNTIME_ERROR_KEY") + e.getMessage());
  }
}
/**
 * Parses the configured source with a namespace-aware SAX parser and returns the phylogenies
 * collected by a {@link TolXmlHandler}.
 *
 * <p>When a schema location is configured, schema validation features and properties are switched
 * on before parsing. The source returned by {@code getSource()} may be a {@link File} (plain or
 * {@code .zip}), an {@link InputSource}, an {@link InputStream} (plain or zipped), a {@link
 * String} file name, or a {@link StringBuffer} holding the XML text.
 *
 * <p>Readers that this method opens itself (file readers and the zip-file reader) are closed
 * before returning; streams supplied by the caller are left open.
 *
 * @return the parsed phylogenies, in the order reported by the handler
 * @throws IOException declared for callers; I/O failures during parsing are reported as {@link
 *     PhylogenyParserException}
 * @throws PhylogenyParserException if the parser cannot be configured, the source type is
 *     unsupported, a zip source contains no entry, or parsing fails
 */
public Phylogeny[] parse() throws IOException, PhylogenyParserException {
  reset();
  final TolXmlHandler handler = new TolXmlHandler();
  final SAXParserFactory factory = SAXParserFactory.newInstance();
  factory.setNamespaceAware(true);
  try {
    if (!ForesterUtil.isEmpty(getSchemaLocation())) {
      factory.setFeature(SAX_FEATURES_VALIDATION, true);
      factory.setFeature(APACHE_FEATURES_VALIDATION_SCHEMA, true);
      factory.setFeature(APACHE_FEATURES_VALIDATION_SCHEMA_FULL, true);
    }
  } catch (final SAXNotRecognizedException e) {
    e.printStackTrace();
    throw new PhylogenyParserException("sax not recognized exception: " + e.getMessage());
  } catch (final SAXNotSupportedException e) {
    e.printStackTrace();
    throw new PhylogenyParserException("sax not supported exception: " + e.getMessage());
  } catch (final ParserConfigurationException e) {
    e.printStackTrace();
    throw new PhylogenyParserException("parser _configuration exception: " + e.getMessage());
  } catch (final Exception e) {
    e.printStackTrace();
    throw new PhylogenyParserException("error while configuring sax parser: " + e.getMessage());
  }
  try {
    final SAXParser parser = factory.newSAXParser();
    if (!ForesterUtil.isEmpty(getSchemaLocation())) {
      parser.setProperty(JAXP_SCHEMA_LANGUAGE, W3C_XML_SCHEMA);
      parser.setProperty(JAXP_SCHEMA_SOURCE, getSchemaLocation());
      parser.setProperty(APACHE_PROPERTIES_SCHEMA_EXTERNAL_LOCATION, getSchemaLocation());
    }
    final XMLReader xml_reader = parser.getXMLReader();
    xml_reader.setContentHandler(handler);
    xml_reader.setErrorHandler(new TolParserErrorHandler());
    final Object source = getSource();
    if (source instanceof File) {
      if (!source.toString().toLowerCase().endsWith(".zip")) {
        // The reader is opened here, so it is closed here as well.
        try (Reader file_reader = new FileReader((File) source)) {
          xml_reader.parse(new InputSource(file_reader));
        }
      } else {
        // A null resource is permitted in try-with-resources; close() is simply skipped.
        try (Reader zip_reader = getReaderFromZipFile()) {
          if (zip_reader == null) {
            throw new PhylogenyParserException(
                "Zip file \"" + source + "\" appears not to contain any entries");
          }
          xml_reader.parse(new InputSource(zip_reader));
        }
      }
    } else if (source instanceof InputSource) {
      xml_reader.parse((InputSource) source);
    } else if (source instanceof InputStream) {
      // Caller-supplied streams are not closed by this method.
      if (!isZippedInputstream()) {
        final Reader reader = new InputStreamReader((InputStream) source);
        xml_reader.parse(new InputSource(reader));
      } else {
        final ZipInputStream zip_is = new ZipInputStream((InputStream) source);
        // getNextEntry() returns null when the archive has no (further) entry; that is the
        // only reliable signal that there is nothing to parse.
        if (zip_is.getNextEntry() == null) {
          throw new PhylogenyParserException(
              "Zip input stream \"" + source + "\" appears not to contain any data");
        }
        xml_reader.parse(new InputSource(new InputStreamReader(zip_is)));
      }
    } else if (source instanceof String) {
      try (Reader file_reader = new FileReader(new File(source.toString()))) {
        xml_reader.parse(new InputSource(file_reader));
      }
    } else if (source instanceof StringBuffer) {
      final StringReader string_reader = new StringReader(source.toString());
      xml_reader.parse(new InputSource(string_reader));
    } else {
      throw new PhylogenyParserException(
          "attempt to parse object of unsupported type: \"" + source.getClass() + "\"");
    }
  } catch (final SAXException sax_exception) {
    throw new PhylogenyParserException(
        "Failed to parse [" + getSource() + "]: " + sax_exception.getMessage());
  } catch (final ParserConfigurationException parser_config_exception) {
    throw new PhylogenyParserException(
        "Failed to parse ["
            + getSource()
            + "] Problem with xml parser _configuration: "
            + parser_config_exception.getMessage());
  } catch (final IOException e) {
    throw new PhylogenyParserException(
        "Problem with input source [" + getSource() + "]: \n" + e.getMessage());
  } catch (final Exception e) {
    e.printStackTrace();
    throw new PhylogenyParserException(
        "Failed to parse [" + getSource() + "]: " + e.getMessage());
  } catch (final Error err) {
    err.printStackTrace();
    throw new PhylogenyParserException("Severe error: " + err.getMessage());
  }
  // Copy the handler's result into an array, preserving iteration order.
  final Phylogeny[] ps = new Phylogeny[handler.getPhylogenies().size()];
  int i = 0;
  for (final Phylogeny phylogeny : handler.getPhylogenies()) {
    ps[i++] = phylogeny;
  }
  return ps;
}
/** Creates a SAX2 InputSource object from a TrAX Source object */ public static InputSource getInputSource(XSLTC xsltc, Source source) throws TransformerConfigurationException { InputSource input = null; String systemId = source.getSystemId(); try { // Try to get InputSource from SAXSource input if (source instanceof SAXSource) { final SAXSource sax = (SAXSource) source; input = sax.getInputSource(); // Pass the SAX parser to the compiler try { XMLReader reader = sax.getXMLReader(); /* * Fix for bug 24695 * According to JAXP 1.2 specification if a SAXSource * is created using a SAX InputSource the Transformer or * TransformerFactory creates a reader via the * XMLReaderFactory if setXMLReader is not used */ if (reader == null) { try { reader = XMLReaderFactory.createXMLReader(); } catch (Exception e) { try { // Incase there is an exception thrown // resort to JAXP SAXParserFactory parserFactory = SAXParserFactory.newInstance(); parserFactory.setNamespaceAware(true); if (xsltc.isSecureProcessing()) { try { parserFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); } catch (org.xml.sax.SAXException se) { } } reader = parserFactory.newSAXParser().getXMLReader(); } catch (ParserConfigurationException pce) { throw new TransformerConfigurationException("ParserConfigurationException", pce); } } } reader.setFeature("http://xml.org/sax/features/namespaces", true); reader.setFeature("http://xml.org/sax/features/namespace-prefixes", false); xsltc.setXMLReader(reader); } catch (SAXNotRecognizedException snre) { throw new TransformerConfigurationException("SAXNotRecognizedException ", snre); } catch (SAXNotSupportedException snse) { throw new TransformerConfigurationException("SAXNotSupportedException ", snse); } catch (SAXException se) { throw new TransformerConfigurationException("SAXException ", se); } } // handle DOMSource else if (source instanceof DOMSource) { final DOMSource domsrc = (DOMSource) source; final Document dom = (Document) domsrc.getNode(); final 
DOM2SAX dom2sax = new DOM2SAX(dom); xsltc.setXMLReader(dom2sax); // Try to get SAX InputSource from DOM Source. input = SAXSource.sourceToInputSource(source); if (input == null) { input = new InputSource(domsrc.getSystemId()); } } // Try to get InputStream or Reader from StreamSource else if (source instanceof StreamSource) { final StreamSource stream = (StreamSource) source; final InputStream istream = stream.getInputStream(); final Reader reader = stream.getReader(); xsltc.setXMLReader(null); // Clear old XML reader // Create InputSource from Reader or InputStream in Source if (istream != null) { input = new InputSource(istream); } else if (reader != null) { input = new InputSource(reader); } else { input = new InputSource(systemId); } } else { ErrorMsg err = new ErrorMsg(ErrorMsg.JAXP_UNKNOWN_SOURCE_ERR); throw new TransformerConfigurationException(err.toString()); } input.setSystemId(systemId); } catch (NullPointerException e) { ErrorMsg err = new ErrorMsg(ErrorMsg.JAXP_NO_SOURCE_ERR, "TransformerFactory.newTemplates()"); throw new TransformerConfigurationException(err.toString()); } catch (SecurityException e) { ErrorMsg err = new ErrorMsg(ErrorMsg.FILE_ACCESS_ERR, systemId); throw new TransformerConfigurationException(err.toString()); } return input; }
public void parse( InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { final XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); DefaultHandler dh = new ElementMappingContentHandler(xhtml, MAPPINGS) { private final BitSet textNodeStack = new BitSet(); private int nodeDepth = 0; private int completelyFiltered = 0; private Stack<String> headingStack = new Stack<String>(); @Override public void characters(char[] ch, int start, int length) throws SAXException { // only forward content of tags from text:-namespace if (completelyFiltered == 0 && nodeDepth > 0 && textNodeStack.get(nodeDepth - 1)) { super.characters(ch, start, length); } } // helper for checking tags which need complete filtering // (with sub-tags) private boolean needsCompleteFiltering(String namespaceURI, String localName) { if (TEXT_NS.equals(namespaceURI)) { return localName.endsWith("-template") || localName.endsWith("-style"); } else if (TABLE_NS.equals(namespaceURI)) { return "covered-table-cell".equals(localName); } else { return false; } } // map the heading level to <hX> HTML tags private String getXHTMLHeaderTagName(Attributes atts) { String depthStr = atts.getValue(TEXT_NS, "outline-level"); if (depthStr == null) { return "h1"; } int depth = Integer.parseInt(depthStr); if (depth >= 6) { return "h6"; } else if (depth <= 1) { return "h1"; } else { return "h" + depth; } } /** Check if a node is a text node */ private boolean isTextNode(String namespaceURI, String localName) { if (TEXT_NS.equals(namespaceURI)) { return true; } if (SVG_NS.equals(namespaceURI)) { return "title".equals(localName) || "desc".equals(localName); } return false; } @Override public void startElement( String namespaceURI, String localName, String qName, Attributes atts) throws SAXException { // keep track of current node type. If it is a text node, // a bit at the current depth ist set in textNodeStack. 
// characters() checks the top bit to determine, if the // actual node is a text node to print out nodeDepth contains // the depth of the current node and also marks top of stack. assert nodeDepth >= 0; textNodeStack.set(nodeDepth++, isTextNode(namespaceURI, localName)); // filter *all* content of some tags assert completelyFiltered >= 0; if (needsCompleteFiltering(namespaceURI, localName)) { completelyFiltered++; } // call next handler if no filtering if (completelyFiltered == 0) { // special handling of text:h, that are directly passed // to xhtml handler if (TEXT_NS.equals(namespaceURI) && "h".equals(localName)) { xhtml.startElement(headingStack.push(getXHTMLHeaderTagName(atts))); } else { super.startElement(namespaceURI, localName, qName, atts); } } } @Override public void endElement(String namespaceURI, String localName, String qName) throws SAXException { // call next handler if no filtering if (completelyFiltered == 0) { // special handling of text:h, that are directly passed // to xhtml handler if (TEXT_NS.equals(namespaceURI) && "h".equals(localName)) { xhtml.endElement(headingStack.pop()); } else { super.endElement(namespaceURI, localName, qName); } // special handling of tabulators if (TEXT_NS.equals(namespaceURI) && ("tab-stop".equals(localName) || "tab".equals(localName))) { this.characters(TAB, 0, TAB.length); } } // revert filter for *all* content of some tags if (needsCompleteFiltering(namespaceURI, localName)) { completelyFiltered--; } assert completelyFiltered >= 0; // reduce current node depth nodeDepth--; assert nodeDepth >= 0; } @Override public void startPrefixMapping(String prefix, String uri) { // remove prefix mappings as they should not occur in XHTML } @Override public void endPrefixMapping(String prefix) { // remove prefix mappings as they should not occur in XHTML } }; try { SAXParserFactory factory = SAXParserFactory.newInstance(); factory.setValidating(false); factory.setNamespaceAware(true); try { 
factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); } catch (SAXNotRecognizedException e) { // TIKA-329: Some XML parsers do not support the secure-processing // feature, even though it's required by JAXP in Java 5. Ignoring // the exception is fine here, deployments without this feature // are inherently vulnerable to XML denial-of-service attacks. } SAXParser parser = factory.newSAXParser(); parser.parse( new CloseShieldInputStream(stream), new OfflineContentHandler(new NSNormalizerContentHandler(dh))); } catch (ParserConfigurationException e) { throw new TikaException("XML parser configuration error", e); } }
/**
 * Switches on the Xerces features named by {@code XERCES_DYNAMIC} and {@code XERCES_SCHEMA}, in
 * that order. According to the original author's note this is the setup recommended by the Xerces
 * specification and enables DTD and schema validation at the same time.
 *
 * @param factory SAXParserFactory to be configured
 * @throws ParserConfigurationException if the factory cannot create a parser with a feature set
 * @throws SAXNotRecognizedException if the factory does not recognize a feature name
 * @throws SAXNotSupportedException if the factory recognizes a feature but cannot enable it
 */
private static void configureXerces(SAXParserFactory factory)
    throws ParserConfigurationException, SAXNotRecognizedException, SAXNotSupportedException {
  final String[] enabledFeatures = {XERCES_DYNAMIC, XERCES_SCHEMA};
  for (final String featureName : enabledFeatures) {
    factory.setFeature(featureName, true);
  }
}
/**
 * Forwards the feature setting unchanged to {@code delegate}; no validation or bookkeeping is
 * done here, and any exception raised by the delegate propagates to the caller.
 *
 * @param name the feature name, passed through as given
 * @param value the feature state, passed through as given
 * @throws ParserConfigurationException if thrown by the delegate
 * @throws SAXNotRecognizedException if thrown by the delegate
 * @throws SAXNotSupportedException if thrown by the delegate
 */
@Override
public void setFeature(String name, boolean value)
    throws ParserConfigurationException, SAXNotRecognizedException, SAXNotSupportedException {
  delegate.setFeature(name, value);
}
// J2SE does not support Xalan interpretive // main -> _main public static void _main(String argv[]) { // Runtime.getRuntime().traceMethodCalls(false); // turns Java tracing off boolean doStackDumpOnError = false; boolean setQuietMode = false; boolean doDiag = false; String msg = null; boolean isSecureProcessing = false; // Runtime.getRuntime().traceMethodCalls(false); // Runtime.getRuntime().traceInstructions(false); /** The default diagnostic writer... */ java.io.PrintWriter diagnosticsWriter = new PrintWriter(System.err, true); java.io.PrintWriter dumpWriter = diagnosticsWriter; ResourceBundle resbundle = (SecuritySupport.getResourceBundle( com.sun.org.apache.xml.internal.utils.res.XResourceBundle.ERROR_RESOURCES)); String flavor = "s2s"; if (argv.length < 1) { printArgOptions(resbundle); } else { // J2SE does not support Xalan interpretive // false -> true boolean useXSLTC = true; for (int i = 0; i < argv.length; i++) { if ("-XSLTC".equalsIgnoreCase(argv[i])) { useXSLTC = true; } } TransformerFactory tfactory; if (useXSLTC) { String key = "javax.xml.transform.TransformerFactory"; String value = "com.sun.org.apache.xalan.internal.xsltc.trax.TransformerFactoryImpl"; Properties props = System.getProperties(); props.put(key, value); System.setProperties(props); } try { tfactory = TransformerFactory.newInstance(); tfactory.setErrorListener(new DefaultErrorHandler()); } catch (TransformerFactoryConfigurationError pfe) { pfe.printStackTrace(dumpWriter); // "XSL Process was not successful."); msg = XSLMessages.createMessage(XSLTErrorResources.ER_NOT_SUCCESSFUL, null); diagnosticsWriter.println(msg); tfactory = null; // shut up compiler doExit(msg); } boolean formatOutput = false; boolean useSourceLocation = false; String inFileName = null; String outFileName = null; String dumpFileName = null; String xslFileName = null; String treedumpFileName = null; // J2SE does not support Xalan interpretive /* PrintTraceListener tracer = null; */ String outputType = null; String 
media = null; Vector params = new Vector(); boolean quietConflictWarnings = false; URIResolver uriResolver = null; EntityResolver entityResolver = null; ContentHandler contentHandler = null; int recursionLimit = -1; for (int i = 0; i < argv.length; i++) { if ("-XSLTC".equalsIgnoreCase(argv[i])) { // The -XSLTC option has been processed. } // J2SE does not support Xalan interpretive /* else if ("-TT".equalsIgnoreCase(argv[i])) { if (!useXSLTC) { if (null == tracer) tracer = new PrintTraceListener(diagnosticsWriter); tracer.m_traceTemplates = true; } else printInvalidXSLTCOption("-TT"); // tfactory.setTraceTemplates(true); } else if ("-TG".equalsIgnoreCase(argv[i])) { if (!useXSLTC) { if (null == tracer) tracer = new PrintTraceListener(diagnosticsWriter); tracer.m_traceGeneration = true; } else printInvalidXSLTCOption("-TG"); // tfactory.setTraceSelect(true); } else if ("-TS".equalsIgnoreCase(argv[i])) { if (!useXSLTC) { if (null == tracer) tracer = new PrintTraceListener(diagnosticsWriter); tracer.m_traceSelection = true; } else printInvalidXSLTCOption("-TS"); // tfactory.setTraceTemplates(true); } else if ("-TTC".equalsIgnoreCase(argv[i])) { if (!useXSLTC) { if (null == tracer) tracer = new PrintTraceListener(diagnosticsWriter); tracer.m_traceElements = true; } else printInvalidXSLTCOption("-TTC"); // tfactory.setTraceTemplateChildren(true); } */ else if ("-INDENT".equalsIgnoreCase(argv[i])) { int indentAmount; if (((i + 1) < argv.length) && (argv[i + 1].charAt(0) != '-')) { indentAmount = Integer.parseInt(argv[++i]); } else { indentAmount = 0; } // TBD: // xmlProcessorLiaison.setIndent(indentAmount); } else if ("-IN".equalsIgnoreCase(argv[i])) { if (i + 1 < argv.length && argv[i + 1].charAt(0) != '-') inFileName = argv[++i]; else System.err.println( XSLMessages.createMessage( XSLTErrorResources.ER_MISSING_ARG_FOR_OPTION, new Object[] {"-IN"})); // "Missing argument for); } else if ("-MEDIA".equalsIgnoreCase(argv[i])) { if (i + 1 < argv.length) media = argv[++i]; 
else System.err.println( XSLMessages.createMessage( XSLTErrorResources.ER_MISSING_ARG_FOR_OPTION, new Object[] {"-MEDIA"})); // "Missing argument for); } else if ("-OUT".equalsIgnoreCase(argv[i])) { if (i + 1 < argv.length && argv[i + 1].charAt(0) != '-') outFileName = argv[++i]; else System.err.println( XSLMessages.createMessage( XSLTErrorResources.ER_MISSING_ARG_FOR_OPTION, new Object[] {"-OUT"})); // "Missing argument for); } else if ("-XSL".equalsIgnoreCase(argv[i])) { if (i + 1 < argv.length && argv[i + 1].charAt(0) != '-') xslFileName = argv[++i]; else System.err.println( XSLMessages.createMessage( XSLTErrorResources.ER_MISSING_ARG_FOR_OPTION, new Object[] {"-XSL"})); // "Missing argument for); } else if ("-FLAVOR".equalsIgnoreCase(argv[i])) { if (i + 1 < argv.length) { flavor = argv[++i]; } else System.err.println( XSLMessages.createMessage( XSLTErrorResources.ER_MISSING_ARG_FOR_OPTION, new Object[] {"-FLAVOR"})); // "Missing argument for); } else if ("-PARAM".equalsIgnoreCase(argv[i])) { if (i + 2 < argv.length) { String name = argv[++i]; params.addElement(name); String expression = argv[++i]; params.addElement(expression); } else System.err.println( XSLMessages.createMessage( XSLTErrorResources.ER_MISSING_ARG_FOR_OPTION, new Object[] {"-PARAM"})); // "Missing argument for); } else if ("-E".equalsIgnoreCase(argv[i])) { // TBD: // xmlProcessorLiaison.setShouldExpandEntityRefs(false); } else if ("-V".equalsIgnoreCase(argv[i])) { diagnosticsWriter.println( resbundle.getString("version") // ">>>>>>> Xalan Version " + Version.getVersion() + ", " + /* xmlProcessorLiaison.getParserDescription()+ */ resbundle.getString("version2")); // "<<<<<<<"); } // J2SE does not support Xalan interpretive /* else if ("-QC".equalsIgnoreCase(argv[i])) { if (!useXSLTC) quietConflictWarnings = true; else printInvalidXSLTCOption("-QC"); } */ else if ("-Q".equalsIgnoreCase(argv[i])) { setQuietMode = true; } else if ("-DIAG".equalsIgnoreCase(argv[i])) { doDiag = true; } else if 
("-XML".equalsIgnoreCase(argv[i])) { outputType = "xml"; } else if ("-TEXT".equalsIgnoreCase(argv[i])) { outputType = "text"; } else if ("-HTML".equalsIgnoreCase(argv[i])) { outputType = "html"; } else if ("-EDUMP".equalsIgnoreCase(argv[i])) { doStackDumpOnError = true; if (((i + 1) < argv.length) && (argv[i + 1].charAt(0) != '-')) { dumpFileName = argv[++i]; } } else if ("-URIRESOLVER".equalsIgnoreCase(argv[i])) { if (i + 1 < argv.length) { try { uriResolver = (URIResolver) ObjectFactory.newInstance(argv[++i], true); tfactory.setURIResolver(uriResolver); } catch (ConfigurationError cnfe) { msg = XSLMessages.createMessage( XSLTErrorResources.ER_CLASS_NOT_FOUND_FOR_OPTION, new Object[] {"-URIResolver"}); System.err.println(msg); doExit(msg); } } else { msg = XSLMessages.createMessage( XSLTErrorResources.ER_MISSING_ARG_FOR_OPTION, new Object[] {"-URIResolver"}); // "Missing argument for); System.err.println(msg); doExit(msg); } } else if ("-ENTITYRESOLVER".equalsIgnoreCase(argv[i])) { if (i + 1 < argv.length) { try { entityResolver = (EntityResolver) ObjectFactory.newInstance(argv[++i], true); } catch (ConfigurationError cnfe) { msg = XSLMessages.createMessage( XSLTErrorResources.ER_CLASS_NOT_FOUND_FOR_OPTION, new Object[] {"-EntityResolver"}); System.err.println(msg); doExit(msg); } } else { // "Missing argument for); msg = XSLMessages.createMessage( XSLTErrorResources.ER_MISSING_ARG_FOR_OPTION, new Object[] {"-EntityResolver"}); System.err.println(msg); doExit(msg); } } else if ("-CONTENTHANDLER".equalsIgnoreCase(argv[i])) { if (i + 1 < argv.length) { try { contentHandler = (ContentHandler) ObjectFactory.newInstance(argv[++i], true); } catch (ConfigurationError cnfe) { msg = XSLMessages.createMessage( XSLTErrorResources.ER_CLASS_NOT_FOUND_FOR_OPTION, new Object[] {"-ContentHandler"}); System.err.println(msg); doExit(msg); } } else { // "Missing argument for); msg = XSLMessages.createMessage( XSLTErrorResources.ER_MISSING_ARG_FOR_OPTION, new Object[] 
{"-ContentHandler"}); System.err.println(msg); doExit(msg); } } // J2SE does not support Xalan interpretive /* else if ("-L".equalsIgnoreCase(argv[i])) { if (!useXSLTC) tfactory.setAttribute(XalanProperties.SOURCE_LOCATION, Boolean.TRUE); else printInvalidXSLTCOption("-L"); } else if ("-INCREMENTAL".equalsIgnoreCase(argv[i])) { if (!useXSLTC) tfactory.setAttribute ("http://xml.apache.org/xalan/features/incremental", java.lang.Boolean.TRUE); else printInvalidXSLTCOption("-INCREMENTAL"); } else if ("-NOOPTIMIZE".equalsIgnoreCase(argv[i])) { // Default is true. // // %REVIEW% We should have a generalized syntax for negative // switches... and probably should accept the inverse even // if it is the default. if (!useXSLTC) tfactory.setAttribute ("http://xml.apache.org/xalan/features/optimize", java.lang.Boolean.FALSE); else printInvalidXSLTCOption("-NOOPTIMIZE"); } else if ("-RL".equalsIgnoreCase(argv[i])) { if (!useXSLTC) { if (i + 1 < argv.length) recursionLimit = Integer.parseInt(argv[++i]); else System.err.println( XSLMessages.createMessage( XSLTErrorResources.ER_MISSING_ARG_FOR_OPTION, new Object[]{ "-rl" })); //"Missing argument for); } else { if (i + 1 < argv.length && argv[i + 1].charAt(0) != '-') i++; printInvalidXSLTCOption("-RL"); } } */ // Generate the translet class and optionally specify the name // of the translet class. else if ("-XO".equalsIgnoreCase(argv[i])) { if (useXSLTC) { if (i + 1 < argv.length && argv[i + 1].charAt(0) != '-') { tfactory.setAttribute("generate-translet", "true"); tfactory.setAttribute("translet-name", argv[++i]); } else tfactory.setAttribute("generate-translet", "true"); } else { if (i + 1 < argv.length && argv[i + 1].charAt(0) != '-') i++; printInvalidXalanOption("-XO"); } } // Specify the destination directory for the translet classes. 
else if ("-XD".equalsIgnoreCase(argv[i])) { if (useXSLTC) { if (i + 1 < argv.length && argv[i + 1].charAt(0) != '-') tfactory.setAttribute("destination-directory", argv[++i]); else System.err.println( XSLMessages.createMessage( XSLTErrorResources.ER_MISSING_ARG_FOR_OPTION, new Object[] {"-XD"})); // "Missing argument for); } else { if (i + 1 < argv.length && argv[i + 1].charAt(0) != '-') i++; printInvalidXalanOption("-XD"); } } // Specify the jar file name which the translet classes are packaged into. else if ("-XJ".equalsIgnoreCase(argv[i])) { if (useXSLTC) { if (i + 1 < argv.length && argv[i + 1].charAt(0) != '-') { tfactory.setAttribute("generate-translet", "true"); tfactory.setAttribute("jar-name", argv[++i]); } else System.err.println( XSLMessages.createMessage( XSLTErrorResources.ER_MISSING_ARG_FOR_OPTION, new Object[] {"-XJ"})); // "Missing argument for); } else { if (i + 1 < argv.length && argv[i + 1].charAt(0) != '-') i++; printInvalidXalanOption("-XJ"); } } // Specify the package name prefix for the generated translet classes. else if ("-XP".equalsIgnoreCase(argv[i])) { if (useXSLTC) { if (i + 1 < argv.length && argv[i + 1].charAt(0) != '-') tfactory.setAttribute("package-name", argv[++i]); else System.err.println( XSLMessages.createMessage( XSLTErrorResources.ER_MISSING_ARG_FOR_OPTION, new Object[] {"-XP"})); // "Missing argument for); } else { if (i + 1 < argv.length && argv[i + 1].charAt(0) != '-') i++; printInvalidXalanOption("-XP"); } } // Enable template inlining. else if ("-XN".equalsIgnoreCase(argv[i])) { if (useXSLTC) { tfactory.setAttribute("enable-inlining", "true"); } else printInvalidXalanOption("-XN"); } // Turns on additional debugging message output else if ("-XX".equalsIgnoreCase(argv[i])) { if (useXSLTC) { tfactory.setAttribute("debug", "true"); } else printInvalidXalanOption("-XX"); } // Create the Transformer from the translet if the translet class is newer // than the stylesheet. 
else if ("-XT".equalsIgnoreCase(argv[i])) { if (useXSLTC) { tfactory.setAttribute("auto-translet", "true"); } else printInvalidXalanOption("-XT"); } else if ("-SECURE".equalsIgnoreCase(argv[i])) { isSecureProcessing = true; try { tfactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); } catch (TransformerConfigurationException e) { } } else System.err.println( XSLMessages.createMessage( XSLTErrorResources.ER_INVALID_OPTION, new Object[] {argv[i]})); // "Invalid argument:); } // Print usage instructions if no xml and xsl file is specified in the command line if (inFileName == null && xslFileName == null) { msg = resbundle.getString("xslProc_no_input"); System.err.println(msg); doExit(msg); } // Note that there are usage cases for calling us without a -IN arg // The main XSL transformation occurs here! try { long start = System.currentTimeMillis(); if (null != dumpFileName) { dumpWriter = new PrintWriter(new FileWriter(dumpFileName)); } Templates stylesheet = null; if (null != xslFileName) { if (flavor.equals("d2d")) { // Parse in the xml data into a DOM DocumentBuilderFactory dfactory = DocumentBuilderFactory.newInstance(); dfactory.setNamespaceAware(true); if (isSecureProcessing) { try { dfactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); } catch (ParserConfigurationException pce) { } } DocumentBuilder docBuilder = dfactory.newDocumentBuilder(); Node xslDOM = docBuilder.parse(new InputSource(xslFileName)); stylesheet = tfactory.newTemplates(new DOMSource(xslDOM, xslFileName)); } else { // System.out.println("Calling newTemplates: "+xslFileName); stylesheet = tfactory.newTemplates(new StreamSource(xslFileName)); // System.out.println("Done calling newTemplates: "+xslFileName); } } PrintWriter resultWriter; StreamResult strResult; if (null != outFileName) { strResult = new StreamResult(new FileOutputStream(outFileName)); // One possible improvement might be to ensure this is // a valid URI before setting the systemId, but that // might have 
subtle changes that pre-existing users // might notice; we can think about that later -sc r1.46 strResult.setSystemId(outFileName); } else { strResult = new StreamResult(System.out); // We used to default to incremental mode in this case. // We've since decided that since the -INCREMENTAL switch is // available, that default is probably not necessary nor // necessarily a good idea. } SAXTransformerFactory stf = (SAXTransformerFactory) tfactory; // J2SE does not support Xalan interpretive /* // This is currently controlled via TransformerFactoryImpl. if (!useXSLTC && useSourceLocation) stf.setAttribute(XalanProperties.SOURCE_LOCATION, Boolean.TRUE); */ // Did they pass in a stylesheet, or should we get it from the // document? if (null == stylesheet) { Source source = stf.getAssociatedStylesheet(new StreamSource(inFileName), media, null, null); if (null != source) stylesheet = tfactory.newTemplates(source); else { if (null != media) throw new TransformerException( XSLMessages.createMessage( XSLTErrorResources.ER_NO_STYLESHEET_IN_MEDIA, new Object[] {inFileName, media})); // "No stylesheet found in: " // + inFileName + ", media=" // + media); else throw new TransformerException( XSLMessages.createMessage( XSLTErrorResources.ER_NO_STYLESHEET_PI, new Object[] {inFileName})); // "No xml-stylesheet PI found in: " // + inFileName); } } if (null != stylesheet) { Transformer transformer = flavor.equals("th") ? null : stylesheet.newTransformer(); transformer.setErrorListener(new DefaultErrorHandler()); // Override the output format? 
if (null != outputType) { transformer.setOutputProperty(OutputKeys.METHOD, outputType); } // J2SE does not support Xalan interpretive /* if (transformer instanceof com.sun.org.apache.xalan.internal.transformer.TransformerImpl) { com.sun.org.apache.xalan.internal.transformer.TransformerImpl impl = (com.sun.org.apache.xalan.internal.transformer.TransformerImpl)transformer; TraceManager tm = impl.getTraceManager(); if (null != tracer) tm.addTraceListener(tracer); impl.setQuietConflictWarnings(quietConflictWarnings); // This is currently controlled via TransformerFactoryImpl. if (useSourceLocation) impl.setProperty(XalanProperties.SOURCE_LOCATION, Boolean.TRUE); if(recursionLimit>0) impl.setRecursionLimit(recursionLimit); // sc 28-Feb-01 if we re-implement this, please uncomment helpmsg in printArgOptions // impl.setDiagnosticsOutput( setQuietMode ? null : diagnosticsWriter ); } */ int nParams = params.size(); for (int i = 0; i < nParams; i += 2) { transformer.setParameter( (String) params.elementAt(i), (String) params.elementAt(i + 1)); } if (uriResolver != null) transformer.setURIResolver(uriResolver); if (null != inFileName) { if (flavor.equals("d2d")) { // Parse in the xml data into a DOM DocumentBuilderFactory dfactory = DocumentBuilderFactory.newInstance(); dfactory.setCoalescing(true); dfactory.setNamespaceAware(true); if (isSecureProcessing) { try { dfactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); } catch (ParserConfigurationException pce) { } } DocumentBuilder docBuilder = dfactory.newDocumentBuilder(); if (entityResolver != null) docBuilder.setEntityResolver(entityResolver); Node xmlDoc = docBuilder.parse(new InputSource(inFileName)); Document doc = docBuilder.newDocument(); org.w3c.dom.DocumentFragment outNode = doc.createDocumentFragment(); transformer.transform(new DOMSource(xmlDoc, inFileName), new DOMResult(outNode)); // Now serialize output to disk with identity transformer Transformer serializer = stf.newTransformer(); 
serializer.setErrorListener(new DefaultErrorHandler()); Properties serializationProps = stylesheet.getOutputProperties(); serializer.setOutputProperties(serializationProps); if (contentHandler != null) { SAXResult result = new SAXResult(contentHandler); serializer.transform(new DOMSource(outNode), result); } else serializer.transform(new DOMSource(outNode), strResult); } else if (flavor.equals("th")) { for (int i = 0; i < 1; i++) // Loop for diagnosing bugs with inconsistent behavior { // System.out.println("Testing the TransformerHandler..."); XMLReader reader = null; // Use JAXP1.1 ( if possible ) try { javax.xml.parsers.SAXParserFactory factory = javax.xml.parsers.SAXParserFactory.newInstance(); factory.setNamespaceAware(true); if (isSecureProcessing) { try { factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); } catch (org.xml.sax.SAXException se) { } } javax.xml.parsers.SAXParser jaxpParser = factory.newSAXParser(); reader = jaxpParser.getXMLReader(); } catch (javax.xml.parsers.ParserConfigurationException ex) { throw new org.xml.sax.SAXException(ex); } catch (javax.xml.parsers.FactoryConfigurationError ex1) { throw new org.xml.sax.SAXException(ex1.toString()); } catch (NoSuchMethodError ex2) { } catch (AbstractMethodError ame) { } if (null == reader) { reader = XMLReaderFactory.createXMLReader(); } // J2SE does not support Xalan interpretive /* if (!useXSLTC) stf.setAttribute(com.sun.org.apache.xalan.internal.processor.TransformerFactoryImpl.FEATURE_INCREMENTAL, Boolean.TRUE); */ TransformerHandler th = stf.newTransformerHandler(stylesheet); reader.setContentHandler(th); reader.setDTDHandler(th); if (th instanceof org.xml.sax.ErrorHandler) reader.setErrorHandler((org.xml.sax.ErrorHandler) th); try { reader.setProperty("http://xml.org/sax/properties/lexical-handler", th); } catch (org.xml.sax.SAXNotRecognizedException e) { } catch (org.xml.sax.SAXNotSupportedException e) { } try { reader.setFeature("http://xml.org/sax/features/namespace-prefixes", 
true); } catch (org.xml.sax.SAXException se) { } th.setResult(strResult); reader.parse(new InputSource(inFileName)); } } else { if (entityResolver != null) { XMLReader reader = null; // Use JAXP1.1 ( if possible ) try { javax.xml.parsers.SAXParserFactory factory = javax.xml.parsers.SAXParserFactory.newInstance(); factory.setNamespaceAware(true); if (isSecureProcessing) { try { factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); } catch (org.xml.sax.SAXException se) { } } javax.xml.parsers.SAXParser jaxpParser = factory.newSAXParser(); reader = jaxpParser.getXMLReader(); } catch (javax.xml.parsers.ParserConfigurationException ex) { throw new org.xml.sax.SAXException(ex); } catch (javax.xml.parsers.FactoryConfigurationError ex1) { throw new org.xml.sax.SAXException(ex1.toString()); } catch (NoSuchMethodError ex2) { } catch (AbstractMethodError ame) { } if (null == reader) { reader = XMLReaderFactory.createXMLReader(); } reader.setEntityResolver(entityResolver); if (contentHandler != null) { SAXResult result = new SAXResult(contentHandler); transformer.transform(new SAXSource(reader, new InputSource(inFileName)), result); } else { transformer.transform( new SAXSource(reader, new InputSource(inFileName)), strResult); } } else if (contentHandler != null) { SAXResult result = new SAXResult(contentHandler); transformer.transform(new StreamSource(inFileName), result); } else { // System.out.println("Starting transform"); transformer.transform(new StreamSource(inFileName), strResult); // System.out.println("Done with transform"); } } } else { StringReader reader = new StringReader("<?xml version=\"1.0\"?> <doc/>"); transformer.transform(new StreamSource(reader), strResult); } } else { // "XSL Process was not successful."); msg = XSLMessages.createMessage(XSLTErrorResources.ER_NOT_SUCCESSFUL, null); diagnosticsWriter.println(msg); doExit(msg); } // close output streams if (null != outFileName && strResult != null) { java.io.OutputStream out = 
strResult.getOutputStream(); java.io.Writer writer = strResult.getWriter(); try { if (out != null) out.close(); if (writer != null) writer.close(); } catch (java.io.IOException ie) { } } long stop = System.currentTimeMillis(); long millisecondsDuration = stop - start; if (doDiag) { Object[] msgArgs = new Object[] {inFileName, xslFileName, new Long(millisecondsDuration)}; msg = XSLMessages.createMessage("diagTiming", msgArgs); diagnosticsWriter.println('\n'); diagnosticsWriter.println(msg); } } catch (Throwable throwable) { while (throwable instanceof com.sun.org.apache.xml.internal.utils.WrappedRuntimeException) { throwable = ((com.sun.org.apache.xml.internal.utils.WrappedRuntimeException) throwable) .getException(); } if ((throwable instanceof NullPointerException) || (throwable instanceof ClassCastException)) doStackDumpOnError = true; diagnosticsWriter.println(); if (doStackDumpOnError) throwable.printStackTrace(dumpWriter); else { DefaultErrorHandler.printLocation(diagnosticsWriter, throwable); diagnosticsWriter.println( XSLMessages.createMessage(XSLTErrorResources.ER_XSLT_ERROR, null) + " (" + throwable.getClass().getName() + "): " + throwable.getMessage()); } // diagnosticsWriter.println(XSLMessages.createMessage(XSLTErrorResources.ER_NOT_SUCCESSFUL, // null)); //"XSL Process was not successful."); if (null != dumpFileName) { dumpWriter.close(); } doExit(throwable.getMessage()); } if (null != dumpFileName) { dumpWriter.close(); } if (null != diagnosticsWriter) { // diagnosticsWriter.close(); } // if(!setQuietMode) // diagnosticsWriter.println(resbundle.getString("xsldone")); //"Xalan: done"); // else // diagnosticsWriter.println(""); //"Xalan: done"); } }