private static Element parseElement(XMLStreamReader xsr) throws XMLStreamException { // xsr points to a START_ELEMENT event. Create the element and read all its attributes // Then read all its children events Element element = new Element(xsr.getLocalName()); // text that will be added to the element. Text can come in different events, so we add it here // and add it to the element at the end StringBuilder elementText = new StringBuilder(); int attributeCount = xsr.getAttributeCount(); for (int i = 0; i < attributeCount; i++) { element.putAttribute(xsr.getAttributeLocalName(i), xsr.getAttributeValue(i)); } while (xsr.hasNext()) { xsr.next(); if (xsr.getEventType() == XMLStreamConstants.END_ELEMENT) { // element is closed. Move the cursor and return it // check if there is some text to add before (empty text is not added, but added text is not // trimmed) // we set empty text also if the element has no children if (!elementText.toString().trim().isEmpty() || !element.hasChildren()) { element.setText(elementText.toString()); } // xsr.next(); return element; } else if (xsr.getEventType() == XMLStreamConstants.CHARACTERS) { // an attribute of the current element elementText.append(xsr.getText()); } else if (xsr.getEventType() == XMLStreamConstants.START_ELEMENT) { // new element begins -> read it recursively and add it to the current element element.addChild(parseElement(xsr)); } } // we reached the end of the document without the tag end -> error parsing throw new XMLStreamException( "End of the document unexpectedly reached. Element " + element.getName() + " not closed"); }
private static Element parse(XMLStreamReader xsr) throws XMLStreamException { // we skip the events until we reach the root element while (xsr.getEventType() != XMLStreamConstants.START_ELEMENT) { xsr.next(); } return parseElement(xsr); }
/** Note: calling this method will move stream to the next non-textual event. */ protected String collectAllText(XMLStreamReader sr) throws XMLStreamException { StringBuilder sb = new StringBuilder(100); while (true) { int type = sr.getEventType(); if (type == CHARACTERS || type == SPACE || type == CDATA) { sb.append(sr.getText()); sr.next(); } else { break; } } return sb.toString(); }
/** * @param args * @throws Exception */ public static void main(String[] args) throws Exception { if (args.length != 1) { printUsage(); } filename = args[0]; XMLInputFactory xmlif = null; try { xmlif = XMLInputFactory.newInstance(); xmlif.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, Boolean.TRUE); xmlif.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE); xmlif.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, Boolean.TRUE); xmlif.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE); } catch (Exception ex) { ex.printStackTrace(); } System.out.println("XMLInputFactory: " + xmlif); System.out.println("filename = " + filename); XMLStreamReader xmlr = null; try { FileInputStream fis = new FileInputStream(filename); xmlr = xmlif.createXMLStreamReader(fis); } catch (Exception ex) { ex.printStackTrace(); } for (int event = xmlr.next(); event != XMLStreamConstants.END_DOCUMENT; event = xmlr.next()) { if (event == XMLStreamConstants.START_ELEMENT) { String element = xmlr.getLocalName(); } } }
/** * ns4 is declared on Envelope and Body and is used in faultcode. So making sure the correct ns4 * is picked up for payload source */ public void testPayloadSource1() throws Exception { String soap18Msg = "<S:Envelope xmlns:S='http://schemas.xmlsoap.org/soap/envelope/' xmlns:ns4='A'>" + "<S:Body xmlns:ns4='http://schemas.xmlsoap.org/soap/envelope/'>" + "<S:Fault>" + "<faultcode>ns4:Server</faultcode>" + "<faultstring>com.sun.istack.XMLStreamException2</faultstring>" + "</S:Fault>" + "</S:Body>" + "</S:Envelope>"; Message message = useStreamCodec(soap18Msg); Source source = message.readPayloadAsSource(); InputStream is = getInputStream(source); XMLStreamReader xsr = XMLInputFactory.newInstance().createXMLStreamReader(is); xsr.next(); xsr.next(); assertEquals("http://schemas.xmlsoap.org/soap/envelope/", xsr.getNamespaceURI("ns4")); }
/** * Method that will iterate through contents of an XML document using specified stream reader; * will also access some of data to make sure reader reads most of lazy-loadable data. Method is * usually called to try to get an exception for invalid content. * * @return Dummy value calculated on contents; used to make sure no dead code is eliminated */ protected int streamThrough(XMLStreamReader sr) throws XMLStreamException { int result = 0; while (sr.hasNext()) { int type = sr.next(); result += type; if (sr.hasText()) { /* will also do basic verification for text content, to * see that all text accessor methods return same content */ result += getAndVerifyText(sr).hashCode(); } if (sr.hasName()) { result += sr.getName().hashCode(); } } return result; }
public static void main(String[] args) throws Exception { String urlString; if (args.length == 0) { urlString = "http://www.w3c.org"; System.out.println("Using " + urlString); } else urlString = args[0]; URL url = new URL(urlString); InputStream in = url.openStream(); XMLInputFactory factory = XMLInputFactory.newInstance(); XMLStreamReader parser = factory.createXMLStreamReader(in); while (parser.hasNext()) { int event = parser.next(); if (event == XMLStreamConstants.START_ELEMENT) { if (parser.getLocalName().equals("a")) { String href = parser.getAttributeValue(null, "href"); if (href != null) System.out.println(href); } } } }
@Override protected void map(LongWritable key, Text value, Mapper.Context context) throws IOException, InterruptedException { String document = value.toString(); System.out.println("'" + document + "'"); try { XMLStreamReader reader = XMLInputFactory.newInstance() .createXMLStreamReader(new ByteArrayInputStream(document.getBytes())); String propertyName = ""; String propertyValue = ""; String currentElement = ""; while (reader.hasNext()) { int code = reader.next(); switch (code) { case XMLStreamConstants.START_ELEMENT: // START_ELEMENT: currentElement = reader.getLocalName(); break; case XMLStreamConstants.CHARACTERS: // CHARACTERS: if (currentElement.equalsIgnoreCase("uid")) { propertyName += reader.getText().trim(); System.out.println(propertyName); } else if (currentElement.equalsIgnoreCase("location")) { propertyValue += reader.getText().trim(); System.out.println(propertyValue); } else if (currentElement.equalsIgnoreCase("age")) { propertyValue += ("," + reader.getText().trim()); System.out.println(propertyValue); } break; } } reader.close(); context.write(new Text(propertyName.trim()), new Text(propertyValue.trim())); } catch (Exception e) { throw new IOException(e); } }