/**
 * Advances the shared XML stream reader until the stack of currently open element names matches
 * the expected elements, then hands over to {@code pullNextXmlChunkFromTopElementOnStack} to
 * produce the chunk.
 *
 * @return the chunk produced for the matching element, or {@code null} when the end of the
 *     document is reached (or the reader reports no further events) without a match
 * @throws KettleException if anything goes wrong while reading the stream; the original failure
 *     is attached as the cause
 */
@Override
  public String pullNextXmlChunk() throws KettleException {
    final Stack<String> openElements = xmlChunkerState.getElementStack();
    final XMLStreamReader reader = xmlChunkerState.getXmlStreamReader();

    try {
      while (reader.hasNext()) {
        final int event = reader.next();

        if (event == XMLStreamConstants.END_DOCUMENT) {
          // nothing more to read, so there is no further chunk.
          return null;
        }

        if (event == XMLStreamConstants.END_ELEMENT) {
          // leaving an element: it is no longer part of the open-element path.
          openElements.pop();
          continue;
        }

        if (event == XMLStreamConstants.START_ELEMENT) {
          // entering an element: record it, then see whether the path now matches.
          openElements.push(reader.getLocalName());

          if (actualElementStackHasExpectedElements(xmlChunkerState)) {
            return pullNextXmlChunkFromTopElementOnStack(xmlChunkerState);
          }
        }

        // every other event type is skipped while searching.
      }
    } catch (Exception e) {
      throw new KettleException("a problem has arisen reading the xero xml stream", e);
    }

    return null;
  }
Ejemplo n.º 2
0
 /**
  * Recursively builds an {@link Element} from the stream.
  *
  * <p>On entry {@code xsr} must be positioned on a {@code START_ELEMENT} event. On return the
  * reader is positioned on the matching {@code END_ELEMENT} event (the cursor is not advanced
  * past it).
  *
  * <p>Text may be delivered in several events, so it is accumulated and only assigned when the
  * element closes. Both {@code CHARACTERS} and {@code CDATA} events contribute to the text,
  * because a non-coalescing reader may report CDATA sections as their own event type. The text is
  * set when it is not blank, or when the element has no children (in which case it may be
  * empty); the text that is set is never trimmed.
  *
  * @param xsr reader positioned on the element's {@code START_ELEMENT} event
  * @return the element with its attributes, children and text populated
  * @throws XMLStreamException if the reader fails, or the document ends before the element is
  *     closed
  */
 private static Element parseElement(XMLStreamReader xsr) throws XMLStreamException {
   Element element = new Element(xsr.getLocalName());

   // attributes are only readable while the cursor is still on START_ELEMENT
   int attributeCount = xsr.getAttributeCount();
   for (int i = 0; i < attributeCount; i++) {
     element.putAttribute(xsr.getAttributeLocalName(i), xsr.getAttributeValue(i));
   }

   StringBuilder elementText = new StringBuilder();
   while (xsr.hasNext()) {
     int event = xsr.next();
     if (event == XMLStreamConstants.END_ELEMENT) {
       // element is closed: attach the collected text (if applicable) and return
       String text = elementText.toString();
       if (!text.trim().isEmpty() || !element.hasChildren()) {
         element.setText(text);
       }
       return element;
     } else if (event == XMLStreamConstants.CHARACTERS || event == XMLStreamConstants.CDATA) {
       // a piece of the current element's text content
       elementText.append(xsr.getText());
     } else if (event == XMLStreamConstants.START_ELEMENT) {
       // new element begins -> read it recursively and add it to the current element
       element.addChild(parseElement(xsr));
     }
     // other events (comments, processing instructions, ...) are ignored
   }
   // we reached the end of the document without the tag end -> error parsing
   throw new XMLStreamException(
       "End of the document unexpectedly reached. Element " + element.getName() + " not closed");
 }
  /**
   * Serialises the element the reader is currently positioned on, together with everything
   * nested inside it, into a standalone XML document string.
   *
   * <p>The element stack in {@code data} is kept in step with the reader while copying: names are
   * pushed on {@code START_ELEMENT} and popped on {@code END_ELEMENT}. Copying stops once the
   * stack becomes shallower than it was on entry (i.e. the element that was on top on entry has
   * been closed) or the reader has no more events.
   *
   * @param data chunker state supplying the element stack, the stream reader and the output
   *     factory
   * @return the XML text written for the chunk
   * @throws KettleException if reading or writing fails; the original failure is attached as the
   *     cause
   */
  private String pullNextXmlChunkFromTopElementOnStack(XMLChunkerState data)
      throws KettleException {
    Stack<String> elementStack = data.getElementStack();
    XMLStreamReader xmlStreamReader = data.getXmlStreamReader();

    int elementStackDepthOnEntry = elementStack.size();
    StringWriter stringWriter = new StringWriter();

    try {
      XMLStreamWriter xmlStreamWriter =
          data.getXmlOutputFactory().createXMLStreamWriter(stringWriter);

      xmlStreamWriter.writeStartDocument(CharEncoding.UTF_8, "1.0");

      // write the event the reader is currently on first: presumably it is the opening tag of
      // the element that is being looked for.
      XmlReaderToWriter.write(xmlStreamReader, xmlStreamWriter);

      // logical (short-circuit) AND: both operands are booleans, a bitwise '&' was not intended.
      while (xmlStreamReader.hasNext() && elementStack.size() >= elementStackDepthOnEntry) {

        switch (xmlStreamReader.next()) {
          case XMLStreamConstants.END_DOCUMENT:
            break; // not copied; the document end is written explicitly after the loop.

          case XMLStreamConstants.END_ELEMENT:
            elementStack.pop();
            XmlReaderToWriter.write(xmlStreamReader, xmlStreamWriter);
            break;

          case XMLStreamConstants.START_ELEMENT:
            elementStack.push(xmlStreamReader.getLocalName());
            XmlReaderToWriter.write(xmlStreamReader, xmlStreamWriter);
            break;

          default:
            // text, comments, etc. are passed through to the writer as they are read.
            XmlReaderToWriter.write(xmlStreamReader, xmlStreamWriter);
            break;
        }
      }

      xmlStreamWriter.writeEndDocument();
      xmlStreamWriter.close();
    } catch (Exception e) {
      throw new KettleException("unable to process a chunk of the xero xml stream", e);
    }

    return stringWriter.toString();
  }
Ejemplo n.º 4
0
  /**
   * Walks every remaining event of the given stream reader, touching the event's text and name
   * where available so that lazily-loaded data actually gets read. Typically invoked to provoke
   * an exception on invalid content.
   *
   * @param sr the stream reader to exhaust
   * @return a dummy checksum derived from the event types, texts and names encountered; it only
   *     exists so that the accesses cannot be eliminated as dead code
   * @throws XMLStreamException if the reader fails while iterating
   */
  protected int streamThrough(XMLStreamReader sr) throws XMLStreamException {
    int checksum = 0;

    while (sr.hasNext()) {
      // start with the numeric event type, then mix in whatever the event exposes
      int contribution = sr.next();

      if (sr.hasText()) {
        // the helper also cross-checks that all text accessors agree on the content
        contribution += getAndVerifyText(sr).hashCode();
      }

      if (sr.hasName()) {
        contribution += sr.getName().hashCode();
      }

      checksum += contribution;
    }

    return checksum;
  }
Ejemplo n.º 5
0
 /**
  * Prints the {@code href} attribute of every {@code a} element found in the document at a URL.
  *
  * <p>The document is read with a StAX stream reader, so it has to be well-formed XML. Both the
  * stream reader and the underlying input stream are released when parsing finishes or fails
  * ({@code XMLStreamReader.close()} does not close the underlying stream, so both are closed
  * explicitly).
  *
  * @param args optional; {@code args[0]} is the URL to read. When absent,
  *     {@code http://www.w3c.org} is used and announced on standard output
  * @throws Exception if the URL is malformed, cannot be opened, or its content cannot be parsed
  */
 public static void main(String[] args) throws Exception {
   String urlString;
   if (args.length == 0) {
     urlString = "http://www.w3c.org";
     System.out.println("Using " + urlString);
   } else {
     urlString = args[0];
   }
   URL url = new URL(urlString);
   InputStream in = url.openStream();
   try {
     // NOTE(review): the content comes from an arbitrary URL; consider restricting DTD /
     // external-entity processing on the factory if untrusted input is a concern.
     XMLInputFactory factory = XMLInputFactory.newInstance();
     XMLStreamReader parser = factory.createXMLStreamReader(in);
     try {
       while (parser.hasNext()) {
         int event = parser.next();
         if (event == XMLStreamConstants.START_ELEMENT && parser.getLocalName().equals("a")) {
           String href = parser.getAttributeValue(null, "href");
           if (href != null) {
             System.out.println(href);
           }
         }
       }
     } finally {
       parser.close();
     }
   } finally {
     in.close();
   }
 }
Ejemplo n.º 6
0
    /**
     * Parses the record's text as an XML document and emits one key/value pair built from it.
     *
     * <p>The key is the trimmed text of the {@code uid} element(s). The value is the text of the
     * {@code location} element(s) followed by {@code ","} plus the text of each {@code age}
     * element. Element names are matched case-insensitively. The raw document and every
     * intermediate key/value are echoed to standard output.
     *
     * <p>Only character data that appears while an element is open is attributed to it: the
     * current element name is cleared on every end tag, so whitespace or text following a closing
     * tag is not appended to the previous element's value.
     *
     * @param key input key supplied by the framework (not used)
     * @param value the XML document text
     * @param context sink for the emitted key/value pair
     * @throws IOException if parsing or writing fails; the original failure is attached as the
     *     cause
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Mapper.Context context)
        throws IOException, InterruptedException {
      String document = value.toString();
      System.out.println("'" + document + "'");
      try {
        // encode explicitly as UTF-8 (the encoding an XML parser assumes when none is declared)
        // instead of relying on the platform default charset.
        XMLStreamReader reader =
            XMLInputFactory.newInstance()
                .createXMLStreamReader(new ByteArrayInputStream(document.getBytes("UTF-8")));
        String propertyName = "";
        String propertyValue = "";
        try {
          String currentElement = "";
          while (reader.hasNext()) {
            int code = reader.next();
            switch (code) {
              case XMLStreamConstants.START_ELEMENT:
                currentElement = reader.getLocalName();
                break;
              case XMLStreamConstants.END_ELEMENT:
                // text after a closing tag does not belong to the element that just closed
                currentElement = "";
                break;
              case XMLStreamConstants.CHARACTERS:
                if (currentElement.equalsIgnoreCase("uid")) {
                  propertyName += reader.getText().trim();
                  System.out.println(propertyName);
                } else if (currentElement.equalsIgnoreCase("location")) {
                  propertyValue += reader.getText().trim();
                  System.out.println(propertyValue);
                } else if (currentElement.equalsIgnoreCase("age")) {
                  propertyValue += ("," + reader.getText().trim());
                  System.out.println(propertyValue);
                }
                break;
              default:
                break;
            }
          }
        } finally {
          // release the parser even when reading fails part-way through
          reader.close();
        }
        context.write(new Text(propertyName.trim()), new Text(propertyValue.trim()));

      } catch (InterruptedException e) {
        // declared by this method: let it propagate instead of hiding it inside an IOException
        throw e;
      } catch (Exception e) {
        throw new IOException(e);
      }
    }