Example #1
0
  /**
   * External TMX - level 1: reads the content of a {@code seg} element while skipping any text
   * that sits inside inline tags.
   *
   * <p>{@code segContent} is cleared first. Character data is appended to it only while no child
   * element of the segment is open. The method returns once an end tag whose local name is
   * {@code seg} has been consumed.
   *
   * @throws Exception if reading the next event from {@code xml} fails
   */
  protected void parseSegExtLevel1() throws Exception {
    segContent.setLength(0);

    // Number of currently open child elements; 0 means "directly inside the segment".
    int depth = 0;

    for (;;) {
      XMLEvent event = xml.nextEvent();
      int type = event.getEventType();

      if (type == XMLEvent.START_ELEMENT) {
        depth++;
      } else if (type == XMLEvent.END_ELEMENT) {
        depth--;
        EndElement closing = (EndElement) event;
        if ("seg".equals(closing.getName().getLocalPart())) {
          return;
        }
      } else if (type == XMLEvent.CHARACTERS && depth == 0) {
        segContent.append(((Characters) event).getData());
      }
    }
  }
Example #2
0
  /**
   * Regression test inspired by a real bug in one implementation: when the first element was
   * fetched with nextTag() instead of nextEvent(), the reader's initial state was not set up
   * properly and the following peek() failed.
   *
   * <p>The scenario is repeated for every combination of the two boolean reader settings.
   */
  public void testPeek() throws XMLStreamException {
    final String doc = "<root>text</root>";
    final boolean[] settings = {false, true};

    // The namespace flag toggles in the inner loop, so the combinations run in the order
    // (ns, coal) = (off, off), (on, off), (off, on), (on, on).
    for (boolean coalescing : settings) {
      for (boolean nsAware : settings) {
        XMLEventReader reader = getReader(doc, nsAware, coalescing);
        assertTokenType(START_DOCUMENT, reader.nextEvent().getEventType());

        XMLEvent root = reader.nextTag();
        assertTokenType(START_ELEMENT, root.getEventType());

        // peek() must expose the text without consuming it...
        XMLEvent peeked = reader.peek();
        assertTokenType(CHARACTERS, peeked.getEventType());
        Characters peekedText = peeked.asCharacters();
        assertNotNull(peekedText);
        assertEquals("text", peekedText.getData());

        // ...so nextEvent() has to hand out the very same text once more.
        XMLEvent consumed = reader.nextEvent();
        assertTokenType(CHARACTERS, consumed.getEventType());
        Characters consumedText = consumed.asCharacters();
        assertNotNull(consumedText);
        assertEquals("text", consumedText.getData());

        assertTokenType(END_ELEMENT, reader.nextTag().getEventType());
        assertTokenType(END_DOCUMENT, reader.nextEvent().getEventType());

        // Once the document is exhausted, peek() should return null.
        assertNull(reader.peek());
      }
    }
  }
Example #3
0
  /**
   * Parses a {@code data} element into a {@link DataMetadata}.
   *
   * <p>The key is taken from the first attribute whose local name equals
   * {@code GraphMLConstants.KEY_NAME}. Child elements are handed to the unknown-element parser.
   * The value is the concatenation of all character data seen directly inside the element: a
   * StAX reader may deliver one text node as several {@code Characters} events (for example
   * around entity references when coalescing is off), so the chunks are accumulated instead of
   * letting the last chunk overwrite the earlier ones. If no character data is seen at all, the
   * value is left untouched.
   *
   * @param xmlEventReader reader positioned just after {@code start}
   * @param start the start tag of the {@code data} element
   * @return the parsed metadata; {@code null} only if {@code ExceptionConverter.convert} returns
   *     normally after a failure (NOTE(review): it presumably always throws - confirm)
   * @throws GraphIOException declared for callers; failures inside the method, including the
   *     missing-key error, are routed through {@code ExceptionConverter.convert}
   */
  public DataMetadata parse(XMLEventReader xmlEventReader, StartElement start)
      throws GraphIOException {

    try {
      DataMetadata data = new DataMetadata();

      // Parse the attributes; only the first key attribute encountered is used.
      @SuppressWarnings("unchecked")
      Iterator<Attribute> attributes = start.getAttributes();
      while (attributes.hasNext()) {
        Attribute attribute = attributes.next();
        String name = attribute.getName().getLocalPart();
        if (data.getKey() == null && GraphMLConstants.KEY_NAME.equals(name)) {
          data.setKey(attribute.getValue());
        }
      }

      // Make sure the key has been set.
      if (data.getKey() == null) {
        throw new GraphIOException("Element 'data' is missing attribute 'key'");
      }

      // Stays null until the first Characters event so that an element without any text keeps
      // whatever value DataMetadata starts with.
      StringBuilder text = null;

      while (xmlEventReader.hasNext()) {
        XMLEvent event = xmlEventReader.nextEvent();

        if (event.isStartElement()) {
          // Treat any child elements as unknown.
          getUnknownParser().parse(xmlEventReader, event.asStartElement());
        } else if (event.isCharacters()) {
          if (text == null) {
            text = new StringBuilder();
          }
          text.append(event.asCharacters().getData());
        } else if (event.isEndElement()) {
          verifyMatch(start, event.asEndElement());
          break;
        }
      }

      if (text != null) {
        data.setValue(text.toString());
      }

      return data;

    } catch (Exception e) {
      ExceptionConverter.convert(e);
    }

    return null;
  }
Example #4
0
  /**
   * OmegaT TMX: reads the full text of a {@code seg} element.
   *
   * <p>{@code segContent} is cleared, then every character event is appended to it regardless of
   * nesting. Start tags and end tags other than {@code seg} are ignored. The method returns once
   * an end tag whose local name is {@code seg} has been consumed.
   *
   * @throws Exception if reading the next event from {@code xml} fails
   */
  protected void parseSegOmegaT() throws Exception {
    segContent.setLength(0);

    for (;;) {
      XMLEvent event = xml.nextEvent();
      int type = event.getEventType();

      if (type == XMLEvent.CHARACTERS) {
        segContent.append(((Characters) event).getData());
      } else if (type == XMLEvent.END_ELEMENT) {
        String closedTag = ((EndElement) event).getName().getLocalPart();
        if ("seg".equals(closedTag)) {
          return;
        }
      }
    }
  }
Example #5
0
  /**
   * Reads a {@code note} element and stores its text in {@code currentTu.note}.
   *
   * <p>All character data up to the end tag whose local name is {@code note} is collected in
   * {@code noteContent}; other events are ignored.
   *
   * @param element the start tag of the note (not inspected here)
   * @throws Exception if reading the next event from {@code xml} fails
   */
  protected void parseNote(StartElement element) throws Exception {
    noteContent.setLength(0);

    boolean noteClosed = false;
    while (!noteClosed) {
      XMLEvent event = xml.nextEvent();
      int type = event.getEventType();

      if (type == XMLEvent.CHARACTERS) {
        noteContent.append(((Characters) event).getData());
      } else if (type == XMLEvent.END_ELEMENT) {
        EndElement closing = (EndElement) event;
        noteClosed = "note".equals(closing.getName().getLocalPart());
      }
    }

    currentTu.note = noteContent.toString();
  }
Example #6
0
  /**
   * Reads a {@code prop} element and adds it to {@code currentTu.props}.
   *
   * <p>The {@code type} attribute of the start tag is remembered, all character data up to the
   * end tag whose local name is {@code prop} is collected in {@code propContent}, and a new
   * {@code TMXProp} is built from the two.
   *
   * @param element the start tag of the property
   * @throws Exception if reading the next event from {@code xml} fails
   */
  protected void parseProp(StartElement element) throws Exception {
    final String type = getAttributeValue(element, "type");
    propContent.setLength(0);

    for (;;) {
      XMLEvent event = xml.nextEvent();
      int kind = event.getEventType();

      if (kind == XMLEvent.CHARACTERS) {
        propContent.append(((Characters) event).getData());
        continue;
      }
      if (kind != XMLEvent.END_ELEMENT) {
        continue;
      }

      EndElement closing = (EndElement) event;
      if ("prop".equals(closing.getName().getLocalPart())) {
        currentTu.props.add(new TMXProp(type, propContent.toString()));
        return;
      }
    }
  }
Example #7
0
 /**
  * Compares the text of two character events via the string overload of {@code isEqual}.
  *
  * <p>When {@code bopt} is set, both texts are trimmed before the comparison.
  */
 private boolean isEqual(Characters e1, Characters e2) {
   String left = e1.getData();
   String right = e2.getData();
   if (bopt) {
     left = left.trim();
     right = right.trim();
   }
   return isEqual(left, right);
 }
Example #8
0
 /**
  * Tells whether a character event carries no meaningful text: it is either flagged as
  * ignorable white space or {@code Util.isBlank} accepts its data.
  */
 private boolean isBlank(Characters e) {
   return e.isIgnorableWhiteSpace() || Util.isBlank(e.getData());
 }
Example #9
0
  /**
   * External TMX - level 2. Replace all tags into shortcuts.
   *
   * <p>Reads events until the end tag of {@code seg}. Text directly inside the segment is appended
   * to {@code segContent}; text inside an inline element is collected in {@code segInlineTag}.
   * When an inline element closes, a shortcut of the form {@code <[/]letterNumber[/]>} is appended
   * to {@code segContent} in its place. {@code hi} elements are transparent: they are neither
   * counted as nesting nor turned into a shortcut.
   *
   * <p>If {@code inlineTagHandler} yields no number for a closing tag, the error is logged,
   * {@code errorsCount} is incremented, {@code segContent} is emptied and the rest of the segment
   * is skipped.
   *
   * @throws Exception if reading the next event from {@code xml} fails
   */
  protected void parseSegExtLevel2() throws Exception {
    segContent.setLength(0);
    segInlineTag.setLength(0);
    inlineTagHandler.reset();

    // Number of currently open inline elements ("hi" excluded); 0 means plain segment text.
    int inlineLevel = 0;
    while (true) {
      XMLEvent e = xml.nextEvent();
      switch (e.getEventType()) {
        case XMLEvent.START_ELEMENT:
          StartElement eStart = e.asStartElement();
          if ("hi".equals(eStart.getName().getLocalPart())) {
            // tag should be skipped
            break;
          }
          inlineLevel++;
          // Start collecting the inner text of this inline element from scratch.
          segInlineTag.setLength(0);
          // Tell the handler which kind of inline tag has been opened, passing along the
          // attributes that kind uses.
          if ("bpt".equals(eStart.getName().getLocalPart())) {
            inlineTagHandler.startBPT(
                getAttributeValue(eStart, "i"), getAttributeValue(eStart, "x"));
            inlineTagHandler.setTagShortcutLetter(
                StringUtil.getFirstLetterLowercase(getAttributeValue(eStart, "type")));
          } else if ("ept".equals(eStart.getName().getLocalPart())) {
            inlineTagHandler.startEPT(getAttributeValue(eStart, "i"));
          } else if ("it".equals(eStart.getName().getLocalPart())) {
            inlineTagHandler.startOTHER();
            inlineTagHandler.setOtherTagShortcutLetter(
                StringUtil.getFirstLetterLowercase(getAttributeValue(eStart, "type")));
            inlineTagHandler.setCurrentPos(getAttributeValue(eStart, "pos"));
          } else if ("ph".equals(eStart.getName().getLocalPart())) {
            inlineTagHandler.startOTHER();
            inlineTagHandler.setOtherTagShortcutLetter(
                StringUtil.getFirstLetterLowercase(getAttributeValue(eStart, "type")));
          } else {
            inlineTagHandler.startOTHER();
          }
          break;
        case XMLEvent.END_ELEMENT:
          EndElement eEnd = e.asEndElement();
          if ("hi".equals(eEnd.getName().getLocalPart())) {
            // tag should be skipped
            break;
          }
          inlineLevel--;
          if ("seg".equals(eEnd.getName().getLocalPart())) {
            // End of the segment: segContent now holds the converted text.
            return;
          }
          // Whether the shortcut gets a '/' right after '<' or right before '>'.
          boolean slashBefore = false;
          boolean slashAfter = false;
          // Candidate shortcut letter taken from the text collected inside the inline element;
          // a value of 0 is treated below as "no letter available".
          char tagName = StringUtil.getFirstLetterLowercase(segInlineTag);
          // Shortcut number supplied by the handler; null is handled as a reading error below.
          Integer tagN;
          if ("bpt".equals(eEnd.getName().getLocalPart())) {
            // Prefer the letter from the inner text and store it in the handler; otherwise fall
            // back to the letter the handler already holds.
            if (tagName != 0) {
              inlineTagHandler.setTagShortcutLetter(tagName);
            } else {
              tagName = inlineTagHandler.getTagShortcutLetter();
            }
            tagN = inlineTagHandler.endBPT();
          } else if ("ept".equals(eEnd.getName().getLocalPart())) {
            // The letter always comes from the handler here, and the shortcut is written in
            // closing form.
            slashBefore = true;
            tagName = inlineTagHandler.getTagShortcutLetter();
            tagN = inlineTagHandler.endEPT();
          } else if ("it".equals(eEnd.getName().getLocalPart())) {
            if (tagName != 0) {
              inlineTagHandler.setOtherTagShortcutLetter(tagName);
            } else {
              tagName = inlineTagHandler.getOtherTagShortcutLetter();
            }
            tagN = inlineTagHandler.endOTHER();
            // Closing form only when the position recorded for this tag is "end".
            if ("end".equals(inlineTagHandler.getCurrentPos())) {
              slashBefore = true;
            }
          } else if ("ph".equals(eEnd.getName().getLocalPart())) {
            if (tagName != 0) {
              inlineTagHandler.setOtherTagShortcutLetter(tagName);
            } else {
              tagName = inlineTagHandler.getOtherTagShortcutLetter();
            }
            tagN = inlineTagHandler.endOTHER();
            if (useSlash) {
              slashAfter = true;
            }
          } else {
            tagN = inlineTagHandler.endOTHER();
            if (useSlash) {
              slashAfter = true;
            }
          }
          // Last-resort letter when neither the inner text nor the handler provided one.
          if (tagName == 0) {
            tagName = 'f';
          }
          if (tagN == null) {
            // check error of TMX reading
            Log.logErrorRB(
                "TMX_ERROR_READING_LEVEL2",
                e.getLocation().getLineNumber(),
                e.getLocation().getColumnNumber());
            errorsCount++;
            // Drop everything collected for this segment.
            segContent.setLength(0);
            // wait for end seg
            while (true) {
              XMLEvent ev = xml.nextEvent();
              switch (ev.getEventType()) {
                case XMLEvent.END_ELEMENT:
                  EndElement evEnd = (EndElement) ev;
                  if ("seg".equals(evEnd.getName().getLocalPart())) {
                    return;
                  }
              }
            }
          }

          // Emit the shortcut that replaces the inline element in the segment text.
          segContent.append('<');
          if (slashBefore) {
            segContent.append('/');
          }
          segContent.append(tagName);
          segContent.append(Integer.toString(tagN));
          if (slashAfter) {
            segContent.append('/');
          }
          segContent.append('>');
          break;
        case XMLEvent.CHARACTERS:
          Characters c = (Characters) e;
          if (inlineLevel == 0) {
            // Plain segment text.
            segContent.append(c.getData());
          } else {
            // Text inside an inline element: kept aside, used only to derive the shortcut letter.
            segInlineTag.append(c.getData());
          }
          break;
      }
    }
  }
Example #10
0
 /**
  * Answers the ignorable-white-space query by forwarding it to {@code delegate}.
  *
  * @return the result of {@code delegate.isIgnorableWhiteSpace()}
  */
 @Override
 public boolean isIgnorableWhiteSpace() {
   return delegate.isIgnorableWhiteSpace();
 }
Example #11
0
 /**
  * Answers the CDATA query by forwarding it to {@code delegate}.
  *
  * @return the result of {@code delegate.isCData()}
  */
 @Override
 public boolean isCData() {
   return delegate.isCData();
 }
Example #12
0
  /**
   * Handles a single stream event and updates the parsing state.
   *
   * <ul>
   *   <li>Character data is appended to {@code lastContents}.
   *   <li>A start tag opens a row ({@code row}), enters the column definitions ({@code cols}),
   *       counts a column ({@code col}, only inside {@code cols}), creates the current cell
   *       ({@code c}) or records the sheet dimension ({@code dimension}); afterwards
   *       {@code lastContents} is reset.
   *   <li>An end tag stores the collected contents in the current cell ({@code v}, {@code t}),
   *       adds the cell to the current row ({@code c}), caches the finished row ({@code row}) or
   *       leaves the column definitions ({@code cols}).
   * </ul>
   *
   * @param event the event to process
   * @throws SAXException not raised by any statement in this method; it can only come from the
   *     helper methods called here
   */
  private void handleEvent(XMLEvent event) throws SAXException {
    final int eventType = event.getEventType();

    if (eventType == XMLStreamConstants.CHARACTERS) {
      lastContents += event.asCharacters().getData();
      return;
    }

    if (eventType == XMLStreamConstants.START_ELEMENT) {
      StartElement opening = event.asStartElement();

      switch (opening.getName().getLocalPart()) {
        case "row":
          {
            int rowNumber = Integer.parseInt(opening.getAttributeByName(new QName("r")).getValue());
            if (firstRowIndex == -1) {
              firstRowIndex = rowNumber;
            }
            currentRow = new StreamingRow(rowNumber - 1);
            break;
          }
        case "cols":
          parsingCols = true;
          break;
        case "col":
          if (parsingCols) {
            colNumber = colNumber + 1;
          }
          break;
        case "c":
          {
            // Split a reference such as "AB12" at each letter/digit boundary.
            String reference = opening.getAttributeByName(new QName("r")).getValue();
            String[] coord = reference.split("(?<=\\D)(?=\\d)|(?<=\\d)(?=\\D)");
            int column = CellReference.convertColStringToIndex(coord[0]);
            int row = Integer.parseInt(coord[1]) - 1;
            currentCell = new StreamingCell(column, row);
            setFormatString(opening, currentCell);

            // A cell without an explicit type attribute is given type "n".
            Attribute cellType = opening.getAttributeByName(new QName("t"));
            currentCell.setType(cellType != null ? cellType.getValue() : "n");

            Attribute style = opening.getAttributeByName(new QName("s"));
            if (style != null) {
              String indexStr = style.getValue();
              try {
                currentCell.setCellStyle(stylesTable.getStyleAt(Integer.parseInt(indexStr)));
              } catch (NumberFormatException nfe) {
                LOGGER.warn("Ignoring invalid style index {}", indexStr);
              }
            }
            break;
          }
        case "dimension":
          {
            // The dimension is stored as well so it can be used when no cols are found, which
            // can happen; see the xlsx attached to https://jira.talendforge.org/browse/TDP-1957
            // <dimension ref="A1:B60"/>
            Attribute ref = opening.getAttributeByName(new QName("ref"));
            if (ref != null) {
              this.dimension = ref.getValue();
            }
            break;
          }
        default:
          break;
      }

      // Clear contents cache
      lastContents = "";
      return;
    }

    if (eventType == XMLStreamConstants.END_ELEMENT) {
      switch (event.asEndElement().getName().getLocalPart()) {
        case "v":
        case "t":
          currentCell.setRawContents(unformattedContents());
          currentCell.setContents(formattedContents());
          break;
        case "row":
          if (currentRow != null) {
            rowCache.add(currentRow);
          }
          break;
        case "c":
          currentRow.getCellMap().put(currentCell.getColumnIndex(), currentCell);
          break;
        case "cols":
          parsingCols = false;
          break;
        default:
          break;
      }
    }
  }
  /**
   * Reads the XML document supplied by {@code read()} and converts it into a {@link Feed}.
   *
   * <p>Header values are collected until the first {@code ITEM} start tag, at which point the
   * {@code Feed} is created from them. Each {@code ITEM} end tag then adds a {@code FeedMessage}
   * built from the most recently read author, description, guid, link and title. The values are
   * not reset between items, so an item that omits an element keeps the previously read value.
   *
   * <p>Element text is read by {@code readCharacterData}, so empty elements yield an empty string
   * and text delivered in several chunks is joined. The input stream is always closed, and
   * external entities are never resolved.
   *
   * @return the parsed feed, or {@code null} if the document contains no {@code ITEM} element
   * @throws RuntimeException wrapping the {@code XMLStreamException} or {@code IOException} that
   *     interrupted reading
   */
  @SuppressWarnings("null")
  public Feed readFeed() {
    Feed feed = null;
    boolean isFeedHeader = true;

    // Set header values initially to the empty string.
    String description = "";
    String title = "";
    String link = "";
    String language = "";
    String copyright = "";
    String author = "";
    String pubdate = "";
    String guid = "";

    XMLInputFactory inputFactory = XMLInputFactory.newInstance();
    // Feed content comes from outside the application: do not resolve external entities (XXE).
    inputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);

    // try-with-resources closes the stream on success and on failure alike.
    try (InputStream in = read()) {
      XMLEventReader eventReader = inputFactory.createXMLEventReader(in);

      while (eventReader.hasNext()) {
        XMLEvent event = eventReader.nextEvent();

        if (event.isStartElement()) {
          // A QName always carries a non-null local part.
          String name = event.asStartElement().getName().getLocalPart();

          if (name.equals(ITEM)) {
            // The first item marks the end of the header: freeze the header values.
            if (isFeedHeader) {
              isFeedHeader = false;
              feed = new Feed(title, link, description, language, copyright, pubdate);
            }
          } else if (name.equals(TITLE)) {
            title = readCharacterData(eventReader);
          } else if (name.equals(DESCRIPTION)) {
            description = readCharacterData(eventReader);
          } else if (name.equals(LINK)) {
            link = readCharacterData(eventReader);
          } else if (name.equals(GUID)) {
            guid = readCharacterData(eventReader);
          } else if (name.equals(LANGUAGE)) {
            language = readCharacterData(eventReader);
          } else if (name.equals(AUTHOR)) {
            author = readCharacterData(eventReader);
          } else if (name.equals(PUB_DATE)) {
            pubdate = readCharacterData(eventReader);
          } else if (name.equals(COPYRIGHT)) {
            copyright = readCharacterData(eventReader);
          }
        } else if (event.isEndElement()) {
          if (event.asEndElement().getName().getLocalPart().equals(ITEM)) {
            FeedMessage message = new FeedMessage();
            message.setAuthor(author);
            message.setDescription(description);
            message.setGuid(guid);
            message.setLink(link);
            message.setTitle(title);
            feed.getMessages().add(message);
          }
        }
      }
    } catch (XMLStreamException | java.io.IOException e) {
      throw new RuntimeException(e);
    }
    return feed;
  }

  /**
   * Consumes the run of character events at the reader's current position and returns their
   * joined text. Nothing is consumed and an empty string is returned when the next event is not
   * character data (for example for an empty element).
   */
  private String readCharacterData(XMLEventReader eventReader) throws XMLStreamException {
    StringBuilder text = new StringBuilder();
    while (eventReader.hasNext() && eventReader.peek().isCharacters()) {
      text.append(eventReader.nextEvent().asCharacters().getData());
    }
    return text.toString();
  }