/**
 * External TMX - level 1. Collects only the text found directly inside the segment;
 * text nested inside inline tags is skipped.
 *
 * <p>The collected text is left in {@code segContent}. The method returns as soon as a
 * closing {@code seg} element is read.
 *
 * @throws Exception whatever {@code xml.nextEvent()} throws
 */
protected void parseSegExtLevel1() throws Exception {
    segContent.setLength(0);
    // Number of currently open elements below the segment.
    int depth = 0;
    for (;;) {
        XMLEvent event = xml.nextEvent();
        int type = event.getEventType();
        if (type == XMLEvent.START_ELEMENT) {
            depth++;
        } else if (type == XMLEvent.END_ELEMENT) {
            depth--;
            String closedName = ((EndElement) event).getName().getLocalPart();
            if ("seg".equals(closedName)) {
                return;
            }
        } else if (type == XMLEvent.CHARACTERS && depth == 0) {
            // Only top-level text belongs to the segment.
            segContent.append(((Characters) event).getData());
        }
    }
}
/** * This test was inspired by an actual bug in one of implementations: initial state was not * properly set if nextTag() was called (instead of nextEvent()), and subsequent peek() failed. */ public void testPeek() throws XMLStreamException { String XML = "<root>text</root>"; for (int i = 0; i < 4; ++i) { boolean ns = (i & 1) != 0; boolean coal = (i & 2) != 0; XMLEventReader er = getReader(XML, ns, coal); assertTokenType(START_DOCUMENT, er.nextEvent().getEventType()); XMLEvent tag = er.nextTag(); assertTokenType(START_ELEMENT, tag.getEventType()); // Now, peek() should produce text.. XMLEvent text = er.peek(); assertTokenType(CHARACTERS, text.getEventType()); Characters chars = text.asCharacters(); assertNotNull(chars); assertEquals("text", chars.getData()); // and need nextEvent() to get rid of it, too: text = er.nextEvent(); // Let's verify it again: assertTokenType(CHARACTERS, text.getEventType()); chars = text.asCharacters(); assertNotNull(chars); assertEquals("text", chars.getData()); assertTokenType(END_ELEMENT, er.nextTag().getEventType()); assertTokenType(END_DOCUMENT, er.nextEvent().getEventType()); // And at the end, peek() should return null assertNull(er.peek()); } }
public DataMetadata parse(XMLEventReader xmlEventReader, StartElement start) throws GraphIOException { try { // Create the new port. DataMetadata data = new DataMetadata(); // Parse the attributes. @SuppressWarnings("unchecked") Iterator<Attribute> iterator = start.getAttributes(); while (iterator.hasNext()) { Attribute attribute = (Attribute) iterator.next(); String name = attribute.getName().getLocalPart(); String value = attribute.getValue(); if (data.getKey() == null && GraphMLConstants.KEY_NAME.equals(name)) { data.setKey(value); } } // Make sure the key has been set. if (data.getKey() == null) { throw new GraphIOException("Element 'data' is missing attribute 'key'"); } while (xmlEventReader.hasNext()) { XMLEvent event = xmlEventReader.nextEvent(); if (event.isStartElement()) { StartElement element = (StartElement) event; // Treat any child elements as unknown getUnknownParser().parse(xmlEventReader, element); } if (event.isCharacters()) { Characters characters = (Characters) event; data.setValue(characters.getData()); } if (event.isEndElement()) { EndElement end = (EndElement) event; verifyMatch(start, end); break; } } return data; } catch (Exception e) { ExceptionConverter.convert(e); } return null; }
/**
 * OmegaT TMX - reads the full text of the segment.
 *
 * <p>Every character event is appended to {@code segContent} until a closing
 * {@code seg} element is read.
 *
 * @throws Exception whatever {@code xml.nextEvent()} throws
 */
protected void parseSegOmegaT() throws Exception {
    segContent.setLength(0);
    for (;;) {
        XMLEvent event = xml.nextEvent();
        int type = event.getEventType();
        if (type == XMLEvent.CHARACTERS) {
            segContent.append(((Characters) event).getData());
        } else if (type == XMLEvent.END_ELEMENT) {
            String closedName = ((EndElement) event).getName().getLocalPart();
            if ("seg".equals(closedName)) {
                return;
            }
        }
    }
}
/**
 * Reads the text of a {@code note} element and stores it in {@code currentTu.note}.
 *
 * <p>Character events are accumulated in {@code noteContent}; the note is assigned when
 * the closing {@code note} element is read.
 *
 * @param element the start element of the note (not used by this method)
 * @throws Exception whatever {@code xml.nextEvent()} throws
 */
protected void parseNote(StartElement element) throws Exception {
    noteContent.setLength(0);
    for (;;) {
        XMLEvent event = xml.nextEvent();
        int type = event.getEventType();
        if (type == XMLEvent.CHARACTERS) {
            noteContent.append(((Characters) event).getData());
        } else if (type == XMLEvent.END_ELEMENT) {
            String closedName = ((EndElement) event).getName().getLocalPart();
            if ("note".equals(closedName)) {
                currentTu.note = noteContent.toString();
                return;
            }
        }
    }
}
/**
 * Reads a {@code prop} element and adds it to {@code currentTu.props}.
 *
 * <p>The property type comes from the element's {@code type} attribute; the value is the
 * concatenation of all character events up to the closing {@code prop} element.
 *
 * @param element the start element of the property
 * @throws Exception whatever {@code xml.nextEvent()} throws
 */
protected void parseProp(StartElement element) throws Exception {
    final String type = getAttributeValue(element, "type");
    propContent.setLength(0);
    for (;;) {
        XMLEvent event = xml.nextEvent();
        int eventType = event.getEventType();
        if (eventType == XMLEvent.CHARACTERS) {
            propContent.append(((Characters) event).getData());
        } else if (eventType == XMLEvent.END_ELEMENT) {
            String closedName = ((EndElement) event).getName().getLocalPart();
            if ("prop".equals(closedName)) {
                currentTu.props.add(new TMXProp(type, propContent.toString()));
                return;
            }
        }
    }
}
/**
 * Compares the text of two character events via the string overload of {@code isEqual}.
 * When the {@code bopt} flag is set, both texts are trimmed before comparison.
 */
private boolean isEqual(Characters e1, Characters e2) {
    return bopt
            ? isEqual(e1.getData().trim(), e2.getData().trim())
            : isEqual(e1.getData(), e2.getData());
}
/**
 * Tells whether a character event is blank: either the event reports itself as ignorable
 * white space, or {@code Util.isBlank} accepts its text.
 */
private boolean isBlank(Characters e) {
    return e.isIgnorableWhiteSpace() || Util.isBlank(e.getData());
}
/** External TMX - level 2. Replace all tags into shortcuts. */ protected void parseSegExtLevel2() throws Exception { segContent.setLength(0); segInlineTag.setLength(0); inlineTagHandler.reset(); int inlineLevel = 0; while (true) { XMLEvent e = xml.nextEvent(); switch (e.getEventType()) { case XMLEvent.START_ELEMENT: StartElement eStart = e.asStartElement(); if ("hi".equals(eStart.getName().getLocalPart())) { // tag should be skipped break; } inlineLevel++; segInlineTag.setLength(0); if ("bpt".equals(eStart.getName().getLocalPart())) { inlineTagHandler.startBPT( getAttributeValue(eStart, "i"), getAttributeValue(eStart, "x")); inlineTagHandler.setTagShortcutLetter( StringUtil.getFirstLetterLowercase(getAttributeValue(eStart, "type"))); } else if ("ept".equals(eStart.getName().getLocalPart())) { inlineTagHandler.startEPT(getAttributeValue(eStart, "i")); } else if ("it".equals(eStart.getName().getLocalPart())) { inlineTagHandler.startOTHER(); inlineTagHandler.setOtherTagShortcutLetter( StringUtil.getFirstLetterLowercase(getAttributeValue(eStart, "type"))); inlineTagHandler.setCurrentPos(getAttributeValue(eStart, "pos")); } else if ("ph".equals(eStart.getName().getLocalPart())) { inlineTagHandler.startOTHER(); inlineTagHandler.setOtherTagShortcutLetter( StringUtil.getFirstLetterLowercase(getAttributeValue(eStart, "type"))); } else { inlineTagHandler.startOTHER(); } break; case XMLEvent.END_ELEMENT: EndElement eEnd = e.asEndElement(); if ("hi".equals(eEnd.getName().getLocalPart())) { // tag should be skipped break; } inlineLevel--; if ("seg".equals(eEnd.getName().getLocalPart())) { return; } boolean slashBefore = false; boolean slashAfter = false; char tagName = StringUtil.getFirstLetterLowercase(segInlineTag); Integer tagN; if ("bpt".equals(eEnd.getName().getLocalPart())) { if (tagName != 0) { inlineTagHandler.setTagShortcutLetter(tagName); } else { tagName = inlineTagHandler.getTagShortcutLetter(); } tagN = inlineTagHandler.endBPT(); } else if 
("ept".equals(eEnd.getName().getLocalPart())) { slashBefore = true; tagName = inlineTagHandler.getTagShortcutLetter(); tagN = inlineTagHandler.endEPT(); } else if ("it".equals(eEnd.getName().getLocalPart())) { if (tagName != 0) { inlineTagHandler.setOtherTagShortcutLetter(tagName); } else { tagName = inlineTagHandler.getOtherTagShortcutLetter(); } tagN = inlineTagHandler.endOTHER(); if ("end".equals(inlineTagHandler.getCurrentPos())) { slashBefore = true; } } else if ("ph".equals(eEnd.getName().getLocalPart())) { if (tagName != 0) { inlineTagHandler.setOtherTagShortcutLetter(tagName); } else { tagName = inlineTagHandler.getOtherTagShortcutLetter(); } tagN = inlineTagHandler.endOTHER(); if (useSlash) { slashAfter = true; } } else { tagN = inlineTagHandler.endOTHER(); if (useSlash) { slashAfter = true; } } if (tagName == 0) { tagName = 'f'; } if (tagN == null) { // check error of TMX reading Log.logErrorRB( "TMX_ERROR_READING_LEVEL2", e.getLocation().getLineNumber(), e.getLocation().getColumnNumber()); errorsCount++; segContent.setLength(0); // wait for end seg while (true) { XMLEvent ev = xml.nextEvent(); switch (ev.getEventType()) { case XMLEvent.END_ELEMENT: EndElement evEnd = (EndElement) ev; if ("seg".equals(evEnd.getName().getLocalPart())) { return; } } } } segContent.append('<'); if (slashBefore) { segContent.append('/'); } segContent.append(tagName); segContent.append(Integer.toString(tagN)); if (slashAfter) { segContent.append('/'); } segContent.append('>'); break; case XMLEvent.CHARACTERS: Characters c = (Characters) e; if (inlineLevel == 0) { segContent.append(c.getData()); } else { segInlineTag.append(c.getData()); } break; } } }
/** Returns whatever the wrapped {@code delegate} reports for ignorable white space. */
@Override
public boolean isIgnorableWhiteSpace() {
    final boolean ignorable = delegate.isIgnorableWhiteSpace();
    return ignorable;
}
/** Returns whatever the wrapped {@code delegate} reports for its CDATA flag. */
@Override
public boolean isCData() {
    final boolean cdata = delegate.isCData();
    return cdata;
}
/** * Handles a Stream event. * * @param event * @throws SAXException */ private void handleEvent(XMLEvent event) throws SAXException { if (event.getEventType() == XMLStreamConstants.CHARACTERS) { Characters c = event.asCharacters(); lastContents += c.getData(); } else if (event.getEventType() == XMLStreamConstants.START_ELEMENT) { StartElement startElement = event.asStartElement(); String tagLocalName = startElement.getName().getLocalPart(); if ("row".equals(tagLocalName)) { Attribute rowIndex = startElement.getAttributeByName(new QName("r")); if (firstRowIndex == -1) { firstRowIndex = Integer.parseInt(rowIndex.getValue()); } currentRow = new StreamingRow(Integer.parseInt(rowIndex.getValue()) - 1); } else if ("cols".equals(tagLocalName)) { parsingCols = true; } else if ("col".equals(tagLocalName) && parsingCols) { colNumber = colNumber + 1; } else if ("c".equals(tagLocalName)) { Attribute ref = startElement.getAttributeByName(new QName("r")); String[] coord = ref.getValue().split("(?<=\\D)(?=\\d)|(?<=\\d)(?=\\D)"); currentCell = new StreamingCell( CellReference.convertColStringToIndex(coord[0]), Integer.parseInt(coord[1]) - 1); setFormatString(startElement, currentCell); Attribute type = startElement.getAttributeByName(new QName("t")); if (type != null) { currentCell.setType(type.getValue()); } else { currentCell.setType("n"); } Attribute style = startElement.getAttributeByName(new QName("s")); if (style != null) { String indexStr = style.getValue(); try { int index = Integer.parseInt(indexStr); currentCell.setCellStyle(stylesTable.getStyleAt(index)); } catch (NumberFormatException nfe) { LOGGER.warn("Ignoring invalid style index {}", indexStr); } } // we store the dimension as well to revert with this method when cols not found // can happen see xlsx attached here https://jira.talendforge.org/browse/TDP-1957 // <dimension ref="A1:B60"/> } else if ("dimension".equals(tagLocalName)) { Attribute attribute = startElement.getAttributeByName(new QName("ref")); if 
(attribute != null) { this.dimension = attribute.getValue(); } } // Clear contents cache lastContents = ""; } else if (event.getEventType() == XMLStreamConstants.END_ELEMENT) { EndElement endElement = event.asEndElement(); String tagLocalName = endElement.getName().getLocalPart(); if ("v".equals(tagLocalName) || "t".equals(tagLocalName)) { currentCell.setRawContents(unformattedContents()); currentCell.setContents(formattedContents()); } else if ("row".equals(tagLocalName) && currentRow != null) { rowCache.add(currentRow); } else if ("c".equals(tagLocalName)) { currentRow.getCellMap().put(currentCell.getColumnIndex(), currentCell); } else if ("cols".equals(tagLocalName)) { parsingCols = false; } } }
@SuppressWarnings("null") public Feed readFeed() { Feed feed = null; try { boolean isFeedHeader = true; // Set header values intial to the empty string String description = ""; String title = ""; String link = ""; String language = ""; String copyright = ""; String author = ""; String pubdate = ""; String guid = ""; // First create a new XMLInputFactory XMLInputFactory inputFactory = XMLInputFactory.newInstance(); // Setup a new eventReader InputStream in = read(); XMLEventReader eventReader = inputFactory.createXMLEventReader(in); // Read the XML document while (eventReader.hasNext()) { XMLEvent event = eventReader.nextEvent(); if (event.isStartElement()) { if (event.asStartElement().getName().getLocalPart() != null && event.asStartElement().getName().getLocalPart().equals(ITEM)) { if (isFeedHeader) { isFeedHeader = false; feed = new Feed(title, link, description, language, copyright, pubdate); } event = eventReader.nextEvent(); continue; } if (event.asStartElement().getName().getLocalPart() != null && event.asStartElement().getName().getLocalPart().equals(TITLE)) { event = eventReader.nextEvent(); title = event.asCharacters().getData(); continue; } if (event.asStartElement().getName().getLocalPart() != null && event.asStartElement().getName().getLocalPart().equals(DESCRIPTION)) { event = eventReader.nextEvent(); description = event.asCharacters().getData(); continue; } if (event.asStartElement().getName().getLocalPart() != null && event.asStartElement().getName().getLocalPart().equals(LINK)) { event = eventReader.nextEvent(); // System.out.println("Teh hack: " + event.toString() + event.getClass()); Object chars = event.asCharacters(); if (chars instanceof javax.xml.stream.events.Characters) { javax.xml.stream.events.Characters jchars = (javax.xml.stream.events.Characters) chars; link = jchars.getData(); } else { link = event.asCharacters().getData(); } continue; } if (event.asStartElement().getName().getLocalPart() != null && 
event.asStartElement().getName().getLocalPart().equals(GUID)) { event = eventReader.nextEvent(); guid = event.asCharacters().getData(); continue; } if (event.asStartElement().getName().getLocalPart() != null && event.asStartElement().getName().getLocalPart().equals(LANGUAGE)) { event = eventReader.nextEvent(); language = event.asCharacters().getData(); continue; } if (event.asStartElement().getName().getLocalPart() != null && event.asStartElement().getName().getLocalPart().equals(AUTHOR)) { event = eventReader.nextEvent(); author = event.asCharacters().getData(); continue; } if (event.asStartElement().getName().getLocalPart() != null && event.asStartElement().getName().getLocalPart().equals(PUB_DATE)) { event = eventReader.nextEvent(); pubdate = event.asCharacters().getData(); continue; } if (event.asStartElement().getName().getLocalPart() != null && event.asStartElement().getName().getLocalPart().equals(COPYRIGHT)) { event = eventReader.nextEvent(); copyright = event.asCharacters().getData(); continue; } } else if (event.isEndElement()) { if (event.asEndElement().getName().getLocalPart() != null && event.asEndElement().getName().getLocalPart().equals(ITEM)) { FeedMessage message = new FeedMessage(); message.setAuthor(author); message.setDescription(description); message.setGuid(guid); message.setLink(link); message.setTitle(title); feed.getMessages().add(message); event = eventReader.nextEvent(); continue; } } } } catch (XMLStreamException e) { throw new RuntimeException(e); } return feed; }