/**
 * Recursively rebuilds {@code element} as a tree whose elements are {@link ResultsElement} or
 * {@link ResultElement} instances, chosen by comparing the local name with each class's
 * {@code TAG}.
 *
 * <p>Attributes are copied with {@code XMLUtil.copyAttributes}. Child elements are converted
 * recursively; every other child node (text, comments, processing instructions) is copied as-is.
 * Children that cannot be converted are dropped.
 *
 * @param element the element to convert; must not be null
 * @return the converted element, or {@code null} if the local name of {@code element} matches
 *     neither known tag (an error is logged in that case)
 */
private static Element createResults0(Element element) {
  String tag = element.getLocalName();
  Element newElement;
  if (ResultsElement.TAG.equals(tag)) {
    newElement = new ResultsElement();
  } else if (ResultElement.TAG.equals(tag)) {
    newElement = new ResultElement();
  } else {
    // Previously execution continued with a null target and always ended in a
    // NullPointerException; callers (including the recursion below) already tolerate null.
    LOG.error("Unknown element: " + tag);
    return null;
  }

  XMLUtil.copyAttributes(element, newElement);

  for (int i = 0; i < element.getChildCount(); i++) {
    Node child = element.getChild(i);
    Node newChild;
    if (child instanceof Element) {
      newChild = ResultsElement.createResults0((Element) child);
    } else {
      // Text, comments and processing instructions are not elements; casting them
      // would throw ClassCastException, so they are simply copied.
      newChild = child.copy();
    }
    if (newChild != null) {
      newElement.appendChild(newChild);
    }
  }

  // Serialising the subtree at every recursion level is costly; only do it when trace is on.
  if (LOG.isTraceEnabled()) {
    LOG.trace("XML :" + newElement.toXML());
  }
  return newElement;
}
/**
 * Joins the text of every child element of {@code e} into one comma-separated string.
 *
 * <p>Only children that are {@code Element} instances contribute; each contributes the value
 * returned by {@code getText}. Other node types are ignored.
 *
 * @param e the parent element; may be null
 * @return {@code null} if {@code e} is null, an empty string if {@code e} has no element
 *     children, otherwise the children's texts separated by commas with no trailing comma
 */
private String childrenToString(Element e) {
  if (e == null) {
    return null;
  }
  StringBuilder joined = new StringBuilder();
  for (int i = 0; i < e.getChildCount(); i++) {
    Node n = e.getChild(i);
    if (n instanceof Element) {
      joined.append(getText((Element) n)).append(',');
    }
  }
  // Drop the trailing comma, but only if something was appended: with no element
  // children the old substring(0, -1) call threw StringIndexOutOfBoundsException.
  if (joined.length() > 0) {
    joined.setLength(joined.length() - 1);
  }
  return joined.toString();
}
/**
 * Unmarshals the supplied element into this object.
 *
 * <p>The element is first checked against this object's {@code xmlName}; a mismatch is delegated
 * to {@code handleIncorrectElement}. Otherwise unexpected attributes are collected and, if the
 * element has any children, its content is read via {@code unmarshallContent}.
 *
 * <p>Error handling depends on {@code validationProperties}: when it is {@code null}, failures
 * are thrown as {@link UnmarshallException}; when it is non-null, a content failure is recorded
 * as a validation item and any other failure is only logged.
 *
 * @param element The element to unmarshal.
 * @param validationProperties Properties passed on to validation; {@code null} disables
 *     validation and makes errors propagate as exceptions.
 * @return The result of {@code validate} when {@code validationProperties} is non-null (or the
 *     result of {@code handleIncorrectElement} for a mismatched element); otherwise {@code null}.
 * @throws UnmarshallException If {@code validationProperties} is {@code null} and the element
 *     cannot be processed.
 */
public SwordValidationInfo unmarshall(Element element, Properties validationProperties)
    throws UnmarshallException {
  if (!isInstanceOf(element, xmlName)) {
    return handleIncorrectElement(element, validationProperties);
  }

  final boolean validating = validationProperties != null;
  ArrayList<SwordValidationInfo> validationItems = new ArrayList<SwordValidationInfo>();
  ArrayList<SwordValidationInfo> attributeItems = new ArrayList<SwordValidationInfo>();

  try {
    processUnexpectedAttributes(element, attributeItems);

    if (element.getChildCount() > 0) {
      try {
        unmarshallContent(element);
      } catch (UnmarshallException contentFailure) {
        log.error(
            "Error accessing the content of the " + xmlName.getQualifiedName() + " element");
        if (!validating) {
          // Rethrown here, then caught and wrapped by the outer handler below.
          throw contentFailure;
        }
        SwordValidationInfo contentError =
            new SwordValidationInfo(
                xmlName,
                SwordValidationInfo.ERROR_WITH_CONTENT,
                SwordValidationInfoType.ERROR);
        contentError.setContentDescription(element.getValue());
        validationItems.add(contentError);
      }
    }
  } catch (Exception failure) {
    log.error(
        "Unable to parse an element in " + getQualifiedName() + ": " + failure.getMessage());
    if (!validating) {
      throw new UnmarshallException(
          "Unable to parse an element in " + getQualifiedName(), failure);
    }
    // When validating, the failure has been logged and processing continues to validate().
  }

  if (!validating) {
    return null;
  }
  return validate(validationItems, attributeItems, validationProperties);
}
/**
 * Appends a converted copy of every child of {@code oldElement} to {@code newElement}.
 *
 * <p>Text, comment and processing-instruction children are re-created as new nodes of the same
 * kind; element children are converted with {@code createEditorElement}. Does nothing when
 * {@code oldElement} is {@code null}.
 *
 * @param oldElement the element whose children are read; may be null
 * @param newElement the element that receives the new children
 * @throws RuntimeException if a child is of any other node type
 */
protected static void createSubclassedChildren(
    Element oldElement, AbstractEditorElement newElement) {
  if (oldElement == null) {
    return;
  }
  for (int index = 0; index < oldElement.getChildCount(); index++) {
    Node source = oldElement.getChild(index);
    Node replacement;
    if (source instanceof Text) {
      replacement = new Text(source.getValue());
    } else if (source instanceof Comment) {
      replacement = new Comment(source.getValue());
    } else if (source instanceof ProcessingInstruction) {
      replacement = new ProcessingInstruction((ProcessingInstruction) source);
    } else if (source instanceof Element) {
      replacement = createEditorElement((Element) source);
    } else {
      throw new RuntimeException("Cannot create new node: " + source.getClass());
    }
    newElement.appendChild(replacement);
  }
}
/** * @param text * @param validate * @return * @throws org.purl.sword.base.UnmarshallException */ public SwordValidationInfo unmarshall(Element text, Properties validationProperties) throws UnmarshallException { if (!isInstanceOf(text, xmlName)) { return handleIncorrectElement(text, validationProperties); } ArrayList<SwordValidationInfo> validationItems = new ArrayList<SwordValidationInfo>(); ArrayList<SwordValidationInfo> attributeItems = new ArrayList<SwordValidationInfo>(); try { initialise(); // get the attributes int attributeCount = text.getAttributeCount(); Attribute attribute = null; for (int i = 0; i < attributeCount; i++) { attribute = text.getAttribute(i); if (ATTRIBUTE_TYPE.equals(attribute.getQualifiedName())) { boolean success = true; String value = attribute.getValue(); if (ContentType.TEXT.toString().equals(value)) { type = ContentType.TEXT; } else if (ContentType.HTML.toString().equals(value)) { type = ContentType.HTML; } else if (ContentType.XHTML.toString().equals(value)) { type = ContentType.XHTML; } else { log.error("Unable to parse extract type in " + getQualifiedName()); SwordValidationInfo info = new SwordValidationInfo( xmlName, new XmlName(attribute), "Invalid content type has been specified", SwordValidationInfoType.ERROR); info.setContentDescription(value); attributeItems.add(info); success = false; } if (success) { SwordValidationInfo info = new SwordValidationInfo(xmlName, new XmlName(attribute)); info.setContentDescription(type.toString()); attributeItems.add(info); } } else { SwordValidationInfo info = new SwordValidationInfo( xmlName, new XmlName(attribute), SwordValidationInfo.UNKNOWN_ATTRIBUTE, SwordValidationInfoType.INFO); info.setContentDescription(attribute.getValue()); attributeItems.add(info); } } // retrieve all of the sub-elements int length = text.getChildCount(); if (length > 0) { content = unmarshallString(text); } } catch (Exception ex) { log.error("Unable to parse an element in " + getQualifiedName() + ": " + 
ex.getMessage()); throw new UnmarshallException("Unable to parse an element in " + getQualifiedName(), ex); } SwordValidationInfo result = null; if (validationProperties != null) { result = validate(validationItems, attributeItems, validationProperties); } return result; }
private static List<CoreMap> toTimexCoreMaps(Element docElem, CoreMap originalDocument) { // --Collect Token Offsets HashMap<Integer, Integer> beginMap = new HashMap<Integer, Integer>(); HashMap<Integer, Integer> endMap = new HashMap<Integer, Integer>(); boolean haveTokenOffsets = true; for (CoreMap sent : originalDocument.get(CoreAnnotations.SentencesAnnotation.class)) { for (CoreLabel token : sent.get(CoreAnnotations.TokensAnnotation.class)) { Integer tokBegin = token.get(CoreAnnotations.TokenBeginAnnotation.class); Integer tokEnd = token.get(CoreAnnotations.TokenEndAnnotation.class); if (tokBegin == null || tokEnd == null) { haveTokenOffsets = false; } int charBegin = token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class); int charEnd = token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class); beginMap.put(charBegin, tokBegin); endMap.put(charEnd, tokEnd); } } // --Set Timexes List<CoreMap> timexMaps = new ArrayList<CoreMap>(); int offset = 0; Element textElem = docElem.getFirstChildElement("text"); for (int i = 0; i < textElem.getChildCount(); i++) { Node content = textElem.getChild(i); if (content instanceof Text) { Text text = (Text) content; offset += text.getValue().length(); } else if (content instanceof Element) { Element child = (Element) content; if (child.getLocalName().equals("TIMEX3")) { Timex timex = new Timex(child); if (child.getChildCount() != 1) { throw new RuntimeException("TIMEX3 should only contain text " + child); } String timexText = child.getValue(); CoreMap timexMap = new ArrayCoreMap(); // (timex) timexMap.set(TimexAnnotation.class, timex); // (text) timexMap.set(CoreAnnotations.TextAnnotation.class, timexText); // (characters) int charBegin = offset; timexMap.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, charBegin); offset += timexText.length(); int charEnd = offset; timexMap.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, charEnd); // (tokens) if (haveTokenOffsets) { Integer tokBegin = 
beginMap.get(charBegin); int searchStep = 1; // if no exact match, search around the character offset while (tokBegin == null) { tokBegin = beginMap.get(charBegin - searchStep); if (tokBegin == null) { tokBegin = beginMap.get(charBegin + searchStep); } searchStep += 1; } searchStep = 1; Integer tokEnd = endMap.get(charEnd); while (tokEnd == null) { tokEnd = endMap.get(charEnd - searchStep); if (tokEnd == null) { tokEnd = endMap.get(charEnd + searchStep); } searchStep += 1; } timexMap.set(CoreAnnotations.TokenBeginAnnotation.class, tokBegin); timexMap.set(CoreAnnotations.TokenEndAnnotation.class, tokEnd); } // (add) timexMaps.add(timexMap); } else { throw new RuntimeException("unexpected element " + child); } } else { throw new RuntimeException("unexpected content " + content); } } return timexMaps; }