Ejemplo n.º 1
0
 /**
  * Recursively copies {@code element} into the results-specific element classes.
  *
  * <p>An element whose local name equals {@code ResultsElement.TAG} is recreated as a {@code
  * ResultsElement}, and one whose local name equals {@code ResultElement.TAG} as a {@code
  * ResultElement}. Attributes are transferred with {@code XMLUtil.copyAttributes}. Child elements
  * are converted recursively; every other kind of child node is copied unchanged.
  *
  * @param element the element to convert
  * @return the converted element, or {@code null} when the local name is not recognised (an error
  *     is logged in that case)
  */
 private static Element createResults0(Element element) {
   String tag = element.getLocalName();
   Element newElement;
   if (ResultsElement.TAG.equals(tag)) {
     newElement = new ResultsElement();
   } else if (ResultElement.TAG.equals(tag)) {
     newElement = new ResultElement();
   } else {
     LOG.error("Unknown element: " + tag);
     // There is no target element to populate; returning here avoids dereferencing null below.
     return null;
   }
   XMLUtil.copyAttributes(element, newElement);
   for (int i = 0; i < element.getChildCount(); i++) {
     Node child = element.getChild(i);
     Node converted;
     if (child instanceof Element) {
       converted = ResultsElement.createResults0((Element) child);
     } else {
       // Text, comments and other non-element nodes carry no tag to map, so copy them as-is.
       converted = child.copy();
     }
     // A null result means the child element was not recognised; it is skipped.
     if (converted != null) {
       newElement.appendChild(converted);
     }
   }
   LOG.trace("XML :" + newElement.toXML());
   return newElement;
 }
Ejemplo n.º 2
0
 /**
  * Joins the text of every child element of {@code e} into one comma-separated string.
  *
  * <p>Only children that are {@code Element} instances contribute; their text is obtained through
  * {@code getText}.
  *
  * @param e the parent element, may be {@code null}
  * @return {@code null} when {@code e} is {@code null}; otherwise the child texts separated by
  *     commas, or an empty string when {@code e} has no child elements
  */
 private String childrenToString(Element e) {
   if (e == null) {
     return null;
   }
   StringBuilder joined = new StringBuilder();
   for (int i = 0; i < e.getChildCount(); i++) {
     Node n = e.getChild(i);
     if (n instanceof Element) {
       joined.append(getText((Element) n)).append(',');
     }
   }
   // Drop the trailing separator; with no child elements there is nothing to trim.
   if (joined.length() > 0) {
     joined.setLength(joined.length() - 1);
   }
   return joined.toString();
 }
  /**
   * Unmarshals the supplied element into this object.
   *
   * <p>If the element does not match {@code xmlName}, handling is delegated to {@code
   * handleIncorrectElement}. Otherwise unexpected attributes are processed and, when the element
   * has at least one child node, its content is read through {@code unmarshallContent}.
   *
   * <p>Failure handling depends on {@code validationProperties}:
   *
   * <ul>
   *   <li>When it is {@code null}, any failure is logged and rethrown as an {@link
   *       UnmarshallException}. A content failure is wrapped in a new exception by the outer
   *       handler.
   *   <li>When it is non-null, a content failure is recorded as an {@code ERROR_WITH_CONTENT}
   *       validation item; any other failure is logged only.
   * </ul>
   *
   * @param element the element to read
   * @param validationProperties properties passed on to {@code validate}; {@code null} disables
   *     validation
   * @return the result of {@code validate}, or {@code null} when {@code validationProperties} is
   *     {@code null}
   * @throws UnmarshallException if parsing fails and {@code validationProperties} is {@code null}
   */
  public SwordValidationInfo unmarshall(Element element, Properties validationProperties)
      throws UnmarshallException {
    if (!isInstanceOf(element, xmlName)) {
      return handleIncorrectElement(element, validationProperties);
    }

    final boolean collecting = validationProperties != null;
    ArrayList<SwordValidationInfo> validationItems = new ArrayList<SwordValidationInfo>();
    ArrayList<SwordValidationInfo> attributeItems = new ArrayList<SwordValidationInfo>();

    try {
      processUnexpectedAttributes(element, attributeItems);

      if (element.getChildCount() > 0) {
        try {
          unmarshallContent(element);
        } catch (UnmarshallException contentFailure) {
          log.error(
              "Error accessing the content of the " + xmlName.getQualifiedName() + "  element");
          if (!collecting) {
            throw contentFailure;
          }
          // Validation mode: record the failure instead of aborting.
          SwordValidationInfo contentError =
              new SwordValidationInfo(
                  xmlName, SwordValidationInfo.ERROR_WITH_CONTENT, SwordValidationInfoType.ERROR);
          contentError.setContentDescription(element.getValue());
          validationItems.add(contentError);
        }
      }
    } catch (Exception failure) {
      log.error(
          "Unable to parse an element in " + getQualifiedName() + ": " + failure.getMessage());
      if (!collecting) {
        throw new UnmarshallException(
            "Unable to parse an element in " + getQualifiedName(), failure);
      }
    }

    if (!collecting) {
      return null;
    }
    return validate(validationItems, attributeItems, validationProperties);
  }
 /**
  * Appends a fresh copy of each child of {@code oldElement} to {@code newElement}.
  *
  * <p>Text, comment and processing-instruction children are recreated from the original node;
  * element children are converted through {@code createEditorElement}. Nothing happens when
  * {@code oldElement} is {@code null}.
  *
  * @param oldElement the element whose children are copied, may be {@code null}
  * @param newElement the element that receives the copies
  * @throws RuntimeException if a child is of none of the node kinds listed above
  */
 protected static void createSubclassedChildren(
     Element oldElement, AbstractEditorElement newElement) {
   if (oldElement == null) {
     return;
   }
   for (int index = 0; index < oldElement.getChildCount(); index++) {
     final Node source = oldElement.getChild(index);
     final Node replica;
     if (source instanceof Text) {
       replica = new Text(source.getValue());
     } else if (source instanceof Comment) {
       replica = new Comment(source.getValue());
     } else if (source instanceof ProcessingInstruction) {
       replica = new ProcessingInstruction((ProcessingInstruction) source);
     } else if (source instanceof Element) {
       replica = createEditorElement((Element) source);
     } else {
       throw new RuntimeException("Cannot create new node: " + source.getClass());
     }
     newElement.appendChild(replica);
   }
 }
  /**
   * Unmarshals a typed text element into this object.
   *
   * <p>If the element does not match {@code xmlName}, handling is delegated to {@code
   * handleIncorrectElement}. Otherwise the object is re-initialised and every attribute is
   * inspected:
   *
   * <ul>
   *   <li>The {@code ATTRIBUTE_TYPE} attribute sets {@code type} when its value matches the text,
   *       html or xhtml content type. Any other value is logged and recorded as an error item.
   *   <li>Any other attribute is recorded as an informational "unknown attribute" item.
   * </ul>
   *
   * <p>When the element has at least one child node, {@code content} is read through {@code
   * unmarshallString}. This method adds nothing to the list of element validation items itself;
   * only attribute items are collected here.
   *
   * @param text the element to read
   * @param validationProperties properties passed on to {@code validate}; {@code null} disables
   *     validation
   * @return the result of {@code validate}, or {@code null} when {@code validationProperties} is
   *     {@code null}
   * @throws org.purl.sword.base.UnmarshallException if any step of the parsing fails
   */
  public SwordValidationInfo unmarshall(Element text, Properties validationProperties)
      throws UnmarshallException {
    if (!isInstanceOf(text, xmlName)) {
      return handleIncorrectElement(text, validationProperties);
    }

    ArrayList<SwordValidationInfo> validationItems = new ArrayList<SwordValidationInfo>();
    ArrayList<SwordValidationInfo> attributeItems = new ArrayList<SwordValidationInfo>();

    try {
      initialise();

      // Walk the attributes, recording one validation item per attribute.
      final int total = text.getAttributeCount();
      for (int index = 0; index < total; index++) {
        Attribute current = text.getAttribute(index);
        SwordValidationInfo item;

        if (!ATTRIBUTE_TYPE.equals(current.getQualifiedName())) {
          item =
              new SwordValidationInfo(
                  xmlName,
                  new XmlName(current),
                  SwordValidationInfo.UNKNOWN_ATTRIBUTE,
                  SwordValidationInfoType.INFO);
          item.setContentDescription(current.getValue());
        } else {
          String declared = current.getValue();
          ContentType matched = null;
          if (ContentType.TEXT.toString().equals(declared)) {
            matched = ContentType.TEXT;
          } else if (ContentType.HTML.toString().equals(declared)) {
            matched = ContentType.HTML;
          } else if (ContentType.XHTML.toString().equals(declared)) {
            matched = ContentType.XHTML;
          }

          if (matched != null) {
            type = matched;
            item = new SwordValidationInfo(xmlName, new XmlName(current));
            item.setContentDescription(type.toString());
          } else {
            log.error("Unable to parse extract type in " + getQualifiedName());
            item =
                new SwordValidationInfo(
                    xmlName,
                    new XmlName(current),
                    "Invalid content type has been specified",
                    SwordValidationInfoType.ERROR);
            item.setContentDescription(declared);
          }
        }
        attributeItems.add(item);
      }

      // Read the textual content only when the element actually has children.
      if (text.getChildCount() > 0) {
        content = unmarshallString(text);
      }
    } catch (Exception failure) {
      log.error(
          "Unable to parse an element in " + getQualifiedName() + ": " + failure.getMessage());
      throw new UnmarshallException(
          "Unable to parse an element in " + getQualifiedName(), failure);
    }

    if (validationProperties == null) {
      return null;
    }
    return validate(validationItems, attributeItems, validationProperties);
  }
Ejemplo n.º 6
0
 /**
  * Builds one core map per TIMEX3 element found in the {@code text} child of {@code docElem}.
  *
  * <p>Character offsets are computed by walking the children of the {@code text} element in order
  * and accumulating the length of each text node and TIMEX3 value. When every token of {@code
  * originalDocument} carries token begin/end annotations (and there is at least one token), each
  * timex is additionally aligned to the token whose character offset is closest to the timex
  * boundaries.
  *
  * @param docElem the document element containing a {@code text} child
  * @param originalDocument the annotated document supplying sentence and token offsets
  * @return the timex core maps, in document order
  * @throws RuntimeException if the {@code text} element contains anything other than text nodes
  *     and TIMEX3 elements, or if a TIMEX3 element does not have exactly one child
  */
 private static List<CoreMap> toTimexCoreMaps(Element docElem, CoreMap originalDocument) {
   // --Collect Token Offsets
   HashMap<Integer, Integer> beginMap = new HashMap<Integer, Integer>();
   HashMap<Integer, Integer> endMap = new HashMap<Integer, Integer>();
   boolean haveTokenOffsets = true;
   for (CoreMap sent : originalDocument.get(CoreAnnotations.SentencesAnnotation.class)) {
     for (CoreLabel token : sent.get(CoreAnnotations.TokensAnnotation.class)) {
       Integer tokBegin = token.get(CoreAnnotations.TokenBeginAnnotation.class);
       Integer tokEnd = token.get(CoreAnnotations.TokenEndAnnotation.class);
       if (tokBegin == null || tokEnd == null) {
         haveTokenOffsets = false;
       }
       int charBegin = token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
       int charEnd = token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
       beginMap.put(charBegin, tokBegin);
       endMap.put(charEnd, tokEnd);
     }
   }
   // Without any token there is nothing to align against, and the nearest-offset search would
   // never terminate on an empty map.
   if (beginMap.isEmpty() || endMap.isEmpty()) {
     haveTokenOffsets = false;
   }
   // --Set Timexes
   List<CoreMap> timexMaps = new ArrayList<CoreMap>();
   int offset = 0;
   Element textElem = docElem.getFirstChildElement("text");
   for (int i = 0; i < textElem.getChildCount(); i++) {
     Node content = textElem.getChild(i);
     if (content instanceof Text) {
       Text text = (Text) content;
       offset += text.getValue().length();
     } else if (content instanceof Element) {
       Element child = (Element) content;
       if (child.getLocalName().equals("TIMEX3")) {
         Timex timex = new Timex(child);
         if (child.getChildCount() != 1) {
           throw new RuntimeException("TIMEX3 should only contain text " + child);
         }
         String timexText = child.getValue();
         CoreMap timexMap = new ArrayCoreMap();
         // (timex)
         timexMap.set(TimexAnnotation.class, timex);
         // (text)
         timexMap.set(CoreAnnotations.TextAnnotation.class, timexText);
         // (characters)
         int charBegin = offset;
         timexMap.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, charBegin);
         offset += timexText.length();
         int charEnd = offset;
         timexMap.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, charEnd);
         // (tokens)
         if (haveTokenOffsets) {
           Integer tokBegin = nearestTokenIndex(beginMap, charBegin);
           Integer tokEnd = nearestTokenIndex(endMap, charEnd);
           timexMap.set(CoreAnnotations.TokenBeginAnnotation.class, tokBegin);
           timexMap.set(CoreAnnotations.TokenEndAnnotation.class, tokEnd);
         }
         // (add)
         timexMaps.add(timexMap);
       } else {
         throw new RuntimeException("unexpected element " + child);
       }
     } else {
       throw new RuntimeException("unexpected content " + content);
     }
   }
   return timexMaps;
 }

 /**
  * Finds the token index registered at {@code charOffset}, or at the closest registered offset.
  *
  * <p>If there is no exact match the search widens one character at a time, checking the lower
  * offset before the higher one at each distance. The caller must pass a non-empty map whose
  * values are all non-null, otherwise the search cannot terminate.
  *
  * @param offsetToToken character offset to token index
  * @param charOffset the character offset to look up
  * @return the token index at the nearest registered character offset
  */
 private static Integer nearestTokenIndex(HashMap<Integer, Integer> offsetToToken, int charOffset) {
   Integer tokenIndex = offsetToToken.get(charOffset);
   int searchStep = 1; // if no exact match, search around the character offset
   while (tokenIndex == null) {
     tokenIndex = offsetToToken.get(charOffset - searchStep);
     if (tokenIndex == null) {
       tokenIndex = offsetToToken.get(charOffset + searchStep);
     }
     searchStep += 1;
   }
   return tokenIndex;
 }