Java Document.getAnnotations примеры использования

Язык программирования: Java

Класс/Тип: Document

Метод/Функция: getAnnotations

Примеров на hotexamples.com: 4

Java Document.getAnnotations - 4 примера найдено. Это лучшие примеры Java кода для Document.getAnnotations, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

createElement(30)

createTextNode(30)

select(30)

get(30)

insertString(30)

getTextLength(30)

getCharsSequence(30)

getText(30)

appendChild(30)

getRootElement(30)

getDocumentElement(30)

getLineStartOffset(30)

add(30)

getElementsByTagName(30)

getLineEndOffset(29)

getLength(23)

getLineNumber(22)

getLineCount(19)

createRangeMarker(18)

createElementNS(16)

getContent(15)

getElementById(15)

importNode(12)

text(12)

body(11)

save(11)

replaceString(10)

deleteString(10)

remove(10)

createDivElement(9)

getDefaultRootElement(8)

check(8)

getId(8)

replaceItemValue(7)

open(7)

getGamma(7)

close(7)

getNumberOfTypes(7)

getNumberOfTopics(7)

getNumberOfTokens(7)

getFirstChild(7)

getChildNodes(7)

getHtml(6)

html(6)

getBody(6)

getName(6)

getSentence(6)

annotate(6)

toString(6)

setRootElement(6)

Пример #1

Показать файл

Файл: GateDataStore.java Проект: JohnChang2014/EmailSummarization

 public ArrayList<String> getStringFromAnnotation(Object docID, String type)
     throws ResourceInstantiationException {
   ArrayList<String> strings = new ArrayList<String>();
   Document doc = getDocument(docID);
   for (Annotation annt : doc.getAnnotations().get(type)) strings.add(stringFor(doc, annt));
   return strings;
 }

Пример #2

Показать файл

Файл: TestEqual.java Проект: rafaelhss/analisador-dou

  /**
   * Checks two documents for equality.
   *
   * @param doc1 a document
   * @param doc2 another document
   * @return a boolean.
   */
  public static boolean documentsEqual(Document doc1, Document doc2) {
    message = "";
    if (doc1 == null ^ doc2 == null) {
      message = "Documents not equal: null<>non-null!";
      return false;
    }
    if (doc1 == null) return true;
    if (!check(doc1.getContent(), doc2.getContent())) {
      message = "Document contents different!";
      return false;
    }

    if (!check(doc1.getAnnotations(), doc2.getAnnotations())) {
      message = "Documents default AS not equal!";
      return false;
    }

    if (doc1 instanceof TextualDocument) {
      if (doc2 instanceof TextualDocument) {
        if (!check(
            ((TextualDocument) doc1).getEncoding(), ((TextualDocument) doc2).getEncoding())) {
          message = "Textual documents with different encodings!";
          return false;
        }
      } else {
        message = "Documents not equal: textual<>non-textual!";
        return false;
      }
    }
    if (!check(doc1.getFeatures(), doc2.getFeatures())) {
      message = "Documents features not equal!";
      return false;
    }

    // needs friend declaration :(
    //    if(!markupAware.equals(doc.markupAware)) return false;

    if (!check(doc1.getNamedAnnotationSets(), doc2.getNamedAnnotationSets())) {
      message = "Documents named annots not equal!";
      return false;
    }

    //    if(doc1 instanceof DocumentImpl){
    //      if(doc2 instanceof DocumentImpl){
    //        if(! check(((DocumentImpl)doc1).getNextNodeId(),
    //                   ((DocumentImpl)doc2).getNextNodeId())){
    //          message = "Documents next nodeID not equal!";
    //          return false;
    //        }
    //        if(! check(((DocumentImpl)doc1).getNextAnnotationId(),
    //                   ((DocumentImpl)doc2).getNextAnnotationId())){
    //          message = "Documents next annotationIDs not equal!";
    //          return false;
    //        }
    //      }else{
    //        message = "Documents not equal: DocumentImpl<>non-DocumentImpl!";
    //        return false;
    //      }
    //    }

    if (!check(doc1.getSourceUrl(), doc2.getSourceUrl())) {
      message = "Documents sourceURLs not equal!";
      return false;
    }
    if (!(check(doc1.getSourceUrlStartOffset(), doc2.getSourceUrlStartOffset())
        && check(doc1.getSourceUrlEndOffset(), doc2.getSourceUrlEndOffset()))) {
      message = "Documents sourceURLOffsets not equal!";
      return false;
    }
    return true;
  }

Пример #3

Показать файл

Файл: StandAloneAnnie.java Проект: ecohealthalliance/grits-gate

  /**
   * Run from the command-line, with a list of URLs as argument.
   *
   * <p><B>NOTE:</B><br>
   * This code will run with all the documents in memory - if you want to unload each from memory
   * after use, add code to store the corpus in a DataStore.
   */
  public static void main(String args[]) throws GateException, IOException {
    // initialise the GATE library
    Out.prln("Initialising GATE...");
    Gate.init();
    Out.prln("...GATE initialised");

    // initialise ANNIE (this may take several minutes)
    StandAloneAnnie annie = new StandAloneAnnie();
    annie.initAnnie();

    // create a GATE corpus and add a document for each command-line
    // argument
    Corpus corpus = Factory.newCorpus("StandAloneAnnie corpus");
    for (int i = 0; i < args.length; i++) {
      URL u = new URL(args[i]);
      FeatureMap params = Factory.newFeatureMap();
      params.put("sourceUrl", u);
      params.put("preserveOriginalContent", new Boolean(true));
      params.put("collectRepositioningInfo", new Boolean(true));
      Out.prln("Creating doc for " + u);
      Document doc = (Document) Factory.createResource("gate.corpora.DocumentImpl", params);
      corpus.add(doc);
    } // for each of args

    // tell the pipeline about the corpus and run it
    annie.setCorpus(corpus);
    annie.execute();

    // for each document, get an XML document with the
    // person and location names added
    Iterator iter = corpus.iterator();
    int count = 0;
    String startTagPart_1 = "<span GateID=\"";
    String startTagPart_2 = "\" title=\"";
    String startTagPart_3 = "\" style=\"background:Red;\">";
    String endTag = "</span>";

    while (iter.hasNext()) {
      Document doc = (Document) iter.next();
      AnnotationSet defaultAnnotSet = doc.getAnnotations();
      Set annotTypesRequired = new HashSet();
      annotTypesRequired.add("Person");
      annotTypesRequired.add("Location");
      Set<Annotation> peopleAndPlaces =
          new HashSet<Annotation>(defaultAnnotSet.get(annotTypesRequired));

      FeatureMap features = doc.getFeatures();
      String originalContent =
          (String) features.get(GateConstants.ORIGINAL_DOCUMENT_CONTENT_FEATURE_NAME);
      RepositioningInfo info =
          (RepositioningInfo) features.get(GateConstants.DOCUMENT_REPOSITIONING_INFO_FEATURE_NAME);

      ++count;
      File file = new File("StANNIE_" + count + ".HTML");
      Out.prln("File name: '" + file.getAbsolutePath() + "'");
      if (originalContent != null && info != null) {
        Out.prln("OrigContent and reposInfo existing. Generate file...");

        Iterator it = peopleAndPlaces.iterator();
        Annotation currAnnot;
        SortedAnnotationList sortedAnnotations = new SortedAnnotationList();

        while (it.hasNext()) {
          currAnnot = (Annotation) it.next();
          sortedAnnotations.addSortedExclusive(currAnnot);
        } // while

        StringBuffer editableContent = new StringBuffer(originalContent);
        long insertPositionEnd;
        long insertPositionStart;
        // insert anotation tags backward
        Out.prln("Unsorted annotations count: " + peopleAndPlaces.size());
        Out.prln("Sorted annotations count: " + sortedAnnotations.size());
        for (int i = sortedAnnotations.size() - 1; i >= 0; --i) {
          currAnnot = (Annotation) sortedAnnotations.get(i);
          insertPositionStart = currAnnot.getStartNode().getOffset().longValue();
          insertPositionStart = info.getOriginalPos(insertPositionStart);
          insertPositionEnd = currAnnot.getEndNode().getOffset().longValue();
          insertPositionEnd = info.getOriginalPos(insertPositionEnd, true);
          if (insertPositionEnd != -1 && insertPositionStart != -1) {
            editableContent.insert((int) insertPositionEnd, endTag);
            editableContent.insert((int) insertPositionStart, startTagPart_3);
            editableContent.insert((int) insertPositionStart, currAnnot.getType());
            editableContent.insert((int) insertPositionStart, startTagPart_2);
            editableContent.insert((int) insertPositionStart, currAnnot.getId().toString());
            editableContent.insert((int) insertPositionStart, startTagPart_1);
          } // if
        } // for

        FileWriter writer = new FileWriter(file);
        writer.write(editableContent.toString());
        writer.close();
      } // if - should generate
      else if (originalContent != null) {
        Out.prln("OrigContent existing. Generate file...");

        Iterator it = peopleAndPlaces.iterator();
        Annotation currAnnot;
        SortedAnnotationList sortedAnnotations = new SortedAnnotationList();

        while (it.hasNext()) {
          currAnnot = (Annotation) it.next();
          sortedAnnotations.addSortedExclusive(currAnnot);
        } // while

        StringBuffer editableContent = new StringBuffer(originalContent);
        long insertPositionEnd;
        long insertPositionStart;
        // insert anotation tags backward
        Out.prln("Unsorted annotations count: " + peopleAndPlaces.size());
        Out.prln("Sorted annotations count: " + sortedAnnotations.size());
        for (int i = sortedAnnotations.size() - 1; i >= 0; --i) {
          currAnnot = (Annotation) sortedAnnotations.get(i);
          insertPositionStart = currAnnot.getStartNode().getOffset().longValue();
          insertPositionEnd = currAnnot.getEndNode().getOffset().longValue();
          if (insertPositionEnd != -1 && insertPositionStart != -1) {
            editableContent.insert((int) insertPositionEnd, endTag);
            editableContent.insert((int) insertPositionStart, startTagPart_3);
            editableContent.insert((int) insertPositionStart, currAnnot.getType());
            editableContent.insert((int) insertPositionStart, startTagPart_2);
            editableContent.insert((int) insertPositionStart, currAnnot.getId().toString());
            editableContent.insert((int) insertPositionStart, startTagPart_1);
          } // if
        } // for

        FileWriter writer = new FileWriter(file);
        writer.write(editableContent.toString());
        writer.close();
      } else {
        Out.prln("Content : " + originalContent);
        Out.prln("Repositioning: " + info);
      }

      String xmlDocument = doc.toXml(peopleAndPlaces, false);
      String fileName = new String("StANNIE_toXML_" + count + ".HTML");
      FileWriter writer = new FileWriter(fileName);
      writer.write(xmlDocument);
      writer.close();
    } // for each doc
  } // main

Пример #4

Показать файл

Файл: TextualDocumentFormat.java Проект: rafaelhss/analisador-dou

 /**
  * This method annotates paragraphs in a GATE document. The investigated text spans beetween start
  * and end offsets and the paragraph annotations are created in the annotSetName. If annotSetName
  * is null then they are creted in the default annotation set.
  *
  * @param aDoc is the gate document on which the paragraph detection would be performed.If it is
  *     null or its content it's null then the method woul simply return doing nothing.
  * @param startOffset is the index form the document content from which the paragraph detection
  *     will start
  * @param endOffset is the offset where the detection will end.
  * @param annotSetName is the name of the set in which paragraph annotation would be created.The
  *     annotation type created will be "paragraph"
  */
 public void annotateParagraphs(Document aDoc, int startOffset, int endOffset, String annotSetName)
     throws DocumentFormatException {
   // Simply return if the document is null or its content
   if (aDoc == null || aDoc.getContent() == null) return;
   // Simply return if the start is > than the end
   if (startOffset > endOffset) return;
   // Decide where to put the newly detected annotations
   AnnotationSet annotSet = null;
   if (annotSetName == null) annotSet = aDoc.getAnnotations();
   else annotSet = aDoc.getAnnotations(annotSetName);
   // Extract the document content
   String content = aDoc.getContent().toString();
   // This is the offset marking the start of a para
   int startOffsetPara = startOffset;
   // This marks the ned of a para
   int endOffsetPara = endOffset;
   // The initial sate of the FSA
   int state = 1;
   // This field marks that a BR entity was read
   // A BR entity can be NL or NL CR, depending on the operating system (UNIX
   // or DOS)
   boolean readBR = false;
   int index = startOffset;
   while (index < endOffset) {
     // Read the current char
     char ch = content.charAt(index);
     // Test if a BR entity was read
     if (ch == '\n') {
       readBR = true;
       // If \n is followed by a \r then advance the index in order to read a
       // BR entity
       while ((index + 1 < endOffset) && (content.charAt(index + 1) == '\r')) index++;
     } // End if
     switch (state) {
         // It is the initial and also a final state
         // Stay in state 1 while it reads whitespaces
       case 1:
         {
           // If reads a non whitespace char then move to state 2 and record
           // the beggining of a paragraph
           if (!Character.isWhitespace(ch)) {
             state = 2;
             startOffsetPara = index;
           } // End if
         }
         break;
         // It can be also a final state.
       case 2:
         {
           // Stay in state 2 while reading chars != BR entities
           if (readBR) {
             // If you find a BR char go to state 3. The possible end of the para
             // can be index. This will be confirmed by state 3. So, this is why
             // the end of a para is recorded here.
             readBR = false;
             endOffsetPara = index;
             state = 3;
           } // End if
         }
         break;
         // It can be also a final state
         // From state 3 there are only 2 possible ways: (state 2 or state1)
         // In state 1 it needs to read a BR
         // For state 2 it nead to read something different then a BR
       case 3:
         {
           if (readBR) {
             // A BR was read. Go to state 1
             readBR = false;
             state = 1;
             // Create an annotation type paragraph
             try {
               annotSet.add(
                   new Long(startOffsetPara),
                   new Long(endOffsetPara),
                   "paragraph",
                   Factory.newFeatureMap());
             } catch (gate.util.InvalidOffsetException ioe) {
               throw new DocumentFormatException(
                   "Coudn't create a paragraph" + " annotation", ioe);
             } // End try
           } else {
             // Go to state 2 an keep reading chars
             state = 2;
           } // End if
         }
         break;
     } // End switch
     // Prepare to read the next char.
     index++;
   } // End while
   endOffsetPara = index;
   // Investigate where the finite automata has stoped
   if (state == 2 || state == 3) {
     // Create an annotation type paragraph
     try {
       annotSet.add(
           new Long(startOffsetPara),
           // Create the final annotation using the endOffset
           new Long(endOffsetPara),
           "paragraph",
           Factory.newFeatureMap());
     } catch (gate.util.InvalidOffsetException ioe) {
       throw new DocumentFormatException("Coudn't create a paragraph" + " annotation", ioe);
     } // End try
   } // End if
 } // End annotateParagraphs();