/**
   * Aggregates per-feature diffs between values stored in the document's features and the
   * response annotations.
   *
   * <p>For every name in {@code featureNames} the expected values are read from the document's
   * feature map, first under the name itself and, if that yields {@code null}, under the name
   * with an {@code "s"} appended. Each list is converted with {@code Utils.setFromList} and
   * compared through {@code computeDiffWithGoldStandardDataForSingleFeature}; the correct, missing
   * and spurious counts of those diffs are summed.
   *
   * @param document document whose feature map holds the expected values
   * @param featureNames names of the document features to evaluate
   * @param responsesAnnotations annotations passed on to the single-feature comparison
   * @return a differ carrying the summed correct, missing and spurious counts
   */
  @SuppressWarnings("unchecked")
  public static AnnotationDiffer computeDiffWithDocFeatures(
      Document document, List<String> featureNames, AnnotationSet responsesAnnotations) {
    final FeatureMap documentFeatures = document.getFeatures();

    int totalCorrect = 0;
    int totalMissing = 0;
    int totalSpurious = 0;

    for (String name : featureNames) {
      // Try the exact feature name first, then the name with an "s" suffix.
      List<String> expectedValues = (List<String>) documentFeatures.get(name);
      if (expectedValues == null) {
        expectedValues = (List<String>) documentFeatures.get(name + "s");
      }

      AnnotationDiffer featureDiff =
          computeDiffWithGoldStandardDataForSingleFeature(
              name, Utils.setFromList(expectedValues), responsesAnnotations);

      totalSpurious += featureDiff.getSpurious();
      totalCorrect += featureDiff.getCorrectMatches();
      totalMissing += featureDiff.getMissing();
    }

    return new AnnotationDifferDocumentFeaturesImpl(totalCorrect, totalMissing, totalSpurious);
  }
  /**
   * Copies the token annotations of the {@code "Tokenization"} annotation set into the annotation
   * set obtained with the empty name, then removes the {@code "Tokenization"} set.
   *
   * <p>Only annotations whose type equals {@code "Token"} (ignoring case) are copied. Each copy
   * keeps the original start node, end node and type, and gets a new feature map in which
   * {@code "string"} is carried over unchanged, {@code "lemma"} is stored as {@code "root"} and
   * {@code "POS"} is stored as {@code "category"}.
   */
  public void tokenize() {
    AnnotationSet sourceSet = gateDocument.getAnnotations("Tokenization");
    AnnotationSet targetSet = gateDocument.getAnnotations("");

    for (Annotation token : sourceSet) {
      FeatureMap sourceFeatures = token.getFeatures();
      FeatureMap copiedFeatures = Factory.newFeatureMap();

      if ("Token".compareToIgnoreCase(token.getType()) != 0) {
        continue;
      }

      // Rename the tokenizer's feature keys while copying the values.
      copiedFeatures.put("string", sourceFeatures.get("string"));
      copiedFeatures.put("root", sourceFeatures.get("lemma"));
      copiedFeatures.put("category", sourceFeatures.get("POS"));

      targetSet.add(token.getStartNode(), token.getEndNode(), token.getType(), copiedFeatures);
    }

    gateDocument.removeAnnotationSet("Tokenization");
  }
  // Example no. 3
  // 0
  /**
   * Runs the saved GATE application over {@code text} and returns the sentiment it assigned to the
   * document.
   *
   * <p>The application is loaded from a fixed {@code .xgapp} file, executed on a corpus that
   * contains a single document built from {@code text}, and the {@code "sentiment"} entry of the
   * document feature {@code "Doc_sentiment"} is copied into the returned JSON object. The
   * document, corpus and loaded application are released before returning, whether or not
   * processing succeeded.
   *
   * @param text content of the document to analyse
   * @return a JSON object with a single {@code "sentiment"} entry
   * @throws IllegalStateException if the application did not set the {@code "Doc_sentiment"}
   *     document feature
   * @throws Exception if GATE initialisation, loading the application or executing it fails
   */
  public JSONObject persian_sentiment(String text) throws Exception {

    oncreate();

    // NOTE(review): machine-specific absolute path; consider making this configurable.
    File persianGapp = new File("C:/Users/mohammad/Desktop/New folder/Gate/application.xgapp");

    // GATE must be initialised before calling any GATE APIs, but only needs it once per JVM.
    if (!Gate.isInitialised()) {
      Gate.init();
    }

    CorpusController application = null;
    Corpus corpus = null;
    Document doc = null;
    try {
      // load the saved application
      application = (CorpusController) PersistenceManager.loadObjectFromFile(persianGapp);

      // The string parameter to newCorpus() is simply the GATE-internal name of the corpus.
      corpus = Factory.newCorpus("BatchProcessApp Corpus");
      application.setCorpus(corpus);

      doc = Factory.newDocument(text);
      corpus.add(doc);

      application.execute();

      // Read the result while the document is still alive.
      String featureName = "Doc_sentiment";
      Object docSentiment = doc.getFeatures().get(featureName);
      if (docSentiment == null) {
        throw new IllegalStateException(
            "GATE application did not set the document feature '" + featureName + "'");
      }
      LinkedHashMap<?, ?> sentimentDetails = (LinkedHashMap<?, ?>) docSentiment;
      String sentiment = (String) sentimentDetails.get("sentiment");

      // create Json for response to user
      JSONObject response = new JSONObject();
      response.put("sentiment", sentiment);
      return response;
    } finally {
      // Release every GATE resource created by this call, even when processing failed.
      if (corpus != null) {
        corpus.clear();
      }
      if (doc != null) {
        Factory.deleteResource(doc);
      }
      if (corpus != null) {
        Factory.deleteResource(corpus);
      }
      if (application != null) {
        Factory.deleteResource(application);
      }
    }
  }
  /**
   * With stemming enabled in the options, adding a feature made of the tokens "First" and
   * "Second" must leave an entry for {@code "First Second"} in the feature dictionary and an
   * entry for {@code "stem1 stem2"} in the feature storage.
   */
  @Test
  public void testAddFeatureStemmingEnabled() {
    final String sentence = "First Second Third Fourth.";
    final String expectedDictionaryKey = "First Second";
    final String expectedStorageKey = "stem1 stem2";

    // Span boundaries: the first token supplies the start node, the second the end node.
    Node spanStart = Mockito.mock(Node.class);
    Node spanEnd = Mockito.mock(Node.class);
    Mockito.when(spanStart.getOffset()).thenReturn(0L);
    Mockito.when(spanEnd.getOffset()).thenReturn(11L);

    // First token: "First" / "stem1".
    Annotation firstToken = Mockito.mock(Annotation.class);
    FeatureMap firstFeatures = Mockito.mock(FeatureMap.class);
    Mockito.when(firstToken.getFeatures()).thenReturn(firstFeatures);
    Mockito.when(firstFeatures.get("string")).thenReturn("First");
    Mockito.when(firstFeatures.get("stem")).thenReturn("stem1");
    Mockito.when(firstToken.getStartNode()).thenReturn(spanStart);

    // Second token: "Second" / "stem2".
    Annotation secondToken = Mockito.mock(Annotation.class);
    FeatureMap secondFeatures = Mockito.mock(FeatureMap.class);
    Mockito.when(secondToken.getFeatures()).thenReturn(secondFeatures);
    Mockito.when(secondFeatures.get("string")).thenReturn("Second");
    Mockito.when(secondFeatures.get("stem")).thenReturn("stem2");
    Mockito.when(secondToken.getEndNode()).thenReturn(spanEnd);

    Document mockedDocument = Mockito.mock(Document.class);
    Mockito.when(mockedDocument.getName()).thenReturn("doc1");

    ArrayList<Annotation> tokens = new ArrayList<Annotation>();
    tokens.add(firstToken);
    tokens.add(secondToken);

    Mockito.when(options.isEnableStemming()).thenReturn(true);

    featureContainer.addFeature(tokens, sentence, mockedDocument, "content");

    Assert.assertNotNull(featureContainer.getFeatureDictionary().get(expectedDictionaryKey));
    Assert.assertNotNull(featureContainer.getFeatureStorage().get(expectedStorageKey));
  }
  /**
   * Records a matched term as an annotation of type {@code outputASType}.
   *
   * <p>If {@code useNounChunk} is set and a noun chunk type is configured, the span is first
   * widened to the first and last node of the {@code nounChunkType} annotations in
   * {@code inputAS} that cover it. If {@code inputAS} holds no {@code outputASType} annotation in
   * the resulting span, a new one with feature {@code "match"} set to {@code term} is added to
   * {@code outputAS}. Otherwise {@code term} is appended, separated by a space, to the
   * {@code "match"} feature of the first such annotation returned by iteration; when that
   * annotation has no {@code "match"} value yet, the feature is set to {@code term}.
   *
   * @param inputAS input annotation set
   * @param outputAS output annotation set
   * @param term String matched
   * @param outputASType type of the annotation looked up in {@code inputAS} and created in
   *     {@code outputAS}
   * @param startOffset match start offset
   * @param endOffset match end offset
   * @param useNounChunk whether to widen the span to the covering noun chunk annotations
   */
  private void addLookup(
      AnnotationSet inputAS,
      AnnotationSet outputAS,
      String term,
      String outputASType,
      Long startOffset,
      Long endOffset,
      boolean useNounChunk) {
    if (useNounChunk && nounChunkType != null && !nounChunkType.isEmpty()) {
      AnnotationSet nounChunkAS = inputAS.getCovering(nounChunkType, startOffset, endOffset);
      if (!nounChunkAS.isEmpty()) {
        startOffset = nounChunkAS.firstNode().getOffset();
        endOffset = nounChunkAS.lastNode().getOffset();
      }
    }
    try {
      // NOTE(review): existing annotations are looked up in inputAS but new ones are added to
      // outputAS - confirm this is intended when the two sets differ.
      AnnotationSet existingAS = inputAS.get(outputASType, startOffset, endOffset);
      if (existingAS.isEmpty()) {
        FeatureMap fm = Factory.newFeatureMap();
        fm.put("match", term);
        outputAS.add(startOffset, endOffset, outputASType, fm);
      } else {
        Annotation existing = existingAS.iterator().next();
        FeatureMap fm = existing.getFeatures();
        String previousMatches = (String) fm.get("match");
        // Without a previous value, start the list with this term instead of storing null.
        String updatedMatches = (previousMatches == null) ? term : previousMatches + " " + term;
        fm.put("match", updatedMatches);
      }

    } catch (InvalidOffsetException ie) {
      // shouldn't happen
      gate.util.Err.println(ie);
    }
  }
  /**
   * Run from the command-line, with a list of URLs as argument.
   *
   * <p>Each URL is loaded as a GATE document, the ANNIE pipeline is run over the resulting corpus,
   * and two files are written per document into the current working directory:
   *
   * <ul>
   *   <li>{@code StANNIE_<n>.HTML} - the original content with every Person and Location
   *       annotation wrapped in a highlighting {@code <span>}; written only when the document
   *       kept its original content.
   *   <li>{@code StANNIE_toXML_<n>.HTML} - the result of {@code Document.toXml} for the same
   *       annotations.
   * </ul>
   *
   * <p><B>NOTE:</B><br>
   * This code will run with all the documents in memory - if you want to unload each from memory
   * after use, add code to store the corpus in a DataStore.
   *
   * @param args URLs of the documents to process
   * @throws GateException if GATE initialisation, document creation or pipeline execution fails
   * @throws IOException if a URL is malformed or an output file cannot be written
   */
  public static void main(String args[]) throws GateException, IOException {
    // initialise the GATE library
    Out.prln("Initialising GATE...");
    Gate.init();
    Out.prln("...GATE initialised");

    // initialise ANNIE (this may take several minutes)
    StandAloneAnnie annie = new StandAloneAnnie();
    annie.initAnnie();

    // create a GATE corpus and add a document for each command-line argument
    Corpus corpus = Factory.newCorpus("StandAloneAnnie corpus");
    for (String arg : args) {
      URL u = new URL(arg);
      FeatureMap params = Factory.newFeatureMap();
      params.put("sourceUrl", u);
      params.put("preserveOriginalContent", Boolean.TRUE);
      params.put("collectRepositioningInfo", Boolean.TRUE);
      Out.prln("Creating doc for " + u);
      Document doc = (Document) Factory.createResource("gate.corpora.DocumentImpl", params);
      corpus.add(doc);
    } // for each of args

    // tell the pipeline about the corpus and run it
    annie.setCorpus(corpus);
    annie.execute();

    // for each document, get an XML document with the person and location names added
    int count = 0;
    for (Object element : corpus) {
      Document doc = (Document) element;
      AnnotationSet defaultAnnotSet = doc.getAnnotations();
      Set<String> annotTypesRequired = new HashSet<String>();
      annotTypesRequired.add("Person");
      annotTypesRequired.add("Location");
      Set<Annotation> peopleAndPlaces =
          new HashSet<Annotation>(defaultAnnotSet.get(annotTypesRequired));

      FeatureMap features = doc.getFeatures();
      String originalContent =
          (String) features.get(GateConstants.ORIGINAL_DOCUMENT_CONTENT_FEATURE_NAME);
      RepositioningInfo info =
          (RepositioningInfo) features.get(GateConstants.DOCUMENT_REPOSITIONING_INFO_FEATURE_NAME);

      ++count;
      File file = new File("StANNIE_" + count + ".HTML");
      Out.prln("File name: '" + file.getAbsolutePath() + "'");
      if (originalContent != null) {
        writeHighlightedContent(file, originalContent, info, peopleAndPlaces);
      } else {
        Out.prln("Content : " + originalContent);
        Out.prln("Repositioning: " + info);
      }

      String xmlDocument = doc.toXml(peopleAndPlaces, false);
      writeTextFile(new File("StANNIE_toXML_" + count + ".HTML"), xmlDocument);
    } // for each doc
  } // main

  /**
   * Writes {@code originalContent} to {@code file} with each annotation wrapped in a highlighting
   * {@code <span>} that carries the annotation id and type.
   *
   * @param file output file
   * @param originalContent original document text, must not be {@code null}
   * @param info repositioning info used to map annotation offsets back to the original text, or
   *     {@code null} to use the annotation offsets as they are
   * @param annotations annotations to highlight
   * @throws IOException if the file cannot be written
   */
  private static void writeHighlightedContent(
      File file, String originalContent, RepositioningInfo info, Set<Annotation> annotations)
      throws IOException {
    if (info != null) {
      Out.prln("OrigContent and reposInfo existing. Generate file...");
    } else {
      Out.prln("OrigContent existing. Generate file...");
    }

    SortedAnnotationList sortedAnnotations = new SortedAnnotationList();
    for (Annotation annotation : annotations) {
      sortedAnnotations.addSortedExclusive(annotation);
    }

    StringBuilder editableContent = new StringBuilder(originalContent);
    Out.prln("Unsorted annotations count: " + annotations.size());
    Out.prln("Sorted annotations count: " + sortedAnnotations.size());

    // Insert the tags from the last annotation to the first so that earlier offsets stay valid.
    for (int i = sortedAnnotations.size() - 1; i >= 0; --i) {
      Annotation currAnnot = (Annotation) sortedAnnotations.get(i);
      long insertPositionStart = currAnnot.getStartNode().getOffset().longValue();
      if (info != null) {
        insertPositionStart = info.getOriginalPos(insertPositionStart);
      }
      long insertPositionEnd = currAnnot.getEndNode().getOffset().longValue();
      if (info != null) {
        insertPositionEnd = info.getOriginalPos(insertPositionEnd, true);
      }
      if (insertPositionEnd != -1 && insertPositionStart != -1) {
        String startTag =
            "<span GateID=\""
                + currAnnot.getId().toString()
                + "\" title=\""
                + currAnnot.getType()
                + "\" style=\"background:Red;\">";
        // The end tag goes in first: inserting at the start would shift the end position.
        editableContent.insert((int) insertPositionEnd, "</span>");
        editableContent.insert((int) insertPositionStart, startTag);
      } // if
    } // for

    writeTextFile(file, editableContent.toString());
  }

  /**
   * Writes {@code content} to {@code file}, closing the writer even if the write fails.
   *
   * @param file output file
   * @param content text to write
   * @throws IOException if the file cannot be opened or written
   */
  private static void writeTextFile(File file, String content) throws IOException {
    try (FileWriter writer = new FileWriter(file)) {
      writer.write(content);
    }
  }