@SuppressWarnings("unchecked") public static AnnotationDiffer computeDiffWithDocFeatures( Document document, List<String> featureNames, AnnotationSet responsesAnnotations) { FeatureMap doc_fm = document.getFeatures(); // Logger log = Logger.getLogger(DocumentFeaturesDiff.class); int correct = 0; int missing = 0; int spurious = 0; for (String feature_name : featureNames) { // int cur_correct = 0; List<String> f = (List<String>) doc_fm.get(feature_name); if (f == null) { f = (List<String>) doc_fm.get(feature_name + "s"); } AnnotationDiffer diff = computeDiffWithGoldStandardDataForSingleFeature( feature_name, Utils.setFromList(f), responsesAnnotations); spurious += diff.getSpurious(); correct += diff.getCorrectMatches(); missing += diff.getMissing(); } return new AnnotationDifferDocumentFeaturesImpl(correct, missing, spurious); }
public void tokenize() {
  AnnotationSet tokenizationAs = gateDocument.getAnnotations("Tokenization");
  AnnotationSet defaultAs = gateDocument.getAnnotations("");
  for (Annotation currentTokenAnnotation : tokenizationAs) {
    if ("Token".equalsIgnoreCase(currentTokenAnnotation.getType())) {
      FeatureMap tokenFeaturesMap = currentTokenAnnotation.getFeatures();
      FeatureMap curFeaturesMap = Factory.newFeatureMap();
      curFeaturesMap.put("string", tokenFeaturesMap.get("string"));
      curFeaturesMap.put("root", tokenFeaturesMap.get("lemma"));
      curFeaturesMap.put("category", tokenFeaturesMap.get("POS"));
      // Add the new Token to the default annotation set, mapping the source
      // features onto GATE's conventional names (string/root/category).
      defaultAs.add(
          currentTokenAnnotation.getStartNode(),
          currentTokenAnnotation.getEndNode(),
          currentTokenAnnotation.getType(),
          curFeaturesMap);
    }
  }
  gateDocument.removeAnnotationSet("Tokenization");
}
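// A hedged usage sketch: after tokenize() runs, the default annotation set
// holds Token annotations carrying "string", "root" and "category" features,
// which can be read back like this.
AnnotationSet tokens = gateDocument.getAnnotations().get("Token");
for (Annotation token : tokens) {
  FeatureMap fm = token.getFeatures();
  System.out.println(fm.get("string") + "/" + fm.get("category"));
}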
public JSONObject persian_sentiment(String text) throws Exception {
  oncreate();
  // NOTE: the .gapp path is machine-specific.
  File persianGapp =
      new File("C:/Users/mohammad/Desktop/New folder/Gate/application.xgapp");
  // Initialise GATE - this must be done before calling any GATE APIs.
  Gate.init();
  // Load the saved application.
  CorpusController application =
      (CorpusController) PersistenceManager.loadObjectFromFile(persianGapp);
  // Create a corpus to use. We recycle the same Corpus object for each call.
  // The string parameter to newCorpus() is simply the GATE-internal name for
  // the corpus; it has no particular significance.
  Corpus corpus = Factory.newCorpus("BatchProcessApp Corpus");
  application.setCorpus(corpus);
  // Build a document from the raw text, add it to the corpus, and run the
  // pipeline over it.
  Document doc = Factory.newDocument(text);
  corpus.add(doc);
  application.execute();
  String featureName = "Doc_sentiment";
  FeatureMap features = doc.getFeatures();
  // Remove the document from the corpus again and release it; the feature
  // map has already been captured above.
  corpus.clear();
  Factory.deleteResource(doc);
  LinkedHashMap<?, ?> sentimentFeature = (LinkedHashMap<?, ?>) features.get(featureName);
  String sentiment = (String) sentimentFeature.get("sentiment");
  // Create the JSON response for the caller.
  JSONObject response = new JSONObject();
  response.put("sentiment", sentiment);
  return response;
}
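// A hedged usage sketch; the enclosing class name PersianSentimentService is
// hypothetical, and the input string stands in for real Persian text.
PersianSentimentService service = new PersianSentimentService();
JSONObject result = service.persian_sentiment("...");
System.out.println(result.get("sentiment"));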
@Test
public void testAddFeatureStemmingEnabled() {
  Annotation mockedAnnot1 = Mockito.mock(Annotation.class);
  Annotation mockedAnnot2 = Mockito.mock(Annotation.class);
  FeatureMap mockedMap1 = Mockito.mock(FeatureMap.class);
  FeatureMap mockedMap2 = Mockito.mock(FeatureMap.class);
  Node startNode = Mockito.mock(Node.class);
  Node endNode = Mockito.mock(Node.class);
  String wholeSentence = "First Second Third Fourth.";

  Mockito.when(startNode.getOffset()).thenReturn(0L);
  Mockito.when(endNode.getOffset()).thenReturn(11L);
  Mockito.when(mockedAnnot1.getFeatures()).thenReturn(mockedMap1);
  Mockito.when(mockedMap1.get("string")).thenReturn("First");
  Mockito.when(mockedMap1.get("stem")).thenReturn("stem1");
  Mockito.when(mockedAnnot1.getStartNode()).thenReturn(startNode);
  Mockito.when(mockedAnnot2.getFeatures()).thenReturn(mockedMap2);
  Mockito.when(mockedMap2.get("string")).thenReturn("Second");
  Mockito.when(mockedMap2.get("stem")).thenReturn("stem2");
  Mockito.when(mockedAnnot2.getEndNode()).thenReturn(endNode);

  Document gateDocument = Mockito.mock(Document.class);
  Mockito.when(gateDocument.getName()).thenReturn("doc1");

  ArrayList<Annotation> featureAnnots = new ArrayList<Annotation>();
  featureAnnots.add(mockedAnnot1);
  featureAnnots.add(mockedAnnot2);
  Mockito.when(options.isEnableStemming()).thenReturn(true);

  String featureString = "First Second";
  String featureStem = "stem1 stem2";
  featureContainer.addFeature(featureAnnots, wholeSentence, gateDocument, "content");

  Assert.assertNotNull(featureContainer.getFeatureDictionary().get(featureString));
  Assert.assertNotNull(featureContainer.getFeatureStorage().get(featureStem));
}
/**
 * Adds a Lookup-style annotation for a matched term, optionally widening the
 * span to the covering noun chunk.
 *
 * @param inputAS input annotation set
 * @param outputAS output annotation set
 * @param term String matched
 * @param outputASType annotation type to create in the output set
 * @param startOffset match start offset
 * @param endOffset match end offset
 * @param useNounChunk whether to expand the match to the covering noun chunk
 */
private void addLookup(
    AnnotationSet inputAS,
    AnnotationSet outputAS,
    String term,
    String outputASType,
    Long startOffset,
    Long endOffset,
    boolean useNounChunk) {
  if (useNounChunk && nounChunkType != null && !nounChunkType.isEmpty()) {
    AnnotationSet nounChunkAS = inputAS.getCovering(nounChunkType, startOffset, endOffset);
    if (!nounChunkAS.isEmpty()) {
      startOffset = nounChunkAS.firstNode().getOffset();
      endOffset = nounChunkAS.lastNode().getOffset();
    }
  }
  try {
    AnnotationSet diseaseAS = inputAS.get(outputASType, startOffset, endOffset);
    if (diseaseAS.isEmpty()) {
      FeatureMap fm = Factory.newFeatureMap();
      fm.put("match", term);
      outputAS.add(startOffset, endOffset, outputASType, fm);
    } else {
      // An annotation of this type already covers the span: append the new
      // match to its "match" feature (or set it, if absent) rather than
      // creating a duplicate annotation.
      Annotation disease = diseaseAS.iterator().next();
      FeatureMap fm = disease.getFeatures();
      String meta = (String) fm.get("match");
      fm.put("match", meta == null ? term : meta + " " + term);
    }
  } catch (InvalidOffsetException ie) {
    // Shouldn't happen: the offsets come from existing annotations.
    gate.util.Err.println(ie);
  }
}
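// A hedged usage sketch: record a gazetteer hit as a Disease annotation,
// expanding to the covering noun chunk. The offsets, the term, and the type
// name are illustrative.
addLookup(inputAS, outputAS, "diabetes mellitus", "Disease", 120L, 137L, true);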
/**
 * Run from the command-line, with a list of URLs as argument.
 *
 * <p><B>NOTE:</B><br>
 * This code will run with all the documents in memory - if you want to unload
 * each from memory after use, add code to store the corpus in a DataStore.
 */
public static void main(String[] args) throws GateException, IOException {
  // Initialise the GATE library.
  Out.prln("Initialising GATE...");
  Gate.init();
  Out.prln("...GATE initialised");

  // Initialise ANNIE (this may take several minutes).
  StandAloneAnnie annie = new StandAloneAnnie();
  annie.initAnnie();

  // Create a GATE corpus and add a document for each command-line argument.
  Corpus corpus = Factory.newCorpus("StandAloneAnnie corpus");
  for (int i = 0; i < args.length; i++) {
    URL u = new URL(args[i]);
    FeatureMap params = Factory.newFeatureMap();
    params.put("sourceUrl", u);
    params.put("preserveOriginalContent", Boolean.TRUE);
    params.put("collectRepositioningInfo", Boolean.TRUE);
    Out.prln("Creating doc for " + u);
    Document doc = (Document) Factory.createResource("gate.corpora.DocumentImpl", params);
    corpus.add(doc);
  } // for each arg

  // Tell the pipeline about the corpus and run it.
  annie.setCorpus(corpus);
  annie.execute();

  // For each document, get an XML document with the person and location names added.
  Iterator<Document> iter = corpus.iterator();
  int count = 0;
  String startTagPart_1 = "<span GateID=\"";
  String startTagPart_2 = "\" title=\"";
  String startTagPart_3 = "\" style=\"background:Red;\">";
  String endTag = "</span>";

  while (iter.hasNext()) {
    Document doc = iter.next();
    AnnotationSet defaultAnnotSet = doc.getAnnotations();
    Set<String> annotTypesRequired = new HashSet<String>();
    annotTypesRequired.add("Person");
    annotTypesRequired.add("Location");
    Set<Annotation> peopleAndPlaces =
        new HashSet<Annotation>(defaultAnnotSet.get(annotTypesRequired));

    FeatureMap features = doc.getFeatures();
    String originalContent =
        (String) features.get(GateConstants.ORIGINAL_DOCUMENT_CONTENT_FEATURE_NAME);
    RepositioningInfo info =
        (RepositioningInfo) features.get(GateConstants.DOCUMENT_REPOSITIONING_INFO_FEATURE_NAME);

    ++count;
    File file = new File("StANNIE_" + count + ".HTML");
    Out.prln("File name: '" + file.getAbsolutePath() + "'");

    if (originalContent != null && info != null) {
      Out.prln("OrigContent and reposInfo existing. Generate file...");
      Iterator<Annotation> it = peopleAndPlaces.iterator();
      Annotation currAnnot;
      SortedAnnotationList sortedAnnotations = new SortedAnnotationList();
      while (it.hasNext()) {
        currAnnot = it.next();
        sortedAnnotations.addSortedExclusive(currAnnot);
      } // while

      StringBuffer editableContent = new StringBuffer(originalContent);
      long insertPositionEnd;
      long insertPositionStart;
      // Insert annotation tags backward so earlier offsets stay valid.
      Out.prln("Unsorted annotations count: " + peopleAndPlaces.size());
      Out.prln("Sorted annotations count: " + sortedAnnotations.size());
      for (int i = sortedAnnotations.size() - 1; i >= 0; --i) {
        currAnnot = (Annotation) sortedAnnotations.get(i);
        insertPositionStart = currAnnot.getStartNode().getOffset().longValue();
        insertPositionStart = info.getOriginalPos(insertPositionStart);
        insertPositionEnd = currAnnot.getEndNode().getOffset().longValue();
        insertPositionEnd = info.getOriginalPos(insertPositionEnd, true);
        if (insertPositionEnd != -1 && insertPositionStart != -1) {
          editableContent.insert((int) insertPositionEnd, endTag);
          editableContent.insert((int) insertPositionStart, startTagPart_3);
          editableContent.insert((int) insertPositionStart, currAnnot.getType());
          editableContent.insert((int) insertPositionStart, startTagPart_2);
          editableContent.insert((int) insertPositionStart, currAnnot.getId().toString());
          editableContent.insert((int) insertPositionStart, startTagPart_1);
        } // if
      } // for

      FileWriter writer = new FileWriter(file);
      writer.write(editableContent.toString());
      writer.close();
    } // if - should generate
    else if (originalContent != null) {
      Out.prln("OrigContent existing. Generate file...");
      Iterator<Annotation> it = peopleAndPlaces.iterator();
      Annotation currAnnot;
      SortedAnnotationList sortedAnnotations = new SortedAnnotationList();
      while (it.hasNext()) {
        currAnnot = it.next();
        sortedAnnotations.addSortedExclusive(currAnnot);
      } // while

      StringBuffer editableContent = new StringBuffer(originalContent);
      long insertPositionEnd;
      long insertPositionStart;
      // Insert annotation tags backward; without repositioning info the GATE
      // offsets are used directly.
      Out.prln("Unsorted annotations count: " + peopleAndPlaces.size());
      Out.prln("Sorted annotations count: " + sortedAnnotations.size());
      for (int i = sortedAnnotations.size() - 1; i >= 0; --i) {
        currAnnot = (Annotation) sortedAnnotations.get(i);
        insertPositionStart = currAnnot.getStartNode().getOffset().longValue();
        insertPositionEnd = currAnnot.getEndNode().getOffset().longValue();
        if (insertPositionEnd != -1 && insertPositionStart != -1) {
          editableContent.insert((int) insertPositionEnd, endTag);
          editableContent.insert((int) insertPositionStart, startTagPart_3);
          editableContent.insert((int) insertPositionStart, currAnnot.getType());
          editableContent.insert((int) insertPositionStart, startTagPart_2);
          editableContent.insert((int) insertPositionStart, currAnnot.getId().toString());
          editableContent.insert((int) insertPositionStart, startTagPart_1);
        } // if
      } // for

      FileWriter writer = new FileWriter(file);
      writer.write(editableContent.toString());
      writer.close();
    } else {
      Out.prln("Content : " + originalContent);
      Out.prln("Repositioning: " + info);
    }

    String xmlDocument = doc.toXml(peopleAndPlaces, false);
    String fileName = "StANNIE_toXML_" + count + ".HTML";
    FileWriter writer = new FileWriter(fileName);
    writer.write(xmlDocument);
    writer.close();
  } // for each doc
} // main
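// A hedged usage note: a typical invocation (classpath and URL are
// illustrative) looks like
//   java -cp gate.jar:. StandAloneAnnie http://example.com/news.html
// and produces StANNIE_1.HTML with inline <span> highlights over the original
// markup, plus StANNIE_toXML_1.HTML containing the GATE XML dump of the
// Person and Location annotations.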