/**
 * Rebuilds the annotated text as a space-separated token string in which tokens belonging to a
 * coreference chain are replaced by the words of that chain's representative mention.
 *
 * <p>Tokens without a chain are copied unchanged. A token with a chain is copied unchanged when
 * its index lies within {@code [startIndex, endIndex]} of the representative mention; otherwise
 * the words at positions {@code startIndex .. endIndex - 1} of the representative mention's
 * sentence are emitted in its place. The result is also printed to standard output, preceded by
 * an empty line.
 *
 * <p>NOTE(review): the "inside the representative mention" test compares token indexes only and
 * does not check that the token is in the same sentence as the representative mention; every
 * token of a multi-token mention is replaced individually. Both look unintended -- confirm.
 *
 * @param annotation annotation holding the coreference chains and the sentences
 * @return the resolved text; each word (including the last) is followed by a single space
 */
public static final String doCorefResolution(Annotation annotation) {
  Map<Integer, CorefChain> corefs = annotation.get(CorefChainAnnotation.class);
  List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
  List<String> resolved = new ArrayList<String>();

  for (CoreMap sentence : sentences) {
    List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
    for (CoreLabel token : tokens) {
      Integer corefClustId = token.get(CorefCoreAnnotations.CorefClusterIdAnnotation.class);
      CorefChain chain = corefs.get(corefClustId);
      if (chain == null) {
        resolved.add(token.word());
        continue;
      }

      CorefMention reprMent = chain.getRepresentativeMention();
      boolean outsideRepresentative =
          token.index() < reprMent.startIndex || token.index() > reprMent.endIndex;
      if (!outsideRepresentative) {
        resolved.add(token.word());
        continue;
      }

      // sentNum and the mention indexes are used with a -1 shift when indexing the lists.
      CoreMap corefSentence = sentences.get(reprMent.sentNum - 1);
      List<CoreLabel> corefSentenceTokens =
          corefSentence.get(CoreAnnotations.TokensAnnotation.class);
      for (int i = reprMent.startIndex; i < reprMent.endIndex; i++) {
        resolved.add(corefSentenceTokens.get(i - 1).word());
      }
    }
  }

  // StringBuilder avoids re-copying the whole text for every appended word.
  StringBuilder resolvedStr = new StringBuilder();
  System.out.println();
  for (String str : resolved) {
    resolvedStr.append(str).append(" ");
  }
  String result = resolvedStr.toString();
  System.out.println(result);
  return result;
}
public static void main(String[] args) throws IOException, ClassNotFoundException { Timing tim = new Timing(); AnnotationPipeline ap = new AnnotationPipeline(); boolean verbose = false; ap.addAnnotator(new TokenizerAnnotator(verbose, "en")); ap.addAnnotator(new WordsToSentencesAnnotator(verbose)); // ap.addAnnotator(new NERCombinerAnnotator(verbose)); // ap.addAnnotator(new OldNERAnnotator(verbose)); // ap.addAnnotator(new NERMergingAnnotator(verbose)); ap.addAnnotator(new ParserAnnotator(verbose, -1)); /** * ap.addAnnotator(new UpdateSentenceFromParseAnnotator(verbose)); ap.addAnnotator(new * NumberAnnotator(verbose)); ap.addAnnotator(new * QuantifiableEntityNormalizingAnnotator(verbose)); ap.addAnnotator(new * StemmerAnnotator(verbose)); ap.addAnnotator(new MorphaAnnotator(verbose)); */ // ap.addAnnotator(new SRLAnnotator()); String text = ("USAir said in the filings that Mr. Icahn first contacted Mr. Colodny last September to discuss the benefits of combining TWA and USAir -- either by TWA's acquisition of USAir, or USAir's acquisition of TWA."); Annotation a = new Annotation(text); ap.annotate(a); System.out.println(a.get(CoreAnnotations.TokensAnnotation.class)); for (CoreMap sentence : a.get(CoreAnnotations.SentencesAnnotation.class)) { System.out.println(sentence.get(TreeCoreAnnotations.TreeAnnotation.class)); } if (TIME) { System.out.println(ap.timingInformation()); System.err.println("Total time for AnnotationPipeline: " + tim.toSecondsString() + " sec."); } }
public String resolveAnnotationType(Annotation annotation) { StringBuffer type = new StringBuffer(); AnnotationDefinition annotationDefinition = annotation.getAnnotationDefinition(); if (annotationDefinition == null) { logger.warn("Annotation definition for annotation: " + annotation + " is not defined."); return type.toString(); } if (annotationDefinition.isMarker()) { return type.toString(); } // finally we can process annotation members. Object memberValue; int memberCount = 0; for (AnnotationMemberDefinition memberDefinition : annotationDefinition.getAnnotationMembers()) { if ((memberValue = annotation.getValue(memberDefinition.getName())) != null) { // a value has been set for this member. if (memberCount == 0) type.append("("); if (memberCount > 0) type.append(", "); type.append(resolveMemberType(memberDefinition, memberValue)); memberCount++; } } if (memberCount > 0) type.append(")"); return type.toString(); }
public boolean addSortedExclusive(Annotation annot) { Annotation currAnot = null; // overlapping check for (int i = 0; i < size(); ++i) { currAnot = (Annotation) get(i); if (annot.overlaps(currAnot)) { return false; } // if } // for long annotStart = annot.getStartNode().getOffset().longValue(); long currStart; // insert for (int i = 0; i < size(); ++i) { currAnot = (Annotation) get(i); currStart = currAnot.getStartNode().getOffset().longValue(); if (annotStart < currStart) { insertElementAt(annot, i); /* Out.prln("Insert start: "+annotStart+" at position: "+i+" size="+size()); Out.prln("Current start: "+currStart); */ return true; } // if } // for int size = size(); insertElementAt(annot, size); // Out.prln("Insert start: "+annotStart+" at size position: "+size); return true; } // addSorted
/** splits document 'doc' into sentences, adding 'sentence' annotations */ static void addSentences(Document doc) { SpecialZoner.findSpecialZones(doc); Vector<Annotation> textSegments = doc.annotationsOfType("TEXT"); if (textSegments == null) { System.out.println("No <TEXT> in document"); return; } for (Annotation ann : textSegments) { Span textSpan = ann.span(); // check document case Ace.monocase = Ace.allLowerCase(doc); // split into sentences SentenceSplitter.split(doc, textSpan); } Vector<Annotation> sentences = doc.annotationsOfType("sentence"); if (sentences != null) { int sentNo = 0; for (Annotation sentence : sentences) { sentNo++; sentence.put("ID", "SENT-" + sentNo); } } doc.removeAnnotationsOfType("dateline"); doc.removeAnnotationsOfType("textBreak"); doc.shrink("sentence"); }
/**
 * Reports whether {@code anns} contains an annotation whose annotation type is exactly
 * {@code clazz}.
 *
 * <p>A {@code null} array and {@code null} array elements are tolerated and treated as "not
 * present" rather than causing a {@link NullPointerException}.
 *
 * @param anns the annotations to search; may be {@code null} or empty
 * @param clazz the annotation type to look for
 * @return {@code true} if at least one element has {@code clazz} as its annotation type
 */
public static boolean hasAnnotation(Annotation[] anns, Class<? extends Annotation> clazz) {
  if (anns == null) {
    return false;
  }
  for (Annotation ann : anns) {
    // Comparing Class objects by identity is intentional: an exact type match is required.
    if (ann != null && clazz == ann.annotationType()) {
      return true;
    }
  }
  return false;
}
/** * train the tagger using the DocumentCollection in file 'trainingCollection'. * 'trainingCollection' should consist of documents which have been explicitly tagged with * part-of-speech information. */ void train(String trainingCollection) { for (int i = 0; i < posTable.length; i++) tagTable[i] = new String[] {"constit", "cat", posTable[i], posTable[i]}; // build ergodic HMM with one state for each POS (plus start and end states) HMMstate startState = new HMMstate("start", "", WordFeatureHMMemitter.class); posh.addState(startState); for (int j = 0; j < posTable.length; j++) startState.addArc(new HMMarc(posTable[j], 0)); HMMstate endState = new HMMstate("end", "", WordFeatureHMMemitter.class); posh.addState(endState); for (int i = 0; i < posTable.length; i++) { String pos = posTable[i]; HMMstate state = new HMMstate(pos, pos, WordFeatureHMMemitter.class); posh.addState(state); for (int j = 0; j < posTable.length; j++) state.addArc(new HMMarc(posTable[j], 0)); state.addArc(new HMMarc("end", 0)); } posh.resolveNames(); posh.resetForTraining(); annotator = new HMMannotator(posh); annotator.setTagTable(tagTable); annotator.setBItag(false); DocumentCollection col = new DocumentCollection(trainingCollection); col.open(); for (int i = 0; i < col.size(); i++) { ExternalDocument doc = col.get(i); doc.open(); System.out.println("Training from " + doc.fileName()); // divide at endmarks (constit cat="."), adding "S" marks int posn = 0; int start = posn; Vector anns; while ((anns = doc.annotationsAt(posn, "constit")) != null) { Annotation ann = (Annotation) anns.get(0); posn = ann.span().end(); String pos = (String) ann.get("cat"); if (pos.equals(".")) { doc.annotate("S", new Span(start, posn), new FeatureSet()); start = posn; } } annotator.train(doc); // free up space taken by annotations on document doc.clearAnnotations(); } posh.computeProbabilities(); }
// Ang's suggestion on getting annotation values public static String getClassAnnotationValue( Class classType, Class annotationType, String attributeName) { String value = null; Annotation annotation = classType.getAnnotation(annotationType); if (annotation != null) { try { value = (String) annotation.annotationType().getMethod(attributeName).invoke(annotation); } catch (Exception ex) { System.out.println("Failed loading class annotations"); } } return value; }
static void writeDoc1(Document doc, PrintStream out) throws IOException { Vector<Annotation> entities = doc.annotationsOfType("entity"); if (entities == null) { System.err.println("No Entity: " + doc); return; } Iterator<Annotation> entityIt = entities.iterator(); int i = 0; while (entityIt.hasNext()) { Annotation entity = entityIt.next(); Vector mentions = (Vector) entity.get("mentions"); Iterator mentionIt = mentions.iterator(); String nameType = (String) entity.get("nameType"); while (mentionIt.hasNext()) { Annotation mention1 = (Annotation) mentionIt.next(); Annotation mention2 = new Annotation("refobj", mention1.span(), new FeatureSet()); mention2.put("objid", Integer.toString(i)); if (nameType != null) { mention2.put("netype", nameType); } doc.addAnnotation(mention2); } i++; } // remove other annotations. String[] annotypes = doc.getAnnotationTypes(); for (i = 0; i < annotypes.length; i++) { String t = annotypes[i]; if (!(t.equals("tagger") || t.equals("refobj") || t.equals("ENAMEX"))) { doc.removeAnnotationsOfType(t); } } writeDocRaw(doc, out); return; }
/**
 * Indexes every annotated slot of every domain class by annotation name: for each annotation on
 * a slot, an {@code AnnotatedSlot(domClass, slot)} is appended to the list stored in
 * {@code annotatedSlots} under that annotation's name, creating the list on first use.
 */
private void registerAnnotatedSlots() {
  for (DomainClass domClass : classes.values()) {
    for (Slot slot : domClass.getSlotsList()) {
      for (Annotation ann : slot.getAnnotations()) {
        AnnotatedSlot entry = new AnnotatedSlot(domClass, slot);
        List<AnnotatedSlot> bucket = this.annotatedSlots.get(ann.getName());
        if (bucket != null) {
          bucket.add(entry);
          continue;
        }
        // first slot seen for this annotation name
        List<AnnotatedSlot> fresh = new ArrayList<AnnotatedSlot>();
        fresh.add(entry);
        this.annotatedSlots.put(ann.getName(), fresh);
      }
    }
  }
}
/**
 * Parses an annotation type definition.
 *
 * <p>Copies name, qualified name, comment text, inclusion flag and scope from the doc class,
 * parses each declared element (logging at debug level when there are none), and parses the
 * annotation instances attached to the type itself.
 *
 * @param docClass the doc class to parse; it is cast to {@code AnnotationTypeDoc}
 * @return the populated annotation model
 */
protected static Annotation ParseAnnotation(ClassDoc docClass) {
  AnnotationTypeDoc annotationDoc = (AnnotationTypeDoc) docClass;
  assert (annotationDoc != null);

  Annotation result = new Annotation();
  result.name = docClass.name();
  result.qualifiedName = docClass.qualifiedName();
  result.comment = docClass.commentText();
  result.isIncluded = docClass.isIncluded();
  result.scope = DetermineScope(docClass);

  AnnotationTypeElementDoc[] declared = annotationDoc.elements();
  if (declared == null || declared.length == 0) {
    log.debug("No elements in annotation: " + docClass.name());
  } else {
    AnnotationElement[] parsed = new AnnotationElement[declared.length];
    for (int i = 0; i < declared.length; i++) {
      parsed[i] = ParseAnnotationElement(declared[i]);
    }
    result.elements = parsed;
  }

  result.annotationInstances =
      ParseAnnotationInstances(docClass.annotations(), docClass.qualifiedName());
  return result;
}
/**
 * Parses every "sentence" annotation of Document 'doc' and collects the resulting dependency
 * relations in a single SyntacticRelationSet.
 *
 * @return the collected relations, or {@code null} (after printing a message) when the document
 *     has no sentences or when no model is loaded ({@code fsw} is null)
 */
public static SyntacticRelationSet parseDocument(Document doc) {
  Vector<Annotation> sentences = doc.annotationsOfType("sentence");
  boolean noSentences = sentences == null || sentences.size() == 0;
  if (noSentences) {
    System.out.println("DepParser: no sentences");
    return null;
  }
  if (fsw == null) {
    System.out.println("DepParser: no model loaded");
    return null;
  }

  SyntacticRelationSet collected = new SyntacticRelationSet();
  for (int s = 0; s < sentences.size(); s++) {
    parseSentence(doc, sentences.get(s).span(), collected);
  }
  return collected;
}
/**
 * Registers the field as a tested field if it is annotated with {@code Tested}, either directly
 * or through an annotation that is itself annotated with {@code Tested}. Only the first matching
 * annotation is used, so at most one {@code TestedField} is added per field.
 */
private void addAsTestedFieldIfApplicable(@NotNull Field fieldFromTestClass) {
  for (Annotation candidate : fieldFromTestClass.getDeclaredAnnotations()) {
    Tested metadata =
        candidate instanceof Tested
            ? (Tested) candidate
            : candidate.annotationType().getAnnotation(Tested.class);
    if (metadata == null) {
      continue;
    }
    testedFields.add(new TestedField(injectionState, fieldFromTestClass, metadata));
    return;
  }
}
/**
 * Prefetches the redirect list for the computed output via {@code Annotation.prefetchRedirectList}
 * and returns the same list instance.
 *
 * @throws RuntimeException wrapping the {@link IOException} if prefetching fails (the stack
 *     trace is printed first)
 */
@Override
public List<HashSet<ScoredTag>> preProcessOutput(List<HashSet<ScoredTag>> computedOutput) {
  try {
    Annotation.prefetchRedirectList(computedOutput, api);
    return computedOutput;
  } catch (IOException ioFailure) {
    ioFailure.printStackTrace();
    throw new RuntimeException(ioFailure);
  }
}
/**
 * Parses all {@code <annotation>} child elements of {@code parent}.
 *
 * <p>Each annotation element must contain {@code <label>}, {@code <value>} and
 * {@code <valuetype>} child elements. The unit is taken from the element's {@code unit}
 * attribute and stays {@code null} when that attribute is absent. For annotations of value type
 * {@code ONTOLOGY} the {@code ontologyref} attribute is read as well.
 *
 * @param parent the node whose direct children are scanned
 * @return the parsed annotations in document order (possibly empty)
 * @throws XmlParserException if label, value or valuetype is missing
 */
private static Vector<Annotation> parseAnnotations(Node parent) throws XmlParserException {
  Vector<Annotation> annotations = new Vector<Annotation>();

  NodeList nodes = parent.getChildNodes();
  for (int nodeid = 0; nodeid < nodes.getLength(); ++nodeid) {
    Node node = nodes.item(nodeid);
    if (node.getNodeType() != Node.ELEMENT_NODE) continue;

    Element element = (Element) node;
    if (!element.getTagName().equals("annotation")) continue;

    String label = null, value = null, valuetype = null;
    NodeList annotation_nodes = element.getChildNodes();
    for (int annotationid = 0; annotationid < annotation_nodes.getLength(); ++annotationid) {
      Node annotation_node = annotation_nodes.item(annotationid);
      if (annotation_node.getNodeType() != Node.ELEMENT_NODE) continue;

      Element annotation_element = (Element) annotation_node;
      String tag = annotation_element.getTagName();
      if (tag.equals("label")) label = annotation_element.getTextContent();
      else if (tag.equals("value")) value = annotation_element.getTextContent();
      else if (tag.equals("valuetype")) valuetype = annotation_element.getTextContent();
    }

    if (label == null || value == null || valuetype == null)
      throw new XmlParserException("Annotation is missing either: label, value or valuetype");

    Annotation annotation =
        new Annotation(label, value, Annotation.ValueType.valueOf(valuetype));
    if (annotation.getValueType() == Annotation.ValueType.ONTOLOGY)
      annotation.setOntologyRef(element.getAttribute("ontologyref"));

    // Element.getAttribute never returns null (it yields "" for a missing attribute), so a
    // null check cannot detect absence; hasAttribute is the reliable test.
    String unit = element.hasAttribute("unit") ? element.getAttribute("unit") : null;
    annotation.setUnit(unit);

    annotations.add(annotation);
  }

  return annotations;
}
/**
 * Runs {@code annotateMatched} over the tokens of every sentence in the annotation, or over the
 * annotation's own token list when it has no sentences.
 *
 * @throws RuntimeException if the annotation has neither sentences nor tokens
 */
@Override
public void annotate(Annotation annotation) {
  if (verbose) {
    System.err.print("Adding TokensRegexNER annotations ... ");
  }

  List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
  if (sentences == null) {
    // no sentence split available: fall back to the document-level token list
    List<CoreLabel> allTokens = annotation.get(CoreAnnotations.TokensAnnotation.class);
    if (allTokens == null) {
      throw new RuntimeException("Unable to find sentences or tokens in " + annotation);
    }
    annotateMatched(allTokens);
  } else {
    for (CoreMap sentence : sentences) {
      annotateMatched(sentence.get(CoreAnnotations.TokensAnnotation.class));
    }
  }

  if (verbose) {
    System.err.println("done.");
  }
}
/**
 * Scans the editor's annotation model and records in {@code mPosition} / {@code mCanFix} the
 * position of the last qualifying annotation on the ruler line.
 *
 * <p>An annotation qualifies when it is not marked deleted, its layer is not below the layer of
 * the previously accepted annotation, its position is on the ruler line, and either (a) the
 * editor input is not read-only and it is a quick-fixable {@code INIProblemAnnotation}
 * ({@code mCanFix} becomes true), or (b) its vertical-ruler preference key is enabled in
 * {@code mStore} ({@code mCanFix} becomes false). Both fields are reset before scanning.
 */
private void findAnnotation() {
  mPosition = null;
  mCanFix = false;

  IAnnotationModel model =
      mTextEditor.getDocumentProvider().getAnnotationModel(mTextEditor.getEditorInput());
  IAnnotationAccessExtension access = getAnnotationAccessExtension();
  IDocument document = getDocument();
  if (model == null) {
    return;
  }

  int acceptedLayer = Integer.MIN_VALUE;
  for (Iterator<?> it = model.getAnnotationIterator(); it.hasNext(); ) {
    Annotation candidate = (Annotation) it.next();
    if (candidate.isMarkedDeleted()) {
      continue;
    }

    int candidateLayer = Integer.MAX_VALUE;
    if (access != null) {
      candidateLayer = access.getLayer(candidate);
      if (candidateLayer < acceptedLayer) {
        continue;
      }
    }

    Position position = model.getPosition(candidate);
    if (!includesRulerLine(position, document)) {
      continue;
    }

    boolean readOnly =
        mTextEditor instanceof ITextEditorExtension
            && ((ITextEditorExtension) mTextEditor).isEditorInputReadOnly();
    boolean quickFixable =
        !readOnly
            && candidate instanceof INIProblemAnnotation
            && ((INIProblemAnnotation) candidate).isQuickFixable();

    if (!quickFixable) {
      // Not fixable: accept only if its ruler preference exists and is switched on.
      AnnotationPreference preference =
          mAnnotationPreferenceLookup.getAnnotationPreference(candidate);
      if (preference == null) {
        continue;
      }
      String key = preference.getVerticalRulerPreferenceKey();
      if (key == null || !mStore.getBoolean(key)) {
        continue;
      }
    }

    mPosition = position;
    mCanFix = quickFixable;
    acceptedLayer = candidateLayer;
  }
}
/** * Run from the command-line, with a list of URLs as argument. * * <p><B>NOTE:</B><br> * This code will run with all the documents in memory - if you want to unload each from memory * after use, add code to store the corpus in a DataStore. */ public static void main(String args[]) throws GateException, IOException { // initialise the GATE library Out.prln("Initialising GATE..."); Gate.init(); Out.prln("...GATE initialised"); // initialise ANNIE (this may take several minutes) StandAloneAnnie annie = new StandAloneAnnie(); annie.initAnnie(); // create a GATE corpus and add a document for each command-line // argument Corpus corpus = Factory.newCorpus("StandAloneAnnie corpus"); for (int i = 0; i < args.length; i++) { URL u = new URL(args[i]); FeatureMap params = Factory.newFeatureMap(); params.put("sourceUrl", u); params.put("preserveOriginalContent", new Boolean(true)); params.put("collectRepositioningInfo", new Boolean(true)); Out.prln("Creating doc for " + u); Document doc = (Document) Factory.createResource("gate.corpora.DocumentImpl", params); corpus.add(doc); } // for each of args // tell the pipeline about the corpus and run it annie.setCorpus(corpus); annie.execute(); // for each document, get an XML document with the // person and location names added Iterator iter = corpus.iterator(); int count = 0; String startTagPart_1 = "<span GateID=\""; String startTagPart_2 = "\" title=\""; String startTagPart_3 = "\" style=\"background:Red;\">"; String endTag = "</span>"; while (iter.hasNext()) { Document doc = (Document) iter.next(); AnnotationSet defaultAnnotSet = doc.getAnnotations(); Set annotTypesRequired = new HashSet(); annotTypesRequired.add("Person"); annotTypesRequired.add("Location"); Set<Annotation> peopleAndPlaces = new HashSet<Annotation>(defaultAnnotSet.get(annotTypesRequired)); FeatureMap features = doc.getFeatures(); String originalContent = (String) features.get(GateConstants.ORIGINAL_DOCUMENT_CONTENT_FEATURE_NAME); RepositioningInfo info = 
(RepositioningInfo) features.get(GateConstants.DOCUMENT_REPOSITIONING_INFO_FEATURE_NAME); ++count; File file = new File("StANNIE_" + count + ".HTML"); Out.prln("File name: '" + file.getAbsolutePath() + "'"); if (originalContent != null && info != null) { Out.prln("OrigContent and reposInfo existing. Generate file..."); Iterator it = peopleAndPlaces.iterator(); Annotation currAnnot; SortedAnnotationList sortedAnnotations = new SortedAnnotationList(); while (it.hasNext()) { currAnnot = (Annotation) it.next(); sortedAnnotations.addSortedExclusive(currAnnot); } // while StringBuffer editableContent = new StringBuffer(originalContent); long insertPositionEnd; long insertPositionStart; // insert anotation tags backward Out.prln("Unsorted annotations count: " + peopleAndPlaces.size()); Out.prln("Sorted annotations count: " + sortedAnnotations.size()); for (int i = sortedAnnotations.size() - 1; i >= 0; --i) { currAnnot = (Annotation) sortedAnnotations.get(i); insertPositionStart = currAnnot.getStartNode().getOffset().longValue(); insertPositionStart = info.getOriginalPos(insertPositionStart); insertPositionEnd = currAnnot.getEndNode().getOffset().longValue(); insertPositionEnd = info.getOriginalPos(insertPositionEnd, true); if (insertPositionEnd != -1 && insertPositionStart != -1) { editableContent.insert((int) insertPositionEnd, endTag); editableContent.insert((int) insertPositionStart, startTagPart_3); editableContent.insert((int) insertPositionStart, currAnnot.getType()); editableContent.insert((int) insertPositionStart, startTagPart_2); editableContent.insert((int) insertPositionStart, currAnnot.getId().toString()); editableContent.insert((int) insertPositionStart, startTagPart_1); } // if } // for FileWriter writer = new FileWriter(file); writer.write(editableContent.toString()); writer.close(); } // if - should generate else if (originalContent != null) { Out.prln("OrigContent existing. 
Generate file..."); Iterator it = peopleAndPlaces.iterator(); Annotation currAnnot; SortedAnnotationList sortedAnnotations = new SortedAnnotationList(); while (it.hasNext()) { currAnnot = (Annotation) it.next(); sortedAnnotations.addSortedExclusive(currAnnot); } // while StringBuffer editableContent = new StringBuffer(originalContent); long insertPositionEnd; long insertPositionStart; // insert anotation tags backward Out.prln("Unsorted annotations count: " + peopleAndPlaces.size()); Out.prln("Sorted annotations count: " + sortedAnnotations.size()); for (int i = sortedAnnotations.size() - 1; i >= 0; --i) { currAnnot = (Annotation) sortedAnnotations.get(i); insertPositionStart = currAnnot.getStartNode().getOffset().longValue(); insertPositionEnd = currAnnot.getEndNode().getOffset().longValue(); if (insertPositionEnd != -1 && insertPositionStart != -1) { editableContent.insert((int) insertPositionEnd, endTag); editableContent.insert((int) insertPositionStart, startTagPart_3); editableContent.insert((int) insertPositionStart, currAnnot.getType()); editableContent.insert((int) insertPositionStart, startTagPart_2); editableContent.insert((int) insertPositionStart, currAnnot.getId().toString()); editableContent.insert((int) insertPositionStart, startTagPart_1); } // if } // for FileWriter writer = new FileWriter(file); writer.write(editableContent.toString()); writer.close(); } else { Out.prln("Content : " + originalContent); Out.prln("Repositioning: " + info); } String xmlDocument = doc.toXml(peopleAndPlaces, false); String fileName = new String("StANNIE_toXML_" + count + ".HTML"); FileWriter writer = new FileWriter(fileName); writer.write(xmlDocument); writer.close(); } // for each doc } // main
/** * @param annotations * @return */ private List<Tag> handleAnnotation( Set<? extends org.jf.dexlib2.iface.Annotation> annotations, String classType) { if (annotations == null || annotations.size() == 0) return null; List<Tag> tags = new ArrayList<Tag>(); VisibilityAnnotationTag[] vatg = new VisibilityAnnotationTag[3]; // RUNTIME_VISIBLE, RUNTIME_INVISIBLE, SOURCE_VISIBLE, see // soot.tagkit.AnnotationConstants for (Annotation a : annotations) { int v = getVisibility(a.getVisibility()); Tag t = null; Type atype = DexType.toSoot(a.getType()); String atypes = atype.toString(); int eSize = a.getElements().size(); Debug.printDbg("annotation type: ", atypes, " elements: ", eSize); if (atypes.equals("dalvik.annotation.AnnotationDefault")) { if (eSize != 1) throw new RuntimeException( "error: expected 1 element for annotation Default. Got " + eSize + " instead."); // get element AnnotationElem e = getElements(a.getElements()).get(0); AnnotationTag adt = new AnnotationTag(a.getType()); adt.addElem(e); if (vatg[v] == null) vatg[v] = new VisibilityAnnotationTag(v); vatg[v].addAnnotation(adt); } else if (atypes.equals("dalvik.annotation.EnclosingClass")) { if (eSize != 1) throw new RuntimeException( "error: expected 1 element for annotation EnclosingClass. Got " + eSize + " instead."); for (AnnotationElement elem : a.getElements()) { String outerClass = ((TypeEncodedValue) elem.getValue()).getValue(); outerClass = Util.dottedClassName(outerClass); deps.typesToSignature.add(RefType.v(outerClass)); clazz.setOuterClass(SootResolver.v().makeClassRef(outerClass)); assert clazz.getOuterClass() != clazz; } // EnclosingClass comes in pair with InnerClass. // Those are generated from a single InnerClassTag, // that is re-constructed only for the InnerClass Dalvik // annotation. continue; } else if (atypes.equals("dalvik.annotation.EnclosingMethod")) { if (eSize != 1) throw new RuntimeException( "error: expected 1 element for annotation EnclosingMethod. 
Got " + eSize + " instead."); AnnotationStringElem e = (AnnotationStringElem) getElements(a.getElements()).get(0); String[] split1 = e.getValue().split("\\ \\|"); String classString = split1[0]; String methodString = split1[1]; String parameters = split1[2]; String returnType = split1[3]; String methodSigString = "(" + parameters + ")" + returnType; t = new EnclosingMethodTag(classString, methodString, methodSigString); String outerClass = classString.replace("/", "."); deps.typesToSignature.add(RefType.v(outerClass)); clazz.setOuterClass(SootResolver.v().makeClassRef(outerClass)); assert clazz.getOuterClass() != clazz; } else if (atypes.equals("dalvik.annotation.InnerClass")) { int accessFlags = -1; // access flags of the inner class String name = null; // name of the inner class for (AnnotationElem ele : getElements(a.getElements())) { if (ele instanceof AnnotationIntElem && ele.getName().equals("accessFlags")) accessFlags = ((AnnotationIntElem) ele).getValue(); else if (ele instanceof AnnotationStringElem && ele.getName().equals("name")) name = ((AnnotationStringElem) ele).getValue(); else throw new RuntimeException("Unexpected inner class annotation element"); } String outerClass; // outer class name if (name == null) outerClass = classType.replaceAll("\\$[0-9,a-z,A-Z]*;$", ";"); else outerClass = classType.replaceFirst("\\$" + name + ";$", ";"); // Make sure that no funny business is going on if the // annotation is broken and does not end in $nn. if (outerClass.equals(classType)) { outerClass = null; } Tag innerTag = new InnerClassTag( DexType.toSootICAT(classType), outerClass == null ? 
null : DexType.toSootICAT(outerClass), name, accessFlags); tags.add(innerTag); if (outerClass != null && !clazz.hasOuterClass()) { String sootOuterClass = Util.dottedClassName(outerClass); deps.typesToSignature.add(RefType.v(sootOuterClass)); clazz.setOuterClass(SootResolver.v().makeClassRef(sootOuterClass)); assert clazz.getOuterClass() != clazz; } continue; } else if (atypes.equals("dalvik.annotation.MemberClasses")) { AnnotationArrayElem e = (AnnotationArrayElem) getElements(a.getElements()).get(0); for (AnnotationElem ae : e.getValues()) { AnnotationClassElem c = (AnnotationClassElem) ae; String innerClass = c.getDesc(); String outerClass = innerClass.replaceAll("\\$[^\\$]*$", ""); String name = innerClass.replaceAll("^.*\\$", "").replaceAll(";$", ""); if (name.replaceAll("[0-9].*", "").equals("")) { // anonymous or local inner classes name = null; } int accessFlags = 0; // seems like this information is lost during the .class -- dx --> .dex process. Tag innerTag = new InnerClassTag( DexType.toSootICAT(innerClass), DexType.toSootICAT(outerClass), name, accessFlags); tags.add(innerTag); } continue; } else if (atypes.equals("dalvik.annotation.Signature")) { if (eSize != 1) throw new RuntimeException( "error: expected 1 element for annotation Signature. Got " + eSize + " instead."); AnnotationArrayElem e = (AnnotationArrayElem) getElements(a.getElements()).get(0); String sig = ""; for (AnnotationElem ae : e.getValues()) { AnnotationStringElem s = (AnnotationStringElem) ae; sig += s.getValue(); } t = new SignatureTag(sig); } else if (atypes.equals("dalvik.annotation.Throws")) { // this is handled in soot.dexpler.DexMethod continue; } else if (atypes.equals("java.lang.Deprecated")) { if (eSize != 0) throw new RuntimeException( "error: expected 1 element for annotation Deprecated. 
Got " + eSize + " instead."); t = new DeprecatedTag(); AnnotationTag adt = new AnnotationTag("Ljava/lang/Deprecated;"); if (vatg[v] == null) vatg[v] = new VisibilityAnnotationTag(v); vatg[v].addAnnotation(adt); } else { Debug.printDbg("read visibility tag: ", a.getType()); if (vatg[v] == null) vatg[v] = new VisibilityAnnotationTag(v); AnnotationTag tag = new AnnotationTag(a.getType()); for (AnnotationElem e : getElements(a.getElements())) tag.addElem(e); vatg[v].addAnnotation(tag); } tags.add(t); } for (VisibilityAnnotationTag vat : vatg) if (vat != null) tags.add(vat); return tags; }
/** * Retrieve a field. * * @param key field name * @return the field (even if the field does not exist you get a field) */ public Field field(String key) { // Value String fieldValue = null; if (data.containsKey(key)) { fieldValue = data.get(key); } else { if (value.isPresent()) { BeanWrapper beanWrapper = new BeanWrapperImpl(value.get()); beanWrapper.setAutoGrowNestedPaths(true); String objectKey = key; if (rootName != null && key.startsWith(rootName + ".")) { objectKey = key.substring(rootName.length() + 1); } if (beanWrapper.isReadableProperty(objectKey)) { Object oValue = beanWrapper.getPropertyValue(objectKey); if (oValue != null) { final String objectKeyFinal = objectKey; fieldValue = withRequestLocale( () -> formatters.print( beanWrapper.getPropertyTypeDescriptor(objectKeyFinal), oValue)); } } } } // Error List<ValidationError> fieldErrors = errors.get(key); if (fieldErrors == null) { fieldErrors = new ArrayList<>(); } // Format Tuple<String, List<Object>> format = null; BeanWrapper beanWrapper = new BeanWrapperImpl(blankInstance()); beanWrapper.setAutoGrowNestedPaths(true); try { for (Annotation a : beanWrapper.getPropertyTypeDescriptor(key).getAnnotations()) { Class<?> annotationType = a.annotationType(); if (annotationType.isAnnotationPresent(play.data.Form.Display.class)) { play.data.Form.Display d = annotationType.getAnnotation(play.data.Form.Display.class); if (d.name().startsWith("format.")) { List<Object> attributes = new ArrayList<>(); for (String attr : d.attributes()) { Object attrValue = null; try { attrValue = a.getClass().getDeclaredMethod(attr).invoke(a); } catch (Exception e) { // do nothing } attributes.add(attrValue); } format = Tuple(d.name(), attributes); } } } } catch (NullPointerException e) { // do nothing } // Constraints List<Tuple<String, List<Object>>> constraints = new ArrayList<>(); Class<?> classType = backedType; String leafKey = key; if (rootName != null && leafKey.startsWith(rootName + ".")) { leafKey = 
leafKey.substring(rootName.length() + 1); } int p = leafKey.lastIndexOf('.'); if (p > 0) { classType = beanWrapper.getPropertyType(leafKey.substring(0, p)); leafKey = leafKey.substring(p + 1); } if (classType != null) { BeanDescriptor beanDescriptor = play.data.validation.Validation.getValidator().getConstraintsForClass(classType); if (beanDescriptor != null) { PropertyDescriptor property = beanDescriptor.getConstraintsForProperty(leafKey); if (property != null) { constraints = Constraints.displayableConstraint(property.getConstraintDescriptors()); } } } return new Field(this, key, constraints, format, fieldErrors, fieldValue); }
public static void main(String[] args) { if (args.length != 2) { System.out.println( "This program expects two parameters: \n" + "1. Input predicted genes XML filename \n" + "2. Output GFF filename\n"); } else { String inFileString = args[0]; String outFileString = args[1]; File inFile = new File(inFileString); File outFile = new File(outFileString); try { BufferedWriter outBuff = new BufferedWriter(new FileWriter(outFile)); // writing header first outBuff.write(GFF_HEADER + "\n"); Date currentDate = new Date(); outBuff.write(DATE_HEADER + currentDate.toString() + "\n"); outBuff.write(TYPE_HEADER + "\n"); BufferedReader reader = new BufferedReader(new FileReader(inFile)); String tempSt; StringBuilder stBuilder = new StringBuilder(); while ((tempSt = reader.readLine()) != null) { stBuilder.append(tempSt); } // closing input file reader reader.close(); Annotation annotation = new Annotation(stBuilder.toString()); HashMap<String, TreeSet<GffLine>> linesPerContig = new HashMap<String, TreeSet<GffLine>>(); // -----------POTATIZING GENES---------------- List<Element> contigsGenes = annotation .asJDomElement() .getChild(PredictedGenes.TAG_NAME) .getChildren(ContigXML.TAG_NAME); for (Element element : contigsGenes) { ContigXML contig = new ContigXML(element); TreeSet<GffLine> lines = new TreeSet<GffLine>(); linesPerContig.put(contig.getId(), lines); List<XMLElement> genes = contig.getChildrenWith(PredictedGene.TAG_NAME); for (XMLElement xMLElement : genes) { PredictedGene gene = new PredictedGene(xMLElement.asJDomElement()); String geneLine = contig.getId() + SEPARATOR + CHORIZO_GEN + SEPARATOR + GENE + SEPARATOR; int beginPos = gene.getStartPosition(); int endPos = gene.getEndPosition(); int initPos = beginPos; if (beginPos < endPos) { geneLine += beginPos + SEPARATOR + endPos + SEPARATOR; } else { geneLine += endPos + SEPARATOR + beginPos + SEPARATOR; initPos = endPos; } geneLine += gene.getEvalue() + SEPARATOR + gene.getStrand() + SEPARATOR + "." 
+ SEPARATOR + LOCUS_TAG + gene.getId() + ";\n"; lines.add(new GffLine(initPos, geneLine)); // outBuff.write(geneLine); String cdsLine = contig.getId() + SEPARATOR + CHORIZO_GEN + SEPARATOR + CDS + SEPARATOR; if (gene.getStrand().equals(PredictedGene.POSITIVE_STRAND)) { cdsLine += gene.getStartPosition() + SEPARATOR + (gene.getEndPosition() - 3) + SEPARATOR; } else { cdsLine += (gene.getEndPosition() - 3) + SEPARATOR + gene.getStartPosition() + SEPARATOR; } cdsLine += gene.getEvalue() + SEPARATOR + gene.getStrand() + SEPARATOR + "0" + SEPARATOR; cdsLine += LOCUS_TAG + gene.getId() + ";" + PRODUCT + gene.getProteinNames() + ";" + CHORIZO_INFERENCE + gene.getAccession() + "\n"; // outBuff.write(cdsLine); lines.add(new GffLine(initPos, cdsLine)); String startCodonLine = contig.getId() + SEPARATOR + CHORIZO_GEN + SEPARATOR + START_CODON + SEPARATOR; if (gene.getStrand().equals(PredictedGene.POSITIVE_STRAND)) { startCodonLine += gene.getStartPosition() + SEPARATOR + (gene.getStartPosition() + 2) + SEPARATOR; } else { startCodonLine += (gene.getStartPosition() - 2) + SEPARATOR + gene.getStartPosition() + SEPARATOR; } startCodonLine += gene.getEvalue() + SEPARATOR + gene.getStrand() + SEPARATOR + "0" + SEPARATOR + LOCUS_TAG + gene.getId() + ";"; startCodonLine += PRODUCT + gene.getProteinNames() + ";" + CHORIZO_INFERENCE + gene.getAccession() + "\n"; // outBuff.write(startCodonLine); lines.add(new GffLine(initPos, startCodonLine)); String stopCodonLine = contig.getId() + SEPARATOR + CHORIZO_GEN + SEPARATOR + STOP_CODON + SEPARATOR; if (gene.getStrand().equals(PredictedGene.POSITIVE_STRAND)) { stopCodonLine += (gene.getEndPosition() + 1) + SEPARATOR + (gene.getEndPosition() + 3) + SEPARATOR; } else { stopCodonLine += (gene.getEndPosition() - 3) + SEPARATOR + (gene.getEndPosition() - 1) + SEPARATOR; } stopCodonLine += gene.getEvalue() + SEPARATOR + gene.getStrand() + SEPARATOR + "0" + SEPARATOR + LOCUS_TAG + gene.getId() + ";"; stopCodonLine += PRODUCT + 
gene.getProteinNames() + ";" + CHORIZO_INFERENCE + gene.getAccession() + "\n"; // outBuff.write(stopCodonLine); lines.add(new GffLine(initPos, stopCodonLine)); } } // -----------POTATIZING RNAS----------------- List<Element> contigsRnas = annotation .asJDomElement() .getChild(PredictedRnas.TAG_NAME) .getChildren(ContigXML.TAG_NAME); for (Element element : contigsRnas) { ContigXML contig = new ContigXML(element); List<XMLElement> rnas = contig.getChildrenWith(PredictedRna.TAG_NAME); TreeSet<GffLine> lines = linesPerContig.get(contig.getId()); if (lines == null) { lines = new TreeSet<GffLine>(); linesPerContig.put(contig.getId(), lines); } for (XMLElement xMLElement : rnas) { PredictedRna rna = new PredictedRna(xMLElement.asJDomElement()); String rnaLine = contig.getId() + SEPARATOR + CHORIZO_RNA + SEPARATOR + RNA + SEPARATOR; int beginPos = rna.getStartPosition(); int endPos = rna.getEndPosition(); int initPos = beginPos; if (beginPos < endPos) { rnaLine += beginPos + SEPARATOR + endPos + SEPARATOR; } else { rnaLine += endPos + SEPARATOR + beginPos + SEPARATOR; initPos = endPos; } rnaLine += rna.getEvalue() + SEPARATOR + rna.getStrand() + SEPARATOR + "." + SEPARATOR + LOCUS_TAG + rna.getId() + ";"; String columns[] = rna.getAnnotationUniprotId().split("\\|"); String rnaProduct = columns[3]; String refSeqId = columns[1]; String positions = columns[2].substring(1); // ref|NC_007413|:3894075-3895562|16S ribosomal RNA| [locus_tag=Ava_R0035] rnaLine += PRODUCT + rnaProduct + "," + "rna:RefSeq:" + refSeqId + " " + positions + "\n"; // outBuff.write(rnaLine); lines.add(new GffLine(initPos, rnaLine)); } } Set<String> keys = linesPerContig.keySet(); for (String key : keys) { TreeSet<GffLine> lines = linesPerContig.get(key); GffLine line = lines.pollFirst(); while (line != null) { outBuff.write(line.getLine()); line = lines.pollFirst(); } } outBuff.close(); System.out.println("Done!!! :D"); } catch (Exception e) { e.printStackTrace(); } } }
/** generate the dependency parse for a sentence, adding its arcs to 'relations'. */ public static void parseSentence(Document doc, Span span, SyntacticRelationSet relations) { if (fsw == null) { System.out.println("DepParser: no model loaded"); return; } // System.out.println ("parseSentence: " + doc.text(span)); // run Penn part-of-speech tagger // JetTest.tagger.annotate(doc, span, "tagger"); // build sentence List<Token> tokens = new ArrayList<Token>(); List<Integer> offset = new ArrayList<Integer>(); offset.add(0); // don't use 0th entry int tokenNum = 0; int posn = span.start(); while (posn < span.end()) { tokenNum++; Annotation tokenAnnotation = doc.tokenAt(posn); for (String s : SPECIAL_TOKEN) { Vector<Annotation> va = doc.annotationsAt(posn, s); if (va != null && va.size() > 0) { tokenAnnotation = va.get(0); break; } } if (tokenAnnotation == null) return; String tokenText = doc.normalizedText(tokenAnnotation).replaceAll(" ", "_"); Vector v = doc.annotationsAt(posn, "tagger"); Annotation a = (Annotation) v.get(0); String pos = (String) a.get("cat"); tokens.add(new Token(tokenText, pos, tokenNum)); offset.add(posn); if (posn >= tokenAnnotation.end()) { break; } posn = tokenAnnotation.end(); } Sentence sent = new Sentence(tokens); // parse sentence Arc[] arcs = fsw.process( sent, tokens.size() > 0 && tokens.get(0).getPos() == null, true, true, true, true, true) .getParse() .getHeadArcs(); // get dependencies for (Arc arc : arcs) { if (arc == null) continue; if (arc.getDependency().equalsIgnoreCase("ROOT")) continue; Token head = arc.getHead(); String headText = head.getText(); String headPos = head.getPos(); Integer headOffset = offset.get(head.getIndex()); Token dep = arc.getChild(); String depText = dep.getText(); String depPos = dep.getPos(); Integer depOffset = offset.get(dep.getIndex()); String type = arc.getDependency(); SyntacticRelation r = new SyntacticRelation(headOffset, headText, headPos, type, depOffset, depText, depPos); relations.add(r); // 
System.out.println ("parseSentence: adding relation " + r); } }
/**
 * Adds an annotation to the digest by feeding in the class name of its annotation type.
 *
 * <p>Delegates to the {@code String} overload of {@code addDigest}; the UTF8 encoding mentioned
 * in the original comment is presumably applied there (to be confirmed against that overload).
 *
 * @param digest the digest to update
 * @param ann the annotation whose type name is added
 */
private static void addDigest(MessageDigest digest, Annotation ann) {
  String annotationTypeName = ann.annotationType().getName();
  addDigest(digest, annotationTypeName);
}
@Override public void handle(HttpExchange httpExchange) throws IOException { // Set common response headers httpExchange.getResponseHeaders().add("Access-Control-Allow-Origin", "*"); Future<String> json = corenlpExecutor.submit( () -> { try { // Get the document Properties props = new Properties() { { setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,depparse"); } }; Annotation doc = getDocument(props, httpExchange); if (!doc.containsKey(CoreAnnotations.SentencesAnnotation.class)) { StanfordCoreNLP pipeline = mkStanfordCoreNLP(props); pipeline.annotate(doc); } // Construct the matcher Map<String, String> params = getURLParams(httpExchange.getRequestURI()); // (get the pattern) if (!params.containsKey("pattern")) { respondError("Missing required parameter 'pattern'", httpExchange); return ""; } String pattern = params.get("pattern"); // (get whether to filter / find) String filterStr = params.getOrDefault("filter", "false"); final boolean filter = filterStr.trim().isEmpty() || "true".equalsIgnoreCase(filterStr.toLowerCase()); // (create the matcher) final SemgrexPattern regex = SemgrexPattern.compile(pattern); // Run TokensRegex return JSONOutputter.JSONWriter.objectToJSON( (docWriter) -> { if (filter) { // Case: just filter sentences docWriter.set( "sentences", doc.get(CoreAnnotations.SentencesAnnotation.class) .stream() .map( sentence -> regex .matcher( sentence.get( SemanticGraphCoreAnnotations .CollapsedCCProcessedDependenciesAnnotation .class)) .matches()) .collect(Collectors.toList())); } else { // Case: find matches docWriter.set( "sentences", doc.get(CoreAnnotations.SentencesAnnotation.class) .stream() .map( sentence -> (Consumer<JSONOutputter.Writer>) (JSONOutputter.Writer sentWriter) -> { SemgrexMatcher matcher = regex.matcher( sentence.get( SemanticGraphCoreAnnotations .CollapsedCCProcessedDependenciesAnnotation .class)); int i = 0; while (matcher.find()) { sentWriter.set( Integer.toString(i), (Consumer<JSONOutputter.Writer>) (JSONOutputter.Writer 
matchWriter) -> { IndexedWord match = matcher.getMatch(); matchWriter.set("text", match.word()); matchWriter.set( "begin", match.index() - 1); matchWriter.set("end", match.index()); for (String capture : matcher.getNodeNames()) { matchWriter.set( "$" + capture, (Consumer<JSONOutputter.Writer>) groupWriter -> { IndexedWord node = matcher.getNode( capture); groupWriter.set( "text", node.word()); groupWriter.set( "begin", node.index() - 1); groupWriter.set( "end", node.index()); }); } }); i += 1; } sentWriter.set("length", i); })); } }); } catch (Exception e) { e.printStackTrace(); try { respondError(e.getClass().getName() + ": " + e.getMessage(), httpExchange); } catch (IOException ignored) { } } return ""; }); // Send response byte[] response = new byte[0]; try { response = json.get(5, TimeUnit.SECONDS).getBytes(); } catch (InterruptedException | ExecutionException | TimeoutException e) { respondError("Timeout when executing Semgrex query", httpExchange); } if (response.length > 0) { httpExchange.getResponseHeaders().add("Content-Type", "text/json"); httpExchange.getResponseHeaders().add("Content-Length", Integer.toString(response.length)); httpExchange.sendResponseHeaders(HTTP_OK, response.length); httpExchange.getResponseBody().write(response); httpExchange.close(); } }
@Override public void handle(HttpExchange httpExchange) throws IOException { // Set common response headers httpExchange.getResponseHeaders().add("Access-Control-Allow-Origin", "*"); // Get sentence. Properties props; Annotation ann; StanfordCoreNLP.OutputFormat of; log("[" + httpExchange.getRemoteAddress() + "] Received message"); try { props = getProperties(httpExchange); ann = getDocument(props, httpExchange); of = StanfordCoreNLP.OutputFormat.valueOf( props.getProperty("outputFormat", "json").toUpperCase()); // Handle direct browser connections (i.e., not a POST request). if (ann.get(CoreAnnotations.TextAnnotation.class).length() == 0) { log("[" + httpExchange.getRemoteAddress() + "] Interactive connection"); staticPageHandle.handle(httpExchange); return; } log("[" + httpExchange.getRemoteAddress() + "] API call"); } catch (Exception e) { // Return error message. e.printStackTrace(); String response = e.getMessage(); httpExchange.getResponseHeaders().add("Content-Type", "text/plain"); httpExchange.sendResponseHeaders(HTTP_BAD_INPUT, response.length()); httpExchange.getResponseBody().write(response.getBytes()); httpExchange.close(); return; } try { // Annotate StanfordCoreNLP pipeline = mkStanfordCoreNLP(props); Future<Annotation> completedAnnotationFuture = corenlpExecutor.submit( () -> { pipeline.annotate(ann); return ann; }); Annotation completedAnnotation = completedAnnotationFuture.get(5, TimeUnit.SECONDS); // Get output ByteArrayOutputStream os = new ByteArrayOutputStream(); StanfordCoreNLP.createOutputter(props, AnnotationOutputter.getOptions(pipeline)) .accept(completedAnnotation, os); os.close(); byte[] response = os.toByteArray(); httpExchange.getResponseHeaders().add("Content-Type", getContentType(props, of)); httpExchange.getResponseHeaders().add("Content-Length", Integer.toString(response.length)); httpExchange.sendResponseHeaders(HTTP_OK, response.length); httpExchange.getResponseBody().write(response); httpExchange.close(); } catch 
(TimeoutException e) { respondError("CoreNLP request timed out", httpExchange); } catch (Exception e) { // Return error message. respondError(e.getClass().getName() + ": " + e.getMessage(), httpExchange); } }