/** * given an AgentID, find the text span associated with this Agent and highlight the appropriate * areas in the raw text panel * * @param agentRef the AgentID */ public void highlightText(Annotation an, Color highlightTextColor) { if (an != null) { // highlight the annotation // also define color to the highlight tvPanel.highlightSpan( an.getStartOffset(), an.getEndOffset() - an.getStartOffset(), highlightTextColor); } }
/**
 * Picks a single part-of-speech annotation to stand for the given verb annotation.
 *
 * <p>The POS annotations contained in {@code verb} are scanned in iteration order and the last
 * one whose type is not a preposition (per {@code SyntaxUtils.isPreposition}) is returned. If
 * every contained POS annotation is a preposition, the first one is returned. If there are no
 * contained POS annotations, {@code verb} itself is returned.
 *
 * @param verb the verb annotation to analyse
 * @param annotations annotation sets keyed by name; the {@code Constants.POS} entry is read
 * @param text not used by this method
 * @return the selected POS annotation, or {@code verb} when none is contained in it
 */
public static Annotation getHeadVerb(
    Annotation verb, Map<String, AnnotationSet> annotations, String text) {
  AnnotationSet posTags = annotations.get(Constants.POS).getContained(verb);
  if (posTags == null || posTags.size() < 1) {
    return verb;
  }

  // Start from the first tag so there is a result even if everything is a preposition.
  Annotation head = posTags.getFirst();
  for (Annotation candidate : posTags) {
    if (SyntaxUtils.isPreposition(candidate.getType())) {
      continue;
    }
    head = candidate;
  }
  return head;
}
/**
 * Compares the {@code InfWords} values of the proper names linked to two noun phrases.
 *
 * @param np1 the first noun phrase
 * @param np2 the second noun phrase
 * @param doc the document both noun phrases belong to
 * @param featVector not used by this method
 * @return {@code NA} unless {@code ProperName.getValue} holds for both noun phrases;
 *     {@code INCOMPATIBLE} when either word array is {@code null} or empty, or when
 *     {@code Utils.isAnySubset} is false for the two arrays; {@code COMPATIBLE} otherwise
 */
@Override
public String produceValue(
    Annotation np1, Annotation np2, Document doc, Map<Feature, String> featVector) {
  boolean bothProperNames = ProperName.getValue(np1, doc) && ProperName.getValue(np2, doc);
  if (!bothProperNames) {
    return NA;
  }

  Annotation name1 = (Annotation) np1.getProperty(Property.LINKED_PROPER_NAME);
  Annotation name2 = (Annotation) np2.getProperty(Property.LINKED_PROPER_NAME);
  String[] words1 = InfWords.getValue(name1, doc);
  String[] words2 = InfWords.getValue(name2, doc);

  boolean wordsMissing =
      words1 == null || words2 == null || words1.length < 1 || words2.length < 1;
  if (wordsMissing) {
    return INCOMPATIBLE;
  }
  return Utils.isAnySubset(words1, words2) ? COMPATIBLE : INCOMPATIBLE;
}
public Component getListCellRendererComponent( JList list, Object value, int index, boolean isSelected, boolean cellHasFocus) { if (!(value instanceof Annotation)) throw new RuntimeException("Invalid value in opinion list"); // System.err.println("Rendering..."+value); Annotation a = (Annotation) value; StringBuilder s = new StringBuilder(a.getType()); s.append(" - "); Map<String, String> features = a.getFeatures(); for (String k : features.keySet()) { s.append(k).append(": ").append(features.get(k)).append(" "); } setText(s.toString()); if (isSelected) { setBackground(list.getSelectionBackground()); setForeground(list.getSelectionForeground()); } else { setBackground(list.getBackground()); setForeground(list.getForeground()); } setEnabled(list.isEnabled()); setFont(list.getFont()); setOpaque(true); return this; }
@Override public Object produceValue(Annotation np, Document doc) { // Get the sentence annotations AnnotationSet par = doc.getAnnotationSet(Constants.PAR); AnnotationSet nps = doc.getAnnotationSet(Constants.NP); for (Annotation p : par) { int num; if (Constants.PAR_NUMS_UNAVAILABLE) { num = 0; } else { num = Integer.parseInt(p.getAttribute("parNum")); } AnnotationSet enclosed = nps.getContained(p); for (Annotation e : enclosed) { e.setProperty(this, num); } } // Make sure that all annotations have an associated PARNUM for (Annotation n : nps) { if (n.getProperty(this) == null) { AnnotationSet o = par.getOverlapping(0, n.getEndOffset()); if (o == null || o.size() < 1) { n.setProperty(this, 0); } else { Annotation p = o.getLast(); int num = Integer.parseInt(p.getAttribute("parNum")); // = 0; n.setProperty(this, num); } } } if (np.getProperty(this) == null) { AnnotationSet o = par.getOverlapping(0, np.getEndOffset()); if (o == null || o.size() < 1) { np.setProperty(this, 0); } else { Annotation p = o.getLast(); int num = Integer.parseInt(p.getAttribute("parNum")); np.setProperty(this, num); } } return np.getProperty(this); }
@Override public void run(Document doc, String annSetNames[]) { String tagChunk = currentConfig.getTagChunk(); String listDir = currentConfig.getTagChunkLists(); AnnotationSet namedEntities = new AnnotationSet(annSetNames[0]); // get the sentences from the input AnnotationSet sentSet = doc.getAnnotationSet(Constants.SENT); // get the tokens from each sentence AnnotationSet tokenSet = doc.getAnnotationSet(Constants.TOKEN); // Read in the text from the raw file String text = doc.getText(); Iterator<Annotation> sents = sentSet.iterator(); ArrayList<String> lines = new ArrayList<String>(); ArrayList<Vector<Annotation>> tokenList = new ArrayList<Vector<Annotation>>(); while (sents.hasNext()) { Vector<Annotation> annVector = new Vector<Annotation>(); Annotation sent = sents.next(); int sentStart = sent.getStartOffset(); int sentEnd = sent.getEndOffset(); String sentText = Annotation.getAnnotText(sent, text); AnnotationSet sentTokens = tokenSet.getContained(sentStart, sentEnd); // gather all sentences to tag if (!sentText.matches("\\W+")) { StringBuilder tmp = new StringBuilder(); for (Annotation a : sentTokens) { tmp.append(Annotation.getAnnotTextClean(a, text)).append(" "); annVector.add(a); } lines.add(tmp.toString()); tokenList.add(annVector); } } // write out a tmp file that contains the words to be tagged File tmpFile = new File(doc.getRootDir(), "tmp.ner"); try { tmpFile.deleteOnExit(); FileWriter fw = new FileWriter(tmpFile); BufferedWriter bw = new BufferedWriter(fw); for (String l : lines) { // System.out.println(l); bw.write(l + "\n"); } bw.close(); fw.close(); } catch (IOException ioe) { ioe.printStackTrace(); } // run the tagger String command = tagChunk + " -predict . 
" + modelDir + Utils.SEPARATOR + models[0] + " " + tmpFile.getAbsolutePath() + " " + listDir; // collect the results ArrayList<String> results; int i = 0; try { results = Utils.runExternalCaptureOutput(command); Annotation current = null; for (String l : results) { Vector<Annotation> annVector = tokenList.get(i); // get rid of these extraneous tags l = l.replace("_O-O", ""); String[] tokens = l.split(" "); // System.out.println(l); int j = 0; int underscore; int nes = 1; String tag; for (String t : tokens) { underscore = t.lastIndexOf('_'); tag = t.substring(underscore + 1, t.length()); Annotation a = annVector.get(j); // System.out.print(Utils.getAnnotTextClean(a, text) + "_" + tag + " "); if (tag.equals("B-O")) { j++; if (current != null) { namedEntities.add(current); // System.out.println("NE Found: " + Utils.getAnnotTextClean(current, text) + ":" + // current.getType()); nes++; current = null; } continue; } String entityType = tag.substring(tag.indexOf("-") + 1, tag.length()); if (entityType.equals("ORG")) { entityType = "ORGANIZATION"; } else if (entityType.equals("LOC")) { entityType = "LOCATION"; } else if (entityType.equals("PER")) { entityType = "PERSON"; } else if (entityType.equals("VEH")) { entityType = "VEHICLE"; } if (tag.startsWith("B-")) { if (current != null) { namedEntities.add(current); nes++; current = null; // System.out.println("NE Found: " + Utils.getAnnotTextClean(current, text)); } current = new Annotation(nes, a.getStartOffset(), a.getEndOffset(), entityType); } else if (tag.startsWith("I-")) { if (current != null) { current.setEndOffset(a.getEndOffset()); } else { current = new Annotation(nes, a.getStartOffset(), a.getEndOffset(), entityType); } } j++; } // System.out.println(); i++; } FileUtils.delete(tmpFile); } catch (IOException e) { throw new RuntimeException(e); } catch (InterruptedException e) { throw new RuntimeException(e); } addResultSet(doc, namedEntities); }
/**
 * Returns the annotation produced by {@code ContainsProperName.getValue} for a noun phrase.
 *
 * @param np the noun phrase to inspect
 * @param doc the document the noun phrase belongs to
 * @return the annotation, or {@code null} when its id is negative (presumably the marker for
 *     "no proper name" -- confirm against {@code ContainsProperName})
 */
public static Annotation getContainedProperName(Annotation np, Document doc) {
  Annotation properName = ContainsProperName.getValue(np, doc);
  return properName.getId() < 0 ? null : properName;
}
/**
 * Reports whether {@code ContainsProperName.getValue} yields an annotation with a non-negative
 * id for the noun phrase.
 *
 * @param np the noun phrase to inspect
 * @param doc the document the noun phrase belongs to
 * @return {@code true} when the annotation's id is zero or greater, {@code false} otherwise
 */
public static boolean containsProperName(Annotation np, Document doc) {
  Annotation properName = ContainsProperName.getValue(np, doc);
  return properName.getId() >= 0;
}
/**
 * Checks a part-of-speech annotation against the adverb tags {@code RB}, {@code RBR} and
 * {@code RBS}.
 *
 * @param pos the part-of-speech annotation; its type is the tag that is checked
 * @return the result of {@code memberArray} for the annotation's type and the adverb tags
 */
public static boolean isAdverb(Annotation pos) {
  String tag = pos.getType();
  return memberArray(tag, new String[] {"RB", "RBR", "RBS"});
}
/**
 * Checks a part-of-speech annotation against the adjective tags {@code JJ}, {@code JJR} and
 * {@code JJS}.
 *
 * @param pos the part-of-speech annotation; its type is the tag that is checked
 * @return the result of {@code memberArray} for the annotation's type and the adjective tags
 */
public static boolean isAdjective(Annotation pos) {
  String tag = pos.getType();
  return memberArray(tag, new String[] {"JJ", "JJR", "JJS"});
}
/**
 * Checks whether a part-of-speech annotation carries the possessive-pronoun tag {@code PRP$}.
 *
 * @param pos the part-of-speech annotation; its type is the tag that is checked
 * @return {@code true} when the annotation's type is exactly {@code "PRP$"}
 */
public static boolean isPossesivePronoun(Annotation pos) {
  String tag = pos.getType();
  return tag.equals("PRP$");
}
/**
 * Checks whether a part-of-speech annotation carries the predeterminer tag {@code PDT}.
 *
 * @param pos the part-of-speech annotation; its type is the tag that is checked
 * @return {@code true} when the annotation's type is exactly {@code "PDT"}
 */
public static boolean isPredeterminer(Annotation pos) {
  String tag = pos.getType();
  return tag.equals("PDT");
}
/**
 * Checks whether a part-of-speech annotation carries a proper-noun tag, i.e. one that starts
 * with {@code NNP}.
 *
 * @param pos the part-of-speech annotation; its type is the tag that is checked
 * @return {@code true} when the annotation's type begins with {@code "NNP"}
 */
public static boolean isProperNoun(Annotation pos) {
  String tag = pos.getType();
  return tag.startsWith("NNP");
}
/**
 * Checks whether a part-of-speech annotation carries the cardinal-number tag {@code CD}.
 *
 * <p>In the Penn Treebank tag set, {@code CD} marks cardinal numbers while {@code CC} marks
 * coordinating conjunctions, so {@code CD} is the tag matched here.
 *
 * @param pos the part-of-speech annotation; its type is the tag that is checked
 * @return the result of {@code memberArray} for the annotation's type and the cardinal-number
 *     tag
 */
public static boolean isCardinalNumber(Annotation pos) {
  final String[] CN_POS = {"CD"};
  return memberArray(pos.getType(), CN_POS);
}