/**
 * Compares the semantic classes of two noun phrases.
 *
 * <p>The checks run in this order:
 *
 * <ol>
 *   <li>equal {@code NPSemanticType} values other than {@code UNKNOWN}: compatible;
 *   <li>either NP has no {@code WNSemClass} senses: not applicable;
 *   <li>some entry of {@code COMP_SUPERTYPES} is a member of both sense arrays: compatible;
 *   <li>the sense arrays overlap and either NP is a pronoun, or {@code intersection} of their
 *       words is positive: compatible;
 *   <li>otherwise: incompatible.
 * </ol>
 *
 * @param np1 the first noun phrase
 * @param np2 the second noun phrase
 * @param doc the document both noun phrases belong to
 * @return 0 if not applicable, 1 if incompatible, 2 if compatible
 */
public static int sameSemanticClass(Annotation np1, Annotation np2, Document doc) {
  final int notApplicable = 0;
  final int incompatible = 1;
  final int compatible = 2;

  NPSemTypeEnum semType1 = NPSemanticType.getValue(np1, doc);
  NPSemTypeEnum semType2 = NPSemanticType.getValue(np2, doc);
  boolean sameKnownType = semType1.equals(semType2) && !semType1.equals(NPSemTypeEnum.UNKNOWN);
  if (sameKnownType) {
    return compatible;
  }

  String[] senses1 = WNSemClass.getValue(np1, doc);
  String[] senses2 = WNSemClass.getValue(np2, doc);
  if (senses1.length < 1 || senses2.length < 1) {
    return notApplicable;
  }

  for (String supertype : COMP_SUPERTYPES) {
    boolean sharedSupertype = memberArray(supertype, senses1) && memberArray(supertype, senses2);
    if (sharedSupertype) {
      return compatible;
    }
  }

  if (!overlaps(senses1, senses2)) {
    return incompatible;
  }
  if (FeatureUtils.isPronoun(np1, doc) || FeatureUtils.isPronoun(np2, doc)) {
    return compatible;
  }
  String[] words1 = doc.getWords(np1);
  String[] words2 = doc.getWords(np2);
  return intersection(words1, words2) > 0 ? compatible : incompatible;
}
/**
 * Builds the navigation strip: a "Prev" button, a read-only text field showing the document id,
 * and a "Next" button, laid out left to right with horizontal glue on either side of the field.
 *
 * <p>As a side effect, the created text field is stored in {@code fileNameField}.
 *
 * @param doc the document whose id is shown in the text field
 * @return the assembled panel
 */
private JPanel createForwardBackPanel(Document doc) {
  JButton backButton = new JButton("Prev");
  backButton.setFont(font);
  backButton.addActionListener(
      new ActionListener() {
        public void actionPerformed(java.awt.event.ActionEvent event) {
          previousFile();
        }
      });

  JButton forwardButton = new JButton("Next");
  forwardButton.setFont(font);
  forwardButton.addActionListener(
      new ActionListener() {
        public void actionPerformed(java.awt.event.ActionEvent event) {
          nextFile();
        }
      });

  fileNameField = new JTextField(doc.getDocumentId());
  fileNameField.setEditable(false);
  fileNameField.setFont(font);

  JPanel strip = new JPanel();
  strip.setLayout(new BoxLayout(strip, BoxLayout.X_AXIS));
  strip.add(backButton);
  strip.add(Box.createHorizontalGlue());
  strip.add(fileNameField);
  strip.add(Box.createHorizontalGlue());
  strip.add(forwardButton);
  return strip;
}
@Override public Object produceValue(Annotation np, Document doc) { // Get the sentence annotations AnnotationSet par = doc.getAnnotationSet(Constants.PAR); AnnotationSet nps = doc.getAnnotationSet(Constants.NP); for (Annotation p : par) { int num; if (Constants.PAR_NUMS_UNAVAILABLE) { num = 0; } else { num = Integer.parseInt(p.getAttribute("parNum")); } AnnotationSet enclosed = nps.getContained(p); for (Annotation e : enclosed) { e.setProperty(this, num); } } // Make sure that all annotations have an associated PARNUM for (Annotation n : nps) { if (n.getProperty(this) == null) { AnnotationSet o = par.getOverlapping(0, n.getEndOffset()); if (o == null || o.size() < 1) { n.setProperty(this, 0); } else { Annotation p = o.getLast(); int num = Integer.parseInt(p.getAttribute("parNum")); // = 0; n.setProperty(this, num); } } } if (np.getProperty(this) == null) { AnnotationSet o = par.getOverlapping(0, np.getEndOffset()); if (o == null || o.size() < 1) { np.setProperty(this, 0); } else { Annotation p = o.getLast(); int num = Integer.parseInt(p.getAttribute("parNum")); np.setProperty(this, num); } } return np.getProperty(this); }
/**
 * Switches the viewer to another document.
 *
 * <p>The prediction panel is cleared before the document is swapped; afterwards the text panel's
 * highlights are cleared, the raw text is displayed, the prediction panel is redrawn and the
 * file name field is updated with the new document's id.
 *
 * @param document the document to display
 */
public void setDoc(Document document) {
  // Drop the old predictions while the previous document is still current.
  predictionPanel.clear();

  doc = document;

  // Refresh every view from the newly installed document.
  tvPanel.clearHighlights();
  displayRawText();
  predictionPanel.redraw();

  String documentId = doc.getDocumentId();
  fileNameField.setText(documentId);
}
public static ArticleTypeEnum articleType(Annotation np, Document doc) { String[] indefs = {"a", "an", "one"}; // String[] defs = { "the", "this", "that", "these", "those" }; String[] quans = {"every", "all", "some", "most", "few", "many", "much"}; String[] words = doc.getWords(np); String first = words[0]; if (ProperName.getValue(np, doc)) return ArticleTypeEnum.DEFINITE; if (isPronoun(np, doc)) return ArticleTypeEnum.DEFINITE; if (memberArray(first, indefs)) return ArticleTypeEnum.INDEFINITE; if (memberArray(first, quans)) { if (words.length < 2 || !words[1].equalsIgnoreCase("of")) return ArticleTypeEnum.QUANTIFIED; else return ArticleTypeEnum.DEFINITE; } AnnotationSet pos = doc.getAnnotationSet(Constants.POS); pos = pos.getContained(np); if (pos != null && pos.size() > 0) { if (isCardinalNumber(pos.getFirst()) || isPredeterminer(pos.getFirst())) return ArticleTypeEnum.QUANTIFIED; if (isPossesivePronoun(pos.getFirst()) || isProperNoun(pos.getLast())) return ArticleTypeEnum.DEFINITE; } return ArticleTypeEnum.INDEFINITE; }
/**
 * Tells whether a noun phrase starts with an indefinite article.
 *
 * @param np the noun phrase to inspect
 * @param doc the document containing the noun phrase
 * @return {@code true} if the first word of {@code np} is "a", "an" or "one" according to
 *     {@code memberArray}; {@code false} otherwise, including when the NP has no words
 */
public static boolean isIndefinite(Annotation np, Document doc) {
  String[] indefs = {"a", "an", "one"};
  String[] words = doc.getWords(np);
  // An NP without words cannot start with an indefinite article; avoid indexing an empty array.
  if (words == null || words.length == 0) {
    return false;
  }
  return memberArray(words[0], indefs);
}
/**
 * Decides whether two noun phrases are compatible based on their head nouns and the subclass
 * relation between them.
 *
 * <p>Pronouns are always {@code INCOMPATIBLE}. Identical head nouns (compared in lower case)
 * are {@code COMPATIBLE}. Otherwise proper names are {@code INCOMPATIBLE}, and the pair is
 * {@code COMPATIBLE} only if {@code FeatureUtils.isSubclass} holds in either direction.
 *
 * @param np1 the first noun phrase
 * @param np2 the second noun phrase
 * @param doc the document containing both noun phrases
 * @param featVector previously computed feature values (not used here)
 * @return {@code COMPATIBLE} or {@code INCOMPATIBLE}
 */
@Override
public String produceValue(
    Annotation np1, Annotation np2, Document doc, Map<Feature, String> featVector) {
  if (FeatureUtils.isPronoun(np1, doc) || FeatureUtils.isPronoun(np2, doc)) {
    return INCOMPATIBLE;
  }

  Annotation head1 = HeadNoun.getValue(np1, doc);
  Annotation head2 = HeadNoun.getValue(np2, doc);
  String h1 = doc.getAnnotString(head1).toLowerCase();
  String h2 = doc.getAnnotString(head2).toLowerCase();

  // Identical head nouns need no subclass lookup.
  if (h1.equals(h2)) {
    return COMPATIBLE;
  }

  if (ProperName.getValue(np2, doc) || ProperName.getValue(np1, doc)) {
    return INCOMPATIBLE;
  }

  // Test the subclass relation in both directions (the reverse check used to compare np2 with
  // itself).
  if (FeatureUtils.isSubclass(np1, np2, doc) || FeatureUtils.isSubclass(np2, np1, doc)) {
    return COMPATIBLE;
  }
  return INCOMPATIBLE;
}
@Override public void run(Document doc, String annSetNames[]) { String tagChunk = currentConfig.getTagChunk(); String listDir = currentConfig.getTagChunkLists(); AnnotationSet namedEntities = new AnnotationSet(annSetNames[0]); // get the sentences from the input AnnotationSet sentSet = doc.getAnnotationSet(Constants.SENT); // get the tokens from each sentence AnnotationSet tokenSet = doc.getAnnotationSet(Constants.TOKEN); // Read in the text from the raw file String text = doc.getText(); Iterator<Annotation> sents = sentSet.iterator(); ArrayList<String> lines = new ArrayList<String>(); ArrayList<Vector<Annotation>> tokenList = new ArrayList<Vector<Annotation>>(); while (sents.hasNext()) { Vector<Annotation> annVector = new Vector<Annotation>(); Annotation sent = sents.next(); int sentStart = sent.getStartOffset(); int sentEnd = sent.getEndOffset(); String sentText = Annotation.getAnnotText(sent, text); AnnotationSet sentTokens = tokenSet.getContained(sentStart, sentEnd); // gather all sentences to tag if (!sentText.matches("\\W+")) { StringBuilder tmp = new StringBuilder(); for (Annotation a : sentTokens) { tmp.append(Annotation.getAnnotTextClean(a, text)).append(" "); annVector.add(a); } lines.add(tmp.toString()); tokenList.add(annVector); } } // write out a tmp file that contains the words to be tagged File tmpFile = new File(doc.getRootDir(), "tmp.ner"); try { tmpFile.deleteOnExit(); FileWriter fw = new FileWriter(tmpFile); BufferedWriter bw = new BufferedWriter(fw); for (String l : lines) { // System.out.println(l); bw.write(l + "\n"); } bw.close(); fw.close(); } catch (IOException ioe) { ioe.printStackTrace(); } // run the tagger String command = tagChunk + " -predict . 
" + modelDir + Utils.SEPARATOR + models[0] + " " + tmpFile.getAbsolutePath() + " " + listDir; // collect the results ArrayList<String> results; int i = 0; try { results = Utils.runExternalCaptureOutput(command); Annotation current = null; for (String l : results) { Vector<Annotation> annVector = tokenList.get(i); // get rid of these extraneous tags l = l.replace("_O-O", ""); String[] tokens = l.split(" "); // System.out.println(l); int j = 0; int underscore; int nes = 1; String tag; for (String t : tokens) { underscore = t.lastIndexOf('_'); tag = t.substring(underscore + 1, t.length()); Annotation a = annVector.get(j); // System.out.print(Utils.getAnnotTextClean(a, text) + "_" + tag + " "); if (tag.equals("B-O")) { j++; if (current != null) { namedEntities.add(current); // System.out.println("NE Found: " + Utils.getAnnotTextClean(current, text) + ":" + // current.getType()); nes++; current = null; } continue; } String entityType = tag.substring(tag.indexOf("-") + 1, tag.length()); if (entityType.equals("ORG")) { entityType = "ORGANIZATION"; } else if (entityType.equals("LOC")) { entityType = "LOCATION"; } else if (entityType.equals("PER")) { entityType = "PERSON"; } else if (entityType.equals("VEH")) { entityType = "VEHICLE"; } if (tag.startsWith("B-")) { if (current != null) { namedEntities.add(current); nes++; current = null; // System.out.println("NE Found: " + Utils.getAnnotTextClean(current, text)); } current = new Annotation(nes, a.getStartOffset(), a.getEndOffset(), entityType); } else if (tag.startsWith("I-")) { if (current != null) { current.setEndOffset(a.getEndOffset()); } else { current = new Annotation(nes, a.getStartOffset(), a.getEndOffset(), entityType); } } j++; } // System.out.println(); i++; } FileUtils.delete(tmpFile); } catch (IOException e) { throw new RuntimeException(e); } catch (InterruptedException e) { throw new RuntimeException(e); } addResultSet(doc, namedEntities); }
@Override public void initialize(Annotation[] nps, Document doc, boolean training) { super.initialize(nps, doc, training); pairs = new ArrayList<Annotation[]>(); HashMap<Annotation, ArrayList<Annotation>> posessives = new HashMap<Annotation, ArrayList<Annotation>>(); RuleResolvers.addAllPossesives(doc.getAnnotationSet(Constants.NP), doc, posessives); int propNames = 0, regular = 0, is = 0, def = 0; for (int j = nps.length - 1; j >= 0; j--) { Annotation np2 = nps[j]; RuleResolvers.NPType type2 = RuleResolvers.getNPtype(np2, doc, posessives); // int par2 = ParNum.getValue(np2, doc); int sen2 = SentNum.getValue(np2, doc); boolean pn2 = type2.equals(RuleResolvers.NPType.PROPER_NAME); boolean pron2 = type2.equals(RuleResolvers.NPType.PRONOUN); boolean def2 = !pron2 && !pn2 && !FeatureUtils.isIndefinite(np2, doc); boolean specPronoun2 = pron2 && FeatureUtils.getPronounPerson(doc.getAnnotText(np2)) != PersonPronounTypeEnum.THIRD; boolean person2 = pn2 && ProperNameType.getValue(np2, doc).equals(FeatureUtils.NPSemTypeEnum.PERSON); boolean done = false; for (int k = j - 1; k >= 0 && !done; k--) { Annotation np1 = nps[k]; // Get the type of the first np RuleResolvers.NPType type1 = RuleResolvers.getNPtype(np1, doc, posessives); // int par1 = ParNum.getValue(np1, doc); // int parNum = Math.abs(par1 - par2); int sen1 = SentNum.getValue(np1, doc); int senNum = Math.abs(sen1 - sen2); boolean pron1 = type1.equals(RuleResolvers.NPType.PRONOUN); boolean pn1 = type1.equals(RuleResolvers.NPType.PROPER_NAME); boolean specPronoun1 = pron1 && FeatureUtils.getPronounPerson(doc.getAnnotText(np1)) != PersonPronounTypeEnum.THIRD; boolean person1 = pn1 && ProperNameType.getValue(np1, doc).equals(FeatureUtils.NPSemTypeEnum.PERSON); boolean includePair = false; if (pn1 && pn2 && ProperNameType.getValue(np1, doc).equals(ProperNameType.getValue(np2, doc))) { includePair = true; propNames++; } else if (person2 && specPronoun1) { includePair = true; is++; } else if (specPronoun1 && (specPronoun2 
|| person2)) { includePair = true; is++; } else if (specPronoun2 && (specPronoun1 || person1)) { includePair = true; is++; } else if (def2 && !pron1 && (senNum <= 6)) { includePair = true; def++; } else if (senNum <= 2) { includePair = true; regular++; } if (includePair) { pairs.add(new Annotation[] {np1, np2}); } else { if (!pn2 && !specPronoun2 && (!def2 || (senNum > 6))) { done = true; } } } } pairIter = pairs.iterator(); }
/**
 * Classifies the instances located in the default feature file for the document, using the
 * supplied classifier options.
 *
 * <p>Delegates to the file-based overload with the document's feature file and prediction file.
 *
 * @param doc the document whose feature and prediction files are used
 * @param options the classifier options passed through to the file-based overload
 * @return the result of the file-based overload; documented there as the minimum and maximum
 *     numerical values of the classified instances
 */
public double[] test(Document doc, String[] options) {
  return test(doc.getFeatureFile(), doc.getPredictionFile(), options);
}
/**
 * Classifies the instances located in the default feature file for the document, without
 * passing explicit classifier options.
 *
 * <p>Delegates to the file-based overload with the document's feature file and prediction file.
 *
 * @param doc the document whose feature and prediction files are used
 * @return the result of the file-based overload; documented there as the minimum and maximum
 *     numerical values of the classified instances
 */
public double[] test(Document doc) {
  return test(doc.getFeatureFile(), doc.getPredictionFile());
}
/**
 * Displays the raw text of the current document in the text panel.
 *
 * <p>The panel is cleared first, then filled with the document text; afterwards
 * {@code fileDisplayed} is set to {@code true}.
 */
public void displayRawText() {
  tvPanel.clearPanel();
  String rawText = doc.getText();
  tvPanel.setInitialText(rawText);
  fileDisplayed = true;
}
/**
 * Reports whether the first noun phrase is the subject of a main clause.
 *
 * @param np1 the noun phrase that is examined
 * @param np2 the second noun phrase of the pair (not used here)
 * @param doc the document supplying the parse annotations
 * @param featVector previously computed feature values (not used here)
 * @return {@code "Y"} if the {@code GramRole} value of {@code np1} equals {@code "SUBJECT"} and
 *     {@code SyntaxUtils.isMainClause} holds for it, {@code "N"} otherwise
 */
@Override
public String produceValue(
    Annotation np1, Annotation np2, Document doc, Map<Feature, String> featVector) {
  // Non-subjects are rejected before the parse annotations are fetched.
  if (!GramRole.getValue(np1, doc).equals("SUBJECT")) {
    return "N";
  }
  if (SyntaxUtils.isMainClause(np1, doc.getAnnotationSet(Constants.PARSE))) {
    return "Y";
  }
  return "N";
}