/*
   * Judges whether two noun phrases fall into compatible semantic classes.
   *
   * Returns 0 - If NA 1 - If Incompatible 2 - If compatible
   *
   * Decision order:
   *   - 2 when both phrases have the same semantic type and that type is not UNKNOWN;
   *   - 0 when either phrase has an empty WNSemClass sense array;
   *   - 2 when some entry of COMP_SUPERTYPES is a member of both sense arrays;
   *   - 2 when the sense arrays overlap and either phrase is a pronoun, or
   *     intersection() of the two phrases' word arrays is positive;
   *   - 1 otherwise.
   */
  public static int sameSemanticClass(Annotation np1, Annotation np2, Document doc) {
    NPSemTypeEnum firstType = NPSemanticType.getValue(np1, doc);
    NPSemTypeEnum secondType = NPSemanticType.getValue(np2, doc);
    boolean sameKnownType =
        firstType.equals(secondType) && !firstType.equals(NPSemTypeEnum.UNKNOWN);
    if (sameKnownType) {
      return 2;
    }

    String[] firstSenses = WNSemClass.getValue(np1, doc);
    String[] secondSenses = WNSemClass.getValue(np2, doc);
    if (firstSenses.length == 0 || secondSenses.length == 0) {
      // Nothing to compare against: the answer is "not applicable".
      return 0;
    }

    for (String supertype : COMP_SUPERTYPES) {
      if (!memberArray(supertype, firstSenses)) {
        continue;
      }
      if (memberArray(supertype, secondSenses)) {
        return 2;
      }
    }

    if (!overlaps(firstSenses, secondSenses)) {
      return 1;
    }
    if (FeatureUtils.isPronoun(np1, doc) || FeatureUtils.isPronoun(np2, doc)) {
      return 2;
    }
    String[] firstWords = doc.getWords(np1);
    String[] secondWords = doc.getWords(np2);
    return intersection(firstWords, secondWords) > 0 ? 2 : 1;
  }
  /**
   * Builds the horizontal navigation bar: a "Prev" button, a read-only text field showing the
   * document id, and a "Next" button, with glue on both sides of the text field.
   *
   * <p>As a side effect the created text field is stored in {@code fileNameField}.
   *
   * @param doc the document whose id is initially shown in the text field
   * @return the assembled panel
   */
  private JPanel createForwardBackPanel(Document doc) {
    ActionListener goBack =
        new ActionListener() {
          public void actionPerformed(java.awt.event.ActionEvent event) {
            previousFile();
          }
        };
    ActionListener goForward =
        new ActionListener() {
          public void actionPerformed(java.awt.event.ActionEvent event) {
            nextFile();
          }
        };

    JButton backButton = new JButton("Prev");
    backButton.setFont(font);
    backButton.addActionListener(goBack);

    JButton forwardButton = new JButton("Next");
    forwardButton.setFont(font);
    forwardButton.addActionListener(goForward);

    fileNameField = new JTextField(doc.getDocumentId());
    fileNameField.setEditable(false);
    fileNameField.setFont(font);

    JPanel bar = new JPanel();
    bar.setLayout(new BoxLayout(bar, BoxLayout.X_AXIS));
    bar.add(backButton);
    bar.add(Box.createHorizontalGlue());
    bar.add(fileNameField);
    bar.add(Box.createHorizontalGlue());
    bar.add(forwardButton);
    return bar;
  }
Exemple #3
0
  /**
   * Computes the paragraph number of {@code np}, storing the result as a property (keyed by this
   * feature) on {@code np} and on every annotation in the document's NP annotation set.
   *
   * <p>NPs contained in a paragraph annotation get that paragraph's number. Any NP still without
   * a value afterwards gets the number of the last paragraph overlapping the span from offset 0
   * to the NP's end offset, or 0 when there is no such paragraph.
   *
   * <p>When {@code Constants.PAR_NUMS_UNAVAILABLE} is set, every paragraph number is 0 and the
   * {@code parNum} attribute is never parsed. The original only honoured the flag in the first
   * pass and parsed the attribute unconditionally in the fallback paths.
   *
   * @param np the noun phrase whose paragraph number is requested
   * @param doc the document providing the paragraph and NP annotation sets
   * @return the property value stored on {@code np}
   */
  @Override
  public Object produceValue(Annotation np, Document doc) {
    AnnotationSet par = doc.getAnnotationSet(Constants.PAR);
    AnnotationSet nps = doc.getAnnotationSet(Constants.NP);

    // First pass: every NP contained in a paragraph inherits that paragraph's number.
    for (Annotation p : par) {
      int num = paragraphNumberOf(p);
      AnnotationSet enclosed = nps.getContained(p);
      for (Annotation e : enclosed) {
        e.setProperty(this, num);
      }
    }

    // Make sure that all annotations have an associated PARNUM
    for (Annotation n : nps) {
      if (n.getProperty(this) == null) {
        assignParNumFromLastOverlappingParagraph(n, par);
      }
    }

    // The requested NP may not be a member of the NP set, so handle it explicitly as well.
    if (np.getProperty(this) == null) {
      assignParNumFromLastOverlappingParagraph(np, par);
    }

    return np.getProperty(this);
  }

  /** Returns the number of a paragraph annotation, or 0 when paragraph numbers are unavailable. */
  private int paragraphNumberOf(Annotation paragraph) {
    if (Constants.PAR_NUMS_UNAVAILABLE) {
      return 0;
    }
    return Integer.parseInt(paragraph.getAttribute("parNum"));
  }

  /**
   * Stores on {@code target} the number of the last paragraph overlapping the span from offset 0
   * to the target's end offset, or 0 when no paragraph overlaps that span.
   */
  private void assignParNumFromLastOverlappingParagraph(Annotation target, AnnotationSet par) {
    AnnotationSet overlapping = par.getOverlapping(0, target.getEndOffset());
    if (overlapping == null || overlapping.size() < 1) {
      target.setProperty(this, 0);
    } else {
      target.setProperty(this, paragraphNumberOf(overlapping.getLast()));
    }
  }
  /**
   * Switches the viewer to a new document and refreshes the dependent UI components.
   *
   * @param document the document to display from now on
   */
  public void setDoc(Document document) {
    // Clear the prediction panel before the current document is replaced.
    predictionPanel.clear();

    doc = document;

    // Reset the text view and load the new document's raw text.
    tvPanel.clearHighlights();
    displayRawText();

    // Redraw the prediction panel and show the new document id in the file name field.
    predictionPanel.redraw();
    fileNameField.setText(doc.getDocumentId());
  }
 /**
  * Classifies a noun phrase as definite, indefinite or quantified.
  *
  * <p>Checks are applied in this order:
  *
  * <ol>
  *   <li>proper names and pronouns are DEFINITE;
  *   <li>a first word of "a", "an" or "one" makes the phrase INDEFINITE;
  *   <li>a first word from the quantifier list makes it QUANTIFIED, unless the second word is
  *       "of" (case-insensitive), in which case it is DEFINITE;
  *   <li>otherwise the POS annotations contained in the phrase decide: a leading cardinal
  *       number or predeterminer gives QUANTIFIED; a leading possessive pronoun or a trailing
  *       proper noun gives DEFINITE;
  *   <li>anything else is INDEFINITE.
  * </ol>
  *
  * <p>A phrase with no words skips the first-word checks instead of failing with an
  * {@code ArrayIndexOutOfBoundsException}.
  *
  * @param np the noun phrase to classify
  * @param doc the document the noun phrase belongs to
  * @return the article type of {@code np}
  */
 public static ArticleTypeEnum articleType(Annotation np, Document doc) {
   String[] indefs = {"a", "an", "one"};
   // NOTE(review): an explicit list of definite determiners ("the", "this", "that", "these",
   // "those") was disabled in the original; such phrases fall through to the POS checks and
   // the final default. Confirm that this is intended.
   String[] quans = {"every", "all", "some", "most", "few", "many", "much"};
   String[] words = doc.getWords(np);
   if (ProperName.getValue(np, doc)) {
     return ArticleTypeEnum.DEFINITE;
   }
   if (isPronoun(np, doc)) {
     return ArticleTypeEnum.DEFINITE;
   }
   // Only look at the first word when there is one.
   if (words != null && words.length > 0) {
     String first = words[0];
     if (memberArray(first, indefs)) {
       return ArticleTypeEnum.INDEFINITE;
     }
     if (memberArray(first, quans)) {
       boolean followedByOf = words.length >= 2 && words[1].equalsIgnoreCase("of");
       return followedByOf ? ArticleTypeEnum.DEFINITE : ArticleTypeEnum.QUANTIFIED;
     }
   }
   AnnotationSet pos = doc.getAnnotationSet(Constants.POS);
   pos = pos.getContained(np);
   if (pos != null && pos.size() > 0) {
     if (isCardinalNumber(pos.getFirst()) || isPredeterminer(pos.getFirst())) {
       return ArticleTypeEnum.QUANTIFIED;
     }
     if (isPossesivePronoun(pos.getFirst()) || isProperNoun(pos.getLast())) {
       return ArticleTypeEnum.DEFINITE;
     }
   }
   return ArticleTypeEnum.INDEFINITE;
 }
  /**
   * Tells whether a noun phrase starts with an indefinite determiner ("a", "an" or "one").
   *
   * <p>A phrase with no words is reported as not indefinite instead of failing with an
   * {@code ArrayIndexOutOfBoundsException}.
   *
   * @param np the noun phrase to inspect
   * @param doc the document the noun phrase belongs to
   * @return {@code true} when the first word of {@code np} is a member of the indefinite list
   */
  public static boolean isIndefinite(Annotation np, Document doc) {
    String[] indefs = {"a", "an", "one"};
    String[] words = doc.getWords(np);
    if (words == null || words.length == 0) {
      return false;
    }
    return memberArray(words[0], indefs);
  }
Exemple #7
0
  /**
   * Decides whether two noun phrases are compatible by head noun or by subclass relation.
   *
   * <p>Pronouns are always INCOMPATIBLE. Otherwise identical lower-cased head nouns are
   * COMPATIBLE; failing that, a proper name on either side is INCOMPATIBLE; failing that, the
   * pair is COMPATIBLE exactly when one phrase is a subclass of the other, in either direction.
   *
   * <p>The original tested {@code isSubclass(np2, np2, doc)} for the reverse direction, which
   * compared np2 with itself; it now tests {@code isSubclass(np2, np1, doc)}. The second,
   * unreachable pronoun check was removed.
   *
   * @param np1 the first noun phrase
   * @param np2 the second noun phrase
   * @param doc the document both noun phrases belong to
   * @param featVector previously computed feature values (not read here)
   * @return {@code COMPATIBLE} or {@code INCOMPATIBLE}
   */
  @Override
  public String produceValue(
      Annotation np1, Annotation np2, Document doc, Map<Feature, String> featVector) {
    if (FeatureUtils.isPronoun(np1, doc) || FeatureUtils.isPronoun(np2, doc)) {
      return INCOMPATIBLE;
    }
    Annotation head1 = HeadNoun.getValue(np1, doc);
    Annotation head2 = HeadNoun.getValue(np2, doc);

    String h1 = doc.getAnnotString(head1).toLowerCase();
    String h2 = doc.getAnnotString(head2).toLowerCase();

    // Identical head nouns are compatible without consulting the subclass relation.
    if (h1.equals(h2)) {
      return COMPATIBLE;
    }
    if (ProperName.getValue(np2, doc) || ProperName.getValue(np1, doc)) {
      return INCOMPATIBLE;
    }
    // Check the subclass relation in both directions.
    if (FeatureUtils.isSubclass(np1, np2, doc) || FeatureUtils.isSubclass(np2, np1, doc)) {
      return COMPATIBLE;
    }
    return INCOMPATIBLE;
  }
  /**
   * Runs the external tagger over the document's sentences and registers the named entities it
   * reports as a new annotation set named {@code annSetNames[0]}.
   *
   * <p>Steps: (1) every sentence that is not made up solely of non-word characters is rendered
   * as one line of space-separated clean token texts; (2) the lines are written to
   * {@code tmp.ner} in the document's root directory; (3) the tagger command is executed and
   * its output read back line by line, each output line being aligned positionally with the
   * tokens of the corresponding sentence; (4) {@code B-}/{@code I-} tags are folded into entity
   * annotations.
   *
   * <p>Differences from the original implementation:
   *
   * <ul>
   *   <li>an entity still open when the tagger output ends is now added to the result set
   *       (it used to be silently dropped);
   *   <li>a failure to write the temporary file now aborts with a {@code RuntimeException}
   *       instead of printing a stack trace and tagging a missing or partial file, and the
   *       writer is closed even when writing fails;
   *   <li>the thread's interrupt flag is restored when the external process is interrupted;
   *   <li>output lines or tokens beyond what was sent to the tagger are ignored instead of
   *       raising an index-out-of-bounds error.
   * </ul>
   *
   * @param doc the document to tag
   * @param annSetNames names of the produced annotation sets; only the first entry is used
   * @throws RuntimeException if the temporary file cannot be written, or if running the tagger
   *     fails or is interrupted
   */
  @Override
  public void run(Document doc, String annSetNames[]) {

    String tagChunk = currentConfig.getTagChunk();
    String listDir = currentConfig.getTagChunkLists();

    AnnotationSet namedEntities = new AnnotationSet(annSetNames[0]);

    // get the sentences from the input
    AnnotationSet sentSet = doc.getAnnotationSet(Constants.SENT);

    // get the tokens from each sentence
    AnnotationSet tokenSet = doc.getAnnotationSet(Constants.TOKEN);

    // Read in the text from the raw file
    String text = doc.getText();

    Iterator<Annotation> sents = sentSet.iterator();
    ArrayList<String> lines = new ArrayList<String>();
    ArrayList<Vector<Annotation>> tokenList = new ArrayList<Vector<Annotation>>();

    while (sents.hasNext()) {
      Vector<Annotation> annVector = new Vector<Annotation>();
      Annotation sent = sents.next();
      int sentStart = sent.getStartOffset();
      int sentEnd = sent.getEndOffset();
      String sentText = Annotation.getAnnotText(sent, text);
      AnnotationSet sentTokens = tokenSet.getContained(sentStart, sentEnd);

      // gather all sentences to tag
      if (!sentText.matches("\\W+")) {
        StringBuilder tmp = new StringBuilder();
        for (Annotation a : sentTokens) {
          tmp.append(Annotation.getAnnotTextClean(a, text)).append(" ");
          annVector.add(a);
        }

        lines.add(tmp.toString());
        tokenList.add(annVector);
      }
    }

    // write out a tmp file that contains the words to be tagged
    File tmpFile = new File(doc.getRootDir(), "tmp.ner");
    tmpFile.deleteOnExit();
    try {
      BufferedWriter bw = new BufferedWriter(new FileWriter(tmpFile));
      try {
        for (String l : lines) {
          bw.write(l + "\n");
        }
      } finally {
        // Closing the buffered writer flushes it and closes the underlying file writer.
        bw.close();
      }
    } catch (IOException ioe) {
      // Without its input file the tagger cannot produce meaningful output, so fail fast.
      throw new RuntimeException(ioe);
    }

    // run the tagger
    String command =
        tagChunk
            + " -predict . "
            + modelDir
            + Utils.SEPARATOR
            + models[0]
            + " "
            + tmpFile.getAbsolutePath()
            + " "
            + listDir;

    // collect the results
    try {
      ArrayList<String> results = Utils.runExternalCaptureOutput(command);
      // The entity currently being assembled; it is carried across output lines.
      Annotation current = null;
      int i = 0;
      for (String l : results) {
        if (i >= tokenList.size()) {
          // More output lines than sentences were sent: nothing left to align with.
          break;
        }
        Vector<Annotation> annVector = tokenList.get(i);

        // get rid of these extraneous tags
        l = l.replace("_O-O", "");
        String[] tokens = l.split(" ");

        int j = 0;
        // NOTE(review): the id counter restarts at 1 for every output line, as in the
        // original; confirm whether ids are meant to be unique across the document.
        int nes = 1;
        for (String t : tokens) {
          if (j >= annVector.size()) {
            // More output tokens than input tokens on this line: ignore the surplus.
            break;
          }
          int underscore = t.lastIndexOf('_');
          String tag = t.substring(underscore + 1, t.length());
          Annotation a = annVector.get(j);

          if (tag.equals("B-O")) {
            j++;
            if (current != null) {
              namedEntities.add(current);
              nes++;
              current = null;
            }

            continue;
          }

          String entityType = tag.substring(tag.indexOf("-") + 1, tag.length());

          // Expand the abbreviated entity labels to their long names.
          if (entityType.equals("ORG")) {
            entityType = "ORGANIZATION";
          } else if (entityType.equals("LOC")) {
            entityType = "LOCATION";
          } else if (entityType.equals("PER")) {
            entityType = "PERSON";
          } else if (entityType.equals("VEH")) {
            entityType = "VEHICLE";
          }

          if (tag.startsWith("B-")) {
            // A new entity begins: close the one in progress, if any.
            if (current != null) {
              namedEntities.add(current);
              nes++;
              current = null;
            }

            current = new Annotation(nes, a.getStartOffset(), a.getEndOffset(), entityType);
          } else if (tag.startsWith("I-")) {
            if (current != null) {
              // Continuation: extend the open entity up to this token.
              current.setEndOffset(a.getEndOffset());
            } else {
              // Continuation tag without an open entity: start one here.
              current = new Annotation(nes, a.getStartOffset(), a.getEndOffset(), entityType);
            }
          }

          j++;
        }

        i++;
      }

      // Flush the entity that was still open when the tagger output ended.
      if (current != null) {
        namedEntities.add(current);
      }

      FileUtils.delete(tmpFile);
    } catch (IOException e) {
      throw new RuntimeException(e);
    } catch (InterruptedException e) {
      // Restore the interrupt flag so callers further up can still observe the interruption.
      Thread.currentThread().interrupt();
      throw new RuntimeException(e);
    }

    addResultSet(doc, namedEntities);
  }
  /**
   * Builds the list of candidate noun-phrase pairs and positions {@code pairIter} at its start.
   *
   * <p>Noun phrases are visited from last to first; each one (np2) is paired with noun phrases
   * at lower indices (np1), walking backwards. A pair is kept when any of the following holds:
   *
   * <ul>
   *   <li>both are proper names with equal proper-name types;
   *   <li>both are non-third-person pronouns, or one is a non-third-person pronoun and the other
   *       a proper name of type PERSON;
   *   <li>np2 is definite (not a pronoun, proper name or indefinite), np1 is not a pronoun, and
   *       the two are at most 6 sentences apart;
   *   <li>the two are at most 2 sentences apart.
   * </ul>
   *
   * <p>The backwards walk for np2 stops at the first rejected pair unless np2 is a proper name,
   * a non-third-person pronoun, or a definite still within 6 sentences of np1.
   *
   * @param nps the noun phrases of the document
   * @param doc the document being processed
   * @param training passed through to the superclass initializer
   */
  @Override
  public void initialize(Annotation[] nps, Document doc, boolean training) {
    super.initialize(nps, doc, training);
    pairs = new ArrayList<Annotation[]>();

    HashMap<Annotation, ArrayList<Annotation>> possessives =
        new HashMap<Annotation, ArrayList<Annotation>>();
    RuleResolvers.addAllPossesives(doc.getAnnotationSet(Constants.NP), doc, possessives);

    for (int j = nps.length - 1; j >= 0; j--) {
      Annotation np2 = nps[j];
      RuleResolvers.NPType type2 = RuleResolvers.getNPtype(np2, doc, possessives);
      int sen2 = SentNum.getValue(np2, doc);
      boolean pn2 = type2.equals(RuleResolvers.NPType.PROPER_NAME);
      boolean pron2 = type2.equals(RuleResolvers.NPType.PRONOUN);
      boolean def2 = !(pron2 || pn2 || FeatureUtils.isIndefinite(np2, doc));
      boolean specPronoun2 =
          pron2
              && FeatureUtils.getPronounPerson(doc.getAnnotText(np2))
                  != PersonPronounTypeEnum.THIRD;
      boolean person2 =
          pn2 && ProperNameType.getValue(np2, doc).equals(FeatureUtils.NPSemTypeEnum.PERSON);

      for (int k = j - 1; k >= 0; k--) {
        Annotation np1 = nps[k];
        RuleResolvers.NPType type1 = RuleResolvers.getNPtype(np1, doc, possessives);
        int sen1 = SentNum.getValue(np1, doc);
        int senNum = Math.abs(sen1 - sen2);
        boolean pron1 = type1.equals(RuleResolvers.NPType.PRONOUN);
        boolean pn1 = type1.equals(RuleResolvers.NPType.PROPER_NAME);
        boolean specPronoun1 =
            pron1
                && FeatureUtils.getPronounPerson(doc.getAnnotText(np1))
                    != PersonPronounTypeEnum.THIRD;
        boolean person1 =
            pn1 && ProperNameType.getValue(np1, doc).equals(FeatureUtils.NPSemTypeEnum.PERSON);

        boolean sameKindOfName =
            pn1
                && pn2
                && ProperNameType.getValue(np1, doc).equals(ProperNameType.getValue(np2, doc));
        boolean speakerLink =
            (specPronoun1 && (specPronoun2 || person2))
                || (specPronoun2 && (specPronoun1 || person1));
        boolean definiteInRange = def2 && !pron1 && senNum <= 6;
        boolean closeBy = senNum <= 2;

        if (sameKindOfName || speakerLink || definiteInRange || closeBy) {
          pairs.add(new Annotation[] {np1, np2});
          continue;
        }

        // Rejected pair: keep walking back only for the np2 kinds that may link further away.
        boolean mayLinkFurtherBack = pn2 || specPronoun2 || (def2 && senNum <= 6);
        if (!mayLinkFurtherBack) {
          break;
        }
      }
    }
    pairIter = pairs.iterator();
  }
 /**
  * Classifies the instances located in the default feature file for the document, writing to
  * the document's prediction file and using the supplied classifier options.
  *
  * <p>Delegates to the file-based {@code test} overload.
  *
  * @param doc the document whose feature file and prediction file are used
  * @param options the classifier options passed on to the file-based overload
  * @return the result of the file-based overload (documented originally as the minimum and
  *     maximum numerical values of the classified instances)
  */
 public double[] test(Document doc, String[] options) {
   return test(doc.getFeatureFile(), doc.getPredictionFile(), options);
 }
  /**
   * Classifies the instances located in the default feature file for the document, writing to
   * the document's prediction file. No options are passed, so the file-based overload decides
   * which classifier options apply (documented originally as the defaults from the config file).
   *
   * @param doc the document whose feature file and prediction file are used
   * @return the result of the file-based overload (documented originally as the minimum and
   *     maximum numerical values of the classified instances)
   */
  public double[] test(Document doc) {
    return test(doc.getFeatureFile(), doc.getPredictionFile());
  }
 /**
  * Displays the raw text of the current document in the text panel.
  *
  * <p>Clears the panel, loads the text of {@code doc} into it, and sets {@code fileDisplayed}
  * to {@code true}. The method takes no parameters; it reads the {@code doc} field.
  */
 public void displayRawText() {
   tvPanel.clearPanel();
   tvPanel.setInitialText(doc.getText());
   fileDisplayed = true;
 }
 /**
  * Reports whether the first noun phrase is a subject located in a main clause.
  *
  * @param np1 the noun phrase that is inspected
  * @param np2 the second noun phrase of the pair (not read here)
  * @param doc the document providing the parse annotations
  * @param featVector previously computed feature values (not read here)
  * @return {@code "Y"} when the grammatical role of {@code np1} equals {@code "SUBJECT"} and
  *     {@code SyntaxUtils.isMainClause} accepts it, {@code "N"} otherwise
  */
 @Override
 public String produceValue(
     Annotation np1, Annotation np2, Document doc, Map<Feature, String> featVector) {
   if (!GramRole.getValue(np1, doc).equals("SUBJECT")) {
     // Not a subject: the parse annotations are never consulted.
     return "N";
   }
   if (SyntaxUtils.isMainClause(np1, doc.getAnnotationSet(Constants.PARSE))) {
     return "Y";
   }
   return "N";
 }