Java TextAnnotation示例，edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation Java示例

示例#1

0

显示文件

文件： Redactor.java 项目： ryannk/open-eval

 /**
  * Static method to remove views from `TextAnnotation` objects. Each task variant has a number of
  * views necessary for the solver to solve it. All other views should be removed.
  *
  * <p>Views are removed from a clone of each annotation; the clone is what ends up in the result.
  *
  * @param textAnnotations annotations to cleanse
  * @param viewsToKeep names of the views to retain; {@code null} is treated as an empty list, so
  *     every available view is removed
  * @return one entry per input annotation, in input order; an entry is {@code null} when the
  *     corresponding annotation could not be cloned
  */
 private static List<TextAnnotation> removeViews(
     List<TextAnnotation> textAnnotations, List<String> viewsToKeep) {
   // Work on a private copy so later changes to the caller's list cannot affect us.
   List<String> keep =
       viewsToKeep == null ? new ArrayList<String>() : new ArrayList<String>(viewsToKeep);
   List<TextAnnotation> cleansed = new ArrayList<>();
   for (TextAnnotation textAnnotation : textAnnotations) {
     // Built per annotation: a shared list would keep growing (with duplicates) and would apply
     // the view names of earlier annotations to later ones.
     List<String> viewsToRemove = new ArrayList<>();
     for (String viewName : textAnnotation.getAvailableViews()) {
       if (!keep.contains(viewName)) {
         viewsToRemove.add(viewName);
       }
     }
     TextAnnotation cleansedAnnotation;
     try {
       cleansedAnnotation = (TextAnnotation) textAnnotation.clone();
     } catch (CloneNotSupportedException ce) {
       // Best effort: keep the result positionally aligned with the input by adding a
       // placeholder instead of failing the whole batch.
       cleansed.add(null);
       continue;
     }
     for (String viewName : viewsToRemove) {
       cleansedAnnotation.removeView(viewName);
     }
     cleansed.add(cleansedAnnotation);
   }
   return cleansed;
 }

示例#2

0

显示文件

文件： Redactor.java 项目： ryannk/open-eval

 /**
  * Removes every {@code Relation} from the {@code RELATIONVIEW} in the list of text annotations.
  *
  * <p>The annotations are first reduced (via {@code removeViews}) to the sentence, token and
  * {@code RELATIONVIEW} views. For every remaining view that is a {@code PredicateArgumentView},
  * all relations are removed and each constituent is replaced by a constituent with an empty
  * label covering the same span.
  *
  * @param uncleansedAnnotations annotations to cleanse
  * @return the cleansed annotations as returned by {@code removeViews}
  */
 private static List<TextAnnotation> removeRelationsFromPredicateArgumentView(
     List<TextAnnotation> uncleansedAnnotations) {
   List<String> relationExtractionViews = new ArrayList<>();
   relationExtractionViews.add(ViewNames.SENTENCE);
   relationExtractionViews.add(ViewNames.TOKENS);
   relationExtractionViews.add("RELATIONVIEW");
   List<TextAnnotation> textAnnotations =
       removeViews(uncleansedAnnotations, relationExtractionViews);
   for (TextAnnotation textAnnotation : textAnnotations) {
     Set<String> viewNames = textAnnotation.getAvailableViews();
     for (String viewName : viewNames) {
       View view = textAnnotation.getView(viewName);
       if (!(view instanceof PredicateArgumentView)) {
         continue;
       }
       PredicateArgumentView predicateArgumentView = (PredicateArgumentView) view;
       predicateArgumentView.removeAllRelations();
       // Iterate over a snapshot: the loop body removes and adds constituents on this very
       // view, which is unsafe if getConstituents() exposes the view's backing list.
       List<Constituent> snapshot = new ArrayList<>(predicateArgumentView.getConstituents());
       for (Constituent c : snapshot) {
         predicateArgumentView.removeConstituent(c);
         int start = c.getStartSpan();
         int end = c.getEndSpan();
         // Same span, empty label.
         view.addConstituent(new Constituent("", "RELATIONVIEW", textAnnotation, start, end));
       }
     }
   }
   return textAnnotations;
 }

示例#3

0

显示文件

文件： FeatureInputTransformer.java 项目： IllinoisCogComp/illinois-cogcomp-nlp

        /**
         * Climbs the dependency tree from the input's first token until a verb is found.
         *
         * <p>Starting at the first dependency constituent covering the input's start token, the
         * walk follows the source of the first incoming relation until the part of speech at the
         * current node's start token is a verb.
         *
         * @param input the constituent to start from
         * @return a singleton list with the verb node (linked back to {@code input} through
         *     {@code addPointerToSource}), or an empty list when the walk reaches a node without
         *     incoming relations before finding a verb
         */
        @Override
        public List<Constituent> transform(Constituent input) {
          TextAnnotation annotation = input.getTextAnnotation();
          TreeView dependencyTree = (TreeView) annotation.getView(ViewNames.DEPENDENCY);

          Constituent current =
              dependencyTree.getConstituentsCoveringToken(input.getStartSpan()).get(0);

          while (!POSUtils.isPOSVerb(WordHelpers.getPOS(annotation, current.getStartSpan()))) {
            List<Relation> parents = current.getIncomingRelations();
            if (parents == null || parents.isEmpty()) {
              // Ran out of ancestors without meeting a verb.
              return new ArrayList<>();
            }
            current = parents.get(0).getSource();
          }

          return Collections.singletonList(addPointerToSource(input, current));
        }

示例#4

0

显示文件

文件： FeatureInputTransformer.java 项目： IllinoisCogComp/illinois-cogcomp-nlp

 /**
  * Produces the one-token constituent that starts at the input's end span.
  *
  * @param input the constituent whose following token is wanted
  * @return a singleton list with the new constituent (linked back to {@code input} through
  *     {@code addPointerToSource}), or an empty list when the end span is not smaller than the
  *     size of the input's sentence
  */
 @Override
 public List<Constituent> transform(Constituent input) {
   TextAnnotation annotation = input.getTextAnnotation();
   int nextToken = input.getEndSpan();
   Sentence enclosing = annotation.getSentence(input.getSentenceId());

   // NOTE(review): the end span is compared against the sentence's size; confirm both use the
   // same index space for sentences other than the first.
   if (nextToken >= enclosing.size()) {
     return new ArrayList<>();
   }

   Constituent following = new Constituent("", "", annotation, nextToken, nextToken + 1);
   return Collections.singletonList(addPointerToSource(input, following));
 }

示例#5

0

显示文件

文件： WordConjunctionOneTwoThreeGramWindowTwo.java 项目： IllinoisCogComp/illinois-cogcomp-nlp

  /**
   * Builds conjunction features of one, two and three consecutive word forms taken from the
   * window returned by {@code getWindowK} (requested with a window size of 2) around the
   * constituent.
   *
   * <p>Assumes the TOKENS view is present on the constituent's TextAnnotation. Each feature is
   * named {@code <classifier>:<offset>_<n>(<form>_<form>...)}, where {@code offset} is the window
   * index minus the window size and {@code n} is the requested n-gram size. Near the end of the
   * window an n-gram is truncated to the forms that are available.
   *
   * @param c the constituent (expected to be a single word/token) to extract features for
   * @return the features, ordered by n-gram size and then by window position
   * @throws EdisonException declared by the extractor contract
   */
  @Override
  public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    TOKENS = c.getTextAnnotation().getView(ViewNames.TOKENS);

    final int maxNgram = 3;
    final int window = 2;
    final String classifier = "WordConjunctionOneTwoThreeGramWindowTwo";

    String[] forms = getWindowK(TOKENS, c.getStartSpan(), c.getEndSpan(), window);
    Set<Feature> features = new LinkedHashSet<>();

    for (int ngram = 1; ngram <= maxNgram; ngram++) {
      for (int first = 0; first < forms.length; first++) {
        // Join up to `ngram` consecutive forms starting at `first`, stopping at the array end.
        int stop = Math.min(first + ngram, forms.length);
        StringBuilder joined = new StringBuilder();
        for (int at = first; at < stop; at++) {
          if (at > first) {
            joined.append("_");
          }
          joined.append(forms[at]);
        }

        // Position is reported relative to the window size; the size is the requested n.
        String id = classifier + ":" + (first - window) + "_" + ngram;
        String value = "(" + joined + ")";
        features.add(new DiscreteFeature(id + value));
      }
    }
    return features;
  }

示例#6

0

显示文件

文件： DependencyPathNgrams.java 项目： IllinoisCogComp/illinois-cogcomp-nlp

  /**
   * Extracts n-gram features over the dependency path between a constituent's node and the
   * source of its first incoming relation.
   *
   * <p>Two parallel sequences are built along the path to the common ancestor: one of relation
   * names / direction markers, and one of the same entries prefixed by the part of speech. The
   * common ancestor is rendered as {@code *}. Both sequences are passed to {@code getNgrams}.
   *
   * @param c the constituent to extract features for
   * @return the extracted features; empty when the node has no incoming relations
   * @throws EdisonException declared by the extractor contract
   */
  @Override
  public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    Set<Feature> features = new LinkedHashSet<>();
    TextAnnotation ta = c.getTextAnnotation();
    TreeView parse = (TreeView) ta.getView(dependencyViewName);

    // The node in the dependency view that corresponds to c.
    Constituent node = parse.getConstituentsCoveringToken(c.getStartSpan()).get(0);
    List<Relation> incoming = node.getIncomingRelations();
    if (incoming.isEmpty()) {
      return features;
    }

    int governorToken = incoming.get(0).getSource().getStartSpan();
    Constituent governor = parse.getConstituentsCoveringToken(governorToken).get(0);

    Pair<List<Constituent>, List<Constituent>> paths =
        PathFeatureHelper.getPathsToCommonAncestor(governor, node, 400);
    List<Constituent> upward = paths.getFirst();
    List<Constituent> downward = paths.getSecond();

    List<String> path = new ArrayList<>();
    List<String> pos = new ArrayList<>();

    // Upward leg: every node except the last one (the common ancestor).
    int topIndex = upward.size() - 1;
    for (int i = 0; i < topIndex; i++) {
      Constituent step = upward.get(i);
      String relation = step.getIncomingRelations().get(0).getRelationName();
      path.add(relation + PathFeatureHelper.PATH_UP_STRING);
      pos.add(
          WordHelpers.getPOS(ta, step.getStartSpan())
              + ":"
              + relation
              + PathFeatureHelper.PATH_UP_STRING);
    }

    // Common ancestor.
    Constituent top = upward.get(topIndex);
    pos.add(WordHelpers.getPOS(ta, top.getStartSpan()) + ":*");
    path.add("*");

    // Downward leg: walk the second path backwards, skipping its last element.
    for (int i = downward.size() - 2; i >= 0; i--) {
      Constituent step = downward.get(i);
      pos.add(WordHelpers.getPOS(ta, step.getStartSpan()) + ":" + PathFeatureHelper.PATH_DOWN_STRING);
      path.add(PathFeatureHelper.PATH_DOWN_STRING);
    }

    features.addAll(getNgrams(path, ""));
    features.addAll(getNgrams(pos, "pos"));
    return features;
  }

示例#7

0

显示文件

文件： FeatureInputTransformer.java 项目： IllinoisCogComp/illinois-cogcomp-nlp

        /**
         * Maps a constituent to the head word of its parse phrase in the Charniak parse view.
         *
         * @param c the constituent whose head word is wanted
         * @return a singleton list with a one-token constituent at the head word position,
         *     linked back to {@code c} through {@code addPointerToSource}
         * @throws RuntimeException wrapping any exception raised while locating the head
         */
        @Override
        public List<Constituent> transform(Constituent c) {
          TextAnnotation annotation = c.getTextAnnotation();
          TreeView parseTree = (TreeView) annotation.getView(ViewNames.PARSE_CHARNIAK);

          Constituent headWord;
          try {
            Constituent phrase = parseTree.getParsePhrase(c);
            int headPosition = CollinsHeadFinder.getInstance().getHeadWordPosition(phrase);
            headWord =
                addPointerToSource(
                    c, new Constituent("", "", annotation, headPosition, headPosition + 1));
          } catch (Exception e) {
            throw new RuntimeException(e);
          }
          return Collections.singletonList(headWord);
        }

示例#8

0

显示文件

文件： FeatureInputTransformer.java 项目： IllinoisCogComp/illinois-cogcomp-nlp

  /**
   * Returns the view produced by {@code viewGenerator}, adding it to the annotation first when it
   * is missing.
   *
   * <p>The presence check is repeated inside a block synchronized on
   * {@code FeatureInputTransformer.class} before the view is added.
   *
   * @param ta the annotation to read the view from
   * @param viewGenerator the annotator that names and, if necessary, creates the view
   * @return the view, cast to {@code TreeView}
   * @throws RuntimeException wrapping an {@code AnnotatorException} raised while adding the view
   */
  private static TreeView getDependencyView(TextAnnotation ta, Annotator viewGenerator) {
    String viewName = viewGenerator.getViewName();

    if (ta.hasView(viewName)) {
      return (TreeView) ta.getView(viewName);
    }

    synchronized (FeatureInputTransformer.class) {
      // Re-check under the lock: the view may have been added in the meantime.
      if (!ta.hasView(viewName)) {
        try {
          ta.addView(viewGenerator);
        } catch (AnnotatorException e) {
          throw new RuntimeException(e);
        }
      }
    }
    return (TreeView) ta.getView(viewName);
  }

示例#9

0

显示文件

文件： JsonSerializer.java 项目： IllinoisCogComp/illinois-cogcomp-nlp

  /**
   * Serializes a text annotation into a JSON object.
   *
   * <p>Views that resolve to {@code null} are logged and removed from {@code ta} before writing.
   * The output contains, in order: corpus id, id, text, tokens, optional token offsets, the
   * sentences, a {@code views} array (every view except the sentence view, in the order given by
   * {@code Sorters.sortSet}), and the attributes.
   *
   * @param ta the annotation to serialize
   * @param doWriteTokenOffsets whether to also write the token offsets of the TOKENS view
   * @return the JSON representation
   */
  JsonObject writeTextAnnotation(TextAnnotation ta, boolean doWriteTokenOffsets) {
    // Prune null views; iterate over a copy of the names because views are removed on the way.
    Set<String> namesSnapshot = new HashSet<>(ta.getAvailableViews());
    for (String name : namesSnapshot) {
      if (ta.getView(name) != null) {
        continue;
      }
      logger.warn("View " + name + " is null");
      ta.removeView(name);
    }

    JsonObject root = new JsonObject();
    writeString("corpusId", ta.getCorpusId(), root);
    writeString("id", ta.getId(), root);
    writeString("text", ta.getText(), root);
    writeStringArray("tokens", ta.getTokens(), root);
    if (doWriteTokenOffsets) {
      writeTokenOffsets(TOKENOFFSETS, ta.getView(ViewNames.TOKENS), root);
    }
    writeSentences(ta, root);

    JsonArray viewsJson = new JsonArray();
    for (String viewName : Sorters.sortSet(ta.getAvailableViews())) {
      // The sentence view is written separately by writeSentences.
      if (viewName.equals(ViewNames.SENTENCE)) {
        continue;
      }

      JsonObject viewJson = new JsonObject();
      writeString("viewName", viewName, viewJson);

      JsonArray viewData = new JsonArray();
      for (View kthView : ta.getTopKViews(viewName)) {
        JsonObject kthJson = new JsonObject();
        writeView(kthView, kthJson);
        viewData.add(kthJson);
      }
      viewJson.add("viewData", viewData);

      viewsJson.add(viewJson);
    }
    root.add("views", viewsJson);

    writeAttributes(ta, root);
    return root;
  }

示例#10

0

显示文件

文件： TestParsePath.java 项目： IllinoisCogComp/illinois-cogcomp-nlp

/**
 * A test for {@link ParsePath}: every constituent of the gold parse view of a generated dummy
 * annotation must yield one of the expected {@code "<constituent>-><features>"} strings.
 *
 * @author Daniel Khashabi
 */
public class TestParsePath {
  private static TextAnnotation tas =
      DummyTextAnnotationGenerator.generateAnnotatedTextAnnotation(false, 1);
  private List<Constituent> cons = tas.getView(ViewNames.PARSE_GOLD).getConstituents();
  public static ParsePath parsePath = new ParsePath(ViewNames.PARSE_GOLD);
  private static Logger logger = LoggerFactory.getLogger(TestParsePath.class);

  /** The accepted renderings, one per (constituent, feature set) pair. */
  Set<String> correctResponses =
      new HashSet<>(
          Arrays.asList(
              "The construction of the John Smith library finished on time .->[]",
              "The construction of the John Smith library finished on time .->[S, , l=1.0]",
              "The construction of the John Smith library->[SvNP, , l=2.0]",
              "The construction->[NPvNP, , l=2.0]",
              "The->[NPvDT, , l=2.0]",
              "The->[DT, , l=1.0]",
              "construction->[NPvNN, , l=2.0]",
              "construction->[NN, , l=1.0]",
              "of the John Smith library->[NPvPP, , l=2.0]",
              "of->[PPvIN, , l=2.0]",
              "of->[IN, , l=1.0]",
              "the John Smith library->[PPvNP, , l=2.0]",
              "the->[NPvDT, , l=2.0]",
              "the->[DT, , l=1.0]",
              "John->[NPvNNP, , l=2.0]",
              "John->[NNP, , l=1.0]",
              "Smith->[NPvNNP, , l=2.0]",
              "Smith->[NNP, , l=1.0]",
              "library->[NPvNN, , l=2.0]",
              "library->[NN, , l=1.0]",
              "finished on time->[SvVP, , l=2.0]",
              "finished->[VPvVBD, , l=2.0]",
              "finished->[VBD, , l=1.0]",
              "on time->[VPvPP, , l=2.0]",
              "on->[PPvIN, , l=2.0]",
              "on->[IN, , l=1.0]",
              "time->[PPvNP, , l=2.0]",
              "time->[NP, , l=1.0]",
              ".->[Sv., , l=2.0]",
              ".->[., , l=1.0]"));

  /** Checks that each gold-parse constituent renders to an expected response. */
  @Test
  public final void testParsePath() throws Exception {
    logger.info(String.valueOf(cons.size()));
    logger.info(tas.getView(ViewNames.PARSE_GOLD).toString());
    for (Constituent constituent : cons) {
      String features = parsePath.getFeatures(constituent).toString();
      String prediction = constituent.toString() + "->" + features;
      assertTrue(correctResponses.contains(prediction));
    }
  }
}

示例#11

0

显示文件

文件： JsonSerializer.java 项目： IllinoisCogComp/illinois-cogcomp-nlp

  /**
   * Writes the sentence view of {@code ta} under the {@code "sentences"} key of {@code json}.
   *
   * <p>The nested object holds the view's generator, its score, and the end span of every
   * sentence in iteration order.
   *
   * @param ta the annotation whose sentences are written
   * @param json the object that receives the {@code "sentences"} entry
   */
  private static void writeSentences(TextAnnotation ta, JsonObject json) {
    SpanLabelView sentenceView = (SpanLabelView) ta.getView(ViewNames.SENTENCE);

    JsonObject sentencesJson = new JsonObject();
    writeString("generator", sentenceView.getViewGenerator(), sentencesJson);
    writeDouble("score", sentenceView.getScore(), sentencesJson);

    // One slot per constituent of the sentence view.
    int[] endPositions = new int[sentenceView.getNumberOfConstituents()];
    int slot = 0;
    for (Sentence current : ta.sentences()) {
      endPositions[slot] = current.getEndSpan();
      slot++;
    }
    writeIntArray("sentenceEndPositions", endPositions, sentencesJson);

    json.add("sentences", sentencesJson);
  }

示例#12

0

显示文件

文件： TestParsePath.java 项目： IllinoisCogComp/illinois-cogcomp-nlp

 /**
  * Checks that every constituent of the gold parse view, rendered as
  * {@code "<constituent>-><features>"}, is one of the expected responses.
  */
 @Test
 public final void testParsePath() throws Exception {
   logger.info(String.valueOf(cons.size()));
   logger.info(tas.getView(ViewNames.PARSE_GOLD).toString());
   for (Constituent constituent : cons) {
     String features = parsePath.getFeatures(constituent).toString();
     String prediction = constituent.toString() + "->" + features;
     assertTrue(correctResponses.contains(prediction));
   }
 }

示例#13

0

显示文件

文件： Redactor.java 项目： ryannk/open-eval

 /**
  * Removes all coreference relations from {@code COREF} View.
  *
  * <p>The annotations are first reduced (via {@code removeViews}) to the sentence, token and
  * coreference views. Every remaining view that is a {@code CoreferenceView} has its relations
  * removed and is then added back under its own name.
  *
  * @param uncleansedAnnotations annotations to cleanse
  * @return the cleansed annotations as returned by {@code removeViews}
  */
 private static List<TextAnnotation> removeCoreferenceRelations(
     List<TextAnnotation> uncleansedAnnotations) {
   List<String> keep = new ArrayList<>();
   keep.add(ViewNames.SENTENCE);
   keep.add(ViewNames.TOKENS);
   keep.add(ViewNames.COREF);

   List<TextAnnotation> cleansed = removeViews(uncleansedAnnotations, keep);
   for (TextAnnotation annotation : cleansed) {
     for (String name : annotation.getAvailableViews()) {
       View candidate = annotation.getView(name);
       if (!(candidate instanceof CoreferenceView)) {
         continue;
       }
       CoreferenceView coreference = (CoreferenceView) candidate;
       coreference.removeAllRelations();
       annotation.addView(name, coreference);
     }
   }
   return cleansed;
 }

示例#14

0

显示文件

文件： Redactor.java 项目： ryannk/open-eval

 /**
  * Removes the label from the NER_GOLD_EXTENT_SPAN.
  *
  * <p>The annotations are first reduced (via {@code removeViews}) to the sentence, token and
  * {@code NER_GOLD_EXTENT_SPAN} views. Each constituent of the NER view is then replaced by a
  * constituent with an empty label covering the same span, and the view is added back under its
  * own name.
  *
  * @param cleansedAnnotations annotations to strip NER labels from
  * @return the cleansed annotations as returned by {@code removeViews}
  */
 private static List<TextAnnotation> removeLabelsForNER(List<TextAnnotation> cleansedAnnotations) {
   List<String> nerViews = new ArrayList<>();
   nerViews.add(ViewNames.SENTENCE);
   nerViews.add(ViewNames.TOKENS);
   nerViews.add("NER_GOLD_EXTENT_SPAN");
   List<TextAnnotation> textAnnotations = removeViews(cleansedAnnotations, nerViews);
   for (TextAnnotation textAnnotation : textAnnotations) {
     View view = textAnnotation.getView("NER_GOLD_EXTENT_SPAN");
     // Iterate over a snapshot: the loop body removes and adds constituents on this very view,
     // which is unsafe if getConstituents() exposes the view's backing list.
     List<Constituent> constituents = new ArrayList<>(view.getConstituents());
     for (Constituent c : constituents) {
       view.removeConstituent(c);
       int start = c.getStartSpan();
       int end = c.getEndSpan();
       // Same span, empty label.
       view.addConstituent(
           new Constituent("", "NER_GOLD_EXTENT_SPAN", textAnnotation, start, end));
     }
     textAnnotation.addView(view.getViewName(), view);
   }
   return textAnnotations;
 }

示例#15

0

显示文件

文件： FeatureInputTransformer.java 项目： IllinoisCogComp/illinois-cogcomp-nlp

        /**
         * Collects the "prep" siblings of a constituent in the dependency tree, together with
         * the first child of each such sibling.
         *
         * <p>The parent is the source of the first incoming relation of the dependency node
         * covering the constituent's start token. Every other outgoing relation of the parent
         * whose name contains {@code "prep"} contributes its target and, if that target has
         * outgoing relations, the target of the first one.
         *
         * @param c the constituent to start from
         * @return the collected constituents, each linked back to {@code c} through
         *     {@code addPointerToSource}; empty when the node has no incoming relations
         */
        @Override
        public List<Constituent> transform(Constituent c) {
          TextAnnotation ta = c.getTextAnnotation();
          TreeView dependency = (TreeView) ta.getView(ViewNames.DEPENDENCY);

          Constituent node = dependency.getConstituentsCoveringToken(c.getStartSpan()).get(0);
          List<Relation> incoming = node.getIncomingRelations();

          List<Constituent> collected = new ArrayList<>();
          if (incoming == null || incoming.isEmpty()) {
            return collected;
          }

          Relation fromParent = incoming.get(0);
          for (Relation sibling : fromParent.getSource().getOutgoingRelations()) {
            // Skip the edge that leads back to the node itself, and anything that is not a prep.
            if (sibling == fromParent || !sibling.getRelationName().contains("prep")) {
              continue;
            }

            Constituent ppNode = sibling.getTarget();
            collected.add(addPointerToSource(c, ppNode));

            // Also include the first child of the prepositional node, when it has one.
            List<Relation> ppChildren = ppNode.getOutgoingRelations();
            if (ppChildren != null && !ppChildren.isEmpty()) {
              collected.add(addPointerToSource(c, ppChildren.get(0).getTarget()));
            }
          }
          return collected;
        }

示例#16

0

显示文件

文件： JsonSerializer.java 项目： IllinoisCogComp/illinois-cogcomp-nlp

  /**
   * Rebuilds a text annotation from its JSON string form.
   *
   * <p>Reads the corpus id, id, text, tokens and sentence information, derives the token offsets
   * from the text and tokens, constructs the annotation, then reads every entry of the
   * {@code "views"} array as a list of views registered under its {@code "viewName"}, and finally
   * reads the attributes.
   *
   * @param string the JSON text to parse
   * @return the reconstructed annotation
   * @throws Exception if reading any part of the JSON fails
   */
  TextAnnotation readTextAnnotation(String string) throws Exception {
    JsonObject root = (JsonObject) new JsonParser().parse(string);

    String corpusId = readString("corpusId", root);
    String id = readString("id", root);
    String text = readString("text", root);
    String[] tokens = readStringArray("tokens", root);
    Pair<Pair<String, Double>, int[]> sentences = readSentences(root);

    // Offsets are recomputed from the text and tokens.
    IntPair[] offsets = TokenUtils.getTokenOffsets(text, tokens);
    int[] sentenceEnds = sentences.getSecond();
    TextAnnotation annotation =
        new TextAnnotation(corpusId, id, text, offsets, tokens, sentenceEnds);

    JsonArray viewsJson = root.getAsJsonArray("views");
    for (int v = 0; v < viewsJson.size(); v++) {
      JsonObject viewJson = (JsonObject) viewsJson.get(v);
      String viewName = readString("viewName", viewJson);
      JsonArray alternatives = viewJson.getAsJsonArray("viewData");

      List<View> topK = new ArrayList<>();
      for (int rank = 0; rank < alternatives.size(); rank++) {
        topK.add(readView((JsonObject) alternatives.get(rank), annotation));
      }
      annotation.addTopKView(viewName, topK);
    }

    readAttributes(annotation, root);
    return annotation;
  }

示例#17

0

显示文件

文件： FileIOAnnotationJob.java 项目： IllinoisCogComp/illinois-cogcomp-nlp

 /**
  * Renders the annotated text with every constituent of {@code view} bracketed in place.
  *
  * <p>Constituents are processed in the order given by
  * {@code TextAnnotationUtilities.constituentStartComparator}. The raw text between constituents
  * is copied verbatim and each constituent is written as {@code [LABEL surface ] }. Text after
  * the last constituent is appended too, so text without any constituent is returned unchanged
  * rather than as an empty string.
  *
  * @return the bracketed text
  */
 protected String getNERString() {
   List<Constituent> constituents = new ArrayList<>(view.getConstituents());
   Collections.sort(constituents, TextAnnotationUtilities.constituentStartComparator);
   StringBuilder sb = new StringBuilder();
   String text = textAnnotation.getText();
   // Character offset up to which the text has already been emitted.
   int where = 0;
   for (Constituent c : constituents) {
     int start = c.getStartCharOffset();
     String startstring = text.substring(where, start);
     sb.append(startstring)
         .append("[")
         .append(c.getLabel())
         .append(" ")
         .append(c.getTokenizedSurfaceForm())
         .append(" ] ");
     where = c.getEndCharOffset();
   }
   // Emit whatever follows the last constituent; without this the tail of the text is lost.
   sb.append(text.substring(where));
   return sb.toString();
 }

示例#18

0

显示文件

文件： FileIOAnnotationJob.java 项目： IllinoisCogComp/illinois-cogcomp-nlp

 /**
  * Creates a text annotation from {@code data}, lets {@code nerAnnotator} add its view to it, and
  * stores both the annotation and that view in this object's fields.
  */
 @Override
 public void labelData() {
   // Build the annotation from the current data.
   textAnnotation = tab.createTextAnnotation(data);
   // Have the annotator add its view to the annotation.
   nerAnnotator.addView(textAnnotation);
   // Keep the annotator's view at hand for later use.
   view = textAnnotation.getView(nerAnnotator.getViewName());
 }

示例#19

0

显示文件

文件： FeatureInputTransformer.java 项目： IllinoisCogComp/illinois-cogcomp-nlp

 /**
  * Maps a constituent to the result of {@code getHead} on the Stanford dependency view.
  *
  * @param c the constituent to transform
  * @return a singleton list holding the value returned by {@code getHead}
  */
 @Override
 public List<Constituent> transform(Constituent c) {
   return Collections.singletonList(
       getHead(c, (TreeView) c.getTextAnnotation().getView(ViewNames.DEPENDENCY_STANFORD)));
 }

示例#20

0

显示文件

文件： FeatureInputTransformer.java 项目： IllinoisCogComp/illinois-cogcomp-nlp

 /**
  * Maps a constituent to the result of {@code getModifiers} on the Stanford dependency view.
  *
  * @param c the constituent to transform
  * @return the list returned by {@code getModifiers}
  */
 @Override
 public List<Constituent> transform(Constituent c) {
   return getModifiers(
       c, (TreeView) c.getTextAnnotation().getView(ViewNames.DEPENDENCY_STANFORD));
 }