Example #1
 /**
  * Removes every {@code Relation} from the {@code RELATIONVIEW} in the list of text annotations.
  */
 private static List<TextAnnotation> removeRelationsFromPredicateArgumentView(
     List<TextAnnotation> uncleansedAnnotations) {
   List<String> relationExtractionViews = new ArrayList<>();
   relationExtractionViews.add(ViewNames.SENTENCE);
   relationExtractionViews.add(ViewNames.TOKENS);
   relationExtractionViews.add("RELATIONVIEW");
   List<TextAnnotation> textAnnotations =
       removeViews(uncleansedAnnotations, relationExtractionViews);
   for (TextAnnotation textAnnotation : textAnnotations) {
     Set<String> viewNames = textAnnotation.getAvailableViews();
     for (String viewName : viewNames) {
       View view = textAnnotation.getView(viewName);
       if (view instanceof PredicateArgumentView) {
         PredicateArgumentView predicateArgumentView = (PredicateArgumentView) view;
         predicateArgumentView.removeAllRelations();
         for (Constituent c : predicateArgumentView.getConstituents()) {
           predicateArgumentView.removeConstituent(c);
           int start = c.getStartSpan();
           int end = c.getEndSpan();
           view.addConstituent(new Constituent("", "RELATIONVIEW", textAnnotation, start, end));
         }
       }
     }
   }
   return textAnnotations;
 }
        @Override
        public List<Constituent> transform(Constituent c) {

          Constituent c1 = c.cloneForNewView(c.getViewName());
          // The Relation is constructed only for its side effect of linking c and c1;
          // the reference is intentionally not kept.
          new Relation("", c, c1, 0.0);
          return Collections.singletonList(c1);
        }
  // Find the dependency node covered by c whose governor lies outside c's span (or the parse
  // root), preferring the rightmost such node; that node is treated as the head of the span.
  private static Constituent getHead(Constituent c, TreeView dependency) {
    Constituent end = null;
    List<Constituent> constituentsCovering = dependency.getConstituentsCovering(c);
    for (Constituent d : constituentsCovering) {
      List<Relation> in = d.getIncomingRelations();
      if (in.size() == 0) {
        end = d;
        break;
      } else {
        Constituent parent = in.get(0).getSource();

        int parentToken = parent.getStartSpan();
        if (c.getStartSpan() <= parentToken && parentToken < c.getEndSpan()) continue;

        if (end == null) {
          end = d;
        } else if (end.getStartSpan() < d.getStartSpan()) {
          end = d;
        }
      }
    }

    Constituent c1;
    if (end == null) c1 = constituentsCovering.get(0).cloneForNewView("");
    else c1 = end.cloneForNewView("");

    return addPointerToSource(c, c1);
  }
        @Override
        public List<Constituent> transform(Constituent input) {

          TextAnnotation ta = input.getTextAnnotation();
          int tokenPosition = input.getStartSpan();
          TreeView dependency = (TreeView) ta.getView(ViewNames.DEPENDENCY);

          Constituent verbNode = dependency.getConstituentsCoveringToken(tokenPosition).get(0);
          boolean done = false;

          while (!done) {
            String pos = WordHelpers.getPOS(ta, verbNode.getStartSpan());

            if (POSUtils.isPOSVerb(pos)) {
              done = true;
            } else {
              List<Relation> incoming = verbNode.getIncomingRelations();
              if (incoming == null || incoming.size() == 0) {
                return new ArrayList<>();
              } else verbNode = incoming.get(0).getSource();
            }
          }

          return Collections.singletonList(addPointerToSource(input, verbNode));
        }
 @Override
 public List<Constituent> transform(Constituent input) {
   int tokenId = input.getStartSpan();
   if (tokenId > 0) {
     Constituent c =
         new Constituent("", "", input.getTextAnnotation(), tokenId - 1, tokenId);
     return Collections.singletonList(addPointerToSource(input, c));
   } else return new ArrayList<>();
 }
  // Returns a dependency node covering tokenId (or, failing that, tokenId + 1 or tokenId + 2);
  // null if none covers any of them.
  public Constituent getDependencyConstituentCoveringTokenId(Problem prob, int tokenId) {
   for (int i = 0; i <= 2; ++i) {
     for (Constituent cons : prob.dependency) {
       if (tokenId + i >= cons.getStartSpan() && tokenId + i < cons.getEndSpan()) {
         return cons;
       }
     }
   }
   return null;
 }
 @Override
 public List<Constituent> transform(Constituent input) {
   int tokenId = input.getEndSpan();
   TextAnnotation ta = input.getTextAnnotation();
   Sentence sentence = ta.getSentence(input.getSentenceId());
   if (tokenId < sentence.size()) {
     Constituent c = new Constituent("", "", ta, tokenId, tokenId + 1);
     return Collections.singletonList(addPointerToSource(input, c));
   } else return new ArrayList<>();
 }
 @Override
 public List<Constituent> transform(Constituent input) {
   return Collections.singletonList(
       new Constituent(
           "",
           "",
           input.getTextAnnotation(),
           input.getStartSpan(),
           input.getStartSpan() + 1));
 }
 /**
  * Adds an array of objects reporting the View's Constituents' surface forms and character
  * offsets. May make deserialization to TextAnnotation problematic, as the relevant methods
  * deduce token character offsets directly from the list of token strings and the raw text.
  *
  * @param fieldName name to give to this field
  * @param view view whose character offsets will be serialized
  * @param json Json object to which resulting array will be added
  */
 private static void writeTokenOffsets(String fieldName, View view, JsonObject json) {
   JsonArray offsetArray = new JsonArray();
   for (Constituent c : view.getConstituents()) {
     JsonObject cJ = new JsonObject();
     writeString(FORM, c.getSurfaceForm(), cJ);
     writeInt(STARTCHAROFFSET, c.getStartCharOffset(), cJ);
     writeInt(ENDCHAROFFSET, c.getEndCharOffset(), cJ);
     offsetArray.add(cJ);
   }
   json.add(fieldName, offsetArray);
 }
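For reference, a minimal standalone sketch of the array shape writeTokenOffsets builds, using Gson directly; the field names "form", "startCharOffset", and "endCharOffset" are assumed stand-ins for the FORM, STARTCHAROFFSET, and ENDCHAROFFSET constants, and the sample data is made up.

import com.google.gson.JsonArray;
import com.google.gson.JsonObject;

public class TokenOffsetJsonSketch {
  public static void main(String[] args) {
    JsonObject json = new JsonObject();
    JsonArray offsetArray = new JsonArray();

    // One entry per Constituent: surface form plus character offsets.
    JsonObject cJ = new JsonObject();
    cJ.addProperty("form", "Chicago");      // assumed value of the FORM constant
    cJ.addProperty("startCharOffset", 21);  // assumed value of STARTCHAROFFSET
    cJ.addProperty("endCharOffset", 28);    // assumed value of ENDCHAROFFSET
    offsetArray.add(cJ);

    json.add("tokenOffsets", offsetArray);  // fieldName is supplied by the caller
    // Prints: {"tokenOffsets":[{"form":"Chicago","startCharOffset":21,"endCharOffset":28}]}
    System.out.println(json);
  }
}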
  /**
   * This feature extractor assumes that the TOKENS view has been generated in the Constituent's
   * TextAnnotation. It generates features over a [-2, +2] window of forms (original text) around
   * each constituent.
   */
  @Override
  public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    TextAnnotation ta = c.getTextAnnotation();
    TOKENS = ta.getView(ViewNames.TOKENS);

    // We can assume that the constituent in this case is a Word(Token)
    int startspan = c.getStartSpan();
    int endspan = c.getEndSpan();
    // k is 3 since we need up to 3-grams
    int k = 3;
    int window = 2;

    // All our constituents are words(tokens)
    String[] forms = getWindowK(TOKENS, startspan, endspan, window);

    String id, value;
    String classifier = "WordConjunctionOneTwoThreeGramWindowTwo";
    Set<Feature> result = new LinkedHashSet<>();

    for (int j = 0; j < k; j++) {
      // k = 3, j goes from 0 to 2

      for (int i = 0; i < forms.length; i++) {
        // forms.length is 5, so i goes from 0 to 4, one iteration per String in the forms array.

        StringBuilder f = new StringBuilder();

        // Starts with context = 0 and increments it while it is at most the current value of j
        // and still inside the bounds of the forms array.
        // This creates a discrete feature for each one-, two-, and three-word combination
        // within the [-2, +2] window of words.
        for (int context = 0; context <= j && i + context < forms.length; context++) {
          // add a '_' between words to conjoin them together
          if (context != 0) {
            f.append("_");
          }
          f.append(forms[i + context]);
        }

        // window (= 2) is the index of the center token in the array, so (i - window) runs
        // from -2 to +2, with 0 being the center.
        // j + 1 is the size of the n-gram, so it runs from 1 to 3.
        id = classifier + ":" + ((i - window) + "_" + (j + 1));
        value = "(" + (f.toString()) + ")";
        result.add(new DiscreteFeature(id + value));
      }
    }
    return result;
  }
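To make the nested loops above concrete, here is a small self-contained sketch that replays the same ID construction over a hard-coded five-token window; the tokens and printed IDs are illustrative only, not output of the real extractor.

public class WindowNgramSketch {
  public static void main(String[] args) {
    // Stand-in for getWindowK(TOKENS, startspan, endspan, 2): the [-2, +2] window of forms.
    String[] forms = {"the", "quick", "brown", "fox", "jumps"};
    int k = 3;      // up to 3-grams
    int window = 2; // index of the center token in forms
    String classifier = "WordConjunctionOneTwoThreeGramWindowTwo";

    for (int j = 0; j < k; j++) {
      for (int i = 0; i < forms.length; i++) {
        StringBuilder f = new StringBuilder();
        for (int context = 0; context <= j && i + context < forms.length; context++) {
          if (context != 0) {
            f.append("_");
          }
          f.append(forms[i + context]);
        }
        // e.g. "WordConjunctionOneTwoThreeGramWindowTwo:-2_2(the_quick)" for i = 0, j = 1
        System.out.println(classifier + ":" + (i - window) + "_" + (j + 1) + "(" + f + ")");
      }
    }
  }
}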
        @Override
        public List<Constituent> transform(Constituent input) {
          List<Constituent> list = new ArrayList<>();
          TextAnnotation ta = input.getTextAnnotation();

          for (int i = input.getStartSpan(); i < input.getEndSpan(); i++) {
            list.add(new Constituent("", "", ta, i, i + 1));
          }

          return list;
        }
  // Returns the index of the chunk covering tokenId (or, failing that, tokenId + 1 or
  // tokenId + 2); -1 if no chunk covers any of them.
  public static int getChunkIndex(Problem prob, int tokenId) {
   for (int i = 0; i <= 2; ++i) {
     for (int j = 0; j < prob.chunks.size(); ++j) {
       Constituent cons = prob.chunks.get(j);
       if (tokenId + i >= cons.getStartSpan() && tokenId + i < cons.getEndSpan()) {
         return j;
       }
     }
   }
   return -1;
 }
  // Returns the dependency governor (the source of the first incoming relation) of the first
  // dependency node covering input; empty if there is none.
  private static List<Constituent> getGovernor(Constituent input, TreeView dependency) {
    List<Constituent> constituentsCovering = dependency.getConstituentsCovering(input);

    if (constituentsCovering.size() == 0) return new ArrayList<>();

    Constituent c = constituentsCovering.get(0);

    List<Relation> incomingRelations = c.getIncomingRelations();

    if (incomingRelations == null || incomingRelations.size() == 0) return new ArrayList<>();
    else
      return Collections.singletonList(
          addPointerToSource(input, incomingRelations.get(0).getSource()));
  }
        @Override
        public List<Constituent> transform(Constituent input) {

          TextAnnotation ta = input.getTextAnnotation();

          return getGovernor(input, getDependencyView(ta, BERKELEY_DEPENDENCIES));
        }
        @Override
        public List<Constituent> transform(Constituent input) {

          TreeView dependency = (TreeView) input.getTextAnnotation().getView(ViewNames.DEPENDENCY);

          return getObject(input, dependency, "obj");
        }
        @Override
        public List<Constituent> transform(Constituent input) {

          TreeView dependency =
              (TreeView) input.getTextAnnotation().getView(ViewNames.DEPENDENCY_STANFORD);

          return getGovernor(input, dependency);
        }
        @Override
        public List<Constituent> transform(Constituent input) {

          TextAnnotation ta = input.getTextAnnotation();

          TreeView dependency = getDependencyView(ta, STANFORD_DEPENDENCIES);

          return getGovernor(input, dependency);
        }
        @Override
        public List<Constituent> transform(Constituent input) {

          List<Constituent> c = new ArrayList<>();
          for (Relation r : input.getOutgoingRelations()) {
            c.add(addPointerToSource(input, r.getTarget()));
          }
          return c;
        }
 protected String getNERString() {
   List<Constituent> constituents = new ArrayList<>(view.getConstituents());
   Collections.sort(constituents, TextAnnotationUtilities.constituentStartComparator);
   StringBuilder sb = new StringBuilder();
   String text = textAnnotation.getText();
   int where = 0;
   for (Constituent c : constituents) {
     int start = c.getStartCharOffset();
     String startstring = text.substring(where, start);
     sb.append(startstring)
         .append("[")
         .append(c.getLabel())
         .append(" ")
         .append(c.getTokenizedSurfaceForm())
         .append(" ] ");
     where = c.getEndCharOffset();
   }
   return sb.toString();
 }
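A minimal standalone sketch of the bracketed format getNERString builds, with made-up text, labels, and character offsets standing in for the view's constituents.

public class NerStringSketch {
  public static void main(String[] args) {
    String text = "Barack Obama visited Chicago yesterday.";
    // Labels and character offsets standing in for the sorted constituents.
    String[] labels = {"PER", "LOC"};
    int[][] offsets = {{0, 12}, {21, 28}};

    StringBuilder sb = new StringBuilder();
    int where = 0;
    for (int i = 0; i < labels.length; i++) {
      int start = offsets[i][0];
      int end = offsets[i][1];
      sb.append(text, where, start) // raw text between entities
          .append("[")
          .append(labels[i])
          .append(" ")
          .append(text, start, end) // entity surface form
          .append(" ] ");
      where = end;
    }
    // Prints: "[PER Barack Obama ]  visited [LOC Chicago ] "
    System.out.println(sb);
  }
}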
Example #20
 /** Removes the labels from the constituents of the NER_GOLD_EXTENT_SPAN view. */
 private static List<TextAnnotation> removeLabelsForNER(List<TextAnnotation> cleansedAnnotations) {
   List<String> nerViews = new ArrayList<>();
   nerViews.add(ViewNames.SENTENCE);
   nerViews.add(ViewNames.TOKENS);
   nerViews.add("NER_GOLD_EXTENT_SPAN");
   List<TextAnnotation> textAnnotations = removeViews(cleansedAnnotations, nerViews);
   for (TextAnnotation textAnnotation : textAnnotations) {
     View view = textAnnotation.getView("NER_GOLD_EXTENT_SPAN");
     List<Constituent> constituents = view.getConstituents();
     for (Constituent c : constituents) {
       view.removeConstituent(c);
       int start = c.getStartSpan();
       int end = c.getEndSpan();
       view.addConstituent(
           new Constituent("", "NER_GOLD_EXTENT_SPAN", textAnnotation, start, end));
     }
     textAnnotation.addView(view.getViewName(), view);
   }
   return textAnnotations;
 }
  public void evaluate(ClassificationTester tester, View gold, View prediction) {
    super.cleanAttributes(gold, prediction);
    Set<IntPair> goldSpans = new HashSet<>();
    for (Constituent cons : gold.getConstituents()) {
      goldSpans.add(cons.getSpan());
    }

    Set<IntPair> predictedSpans = new HashSet<>();
    for (Constituent cons : prediction.getConstituents()) {
      predictedSpans.add(cons.getSpan());
    }

    Set<IntPair> spanIntersection = new HashSet<>(goldSpans);
    spanIntersection.retainAll(predictedSpans);

    tester.recordCount(
        "" /* label doesn't matter */,
        goldSpans.size(),
        predictedSpans.size(),
        spanIntersection.size());
  }
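For context, a small sketch of how the three counts passed to recordCount relate to span precision and recall; reading them as (gold, predicted, correct) counts and applying the standard formulas is an assumption about ClassificationTester, not something shown above.

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

public class SpanOverlapSketch {
  public static void main(String[] args) {
    // Token spans encoded as "start-end" strings stand in for the IntPair spans above.
    Set<String> goldSpans = new HashSet<>(Arrays.asList("0-2", "3-4", "7-9"));
    Set<String> predictedSpans = new HashSet<>(Arrays.asList("0-2", "3-4", "5-6", "8-9"));

    Set<String> spanIntersection = new HashSet<>(goldSpans);
    spanIntersection.retainAll(predictedSpans); // spans predicted with exactly the right boundaries

    double precision = (double) spanIntersection.size() / predictedSpans.size(); // 2 / 4 = 0.5
    double recall = (double) spanIntersection.size() / goldSpans.size();         // 2 / 3 ≈ 0.67
    System.out.println("gold=" + goldSpans.size()
        + " predicted=" + predictedSpans.size()
        + " correct=" + spanIntersection.size()
        + " P=" + precision + " R=" + recall);
  }
}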
  // Returns a dependency child of the node covering input, preferring a child attached by a
  // relation whose name contains "mod"; otherwise falls back to the first outgoing relation.
  private static List<Constituent> getModifiers(Constituent input, TreeView dependency) {
    List<Constituent> constituentsCovering = dependency.getConstituentsCovering(input);

    if (constituentsCovering.size() == 0) return new ArrayList<>();

    Constituent c = constituentsCovering.get(0);

    List<Relation> outgoingRelations = c.getOutgoingRelations();

    if (outgoingRelations == null || outgoingRelations.size() == 0) return new ArrayList<>();
    else {

      for (Relation r : outgoingRelations) {
        if (r.getRelationName().contains("mod"))
          return Collections.singletonList(addPointerToSource(input, r.getTarget()));
      }

      return Collections.singletonList(
          addPointerToSource(input, outgoingRelations.get(0).getTarget()));
    }
  }
 public Constituent getDependentVerb(Problem prob, QuantSpan qs) {
   Constituent result =
       getDependencyConstituentCoveringTokenId(
           prob, prob.ta.getTokenIdFromCharacterOffset(qs.start));
   if (result == null) {
     System.out.println(
         "Text : "
             + prob.question
             + " Token : "
             + prob.ta.getTokenIdFromCharacterOffset(qs.start));
     Tools.printCons(prob.dependency);
   }
   while (result != null) {
     if (result.getIncomingRelations().size() == 0) break;
     //			System.out.println(result.getIncomingRelations().get(0).getSource()+" --> "+result);
     result = result.getIncomingRelations().get(0).getSource();
     if (prob.posTags.get(result.getStartSpan()).getLabel().startsWith("VB")) {
       return result;
     }
   }
   return result;
 }
        @Override
        public List<Constituent> transform(Constituent c) {
          TextAnnotation ta = c.getTextAnnotation();
          TreeView tree = (TreeView) ta.getView(ViewNames.PARSE_CHARNIAK);
          try {
            Constituent phrase = tree.getParsePhrase(c);
            int head = CollinsHeadFinder.getInstance().getHeadWordPosition(phrase);
            Constituent c1 = new Constituent("", "", ta, head, head + 1);

            return Collections.singletonList(addPointerToSource(c, c1));

          } catch (Exception e) {
            throw new RuntimeException(e);
          }
        }
  private static void writeConstituent(Constituent c, JsonObject cJ) {
    writeString("label", c.getLabel(), cJ);

    if (c.getConstituentScore() != 0) writeDouble("score", c.getConstituentScore(), cJ);
    writeInt("start", c.getStartSpan(), cJ);
    writeInt("end", c.getEndSpan(), cJ);

    writeAttributes(c, cJ);
    Map<String, Double> labelsToScores = c.getLabelsToScores();

    if (null != labelsToScores) writeLabelsToScores(labelsToScores, cJ);
  }
        @Override
        public List<Constituent> transform(Constituent c) {
          TextAnnotation ta = c.getTextAnnotation();
          int tokenPosition = c.getStartSpan();
          TreeView dependency = (TreeView) ta.getView(ViewNames.DEPENDENCY);

          Constituent prepositionDepConstituent =
              dependency.getConstituentsCoveringToken(tokenPosition).get(0);

          List<Relation> incomingRelations = prepositionDepConstituent.getIncomingRelations();

          List<Constituent> list = new ArrayList<>();
          if (incomingRelations != null && incomingRelations.size() > 0) {

            Constituent parent = incomingRelations.get(0).getSource();

            for (Relation out : parent.getOutgoingRelations()) {
              if (out == incomingRelations.get(0)) continue;

              String label = out.getRelationName();

              if (label.contains("prep")) {
                Constituent ppNode = out.getTarget();

                list.add(addPointerToSource(c, ppNode));

                // get the first child of the pp and add this
                List<Relation> ppOut = ppNode.getOutgoingRelations();

                if (ppOut != null && ppOut.size() != 0) {

                  Constituent child = ppOut.get(0).getTarget();
                  list.add(addPointerToSource(c, child));
                }
              }
            }
          }

          return list;
        }
 public Constituent getRateUnit(Problem prob) {
   for (Constituent cons : connectedNPs) {
     if (cons.getSurfaceForm().toLowerCase().contains("each")) {
       return cons;
     }
   }
   for (Constituent cons : connectedNPs) {
     if (cons.getSurfaceForm().toLowerCase().contains("every")) {
       return cons;
     }
   }
   if (quantPhrase.getSurfaceForm().contains("each")
       || quantPhrase.getSurfaceForm().contains("every")) {
     return quantPhrase;
   }
   int chunkId = getChunkIndex(prob, quantPhrase.getStartSpan());
   if (chunkId + 2 < prob.chunks.size()
       && prob.chunks.get(chunkId + 1).getSurfaceForm().equals("per")) {
     return prob.chunks.get(chunkId + 2);
   }
   return null;
 }
 public List<Constituent> getConnectedNPs(Problem prob) {
   List<Constituent> npList = new ArrayList<>();
   List<Constituent> npListQuantRemoved = new ArrayList<>();
   boolean onlyQuantityInSentence = true;
   int sentId = prob.ta.getSentenceFromToken(quantPhrase.getStartSpan()).getSentenceId();
   for (QuantSpan qs : prob.quantities) {
     int tokenId = prob.ta.getTokenIdFromCharacterOffset(qs.start);
     if (prob.ta.getSentenceFromToken(tokenId).getSentenceId() == sentId
         && !(quantPhrase.getStartSpan() <= tokenId && quantPhrase.getEndSpan() > tokenId)) {
       onlyQuantityInSentence = false;
       break;
     }
   }
   // Find NPs from children of verb
   if (verbPhrase != null) {
     List<Relation> relations = verbPhrase.getOutgoingRelations();
     for (Relation relation : relations) {
       if (!relation.getRelationName().equals("nsubj")) continue;
       Constituent dst = relation.getTarget();
       for (Constituent cons : prob.chunks) {
         if (cons.getStartSpan() <= dst.getStartSpan()
             && cons.getEndSpan() > dst.getStartSpan()
             && cons.getLabel().equals("NP")
             && !npList.contains(cons)) {
           npList.add(cons);
           subject = cons;
           break;
         }
       }
     }
   }
   // Find NPs from PP NP connection
   int quantPhraseId = getChunkIndex(prob, quantPhrase.getStartSpan());
   if (quantPhraseId + 2 < prob.chunks.size()
       && !prob.chunks.get(quantPhraseId + 1).getSurfaceForm().trim().equals("of")
       && prob.chunks.get(quantPhraseId + 1).getLabel().equals("PP")
       && prob.chunks.get(quantPhraseId + 2).getLabel().equals("NP")
       && !npList.contains(prob.chunks.get(quantPhraseId + 2))) {
     npList.add(prob.chunks.get(quantPhraseId + 2));
   }
   if (quantPhraseId - 2 >= 0
       && prob.chunks.get(quantPhraseId - 1).getLabel().equals("PP")
       && prob.chunks.get(quantPhraseId - 2).getLabel().equals("NP")
       && !npList.contains(prob.chunks.get(quantPhraseId - 2))) {
     npList.add(prob.chunks.get(quantPhraseId - 2));
   }
   // Get preceding NP
   if (quantPhraseId - 1 >= 0
       && prob.chunks.get(quantPhraseId - 1).getLabel().equals("NP")
       && !prob.posTags
           .get(prob.chunks.get(quantPhraseId - 1).getEndSpan())
           .getLabel()
           .equals("CC")
       && !npList.contains(prob.chunks.get(quantPhraseId - 1))) {
     npList.add(prob.chunks.get(quantPhraseId - 1));
   }
   // Get succeeding NP
   if (quantPhraseId + 1 < prob.chunks.size()
       && prob.chunks.get(quantPhraseId + 1).getLabel().equals("NP")
       && !prob.posTags.get(prob.chunks.get(quantPhraseId).getEndSpan()).getLabel().equals("CC")
       && !npList.contains(prob.chunks.get(quantPhraseId + 1))) {
     npList.add(prob.chunks.get(quantPhraseId + 1));
   }
    // If this is the only quantity in the sentence, all NPs in the sentence are connected
   if (onlyQuantityInSentence) {
     for (int i = 0; i < prob.chunks.size(); ++i) {
       Constituent cons = prob.chunks.get(i);
       if (cons.getSentenceId() == sentId
           && (i > quantPhraseId + 2 || i < quantPhraseId - 2)
           && !npList.contains(cons)
           && cons.getLabel().equals("NP")) {
         npList.add(cons);
       }
     }
   }
   // Remove quantity phrases from npList
   for (Constituent cons : npList) {
     boolean allow = true;
     for (QuantSpan qs : prob.quantities) {
       int index = prob.ta.getTokenIdFromCharacterOffset(qs.start);
       if (index >= cons.getStartSpan() && index < cons.getEndSpan()) {
         allow = false;
         break;
       }
     }
     if (allow) {
       npListQuantRemoved.add(cons);
     }
   }
   return npListQuantRemoved;
 }
  @Override
  public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    TextAnnotation ta = c.getTextAnnotation();
    Set<Feature> features = new LinkedHashSet<>();
    TreeView parse = (TreeView) ta.getView(dependencyViewName);
    // get equivalent of c in the parse view
    Constituent c2 = parse.getConstituentsCoveringToken(c.getStartSpan()).get(0);
    List<Relation> incomingRelations = c2.getIncomingRelations();
    if (incomingRelations.size() > 0) {
      Constituent c1 =
          parse
              .getConstituentsCoveringToken(incomingRelations.get(0).getSource().getStartSpan())
              .get(0);

      Pair<List<Constituent>, List<Constituent>> paths =
          PathFeatureHelper.getPathsToCommonAncestor(c1, c2, 400);

      List<String> path = new ArrayList<>();
      List<String> pos = new ArrayList<>();

      for (int i = 0; i < paths.getFirst().size() - 1; i++) {
        Constituent cc = paths.getFirst().get(i);
        path.add(
            cc.getIncomingRelations().get(0).getRelationName() + PathFeatureHelper.PATH_UP_STRING);

        pos.add(
            WordHelpers.getPOS(ta, cc.getStartSpan())
                + ":"
                + cc.getIncomingRelations().get(0).getRelationName()
                + PathFeatureHelper.PATH_UP_STRING);
      }

      Constituent top = paths.getFirst().get(paths.getFirst().size() - 1);

      pos.add(WordHelpers.getPOS(ta, top.getStartSpan()) + ":*");
      path.add("*");

      if (paths.getSecond().size() > 1) {
        for (int i = paths.getSecond().size() - 2; i >= 0; i--) {
          Constituent cc = paths.getSecond().get(i);

          pos.add(
              WordHelpers.getPOS(ta, cc.getStartSpan()) + ":" + PathFeatureHelper.PATH_DOWN_STRING);
          path.add(PathFeatureHelper.PATH_DOWN_STRING);
        }
      }

      features.addAll(getNgrams(path, ""));
      features.addAll(getNgrams(pos, "pos"));
    }
    return features;
  }
 public Pair<String, Constituent> getUnit(Problem prob, int quantIndex) {
   String unit = "";
   int tokenId = prob.ta.getTokenIdFromCharacterOffset(prob.quantities.get(quantIndex).start);
   int quantPhraseId = getChunkIndex(prob, tokenId);
   Constituent quantPhrase = prob.chunks.get(quantPhraseId);
   // Detect cases like 4 red and 6 blue balls
   int numQuantInChunk = 0;
   for (QuantSpan qs : prob.quantities) {
     int index = prob.ta.getTokenIdFromCharacterOffset(qs.start);
     if (index >= quantPhrase.getStartSpan() && index < quantPhrase.getEndSpan()) {
       numQuantInChunk++;
     }
   }
   int start = quantPhrase.getStartSpan();
   int end = quantPhrase.getEndSpan();
   boolean addEndNoun = false;
   if (numQuantInChunk > 1) {
     for (int i = quantPhrase.getStartSpan(); i < quantPhrase.getEndSpan(); ++i) {
       if (prob.posTags.get(i).getLabel().equals("CC")) {
         if (tokenId < i) {
           end = i;
           addEndNoun = true;
         } else {
           start = i + 1;
         }
         break;
       }
     }
   }
   for (int i = start; i < end; ++i) {
     if (i != tokenId) {
       if (prob.ta.getToken(i).equals("$")) {
         unit += "dollar ";
       } else {
         unit += prob.lemmas.get(i) + " ";
       }
     }
   }
   // Connecting disconnected units, as in, 5 red and 6 green apples
   if (addEndNoun
       && quantPhrase.getEndSpan() <= prob.ta.size()
       && prob.posTags.get(quantPhrase.getEndSpan() - 1).getLabel().startsWith("N")) {
     unit += prob.lemmas.get(quantPhrase.getEndSpan() - 1) + " ";
   }
   // Unit from neighboring phrases
   if (quantPhraseId + 2 < prob.chunks.size()
       && prob.chunks.get(quantPhraseId + 1).getSurfaceForm().trim().equals("of")
       && prob.chunks.get(quantPhraseId + 2).getLabel().equals("NP")) {
     Constituent cons = prob.chunks.get(quantPhraseId + 2);
     for (int j = cons.getStartSpan(); j < cons.getEndSpan(); ++j) {
       unit += prob.lemmas.get(j) + " ";
     }
     quantPhraseId += 2;
   }
   return new Pair<String, Constituent>(unit, quantPhrase);
 }