Exemple #1
0
 /**
  * Compares this unit with another object.
  *
  * <p>Anything that is not a {@code WordLabelUnit} is unequal. Two {@code WordLabelUnit}s are
  * equal when both report {@code isNegative()}; otherwise they are equal exactly when their
  * labels are equal.
  *
  * @param unit the object to compare against
  * @return {@code true} if {@code unit} is a {@code WordLabelUnit} considered equal to this one
  */
 @Override
 public boolean equals(Object unit) {
   if (unit instanceof WordLabelUnit) {
     WordLabelUnit other = (WordLabelUnit) unit;
     // Two negative units match regardless of what their labels are.
     boolean bothNegative = isNegative() && other.isNegative();
     return bothNegative || getLabel().equals(other.getLabel());
   }
   return false;
 }
  /**
   * Builds the global feature vector for giving {@code candidateLabelUnit} to {@code word}, based
   * on the labels stored in {@code y} at sequence positions {@code 0..lastIndex} (inclusive).
   *
   * <p>Three groups of features are collected:
   *
   * <ul>
   *   <li>word/relation features for every non-negative pair label whose pair has {@code word} as
   *       its first or second member; this group is skipped when the candidate label is negative;
   *   <li>label bigram and trigram features over words whose ids differ from {@code word}'s id by
   *       one or two, only when {@code params.getUseGlobalEntityFeatures()} is true;
   *   <li>entity-level features derived from {@code getEntityWords(...)}, only when that list is
   *       non-empty and {@code params.getUseFullFeatures()} is true.
   * </ul>
   *
   * <p>Word features are added to the result with the string {@code "WORD"}; pair features are
   * first multiplied by {@code params.getRelWeight()} and then added with the string {@code
   * "WORDPAIR"}.
   *
   * @param word the word being labelled
   * @param instance the instance whose sequence holds the words and pairs; it is cast to {@code
   *     JointInstance} whenever a pair partner has to be located
   * @param y the label assignment from which labels at positions up to {@code lastIndex} are read
   * @param lastIndex the last sequence position (inclusive) that is inspected
   * @param candidateLabelUnit the label being considered for {@code word}
   * @return the combined feature vector
   */
  protected SparseFeatureVector calcWordGlobalFeatures(
      Word word, Instance instance, Label y, int lastIndex, WordLabelUnit candidateLabelUnit) {
    SparseFeatureVector result = new SparseFeatureVector(params);
    StringSparseVector relationFeatures = new StringSparseVector(params);
    StringSparseVector tokenFeatures = new StringSparseVector(params);

    String candLabel = candidateLabelUnit.getLabel();
    String lemma = word.getWord().getRealBase();
    String posTag = word.getWord().getPOS();

    // Sequence positions of the words with id-2, id-1, id+1 and id+2; -1 means "not seen".
    int prev2Idx = -1;
    int prev1Idx = -1;
    int next1Idx = -1;
    int next2Idx = -1;

    for (int idx = 0; idx <= lastIndex; idx++) {
      if (!(y.getLabel(idx) instanceof PairLabelUnit)) {
        // Word position: record neighbours and emit label bigrams.
        assert y.getLabel(idx) instanceof WordLabelUnit;
        if (!params.getUseGlobalEntityFeatures()) {
          continue;
        }
        Word other = (Word) instance.getSequence().get(idx);
        if (other.getId() == word.getId() - 2) {
          prev2Idx = idx;
        }
        if (other.getId() == word.getId() - 1) {
          String otherLabel = ((WordLabelUnit) y.getLabel(idx)).getLabel();
          prev1Idx = idx;
          addBigramToFV(
              tokenFeatures,
              otherLabel,
              other.getWord().getRealBase(),
              other.getWord().getPOS(),
              candLabel,
              lemma,
              posTag);
        } else if (word.getId() == other.getId() - 1) {
          String otherLabel = ((WordLabelUnit) y.getLabel(idx)).getLabel();
          next1Idx = idx;
          addBigramToFV(
              tokenFeatures,
              candLabel,
              lemma,
              posTag,
              otherLabel,
              other.getWord().getRealBase(),
              other.getWord().getPOS());
        } else if (word.getId() == other.getId() - 2) {
          next2Idx = idx;
        }
        continue;
      }

      // Pair position: only non-negative relations combined with a non-negative candidate count.
      PairLabelUnit relUnit = (PairLabelUnit) y.getLabel(idx);
      if (relUnit.isNegative() || candidateLabelUnit.isNegative()) {
        continue;
      }
      Pair pair = (Pair) instance.getSequence().get(idx);
      if (pair.getW1().getId() == word.getId()) {
        String rel = relUnit.getLabel();
        String labelRel = candLabel.concat(rel);
        relationFeatures.add(labelRel, 1.0);
        String labelLemmaRel = candLabel.concat(lemma).concat(rel);
        relationFeatures.add(labelLemmaRel, 1.0);
        if (params.getUseFullFeatures()) {
          // w-rel-w
          int partnerIdx = ((JointInstance) instance).getWord(pair.getW2().getId());
          if (partnerIdx <= lastIndex) {
            String partnerLemma = pair.getW2().getWord().getRealBase();
            String partnerLabel = ((WordLabelUnit) y.getLabel(partnerIdx)).getLabel();
            relationFeatures.add(labelRel.concat(partnerLabel), 1.0);
            relationFeatures.add(labelLemmaRel.concat(partnerLabel).concat(partnerLemma), 1.0);
          }
        }
      } else if (pair.getW2().getId() == word.getId()) {
        String rel = relUnit.getLabel();
        String relLabel = rel.concat(candLabel);
        relationFeatures.add(relLabel, 1.0);
        relationFeatures.add(rel.concat(lemma).concat(candLabel), 1.0);
        if (params.getUseFullFeatures()) {
          // w-rel-w
          int partnerIdx = ((JointInstance) instance).getWord(pair.getW1().getId());
          if (partnerIdx <= lastIndex) {
            String partnerLemma = pair.getW1().getWord().getRealBase();
            String partnerLabel = ((WordLabelUnit) y.getLabel(partnerIdx)).getLabel();
            relationFeatures.add(partnerLabel.concat(relLabel), 1.0);
            relationFeatures.add(
                partnerLemma.concat(partnerLabel).concat(relLabel).concat(lemma), 1.0);
          }
        }
      }
    }

    List<Word> entityWords = getEntityWords(word, instance, y, lastIndex, candidateLabelUnit);
    if (!entityWords.isEmpty() && params.getUseFullFeatures()) {
      String entityText = entityString(entityWords);
      String candType = candidateLabelUnit.getType();
      tokenFeatures.add(candType.concat(entityText), 1.0);
      // Relations are matched against the last word of the entity.
      Word entityTail = entityWords.get(entityWords.size() - 1);
      for (int idx = 0; idx <= lastIndex; idx++) {
        if (!(y.getLabel(idx) instanceof PairLabelUnit)) {
          continue;
        }
        PairLabelUnit relUnit = (PairLabelUnit) y.getLabel(idx);
        if (relUnit.isNegative()) {
          continue;
        }
        Pair pair = (Pair) instance.getSequence().get(idx);
        if (pair.getW1().getId() == entityTail.getId()) {
          String rel = relUnit.getLabel();
          String typeRel = candType.concat(rel);
          relationFeatures.add(typeRel, 1.0);
          String typeEntityRel = candType.concat(entityText).concat(rel);
          relationFeatures.add(typeEntityRel, 1.0);
          // ent-rel-ent
          int partnerIdx = ((JointInstance) instance).getWord(pair.getW2().getId());
          if (partnerIdx <= lastIndex) {
            WordLabelUnit partnerUnit = (WordLabelUnit) y.getLabel(partnerIdx);
            List<Word> partnerEntity =
                getEntityWords(pair.getW2(), instance, y, lastIndex, partnerUnit);
            if (!partnerEntity.isEmpty()) {
              String partnerText = entityString(partnerEntity);
              String partnerType = partnerUnit.getType();
              relationFeatures.add(typeRel.concat(partnerType), 1.0);
              relationFeatures.add(typeEntityRel.concat(partnerType).concat(partnerText), 1.0);
            }
          }
        } else if (pair.getW2().getId() == entityTail.getId()) {
          String rel = relUnit.getLabel();
          String relType = rel.concat(candType);
          relationFeatures.add(relType, 1.0);
          String relTypeEntity = relType.concat(entityText);
          relationFeatures.add(relTypeEntity, 1.0);
          // ent-rel-ent
          int partnerIdx = ((JointInstance) instance).getWord(pair.getW1().getId());
          if (partnerIdx <= lastIndex) {
            WordLabelUnit partnerUnit = (WordLabelUnit) y.getLabel(partnerIdx);
            List<Word> partnerEntity =
                getEntityWords(pair.getW1(), instance, y, lastIndex, partnerUnit);
            if (!partnerEntity.isEmpty()) {
              String partnerText = entityString(partnerEntity);
              String partnerType = partnerUnit.getType();
              relationFeatures.add(partnerType.concat(relType), 1.0);
              relationFeatures.add(partnerType.concat(partnerText).concat(relTypeEntity), 1.0);
            }
          }
        }
      }
    }

    // Trigram (id-2, id-1, current).
    if (prev2Idx >= 0 && prev1Idx >= 0) {
      Word first = (Word) instance.getSequence().get(prev2Idx);
      String firstLabel = ((WordLabelUnit) y.getLabel(prev2Idx)).getLabel();
      Word second = (Word) instance.getSequence().get(prev1Idx);
      String secondLabel = ((WordLabelUnit) y.getLabel(prev1Idx)).getLabel();
      addTrigramToFV(
          tokenFeatures,
          firstLabel,
          first.getWord().getRealBase(),
          first.getWord().getPOS(),
          secondLabel,
          second.getWord().getRealBase(),
          second.getWord().getPOS(),
          candLabel,
          lemma,
          posTag);
    }
    // Trigram (id-1, current, id+1).
    if (prev1Idx >= 0 && next1Idx >= 0) {
      Word before = (Word) instance.getSequence().get(prev1Idx);
      String beforeLabel = ((WordLabelUnit) y.getLabel(prev1Idx)).getLabel();
      Word after = (Word) instance.getSequence().get(next1Idx);
      String afterLabel = ((WordLabelUnit) y.getLabel(next1Idx)).getLabel();
      addTrigramToFV(
          tokenFeatures,
          beforeLabel,
          before.getWord().getRealBase(),
          before.getWord().getPOS(),
          candLabel,
          lemma,
          posTag,
          afterLabel,
          after.getWord().getRealBase(),
          after.getWord().getPOS());
    }
    // Trigram (current, id+1, id+2).
    if (next1Idx >= 0 && next2Idx >= 0) {
      Word second = (Word) instance.getSequence().get(next1Idx);
      String secondLabel = ((WordLabelUnit) y.getLabel(next1Idx)).getLabel();
      Word third = (Word) instance.getSequence().get(next2Idx);
      String thirdLabel = ((WordLabelUnit) y.getLabel(next2Idx)).getLabel();
      addTrigramToFV(
          tokenFeatures,
          candLabel,
          lemma,
          posTag,
          secondLabel,
          second.getWord().getRealBase(),
          second.getWord().getPOS(),
          thirdLabel,
          third.getWord().getRealBase(),
          third.getWord().getPOS());
    }

    result.add(tokenFeatures, "WORD");
    relationFeatures.mult(params.getRelWeight());
    result.add(relationFeatures, "WORDPAIR");
    return result;
  }