@Override
  public void calculateFeatures(DocumentAffiliation affiliation) {
    // Two phases:
    //   1. per token: every binary feature whose predicate holds, then the word feature
    //      (skipped when the word feature calculator yields null);
    //   2. per keyword calculator: one call over the complete token list.
    final List<Token<AffiliationLabel>> affiliationTokens = affiliation.getTokens();

    for (Token<AffiliationLabel> current : affiliationTokens) {
      for (BinaryTokenFeatureCalculator calculator : binaryFeatures) {
        boolean applies = calculator.calculateFeaturePredicate(current, affiliation);
        if (!applies) {
          continue;
        }
        current.addFeature(calculator.getFeatureName());
      }

      final String wordValue = wordFeature.calculateFeatureValue(current, affiliation);
      if (wordValue == null) {
        continue;
      }
      current.addFeature(wordValue);
    }

    // Keyword calculators receive the whole token list rather than one token at a time.
    for (KeywordFeatureCalculator<Token<AffiliationLabel>> keywordCalculator : keywordFeatures) {
      keywordCalculator.calculateDictionaryFeatures(affiliationTokens);
    }
  }
示例#2
0
 @Override
 public boolean calculateFeaturePredicate(Token<?> token, ParsableString<?> context) {
   // Only the token's own text is inspected; the surrounding context is not consulted.
   final String tokenText = token.getText();
   return TextUtils.isOnlyFirstUpperCase(tokenText);
 }
示例#3
0
  /**
   * Collapses each run of consecutive tokens that share the same label into a single token and
   * replaces {@code tokens} with the merged list.
   *
   * <p>After merging, every token's end index is moved to the start index of the token that
   * follows it (or to the end of {@code rawText} for the last token), and its text is re-sliced
   * from {@code rawText} over the resulting range. Start indexes are never changed, so the first
   * merged token still begins where the first original token began.
   *
   * <p>Does nothing when {@code tokens} is {@code null} or empty.
   */
  public void mergeTokens() {
    if (tokens == null || tokens.isEmpty()) {
      return;
    }

    // Pass 1: group consecutive tokens by label. Each run is represented by a fresh Token that
    // is appended to the result as soon as the run starts and extended while the label repeats.
    List<Token<AffiliationLabel>> merged = new ArrayList<Token<AffiliationLabel>>();
    Token<AffiliationLabel> run = null;
    for (Token<AffiliationLabel> token : tokens) {
      if (run != null && run.getLabel().equals(token.getLabel())) {
        run.setEndIndex(token.getEndIndex());
      } else {
        run =
            new Token<AffiliationLabel>(
                token.getText(), token.getStartIndex(), token.getEndIndex(), token.getLabel());
        merged.add(run);
      }
    }

    // Pass 2: stretch every merged token up to its successor and refresh its text.
    // Iterating by position (instead of looking each token up with indexOf) keeps this linear
    // and guarantees the successor is found by position, not by Token.equals().
    final int lastIndex = merged.size() - 1;
    for (int i = 0; i <= lastIndex; i++) {
      Token<AffiliationLabel> token = merged.get(i);
      if (i == lastIndex) {
        token.setEndIndex(rawText.length());
      } else {
        token.setEndIndex(merged.get(i + 1).getStartIndex());
      }
      token.setText(rawText.substring(token.getStartIndex(), token.getEndIndex()));
    }

    tokens = merged;
  }