/**
 * Attaches features to every token of the given affiliation.
 *
 * <p>For each token, in order: the name of every calculator in
 * {@code binaryFeatures} whose predicate holds for the token is added as a
 * feature, then the value produced by {@code wordFeature} is added unless it
 * is {@code null}. Once all tokens have been processed, each calculator in
 * {@code keywordFeatures} is invoked once on the complete token list.
 *
 * @param affiliation the affiliation whose tokens receive the features; it is
 *                    also passed to the calculators as context
 */
@Override
public void calculateFeatures(DocumentAffiliation affiliation) {
    final List<Token<AffiliationLabel>> affiliationTokens = affiliation.getTokens();

    for (Token<AffiliationLabel> current : affiliationTokens) {
        // Binary features: record the feature name only when the predicate holds.
        for (BinaryTokenFeatureCalculator calculator : binaryFeatures) {
            boolean applies = calculator.calculateFeaturePredicate(current, affiliation);
            if (!applies) {
                continue;
            }
            current.addFeature(calculator.getFeatureName());
        }

        // Word feature: a null value means there is nothing to add for this token.
        String wordValue = wordFeature.calculateFeatureValue(current, affiliation);
        if (wordValue != null) {
            current.addFeature(wordValue);
        }
    }

    // Keyword calculators work on the whole token sequence rather than token by token.
    for (KeywordFeatureCalculator<Token<AffiliationLabel>> keywordCalculator : keywordFeatures) {
        keywordCalculator.calculateDictionaryFeatures(affiliationTokens);
    }
}
/**
 * Evaluates this feature for a token by delegating to
 * {@code TextUtils.isOnlyFirstUpperCase} on the token's text.
 *
 * @param token   the token whose text is examined
 * @param context not used by this implementation
 * @return the result of {@code TextUtils.isOnlyFirstUpperCase} for the token text
 */
@Override
public boolean calculateFeaturePredicate(Token<?> token, ParsableString<?> context) {
    final String tokenText = token.getText();
    return TextUtils.isOnlyFirstUpperCase(tokenText);
}
/**
 * Collapses every run of consecutive tokens that share the same label into a
 * single token and replaces {@code tokens} with the merged list.
 *
 * <p>After merging, each resulting token is stretched so that it ends where
 * the next merged token starts (the last one ends at {@code rawText.length()}),
 * and its text is re-read from {@code rawText} for that range. Does nothing
 * when {@code tokens} is {@code null} or empty.
 */
public void mergeTokens() {
    if (tokens == null || tokens.isEmpty()) {
        return;
    }

    List<Token<AffiliationLabel>> newTokens = new ArrayList<Token<AffiliationLabel>>();
    Token<AffiliationLabel> actToken = null;
    for (Token<AffiliationLabel> token : tokens) {
        if (actToken != null && actToken.getLabel().equals(token.getLabel())) {
            // Same label as the current run: extend the run. This end index is
            // provisional; the final one is assigned in the loop below.
            actToken.setEndIndex(token.getEndIndex());
            continue;
        }
        if (actToken != null) {
            // Label changed: the current run is complete.
            newTokens.add(actToken);
        }
        actToken = new Token<AffiliationLabel>(
                token.getText(), token.getStartIndex(), token.getEndIndex(), token.getLabel());
    }
    // tokens is non-empty here, so actToken holds the final, not yet stored run.
    newTokens.add(actToken);

    // Walk by position instead of calling indexOf(token) for each element:
    // indexOf is a linear scan (quadratic overall) and locates the element via
    // equals(), which would pick an earlier equal token instead of this one.
    final int mergedCount = newTokens.size();
    for (int i = 0; i < mergedCount; i++) {
        Token<AffiliationLabel> merged = newTokens.get(i);
        if (i + 1 == mergedCount) {
            merged.setEndIndex(rawText.length());
        } else {
            merged.setEndIndex(newTokens.get(i + 1).getStartIndex());
        }
        merged.setText(rawText.substring(merged.getStartIndex(), merged.getEndIndex()));
    }

    tokens = newTokens;
}