public void processTerm(String t) { if (t == null) return; // current term is a delimiter if (blockDelimiterTerms.contains(t)) { // delimiters should also be indexed if (indexDelimiters) { final int[] fieldIds = new int[numFields]; int i = 0; for (String fieldName : termFields) { fieldIds[i] = fieldNames.get(fieldName); i++; } ((BlockFieldDocumentPostingList) termsInDocument).insert(t, fieldIds, blockId); if (countDelimiters) numOfTokensInDocument++; } numOfTokensInBlock = 0; blockId++; } else { // index non-delimiter term final int[] fieldIds = new int[numFields]; int i = 0; for (String fieldName : termFields) { fieldIds[i] = fieldNames.get(fieldName); i++; } ((BlockFieldDocumentPostingList) termsInDocument).insert(t, fieldIds, blockId); numOfTokensInDocument++; } }
/**
 * Segments a sentence into token spans whose key is contained in {@code allRelatedWords}.
 *
 * <p>Span lengths are tried from {@code MAX_LEN} down to 1 and, for each length, start
 * positions are scanned left to right. The lookup key of a span is built by joining,
 * with single spaces, one {@code <data[5][k]>_<first character of data[1][k]>} entry per
 * token, and trimming the result. When the key is found in {@code allRelatedWords} the
 * span is emitted and every token it covers is marked as consumed.
 *
 * <p>Only the <em>start</em> position of a candidate span is required to be unconsumed;
 * a shorter span may therefore still cover tokens that an earlier, longer span already
 * used.
 *
 * @param data per-token columns of the sentence; {@code data[0].length} is taken as the
 *     number of tokens, {@code data[1]} is read as {@code pos} and {@code data[5]} is the
 *     other key component (presumably a lemma column — confirm against the caller)
 * @param allRelatedWords set of span keys to match against
 * @return the matched spans separated by tabs, each span written as its token indices
 *     joined by {@code '_'} (for example {@code "2_3\t0"}); the empty string when nothing
 *     matches
 */
public String getHighRecallSegmentation(String[][] data, THashSet<String> allRelatedWords) {
    final int numTokens = data[0].length;
    // consumed[j] == true once token j has been covered by an emitted span.
    final boolean[] consumed = new boolean[numTokens];
    final StringBuilder segments = new StringBuilder();

    for (int len = MAX_LEN; len >= 1; len--) {
        for (int start = 0; start <= numTokens - len; start++) {
            if (consumed[start]) {
                continue;
            }

            // Build the lookup key for the span [start, start + len).
            final StringBuilder key = new StringBuilder();
            for (int k = start; k < start + len; k++) {
                final String pos = data[1][k];
                final String coarsePos = pos.substring(0, 1);
                final String word = data[5][k];
                key.append(word).append('_').append(coarsePos).append(' ');
            }
            if (!allRelatedWords.contains(key.toString().trim())) {
                continue;
            }

            // Emit the span as "i_j_k" and mark its tokens as consumed.
            if (segments.length() > 0) {
                segments.append('\t');
            }
            for (int k = start; k < start + len; k++) {
                if (k > start) {
                    segments.append('_');
                }
                segments.append(k);
                consumed[k] = true;
            }
        }
    }
    return segments.toString();
}
/**
 * Pairs every lookup element with a flag telling whether it was lifted.
 *
 * <p>The items are copied into a list and passed to {@code liftShorterElements}, which
 * fills an identity-based set with the elements it lifts. Each element of the resulting
 * sequence is then paired with {@code true} if it is in that set and {@code false}
 * otherwise.
 *
 * @param items the lookup elements to weigh
 * @param context the processing context forwarded to {@code liftShorterElements}
 * @return one (element, lifted-flag) pair per element returned by
 *     {@code liftShorterElements}, in that order
 */
@NotNull
@Override
public List<Pair<LookupElement, Object>> getSortingWeights(
    @NotNull Iterable<LookupElement> items, @NotNull ProcessingContext context) {
  // Identity semantics: membership is decided by reference, not by equals().
  final THashSet<LookupElement> liftedElements = newIdentityTroveSet();
  final Iterable<LookupElement> reordered =
      liftShorterElements(ContainerUtil.newArrayList(items), liftedElements, context);

  return ContainerUtil.map(
      reordered,
      candidate -> {
        final boolean wasLifted = liftedElements.contains(candidate);
        return new Pair<LookupElement, Object>(candidate, wasLifted);
      });
}
public void processTerm(String t) { if (t == null) return; // current term is a delimiter if (blockDelimiterTerms.contains(t)) { // delimiters should also be indexed if (indexDelimiters) { ((BlockDocumentPostingList) termsInDocument).insert(t, blockId); if (countDelimiters) numOfTokensInDocument++; } numOfTokensInBlock = 0; blockId++; } else { // index non-delimiter term ((BlockDocumentPostingList) termsInDocument).insert(t, blockId); numOfTokensInDocument++; } }
/**
 * Filters a term against the stopword collection.
 *
 * @param t the term to check
 * @return {@code null} when {@code stopWords} contains {@code t}; otherwise {@code t} itself
 */
public final String processTerm(final String t) {
    if (stopWords.contains(t)) {
        // Stopwords are dropped by handing back null instead of the term.
        return null;
    }
    return t;
}
/**
 * Tells whether a term is a stopword.
 *
 * @param t the term to check
 * @return {@code true} if {@code stopWords} contains {@code t}, {@code false} otherwise
 */
public boolean isStopword(final String t) {
    final boolean listed = stopWords.contains(t);
    return listed;
}