예제 #1
0
 /**
  * Records one term of the current document, treating block-delimiter terms specially.
  *
  * <p>A {@code null} term is ignored. A term contained in {@code blockDelimiterTerms} always
  * closes the current block ({@code numOfTokensInBlock} is reset to 0 and {@code blockId} is
  * incremented); it is inserted into the posting list only when {@code indexDelimiters} is set,
  * and counted as a document token only when {@code countDelimiters} is also set. Any other
  * term is always inserted and counted.
  *
  * @param t the term to process; may be {@code null}
  */
 public void processTerm(String t) {
   if (t == null) {
     return;
   }
   final boolean isDelimiter = blockDelimiterTerms.contains(t);

   // Ordinary terms are always indexed; delimiters only when configured to be.
   if (!isDelimiter || indexDelimiters) {
     // One slot per known field; slots beyond the number of entries in termFields stay 0.
     final int[] ids = new int[numFields];
     int slot = 0;
     for (String name : termFields) {
       ids[slot++] = fieldNames.get(name);
     }
     ((BlockFieldDocumentPostingList) termsInDocument).insert(t, ids, blockId);

     // Delimiters contribute to the document length only when countDelimiters is set.
     if (!isDelimiter || countDelimiters) {
       numOfTokensInDocument++;
     }
   }

   // A delimiter ends the current block regardless of whether it was indexed.
   if (isDelimiter) {
     numOfTokensInBlock = 0;
     blockId++;
   }
 }
예제 #2
0
 /**
  * Collects token spans whose signature occurs in {@code allRelatedWords}.
  *
  * <p>Span lengths are tried from {@code MAX_LEN} down to 1, and start positions from left to
  * right. A candidate span is considered only if its first token has not been consumed by an
  * earlier accepted span; once a span is accepted, all of its tokens are marked consumed. Only
  * the first token of a candidate is checked, so an accepted span may still overlap tokens that
  * were consumed earlier.
  *
  * <p>The signature of a span is, for each token {@code k}, {@code data[5][k] + "_" +} the first
  * character of {@code data[1][k]}, joined by single spaces and trimmed. (Presumably row 5 holds
  * lemmas and row 1 POS tags - confirm against the caller.)
  *
  * @param data per-token columns; {@code data[0].length} is taken as the number of tokens, and
  *     rows 1 and 5 are read for every token in a candidate span
  * @param allRelatedWords set of signatures that identify spans to keep
  * @return accepted spans separated by tabs, each span written as its token indices joined by
  *     {@code '_'}; the empty string when no span is accepted
  */
 public String getHighRecallSegmentation(String[][] data, THashSet<String> allRelatedWords) {
   final int tokenCount = data[0].length;
   // consumed[p] is true once token p belongs to an accepted span and may no longer start one.
   final boolean[] consumed = new boolean[tokenCount];
   final StringBuilder segmentation = new StringBuilder();

   for (int len = MAX_LEN; len >= 1; len--) {
     for (int start = 0; start <= tokenCount - len; start++) {
       if (consumed[start]) {
         continue;
       }
       final int end = start + len;

       final StringBuilder signature = new StringBuilder();
       for (int k = start; k < end; k++) {
         final String coarsePos = data[1][k].substring(0, 1);
         signature.append(data[5][k]).append('_').append(coarsePos).append(' ');
       }
       if (!allRelatedWords.contains(signature.toString().trim())) {
         continue;
       }

       if (segmentation.length() > 0) {
         segmentation.append('\t');
       }
       for (int k = start; k < end; k++) {
         if (k > start) {
           segmentation.append('_');
         }
         segmentation.append(k);
         consumed[k] = true;
       }
     }
   }
   return segmentation.toString();
 }
 /**
  * Pairs each element with a flag telling whether it ended up in the "lifted" set.
  *
  * <p>The items are copied into a new list and passed to {@code liftShorterElements} together
  * with a fresh identity set; the elements of the iterable it returns are then mapped, in
  * iteration order, to a pair of the element and the boxed result of {@code contains} on that
  * set. (Presumably {@code liftShorterElements} fills the set with the elements it promoted -
  * confirm against its implementation.)
  *
  * @param items the lookup elements to weigh
  * @param context processing context forwarded to {@code liftShorterElements}
  * @return one pair per element of the iterable returned by {@code liftShorterElements}
  */
 @NotNull
 @Override
 public List<Pair<LookupElement, Object>> getSortingWeights(
     @NotNull Iterable<LookupElement> items, @NotNull ProcessingContext context) {
   final THashSet<LookupElement> liftedSet = newIdentityTroveSet();
   Iterable<LookupElement> ordered =
       liftShorterElements(ContainerUtil.newArrayList(items), liftedSet, context);
   return ContainerUtil.map(
       ordered,
       item -> {
         final Object weight = liftedSet.contains(item);
         return new Pair<LookupElement, Object>(item, weight);
       });
 }
예제 #4
0
 /**
  * Records one term of the current document, treating block-delimiter terms specially.
  *
  * <p>A {@code null} term is ignored. A term not contained in {@code blockDelimiterTerms} is
  * inserted into the posting list under the current {@code blockId} and counted as a document
  * token. A delimiter term is inserted only when {@code indexDelimiters} is set (and counted
  * only when {@code countDelimiters} is also set); it then always resets
  * {@code numOfTokensInBlock} to 0 and increments {@code blockId}.
  *
  * @param t the term to process; may be {@code null}
  */
 public void processTerm(String t) {
   if (t == null) {
     return;
   }

   // Ordinary term: index it in the current block and stop.
   if (!blockDelimiterTerms.contains(t)) {
     ((BlockDocumentPostingList) termsInDocument).insert(t, blockId);
     numOfTokensInDocument++;
     return;
   }

   // Delimiter term: optionally index and count it, then start a new block.
   if (indexDelimiters) {
     ((BlockDocumentPostingList) termsInDocument).insert(t, blockId);
     if (countDelimiters) {
       numOfTokensInDocument++;
     }
   }
   numOfTokensInBlock = 0;
   blockId++;
 }
예제 #5
0
 /**
  * Filters a term against the stopword collection.
  *
  * @param t the term to check
  * @return {@code null} when {@code stopWords} contains {@code t}; otherwise {@code t} itself
  */
 public final String processTerm(final String t) {
   if (stopWords.contains(t)) {
     return null;
   }
   return t;
 }
예제 #6
0
 /**
  * Reports whether a term is a stopword.
  *
  * @param t the term to look up
  * @return {@code true} if {@code stopWords} contains {@code t}
  */
 public boolean isStopword(final String t) {
   final boolean listed = stopWords.contains(t);
   return listed;
 }