/**
   * Builds a reusable function that filters a document map according to include and exclude
   * patterns.
   *
   * <p>When {@code includes} is {@code null} or empty, an automaton accepting any string is used
   * for inclusion. When {@code excludes} is {@code null} or empty, the empty automaton is used for
   * exclusion.
   *
   * @param includes simple-match patterns of paths to keep; may be {@code null} or empty
   * @param excludes simple-match patterns of paths to drop; may be {@code null} or empty
   * @return a function applying the compiled rules to a map
   * @see #filter(Map, String[], String[]) for details
   */
  public static Function<Map<String, ?>, Map<String, Object>> filter(
      String[] includes, String[] excludes) {
    final CharacterRunAutomaton anyPath = new CharacterRunAutomaton(Automata.makeAnyString());

    final boolean hasIncludes = includes != null && includes.length != 0;
    final CharacterRunAutomaton includeMatcher =
        hasIncludes
            ? new CharacterRunAutomaton(
                makeMatchDotsInFieldNames(Regex.simpleMatchToAutomaton(includes)))
            : anyPath;

    final boolean hasExcludes = excludes != null && excludes.length != 0;
    final Automaton excludeAutomaton =
        hasExcludes
            ? makeMatchDotsInFieldNames(Regex.simpleMatchToAutomaton(excludes))
            : Automata.makeEmpty();
    final CharacterRunAutomaton excludeMatcher = new CharacterRunAutomaton(excludeAutomaton);

    // NOTE: Operations.minus cannot be used to combine the two automata because of the
    // special case that all sub properties must match as soon as an object matches.

    return (source) -> filter(source, includeMatcher, 0, excludeMatcher, 0, anyPath);
  }
Пример #2
0
 /**
  * Test a configuration that behaves a lot like KeepWordFilter.
  *
  * <p>The automaton handed to the analyzer accepts every string except "foo" and "bar" (it is the
  * complement of their union), and the expected output contains only those two words.
  */
 public void testKeep() throws Exception {
   final String input = "quick foo brown bar bar fox foo";
   final String[] expectedTokens = {"foo", "bar", "bar", "foo"};
   // Presumably the expected position increments of the surviving tokens; confirm against
   // assertAnalyzesTo.
   final int[] expectedIncrements = {2, 2, 1, 2};

   CharacterRunAutomaton keepWords =
       new CharacterRunAutomaton(
           Operations.complement(
               Operations.union(
                   Arrays.asList(Automata.makeString("foo"), Automata.makeString("bar"))),
               DEFAULT_MAX_DETERMINIZED_STATES));
   Analyzer a = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, keepWords);

   assertAnalyzesTo(a, input, expectedTokens, expectedIncrements);
 }
Пример #3
0
 /**
  * Combines the include and exclude settings into one automaton.
  *
  * <p>The accepted language starts from {@code include} if set, otherwise from the union of
  * {@code includeValues} if set, otherwise from "any string". If {@code exclude} is set its
  * automaton is subtracted; otherwise, if {@code excludeValues} is set, the union of those values
  * is subtracted.
  *
  * @return the resulting automaton
  */
 private Automaton toAutomaton() {
   final Automaton accepted;
   if (include != null) {
     accepted = include.toAutomaton();
   } else if (includeValues != null) {
     accepted = Automata.makeStringUnion(includeValues);
   } else {
     accepted = Automata.makeAnyString();
   }

   if (exclude != null) {
     return Operations.minus(
         accepted, exclude.toAutomaton(), Operations.DEFAULT_MAX_DETERMINIZED_STATES);
   }
   if (excludeValues != null) {
     return Operations.minus(
         accepted,
         Automata.makeStringUnion(excludeValues),
         Operations.DEFAULT_MAX_DETERMINIZED_STATES);
   }
   return accepted;
 }
Пример #4
0
  /**
   * Creates the automaton for a term range by delegating to {@code Automata.makeBinaryInterval}.
   *
   * <p>A {@code null} bound is always passed on as inclusive, whatever flag the caller supplied
   * for that end.
   *
   * @param lowerTerm lower bound, or {@code null}
   * @param upperTerm upper bound, or {@code null}
   * @param includeLower whether a non-null lower bound is part of the range
   * @param includeUpper whether a non-null upper bound is part of the range
   * @return the automaton produced by {@code Automata.makeBinaryInterval}
   */
  public static Automaton toAutomaton(
      BytesRef lowerTerm, BytesRef upperTerm, boolean includeLower, boolean includeUpper) {
    // makeBinaryInterval is more picky than we are: a null end must be flagged as inclusive.
    final boolean lowerInclusive = lowerTerm == null || includeLower;
    final boolean upperInclusive = upperTerm == null || includeUpper;

    return Automata.makeBinaryInterval(lowerTerm, lowerInclusive, upperTerm, upperInclusive);
  }
 /**
  * Compiles the reindex-from-remote whitelist into a {@link CharacterRunAutomaton} and rejects a
  * whitelist that would accept everything.
  *
  * @param whitelist simple-match patterns; an empty list yields an automaton built from the empty
  *     automaton
  * @return the compiled whitelist matcher
  * @throws IllegalArgumentException if the minimized automaton is total
  */
 static CharacterRunAutomaton buildRemoteWhitelist(List<String> whitelist) {
   if (whitelist.isEmpty()) {
     return new CharacterRunAutomaton(Automata.makeEmpty());
   }

   final String[] patterns = whitelist.toArray(Strings.EMPTY_ARRAY);
   final Automaton minimized =
       MinimizationOperations.minimize(
           Regex.simpleMatchToAutomaton(patterns), Operations.DEFAULT_MAX_DETERMINIZED_STATES);

   if (!Operations.isTotal(minimized)) {
     return new CharacterRunAutomaton(minimized);
   }

   // A total automaton matches every address, so refuse to build such a whitelist.
   throw new IllegalArgumentException(
       "Refusing to start because whitelist "
           + whitelist
           + " accepts all addresses. "
           + "This would allow users to reindex-from-remote any URL they like effectively having Elasticsearch make HTTP GETs "
           + "for them.");
 }
  /**
   * One-time fixture setup: picks a stopword, builds the analyzer, indexes {@code atLeast(1000)}
   * documents with an "id" and a "field" value, issues {@code numDocs / 20} delete requests
   * against randomly chosen ids, and opens two searchers over the same reader.
   */
  @BeforeClass
  public static void beforeClass() throws Exception {
    // Statement order matters here: the calls share one random source.
    Random rnd = random();
    directory = newDirectory();
    stopword = "" + randomChar();
    analyzer =
        new MockAnalyzer(
            rnd,
            MockTokenizer.WHITESPACE,
            false,
            new CharacterRunAutomaton(Automata.makeString(stopword)));
    RandomIndexWriter writer = new RandomIndexWriter(rnd, directory, analyzer);

    // A single document instance is reused; only the field values change per iteration.
    Document document = new Document();
    Field idField = new StringField("id", "", Field.Store.NO);
    Field contentField = new TextField("field", "", Field.Store.NO);
    document.add(idField);
    document.add(contentField);

    // index some docs
    final int numDocs = atLeast(1000);
    for (int docId = 0; docId < numDocs; docId++) {
      idField.setStringValue(Integer.toString(docId));
      contentField.setStringValue(randomFieldContents());
      writer.addDocument(document);
    }

    // delete some docs, randomly choosing between delete-by-term and delete-by-query
    for (int remaining = numDocs / 20; remaining > 0; remaining--) {
      Term victim = new Term("id", Integer.toString(rnd.nextInt(numDocs)));
      if (rnd.nextBoolean()) {
        writer.deleteDocuments(victim);
      } else {
        writer.deleteDocuments(new TermQuery(victim));
      }
    }

    reader = writer.getReader();
    s1 = newSearcher(reader);
    s2 = newSearcher(reader);
    writer.close();
  }
  /**
   * Walks {@code query} and collects, for every supported multi-term query on {@code field}, an
   * automaton that matches the same terms.
   *
   * <p>Compound queries (boolean, constant-score, dismax and the span wrappers) are descended
   * into recursively; prohibited boolean clauses and the exclude side of a span-not query are
   * skipped. Automaton, prefix, fuzzy and term-range queries produce one automaton each when
   * their field equals {@code field}. Any other query type contributes nothing.
   *
   * @param query the query to inspect
   * @param field only queries on this field contribute automata
   * @return the collected automata, possibly empty; each one's {@code toString()} is that of the
   *     query it came from
   */
  static CharacterRunAutomaton[] extractAutomata(Query query, String field) {
    List<CharacterRunAutomaton> automata = new ArrayList<>();
    if (query instanceof BooleanQuery) {
      for (BooleanClause clause : (BooleanQuery) query) {
        if (clause.isProhibited()) {
          continue;
        }
        automata.addAll(Arrays.asList(extractAutomata(clause.getQuery(), field)));
      }
    } else if (query instanceof ConstantScoreQuery) {
      Query wrapped = ((ConstantScoreQuery) query).getQuery();
      automata.addAll(Arrays.asList(extractAutomata(wrapped, field)));
    } else if (query instanceof DisjunctionMaxQuery) {
      for (Query disjunct : ((DisjunctionMaxQuery) query).getDisjuncts()) {
        automata.addAll(Arrays.asList(extractAutomata(disjunct, field)));
      }
    } else if (query instanceof SpanOrQuery) {
      for (Query spanClause : ((SpanOrQuery) query).getClauses()) {
        automata.addAll(Arrays.asList(extractAutomata(spanClause, field)));
      }
    } else if (query instanceof SpanNearQuery) {
      for (Query spanClause : ((SpanNearQuery) query).getClauses()) {
        automata.addAll(Arrays.asList(extractAutomata(spanClause, field)));
      }
    } else if (query instanceof SpanNotQuery) {
      // Only the include side is inspected.
      Query included = ((SpanNotQuery) query).getInclude();
      automata.addAll(Arrays.asList(extractAutomata(included, field)));
    } else if (query instanceof SpanPositionCheckQuery) {
      Query matched = ((SpanPositionCheckQuery) query).getMatch();
      automata.addAll(Arrays.asList(extractAutomata(matched, field)));
    } else if (query instanceof SpanMultiTermQueryWrapper) {
      Query wrapped = ((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery();
      automata.addAll(Arrays.asList(extractAutomata(wrapped, field)));
    } else if (query instanceof AutomatonQuery) {
      final AutomatonQuery automatonQuery = (AutomatonQuery) query;
      if (automatonQuery.getField().equals(field)) {
        automata.add(
            new CharacterRunAutomaton(automatonQuery.getAutomaton()) {
              @Override
              public String toString() {
                return automatonQuery.toString();
              }
            });
      }
    } else if (query instanceof PrefixQuery) {
      final PrefixQuery prefixQuery = (PrefixQuery) query;
      Term prefixTerm = prefixQuery.getPrefix();
      if (prefixTerm.field().equals(field)) {
        // The prefix text followed by any string.
        Automaton startsWithPrefix =
            Operations.concatenate(
                Automata.makeString(prefixTerm.text()), Automata.makeAnyString());
        automata.add(
            new CharacterRunAutomaton(startsWithPrefix) {
              @Override
              public String toString() {
                return prefixQuery.toString();
              }
            });
      }
    } else if (query instanceof FuzzyQuery) {
      final FuzzyQuery fuzzyQuery = (FuzzyQuery) query;
      if (fuzzyQuery.getField().equals(field)) {
        // Decode the term into code points so the prefix/suffix split never cuts a surrogate pair.
        String text = fuzzyQuery.getTerm().text();
        int[] codePoints = new int[text.codePointCount(0, text.length())];
        int charIndex = 0;
        int codePointIndex = 0;
        while (charIndex < text.length()) {
          int codePoint = text.codePointAt(charIndex);
          codePoints[codePointIndex++] = codePoint;
          charIndex += Character.charCount(codePoint);
        }

        // The prefix length is capped at the term length.
        int prefixLength = Math.min(fuzzyQuery.getPrefixLength(), codePoints.length);
        String fuzzySuffix =
            UnicodeUtil.newString(codePoints, prefixLength, codePoints.length - prefixLength);
        LevenshteinAutomata levenshtein =
            new LevenshteinAutomata(fuzzySuffix, fuzzyQuery.getTranspositions());
        String exactPrefix = UnicodeUtil.newString(codePoints, 0, prefixLength);
        Automaton fuzzyAutomaton = levenshtein.toAutomaton(fuzzyQuery.getMaxEdits(), exactPrefix);
        automata.add(
            new CharacterRunAutomaton(fuzzyAutomaton) {
              @Override
              public String toString() {
                return fuzzyQuery.toString();
              }
            });
      }
    } else if (query instanceof TermRangeQuery) {
      final TermRangeQuery rangeQuery = (TermRangeQuery) query;
      if (rangeQuery.getField().equals(field)) {
        final CharsRef lowerBound =
            rangeQuery.getLowerTerm() == null
                ? null
                : new CharsRef(rangeQuery.getLowerTerm().utf8ToString());
        final CharsRef upperBound =
            rangeQuery.getUpperTerm() == null
                ? null
                : new CharsRef(rangeQuery.getUpperTerm().utf8ToString());

        final boolean includeLower = rangeQuery.includesLower();
        final boolean includeUpper = rangeQuery.includesUpper();
        // Reused view over the caller's char[]; shared by every call to run() on this instance.
        final CharsRef candidate = new CharsRef();
        final Comparator<CharsRef> order = CharsRef.getUTF16SortedAsUTF8Comparator();

        // this is *not* an automaton, but it's very simple: run() is replaced by a range check
        automata.add(
            new CharacterRunAutomaton(Automata.makeEmpty()) {
              @Override
              public boolean run(char[] s, int offset, int length) {
                candidate.chars = s;
                candidate.offset = offset;
                candidate.length = length;

                if (lowerBound != null) {
                  int vsLower = order.compare(candidate, lowerBound);
                  if (vsLower < 0) {
                    return false;
                  }
                  if (vsLower == 0 && !includeLower) {
                    return false;
                  }
                }

                if (upperBound != null) {
                  int vsUpper = order.compare(candidate, upperBound);
                  if (vsUpper > 0) {
                    return false;
                  }
                  if (vsUpper == 0 && !includeUpper) {
                    return false;
                  }
                }
                return true;
              }

              @Override
              public String toString() {
                return rangeQuery.toString();
              }
            });
      }
    }
    return automata.toArray(new CharacterRunAutomaton[automata.size()]);
  }
 /**
  * Widens an automaton so that a match on an object also matches dotted names beneath it. For
  * instance, if the original simple regex is `foo`, the result accepts `foo` OR `foo.*`.
  *
  * @param automaton the automaton built from the original patterns
  * @return the union of {@code automaton} with "{@code automaton}, then a dot, then any string"
  */
 private static Automaton makeMatchDotsInFieldNames(Automaton automaton) {
   final Automaton withDottedSuffix =
       Operations.concatenate(
           Arrays.asList(automaton, Automata.makeChar('.'), Automata.makeAnyString()));
   return Operations.union(automaton, withDottedSuffix);
 }