/**
  * Builds an automaton accepting exactly the strings that both <code>a1</code> and <code>a2</code>
  * accept. The languages of the input automata are never modified.
  *
  * <p>If either operand is a singleton, the answer is decided by running the other automaton on
  * the singleton string. If both arguments are the same object, <code>a1.cloneIfRequired()</code>
  * is returned. Otherwise pairs of states reachable from the two initial states are explored in
  * first-in-first-out order and the overlapping parts of their transition ranges are copied into
  * the result.
  *
  * <p>Complexity: quadratic in number of states.
  *
  * @param a1 first operand
  * @param a2 second operand
  * @return an automaton for the intersection of the two languages
  */
 public static Automaton intersection(Automaton a1, Automaton a2) {
   // A singleton operand reduces the problem to one membership test on the other operand.
   if (a1.isSingleton()) {
     return BasicOperations.run(a2, a1.singleton)
         ? a1.cloneIfRequired()
         : BasicAutomata.makeEmpty();
   }
   if (a2.isSingleton()) {
     return BasicOperations.run(a1, a2.singleton)
         ? a2.cloneIfRequired()
         : BasicAutomata.makeEmpty();
   }
   if (a1 == a2) {
     return a1.cloneIfRequired();
   }

   Transition[][] sorted1 = a1.getSortedTransitions();
   Transition[][] sorted2 = a2.getSortedTransitions();
   Automaton product = new Automaton();
   LinkedList<StatePair> pending = new LinkedList<StatePair>();
   // Maps a (state of a1, state of a2) pair to the canonical pair that owns the product state.
   HashMap<StatePair, StatePair> known = new HashMap<StatePair, StatePair>();
   StatePair start = new StatePair(product.initial, a1.initial, a2.initial);
   pending.add(start);
   known.put(start, start);

   while (!pending.isEmpty()) {
     StatePair current = pending.removeFirst();
     current.s.accept = current.s1.accept && current.s2.accept;
     Transition[] left = sorted1[current.s1.number];
     Transition[] right = sorted2[current.s2.number];
     int firstCandidate = 0;
     for (int i = 0; i < left.length; i++) {
       Transition l = left[i];
       // Skip leading transitions of the second automaton that end before l begins.
       while (firstCandidate < right.length && right[firstCandidate].max < l.min) {
         firstCandidate++;
       }
       for (int j = firstCandidate; j < right.length && l.max >= right[j].min; j++) {
         Transition r = right[j];
         if (r.max < l.min) {
           // This range ends before l starts, so the two do not overlap.
           continue;
         }
         StatePair probe = new StatePair(l.to, r.to);
         StatePair target = known.get(probe);
         if (target == null) {
           // First time this pair is reached: give it a product state and schedule it.
           probe.s = new State();
           pending.add(probe);
           known.put(probe, probe);
           target = probe;
         }
         int lo = Math.max(l.min, r.min);
         int hi = Math.min(l.max, r.max);
         current.s.addTransition(new Transition(lo, hi, target.s));
       }
     }
   }

   product.deterministic = a1.deterministic && a2.deterministic;
   product.removeDeadTransitions();
   product.checkMinimizeAlways();
   return product;
 }
 /**
  * Computes the difference of two languages: an automaton accepting the strings accepted by
  * <code>a1</code> that are not accepted by <code>a2</code>.
  *
  * <p>Shortcuts are taken when <code>a1</code> is empty or identical to <code>a2</code> (empty
  * result), when <code>a2</code> is empty (<code>a1.cloneIfRequired()</code>), and when <code>a1
  * </code> is a singleton (decided by running <code>a2</code> on the singleton string). In every
  * other case the result is <code>intersection(a1, a2.complement())</code>. As a side-effect, the
  * automata may be determinized, if not already deterministic.
  *
  * <p>Complexity: quadratic in number of states (if already deterministic).
  *
  * @param a1 automaton whose language is the minuend
  * @param a2 automaton whose language is subtracted
  * @return an automaton for the language of <code>a1</code> minus the language of <code>a2</code>
  */
 public static Automaton minus(Automaton a1, Automaton a2) {
   if (BasicOperations.isEmpty(a1) || a1 == a2) {
     return BasicAutomata.makeEmpty();
   }
   if (BasicOperations.isEmpty(a2)) {
     return a1.cloneIfRequired();
   }
   if (!a1.isSingleton()) {
     // General case: intersect a1 with the complement of a2.
     return intersection(a1, a2.complement());
   }
   // a1 holds a single string; it survives only if a2 rejects it.
   boolean acceptedByA2 = BasicOperations.run(a2, a1.singleton);
   return acceptedByA2 ? BasicAutomata.makeEmpty() : a1.cloneIfRequired();
 }
  /**
   * Tests language inclusion: returns true if every string accepted by <code>a1</code> is also
   * accepted by <code>a2</code>.
   *
   * <p>Identical references and singleton <code>a1</code> are answered directly. Otherwise
   * <code>a2.determinize()</code> is called (so <code>a2</code> may change as a side-effect) and
   * the reachable pairs of states are explored; the search fails as soon as <code>a1</code>
   * accepts where <code>a2</code> does not, or some part of a transition range of <code>a1</code>
   * has no matching transition in <code>a2</code>.
   *
   * <p>Complexity: quadratic in number of states.
   *
   * @param a1 candidate subset
   * @param a2 candidate superset
   * @return true if the language of <code>a1</code> is contained in the language of <code>a2</code>
   */
  public static boolean subsetOf(Automaton a1, Automaton a2) {
    if (a1 == a2) {
      return true;
    }
    if (a1.isSingleton()) {
      return a2.isSingleton()
          ? a1.singleton.equals(a2.singleton)
          : BasicOperations.run(a2, a1.singleton);
    }

    a2.determinize();
    Transition[][] sorted1 = a1.getSortedTransitions();
    Transition[][] sorted2 = a2.getSortedTransitions();
    LinkedList<StatePair> pending = new LinkedList<StatePair>();
    HashSet<StatePair> seen = new HashSet<StatePair>();
    StatePair start = new StatePair(a1.initial, a2.initial);
    pending.add(start);
    seen.add(start);

    while (!pending.isEmpty()) {
      StatePair current = pending.removeFirst();
      if (current.s1.accept && !current.s2.accept) {
        return false;
      }
      Transition[] left = sorted1[current.s1.number];
      Transition[] right = sorted2[current.s2.number];
      int firstCandidate = 0;
      for (int i = 0; i < left.length; i++) {
        Transition l = left[i];
        // Skip leading transitions of a2 that end before l begins.
        while (firstCandidate < right.length && right[firstCandidate].max < l.min) {
          firstCandidate++;
        }
        // [uncoveredMin, uncoveredMax] is the part of l's range not yet matched by a2.
        int uncoveredMin = l.min;
        int uncoveredMax = l.max;

        for (int j = firstCandidate; j < right.length && l.max >= right[j].min; j++) {
          Transition r = right[j];
          if (r.min > uncoveredMin) {
            // Gap: a2 has no transition for the code points before r.min.
            return false;
          }
          if (r.max < Character.MAX_CODE_POINT) {
            uncoveredMin = r.max + 1;
          } else {
            // r reaches the top of the code point space; mark the range as fully covered
            // (an empty interval) without overflowing r.max + 1.
            uncoveredMin = Character.MAX_CODE_POINT;
            uncoveredMax = Character.MIN_CODE_POINT;
          }
          StatePair next = new StatePair(l.to, r.to);
          if (seen.add(next)) {
            pending.add(next);
          }
        }
        if (uncoveredMin <= uncoveredMax) {
          // The tail of l's range was never matched by a transition of a2.
          return false;
        }
      }
    }
    return true;
  }
Пример #4
0
  /**
   * Tests a pre-intersected automaton against the original: for a random regexp, the automaton
   * built from the regexp and the string-union automaton built from the entries of
   * <code>terms</code> it accepts must produce equal search hits.
   */
  public void testFiniteVersusInfinite() throws Exception {
    for (int iter = 0; iter < numIterations; iter++) {
      String regexp = AutomatonTestUtil.randomRegexp(random());
      Automaton original = new RegExp(regexp, RegExp.NONE).toAutomaton();

      // Gather every entry of terms that the regexp automaton accepts.
      final List<BytesRef> accepted = new ArrayList<BytesRef>();
      for (BytesRef candidate : terms) {
        String text = candidate.utf8ToString();
        if (BasicOperations.run(original, text)) {
          accepted.add(candidate);
        }
      }

      // Automaton built from exactly the accepted terms.
      Automaton preIntersected = BasicAutomata.makeStringUnion(accepted);

      AutomatonQuery originalQuery = new AutomatonQuery(new Term("field", ""), original);
      AutomatonQuery alternateQuery = new AutomatonQuery(new Term("field", ""), preIntersected);
      // Both queries are searched with a limit of 25 and their hits compared.
      CheckHits.checkEqual(
          originalQuery,
          searcher.search(originalQuery, 25).scoreDocs,
          searcher.search(alternateQuery, 25).scoreDocs);
    }
  }
Пример #5
0
  /**
   * Seeks to every term accepted by a random automaton: each accepted entry of
   * <code>terms</code>, visited in shuffled order, must be found by either an exact seek or a
   * ceiling seek (chosen at random).
   */
  public void testSeeking() throws Exception {
    for (int iter = 0; iter < numIterations; iter++) {
      String regexp = AutomatonTestUtil.randomRegexp(random());
      Automaton automaton = new RegExp(regexp, RegExp.NONE).toAutomaton();
      TermsEnum termsEnum = MultiFields.getTerms(reader, "field").iterator(null);
      ArrayList<BytesRef> shuffled = new ArrayList<BytesRef>(terms);
      Collections.shuffle(shuffled, random());

      for (BytesRef term : shuffled) {
        if (!BasicOperations.run(automaton, term.utf8ToString())) {
          // Term is rejected by the automaton; nothing to seek.
          continue;
        }
        boolean seekExactly = random().nextBoolean();
        if (seekExactly) {
          assertTrue(termsEnum.seekExact(term, random().nextBoolean()));
        } else {
          // A ceiling seek must land exactly on the requested term.
          assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(term, random().nextBoolean()));
          assertEquals(term, termsEnum.term());
        }
      }
    }
  }