Exemple #1
0
  /**
   * Algorithm of Tarjan for computing the strongly connected components of a graph.
   *
   * @param v current node
   * @throws QueryException if a variable directly calls itself
   */
  private void tarjan(final int v) throws QueryException {
    final int ixv = 2 * v, llv = ixv + 1, idx = next++;
    while (list.size() <= llv) list.add(-1);
    list.set(ixv, idx);
    list.set(llv, idx);

    stack.push(v);

    for (final int w : adjacentTo(v)) {
      final int ixw = 2 * w, llw = ixw + 1;
      if (list.size() <= ixw || list.get(ixw) < 0) {
        // Successor w has not yet been visited; recurse on it
        tarjan(w);
        list.set(llv, Math.min(list.get(llv), list.get(llw)));
      } else if (stack.contains(w)) {
        // Successor w is in stack S and hence in the current SCC
        list.set(llv, Math.min(list.get(llv), list.get(ixw)));
      }
    }

    // If v is a root node, pop the stack and generate an SCC
    if (list.get(llv) == list.get(ixv)) {
      int w;
      Scope[] out = null;
      do {
        w = stack.pop();
        final Scope scp = scopes.get(w);
        out = out == null ? new Scope[] {scp} : Array.add(out, scp);
      } while (w != v);
      result.add(out);
    }
  }
Exemple #2
0
  /**
   * Evaluates the full-text match.
   *
   * @param qc query context
   * @return number of tokens, used for scoring
   * @throws QueryException query exception
   */
  private int contains(final QueryContext qc) throws QueryException {
    first = true;
    final FTLexer lexer = ftt.lexer(qc.ftToken);

    // use faster evaluation for default options
    int num = 0;
    if (fast) {
      for (final byte[] t : tokens) {
        final FTTokens qtok = ftt.cache(t);
        num = Math.max(num, ftt.contains(qtok, lexer) * qtok.length());
      }
      return num;
    }

    // find and count all occurrences
    final boolean all = mode == FTMode.ALL || mode == FTMode.ALL_WORDS;
    int oc = 0;
    for (final byte[] w : unique(tokens(qc))) {
      final FTTokens qtok = ftt.cache(w);
      final int o = ftt.contains(qtok, lexer);
      if (all && o == 0) return 0;
      num = Math.max(num, o * qtok.length());
      oc += o;
    }

    // check if occurrences are in valid range. if yes, return number of tokens
    final long mn = occ != null ? toLong(occ[0], qc) : 1;
    final long mx = occ != null ? toLong(occ[1], qc) : Long.MAX_VALUE;
    if (mn == 0 && oc == 0) matches = FTNot.not(matches);
    return oc >= mn && oc <= mx ? Math.max(1, num) : 0;
  }
Exemple #3
0
  /**
   * Returns a pre value.
   *
   * @param id unique node id
   * @return pre value or -1 if id was not found
   */
  final int preold(final int id) {
    // find pre value in table
    for (int p = Math.max(0, id); p < meta.size; ++p) if (id == id(p)) return p;
    final int ps = Math.min(meta.size, id);
    for (int p = 0; p < ps; ++p) if (id == id(p)) return p;

    // id not found
    return -1;
  }
Exemple #4
0
  @Override
  public boolean indexAccessible(final IndexInfo ii) {
    /* If the following conditions yield true, the index is accessed:
     * - all query terms are statically available
     * - no FTTimes option is specified
     * - explicitly set case, diacritics and stemming match options do not
     *   conflict with index options. */
    data = ii.ic.data;
    final MetaData md = data.meta;
    final FTOpt fto = ftt.opt;

    /* Index will be applied if no explicit match options have been set
     * that conflict with the index options. As a consequence, though, index-
     * based querying might yield other results than sequential scanning. */
    if (occ != null
        || fto.cs != null && md.casesens == (fto.cs == FTCase.INSENSITIVE)
        || fto.isSet(DC) && md.diacritics != fto.is(DC)
        || fto.isSet(ST) && md.stemming != fto.is(ST)
        || fto.ln != null && !fto.ln.equals(md.language)) return false;

    // adopt database options to tokenizer
    fto.copy(md);

    // estimate costs if text is not known at compile time
    if (tokens == null) {
      ii.costs = Math.max(2, data.meta.size / 30);
      return true;
    }

    // summarize number of hits; break loop if no hits are expected
    final FTLexer ft = new FTLexer(fto);
    ii.costs = 0;
    for (byte[] t : tokens) {
      ft.init(t);
      while (ft.hasNext()) {
        final byte[] tok = ft.nextToken();
        if (fto.sw != null && fto.sw.contains(tok)) continue;

        if (fto.is(WC)) {
          // don't use index if one of the terms starts with a wildcard
          t = ft.get();
          if (t[0] == '.') return false;
          // don't use index if certain characters or more than 1 dot are found
          int d = 0;
          for (final byte w : t) {
            if (w == '{' || w == '\\' || w == '.' && ++d > 1) return false;
          }
        }
        // favor full-text index requests over exact queries
        final int costs = data.costs(ft);
        if (costs != 0) ii.costs += Math.max(2, costs / 100);
      }
    }
    return true;
  }
Exemple #5
0
  /**
   * Adds an attribute entry to the internal update buffer.
   *
   * @param pre pre value
   * @param dist parent distance
   * @param name attribute name
   * @param value attribute value
   * @param uri namespace uri reference
   * @param ne namespace flag
   */
  public final void attr(
      final int pre,
      final int dist,
      final int name,
      final byte[] value,
      final int uri,
      final boolean ne) {

    // add attribute to text storage
    final int i = newID();
    final long v = index(pre, i, value, ATTR);
    final int n = ne ? 1 << 7 : 0;
    s(Math.min(IO.MAXATTS, dist) << 3 | ATTR);
    s(n | (byte) (name >> 8));
    s(name);
    s(v >> 32);
    s(v >> 24);
    s(v >> 16);
    s(v >> 8);
    s(v);
    s(0);
    s(0);
    s(0);
    s(uri);
    s(i >> 24);
    s(i >> 16);
    s(i >> 8);
    s(i);
  }
Exemple #6
0
  /**
   * Adds an element entry to the internal update buffer.
   *
   * @param dist parent distance
   * @param name tag name index
   * @param asize number of attributes
   * @param size node size
   * @param uri namespace uri reference
   * @param ne namespace flag
   */
  public final void elem(
      final int dist,
      final int name,
      final int asize,
      final int size,
      final int uri,
      final boolean ne) {

    // build and insert new entry
    final int i = newID();
    final int n = ne ? 1 << 7 : 0;
    s(Math.min(IO.MAXATTS, asize) << 3 | ELEM);
    s(n | (byte) (name >> 8));
    s(name);
    s(uri);
    s(dist >> 24);
    s(dist >> 16);
    s(dist >> 8);
    s(dist);
    s(size >> 24);
    s(size >> 16);
    s(size >> 8);
    s(size);
    s(i >> 24);
    s(i >> 16);
    s(i >> 8);
    s(i);
  }
Exemple #7
0
 /**
  * Finds line and column for the specified query parser.
  *
  * @param parser parser
  */
 void pos(final InputParser parser) {
   markedCol = parser.mark;
   if (info != null) return;
   // check if line/column information has already been added
   parser.pos = Math.min(parser.mark, parser.length);
   info = new InputInfo(parser);
 }
Exemple #8
0
  /**
   * Performs a wildcard search for the specified token.
   *
   * @param token token to look for
   * @return iterator
   */
  private synchronized IndexIterator wc(final byte[] token) {
    final FTIndexIterator it = FTIndexIterator.FTEMPTY;
    final FTWildcard wc = new FTWildcard(token);
    if (!wc.parse()) return it;

    final IntList pr = new IntList();
    final IntList ps = new IntList();
    final byte[] pref = wc.prefix();
    final int pl = pref.length, tl = tp.length;
    final int l = Math.min(tl - 1, wc.max());
    for (int ti = pl; ti <= l; ti++) {
      int i = tp[ti];
      if (i == -1) continue;
      int c = ti + 1;
      int e = -1;
      while (c < tl && e == -1) e = tp[c++];
      i = find(pref, i, e, ti);

      while (i < e) {
        final byte[] t = inY.readBytes(i, ti);
        if (!startsWith(t, pref)) break;
        if (wc.match(t)) {
          inZ.cursor(pointer(i, ti));
          final int s = size(i, ti);
          for (int d = 0; d < s; d++) {
            pr.add(inZ.readNum());
            ps.add(inZ.readNum());
          }
        }
        i += ti + ENTRY;
      }
    }
    return iter(new FTCache(pr, ps), token);
  }
Exemple #9
0
  /**
   * Performs a fuzzy search for the specified token with a maximum number of errors.
   *
   * @param token token to look for
   * @param k number of errors allowed
   * @return iterator
   */
  private synchronized IndexIterator fuzzy(final byte[] token, final int k) {
    FTIndexIterator it = FTIndexIterator.FTEMPTY;
    final int tokl = token.length, tl = tp.length;
    final int e = Math.min(tl - 1, tokl + k);
    int s = Math.max(1, tokl - k) - 1;

    while (++s <= e) {
      int p = tp[s];
      if (p == -1) continue;
      int t = s + 1, r = -1;
      while (t < tl && r == -1) r = tp[t++];
      while (p < r) {
        if (ls.similar(inY.readBytes(p, s), token, k)) {
          it = FTIndexIterator.union(iter(pointer(p, s), size(p, s), inZ, token), it);
        }
        p += s + ENTRY;
      }
    }
    return it;
  }
Exemple #10
0
  @Override
  public synchronized int costs(final IndexToken it) {
    final byte[] tok = it.get();
    if (tok.length > data.meta.maxlen) return Integer.MAX_VALUE;

    // estimate costs for queries which stretch over multiple index entries
    final FTOpt opt = ((FTLexer) it).ftOpt();
    if (opt.is(FZ) || opt.is(WC)) return Math.max(1, data.meta.size >> 4);

    return entry(tok).size;
  }
Exemple #11
0
 /**
  * Retrieves and returns the whole text and closes the stream.
  *
  * @return contents
  * @throws IOException I/O exception
  */
 public byte[] content() throws IOException {
   try {
     if (length > 0) {
       // input length is known in advance
       final int sl = (int) Math.min(Integer.MAX_VALUE, length);
       final byte[] bytes = new byte[sl];
       for (int c = 0; c < sl; c++) bytes[c] = (byte) next();
       return bytes;
     }
     // parse until end of stream
     final ByteList bl = new ByteList();
     for (int ch; (ch = next()) != -1; ) bl.add(ch);
     return bl.toArray();
   } finally {
     close();
   }
 }
Exemple #12
0
  /**
   * Inserts a data instance at the specified pre value. Note that the specified data instance must
   * differ from this instance.
   *
   * @param ipre value at which to insert new data
   * @param ipar parent pre value of node
   * @param clip data clip
   */
  public final void insert(final int ipre, final int ipar, final DataClip clip) {
    meta.update();

    // update value and document indexes
    if (meta.updindex) indexBegin();
    resources.insert(ipre, clip);

    final int dsize = clip.size();
    final int buf = Math.min(dsize, IO.BLOCKSIZE >> IO.NODEPOWER);
    // resize buffer to cache more entries
    buffer(buf);

    // find all namespaces in scope to avoid duplicate declarations
    final TokenMap nsScope = nspaces.scope(ipar, this);

    // loop through all entries
    final IntList preStack = new IntList();
    final NSNode nsRoot = nspaces.current();
    final HashSet<NSNode> newNodes = new HashSet<NSNode>();
    final IntList flagPres = new IntList();

    // indicates if database only contains a dummy node
    final Data data = clip.data;
    int c = 0;
    for (int dpre = clip.start; dpre < clip.end; ++dpre, ++c) {
      if (c != 0 && c % buf == 0) insert(ipre + c - buf);

      final int pre = ipre + c;
      final int dkind = data.kind(dpre);
      final int dpar = data.parent(dpre, dkind);
      // ipar < 0 if document nodes on top level are added
      final int dis = dpar >= 0 ? dpre - dpar : ipar >= 0 ? pre - ipar : 0;
      final int par = dis == 0 ? -1 : pre - dis;

      if (c == 0) nspaces.root(par, this);

      while (!preStack.isEmpty() && preStack.peek() > par) nspaces.close(preStack.pop());

      switch (dkind) {
        case DOC:
          // add document
          nspaces.prepare();
          final int s = data.size(dpre, dkind);
          doc(pre, s, data.text(dpre, true));
          meta.ndocs++;
          preStack.push(pre);
          break;
        case ELEM:
          // add element
          nspaces.prepare();
          boolean ne = false;
          if (data.nsFlag(dpre)) {
            final Atts at = data.ns(dpre);
            for (int a = 0; a < at.size(); ++a) {
              // see if prefix has been declared/ is part of current ns scope
              final byte[] old = nsScope.get(at.name(a));
              if (old == null || !eq(old, at.value(a))) {
                // we have to keep track of all new NSNodes that are added
                // to the Namespace structure, as their pre values must not
                // be updated. I.e. if an NSNode N with pre value 3 existed
                // prior to inserting and two new nodes are inserted at
                // location pre == 3 we have to make sure N and only N gets
                // updated.
                newNodes.add(nspaces.add(at.name(a), at.value(a), pre));
                ne = true;
              }
            }
          }
          byte[] nm = data.name(dpre, dkind);
          elem(
              dis,
              tagindex.index(nm, null, false),
              data.attSize(dpre, dkind),
              data.size(dpre, dkind),
              nspaces.uri(nm, true),
              ne);
          preStack.push(pre);
          break;
        case TEXT:
        case COMM:
        case PI:
          // add text
          text(pre, dis, data.text(dpre, true), dkind);
          break;
        case ATTR:
          // add attribute
          nm = data.name(dpre, dkind);
          // check if prefix already in nsScope or not
          final byte[] attPref = prefix(nm);
          // check if prefix of attribute has already been declared, otherwise
          // add declaration to parent node
          if (data.nsFlag(dpre) && nsScope.get(attPref) == null) {
            nspaces.add(
                par,
                preStack.isEmpty() ? -1 : preStack.peek(),
                attPref,
                data.nspaces.uri(data.uri(dpre, dkind)),
                this);
            // save pre value to set ns flag later for this node. can't be done
            // here as direct table access would interfere with the buffer
            flagPres.add(par);
          }
          attr(
              pre,
              dis,
              atnindex.index(nm, null, false),
              data.text(dpre, false),
              nspaces.uri(nm, false),
              false);
          break;
      }
    }
    // finalize and update namespace structure
    while (!preStack.isEmpty()) nspaces.close(preStack.pop());
    nspaces.root(nsRoot);

    if (bp != 0) insert(ipre + c - 1 - (c - 1) % buf);
    // reset buffer to old size
    buffer(1);

    // set ns flags
    for (int f = 0; f < flagPres.size(); f++) {
      final int fl = flagPres.get(f);
      table.write2(fl, 1, name(fl) | 1 << 15);
    }

    // increase size of ancestors
    int p = ipar;
    while (p >= 0) {
      final int k = kind(p);
      size(p, k, size(p, k) + dsize);
      p = parent(p, k);
    }

    if (meta.updindex) {
      // add the entries to the ID -> PRE mapping:
      idmap.insert(ipre, id(ipre), dsize);
      indexEnd();
    }

    if (!cache) updateDist(ipre + dsize, dsize);

    // propagate PRE value shifts to namespaces
    if (ipar != -1) nspaces.insert(ipre, dsize, newNodes);
  }
Exemple #13
0
 /**
  * Prints the current stack trace to System.err.
  *
  * @param i number of steps to print
  */
 public static void stack(final int i) {
   errln("You're here:");
   final String[] stack = toArray(new Throwable());
   final int l = Math.min(Math.max(2, i + 2), stack.length);
   for (int s = 2; s < l; ++s) errln(stack[s]);
 }
Exemple #14
0
  /**
   * Formats the specified number and returns a string representation.
   *
   * @param item item
   * @param pics pictures
   * @param ii input info
   * @return picture variables
   * @throws QueryException query exception
   */
  private byte[] format(final ANum item, final Picture[] pics, final InputInfo ii)
      throws QueryException {

    // Rule 1: return results for NaN
    final double d = item.dbl(ii);
    if (Double.isNaN(d)) return nan;

    // Rule 2: check if value if negative (smaller than zero or -0)
    final boolean neg = d < 0 || d == 0 && Double.doubleToLongBits(d) == Long.MIN_VALUE;
    final Picture pic = pics[neg && pics.length == 2 ? 1 : 0];
    final IntList res = new IntList(), intgr = new IntList(), fract = new IntList();
    int exp = 0;

    // Rule 3: percent/permille
    ANum num = item;
    if (pic.pc) num = (ANum) Calc.MULT.ev(num, Int.get(100), ii);
    if (pic.pm) num = (ANum) Calc.MULT.ev(num, Int.get(1000), ii);

    if (Double.isInfinite(num.dbl(ii))) {
      // Rule 4: infinity
      intgr.add(new TokenParser(inf).toArray());
    } else {
      // Rule 5: exponent
      if (pic.minExp != 0 && d != 0) {
        BigDecimal dec = num.dec(ii).abs().stripTrailingZeros();
        int scl = 0;
        if (dec.compareTo(BigDecimal.ONE) >= 0) {
          scl = dec.setScale(0, RoundingMode.HALF_DOWN).precision();
        } else {
          while (dec.compareTo(BigDecimal.ONE) < 0) {
            dec = dec.multiply(BigDecimal.TEN);
            scl--;
          }
          scl++;
        }
        exp = scl - pic.min[0];
        if (exp != 0) {
          final BigDecimal n = BigDecimal.TEN.pow(Math.abs(exp));
          num = (ANum) Calc.MULT.ev(num, Dec.get(exp > 0 ? BigDecimal.ONE.divide(n) : n), ii);
        }
      }
      num = num.round(pic.maxFrac, true).abs();

      // convert positive number to string
      final String s =
          (num instanceof Dbl || num instanceof Flt
                  ? Dec.get(BigDecimal.valueOf(num.dbl(ii)))
                  : num)
              .toString();

      // integer/fractional separator
      final int sep = s.indexOf('.');

      // create integer part
      final int sl = s.length();
      final int il = sep == -1 ? sl : sep;
      for (int i = il; i < pic.min[0]; ++i) intgr.add(zero);
      // fractional number: skip leading 0
      if (!s.startsWith("0.") || pic.min[0] > 0) {
        for (int i = 0; i < il; i++) intgr.add(zero + s.charAt(i) - '0');
      }

      // squeeze in grouping separators
      if (pic.group[0].length == 1 && pic.group[0][0] > 0) {
        // regular pattern with repeating separators
        for (int p = intgr.size() - (neg ? 2 : 1); p > 0; --p) {
          if (p % pic.group[0][0] == 0) intgr.insert(intgr.size() - p, grouping);
        }
      } else {
        // irregular pattern, or no separators at all
        final int gl = pic.group[0].length;
        for (int g = 0; g < gl; ++g) {
          final int pos = intgr.size() - pic.group[0][g];
          if (pos > 0) intgr.insert(pos, grouping);
        }
      }

      // create fractional part
      final int fl = sep == -1 ? 0 : sl - il - 1;
      if (fl != 0) for (int i = sep + 1; i < sl; i++) fract.add(zero + s.charAt(i) - '0');
      for (int i = fl; i < pic.min[1]; ++i) fract.add(zero);

      // squeeze in grouping separators in a reverse manner
      final int ul = fract.size();
      for (int p = pic.group[1].length - 1; p >= 0; p--) {
        final int pos = pic.group[1][p];
        if (pos < ul) fract.insert(pos, grouping);
      }
    }

    // add minus sign
    if (neg && pics.length != 2) res.add(minus);
    // add prefix and integer part
    res.add(pic.prefSuf[0].toArray()).add(intgr.finish());
    // add fractional part
    if (!fract.isEmpty()) res.add(decimal).add(fract.finish());
    // add exponent
    if (pic.minExp != 0) {
      res.add(exponent);
      if (exp < 0) res.add(minus);
      final String s = Integer.toString(Math.abs(exp));
      final int sl = s.length();
      for (int i = sl; i < pic.minExp; i++) res.add(zero);
      for (int i = 0; i < sl; i++) res.add(zero + s.charAt(i) - '0');
    }
    // add suffix
    res.add(pic.prefSuf[1].toArray());
    return new TokenBuilder(res.finish()).finish();
  }
Exemple #15
0
  /**
   * Analyzes the specified patterns.
   *
   * @param patterns patterns
   * @return picture variables
   */
  private Picture[] analyze(final byte[][] patterns) {
    // pictures
    final int picL = patterns.length;
    final Picture[] pics = new Picture[picL];

    // analyze patterns
    for (int p = 0; p < picL; p++) {
      final byte[] pt = patterns[p];
      final Picture pic = new Picture();

      // position (integer/fractional)
      int pos = 0;
      // active character found
      boolean act = false;
      // number of characters after exponent
      int exp = -1;
      // number of optional characters
      final int[] opt = new int[2];

      // loop through all characters
      final int pl = pt.length;
      for (int i = 0, cl; i < pl; i += cl) {
        final int ch = ch(pt, i);
        cl = cl(pt, i);
        boolean active = contains(actives, ch);

        if (ch == decimal) {
          ++pos;
          act = false;
        } else if (ch == optional) {
          opt[pos]++;
        } else if (ch == exponent) {
          if (act && containsActive(pt, i + cl)) {
            exp = 0;
          } else {
            active = false;
          }
        } else if (ch == grouping) {
          if (pos == 0) pic.group[pos] = Array.add(pic.group[pos], pic.min[pos] + opt[pos]);
        } else if (contains(digits, ch)) {
          if (exp == -1) pic.min[pos]++;
          else exp++;
        }

        if (active) {
          act = true;
        } else {
          // passive characters
          pic.pc |= ch == percent;
          pic.pm |= ch == permille;
          // prefixes/suffixes
          pic.prefSuf[pos == 0 && act ? pos + 1 : pos].add(ch);
        }
      }
      // finalize integer-part-grouping-positions
      final int[] igp = pic.group[0];
      final int igl = igp.length;
      for (int g = 0; g < igl; ++g) igp[g] = pic.min[0] + opt[0] - igp[g];

      // check if integer-part-grouping-positions are regular
      // if yes, they are replaced with a single position
      if (igl > 1) {
        boolean reg = true;
        final int i = igp[igl - 1];
        for (int g = igl - 2; g >= 0; --g) reg &= i * igl == igp[g];
        if (reg) pic.group[0] = new int[] {i};
      }

      pic.maxFrac = pic.min[1] + opt[1];
      pic.minExp = Math.max(0, exp);
      pics[p] = pic;
    }
    return pics;
  }