Beispiel #1
0
  /**
   * Evaluates the analyze-string function.
   *
   * @param val input value
   * @param ctx query context
   * @return function result
   * @throws QueryException query exception
   */
  private Item analyzeString(final byte[] val, final QueryContext ctx) throws QueryException {

    final Pattern p = pattern(expr[1], expr.length == 3 ? expr[2] : null, ctx);
    if (p.matcher("").matches()) REGROUP.thrw(info);
    final String str = string(val);
    final Matcher m = p.matcher(str);

    final FElem root = new FElem(Q_ANALYZE, new Atts(FN, FNURI));
    int s = 0;
    while (m.find()) {
      if (s != m.start()) nonmatch(str.substring(s, m.start()), root);
      match(m, str, root, 0);
      s = m.end();
    }
    if (s != str.length()) nonmatch(str.substring(s), root);
    return root;
  }
Beispiel #2
0
  /**
   * Evaluates the tokenize function.
   *
   * @param ctx query context
   * @return function result
   * @throws QueryException query exception
   */
  private Value tokenize(final QueryContext ctx) throws QueryException {
    final byte[] val = checkEStr(expr[0], ctx);
    final Pattern p = pattern(expr[1], expr.length == 3 ? expr[2] : null, ctx);
    if (p.matcher("").matches()) REGROUP.thrw(info);

    final TokenList tl = new TokenList();
    final String str = string(val);
    if (!str.isEmpty()) {
      final Matcher m = p.matcher(str);
      int s = 0;
      while (m.find()) {
        tl.add(str.substring(s, m.start()));
        s = m.end();
      }
      tl.add(str.substring(s, str.length()));
    }
    return StrSeq.get(tl);
  }
Beispiel #3
0
  /**
   * Evaluates the replace function.
   *
   * @param val input value
   * @param ctx query context
   * @return function result
   * @throws QueryException query exception
   */
  private Item replace(final byte[] val, final QueryContext ctx) throws QueryException {
    final byte[] rep = checkStr(expr[2], ctx);
    for (int i = 0; i < rep.length; ++i) {
      if (rep[i] == '\\') {
        if (i + 1 == rep.length || rep[i + 1] != '\\' && rep[i + 1] != '$') FUNREPBS.thrw(info);
        ++i;
      }
      if (rep[i] == '$'
          && (i == 0 || rep[i - 1] != '\\')
          && (i + 1 == rep.length || !digit(rep[i + 1]))) FUNREPDOL.thrw(info);
    }

    final Pattern p = pattern(expr[1], expr.length == 4 ? expr[3] : null, ctx);
    if (p.pattern().isEmpty()) REGROUP.thrw(info);

    String r = string(rep);
    if ((p.flags() & Pattern.LITERAL) != 0) {
      r = SLASH.matcher(BSLASH.matcher(r).replaceAll("\\\\\\\\")).replaceAll("\\\\\\$");
    }

    try {
      return Str.get(p.matcher(string(val)).replaceAll(r));
    } catch (final Exception ex) {
      if (ex.getMessage().contains("No group")) REGROUP.thrw(info);
      throw REGPAT.thrw(info, ex);
    }
  }
Beispiel #4
0
/**
 * String pattern functions.
 *
 * @author BaseX Team 2005-12, BSD License
 * @author Christian Gruen
 */
public final class FNPat extends StandardFunc {
  /** Pattern cache. */
  private final TokenObjMap<Pattern> patterns = new TokenObjMap<Pattern>();

  /** Slash pattern. */
  private static final Pattern SLASH = Pattern.compile("\\$");
  /** Slash pattern. */
  private static final Pattern BSLASH = Pattern.compile("\\\\");

  /** Root element for the analyze-string-result function. */
  private static final QNm Q_ANALYZE = new QNm("fn:analyze-string-result", FNURI);
  /** Element for the analyze-string-result function. */
  private static final QNm Q_MATCH = new QNm("fn:match", FNURI);
  /** Element for the analyze-string-result function. */
  private static final QNm Q_NONMATCH = new QNm("fn:non-match", FNURI);
  /** Element for the analyze-string-result function. */
  private static final QNm Q_MGROUP = new QNm("fn:group", FNURI);
  /** Attribute for the analyze-string-result function. */
  private static final QNm Q_NR = new QNm("nr");

  /**
   * Constructor.
   *
   * @param ii input info
   * @param f function definition
   * @param e arguments
   */
  public FNPat(final InputInfo ii, final Function f, final Expr... e) {
    super(ii, f, e);
  }

  @Override
  public Iter iter(final QueryContext ctx) throws QueryException {
    switch (sig) {
      case TOKENIZE:
        return tokenize(ctx).iter();
      default:
        return super.iter(ctx);
    }
  }

  @Override
  public Value value(final QueryContext ctx) throws QueryException {
    switch (sig) {
      case TOKENIZE:
        return tokenize(ctx);
      default:
        return super.value(ctx);
    }
  }

  @Override
  public Item item(final QueryContext ctx, final InputInfo ii) throws QueryException {
    switch (sig) {
      case MATCHES:
        return matches(checkEStr(expr[0], ctx), ctx);
      case REPLACE:
        return replace(checkEStr(expr[0], ctx), ctx);
      case ANALYZE_STRING:
        return analyzeString(checkEStr(expr[0], ctx), ctx);
      default:
        return super.item(ctx, ii);
    }
  }

  /**
   * Evaluates the match function.
   *
   * @param val input value
   * @param ctx query context
   * @return function result
   * @throws QueryException query exception
   */
  private Item matches(final byte[] val, final QueryContext ctx) throws QueryException {
    final Pattern p = pattern(expr[1], expr.length == 3 ? expr[2] : null, ctx);
    return Bln.get(p.matcher(string(val)).find());
  }

  /**
   * Evaluates the analyze-string function.
   *
   * @param val input value
   * @param ctx query context
   * @return function result
   * @throws QueryException query exception
   */
  private Item analyzeString(final byte[] val, final QueryContext ctx) throws QueryException {

    final Pattern p = pattern(expr[1], expr.length == 3 ? expr[2] : null, ctx);
    if (p.matcher("").matches()) REGROUP.thrw(info);
    final String str = string(val);
    final Matcher m = p.matcher(str);

    final FElem root = new FElem(Q_ANALYZE, new Atts(FN, FNURI));
    int s = 0;
    while (m.find()) {
      if (s != m.start()) nonmatch(str.substring(s, m.start()), root);
      match(m, str, root, 0);
      s = m.end();
    }
    if (s != str.length()) nonmatch(str.substring(s), root);
    return root;
  }

  /**
   * Processes a match.
   *
   * @param m matcher
   * @param str string
   * @param par parent
   * @param g group number
   * @return next group number and position in string
   */
  private static int[] match(final Matcher m, final String str, final FElem par, final int g) {

    final FElem nd = new FElem(g == 0 ? Q_MATCH : Q_MGROUP, new Atts(FN, FNURI));
    if (g > 0) nd.add(Q_NR, token(g));

    final int start = m.start(g), end = m.end(g), gc = m.groupCount();
    int[] pos = {g + 1, start}; // group and position in string
    while (pos[0] <= gc && m.end(pos[0]) <= end) {
      final int st = m.start(pos[0]);
      if (st >= 0) { // group matched
        if (pos[1] < st) nd.add(str.substring(pos[1], st));
        pos = match(m, str, nd, pos[0]);
      } else pos[0]++; // skip it
    }
    if (pos[1] < end) {
      nd.add(str.substring(pos[1], end));
      pos[1] = end;
    }
    par.add(nd);
    return pos;
  }

  /**
   * Processes a non-match.
   *
   * @param text text
   * @param par root node
   */
  private static void nonmatch(final String text, final FElem par) {
    par.add(new FElem(Q_NONMATCH, new Atts(FN, FNURI)).add(text));
  }

  /**
   * Evaluates the replace function.
   *
   * @param val input value
   * @param ctx query context
   * @return function result
   * @throws QueryException query exception
   */
  private Item replace(final byte[] val, final QueryContext ctx) throws QueryException {
    final byte[] rep = checkStr(expr[2], ctx);
    for (int i = 0; i < rep.length; ++i) {
      if (rep[i] == '\\') {
        if (i + 1 == rep.length || rep[i + 1] != '\\' && rep[i + 1] != '$') FUNREPBS.thrw(info);
        ++i;
      }
      if (rep[i] == '$'
          && (i == 0 || rep[i - 1] != '\\')
          && (i + 1 == rep.length || !digit(rep[i + 1]))) FUNREPDOL.thrw(info);
    }

    final Pattern p = pattern(expr[1], expr.length == 4 ? expr[3] : null, ctx);
    if (p.pattern().isEmpty()) REGROUP.thrw(info);

    String r = string(rep);
    if ((p.flags() & Pattern.LITERAL) != 0) {
      r = SLASH.matcher(BSLASH.matcher(r).replaceAll("\\\\\\\\")).replaceAll("\\\\\\$");
    }

    try {
      return Str.get(p.matcher(string(val)).replaceAll(r));
    } catch (final Exception ex) {
      if (ex.getMessage().contains("No group")) REGROUP.thrw(info);
      throw REGPAT.thrw(info, ex);
    }
  }

  /**
   * Evaluates the tokenize function.
   *
   * @param ctx query context
   * @return function result
   * @throws QueryException query exception
   */
  private Value tokenize(final QueryContext ctx) throws QueryException {
    final byte[] val = checkEStr(expr[0], ctx);
    final Pattern p = pattern(expr[1], expr.length == 3 ? expr[2] : null, ctx);
    if (p.matcher("").matches()) REGROUP.thrw(info);

    final TokenList tl = new TokenList();
    final String str = string(val);
    if (!str.isEmpty()) {
      final Matcher m = p.matcher(str);
      int s = 0;
      while (m.find()) {
        tl.add(str.substring(s, m.start()));
        s = m.end();
      }
      tl.add(str.substring(s, str.length()));
    }
    return StrSeq.get(tl);
  }

  /**
   * Returns a regular expression pattern.
   *
   * @param pattern input pattern
   * @param modifier modifier item
   * @param ctx query context
   * @return pattern modifier
   * @throws QueryException query exception
   */
  private Pattern pattern(final Expr pattern, final Expr modifier, final QueryContext ctx)
      throws QueryException {

    final byte[] pat = checkStr(pattern, ctx);
    final byte[] mod = modifier != null ? checkStr(modifier, ctx) : null;
    final TokenBuilder tb = new TokenBuilder(pat);
    if (mod != null) tb.add(0).add(mod);
    final byte[] key = tb.finish();
    Pattern p = patterns.get(key);
    if (p == null) {
      p = RegExParser.parse(pat, mod, ctx.sc.xquery3(), info);
      patterns.add(key, p);
    }
    return p;
  }

  @Override
  public boolean xquery3() {
    return sig == ANALYZE_STRING;
  }

  @Override
  public boolean uses(final Use u) {
    return u == Use.X30 && xquery3() || u == Use.CNS && sig == ANALYZE_STRING || super.uses(u);
  }
}
Beispiel #5
0
 /**
  * Evaluates the match function.
  *
  * @param val input value
  * @param ctx query context
  * @return function result
  * @throws QueryException query exception
  */
 private Item matches(final byte[] val, final QueryContext ctx) throws QueryException {
   final Pattern p = pattern(expr[1], expr.length == 3 ? expr[2] : null, ctx);
   return Bln.get(p.matcher(string(val)).find());
 }