/** * Evaluates the analyze-string function. * * @param val input value * @param ctx query context * @return function result * @throws QueryException query exception */ private Item analyzeString(final byte[] val, final QueryContext ctx) throws QueryException { final Pattern p = pattern(expr[1], expr.length == 3 ? expr[2] : null, ctx); if (p.matcher("").matches()) REGROUP.thrw(info); final String str = string(val); final Matcher m = p.matcher(str); final FElem root = new FElem(Q_ANALYZE, new Atts(FN, FNURI)); int s = 0; while (m.find()) { if (s != m.start()) nonmatch(str.substring(s, m.start()), root); match(m, str, root, 0); s = m.end(); } if (s != str.length()) nonmatch(str.substring(s), root); return root; }
/** * Processes a match. * * @param m matcher * @param str string * @param par parent * @param g group number * @return next group number and position in string */ private static int[] match(final Matcher m, final String str, final FElem par, final int g) { final FElem nd = new FElem(g == 0 ? Q_MATCH : Q_MGROUP, new Atts(FN, FNURI)); if (g > 0) nd.add(Q_NR, token(g)); final int start = m.start(g), end = m.end(g), gc = m.groupCount(); int[] pos = {g + 1, start}; // group and position in string while (pos[0] <= gc && m.end(pos[0]) <= end) { final int st = m.start(pos[0]); if (st >= 0) { // group matched if (pos[1] < st) nd.add(str.substring(pos[1], st)); pos = match(m, str, nd, pos[0]); } else pos[0]++; // skip it } if (pos[1] < end) { nd.add(str.substring(pos[1], end)); pos[1] = end; } par.add(nd); return pos; }
/** * Evaluates the tokenize function. * * @param ctx query context * @return function result * @throws QueryException query exception */ private Value tokenize(final QueryContext ctx) throws QueryException { final byte[] val = checkEStr(expr[0], ctx); final Pattern p = pattern(expr[1], expr.length == 3 ? expr[2] : null, ctx); if (p.matcher("").matches()) REGROUP.thrw(info); final TokenList tl = new TokenList(); final String str = string(val); if (!str.isEmpty()) { final Matcher m = p.matcher(str); int s = 0; while (m.find()) { tl.add(str.substring(s, m.start())); s = m.end(); } tl.add(str.substring(s, str.length())); } return StrSeq.get(tl); }