/**
 * Returns pre values.
 * @param ids unique node ids
 * @param off start offset (inclusive)
 * @param len end offset (exclusive)
 * @return sorted pre values
 */
public final int[] pre(final int[] ids, final int off, final int len) {
  if (meta.updindex) return idmap.pre(ids, off, len);
  final IntList p = new IntList(ids.length);
  for (int i = off; i < len; ++i) p.add(preold(ids[i]));
  return p.sort().toArray();
}
@Override
protected long index(final int pre, final int id, final byte[] value, final int kind) {
  final DataAccess store;
  final TokenObjMap<IntList> map;
  if (kind == ATTR) {
    store = values;
    map = meta.attrindex ? atvBuffer : null;
  } else {
    store = texts;
    // don't index document names
    map = meta.textindex && kind != DOC ? txtBuffer : null;
  }

  // add text to map to index later
  if (meta.updindex && map != null && value.length <= meta.maxlen) {
    IntList ids = map.get(value);
    if (ids == null) {
      ids = new IntList(1);
      map.put(value, ids);
    }
    ids.add(id);
  }

  // add text to text file
  // inline integer value...
  final long v = toSimpleInt(value);
  if (v != Integer.MIN_VALUE) return v | IO.OFFNUM;

  // store text
  final long off = store.length();
  final byte[] val = COMP.get().pack(value);
  store.writeToken(off, val);
  return val == value ? off : off | IO.OFFCOMP;
}
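The returned reference either carries the parsed integer value directly (marked with IO.OFFNUM) or a heap-file offset, optionally marked as compressed (IO.OFFCOMP). Below is a minimal stand-alone sketch of this flag-bit encoding; the masks are hypothetical illustrations, not the actual IO constants.

// Illustrative sketch only: a reference is either a file offset or, if the
// hypothetical NUM_FLAG bit is set, the integer value itself.
public final class InlineRefSketch {
  private static final long NUM_FLAG = 1L << 62;  // hypothetical "inlined number" bit
  private static final long MASK = NUM_FLAG - 1;  // payload bits

  static long encode(final long offsetOrValue, final boolean inlined) {
    return inlined ? offsetOrValue | NUM_FLAG : offsetOrValue;
  }

  static boolean inlined(final long ref) {
    return (ref & NUM_FLAG) != 0;
  }

  static long payload(final long ref) {
    return ref & MASK;
  }

  public static void main(final String[] args) {
    final long small = encode(4711, true);        // small value stored directly
    final long text = encode(1_024_000, false);   // offset into the heap file
    System.out.println(inlined(small) + " " + payload(small)); // true 4711
    System.out.println(inlined(text) + " " + payload(text));   // false 1024000
  }
}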
/**
 * Computes the Soundex value for the specified codepoints.
 * @param cps codepoint array
 * @param mapping mapping for the 26 ASCII letters
 * @return Soundex value
 * @throws QueryException if the Soundex mapping is shorter or longer than 26 characters
 */
public static int[] encode(final int[] cps, final int[] mapping) throws QueryException {
  // check length of character mappings
  if (mapping.length != 26) throw new QueryException("Soundex mapping must have 26 characters");

  // normalize input to ASCII characters (ignore all others)
  final IntList tmp = new IntList(cps.length);
  for (final int cp : cps) {
    final int c = uc(cp);
    if (c >= 'A' && c <= 'Z') tmp.add(c);
  }

  final int[] out = { '0', '0', '0', '0' }, in = tmp.finish();
  final int is = in.length;
  if (is > 0) {
    out[0] = in[0];
    for (int op = 1, ip = 0, lastCode = map(in, ip++, mapping); ip < is && op < 4;) {
      final int code = map(in, ip++, mapping);
      if (code != 0) {
        if (code != '0' && code != lastCode) out[op++] = code;
        lastCode = code;
      }
    }
  }
  return out;
}
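For comparison, here is a minimal stand-alone sketch of classic Soundex: the letter mapping is hard-coded rather than passed in, and the H/W merging refinement is omitted. It is not the method above, only an illustration of the general algorithm.

// Illustrative sketch: simplified classic Soundex (hard-coded mapping, no H/W rule).
public final class SoundexSketch {
  // digit codes for A..Z; '0' marks vowels and other ignored letters
  private static final String CODES = "01230120022455012623010202";

  static String soundex(final String word) {
    final String w = word.toUpperCase();
    final StringBuilder sb = new StringBuilder();
    sb.append(w.charAt(0));
    char last = CODES.charAt(w.charAt(0) - 'A');
    for (int i = 1; i < w.length() && sb.length() < 4; i++) {
      final char ch = w.charAt(i);
      if (ch < 'A' || ch > 'Z') continue;      // skip non-letters
      final char code = CODES.charAt(ch - 'A');
      if (code != '0' && code != last) sb.append(code);
      last = code;
    }
    while (sb.length() < 4) sb.append('0');    // pad to four characters
    return sb.toString();
  }

  public static void main(final String[] args) {
    System.out.println(soundex("Robert"));  // R163
    System.out.println(soundex("Rupert"));  // R163: similar-sounding names collide
  }
}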
/**
 * Performs a wildcard search for the specified token.
 * @param token token to look for
 * @return iterator
 */
private synchronized IndexIterator wc(final byte[] token) {
  final FTIndexIterator it = FTIndexIterator.FTEMPTY;
  final FTWildcard wc = new FTWildcard(token);
  if (!wc.parse()) return it;

  final IntList pr = new IntList();
  final IntList ps = new IntList();
  final byte[] pref = wc.prefix();
  final int pl = pref.length, tl = tp.length;
  final int l = Math.min(tl - 1, wc.max());
  for (int ti = pl; ti <= l; ti++) {
    int i = tp[ti];
    if (i == -1) continue;
    int c = ti + 1;
    int e = -1;
    while (c < tl && e == -1) e = tp[c++];
    i = find(pref, i, e, ti);

    while (i < e) {
      final byte[] t = inY.readBytes(i, ti);
      if (!startsWith(t, pref)) break;
      if (wc.match(t)) {
        inZ.cursor(pointer(i, ti));
        final int s = size(i, ti);
        for (int d = 0; d < s; d++) {
          pr.add(inZ.readNum());
          ps.add(inZ.readNum());
        }
      }
      i += ti + ENTRY;
    }
  }
  return iter(new FTCache(pr, ps), token);
}
@Override
protected void indexDelete(final int pre, final int size) {
  final boolean textI = meta.textindex, attrI = meta.attrindex;
  if (textI || attrI) {
    // collect all keys and ids
    indexBegin();
    final int l = pre + size;
    for (int p = pre; p < l; ++p) {
      final int k = kind(p);
      // consider nodes which are attribute, text, comment, or proc. instruction
      final boolean text = k == TEXT || k == COMM || k == PI;
      if (textI && text || attrI && k == ATTR) {
        final byte[] key = text(p, text);
        if (key.length <= meta.maxlen) {
          final TokenObjMap<IntList> m = text ? txtBuffer : atvBuffer;
          IntList ids = m.get(key);
          if (ids == null) {
            ids = new IntList(1);
            m.put(key, ids);
          }
          ids.add(id(p));
        }
      }
    }
    indexDelete();
  }
}
@Override
public IndexIterator iter(final IndexToken token) {
  final int id = values.id(token.get());
  if (id == 0) return IndexIterator.EMPTY;

  final int len = lenList.get(id);
  final int[] ids = idsList.get(id), pres;
  if (data.meta.updindex) {
    final IntList tmp = new IntList();
    for (int i = 0; i < len; ++i) tmp.add(data.pre(ids[i]));
    pres = tmp.sort().finish();
  } else {
    pres = ids;
  }

  return new IndexIterator() {
    int p;
    @Override
    public boolean more() { return p < len; }
    @Override
    public int pre() { return pres[p++]; }
    @Override
    public int size() { return len; }
  };
}
/**
 * Constructor.
 * @param pr pre values
 * @param ps positions
 */
private FTCache(final IntList pr, final IntList ps) {
  final int s = pr.size();
  final double[] v = new double[s];
  for (int i = 0; i < s; i++) v[i] = (long) pr.get(i) << 32 | ps.get(i);
  order = Array.createOrder(v, true);
  pre = pr;
  pos = ps;
}
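The constructor packs each (pre, pos) pair into a single 64-bit key so that one ordering pass sorts hits by pre value first and by position second. The stand-alone sketch below shows the packing; the real code keeps an order permutation over double keys via Array.createOrder instead of sorting in place.

// Illustrative sketch: packing (pre, pos) pairs into one long for a single sort.
import java.util.Arrays;

public final class PackSortSketch {
  public static void main(final String[] args) {
    final int[] pre = { 7, 2, 7, 2 };
    final int[] pos = { 5, 9, 1, 3 };
    final long[] keys = new long[pre.length];
    for (int i = 0; i < pre.length; i++) {
      // upper 32 bits: pre value, lower 32 bits: position
      keys[i] = (long) pre[i] << 32 | pos[i];
    }
    Arrays.sort(keys);
    for (final long k : keys) {
      System.out.println("pre=" + (k >>> 32) + ", pos=" + (int) k);
    }
    // prints: pre=2,pos=3  pre=2,pos=9  pre=7,pos=1  pre=7,pos=5
  }
}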
@Override
public void add(final ValueCache cache) {
  for (final byte[] key : cache) {
    final IntList vals = cache.ids(key);
    if (!vals.isEmpty()) add(key, vals.sort().finish());
  }
  finish();
}
/**
 * Removes values from the index.
 * @param key key
 * @param vals sorted values
 */
void delete(final byte[] key, final int... vals) {
  final int id = values.id(key), vl = vals.length, l = lenList.get(id), s = l - vl;
  final int[] ids = idsList.get(id);
  for (int i = 0, n = 0, v = 0; i < l; i++) {
    if (v == vl || ids[i] != vals[v]) ids[n++] = ids[i];
    else v++;
  }
  lenList.set(id, s);
  if (s == 0) idsList.set(id, null);
}
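Because both the id list and the values to delete are sorted, a single linear pass suffices. A small stand-alone sketch of the same merge-style removal:

// Illustrative sketch: remove a sorted subset from a sorted array in one pass.
import java.util.Arrays;

public final class DeleteSketch {
  public static void main(final String[] args) {
    final int[] ids = { 1, 3, 4, 7, 9 };
    final int[] vals = { 3, 7 };
    final int l = ids.length, vl = vals.length;
    int n = 0;
    for (int i = 0, v = 0; i < l; i++) {
      // keep the entry unless it matches the next value to delete
      if (v == vl || ids[i] != vals[v]) ids[n++] = ids[i];
      else v++;
    }
    System.out.println(Arrays.toString(Arrays.copyOf(ids, n))); // [1, 4, 9]
  }
}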
/**
 * Returns an iterator for an index entry.
 * @param off offset on entries
 * @param size number of id/pos entries
 * @param da data source
 * @param token index token
 * @return iterator
 */
private static FTIndexIterator iter(final long off, final int size, final DataAccess da,
    final byte[] token) {
  da.cursor(off);
  final IntList pr = new IntList(size);
  final IntList ps = new IntList(size);
  for (int c = 0; c < size; c++) {
    pr.add(da.readNum());
    ps.add(da.readNum());
  }
  return iter(new FTCache(pr, ps), token);
}
/** Finishes the index creation. */
void finish() {
  if (reorder == null) return;
  for (int i = 1; i < reorder.size(); i++) {
    if (reorder.get(i)) Arrays.sort(idsList.get(i), 0, lenList.get(i));
  }
  reorder = null;
}
@Override
public int size() {
  // returns the actual number of indexed entries
  int s = 0;
  final int l = lenList.size();
  for (int c = 1; c < l; c++) {
    if (lenList.get(c) > 0) s++;
  }
  return s;
}
/**
 * Lists resources of the specified database.
 * @return success flag
 * @throws IOException I/O exception
 */
private boolean listDB() throws IOException {
  final String db = args[0];
  final String path = args[1] != null ? args[1] : "";
  if (!Databases.validName(db)) return error(NAME_INVALID_X, db);

  final Table table = new Table();
  table.description = RESOURCES;
  table.header.add(INPUT_PATH);
  table.header.add(TYPE);
  table.header.add(MimeTypes.CONTENT_TYPE);
  table.header.add(SIZE);

  try {
    // add xml documents
    final Data data = Open.open(db, context);
    final Resources res = data.resources;
    final IntList il = res.docs(path);
    final int ds = il.size();
    for (int i = 0; i < ds; i++) {
      final int pre = il.get(i);
      final TokenList tl = new TokenList(3);
      final byte[] file = data.text(pre, true);
      tl.add(file);
      tl.add(DataText.M_XML);
      tl.add(MimeTypes.APP_XML);
      tl.add(data.size(pre, Data.DOC));
      table.contents.add(tl);
    }
    // add binary resources
    for (final byte[] file : res.binaries(path)) {
      final String f = string(file);
      final TokenList tl = new TokenList(3);
      tl.add(file);
      tl.add(DataText.M_RAW);
      tl.add(MimeTypes.get(f));
      tl.add(data.meta.binary(f).length());
      table.contents.add(tl);
    }
    Close.close(data, context);
  } catch (final IOException ex) {
    return error(Util.message(ex));
  }
  out.println(table.sort().finish());
  return true;
}
@Override
public Item item(final QueryContext qc, final InputInfo ii) throws QueryException {
  final Data data = checkData(qc);
  final String path = path(1, qc);
  final Item item = toItem(exprs[2], qc);
  final Options opts = toOptions(3, Q_OPTIONS, new Options(), qc);

  final Updates updates = qc.resources.updates();
  final IntList docs = data.resources.docs(path);
  int d = 0;

  // delete binary resources
  final IOFile bin = data.meta.binary(path);
  if (bin == null || bin.isDir()) throw BXDB_REPLACE_X.get(info, path);

  if (item instanceof Bin) {
    updates.add(new DBStore(data, path, item, info), qc);
  } else {
    if (bin.exists()) updates.add(new DBDelete(data, path, info), qc);
    final NewInput input = checkInput(item, token(path));
    if (docs.isEmpty() || docs.get(0) == 0) {
      // no replacement of first document (because of TableDiskAccess#insert, used > 0, pre = 0)
      updates.add(new DBAdd(data, input, opts, qc, info), qc);
    } else {
      updates.add(new ReplaceDoc(docs.get(0), data, input, opts, qc, info), qc);
      d = 1;
    }
  }

  // delete old documents
  final int ds = docs.size();
  for (; d < ds; d++) updates.add(new DeleteNode(docs.get(d), data, info), qc);
  return null;
}
/**
 * Adds values to the index.
 * @param key key to be indexed
 * @param vals sorted values
 */
void add(final byte[] key, final int... vals) {
  // token index: add values. otherwise, reference existing values
  final int id = type == IndexType.TOKEN ? values.put(key) : values.id(key), vl = vals.length;
  // updatable index: if required, resize existing arrays
  while (idsList.size() < id + 1) idsList.add(null);
  if (lenList.size() < id + 1) lenList.set(id, 0);

  final int len = lenList.get(id), size = len + vl;
  int[] ids = idsList.get(id);
  if (ids == null) {
    ids = vals;
  } else {
    if (ids.length < size) ids = Arrays.copyOf(ids, Array.newSize(size));
    System.arraycopy(vals, 0, ids, len, vl);
    if (ids[len - 1] > vals[0]) {
      if (reorder == null) reorder = new BoolList(values.size());
      reorder.set(id, true);
    }
  }
  idsList.set(id, ids);
  lenList.set(id, size);
}
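New id runs are appended as-is; a full sort is deferred to finish() and only performed for entries whose reorder flag was set, i.e. when the appended run starts below the last existing id. A small stand-alone sketch of the append and the reorder check (here the sort happens immediately instead of being deferred):

// Illustrative sketch: append a sorted run and detect whether a re-sort is needed.
import java.util.Arrays;

public final class AppendSketch {
  public static void main(final String[] args) {
    int[] ids = { 2, 5, 9 };
    final int[] vals = { 4, 7 };
    final int len = ids.length, size = len + vals.length;
    ids = Arrays.copyOf(ids, size);
    System.arraycopy(vals, 0, ids, len, vals.length);
    // the appended run starts below the last existing id, so the list is out of order
    final boolean reorder = ids[len - 1] > vals[0];
    if (reorder) Arrays.sort(ids); // deferred until finish() in the real index
    System.out.println(Arrays.toString(ids)); // [2, 4, 5, 7, 9]
  }
}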
/**
 * Returns a string representation of the index structure.
 * @param all include database contents in the representation. During updates, database
 *   lookups must be avoided, as the data structures will be inconsistent.
 * @return string
 */
public String toString(final boolean all) {
  final TokenBuilder tb = new TokenBuilder();
  tb.addExt(type).add(" INDEX, '").add(data.meta.name).add("':\n");
  final int s = lenList.size();
  for (int m = 1; m < s; m++) {
    final int len = lenList.get(m);
    if (len == 0) continue;

    final int[] ids = idsList.get(m);
    tb.add(" ").addInt(m);
    if (all) tb.add(", key: \"").add(data.text(data.pre(ids[0]), type == IndexType.TEXT)).add('"');
    tb.add(", ids");
    if (all) tb.add("/pres");
    tb.add(": ");
    for (int n = 0; n < len; n++) {
      if (n != 0) tb.add(",");
      tb.addInt(ids[n]);
      if (all) tb.add('/').addInt(data.pre(ids[n]));
    }
    tb.add("\n");
  }
  return tb.toString();
}
@Override
public byte[] info(final MainOptions options) {
  final TokenBuilder tb = new TokenBuilder();
  tb.add(LI_STRUCTURE).add(HASH).add(NL);
  tb.add(LI_NAMES).add(data.meta.names(type)).add(NL);
  final IndexStats stats = new IndexStats(options.get(MainOptions.MAXSTAT));
  final int s = values.size();
  for (int p = 1; p <= s; p++) {
    final int oc = lenList.get(p);
    if (oc > 0 && stats.adding(oc)) stats.add(values.key(p), oc);
  }
  stats.print(tb);
  return tb.finish();
}
@Override
public int costs(final IndexToken it) {
  return lenList.get(values.id(it.get()));
}
/**
 * Inserts a data instance at the specified pre value. Note that the specified data instance
 * must differ from this instance.
 * @param ipre value at which to insert new data
 * @param ipar parent pre value of node
 * @param clip data clip
 */
public final void insert(final int ipre, final int ipar, final DataClip clip) {
  meta.update();

  // update value and document indexes
  if (meta.updindex) indexBegin();
  resources.insert(ipre, clip);

  final int dsize = clip.size();
  final int buf = Math.min(dsize, IO.BLOCKSIZE >> IO.NODEPOWER);
  // resize buffer to cache more entries
  buffer(buf);
  // find all namespaces in scope to avoid duplicate declarations
  final TokenMap nsScope = nspaces.scope(ipar, this);

  // loop through all entries
  final IntList preStack = new IntList();
  final NSNode nsRoot = nspaces.current();
  final HashSet<NSNode> newNodes = new HashSet<NSNode>();
  final IntList flagPres = new IntList();

  // indicates if database only contains a dummy node
  final Data data = clip.data;
  int c = 0;
  for (int dpre = clip.start; dpre < clip.end; ++dpre, ++c) {
    if (c != 0 && c % buf == 0) insert(ipre + c - buf);

    final int pre = ipre + c;
    final int dkind = data.kind(dpre);
    final int dpar = data.parent(dpre, dkind);
    // ipar < 0 if document nodes on top level are added
    final int dis = dpar >= 0 ? dpre - dpar : ipar >= 0 ? pre - ipar : 0;
    final int par = dis == 0 ? -1 : pre - dis;
    if (c == 0) nspaces.root(par, this);
    while (!preStack.isEmpty() && preStack.peek() > par) nspaces.close(preStack.pop());

    switch (dkind) {
      case DOC:
        // add document
        nspaces.prepare();
        final int s = data.size(dpre, dkind);
        doc(pre, s, data.text(dpre, true));
        meta.ndocs++;
        preStack.push(pre);
        break;
      case ELEM:
        // add element
        nspaces.prepare();
        boolean ne = false;
        if (data.nsFlag(dpre)) {
          final Atts at = data.ns(dpre);
          for (int a = 0; a < at.size(); ++a) {
            // see if prefix has been declared/ is part of current ns scope
            final byte[] old = nsScope.get(at.name(a));
            if (old == null || !eq(old, at.value(a))) {
              // we have to keep track of all new NSNodes that are added
              // to the Namespace structure, as their pre values must not
              // be updated. I.e. if an NSNode N with pre value 3 existed
              // prior to inserting and two new nodes are inserted at
              // location pre == 3 we have to make sure N and only N gets
              // updated.
              newNodes.add(nspaces.add(at.name(a), at.value(a), pre));
              ne = true;
            }
          }
        }
        byte[] nm = data.name(dpre, dkind);
        elem(dis, tagindex.index(nm, null, false), data.attSize(dpre, dkind),
            data.size(dpre, dkind), nspaces.uri(nm, true), ne);
        preStack.push(pre);
        break;
      case TEXT:
      case COMM:
      case PI:
        // add text
        text(pre, dis, data.text(dpre, true), dkind);
        break;
      case ATTR:
        // add attribute
        nm = data.name(dpre, dkind);
        // check if prefix already in nsScope or not
        final byte[] attPref = prefix(nm);
        // check if prefix of attribute has already been declared, otherwise
        // add declaration to parent node
        if (data.nsFlag(dpre) && nsScope.get(attPref) == null) {
          nspaces.add(par, preStack.isEmpty() ? -1 : preStack.peek(), attPref,
              data.nspaces.uri(data.uri(dpre, dkind)), this);
          // save pre value to set ns flag later for this node. can't be done
          // here as direct table access would interfere with the buffer
          flagPres.add(par);
        }
        attr(pre, dis, atnindex.index(nm, null, false), data.text(dpre, false),
            nspaces.uri(nm, false), false);
        break;
    }
  }

  // finalize and update namespace structure
  while (!preStack.isEmpty()) nspaces.close(preStack.pop());
  nspaces.root(nsRoot);

  if (bp != 0) insert(ipre + c - 1 - (c - 1) % buf);
  // reset buffer to old size
  buffer(1);

  // set ns flags
  for (int f = 0; f < flagPres.size(); f++) {
    final int fl = flagPres.get(f);
    table.write2(fl, 1, name(fl) | 1 << 15);
  }

  // increase size of ancestors
  int p = ipar;
  while (p >= 0) {
    final int k = kind(p);
    size(p, k, size(p, k) + dsize);
    p = parent(p, k);
  }

  if (meta.updindex) {
    // add the entries to the ID -> PRE mapping:
    idmap.insert(ipre, id(ipre), dsize);
    indexEnd();
  }

  if (!cache) updateDist(ipre + dsize, dsize);

  // propagate PRE value shifts to namespaces
  if (ipar != -1) nspaces.insert(ipre, dsize, newNodes);
}
/**
 * Formats the specified number and returns a string representation.
 * @param item item
 * @param pics pictures
 * @param ii input info
 * @return formatted number
 * @throws QueryException query exception
 */
private byte[] format(final ANum item, final Picture[] pics, final InputInfo ii)
    throws QueryException {

  // Rule 1: return results for NaN
  final double d = item.dbl(ii);
  if (Double.isNaN(d)) return nan;

  // Rule 2: check if value is negative (smaller than zero or -0)
  final boolean neg = d < 0 || d == 0 && Double.doubleToLongBits(d) == Long.MIN_VALUE;
  final Picture pic = pics[neg && pics.length == 2 ? 1 : 0];
  final IntList res = new IntList(), intgr = new IntList(), fract = new IntList();
  int exp = 0;

  // Rule 3: percent/permille
  ANum num = item;
  if (pic.pc) num = (ANum) Calc.MULT.ev(num, Int.get(100), ii);
  if (pic.pm) num = (ANum) Calc.MULT.ev(num, Int.get(1000), ii);

  if (Double.isInfinite(num.dbl(ii))) {
    // Rule 4: infinity
    intgr.add(new TokenParser(inf).toArray());
  } else {
    // Rule 5: exponent
    if (pic.minExp != 0 && d != 0) {
      BigDecimal dec = num.dec(ii).abs().stripTrailingZeros();
      int scl = 0;
      if (dec.compareTo(BigDecimal.ONE) >= 0) {
        scl = dec.setScale(0, RoundingMode.HALF_DOWN).precision();
      } else {
        while (dec.compareTo(BigDecimal.ONE) < 0) {
          dec = dec.multiply(BigDecimal.TEN);
          scl--;
        }
        scl++;
      }
      exp = scl - pic.min[0];
      if (exp != 0) {
        final BigDecimal n = BigDecimal.TEN.pow(Math.abs(exp));
        num = (ANum) Calc.MULT.ev(num, Dec.get(exp > 0 ? BigDecimal.ONE.divide(n) : n), ii);
      }
    }
    num = num.round(pic.maxFrac, true).abs();

    // convert positive number to string
    final String s = (num instanceof Dbl || num instanceof Flt ?
        Dec.get(BigDecimal.valueOf(num.dbl(ii))) : num).toString();

    // integer/fractional separator
    final int sep = s.indexOf('.');

    // create integer part
    final int sl = s.length();
    final int il = sep == -1 ? sl : sep;
    for (int i = il; i < pic.min[0]; ++i) intgr.add(zero);
    // fractional number: skip leading 0
    if (!s.startsWith("0.") || pic.min[0] > 0) {
      for (int i = 0; i < il; i++) intgr.add(zero + s.charAt(i) - '0');
    }

    // squeeze in grouping separators
    if (pic.group[0].length == 1 && pic.group[0][0] > 0) {
      // regular pattern with repeating separators
      for (int p = intgr.size() - (neg ? 2 : 1); p > 0; --p) {
        if (p % pic.group[0][0] == 0) intgr.insert(intgr.size() - p, grouping);
      }
    } else {
      // irregular pattern, or no separators at all
      final int gl = pic.group[0].length;
      for (int g = 0; g < gl; ++g) {
        final int pos = intgr.size() - pic.group[0][g];
        if (pos > 0) intgr.insert(pos, grouping);
      }
    }

    // create fractional part
    final int fl = sep == -1 ? 0 : sl - il - 1;
    if (fl != 0) for (int i = sep + 1; i < sl; i++) fract.add(zero + s.charAt(i) - '0');
    for (int i = fl; i < pic.min[1]; ++i) fract.add(zero);

    // squeeze in grouping separators in a reverse manner
    final int ul = fract.size();
    for (int p = pic.group[1].length - 1; p >= 0; p--) {
      final int pos = pic.group[1][p];
      if (pos < ul) fract.insert(pos, grouping);
    }
  }

  // add minus sign
  if (neg && pics.length != 2) res.add(minus);
  // add prefix and integer part
  res.add(pic.prefSuf[0].toArray()).add(intgr.finish());
  // add fractional part
  if (!fract.isEmpty()) res.add(decimal).add(fract.finish());
  // add exponent
  if (pic.minExp != 0) {
    res.add(exponent);
    if (exp < 0) res.add(minus);
    final String s = Integer.toString(Math.abs(exp));
    final int sl = s.length();
    for (int i = sl; i < pic.minExp; i++) res.add(zero);
    for (int i = 0; i < sl; i++) res.add(zero + s.charAt(i) - '0');
  }
  // add suffix
  res.add(pic.prefSuf[1].toArray());
  return new TokenBuilder(res.finish()).finish();
}
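The regular-grouping branch above inserts a separator before every block of `group` digits, counted from the right, while the digit list grows. A small stand-alone sketch of that loop with a hard-coded group size of three and a comma as separator:

// Illustrative sketch: insert grouping separators every three digits from the right.
public final class GroupingSketch {
  public static void main(final String[] args) {
    final StringBuilder digits = new StringBuilder("1234567");
    final int group = 3;
    // p counts digits from the right; size() - p stays just before the p-th digit
    for (int p = digits.length() - 1; p > 0; --p) {
      if (p % group == 0) digits.insert(digits.length() - p, ',');
    }
    System.out.println(digits); // 1,234,567
  }
}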