public void test_datatypeLiteral_xsd_double() { final URI datatype = XMLSchema.DOUBLE; // Note: leading zeros are ignored in the xsd:int value space. final String lit1 = "-4.0"; final String lit2 = "005"; final String lit3 = "5."; final String lit4 = "5.0"; final String lit5 = "6"; final byte[] k1 = fixture.datatypeLiteral2key(datatype, lit1); final byte[] k2 = fixture.datatypeLiteral2key(datatype, lit2); final byte[] k3 = fixture.datatypeLiteral2key(datatype, lit3); final byte[] k4 = fixture.datatypeLiteral2key(datatype, lit4); final byte[] k5 = fixture.datatypeLiteral2key(datatype, lit5); if (log.isInfoEnabled()) { log.info("k1(double:" + lit1 + ") = " + BytesUtil.toString(k1)); log.info("k2(double:" + lit2 + ") = " + BytesUtil.toString(k2)); log.info("k3(double:" + lit3 + ") = " + BytesUtil.toString(k3)); log.info("k4(double:" + lit3 + ") = " + BytesUtil.toString(k4)); log.info("k5(double:" + lit5 + ") = " + BytesUtil.toString(k5)); } assertTrue(BytesUtil.compareBytes(k1, k2) < 0); assertTrue(BytesUtil.compareBytes(k4, k5) < 0); /* * Note: if we do not normalize data type values then these are * inequalities. */ assertTrue(BytesUtil.compareBytes(k2, k3) != 0); // 005 != 5. assertTrue(BytesUtil.compareBytes(k3, k4) != 0); // 5. != 5.0 }
/**
 * Verify that the <code>xsd:float</code> and <code>xsd:double</code> value spaces are disjoint:
 * the same lexical form must be assigned different keys for the two datatypes.
 */
public void test_datatypeLiteral_xsd_float_not_double() {

    final String lexicalForm = "04.21";

    final byte[] floatKey = fixture.datatypeLiteral2key(XMLSchema.FLOAT, lexicalForm);
    final byte[] doubleKey = fixture.datatypeLiteral2key(XMLSchema.DOUBLE, lexicalForm);

    if (log.isInfoEnabled()) {
        log.info("k1(float:" + lexicalForm + ") = " + BytesUtil.toString(floatKey));
        log.info("k2(double:" + lexicalForm + ") = " + BytesUtil.toString(doubleKey));
    }

    // The two keys must not compare as equal.
    final int order = BytesUtil.compareBytes(floatKey, doubleKey);

    assertTrue(order != 0);
}
public void test_datatypeLiteral_xsd_boolean() { final URI datatype = XMLSchema.BOOLEAN; final String lit1 = "true"; final String lit2 = "false"; final String lit3 = "1"; final String lit4 = "0"; final byte[] k1 = fixture.datatypeLiteral2key(datatype, lit1); final byte[] k2 = fixture.datatypeLiteral2key(datatype, lit2); final byte[] k3 = fixture.datatypeLiteral2key(datatype, lit3); final byte[] k4 = fixture.datatypeLiteral2key(datatype, lit4); if (log.isInfoEnabled()) { log.info("k1(boolean:" + lit1 + ") = " + BytesUtil.toString(k1)); log.info("k2(boolean:" + lit2 + ") = " + BytesUtil.toString(k2)); log.info("k3(boolean:" + lit3 + ") = " + BytesUtil.toString(k3)); log.info("k4(boolean:" + lit4 + ") = " + BytesUtil.toString(k4)); } assertTrue(BytesUtil.compareBytes(k1, k2) != 0); assertTrue(BytesUtil.compareBytes(k1, k2) > 0); /* * Note: if we do not normalize data type values then these are * inequalities. */ assertTrue(BytesUtil.compareBytes(k1, k3) != 0); // true != 1 assertTrue(BytesUtil.compareBytes(k2, k4) != 0); // false != 0 }
/**
 * Test verifies the ordering among URIs, Literals, and BNodes. This ordering is important when
 * batching terms of these different types into the term index since you want to insert the term
 * types according to this order for the best performance.
 */
public void test_termTypeOrder() {

    /*
     * One key per term type. The specific values are irrelevant: only the
     * relative order between the types is under test.
     */
    final byte[] uriKey = fixture.uri2key("http://www.cognitiveweb.org");
    final byte[] literalKey = fixture.plainLiteral2key("hello world!");
    final byte[] bnodeKey = fixture.blankNode2Key("a12");

    // URIs order before Literals.
    final int uriVsLiteral = BytesUtil.compareBytes(uriKey, literalKey);
    assertTrue(uriVsLiteral < 0);

    // Literals order before BNodes.
    final int literalVsBnode = BytesUtil.compareBytes(literalKey, bnodeKey);
    assertTrue(literalVsBnode < 0);
}
/**
 * Verifies that the keys generated for three blank node identifiers are strictly increasing in
 * the order "_12", "_abc", "abc".
 */
public void test_blankNode() {

    final String first = "_12";
    final String second = "_abc";
    final String third = "abc";

    final byte[] firstKey = fixture.blankNode2Key(first);
    final byte[] secondKey = fixture.blankNode2Key(second);
    final byte[] thirdKey = fixture.blankNode2Key(third);

    if (log.isInfoEnabled()) {
        log.info("k1(bnodeId:" + first + ") = " + BytesUtil.toString(firstKey));
        log.info("k2(bnodeId:" + second + ") = " + BytesUtil.toString(secondKey));
        log.info("k3(bnodeId:" + third + ") = " + BytesUtil.toString(thirdKey));
    }

    final int firstVsSecond = BytesUtil.compareBytes(firstKey, secondKey);
    assertTrue(firstVsSecond < 0);

    final int secondVsThird = BytesUtil.compareBytes(secondKey, thirdKey);
    assertTrue(secondVsThird < 0);
}
/**
 * Verifies that the keys generated for plain literals order a literal before any longer literal
 * that extends it ("abc", then "abcd", then "abcde").
 */
public void test_plainLiteral() {

    final String shortest = "abc";
    final String middle = "abcd";
    final String longest = "abcde";

    final byte[] shortestKey = fixture.plainLiteral2key(shortest);
    final byte[] middleKey = fixture.plainLiteral2key(middle);
    final byte[] longestKey = fixture.plainLiteral2key(longest);

    if (log.isInfoEnabled()) {
        log.info("k1(" + shortest + ") = " + BytesUtil.toString(shortestKey));
        log.info("k2(" + middle + ") = " + BytesUtil.toString(middleKey));
        log.info("k3(" + longest + ") = " + BytesUtil.toString(longestKey));
    }

    final int shortestVsMiddle = BytesUtil.compareBytes(shortestKey, middleKey);
    assertTrue(shortestVsMiddle < 0);

    final int middleVsLongest = BytesUtil.compareBytes(middleKey, longestKey);
    assertTrue(middleVsLongest < 0);
}
public void test_plain_vs_languageCode_literal() { final String en = "en"; // String de = "de"; final String lit1 = "abc"; // String lit2 = "abc"; // String lit3 = "abce"; // final Literal a = new LiteralImpl("foo"); // final Literal b = new LiteralImpl("foo", "en"); final byte[] k1 = fixture.plainLiteral2key(lit1); final byte[] k2 = fixture.languageCodeLiteral2key(en, lit1); // not encoded onto the same key. assertFalse(BytesUtil.bytesEqual(k1, k2)); // the plain literals are ordered before the language code literals. assertTrue(BytesUtil.compareBytes(k1, k2) < 0); }
public void test_languageCodeLiteral() { final String en = "en"; final String de = "de"; final String lit1 = "abc"; final String lit2 = "abc"; final String lit3 = "abce"; final byte[] k1 = fixture.languageCodeLiteral2key(en, lit1); final byte[] k2 = fixture.languageCodeLiteral2key(de, lit2); final byte[] k3 = fixture.languageCodeLiteral2key(de, lit3); if (log.isInfoEnabled()) { log.info("k1(en:" + lit1 + ") = " + BytesUtil.toString(k1)); log.info("k2(de:" + lit2 + ") = " + BytesUtil.toString(k2)); log.info("k3(de:" + lit3 + ") = " + BytesUtil.toString(k3)); } // "en" sorts after "de". assertTrue(BytesUtil.compareBytes(k1, k2) > 0); // en:abc != de:abc assertTrue(BytesUtil.compareBytes(k1, k2) != 0); assertTrue(BytesUtil.compareBytes(k2, k3) < 0); }
public void test_uri() { final String uri1 = "http://www.cognitiveweb.org"; final String uri2 = "http://www.cognitiveweb.org/a"; final String uri3 = "http://www.cognitiveweb.com/a"; final byte[] k1 = fixture.uri2key(uri1); final byte[] k2 = fixture.uri2key(uri2); final byte[] k3 = fixture.uri2key(uri3); if (log.isInfoEnabled()) { log.info("k1(" + uri1 + ") = " + BytesUtil.toString(k1)); log.info("k2(" + uri2 + ") = " + BytesUtil.toString(k2)); log.info("k3(" + uri3 + ") = " + BytesUtil.toString(k3)); } // subdirectory sorts after root directory. assertTrue(BytesUtil.compareBytes(k1, k2) < 0); // .com extension sorts before .org assertTrue(BytesUtil.compareBytes(k2, k3) > 0); }
/** Verify that the value spaces for long, int, short and byte are disjoint. */ public void test_disjoint_value_space() { assertFalse( BytesUtil.bytesEqual( // fixture.datatypeLiteral2key(XMLSchema.LONG, "-1"), // fixture.datatypeLiteral2key(XMLSchema.INT, "-1") // )); assertFalse( BytesUtil.bytesEqual( // fixture.datatypeLiteral2key(XMLSchema.LONG, "-1"), // fixture.datatypeLiteral2key(XMLSchema.SHORT, "-1") // )); assertFalse( BytesUtil.bytesEqual( // fixture.datatypeLiteral2key(XMLSchema.LONG, "-1"), // fixture.datatypeLiteral2key(XMLSchema.BYTE, "-1") // )); assertFalse( BytesUtil.bytesEqual( // fixture.datatypeLiteral2key(XMLSchema.INT, "-1"), // fixture.datatypeLiteral2key(XMLSchema.SHORT, "-1") // )); assertFalse( BytesUtil.bytesEqual( // fixture.datatypeLiteral2key(XMLSchema.INT, "-1"), // fixture.datatypeLiteral2key(XMLSchema.BYTE, "-1") // )); assertFalse( BytesUtil.bytesEqual( // fixture.datatypeLiteral2key(XMLSchema.SHORT, "-1"), // fixture.datatypeLiteral2key(XMLSchema.BYTE, "-1") // )); }
public ITuple<E> seek(final byte[] key) { // clear last visited. lastVisited = -1; // clear current. current = -1; // save the sought key. lastKeyBuffer.reset().append(key); for (int i = 0; i < n; i++) { sourceTuple[i] = sourceIterator[i].seek(key); if (sourceTuple[i] != null && current == -1) { /* * Choose the tuple reported by the first source iterator in the * order in which the source iterators are processed. Any * iterator that does not have a tuple for the seek key will * report a null. The first non-null will therefore be the first * iterator having an exact match for the given key. */ if (INFO) { log.info("Found match: source=" + i + ", key=" + BytesUtil.toString(key)); } current = i; } } // the lookahead tuples are primed for forward traversal. forward = true; if (!deleted) { for (int i = 0; i < n; i++) { if (sourceTuple[i] != null && sourceTuple[i].isDeletedVersion()) { /* * The tuple is marked as "deleted" and the caller did not * request deleted tuples. In this case seek(byte[]) must * return null. */ if (INFO) log.info("Skipping deleted: source=" + current + ", tuple=" + sourceTuple[current]); /* * Clear tuples from other sources having the same key as the * current tuple. */ clearCurrent(); return null; } } } if (current == -1) { /* * There is no tuple equal to the sought key. */ // no tuple for that key. return null; } else { /* * There is a tuple for that key, so consume and return it. */ return consumeLookaheadTuple(); } }
/** * Note: The implementation of {@link #hasPrior()} closes parallels the implementation of {@link * #hasNext()} in the base class. */ public boolean hasPrior() { setForwardDirection(false /*forward*/); /* * Until we find an undeleted tuple (or any tuple if DELETED is * true). */ while (true) { if (current != -1) { if (INFO) log.info("Already matched: source=" + current); return true; } /* * First, make sure that we have a tuple for each source iterator * (unless that iterator is exhausted). */ int nexhausted = 0; for (int i = 0; i < n; i++) { if (sourceTuple[i] == null) { if (sourceIterator[i].hasPrior()) { sourceTuple[i] = sourceIterator[i].prior(); if (DEBUG) log.debug("read sourceTuple[" + i + "]=" + sourceTuple[i]); } else { nexhausted++; } } } if (nexhausted == n) { // the aggregate iterator is exhausted. return false; } /* * Now consider the current tuple for each source iterator in turn * and choose the _first_ iterator having a tuple whose key orders * GTE all the others (or LTE if [reverseScan == true]). This is the * previous tuple to be visited by the aggregate iterator. */ { // current is index of the smallest key so far. assert current == -1; byte[] key = null; // smallest key so far. for (int i = 0; i < n; i++) { if (sourceTuple[i] == null) { // This source is exhausted. continue; } if (current == -1) { current = i; key = sourceTuple[i].getKey(); assert key != null; } else { final byte[] tmp = sourceTuple[i].getKey(); final int ret = BytesUtil.compareBytes(tmp, key); // if (reverseScan ? ret < 0 : ret > 0) { if (ret > 0) { /* * This key orders GT the current key. * * Note: This test MUST be strictly GT since GTE * would break the precedence in which we are * processing the source iterators and give us the * key from the last source by preference when we * need the key from the first source by preference. 
*/ current = i; key = tmp; } } } assert current != -1; } if (sourceTuple[current].isDeletedVersion() && !deleted) { /* * The tuple is marked as "deleted" and the caller did not * request deleted tuples so we skip this key and begin again * with the next key visible under the fused iterator view. */ if (INFO) { log.info("Skipping deleted: source=" + current + ", tuple=" + sourceTuple[current]); } /* * Clear tuples from other sources having the same key as the * current tuple. */ clearCurrent(); continue; } if (INFO) { log.info("Will visit next: source=" + current + ", tuple: " + sourceTuple[current]); } return true; } }
/** * Set the direction of iterator progress. Clears {@link #sourceTuple} iff the current direction * is different from the new direction and is otherwise a NOP. * * <p>Note: Care is required for sequences such as * * <pre> * ITuple t1 = next(); * * ITuple t2 = prior(); * </pre> * * <p>to visit the same tuple for {@link #next()} and {@link #prior()}. * * @param forward <code>true</code> iff the new direction of iterator progress is forward using * {@link #hasNext()} and {@link #next()}. */ private void setForwardDirection(boolean forward) { if (this.forward != forward) { if (INFO) log.info("Changing direction: forward=" + forward); /* * This is the last key visited -or- null iff nothing has been * visited. */ final byte[] lastKeyVisited; if (lastVisited == -1) { lastKeyVisited = null; } else { // lastKeyVisited = ((ITupleCursor2<E>) sourceIterator[lastVisited]) // .tuple().getKey(); lastKeyVisited = lastKeyBuffer.getKey(); if (INFO) log.info("key for last tuple visited=" + BytesUtil.toString(lastKeyVisited)); } for (int i = 0; i < n; i++) { /* * Recover the _current_ tuple for each source iterator. */ // current tuple for the source iterator. ITuple<E> tuple = ((ITupleCursor2<E>) sourceIterator[i]).tuple(); if (INFO) log.info("sourceIterator[" + i + "]=" + tuple); if (lastKeyVisited != null) { /* * When we are changing to [forward == true] (visiting the * next tuples in the index order), then we advance the * source iterator zero or more tuples until it is * positioned GT the lastVisitedKey. * * When we are changing to [forward == false] (visiting the * prior tuples in the index order), then we backup the * source iterator zero or more tuples until it is * positioned LT the lastVisitedKey. */ while (tuple != null) { final int ret = BytesUtil.compareBytes( // tuple.getKey(), // lastKeyVisited // ); final boolean ok = forward ? 
ret > 0 : ret < 0; if (ok) break; /* * If the source iterator is currently positioned on the * same key as the last tuple that we visited then we * need to move it off of that key - either to the * previous or the next visitable tuple depending on the * new direction for the iterator. */ if (forward) { if (sourceIterator[i].hasNext()) { // next tuple tuple = sourceIterator[i].next(); } else { // exhausted in this direction. tuple = null; } } else { if (sourceIterator[i].hasPrior()) { // prior tuple tuple = sourceIterator[i].prior(); } else { // exhausted in this direction. tuple = null; } } if (INFO) log.info( "skipping tuple: source=" + i + ", direction=" + (forward ? "next" : "prior") + ", newTuple=" + tuple); } } sourceTuple[i] = tuple; // as assigned to source[i]. if (INFO) log.info("sourceTuple [" + i + "]=" + sourceTuple[i]); } // set the new iterator direction. this.forward = forward; // clear current since the old lookahead choice is no longer valid. this.current = -1; } }
/** * Checks if the dividing record passed as arguments is in the multi-dimensional search range * defined by this class. */ public boolean isInSearchRange(final byte[] dividingRecord) { final boolean dimShownToBeLargerThanMin[] = new boolean[numDimensions]; final boolean dimShownToBeSmallerThanMax[] = new boolean[numDimensions]; // get first byte in which the values differ int firstDifferingByte = 0; for (; firstDifferingByte < dividingRecord.length; firstDifferingByte++) { if (dividingRecord[firstDifferingByte] != searchMinZOrder[firstDifferingByte] || dividingRecord[firstDifferingByte] != searchMaxZOrder[firstDifferingByte]) { break; } } /** * We now scan sequentially over the bit array, starting with firstDifferingByte. Thereby, we * notice whenever we detect a smaller or greater situation (and make sure that, for the * dimension under investigation, we do not check again in future). Note that this operation * operates on top of the zOrder string, in which bits are interleaved. * * <p>The unsatisfiedConstraintsCtr is for performance optimizations. It is initialized with * numDimensions*2, and when its count reaches zero we know that all dimensions have to be shown * larger than min and smaller than max, i.e. that all constraints have been satisfied. 
*/ int unsatisfiedConstraintsCtr = numDimensions * 2; for (int i = firstDifferingByte * Byte.SIZE; i < dividingRecord.length * Byte.SIZE && unsatisfiedConstraintsCtr > 0; i++) { final int dimension = i % numDimensions; final boolean divRecordBitSet = BytesUtil.getBit(dividingRecord, i); if (!dimShownToBeLargerThanMin[dimension]) { final boolean searchMinBitSet = BytesUtil.getBit(searchMinZOrder, i); if (divRecordBitSet && !searchMinBitSet) { dimShownToBeLargerThanMin[dimension] = true; unsatisfiedConstraintsCtr--; } else if (!divRecordBitSet && searchMinBitSet) { return false; // conflict } // else: skip } if (!dimShownToBeSmallerThanMax[dimension]) { final boolean searchMaxBitSet = BytesUtil.getBit(searchMaxZOrder, i); if (!divRecordBitSet && searchMaxBitSet) { dimShownToBeSmallerThanMax[dimension] = true; unsatisfiedConstraintsCtr--; } else if (divRecordBitSet && !searchMaxBitSet) { return false; } // else: skip } } return true; // all is good }
/** * Returns the BIGMIN, i.e. the next relevant value in the search range. The value is returned as * unsigned, which needs to be converted into two's complement prior to appending as a key (see * {@link GeoSpatialLiteralExtension} for details). * * <p>This method implements the BIGMIN decision table as provided in * http://www.vision-tools.com/h-tropf/multidimensionalrangequery.pdf, see page 76. * * @param iv the IV of the dividing record * @return */ public byte[] calculateBigMin(final byte[] dividingRecord) { if (dividingRecord.length != searchMinZOrder.length || dividingRecord.length != searchMaxZOrder.length) { // this should never happen, assuming correct configuration throw new RuntimeException("Key dimenisions differs"); } final int numBytes = dividingRecord.length; System.arraycopy(searchMinZOrder, 0, min, 0, zOrderArrayLength); System.arraycopy(searchMaxZOrder, 0, max, 0, zOrderArrayLength); java.util.Arrays.fill(bigmin, (byte) 0); // reset bigmin boolean finished = false; for (int i = 0; i < numBytes * Byte.SIZE && !finished; i++) { final boolean divRecordBitSet = BytesUtil.getBit(dividingRecord, i); final boolean minBitSet = BytesUtil.getBit(min, i); final boolean maxBitSet = BytesUtil.getBit(max, i); if (!divRecordBitSet) { if (!minBitSet) { if (!maxBitSet) { // case 0 - 0 - 0: continue (nothing to do) } else { // case 0 - 0 - 1 System.arraycopy(min, 0, bigmin, 0, zOrderArrayLength); load(true /* setFirst */, i, bigmin, numDimensions); load(false, i, max, numDimensions); } } else { if (!maxBitSet) { // case 0 - 1 - 0 throw new RuntimeException("MIN must be <= MAX."); } else { // case 0 - 1 - 1 System.arraycopy(min, 0, bigmin, 0, zOrderArrayLength); finished = true; } } } else { if (!minBitSet) { if (!maxBitSet) { // case 1 - 0 - 0 finished = true; } else { // case 1 - 0 - 1 load(true, i, min, numDimensions); } } else { if (!maxBitSet) { // case 1 - 1 - 0 throw new RuntimeException("MIN must be <= MAX."); } else { // case 1 - 1 - 1: continue (nothing 
to do) } } } } return bigmin; }
/** * This is an odd issue someone reported for the trunk. There are two version of a plain Literal * <code>Brian McCarthy</code>, but it appears that one of the two versions has a leading bell * character when you decode the Unicode byte[]. I think that this is actually an issue with the * {@link Locale} and the Unicode sort key generation. If {@link KeyBuilder} as configured on the * system generates Unicode sort keys which compare as EQUAL for these two inputs then that will * cause the lexicon to report an "apparent" inconsistency. In fact, what we probably need to do * is just disable the inconsistency check in the lexicon. * * <pre> * ERROR: com.bigdata.rdf.lexicon.Id2TermWriteProc.apply(Id2TermWriteProc.java:205): val=[0, 2, 0, 14, 66, 114, 105, 97, 110, 32, 77, 99, 67, 97, 114, 116, 104, 121] * ERROR: com.bigdata.rdf.lexicon.Id2TermWriteProc.apply(Id2TermWriteProc.java:206): oldval=[0, 2, 0, 15, 127, 66, 114, 105, 97, 110, 32, 77, 99, 67, 97, 114, 116, 104, 121] * </pre> */ public void test_consistencyIssue() { final BigdataValueSerializer<Value> fixture = new BigdataValueSerializer<Value>(ValueFactoryImpl.getInstance()); final byte[] newValBytes = new byte[] {0, 2, 0, 14, 66, 114, 105, 97, 110, 32, 77, 99, 67, 97, 114, 116, 104, 121}; final byte[] oldValBytes = new byte[] { 0, 2, 0, 15, 127, 66, 114, 105, 97, 110, 32, 77, 99, 67, 97, 114, 116, 104, 121 }; final Value newValue = fixture.deserialize(newValBytes); final Value oldValue = fixture.deserialize(oldValBytes); if (log.isInfoEnabled()) { log.info("new=" + newValue); log.info("old=" + oldValue); } /* * Note: This uses the default Locale and the implied Unicode collation * order to generate the sort keys. */ // final IKeyBuilder keyBuilder = new KeyBuilder(); /* * Note: This allows you to explicitly configure the behavior of the * KeyBuilder instance based on the specified properties. 
If you want * your KB to run with these properties, then you need to specify them * either in your environment or using -D to java. */ final Properties properties = new Properties(); // specify that all aspects of the Unicode sequence are significant. properties.setProperty(KeyBuilder.Options.STRENGTH, StrengthEnum.Identical.toString()); // // specify that that only primary character differences are significant. // properties.setProperty(KeyBuilder.Options.STRENGTH,StrengthEnum.Primary.toString()); final IKeyBuilder keyBuilder = KeyBuilder.newUnicodeInstance(properties); final LexiconKeyBuilder lexKeyBuilder = new LexiconKeyBuilder(keyBuilder); // encode as unsigned byte[] key. final byte[] newValKey = lexKeyBuilder.value2Key(newValue); final byte[] oldValKey = lexKeyBuilder.value2Key(oldValue); if (log.isInfoEnabled()) { log.info("newValKey=" + BytesUtil.toString(newValKey)); log.info("oldValKey=" + BytesUtil.toString(oldValKey)); } /* * Note: if this assert fails then the two distinct Literals were mapped * onto the same unsigned byte[] key. */ assertFalse(BytesUtil.bytesEqual(newValKey, oldValKey)); }