/**
 * Generates a key for one value of each kind (URI, blank node, plain literal, language code
 * literal, datatype literal) and checks that the first byte of that key is the matching term
 * code.
 *
 * @see ITermIndexCodes
 */
public void test_encodeDecodeCodeByte() {

  // URI
  final byte[] uriKey = fixture.value2Key(RDF.TYPE);
  assertEquals(ITermIndexCodes.TERM_CODE_URI, uriKey[0]);

  // blank node
  final byte[] bnodeKey = fixture.value2Key(new BNodeImpl("foo"));
  assertEquals(ITermIndexCodes.TERM_CODE_BND, bnodeKey[0]);

  // plain literal
  final byte[] plainLiteralKey = fixture.value2Key(new LiteralImpl("abc"));
  assertEquals(ITermIndexCodes.TERM_CODE_LIT, plainLiteralKey[0]);

  // language code literal
  final byte[] languageLiteralKey = fixture.value2Key(new LiteralImpl("abc", "en"));
  assertEquals(ITermIndexCodes.TERM_CODE_LCL, languageLiteralKey[0]);

  // datatype literal
  final byte[] datatypeLiteralKey = fixture.value2Key(new LiteralImpl("abc", XSD.BOOLEAN));
  assertEquals(ITermIndexCodes.TERM_CODE_DTL, datatypeLiteralKey[0]);
}
/** * Tests the gross ordering over the different kinds of {@link Value}s but deliberately does not * pay attention to the sort key ordering for string data. * * @see ITermIndexCodes */ public void test_keyOrder() { final byte[] uri = fixture.value2Key(RDF.TYPE); final byte[] bnd = fixture.value2Key(new BNodeImpl("foo")); final byte[] lit = fixture.value2Key(new LiteralImpl("abc")); final byte[] lcl = fixture.value2Key(new LiteralImpl("abc", "en")); final byte[] dtl = fixture.value2Key(new LiteralImpl("abc", XSD.BOOLEAN)); // URIs before plain literals. assertTrue(UnsignedByteArrayComparator.INSTANCE.compare(uri, lit) < 0); // plain literals before language code literals. assertTrue(UnsignedByteArrayComparator.INSTANCE.compare(lit, lcl) < 0); // language code literals before datatype literals. assertTrue(UnsignedByteArrayComparator.INSTANCE.compare(lcl, dtl) < 0); // datatype literals before blank nodes. assertTrue(UnsignedByteArrayComparator.INSTANCE.compare(dtl, bnd) < 0); }
/** * This is an odd issue someone reported for the trunk. There are two version of a plain Literal * <code>Brian McCarthy</code>, but it appears that one of the two versions has a leading bell * character when you decode the Unicode byte[]. I think that this is actually an issue with the * {@link Locale} and the Unicode sort key generation. If {@link KeyBuilder} as configured on the * system generates Unicode sort keys which compare as EQUAL for these two inputs then that will * cause the lexicon to report an "apparent" inconsistency. In fact, what we probably need to do * is just disable the inconsistency check in the lexicon. * * <pre> * ERROR: com.bigdata.rdf.lexicon.Id2TermWriteProc.apply(Id2TermWriteProc.java:205): val=[0, 2, 0, 14, 66, 114, 105, 97, 110, 32, 77, 99, 67, 97, 114, 116, 104, 121] * ERROR: com.bigdata.rdf.lexicon.Id2TermWriteProc.apply(Id2TermWriteProc.java:206): oldval=[0, 2, 0, 15, 127, 66, 114, 105, 97, 110, 32, 77, 99, 67, 97, 114, 116, 104, 121] * </pre> */ public void test_consistencyIssue() { final BigdataValueSerializer<Value> fixture = new BigdataValueSerializer<Value>(ValueFactoryImpl.getInstance()); final byte[] newValBytes = new byte[] {0, 2, 0, 14, 66, 114, 105, 97, 110, 32, 77, 99, 67, 97, 114, 116, 104, 121}; final byte[] oldValBytes = new byte[] { 0, 2, 0, 15, 127, 66, 114, 105, 97, 110, 32, 77, 99, 67, 97, 114, 116, 104, 121 }; final Value newValue = fixture.deserialize(newValBytes); final Value oldValue = fixture.deserialize(oldValBytes); if (log.isInfoEnabled()) { log.info("new=" + newValue); log.info("old=" + oldValue); } /* * Note: This uses the default Locale and the implied Unicode collation * order to generate the sort keys. */ // final IKeyBuilder keyBuilder = new KeyBuilder(); /* * Note: This allows you to explicitly configure the behavior of the * KeyBuilder instance based on the specified properties. 
If you want * your KB to run with these properties, then you need to specify them * either in your environment or using -D to java. */ final Properties properties = new Properties(); // specify that all aspects of the Unicode sequence are significant. properties.setProperty(KeyBuilder.Options.STRENGTH, StrengthEnum.Identical.toString()); // // specify that that only primary character differences are significant. // properties.setProperty(KeyBuilder.Options.STRENGTH,StrengthEnum.Primary.toString()); final IKeyBuilder keyBuilder = KeyBuilder.newUnicodeInstance(properties); final LexiconKeyBuilder lexKeyBuilder = new LexiconKeyBuilder(keyBuilder); // encode as unsigned byte[] key. final byte[] newValKey = lexKeyBuilder.value2Key(newValue); final byte[] oldValKey = lexKeyBuilder.value2Key(oldValue); if (log.isInfoEnabled()) { log.info("newValKey=" + BytesUtil.toString(newValKey)); log.info("oldValKey=" + BytesUtil.toString(oldValKey)); } /* * Note: if this assert fails then the two distinct Literals were mapped * onto the same unsigned byte[] key. */ assertFalse(BytesUtil.bytesEqual(newValKey, oldValKey)); }