/** * Tests SegmentDictionaryCreator for case when there is one empty string and a string with a * single padding character * * <p>This test asserts that the padded length of the empty string is 1 in actual padded * dictionary), and not 0. * * @throws Exception */ @Test public void testPaddedConflict() throws Exception { File indexDir = new File("/tmp/dict.test"); FieldSpec fieldSpec = new DimensionFieldSpec("test", DataType.STRING, true, "\t"); String[] inputStrings = new String[2]; String[] paddedStrings = new String[2]; try { inputStrings[0] = ""; inputStrings[1] = "%"; Arrays.sort(inputStrings); // Sorted order: {"", "%"} SegmentDictionaryCreator dictionaryCreator = new SegmentDictionaryCreator(false, inputStrings, fieldSpec, indexDir); boolean[] isSorted = new boolean[1]; isSorted[0] = true; dictionaryCreator.build(isSorted); } catch (Exception e) { Assert.assertEquals( e.getMessage(), "Number of entries in dictionary != number of unique values in the data in column test"); } finally { FileUtils.deleteQuietly(indexDir); } }
/** * Tests SegmentDictionaryCreator for case when there is only one string and it is "null" * * <p>This test asserts that the padded length of the null string is 4 * * @throws Exception */ @Test public void testSingleNullString() throws Exception { File indexDir = new File("/tmp/dict.test"); FieldSpec fieldSpec = new DimensionFieldSpec("test", DataType.STRING, true, "\t"); String[] inputStrings = new String[1]; String[] paddedStrings = new String[1]; inputStrings[0] = "null"; Arrays.sort(inputStrings); // Sorted order: {"null"} try { SegmentDictionaryCreator dictionaryCreator = new SegmentDictionaryCreator(false, inputStrings, fieldSpec, indexDir); boolean[] isSorted = new boolean[1]; isSorted[0] = true; dictionaryCreator.build(isSorted); // Get the padded string as stored in the dictionary. int targetPaddedLength = dictionaryCreator.getStringColumnMaxLength(); Assert.assertTrue(targetPaddedLength == 4); for (int i = 0; i < inputStrings.length; i++) { paddedStrings[i] = SegmentDictionaryCreator.getPaddedString(inputStrings[i], targetPaddedLength); } Arrays.sort(paddedStrings); // Sorted Order: {"null"} // Assert that indexOfSV for un-padded string returns the index of the corresponding padded // string. for (int i = 0; i < inputStrings.length; i++) { int paddedIndex = dictionaryCreator.indexOfSV(inputStrings[i]); Assert.assertTrue( paddedStrings[paddedIndex].equals( SegmentDictionaryCreator.getPaddedString(inputStrings[i], targetPaddedLength))); } // Verify that the string "null" did not get changed Assert.assertTrue(paddedStrings[0].equals("null")); dictionaryCreator.close(); } catch (Exception e) { throw e; } finally { FileUtils.deleteQuietly(indexDir); } }
/** * Tests DictionaryCreator for case when one value is a substring of another. For example, in case * of sorted values {"abc", "abc def"} after padding, the sorted order would change to {"abc * def%%%%", "abc%%%%%%%"} * * <p>This test asserts that DictionaryCreator.indexOfSV("abc") returns 1 (ie index of * "abc%%%%%%%" in actual padded dictionary), and not 0. * * @throws Exception */ @Test public void testStringsValuesWithPadding() throws Exception { File indexDir = new File("/tmp/dict.test"); FieldSpec fieldSpec = new DimensionFieldSpec("test", DataType.STRING, true, "\t"); String[] inputStrings = new String[2]; String[] paddedStrings = new String[2]; inputStrings[0] = "abc def"; inputStrings[1] = "abc"; Arrays.sort(inputStrings); // Sorted order: {"abc", "abc def"} boolean[] isSorted = new boolean[1]; isSorted[0] = true; SegmentDictionaryCreator dictionaryCreator = new SegmentDictionaryCreator(false, inputStrings, fieldSpec, indexDir); dictionaryCreator.build(isSorted); Assert.assertFalse(isSorted[0]); // Get the padded string as stored in the dictionary. int targetPaddedLength = dictionaryCreator.getStringColumnMaxLength(); for (int i = 0; i < inputStrings.length; i++) { paddedStrings[i] = SegmentDictionaryCreator.getPaddedString(inputStrings[i], targetPaddedLength); } Arrays.sort(paddedStrings); // Sorted Order: {"abc def%%%%", "abc%%%%%%%"} // Assert that indexOfSV for un-padded string returns the index of the corresponding padded // string. for (int i = 0; i < inputStrings.length; i++) { int paddedIndex = dictionaryCreator.indexOfSV(inputStrings[i]); Assert.assertTrue( paddedStrings[paddedIndex].equals( SegmentDictionaryCreator.getPaddedString(inputStrings[i], targetPaddedLength))); } dictionaryCreator.close(); FileUtils.deleteQuietly(indexDir); }