예제 #1
0
  /**
   * Tests SegmentDictionaryCreator for the case where the input consists of the empty string and
   * the single-character string {@code "%"}.
   *
   * <p>If creating or building the dictionary throws, the test asserts that the exception message
   * reports a mismatch between the number of dictionary entries and the number of unique values in
   * column {@code test}. The index directory is deleted afterwards in every case.
   *
   * <p>NOTE(review): the test name and the asserted message suggest that {@code ""} and {@code "%"}
   * are expected to collide once padded (presumably '%' is the padding character; confirm against
   * SegmentDictionaryCreator). As written, the test also passes when {@code build} does not throw;
   * confirm whether that path should fail explicitly.
   *
   * @throws Exception declared by the signature; exceptions raised while creating or building the
   *     dictionary are caught and checked inside the test
   */
  @Test
  public void testPaddedConflict() throws Exception {
    File indexDir = new File("/tmp/dict.test");
    FieldSpec fieldSpec = new DimensionFieldSpec("test", DataType.STRING, true, "\t");

    String[] inputStrings = {"", "%"};
    Arrays.sort(inputStrings); // Sorted order: {"", "%"}

    try {
      SegmentDictionaryCreator dictionaryCreator =
          new SegmentDictionaryCreator(false, inputStrings, fieldSpec, indexDir);
      boolean[] isSorted = {true};
      dictionaryCreator.build(isSorted);
    } catch (Exception e) {
      // Only the message is checked; any exception type carrying this message is accepted.
      Assert.assertEquals(
          e.getMessage(),
          "Number of entries in dictionary != number of unique values in the data in column test");
    } finally {
      // Remove whatever the creator wrote, regardless of the outcome.
      FileUtils.deleteQuietly(indexDir);
    }
  }
예제 #2
0
  /**
   * Tests SegmentDictionaryCreator for the case where the only input value is the literal string
   * {@code "null"}.
   *
   * <p>The test asserts that:
   *
   * <ul>
   *   <li>the maximum string column length reported by the creator is 4;
   *   <li>{@code indexOfSV} for the un-padded value returns the index of its padded counterpart;
   *   <li>padding {@code "null"} to that length leaves it unchanged.
   * </ul>
   *
   * <p>The creator is closed and the index directory deleted even when an assertion fails.
   *
   * @throws Exception if creating, building or closing the dictionary fails
   */
  @Test
  public void testSingleNullString() throws Exception {
    File indexDir = new File("/tmp/dict.test");
    FieldSpec fieldSpec = new DimensionFieldSpec("test", DataType.STRING, true, "\t");

    // A single value, so the array is trivially sorted.
    String[] inputStrings = {"null"};
    String[] paddedStrings = new String[inputStrings.length];

    try {
      SegmentDictionaryCreator dictionaryCreator =
          new SegmentDictionaryCreator(false, inputStrings, fieldSpec, indexDir);
      try {
        boolean[] isSorted = {true};
        dictionaryCreator.build(isSorted);

        // Get the padded string as stored in the dictionary.
        int targetPaddedLength = dictionaryCreator.getStringColumnMaxLength();
        Assert.assertEquals(targetPaddedLength, 4);
        for (int i = 0; i < inputStrings.length; i++) {
          paddedStrings[i] =
              SegmentDictionaryCreator.getPaddedString(inputStrings[i], targetPaddedLength);
        }

        // Assert that indexOfSV for un-padded string returns the index of the corresponding
        // padded string.
        for (String input : inputStrings) {
          int paddedIndex = dictionaryCreator.indexOfSV(input);
          Assert.assertEquals(
              paddedStrings[paddedIndex],
              SegmentDictionaryCreator.getPaddedString(input, targetPaddedLength));
        }

        // Verify that the string "null" did not get changed
        Assert.assertEquals(paddedStrings[0], "null");
      } finally {
        // Close on every path; a failed assertion throws AssertionError and would otherwise
        // skip the close call.
        dictionaryCreator.close();
      }
    } finally {
      FileUtils.deleteQuietly(indexDir);
    }
  }
예제 #3
0
  /**
   * Tests SegmentDictionaryCreator for the case where one value is a prefix of another. For
   * example, for sorted values {"abc", "abc def"} the sorted order after padding changes to
   * {"abc def%%%%", "abc%%%%%%%"}.
   *
   * <p>The test asserts that {@code build} clears the {@code isSorted} flag, and that {@code
   * indexOfSV} for each un-padded value returns the index of its padded counterpart in the
   * re-sorted padded array (i.e. {@code indexOfSV("abc")} refers to "abc%%%%%%%", not to the entry
   * at index 0).
   *
   * <p>The creator is closed and the index directory deleted even when an assertion fails, so a
   * failure here does not leave files behind in the directory shared with the sibling tests.
   *
   * @throws Exception if creating, building or closing the dictionary fails
   */
  @Test
  public void testStringsValuesWithPadding() throws Exception {
    File indexDir = new File("/tmp/dict.test");
    FieldSpec fieldSpec = new DimensionFieldSpec("test", DataType.STRING, true, "\t");

    String[] inputStrings = {"abc def", "abc"};
    Arrays.sort(inputStrings); // Sorted order: {"abc", "abc def"}
    String[] paddedStrings = new String[inputStrings.length];

    boolean[] isSorted = {true};

    try {
      SegmentDictionaryCreator dictionaryCreator =
          new SegmentDictionaryCreator(false, inputStrings, fieldSpec, indexDir);
      try {
        dictionaryCreator.build(isSorted);
        // Padding changes the relative order of the two values, so the flag must be cleared.
        Assert.assertFalse(isSorted[0]);

        // Get the padded string as stored in the dictionary.
        int targetPaddedLength = dictionaryCreator.getStringColumnMaxLength();
        for (int i = 0; i < inputStrings.length; i++) {
          paddedStrings[i] =
              SegmentDictionaryCreator.getPaddedString(inputStrings[i], targetPaddedLength);
        }
        Arrays.sort(paddedStrings); // Sorted Order: {"abc def%%%%", "abc%%%%%%%"}

        // Assert that indexOfSV for un-padded string returns the index of the corresponding
        // padded string.
        for (String input : inputStrings) {
          int paddedIndex = dictionaryCreator.indexOfSV(input);
          Assert.assertEquals(
              paddedStrings[paddedIndex],
              SegmentDictionaryCreator.getPaddedString(input, targetPaddedLength));
        }
      } finally {
        // Close on every path; a failed assertion would otherwise skip the close call.
        dictionaryCreator.close();
      }
    } finally {
      FileUtils.deleteQuietly(indexDir);
    }
  }