Exemplo n.º 1
0
  /**
   * Constructor for WaveletNode. Recursively builds the wavelet tree node and its subtrees.
   *
   * @param string the string to build the wavelet tree node for
   * @param alphabet Alphabet object containing alphabet information of the given string
   * @throws AlphabetException propagated from the calls made while building this node or its
   *     subtrees
   */
  public WaveletNode(StringWrapper string, Alphabet alphabet) throws AlphabetException {
    Byte[] allCharacters = alphabet.getAllCharacters();
    int length = string.length();
    contentLenght = length;

    int alphabetSize = alphabet.size();

    // An empty alphabet offers no character to use as a pivot. Fail with a clear message instead
    // of the ArrayIndexOutOfBoundsException that reading allCharacters[0] would raise below.
    if (alphabetSize == 0) {
      throw new IllegalArgumentException("Cannot build a wavelet node for an empty alphabet");
    }

    /*
     * Leaf node: with two characters the pivot is the second one, with a single character the
     * pivot is that character. Only the bit content is built; no children are created.
     */
    if (alphabetSize <= 2) {
      if (alphabetSize == 2) {
        pivot = allCharacters[1];
      } else {
        pivot = allCharacters[0];
      }
      bitContent = new RRR(pivot, string);
      return;
    }

    /*
     * Total number of occurrences over the whole alphabet.
     */
    int totalNumberOfCharacters = 0;
    for (Byte character : allCharacters) {
      totalNumberOfCharacters += alphabet.getOccurancesForCharacter(character);
    }

    /*
     * Finding the pivot.
     * A split at index i sends allCharacters[0..i-1] to the left child and allCharacters[i..] to
     * the right child, with allCharacters[i] as the pivot. Every index from 1 to length - 1 is
     * tried (so both sides always receive at least one alphabet character) and the one that
     * minimises |right - left| = |total - 2 * left| is kept; ties keep the smaller index.
     * The imbalance is computed in long so that 2 * left cannot overflow for very long inputs.
     * NOTE(review): this assumes allCharacters is sorted in ascending (signed byte) order, as the
     * partition step below compares characters against the pivot with '<' - confirm in Alphabet.
     */
    int pivotIndex = 1;
    int leftCount = 0;
    long bestImbalance = Long.MAX_VALUE;
    int occurrencesSoFar = 0;
    for (int candidate = 1; candidate < allCharacters.length; candidate++) {
      occurrencesSoFar += alphabet.getOccurancesForCharacter(allCharacters[candidate - 1]);
      long imbalance = Math.abs((long) totalNumberOfCharacters - 2L * occurrencesSoFar);
      if (imbalance < bestImbalance) {
        bestImbalance = imbalance;
        pivotIndex = candidate;
        leftCount = occurrencesSoFar;
      }
    }

    /*
     * Building the RRR structure.
     */
    this.pivot = allCharacters[pivotIndex];
    bitContent = new RRR(pivot, string);

    /*
     * Building the contents of the child nodes.
     * leftCount is the number of occurrences of the alphabet characters that precede the pivot,
     * which is the size the zeros buffer needs; everything else goes to ones.
     */
    StringWrapper zeros = new StringWrapper();
    StringWrapper ones = new StringWrapper();
    zeros.string = new byte[leftCount];
    ones.string = new byte[length - leftCount];

    int zerosFilled = 0;
    int onesFilled = 0;
    for (int i = 0; i < length; i++) {
      byte character = string.charAt(i);
      if (character < pivot) {
        zeros.string[zerosFilled++] = character;
      } else {
        ones.string[onesFilled++] = character;
      }
    }

    // Drop this frame's reference to the input before recursing. No System.gc() is requested:
    // clearing the references is enough for the collector to reclaim the buffers when needed.
    string = null;
    Alphabet[] splittedAlphabets = alphabet.splitAlphabet(pivot);

    /*
     * Creating child nodes. A child is only built when its alphabet has more than one character.
     */
    if (splittedAlphabets[0].size() > 1) {
      left = new WaveletNode(zeros, splittedAlphabets[0]);
    }
    // The left substring is no longer needed once the left subtree exists; release it before
    // building the right subtree.
    zeros = null;
    if (splittedAlphabets[1].size() > 1) {
      right = new WaveletNode(ones, splittedAlphabets[1]);
    }
  }