public void testDecompressException() throws IOException {
    // Decompressing a 5k payload into a ~4k output budget must fail with
    // CompressionOutputSizeException; the test passes only if it is thrown.
    byte[] plain = new byte[5 * 1024];
    for (int i = 0; i < plain.length; i++) {
      plain[i] = 1;
    }

    byte[] packed = doCompress(plain);

    Bucket source = new ArrayBucket(packed);
    NullBucket sink = new NullBucket();
    InputStream in = null;
    OutputStream out = null;
    try {
      in = source.getInputStream();
      out = sink.getOutputStream();
      // Output limits (4096 + 10 / 4096 + 20) are well below the 5120-byte
      // decompressed size, so this call is expected to overflow.
      Compressor.COMPRESSOR_TYPE.GZIP.decompress(in, out, 4096 + 10, 4096 + 20);
      in.close();
      out.close();
    } catch (CompressionOutputSizeException e) {
      // Expected outcome: the decompressed data exceeds the output limit.
      return;
    } finally {
      // Closer.close tolerates null and already-closed streams.
      Closer.close(in);
      Closer.close(out);
      source.free();
      sink.free();
    }
    fail("did not throw expected CompressionOutputSizeException");
  }
  /**
   * Runs every flog content page through the HTML content filter and records, per
   * word, the set of positions at which it occurs on each page, plus a word index
   * keyed by the first hex digit of the word's MD5.
   *
   * @throws URISyntaxException if the fixed base URI cannot be parsed (should not happen)
   * @throws UnsafeContentTypeException if the filter rejects the content type
   * @throws IOException on stream errors while filtering
   */
  private void getWordsFromPages()
      throws URISyntaxException, UnsafeContentTypeException, IOException {
    // Content-XXXXXXX.html pages only, because they contain all the contents
    // and their respective URIs won't change over time
    final URI baseURI = new URI("http://whocares.co:12345/"); // loop-invariant; hoisted
    for (final Content content : new FlogFactory(flog).getContentsTreeMap(false).values()) {
      final NullFilterCallback nullFC = new NullFilterCallback();
      final ArrayBucket input =
          new ArrayBucket(
              ContentSyntax.parseSomeString(content.getContent(), content.getContentSyntax())
                  .getBytes("UTF-8"));
      final NullBucket output = new NullBucket();
      InputStream filterInput = null;
      OutputStream filterOutput = null;
      try {
        filterInput = input.getInputStream();
        filterOutput = output.getOutputStream();
        ContentFilter.filter(
            filterInput,
            filterOutput,
            "text/html",
            baseURI,
            nullFC,
            null,
            null);
      } finally {
        // Close streams and free buckets even when filtering throws; previously
        // cleanup only happened on the success path, leaking on exceptions.
        Closer.close(filterInput);
        Closer.close(filterOutput);
        input.free();
        output.free();
      }
      final String cURI = "Content-" + content.getID() + ".html";
      this.pageIDs.add(cURI);
      final int pageID = this.pageIDs.indexOf(cURI);
      for (String w : nullFC.words.keySet()) {
        // ourWords: word -> (pageID -> positions); create nested containers on demand.
        if (!this.ourWords.containsKey(w)) {
          this.ourWords.put(w, new HashMap<Integer, Vector<Long>>());
        }
        final HashMap<Integer, Vector<Long>> container = this.ourWords.get(w);
        if (!container.containsKey(pageID)) {
          container.put(pageID, new Vector<Long>());
        }
        final Vector<Long> subContainer = container.get(pageID);
        // Deduplicate positions reported by the filter callback.
        for (Long position : nullFC.words.get(w)) {
          if (!subContainer.contains(position)) {
            subContainer.add(position);
          }
        }

        // Index the word under the first hex digit (0-15) of its MD5.
        // NOTE(review): assumes wordsByMD5 is pre-populated with all 16 keys;
        // get() returns null and the contains() call below NPEs otherwise —
        // confirm against the field's initialization.
        final Byte firstMD5 = Byte.valueOf(DataFormatter.getMD5(w).substring(0, 1), 16);
        final Vector<String> md5Container = this.wordsByMD5.get(firstMD5);
        if (!md5Container.contains(w)) {
          md5Container.add(w);
        }
      }
    }
  }