public void testDecompressException() throws IOException { // build 5k array byte[] uncompressedData = new byte[5 * 1024]; for (int i = 0; i < uncompressedData.length; i++) { uncompressedData[i] = 1; } byte[] compressedData = doCompress(uncompressedData); Bucket inBucket = new ArrayBucket(compressedData); NullBucket outBucket = new NullBucket(); InputStream decompressorInput = null; OutputStream decompressorOutput = null; try { decompressorInput = inBucket.getInputStream(); decompressorOutput = outBucket.getOutputStream(); Compressor.COMPRESSOR_TYPE.GZIP.decompress( decompressorInput, decompressorOutput, 4096 + 10, 4096 + 20); decompressorInput.close(); decompressorOutput.close(); } catch (CompressionOutputSizeException e) { // expect this return; } finally { Closer.close(decompressorInput); Closer.close(decompressorOutput); inBucket.free(); outBucket.free(); } fail("did not throw expected CompressionOutputSizeException"); }
private void getWordsFromPages() throws URISyntaxException, UnsafeContentTypeException, IOException { // Content-XXXXXXX.html pages only, because they contain all the contents // and their respective URIs won't change over time InputStream filterInput = null; OutputStream filterOutput = null; ArrayBucket input = null; NullBucket output = null; for (final Content content : new FlogFactory(flog).getContentsTreeMap(false).values()) { NullFilterCallback nullFC = new NullFilterCallback(); input = new ArrayBucket( ContentSyntax.parseSomeString(content.getContent(), content.getContentSyntax()) .getBytes("UTF-8")); output = new NullBucket(); filterInput = input.getInputStream(); filterOutput = output.getOutputStream(); ContentFilter.filter( filterInput, filterOutput, "text/html", new URI("http://whocares.co:12345/"), nullFC, null, null); filterInput.close(); filterOutput.close(); input.free(); output.free(); final String cURI = "Content-" + content.getID() + ".html"; this.pageIDs.add(cURI); final int pageID = this.pageIDs.indexOf(cURI); for (String w : nullFC.words.keySet()) { if (!this.ourWords.containsKey(w)) { this.ourWords.put(w, new HashMap<Integer, Vector<Long>>()); } final HashMap<Integer, Vector<Long>> container = this.ourWords.get(w); if (!container.containsKey(pageID)) { container.put(pageID, new Vector<Long>()); } final Vector<Long> subContainer = container.get(pageID); for (Long position : nullFC.words.get(w)) { if (!subContainer.contains(position)) { subContainer.add(position); } } final Byte firstMD5 = Byte.valueOf(DataFormatter.getMD5(w).substring(0, 1), 16); final Vector<String> md5Container = this.wordsByMD5.get(firstMD5); if (!md5Container.contains(w)) { md5Container.add(w); } } } }