/**
 * Appends the key/value pair through the superclass writer and then records
 * the key in the bloom filter.
 *
 * <p>The superclass append runs first, so a key whose append throws is never
 * added to the filter.
 *
 * @param key the key to append; it is also serialized into {@code buf} to
 *            produce the bytes stored in the filter
 * @param val the value to append
 * @throws IOException if the superclass append or the key serialization fails
 */
@Override
public synchronized void append(WritableComparable key, Writable val) throws IOException {
  super.append(key, val);

  // Re-serialize the key into the shared buffer; reset() discards whatever
  // the previous append left behind.
  buf.reset();
  key.write(buf);

  // The second argument (1.0) is presumably the bloom key's weight -- confirm
  // against the bloom key type's set(...) contract.
  final byte[] serializedKey = byteArrayForBloomKey(buf);
  bloomKey.set(serializedKey, 1.0);
  bloomFilter.add(bloomKey);
}
/**
 * Closes the superclass writer, then writes the bloom filter to the file
 * {@code BLOOM_FILE_NAME} under {@code dir}.
 *
 * <p>The output stream is closed on every path, including when writing or
 * flushing the filter throws, so a failed write does not leak the stream.
 *
 * @throws IOException if closing the superclass writer, creating the bloom
 *                     file, or writing/flushing/closing the stream fails
 */
@Override
public synchronized void close() throws IOException {
  super.close();
  // Second argument `true` is passed as the overwrite flag to fs.create(...).
  DataOutputStream out = fs.create(new Path(dir, BLOOM_FILE_NAME), true);
  try {
    bloomFilter.write(out);
    out.flush();
  } finally {
    // Always release the stream, even if write/flush failed above.
    out.close();
  }
}
/**
 * Writes every entry of {@code bucketCache} into a {@code MapFile} located at
 * {@code PathUtils.getCachePath(conf) + BUCKET_CACHE_FOLDER}, in sorted key
 * order, and optionally registers the resulting files with the distributed
 * cache.
 *
 * <p>The writer is closed explicitly on the success path so that an
 * {@link IOException} raised while closing is propagated to the caller
 * instead of being silently discarded by {@code IOUtils.closeStream}. The
 * quiet close is kept only for the failure path, where the original exception
 * is the one that should surface.
 *
 * @param conf                    configuration used to resolve the cache path,
 *                                obtain the file system and receive the cache
 *                                file registrations
 * @param writeToDistributedCache when {@code true}, every non-directory entry
 *                                under the bucket cache path is added to the
 *                                distributed cache via {@code conf}
 * @throws IOException if the map file cannot be created, written or closed,
 *                     or if listing the output directory fails
 */
@SuppressWarnings("unchecked")
public void writeToDisk(Configuration conf, boolean writeToDistributedCache) throws IOException {
  String bucketCachePath = PathUtils.getCachePath(conf) + BUCKET_CACHE_FOLDER;
  FileSystem fs = FileSystem.get(conf);

  MapFile.Writer writer = null;
  try {
    writer = new MapFile.Writer(
        conf,
        new Path(bucketCachePath),
        MapFile.Writer.keyClass(IntWritable.class),
        MapFile.Writer.valueClass(Bucket.class));

    // Entries are appended in ascending key order, so sort a copy of the keys.
    ArrayList<IntWritable> keyList = new ArrayList<IntWritable>(bucketCache.keySet());
    Collections.sort(keyList);
    for (IntWritable i : keyList) {
      writer.append(i, bucketCache.get(i));
    }

    // Close here (not only in finally) so a failure to close is reported
    // rather than swallowed; null the reference to avoid a second close.
    writer.close();
    writer = null;
  } finally {
    if (writer != null) {
      // Only reached when something above threw: close quietly so the
      // original exception is not masked.
      IOUtils.closeStream(writer);
    }
  }

  if (writeToDistributedCache) {
    for (FileStatus status : fs.listStatus(new Path(bucketCachePath))) {
      if (!status.isDirectory()) {
        DistributedCache.addCacheFile(status.getPath().toUri(), conf);
      }
    }
  }
}