Example #1
0
  public static void main(final String[] arg)
      throws IOException, JSAPException, NoSuchMethodException {

    final SimpleJSAP jsap =
        new SimpleJSAP(
            BloomFilter.class.getName(),
            "Creates a Bloom filter reading from standard input a newline-separated list of terms.",
            new Parameter[] {
              new FlaggedOption(
                  "bufferSize",
                  IntSizeStringParser.getParser(),
                  "64Ki",
                  JSAP.NOT_REQUIRED,
                  'b',
                  "buffer-size",
                  "The size of the I/O buffer used to read terms."),
              new FlaggedOption(
                  "encoding",
                  ForNameStringParser.getParser(Charset.class),
                  "UTF-8",
                  JSAP.NOT_REQUIRED,
                  'e',
                  "encoding",
                  "The term file encoding."),
              new UnflaggedOption(
                  "bloomFilter",
                  JSAP.STRING_PARSER,
                  JSAP.NO_DEFAULT,
                  JSAP.REQUIRED,
                  JSAP.NOT_GREEDY,
                  "The filename for the serialised front-coded list."),
              new UnflaggedOption(
                  "size",
                  JSAP.INTSIZE_PARSER,
                  JSAP.NO_DEFAULT,
                  JSAP.REQUIRED,
                  JSAP.NOT_GREEDY,
                  "The size of the filter (i.e., the expected number of elements in the filter; usually, the number of terms)."),
              new UnflaggedOption(
                  "precision",
                  JSAP.INTEGER_PARSER,
                  JSAP.NO_DEFAULT,
                  JSAP.REQUIRED,
                  JSAP.NOT_GREEDY,
                  "The precision of the filter.")
            });

    JSAPResult jsapResult = jsap.parse(arg);
    if (jsap.messagePrinted()) return;

    final int bufferSize = jsapResult.getInt("bufferSize");
    final String filterName = jsapResult.getString("bloomFilter");
    final Charset encoding = (Charset) jsapResult.getObject("encoding");

    BloomFilter filter = new BloomFilter(jsapResult.getInt("size"), jsapResult.getInt("precision"));
    final ProgressLogger pl = new ProgressLogger();
    pl.itemsName = "terms";
    pl.start("Reading terms...");
    MutableString s = new MutableString();
    FastBufferedReader reader =
        new FastBufferedReader(new InputStreamReader(System.in, encoding), bufferSize);
    while (reader.readLine(s) != null) {
      filter.add(s);
      pl.lightUpdate();
    }
    pl.done();

    BinIO.storeObject(filter, filterName);
  }