/**
 * Increments a single event once, decrements it back to zero, and then decrements it a second
 * time. The test only passes when a {@link RuntimeException} escapes the method; the final
 * decrement of the already-empty event is the call expected to raise it.
 */
@Test(expected = RuntimeException.class)
public void testFailedDecrement1() {
  Int2IntFrequencyDistribution dist = new Int2IntFrequencyDistributionOpen();

  // One observation of event 1.
  dist.increment(1);
  assertEquals(1, dist.getNumberOfEvents());
  assertEquals(1, dist.getSumOfCounts());
  assertEquals(1, dist.get(1));

  // Taking it away again leaves the distribution empty.
  dist.decrement(1);
  assertEquals(0, dist.getNumberOfEvents());
  assertEquals(0, dist.getSumOfCounts());
  assertEquals(0, dist.get(1));

  // Nothing left to remove.
  dist.decrement(1);
}
/**
 * Records four observations over three distinct events, checks the counts, and then verifies
 * that {@code clear()} resets both the number of events and the sum of counts to zero.
 */
@Test
public void test3() {
  Int2IntFrequencyDistribution dist = new Int2IntFrequencyDistributionOpen();

  // Event 1 is observed twice, events 2 and 3 once each.
  final int[] observations = {1, 1, 2, 3};
  for (int event : observations) {
    dist.increment(event);
  }

  assertEquals(3, dist.getNumberOfEvents());
  assertEquals(4, dist.getSumOfCounts());

  assertEquals(2, dist.get(1));
  assertEquals(1, dist.get(2));
  assertEquals(1, dist.get(3));

  dist.clear();

  assertEquals(0, dist.getNumberOfEvents());
  assertEquals(0, dist.getSumOfCounts());
}
/**
 * Populates a distribution with six (event, count) pairs inserted out of key order and checks
 * the lists returned by {@code getEntries(Order)} and {@code getEntries(Order, int)}.
 *
 * <p>NOTE(review): the order requested is {@code Order.ByLeftElementDescending}, yet the
 * expected left elements below run 1, 2, 3, ... Confirm against the implementation that this
 * is the intended meaning of that constant.
 */
@Test
public void testGetSortedEvents() {
  Int2IntFrequencyDistribution dist = new Int2IntFrequencyDistributionOpen();

  dist.set(1, 1);
  dist.set(4, 3);
  dist.set(2, 4);
  dist.set(5, 7);
  dist.set(6, 9);
  dist.set(3, 2);

  assertEquals(6, dist.getNumberOfEvents());
  assertEquals(26, dist.getSumOfCounts());

  // Expected {left, right} pairs, in the order the returned list must present them.
  final int[][] expected = {{1, 1}, {2, 4}, {3, 2}, {4, 3}, {5, 7}, {6, 9}};

  // Unbounded variant: every entry comes back.
  List<PairOfInts> entries = dist.getEntries(Order.ByLeftElementDescending);
  assertEquals(6, entries.size());
  for (int i = 0; i < 6; i++) {
    assertEquals(expected[i][0], entries.get(i).getLeftElement());
    assertEquals(expected[i][1], entries.get(i).getRightElement());
  }

  // Bounded variant: only the first four entries of the same ordering.
  entries = dist.getEntries(Order.ByLeftElementDescending, 4);
  assertEquals(4, entries.size());
  for (int i = 0; i < 4; i++) {
    assertEquals(expected[i][0], entries.get(i).getLeftElement());
    assertEquals(expected[i][1], entries.get(i).getRightElement());
  }
}
/**
 * Iterates over a distribution with a for-each loop, collects a copy of every pair into a
 * sorted set, and checks that all six (event, count) pairs are present with the right values.
 */
@Test
public void testIterable() {
  Int2IntFrequencyDistribution dist = new Int2IntFrequencyDistributionOpen();

  dist.set(1, 1);
  dist.set(4, 3);
  dist.set(2, 4);
  dist.set(5, 7);
  dist.set(6, 9);
  dist.set(3, 2);

  assertEquals(6, dist.getNumberOfEvents());
  assertEquals(26, dist.getSumOfCounts());

  // Each pair is cloned before being stored; presumably the iterator may hand out a reused
  // PairOfInts instance -- TODO confirm against the implementation.
  SortedSet<PairOfInts> sorted = new TreeSet<PairOfInts>();
  for (PairOfInts pair : dist) {
    sorted.add(pair.clone());
  }
  assertEquals(6, sorted.size());

  // Expected {left, right} pairs in the set's iteration order.
  final int[][] expected = {{1, 1}, {2, 4}, {3, 2}, {4, 3}, {5, 7}, {6, 9}};

  Iterator<PairOfInts> iter = sorted.iterator();
  for (int[] want : expected) {
    PairOfInts got = iter.next();
    assertEquals(want[0], got.getLeftElement());
    assertEquals(want[1], got.getRightElement());
  }
}
/** Runs this tool. */ @SuppressWarnings({"static-access"}) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption( OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INDEX)); options.addOption( OptionBuilder.withArgName("path") .hasArg() .withDescription("output path") .create(COLLECTION)); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (!cmdline.hasOption(INDEX) || !cmdline.hasOption(COLLECTION)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(LookupPostingsCompressed.class.getName(), options); ToolRunner.printGenericCommandUsage(System.out); System.exit(-1); } String indexPath = cmdline.getOptionValue(INDEX); String collectionPath = cmdline.getOptionValue(COLLECTION); if (collectionPath.endsWith(".gz")) { System.out.println("gzipped collection is not seekable: use compressed version!"); System.exit(-1); } Configuration config = new Configuration(); FileSystem fs = FileSystem.get(config); MapFile.Reader reader = new MapFile.Reader(new Path(indexPath + "/part-r-00000"), config); FSDataInputStream collection = fs.open(new Path(collectionPath)); BufferedReader d = new BufferedReader(new InputStreamReader(collection)); Text key = new Text(); ArrayListWritable<PairOfInts> postings; BytesWritable bytesValue = new BytesWritable(); System.out.println("Looking up postings for the term \"starcross'd\""); key.set("starcross'd"); reader.get(key, bytesValue); postings = deserializePosting(bytesValue); // ArrayListWritable<PairOfVInts> postings = value; for (PairOfInts pair : postings) { System.out.println(pair); collection.seek(pair.getLeftElement()); System.out.println(d.readLine()); } bytesValue = new 
BytesWritable(); key.set("gold"); reader.get(key, bytesValue); postings = deserializePosting(bytesValue); System.out.println( "Complete postings list for 'gold': (" + postings.size() + ", " + postings + ")"); Int2IntFrequencyDistribution goldHist = new Int2IntFrequencyDistributionEntry(); // postings = value; for (PairOfInts pair : postings) { goldHist.increment(pair.getRightElement()); } System.out.println("histogram of tf values for gold"); for (PairOfInts pair : goldHist) { System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement()); } bytesValue = new BytesWritable(); key.set("silver"); reader.get(key, bytesValue); postings = deserializePosting(bytesValue); System.out.println( "Complete postings list for 'silver': (" + postings.size() + ", " + postings + ")"); Int2IntFrequencyDistribution silverHist = new Int2IntFrequencyDistributionEntry(); // postings = value; for (PairOfInts pair : postings) { silverHist.increment(pair.getRightElement()); } System.out.println("histogram of tf values for silver"); for (PairOfInts pair : silverHist) { System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement()); } bytesValue = new BytesWritable(); key.set("bronze"); Writable w = reader.get(key, bytesValue); if (w == null) { System.out.println("the term bronze does not appear in the collection"); } collection.close(); reader.close(); return 0; }
/**
 * Mixes {@code increment} and {@code set} on the same distribution and checks after every step
 * that the number of events, the sum of counts, and the individual counts stay consistent --
 * including when {@code set} overwrites an existing count with a larger or a smaller value.
 */
@Test
public void test2() {
  Int2IntFrequencyDistribution dist = new Int2IntFrequencyDistributionOpen();

  // Phase 1: plain increments.
  dist.increment(1);
  dist.increment(1);
  dist.increment(2);
  dist.increment(3);

  assertEquals(3, dist.getNumberOfEvents());
  assertEquals(4, dist.getSumOfCounts());
  assertEquals(2, dist.get(1));
  assertEquals(1, dist.get(2));
  assertEquals(1, dist.get(3));

  // Phase 2: set() on a previously unseen event.
  dist.set(4, 5);

  assertEquals(4, dist.getNumberOfEvents());
  assertEquals(9, dist.getSumOfCounts());
  assertEquals(2, dist.get(1));
  assertEquals(1, dist.get(2));
  assertEquals(1, dist.get(3));
  assertEquals(5, dist.get(4));

  // Phase 3: set() raising the count of an existing event.
  dist.set(1, 5);

  assertEquals(4, dist.getNumberOfEvents());
  assertEquals(12, dist.getSumOfCounts());
  assertEquals(5, dist.get(1));
  assertEquals(1, dist.get(2));
  assertEquals(1, dist.get(3));
  assertEquals(5, dist.get(4));

  // Phase 4: three more observations of event 3.
  for (int i = 0; i < 3; i++) {
    dist.increment(3);
  }

  assertEquals(4, dist.getNumberOfEvents());
  assertEquals(15, dist.getSumOfCounts());
  assertEquals(5, dist.get(1));
  assertEquals(1, dist.get(2));
  assertEquals(4, dist.get(3));
  assertEquals(5, dist.get(4));

  // Phase 5: set() lowering the count of an existing event.
  dist.set(3, 1);

  assertEquals(4, dist.getNumberOfEvents());
  assertEquals(12, dist.getSumOfCounts());
  assertEquals(5, dist.get(1));
  assertEquals(1, dist.get(2));
  assertEquals(1, dist.get(3));
  assertEquals(5, dist.get(4));
}
/**
 * Checks counts, relative frequencies, and log frequencies after a series of increments, and
 * again after each of two decrements; the second decrement brings event 1 down to a count of
 * zero, after which the number of events is expected to drop from three to two.
 */
@Test
public void test1() {
  final double tolerance = 10e-6;
  Int2IntFrequencyDistribution dist = new Int2IntFrequencyDistributionOpen();

  // An unseen event reports a count of zero.
  assertEquals(0, dist.get(1));

  // Event 1 once, event 2 twice, event 3 three times.
  final int[] observations = {1, 2, 3, 2, 3, 3};
  for (int event : observations) {
    dist.increment(event);
  }

  assertEquals(3, dist.getNumberOfEvents());
  assertEquals(6, dist.getSumOfCounts());

  assertEquals(1, dist.get(1));
  assertEquals(2, dist.get(2));
  assertEquals(3, dist.get(3));

  assertEquals(1 / 6.0f, dist.getFrequency(1), tolerance);
  assertEquals(2 / 6.0f, dist.getFrequency(2), tolerance);
  assertEquals(3 / 6.0f, dist.getFrequency(3), tolerance);

  assertEquals(Math.log(1 / 6.0f), dist.getLogFrequency(1), tolerance);
  assertEquals(Math.log(2 / 6.0f), dist.getLogFrequency(2), tolerance);
  assertEquals(Math.log(3 / 6.0f), dist.getLogFrequency(3), tolerance);

  // Remove one observation of event 3: still three events, five observations in total.
  dist.decrement(3);

  assertEquals(3, dist.getNumberOfEvents());
  assertEquals(5, dist.getSumOfCounts());

  assertEquals(1, dist.get(1));
  assertEquals(2, dist.get(2));
  assertEquals(2, dist.get(3));

  assertEquals(1 / 5.0f, dist.getFrequency(1), tolerance);
  assertEquals(2 / 5.0f, dist.getFrequency(2), tolerance);
  assertEquals(2 / 5.0f, dist.getFrequency(3), tolerance);

  assertEquals(Math.log(1 / 5.0f), dist.getLogFrequency(1), tolerance);
  assertEquals(Math.log(2 / 5.0f), dist.getLogFrequency(2), tolerance);
  assertEquals(Math.log(2 / 5.0f), dist.getLogFrequency(3), tolerance);

  // Remove the only observation of event 1.
  dist.decrement(1);

  assertEquals(2, dist.getNumberOfEvents());
  assertEquals(4, dist.getSumOfCounts());

  assertEquals(0, dist.get(1));
  assertEquals(2, dist.get(2));
  assertEquals(2, dist.get(3));

  assertEquals(2 / 4.0f, dist.getFrequency(2), tolerance);
  assertEquals(2 / 4.0f, dist.getFrequency(3), tolerance);

  assertEquals(Math.log(2 / 4.0f), dist.getLogFrequency(2), tolerance);
  assertEquals(Math.log(2 / 4.0f), dist.getLogFrequency(3), tolerance);
}
/**
 * Exercises the two-argument {@code increment(event, amount)} and {@code decrement(event,
 * amount)} overloads: counts are raised by 2, 3 and 4 for events 1, 2 and 3, then event 2 is
 * lowered by 2, and the totals are checked after each step.
 */
@Test
public void testMultiIncrementDecrement() {
  Int2IntFrequencyDistribution dist = new Int2IntFrequencyDistributionOpen();

  // Bulk increments.
  dist.increment(1, 2);
  dist.increment(2, 3);
  dist.increment(3, 4);

  assertEquals(3, dist.getNumberOfEvents());
  assertEquals(9, dist.getSumOfCounts());

  assertEquals(2, dist.get(1));
  assertEquals(3, dist.get(2));
  assertEquals(4, dist.get(3));

  // Bulk decrement that leaves event 2 with a non-zero count.
  dist.decrement(2, 2);

  assertEquals(3, dist.getNumberOfEvents());
  assertEquals(7, dist.getSumOfCounts());

  assertEquals(2, dist.get(1));
  assertEquals(1, dist.get(2));
  assertEquals(4, dist.get(3));
}