/**
 * Accumulates per-feature totals into {@code featureCounts} and per-feature
 * document counts into {@code documentFrequencies} for every instance in
 * {@code instances}.
 *
 * <p>The data type of the first instance selects the counting strategy for the
 * whole list:
 * <ul>
 *   <li>{@code FeatureSequence}: each position in the sequence counts as one
 *       occurrence of its feature; each distinct feature in a document adds one
 *       to its document frequency.</li>
 *   <li>{@code FeatureVector}: each location adds its value to the feature's
 *       total and one to its document frequency.</li>
 * </ul>
 * Any other data type is logged and nothing is counted. An empty instance list
 * is logged and the method returns immediately.
 *
 * <p>The running number of processed instances is printed to
 * {@code System.err} every 1000 instances.
 */
public void count() {
    if (instances.size() == 0) {
        logger.info("Instance list is empty");
        return;
    }

    int index = 0;

    // The first instance decides how the whole list is interpreted.
    Object firstData = instances.get(0).getData();

    if (firstData instanceof FeatureSequence) {
        for (Instance instance : instances) {
            FeatureSequence features = (FeatureSequence) instance.getData();

            // Occurrences of each feature within this single document.
            TIntIntHashMap docCounts = new TIntIntHashMap();
            for (int i = 0; i < features.getLength(); i++) {
                docCounts.adjustOrPutValue(features.getIndexAtPosition(i), 1, 1);
            }

            // Fold the document's counts into the totals. Every key must be
            // visited: stopping at keys.length - 1 would drop one distinct
            // feature per document.
            int[] keys = docCounts.keys();
            for (int i = 0; i < keys.length; i++) {
                int feature = keys[i];
                featureCounts[feature] += docCounts.get(feature);
                documentFrequencies[feature]++;
            }

            index++;
            if (index % 1000 == 0) {
                System.err.println(index);
            }
        }
    } else if (firstData instanceof FeatureVector) {
        for (Instance instance : instances) {
            FeatureVector features = (FeatureVector) instance.getData();

            for (int location = 0; location < features.numLocations(); location++) {
                int feature = features.indexAtLocation(location);
                double value = features.valueAtLocation(location);

                documentFrequencies[feature]++;
                featureCounts[feature] += value;
            }

            index++;
            if (index % 1000 == 0) {
                System.err.println(index);
            }
        }
    } else {
        logger.info("Unsupported data class: " + firstData.getClass().getName());
    }
}
public void testP2PMap() { // Long-long TLongLongHashMap llmap = new TLongLongHashMap(); assertTrue(serializesCorrectly(llmap, "p2p-ll-1")); llmap.put(0, 1); assertTrue(serializesCorrectly(llmap, "p2p-ll-2")); llmap.put(Long.MIN_VALUE, Long.MIN_VALUE); assertTrue(serializesCorrectly(llmap, "p2p-ll-3")); llmap.put(Long.MAX_VALUE, Long.MAX_VALUE); assertTrue(serializesCorrectly(llmap, "p2p-ll-4")); // Int-int TIntIntHashMap iimap = new TIntIntHashMap(); assertTrue(serializesCorrectly(iimap, "p2p-ii-1")); iimap.put(0, 1); assertTrue(serializesCorrectly(iimap, "p2p-ii-2")); iimap.put(Integer.MIN_VALUE, Integer.MIN_VALUE); assertTrue(serializesCorrectly(iimap, "p2p-ii-3")); iimap.put(Integer.MAX_VALUE, Integer.MAX_VALUE); assertTrue(serializesCorrectly(iimap, "p2p-ii-4")); // Double-double TDoubleDoubleHashMap ddmap = new TDoubleDoubleHashMap(); assertTrue(serializesCorrectly(ddmap, "p2p-dd-1")); ddmap.put(0, 1); assertTrue(serializesCorrectly(ddmap, "p2p-dd-2")); ddmap.put(Double.MIN_VALUE, Double.MIN_VALUE); assertTrue(serializesCorrectly(ddmap, "p2p-dd-3")); ddmap.put(Double.MAX_VALUE, Double.MAX_VALUE); assertTrue(serializesCorrectly(ddmap, "p2p-dd-4")); ddmap.put(Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY); assertTrue(serializesCorrectly(ddmap, "p2p-dd-5")); ddmap.put(Double.NEGATIVE_INFINITY, Double.NEGATIVE_INFINITY); assertTrue(serializesCorrectly(ddmap, "p2p-dd-6")); // NOTE: trove doesn't deal well with NaN // ddmap.put( Double.NaN, Double.NaN ); // assertTrue( serializesCorrectly( ddmap ) ); // Float-float TFloatFloatHashMap ffmap = new TFloatFloatHashMap(); assertTrue(serializesCorrectly(ffmap, "p2p-ff-1")); ffmap.put(0, 1); assertTrue(serializesCorrectly(ffmap, "p2p-ff-2")); ffmap.put(Float.MIN_VALUE, Float.MIN_VALUE); assertTrue(serializesCorrectly(ffmap, "p2p-ff-3")); ffmap.put(Float.MAX_VALUE, Float.MAX_VALUE); assertTrue(serializesCorrectly(ffmap, "p2p-ff-4")); ffmap.put(Float.POSITIVE_INFINITY, Float.POSITIVE_INFINITY); 
assertTrue(serializesCorrectly(ffmap, "p2p-ff-5")); ffmap.put(Float.NEGATIVE_INFINITY, Float.NEGATIVE_INFINITY); assertTrue(serializesCorrectly(ffmap, "p2p-ff-6")); // NOTE: trove doesn't deal well with NaN // ffmap.put( Float.NaN, Float.NaN ); // assertTrue( serializesCorrectly( ffmap ) ); }