Exemplo n.º 1
0
  /**
   * Accumulates per-feature totals into {@code featureCounts} and per-feature document counts into
   * {@code documentFrequencies} for every instance in {@code instances}.
   *
   * <p>The data object of the first instance selects the counting strategy for the whole list;
   * every instance is then cast to that same type.
   *
   * <ul>
   *   <li>{@code FeatureSequence}: each position counts as one occurrence of its feature. A feature
   *       adds its number of occurrences in the document to {@code featureCounts} and exactly one
   *       to {@code documentFrequencies}, however often it repeats within that document.
   *   <li>{@code FeatureVector}: each location adds its stored value to {@code featureCounts} and
   *       one to {@code documentFrequencies}.
   * </ul>
   *
   * <p>An empty instance list or an unsupported data class is logged and leaves the counters
   * untouched. Progress (the number of instances processed) is printed to {@code System.err} every
   * 1000 instances.
   */
  public void count() {

    if (instances.size() == 0) {
      logger.info("Instance list is empty");
      return;
    }

    // Only the first instance is inspected to decide how the whole list is interpreted.
    Object firstData = instances.get(0).getData();

    int index = 0;

    if (firstData instanceof FeatureSequence) {

      for (Instance instance : instances) {
        FeatureSequence features = (FeatureSequence) instance.getData();

        // Occurrences of each feature within this single document.
        TIntIntHashMap docCounts = new TIntIntHashMap();
        for (int i = 0; i < features.getLength(); i++) {
          docCounts.adjustOrPutValue(features.getIndexAtPosition(i), 1, 1);
        }

        // Fold the document's counts into the totals. Every key must be visited: the previous
        // bound of keys.length - 1 silently dropped one feature per document.
        for (int feature : docCounts.keys()) {
          featureCounts[feature] += docCounts.get(feature);
          documentFrequencies[feature]++;
        }

        index++;
        if (index % 1000 == 0) {
          System.err.println(index);
        }
      }
    } else if (firstData instanceof FeatureVector) {

      for (Instance instance : instances) {
        FeatureVector features = (FeatureVector) instance.getData();

        for (int location = 0; location < features.numLocations(); location++) {
          int feature = features.indexAtLocation(location);
          double value = features.valueAtLocation(location);

          documentFrequencies[feature]++;
          featureCounts[feature] += value;
        }

        index++;
        if (index % 1000 == 0) {
          System.err.println(index);
        }
      }
    } else {
      logger.info("Unsupported data class: " + firstData.getClass().getName());
    }
  }
  /**
   * Checks that primitive-to-primitive hash maps pass {@code serializesCorrectly} when empty and
   * again after each successive insertion of a zero key and the extreme values of the primitive
   * type. One map is exercised per primitive pairing: long, int, double and float.
   */
  public void testP2PMap() {
    // long -> long
    TLongLongHashMap longs = new TLongLongHashMap();
    assertTrue(serializesCorrectly(longs, "p2p-ll-1"));

    longs.put(0, 1);
    assertTrue(serializesCorrectly(longs, "p2p-ll-2"));

    longs.put(Long.MIN_VALUE, Long.MIN_VALUE);
    assertTrue(serializesCorrectly(longs, "p2p-ll-3"));

    longs.put(Long.MAX_VALUE, Long.MAX_VALUE);
    assertTrue(serializesCorrectly(longs, "p2p-ll-4"));

    // int -> int
    TIntIntHashMap ints = new TIntIntHashMap();
    assertTrue(serializesCorrectly(ints, "p2p-ii-1"));

    ints.put(0, 1);
    assertTrue(serializesCorrectly(ints, "p2p-ii-2"));

    ints.put(Integer.MIN_VALUE, Integer.MIN_VALUE);
    assertTrue(serializesCorrectly(ints, "p2p-ii-3"));

    ints.put(Integer.MAX_VALUE, Integer.MAX_VALUE);
    assertTrue(serializesCorrectly(ints, "p2p-ii-4"));

    // double -> double, including both infinities
    TDoubleDoubleHashMap doubles = new TDoubleDoubleHashMap();
    assertTrue(serializesCorrectly(doubles, "p2p-dd-1"));

    doubles.put(0, 1);
    assertTrue(serializesCorrectly(doubles, "p2p-dd-2"));

    doubles.put(Double.MIN_VALUE, Double.MIN_VALUE);
    assertTrue(serializesCorrectly(doubles, "p2p-dd-3"));

    doubles.put(Double.MAX_VALUE, Double.MAX_VALUE);
    assertTrue(serializesCorrectly(doubles, "p2p-dd-4"));

    doubles.put(Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY);
    assertTrue(serializesCorrectly(doubles, "p2p-dd-5"));

    doubles.put(Double.NEGATIVE_INFINITY, Double.NEGATIVE_INFINITY);
    assertTrue(serializesCorrectly(doubles, "p2p-dd-6"));

    // Double.NaN is intentionally not inserted here: trove doesn't deal well with NaN.

    // float -> float, including both infinities
    TFloatFloatHashMap floats = new TFloatFloatHashMap();
    assertTrue(serializesCorrectly(floats, "p2p-ff-1"));

    floats.put(0, 1);
    assertTrue(serializesCorrectly(floats, "p2p-ff-2"));

    floats.put(Float.MIN_VALUE, Float.MIN_VALUE);
    assertTrue(serializesCorrectly(floats, "p2p-ff-3"));

    floats.put(Float.MAX_VALUE, Float.MAX_VALUE);
    assertTrue(serializesCorrectly(floats, "p2p-ff-4"));

    floats.put(Float.POSITIVE_INFINITY, Float.POSITIVE_INFINITY);
    assertTrue(serializesCorrectly(floats, "p2p-ff-5"));

    floats.put(Float.NEGATIVE_INFINITY, Float.NEGATIVE_INFINITY);
    assertTrue(serializesCorrectly(floats, "p2p-ff-6"));

    // Float.NaN is intentionally not inserted here: trove doesn't deal well with NaN.
  }