예제 #1
0
 /**
  * Registers a candidate item set and reports the best score known afterwards.
  *
  * <p>A candidate whose upper bound {@code ub()} is not at least {@code bestItemScore} is
  * ignored. Otherwise the candidate is added to {@code itemSets}, the tuples of {@code data}
  * matching it are scored with {@code var}, and the larger of that score and
  * {@code bestItemScore} is returned.
  *
  * @param itemSet candidate item set to evaluate
  * @param itemSets collection that receives every candidate passing the bound check
  * @param bestItemScore best score found so far
  * @return the candidate's score when it is strictly greater than {@code bestItemScore},
  *     otherwise {@code bestItemScore} unchanged
  */
 private static double iccUpdate(ItemSet itemSet, Set<ItemSet> itemSets, double bestItemScore) {
   // Bound check first: a candidate that cannot reach the current best is not even recorded.
   if (!(itemSet.ub() >= bestItemScore)) {
     return bestItemScore;
   }

   itemSets.add(itemSet);

   DataSet matched = data.matching(itemSet);
   int matchedCount = matched.getTuples().length;
   double score = var(matchedCount, matched.y());

   return score > bestItemScore ? score : bestItemScore;
 }
예제 #2
0
  /**
   * Loads the data set, caches its global statistics in the static fields {@code s}, {@code n}
   * and {@code m}, prints a short summary and runs the ICC search while timing it.
   *
   * <p>Open TODOs (translated from the original Dutch notes):
   *
   * <ul>
   *   <li>Avoid computing u and var twice; maybe rewrite the classes.
   *   <li>Use a BitVector instead of a BitSet.
   *   <li>Compute "Score" instead of "score".
   *   <li>DataSet matching?
   * </ul>
   *
   * @param args command-line arguments; not used
   */
  public static void main(String[] args) {
    // Disabled: small hand-written sample data set.
    /*Tuple[] tuples = new Tuple[]{
        new Tuple("0 0 1 0 0", "1 3", ' '),
        new Tuple("1 0 0 1 1", "1 2", ' '),
        new Tuple("0 0 0 1 1", "1 1", ' '),
        new Tuple("1 1 0 0 1", "2 1", ' '),
        new Tuple("1 1 0 0 1", "3 1", ' '),
        new Tuple("1 1 0 0 1", "3 2", ' '),
        new Tuple("0 0 0 1 0", "3 3", ' '),
        new Tuple("0 1 0 1 1", "2 3", ' ')
    };
    data = new DataSet(tuples);*/

    // Disabled alternative input:
    // data = Parser.parseAttributes("Corel5k-train.arff", 374);
    data = Parser.parseShortNotation("diabetes.txt", 1, new NumericalItemSet(new int[] {}));

    // Cache the statistics of the loaded data set in the static fields.
    s = data.s();
    n = data.getTuples().length;
    m = data.getTuples()[0].getClassValues().length;

    // Disabled: micro-benchmark of ub() on a single-item item set.
    /*double u = 0;
    long l = 0;
    for(int j = 0; j < 10; j++) {
        double t = System.currentTimeMillis();
        for(int i = 0; i < 10000; i++)
            u = data.getBluePrint().getOneItemSet(2, 48).ub();
        l += (System.currentTimeMillis() - t);
    }
    System.out.println(((double)l/10));*/

    StringBuilder summary = new StringBuilder("Data loaded, ");
    summary.append(data.getTuples().length).append(" tuples, ");
    summary.append(data.getTuples()[0].getItemSet().getLength()).append(" items, ");
    summary.append(data.getTuples()[0].getClassValues().length).append(" class values");
    System.out.println(summary.toString());

    final String timerName = "Full time";
    StopWatch.tic(timerName);
    System.out.println(icc());
    StopWatch.printToc(timerName);
  }
예제 #3
0
  /**
   * Searches level by level for the item set with the highest {@code var} score.
   *
   * <p>Level 1 evaluates every single-item item set obtained from the data set's blueprint and
   * keeps those accepted by {@code iccUpdate} in {@code Q1}. Each following level combines every
   * item set {@code Q} of the previous level with every level-1 item set {@code B} whose first
   * set bit lies above the last set bit of {@code Q} (presumably so that each combination is
   * generated only once). A pair is skipped when the upper bound {@code ub()} of either
   * {@code Q} or {@code B} is below the best score found so far. The search stops when a level
   * produces no new candidates.
   *
   * <p>The earlier version kept an extra set {@code B1} that was copied from {@code Q1} while
   * {@code Q1} was still empty; it therefore never held an element and removing from it had no
   * effect. That dead state has been dropped. The last set bit of each {@code Q} and the first
   * set bit of each {@code B} are now computed once instead of once per pair.
   *
   * @return the best-scoring item set found, or {@code null} when no candidate scored above 0.0
   */
  private static ItemSet icc() {
    ItemSet bestItemSet = null;
    int itemNumber = data.getTuples()[0].getItemSet().getLength();
    double bestItemScore = 0.0;
    Set<ItemSet> Q1 = new HashSet<ItemSet>();
    System.out.println("ICC init, with " + itemNumber + " items");
    for (int j = 0; j < itemNumber; j++) {
      ItemSet itemSet = getData().getBluePrint().getOneItemSet(j, itemNumber);
      double newScore = iccUpdate(itemSet, Q1, bestItemScore);
      // iccUpdate only ever returns the old best or a strictly larger score.
      if (newScore != bestItemScore) {
        bestItemSet = itemSet;
        bestItemScore = newScore;
      }
    }

    System.out.println("ICC init finished");

    // Previous level ("vorige" = previous); starts as a copy of the level-1 candidates.
    Set<ItemSet> QVorige = new HashSet<ItemSet>();
    QVorige.addAll(Q1);

    // Next level ("volgende" = next); aliases the previous level only to enter the loop.
    Set<ItemSet> QVolgende = QVorige;

    while (!QVolgende.isEmpty()) {
      QVolgende = new HashSet<ItemSet>();

      // Snapshot the previous level (same order as iterating the set) and determine the
      // highest set bit of every member once; 0 is used when no bit is set.
      ItemSet[] previous = QVorige.toArray(new ItemSet[0]);
      int[] lastBits = new int[previous.length];
      for (int k = 0; k < previous.length; k++) {
        int last = 0;
        for (int i = previous[k].getBitSet().nextSetBit(0);
            i >= 0;
            i = previous[k].getBitSet().nextSetBit(i + 1)) {
          last = i;
        }
        lastBits[k] = last;
      }

      for (ItemSet B : Q1) {
        int firstBitOfB = B.getBitSet().nextSetBit(0);
        for (int k = 0; k < previous.length; k++) {
          ItemSet Q = previous[k];
          if (lastBits[k] < firstBitOfB && Q.ub() >= bestItemScore) {
            if (B.ub() < bestItemScore) {
              // B on its own can no longer reach the best score; skip this pair.
              continue;
            }
            ItemSet itemSet = Q.union(B);
            double newScore = iccUpdate(itemSet, QVolgende, bestItemScore);
            if (newScore != bestItemScore) {
              bestItemSet = itemSet;
              bestItemScore = newScore;
            }
          }
        }
      }
      QVorige = QVolgende;
    }
    return bestItemSet;
  }
예제 #4
0
  /**
   * Computes the weighted sum of squared deviations of two group means from the global mean,
   * where the data set is split into a group of {@code x} tuples and the remaining tuples.
   *
   * <p>For every class dimension {@code i} the global mean is {@code s[i] / total}, the mean of
   * the first group is {@code y[i] / x} and the mean of the rest is
   * {@code (s[i] - y[i]) / (total - x)}, with {@code total} the number of tuples in
   * {@code data}. The result is
   * {@code x * sum_i (groupMean_i - globalMean_i)^2
   * + (total - x) * sum_i (restMean_i - globalMean_i)^2}.
   *
   * <p>The earlier version multiplied {@code s[i]} by the tuple count where the global mean
   * requires a division, and compared {@code x} against a different {@code n} than the one used
   * in the arithmetic; both now use the same locally computed total.
   *
   * @param x number of tuples in the first group
   * @param y per-dimension sums of the class values over the first group; must not be longer
   *     than the static array {@code s}
   * @return the score described above, or 0.0 when one of the two groups is empty
   */
  public static double var(double x, int[] y) {
    if (x == 0) {
      return 0.0;
    }

    final double total = data.getTuples().length;
    if (x == total) {
      // The remainder is empty; this also rules out a division by zero below.
      return 0.0;
    }

    double coveredSum = 0.0;
    double restSum = 0.0;

    for (int i = 0; i < y.length; i++) {
      double globalMean = s[i] / total;

      double coveredDev = ((double) y[i] / x) - globalMean;
      coveredSum += coveredDev * coveredDev;

      double restDev = (((double) s[i] - (double) y[i]) / (total - x)) - globalMean;
      restSum += restDev * restDev;
    }

    return x * coveredSum + (total - x) * restSum;
  }