Example #1
0
  /**
   * Tests a learned rule against the rows returned by the test query and stores the outcome on the
   * rule itself.
   *
   * <p>The query built by {@code createTestQuery(rule)} is executed through {@code qh}. Each result
   * row is read as (column 1: numeric value as {@code float}, column 2: observed constant as
   * string with double quotes stripped, column 3: count as {@code int}). The numeric value is
   * mapped to a bucket via {@code histogram.getBucket}; rows whose bucket accuracy (as returned by
   * {@code ArrayTools.getAccuraciesWithMinSupport}) is at least {@code minAccuracy} are counted as
   * positives when the observation equals the constant of the rule head, and as negatives
   * otherwise. Rows in other buckets are ignored.
   *
   * <p>Results written to the rule:
   *
   * <ul>
   *   <li>{@code observedPositives} / {@code observedNegatives}: the summed counts;
   *   <li>{@code observedConfidence}: positives / (positives + negatives); this is {@code NaN}
   *       when no row was counted (float division 0/0);
   *   <li>{@code observedOverallConfidence}: {@code sum(multiply(accuracy, support)) /
   *       sum(support)} computed with {@code ArrayTools} over the rule's own distributions.
   * </ul>
   *
   * @param rule rule to test; its {@code observed*} fields are overwritten
   * @param minSupport minimum support forwarded to {@code ArrayTools.getAccuraciesWithMinSupport}
   * @param minAccuracy minimum bucket accuracy required for a row to be counted
   * @throws SQLException if reading from or closing the result set fails
   */
  public void testRule(NumericalRule rule, int minSupport, float minAccuracy) throws SQLException {
    String prediction = rule.getHead().getConstant();
    String sparql = createTestQuery(rule);

    // try-with-resources guarantees the result set is released even if a row cannot be read.
    try (ResultSet rs = (ResultSet) qh.executeQuery(sparql)) {
      float overallAcc =
          ((float)
                  ArrayTools.sum(
                      ArrayTools.multiply(
                          rule.getAccuracyDistribution(), rule.getSupportDistribution())))
              / ((float) ArrayTools.sum(rule.getSupportDistribution()));

      float[] acc =
          ArrayTools.getAccuraciesWithMinSupport(
              rule.getSupportDistribution(), rule.getBodySupportDistribution(), minSupport);

      int positivesCount = 0;
      int negativesCount = 0;

      while (rs.next()) {
        float num = rs.getFloat(1);
        // Literal replacement; no regular expression is needed to strip the quotes.
        String observation = rs.getString(2).replace("\"", "");
        int count = rs.getInt(3);

        // Only rows falling into a sufficiently accurate bucket take part in the evaluation.
        int bucket = histogram.getBucket(num);
        if (acc[bucket] >= minAccuracy) {
          if (prediction.equals(observation)) {
            positivesCount += count;
          } else {
            negativesCount += count;
          }
        }
      }

      int coveringCount = positivesCount + negativesCount;
      // Deliberately left as NaN when nothing was covered, so callers can detect that case.
      float predictionAccuracy = ((float) positivesCount) / ((float) coveringCount);

      rule.observedConfidence = predictionAccuracy;
      rule.observedPositives = positivesCount;
      rule.observedNegatives = negativesCount;
      rule.observedOverallConfidence = overallAcc;
    }
  }
Example #2
0
  /**
   * Runs the rule-learning experiment once per combination of the configured parameter values.
   *
   * <p>For every combination this method opens a connection from {@code
   * src/rdf3x-yago.properties}, loads the relation metadata from {@code
   * relationsInfoForRdf3x.ser} into {@code info}, configures a {@link RuleLearner} whose root
   * relation is {@code <http://yago-knowledge.org/resource/hasPopulation>}, lets it learn, prints
   * every learned rule followed by its accuracy distribution, prints the suggestions of the
   * lattice root and finally resets {@link CorrelationLatticeNode}.
   *
   * <p>Each parameter array currently holds a single value and the {@code measure} and {@code
   * numBuckets} loops run exactly once, so a single experiment is executed. Earlier experiments
   * used other property files, root relations and larger parameter grids; widen the arrays below
   * to sweep several settings.
   *
   * @param args ignored
   * @throws Exception if any step of the experiment fails
   */
  public static void main(String[] args) throws Exception {

    // Nothing is appended to the summary at present, so the final line prints the header only.
    String summary = "";

    float confidenceThreshold = (float) 0.7;

    // Parameter grids, listed in the order in which the loops below nest.
    int[] maxNodesArray = {1000};
    int[] suppTSArray = {5};
    double[] indepTSArray = {0.0};
    double[] kldivTSArray = {0.0};

    for (int measure = 0; measure <= 0; measure++) {
      for (int numBuckets = 25; numBuckets <= 25; numBuckets += 25) {
        for (int maxNodes : maxNodesArray) {
          for (int suppThreshold : suppTSArray) {
            for (double indepThreshold : indepTSArray) {
              for (double klDivThreshold : kldivTSArray) {
                Connection connection = Driver.connect("src/rdf3x-yago.properties");
                QueryHandler queryHandler = new QueryHandler(connection);

                info = RelationsInfo.readFromDisk("relationsInfoForRdf3x.ser");

                // Relation handed to the learner as its root relation.
                Relation targetRelation =
                    info.getRelationFromRelations(
                        "<http://yago-knowledge.org/resource/hasPopulation>");

                RuleLearner learner = new RuleLearner(queryHandler, null, null, targetRelation);

                ArrayTools.measure = measure;

                learner.indepThreshold = (float) indepThreshold;
                learner.klDivThreshold = (float) klDivThreshold;
                learner.suppThreshold = suppThreshold;
                learner.maxNodesPerLevel = maxNodes;
                learner.confidenceThreshold = confidenceThreshold;
                learner.numOfBuckets = numBuckets;

                learner.learn();
                CorrelationLatticeNode latticeRoot = learner.getRoot();

                // One line with the rule, then a tab-indented line with its accuracies.
                Collection<NumericalRule> rules = learner.getLearnedRules();
                for (NumericalRule learnedRule : rules) {
                  System.out.println(learnedRule.getRuleString());
                  System.out.print("\t");
                  ArrayTools.print(learnedRule.getAccuracyDistribution());
                }

                latticeRoot.printSuggestions(null);

                // The evaluation on a separate test partition is currently disabled.

                CorrelationLatticeNode.reset();
              }
            }
          }
        }
      }
    }

    System.out.println("\n\n\n\nFinalOutput" + summary);
  }