public void testRule(NumericalRule rule, int minSupport, float minAccuracy) throws SQLException { // System.out.println("!!!"+rule.getRuleString()+"!!!"); String prediction = rule.getHead().getConstant(); int positivesCount = 0; int negativesCount = 0; String sparql = createTestQuery(rule); // System.out.println(sparql); long t = System.currentTimeMillis(); ResultSet rs = (ResultSet) qh.executeQuery(sparql); // System.out.println("Executed in "+(System.currentTimeMillis()-t)); float overallAcc = ((float) ArrayTools.sum( ArrayTools.multiply( rule.getAccuracyDistribution(), rule.getSupportDistribution()))) / ((float) ArrayTools.sum(rule.getSupportDistribution())); float[] acc = ArrayTools.getAccuraciesWithMinSupport( rule.getSupportDistribution(), rule.getBodySupportDistribution(), minSupport); String ranges = ""; for (int i = 0; i < acc.length; i++) { if (acc[i] >= minAccuracy) ranges += "+"; else ranges += "-"; } // System.out.println(ranges); // ArrayTools.print(acc); float lastNum = Float.NaN; while (rs.next()) { float num = rs.getFloat(1); String observation = rs.getString(2).replaceAll("\"", ""); int count = rs.getInt(3); int bucket = histogram.getBucket(num); if (acc[bucket] >= minAccuracy) { if (prediction.equals(observation)) positivesCount += count; else negativesCount += count; } lastNum = num; } int coveringCount = positivesCount + negativesCount; float predictionAccuracy = ((float) positivesCount) / (((float) (coveringCount))); // System.out.println(rule.getRuleString() + "\n[" + predictionAccuracy + "," + coveringCount + // "] = ("+positivesCount+"/"+negativesCount+")"); rule.observedConfidence = predictionAccuracy; rule.observedPositives = positivesCount; rule.observedNegatives = negativesCount; rule.observedOverallConfidence = overallAcc; }
public static void main(String[] args) throws Exception { String output = ""; float confidenceThreshold = (float) 0.7; // double[] indepTSArray = {/*0.0, 5.0, 10.0, 15.0, 20.0, 30.0,*/ 50.0/*, 75.0, 100.0*/}; // double[] kldivTSArray = {/*0.000, */0.0125, 0.0250, 0.0325, 0.0500, 0.0625, 0.0750, 0.0825, // 0.100}; // int[] suppTSArray = {25/*, 50, 75, 100, 125, 150, 300, 500, 1000*/}; double[] indepTSArray = {0.0}; int[] suppTSArray = {5}; // int[] maxNodesArray = // {5,10,15,20,25,30,40,50,60,70,80,90,100,110,120,130,140,150,200,250,300,350,400,450,500}; // int[] maxNodesArray = {10,20,30,40,50,60,70,80,90,100}; // int[] maxNodesArray = {10,50,100}; int[] maxNodesArray = {1000}; double[] kldivTSArray = {0.0}; for (int measure = 0; measure <= 0; measure++) for (int numBuckets = 25; numBuckets <= 25; numBuckets += 25) for (int l = 0; l < maxNodesArray.length; l++) for (int k = 0; k < suppTSArray.length; k++) for (int i = 0; i < indepTSArray.length; i++) { for (int j = 0; j < kldivTSArray.length; j++) { // Connection connPartition = Driver.connect("src/rdf3x-dblp.properties"); // Connection connPartition = Driver.connect("src/rdf3x-data91-train.properties"); Connection connPartition = Driver.connect("src/rdf3x-yago.properties"); QueryHandler qh = new QueryHandler(connPartition); info = RelationsInfo.readFromDisk("relationsInfoForRdf3x.ser"); // Relation rootRelation = new Relation("<http://purl.org/dc/terms/citations>", // null, null); // Relation rootRelation = // info.getRelationFromRelations("<http://yago-knowledge.org/resource/hasHeight>"); Relation rootRelation = info.getRelationFromRelations( "<http://yago-knowledge.org/resource/hasPopulation>"); // Relation rootRelation = new // Relation("<http://data-gov.tw.rpi.edu/vocab/p/91/pincp>", null, null); // Relation rootRelation = new // Relation("<http://dbpedia.org/property/populationDensity>", null, null); // Relation rootRelation = new // Relation("<http://dbpedia.org/property/gdpNominalPerCapita>", null, null); // Relation rootRelation = new Relation("<http://purl.org/ontology/mo/duration_ms>", // null, null); RuleLearner learner = new RuleLearner(qh, null, null, rootRelation); ArrayTools.measure = measure; learner.indepThreshold = (float) indepTSArray[i]; learner.klDivThreshold = (float) kldivTSArray[j]; learner.suppThreshold = suppTSArray[k]; learner.maxNodesPerLevel = maxNodesArray[l]; learner.confidenceThreshold = confidenceThreshold; learner.numOfBuckets = numBuckets; // RuleLearner learner = new RuleLearner(qh, null, null, // info.getRelationFromRelations("<http://yago-knowledge.org/resource/hasPopulation>")); learner.learn(); CorrelationLatticeNode root = learner.getRoot(); // CorrelationLatticeNode root = // CorrelationLatticeNode.readFromDisk("correlation-lattice-income.ser"); // Collection<NumericalRule> learnedRules = // CorrelationLatticeNode.searchRules(root); Collection<NumericalRule> learnedRules = learner.getLearnedRules(); for (NumericalRule r : learnedRules) { System.out.println(r.getRuleString()); System.out.print("\t"); ArrayTools.print(r.getAccuracyDistribution()); } root.printSuggestions(null); // Deserialize load it /*CorrelationLatticeNode root = CorrelationLatticeNode.readFromDisk("correlation-lattice-income.ser"); Histogram hs = root.getHistogram(); ObjectInputStream ois = new ObjectInputStream(new FileInputStream("rules-income.ser")); Set<NumericalRule> learnedRules = (HashSet<NumericalRule>) ois.readObject(); for (NumericalRule r: learnedRules) { System.out.println(r.getRuleString()); } if (root.getRootLiteral()==null) System.out.println("FODEO!"); */ // RuleTester rt = new RuleTester(qh,hs,rootRelation); /* // Test on test partition connPartition = Driver.connect("src/rdf3x-data91-test.properties"); qh = new QueryHandler(connPartition); float negatives = 0; float positives = 0; float avgGain = 0; RuleTester rt = new RuleTester(root,qh); for (NumericalRule r: learnedRules) { rt.testRule(r, learner.suppThreshold, confidenceThreshold); boolean isSpecialization = false; NumericalRule generalization = null; for (NumericalRule r1: learnedRules) { if (r.isSpecializationOf(r1)) { isSpecialization = true; generalization = r1; break; } } if (r.observedOverallConfidence<confidenceThreshold) { float gain = r.observedConfidence/r.observedOverallConfidence; if (!Float.isNaN(gain)) { float observedTotal = (float)(r.observedPositives+r.observedNegatives); float currentTotal = (float)(positives+negatives); avgGain = (avgGain*(currentTotal) + gain*(observedTotal))/(currentTotal+observedTotal); } if (!isSpecialization) { positives += r.observedPositives; negatives += r.observedNegatives; } else { //System.out.println("Specialization of: "+generalization.getRuleString()); } } } output += "\n\nkldiv="+learner.klDivThreshold+"\tindep="+learner.indepThreshold + "\tminsup="+learner.suppThreshold + "\tmaxnodes="+learner.maxNodesPerLevel + "\tbuckets="+numBuckets + "\tmeasure="+measure + "\nElapsedTime="+learner.elapsedTime+" \t("+learner.firstLevelTime+" + "+(learner.elapsedTime-learner.firstLevelTime)+")" + "\nacc="+(positives/(positives+negatives)) + "\nsup="+(positives+negatives) + "\ngain="+(avgGain) + "\nnodes="+root.countNonPruned() + "\nrules="+learnedRules.size(); */ CorrelationLatticeNode.reset(); } } System.out.println("\n\n\n\nFinalOutput" + output); }