/**
 * Computes the kernel matrix over all instances in {@code data}.
 *
 * <p>Feature vectors are produced by {@link #computeFeatureVectors(SingleDTGraph)}; the time spent
 * computing the kernel matrix itself is added to {@code compTime} (the feature-vector step manages
 * its own timing).
 *
 * @param data graph data set whose instances define the matrix dimensions
 * @return a square kernel matrix of size instances x instances
 */
public double[][] compute(SingleDTGraph data) {
	SparseVector[] fvs = computeFeatureVectors(data);
	int numInstances = data.getInstances().size();
	double[][] matrix = KernelUtils.initMatrix(numInstances, numInstances);

	long start = System.currentTimeMillis();
	matrix = KernelUtils.computeKernelMatrix(fvs, matrix);
	compTime += System.currentTimeMillis() - start;

	return matrix;
}
/**
 * Builds a cache of (graph, target-labels) pairs keyed by seed, then inference flag, then
 * extraction depth.
 *
 * <p>For each seed a fresh sub-sample is drawn via {@code data.createSubSet(...)}; for each
 * (inference, depth) combination the relevant statements are fetched from the triple store,
 * blacklisted statements are removed, and the remainder is turned into a {@link SingleDTGraph}.
 * The target list is copied so later sub-sampling does not mutate cached labels.
 *
 * @param tripleStore RDF store to pull statements from
 * @param data        data set providing instances, blacklist and targets
 * @param seeds       sub-sampling seeds (outer cache key)
 * @param fraction    sub-sample fraction passed to {@code createSubSet}
 * @param minSize     minimum class size passed to {@code createSubSet}
 * @param maxClasses  maximum number of classes passed to {@code createSubSet}
 * @param depths      extraction depths (innermost cache key)
 * @param inference   inference settings (middle cache key)
 * @return nested map: seed -> inference -> depth -> (graph, targets)
 */
private static Map<Long, Map<Boolean, Map<Integer, Pair<SingleDTGraph, List<Double>>>>> createDataSetCache(
		RDFDataSet tripleStore,
		LargeClassificationDataSet data,
		long[] seeds,
		double fraction,
		int minSize,
		int maxClasses,
		int[] depths,
		boolean[] inference) {
	Map<Long, Map<Boolean, Map<Integer, Pair<SingleDTGraph, List<Double>>>>> cache =
			new HashMap<Long, Map<Boolean, Map<Integer, Pair<SingleDTGraph, List<Double>>>>>();

	for (long seed : seeds) {
		Map<Boolean, Map<Integer, Pair<SingleDTGraph, List<Double>>>> perInference =
				new HashMap<Boolean, Map<Integer, Pair<SingleDTGraph, List<Double>>>>();
		cache.put(seed, perInference);

		// Draw the sub-sample for this seed; subsequent getRDFData()/getTarget() calls reflect it.
		data.createSubSet(seed, fraction, minSize, maxClasses);

		for (boolean inf : inference) {
			Map<Integer, Pair<SingleDTGraph, List<Double>>> perDepth =
					new HashMap<Integer, Pair<SingleDTGraph, List<Double>>>();
			perInference.put(inf, perDepth);

			for (int depth : depths) {
				System.out.println("Getting Statements...");
				Set<Statement> stmts = RDFUtils.getStatements4Depth(
						tripleStore, data.getRDFData().getInstances(), depth, inf);
				System.out.println("# Statements: " + stmts.size());

				// Remove blacklisted statements before building the graph.
				stmts.removeAll(new HashSet<Statement>(data.getRDFData().getBlackList()));
				System.out.println("# Statements: " + stmts.size() + ", after blackList");

				System.out.println("Building Graph...");
				SingleDTGraph graph = RDFUtils.statements2Graph(
						stmts, RDFUtils.REGULAR_LITERALS, data.getRDFData().getInstances(), true);
				System.out.println(
						"Built Graph with "
								+ graph.getGraph().nodes().size()
								+ ", and "
								+ graph.getGraph().links().size()
								+ " links");

				// Copy the targets so the cached list is independent of later sub-sampling.
				perDepth.put(
						depth,
						new Pair<SingleDTGraph, List<Double>>(
								graph, new ArrayList<Double>(data.getTarget())));
			}
		}
	}
	return cache;
}
/**
 * Computes one Weisfeiler-Lehman feature vector per instance in {@code data}.
 *
 * <p>The graph is relabeled via a {@link WeisfeilerLehmanDTGraphIterator}; a feature-vector pass
 * over the instance vertices is taken at depth 0 and after each of the {@code iterations}
 * relabeling rounds. Sets {@code compTime} to the elapsed WL computation time and, when
 * {@code normalize} is set, normalizes the resulting vectors.
 *
 * @param data graph data set providing the graph and its instance nodes
 * @return one {@link SparseVector} per instance, in instance order
 */
public SparseVector[] computeFeatureVectors(SingleDTGraph data) {
	int numInstances = data.getInstances().size();
	SparseVector[] fvs = new SparseVector[numInstances];
	for (int i = 0; i < numInstances; i++) {
		fvs[i] = new SparseVector();
	}

	// Populates rdfGraph and instanceVertices from the supplied data.
	init(data.getGraph(), data.getInstances());

	WeisfeilerLehmanIterator<DTGraph<StringLabel, StringLabel>> wl =
			new WeisfeilerLehmanDTGraphIterator(reverse, noDuplicateSubtrees);
	List<DTGraph<StringLabel, StringLabel>> graphs =
			new ArrayList<DTGraph<StringLabel, StringLabel>>();
	graphs.add(rdfGraph);

	long start = System.currentTimeMillis();
	wl.wlInitialize(graphs);

	double weight = 1.0;
	// Depth 0: features from the initial labels.
	computeFVs(rdfGraph, instanceVertices, weight, fvs, wl.getLabelDict().size() - 1, 0);

	for (int it = 0; it < iterations; it++) {
		wl.wlIterate(graphs);
		computeFVs(rdfGraph, instanceVertices, weight, fvs, wl.getLabelDict().size() - 1, it + 1);
	}
	compTime = System.currentTimeMillis() - start;

	if (this.normalize) {
		fvs = KernelUtils.normalize(fvs);
	}
	return fvs;
}