private CandidateSteinerSets getCandidateSteinerSets( List<ColumnNode> columnNodes, boolean useCorrectTypes, int numberOfCRFCandidates, Set<Node> addedNodes) { if (columnNodes == null || columnNodes.isEmpty()) return null; int maxNumberOfSteinerNodes = columnNodes.size() * 2; CandidateSteinerSets candidateSteinerSets = new CandidateSteinerSets(maxNumberOfSteinerNodes, ontologyManager.getContextId()); if (addedNodes == null) addedNodes = new HashSet<Node>(); Set<SemanticTypeMapping> tempSemanticTypeMappings; HashMap<ColumnNode, List<SemanticType>> columnSemanticTypes = new HashMap<ColumnNode, List<SemanticType>>(); HashMap<String, Integer> semanticTypesCount = new HashMap<String, Integer>(); List<SemanticType> candidateSemanticTypes; String domainUri = "", propertyUri = ""; for (ColumnNode n : columnNodes) { candidateSemanticTypes = n.getTopKLearnedSemanticTypes(numberOfCRFCandidates); columnSemanticTypes.put(n, candidateSemanticTypes); for (SemanticType semanticType : candidateSemanticTypes) { if (semanticType == null || semanticType.getDomain() == null || semanticType.getType() == null) continue; domainUri = semanticType.getDomain().getUri(); propertyUri = semanticType.getType().getUri(); Integer count = semanticTypesCount.get(domainUri + propertyUri); if (count == null) semanticTypesCount.put(domainUri + propertyUri, 1); else semanticTypesCount.put(domainUri + propertyUri, count.intValue() + 1); } } int numOfMappings = 1; for (ColumnNode n : columnNodes) { candidateSemanticTypes = columnSemanticTypes.get(n); if (candidateSemanticTypes == null) continue; logger.info("===== Column: " + n.getColumnName()); Set<SemanticTypeMapping> semanticTypeMappings = new HashSet<SemanticTypeMapping>(); for (SemanticType semanticType : candidateSemanticTypes) { logger.info("\t===== Semantic Type: " + semanticType.getModelLabelString()); if (semanticType == null || semanticType.getDomain() == null || semanticType.getType() == null) continue; domainUri = semanticType.getDomain().getUri(); propertyUri = semanticType.getType().getUri(); Integer countOfSemanticType = semanticTypesCount.get(domainUri + propertyUri); // logger.info("count of semantic type: " + countOfSemanticType); tempSemanticTypeMappings = findSemanticTypeInGraph(n, semanticType, semanticTypesCount, addedNodes); // logger.info("number of matches for semantic type: " + // + (tempSemanticTypeMappings == null ? 0 : tempSemanticTypeMappings.size())); if (tempSemanticTypeMappings != null) semanticTypeMappings.addAll(tempSemanticTypeMappings); int countOfMatches = tempSemanticTypeMappings == null ? 0 : tempSemanticTypeMappings.size(); if (countOfMatches < countOfSemanticType) // No struct in graph is matched with the semantic type, we add a // new struct to the graph { for (int i = 0; i < countOfSemanticType - countOfMatches; i++) { SemanticTypeMapping mp = addSemanticTypeStruct(n, semanticType, addedNodes); if (mp != null) semanticTypeMappings.add(mp); } } } // System.out.println("number of matches for column " + n.getColumnName() + // ": " + (semanticTypeMappings == null ? 0 : semanticTypeMappings.size())); logger.info( "number of matches for column " + n.getColumnName() + ": " + (semanticTypeMappings == null ? 0 : semanticTypeMappings.size())); numOfMappings *= semanticTypeMappings == null || semanticTypeMappings.isEmpty() ? 1 : semanticTypeMappings.size(); candidateSteinerSets.updateSteinerSets(semanticTypeMappings); } // System.out.println("number of possible mappings: " + numOfMappings); logger.info("number of possible mappings: " + numOfMappings); return candidateSteinerSets; }
public List<SortableSemanticModel_Old> hypothesize( boolean useCorrectTypes, int numberOfCRFCandidates) { Set<Node> addedNodes = new HashSet< Node>(); // They should be deleted from the graph after computing the semantic models logger.info("finding candidate steiner sets ... "); CandidateSteinerSets candidateSteinerSets = getCandidateSteinerSets(columnNodes, useCorrectTypes, numberOfCRFCandidates, addedNodes); if (candidateSteinerSets == null || candidateSteinerSets.getSteinerSets() == null || candidateSteinerSets.getSteinerSets().isEmpty()) { logger.error("there is no candidate set of steiner nodes."); return null; } logger.info("number of steiner sets: " + candidateSteinerSets.numberOfCandidateSets()); logger.info("updating weights according to training data ..."); long start = System.currentTimeMillis(); this.updateWeights(); long updateWightsElapsedTimeMillis = System.currentTimeMillis() - start; logger.info("time to update weights: " + (updateWightsElapsedTimeMillis / 1000F)); logger.info("computing steiner trees ..."); List<SortableSemanticModel_Old> sortableSemanticModels = new ArrayList<SortableSemanticModel_Old>(); int count = 1; for (SteinerNodes sn : candidateSteinerSets.getSteinerSets()) { logger.debug("computing steiner tree for steiner nodes set " + count + " ..."); logger.debug(sn.getScoreDetailsString()); DirectedWeightedMultigraph<Node, LabeledLink> tree = computeSteinerTree(sn.getNodes()); count++; if (tree != null) { SemanticModel sm = new SemanticModel( new RandomGUID().toString(), tree, columnNodes, sn.getMappingToSourceColumns()); SortableSemanticModel_Old sortableSemanticModel = new SortableSemanticModel_Old(sm, sn); sortableSemanticModels.add(sortableSemanticModel); } if (count == ModelingConfigurationRegistry.getInstance() .getModelingConfiguration( ContextParametersRegistry.getInstance() .getContextParameters(ontologyManager.getContextId()) .getKarmaHome()) .getNumCandidateMappings()) break; } Collections.sort(sortableSemanticModels); // logger.info("results are ready ..."); // return sortableSemanticModels; List<SortableSemanticModel_Old> uniqueModels = new ArrayList<SortableSemanticModel_Old>(); SortableSemanticModel_Old current, previous; if (sortableSemanticModels != null) { if (sortableSemanticModels.size() > 0) uniqueModels.add(sortableSemanticModels.get(0)); for (int i = 1; i < sortableSemanticModels.size(); i++) { current = sortableSemanticModels.get(i); previous = sortableSemanticModels.get(i - 1); if (current.getScore() == previous.getScore() && current.getCost() == previous.getCost()) continue; uniqueModels.add(current); } } logger.info("results are ready ..."); return uniqueModels; }