Exemplo n.º 1
0
  /**
   * Builds the candidate Steiner node sets for the given columns from their top learned semantic
   * types.
   *
   * <p>The method works in two passes. The first pass asks every column for its top candidate
   * semantic types and counts, across all columns, how often each (domain URI + property URI)
   * combination occurs. The second pass looks up every candidate semantic type in the graph via
   * {@code findSemanticTypeInGraph}; when fewer matches are found than the number of times the
   * type occurs, the missing structures are created with {@code addSemanticTypeStruct}. The
   * mappings collected for each column are then fed to
   * {@link CandidateSteinerSets#updateSteinerSets}.
   *
   * @param columnNodes the columns to map; when {@code null} or empty the method returns
   *     {@code null}
   * @param useCorrectTypes not read by this method
   * @param numberOfCRFCandidates forwarded to {@code getTopKLearnedSemanticTypes} for every column
   * @param addedNodes set handed to {@code findSemanticTypeInGraph} and
   *     {@code addSemanticTypeStruct}; when {@code null} a fresh set local to this call is used, so
   *     the caller cannot observe its contents
   * @return the populated candidate sets, or {@code null} when there are no columns
   */
  private CandidateSteinerSets getCandidateSteinerSets(
      List<ColumnNode> columnNodes,
      boolean useCorrectTypes,
      int numberOfCRFCandidates,
      Set<Node> addedNodes) {

    if (columnNodes == null || columnNodes.isEmpty()) return null;

    int maxNumberOfSteinerNodes = columnNodes.size() * 2;
    CandidateSteinerSets candidateSteinerSets =
        new CandidateSteinerSets(maxNumberOfSteinerNodes, ontologyManager.getContextId());

    if (addedNodes == null) addedNodes = new HashSet<Node>();

    HashMap<ColumnNode, List<SemanticType>> columnSemanticTypes =
        new HashMap<ColumnNode, List<SemanticType>>();
    HashMap<String, Integer> semanticTypesCount = new HashMap<String, Integer>();

    // Pass 1: remember each column's candidates and count every domain+property combination.
    for (ColumnNode n : columnNodes) {

      List<SemanticType> candidateSemanticTypes =
          n.getTopKLearnedSemanticTypes(numberOfCRFCandidates);
      columnSemanticTypes.put(n, candidateSemanticTypes);

      // The second pass already tolerates a missing candidate list; do the same here instead of
      // failing with a NullPointerException in the loop below.
      if (candidateSemanticTypes == null) continue;

      for (SemanticType semanticType : candidateSemanticTypes) {

        if (semanticType == null
            || semanticType.getDomain() == null
            || semanticType.getType() == null) continue;

        String key = semanticType.getDomain().getUri() + semanticType.getType().getUri();

        Integer count = semanticTypesCount.get(key);
        if (count == null) semanticTypesCount.put(key, 1);
        else semanticTypesCount.put(key, count.intValue() + 1);
      }
    }

    // Pass 2: resolve every candidate against the graph and extend the graph where needed.
    // NOTE(review): numOfMappings is an int product and can overflow for many columns; it is only
    // used in the log message at the end of this method.
    int numOfMappings = 1;
    for (ColumnNode n : columnNodes) {

      List<SemanticType> candidateSemanticTypes = columnSemanticTypes.get(n);
      if (candidateSemanticTypes == null) continue;

      logger.info("===== Column: " + n.getColumnName());

      Set<SemanticTypeMapping> semanticTypeMappings = new HashSet<SemanticTypeMapping>();
      for (SemanticType semanticType : candidateSemanticTypes) {

        // Check for null before touching the object: the log statement below dereferences it.
        if (semanticType == null) continue;

        logger.info("\t===== Semantic Type: " + semanticType.getModelLabelString());

        if (semanticType.getDomain() == null || semanticType.getType() == null) continue;

        String key = semanticType.getDomain().getUri() + semanticType.getType().getUri();
        // Pass 1 counted this same key for this same candidate, so the lookup is expected to hit.
        Integer countOfSemanticType = semanticTypesCount.get(key);

        Set<SemanticTypeMapping> tempSemanticTypeMappings =
            findSemanticTypeInGraph(n, semanticType, semanticTypesCount, addedNodes);

        if (tempSemanticTypeMappings != null) semanticTypeMappings.addAll(tempSemanticTypeMappings);

        int countOfMatches = tempSemanticTypeMappings == null ? 0 : tempSemanticTypeMappings.size();

        // Fewer structures in the graph match this semantic type than the number of times it
        // occurs, so add one new structure to the graph for each missing match.
        if (countOfMatches < countOfSemanticType) {
          for (int i = 0; i < countOfSemanticType - countOfMatches; i++) {
            SemanticTypeMapping mp = addSemanticTypeStruct(n, semanticType, addedNodes);
            if (mp != null) semanticTypeMappings.add(mp);
          }
        }
      }

      logger.info(
          "number of matches for column "
              + n.getColumnName()
              + ": "
              + semanticTypeMappings.size());

      // A column without any mapping must not zero out the product.
      numOfMappings *= semanticTypeMappings.isEmpty() ? 1 : semanticTypeMappings.size();

      candidateSteinerSets.updateSteinerSets(semanticTypeMappings);
    }

    logger.info("number of possible mappings: " + numOfMappings);

    return candidateSteinerSets;
  }
Exemplo n.º 2
0
  /**
   * Computes candidate semantic models for this learner's column nodes and returns them sorted,
   * with consecutive models of equal score and cost collapsed into one.
   *
   * <p>Steps: build the candidate Steiner node sets, update the weights via
   * {@code updateWeights()}, compute a Steiner tree for each candidate set (up to the configured
   * cutoff), wrap every non-null tree in a {@link SortableSemanticModel_Old}, sort the results and
   * drop entries whose score and cost equal those of the entry directly before them.
   *
   * @param useCorrectTypes forwarded to {@code getCandidateSteinerSets}
   * @param numberOfCRFCandidates forwarded to {@code getCandidateSteinerSets}
   * @return the sorted, de-duplicated models, or {@code null} when no candidate Steiner set could
   *     be built
   */
  public List<SortableSemanticModel_Old> hypothesize(
      boolean useCorrectTypes, int numberOfCRFCandidates) {

    // They should be deleted from the graph after computing the semantic models
    Set<Node> addedNodes = new HashSet<Node>();

    logger.info("finding candidate steiner sets ... ");
    CandidateSteinerSets candidateSteinerSets =
        getCandidateSteinerSets(columnNodes, useCorrectTypes, numberOfCRFCandidates, addedNodes);

    if (candidateSteinerSets == null
        || candidateSteinerSets.getSteinerSets() == null
        || candidateSteinerSets.getSteinerSets().isEmpty()) {
      logger.error("there is no candidate set of steiner nodes.");
      return null;
    }

    logger.info("number of steiner sets: " + candidateSteinerSets.numberOfCandidateSets());

    logger.info("updating weights according to training data ...");
    long start = System.currentTimeMillis();
    this.updateWeights();
    long updateWeightsElapsedTimeMillis = System.currentTimeMillis() - start;
    logger.info("time to update weights: " + (updateWeightsElapsedTimeMillis / 1000F));

    logger.info("computing steiner trees ...");

    // The cutoff does not depend on the loop, so resolve the registry chain once up front.
    int numCandidateMappings =
        ModelingConfigurationRegistry.getInstance()
            .getModelingConfiguration(
                ContextParametersRegistry.getInstance()
                    .getContextParameters(ontologyManager.getContextId())
                    .getKarmaHome())
            .getNumCandidateMappings();

    List<SortableSemanticModel_Old> sortableSemanticModels =
        new ArrayList<SortableSemanticModel_Old>();
    int count = 1;
    for (SteinerNodes sn : candidateSteinerSets.getSteinerSets()) {
      logger.debug("computing steiner tree for steiner nodes set " + count + " ...");
      logger.debug(sn.getScoreDetailsString());
      DirectedWeightedMultigraph<Node, LabeledLink> tree = computeSteinerTree(sn.getNodes());
      count++;
      if (tree != null) {
        SemanticModel sm =
            new SemanticModel(
                new RandomGUID().toString(), tree, columnNodes, sn.getMappingToSourceColumns());
        sortableSemanticModels.add(new SortableSemanticModel_Old(sm, sn));
      }

      // NOTE(review): count starts at 1 and is incremented before this comparison, so the loop
      // stops after (numCandidateMappings - 1) sets and never stops early when the configured
      // value is <= 1. Behavior kept as-is; confirm whether this cutoff is intended.
      if (count == numCandidateMappings) break;
    }

    Collections.sort(sortableSemanticModels);

    // Keep the first model, then every model that differs in score or cost from its predecessor
    // in the sorted list.
    // NOTE(review): score and cost are compared with ==; if getScore()/getCost() return boxed
    // values this compares references rather than values - verify their return types.
    List<SortableSemanticModel_Old> uniqueModels = new ArrayList<SortableSemanticModel_Old>();
    if (!sortableSemanticModels.isEmpty()) uniqueModels.add(sortableSemanticModels.get(0));
    for (int i = 1; i < sortableSemanticModels.size(); i++) {
      SortableSemanticModel_Old current = sortableSemanticModels.get(i);
      SortableSemanticModel_Old previous = sortableSemanticModels.get(i - 1);
      if (current.getScore() == previous.getScore() && current.getCost() == previous.getCost())
        continue;
      uniqueModels.add(current);
    }

    logger.info("results are ready ...");
    return uniqueModels;
  }