예제 #1
0
  /**
   * Prints one tab-separated statistics line per semantic model to standard output and then
   * passes the model's column nodes to {@code getStatistics2}.
   *
   * <p>The columns of each line are, in order: the number of column nodes reported by the model,
   * the number of vertices in its graph, the number of edges in its graph, the number of vertices
   * that are {@code InternalNode} instances, and the number of vertices that are {@code ColumnNode}
   * instances.
   *
   * @param semanticModels the models to report on
   */
  @SuppressWarnings("unused")
  private static void getStatistics1(List<SemanticModel> semanticModels) {
    for (SemanticModel model : semanticModels) {
      List<ColumnNode> columns = model.getColumnNodes();

      // Assemble the first three columns from the collection sizes.
      StringBuilder line = new StringBuilder();
      line.append(columns.size()).append("\t");
      line.append(model.getGraph().vertexSet().size()).append("\t");
      line.append(model.getGraph().edgeSet().size()).append("\t");

      // Classify every vertex; the two checks are independent of each other.
      int internalNodeTotal = 0;
      int columnNodeTotal = 0;
      for (Node vertex : model.getGraph().vertexSet()) {
        if (vertex instanceof InternalNode) {
          internalNodeTotal++;
        }
        if (vertex instanceof ColumnNode) {
          columnNodeTotal++;
        }
      }
      line.append(internalNodeTotal).append("\t").append(columnNodeTotal);

      System.out.println(line.toString());

      getStatistics2(columns);
    }
  }
예제 #2
0
  /**
   * Loads the semantic models stored as JSON files in {@code Params.MODEL_DIR} and, for each one,
   * prints it, writes a Graphviz rendering into {@code Params.GRAPHVIS_DIR} and writes the model
   * back as JSON into {@code Params.MODEL_DIR}.
   *
   * <p>An {@link IOException} raised along the way is reported via its stack trace and ends the
   * run; any other exception propagates to the caller.
   *
   * @param args command-line arguments (not used)
   * @throws Exception if loading or writing fails with something other than an {@link IOException}
   */
  public static void main(String[] args) throws Exception {
    try {
      // Alternative input source: importSemanticModels(Params.INPUT_DIR).
      List<SemanticModel> loadedModels =
          importSemanticModelsFromJsonFiles(Params.MODEL_DIR, Params.MODEL_MAIN_FILE_EXT);
      if (loadedModels == null) {
        return;
      }

      for (SemanticModel model : loadedModels) {
        String graphvizTarget =
            Params.GRAPHVIS_DIR + model.getName() + Params.GRAPHVIS_MAIN_FILE_EXT;
        String jsonTarget = Params.MODEL_DIR + model.getName() + Params.MODEL_MAIN_FILE_EXT;

        model.print();
        model.writeGraphviz(graphvizTarget, true, true);
        model.writeJson(jsonTarget);
      }
    } catch (IOException ioFailure) {
      ioFailure.printStackTrace();
    }
  }
예제 #3
0
  /**
   * Reads every semantic model stored directly inside a directory.
   *
   * <p>Each entry of {@code path} whose name ends with {@code fileExtension} is parsed with
   * {@code SemanticModel.readJson}; other entries are ignored. Models are returned in the order
   * in which {@link File#listFiles()} reports the entries.
   *
   * @param path directory to scan
   * @param fileExtension file-name suffix that identifies model files
   * @return the parsed models; an empty list when {@code path} cannot be listed (for example
   *     because it does not exist or is not a directory) or contains no matching entry
   * @throws Exception if reading one of the model files fails
   */
  public static List<SemanticModel> importSemanticModelsFromJsonFiles(
      String path, String fileExtension) throws Exception {

    List<SemanticModel> semanticModels = new ArrayList<SemanticModel>();

    // listFiles() returns null, not an empty array, when the path is not a directory or
    // cannot be read; treat that like an empty directory instead of failing with an NPE.
    File[] files = new File(path).listFiles();
    if (files == null) {
      return semanticModels;
    }

    for (File f : files) {
      if (f.getName().endsWith(fileExtension)) {
        SemanticModel model = SemanticModel.readJson(f.getAbsolutePath());
        semanticModels.add(model);
      }
    }

    return semanticModels;
  }
예제 #4
0
  /**
   * Evaluates the model learner against every available semantic model and writes the resulting
   * precision / recall / time figures to a tab-separated results file.
   *
   * <p>The models are read from {@code Params.MODEL_DIR} and the ontologies from
   * {@code Params.ONTOLOGY_DIR}. Each model in turn is treated as the "new source": a learning
   * graph is either loaded from a previously saved file in {@code Params.GRAPHS_DIR} or built from
   * the other models and saved there, the learner produces candidate models for the new source's
   * column nodes, and each of the top candidates is evaluated against the new source itself. The
   * correct model and the candidates are exported to a Graphviz file in {@code Params.OUTPUT_DIR}.
   *
   * <p>Layout of the results file ({@code Params.RESULTS_DIR + "results,k=4.csv"}): row 0 holds
   * the source names, row 1 the column captions ({@code p}, {@code r}, {@code t}) and row
   * {@code k + 2} the figures obtained for the first-ranked candidate with {@code k} known models.
   *
   * @throws IllegalStateException if the ontology directory cannot be listed
   * @throws Exception if loading models or ontologies, learning, or writing the output fails
   */
  public static void test() throws Exception {
    ServletContextParameterMap contextParameters =
        ContextParametersRegistry.getInstance().getDefault();
    ModelingConfiguration modelingConfiguration =
        ModelingConfigurationRegistry.getInstance()
            .getModelingConfiguration(contextParameters.getId());

    String outputPath = Params.OUTPUT_DIR;
    String graphPath = Params.GRAPHS_DIR;

    List<SemanticModel> semanticModels =
        ModelReader.importSemanticModelsFromJsonFiles(Params.MODEL_DIR, Params.MODEL_MAIN_FILE_EXT);

    List<SemanticModel> trainingData = new ArrayList<SemanticModel>();

    OntologyManager ontologyManager = new OntologyManager(contextParameters.getId());
    // listFiles() returns null when the path is not a readable directory; report that with a
    // message naming the directory instead of failing with a bare NullPointerException.
    File[] ontologyFiles = new File(Params.ONTOLOGY_DIR).listFiles();
    if (ontologyFiles == null) {
      throw new IllegalStateException(
          "Cannot list ontology directory: " + Params.ONTOLOGY_DIR);
    }
    for (File f : ontologyFiles) {
      ontologyManager.doImport(f, "UTF-8");
    }
    ontologyManager.updateCache();

    // Evaluation switches. With iterativeEvaluation off, each source is evaluated exactly once,
    // using all other models as known models; with it on, the number of known models grows from
    // 0 up to (number of models - 1).
    boolean iterativeEvaluation = false;
    boolean useCorrectType = false;
    int numberOfCRFCandidates = 4;
    String filePath = Params.RESULTS_DIR;
    String filename = "results,k=" + numberOfCRFCandidates + ".csv";

    // Two header rows plus one row per possible number of known models (0 .. size - 1).
    StringBuffer[] resultsArray = new StringBuffer[semanticModels.size() + 2];
    for (int i = 0; i < resultsArray.length; i++) {
      resultsArray[i] = new StringBuffer();
    }

    PrintWriter resultFile = new PrintWriter(new File(filePath + filename));
    try {
      for (int i = 0; i < semanticModels.size(); i++) {

        int newSourceIndex = i;
        SemanticModel newSource = semanticModels.get(newSourceIndex);

        String sourceHeader =
            newSource.getName() + "(#attributes:" + newSource.getColumnNodes().size() + ")";
        logger.info("======================================================");
        logger.info(sourceHeader);
        System.out.println(sourceHeader);
        logger.info("======================================================");

        int numberOfKnownModels = iterativeEvaluation ? 0 : semanticModels.size() - 1;

        // Row 0: one three-column group per source, labelled with the source name.
        if (resultsArray[0].length() > 0) resultsArray[0].append(" \t ");
        resultsArray[0].append(
            newSource.getName()
                + "("
                + newSource.getColumnNodes().size()
                + ")"
                + "\t"
                + " "
                + "\t"
                + " ");
        // Row 1: captions of the three columns (precision, recall, time).
        if (resultsArray[1].length() > 0) resultsArray[1].append(" \t ");
        resultsArray[1].append("p \t r \t t");

        while (numberOfKnownModels <= semanticModels.size() - 1) {

          // Training data: the first numberOfKnownModels models, skipping the new source.
          trainingData.clear();
          int j = 0, count = 0;
          while (count < numberOfKnownModels) {
            if (j != newSourceIndex) {
              trainingData.add(semanticModels.get(j));
              count++;
            }
            j++;
          }

          ModelLearningGraph modelLearningGraph =
              (ModelLearningGraphSparse)
                  ModelLearningGraph.getEmptyInstance(
                      ontologyManager, ModelLearningGraphType.Sparse);

          SemanticModel correctModel = newSource;
          List<ColumnNode> columnNodes = correctModel.getColumnNodes();

          ModelLearner_Old modelLearner = new ModelLearner_Old(ontologyManager, columnNodes);
          // The measured time covers loading/building the graph and hypothesizing.
          long start = System.currentTimeMillis();

          String graphName =
              !iterativeEvaluation
                  ? graphPath + newSource.getName() + Params.GRAPH_FILE_EXT
                  : graphPath
                      + newSource.getName()
                      + ".knownModels="
                      + numberOfKnownModels
                      + Params.GRAPH_FILE_EXT;

          if (new File(graphName).exists()) {
            // Reuse the graph saved by an earlier run.
            // NOTE(review): if loading fails the learner is left without a graph builder and
            // hypothesize() below will presumably fail — confirm whether a rebuild is wanted.
            try {
              logger.info("loading the graph ...");
              DirectedWeightedMultigraph<Node, DefaultLink> graph = GraphUtil.importJson(graphName);
              modelLearner.graphBuilder = new GraphBuilder(ontologyManager, graph, false);
              modelLearner.nodeIdFactory = modelLearner.graphBuilder.getNodeIdFactory();
            } catch (Exception e) {
              e.printStackTrace();
            }
          } else {
            logger.info("building the graph ...");
            for (SemanticModel sm : trainingData) {
              modelLearningGraph.addModel(sm, false);
            }
            modelLearner.graphBuilder = modelLearningGraph.getGraphBuilder();
            modelLearner.nodeIdFactory = modelLearner.graphBuilder.getNodeIdFactory();
            // Save the graph so later runs can load it; a failed save does not stop the run.
            try {
              GraphUtil.exportJson(
                  modelLearningGraph.getGraphBuilder().getGraph(), graphName, true, true);
            } catch (Exception e) {
              e.printStackTrace();
            }
          }

          List<SortableSemanticModel_Old> hypothesisList =
              modelLearner.hypothesize(useCorrectType, numberOfCRFCandidates);

          long elapsedTimeMillis = System.currentTimeMillis() - start;
          float elapsedTimeSec = elapsedTimeMillis / 1000F;

          // Keep at most the configured number of candidate mappings.
          List<SortableSemanticModel_Old> topHypotheses = null;
          if (hypothesisList != null) {
            topHypotheses =
                hypothesisList.size() > modelingConfiguration.getNumCandidateMappings()
                    ? hypothesisList.subList(0, modelingConfiguration.getNumCandidateMappings())
                    : hypothesisList;
          }

          // Sorted by label: models handed to the Graphviz export below.
          Map<String, SemanticModel> models = new TreeMap<String, SemanticModel>();
          models.put("1-correct model", correctModel);

          if (topHypotheses != null) {
            for (int k = 0; k < topHypotheses.size(); k++) {

              SortableSemanticModel_Old m = topHypotheses.get(k);
              ModelEvaluation me = m.evaluate(correctModel);

              String label =
                  "candidate"
                      + k
                      + m.getSteinerNodes().getScoreDetailsString()
                      + "cost:"
                      + roundTwoDecimals(m.getCost())
                      + "-precision:"
                      + me.getPrecision()
                      + "-recall:"
                      + me.getRecall();

              models.put(label, m);

              if (k == 0) { // first rank model
                String summary =
                    "number of known models: "
                        + numberOfKnownModels
                        + ", precision: "
                        + me.getPrecision()
                        + ", recall: "
                        + me.getRecall()
                        + ", time: "
                        + elapsedTimeSec;
                System.out.println(summary);
                logger.info(summary);

                // Only the first-ranked candidate contributes to the results table.
                String s = me.getPrecision() + "\t" + me.getRecall() + "\t" + elapsedTimeSec;
                StringBuffer row = resultsArray[numberOfKnownModels + 2];
                if (row.length() > 0) row.append(" \t ");
                row.append(s);
              }
            }
          }

          String outName =
              !iterativeEvaluation
                  ? outputPath + newSource.getName() + Params.GRAPHVIS_OUT_DETAILS_FILE_EXT
                  : outputPath
                      + newSource.getName()
                      + ".knownModels="
                      + numberOfKnownModels
                      + Params.GRAPHVIS_OUT_DETAILS_FILE_EXT;

          GraphVizUtil.exportSemanticModelsToGraphviz(
              models,
              newSource.getName(),
              outName,
              GraphVizLabelType.LocalId,
              GraphVizLabelType.LocalUri,
              false,
              false);

          numberOfKnownModels++;
        }
      }

      for (StringBuffer s : resultsArray) {
        resultFile.println(s.toString());
      }
    } finally {
      // Release the file handle even when an iteration above throws.
      resultFile.close();
    }
  }
예제 #5
0
  /**
   * Builds semantic models from the textual model descriptions found in a directory.
   *
   * <p>Only entries whose name contains a match for {@code s<1-2 digits>-....txt}
   * (case-insensitive) are read. Within such a file:
   *
   * <ul>
   *   <li>a line containing a match for {@code S<1-2 digits>:...(} sets the model name to the
   *       file name with every literal {@code ".txt"} removed, and the description to the
   *       trimmed line;
   *   <li>the lines between a line starting with {@code <N3>} and the next line starting with
   *       {@code </N3>} are split on whitespace and their first three tokens are taken as the
   *       subject, predicate and object of a statement; lines starting with {@code #} are
   *       skipped and lines with fewer than three tokens are reported on standard output and
   *       skipped.
   * </ul>
   *
   * <p>If a file contains several {@code <N3>} sections, only the statements of the last one are
   * used. Accepted files receive the ids {@code s1}, {@code s2}, ... in the order they are read.
   *
   * @param importDir directory holding the model description files
   * @return the models that were built; empty when the directory cannot be listed or contains no
   *     matching file
   * @throws IOException if a file cannot be opened or read
   */
  public static List<SemanticModel> importSemanticModels(String importDir) throws IOException {

    initPrefixNsMapping();

    List<SemanticModel> semanticModels = new ArrayList<SemanticModel>();

    File[] modelExamples = new File(importDir).listFiles();
    if (modelExamples == null) {
      return semanticModels;
    }

    Pattern fileNamePattern = Pattern.compile("s[0-9](|[0-9])-.*\\.txt", Pattern.CASE_INSENSITIVE);
    Pattern serviceNamePattern =
        Pattern.compile("S[0-9](|[0-9]):(.*)\\(", Pattern.CASE_INSENSITIVE);

    int count = 1;

    for (File f : modelExamples) {

      if (!fileNamePattern.matcher(f.getName()).find()) {
        continue;
      }

      String id = "s" + count;
      String name = "";
      String description = "";

      // Stays null when the file has no <N3> section.
      // NOTE(review): null is then passed on to buildGraphsFromStatements2 — confirm it copes.
      List<Statement> statements = null;

      LineNumberReader lr = new LineNumberReader(new FileReader(f));
      try {
        String curLine;
        while ((curLine = lr.readLine()) != null) {

          if (serviceNamePattern.matcher(curLine).find()) {
            // Literal replace: with replaceAll the '.' would be a regex wildcard and
            // strip any character followed by "txt".
            name = f.getName().replace(".txt", "");
            description = curLine.trim();
          }

          if (!curLine.trim().startsWith("<N3>")) {
            continue;
          }

          // Read the statement lines up to the closing </N3> tag (or end of file).
          statements = new ArrayList<Statement>();
          while ((curLine = lr.readLine()) != null) {
            String trimmed = curLine.trim();
            if (trimmed.startsWith("</N3>")) {
              break;
            }
            if (trimmed.startsWith("#")) {
              continue;
            }

            String[] parts = trimmed.split("\\s+");
            if (parts.length < 3) {
              System.out.println("Cannot extract statement from \"" + curLine + " \"");
              continue;
            }

            statements.add(new Statement(parts[0].trim(), parts[1].trim(), parts[2].trim()));
          }
        }
      } finally {
        // Close the reader even when reading fails part-way through the file.
        lr.close();
      }

      DirectedWeightedMultigraph<Node, LabeledLink> graph =
          buildGraphsFromStatements2(statements);

      SemanticModel semanticModel = new SemanticModel(id, graph);
      semanticModel.setName(name);
      semanticModel.setDescription(description);

      semanticModels.add(semanticModel);
      count++;
    }

    return semanticModels;
  }