/**
 * Prints one tab-separated statistics row per semantic model to standard output and then hands
 * the model's column nodes to {@code getStatistics2}.
 *
 * <p>Columns, in order: number of column nodes reported by the model, number of graph vertices,
 * number of graph edges, number of vertices that are {@code InternalNode}s, number of vertices
 * that are {@code ColumnNode}s.
 *
 * @param semanticModels the models to summarize
 */
@SuppressWarnings("unused")
private static void getStatistics1(List<SemanticModel> semanticModels) {
  for (SemanticModel model : semanticModels) {
    int attributes = model.getColumnNodes().size();
    int vertices = model.getGraph().vertexSet().size();
    int edges = model.getGraph().edgeSet().size();

    // The two type checks are independent: a vertex is counted once per type it matches.
    int internalNodes = 0;
    int columnNodesInGraph = 0;
    for (Node vertex : model.getGraph().vertexSet()) {
      internalNodes += (vertex instanceof InternalNode) ? 1 : 0;
      columnNodesInGraph += (vertex instanceof ColumnNode) ? 1 : 0;
    }

    StringBuilder row = new StringBuilder();
    row.append(attributes).append("\t");
    row.append(vertices).append("\t");
    row.append(edges).append("\t");
    row.append(internalNodes).append("\t");
    row.append(columnNodesInGraph);
    System.out.println(row.toString());

    getStatistics2(model.getColumnNodes());
  }
}
public static void main(String[] args) throws Exception { List<SemanticModel> semanticModels = null; try { semanticModels = importSemanticModelsFromJsonFiles(Params.MODEL_DIR, Params.MODEL_MAIN_FILE_EXT); // semanticModels = importSemanticModels(Params.INPUT_DIR); if (semanticModels != null) { for (SemanticModel sm : semanticModels) { sm.print(); sm.writeGraphviz( Params.GRAPHVIS_DIR + sm.getName() + Params.GRAPHVIS_MAIN_FILE_EXT, true, true); sm.writeJson(Params.MODEL_DIR + sm.getName() + Params.MODEL_MAIN_FILE_EXT); // To test JsonReader and JsonWriter // SemanticModel m = SemanticModel.readJson(Params.MODEL_DIR + sm.getName() + // ".main.model.json"); // m.writeJson(Params.MODEL_DIR + sm.getName() + ".main.model2.json"); } } } catch (IOException e) { e.printStackTrace(); } }
/**
 * Reads every semantic model stored as a JSON file directly inside a directory.
 *
 * <p>An entry is read when its file name ends with {@code fileExtension}; each such entry is
 * parsed with {@code SemanticModel.readJson}. Models are returned in the order the file system
 * lists the entries.
 *
 * @param path directory to scan
 * @param fileExtension file-name suffix selecting the files to load
 * @return the models that were read; empty (never {@code null}) when {@code path} is not a
 *     directory, cannot be listed, or contains no matching file
 * @throws Exception if {@code SemanticModel.readJson} fails for a matching file
 */
public static List<SemanticModel> importSemanticModelsFromJsonFiles(
    String path, String fileExtension) throws Exception {

  List<SemanticModel> semanticModels = new ArrayList<SemanticModel>();

  // File.listFiles() returns null (not an empty array) when the path is not a directory or an
  // I/O error occurs; treat that as "no models" instead of failing with a NullPointerException.
  File[] files = new File(path).listFiles();
  if (files == null) {
    return semanticModels;
  }

  for (File f : files) {
    if (f.getName().endsWith(fileExtension)) {
      semanticModels.add(SemanticModel.readJson(f.getAbsolutePath()));
    }
  }
  return semanticModels;
}
public static void test() throws Exception { ServletContextParameterMap contextParameters = ContextParametersRegistry.getInstance().getDefault(); ModelingConfiguration modelingConfiguration = ModelingConfigurationRegistry.getInstance() .getModelingConfiguration(contextParameters.getId()); // String inputPath = Params.INPUT_DIR; String outputPath = Params.OUTPUT_DIR; String graphPath = Params.GRAPHS_DIR; // List<SemanticModel> semanticModels = ModelReader.importSemanticModels(inputPath); List<SemanticModel> semanticModels = ModelReader.importSemanticModelsFromJsonFiles(Params.MODEL_DIR, Params.MODEL_MAIN_FILE_EXT); // ModelEvaluation me2 = semanticModels.get(20).evaluate(semanticModels.get(20)); // System.out.println(me2.getPrecision() + "--" + me2.getRecall()); // if (true) // return; List<SemanticModel> trainingData = new ArrayList<SemanticModel>(); OntologyManager ontologyManager = new OntologyManager(contextParameters.getId()); File ff = new File(Params.ONTOLOGY_DIR); File[] files = ff.listFiles(); for (File f : files) { ontologyManager.doImport(f, "UTF-8"); } ontologyManager.updateCache(); // getStatistics1(semanticModels); // if (true) // return; ModelLearningGraph modelLearningGraph = null; ModelLearner_Old modelLearner; boolean iterativeEvaluation = false; boolean useCorrectType = false; int numberOfCRFCandidates = 4; int numberOfKnownModels; String filePath = Params.RESULTS_DIR; String filename = "results,k=" + numberOfCRFCandidates + ".csv"; PrintWriter resultFile = new PrintWriter(new File(filePath + filename)); StringBuffer[] resultsArray = new StringBuffer[semanticModels.size() + 2]; for (int i = 0; i < resultsArray.length; i++) { resultsArray[i] = new StringBuffer(); } for (int i = 0; i < semanticModels.size(); i++) { // for (int i = 0; i <= 10; i++) { // int i = 3; { resultFile.flush(); int newSourceIndex = i; SemanticModel newSource = semanticModels.get(newSourceIndex); logger.info("======================================================"); 
logger.info(newSource.getName() + "(#attributes:" + newSource.getColumnNodes().size() + ")"); System.out.println( newSource.getName() + "(#attributes:" + newSource.getColumnNodes().size() + ")"); logger.info("======================================================"); if (!iterativeEvaluation) numberOfKnownModels = semanticModels.size() - 1; else numberOfKnownModels = 0; if (resultsArray[0].length() > 0) resultsArray[0].append(" \t "); resultsArray[0].append( newSource.getName() + "(" + newSource.getColumnNodes().size() + ")" + "\t" + " " + "\t" + " "); if (resultsArray[1].length() > 0) resultsArray[1].append(" \t "); resultsArray[1].append("p \t r \t t"); while (numberOfKnownModels <= semanticModels.size() - 1) { trainingData.clear(); int j = 0, count = 0; while (count < numberOfKnownModels) { if (j != newSourceIndex) { trainingData.add(semanticModels.get(j)); count++; } j++; } modelLearningGraph = (ModelLearningGraphSparse) ModelLearningGraph.getEmptyInstance(ontologyManager, ModelLearningGraphType.Sparse); SemanticModel correctModel = newSource; List<ColumnNode> columnNodes = correctModel.getColumnNodes(); // if (useCorrectType && numberOfCRFCandidates > 1) // updateCrfSemanticTypesForResearchEvaluation(columnNodes); modelLearner = new ModelLearner_Old(ontologyManager, columnNodes); long start = System.currentTimeMillis(); String graphName = !iterativeEvaluation ? 
graphPath + semanticModels.get(newSourceIndex).getName() + Params.GRAPH_FILE_EXT : graphPath + semanticModels.get(newSourceIndex).getName() + ".knownModels=" + numberOfKnownModels + Params.GRAPH_FILE_EXT; if (new File(graphName).exists()) { // read graph from file try { logger.info("loading the graph ..."); DirectedWeightedMultigraph<Node, DefaultLink> graph = GraphUtil.importJson(graphName); modelLearner.graphBuilder = new GraphBuilder(ontologyManager, graph, false); modelLearner.nodeIdFactory = modelLearner.graphBuilder.getNodeIdFactory(); } catch (Exception e) { e.printStackTrace(); } } else { logger.info("building the graph ..."); for (SemanticModel sm : trainingData) modelLearningGraph.addModel(sm, false); modelLearner.graphBuilder = modelLearningGraph.getGraphBuilder(); modelLearner.nodeIdFactory = modelLearner.graphBuilder.getNodeIdFactory(); // save graph to file try { GraphUtil.exportJson( modelLearningGraph.getGraphBuilder().getGraph(), graphName, true, true); } catch (Exception e) { e.printStackTrace(); } } List<SortableSemanticModel_Old> hypothesisList = modelLearner.hypothesize(useCorrectType, numberOfCRFCandidates); long elapsedTimeMillis = System.currentTimeMillis() - start; float elapsedTimeSec = elapsedTimeMillis / 1000F; List<SortableSemanticModel_Old> topHypotheses = null; if (hypothesisList != null) { topHypotheses = hypothesisList.size() > modelingConfiguration.getNumCandidateMappings() ? 
hypothesisList.subList(0, modelingConfiguration.getNumCandidateMappings()) : hypothesisList; } Map<String, SemanticModel> models = new TreeMap<String, SemanticModel>(); // export to json // if (topHypotheses != null) // for (int k = 0; k < topHypotheses.size() && k < 3; k++) { // // String fileExt = null; // if (k == 0) fileExt = Params.MODEL_RANK1_FILE_EXT; // else if (k == 1) fileExt = Params.MODEL_RANK2_FILE_EXT; // else if (k == 2) fileExt = Params.MODEL_RANK3_FILE_EXT; // SortableSemanticModel m = topHypotheses.get(k); // new SemanticModel(m).writeJson(Params.MODEL_DIR + // newSource.getName() + fileExt); // // } ModelEvaluation me; models.put("1-correct model", correctModel); if (topHypotheses != null) for (int k = 0; k < topHypotheses.size(); k++) { SortableSemanticModel_Old m = topHypotheses.get(k); me = m.evaluate(correctModel); String label = "candidate" + k + m.getSteinerNodes().getScoreDetailsString() + "cost:" + roundTwoDecimals(m.getCost()) + // "-distance:" + me.getDistance() + "-precision:" + me.getPrecision() + "-recall:" + me.getRecall(); models.put(label, m); if (k == 0) { // first rank model System.out.println( "number of known models: " + numberOfKnownModels + ", precision: " + me.getPrecision() + ", recall: " + me.getRecall() + ", time: " + elapsedTimeSec); logger.info( "number of known models: " + numberOfKnownModels + ", precision: " + me.getPrecision() + ", recall: " + me.getRecall() + ", time: " + elapsedTimeSec); // resultFile.println("number of known models \t precision \t recall"); // resultFile.println(numberOfKnownModels + "\t" + me.getPrecision() + "\t" + // me.getRecall()); String s = me.getPrecision() + "\t" + me.getRecall() + "\t" + elapsedTimeSec; if (resultsArray[numberOfKnownModels + 2].length() > 0) resultsArray[numberOfKnownModels + 2].append(" \t "); resultsArray[numberOfKnownModels + 2].append(s); // resultFile.println(me.getPrecision() + "\t" + me.getRecall() + "\t" + // elapsedTimeSec); } } String outName = 
!iterativeEvaluation ? outputPath + semanticModels.get(newSourceIndex).getName() + Params.GRAPHVIS_OUT_DETAILS_FILE_EXT : outputPath + semanticModels.get(newSourceIndex).getName() + ".knownModels=" + numberOfKnownModels + Params.GRAPHVIS_OUT_DETAILS_FILE_EXT; // if (!iterativeEvaluation) { GraphVizUtil.exportSemanticModelsToGraphviz( models, newSource.getName(), outName, GraphVizLabelType.LocalId, GraphVizLabelType.LocalUri, false, false); // } numberOfKnownModels++; } // resultFile.println("======================================================="); } for (StringBuffer s : resultsArray) resultFile.println(s.toString()); resultFile.close(); }
public static List<SemanticModel> importSemanticModels(String importDir) throws IOException { initPrefixNsMapping(); List<SemanticModel> semanticModels = new ArrayList<SemanticModel>(); File dir = new File(importDir); File[] modelExamples = dir.listFiles(); Pattern fileNamePattern = Pattern.compile("s[0-9](|[0-9])-.*\\.txt", Pattern.CASE_INSENSITIVE); // Pattern fileNamePattern = Pattern.compile("s1-.*\\.txt", Pattern.CASE_INSENSITIVE); Pattern serviceNamePattern = Pattern.compile("S[0-9](|[0-9]):(.*)\\(", Pattern.CASE_INSENSITIVE); Matcher matcher; String subject = "", predicate = "", object = ""; int count = 1; if (modelExamples != null) for (File f : modelExamples) { String id = "s" + String.valueOf(count); String name = "", description = ""; matcher = fileNamePattern.matcher(f.getName()); if (!matcher.find()) { continue; } List<Statement> statements = null; LineNumberReader lr = new LineNumberReader(new FileReader(f)); String curLine = ""; while ((curLine = lr.readLine()) != null) { matcher = serviceNamePattern.matcher(curLine); if (matcher.find()) { name = f.getName().replaceAll(".txt", ""); description = curLine.trim(); } if (!curLine.trim().startsWith("<N3>")) continue; statements = new ArrayList<Statement>(); while ((curLine = lr.readLine()) != null) { if (curLine.trim().startsWith("</N3>")) break; // System.out.println(curLine); if (curLine.trim().startsWith("#")) continue; String[] parts = curLine.trim().split("\\s+"); if (parts == null || parts.length < 3) { System.out.println("Cannot extract statement from \"" + curLine + " \""); continue; } subject = parts[0].trim(); predicate = parts[1].trim(); object = parts[2].trim(); Statement st = new Statement(subject, predicate, object); statements.add(st); } } lr.close(); DirectedWeightedMultigraph<Node, LabeledLink> graph = buildGraphsFromStatements2(statements); SemanticModel semanticModel = new SemanticModel(id, graph); semanticModel.setName(name); semanticModel.setDescription(description); 
semanticModels.add(semanticModel); count++; } return semanticModels; }