@Override
public UpdateContainer doIt(Workspace workspace) {
    Worksheet worksheet = workspace.getWorksheet(worksheetId);
    SuperSelection selection = getSuperSelection(worksheet);
    String worksheetName = worksheet.getTitle();
    try {
        // Prepare the model file name
        final String modelFileName = workspace.getCommandPreferencesId() + worksheetId + "-"
                + worksheetName + "-model.ttl";
        final String modelFileLocalPath = ServletContextParameterMap
                .getParameterValue(ContextParameter.R2RML_PUBLISH_DIR) + modelFileName;
        File f = new File(modelFileLocalPath);

        // Prepare the graph URI under which the model is published in the triple store
        String graphName = worksheet.getMetadataContainer().getWorksheetProperties()
                .getPropertyValue(Property.graphName);
        if (graphName == null || graphName.isEmpty()) {
            SimpleDateFormat sdf = new SimpleDateFormat("dd-MMM-yyyy-kkmmssS");
            String ts = sdf.format(Calendar.getInstance().getTime());
            graphName = "http://localhost/" + workspace.getCommandPreferencesId() + "/"
                    + worksheetId + "/model/" + ts;
            worksheet.getMetadataContainer().getWorksheetProperties()
                    .setPropertyValue(Property.graphName, graphName);
        }

        // If the model has not been published yet, publish it
        if (!f.exists() || !f.isFile()) {
            GenerateR2RMLModelCommandFactory factory = new GenerateR2RMLModelCommandFactory();
            GenerateR2RMLModelCommand cmd = (GenerateR2RMLModelCommand) factory.createCommand(
                    workspace, worksheetId, TripleStoreUtil.defaultModelsRepoUrl, graphName,
                    selection.getName());
            cmd.doIt(workspace);
        } else {
            // If the model was published more than 30 minutes ago, publish it again, just to be safe
            long diff = Calendar.getInstance().getTimeInMillis() - f.lastModified();
            if ((diff / 1000L / 60L) > 30) {
                f.delete();
                GenerateR2RMLModelCommandFactory factory = new GenerateR2RMLModelCommandFactory();
                GenerateR2RMLModelCommand cmd = (GenerateR2RMLModelCommand) factory.createCommand(
                        workspace, worksheetId, TripleStoreUtil.defaultModelsRepoUrl, graphName,
                        selection.getName());
                cmd.doIt(workspace);
            }
        }

        StringBuffer query = new StringBuffer(
                "prefix rr: <http://www.w3.org/ns/r2rml#> prefix km-dev: <http://isi.edu/integration/karma/dev#> ");
        /*
         * This is the query for the list of columns:
         *
         * PREFIX km-dev: <http://isi.edu/integration/karma/dev#>
         * PREFIX rr: <http://www.w3.org/ns/r2rml#>
         * select distinct ?class ?column where {
         *   {
         *     ?x1 rr:subjectMap/km-dev:alignmentNodeId "------- The full URL of the column/class -------" .
         *     ?x1 rr:predicateObjectMap/rr:objectMap/rr:column ?column .
         *     ?x1 rr:subjectMap/rr:predicate ?class .
         *   } UNION {
         *     ?x1 rr:subjectMap/km-dev:alignmentNodeId "------- The full URL of the column/class -------" .
         *     ?x1 (rr:predicateObjectMap/rr:objectMap/rr:parentTriplesMap)* ?x2 .
         *     ?x2 rr:predicateObjectMap/rr:objectMap/rr:column ?column .
         *     ?x2 rr:predicateObjectMap/rr:predicate ?class .
         *   }
         * }
         */
        query.append("select distinct ?class ?column where { ");
        if (graphName != null && !graphName.trim().isEmpty()) {
            query.append(" graph <").append(graphName).append("> { ");
        }
        query.append("{ ?x1 rr:subjectMap/km-dev:alignmentNodeId \"")
                .append(this.nodeId)
                .append("\" . ?x1 rr:predicateObjectMap/rr:objectMap/rr:column ?column . ?x1 rr:subjectMap/rr:predicate ?class .")
                .append(" } UNION { ")
                .append("?x1 rr:subjectMap/km-dev:alignmentNodeId \"")
                .append(this.nodeId)
                .append("\" . ?x1 (rr:predicateObjectMap/rr:objectMap/rr:parentTriplesMap)* ?x2 .")
                .append(" ?x2 rr:predicateObjectMap ?x3 . ")
                .append(" ?x3 rr:objectMap/rr:column ?column . ?x3 rr:predicate ?class .")
                .append(" } }");
        if (graphName != null && !graphName.trim().isEmpty()) {
            query.append(" } ");
        }
        logger.info("Query: " + query.toString());

        String sData = TripleStoreUtil.invokeSparqlQuery(query.toString(),
                TripleStoreUtil.defaultModelsRepoUrl, "application/json", null);
        // Use short-circuit || so a null response does not throw a NullPointerException
        if (sData == null || sData.isEmpty()) {
            logger.error("Empty response object from query : " + query);
        }
        HashMap<String, String> cols = new HashMap<String, String>();
        try {
            JSONObject obj1 = new JSONObject(sData);
            JSONArray arr = obj1.getJSONObject("results").getJSONArray("bindings");
            for (int i = 0; i < arr.length(); i++) {
                String colName = arr.getJSONObject(i).getJSONObject("column").getString("value");
                String colValue = arr.getJSONObject(i).getJSONObject("class").getString("value");
                if (cols.containsKey(colName)) {
                    logger.error("Duplicate column <-> property mapping. " + colName + " <=> " + colValue);
                } else {
                    cols.put(colName, colValue);
                }
            }
        } catch (Exception e2) {
            logger.error("Error in parsing JSON response", e2);
        }
        logger.info("Total columns fetched: " + cols.size());

        final HashMap<String, String> columns = cols;
        return new UpdateContainer(new AbstractUpdate() {
            @Override
            public void generateJson(String prefix, PrintWriter pw, VWorkspace vWorkspace) {
                JSONObject obj = new JSONObject();
                try {
                    Iterator<String> itr = columns.keySet().iterator();
                    JSONArray colList = new JSONArray();
                    while (itr.hasNext()) {
                        JSONObject o = new JSONObject();
                        String k = itr.next();
                        o.put("name", k);
                        o.put("url", columns.get(k));
                        colList.put(o);
                    }
                    obj.put("updateType", "FetchColumnUpdate");
                    obj.put("columns", colList);
                    obj.put("rootId", nodeId);
                    pw.println(obj.toString());
                } catch (JSONException e) {
                    logger.error("Error occurred while fetching worksheet properties!", e);
                }
            }
        });
    } catch (Exception e) {
        String msg = "Error occurred while fetching columns!";
        logger.error(msg, e);
        return new UpdateContainer(new ErrorUpdate(msg));
    }
}
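/*
 * For reference, a sketch of the response shape the parsing loop above expects.
 * This is the standard W3C SPARQL 1.1 Query Results JSON format, with binding
 * names matching the ?class and ?column projections of the query; the URI and
 * column name shown are illustrative, not taken from a real model:
 *
 * {
 *   "head": { "vars": ["class", "column"] },
 *   "results": {
 *     "bindings": [
 *       { "class":  { "type": "uri",     "value": "http://example.org/ontology/Person" },
 *         "column": { "type": "literal", "value": "name" } }
 *     ]
 *   }
 * }
 */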
public static void test() throws Exception {
    ServletContextParameterMap contextParameters =
            ContextParametersRegistry.getInstance().getDefault();
    ModelingConfiguration modelingConfiguration = ModelingConfigurationRegistry.getInstance()
            .getModelingConfiguration(contextParameters.getId());

    // String inputPath = Params.INPUT_DIR;
    String outputPath = Params.OUTPUT_DIR;
    String graphPath = Params.GRAPHS_DIR;

    // List<SemanticModel> semanticModels = ModelReader.importSemanticModels(inputPath);
    List<SemanticModel> semanticModels =
            ModelReader.importSemanticModelsFromJsonFiles(Params.MODEL_DIR, Params.MODEL_MAIN_FILE_EXT);

    List<SemanticModel> trainingData = new ArrayList<SemanticModel>();

    // Import all ontologies in the ontology directory
    OntologyManager ontologyManager = new OntologyManager(contextParameters.getId());
    File ff = new File(Params.ONTOLOGY_DIR);
    File[] files = ff.listFiles();
    for (File f : files) {
        ontologyManager.doImport(f, "UTF-8");
    }
    ontologyManager.updateCache();

    ModelLearningGraph modelLearningGraph = null;
    ModelLearner_Old modelLearner;

    boolean iterativeEvaluation = false;
    boolean useCorrectType = false;
    int numberOfCRFCandidates = 4;
    int numberOfKnownModels;

    String filePath = Params.RESULTS_DIR;
    String filename = "results,k=" + numberOfCRFCandidates + ".csv";
    PrintWriter resultFile = new PrintWriter(new File(filePath + filename));

    StringBuffer[] resultsArray = new StringBuffer[semanticModels.size() + 2];
    for (int i = 0; i < resultsArray.length; i++) {
        resultsArray[i] = new StringBuffer();
    }

    // Leave-one-out evaluation: each model in turn is the "new source"
    for (int i = 0; i < semanticModels.size(); i++) {
        resultFile.flush();
        int newSourceIndex = i;
        SemanticModel newSource = semanticModels.get(newSourceIndex);

        logger.info("======================================================");
        logger.info(newSource.getName() + "(#attributes:" + newSource.getColumnNodes().size() + ")");
        System.out.println(newSource.getName() + "(#attributes:" + newSource.getColumnNodes().size() + ")");
        logger.info("======================================================");

        // In non-iterative mode, train on all other models at once;
        // in iterative mode, start with zero known models and add one per iteration
        if (!iterativeEvaluation)
            numberOfKnownModels = semanticModels.size() - 1;
        else
            numberOfKnownModels = 0;

        if (resultsArray[0].length() > 0) resultsArray[0].append(" \t ");
        resultsArray[0].append(newSource.getName() + "(" + newSource.getColumnNodes().size() + ")"
                + "\t" + " " + "\t" + " ");
        if (resultsArray[1].length() > 0) resultsArray[1].append(" \t ");
        resultsArray[1].append("p \t r \t t");

        while (numberOfKnownModels <= semanticModels.size() - 1) {

            // Use the first numberOfKnownModels models, skipping the new source, as training data
            trainingData.clear();
            int j = 0, count = 0;
            while (count < numberOfKnownModels) {
                if (j != newSourceIndex) {
                    trainingData.add(semanticModels.get(j));
                    count++;
                }
                j++;
            }

            modelLearningGraph = (ModelLearningGraphSparse) ModelLearningGraph.getEmptyInstance(
                    ontologyManager, ModelLearningGraphType.Sparse);

            SemanticModel correctModel = newSource;
            List<ColumnNode> columnNodes = correctModel.getColumnNodes();

            modelLearner = new ModelLearner_Old(ontologyManager, columnNodes);

            long start = System.currentTimeMillis();

            String graphName = !iterativeEvaluation
                    ? graphPath + semanticModels.get(newSourceIndex).getName() + Params.GRAPH_FILE_EXT
                    : graphPath + semanticModels.get(newSourceIndex).getName()
                            + ".knownModels=" + numberOfKnownModels + Params.GRAPH_FILE_EXT;

            if (new File(graphName).exists()) {
                // Read the alignment graph from file
                try {
                    logger.info("loading the graph ...");
                    DirectedWeightedMultigraph<Node, DefaultLink> graph = GraphUtil.importJson(graphName);
                    modelLearner.graphBuilder = new GraphBuilder(ontologyManager, graph, false);
                    modelLearner.nodeIdFactory = modelLearner.graphBuilder.getNodeIdFactory();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            } else {
                // Build the alignment graph from the training models
                logger.info("building the graph ...");
                for (SemanticModel sm : trainingData)
                    modelLearningGraph.addModel(sm, false);
                modelLearner.graphBuilder = modelLearningGraph.getGraphBuilder();
                modelLearner.nodeIdFactory = modelLearner.graphBuilder.getNodeIdFactory();
                // Save the graph to file for reuse in later runs
                try {
                    GraphUtil.exportJson(modelLearningGraph.getGraphBuilder().getGraph(),
                            graphName, true, true);
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }

            List<SortableSemanticModel_Old> hypothesisList =
                    modelLearner.hypothesize(useCorrectType, numberOfCRFCandidates);

            long elapsedTimeMillis = System.currentTimeMillis() - start;
            float elapsedTimeSec = elapsedTimeMillis / 1000F;

            List<SortableSemanticModel_Old> topHypotheses = null;
            if (hypothesisList != null) {
                topHypotheses = hypothesisList.size() > modelingConfiguration.getNumCandidateMappings()
                        ? hypothesisList.subList(0, modelingConfiguration.getNumCandidateMappings())
                        : hypothesisList;
            }

            Map<String, SemanticModel> models = new TreeMap<String, SemanticModel>();

            ModelEvaluation me;
            models.put("1-correct model", correctModel);
            if (topHypotheses != null)
                for (int k = 0; k < topHypotheses.size(); k++) {
                    SortableSemanticModel_Old m = topHypotheses.get(k);
                    me = m.evaluate(correctModel);
                    String label = "candidate" + k
                            + m.getSteinerNodes().getScoreDetailsString()
                            + "cost:" + roundTwoDecimals(m.getCost())
                            + "-precision:" + me.getPrecision()
                            + "-recall:" + me.getRecall();
                    models.put(label, m);

                    if (k == 0) { // first-ranked model
                        System.out.println("number of known models: " + numberOfKnownModels
                                + ", precision: " + me.getPrecision()
                                + ", recall: " + me.getRecall()
                                + ", time: " + elapsedTimeSec);
                        logger.info("number of known models: " + numberOfKnownModels
                                + ", precision: " + me.getPrecision()
                                + ", recall: " + me.getRecall()
                                + ", time: " + elapsedTimeSec);
                        String s = me.getPrecision() + "\t" + me.getRecall() + "\t" + elapsedTimeSec;
                        if (resultsArray[numberOfKnownModels + 2].length() > 0)
                            resultsArray[numberOfKnownModels + 2].append(" \t ");
                        resultsArray[numberOfKnownModels + 2].append(s);
                    }
                }

            String outName = !iterativeEvaluation
                    ? outputPath + semanticModels.get(newSourceIndex).getName() + Params.GRAPHVIS_OUT_DETAILS_FILE_EXT
                    : outputPath + semanticModels.get(newSourceIndex).getName()
                            + ".knownModels=" + numberOfKnownModels + Params.GRAPHVIS_OUT_DETAILS_FILE_EXT;

            GraphVizUtil.exportSemanticModelsToGraphviz(models, newSource.getName(), outName,
                    GraphVizLabelType.LocalId, GraphVizLabelType.LocalUri, false, false);

            numberOfKnownModels++;
        }
    }

    for (StringBuffer s : resultsArray)
        resultFile.println(s.toString());

    resultFile.close();
}
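/*
 * Layout of the results file written above (tab-separated, one three-column
 * group per test source; the values shown are illustrative):
 *
 *   source1(12) 	  	  	 source2(9) 	  	  	 ...
 *   p 	 r 	 t 	 p 	 r 	 t 	 ...
 *   0.85 	 0.79 	 1.2 	 0.91 	 0.88 	 0.8 	 ...
 *
 * Row 0 names each source (with its attribute count), row 1 labels the
 * precision/recall/time columns, and each subsequent row i holds the
 * first-ranked model's scores when i-2 models are known. In non-iterative
 * mode only the last of these rows is populated.
 */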
@Override
public UpdateContainer doIt(Workspace workspace) throws CommandException {
    Worksheet worksheet = workspace.getWorksheet(worksheetId);
    SuperSelection selection = getSuperSelection(worksheet);

    // Find the column path corresponding to the selected hNodeId
    HNodePath selectedPath = null;
    List<HNodePath> columnPaths = worksheet.getHeaders().getAllPaths();
    for (HNodePath path : columnPaths) {
        if (path.getLeaf().getId().equals(hNodeId)) {
            selectedPath = path;
        }
    }
    Collection<Node> nodes = new ArrayList<Node>();
    workspace.getFactory().getWorksheet(worksheetId).getDataTable()
            .collectNodes(selectedPath, nodes, selection);
    try {
        // Build the request payload: one {id, value} record per cell in the column
        JSONArray requestJsonArray = new JSONArray();
        for (Node node : nodes) {
            String id = node.getId();
            String originalVal = node.getValue().asString();
            JSONObject jsonRecord = new JSONObject();
            jsonRecord.put("id", id);
            originalVal = originalVal == null ? "" : originalVal;
            jsonRecord.put("value", originalVal);
            requestJsonArray.put(jsonRecord);
        }
        String jsonString = requestJsonArray.toString();

        // POST the records to the cleaning service as a URL-encoded form
        String url = ServletContextParameterMap.getParameterValue(ContextParameter.CLEANING_SERVICE_URL);
        HttpClient httpclient = new DefaultHttpClient();
        URI u = new URI(url);
        List<NameValuePair> formparams = new ArrayList<NameValuePair>();
        formparams.add(new BasicNameValuePair("json", jsonString));
        HttpPost httppost = new HttpPost(u);
        httppost.setEntity(new UrlEncodedFormEntity(formparams, "UTF-8"));
        HttpResponse response = httpclient.execute(httppost);
        HttpEntity entity = response.getEntity();

        // Read the response body into a string
        StringBuffer out = new StringBuffer();
        if (entity != null) {
            BufferedReader buf = new BufferedReader(new InputStreamReader(entity.getContent()));
            String line = buf.readLine();
            while (line != null) {
                out.append(line);
                line = buf.readLine();
            }
        }

        final JSONObject data1 = new JSONObject(out.toString());
        return new UpdateContainer(new AbstractUpdate() {
            @Override
            public void generateJson(String prefix, PrintWriter pw, VWorkspace vWorkspace) {
                JSONObject response = new JSONObject();
                try {
                    response.put("updateType", "CleaningServiceOutput");
                    response.put("chartData", data1);
                    response.put("hNodeId", hNodeId);
                } catch (JSONException e) {
                    pw.print("Error");
                }
                pw.print(response.toString());
            }
        });
    } catch (Exception e) {
        e.printStackTrace();
        return new UpdateContainer(new ErrorUpdate("Error!"));
    }
}
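/*
 * A sketch of the exchange with the cleaning service, inferred from the code
 * above. The record ids and values are illustrative, and the shape of the
 * service's reply is an assumption: the code only requires that the body
 * parse as a JSON object.
 *
 *   POST <CLEANING_SERVICE_URL>
 *   Content-Type: application/x-www-form-urlencoded
 *
 *   json=[{"id":"HN42","value":"  John Smith "},{"id":"HN43","value":"jane smith"}]
 *
 * Whatever JSON object the service returns is forwarded to the client
 * unmodified under the "chartData" key of a CleaningServiceOutput update.
 */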