private void generateRDFFromWorksheet(
    Worksheet wk,
    Workspace workspace,
    KR2RMLMapping mapping,
    List<KR2RMLRDFWriter> writers,
    String baseURI)
    throws IOException, JSONException, KarmaException {
  // Generate RDF for the remaining rows
  // Gets all the errors generated during the RDF generation
  ErrorReport errorReport = new ErrorReport();

  this.applyHistoryToWorksheet(workspace, wk, mapping);
  SuperSelection selection = SuperSelectionManager.DEFAULT_SELECTION;
  if (selectionName != null && !selectionName.trim().isEmpty())
    selection = wk.getSuperSelectionManager().getSuperSelection(selectionName);
  if (selection == null) return;

  // RDF generation object initialization
  KR2RMLWorksheetRDFGenerator rdfGen =
      new KR2RMLWorksheetRDFGenerator(
          wk,
          workspace.getFactory(),
          workspace.getOntologyManager(),
          writers,
          false,
          mapping,
          errorReport,
          selection);

  // Generate the RDF
  rdfGen.generateRDF(false);
}
@Override
public UpdateContainer doIt(Workspace workspace) throws CommandException {
  UpdateContainer undoEffects =
      workspace.getCommandHistory().undoOrRedoCommandsUntil(workspace, commandIdArg);
  UpdateContainer result = new UpdateContainer(new HistoryUpdate(workspace.getCommandHistory()));
  result.append(undoEffects);
  return result;
}
@Override
public UpdateContainer undoIt(Workspace workspace) {
  Node node = workspace.getFactory().getNode(nodeIdArg);
  SuperSelection sel = getSuperSelection(workspace);
  node.setValue(previousValue, previousStatus, workspace.getFactory());
  UpdateContainer uc =
      WorksheetUpdateFactory.createWorksheetHierarchicalAndCleaningResultsUpdates(
          worksheetId, sel);
  uc.add(new NodeChangedUpdate(worksheetId, nodeIdArg, previousValue, previousStatus));
  return uc;
}
@Override
public UpdateContainer doIt(Workspace workspace) throws CommandException {
  UpdateContainer uc = new UpdateContainer();
  SuperSelection sel = this.getSuperSelection(workspace);
  for (int i = 0; i < updates.length(); i++) {
    String update = updates.getString(i);
    switch (update) {
      case "headers":
        uc.add(new WorksheetHeadersUpdate(worksheetId));
        break;
      case "list":
        uc.add(new WorksheetListUpdate());
        break;
      case "data":
        uc.add(new WorksheetDataUpdate(worksheetId, sel));
        break;
      case "alignment":
        {
          Alignment alignment =
              AlignmentManager.Instance()
                  .getAlignmentOrCreateIt(
                      workspace.getId(), worksheetId, workspace.getOntologyManager());
          uc.add(new AlignmentSVGVisualizationUpdate(worksheetId, alignment));
          break;
        }
      case "semanticTypes":
        {
          Alignment alignment =
              AlignmentManager.Instance()
                  .getAlignmentOrCreateIt(
                      workspace.getId(), worksheetId, workspace.getOntologyManager());
          uc.add(
              new SemanticTypesUpdate(
                  workspace.getWorksheet(worksheetId), worksheetId, alignment));
          break;
        }
      case "regenerate":
        uc.add(new RegenerateWorksheetUpdate(worksheetId));
        break;
      case "all":
        // Note: "all" replaces any updates accumulated so far in this loop.
        uc = WorksheetUpdateFactory.createRegenerateWorksheetUpdates(worksheetId, sel);
        break;
      case "cleaning":
        uc.add(new WorksheetCleaningUpdate(worksheetId, false, sel));
        break;
    }
  }
  return uc;
}
@Override
public UpdateContainer doIt(VWorkspace vWorkspace) throws CommandException {
  Worksheet worksheet = vWorkspace.getViewFactory().getVWorksheet(vWorksheetId).getWorksheet();
  Workspace ws = vWorkspace.getWorkspace();
  if (worksheet.getSemanticTypes().getListOfTypes().size() == 0) {
    SemanticTypeUtil.populateSemanticTypesUsingCRF(
        worksheet,
        ws.getTagsContainer().getTag(TagName.Outlier),
        ws.getCrfModelHandler(),
        ws.getOntologyManager());
  }
  WorksheetGeospatialContent geo = new WorksheetGeospatialContent(worksheet);
  // Send an error update if no geospatial data found!
  if (geo.hasNoGeospatialData()) {
    return new UpdateContainer(new ErrorUpdate("No geospatial data found in the worksheet!"));
  }
  try {
    final File file = geo.publishKML();
    // Transfer the file to a public server
    final boolean transfer = transferFileToPublicServer(file);
    if (!transfer) {
      logger.error(
          "Published KML file could not be moved to a public server to display on Google Maps!");
    }
    return new UpdateContainer(
        new AbstractUpdate() {
          @Override
          public void generateJson(String prefix, PrintWriter pw, VWorkspace vWorkspace) {
            JSONObject outputObject = new JSONObject();
            try {
              outputObject.put(JsonKeys.updateType.name(), "PublishKMLUpdate");
              outputObject.put(JsonKeys.fileName.name(), publicKMLAddress + file.getName());
              outputObject.put(JsonKeys.transferSuccessful.name(), transfer);
              outputObject.put(JsonKeys.localFileName.name(), "KML/" + file.getName());
              pw.println(outputObject.toString(4));
            } catch (JSONException e) {
              logger.error("Error occurred while generating JSON!", e);
            }
          }
        });
  } catch (FileNotFoundException e) {
    logger.error("KML file not found!", e);
    return new UpdateContainer(new ErrorUpdate("Error occurred while publishing KML layer!"));
  }
}
public static UpdateContainer createSemanticTypesAndSVGAlignmentUpdates(
    String worksheetId, Workspace workspace, Alignment alignment) {
  Worksheet worksheet = workspace.getWorksheet(worksheetId);
  UpdateContainer c = new UpdateContainer();
  c.add(new SemanticTypesUpdate(worksheet, worksheetId, alignment));
  c.add(new AlignmentSVGVisualizationUpdate(worksheetId, alignment));
  return c;
}
@Override
public UpdateContainer doIt(Workspace workspace) throws CommandException {
  Worksheet worksheet = workspace.getWorksheet(worksheetId);
  RepFactory factory = workspace.getFactory();
  SuperSelection superSel = getSuperSelection(worksheet);
  HTable hTable = factory.getHTable(factory.getHNode(hNodeId).getHTableId());
  Selection currentSel = superSel.getSelection(hTable.getId());
  if (currentSel != null) {
    currentSel.updateSelection();
  }
  CommandHistory history = workspace.getCommandHistory();
  List<Command> tmp =
      gatherAllOperateSelectionCommands(
          history.getCommandsFromWorksheetId(worksheetId), workspace);
  if (tmp.size() > 0) {
    JSONArray inputJSON = new JSONArray();
    inputJSON.put(
        CommandInputJSONUtil.createJsonObject(
            "worksheetId", worksheetId, ParameterType.worksheetId));
    inputJSON.put(
        CommandInputJSONUtil.createJsonObject("hNodeId", hNodeId, ParameterType.hNodeId));
    inputJSON.put(
        CommandInputJSONUtil.createJsonObject(
            "operation", Operation.Intersect.name(), ParameterType.other));
    inputJSON.put(
        CommandInputJSONUtil.createJsonObject(
            "pythonCode", SelectionManager.defaultCode, ParameterType.other));
    inputJSON.put(CommandInputJSONUtil.createJsonObject("onError", "false", ParameterType.other));
    inputJSON.put(
        CommandInputJSONUtil.createJsonObject(
            "selectionName", superSel.getName(), ParameterType.other));
    Command t = null;
    try {
      t = new OperateSelectionCommandFactory().createCommand(inputJSON, workspace);
    } catch (Exception e) {
      // Best effort: if the default selection command cannot be recreated, skip it.
    }
    if (t != null) history._getHistory().add(t);
    history._getHistory().addAll(tmp);
  }
  UpdateContainer uc =
      WorksheetUpdateFactory.createWorksheetHierarchicalAndCleaningResultsUpdates(
          worksheetId, superSel);
  uc.add(new HistoryUpdate(history));
  return uc;
}
public static void detectSelectionStatusChange(
    String worksheetId, Workspace workspace, Command command) {
  Worksheet worksheet = workspace.getWorksheet(worksheetId);
  for (Selection sel : worksheet.getSelectionManager().getAllDefinedSelection()) {
    Set<String> inputColumns = new HashSet<String>(sel.getInputColumns());
    inputColumns.retainAll(command.getOutputColumns());
    if (inputColumns.size() > 0) sel.invalidateSelection();
  }
}
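The helper above relies on Set.retainAll to test whether any of a selection's input columns was rewritten by a command. A minimal, self-contained sketch of that overlap test, for reference; the class and method names here are hypothetical stand-ins, only the retainAll idiom mirrors detectSelectionStatusChange.

import java.util.HashSet;
import java.util.Set;

public class SelectionInvalidationSketch {
  // True when any of the selection's input columns appears among the command's outputs.
  static boolean overlaps(Set<String> selectionInputs, Set<String> commandOutputs) {
    Set<String> intersection = new HashSet<>(selectionInputs); // copy first: retainAll mutates
    intersection.retainAll(commandOutputs);
    return !intersection.isEmpty();
  }

  public static void main(String[] args) {
    // true -> the selection would be invalidated
    System.out.println(overlaps(Set.of("hNode1", "hNode2"), Set.of("hNode2", "hNode3")));
  }
}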
@Override
public UpdateContainer doIt(Workspace workspace) throws CommandException {
  Node node = workspace.getFactory().getNode(nodeIdArg);
  SuperSelection sel = getSuperSelection(workspace);
  // Refuse to edit a cell with a nested table before touching any command state.
  if (node.hasNestedTable()) {
    throw new CommandException(
        this, "Cell " + nodeIdArg + " has a nested table. It cannot be edited.");
  }
  inputColumns.clear();
  outputColumns.clear();
  inputColumns.add(node.getHNodeId());
  outputColumns.add(node.getHNodeId());
  previousValue = node.getValue();
  previousStatus = node.getStatus();
  node.setValue(newValueArg, Node.NodeStatus.edited, workspace.getFactory());
  WorksheetUpdateFactory.detectSelectionStatusChange(worksheetId, workspace, this);
  UpdateContainer uc =
      WorksheetUpdateFactory.createWorksheetHierarchicalAndCleaningResultsUpdates(
          worksheetId, sel);
  uc.add(new NodeChangedUpdate(worksheetId, nodeIdArg, newValueArg, Node.NodeStatus.edited));
  return uc;
}
private List<Command> gatherAllOperateSelectionCommands(
    List<Command> commands, Workspace workspace) {
  List<Command> operationCommands = new ArrayList<Command>();
  for (Command c : commands) {
    if (c instanceof OperateSelectionCommand) {
      OperateSelectionCommand t = (OperateSelectionCommand) c;
      if (isSameHTableId(t.getHNodeId(), hNodeId, workspace)) {
        JSONObject obj = workspace.getCommandHistory().getCommandJSON(workspace, t);
        Command tmp = generateCommandFromJSON(workspace, obj);
        if (tmp != null) operationCommands.add(tmp);
      }
    }
  }
  return operationCommands;
}
private JSONArray extractHistoryFromModel(Workspace workspace, UpdateContainer uc)
    throws RepositoryException, RDFParseException, IOException, JSONException, KarmaException {
  Worksheet ws = workspace.getFactory().getWorksheet(worksheetId);
  R2RMLMappingIdentifier id =
      new R2RMLMappingIdentifier(ws.getTitle(), r2rmlModelFile.toURI().toURL());
  WorksheetR2RMLJenaModelParser parser = new WorksheetR2RMLJenaModelParser(id);
  KR2RMLMapping mapping = parser.parse();
  KR2RMLVersion version = mapping.getVersion();
  if (version.compareTo(KR2RMLVersion.current) < 0) {
    uc.add(
        new InfoUpdate(
            "Model version is "
                + version.toString()
                + ". Current version is "
                + KR2RMLVersion.current.toString()
                + ". Please publish it again."));
  }
  return mapping.getWorksheetHistory();
}
@Override
public UpdateContainer doIt(Workspace workspace) throws CommandException {
  Worksheet wk = workspace.getWorksheet(worksheetId);
  SuperSelection selection = getSuperSelection(wk);
  String msg =
      String.format("begin, Time,%d, Worksheet,%s", System.currentTimeMillis(), worksheetId);
  logger.info(msg);
  // Find the column path whose leaf is the selected HNode
  HashMap<String, HashMap<String, String>> rows = new HashMap<String, HashMap<String, String>>();
  HNodePath selectedPath = null;
  List<HNodePath> columnPaths = wk.getHeaders().getAllPaths();
  for (HNodePath path : columnPaths) {
    if (path.getLeaf().getId().equals(hNodeId)) {
      selectedPath = path;
    }
  }
  // Collect the column's nodes and keep a random subset of them
  Collection<Node> nodes = new ArrayList<Node>();
  wk.getDataTable().collectNodes(selectedPath, nodes, selection);
  HashSet<Integer> indSet = this.obtainIndexs(nodes.size());
  int index = 0;
  for (Iterator<Node> iterator = nodes.iterator(); iterator.hasNext(); ) {
    Node node = iterator.next();
    if (indSet.contains(index)) {
      String id = node.getId();
      String originalVal = node.getValue().asString();
      HashMap<String, String> x = new HashMap<String, String>();
      x.put("Org", originalVal);
      x.put("Tar", originalVal);
      x.put("Orgdis", originalVal);
      x.put("Tardis", originalVal);
      rows.put(id, x);
    }
    index++;
  }
  msg = String.format("end, Time,%d, Worksheet,%s", System.currentTimeMillis(), worksheetId);
  logger.info(msg);
  return new UpdateContainer(new FetchResultUpdate(hNodeId, rows));
}
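obtainIndexs is defined outside this excerpt; assuming it draws a set of distinct random row indices, a stand-alone sketch of that sampling step could look like the following (class and method names are hypothetical).

import java.util.HashSet;
import java.util.Random;
import java.util.Set;

public class RandomIndexSampleSketch {
  // Sample up to k distinct indices from [0, n) by rejection; efficient when k << n.
  static Set<Integer> sampleIndices(int n, int k, Random rnd) {
    Set<Integer> picked = new HashSet<>();
    while (picked.size() < Math.min(k, n)) {
      picked.add(rnd.nextInt(n));
    }
    return picked;
  }

  public static void main(String[] args) {
    System.out.println(sampleIndices(100, 5, new Random())); // five distinct indices in [0, 100)
  }
}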
@Override
public UpdateContainer doIt(Workspace workspace) throws CommandException {
  Worksheet worksheet = workspace.getWorksheet(worksheetId);
  CSVFileExport csvFileExport = new CSVFileExport(worksheet);
  try {
    final String fileName = csvFileExport.publishCSV();
    if (fileName == null)
      return new UpdateContainer(
          new ErrorUpdate("No data to export! Have you aligned the worksheet?"));
    return new UpdateContainer(
        new AbstractUpdate() {
          @Override
          public void generateJson(String prefix, PrintWriter pw, VWorkspace vWorkspace) {
            JSONObject outputObject = new JSONObject();
            try {
              outputObject.put(JsonKeys.updateType.name(), "PublishCSVUpdate");
              outputObject.put(JsonKeys.fileUrl.name(), fileName);
              outputObject.put(JsonKeys.worksheetId.name(), worksheetId);
              pw.println(outputObject.toString(4));
            } catch (JSONException e) {
              logger.error("Error occurred while generating JSON!", e);
            }
          }
        });
  } catch (FileNotFoundException e) {
    logger.error("CSV folder not found!", e);
    return new UpdateContainer(new ErrorUpdate("Error occurred while exporting CSV file!"));
  } catch (Exception e) {
    logger.error("CSV export error", e);
    return new UpdateContainer(new ErrorUpdate("Error occurred while exporting CSV file!"));
  }
}
private void generateRDF(
    String wkname,
    String query,
    List<KR2RMLRDFWriter> writers,
    R2RMLMappingIdentifier id,
    String baseURI)
    throws IOException, JSONException, KarmaException, SQLException, ClassNotFoundException {
  logger.debug("Generating RDF...");

  WorksheetR2RMLJenaModelParser parserTest = new WorksheetR2RMLJenaModelParser(id);
  KR2RMLMapping mapping = parserTest.parse();

  AbstractJDBCUtil dbUtil = JDBCUtilFactory.getInstance(dbType);
  Connection conn = dbUtil.getConnection(hostname, portnumber, username, password, dBorSIDName);
  conn.setAutoCommit(false);

  java.sql.Statement stmt =
      conn.createStatement(
          java.sql.ResultSet.TYPE_FORWARD_ONLY, java.sql.ResultSet.CONCUR_READ_ONLY);
  stmt.setFetchSize(DATABASE_TABLE_FETCH_SIZE);

  ResultSet r = stmt.executeQuery(query);
  ResultSetMetaData meta = r.getMetaData();

  // Get the column names
  List<String> columnNames = new ArrayList<>();
  for (int i = 1; i <= meta.getColumnCount(); i++) {
    columnNames.add(meta.getColumnName(i));
  }

  // Prepare required Karma objects
  Workspace workspace = initializeWorkspace();
  RepFactory factory = workspace.getFactory();
  Worksheet wk = factory.createWorksheet(wkname, workspace, encoding);
  List<String> headersList = addHeaders(wk, columnNames, factory);

  int counter = 0;
  ArrayList<String> rowValues = null;
  while ((rowValues = dbUtil.parseResultSetRow(r)) != null) {
    // Generate RDF and start a fresh worksheet every DATABASE_TABLE_FETCH_SIZE rows
    if (counter % DATABASE_TABLE_FETCH_SIZE == 0 && counter != 0) {
      generateRDFFromWorksheet(wk, workspace, mapping, writers, baseURI);
      logger.debug("Done for " + counter + " rows ...");
      removeWorkspace(workspace);

      parserTest = new WorksheetR2RMLJenaModelParser(id);
      mapping = parserTest.parse();

      workspace = initializeWorkspace();
      factory = workspace.getFactory();
      wk = factory.createWorksheet(wkname, workspace, encoding);
      headersList = addHeaders(wk, columnNames, factory);
    }

    // Add the row data
    Table dataTable = wk.getDataTable();
    Row row = dataTable.addRow(factory);
    for (int i = 0; i < rowValues.size(); i++) {
      row.setValue(headersList.get(i), rowValues.get(i), factory);
    }
    counter++;
  }
  generateRDFFromWorksheet(wk, workspace, mapping, writers, baseURI);

  // Release all the resources (result set and statement before the connection)
  r.close();
  stmt.close();
  conn.close();
  logger.debug("done");
}
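The loop above interleaves row ingestion with periodic flushes so that only DATABASE_TABLE_FETCH_SIZE rows are held in memory at a time. A stripped-down sketch of that batch-flush pattern, with Karma's worksheet machinery replaced by a hypothetical flush callback:

import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.Statement;

public class BatchedResultSetSketch {
  static final int BATCH_SIZE = 200; // stands in for DATABASE_TABLE_FETCH_SIZE

  static void streamInBatches(Connection conn, String query, Runnable flush) throws Exception {
    try (Statement stmt =
        conn.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY)) {
      stmt.setFetchSize(BATCH_SIZE); // hint the driver to stream instead of buffering all rows
      try (ResultSet rs = stmt.executeQuery(query)) {
        int counter = 0;
        while (rs.next()) {
          if (counter % BATCH_SIZE == 0 && counter != 0) {
            flush.run(); // emit output for the full batch and reset per-batch state
          }
          // ... accumulate the current row into per-batch state ...
          counter++;
        }
        flush.run(); // final, possibly partial batch
      }
    }
  }
}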
@Override
public UpdateContainer doIt(Workspace workspace) throws CommandException {
  ModelingConfiguration modelingConfiguration =
      ModelingConfigurationRegistry.getInstance()
          .getModelingConfiguration(
              WorkspaceKarmaHomeRegistry.getInstance().getKarmaHome(workspace.getId()));
  TripleStoreUtil utilObj = new TripleStoreUtil();
  boolean showModelsWithoutMatching = modelingConfiguration.isShowModelsWithoutMatching();
  try {
    HashMap<String, List<String>> metadata =
        utilObj.getMappingsWithMetadata(TripleStoreUrl, context);
    RepFactory factory = workspace.getFactory();
    List<String> model_Names = metadata.get("model_names");
    List<String> model_Urls = metadata.get("model_urls");
    List<String> model_Times = metadata.get("model_publishtimes");
    List<String> model_Contexts = metadata.get("model_contexts");
    List<String> model_inputColumns = metadata.get("model_inputcolumns");
    final List<JSONObject> list = new ArrayList<>();
    Set<String> worksheetcolumns = new HashSet<>();
    if (worksheetId != null && !worksheetId.trim().isEmpty()) {
      HTable htable = factory.getWorksheet(worksheetId).getHeaders();
      getHNodesForWorksheet(htable, worksheetcolumns, factory);
    }
    // Walk the parallel metadata lists in lock-step
    Iterator<String> nameitr = model_Names.iterator();
    Iterator<String> urlitr = model_Urls.iterator();
    Iterator<String> timeitr = model_Times.iterator();
    Iterator<String> contextitr = model_Contexts.iterator();
    Iterator<String> inputitr = model_inputColumns.iterator();
    while (nameitr.hasNext()
        && urlitr.hasNext()
        && timeitr.hasNext()
        && contextitr.hasNext()
        && inputitr.hasNext()) {
      JSONObject obj = new JSONObject();
      Set<String> inputs = new HashSet<>();
      obj.put("name", nameitr.next());
      obj.put("url", urlitr.next());
      obj.put("publishTime", timeitr.next());
      obj.put("context", contextitr.next());
      String columns = inputitr.next();
      if (columns != null && !columns.isEmpty()) {
        JSONArray array = new JSONArray(columns);
        for (int i = 0; i < array.length(); i++) inputs.add(array.get(i).toString());
      } else if (showModelsWithoutMatching) {
        list.add(obj);
      }
      if (worksheetId != null && !worksheetId.isEmpty()) {
        inputs.retainAll(worksheetcolumns);
        obj.put("inputColumns", inputs.size());
      } else obj.put("inputColumns", 0);
      if (!inputs.isEmpty() || (worksheetId == null || worksheetId.trim().isEmpty()))
        list.add(obj);
    }
    // Sort descending by the number of matching input columns
    Collections.sort(
        list,
        new Comparator<JSONObject>() {
          @Override
          public int compare(JSONObject a, JSONObject b) {
            return b.getInt("inputColumns") - a.getInt("inputColumns");
          }
        });
    return new UpdateContainer(
        new AbstractUpdate() {
          @Override
          public void generateJson(String prefix, PrintWriter pw, VWorkspace vWorkspace) {
            try {
              JSONArray array = new JSONArray();
              for (JSONObject obj : list) {
                array.put(obj);
              }
              pw.print(array.toString());
            } catch (Exception e) {
              logger.error("Error generating JSON!", e);
            }
          }
        });
  } catch (Exception e) {
    return new UpdateContainer(
        new ErrorUpdate("Unable to get mappings with metadata: " + e.getMessage()));
  }
}
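One note on the sort above: subtraction-based comparators such as b.getInt(...) - a.getInt(...) can overflow for ints of extreme magnitude. Column counts are small, so it is safe here, but the overflow-proof descending sort is Comparator.comparingInt(...).reversed(). A small stand-alone sketch (the Model record is hypothetical; Java 16+ for records):

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

public class DescendingSortSketch {
  record Model(String name, int inputColumns) {}

  public static void main(String[] args) {
    List<Model> models =
        new ArrayList<>(List.of(new Model("a", 1), new Model("b", 5), new Model("c", 3)));
    // Sort descending by inputColumns without the overflow risk of (b - a) comparators.
    models.sort(Comparator.comparingInt(Model::inputColumns).reversed());
    System.out.println(models); // b (5), c (3), a (1)
  }
}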
@Override
public UpdateContainer doIt(Workspace workspace) {
  Worksheet worksheet = workspace.getWorksheet(worksheetId);
  SuperSelection selection = getSuperSelection(worksheet);
  String worksheetName = worksheet.getTitle();
  try {
    // Prepare the model file name
    final String modelFileName =
        workspace.getCommandPreferencesId() + worksheetId + "-" + worksheetName + "-model.ttl";
    final String modelFileLocalPath =
        ServletContextParameterMap.getParameterValue(ContextParameter.R2RML_PUBLISH_DIR)
            + modelFileName;
    File f = new File(modelFileLocalPath);

    // Prepare the graphUri where the model is published in the triple store
    String graphName =
        worksheet
            .getMetadataContainer()
            .getWorksheetProperties()
            .getPropertyValue(Property.graphName);
    if (graphName == null || graphName.isEmpty()) {
      SimpleDateFormat sdf = new SimpleDateFormat("dd-MMM-yyyy-kkmmssS");
      String ts = sdf.format(Calendar.getInstance().getTime());
      graphName =
          "http://localhost/"
              + workspace.getCommandPreferencesId()
              + "/"
              + worksheetId
              + "/model/"
              + ts;
      worksheet
          .getMetadataContainer()
          .getWorksheetProperties()
          .setPropertyValue(Property.graphName, graphName);
    }

    // If the model is not published, publish it!
    if (!f.exists() || !f.isFile()) {
      GenerateR2RMLModelCommandFactory factory = new GenerateR2RMLModelCommandFactory();
      GenerateR2RMLModelCommand cmd =
          (GenerateR2RMLModelCommand)
              factory.createCommand(
                  workspace,
                  worksheetId,
                  TripleStoreUtil.defaultModelsRepoUrl,
                  graphName,
                  selection.getName());
      cmd.doIt(workspace);
    } else {
      // If the model was published more than 30 minutes ago, publish it again, just to be sure
      long diff = Calendar.getInstance().getTimeInMillis() - f.lastModified();
      if ((diff / 1000L / 60L) > 30) {
        f.delete();
        GenerateR2RMLModelCommandFactory factory = new GenerateR2RMLModelCommandFactory();
        GenerateR2RMLModelCommand cmd =
            (GenerateR2RMLModelCommand)
                factory.createCommand(
                    workspace,
                    worksheetId,
                    TripleStoreUtil.defaultModelsRepoUrl,
                    graphName,
                    selection.getName());
        cmd.doIt(workspace);
      }
    }

    StringBuffer query =
        new StringBuffer(
            "prefix rr: <http://www.w3.org/ns/r2rml#> prefix km-dev: <http://isi.edu/integration/karma/dev#> ");
    /* ******
    This is the query for the list of columns:

    PREFIX km-dev: <http://isi.edu/integration/karma/dev#>
    PREFIX rr: <http://www.w3.org/ns/r2rml#>
    select distinct ?class where {
      {
        ?x1 rr:subjectMap/km-dev:alignmentNodeId "------- The full url of the column/class --------" .
        ?x1 rr:predicateObjectMap/rr:objectMap/rr:column ?column .
        ?x1 rr:subjectMap/rr:predicate ?class .
      } UNION {
        ?x1 rr:subjectMap/km-dev:alignmentNodeId "------- The full url of the column/class --------" .
        ?x1 (rr:predicateObjectMap/rr:objectMap/rr:parentTriplesMap)* ?x2 .
        ?x2 rr:predicateObjectMap/rr:objectMap/rr:column ?column .
        ?x2 rr:predicateObjectMap/rr:predicate ?class .
      }
    }
    ****** */
    query.append("select distinct ?class ?column where { ");
    if (graphName != null && !graphName.trim().isEmpty()) {
      query.append(" graph <" + graphName + "> { ");
    }
    query
        .append("{ ?x1 rr:subjectMap/km-dev:alignmentNodeId \"")
        .append(this.nodeId)
        .append(
            "\" . ?x1 rr:predicateObjectMap/rr:objectMap/rr:column ?column . ?x1 rr:subjectMap/rr:predicate ?class .")
        .append(" } UNION { ")
        .append("?x1 rr:subjectMap/km-dev:alignmentNodeId \"")
        .append(this.nodeId)
        .append("\" . ?x1 (rr:predicateObjectMap/rr:objectMap/rr:parentTriplesMap)* ?x2 .")
        .append(" ?x2 rr:predicateObjectMap ?x3 . ")
        .append(" ?x3 rr:objectMap/rr:column ?column . ?x3 rr:predicate ?class .")
        .append(" } }");
    if (graphName != null && !graphName.trim().isEmpty()) {
      query.append(" } ");
    }
    logger.info("Query: " + query.toString());

    String sData =
        TripleStoreUtil.invokeSparqlQuery(
            query.toString(), TripleStoreUtil.defaultModelsRepoUrl, "application/json", null);
    if (sData == null || sData.isEmpty()) {
      logger.error("Empty response object from query : " + query);
    }
    HashMap<String, String> cols = new HashMap<String, String>();
    try {
      JSONObject obj1 = new JSONObject(sData);
      JSONArray arr = obj1.getJSONObject("results").getJSONArray("bindings");
      for (int i = 0; i < arr.length(); i++) {
        String colName = arr.getJSONObject(i).getJSONObject("column").getString("value");
        String colValue = arr.getJSONObject(i).getJSONObject("class").getString("value");
        if (cols.containsKey(colName)) {
          logger.error("Duplicate Column <-> property mapping. " + colName + " <=> " + colValue);
        } else {
          cols.put(colName, colValue);
        }
      }
    } catch (Exception e2) {
      logger.error("Error in parsing json response", e2);
    }
    logger.info("Total columns fetched : " + cols.size());
    final HashMap<String, String> columns = cols;
    return new UpdateContainer(
        new AbstractUpdate() {
          @Override
          public void generateJson(String prefix, PrintWriter pw, VWorkspace vWorkspace) {
            JSONObject obj = new JSONObject();
            try {
              Iterator<String> itr = columns.keySet().iterator();
              JSONArray colList = new JSONArray();
              while (itr.hasNext()) {
                JSONObject o = new JSONObject();
                String k = itr.next();
                o.put("name", k);
                o.put("url", columns.get(k));
                colList.put(o);
              }
              obj.put("updateType", "FetchColumnUpdate");
              obj.put("columns", colList);
              obj.put("rootId", nodeId);
              pw.println(obj.toString());
            } catch (JSONException e) {
              logger.error("Error occurred while fetching worksheet properties!", e);
            }
          }
        });
  } catch (Exception e) {
    String msg = "Error occurred while fetching columns!";
    logger.error(msg, e);
    return new UpdateContainer(new ErrorUpdate(msg));
  }
}
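The query builder above appends the GRAPH clause only when a graph name is present, and has to close it symmetrically. Factored out, that conditional wrapping is just string surgery; a minimal sketch with a hypothetical pattern string:

public class SparqlGraphWrapSketch {
  // Wrap a basic graph pattern in "graph <uri> { ... }" only when a graph URI is given.
  static String buildQuery(String pattern, String graphUri) {
    StringBuilder query = new StringBuilder("select distinct ?class ?column where { ");
    boolean hasGraph = graphUri != null && !graphUri.trim().isEmpty();
    if (hasGraph) query.append("graph <").append(graphUri).append("> { ");
    query.append(pattern);
    if (hasGraph) query.append(" }");
    query.append(" }");
    return query.toString();
  }

  public static void main(String[] args) {
    System.out.println(buildQuery("?x1 ?p ?class .", "http://localhost/model/ts"));
  }
}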
@Override
public UpdateContainer doIt(Workspace workspace) throws CommandException {
  Worksheet wk = workspace.getWorksheet(worksheetId);
  SuperSelection selection = getSuperSelection(wk);
  String msg =
      String.format(
          "Gen rule start,Time,%d, Worksheet,%s", System.currentTimeMillis(), worksheetId);
  logger.info(msg);
  // Get the HNode
  HashMap<String, String> rows = new HashMap<String, String>();
  HashMap<String, Integer> amb = new HashMap<String, Integer>();
  HNodePath selectedPath = null;
  List<HNodePath> columnPaths = wk.getHeaders().getAllPaths();
  for (HNodePath path : columnPaths) {
    if (path.getLeaf().getId().equals(hNodeId)) {
      selectedPath = path;
    }
  }
  Collection<Node> nodes = new ArrayList<Node>();
  wk.getDataTable().collectNodes(selectedPath, nodes, selection);
  for (Node node : nodes) {
    String id = node.getId();
    if (!this.nodeIds.contains(id)) continue;
    String originalVal = node.getValue().asString();
    rows.put(id, originalVal);
    this.compResultString += originalVal + "\n";
    calAmbScore(id, originalVal, amb);
  }
  RamblerValueCollection vc = new RamblerValueCollection(rows);
  HashMap<String, Vector<String[]>> expFeData = new HashMap<String, Vector<String[]>>();
  inputs = new RamblerTransformationInputs(examples, vc);
  // Generate the program
  boolean results = false;
  int iterNum = 0;
  RamblerTransformationOutput rtf = null;
  // Initialize the vocabulary from (at most) the first 30 input values
  Iterator<String> iterx = inputs.getInputValues().getValues().iterator();
  Vector<String> v = new Vector<String>();
  int vb_cnt = 0;
  while (iterx.hasNext() && vb_cnt < 30) {
    String eString = iterx.next();
    v.add(eString);
    vb_cnt++;
  }
  Vector<String> vob = UtilTools.buildDict(v);
  inputs.setVocab(vob.toArray(new String[vob.size()]));
  // Try to find a program within iterNum iterations
  while (iterNum < 1 && !results) {
    rtf = new RamblerTransformationOutput(inputs);
    if (rtf.getTransformations().keySet().size() > 0) {
      results = true;
    }
    iterNum++;
  }
  Iterator<String> iter = rtf.getTransformations().keySet().iterator();
  // id: {org: tar: orgdis: tardis:}
  HashMap<String, HashMap<String, String>> resdata =
      new HashMap<String, HashMap<String, String>>();
  HashSet<String> keys = new HashSet<String>();
  while (iter.hasNext()) {
    String tpid = iter.next();
    ValueCollection rvco = rtf.getTransformedValues_debug(tpid);
    if (rvco == null) continue;
    // Construct the display data
    HashMap<String, String[]> xyzHashMap = new HashMap<String, String[]>();
    for (String key : rvco.getNodeIDs()) {
      HashMap<String, String> dict = new HashMap<String, String>();
      // Add to the example selection
      boolean isExp = false;
      String org = vc.getValue(key);
      String classLabel = rvco.getClass(key);
      String pretar = rvco.getValue(key);
      String dummyValue = pretar;
      if (pretar.indexOf("_FATAL_ERROR_") != -1) {
        dummyValue = org;
      }
      try {
        UtilTools.StringColorCode(org, dummyValue, dict);
      } catch (Exception ex) {
        logger.info(String.format("ColorCoding Exception %s, %s", org, dummyValue));
        // Fall back to an error display
        dict.put("Org", org);
        dict.put("Tar", "ERROR");
        dict.put("Orgdis", org);
        dict.put("Tardis", "ERROR");
      }
      for (TransformationExample exp : examples) {
        if (exp.getNodeId().compareTo(key) == 0) {
          if (!expFeData.containsKey(classLabel)) {
            Vector<String[]> vstr = new Vector<String[]>();
            String[] texp = {dict.get("Org"), pretar};
            vstr.add(texp);
            expFeData.put(classLabel, vstr);
          } else {
            String[] texp = {dict.get("Org"), pretar};
            expFeData.get(classLabel).add(texp);
          }
          isExp = true;
        }
      }
      if (!isExp) {
        String[] pair = {dict.get("Org"), dict.get("Tar"), pretar, classLabel};
        xyzHashMap.put(key, pair);
      }
      resdata.put(key, dict);
    }
    if (!rtf.nullRule) keys.add(getBestExample(xyzHashMap, expFeData));
  }
  // Find the best row
  String vars = "";
  String expstr = "";
  String recmd = "";
  for (TransformationExample x : examples) {
    expstr += String.format("%s|%s", x.getBefore(), x.getAfter());
  }
  expstr += "|";
  if (rtf.nullRule) {
    keys.clear(); // a null rule produces no recommended keys
  }
  if (!resdata.isEmpty() && !rtf.nullRule) {
    recmd = resdata.get(keys.iterator().next()).get("Org");
  } else {
    recmd = "";
  }
  msg =
      String.format(
          "Gen rule end, Time,%d, Worksheet,%s,Examples:%s,Recmd:%s",
          System.currentTimeMillis(), worksheetId, expstr, recmd);
  logger.info(msg);
  return new UpdateContainer(new CleaningResultUpdate(hNodeId, resdata, vars, keys));
}
@Override
public UpdateContainer doIt(Workspace workspace) throws CommandException {
  final OntologyManager ontMgr = workspace.getOntologyManager();
  Set<LabeledLink> properties = new HashSet<>();
  logger.debug(
      "GetPropertiesCommand:"
          + propertiesRange
          + ":"
          + classURI
          + ","
          + domainURI
          + ", "
          + rangeURI);
  if (propertiesRange == INTERNAL_PROP_RANGE.allObjectProperties) {
    HashMap<String, Label> linkList = ontMgr.getObjectProperties();
    if (linkList != null) {
      for (Label label : linkList.values()) {
        properties.add(new DataPropertyLink(label.getUri(), label));
      }
    }
  } else if (propertiesRange == INTERNAL_PROP_RANGE.allDataProperties) {
    HashMap<String, Label> linkList = ontMgr.getDataProperties();
    for (Label label : linkList.values()) {
      properties.add(new DataPropertyLink(label.getUri(), label));
    }
  } else if (propertiesRange == INTERNAL_PROP_RANGE.propertiesWithDomainRange) {
    Map<String, Label> linkList =
        ontMgr.getObjectPropertiesByDomainRange(domainURI, rangeURI, true);
    for (Label label : linkList.values()) {
      properties.add(new DataPropertyLink(label.getUri(), label));
    }
  } else if (propertiesRange == INTERNAL_PROP_RANGE.dataPropertiesForClass) {
    Map<String, Label> linkList = ontMgr.getDataPropertiesByDomain(classURI, true);
    for (Label label : linkList.values()) {
      properties.add(new DataPropertyLink(label.getUri(), label));
    }
  } else if (propertiesRange == INTERNAL_PROP_RANGE.existingProperties) {
    Alignment alignment =
        AlignmentManager.Instance()
            .getAlignmentOrCreateIt(workspace.getId(), worksheetId, ontMgr);
    Set<String> steinerTreeNodeIds = new HashSet<String>();
    if (alignment != null && !alignment.isEmpty()) {
      DirectedWeightedMultigraph<Node, LabeledLink> steinerTree = alignment.getSteinerTree();
      for (Node node : steinerTree.vertexSet()) {
        if (node.getType() == NodeType.InternalNode) {
          steinerTreeNodeIds.add(node.getId());
        }
      }
      List<LabeledLink> specializedLinks = new ArrayList<LabeledLink>();
      Set<LabeledLink> temp = alignment.getLinksByType(LinkType.DataPropertyLink);
      if (temp != null) specializedLinks.addAll(temp);
      for (LabeledLink link : steinerTree.edgeSet())
        if (link instanceof ObjectPropertyLink) specializedLinks.add(link);
      // Store the data property links for specialized edge link options
      properties.addAll(specializedLinks);
    }
  }
  logger.debug("Got back " + properties.size() + " results");
  final Set<LabeledLink> finalProperties = properties;
  UpdateContainer upd =
      new UpdateContainer(
          new AbstractUpdate() {
            @Override
            public void generateJson(String prefix, PrintWriter pw, VWorkspace vWorkspace) {
              JSONObject obj = new JSONObject();
              JSONArray resultArray = new JSONArray();
              try {
                obj.put(JsonKeys.updateType.name(), "PropertyList");
                for (LabeledLink link : finalProperties) {
                  Label linkLabel = link.getLabel();
                  String edgeLabelStr = linkLabel.getDisplayName();
                  JSONObject edgeObj = new JSONObject();
                  if (linkLabel.getUri() != null
                      && linkLabel.getNs() != null
                      && linkLabel.getUri().equalsIgnoreCase(linkLabel.getNs())) {
                    edgeLabelStr = linkLabel.getUri();
                  }
                  edgeObj.put(JsonKeys.label.name(), edgeLabelStr);
                  edgeObj.put(JsonKeys.uri.name(), linkLabel.getUri());
                  edgeObj.put(JsonKeys.id.name(), link.getId());
                  resultArray.put(edgeObj);
                }
                obj.put(JsonKeys.properties.name(), resultArray);
                pw.println(obj.toString());
              } catch (Exception e) {
                logger.error("Exception:", e);
              }
            }
          });
  return upd;
}
/*
 * Pedro
 *
 * We should have an abstraction for Commands that operate on worksheets,
 * and this method should go there.
 */
protected String formatWorksheetId(Workspace workspace, String worksheetId) {
  return worksheetId + " (" + workspace.getWorksheet(worksheetId).getTitle() + ")";
}
@Override
public void generateJson(String prefix, PrintWriter pw, VWorkspace vWorkspace) {
  Workspace workspace = vWorkspace.getWorkspace();
  alignment = AlignmentManager.Instance().getAlignment(workspace.getId(), worksheetId);
  SemanticTypes types = worksheet.getSemanticTypes();
  Map<String, ColumnNode> hNodeIdTocolumnNodeMap = createColumnNodeMap();
  Map<String, SemanticTypeNode> hNodeIdToDomainNodeMap = createDomainNodeMap();
  JSONStringer jsonStr = new JSONStringer();
  try {
    JSONWriter writer = jsonStr.object();
    writer.key("worksheetId").value(worksheetId).key("updateType").value("SemanticTypesUpdate");
    writer.key(JsonKeys.Types.name());
    writer.array();
    // Iterate through all the columns
    for (HNodePath path : worksheet.getHeaders().getAllPaths()) {
      HNode node = path.getLeaf();
      String nodeId = node.getId();
      writer.object();
      // Check if a semantic type exists for the HNode
      SemanticType type = types.getSemanticTypeForHNodeId(nodeId);
      if (type != null && type.getConfidenceLevel() != SemanticType.ConfidenceLevel.Low) {
        writer
            .key(JsonKeys.HNodeId.name())
            .value(type.getHNodeId())
            .key(JsonKeys.SemanticTypesArray.name())
            .array();
        ColumnNode alignmentColumnNode = hNodeIdTocolumnNodeMap.get(type.getHNodeId());
        SemanticTypeNode domainNode = hNodeIdToDomainNodeMap.get(type.getHNodeId());
        if (alignmentColumnNode == null || domainNode == null) {
          logger.error(
              "Column node or domain node not found in alignment."
                  + " (This should not happen conceptually!):"
                  + type);
          continue;
        }
        // Add the primary semantic type
        writer
            .object()
            .key(JsonKeys.Origin.name())
            .value(type.getOrigin().name())
            .key(JsonKeys.ConfidenceLevel.name())
            .value(type.getConfidenceLevel().name())
            .key(JsonKeys.isPrimary.name())
            .value(true);
        // Add the RDF literal type to show in the text box
        String rdfLiteralType =
            alignmentColumnNode.getRdfLiteralType() == null
                ? ""
                : alignmentColumnNode.getRdfLiteralType().getDisplayName();
        String language =
            alignmentColumnNode.getLanguage() == null ? "" : alignmentColumnNode.getLanguage();
        writer.key(JsonKeys.rdfLiteralType.name()).value(rdfLiteralType);
        writer.key(JsonKeys.language.name()).value(language);
        // String domainDisplayLabel =
        //     (domainNode.getLabel().getPrefix() != null
        //             && !domainNode.getLabel().getPrefix().equals(""))
        //         ? (domainNode.getLabel().getPrefix() + ":" + domainNode.getLocalId())
        //         : domainNode.getLocalId();
        if (!type.isClass()) {
          writer
              .key(JsonKeys.FullType.name())
              .value(type.getType().getUri())
              .key(JsonKeys.DisplayLabel.name())
              .value(type.getType().getDisplayName())
              .key(JsonKeys.DisplayRDFSLabel.name())
              .value(type.getType().getRdfsLabel())
              .key(JsonKeys.DisplayRDFSComment.name())
              .value(type.getType().getRdfsComment())
              .key(JsonKeys.DomainId.name())
              .value(domainNode.getId())
              .key(JsonKeys.DomainUri.name())
              .value(domainNode.getUri())
              .key(JsonKeys.DisplayDomainLabel.name())
              .value(domainNode.getDisplayId())
              .key(JsonKeys.DomainRDFSLabel.name())
              .value(domainNode.getRdfsLabel())
              .key(JsonKeys.DomainRDFSComment.name())
              .value(domainNode.getRdfsComment());
        } else {
          writer
              .key(JsonKeys.FullType.name())
              .value(domainNode.getId())
              .key(JsonKeys.DisplayLabel.name())
              .value(domainNode.getDisplayId())
              .key(JsonKeys.DisplayRDFSLabel.name())
              .value(domainNode.getRdfsLabel())
              .key(JsonKeys.DisplayRDFSComment.name())
              .value(domainNode.getRdfsComment())
              .key(JsonKeys.DomainId.name())
              .value("")
              .key(JsonKeys.DomainUri.name())
              .value("")
              .key(JsonKeys.DisplayDomainLabel.name())
              .value("")
              .key(JsonKeys.DomainRDFSLabel.name())
              .value("")
              .key(JsonKeys.DomainRDFSComment.name())
              .value("");
        }
        // Mark the special properties
        writer
            .key(JsonKeys.isMetaProperty.name())
            .value(isMetaProperty(type.getType(), alignmentColumnNode));
        writer.endObject();
        // Iterate through the synonym semantic types
        SynonymSemanticTypes synTypes = types.getSynonymTypesForHNodeId(nodeId);
        if (synTypes != null) {
          for (SemanticType synType : synTypes.getSynonyms()) {
            writer
                .object()
                .key(JsonKeys.HNodeId.name())
                .value(synType.getHNodeId())
                .key(JsonKeys.FullType.name())
                .value(synType.getType().getUri())
                .key(JsonKeys.Origin.name())
                .value(synType.getOrigin().name())
                .key(JsonKeys.ConfidenceLevel.name())
                .value(synType.getConfidenceLevel().name())
                .key(JsonKeys.DisplayLabel.name())
                .value(synType.getType().getDisplayName())
                .key(JsonKeys.DisplayRDFSLabel.name())
                .value(synType.getType().getRdfsLabel())
                .key(JsonKeys.DisplayRDFSComment.name())
                .value(synType.getType().getRdfsComment())
                .key(JsonKeys.isPrimary.name())
                .value(false);
            if (!synType.isClass()) {
              writer
                  .key(JsonKeys.DomainUri.name())
                  .value(synType.getDomain().getUri())
                  .key(JsonKeys.DomainId.name())
                  .value("")
                  .key(JsonKeys.DisplayDomainLabel.name())
                  .value(synType.getDomain().getDisplayName())
                  .key(JsonKeys.DomainRDFSLabel.name())
                  .value(synType.getDomain().getRdfsLabel())
                  .key(JsonKeys.DomainRDFSComment.name())
                  .value(synType.getDomain().getRdfsComment());
            } else {
              writer
                  .key(JsonKeys.DomainId.name())
                  .value("")
                  .key(JsonKeys.DomainUri.name())
                  .value("")
                  .key(JsonKeys.DisplayDomainLabel.name())
                  .value("")
                  .key(JsonKeys.DomainRDFSLabel.name())
                  .value("")
                  .key(JsonKeys.DomainRDFSComment.name())
                  .value("");
            }
            writer.endObject();
          }
        }
        writer.endArray();
      } else {
        writer.key(JsonKeys.HNodeId.name()).value(nodeId);
        writer.key(JsonKeys.SemanticTypesArray.name()).array().endArray();
      }
      writer.endObject();
    }
    writer.endArray();
    writer.endObject();
    pw.print(writer.toString());
  } catch (JSONException e) {
    logger.error("Error occurred while writing to JSON!", e);
  }
}
private boolean isSameHTableId(String hNodeId1, String hNodeId2, Workspace workspace) {
  HNode hNode1 = workspace.getFactory().getHNode(hNodeId1);
  HNode hNode2 = workspace.getFactory().getHNode(hNodeId2);
  if (hNode1 == null || hNode2 == null) return false;
  return hNode1.getHTableId().equals(hNode2.getHTableId());
}
/*
 * Pedro
 *
 * Return an HNodeId in a nice format for printing on command logs.
 */
protected String formatHNodeId(Workspace workspace, String hNodeId) {
  return hNodeId + " (" + workspace.getFactory().getColumnName(hNodeId) + ")";
}
@Override
public UpdateContainer doIt(Workspace workspace) throws CommandException {
  Worksheet worksheet = workspace.getWorksheet(worksheetId);
  SuperSelection selection = getSuperSelection(worksheet);
  HNodePath selectedPath = null;
  List<HNodePath> columnPaths = worksheet.getHeaders().getAllPaths();
  for (HNodePath path : columnPaths) {
    if (path.getLeaf().getId().equals(hNodeId)) {
      selectedPath = path;
    }
  }
  Collection<Node> nodes = new ArrayList<Node>();
  workspace
      .getFactory()
      .getWorksheet(worksheetId)
      .getDataTable()
      .collectNodes(selectedPath, nodes, selection);
  try {
    // Build the request payload: one {id, value} record per node in the column
    JSONArray requestJsonArray = new JSONArray();
    for (Node node : nodes) {
      String id = node.getId();
      String originalVal = node.getValue().asString();
      JSONObject jsonRecord = new JSONObject();
      jsonRecord.put("id", id);
      originalVal = originalVal == null ? "" : originalVal;
      jsonRecord.put("value", originalVal);
      requestJsonArray.put(jsonRecord);
    }
    String jsonString = requestJsonArray.toString();
    // Alternative endpoints used during development:
    // "http://localhost:8080/cleaningService/IdentifyData"
    // "http://localhost:8070/myWS/IdentifyData"
    String url =
        ServletContextParameterMap.getParameterValue(ContextParameter.CLEANING_SERVICE_URL);
    HttpClient httpclient = new DefaultHttpClient();
    URI u = new URI(url);
    List<NameValuePair> formparams = new ArrayList<NameValuePair>();
    formparams.add(new BasicNameValuePair("json", jsonString));
    HttpPost httppost = new HttpPost(u);
    httppost.setEntity(new UrlEncodedFormEntity(formparams, "UTF-8"));
    HttpResponse response = httpclient.execute(httppost);
    HttpEntity entity = response.getEntity();
    StringBuffer out = new StringBuffer();
    if (entity != null) {
      BufferedReader buf = new BufferedReader(new InputStreamReader(entity.getContent()));
      String line = buf.readLine();
      while (line != null) {
        out.append(line);
        line = buf.readLine();
      }
    }
    final JSONObject data1 = new JSONObject(out.toString());
    return new UpdateContainer(
        new AbstractUpdate() {
          @Override
          public void generateJson(String prefix, PrintWriter pw, VWorkspace vWorkspace) {
            JSONObject response = new JSONObject();
            try {
              response.put("updateType", "CleaningServiceOutput");
              response.put("chartData", data1);
              response.put("hNodeId", hNodeId);
            } catch (JSONException e) {
              pw.print("Error");
            }
            pw.print(response.toString());
          }
        });
  } catch (Exception e) {
    logger.error("Error invoking the cleaning service!", e);
    return new UpdateContainer(new ErrorUpdate("Error!"));
  }
}
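The command above posts the column values with Apache's long-deprecated DefaultHttpClient. For comparison only, here is the same form-encoded POST expressed with the JDK 11+ java.net.http client; the endpoint is the development URL from the comments above and the payload is a made-up example, so treat this as a sketch rather than the project's actual client code.

import java.net.URI;
import java.net.URLEncoder;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.charset.StandardCharsets;

public class CleaningServicePostSketch {
  public static void main(String[] args) throws Exception {
    String json = "[{\"id\":\"node1\",\"value\":\"abc\"}]"; // hypothetical payload
    String form = "json=" + URLEncoder.encode(json, StandardCharsets.UTF_8);
    HttpRequest request =
        HttpRequest.newBuilder(URI.create("http://localhost:8080/cleaningService/IdentifyData"))
            .header("Content-Type", "application/x-www-form-urlencoded")
            .POST(HttpRequest.BodyPublishers.ofString(form))
            .build();
    HttpResponse<String> response =
        HttpClient.newHttpClient().send(request, HttpResponse.BodyHandlers.ofString());
    System.out.println(response.body()); // chart data JSON returned by the service
  }
}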