private String getTemplateTermSetPopulatedWithValues( Map<String, String> columnValues, TemplateTermSet termSet) throws ValueNotFoundKarmaException, NoValueFoundInNodeException { StringBuilder output = new StringBuilder(); for (TemplateTerm term : termSet.getAllTerms()) { // String template term if (term instanceof StringTemplateTerm) { output.append(term.getTemplateTermValue()); } // Column template term else if (term instanceof ColumnTemplateTerm) { String hNodeId = term.getTemplateTermValue(); if (columnValues.containsKey(hNodeId)) { Node node = factory.getNode(columnValues.get(hNodeId)); if (node != null) { if (node.getValue().asString() == null || node.getValue().asString().equals("")) { throw new NoValueFoundInNodeException(); } output.append(node.getValue().asString()); } } else { String columnName = this.factory.getHNode(hNodeId).getColumnName(); throw new ValueNotFoundKarmaException( "Could not retrieve value from column: " + columnName + ".", hNodeId); } } } return output.toString(); }
/**
 * Puts {@code previousValue} and {@code previousStatus} back on the node identified by
 * {@code nodeIdArg}.
 *
 * @param workspace workspace whose factory is used to look up the node
 * @return the worksheet hierarchical/cleaning-results updates for {@code worksheetId}, followed
 *     by a {@code NodeChangedUpdate} carrying the restored value and status
 */
@Override
public UpdateContainer undoIt(Workspace workspace) {
  Node target = workspace.getFactory().getNode(nodeIdArg);
  SuperSelection selection = getSuperSelection(workspace);
  target.setValue(previousValue, previousStatus, workspace.getFactory());

  UpdateContainer updates =
      WorksheetUpdateFactory.createWorksheetHierarchicalAndCleaningResultsUpdates(
          worksheetId, selection);
  NodeChangedUpdate restored =
      new NodeChangedUpdate(worksheetId, nodeIdArg, previousValue, previousStatus);
  updates.add(restored);
  return updates;
}
private void generateTriplesForCell( Node node, Set<String> existingTopRowTriples, String hNodeId, Set<String> predicatesCovered, Map<String, ReportMessage> predicatesFailed) { Map<String, String> columnValues = node.getColumnValues(); List<PredicateObjectMap> pomList = this.auxInfo.getHNodeIdToPredObjLinks().get(hNodeId); if (pomList == null || pomList.isEmpty()) return; List<TriplesMap> toBeProcessedTriplesMap = new LinkedList<TriplesMap>(); for (PredicateObjectMap pom : pomList) { toBeProcessedTriplesMap.add(pom.getTriplesMap()); } Set<String> alreadyProcessedTriplesMapIds = new HashSet<String>(); while (!toBeProcessedTriplesMap.isEmpty()) { TriplesMap trMap = toBeProcessedTriplesMap.remove(0); boolean dontAddNeighboringMaps = false; // Generate properties for the triple maps for (PredicateObjectMap pom : trMap.getPredicateObjectMaps()) { if (!predicatesCovered.contains(pom.getPredicate().getId())) { generatePropertyForPredObjMap( pom, columnValues, predicatesCovered, existingTopRowTriples, hNodeId, predicatesFailed); } } // Need to stop at the root if (trMap.getSubject().isSteinerTreeRootNode()) { dontAddNeighboringMaps = true; } List<TriplesMapLink> neighboringLinks = this.auxInfo.getTriplesMapGraph().getAllNeighboringTriplesMap(trMap.getId()); for (TriplesMapLink trMapLink : neighboringLinks) { if (predicatesCovered.contains( trMapLink.getPredicateObjectMapLink().getPredicate().getId())) continue; // Add the other triplesMap in queue to be processed later if (!alreadyProcessedTriplesMapIds.contains(trMapLink.getSourceMap().getId()) && !dontAddNeighboringMaps) { toBeProcessedTriplesMap.add(trMapLink.getSourceMap()); } if (!alreadyProcessedTriplesMapIds.contains(trMapLink.getTargetMap().getId()) && !dontAddNeighboringMaps) { toBeProcessedTriplesMap.add(trMapLink.getTargetMap()); } } alreadyProcessedTriplesMapIds.add(trMap.getId()); } }
private HTable addNestedHTable(HNode hNode, String key, Row row) { HTable ht = hNode.getNestedTable(); if (ht == null) { ht = hNode.addNestedTable(createNestedTableName(key), getWorksheet(), getFactory()); // Check for all the nodes that have value and nested tables Collection<Node> nodes = new ArrayList<Node>(); getWorksheet().getDataTable().collectNodes(hNode.getHNodePath(getFactory()), nodes); for (Node node : nodes) { if (node.getBelongsToRow() == row) break; // Add an empty row for each nested table that does not have any row if (node.getNestedTable().getNumRows() == 0) { addEmptyRow(node.getNestedTable(), hNode); } } } return ht; }
@Override public UpdateContainer doIt(Workspace workspace) throws CommandException { Worksheet wk = workspace.getWorksheet(worksheetId); SuperSelection selection = getSuperSelection(wk); String Msg = String.format("begin, Time,%d, Worksheet,%s", System.currentTimeMillis(), worksheetId); logger.info(Msg); // Get the HNode HashMap<String, HashMap<String, String>> rows = new HashMap<String, HashMap<String, String>>(); HNodePath selectedPath = null; List<HNodePath> columnPaths = wk.getHeaders().getAllPaths(); for (HNodePath path : columnPaths) { if (path.getLeaf().getId().equals(hNodeId)) { selectedPath = path; } } // random nodes Collection<Node> nodes = new ArrayList<Node>(); wk.getDataTable().collectNodes(selectedPath, nodes, selection); HashSet<Integer> indSet = this.obtainIndexs(nodes.size()); int index = 0; for (Iterator<Node> iterator = nodes.iterator(); iterator.hasNext(); ) { Node node = iterator.next(); if (indSet.contains(index)) { String id = node.getId(); String originalVal = node.getValue().asString(); HashMap<String, String> x = new HashMap<String, String>(); x.put("Org", originalVal); x.put("Tar", originalVal); x.put("Orgdis", originalVal); x.put("Tardis", originalVal); rows.put(id, x); } index++; } Msg = String.format("end, Time,%d, Worksheet,%s", System.currentTimeMillis(), worksheetId); logger.info(Msg); return new UpdateContainer(new FetchResultUpdate(hNodeId, rows)); }
/**
 * Generates triples for every cell of a row, recursing into nested tables.
 *
 * <p>A cell without a nested table is handed to {@code generateTriplesForCell} together with
 * the caller's {@code predicatesCovered}. For a cell with a nested table, each nested row is
 * processed recursively with its own fresh, empty covered-predicates set.
 *
 * @param row the row to process
 * @param existingTopRowTriples passed through unchanged to cells and nested rows
 * @param predicatesCovered covered-predicate ids used for the direct cells of this row
 * @param predicatesFailed passed through unchanged to cells and nested rows
 */
private void generateTriplesForRow(
    Row row,
    Set<String> existingTopRowTriples,
    Set<String> predicatesCovered,
    Map<String, ReportMessage> predicatesFailed) {
  Map<String, Node> cellsByHNodeId = row.getNodesMap();
  for (Map.Entry<String, Node> cell : cellsByHNodeId.entrySet()) {
    String hNodeId = cell.getKey();
    Node cellNode = cell.getValue();

    if (!cellNode.hasNestedTable()) {
      generateTriplesForCell(
          cellNode, existingTopRowTriples, hNodeId, predicatesCovered, predicatesFailed);
      continue;
    }

    Table nestedTable = cellNode.getNestedTable();
    if (nestedTable == null) {
      continue;
    }
    for (Row nestedRow : nestedTable.getRows(0, nestedTable.getNumRows())) {
      // Each nested row tracks its covered predicates independently.
      Set<String> nestedRowPredicatesCovered = new HashSet<String>();
      generateTriplesForRow(
          nestedRow, existingTopRowTriples, nestedRowPredicatesCovered, predicatesFailed);
    }
  }
}
/**
 * Sets the value of the cell identified by {@code nodeIdArg} to {@code newValueArg} and marks
 * it as edited.
 *
 * <p>The cell is validated before any command state is touched, so a rejected edit leaves the
 * input/output columns and the stored previous value and status exactly as they were.
 *
 * @param workspace workspace whose factory is used to look up the node
 * @return the worksheet hierarchical/cleaning-results updates for {@code worksheetId}, followed
 *     by a {@code NodeChangedUpdate} carrying the new value
 * @throws CommandException if no node exists for {@code nodeIdArg} or the node has a nested
 *     table
 */
@Override
public UpdateContainer doIt(Workspace workspace) throws CommandException {
  Node node = workspace.getFactory().getNode(nodeIdArg);

  // Validate first: fail with a CommandException instead of a NullPointerException further
  // down, and do so before the undo snapshot and column lists are overwritten.
  if (node == null) {
    throw new CommandException(
        this, "Cell " + nodeIdArg + " does not exist. It cannot be edited.");
  }
  if (node.hasNestedTable()) {
    throw new CommandException(
        this, "Cell " + nodeIdArg + " has a nested table. It cannot be edited.");
  }

  SuperSelection sel = getSuperSelection(workspace);

  inputColumns.clear();
  outputColumns.clear();
  inputColumns.add(node.getHNodeId());
  outputColumns.add(node.getHNodeId());

  // Remember the current state so the edit can be reverted later.
  previousValue = node.getValue();
  previousStatus = node.getStatus();

  node.setValue(newValueArg, Node.NodeStatus.edited, workspace.getFactory());
  WorksheetUpdateFactory.detectSelectionStatusChange(worksheetId, workspace, this);

  UpdateContainer uc =
      WorksheetUpdateFactory.createWorksheetHierarchicalAndCleaningResultsUpdates(
          worksheetId, sel);
  uc.add(new NodeChangedUpdate(worksheetId, nodeIdArg, newValueArg, Node.NodeStatus.edited));
  return uc;
}
@Override public UpdateContainer doIt(Workspace workspace) throws CommandException { Worksheet wk = workspace.getWorksheet(worksheetId); SuperSelection selection = getSuperSelection(wk); String msg = String.format( "Gen rule start,Time,%d, Worksheet,%s", System.currentTimeMillis(), worksheetId); logger.info(msg); // Get the HNode HashMap<String, String> rows = new HashMap<String, String>(); HashMap<String, Integer> amb = new HashMap<String, Integer>(); HNodePath selectedPath = null; List<HNodePath> columnPaths = wk.getHeaders().getAllPaths(); for (HNodePath path : columnPaths) { if (path.getLeaf().getId().equals(hNodeId)) { selectedPath = path; } } Collection<Node> nodes = new ArrayList<Node>(); wk.getDataTable().collectNodes(selectedPath, nodes, selection); for (Node node : nodes) { String id = node.getId(); if (!this.nodeIds.contains(id)) continue; String originalVal = node.getValue().asString(); rows.put(id, originalVal); this.compResultString += originalVal + "\n"; calAmbScore(id, originalVal, amb); } RamblerValueCollection vc = new RamblerValueCollection(rows); HashMap<String, Vector<String[]>> expFeData = new HashMap<String, Vector<String[]>>(); inputs = new RamblerTransformationInputs(examples, vc); // generate the program boolean results = false; int iterNum = 0; RamblerTransformationOutput rtf = null; // initialize the vocabulary Iterator<String> iterx = inputs.getInputValues().getValues().iterator(); Vector<String> v = new Vector<String>(); int vb_cnt = 0; while (iterx.hasNext() && vb_cnt < 30) { String eString = iterx.next(); v.add(eString); vb_cnt++; } Vector<String> vob = UtilTools.buildDict(v); inputs.setVocab(vob.toArray(new String[vob.size()])); while (iterNum < 1 && !results) // try to find an program within iterNum { rtf = new RamblerTransformationOutput(inputs); if (rtf.getTransformations().keySet().size() > 0) { results = true; } iterNum++; } Iterator<String> iter = rtf.getTransformations().keySet().iterator(); // id:{org: tar: orgdis: tardis: } 
HashMap<String, HashMap<String, String>> resdata = new HashMap<String, HashMap<String, String>>(); HashSet<String> keys = new HashSet<String>(); while (iter.hasNext()) { String tpid = iter.next(); ValueCollection rvco = rtf.getTransformedValues_debug(tpid); if (rvco == null) continue; // constructing displaying data HashMap<String, String[]> xyzHashMap = new HashMap<String, String[]>(); for (String key : rvco.getNodeIDs()) { HashMap<String, String> dict = new HashMap<String, String>(); // add to the example selection boolean isExp = false; String org = vc.getValue(key); String classLabel = rvco.getClass(key); String pretar = rvco.getValue(key); String dummyValue = pretar; if (pretar.indexOf("_FATAL_ERROR_") != -1) { dummyValue = org; // dummyValue = "#ERROR"; } try { UtilTools.StringColorCode(org, dummyValue, dict); } catch (Exception ex) { logger.info(String.format("ColorCoding Exception%s, %s", org, dummyValue)); // set dict dict.put("Org", org); dict.put("Tar", "ERROR"); dict.put("Orgdis", org); dict.put("Tardis", "ERROR"); } for (TransformationExample exp : examples) { if (exp.getNodeId().compareTo(key) == 0) { if (!expFeData.containsKey(classLabel)) { Vector<String[]> vstr = new Vector<String[]>(); String[] texp = {dict.get("Org"), pretar}; vstr.add(texp); expFeData.put(classLabel, vstr); } else { String[] texp = {dict.get("Org"), pretar}; expFeData.get(classLabel).add(texp); } isExp = true; } } if (!isExp) { String[] pair = {dict.get("Org"), dict.get("Tar"), pretar, classLabel}; xyzHashMap.put(key, pair); } resdata.put(key, dict); } if (!rtf.nullRule) keys.add(getBestExample(xyzHashMap, expFeData)); } // find the best row String vars = ""; String expstr = ""; String recmd = ""; for (TransformationExample x : examples) { expstr += String.format("%s|%s", x.getBefore(), x.getAfter()); } expstr += "|"; if (rtf.nullRule) { keys.clear(); // keys.add("-2"); // "-2 indicates null rule" } if (!resdata.isEmpty() && !rtf.nullRule) { recmd = 
resdata.get(keys.iterator().next()).get("Org"); } else { recmd = ""; } msg = String.format( "Gen rule end, Time,%d, Worksheet,%s,Examples:%s,Recmd:%s", System.currentTimeMillis(), worksheetId, expstr, recmd); logger.info(msg); return new UpdateContainer(new CleaningResultUpdate(hNodeId, resdata, vars, keys)); }
@Override public UpdateContainer doIt(Workspace workspace) throws CommandException { Worksheet worksheet = workspace.getWorksheet(worksheetId); SuperSelection selection = getSuperSelection(worksheet); HNodePath selectedPath = null; List<HNodePath> columnPaths = worksheet.getHeaders().getAllPaths(); for (HNodePath path : columnPaths) { if (path.getLeaf().getId().equals(hNodeId)) { selectedPath = path; } } Collection<Node> nodes = new ArrayList<Node>(); workspace .getFactory() .getWorksheet(worksheetId) .getDataTable() .collectNodes(selectedPath, nodes, selection); try { JSONArray requestJsonArray = new JSONArray(); for (Node node : nodes) { String id = node.getId(); String originalVal = node.getValue().asString(); JSONObject jsonRecord = new JSONObject(); jsonRecord.put("id", id); originalVal = originalVal == null ? "" : originalVal; jsonRecord.put("value", originalVal); requestJsonArray.put(jsonRecord); } String jsonString = null; jsonString = requestJsonArray.toString(); // String url = // "http://localhost:8080/cleaningService/IdentifyData"; // String url = "http://localhost:8070/myWS/IdentifyData"; String url = ServletContextParameterMap.getParameterValue(ContextParameter.CLEANING_SERVICE_URL); HttpClient httpclient = new DefaultHttpClient(); HttpPost httppost = null; HttpResponse response = null; HttpEntity entity; StringBuffer out = new StringBuffer(); URI u = null; u = new URI(url); List<NameValuePair> formparams = new ArrayList<NameValuePair>(); formparams.add(new BasicNameValuePair("json", jsonString)); httppost = new HttpPost(u); httppost.setEntity(new UrlEncodedFormEntity(formparams, "UTF-8")); response = httpclient.execute(httppost); entity = response.getEntity(); if (entity != null) { BufferedReader buf = new BufferedReader(new InputStreamReader(entity.getContent())); String line = buf.readLine(); while (line != null) { out.append(line); line = buf.readLine(); } } // logger.trace(out.toString()); // logger.info("Connnection success : " + url + " 
Successful."); final JSONObject data1 = new JSONObject(out.toString()); // logger.trace("Data--->" + data1); return new UpdateContainer( new AbstractUpdate() { @Override public void generateJson(String prefix, PrintWriter pw, VWorkspace vWorkspace) { JSONObject response = new JSONObject(); // logger.trace("Reached here"); try { response.put("updateType", "CleaningServiceOutput"); response.put("chartData", data1); response.put("hNodeId", hNodeId); // logger.trace(response.toString(4)); } catch (JSONException e) { pw.print("Error"); } pw.print(response.toString()); } }); } catch (Exception e) { e.printStackTrace(); return new UpdateContainer(new ErrorUpdate("Error!")); } }