/**
 * Tallies, per node, how many times each value has been seen.
 *
 * <p>For every node id exposed by {@code rvco}, the counter stored in {@code values} under that
 * node id and the node's value is incremented. A missing inner map is created on demand and a
 * missing counter starts at 1.
 *
 * @param rvco collection supplying the node ids and the value associated with each id
 * @param values accumulator of the form node id -> (value -> occurrence count); updated in place
 */
public void updateCandiScore(
    ValueCollection rvco, HashMap<String, HashMap<String, Integer>> values) {
  for (String nodeId : rvco.getNodeIDs()) {
    String nodeValue = rvco.getValue(nodeId);

    // Make sure a per-node counter table exists before reading it back.
    if (!values.containsKey(nodeId)) {
      values.put(nodeId, new HashMap<String, Integer>());
    }
    HashMap<String, Integer> counts = values.get(nodeId);

    int updated = counts.containsKey(nodeValue) ? counts.get(nodeValue) + 1 : 1;
    counts.put(nodeValue, updated);
  }
}
@Override public UpdateContainer doIt(Workspace workspace) throws CommandException { Worksheet wk = workspace.getWorksheet(worksheetId); SuperSelection selection = getSuperSelection(wk); String msg = String.format( "Gen rule start,Time,%d, Worksheet,%s", System.currentTimeMillis(), worksheetId); logger.info(msg); // Get the HNode HashMap<String, String> rows = new HashMap<String, String>(); HashMap<String, Integer> amb = new HashMap<String, Integer>(); HNodePath selectedPath = null; List<HNodePath> columnPaths = wk.getHeaders().getAllPaths(); for (HNodePath path : columnPaths) { if (path.getLeaf().getId().equals(hNodeId)) { selectedPath = path; } } Collection<Node> nodes = new ArrayList<Node>(); wk.getDataTable().collectNodes(selectedPath, nodes, selection); for (Node node : nodes) { String id = node.getId(); if (!this.nodeIds.contains(id)) continue; String originalVal = node.getValue().asString(); rows.put(id, originalVal); this.compResultString += originalVal + "\n"; calAmbScore(id, originalVal, amb); } RamblerValueCollection vc = new RamblerValueCollection(rows); HashMap<String, Vector<String[]>> expFeData = new HashMap<String, Vector<String[]>>(); inputs = new RamblerTransformationInputs(examples, vc); // generate the program boolean results = false; int iterNum = 0; RamblerTransformationOutput rtf = null; // initialize the vocabulary Iterator<String> iterx = inputs.getInputValues().getValues().iterator(); Vector<String> v = new Vector<String>(); int vb_cnt = 0; while (iterx.hasNext() && vb_cnt < 30) { String eString = iterx.next(); v.add(eString); vb_cnt++; } Vector<String> vob = UtilTools.buildDict(v); inputs.setVocab(vob.toArray(new String[vob.size()])); while (iterNum < 1 && !results) // try to find an program within iterNum { rtf = new RamblerTransformationOutput(inputs); if (rtf.getTransformations().keySet().size() > 0) { results = true; } iterNum++; } Iterator<String> iter = rtf.getTransformations().keySet().iterator(); // id:{org: tar: orgdis: tardis: } 
HashMap<String, HashMap<String, String>> resdata = new HashMap<String, HashMap<String, String>>(); HashSet<String> keys = new HashSet<String>(); while (iter.hasNext()) { String tpid = iter.next(); ValueCollection rvco = rtf.getTransformedValues_debug(tpid); if (rvco == null) continue; // constructing displaying data HashMap<String, String[]> xyzHashMap = new HashMap<String, String[]>(); for (String key : rvco.getNodeIDs()) { HashMap<String, String> dict = new HashMap<String, String>(); // add to the example selection boolean isExp = false; String org = vc.getValue(key); String classLabel = rvco.getClass(key); String pretar = rvco.getValue(key); String dummyValue = pretar; if (pretar.indexOf("_FATAL_ERROR_") != -1) { dummyValue = org; // dummyValue = "#ERROR"; } try { UtilTools.StringColorCode(org, dummyValue, dict); } catch (Exception ex) { logger.info(String.format("ColorCoding Exception%s, %s", org, dummyValue)); // set dict dict.put("Org", org); dict.put("Tar", "ERROR"); dict.put("Orgdis", org); dict.put("Tardis", "ERROR"); } for (TransformationExample exp : examples) { if (exp.getNodeId().compareTo(key) == 0) { if (!expFeData.containsKey(classLabel)) { Vector<String[]> vstr = new Vector<String[]>(); String[] texp = {dict.get("Org"), pretar}; vstr.add(texp); expFeData.put(classLabel, vstr); } else { String[] texp = {dict.get("Org"), pretar}; expFeData.get(classLabel).add(texp); } isExp = true; } } if (!isExp) { String[] pair = {dict.get("Org"), dict.get("Tar"), pretar, classLabel}; xyzHashMap.put(key, pair); } resdata.put(key, dict); } if (!rtf.nullRule) keys.add(getBestExample(xyzHashMap, expFeData)); } // find the best row String vars = ""; String expstr = ""; String recmd = ""; for (TransformationExample x : examples) { expstr += String.format("%s|%s", x.getBefore(), x.getAfter()); } expstr += "|"; if (rtf.nullRule) { keys.clear(); // keys.add("-2"); // "-2 indicates null rule" } if (!resdata.isEmpty() && !rtf.nullRule) { recmd = 
resdata.get(keys.iterator().next()).get("Org"); } else { recmd = ""; } msg = String.format( "Gen rule end, Time,%d, Worksheet,%s,Examples:%s,Recmd:%s", System.currentTimeMillis(), worksheetId, expstr, recmd); logger.info(msg); return new UpdateContainer(new CleaningResultUpdate(hNodeId, resdata, vars, keys)); }