/**
 * Expands a template term set into a single string by appending each term's value in order.
 *
 * <p>String terms contribute their template value verbatim. Column terms are resolved by looking
 * up the term's HNode id in {@code columnValues}, fetching the node for the mapped id from the
 * factory, and appending that node's string value. A column term whose node cannot be found in
 * the factory contributes nothing.
 *
 * @param columnValues map keyed by HNode id; the mapped value is passed to {@code
 *     factory.getNode}
 * @param termSet the terms to expand
 * @return the concatenation of all resolved term values
 * @throws ValueNotFoundKarmaException if {@code columnValues} has no entry for a column term
 * @throws NoValueFoundInNodeException if a resolved node's string value is null or empty
 */
private String getTemplateTermSetPopulatedWithValues(
     Map<String, String> columnValues, TemplateTermSet termSet)
     throws ValueNotFoundKarmaException, NoValueFoundInNodeException {
   StringBuilder populated = new StringBuilder();
   for (TemplateTerm term : termSet.getAllTerms()) {
     // Literal text: copy straight through.
     if (term instanceof StringTemplateTerm) {
       populated.append(term.getTemplateTermValue());
       continue;
     }
     // Any other non-column term kind is ignored.
     if (!(term instanceof ColumnTemplateTerm)) {
       continue;
     }

     String hNodeId = term.getTemplateTermValue();
     if (!columnValues.containsKey(hNodeId)) {
       String columnName = this.factory.getHNode(hNodeId).getColumnName();
       throw new ValueNotFoundKarmaException(
           "Could not retrieve value from column: " + columnName + ".", hNodeId);
     }

     Node node = factory.getNode(columnValues.get(hNodeId));
     if (node == null) {
       continue;
     }
     String text = node.getValue().asString();
     if (text == null || text.equals("")) {
       throw new NoValueFoundInNodeException();
     }
     populated.append(text);
   }
   return populated.toString();
 }
Пример #2
0
 /**
  * Reverts the cell edit: writes {@code previousValue} and {@code previousStatus} back onto the
  * node identified by {@code nodeIdArg} and returns the worksheet updates describing the change.
  *
  * @param workspace workspace whose factory owns the edited node
  * @return the worksheet refresh updates followed by a {@code NodeChangedUpdate} for the node
  */
 @Override
 public UpdateContainer undoIt(Workspace workspace) {
   Node editedNode = workspace.getFactory().getNode(nodeIdArg);
   SuperSelection selection = getSuperSelection(workspace);

   // Put the value captured before the edit back on the node.
   editedNode.setValue(previousValue, previousStatus, workspace.getFactory());

   UpdateContainer updates =
       WorksheetUpdateFactory.createWorksheetHierarchicalAndCleaningResultsUpdates(
           worksheetId, selection);
   NodeChangedUpdate restored =
       new NodeChangedUpdate(worksheetId, nodeIdArg, previousValue, previousStatus);
   updates.add(restored);
   return updates;
 }
  /**
   * Generates triples for one cell by walking the triples maps reachable from the cell's column.
   *
   * <p>The walk starts from the triples maps of every predicate-object map linked to {@code
   * hNodeId} and proceeds in FIFO order. For each visited map, properties are generated for its
   * predicate-object maps whose predicate is not yet in {@code predicatesCovered}. Neighbouring
   * maps are queued unless the current map's subject is the Steiner tree root, the link's
   * predicate is already covered, or the neighbour has already been visited.
   *
   * @param node the cell whose column values feed property generation
   * @param existingTopRowTriples passed through to {@code generatePropertyForPredObjMap}
   * @param hNodeId id of the column the cell belongs to
   * @param predicatesCovered ids of predicates that must not be generated again
   * @param predicatesFailed passed through to {@code generatePropertyForPredObjMap}
   */
  private void generateTriplesForCell(
      Node node,
      Set<String> existingTopRowTriples,
      String hNodeId,
      Set<String> predicatesCovered,
      Map<String, ReportMessage> predicatesFailed) {
    Map<String, String> columnValues = node.getColumnValues();
    List<PredicateObjectMap> linkedPoms = this.auxInfo.getHNodeIdToPredObjLinks().get(hNodeId);
    if (linkedPoms == null || linkedPoms.isEmpty()) {
      return;
    }

    // Seed the work queue with the triples map of every linked predicate-object map.
    List<TriplesMap> pending = new LinkedList<TriplesMap>();
    for (PredicateObjectMap linkedPom : linkedPoms) {
      pending.add(linkedPom.getTriplesMap());
    }

    Set<String> visitedIds = new HashSet<String>();
    while (!pending.isEmpty()) {
      TriplesMap current = pending.remove(0);

      // Generate properties for every predicate of this map that is still uncovered.
      for (PredicateObjectMap pom : current.getPredicateObjectMaps()) {
        if (predicatesCovered.contains(pom.getPredicate().getId())) {
          continue;
        }
        generatePropertyForPredObjMap(
            pom, columnValues, predicatesCovered, existingTopRowTriples, hNodeId, predicatesFailed);
      }

      // The walk must not expand past the root of the Steiner tree.
      boolean expandNeighbors = !current.getSubject().isSteinerTreeRootNode();

      List<TriplesMapLink> links =
          this.auxInfo.getTriplesMapGraph().getAllNeighboringTriplesMap(current.getId());
      for (TriplesMapLink link : links) {
        String linkPredicateId = link.getPredicateObjectMapLink().getPredicate().getId();
        if (predicatesCovered.contains(linkPredicateId)) {
          continue;
        }

        // Queue both endpoints of the link for later processing, skipping visited ones.
        boolean sourceUnvisited = !visitedIds.contains(link.getSourceMap().getId());
        if (sourceUnvisited && expandNeighbors) {
          pending.add(link.getSourceMap());
        }

        boolean targetUnvisited = !visitedIds.contains(link.getTargetMap().getId());
        if (targetUnvisited && expandNeighbors) {
          pending.add(link.getTargetMap());
        }
      }
      visitedIds.add(current.getId());
    }
  }
Пример #4
0
  /**
   * Returns the nested table of {@code hNode}, creating it first when the column has none.
   *
   * <p>When a table is created, the nodes collected along the column's path are scanned in
   * order up to (not including) the node belonging to {@code row}; each of those whose nested
   * table has no rows receives one empty row.
   *
   * @param hNode the column that should own a nested table
   * @param key used to derive the name of a newly created nested table
   * @param row the row at which back-filling of empty rows stops (compared by identity)
   * @return the existing or newly created nested table
   */
  private HTable addNestedHTable(HNode hNode, String key, Row row) {
    HTable existing = hNode.getNestedTable();
    if (existing != null) {
      return existing;
    }

    HTable created = hNode.addNestedTable(createNestedTableName(key), getWorksheet(), getFactory());

    // Gather every node of this column so earlier rows can be back-filled.
    Collection<Node> columnNodes = new ArrayList<Node>();
    getWorksheet().getDataTable().collectNodes(hNode.getHNodePath(getFactory()), columnNodes);
    for (Node columnNode : columnNodes) {
      if (columnNode.getBelongsToRow() == row) {
        break;
      }
      // A nested table without rows gets a single empty row.
      if (columnNode.getNestedTable().getNumRows() == 0) {
        addEmptyRow(columnNode.getNestedTable(), hNode);
      }
    }
    return created;
  }
 /**
  * Picks a subset of the nodes in the column identified by {@code hNodeId} and returns their
  * current values as a {@code FetchResultUpdate}.
  *
  * <p>The positions to include are chosen by {@code obtainIndexs}. For every chosen node the
  * original value is stored under all four keys {@code Org}, {@code Tar}, {@code Orgdis} and
  * {@code Tardis}. Start and end timestamps are written to the log.
  *
  * @param workspace workspace containing the worksheet identified by {@code worksheetId}
  * @return an update container holding the fetched rows keyed by node id
  * @throws CommandException declared by the command contract
  */
 @Override
 public UpdateContainer doIt(Workspace workspace) throws CommandException {
   Worksheet wk = workspace.getWorksheet(worksheetId);
   SuperSelection selection = getSuperSelection(wk);
   logger.info(
       String.format("begin, Time,%d, Worksheet,%s", System.currentTimeMillis(), worksheetId));

   HashMap<String, HashMap<String, String>> rows = new HashMap<String, HashMap<String, String>>();

   // Locate the path whose leaf is the requested column; the last match wins.
   HNodePath selectedPath = null;
   for (HNodePath candidate : wk.getHeaders().getAllPaths()) {
     if (candidate.getLeaf().getId().equals(hNodeId)) {
       selectedPath = candidate;
     }
   }

   Collection<Node> nodes = new ArrayList<Node>();
   wk.getDataTable().collectNodes(selectedPath, nodes, selection);

   // Positions (in iteration order) of the nodes to include in the result.
   HashSet<Integer> chosenPositions = this.obtainIndexs(nodes.size());
   int position = 0;
   for (Node node : nodes) {
     if (chosenPositions.contains(position)) {
       String originalVal = node.getValue().asString();
       HashMap<String, String> record = new HashMap<String, String>();
       record.put("Org", originalVal);
       record.put("Tar", originalVal);
       record.put("Orgdis", originalVal);
       record.put("Tardis", originalVal);
       rows.put(node.getId(), record);
     }
     position++;
   }

   logger.info(
       String.format("end, Time,%d, Worksheet,%s", System.currentTimeMillis(), worksheetId));
   return new UpdateContainer(new FetchResultUpdate(hNodeId, rows));
 }
 /**
  * Generates triples for every cell of a row, descending recursively into nested tables.
  *
  * <p>Cells without a nested table are handed to {@code generateTriplesForCell} with the
  * caller's {@code predicatesCovered}. For cells with a nested table, every nested row is
  * processed recursively with its own fresh, empty covered-predicates set.
  *
  * @param row the row to process
  * @param existingTopRowTriples passed through to cell-level generation
  * @param predicatesCovered predicate ids already handled for this row
  * @param predicatesFailed passed through to cell-level generation
  */
 private void generateTriplesForRow(
     Row row,
     Set<String> existingTopRowTriples,
     Set<String> predicatesCovered,
     Map<String, ReportMessage> predicatesFailed) {
   for (Map.Entry<String, Node> cell : row.getNodesMap().entrySet()) {
     String hNodeId = cell.getKey();
     Node rowNode = cell.getValue();

     if (!rowNode.hasNestedTable()) {
       generateTriplesForCell(
           rowNode, existingTopRowTriples, hNodeId, predicatesCovered, predicatesFailed);
       continue;
     }

     Table nestedTable = rowNode.getNestedTable();
     if (nestedTable == null) {
       continue;
     }
     for (Row nestedRow : nestedTable.getRows(0, nestedTable.getNumRows())) {
       // Each nested row tracks its covered predicates independently.
       Set<String> nestedPredicatesCovered = new HashSet<String>();
       generateTriplesForRow(
           nestedRow, existingTopRowTriples, nestedPredicatesCovered, predicatesFailed);
     }
   }
 }
Пример #7
0
 /**
  * Sets the node identified by {@code nodeIdArg} to {@code newValueArg} and marks it as edited.
  *
  * <p>The node's column is recorded as both the input and the output column of this command,
  * and the node's prior value and status are captured in {@code previousValue} and {@code
  * previousStatus} before the edit is applied.
  *
  * @param workspace workspace whose factory owns the node
  * @return the worksheet refresh updates followed by a {@code NodeChangedUpdate} for the node
  * @throws CommandException if the node has a nested table
  */
 @Override
 public UpdateContainer doIt(Workspace workspace) throws CommandException {
   Node target = workspace.getFactory().getNode(nodeIdArg);
   SuperSelection selection = getSuperSelection(workspace);

   // This command reads from and writes to the same single column.
   inputColumns.clear();
   outputColumns.clear();
   inputColumns.add(target.getHNodeId());
   outputColumns.add(target.getHNodeId());

   // Capture the current state before it is overwritten.
   previousValue = target.getValue();
   previousStatus = target.getStatus();

   if (target.hasNestedTable()) {
     String reason = "Cell " + nodeIdArg + " has a nested table. It cannot be edited.";
     throw new CommandException(this, reason);
   }

   target.setValue(newValueArg, Node.NodeStatus.edited, workspace.getFactory());
   WorksheetUpdateFactory.detectSelectionStatusChange(worksheetId, workspace, this);

   UpdateContainer updates =
       WorksheetUpdateFactory.createWorksheetHierarchicalAndCleaningResultsUpdates(
           worksheetId, selection);
   updates.add(
       new NodeChangedUpdate(worksheetId, nodeIdArg, newValueArg, Node.NodeStatus.edited));
   return updates;
 }
  /**
   * Learns transformation rules from {@code examples}, applies them to the selected column's
   * values and returns the per-node results as a {@code CleaningResultUpdate}.
   *
   * <p>Only nodes whose id is contained in {@code this.nodeIds} take part. As side effects the
   * method appends every participating original value to {@code this.compResultString}, assigns
   * {@code this.inputs}, and logs a start and an end message.
   *
   * @param workspace workspace containing the worksheet identified by {@code worksheetId}
   * @return an update container holding the display data per node id, an (always empty) {@code
   *     vars} string and the set of recommended example keys
   * @throws CommandException declared by the command contract
   */
  @Override
  public UpdateContainer doIt(Workspace workspace) throws CommandException {
    Worksheet wk = workspace.getWorksheet(worksheetId);
    SuperSelection selection = getSuperSelection(wk);
    String msg =
        String.format(
            "Gen rule start,Time,%d, Worksheet,%s", System.currentTimeMillis(), worksheetId);
    logger.info(msg);
    // Get the HNode
    // rows: node id -> original value; amb: filled by calAmbScore (not read again in this method)
    HashMap<String, String> rows = new HashMap<String, String>();
    HashMap<String, Integer> amb = new HashMap<String, Integer>();
    // Find the path whose leaf is the requested column; no early exit, so the last match wins.
    // NOTE(review): selectedPath stays null when no path matches — confirm collectNodes copes.
    HNodePath selectedPath = null;
    List<HNodePath> columnPaths = wk.getHeaders().getAllPaths();
    for (HNodePath path : columnPaths) {
      if (path.getLeaf().getId().equals(hNodeId)) {
        selectedPath = path;
      }
    }
    Collection<Node> nodes = new ArrayList<Node>();
    wk.getDataTable().collectNodes(selectedPath, nodes, selection);
    for (Node node : nodes) {
      String id = node.getId();
      // Skip nodes that are not part of this command's node set.
      if (!this.nodeIds.contains(id)) continue;
      String originalVal = node.getValue().asString();
      rows.put(id, originalVal);
      this.compResultString += originalVal + "\n";
      calAmbScore(id, originalVal, amb);
    }
    RamblerValueCollection vc = new RamblerValueCollection(rows);
    // expFeData: class label -> list of {original, transformed} pairs for nodes that are examples
    HashMap<String, Vector<String[]>> expFeData = new HashMap<String, Vector<String[]>>();
    inputs = new RamblerTransformationInputs(examples, vc);
    // generate the program
    boolean results = false;
    int iterNum = 0;
    RamblerTransformationOutput rtf = null;
    // initialize the vocabulary from at most the first 30 input values
    Iterator<String> iterx = inputs.getInputValues().getValues().iterator();
    Vector<String> v = new Vector<String>();
    int vb_cnt = 0;
    while (iterx.hasNext() && vb_cnt < 30) {
      String eString = iterx.next();
      v.add(eString);
      vb_cnt++;
    }
    Vector<String> vob = UtilTools.buildDict(v);
    inputs.setVocab(vob.toArray(new String[vob.size()]));
    // Try to find a program; with the bound "iterNum < 1" the body runs exactly once.
    while (iterNum < 1 && !results) // try to find an program within iterNum
    {
      rtf = new RamblerTransformationOutput(inputs);
      if (rtf.getTransformations().keySet().size() > 0) {
        results = true;
      }
      iterNum++;
    }
    Iterator<String> iter = rtf.getTransformations().keySet().iterator();
    // resdata: node id -> display dictionary with keys Org, Tar, Orgdis, Tardis
    // id:{org: tar: orgdis: tardis: }
    HashMap<String, HashMap<String, String>> resdata =
        new HashMap<String, HashMap<String, String>>();
    // keys: best example key per transformation, as chosen by getBestExample
    HashSet<String> keys = new HashSet<String>();
    while (iter.hasNext()) {
      String tpid = iter.next();
      ValueCollection rvco = rtf.getTransformedValues_debug(tpid);
      // No transformed values for this transformation id: nothing to display.
      if (rvco == null) continue;
      // constructing displaying data
      // xyzHashMap: node id -> {Org, Tar, transformed value, class label} for non-example nodes
      HashMap<String, String[]> xyzHashMap = new HashMap<String, String[]>();
      for (String key : rvco.getNodeIDs()) {
        HashMap<String, String> dict = new HashMap<String, String>();
        // add to the example selection
        boolean isExp = false;
        String org = vc.getValue(key);
        String classLabel = rvco.getClass(key);
        String pretar = rvco.getValue(key);
        String dummyValue = pretar;
        // A transformed value containing the fatal-error marker is displayed as the original.
        if (pretar.indexOf("_FATAL_ERROR_") != -1) {
          dummyValue = org;
          // dummyValue = "#ERROR";
        }
        try {
          // Fills dict with the display entries for this original/target pair.
          UtilTools.StringColorCode(org, dummyValue, dict);
        } catch (Exception ex) {
          logger.info(String.format("ColorCoding Exception%s, %s", org, dummyValue));
          // Color coding failed: fall back to the raw original and an ERROR target.
          dict.put("Org", org);
          dict.put("Tar", "ERROR");
          dict.put("Orgdis", org);
          dict.put("Tardis", "ERROR");
        }
        // Nodes that are user-provided examples are grouped by class label in expFeData.
        for (TransformationExample exp : examples) {
          if (exp.getNodeId().compareTo(key) == 0) {
            if (!expFeData.containsKey(classLabel)) {
              Vector<String[]> vstr = new Vector<String[]>();
              String[] texp = {dict.get("Org"), pretar};
              vstr.add(texp);
              expFeData.put(classLabel, vstr);
            } else {
              String[] texp = {dict.get("Org"), pretar};
              expFeData.get(classLabel).add(texp);
            }
            isExp = true;
          }
        }

        // Everything that is not an example is a candidate for the recommended example.
        if (!isExp) {
          String[] pair = {dict.get("Org"), dict.get("Tar"), pretar, classLabel};
          xyzHashMap.put(key, pair);
        }
        resdata.put(key, dict);
      }
      if (!rtf.nullRule) keys.add(getBestExample(xyzHashMap, expFeData));
    }
    // find the best row
    // vars is never modified below and is passed to the update as an empty string.
    String vars = "";
    String expstr = "";
    String recmd = "";
    // expstr is only used for the end-of-run log message.
    for (TransformationExample x : examples) {
      expstr += String.format("%s|%s", x.getBefore(), x.getAfter());
    }
    expstr += "|";
    // A null rule yields no recommended keys.
    if (rtf.nullRule) {
      keys.clear();
      // keys.add("-2"); // "-2 indicates null rule"
    }
    // recmd (logged only): the original value of the first recommended key, if any.
    // NOTE(review): assumes keys is non-empty and its first element is present in resdata
    // whenever resdata is non-empty and the rule is not null — confirm against getBestExample.
    if (!resdata.isEmpty() && !rtf.nullRule) {
      recmd = resdata.get(keys.iterator().next()).get("Org");
    } else {
      recmd = "";
    }
    msg =
        String.format(
            "Gen rule end, Time,%d, Worksheet,%s,Examples:%s,Recmd:%s",
            System.currentTimeMillis(), worksheetId, expstr, recmd);
    logger.info(msg);
    return new UpdateContainer(new CleaningResultUpdate(hNodeId, resdata, vars, keys));
  }
  /**
   * Sends the values of the selected column to the cleaning service and returns the service's
   * JSON answer as a {@code CleaningServiceOutput} update.
   *
   * <p>Every node of the column is serialized as {@code {"id": ..., "value": ...}} (null values
   * become the empty string) and posted as the form parameter {@code json} to the URL configured
   * under {@code ContextParameter.CLEANING_SERVICE_URL}. The response body is parsed as a JSON
   * object and emitted under {@code chartData}.
   *
   * <p>The response reader and the HTTP client's connection manager are always released, whether
   * or not the request succeeds.
   *
   * @param workspace workspace containing the worksheet identified by {@code worksheetId}
   * @return an update container with the service output, or with an {@code ErrorUpdate} when
   *     anything goes wrong while building, sending or parsing the request
   * @throws CommandException declared by the command contract
   */
  @Override
  public UpdateContainer doIt(Workspace workspace) throws CommandException {
    Worksheet worksheet = workspace.getWorksheet(worksheetId);
    SuperSelection selection = getSuperSelection(worksheet);
    // Find the path whose leaf is the requested column; the last match wins.
    HNodePath selectedPath = null;
    List<HNodePath> columnPaths = worksheet.getHeaders().getAllPaths();
    for (HNodePath path : columnPaths) {
      if (path.getLeaf().getId().equals(hNodeId)) {
        selectedPath = path;
      }
    }
    Collection<Node> nodes = new ArrayList<Node>();
    workspace
        .getFactory()
        .getWorksheet(worksheetId)
        .getDataTable()
        .collectNodes(selectedPath, nodes, selection);

    // Declared outside the try block so the finally clause can release it.
    HttpClient httpclient = null;
    try {
      // Build the request payload: one {id, value} record per node.
      JSONArray requestJsonArray = new JSONArray();
      for (Node node : nodes) {
        String id = node.getId();
        String originalVal = node.getValue().asString();
        JSONObject jsonRecord = new JSONObject();
        jsonRecord.put("id", id);
        originalVal = originalVal == null ? "" : originalVal;
        jsonRecord.put("value", originalVal);
        requestJsonArray.put(jsonRecord);
      }
      String jsonString = requestJsonArray.toString();

      String url =
          ServletContextParameterMap.getParameterValue(ContextParameter.CLEANING_SERVICE_URL);

      httpclient = new DefaultHttpClient();
      StringBuffer out = new StringBuffer();

      URI u = new URI(url);
      List<NameValuePair> formparams = new ArrayList<NameValuePair>();
      formparams.add(new BasicNameValuePair("json", jsonString));

      HttpPost httppost = new HttpPost(u);
      httppost.setEntity(new UrlEncodedFormEntity(formparams, "UTF-8"));
      HttpResponse response = httpclient.execute(httppost);
      HttpEntity entity = response.getEntity();
      if (entity != null) {
        // NOTE(review): the response is decoded with the platform default charset — confirm
        // the service's encoding before pinning one here.
        BufferedReader buf = new BufferedReader(new InputStreamReader(entity.getContent()));
        try {
          String line = buf.readLine();
          while (line != null) {
            out.append(line);
            line = buf.readLine();
          }
        } finally {
          // Closing the content stream also hands the connection back to the manager.
          buf.close();
        }
      }
      final JSONObject data1 = new JSONObject(out.toString());
      return new UpdateContainer(
          new AbstractUpdate() {

            @Override
            public void generateJson(String prefix, PrintWriter pw, VWorkspace vWorkspace) {
              JSONObject response = new JSONObject();
              try {
                response.put("updateType", "CleaningServiceOutput");
                response.put("chartData", data1);
                response.put("hNodeId", hNodeId);
              } catch (JSONException e) {
                pw.print("Error");
              }

              pw.print(response.toString());
            }
          });
    } catch (Exception e) {
      e.printStackTrace();
      return new UpdateContainer(new ErrorUpdate("Error!"));
    } finally {
      // Release the pooled connections held by this one-shot client.
      if (httpclient != null) {
        httpclient.getConnectionManager().shutdown();
      }
    }
  }