コード例 #1
0
  private void saveSemanticTypesInformation(
      Worksheet worksheet, VWorkspace vWorkspace, Collection<SemanticType> semanticTypes)
      throws JSONException {
    JSONArray typesArray = new JSONArray();

    // Add the vworksheet information
    JSONObject vwIDJObj = new JSONObject();
    vwIDJObj.put(ClientJsonKeys.name.name(), ParameterType.vWorksheetId.name());
    vwIDJObj.put(ClientJsonKeys.type.name(), ParameterType.vWorksheetId.name());
    vwIDJObj.put(ClientJsonKeys.value.name(), vWorksheetId);
    typesArray.put(vwIDJObj);

    // Add the check history information
    JSONObject chIDJObj = new JSONObject();
    chIDJObj.put(ClientJsonKeys.name.name(), ParameterType.checkHistory.name());
    chIDJObj.put(ClientJsonKeys.type.name(), ParameterType.other.name());
    chIDJObj.put(ClientJsonKeys.value.name(), false);
    typesArray.put(chIDJObj);

    for (SemanticType type : semanticTypes) {
      // Add the hNode information
      JSONObject hNodeJObj = new JSONObject();
      hNodeJObj.put(ClientJsonKeys.name.name(), ParameterType.hNodeId.name());
      hNodeJObj.put(ClientJsonKeys.type.name(), ParameterType.hNodeId.name());
      hNodeJObj.put(ClientJsonKeys.value.name(), type.getHNodeId());
      typesArray.put(hNodeJObj);

      // Add the semantic type information
      JSONObject typeJObj = new JSONObject();
      typeJObj.put(ClientJsonKeys.name.name(), ClientJsonKeys.SemanticType.name());
      typeJObj.put(ClientJsonKeys.type.name(), ParameterType.other.name());
      typeJObj.put(ClientJsonKeys.value.name(), type.getJSONArrayRepresentation());
      typesArray.put(typeJObj);
    }
    setInputParameterJson(typesArray.toString(4));
  }
コード例 #2
0
  @Override
  public void generateJson(String prefix, PrintWriter pw, VWorkspace vWorkspace) {
    Workspace workspace = vWorkspace.getWorkspace();
    alignment = AlignmentManager.Instance().getAlignment(workspace.getId(), worksheetId);
    SemanticTypes types = worksheet.getSemanticTypes();
    Map<String, ColumnNode> hNodeIdTocolumnNodeMap = createColumnNodeMap();
    Map<String, SemanticTypeNode> hNodeIdToDomainNodeMap = createDomainNodeMap();

    JSONStringer jsonStr = new JSONStringer();
    try {
      JSONWriter writer = jsonStr.object();
      writer.key("worksheetId").value(worksheetId).key("updateType").value("SemanticTypesUpdate");

      writer.key(JsonKeys.Types.name());
      writer.array();
      // Iterate through all the columns
      for (HNodePath path : worksheet.getHeaders().getAllPaths()) {
        HNode node = path.getLeaf();
        String nodeId = node.getId();

        writer.object();

        // Check if a semantic type exists for the HNode
        SemanticType type = types.getSemanticTypeForHNodeId(nodeId);
        if (type != null && type.getConfidenceLevel() != SemanticType.ConfidenceLevel.Low) {
          writer
              .key(JsonKeys.HNodeId.name())
              .value(type.getHNodeId())
              .key(JsonKeys.SemanticTypesArray.name())
              .array();

          ColumnNode alignmentColumnNode = hNodeIdTocolumnNodeMap.get(type.getHNodeId());
          SemanticTypeNode domainNode = hNodeIdToDomainNodeMap.get(type.getHNodeId());

          if (alignmentColumnNode == null || domainNode == null) {
            logger.error(
                "Column node or domain node not found in alignment."
                    + " (This should not happen conceptually!):"
                    + type);
            continue;
          }

          // Add the primary semantic type
          writer
              .object()
              .key(JsonKeys.Origin.name())
              .value(type.getOrigin().name())
              .key(JsonKeys.ConfidenceLevel.name())
              .value(type.getConfidenceLevel().name())
              .key(JsonKeys.isPrimary.name())
              .value(true);

          // Add the RDF literal type to show in the text box
          String rdfLiteralType =
              alignmentColumnNode.getRdfLiteralType() == null
                  ? ""
                  : alignmentColumnNode.getRdfLiteralType().getDisplayName();
          String language =
              alignmentColumnNode.getLanguage() == null ? "" : alignmentColumnNode.getLanguage();
          writer.key(JsonKeys.rdfLiteralType.name()).value(rdfLiteralType);
          writer.key(JsonKeys.language.name()).value(language);

          //					String domainDisplayLabel = (domainNode.getLabel().getPrefix() != null &&
          // (!domainNode.getLabel().getPrefix().equals(""))) ?
          //							(domainNode.getLabel().getPrefix() + ":" + domainNode.getLocalId()) :
          // domainNode.getLocalId();
          if (!type.isClass()) {
            writer
                .key(JsonKeys.FullType.name())
                .value(type.getType().getUri())
                .key(JsonKeys.DisplayLabel.name())
                .value(type.getType().getDisplayName())
                .key(JsonKeys.DisplayRDFSLabel.name())
                .value(type.getType().getRdfsLabel())
                .key(JsonKeys.DisplayRDFSComment.name())
                .value(type.getType().getRdfsComment())
                .key(JsonKeys.DomainId.name())
                .value(domainNode.getId())
                .key(JsonKeys.DomainUri.name())
                .value(domainNode.getUri())
                .key(JsonKeys.DisplayDomainLabel.name())
                .value(domainNode.getDisplayId())
                .key(JsonKeys.DomainRDFSLabel.name())
                .value(domainNode.getRdfsLabel())
                .key(JsonKeys.DomainRDFSComment.name())
                .value(domainNode.getRdfsComment());
          } else {
            writer
                .key(JsonKeys.FullType.name())
                .value(domainNode.getId())
                .key(JsonKeys.DisplayLabel.name())
                .value(domainNode.getDisplayId())
                .key(JsonKeys.DisplayRDFSLabel.name())
                .value(domainNode.getRdfsLabel())
                .key(JsonKeys.DisplayRDFSComment.name())
                .value(domainNode.getRdfsComment())
                .key(JsonKeys.DomainId.name())
                .value("")
                .key(JsonKeys.DomainUri.name())
                .value("")
                .key(JsonKeys.DisplayDomainLabel.name())
                .value("")
                .key(JsonKeys.DomainRDFSLabel.name())
                .value("")
                .key(JsonKeys.DomainRDFSComment.name())
                .value("");
          }

          // Mark the special properties
          writer
              .key(JsonKeys.isMetaProperty.name())
              .value(isMetaProperty(type.getType(), alignmentColumnNode));

          writer.endObject();

          // Iterate through the synonym semantic types
          SynonymSemanticTypes synTypes = types.getSynonymTypesForHNodeId(nodeId);

          if (synTypes != null) {
            for (SemanticType synType : synTypes.getSynonyms()) {
              writer
                  .object()
                  .key(JsonKeys.HNodeId.name())
                  .value(synType.getHNodeId())
                  .key(JsonKeys.FullType.name())
                  .value(synType.getType().getUri())
                  .key(JsonKeys.Origin.name())
                  .value(synType.getOrigin().name())
                  .key(JsonKeys.ConfidenceLevel.name())
                  .value(synType.getConfidenceLevel().name())
                  .key(JsonKeys.DisplayLabel.name())
                  .value(synType.getType().getDisplayName())
                  .key(JsonKeys.DisplayRDFSLabel.name())
                  .value(synType.getType().getRdfsLabel())
                  .key(JsonKeys.DisplayRDFSComment.name())
                  .value(synType.getType().getRdfsComment())
                  .key(JsonKeys.isPrimary.name())
                  .value(false);
              if (!synType.isClass()) {
                writer
                    .key(JsonKeys.DomainUri.name())
                    .value(synType.getDomain().getUri())
                    .key(JsonKeys.DomainId.name())
                    .value("")
                    .key(JsonKeys.DisplayDomainLabel.name())
                    .value(synType.getDomain().getDisplayName())
                    .key(JsonKeys.DomainRDFSLabel.name())
                    .value(synType.getDomain().getRdfsLabel())
                    .key(JsonKeys.DomainRDFSComment.name())
                    .value(synType.getDomain().getRdfsComment());
              } else {
                writer
                    .key(JsonKeys.DomainId.name())
                    .value("")
                    .key(JsonKeys.DomainUri.name())
                    .value("")
                    .key(JsonKeys.DisplayDomainLabel.name())
                    .value("")
                    .key(JsonKeys.DomainRDFSLabel.name())
                    .value("")
                    .key(JsonKeys.DomainRDFSComment.name())
                    .value("");
              }
              writer.endObject();
            }
          }
          writer.endArray();
        } else {
          writer.key(JsonKeys.HNodeId.name()).value(nodeId);
          writer.key(JsonKeys.SemanticTypesArray.name()).array().endArray();
        }

        writer.endObject();
      }
      writer.endArray();
      writer.endObject();

      pw.print(writer.toString());
    } catch (JSONException e) {
      logger.error("Error occured while writing to JSON!", e);
    }
  }
コード例 #3
0
  public void run() {
    long start = System.currentTimeMillis();
    // Find the corresponding hNodePath. Used to find examples for training the CRF Model.
    HNodePath currentColumnPath = null;
    List<HNodePath> paths = worksheet.getHeaders().getAllPaths();
    for (HNodePath path : paths) {
      if (path.getLeaf().getId().equals(newType.getHNodeId())) {
        currentColumnPath = path;
        break;
      }
    }

    Map<ColumnFeature, Collection<String>> columnFeatures =
        new HashMap<ColumnFeature, Collection<String>>();

    // Prepare the column name for training
    String columnName = currentColumnPath.getLeaf().getColumnName();
    Collection<String> columnNameList = new ArrayList<String>();
    columnNameList.add(columnName);
    columnFeatures.put(ColumnFeature.ColumnHeaderName, columnNameList);

    // Train the model with the new type
    ArrayList<String> trainingExamples =
        SemanticTypeUtil.getTrainingExamples(worksheet, currentColumnPath);
    boolean trainingResult = false;
    String newTypeString =
        (newType.getDomain() == null)
            ? newType.getType().getUri()
            : newType.getDomain().getUri() + "|" + newType.getType().getUri();

    trainingResult =
        crfModelHandler.addOrUpdateLabel(newTypeString, trainingExamples, columnFeatures);

    if (!trainingResult) {
      logger.error("Error occured while training CRF Model.");
    }
    //		logger.debug("Using type:" + newType.getDomain().getUri() + "|" +
    // newType.getType().getUri());

    // Add the new CRF column model for this column
    ArrayList<String> labels = new ArrayList<String>();
    ArrayList<Double> scores = new ArrayList<Double>();
    trainingResult =
        crfModelHandler.predictLabelForExamples(
            trainingExamples, 4, labels, scores, null, columnFeatures);
    if (!trainingResult) {
      logger.error("Error occured while predicting labels");
    }
    CRFColumnModel newModel = new CRFColumnModel(labels, scores);
    worksheet.getCrfModel().addColumnModel(newType.getHNodeId(), newModel);

    long elapsedTimeMillis = System.currentTimeMillis() - start;
    float elapsedTimeSec = elapsedTimeMillis / 1000F;
    logger.info("Time required for training the semantic type: " + elapsedTimeSec);

    //		long t2 = System.currentTimeMillis();

    // Identify the outliers for the column
    //		SemanticTypeUtil.identifyOutliers(worksheet, newTypeString,currentColumnPath,
    // vWorkspace.getWorkspace().getTagsContainer()
    //				.getTag(TagName.Outlier), columnFeatures, crfModelHandler);

    //		long t3 = System.currentTimeMillis();
    //		logger.info("Identify outliers: "+ (t3-t2));
  }