Example #1
0
  /**
   * Dumps the model's label bindings, followed by the value tables of its nominal attributes, as
   * plain text.
   *
   * <p>Output layout, in order:
   *
   * <ol>
   *   <li>a header line naming the relation;
   *   <li>one {@code label<delimiter>integer} line per label binding;
   *   <li>an empty line and the line {@code Values for nominal attributes};
   *   <li>the number of nominal attributes;
   *   <li>for each nominal attribute: its label, the number of values it has, and one {@code
   *       value<delimiter>integer} line per value.
   * </ol>
   *
   * @param writer destination of the text; this method neither flushes nor closes it
   * @param arffModel model supplying the relation name, label bindings and nominal map
   * @param delimiter separator written between a name and the integer bound to it
   * @throws IOException if writing to {@code writer} fails
   */
  protected static void writeLabelBindings(Writer writer, ARFFModel arffModel, String delimiter)
      throws IOException {
    Map<String, Integer> bindings = arffModel.getLabelBindings();

    // Section 1: header plus one line per label binding.
    String header = "Label bindings for Relation " + arffModel.getRelation() + '\n';
    writer.write(header);
    for (Map.Entry<String, Integer> binding : bindings.entrySet()) {
      writer.write(binding.getKey());
      writer.write(delimiter);
      writer.write(String.valueOf(binding.getValue()));
      writer.write('\n');
    }

    // Section 2: blank separator line, then the nominal attribute tables.
    writer.write('\n');
    writer.write("Values for nominal attributes\n");
    Map<String, Map<String, Integer>> nominalAttributes = arffModel.getNominalMap();
    // count of nominal attributes comes first so a reader knows how many tables follow
    writer.write(nominalAttributes.size() + "\n");

    for (Map.Entry<String, Map<String, Integer>> nominal : nominalAttributes.entrySet()) {
      // attribute label, then the number of values, then the values themselves
      writer.write(nominal.getKey() + "\n");
      Set<Map.Entry<String, Integer>> valueEntries = nominal.getValue().entrySet();
      writer.write(String.valueOf(valueEntries.size()) + "\n");
      for (Map.Entry<String, Integer> valueEntry : valueEntries) {
        String line = valueEntry.getKey() + delimiter + valueEntry.getValue().toString() + "\n";
        writer.write(line);
      }
    }
  }
Example #2
0
  /**
   * Converts one ARFF file into a vector file and then writes the label bindings of the model used
   * for the conversion.
   *
   * <p>The vectors go to {@code <outDir>/<file name>.mvc} through the writer returned by {@code
   * getSeqFileWriter}. The conversion runs against a fresh {@link MapBackedARFFModel} built from
   * the words, word count (plus one) and nominal map of {@code arffModel}, not against {@code
   * arffModel} itself.
   *
   * @param outDir directory the {@code .mvc} output is placed in
   * @param file ARFF input file
   * @param maxDocs limit handed to the vector writer
   * @param arffModel model whose words, word count and nominal map seed the conversion model
   * @param dictOut file handed to {@code writeLabelBindings} as the dictionary destination
   * @param delimiter delimiter handed to {@code writeLabelBindings}
   * @param jsonDictonary flag handed to {@code writeLabelBindings}; presumably selects JSON
   *     dictionary output (confirm against that overload)
   * @throws IOException if reading, writing or closing the vector writer fails
   */
  protected static void writeFile(
      String outDir,
      File file,
      long maxDocs,
      ARFFModel arffModel,
      File dictOut,
      String delimiter,
      boolean jsonDictonary)
      throws IOException {
    log.info("Converting File: {}", file);

    ARFFModel conversionModel =
        new MapBackedARFFModel(
            arffModel.getWords(), arffModel.getWordCount() + 1, arffModel.getNominalMap());
    Iterable<Vector> vectors = new ARFFVectorIterable(file, conversionModel);

    String targetPath = outDir + '/' + file.getName() + ".mvc";
    VectorWriter seqWriter = getSeqFileWriter(targetPath);
    try {
      long written = seqWriter.write(vectors, maxDocs);
      // bindings are written from the conversion model after it has been through the conversion
      writeLabelBindings(dictOut, conversionModel, delimiter, jsonDictonary);
      log.info("Wrote: {} vectors", written);
    } finally {
      // 'false' => an IOException raised by close() is propagated, not swallowed
      Closeables.close(seqWriter, false);
    }
  }
Example #3
0
  /**
   * Writes the model's attributes to {@code writer} as a JSON array with one object per
   * attribute.
   *
   * <p>Attributes are emitted in ascending order of the integer bound to them in the model's label
   * bindings. Each object carries:
   *
   * <ul>
   *   <li>{@code "attribute"}: the attribute name;
   *   <li>{@code "label"}: the string {@code "true"} for the last attribute in that order (treated
   *       as the class label) and {@code "false"} for all others;
   *   <li>{@code "type"}: {@code "categorical"} when the attribute has an entry in the model's
   *       nominal map, otherwise {@code "numerical"};
   *   <li>{@code "values"}: categorical attributes only, the names of the allowed values. An
   *       attribute whose value map is empty yields an empty array.
   * </ul>
   *
   * @param writer destination of the JSON text; this method neither flushes nor closes it
   * @param arffModel model supplying the label bindings and the nominal map
   * @throws IOException if serialization or writing fails
   */
  protected static void writeLabelBindingsJSON(Writer writer, ARFFModel arffModel)
      throws IOException {

    // Turn the map of labels into a list ordered by the integer each label is bound to
    List<Entry<String, Integer>> attributes = Lists.newArrayList();
    attributes.addAll(arffModel.getLabelBindings().entrySet());
    Collections.sort(
        attributes,
        new Comparator<Map.Entry<String, Integer>>() {
          @Override
          public int compare(Entry<String, Integer> t, Entry<String, Integer> t1) {
            return t.getValue().compareTo(t1.getValue());
          }
        });

    // Loop invariants: look them up once instead of once per attribute
    Map<String, Map<String, Integer>> nominalMap = arffModel.getNominalMap();
    int lastIndex = attributes.size() - 1;

    // write a map for each object
    List<Map<String, Object>> jsonObjects = Lists.newArrayList();
    for (int i = 0; i < attributes.size(); i++) {
      String attribute = attributes.get(i).getKey();

      Map<String, Object> jsonRepresentation = Maps.newHashMap();
      jsonObjects.add(jsonRepresentation);
      // the last one is the class label; the flag is deliberately written as a string
      jsonRepresentation.put("label", String.valueOf(i == lastIndex));
      jsonRepresentation.put("attribute", attribute);

      Map<String, Integer> nominalValues = nominalMap.get(attribute);
      if (nominalValues != null) {
        // A zero-length seed array is essential: toArray(new String[1]) would hand back the
        // one-element array {null} for an empty key set and serialize as [null].
        String[] values = nominalValues.keySet().toArray(new String[0]);

        jsonRepresentation.put("values", values);
        jsonRepresentation.put("type", "categorical");
      } else {
        jsonRepresentation.put("type", "numerical");
      }
    }
    writer.write(OBJECT_MAPPER.writeValueAsString(jsonObjects));
  }