Ejemplo n.º 1
0
  @Override
  public int run(String[] args) throws Exception {
    addOption(
        SEQ_FILE_DIR_OPTION, "s", "The directory containing Sequence Files for the Clusters", true);
    addOption(
        OUTPUT_OPTION, "o", "Optional output directory. Default is to output to the console.");
    addOption(SUBSTRING_OPTION, "b", "The number of chars of the asFormatString() to print");
    addOption(NUM_WORDS_OPTION, "n", "The number of top terms to print");
    addOption(
        JSON_OPTION,
        "j",
        "Output the centroid as JSON.  Otherwise it substitues in the terms for vector cell entries");
    addOption(
        POINTS_DIR_OPTION,
        "p",
        "The directory containing points sequence files mapping input vectors to their cluster.  "
            + "If specified, then the program will output the points associated with a cluster");
    addOption(DICTIONARY_OPTION, "d", "The dictionary file");
    addOption(DICTIONARY_TYPE_OPTION, "dt", "The dictionary file type (text|sequencefile)", "text");
    if (parseArguments(args) == null) {
      return -1;
    }

    seqFileDir = new Path(getOption(SEQ_FILE_DIR_OPTION));
    if (hasOption(POINTS_DIR_OPTION)) {
      pointsDir = new Path(getOption(POINTS_DIR_OPTION));
    }
    outputFile = getOption(OUTPUT_OPTION);
    if (hasOption(SUBSTRING_OPTION)) {
      int sub = Integer.parseInt(getOption(SUBSTRING_OPTION));
      if (sub >= 0) {
        subString = sub;
      }
    }
    if (hasOption(JSON_OPTION)) {
      useJSON = true;
    }
    termDictionary = getOption(DICTIONARY_OPTION);
    dictionaryFormat = getOption(DICTIONARY_TYPE_OPTION);
    if (hasOption(NUM_WORDS_OPTION)) {
      numTopFeatures = Integer.parseInt(getOption(NUM_WORDS_OPTION));
    }
    init();
    printClusters(null);
    return 0;
  }
Ejemplo n.º 2
0
 public ClusterDumper(Path seqFileDir, Path pointsDir) throws IOException {
   this.seqFileDir = seqFileDir;
   this.pointsDir = pointsDir;
   init();
 }