예제 #1
0
  /**
   * Command-line entry point: loads an input vector file and a class information file, calls
   * {@code removeNotPresentElements} on the class information with the loaded input data, and
   * writes the resulting class information via {@code InputDataWriter.writeAsSOMLib}.
   *
   * @param args command-line arguments, parsed through {@code OptionFactory.parseResults}
   * @throws IOException propagated from the file-handling calls
   * @throws SOMToolboxException propagated from the toolbox calls
   */
  public static void main(String[] args) throws IOException, SOMToolboxException {
    // Register the four options this tool understands and parse the command line.
    JSAPResult options =
        OptionFactory.parseResults(
            args,
            OptionFactory.getOptInputVectorFile(true),
            OptionFactory.getOptClassInformationFile(true),
            OptionFactory.getOptOutputFileName(true),
            OptionFactory.getOptOutputDirectory(false));

    String vectorPath = options.getString("inputVectorFile");
    String classInfoPath = options.getString("classInformationFile");

    // "." is passed as the default for the output directory.
    String targetPath =
        options.getString("outputDirectory", ".") + File.separator + options.getString("output");

    // Load both inputs, filter the class information against the vectors, then write it out.
    SOMLibSparseInputData vectors = new SOMLibSparseInputData(vectorPath);
    SOMLibClassInformation classes = new SOMLibClassInformation(classInfoPath);
    classes.removeNotPresentElements(vectors);
    InputDataWriter.writeAsSOMLib(classes, targetPath);
  }
예제 #2
0
  /**
   * Command-line entry point that rewrites the labels of an input vector file.
   *
   * <p>Lines starting with {@code $} are copied to the output verbatim. For every other line the
   * text after the last space is taken as the label. The label is optionally stripped with the
   * {@code stripFromString} regular expression, optionally reduced to the part after its last
   * {@code /}, and then looked up through {@code getReplacement}. Lines whose label has a
   * replacement are written with the new label; all other lines are dropped and counted as
   * skipped. A summary is printed to standard output at the end.
   *
   * <p>NOTE(review): header lines are copied unchanged, so the {@code $XDIM} value (used here as
   * the total vector count) is not adjusted in the output when vectors are skipped.
   *
   * @param args command-line arguments, parsed against {@code OPTIONS}
   * @throws IOException if an I/O error occurs while reading or writing the files
   */
  public static void main(String[] args) throws IOException {
    // register and parse all options
    JSAPResult config = OptionFactory.parseResults(args, OPTIONS);

    String inputFileName = AbstractOptionFactory.getFilePath(config, "input");
    String mappingFile = AbstractOptionFactory.getFilePath(config, "nameMappingFile");
    String outputFileName = AbstractOptionFactory.getFilePath(config, "output");

    // May be null when the option was not supplied; guarded before use below.
    String stripFromLabel = config.getString("stripFromString");
    boolean onlyLastPathSegment = config.getBoolean("onlyLastPathSegment");

    boolean gzip = config.getBoolean("gzip");

    MatchMode matchMode = MatchMode.valueOf(config.getString("matchMode"));

    Hashtable<String, String> mapping = readMappingFile(mappingFile);
    String[] keys = null;
    if (matchMode == MatchMode.endsWith) {
      // Materialise the keys once as a String[]; still slow, but faster than calling keySet()
      // for every label.
      keys = mapping.keySet().toArray(new String[mapping.size()]);
    }

    int written = 0;
    int skipped = 0;

    // The file stream is its own resource so it is released even if wrapping it in a
    // GZIPOutputStream fails; closing the writer flushes and closes the whole chain.
    try (OutputStream fileOut = new FileOutputStream(outputFileName);
        BufferedWriter bw =
            new BufferedWriter(
                new OutputStreamWriter(gzip ? new GZIPOutputStream(fileOut) : fileOut))) {

      // First pass over the input: only the headers are read, to learn the vector count.
      HashMap<String, String> headers;
      try (BufferedReader headerReader = FileUtils.openFile("Input vector", inputFileName)) {
        headers = FileUtils.readSOMLibFileHeaders(headerReader, "input vector");
      }
      int totalVectorCount = Integer.parseInt(headers.get("$XDIM"));

      // Second pass: copy headers and rewrite the vector lines.
      try (BufferedReader br = FileUtils.openFile("Input vector", inputFileName)) {
        StdErrProgressWriter progress =
            new StdErrProgressWriter(totalVectorCount, "rewriting vector ", 100);
        String line;
        while ((line = br.readLine()) != null) {
          if (line.startsWith("$")) {
            bw.write(line);
            bw.newLine();
            continue;
          }

          // NOTE(review): a line without any space yields lastPos == -1, in which case
          // substring(0, lastPos) below throws if a replacement is found.
          int lastPos = line.lastIndexOf(" ");
          String label = line.substring(lastPos + 1);

          if (stripFromLabel != null) {
            // stripFromLabel is interpreted as a regular expression.
            label = label.replaceAll(stripFromLabel, "");
          }
          if (onlyLastPathSegment && label.contains("/")) {
            label = label.substring(label.lastIndexOf("/") + 1);
          }

          String target = getReplacement(mapping, keys, label, matchMode);
          if (target != null) {
            bw.write(line.substring(0, lastPos));
            bw.write(" " + target);
            bw.newLine();
            written++;
          } else {
            skipped++;
          }

          progress.progress();
        }
      }
    }

    System.out.println(
        "Wrote "
            + written
            + " vectors, skipped "
            + skipped
            + " because no label found in matching file");
  }