/**
 * Command-line entry point: loads an input vector file and a class information file, calls
 * {@code removeNotPresentElements} on the class information with the loaded input data, and
 * writes the resulting class information in SOMLib format.
 *
 * <p>The result is written to {@code <outputDirectory>/<output>}; the output directory falls
 * back to {@code "."} when the option is not given.
 *
 * @param args the command-line arguments, parsed via {@code OptionFactory.parseResults}
 * @throws IOException declared for the file reading/writing performed by the called classes
 * @throws SOMToolboxException declared for failures raised by the toolbox classes used here
 */
public static void main(String[] args) throws IOException, SOMToolboxException {
    // Declare the accepted options (output directory is the only optional one) and parse args.
    JSAPResult parsed =
        OptionFactory.parseResults(
            args,
            OptionFactory.getOptInputVectorFile(true),
            OptionFactory.getOptClassInformationFile(true),
            OptionFactory.getOptOutputFileName(true),
            OptionFactory.getOptOutputDirectory(false));

    String vectorPath = parsed.getString("inputVectorFile");
    String classInfoPath = parsed.getString("classInformationFile");
    String targetDir = parsed.getString("outputDirectory", ".");
    String targetName = parsed.getString("output");
    String targetPath = targetDir + File.separator + targetName;

    SOMLibSparseInputData vectors = new SOMLibSparseInputData(vectorPath);
    SOMLibClassInformation classes = new SOMLibClassInformation(classInfoPath);

    // Presumably drops class entries that have no matching input vector - confirm against
    // SOMLibClassInformation#removeNotPresentElements.
    classes.removeNotPresentElements(vectors);

    InputDataWriter.writeAsSOMLib(classes, targetPath);
  }
Exemple #2
0
  /**
   * Command-line entry point: copies an input vector file to a new file while replacing the label
   * of every vector line (the text after the last space) with the value found for it in a mapping
   * file.
   *
   * <p>Lines starting with {@code $} are copied verbatim. For every other line the label is first
   * cleaned (optional regular-expression strip, optional reduction to the last path segment) and
   * then looked up via {@code getReplacement}; lines without a replacement are dropped and
   * counted as skipped. The output is gzip-compressed when the {@code gzip} switch is set.
   *
   * <p>NOTE: because {@code $} header lines are copied unchanged, any vector count stored in the
   * headers is not adjusted when vectors are skipped.
   *
   * @param args the command-line arguments, parsed against {@code OPTIONS}
   * @throws IOException if reading the mapping/input file or writing the output file fails
   */
  public static void main(String[] args) throws IOException {
    // register and parse all options
    JSAPResult config = OptionFactory.parseResults(args, OPTIONS);

    String inputFileName = AbstractOptionFactory.getFilePath(config, "input");
    String mappingFile = AbstractOptionFactory.getFilePath(config, "nameMappingFile");
    String outputFileName = AbstractOptionFactory.getFilePath(config, "output");

    String stripFromLabel = config.getString("stripFromString");
    boolean onlyLastPathSegment = config.getBoolean("onlyLastPathSegment");
    boolean gzip = config.getBoolean("gzip");
    MatchMode matchMode = MatchMode.valueOf(config.getString("matchMode"));

    // Compile the strip expression once instead of once per line (String#replaceAll recompiles on
    // every call). A missing option value simply disables stripping instead of failing with a
    // NullPointerException on the first vector line.
    java.util.regex.Pattern stripPattern =
        stripFromLabel == null ? null : java.util.regex.Pattern.compile(stripFromLabel);

    Hashtable<String, String> mapping = readMappingFile(mappingFile);
    String[] keys = null;
    if (matchMode == MatchMode.endsWith) {
      // Snapshot the keys once as an array; iterating Hashtable#keySet() for every vector would be
      // even slower.
      keys = mapping.keySet().toArray(new String[mapping.size()]);
    }

    // Read the headers before the output file is created, so that an unreadable input does not
    // leave an empty output file behind. The header reader is closed here because nothing visible
    // guarantees that readSOMLibFileHeaders does so; closing an already closed reader is a no-op.
    HashMap<String, String> headers;
    BufferedReader headerReader = FileUtils.openFile("Input vector", inputFileName);
    try {
      headers = FileUtils.readSOMLibFileHeaders(headerReader, "input vector");
    } finally {
      headerReader.close();
    }
    // $XDIM is used as the expected number of vectors for the progress display.
    int totalVectorCount = Integer.parseInt(headers.get("$XDIM"));

    int written = 0;
    int skipped = 0;

    OutputStream out =
        gzip
            ? new GZIPOutputStream(new FileOutputStream(outputFileName))
            : new FileOutputStream(outputFileName);
    BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(out));
    try {
      BufferedReader br = FileUtils.openFile("Input vector", inputFileName);
      try {
        StdErrProgressWriter progress =
            new StdErrProgressWriter(totalVectorCount, "rewriting vector ", 100);
        String line;
        while ((line = br.readLine()) != null) {
          if (line.startsWith("$")) {
            // header line: copy unchanged
            bw.write(line);
            bw.newLine();
            continue;
          }

          // The label is everything after the last space of the vector line.
          int lastPos = line.lastIndexOf(" ");
          String label = line.substring(lastPos + 1);
          if (stripPattern != null) {
            label = stripPattern.matcher(label).replaceAll("");
          }
          if (onlyLastPathSegment && label.contains("/")) {
            label = label.substring(label.lastIndexOf("/") + 1);
          }

          String target = getReplacement(mapping, keys, label, matchMode);
          if (target != null) {
            bw.write(line.substring(0, lastPos));
            bw.write(" " + target);
            bw.newLine();
            written++;
          } else {
            skipped++;
          }

          progress.progress();
        }
      } finally {
        br.close();
      }
    } finally {
      // Always close the writer: this flushes buffered data and finishes the gzip stream even when
      // processing aborts with an exception.
      bw.close();
    }

    System.out.println(
        "Wrote "
            + written
            + " vectors, skipped "
            + skipped
            + " because no label found in matching file");
  }
Exemple #3
0
/**
 * Rewrites a SOMLib vector file by replacing the label names of the input vectors as defined by
 * the given mapping file. Labels in the mapping file must be separated from their replacement by
 * a tab.
 *
 * @author Michael Dittenbach
 * @author Rudolf Mayer
 * @version $Id: VectorFileRewriter.java 4267 2012-04-03 14:51:20Z mayer $
 */
public class VectorFileRewriter implements SOMToolboxApp {

  /** Command-line options accepted by {@link #main(String[])}. */
  private static final Parameter[] OPTIONS =
      new Parameter[] {
        OptionFactory.getOptInputFileName(),
        OptionFactory.getOptNameMappingFile(true),
        OptionFactory.getOptOutputVector(),
        OptionFactory.getOptMatchMode(false),
        OptionFactory.getOptStrip(false),
        OptionFactory.getSwitchOnlyLastPathSegment(false),
        OptionFactory.getOptGZip(false, false)
      };

  /** Short description of this application. */
  public static String DESCRIPTION = "Replace labels in SOMLibVectorFiles";

  /** Longer description of this application, including the mapping file format. */
  public static String LONG_DESCRIPTION =
      "Replaces labels in SOMLibVectorFiles by means of a mapping file; labels in the mapping file must be separated by a tab";

  /** Category of this application. */
  public static final Type APPLICATION_TYPE = Type.Helper;

  /**
   * Command-line entry point: copies an input vector file to a new file while replacing the label
   * of every vector line (the text after the last space) with the value found for it in a mapping
   * file.
   *
   * <p>Lines starting with {@code $} are copied verbatim. For every other line the label is first
   * cleaned (optional regular-expression strip, optional reduction to the last path segment) and
   * then looked up via {@link #getReplacement}; lines without a replacement are dropped and
   * counted as skipped. The output is gzip-compressed when the {@code gzip} switch is set.
   *
   * <p>NOTE: because {@code $} header lines are copied unchanged, any vector count stored in the
   * headers is not adjusted when vectors are skipped.
   *
   * @param args the command-line arguments, parsed against {@link #OPTIONS}
   * @throws IOException if reading the mapping/input file or writing the output file fails
   */
  public static void main(String[] args) throws IOException {
    // register and parse all options
    JSAPResult config = OptionFactory.parseResults(args, OPTIONS);

    String inputFileName = AbstractOptionFactory.getFilePath(config, "input");
    String mappingFile = AbstractOptionFactory.getFilePath(config, "nameMappingFile");
    String outputFileName = AbstractOptionFactory.getFilePath(config, "output");

    String stripFromLabel = config.getString("stripFromString");
    boolean onlyLastPathSegment = config.getBoolean("onlyLastPathSegment");
    boolean gzip = config.getBoolean("gzip");
    MatchMode matchMode = MatchMode.valueOf(config.getString("matchMode"));

    // Compile the strip expression once instead of once per line (String#replaceAll recompiles on
    // every call). A missing option value simply disables stripping instead of failing with a
    // NullPointerException on the first vector line.
    java.util.regex.Pattern stripPattern =
        stripFromLabel == null ? null : java.util.regex.Pattern.compile(stripFromLabel);

    Hashtable<String, String> mapping = readMappingFile(mappingFile);
    String[] keys = null;
    if (matchMode == MatchMode.endsWith) {
      // Snapshot the keys once as an array; iterating Hashtable#keySet() for every vector would be
      // even slower.
      keys = mapping.keySet().toArray(new String[mapping.size()]);
    }

    // Read the headers before the output file is created, so that an unreadable input does not
    // leave an empty output file behind. The header reader is closed here because nothing visible
    // guarantees that readSOMLibFileHeaders does so; closing an already closed reader is a no-op.
    HashMap<String, String> headers;
    BufferedReader headerReader = FileUtils.openFile("Input vector", inputFileName);
    try {
      headers = FileUtils.readSOMLibFileHeaders(headerReader, "input vector");
    } finally {
      headerReader.close();
    }
    // $XDIM is used as the expected number of vectors for the progress display.
    int totalVectorCount = Integer.parseInt(headers.get("$XDIM"));

    int written = 0;
    int skipped = 0;

    OutputStream out =
        gzip
            ? new GZIPOutputStream(new FileOutputStream(outputFileName))
            : new FileOutputStream(outputFileName);
    BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(out));
    try {
      BufferedReader br = FileUtils.openFile("Input vector", inputFileName);
      try {
        StdErrProgressWriter progress =
            new StdErrProgressWriter(totalVectorCount, "rewriting vector ", 100);
        String line;
        while ((line = br.readLine()) != null) {
          if (line.startsWith("$")) {
            // header line: copy unchanged
            bw.write(line);
            bw.newLine();
            continue;
          }

          // The label is everything after the last space of the vector line.
          int lastPos = line.lastIndexOf(" ");
          String label = line.substring(lastPos + 1);
          if (stripPattern != null) {
            label = stripPattern.matcher(label).replaceAll("");
          }
          if (onlyLastPathSegment && label.contains("/")) {
            label = label.substring(label.lastIndexOf("/") + 1);
          }

          String target = getReplacement(mapping, keys, label, matchMode);
          if (target != null) {
            bw.write(line.substring(0, lastPos));
            bw.write(" " + target);
            bw.newLine();
            written++;
          } else {
            skipped++;
          }

          progress.progress();
        }
      } finally {
        br.close();
      }
    } finally {
      // Always close the writer: this flushes buffered data and finishes the gzip stream even when
      // processing aborts with an exception.
      bw.close();
    }

    System.out.println(
        "Wrote "
            + written
            + " vectors, skipped "
            + skipped
            + " because no label found in matching file");
  }

  /**
   * Looks up the replacement for a label according to the given match mode.
   *
   * <ul>
   *   <li>{@code exact}: the token itself is used as the lookup key.
   *   <li>{@code withoutPath}: everything up to and including the last {@code /} is removed from
   *       the token before the lookup.
   *   <li>{@code endsWith}: the value of the first candidate key that the token ends with is
   *       returned.
   * </ul>
   *
   * @param mapping the label mapping (original label to replacement)
   * @param keys the candidate keys tried in {@code endsWith} mode, in order; may be {@code null},
   *     in which case the keys of {@code mapping} are used. Ignored by the other modes.
   * @param token the label to replace
   * @param matchMode the matching strategy
   * @return the replacement, or {@code null} if the mapping has no matching entry
   * @throws IllegalArgumentException if {@code matchMode} is none of the supported modes
   */
  public static String getReplacement(
      Hashtable<String, String> mapping, String[] keys, String token, MatchMode matchMode) {
    if (matchMode == MatchMode.exact) {
      return mapping.get(token);
    } else if (matchMode == MatchMode.withoutPath) {
      // lastIndexOf yields -1 when there is no '/', so substring(0) keeps the whole token
      int index = token.lastIndexOf("/");
      String searchToken = token.substring(index + 1);
      return mapping.get(searchToken);
    } else if (matchMode == MatchMode.endsWith) {
      // Callers that did not precompute the key array get the mapping's own keys instead of a
      // NullPointerException.
      String[] candidates =
          keys != null ? keys : mapping.keySet().toArray(new String[mapping.size()]);
      // FIXME: this is not at all efficient (linear scan over all keys for every token)
      for (String key : candidates) {
        if (token.endsWith(key)) {
          return mapping.get(key);
        }
      }
      return null;
    } else {
      throw new IllegalArgumentException("Unsupported MatchMode '" + matchMode + "'.");
    }
  }

  /**
   * Reads the label mapping from a file with one {@code <original label><TAB><replacement>} pair
   * per line. Only the first tab of a line separates the two parts. Blank lines are ignored.
   *
   * <p>The result always contains an entry mapping {@code $MAPPED_VECS} to itself.
   *
   * @param fName the name of the mapping file
   * @return the mapping from original label to replacement
   * @throws IOException if the file cannot be read, or if a non-blank line contains no tab
   */
  private static Hashtable<String, String> readMappingFile(String fName) throws IOException {
    Hashtable<String, String> res = new Hashtable<String, String>();
    res.put("$MAPPED_VECS", "$MAPPED_VECS");

    BufferedReader br = FileUtils.openFile("Mapping file", fName);
    try {
      String line;
      int lineNumber = 0;
      while ((line = br.readLine()) != null) {
        lineNumber++;
        String[] splits = line.split("\t", 2);
        if (splits.length < 2) {
          if (line.trim().length() == 0) {
            // tolerate blank lines, e.g. at the end of the file
            continue;
          }
          // Report the offending line instead of failing with an ArrayIndexOutOfBoundsException.
          throw new IOException(
              "Malformed line "
                  + lineNumber
                  + " in mapping file '"
                  + fName
                  + "': expected '<label><TAB><replacement>' but found '"
                  + line
                  + "'");
        }
        res.put(splits[0], splits[1]);
      }
    } finally {
      // release the file even if reading or parsing fails
      br.close();
    }

    return res;
  }
}