public static void main(String[] args) throws IOException, SOMToolboxException { // register and parse all options for the JSAPResult config = OptionFactory.parseResults( args, OptionFactory.getOptInputVectorFile(true), OptionFactory.getOptClassInformationFile(true), OptionFactory.getOptOutputFileName(true), OptionFactory.getOptOutputDirectory(false)); String vectorFileName = config.getString("inputVectorFile"); String classInfoFile = config.getString("classInformationFile"); String outputDir = config.getString("outputDirectory", "."); String outputFileName = config.getString("output"); SOMLibSparseInputData inputData = new SOMLibSparseInputData(vectorFileName); SOMLibClassInformation classInfo = new SOMLibClassInformation(classInfoFile); classInfo.removeNotPresentElements(inputData); InputDataWriter.writeAsSOMLib(classInfo, outputDir + File.separator + outputFileName); }
public static void main(String[] args) throws IOException { // register and parse all options JSAPResult config = OptionFactory.parseResults(args, OPTIONS); String inputFileName = AbstractOptionFactory.getFilePath(config, "input"); String mappingFile = AbstractOptionFactory.getFilePath(config, "nameMappingFile"); String outputFileName = AbstractOptionFactory.getFilePath(config, "output"); String stripFromLabel = config.getString("stripFromString"); boolean onlyLastPathSegment = config.getBoolean("onlyLastPathSegment"); boolean gzip = config.getBoolean("gzip"); MatchMode matchMode = MatchMode.valueOf(config.getString("matchMode")); Hashtable<String, String> mapping = readMappingFile(mappingFile); String[] keys = null; if (matchMode == MatchMode.endsWith) { // defining the keys ones as String[] is still slow, but faster than using Hashmap#keySet() // often keys = mapping.keySet().toArray(new String[mapping.size()]); } OutputStream out = gzip ? new GZIPOutputStream(new FileOutputStream(outputFileName)) : new FileOutputStream(outputFileName); BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(out)); HashMap<String, String> headers = FileUtils.readSOMLibFileHeaders( FileUtils.openFile("Input vector", inputFileName), "input vector"); int totalVectorCount = Integer.parseInt(headers.get("$XDIM")); BufferedReader br = FileUtils.openFile("Input vector", inputFileName); String line = null; StdErrProgressWriter progress = new StdErrProgressWriter(totalVectorCount, "rewriting vector ", 100); int written = 0; int skipped = 0; while ((line = br.readLine()) != null) { if (line.startsWith("$")) { bw.write(line); bw.newLine(); } else { int lastPos = line.lastIndexOf(" "); String label = line.substring(lastPos + 1); // System.out.println(stripFromLabel); // System.out.print(label); label = label.replaceAll(stripFromLabel, ""); if (onlyLastPathSegment && label.contains("/")) { label = label.substring(label.lastIndexOf("/") + 1); } // System.out.println("=>" + label); String target = getReplacement(mapping, keys, label, matchMode); if (target != null) { bw.write(line.substring(0, lastPos)); bw.write(" " + target); bw.newLine(); written++; } else { // System.out.println("No label found for " + label); skipped++; } progress.progress(); } } br.close(); bw.close(); System.out.println( "Wrote " + written + " vectors, skipped " + skipped + " because no label found in matching file"); }