/**
 * Registers a macro definition with the underlying preprocessor ({@code cpp}).
 *
 * <p>The macro is added under the always-true feature expression.
 *
 * @param macro the macro whose name and value are registered
 * @throws BuildException wrapping any {@code LexerException} raised while adding the macro
 */
public void addMacro(Macro macro) {
  try {
    String macroName = macro.getName();
    cpp.addMacro(macroName, FeatureExprLib.True(), macro.getValue());
  } catch (LexerException lexerFailure) {
    throw new BuildException(lexerFailure);
  }
}
/** * Make Document for coref (for method coref(Document doc, StringBuilder[] outputs)). Mention * detection and document preprocessing is done here. * * @throws Exception */ public Document makeDocument(InputDoc input) throws Exception { if (input == null) return null; Annotation anno = input.annotation; // add missing annotation if (needMissingAnnotations) { addMissingAnnotation(anno); } if (Boolean.parseBoolean(props.getProperty("hcoref.useMarkedDiscourse", "false"))) { anno.set(CoreAnnotations.UseMarkedDiscourseAnnotation.class, true); } // remove nested NP with same headword except newswire document for chinese if (input.conllDoc != null && CorefProperties.getLanguage(props) == Locale.CHINESE) { CorefProperties.setRemoveNested(props, !input.conllDoc.documentID.contains("nw")); } // mention detection: MD gives following information about mentions: mention start/end index, // span, headword // rest information will be set in preprocess step List<List<Mention>> mentions = md.findMentions(anno, dict, props); Document doc = new Document(input, mentions); // find headword for gold mentions if (input.goldMentions != null) findGoldMentionHeads(doc); // document preprocessing: initialization (assign ID), mention processing (gender, number, type, // etc), speaker extraction, etc Preprocessor.preprocess(doc, dict, singletonPredictor, headFinder); return doc; }
/**
 * Runs the preprocessor over {@code input} and prints every token to {@code output}.
 *
 * <p>Tokens are pulled from {@code cpp} until an EOF token (or a {@code null} token) is
 * returned. The output writer is always closed, including on failure.
 *
 * @throws BuildException if {@code input} or {@code output} has not been set, or wrapping any
 *     exception raised while preprocessing or writing
 */
public void execute() {
  // Validate configuration before the try block so these errors reach the caller directly
  // instead of being re-wrapped by the catch-all below.
  if (input == null) {
    throw new BuildException("Input not specified");
  }
  if (output == null) {
    throw new BuildException("Output not specified");
  }

  PrintWriter writer = null;
  try {
    cpp.addInput(this.input);
    writer = new PrintWriter(new FileWriter(this.output));
    for (; ; ) {
      Token tok = cpp.getNextToken();
      // A null token is treated as end of stream; previously it fell through to
      // tok.lazyPrint(...) and caused a NullPointerException.
      if (tok == null || tok.getType() == Token.EOF) {
        break;
      }
      tok.lazyPrint(writer);
    }
  } catch (Exception e) {
    throw new BuildException(e);
  } finally {
    if (writer != null) {
      writer.close();
    }
  }
}
/**
 * Compares the binary doc values of two field data implementations document by document.
 *
 * <p>For every document this asserts that both sides report the same number of values, that
 * consecutive right-hand values compare strictly increasing under {@code pre.compare}, and that
 * the {@code pre.toString} forms of the left and right value at each position are equal.
 *
 * @param random decides, per side, whether {@code load} or {@code loadDirect} is used
 * @param context the reader context whose documents are checked
 * @param left first field data implementation
 * @param right second field data implementation
 * @param pre comparison / string-conversion strategy applied to the values
 * @throws Exception if loading or reading the field data fails
 */
private static void duelFieldDataBytes(
    Random random,
    AtomicReaderContext context,
    IndexFieldData<?> left,
    IndexFieldData<?> right,
    Preprocessor pre)
    throws Exception {
  // The left side draws its random boolean first, then the right side.
  final AtomicFieldData leftFieldData;
  if (random.nextBoolean()) {
    leftFieldData = left.load(context);
  } else {
    leftFieldData = left.loadDirect(context);
  }
  final AtomicFieldData rightFieldData;
  if (random.nextBoolean()) {
    rightFieldData = right.load(context);
  } else {
    rightFieldData = right.loadDirect(context);
  }

  final int maxDoc = context.reader().maxDoc();
  SortedBinaryDocValues leftValues = leftFieldData.getBytesValues();
  SortedBinaryDocValues rightValues = rightFieldData.getBytesValues();
  BytesRef leftScratch = new BytesRef();
  BytesRef rightScratch = new BytesRef();

  for (int docId = 0; docId < maxDoc; docId++) {
    leftValues.setDocument(docId);
    rightValues.setDocument(docId);

    int valueCount = leftValues.count();
    assertThat(valueCount, equalTo(rightValues.count()));

    BytesRef lastSeen = null;
    for (int slot = 0; slot < valueCount; slot++) {
      rightScratch.copyBytes(rightValues.valueAt(slot));
      leftScratch.copyBytes(leftValues.valueAt(slot));

      // Values within a document must be strictly increasing.
      if (lastSeen != null) {
        assertThat(pre.compare(lastSeen, rightScratch), lessThan(0));
      }
      lastSeen = BytesRef.deepCopyOf(rightScratch);

      // Results intentionally discarded; kept so the call sequence matches the original.
      pre.toString(rightScratch);
      pre.toString(leftScratch);

      assertThat(pre.toString(leftScratch), equalTo(pre.toString(rightScratch)));
    }
  }
}
/** Test if the file type can be "tiled". */ private static void validateIsTilable(String typeString) { boolean affective = PreferenceManager.getInstance().getAsBoolean(PreferenceManager.AFFECTIVE_ENABLE); if (!(typeString.endsWith("cn") || typeString.endsWith("igv") || typeString.endsWith("wig") || // ifile.toLowerCase().endsWith("cpg.txt") || typeString.endsWith("ewig") || typeString.endsWith("cn") || typeString.endsWith("snp") || typeString.endsWith("xcn") || typeString.endsWith("gct") || typeString.endsWith("tab") || typeString.endsWith("mage-tab") || typeString.endsWith("bedgraph") || Preprocessor.isAlignmentFile(typeString) || affective)) { throw new PreprocessingException( "Tile command not supported for files of type: " + typeString); } }
/**
 * Indexes a file, deriving its type from the file name via {@code Preprocessor.getExtension}.
 *
 * @param ifile path of the file to index
 * @param outputDir passed through to the five-argument overload
 * @param indexType passed through to the five-argument overload
 * @param binSize passed through to the five-argument overload
 * @return whatever the five-argument {@code doIndex} overload returns
 * @throws IOException if the delegate fails with an I/O error
 */
public String doIndex(String ifile, String outputDir, int indexType, int binSize)
    throws IOException {
  return doIndex(ifile, Preprocessor.getExtension(ifile), outputDir, indexType, binSize);
}
/** * Compute coverage or density of an alignment or feature file. * * @param ifile Alignment or feature file * @param ofile Output file * @param genomeId Genome id (e.g. hg18) or full path to a .genome file (e.g. * /xchip/igv/scer2.genome) * @param maxZoomValue Maximum zoom level to precompute. Default value is 7 * @param windowFunctions * @param windowSizeValue * @param extFactorValue * @param trackLine * @param queryString * @param minMapQuality * @param countFlags * @throws IOException */ public void doCount( String ifile, String ofile, String genomeId, int maxZoomValue, Collection<WindowFunction> windowFunctions, int windowSizeValue, int extFactorValue, String trackLine, String queryString, int minMapQuality, int countFlags) throws IOException { System.out.println("Computing coverage. File = " + ifile); System.out.println("Max zoom = " + maxZoomValue); System.out.println("Window size = " + windowSizeValue); System.out.print("Window functions: "); for (WindowFunction wf : windowFunctions) { System.out.print(wf.toString() + " "); } System.out.println(); System.out.println("Ext factor = " + extFactorValue); Genome genome = loadGenome(genomeId, false); if (genome == null) { throw new PreprocessingException("Genome could not be loaded: " + genomeId); } // Multiple files allowed for count command (a tdf and a wig) File tdfFile = null; File wigFile = null; String[] files = ofile.split(","); if (files[0].endsWith("wig")) { wigFile = new File(files[0]); } else { tdfFile = new File(files[0]); } if (files.length > 1) { if (files[1].endsWith("wig")) { wigFile = new File(files[1]); } else if (files[1].endsWith("tdf")) { tdfFile = new File(files[1]); } } if (tdfFile != null && !tdfFile.getName().endsWith(".tdf")) { tdfFile = new File(tdfFile.getAbsolutePath() + ".tdf"); } Preprocessor p = new Preprocessor(tdfFile, genome, windowFunctions, -1, null); // p.count(ifile, windowSizeValue, extFactorValue, maxZoomValue, wigFile, coverageOpt, // trackLine); p.count( ifile, 
windowSizeValue, extFactorValue, maxZoomValue, wigFile, trackLine, queryString, minMapQuality, countFlags); p.finish(); System.out.flush(); }
public void toTDF( String typeString, String ifile, String ofile, String probeFile, String genomeId, int maxZoomValue, Collection<WindowFunction> windowFunctions, String tmpDirName, int maxRecords) throws IOException, PreprocessingException { if (!ifile.endsWith(".affective.csv")) validateIsTilable(typeString); System.out.println("toTDF. File = " + ifile); System.out.println("Max zoom = " + maxZoomValue); if (probeFile != null && probeFile.trim().length() > 0) { System.out.println("Probe file = " + probeFile); } System.out.print("Window functions: "); for (WindowFunction wf : windowFunctions) { System.out.print(wf.toString() + " "); } System.out.println(); boolean isGCT = isGCT(typeString); Genome genome = loadGenome(genomeId, isGCT); if (genome == null) { throw new PreprocessingException("Genome could not be loaded: " + genomeId); } File inputFileOrDir = new File(ifile); // Estimae the total number of lines to be parsed, for progress updates int nLines = estimateLineCount(inputFileOrDir); // TODO -- move this block of code out of here, this should be done before calling this method // Convert gct files to igv format first File deleteme = null; if (isGCT(typeString)) { File tmpDir = null; if (tmpDirName != null && tmpDirName.length() > 0) { tmpDir = new File(tmpDirName); if (!tmpDir.exists() || !tmpDir.isDirectory()) { throw new PreprocessingException( "Specified tmp directory does not exist or is not directory: " + tmpDirName); } } else { tmpDir = new File(System.getProperty("java.io.tmpdir"), System.getProperty("user.name")); } if (!tmpDir.exists()) { tmpDir.mkdir(); } String baseName = (new File(ifile)).getName(); File igvFile = new File(tmpDir, baseName + ".igv"); igvFile.deleteOnExit(); doGCTtoIGV(typeString, ifile, igvFile, probeFile, maxRecords, tmpDirName, genome); inputFileOrDir = igvFile; deleteme = igvFile; typeString = ".igv"; } // Convert to tdf File outputFile = new File(ofile); try { Preprocessor p = new Preprocessor(outputFile, genome, 
windowFunctions, nLines, null); if (inputFileOrDir.isDirectory() || inputFileOrDir.getName().endsWith(".list")) { List<File> files = getFilesFromDirOrList(inputFileOrDir); for (File f : files) { p.preprocess(f, maxZoomValue, typeString); } } else { p.preprocess(inputFileOrDir, maxZoomValue, typeString); } p.finish(); } catch (IOException e) { e.printStackTrace(); // Delete output file as its probably corrupt if (outputFile.exists()) { outputFile.delete(); } } finally { if (deleteme != null && deleteme.exists()) { deleteme.delete(); } } System.out.flush(); }
void run(String[] argv) { if (argv.length == 0) { System.out.println(usageString()); System.out.println("Error: No arguments provided"); return; } String command = argv[0].toLowerCase(); if (command.equals(CMD_HELP)) { if (argv.length > 1) { System.out.println(usageString(argv[1])); } else { System.out.println(usageString()); } return; } if (command.equals(CMD_GUI)) { launchGUI(); Runtime.getRuntime().halt(0); } // Do "version" now, its the only command with no arguments if (command.equals(CMD_VERSION)) { System.out.println(getVersionString()); return; } CmdLineParser parser = initParser(command); // Parse optional arguments (switches, etc) try { parser.parse(argv); } catch (CmdLineParser.OptionException e) { System.err.println(e.getMessage()); System.out.println("Enter igvtools help " + command + " for help on this command"); return; } String tmpDirName = null; if (tmpDirOption != null) { tmpDirName = (String) parser.getOptionValue(tmpDirOption, null); } int maxRecords = MAX_RECORDS_IN_RAM; if (maxRecordsOption != null) { maxRecords = (Integer) parser.getOptionValue(maxRecordsOption, MAX_RECORDS_IN_RAM); } String[] nonOptionArgs = parser.getRemainingArgs(); try { String basic_syntax = "Error in syntax. Enter igvtools help " + command + " for usage instructions."; // All remaining commands require an input file, and most need the file extension. Do that // here. 
validateArgsLength(nonOptionArgs, 2, "Error: No input file provided"); String ifile = nonOptionArgs[1]; boolean isList = ifile.indexOf(",") > 0; if (!isList && !FileUtils.resourceExists(ifile)) { throw new PreprocessingException("File not found: " + ifile); } String typeString = null; if (typeOption != null) { typeString = (String) parser.getOptionValue(typeOption); } if (typeString == null || typeString.length() == 0) { typeString = Preprocessor.getExtension(ifile).toLowerCase(); } else { typeString = typeString.toLowerCase(); } if (command.equals(CMD_COUNT) || command.equals(CMD_TILE) || command.equals(CMD_TOTDF)) { // Parse out options common to both count and tile validateArgsLength(nonOptionArgs, 4, basic_syntax); int maxZoomValue = (Integer) parser.getOptionValue(maxZoomOption, MAX_ZOOM); String ofile = nonOptionArgs[2]; String genomeId = nonOptionArgs[3]; boolean isGCT = typeString.endsWith("gct") || typeString.equals("mage-tab"); String wfsString = (String) parser.getOptionValue(windowFunctions); Collection<WindowFunction> wfList = parseWFS(wfsString, isGCT); if (command.equals(CMD_COUNT)) { String trackLine = null; String color = (String) parser.getOptionValue(colorOption); if (color != null) { trackLine = "track color=\"" + color + "\""; } int extFactorValue = (Integer) parser.getOptionValue(extFactorOption, EXT_FACTOR); int countFlags = parseCountFlags(parser); String queryString = (String) parser.getOptionValue(queryStringOpt); int minMapQuality = (Integer) parser.getOptionValue(minMapQualityOpt, 0); int windowSizeValue = (Integer) parser.getOptionValue(windowSizeOption, WINDOW_SIZE); doCount( ifile, ofile, genomeId, maxZoomValue, wfList, windowSizeValue, extFactorValue, trackLine, queryString, minMapQuality, countFlags); } else { String probeFile = (String) parser.getOptionValue(probeFileOption, PROBE_FILE); toTDF( typeString, ifile, ofile, probeFile, genomeId, maxZoomValue, wfList, tmpDirName, maxRecords); } } else if (command.equals(CMD_SORT)) { 
validateArgsLength(nonOptionArgs, 3, basic_syntax); String ofile = nonOptionArgs[2]; doSort(ifile, ofile, tmpDirName, maxRecords); } else if (command.equals(CMD_INDEX)) { int indexType = (Integer) parser.getOptionValue(indexTypeOption, LINEAR_INDEX); int defaultBinSize = indexType == LINEAR_INDEX ? LINEAR_BIN_SIZE : INTERVAL_SIZE; int binSize = (Integer) parser.getOptionValue(binSizeOption, defaultBinSize); String outputDir = (String) parser.getOptionValue(outputDirOption, null); doIndex(ifile, typeString, outputDir, indexType, binSize); } else if (command.equals(CMD_FORMATEXP)) { validateArgsLength(nonOptionArgs, 3, basic_syntax); File inputFile = new File(nonOptionArgs[1]); File outputFile = new File(nonOptionArgs[2]); (new ExpressionFormatter()).convert(inputFile, outputFile); } else if (command.equals("wibtowig")) { validateArgsLength( nonOptionArgs, 4, "Error in syntax. Expected: " + command + " [options] txtfile wibfile wigfile"); File txtFile = new File(nonOptionArgs[1]); File wibFile = new File(nonOptionArgs[2]); File wigFile = new File(nonOptionArgs[3]); String trackLine = nonOptionArgs.length > 4 ? nonOptionArgs[4] : null; doWIBtoWIG(txtFile, wibFile, wigFile, trackLine); } else if (command.equals("splitgff")) { validateArgsLength( nonOptionArgs, 3, "Error in syntax. 
Expected: " + command + " [options] inputfile outputdir"); String outputDirectory = nonOptionArgs[2]; GFFParser.splitFileByType(ifile, outputDirectory); } else if (command.toLowerCase().equals("gcttoigv")) { validateArgsLength(nonOptionArgs, 4, basic_syntax + " genomeId"); String ofile = nonOptionArgs[2]; // Output files must have .igv extension if (!ofile.endsWith(".igv")) { ofile = ofile + ".igv"; } String genomeId = nonOptionArgs[3]; Genome genome = loadGenome(genomeId, true); if (genome == null) { throw new PreprocessingException("Genome could not be loaded: " + genomeId); } String probeFile = (String) parser.getOptionValue(probeFileOption, PROBE_FILE); doGCTtoIGV(typeString, ifile, new File(ofile), probeFile, maxRecords, tmpDirName, genome); } else if (command.toLowerCase().equals("tdftobedgraph")) { validateArgsLength(nonOptionArgs, 3, basic_syntax); String ofile = nonOptionArgs[2]; TDFUtils.tdfToBedgraph(ifile, ofile); } else if (command.equals("wigtobed")) { validateArgsLength( nonOptionArgs, 2, "Error in syntax. Expected: " + command + " [options] inputfile"); String inputFile = nonOptionArgs[1]; float hetThreshold = 0.17f; if (nonOptionArgs.length > 2) { hetThreshold = Float.parseFloat(nonOptionArgs[2]); } float homThreshold = 0.55f; if (nonOptionArgs.length > 3) { homThreshold = Float.parseFloat(nonOptionArgs[3]); } WigToBed.run(inputFile, hetThreshold, homThreshold); } else if (command.equals("vcftobed")) { validateArgsLength(nonOptionArgs, 3, basic_syntax); String inputFile = nonOptionArgs[1]; String outputFile = nonOptionArgs[2]; VCFtoBed.convert(inputFile, outputFile); } else if (command.equals("sumwigs")) { sumWigs(nonOptionArgs[1], nonOptionArgs[2]); } else if (command.equals("densitiestobedgraph")) { validateArgsLength( nonOptionArgs, 3, "Error in syntax. 
Expected: " + command + " [options] inputdir outputdir"); File inputDir = new File(nonOptionArgs[1]); File outputDir = new File(nonOptionArgs[2]); if (inputDir.isDirectory() && outputDir.isDirectory()) { DensitiesToBedGraph.convert(inputDir, outputDir); } else if (inputDir.isFile() && outputDir.isFile()) { DensitiesToBedGraph.convert(inputDir, outputDir); } } else if (command.equals(CMD_BAMTOBED)) { validateArgsLength(nonOptionArgs, 3, basic_syntax); String ofile = nonOptionArgs[2]; Boolean pairOption = (Boolean) parser.getOptionValue(pairedCoverageOpt, false); BamToBed.convert(new File(ifile), new File(ofile), pairOption); } else if (command.equalsIgnoreCase("genGenomeList")) { // Generate a genomes.txt list file based on a directory // TODO Probably a better place for this. Users won't generally use it File inDir = new File(ifile); GenomeManager manager = GenomeManager.getInstance(); manager.generateGenomeList(inDir, nonOptionArgs[2], nonOptionArgs[3]); } else { throw new PreprocessingException("Unknown command: " + argv[EXT_FACTOR]); } } catch (PreprocessingException e) { System.err.println(e.getMessage()); } catch (IOException e) { throw new PreprocessingException("Unexpected IO error: ", e); } }
public void preprocessor(Preprocessor preprocessor) { // this may change in future to external class to deal with dynamic // imports javaImport(rubyRuntime, preprocessor.getClass()); this.asciidoctorModule.preprocessor(preprocessor); }
public static void main(String[] argv) throws IOException, CmdLineParser.UnknownOptionException, CmdLineParser.IllegalOptionValueException { if (argv.length < 4) { System.out.println("Usage: hictools pre <options> <inputFile> <outputFile> <genomeID>"); System.out.println(" <options>: -d only calculate intra chromosome (diagonal) [false]"); System.out.println( " : -o calculate densities (observed/expected), write to file [false]"); System.out.println(" : -t <int> only write cells with count above threshold t [0]"); System.out.println( " : -c <chromosome ID> only calculate map on specific chromosome"); System.exit(0); } Globals.setHeadless(true); CommandLineParser parser = new CommandLineParser(); parser.parse(argv); String[] args = parser.getRemainingArgs(); if (args[0].equals("sort")) { AlignmentsSorter.sort(args[1], args[2], null); } else if (args[0].equals("pairsToBin")) { String ifile = args[1]; String ofile = args[2]; String genomeId = args[3]; List<Chromosome> chromosomes = loadChromosomes(genomeId); AsciiToBinConverter.convert(ifile, ofile, chromosomes); } else if (args[0].equals("binToPairs")) { String ifile = args[1]; String ofile = args[2]; AsciiToBinConverter.convertBack(ifile, ofile); } else if (args[0].equals("printmatrix")) { if (args.length < 5) { System.err.println( "Usage: hictools printmatrix <observed/oe/pearson> hicFile chr1 chr2 binsize"); System.exit(-1); } String type = args[1]; String file = args[2]; String chr1 = args[3]; String chr2 = args[4]; String binSizeSt = args[5]; int binSize = 0; try { binSize = Integer.parseInt(binSizeSt); } catch (NumberFormatException e) { System.err.println("Integer expected. 
Found: " + binSizeSt); System.exit(-1); } dumpMatrix(file, chr1, chr2, binSize, type); } else if (args[0].equals("eigenvector")) { if (args.length < 4) { System.err.println("Usage: hictools eigenvector hicFile chr binsize"); } String file = args[1]; String chr = args[2]; String binSizeSt = args[3]; int binSize = 0; try { binSize = Integer.parseInt(binSizeSt); } catch (NumberFormatException e) { System.err.println("Integer expected. Found: " + binSizeSt); System.exit(-1); } calculateEigenvector(file, chr, binSize); } else if (args[0].equals("pre")) { String genomeId = ""; try { genomeId = args[3]; } catch (ArrayIndexOutOfBoundsException e) { System.err.println("No genome ID given"); System.exit(0); } List<Chromosome> chromosomes = loadChromosomes(genomeId); long genomeLength = 0; for (Chromosome c : chromosomes) { if (c != null) genomeLength += c.getSize(); } chromosomes.set(0, new Chromosome(0, "All", (int) (genomeLength / 1000))); String[] tokens = args[1].split(","); List<String> files = new ArrayList<String>(tokens.length); for (String f : tokens) { files.add(f); } Preprocessor preprocessor = new Preprocessor(new File(args[2]), chromosomes); preprocessor.setIncludedChromosomes(parser.getChromosomeOption()); preprocessor.setCountThreshold(parser.getCountThresholdOption()); preprocessor.setNumberOfThreads(parser.getThreadedOption()); preprocessor.setDiagonalsOnly(parser.getDiagonalsOption()); preprocessor.setLoadDensities(parser.getDensitiesOption()); preprocessor.preprocess(files); } }
package antenna.preprocessor.v3;
// @OverrideMustInvoke /* pp */ void init(Preprocessor pp) { setListener(pp.getListener()); this.werror = pp.getWarnings().contains(Warning.ERROR); }