Beispiel #1
0
 /**
  * Adds the given macro to {@code cpp}, passing the macro's name, the feature expression returned
  * by {@code FeatureExprLib.True()}, and the macro's value.
  *
  * @param macro the macro to register
  * @throws BuildException wrapping any {@link LexerException} raised while adding the macro
  */
 public void addMacro(Macro macro) {
   try {
     cpp.addMacro(
         macro.getName(),
         FeatureExprLib.True(),
         macro.getValue());
   } catch (LexerException lexerFailure) {
     // Surface lexer problems as build failures, keeping the original exception as the cause.
     throw new BuildException(lexerFailure);
   }
 }
Beispiel #2
0
  /**
   * Builds the {@code Document} used for coref (see method coref(Document doc, StringBuilder[]
   * outputs)). Mention detection and document preprocessing both happen here.
   *
   * @param input the input document; may be {@code null}
   * @return the preprocessed document, or {@code null} when {@code input} is {@code null}
   * @throws Exception declared for failures raised by the steps invoked below
   */
  public Document makeDocument(InputDoc input) throws Exception {
    if (input == null) {
      return null;
    }

    final Annotation annotation = input.annotation;

    // Add missing annotations when this instance is configured to do so.
    if (needMissingAnnotations) {
      addMissingAnnotation(annotation);
    }

    final String useMarkedDiscourse = props.getProperty("hcoref.useMarkedDiscourse", "false");
    if (Boolean.parseBoolean(useMarkedDiscourse)) {
      annotation.set(CoreAnnotations.UseMarkedDiscourseAnnotation.class, true);
    }

    // Chinese CoNLL input: remove nested NPs with the same headword, except for newswire
    // documents (document IDs containing "nw").
    final boolean isChineseConll =
        input.conllDoc != null && CorefProperties.getLanguage(props) == Locale.CHINESE;
    if (isChineseConll) {
      final boolean isNewswire = input.conllDoc.documentID.contains("nw");
      CorefProperties.setRemoveNested(props, !isNewswire);
    }

    // Mention detection supplies start/end index, span and headword for each mention; the
    // remaining mention information is set in the preprocess step below.
    final List<List<Mention>> detectedMentions = md.findMentions(annotation, dict, props);
    final Document document = new Document(input, detectedMentions);

    // Find headwords for gold mentions, when gold mentions are present.
    if (input.goldMentions != null) {
      findGoldMentionHeads(document);
    }

    // Document preprocessing: initialization (ID assignment), mention processing (gender, number,
    // type, etc.), speaker extraction, etc.
    Preprocessor.preprocess(document, dict, singletonPredictor, headFinder);

    return document;
  }
Beispiel #3
0
 /**
  * Feeds {@code input} to {@code cpp} and writes every token it produces, up to end of input, to
  * {@code output}.
  *
  * @throws BuildException if the input or output is not specified, or wrapping any exception
  *     raised while reading tokens or writing the output
  */
 public void execute() {
   // Validate before entering the try block so these errors reach the caller directly instead of
   // being caught below and wrapped in a second BuildException.
   if (input == null) throw new BuildException("Input not specified");
   if (output == null) throw new BuildException("Output not specified");

   PrintWriter writer = null;
   try {
     cpp.addInput(this.input);
     writer = new PrintWriter(new FileWriter(this.output));
     for (; ; ) {
       Token tok = cpp.getNextToken();
       // Stop on a missing token as well as on EOF. The previous guard let a null token fall
       // through to lazyPrint, which failed with a NullPointerException.
       if (tok == null || tok.getType() == Token.EOF) break;
       tok.lazyPrint(writer);
     }
   } catch (Exception e) {
     throw new BuildException(e);
   } finally {
     // Always release the output file, even when token processing fails.
     if (writer != null) {
       writer.close();
     }
   }
 }
  /**
   * Compares the bytes values exposed by two field data instances, document by document.
   *
   * <p>Each side is loaded with either {@code load} or {@code loadDirect}, chosen at random. For
   * every document the value counts must match; for every value, the previous right-hand value
   * must compare as less than the current one under {@code pre.compare}, and both sides must
   * yield equal strings under {@code pre.toString}.
   *
   * @param random source of the load/loadDirect choice
   * @param context reader context whose documents are checked
   * @param left first field data under test
   * @param right second field data under test
   * @param pre supplies the comparison and string conversion used by the assertions
   * @throws Exception if loading fails
   */
  private static void duelFieldDataBytes(
      Random random,
      AtomicReaderContext context,
      IndexFieldData<?> left,
      IndexFieldData<?> right,
      Preprocessor pre)
      throws Exception {
    final AtomicFieldData loadedLeft;
    if (random.nextBoolean()) {
      loadedLeft = left.load(context);
    } else {
      loadedLeft = left.loadDirect(context);
    }
    final AtomicFieldData loadedRight;
    if (random.nextBoolean()) {
      loadedRight = right.load(context);
    } else {
      loadedRight = right.loadDirect(context);
    }

    final int maxDoc = context.reader().maxDoc();
    final SortedBinaryDocValues leftValues = loadedLeft.getBytesValues();
    final SortedBinaryDocValues rightValues = loadedRight.getBytesValues();
    final BytesRef leftScratch = new BytesRef();
    final BytesRef rightScratch = new BytesRef();

    for (int docId = 0; docId < maxDoc; docId++) {
      leftValues.setDocument(docId);
      rightValues.setDocument(docId);

      final int valueCount = leftValues.count();
      assertThat(valueCount, equalTo(rightValues.count()));

      BytesRef lastRight = null;
      for (int valueIndex = 0; valueIndex < valueCount; valueIndex++) {
        rightScratch.copyBytes(rightValues.valueAt(valueIndex));
        leftScratch.copyBytes(leftValues.valueAt(valueIndex));

        // Values within a document must be strictly increasing under the preprocessor's order.
        if (lastRight != null) {
          assertThat(pre.compare(lastRight, rightScratch), lessThan(0));
        }
        lastRight = BytesRef.deepCopyOf(rightScratch);

        // NOTE(review): the results of these two calls are discarded; kept to preserve the
        // original call sequence -- confirm whether they are needed.
        pre.toString(rightScratch);
        pre.toString(leftScratch);
        assertThat(pre.toString(leftScratch), equalTo(pre.toString(rightScratch)));
      }
    }
  }
Beispiel #5
0
  /**
   * Checks that the given file type can be "tiled".
   *
   * <p>A type is accepted when it ends with one of the known suffixes, when
   * {@code Preprocessor.isAlignmentFile} accepts it, or when the {@code AFFECTIVE_ENABLE}
   * preference is set.
   *
   * @param typeString the file type to check
   * @throws PreprocessingException if none of the above holds
   */
  private static void validateIsTilable(String typeString) {

    // Looked up first, unconditionally, exactly as before.
    final boolean affectiveEnabled =
        PreferenceManager.getInstance().getAsBoolean(PreferenceManager.AFFECTIVE_ENABLE);

    final String[] tilableSuffixes = {
      "cn", "igv", "wig", "ewig", "snp", "xcn", "gct", "tab", "mage-tab", "bedgraph"
    };
    for (String suffix : tilableSuffixes) {
      if (typeString.endsWith(suffix)) {
        return;
      }
    }

    // Alignment files are only consulted when no suffix matched; the preference is the last resort.
    if (Preprocessor.isAlignmentFile(typeString) || affectiveEnabled) {
      return;
    }

    throw new PreprocessingException(
        "Tile command not supported for files of type: " + typeString);
  }
Beispiel #6
0
 /**
  * Indexes {@code ifile}, taking the file type from {@code Preprocessor.getExtension(ifile)} and
  * delegating to the overload that accepts an explicit type string.
  *
  * @param ifile the file to index
  * @param outputDir passed through to the delegate
  * @param indexType passed through to the delegate
  * @param binSize passed through to the delegate
  * @return the value returned by the delegate
  * @throws IOException if the delegate fails with an I/O error
  */
 public String doIndex(String ifile, String outputDir, int indexType, int binSize)
     throws IOException {
   return doIndex(ifile, Preprocessor.getExtension(ifile), outputDir, indexType, binSize);
 }
Beispiel #7
0
  /**
   * Computes coverage or density for an alignment or feature file.
   *
   * <p>{@code ofile} may name up to two comma-separated outputs: a name ending in "wig" is used as
   * the wig output, anything else in first position is used as the tdf output (with ".tdf"
   * appended when missing). A second entry is honoured only if it ends in "wig" or "tdf".
   *
   * @param ifile Alignment or feature file
   * @param ofile Output file, or two comma-separated output files
   * @param genomeId Genome id (e.g. hg18) or full path to a .genome file (e.g.
   *     /xchip/igv/scer2.genome)
   * @param maxZoomValue Maximum zoom level to precompute. Default value is 7
   * @param windowFunctions window functions handed to the preprocessor
   * @param windowSizeValue window size passed to the count step
   * @param extFactorValue extension factor passed to the count step
   * @param trackLine track line passed to the count step
   * @param queryString query string passed to the count step
   * @param minMapQuality minimum mapping quality passed to the count step
   * @param countFlags flags passed to the count step
   * @throws IOException if counting fails with an I/O error
   * @throws PreprocessingException if the genome cannot be loaded
   */
  public void doCount(
      String ifile,
      String ofile,
      String genomeId,
      int maxZoomValue,
      Collection<WindowFunction> windowFunctions,
      int windowSizeValue,
      int extFactorValue,
      String trackLine,
      String queryString,
      int minMapQuality,
      int countFlags)
      throws IOException {

    // Echo the effective settings.
    System.out.println("Computing coverage.  File = " + ifile);
    System.out.println("Max zoom = " + maxZoomValue);
    System.out.println("Window size = " + windowSizeValue);
    System.out.print("Window functions: ");
    for (WindowFunction function : windowFunctions) {
      System.out.print(function.toString() + " ");
    }
    System.out.println();
    System.out.println("Ext factor = " + extFactorValue);

    final Genome genome = loadGenome(genomeId, false);
    if (genome == null) {
      throw new PreprocessingException("Genome could not be loaded: " + genomeId);
    }

    // The count command accepts multiple outputs (a tdf and a wig).
    final String[] outputNames = ofile.split(",");
    File tdfFile = null;
    File wigFile = null;

    final String firstOutput = outputNames[0];
    if (!firstOutput.endsWith("wig")) {
      tdfFile = new File(firstOutput);
    } else {
      wigFile = new File(firstOutput);
    }

    if (outputNames.length > 1) {
      final String secondOutput = outputNames[1];
      if (secondOutput.endsWith("wig")) {
        wigFile = new File(secondOutput);
      } else if (secondOutput.endsWith("tdf")) {
        tdfFile = new File(secondOutput);
      }
    }

    // Make sure the tdf output carries the .tdf extension.
    final boolean needsTdfExtension = tdfFile != null && !tdfFile.getName().endsWith(".tdf");
    if (needsTdfExtension) {
      tdfFile = new File(tdfFile.getAbsolutePath() + ".tdf");
    }

    final Preprocessor preprocessor = new Preprocessor(tdfFile, genome, windowFunctions, -1, null);
    preprocessor.count(
        ifile,
        windowSizeValue,
        extFactorValue,
        maxZoomValue,
        wigFile,
        trackLine,
        queryString,
        minMapQuality,
        countFlags);
    preprocessor.finish();

    System.out.flush();
  }
Beispiel #8
0
  /**
   * Converts an input file, directory or ".list" file to a TDF file.
   *
   * <p>Unless {@code ifile} ends with ".affective.csv", the type is first validated as tilable.
   * GCT input is converted to a temporary .igv file before preprocessing; that temporary file is
   * deleted afterwards. An {@link IOException} during preprocessing is printed and the output
   * file is deleted rather than the exception being propagated.
   *
   * @param typeString type of the input file
   * @param ifile input file, directory, or ".list" file
   * @param ofile output file
   * @param probeFile probe file passed to the GCT conversion; may be {@code null} or blank
   * @param genomeId genome identifier passed to {@code loadGenome}
   * @param maxZoomValue maximum zoom value passed to the preprocessor
   * @param windowFunctions window functions passed to the preprocessor
   * @param tmpDirName existing directory for temporary files; when {@code null} or empty, a
   *     per-user directory under {@code java.io.tmpdir} is used
   * @param maxRecords passed to the GCT conversion
   * @throws IOException declared by the signature
   * @throws PreprocessingException if the genome cannot be loaded or the given tmp directory is
   *     missing or not a directory
   */
  public void toTDF(
      String typeString,
      String ifile,
      String ofile,
      String probeFile,
      String genomeId,
      int maxZoomValue,
      Collection<WindowFunction> windowFunctions,
      String tmpDirName,
      int maxRecords)
      throws IOException, PreprocessingException {

    if (!ifile.endsWith(".affective.csv")) {
      validateIsTilable(typeString);
    }

    // Echo the effective settings.
    System.out.println("toTDF.  File = " + ifile);
    System.out.println("Max zoom = " + maxZoomValue);
    final boolean hasProbeFile = probeFile != null && probeFile.trim().length() > 0;
    if (hasProbeFile) {
      System.out.println("Probe file = " + probeFile);
    }
    System.out.print("Window functions: ");
    for (WindowFunction function : windowFunctions) {
      System.out.print(function.toString() + " ");
    }
    System.out.println();

    final boolean gctInput = isGCT(typeString);
    final Genome genome = loadGenome(genomeId, gctInput);
    if (genome == null) {
      throw new PreprocessingException("Genome could not be loaded: " + genomeId);
    }
    File inputFileOrDir = new File(ifile);

    // Estimate the total number of lines to be parsed, for progress updates.
    final int estimatedLines = estimateLineCount(inputFileOrDir);

    // TODO -- move this block of code out of here, this should be done before calling this method
    // Convert gct files to igv format first.
    File temporaryIgv = null;
    if (isGCT(typeString)) {
      final File tmpDir;
      final boolean tmpDirGiven = tmpDirName != null && tmpDirName.length() > 0;
      if (tmpDirGiven) {
        tmpDir = new File(tmpDirName);
        if (!(tmpDir.exists() && tmpDir.isDirectory())) {
          throw new PreprocessingException(
              "Specified tmp directory does not exist or is not directory: " + tmpDirName);
        }
      } else {
        tmpDir = new File(System.getProperty("java.io.tmpdir"), System.getProperty("user.name"));
      }
      if (!tmpDir.exists()) {
        tmpDir.mkdir();
      }

      final File igvFile = new File(tmpDir, new File(ifile).getName() + ".igv");
      igvFile.deleteOnExit();
      doGCTtoIGV(typeString, ifile, igvFile, probeFile, maxRecords, tmpDirName, genome);

      // From here on, process the converted file instead of the original.
      inputFileOrDir = igvFile;
      temporaryIgv = igvFile;
      typeString = ".igv";
    }

    // Convert to tdf.
    final File outputFile = new File(ofile);
    try {
      final Preprocessor preprocessor =
          new Preprocessor(outputFile, genome, windowFunctions, estimatedLines, null);
      final boolean multipleInputs =
          inputFileOrDir.isDirectory() || inputFileOrDir.getName().endsWith(".list");
      if (!multipleInputs) {
        preprocessor.preprocess(inputFileOrDir, maxZoomValue, typeString);
      } else {
        final List<File> inputs = getFilesFromDirOrList(inputFileOrDir);
        for (File each : inputs) {
          preprocessor.preprocess(each, maxZoomValue, typeString);
        }
      }
      preprocessor.finish();
    } catch (IOException ioFailure) {
      ioFailure.printStackTrace();
      // Delete the output file, as it is probably corrupt.
      if (outputFile.exists()) {
        outputFile.delete();
      }
    } finally {
      // Remove the intermediate igv file regardless of the outcome.
      if (temporaryIgv != null && temporaryIgv.exists()) {
        temporaryIgv.delete();
      }
    }

    System.out.flush();
  }
Beispiel #9
0
  /**
   * Parses the command line and dispatches to the matching command.
   *
   * <p>The help, GUI and version commands ({@code CMD_HELP}, {@code CMD_GUI},
   * {@code CMD_VERSION}) are handled before option parsing. Every other command requires an input
   * file as the first non-option argument after the command name. A
   * {@link PreprocessingException} raised while validating arguments or executing a command is
   * reported on {@code System.err}; an {@link IOException} is rethrown wrapped in a
   * {@link PreprocessingException}.
   *
   * @param argv raw command-line arguments; {@code argv[0]} is the command name
   */
  void run(String[] argv) {

    if (argv.length == 0) {
      System.out.println(usageString());
      System.out.println("Error: No arguments provided");
      return;
    }

    // Commands are matched case-insensitively: everything below compares against this value.
    String command = argv[0].toLowerCase();

    if (command.equals(CMD_HELP)) {
      if (argv.length > 1) {
        System.out.println(usageString(argv[1]));
      } else {
        System.out.println(usageString());
      }
      return;
    }

    if (command.equals(CMD_GUI)) {
      launchGUI();
      Runtime.getRuntime().halt(0);
    }

    // Do "version" now, its the only command with no arguments
    if (command.equals(CMD_VERSION)) {
      System.out.println(getVersionString());
      return;
    }

    CmdLineParser parser = initParser(command);

    // Parse optional arguments (switches, etc)
    try {
      parser.parse(argv);
    } catch (CmdLineParser.OptionException e) {
      System.err.println(e.getMessage());
      System.out.println("Enter igvtools help " + command + " for help on this command");
      return;
    }

    String tmpDirName = null;
    if (tmpDirOption != null) {
      tmpDirName = (String) parser.getOptionValue(tmpDirOption, null);
    }
    int maxRecords = MAX_RECORDS_IN_RAM;
    if (maxRecordsOption != null) {
      maxRecords = (Integer) parser.getOptionValue(maxRecordsOption, MAX_RECORDS_IN_RAM);
    }
    String[] nonOptionArgs = parser.getRemainingArgs();

    try {
      String basic_syntax =
          "Error in syntax. Enter igvtools help " + command + " for usage instructions.";

      // All remaining commands require an input file, and most need the file extension.  Do that
      // here.
      validateArgsLength(nonOptionArgs, 2, "Error: No input file provided");
      String ifile = nonOptionArgs[1];

      // A comma in the input marks a list of files; existence is only checked for single files.
      boolean isList = ifile.indexOf(",") > 0;
      if (!isList && !FileUtils.resourceExists(ifile)) {
        throw new PreprocessingException("File not found: " + ifile);
      }

      // An explicit type option wins; otherwise fall back to the input file's extension.
      String typeString = null;
      if (typeOption != null) {
        typeString = (String) parser.getOptionValue(typeOption);
      }
      if (typeString == null || typeString.length() == 0) {
        typeString = Preprocessor.getExtension(ifile).toLowerCase();
      } else {
        typeString = typeString.toLowerCase();
      }

      if (command.equals(CMD_COUNT) || command.equals(CMD_TILE) || command.equals(CMD_TOTDF)) {
        // Parse out options common to both count and tile
        validateArgsLength(nonOptionArgs, 4, basic_syntax);
        int maxZoomValue = (Integer) parser.getOptionValue(maxZoomOption, MAX_ZOOM);
        String ofile = nonOptionArgs[2];
        String genomeId = nonOptionArgs[3];

        boolean isGCT = typeString.endsWith("gct") || typeString.equals("mage-tab");
        String wfsString = (String) parser.getOptionValue(windowFunctions);
        Collection<WindowFunction> wfList = parseWFS(wfsString, isGCT);

        if (command.equals(CMD_COUNT)) {

          String trackLine = null;
          String color = (String) parser.getOptionValue(colorOption);

          if (color != null) {
            trackLine = "track color=\"" + color + "\"";
          }

          int extFactorValue = (Integer) parser.getOptionValue(extFactorOption, EXT_FACTOR);

          int countFlags = parseCountFlags(parser);
          String queryString = (String) parser.getOptionValue(queryStringOpt);
          int minMapQuality = (Integer) parser.getOptionValue(minMapQualityOpt, 0);

          int windowSizeValue = (Integer) parser.getOptionValue(windowSizeOption, WINDOW_SIZE);
          doCount(
              ifile,
              ofile,
              genomeId,
              maxZoomValue,
              wfList,
              windowSizeValue,
              extFactorValue,
              trackLine,
              queryString,
              minMapQuality,
              countFlags);
        } else {
          String probeFile = (String) parser.getOptionValue(probeFileOption, PROBE_FILE);
          toTDF(
              typeString,
              ifile,
              ofile,
              probeFile,
              genomeId,
              maxZoomValue,
              wfList,
              tmpDirName,
              maxRecords);
        }

      } else if (command.equals(CMD_SORT)) {
        validateArgsLength(nonOptionArgs, 3, basic_syntax);
        String ofile = nonOptionArgs[2];
        doSort(ifile, ofile, tmpDirName, maxRecords);
      } else if (command.equals(CMD_INDEX)) {
        int indexType = (Integer) parser.getOptionValue(indexTypeOption, LINEAR_INDEX);
        // The default bin size depends on the kind of index being built.
        int defaultBinSize = indexType == LINEAR_INDEX ? LINEAR_BIN_SIZE : INTERVAL_SIZE;
        int binSize = (Integer) parser.getOptionValue(binSizeOption, defaultBinSize);
        String outputDir = (String) parser.getOptionValue(outputDirOption, null);
        doIndex(ifile, typeString, outputDir, indexType, binSize);
      } else if (command.equals(CMD_FORMATEXP)) {
        validateArgsLength(nonOptionArgs, 3, basic_syntax);
        File inputFile = new File(nonOptionArgs[1]);
        File outputFile = new File(nonOptionArgs[2]);
        (new ExpressionFormatter()).convert(inputFile, outputFile);
      } else if (command.equals("wibtowig")) {
        validateArgsLength(
            nonOptionArgs,
            4,
            "Error in syntax. Expected: " + command + " [options] txtfile wibfile wigfile");
        File txtFile = new File(nonOptionArgs[1]);
        File wibFile = new File(nonOptionArgs[2]);
        File wigFile = new File(nonOptionArgs[3]);
        String trackLine = nonOptionArgs.length > 4 ? nonOptionArgs[4] : null;
        doWIBtoWIG(txtFile, wibFile, wigFile, trackLine);
      } else if (command.equals("splitgff")) {
        validateArgsLength(
            nonOptionArgs,
            3,
            "Error in syntax. Expected: " + command + " [options] inputfile outputdir");
        String outputDirectory = nonOptionArgs[2];
        GFFParser.splitFileByType(ifile, outputDirectory);
      } else if (command.equals("gcttoigv")) {
        validateArgsLength(nonOptionArgs, 4, basic_syntax + " genomeId");
        String ofile = nonOptionArgs[2];
        // Output files must have .igv extension
        if (!ofile.endsWith(".igv")) {
          ofile = ofile + ".igv";
        }
        String genomeId = nonOptionArgs[3];
        Genome genome = loadGenome(genomeId, true);
        if (genome == null) {
          throw new PreprocessingException("Genome could not be loaded: " + genomeId);
        }
        String probeFile = (String) parser.getOptionValue(probeFileOption, PROBE_FILE);
        doGCTtoIGV(typeString, ifile, new File(ofile), probeFile, maxRecords, tmpDirName, genome);
      } else if (command.equals("tdftobedgraph")) {
        validateArgsLength(nonOptionArgs, 3, basic_syntax);
        String ofile = nonOptionArgs[2];
        TDFUtils.tdfToBedgraph(ifile, ofile);
      } else if (command.equals("wigtobed")) {
        validateArgsLength(
            nonOptionArgs, 2, "Error in syntax. Expected: " + command + " [options] inputfile");
        String inputFile = nonOptionArgs[1];
        // Thresholds are optional trailing arguments; these are the values used when absent.
        float hetThreshold = 0.17f;
        if (nonOptionArgs.length > 2) {
          hetThreshold = Float.parseFloat(nonOptionArgs[2]);
        }
        float homThreshold = 0.55f;
        if (nonOptionArgs.length > 3) {
          homThreshold = Float.parseFloat(nonOptionArgs[3]);
        }
        WigToBed.run(inputFile, hetThreshold, homThreshold);
      } else if (command.equals("vcftobed")) {
        validateArgsLength(nonOptionArgs, 3, basic_syntax);
        String inputFile = nonOptionArgs[1];
        String outputFile = nonOptionArgs[2];
        VCFtoBed.convert(inputFile, outputFile);
      } else if (command.equals("sumwigs")) {
        // Both arguments are read below; validate so a missing one is reported as a syntax error
        // rather than failing with an ArrayIndexOutOfBoundsException.
        validateArgsLength(nonOptionArgs, 3, basic_syntax);
        sumWigs(nonOptionArgs[1], nonOptionArgs[2]);
      } else if (command.equals("densitiestobedgraph")) {
        validateArgsLength(
            nonOptionArgs,
            3,
            "Error in syntax. Expected: " + command + " [options] inputdir outputdir");
        File inputDir = new File(nonOptionArgs[1]);
        File outputDir = new File(nonOptionArgs[2]);
        // Convert only when both paths are directories or both are regular files; any other
        // combination is ignored, as before.
        boolean bothDirectories = inputDir.isDirectory() && outputDir.isDirectory();
        if (bothDirectories || (inputDir.isFile() && outputDir.isFile())) {
          DensitiesToBedGraph.convert(inputDir, outputDir);
        }

      } else if (command.equals(CMD_BAMTOBED)) {
        validateArgsLength(nonOptionArgs, 3, basic_syntax);
        String ofile = nonOptionArgs[2];
        Boolean pairOption = (Boolean) parser.getOptionValue(pairedCoverageOpt, false);
        BamToBed.convert(new File(ifile), new File(ofile), pairOption);
      } else if (command.equalsIgnoreCase("genGenomeList")) {
        // Generate a genomes.txt list file based on a directory
        // TODO Probably a better place for this. Users won't generally use it
        // Arguments 2 and 3 are read below; validate so missing ones are reported as a syntax
        // error rather than failing with an ArrayIndexOutOfBoundsException.
        validateArgsLength(nonOptionArgs, 4, basic_syntax);
        File inDir = new File(ifile);
        GenomeManager manager = GenomeManager.getInstance();
        manager.generateGenomeList(inDir, nonOptionArgs[2], nonOptionArgs[3]);
      } else {
        // Report the command as typed. This previously indexed argv with EXT_FACTOR, an
        // unrelated constant.
        throw new PreprocessingException("Unknown command: " + argv[0]);
      }
    } catch (PreprocessingException e) {
      System.err.println(e.getMessage());
    } catch (IOException e) {
      throw new PreprocessingException("Unexpected IO error: ", e);
    }
  }
 /**
  * Registers the given preprocessor: its class is first passed to {@code javaImport} together
  * with the Ruby runtime, then the instance is handed to the Asciidoctor module.
  *
  * @param preprocessor the preprocessor instance to register
  */
 public void preprocessor(Preprocessor preprocessor) {
   // The import step may move to an external class in future, to deal with dynamic imports.
   javaImport(rubyRuntime, preprocessor.getClass());

   asciidoctorModule.preprocessor(preprocessor);
 }
  /**
   * Command-line entry point. After option parsing, the first remaining argument selects the
   * sub-command: {@code sort}, {@code pairsToBin}, {@code binToPairs}, {@code printmatrix},
   * {@code eigenvector} or {@code pre}.
   *
   * <p>Fewer than four raw arguments prints the {@code pre} usage text and exits with status 0.
   * {@code printmatrix} and {@code eigenvector} print their usage and exit with status -1 when
   * too few arguments are supplied.
   *
   * @param argv raw command-line arguments
   * @throws IOException if a sub-command fails with an I/O error
   * @throws CmdLineParser.UnknownOptionException if an unknown option is supplied
   * @throws CmdLineParser.IllegalOptionValueException if an option value is invalid
   */
  public static void main(String[] argv)
      throws IOException, CmdLineParser.UnknownOptionException,
          CmdLineParser.IllegalOptionValueException {

    if (argv.length < 4) {
      System.out.println("Usage: hictools pre <options> <inputFile> <outputFile> <genomeID>");
      System.out.println("  <options>: -d only calculate intra chromosome (diagonal) [false]");
      System.out.println(
          "           : -o calculate densities (observed/expected), write to file [false]");
      System.out.println("           : -t <int> only write cells with count above threshold t [0]");
      System.out.println(
          "           : -c <chromosome ID> only calculate map on specific chromosome");
      System.exit(0);
    }

    Globals.setHeadless(true);

    CommandLineParser parser = new CommandLineParser();
    parser.parse(argv);
    String[] args = parser.getRemainingArgs();

    if (args[0].equals("sort")) {
      AlignmentsSorter.sort(args[1], args[2], null);
    } else if (args[0].equals("pairsToBin")) {
      String ifile = args[1];
      String ofile = args[2];
      String genomeId = args[3];
      List<Chromosome> chromosomes = loadChromosomes(genomeId);
      AsciiToBinConverter.convert(ifile, ofile, chromosomes);
    } else if (args[0].equals("binToPairs")) {
      String ifile = args[1];
      String ofile = args[2];
      AsciiToBinConverter.convertBack(ifile, ofile);
    } else if (args[0].equals("printmatrix")) {
      // The command name plus five operands are read below (up to args[5]), so six entries are
      // required; the previous check for five let args[5] run past the end of the array.
      if (args.length < 6) {
        System.err.println(
            "Usage: hictools printmatrix <observed/oe/pearson> hicFile chr1 chr2 binsize");
        System.exit(-1);
      }
      String type = args[1];
      String file = args[2];
      String chr1 = args[3];
      String chr2 = args[4];
      String binSizeSt = args[5];
      int binSize = 0;
      try {
        binSize = Integer.parseInt(binSizeSt);
      } catch (NumberFormatException e) {
        System.err.println("Integer expected.  Found: " + binSizeSt);
        System.exit(-1);
      }

      dumpMatrix(file, chr1, chr2, binSize, type);

    } else if (args[0].equals("eigenvector")) {
      if (args.length < 4) {
        System.err.println("Usage: hictools eigenvector hicFile chr binsize");
        // Exit after printing usage; previously execution continued and indexed past the end
        // of the array.
        System.exit(-1);
      }
      String file = args[1];
      String chr = args[2];
      String binSizeSt = args[3];
      int binSize = 0;
      try {
        binSize = Integer.parseInt(binSizeSt);
      } catch (NumberFormatException e) {
        System.err.println("Integer expected.  Found: " + binSizeSt);
        System.exit(-1);
      }
      calculateEigenvector(file, chr, binSize);
    } else if (args[0].equals("pre")) {
      // Explicit length check instead of catching ArrayIndexOutOfBoundsException; the message
      // and exit status are unchanged.
      if (args.length < 4) {
        System.err.println("No genome ID given");
        System.exit(0);
      }
      String genomeId = args[3];
      List<Chromosome> chromosomes = loadChromosomes(genomeId);

      // Sum in long to avoid overflow, then replace the entry at index 0 with an "All"
      // pseudo-chromosome whose size is the genome length divided by 1000.
      long genomeLength = 0;
      for (Chromosome c : chromosomes) {
        if (c != null) genomeLength += c.getSize();
      }
      chromosomes.set(0, new Chromosome(0, "All", (int) (genomeLength / 1000)));

      // The input argument may hold several comma-separated files.
      String[] tokens = args[1].split(",");
      List<String> files = new ArrayList<String>(tokens.length);

      for (String f : tokens) {
        files.add(f);
      }

      Preprocessor preprocessor = new Preprocessor(new File(args[2]), chromosomes);

      preprocessor.setIncludedChromosomes(parser.getChromosomeOption());
      preprocessor.setCountThreshold(parser.getCountThresholdOption());
      preprocessor.setNumberOfThreads(parser.getThreadedOption());
      preprocessor.setDiagonalsOnly(parser.getDiagonalsOption());
      preprocessor.setLoadDensities(parser.getDensitiesOption());
      preprocessor.preprocess(files);
    }
  }
package antenna.preprocessor.v3;
Beispiel #13
0
 // @OverrideMustInvoke
 /**
  * Initialises this object from the given preprocessor: adopts its listener and records whether
  * {@code Warning.ERROR} is among the preprocessor's warnings.
  *
  * @param pp the preprocessor to read the listener and warnings from
  */
 /* pp */ void init(Preprocessor pp) {
   setListener(pp.getListener());

   final boolean errorWarningPresent = pp.getWarnings().contains(Warning.ERROR);
   this.werror = errorWarningPresent;
 }