/**
   * Scans every input file to work out the dimensions of the merged matrix, then passes the
   * merged tag list and the sorted union of taxon names to {@code newTBT} for the output file.
   *
   * <p>Each input is opened with {@code newTBT}. Its read counts are added to a single shared
   * {@code TagCountMutable}, which is collapsed after every file, and its taxon names are
   * collected in a sorted set. Once all inputs are scanned the tag container is shrunk to its
   * current rows.
   *
   * <p>NOTE(review): per the original documentation this step creates a RandomAccessFile on disk
   * large enough to hold the data; that would happen inside {@code newTBT}, which is not visible
   * here -- confirm.
   *
   * @param infiles paths of the input files to scan
   * @param outfile path handed to {@code newTBT} for the merged output
   */
  public static final void createMergeOutfile(String[] infiles, String outfile) {
    // 50,000,000 works on a Windows7 machine with 4GB of RAM - JCG.
    TagCountMutable mergedTags = new TagCountMutable(2, maxTags);
    myLogger.info("Mutable Tag Count Created.  MaxTags:" + mergedTags.getSize());
    myLogger.info("CurrentSize:" + mergedTags.getCurrentSize());

    TreeSet<String> taxaUnion = new TreeSet<>();
    for (int f = 0; f < infiles.length; f++) {
      String inName = infiles[f];
      myLogger.info("Scanning " + inName + " (file " + (f + 1) + " of " + infiles.length + ").");

      TagsByTaxa source = newTBT(inName);
      if (combineSynonymousTaxa) {
        source.truncateTaxonNames();
      }

      // Accumulate this file's tags and taxa before collapsing the shared counts.
      mergedTags.addReadCounts(source, 1);
      for (String taxon : source.getTaxaNames()) {
        taxaUnion.add(taxon);
      }
      myLogger.info("CurrentSize:" + mergedTags.getCurrentSize());
      myLogger.info("Current Taxa:" + taxaUnion.size());

      mergedTags.collapseCounts();
      myLogger.info("CurrentSize:" + mergedTags.getCurrentSize());
      myLogger.info("Size:" + mergedTags.getSize());
    }

    mergedTags.shrinkToCurrentRows();
    myLogger.info("Size:" + mergedTags.getSize());

    String[] taxaNames = taxaUnion.toArray(new String[taxaUnion.size()]);
    // The returned object is not used by this method; the call is made for its effect only.
    newTBT(outfile, taxaNames, mergedTags);
  }
Пример #2
0
 /**
  * Builds the {@code cls} cluster array from the tags of a tags-by-taxa (TBT) object.
  *
  * <p>Every tag in {@code tbt} is used as a query against a {@code TagMatchFinder} built on the
  * same {@code tbt}. For each returned entry whose value is greater than zero, a {@code cluster}
  * is recorded from the query tag index, the entry key, the total number of entries returned for
  * that query, and {@code true}.
  *
  * <p>NOTE(review): the entry key is presumably the index of the matching tag and the value its
  * divergence from the query, so the {@code > 0} filter would drop exact (self) matches -- confirm
  * against {@code TagMatchFinder.findMatchesWithIntLengthWords}.
  *
  * @param tbt the tags-by-taxa data whose tags are compared against each other
  */
 public void getClusters(TagsByTaxa tbt) {
   // Parameterized (not raw) list so the element type is checked at compile time.
   ArrayList<cluster> clList = new ArrayList<>();
   TagMatchFinder tmf = new TagMatchFinder(tbt);
   for (int i = 0; i < tbt.getTagCount(); i++) {
     long[] qTag = tbt.getTag(i);
     TreeMap<Integer, Integer> hitDiv = tmf.findMatchesWithIntLengthWords(qTag, 1, false);
     for (Entry<Integer, Integer> each : hitDiv.entrySet()) {
       if (each.getValue() > 0) {
         clList.add(new cluster(i, each.getKey(), hitDiv.size(), true));
       }
     }
   }
   cls = clList.toArray(new cluster[clList.size()]);
   // Arrays.sort(cls);
 }
 /**
  * Inserts tag count values into the RandomAccessFile created by
  * {@link #createMergeOutfile(String[], String)}.
  *
  * <p>Each input file is opened with {@code newTBT} and its taxa are mapped onto the output's
  * taxa with {@code taxaRedirect}. For every input tag that exists in the output and every taxon
  * that maps to an output taxon, the input read count is added to the value already stored in
  * the output. The cell is written only when the sum is greater than zero. Tags with a negative
  * output index and taxa with a negative redirect index are skipped.
  *
  * <p>A progress line is printed to standard output each time the number of written cells
  * reaches or passes the next multiple of 100,000.
  *
  * @param infiles paths of the input files whose counts are merged
  * @param outfile path of the already-created output file to fill
  */
 public static final void fillMergeOutfile(String[] infiles, String outfile) {
   final long reportInterval = 100000L;
   TagsByTaxa tbtOut = newTBT(outfile);
   tbtOut.setMethodByRows(true);
   // long: the number of written cells can exceed Integer.MAX_VALUE on large matrices.
   long count = 0;
   long nextReport = reportInterval;
   int filesRead = 0;
   for (String inName : infiles) {
     filesRead++;
     myLogger.info("Scanning " + inName + " (file " + filesRead + " of " + infiles.length + ").");
     TagsByTaxa tbtFM = newTBT(inName);
     if (combineSynonymousTaxa) {
       tbtFM.truncateTaxonNames();
     }
     int[] theTR = taxaRedirect(tbtFM.getTaxaNames(), tbtOut.getTaxaNames());
     for (int i = 0; i < tbtFM.getTagCount(); i++) {
       int toTag = tbtOut.getTagIndex(tbtFM.getTag(i));
       if (toTag < 0) {
         continue; // tag is not present in the output
       }
       for (int t = 0; t < tbtFM.getTaxaCount(); t++) {
         if (theTR[t] < 0) {
           continue; // taxon has no counterpart in the output
         }
         int tagCount =
             tbtOut.getReadCountForTagTaxon(toTag, theTR[t]) + tbtFM.getReadCountForTagTaxon(i, t);
         if (tagCount > 0) {
           tbtOut.setReadCountForTagTaxon(toTag, theTR[t], tagCount);
           count++;
         }
       }
       // Threshold check instead of "count % interval == 0": count can advance by several cells
       // per tag, so an exact-multiple test both skips reports and repeats them while count is
       // unchanged.
       if (count >= nextReport) {
         System.out.printf("Tag:%d BitSet:%d %n", i, count);
         nextReport = (count / reportInterval + 1) * reportInterval;
       }
     }
   }
   tbtOut.getFileReadyForClosing();
 }