/**
   * Calculate the number of files the gene to transcript relationships should be split to.
   *
   * <p>Within each gene, transcripts that have no file assigned yet (the map returns {@code -1}
   * for them) are numbered consecutively from zero. The result is one more than the largest such
   * number seen across all genes, i.e. the largest count of unassigned transcripts belonging to a
   * single gene, and never less than 1. Transcripts that already have a file index are logged
   * and ignored. The map itself is only read, never modified.
   *
   * @param gtr The gene to transcript relationships
   * @param transcriptIndex2FileIndex A map of transcript identifiers to file indices
   * @return The number of files to write
   */
  private int getNumberOfFiles(
      final GeneTranscriptRelationships gtr, final Int2IntMap transcriptIndex2FileIndex) {
    int largestSlot = 0;
    int gene = 0;
    while (gene < gtr.getNumberOfGenes()) {
      // position of the next unassigned transcript within the current gene
      int slot = 0;
      for (final int transcript : gtr.getTranscriptSet(gene)) {
        if (transcriptIndex2FileIndex.get(transcript) == -1) {
          if (slot > largestSlot) {
            largestSlot = slot;
          }
          slot++;
        } else {
          LOG.warn("Skipping repeated transcriptIndex: " + transcript);
        }
      }
      gene++;
    }
    // slots are zero-based, so the count is the largest slot plus one
    return largestSlot + 1;
  }
  /**
   * Perform the split transcripts mode.
   *
   * <p>Reads every entry of the configured input file into memory, assigns each transcript to an
   * output file index such that transcripts of the same gene receive different indices, writes a
   * tab-delimited gene / transcript listing to {@code <outputBase>.config} and then writes one
   * gzip-compressed file per file index named {@code <outputBase>.<fileNumber>.fa.gz}.
   *
   * @throws IOException if the configuration is invalid or on error reading / writing
   */
  @Override
  public void execute() throws IOException {
    // Refuse to run with a configuration that does not validate
    if (!config.validate()) {
      throw new IOException("Invalid SplitTranscripts configuration");
    }
    final GeneTranscriptRelationships gtr = new GeneTranscriptRelationships();
    final IndexedIdentifier transcriptIdents = new IndexedIdentifier();
    final Int2ObjectMap<MutableString> transcriptIndexToIdMap =
        new Int2ObjectOpenHashMap<MutableString>();
    // A linked list because entries are removed through the iterator as they are written out
    final List<FastXEntry> fastxEntries = new LinkedList<FastXEntry>();
    //
    // Pass through the file once to collect the transcript - gene relationships
    //
    int entryCount = 0;
    try {
      for (final FastXEntry entry : new FastXReader(config.getInputFile())) {
        entryCount++;
        parseHeader(entry.getEntryHeader());
        final MutableString transcriptId = transcriptHeader.get("transcriptId");
        final MutableString geneId = transcriptHeader.get("geneId");

        final int transcriptIndex = transcriptIdents.registerIdentifier(transcriptId);
        gtr.addRelationship(geneId, transcriptIndex);

        transcriptIndexToIdMap.put(transcriptIndex, transcriptId);

        // keep a copy of each entry; presumably the reader reuses its entry object - confirm
        fastxEntries.add(entry.clone());
      }
    } catch (CloneNotSupportedException e) {
      LOG.error("Couldn't clone for some reason", e);
      throw new GobyRuntimeException("Couldn't clone for some reason", e);
    }

    LOG.info("Loading map of genes-transcripts complete.");

    //
    // Scan through the transcript-gene relationships to determine which
    // transcript id goes into which file
    //
    final Int2IntMap transcriptIndex2FileIndex = new Int2IntOpenHashMap();
    final String configOutputFilename = config.getOutputBase() + ".config";
    final String configOutputPath = FilenameUtils.getFullPath(configOutputFilename);
    if (StringUtils.isNotBlank(configOutputPath)) {
      LOG.info("Creating output directory: " + configOutputPath);
      FileUtils.forceMkdir(new File(configOutputPath));
    }

    final int maxEntriesPerFile = config.getMaxEntriesPerFile();
    // Largest file index handed out below; stays -1 when no transcript is assigned
    int largestFileIndex = -1;

    PrintWriter configOutput = null;
    try {
      configOutput = new PrintWriter(configOutputFilename);
      configOutput.println("Ensembl Gene ID\tEnsembl Transcript ID");

      final Int2IntMap fileIndex2NumberOfEntries = new Int2IntOpenHashMap();
      fileIndex2NumberOfEntries.defaultReturnValue(0);
      transcriptIndex2FileIndex.defaultReturnValue(-1);

      final int initialNumberOfFiles = getNumberOfFiles(gtr, transcriptIndex2FileIndex);

      for (int geneIndex = 0; geneIndex < gtr.getNumberOfGenes(); geneIndex++) {
        final MutableString geneId = gtr.getGeneId(geneIndex);
        final IntSet transcriptIndices = gtr.getTranscriptSet(geneIndex);
        // Each transcript of a gene goes to the next bucket so a gene is spread across files
        int fileNum = 0;

        for (final int transcriptIndex : transcriptIndices) {
          if (transcriptIndex2FileIndex.get(transcriptIndex) != -1) {
            LOG.warn("Skipping repeated transcriptIndex: " + transcriptIndex);
            continue;
          }
          // Once a bucket holds maxEntriesPerFile entries it overflows into a further file
          // whose index is offset by a multiple of the initial number of files.
          final int numberOfEntriesInOriginalBucket = fileIndex2NumberOfEntries.get(fileNum);
          final int adjustedFileIndex =
              fileNum
                  + initialNumberOfFiles * (numberOfEntriesInOriginalBucket / maxEntriesPerFile);

          transcriptIndex2FileIndex.put(transcriptIndex, adjustedFileIndex);
          largestFileIndex = Math.max(largestFileIndex, adjustedFileIndex);
          fileIndex2NumberOfEntries.put(fileNum, numberOfEntriesInOriginalBucket + 1);
          final MutableString transcriptId = transcriptIndexToIdMap.get(transcriptIndex);
          configOutput.printf("%s\t%s%n", geneId, transcriptId);

          fileNum++;
        }
      }

      // PrintWriter never throws on write failures, so close it and inspect its error flag
      configOutput.close();
      if (configOutput.checkError()) {
        throw new IOException("Error writing " + configOutputFilename);
      }
    } finally {
      IOUtils.closeQuietly(configOutput);
    }

    final int numFiles = getFileIndices(transcriptIndex2FileIndex).size();
    if (LOG.isInfoEnabled()) {
      LOG.info(
          NumberFormat.getInstance().format(entryCount)
              + " entries will be written to "
              + numFiles
              + " files");
      if (maxEntriesPerFile < Integer.MAX_VALUE) {
        LOG.info("Each file will contain at most " + maxEntriesPerFile + " entries");
      }
    }

    // formatter for uniquely numbering files each with the same number of digits.
    // File indices can be sparse when buckets overflow, so the largest index may exceed
    // numFiles - 1; size the formatter from whichever is larger.
    // NOTE(review): assumes getNumberFormatter takes the largest number to format - confirm.
    final NumberFormat fileNumberFormatter =
        getNumberFormatter(Math.max(numFiles - 1, largestFileIndex));

    final ProgressLogger progressLogger = new ProgressLogger();
    progressLogger.expectedUpdates = entryCount;
    progressLogger.itemsName = "entries";
    progressLogger.start();

    // Write each file one at a time rather than in the order they appear in the input file
    // to avoid the issue of having too many streams open at the same or continually opening
    // and closing streams which is quite costly.  We could store the gene/transcripts in
    // memory and then just write the files at the end but that could be worse.
    for (final int fileIndex : getFileIndices(transcriptIndex2FileIndex)) {
      final String filename =
          config.getOutputBase() + "." + fileNumberFormatter.format(fileIndex) + ".fa.gz";
      // Held separately so the file handle is released even if the gzip wrapper fails to open
      FileOutputStream fileStream = null;
      PrintStream printStream = null;
      try {
        fileStream = new FileOutputStream(filename);
        // each file is compressed
        printStream = new PrintStream(new GZIPOutputStream(fileStream));

        //
        // Go through the entries still held in memory and write those belonging to this file
        //
        final Iterator<FastXEntry> entries = fastxEntries.iterator();
        while (entries.hasNext()) {
          final FastXEntry entry = entries.next();
          parseHeader(entry.getEntryHeader());
          final MutableString transcriptId = transcriptHeader.get("transcriptId");
          final MutableString geneId = transcriptHeader.get("geneId");
          final int transcriptIndex = transcriptIdents.getInt(transcriptId);
          final int transcriptFileIndex = transcriptIndex2FileIndex.get(transcriptIndex);
          if (transcriptFileIndex == fileIndex) {
            printStream.print(entry.getHeaderSymbol());
            printStream.print(transcriptId);
            printStream.print(" gene:");
            printStream.println(geneId);
            printStream.println(entry.getEntrySansHeader());
            // written entries are dropped so later files scan fewer entries
            entries.remove();
            progressLogger.lightUpdate();
          }
        }

        // PrintStream swallows IOExceptions (including a failed gzip trailer on close),
        // so close it here and turn a recorded error into an exception.
        printStream.close();
        if (printStream.checkError()) {
          throw new IOException("Error writing " + filename);
        }
      } finally {
        IOUtils.closeQuietly(printStream);
        IOUtils.closeQuietly(fileStream);
      }
    }

    assert progressLogger.count == entryCount : "Some entries were not processed!";
    progressLogger.done();
  }