예제 #1
0
  /**
   * Encodes a character sequence onto an output stream and reports how many bytes were written.
   *
   * <p>The target stream is wrapped in a {@code ByteCountOutputStream}, which in turn is wrapped
   * in an {@link OutputStreamWriter} configured with the {@code cs} field of this object. The
   * writer is always closed before this method returns, on both the success and the failure
   * path. Closing the writer closes the counting stream it wraps; whether that also closes
   * {@code os} depends on {@code ByteCountOutputStream} (presumably it does; confirm).
   *
   * @param s the characters to encode
   * @param os the stream that receives the encoded bytes
   * @return the value reported by the counting stream's {@code getNWritten()} once the writer
   *     has been closed
   * @throws RuntimeException wrapping any {@link IOException} raised while writing or closing
   */
  public int encode(final CharSequence s, final OutputStream os) {

    final ByteCountOutputStream bcos = new ByteCountOutputStream(os);

    // try-with-resources flushes and closes the writer even when a write fails; previously a
    // failed write skipped close() altogether.
    try (OutputStreamWriter w = new OutputStreamWriter(bcos, cs)) {

      if (s instanceof MutableString) {
        // Efficient: tunnels to the backing char[].
        final MutableString t = (MutableString) s;
        w.write(t.array(), 0, t.length());
      } else if (s instanceof String) {
        w.write((String) s);
      } else {
        // TODO optimize for CharBuffer, StringBuilder
        w.write(s.toString());
      }

    } catch (IOException ex) {

      throw new RuntimeException(ex);
    }

    // Read the counter only after close() so bytes emitted by the final flush are included.
    return bcos.getNWritten();
  }
  /**
   * Looks for the best alignment of the beginning of {@code toFind} against the end of
   * {@code sequence}.
   *
   * <p>The search starts from the overlap reported by {@code overlapPortion} and then tries
   * progressively shorter candidates: each step moves the start in {@code sequence} one position
   * to the right and compares one character fewer, stopping once the candidate would be shorter
   * than {@code minMatchLength}. The candidate with the fewest differences is kept; when two
   * candidates tie, the later (shorter) one replaces the earlier one. A candidate with zero
   * differences ends the search immediately.
   *
   * <p>If no candidate is evaluated at all, the result carries {@link Integer#MAX_VALUE}
   * differences and a barcode start position of 0.
   *
   * @param sequence the sequence to search
   * @param toFind the string to find
   * @param barcodeIndex the index of the barcode within all of the barcodes
   * @param minMatchLength the minimum match length to consider
   * @return a {@code BarcodeMatcherResult} built from the barcode index, the fewest differences
   *     found, the length of {@code sequence} left in front of the match, the position in
   *     {@code sequence} where the match starts, and the length of the match
   */
  @Override
  BarcodeMatcherResult bestMatch(
      final MutableString sequence,
      final MutableString toFind,
      final int barcodeIndex,
      final int minMatchLength) {
    final OverlapResult overlap = overlapPortion(sequence, toFind);

    int fewestDiffs = Integer.MAX_VALUE;
    int bestBarcodeStart = 0;

    // Each round compares a candidate that is one character shorter and starts one position
    // further right than the previous one.
    int candidateLength = overlap.length;
    int shift = 0;
    while (candidateLength >= minMatchLength) {
      final int diffs =
          numDifferences(sequence, toFind, overlap.start + shift, 0, candidateLength);
      if (diffs <= fewestDiffs) {
        fewestDiffs = diffs;
        bestBarcodeStart = sequence.length() - candidateLength;
        if (fewestDiffs == 0) {
          // Exact match: nothing can beat it.
          break;
        }
      }
      candidateLength--;
      shift++;
    }

    final int sequenceLength = sequence.length();
    final int matchedLength = sequenceLength - bestBarcodeStart;
    final int remainingLength = sequenceLength - matchedLength;

    return new BarcodeMatcherResult(
        barcodeIndex, fewestDiffs, 0, remainingLength, bestBarcodeStart, matchedLength);
  }
  /**
   * Pulls the transcript id and the gene id out of a header line and stores them in
   * {@code transcriptHeader} under the keys {@code "transcriptId"} and {@code "geneId"}.
   *
   * <p>The transcript id is everything before the first space. The gene id is the tail of the
   * header starting six characters after the last space.
   *
   * @param header the text to parse; generally the comment line of a FASTA entry (without the
   *     ">" character)
   */
  private void parseHeader(final MutableString header) {
    final int firstSpace = header.indexOf(' ');
    final int lastSpace = header.lastIndexOf(' ');

    transcriptHeader.put("transcriptId", header.substring(0, firstSpace));

    // NOTE(review): the offset 6 presumably skips the space plus a five-character label such
    // as "gene:" in front of the id; confirm against the header format actually parsed.
    final int geneIdStart = lastSpace + 6;
    transcriptHeader.put("geneId", header.substring(geneIdStart));
  }
 /**
  * Works out, from the two lengths alone, which part of {@code sequence} can line up with
  * {@code toFind} when {@code toFind} is expected at the end of {@code sequence}.
  *
  * <p>When {@code sequence} is at least as long as {@code toFind}, the overlap is the last
  * {@code toFind.length()} characters of {@code sequence}. Otherwise the overlap is the whole of
  * {@code sequence}, starting at 0.
  *
  * @param sequence the string we are searching
  * @param toFind the string we are looking for (at the end of search)
  * @return an OverlapResult whose {@code start} and {@code length} describe the overlap
  */
 OverlapResult overlapPortion(final MutableString sequence, final MutableString toFind) {
   final OverlapResult overlap = new OverlapResult();
   final int sequenceLength = sequence.length();
   final int toFindLength = toFind.length();

   // A negative difference means toFind is longer than sequence, so the overlap starts at 0.
   overlap.start = Math.max(0, sequenceLength - toFindLength);
   overlap.length = Math.min(sequenceLength, toFindLength);
   return overlap;
 }
예제 #5
0
  /**
   * Rebuilds the token lists of this document from its subject and the given relations.
   *
   * <p>The four collections {@code objects}, {@code predicates}, {@code contexts} and
   * {@code subjectTokens} are cleared first. The subject string is then split into words and
   * every word accepted by the term processor is added, lowercased, to {@code subjectTokens}.
   *
   * <p>Afterwards each relation is handled in turn. Relations whose predicate is blacklisted are
   * counted and skipped. For all others, {@code objects}, {@code predicates} and
   * {@code contexts} receive entries in lockstep (one entry in each list per indexed value), so
   * the three lists stay aligned by position:
   *
   * <ul>
   *   <li>an object that is a {@code Resource} contributes one entry: its raw string when the
   *       predicate is {@code RDF.TYPE}, otherwise the id returned by the resource lookup;
   *   <li>an object that is a {@code BNode} contributes one entry holding its looked-up id;
   *   <li>any other object is split into words and contributes one entry per word accepted by
   *       the term processor.
   * </ul>
   *
   * @param relations the relations to index; consumed until exhausted
   * @throws IOException declared by this method; presumably raised by the word reader
   * @throws IllegalStateException if a predicate, context or object cannot be resolved through
   *     {@code factory.lookupResource}, or if a non-null context is not a {@code Resource}
   *     while contexts are enabled
   */
  protected void ensureParsed_(Iterator<Relation> relations) throws IOException {
    // Start from a clean slate so the lists only reflect this parse.
    objects.clear();
    predicates.clear();
    contexts.clear();
    subjectTokens.clear();

    // Index subject tokens
    // We index the BNode id. Do we need it?
    String subject = getSubject();
    FastBufferedReader fbr;
    // Skip everything up to and including the first ':' (the scheme of an http/https URI, or
    // the "_:" prefix).
    int startAt = subject.indexOf(':');

    if (startAt < 0) {
      // No ':' present: tokenize the whole subject.
      fbr = new FastBufferedReader(subject.toCharArray());
    } else {
      startAt++;
      fbr = new FastBufferedReader(subject.toCharArray(), startAt, subject.length() - startAt);
    }
    // Both buffers are reused for every call to next(), including the literal tokenization
    // further down.
    MutableString word = new MutableString();
    MutableString nonWord = new MutableString();
    while (fbr.next(word, nonWord)) {
      // word is never reassigned, so the null check always passes; the effective filter is the
      // empty-word test.
      if (word != null && !word.equals("")) {
        if (CombinedTermProcessor.getInstance().processTerm(word)) {
          subjectTokens.add(word.toString().toLowerCase());
        }
      }
    }
    fbr.close();

    while (relations.hasNext()) {
      Relation relation = relations.next();
      String predicate = relation.getPredicate().toString();

      // Check if prefix is on blacklist (the check is made on the lowercased predicate)
      if (RDFDocumentFactory.isOnPredicateBlacklist(predicate.toLowerCase())) {
        factory.incrementCounter(RdfCounters.BLACKLISTED_TRIPLES, 1);
        continue;
      }

      // Every predicate that is not blacklisted must be known to the factory.
      String predicateId = factory.lookupResource(predicate, false);
      if (predicateId == null) {
        throw new IllegalStateException(
            "Predicate " + predicate + " not in resources hash function!");
      }

      // The context defaults to NO_CONTEXT; it is only resolved when the factory has contexts
      // enabled and the relation carries one.
      String contextId = NO_CONTEXT;
      if (factory.isWithContexts() && relation.getContext() != null) {
        if (relation.getContext() instanceof Resource) {
          contextId = factory.lookupResource(relation.getContext().toString(), false);
          if (contextId == null) {
            throw new IllegalStateException(
                "Context " + relation.getContext() + " not in resources hash function!");
          }
        } else {
          throw new IllegalStateException(
              "Context " + relation.getContext() + " is not a Resource.");
        }
      }

      if (relation.getObject() instanceof Resource) {
        if (predicate.equals(RDF.TYPE.toString())) {
          // rdf:type objects are stored as their raw string, not as a looked-up id.
          factory.incrementCounter(RdfCounters.RDF_TYPE_TRIPLES, 1);
          objects.add(relation.getObject().toString());
        } else {
          // NOTE(review): this lookup passes true where every other lookup in this method
          // passes false; the meaning of the flag is not visible here - confirm.
          String objectId = factory.lookupResource(relation.getObject().toString(), true);
          if (objectId == null) {
            throw new IllegalStateException(
                "Object " + relation.getObject() + " not in resources hash function!");
          }
          objects.add(objectId);
        }
        predicates.add(predicateId);
        contexts.add(contextId);
      // NOTE(review): if Resource and BNode are the OpenRDF/Sesame model interfaces, BNode is a
      // subtype of Resource and this branch can never be reached - confirm.
      } else if (relation.getObject() instanceof BNode) {
        String objectId = factory.lookupResource(relation.getObject().toString(), false);
        if (objectId == null) {
          throw new IllegalStateException(
              "Object " + relation.getObject() + " not in resources hash function!");
        }
        objects.add(objectId);
        predicates.add(predicateId);
        contexts.add(contextId);
      } else {
        // Anything else is tokenized as text.
        String object = relation.getObject().toString();
        // Iterate over the words of the value
        fbr = new FastBufferedReader(object.toCharArray());
        while (fbr.next(word, nonWord)) {
          if (word != null && !word.equals("")) {
            if (CombinedTermProcessor.getInstance().processTerm(word)) {
              // NOTE(review): the original comment here said "Lowercase terms", but the word is
              // added as-is (subject tokens above are lowercased explicitly). Presumably the
              // term processor normalizes the word in place - confirm.
              objects.add(word.toString());

              // Preserve casing for properties and
              // contexts
              predicates.add(predicateId);
              contexts.add(contextId);
            }
          }
        }
        fbr.close();
      }

      // Counted once per relation that was not blacklisted, whatever the object kind.
      factory.incrementCounter(RdfCounters.INDEXED_TRIPLES, 1);
    }
  }
예제 #6
0
  /**
   * Builds a textual description of this record in which lines are separated by {@code <br>}.
   *
   * <p>The description starts with {@code entry.toString()}, with every newline replaced by
   * {@code <br>}. For a paired record a separator line and the mate details follow: mate
   * position, whether the mate is mapped, the insert size (only when both ends are on the same
   * chromosome), the pair orientation (only when non-empty) and first/second-of-pair markers.
   *
   * @param position not used by this implementation
   * @param windowFunction not used by this implementation
   * @return the assembled description
   */
  public String getValueString(double position, WindowFunction windowFunction) {
    final MutableString text = new MutableString();

    text.append(entry.toString());
    text.replace("\n", "<br>");

    // Unpaired records have nothing more to report.
    if (!this.isPaired()) {
      return text.toString();
    }

    text.append("----------------------" + "<br>");

    final String pairStart = "Pair start = " + getMate().positionString() + "<br>";
    text.append(pairStart);

    final String mateMapped;
    if (getMate().isMapped()) {
      mateMapped = "yes";
    } else {
      mateMapped = "no";
    }
    text.append("Pair is mapped = " + mateMapped + "<br>");

    // The insert size is only reported when both ends lie on the same chromosome.
    final boolean sameChromosome = getChr().equals(getMate().getChr());
    if (sameChromosome) {
      final String insertSize = "Insert size = " + getInferredInsertSize() + "<br>";
      text.append(insertSize);
    }

    if (getPairOrientation().length() > 0) {
      final String orientation = "Pair orientation = " + getPairOrientation() + "<br>";
      text.append(orientation);
    }

    if (isFirstOfPair()) {
      text.append("First of pair <br>");
    }
    if (isSecondOfPair()) {
      text.append("Second of pair <br>");
    }

    return text.toString();
  }