public int encode(final CharSequence s, final OutputStream os) { final ByteCountOutputStream bcos = new ByteCountOutputStream(os); // Wrap with Writer using encoder. final OutputStreamWriter w = new OutputStreamWriter(bcos, cs); try { if (s instanceof MutableString) { // Efficient: tunnels to the backing char[]. final MutableString t = (MutableString) s; w.write(t.array(), 0, t.length()); } else if (s instanceof String) { w.write((String) s); } else { // TODO optimize for CharBuffer, StringBuilder w.write(s.toString()); } w.flush(); w.close(); return bcos.getNWritten(); } catch (IOException ex) { throw new RuntimeException(ex); } }
/**
 * Finds the best alignment of the start of {@code toFind} against the tail of {@code sequence}.
 *
 * <p>The scan starts with the full overlap reported by {@code overlapPortion}. On every step the
 * candidate window moves one position towards the end of {@code sequence} and shrinks by one
 * character, and it is always compared with {@code toFind} from index 0. Scanning stops when the
 * window would be shorter than {@code minMatchLength} or as soon as a window with zero
 * differences is found. When two windows have the same number of differences, the later
 * (shorter) one is kept.
 *
 * <p>If no window is examined at all, the reported number of differences stays at
 * {@link Integer#MAX_VALUE} and the barcode start position stays at 0.
 *
 * @param sequence the sequence to search
 * @param toFind the string expected at the end of {@code sequence}
 * @param barcodeIndex the index of the barcode within all of the barcodes; passed through to the
 *     result unchanged
 * @param minMatchLength the shortest window length that is still considered
 * @return a {@code BarcodeMatcherResult} built from, in order: the barcode index, the smallest
 *     number of differences seen, the constant 0, the length of {@code sequence} in front of the
 *     match, the start position of the match in {@code sequence}, and the matched length
 */
@Override
BarcodeMatcherResult bestMatch(
    final MutableString sequence,
    final MutableString toFind,
    final int barcodeIndex,
    final int minMatchLength) {
  final OverlapResult overlap = overlapPortion(sequence, toFind);
  int fewestDiffs = Integer.MAX_VALUE;
  int bestStart = 0;

  for (int shift = 0; overlap.length - shift >= minMatchLength; shift++) {
    final int windowLength = overlap.length - shift;
    final int diffs = numDifferences(sequence, toFind, overlap.start + shift, 0, windowLength);
    if (diffs > fewestDiffs) {
      // Strictly worse than what we already have; ties fall through and replace the best.
      continue;
    }
    fewestDiffs = diffs;
    bestStart = sequence.length() - windowLength;
    if (diffs == 0) {
      // An exact match cannot be improved on.
      break;
    }
  }

  final int sequenceLength = sequence.length();
  final int matchedLength = sequenceLength - bestStart;
  final int remainingLength = sequenceLength - matchedLength;
  return new BarcodeMatcherResult(
      barcodeIndex, fewestDiffs, 0, remainingLength, bestStart, matchedLength);
}
/**
 * Pulls the transcript id and the gene id out of a header line and stores them in
 * {@code transcriptHeader} under the keys {@code "transcriptId"} and {@code "geneId"}.
 *
 * <p>The transcript id is everything before the first space. The gene id is the rest of the
 * header starting six characters after the last space, i.e. the final space-separated token
 * minus its first five characters (NOTE(review): presumably a {@code "gene:"} prefix; confirm
 * against the input format).
 *
 * <p>NOTE(review): a header that contains no space is not special-cased. Both index lookups then
 * yield -1 and are passed to {@code substring} as they are.
 *
 * @param header the string to extract the information from; generally the comment line of a
 *     FASTA entry without the leading ">" character
 */
private void parseHeader(final MutableString header) {
  // Distance from the last space to the first character of the gene id: the space itself plus
  // the five characters that precede the id inside the final token.
  final int geneIdOffset = 6;

  final int firstSpace = header.indexOf(' ');
  final int lastSpace = header.lastIndexOf(' ');

  transcriptHeader.put("transcriptId", header.substring(0, firstSpace));
  transcriptHeader.put("geneId", header.substring(lastSpace + geneIdOffset));
}
/**
 * Computes which trailing part of {@code sequence} can overlap with {@code toFind}, based only on
 * the two lengths.
 *
 * <p>When {@code sequence} is at least as long as {@code toFind}, the overlap is the last
 * {@code toFind.length()} characters of {@code sequence}. Otherwise the overlap is all of
 * {@code sequence}, starting at 0.
 *
 * @param sequence the string being searched
 * @param toFind the string being looked for at the end of {@code sequence}
 * @return an {@code OverlapResult} whose {@code start} is the overlap's first index in
 *     {@code sequence} and whose {@code length} is the shorter of the two lengths
 */
OverlapResult overlapPortion(final MutableString sequence, final MutableString toFind) {
  final OverlapResult result = new OverlapResult();
  final int seqLen = sequence.length();
  final int findLen = toFind.length();
  // A negative difference means toFind is longer than sequence, so the overlap starts at 0.
  result.start = Math.max(0, seqLen - findLen);
  result.length = Math.min(seqLen, findLen);
  return result;
}
protected void ensureParsed_(Iterator<Relation> relations) throws IOException { objects.clear(); predicates.clear(); contexts.clear(); subjectTokens.clear(); // Index subject tokens // We index the BNode id. Do we need it? String subject = getSubject(); FastBufferedReader fbr; // remove http/https or _: int startAt = subject.indexOf(':'); if (startAt < 0) { fbr = new FastBufferedReader(subject.toCharArray()); } else { startAt++; fbr = new FastBufferedReader(subject.toCharArray(), startAt, subject.length() - startAt); } MutableString word = new MutableString(); MutableString nonWord = new MutableString(); while (fbr.next(word, nonWord)) { if (word != null && !word.equals("")) { if (CombinedTermProcessor.getInstance().processTerm(word)) { subjectTokens.add(word.toString().toLowerCase()); } } } fbr.close(); while (relations.hasNext()) { Relation relation = relations.next(); String predicate = relation.getPredicate().toString(); // Check if prefix is on blacklist if (RDFDocumentFactory.isOnPredicateBlacklist(predicate.toLowerCase())) { factory.incrementCounter(RdfCounters.BLACKLISTED_TRIPLES, 1); continue; } String predicateId = factory.lookupResource(predicate, false); if (predicateId == null) { throw new IllegalStateException( "Predicate " + predicate + " not in resources hash function!"); } String contextId = NO_CONTEXT; if (factory.isWithContexts() && relation.getContext() != null) { if (relation.getContext() instanceof Resource) { contextId = factory.lookupResource(relation.getContext().toString(), false); if (contextId == null) { throw new IllegalStateException( "Context " + relation.getContext() + " not in resources hash function!"); } } else { throw new IllegalStateException( "Context " + relation.getContext() + " is not a Resource."); } } if (relation.getObject() instanceof Resource) { if (predicate.equals(RDF.TYPE.toString())) { factory.incrementCounter(RdfCounters.RDF_TYPE_TRIPLES, 1); objects.add(relation.getObject().toString()); } else { String objectId = 
factory.lookupResource(relation.getObject().toString(), true); if (objectId == null) { throw new IllegalStateException( "Object " + relation.getObject() + " not in resources hash function!"); } objects.add(objectId); } predicates.add(predicateId); contexts.add(contextId); } else if (relation.getObject() instanceof BNode) { String objectId = factory.lookupResource(relation.getObject().toString(), false); if (objectId == null) { throw new IllegalStateException( "Object " + relation.getObject() + " not in resources hash function!"); } objects.add(objectId); predicates.add(predicateId); contexts.add(contextId); } else { String object = relation.getObject().toString(); // Iterate over the words of the value fbr = new FastBufferedReader(object.toCharArray()); while (fbr.next(word, nonWord)) { if (word != null && !word.equals("")) { if (CombinedTermProcessor.getInstance().processTerm(word)) { // Lowercase terms objects.add(word.toString()); // Preserve casing for properties and // contexts predicates.add(predicateId); contexts.add(contextId); } } } fbr.close(); } factory.incrementCounter(RdfCounters.INDEXED_TRIPLES, 1); } }
public String getValueString(double position, WindowFunction windowFunction) { // //LOG.info("getValueString"); MutableString buffer = new MutableString(); buffer.append(entry.toString()); buffer.replace("\n", "<br>"); if (this.isPaired()) { buffer.append("----------------------" + "<br>"); buffer.append("Pair start = " + getMate().positionString() + "<br>"); buffer.append("Pair is mapped = " + (getMate().isMapped() ? "yes" : "no") + "<br>"); // buf.append("Pair is proper = " + (getProperPairFlag() ? "yes" : "no") + "<br>"); if (getChr().equals(getMate().getChr())) { buffer.append("Insert size = " + getInferredInsertSize() + "<br>"); } if (getPairOrientation().length() > 0) { buffer.append("Pair orientation = " + getPairOrientation() + "<br>"); } if (isFirstOfPair()) { buffer.append("First of pair <br>"); } if (isSecondOfPair()) { buffer.append("Second of pair <br>"); } } return buffer.toString(); }