Java UnicodeUtil.UTF8toUTF16 Examples, org.apache.lucene.util.UnicodeUtil.UTF8toUTF16 Java Examples

Example #1

0

Show file

File: SimpleTextFieldsReader.java Project: naryad/Solr4.0

 /**
  * Advances to the next live document of the current term.
  *
  * <p>Lines are read one at a time. A DOC line starts a new document, a FREQ line supplies its
  * term frequency, and POS / START_OFFSET / END_OFFSET / PAYLOAD lines are ignored. Any other line
  * (asserted to be TERM, FIELD or END) terminates the term's postings.
  *
  * @return the id of the next document accepted by {@code liveDocs}, or {@code NO_MORE_DOCS}
  * @throws IOException if reading from the input fails
  */
 @Override
 public int nextDoc() throws IOException {
   if (docID == NO_MORE_DOCS) {
     return docID;
   }
   boolean haveDoc = false;
   int pendingFreq = 0;
   for (;;) {
     final long lineStart = in.getFilePointer();
     SimpleTextUtil.readLine(in, scratch);
     final boolean isDocLine = StringHelper.startsWith(scratch, DOC);

     if (!isDocLine) {
       if (StringHelper.startsWith(scratch, FREQ)) {
         UnicodeUtil.UTF8toUTF16(
             scratch.bytes,
             scratch.offset + FREQ.length,
             scratch.length - FREQ.length,
             scratchUTF16);
         pendingFreq = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
         continue;
       }
       if (StringHelper.startsWith(scratch, POS)
           || StringHelper.startsWith(scratch, START_OFFSET)
           || StringHelper.startsWith(scratch, END_OFFSET)
           || StringHelper.startsWith(scratch, PAYLOAD)) {
         // Per-position detail is irrelevant when only iterating documents.
         continue;
       }
       assert StringHelper.startsWith(scratch, TERM)
               || StringHelper.startsWith(scratch, FIELD)
               || StringHelper.startsWith(scratch, END)
           : "scratch=" + scratch.utf8ToString();
     }

     // Either a new DOC line or a terminator: both close the document collected so far.
     if (haveDoc && (liveDocs == null || liveDocs.get(docID))) {
       // Rewind so the next call re-reads the line that ended this document.
       in.seek(lineStart);
       if (!omitTF) {
         tf = pendingFreq;
       }
       return docID;
     }
     if (!isDocLine) {
       return docID = NO_MORE_DOCS;
     }

     // Begin collecting the document announced by this DOC line.
     UnicodeUtil.UTF8toUTF16(
         scratch.bytes,
         scratch.offset + DOC.length,
         scratch.length - DOC.length,
         scratchUTF16);
     docID = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
     pendingFreq = 0;
     haveDoc = true;
   }
 }

Example #2

0

Show file

File: SimpleTextFieldsReader.java Project: naryad/Solr4.0

    /**
     * Reads the next position entry of the current document.
     *
     * <p>When {@code readPositions} is set, a POS line is consumed and parsed. When {@code
     * readOffsets} is set, a START_OFFSET line and an END_OFFSET line are consumed and stored in
     * {@code startOffset} / {@code endOffset}. Finally one more line is read: if it is a PAYLOAD
     * line its content becomes {@code payload}; otherwise {@code payload} is cleared and the input
     * is rewound to the start of that line.
     *
     * @return the parsed position, or {@code -1} when positions are not being read
     * @throws IOException if reading from the input fails
     */
    @Override
    public int nextPosition() throws IOException {
      final int pos;
      if (readPositions) {
        SimpleTextUtil.readLine(in, scratch);
        assert StringHelper.startsWith(scratch, POS) : "got line=" + scratch.utf8ToString();
        UnicodeUtil.UTF8toUTF16(
            scratch.bytes,
            scratch.offset + POS.length,
            scratch.length - POS.length,
            scratchUTF16_2);
        pos = ArrayUtil.parseInt(scratchUTF16_2.chars, 0, scratchUTF16_2.length);
      } else {
        pos = -1;
      }

      if (readOffsets) {
        SimpleTextUtil.readLine(in, scratch);
        assert StringHelper.startsWith(scratch, START_OFFSET)
            : "got line=" + scratch.utf8ToString();
        UnicodeUtil.UTF8toUTF16(
            scratch.bytes,
            scratch.offset + START_OFFSET.length,
            scratch.length - START_OFFSET.length,
            scratchUTF16_2);
        startOffset = ArrayUtil.parseInt(scratchUTF16_2.chars, 0, scratchUTF16_2.length);
        SimpleTextUtil.readLine(in, scratch);
        assert StringHelper.startsWith(scratch, END_OFFSET) : "got line=" + scratch.utf8ToString();
        UnicodeUtil.UTF8toUTF16(
            scratch.bytes,
            scratch.offset + END_OFFSET.length,
            scratch.length - END_OFFSET.length,
            scratchUTF16_2);
        endOffset = ArrayUtil.parseInt(scratchUTF16_2.chars, 0, scratchUTF16_2.length);
      }

      // Remember where the optional PAYLOAD line starts so it can be un-read if absent.
      final long fp = in.getFilePointer();
      SimpleTextUtil.readLine(in, scratch);
      if (StringHelper.startsWith(scratch, PAYLOAD)) {
        final int len = scratch.length - PAYLOAD.length;
        if (scratch2.bytes.length < len) {
          scratch2.grow(len);
        }
        // Honor scratch.offset, as every other read of scratch in this method does; the line's
        // content starts at scratch.offset, not necessarily at index 0 of the backing array.
        System.arraycopy(scratch.bytes, scratch.offset + PAYLOAD.length, scratch2.bytes, 0, len);
        scratch2.length = len;
        payload = scratch2;
      } else {
        payload = null;
        in.seek(fp);
      }
      return pos;
    }

Example #3

0

Show file

File: SimpleTextFieldsReader.java Project: naryad/Solr4.0

 /**
  * Advances to the next live document and leaves the input positioned at that document's
  * position data.
  *
  * <p>Scanning resumes at {@code nextDocStart}. For the returned document, {@code tf} holds the
  * value parsed from its FREQ line (0 if none was seen), {@code nextDocStart} is set to the start
  * of the line that ended the document, and the input is moved to the file pointer recorded right
  * after the FREQ line (0 if no FREQ line was seen during this call).
  *
  * @return the id of the next document accepted by {@code liveDocs}, or {@code NO_MORE_DOCS}
  * @throws IOException if reading from the input fails
  */
 @Override
 public int nextDoc() throws IOException {
   boolean haveDoc = false;
   in.seek(nextDocStart);
   long positionsStart = 0;
   for (;;) {
     final long lineStart = in.getFilePointer();
     SimpleTextUtil.readLine(in, scratch);
     final boolean isDocLine = StringHelper.startsWith(scratch, DOC);

     if (!isDocLine) {
       if (StringHelper.startsWith(scratch, FREQ)) {
         UnicodeUtil.UTF8toUTF16(
             scratch.bytes,
             scratch.offset + FREQ.length,
             scratch.length - FREQ.length,
             scratchUTF16);
         tf = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
         // Whatever follows the FREQ line is where this document's positions begin.
         positionsStart = in.getFilePointer();
         continue;
       }
       if (StringHelper.startsWith(scratch, POS)
           || StringHelper.startsWith(scratch, START_OFFSET)
           || StringHelper.startsWith(scratch, END_OFFSET)
           || StringHelper.startsWith(scratch, PAYLOAD)) {
         // Position detail is consumed elsewhere; just step over it here.
         continue;
       }
       assert StringHelper.startsWith(scratch, TERM)
           || StringHelper.startsWith(scratch, FIELD)
           || StringHelper.startsWith(scratch, END);
     }

     // A new DOC line or a terminator closes the document collected so far.
     if (haveDoc && (liveDocs == null || liveDocs.get(docID))) {
       nextDocStart = lineStart;
       in.seek(positionsStart);
       return docID;
     }
     if (!isDocLine) {
       return docID = NO_MORE_DOCS;
     }

     // Begin collecting the document announced by this DOC line.
     UnicodeUtil.UTF8toUTF16(
         scratch.bytes,
         scratch.offset + DOC.length,
         scratch.length - DOC.length,
         scratchUTF16);
     docID = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
     tf = 0;
     haveDoc = true;
   }
 }

Example #4

0

Show file

File: WFSTCompletionLookup.java Project: ByteInternet/lucene-solr

  /**
   * Returns up to {@code num} completions of {@code key}.
   *
   * <p>The key must first match a prefix exactly; if it does not, an empty list is returned. When
   * {@code exactFirst} is set and the prefix itself is a complete entry, that entry is reported
   * first and counts toward {@code num}. The remaining slots are filled from {@code
   * Util.shortestPaths}.
   *
   * @param key the prefix to complete
   * @param onlyMorePopular not consulted by this method
   * @param num maximum number of results; asserted to be positive
   * @return the completions found, possibly empty
   * @throws RuntimeException wrapping any {@link IOException} raised while reading the automaton
   */
  @Override
  public List<LookupResult> lookup(CharSequence key, boolean onlyMorePopular, int num) {
    assert num > 0;
    final BytesRef utf8Key = new BytesRef(key);
    final int keyLength = utf8Key.length;
    final Arc<Long> prefixArc = new Arc<Long>();

    // The typed prefix has to be matched exactly before any completion is attempted.
    final Long prefixOutput;
    try {
      prefixOutput = lookupPrefix(utf8Key, prefixArc);
    } catch (IOException bogus) {
      throw new RuntimeException(bogus);
    }
    if (prefixOutput == null) {
      return Collections.<LookupResult>emptyList();
    }

    final List<LookupResult> matches = new ArrayList<LookupResult>(num);
    final CharsRef utf16 = new CharsRef();
    if (exactFirst && prefixArc.isFinal()) {
      utf16.grow(utf8Key.length);
      UnicodeUtil.UTF8toUTF16(utf8Key, utf16);
      matches.add(
          new LookupResult(
              utf16.toString(), decodeWeight(prefixOutput + prefixArc.nextFinalOutput)));
      num--;
      if (num == 0) {
        // The exact match already used the only requested slot.
        return matches;
      }
    }

    // Collect the top-N paths leaving the prefix.
    final MinResult<Long>[] topPaths;
    try {
      topPaths = Util.shortestPaths(fst, prefixArc, weightComparator, num);
    } catch (IOException bogus) {
      throw new RuntimeException(bogus);
    }

    final BytesRef tail = new BytesRef(8);
    for (int i = 0; i < topPaths.length; i++) {
      final MinResult<Long> path = topPaths[i];
      // Truncate back to the bare prefix, then append this completion's suffix.
      utf8Key.length = keyLength;
      Util.toBytesRef(path.input, tail);
      utf8Key.append(tail);
      utf16.grow(utf8Key.length);
      UnicodeUtil.UTF8toUTF16(utf8Key, utf16);
      matches.add(new LookupResult(utf16.toString(), decodeWeight(prefixOutput + path.output)));
    }
    return matches;
  }

Example #5

0

Show file

File: DocTermsIndexDocValues.java Project: naryad/Solr4.0

 /**
  * Returns the term of {@code doc} as a string.
  *
  * @param doc the document to look up
  * @return the term decoded from UTF-8, or {@code null} when the document's ordinal is 0
  *     (presumably meaning the document has no term; confirm against the terms index contract)
  */
 @Override
 public String strVal(int doc) {
   final int termOrd = termsIndex.getOrd(doc);
   if (termOrd != 0) {
     termsIndex.lookup(termOrd, spare);
     UnicodeUtil.UTF8toUTF16(spare, spareChars);
     return spareChars.toString();
   }
   return null;
 }

Example #6

0

Show file

File: SortableDoubleField.java Project: bozydar/solr

 /**
  * Converts an indexed term into its readable form, writing the result into {@code charsRef}.
  *
  * @param input the indexed term as UTF-8 bytes
  * @param charsRef the buffer that receives the readable characters
  * @return {@code charsRef}, now holding the readable form
  */
 @Override
 public CharsRef indexedToReadable(BytesRef input, CharsRef charsRef) {
   // TODO: this round-trips through String and could be more efficient, but the sortable types
   // should be deprecated instead
   UnicodeUtil.UTF8toUTF16(input, charsRef);
   final String indexedForm = charsRef.toString();
   final char[] readableChars = indexedToReadable(indexedForm).toCharArray();
   charsRef.copyChars(readableChars, 0, readableChars.length);
   return charsRef;
 }

Example #7

0

Show file

File: PagedBytesReference.java Project: prodigeni/elasticsearch

  /**
   * Decodes this reference's bytes as UTF-8.
   *
   * @return the decoded string, or the empty string when {@code length()} is 0
   */
  @Override
  public String toUtf8() {
    if (length() != 0) {
      final byte[] utf8 = toBytes();
      final CharsRef decoded = new CharsRef(length);
      // NOTE(review): if toBytes() returns a fresh copy whose content starts at index 0, the
      // start index passed here should be 0 rather than offset; confirm against toBytes().
      UnicodeUtil.UTF8toUTF16(utf8, offset, length, decoded);
      return decoded.toString();
    }
    return "";
  }

Example #8

0

Show file

File: FSTCompletionLookup.java Project: sdgdsffdsfff/bookcodes

  /**
   * Looks up completions for {@code key} and converts them to {@link LookupResult}s.
   *
   * @param key the prefix to complete
   * @param higherWeightsFirst selects {@code higherWeightsCompletion} when true, otherwise
   *     {@code normalCompletion}
   * @param num number of completions requested from the underlying lookup
   * @return one result per completion, in the order the underlying lookup returned them
   */
  @Override
  public List<LookupResult> lookup(CharSequence key, boolean higherWeightsFirst, int num) {
    final List<Completion> found =
        higherWeightsFirst
            ? higherWeightsCompletion.lookup(key, num)
            : normalCompletion.lookup(key, num);

    final ArrayList<LookupResult> converted = new ArrayList<LookupResult>(found.size());
    // One buffer is reused for decoding every completion's UTF-8 bytes.
    final CharsRef utf16 = new CharsRef();
    for (Completion completion : found) {
      utf16.grow(completion.utf8.length);
      UnicodeUtil.UTF8toUTF16(completion.utf8, utf16);
      final String text = utf16.toString();
      converted.add(new LookupResult(text, completion.bucket));
    }
    return converted;
  }

Example #9

0

Show file

File: TSTLookup.java Project: jarvisxiong/read-open-source-code

  /**
   * Rebuilds the ternary tree from the entries supplied by {@code tfit}.
   *
   * <p>The input is wrapped in a {@code SortedInputIterator} unless it already reports the
   * UTF8-sorted-as-UTF16 comparator. All entries are buffered before the tree is balanced.
   *
   * @param tfit source of terms and weights
   * @throws IllegalArgumentException if the input carries payloads
   * @throws IOException if reading the input fails
   */
  @Override
  public void build(InputIterator tfit) throws IOException {
    if (tfit.hasPayloads()) {
      throw new IllegalArgumentException("this suggester doesn't support payloads");
    }
    root = new TernaryTreeNode();

    // The tree must be fed in UTF16 sort order; wrap the input if it does not guarantee that.
    final boolean alreadyUtf16Sorted =
        tfit.getComparator() == BytesRef.getUTF8SortedAsUTF16Comparator();
    if (!alreadyUtf16Sorted) {
      tfit = new SortedInputIterator(tfit, BytesRef.getUTF8SortedAsUTF16Comparator());
    }

    // Buffer every term and its weight before building.
    final ArrayList<String> terms = new ArrayList<String>();
    final ArrayList<Number> weights = new ArrayList<Number>();
    final CharsRef utf16 = new CharsRef();
    for (BytesRef term = tfit.next(); term != null; term = tfit.next()) {
      utf16.grow(term.length);
      UnicodeUtil.UTF8toUTF16(term.bytes, term.offset, term.length, utf16);
      terms.add(utf16.toString());
      weights.add(Long.valueOf(tfit.weight()));
    }
    autocomplete.balancedTree(terms.toArray(), weights.toArray(), 0, terms.size() - 1, root);
  }

Example #10

0

Show file

File: TestRegexpRandom2.java Project: simplegeo/lucene-solr

 /**
  * Accepts a term when the automaton matches its UTF-16 form.
  *
  * @param term the candidate term as UTF-8 bytes
  * @return {@code AcceptStatus.YES} if {@code runAutomaton} accepts the decoded term, otherwise
  *     {@code AcceptStatus.NO}
  * @throws IOException declared by the overridden method
  */
 @Override
 protected AcceptStatus accept(BytesRef term) throws IOException {
   UnicodeUtil.UTF8toUTF16(term.bytes, term.offset, term.length, utf16);
   if (runAutomaton.run(utf16.result, 0, utf16.length)) {
     return AcceptStatus.YES;
   }
   return AcceptStatus.NO;
 }

Example #11

0

Show file

File: DateField.java Project: jarvisxiong/read-open-source-code

 /**
  * Decodes the indexed term into {@code charsRef} and appends the first character of {@code
  * Z_ARRAY} (presumably the trailing 'Z' of a UTC timestamp; confirm against Z_ARRAY).
  *
  * @param input the indexed term as UTF-8 bytes
  * @param charsRef the buffer that receives the readable characters
  * @return {@code charsRef}, now holding the readable form
  */
 @Override
 public CharsRef indexedToReadable(BytesRef input, CharsRef charsRef) {
   final CharsRef readable = charsRef;
   UnicodeUtil.UTF8toUTF16(input, readable);
   // Only one char of Z_ARRAY is appended: start index 0, length 1.
   readable.append(Z_ARRAY, 0, 1);
   return readable;
 }

Example #12

0

Show file

File: SimpleTextFieldsReader.java Project: naryad/Solr4.0

 /**
  * Scans this field's postings, starting at {@code termsStart}, and builds the term FST.
  *
  * <p>Each term is mapped to a pair of (file pointer just after its TERM line, (document
  * frequency, total term frequency)). While scanning, the method also updates {@code
  * sumDocFreq}, {@code sumTotalTermFreq} and {@code termCount}, and finally sets {@code
  * docCount} to the number of distinct document ids seen and {@code fst} to the finished FST.
  * Scanning stops at the END line or at the next FIELD line.
  *
  * @throws IOException if reading from the input fails
  */
 private void loadTerms() throws IOException {
   PositiveIntOutputs posIntOutputs = PositiveIntOutputs.getSingleton(false);
   final Builder<PairOutputs.Pair<Long, PairOutputs.Pair<Long, Long>>> b;
   final PairOutputs<Long, Long> outputsInner =
       new PairOutputs<Long, Long>(posIntOutputs, posIntOutputs);
   final PairOutputs<Long, PairOutputs.Pair<Long, Long>> outputs =
       new PairOutputs<Long, PairOutputs.Pair<Long, Long>>(posIntOutputs, outputsInner);
   b =
       new Builder<PairOutputs.Pair<Long, PairOutputs.Pair<Long, Long>>>(
           FST.INPUT_TYPE.BYTE1, outputs);
   // Work on a clone so the enclosing reader's own file position is left untouched.
   IndexInput in = (IndexInput) SimpleTextFieldsReader.this.in.clone();
   in.seek(termsStart);
   final BytesRef lastTerm = new BytesRef(10);
   // -1 means no TERM line has been seen yet, so there is nothing to flush.
   long lastDocsStart = -1;
   int docFreq = 0;
   long totalTermFreq = 0;
   OpenBitSet visitedDocs = new OpenBitSet();
   final IntsRef scratchIntsRef = new IntsRef();
   while (true) {
     SimpleTextUtil.readLine(in, scratch);
     if (scratch.equals(END) || StringHelper.startsWith(scratch, FIELD)) {
       // End of this field: flush the last pending term, if any.
       if (lastDocsStart != -1) {
         b.add(
             Util.toIntsRef(lastTerm, scratchIntsRef),
             outputs.newPair(
                 lastDocsStart, outputsInner.newPair((long) docFreq, totalTermFreq)));
         sumTotalTermFreq += totalTermFreq;
       }
       break;
     } else if (StringHelper.startsWith(scratch, DOC)) {
       docFreq++;
       sumDocFreq++;
       UnicodeUtil.UTF8toUTF16(
           scratch.bytes,
           scratch.offset + DOC.length,
           scratch.length - DOC.length,
           scratchUTF16);
       int docID = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
       visitedDocs.set(docID);
     } else if (StringHelper.startsWith(scratch, FREQ)) {
       UnicodeUtil.UTF8toUTF16(
           scratch.bytes,
           scratch.offset + FREQ.length,
           scratch.length - FREQ.length,
           scratchUTF16);
       totalTermFreq += ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
     } else if (StringHelper.startsWith(scratch, TERM)) {
       // A new term starts: flush the previous one before resetting the counters.
       if (lastDocsStart != -1) {
         b.add(
             Util.toIntsRef(lastTerm, scratchIntsRef),
             outputs.newPair(
                 lastDocsStart, outputsInner.newPair((long) docFreq, totalTermFreq)));
       }
       lastDocsStart = in.getFilePointer();
       final int len = scratch.length - TERM.length;
       if (len > lastTerm.length) {
         lastTerm.grow(len);
       }
       // Honor scratch.offset, as the UTF8toUTF16 calls above do; the line's content starts at
       // scratch.offset, not necessarily at index 0 of the backing array.
       System.arraycopy(scratch.bytes, scratch.offset + TERM.length, lastTerm.bytes, 0, len);
       lastTerm.length = len;
       docFreq = 0;
       sumTotalTermFreq += totalTermFreq;
       totalTermFreq = 0;
       termCount++;
     }
   }
   docCount = (int) visitedDocs.cardinality();
   fst = b.finish();
 }

Example #13

0

Show file

  /**
   * Runs a completion suggestion against every leaf of {@code indexReader}.
   *
   * <p>The suggest text is decoded into {@code spare} and used as the lookup key in each segment
   * whose terms are {@code Completion090PostingsFormat.CompletionTerms}. Results are merged by
   * key across segments; when a key is seen again with a higher score, the stored option's score
   * and payload are replaced. The merged options are sorted with {@code scoreComparator} and at
   * most {@code suggestionContext.getSize()} of them are added to the single returned entry.
   *
   * @param name name given to the returned suggestion
   * @param suggestionContext supplies the field, mapper, text and size of the request
   * @param indexReader reader whose leaves are searched
   * @param spare scratch buffer that receives the decoded suggest text
   * @return the completion suggestion holding one entry with the selected options
   * @throws ElasticsearchException if the context's mapper is missing or is not a {@code
   *     CompletionFieldMapper}
   * @throws IOException if reading the index fails
   */
  @Override
  protected Suggest.Suggestion<
          ? extends Suggest.Suggestion.Entry<? extends Suggest.Suggestion.Entry.Option>>
      innerExecute(
          String name,
          CompletionSuggestionContext suggestionContext,
          IndexReader indexReader,
          CharsRef spare)
          throws IOException {
    if (!(suggestionContext.mapper() != null
        && suggestionContext.mapper() instanceof CompletionFieldMapper)) {
      throw new ElasticsearchException(
          "Field [" + suggestionContext.getField() + "] is not a completion suggest field");
    }

    final CompletionSuggestion suggestion =
        new CompletionSuggestion(name, suggestionContext.getSize());
    UnicodeUtil.UTF8toUTF16(suggestionContext.getText(), spare);

    final CompletionSuggestion.Entry entry =
        new CompletionSuggestion.Entry(new StringText(spare.toString()), 0, spare.length());
    suggestion.addTerm(entry);

    final String field = suggestionContext.getField();
    final Map<String, CompletionSuggestion.Entry.Option> bestByKey =
        Maps.newHashMapWithExpectedSize(indexReader.leaves().size() * suggestionContext.getSize());
    for (AtomicReaderContext leaf : indexReader.leaves()) {
      final AtomicReader leafReader = leaf.reader();
      final Terms terms = leafReader.fields().terms(field);
      if (!(terms instanceof Completion090PostingsFormat.CompletionTerms)) {
        continue;
      }
      final Completion090PostingsFormat.CompletionTerms completionTerms =
          (Completion090PostingsFormat.CompletionTerms) terms;
      final Lookup lookup =
          completionTerms.getLookup(suggestionContext.mapper(), suggestionContext);
      if (lookup == null) {
        // No lookup exists for this segment; this can happen if a merge dropped all docs from
        // the segment that had a value in this field.
        continue;
      }
      final List<Lookup.LookupResult> hits =
          lookup.lookup(spare, false, suggestionContext.getSize());
      for (Lookup.LookupResult hit : hits) {
        final String key = hit.key.toString();
        final float score = hit.value;
        final Option known = bestByKey.get(key);
        if (known != null) {
          // Keep only the best-scoring occurrence of a key seen in several segments.
          if (known.getScore() < score) {
            known.setScore(score);
            known.setPayload(hit.payload == null ? null : new BytesArray(hit.payload));
          }
          continue;
        }
        final Option fresh =
            new CompletionSuggestion.Entry.Option(
                new StringText(key),
                score,
                hit.payload == null ? null : new BytesArray(hit.payload));
        bestByKey.put(key, fresh);
      }
    }

    final List<CompletionSuggestion.Entry.Option> ranked =
        new ArrayList<CompletionSuggestion.Entry.Option>(bestByKey.values());
    CollectionUtil.introSort(ranked, scoreComparator);

    // Cap the output at the requested size.
    final int keep = Math.min(suggestionContext.getSize(), ranked.size());
    for (CompletionSuggestion.Entry.Option option : ranked.subList(0, keep)) {
      entry.addOption(option);
    }

    return suggestion;
  }