@Override
  public List<LookupResult> lookup(CharSequence key, boolean higherWeightsFirst, int num) {
    final List<Completion> completions;
    if (higherWeightsFirst) {
      completions = higherWeightsCompletion.lookup(key, num);
    } else {
      completions = normalCompletion.lookup(key, num);
    }

    final ArrayList<LookupResult> results = new ArrayList<LookupResult>(completions.size());
    CharsRef spare = new CharsRef();
    for (Completion c : completions) {
      spare.grow(c.utf8.length);
      UnicodeUtil.UTF8toUTF16(c.utf8, spare);
      results.add(new LookupResult(spare.toString(), c.bucket));
    }
    return results;
  }
  public static String convertString(String str) {

    try {
      Matcher m = numerosIndexacaoPattern.matcher(str);
      str = m.replaceAll("");
      str = new String(convertBytes(str.getBytes()));

      m = numerosIndexacaoPattern.matcher(str);
      str = m.replaceAll("");

      for (Matcher hasUnicodeChars = findUnicodeCharsPattern.matcher(str);
          hasUnicodeChars.find();
          hasUnicodeChars = findUnicodeCharsPattern.matcher(str)) {
        String uc = "0x" + hasUnicodeChars.group(1);
        str = hasUnicodeChars.replaceFirst(UnicodeUtil.append32(Integer.decode(uc)));
      }

      return str;
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }
  public void testRandom() throws Exception {

    int num = atLeast(2);
    for (int iter = 0; iter < num; iter++) {
      if (VERBOSE) {
        System.out.println("TEST: iter=" + iter);
      }

      Directory dir = newDirectory();

      IndexWriter w =
          new IndexWriter(
              dir,
              newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                  .setMergePolicy(NoMergePolicy.COMPOUND_FILES));
      _TestUtil.keepFullyDeletedSegments(w);

      Map<BytesRef, List<Integer>> docs = new HashMap<BytesRef, List<Integer>>();
      Set<Integer> deleted = new HashSet<Integer>();
      List<BytesRef> terms = new ArrayList<BytesRef>();

      int numDocs = _TestUtil.nextInt(random(), 1, 100 * RANDOM_MULTIPLIER);
      Document doc = new Document();
      Field f = newStringField("field", "", Field.Store.NO);
      doc.add(f);
      Field id = newStringField("id", "", Field.Store.NO);
      doc.add(id);

      boolean onlyUniqueTerms = random().nextBoolean();
      if (VERBOSE) {
        System.out.println("TEST: onlyUniqueTerms=" + onlyUniqueTerms + " numDocs=" + numDocs);
      }
      Set<BytesRef> uniqueTerms = new HashSet<BytesRef>();
      for (int i = 0; i < numDocs; i++) {

        if (!onlyUniqueTerms && random().nextBoolean() && terms.size() > 0) {
          // re-use existing term
          BytesRef term = terms.get(random().nextInt(terms.size()));
          docs.get(term).add(i);
          f.setStringValue(term.utf8ToString());
        } else {
          String s = _TestUtil.randomUnicodeString(random(), 10);
          BytesRef term = new BytesRef(s);
          if (!docs.containsKey(term)) {
            docs.put(term, new ArrayList<Integer>());
          }
          docs.get(term).add(i);
          terms.add(term);
          uniqueTerms.add(term);
          f.setStringValue(s);
        }
        id.setStringValue("" + i);
        w.addDocument(doc);
        if (random().nextInt(4) == 1) {
          w.commit();
        }
        if (i > 0 && random().nextInt(20) == 1) {
          int delID = random().nextInt(i);
          deleted.add(delID);
          w.deleteDocuments(new Term("id", "" + delID));
          if (VERBOSE) {
            System.out.println("TEST: delete " + delID);
          }
        }
      }

      if (VERBOSE) {
        List<BytesRef> termsList = new ArrayList<BytesRef>(uniqueTerms);
        Collections.sort(termsList, BytesRef.getUTF8SortedAsUTF16Comparator());
        System.out.println("TEST: terms in UTF16 order:");
        for (BytesRef b : termsList) {
          System.out.println("  " + UnicodeUtil.toHexString(b.utf8ToString()) + " " + b);
          for (int docID : docs.get(b)) {
            if (deleted.contains(docID)) {
              System.out.println("    " + docID + " (deleted)");
            } else {
              System.out.println("    " + docID);
            }
          }
        }
      }

      IndexReader reader = w.getReader();
      w.close();
      if (VERBOSE) {
        System.out.println("TEST: reader=" + reader);
      }

      Bits liveDocs = MultiFields.getLiveDocs(reader);
      for (int delDoc : deleted) {
        assertFalse(liveDocs.get(delDoc));
      }

      for (int i = 0; i < 100; i++) {
        BytesRef term = terms.get(random().nextInt(terms.size()));
        if (VERBOSE) {
          System.out.println(
              "TEST: seek term=" + UnicodeUtil.toHexString(term.utf8ToString()) + " " + term);
        }

        DocsEnum docsEnum = _TestUtil.docs(random(), reader, "field", term, liveDocs, null, 0);
        assertNotNull(docsEnum);

        for (int docID : docs.get(term)) {
          if (!deleted.contains(docID)) {
            assertEquals(docID, docsEnum.nextDoc());
          }
        }
        assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsEnum.nextDoc());
      }

      reader.close();
      dir.close();
    }
  }
 public void bytes(String from) {
   size = 0;
   offset = 0;
   UnicodeUtil.UTF16toUTF8(from, 0, from.length(), this);
 }
Example #5
0
 public final int countLength(Object value) {
   // for string-derived types, length means number of XML characters.
   return UnicodeUtil.countLength((String) value);
 }