/** Build the example index. */
  private void index() throws IOException {
    IndexWriterConfig iwc =
        new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE);
    IndexWriter indexWriter = new IndexWriter(indexDir, iwc);

    // Writes facet ords to a separate directory from the main index
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

    Document doc = new Document();
    // 3 occurrences for tag 'lucene'
    doc.add(new IntAssociationFacetField(3, "tags", "lucene"));
    // 87% confidence level of genre 'computing'
    doc.add(new FloatAssociationFacetField(0.87f, "genre", "computing"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    // 1 occurrence for tag 'lucene'
    doc.add(new IntAssociationFacetField(1, "tags", "lucene"));
    // 2 occurrences for tag 'solr'
    doc.add(new IntAssociationFacetField(2, "tags", "solr"));
    // 75% confidence level of genre 'computing'
    doc.add(new FloatAssociationFacetField(0.75f, "genre", "computing"));
    // 34% confidence level of genre 'software'
    doc.add(new FloatAssociationFacetField(0.34f, "genre", "software"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    indexWriter.close();
    taxoWriter.close();
  }
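For reference, a minimal sketch of the matching search side, which sums the association values per category; it assumes the same config, indexDir and taxoDir fields used above, and that config maps the "tags" dimension to the "$tags" index field as in Lucene's AssociationsFacetsExample:

  private FacetResult sumIntAssociations() throws IOException {
    DirectoryReader indexReader = DirectoryReader.open(indexDir);
    IndexSearcher searcher = new IndexSearcher(indexReader);
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);

    FacetsCollector fc = new FacetsCollector();
    FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, fc);

    // Sums the int association per category: lucene -> 3 + 1 = 4, solr -> 2
    Facets tags = new TaxonomyFacetSumIntAssociations("$tags", taxoReader, config, fc);
    FacetResult result = tags.getTopChildren(10, "tags");

    indexReader.close();
    taxoReader.close();
    return result;
  }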
  /** Build the example index of latitude/longitude points. */
  public void index() throws IOException {
    IndexWriter writer =
        new IndexWriter(
            indexDir, new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE));

    // TODO: we could index in radians instead ... saves all the conversions in getBoundingBoxFilter

    // Add documents with latitude/longitude location:
    // we index these both as DoublePoints (for bounding box/ranges) and as NumericDocValuesFields
    // (for scoring)
    Document doc = new Document();
    doc.add(new DoublePoint("latitude", 40.759011));
    doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.759011)));
    doc.add(new DoublePoint("longitude", -73.9844722));
    doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-73.9844722)));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new DoublePoint("latitude", 40.718266));
    doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.718266)));
    doc.add(new DoublePoint("longitude", -74.007819));
    doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-74.007819)));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new DoublePoint("latitude", 40.7051157));
    doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.7051157)));
    doc.add(new DoublePoint("longitude", -74.0088305));
    doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-74.0088305)));
    writer.addDocument(doc);

    // Open near-real-time searcher
    searcher = new IndexSearcher(DirectoryReader.open(writer));
    writer.close();
  }
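A minimal sketch of a bounding-box query over the fields indexed above; the box coordinates are illustrative assumptions, not values from the example:

  public TopDocs searchBoundingBox() throws IOException {
    // One DoublePoint range per axis, ANDed together
    Query latRange = DoublePoint.newRangeQuery("latitude", 40.70, 40.76);
    Query lonRange = DoublePoint.newRangeQuery("longitude", -74.01, -73.98);
    Query bbox =
        new BooleanQuery.Builder()
            .add(latRange, BooleanClause.Occur.MUST)
            .add(lonRange, BooleanClause.Occur.MUST)
            .build();
    return searcher.search(bbox, 10);
  }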
  public static void main(String[] args) throws Exception {
    // setup Lucene to use an in-memory index
    Directory directory = new RAMDirectory();
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43);
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_43, analyzer);
    IndexWriter writer = new IndexWriter(directory, iwc);

    // index a few documents
    writer.addDocument(createDocument("1", "foo bar baz"));
    writer.addDocument(createDocument("2", "red green blue"));
    writer.addDocument(createDocument("3", "The Lucene was made by Doug Cutting"));
    writer.close();

    IndexReader reader = DirectoryReader.open(directory);

    IndexSearcher searcher = new IndexSearcher(reader);

    Query query = new PrefixQuery(new Term(FIELD, "cut")); // implicitly appends * at the end
    // display search results
    TopDocs topDocs = searcher.search(query, 10);
    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
      Document doc = searcher.doc(scoreDoc.doc);
      System.out.println(doc);
    }
  }
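The createDocument helper is not shown above; a minimal sketch, assuming it stores an id and the analyzed FIELD text that the PrefixQuery searches:

  private static Document createDocument(String id, String text) {
    Document doc = new Document();
    doc.add(new StringField("id", id, Field.Store.YES)); // assumed id field, indexed verbatim
    doc.add(new TextField(FIELD, text, Field.Store.YES)); // tokenized; matched by the prefix query
    return doc;
  }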
  /** Test that core cache key (needed for NRT) is working */
  public void testCoreCacheKey() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(null);
    iwc.setMaxBufferedDocs(100);
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    IndexWriter iw = new IndexWriter(dir, iwc);

    // add two docs, id:0 and id:1
    Document doc = new Document();
    Field idField = new StringField("id", "", Field.Store.NO);
    doc.add(idField);
    idField.setStringValue("0");
    iw.addDocument(doc);
    idField.setStringValue("1");
    iw.addDocument(doc);

    // open reader
    ShardId shardId = new ShardId("fake", "_na_", 1);
    DirectoryReader ir = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(iw, true), shardId);
    assertEquals(2, ir.numDocs());
    assertEquals(1, ir.leaves().size());

    // delete id:0 and reopen
    iw.deleteDocuments(new Term("id", "0"));
    DirectoryReader ir2 = DirectoryReader.openIfChanged(ir);

    // we should have the same cache key as before
    assertEquals(1, ir2.numDocs());
    assertEquals(1, ir2.leaves().size());
    assertSame(
        ir.leaves().get(0).reader().getCoreCacheKey(),
        ir2.leaves().get(0).reader().getCoreCacheKey());
    IOUtils.close(ir, ir2, iw, dir);
  }
  public void buildIndex(JSONObject indexData) {

    try {
      Directory dir = FSDirectory.open(new File(indexDir));
      IKAnalyzer analyzer = new IKAnalyzer();
      analyzer.setUseSmart(true);
      IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_35, analyzer);
      indexWriter = new IndexWriter(dir, iwc);
      indexWriter.deleteAll();

      JSONArray statusData = indexData.getJSONArray("statusData");
      for (int i = 0; i < statusData.length(); i++) {
        String text = statusData.getString(i);
        Document doc = new Document();
        doc.add(
            new Field(
                "text",
                text,
                Field.Store.YES,
                Field.Index.ANALYZED,
                Field.TermVector.WITH_POSITIONS_OFFSETS));
        indexWriter.addDocument(doc);
      }

      JSONArray userData = indexData.getJSONArray("userData");
      for (int i = 0; i < userData.length(); i++) {
        String text = userData.getString(i);
        Document doc = new Document();
        doc.add(
            new Field(
                "text",
                text,
                Field.Store.YES,
                Field.Index.ANALYZED,
                Field.TermVector.WITH_POSITIONS_OFFSETS));
        indexWriter.addDocument(doc);
      }
      // close() below commits the changes, so no explicit commit is needed here
      System.out.println("Index is done");
    } catch (IOException e) {
      e.printStackTrace();
    } catch (JSONException e) {
      e.printStackTrace();
    } finally {
      try {
        if (indexWriter != null) {
          indexWriter.close();
        }
      } catch (CorruptIndexException e) {
        e.printStackTrace();
      } catch (IOException e) {
        e.printStackTrace();
      }
    }
  }
  @Test
  public void testInfiniteValues() throws Exception {
    Directory dir = newDirectory();
    IndexWriter writer =
        new IndexWriter(
            dir,
            newIndexWriterConfig(
                TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)));
    Document doc = new Document();
    doc.add(new NumericField("double").setDoubleValue(Double.NEGATIVE_INFINITY));
    doc.add(new NumericField("long").setLongValue(Long.MIN_VALUE));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new NumericField("double").setDoubleValue(Double.POSITIVE_INFINITY));
    doc.add(new NumericField("long").setLongValue(Long.MAX_VALUE));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new NumericField("double").setDoubleValue(0.0));
    doc.add(new NumericField("long").setLongValue(0L));
    writer.addDocument(doc);
    writer.close();

    IndexSearcher s = new IndexSearcher(dir);

    Query q = NumericRangeQuery.newLongRange("long", null, null, true, true);
    TopDocs topDocs = s.search(q, 10);
    assertEquals("Score doc count", 3, topDocs.scoreDocs.length);

    q = NumericRangeQuery.newLongRange("long", null, null, false, false);
    topDocs = s.search(q, 10);
    assertEquals("Score doc count", 3, topDocs.scoreDocs.length);

    q = NumericRangeQuery.newLongRange("long", Long.MIN_VALUE, Long.MAX_VALUE, true, true);
    topDocs = s.search(q, 10);
    assertEquals("Score doc count", 3, topDocs.scoreDocs.length);

    q = NumericRangeQuery.newLongRange("long", Long.MIN_VALUE, Long.MAX_VALUE, false, false);
    topDocs = s.search(q, 10);
    assertEquals("Score doc count", 1, topDocs.scoreDocs.length);

    q = NumericRangeQuery.newDoubleRange("double", null, null, true, true);
    topDocs = s.search(q, 10);
    assertEquals("Score doc count", 3, topDocs.scoreDocs.length);

    q = NumericRangeQuery.newDoubleRange("double", null, null, false, false);
    topDocs = s.search(q, 10);
    assertEquals("Score doc count", 3, topDocs.scoreDocs.length);

    s.close();
    dir.close();
  }
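Note the semantics the assertions rely on: a null endpoint means an open-ended range, so even the exclusive (false, false) variants with null bounds match all three documents; only the explicit Long.MIN_VALUE/Long.MAX_VALUE exclusive range drops the two extreme values.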
Example #7
  public static void main(String[] args) throws IOException {
    Version version = Version.LUCENE_43;
    // Create a document
    Document document = new Document();
    Field field =
        new TextField("fieldName", "Hello man can you see this in index!", Field.Store.YES);
    field.setBoost(2.0f);
    Field fieldStore = new StringField("fieldName2", "fieldValueOnlyStore", Field.Store.YES);
    //
    // FieldType fieldAllType = new FieldType();
    // fieldAllType.setIndexed(true);
    // fieldAllType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    // fieldAllType.setOmitNorms(false);
    // fieldAllType.setStored(true);
    // fieldAllType.setStoreTermVectorOffsets(true);
    // fieldAllType.setStoreTermVectorPayloads(true);
    // fieldAllType.setStoreTermVectorPositions(true);
    // fieldAllType.setStoreTermVectors(true);
    // fieldAllType.setTokenized(true);
    // Field fieldAll = new Field("name", "all things need to store",
    // fieldAllType);

    document.add(field);
    // document.add(new BinaryDocValuesField("name", new
    // BytesRef("hello")));
    document.add(fieldStore);
    // document.add(fieldAll);

    Document doc2 = new Document();
    doc2.add(field);

    // Create a directory to store the index
    Directory directory = FSDirectory.open(new File("/home/waf/tmp/index"));
    // Directory directory = new RAMDirectory();
    // Configure the index writer
    Analyzer analyzer = new StandardAnalyzer(version);
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(version, analyzer);

    // indexWriterConfig.setCodec(new Lucene40Codec());
    // Initialize the index writer and add the documents to the index
    IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig);
    indexWriter.addDocument(document);
    indexWriter.addDocument(doc2);
    indexWriter.commit();
    indexWriter.close();
    // Query the index; search the "fieldName" field that was actually indexed
    // (StandardAnalyzer lowercases "Hello" to "hello") — the original searched
    // the "name" field, which exists only in the commented-out fieldAll above
    // and therefore matched nothing.
    IndexReader reader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = new IndexSearcher(reader);
    TopDocs result = indexSearcher.search(new TermQuery(new Term("fieldName", "hello")), 10);
    System.out.println(result.totalHits);
    reader.close();
  }
  @Test
  public void baseUIMAAnalyzerIntegrationTest() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer));
    // add the first doc
    Document doc = new Document();
    String dummyTitle = "this is a dummy title ";
    doc.add(new TextField("title", dummyTitle, Field.Store.YES));
    String dummyContent = "there is some content written here";
    doc.add(new TextField("contents", dummyContent, Field.Store.YES));
    writer.addDocument(doc);
    writer.commit();

    // try the search over the first doc
    DirectoryReader directoryReader = DirectoryReader.open(dir);
    IndexSearcher indexSearcher = newSearcher(directoryReader);
    TopDocs result = indexSearcher.search(new MatchAllDocsQuery(), 1);
    assertTrue(result.totalHits > 0);
    Document d = indexSearcher.doc(result.scoreDocs[0].doc);
    assertNotNull(d);
    assertNotNull(d.getField("title"));
    assertEquals(dummyTitle, d.getField("title").stringValue());
    assertNotNull(d.getField("contents"));
    assertEquals(dummyContent, d.getField("contents").stringValue());

    // add a second doc
    doc = new Document();
    String dogmasTitle = "dogmas";
    doc.add(new TextField("title", dogmasTitle, Field.Store.YES));
    String dogmasContents = "white men can't jump";
    doc.add(new TextField("contents", dogmasContents, Field.Store.YES));
    writer.addDocument(doc);
    writer.commit();

    directoryReader.close();
    directoryReader = DirectoryReader.open(dir);
    indexSearcher = newSearcher(directoryReader);
    result = indexSearcher.search(new MatchAllDocsQuery(), 2);
    Document d1 = indexSearcher.doc(result.scoreDocs[1].doc);
    assertNotNull(d1);
    assertNotNull(d1.getField("title"));
    assertEquals(dogmasTitle, d1.getField("title").stringValue());
    assertNotNull(d1.getField("contents"));
    assertEquals(dogmasContents, d1.getField("contents").stringValue());

    // do a matchalldocs query to retrieve both docs
    result = indexSearcher.search(new MatchAllDocsQuery(), 2);
    assertEquals(2, result.totalHits);
    writer.close();
    indexSearcher.getIndexReader().close();
    dir.close();
  }
 public void index(MediaFile mediaFile) {
   try {
     if (mediaFile.isFile()) {
       songWriter.addDocument(SONG.createDocument(mediaFile));
     } else if (mediaFile.isAlbum()) {
       albumWriter.addDocument(ALBUM.createDocument(mediaFile));
     } else {
       artistWriter.addDocument(ARTIST.createDocument(mediaFile));
     }
   } catch (Exception x) {
     LOG.error("Failed to create search index for " + mediaFile, x);
   }
 }
Example #10
  /**
   * Adds sample documents to an in-memory index.
   *
   * @param analyzer the analyzer used for indexing
   * @return the populated RAMDirectory
   * @throws CorruptIndexException
   * @throws LockObtainFailedException
   * @throws IOException
   */
  public static Directory createRAMDirectory(Analyzer analyzer)
      throws CorruptIndexException, LockObtainFailedException, IOException {
    Directory directory = new RAMDirectory();

    IndexWriter indexWriter = new IndexWriter(directory, analyzer, true, MaxFieldLength.UNLIMITED);
    Document doc1 = new Document();
    Document doc2 = new Document();
    Document doc3 = new Document();

    // --1
    doc1.add(new Field(FIELD_ID, "1", Store.YES, Index.NOT_ANALYZED_NO_NORMS));
    doc1.add(new Field(FIELD_INDEX, "a", Store.YES, Index.NOT_ANALYZED_NO_NORMS));
    doc1.add(new NumericField(FIELD_BIRTHDAY, Store.YES, true).setLongValue(1986));
    doc1.add(new Field(FIELD_CATEGORY, "/usa/black", Store.NO, Index.NOT_ANALYZED_NO_NORMS));
    doc1.add(new Field(FIELD_CITY, "beijing", Store.YES, Index.NOT_ANALYZED_NO_NORMS));
    doc1.add(new Field(FIELD_NAME, "Lebron James", Store.YES, Index.ANALYZED_NO_NORMS));
    doc1.add(
        new Field(
            FIELD_SIGNATURE,
            "The quick brown fox jumped over the lazy dog!",
            Store.YES,
            Index.ANALYZED));
    doc1.add(new Field(FIELD_INTRODUCTION, "中欣", Store.YES, Index.ANALYZED));

    // --2
    doc2.add(new Field(FIELD_ID, "2", Store.YES, Index.NOT_ANALYZED_NO_NORMS));
    doc2.add(new Field(FIELD_INDEX, "b", Store.YES, Index.NOT_ANALYZED_NO_NORMS));
    doc2.add(new NumericField(FIELD_BIRTHDAY, Store.YES, true).setLongValue(1988));
    doc2.add(new Field(FIELD_CATEGORY, "/usa/white", Store.NO, Index.NOT_ANALYZED_NO_NORMS));
    doc2.add(new Field(FIELD_CITY, "beijing", Store.YES, Index.NOT_ANALYZED_NO_NORMS));
    doc2.add(new Field(FIELD_NAME, "&KOBE**BYANT\n", Store.YES, Index.ANALYZED_NO_NORMS));
    doc2.add(new Field(FIELD_SIGNATURE, "I am the hero!", Store.YES, Index.ANALYZED));
    doc2.add(new Field(FIELD_INTRODUCTION, "中的欣", Store.YES, Index.ANALYZED));

    // --3
    doc3.add(new Field(FIELD_ID, "3", Store.YES, Index.NOT_ANALYZED_NO_NORMS));
    doc3.add(new Field(FIELD_INDEX, "c", Store.YES, Index.NOT_ANALYZED_NO_NORMS));
    doc3.add(new NumericField(FIELD_BIRTHDAY, Store.YES, true).setLongValue(1990));
    doc3.add(new Field(FIELD_CATEGORY, "/china", Store.NO, Index.NOT_ANALYZED_NO_NORMS));
    doc3.add(new Field(FIELD_CITY, "jilin", Store.YES, Index.NOT_ANALYZED_NO_NORMS));
    doc3.add(new Field(FIELD_NAME, "石欣愛北的的京城", Store.YES, Index.ANALYZED_NO_NORMS));
    doc3.add(new Field(FIELD_SIGNATURE, "关山口职业技术学院", Store.YES, Index.ANALYZED));
    doc3.add(new Field(FIELD_INTRODUCTION, "我爱中的天欣气", Store.YES, Index.ANALYZED));

    indexWriter.addDocument(doc1);
    indexWriter.addDocument(doc2);
    indexWriter.addDocument(doc3);
    indexWriter.close();

    return directory;
  }
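A minimal sketch of querying the returned directory with the same Lucene 3.x API, for example a numeric range over FIELD_BIRTHDAY:

  public static void searchByBirthday(Directory directory) throws IOException {
    IndexSearcher searcher = new IndexSearcher(directory, true); // read-only
    Query q = NumericRangeQuery.newLongRange(FIELD_BIRTHDAY, 1985L, 1989L, true, true);
    TopDocs hits = searcher.search(q, 10); // matches doc1 (1986) and doc2 (1988)
    System.out.println("hits: " + hits.totalHits);
    searcher.close();
  }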
  @Test
  public void testHugeLabel() throws Exception {
    Directory indexDir = newDirectory(), taxoDir = newDirectory();
    IndexWriter indexWriter =
        new IndexWriter(
            indexDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
    DirectoryTaxonomyWriter taxoWriter =
        new DirectoryTaxonomyWriter(
            taxoDir, OpenMode.CREATE, new Cl2oTaxonomyWriterCache(2, 1f, 1));
    FacetFields facetFields = new FacetFields(taxoWriter);

    // Add one huge label:
    int len = CategoryPath.MAX_CATEGORY_PATH_LENGTH - 4; // for the dimension and separator
    String bigs = _TestUtil.randomSimpleString(random(), len, len);
    CategoryPath cp = new CategoryPath("dim", bigs);
    int ordinal = taxoWriter.addCategory(cp);
    Document bigDoc = new Document();
    facetFields.addFields(bigDoc, Collections.singletonList(cp));
    indexWriter.addDocument(bigDoc);

    // Add tiny ones to cause a re-hash
    for (int i = 0; i < 3; i++) {
      String s = _TestUtil.randomSimpleString(random(), 1, 10);
      taxoWriter.addCategory(new CategoryPath("dim", s));
      Document doc = new Document();
      facetFields.addFields(doc, Collections.singletonList(new CategoryPath("dim", s)));
      indexWriter.addDocument(doc);
    }

    // when too large components were allowed to be added, this resulted in a new added category
    assertEquals(ordinal, taxoWriter.addCategory(cp));

    IOUtils.close(indexWriter, taxoWriter);

    DirectoryReader indexReader = DirectoryReader.open(indexDir);
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
    IndexSearcher searcher = new IndexSearcher(indexReader);
    DrillDownQuery ddq = new DrillDownQuery(FacetIndexingParams.DEFAULT);
    ddq.add(cp);
    assertEquals(1, searcher.search(ddq, 10).totalHits);

    IOUtils.close(indexReader, taxoReader);

    IOUtils.close(indexDir, taxoDir);
  }
  @Override
  protected void setUp() throws Exception {
    super.setUp();

    RAMDirectory dirA = new RAMDirectory();
    RAMDirectory dirB = new RAMDirectory();

    IndexWriter wA =
        new IndexWriter(dirA, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    IndexWriter wB =
        new IndexWriter(dirB, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

    long theLong = Long.MAX_VALUE;
    double theDouble = Double.MAX_VALUE;
    byte theByte = Byte.MAX_VALUE;
    short theShort = Short.MAX_VALUE;
    int theInt = Integer.MAX_VALUE;
    float theFloat = Float.MAX_VALUE;
    for (int i = 0; i < NUM_DOCS; i++) {
      Document doc = new Document();
      doc.add(
          new Field(
              "theLong", String.valueOf(theLong--), Field.Store.NO, Field.Index.NOT_ANALYZED));
      doc.add(
          new Field(
              "theDouble", String.valueOf(theDouble--), Field.Store.NO, Field.Index.NOT_ANALYZED));
      doc.add(
          new Field(
              "theByte", String.valueOf(theByte--), Field.Store.NO, Field.Index.NOT_ANALYZED));
      doc.add(
          new Field(
              "theShort", String.valueOf(theShort--), Field.Store.NO, Field.Index.NOT_ANALYZED));
      doc.add(
          new Field("theInt", String.valueOf(theInt--), Field.Store.NO, Field.Index.NOT_ANALYZED));
      doc.add(
          new Field(
              "theFloat", String.valueOf(theFloat--), Field.Store.NO, Field.Index.NOT_ANALYZED));
      if (0 == i % 3) {
        wA.addDocument(doc);
      } else {
        wB.addDocument(doc);
      }
    }
    wA.close();
    wB.close();
    readerA = IndexReader.open(dirA, true);
    readerB = IndexReader.open(dirB, true);
    readerX = new MultiReader(new IndexReader[] {readerA, readerB});
  }
  @BeforeTest
  public void setUp() throws IOException {
    directory = new RAMDirectory();
    IndexWriter writer =
        new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_36, new KeywordAnalyzer()));

    writer.addDocument(newDocument("1", newPoint(-30, -30)));
    writer.addDocument(newDocument("2", newPoint(-45, -45)));
    writer.addDocument(newDocument("3", newPoint(-45, 50)));
    writer.addDocument(
        newDocument("4", newRectangle().topLeft(-50, 50).bottomRight(-38, 38).build()));

    indexReader = IndexReader.open(writer, true);
    indexSearcher = new IndexSearcher(indexReader);
  }
  private void addDocument(final IndexWriter indexWriter, final GeoEntry geoEntry)
      throws IOException {
    final Document document = new Document();
    document.add(
        new TextField(GeoNamesLuceneConstants.NAME_FIELD, geoEntry.getName(), Field.Store.YES));
    document.add(
        new DoubleField(
            GeoNamesLuceneConstants.LATITUDE_FIELD, geoEntry.getLatitude(), Field.Store.YES));
    document.add(
        new DoubleField(
            GeoNamesLuceneConstants.LONGITUDE_FIELD, geoEntry.getLongitude(), Field.Store.YES));
    document.add(
        new StoredField(GeoNamesLuceneConstants.FEATURE_CODE_FIELD, geoEntry.getFeatureCode()));
    document.add(
        new StoredField(GeoNamesLuceneConstants.POPULATION_FIELD, geoEntry.getPopulation()));
    document.add(
        new TextField(
            GeoNamesLuceneConstants.ALTERNATE_NAMES_FIELD,
            geoEntry.getAlternateNames(),
            Field.Store.NO));

    final float boost = calculateBoost(geoEntry);
    document.add(new FloatDocValuesField(GeoNamesLuceneConstants.BOOST_FIELD, boost));

    indexWriter.addDocument(document);
  }
  public void testFarsiRangeFilterCollating(
      Analyzer analyzer, String firstBeg, String firstEnd, String secondBeg, String secondEnd)
      throws Exception {
    Directory dir = newDirectory();
    IndexWriter writer =
        new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
    Document doc = new Document();
    doc.add(new Field("content", "\u0633\u0627\u0628", Field.Store.YES, Field.Index.ANALYZED));
    doc.add(new Field("body", "body", Field.Store.YES, Field.Index.NOT_ANALYZED));
    writer.addDocument(doc);
    writer.close();
    IndexReader reader = IndexReader.open(dir);
    IndexSearcher searcher = new IndexSearcher(reader);
    Query query = new TermQuery(new Term("body", "body"));

    // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
    // orders the U+0698 character before the U+0633 character, so the single
    // index Term below should NOT be returned by a TermRangeFilter with a Farsi
    // Collator (or an Arabic one, for the case where a Farsi collator is not
    // supported).
    ScoreDoc[] result =
        searcher.search(query, new TermRangeFilter("content", firstBeg, firstEnd, true, true), 1)
            .scoreDocs;
    assertEquals("The index Term should not be included.", 0, result.length);

    result =
        searcher.search(query, new TermRangeFilter("content", secondBeg, secondEnd, true, true), 1)
            .scoreDocs;
    assertEquals("The index Term should be included.", 1, result.length);

    searcher.close();
    reader.close();
    dir.close();
  }
  @Override
  public void index(List<AgeObject> aol) {
    try {
      IndexWriter iWriter =
          new IndexWriter(
              index, analyzer, objectList == null, IndexWriter.MaxFieldLength.UNLIMITED);

      if (objectList == null) objectList = aol;
      else objectList.addAll(aol);

      for (AgeObject ao : aol) {
        Document doc = new Document();

        for (TextFieldExtractor tfe : extractors)
          doc.add(
              new Field(
                  tfe.getName(),
                  tfe.getExtractor().getValue(ao),
                  Field.Store.NO,
                  Field.Index.ANALYZED));

        iWriter.addDocument(doc);
      }

      iWriter.close();

      defaultFieldName = extractors.iterator().next().getName();
    } catch (CorruptIndexException e) {
      e.printStackTrace();
    } catch (IOException e) {
      e.printStackTrace();
    }
  }
  // TODO: randomize
  public IndexSearcher setUp(Random random, Similarity similarity, int numDocs) throws IOException {
    Directory directory = new MockDirectoryWrapper(random, new RAMDirectory());
    PayloadAnalyzer analyzer = new PayloadAnalyzer();
    IndexWriter writer =
        new IndexWriter(
            directory,
            new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer).setSimilarity(similarity));
    // writer.infoStream = System.out;
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      doc.add(new Field(FIELD, English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
      doc.add(
          new Field(
              MULTI_FIELD,
              English.intToEnglish(i) + "  " + English.intToEnglish(i),
              Field.Store.YES,
              Field.Index.ANALYZED));
      doc.add(
          new Field(
              NO_PAYLOAD_FIELD, English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
      writer.addDocument(doc);
    }
    reader = IndexReader.open(writer, true);
    writer.close();

    IndexSearcher searcher = LuceneTestCase.newSearcher(reader);
    searcher.setSimilarity(similarity);
    return searcher;
  }
  @Before
  public void setUp() throws Exception {
    serializer = new LuceneSerializer(true, true);
    entityPath = new PathBuilder<Object>(Object.class, "obj");
    title = entityPath.getString("title");
    author = entityPath.getString("author");
    text = entityPath.getString("text");
    publisher = entityPath.getString("publisher");
    year = entityPath.getNumber("year", Integer.class);
    rating = entityPath.getString("rating");
    gross = entityPath.getNumber("gross", Double.class);
    titles = entityPath.getCollection("title", String.class, StringPath.class);

    longField = entityPath.getNumber("longField", Long.class);
    shortField = entityPath.getNumber("shortField", Short.class);
    byteField = entityPath.getNumber("byteField", Byte.class);
    floatField = entityPath.getNumber("floatField", Float.class);

    idx = new RAMDirectory();
    config =
        new IndexWriterConfig(new StandardAnalyzer())
            .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    writer = new IndexWriter(idx, config);

    writer.addDocument(createDocument());

    writer.close();

    IndexReader reader = DirectoryReader.open(idx);
    searcher = new IndexSearcher(reader);
  }
Example #19
  private static void index_h(String prefix, File file, IndexWriter indexWriter)
      throws IOException {
    Document doc = null;

    if (file.isDirectory()) {
      File[] files = file.listFiles();
      if (files != null) { // listFiles() returns null on I/O error
        for (File file1 : files) {
          index_h(prefix + FILE_SEPARATOR + file.getName(), file1, indexWriter);
        }
      }
    } else {
      String content = FileUtils.readFileToString(file, "utf-8");

      System.out.println("==============================================================");
      System.out.println("index_h " + content);
      System.out.println("==============================================================");

      String filename = prefix + FILE_SEPARATOR + file.getName();
      String path = file.getAbsolutePath();

      doc = new Document();
      doc.add(new Field("content", content, Field.Store.YES, Field.Index.ANALYZED));
      doc.add(new Field("relative_path", filename, Field.Store.YES, Field.Index.NOT_ANALYZED));
      indexWriter.addDocument(doc);
    }
  }
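An assumed entry point for the recursion above ("docs" is an illustrative path):

  // index everything under the docs folder; the caller owns the writer
  index_h("", new File("docs"), indexWriter);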
Example #20
  public void createRandomTerms(int nDocs, int nTerms, double power, Directory dir)
      throws Exception {
    int[] freq = new int[nTerms];
    terms = new Term[nTerms];
    for (int i = 0; i < nTerms; i++) {
      int f = (nTerms + 1) - i; // make first terms less frequent
      freq[i] = (int) Math.ceil(Math.pow(f, power));
      terms[i] = new Term("f", Character.toString((char) ('A' + i)));
    }

    IndexWriter iw =
        new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    for (int i = 0; i < nDocs; i++) {
      Document d = new Document();
      for (int j = 0; j < nTerms; j++) {
        if (r.nextInt(freq[j]) == 0) {
          d.add(new Field("f", terms[j].text(), Field.Store.NO, Field.Index.NOT_ANALYZED));
          // System.out.println(d);
        }
      }
      iw.addDocument(d);
    }
    iw.optimize();
    iw.close();
  }
  public void createIndex() {

    loadTweets("datasets/sentiment-short.csv", 100);

    directory = new RAMDirectory();

    try {
      IndexWriter writer = getWriter();
      for (int i = 0; i < tweets.size(); i++) {
        Document doc = new Document();
        doc.add(
            new Field(
                "tweet",
                tweets.get(i).getText(),
                Field.Store.YES,
                Field.Index.ANALYZED,
                TermVector.YES));
        writer.addDocument(doc);
      }

      System.out.println("Docs: " + writer.numDocs());
      writer.close();

    } catch (Exception e) {
      e.printStackTrace();
    }
  }
 private static void index(Index index, IndexWriter indexWriter) throws IOException {
   if (index.docs().size() > 1) {
     indexWriter.addDocuments(index.docs());
   } else {
     indexWriter.addDocument(index.docs().get(0));
   }
 }
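Note that addDocuments writes the list as one atomic block with contiguous document IDs, which block-join queries depend on; the single-document branch simply skips the block machinery when there is nothing to group.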
Example #23
  public void addDocument(List<Document> documentList, String collectionName) {

    IndexWriter indexWriter = indexWriterManager.getIndexWriter(collectionName);

    try {

      logger.info("collectionName : {}", collectionName);
      logger.info("add indexing start................");

      int indexingDocumentCount = 0;
      for (Document doc : documentList) {
        indexingDocumentCount++;
        if ((indexingDocumentCount % 50000) == 0) {
          logger.info("{} indexed...", indexingDocumentCount);
        }

        indexWriter.addDocument(doc);
      }

      logger.info("total indexed document count {}", indexingDocumentCount);

      logger.info("end");

    } catch (IOException e) {

      logger.error("error : ", e);
      throw new RuntimeException("An error occurred during indexing. [" + e.getMessage() + "]");
    }
  }
Example #24
 private void indexFiles(String dir, String index, int featureIndex, boolean createNewIndex)
     throws IOException {
   ArrayList<String> images = FileUtils.getAllImages(new File(dir), true);
   IndexWriter iw =
       LuceneUtils.createIndexWriter(
           index, createNewIndex, LuceneUtils.AnalyzerType.WhitespaceAnalyzer);
   // select one feature for the large index:
   int count = 0;
   long ms = System.currentTimeMillis();
   DocumentBuilder builder = new ChainedDocumentBuilder();
   ((ChainedDocumentBuilder) builder).addBuilder(builders[featureIndex]);
   //        ((ChainedDocumentBuilder) builder).addBuilder(builders[0]);
   for (Iterator<String> iterator = images.iterator(); iterator.hasNext(); ) {
     count++;
     if (count > 100 && count % 5000 == 0) {
       System.out.println(
           count
               + " files indexed. "
               + (System.currentTimeMillis() - ms) / (count)
               + " ms per file");
     }
     String file = iterator.next();
     try {
       iw.addDocument(builder.createDocument(new FileInputStream(file), file));
     } catch (Exception e) {
       System.err.println("Error: " + e.getMessage());
     }
   }
   iw.close();
 }
  @Test
  public void testDirectoryCleaned() throws Exception {
    final RAMDirectory directory = new RAMDirectory();
    final StandardAnalyzer analyzer = new StandardAnalyzer(DefaultIndexManager.LUCENE_VERSION);
    {
      IndexWriterConfig conf = new IndexWriterConfig(DefaultIndexManager.LUCENE_VERSION, analyzer);
      final IndexWriter writer = new IndexWriter(directory, conf);
      writer.addDocument(new Document());
      writer.close();
    }
    final DefaultConfiguration configuration = new DefaultConfiguration(directory, analyzer);
    final DefaultIndexEngine engine =
        new DefaultIndexEngine(
            new Supplier<IndexSearcher>() {
              public IndexSearcher get() {
                throw new AssertionFailedError("no searcher required");
              }
            },
            new Function<Index.UpdateMode, Writer>() {
              public Writer get(final Index.UpdateMode mode) {
                throw new AssertionFailedError("no writer required");
              }
            },
            configuration,
            FlushPolicy.NONE);

    assertEquals(1, new IndexSearcher(directory).getIndexReader().numDocs());
    engine.clean();
    assertEquals(0, new IndexSearcher(directory).getIndexReader().numDocs());
  }
  @Test
  public void createIndex() throws Exception {

    IndexWriter indexWriter = LuceneUtil.getIndexWriter();
    indexWriter.addDocument(getDocument());
    indexWriter.commit();
  }
Example #27
  private static void search(IndexSearcher searcher, Query query, IndexWriter out, String field)
      throws IOException {

    /* Carlos's hack */
    int hitsPerPage = 1;
    System.out.println("Consulta: " + query);
    TopDocs results = searcher.search(query, hitsPerPage);

    int numTotalHits = results.totalHits;
    if (numTotalHits > 0) results = searcher.search(query, numTotalHits);
    ScoreDoc[] hits = results.scoreDocs;
    /* End hack */

    for (int i = 0; i < numTotalHits; i++) {
      Document doc = searcher.doc(hits[i].doc);
      // System.out.println("Title: " + doc.get("title"));

      if (field != null) {
        System.out.println(hits[i].doc + "\t" + hits[i].score + "\t" + doc.get(field));
      }

      if (out != null) {
        out.addDocument(doc);
      }
    }
    System.out.println("Resultados: " + numTotalHits);
  }
  public void testDemo() throws IOException, ParseException {

    Analyzer analyzer = new StandardAnalyzer(TEST_VERSION_CURRENT);

    // Store the index in memory:
    Directory directory = new RAMDirectory();
    // To store an index on disk, use this instead:
    // Directory directory = FSDirectory.open("/tmp/testindex");
    IndexWriter iwriter =
        new IndexWriter(directory, analyzer, true, new IndexWriter.MaxFieldLength(25000));
    Document doc = new Document();
    String text = "This is the text to be indexed.";
    doc.add(new Field("fieldname", text, Field.Store.YES, Field.Index.ANALYZED));
    iwriter.addDocument(doc);
    iwriter.close();

    // Now search the index:
    IndexSearcher isearcher = new IndexSearcher(directory, true); // read-only=true
    // Parse a simple query that searches for "text":
    QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "fieldname", analyzer);
    Query query = parser.parse("text");
    ScoreDoc[] hits = isearcher.search(query, null, 1000).scoreDocs;
    assertEquals(1, hits.length);
    // Iterate through the results:
    for (int i = 0; i < hits.length; i++) {
      Document hitDoc = isearcher.doc(hits[i].doc);
      assertEquals("This is the text to be indexed.", hitDoc.get("fieldname"));
    }
    isearcher.close();
    directory.close();
  }
  @Test
  public void testVectorHighlighter() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter =
        new IndexWriter(dir, Lucene.STANDARD_ANALYZER, true, IndexWriter.MaxFieldLength.UNLIMITED);

    indexWriter.addDocument(
        doc()
            .add(field("_id", "1"))
            .add(
                field(
                    "content",
                    "the big bad dog",
                    Field.Store.YES,
                    Field.Index.ANALYZED,
                    Field.TermVector.WITH_POSITIONS_OFFSETS))
            .build());

    IndexReader reader = indexWriter.getReader();
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);

    assertThat(topDocs.totalHits, equalTo(1));

    FastVectorHighlighter highlighter = new FastVectorHighlighter();
    String fragment =
        highlighter.getBestFragment(
            highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))),
            reader,
            topDocs.scoreDocs[0].doc,
            "content",
            30);
    assertThat(fragment, notNullValue());
    System.out.println(fragment);
  }
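Note that FastVectorHighlighter can only build fragments from fields indexed with term vectors including positions and offsets, which is why the content field above uses Field.TermVector.WITH_POSITIONS_OFFSETS.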
  public void testListenerCalled() throws Exception {
    Directory dir = newDirectory();
    IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, null));
    final AtomicBoolean afterRefreshCalled = new AtomicBoolean(false);
    SearcherManager sm = new SearcherManager(iw, true, new SearcherFactory());
    sm.addListener(
        new ReferenceManager.RefreshListener() {
          @Override
          public void beforeRefresh() {}

          @Override
          public void afterRefresh(boolean didRefresh) {
            if (didRefresh) {
              afterRefreshCalled.set(true);
            }
          }
        });
    iw.addDocument(new Document());
    iw.commit();
    assertFalse(afterRefreshCalled.get());
    sm.maybeRefreshBlocking();
    assertTrue(afterRefreshCalled.get());
    sm.close();
    iw.close();
    dir.close();
  }