/**
 * Converts one {@code DemoData} record into a Lucene document.
 *
 * <p>Fields are added in this order: the type marker, a non-stored "termId" built from the type
 * value and the record id, the stored-only "id", the analyzed "name" with term vectors
 * (positions and offsets), and the stored-only "refId".
 *
 * @param data the record to convert
 * @param param not read by this method
 * @return the populated document
 */
@Override
 protected Document convertDocument(DemoData data, String param) {
   Document result = new Document();

   // Type marker: stored and indexed verbatim.
   result.add(new Field(TYPE_FIELD_NAME, TYPE_VALUE, Field.Store.YES, Field.Index.NOT_ANALYZED));

   // Search-only key combining the type value and the record id.
   result.add(
       new Field(
           "termId", TYPE_VALUE + "_" + data.getId(), Field.Store.NO, Field.Index.NOT_ANALYZED));

   // Plain id: stored for retrieval, not indexed.
   result.add(new Field("id", Integer.toString(data.getId()), Field.Store.YES, Field.Index.NO));

   // Name: stored, analyzed, with positions and offsets kept in the term vector.
   result.add(
       new Field(
           "name",
           data.getName(),
           Field.Store.YES,
           Field.Index.ANALYZED,
           Field.TermVector.WITH_POSITIONS_OFFSETS));

   // Reference id: stored for retrieval, not indexed.
   result.add(
       new Field("refId", Integer.toString(data.getRefId()), Field.Store.YES, Field.Index.NO));

   return result;
 }
  // TODO: randomize
  /**
   * Indexes {@code numDocs} documents into a RAMDirectory wrapped in a MockDirectoryWrapper and
   * returns a searcher over them.
   *
   * <p>Document {@code i} carries the English spelling of {@code i} in FIELD and
   * NO_PAYLOAD_FIELD, and the same spelling twice (separated by two spaces) in MULTI_FIELD. The
   * opened reader is kept in the {@code reader} field.
   *
   * @param random source of randomness handed to the directory wrapper
   * @param similarity similarity applied both at index time and on the returned searcher
   * @param numDocs number of documents to index
   * @return a searcher over the freshly built index
   * @throws IOException if indexing or opening the reader fails
   */
  public IndexSearcher setUp(Random random, Similarity similarity, int numDocs) throws IOException {
    Directory indexDir = new MockDirectoryWrapper(random, new RAMDirectory());
    PayloadAnalyzer payloadAnalyzer = new PayloadAnalyzer();
    IndexWriterConfig config =
        new IndexWriterConfig(TEST_VERSION_CURRENT, payloadAnalyzer).setSimilarity(similarity);
    IndexWriter indexWriter = new IndexWriter(indexDir, config);

    int docNo = 0;
    while (docNo < numDocs) {
      String words = English.intToEnglish(docNo);
      Document document = new Document();
      document.add(new Field(FIELD, words, Field.Store.YES, Field.Index.ANALYZED));
      document.add(
          new Field(MULTI_FIELD, words + "  " + words, Field.Store.YES, Field.Index.ANALYZED));
      document.add(new Field(NO_PAYLOAD_FIELD, words, Field.Store.YES, Field.Index.ANALYZED));
      indexWriter.addDocument(document);
      docNo++;
    }

    // Open the reader from the writer before the writer is closed.
    reader = IndexReader.open(indexWriter, true);
    indexWriter.close();

    IndexSearcher result = LuceneTestCase.newSearcher(reader);
    result.setSimilarity(similarity);
    return result;
  }
  /**
   * Runs a boolean query with 33 MUST_NOT clauses (terms "0".."32") and one SHOULD clause
   * ("33") against two documents, and expects exactly one collected hit. The collector also
   * asserts the class name of the scorer it is handed.
   */
  public void testMoreThan32ProhibitedClauses() throws Exception {
    final Directory directory = newDirectory();
    final RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

    // First document contains every term, so the prohibited clauses exclude it.
    Document everyTerm = new Document();
    everyTerm.add(
        new TextField(
            "field",
            "0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33",
            Field.Store.NO));
    writer.addDocument(everyTerm);

    // Second document contains only the SHOULD term.
    Document onlyLastTerm = new Document();
    onlyLastTerm.add(new TextField("field", "33", Field.Store.NO));
    writer.addDocument(onlyLastTerm);

    final IndexReader reader = writer.getReader();
    writer.close();
    final IndexSearcher searcher = newSearcher(reader);

    final BooleanQuery query = new BooleanQuery();
    int term = 0;
    while (term < 33) {
      TermQuery prohibited = new TermQuery(new Term("field", Integer.toString(term)));
      query.add(new BooleanClause(prohibited, BooleanClause.Occur.MUST_NOT));
      term++;
    }
    TermQuery wanted = new TermQuery(new Term("field", "33"));
    query.add(new BooleanClause(wanted, BooleanClause.Occur.SHOULD));

    // Single-element array so the anonymous collector can mutate the counter.
    final int[] hits = new int[1];
    Collector countingCollector =
        new Collector() {
          private Scorer scorer;

          @Override
          public void setScorer(Scorer scorer) {
            // Make sure we got BooleanScorer:
            this.scorer = scorer;
            String expectedClass = BooleanScorer.class.getName() + "$BucketScorer";
            assertEquals(
                "Scorer is implemented by wrong class", expectedClass, scorer.getClass().getName());
          }

          @Override
          public void collect(int doc) {
            hits[0]++;
          }

          @Override
          public void setNextReader(AtomicReaderContext context) {}

          @Override
          public boolean acceptsDocsOutOfOrder() {
            return true;
          }
        };
    searcher.search(query, countingCollector);

    assertEquals(1, hits[0]);

    reader.close();
    directory.close();
  }
Esempio n. 4
0
 /**
  * Replaces the indexed document matching the term id=1 with a newly built document.
  *
  * <p>I/O failures while writing or closing are printed via {@code printStackTrace} and not
  * propagated.
  */
 public void update() {
   IndexWriter indexWriter = null;
   try {
     Version matchVersion = Version.LUCENE_35;
     IndexWriterConfig config =
         new IndexWriterConfig(matchVersion, new StandardAnalyzer(matchVersion));
     indexWriter = new IndexWriter(directory, config);

     // Lucene does not offer an in-place update; the "update" here is really the combination
     // of two operations: delete first, then add.
     Document replacement = new Document();
     replacement.add(new Field("id", "11", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
     replacement.add(new Field("email", emails[0], Field.Store.YES, Field.Index.NOT_ANALYZED));
     replacement.add(new Field("content", contents[0], Field.Store.NO, Field.Index.ANALYZED));
     replacement.add(
         new Field("name", names[0], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));

     Term target = new Term("id", "1");
     indexWriter.updateDocument(target, replacement);
   } catch (IOException e) {
     // Also covers CorruptIndexException and LockObtainFailedException (IOException subclasses).
     e.printStackTrace();
   } finally {
     if (indexWriter != null) {
       try {
         indexWriter.close();
       } catch (IOException e) {
         e.printStackTrace();
       }
     }
   }
 }
  /**
   * Indexes four suggestions across two documents and checks the entries (and scores) returned
   * by a fuzzy completion query for the prefix "sugg".
   */
  @Test
  public void testFuzzyQuery() throws Exception {
    Analyzer mockAnalyzer = new MockAnalyzer(random());
    RandomIndexWriter indexWriter =
        new RandomIndexWriter(random(), dir, iwcWithSuggestField(mockAnalyzer, "suggest_field"));

    Document firstDoc = new Document();
    firstDoc.add(new SuggestField("suggest_field", "suggestion", 2));
    firstDoc.add(new SuggestField("suggest_field", "suaggestion", 4));
    firstDoc.add(new SuggestField("suggest_field", "ssuggestion", 1));
    indexWriter.addDocument(firstDoc);

    Document secondDoc = new Document();
    secondDoc.add(new SuggestField("suggest_field", "sugfoo", 1));
    indexWriter.addDocument(secondDoc);

    // Occasionally commit so the on-disk path is exercised too.
    if (rarely()) {
      indexWriter.commit();
    }

    DirectoryReader indexReader = indexWriter.getReader();
    SuggestIndexSearcher searcher = new SuggestIndexSearcher(indexReader);
    Term prefix = new Term("suggest_field", "sugg");
    CompletionQuery fuzzyQuery = new FuzzyCompletionQuery(mockAnalyzer, prefix);
    TopSuggestDocs topSuggestions = searcher.suggest(fuzzyQuery, 4);
    assertSuggestions(
        topSuggestions,
        new Entry("suaggestion", 4 * 2),
        new Entry("suggestion", 2 * 3),
        new Entry("sugfoo", 1 * 3),
        new Entry("ssuggestion", 1 * 1));

    indexReader.close();
    indexWriter.close();
  }
  /**
   * Indexes two documents that both carry a numeric doc value in "dv" (the second also has a
   * text field of the same name) and checks that FieldCache reports both as having the field.
   */
  public void testDocsWithField() throws Exception {
    Directory directory = newDirectory();
    IndexWriterConfig config =
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    IndexWriter indexWriter = new IndexWriter(directory, config);

    // Document 0: doc value only.
    Document dvOnly = new Document();
    dvOnly.add(new NumericDocValuesField("dv", 0L));
    indexWriter.addDocument(dvOnly);

    // Document 1: indexed text and a doc value under the same field name.
    Document textAndDv = new Document();
    textAndDv.add(new TextField("dv", "some text", Field.Store.NO));
    textAndDv.add(new NumericDocValuesField("dv", 0L));
    indexWriter.addDocument(textAndDv);

    DirectoryReader topReader = indexWriter.getReader();
    indexWriter.close();

    AtomicReader firstLeaf = topReader.leaves().get(0).reader();
    assertEquals(2, firstLeaf.numDocs());

    Bits docsWithDv = FieldCache.DEFAULT.getDocsWithField(firstLeaf, "dv");
    assertTrue(docsWithDv.get(0));
    assertTrue(docsWithDv.get(1));

    topReader.close();
    directory.close();
  }
Esempio n. 7
0
 /**
  * Builds a fixed sample document with a stored string "id" and stored text "title" and
  * "content" fields.
  *
  * @return the sample document
  */
 public Document getDocument() {
   final Document sample = new Document();
   sample.add(new StringField("id", "3", Store.YES));
   sample.add(new TextField("title", "lucene", Store.YES));
   sample.add(new TextField("content", "lucene的秘诀是创建索引和查询索引", Store.YES));
   return sample;
 }
  /**
   * Adds a single marker document (timestamp plus the reference "---INDEX-CREATED---") to the
   * index, provided the worker is running and a writer can be obtained.
   *
   * <p>Both values are written twice under the same field name: once indexed but not stored,
   * and once stored in compressed form.
   *
   * @param worker the index builder worker; a writer is only requested while it reports running
   * @param connection not read by this method
   * @throws Exception wrapping any failure raised while building or adding the document
   */
  private void createIndex(SearchIndexBuilderWorker worker, Connection connection)
      throws Exception {
    IndexWriter indexWrite = null;
    try {
      if (worker.isRunning()) {
        indexWrite = indexStorage.getIndexWriter(false);
      }
      if (indexWrite != null) {
        Document doc = new Document();
        // The date of indexing
        String timeStamp = String.valueOf(System.currentTimeMillis());
        doc.add(
            new Field(
                SearchService.DATE_STAMP, timeStamp, Field.Store.NO, Field.Index.NOT_ANALYZED));
        doc.add(
            new Field(
                SearchService.DATE_STAMP,
                CompressionTools.compressString(timeStamp),
                Field.Store.YES));

        String ref = "---INDEX-CREATED---";
        doc.add(
            new Field(
                SearchService.FIELD_REFERENCE,
                CompressionTools.compressString(ref),
                Field.Store.YES));
        doc.add(
            new Field(
                SearchService.FIELD_REFERENCE, ref, Field.Store.NO, Field.Index.NOT_ANALYZED));

        indexWrite.addDocument(doc);
      } else {
        log.error("Couldn't get indexWriter to add document!");
      }

    } catch (Exception ex) {
      log.error("Failed to Add Documents ", ex);
      throw new Exception(ex);
    } finally {
      if (indexWrite != null) {
        try {
          if (log.isDebugEnabled()) {
            log.debug("Closing Index Writer With " + indexWrite.maxDoc() + " documents");
            Directory d = indexWrite.getDirectory();
            String[] s = d.listAll();
            log.debug("Directory Contains ");
            for (int i = 0; i < s.length; i++) {
              // NOTE(review): s[i] is a bare name, so this File resolves against the working
              // directory rather than the index directory; the logged size and date may not
              // describe the index file. Confirm and fix separately if needed.
              File f = new File(s[i]);
              log.debug(
                  "\t"
                      + String.valueOf(f.length())
                      + "\t"
                      + new Date(f.lastModified())
                      + "\t"
                      + s[i]);
            }
          }
        } finally {
          // Always hand the writer back, even if the diagnostic listing above throws.
          indexStorage.closeIndexWriter(indexWrite);
        }
      }
    }
  }
Esempio n. 9
0
  /**
   * Adds (or replaces) the date-index entry for one revision: a document holding the revision's
   * creation time in milliseconds and its name, keyed by the revision name.
   *
   * @param revisionName name of the revision to index
   * @throws Exception if the revision cannot be read or the index cannot be updated
   * @see org.wyona.yarep.impl.repo.vfs.DateIndexerSearcher#addRevision(String)
   */
  public void addRevision(String revisionName) throws Exception {
    // WARN: Older creation dates might not have milliseconds and hence are not corresponding
    // exactly with the revision name, hence in order to build the date index correctly one needs
    // to use the creation date
    Date creationDate = node.getRevision(revisionName).getCreationDate();
    log.debug(
        "Add revision '"
            + revisionName
            + "' with creation date '"
            + creationDate
            + "' to date index ...");

    Document doc = new Document();
    doc.add(
        new NumericField(CREATION_DATE_FIELD_NAME, Field.Store.YES, true)
            .setLongValue(creationDate.getTime()));
    // doc.add(new Field(CREATION_DATE_FIELD_NAME,
    // org.apache.lucene.document.DateTools.dateToString(creationDate,
    // org.apache.lucene.document.DateTools.Resolution.MILLISECOND), Field.Store.YES,
    // Field.Index.NOT_ANALYZED));
    doc.add(
        new Field(
            REVISION_NAME_FIELD_NAME, revisionName, Field.Store.YES, Field.Index.NOT_ANALYZED));

    IndexWriter iw = getIndexWriter();
    try {
      // updateDocument replaces any existing entry with the same revision name.
      Term revisionNameTerm = new Term(REVISION_NAME_FIELD_NAME, revisionName);
      iw.updateDocument(revisionNameTerm, doc);
      iw.optimize();
    } finally {
      // Close the writer even when the update fails, so it is not left open.
      iw.close();
    }
  }
Esempio n. 10
0
  /**
   * Builds a plain-text search document from the given values.
   *
   * @param title value passed to {@code FieldUtil.setTitle}
   * @param key value stored in the "key" text field
   * @param content raw text; used for the size, raw, content and summary fields
   * @param urlpath value passed to {@code FieldUtil.setURL}
   * @param custom1 custom value registered under index 1
   * @param custom2 custom value registered under index 2
   * @param custom3 custom value registered under index 3
   * @param custom4 custom value registered under index 4
   * @return Document
   */
  public static Document getDocument(
      String title,
      String key,
      String content,
      String urlpath,
      String custom1,
      String custom2,
      String custom3,
      String custom4) {

    final Document document = new Document();

    // Length of the content, kept as an unindexed field.
    final String sizeText = Caster.toString(content.length());
    document.add(FieldUtil.UnIndexed("size", sizeText));
    document.add(FieldUtil.Text("key", key));

    FieldUtil.setMimeType(document, "text/plain");
    FieldUtil.setRaw(document, content);
    FieldUtil.setContent(document, content);

    // Summary is the content cut down via StringUtil.max with SUMMERY_SIZE.
    final String summary = StringUtil.max(content, SUMMERY_SIZE);
    FieldUtil.setSummary(document, summary, false);

    FieldUtil.setTitle(document, title);
    FieldUtil.setURL(document, urlpath);

    FieldUtil.setCustom(document, custom1, 1);
    FieldUtil.setCustom(document, custom2, 2);
    FieldUtil.setCustom(document, custom3, 3);
    FieldUtil.setCustom(document, custom4, 4);
    return document;
  }
  /**
   * Indexes one document whose "content" field stores term vectors with positions and offsets,
   * then checks that the vector highlighter wraps the matching term "bad" in {@code <b>} tags.
   *
   * <p>The reader, writer and directory are closed in {@code finally} blocks so they are
   * released even when an assertion fails.
   */
  @Test
  public void testVectorHighlighter() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter =
        new IndexWriter(dir, new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER));
    try {
      Document document = new Document();
      document.add(new TextField("_id", "1", Field.Store.YES));
      document.add(
          new Field(
              "content",
              "the big bad dog",
              Field.Store.YES,
              Field.Index.ANALYZED,
              Field.TermVector.WITH_POSITIONS_OFFSETS));
      indexWriter.addDocument(document);

      IndexReader reader = IndexReader.open(indexWriter, true);
      try {
        IndexSearcher searcher = new IndexSearcher(reader);
        TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);

        assertThat(topDocs.totalHits, equalTo(1));

        XFastVectorHighlighter highlighter = new XFastVectorHighlighter();
        String fragment =
            highlighter.getBestFragment(
                highlighter.getFieldQuery(new TermQuery(new Term("content", "bad"))),
                reader,
                topDocs.scoreDocs[0].doc,
                "content",
                30);
        assertThat(fragment, notNullValue());
        assertThat(fragment, equalTo("the big <b>bad</b> dog"));
      } finally {
        reader.close();
      }
    } finally {
      indexWriter.close();
      dir.close();
    }
  }
  /**
   * Indexes a single document in group "foo" and checks the hit counts and the size of the
   * all-matching-groups collection when {@code setAllGroups(true)} is enabled.
   */
  public void testSetAllGroups() throws Exception {
    Directory directory = newDirectory();
    IndexWriterConfig config =
        newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);

    // The group value is both indexed (for the query) and kept as sorted doc values.
    Document grouped = new Document();
    grouped.add(newField("group", "foo", StringField.TYPE_NOT_STORED));
    grouped.add(new SortedDocValuesField("group", new BytesRef("foo")));
    writer.addDocument(grouped);

    IndexSearcher searcher = newSearcher(writer.getReader());
    writer.close();

    GroupingSearch groupingSearch = new GroupingSearch("group");
    groupingSearch.setAllGroups(true);
    TermQuery fooQuery = new TermQuery(new Term("group", "foo"));
    TopGroups<?> topGroups = groupingSearch.search(searcher, fooQuery, 0, 10);

    assertEquals(1, topGroups.totalHitCount);
    // assertEquals(1, groups.totalGroupCount.intValue());
    assertEquals(1, topGroups.totalGroupedHitCount);
    assertEquals(1, groupingSearch.getAllMatchingGroups().size());

    searcher.getIndexReader().close();
    directory.close();
  }
  // LUCENE-1727: make sure doc fields are stored in order
  /**
   * Adds three stored fields ("zzz", "aaa", "zzz") to one document and verifies that reading
   * the document back yields the same names and values in the same order.
   */
  public void testStoredFieldsOrder() throws Throwable {
    Directory d = newDirectory();
    IndexWriter w =
        new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
    Document doc = new Document();

    FieldType customType = new FieldType();
    customType.setStored(true);

    // {name, value} pairs in the order they are added and are expected back.
    String[][] expectedFields = {{"zzz", "a b c"}, {"aaa", "a b c"}, {"zzz", "1 2 3"}};
    for (String[] nameAndValue : expectedFields) {
      doc.add(newField(nameAndValue[0], nameAndValue[1], customType));
    }
    w.addDocument(doc);

    IndexReader r = w.getReader();
    Document doc2 = r.document(0);
    Iterator<IndexableField> it = doc2.getFields().iterator();
    for (String[] nameAndValue : expectedFields) {
      assertTrue(it.hasNext());
      Field f = (Field) it.next();
      // Expected value goes first so failure messages read correctly.
      assertEquals(nameAndValue[0], f.name());
      assertEquals(nameAndValue[1], f.stringValue());
    }
    assertFalse(it.hasNext());

    r.close();
    w.close();
    d.close();
  }
Esempio n. 14
0
      /**
       * Builds the index document for a media file: a stored numeric id plus, when present, the
       * analyzed artist and album name and the non-stored, non-analyzed folder.
       *
       * @param mediaFile the media file to index
       * @return the populated document
       */
      @Override
      public Document createDocument(MediaFile mediaFile) {
        final Document document = new Document();

        NumericField idField = new NumericField(FIELD_ID, Field.Store.YES, false);
        document.add(idField.setIntValue(mediaFile.getId()));

        final String artist = mediaFile.getArtist();
        if (artist != null) {
          document.add(new Field(FIELD_ARTIST, artist, Field.Store.YES, Field.Index.ANALYZED));
        }

        final String albumName = mediaFile.getAlbumName();
        if (albumName != null) {
          document.add(new Field(FIELD_ALBUM, albumName, Field.Store.YES, Field.Index.ANALYZED));
        }

        final String folder = mediaFile.getFolder();
        if (folder != null) {
          document.add(
              new Field(FIELD_FOLDER, folder, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
        }

        return document;
      }
Esempio n. 15
0
 /**
  * Indexes 50 documents with a numeric doc value "dv" and a stored "docId", then checks that
  * the doc value is readable per document while "dv" is absent from the stored fields.
  */
 public void testDocValuesUnstored() throws IOException {
   final int docCount = 50;
   Directory directory = newDirectory();
   IndexWriterConfig config =
       newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
   config.setMergePolicy(newLogMergePolicy());
   IndexWriter indexWriter = new IndexWriter(directory, config);

   for (int id = 0; id < docCount; id++) {
     Document document = new Document();
     document.add(new NumericDocValuesField("dv", id));
     document.add(new TextField("docId", Integer.toString(id), Field.Store.YES));
     indexWriter.addDocument(document);
   }

   DirectoryReader topReader = indexWriter.getReader();
   SlowCompositeReaderWrapper wrapped = new SlowCompositeReaderWrapper(topReader);

   FieldInfo dvFieldInfo = wrapped.getFieldInfos().fieldInfo("dv");
   assertTrue(dvFieldInfo.hasDocValues());

   NumericDocValues values = wrapped.getNumericDocValues("dv");
   for (int id = 0; id < docCount; id++) {
     assertEquals(id, values.get(id));
     StoredDocument stored = wrapped.document(id);
     // cannot use d.get("dv") due to another bug!
     assertNull(stored.getField("dv"));
     assertEquals(Integer.toString(id), stored.get("docId"));
   }

   wrapped.close();
   indexWriter.close();
   directory.close();
 }
  /**
   * Runs a ContextQuery against plain SuggestField data; if an IllegalStateException is thrown,
   * its message must mention "SuggestField".
   */
  @Test
  public void testContextQueryOnSuggestField() throws Exception {
    Analyzer mockAnalyzer = new MockAnalyzer(random());
    RandomIndexWriter indexWriter =
        new RandomIndexWriter(random(), dir, iwcWithSuggestField(mockAnalyzer, "suggest_field"));

    Document firstDoc = new Document();
    firstDoc.add(new SuggestField("suggest_field", "abc", 3));
    firstDoc.add(new SuggestField("suggest_field", "abd", 4));
    firstDoc.add(new SuggestField("suggest_field", "The Foo Fighters", 2));
    indexWriter.addDocument(firstDoc);

    Document secondDoc = new Document();
    secondDoc.add(new SuggestField("suggest_field", "abcdd", 5));
    indexWriter.addDocument(secondDoc);

    if (rarely()) {
      indexWriter.commit();
    }

    DirectoryReader indexReader = indexWriter.getReader();
    SuggestIndexSearcher searcher = new SuggestIndexSearcher(indexReader);
    Term prefix = new Term("suggest_field", "ab");
    ContextQuery contextQuery = new ContextQuery(new PrefixCompletionQuery(mockAnalyzer, prefix));
    try {
      // NOTE(review): there is no fail() after this call, so the test also passes when no
      // exception is thrown - confirm whether that is intended.
      searcher.suggest(contextQuery, 4);
    } catch (IllegalStateException expected) {
      assertTrue(expected.getMessage().contains("SuggestField"));
    }

    indexReader.close();
    indexWriter.close();
  }
Esempio n. 17
0
  /**
   * Imports an index whose "dv" field holds numeric doc values into a second index via
   * {@code addIndexes(IndexReader[])}, then expects an IllegalArgumentException when a document
   * with sorted doc values under the same field name is added.
   */
  public void testTypeChangeViaAddIndexesIR2() throws Exception {
    // Source index: one document with a numeric doc value.
    Directory sourceDir = newDirectory();
    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    IndexWriter writer = new IndexWriter(sourceDir, conf);
    Document numericDoc = new Document();
    numericDoc.add(new NumericDocValuesField("dv", 0L));
    writer.addDocument(numericDoc);
    writer.close();

    // Target index: pull in the source index through a reader.
    Directory targetDir = newDirectory();
    writer = new IndexWriter(targetDir, conf);
    IndexReader[] sources = new IndexReader[] {DirectoryReader.open(sourceDir)};
    writer.addIndexes(sources);
    sources[0].close();

    Document sortedDoc = new Document();
    sortedDoc.add(new SortedDocValuesField("dv", new BytesRef("foo")));
    try {
      writer.addDocument(sortedDoc);
      fail("did not hit exception");
    } catch (IllegalArgumentException iae) {
      // expected
    }

    writer.close();
    targetDir.close();
    sourceDir.close();
  }
  /**
   * Indexes four context suggestions (one context type each) and checks that a ContextQuery
   * wrapping a prefix query for "sugg" returns all four with their contexts and weights.
   */
  @Test
  public void testAllContextQuery() throws Exception {
    Analyzer mockAnalyzer = new MockAnalyzer(random());
    RandomIndexWriter indexWriter =
        new RandomIndexWriter(random(), dir, iwcWithSuggestField(mockAnalyzer, "suggest_field"));

    Document firstDoc = new Document();
    firstDoc.add(new ContextSuggestField("suggest_field", "suggestion1", 4, "type1"));
    firstDoc.add(new ContextSuggestField("suggest_field", "suggestion2", 3, "type2"));
    firstDoc.add(new ContextSuggestField("suggest_field", "suggestion3", 2, "type3"));
    indexWriter.addDocument(firstDoc);

    Document secondDoc = new Document();
    secondDoc.add(new ContextSuggestField("suggest_field", "suggestion4", 1, "type4"));
    indexWriter.addDocument(secondDoc);

    if (rarely()) {
      indexWriter.commit();
    }

    DirectoryReader indexReader = indexWriter.getReader();
    SuggestIndexSearcher searcher = new SuggestIndexSearcher(indexReader);
    Term prefix = new Term("suggest_field", "sugg");
    ContextQuery contextQuery = new ContextQuery(new PrefixCompletionQuery(mockAnalyzer, prefix));
    TopSuggestDocs topSuggestions = searcher.suggest(contextQuery, 4);
    assertSuggestions(
        topSuggestions,
        new Entry("suggestion1", "type1", 4),
        new Entry("suggestion2", "type2", 3),
        new Entry("suggestion3", "type3", 2),
        new Entry("suggestion4", "type4", 1));

    indexReader.close();
    indexWriter.close();
  }
Esempio n. 19
0
  /**
   * Expects an IllegalArgumentException when the same numeric doc values field is added twice
   * to one document, then verifies that a document carrying it once is indexed and its value
   * (17) is readable through FieldCache.
   */
  public void testMultiValuedDocValuesField() throws Exception {
    Directory directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
    Field dvField = new NumericDocValuesField("field", 17);

    // Index doc values are single-valued so we should not
    // be able to add same field more than once:
    Document duplicated = new Document();
    duplicated.add(dvField);
    duplicated.add(dvField);
    try {
      writer.addDocument(duplicated);
      fail("didn't hit expected exception");
    } catch (IllegalArgumentException iae) {
      // expected
    }

    Document single = new Document();
    single.add(dvField);
    writer.addDocument(single);
    writer.forceMerge(1);

    DirectoryReader reader = writer.getReader();
    writer.close();
    assertEquals(
        17, FieldCache.DEFAULT.getInts(getOnlySegmentReader(reader), "field", false).get(0));

    reader.close();
    directory.close();
  }
 /**
  * Adds a "_uid" field and a "_version" numeric doc values field (initial value 0) to
  * {@code document} and wraps it, together with the remaining arguments, in a ParsedDocument.
  *
  * @param document the document to extend; it is modified in place
  * @return a ParsedDocument holding {@code document} as its only document
  */
 private ParsedDocument testParsedDocument(
     String uid,
     String id,
     String type,
     String routing,
     long timestamp,
     long ttl,
     Document document,
     Analyzer analyzer,
     BytesReference source,
     boolean mappingsModified) {
   final Field uidFld = new Field("_uid", uid, UidFieldMapper.Defaults.FIELD_TYPE);
   final Field versionFld = new NumericDocValuesField("_version", 0);

   // Order matters: uid first, then version.
   document.add(uidFld);
   document.add(versionFld);

   return new ParsedDocument(
       uidFld,
       versionFld,
       id,
       type,
       routing,
       timestamp,
       ttl,
       Arrays.asList(document),
       analyzer,
       source,
       mappingsModified);
 }
Esempio n. 21
0
 /**
  * Converts a journal entry into an index document. All five fields are stored; only the
  * full text is analyzed, the others are indexed verbatim.
  *
  * @param entry the journal entry to convert
  * @return the populated document
  */
 private Document getDocument(JournalEntry entry) {
   final Document document = new Document();

   final String uid = entry.getUid().toString();
   document.add(
       new Field(EntryIndexFields.ID.name(), uid, Field.Store.YES, Field.Index.NOT_ANALYZED));

   final String name = entry.getName();
   document.add(
       new Field(EntryIndexFields.NAME.name(), name, Field.Store.YES, Field.Index.NOT_ANALYZED));

   final String text = entry.getText();
   document.add(
       new Field(EntryIndexFields.FULLTEXT.name(), text, Field.Store.YES, Field.Index.ANALYZED));

   // Date is indexed as its epoch-millisecond value rendered as a string.
   final String millis = Long.toString(entry.getDateTime().getMillis());
   document.add(
       new Field(EntryIndexFields.DATE.name(), millis, Field.Store.YES, Field.Index.NOT_ANALYZED));

   final String itemType = entry.getItemType().name();
   document.add(
       new Field(
           EntryIndexFields.TYPE.name(), itemType, Field.Store.YES, Field.Index.NOT_ANALYZED));

   return document;
 }
Esempio n. 22
0
  /**
   * Adds the keyword fields for a banner: one non-stored string field per configured keyword,
   * or the default ALL keyword when the banner has no keyword condition or an empty keyword list.
   *
   * @param bannerDoc the document to add the keyword fields to
   * @param bannerDefinition the ad definition whose keyword condition is read
   */
  @Override
  public void addFields(Document bannerDoc, AdDefinition bannerDefinition) {
    KeywordConditionDefinition keywordCondition =
        bannerDefinition.hasConditionDefinition(ConditionDefinitions.KEYWORD)
            ? (KeywordConditionDefinition)
                bannerDefinition.getConditionDefinition(ConditionDefinitions.KEYWORD)
            : null;

    boolean hasKeywords = keywordCondition != null && keywordCondition.getKeywords().size() > 0;
    if (!hasKeywords) {
      // Every banner without an explicit keyword gets the default ALL keyword.
      bannerDoc.add(
          new StringField(
              AdDBConstants.ADDB_AD_KEYWORD, AdDBConstants.ADDB_AD_KEYWORD_ALL, Field.Store.NO));
      return;
    }

    // Store the keywords in the document.
    List<Keyword> keywords = keywordCondition.getKeywords();
    for (Keyword keyword : keywords) {
      bannerDoc.add(new StringField(AdDBConstants.ADDB_AD_KEYWORD, keyword.word, Field.Store.NO));
    }
  }
Esempio n. 23
0
 /**
  * Adds index fields for one property of an entity.
  *
  * <ul>
  *   <li>Names found in FIELDS are stored under their own name and also added to the default
  *       field.
  *   <li>Other string-typed properties go to the default field only, unless the name is listed
  *       in SENSITIVE_FIELDS.
  *   <li>The "aliases" property is expanded into one "alias" field and one default-field entry
  *       per element.
  * </ul>
  *
  * @param document the document to add fields to
  * @param state the property value; cast to String or Set depending on the branch taken
  * @param fieldName the property name
  * @param type the property type
  * @return {@code true} if the property was handled by one of the branches above
  */
 private boolean indexField(Document document, Object state, String fieldName, Type type) {
   // NOTE(review): binarySearch is only meaningful if FIELDS and SENSITIVE_FIELDS are sorted.
   if (Arrays.binarySearch(FIELDS, fieldName) >= 0) {
     // index all fields we know about
     String text = (String) state;
     document.add(new Field(fieldName, text, Field.Store.YES, Field.Index.TOKENIZED));
     document.add(new Field(Indexer.DEFAULT_FIELD, text, Field.Store.NO, Field.Index.TOKENIZED));
     return true;
   }

   if (type instanceof StringType) {
     // index all strings with the exception of the fields explicitly listed as sensitive
     boolean sensitive = Arrays.binarySearch(SENSITIVE_FIELDS, fieldName) >= 0;
     if (!sensitive) {
       document.add(
           new Field(
               Indexer.DEFAULT_FIELD, (String) state, Field.Store.NO, Field.Index.TOKENIZED));
     }
     return true;
   }

   if (fieldName.equals("aliases")) {
     for (Object element : (Set<?>) state) {
       String alias = (String) element;
       document.add(new Field("alias", alias, Field.Store.NO, Field.Index.TOKENIZED));
       document.add(
           new Field(Indexer.DEFAULT_FIELD, alias, Field.Store.NO, Field.Index.TOKENIZED));
     }
     return true;
   }

   return false;
 }
Esempio n. 24
0
 /**
  * Builds the basic Lucene document for a topic, intended to be parsed with the
  * StandardAnalyzer.
  *
  * @param topic the topic to index; a null topic content is indexed as the empty string
  * @return the populated document
  */
 private Document createStandardDocument(Topic topic) {
   final String rawContent = topic.getTopicContent();
   final String content = (rawContent != null) ? rawContent : "";

   final Document document = new Document();

   // Exact (not analyzed) topic name, stored so it can be used when deleting index records.
   document.add(
       new Field(
           FIELD_TOPIC_NAME, topic.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));

   // Exact (not analyzed) namespace id, used internally to restrict searches by namespace.
   final String namespaceId = topic.getNamespace().getId().toString();
   document.add(
       new Field(
           FIELD_TOPIC_NAMESPACE, namespaceId, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));

   // Analyzed topic name so that (for example) a search for "New York" will match "New York
   // City"; boosted to count 3x as much as topic content.
   final Field analyzedName =
       new Field(FIELD_TOPIC_NAME_ANALYZED, topic.getName(), Field.Store.NO, Field.Index.ANALYZED);
   analyzedName.setBoost(3.0f);
   document.add(analyzedName);

   // Analyzed and stored content: searchable and available for result summaries.
   document.add(new Field(FIELD_TOPIC_CONTENT, content, Field.Store.YES, Field.Index.ANALYZED));
   return document;
 }
Esempio n. 25
0
  /**
   * Recursively indexes {@code file}. Directories are descended into, with the directory name
   * appended to {@code prefix}; each regular file is read as UTF-8 and added to the index with
   * its content and its prefix-relative path.
   *
   * @param prefix path prefix accumulated so far, joined with FILE_SEPARATOR
   * @param file the file or directory to index
   * @param indexWriter the writer that receives one document per regular file
   * @throws IOException if a directory cannot be listed, a file cannot be read, or the document
   *     cannot be added
   */
  private static void index_h(String prefix, File file, IndexWriter indexWriter)
      throws IOException {
    if (file.isDirectory()) {
      File[] children = file.listFiles();
      if (children == null) {
        // listFiles() returns null on an I/O error; fail clearly instead of with an NPE.
        throw new IOException("Unable to list directory: " + file.getAbsolutePath());
      }
      String childPrefix = prefix + FILE_SEPARATOR + file.getName();
      for (File child : children) {
        index_h(childPrefix, child, indexWriter);
      }
      return;
    }

    String content = FileUtils.readFileToString(file, "utf-8");

    System.out.println("==============================================================");
    System.out.println("index_h " + content);
    System.out.println("==============================================================");

    String filename = prefix + FILE_SEPARATOR + file.getName();

    Document doc = new Document();
    doc.add(new Field("content", content, Field.Store.YES, Field.Index.ANALYZED));
    doc.add(new Field("relative_path", filename, Field.Store.YES, Field.Index.NOT_ANALYZED));
    indexWriter.addDocument(doc);
  }
  /**
   * Adds one processed document to the index: stored text content with term vectors
   * (positions, no offsets), plus stored URL, id, title and type fields.
   *
   * @param iw the writer to add the document to
   * @param parsedDoc the processed document supplying the field values
   * @throws IOException if the writer fails to add the document
   */
  private void indexDocument(IndexWriter iw, ProcessedDocument parsedDoc) throws IOException {

    // Content: stored text with term vectors and positions, offsets disabled.
    FieldType contentType = new FieldType(TextField.TYPE_STORED);
    contentType.setStoreTermVectors(true);
    contentType.setStoreTermVectorPositions(true);
    contentType.setStoreTermVectorOffsets(false);

    org.apache.lucene.document.Document luceneDoc = new org.apache.lucene.document.Document();
    luceneDoc.add(new Field(INDEX_FIELD_CONTENT, parsedDoc.getText(), contentType));
    luceneDoc.add(new StringField(INDEX_FIELD_URL, parsedDoc.getDocumentURL(), Field.Store.YES));
    luceneDoc.add(new StringField(INDEX_FIELD_DOC_ID, parsedDoc.getDocumentId(), Field.Store.YES));
    luceneDoc.add(new TextField(INDEX_FIELD_TITLE, parsedDoc.getDocumentTitle(), Field.Store.YES));
    luceneDoc.add(
        new StringField(INDEX_FIELD_DOC_TYPE, parsedDoc.getDocumentType(), Field.Store.YES));

    // TODO: 2.2 -- The effect of boosting (Book Section 2.1.2)
    //
    // Uncomment the lines below to demonstrate the effect of boosting
    //
    // if ( parsedDoc.getDocumentId().equals("g1-d13")) {
    // doc.setBoost(2);
    // }

    iw.addDocument(luceneDoc);
  }
  /**
   * Indexes one document and runs the same term query under two inclusive TermRangeFilters on
   * "content": the first range must exclude the document, the second must include it.
   *
   * @param analyzer analyzer used for indexing
   * @param firstBeg lower bound of the range expected to exclude the indexed term
   * @param firstEnd upper bound of the range expected to exclude the indexed term
   * @param secondBeg lower bound of the range expected to include the indexed term
   * @param secondEnd upper bound of the range expected to include the indexed term
   */
  public void testFarsiRangeFilterCollating(
      Analyzer analyzer, String firstBeg, String firstEnd, String secondBeg, String secondEnd)
      throws Exception {
    Directory directory = newDirectory();
    IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    IndexWriter indexWriter = new IndexWriter(directory, config);

    Document document = new Document();
    document.add(
        new Field("content", "\u0633\u0627\u0628", Field.Store.YES, Field.Index.ANALYZED));
    document.add(new Field("body", "body", Field.Store.YES, Field.Index.NOT_ANALYZED));
    indexWriter.addDocument(document);
    indexWriter.close();

    IndexReader indexReader = IndexReader.open(directory);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    Query bodyQuery = new TermQuery(new Term("body", "body"));

    // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
    // orders the U+0698 character before the U+0633 character, so the single
    // index Term below should NOT be returned by a TermRangeFilter with a Farsi
    // Collator (or an Arabic one for the case when Farsi searcher not
    // supported).
    TermRangeFilter firstRange = new TermRangeFilter("content", firstBeg, firstEnd, true, true);
    ScoreDoc[] excludedHits = indexSearcher.search(bodyQuery, firstRange, 1).scoreDocs;
    assertEquals("The index Term should not be included.", 0, excludedHits.length);

    TermRangeFilter secondRange = new TermRangeFilter("content", secondBeg, secondEnd, true, true);
    ScoreDoc[] includedHits = indexSearcher.search(bodyQuery, secondRange, 1).scoreDocs;
    assertEquals("The index Term should be included.", 1, includedHits.length);

    indexSearcher.close();
    indexReader.close();
    directory.close();
  }
Esempio n. 28
0
 /**
  * Expects an IllegalArgumentException when one document carries a numeric and a sorted doc
  * values field under the same name, then verifies that a document with only the numeric
  * field is indexed and its value (17) can be read back.
  */
 public void testDifferentTypedDocValuesField2() throws Exception {
   Directory directory = newDirectory();
   RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
   Field numericField = new NumericDocValuesField("field", 17);

   // Same field name with two different doc values types in a single document.
   Document conflicting = new Document();
   conflicting.add(numericField);
   conflicting.add(new SortedDocValuesField("field", new BytesRef("hello")));
   try {
     writer.addDocument(conflicting);
     fail("didn't hit expected exception");
   } catch (IllegalArgumentException iae) {
     // expected
   }

   Document numericOnly = new Document();
   numericOnly.add(numericField);
   writer.addDocument(numericOnly);
   writer.forceMerge(1);

   DirectoryReader reader = writer.getReader();
   assertEquals(17, getOnlySegmentReader(reader).getNumericDocValues("field").get(0));

   reader.close();
   writer.close();
   directory.close();
 }
  /**
   * Extracts the CEDD descriptor from an image and wraps it in a document, together with the
   * identifier when one is given.
   *
   * @param image the image to describe; asserted to be non-null
   * @param identifier optional identifier, stored and indexed verbatim when not null
   * @return a document holding the CEDD byte representation and, optionally, the identifier
   */
  public Document createDocument(BufferedImage image, String identifier) {
    assert (image != null);

    // Scaling image is especially with the correlogram features very important!
    // All images are scaled to guarantee a certain upper limit for indexing.
    boolean tooLarge = Math.max(image.getHeight(), image.getWidth()) > MAX_IMAGE_DIMENSION;
    BufferedImage working = tooLarge ? ImageUtils.scaleImage(image, MAX_IMAGE_DIMENSION) : image;

    logger.finer("Starting extraction from image [CEDD - fast].");
    CEDD descriptor = new CEDD();
    descriptor.extract(working);
    logger.fine("Extraction finished [CEDD - fast].");

    Document document = new Document();
    document.add(
        new Field(DocumentBuilder.FIELD_NAME_CEDD, descriptor.getByteArrayRepresentation()));
    if (identifier != null) {
      document.add(
          new Field(
              DocumentBuilder.FIELD_NAME_IDENTIFIER,
              identifier,
              Field.Store.YES,
              Field.Index.NOT_ANALYZED));
    }
    return document;
  }
Esempio n. 30
0
 /**
  * Overwrite this method if you want to filter the input, apply hashing, etc.
  *
  * <p>Behaviour depends on the current {@code run}: in run 0 nothing is done here (documents
  * are only counted); in run 1 features of the configured class whose document count is listed
  * in {@code representativesID} are collected as representatives; in run 2 features of the
  * configured class get two hash text fields, and every feature is stored under
  * {@code featureFieldName}.
  *
  * @param feature the current feature.
  * @param document the current document.
  * @param featureFieldName the field name of the feature.
  */
 protected void addToDocument(LireFeature feature, Document document, String featureFieldName) {
   if (run == 1) {
     // Select the representatives: put matching features into a temporary data structure.
     if (representativesID.contains(docCount)
         && feature.getClass().getCanonicalName().equals(featureClass.getCanonicalName())) {
       representatives.add(feature);
     }
     return;
   }

   if (run != 2) {
     // Run 0 just counts documents; any other run value does nothing here either.
     return;
   }

   // Actual hashing: find the nearest representatives and put those as a hash into a document.
   if (feature.getClass().getCanonicalName().equals(featureClass.getCanonicalName())) {
     // It's a feature to be hashed.
     int[] hashes = getHashes(feature);

     String allHashes = createDocumentString(hashes, hashes.length);
     document.add(new TextField(featureFieldName + "_hash", allHashes, Field.Store.YES));

     String firstTenHashes = createDocumentString(hashes, 10);
     document.add(new TextField(featureFieldName + "_hash_q", firstTenHashes, Field.Store.YES));
   }
   document.add(new StoredField(featureFieldName, feature.getByteArrayRepresentation()));
 }