Example #1
  /** @return the indexes */
  public List<Index> getIndexes() {
    List<Index> indexes = new ArrayList<Index>();
    // Method[] methods = Index.class.getDeclaredMethods();
    int numDocs = reader.numDocs();
    // System.out.println(numDocs);
    for (int i = 0; i < numDocs; i++) {
      try {
        Document document = reader.document(i);
        List<Fieldable> f = document.getFields();

        Index index = new Index();
        for (Fieldable fieldable : f) {
          Field field = (Field) fieldable;
          Method m =
              Index.class.getDeclaredMethod("set" + field.name(), new Class[] {String.class});
          m.invoke(index, new Object[] {field.stringValue()});
          // Method m2 = Index.class.getDeclaredMethod("get" + field.name(), new Class[]{});
          // Object val = m2.invoke(index, new Object[]{});
          // System.out.println(m2.getName()+" = "+val);
          // System.out.println(m.getName() + " " + field.stringValue());
        }
        // System.out.println("RHAAR-"+i+" = "+index.getRHaarFeature());
        indexes.add(index);
      } catch (Exception e) {
        e.printStackTrace();
      }
    }
    return indexes;
  }
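The reflection above looks up a setter named "set" + field.name() taking a single String, so it only works if Index is a plain bean whose setter names match the stored field names exactly, including case. A minimal sketch of such a bean (the field names are assumptions, taken from the commented-out getRHaarFeature() call):

  public class Index {
    private String name;
    private String rHaarFeature;

    // Lucene field "Name" -> setName(String)
    public void setName(String name) {
      this.name = name;
    }

    public String getName() {
      return name;
    }

    // Lucene field "RHaarFeature" -> setRHaarFeature(String)
    public void setRHaarFeature(String rHaarFeature) {
      this.rHaarFeature = rHaarFeature;
    }

    public String getRHaarFeature() {
      return rHaarFeature;
    }
  }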
  public void startSearch(String searchString) throws IOException {

    /*analyze(searchString);*/

    try {
      Directory directory = FSDirectory.open(new File(".//Index")); // where the index is located
      IndexSearcher is = new IndexSearcher(directory); // search object
      QueryParser parser =
          new QueryParser(
              Version.LUCENE_31,
              "name",
              new RussianAnalyzer(Version.LUCENE_31)); // search field + analyzer
      /* String str1 = "фотоаппарат";
      String str2 = "телевизор";
      String str3 = "SONY";
      String total = "(" + str1 + " OR " + str2 + ")" + " AND " + str3;
      System.out.println(total);*/
      Query query = parser.parse(searchString); // what we are searching for
      TopDocs results =
          is.search(
              query, null,
              10); // run the search, limited to 10 documents; results contains ...
      System.out.println(
          "getMaxScore()="
              + results.getMaxScore()
              + " totalHits="
              + results
                  .totalHits); // MaxScore is the best (highest-priority) score, totalHits is the
      // number of documents found

      /*proposalController.getProposalList().clear();*/

      for (ScoreDoc hits : results.scoreDocs) { // collect the suggestions
        Document doc = is.doc(hits.doc); // fetch the document by its internal doc id

        for (Proposal proposal :
            proposalFacade.findPropolsalsByProduct(Long.valueOf(doc.get("recid")))) {

          proposalController.getProposalList().add(proposal);
          _log.info(
              "Proposal found: "
                  + proposal.getRecid().toString()
                  + ", product: "
                  + doc.get("recid")
                  + ", "
                  + doc.get("name"));
        }

        /*System.out.println("doc="+hits.doc+" score="+hits.score);//print the internal doc id + score
        addMessage(doc.get("id") + " | " + doc.get("recid") + " | " + doc.get("name"));//print the fields of the matched document*/
      }

      directory.close();
    } catch (ParseException e) {
      e.printStackTrace();
    } catch (IOException e) {
      e.printStackTrace();
    }
    addMessage("Поиск выполнен");
  }
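A hypothetical call; the argument is parsed with standard Lucene query syntax, so the boolean operators from the commented-out example work as-is:

  startSearch("(фотоаппарат OR телевизор) AND SONY");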
  public SearchResult search(String field, String query) {
    SearchResult searchResult = new SearchResult();
    try {
      Analyzer analyzer = new StandardAnalyzer();
      QueryParser queryParser = new QueryParser(field, analyzer);
      Query q = queryParser.parse(query);
      long start = System.currentTimeMillis();
      IndexSearcher searcher = getSearcher();
      TopDocs hits = searcher.search(q, 50);
      searchResult.setTotalHits(hits.totalHits);
      long end = System.currentTimeMillis();
      searchResult.setTime(end - start);
      System.err.println(
          "Found "
              + hits.totalHits
              + " document(s) (in "
              + (end - start)
              + " milliseconds) that matched query '"
              + q
              + "':");
      for (ScoreDoc scoreDoc : hits.scoreDocs) {
        Document doc = searcher.doc(scoreDoc.doc);
        ResultDocument document = new ResultDocument();
        document.setFullpath("\"" + doc.get("fullpath").replace("\"", "") + "\"");
        document.setFilename("\"" + doc.get("filename").replace("\"", "") + "\"");
        document.setTeaser("\"" + doc.get("teaser") + "\"");
        searchResult.addDocumnent(document);
      }
      close();
    } catch (Exception e) {
      e.printStackTrace();
    }

    return searchResult;
  }
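The method above leans on two helpers, getSearcher() and close(), that are not shown. A plausible minimal pair, assuming the class holds the index in a Directory field named indexDir (the field name and the caching strategy are assumptions):

  private Directory indexDir; // assumed field holding the index location
  private DirectoryReader reader;

  private IndexSearcher getSearcher() throws IOException {
    if (reader == null) {
      reader = DirectoryReader.open(indexDir);
    }
    return new IndexSearcher(reader);
  }

  private void close() throws IOException {
    if (reader != null) {
      reader.close();
      reader = null;
    }
  }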
  public void testMoreThan32ProhibitedClauses() throws Exception {
    final Directory d = newDirectory();
    final RandomIndexWriter w = new RandomIndexWriter(random(), d);
    Document doc = new Document();
    doc.add(
        new TextField(
            "field",
            "0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33",
            Field.Store.NO));
    w.addDocument(doc);
    doc = new Document();
    doc.add(new TextField("field", "33", Field.Store.NO));
    w.addDocument(doc);
    final IndexReader r = w.getReader();
    w.close();
    final IndexSearcher s = newSearcher(r);

    final BooleanQuery q = new BooleanQuery();
    for (int term = 0; term < 33; term++) {
      q.add(
          new BooleanClause(
              new TermQuery(new Term("field", "" + term)), BooleanClause.Occur.MUST_NOT));
    }
    q.add(new BooleanClause(new TermQuery(new Term("field", "33")), BooleanClause.Occur.SHOULD));

    final int[] count = new int[1];
    s.search(
        q,
        new Collector() {
          private Scorer scorer;

          @Override
          public void setScorer(Scorer scorer) {
            // Make sure we got BooleanScorer:
            this.scorer = scorer;
            assertEquals(
                "Scorer is implemented by wrong class",
                BooleanScorer.class.getName() + "$BucketScorer",
                scorer.getClass().getName());
          }

          @Override
          public void collect(int doc) {
            count[0]++;
          }

          @Override
          public void setNextReader(AtomicReaderContext context) {}

          @Override
          public boolean acceptsDocsOutOfOrder() {
            return true;
          }
        });

    assertEquals(1, count[0]);

    r.close();
    d.close();
  }
  @Test
  public void testSimpleMapper() throws Exception {
    DocumentMapperParser mapperParser = MapperTests.newParser();
    DocumentMapper docMapper =
        doc(
                "test",
                rootObject("person")
                    .add(object("name").add(stringField("first").store(YES).index(Field.Index.NO))))
            .build(mapperParser);

    BytesReference json =
        new BytesArray(
            copyToBytesFromClasspath(
                "/org/elasticsearch/test/unit/index/mapper/simple/test1.json"));
    Document doc = docMapper.parse("person", "1", json).rootDoc();

    assertThat((double) doc.getBoost(), closeTo(3.7, 0.01));
    assertThat(
        doc.get(docMapper.mappers().name("first").mapper().names().indexName()), equalTo("shay"));
    assertThat(
        docMapper.mappers().name("first").mapper().names().fullName(), equalTo("name.first"));
    //        System.out.println("Document: " + doc);
    //        System.out.println("Json: " + docMapper.sourceMapper().value(doc));
    doc = docMapper.parse(json).rootDoc();
    //        System.out.println("Document: " + doc);
    //        System.out.println("Json: " + docMapper.sourceMapper().value(doc));
  }
  @Override
  public void index(List<AgeObject> aol) {
    try {
      IndexWriter iWriter =
          new IndexWriter(
              index, analyzer, objectList == null, IndexWriter.MaxFieldLength.UNLIMITED);

      if (objectList == null) objectList = aol;
      else objectList.addAll(aol);

      for (AgeObject ao : aol) {
        Document doc = new Document();

        for (TextFieldExtractor tfe : extractors)
          doc.add(
              new Field(
                  tfe.getName(),
                  tfe.getExtractor().getValue(ao),
                  Field.Store.NO,
                  Field.Index.ANALYZED));

        iWriter.addDocument(doc);
      }

      iWriter.close();

      defaultFieldName = extractors.iterator().next().getName();
    } catch (CorruptIndexException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    } catch (IOException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    }
  }
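TextFieldExtractor is not a Lucene type; from its use above it pairs a field name with a callback that extracts text from an AgeObject. A sketch of the assumed shape:

  public class TextFieldExtractor {
    private final String name;
    private final TextValueExtractor extractor;

    public TextFieldExtractor(String name, TextValueExtractor extractor) {
      this.name = name;
      this.extractor = extractor;
    }

    public String getName() {
      return name;
    }

    public TextValueExtractor getExtractor() {
      return extractor;
    }

    // Assumed callback interface.
    public interface TextValueExtractor {
      String getValue(AgeObject ao);
    }
  }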
  public ImageSearchHits search(Document doc, IndexReader reader) throws IOException {
    ScalableColor sc = null;
    ColorLayout cl = null;
    EdgeHistogram eh = null;

    String[] cls = doc.getValues(DocumentBuilder.FIELD_NAME_COLORLAYOUT);
    if (cls != null && cls.length > 0) {
      cl = new ColorLayout();
      cl.setStringRepresentation(cls[0]);
    }
    String[] scs = doc.getValues(DocumentBuilder.FIELD_NAME_SCALABLECOLOR);
    if (scs != null && scs.length > 0) {
      sc = new ScalableColor();
      sc.setStringRepresentation(scs[0]);
    }
    String[] ehs = doc.getValues(DocumentBuilder.FIELD_NAME_EDGEHISTOGRAM);
    if (ehs != null && ehs.length > 0) {
      eh = new EdgeHistogram();
      eh.setStringRepresentation(ehs[0]);
    }

    float maxDistance = findSimilar(reader, cl, sc, eh);

    return new SimpleImageSearchHits(this.docs, maxDistance);
  }
  public void queryIndex() {
    Query q;
    try {
      q = new MultiFieldQueryParser(new String[] {"title", "name"}, analyzer).parse("s*");
      // searching ...
      int hitsPerPage = 10;
      IndexSearcher searcher = new IndexSearcher(indexDirectory);
      TopDocCollector collector = new TopDocCollector(hitsPerPage);
      searcher.search(q, collector);
      ScoreDoc[] hits = collector.topDocs().scoreDocs;

      // output results
      System.out.println("Found " + hits.length + " hits.");
      for (int i = 0; i < hits.length; ++i) {
        int docId = hits[i].doc;
        Document d = searcher.doc(docId);
        System.out.println((i + 1) + ". " + d.get("name") + ": " + d.get("title"));
      }
    } catch (ParseException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    } catch (CorruptIndexException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    } catch (IOException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    }
  }
  @Override
  public AddResponse add(Collection<InputDocument> inputDocuments) {
    try {
      if (logger.isDebugEnabled()) {
        logger.debug("adding documents...");
      }
      for (InputDocument inputDocument : inputDocuments) {
        assertIdExist(inputDocument);
      }
      for (Document document : DocumentTransformUtil.toLuceneDocuments(inputDocuments, schema)) {
        indexWriter.updateDocument(
            new Term(schema.getIdName(), document.getFieldable(schema.getIdName()).stringValue()),
            document,
            schema.getAnalyzer());
      }
      updateCount.addAndGet(inputDocuments.size());
      if (logger.isDebugEnabled()) {
        logger.debug("adding documents finished.");
      }
    } catch (Exception e) {
      logger.error("add documents error", e);
      return new AddResponse(e.getMessage(), ResultCodes.COMMON_ERROR);
    }
    return new AddResponse();
  }
Example #10
  public void createIndex() {

    loadTweets("datasets/sentiment-short.csv", 100);

    directory = new RAMDirectory();

    try {
      IndexWriter writer = getWriter();
      for (int i = 0; i < tweets.size(); i++) {
        Document doc = new Document();
        doc.add(
            new Field(
                "tweet",
                tweets.get(i).getText(),
                Field.Store.YES,
                Field.Index.ANALYZED,
                TermVector.YES));
        writer.addDocument(doc);
      }

      System.out.println("Docs: " + writer.numDocs());
      writer.close();

    } catch (Exception e) {
      e.printStackTrace();
    }
  }
  public String searchIndexDocTextReturn(String searchString, String termString) {
    System.out.println("Searching for '" + searchString + "'");
    // Directory directory = FSDirectory.getDirectory();
    IndexReader indexReader;
    try {
      indexReader = IndexReader.open(indexDirectory);
      IndexSearcher indexSearcher = new IndexSearcher(indexReader);
      int n = indexReader.numDocs();

      Term term = new Term(termString, searchString);
      TermQuery query = new TermQuery(term);
      TopDocs topDocs = indexSearcher.search(query, 10);
      if (topDocs.scoreDocs.length > 0) {
        // while(it.hasNext()){
        int docID = topDocs.scoreDocs[0].doc;
        Document doc = indexSearcher.doc(docID);
        textOfURL = doc.get("text");
        // sourceCodeOfURL = doc.get("html");
        this.docId = docID;
      }
    } catch (CorruptIndexException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    } catch (IOException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    }

    return textOfURL;
  }
Example #12
  public void createRandomTerms(int nDocs, int nTerms, double power, Directory dir)
      throws Exception {
    int[] freq = new int[nTerms];
    terms = new Term[nTerms];
    for (int i = 0; i < nTerms; i++) {
      int f = (nTerms + 1) - i; // make first terms less frequent
      freq[i] = (int) Math.ceil(Math.pow(f, power));
      terms[i] = new Term("f", Character.toString((char) ('A' + i)));
    }

    IndexWriter iw =
        new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    for (int i = 0; i < nDocs; i++) {
      Document d = new Document();
      for (int j = 0; j < nTerms; j++) {
        if (r.nextInt(freq[j]) == 0) {
          d.add(new Field("f", terms[j].text(), Field.Store.NO, Field.Index.NOT_ANALYZED));
          // System.out.println(d);
        }
      }
      iw.addDocument(d);
    }
    iw.optimize();
    iw.close();
  }
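Because r.nextInt(freq[j]) == 0 fires with probability 1 / ceil((nTerms + 1 - j)^power), document frequency follows a power law: term "A" is the rarest and the last term the most common. A hypothetical call:

  // Hypothetical usage: 10,000 docs over 26 single-letter terms with quadratic skew.
  Directory dir = new RAMDirectory();
  createRandomTerms(10000, 26, 2.0, dir);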
Example #13
  @Override
  public Document document(int docid) throws IOException {
    if (_subReaders != null) {
      int readerIndex = readerIndex(docid, _starts, _subReaders.length);
      BoboIndexReader subReader = _subReaders[readerIndex];
      return subReader.document(docid - _starts[readerIndex]);
    } else {
      Document doc = super.document(docid);
      Collection<FacetHandler<?>> facetHandlers = _facetHandlerMap.values();
      for (FacetHandler<?> facetHandler : facetHandlers) {
        String[] vals = facetHandler.getFieldValues(this, docid);
        if (vals != null) {
          String[] values = doc.getValues(facetHandler.getName());
          Set<String> storedVals = new HashSet<String>(Arrays.asList(values));

          for (String val : vals) {
            storedVals.add(val);
          }
          doc.removeField(facetHandler.getName());

          for (String val : storedVals) {
            doc.add(
                new Field(facetHandler.getName(), val, Field.Store.NO, Field.Index.NOT_ANALYZED));
          }
        }
      }
      return doc;
    }
  }
  @Test
  public void testFuzzyQuery() throws Exception {
    Analyzer analyzer = new MockAnalyzer(random());
    RandomIndexWriter iw =
        new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
    Document document = new Document();

    document.add(new SuggestField("suggest_field", "suggestion", 2));
    document.add(new SuggestField("suggest_field", "suaggestion", 4));
    document.add(new SuggestField("suggest_field", "ssuggestion", 1));
    iw.addDocument(document);
    document = new Document();
    document.add(new SuggestField("suggest_field", "sugfoo", 1));
    iw.addDocument(document);

    if (rarely()) {
      iw.commit();
    }

    DirectoryReader reader = iw.getReader();
    SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
    CompletionQuery query = new FuzzyCompletionQuery(analyzer, new Term("suggest_field", "sugg"));
    TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4);
    assertSuggestions(
        suggest,
        new Entry("suaggestion", 4 * 2),
        new Entry("suggestion", 2 * 3),
        new Entry("sugfoo", 1 * 3),
        new Entry("ssuggestion", 1 * 1));

    reader.close();
    iw.close();
  }
  // Verifies no *.nrm exists when all fields omit norms:
  public void testNoNrmFile() throws Throwable {
    Directory ram = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriter writer =
        new IndexWriter(
            ram,
            newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
                .setMaxBufferedDocs(3)
                .setMergePolicy(newLogMergePolicy()));
    LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy();
    lmp.setMergeFactor(2);
    lmp.setNoCFSRatio(0.0);
    Document d = new Document();

    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setOmitNorms(true);
    Field f1 = newField("f1", "This field has no norms", customType);
    d.add(f1);

    for (int i = 0; i < 30; i++) {
      writer.addDocument(d);
    }

    writer.commit();

    assertNoNrm(ram);

    // force merge
    writer.forceMerge(1);
    // flush
    writer.close();

    assertNoNrm(ram);
    ram.close();
  }
Example #16
  /**
   * Gives the id list of matching sentences from the Lucene index.
   *
   * @param input the input word
   * @param catalogName the catalog (domain) name to search in
   * @param limit how many hits are needed (0 means all)
   */
  public List<String> query(String input, String catalogName, int limit) {

    List<String> res = new ArrayList<String>();
    try {

      catalog c = catalogs.get(catalogName);
      IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(c.indexPath)));
      IndexSearcher searcher = new IndexSearcher(reader);

      QueryParser parser = new QueryParser("contents", analyzer);
      Query query = parser.parse(QueryParser.escape(input));

      int n = limit > 0 ? limit : searcher.count(query);
      if (n == 0) n = 1;
      TopDocs results = searcher.search(query, n);

      int endPos = limit;
      if (limit != 0) endPos = Math.min(results.totalHits, limit); // 1st n hits
      else endPos = results.totalHits; // all hits

      for (int i = 0; i < endPos; i++) {
        int id = results.scoreDocs[i].doc;
        Document doc = searcher.doc(id);
        res.add(doc.get("filename"));
      }
      reader.close();
      return res;

    } catch (ParseException e) {
      log(e.getMessage());
    } catch (IOException e) {
      log(e.getMessage());
    }
    return res;
  }
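A hypothetical call, assuming a catalog named "news" was registered in catalogs beforehand:

  List<String> filenames = query("holiday", "news", 10); // first 10 hits
  for (String filename : filenames) {
    System.out.println(filename);
  }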
  /**
   * Finds words for a more-like-this query former.
   *
   * @param docNum the id of the Lucene document from which to find terms
   */
  private PriorityQueue<ScoreTerm> retrieveTerms(int docNum) throws IOException {
    Map<String, Map<String, Int>> field2termFreqMap = new HashMap<>();
    for (String fieldName : fieldNames) {
      final Fields vectors = ir.getTermVectors(docNum);
      final Terms vector;
      if (vectors != null) {
        vector = vectors.terms(fieldName);
      } else {
        vector = null;
      }

      // field does not store term vector info
      if (vector == null) {
        Document d = ir.document(docNum);
        IndexableField[] fields = d.getFields(fieldName);
        for (IndexableField field : fields) {
          final String stringValue = field.stringValue();
          if (stringValue != null) {
            addTermFrequencies(new StringReader(stringValue), field2termFreqMap, fieldName);
          }
        }
      } else {
        addTermFrequencies(field2termFreqMap, vector, fieldName);
      }
    }

    return createQueue(field2termFreqMap);
  }
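Int here is not java.lang.Integer; it is the small mutable counter that MoreLikeThis uses so a term's frequency can be incremented in place. A sketch of that shape:

  // Minimal mutable counter (shape as in Lucene's MoreLikeThis).
  private static class Int {
    int x;

    Int() {
      x = 1;
    }
  }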
  @SuppressWarnings("unchecked")
  @Override
  public List<String> getClusterByCarrot2(String query) {
    // TODO Auto-generated method stub
    List<String> strs = new ArrayList<String>();
    final Controller controller = ControllerFactory.createCachingPooling(IDocumentSource.class);
    final List<org.carrot2.core.Document> documents = Lists.newArrayList();
    try {
      q = getParser().parse(QueryParserUtil.escape(query));
      docs = getIndexSearcher().search(q, Integer.MAX_VALUE);
      hits = docs.scoreDocs;
      for (int i = 0; i < hits.length; i++) {
        Document doc = getIndexSearcher().doc(hits[i].doc);
        documents.add(
            new org.carrot2.core.Document(
                doc.get(CONTENTS_FIELD), doc.get(TITLE_FIELD), doc.get(USER_FIELD)));
      }
      final ProcessingResult byTopicClusters =
          controller.process(documents, query, LingoClusteringAlgorithm.class);
      final List<Cluster> clustersByTopic = byTopicClusters.getClusters();
      final ProcessingResult byDomainClusters =
          controller.process(documents, query, ByUrlClusteringAlgorithm.class);
      final List<Cluster> clustersByDomain = byDomainClusters.getClusters();
      for (Cluster c : clustersByDomain) {
        strs.add(c.getLabel());
      }
      for (Cluster c : clustersByTopic) {
        strs.add(c.getLabel());
      }
    } catch (Exception ex) {
      // Don't swallow the failure silently; at least report it.
      ex.printStackTrace();
    }
    return strs;
  }
  public void testFarsiRangeFilterCollating(
      Analyzer analyzer, String firstBeg, String firstEnd, String secondBeg, String secondEnd)
      throws Exception {
    Directory dir = newDirectory();
    IndexWriter writer =
        new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
    Document doc = new Document();
    doc.add(new Field("content", "\u0633\u0627\u0628", Field.Store.YES, Field.Index.ANALYZED));
    doc.add(new Field("body", "body", Field.Store.YES, Field.Index.NOT_ANALYZED));
    writer.addDocument(doc);
    writer.close();
    IndexReader reader = IndexReader.open(dir);
    IndexSearcher searcher = new IndexSearcher(reader);
    Query query = new TermQuery(new Term("body", "body"));

    // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
    // orders the U+0698 character before the U+0633 character, so the single
    // index Term below should NOT be returned by a TermRangeFilter with a Farsi
    // Collator (or an Arabic one for the case when Farsi searcher not
    // supported).
    ScoreDoc[] result =
        searcher.search(query, new TermRangeFilter("content", firstBeg, firstEnd, true, true), 1)
            .scoreDocs;
    assertEquals("The index Term should not be included.", 0, result.length);

    result =
        searcher.search(query, new TermRangeFilter("content", secondBeg, secondEnd, true, true), 1)
            .scoreDocs;
    assertEquals("The index Term should be included.", 1, result.length);

    searcher.close();
    reader.close();
    dir.close();
  }
  public void testSpanNot() throws Exception {
    SpanQuery[] clauses = new SpanQuery[2];
    clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one"));
    clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "three"));
    SpanQuery spq = new SpanNearQuery(clauses, 5, true);
    SpanNotQuery snq =
        new SpanNotQuery(spq, new SpanTermQuery(new Term(PayloadHelper.FIELD, "two")));

    Directory directory = newDirectory();
    RandomIndexWriter writer =
        new RandomIndexWriter(
            random(),
            directory,
            newIndexWriterConfig(new PayloadAnalyzer()).setSimilarity(similarity));

    Document doc = new Document();
    doc.add(newTextField(PayloadHelper.FIELD, "one two three one four three", Field.Store.YES));
    writer.addDocument(doc);
    IndexReader reader = writer.getReader();
    writer.close();

    checkSpans(MultiSpansWrapper.wrap(reader, snq, SpanWeight.Postings.PAYLOADS), 1, new int[] {2});
    reader.close();
    directory.close();
  }
  /** Test that core cache key (needed for NRT) is working */
  public void testCoreCacheKey() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(null);
    iwc.setMaxBufferedDocs(100);
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    IndexWriter iw = new IndexWriter(dir, iwc);

    // add two docs, id:0 and id:1
    Document doc = new Document();
    Field idField = new StringField("id", "", Field.Store.NO);
    doc.add(idField);
    idField.setStringValue("0");
    iw.addDocument(doc);
    idField.setStringValue("1");
    iw.addDocument(doc);

    // open reader
    ShardId shardId = new ShardId("fake", "_na_", 1);
    DirectoryReader ir = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(iw, true), shardId);
    assertEquals(2, ir.numDocs());
    assertEquals(1, ir.leaves().size());

    // delete id:0 and reopen
    iw.deleteDocuments(new Term("id", "0"));
    DirectoryReader ir2 = DirectoryReader.openIfChanged(ir);

    // we should have the same cache key as before
    assertEquals(1, ir2.numDocs());
    assertEquals(1, ir2.leaves().size());
    assertSame(
        ir.leaves().get(0).reader().getCoreCacheKey(),
        ir2.leaves().get(0).reader().getCoreCacheKey());
    IOUtils.close(ir, ir2, iw, dir);
  }
  private IndexSearcher getSearcher() throws Exception {
    directory = newDirectory();
    String[] docs =
        new String[] {
          "xx rr yy mm  pp",
          "xx yy mm rr pp",
          "nopayload qq ss pp np",
          "one two three four five six seven eight nine ten eleven",
          "nine one two three four five six seven eight eleven ten"
        };
    RandomIndexWriter writer =
        new RandomIndexWriter(
            random(),
            directory,
            newIndexWriterConfig(new PayloadAnalyzer()).setSimilarity(similarity));

    Document doc = null;
    for (int i = 0; i < docs.length; i++) {
      doc = new Document();
      String docText = docs[i];
      doc.add(newTextField(PayloadHelper.FIELD, docText, Field.Store.YES));
      writer.addDocument(doc);
    }

    closeIndexReader = writer.getReader();
    writer.close();

    IndexSearcher searcher = newSearcher(closeIndexReader);
    return searcher;
  }
  public void testMethod() throws Exception {
    Directory directory = newDirectory();

    String[] values = new String[] {"1", "2", "3", "4"};

    RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
    for (int i = 0; i < values.length; i++) {
      Document doc = new Document();
      doc.add(newStringField(FIELD, values[i], Field.Store.YES));
      writer.addDocument(doc);
    }
    IndexReader ir = writer.getReader();
    writer.close();

    BooleanQuery booleanQuery1 = new BooleanQuery();
    booleanQuery1.add(new TermQuery(new Term(FIELD, "1")), BooleanClause.Occur.SHOULD);
    booleanQuery1.add(new TermQuery(new Term(FIELD, "2")), BooleanClause.Occur.SHOULD);

    BooleanQuery query = new BooleanQuery();
    query.add(booleanQuery1, BooleanClause.Occur.MUST);
    query.add(new TermQuery(new Term(FIELD, "9")), BooleanClause.Occur.MUST_NOT);

    IndexSearcher indexSearcher = newSearcher(ir);
    ScoreDoc[] hits = indexSearcher.search(query, null, 1000).scoreDocs;
    assertEquals("Number of matched documents", 2, hits.length);
    ir.close();
    directory.close();
  }
Example #24
  private static void index_h(String prefix, File file, IndexWriter indexWriter)
      throws IOException {
    Document doc = null;

    if (file.isDirectory()) {
      File files[] = file.listFiles();
      for (File file1 : files) {
        index_h(prefix + FILE_SEPARATOR + file.getName(), file1, indexWriter);
      }
    } else {
      String content = FileUtils.readFileToString(file, "utf-8");

      System.out.println("==============================================================");
      System.out.println("index_h " + content);
      System.out.println("==============================================================");

      String filename = prefix + FILE_SEPARATOR + file.getName();
      String path = file.getAbsolutePath();

      doc = new Document();
      doc.add(new Field("content", content, Field.Store.YES, Field.Index.ANALYZED));
      doc.add(new Field("relative_path", filename, Field.Store.YES, Field.Index.NOT_ANALYZED));
      indexWriter.addDocument(doc);
    }
  }
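A hypothetical driver for the recursive helper above, indexing a directory tree from its root (the paths and the analyzer choice are assumptions):

  Directory dir = FSDirectory.open(new File("index"));
  IndexWriter writer =
      new IndexWriter(
          dir, new StandardAnalyzer(Version.LUCENE_36), true, IndexWriter.MaxFieldLength.UNLIMITED);
  index_h("", new File("docs"), writer);
  writer.close();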
Example #25
  private void dumpDocument(int docNum, Document doc) throws IOException {

    outputLn();
    outputLn("Document " + docNum);

    if (doc == null) {
      outputLn("    deleted");
      return;
    }

    // note: only stored fields will be returned
    for (Fieldable field : doc.getFields()) {
      String fieldName = field.name();

      boolean isDate = "l.date".equals(fieldName);

      outputLn("    Field [" + fieldName + "]: " + field.toString());
      String[] values = doc.getValues(fieldName);
      if (values != null) {
        int i = 0;
        for (String value : values) {
          output("         " + "(" + i++ + ") " + value);
          if (isDate) {
            try {
              Date date = DateTools.stringToDate(value);
              output(" (" + date.toString() + " (" + date.getTime() + "))");
            } catch (java.text.ParseException e) {
              assert false;
            }
          }
          outputLn();
        }
      }
    }
  }
Example #26
  private static Map<String, List<String>> generate_result(Directory directory) {
    Map<String, List<String>> result_map = new HashMap<String, List<String>>();

    try {
      IndexReader reader = IndexReader.open(directory);
      TermEnum termEnum = reader.terms();
      while (termEnum.next()) {
        String termEnumString = termEnum.term().toString();
        if (termEnumString.startsWith("content:")) {
          String term = termEnumString.substring(termEnumString.lastIndexOf(":") + 1);
          TermDocs termDocs = reader.termDocs(termEnum.term());
          while (termDocs.next()) {
            Document doc = reader.document(termDocs.doc());
            String relative_path = doc.get("relative_path");

            if (result_map.containsKey(relative_path)) {
              result_map.get(relative_path).add(term + termDocs.freq());
            } else {
              List<String> terms = new ArrayList<String>();
              terms.add(term + termDocs.freq()); // don't drop the first occurrence
              result_map.put(relative_path, terms);
            }
          }
        }
      }
    } catch (IOException e) {
      e.printStackTrace();
    }

    return result_map;
  }
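A hypothetical usage that dumps each file's term+frequency list (the index location is an assumption):

  public static void main(String[] args) throws IOException {
    Map<String, List<String>> result = generate_result(FSDirectory.open(new File("index")));
    for (Map.Entry<String, List<String>> entry : result.entrySet()) {
      System.out.println(entry.getKey() + " -> " + entry.getValue());
    }
  }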
Example #27
  private static void search(IndexSearcher searcher, Query query, IndexWriter out, String field)
      throws IOException {

    /* Carlos's hack */
    int hitsPerPage = 1;
    System.out.println("Consulta: " + query);
    TopDocs results = searcher.search(query, hitsPerPage);

    int numTotalHits = results.totalHits;
    if (numTotalHits > 0) results = searcher.search(query, numTotalHits);
    ScoreDoc[] hits = results.scoreDocs;
    /* End hack */

    for (int i = 0; i < numTotalHits; i++) {
      Document doc = searcher.doc(hits[i].doc);
      // System.out.println("Title: " + doc.get("title"));

      if (field != null) {
        System.out.println(hits[i].doc + "\t" + hits[i].score + "\t" + doc.get(field));
      }

      if (out != null) {
        out.addDocument(doc);
      }
    }
    System.out.println("Resultados: " + numTotalHits);
  }
  // TODO: randomize
  public IndexSearcher setUp(Random random, Similarity similarity, int numDocs) throws IOException {
    Directory directory = new MockDirectoryWrapper(random, new RAMDirectory());
    PayloadAnalyzer analyzer = new PayloadAnalyzer();
    IndexWriter writer =
        new IndexWriter(
            directory,
            new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer).setSimilarity(similarity));
    // writer.infoStream = System.out;
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      doc.add(new Field(FIELD, English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
      doc.add(
          new Field(
              MULTI_FIELD,
              English.intToEnglish(i) + "  " + English.intToEnglish(i),
              Field.Store.YES,
              Field.Index.ANALYZED));
      doc.add(
          new Field(
              NO_PAYLOAD_FIELD, English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
      writer.addDocument(doc);
    }
    reader = IndexReader.open(writer, true);
    writer.close();

    IndexSearcher searcher = LuceneTestCase.newSearcher(reader);
    searcher.setSimilarity(similarity);
    return searcher;
  }
  public Document createDocument(BufferedImage image, String identifier) {
    assert (image != null);
    BufferedImage bimg = image;
    // Scaling the image is especially important for the correlogram features!
    // All images are scaled to guarantee a certain upper limit for indexing.
    if (Math.max(image.getHeight(), image.getWidth()) > MAX_IMAGE_DIMENSION) {
      bimg = ImageUtils.scaleImage(image, MAX_IMAGE_DIMENSION);
    }
    Document doc = null;
    logger.finer("Starting extraction from image [CEDD - fast].");
    CEDD vd = new CEDD();
    vd.extract(bimg);
    logger.fine("Extraction finished [CEDD - fast].");

    doc = new Document();
    doc.add(new Field(DocumentBuilder.FIELD_NAME_CEDD, vd.getByteArrayRepresentation()));
    if (identifier != null)
      doc.add(
          new Field(
              DocumentBuilder.FIELD_NAME_IDENTIFIER,
              identifier,
              Field.Store.YES,
              Field.Index.NOT_ANALYZED));

    return doc;
  }
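A hypothetical indexing loop around the builder above (the paths, analyzer, and index location are assumptions):

  IndexWriter writer =
      new IndexWriter(
          FSDirectory.open(new File("cedd-index")),
          new WhitespaceAnalyzer(),
          true,
          IndexWriter.MaxFieldLength.UNLIMITED);
  BufferedImage image = ImageIO.read(new File("photo.jpg"));
  writer.addDocument(createDocument(image, "photo.jpg"));
  writer.close();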
  public List<Document> searchDocuments(String text) {
    List<Document> documents = new ArrayList<Document>();
    try {
      TokenStream tokenStream = analyzer.tokenStream("text", text);
      CharTermAttribute charTermAtt = tokenStream.addAttribute(CharTermAttribute.class);
      tokenStream.reset();
      BooleanQuery bQuery = new BooleanQuery();
      while (tokenStream.incrementToken()) {
        String token = charTermAtt.toString();
        TermQuery tq = new TermQuery(new Term("text", token));
        tq.setBoost(2f);

        bQuery.add(tq, Occur.MUST);
      }
      tokenStream.close();

      TopDocs results = searcher.search(bQuery, 100000);
      ScoreDoc[] hits = results.scoreDocs;
      for (ScoreDoc hit : hits) {
        Document doc = searcher.doc(hit.doc);
        doc.add(new FloatField("score", hit.score, FloatField.TYPE_STORED));
        documents.add(doc);
      }
    } catch (Exception e) {
      e.printStackTrace();
    }
    return documents;
  }
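A hypothetical usage; every analyzed token must match, and each returned Document carries its score in the stored "score" field added above:

  for (Document d : searchDocuments("quick brown fox")) {
    System.out.println(d.get("score") + "  " + d.get("text"));
  }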