Exemplo n.º 1
0
  /**
   * Reads document 0 back from {@code dir} and checks the field-type settings (term vectors,
   * norms, index options) of three of its fields, then checks that a
   * {@link DocumentStoredFieldVisitor} restricted to one field name loads exactly that field.
   *
   * @throws IOException if the index cannot be opened or read
   */
  public void test() throws IOException {
    assertNotNull(dir);
    assertNotNull(fieldInfos);
    // try-with-resources: the reader is released even when one of the assertions below fails.
    try (IndexReader reader = DirectoryReader.open(dir)) {
      Document doc = reader.document(0);
      assertNotNull(doc);
      assertNotNull(doc.getField(DocHelper.TEXT_FIELD_1_KEY));

      Field field = (Field) doc.getField(DocHelper.TEXT_FIELD_2_KEY);
      assertNotNull(field);
      assertTrue(field.fieldType().storeTermVectors());
      assertFalse(field.fieldType().omitNorms());
      // assertEquals reports the actual option on failure, unlike assertTrue(a == b).
      assertEquals(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, field.fieldType().indexOptions());

      field = (Field) doc.getField(DocHelper.TEXT_FIELD_3_KEY);
      assertNotNull(field);
      assertFalse(field.fieldType().storeTermVectors());
      assertTrue(field.fieldType().omitNorms());
      assertEquals(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, field.fieldType().indexOptions());

      field = (Field) doc.getField(DocHelper.NO_TF_KEY);
      assertNotNull(field);
      assertFalse(field.fieldType().storeTermVectors());
      assertFalse(field.fieldType().omitNorms());
      assertEquals(IndexOptions.DOCS, field.fieldType().indexOptions());

      // Load document 0 again through a visitor that only accepts TEXT_FIELD_3_KEY.
      DocumentStoredFieldVisitor visitor =
          new DocumentStoredFieldVisitor(DocHelper.TEXT_FIELD_3_KEY);
      reader.document(0, visitor);
      final List<IndexableField> fields = visitor.getDocument().getFields();
      assertEquals(1, fields.size());
      assertEquals(DocHelper.TEXT_FIELD_3_KEY, fields.get(0).name());
    }
  }
  /**
   * Builds a document with a document boost of 3 and checks that the generated poly-field
   * sub-fields, all declared with omitNorms, still report a boost of exactly 1.
   */
  @Test
  public void testWithPolyFieldsAndDocumentBoost() {
    final String currencyField = "amount" + FieldType.POLY_FIELD_SEPARATOR + "_currency";
    final String amountRawField = "amount" + FieldType.POLY_FIELD_SEPARATOR + "_amount_raw";

    IndexSchema schema = h.getCore().getLatestSchema();

    // Schema preconditions: the parent fields keep norms, their sub-fields omit them.
    assertFalse(schema.getField("store").omitNorms());
    assertTrue(schema.getField("store_0_coordinate").omitNorms());
    assertTrue(schema.getField("store_1_coordinate").omitNorms());
    assertFalse(schema.getField("amount").omitNorms());
    assertTrue(schema.getField(currencyField).omitNorms());
    assertTrue(schema.getField(amountRawField).omitNorms());

    SolrInputDocument input = new SolrInputDocument();
    input.setDocumentBoost(3.0f);
    input.addField("store", "40.7143,-74.006");
    input.addField("amount", "10.5");

    Document out = DocumentBuilder.toDocument(input, schema);
    assertNotNull(out.get("store"));
    assertNotNull(out.getField("store_0_coordinate"));

    // The sub-fields have omitNorms=true, so their boost must stay at 1; any other value would
    // make adding the document to Lucene fail.
    assertTrue(1f == out.getField("store_0_coordinate").boost());
    assertTrue(1f == out.getField("store_1_coordinate").boost());
    assertTrue(1f == out.getField(currencyField).boost());
    assertTrue(1f == out.getField(amountRawField).boost());
  }
  /**
   * Runs a long range query without a lower bound, once with the lower end flagged inclusive and
   * once exclusive, and checks hit count plus the values of the first and last hit in index order.
   *
   * @param precisionStep precision step of the queried field; also selects the field name
   * @throws Exception if searching fails
   */
  private void testLeftOpenRange(int precisionStep) throws Exception {
    final String field = "field" + precisionStep;
    final int count = 3000;
    final long upper = (count - 1) * distance + (distance / 3) + startOffset;

    // With a null lower bound the inclusive flag of the lower end must not change the result.
    for (boolean minInclusive : new boolean[] {true, false}) {
      LegacyNumericRangeQuery<Long> q =
          LegacyNumericRangeQuery.newLongRange(
              field, precisionStep, null, upper, minInclusive, true);
      ScoreDoc[] sd = searcher.search(q, noDocs, Sort.INDEXORDER).scoreDocs;
      assertNotNull(sd);
      assertEquals("Score doc count", count, sd.length);

      Document first = searcher.doc(sd[0].doc);
      assertEquals("First doc", startOffset, first.getField(field).numericValue().longValue());

      Document last = searcher.doc(sd[sd.length - 1].doc);
      assertEquals(
          "Last doc",
          (count - 1) * distance + startOffset,
          last.getField(field).numericValue().longValue());
    }
  }
Exemplo n.º 4
0
 /**
  * Runs {@code checkFieldType} for every member name of {@code object} and adds the resulting
  * field to {@code doc} when the document has no field of that name.
  *
  * @param object JSON object whose member names are processed
  * @param doc document that receives the missing fields
  */
 private void updateDoc(JSONObject object, Document doc) {
   String[] names = JSONObject.getNames(object);
   if (names == null) {
     // JSONObject.getNames returns null, not an empty array, when the object has no members.
     return;
   }
   for (String name : names) {
     IndexableField field = checkFieldType(name, object, doc.getField(name));
     // Looked up again after checkFieldType, exactly as before, in case the helper touched doc.
     if (doc.getField(name) == null) {
       doc.add(field);
     }
   }
 }
 /**
  * Adds a "title" value with a field boost of 3 and checks that "title_stringNoNorms", which
  * omits norms, ends up with a boost of 1 while "title" itself keeps the boost of 3.
  */
 @Test
 public void testCopyFieldWithFieldBoost() {
   IndexSchema schema = h.getCore().getLatestSchema();
   assertFalse(schema.getField("title").omitNorms());
   assertTrue(schema.getField("title_stringNoNorms").omitNorms());

   SolrInputDocument input = new SolrInputDocument();
   input.addField("title", "mytitle", 3.0f);

   Document out = DocumentBuilder.toDocument(input, schema);
   assertNotNull(out.get("title_stringNoNorms"));

   float noNormsBoost = out.getField("title_stringNoNorms").boost();
   assertTrue(
       "title_stringNoNorms has the omitNorms attribute set to true, if the boost is different than 1.0, it will fail",
       1.0f == noNormsBoost);

   float titleBoost = out.getField("title").boost();
   assertTrue("It is OK that title has a boost of 3", 3.0f == titleBoost);
 }
Exemplo n.º 6
0
  /**
   * Builds a RAMDirectory from the directory at {@code indexDir} and checks its reported size,
   * its document count, and that every document has a "content" field.
   *
   * @throws IOException if a directory or the reader cannot be opened or read
   */
  public void testRAMDirectory() throws IOException {
    Directory fsDir = newFSDirectory(indexDir);
    MockDirectoryWrapper ramDir = new MockDirectoryWrapper(random, new RAMDirectory(fsDir));

    // The underlying directory is closed right after the RAMDirectory was built from it.
    fsDir.close();

    // The reported size must match a fresh recomputation.
    assertEquals(ramDir.sizeInBytes(), ramDir.getRecomputedSizeInBytes());

    // Open a reader to verify the document count.
    IndexReader reader = IndexReader.open(ramDir, true);
    assertEquals(docsToAdd, reader.numDocs());

    // Open a searcher to check that all documents are there.
    IndexSearcher searcher = newSearcher(reader);
    int docId = 0;
    while (docId < docsToAdd) {
      Document stored = searcher.doc(docId);
      assertTrue(stored.getField("content") != null);
      docId++;
    }

    // Clean up.
    reader.close();
    searcher.close();
  }
Exemplo n.º 7
0
  /**
   * Replaces the value of field {@code feild} in {@code doc} with its best highlighted fragment.
   *
   * <p>When the highlighter finds no fragment, the first 50 characters of the stored value (or
   * the whole value if it is shorter) are used instead.
   *
   * @param highlighter highlighter that selects the fragment
   * @param analyzer analyzer passed to the highlighter
   * @param doc document whose field value is read and overwritten
   * @param feild name of the field to highlight
   * @return the text written into the field; the unchanged stored value when highlighting threw;
   *     {@code null} when the document has no value for the field
   */
  public static String hightlightFeild(
      Highlighter highlighter, Analyzer analyzer, Document doc, String feild) {
    String docContent = doc.get(feild);
    if (docContent == null) {
      // Nothing to highlight; previously this ran into a NullPointerException further down.
      return null;
    }
    try {
      String hc = highlighter.getBestFragment(analyzer, feild, docContent);
      if (hc == null) {
        // Fall back to a plain prefix of at most 50 characters.
        hc = docContent.substring(0, Math.min(docContent.length(), 50));
      }
      doc.getField(feild).setValue(hc);
      return hc;
    } catch (InvalidTokenOffsetsException e) {
      // Best effort: report the problem and fall through to the unhighlighted value.
      e.printStackTrace();
    } catch (IOException e) {
      // Best effort: report the problem and fall through to the unhighlighted value.
      e.printStackTrace();
    }
    return docContent;
  }
  // LUCENE-1219
  // Stores a 17-byte slice (offset 10) of a 50-byte array as a binary field and checks that the
  // slice, not the whole array, is what comes back from the index.
  public void testBinaryFieldOffsetLength() throws IOException {
    Directory dir = newDirectory();
    IndexWriter w =
        new IndexWriter(
            dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
    byte[] b = new byte[50];
    for (int i = 0; i < 50; i++) {
      b[i] = (byte) (i + 77);
    }

    Document doc = new Document();
    Field f = new StoredField("binary", b, 10, 17);
    // Before indexing, the field still references the full backing array plus offset and length.
    byte[] bx = f.binaryValue().bytes;
    assertTrue(bx != null);
    assertEquals(50, bx.length);
    assertEquals(10, f.binaryValue().offset);
    assertEquals(17, f.binaryValue().length);
    doc.add(f);
    w.addDocument(doc);
    w.close();

    IndexReader ir = DirectoryReader.open(dir);
    Document doc2 = ir.document(0);
    IndexableField f2 = doc2.getField("binary");
    b = f2.binaryValue().bytes;
    assertTrue(b != null);
    // Was assertEquals(17, b.length, 17): that resolves to the (expected, actual, delta) overload
    // and accepted any length between 0 and 34. Check the stored value's own length exactly.
    assertEquals(17, f2.binaryValue().length);
    // First stored byte is original index 10, i.e. 10 + 77 = 87; honor the value's offset.
    assertEquals(87, b[f2.binaryValue().offset]);
    ir.close();
    dir.close();
  }
 /**
  * Renders the tokens of a field of {@code doc} as "increment [term]" pairs separated by spaces.
  *
  * <p>An untokenized field is rendered as one token covering its whole value. Any exception is
  * printed and turned into a test failure.
  *
  * @param field name of the field to render
  * @return the rendered token list
  */
 public String tokens(String field) {
   try {
     Field f = doc.getField(field);
     if (f == null) {
       fail("No such field " + field);
     }
     if (!f.isTokenized()) {
       String val = value(field);
       Token whole = new Token(val, 0, val.length());
       return whole.getPositionIncrement() + " [" + whole.termText() + "]";
     }

     // Prefer a pre-built token stream, then analyze the string value, then the reader value.
     TokenStream ts = f.tokenStreamValue();
     if (ts == null && f.stringValue() != null) {
       ts = analyzer.tokenStream(field, f.stringValue());
     }
     if (ts == null && f.readerValue() != null) {
       ts = analyzer.tokenStream(field, f.readerValue());
     }
     if (ts == null) {
       fail("No token stream for field " + field);
     }

     StringBuilder rendered = new StringBuilder();
     for (Token t = ts.next(); t != null; t = ts.next()) {
       rendered.append(t.getPositionIncrement()).append(" [").append(t.termText()).append("] ");
     }
     return rendered.toString().trim();
   } catch (Exception e) {
     e.printStackTrace();
     fail(e.getMessage());
     return null;
   }
 }
 /**
  * Removes all fields named {@code fieldName} from the document with id {@code docId} and writes
  * the result back, replacing the indexed document that has the same "URI" term.
  *
  * @param fieldName name of the field(s) to remove
  * @param docId id of the document to load from the reader
  * @param analyzer analyzer used when the document is written back
  * @throws IOException if reading or updating the index fails
  * @throws ConfigurationException declared by the original signature
  * @throws IllegalStateException if the document has no "URI" field left to identify it by
  */
 public void deleteFieldFromIndex(String fieldName, int docId, Analyzer analyzer)
     throws IOException, ConfigurationException {
   Document doc = reader.document(docId);
   doc.removeFields(fieldName);
   Field uri = doc.getField("URI");
   if (uri == null) {
     // Also reached when fieldName itself is "URI"; previously a bare NullPointerException.
     throw new IllegalStateException(
         "Document " + docId + " has no URI field after removing '" + fieldName + "'");
   }
   Term term = new Term("URI", uri.stringValue());
   writer.updateDocument(term, doc, analyzer);
 }
Exemplo n.º 11
0
  /**
   * Runs {@code query} and returns one "authority/studyId" path string per distinct hit, in the
   * order the hits were collected.
   *
   * <p>The shared reader and searcher are opened on first use and reopened when the reader is no
   * longer current.
   *
   * @param query query to run; {@code null} yields an empty list
   * @return list of {@code authority + File.separator + studyId} strings without duplicates
   * @throws IOException if the index cannot be opened or searched
   */
  private List getHitIds(Query query) throws IOException {
    // Insertion-ordered set: removes duplicate ids while keeping hit order.
    LinkedHashSet<String> matchIds = new LinkedHashSet<String>();
    if (query != null) {
      if (r == null) {
        r = IndexReader.open(dir);
        searcher = new IndexSearcher(r);
      } else if (!r.isCurrent()) {
        // NOTE(review): spins without sleeping until the directory is unlocked.
        while (r.isLocked(dir)) {}
        r = IndexReader.open(dir);
        searcher = new IndexSearcher(r);
      }

      DocumentCollector collector = new DocumentCollector(searcher);
      searcher.search(query, collector);
      // Closed once here; the former second close() after the loop was redundant.
      searcher.close();

      List hits = collector.getStudies();
      for (int i = 0; i < hits.size(); i++) {
        Document d = (Document) hits.get(i);
        String authorityStr = d.getField("authority").stringValue();
        String studyIdStr = d.getField("studyId").stringValue();
        matchIds.add(authorityStr + File.separator + studyIdStr);
      }
    }
    return new ArrayList<String>(matchIds);
  }
Exemplo n.º 12
0
  /**
   * Compares the live documents of two readers, pairing them through the value of a key field.
   *
   * <p>A document of {@code reader1} without a counterpart in {@code reader2} is recorded via
   * {@code addAdded}; one with a counterpart is diffed and recorded via {@code addDiff} when the
   * diff is not equal. A document of {@code reader2} without a counterpart in {@code reader1} is
   * recorded via {@code addRemoved}.
   *
   * @param reader1 first index
   * @param reader2 second index
   * @param keyFieldName name of the field that identifies a document in both indexes
   * @return the accumulated differences
   * @throws IOException if reading either index fails
   * @throws ParseException declared by the original signature
   * @throws IllegalArgumentException if a live document has no field named {@code keyFieldName}
   */
  protected Diff<Document, Diff<Fieldable, DocumentDiff>> compare(
      IndexReader reader1, IndexReader reader2, String keyFieldName)
      throws IOException, ParseException {
    Diff<Document, Diff<Fieldable, DocumentDiff>> result =
        new Diff<Document, Diff<Fieldable, DocumentDiff>>();

    // Document ids range over [0, maxDoc()). numDocs() does not count deleted documents, so
    // using it as the bound while skipping deleted ids would miss live documents at the end.
    final int maxDoc1 = reader1.maxDoc();
    for (int docId = 0; docId < maxDoc1; docId++) {
      if (reader1.isDeleted(docId)) {
        continue;
      }
      Document doc1 = reader1.document(docId);
      Field keyField = doc1.getField(keyFieldName);
      if (keyField == null) {
        throw new IllegalArgumentException(
            "Key field " + keyFieldName + " should be defined in all docs in the index");
      }

      Document doc2 = findByKey(reader2, keyField);
      if (doc2 == null) {
        result.addAdded(doc1);
      } else {
        Diff<Fieldable, DocumentDiff> diff =
            CompareUtils.diff(keyField.stringValue(), doc1, doc2);
        if (!diff.isEquals()) {
          result.addDiff(diff);
        }
      }
    }

    // Second pass: documents that exist only in reader2.
    final int maxDoc2 = reader2.maxDoc();
    for (int docId = 0; docId < maxDoc2; docId++) {
      if (reader2.isDeleted(docId)) {
        continue;
      }
      Document doc2 = reader2.document(docId);
      Field keyField = doc2.getField(keyFieldName);
      if (keyField == null) {
        throw new IllegalArgumentException(
            "Key field '" + keyFieldName + "' should be defined in all docs in the index");
      }

      if (findByKey(reader1, keyField) == null) {
        result.addRemoved(doc2);
      }
    }

    return result;
  }
 /**
  * Applies the pending attribute changes in {@code this.attributes} to the index.
  *
  * <p>For each entry, the entry key is searched in the primary-key field and the top hit is
  * rebuilt as a new document from its string-valued fields. Each named field is then replaced
  * according to {@code this.action}, and the rebuilt document is written back with
  * {@code updateDocuments} under the primary-key term.
  *
  * @param writer writer that receives the rebuilt documents
  * @param searcher searcher used to locate the current version of each document
  * @throws ModifyKnowledgeBaseException if no document matches a key, a field to update does not
  *     exist in the rebuilt document, the key cannot be parsed as a query, or index access fails
  */
 @Override
 public void modifyIndex(final IndexWriter writer, final IndexSearcher searcher)
     throws ModifyKnowledgeBaseException {
   for (final Map.Entry<String, HashMap<String, String>> entry : this.attributes.entrySet()) {
     // key: primary-key value of the document; hash: field name -> new value for that document.
     final String key = entry.getKey();
     final HashMap<String, String> hash = entry.getValue();
     final QueryParser qp = new QueryParser(this.docPrimaryKey, new DoserIDAnalyzer());
     try {
       // Only the best hit for the escaped key is considered.
       final TopDocs top = searcher.search(qp.parse(QueryParserBase.escape(key)), 1);
       final ScoreDoc[] scores = top.scoreDocs;
       if (scores.length > 0) {
         final Document doc = new Document();
         final Document currentDoc = searcher.getIndexReader().document(scores[0].doc);
         // Bug fix: build a new Document and copy the fields over. Fields without a string value
         // are skipped; the primary key becomes a StringField, everything else a TextField.
         final List<IndexableField> fields = currentDoc.getFields();
         for (final IndexableField field : fields) {
           if (field.stringValue() != null) {
             if (field.name().equalsIgnoreCase(docPrimaryKey)) {
               doc.add(new StringField(field.name(), field.stringValue(), Field.Store.YES));
             } else {
               doc.add(new TextField(field.name(), field.stringValue(), Field.Store.YES));
             }
           }
         }
         final List<Document> docListToAdd = new LinkedList<Document>();
         docListToAdd.add(doc);
         for (final Map.Entry<String, String> subentry : hash.entrySet()) {
           // The old field is captured before removeFields below, because the update helpers
           // receive it as input.
           final IndexableField field = doc.getField(subentry.getKey());
           if (field == null) {
             throw new ModifyKnowledgeBaseException("UpdateField no found", null);
           }
           if (this.action.equals(KBModifications.OVERRIDEFIELD)) {
             // Replace the field with one TextField per generated entry.
             doc.removeFields(subentry.getKey());
             String[] newentries = generateSeperatedFieldStrings(subentry.getValue());
             for (int i = 0; i < newentries.length; i++) {
               doc.add(new TextField(subentry.getKey(), newentries[i], Field.Store.YES));
             }
           } else if (this.action.equals(KBModifications.UPDATERELATEDLABELS)) {
             doc.removeFields(subentry.getKey());
             doc.add(updateOccurrences(subentry.getValue(), field, "surroundinglabels"));
           } else if (this.action.equals(KBModifications.UPDATEOCCURRENCES)) {
             doc.removeFields(subentry.getKey());
             IndexableField f = updateOccurrences(subentry.getValue(), field, "occurrences");
             doc.add(f);
           }
         }
         // Replaces whatever is indexed under the primary-key term with the rebuilt document.
         writer.updateDocuments(new Term(this.docPrimaryKey, key), docListToAdd);
       } else {
         throw new ModifyKnowledgeBaseException("Document not found", null);
       }
     } catch (final IOException e) {
       throw new ModifyKnowledgeBaseException("IOException in IndexSearcher", e);
     } catch (ParseException e) {
       throw new ModifyKnowledgeBaseException("Queryparser Exception", e);
     }
   }
 }
  /**
   * Stubs the mocked searcher so that loading {@code docId} returns a mock document exposing the
   * given price, discount and closeout flag ("T" or "F") through its fields.
   *
   * @param docId document id the stub answers for
   * @param price numeric value returned by the price field
   * @param discount numeric value returned by the discount field
   * @param isCloseout whether the closeout field reports "T" rather than "F"
   * @throws IOException declared because the stubbed {@code searcher.doc} call declares it
   */
  private void mockDocument(int docId, double price, double discount, boolean isCloseout)
      throws IOException {
    Document doc = PowerMockito.mock(Document.class);
    when(searcher.doc(eq(docId), any(Set.class))).thenReturn(doc);
    when(searcher.getSchema()).thenReturn(schema);

    // Price field.
    IndexableField priceField = mock(IndexableField.class);
    when(doc.getField(FIELD_PRICE)).thenReturn(priceField);
    when(priceField.numericValue()).thenReturn(Double.valueOf(price));

    // Discount field.
    IndexableField discountField = mock(IndexableField.class);
    when(doc.getField(FIELD_DISCOUNT)).thenReturn(discountField);
    when(discountField.numericValue()).thenReturn(Double.valueOf(discount));

    // Closeout flag, encoded as a string.
    final String closeoutFlag = isCloseout ? "T" : "F";
    IndexableField closeoutField = mock(IndexableField.class);
    when(doc.getField(FIELD_CLOSEOUT)).thenReturn(closeoutField);
    when(closeoutField.stringValue()).thenReturn(closeoutFlag);
  }
 /**
  * Reads the numeric value of field {@code name} and returns it as a {@link Date}, treating the
  * number as milliseconds since the epoch.
  *
  * @param name name of the field to read
  * @param document document to read from
  * @return the date, or {@code null} when the field is missing or has no numeric value
  */
 @Override
 public Object get(String name, Document document) {
   final IndexableField field = document.getField(name);
   if (field == null) {
     return null;
   }
   final Number millis = field.numericValue();
   if (millis == null) {
     return null;
   }
   // longValue() works for every Number subtype; the former (long) cast only worked for Long
   // and threw ClassCastException for any other numeric type.
   return new Date(millis.longValue());
 }
  /**
   * Groups the live documents of an index by their distance to document 0 and reports every
   * group with more than one member as a set of duplicates.
   *
   * @param reader reader over the index to scan
   * @return the duplicate groups (lists of document identifiers), or {@code null} when no two
   *     documents share a distance
   * @throws IOException if reading the index fails
   * @throws FileNotFoundException if no index exists in the reader's directory
   */
  public ImageDuplicates findDuplicates(IndexReader reader) throws IOException {
    if (!IndexReader.indexExists(reader.directory()))
      throw new FileNotFoundException("No index found at this specific location.");

    // Document 0 is the reference; use whichever of its descriptors are stored.
    Document doc = reader.document(0);
    ScalableColor sc = null;
    ColorLayout cl = null;
    EdgeHistogram eh = null;

    String[] cls = doc.getValues(DocumentBuilder.FIELD_NAME_COLORLAYOUT);
    if (cls != null && cls.length > 0) {
      cl = new ColorLayout();
      cl.setStringRepresentation(cls[0]);
    }
    String[] scs = doc.getValues(DocumentBuilder.FIELD_NAME_SCALABLECOLOR);
    if (scs != null && scs.length > 0) {
      sc = new ScalableColor();
      sc.setStringRepresentation(scs[0]);
    }
    String[] ehs = doc.getValues(DocumentBuilder.FIELD_NAME_EDGEHISTOGRAM);
    if (ehs != null && ehs.length > 0) {
      eh = new EdgeHistogram();
      eh.setStringRepresentation(ehs[0]);
    }

    HashMap<Float, List<String>> duplicates = new HashMap<Float, List<String>>();

    boolean hasDeletions = reader.hasDeletions();

    // Document ids range over [0, maxDoc()). numDocs() excludes deleted documents, so bounding
    // the loop by it while skipping deleted ids would leave live documents at the end unvisited.
    int maxDoc = reader.maxDoc();
    int numDuplicates = 0;
    for (int i = 0; i < maxDoc; i++) {
      if (hasDeletions && reader.isDeleted(i)) {
        continue;
      }
      Document d = reader.document(i);
      float distance = getDistance(d, cl, sc, eh);

      List<String> sameDistance = duplicates.get(distance);
      if (sameDistance == null) {
        sameDistance = new LinkedList<String>();
        duplicates.put(distance, sameDistance);
      } else {
        // A second (or later) document at an already seen distance counts as a duplicate.
        numDuplicates++;
      }
      sameDistance.add(d.getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
    }

    if (numDuplicates == 0) return null;

    LinkedList<List<String>> results = new LinkedList<List<String>>();
    for (List<String> group : duplicates.values()) {
      if (group.size() > 1) {
        results.add(group);
      }
    }
    return new SimpleImageDuplicates(results);
  }
 /**
  * Returns the value of the document's "type" field as the shard identifier for an
  * {@code Animal} and registers that value through {@code addShard}.
  *
  * @throws RuntimeException if {@code entityType} is not {@code Animal.class}
  */
 @Override
 public String getShardIdentifier(
     Class<?> entityType, Serializable id, String idAsString, Document document) {
   if (!entityType.equals(Animal.class)) {
     throw new RuntimeException("Animal expected but found " + entityType);
   }
   final String shard = document.getField("type").stringValue();
   addShard(shard);
   return shard;
 }
  /**
   * Serializes a mocked PdxInstance with a string field "s" and an int field "i" and checks that
   * both end up in the document with their values.
   */
  @Test
  public void testWriteFields() {
    PdxLuceneSerializer mapper = new PdxLuceneSerializer(new String[] {"s", "i"});

    PdxInstance pdx = Mockito.mock(PdxInstance.class);
    Mockito.when(pdx.hasField("s")).thenReturn(true);
    Mockito.when(pdx.hasField("i")).thenReturn(true);
    Mockito.when(pdx.getField("s")).thenReturn("a");
    Mockito.when(pdx.getField("i")).thenReturn(5);

    Document target = new Document();
    mapper.toDocument(pdx, target);

    assertEquals(2, target.getFields().size());
    assertEquals("a", target.getField("s").stringValue());
    assertEquals(5, target.getField("i").numericValue());
  }
Exemplo n.º 19
0
  /**
   * Indexes two documents one after the other with the test analyzer and checks after each
   * commit that the stored "title" and "contents" values can be read back through a searcher.
   */
  @Test
  public void baseUIMAAnalyzerIntegrationTest() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer));

    // First document.
    String dummyTitle = "this is a dummy title ";
    String dummyContent = "there is some content written here";
    Document first = new Document();
    first.add(new TextField("title", dummyTitle, Field.Store.YES));
    first.add(new TextField("contents", dummyContent, Field.Store.YES));
    writer.addDocument(first);
    writer.commit();

    // Search with only the first document committed.
    DirectoryReader directoryReader = DirectoryReader.open(dir);
    IndexSearcher indexSearcher = newSearcher(directoryReader);
    TopDocs result = indexSearcher.search(new MatchAllDocsQuery(), 1);
    assertTrue(result.totalHits > 0);
    Document hit = indexSearcher.doc(result.scoreDocs[0].doc);
    assertNotNull(hit);
    assertNotNull(hit.getField("title"));
    assertEquals(dummyTitle, hit.getField("title").stringValue());
    assertNotNull(hit.getField("contents"));
    assertEquals(dummyContent, hit.getField("contents").stringValue());

    // Second document.
    String dogmasTitle = "dogmas";
    String dogmasContents = "white men can't jump";
    Document second = new Document();
    second.add(new TextField("title", dogmasTitle, Field.Store.YES));
    second.add(new TextField("contents", dogmasContents, Field.Store.YES));
    writer.addDocument(second);
    writer.commit();

    // Reopen so the new commit is visible, then look at the second hit.
    directoryReader.close();
    directoryReader = DirectoryReader.open(dir);
    indexSearcher = newSearcher(directoryReader);
    result = indexSearcher.search(new MatchAllDocsQuery(), 2);
    Document secondHit = indexSearcher.doc(result.scoreDocs[1].doc);
    assertNotNull(secondHit);
    assertNotNull(secondHit.getField("title"));
    assertEquals(dogmasTitle, secondHit.getField("title").stringValue());
    assertNotNull(secondHit.getField("contents"));
    assertEquals(dogmasContents, secondHit.getField("contents").stringValue());

    // A match-all query must now report both documents.
    result = indexSearcher.search(new MatchAllDocsQuery(), 2);
    assertEquals(2, result.totalHits);
    writer.close();
    indexSearcher.getIndexReader().close();
    dir.close();
  }
  /**
   * Converts a document with a two-part "home" value and checks that the stored value and both
   * generated poly-field sub-fields are present.
   */
  @Test
  public void testMultiField() throws Exception {
    final String firstPart = "home_0" + FieldType.POLY_FIELD_SEPARATOR + "double";
    final String secondPart = "home_1" + FieldType.POLY_FIELD_SEPARATOR + "double";

    SolrCore core = h.getCore();

    // A single "home" value made of two comma-separated numbers.
    SolrInputDocument input = new SolrInputDocument();
    input.addField("home", "2.2,3.3", 1.0f);

    Document out = DocumentBuilder.toDocument(input, core.getLatestSchema());
    assertNotNull(out.get("home")); // contains the stored value and term vector, if there is one
    assertNotNull(out.getField(firstPart));
    assertNotNull(out.getField(secondPart));
  }
  /**
   * Indexes 100 documents that all carry the same six stored fields (bytes, string, long, int,
   * float, double), then loads one random document once per field, restricted to that field, and
   * checks the value read back.
   *
   * @throws IOException if indexing or reading fails
   */
  public void testReadSkip() throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig iwConf =
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    iwConf.setMaxBufferedDocs(RandomInts.randomIntBetween(random(), 2, 30));
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);

    // Stored-only field type for the bytes and string fields.
    FieldType storedOnly = new FieldType();
    storedOnly.setStored(true);
    storedOnly.freeze();

    // Random values; the order of the draws is kept as is.
    final String string = _TestUtil.randomSimpleString(random(), 50);
    final byte[] bytes = string.getBytes("UTF-8");
    final long l = random().nextBoolean() ? random().nextInt(42) : random().nextLong();
    final int i = random().nextBoolean() ? random().nextInt(42) : random().nextInt();
    final float f = random().nextFloat();
    final double d = random().nextDouble();

    List<Field> fields =
        Arrays.asList(
            new Field("bytes", bytes, storedOnly),
            new Field("string", string, storedOnly),
            new LongField("long", l, Store.YES),
            new IntField("int", i, Store.YES),
            new FloatField("float", f, Store.YES),
            new DoubleField("double", d, Store.YES));

    int added = 0;
    while (added < 100) {
      Document doc = new Document();
      for (Field fld : fields) {
        doc.add(fld);
      }
      iw.w.addDocument(doc);
      ++added;
    }
    iw.commit();

    final DirectoryReader reader = DirectoryReader.open(dir);
    final int docID = random().nextInt(100);
    for (Field expected : fields) {
      final String fldName = expected.name();
      // Load only this one field of the chosen document.
      final Document sDoc = reader.document(docID, Collections.singleton(fldName));
      final IndexableField actual = sDoc.getField(fldName);
      if (!Field.class.equals(expected.getClass())) {
        // The numeric field subclasses are compared by numeric value.
        assertEquals(expected.numericValue(), actual.numericValue());
      } else {
        assertEquals(expected.binaryValue(), actual.binaryValue());
        assertEquals(expected.stringValue(), actual.stringValue());
      }
    }
    reader.close();
    iw.close();
    dir.close();
  }
Exemplo n.º 22
0
  /**
   * Checks that the "count" field of every document equals its document number plus
   * {@code startAt}, printing each mismatch.
   *
   * @param directory directory holding the index to check
   * @param startAt value expected in the "count" field of document 0
   * @return {@code true} if at least one document did NOT match (i.e. verification failed)
   * @throws IOException if the index cannot be opened or read
   */
  private boolean verifyIndex(Directory directory, int startAt) throws IOException {
    boolean fail = false;
    IndexReader reader = DirectoryReader.open(directory);
    try {
      int max = reader.maxDoc();
      for (int i = 0; i < max; i++) {
        // Compare the stored count to the value expected for this document number.
        String actual = reader.document(i).getField("count").stringValue();
        String expected = String.valueOf(i + startAt);
        if (!actual.equals(expected)) {
          fail = true;
          System.out.println(
              "Document " + (i + startAt) + " is returning document " + actual);
        }
      }
    } finally {
      // Release the reader even when reading a document throws.
      reader.close();
    }
    return fail;
  }
  /**
   * We assume that the initial indexing has been done and a set of reference objects has been found
   * and indexed in the separate directory. However further documents were added and they now need
   * to get a ranked list of reference objects. So we (i) get all these new documents missing the
   * field "ro-order" and (ii) add this field.
   *
   * <p>The reference objects are read from the index at {@code indexPath + "-ro"}. Both readers
   * are closed when the method returns; the writer is committed and closed on success and rolled
   * back on failure.
   *
   * @param indexPath the index to update
   * @throws IOException if either index cannot be opened, read or written
   */
  public void updateIndex(String indexPath) throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    try {
      IndexReader readerRo = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-ro")));
      try {
        addMissingRoOrder(indexPath, reader, readerRo);
      } finally {
        readerRo.close();
      }
    } finally {
      reader.close();
    }
  }

  /** Writes an "ro-order" field for every live document of {@code reader} that lacks one. */
  private void addMissingRoOrder(String indexPath, IndexReader reader, IndexReader readerRo)
      throws IOException {
    // Document ids range over [0, maxDoc()); numDocs() would stop early once deletions exist.
    int maxDoc = reader.maxDoc();
    boolean hasDeletions = reader.hasDeletions();
    int countUpdated = 0;

    ImageSearcher searcher =
        new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
    Map<String, Analyzer> perField = new HashMap<String, Analyzer>(1);
    perField.put("ro-order", new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
    PerFieldAnalyzerWrapper aWrapper =
        new PerFieldAnalyzerWrapper(new SimpleAnalyzer(LuceneUtils.LUCENE_VERSION), perField);

    // NOTE(review): OpenMode.CREATE starts from an empty index, so documents that already have
    // "ro-order" do not appear to be carried over on commit - confirm this is intended.
    IndexWriter iw =
        new IndexWriter(
            FSDirectory.open(new File(indexPath)),
            new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, aWrapper)
                .setOpenMode(IndexWriterConfig.OpenMode.CREATE));
    boolean success = false;
    try {
      StringBuilder sb = new StringBuilder(256);
      // Needed for check whether the document is deleted.
      Bits liveDocs = MultiFields.getLiveDocs(reader);

      for (int i = 0; i < maxDoc; i++) {
        if (hasDeletions && !liveDocs.get(i)) continue; // if it is deleted, just ignore it.
        Document document = reader.document(i);
        if (document.getField("ro-order") == null) { // if the field is not here we create it.
          ImageSearchHits hits = searcher.search(document, readerRo);
          // Space-separated ids of the best-matching reference objects, best first.
          sb.delete(0, sb.length());
          for (int j = 0; j < numReferenceObjectsUsed; j++) {
            sb.append(hits.doc(j).getValues("ro-id")[0]);
            sb.append(' ');
          }
          document.add(new TextField("ro-order", sb.toString(), Field.Store.YES));
          iw.updateDocument(
              new Term(
                  DocumentBuilder.FIELD_NAME_IDENTIFIER,
                  document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]),
              document);
          countUpdated++;
        }

        // progress report
        progress.setNumDocsProcessed(progress.getNumDocsProcessed() + 1);

        // debug:
        System.out.println("countUpdated = " + countUpdated);
      }
      iw.commit();
      success = true;
    } finally {
      if (success) {
        iw.close();
      } else {
        // Discard partial work and release the write lock without committing.
        iw.rollback();
      }
    }
  }
  /**
   * Asserts that the search result contains at least the given document ids.
   *
   * @param topDocs search result to inspect
   * @param ids values of the "id" field that must all appear among the returned hits
   * @throws IOException if a hit document cannot be loaded
   */
  private void assertTopDocs(TopDocs topDocs, String... ids) throws IOException {
    // The condition fails when there are fewer hits than expected ids; the message used to
    // claim the opposite.
    assertTrue(ids.length <= topDocs.totalHits, "Query has fewer hits than expected");

    Set<String> foundIDs = new HashSet<String>();
    for (ScoreDoc hit : topDocs.scoreDocs) {
      Document foundDocument = indexSearcher.doc(hit.doc);
      foundIDs.add(foundDocument.getField("id").stringValue());
    }

    for (String id : ids) {
      assertTrue(foundIDs.contains(id), "ID [" + id + "] was not found in query results");
    }
  }
 /**
  * Runs the same closed long range query under two rewrite methods (constant score, then
  * constant score boolean) and checks hit count and the first and last hit in index order.
  *
  * @param precisionStep precision step of the queried field; also selects the field name
  * @throws Exception if searching fails
  */
 private void testRange(int precisionStep) throws Exception {
   final String field = "field" + precisionStep;
   final int count = 3000;
   final long lower = (distance * 3 / 2) + startOffset;
   final long upper = lower + count * distance + (distance / 3);
   LegacyNumericRangeQuery<Long> q =
       LegacyNumericRangeQuery.newLongRange(field, precisionStep, lower, upper, true, true);

   for (int pass = 0; pass < 2; pass++) {
     // Select the rewrite method for this pass, plus a label for the assertion messages.
     final String type;
     if (pass == 0) {
       type = " (constant score filter rewrite)";
       q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_REWRITE);
     } else {
       type = " (constant score boolean rewrite)";
       q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE);
     }

     ScoreDoc[] sd = searcher.search(q, noDocs, Sort.INDEXORDER).scoreDocs;
     assertNotNull(sd);
     assertEquals("Score doc count" + type, count, sd.length);

     Document first = searcher.doc(sd[0].doc);
     assertEquals(
         "First doc" + type,
         2 * distance + startOffset,
         first.getField(field).numericValue().longValue());

     Document last = searcher.doc(sd[sd.length - 1].doc);
     assertEquals(
         "Last doc" + type,
         (1 + count) * distance + startOffset,
         last.getField(field).numericValue().longValue());
   }
 }
Exemplo n.º 26
0
 /**
  * Joins the string values of the named fields with ", ", skipping names the document has no
  * field for.
  *
  * @param doc document to read the fields from
  * @param fields field names, in output order
  * @return the joined values; empty when none of the fields exists
  */
 private String fieldsToString(Document doc, String[] fields) {
   StringBuilder joined = new StringBuilder();
   for (String fieldName : fields) {
     Field field = doc.getField(fieldName);
     if (field != null) {
       // Separator only between values, never in front of the first one.
       if (joined.length() > 0) {
         joined.append(", ");
       }
       joined.append(field.stringValue());
     }
   }
   return joined.toString();
 }
  /**
   * Converts a query command result into a NamedList with "matches", "totalHits", an optional
   * "maxScore" and a "documents" list holding id, optional score and, for FieldDoc hits, the
   * marshalled sort values of each hit.
   *
   * @param result query command result to serialize
   * @return the serialized form
   * @throws IOException if a document cannot be retrieved
   */
  protected NamedList serializeTopDocs(QueryCommandResult result) throws IOException {
    NamedList<Object> queryResult = new NamedList<>();
    queryResult.add("matches", result.getMatches());
    queryResult.add("totalHits", result.getTopDocs().totalHits);
    // A NaN max score is left out of the response.
    if (!Float.isNaN(result.getTopDocs().getMaxScore())) {
      queryResult.add("maxScore", result.getTopDocs().getMaxScore());
    }
    List<NamedList> documents = new ArrayList<>();
    queryResult.add("documents", documents);

    final IndexSchema schema = rb.req.getSearcher().getSchema();
    SchemaField uniqueField = schema.getUniqueKeyField();
    for (ScoreDoc scoreDoc : result.getTopDocs().scoreDocs) {
      NamedList<Object> document = new NamedList<>();
      documents.add(document);

      Document doc = retrieveDocument(uniqueField, scoreDoc.doc);
      document.add("id", uniqueField.getType().toExternal(doc.getField(uniqueField.getName())));
      if (!Float.isNaN(scoreDoc.score)) {
        document.add("score", scoreDoc.score);
      }

      // Only FieldDoc hits carry sort values.
      if (scoreDoc instanceof FieldDoc) {
        FieldDoc fieldDoc = (FieldDoc) scoreDoc;
        Object[] convertedSortValues = new Object[fieldDoc.fields.length];
        for (int j = 0; j < fieldDoc.fields.length; j++) {
          Object sortValue = fieldDoc.fields[j];
          Sort groupSort = rb.getGroupingSpec().getGroupSort();
          String sortFieldName = groupSort.getSort()[j].getField();
          SchemaField field = null;
          if (sortFieldName != null) {
            field = schema.getFieldOrNull(sortFieldName);
          }
          // Marshal through the schema field type when the sort field is known to the schema.
          if (field != null && sortValue != null) {
            sortValue = field.getType().marshalSortValue(sortValue);
          }
          convertedSortValues[j] = sortValue;
        }
        document.add("sortValues", convertedSortValues);
      }
    }

    return queryResult;
  }
  /**
   * Indexes titles taken from LineFileDocs as suggestions with random weights, remembering the
   * highest weight per title, and checks that a prefix query for each title returns at least one
   * hit whose score equals that weight.
   */
  @Test
  public void testRealisticKeys() throws Exception {
    Analyzer analyzer = new MockAnalyzer(random());
    RandomIndexWriter iw =
        new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
    LineFileDocs lineFileDocs = new LineFileDocs(random());
    int num = Math.min(1000, atLeast(100));
    Map<String, Integer> mappings = new HashMap<>();
    for (int i = 0; i < num; i++) {
      Document document = lineFileDocs.nextDoc();
      String title = document.getField("title").stringValue();
      // nextInt(bound) is never negative. Math.abs(random().nextInt()) stays negative when the
      // draw is Integer.MIN_VALUE, which made the test fail on rare seeds.
      int weight = random().nextInt(Integer.MAX_VALUE);
      Integer prevWeight = mappings.get(title);
      if (prevWeight == null || prevWeight < weight) {
        mappings.put(title, weight);
      }
      Document doc = new Document();
      doc.add(new SuggestField("suggest_field", title, weight));
      iw.addDocument(doc);

      if (rarely()) {
        iw.commit();
      }
    }

    DirectoryReader reader = iw.getReader();
    SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);

    for (Map.Entry<String, Integer> entry : mappings.entrySet()) {
      String title = entry.getKey();

      PrefixCompletionQuery query =
          new PrefixCompletionQuery(analyzer, new Term("suggest_field", title));
      TopSuggestDocs suggest = indexSearcher.suggest(query, mappings.size());
      assertTrue(suggest.totalHits > 0);
      // Other titles may share the prefix, so look for any hit carrying the expected score.
      boolean matched = false;
      for (ScoreDoc scoreDoc : suggest.scoreDocs) {
        matched = Float.compare(scoreDoc.score, (float) entry.getValue()) == 0;
        if (matched) {
          break;
        }
      }
      assertTrue("at least one of the entries should have the score", matched);
    }

    reader.close();
    iw.close();
  }
Exemplo n.º 29
0
  /**
   * Searches the index at {@code indexPath} with document 0 as the query, re-ranks the hits with
   * an {@link LsaFilter} over the CEDD feature and saves the result as an HTML page.
   *
   * @throws IOException if the index cannot be read or the result cannot be written
   */
  public void testLsaFilter() throws IOException {
    // search
    System.out.println("---< searching >-------------------------");
    IndexReader reader = IndexReader.open(FSDirectory.open(new File(indexPath)));
    try {
      Document document = reader.document(0);
      ImageSearcher searcher = ImageSearcherFactory.createCEDDImageSearcher(100);
      ImageSearchHits hits = searcher.search(document, reader);

      // rerank
      System.out.println("---< filtering >-------------------------");
      LsaFilter filter = new LsaFilter(CEDD.class, DocumentBuilder.FIELD_NAME_CEDD);
      hits = filter.filter(hits, document);

      // output
      FileUtils.saveImageResultsToHtml(
          "filtertest",
          hits,
          document.getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
    } finally {
      // The reader was previously never closed.
      reader.close();
    }
  }
Exemplo n.º 30
0
  /**
   * Counts the non-overlapping occurrences of "http://" in the document's status text field.
   *
   * @param doc document whose text field is scanned
   * @param terms not used by this extractor
   * @param context not used by this extractor
   * @return the number of occurrences, as a float
   */
  @Override
  public float extract(Document doc, Terms terms, RerankerContext context) {
    final String text = doc.getField(StatusField.TEXT.name).stringValue();
    final String needle = "http://";

    int occurrences = 0;
    // Resume each search right behind the previous match.
    for (int at = text.indexOf(needle, 0);
        at != -1;
        at = text.indexOf(needle, at + needle.length())) {
      occurrences++;
    }

    return (float) occurrences;
  }