Example #1
  // LUCENE-1262
  public void testExceptions() throws Throwable {
    Path indexDir = createTempDir("testfieldswriterexceptions");

    Directory fsDir = newFSDirectory(indexDir);
    FaultyFSDirectory dir = new FaultyFSDirectory(fsDir);
    IndexWriterConfig iwc =
        newIndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(dir, iwc);
    for (int i = 0; i < 2; i++) writer.addDocument(testDoc);
    writer.forceMerge(1);
    writer.close();

    IndexReader reader = DirectoryReader.open(dir);
    dir.startFailing();

    boolean exc = false;

    for (int i = 0; i < 2; i++) {
      try {
        reader.document(i);
      } catch (IOException ioe) {
        // expected
        exc = true;
      }
      try {
        reader.document(i);
      } catch (IOException ioe) {
        // expected
        exc = true;
      }
    }
    assertTrue(exc);
    reader.close();
    dir.close();
  }
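FaultyFSDirectory is a test helper that wraps a real Directory and, once startFailing() is called, makes reads throw IOException. As a rough, hypothetical sketch of that idea (the class below is illustrative, not Lucene's actual helper):

  // sketch (assumption): a wrapper that starts failing reads on demand,
  // approximating what FaultyFSDirectory.startFailing() does above
  static class FailingDirectory extends FilterDirectory {
    private volatile boolean failing = false;

    FailingDirectory(Directory in) {
      super(in);
    }

    void startFailing() {
      failing = true;
    }

    @Override
    public IndexInput openInput(String name, IOContext context) throws IOException {
      if (failing) {
        throw new IOException("simulated failure opening " + name);
      }
      return super.openInput(name, context);
    }
  }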
Example #2
 /**
  * Writes the search results to an HTML file.
  *
  * @param prefix prefix for the name of the generated file
  * @param hits the search hits to render
  * @param reader the IndexReader the hits were retrieved from
  * @param queryImage path to the query image
  * @return the name of the generated HTML file
  * @throws IOException if the file cannot be written
  */
 public static String saveImageResultsToHtml(
     String prefix, TopDocs hits, IndexReader reader, String queryImage) throws IOException {
   long l = System.currentTimeMillis() / 1000;
   String fileName = "results-" + prefix + "-" + l + ".html";
   BufferedWriter bw = new BufferedWriter(new FileWriter(fileName));
   bw.write(
       "<html>\n"
           + "<head><title>Search Results</title></head>\n"
           + "<body bgcolor=\"#FFFFFF\">\n");
   bw.write("<h3>query</h3>\n");
   bw.write(
       "<a href=\"file://" + queryImage + "\"><img src=\"file://" + queryImage + "\"></a><p>\n");
   bw.write("<h3>results</h3>\n");
   for (int i = 0; i < hits.scoreDocs.length; i++) {
     // read the stored identifier once per hit instead of twice
     String imagePath = reader.document(hits.scoreDocs[i].doc).get("descriptorImageIdentifier");
     bw.write(
         hits.scoreDocs[i].score
             + " - <a href=\"file://"
             + imagePath
             + "\"><img src=\"file://"
             + imagePath
             + "\"></a><p>\n");
   }
   bw.write("</body>\n" + "</html>");
   bw.close();
   return fileName;
 }
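A minimal usage sketch for the method above; the index path, query image path, and the demo method itself are hypothetical:

  // hypothetical usage of saveImageResultsToHtml
  static void demo(Query query) throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("index")));
    TopDocs hits = new IndexSearcher(reader).search(query, 10);
    String fileName = saveImageResultsToHtml("demo", hits, reader, "/tmp/query.jpg");
    System.out.println("results written to " + fileName);
    reader.close();
  }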
Example #3
  public void test() throws IOException {
    assertTrue(dir != null);
    assertTrue(fieldInfos != null);
    IndexReader reader = DirectoryReader.open(dir);
    Document doc = reader.document(0);
    assertTrue(doc != null);
    assertTrue(doc.getField(DocHelper.TEXT_FIELD_1_KEY) != null);

    Field field = (Field) doc.getField(DocHelper.TEXT_FIELD_2_KEY);
    assertTrue(field != null);
    assertTrue(field.fieldType().storeTermVectors());

    assertFalse(field.fieldType().omitNorms());
    assertTrue(field.fieldType().indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);

    field = (Field) doc.getField(DocHelper.TEXT_FIELD_3_KEY);
    assertTrue(field != null);
    assertFalse(field.fieldType().storeTermVectors());
    assertTrue(field.fieldType().omitNorms());
    assertTrue(field.fieldType().indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);

    field = (Field) doc.getField(DocHelper.NO_TF_KEY);
    assertTrue(field != null);
    assertFalse(field.fieldType().storeTermVectors());
    assertFalse(field.fieldType().omitNorms());
    assertTrue(field.fieldType().indexOptions() == IndexOptions.DOCS);

    DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(DocHelper.TEXT_FIELD_3_KEY);
    reader.document(0, visitor);
    final List<IndexableField> fields = visitor.getDocument().getFields();
    assertEquals(1, fields.size());
    assertEquals(DocHelper.TEXT_FIELD_3_KEY, fields.get(0).name());
    reader.close();
  }
Example #4
  public void testSearch() throws IOException {
    int docNumber = 1;
    MetricSpacesInvertedListIndexing ms = MetricSpacesInvertedListIndexing.getDefaultInstance();
    MetricSpacesInvertedListIndexing.numReferenceObjectsUsed = 10;
    MetricSpacesInvertedListIndexing.numReferenceObjects = 50;
    IndexReader reader = ms.getIndexReader(indexPath);
    TopDocs docs = ms.search(reader.document(docNumber), indexPath);

    // print the results
    BufferedWriter bw = new BufferedWriter(new FileWriter("out.html"));
    bw.write("<html><body>");
    for (int i = 0; i < docs.scoreDocs.length; i++) {
      ScoreDoc scoreDoc = docs.scoreDocs[i];
      bw.write(
          "<img title=\"ID: "
              + scoreDoc.doc
              + ", "
              + "Score: "
              + scoreDoc.score
              + "\" src=\"file:///"
              + reader.document(scoreDoc.doc).getValues("descriptorImageIdentifier")[0]
              + "\"> ");
    }
    bw.write("</body></html>");
    bw.close();
    showUrl("out.html");
  }
Example #5
  public ImageDuplicates findDuplicates(IndexReader reader) throws IOException {
    // get the first document:
    if (!IndexReader.indexExists(reader.directory()))
      throw new FileNotFoundException("No index found at this specific location.");
    Document doc = reader.document(0);
    ScalableColor sc = null;
    ColorLayout cl = null;
    EdgeHistogram eh = null;

    String[] cls = doc.getValues(DocumentBuilder.FIELD_NAME_COLORLAYOUT);
    if (cls != null && cls.length > 0) {
      cl = new ColorLayout();
      cl.setStringRepresentation(cls[0]);
    }
    String[] scs = doc.getValues(DocumentBuilder.FIELD_NAME_SCALABLECOLOR);
    if (scs != null && scs.length > 0) {
      sc = new ScalableColor();
      sc.setStringRepresentation(scs[0]);
    }
    String[] ehs = doc.getValues(DocumentBuilder.FIELD_NAME_EDGEHISTOGRAM);
    if (ehs != null && ehs.length > 0) {
      eh = new EdgeHistogram();
      eh.setStringRepresentation(ehs[0]);
    }

    HashMap<Float, List<String>> duplicates = new HashMap<Float, List<String>>();

    // find duplicates ...
    boolean hasDeletions = reader.hasDeletions();

    int docs = reader.maxDoc(); // iterate all doc ids; deleted docs are skipped below
    int numDuplicates = 0;
    for (int i = 0; i < docs; i++) {
      if (hasDeletions && reader.isDeleted(i)) {
        continue;
      }
      Document d = reader.document(i);
      float distance = getDistance(d, cl, sc, eh);

      if (!duplicates.containsKey(distance)) {
        duplicates.put(distance, new LinkedList<String>());
      } else {
        numDuplicates++;
      }
      duplicates.get(distance).add(d.getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
    }

    if (numDuplicates == 0) return null;

    LinkedList<List<String>> results = new LinkedList<List<String>>();
    for (float f : duplicates.keySet()) {
      if (duplicates.get(f).size() > 1) {
        results.add(duplicates.get(f));
      }
    }
    return new SimpleImageDuplicates(results);
  }
Example #6
  public double singleSearch(int docNum)
      throws IOException, InstantiationException, IllegalAccessException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));

    // -----------

    // load the query document once instead of re-reading it for each field access
    Document queryDoc = reader.document(docNum);
    String query = queryDoc.getValues("hash")[0];
    CEDD ceddQuery = new CEDD();
    ceddQuery.setByteArrayRepresentation(
        queryDoc.getField(DocumentBuilder.FIELD_NAME_CEDD).binaryValue().bytes,
        queryDoc.getField(DocumentBuilder.FIELD_NAME_CEDD).binaryValue().offset,
        queryDoc.getField(DocumentBuilder.FIELD_NAME_CEDD).binaryValue().length);

    // -----------

    HashSet<String> gold = new HashSet<String>(numImagesEval);
    ImageSearcher cis = ImageSearcherFactory.createCEDDImageSearcher(100);
    ImageSearchHits hits = cis.search(queryDoc, reader);
    for (int i = 0; i < 10; i++) {
      gold.add(hits.doc(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
    }

    // ------------

    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setSimilarity(
        new SimilarityBase() {
          @Override
          protected float score(BasicStats basicStats, float freq, float v2) {
            return 1;
          }

          @Override
          public String toString() {
            return null;
          }
        });
    TopDocs topDocs = searcher.search(createQuery(query), 500);
    topDocs = rerank(topDocs, ceddQuery, reader);
    //        System.out.println("topDocs.scoreDocs.length = " + topDocs.scoreDocs.length);
    double numMatches = 0;
    for (int i = 0; i < topDocs.scoreDocs.length; i++) {
      ScoreDoc scoreDoc = topDocs.scoreDocs[i];
      //            System.out.print(scoreDoc.score + ": ");
      String file =
          reader.document(scoreDoc.doc).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
      //            System.out.println(file.substring(file.lastIndexOf('/') + 1) +
      // (gold.contains(file)?" x":" o"));
      if (gold.contains(file)) numMatches++;
    }
    return numMatches;
  }
Example #7
  public void tttestGetDistribution() throws IOException {
    BufferedWriter bw = new BufferedWriter(new FileWriter("data.csv"));
    IndexReader reader = IndexReader.open(FSDirectory.open(new File(indexPath)));
    // get the first document:
    //        if (!IndexReader.indexExists(reader.directory()))
    //            throw new FileNotFoundException("No index found at this specific location.");

    CEDD cedd1 = new CEDD();
    FCTH fcth1 = new FCTH();

    CEDD cedd2 = new CEDD();
    FCTH fcth2 = new FCTH();

    JCD jcd1 = new JCD();
    JCD jcd2 = new JCD();
    String[] cls;

    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);

    int docs = reader.maxDoc(); // iterate all doc ids; deleted docs are skipped below
    for (int i = 0; i < docs; i++) {
      if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it.

      Document doc = reader.document(i);
      cls = doc.getValues(DocumentBuilder.FIELD_NAME_CEDD);
      if (cls != null && cls.length > 0) cedd1.setStringRepresentation(cls[0]);
      cls = doc.getValues(DocumentBuilder.FIELD_NAME_FCTH);
      if (cls != null && cls.length > 0) fcth1.setStringRepresentation(cls[0]);

      for (int j = i + 1; j < docs; j++) {
        if (reader.hasDeletions() && !liveDocs.get(j))
          continue; // if it is deleted, just ignore it.
        Document doc2 = reader.document(j);
        cls = doc2.getValues(DocumentBuilder.FIELD_NAME_CEDD);
        if (cls != null && cls.length > 0) cedd2.setStringRepresentation(cls[0]);
        cls = doc2.getValues(DocumentBuilder.FIELD_NAME_FCTH);
        if (cls != null && cls.length > 0) fcth2.setStringRepresentation(cls[0]);
        jcd1.init(cedd1, fcth1);
        jcd2.init(cedd2, fcth2);
        bw.write(
            cedd1.getDistance(cedd2)
                + ";"
                + fcth1.getDistance(fcth2)
                + ";"
                + jcd1.getDistance(jcd2)
                + "\n");
      }
      if (i % 100 == 0) System.out.println(i + " entries processed ... ");
    }
    bw.close();
  }
Example #8
  /**
   * The words in each input record are sorted by document frequency; if the
   * ceil(prefix * length)-token prefixes of two records share at least one token, the records
   * are placed in the same block.
   *
   * @param input input file, one record per line
   * @param lines number of lines (records) to block
   * @param prefix prefix parameter
   * @param maxPrefixLength upper bound on the number of prefix tokens indexed per record
   * @param maxDocFreq max document frequency for a token to be considered a rare feature
   * @param indexFolder temporary index folder
   * @param output file the resulting blocks are written to
   * @param report file the timing report is appended to
   * @throws Exception if indexing or writing the output fails
   */
  public static void prefixBlockingWithLucene(
      String input,
      int lines,
      float prefix,
      int maxPrefixLength,
      int maxDocFreq,
      String indexFolder,
      String output,
      String report)
      throws Exception {
    long startTime = new Date().getTime();
    Common.indexPrefix(input, lines, prefix, maxPrefixLength, indexFolder);

    IndexReader ireader = IndexReader.open(indexFolder);
    IndexSearcher isearcher = new IndexSearcher(ireader);
    TermEnum te = ireader.terms();
    PrintWriter pw = IOFactory.getPrintWriter(output);
    int maxBlockSize = 0;
    int totalBlockSize = 0;
    int blockCount = 0;
    while (te.next()) {
      TopDocs td = isearcher.search(new TermQuery(te.term()), maxDocFreq + 1);

      // discard blocks with only one individual or of too frequent tokens
      if (td.scoreDocs.length <= 1 || td.scoreDocs.length > maxDocFreq) continue;

      if (td.scoreDocs.length > maxBlockSize) maxBlockSize = td.scoreDocs.length;
      totalBlockSize += td.scoreDocs.length;
      blockCount++;
      pw.print(ireader.document(td.scoreDocs[0].doc).get("id"));
      for (int i = 1; i < td.scoreDocs.length; i++) {
        pw.print(" " + ireader.document(td.scoreDocs[i].doc).get("id"));
      }
      pw.println();
      if (blockCount % 1000 == 0)
        System.out.println(new Date().toString() + " : " + blockCount + " blocks");
    }
    pw.close();
    ireader.close();
    long time = new Date().getTime() - startTime;
    pw = IOFactory.getPrintWriter(report, true);
    pw.println(new Date().toString());
    pw.println("#individual: " + lines);
    pw.println("blocking parameter: " + prefix);
    pw.println("time: " + time);
    pw.println("#block: " + blockCount);
    pw.println("max block size: " + maxBlockSize);
    pw.println("avg block size: " + (totalBlockSize + 0.0) / blockCount);
    pw.close();
    Common.deleteFolder(new File(indexFolder));
    System.out.println(prefix + "\t" + lines + "\t" + time); // for speed test
  }
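For intuition about the prefix parameter: with prefix = 0.3 and a record of 10 tokens, the first ceil(0.3 * 10) = 3 lowest-document-frequency tokens form the blocking prefix, capped by maxPrefixLength. A small helper showing that arithmetic (prefixSize is an illustrative name, not part of the code above):

  // hypothetical helper: number of prefix tokens indexed for one record
  static int prefixSize(int recordLength, float prefix, int maxPrefixLength) {
    return Math.min((int) Math.ceil(prefix * recordLength), maxPrefixLength);
  }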
Example #9
 public void testSearchRunTime() throws IOException {
   int queryDocID;
   IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("index-large-new")));
   int featureIndex = 0;
   // two warm-up searches before the timed loop below
   ImageSearchHits hits = searchers[featureIndex].search(reader.document(0), reader);
   hits = searchers[featureIndex].search(reader.document(1), reader);
   long ms = System.currentTimeMillis();
   for (int i = 0; i < 100; i++) {
     queryDocID = i;
     // select one feature for the large index:
     hits = searchers[featureIndex].search(reader.document(queryDocID), reader);
   }
   ms = System.currentTimeMillis() - ms;
   System.out.println("ms = " + ms / 100);
 }
Example #10
 public void testIndexedBit() throws Exception {
   Directory dir = newDirectory();
   RandomIndexWriter w = new RandomIndexWriter(random(), dir);
   Document doc = new Document();
   FieldType onlyStored = new FieldType();
   onlyStored.setStored(true);
   doc.add(new Field("field", "value", onlyStored));
   doc.add(new StringField("field2", "value", Field.Store.YES));
   w.addDocument(doc);
   IndexReader r = w.getReader();
   w.close();
   assertFalse(r.document(0).getField("field").fieldType().indexed());
   assertTrue(r.document(0).getField("field2").fieldType().indexed());
   r.close();
   dir.close();
 }
Example #11
  // LUCENE-1219
  public void testBinaryFieldOffsetLength() throws IOException {
    Directory dir = newDirectory();
    IndexWriter w =
        new IndexWriter(
            dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
    byte[] b = new byte[50];
    for (int i = 0; i < 50; i++) b[i] = (byte) (i + 77);

    Document doc = new Document();
    Field f = new StoredField("binary", b, 10, 17);
    byte[] bx = f.binaryValue().bytes;
    assertTrue(bx != null);
    assertEquals(50, bx.length);
    assertEquals(10, f.binaryValue().offset);
    assertEquals(17, f.binaryValue().length);
    doc.add(f);
    w.addDocument(doc);
    w.close();

    IndexReader ir = DirectoryReader.open(dir);
    Document doc2 = ir.document(0);
    IndexableField f2 = doc2.getField("binary");
    b = f2.binaryValue().bytes;
    assertTrue(b != null);
    assertEquals(17, b.length);
    assertEquals(87, b[0]);
    ir.close();
    dir.close();
  }
Example #12
  // LUCENE-1727: make sure doc fields are stored in order
  public void testStoredFieldsOrder() throws Throwable {
    Directory d = newDirectory();
    IndexWriter w =
        new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
    Document doc = new Document();

    FieldType customType = new FieldType();
    customType.setStored(true);
    doc.add(newField("zzz", "a b c", customType));
    doc.add(newField("aaa", "a b c", customType));
    doc.add(newField("zzz", "1 2 3", customType));
    w.addDocument(doc);
    IndexReader r = w.getReader();
    Document doc2 = r.document(0);
    Iterator<IndexableField> it = doc2.getFields().iterator();
    assertTrue(it.hasNext());
    Field f = (Field) it.next();
    assertEquals(f.name(), "zzz");
    assertEquals(f.stringValue(), "a b c");

    assertTrue(it.hasNext());
    f = (Field) it.next();
    assertEquals(f.name(), "aaa");
    assertEquals(f.stringValue(), "a b c");

    assertTrue(it.hasNext());
    f = (Field) it.next();
    assertEquals(f.name(), "zzz");
    assertEquals(f.stringValue(), "1 2 3");
    assertFalse(it.hasNext());
    r.close();
    w.close();
    d.close();
  }
Example #13
  /**
   * Find words for a more-like-this query former.
   *
   * @param docNum the id of the lucene document from which to find terms
   */
  private PriorityQueue<ScoreTerm> retrieveTerms(int docNum) throws IOException {
    Map<String, Map<String, Int>> field2termFreqMap = new HashMap<>();
    // term vectors are stored per document, so fetch them once outside the field loop
    final Fields vectors = ir.getTermVectors(docNum);
    for (String fieldName : fieldNames) {
      final Terms vector;
      if (vectors != null) {
        vector = vectors.terms(fieldName);
      } else {
        vector = null;
      }

      // field does not store term vector info
      if (vector == null) {
        Document d = ir.document(docNum);
        IndexableField[] fields = d.getFields(fieldName);
        for (IndexableField field : fields) {
          final String stringValue = field.stringValue();
          if (stringValue != null) {
            addTermFrequencies(new StringReader(stringValue), field2termFreqMap, fieldName);
          }
        }
      } else {
        addTermFrequencies(field2termFreqMap, vector, fieldName);
      }
    }

    return createQueue(field2termFreqMap);
  }
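The term-vector branch above delegates the counting to addTermFrequencies; a minimal sketch of walking one field's term vector with the Lucene 4.x TermsEnum API (countTerms is an illustrative name, not the method used above):

  // sketch: count the terms of one field's term vector (Lucene 4.x API)
  static Map<String, Long> countTerms(Terms vector) throws IOException {
    Map<String, Long> freqs = new HashMap<>();
    TermsEnum termsEnum = vector.iterator(null);
    BytesRef term;
    while ((term = termsEnum.next()) != null) {
      // for a single-document term vector, totalTermFreq is the frequency within that document
      freqs.put(term.utf8ToString(), termsEnum.totalTermFreq());
    }
    return freqs;
  }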
Example #14
  private void dumpDocuments() throws IOException {
    outputBanner("Documents");

    int totalDocs = mIndexReader.numDocs();

    outputLn();
    outputLn("There are " + totalDocs + " documents in this index.");

    mConsole.debug("Total number of documents: " + totalDocs);
    for (int i = 0; i < totalDocs; i++) {
      Document doc = null;
      try {
        doc = mIndexReader.document(i, null);
      } catch (IllegalArgumentException e) {
        if ("attempt to access a deleted document".equals(e.getMessage())) {
          mConsole.warn(
              "encountered exception while dumping document " + i + ": " + e.getMessage());
        } else {
          throw e;
        }
      }
      dumpDocument(i, doc); // doc is null if the document could not be loaded

      if ((i + 1) % 100 == 0) {
        mConsole.debug("Dumped " + (i + 1) + " documents");
      }
    }
  }
Example #15
  private static Map<String, List<String>> generate_result(Directory directory) {
    Map<String, List<String>> result_map = new HashMap<String, List<String>>();

    try {
      IndexReader reader = IndexReader.open(directory);
      TermEnum termEnum = reader.terms();
      while (termEnum.next()) {
        String termEnumString = termEnum.term().toString();
        if (termEnumString.startsWith("content:")) {
          String term = termEnumString.substring(termEnumString.lastIndexOf(":") + 1);
          TermDocs termDocs = reader.termDocs(termEnum.term());
          while (termDocs.next()) {
            Document doc = reader.document(termDocs.doc());
            String relative_path = doc.get("relative_path");

            // create the list on first sight of this path, then record the term either way
            if (!result_map.containsKey(relative_path)) {
              result_map.put(relative_path, new ArrayList<String>());
            }
            result_map.get(relative_path).add(term + termDocs.freq());
          }
        }
      }
    } catch (IOException e) {
      e.printStackTrace();
    }

    return result_map;
  }
Example #16
 private void remove(Class entity, Serializable id) {
   log.trace("remove from Lucene index: " + entity + "#" + id);
   DocumentBuilder builder = workspace.getDocumentBuilder(entity);
   Term term = builder.getTerm(id);
   IndexReader reader = workspace.getIndexReader(entity);
   TermDocs termDocs = null;
   try {
     // TODO is there a faster way?
     // TODO include TermDocs into the workspace?
     termDocs = reader.termDocs(term);
     String entityName = entity.getName();
     while (termDocs.next()) {
       int docIndex = termDocs.doc();
       if (entityName.equals(reader.document(docIndex).get(DocumentBuilder.CLASS_FIELDNAME))) {
         // remove only the one of the right class
         // loop all to remove all the matches (defensive code)
         reader.deleteDocument(docIndex);
       }
     }
   } catch (Exception e) {
     throw new HibernateException("Unable to remove from Lucene index: " + entity + "#" + id, e);
   } finally {
     if (termDocs != null)
       try {
         termDocs.close();
       } catch (IOException e) {
         log.warn("Unable to close termDocs properly", e);
       }
   }
 }
Example #17
  /** @return the indexes */
  public List<Index> getIndexes() {
    List<Index> indexes = new ArrayList<Index>();
    // Method[] methods = Index.class.getDeclaredMethods();
    int numDocs = reader.numDocs();
    // System.out.println(numDocs);
    for (int i = 0; i < numDocs; i++) {
      try {
        Document document = reader.document(i);
        List<Fieldable> f = document.getFields();

        Index index = new Index();
        for (Fieldable fieldable : f) {
          Field field = (Field) fieldable;
          Method m =
              Index.class.getDeclaredMethod("set" + field.name(), new Class[] {String.class});
          m.invoke(index, new Object[] {field.stringValue()});
          // Method m2 = Index.class.getDeclaredMethod("get" + field.name(), new Class[]{});
          // Object val = m2.invoke(index, new Object[]{});
          // System.out.println(m2.getName()+" = "+val);
          // System.out.println(m.getName() + " " + field.stringValue());
        }
        // System.out.println("RHAAR-"+i+" = "+index.getRHaarFeature());
        indexes.add(index);
      } catch (Exception e) {
        e.printStackTrace();
      }
    }
    return indexes;
  }
Example #18
  public void doTest(int[] docs) throws Exception {
    Directory dir = makeIndex();
    IndexReader reader = IndexReader.open(dir, true);
    for (int i = 0; i < docs.length; i++) {
      Document d = reader.document(docs[i], SELECTOR);
      d.get(MAGIC_FIELD);

      List<Fieldable> fields = d.getFields();
      for (Iterator<Fieldable> fi = fields.iterator(); fi.hasNext(); ) {
        Fieldable f = null;
        try {
          f = fi.next();
          String fname = f.name();
          String fval = f.stringValue();
          assertNotNull(docs[i] + " FIELD: " + fname, fval);
          String[] vals = fval.split("#");
          if (!dataset.contains(vals[0]) || !dataset.contains(vals[1])) {
            fail("FIELD:" + fname + ",VAL:" + fval);
          }
        } catch (Exception e) {
          throw new Exception(docs[i] + " WTF: " + f.name(), e);
        }
      }
    }
    reader.close();
    dir.close();
  }
Example #19
  @Override
  public boolean reload(String collectionName, int docNum) {
    if (collectionName == null) return false;

    CrescentCollectionHandler collectionHandler =
        SpringApplicationContext.getBean(
            "crescentCollectionHandler", CrescentCollectionHandler.class);
    CrescentCollection collection =
        collectionHandler.getCrescentCollections().getCrescentCollection(collectionName);

    if (collection == null) {
      logger.debug("doesn't Collection Info => {}", collectionName);
      return false;
    }

    List<String> fieldName = new ArrayList<String>();
    List<String> flag = new ArrayList<String>();
    List<String> norm = new ArrayList<String>();
    List<String> value = new ArrayList<String>();

    try {
      Directory directory = FSDirectory.open(new File(collection.getIndexingDirectory()));
      IndexReader reader = IndexReader.open(directory);

      Document document = null;
      try {
        document = reader.document(docNum);
      } catch (IllegalArgumentException e) {
        e.printStackTrace();
        return false;
      }

      String fName = null;
      for (Fieldable field : document.getFields()) {
        fName = field.name();
        fieldName.add(fName);
        flag.add(fieldFlag(field));
        if (reader.hasNorms(fName)) {
          norm.add(String.valueOf(Similarity.decodeNorm(reader.norms(fName)[docNum])));
        } else {
          norm.add("---");
        }
        value.add(field.stringValue());
      }

    } catch (IOException e) {
      e.printStackTrace();
      return false;
    }

    result.put("collection", collectionName);
    result.put("docNum", docNum);
    result.put("fieldName", fieldName);
    result.put("flag", flag);
    result.put("norm", norm);
    result.put("value", value);

    return true;
  }
Example #20
 /**
  * Looks up an indexed document by its docId.
  *
  * @param reader the IndexReader object
  * @param docID the document id
  * @param fieldsToLoad the fields to load into the returned document
  * @return the Document, or null if an IOException occurs
  */
 public static Document findDocumentByDocId(
     IndexReader reader, int docID, Set<String> fieldsToLoad) {
   try {
     return reader.document(docID, fieldsToLoad);
   } catch (IOException e) {
     return null;
   }
 }
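A short usage sketch for the helper above; the doc id and field name are hypothetical:

  // hypothetical usage: load only the "title" field of document 42
  Document doc = findDocumentByDocId(reader, 42, Collections.singleton("title"));
  if (doc != null) {
    System.out.println(doc.get("title"));
  }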
Example #21
 public void deleteFieldFromIndex(String fieldName, int docId, Analyzer analyzer)
     throws IOException, ConfigurationException {
   Document doc = reader.document(docId);
   doc.removeFields(fieldName);
   Field uri = doc.getField("URI");
   Term term = new Term("URI", uri.stringValue());
   writer.updateDocument(term, doc, analyzer);
 }
Example #22
 @Override
 protected Integer readFromDocument(IndexReader reader, int docId) throws IOException {
   // This implementation reads the length of the field ...
   Document doc = reader.document(docId, fieldSelector);
   String valueString = doc.get(fieldName);
   String value = stringFactory.create(valueString);
   return value != null ? value.length() : 0;
 }
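The fieldSelector above is presumably built once for the target field; a minimal sketch with the old Lucene 3.x FieldSelector API, loading only that field:

  // sketch (Lucene 3.x API): a selector that loads only 'fieldName'
  FieldSelector fieldSelector = new FieldSelector() {
    @Override
    public FieldSelectorResult accept(String name) {
      return name.equals(fieldName) ? FieldSelectorResult.LOAD : FieldSelectorResult.NO_LOAD;
    }
  };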
Example #23
  protected Diff<Document, Diff<Fieldable, DocumentDiff>> compare(
      IndexReader reader1, IndexReader reader2, String keyFieldName)
      throws IOException, ParseException {
    Diff<Document, Diff<Fieldable, DocumentDiff>> result =
        new Diff<Document, Diff<Fieldable, DocumentDiff>>();
    for (int docId = 0; docId < reader1.maxDoc(); docId++) {
      if (!reader1.isDeleted(docId)) {
        Document doc1 = reader1.document(docId);
        Field keyField = doc1.getField(keyFieldName);
        if (keyField == null) {
          throw new IllegalArgumentException(
              "Key field " + keyFieldName + " should be defined in all docs in the index");
        }

        Document doc2 = findByKey(reader2, keyField);
        if (doc2 == null) {
          result.addAdded(doc1);
        } else {
          Diff<Fieldable, DocumentDiff> diff =
              CompareUtils.diff(keyField.stringValue(), doc1, doc2);
          if (!diff.isEquals()) {
            result.addDiff(diff);
          }
        }
      }
    }

    for (int docId = 0; docId < reader2.maxDoc(); docId++) {
      if (!reader2.isDeleted(docId)) {
        Document doc2 = reader2.document(docId);
        Field keyField = doc2.getField(keyFieldName);
        if (keyField == null) {
          throw new IllegalArgumentException(
              "Key field '" + keyFieldName + "' should be defined in all docs in the index");
        }

        Document doc1 = findByKey(reader1, keyField);
        if (doc1 == null) {
          result.addRemoved(doc2);
        }
      }
    }

    return result;
  }
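findByKey is not shown here; under the same pre-4.0 API used by compare (isDeleted, TermDocs), it could look roughly like the following sketch (an assumption, not the original implementation):

  // sketch (assumption): locate a document by its unique key field (pre-4.0 Lucene API)
  private static Document findByKey(IndexReader reader, Field keyField) throws IOException {
    Term term = new Term(keyField.name(), keyField.stringValue());
    TermDocs termDocs = reader.termDocs(term);
    try {
      if (termDocs.next()) {
        return reader.document(termDocs.doc());
      }
      return null;
    } finally {
      termDocs.close();
    }
  }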
Example #24
  /**
   * @param reader the IndexReader to scan
   * @param lireFeature the features to compute distances for
   * @return the maximum distance found for normalizing.
   * @throws java.io.IOException if reading the index fails
   */
  @SuppressWarnings("unchecked")
  private float[] findSimilar(IndexReader reader, LireFeature[] lireFeature) throws IOException {
    float[] maxDistance = new float[lireFeature.length];
    float[] overallMaxDistance = new float[lireFeature.length];

    for (int i = 0; i < overallMaxDistance.length; i++) {
      overallMaxDistance[i] = -1f;
      maxDistance[i] = -1f;
    }

    parDocs = new TreeSet[lireFeature.length];
    for (int i = 0; i < parDocs.length; i++) {
      parDocs[i] = new TreeSet<SimpleResult>();
    }

    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);

    // clear result set ...

    int docs = reader.maxDoc(); // iterate all doc ids; deleted docs are skipped below
    for (int i = 0; i < docs; i++) {
      if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it.

      Document d = reader.document(i);
      float[] distance = getDistance(d, lireFeature);
      // calculate the overall max distance to normalize score afterwards
      for (int j = 0; j < distance.length; j++) {
        float f = distance[j];
        if (overallMaxDistance[j] < f) {
          overallMaxDistance[j] = f;
        }
        // if it is the first document:
        if (maxDistance[j] < 0) {
          maxDistance[j] = f;
        }
        // if the array is not full yet:
        if (this.parDocs[j].size() < maxHits) {
          this.parDocs[j].add(new SimpleResult(f, d));
          if (f > maxDistance[j]) {
            maxDistance[j] = f;
          }
        } else if (f < maxDistance[j]) {
          // if it is nearer to the sample than at least one of the current set:
          // remove the last one ...
          this.parDocs[j].remove(this.parDocs[j].last());
          // add the new one ...
          this.parDocs[j].add(new SimpleResult(f, d));
          // and set our new distance border ...
          maxDistance[j] = this.parDocs[j].last().getDistance();
        }
      }
    }
    return maxDistance;
  }
Example #25
 private Document findDoc(IndexReader reader, String file) throws IOException {
   for (int i = 0; i < reader.numDocs(); i++) {
     Document document = reader.document(i);
     String s = document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
     if (s.endsWith(File.separator + file)) {
       //                System.out.println("s = " + s);
       return document;
     }
   }
   return null;
 }
Example #26
  // LUCENE-1262
  public void testExceptions() throws Throwable {
    File indexDir = _TestUtil.getTempDir("testfieldswriterexceptions");

    try {
      Directory dir = new FaultyFSDirectory(indexDir);
      IndexWriter writer =
          new IndexWriter(
              dir,
              newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                  .setOpenMode(OpenMode.CREATE));
      for (int i = 0; i < 2; i++) writer.addDocument(testDoc);
      writer.forceMerge(1);
      writer.close();

      IndexReader reader = DirectoryReader.open(dir);

      FaultyIndexInput.doFail = true;

      boolean exc = false;

      for (int i = 0; i < 2; i++) {
        try {
          reader.document(i);
        } catch (IOException ioe) {
          // expected
          exc = true;
        }
        try {
          reader.document(i);
        } catch (IOException ioe) {
          // expected
          exc = true;
        }
      }
      assertTrue(exc);
      reader.close();
      dir.close();
    } finally {
      _TestUtil.rmDir(indexDir);
    }
  }
Example #27
  public void testSkipToFirsttimeHit() throws IOException {
    final DisjunctionMaxQuery dq = new DisjunctionMaxQuery(0.0f);
    dq.add(tq("dek", "albino"));
    dq.add(tq("dek", "DOES_NOT_EXIST"));

    QueryUtils.check(dq, s);

    final Weight dw = dq.weight(s);
    final Scorer ds = dw.scorer(s.getIndexReader(), true, false);
    assertTrue("firsttime skipTo found no match", ds.advance(3) != DocIdSetIterator.NO_MORE_DOCS);
    assertEquals("found wrong docid", "d4", r.document(ds.docID()).get("id"));
  }
Example #28
  /**
   * There was a bug where images with the same score but from different documents in the index
   * were not included in the result list. This is the test for that.
   */
  public void testDuplicatesInIndex() throws IOException {
    indexFiles("src\\test\\resources\\images", "index-large-new", 0, true);
    indexFiles("src\\test\\resources\\images", "index-large-new", 0, false);
    indexFiles("src\\test\\resources\\images", "index-large-new", 0, false);

    ImageSearcher s = searchers[0];
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("index-large-new")));
    Document query = reader.document(0);
    ImageSearchHits hits = s.search(query, reader);
    FileUtils.saveImageResultsToPng(
        "duplicate_", hits, query.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
  }
Example #29
  public void testOutputSearchResults()
      throws IOException, InstantiationException, IllegalAccessException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    int docNum = 0; // doc to search for.
    // -----------

    // load the query document once instead of re-reading it for each field access
    Document queryDoc = reader.document(docNum);
    String query = queryDoc.getValues("hash")[0];
    CEDD ceddQuery = new CEDD();
    ceddQuery.setByteArrayRepresentation(
        queryDoc.getField(DocumentBuilder.FIELD_NAME_CEDD).binaryValue().bytes,
        queryDoc.getField(DocumentBuilder.FIELD_NAME_CEDD).binaryValue().offset,
        queryDoc.getField(DocumentBuilder.FIELD_NAME_CEDD).binaryValue().length);

    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs topDocs = searcher.search(createQuery(query), numImagesEval);
    FileUtils.saveImageResultsToPng(
        "result_lsh",
        topDocs,
        queryDoc.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0],
        reader);
  }
Example #30
 @Override
 public void collect(int doc) throws IOException {
   String id = fieldData.stringValue(doc);
   // the _source is the query
   Document document = reader.document(doc, SourceFieldSelector.INSTANCE);
   byte[] source = document.getBinaryValue(SourceFieldMapper.NAME);
   try {
     queries.put(id, percolator.parseQuery(id, source, 0, source.length));
   } catch (Exception e) {
     logger.warn("failed to add query [{}]", e, id);
   }
 }