Ejemplo n.º 1
0
 /**
  * Overwrite this method if you want to filter the input, apply hashing, etc.
  *
  * @param feature the current feature.
  * @param document the current document.
  * @param featureFieldName the field hashFunctionsFileName of the feature.
  */
 protected void addToDocument(LireFeature feature, Document document, String featureFieldName) {
   if (run == 0) {
   } // just count documents
   else if (run == 1) { // Select the representatives ...
     if (representativesID.contains(docCount)
         && feature
             .getClass()
             .getCanonicalName()
             .equals(featureClass.getCanonicalName())) { // it's a representative.
       // put it into a temporary data structure ...
       representatives.add(feature);
     }
   } else if (run
       == 2) { // actual hashing: find the nearest representatives and put those as a hash into a
     // document.
     if (feature
         .getClass()
         .getCanonicalName()
         .equals(featureClass.getCanonicalName())) { // it's a feature to be hashed
       int[] hashes = getHashes(feature);
       document.add(
           new TextField(
               featureFieldName + "_hash",
               createDocumentString(hashes, hashes.length),
               Field.Store.YES));
       document.add(
           new TextField(
               featureFieldName + "_hash_q", createDocumentString(hashes, 10), Field.Store.YES));
     }
     document.add(new StoredField(featureFieldName, feature.getByteArrayRepresentation()));
   }
 }
Ejemplo n.º 2
0
 public void run() {
   // do it ...
   try {
     IndexWriter indexWriter =
         LuceneUtils.createIndexWriter(
             indexPath, overwriteIndex, LuceneUtils.AnalyzerType.WhitespaceAnalyzer);
     for (Iterator<File> iterator = inputFiles.iterator(); iterator.hasNext(); ) {
       File inputFile = iterator.next();
       if (verbose) System.out.println("Processing " + inputFile.getPath() + ".");
       if (verbose) System.out.println("Counting images.");
       run = 0;
       readFile(indexWriter, inputFile);
       if (verbose) System.out.printf("%d images found in the data file.\n", docCount);
       int numberOfRepresentatives = 1000; // TODO: clever selection.
       // select a number of representative "fixed stars" randomly from file
       if (numberOfRepresentatives > Math.sqrt(docCount))
         numberOfRepresentatives = (int) Math.sqrt(docCount);
       if (verbose)
         System.out.printf(
             "Selecting %d representative images for hashing.\n", numberOfRepresentatives);
       representativesID = new HashSet<Integer>(numberOfRepresentatives);
       while (representativesID.size() < numberOfRepresentatives) {
         representativesID.add((int) Math.floor(Math.random() * (docCount - 1)));
       }
       representatives = new ArrayList<LireFeature>(numberOfRepresentatives);
       docCount = 0;
       run = 1;
       if (verbose) System.out.println("Now getting representatives from the data file.");
       readFile(indexWriter, inputFile);
       docCount = 0;
       run = 2;
       if (verbose) System.out.println("Finally we start the indexing process, please wait ...");
       readFile(indexWriter, inputFile);
       if (verbose) System.out.println("Indexing finished.");
     }
     indexWriter.commit();
     indexWriter.close();
   } catch (Exception e) {
     e.printStackTrace();
   }
 }
  protected void validateResponse(
      TermVectorResponse esResponse, Fields luceneFields, TestConfig testConfig)
      throws IOException {
    TestDoc testDoc = testConfig.doc;
    HashSet<String> selectedFields =
        testConfig.selectedFields == null
            ? null
            : new HashSet<String>(Arrays.asList(testConfig.selectedFields));
    Fields esTermVectorFields = esResponse.getFields();
    for (TestFieldSetting field : testDoc.fieldSettings) {
      Terms esTerms = esTermVectorFields.terms(field.name);
      if (selectedFields != null && !selectedFields.contains(field.name)) {
        assertNull(esTerms);
        continue;
      }

      assertNotNull(esTerms);

      Terms luceneTerms = luceneFields.terms(field.name);
      TermsEnum esTermEnum = esTerms.iterator(null);
      TermsEnum luceneTermEnum = luceneTerms.iterator(null);

      while (esTermEnum.next() != null) {
        assertNotNull(luceneTermEnum.next());

        assertThat(esTermEnum.totalTermFreq(), equalTo(luceneTermEnum.totalTermFreq()));
        DocsAndPositionsEnum esDocsPosEnum = esTermEnum.docsAndPositions(null, null, 0);
        DocsAndPositionsEnum luceneDocsPosEnum = luceneTermEnum.docsAndPositions(null, null, 0);
        if (luceneDocsPosEnum == null) {
          // test we expect that...
          assertFalse(field.storedOffset);
          assertFalse(field.storedPayloads);
          assertFalse(field.storedPositions);
          continue;
        }

        String currentTerm = esTermEnum.term().utf8ToString();

        assertThat(
            "Token mismatch for field: " + field.name,
            currentTerm,
            equalTo(luceneTermEnum.term().utf8ToString()));

        esDocsPosEnum.nextDoc();
        luceneDocsPosEnum.nextDoc();

        int freq = esDocsPosEnum.freq();
        assertThat(freq, equalTo(luceneDocsPosEnum.freq()));
        for (int i = 0; i < freq; i++) {
          String failDesc = " (field:" + field.name + " term:" + currentTerm + ")";
          int lucenePos = luceneDocsPosEnum.nextPosition();
          int esPos = esDocsPosEnum.nextPosition();
          if (field.storedPositions && testConfig.requestPositions) {
            assertThat("Position test failed" + failDesc, lucenePos, equalTo(esPos));
          } else {
            assertThat("Missing position test failed" + failDesc, esPos, equalTo(-1));
          }
          if (field.storedOffset && testConfig.requestOffsets) {
            assertThat(
                "Offset test failed" + failDesc,
                luceneDocsPosEnum.startOffset(),
                equalTo(esDocsPosEnum.startOffset()));
            assertThat(
                "Offset test failed" + failDesc,
                luceneDocsPosEnum.endOffset(),
                equalTo(esDocsPosEnum.endOffset()));
          } else {
            assertThat(
                "Missing offset test failed" + failDesc, esDocsPosEnum.startOffset(), equalTo(-1));
            assertThat(
                "Missing offset test failed" + failDesc, esDocsPosEnum.endOffset(), equalTo(-1));
          }
          if (field.storedPayloads && testConfig.requestPayloads) {
            assertThat(
                "Payload test failed" + failDesc,
                luceneDocsPosEnum.getPayload(),
                equalTo(esDocsPosEnum.getPayload()));
          } else {
            assertThat(
                "Missing payload test failed" + failDesc,
                esDocsPosEnum.getPayload(),
                equalTo(null));
          }
        }
      }

      assertNull("Es returned terms are done but lucene isn't", luceneTermEnum.next());
    }
  }