Example #1
0
  /**
   * Points this instance at the file-system index stored under {@code indexDirectory}.
   *
   * <p>When no index exists at that location yet, {@code createIndexDirectory} is invoked first.
   * The directory handle used for the existence probe is closed again instead of being leaked.
   *
   * @param indexDirectory path of the directory that holds (or will hold) the index
   * @throws Exception if the directory cannot be opened or the index cannot be created
   */
  public void useFSDirectory(final String indexDirectory) throws Exception {
    final File indexLocation = new File(indexDirectory);

    // Probe with a short-lived handle so the check does not leave an open directory behind.
    final boolean indexPresent;
    final FSDirectory probe = FSDirectory.open(indexLocation);
    try {
      indexPresent = IndexReader.indexExists(probe);
    } finally {
      probe.close();
    }

    if (!indexPresent) {
      this.createIndexDirectory(indexDirectory);
    }

    this.directory = FSDirectory.open(indexLocation);
  }
 /**
  * Opens (creating when necessary) the main index and the suggester index after construction.
  *
  * <p>Both directories are opened with {@code NoLockFactory}. The writer is committed once before
  * the reader is opened, and the suggester is built from an empty iterator when the suggest
  * location is neither an existing path nor a directory.
  *
  * @throws Exception if any of the index structures cannot be opened or built
  */
 @PostConstruct
 public void createOrVerifyIndex() throws Exception {
   LOGGER.info("Initializing Index..........................please Wait..0%");

   // Locations come from the application properties.
   index = new File(appproperties.getLuceneIndexPath());
   suggest = new File(appproperties.getLiceneSuggestIndexPath());
   directory = FSDirectory.open(index, NoLockFactory.getNoLockFactory());
   suggestDirectory = FSDirectory.open(suggest, NoLockFactory.getNoLockFactory());

   // Writer first, then an initial commit, then the reader/searcher on top of it.
   iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
   writer = new IndexWriter(directory, iwc);
   writer.commit();
   indexReader = DirectoryReader.open(directory);
   indexSearcher = new IndexSearcher(indexReader, executorService);

   String[] searchFields = new String[] {TITLE_FIELD, CONTENTS_FIELD};
   parser = new MultiFieldQueryParser(searchFields, analyzer);

   suggester =
       new AnalyzingInfixSuggester(
           Version.LATEST,
           suggestDirectory,
           analyzer,
           analyzer,
           AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS);

   boolean suggestIndexMissing = !(suggest.exists() || suggest.isDirectory());
   if (suggestIndexMissing) {
     LOGGER.info(
         "Lucene Suggest did not exist.........................................Building Please wait.........0%");
     suggester.build(new IndexFileIterator(new ArrayList<IndexFile>().iterator()));
     suggester.refresh();
     LOGGER.info(
         "Lucene Suggest Build Complete...................................................................100%");
   }

   LOGGER.info("Lucene Ready............................................100%");
 }
Example #3
0
  /**
   * Opens the two indexes stored at the given locations and compares them, matching documents
   * through the field named {@code keyFieldName}, which should hold a unique value.
   *
   * <p>NOTE(review): the readers and directories opened here are never closed. Whether they can be
   * closed before returning depends on whether the returned diff still reads from them - confirm
   * against the reader-based {@code compare} overload.
   *
   * @param indexDir1 file-system path of the first index
   * @param indexDir2 file-system path of the second index
   * @param keyFieldName name of the field used as the document key
   * @return result of the compare
   * @throws IOException problems accessing indexes
   * @throws ParseException problems parsing query
   */
  public Diff<Document, Diff<Fieldable, DocumentDiff>> compare(
      String indexDir1, String indexDir2, String keyFieldName) throws IOException, ParseException {
    IndexReader firstReader = IndexReader.open(FSDirectory.open(new File(indexDir1)));
    IndexReader secondReader = IndexReader.open(FSDirectory.open(new File(indexDir2)));
    return compare(firstReader, secondReader, keyFieldName);
  }
 /**
  * Builds an in-memory directory holding a copy of every regular entry of {@code dir}.
  *
  * @param dir the file-system directory to copy from
  * @param closeDir whether {@code dir} is closed once all entries have been copied
  * @param context the I/O context passed to each copy
  * @throws IOException if listing, copying or closing fails
  */
 private RAMDirectory(FSDirectory dir, boolean closeDir, IOContext context) throws IOException {
   this();
   for (String name : dir.listAll()) {
     // Sub-directories are skipped; only plain entries are copied.
     if (Files.isDirectory(dir.getDirectory().resolve(name))) {
       continue;
     }
     copyFrom(dir, name, name, context);
   }
   if (!closeDir) {
     return;
   }
   dir.close();
 }
  /**
   * Opens an {@code FSDirectory} over {@code indexDir} and makes sure it contains an index.
   *
   * @param indexDir the directory where to write a new index
   * @param properties the configuration properties (directory type and lock factory are derived
   *     from them)
   * @return the created {@code FSDirectory} instance
   * @throws IOException if the directory cannot be opened or the index cannot be initialized
   */
  public static FSDirectory createFSIndex(File indexDir, Properties properties) throws IOException {
    LockFactory configuredLockFactory = createLockFactory(indexDir, properties);
    FSDirectory index = FSDirectoryType.getType(properties).getDirectory(indexDir, null);

    // The lock factory is applied through the setter rather than the constructor: Lucene throws
    // an exception when a constructor-supplied factory differs from a previous setting.
    index.setLockFactory(configuredLockFactory);

    log.debug("Initialize index: '{}'", indexDir.getAbsolutePath());
    initializeIndexIfNeeded(index);
    return index;
  }
 /**
  * Runs a two-stage search for {@code img}.
  *
  * <p>First the reference-object index at {@code indexPath + "-ro"} is searched and the "ro-id"
  * values of the first {@code numReferenceObjectsUsed} hits are joined into a space-separated
  * string; that string is then scored against the main index at {@code indexPath}.
  *
  * <p>Readers and directories are closed before returning; previously every call leaked two
  * readers.
  *
  * @param img the query image
  * @param indexPath path of the main index; the reference-object index is expected next to it
  *     with the suffix "-ro"
  * @return the scored documents from the main index
  * @throws IOException if one of the indexes cannot be read
  */
 public TopDocs search(BufferedImage img, String indexPath) throws IOException {
   ImageSearcher searcher =
       new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
   StringBuilder sb = new StringBuilder(numReferenceObjectsUsed * 4);

   // Stage 1: rank the reference objects for the query image.
   try (FSDirectory roDirectory = FSDirectory.open(new File(indexPath + "-ro"));
       DirectoryReader roReader = DirectoryReader.open(roDirectory)) {
     ImageSearchHits hits = searcher.search(img, roReader);
     for (int j = 0; j < numReferenceObjectsUsed; j++) {
       sb.append(hits.doc(j).getValues("ro-id")[0]);
       sb.append(' ');
     }
   }

   // Stage 2: score the main index with the ranked reference-object ids.
   try (FSDirectory mainDirectory = FSDirectory.open(new File(indexPath));
       DirectoryReader mainReader = DirectoryReader.open(mainDirectory)) {
     return scoreDocs(sb.toString(), mainReader);
   }
 }
  /**
   * We assume that the initial indexing has been done and a set of reference objects has been found
   * and indexed in the separate directory. However further documents were added and they now need
   * to get a ranked list of reference objects. So we (i) get all these new documents missing the
   * field "ro-order" and (ii) add this field.
   *
   * <p>Fixes over the previous version:
   *
   * <ul>
   *   <li>The writer is opened in {@code APPEND} mode. With {@code CREATE} the existing index was
   *       replaced, so every document that already carried "ro-order" was dropped on commit.
   *   <li>Document ids are iterated up to {@code maxDoc()}; using {@code numDocs()} as the bound
   *       skipped the trailing documents whenever the index contained deletions.
   *   <li>Readers, writer and directories are closed even when an exception is thrown.
   * </ul>
   *
   * <p>NOTE(review): documents are rebuilt from what {@code reader.document(i)} returns; confirm
   * that no index-only fields need to be carried over.
   *
   * @param indexPath the index to update; the reference objects are read from
   *     {@code indexPath + "-ro"}
   * @throws IOException if one of the indexes cannot be read or written
   */
  public void updateIndex(String indexPath) throws IOException {
    ImageSearcher searcher =
        new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
    Map<String, Analyzer> perField = new HashMap<String, Analyzer>(1);
    perField.put("ro-order", new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
    PerFieldAnalyzerWrapper aWrapper =
        new PerFieldAnalyzerWrapper(new SimpleAnalyzer(LuceneUtils.LUCENE_VERSION), perField);

    int countUpdated = 0;
    StringBuilder sb = new StringBuilder(256);

    try (FSDirectory indexDirectory = FSDirectory.open(new File(indexPath));
        FSDirectory roDirectory = FSDirectory.open(new File(indexPath + "-ro"));
        IndexReader reader = DirectoryReader.open(indexDirectory);
        IndexReader readerRo = DirectoryReader.open(roDirectory);
        IndexWriter iw =
            new IndexWriter(
                indexDirectory,
                new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, aWrapper)
                    .setOpenMode(IndexWriterConfig.OpenMode.APPEND))) {
      // Null when the index has no deletions; otherwise tells which doc ids are still live.
      Bits liveDocs = MultiFields.getLiveDocs(reader);
      // Doc ids range over [0, maxDoc), deleted ones included.
      int maxDoc = reader.maxDoc();

      for (int i = 0; i < maxDoc; i++) {
        if (liveDocs != null && !liveDocs.get(i)) continue; // if it is deleted, just ignore it.
        Document document = reader.document(i);
        if (document.getField("ro-order") == null) { // if the field is not here we create it.
          ImageSearchHits hits = searcher.search(document, readerRo);
          sb.delete(0, sb.length());
          for (int j = 0; j < numReferenceObjectsUsed; j++) {
            sb.append(hits.doc(j).getValues("ro-id")[0]);
            sb.append(' ');
          }
          document.add(new TextField("ro-order", sb.toString(), Field.Store.YES));
          iw.updateDocument(
              new Term(
                  DocumentBuilder.FIELD_NAME_IDENTIFIER,
                  document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]),
              document);
          countUpdated++;
        }

        // progress report
        progress.setNumDocsProcessed(progress.getNumDocsProcessed() + 1);

        // debug:
        System.out.println("countUpdated = " + countUpdated);
      }
      iw.commit();
    }
  }
 /**
  * Stops this component: cancels the timer, stops the task and closes the directory.
  *
  * <p>A failure while closing the directory is reported through the logger and not propagated.
  */
 public void stop() {
   @SuppressWarnings("unused")
   int readCurrentState =
       current; // Another unneeded value, to ensure visibility of state protected by memory
                // barrier
   timer.cancel();
   task.stop();
   try {
     directory.close();
   } catch (Exception e) {
     // Best effort: closing must not make stop() fail.
     log.unableToCloseLuceneDirectory(directory.getDirectory(), e);
   }
 }
  /**
   * Merges all indexes that belong to partition {@code partId} into a freshly created index below
   * {@code outputPath}.
   *
   * <p>Sources are the partition's index below {@code minorPath} (when present) and, for every
   * version in {@code (localIndexVer, maxVersion]}, the per-user indexes below {@code inputPath}
   * whose name maps to {@code partId}.
   *
   * <p>The source readers are now closed once merging is over, and the writer is closed even when
   * the merge fails, so a failure no longer leaves the write lock behind.
   *
   * <p>NOTE(review): the existence check uses {@code minorPath.cat(partId + "")} while the index
   * is opened from {@code minorPath.cat(MailConstants.PART_PRE + partId)} - confirm both refer to
   * the intended location.
   *
   * @param minorPath root of the previous merge result; may be {@code null}
   * @param inputPath root holding one sub-path per index version
   * @param outputPath root below which the merged partition index is written
   * @param partId partition to merge
   * @param localIndexVer last version that has already been merged
   * @param maxVersion last version to include
   * @param partNo total number of partitions, used to map a user path to its partition
   * @throws CorruptIndexException if one of the indexes is corrupt
   * @throws IOException if an index cannot be read or written
   */
  private void mergeIndexByPart(
      Path minorPath,
      Path inputPath,
      Path outputPath,
      int partId,
      int localIndexVer,
      int maxVersion,
      int partNo)
      throws CorruptIndexException, IOException {
    List<IndexReader> mergeIndexArray = new ArrayList<IndexReader>();
    try {
      if (minorPath != null
          && PathUtil.exists(minorPath)
          && PathUtil.exists(minorPath.cat(partId + ""))) {
        mergeIndexArray.add(
            IndexReader.open(
                FSDirectory.open(
                    minorPath
                        .cat(MailConstants.PART_PRE + partId)
                        .cat(IndexBuilder.LUCENE_INDEX_DIR)
                        .asFile())));
      }

      for (int i = localIndexVer + 1; i <= maxVersion; i++) {
        Path segPath = inputPath.cat(i + "");
        Path[] userPathes = segPath.listPathes();
        for (Path userPath : userPathes) {
          if (!userPath.getName().equals("built")) {
            int shouldInPart = LSUtils.genPartId(userPath.getName(), partNo);
            if (PathUtil.exists(segPath) && shouldInPart == partId) {
              mergeIndexArray.add(
                  IndexReader.open(
                      FSDirectory.open(userPath.cat(IndexBuilder.LUCENE_INDEX_DIR).asFile())));
            }
          }
        }
      }

      IndexWriter indexWriter =
          new IndexWriter(
              FSDirectory.open(
                  outputPath
                      .cat(MailConstants.PART_PRE + partId)
                      .cat(IndexBuilder.LUCENE_INDEX_DIR)
                      .asFile()),
              new IKAnalyzer(true),
              true,
              IndexWriter.MaxFieldLength.LIMITED);
      try {
        indexWriter.setMaxMergeDocs(1024);
        indexWriter.setMergeFactor(100);
        indexWriter.addIndexes(mergeIndexArray.toArray(new IndexReader[0]));
      } finally {
        // Always release the writer (and its lock), also when the merge fails.
        indexWriter.close();
      }
    } finally {
      // Close every source reader; remember the first failure but keep closing the rest.
      IOException closeFailure = null;
      for (IndexReader sourceReader : mergeIndexArray) {
        try {
          sourceReader.close();
        } catch (IOException e) {
          if (closeFailure == null) {
            closeFailure = e;
          }
        }
      }
      if (closeFailure != null) {
        throw closeFailure;
      }
    }
  }
Example #10
0
  /**
   * Deletes every indexed document whose id field equals {@code id}.
   *
   * <p>The index directory is created when missing. An empty or unreadable location is opened in
   * {@code CREATE} mode, anything else in {@code CREATE_OR_APPEND} mode. Writer and directory are
   * both closed when the method returns; previously the directory was never closed, and a
   * {@code null} result of {@code File.list()} caused a {@code NullPointerException}.
   *
   * @param id value of the id field of the documents to delete
   * @throws Exception if the index cannot be opened or written
   */
  @Override
  public void deleteItem(String id) throws Exception {
    File indexDir = new File(getIndexPath());
    boolean create;
    if (!indexDir.exists()) {
      indexDir.mkdirs();
      create = true;
    } else {
      // list() yields null when the path is not a directory or cannot be read.
      String[] entries = indexDir.list();
      create = entries == null || entries.length == 0;
    }

    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_10_2, analyzer);
    iwc.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);

    try (Directory dir = FSDirectory.open(indexDir);
        IndexWriter writer = new IndexWriter(dir, iwc)) {
      writer.deleteDocuments(new Term(FIELD_LABEL_ID, id));
      writer.commit();
    }
  }
Example #11
0
 /**
  * Searches the "index-large" index with a randomly chosen query document, then applies the
  * re-rank filter and the LSA filter in turn, saving each result list as a PNG.
  *
  * <p>The reader and directory are closed at the end, the unused locals were removed, and the
  * query identifier is looked up once instead of three times.
  *
  * @throws IOException if the index cannot be read or a result image cannot be written
  */
 public void testRerankFilters() throws IOException {
   // NOTE(review): assumes the index holds at least 10000 documents - confirm.
   int queryDocID = (int) (Math.random() * 10000);
   // select one feature for the large index:
   int featureIndex = 4;

   try (FSDirectory directory = FSDirectory.open(new File("index-large"));
       IndexReader reader = DirectoryReader.open(directory)) {
     ImageSearchHits hits = searchers[featureIndex].search(reader.document(queryDocID), reader);
     RerankFilter rerank = new RerankFilter(featureClasses[0], DocumentBuilder.FIELD_NAME_CEDD);
     LsaFilter lsa = new LsaFilter(featureClasses[0], DocumentBuilder.FIELD_NAME_CEDD);
     String queryIdentifier =
         reader.document(queryDocID).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];

     FileUtils.saveImageResultsToPng("GeneralTest_rerank_0_old", hits, queryIdentifier);

     hits = rerank.filter(hits, reader.document(queryDocID));
     FileUtils.saveImageResultsToPng("GeneralTest_rerank_1_new", hits, queryIdentifier);

     hits = lsa.filter(hits, reader.document(queryDocID));
     FileUtils.saveImageResultsToPng("GeneralTest_rerank_2_lsa", hits, queryIdentifier);
   }
 }
Example #12
0
  /**
   * Initializes the plugin from the wiki configuration.
   *
   * <p>The index location list is read from the {@code PROP_INDEX_DIR} parameter and falls back to
   * the Lucene work directory when empty. The first entry of the comma-separated list is created
   * if missing and opened, then passed on to {@code init(Directory, XWikiContext)}.
   *
   * @param context the current XWiki context
   * @throws RuntimeException wrapping the {@code IOException} when the directory cannot be opened
   */
  @Override
  public synchronized void init(XWikiContext context) {
    LOGGER.debug("Lucene plugin: in init");

    this.indexDirs = context.getWiki().Param(PROP_INDEX_DIR);
    if (StringUtils.isEmpty(this.indexDirs)) {
      this.indexDirs = getLuceneWorkDirectory().getAbsolutePath();
    }

    // Only the first configured location is opened here.
    File primaryIndexDir = new File(StringUtils.split(this.indexDirs, ",")[0]);

    Directory directory;
    try {
      if (!primaryIndexDir.exists()) {
        primaryIndexDir.mkdirs();
      }
      directory = FSDirectory.open(primaryIndexDir);
    } catch (IOException e) {
      LOGGER.error("Failed to open the index directory: ", e);
      throw new RuntimeException(e);
    }

    init(directory, context);
  }
Example #13
0
  /**
   * Writes {@code geoEntryList} into the index stored at {@code indexLocation}.
   *
   * <p>The directory is now closed together with the writer; before, it was opened and never
   * closed.
   *
   * @param geoEntryList the entries to index
   * @param create passed to {@code createIndexWriter} to select how the writer is opened
   * @param progressCallback passed through to {@code indexGeoEntries}
   * @throws GeoEntryIndexingException if the directory cannot be opened or writing fails
   */
  private void buildIndex(
      final List<GeoEntry> geoEntryList,
      final boolean create,
      final ProgressCallback progressCallback) {
    Directory directory;

    try {
      directory = FSDirectory.open(Paths.get(indexLocation));
    } catch (IOException e) {
      throw new GeoEntryIndexingException(
          "Couldn't open the directory for the index, " + indexLocation, e);
    }

    // Try-with-resources closes the IndexWriter first and the Directory after it.
    try (final Directory openedDirectory = directory;
        final IndexWriter indexWriter = createIndexWriter(create, openedDirectory)) {
      try {
        indexGeoEntries(indexWriter, geoEntryList, progressCallback);
      } catch (IOException e) {
        // Need to roll back here before the IndexWriter is closed at the end of the try
        // block.
        indexWriter.rollback();
        throw e;
      }
    } catch (IOException e) {
      throw new GeoEntryIndexingException("Error writing to the index.", e);
    }
  }
Example #14
0
 /**
  * Opens an IndexWriter on {@code searchIndexPath}. When the lock cannot be obtained the index is
  * forcibly unlocked (if it reports being locked) and opening is attempted a second time.
  *
  * @param searchIndexPath the index directory
  * @param create passed to {@code retrieveIndexWriterConfig} to build the writer configuration
  * @return the opened writer
  * @throws IOException if the writer cannot be opened, including on the second attempt
  */
 private IndexWriter openIndexWriter(File searchIndexPath, boolean create) throws IOException {
   // NFS doesn't work with Lucene default locking as of Lucene 3.3, so use
   // SimpleFSLockFactory instead.
   FSDirectory fsDirectory = FSDirectory.open(searchIndexPath, new SimpleFSLockFactory());

   try {
     return new IndexWriter(fsDirectory, this.retrieveIndexWriterConfig(create));
   } catch (LockObtainFailedException e) {
     logger.warn(
         "Unable to obtain lock for "
             + searchIndexPath.getAbsolutePath()
             + ".  Attempting to forcibly unlock the index.");
     if (IndexWriter.isLocked(fsDirectory)) {
       try {
         IndexWriter.unlock(fsDirectory);
         logger.info(
             "Successfully unlocked search directory " + searchIndexPath.getAbsolutePath());
       } catch (IOException ex) {
         logger.warn(
             "Unable to unlock search directory "
                 + searchIndexPath.getAbsolutePath()
                 + " "
                 + ex.toString());
       }
     }
   }

   // Second attempt: the lock may have been a stale one that has just been removed.
   return new IndexWriter(fsDirectory, this.retrieveIndexWriterConfig(create));
 }
  /**
   * Indexes every listed first-level child directory of {@code parent}; the index for each child
   * is stored below {@code index} in a directory of the same name.
   *
   * <p>Source and target paths are now built with {@code File(parent, child)}. The previous
   * hard-coded {@code "\\"} separator only produced a valid source path on Windows. Writer and
   * reader are closed even when indexing fails.
   *
   * @param parent directory that contains the directories to index
   * @param dirs names of the child directories to index
   * @param index root directory below which the per-directory indexes are created
   * @param Pa holder for the directory, writer, analyzer and reader in use
   * @return the total number of documents over all created indexes
   * @throws FileHandlerException if a file cannot be handled while indexing
   * @throws IOException if an index cannot be written or read
   */
  private long indexDirectories(String parent, String[] dirs, String index, SetupParameters Pa)
      throws FileHandlerException, IOException {
    long sumDocs = 0;

    for (int i = 0; i < dirs.length; i++) {
      System.out.println("\t-----FOLDER----- :" + dirs[i].toUpperCase());

      // File(parent, child) copes with a trailing separator on the index root.
      Directory di = FSDirectory.getDirectory(new File(index, dirs[i]), true);
      Pa.setDir(di);
      Pa.setWriter(new IndexWriter(Pa.getDir(), Pa.getAnalyzer(), true));
      try {
        this.index(dirs[i].toLowerCase(), Pa.getWriter(), new File(parent, dirs[i]));
        Pa.getWriter().optimize();
      } finally {
        Pa.getWriter().close();
      }

      IndexReader reader = Pa.getReader().open(Pa.getDir());
      try {
        sumDocs += reader.numDocs();
      } finally {
        reader.close();
      }
    }
    return sumDocs;
  }
 /**
  * Creates a writer for a fresh index at {@code "output/" + filename}, using a standard analyzer,
  * a 100 MB RAM buffer and {@code CREATE} open mode.
  *
  * @param filename name of the index directory below "output/"
  * @return the opened writer
  * @throws IOException if the directory or the writer cannot be opened
  */
 static IndexWriter createWriter(String filename) throws IOException {
   StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_48);
   IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_48, analyzer);
   config.setRAMBufferSizeMB(100);
   config.setOpenMode(OpenMode.CREATE);

   File target = new File("output/" + filename);
   return new IndexWriter(FSDirectory.open(target), config);
 }
Example #17
0
  /**
   * This function is only for test search.
   *
   * <p>Parses the escaped {@code queryString} against the "contents" field and returns the "path"
   * value of each hit with its last four characters (the ".txt" extension) removed. I/O and parse
   * errors are printed to standard output and the hits collected so far are returned.
   *
   * <p>The index reader is now closed before returning; previously it leaked on every call.
   *
   * @param indexDir path of the index to search
   * @param queryString raw query text; it is escaped before parsing
   * @param numResults maximum number of hits to return
   * @param stopwords stop words handed to the analyzer
   * @return the hit paths without file extension
   */
  public static List<String> searchQuery(
      String indexDir, String queryString, int numResults, CharArraySet stopwords) {
    String field = "contents";
    List<String> hitPaths = new ArrayList<String>();

    try {
      IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexDir)));
      try {
        IndexSearcher searcher = new IndexSearcher(reader);

        Analyzer analyzer = new MyAnalyzer(Version.LUCENE_44, stopwords);

        QueryParser parser = new QueryParser(Version.LUCENE_44, field, analyzer);
        Query query = parser.parse(QueryParser.escape(queryString));

        TopDocs results = searcher.search(query, null, numResults);
        for (ScoreDoc hit : results.scoreDocs) {
          String path = searcher.doc(hit.doc).get("path");
          // chop off the file extension (".txt")
          hitPaths.add(path.substring(0, path.length() - 4));
        }
      } finally {
        reader.close();
      }
    } catch (IOException e) {
      System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    } catch (ParseException e) {
      System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }

    return hitPaths;
  }
Example #18
0
  /**
   * give the id list of sentences, from Lucene index
   *
   * <p>Parse and I/O errors are logged and the results collected so far are returned. The reader
   * and directory are closed on every path; previously the reader stayed open whenever an
   * exception occurred and the directory was never closed.
   *
   * @param input input word
   * @param catalogName catalog (domain) name which we'd like to search in
   * @param limit how many hits are needed (0 means all)
   * @return the "filename" value of each hit
   */
  public List<String> query(String input, String catalogName, int limit) {

    List<String> res = new ArrayList<String>();
    catalog c = catalogs.get(catalogName);

    try (FSDirectory directory = FSDirectory.open(Paths.get(c.indexPath));
        IndexReader reader = DirectoryReader.open(directory)) {
      IndexSearcher searcher = new IndexSearcher(reader);

      QueryParser parser = new QueryParser("contents", analyzer);
      Query query = parser.parse(QueryParser.escape(input));

      // search() needs a positive hit count, so fall back to 1 when nothing matches.
      int n = limit > 0 ? limit : searcher.count(query);
      if (n == 0) n = 1;
      TopDocs results = searcher.search(query, n);

      int endPos = limit;
      if (limit != 0) endPos = Math.min(results.totalHits, limit); // 1st n hits
      else endPos = results.totalHits; // all hits

      for (int i = 0; i < endPos; i++) {
        int id = results.scoreDocs[i].doc;
        Document doc = searcher.doc(id);
        res.add(doc.get("filename"));
      }
    } catch (ParseException e) {
      log(e.getMessage());
    } catch (IOException e) {
      log(e.getMessage());
    }
    return res;
  }
Example #19
0
 /**
  * Adds {@code indexingValue} to the index through {@code addDoc}.
  *
  * <p>The index directory is created when missing. An empty or unreadable location is opened in
  * {@code CREATE} mode, anything else in {@code CREATE_OR_APPEND} mode. Writer and directory are
  * both closed when the method returns; previously the directory was never closed, and a
  * {@code null} result of {@code File.list()} caused a {@code NullPointerException}.
  *
  * @param indexingValue the value to index
  * @throws Exception if the index cannot be opened or written
  */
 public void writeIndex(IndexingValue indexingValue) throws Exception {
   File indexDir = new File(getIndexPath());
   boolean create;
   if (!indexDir.exists()) {
     indexDir.mkdirs();
     create = true;
   } else {
     // list() yields null when the path is not a directory or cannot be read.
     String[] entries = indexDir.list();
     create = entries == null || entries.length == 0;
   }

   IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_10_2, analyzer);
   iwc.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);

   try (Directory dir = FSDirectory.open(indexDir);
       IndexWriter writer = new IndexWriter(dir, iwc)) {
     addDoc(writer, indexingValue);
   }
 }
 /**
  * Configures the searcher from system properties and opens the index.
  *
  * <p>{@code dvn.index.location}, when it names an existing directory, moves the index to its
  * "index-dir" child. {@code dvn.search.maxclausecount}, when set, overrides the maximum clause
  * count and falls back to 1024 if it cannot be parsed. A failure to open the index is only
  * printed, which leaves the searcher unset.
  */
 public LuceneSearch() {
   String dvnIndexLocation = System.getProperty("dvn.index.location");
   if (dvnIndexLocation != null) {
     File locationDirectory = new File(dvnIndexLocation);
     boolean usableLocation = locationDirectory.exists() && locationDirectory.isDirectory();
     if (usableLocation) {
       indexDir = dvnIndexLocation + "/index-dir";
     }
   }

   String dvnMaxClauseCountStr = System.getProperty("dvn.search.maxclausecount");
   if (dvnMaxClauseCountStr != null) {
     try {
       dvnMaxClauseCount = Integer.parseInt(dvnMaxClauseCountStr);
     } catch (Exception e) {
       e.printStackTrace();
       dvnMaxClauseCount = 1024;
     }
   }

   try {
     dir = FSDirectory.getDirectory(indexDir, false);
     r = IndexReader.open(dir);
     searcher = new IndexSearcher(r);
   } catch (IOException ex) {
     ex.printStackTrace();
   }
 }
Example #21
0
  /**
   * Prepares this instance for indexing: resolves (and creates if necessary) the index directory
   * below the data root, opens it with the configured lock factory and resets the bookkeeping
   * fields.
   *
   * @throws IOException if the index directory cannot be created or opened
   */
  @SuppressWarnings("PMD.CollapsibleIfStatements")
  private void initialize() throws IOException {
    synchronized (this) {
      RuntimeEnvironment env = RuntimeEnvironment.getInstance();

      File indexRoot = new File(env.getDataRootFile(), INDEX_DIR);
      File indexDir = (project == null) ? indexRoot : new File(indexRoot, project.getPath());

      // The final exists() re-check covers a directory created concurrently after mkdirs() failed.
      boolean creationFailed = !indexDir.exists() && !indexDir.mkdirs() && !indexDir.exists();
      if (creationFailed) {
        throw new FileNotFoundException(
            "Failed to create root directory [" + indexDir.getAbsolutePath() + "]");
      }

      if (!env.isUsingLuceneLocking()) {
        lockfact = NoLockFactory.INSTANCE;
      }
      indexDirectory = FSDirectory.open(indexDir.toPath(), lockfact);

      ignoredNames = env.getIgnoredNames();
      includedNames = env.getIncludedNames();
      analyzerGuru = new AnalyzerGuru();
      if (env.isGenerateHtml()) {
        xrefDir = new File(env.getDataRootFile(), "xref");
      }

      listeners = new ArrayList<>();
      directories = new ArrayList<>();
      dirtyFile = new File(indexDir, "dirty");
      dirty = dirtyFile.exists();
    }
  }
Example #22
0
  /**
   * Runs a multi-field query over "title" and "content" against the article index and prints the
   * id and title of the top 20 hits.
   *
   * <p>The reader and the directory are closed when the test finishes; previously both leaked.
   *
   * @throws IOException if the index cannot be read
   */
  @Test
  public void search() throws IOException {
    String[] q = {"title", "content"};

    String filePath = "e:/elewordIndex/LuceneArticle";
    try (Directory dir = FSDirectory.open(new File(filePath));
        IndexReader reader = DirectoryReader.open(dir)) {
      IndexSearcher search = new IndexSearcher(reader);

      Query query = SearchHelper.makeMultiQueryFiled(q, "顶顶", 0.8f);
      TopDocs topDocs = search.search(query, 20);
      ScoreDoc[] scoreDocs = topDocs.scoreDocs;
      System.out.println("共:" + topDocs.totalHits + "条结果");

      for (ScoreDoc doc : scoreDocs) {
        int docId = doc.doc;
        Document document = search.doc(docId);

        String id = document.get("id");
        String title = document.get("title");
        System.out.println("------------------------------------------------------------------");
        System.out.println("id=" + id + "    title=" + title);
      }
    }
  }
Example #23
0
  /**
   * Opens the index at {@code C:\lucenedata\indexdata} and prepares the searcher used by the
   * tests.
   *
   * @throws IOException if the index cannot be opened
   */
  @Before
  public void init() throws IOException {
    File indexLocation = new File("C:\\lucenedata\\indexdata");
    DirectoryReader indexReader = DirectoryReader.open(FSDirectory.open(indexLocation));
    indexSearcher = new IndexSearcher(indexReader);
  }
Example #24
0
 /**
  * Searches the test index with every test-case image and prints, for each hit whose
  * "descriptorImageIdentifier" ends with the expected value of that test case, the rank at which
  * it was found.
  *
  * <p>The index reader, its directory and every image stream are closed now; before, one reader
  * and up to two streams per query image were leaked.
  *
  * <p>NOTE(review): the previous version cropped the image when {@code cutImages} was set but
  * never used the cropped result, so both branches built the query document from the full image.
  * The dead crop was removed. If the cropped image was meant to be indexed, that needs a separate
  * behavior change.
  *
  * @param searcher the searcher under test
  * @param prefix label printed in front of each reported rank
  * @throws IOException if the index or an image cannot be read
  * @throws InstantiationException declared for callers; not thrown by the visible code
  * @throws IllegalAccessException declared for callers; not thrown by the visible code
  */
 public void computeErrorRate(ImageSearcher searcher, String prefix)
     throws IOException, InstantiationException, IllegalAccessException {
   try (FSDirectory directory = FSDirectory.open(new File(testIndex));
       IndexReader reader = DirectoryReader.open(directory)) {
     for (String testImage : testcases.keySet()) {
       queryImage = testImage;

       Document query;
       try (FileInputStream imageStream = new FileInputStream(queryImage)) {
         query = builder.createDocument(imageStream, queryImage);
       }

       ImageSearchHits hits = searcher.search(query, reader);
       for (int i = 0; i < hits.length(); i++) {
         if (hits.doc(i)
             .get("descriptorImageIdentifier")
             .toLowerCase()
             .endsWith(testcases.get(queryImage))) {
           System.out.println(
               queryImage.substring(queryImage.lastIndexOf('\\') + 1)
                   + "-"
                   + prefix
                   + " -> Found at rank "
                   + i
                   + " ("
                   + hits.length()
                   + ")");
         }
       }
     }
   }
 }
  /**
   * Reads the tab-separated query file ({@code id<TAB>query} per line), runs every query against
   * the "modernJewishOnly" index and collects up to 1000 hits per target term.
   *
   * <p>The input file reader is closed even when reading fails, and the index reader is closed
   * once all queries have run; the returned {@code ScoreDoc}s carry only document ids and scores.
   *
   * @return HashMap<String,ArrayList<ScoreDoc>> A set of target terms with their extracted
   *     documents
   * @throws IOException if the query file or the index cannot be read
   * @throws ParseException if a query cannot be generated
   */
  @Override
  public HashMap<String, ArrayList<ScoreDoc>> extractDocsByRepresentation()
      throws IOException, ParseException {
    String indexName = "modernJewishOnly";
    m_qg.setType(InputType.Query);
    String inputFileName = "hozOrigQueryAll.txt";

    // read the suitable input file
    LinkedList<Pair<String, String>> queries = new LinkedList<Pair<String, String>>();
    BufferedReader reader = new BufferedReader(new FileReader(m_inputDir + inputFileName));
    try {
      String line = reader.readLine();
      while (line != null) {
        int index = line.indexOf("\t");
        queries.add(new Pair<String, String>(line.substring(0, index), line.substring(index + 1)));
        line = reader.readLine();
      }
    } finally {
      reader.close();
    }

    // search for the queries in the index
    IndexReader indexReader =
        IndexReader.open(FSDirectory.open(new File(m_indexDir + indexName)));
    try {
      IndexSearcher searcher = new IndexSearcher(indexReader);
      HashMap<String, ArrayList<ScoreDoc>> termDocs = new HashMap<String, ArrayList<ScoreDoc>>();
      for (Pair<String, String> term : queries) {
        Query q = m_qg.generate(term.value());
        termDocs.put(
            TargetTerm2Id.getStrDesc(Integer.parseInt(term.key())),
            new ArrayList<ScoreDoc>(Arrays.asList(searcher.search(q, 1000).scoreDocs)));
      }
      return termDocs;
    } finally {
      indexReader.close();
    }
  }
Example #26
0
 /**
  * Deletes every indexed document whose creator field equals the zero-padded {@code creator} id.
  *
  * <p>The index directory is created when missing. An empty or unreadable location is opened in
  * {@code CREATE} mode, anything else in {@code CREATE_OR_APPEND} mode. Writer and directory are
  * both closed when the method returns; previously the directory was never closed, and a
  * {@code null} result of {@code File.list()} caused a {@code NullPointerException}.
  *
  * @param creator id of the user whose documents are removed
  * @throws Exception if the index cannot be opened or written
  */
 @Override
 public void deleteOnCreator(Integer creator) throws Exception {
   File indexDir = new File(getIndexPath());
   boolean create;
   if (!indexDir.exists()) {
     indexDir.mkdirs();
     create = true;
   } else {
     // list() yields null when the path is not a directory or cannot be read.
     String[] entries = indexDir.list();
     create = entries == null || entries.length == 0;
   }

   IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_10_2, analyzer);
   iwc.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);

   try (Directory dir = FSDirectory.open(indexDir);
       IndexWriter writer = new IndexWriter(dir, iwc)) {
     writer.deleteDocuments(
         new Term(
             FIELD_LABEL_CREATE_USER,
             StringUtils.zeroPadding(creator, IndexingValue.ID_ZEROPADDING_DIGIT)));
     writer.commit();
   }
 }
  /**
   * Command-line entry point: prints up to five spelling suggestions for a word.
   *
   * @param args {@code args[0]} is the spell checker index directory, {@code args[1]} the word to
   *     respell; the program exits with status 1 on any other argument count or when no index
   *     exists at the given path
   * @throws IOException if the spell checker index cannot be read
   */
  public static void main(String[] args) throws IOException {

    if (args.length != 2) {
      System.out.println(
          "Usage: java lia.tools.SpellCheckerTest SpellCheckerIndexDir wordToRespell");
      System.exit(1);
    }

    final String spellCheckDir = args[0];
    final String wordToRespell = args[1];

    Directory dir = FSDirectory.open(new File(spellCheckDir));
    boolean indexMissing = !IndexReader.indexExists(dir);
    if (indexMissing) {
      System.out.println(
          "\nERROR: No spellchecker index at path \""
              + spellCheckDir
              + "\"; please run CreateSpellCheckerIndex first\n");
      System.exit(1);
    }

    // Create the spell checker on top of the existing index.
    SpellChecker spell = new SpellChecker(dir);

    // Rank candidates with the Levenshtein distance metric.
    spell.setStringDistance(new LevensteinDistance());

    // Ask for at most five candidates.
    String[] suggestions = spell.suggestSimilar(wordToRespell, 5);
    System.out.println(suggestions.length + " suggestions for '" + wordToRespell + "':");
    for (int i = 0; i < suggestions.length; i++) {
      System.out.println("  " + suggestions[i]);
    }
  }
Example #28
0
  /**
   * Generate a spelling suggestion for the definitions stored in defs.
   *
   * <p>I/O errors are logged and not propagated. The index reader, the spell checker and the spell
   * directory are released in the {@code finally} block; the spell checker was previously never
   * closed.
   */
  public void createSpellingSuggestions() {
    IndexReader indexReader = null;
    SpellChecker checker = null;

    try {
      log.info("Generating spelling suggestion index ... ");
      indexReader = DirectoryReader.open(indexDirectory);
      checker = new SpellChecker(spellDirectory);
      // TODO below seems only to index "defs" , possible bug ?
      Analyzer analyzer = AnalyzerGuru.getAnalyzer();
      IndexWriterConfig iwc = new IndexWriterConfig(SearchEngine.LUCENE_VERSION, analyzer);
      iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
      checker.indexDictionary(new LuceneDictionary(indexReader, QueryBuilder.DEFS), iwc, false);
      log.info("done");
    } catch (IOException e) {
      log.log(Level.SEVERE, "ERROR: Generating spelling: {0}", e);
    } finally {
      if (indexReader != null) {
        try {
          indexReader.close();
        } catch (IOException e) {
          log.log(Level.WARNING, "An error occured while closing reader", e);
        }
      }
      // Close the checker before the directory it reads from.
      if (checker != null) {
        try {
          checker.close();
        } catch (IOException e) {
          log.log(Level.WARNING, "An error occured while closing spell checker", e);
        }
      }
      if (spellDirectory != null) {
        spellDirectory.close();
      }
    }
  }
  /**
   * Searches the "name" field of the index in {@code .//Index} for {@code searchString} and adds
   * every proposal found for the matching products to the proposal controller's list.
   *
   * <p>Parse and I/O errors are printed and not propagated. The searcher and the directory are
   * closed on every path; previously the searcher was never closed and the directory only when
   * the search succeeded.
   *
   * @param searchString query in Lucene query syntax
   * @throws IOException declared for callers; I/O errors raised by the visible code are caught
   */
  public void startSearch(String searchString) throws IOException {
    try {
      Directory directory = FSDirectory.open(new File(".//Index")); // where the index lives
      try {
        IndexSearcher is = new IndexSearcher(directory); // the search object
        try {
          // search field + analyzer
          QueryParser parser =
              new QueryParser(Version.LUCENE_31, "name", new RussianAnalyzer(Version.LUCENE_31));
          Query query = parser.parse(searchString); // what we are looking for

          // run the search, limited to 10 documents
          TopDocs results = is.search(query, null, 10);
          // getMaxScore() is the best score, totalHits the number of documents found
          System.out.println(
              "getMaxScore()=" + results.getMaxScore() + " totalHits=" + results.totalHits);

          for (ScoreDoc hits : results.scoreDocs) {
            Document doc = is.doc(hits.doc); // load the document behind the hit

            for (Proposal proposal :
                proposalFacade.findPropolsalsByProduct(Long.valueOf(doc.get("recid")))) {

              proposalController.getProposalList().add(proposal);
              _log.info(
                  "Предложение найдено:"
                      + proposal.getRecid().toString()
                      + ",Товар: "
                      + doc.get("recid")
                      + ", "
                      + doc.get("name"));
            }
          }
        } finally {
          // A searcher created from a Directory owns its reader and must be closed.
          is.close();
        }
      } finally {
        directory.close();
      }
    } catch (ParseException e) {
      e.printStackTrace();
    } catch (IOException e) {
      e.printStackTrace();
    }
    addMessage("Поиск выполнен");
  }
  /**
   * Demo entry point: searches {@code C:/lucenedir} for "java in action" with a recency-boosted
   * score, sorted by score and then by "title2", and prints title, publication month and score of
   * every hit.
   *
   * <p>The reader and the directory are closed even when searching fails; previously they were
   * closed only on the success path.
   *
   * @param args unused
   * @throws IOException if the index cannot be read
   * @throws ParseException if the query cannot be parsed
   */
  public static void main(String[] args) throws IOException, ParseException {
    String indexDir = "C:/lucenedir";
    try (Directory directory = FSDirectory.open(Paths.get(indexDir));
        IndexReader reader = DirectoryReader.open(directory)) {
      IndexSearcher searcher = new IndexSearcher(reader);

      // Current time expressed in whole days.
      int day = (int) (System.currentTimeMillis() / Constans.DAY_MILLIS);
      QueryParser parser = new QueryParser("contents", new StandardAnalyzer());
      Query query = parser.parse("java in action");
      Query customScoreQuery =
          new RecencyBoostCustomScoreQuery(query, 2.0, day, 6 * 365, "pubmonthAsDay");
      Sort sort =
          new Sort(
              new SortField[] {
                SortField.FIELD_SCORE, new SortField("title2", SortField.Type.STRING)
              });
      TopDocs hits = searcher.search(customScoreQuery, null, Integer.MAX_VALUE, sort, true, false);

      for (int i = 0; i < hits.scoreDocs.length; i++) {
        // The document could equally be fetched with reader.document(...); searcher.doc
        // delegates to the reader internally.
        Document doc = searcher.doc(hits.scoreDocs[i].doc);
        System.out.println(
            (1 + i)
                + ": "
                + doc.get("title")
                + ": pubmonth="
                + doc.get("pubmonth")
                + " score="
                + hits.scoreDocs[i].score);
      }
    }
  }