예제 #1
0
 /**
  * Creates the search helper and opens a reader and searcher over the configured index.
  *
  * <p>When the {@code dvn.index.location} system property names an existing directory, the index
  * is read from its {@code index-dir} child; otherwise the current value of {@code indexDir} is
  * kept. When {@code dvn.search.maxclausecount} is set it is parsed into
  * {@code dvnMaxClauseCount}, falling back to 1024 if parsing fails. An {@code IOException}
  * raised while opening the index is printed and construction still completes.
  */
 public LuceneSearch() {
   final String configuredLocation = System.getProperty("dvn.index.location");
   if (configuredLocation != null) {
     final File configuredDir = new File(configuredLocation);
     if (configuredDir.exists() && configuredDir.isDirectory()) {
       indexDir = configuredLocation + "/index-dir";
     }
   }

   final String maxClauseSetting = System.getProperty("dvn.search.maxclausecount");
   if (maxClauseSetting != null) {
     try {
       dvnMaxClauseCount = Integer.parseInt(maxClauseSetting);
     } catch (Exception parseFailure) {
       parseFailure.printStackTrace();
       dvnMaxClauseCount = 1024;
     }
   }

   try {
     dir = FSDirectory.getDirectory(indexDir, false);
     r = IndexReader.open(dir);
     searcher = new IndexSearcher(r);
   } catch (IOException openFailure) {
     openFailure.printStackTrace();
   }
 }
  /**
   * Checks whether the given word exists in the spell-checker index.
   *
   * @param indexPath path of the spell-checker index directory
   * @param word the word to look up
   * @return the result of {@code SpellChecker.exist(word)}
   * @throws IOException if opening the index or the lookup fails
   */
  private boolean checkExistingWord(String indexPath, String word) throws IOException {
    // NOTE(review): neither the directory nor the spell checker is closed here — confirm
    // whether that is intentional or a handle leak.
    FSDirectory spellIndex = FSDirectory.getDirectory(new File(indexPath));
    return new SpellChecker(spellIndex).exist(word);
  }
예제 #3
0
  /**
   * Indexes every listed child directory (first level only) of {@code parent}. Each child gets
   * its own index, stored under {@code index} in a directory with the same name as the child.
   *
   * @param parent directory that contains the directories to index
   * @param dirs names of the child directories of {@code parent}
   * @param index root directory under which the per-directory indexes are created
   * @param Pa holder for the analyzer and for the directory/writer currently in use
   * @return total number of documents over all indexes that were created
   * @throws FileHandlerException declared by this method; presumably raised by
   *     {@code index(...)} when a file cannot be handled
   * @throws IOException if an index cannot be written or read back
   */
  private long indexDirectories(String parent, String[] dirs, String index, SetupParameters Pa)
      throws FileHandlerException, IOException {
    long sumDocs = 0;
    boolean indexEndsWithSeparator = index.endsWith("\\") || index.endsWith("/");

    // index each directory in parent directory
    for (int i = 0; i < dirs.length; i++) {
      System.out.println("\t-----FOLDER----- :" + dirs[i].toUpperCase());
      String dir_index = indexEndsWithSeparator ? index + dirs[i] : index + "/" + dirs[i];
      Directory di = FSDirectory.getDirectory(new File(dir_index), true);
      Pa.setDir(di);
      Pa.setWriter(new IndexWriter(Pa.getDir(), Pa.getAnalyzer(), true));

      try {
        // File(parent, child) joins with the platform separator; the previous hard-coded "\\"
        // only produced a valid path on Windows.
        this.index(dirs[i].toLowerCase(), Pa.getWriter(), new File(parent, dirs[i]));
        Pa.getWriter().optimize();
      } finally {
        // Close even when indexing fails so the writer does not stay open on the index.
        Pa.getWriter().close();
      }

      IndexReader reader = Pa.getReader().open(Pa.getDir());
      try {
        sumDocs += reader.numDocs();
      } finally {
        reader.close();
      }
    }
    return sumDocs;
  }
예제 #4
0
 /**
  * Opens {@code segments.gen} in the index at the hard-coded path and prints the first int read
  * from it followed by the next string.
  *
  * @throws Exception if the directory or the file cannot be opened or read
  */
 public static void IndexInputTest() throws Exception {
   String path = "D:\\Lucene Document";
   directory = FSDirectory.getDirectory(path);
   IndexInput indexInput = directory.openInput("segments.gen");
   try {
     int version = indexInput.readInt();
     System.out.println(version);
     System.out.println(indexInput.readString());
   } finally {
     // The input used to be left open, leaking the underlying file handle.
     indexInput.close();
   }
 }
예제 #5
0
  /**
   * Performs the actual index deletion: removes every document whose {@code ID} term equals this
   * object's id. Called by the {@code workrun()} method of the workflow's {@code DeleteTask}
   * class.
   *
   * @throws IOException if the index cannot be opened or modified
   */
  public void deleteIndex() throws IOException {
    Directory fsDir = FSDirectory.getDirectory(indexFile);
    try {
      IndexReader reader = IndexReader.open(fsDir);
      try {
        reader.deleteDocuments(new Term("ID", String.valueOf(this.id)));
      } finally {
        // Close even when the delete fails so the reader is not left open on the index.
        reader.close();
      }
    } finally {
      fsDir.close();
    }
  }
예제 #6
0
 /**
  * Opens an index writer on the existing index of the given database and registers it in
  * {@code writers}. When the database has no index directory yet, the work is delegated to
  * {@code onDatabaseCreated(db, -1)} instead.
  *
  * @param db name of the database
  * @throws IOException if the existing index cannot be opened for writing
  * @throws CorruptIndexException if the existing index is corrupt
  */
 protected void openWriterForDatabase(String db) throws IOException, CorruptIndexException {
   final File location = indexPath(db);
   if (!location.exists()) {
     onDatabaseCreated(db, -1);
     return;
   }
   final Directory existingIndex = FSDirectory.getDirectory(location.getPath());
   final IndexWriter appendingWriter =
       new IndexWriter(existingIndex, true, new StandardAnalyzer(), false);
   writers.put(db, appendingWriter);
 }
예제 #7
0
 /**
  * Returns a Lucene directory for the given path: a local {@code FSDirectory} when the
  * configured file system uses the {@code file} scheme, otherwise an {@code FsDirectory} backed
  * by that file system.
  *
  * @param file path of the index
  * @return directory giving access to the index at {@code file}
  * @throws IOException if the directory cannot be opened
  */
 private Directory getDirectory(Path file) throws IOException {
   final boolean onLocalDisk = "file".equals(this.fs.getUri().getScheme());
   if (!onLocalDisk) {
     return new FsDirectory(this.fs, file, false, this.conf);
   }
   final File localFile = new File(file.makeQualified(FileSystem.getLocal(conf)).toUri());
   return FSDirectory.getDirectory(localFile.getAbsolutePath());
 }
예제 #8
0
  /**
   * Parses {@code queryString} with a {@code StandardAnalyzer}, using {@code defaultField} for
   * unqualified terms, and runs it against the index of the given database.
   *
   * @param db name of the database whose index is searched
   * @param defaultField field handed to the query parser as its default
   * @param queryString query in query-parser syntax
   * @return the hits of the search
   * @throws IOException if the index cannot be opened or searched
   * @throws CorruptIndexException if the index is corrupt
   * @throws ParseException if {@code queryString} cannot be parsed
   */
  protected Hits query(String db, String defaultField, String queryString)
      throws IOException, CorruptIndexException, ParseException {
    IndexReader reader = IndexReader.open(FSDirectory.getDirectory(indexPath(db)));
    Searcher searcher = new IndexSearcher(reader);
    Query parsed = new QueryParser(defaultField, new StandardAnalyzer()).parse(queryString);
    // NOTE(review): reader and searcher are never closed here; presumably the returned Hits
    // still needs them — confirm who is responsible for closing.
    return searcher.search(parsed);
  }
 /**
  * Creates a RAM directory holding a copy of every entry listed by {@code dir} that is not
  * itself a file-system directory.
  *
  * @param dir source directory to copy from
  * @param closeDir whether to close {@code dir} once all entries have been copied
  * @param context IO context passed to each copy
  * @throws IOException if listing, copying or closing fails
  */
 private RAMDirectory(FSDirectory dir, boolean closeDir, IOContext context) throws IOException {
   this();
   for (String name : dir.listAll()) {
     if (Files.isDirectory(dir.getDirectory().resolve(name))) {
       continue; // sub-directories are not copied
     }
     copyFrom(dir, name, name, context);
   }
   if (closeDir) {
     dir.close();
   }
 }
예제 #10
0
  /**
   * Prepares this object for searching: force-unlocks the index at
   * {@code ConfigHandler.indexPath}, then opens the reader and creates the searcher and the
   * analyzer. An {@code IOException} is printed and not propagated.
   */
  public void startSearch() {
    try {
      FSDirectory lockedIndex = FSDirectory.getDirectory(ConfigHandler.indexPath);
      IndexReader.unlock(lockedIndex);

      reader = IndexReader.open(ConfigHandler.indexPath);
      searcher = new IndexSearcher(reader);
      analyzer = new StandardAnalyzer();
    } catch (IOException openFailure) {
      openFailure.printStackTrace();
    }
  }
예제 #11
0
 /**
  * Force-unlocks the index at {@code indexLocation} when an index exists there and is currently
  * locked. An {@code IOException} raised while checking or unlocking is logged at fatal level
  * and not rethrown.
  *
  * @param indexLocation file-system path of the index
  */
 private void unlockIndex(String indexLocation) {
   if (!IndexReader.indexExists(indexLocation)) {
     return;
   }
   try {
     if (!IndexReader.isLocked(indexLocation)) {
       return;
     }
     IndexReader.unlock(FSDirectory.getDirectory(indexLocation));
   } catch (IOException e) {
     log.fatal("Error trying to unlock " + indexLocation + " index.", e);
   }
 }
 /**
  * Stops this component: cancels {@code timer}, stops {@code task} and closes
  * {@code directory}. A failure while closing the directory is logged and not rethrown.
  */
 public void stop() {
   // NOTE(review): this read only acts as a barrier if `current` is volatile — its
   // declaration is not visible here, confirm.
   @SuppressWarnings("unused")
   int readCurrentState =
       current; // Another unneeded value, to ensure visibility of state protected by memory
                // barrier
   timer.cancel();
   task.stop();
   try {
     directory.close();
   } catch (Exception e) {
     // Best effort: report the directory that could not be closed and carry on.
     log.unableToCloseLuceneDirectory(directory.getDirectory(), e);
   }
 }
예제 #13
0
 /**
  * Creates a fresh index at the hard-coded path containing a single document with one stored,
  * tokenized field named {@code fieldname}.
  *
  * @throws Exception if the index cannot be created or written
  */
 public static void IndexCreate() throws Exception {
   String path = "D:\\Lucene Document";
   directory = FSDirectory.getDirectory(path);
   Analyzer analyzer = new StandardAnalyzer();
   IndexWriter iwriter = new IndexWriter(directory, analyzer, true);
   try {
     iwriter.setMaxFieldLength(25000);
     // make a new, empty document
     Document doc = new Document();
     String text = "This is the text to be indexed.";
     doc.add(new Field("fieldname", text, Field.Store.YES, Field.Index.TOKENIZED));
     iwriter.addDocument(doc);
     iwriter.optimize();
   } finally {
     // Close even on failure so the writer is not left open on the index.
     iwriter.close();
   }
 }
예제 #14
0
 /**
  * Handles creation of a database: aborts pending work for it, drops any previous index state
  * via {@code onDatabaseDeleted(db, -1)}, makes sure the index directory exists and registers a
  * writer that creates a fresh index in {@code writers}.
  *
  * <p>Failure to open the writer is logged and not propagated; in that case no writer is
  * registered for {@code db}.
  *
  * @param db name of the database
  * @param seq not used by this method
  */
 public void onDatabaseCreated(String db, long seq) {
   this.abort(db);
   File path = indexPath(db);
   onDatabaseDeleted(db, -1);
   if (!path.exists()) {
     log.info("Creating lucene directory {} for db {}", path.getPath(), db);
     path.mkdirs();
   }
   try {
     Directory directory = FSDirectory.getDirectory(path.getPath());
     writers.put(db, new IndexWriter(directory, true, new StandardAnalyzer(), true));
   } catch (IOException e) {
     // CorruptIndexException and LockObtainFailedException are IOExceptions too. All three
     // used to be swallowed silently, leaving no trace of why a db ended up without a writer.
     log.error("Unable to create lucene index writer for db " + db, e);
   }
 }
예제 #15
0
  /**
   * Builds an identifier for a commit point of the form
   * {@code <directory>/<generation>_<version>}.
   *
   * @param commit commit point to identify
   * @return the identifier string
   */
  private String getId(IndexCommit commit) {
    Directory dir = commit.getDirectory();

    // Persistent directories are identified by their file-system path so that the id stays
    // the same regardless of the Directory instance.
    String location;
    if (dir instanceof FSDirectory) {
      location = ((FSDirectory) dir).getDirectory().getPath();
    } else {
      location = String.valueOf(dir);
    }

    return location + '/' + commit.getGeneration() + '_' + commit.getVersion();
  }
  /**
   * Constructs a new IndexReader instance, retrying while the index cannot be opened (the
   * segment file might be in the middle of an update).
   *
   * @param luceneDir directory where the index is
   * @param signature index signature handed to the constructed reader
   * @return the constructed reader, or {@code null} when {@code luceneDir} does not exist or
   *     contains no index
   * @throws IOException if all {@code INDEX_OPEN_NUM_RETRIES} attempts fail; the last failure
   *     is attached as the cause
   */
  private static InternalIndexReader newReader(File luceneDir, IndexSignature signature)
      throws IOException {
    if (!luceneDir.exists() || !IndexReader.indexExists(luceneDir)) return null;
    Directory dir = FSDirectory.getDirectory(luceneDir);
    int numTries = INDEX_OPEN_NUM_RETRIES;
    InternalIndexReader reader = null;
    IOException lastFailure = null;
    boolean interrupted = false;

    try {
      // try up to INDEX_OPEN_NUM_RETRIES times, there might be a case where the segment file
      // is being updated
      while (reader == null) {
        if (numTries == 0) {
          log.error("Problem refreshing disk index, all attempts failed.");
          IOException failure = new IOException("problem opening new index");
          if (lastFailure != null) {
            // keep the root cause instead of discarding it
            failure.initCause(lastFailure);
          }
          throw failure;
        }
        numTries--;

        try {
          log.debug("opening index reader at: " + luceneDir.getAbsolutePath());
          IndexReader srcReader = IndexReader.open(dir, true);

          try {
            reader = new InternalIndexReader(srcReader, signature);
          } catch (IOException ioe) {
            // close the source reader if InternalIndexReader construction fails
            srcReader.close();
            throw ioe;
          }
        } catch (IOException ioe) {
          lastFailure = ioe;
          try {
            Thread.sleep(100);
          } catch (InterruptedException e) {
            log.warn("thread interrupted.");
            // Keep retrying as before, but remember the interrupt so it is not lost.
            interrupted = true;
          }
        }
      }
      return reader;
    } finally {
      if (interrupted) {
        // Restore the interrupt status for the caller once the retries are over.
        Thread.currentThread().interrupt();
      }
    }
  }
예제 #17
0
  /**
   * Indexes one specific directory — all files in it — into a fresh index stored at
   * {@code index}.
   *
   * @param directory path of the directory to index; its last path segment, lower-cased, is
   *     passed to {@code index(...)} as the site name
   * @param index directory in which the index is created
   * @param Pa holder for the analyzer and for the directory/writer currently in use
   * @return number of documents in the index after indexing
   * @throws FileHandlerException declared by this method; presumably raised by
   *     {@code index(...)} when a file cannot be handled
   * @throws IOException if the index cannot be written or read back
   */
  private long indexDirectory(String directory, String index, SetupParameters Pa)
      throws FileHandlerException, IOException {
    Directory di = FSDirectory.getDirectory(new File(index), true);
    Pa.setDir(di);
    Pa.setWriter(new IndexWriter(Pa.getDir(), Pa.getAnalyzer(), true));

    try {
      // get name of directory contains website to index
      int begin = directory.lastIndexOf("\\");
      if (begin == -1) begin = directory.lastIndexOf("/");
      String dir_site = directory.substring(begin + 1).toLowerCase();
      index(dir_site, Pa.getWriter(), new File(directory));

      Pa.getWriter().optimize();
    } finally {
      // Close even when indexing fails so the writer does not stay open on the index.
      Pa.getWriter().close();
    }

    IndexReader reader = Pa.getReader().open(Pa.getDir());
    try {
      return reader.numDocs();
    } finally {
      reader.close();
    }
  }
예제 #18
0
  /**
   * Reindexes a "remote" repository (one published over the jetty component), shifts the index
   * in time if requested and publishes the packed index into the repository's {@code .index}
   * directory.
   *
   * @param repositoryRoot root directory of the repository; the packed index is written to its
   *     {@code .index} child
   * @param repositoryId id of the repository; the temporary context id is this value plus
   *     {@code "-temp"}
   * @param deleteIndexFiles passed to {@code removeIndexingContext} when the temporary context
   *     is removed
   * @param shiftDays passed to {@code shiftContextInTime}
   * @throws IOException if indexing or packing fails
   * @throws ComponentLookupException declared by this method; presumably raised while looking
   *     up the index creators
   */
  protected void reindexRemoteRepositoryAndPublish(
      File repositoryRoot, String repositoryId, boolean deleteIndexFiles, int shiftDays)
      throws IOException, ComponentLookupException {
    final Directory luceneDir = FSDirectory.getDirectory(getIndexFamilyDirectory(repositoryId));

    final IndexingContext tempContext =
        nexusIndexer.addIndexingContextForced(
            repositoryId + "-temp",
            repositoryId,
            repositoryRoot,
            luceneDir,
            null,
            null,
            new IndexCreatorHelper(getContainer()).getFullCreators());

    // Shift first (crude, but good enough), then scan "today" and stamp the context.
    shiftContextInTime(tempContext, shiftDays);
    nexusIndexer.scan(tempContext);
    tempContext.updateTimestamp(true);

    // Pack the index into <repositoryRoot>/.index, incremental chunks included.
    final File publishDir = new File(repositoryRoot, ".index");
    publishDir.mkdirs();
    final IndexPackingRequest packingRequest = new IndexPackingRequest(tempContext, publishDir);
    packingRequest.setCreateIncrementalChunks(true);
    indexPacker.packIndex(packingRequest);

    nexusIndexer.removeIndexingContext(tempContext, deleteIndexFiles);
  }
예제 #19
0
  /**
   * Renders a commit point as a one-line debug string listing its directory, segments file
   * name, version, generation and file names. If reading any of these throws, the exception is
   * appended to what has been built so far instead of being propagated.
   *
   * @param commit commit point to describe
   * @return the description
   */
  static String str(IndexCommit commit) {
    StringBuilder text = new StringBuilder();
    try {
      text.append("commit{");

      Directory dir = commit.getDirectory();
      text.append("dir=");
      if (dir instanceof FSDirectory) {
        // show the file-system location rather than the Directory object
        text.append(((FSDirectory) dir).getDirectory());
      } else {
        text.append(dir);
      }

      text.append(",segFN=")
          .append(commit.getSegmentsFileName())
          .append(",version=")
          .append(commit.getVersion())
          .append(",generation=")
          .append(commit.getGeneration())
          .append(",filenames=")
          .append(commit.getFileNames());
    } catch (Exception failure) {
      text.append(failure);
    }
    return text.toString();
  }
  /**
   * Opens (or creates) the index at the given path with a native file-system lock factory and
   * an index writer without a field-length limit, then initialises the bookkeeping maps.
   *
   * <p>No checked exception is declared: {@code CorruptIndexException},
   * {@code LockObtainFailedException} and any other {@code IOException} raised while opening the
   * index are printed and construction continues.
   *
   * @param indexDirectoryPath path of the index directory, e.g. {@code "index/pages"}
   */
  public IndexesConfigLowVersionLucene(String indexDirectoryPath) {
    indexName = indexDirectoryPath;
    try {
      indexDirectory = FSDirectory.getDirectory(indexDirectoryPath);
      indexDirectory.setLockFactory(new NativeFSLockFactory(indexDirectoryPath));
      w = new IndexWriter(indexDirectory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);
    } catch (IOException openFailure) {
      // also covers CorruptIndexException and LockObtainFailedException (both IOExceptions)
      openFailure.printStackTrace();
    }

    urlsFileNames = new HashMap<String, String>();
    emptyFiles = new HashMap<String, String>();
  }
예제 #21
0
 /**
  * Opens {@code dir} on {@code indexDir}, asking for a new index to be created only when that
  * path does not exist yet.
  *
  * @throws IOException if the directory cannot be opened
  */
 protected void setup() throws IOException {
   final boolean createNew = !new File(indexDir).exists();
   dir = FSDirectory.getDirectory(indexDir, createNew);
 }
예제 #22
0
  /**
   * Rebuilds the Lucene index stored in {@code dbhome} into a sibling {@code <name>_tmp}
   * directory, passing every surviving document through {@code convertDocument}, and then swaps
   * the new directory into place.
   *
   * <p>Steps, in order:
   *
   * <ol>
   *   <li>Open the old index and a fresh {@code _tmp} index; JDBC settings from the
   *       application's {@code TransSource} are applied when a directory is a
   *       {@code TransFSDirectory}.
   *   <li>First pass: collect the ids of documents flagged as deleted and, for all other ids,
   *       the document number to keep when an id occurs more than once.
   *   <li>Second pass: convert and write every document that survived the first pass.
   *   <li>Make sure the {@code Lucene} table has a {@code version} column, commit the new
   *       segments and run {@code updateSQL}.
   *   <li>Rename {@code dbhome} to {@code _old}, {@code _tmp} to {@code dbhome}, move the blob
   *       directory and delete {@code _old}.
   * </ol>
   *
   * @param app application supplying the {@code TransSource} and error logging
   * @param dbhome directory of the index to convert
   * @throws Exception if a directory cannot be opened, renamed or deleted; failures inside the
   *     two conversion phases are rethrown wrapped in a {@code RuntimeException}
   */
  public void convert(Application app, File dbhome) throws Exception {
    // Old index: opened without creating it.
    FSDirectory indexDir = FSDirectory.getDirectory(dbhome, false);
    if (indexDir instanceof TransFSDirectory) {
      FSDirectory.setDisableLocks(true);
      TransFSDirectory d = (TransFSDirectory) indexDir;
      TransSource source = app.getTransSource();
      d.setDriverClass(source.getDriverClass());
      d.setUrl(source.getUrl());
      d.setUser(source.getUser());
      d.setPassword(source.getPassword());
    }
    File ndbhome = new File(dbhome.getParentFile(), dbhome.getName() + "_tmp");
    File olddbhome = new File(dbhome.getParentFile(), dbhome.getName() + "_old");
    // New index: created from scratch in the _tmp directory.
    FSDirectory nindexDir = FSDirectory.getDirectory(ndbhome, true);
    if (nindexDir instanceof TransFSDirectory) {
      FSDirectory.setDisableLocks(true);
      TransFSDirectory d = (TransFSDirectory) nindexDir;
      TransSource source = app.getTransSource();
      d.setDriverClass(source.getDriverClass());
      d.setUrl(source.getUrl());
      d.setUser(source.getUser());
      d.setPassword(source.getPassword());
    }

    IndexSearcher searcher = null;
    IndexWriter writer = null;
    LuceneManager lmgr = null;

    try {
      searcher = new IndexSearcher(indexDir);
      PerFieldAnalyzerWrapper a = LuceneManager.buildAnalyzer();
      writer = IndexWriterManager.getWriter(nindexDir, a, true);
      // NOTE(review): numDocs() is used as the upper bound for document numbers in both loops
      // below — confirm the old index never contains deleted slots (otherwise maxDoc() applies).
      final int numDocs = searcher.getIndexReader().numDocs();

      // Pass 1: deldocs = ids flagged as deleted; infos = id -> document number to keep.
      HashSet deldocs = new HashSet();
      HashMap infos = new HashMap();
      for (int i = 0; i < numDocs; i++) {
        Document doc = searcher.doc(i);
        String delprop = doc.get(DeletedInfos.DELETED);
        String layerStr = doc.get(LuceneManager.LAYER_OF_SAVE);
        // layer is parsed but not used: the checks that referenced it are commented out below.
        int layer = -1;
        try {
          layer = Integer.parseInt(layerStr);
        } catch (Exception ex) {
          layer = -1;
        }
        // Key is "<id><separator><layer string>".
        final String id =
            doc.get(LuceneManager.ID)
                + DeletedInfos.KEY_SEPERATOR
                + doc.get(LuceneManager.LAYER_OF_SAVE);
        if (delprop != null && "true".equals(delprop) /* && layer == DbKey.LIVE_LAYER*/) {
          deldocs.add(id);
        } else {
          Object v;
          if ((v = infos.get(id)) == null) {
            infos.put(id, new Integer(i));
          } else {
            // Duplicate id: keep the document whose last-modified string compares greater.
            // NOTE(review): the previous document is read via the literal "_lastmodified" while
            // the current one uses LuceneManager.LASTMODIFIED — presumably the same field,
            // confirm.
            final String lmod = doc.get(LuceneManager.LASTMODIFIED);
            final String lmod_prev = searcher.doc(((Integer) v).intValue()).get("_lastmodified");
            if (lmod_prev == null || (lmod != null && lmod.compareTo(lmod_prev) > 0)) {
              infos.put(id, new Integer(i));
            }
          }
        }
      }

      ArrayList listOfMaps = new ArrayList();

      // Pass 2: write every document that is not deleted and is the kept one for its id.
      for (int i = 0; i < numDocs; i++) {
        Document doc = searcher.doc(i);
        String delprop = doc.get(DeletedInfos.DELETED);
        String layerStr = doc.get(LuceneManager.LAYER_OF_SAVE);
        int layer = -1;
        try {
          layer = Integer.parseInt(layerStr);
        } catch (Exception ex) {
          layer = -1;
        }
        final String id =
            doc.get(LuceneManager.ID)
                + DeletedInfos.KEY_SEPERATOR
                + doc.get(LuceneManager.LAYER_OF_SAVE);
        if (delprop != null && "true".equals(delprop)) {
          continue;
        } else if (id != null && deldocs.contains(id) /* && layer == DbKey.LIVE_LAYER*/) {
          // another document with the same key was flagged as deleted
          continue;
        }

        Integer idx = (Integer) infos.get(id);
        if (idx != null && i != idx.intValue()) {
          // a different document was chosen for this id in pass 1
          continue;
        }

        Document ndoc = convertDocument(doc);

        if (this.recordNodes) {
          // the original (unconverted) document is what gets recorded
          listOfMaps.add(LuceneManager.luceneDocumentToMap(doc));
        }

        if (ndoc != null) {
          writer.addDocument(ndoc);
        }
      }

      // Optionally build id -> path for every recorded node.
      if (this.recordNodes) {
        lmgr = new LuceneManager(this.app, false, true);
        this.allNodes = new HashMap();
        final int size = listOfMaps.size();
        for (int i = 0; i < size; i++) {
          HashMap m = (HashMap) listOfMaps.get(i);
          INode n = lmgr.mapToNode(m);
          this.allNodes.put(n.getID(), getPath(n));
          n = null;
        }
      }

    } catch (Exception ex) {
      ex.printStackTrace();
      throw new RuntimeException(ex);
    } finally {
      if (searcher != null) {
        try {
          searcher.close();
        } catch (Exception ex) {
          app.logError(ErrorReporter.errorMsg(this.getClass(), "convert"), ex);
        }
      }

      if (lmgr != null) {
        lmgr.shutdown();
        lmgr = null;
      }

      // Release the old directory and drop the factory's cached state for it.
      indexDir.close();
      SegmentInfos sinfos = IndexObjectsFactory.getFSSegmentInfos(indexDir);
      sinfos.clear();
      IndexObjectsFactory.removeDeletedInfos(indexDir);
    }

    Connection conn = null;
    boolean exceptionOccured = false;

    try {
      if (writer != null) {
        TransSource ts = app.getTransSource();
        conn = ts.getConnection();

        // Add the "version" column to the Lucene table when the metadata does not list it.
        DatabaseMetaData dmd = conn.getMetaData();
        ResultSet rs = dmd.getColumns(null, null, "Lucene", "version");
        if (!rs.next()) {
          final String alterTbl = "ALTER TABLE Lucene ADD version INT NOT NULL DEFAULT 1";
          PreparedStatement pstmt = null;
          try {
            pstmt = conn.prepareStatement(alterTbl);
            pstmt.execute();
          } catch (SQLException sqle) {
            // logged only; the conversion carries on
            app.logError(ErrorReporter.errorMsg(this.getClass(), "convert"), sqle);
          } finally {
            if (pstmt != null) {
              pstmt.close();
              pstmt = null;
            }
          }
        }
        rs.close();
        rs = null;

        // NOTE(review): flushCache() is called after close() — confirm this order is intended.
        writer.close();
        writer.flushCache(); // TODO:writer.writeSegmentsFile();
        LuceneManager.commitSegments(conn, app, writer.getDirectory());
        writer.finalizeTrans();

        this.updateSQL(conn);
      }
    } catch (Exception ex) {
      ex.printStackTrace();
      exceptionOccured = true;
      throw new RuntimeException(ex);
    } finally {
      if (conn != null) {
        try {
          // With auto-commit off: commit on success, roll back on failure.
          if (!conn.getAutoCommit()) {
            if (!exceptionOccured) {
              conn.commit();
            } else {
              conn.rollback();
            }
          }
          conn.close();
        } catch (Exception ex) {
          app.logError(ErrorReporter.errorMsg(this.getClass(), "convert"), ex);
        }
        conn = null;
      }

      // Release the new directory and drop the factory's cached state for it.
      nindexDir.close();
      SegmentInfos sinfos = IndexObjectsFactory.getFSSegmentInfos(nindexDir);
      sinfos.clear();
      IndexObjectsFactory.removeDeletedInfos(nindexDir);
    }

    // Swap directories: dbhome -> _old, _tmp -> dbhome.
    if (!dbhome.renameTo(olddbhome)) {
      throw new Exception("Could not move the old version of the db into " + olddbhome);
    }

    if (!ndbhome.renameTo(dbhome)) {
      throw new Exception("Could not move the newer version of the db into " + dbhome);
    }

    // NOTE(review): ndbhome has just been renamed to dbhome, so the target below points into a
    // path that presumably no longer exists; the unchecked renameTo would then fail silently
    // and the blobs would be removed with olddbhome — confirm whether new File(dbhome, "blob")
    // was intended.
    File oldBlobDir = new File(olddbhome, "blob");
    File newBlobDir = new File(ndbhome, "blob");
    oldBlobDir.renameTo(newBlobDir);

    if (!FileUtils.deleteDir(olddbhome)) {
      throw new Exception("Could not delete the old version of the db at " + olddbhome);
    }
  }
예제 #23
0
 /**
  * Canonicalizes the first {@code maxLineNumber} lines of {@code input}, i.e. sorts the tokens
  * of each line by document frequency in ascending order. The first token of every line
  * (index 0) is left in place and is neither indexed nor sorted.
  *
  * <p>Works in two passes: the tokens of the first {@code maxLineNumber} lines are written to a
  * temporary Lucene index in {@code indexFolder}, then every line of {@code input} is rewritten
  * to {@code output} with its tokens ordered by their frequency in that index. The temporary
  * index folder is deleted after a successful run. Readers, writers and the directory are
  * closed even when a pass fails.
  *
  * @param input path of the input file, one record per line
  * @param maxLineNumber number of lines to index; indexing stops once this many lines were read
  * @param indexFolder folder used for the temporary index
  * @param output path of the file the canonicalized lines are written to
  * @throws Exception if reading, indexing or writing fails
  */
 public static void canonicalize(
     String input, int maxLineNumber, String indexFolder, String output) throws Exception {
   Directory dir = FSDirectory.getDirectory(indexFolder);
   try {
     // Pass 1: one document per token, so that docFreq gives the token's frequency.
     IndexWriter iwriter = new IndexWriter(dir, null, true, IndexWriter.MaxFieldLength.UNLIMITED);
     try {
       BufferedReader br = IOFactory.getBufferedReader(input);
       try {
         int lineCount = 0;
         for (String line = br.readLine(); line != null; line = br.readLine()) {
           String[] tokens = Common.sortUnique(line, 1);
           for (int i = 1; i < tokens.length; i++) {
             String t = tokens[i];
             Document doc = new Document();
             doc.add(new Field("term", t, Field.Store.NO, Field.Index.NOT_ANALYZED));
             iwriter.addDocument(doc);
           }
           lineCount++;
           if (lineCount % 100 == 0)
             System.out.println(new Date().toString() + " : " + lineCount + " lines indexed");
           if (lineCount == maxLineNumber) break;
         }
         System.out.println(new Date().toString() + " : " + lineCount + " lines indexed");
       } finally {
         br.close();
       }
       iwriter.optimize();
     } finally {
       iwriter.close();
     }
     System.out.println(new Date().toString() + " : indexing finished");

     // Pass 2: rewrite every line with tokens[1..] ordered by ascending document frequency.
     final IndexReader ireader = IndexReader.open(dir);
     try {
       BufferedReader br = IOFactory.getBufferedReader(input);
       try {
         PrintWriter pw = IOFactory.getPrintWriter(output);
         try {
           int lineCount = 0;
           for (String line = br.readLine(); line != null; line = br.readLine()) {
             String[] tokens = Common.sortUnique(line, 1);
             Arrays.sort(
                 tokens,
                 1,
                 tokens.length,
                 new Comparator<String>() {
                   public int compare(String a, String b) {
                     try {
                       int fa = ireader.docFreq(new Term("term", a));
                       int fb = ireader.docFreq(new Term("term", b));
                       if (fa > fb) return 1;
                       else if (fa < fb) return -1;
                       return 0;
                     } catch (Exception e) {
                       // a failed lookup is reported and the two tokens are treated as equal
                       e.printStackTrace();
                       return 0;
                     }
                   }
                 });
             pw.print(tokens[0]);
             for (int i = 1; i < tokens.length; i++) pw.print(" " + tokens[i]);
             pw.println();
             lineCount++;
             if (lineCount % 100000 == 0)
               System.out.println(new Date().toString() + " : " + lineCount + " lines output");
           }
           System.out.println(new Date().toString() + " : " + lineCount + " lines output");
         } finally {
           pw.close();
         }
       } finally {
         br.close();
       }
     } finally {
       ireader.close();
     }
   } finally {
     dir.close();
   }
   Common.deleteFolder(new File(indexFolder));
   System.out.println(new Date().toString() + " : canonicalization finished");
 }