public LuceneSearch() { String dvnIndexLocation = System.getProperty("dvn.index.location"); // System.out.println("INDEX LOCATION " + dvnIndexLocation); File locationDirectory = null; if (dvnIndexLocation != null) { locationDirectory = new File(dvnIndexLocation); if (locationDirectory.exists() && locationDirectory.isDirectory()) { indexDir = dvnIndexLocation + "/index-dir"; // System.out.println("INDEX " + indexDir); } } // System.out.println("INDEX DEFAULT " + indexDir); String dvnMaxClauseCountStr = System.getProperty("dvn.search.maxclausecount"); if (dvnMaxClauseCountStr != null) { try { dvnMaxClauseCount = Integer.parseInt(dvnMaxClauseCountStr); } catch (Exception e) { e.printStackTrace(); dvnMaxClauseCount = 1024; } } try { dir = FSDirectory.getDirectory(indexDir, false); r = IndexReader.open(dir); searcher = new IndexSearcher(r); } catch (IOException ex) { ex.printStackTrace(); } }
/**
 * Checks whether the given word exists in the spell-checker index.
 *
 * @param indexPath path of the spell-checker index directory
 * @param word the word to look up
 * @return {@code true} if the spell checker reports that the word exists in the index
 * @throws IOException if the index cannot be opened or read
 */
private boolean checkExistingWord(String indexPath, String word) throws IOException {
  FSDirectory directory = FSDirectory.getDirectory(new File(indexPath));
  try {
    SpellChecker spellChecker = new SpellChecker(directory);
    return spellChecker.exist(word);
  } finally {
    // Every getDirectory() call must be paired with close(), otherwise the
    // directory reference obtained here is never released.
    directory.close();
  }
}
/* * index all child directories(only first level directories) in parent directory * and indexed data is stored in the same name source directory */ private long indexDirectories(String parent, String[] dirs, String index, SetupParameters Pa) throws FileHandlerException, IOException { long sumDocs = 0; // index each directory in parent directory for (int i = 0; i < dirs.length; i++) { System.out.println("\t-----FOLDER----- :" + dirs[i].toUpperCase()); String dir_index = index + "/" + dirs[i]; if ((index.endsWith("\\")) || (index.endsWith("/"))) { dir_index = index + dirs[i]; } Directory di = FSDirectory.getDirectory(new File(dir_index), true); Pa.setDir(di); Pa.setWriter(new IndexWriter(Pa.getDir(), Pa.getAnalyzer(), true)); // //get name of directory contains website to index // int begin=dirs[i].lastIndexOf("\\"); // if(begin==-1) begin=dirs[i].lastIndexOf("/"); // int end=dirs[i].length()-1; // String dir_site=dirs[i].substring(begin, end); this.index(dirs[i].toLowerCase(), Pa.getWriter(), new File(parent + "\\" + dirs[i])); Pa.getWriter().optimize(); Pa.getWriter().close(); IndexReader reader = Pa.getReader().open(Pa.getDir()); sumDocs += reader.numDocs(); reader.close(); } return sumDocs; }
/**
 * Opens the {@code segments.gen} file of the index at the hard-coded path and prints the int and
 * the string read from its start.
 *
 * @throws Exception if the directory or the file cannot be opened or read
 */
public static void IndexInputTest() throws Exception {
  String path = "D:\\Lucene Document";
  directory = FSDirectory.getDirectory(path);
  IndexInput indexInput = directory.openInput("segments.gen");
  try {
    int version = indexInput.readInt();
    System.out.println(version);
    System.out.println(indexInput.readString());
  } finally {
    // Release the underlying file handle even when a read fails.
    indexInput.close();
  }
}
/**
 * Performs the actual index deletion: removes every document whose {@code ID} field matches this
 * object's id. Called by the {@code workrun()} method of the workflow's {@code DeleteTask} class.
 *
 * @throws IOException if the index cannot be opened, modified or closed
 */
public void deleteIndex() throws IOException {
  Directory fsDir = FSDirectory.getDirectory(indexFile);
  try {
    IndexReader reader = IndexReader.open(fsDir);
    try {
      reader.deleteDocuments(new Term("ID", String.valueOf(this.id)));
    } finally {
      // Closing the reader must not be skipped when the delete fails.
      reader.close();
    }
  } finally {
    fsDir.close();
  }
}
/**
 * Registers an index writer for the given database.
 *
 * <p>If the index directory already exists, a writer is opened on it without re-creating the
 * index and stored in {@code writers}; otherwise the work is delegated to
 * {@code onDatabaseCreated(db, -1)}.
 *
 * @param db name of the database
 * @throws IOException if the existing index cannot be opened for writing
 * @throws CorruptIndexException if the existing index is corrupt
 */
protected void openWriterForDatabase(String db) throws IOException, CorruptIndexException {
  final File location = indexPath(db);
  if (location.exists()) {
    final Directory luceneDir = FSDirectory.getDirectory(location.getPath());
    final IndexWriter existingIndexWriter =
        new IndexWriter(luceneDir, true, new StandardAnalyzer(), false);
    writers.put(db, existingIndexWriter);
    return;
  }
  onDatabaseCreated(db, -1);
}
/**
 * Returns a Lucene directory for the given path.
 *
 * <p>When this object's file system uses the {@code file} scheme, the path is qualified against
 * the local file system and opened as a plain {@code FSDirectory}; for any other scheme an
 * {@code FsDirectory} backed by {@code this.fs} is returned.
 *
 * @param file location of the index
 * @return a directory for {@code file}
 * @throws IOException if the directory cannot be opened
 */
private Directory getDirectory(Path file) throws IOException {
  final boolean onLocalFileSystem = "file".equals(this.fs.getUri().getScheme());
  if (!onLocalFileSystem) {
    return new FsDirectory(this.fs, file, false, this.conf);
  }
  final Path localPath = file.makeQualified(FileSystem.getLocal(conf));
  final String absolutePath = new File(localPath.toUri()).getAbsolutePath();
  return FSDirectory.getDirectory(absolutePath);
}
/**
 * Parses {@code queryString} with a {@code StandardAnalyzer} and runs it against the index of
 * the given database.
 *
 * <p>The reader and searcher opened here are not closed by this method.
 *
 * @param db name of the database whose index is searched
 * @param defaultField field used for query terms that do not name a field
 * @param queryString the query in query-parser syntax
 * @return the hits for the parsed query
 * @throws IOException if the index cannot be opened or searched
 * @throws CorruptIndexException if the index is corrupt
 * @throws ParseException if {@code queryString} cannot be parsed
 */
protected Hits query(String db, String defaultField, String queryString)
    throws IOException, CorruptIndexException, ParseException {
  final IndexReader indexReader = IndexReader.open(FSDirectory.getDirectory(indexPath(db)));
  final Searcher indexSearcher = new IndexSearcher(indexReader);
  final Query parsedQuery =
      new QueryParser(defaultField, new StandardAnalyzer()).parse(queryString);
  return indexSearcher.search(parsedQuery);
}
private RAMDirectory(FSDirectory dir, boolean closeDir, IOContext context) throws IOException { this(); for (String file : dir.listAll()) { if (!Files.isDirectory(dir.getDirectory().resolve(file))) { copyFrom(dir, file, file, context); } } if (closeDir) { dir.close(); } }
public void startSearch() { try { IndexReader.unlock(FSDirectory.getDirectory(ConfigHandler.indexPath)); reader = IndexReader.open(ConfigHandler.indexPath); searcher = new IndexSearcher(reader); analyzer = new StandardAnalyzer(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } }
/**
 * Forcibly removes the lock of the index at {@code indexLocation}, if such an index exists and
 * is currently locked. Failures are logged and not propagated.
 *
 * @param indexLocation path of the index directory
 */
private void unlockIndex(String indexLocation) {
  if (!IndexReader.indexExists(indexLocation)) {
    return;
  }
  try {
    if (IndexReader.isLocked(indexLocation)) {
      Directory directory = FSDirectory.getDirectory(indexLocation);
      try {
        IndexReader.unlock(directory);
      } finally {
        // Release the directory reference obtained just for unlocking.
        directory.close();
      }
    }
  } catch (IOException e) {
    log.fatal("Error trying to unlock " + indexLocation + " index.", e);
  }
}
/**
 * Stops this component: cancels the timer, stops the task and closes the Lucene directory.
 *
 * <p>A failure while closing the directory is reported through the logger and not propagated.
 */
public void stop() {
  // The value read here is never used; the read itself is performed first so that state
  // protected by the memory barrier is visible to the statements below.
  // NOTE(review): this presumably relies on `current` being volatile - confirm at its declaration.
  @SuppressWarnings("unused")
  int readCurrentState = current;
  timer.cancel();
  task.stop();
  try {
    directory.close();
  } catch (Exception e) {
    // Log which directory failed to close, together with the cause.
    log.unableToCloseLuceneDirectory(directory.getDirectory(), e);
  }
}
public static void IndexCreate() throws Exception { String path = "D:\\Lucene Document"; directory = FSDirectory.getDirectory(path); Analyzer analyzer = new StandardAnalyzer(); IndexWriter iwriter = new IndexWriter(directory, analyzer, true); iwriter.setMaxFieldLength(25000); // make a new, empty document Document doc = new Document(); String text = "This is the text to be indexed."; doc.add(new Field("fieldname", text, Field.Store.YES, Field.Index.TOKENIZED)); iwriter.addDocument(doc); iwriter.optimize(); iwriter.close(); }
/**
 * Handles creation of a database: aborts pending work for it, drops any previous index state,
 * makes sure the index directory exists and registers a writer on a newly created index.
 *
 * <p>Failures while creating the writer are logged; in that case no writer is registered for
 * {@code db}.
 *
 * @param db name of the database
 * @param seq sequence number reported with the event (not used here)
 */
public void onDatabaseCreated(String db, long seq) {
  this.abort(db);
  File path = indexPath(db);
  onDatabaseDeleted(db, -1);
  if (!path.exists()) {
    log.info("Creating lucene directory {} for db {}", path.getPath(), db);
    if (!path.mkdirs() && !path.exists()) {
      log.warn("Could not create lucene directory " + path.getPath() + " for db " + db);
    }
  }
  try {
    Directory directory = FSDirectory.getDirectory(path.getPath());
    writers.put(db, new IndexWriter(directory, true, new StandardAnalyzer(), true));
  } catch (IOException e) {
    // CorruptIndexException and LockObtainFailedException are IOExceptions too; report
    // the failure instead of silently leaving the database without a writer.
    log.error("Could not create lucene index writer for db " + db, e);
  }
}
private String getId(IndexCommit commit) { StringBuilder sb = new StringBuilder(); Directory dir = commit.getDirectory(); // For anything persistent, make something that will // be the same, regardless of the Directory instance. if (dir instanceof FSDirectory) { FSDirectory fsd = (FSDirectory) dir; File fdir = fsd.getDirectory(); sb.append(fdir.getPath()); } else { sb.append(dir); } sb.append('/'); sb.append(commit.getGeneration()); sb.append('_'); sb.append(commit.getVersion()); return sb.toString(); }
/** * constructs a new IndexReader instance * * @param indexPath Where the index is. * @return Constructed IndexReader instance. * @throws IOException */ private static InternalIndexReader newReader(File luceneDir, IndexSignature signature) throws IOException { if (!luceneDir.exists() || !IndexReader.indexExists(luceneDir)) return null; Directory dir = FSDirectory.getDirectory(luceneDir); int numTries = INDEX_OPEN_NUM_RETRIES; InternalIndexReader reader = null; // try max of 5 times, there might be a case where the segment file is being updated while (reader == null) { if (numTries == 0) { log.error("Problem refreshing disk index, all attempts failed."); throw new IOException("problem opening new index"); } numTries--; try { log.debug("opening index reader at: " + luceneDir.getAbsolutePath()); IndexReader srcReader = IndexReader.open(dir, true); try { reader = new InternalIndexReader(srcReader, signature); } catch (IOException ioe) { // close the source reader if InternalIndexReader construction fails if (srcReader != null) { srcReader.close(); } throw ioe; } } catch (IOException ioe) { try { Thread.sleep(100); } catch (InterruptedException e) { log.warn("thread interrupted."); continue; } } } return reader; }
/* * index specific directory-all file in one directory */ private long indexDirectory(String directory, String index, SetupParameters Pa) throws FileHandlerException, IOException { long sumDocs = 0; Directory di = FSDirectory.getDirectory(new File(index), true); // RAMDirectory di = new RAMDirectory(new Directory()); Pa.setDir(di); Pa.setWriter(new IndexWriter(Pa.getDir(), Pa.getAnalyzer(), true)); // get name of directory contains website to index int begin = directory.lastIndexOf("\\"); if (begin == -1) begin = directory.lastIndexOf("/"); int end = directory.length(); String dir_site = directory.substring(begin + 1, end).toLowerCase(); index(dir_site, Pa.getWriter(), new File(directory)); Pa.getWriter().optimize(); Pa.getWriter().close(); IndexReader reader = Pa.getReader().open(Pa.getDir()); sumDocs += reader.numDocs(); reader.close(); return sumDocs; }
/** * Will reindex, shift if needed and publish indexes for a "remote" repository (published over * jetty component). * * @param repositoryRoot * @param repositoryId * @param deleteIndexFiles * @param shiftDays * @throws IOException */ protected void reindexRemoteRepositoryAndPublish( File repositoryRoot, String repositoryId, boolean deleteIndexFiles, int shiftDays) throws IOException, ComponentLookupException { File indexDirectory = getIndexFamilyDirectory(repositoryId); Directory directory = FSDirectory.getDirectory(indexDirectory); IndexingContext ctx = nexusIndexer.addIndexingContextForced( repositoryId + "-temp", repositoryId, repositoryRoot, directory, null, null, new IndexCreatorHelper(getContainer()).getFullCreators()); // shifting if needed (very crude way to do it, but heh) shiftContextInTime(ctx, shiftDays); // and scan "today" nexusIndexer.scan(ctx); ctx.updateTimestamp(true); // pack it up File targetDir = new File(repositoryRoot, ".index"); targetDir.mkdirs(); IndexPackingRequest ipr = new IndexPackingRequest(ctx, targetDir); ipr.setCreateIncrementalChunks(true); indexPacker.packIndex(ipr); nexusIndexer.removeIndexingContext(ctx, deleteIndexFiles); }
/**
 * Renders a commit as a single-line description containing its directory, segments file name,
 * version, generation and file names.
 *
 * <p>If reading any of these throws, the exception is appended to whatever was built so far and
 * the partial description is returned.
 *
 * @param commit the commit to describe
 * @return the description
 */
static String str(IndexCommit commit) {
  final StringBuilder text = new StringBuilder();
  try {
    text.append("commit{");
    final Directory commitDirectory = commit.getDirectory();
    text.append("dir=");
    final Object location =
        (commitDirectory instanceof FSDirectory)
            ? ((FSDirectory) commitDirectory).getDirectory()
            : commitDirectory;
    text.append(location);
    text.append(",segFN=").append(commit.getSegmentsFileName());
    text.append(",version=").append(commit.getVersion());
    text.append(",generation=").append(commit.getGeneration());
    text.append(",filenames=").append(commit.getFileNames());
  } catch (Exception failure) {
    text.append(failure);
  }
  return text.toString();
}
// CorruptIndexException, LockObtainFailedException, IOException, // ParseException public IndexesConfigLowVersionLucene(String indexDirectoryPath) { // create some index // we could also create an index in our ram ... // Directory index = new RAMDirectory(); indexName = indexDirectoryPath; try { indexDirectory = FSDirectory.getDirectory(indexDirectoryPath); // "index/pages" indexDirectory.setLockFactory(new NativeFSLockFactory(indexDirectoryPath)); w = new IndexWriter(indexDirectory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED); } catch (CorruptIndexException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (LockObtainFailedException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } // searchIndex("vitol"); urlsFileNames = new HashMap<String, String>(); emptyFiles = new HashMap<String, String>(); }
/**
 * Opens the directory at {@code indexDir}, asking for it to be created when no file or directory
 * exists at that location yet.
 *
 * @throws IOException if the directory cannot be opened or created
 */
protected void setup() throws IOException {
  final boolean createIfMissing = !new File(indexDir).exists();
  dir = FSDirectory.getDirectory(indexDir, createIfMissing);
}
/**
 * Rebuilds the index stored in {@code dbhome} into a sibling directory named
 * {@code <dbhome>_tmp}, converting each retained document with {@code convertDocument}, then
 * updates the SQL side and swaps the new directory into place of the old one.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>open the existing and the new directory (configuring transactional directories from the
 *       application's {@code TransSource});
 *   <li>first pass over the documents: collect ids flagged as deleted and, per id, the document
 *       number with the most recent last-modified value;
 *   <li>second pass: convert and write every document that is neither deleted nor superseded;
 *   <li>make sure the {@code Lucene} table has a {@code version} column, close the writer, commit
 *       the segments and run {@code updateSQL};
 *   <li>rename {@code dbhome} to {@code <dbhome>_old}, the new directory to {@code dbhome}, move
 *       the blob directory and delete the old directory.
 * </ol>
 *
 * @param app application whose transaction source and logger are used
 * @param dbhome directory of the index to convert
 * @throws Exception if a directory cannot be opened, or if renaming or deleting the database
 *     directories fails; failures inside the two main phases are rethrown as
 *     {@code RuntimeException}
 */
public void convert(Application app, File dbhome) throws Exception {
  // Existing index; opened without creating it.
  FSDirectory indexDir = FSDirectory.getDirectory(dbhome, false);
  if (indexDir instanceof TransFSDirectory) {
    // Transactional directories get locking disabled and their JDBC settings copied
    // from the application's transaction source.
    FSDirectory.setDisableLocks(true);
    TransFSDirectory d = (TransFSDirectory) indexDir;
    TransSource source = app.getTransSource();
    d.setDriverClass(source.getDriverClass());
    d.setUrl(source.getUrl());
    d.setUser(source.getUser());
    d.setPassword(source.getPassword());
  }
  // Sibling directories: "_tmp" receives the converted index, "_old" the original one.
  File ndbhome = new File(dbhome.getParentFile(), dbhome.getName() + "_tmp");
  File olddbhome = new File(dbhome.getParentFile(), dbhome.getName() + "_old");
  // New index; created from scratch.
  FSDirectory nindexDir = FSDirectory.getDirectory(ndbhome, true);
  if (nindexDir instanceof TransFSDirectory) {
    FSDirectory.setDisableLocks(true);
    TransFSDirectory d = (TransFSDirectory) nindexDir;
    TransSource source = app.getTransSource();
    d.setDriverClass(source.getDriverClass());
    d.setUrl(source.getUrl());
    d.setUser(source.getUser());
    d.setPassword(source.getPassword());
  }
  IndexSearcher searcher = null;
  IndexWriter writer = null;
  LuceneManager lmgr = null;
  try {
    searcher = new IndexSearcher(indexDir);
    PerFieldAnalyzerWrapper a = LuceneManager.buildAnalyzer();
    writer = IndexWriterManager.getWriter(nindexDir, a, true);
    // NOTE(review): the loops below use numDocs() as the upper bound for document numbers;
    // if the index can contain deleted documents, maxDoc() may be the intended bound - confirm.
    final int numDocs = searcher.getIndexReader().numDocs();
    // Ids (id + separator + layer) of documents flagged as deleted.
    HashSet deldocs = new HashSet();
    // Id -> document number (Integer) of the version to keep.
    HashMap infos = new HashMap();
    // Pass 1: classify every document.
    for (int i = 0; i < numDocs; i++) {
      Document doc = searcher.doc(i);
      String delprop = doc.get(DeletedInfos.DELETED);
      String layerStr = doc.get(LuceneManager.LAYER_OF_SAVE);
      // `layer` is parsed but not used: the checks that referenced it are commented out.
      int layer = -1;
      try {
        layer = Integer.parseInt(layerStr);
      } catch (Exception ex) {
        layer = -1;
      }
      final String id =
          doc.get(LuceneManager.ID)
              + DeletedInfos.KEY_SEPERATOR
              + doc.get(LuceneManager.LAYER_OF_SAVE);
      if (delprop != null && "true".equals(delprop) /* && layer == DbKey.LIVE_LAYER*/) {
        deldocs.add(id);
      } else {
        Object v;
        if ((v = infos.get(id)) == null) {
          // First document seen for this id.
          infos.put(id, new Integer(i));
        } else {
          // Another document with the same id: keep the one with the later last-modified value.
          // NOTE(review): the current value is read via LuceneManager.LASTMODIFIED but the
          // previous one via the literal "_lastmodified" - presumably the same field; confirm.
          final String lmod = doc.get(LuceneManager.LASTMODIFIED);
          final String lmod_prev = searcher.doc(((Integer) v).intValue()).get("_lastmodified");
          if (lmod_prev == null || (lmod != null && lmod.compareTo(lmod_prev) > 0)) {
            infos.put(id, new Integer(i));
          }
        }
      }
    }
    // Maps of the source documents that were retained; only filled when recordNodes is set.
    ArrayList listOfMaps = new ArrayList();
    // Pass 2: write the retained documents into the new index.
    for (int i = 0; i < numDocs; i++) {
      Document doc = searcher.doc(i);
      String delprop = doc.get(DeletedInfos.DELETED);
      String layerStr = doc.get(LuceneManager.LAYER_OF_SAVE);
      int layer = -1;
      try {
        layer = Integer.parseInt(layerStr);
      } catch (Exception ex) {
        layer = -1;
      }
      final String id =
          doc.get(LuceneManager.ID)
              + DeletedInfos.KEY_SEPERATOR
              + doc.get(LuceneManager.LAYER_OF_SAVE);
      if (delprop != null && "true".equals(delprop)) {
        // The deletion marker itself is not carried over.
        continue;
      } else if (id != null && deldocs.contains(id) /* && layer == DbKey.LIVE_LAYER*/) {
        // Some document with this id was flagged deleted in pass 1.
        // (`id` is the result of a string concatenation, so the null check always passes.)
        continue;
      }
      Integer idx = (Integer) infos.get(id);
      if (idx != null && i != idx.intValue()) {
        // Superseded by another document with the same id.
        continue;
      }
      Document ndoc = convertDocument(doc);
      if (this.recordNodes) {
        listOfMaps.add(LuceneManager.luceneDocumentToMap(doc));
      }
      if (ndoc != null) {
        writer.addDocument(ndoc);
      }
    }
    if (this.recordNodes) {
      // Rebuild nodes from the recorded maps and remember the path of each by node id.
      lmgr = new LuceneManager(this.app, false, true);
      this.allNodes = new HashMap();
      final int size = listOfMaps.size();
      for (int i = 0; i < size; i++) {
        HashMap m = (HashMap) listOfMaps.get(i);
        INode n = lmgr.mapToNode(m);
        this.allNodes.put(n.getID(), getPath(n));
        n = null;
      }
    }
  } catch (Exception ex) {
    ex.printStackTrace();
    throw new RuntimeException(ex);
  } finally {
    if (searcher != null) {
      try {
        searcher.close();
      } catch (Exception ex) {
        app.logError(ErrorReporter.errorMsg(this.getClass(), "convert"), ex);
      }
    }
    if (lmgr != null) {
      lmgr.shutdown();
      lmgr = null;
    }
    // Close the old directory and drop the segment/deleted-info state held for it.
    indexDir.close();
    SegmentInfos sinfos = IndexObjectsFactory.getFSSegmentInfos(indexDir);
    sinfos.clear();
    IndexObjectsFactory.removeDeletedInfos(indexDir);
  }
  Connection conn = null;
  // Decides between commit and rollback in the finally block below.
  boolean exceptionOccured = false;
  try {
    if (writer != null) {
      TransSource ts = app.getTransSource();
      conn = ts.getConnection();
      // Add the "version" column to the Lucene table when the metadata does not list it.
      DatabaseMetaData dmd = conn.getMetaData();
      ResultSet rs = dmd.getColumns(null, null, "Lucene", "version");
      if (!rs.next()) {
        final String alterTbl = "ALTER TABLE Lucene ADD version INT NOT NULL DEFAULT 1";
        PreparedStatement pstmt = null;
        try {
          pstmt = conn.prepareStatement(alterTbl);
          pstmt.execute();
        } catch (SQLException sqle) {
          // A failing ALTER TABLE is only logged; the conversion continues.
          app.logError(ErrorReporter.errorMsg(this.getClass(), "convert"), sqle);
        } finally {
          if (pstmt != null) {
            pstmt.close();
            pstmt = null;
          }
        }
      }
      rs.close();
      rs = null;
      // Finish the new index: close the writer, flush its cache, commit the segments
      // through the connection and finalize the writer's transaction.
      writer.close();
      writer.flushCache();
      // TODO:writer.writeSegmentsFile();
      LuceneManager.commitSegments(conn, app, writer.getDirectory());
      writer.finalizeTrans();
      this.updateSQL(conn);
    }
  } catch (Exception ex) {
    ex.printStackTrace();
    exceptionOccured = true;
    throw new RuntimeException(ex);
  } finally {
    if (conn != null) {
      try {
        // With auto-commit off, commit on success and roll back on failure.
        if (!conn.getAutoCommit()) {
          if (!exceptionOccured) {
            conn.commit();
          } else {
            conn.rollback();
          }
        }
        conn.close();
      } catch (Exception ex) {
        app.logError(ErrorReporter.errorMsg(this.getClass(), "convert"), ex);
      }
      conn = null;
    }
    // Close the new directory and drop the segment/deleted-info state held for it.
    nindexDir.close();
    SegmentInfos sinfos = IndexObjectsFactory.getFSSegmentInfos(nindexDir);
    sinfos.clear();
    IndexObjectsFactory.removeDeletedInfos(nindexDir);
  }
  // Swap the directories: dbhome -> dbhome_old, dbhome_tmp -> dbhome.
  if (!dbhome.renameTo(olddbhome)) {
    throw new Exception("Could not move the old version of the db into " + olddbhome);
  }
  if (!ndbhome.renameTo(dbhome)) {
    throw new Exception("Could not move the newer version of the db into " + dbhome);
  }
  // Move the blob directory out of the old database before the old database is deleted.
  // NOTE(review): the target is built under ndbhome ("_tmp"), which has just been renamed to
  // dbhome; the target looks like it should be under dbhome - confirm. The result of this
  // renameTo is not checked.
  File oldBlobDir = new File(olddbhome, "blob");
  File newBlobDir = new File(ndbhome, "blob");
  oldBlobDir.renameTo(newBlobDir);
  if (!FileUtils.deleteDir(olddbhome)) {
    throw new Exception("Could not delete the old version of the db at " + olddbhome);
  }
}
/**
 * Canonicalizes the lines of {@code input}: within each line, the tokens after the first one are
 * sorted by document frequency in ascending order.
 *
 * <p>Document frequencies are taken from a temporary index built in {@code indexFolder} from the
 * first {@code maxLineNumber} lines of {@code input} (one document per token). Every line of
 * {@code input} is then rewritten to {@code output}. The temporary index folder is deleted after
 * a successful run. All readers, writers and the index directory are closed even when a step
 * fails.
 *
 * @param input path of the file to canonicalize
 * @param maxLineNumber number of leading lines used to build the frequency index
 * @param indexFolder folder in which the temporary index is created
 * @param output path of the file the canonicalized lines are written to
 * @throws Exception if reading, indexing or writing fails
 */
public static void canonicalize(
    String input, int maxLineNumber, String indexFolder, String output) throws Exception {
  Directory dir = FSDirectory.getDirectory(indexFolder);
  try {
    // Pass 1: index every token (from position 1 on) of the leading lines as its own document.
    int lineCount = 0;
    IndexWriter iwriter = new IndexWriter(dir, null, true, IndexWriter.MaxFieldLength.UNLIMITED);
    try {
      BufferedReader br = IOFactory.getBufferedReader(input);
      try {
        for (String line = br.readLine(); line != null; line = br.readLine()) {
          String[] tokens = Common.sortUnique(line, 1);
          for (int i = 1; i < tokens.length; i++) {
            Document doc = new Document();
            doc.add(new Field("term", tokens[i], Field.Store.NO, Field.Index.NOT_ANALYZED));
            iwriter.addDocument(doc);
          }
          lineCount++;
          if (lineCount % 100 == 0) {
            System.out.println(new Date().toString() + " : " + lineCount + " lines indexed");
          }
          if (lineCount == maxLineNumber) {
            break;
          }
        }
      } finally {
        br.close();
      }
      System.out.println(new Date().toString() + " : " + lineCount + " lines indexed");
      iwriter.optimize();
    } finally {
      // Releases the write lock even when indexing fails.
      iwriter.close();
    }
    System.out.println(new Date().toString() + " : indexing finished");

    // Pass 2: rewrite each line with its tokens ordered by ascending document frequency.
    final IndexReader ireader = IndexReader.open(dir);
    try {
      // Ties, and lookups that fail, compare as equal.
      Comparator<String> byDocFreq =
          new Comparator<String>() {
            public int compare(String a, String b) {
              try {
                int fa = ireader.docFreq(new Term("term", a));
                int fb = ireader.docFreq(new Term("term", b));
                if (fa > fb) {
                  return 1;
                } else if (fa < fb) {
                  return -1;
                }
                return 0;
              } catch (Exception e) {
                e.printStackTrace();
                return 0;
              }
            }
          };

      lineCount = 0;
      BufferedReader br = IOFactory.getBufferedReader(input);
      try {
        PrintWriter pw = IOFactory.getPrintWriter(output);
        try {
          for (String line = br.readLine(); line != null; line = br.readLine()) {
            String[] tokens = Common.sortUnique(line, 1);
            // tokens[0] keeps its position; only the remaining tokens are reordered.
            Arrays.sort(tokens, 1, tokens.length, byDocFreq);
            pw.print(tokens[0]);
            for (int i = 1; i < tokens.length; i++) {
              pw.print(" " + tokens[i]);
            }
            pw.println();
            lineCount++;
            if (lineCount % 100000 == 0) {
              System.out.println(new Date().toString() + " : " + lineCount + " lines output");
            }
          }
          System.out.println(new Date().toString() + " : " + lineCount + " lines output");
        } finally {
          pw.close();
        }
      } finally {
        br.close();
      }
    } finally {
      ireader.close();
    }
  } finally {
    dir.close();
  }
  Common.deleteFolder(new File(indexFolder));
  System.out.println(new Date().toString() + " : canonicalization finished");
}