/**
 * Populate a Lucene document with the required fields.
 *
 * @param doc The document to populate
 * @param file The file to index
 * @param path Where the file is located (from source root)
 * @param fa The analyzer to use on the file
 * @param xrefOut Where to write the xref (possibly {@code null})
 * @throws IOException If an exception occurs while collecting the data
 */
public void populateDocument(Document doc, File file, String path, FileAnalyzer fa,
        Writer xrefOut) throws IOException {
    String date = DateTools.timeToString(file.lastModified(),
            DateTools.Resolution.MILLISECOND);
    doc.add(new Field(QueryBuilder.U, Util.path2uid(path, date),
            string_ft_stored_nanalyzed_norms));
    doc.add(new Field(QueryBuilder.FULLPATH, file.getAbsolutePath(),
            string_ft_nstored_nanalyzed_norms));
    doc.add(new SortedDocValuesField(QueryBuilder.FULLPATH,
            new BytesRef(file.getAbsolutePath())));

    try {
        HistoryReader hr = HistoryGuru.getInstance().getHistoryReader(file);
        if (hr != null) {
            doc.add(new TextField(QueryBuilder.HIST, hr));
            // date = hr.getLastCommentDate() //RFE
        }
    } catch (HistoryException e) {
        LOGGER.log(Level.WARNING, "An error occurred while reading history: ", e);
    }
    doc.add(new Field(QueryBuilder.DATE, date, string_ft_stored_nanalyzed_norms));
    doc.add(new SortedDocValuesField(QueryBuilder.DATE, new BytesRef(date)));

    if (path != null) {
        doc.add(new TextField(QueryBuilder.PATH, path, Store.YES));
        Project project = Project.getProject(path);
        if (project != null) {
            doc.add(new TextField(QueryBuilder.PROJECT, project.getPath(), Store.YES));
        }
    }

    if (fa != null) {
        Genre g = fa.getGenre();
        if (g == Genre.PLAIN || g == Genre.XREFABLE || g == Genre.HTML) {
            doc.add(new Field(QueryBuilder.T, g.typeName(), string_ft_stored_nanalyzed_norms));
        }
        fa.analyze(doc, StreamSource.fromFile(file), xrefOut);

        String type = fa.getFileTypeName();
        doc.add(new StringField(QueryBuilder.TYPE, type, Store.YES));
    }
}
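Note that this method only fills in the document's fields; it does not add the document to the index. For context, a hypothetical caller (indexOneFile is an illustrative name, not part of the original code) might tie it to a Lucene IndexWriter roughly like this:

// Illustrative sketch only: connects populateDocument() above to an IndexWriter.
// writer, fa and xrefOut are assumed to come from the surrounding indexer code.
void indexOneFile(IndexWriter writer, File file, String path,
        FileAnalyzer fa, Writer xrefOut) throws IOException {
    Document doc = new Document();
    populateDocument(doc, file, path, fa, xrefOut); // uid, full path, date, project, genre, type
    writer.addDocument(doc);                        // hand the populated document to Lucene
}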
@Test
public void uid2url() {
    assertEquals("/etc/passwd", Util.uid2url(Util.path2uid("/etc/passwd", "date")));
}
/**
 * Generate indexes recursively.
 *
 * @param dir the directory to generate indexes for
 * @param parent path of the directory, relative to the source root
 * @param count_only if true, just traverse the source root and count files
 * @param cur_count current count during the traversal of the tree
 * @param est_total estimated total number of files to process
 * @return the number of files encountered so far
 * @throws IOException if an error occurs while listing or adding files
 */
private int indexDown(File dir, String parent, boolean count_only, int cur_count, int est_total)
        throws IOException {
    int lcur_count = cur_count;
    if (isInterrupted()) {
        return lcur_count;
    }

    if (!accept(dir)) {
        return lcur_count;
    }

    File[] files = dir.listFiles();
    if (files == null) {
        log.log(Level.SEVERE, "Failed to get file listing for: {0}", dir.getAbsolutePath());
        return lcur_count;
    }
    Arrays.sort(files, new Comparator<File>() {
        @Override
        public int compare(File p1, File p2) {
            return p1.getName().compareTo(p2.getName());
        }
    });

    for (File file : files) {
        if (accept(dir, file)) {
            String path = parent + '/' + file.getName();
            if (file.isDirectory()) {
                lcur_count = indexDown(file, path, count_only, lcur_count, est_total);
            } else {
                lcur_count++;
                if (count_only) {
                    continue;
                }

                if (RuntimeEnvironment.getInstance().isPrintProgress()
                        && est_total > 0 && log.isLoggable(Level.INFO)) {
                    log.log(Level.INFO, "Progress: {0} ({1}%)",
                            new Object[]{lcur_count, (lcur_count * 100.0f / est_total)});
                }

                if (uidIter != null) {
                    String uid = Util.path2uid(path,
                            DateTools.timeToString(file.lastModified(),
                            DateTools.Resolution.MILLISECOND)); // construct uid for doc
                    BytesRef buid = new BytesRef(uid);
                    // Remove stale documents whose stored uid sorts before this file's uid.
                    while (uidIter.term() != null
                            && uidIter.term().compareTo(emptyBR) != 0
                            && uidIter.term().compareTo(buid) < 0) {
                        removeFile();
                        uidIter.next();
                    }

                    // Same path and timestamp already indexed: keep the existing document.
                    if (uidIter.term() != null && uidIter.term().bytesEquals(buid)) {
                        uidIter.next(); // keep matching docs
                        continue;
                    }
                }

                try {
                    addFile(file, path);
                } catch (Exception e) {
                    log.log(Level.WARNING, "Failed to add file " + file.getAbsolutePath(), e);
                }
            }
        }
    }

    return lcur_count;
}
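The loop over uidIter is essentially a merge of two sorted sequences: the uids already stored in the index and the uids derived from the files found on disk (the directory listing is sorted above precisely to keep the two orders aligned). The following is a standalone sketch of that merge using plain strings instead of the index and the file system; the printed "remove" and "add" actions stand in for removeFile() and addFile(), and the real code also handles timestamps and empty terms:

import java.util.Iterator;
import java.util.List;

final class UidMergeSketch {
    // Walk two sorted sequences in lock step: uids already in the index vs.
    // uids computed from the files currently on disk.
    static void merge(List<String> indexedUids, List<String> diskUids) {
        Iterator<String> indexed = indexedUids.iterator();
        String stored = indexed.hasNext() ? indexed.next() : null;

        for (String current : diskUids) {
            // Anything stored that sorts before the current uid no longer exists
            // on disk (or has an older timestamp): remove it from the index.
            while (stored != null && stored.compareTo(current) < 0) {
                System.out.println("remove " + stored);
                stored = indexed.hasNext() ? indexed.next() : null;
            }
            if (stored != null && stored.equals(current)) {
                // Same uid: the document is up to date, keep it.
                stored = indexed.hasNext() ? indexed.next() : null;
            } else {
                // Not in the index (new file or changed timestamp): (re)index it.
                System.out.println("add " + current);
            }
        }
        // Whatever remains in the index after the traversal is handled by the
        // caller (see the cleanup loop in update() below).
    }
}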
@Test
public void path2uid() {
    assertEquals("\u0000etc\u0000passwd\u0000date", Util.path2uid("/etc/passwd", "date"));
}
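Together, the two tests pin down the uid format: path2uid replaces every '/' in the path with a NUL character and appends a NUL plus the date string, which makes uids sort by path and then by timestamp, and uid2url reverses the substitution and drops the trailing date component. A minimal sketch consistent with these tests (not the actual Util implementation) is:

// Sketch only; behavior inferred from the two tests above.
static String path2uid(String path, String date) {
    return path.replace('/', '\u0000') + '\u0000' + date;
}

static String uid2url(String uid) {
    String url = uid.replace('\u0000', '/');       // restore the path separators
    return url.substring(0, url.lastIndexOf('/')); // strip the trailing date component
}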
/**
 * Update the content of this index database.
 *
 * @throws IOException if an error occurs
 * @throws HistoryException if an error occurs when accessing the history
 */
public void update() throws IOException, HistoryException {
    synchronized (lock) {
        if (running) {
            throw new IOException("Indexer already running!");
        }
        running = true;
        interrupted = false;
    }

    String ctgs = RuntimeEnvironment.getInstance().getCtags();
    if (ctgs != null) {
        ctags = new Ctags();
        ctags.setBinary(ctgs);
    }
    if (ctags == null) {
        log.severe("Unable to run ctags! searching definitions will not work!");
    }

    if (ctags != null) {
        String filename = RuntimeEnvironment.getInstance().getCTagsExtraOptionsFile();
        if (filename != null) {
            ctags.setCTagsExtraOptionsFile(filename);
        }
    }

    try {
        Analyzer analyzer = AnalyzerGuru.getAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(SearchEngine.LUCENE_VERSION, analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        // iwc.setRAMBufferSizeMB(256.0); //TODO check what is the sweet spot
        writer = new IndexWriter(indexDirectory, iwc);
        writer.commit(); // to make sure index exists on the disk
        // writer.setMaxFieldLength(RuntimeEnvironment.getInstance().getIndexWordLimit());

        if (directories.isEmpty()) {
            if (project == null) {
                directories.add("");
            } else {
                directories.add(project.getPath());
            }
        }

        for (String dir : directories) {
            File sourceRoot;
            if ("".equals(dir)) {
                sourceRoot = RuntimeEnvironment.getInstance().getSourceRootFile();
            } else {
                sourceRoot = new File(RuntimeEnvironment.getInstance().getSourceRootFile(), dir);
            }

            HistoryGuru.getInstance().ensureHistoryCacheExists(sourceRoot);

            String startuid = Util.path2uid(dir, "");
            IndexReader reader = DirectoryReader.open(indexDirectory); // open existing index
            Terms terms = null;
            int numDocs = reader.numDocs();
            if (numDocs > 0) {
                Fields uFields = MultiFields.getFields(reader); // reader.getTermVectors(0);
                terms = uFields.terms(QueryBuilder.U);
            }

            try {
                if (numDocs > 0) {
                    uidIter = terms.iterator(null);
                    TermsEnum.SeekStatus stat = uidIter.seekCeil(new BytesRef(startuid), true); // init uid
                    if (stat == TermsEnum.SeekStatus.END || stat == TermsEnum.SeekStatus.NOT_FOUND) {
                        uidIter = null;
                    }
                }

                // TODO: the counting pass below should be optional, since it traverses
                // the tree once more just to get the total count. :(
                int file_cnt = 0;
                if (RuntimeEnvironment.getInstance().isPrintProgress()) {
                    log.log(Level.INFO, "Counting files in {0} ...", dir);
                    file_cnt = indexDown(sourceRoot, dir, true, 0, 0);
                    if (log.isLoggable(Level.INFO)) {
                        log.log(Level.INFO, "Need to process: {0} files for {1}",
                                new Object[]{file_cnt, dir});
                    }
                }

                indexDown(sourceRoot, dir, false, 0, file_cnt);

                // Remove documents for files that were not visited by indexDown(),
                // i.e. files that no longer exist under this directory.
                while (uidIter != null && uidIter.term() != null
                        && uidIter.term().utf8ToString().startsWith(startuid)) {
                    removeFile();
                    uidIter.next();
                }
            } finally {
                reader.close();
            }
        }
    } finally {
        if (writer != null) {
            try {
                writer.prepareCommit();
                writer.commit();
                writer.close();
            } catch (IOException e) {
                log.log(Level.WARNING, "An error occurred while closing writer", e);
            }
        }

        if (ctags != null) {
            try {
                ctags.close();
            } catch (IOException e) {
                log.log(Level.WARNING, "An error occurred while closing ctags process", e);
            }
        }

        synchronized (lock) {
            running = false;
        }
    }

    if (!isInterrupted() && isDirty()) {
        if (RuntimeEnvironment.getInstance().isOptimizeDatabase()) {
            optimize();
        }
        createSpellingSuggestions();

        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        File timestamp = new File(env.getDataRootFile(), "timestamp");
        if (timestamp.exists()) {
            if (!timestamp.setLastModified(System.currentTimeMillis())) {
                log.log(Level.WARNING,
                        "Failed to set last modified time on ''{0}'', used for timestamping the index database.",
                        timestamp.getAbsolutePath());
            }
        } else {
            if (!timestamp.createNewFile()) {
                log.log(Level.WARNING,
                        "Failed to create file ''{0}'', used for timestamping the index database.",
                        timestamp.getAbsolutePath());
            }
        }
    }
}
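Callers only need to handle the two declared exceptions; how the database instance is obtained and configured depends on the surrounding indexer setup, so the following driver is purely illustrative (db and logger are assumed names, not part of the original code):

// Illustrative driver; "db" is an already configured instance of this index
// database class and "logger" a java.util.logging.Logger owned by the caller.
try {
    db.update(); // incremental update: add new/changed files, remove vanished ones
} catch (HistoryException e) {
    logger.log(Level.SEVERE, "History access failed during index update", e);
} catch (IOException e) {
    logger.log(Level.SEVERE, "I/O error during index update", e);
}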