Beispiel #1
0
  /**
   * Fills a Lucene document with the standard index fields for one source file.
   *
   * <p>Fields are added in this order: uid, full path, history (when a history reader is
   * available), date, path and project (only when {@code path} is not {@code null}), and finally
   * the analyzer-specific fields (only when {@code fa} is not {@code null}).
   *
   * @param doc the document to populate
   * @param file the file to index
   * @param path where the file is located (from source root)
   * @param fa the analyzer to use on the file; when {@code null} no analysis is performed
   * @param xrefOut where to write the xref (possibly {@code null})
   * @throws IOException if an exception occurs while collecting the data
   */
  public void populateDocument(
      Document doc, File file, String path, FileAnalyzer fa, Writer xrefOut) throws IOException {
    // The last-modified stamp is part of the uid and is also stored as the DATE field.
    final String modified =
        DateTools.timeToString(file.lastModified(), DateTools.Resolution.MILLISECOND);
    doc.add(
        new Field(
            QueryBuilder.U, Util.path2uid(path, modified), string_ft_stored_nanalyzed_norms));

    final String absolutePath = file.getAbsolutePath();
    doc.add(new Field(QueryBuilder.FULLPATH, absolutePath, string_ft_nstored_nanalyzed_norms));
    doc.add(new SortedDocValuesField(QueryBuilder.FULLPATH, new BytesRef(absolutePath)));

    // History is optional: a failure to read it is logged and indexing continues.
    try {
      final HistoryReader history = HistoryGuru.getInstance().getHistoryReader(file);
      if (history != null) {
        doc.add(new TextField(QueryBuilder.HIST, history));
        // RFE: the date could be taken from the last history comment instead.
      }
    } catch (HistoryException e) {
      LOGGER.log(Level.WARNING, "An error occurred while reading history: ", e);
    }

    doc.add(new Field(QueryBuilder.DATE, modified, string_ft_stored_nanalyzed_norms));
    doc.add(new SortedDocValuesField(QueryBuilder.DATE, new BytesRef(modified)));

    if (path != null) {
      doc.add(new TextField(QueryBuilder.PATH, path, Store.YES));
      final Project owner = Project.getProject(path);
      if (owner != null) {
        doc.add(new TextField(QueryBuilder.PROJECT, owner.getPath(), Store.YES));
      }
    }

    if (fa == null) {
      return;
    }

    // Only these three genres get a T field.
    final Genre genre = fa.getGenre();
    final boolean hasTypeField =
        genre == Genre.PLAIN || genre == Genre.XREFABLE || genre == Genre.HTML;
    if (hasTypeField) {
      doc.add(new Field(QueryBuilder.T, genre.typeName(), string_ft_stored_nanalyzed_norms));
    }
    fa.analyze(doc, StreamSource.fromFile(file), xrefOut);

    // The file type name is read after analysis, as in the original statement order.
    doc.add(new StringField(QueryBuilder.TYPE, fa.getFileTypeName(), Store.YES));
  }
Beispiel #2
0
  /**
   * Builds a Lucene document describing the file, with the fields:
   *
   * <ul>
   *   <li>path: relative path from the constructor
   *   <li>id: the same as path
   *   <li>modified: last modified date of the file, at minute resolution
   *   <li>filesize: size of the file
   *   <li>title: name of the file
   * </ul>
   *
   * <p>If reading the file attributes fails, the error is logged and the document is returned
   * without the attribute fields that had not been added yet.
   *
   * @return New Lucene document.
   */
  @Override
  public Document createDocument() {
    final Document result = new Document();

    final String relativePath = path.toString();
    result.add(new StringField("path", relativePath, Field.Store.YES));
    result.add(new StringField("id", relativePath, Field.Store.YES));

    try {
      final long modifiedMillis = Files.getLastModifiedTime(file).toMillis();
      final String modified = DateTools.timeToString(modifiedMillis, DateTools.Resolution.MINUTE);
      result.add(new StringField("modified", modified, Field.Store.YES));

      // The size is read only after "modified" has been added.
      final long size = Files.size(file);
      result.add(new LongField("filesize", size, Field.Store.YES));
    } catch (IOException ex) {
      LOG.error(ex);
    }

    final String title = file.getFileName().toString();
    result.add(new TextField("title", title, Field.Store.YES));
    return result;
  }
 /**
  * Add a row to the index.
  *
  * <p>The row is stored as one Lucene document holding the row's query string, the current time,
  * one analyzed field per indexed column, and a combined field with all indexed column values
  * joined by single spaces.
  *
  * @param row the row
  * @param commitIndex whether to commit the changes to the Lucene index
  * @throws SQLException if adding the document or committing fails with an I/O error
  */
 protected void insert(Object[] row, boolean commitIndex) throws SQLException {
   // NOTE(review): the "## LUCENE2 ##" / "## LUCENE3 ##" markers look like build-time source
   // switches selecting one of two parallel implementations; keep both variants and the
   // comment delimiters intact - confirm against the build tooling.
   /*## LUCENE2 ##
   String query = getQuery(row);
   Document doc = new Document();
   doc.add(new Field(LUCENE_FIELD_QUERY, query,
           Field.Store.YES, Field.Index.UN_TOKENIZED));
   long time = System.currentTimeMillis();
   doc.add(new Field(LUCENE_FIELD_MODIFIED,
           DateTools.timeToString(time, DateTools.Resolution.SECOND),
           Field.Store.YES, Field.Index.UN_TOKENIZED));
   StatementBuilder buff = new StatementBuilder();
   for (int index : indexColumns) {
       String columnName = columns[index];
       String data = asString(row[index], columnTypes[index]);
       // column names that start with _ must be escaped to avoid conflicts
       // with internal field names (_DATA, _QUERY, _modified)
       if (columnName.startsWith(LUCENE_FIELD_COLUMN_PREFIX)) {
           columnName = LUCENE_FIELD_COLUMN_PREFIX + columnName;
       }
       doc.add(new Field(columnName, data,
               Field.Store.NO, Field.Index.TOKENIZED));
       buff.appendExceptFirst(" ");
       buff.append(data);
   }
   Field.Store storeText = STORE_DOCUMENT_TEXT_IN_INDEX ?
           Field.Store.YES : Field.Store.NO;
   doc.add(new Field(LUCENE_FIELD_DATA, buff.toString(), storeText,
           Field.Index.TOKENIZED));
   try {
       indexAccess.modifier.addDocument(doc);
   } catch (IOException e) {
       throw convertException(e);
   }
   //*/
   // ## LUCENE3 ##
   // The query string for the row is stored verbatim (not analyzed).
   String query = getQuery(row);
   Document doc = new Document();
   doc.add(new Field(LUCENE_FIELD_QUERY, query, Field.Store.YES, Field.Index.NOT_ANALYZED));
   // Record when the row was indexed, at second resolution.
   long time = System.currentTimeMillis();
   doc.add(
       new Field(
           LUCENE_FIELD_MODIFIED,
           DateTools.timeToString(time, DateTools.Resolution.SECOND),
           Field.Store.YES,
           Field.Index.NOT_ANALYZED));
   // buff collects the values of all indexed columns, separated by single spaces.
   StatementBuilder buff = new StatementBuilder();
   for (int index : indexColumns) {
     String columnName = columns[index];
     String data = asString(row[index], columnTypes[index]);
     // Column names that start with the internal prefix (_) are escaped by
     // prepending the prefix once more, to avoid conflicts with the
     // internal field names (_DATA, _QUERY, _modified).
     if (columnName.startsWith(LUCENE_FIELD_COLUMN_PREFIX)) {
       columnName = LUCENE_FIELD_COLUMN_PREFIX + columnName;
     }
     // Per-column fields are searchable but not stored.
     doc.add(new Field(columnName, data, Field.Store.NO, Field.Index.ANALYZED));
     buff.appendExceptFirst(" ");
     buff.append(data);
   }
   // The combined text is always indexed; it is stored only when configured.
   Field.Store storeText = STORE_DOCUMENT_TEXT_IN_INDEX ? Field.Store.YES : Field.Store.NO;
   doc.add(new Field(LUCENE_FIELD_DATA, buff.toString(), storeText, Field.Index.ANALYZED));
   try {
     indexAccess.writer.addDocument(doc);
     if (commitIndex) {
       commitIndex();
     }
   } catch (IOException e) {
     // Lucene I/O failures are converted by convertException and rethrown.
     throw convertException(e);
   }
   // */
 }
Beispiel #4
0
  /**
   * Recursively walks a directory tree and indexes (or only counts) the accepted files.
   *
   * <p>The entries of each directory are processed sorted by name. Nothing is done when the
   * traversal has been interrupted or the directory itself is not accepted.
   *
   * @param dir the directory to traverse
   * @param parent the path of {@code dir}; each entry's path is {@code parent + '/' + name}
   * @param count_only if true will just traverse the tree and count files
   * @param cur_count current count during the traversal of the tree
   * @param est_total estimated total number of files, used only for progress reporting
   * @return the file count after {@code dir} has been traversed
   * @throws IOException propagated from the index operations performed during the traversal
   */
  private int indexDown(File dir, String parent, boolean count_only, int cur_count, int est_total)
      throws IOException {
    if (isInterrupted() || !accept(dir)) {
      return cur_count;
    }

    File[] entries = dir.listFiles();
    if (entries == null) {
      log.log(Level.SEVERE, "Failed to get file listing for: {0}", dir.getAbsolutePath());
      return cur_count;
    }
    Arrays.sort(
        entries,
        new Comparator<File>() {
          @Override
          public int compare(File left, File right) {
            return left.getName().compareTo(right.getName());
          }
        });

    int seen = cur_count;
    for (File entry : entries) {
      if (!accept(dir, entry)) {
        continue;
      }
      String entryPath = parent + '/' + entry.getName();

      if (entry.isDirectory()) {
        seen = indexDown(entry, entryPath, count_only, seen, est_total);
        continue;
      }

      seen++;
      if (count_only) {
        continue;
      }

      if (RuntimeEnvironment.getInstance().isPrintProgress()
          && est_total > 0
          && log.isLoggable(Level.INFO)) {
        float percent = seen * 100.0f / est_total;
        log.log(Level.INFO, "Progress: {0} ({1}%)", new Object[] {seen, percent});
      }

      if (uidIter != null) {
        // The uid combines the path with the file's last-modified stamp.
        String stamp =
            DateTools.timeToString(file2Millis(entry), DateTools.Resolution.MILLISECOND);
        BytesRef entryUid = new BytesRef(Util.path2uid(entryPath, stamp));

        // Remove indexed entries that sort before this file's uid.
        while (uidIter.term() != null
            && uidIter.term().compareTo(emptyBR) != 0
            && uidIter.term().compareTo(entryUid) < 0) {
          removeFile();
          uidIter.next();
        }

        if (uidIter.term() != null && uidIter.term().bytesEquals(entryUid)) {
          uidIter.next(); // keep matching docs
          continue;
        }
      }

      // Best effort: a failure on one file is logged and the traversal goes on.
      try {
        addFile(entry, entryPath);
      } catch (Exception e) {
        log.log(Level.WARNING, "Failed to add file " + entry.getAbsolutePath(), e);
      }
    }

    return seen;
  }

  /** Returns the last-modified time of the given file, as reported by {@link File}. */
  private static long file2Millis(File entry) {
    return entry.lastModified();
  }
  /**
   * Index the fileset.
   *
   * <p>Every existing file resource of the filesystem-only resource collections is passed to the
   * document handler and added to the index together with its "path" and "modified" fields. When
   * an existing index is appended to, a file is skipped if the "modified" value stored for its
   * path equals the file's current last-modified time.
   *
   * @throws IOException if a Lucene I/O error occurs
   * @throws BuildException if a file does not exist or is not readable, or if the document
   *     handler fails
   */
  private void indexDocs() throws IOException {
    Date start = new Date();

    boolean create = overwrite;
    // If the index directory doesn't exist,
    // create it and force create mode
    if (indexDir.mkdirs() && !overwrite) {
      create = true;
    }

    int totalFiles = 0;
    int totalIndexed = 0;
    int totalIgnored = 0;

    FSDirectory dir = FSDirectory.open(indexDir);
    try {
      Searcher searcher = null;
      boolean checkLastModified = false;
      if (!create) {
        try {
          searcher = new IndexSearcher(dir, true);
          checkLastModified = true;
        } catch (IOException ioe) {
          log("IOException: " + ioe.getMessage());
          // Empty - ignore, which indicates to index all
          // documents
        }
      }

      log("checkLastModified = " + checkLastModified, Project.MSG_VERBOSE);

      // The searcher is released in the outer finally, so it is closed even when creating
      // or closing the writer fails.
      try {
        IndexWriterConfig conf =
            new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
                .setOpenMode(create ? OpenMode.CREATE : OpenMode.APPEND);
        LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy();
        lmp.setUseCompoundFile(useCompoundIndex);
        lmp.setMergeFactor(mergeFactor);
        IndexWriter writer = new IndexWriter(dir, conf);
        try {
          for (int i = 0; i < rcs.size(); i++) {
            ResourceCollection rc = rcs.elementAt(i);
            if (!rc.isFilesystemOnly()) {
              continue;
            }
            Iterator<?> resources = rc.iterator();
            while (resources.hasNext()) {
              Resource r = (Resource) resources.next();
              if (!r.isExists() || !(r instanceof FileResource)) {
                continue;
              }

              totalFiles++;

              File file = ((FileResource) r).getFile();

              if (!file.exists() || !file.canRead()) {
                throw new BuildException(
                    "File \"" + file.getAbsolutePath() + "\" does not exist or is not readable.");
              }

              // - don't index if up to date
              if (checkLastModified && isIndexedUpToDate(searcher, file)) {
                continue;
              }

              // TODO: a document already indexed for this path is not removed before the
              // file is added again.
              try {
                log("Indexing " + file.getPath(), Project.MSG_VERBOSE);
                Document doc = handler.getDocument(file);

                if (doc == null) {
                  totalIgnored++;
                } else {
                  // Add the path of the file as a field named "path".  Use a Keyword field, so
                  // that the index stores the path, and so that the path is searchable
                  doc.add(
                      new Field(
                          "path", file.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED));

                  // Add the last modified date of the file a field named "modified".  Use a
                  // Keyword field, so that it's searchable, but so that no attempt is made
                  // to tokenize the field into words.
                  doc.add(
                      new Field(
                          "modified",
                          DateTools.timeToString(
                              file.lastModified(), DateTools.Resolution.MILLISECOND),
                          Field.Store.YES,
                          Field.Index.NOT_ANALYZED));

                  writer.addDocument(doc);
                  totalIndexed++;
                }
              } catch (DocumentHandlerException e) {
                throw new BuildException(e);
              }
            }
          }

          writer.optimize();
        } finally {
          // always make sure everything gets closed,
          // no matter how we exit.
          writer.close();
        }
      } finally {
        if (searcher != null) {
          searcher.close();
        }
      }

      Date end = new Date();

      log(
          totalIndexed
              + " out of "
              + totalFiles
              + " indexed ("
              + totalIgnored
              + " ignored) in "
              + (end.getTime() - start.getTime())
              + " milliseconds");
    } finally {
      dir.close();
    }
  }

  /**
   * Tells whether the index already holds a document for the file whose stored "modified" value
   * equals the file's current last-modified time.
   *
   * @param searcher searcher over the existing index
   * @param file the file to look up by its path
   * @return {@code true} if the indexed entry matches the file's last-modified time; {@code
   *     false} if there is no entry or it has no "modified" value
   * @throws IOException if the index lookup fails
   */
  private boolean isIndexedUpToDate(Searcher searcher, File file) throws IOException {
    TermQuery query = new TermQuery(new Term("path", file.getPath()));
    ScoreDoc[] hits = searcher.search(query, null, 1).scoreDocs;
    if (hits.length == 0) {
      return false;
    }

    // Check for a missing field BEFORE trimming; a document without "modified" must not
    // abort the whole run with a NullPointerException.
    String indexModified = searcher.doc(hits[0].doc).get("modified");
    if (indexModified == null) {
      return false;
    }

    long lastModified = 0;
    try {
      lastModified = DateTools.stringToTime(indexModified.trim());
    } catch (ParseException ignored) {
      // An unparsable value leaves lastModified at 0, as before.
    }
    return lastModified == file.lastModified();
  }
 /**
  * Formats a time value as a Lucene date string, using the resolution held in
  * {@code DATE_TIME_RES}.
  *
  * @param time the time value handed to {@code DateTools.timeToString}
  * @return the string produced by {@code DateTools.timeToString}
  */
 private static String timeToString(long time) {
   return DateTools.timeToString(time, DATE_TIME_RES);
 }
 /**
  * Builds the Lucene query for a single search filter.
  *
  * <ul>
  *   <li>String value: the text is split on whitespace and lower-cased. With the {@code EXACT}
  *       option the words form a phrase query; otherwise each word becomes a term clause whose
  *       occurrence is {@code MUST} for {@code ALL_WORDS}, {@code MUST_NOT} for {@code ANY_WORD}
  *       and {@code SHOULD} otherwise. When attachments are included, the attachment field is
  *       searched as an alternative to the filter's own field.
  *   <li>Date value: a required term on the date formatted at minute resolution.
  *   <li>Number value: a required term on the number's string form.
  *   <li>No value: an inclusive term range between the filter's start and end (either may be
  *       {@code null}). Numeric ranges are not implemented and yield an empty query.
  * </ul>
  *
  * @param filter the filter to translate
  * @return the query for the filter; a bare phrase query for an exact match without attachments,
  *     otherwise a boolean query (possibly without clauses)
  */
 private Query createQuery(SearchEngineFilter filter) {
   BooleanQuery fieldQuery = new BooleanQuery();
   String key = filter.getKey();
   String attachmentKey = key + IIndexerDAO.ATTACHMENT_FIELD_SUFFIX;
   Object value = filter.getValue();
   if (null != value) {
     if (value instanceof String) {
       SearchEngineFilter.TextSearchOption option = filter.getTextSearchOption();
       if (null == option) {
         option = SearchEngineFilter.TextSearchOption.AT_LEAST_ONE_WORD;
       }
       // Trim first: leading whitespace would otherwise produce an empty first token and
       // therefore a clause on an empty term.
       String[] words = value.toString().trim().split("\\s+");
       // NOTE: search lower case....
       // NOTE(review): toLowerCase() uses the default locale; confirm that this matches how
       // the indexed text is lower-cased.
       for (int i = 0; i < words.length; i++) {
         words[i] = words[i].toLowerCase();
       }
       if (!option.equals(SearchEngineFilter.TextSearchOption.EXACT)) {
         BooleanClause.Occur bc = BooleanClause.Occur.SHOULD;
         if (option.equals(SearchEngineFilter.TextSearchOption.ALL_WORDS)) {
           bc = BooleanClause.Occur.MUST;
         } else if (option.equals(SearchEngineFilter.TextSearchOption.ANY_WORD)) {
           bc = BooleanClause.Occur.MUST_NOT;
         }
         for (String word : words) {
           TermQuery term = new TermQuery(new Term(key, word));
           if (filter.isIncludeAttachments()) {
             // The word may match either the field itself or its attachment field.
             BooleanQuery compositeQuery = new BooleanQuery();
             compositeQuery.add(term, BooleanClause.Occur.SHOULD);
             TermQuery termAttachment = new TermQuery(new Term(attachmentKey, word));
             compositeQuery.add(termAttachment, BooleanClause.Occur.SHOULD);
             fieldQuery.add(compositeQuery, bc);
           } else {
             fieldQuery.add(term, bc);
           }
         }
       } else {
         PhraseQuery phraseQuery = new PhraseQuery();
         for (String word : words) {
           phraseQuery.add(new Term(key, word));
         }
         if (!filter.isIncludeAttachments()) {
           return phraseQuery;
         }
         fieldQuery.add(phraseQuery, BooleanClause.Occur.SHOULD);
         PhraseQuery attachmentPhrase = new PhraseQuery();
         for (String word : words) {
           attachmentPhrase.add(new Term(attachmentKey, word));
         }
         fieldQuery.add(attachmentPhrase, BooleanClause.Occur.SHOULD);
       }
     } else if (value instanceof Date) {
       String toString =
           DateTools.timeToString(((Date) value).getTime(), DateTools.Resolution.MINUTE);
       TermQuery term = new TermQuery(new Term(key, toString));
       fieldQuery.add(term, BooleanClause.Occur.MUST);
     } else if (value instanceof Number) {
       TermQuery term = new TermQuery(new Term(key, value.toString()));
       fieldQuery.add(term, BooleanClause.Occur.MUST);
     }
   } else {
     Object rangeStart = filter.getStart();
     Object rangeEnd = filter.getEnd();
     if (rangeStart instanceof Number || rangeEnd instanceof Number) {
       // TODO: numeric ranges are not implemented; the query stays without clauses.
     } else {
       String start = null;
       String end = null;
       if (rangeStart instanceof Date || rangeEnd instanceof Date) {
         if (null != rangeStart) {
           start =
               DateTools.timeToString(((Date) rangeStart).getTime(), DateTools.Resolution.MINUTE);
         }
         if (null != rangeEnd) {
           end = DateTools.timeToString(((Date) rangeEnd).getTime(), DateTools.Resolution.MINUTE);
         }
       } else {
         start = (null != rangeStart) ? rangeStart.toString().toLowerCase() : null;
         end = (null != rangeEnd) ? rangeEnd.toString().toLowerCase() : null;
       }
       // BytesRef(CharSequence) encodes as UTF-8, which is how Lucene stores terms;
       // String.getBytes() would use the platform default charset instead.
       BytesRef byteStart = (null != start) ? new BytesRef(start) : null;
       BytesRef byteEnd = (null != end) ? new BytesRef(end) : null;
       TermRangeQuery range = new TermRangeQuery(key, byteStart, byteEnd, true, true);
       fieldQuery.add(range, BooleanClause.Occur.MUST);
     }
   }
   return fieldQuery;
 }