Example #1
0
  /**
   * Builds the index document describing one HTML file.
   *
   * <p>Fields are added in this order: {@code url}, {@code modified}, {@code uid},
   * {@code contents}, {@code summary}, {@code title}.
   *
   * @param f the file to describe
   * @return a newly created document carrying the six fields listed above
   * @throws IOException declared for the parser/file access performed here
   * @throws InterruptedException declared for the parser accessors used here
   */
  public static Document Document(File f) throws IOException, InterruptedException {
    Document result = new Document();

    // "url": the file path with the platform separator normalised to '/'.
    // UnIndexed, i.e. kept with the document for display but not searchable.
    String url = f.getPath().replace(dirSep, '/');
    result.add(Field.UnIndexed("url", url));

    // "modified": last-modified time as a Keyword, i.e. searchable as one
    // untokenized term.
    String modified = DateField.timeToString(f.lastModified());
    result.add(Field.Keyword("modified", modified));

    // "uid": lets the index be maintained incrementally. Not stored, indexed,
    // and not tokenized.
    result.add(new Field("uid", uid(f), false, true, false));

    // The remaining fields all come from the parsed HTML; the accessors are
    // invoked in the same sequence as before: reader, summary, title.
    HTMLParser html = new HTMLParser(f);

    // "contents": tag-stripped text supplied as a Reader so that it gets
    // tokenized and indexed.
    result.add(Field.Text("contents", html.getReader()));

    // "summary": stored only, returned with hits for display.
    result.add(Field.UnIndexed("summary", html.getSummary()));

    // "title": its own Text field so it can be searched separately.
    result.add(Field.Text("title", html.getTitle()));

    return result;
  }
 /**
  * For testing DateTools support: encodes {@code d} as an index term.
  *
  * @param d the date to encode
  * @param resolution the DateTools resolution to use, or {@code null} to fall back to the
  *     legacy {@code DateField} encoding
  * @return the encoded date string
  */
 private String getDate(Date d, DateTools.Resolution resolution) throws Exception {
   return (resolution == null)
       ? DateField.dateToString(d)
       : DateTools.dateToString(d, resolution);
 }
Example #3
0
 /**
  * Builds the unique id used for a file in the index.
  *
  * <p>The id is the file path with every directory separator replaced by a NUL character,
  * followed by one more NUL and the encoded last-modified time. NUL is used as the separator
  * so that sorting ids lexicographically matches a walk of the file hierarchy.
  *
  * @param f the file to identify
  * @return the path-plus-timestamp id
  */
 public static String uid(File f) {
   // Path component first, then the timestamp, joined by a single NUL.
   String nulSeparatedPath = f.getPath().replace(dirSep, '\u0000');
   String modifiedStamp = DateField.timeToString(f.lastModified());
   return nulSeparatedPath + "\u0000" + modifiedStamp;
 }
 /**
  * For testing legacy DateField support: checks how localized date ranges are rewritten.
  *
  * <p>Two forms are exercised: an inclusive {@code [a TO b]} range, whose upper bound is
  * expected to be the end date at 23:59:59.999, and an exclusive <code>{a b}</code> range written
  * without the {@code TO} keyword and with extra whitespace.
  */
 public void testLegacyDateRange() throws Exception {
   String from = getLocalizedDate(2002, 1, 1, false);
   String to = getLocalizedDate(2002, 1, 4, false);

   // Last millisecond of the end day: the expected upper bound of the inclusive range.
   Calendar endOfLastDay = new GregorianCalendar();
   endOfLastDay.set(2002, 1, 4, 23, 59, 59);
   endOfLastDay.set(Calendar.MILLISECOND, 999);

   // Inclusive range.
   String inclusiveQuery = "[ " + escapeDateString(from) + " TO " + escapeDateString(to) + "]";
   String inclusiveExpected =
       "[" + getLegacyDate(from) + " TO " + DateField.dateToString(endOfLastDay.getTime()) + "]";
   assertQueryEquals(inclusiveQuery, null, inclusiveExpected);

   // Exclusive range, no TO keyword, irregular spacing.
   String exclusiveQuery =
       "{  " + escapeDateString(from) + "    " + escapeDateString(to) + "   }";
   String exclusiveExpected = "{" + getLegacyDate(from) + " TO " + getLegacyDate(to) + "}";
   assertQueryEquals(exclusiveQuery, null, exclusiveExpected);
 }
 /**
  * Adds one document to {@code iw} with two stored fields: {@code "f"} holding {@code content}
  * (analyzed) and {@code "date"} holding the given date-time encoded with
  * {@code DateField.dateToString} (not analyzed).
  *
  * @param content text for the {@code "f"} field
  * @param year calendar year
  * @param month month of the year, 1-based (1 = January); converted to Calendar's 0-based month
  * @param day day of the month
  * @param hour hour of the day
  * @param minute minute of the hour
  * @param second second of the minute
  * @param iw writer that receives the document
  * @throws IOException if the writer fails to add the document
  */
 private static void addDateDoc(
     String content,
     int year,
     int month,
     int day,
     int hour,
     int minute,
     int second,
     IndexWriter iw)
     throws IOException {
   Document d = new Document();
   d.add(new Field("f", content, Field.Store.YES, Field.Index.ANALYZED));
   Calendar cal = Calendar.getInstance(Locale.ENGLISH);
   cal.set(year, month - 1, day, hour, minute, second);
   // getInstance() starts at "now" and the six-argument set() retains all other fields, so
   // without this the encoded value would carry whatever millisecond the call happened to
   // run in. Pin it so the indexed date is exactly the requested second on every run.
   cal.set(Calendar.MILLISECOND, 0);
   d.add(
       new Field(
           "date",
           DateField.dateToString(cal.getTime()),
           Field.Store.YES,
           Field.Index.NOT_ANALYZED));
   iw.addDocument(d);
 }
 /**
  * For testing legacy DateField support: converts a short-format date string into its
  * {@code DateField} encoding.
  *
  * @param s a date string parseable by {@code DateFormat.getDateInstance(DateFormat.SHORT)}
  * @return the {@code DateField} string for the parsed date
  * @throws Exception if {@code s} cannot be parsed
  */
 private String getLegacyDate(String s) throws Exception {
   // Parse with the SHORT date format, then encode in one step.
   return DateField.dateToString(DateFormat.getDateInstance(DateFormat.SHORT).parse(s));
 }