예제 #1
0
  /**
   * Builds a stored, indexed {@link NumericField} that carries a long value.
   *
   * @param field name of the field to create
   * @param number textual form of the value; converted with {@code GetterUtil.getLong}
   * @return the newly created numeric field with its long value set
   */
  public static NumericField getNumber(String field, String number) {
    // setLongValue returns the field itself, so creation and initialization can be chained.
    return new NumericField(field, Field.Store.YES, true)
        .setLongValue(GetterUtil.getLong(number));
  }
예제 #2
0
  /**
   * Loads the document with the given id from the searcher and converts it into a {@link
   * SearchItem}.
   *
   * <p>The version defaults to {@code -1} when the document carries no version field.
   *
   * @param documentId id of the document to load
   * @return the item built from the document's type, path, version and date fields
   * @throws IOException if the document cannot be read
   */
  public SearchItem toResult(int documentId) throws IOException {
    Document doc = searcher.doc(documentId);

    String typeText = doc.getFieldable(FieldNames.TYPE).stringValue();
    NumericField dateField = (NumericField) doc.getFieldable(FieldNames.DATE);
    Fieldable pathField = doc.getFieldable(FieldNames.PATH);
    NumericField versionField = (NumericField) doc.getFieldable(FieldNames.VERSION);

    int itemType = Integer.parseInt(typeText);
    String itemPath = pathField.stringValue();

    // Only the version field is treated as optional.
    int itemVersion = -1;
    if (versionField != null) {
      itemVersion = versionField.getNumericValue().intValue();
    }

    Date itemDate = new Date(dateField.getNumericValue().longValue());

    return new SearchItem(itemType, itemPath, itemVersion, itemDate);
  }
예제 #3
0
  /**
   * Builds the test index before each test: one document per entry of {@code ids}, combined with
   * the entries at the same position in {@code titles}, {@code contents} and {@code prices}.
   *
   * <p>Each document gets these fields (all stored):
   *
   * <ul>
   *   <li>{@code ids} - not analyzed
   *   <li>{@code titles} - analyzed
   *   <li>{@code titles2} - the same title, not analyzed and without norms
   *   <li>{@code contents} - analyzed, with term vectors
   *   <li>{@code price} - indexed numeric int value
   * </ul>
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws LockObtainFailedException if the index write lock cannot be obtained
   * @throws IOException on any other low-level I/O failure
   */
  @Before
  public void init() throws CorruptIndexException, LockObtainFailedException, IOException {

    IndexWriter indexWriter = getWriter();

    try {
      for (int i = 0; i < ids.length; i++) {
        Document doc = new Document();
        doc.add(new Field("ids", ids[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field("titles", titles[i], Field.Store.YES, Field.Index.ANALYZED));
        doc.add(
            new Field("titles2", titles[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
        doc.add(
            new Field(
                "contents", contents[i], Field.Store.YES, Field.Index.ANALYZED, TermVector.YES));

        NumericField numField = new NumericField("price", Field.Store.YES, true);
        numField.setIntValue(prices[i]);
        doc.add(numField);

        indexWriter.addDocument(doc);
      }
    } finally {
      // Always release the writer (and with it the index write lock), even when adding a
      // document fails; otherwise every following test would fail to obtain the lock.
      // close() also commits the pending documents, so no explicit commit() is needed.
      indexWriter.close();
    }
  }
예제 #4
0
  /**
   * Creates the {@link NumericField} for a value of this field type.
   *
   * <p>The external value is parsed according to {@code type} (int, float, long, double, or a date
   * converted to its millisecond timestamp) and stored in a numeric field that uses this type's
   * {@code precisionStep}.
   *
   * @param field schema definition supplying the name and the indexed/stored/omitNorms flags
   * @param externalVal textual value to parse
   * @param boost boost to set on the created field
   * @return the populated field, or {@code null} when the schema field is neither indexed nor
   *     stored
   * @throws SolrException if {@code type} is not one of the handled trie types
   */
  @Override
  public Fieldable createField(SchemaField field, String externalVal, float boost) {
    final boolean isIndexed = field.indexed();
    final boolean isStored = field.stored();

    // Nothing to create for a field that is neither searchable nor retrievable.
    if (!(isIndexed || isStored)) {
      if (log.isTraceEnabled()) {
        log.trace("Ignoring unindexed/unstored field: " + field);
      }
      return null;
    }

    final Field.Store storeMode = isStored ? Field.Store.YES : Field.Store.NO;
    final NumericField numeric =
        new NumericField(field.getName(), precisionStep, storeMode, isIndexed);

    switch (type) {
      case INTEGER:
        numeric.setIntValue(Integer.parseInt(externalVal));
        break;
      case FLOAT:
        numeric.setFloatValue(Float.parseFloat(externalVal));
        break;
      case LONG:
        numeric.setLongValue(Long.parseLong(externalVal));
        break;
      case DOUBLE:
        numeric.setDoubleValue(Double.parseDouble(externalVal));
        break;
      case DATE:
        // Dates are indexed as their epoch-millisecond long value.
        numeric.setLongValue(dateField.parseMath(null, externalVal).getTime());
        break;
      default:
        throw new SolrException(
            SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + type);
    }

    numeric.setOmitNorms(field.omitNorms());
    numeric.setIndexOptions(getIndexOptions(field, externalVal));
    numeric.setBoost(boost);
    return numeric;
  }
예제 #5
0
  /**
   * Creates one document from the given doc data.
   *
   * <p>Only part of the body is used when {@code size} is positive and smaller than the body
   * length; the unused remainder is written back into {@code docData} so it is still available
   * afterwards. The docdata properties are reset once added so they are not added more than once.
   *
   * @param docData source of the id, name, date, title, body and properties; its body and
   *     properties are modified by this call
   * @param size approximate number of body characters to use; {@code <= 0} (or a value not
   *     smaller than the body length) means use the whole body
   * @param cnt suffix appended to the name as {@code "_" + cnt}; a negative value means no suffix
   * @return the populated document
   * @throws UnsupportedEncodingException declared because of the {@code getBytes("UTF-8")} call
   */
  private Document createDocument(DocData docData, int size, int cnt)
      throws UnsupportedEncodingException {

    // Use the DocState's document when reuseFields is set, otherwise a fresh one; in both cases
    // start from an empty field list.
    final DocState ds = getDocState();
    final Document doc = reuseFields ? ds.doc : new Document();
    doc.getFields().clear();

    // Set ID_FIELD
    Field idField = ds.getField(ID_FIELD, storeVal, Index.NOT_ANALYZED_NO_NORMS, termVecVal);
    int id;
    if (r != null) {
      // A random source is configured: pick an id bounded by updateDocIDLimit.
      id = r.nextInt(updateDocIDLimit);
    } else {
      id = docData.getID();
      if (id == -1) {
        // -1 is treated as "no id supplied": fall back to the running document counter.
        id = numDocsCreated.getAndIncrement();
      }
    }
    idField.setValue(Integer.toString(id));
    doc.add(idField);

    // Set NAME_FIELD
    String name = docData.getName();
    if (name == null) name = "";
    // A non-negative cnt is appended to the name as "_<cnt>".
    name = cnt < 0 ? name : name + "_" + cnt;
    Field nameField = ds.getField(NAME_FIELD, storeVal, indexVal, termVecVal);
    nameField.setValue(name);
    doc.add(nameField);

    // Set DATE_FIELD
    // Lazily create the DateUtil kept in dateParsers (presumably a per-thread holder - confirm).
    DateUtil util = dateParsers.get();
    if (util == null) {
      util = new DateUtil();
      dateParsers.set(util);
    }
    Date date = null;
    String dateString = docData.getDate();
    if (dateString != null) {
      // The parse position object is shared, so rewind it before every parse.
      util.pos.setIndex(0);
      date = util.parser.parse(dateString, util.pos);
      // System.out.println(dateString + " parsed to " + date);
    } else {
      dateString = "";
    }
    // The date is added as its original string form (empty when there is none).
    Field dateStringField = ds.getField(DATE_FIELD, storeVal, indexVal, termVecVal);
    dateStringField.setValue(dateString);
    doc.add(dateStringField);

    // Covers both a missing date string and a parse call that yielded null.
    if (date == null) {
      // just set to right now
      date = new Date();
    }

    // Numeric form of the date: its millisecond timestamp.
    NumericField dateField = ds.getNumericField(DATE_MSEC_FIELD);
    dateField.setLongValue(date.getTime());
    doc.add(dateField);

    // Time of day expressed as seconds since midnight, taken from the calendar fields.
    util.cal.setTime(date);
    final int sec =
        util.cal.get(Calendar.HOUR_OF_DAY) * 3600
            + util.cal.get(Calendar.MINUTE) * 60
            + util.cal.get(Calendar.SECOND);

    NumericField timeSecField = ds.getNumericField(TIME_SEC_FIELD);
    timeSecField.setIntValue(sec);
    doc.add(timeSecField);

    // Set TITLE_FIELD
    String title = docData.getTitle();
    Field titleField = ds.getField(TITLE_FIELD, storeVal, indexVal, termVecVal);
    titleField.setValue(title == null ? "" : title);
    doc.add(titleField);

    // Set BODY_FIELD (skipped entirely when there is no body text)
    String body = docData.getBody();
    if (body != null && body.length() > 0) {
      String bdy;
      if (size <= 0 || size >= body.length()) {
        bdy = body; // use all
        docData.setBody(""); // nothing left
      } else {
        // attempt not to break words - if whitespace found within next 20 chars...
        // The scan starts at index size - 1 and stops before size + 20 or the end of the body;
        // when whitespace is found, the cut point moves to that index.
        for (int n = size - 1; n < size + 20 && n < body.length(); n++) {
          if (Character.isWhitespace(body.charAt(n))) {
            size = n;
            break;
          }
        }
        bdy = body.substring(0, size); // use part
        docData.setBody(body.substring(size)); // some left
      }
      Field bodyField = ds.getField(BODY_FIELD, bodyStoreVal, bodyIndexVal, termVecVal);
      bodyField.setValue(bdy);
      doc.add(bodyField);

      if (storeBytes) {
        // Additionally store the used body part as UTF-8 bytes in a stored, non-analyzed field.
        Field bytesField =
            ds.getField(BYTES_FIELD, Store.YES, Index.NOT_ANALYZED_NO_NORMS, TermVector.NO);
        bytesField.setValue(bdy.getBytes("UTF-8"));
        doc.add(bytesField);
      }
    }

    if (indexProperties) {
      Properties props = docData.getProps();
      if (props != null) {
        // Every property becomes its own field, named after the property key.
        for (final Map.Entry<Object, Object> entry : props.entrySet()) {
          Field f = ds.getField((String) entry.getKey(), storeVal, indexVal, termVecVal);
          f.setValue((String) entry.getValue());
          doc.add(f);
        }
        // Reset the properties so they are not added again on a later call.
        docData.setProps(null);
      }
    }

    // System.out.println("============== Created doc "+numDocsCreated+" :\n"+doc+"\n==========");
    return doc;
  }
예제 #6
0
  // Builds the shared in-memory index and searcher once, when the class is initialized.
  static {
    try {
      // set the theoretical maximum term count for 8bit (see docs for the number)
      BooleanQuery.setMaxClauseCount(7 * 255 * 2 + 255);

      directory = new RAMDirectory();
      IndexWriter indexWriter =
          new IndexWriter(directory, new WhitespaceAnalyzer(), true, MaxFieldLength.UNLIMITED);

      // Stored fields: same value in each, indexed with different precision steps.
      NumericField field8 = new NumericField("field8", 8, Field.Store.YES, true);
      NumericField field6 = new NumericField("field6", 6, Field.Store.YES, true);
      NumericField field4 = new NumericField("field4", 4, Field.Store.YES, true);
      NumericField field2 = new NumericField("field2", 2, Field.Store.YES, true);
      NumericField fieldNoTrie =
          new NumericField("field" + Integer.MAX_VALUE, Integer.MAX_VALUE, Field.Store.YES, true);

      // Unstored fields holding consecutive ascending values.
      NumericField ascfield8 = new NumericField("ascfield8", 8, Field.Store.NO, true);
      NumericField ascfield6 = new NumericField("ascfield6", 6, Field.Store.NO, true);
      NumericField ascfield4 = new NumericField("ascfield4", 4, Field.Store.NO, true);
      NumericField ascfield2 = new NumericField("ascfield2", 2, Field.Store.NO, true);

      // A single document instance is reused; only the field values change per iteration.
      Document sharedDoc = new Document();
      // add fields, that have a distance to test general functionality
      sharedDoc.add(field8);
      sharedDoc.add(field6);
      sharedDoc.add(field4);
      sharedDoc.add(field2);
      sharedDoc.add(fieldNoTrie);
      // add ascending fields with a distance of 1, beginning at -noDocs/2 to test the correct
      // splitting of range and inclusive/exclusive
      sharedDoc.add(ascfield8);
      sharedDoc.add(ascfield6);
      sharedDoc.add(ascfield4);
      sharedDoc.add(ascfield2);

      // Add a series of noDocs docs with increasing long values, by updating the fields
      for (int docIndex = 0; docIndex < noDocs; docIndex++) {
        final long spacedValue = distance * docIndex + startOffset;
        field8.setLongValue(spacedValue);
        field6.setLongValue(spacedValue);
        field4.setLongValue(spacedValue);
        field2.setLongValue(spacedValue);
        fieldNoTrie.setLongValue(spacedValue);

        final long ascendingValue = docIndex - (noDocs / 2);
        ascfield8.setLongValue(ascendingValue);
        ascfield6.setLongValue(ascendingValue);
        ascfield4.setLongValue(ascendingValue);
        ascfield2.setLongValue(ascendingValue);

        indexWriter.addDocument(sharedDoc);
      }

      indexWriter.optimize();
      indexWriter.close();
      searcher = new IndexSearcher(directory, true);
    } catch (Exception e) {
      // A static initializer cannot throw checked exceptions, so wrap any failure.
      throw new Error(e);
    }
  }
예제 #7
0
  /**
   * Adds the given file to the index or, when it is a directory, recursively adds every file and
   * directory found below it. Files that cannot be read or opened are skipped silently.
   *
   * <p>NOTE: This method indexes one document per input file. This is slow. For good throughput,
   * put multiple documents into your input file(s). An example of this is in the benchmark module,
   * which can create "line doc" files, one document per line, using the <a
   * href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
   * >WriteLineDocTask</a>.
   *
   * @param writer writer to the index where the file/directory information is stored
   * @param file the file to index, or the directory to recurse into
   * @throws IOException if reading the file or writing to the index fails
   */
  static void indexDocs(IndexWriter writer, File file) throws IOException {
    // Unreadable entries are ignored.
    if (!file.canRead()) {
      return;
    }

    if (file.isDirectory()) {
      String[] children = file.list();
      // list() yields null when an IO error occurs; treat that like an empty directory
      if (children != null) {
        for (String child : children) {
          indexDocs(writer, new File(file, child));
        }
      }
      return;
    }

    FileInputStream input;
    try {
      input = new FileInputStream(file);
    } catch (FileNotFoundException ignored) {
      // Deliberately skipped: at least on Windows some temporary files fail here with an
      // "access denied" message even though canRead() reported them as readable.
      return;
    }

    try {
      Document doc = new Document();

      // "path": stored and searchable as a single untokenized term, without norms, term
      // frequencies or positions.
      Field pathField =
          new Field("path", file.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
      pathField.setIndexOptions(IndexOptions.DOCS_ONLY);
      doc.add(pathField);

      // "modified": last-modified timestamp as a NumericField, so it can be filtered
      // efficiently with NumericRangeFilter. Millisecond resolution is often finer than
      // needed; a coarser number such as 2011021714 (February 17, 2011, 2-3 PM) could be
      // indexed instead.
      NumericField modifiedField = new NumericField("modified");
      modifiedField.setLongValue(file.lastModified());
      doc.add(modifiedField);

      // "contents": passed as a Reader, so the text is tokenized and indexed but not stored.
      // The stream is decoded as UTF-8; files in another encoding will not match searches
      // for special characters.
      doc.add(new Field("contents", new BufferedReader(new InputStreamReader(input, "UTF-8"))));

      if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
        // Fresh index: no older copy of this document can exist, so a plain add suffices.
        System.out.println("adding " + file);
        writer.addDocument(doc);
      } else {
        // Existing index: replace any older copy that matches the exact path.
        System.out.println("updating " + file);
        writer.updateDocument(new Term("path", file.getPath()), doc);
      }
    } finally {
      input.close();
    }
  }
  /**
   * Builds the shared test index once for the whole class: {@code noDocs} documents whose numeric
   * fields are indexed with several precision steps, then opens the reader and searcher on it.
   *
   * @throws Exception if building the index fails
   */
  @BeforeClass
  public static void beforeClass() throws Exception {
    directory = newDirectory();
    RandomIndexWriter indexWriter =
        new RandomIndexWriter(
            random,
            directory,
            newIndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))
                .setMaxBufferedDocs(_TestUtil.nextInt(random, 50, 1000)));

    // Stored fields: same value in each, indexed with different precision steps.
    NumericField field8 = new NumericField("field8", 8, Field.Store.YES, true);
    NumericField field6 = new NumericField("field6", 6, Field.Store.YES, true);
    NumericField field4 = new NumericField("field4", 4, Field.Store.YES, true);
    NumericField field2 = new NumericField("field2", 2, Field.Store.YES, true);
    NumericField fieldNoTrie =
        new NumericField("field" + Integer.MAX_VALUE, Integer.MAX_VALUE, Field.Store.YES, true);

    // Unstored fields holding consecutive ascending values.
    NumericField ascfield8 = new NumericField("ascfield8", 8, Field.Store.NO, true);
    NumericField ascfield6 = new NumericField("ascfield6", 6, Field.Store.NO, true);
    NumericField ascfield4 = new NumericField("ascfield4", 4, Field.Store.NO, true);
    NumericField ascfield2 = new NumericField("ascfield2", 2, Field.Store.NO, true);

    // A single document instance is reused; only the field values change per iteration.
    Document sharedDoc = new Document();
    // add fields, that have a distance to test general functionality
    sharedDoc.add(field8);
    sharedDoc.add(field6);
    sharedDoc.add(field4);
    sharedDoc.add(field2);
    sharedDoc.add(fieldNoTrie);
    // add ascending fields with a distance of 1, beginning at -noDocs/2 to test the correct
    // splitting of range and inclusive/exclusive
    sharedDoc.add(ascfield8);
    sharedDoc.add(ascfield6);
    sharedDoc.add(ascfield4);
    sharedDoc.add(ascfield2);

    // Add a series of noDocs docs with increasing long values, by updating the fields
    for (int docIndex = 0; docIndex < noDocs; docIndex++) {
      final long spacedValue = distance * docIndex + startOffset;
      field8.setLongValue(spacedValue);
      field6.setLongValue(spacedValue);
      field4.setLongValue(spacedValue);
      field2.setLongValue(spacedValue);
      fieldNoTrie.setLongValue(spacedValue);

      final long ascendingValue = docIndex - (noDocs / 2);
      ascfield8.setLongValue(ascendingValue);
      ascfield6.setLongValue(ascendingValue);
      ascfield4.setLongValue(ascendingValue);
      ascfield2.setLongValue(ascendingValue);

      indexWriter.addDocument(sharedDoc);
    }

    // The reader is obtained from the writer before the writer is closed.
    reader = indexWriter.getReader();
    searcher = newSearcher(reader);
    indexWriter.close();
  }
예제 #9
0
  /**
   * Creates a Lucene field for one attribute, configured according to {@code fieldConfig}.
   *
   * <p>Index, term vector and field type are taken from the attribute-specific mapping in {@code
   * fieldConfig.hsFieldName2FieldMapping} when one exists and defines them, otherwise from {@code
   * fieldConfig.defaultFieldMapping}. The store setting always comes from the default mapping.
   *
   * <p>Which kind of field is created depends on the resolved field type:
   *
   * <ul>
   *   <li>STRING - a plain {@link Field} with the resolved index and term vector settings
   *   <li>any other type with a null/whitespace value - a plain, unindexed {@link Field}, because
   *       an empty value cannot be stored as a number
   *   <li>number types - a {@link NumericField} holding the parsed int/long/float/double
   *   <li>DATE - a {@link NumericField} holding the date converted by {@code
   *       DateUtils.date2Number}
   *   <li>TIME - a {@link NumericField} holding the time of day as the int {@code HHmmssSSS}
   * </ul>
   *
   * <p>Numeric fields are indexed only when the resolved index setting is {@code ANALYZED} or
   * {@code ANALYZED_NO_NORMS}.
   *
   * @param strAttName the attribute's name, used as field name
   * @param strAttValue the attribute's value; {@code null} is treated as the empty string
   * @param fieldConfig the field configuration holding the per-attribute and default mappings
   * @return the created field, or {@code null} when the field type is not handled or a DATE/TIME
   *     value cannot be parsed
   * @throws NumberFormatException if a non-blank value of a number type is not a valid number
   */
  public static AbstractField createField(
      String strAttName, String strAttValue, FieldConfig fieldConfig) {

    // NOTE(review): unlike index, term vector and field type, the store setting is never looked
    // up in the attribute-specific mapping - confirm this is intended.
    Store store = fieldConfig.defaultFieldMapping.store;

    FieldMapping fieldMapping4Att = fieldConfig.hsFieldName2FieldMapping.get(strAttName);

    Index index = fieldMapping4Att != null ? fieldMapping4Att.index : null;
    if (index == null) {
      index = fieldConfig.defaultFieldMapping.index;
    }

    TermVector termVector = fieldMapping4Att != null ? fieldMapping4Att.termVector : null;
    if (termVector == null) {
      termVector = fieldConfig.defaultFieldMapping.termVector;
    }

    // The kind of field to create is part of the configuration as well.
    FieldType fieldType = fieldMapping4Att != null ? fieldMapping4Att.fieldType : null;
    if (fieldType == null) {
      fieldType = fieldConfig.defaultFieldMapping.fieldType;
    }

    if (strAttValue == null) {
      strAttValue = "";
    }

    if (FieldType.STRING.equals(fieldType)) {
      return new Field(strAttName, strAttValue, store, index, termVector);
    }

    if (StringUtils.nullOrWhitespace(strAttValue)) {
      // An empty value cannot be stored as a numeric value. Mixing a string field and a numeric
      // field under the same attribute name appears to work, so the empty value is kept as a
      // plain field. It is deliberately not indexed, because an indexed empty value causes
      // problems when sorting by this numeric field.
      return new Field(strAttName, strAttValue, store, Index.NO, TermVector.NO);
    }

    // Shared by all numeric branches below: only the ANALYZED variants enable indexing.
    final boolean bIndex = index == Index.ANALYZED || index == Index.ANALYZED_NO_NORMS;

    if (m_llNumberTypes.contains(fieldType)) {
      NumericField numericField = new NumericField(strAttName, store, bIndex);

      if (FieldType.INTEGER.equals(fieldType)) {
        numericField.setIntValue(Integer.parseInt(strAttValue));
      } else if (FieldType.LONG.equals(fieldType)) {
        numericField.setLongValue(Long.parseLong(strAttValue));
      } else if (FieldType.FLOAT.equals(fieldType)) {
        numericField.setFloatValue(Float.parseFloat(strAttValue));
      } else if (FieldType.DOUBLE.equals(fieldType)) {
        numericField.setDoubleValue(Double.parseDouble(strAttValue));
      }

      return numericField;
    }

    if (FieldType.DATE.equals(fieldType)) {
      Date parsedDate = DateParser.parseDateString(strAttValue);
      if (parsedDate == null) {
        return null;
      }

      NumericField dateField = new NumericField(strAttName, store, bIndex);
      dateField.setLongValue(DateUtils.date2Number(parsedDate));
      return dateField;
    }

    if (FieldType.TIME.equals(fieldType)) {
      Date parsedDate = DateParser.parseDateString(strAttValue);
      if (parsedDate == null) {
        // Same contract as the DATE branch: an unparsable value yields no field instead of a
        // NullPointerException from SimpleDateFormat.format(null).
        return null;
      }

      NumericField timeField = new NumericField(strAttName, store, bIndex);
      // HHmmssSSS is at most 235959999 and therefore always fits into an int.
      timeField.setIntValue(Integer.parseInt(new SimpleDateFormat("HHmmssSSS").format(parsedDate)));
      return timeField;
    }

    // Unhandled field type: no field can be generated.
    return null;
  }