Пример #1
0
  private static void populateFieldInfo(
      IndexSchema schema,
      Map<String, List<String>> typeusemap,
      SimpleOrderedMap<Object> fields,
      SchemaField uniqueField,
      SchemaField f) {
    FieldType ft = f.getType();
    SimpleOrderedMap<Object> field = new SimpleOrderedMap<Object>();
    field.add("type", ft.getTypeName());
    field.add("flags", getFieldFlags(f));
    if (f.isRequired()) {
      field.add("required", f.isRequired());
    }
    if (f.getDefaultValue() != null) {
      field.add("default", f.getDefaultValue());
    }
    if (f == uniqueField) {
      field.add("uniqueKey", true);
    }
    if (ft.getAnalyzer().getPositionIncrementGap(f.getName()) != 0) {
      field.add("positionIncrementGap", ft.getAnalyzer().getPositionIncrementGap(f.getName()));
    }
    field.add("copyDests", schema.getCopyFields(f.getName()));
    field.add("copySources", schema.getCopySources(f.getName()));

    fields.add(f.getName(), field);

    List<String> v = typeusemap.get(ft.getTypeName());
    if (v == null) {
      v = new ArrayList<String>();
    }
    v.add(f.getName());
    typeusemap.put(ft.getTypeName(), v);
  }
Пример #2
0
  /** Return info from the index */
  private static SimpleOrderedMap<Object> getSchemaInfo(IndexSchema schema) {
    Map<String, List<String>> typeusemap = new HashMap<String, List<String>>();
    SimpleOrderedMap<Object> fields = new SimpleOrderedMap<Object>();
    SchemaField uniqueField = schema.getUniqueKeyField();
    for (SchemaField f : schema.getFields().values()) {
      populateFieldInfo(schema, typeusemap, fields, uniqueField, f);
    }

    SimpleOrderedMap<Object> dynamicFields = new SimpleOrderedMap<Object>();
    for (SchemaField f : schema.getDynamicFieldPrototypes()) {
      populateFieldInfo(schema, typeusemap, dynamicFields, uniqueField, f);
    }
    SimpleOrderedMap<Object> types = new SimpleOrderedMap<Object>();
    for (FieldType ft : schema.getFieldTypes().values()) {
      SimpleOrderedMap<Object> field = new SimpleOrderedMap<Object>();
      field.add("fields", typeusemap.get(ft.getTypeName()));
      field.add("tokenized", ft.isTokenized());
      field.add("className", ft.getClass().getName());
      field.add("indexAnalyzer", getAnalyzerInfo(ft.getAnalyzer()));
      field.add("queryAnalyzer", getAnalyzerInfo(ft.getQueryAnalyzer()));
      types.add(ft.getTypeName(), field);
    }

    SimpleOrderedMap<Object> finfo = new SimpleOrderedMap<Object>();
    finfo.add("fields", fields);
    finfo.add("dynamicFields", dynamicFields);
    finfo.add("uniqueKeyField", null == uniqueField ? null : uniqueField.getName());
    finfo.add("defaultSearchField", schema.getDefaultSearchFieldName());
    finfo.add("types", types);
    return finfo;
  }
Пример #3
0
  @SuppressWarnings("unchecked")
  private void loadExternalFileDictionary(IndexSchema schema, SolrResourceLoader loader) {
    try {

      // Get the field's analyzer
      if (fieldTypeName != null && schema.getFieldTypeNoEx(fieldTypeName) != null) {
        FieldType fieldType = schema.getFieldTypes().get(fieldTypeName);
        // Do index-time analysis using the given fieldType's analyzer
        RAMDirectory ramDir = new RAMDirectory();
        IndexWriter writer =
            new IndexWriter(
                ramDir, fieldType.getAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
        writer.setMergeFactor(300);
        writer.setMaxBufferedDocs(150);

        List<String> lines = loader.getLines(sourceLocation, characterEncoding);

        for (String s : lines) {
          Document d = new Document();
          d.add(new Field(WORD_FIELD_NAME, s, Field.Store.NO, Field.Index.TOKENIZED));
          writer.addDocument(d);
        }
        writer.optimize();
        writer.close();

        dictionary = new HighFrequencyDictionary(IndexReader.open(ramDir), WORD_FIELD_NAME, 0.0f);
      } else {
        // check if character encoding is defined
        if (characterEncoding == null) {
          dictionary = new PlainTextDictionary(loader.openResource(sourceLocation));
        } else {
          dictionary =
              new PlainTextDictionary(
                  new InputStreamReader(loader.openResource(sourceLocation), characterEncoding));
        }
      }

    } catch (IOException e) {
      log.error("Unable to load spellings", e);
    }
  }
  @Override
  public Lookup create(NamedList params, SolrCore core) {

    // mandatory parameter
    Object fieldTypeName = params.get(AnalyzingLookupFactory.QUERY_ANALYZER);
    if (fieldTypeName == null) {
      throw new IllegalArgumentException(
          "Error in configuration: "
              + AnalyzingLookupFactory.QUERY_ANALYZER
              + " parameter is mandatory");
    }
    // retrieve index and query analyzers for the field
    FieldType ft = core.getLatestSchema().getFieldTypeByName(fieldTypeName.toString());
    if (ft == null) {
      throw new IllegalArgumentException(
          "Error in configuration: " + fieldTypeName.toString() + " is not defined in the schema");
    }
    Analyzer indexAnalyzer = ft.getAnalyzer();
    Analyzer queryAnalyzer = ft.getQueryAnalyzer();

    // optional parameters
    boolean exactMatchFirst =
        (params.get(AnalyzingLookupFactory.EXACT_MATCH_FIRST) != null)
            ? Boolean.valueOf(params.get(AnalyzingLookupFactory.EXACT_MATCH_FIRST).toString())
            : true;

    boolean preserveSep =
        (params.get(AnalyzingLookupFactory.PRESERVE_SEP) != null)
            ? Boolean.valueOf(params.get(AnalyzingLookupFactory.PRESERVE_SEP).toString())
            : true;

    int options = 0;
    if (exactMatchFirst) {
      options |= FuzzySuggester.EXACT_FIRST;
    }
    if (preserveSep) {
      options |= FuzzySuggester.PRESERVE_SEP;
    }

    int maxSurfaceFormsPerAnalyzedForm =
        (params.get(AnalyzingLookupFactory.MAX_SURFACE_FORMS) != null)
            ? Integer.parseInt(params.get(AnalyzingLookupFactory.MAX_SURFACE_FORMS).toString())
            : 256;

    int maxGraphExpansions =
        (params.get(AnalyzingLookupFactory.MAX_EXPANSIONS) != null)
            ? Integer.parseInt(params.get(AnalyzingLookupFactory.MAX_EXPANSIONS).toString())
            : -1;

    boolean preservePositionIncrements =
        params.get(AnalyzingLookupFactory.PRESERVE_POSITION_INCREMENTS) != null
            ? Boolean.valueOf(
                params.get(AnalyzingLookupFactory.PRESERVE_POSITION_INCREMENTS).toString())
            : false;

    int maxEdits =
        (params.get(MAX_EDITS) != null)
            ? Integer.parseInt(params.get(MAX_EDITS).toString())
            : FuzzySuggester.DEFAULT_MAX_EDITS;

    boolean transpositions =
        (params.get(TRANSPOSITIONS) != null)
            ? Boolean.parseBoolean(params.get(TRANSPOSITIONS).toString())
            : FuzzySuggester.DEFAULT_TRANSPOSITIONS;

    int nonFuzzyPrefix =
        (params.get(NON_FUZZY_PREFIX) != null)
            ? Integer.parseInt(params.get(NON_FUZZY_PREFIX).toString())
            : FuzzySuggester.DEFAULT_NON_FUZZY_PREFIX;

    int minFuzzyLength =
        (params.get(MIN_FUZZY_LENGTH) != null)
            ? Integer.parseInt(params.get(MIN_FUZZY_LENGTH).toString())
            : FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH;

    boolean unicodeAware =
        (params.get(UNICODE_AWARE) != null)
            ? Boolean.valueOf(params.get(UNICODE_AWARE).toString())
            : FuzzySuggester.DEFAULT_UNICODE_AWARE;

    return new FuzzySuggester(
        indexAnalyzer,
        queryAnalyzer,
        options,
        maxSurfaceFormsPerAnalyzedForm,
        maxGraphExpansions,
        preservePositionIncrements,
        maxEdits,
        transpositions,
        nonFuzzyPrefix,
        minFuzzyLength,
        unicodeAware);
  }