public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
    SolrParams params = req.getParams();
    params = adjustParams(params);
    req.setParams(params);
    TupleStream tupleStream = null;

    try {
      tupleStream = this.streamFactory.constructStream(params.get("expr"));
    } catch (Exception e) {
      // Catch exceptions that occur while the stream is being created. This will include streaming
      // expression parse errors.
      SolrException.log(logger, e);
      rsp.add("result-set", new DummyErrorStream(e));

      return;
    }

    int worker = params.getInt("workerID", 0);
    int numWorkers = params.getInt("numWorkers", 1);
    StreamContext context = new StreamContext();
    context.workerID = worker;
    context.numWorkers = numWorkers;
    context.setSolrClientCache(clientCache);
    tupleStream.setStreamContext(context);
    rsp.add("result-set", new TimerStream(new ExceptionStream(tupleStream)));
  }
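  // The handler above calls adjustParams(...) before constructing the stream. A minimal sketch of
  // such a helper, assuming it only copies the incoming params and forces omitHeader (the real
  // implementation may do more):
  private SolrParams adjustParams(SolrParams params) {
    ModifiableSolrParams adjusted = new ModifiableSolrParams();
    adjusted.add(params);                           // copy every incoming parameter
    adjusted.set(CommonParams.OMIT_HEADER, "true"); // streaming responses typically omit the header
    return adjusted;
  }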
Example 2
  public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
    SolrParams params = req.getParams();
    params = adjustParams(params);
    req.setParams(params);

    if (params.get("action") != null) {
      handleAdmin(req, rsp, params);
      return;
    }

    TupleStream tupleStream;

    try {
      tupleStream = this.streamFactory.constructStream(params.get("expr"));
    } catch (Exception e) {
      // Catch exceptions that occur while the stream is being created. This will include streaming
      // expression parse errors.
      SolrException.log(logger, e);
      rsp.add("result-set", new DummyErrorStream(e));

      return;
    }

    int worker = params.getInt("workerID", 0);
    int numWorkers = params.getInt("numWorkers", 1);
    StreamContext context = new StreamContext();
    context.workerID = worker;
    context.numWorkers = numWorkers;
    context.setSolrClientCache(clientCache);
    context.setModelCache(modelCache);
    context.put("core", this.coreName);
    context.put("solr-core", req.getCore());
    tupleStream.setStreamContext(context);

    // if asking for explanation then go get it
    if (params.getBool("explain", false)) {
      rsp.add("explanation", tupleStream.toExplanation(this.streamFactory));
    }

    if (tupleStream instanceof DaemonStream) {
      DaemonStream daemonStream = (DaemonStream) tupleStream;
      if (daemons.containsKey(daemonStream.getId())) {
        daemons.remove(daemonStream.getId()).close();
      }
      daemonStream.setDaemons(daemons);
      daemonStream.open(); // This will start the daemonStream
      daemons.put(daemonStream.getId(), daemonStream);
      rsp.add(
          "result-set",
          new DaemonResponseStream("Daemon:" + daemonStream.getId() + " started on " + coreName));
    } else {
      rsp.add("result-set", new TimerStream(new ExceptionStream(tupleStream)));
    }
  }
Example 3
  @Override
  public XMLLoader init(SolrParams args) {
    // Init StAX parser:
    inputFactory = XMLInputFactory.newInstance();
    EmptyEntityResolver.configureXMLInputFactory(inputFactory);
    inputFactory.setXMLReporter(xmllog);
    try {
      // The java 1.6 bundled stax parser (sjsxp) does not currently have a thread-safe
      // XMLInputFactory, as that implementation tries to cache and reuse the
      // XMLStreamReader.  Setting the parser-specific "reuse-instance" property to false
      // prevents this.
      // All other known open-source stax parsers (and the bea ref impl)
      // have thread-safe factories.
      inputFactory.setProperty("reuse-instance", Boolean.FALSE);
    } catch (IllegalArgumentException ex) {
      // Other implementations will likely throw this exception since "reuse-instance"
      // is implementation specific.
      log.debug("Unable to set the 'reuse-instance' property for the input chain: " + inputFactory);
    }

    // Init SAX parser (for XSL):
    saxFactory = SAXParserFactory.newInstance();
    saxFactory.setNamespaceAware(true); // XSL needs this!
    EmptyEntityResolver.configureSAXParserFactory(saxFactory);

    xsltCacheLifetimeSeconds = XSLT_CACHE_DEFAULT;
    if (args != null) {
      xsltCacheLifetimeSeconds = args.getInt(XSLT_CACHE_PARAM, XSLT_CACHE_DEFAULT);
      log.info("xsltCacheLifetimeSeconds=" + xsltCacheLifetimeSeconds);
    }
    return this;
  }
  public void processGetVersions(ResponseBuilder rb) throws IOException {
    SolrQueryRequest req = rb.req;
    SolrQueryResponse rsp = rb.rsp;
    SolrParams params = req.getParams();

    if (!params.getBool(COMPONENT_NAME, true)) {
      return;
    }

    int nVersions = params.getInt("getVersions", -1);
    if (nVersions == -1) return;

    String sync = params.get("sync");
    if (sync != null) {
      processSync(rb, nVersions, sync);
      return;
    }

    UpdateLog ulog = req.getCore().getUpdateHandler().getUpdateLog();
    if (ulog == null) return;

    UpdateLog.RecentUpdates recentUpdates = ulog.getRecentUpdates();
    try {
      rb.rsp.add("versions", recentUpdates.getVersions(nVersions));
    } finally {
      recentUpdates.close(); // cache this somehow?
    }
  }
  /**
   * Generates a list of Highlighted query fragments for each item in a list of documents, or
   * returns null if highlighting is disabled.
   *
   * @param docs query results
   * @param query the query
   * @param req the current request
   * @param defaultFields default list of fields to summarize
   * @return NamedList containing a NamedList for each document, which in turn contains a set of
   *     (field, summary) pairs.
   */
  @Override
  @SuppressWarnings("unchecked")
  public NamedList<Object> doHighlighting(
      DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException {
    SolrParams params = req.getParams();
    if (!isHighlightingEnabled(params)) return null;

    SolrIndexSearcher searcher = req.getSearcher();
    IndexSchema schema = searcher.getSchema();
    NamedList fragments = new SimpleOrderedMap();
    String[] fieldNames = getHighlightFields(query, req, defaultFields);
    Set<String> fset = new HashSet<String>();

    {
      // pre-fetch documents using the Searcher's doc cache
      for (String f : fieldNames) {
        fset.add(f);
      }
      // fetch unique key if one exists.
      SchemaField keyField = schema.getUniqueKeyField();
      if (null != keyField) fset.add(keyField.getName());
    }

    // get FastVectorHighlighter instance out of the processing loop
    FastVectorHighlighter fvh =
        new FastVectorHighlighter(
            // FVH cannot process the hl.usePhraseHighlighter parameter on a per-field basis
            params.getBool(HighlightParams.USE_PHRASE_HIGHLIGHTER, true),
            // FVH cannot process the hl.requireFieldMatch parameter on a per-field basis
            params.getBool(HighlightParams.FIELD_MATCH, false));
    fvh.setPhraseLimit(params.getInt(HighlightParams.PHRASE_LIMIT, Integer.MAX_VALUE));
    FieldQuery fieldQuery = fvh.getFieldQuery(query, searcher.getIndexReader());

    // Highlight each document
    DocIterator iterator = docs.iterator();
    for (int i = 0; i < docs.size(); i++) {
      int docId = iterator.nextDoc();
      Document doc = searcher.doc(docId, fset);
      NamedList docSummaries = new SimpleOrderedMap();
      for (String fieldName : fieldNames) {
        fieldName = fieldName.trim();
        if (useFastVectorHighlighter(params, schema, fieldName))
          doHighlightingByFastVectorHighlighter(
              fvh, fieldQuery, req, docSummaries, docId, doc, fieldName);
        else doHighlightingByHighlighter(query, req, docSummaries, docId, doc, fieldName);
      }
      String printId = schema.printableUniqueKey(doc);
      fragments.add(printId, docSummaries);
    }
    return fragments;
  }
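  // doHighlighting(...) above delegates the per-field choice to useFastVectorHighlighter(...). A
  // rough sketch of that check, assuming FVH needs term vectors with positions and offsets plus
  // the hl.useFastVectorHighlighter flag; the actual rules may differ:
  private boolean useFastVectorHighlighter(SolrParams params, IndexSchema schema, String fieldName) {
    SchemaField field = schema.getFieldOrNull(fieldName);
    return field != null
        && field.storeTermVector()
        && field.storeTermPositions()
        && field.storeTermOffsets()
        && params.getFieldBool(fieldName, HighlightParams.USE_FVH, false);
  }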
  public ExtractingDocumentLoader(
      SolrQueryRequest req,
      UpdateRequestProcessor processor,
      TikaConfig config,
      ParseContextConfig parseContextConfig,
      SolrContentHandlerFactory factory) {
    this.params = req.getParams();
    this.core = req.getCore();
    this.config = config;
    this.parseContextConfig = parseContextConfig;
    this.processor = processor;

    templateAdd = new AddUpdateCommand(req);
    templateAdd.overwrite = params.getBool(UpdateParams.OVERWRITE, true);
    templateAdd.commitWithin = params.getInt(UpdateParams.COMMIT_WITHIN, -1);

    // this is lightweight
    autoDetectParser = new AutoDetectParser(config);
    this.factory = factory;

    ignoreTikaException = params.getBool(ExtractingParams.IGNORE_TIKA_EXCEPTION, false);
  }
  CSVLoaderBase(SolrQueryRequest req, UpdateRequestProcessor processor) {
    this.processor = processor;
    this.params = req.getParams();
    this.literals = new HashMap<>();

    templateAdd = new AddUpdateCommand(req);
    templateAdd.overwrite = params.getBool(OVERWRITE, true);
    templateAdd.commitWithin = params.getInt(UpdateParams.COMMIT_WITHIN, -1);

    strategy =
        new CSVStrategy(
            ',',
            '"',
            CSVStrategy.COMMENTS_DISABLED,
            CSVStrategy.ESCAPE_DISABLED,
            false,
            false,
            false,
            true);
    String sep = params.get(SEPARATOR);
    if (sep != null) {
      if (sep.length() != 1)
        throw new SolrException(
            SolrException.ErrorCode.BAD_REQUEST, "Invalid separator:'" + sep + "'");
      strategy.setDelimiter(sep.charAt(0));
    }

    String encapsulator = params.get(ENCAPSULATOR);
    if (encapsulator != null) {
      if (encapsulator.length() != 1)
        throw new SolrException(
            SolrException.ErrorCode.BAD_REQUEST, "Invalid encapsulator:'" + encapsulator + "'");
    }

    String escape = params.get(ESCAPE);
    if (escape != null) {
      if (escape.length() != 1)
        throw new SolrException(
            SolrException.ErrorCode.BAD_REQUEST, "Invalid escape:'" + escape + "'");
    }
    rowId = params.get(ROW_ID);
    rowIdOffset = params.getInt(ROW_ID_OFFSET, 0);

    // if only encapsulator or escape is set, disable the other escaping mechanism
    if (encapsulator == null && escape != null) {
      strategy.setEncapsulator(CSVStrategy.ENCAPSULATOR_DISABLED);
      strategy.setEscape(escape.charAt(0));
    } else {
      if (encapsulator != null) {
        strategy.setEncapsulator(encapsulator.charAt(0));
      }
      if (escape != null) {
        char ch = escape.charAt(0);
        strategy.setEscape(ch);
        if (ch == '\\') {
          // If the escape is the standard backslash, then also enable
          // unicode escapes (it's harmless since 'u' would not otherwise
          // be escaped).
          strategy.setUnicodeEscapeInterpretation(true);
        }
      }
    }

    String fn = params.get(FIELDNAMES);
    fieldnames = fn != null ? commaSplit.split(fn, -1) : null;

    Boolean hasHeader = params.getBool(HEADER);

    skipLines = params.getInt(SKIPLINES, 0);

    if (fieldnames == null) {
      if (null == hasHeader) {
        // assume the file has the headers if they aren't supplied in the args
        hasHeader = true;
      } else if (!hasHeader) {
        throw new SolrException(
            SolrException.ErrorCode.BAD_REQUEST,
            "CSVLoader: must specify fieldnames=<fields>* or header=true");
      }
    } else {
      // if the fieldnames were supplied and the file has a header, we need to
      // skip over that header.
      if (hasHeader != null && hasHeader) skipLines++;

      prepareFields();
    }
  }
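  // A hypothetical helper showing request parameters that exercise the CSV parsing above; the
  // literal names below are assumptions standing in for the SEPARATOR/ENCAPSULATOR/HEADER/SKIPLINES constants.
  static SolrParams exampleCsvParams() {
    ModifiableSolrParams p = new ModifiableSolrParams();
    p.set("separator", ";");      // must be a single character, otherwise BAD_REQUEST
    p.set("encapsulator", "\"");  // must be a single character, otherwise BAD_REQUEST
    p.set("header", "true");      // first CSV line supplies the field names
    p.set("skipLines", "0");      // extra lines to skip before parsing records
    return p;
  }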
  @Override
  public void process(ResponseBuilder rb) throws IOException {
    SolrParams params = rb.req.getParams();
    if (!params.getBool(TermsParams.TERMS, false)) return;

    String[] fields = params.getParams(TermsParams.TERMS_FIELD);

    NamedList<Object> termsResult = new SimpleOrderedMap<>();
    rb.rsp.add("terms", termsResult);

    if (fields == null || fields.length == 0) return;

    int limit = params.getInt(TermsParams.TERMS_LIMIT, 10);
    if (limit < 0) {
      limit = Integer.MAX_VALUE;
    }

    String lowerStr = params.get(TermsParams.TERMS_LOWER);
    String upperStr = params.get(TermsParams.TERMS_UPPER);
    boolean upperIncl = params.getBool(TermsParams.TERMS_UPPER_INCLUSIVE, false);
    boolean lowerIncl = params.getBool(TermsParams.TERMS_LOWER_INCLUSIVE, true);
    boolean sort =
        !TermsParams.TERMS_SORT_INDEX.equals(
            params.get(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT));
    int freqmin = params.getInt(TermsParams.TERMS_MINCOUNT, 1);
    int freqmax = params.getInt(TermsParams.TERMS_MAXCOUNT, UNLIMITED_MAX_COUNT);
    if (freqmax < 0) {
      freqmax = Integer.MAX_VALUE;
    }
    String prefix = params.get(TermsParams.TERMS_PREFIX_STR);
    String regexp = params.get(TermsParams.TERMS_REGEXP_STR);
    Pattern pattern = regexp != null ? Pattern.compile(regexp, resolveRegexpFlags(params)) : null;

    boolean raw = params.getBool(TermsParams.TERMS_RAW, false);

    final AtomicReader indexReader = rb.req.getSearcher().getAtomicReader();
    Fields lfields = indexReader.fields();

    for (String field : fields) {
      NamedList<Integer> fieldTerms = new NamedList<>();
      termsResult.add(field, fieldTerms);

      Terms terms = lfields == null ? null : lfields.terms(field);
      if (terms == null) {
        // no terms for this field
        continue;
      }

      FieldType ft = raw ? null : rb.req.getSchema().getFieldTypeNoEx(field);
      if (ft == null) ft = new StrField();

      // prefix must currently be text
      BytesRef prefixBytes = prefix == null ? null : new BytesRef(prefix);

      BytesRef upperBytes = null;
      if (upperStr != null) {
        upperBytes = new BytesRef();
        ft.readableToIndexed(upperStr, upperBytes);
      }

      BytesRef lowerBytes;
      if (lowerStr == null) {
        // If no lower bound was specified, use the prefix
        lowerBytes = prefixBytes;
      } else if (raw) {
        // TODO: how to handle binary? perhaps we don't for "raw"... or if the field exists
        // perhaps we detect if the FieldType is non-character and expect hex if so?
        lowerBytes = new BytesRef(lowerStr);
      } else {
        lowerBytes = new BytesRef();
        ft.readableToIndexed(lowerStr, lowerBytes);
      }

      TermsEnum termsEnum = terms.iterator(null);
      BytesRef term = null;

      if (lowerBytes != null) {
        if (termsEnum.seekCeil(lowerBytes) == TermsEnum.SeekStatus.END) {
          termsEnum = null;
        } else {
          term = termsEnum.term();
          // Only advance the enum if we are excluding the lower bound and the lower Term actually
          // matches
          if (lowerIncl == false && term.equals(lowerBytes)) {
            term = termsEnum.next();
          }
        }
      } else {
        // position termsEnum on first term
        term = termsEnum.next();
      }

      int i = 0;
      BoundedTreeSet<CountPair<BytesRef, Integer>> queue =
          (sort ? new BoundedTreeSet<CountPair<BytesRef, Integer>>(limit) : null);
      CharsRef external = new CharsRef();
      while (term != null && (i < limit || sort)) {
        boolean externalized = false; // did we fill in "external" yet for this term?

        // stop if the prefix doesn't match
        if (prefixBytes != null && !StringHelper.startsWith(term, prefixBytes)) break;

        if (pattern != null) {
          // indexed text or external text?
          // TODO: support "raw" mode?
          ft.indexedToReadable(term, external);
          externalized = true;
          if (!pattern.matcher(external).matches()) {
            term = termsEnum.next();
            continue;
          }
        }

        if (upperBytes != null) {
          int upperCmp = term.compareTo(upperBytes);
          // if we are past the upper term, or equal to it when not including the upper bound, then stop.
          if (upperCmp > 0 || (upperCmp == 0 && !upperIncl)) break;
        }

        // This is a good term in the range.  Check if mincount/maxcount conditions are satisfied.
        int docFreq = termsEnum.docFreq();
        if (docFreq >= freqmin && docFreq <= freqmax) {
          // add the term to the list
          if (sort) {
            queue.add(new CountPair<>(BytesRef.deepCopyOf(term), docFreq));
          } else {

            // TODO: handle raw somehow
            if (!externalized) {
              ft.indexedToReadable(term, external);
            }
            fieldTerms.add(external.toString(), docFreq);
            i++;
          }
        }

        term = termsEnum.next();
      }

      if (sort) {
        for (CountPair<BytesRef, Integer> item : queue) {
          if (i >= limit) break;
          ft.indexedToReadable(item.key, external);
          fieldTerms.add(external.toString(), item.val);
          i++;
        }
      }
    }
  }
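  // A hypothetical helper showing a request that drives the loop above; the values are invented
  // and the literal names are assumed to match the TermsParams constants used in the code.
  static SolrParams exampleTermsParams() {
    ModifiableSolrParams p = new ModifiableSolrParams();
    p.set("terms", "true");        // TermsParams.TERMS
    p.set("terms.fl", "title");    // TermsParams.TERMS_FIELD
    p.set("terms.prefix", "sol");  // TermsParams.TERMS_PREFIX_STR
    p.set("terms.limit", "25");    // TermsParams.TERMS_LIMIT
    p.set("terms.sort", "count");  // TermsParams.TERMS_SORT: "count" or "index"
    return p;
  }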
  @Override
  public void process(ResponseBuilder rb) throws IOException {
    SolrParams params = rb.req.getParams();
    if (params.getBool(TermsParams.TERMS, false)) {
      String lowerStr = params.get(TermsParams.TERMS_LOWER, null);
      String[] fields = params.getParams(TermsParams.TERMS_FIELD);
      if (fields != null && fields.length > 0) {
        NamedList terms = new SimpleOrderedMap();
        rb.rsp.add("terms", terms);
        int limit = params.getInt(TermsParams.TERMS_LIMIT, 10);
        if (limit < 0) {
          limit = Integer.MAX_VALUE;
        }
        String upperStr = params.get(TermsParams.TERMS_UPPER);
        boolean upperIncl = params.getBool(TermsParams.TERMS_UPPER_INCLUSIVE, false);
        boolean lowerIncl = params.getBool(TermsParams.TERMS_LOWER_INCLUSIVE, true);
        boolean sort =
            !TermsParams.TERMS_SORT_INDEX.equals(
                params.get(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT));
        int freqmin = params.getInt(TermsParams.TERMS_MINCOUNT, 1);
        int freqmax = params.getInt(TermsParams.TERMS_MAXCOUNT, UNLIMITED_MAX_COUNT);
        if (freqmax < 0) {
          freqmax = Integer.MAX_VALUE;
        }
        String prefix = params.get(TermsParams.TERMS_PREFIX_STR);
        String regexp = params.get(TermsParams.TERMS_REGEXP_STR);
        Pattern pattern =
            regexp != null ? Pattern.compile(regexp, resolveRegexpFlags(params)) : null;

        boolean raw = params.getBool(TermsParams.TERMS_RAW, false);
        for (int j = 0; j < fields.length; j++) {
          String field = StringHelper.intern(fields[j]);
          FieldType ft = raw ? null : rb.req.getSchema().getFieldTypeNoEx(field);
          if (ft == null) ft = new StrField();

          // If no lower bound was specified, use the prefix
          String lower = lowerStr == null ? prefix : (raw ? lowerStr : ft.toInternal(lowerStr));
          if (lower == null) lower = "";
          String upper = upperStr == null ? null : (raw ? upperStr : ft.toInternal(upperStr));

          Term lowerTerm = new Term(field, lower);
          Term upperTerm = upper == null ? null : new Term(field, upper);

          TermEnum termEnum =
              rb.req
                  .getSearcher()
                  .getReader()
                  .terms(lowerTerm); // this will be positioned ready to go
          int i = 0;
          BoundedTreeSet<CountPair<String, Integer>> queue =
              (sort ? new BoundedTreeSet<CountPair<String, Integer>>(limit) : null);
          NamedList fieldTerms = new NamedList();
          terms.add(field, fieldTerms);
          Term lowerTestTerm = termEnum.term();

          // Only advance the enum if we are excluding the lower bound and the lower Term actually
          // matches
          if (lowerTestTerm != null
              && lowerIncl == false
              && lowerTestTerm.field() == field // intern'd comparison
              && lowerTestTerm.text().equals(lower)) {
            termEnum.next();
          }

          while (i < limit || sort) {

            Term theTerm = termEnum.term();

            // check for a different field, or the end of the index.
            if (theTerm == null || field != theTerm.field()) // intern'd comparison
            break;

            String indexedText = theTerm.text();

            // stop if the prefix doesn't match
            if (prefix != null && !indexedText.startsWith(prefix)) break;

            if (pattern != null && !pattern.matcher(indexedText).matches()) {
              termEnum.next();
              continue;
            }

            if (upperTerm != null) {
              int upperCmp = theTerm.compareTo(upperTerm);
              // if we are past the upper term, or equal to it when not including the upper bound, then stop.
              if (upperCmp > 0 || (upperCmp == 0 && !upperIncl)) break;
            }

            // This is a good term in the range.  Check if mincount/maxcount conditions are
            // satisfied.
            int docFreq = termEnum.docFreq();
            if (docFreq >= freqmin && docFreq <= freqmax) {
              // add the term to the list
              String label = raw ? indexedText : ft.indexedToReadable(indexedText);
              if (sort) {
                queue.add(new CountPair<String, Integer>(label, docFreq));
              } else {
                fieldTerms.add(label, docFreq);
                i++;
              }
            }

            termEnum.next();
          }

          termEnum.close();

          if (sort) {
            for (CountPair<String, Integer> item : queue) {
              if (i < limit) {
                fieldTerms.add(item.key, item.val);
                i++;
              } else {
                break;
              }
            }
          }
        }
      } else {
        throw new SolrException(
            SolrException.ErrorCode.BAD_REQUEST, "No terms.fl parameter specified");
      }
    }
  }
  private void initParams(SolrParams params) {
    if (params != null) {
      // Document-centric langId params
      setEnabled(params.getBool(LANGUAGE_ID, true));
      if (params.get(FIELDS_PARAM, "").length() > 0) {
        inputFields = params.get(FIELDS_PARAM, "").split(",");
      }
      langField = params.get(LANG_FIELD, DOCID_LANGFIELD_DEFAULT);
      langsField = params.get(LANGS_FIELD, DOCID_LANGSFIELD_DEFAULT);
      SchemaField uniqueKeyField = schema.getUniqueKeyField();
      docIdField =
          params.get(
              DOCID_PARAM, uniqueKeyField == null ? DOCID_FIELD_DEFAULT : uniqueKeyField.getName());
      fallbackValue = params.get(FALLBACK);
      if (params.get(FALLBACK_FIELDS, "").length() > 0) {
        fallbackFields = params.get(FALLBACK_FIELDS).split(",");
      }
      overwrite = params.getBool(OVERWRITE, false);
      langWhitelist = new HashSet<>();
      threshold = params.getDouble(THRESHOLD, DOCID_THRESHOLD_DEFAULT);
      if (params.get(LANG_WHITELIST, "").length() > 0) {
        for (String lang : params.get(LANG_WHITELIST, "").split(",")) {
          langWhitelist.add(lang);
        }
      }

      // Mapping params (field centric)
      enableMapping = params.getBool(MAP_ENABLE, false);
      if (params.get(MAP_FL, "").length() > 0) {
        mapFields = params.get(MAP_FL, "").split(",");
      } else {
        mapFields = inputFields;
      }
      mapKeepOrig = params.getBool(MAP_KEEP_ORIG, false);
      mapOverwrite = params.getBool(MAP_OVERWRITE, false);
      mapIndividual = params.getBool(MAP_INDIVIDUAL, false);

      // Process individual fields
      String[] mapIndividualFields = {};
      if (params.get(MAP_INDIVIDUAL_FL, "").length() > 0) {
        mapIndividualFields = params.get(MAP_INDIVIDUAL_FL, "").split(",");
      } else {
        mapIndividualFields = mapFields;
      }
      mapIndividualFieldsSet = new HashSet<>(Arrays.asList(mapIndividualFields));
      // Compile a union of the lists of fields to map
      allMapFieldsSet = new HashSet<>(Arrays.asList(mapFields));
      if (!Arrays.equals(mapFields, mapIndividualFields)) {
        allMapFieldsSet.addAll(mapIndividualFieldsSet);
      }

      // Normalize detected langcode onto normalized langcode
      lcMap = new HashMap<>();
      if (params.get(LCMAP) != null) {
        for (String mapping : params.get(LCMAP).split("[, ]")) {
          String[] keyVal = mapping.split(":");
          if (keyVal.length == 2) {
            lcMap.put(keyVal[0], keyVal[1]);
          } else {
            log.error(
                "Unsupported format for langid.lcmap: " + mapping + ". Skipping this mapping.");
          }
        }
      }

      // Language Code mapping
      mapLcMap = new HashMap<>();
      if (params.get(MAP_LCMAP) != null) {
        for (String mapping : params.get(MAP_LCMAP).split("[, ]")) {
          String[] keyVal = mapping.split(":");
          if (keyVal.length == 2) {
            mapLcMap.put(keyVal[0], keyVal[1]);
          } else {
            log.error(
                "Unsupported format for langid.map.lcmap: " + mapping + ". Skipping this mapping.");
          }
        }
      }
      enforceSchema = params.getBool(ENFORCE_SCHEMA, true);

      mapPattern = Pattern.compile(params.get(MAP_PATTERN, MAP_PATTERN_DEFAULT));
      mapReplaceStr = params.get(MAP_REPLACE, MAP_REPLACE_DEFAULT);
      maxFieldValueChars = params.getInt(MAX_FIELD_VALUE_CHARS, MAX_FIELD_VALUE_CHARS_DEFAULT);
      maxTotalChars = params.getInt(MAX_TOTAL_CHARS, MAX_TOTAL_CHARS_DEFAULT);
      if (maxFieldValueChars > maxTotalChars) {
        if (maxTotalChars == MAX_TOTAL_CHARS_DEFAULT) {
          // If the user specified only maxFieldValueChars, make maxTotalChars the same as it
          log.warn(
              MAX_FIELD_VALUE_CHARS
                  + " ("
                  + maxFieldValueChars
                  + ") is less than "
                  + MAX_TOTAL_CHARS
                  + " ("
                  + maxTotalChars
                  + ").  Setting "
                  + MAX_TOTAL_CHARS
                  + " to "
                  + maxFieldValueChars
                  + ".");
          maxTotalChars = maxFieldValueChars;
        } else {
          // If the user specified maxTotalChars, make maxFieldValueChars the same as it
          log.warn(
              MAX_FIELD_VALUE_CHARS
                  + " ("
                  + maxFieldValueChars
                  + ") is less than "
                  + MAX_TOTAL_CHARS
                  + " ("
                  + maxTotalChars
                  + ").  Setting "
                  + MAX_FIELD_VALUE_CHARS
                  + " to "
                  + maxTotalChars
                  + ".");
          maxFieldValueChars = maxTotalChars;
        }
      }
    }
    log.debug("LangId configured");

    if (inputFields.length == 0) {
      throw new SolrException(
          ErrorCode.BAD_REQUEST,
          "Missing or faulty configuration of LanguageIdentifierUpdateProcessor. Input fields must be specified as a comma separated list");
    }
  }
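  // A hypothetical helper illustrating the mapping syntax the two lcMap loops above expect; the
  // literal parameter names are assumptions standing in for the LCMAP / MAP_LCMAP constants.
  static SolrParams exampleLangIdMappingParams() {
    ModifiableSolrParams p = new ModifiableSolrParams();
    p.set("langid.lcmap", "zh_cn:zh zh_tw:zh");        // normalize detected language codes
    p.set("langid.map.lcmap", "ja:cjk ko:cjk zh:cjk"); // rename codes when mapping fields
    // entries are split on commas or spaces; anything not of the form "from:to" is logged and skipped
    return p;
  }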
Example 11
  /**
   * Verify that the PivotFields we're looking at don't violate any of the expected behaviors based
   * on the <code>TRACE_*</code> params found in the base params
   */
  private void assertTraceOk(
      String pivotName, SolrParams baseParams, List<PivotField> constraints) {
    if (null == constraints || 0 == constraints.size()) {
      return;
    }
    final int maxIdx = constraints.size() - 1;

    final int min = baseParams.getInt(TRACE_MIN, -1);
    final boolean expectMissing = baseParams.getBool(TRACE_MISS, false);
    final boolean checkCount = "count".equals(baseParams.get(TRACE_SORT, "count"));

    int prevCount = Integer.MAX_VALUE;

    for (int i = 0; i <= maxIdx; i++) {
      final PivotField constraint = constraints.get(i);
      final int count = constraint.getCount();

      if (0 < min) {
        assertTrue(
            pivotName
                + ": val #"
                + i
                + " of "
                + maxIdx
                + ": count("
                + count
                + ") < facet.mincount("
                + min
                + "): "
                + constraint,
            min <= count);
      }
      // missing value must always come last, but only if facet.missing was used
      // and may not exist at all (mincount, none missing for this sub-facet, etc...)
      if ((i < maxIdx) || (!expectMissing)) {
        assertNotNull(
            pivotName + ": val #" + i + " of " + maxIdx + " has null value: " + constraint,
            constraint.getValue());
      }
      // if we are expecting count based sort, then the count of each constraint
      // must be lt-or-eq the count that came before -- or it must be the last value and
      // be "missing"
      if (checkCount) {
        assertTrue(
            pivotName
                + ": val #"
                + i
                + " of"
                + maxIdx
                + ": count("
                + count
                + ") > prevCount("
                + prevCount
                + "): "
                + constraint,
            ((count <= prevCount)
                || (expectMissing && i == maxIdx && null == constraint.getValue())));
        prevCount = count;
      }
    }
  }
Example 12
  /** Actually run the query */
  @Override
  public void process(ResponseBuilder rb) throws IOException {
    SolrQueryRequest req = rb.req;
    SolrQueryResponse rsp = rb.rsp;
    SolrParams params = req.getParams();
    if (!params.getBool(COMPONENT_NAME, true)) {
      return;
    }
    SolrIndexSearcher searcher = req.getSearcher();

    if (rb.getQueryCommand().getOffset() < 0) {
      throw new SolrException(
          SolrException.ErrorCode.BAD_REQUEST, "'start' parameter cannot be negative");
    }

    // -1 as flag if not set.
    long timeAllowed = (long) params.getInt(CommonParams.TIME_ALLOWED, -1);

    // Optional: This could also be implemented by the top-level searcher sending
    // a filter that lists the ids... that would be transparent to
    // the request handler, but would be more expensive (and would preserve score
    // too if desired).
    String ids = params.get(ShardParams.IDS);
    if (ids != null) {
      SchemaField idField = req.getSchema().getUniqueKeyField();
      List<String> idArr = StrUtils.splitSmart(ids, ",", true);
      int[] luceneIds = new int[idArr.size()];
      int docs = 0;
      for (int i = 0; i < idArr.size(); i++) {
        int id =
            req.getSearcher()
                .getFirstMatch(
                    new Term(idField.getName(), idField.getType().toInternal(idArr.get(i))));
        if (id >= 0) luceneIds[docs++] = id;
      }

      DocListAndSet res = new DocListAndSet();
      res.docList = new DocSlice(0, docs, luceneIds, null, docs, 0);
      if (rb.isNeedDocSet()) {
        List<Query> queries = new ArrayList<Query>();
        queries.add(rb.getQuery());
        List<Query> filters = rb.getFilters();
        if (filters != null) queries.addAll(filters);
        res.docSet = searcher.getDocSet(queries);
      }
      rb.setResults(res);
      rsp.add("response", rb.getResults().docList);
      return;
    }

    SolrIndexSearcher.QueryCommand cmd = rb.getQueryCommand();
    cmd.setTimeAllowed(timeAllowed);
    SolrIndexSearcher.QueryResult result = new SolrIndexSearcher.QueryResult();

    //
    // grouping / field collapsing
    //
    boolean doGroup = params.getBool(GroupParams.GROUP, false);
    if (doGroup) {
      try {
        cmd.groupCommands = new ArrayList<Grouping.Command>();

        String[] fields = params.getParams(GroupParams.GROUP_FIELD);
        String[] funcs = params.getParams(GroupParams.GROUP_FUNC);
        String[] queries = params.getParams(GroupParams.GROUP_QUERY);
        String groupSortStr = params.get(GroupParams.GROUP_SORT);
        Sort groupSort =
            groupSortStr != null ? QueryParsing.parseSort(groupSortStr, req.getSchema()) : null;

        int limitDefault = cmd.getLen(); // this is normally from "rows"
        int docsPerGroupDefault = params.getInt(GroupParams.GROUP_LIMIT, 1);

        // temporary: implement all group-by-field as group-by-func
        if (funcs == null) {
          funcs = fields;
        } else if (fields != null) {
          // catenate functions and fields
          String[] both = new String[fields.length + funcs.length];
          System.arraycopy(fields, 0, both, 0, fields.length);
          System.arraycopy(funcs, 0, both, fields.length, funcs.length);
          funcs = both;
        }

        if (funcs != null) {
          for (String groupByStr : funcs) {
            QParser parser = QParser.getParser(groupByStr, "func", rb.req);
            Query q = parser.getQuery();
            Grouping.CommandFunc gc = new Grouping.CommandFunc();
            gc.groupSort = groupSort;

            if (q instanceof FunctionQuery) {
              gc.groupBy = ((FunctionQuery) q).getValueSource();
            } else {
              gc.groupBy = new QueryValueSource(q, 0.0f);
            }
            gc.key = groupByStr;
            gc.groupLimit = limitDefault;
            gc.docsPerGroup = docsPerGroupDefault;

            cmd.groupCommands.add(gc);
          }
        }

        if (cmd.groupCommands.size() == 0) cmd.groupCommands = null;

        if (cmd.groupCommands != null) {
          if (rb.doHighlights || rb.isDebug()) {
            // we need a single list of the returned docs
            cmd.setFlags(SolrIndexSearcher.GET_DOCLIST);
          }

          searcher.search(result, cmd);
          rb.setResult(result);
          rsp.add("grouped", result.groupedResults);
          // TODO: get "hits" a different way to log
          return;
        }
      } catch (ParseException e) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
      }
    }

    // normal search result
    searcher.search(result, cmd);
    rb.setResult(result);

    rsp.add("response", rb.getResults().docList);
    rsp.getToLog().add("hits", rb.getResults().docList.matches());

    doFieldSortValues(rb, searcher);
    doPrefetch(rb);
  }
  @Override
  public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
    IndexSchema schema = req.getSchema();
    SolrIndexSearcher searcher = req.getSearcher();
    IndexReader reader = searcher.getReader();
    SolrParams params = req.getParams();
    int numTerms = params.getInt(NUMTERMS, DEFAULT_COUNT);

    // Always show the core lucene info
    rsp.add("index", getIndexInfo(reader, numTerms > 0));

    Integer docId = params.getInt(DOC_ID);
    if (docId == null && params.get(ID) != null) {
      // Look for something with a given solr ID
      SchemaField uniqueKey = schema.getUniqueKeyField();
      String v = uniqueKey.getType().toInternal(params.get(ID));
      Term t = new Term(uniqueKey.getName(), v);
      docId = searcher.getFirstMatch(t);
      if (docId < 0) {
        throw new SolrException(
            SolrException.ErrorCode.NOT_FOUND, "Can't find document: " + params.get(ID));
      }
    }

    // Read the document from the index
    if (docId != null) {
      Document doc = null;
      try {
        doc = reader.document(docId);
      } catch (Exception ex) {
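        // ignore: doc stays null and the NOT_FOUND check below reports the failure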
      }
      if (doc == null) {
        throw new SolrException(SolrException.ErrorCode.NOT_FOUND, "Can't find document: " + docId);
      }

      SimpleOrderedMap<Object> info = getDocumentFieldsInfo(doc, docId, reader, schema);

      SimpleOrderedMap<Object> docinfo = new SimpleOrderedMap<Object>();
      docinfo.add("docId", docId);
      docinfo.add("lucene", info);
      docinfo.add("solr", doc);
      rsp.add("doc", docinfo);
    } else if ("schema".equals(params.get("show"))) {
      rsp.add("schema", getSchemaInfo(req.getSchema()));
    } else {
      // If no doc is given, show all fields and top terms
      Set<String> fields = null;
      if (params.get(CommonParams.FL) != null) {
        fields = new HashSet<String>();
        for (String f : params.getParams(CommonParams.FL)) {
          fields.add(f);
        }
      }
      rsp.add("fields", getIndexedFieldsInfo(searcher, fields, numTerms));
    }

    // Add some generally helpful information
    NamedList<Object> info = new SimpleOrderedMap<Object>();
    info.add("key", getFieldFlagsKey());
    info.add(
        "NOTE",
        "Document Frequency (df) is not updated when a document is marked for deletion.  df values include deleted documents.");
    rsp.add("info", info);
    rsp.setHttpCaching(false);
  }
Example 14
  /** @since solr 1.3 */
  void processDelete(SolrQueryRequest req, UpdateRequestProcessor processor, XMLStreamReader parser)
      throws XMLStreamException, IOException {
    // Parse the command
    DeleteUpdateCommand deleteCmd = new DeleteUpdateCommand(req);

    // First look for commitWithin parameter on the request, will be overwritten for individual
    // <delete>'s
    SolrParams params = req.getParams();
    deleteCmd.commitWithin = params.getInt(UpdateParams.COMMIT_WITHIN, -1);

    for (int i = 0; i < parser.getAttributeCount(); i++) {
      String attrName = parser.getAttributeLocalName(i);
      String attrVal = parser.getAttributeValue(i);
      if ("fromPending".equals(attrName)) {
        // deprecated
      } else if ("fromCommitted".equals(attrName)) {
        // deprecated
      } else if (UpdateRequestHandler.COMMIT_WITHIN.equals(attrName)) {
        deleteCmd.commitWithin = Integer.parseInt(attrVal);
      } else {
        log.warn("XML element <delete> has invalid XML attr: " + attrName);
      }
    }

    StringBuilder text = new StringBuilder();
    while (true) {
      int event = parser.next();
      switch (event) {
        case XMLStreamConstants.START_ELEMENT:
          String mode = parser.getLocalName();
          if (!("id".equals(mode) || "query".equals(mode))) {
            String msg = "XML element <delete> has invalid XML child element: " + mode;
            log.warn(msg);
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, msg);
          }
          text.setLength(0);

          if ("id".equals(mode)) {
            for (int i = 0; i < parser.getAttributeCount(); i++) {
              String attrName = parser.getAttributeLocalName(i);
              String attrVal = parser.getAttributeValue(i);
              if (UpdateRequestHandler.VERSION.equals(attrName)) {
                deleteCmd.setVersion(Long.parseLong(attrVal));
              }
              if (UpdateRequest.ROUTE.equals(attrName)) {
                deleteCmd.setRoute(attrVal);
              }
            }
          }
          break;

        case XMLStreamConstants.END_ELEMENT:
          String currTag = parser.getLocalName();
          if ("id".equals(currTag)) {
            deleteCmd.setId(text.toString());
          } else if ("query".equals(currTag)) {
            deleteCmd.setQuery(text.toString());
          } else if ("delete".equals(currTag)) {
            return;
          } else {
            String msg = "XML element <delete> has invalid XML (closing) child element: " + currTag;
            log.warn(msg);
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, msg);
          }
          processor.processDelete(deleteCmd);
          deleteCmd.clear();
          break;

          // Add everything to the text
        case XMLStreamConstants.SPACE:
        case XMLStreamConstants.CDATA:
        case XMLStreamConstants.CHARACTERS:
          text.append(parser.getText());
          break;
      }
    }
  }
Example 15
  /** @since solr 1.2 */
  void processUpdate(SolrQueryRequest req, UpdateRequestProcessor processor, XMLStreamReader parser)
      throws XMLStreamException, IOException, FactoryConfigurationError {
    AddUpdateCommand addCmd = null;
    SolrParams params = req.getParams();
    while (true) {
      int event = parser.next();
      switch (event) {
        case XMLStreamConstants.END_DOCUMENT:
          parser.close();
          return;

        case XMLStreamConstants.START_ELEMENT:
          String currTag = parser.getLocalName();
          if (currTag.equals(UpdateRequestHandler.ADD)) {
            log.trace("SolrCore.update(add)");

            addCmd = new AddUpdateCommand(req);

            // First look for commitWithin parameter on the request, will be overwritten for
            // individual <add>'s
            addCmd.commitWithin = params.getInt(UpdateParams.COMMIT_WITHIN, -1);
            addCmd.overwrite = params.getBool(UpdateParams.OVERWRITE, true);

            for (int i = 0; i < parser.getAttributeCount(); i++) {
              String attrName = parser.getAttributeLocalName(i);
              String attrVal = parser.getAttributeValue(i);
              if (UpdateRequestHandler.OVERWRITE.equals(attrName)) {
                addCmd.overwrite = StrUtils.parseBoolean(attrVal);
              } else if (UpdateRequestHandler.COMMIT_WITHIN.equals(attrName)) {
                addCmd.commitWithin = Integer.parseInt(attrVal);
              } else {
                log.warn("XML element <add> has invalid XML attr: " + attrName);
              }
            }

          } else if ("doc".equals(currTag)) {
            if (addCmd != null) {
              log.trace("adding doc...");
              addCmd.clear();
              addCmd.solrDoc = readDoc(parser);
              processor.processAdd(addCmd);
            } else {
              throw new SolrException(
                  SolrException.ErrorCode.BAD_REQUEST,
                  "Unexpected <doc> tag without an <add> tag surrounding it.");
            }
          } else if (UpdateRequestHandler.COMMIT.equals(currTag)
              || UpdateRequestHandler.OPTIMIZE.equals(currTag)) {
            log.trace("parsing " + currTag);

            CommitUpdateCommand cmd =
                new CommitUpdateCommand(req, UpdateRequestHandler.OPTIMIZE.equals(currTag));
            ModifiableSolrParams mp = new ModifiableSolrParams();

            for (int i = 0; i < parser.getAttributeCount(); i++) {
              String attrName = parser.getAttributeLocalName(i);
              String attrVal = parser.getAttributeValue(i);
              mp.set(attrName, attrVal);
            }

            RequestHandlerUtils.validateCommitParams(mp);
            SolrParams p =
                SolrParams.wrapDefaults(
                    mp, req.getParams()); // default to the normal request params for commit options
            RequestHandlerUtils.updateCommit(cmd, p);

            processor.processCommit(cmd);
          } // end commit
          else if (UpdateRequestHandler.ROLLBACK.equals(currTag)) {
            log.trace("parsing rollback");

            RollbackUpdateCommand cmd = new RollbackUpdateCommand(req);

            processor.processRollback(cmd);
          } // end rollback
          else if (UpdateRequestHandler.DELETE.equals(currTag)) {
            log.trace("parsing delete");
            processDelete(req, processor, parser);
          } // end delete
          break;
      }
    }
  }
  @Override
  protected Directory create(String path, DirContext dirContext) throws IOException {
    LOG.info("creating directory factory for path {}", path);
    Configuration conf = getConf();

    if (metrics == null) {
      metrics = new Metrics(conf);
    }

    boolean blockCacheEnabled = params.getBool(BLOCKCACHE_ENABLED, true);
    boolean blockCacheReadEnabled = params.getBool(BLOCKCACHE_READ_ENABLED, true);
    boolean blockCacheWriteEnabled = params.getBool(BLOCKCACHE_WRITE_ENABLED, true);

    if (blockCacheEnabled) {
      LOG.warn("Block cache is enabled: " + BLOCKCACHE_ENABLED + " == true");
    }

    if (blockCacheWriteEnabled) {
      LOG.warn(
          "Using "
              + BLOCKCACHE_WRITE_ENABLED
              + " is currently buggy and can result in readers seeing a corrupted view of the index.");
    }
    Directory dir = null;

    if (blockCacheEnabled && dirContext != DirContext.META_DATA) {
      int numberOfBlocksPerBank = params.getInt(NUMBEROFBLOCKSPERBANK, 16384);

      int blockSize = BlockDirectory.BLOCK_SIZE;

      int bankCount = params.getInt(BLOCKCACHE_SLAB_COUNT, 1);

      boolean directAllocation = params.getBool(BLOCKCACHE_DIRECT_MEMORY_ALLOCATION, true);

      BlockCache blockCache;

      int slabSize = numberOfBlocksPerBank * blockSize;
      LOG.info(
          "Number of slabs of block cache [{}] with direct memory allocation set to [{}]",
          bankCount,
          directAllocation);
      LOG.info(
          "Block cache target memory usage, slab size of [{}] will allocate [{}] slabs and use ~[{}] bytes",
          new Object[] {slabSize, bankCount, ((long) bankCount * (long) slabSize)});

      int bufferSize = params.getInt("solr.hdfs.blockcache.bufferstore.buffersize", 128);
      int bufferCount = params.getInt("solr.hdfs.blockcache.bufferstore.buffercount", 128 * 128);

      BufferStore.initNewBuffer(bufferSize, bufferCount);
      long totalMemory = (long) bankCount * (long) numberOfBlocksPerBank * (long) blockSize;
      try {
        blockCache = new BlockCache(metrics, directAllocation, totalMemory, slabSize, blockSize);
      } catch (OutOfMemoryError e) {
        throw new RuntimeException(
            "The max direct memory is likely too low.  Either increase it (by adding -XX:MaxDirectMemorySize=<size>g -XX:+UseLargePages to your containers startup args)"
                + " or disable direct allocation using solr.hdfs.blockcache.direct.memory.allocation=false in solrconfig.xml. If you are putting the block cache on the heap,"
                + " your java heap size might not be large enough."
                + " Failed allocating ~"
                + totalMemory / 1000000.0
                + " MB.",
            e);
      }
      Cache cache = new BlockDirectoryCache(blockCache, metrics);
      HdfsDirectory hdfsDirectory = new HdfsDirectory(new Path(path), conf);
      dir =
          new BlockDirectory(
              "solrcore",
              hdfsDirectory,
              cache,
              null,
              blockCacheReadEnabled,
              blockCacheWriteEnabled);
    } else {
      dir = new HdfsDirectory(new Path(path), conf);
    }

    boolean nrtCachingDirectory = params.getBool(NRTCACHINGDIRECTORY_ENABLE, false);
    if (nrtCachingDirectory) {
      double nrtCacheMaxMergeSizeMB = params.getInt(NRTCACHINGDIRECTORY_MAXMERGESIZEMB, 16);
      double nrtCacheMaxCacheMB = params.getInt(NRTCACHINGDIRECTORY_MAXCACHEMB, 192);

      return new NRTCachingDirectory(dir, nrtCacheMaxMergeSizeMB, nrtCacheMaxCacheMB);
    }
    return dir;
  }
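  // A rough worked example of the block cache memory math above, assuming the common 8 KB block
  // size (the actual BlockDirectory.BLOCK_SIZE constant may differ):
  static long exampleBlockCacheMemory() {
    int numberOfBlocksPerBank = 16384;                        // default used above
    int bankCount = 1;                                        // default slab count above
    int blockSize = 8192;                                     // assumed block size
    long slabSize = (long) numberOfBlocksPerBank * blockSize; // 134,217,728 bytes (~128 MiB) per slab
    return (long) bankCount * slabSize;                       // direct memory the cache will try to allocate
  }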