@Override
public void modifyRequest(ResponseBuilder rb, SearchComponent who, ShardRequest sreq) {
  SolrParams params = rb.req.getParams();
  if (!params.getBool(COMPONENT_NAME, false)
      || !params.getBool(ClusteringParams.USE_SEARCH_RESULTS, false)) {
    return;
  }
  sreq.params.remove(COMPONENT_NAME);
  if ((sreq.purpose & ShardRequest.PURPOSE_GET_FIELDS) != 0) {
    String fl = sreq.params.get(CommonParams.FL, "*");
    // if fl contains a wildcard, every field is fetched anyway and no check is needed
    if (fl.indexOf('*') >= 0) return;
    Set<String> fields = getSearchClusteringEngine(rb).getFieldsToLoad(rb.req);
    if (fields == null || fields.size() == 0) return;
    StringBuilder sb = new StringBuilder();
    String[] flparams = fl.split("[,\\s]+");
    Set<String> flParamSet = new HashSet<String>(flparams.length);
    for (String flparam : flparams) {
      // no need to trim(): the split regex already consumes surrounding whitespace
      flParamSet.add(flparam);
    }
    for (String aFieldToLoad : fields) {
      if (!flParamSet.contains(aFieldToLoad)) {
        sb.append(',').append(aFieldToLoad);
      }
    }
    if (sb.length() > 0) {
      sreq.params.set(CommonParams.FL, fl + sb.toString());
    }
  }
}
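// A minimal, self-contained sketch (not part of the Solr code above) of the field-list
// merge rule modifyRequest() applies: fields the clustering engine needs are appended
// to fl only when they are not already requested, and a wildcard short-circuits the check.
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

public class FlMergeSketch {
  static String mergeFl(String fl, Set<String> required) {
    if (fl.indexOf('*') >= 0) return fl; // wildcard already covers everything
    Set<String> present = new HashSet<>(Arrays.asList(fl.split("[,\\s]+")));
    StringBuilder sb = new StringBuilder(fl);
    for (String f : required) {
      if (!present.contains(f)) sb.append(',').append(f);
    }
    return sb.toString();
  }

  public static void main(String[] args) {
    Set<String> required = new HashSet<>(Arrays.asList("title", "snippet"));
    System.out.println(mergeFl("id,title", required)); // id,title,snippet
    System.out.println(mergeFl("*", required));        // * (unchanged)
  }
}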
@Override
public void finishStage(ResponseBuilder rb) {
  SolrParams params = rb.req.getParams();
  if (!params.getBool(COMPONENT_NAME, false)
      || !params.getBool(ClusteringParams.USE_SEARCH_RESULTS, false)) {
    return;
  }
  if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS) {
    SearchClusteringEngine engine = getSearchClusteringEngine(rb);
    if (engine != null) {
      SolrDocumentList solrDocList = (SolrDocumentList) rb.rsp.getValues().get("response");
      // TODO: Currently, docIds is set to null in a distributed environment, so
      // CarrotParams.PRODUCE_SUMMARY does not work. To make it work in distributed mode,
      // we could do either of the following:
      // (a) have ClusteringComponent produce summaries in each shard and merge them
      //     in finishStage(), or
      // (b) add a doHighlighting(SolrDocumentList, ...) method to SolrHighlighter and
      //     make SolrHighlighter use "external text" rather than stored values to
      //     produce snippets.
      Map<SolrDocument, Integer> docIds = null;
      Object clusters = engine.cluster(rb.getQuery(), solrDocList, docIds, rb.req);
      rb.rsp.add("clusters", clusters);
    } else {
      String name = getClusteringEngineName(rb);
      log.warn("No engine for: " + name);
    }
  }
}
/**
 * Generates a list of highlighted query fragments for each item in a list of documents, or
 * returns null if highlighting is disabled.
 *
 * @param docs query results
 * @param query the query
 * @param req the current request
 * @param defaultFields default list of fields to summarize
 * @return NamedList containing a NamedList for each document, which in turn contains
 *     (field, summary) pairs
 */
@Override
@SuppressWarnings("unchecked")
public NamedList<Object> doHighlighting(
    DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException {
  SolrParams params = req.getParams();
  if (!isHighlightingEnabled(params)) return null;

  SolrIndexSearcher searcher = req.getSearcher();
  IndexSchema schema = searcher.getSchema();
  NamedList fragments = new SimpleOrderedMap();
  String[] fieldNames = getHighlightFields(query, req, defaultFields);
  Set<String> fset = new HashSet<String>();
  {
    // pre-fetch documents using the Searcher's doc cache
    for (String f : fieldNames) {
      fset.add(f);
    }
    // fetch unique key if one exists.
    SchemaField keyField = schema.getUniqueKeyField();
    if (null != keyField) fset.add(keyField.getName());
  }

  // get a FastVectorHighlighter instance outside the processing loop
  FastVectorHighlighter fvh =
      new FastVectorHighlighter(
          // FVH cannot process the hl.usePhraseHighlighter parameter on a per-field basis
          params.getBool(HighlightParams.USE_PHRASE_HIGHLIGHTER, true),
          // FVH cannot process the hl.requireFieldMatch parameter on a per-field basis
          params.getBool(HighlightParams.FIELD_MATCH, false));
  fvh.setPhraseLimit(params.getInt(HighlightParams.PHRASE_LIMIT, Integer.MAX_VALUE));
  FieldQuery fieldQuery = fvh.getFieldQuery(query, searcher.getIndexReader());

  // Highlight each document
  DocIterator iterator = docs.iterator();
  for (int i = 0; i < docs.size(); i++) {
    int docId = iterator.nextDoc();
    Document doc = searcher.doc(docId, fset);
    NamedList docSummaries = new SimpleOrderedMap();
    for (String fieldName : fieldNames) {
      fieldName = fieldName.trim();
      if (useFastVectorHighlighter(params, schema, fieldName))
        doHighlightingByFastVectorHighlighter(
            fvh, fieldQuery, req, docSummaries, docId, doc, fieldName);
      else doHighlightingByHighlighter(query, req, docSummaries, docId, doc, fieldName);
    }
    String printId = schema.printableUniqueKey(doc);
    fragments.add(printId == null ? null : printId, docSummaries);
  }
  return fragments;
}
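// A hedged SolrJ sketch of a request that exercises doHighlighting(); the hl.* names are
// the stock highlighting parameters read above (via HighlightParams), while the URL,
// collection, and query are illustrative assumptions. Client construction is SolrJ 6-style
// and varies across SolrJ versions.
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;

public class HighlightRequestSketch {
  public static void main(String[] args) throws Exception {
    try (SolrClient client =
        new HttpSolrClient.Builder("http://localhost:8983/solr/techproducts").build()) {
      SolrQuery q = new SolrQuery("features:power");
      q.setHighlight(true);                   // hl=true, so isHighlightingEnabled() passes
      q.addHighlightField("features");        // contributes to getHighlightFields()
      q.set("hl.usePhraseHighlighter", true); // global switch; FVH cannot vary it per field
      q.set("hl.requireFieldMatch", false);   // likewise global for FVH
      q.set("hl.phraseLimit", 5000);
      QueryResponse rsp = client.query(q);
      System.out.println(rsp.getHighlighting());
    }
  }
}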
/** Generate external process results (if they have not already been generated). */
@Override
public void prepare(ResponseBuilder rb) throws IOException {
  SolrParams params = rb.req.getParams();
  if (!params.getBool(getName(), false)) {
    return;
  }
  XJoinResults<?> results = (XJoinResults<?>) rb.req.getContext().get(getResultsTag());
  if (results != null) {
    return;
  }

  // generate external process results, by passing 'external'-prefixed parameters
  // from the query string to our factory
  String prefix = getName() + "." + XJoinParameters.EXTERNAL_PREFIX + ".";
  ModifiableSolrParams externalParams = new ModifiableSolrParams();
  for (Iterator<String> it = params.getParameterNamesIterator(); it.hasNext(); ) {
    String name = it.next();
    if (name.startsWith(prefix)) {
      externalParams.set(name.substring(prefix.length()), params.get(name));
    }
  }
  results = factory.getResults(externalParams);
  rb.req.getContext().put(getResultsTag(), results);
}
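// A small self-contained sketch (illustrative names, not part of the plugin) of the
// prefix-stripping done in prepare(): a parameter such as "xjoin.external.q=beer" is
// forwarded to the factory as "q=beer". "xjoin" stands in for getName() and is an
// assumption.
import java.util.Iterator;
import org.apache.solr.common.params.ModifiableSolrParams;

public class ExternalParamSketch {
  public static void main(String[] args) {
    ModifiableSolrParams request = new ModifiableSolrParams();
    request.set("q", "*:*");
    request.set("xjoin.external.q", "beer");
    request.set("xjoin.external.rows", "10");

    String prefix = "xjoin.external.";
    ModifiableSolrParams external = new ModifiableSolrParams();
    for (Iterator<String> it = request.getParameterNamesIterator(); it.hasNext(); ) {
      String name = it.next();
      if (name.startsWith(prefix)) {
        external.set(name.substring(prefix.length()), request.get(name));
      }
    }
    System.out.println(external); // q=beer&rows=10
  }
}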
public void processGetVersions(ResponseBuilder rb) throws IOException {
  SolrQueryRequest req = rb.req;
  SolrQueryResponse rsp = rb.rsp;
  SolrParams params = req.getParams();

  if (!params.getBool(COMPONENT_NAME, true)) {
    return;
  }

  int nVersions = params.getInt("getVersions", -1);
  if (nVersions == -1) return;

  String sync = params.get("sync");
  if (sync != null) {
    processSync(rb, nVersions, sync);
    return;
  }

  UpdateLog ulog = req.getCore().getUpdateHandler().getUpdateLog();
  if (ulog == null) return;

  UpdateLog.RecentUpdates recentUpdates = ulog.getRecentUpdates();
  try {
    rb.rsp.add("versions", recentUpdates.getVersions(nVersions));
  } finally {
    recentUpdates.close(); // cache this somehow?
  }
}
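// A hedged SolrJ sketch of a request that reaches processGetVersions(); this component is
// conventionally registered under the /get handler, but check your solrconfig.xml. The
// URL and collection are illustrative assumptions; client construction is SolrJ 6-style.
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;

public class GetVersionsSketch {
  public static void main(String[] args) throws Exception {
    try (SolrClient client =
        new HttpSolrClient.Builder("http://localhost:8983/solr/techproducts").build()) {
      SolrQuery q = new SolrQuery();
      q.setRequestHandler("/get"); // a qt starting with '/' selects the handler path
      q.set("getVersions", 10);    // ask for up to 10 recent update versions
      QueryResponse rsp = client.query(q);
      System.out.println(rsp.getResponse().get("versions"));
    }
  }
}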
@Test
public void testGroupCollapseFilterFieldAllFiltered() throws IOException {
  mockResponse(true);
  when(rb.grouping()).thenReturn(true);
  when(params.getBool(GroupCollapseParams.GROUP_COLLAPSE, false)).thenReturn(true);
  when(params.get(GroupCollapseParams.GROUP_COLLAPSE_FL))
      .thenReturn("price,discount,isCloseout,color,colorFamily");
  when(params.get(GroupCollapseParams.GROUP_COLLAPSE_FF)).thenReturn(FIELD_CLOSEOUT);
  component.process(rb);
  verify(rb).grouping();
  verify(rb).getGroupingSpec();
  verify(params).getBool(GroupCollapseParams.GROUP_COLLAPSE, false);
  verify(params).get(GroupCollapseParams.GROUP_COLLAPSE_FL);
  verify(params).get(GroupCollapseParams.GROUP_COLLAPSE_FF);
  verify(params).getParams(GroupCollapseParams.GROUP_COLLAPSE_FQ);
  verifyNoMoreInteractions(rb);
  verifyNoMoreInteractions(params);

  ArgumentCaptor<NamedList> namedListArgument = ArgumentCaptor.forClass(NamedList.class);
  verify(rsp).add(eq("groups_summary"), namedListArgument.capture());
  NamedList groupsSummary = namedListArgument.getValue();
  NamedList productId = (NamedList) groupsSummary.get("productId");
  assertNotNull(productId);

  Set<String> colorFamilies = new HashSet<String>();
  colorFamilies.add("RedColorFamily");
  colorFamilies.add("BlackColorFamily");
  verifyProductSummary(
      (NamedList) productId.get("product1"), 80.0f, 100.0f, 0.0f, 20.0f, 2, colorFamilies);

  colorFamilies = new HashSet<String>();
  colorFamilies.add("OrangeColorFamily");
  colorFamilies.add("BrownColorFamily");
  verifyProductSummary(
      (NamedList) productId.get("product2"), 60.0f, 80.0f, 20.0f, 40.0f, 2, colorFamilies);
}
@Override
public void prepare(ResponseBuilder rb) throws IOException {
  SolrParams params = rb.req.getParams();
  if (!params.getBool(COMPONENT_NAME, false)) {
    return;
  }
}
/** * Handle "UNLOAD" Action * * @param req * @param rsp * @return true if a modification has resulted that requires persistance of the CoreContainer * configuration. */ protected boolean handleUnloadAction(SolrQueryRequest req, SolrQueryResponse rsp) throws SolrException { SolrParams params = req.getParams(); String cname = params.get(CoreAdminParams.CORE); SolrCore core = coreContainer.remove(cname); if (core == null) { throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "No such core exists '" + cname + "'"); } if (params.getBool(CoreAdminParams.DELETE_INDEX, false)) { core.addCloseHook( new CloseHook() { @Override public void preClose(SolrCore core) {} @Override public void postClose(SolrCore core) { File dataDir = new File(core.getIndexDir()); for (File file : dataDir.listFiles()) { if (!file.delete()) { log.error(file.getAbsolutePath() + " could not be deleted on core unload"); } } if (!dataDir.delete()) log.error(dataDir.getAbsolutePath() + " could not be deleted on core unload"); } }); } core.close(); return coreContainer.isPersistent(); }
private void initParameters(SolrParams parameters) {
  if (parameters != null) {
    this.setEnabled(parameters.getBool("enabled", true));
    this.inputFieldname = parameters.get(INPUT_FIELD_PARAM, DEFAULT_INPUT_FIELDNAME);
    this.boostFieldname = parameters.get(BOOST_FIELD_PARAM, DEFAULT_BOOST_FIELDNAME);
    this.boostFilename = parameters.get(BOOST_FILENAME_PARAM);
  }
}
@Test
public void testNoGroupCollapse() throws IOException {
  when(rb.grouping()).thenReturn(true);
  when(params.getBool(GroupCollapseParams.GROUP_COLLAPSE, false)).thenReturn(false);
  component.process(rb);
  verify(rb).grouping();
  verify(params).getBool(GroupCollapseParams.GROUP_COLLAPSE, false);
  verifyNoMoreInteractions(rb);
  verifyNoMoreInteractions(params);
}
/** * Handle "SWAP" action * * @param req * @param rsp * @return true if a modification has resulted that requires persistance of the CoreContainer * configuration. */ protected boolean handleSwapAction(SolrQueryRequest req, SolrQueryResponse rsp) { final SolrParams params = req.getParams(); final SolrParams required = params.required(); final String cname = params.get(CoreAdminParams.CORE); boolean doPersist = params.getBool(CoreAdminParams.PERSISTENT, coreContainer.isPersistent()); String other = required.get(CoreAdminParams.OTHER); coreContainer.swap(cname, other); return doPersist; }
@Override
public void init(SolrParams params) {
  super.init(params);
  discountOverlaps = params.getBool("discountOverlaps", true);
  distribution = parseDistribution(params.get("distribution"));
  lambda = parseLambda(params.get("lambda"));
  normalization =
      DFRSimilarityFactory.parseNormalization(
          params.get("normalization"), params.get("c"), params.get("mu"), params.get("z"));
}
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
  SolrParams params = req.getParams();
  params = adjustParams(params);
  req.setParams(params);

  if (params.get("action") != null) {
    handleAdmin(req, rsp, params);
    return;
  }

  TupleStream tupleStream;

  try {
    tupleStream = this.streamFactory.constructStream(params.get("expr"));
  } catch (Exception e) {
    // Catch exceptions that occur while the stream is being created. This will include
    // streaming expression parse errors.
    SolrException.log(logger, e);
    rsp.add("result-set", new DummyErrorStream(e));
    return;
  }

  int worker = params.getInt("workerID", 0);
  int numWorkers = params.getInt("numWorkers", 1);
  StreamContext context = new StreamContext();
  context.workerID = worker;
  context.numWorkers = numWorkers;
  context.setSolrClientCache(clientCache);
  context.setModelCache(modelCache);
  context.put("core", this.coreName);
  context.put("solr-core", req.getCore());
  tupleStream.setStreamContext(context);

  // if an explanation was asked for, go get it
  if (params.getBool("explain", false)) {
    rsp.add("explanation", tupleStream.toExplanation(this.streamFactory));
  }

  if (tupleStream instanceof DaemonStream) {
    DaemonStream daemonStream = (DaemonStream) tupleStream;
    if (daemons.containsKey(daemonStream.getId())) {
      daemons.remove(daemonStream.getId()).close();
    }
    daemonStream.setDaemons(daemons);
    daemonStream.open(); // This will start the daemonStream
    daemons.put(daemonStream.getId(), daemonStream);
    rsp.add(
        "result-set",
        new DaemonResponseStream("Daemon:" + daemonStream.getId() + " started on " + coreName));
  } else {
    rsp.add("result-set", new TimerStream(new ExceptionStream(tupleStream)));
  }
}
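// A hedged SolrJ sketch of posting a streaming expression to this handler (typically
// registered at /stream). The expression, URL, and collection are illustrative; for large
// result sets a streaming client is preferable to a plain query.
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;

public class StreamExprSketch {
  public static void main(String[] args) throws Exception {
    try (SolrClient client =
        new HttpSolrClient.Builder("http://localhost:8983/solr/techproducts").build()) {
      SolrQuery q = new SolrQuery();
      q.setRequestHandler("/stream");
      q.set("expr", "search(techproducts, q=\"*:*\", fl=\"id\", sort=\"id asc\")");
      q.set("explain", true); // adds an "explanation" section alongside the result-set
      QueryResponse rsp = client.query(q);
      System.out.println(rsp.getResponse().get("result-set"));
    }
  }
}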
public void processGetUpdates(ResponseBuilder rb) throws IOException {
  SolrQueryRequest req = rb.req;
  SolrQueryResponse rsp = rb.rsp;
  SolrParams params = req.getParams();

  if (!params.getBool(COMPONENT_NAME, true)) {
    return;
  }

  String versionsStr = params.get("getUpdates");
  if (versionsStr == null) return;

  UpdateLog ulog = req.getCore().getUpdateHandler().getUpdateLog();
  if (ulog == null) return;

  List<String> versions = StrUtils.splitSmart(versionsStr, ",", true);
  List<Object> updates = new ArrayList<Object>(versions.size());
  long minVersion = Long.MAX_VALUE;

  // TODO: get this from cache instead of rebuilding?
  UpdateLog.RecentUpdates recentUpdates = ulog.getRecentUpdates();
  try {
    for (String versionStr : versions) {
      long version = Long.parseLong(versionStr);
      try {
        Object o = recentUpdates.lookup(version);
        if (o == null) continue;
        if (version > 0) {
          minVersion = Math.min(minVersion, version);
        }
        // TODO: do any kind of validation here?
        updates.add(o);
      } catch (SolrException e) {
        log.warn("Exception reading log for updates", e);
      } catch (ClassCastException e) {
        log.warn("Exception reading log for updates", e);
      }
    }
    // Must return all delete-by-query commands that occur after the first add requested,
    // since they may apply.
    updates.addAll(recentUpdates.getDeleteByQuery(minVersion));
    rb.rsp.add("updates", updates);
  } finally {
    recentUpdates.close(); // cache this somehow?
  }
}
@Override
public void process(ResponseBuilder rb) throws IOException {
  SolrParams params = rb.req.getParams();
  if (!params.getBool(COMPONENT_NAME, false)) {
    return;
  }

  String name = getClusteringEngineName(rb);
  boolean useResults = params.getBool(ClusteringParams.USE_SEARCH_RESULTS, false);
  if (useResults) {
    SearchClusteringEngine engine = getSearchClusteringEngine(rb);
    if (engine != null) {
      DocListAndSet results = rb.getResults();
      Map<SolrDocument, Integer> docIds =
          Maps.newHashMapWithExpectedSize(results.docList.size());
      SolrDocumentList solrDocList =
          SolrPluginUtils.docListToSolrDocumentList(
              results.docList, rb.req.getSearcher(), engine.getFieldsToLoad(rb.req), docIds);
      Object clusters = engine.cluster(rb.getQuery(), solrDocList, docIds, rb.req);
      rb.rsp.add("clusters", clusters);
    } else {
      log.warn("No engine for: " + name);
    }
  }

  boolean useCollection = params.getBool(ClusteringParams.USE_COLLECTION, false);
  if (useCollection) {
    DocumentClusteringEngine engine = documentClusteringEngines.get(name);
    if (engine != null) {
      boolean useDocSet = params.getBool(ClusteringParams.USE_DOC_SET, false);
      NamedList<?> nl;
      // TODO: This likely needs to be made into a background task that runs in an executor
      if (useDocSet) {
        nl = engine.cluster(rb.getResults().docSet, params);
      } else {
        nl = engine.cluster(params);
      }
      rb.rsp.add("clusters", nl);
    } else {
      log.warn("No engine for " + name);
    }
  }
}
@Override
public void init(NamedList args) {
  params = SolrParams.toSolrParams(args);
  this.hdfsDataDir = params.get(HDFS_HOME);
  if (this.hdfsDataDir != null && this.hdfsDataDir.length() == 0) {
    this.hdfsDataDir = null;
  }
  boolean kerberosEnabled = params.getBool(KERBEROS_ENABLED, false);
  LOG.info("Solr Kerberos Authentication " + (kerberosEnabled ? "enabled" : "disabled"));
  if (kerberosEnabled) {
    initKerberos();
  }
}
public ExtractingDocumentLoader(
    SolrQueryRequest req,
    UpdateRequestProcessor processor,
    TikaConfig config,
    ParseContextConfig parseContextConfig,
    SolrContentHandlerFactory factory) {
  this.params = req.getParams();
  this.core = req.getCore();
  this.config = config;
  this.parseContextConfig = parseContextConfig;
  this.processor = processor;

  templateAdd = new AddUpdateCommand(req);
  templateAdd.overwrite = params.getBool(UpdateParams.OVERWRITE, true);
  templateAdd.commitWithin = params.getInt(UpdateParams.COMMIT_WITHIN, -1);

  // this is lightweight
  autoDetectParser = new AutoDetectParser(config);
  this.factory = factory;

  ignoreTikaException = params.getBool(ExtractingParams.IGNORE_TIKA_EXCEPTION, false);
}
/**
 * Match up search results and add corresponding data for each result (if we have query results
 * available).
 */
@Override
@SuppressWarnings({"rawtypes", "unchecked"})
public void process(ResponseBuilder rb) throws IOException {
  SolrParams params = rb.req.getParams();
  if (!params.getBool(getName(), false)) {
    return;
  }
  XJoinResults<?> results = (XJoinResults<?>) rb.req.getContext().get(getResultsTag());
  if (results == null || rb.getResults() == null) {
    return;
  }

  // general results
  FieldAppender appender =
      new FieldAppender(params.get(getName() + "." + XJoinParameters.RESULTS_FIELD_LIST, "*"));
  NamedList general = appender.addNamedList(rb.rsp.getValues(), getName(), results);

  // per join id results
  FieldAppender docAppender =
      new FieldAppender(params.get(getName() + "." + XJoinParameters.DOC_FIELD_LIST, "*"));
  Set<String> joinFields = new HashSet<>();
  joinFields.add(joinField);

  List<String> joinIds = new ArrayList<>();
  for (Iterator<Integer> it = docIterator(rb); it.hasNext(); ) {
    StoredDocument doc = rb.req.getSearcher().doc(it.next(), joinFields);
    for (String joinId : doc.getValues(joinField)) {
      if (!joinIds.contains(joinId)) {
        joinIds.add(joinId);
      }
    }
  }

  for (String joinId : joinIds) {
    Object object = results.getResult(joinId);
    if (object == null) continue;
    NamedList external = new NamedList<>();
    general.add("external", external);
    external.add("joinId", joinId);
    if (object instanceof Iterable) {
      for (Object item : (Iterable) object) {
        docAppender.addNamedList(external, "doc", item);
      }
    } else {
      docAppender.addNamedList(external, "doc", object);
    }
  }
}
/**
 * Executes a query and compares the results with the data available in the {@link PivotField}
 * constraint -- this method is not recursive, and doesn't check anything about the sub-pivots
 * (if any).
 *
 * @param pivotName pivot name
 * @param constraint filters on pivot
 * @param params base solr parameters
 */
private void assertPivotData(String pivotName, PivotField constraint, SolrParams params)
    throws SolrServerException, IOException {
  SolrParams p = SolrParams.wrapDefaults(params("rows", "0"), params);
  QueryResponse res = cloudClient.query(p);
  String msg = pivotName + ": " + p;

  assertNumFound(msg, constraint.getCount(), res);

  if (p.getBool(StatsParams.STATS, false)) {
    // only check stats if stats are expected
    assertPivotStats(msg, constraint, res);
  }
}
@Test
public void testGroupCollapseNoGroups() throws IOException {
  NamedList values = mock(NamedList.class);
  when(rsp.getValues()).thenReturn(values);
  when(values.get("grouped")).thenReturn(null);
  when(rb.grouping()).thenReturn(true);
  when(params.getBool(GroupCollapseParams.GROUP_COLLAPSE, false)).thenReturn(true);
  when(params.get(GroupCollapseParams.GROUP_COLLAPSE_FL)).thenReturn("price,discount,isCloseout");
  component.process(rb);
  verify(rsp, never()).add(anyString(), anyObject());
}
public void process(ResponseBuilder rb) throws IOException {
  SolrParams params = rb.req.getParams();
  if (!params.getBool(COMPONENT_NAME, false)) {
    return;
  }
  String name = params.get(ClusteringParams.ENGINE_NAME, ClusteringEngine.DEFAULT_ENGINE_NAME);
  boolean useResults = params.getBool(ClusteringParams.USE_SEARCH_RESULTS, false);
  if (useResults) {
    SearchClusteringEngine engine = searchClusteringEngines.get(name);
    if (engine != null) {
      DocListAndSet results = rb.getResults();
      Object clusters = engine.cluster(rb.getQuery(), results.docList, rb.req);
      rb.rsp.add("clusters", clusters);
    } else {
      log.warn("No engine for: " + name);
    }
  }
  boolean useCollection = params.getBool(ClusteringParams.USE_COLLECTION, false);
  if (useCollection) {
    DocumentClusteringEngine engine = documentClusteringEngines.get(name);
    if (engine != null) {
      boolean useDocSet = params.getBool(ClusteringParams.USE_DOC_SET, false);
      NamedList nl;
      // TODO: This likely needs to be made into a background task that runs in an executor
      if (useDocSet) {
        nl = engine.cluster(rb.getResults().docSet, params);
      } else {
        nl = engine.cluster(params);
      }
      rb.rsp.add("clusters", nl);
    } else {
      log.warn("No engine for " + name);
    }
  }
}
@Override
public void prepare(ResponseBuilder rb) throws IOException {
  SolrParams params = rb.req.getParams();
  if (params.getBool(TermsParams.TERMS, false)) {
    rb.doTerms = true;
  }

  // TODO: temporary... this should go in a different component.
  String shards = params.get(ShardParams.SHARDS);
  if (shards != null) {
    if (params.get(ShardParams.SHARDS_QT) == null) {
      throw new SolrException(
          SolrException.ErrorCode.BAD_REQUEST, "No shards.qt parameter specified");
    }
    List<String> lst = StrUtils.splitSmart(shards, ",", true);
    rb.shards = lst.toArray(new String[lst.size()]);
  }
}
@Override
public void process(ResponseBuilder rb) throws IOException {
  SolrParams params = rb.req.getParams();
  if (!params.getBool(COMPONENT_NAME, false)) return;

  SolrIndexSearcher searcher = rb.req.getSearcher();
  IndexSchema schema = searcher.getSchema();
  if (schema.getUniqueKeyField() == null) return;

  ResultContext rc = (ResultContext) rb.rsp.getResponse();
  DocList docs = rc.getDocList();
  if (docs.hasScores()) {
    processScores(rb, docs, schema, searcher);
  } else {
    processIds(rb, docs, schema, searcher);
  }
}
public NamedList getHeatmapCounts() throws IOException, SyntaxError {
  final NamedList<Object> resOuter = new SimpleOrderedMap<>();
  String[] unparsedFields = rb.req.getParams().getParams(FacetParams.FACET_HEATMAP);
  if (unparsedFields == null || unparsedFields.length == 0) {
    return resOuter;
  }
  if (global.getBool(GroupParams.GROUP_FACET, false)) {
    throw new SolrException(
        SolrException.ErrorCode.BAD_REQUEST,
        "Heatmaps can't be used with " + GroupParams.GROUP_FACET);
  }
  for (String unparsedField : unparsedFields) {
    // populates facetValue, rb, params, docs
    final ParsedParams parsed = parseParams(FacetParams.FACET_HEATMAP, unparsedField);
    resOuter.add(
        parsed.key,
        SpatialHeatmapFacets.getHeatmapForField(
            parsed.key, parsed.facetValue, rb, parsed.params, parsed.docs));
  }
  return resOuter;
}
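// A hedged sketch of a faceting request that reaches getHeatmapCounts(); facet.heatmap is
// the parameter read above, while the URL, collection, and spatial (RPT) field name are
// illustrative assumptions.
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;

public class HeatmapFacetSketch {
  public static void main(String[] args) throws Exception {
    try (SolrClient client =
        new HttpSolrClient.Builder("http://localhost:8983/solr/spatial").build()) {
      SolrQuery q = new SolrQuery("*:*");
      q.setFacet(true);
      q.set("facet.heatmap", "geo_rpt"); // one heatmap per facet.heatmap parameter
      QueryResponse rsp = client.query(q);
      System.out.println(rsp.getResponse().get("facet_counts"));
    }
  }
}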
CSVLoaderBase(SolrQueryRequest req, UpdateRequestProcessor processor) {
  this.processor = processor;
  this.params = req.getParams();
  this.literals = new HashMap<>();

  templateAdd = new AddUpdateCommand(req);
  templateAdd.overwrite = params.getBool(OVERWRITE, true);
  templateAdd.commitWithin = params.getInt(UpdateParams.COMMIT_WITHIN, -1);

  strategy =
      new CSVStrategy(
          ',',
          '"',
          CSVStrategy.COMMENTS_DISABLED,
          CSVStrategy.ESCAPE_DISABLED,
          false,
          false,
          false,
          true);
  String sep = params.get(SEPARATOR);
  if (sep != null) {
    if (sep.length() != 1)
      throw new SolrException(
          SolrException.ErrorCode.BAD_REQUEST, "Invalid separator:'" + sep + "'");
    strategy.setDelimiter(sep.charAt(0));
  }

  String encapsulator = params.get(ENCAPSULATOR);
  if (encapsulator != null) {
    if (encapsulator.length() != 1)
      throw new SolrException(
          SolrException.ErrorCode.BAD_REQUEST, "Invalid encapsulator:'" + encapsulator + "'");
  }

  String escape = params.get(ESCAPE);
  if (escape != null) {
    if (escape.length() != 1)
      throw new SolrException(
          SolrException.ErrorCode.BAD_REQUEST, "Invalid escape:'" + escape + "'");
  }
  rowId = params.get(ROW_ID);
  rowIdOffset = params.getInt(ROW_ID_OFFSET, 0);

  // if only encapsulator or escape is set, disable the other escaping mechanism
  if (encapsulator == null && escape != null) {
    strategy.setEncapsulator(CSVStrategy.ENCAPSULATOR_DISABLED);
    strategy.setEscape(escape.charAt(0));
  } else {
    if (encapsulator != null) {
      strategy.setEncapsulator(encapsulator.charAt(0));
    }
    if (escape != null) {
      char ch = escape.charAt(0);
      strategy.setEscape(ch);
      if (ch == '\\') {
        // If the escape is the standard backslash, then also enable
        // unicode escapes (it's harmless, since 'u' would not otherwise
        // be escaped).
        strategy.setUnicodeEscapeInterpretation(true);
      }
    }
  }

  String fn = params.get(FIELDNAMES);
  fieldnames = fn != null ? commaSplit.split(fn, -1) : null;

  Boolean hasHeader = params.getBool(HEADER);
  skipLines = params.getInt(SKIPLINES, 0);

  if (fieldnames == null) {
    if (null == hasHeader) {
      // assume the file has the headers if they aren't supplied in the args
      hasHeader = true;
    } else if (!hasHeader) {
      throw new SolrException(
          SolrException.ErrorCode.BAD_REQUEST,
          "CSVLoader: must specify fieldnames=<fields>* or header=true");
    }
  } else {
    // if the fieldnames were supplied and the file has a header, we need to
    // skip over that header.
    if (hasHeader != null && hasHeader) skipLines++;
    prepareFields();
  }
}
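// A hedged SolrJ sketch of a CSV update that exercises the parameters parsed above;
// /update/csv, the URL, file name, and field names are illustrative assumptions.
import java.io.File;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;

public class CsvUpdateSketch {
  public static void main(String[] args) throws Exception {
    try (SolrClient client =
        new HttpSolrClient.Builder("http://localhost:8983/solr/techproducts").build()) {
      ContentStreamUpdateRequest req = new ContentStreamUpdateRequest("/update/csv");
      req.addFile(new File("products.csv"), "text/csv");
      req.setParam("separator", ";");              // must be a single character
      req.setParam("fieldnames", "id,name,price"); // with header=true, the header row is skipped
      req.setParam("header", "true");
      req.setParam("commit", "true");
      client.request(req);
    }
  }
}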
@Override
public void process(ResponseBuilder rb) throws IOException {
  SolrParams params = rb.req.getParams();
  if (!params.getBool(TermsParams.TERMS, false)) return;

  String[] fields = params.getParams(TermsParams.TERMS_FIELD);

  NamedList<Object> termsResult = new SimpleOrderedMap<>();
  rb.rsp.add("terms", termsResult);

  if (fields == null || fields.length == 0) return;

  int limit = params.getInt(TermsParams.TERMS_LIMIT, 10);
  if (limit < 0) {
    limit = Integer.MAX_VALUE;
  }

  String lowerStr = params.get(TermsParams.TERMS_LOWER);
  String upperStr = params.get(TermsParams.TERMS_UPPER);
  boolean upperIncl = params.getBool(TermsParams.TERMS_UPPER_INCLUSIVE, false);
  boolean lowerIncl = params.getBool(TermsParams.TERMS_LOWER_INCLUSIVE, true);
  boolean sort =
      !TermsParams.TERMS_SORT_INDEX.equals(
          params.get(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT));
  int freqmin = params.getInt(TermsParams.TERMS_MINCOUNT, 1);
  int freqmax = params.getInt(TermsParams.TERMS_MAXCOUNT, UNLIMITED_MAX_COUNT);
  if (freqmax < 0) {
    freqmax = Integer.MAX_VALUE;
  }
  String prefix = params.get(TermsParams.TERMS_PREFIX_STR);
  String regexp = params.get(TermsParams.TERMS_REGEXP_STR);
  Pattern pattern = regexp != null ? Pattern.compile(regexp, resolveRegexpFlags(params)) : null;

  boolean raw = params.getBool(TermsParams.TERMS_RAW, false);

  final AtomicReader indexReader = rb.req.getSearcher().getAtomicReader();
  Fields lfields = indexReader.fields();

  for (String field : fields) {
    NamedList<Integer> fieldTerms = new NamedList<>();
    termsResult.add(field, fieldTerms);

    Terms terms = lfields == null ? null : lfields.terms(field);
    if (terms == null) {
      // no terms for this field
      continue;
    }

    FieldType ft = raw ? null : rb.req.getSchema().getFieldTypeNoEx(field);
    if (ft == null) ft = new StrField();

    // prefix must currently be text
    BytesRef prefixBytes = prefix == null ? null : new BytesRef(prefix);

    BytesRef upperBytes = null;
    if (upperStr != null) {
      upperBytes = new BytesRef();
      ft.readableToIndexed(upperStr, upperBytes);
    }

    BytesRef lowerBytes;
    if (lowerStr == null) {
      // If no lower bound was specified, use the prefix
      lowerBytes = prefixBytes;
    } else if (raw) {
      // TODO: how to handle binary? perhaps we don't for "raw"... or if the field exists
      // perhaps we detect if the FieldType is non-character and expect hex if so?
      lowerBytes = new BytesRef(lowerStr);
    } else {
      lowerBytes = new BytesRef();
      ft.readableToIndexed(lowerStr, lowerBytes);
    }

    TermsEnum termsEnum = terms.iterator(null);
    BytesRef term = null;

    if (lowerBytes != null) {
      if (termsEnum.seekCeil(lowerBytes) == TermsEnum.SeekStatus.END) {
        termsEnum = null;
      } else {
        term = termsEnum.term();
        // Only advance the enum if we are excluding the lower bound and the lower Term actually
        // matches
        if (!lowerIncl && term.equals(lowerBytes)) {
          term = termsEnum.next();
        }
      }
    } else {
      // position termsEnum on first term
      term = termsEnum.next();
    }

    int i = 0;
    BoundedTreeSet<CountPair<BytesRef, Integer>> queue =
        (sort ? new BoundedTreeSet<CountPair<BytesRef, Integer>>(limit) : null);
    CharsRef external = new CharsRef();
    while (term != null && (i < limit || sort)) {
      boolean externalized = false; // did we fill in "external" yet for this term?

      // stop if the prefix doesn't match
      if (prefixBytes != null && !StringHelper.startsWith(term, prefixBytes)) break;

      if (pattern != null) {
        // indexed text or external text?
        // TODO: support "raw" mode?
        ft.indexedToReadable(term, external);
        externalized = true;
        if (!pattern.matcher(external).matches()) {
          term = termsEnum.next();
          continue;
        }
      }

      if (upperBytes != null) {
        int upperCmp = term.compareTo(upperBytes);
        // if we are past the upper term, or equal to it (when don't include upper) then stop.
        if (upperCmp > 0 || (upperCmp == 0 && !upperIncl)) break;
      }

      // This is a good term in the range. Check if mincount/maxcount conditions are satisfied.
      int docFreq = termsEnum.docFreq();
      if (docFreq >= freqmin && docFreq <= freqmax) {
        // add the term to the list
        if (sort) {
          queue.add(new CountPair<>(BytesRef.deepCopyOf(term), docFreq));
        } else {
          // TODO: handle raw somehow
          if (!externalized) {
            ft.indexedToReadable(term, external);
          }
          fieldTerms.add(external.toString(), docFreq);
          i++;
        }
      }

      term = termsEnum.next();
    }

    if (sort) {
      for (CountPair<BytesRef, Integer> item : queue) {
        if (i >= limit) break;
        ft.indexedToReadable(item.key, external);
        fieldTerms.add(external.toString(), item.val);
        i++;
      }
    }
  }
}
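// A hedged SolrJ sketch of a terms request handled by the component above; the terms.*
// names are the stock parameters it reads, while the URL, collection, field, and prefix
// are illustrative assumptions.
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;

public class TermsRequestSketch {
  public static void main(String[] args) throws Exception {
    try (SolrClient client =
        new HttpSolrClient.Builder("http://localhost:8983/solr/techproducts").build()) {
      SolrQuery q = new SolrQuery();
      q.setRequestHandler("/terms");
      q.set("terms", true);
      q.set("terms.fl", "name");
      q.set("terms.prefix", "ba"); // enumeration stops once the prefix no longer matches
      q.set("terms.limit", 20);
      q.set("terms.sort", "count"); // "index" disables the count-sorted bounded queue
      QueryResponse rsp = client.query(q);
      System.out.println(rsp.getTermsResponse().getTermMap());
    }
  }
}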
/** @since solr 1.3 */
void processDelete(UpdateRequestProcessor processor, XMLStreamReader parser, SolrParams params)
    throws XMLStreamException, IOException {
  // Parse the command
  DeleteUpdateCommand deleteCmd = new DeleteUpdateCommand();
  deleteCmd.fromPending = true;
  deleteCmd.fromCommitted = true;

  Boolean updateStore = params.getBool(UpdateParams.UPDATE_STORE);
  if (updateStore != null) {
    deleteCmd.setUpdateStore(updateStore.booleanValue());
  }

  for (int i = 0; i < parser.getAttributeCount(); i++) {
    String attrName = parser.getAttributeLocalName(i);
    String attrVal = parser.getAttributeValue(i);
    if ("fromPending".equals(attrName)) {
      deleteCmd.fromPending = StrUtils.parseBoolean(attrVal);
    } else if ("fromCommitted".equals(attrName)) {
      deleteCmd.fromCommitted = StrUtils.parseBoolean(attrVal);
    } else {
      XmlUpdateRequestHandler.log.warn("unexpected attribute delete/@" + attrName);
    }
  }

  StringBuilder text = new StringBuilder();
  while (true) {
    int event = parser.next();
    switch (event) {
      case XMLStreamConstants.START_ELEMENT:
        String mode = parser.getLocalName();
        if (!("id".equals(mode) || "query".equals(mode))) {
          XmlUpdateRequestHandler.log.warn("unexpected XML tag /delete/" + mode);
          throw new SolrException(
              SolrException.ErrorCode.BAD_REQUEST, "unexpected XML tag /delete/" + mode);
        }
        text.setLength(0);
        break;

      case XMLStreamConstants.END_ELEMENT:
        String currTag = parser.getLocalName();
        if ("id".equals(currTag)) {
          deleteCmd.id = text.toString();
        } else if ("query".equals(currTag)) {
          deleteCmd.query = text.toString();
        } else if ("delete".equals(currTag)) {
          return;
        } else {
          XmlUpdateRequestHandler.log.warn("unexpected XML tag /delete/" + currTag);
          throw new SolrException(
              SolrException.ErrorCode.BAD_REQUEST, "unexpected XML tag /delete/" + currTag);
        }
        processor.processDelete(deleteCmd);
        deleteCmd.id = null;
        deleteCmd.query = null;
        break;

      // Add everything to the text
      case XMLStreamConstants.SPACE:
      case XMLStreamConstants.CDATA:
      case XMLStreamConstants.CHARACTERS:
        text.append(parser.getText());
        break;
    }
  }
}
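// An illustrative delete message in the XML shape processDelete() accepts: <id> and/or
// <query> children under <delete>, each dispatched as its own DeleteUpdateCommand when
// its closing tag is reached. The id and query values are assumptions for illustration.
public class DeleteXmlExample {
  public static final String DELETE_XML =
      "<delete fromPending=\"true\">"
          + "<id>SP2514N</id>"
          + "<query>category:discontinued</query>"
          + "</delete>";
}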
@Override
public void load(
    SolrQueryRequest req,
    SolrQueryResponse rsp,
    ContentStream stream,
    UpdateRequestProcessor processor)
    throws Exception {
  Parser parser = null;
  String streamType = req.getParams().get(ExtractingParams.STREAM_TYPE, null);
  if (streamType != null) {
    // Cache? Parsers are lightweight to construct and thread-safe, so I'm told
    MediaType mt = MediaType.parse(streamType.trim().toLowerCase(Locale.ROOT));
    parser = new DefaultParser(config.getMediaTypeRegistry()).getParsers().get(mt);
  } else {
    parser = autoDetectParser;
  }
  if (parser != null) {
    Metadata metadata = new Metadata();

    // If you specify the resource name (the filename, roughly) with this parameter,
    // then Tika can use it to help guess the appropriate MIME type:
    String resourceName = req.getParams().get(ExtractingParams.RESOURCE_NAME, null);
    if (resourceName != null) {
      metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, resourceName);
    }
    // Provide the stream's content type as a hint for auto detection
    if (stream.getContentType() != null) {
      metadata.add(HttpHeaders.CONTENT_TYPE, stream.getContentType());
    }

    InputStream inputStream = null;
    try {
      inputStream = stream.getStream();
      metadata.add(ExtractingMetadataConstants.STREAM_NAME, stream.getName());
      metadata.add(ExtractingMetadataConstants.STREAM_SOURCE_INFO, stream.getSourceInfo());
      metadata.add(ExtractingMetadataConstants.STREAM_SIZE, String.valueOf(stream.getSize()));
      metadata.add(ExtractingMetadataConstants.STREAM_CONTENT_TYPE, stream.getContentType());
      // HtmlParser and TXTParser honor Metadata.CONTENT_ENCODING in the metadata
      String charset = ContentStreamBase.getCharsetFromContentType(stream.getContentType());
      if (charset != null) {
        metadata.add(HttpHeaders.CONTENT_ENCODING, charset);
      }

      String xpathExpr = params.get(ExtractingParams.XPATH_EXPRESSION);
      boolean extractOnly = params.getBool(ExtractingParams.EXTRACT_ONLY, false);
      SolrContentHandler handler =
          factory.createSolrContentHandler(metadata, params, req.getSchema());
      ContentHandler parsingHandler = handler;

      StringWriter writer = null;
      BaseMarkupSerializer serializer = null;
      if (extractOnly) {
        String extractFormat = params.get(ExtractingParams.EXTRACT_FORMAT, "xml");
        writer = new StringWriter();
        if (extractFormat.equals(TEXT_FORMAT)) {
          serializer = new TextSerializer();
          serializer.setOutputCharStream(writer);
          serializer.setOutputFormat(new OutputFormat("Text", "UTF-8", true));
        } else {
          serializer = new XMLSerializer(writer, new OutputFormat("XML", "UTF-8", true));
        }
        if (xpathExpr != null) {
          Matcher matcher = PARSER.parse(xpathExpr);
          // The MatchingContentHandler does not invoke startDocument. See
          // http://tika.markmail.org/message/kknu3hw7argwiqin
          serializer.startDocument();
          parsingHandler = new MatchingContentHandler(serializer, matcher);
        } else {
          parsingHandler = serializer;
        }
      } else if (xpathExpr != null) {
        Matcher matcher = PARSER.parse(xpathExpr);
        parsingHandler = new MatchingContentHandler(handler, matcher);
      } // else leave it as is

      try {
        // potentially use a wrapper handler for parsing, but we still need the
        // SolrContentHandler for getting the document.
        ParseContext context = parseContextConfig.create();
        context.set(Parser.class, parser);
        context.set(HtmlMapper.class, MostlyPassthroughHtmlMapper.INSTANCE);

        // Password handling
        RegexRulesPasswordProvider epp = new RegexRulesPasswordProvider();
        String pwMapFile = params.get(ExtractingParams.PASSWORD_MAP_FILE);
        if (pwMapFile != null && pwMapFile.length() > 0) {
          InputStream is = req.getCore().getResourceLoader().openResource(pwMapFile);
          if (is != null) {
            log.debug("Password file supplied: " + pwMapFile);
            epp.parse(is);
          }
        }
        context.set(PasswordProvider.class, epp);
        String resourcePassword = params.get(ExtractingParams.RESOURCE_PASSWORD);
        if (resourcePassword != null) {
          epp.setExplicitPassword(resourcePassword);
          log.debug("Literal password supplied for file " + resourceName);
        }
        parser.parse(inputStream, parsingHandler, metadata, context);
      } catch (TikaException e) {
        if (ignoreTikaException)
          log.warn(
              new StringBuilder("skip extracting text due to ")
                  .append(e.getLocalizedMessage())
                  .append(". metadata=")
                  .append(metadata.toString())
                  .toString());
        else throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
      }
      if (!extractOnly) {
        addDoc(handler);
      } else {
        // serializer is non-null when extractOnly is true; if an xpath was supplied,
        // we still need to call endDocument() on it
        if (xpathExpr != null) {
          serializer.endDocument();
        }
        rsp.add(stream.getName(), writer.toString());
        writer.close();
        String[] names = metadata.names();
        NamedList metadataNL = new NamedList();
        for (int i = 0; i < names.length; i++) {
          String[] vals = metadata.getValues(names[i]);
          metadataNL.add(names[i], vals);
        }
        rsp.add(stream.getName() + "_metadata", metadataNL);
      }
    } catch (SAXException e) {
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
    } finally {
      IOUtils.closeQuietly(inputStream);
    }
  } else {
    throw new SolrException(
        SolrException.ErrorCode.BAD_REQUEST,
        "Stream type of "
            + streamType
            + " didn't match any known parsers. Please supply the "
            + ExtractingParams.STREAM_TYPE
            + " parameter.");
  }
}
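// A hedged SolrJ sketch of an extract-only request against the handler backed by this
// loader; /update/extract, the URL, and the file are illustrative assumptions.
import java.io.File;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;

public class ExtractOnlySketch {
  public static void main(String[] args) throws Exception {
    try (SolrClient client =
        new HttpSolrClient.Builder("http://localhost:8983/solr/techproducts").build()) {
      ContentStreamUpdateRequest req = new ContentStreamUpdateRequest("/update/extract");
      req.addFile(new File("report.pdf"), "application/pdf");
      req.setParam("resource.name", "report.pdf"); // helps Tika guess the MIME type
      req.setParam("extractOnly", "true");         // return extracted content, don't index
      req.setParam("extractFormat", "text");       // "xml" is the default above
      System.out.println(client.request(req));
    }
  }
}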
@Override
public void process(ResponseBuilder rb) throws IOException {
  SolrParams params = rb.req.getParams();
  if (params.getBool(TermsParams.TERMS, false)) {
    String lowerStr = params.get(TermsParams.TERMS_LOWER, null);
    String[] fields = params.getParams(TermsParams.TERMS_FIELD);
    if (fields != null && fields.length > 0) {
      NamedList terms = new SimpleOrderedMap();
      rb.rsp.add("terms", terms);
      int limit = params.getInt(TermsParams.TERMS_LIMIT, 10);
      if (limit < 0) {
        limit = Integer.MAX_VALUE;
      }
      String upperStr = params.get(TermsParams.TERMS_UPPER);
      boolean upperIncl = params.getBool(TermsParams.TERMS_UPPER_INCLUSIVE, false);
      boolean lowerIncl = params.getBool(TermsParams.TERMS_LOWER_INCLUSIVE, true);
      boolean sort =
          !TermsParams.TERMS_SORT_INDEX.equals(
              params.get(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT));
      int freqmin = params.getInt(TermsParams.TERMS_MINCOUNT, 1);
      int freqmax = params.getInt(TermsParams.TERMS_MAXCOUNT, UNLIMITED_MAX_COUNT);
      if (freqmax < 0) {
        freqmax = Integer.MAX_VALUE;
      }
      String prefix = params.get(TermsParams.TERMS_PREFIX_STR);
      String regexp = params.get(TermsParams.TERMS_REGEXP_STR);
      Pattern pattern =
          regexp != null ? Pattern.compile(regexp, resolveRegexpFlags(params)) : null;

      boolean raw = params.getBool(TermsParams.TERMS_RAW, false);
      for (int j = 0; j < fields.length; j++) {
        String field = StringHelper.intern(fields[j]);
        FieldType ft = raw ? null : rb.req.getSchema().getFieldTypeNoEx(field);
        if (ft == null) ft = new StrField();

        // If no lower bound was specified, use the prefix
        String lower = lowerStr == null ? prefix : (raw ? lowerStr : ft.toInternal(lowerStr));
        if (lower == null) lower = "";
        String upper = upperStr == null ? null : (raw ? upperStr : ft.toInternal(upperStr));

        Term lowerTerm = new Term(field, lower);
        Term upperTerm = upper == null ? null : new Term(field, upper);

        // the enum comes back positioned and ready to go
        TermEnum termEnum = rb.req.getSearcher().getReader().terms(lowerTerm);
        int i = 0;
        BoundedTreeSet<CountPair<String, Integer>> queue =
            (sort ? new BoundedTreeSet<CountPair<String, Integer>>(limit) : null);
        NamedList fieldTerms = new NamedList();
        terms.add(field, fieldTerms);
        Term lowerTestTerm = termEnum.term();

        // Only advance the enum if we are excluding the lower bound and the lower Term
        // actually matches
        if (lowerTestTerm != null
            && !lowerIncl
            && lowerTestTerm.field() == field // intern'd comparison
            && lowerTestTerm.text().equals(lower)) {
          termEnum.next();
        }

        while (i < limit || sort) {
          Term theTerm = termEnum.term();

          // check for a different field, or the end of the index.
          if (theTerm == null || field != theTerm.field()) // intern'd comparison
            break;

          String indexedText = theTerm.text();

          // stop if the prefix doesn't match
          if (prefix != null && !indexedText.startsWith(prefix)) break;

          if (pattern != null && !pattern.matcher(indexedText).matches()) {
            termEnum.next();
            continue;
          }

          if (upperTerm != null) {
            int upperCmp = theTerm.compareTo(upperTerm);
            // if we are past the upper term, or equal to it (when don't include upper) then
            // stop.
            if (upperCmp > 0 || (upperCmp == 0 && !upperIncl)) break;
          }

          // This is a good term in the range. Check if mincount/maxcount conditions are
          // satisfied.
          int docFreq = termEnum.docFreq();
          if (docFreq >= freqmin && docFreq <= freqmax) {
            // add the term to the list
            String label = raw ? indexedText : ft.indexedToReadable(indexedText);
            if (sort) {
              queue.add(new CountPair<String, Integer>(label, docFreq));
            } else {
              fieldTerms.add(label, docFreq);
              i++;
            }
          }
          termEnum.next();
        }

        termEnum.close();

        if (sort) {
          for (CountPair<String, Integer> item : queue) {
            if (i < limit) {
              fieldTerms.add(item.key, item.val);
              i++;
            } else {
              break;
            }
          }
        }
      }
    } else {
      throw new SolrException(
          SolrException.ErrorCode.BAD_REQUEST, "No terms.fl parameter specified");
    }
  }
}
@Override
public void process(ResponseBuilder rb) throws IOException {
  SolrParams params = rb.req.getParams();
  if (!params.getBool(COMPONENT_NAME, false)) {
    return;
  }

  NamedList<Object> termVectors = new NamedList<Object>();
  rb.rsp.add(TERM_VECTORS, termVectors);

  IndexSchema schema = rb.req.getSchema();
  SchemaField keyField = schema.getUniqueKeyField();
  String uniqFieldName = null;
  if (keyField != null) {
    uniqFieldName = keyField.getName();
    termVectors.add("uniqueKeyFieldName", uniqFieldName);
  }

  FieldOptions allFields = new FieldOptions();
  // figure out what options we have, and try to get the appropriate vector
  allFields.termFreq = params.getBool(TermVectorParams.TF, false);
  allFields.positions = params.getBool(TermVectorParams.POSITIONS, false);
  allFields.offsets = params.getBool(TermVectorParams.OFFSETS, false);
  allFields.docFreq = params.getBool(TermVectorParams.DF, false);
  allFields.tfIdf = params.getBool(TermVectorParams.TF_IDF, false);
  // boolean cacheIdf = params.getBool(TermVectorParams.IDF, false);
  // shortcut to all values.
  if (params.getBool(TermVectorParams.ALL, false)) {
    allFields.termFreq = true;
    allFields.positions = true;
    allFields.offsets = true;
    allFields.docFreq = true;
    allFields.tfIdf = true;
  }

  // Build up our per-field mapping
  Map<String, FieldOptions> fieldOptions = new HashMap<String, FieldOptions>();
  NamedList<List<String>> warnings = new NamedList<List<String>>();
  List<String> noTV = new ArrayList<String>();
  List<String> noPos = new ArrayList<String>();
  List<String> noOff = new ArrayList<String>();

  Set<String> fields = getFields(rb);
  if (null != fields) {
    // we have specific fields to retrieve, or no fields
    for (String field : fields) {

      // workaround for SOLR-3523
      if (null == field || "score".equals(field)) continue;

      // we don't want to issue warnings about the uniqueKey field
      // since it can cause lots of confusion in distributed requests
      // where the uniqueKey field is injected into the fl for merging
      final boolean fieldIsUniqueKey = field.equals(uniqFieldName);

      SchemaField sf = schema.getFieldOrNull(field);
      if (sf != null) {
        if (sf.storeTermVector()) {
          FieldOptions option = fieldOptions.get(field);
          if (option == null) {
            option = new FieldOptions();
            option.fieldName = field;
            fieldOptions.put(field, option);
          }

          // get the per-field mappings
          option.termFreq = params.getFieldBool(field, TermVectorParams.TF, allFields.termFreq);
          option.docFreq = params.getFieldBool(field, TermVectorParams.DF, allFields.docFreq);
          option.tfIdf = params.getFieldBool(field, TermVectorParams.TF_IDF, allFields.tfIdf);
          // Validate these are even an option
          option.positions =
              params.getFieldBool(field, TermVectorParams.POSITIONS, allFields.positions);
          if (option.positions && !sf.storeTermPositions() && !fieldIsUniqueKey) {
            noPos.add(field);
          }
          option.offsets =
              params.getFieldBool(field, TermVectorParams.OFFSETS, allFields.offsets);
          if (option.offsets && !sf.storeTermOffsets() && !fieldIsUniqueKey) {
            noOff.add(field);
          }
        } else { // field doesn't have term vectors
          if (!fieldIsUniqueKey) noTV.add(field);
        }
      } else {
        // field doesn't exist
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "undefined field: " + field);
      }
    }
  } // else, deal with all fields

  // NOTE: currently all types of warnings are schema-driven, and guaranteed
  // to be consistent across all shards - if additional types of warnings
  // are added that might differ between shards, finishStage() needs
  // to be changed to account for that.
  boolean hasWarnings = false;
  if (!noTV.isEmpty()) {
    warnings.add("noTermVectors", noTV);
    hasWarnings = true;
  }
  if (!noPos.isEmpty()) {
    warnings.add("noPositions", noPos);
    hasWarnings = true;
  }
  if (!noOff.isEmpty()) {
    warnings.add("noOffsets", noOff);
    hasWarnings = true;
  }
  if (hasWarnings) {
    termVectors.add("warnings", warnings);
  }

  DocListAndSet listAndSet = rb.getResults();
  List<Integer> docIds = getInts(params.getParams(TermVectorParams.DOC_IDS));
  Iterator<Integer> iter;
  if (docIds != null && !docIds.isEmpty()) {
    iter = docIds.iterator();
  } else {
    DocList list = listAndSet.docList;
    iter = list.iterator();
  }
  SolrIndexSearcher searcher = rb.req.getSearcher();

  IndexReader reader = searcher.getIndexReader();
  // the TVMapper is a TermVectorMapper which can be used to optimize loading of Term Vectors

  // Only load the id field to get the uniqueKey of that field
  final String finalUniqFieldName = uniqFieldName;

  final List<String> uniqValues = new ArrayList<String>();

  // TODO: is this required to be single-valued? if so, we should STOP
  // once we find it...
  final StoredFieldVisitor getUniqValue =
      new StoredFieldVisitor() {
        @Override
        public void stringField(FieldInfo fieldInfo, String value) {
          uniqValues.add(value);
        }

        @Override
        public void intField(FieldInfo fieldInfo, int value) {
          uniqValues.add(Integer.toString(value));
        }

        @Override
        public void longField(FieldInfo fieldInfo, long value) {
          uniqValues.add(Long.toString(value));
        }

        @Override
        public Status needsField(FieldInfo fieldInfo) {
          return (fieldInfo.name.equals(finalUniqFieldName)) ? Status.YES : Status.NO;
        }
      };

  TermsEnum termsEnum = null;

  while (iter.hasNext()) {
    Integer docId = iter.next();
    NamedList<Object> docNL = new NamedList<Object>();

    if (keyField != null) {
      reader.document(docId, getUniqValue);
      String uniqVal = null;
      if (uniqValues.size() != 0) {
        uniqVal = uniqValues.get(0);
        uniqValues.clear();
        docNL.add("uniqueKey", uniqVal);
        termVectors.add(uniqVal, docNL);
      }
    } else {
      // support for schemas w/o a unique key
      termVectors.add("doc-" + docId, docNL);
    }

    if (null != fields) {
      for (Map.Entry<String, FieldOptions> entry : fieldOptions.entrySet()) {
        final String field = entry.getKey();
        final Terms vector = reader.getTermVector(docId, field);
        if (vector != null) {
          termsEnum = vector.iterator(termsEnum);
          mapOneVector(docNL, entry.getValue(), reader, docId, termsEnum, field);
        }
      }
    } else {
      // extract all fields
      final Fields vectors = reader.getTermVectors(docId);
      for (String field : vectors) {
        Terms terms = vectors.terms(field);
        if (terms != null) {
          termsEnum = terms.iterator(termsEnum);
          mapOneVector(docNL, allFields, reader, docId, termsEnum, field);
        }
      }
    }
  }
}
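// A hedged SolrJ sketch of a request that exercises TermVectorComponent; the tv.* names
// are the stock parameters read above, while /tvrh, the URL, collection, and field are
// illustrative assumptions.
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;

public class TermVectorSketch {
  public static void main(String[] args) throws Exception {
    try (SolrClient client =
        new HttpSolrClient.Builder("http://localhost:8983/solr/techproducts").build()) {
      SolrQuery q = new SolrQuery("*:*");
      q.setRows(1);
      q.setRequestHandler("/tvrh");
      q.set("tv", true);
      q.set("tv.all", true);      // shorthand for tf, positions, offsets, df, tf_idf
      q.set("tv.fl", "includes"); // restrict to a field that stores term vectors
      QueryResponse rsp = client.query(q);
      System.out.println(rsp.getResponse().get("termVectors"));
    }
  }
}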