Example #1
0
  /**
   * Parses one line of a Solr log file into a {@link SolrInputDocument}.
   *
   * <p>The line is expected to look like {@code LEVEL - timestamp;category;message}: the first
   * five characters hold the (padded) log level, followed by {@code " - "}, then the timestamp and
   * the category, each terminated by a semicolon, and finally the message text.
   *
   * <p>The resulting document carries the fields {@code id}, {@code source_s}, {@code host_s},
   * {@code level_s}, {@code timestamp_tdt}, {@code category_s} and {@code text_en}. The source and
   * host values are hard-coded in this example.
   *
   * @param line the raw log line; may be {@code null}
   * @param lineNum the line number within the log, used in the document ID and in log messages
   * @return the document built from the line, or {@code null} if the line is {@code null}, does
   *     not match the expected layout, or has a timestamp that {@code TS_PARSER} cannot parse
   */
  protected SolrInputDocument parseSolrLogLine(String line, int lineNum) {
    if (line == null) {
      return null;
    }

    // The level occupies exactly the first five characters, so the separator must sit at index 5.
    int levelEnd = line.indexOf(" - ");
    if (levelEnd != 5) {
      return null;
    }
    String level = line.substring(0, 5).trim();

    int timestampStart = levelEnd + 3;
    int timestampEnd = line.indexOf(";", timestampStart);
    if (timestampEnd == -1) {
      return null;
    }
    String timestamp = line.substring(timestampStart, timestampEnd).trim();

    // Confirm the category delimiter exists before parsing the timestamp, so that structurally
    // malformed lines are dropped silently rather than producing a timestamp warning.
    int categoryStart = timestampEnd + 1;
    int categoryEnd = line.indexOf(";", categoryStart);
    if (categoryEnd == -1) {
      return null;
    }

    Date timestampDt;
    try {
      timestampDt = TS_PARSER.parse(timestamp);
    } catch (ParseException pe) {
      log.warn("Failed to parse timestamp at line " + lineNum + " due to: " + pe);
      return null;
    }

    String category = line.substring(categoryStart, categoryEnd).trim();
    String message = line.substring(categoryEnd + 1).trim();

    // unique ID based on host, line num, log level, and timestamp
    // Locale.ROOT keeps the ID stable regardless of the JVM default locale: without it, digits may
    // be localized by %d and e.g. a Turkish locale lower-cases "INFO" to "ınfo".
    String docId =
        String.format(
                java.util.Locale.ROOT,
                "%s/%d/%s/%d/%s",
                "localhost",
                lineNum,
                level,
                timestampDt.getTime(),
                ExampleDriver.getMD5Hash(message))
            .toLowerCase(java.util.Locale.ROOT);

    SolrInputDocument doc = new SolrInputDocument();
    doc.setField("id", docId);
    doc.setField("source_s", "solr"); // name of the app that created the log message
    doc.setField("host_s", "localhost:8983"); // host and port of app that created the log message
    doc.setField("level_s", level);
    doc.setField("timestamp_tdt", timestampDt);
    doc.setField("category_s", category);
    doc.setField("text_en", message);

    log.info("Parsed log message at line " + lineNum + " into SolrInputDocument: " + doc);

    return doc;
  }
Example #2
0
  /**
   * Main method of this example: indexes log lines into SolrCloud, then runs a few queries.
   *
   * <p>Reads the reader returned by {@code driver.readFile("log")} line by line, converts each
   * line with {@code parseNextDoc}, and sends every resulting document to the target collection.
   * Lines that yield no document are counted as skipped. After a hard commit, a match-all query is
   * run against the whole collection and then with the {@code shards} parameter set to {@code
   * shard1} and {@code shard2} in turn, logging the hit count each time.
   *
   * <p>Command-line options read: {@code zkhost}, {@code collection}, {@code zkClientTimeout}
   * (default {@code 15000}) and {@code format} (default {@code solr}).
   *
   * <p>The Solr client is always shut down before this method returns, including when an
   * exception is thrown.
   *
   * @param driver supplies the parsed command line, opens the log file, and tracks closeables
   * @throws Exception if connecting, reading, indexing, committing, or querying fails
   */
  @Override
  public void runExample(ExampleDriver driver) throws Exception {
    long startMs = System.currentTimeMillis();

    CommandLine cli = driver.getCommandLine();

    // Get a connection to Solr cloud using Zookeeper
    String zkHost = cli.getOptionValue("zkhost", ZK_HOST);
    String collectionName = cli.getOptionValue("collection", COLLECTION);
    int zkClientTimeout = Integer.parseInt(cli.getOptionValue("zkClientTimeout", "15000"));

    CloudSolrServer solr = new CloudSolrServer(zkHost);
    try {
      solr.setDefaultCollection(collectionName);
      solr.setZkClientTimeout(zkClientTimeout);
      solr.connect();

      int numSent = 0;
      int numSkipped = 0;
      int lineNum = 0;

      // read file line-by-line; the driver takes responsibility for closing the reader
      BufferedReader reader = new BufferedReader(driver.readFile("log"));
      driver.rememberCloseable(reader);

      LogFormat fmt = LogFormat.valueOf(cli.getOptionValue("format", "solr"));

      // process each log line as a document
      String line;
      while ((line = reader.readLine()) != null) {
        SolrInputDocument doc = parseNextDoc(line, ++lineNum, fmt);
        if (doc != null) {
          addDocWithRetry(solr, doc, 10);
          ++numSent;
        } else {
          ++numSkipped;
        }

        // Report progress by lines read, whether or not the current line produced a document.
        if (lineNum % 1000 == 0) {
          log.info(String.format("Processed %d lines.", lineNum));
        }
      }

      // hard commit all docs sent
      solr.commit(true, true);

      // elapsed seconds, rounded to two decimal places
      float tookSecs = Math.round(((System.currentTimeMillis() - startMs) / 1000f) * 100f) / 100f;
      log.info(
          String.format(
              "Sent %d log messages (skipped %d) took %f seconds", numSent, numSkipped, tookSecs));

      // queries to demonstrate results of indexing
      SolrQuery solrQuery = new SolrQuery("*:*");
      solrQuery.setRows(0); // only the hit count is needed, not the documents themselves
      QueryResponse resp = solr.query(solrQuery);
      SolrDocumentList hits = resp.getResults();
      log.info("Match all docs distributed query found " + hits.getNumFound() + " docs.");

      solrQuery.set("shards", "shard1");
      resp = solr.query(solrQuery);
      hits = resp.getResults();
      log.info(
          "Match all docs non-distributed query to shard1 found " + hits.getNumFound() + " docs.");

      solrQuery.set("shards", "shard2");
      resp = solr.query(solrQuery);
      hits = resp.getResults();
      log.info(
          "Match all docs non-distributed query to shard2 found " + hits.getNumFound() + " docs.");
    } finally {
      // Release the client's connections even if indexing or querying failed part-way through.
      solr.shutdown();
    }
  }