protected SolrInputDocument parseSolrLogLine(String line, int lineNum) { int prev = 0; int pos = line.indexOf(" - "); if (pos != 5) return null; String level = line.substring(0, 5).trim(); prev = pos + 3; pos = line.indexOf(";", prev); if (pos == -1) return null; String timestamp = line.substring(prev, pos).trim(); prev = pos + 1; pos = line.indexOf(";", prev); if (pos == -1) return null; Date timestampDt = null; try { timestampDt = TS_PARSER.parse(timestamp); } catch (ParseException pe) { log.warn("Failed to parse timestamp at line " + lineNum + " due to: " + pe); return null; } String category = line.substring(prev, pos).trim(); String message = line.substring(pos + 1).trim(); // unique ID based on host, line num, log level, and timestamp String docId = String.format( "%s/%d/%s/%d/%s", "localhost", lineNum, level, timestampDt.getTime(), ExampleDriver.getMD5Hash(message)) .toLowerCase(); SolrInputDocument doc = new SolrInputDocument(); doc.setField("id", docId); doc.setField("source_s", "solr"); // name of the app that created the log message doc.setField("host_s", "localhost:8983"); // host and port of app that created the log message doc.setField("level_s", level); doc.setField("timestamp_tdt", timestampDt); doc.setField("category_s", category); doc.setField("text_en", message); log.info("Parsed log message at line " + lineNum + " into SolrInputDocument: " + doc); return doc; }
/** Main method of this example. */ @Override public void runExample(ExampleDriver driver) throws Exception { long startMs = System.currentTimeMillis(); CommandLine cli = driver.getCommandLine(); // Size of index batch requests to Solr // int batchSize = Integer.parseInt(cli.getOptionValue("batchSize", "500")); // Get a connection to Solr cloud using Zookeeper String zkHost = cli.getOptionValue("zkhost", ZK_HOST); String collectionName = cli.getOptionValue("collection", COLLECTION); int zkClientTimeout = Integer.parseInt(cli.getOptionValue("zkClientTimeout", "15000")); CloudSolrServer solr = new CloudSolrServer(zkHost); solr.setDefaultCollection(collectionName); solr.setZkClientTimeout(zkClientTimeout); solr.connect(); int numSent = 0; int numSkipped = 0; int lineNum = 0; SolrInputDocument doc = null; String line = null; // read file line-by-line BufferedReader reader = new BufferedReader(driver.readFile("log")); driver.rememberCloseable(reader); LogFormat fmt = LogFormat.valueOf(cli.getOptionValue("format", "solr")); // process each sighting as a document while ((line = reader.readLine()) != null) { doc = parseNextDoc(line, ++lineNum, fmt); if (doc != null) { addDocWithRetry(solr, doc, 10); ++numSent; } else { ++numSkipped; continue; } if (lineNum % 1000 == 0) log.info(String.format("Processed %d lines.", lineNum)); } // hard commit all docs sent solr.commit(true, true); float tookSecs = Math.round(((System.currentTimeMillis() - startMs) / 1000f) * 100f) / 100f; log.info( String.format( "Sent %d log messages (skipped %d) took %f seconds", numSent, numSkipped, tookSecs)); // queries to demonstrate results of indexing SolrQuery solrQuery = new SolrQuery("*:*"); solrQuery.setRows(0); QueryResponse resp = solr.query(solrQuery); SolrDocumentList hits = resp.getResults(); log.info("Match all docs distributed query found " + hits.getNumFound() + " docs."); solrQuery.set("shards", "shard1"); resp = solr.query(solrQuery); hits = resp.getResults(); log.info( "Match all docs non-distributed query to shard1 found " + hits.getNumFound() + " docs."); solrQuery.set("shards", "shard2"); resp = solr.query(solrQuery); hits = resp.getResults(); log.info( "Match all docs non-distributed query to shard2 found " + hits.getNumFound() + " docs."); solr.shutdown(); }