/**
 * Takes the next URI from {@code input} and hands it to {@code push}.
 *
 * <p>Once {@code input} yields no more URIs, or processing a URI throws, the
 * importer is marked as done and every later call returns immediately.
 *
 * <p>The file counter is advanced only after a URI has been pushed without
 * an exception, so the final empty poll is not counted as a file.
 *
 * @return the file counter, reflecting the number of URIs pushed so far
 */
@Override
public AtomicLong next() {
    if (done) {
        return fileCounter;
    }
    try {
        URI uri = input.poll();
        if (uri == null) {
            // Nothing left to import: stop, and do not count this call as a file.
            done = true;
        } else {
            push(uri);
            fileCounter.incrementAndGet();
        }
    } catch (Exception e) {
        // Any failure while importing ends this importer; the error is logged, not rethrown.
        logger.error(e.getMessage(), e);
        done = true;
    }
    return fileCounter;
}
/**
 * Command line entry point: collects the input files, creates a new
 * Elasticsearch index and runs the import.
 *
 * <p>Steps, in order: parse the options, print the help text and exit with
 * status 1 if {@code --help} was given, collect the input URIs below
 * {@code --path} that match {@code --pattern}, set up the ingest client (or
 * the mock client when {@code --mock} evaluates to true), create the index,
 * execute the import service with {@code --threads} threads, then shut down
 * the service and the client.
 *
 * <p>The JVM is always terminated via {@link System#exit(int)}: status 0 on
 * success, status 1 after printing help or when an {@link IOException},
 * {@link InterruptedException} or {@link ExecutionException} was caught.
 *
 * @param args the command line arguments, see the help text below
 */
public static void main(String[] args) {
    int exitcode = 0;
    try {
        OptionParser parser =
                new OptionParser() {
                    {
                        accepts("elasticsearch").withRequiredArg().ofType(String.class).required();
                        accepts("index").withRequiredArg().ofType(String.class).required();
                        accepts("type").withRequiredArg().ofType(String.class).required();
                        accepts("maxbulkactions")
                                .withRequiredArg()
                                .ofType(Integer.class)
                                .defaultsTo(1000);
                        accepts("maxconcurrentbulkrequests")
                                .withRequiredArg()
                                .ofType(Integer.class)
                                .defaultsTo(4 * Runtime.getRuntime().availableProcessors());
                        accepts("mock")
                                .withOptionalArg()
                                .ofType(Boolean.class)
                                .defaultsTo(Boolean.FALSE);
                        accepts("path").withRequiredArg().ofType(String.class).required();
                        // Not marked required: a required option can never fall back
                        // to its default, and the help text documents "*.txt" as default.
                        accepts("pattern")
                                .withRequiredArg()
                                .ofType(String.class)
                                .defaultsTo("*.txt");
                        accepts("threads").withRequiredArg().ofType(Integer.class).defaultsTo(1);
                        accepts("help");
                    }
                };
        final OptionSet options = parser.parse(args);
        // "help" takes no argument, so its presence must be tested with has();
        // hasArgument() would never be true for it.
        // NOTE(review): parse() presumably rejects a command line that lacks the
        // required options before this check is reached - confirm.
        if (options.has("help")) {
            System.err.println(
                    "Help for " + Medline.class.getCanonicalName() + lf
                            + " --help print this help message" + lf
                            + " --elasticsearch <uri> Elasticesearch URI" + lf
                            + " --index <index> Elasticsearch index name" + lf
                            + " --type <type> Elasticsearch type name" + lf
                            + " --maxbulkactions <n> the number of bulk actions per request (optional, default: 1000)" + lf
                            + " --maxconcurrentbulkrequests <n>the number of concurrent bulk requests (optional, default: 4 * cpu cores)" + lf
                            + " --path <path> a file path from where the input files are recursively collected (required)" + lf
                            + " --pattern <pattern> a regex for selecting matching file names for input (default: *.txt)" + lf
                            + " --threads <n> the number of threads (optional, default: 1)");
            System.exit(1);
        }

        // Collect the input files first so their number can be reported up front.
        input =
                new Finder((String) options.valueOf("pattern"))
                        .find((String) options.valueOf("path"))
                        .getURIs();
        final Integer threads = (Integer) options.valueOf("threads");
        logger.info("found {} input files", input.size());

        URI esURI = URI.create((String) options.valueOf("elasticsearch"));
        index = (String) options.valueOf("index");
        type = (String) options.valueOf("type");
        int maxbulkactions = (Integer) options.valueOf("maxbulkactions");
        int maxconcurrentbulkrequests = (Integer) options.valueOf("maxconcurrentbulkrequests");
        boolean mock = (Boolean) options.valueOf("mock");

        final IngestClient es = mock ? new MockIngestClient() : new IngestClient();
        es.maxBulkActions(maxbulkactions)
                .maxConcurrentBulkRequests(maxconcurrentbulkrequests)
                .newClient(esURI)
                .waitForCluster(ClusterHealthStatus.YELLOW, TimeValue.timeValueSeconds(30));

        logger.info("creating new index ...");
        es.setIndex(index).setType(type).newIndex();
        logger.info("... new index created");

        final ResourceSink sink = new ResourceSink(es);
        ImportService service =
                new ImportService()
                        .threads(threads)
                        .factory(
                                new ImporterFactory() {
                                    @Override
                                    public Importer newImporter() {
                                        return new SpringerCitations(sink);
                                    }
                                })
                        .execute();
        logger.info(
                "finished, number of files = {}, resources indexed = {}",
                fileCounter,
                sink.getCounter());

        service.shutdown();
        logger.info("service shutdown");
        es.shutdown();
        logger.info("elasticsearch client shutdown");
    } catch (IOException | InterruptedException | ExecutionException e) {
        logger.error(e.getMessage(), e);
        exitcode = 1;
    }
    System.exit(exitcode);
}