Esempio n. 1
0
  /**
   * Classifies every specimen that does not yet have an outgoing {@code CLASSIFIED_AS}
   * relationship.
   *
   * <p>Iterates over all nodes returned by the {@code "studies"} index for the query
   * {@code title:*}. For each specimen of each study that lacks a {@code CLASSIFIED_AS}
   * relationship, the taxon at the end of its {@code ORIGINALLY_DESCRIBED_AS} relationship is
   * looked up; if {@code taxonFilter} accepts it, it is passed to
   * {@code taxonIndex.getOrCreateTaxon} and the specimen is classified as the returned taxon
   * (when non-null). A {@link NodeFactoryException} for a single taxon is logged and does not
   * stop the run. Progress is logged per batch and once at the end.
   *
   * @param batchSize number of processed names between progress log messages; {@code null} or a
   *     non-positive value disables the per-batch messages
   */
  public void resolveNames(Long batchSize) {
    StopWatch watchForEntireRun = new StopWatch();
    watchForEntireRun.start();
    StopWatch watchForBatch = new StopWatch();
    watchForBatch.start();
    // Primitive counter: avoids boxing/unboxing on every increment.
    long count = 0L;
    // Guard against null/zero batch sizes, which would otherwise throw from the modulo below.
    final boolean reportBatches = batchSize != null && batchSize > 0;

    Index<Node> studyIndex = graphService.index().forNodes("studies");
    IndexHits<Node> studies = studyIndex.query("title", "*");
    try {
      for (Node studyNode : studies) {
        final Study study1 = new Study(studyNode);
        final Iterable<Relationship> specimens = study1.getSpecimens();
        for (Relationship collected : specimens) {
          Specimen specimen = new Specimen(collected.getEndNode());
          final Relationship classifiedAs =
              specimen
                  .getUnderlyingNode()
                  .getSingleRelationship(RelTypes.CLASSIFIED_AS, Direction.OUTGOING);
          if (classifiedAs != null) {
            // Already classified; nothing to resolve.
            continue;
          }

          final Relationship describedAs =
              specimen
                  .getUnderlyingNode()
                  .getSingleRelationship(RelTypes.ORIGINALLY_DESCRIBED_AS, Direction.OUTGOING);
          if (describedAs == null) {
            // Without an original description there is no name to resolve; skip this specimen
            // instead of failing the whole run with a NullPointerException.
            LOG.warn(
                "no original description found for specimen node ["
                    + specimen.getUnderlyingNode().getId()
                    + "], skipping");
            continue;
          }

          final TaxonNode describedAsTaxon = new TaxonNode(describedAs.getEndNode());
          try {
            if (taxonFilter.shouldInclude(describedAsTaxon)) {
              TaxonNode resolvedTaxon = taxonIndex.getOrCreateTaxon(describedAsTaxon);
              if (resolvedTaxon != null) {
                specimen.classifyAs(resolvedTaxon);
              }
            }
          } catch (NodeFactoryException e) {
            LOG.warn(
                "failed to create taxon with name ["
                    + describedAsTaxon.getName()
                    + "] and id ["
                    + describedAsTaxon.getExternalId()
                    + "]",
                e);
          } finally {
            // Count every attempted name, whether or not it was resolved.
            count++;
            if (reportBatches && count % batchSize == 0) {
              watchForBatch.stop();
              final long duration = watchForBatch.getTime();
              if (duration > 0) {
                LOG.info(
                    "resolved batch of ["
                        + batchSize
                        + "] names in "
                        + getProgressMsg(batchSize, duration));
              }
              watchForBatch.reset();
              watchForBatch.start();
            }
          }
        }
      }
    } finally {
      // Always release the index hits, even when iteration fails part-way through.
      studies.close();
    }
    watchForEntireRun.stop();
    LOG.info(
        "resolved [" + count + "] names in " + getProgressMsg(count, watchForEntireRun.getTime()));
  }