コード例 #1
0
  /**
   * Builds a single venue token from the first node matched by this analyzer's XPath.
   *
   * <p>Token layout: index 0 holds the venue name (passed through
   * {@code VenueUtil.cleanupVenueName}), index 1 holds the venue address (passed through
   * {@code CleanupUtil.cleanUpName}).
   *
   * @param document the document to analyze; must not be {@code null}
   * @return {@code null} when the XPath matches no nodes, an empty list when the extracted
   *     token is rejected by {@code isTokenValid}, otherwise a list containing exactly that
   *     one token
   * @throws XPathExpressionException presumably raised by the XPath evaluation - confirm
   *     against {@code ParserUtil.evaluateXPath}
   */
  @Override
  public final List<String[]> analyzeSandboxed(final Document document)
      throws XPathExpressionException {
    Preconditions.checkNotNull(document);

    final NodeList matches = ParserUtil.evaluateXPath(document, this.getXPath());
    if (matches.getLength() == 0) {
      // Nothing matched: callers receive null rather than an empty list.
      return null;
    }
    // Only the first match is used; any further matches are ignored.
    final Node venueNode = matches.item(0);

    this.assertBeforeProcessingBegins();
    this.enterProcessingStage();

    final String[] venueToken = new String[this.tokenLenght];

    final String rawName = this.getVenueNameFromNode(venueNode);
    venueToken[0] = VenueUtil.cleanupVenueName(rawName);

    final String rawAddress = this.getVenueAddressFromNode(venueNode);
    venueToken[1] = CleanupUtil.cleanUpName(rawAddress);

    final boolean valid = this.isTokenValid(venueToken);
    if (valid) {
      // The in-progress assertion is only made for tokens that will be returned.
      this.assertDuringProcessing();
    }
    // Processing of the token is ended on both the valid and the invalid path.
    this.endProcessingOfToken();

    final List<String[]> result = Lists.<String[]>newArrayList();
    if (valid) {
      result.add(venueToken);
    }
    return result;
  }
コード例 #2
0
  /**
   * Builds an event token from the given node.
   *
   * <p>Token layout:
   * <ul>
   *   <li>0 - event full name, read from {@code node} and cleaned up</li>
   *   <li>1 - name of band/artist, derived from the cleaned event full name</li>
   *   <li>2 - name of venue</li>
   *   <li>3 - date</li>
   * </ul>
   * Venue and date are both read from the second next sibling of {@code node}'s parent.
   *
   * @param node the node holding the event name
   * @return the populated token array
   */
  @Override
  protected final String[] analyzeNodeInternal(final Node node) {
    final String[] token = new String[this.tokenLenght];

    // 0 - event full name
    final String eventName = CleanupUtil.cleanUpName(this.getStringFromNode(node));
    token[0] = eventName;

    // 1 - name of band/artist (taken from the already cleaned event name)
    token[1] = BandUtil.cleanBandName(eventName);

    // Venue and date live in the same node: two siblings after the parent of the event node.
    final Node detailsNode = node.getParentNode().getNextSibling().getNextSibling();

    // 2 - name of venue
    final String rawVenue = this.getVenueFromNode(detailsNode);
    final String cleanedVenue = CleanupUtil.cleanUpName(rawVenue);
    token[2] = VenueCleanupComponent.extractKnownVenueNameFromLargerText(cleanedVenue);

    // 3 - date
    final String rawDate = this.getDateFromNode(detailsNode);
    token[3] = DateAnalysisUtil.cleanupDate(rawDate);

    return token;
  }
コード例 #3
0
  /**
   * Builds a street token from the text of the given node: index 0 holds the street type,
   * index 1 holds the street name. Either entry stays {@code null} when the corresponding
   * extraction yields nothing.
   *
   * <p>Note: it is wrong to assume that, once the XPath reaches the elements, everything is
   * one big uninterrupted list. Nodes without text are treated as noise and skipped.
   *
   * @param node the node to analyze
   * @return {@code null} when no text could be read from the node, otherwise the token array
   */
  @Override
  protected final String[] analyzeNodeInternal(final Node node) {
    final String[] token = new String[this.tokenLenght];

    final String rawText = this.getStringFromNode(node);
    if (rawText == null) {
      // Noise between the real entries: signal "nothing here" to the caller.
      return null;
    }
    final String text = rawText.trim();

    // Cleanup helpers are only invoked on non-null extraction results.
    final String streetType = this.getStreetType(text);
    token[0] = (streetType == null) ? null : CleanupUtil.cleanUpName(streetType);

    final String streetName = this.getStreetName(text);
    token[1] = (streetName == null) ? null : StreetUtil.cleanStreetName(streetName);

    return token;
  }