Пример #1
0
 /**
  * Reports whether {@code word} occurs at least once outside the token span of {@code mention}.
  *
  * <p>A word that has no entry in the stop-word-free index is never reported. For all other
  * words, the positions recorded in the index that includes stop words are compared against
  * the mention's start and end token (both treated as part of the mention).
  *
  * @param word id of the word to look for
  * @param mention mention whose token span is excluded from the search
  * @return {@code true} if some recorded position lies before the mention's start token or
  *     after its end token; {@code false} otherwise
  */
 public boolean containsWord(int word, Mention mention) {
   if (!indexWithoutStopWords.containsKey(word)) {
     return false;
   }

   TIntLinkedList occurrences = indexIncludingStopWords.get(word);
   final int firstMentionToken = mention.getStartToken();
   final int lastMentionToken = mention.getEndToken();

   TIntIterator cursor = occurrences.iterator();
   while (cursor.hasNext()) {
     int occurrence = cursor.next();
     boolean insideMention = occurrence >= firstMentionToken && occurrence <= lastMentionToken;
     if (!insideMention) {
       // One occurrence outside the mention is enough.
       return true;
     }
   }
   return false;
 }
Пример #2
0
  /**
   * Renders the traced keyphrases as an HTML fragment.
   *
   * <p>Sorts {@code keyphrases} in place, then emits a header line with the overall score and
   * the number of keyphrases, followed by one line per keyphrase. The first five keyphrases are
   * always visible; any further ones are wrapped in an initially hidden {@code <div>} that is
   * toggled through "More ..." / "Less ..." links. Each hidden section gets a fresh id by
   * incrementing {@code countForUI}.
   *
   * @return the HTML fragment; the hidden {@code <div>} is opened and closed only when there
   *     are more than five keyphrases
   */
  @Override
  public String getOutput() {
    Collections.sort(keyphrases);

    StringBuilder sb = new StringBuilder();
    sb.append(
        "<strong style='color: #0000FF;'> score = "
            + formatter.format(score)
            + " for "
            + keyphrases.size()
            + " keyphrases</strong><br />");
    int keyphraseCount = 0;
    // Tracks whether the collapsible section was actually opened, so that the closing tag is
    // emitted only when there is a matching opening tag.
    boolean hiddenSectionOpen = false;
    for (keyphraseTracingObject keyphrase : keyphrases) {
      if (keyphraseCount == 5) {
        // The sixth keyphrase starts the collapsible section.
        countForUI++;
        sb.append(
            "<a onclick=\"setVisibility('div"
                + countForUI
                + "', 'block');\">More ...</a>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<a onclick=\"setVisibility('div"
                + countForUI
                + "', 'none');\">Less ...</a>");
        sb.append("<div id='div" + countForUI + "' style='display:none'>");
        hiddenSectionOpen = true;
      }
      sb.append(
          "<span style='color: #005500;'>"
              + formatter.format(keyphrase.score)
              + "</span> - <span>\"");
      sb.append(
          buildKeyhraseHTMLEntry(keyphrase.keyphraseTokens, keyphrase.matchedKeywords, id2word));
      sb.append("\" </span> ");
      sb.append("<br />");
      keyphraseCount++;
    }
    // With exactly five keyphrases no <div> was opened, so no </div> must be written.
    if (hiddenSectionOpen) {
      sb.append("</div>");
    }
    return sb.toString();
  }
Пример #3
0
  /**
   * Adds the given word-to-offset entries to both inverted indexes.
   *
   * <p>For every key of {@code newIndexEntries}, the mapped offset is appended to that word's
   * position list in the index including stop words and to its position list in the index
   * without stop words. Missing position lists are created on demand.
   *
   * @param newIndexEntries map from word id to the offset to record for that word
   */
  public void addToIndex(TIntIntHashMap newIndexEntries) {
    int[] words = newIndexEntries.keys();
    for (int i = 0; i < words.length; i++) {
      final int word = words[i];
      final int offset = newIndexEntries.get(word);

      TIntLinkedList withStopWords = indexIncludingStopWords.get(word);
      if (withStopWords == null) {
        withStopWords = new TIntLinkedList();
        indexIncludingStopWords.put(word, withStopWords);
      }
      withStopWords.add(offset);

      // The same offset is recorded in the stop-word-free index as well.
      TIntLinkedList withoutStopWords = indexWithoutStopWords.get(word);
      if (withoutStopWords == null) {
        withoutStopWords = new TIntLinkedList();
        indexWithoutStopWords.put(word, withoutStopWords);
      }
      withoutStopWords.add(offset);
    }
  }
Пример #4
0
  /**
   * Builds both inverted indexes from a token sequence.
   *
   * <p>Every token is recorded in the index including stop words under its array position.
   * Tokens that pass the stop-word filter are additionally recorded in the index without stop
   * words, using a separate counter that only advances for tokens that are kept.
   *
   * @param tokens token ids of the input text, in order
   * @param isRemoveStopWords if {@code true}, tokens for which
   *     {@code StopWord.isStopwordOrSymbol} returns {@code true} are left out of the
   *     stop-word-free index; if {@code false}, every token is kept
   */
  public InputTextInvertedIndex(int[] tokens, boolean isRemoveStopWords) {
    indexIncludingStopWords = new TIntObjectHashMap<TIntLinkedList>();
    indexWithoutStopWords = new TIntObjectHashMap<TIntLinkedList>();

    int keptTokenCount = 0;
    for (int rawPosition = 0; rawPosition < tokens.length; rawPosition++) {
      final int token = tokens[rawPosition];

      TIntLinkedList allOccurrences = indexIncludingStopWords.get(token);
      if (allOccurrences == null) {
        allOccurrences = new TIntLinkedList();
        indexIncludingStopWords.put(token, allOccurrences);
      }
      allOccurrences.add(rawPosition);

      // Filtered-out tokens do not consume a position in the stop-word-free index.
      if (isRemoveStopWords && StopWord.isStopwordOrSymbol(token)) {
        continue;
      }

      TIntLinkedList keptOccurrences = indexWithoutStopWords.get(token);
      if (keptOccurrences == null) {
        keptOccurrences = new TIntLinkedList();
        indexWithoutStopWords.put(token, keptOccurrences);
      }
      keptOccurrences.add(keptTokenCount);
      keptTokenCount++;
    }
  }
Пример #5
0
  /**
   * Fetches the document ids of all resolved pages within an optional page-number range,
   * ordered by document id.
   *
   * @param begin_inclusive lower page-number bound (inclusive), or {@code null} for no lower
   *     bound
   * @param end_exclusive upper page-number bound (exclusive), or {@code null} for no upper
   *     bound
   * @return the first result column of every matching row, in query order
   * @throws SQLException if creating the statement, running the query, or reading or closing
   *     the result fails
   */
  public int[] getDocsForPageRange(Integer begin_inclusive, Integer end_exclusive)
      throws SQLException {
    Statement st = SQL.forThread().createStatement();
    try {
      final boolean hasLowerBound = begin_inclusive != null;
      final boolean hasUpperBound = end_exclusive != null;

      // Assemble the optional WHERE clause from whichever bounds were supplied.
      String whereClause = "";
      if (hasLowerBound) {
        whereClause = " WHERE " + ResolvedPage.PAGE_NUMBER + ">=" + begin_inclusive;
      }
      if (hasUpperBound) {
        String connector = hasLowerBound ? " AND " : " WHERE ";
        whereClause += connector + ResolvedPage.PAGE_NUMBER + "<" + end_exclusive;
      }

      String query =
          "SELECT "
              + ResolvedPage.DOC_ID
              + " FROM "
              + ResolvedPage.TABLE
              + whereClause
              + " ORDER BY "
              + ResolvedPage.DOC_ID;

      ResultSet rows = st.executeQuery(query);
      try {
        TIntLinkedList docIds = new TIntLinkedList();
        while (rows.next()) {
          docIds.add(rows.getInt(1));
        }
        int[] result = new int[docIds.size()];
        return docIds.toArray(result);
      } finally {
        rows.close();
      }
    } finally {
      st.close();
    }
  }