/**
 * Tells whether {@code word} occurs somewhere other than inside the given mention.
 *
 * <p>The word must be a key of {@code indexWithoutStopWords}; if it is not, the
 * answer is {@code false} straight away. Otherwise the positions recorded for the
 * word in {@code indexIncludingStopWords} are scanned, and the first position lying
 * outside the mention's token span yields {@code true}.
 *
 * @param word id of the word to look for
 * @param mention mention whose token span (start and end token, both treated as
 *     inclusive) is excluded from the search
 * @return {@code true} if at least one recorded position of the word is before the
 *     mention's start token or after its end token; {@code false} otherwise
 */
public boolean containsWord(int word, Mention mention) {
  if (!indexWithoutStopWords.containsKey(word)) {
    return false;
  }

  TIntLinkedList occurrences = indexIncludingStopWords.get(word);
  int firstMentionToken = mention.getStartToken();
  int lastMentionToken = mention.getEndToken();

  TIntIterator cursor = occurrences.iterator();
  while (cursor.hasNext()) {
    int tokenPosition = cursor.next();
    boolean insideMention =
        tokenPosition >= firstMentionToken && tokenPosition <= lastMentionToken;
    if (!insideMention) {
      // One occurrence outside the mention is enough.
      return true;
    }
  }
  return false;
}
/**
 * Renders the traced keyphrases as an HTML fragment.
 *
 * <p>The fragment starts with a header line showing the formatted {@code score} and
 * the number of keyphrases, followed by one line per keyphrase (formatted score plus
 * the entry produced by {@code buildKeyhraseHTMLEntry}). The first five keyphrases
 * are always visible; any further ones are wrapped in an initially hidden
 * {@code <div>} that is toggled by "More ..." / "Less ..." links.
 *
 * <p>Side effects: {@code keyphrases} is sorted in place, and {@code countForUI} is
 * incremented once whenever a collapsible {@code <div>} is emitted (its value is
 * used to build the div's id).
 *
 * @return the HTML fragment describing the score and its keyphrases
 */
@Override
public String getOutput() {
  // Number of keyphrases shown before the rest is folded away.
  final int visibleKeyphrases = 5;

  Collections.sort(keyphrases);

  StringBuilder sb = new StringBuilder();
  sb.append(
      "<strong style='color: #0000FF;'> score = "
          + formatter.format(score)
          + " for "
          + keyphrases.size()
          + " keyphrases</strong><br />");

  int keyphraseCount = 0;
  boolean collapsibleOpened = false;
  for (keyphraseTracingObject keyphrase : keyphrases) {
    if (keyphraseCount == visibleKeyphrases) {
      // Reached the first keyphrase beyond the visible ones: open the hidden section.
      countForUI++;
      sb.append(
          "<a onclick=\"setVisibility('div"
              + countForUI
              + "', 'block');\">More ...</a> <a onclick=\"setVisibility('div"
              + countForUI
              + "', 'none');\">Less ...</a>");
      sb.append("<div id='div" + countForUI + "' style='display:none'>");
      collapsibleOpened = true;
    }
    sb.append(
        "<span style='color: #005500;'>"
            + formatter.format(keyphrase.score)
            + "</span> - <span>\"");
    sb.append(
        buildKeyhraseHTMLEntry(keyphrase.keyphraseTokens, keyphrase.matchedKeywords, id2word));
    sb.append("\" </span> ");
    sb.append("<br />");
    keyphraseCount++;
  }

  // Close the hidden section only if it was opened. With exactly five keyphrases
  // no <div> is emitted, so an unconditional "count >= 5" check would produce a
  // stray </div>.
  if (collapsibleOpened) {
    sb.append("</div>");
  }
  return sb.toString();
}
/**
 * Adds extra (word, offset) entries to both inverted indexes.
 *
 * <p>For every key of {@code newIndexEntries}, the mapped offset is appended to the
 * word's position list in {@code indexIncludingStopWords} and, with the same value,
 * to its position list in {@code indexWithoutStopWords}. Position lists are created
 * on demand for words not yet present in an index.
 *
 * @param newIndexEntries map from word id to the offset to record for that word
 */
public void addToIndex(TIntIntHashMap newIndexEntries) {
  int[] wordIds = newIndexEntries.keys();
  for (int wordId : wordIds) {
    int wordOffset = newIndexEntries.get(wordId);

    // Index that keeps stop words.
    TIntLinkedList withStopWords = indexIncludingStopWords.get(wordId);
    if (withStopWords == null) {
      withStopWords = new TIntLinkedList();
      indexIncludingStopWords.put(wordId, withStopWords);
    }
    withStopWords.add(wordOffset);

    // Index without stop words; receives the very same offset.
    TIntLinkedList withoutStopWords = indexWithoutStopWords.get(wordId);
    if (withoutStopWords == null) {
      withoutStopWords = new TIntLinkedList();
      indexWithoutStopWords.put(wordId, withoutStopWords);
    }
    withoutStopWords.add(wordOffset);
  }
}
/**
 * Builds both inverted indexes from a token sequence.
 *
 * <p>{@code indexIncludingStopWords} maps every token to the array positions at
 * which it occurs in {@code tokens}. {@code indexWithoutStopWords} maps each kept
 * token to its positions in a separate numbering that only advances for kept
 * tokens. A token is kept when {@code isRemoveStopWords} is {@code false}, or when
 * {@code StopWord.isStopwordOrSymbol} returns {@code false} for it.
 *
 * @param tokens token ids of the input text, in order
 * @param isRemoveStopWords whether tokens reported by
 *     {@code StopWord.isStopwordOrSymbol} are left out of the second index
 */
public InputTextInvertedIndex(int[] tokens, boolean isRemoveStopWords) {
  indexIncludingStopWords = new TIntObjectHashMap<TIntLinkedList>();
  indexWithoutStopWords = new TIntObjectHashMap<TIntLinkedList>();

  int rawPosition = 0;
  int filteredPosition = 0;
  for (int token : tokens) {
    TIntLinkedList rawPositions = indexIncludingStopWords.get(token);
    if (rawPositions == null) {
      rawPositions = new TIntLinkedList();
      indexIncludingStopWords.put(token, rawPositions);
    }
    rawPositions.add(rawPosition);
    rawPosition++;

    // The stop word lookup is only consulted when filtering is requested.
    if (isRemoveStopWords && StopWord.isStopwordOrSymbol(token)) {
      continue;
    }

    TIntLinkedList filteredPositions = indexWithoutStopWords.get(token);
    if (filteredPositions == null) {
      filteredPositions = new TIntLinkedList();
      indexWithoutStopWords.put(token, filteredPositions);
    }
    filteredPositions.add(filteredPosition);
    filteredPosition++;
  }
}
/**
 * Fetches the document ids of all pages whose page number lies in the given range,
 * ordered by document id.
 *
 * <p>Each bound is optional: a {@code null} bound leaves that side of the range
 * open, and with both bounds {@code null} no page-number filter is applied.
 *
 * @param begin_inclusive lowest page number to include, or {@code null} for no
 *     lower bound
 * @param end_exclusive first page number to exclude, or {@code null} for no upper
 *     bound
 * @return the values of the first result column, in result-set order
 * @throws SQLException if creating the statement, running the query, reading the
 *     rows or closing the JDBC resources fails
 */
public int[] getDocsForPageRange(Integer begin_inclusive, Integer end_exclusive)
    throws SQLException {
  Statement statement = SQL.forThread().createStatement();
  try {
    // Assemble the optional page-number filter, one bound at a time.
    String filter = "";
    boolean hasLowerBound = begin_inclusive != null;
    if (hasLowerBound) {
      filter += " WHERE " + ResolvedPage.PAGE_NUMBER + ">=" + begin_inclusive;
    }
    if (end_exclusive != null) {
      String connector = hasLowerBound ? " AND " : " WHERE ";
      filter += connector + ResolvedPage.PAGE_NUMBER + "<" + end_exclusive;
    }

    String sql =
        "SELECT "
            + ResolvedPage.DOC_ID
            + " FROM "
            + ResolvedPage.TABLE
            + filter
            + " ORDER BY "
            + ResolvedPage.DOC_ID;

    ResultSet rows = statement.executeQuery(sql);
    try {
      TIntLinkedList docIds = new TIntLinkedList();
      while (rows.next()) {
        docIds.add(rows.getInt(1));
      }
      int[] result = new int[docIds.size()];
      return docIds.toArray(result);
    } finally {
      rows.close();
    }
  } finally {
    statement.close();
  }
}