예제 #1
0
  /**
   * Checks the category graph's path length, counted both in nodes and in edges, from the
   * category "UKP" to itself, to a neighboring category, and to a category two steps away.
   */
  @Test
  public void testGetPathLength() throws WikiApiException {
    Category start = wiki.getCategory("UKP");
    Category adjacent = wiki.getCategory("Projects of UKP");
    Category distant = wiki.getCategory("SIR");

    // Lengths counted in nodes.
    int nodesToSelf = catGraph.getPathLengthInNodes(start, start);
    int nodesToAdjacent = catGraph.getPathLengthInNodes(start, adjacent);
    int nodesToDistant = catGraph.getPathLengthInNodes(start, distant);

    // Lengths counted in edges.
    int edgesToSelf = catGraph.getPathLengthInEdges(start, start);
    int edgesToAdjacent = catGraph.getPathLengthInEdges(start, adjacent);
    int edgesToDistant = catGraph.getPathLengthInEdges(start, distant);

    // The node count is expected to be one less than the edge count, except for the
    // identical-category case where both are zero.
    assertEquals(0, nodesToSelf);
    assertEquals(0, nodesToAdjacent);
    assertEquals(1, nodesToDistant);

    assertEquals(0, edgesToSelf);
    assertEquals(1, edgesToAdjacent);
    assertEquals(2, edgesToDistant);
  }
예제 #2
0
  /**
   * Reports whether the Wikipedia database connection is usable, attempting to connect when the
   * database reports that it is not.
   *
   * @return {@code true} if the connection check passed or the connect attempt completed without
   *     throwing; {@code false} if the connect attempt threw an exception.
   */
  public boolean checkConnection() {
    if (wikipedia.getDatabase().checkConnection()) {
      return true;
    }

    try {
      wikipedia.getDatabase().connect();
      return true;
    } catch (Exception e) {
      // Any failure while connecting is reported to the caller as "not connected".
      return false;
    }
  }
예제 #3
0
  /**
   * Handles the markup of a single link: appends its anchor text to {@code context} and, if the
   * link destination resolves to an article, records that article's id in {@code bannedTopics}.
   *
   * @param markup the link markup, either {@code dest} or {@code dest|anchor}
   * @param context buffer that receives a newline followed by the anchor text
   * @param bannedTopics set that receives the id of the destination article, when one is found
   */
  private void processLink(String markup, StringBuffer context, HashSet<Integer> bannedTopics) {

    // A colon after the first character is taken as a namespace prefix; such links are skipped
    // because only the main namespace is of interest.
    if (markup.indexOf(":") > 0) {
      return;
    }

    // Without a pipe (or with a pipe as the very first character) the whole markup serves as
    // both anchor and destination.
    int pipeIndex = markup.lastIndexOf("|");
    boolean piped = pipeIndex > 0;
    String anchor = piped ? markup.substring(pipeIndex + 1) : markup;
    String dest = piped ? markup.substring(0, pipeIndex) : markup;

    context.append("\n").append(anchor);

    Article art = wikipedia.getArticleByTitle(dest);
    if (art == null) {
      return;
    }
    bannedTopics.add(art.getId());
  }
예제 #4
0
    /**
     * Refills the page buffer, either from the explicit id/title lists or from the database,
     * depending on {@code loadFromList}. The buffer and its offset/fill-size bookkeeping are reset
     * before new pages are added.
     *
     * @return {@code true} if the buffer holds at least one page afterwards, {@code false} if no
     *     further pages could be loaded
     */
    private boolean fillBuffer() {
      return loadFromList ? fillBufferFromLists() : fillBufferFromDatabase();
    } // fillBuffer

    /**
     * Fills the buffer by consuming entries from {@code pageIds} first and {@code pageTitles}
     * second. Entries that are null, empty, malformed, or that cannot be resolved to a page are
     * dropped with a warning and do not count towards the buffer size.
     */
    private boolean fillBufferFromLists() {
      // clear the old buffer and all variables regarding the state of the buffer
      buffer.clear();
      bufferOffset = 0;
      bufferFillSize = 0;

      if (pageIds.isEmpty() && pageTitles.isEmpty()) {
        return false;
      }

      // Load at most maxBufferSize pages (the previous "<=" comparison loaded one page too many).
      // The floor of 1 guarantees progress even for a non-positive buffer size, so the remaining
      // list entries are never silently abandoned.
      final int limit = Math.max(1, maxBufferSize);

      while (bufferFillSize < limit && !pageIds.isEmpty()) {
        String id = pageIds.remove(0);
        if (id == null || id.isEmpty()) {
          continue;
        }
        try {
          buffer.add(wiki.getPage(Integer.parseInt(id)));
          bufferFillSize++;
        } catch (NumberFormatException e) {
          // A non-numeric id must not abort the whole iteration; skip it like a missing page.
          logger.warn("Skipping malformed article id \"" + id + "\"");
        } catch (WikiApiException e) {
          logger.warn("Missing article with id " + id);
        }
      }

      while (bufferFillSize < limit && !pageTitles.isEmpty()) {
        String title = pageTitles.remove(0);
        if (title == null || title.isEmpty()) {
          continue;
        }
        try {
          buffer.add(wiki.getPage(title));
          bufferFillSize++;
        } catch (WikiApiException e) {
          logger.warn("Missing article with title \"" + title + "\"");
        }
      }

      if (buffer.isEmpty()) {
        return false;
      }
      bufferFillSize = buffer.size();
      return true;
    }

    /**
     * Fills the buffer with pages whose id is greater than {@code lastPage}, fetched from the
     * database in batches of {@code maxBufferSize}. When {@code onlyArticles} is set,
     * disambiguation pages are excluded by the query and redirects are filtered out afterwards.
     *
     * <p>If every row of a batch is filtered out or fails to load, the next batch is fetched
     * instead of reporting the end of the data, so a run of redirects cannot end the iteration
     * prematurely.
     */
    private boolean fillBufferFromDatabase() {
      while (true) {
        Session session = this.wiki.__getHibernateSession();
        session.beginTransaction();
        // NOTE(review): the query has no explicit ordering, so paging via lastPage relies on the
        // database returning rows in ascending id order - confirm for the target DBMS.
        List<?> returnValues;
        if (onlyArticles) {
          returnValues =
              session
                  .createCriteria(de.tudarmstadt.ukp.wikipedia.api.hibernate.Page.class)
                  .add(Restrictions.eq("isDisambiguation", false))
                  .add(Restrictions.gt("id", lastPage))
                  .setMaxResults(maxBufferSize)
                  .list();
        } else {
          returnValues =
              session
                  .createCriteria(de.tudarmstadt.ukp.wikipedia.api.hibernate.Page.class)
                  .add(Restrictions.gt("id", lastPage))
                  .setMaxResults(maxBufferSize)
                  .list();
        }
        session.getTransaction().commit();

        // clear the old buffer and all variables regarding the state of the buffer
        buffer.clear();
        bufferOffset = 0;
        bufferFillSize = 0;

        // No rows beyond lastPage: the data is exhausted.
        if (returnValues.isEmpty()) {
          return false;
        }

        for (Object o : returnValues) {
          if (o == null) {
            // Kept from the original logic: a null row is treated as the end of the data.
            return false;
          }
          de.tudarmstadt.ukp.wikipedia.api.hibernate.Page hibernatePage =
              (de.tudarmstadt.ukp.wikipedia.api.hibernate.Page) o;
          long id = hibernatePage.getId();
          try {
            Page apiPage = new Page(this.wiki, id, hibernatePage);
            if (!this.onlyArticles || !apiPage.isRedirect()) {
              buffer.add(apiPage);
            }
          } catch (WikiApiException e) {
            // Pass the exception to the logger instead of printing the stack trace to stderr.
            logger.error("Page with hibernateID " + id + " not found.", e);
          }
          // Advance the paging cursor even for skipped rows so they are not fetched again.
          lastPage = id;
        }

        if (!buffer.isEmpty()) {
          bufferFillSize = buffer.size();
          return true;
        }
        // The whole batch was skipped; lastPage has advanced, so fetch the next batch.
      }
    }