コード例 #1
0
  /**
   * Opens the article's link in {@code driver} and copies the text of selected page parts onto
   * the article.
   *
   * <p>For each part name (header, date, preface, body, upleft-component, downright-component)
   * the tag name to search for is read from {@code a.getPaper().getCondition(part)}. The text of
   * every element the driver returns for that tag is concatenated, in the order returned, with no
   * separator. Non-empty results are printed to standard output and stored as follows:
   *
   * <ul>
   *   <li>header - {@code a.setTitle(...)}
   *   <li>preface - {@code a.setPreface(...)}
   *   <li>body - {@code a.setBody(...)}
   *   <li>date and the two component parts - printed only, not stored
   * </ul>
   *
   * <p>Parts whose tag name is missing (null or blank) are skipped so that one unconfigured part
   * does not prevent the remaining parts from being extracted.
   *
   * @param a the article whose link is loaded and whose title, preface and body may be set
   */
  public void textExtraction(Article a) {
    System.out.println("starting driver : " + a.getLink());
    driver.get(a.getLink());
    System.out.println("driver started");

    Newspaper newspaper = a.getPaper();
    String[] parts = {
      "header", "date", "preface", "body", "upleft-component", "downright-component"
    };

    for (String part : parts) {
      System.out.println("part : " + part);

      String descriptor = newspaper.getCondition(part);
      System.out.println("element to find : " + descriptor);

      // Without a tag name there is nothing to look up; skip this part instead of letting the
      // locator reject the value and abort the extraction of the parts that follow.
      if (descriptor == null || descriptor.trim().isEmpty()) {
        continue;
      }

      StringBuilder text = new StringBuilder();
      for (WebElement element : driver.findElements(By.tagName(descriptor))) {
        text.append(element.getText());
      }
      if (text.length() == 0) {
        // No matching element produced any text; leave the article untouched for this part.
        continue;
      }

      String extracted = text.toString();
      System.out.println("str to print : " + extracted);

      if ("header".equals(part)) {
        a.setTitle(extracted);
      } else if ("preface".equals(part)) {
        a.setPreface(extracted);
      } else if ("body".equals(part)) {
        a.setBody(extracted);
      }
      // date, upleft-component and downright-component are only printed above; this method
      // does not store them on the article.
    }
  }