/**
  * Tells whether a link consists of nothing but an image.
  *
  * <p>The link qualifies only when it has exactly one child element, carries no non-blank text of
  * its own, and selecting {@code IMAGE_LINK_CHILDREN_CSS_LIKE_QUERY} over its children yields at
  * least one match.
  *
  * @param linkElement the link element to inspect
  * @return whether the current link element is an image link
  */
 protected boolean isImageLink(Element linkElement) {
   // Anything other than exactly one child element disqualifies the link.
   if (linkElement.children().size() != 1) {
     return false;
   }
   // Text sitting directly inside the link means it is not a pure image link.
   if (StringUtils.isNotBlank(linkElement.ownText())) {
     return false;
   }
   return !linkElement.children().select(IMAGE_LINK_CHILDREN_CSS_LIKE_QUERY).isEmpty();
 }
 /**
  * @param linkElement
  * @return whether the current link element is a svg link
  */
 protected boolean isSvgLink(Element linkElement) {
   if (linkElement.children().isEmpty()
       || linkElement.children().size() > 1
       || StringUtils.isNotBlank(linkElement.ownText())) {
     return false;
   }
   return !linkElement.children().select(HtmlElementStore.SVG_ELEMENT).isEmpty();
 }
Пример #3
0
  /**
   * Copies class-driven data from {@code element} (and, for block tags, its children) into
   * {@code values}.
   *
   * <p>Elements without any class name are ignored, and their children are not visited. A block
   * tag at depth 0 stores its class set under {@code "type"}; a deeper block tag opens a nested
   * map keyed by its first class name. An inline tag stores its content once per class name, and
   * additionally stores its {@code href} under {@code "url"} when it carries the {@code url}
   * class.
   *
   * @param element the element to process
   * @param values the map receiving the extracted entries
   * @param depth nesting depth; 0 for the top-level element
   */
  private void recurse(final Element element, final Map<String, Object> values, final int depth) {

    final Tag tag = element.tag();
    final Set<String> classes = element.classNames();
    final String link = element.attr("href");
    final Object content = extractChildContent(element);

    if (classes.isEmpty()) {
      return;
    }

    removeEmpty(classes);

    if (tag.isBlock()) {

      // children are written either into the caller's map (top level) or into a fresh nested map
      final Map<String, Object> target;

      if (depth == 0) {
        // toplevel classes define type
        values.put("type", classes);
        target = values;
      } else {
        target = new LinkedHashMap<>();
        values.put(classes.iterator().next(), target);

        if (content != null) {
          target.put("name", content);
        }
      }

      for (final Element child : element.children()) {
        recurse(child, target, depth + 1);
      }
      return;
    }

    if (!tag.isInline()) {
      return;
    }

    // extract href and store as URL; the marker class is dropped so it is not stored as a type
    if (classes.contains("url") && StringUtils.isNotBlank(link)) {
      values.put("url", link);
      classes.remove("url");
    }

    if (content == null) {
      return;
    }

    for (final String type : classes) {
      values.put(type, content);
    }
  }
Пример #4
0
  // Looks up addresses by postal code (CEP).
  public List<Address> getByCep(String cep) throws IOException {

    listEnderecos = new ArrayList<Address>();

    // Form parameters sent along with the request.
    Map<String, String> params = new HashMap<String, String>();
    params.put("CEP", cep);
    params.put("Metodo", "listaLogradouro");
    params.put("TipoConsulta", "cep");
    params.put("StartRow", "1");
    params.put("EndRow", "10");

    // POST the parameters to the Correios lookup page through Jsoup and keep the parsed reply.
    Document response =
        Jsoup.connect(Utils.adressCorreios)
            .data(params)
            .header("Origin", "http://www.buscacep.correios.com.br")
            .header("Referer", "http://www.buscacep.correios.com.br")
            .post();

    // The third table of the reply is read row by row; every row becomes one Address in the
    // list, which is also kept in the field so other classes can reach it.
    Elements rows = response.select("table").eq(2).select("tr");

    for (Element row : rows) {
      Elements cells = row.children().select("td");

      Address address = new Address();
      address.setLogradouro(cells.eq(0).text());
      address.setBairro(cells.eq(1).text());
      // locality is rendered as "city/state"
      address.setLocalidade(cells.eq(2).text() + "/" + cells.eq(3).text());
      address.setCEP(cells.eq(4).text());

      listEnderecos.add(address);
    }

    return listEnderecos;
  }
Пример #5
0
  /**
   * Reads one item from a design element, adds it to the data source, and then does the same for
   * every child element, attaching each child item to this one as its parent.
   *
   * <p>The item id is taken from the element's {@code text} attribute. An {@code icon} attribute,
   * when present, is read as a {@code Resource} and set as the item icon.
   *
   * @since 7.5.0
   * @param node an element representing the item (tree node).
   * @param selected a set accumulating selected items; the id of an item carrying the
   *     {@code selected} attribute is added to it.
   * @param context the DesignContext instance used in parsing
   * @return the item id of the new item
   * @throws DesignException if the tag name of the {@code node} element is not {@code node}.
   */
  @Override
  protected String readItem(Element node, Set<String> selected, DesignContext context) {

    final String tagName = node.tagName();
    if (!"node".equals(tagName)) {
      throw new DesignException(
          "Unrecognized child element in " + getClass().getSimpleName() + ": " + tagName);
    }

    final String id = node.attr("text");
    addItem(id);

    if (node.hasAttr("icon")) {
      setItemIcon(
          id, DesignAttributeHandler.readAttribute("icon", node.attributes(), Resource.class));
    }

    if (node.hasAttr("selected")) {
      selected.add(id);
    }

    // children are read recursively and hooked under the item just created
    for (Element childNode : node.children()) {
      setParent(readItem(childNode, selected, context), id);
    }

    return id;
  }
Пример #6
0
  /**
   * Parses the table rows into program data; by default the first column is parsed.
   *
   * <p>The result has one two-slot entry per input row: slot 0 receives the filtered text of the
   * cell's {@code dt} elements and slot 1 the filtered text of its {@code dd} elements. Rows for
   * which no cell is read, or whose processing throws, keep {@code null} in both slots.
   *
   * @param rows the source rows
   * @return the program data, one {@code [dt text, dd text]} pair per row
   */
  private static String[][] parseRows(Elements rows) {
    String[][] programs = new String[rows.size()][2];
    // Countdown counters loaded from "rowspan" attributes and decremented once per row.
    // NOTE(review): they appear to track how many more rows the current first-column and
    // second-column cells still cover - confirm against the page layout.
    int rowspan_0 = 0;
    int rowspan_1 = 0;
    for (int i = 0; i < rows.size(); i++) {
      Element row = rows.get(i);
      try {
        Elements cells = row.children();

        if (rowspan_0 == 0) {
          // Both counters may be reloaded here: the first cell supplies rowspan_0 and, if
          // rowspan_1 has also run out, the second cell supplies rowspan_1 and the row's data.
          Element cell_0 = cells.get(0);
          rowspan_0 = Integer.valueOf(cell_0.attr("rowspan"));
          if (rowspan_1 == 0) {
            Element cell_1 = cells.get(1);
            rowspan_1 = Integer.valueOf(cell_1.attr("rowspan"));
            programs[i][0] = DBclass.xmlFilte(cell_1.select("dt").text());
            programs[i][1] = DBclass.xmlFilte(cell_1.select("dd").text());
          }
        } else if (rowspan_1 == 0) {
          // rowspan_0 is still counting down, so the row's first cell supplies rowspan_1 and
          // the row's data.
          Element cell_0 = cells.get(0);
          rowspan_1 = Integer.valueOf(cell_0.attr("rowspan"));
          programs[i][0] = DBclass.xmlFilte(cell_0.select("dt").text());
          programs[i][1] = DBclass.xmlFilte(cell_0.select("dd").text());
        }
        rowspan_0--;
        rowspan_1--;
      } catch (Exception e) {
        // Any failure (missing cell, non-numeric rowspan, ...) is only printed to stdout. The
        // two decrements above are skipped for that row, and a counter assigned before the
        // failure keeps its new value.
        e.printStackTrace(System.out);
      }
    }
    return programs;
  }
Пример #7
0
 /**
  * Extracts the areas listed in the first {@code subarea} element of the given HTML.
  *
  * <p>Children of that element are scanned starting from the second one: a {@code b} element
  * sets the current letter, and every {@code a} element becomes an {@code AreaVO} carrying that
  * letter, the link text, the running {@code index}, the given parent id, and a pinyin value cut
  * out of the link's {@code href}.
  *
  * @param text the HTML to parse
  * @param pid the parent id stored on every area
  * @return the areas found; empty when there is no {@code subarea} element
  */
 public List<AreaVO> parseMessage(String text, int pid) {
   List<AreaVO> found = new ArrayList<AreaVO>();
   Elements subareas = Jsoup.parse(text).body().getElementsByClass("subarea");
   if (subareas.isEmpty()) {
     return found;
   }

   Elements entries = subareas.get(0).children();
   String currentLetter = "";
   // the scan starts at the second child, as in the original loop
   for (int pos = 1; pos < entries.size(); pos++) {
     Element entry = entries.get(pos);
     String tagName = entry.tagName();

     if ("b".equals(tagName)) {
       currentLetter = entry.text();
     } else if ("a".equals(tagName)) {
       AreaVO area = new AreaVO();
       area.setLetter(currentLetter);
       area.setName(entry.text());
       area.setOrderIdx(index);
       area.setPid(pid);

       // pinyin is the part of the href between offset 7 and the last slash
       String href = entry.attr("href");
       area.setPinyin(href.substring(7, href.lastIndexOf("/")));

       index++;
       System.out.println(area.toString());
       found.add(area);
     }
   }
   return found;
 }
Пример #8
0
  /**
   * Parses a recipe page into a {@code Holder}.
   *
   * <p>Dish type, title, cooking method and main material are each read as the text of a fixed
   * CSS selector. Steps come from the {@code div.editnew.edit} content blocks or, when none
   * exist, from the paragraphs of {@code div.edit}; only entries that have a child with class
   * {@code step} and non-empty text are kept.
   *
   * @param html the page markup
   * @param url the page URL, stored on the holder and used as base URI for parsing
   * @return the populated holder
   */
  public Holder doParse(String html, String url) {
    Holder holder = new Holder();
    holder.url = url;

    Document doc = Jsoup.parse(html, url);

    holder.dishType =
        doc.select("body > div.main_w.clearfix > div.main.clearfix > ul > li:nth-child(5) > a")
            .text();

    holder.title =
        doc.select(
                "body > div.main_w.clearfix > div.main.clearfix > div.cp_header.clearfix > div.cp_main_info_w > div.info1 > h1 > a")
            .text();

    holder.method =
        doc.select(
                "body > div.main_w.clearfix > div.main.clearfix > div.cp_header.clearfix > div.cp_main_info_w > div.info2 > ul > li:nth-child(1) > a")
            .text();

    holder.mainMaterial =
        doc.select(
                "body > div.main_w.clearfix > div.main.clearfix > div.cp_body.clearfix > div.cp_body_left > div.materials > div > div.yl.zl.clearfix > ul > li > div > h4 > a")
            .text();

    // newer page layout: steps live in content blocks under div.editnew.edit
    Elements stepCandidates =
        doc.select(
            "body > div.main_w.clearfix > div.main.clearfix > div.cp_body.clearfix > div.cp_body_left > div.measure > div.editnew.edit > div.content.clearfix");

    // other layout: steps are plain paragraphs under div.edit
    if (stepCandidates.isEmpty()) {
      stepCandidates =
          doc.select(
              "body > div.main_w.clearfix > div.main.clearfix > div.cp_body.clearfix > div.cp_body_left > div.measure > div.edit > p");
    }

    for (Element candidate : stepCandidates) {
      if (!candidate.children().hasClass("step")) {
        continue;
      }
      String stepText = candidate.text();
      if (!stepText.isEmpty()) {
        holder.steps.add(stepText);
      }
    }

    return holder;
  }
Пример #9
0
  /** Parsing head-only and body-only fragments must still produce a full head/body structure. */
  @Test
  public void createsDocumentStructure() {
    Document doc =
        Jsoup.parse(
            "<meta name=keywords /><link rel=stylesheet /><title>jsoup</title><p>Hello world</p>");
    Element headElement = doc.getHead();
    Element bodyElement = doc.getBody();

    // element counts: head + body under the document, three head entries, one paragraph
    assertEquals(2, doc.children().size());
    assertEquals(3, headElement.children().size());
    assertEquals(1, bodyElement.children().size());

    // the meta tag ends up in the head, not in the body
    assertEquals("keywords", headElement.getElementsByTag("meta").get(0).attr("name"));
    assertEquals(0, bodyElement.getElementsByTag("meta").size());

    // title and paragraph text survive
    assertEquals("jsoup", doc.getTitle());
    assertEquals("Hello world", bodyElement.text());
    assertEquals("Hello world", bodyElement.children().get(0).text());
  }
Пример #10
0
 // Walks the element tree depth-first and swaps every <br> for a "\n" inserted at its position.
 private void fixLineBreaks(Element el) {
   for (final Element child : el.children()) {
     if (!"br".equals(child.tagName())) {
       // not a line break: keep descending
       fixLineBreaks(child);
       continue;
     }
     child.before("\n");
     child.remove();
   }
 }
Пример #11
0
 /**
  * Builds a component hierarchy from a design given as an html tree.
  *
  * <p>When a component root is supplied, the components created while reading the design are
  * bound to its member fields (through a {@code FieldBinder}), and every such field must end up
  * bound. Without a component root, the root component is whatever reading the single top-level
  * element produces, or {@code null} for an empty body.
  *
  * @param doc the html tree
  * @param componentRoot optional component root instance. The type must match the type of the root
  *     element in the design.
  * @param classWithFields a class (componentRoot class or a super class) with some member fields.
  *     The member fields whose type is assignable from {@link Component} are bound to fields in
  *     the design based on id/local id/caption
  * @return the design context used for reading, with its root component set
  * @throws DesignException if the body holds more than one top-level element, if a component
  *     root is given but the body is empty, if field binding cannot be set up, or if some fields
  *     remain unbound
  */
 private static DesignContext designToComponentTree(
     Document doc, Component componentRoot, Class<?> classWithFields) {
   DesignContext designContext = new DesignContext(doc);
   designContext.readPackageMappings(doc);

   // A document without a body element needs no special handling here - jsoup takes care of it.
   Elements topLevel = doc.body().children();
   int topLevelCount = topLevel.size();
   if (topLevelCount > 1) {
     throw new DesignException(
         "The first level of a component hierarchy should contain at most one root component, but found "
             + topLevelCount
             + ".");
   }
   Element element = topLevel.isEmpty() ? null : topLevel.get(0);

   if (componentRoot == null) {
     // no root instance given: reading the element creates the entire component hierarchy
     Component created = element == null ? null : designContext.readDesign(element);
     designContext.setRootComponent(created);
     return designContext;
   }

   if (element == null) {
     throw new DesignException(
         "The root element cannot be null when the specified root Component is not null.");
   }

   // the supplied root instance may have member fields that should be bound
   final FieldBinder binder;
   try {
     binder = new FieldBinder(componentRoot, classWithFields);
   } catch (IntrospectionException e) {
     throw new DesignException("Could not bind fields of the root component", e);
   }

   // binds every component created during reading to the matching field of the root instance
   ComponentCreationListener bindingListener =
       new ComponentCreationListener() {
         @Override
         public void componentCreated(ComponentCreatedEvent event) {
           binder.bindField(event.getComponent(), event.getLocalId());
         }
       };
   designContext.addComponentCreationListener(bindingListener);

   // create subtree
   designContext.readDesign(element, componentRoot);

   // every member field must have been bound by now
   Collection<String> unbound = binder.getUnboundFields();
   if (!unbound.isEmpty()) {
     throw new DesignException("Found unbound fields from component root " + unbound);
   }

   // reading is done, the listener is no longer needed
   designContext.removeComponentCreationListener(bindingListener);

   designContext.setRootComponent(componentRoot);
   return designContext;
 }
Пример #12
0
  /**
   * Looks for an {@code h1} whose first child element holds a year marker of the form
   * "(", four digits, an en dash, a space, ")".
   *
   * <p>On the first match the four digits are stored through {@code setYear}, and the heading
   * text with a marker-length suffix cut off (then trimmed) is stored through {@code setTitle}.
   *
   * @return {@code true} if a matching heading was found and year and title were set;
   *     {@code false} if there is no {@code h1} or none matches
   */
  private boolean hasValidHeader() {
    // Compiled once per call instead of once per heading.
    final Pattern yearMarker = Pattern.compile("[(](\\d){4}\u2013 [)]");

    for (Element heading : getDoc().getElementsByTag("h1")) {

      if (heading.children().size() == 0) {
        continue;
      }

      Matcher matcher = yearMarker.matcher(heading.children().first().text());
      if (!matcher.matches()) {
        continue;
      }

      final String marker = matcher.group(0);
      final String headingText = heading.text();

      // characters 1..4 of the marker are the four digits right after the opening parenthesis
      setYear(marker.substring(1, 5));
      // the title is the heading text minus as many trailing characters as the marker is long
      setTitle(headingText.substring(0, headingText.length() - marker.length()).trim());
      return true;
    }
    return false;
  }
Пример #13
0
 /**
  * Stores the reply count on the builder: the number parsed from the text of the first child
  * element of {@code ele}, or 0 when {@code ele} has no child elements.
  */
 private static void parseReplyCount(Topic.Builder topicBuilder, Element ele) {
   // no child element means the topic does not have a reply yet
   final boolean hasCounter = !ele.children().isEmpty();
   final int replies = hasCounter ? Integer.parseInt(ele.child(0).text()) : 0;
   topicBuilder.setReplyCount(replies);
 }
Пример #14
0
 /**
  * Recursively walks {@code parentElement} and files elements into {@code map} or
  * {@code topMenumap}, keyed by {@code level + "," + hashCode()}.
  *
  * <p>The element's inner HTML is first replaced by the result of {@code deleteComent}
  * (presumably comment stripping - the helper is not shown here). Then:
  *
  * <ul>
  *   <li>an element with children that is a {@code ul} or {@code table} is handled as a whole:
  *       if its HTML contains the "首页" literal or its id contains "nav", each of its {@code a}
  *       descendants is added to {@code topMenumap} unless already present; otherwise the
  *       element itself goes into {@code map};
  *   <li>any other element with children is descended into, child by child;
  *   <li>an element without children goes into {@code map} when its HTML is not empty, except
  *       {@code script} elements, which are skipped.
  * </ul>
  *
  * @param parentElement the element to process
  * @param level the depth of the caller; it is incremented locally before being used in keys
  */
 private void getChildElement(Element parentElement, Integer level) {
   parentElement.html(deleteComent(parentElement.html()));
   // (debug output of key and value was disabled here)
   if (parentElement.children().size() > 0) {
     level += 1;
     // NOTE(review): the ul/table branch below does not use i, so it runs once per child with
     // the same outcome - confirm whether a single pass was intended.
     for (int i = 0; i < parentElement.children().size(); i++) {
       if (("ul".equals(parentElement.tagName().toLowerCase()))
           || ("table".equals(parentElement.tagName().toLowerCase()))) { // handled as a whole tag
         String html = parentElement.html().replaceAll(" ", "").replaceAll(" ", ""); // strip Chinese and English spaces
         // "首页" is the literal searched for in the markup (it means "home page")
         if (html.contains("首页") || parentElement.id().contains("nav")) {
           // navigation block: collect every link once
           Elements links = parentElement.select("a");
           for (Element ele : links) {
             if (topMenumap.get(level + "," + ele.hashCode()) == null) {
               topMenumap.put(level + "," + ele.hashCode(), ele);
               // (debug output of the link key, href and text was disabled here)
             }
           }
         } else {
           map.put(level + "," + parentElement.hashCode(), parentElement);
         }
       } else {
         getChildElement(parentElement.child(i), level);
       }
     }
   } else {
     // leaf element: scripts are ignored
     if ("script".equals(parentElement.tagName().toLowerCase())) {
       return;
     }
     if (StringUtils.isNotEmpty(parentElement.html())) {
       level += 1;
       map.put(level + "," + parentElement.hashCode(), parentElement);
     }
   }
 }
Пример #15
0
  public ForumUser getUserData() {

    ForumUser fUser = new ForumUser();
    fUser.userDefinedInfo = new HashMap<String, String>();

    Element temp = mainUserInfo.select("h1").get(0);

    System.out.println(temp.text());
    if (!temp.text().equals("")) {
      fUser.userName = temp.text();
    }

    fUser.avatarUri = "www.forum.hr/" + doc.getElementById("user_avatar").attr("src");

    temp = mainUserInfo.select("div[id=last_online]").get(0);

    if (temp.text() != null) {
      fUser.userLastActivity = temp.text();
    }

    int i = 0;
    for (Element el : userMiniStats.children()) {
      System.out.println(el);
      System.out.println("______________________________");
      // check only evens for keys (on odds are values)
      if (i % 2 == 0) {
        temp = userMiniStats.children().get(i);
        if (!temp.text().equals(""))
          fUser.userDefinedInfo.put(temp.text(), userMiniStats.children().get(i + 1).text());
      }
      i += 1;
    }

    System.out.println("USername: "******"Last activity: " + fUser.userLastActivity);
    System.out.println("Avatar URI: " + fUser.avatarUri);
    System.out.println("Other info: " + fUser.userDefinedInfo);

    return fUser;
  }
Пример #16
0
  /**
   * Returns the child elements of the second child of the {@code OtherIndicesTable} element, in
   * document order.
   */
  @Override
  public LinkedList<Element> getAllIndexesRows() {
    Element tableBody = getDocument().getElementById("OtherIndicesTable").child(1);
    // copy the rows into the list type the interface asks for
    return new LinkedList<Element>(tableBody.children());
  }
Пример #17
0
  /** Start tags that are never closed with '>' must still be turned into elements. */
  @Test
  public void parsesUnterminatedTag() {
    // a lone, unterminated start tag still produces one <p>
    Document single = Jsoup.parse("<p");
    assertEquals(1, single.getElementsByTag("p").size());

    // two unterminated tags in a row: the div has exactly one child and the p is found by id
    Document nested = Jsoup.parse("<div id=1<p id='2'");
    Element div = nested.getElementById("1");
    assertEquals(1, div.children().size());
    assertNotNull(nested.getElementById("2"));
  }
Пример #18
0
  // Looks up postal codes (CEP) by street address.
  public List<String> getByAdress(String address) throws IOException {

    listAddress = new ArrayList<String>();

    // Form parameters sent along with the request.
    Map<String, String> params = new HashMap<String, String>();
    params.put("relaxation", address);
    params.put("TipoCep", "ALL");
    params.put("semelhante", "N");
    params.put("cfm", "1");
    params.put("Metodo", "listaLogradouro");
    params.put("TipoConsulta", "relaxation");
    params.put("StartRow", "1");
    params.put("EndRow", "10");

    // POST the parameters to the Correios lookup page through Jsoup and keep the parsed reply.
    Document response =
        Jsoup.connect(Utils.adressCorreios)
            .timeout(20000)
            .data(params)
            .header("Origin", "http://www.buscacep.correios.com.br")
            .header("Referer", "http://www.buscacep.correios.com.br")
            .post();

    // The third table of the reply is read row by row; the fifth cell of every row is the CEP.
    // The list is also kept in the field so other classes can reach it.
    Elements rows = response.select("table").eq(2).select("tr");

    for (Element row : rows) {
      // the value still passes through Address, so its setter and getter are applied as before
      Address holder = new Address();
      holder.setCEP(row.children().select("td").eq(4).text());
      listAddress.add(holder.getCEP());
    }

    return listAddress;
  }
Пример #19
0
  /**
   * Builds a {@code Topic} from one list item: the first child element supplies the member, the
   * third the title and info, and the fourth the reply count.
   */
  private static Topic parseItemForNode(Element item, Node node) {
    final Elements columns = item.children();
    final Topic.Builder builder = new Topic.Builder();

    parseMember(builder, columns.get(0));

    // title and info are both read from the same column
    final Element titleColumn = columns.get(2);
    parseTitle(builder, titleColumn);
    parseInfo(builder, titleColumn, node);

    parseReplyCount(builder, columns.get(3));

    return builder.createTopic();
  }
Пример #20
0
  /** Asking for a child index past the last child must raise IndexOutOfBoundsException. */
  @Test
  public void testChildThrowsIndexOutOfBoundsOnMissing() {
    Element div = Jsoup.parse("<div><p>One</p><p>Two</p></div>").select("div").first();

    assertEquals(2, div.children().size());
    assertEquals("One", div.child(0).text());

    try {
      div.child(3);
    } catch (IndexOutOfBoundsException expected) {
      // this is the outcome under test
      return;
    }
    fail("Should throw index out of bounds");
  }
  /**
   * Fetches the post page and fills in this post's title, excerpt, content and date.
   *
   * <p>The page must contain exactly one {@code content} element inside {@code main}; otherwise
   * a message is printed and nothing is changed. Images are collected first, each
   * {@code lightbox} element is replaced in place by its extracted image nodes, and the direct
   * children of the content element are then read up to the first one with class {@code clear}.
   *
   * @param aInConnection the connection used to fetch the page; its URL is set to this post's
   * @param images the collection that receives the images found in the content
   * @throws IOException if fetching the page fails
   */
  public void download(Connection aInConnection, Collection<Image> images) throws IOException {
    aInConnection.url(url);
    Elements lContents =
        aInConnection.get().getElementById("main").getElementsByClass("content");

    if (lContents.size() != 1) {
      System.out.println("More than one content in main section of post page " + toString());
      return;
    }

    Element lContent = lContents.first();
    collectImages(lContent, images);

    // swap every lightbox wrapper for the image nodes extracted from it, at the same position
    for (Element lLightbox : lContent.getElementsByClass("lightbox")) {
      Collection<Node> lImageNodes = extractImageNodes(lLightbox);
      lLightbox.parent().insertChildren(lLightbox.siblingIndex(), lImageNodes);
      lLightbox.remove();
    }

    StringBuilder lBody = new StringBuilder();
    for (Element lChild : lContent.children()) {
      if (lChild.hasClass("clear")) {
        // no more post content
        break;
      }

      String lTag = lChild.tagName();
      if (title == null && lTag.equals("h1")) {
        // the first h1 header is the title and is kept out of the content
        title = lChild.html();
        continue;
      }

      // the first paragraph doubles as the excerpt
      if (excerpt == null && lTag.equals("p")) {
        excerpt = lChild.text();
      }
      lBody.append(lChild.toString());
    }
    content = lBody.toString();

    date = new PostDate(lContent.getElementsByClass("date").first().html());
  }
Пример #22
0
 /**
  * Renders an element tree as text, one {@code tagName:ownText} line per element, each indented
  * by one more space than its parent. A newline is appended after every child's rendering in
  * addition to the newline that ends the child's own line.
  *
  * @param root the element to render
  * @param indentation the number of leading spaces for {@code root}
  * @return the rendered text
  */
 public static String printNode(Element root, int indentation) {
   StringBuilder out = new StringBuilder();
   for (int pad = indentation; pad > 0; pad--) {
     out.append(' ');
   }
   out.append(root.tagName()).append(":").append(root.ownText()).append("\n");

   for (Element child : root.children()) {
     out.append(printNode(child, indentation + 1)).append("\n");
   }
   return out.toString();
 }
Пример #23
0
        /**
         * Visits {@code el} and its descendants depth-first. Every h1-h6 element increments the
         * section index and stores its filtered text in {@code dataStore} under
         * {@code SectionTitle/<title>/<hash>/<index>}.
         *
         * @param el the element to visit
         * @return the section index after the whole subtree has been visited
         */
        public int walk(Element el) {
          Elements children = el.children();

          if (el.tagName().toLowerCase().matches("h[1-6]")) {
            secIndex++;
            String key = "SectionTitle/" + title + "/" + hash + "/" + secIndex;
            dataStore.put(key, secNameFilter(el.text()));
          }

          // each child gets its own walker seeded with the current index
          for (Element child : children) {
            secIndex = (new Walker(secIndex)).walk(child);
          }

          return secIndex;
        }
Пример #24
0
  /**
   * Renames every direct child element of {@code e} that is not already a {@code p} to
   * {@code p}. The rename is done in place through {@code Element.tagName(String)}; children that
   * are already {@code p} are left untouched, and deeper descendants are not visited.
   *
   * @param e the element whose direct children are normalised
   */
  private static void alterElement(Element e) {
    for (Element child : e.children()) {
      if (!"p".equals(child.tag().getName())) {
        child.tagName("p"); // plain replace
      }
    }
  }
Пример #25
0
  /**
   * Downloads the page at {@code url} and stores its missions as the children of the group at
   * {@code groupPos}.
   *
   * <p>Every {@code asset-abstract} element with a child of class {@code asset-more} becomes one
   * {@code MissionItemData} holding a running id, the text and href of the first link inside
   * {@code asset-content}, the {@code src} of the first image inside
   * {@code asset-abstract-imgLink}, and the element's inner HTML. An {@code IOException} while
   * fetching is only printed.
   *
   * @param url the page address; it must include the http protocol
   * @param groupPos index into {@code listDataHeader} of the group that receives the missions
   */
  @SuppressWarnings("unused")
  private void getHtml(String url, int groupPos) {
    try {
      List<MissionItemData> missions = new ArrayList<>();
      // need http protocol
      Document page = Jsoup.connect(url).userAgent("Mozilla").get();

      int nextId = 0;
      for (Element asset : page.getElementsByClass("asset-abstract")) {
        if (!asset.children().hasClass("asset-more")) {
          continue;
        }

        Element anchor = asset.getElementsByClass("asset-content").first().select("a").first();
        String name = anchor.text();
        String hrefLink = anchor.attr("href");

        String imgLink =
            asset
                .getElementsByClass("asset-abstract-imgLink")
                .first()
                .select("img")
                .first()
                .attr("src");

        missions.add(new MissionItemData(nextId, name, hrefLink, imgLink, asset.html()));
        nextId++;
      }

      listDataChild.put(listDataHeader.get(groupPos).getName(), missions);

    } catch (IOException e) {
      e.printStackTrace();
    }
  }
Пример #26
0
    /**
     * Handles the downloaded page: ends the pull-to-refresh state, dumps the page to a debug
     * file, detects the last page, and feeds the parsed stories to the adapter.
     *
     * <p>Stories are read from the children of the first {@code div.news-list-ul}; when the page
     * has no such element, the children of the first {@code ul.index-ul} are used instead. When
     * neither exists the adapter is left untouched. Entries lacking the link (or, in the first
     * layout, the title paragraph) are skipped. The stories replace the adapter data when
     * {@code mLatestStart} is 1 and are appended otherwise.
     *
     * @param document the fetched page, or {@code null} if loading failed
     */
    @Override
    protected void onPostExecute(Document document) {
      super.onPostExecute(document);
      story_pull_list.onRefreshComplete();

      // Checked before anything touches the document: the debug dump below used to call
      // document.toString() first and crashed on a failed load instead of showing the hint.
      if (document == null) {
        ToastAlone.show(R.string.load_fail_hint);
        return;
      }

      // Debug dump of the fetched page to external storage.
      File file = new File(Environment.getExternalStorageDirectory() + "/Latest_qbaobei.txt");
      BufferedWriter bufferedWriter = null;
      try {
        if (!file.exists()) {
          file.createNewFile();
        }
        bufferedWriter = new BufferedWriter(new FileWriter(file.getAbsolutePath()));
        bufferedWriter.write(document.toString());
      } catch (IOException e) {
        e.printStackTrace();
      } finally {
        // close even when writing failed, so the file handle is not leaked
        if (bufferedWriter != null) {
          try {
            bufferedWriter.close();
          } catch (IOException e) {
            e.printStackTrace();
          }
        }
      }

      Element page = document.select("div.page").first();
      if (page != null) {
        Elements children = page.children();
        if (children.size() == 1 && "prev".equals(children.first().attr("class"))) {
          // a pager holding only the "prev" entry means the last page has been reached
          isLastPage = true;
          ToastAlone.show(R.string.load_all_data);
          Log.i("cxm", "the last one");
          return;
        }
      }
      progressbar.setVisibility(View.GONE);

      ArrayList<StoryBean> storyBeans = new ArrayList<StoryBean>();

      // Primary layout (the newer scraping rules). select() never returns null, so the choice
      // between the two layouts is made on whether a first match exists.
      Element div_fir = document.select("div.news-list-ul").first();
      if (div_fir != null) {
        for (Element element : div_fir.children()) {
          Element href_elem = element.select("a[href]").first();
          Element tit_element = element.select("p.tit").first();
          if (href_elem == null || tit_element == null) {
            // not a story entry
            continue;
          }
          String href_str = href_elem.attr("href");
          Element img_elem = element.select("img[src]").first();
          String img_str = Constans.defualt_pic;
          if (img_elem != null) {
            img_str = img_elem.attr("src");
          }
          String tit_str = tit_element.text();
          LogUtil.v(tit_str + "---" + href_str + "---" + img_str);

          StoryBean bean = new StoryBean();
          bean.setTitle(tit_str);
          bean.setImg(img_str);
          bean.setUrl(href_str);
          storyBeans.add(bean);
        }
      } else {
        // Fall back to the second parsing approach right away.
        Element ul_fir = document.select("ul.index-ul").first();
        if (ul_fir == null) {
          // neither layout is present: nothing to show
          return;
        }
        for (Element child : ul_fir.children()) {
          Element href_elem = child.select("a[href]").first();
          if (href_elem == null) {
            // not a story entry
            continue;
          }
          Element img_elem = child.select("img[src]").first();
          String img_url = Constans.defualt_pic;
          if (img_elem != null) {
            img_url = img_elem.attr("src");
          }

          StoryBean bean = new StoryBean();
          bean.setTitle(href_elem.text());
          bean.setUrl(href_elem.attr("href"));
          bean.setImg(img_url);
          storyBeans.add(bean);
        }
      }

      // the first page replaces the adapter content, later pages are appended
      if (mLatestStart == 1) {
        mStoryAdapter.setData(storyBeans);
      } else {
        mStoryAdapter.addData(storyBeans);
      }
    }
Пример #27
0
  /**
   * Reads every {@code <note>} of every {@code <part>} in {@code this.doc} and appends the
   * resulting {@code Note} objects to {@code notes}.
   *
   * <p>For each note the voice, pitch (or rest), length, position, instrument, tie, tuplet and beam
   * information is extracted. The length is derived from the {@code <type>} element (together with
   * {@code <dot>} and {@code <time-modification>}), not from {@code <duration>}. {@code <forward>}
   * and {@code <backup>} elements only move the running position inside the current part.
   *
   * @throws NumberFormatException if a {@code <divisions>}, {@code <voice>}, {@code <octave>} or
   *     {@code <duration>} value that is read is not an integer
   */
  private void parseNotes() {
    // Declared outside the loops on purpose: a note whose <type> is not recognised keeps the
    // duration of the previously parsed note.
    int duration = 0;
    Elements parts = this.doc.select("part");
    for (Element part : parts) {
      int position = 0;
      int lastDuration = 0;
      divisions = 1;
      for (Element thismeasure : part.getElementsByTag("measure")) {
        Elements divisionElements = thismeasure.getElementsByTag("divisions");
        if (!divisionElements.isEmpty()) {
          divisions = Integer.valueOf(divisionElements.text());
        }
        for (Element thisnote : thismeasure.children()) {
          String tagName = thisnote.tagName();
          if (tagName.equals("note")) {
            Note note = new Note();

            Elements voiceElements = thisnote.getElementsByTag("voice");
            if (!voiceElements.isEmpty()) {
              note.setVoice(Integer.valueOf(voiceElements.text()));
            }

            // Pitch: a note without a <pitch> child is treated as a rest.
            Elements pitchElements = thisnote.getElementsByTag("pitch");
            if (!pitchElements.isEmpty()) {
              for (Element thispitch : pitchElements) {
                String step = thispitch.getElementsByTag("step").text();
                int pitch = getPitchFromStep(step);
                String octave =
                    thispitch
                        .getElementsByTag("octave")
                        .text()
                        .replaceAll("^\\s+|\\s+$|\\s*(\n)\\s*|(\\s)\\s*", "$1$2");
                int octaveInt = Integer.parseInt(octave);
                note.setOctave(octaveInt);

                // Only whole-semitone alterations up to a double sharp/flat are recognised; a
                // missing or unknown <alter> counts as 0.
                int alterValue;
                switch (thispitch.getElementsByTag("alter").text()) {
                  case "1":
                    alterValue = 1;
                    break;
                  case "-1":
                    alterValue = -1;
                    break;
                  case "2":
                    alterValue = 2;
                    break;
                  case "-2":
                    alterValue = -2;
                    break;
                  default:
                    alterValue = 0;
                    break;
                }
                int pitchClass = pitch + alterValue;
                note.setPitchClass(pitchClass);
                note.setPitch(pitchClass + 12 * octaveInt);
              }
            } else {
              note.setPitch(cp.model.note.Note.REST);
            }

            // Length from the written note type.
            String type = thisnote.getElementsByTag("type").text();
            boolean dotted = !thisnote.getElementsByTag("dot").isEmpty();
            if (thisnote.getElementsByTag("time-modification").isEmpty()) {
              switch (type) {
                case "16th":
                  // TODO: dotted sixteenth notes are not handled yet; the dot is ignored.
                  duration = DurationConstants.SIXTEENTH;
                  break;
                case "eighth":
                  duration = DurationConstants.EIGHT;
                  if (dotted) {
                    duration = DurationConstants.EIGHT + DurationConstants.SIXTEENTH;
                  }
                  break;
                case "quarter":
                  duration = DurationConstants.QUARTER;
                  if (dotted) {
                    duration = DurationConstants.QUARTER + DurationConstants.EIGHT;
                  }
                  break;
                case "half":
                  duration = DurationConstants.HALF;
                  if (dotted) {
                    duration = DurationConstants.HALF + DurationConstants.QUARTER;
                  }
                  break;
                case "whole":
                  duration = DurationConstants.WHOLE;
                  if (dotted) {
                    duration = DurationConstants.WHOLE + DurationConstants.HALF;
                  }
                  break;
                default:
                  break;
              }
            } else {
              String actualNotes = thisnote.getElementsByTag("actual-notes").text();
              String normalType = thisnote.getElementsByTag("normal-type").text();
              boolean triplet = actualNotes.equals("3");
              if (triplet || actualNotes.equals("6")) {
                // Triplets and sextuplets use the same (triplet) duration constants.
                if (triplet) {
                  note.setTriplet(true);
                } else {
                  note.setSextuplet(true);
                }
                note.setTimeModification(normalType);
                switch (type) {
                  case "16th":
                    duration = DurationConstants.SIXTEENTH_TRIPLET;
                    break;
                  case "eighth":
                    duration = DurationConstants.EIGHT_TRIPLET;
                    if (dotted) {
                      duration =
                          DurationConstants.EIGHT_TRIPLET + DurationConstants.SIXTEENTH_TRIPLET;
                    }
                    break;
                  case "quarter":
                    duration = DurationConstants.QUARTER_TRIPLET;
                    if (dotted) {
                      duration =
                          DurationConstants.QUARTER_TRIPLET + DurationConstants.EIGHT_TRIPLET;
                    }
                    break;
                  case "half":
                    duration = DurationConstants.HALF_TRIPLET;
                    if (dotted) {
                      duration = DurationConstants.HALF_TRIPLET + DurationConstants.QUARTER_TRIPLET;
                    }
                    break;
                  default:
                    break;
                }
              } else if (actualNotes.equals("5")) {
                note.setQuintuplet(true);
                note.setTimeModification(normalType);
                switch (type) {
                  case "16th":
                    duration = DurationConstants.SIXTEENTH_QUINTUPLET;
                    break;
                  case "eighth":
                    duration = DurationConstants.EIGHT_QUINTUPLET;
                    if (dotted) {
                      duration =
                          DurationConstants.EIGHT_QUINTUPLET
                              + DurationConstants.SIXTEENTH_QUINTUPLET;
                    }
                    break;
                  default:
                    break;
                }
              }
            }

            note.setLength(duration);
            note.setDisplayLength(duration);

            if (!thisnote.getElementsByTag("chord").isEmpty()) {
              // A chord note is placed where the preceding note was placed: step back by that
              // note's duration and do not advance the running position.
              note.setPosition(position - lastDuration);
            } else {
              note.setPosition(position);
              position = position + duration;
            }

            lastDuration = duration;
            note.setInstrument(thisnote.getElementsByTag("instrument").attr("id"));

            Elements tiedElements = thisnote.getElementsByTag("tied");
            if (!tiedElements.isEmpty()) {
              String tieType = tiedElements.attr("type");
              if (tieType.equals("start")) {
                note.setTieStart(true);
              }
              if (tieType.equals("stop")) {
                note.setTieEnd(true);
              }
            }

            Elements tupletElements = thisnote.getElementsByTag("tuplet");
            if (!tupletElements.isEmpty()) {
              Element tuplet = tupletElements.first();
              String tupletType = tuplet.attr("type");
              if (tupletType.equals("start")) {
                note.setTupletType(TupletType.START);
              }
              if (tupletType.equals("stop")) {
                note.setTupletType(TupletType.STOP);
              }
              if (tuplet.attr("bracket").equals("yes")) {
                note.setBracket(true);
              }
            }

            Elements beams = thisnote.getElementsByTag("beam");
            if (beams.size() == 1) {
              String beam = beams.text();
              if ("begin".equals(beam)) {
                note.setBeamType(BeamType.BEGIN);
              } else if ("continue".equals(beam)) {
                note.setBeamType(BeamType.CONTINUE);
              } else if ("end".equals(beam)) {
                note.setBeamType(BeamType.END);
              }
            } else if (beams.size() == 2) {
              String firstBeam = beams.get(0).text();
              String secondBeam = beams.get(1).text();
              if ("begin".equals(firstBeam) && "begin".equals(secondBeam)) {
                note.setBeamType(BeamType.BEGIN_BEGIN);
              } else if ("continue".equals(firstBeam) && "continue".equals(secondBeam)) {
                note.setBeamType(BeamType.CONTINUE_CONTINUE);
              } else if ("end".equals(firstBeam) && "end".equals(secondBeam)) {
                note.setBeamType(BeamType.END_END);
              } else if ("continue".equals(firstBeam) && "begin".equals(secondBeam)) {
                note.setBeamType(BeamType.CONTINUE_BEGIN);
              } else if ("continue".equals(firstBeam) && "end".equals(secondBeam)) {
                // Compares the FIRST beam with "continue"; testing the second beam against both
                // "continue" and "end" could never be true, so CONTINUE_END was never assigned.
                note.setBeamType(BeamType.CONTINUE_END);
              }
            }

            notes.add(note);

          } else if (tagName.equals("forward")) {
            position = position + Integer.parseInt(thisnote.getElementsByTag("duration").text());
          } else if (tagName.equals("backup")) {
            position = position - Integer.parseInt(thisnote.getElementsByTag("duration").text());
          }
        }
      }
    }
  }
Пример #28
0
  /**
   * Downloads the page at {@code url} and extracts one map per {@code div.result} element.
   *
   * <p>Each map holds four lists under {@code Keys.KEY_NAMES}, {@code Keys.KEY_ADDRESSES},
   * {@code Keys.KEY_PHONE_NUMBERS} and {@code Keys.KEY_TITLES}. After the results were read,
   * {@code mNextUrl} is set to the target of the last paging link when its text matches
   * {@code Keys.KEY_MORE_RESULTS}, and to {@code null} otherwise.
   *
   * <p>This is best effort: if fetching or parsing throws an {@link Exception}, the stack trace is
   * printed, {@code mNextUrl} is left unchanged and the results collected so far are returned.
   *
   * @param url address of the result page to fetch
   * @return the extracted results; never {@code null}, possibly empty
   */
  private ArrayList<HashMap<String, ArrayList<String>>> fetchResult(String url) {
    ArrayList<HashMap<String, ArrayList<String>>> results =
        new ArrayList<HashMap<String, ArrayList<String>>>();
    try {
      Document doc = Jsoup.connect(url).get();
      Elements infos = doc.select("div.result");
      Elements pagingLinks = doc.select("div.paging").select("a");
      for (Element info : infos) {
        ArrayList<String> names = new ArrayList<String>();
        ArrayList<String> address = new ArrayList<String>();
        ArrayList<String> phoneNrs = new ArrayList<String>();
        ArrayList<String> titles = new ArrayList<String>();

        Elements nm = info.select("span.cut");
        if (nm.isEmpty()) {
          // Businesses have links, not spans
          nm = info.select("a.cut");
        }

        for (Element name : nm) {
          names.add(name.text());
          // The first child element of the name, if any, carries the title.
          if (name.children().size() > 0) {
            titles.add(name.child(0).text());
          }
        }

        for (Element adr : info.select("a.addressinfo")) {
          address.add(adr.text());
        }

        for (Element phoneNr : info.select("a.phone")) {
          phoneNrs.add(phoneNr.text());
        }

        HashMap<String, ArrayList<String>> res = new HashMap<String, ArrayList<String>>();
        res.put(Keys.KEY_NAMES, names);
        res.put(Keys.KEY_ADDRESSES, address);
        res.put(Keys.KEY_PHONE_NUMBERS, phoneNrs);
        res.put(Keys.KEY_TITLES, titles);
        results.add(res);
      }

      String nextUrl = null;
      if (!pagingLinks.isEmpty()) {
        Element lastUrl = pagingLinks.last();
        // Locale.ROOT keeps the comparison independent of the device locale (e.g. Turkish
        // dotless i would otherwise break the match).
        String linkText =
            lastUrl.text().replaceAll("\\s", "").toLowerCase(java.util.Locale.ROOT);
        if (linkText.equals(Keys.KEY_MORE_RESULTS)) {
          nextUrl = BASE_URL + lastUrl.attr("href").replace(" ", "+");
        }
      }
      mNextUrl = nextUrl;
    } catch (Exception e) {
      // Deliberately not rethrown: callers get whatever was parsed before the failure.
      e.printStackTrace();
    }
    // Returned after the try/catch instead of from a finally block: a return inside finally
    // silently discards every Throwable, including Errors such as OutOfMemoryError.
    return results;
  }
Пример #29
0
  /**
   * Parses every element matched by {@code ContentXPath.REFERENCE} in {@code htmlDoc} into a
   * {@code Reference}: DOI, authors, source, volume, date, complete citation, title, pages, PMID
   * and publisher data.
   *
   * <p>References matching {@code ContentXPath.REFERENCE_UNSTRUCTURED} only get their DOI set.
   * Every sub-element is optional; a missing one leaves the corresponding property untouched
   * instead of aborting the whole run with a {@link NullPointerException}.
   *
   * <p>NOTE(review): the populated {@code Reference} is not stored or returned anywhere in this
   * method - confirm whether it is supposed to be added to a collection.
   *
   * @param htmlDoc parsed document containing the reference list
   */
  public void extractReferences(Document htmlDoc) {
    // select() never returns null, so no null check is needed before iterating.
    Elements references = htmlDoc.select(ContentXPath.REFERENCE.path);

    for (Element reference : references) {
      Reference refInfo = new Reference();

      // doi
      refInfo.setDoi(reference.attr(ContentXPath.REFERENCE_DOI_ATTR.path));

      // parse content of reference
      Document referenceHtml = HtmlDocumentUtil.getHtmlDocumentFromString(reference.html());

      // check if reference is unstructured
      Element referenceUnstructured =
          referenceHtml.select(ContentXPath.REFERENCE_UNSTRUCTURED.path).first();
      if (referenceUnstructured != null) {
        // TODO: handle unstructured Data
        continue;
      }

      // authors
      Element citationAuthorsList = referenceHtml.select(ContentXPath.AUTHORS2_ROOT.path).first();
      if (citationAuthorsList != null) {
        // citation version 2
        Document authorsHtml =
            HtmlDocumentUtil.getHtmlDocumentFromString(citationAuthorsList.html());
        Elements citationAuthorsEntries = authorsHtml.select(ContentXPath.AUTHORS2_ENTRY.path);
        for (Element authorEntry : citationAuthorsEntries) {
          String lastName = "";
          String firstName = "";
          for (Element authorElement : authorEntry.children()) {
            String authorClass = authorElement.attr("class");
            if (authorClass.contains(ContentXPath.AUTHORS2_SURNAME.path)) {
              lastName = authorElement.text();
            } else if (authorClass.contains(ContentXPath.AUTHORS2_FIRSTNAME.path)) {
              firstName = authorElement.text();
            }
          }
          refInfo.addAuthor(refInfo.new Author(lastName, firstName));
        }
      } else {
        // citation version 1
        Element firstAuthorSNM =
            referenceHtml.select(ContentXPath.FIRST_AUTHOR_SURNAME.path).first();
        Element firstAuthorFNM =
            referenceHtml.select(ContentXPath.FIRST_AUTHOR_FIRSTNAME.path).first();
        // first() yields null when nothing matches; a missing name part becomes "" (as in
        // version 2) and the author is skipped only when both parts are absent.
        if (firstAuthorSNM != null || firstAuthorFNM != null) {
          String lastName = firstAuthorSNM != null ? firstAuthorSNM.text() : "";
          String firstName = firstAuthorFNM != null ? firstAuthorFNM.text() : "";
          refInfo.addAuthor(refInfo.new Author(lastName, firstName));
        }

        Element authors = referenceHtml.select(ContentXPath.AUTHORS.path).first();
        if (authors != null) {
          addAuthorsToReference(authors.text(), refInfo);
        }
      }

      // reference source/journal
      Element source = referenceHtml.select(ContentXPath.CITE_SOURCE.path).first();
      if (source == null) {
        source = referenceHtml.select(ContentXPath.CITE_SOURCE_JNL.path).first();
      }
      if (source != null) {
        refInfo.setSource(source.text());
      }

      // reference volume
      Element volume = referenceHtml.select(ContentXPath.CITE_VOLUME.path).first();
      if (volume != null) {
        refInfo.setVolume(volume.text());
      }

      // reference date
      Element date = referenceHtml.select(ContentXPath.CITE_DATE.path).first();
      if (date != null) {
        refInfo.setDate(date.text());
      }

      // complete citation (may be missing, like every other sub-element)
      Element citeComplete = referenceHtml.select(ContentXPath.CITE_COMPLETE.path).first();
      String completeCitation = "";
      if (citeComplete != null) {
        completeCitation = citeComplete.text();
        refInfo.setCompleteCitation(completeCitation);
      }

      // reference title: explicit title element first, otherwise derived from the citation text
      Element titleElement = referenceHtml.select(ContentXPath.CITE_TITLE.path).first();
      if (titleElement != null) {
        refInfo.setTitle(titleElement.text());
      } else if (citeComplete != null) {
        refInfo.setTitle(extractTitleFromCitation(citeComplete.ownText()));
      }

      // reference first page
      Element fpage = referenceHtml.select(ContentXPath.CITE_FPAGE.path).first();
      String firstPage = fpage != null ? fpage.text() : "";

      // reference last page
      Element lpage = referenceHtml.select(ContentXPath.CITE_LPAGE.path).first();
      if (lpage != null) {
        refInfo.setPages(firstPage + "-" + lpage.text());
      } else if (fpage != null && citeComplete != null) {
        // No explicit last page: recover the page range from the complete citation text.
        refInfo.setPages(extractCompletePagesFromCitation(completeCitation, firstPage));
      }

      // pmid from medline link (if available)
      Element medline = referenceHtml.select(ContentXPath.MEDLINE_LINK.path).first();
      if (medline != null) {
        String medlineLink = medline.attr(ContentXPath.MEDLINE_LINK_ATTR.path);
        refInfo.setPmid(extractPMIDFromMedlineLink(medlineLink));
      }

      // publisherName
      Element publisherName = referenceHtml.select(ContentXPath.PUBL_NAME.path).first();
      if (publisherName != null) {
        refInfo.setPublisherName(publisherName.text());
      }

      // publisherLocation
      Element publisherLocation = referenceHtml.select(ContentXPath.PUBL_LOC.path).first();
      if (publisherLocation != null) {
        refInfo.setPublisherLocation(publisherLocation.text());
      }
    }
  }
Пример #30
0
  /**
   * Extracts the holdings (copies) of a record page and adds them to {@code res}.
   *
   * <p>The layout is chosen by {@code data.optString("copystyle")}: {@code "doublestacked"} or
   * {@code "stackedtable"}. Any other value leaves {@code res} untouched. A page that lacks the
   * holdings container yields no copies instead of failing.
   *
   * @param res item that receives the parsed copies
   * @param doc parsed record page
   * @param data library configuration; for {@code "stackedtable"} it must contain a
   *     {@code "copytable"} object
   * @throws JSONException if {@code "copytable"} is required but missing or malformed
   */
  static void parseCopies(DetailledItem res, Document doc, JSONObject data) throws JSONException {
    String copystyle = data.optString("copystyle");
    if ("doublestacked".equals(copystyle)) {
      parseDoubleStackedCopies(res, doc);
    } else if ("stackedtable".equals(copystyle)) {
      parseStackedTableCopies(res, doc, data.getJSONObject("copytable"));
    }
  }

  /**
   * Parses the "doublestacked" layout: {@code h5} headings name the branch, each following table
   * has the call number in the second cell of its first row and one copy (barcode, status) per
   * further row.
   */
  private static void parseDoubleStackedCopies(DetailledItem res, Document doc) {
    // e.g. http://vopac.nlg.gr/Record/393668/Holdings#tabnav
    // for Athens_GreekNationalLibrary
    Element container = doc.select(".tab-container").first();
    if (container == null) {
      // first() returns null when nothing matches: no holdings section, hence no copies.
      return;
    }
    String branch = "";
    for (Element child : container.children()) {
      if (child.tagName().equals("h5")) {
        branch = child.text();
      } else if (child.tagName().equals("table")) {
        String callNumber = "";
        boolean headRow = true;
        for (Element row : child.select("tr")) {
          if (headRow) {
            callNumber = row.child(1).text();
            headRow = false;
          } else {
            Copy copy = new Copy();
            copy.setBranch(branch);
            copy.setShelfmark(callNumber);
            copy.setBarcode(row.child(0).text());
            copy.setStatus(row.child(1).text());
            res.addCopy(copy);
          }
        }
      }
    }
  }

  /**
   * Parses the "stackedtable" layout. Every non-underscore key of {@code copytable} names a copy
   * property; its value is either a column index or {@code "column/line"}, where {@code line}
   * counts the {@code <br>}-separated lines inside that cell. {@code "_offset"} is the number of
   * leading rows to skip in each table.
   */
  private static void parseStackedTableCopies(
      DetailledItem res, Document doc, JSONObject copytable) throws JSONException {
    // e.g. http://search.lib.auth.gr/Record/376356
    // or https://katalog.ub.uni-leipzig.de/Record/0000196115
    // or https://www.stadt-muenster.de/opac2/Record/0367968
    // .tab-container is used in Muenster.
    Element container = doc.select(".recordsubcontent, .tab-container").first();
    if (container == null) {
      // No holdings section on this page, hence no copies.
      return;
    }
    int offset = copytable.optInt("_offset", 0);
    boolean signatureInHeadRow = "headrow".equals(copytable.optString("signature"));
    String branch = "";
    for (Element child : container.children()) {
      Element block = child;
      if (block.tagName().equals("div")) {
        if (block.children().isEmpty()) {
          // An empty wrapper div has nothing to unwrap; child(0) would throw.
          continue;
        }
        block = block.child(0);
      }
      if (block.tagName().equals("h3")) {
        branch = block.text();
      } else if (block.tagName().equals("table")) {
        Elements captions = block.select("caption");
        if (captions.size() > 0) {
          // Leipzig_Uni
          branch = captions.first().ownText();
        }
        Elements rows = block.select("tr");
        String callNumber = null;
        if (signatureInHeadRow) {
          callNumber = rows.get(0).child(1).text();
        }
        int i = 0;
        for (Element row : rows) {
          if (i < offset) {
            i++;
            continue;
          }
          Copy copy = new Copy();
          if (callNumber != null) {
            copy.setShelfmark(callNumber);
          }
          copy.setBranch(branch);
          Iterator<?> keys = copytable.keys();
          while (keys.hasNext()) {
            String key = (String) keys.next();
            if (key.startsWith("_")) {
              continue;
            }
            if (copytable.optString(key, "").contains("/")) {
              // Leipzig_Uni
              String[] splitted = copytable.getString(key).split("/");
              int col = Integer.parseInt(splitted[0]);
              int line = Integer.parseInt(splitted[1]);
              int j = 0; // index of the <br>-separated line currently being visited
              for (Node node : row.child(col).childNodes()) {
                if (node instanceof Element) {
                  if (((Element) node).tagName().equals("br")) {
                    j++;
                  } else if (j == line) {
                    copy.set(key, ((Element) node).text());
                  }
                } else if (node instanceof TextNode
                    && j == line
                    && !((TextNode) node).text().trim().equals("")) {
                  copy.set(key, ((TextNode) node).text());
                }
              }
            } else {
              // Thessaloniki_University
              if (copytable.optInt(key, -1) == -1) {
                continue;
              }
              String value = row.child(copytable.getInt(key)).text();
              copy.set(key, value);
            }
          }
          res.addCopy(copy);
          i++;
        }
      }
    }
  }