public List parsePage(String pageCode) { List sections = new ArrayList(); List folders = new ArrayList(); List files = new ArrayList(); int start = pageCode.indexOf("<div id=\"list-view\" class=\"view\""); int end = pageCode.indexOf("<div id=\"gallery-view\" class=\"view\""); String usefulSection = ""; if (start != -1 && end != -1) { usefulSection = pageCode.substring(start, end); } else { debug("Could not parse page"); } try { DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder(); InputSource is = new InputSource(); is.setCharacterStream(new StringReader(usefulSection)); Document doc = db.parse(is); NodeList divs = doc.getElementsByTagName("div"); for (int i = 0; i < divs.getLength(); i++) { Element div = (Element) divs.item(i); boolean isFolder = false; if (div.getAttribute("class").equals("filename")) { NodeList imgs = div.getElementsByTagName("img"); for (int j = 0; j < imgs.getLength(); j++) { Element img = (Element) imgs.item(j); if (img.getAttribute("class").indexOf("folder") > 0) { isFolder = true; } else { isFolder = false; // it's a file } } NodeList anchors = div.getElementsByTagName("a"); Element anchor = (Element) anchors.item(0); String attr = anchor.getAttribute("href"); String fileName = anchor.getAttribute("title"); String fileURL; if (isFolder && !attr.equals("#")) { folders.add(attr); folders.add(fileName); } else if (!isFolder && !attr.equals("#")) { // Dropbox uses ajax to get the file for download, so the url isn't enough. We must be // sneaky here. fileURL = "https://dl.dropbox.com" + attr.substring(23) + "?dl=1"; files.add(fileURL); files.add(fileName); } } } } catch (Exception e) { debug(e.toString()); } sections.add(files); sections.add(folders); return sections; }
/**
 * Extracts the current folder's name from the breadcrumb heading of a listing page.
 *
 * <p>The markup from {@code <h3 id="breadcrumb">} up to the {@code list-view} div is parsed as
 * XML, and the trimmed text content of the first {@code h3} element is returned. If that text
 * contains a literal {@code "/>"}, everything up to and including it is discarded first.
 *
 * @param pageCode the complete HTML source of the listing page; {@code null} is treated like
 *     an unparseable page
 * @return the folder name, or the literal string {@code "Error!"} when the breadcrumb section
 *     is missing or cannot be parsed
 */
public String getFolderName(String pageCode) {
    if (pageCode == null) {
        debug("Could not parse page");
        return "Error!";
    }

    int start = pageCode.indexOf("<h3 id=\"breadcrumb\">");
    int end = pageCode.indexOf("<div id=\"list-view\" class=\"view\"");
    // Without this guard a missing or misordered marker makes substring() throw a
    // StringIndexOutOfBoundsException that would escape to the caller.
    if (start == -1 || end == -1 || start >= end) {
        debug("Could not parse page");
        return "Error!";
    }
    String usefulSection = pageCode.substring(start, end);

    try {
        DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        InputSource is = new InputSource();
        is.setCharacterStream(new StringReader(usefulSection));
        Document doc = db.parse(is);

        NodeList headings = doc.getElementsByTagName("h3");
        if (headings.getLength() > 0) {
            String text = headings.item(0).getTextContent();
            // getTextContent() drops the tags themselves, so "/>" is normally absent. Only
            // cut when it is really present; otherwise indexOf's -1 plus 2 would silently
            // chop off the first character of the name.
            int marker = text.indexOf("/>");
            String folderName = marker >= 0 ? text.substring(marker + 2) : text;
            return folderName.trim();
        }
    } catch (Exception e) {
        debug(e.toString());
    }
    return "Error!";
}