Exemplo n.º 1
0
  /**
   * Downloads the archive.org "etree" band listing, scrapes every list entry out of it and
   * stores the result in {@code db}.
   *
   * <p>For each {@code <li>} matched by {@code //tr[@valign='top']//li}, the first child tag is
   * taken as the artist title (with a handful of HTML entities decoded) and the remaining inner
   * HTML of the entry, trimmed, is taken as the second value (per the page layout this is
   * expected to be the artist's show count). Entries without any child tag are skipped. When at
   * least one pair was collected, the pairs are passed to {@code db.insertArtistBulk} and the
   * current date ({@code yyyy-MM-dd}) is written to the {@code "artistUpdate"} preference.
   *
   * <p>The update is best-effort: any exception is printed and otherwise ignored, and a non-200
   * response leaves {@code db} untouched. The HTTP connection manager is always shut down, even
   * when the request or the download fails.
   *
   * @param db data store that receives the scraped artists and the update date
   * @return always {@code true}; NOTE(review): failures are not reflected in the return value, so
   *     callers cannot use it to detect an unsuccessful update
   */
  public static Boolean updateArtists(StaticDataStore db) {

    ArrayList<ArrayList<String>> artists = new ArrayList<ArrayList<String>>();

    HtmlCleaner pageParser = new HtmlCleaner();
    CleanerProperties props = pageParser.getProperties();
    props.setAllowHtmlInsideAttributes(true);
    props.setAllowMultiWordAttributes(true);
    props.setRecognizeUnicodeChars(true);
    props.setOmitComments(true);

    try {
      String url =
          "http://www.archive.org/browse.php?field=/metadata/bandWithMP3s&collection=etree";

      HttpParams params = new BasicHttpParams();
      int timeout = (int) (15 * DateUtils.SECOND_IN_MILLIS);
      HttpConnectionParams.setConnectionTimeout(params, timeout);
      HttpConnectionParams.setSoTimeout(params, timeout);
      HttpClient client = new DefaultHttpClient(params);

      // Read the whole page into memory first, then release the connection no matter how the
      // request ended (success, non-200 status, or exception).
      String html = null;
      try {
        HttpResponse response = client.execute(new HttpGet(url));
        if (response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) {
          html = new BasicResponseHandler().handleResponse(response);
        }
      } finally {
        client.getConnectionManager().shutdown();
      }

      // html stays null on a non-200 status or when the response carried no body.
      if (html != null) {
        TagNode node = pageParser.clean(html);
        // XPath selecting one <li> per artist.
        Object[] artistNodes = node.evaluateXPath("//tr[@valign='top']//li");

        for (Object artistNode : artistNodes) {
          TagNode artist = (TagNode) artistNode;
          TagNode[] children = artist.getChildTags();
          if (children.length == 0) {
            // No link element to read the title from; skip this entry instead of letting an
            // ArrayIndexOutOfBoundsException abort the whole update.
            continue;
          }
          // The first child tag is the link to the artist's page; its inner HTML is the title.
          TagNode artistTitleSubNode = children[0];
          // Detach the link so the entry's remaining inner HTML holds only the trailing text.
          artist.removeChild(artistTitleSubNode);
          String artistTitle = pageParser.getInnerHtml(artistTitleSubNode);

          if (artistTitle != null) {
            ArrayList<String> artistPair = new ArrayList<String>();
            // "&amp;" is decoded last so that an escaped entity such as "&amp;lt;" becomes the
            // literal text "&lt;" rather than being decoded twice into "<".
            artistPair.add(
                artistTitle
                    .replace("&apos;", "'")
                    .replace("&gt;", ">")
                    .replace("&lt;", "<")
                    .replace("&quot;", "\"")
                    .replace("&amp;", "&"));
            artistPair.add(pageParser.getInnerHtml(artist).trim());
            artists.add(artistPair);
          }
        }

        if (!artists.isEmpty()) {
          db.insertArtistBulk(artists);
          String s = DateFormat.format("yyyy-MM-dd", new GregorianCalendar().getTime()).toString();
          db.updatePref("artistUpdate", s);
        }
      }
    } catch (Exception e) {
      // Best-effort refresh: report the failure and leave the stored data as it was.
      e.printStackTrace();
    }
    return true;
  }