예제 #1
0
  /**
   * Scrapes the Wandoujia version-history page of the Alipay Android app and prints, for every
   * entry matched by {@code div[position()<4]} under the version block, the version name, the
   * version code, the APK size and the download link.
   *
   * <p>Every failure (network, parsing, missing element) is caught and reported through {@code
   * printStackTrace()}.
   *
   * @param args unused
   */
  public static void main(String[] args) throws Exception {
    try {
      HtmlCleaner cleaner = new HtmlCleaner();
      nameList = new ArrayList<String>();
      URL url =
          new URL(
              "http://apps.wandoujia.com/apps/com.eg.android.AlipayGphone/versions?pos=w/popup");
      TagNode node = cleaner.clean(url);

      // Common prefix of every query below; each use appends a predicate and the closing bracket.
      String entryPrefix = "/body/div//div[@class='version-block']/div[";
      Object[] tags = node.evaluateXPath(entryPrefix + "position()<4]");

      // XPath positions are 1-based, so the counter starts at 1.
      for (int position = 1; position <= tags.length; position++) {
        String entry = entryPrefix + position + "]";

        TagNode versionNode =
            (TagNode) node.evaluateXPath(entry + "//i[@itemprop='softwareVersion']")[0];
        String versionName = versionNode.getText() + "";
        System.out.println(versionName);

        TagNode versionCodeNode =
            (TagNode) node.evaluateXPath(entry + "//span[@class='version-code']")[0];
        String versionCode = versionCodeNode.getText() + "";
        System.out.println(versionCode);

        TagNode sizeNode = (TagNode) node.evaluateXPath(entry + "//span[@class='apk-size']")[0];
        String apkSize = sizeNode.getText() + "";
        System.out.println(apkSize);

        TagNode downloadLink = (TagNode) node.evaluateXPath(entry + "//a[@download]")[0];
        String downloadUrl = downloadLink.getAttributeByName("href");
        System.out.println(downloadUrl);
        // Not used yet; kept for the database write announced below.
        String downloadName = downloadLink.getAttributeByName("download");

        // *** Write to the database -- to be implemented ***
      }
    } catch (Exception exception) {
      exception.printStackTrace();
    }
  }
예제 #2
0
  /**
   * Fills every {@code <select>} element that carries the select-source attribute with {@code
   * <option>} children loaded from that source.
   *
   * <p>The source and preselect attributes are removed from the element once read. Unless the
   * preselect attribute equals {@code "true"}, an empty-valued "-- Please Select --" entry is put
   * in front of the loaded options.
   *
   * @param formNode root of the form markup that is searched
   * @param formFlow used to resolve a relative source path
   * @throws XPatherException if the XPath query fails
   * @throws ResourceLoaderException declared for the option loading step
   */
  private void processSelectSource(TagNode formNode, FormFlow formFlow)
      throws XPatherException, ResourceLoaderException {
    String selectQuery = "//select[@" + Constants.SELECT_SOURCE_ATTR + "]";
    for (Object match : formNode.evaluateXPath(selectQuery)) {
      TagNode selectNode = (TagNode) match;

      String name = selectNode.getAttributeByName(Constants.NAME_ATTR);
      String rawSource = selectNode.getAttributeByName(Constants.SELECT_SOURCE_ATTR);
      String source = formFlow.resolveResourcePathIfRelative(rawSource);
      String preselectFirstOption =
          selectNode.getAttributeByName(Constants.SELECT_PRESELECT_FIRST_OPTION_ATTR);

      // The processing attributes are consumed here and stripped from the markup.
      selectNode.removeAttribute(Constants.SELECT_SOURCE_ATTR);
      selectNode.removeAttribute(Constants.SELECT_PRESELECT_FIRST_OPTION_ATTR);
      logger.debug("Found dynamicSelectNode name:{}, source:{}", name, source);

      List<SelectOptionPojo> options = selectOptionHelper.loadOptions(source);
      boolean preselectFirst = "true".equals(preselectFirstOption);
      if (!preselectFirst) {
        options.add(0, new SelectOptionPojo("-- Please Select --", ""));
      }

      for (SelectOptionPojo option : options) {
        TagNode optionNode = new TagNode("option");
        String optionValue = option.getValue();
        if (optionValue != null) {
          optionNode.setAttribute("value", optionValue);
        }
        optionNode.addChild(new ContentNode(option.getText()));
        selectNode.addChild(optionNode);
      }
    }
    // TODO: validate that submitted value comes from the list
  }
예제 #3
0
 /**
  * Cleans {@code text} as HTML and evaluates the configured XPath against it.
  *
  * @param text markup to search
  * @return one entry per match: the inner HTML for element matches, {@code toString()} for any
  *     other match; an empty list when nothing matches or the XPath evaluation fails; {@code
  *     null} when the cleaner yields no root node
  */
 @Override
 public List<String> selectList(String text) {
   HtmlCleaner htmlCleaner = new HtmlCleaner();
   TagNode root = htmlCleaner.clean(text);
   if (root == null) {
     return null;
   }

   List<String> results = new ArrayList<String>();
   Object[] matches;
   try {
     matches = root.evaluateXPath(xpathStr);
   } catch (XPatherException e) {
     e.printStackTrace();
     return results;
   }
   if (matches == null) {
     return results;
   }

   for (Object match : matches) {
     String rendered =
         match instanceof TagNode ? htmlCleaner.getInnerHtml((TagNode) match) : match.toString();
     results.add(rendered);
   }
   return results;
 }
  /**
   * Collects the attachment links found under the first {@code div} with class {@code
   * MultiFile-list} and appends them to {@code knouNoticeInfo.AttacheFile}.
   *
   * <p>Only direct child {@code <a>} elements are considered. Each {@code href} is prefixed with
   * {@code http://ep.knou.ac.kr}. If the XPath query fails or matches nothing, the method returns
   * without adding anything.
   *
   * @param htmlSerializer not used by this method
   * @param pNode node the XPath query is evaluated on
   * @param knouNoticeInfo notice whose attachment list receives the entries
   */
  protected void getFileAttache(
      SimpleHtmlSerializer htmlSerializer, TagNode pNode, KnouNoticeInfo knouNoticeInfo) {

    Object[] fileListNodes;
    try {
      fileListNodes = pNode.evaluateXPath("//div[@class=\"MultiFile-list\"]");
    } catch (XPatherException e) {
      // Without a query result there is nothing to collect. Falling through here used to end in
      // a NullPointerException on the length check below.
      e.printStackTrace();
      return;
    }

    if (fileListNodes == null || fileListNodes.length == 0) {
      return;
    }

    TagNode fileList = (TagNode) fileListNodes[0];
    for (TagNode child : fileList.getChildTags()) {
      if (!"a".equals(child.getName().trim())) {
        continue;
      }
      // Allocate the entry only for children that really are links.
      KnouNoticeFileInfo attachment = new KnouNoticeFileInfo();
      attachment.href = "http://ep.knou.ac.kr" + child.getAttributeByName("href");
      attachment.fileName = child.getText().toString();
      knouNoticeInfo.AttacheFile.add(attachment);
    }
  }
 /**
  * Returns the text of the first node of {@code html} that matches {@code xpath}.
  *
  * @param html markup to clean and search
  * @param xpath XPath expression evaluated against the cleaned document
  * @return text content of the first match
  * @throws Exception if evaluation fails; an expression without any match surfaces as an
  *     {@link ArrayIndexOutOfBoundsException}
  */
 public String GetInnerTextByXpath(String html, String xpath) throws Exception {
   TagNode root = new HtmlCleaner().clean(html);
   Object firstMatch = root.evaluateXPath(xpath)[0];
   return ((TagNode) firstMatch).getText().toString();
 }
예제 #6
0
 /**
  * Collects the {@code src} attribute of every element matched by {@code XPathExpression},
  * each prefixed with {@code http://www.krka.si}.
  *
  * <p>The expression is evaluated exactly once. Matches without a {@code src} attribute are
  * skipped. A failed evaluation is reported via {@code printStackTrace()} and yields an empty
  * list.
  *
  * @param node node the expression is evaluated on
  * @param XPathExpression expression selecting the elements of interest
  * @return the prefixed {@code src} values in match order, never {@code null}
  */
 public static ArrayList<String> dobisliko(TagNode node, String XPathExpression) {
   ArrayList<String> Temp = new ArrayList<String>();
   try {
     // One evaluation up front instead of re-running the query in the loop condition and body.
     Object[] matches = node.evaluateXPath(XPathExpression);
     for (Object match : matches) {
       String src = ((TagNode) match).getAttributeByName("src");
       if (src == null) {
         // Nothing to link to; previously this case ended in a NullPointerException.
         continue;
       }
       Temp.add("http://www.krka.si" + src);
     }
   } catch (XPatherException e) {
     e.printStackTrace();
   }
   return Temp;
 }
예제 #7
0
  /**
   * Collects the text content of every element matched by {@code XPathExpression}.
   *
   * <p>The expression is evaluated exactly once. A failed evaluation is reported via {@code
   * printStackTrace()} and yields an empty list.
   *
   * @param node node the expression is evaluated on
   * @param XPathExpression expression selecting the elements of interest
   * @return the text of each match in match order, never {@code null}
   */
  public static ArrayList<String> dobi_opis(TagNode node, String XPathExpression) {
    ArrayList<String> Temp = new ArrayList<String>();
    try {
      // One evaluation up front instead of re-running the query in the loop condition and body.
      Object[] matches = node.evaluateXPath(XPathExpression);
      for (Object match : matches) {
        Temp.add(((TagNode) match).getText().toString());
      }
    } catch (XPatherException e) {
      e.printStackTrace();
    }
    return Temp;
  }
예제 #8
0
 /**
  * Downloads {@code http://www.baidu.com}, cleans it as UTF-8 HTML and prints the text and the
  * {@code href} of every {@code <a>} directly under {@code <p id="nv">}.
  *
  * <p>Every failure is caught and reported through {@code printStackTrace()}.
  *
  * @param args unused
  */
 public static void main(String[] args) throws Exception {
   try {
     HtmlCleaner cleaner = new HtmlCleaner();
     URL url = new URL("http://www.baidu.com");
     TagNode node = cleaner.clean(url, "utf-8");
     // The declaration previously read "node.Object[] tagNodes", which is not valid Java.
     Object[] tagNodes = node.evaluateXPath("//p[@id='nv']/a");
     for (Object tagNode : tagNodes) {
       TagNode link = (TagNode) tagNode;
       System.out.println(link.getText());
       System.out.println(link.getAttributeByName("href"));
     }
   } catch (Exception exception) {
     exception.printStackTrace();
   }
 }
예제 #9
0
  /**
   * Rewires every {@code <input>} that declares the input-source attribute so that it points at a
   * server-side proxy instead of the original source.
   *
   * <p>For each such input a {@link FieldSourceProxy} is created and registered on {@code
   * formFlow}. The input-source attribute is then removed, and {@code rf.source} is set to the
   * proxy path under {@code rhinoforms/proxy/}.
   *
   * @param formNode root of the form markup that is searched
   * @param currentPath path handed to the proxy factory
   * @param formFlow flow that receives the created proxies
   * @throws XPatherException if the XPath query fails
   */
  private void processInputSourceFields(TagNode formNode, String currentPath, FormFlow formFlow)
      throws XPatherException {
    // Select on the same attribute that is read and removed below. The query used to filter on
    // SELECT_SOURCE_ATTR, which only works while both constants happen to share one value.
    Object[] autoCompleteNodes =
        formNode.evaluateXPath("//input[@" + Constants.INPUT_SOURCE_ATTR + "]");
    for (Object autoCompleteNodeO : autoCompleteNodes) {
      TagNode autoCompleteNode = (TagNode) autoCompleteNodeO;
      String fieldName = autoCompleteNode.getAttributeByName(Constants.NAME_ATTR);
      String source = autoCompleteNode.getAttributeByName(Constants.INPUT_SOURCE_ATTR);

      FieldSourceProxy fieldSourceProxy =
          proxyFactory.createFlowProxy(currentPath, fieldName, source);
      formFlow.addFieldSourceProxy(fieldSourceProxy);

      // Replace the original source with the proxy path.
      autoCompleteNode.removeAttribute(Constants.INPUT_SOURCE_ATTR);
      autoCompleteNode.setAttribute(
          "rf.source", "rhinoforms/proxy/" + fieldSourceProxy.getProxyPath());
    }
  }
  /**
   * Parses the stored score sheet with the given id.
   *
   * <p>The race name comes from the first {@code h1}, the date from {@code div.date} (pattern
   * {@code dd. MM. yyyy}), and the volume number from the text of {@code div.volume} before its
   * first dot. Every table under {@code div#men} and then under {@code div#women} is passed to
   * {@code processCategory}, with the {@code man} flag set accordingly beforehand.
   *
   * @param id identifier of the {@code ScoreSheetEntity} to load
   * @throws XPatherException if an XPath query fails
   * @throws ParseException if the date (or a value handled by {@code processCategory}) cannot be
   *     parsed
   */
  public void parseScoreSheet(String id) throws XPatherException, ParseException {
    ScoreSheetEntity sheet = em.find(ScoreSheetEntity.class, id);
    TagNode html = cleaner.clean(sheet.getContent());

    // --- Race ---
    TagNode heading = (TagNode) html.evaluateXPath("//body//h1")[0];
    String raceName = heading.getText().toString();
    RaceEntity race = new RaceEntity();
    race.setName(raceName);
    raceEntity = (RaceEntity) checkPossibleMatches(race, RaceEntity.class);

    // --- Race volume ---
    RaceVolumeEntity volume = new RaceVolumeEntity();

    TagNode dateDiv = (TagNode) html.evaluateXPath("//body//div[@class='date']")[0];
    String dateText = dateDiv.getText().toString();
    Date raceDate = new SimpleDateFormat("dd. MM. yyyy").parse(dateText);
    volume.setDate(raceDate);

    volume.setRace(raceEntity);

    TagNode volumeDiv = (TagNode) html.evaluateXPath("//body//div[@class='volume']")[0];
    String volumeText = volumeDiv.getText().toString();
    // Keep only what precedes the first dot.
    String volumeNumber = volumeText.substring(0, volumeText.indexOf("."));
    volume.setVolume(Integer.valueOf(volumeNumber));
    raceVolume = (RaceVolumeEntity) checkPossibleMatches(volume, RaceVolumeEntity.class);

    // --- Men's categories ---
    man = true;
    TagNode menDiv = (TagNode) html.evaluateXPath("//body//div[@id='men']")[0];
    for (Object table : menDiv.evaluateXPath("//table")) {
      processCategory((TagNode) table);
    }

    // --- Women's categories ---
    man = false;
    TagNode womenDiv = (TagNode) html.evaluateXPath("//body//div[@id='women']")[0];
    for (Object table : womenDiv.evaluateXPath("//table")) {
      processCategory((TagNode) table);
    }
  }
예제 #11
0
 /**
  * Cleans {@code text} as HTML and returns the first match of the configured XPath.
  *
  * @param text markup to search
  * @return the inner HTML when the first match is an element, its {@code toString()} otherwise;
  *     {@code null} when the cleaner yields no root, nothing matches, or the evaluation fails
  */
 @Override
 public String select(String text) {
   HtmlCleaner htmlCleaner = new HtmlCleaner();
   TagNode root = htmlCleaner.clean(text);
   if (root == null) {
     return null;
   }

   Object[] matches;
   try {
     matches = root.evaluateXPath(xpathStr);
   } catch (XPatherException e) {
     e.printStackTrace();
     return null;
   }
   if (matches == null || matches.length < 1) {
     return null;
   }

   Object first = matches[0];
   if (first instanceof TagNode) {
     return htmlCleaner.getInnerHtml((TagNode) first);
   }
   return first.toString();
 }
예제 #12
0
  /**
   * Scrapes the data rows of the page for {@code year} into one {@link InstitutionDataItem} per
   * row.
   *
   * <p>The column layout depends on the year (2008, 2010, 2016 and 2017 variants). Columns named
   * {@code IGNORE} are skipped. Every other cell is stored under its column name, trimmed and
   * with commas removed. Each item also carries a {@code "year"} entry.
   *
   * @param year year inserted into {@code URL_PATTERN}
   * @return the scraped items in row order
   * @throws MalformedURLException declared for the page download
   * @throws IOException declared for the page download
   * @throws XPatherException if the row query fails
   */
  public List<InstitutionDataItem> getData(int year)
      throws MalformedURLException, IOException, XPatherException {
    String url = String.format(URL_PATTERN, year);
    System.out.println("reading from " + url);

    // The layout is the same for every row, so it is chosen once: newest matching variant wins.
    String[] attributes;
    if (year >= 2017) {
      attributes = attributes2017;
    } else if (year >= 2016) {
      attributes = attributes2016;
    } else if (year >= 2010) {
      attributes = attributes2010;
    } else {
      attributes = attributes2008;
    }

    ArrayList<InstitutionDataItem> data = new ArrayList<InstitutionDataItem>();
    TagNode cleaned = ScraperUtils.getCleanedHtml(url);

    for (Object row : cleaned.evaluateXPath(DATA_ROW_XPATH)) {
      TagNode tr = (TagNode) row;
      InstitutionDataItem dataItem = new InstitutionDataItem();
      dataItem.data.put("year", String.valueOf(year));

      TagNode[] cells = tr.getChildTags();
      for (int column = 0; column < attributes.length; column++) {
        String attribute = attributes[column];
        if (attribute.equals(IGNORE)) {
          continue;
        }
        String cellText = cells[column].getText().toString().trim().replace(",", "");
        dataItem.data.put(attribute, cellText);
      }

      data.add(dataItem);
    }

    return data;
  }
예제 #13
0
  /**
   * Loads the songs of {@code show} into {@code songs}.
   *
   * <p>When the show is already known to {@code db} and {@code processSongs} is set, the songs
   * and the full title are taken from the database and no request is made. Otherwise the show
   * page and its M3U playlist are downloaded. The playlist lines become the song links, and the
   * track titles are extracted from the page's inline script that starts with {@code Play(}. When
   * {@code processSongs} is set and both lists have the same size, every title/link pair is
   * inserted into {@code db} and appended to {@code songs}.
   *
   * <p>The HTTP client is always shut down before the method returns, including when a request
   * or the parsing fails. XPath and I/O failures are reported via {@code printStackTrace()}.
   *
   * @param show show whose songs are wanted; its full title may be updated
   * @param songs list that receives the songs
   * @param db data store used for caching and preferences
   * @param processSongs whether songs are read from / written to {@code db} and {@code songs}
   */
  public static void getSongs(
      ArchiveShowObj show,
      ArrayList<ArchiveSongObj> songs,
      StaticDataStore db,
      boolean processSongs) {

    HtmlCleaner pageParser = new HtmlCleaner();
    CleanerProperties props = pageParser.getProperties();
    props.setAllowHtmlInsideAttributes(true);
    props.setAllowMultiWordAttributes(true);
    props.setRecognizeUnicodeChars(true);
    props.setOmitComments(true);

    ArrayList<String> songLinks = new ArrayList<String>();
    ArrayList<String> songTitles = new ArrayList<String>();
    String showTitle = show.getArtistAndTitle();
    String showIdent = show.getIdentifier();

    // The track titles live in an inline script, so every <script> element is inspected.
    String scriptXPath = "//script";
    String titlePath = "//head//title";

    if (db.getShowExists(show) && processSongs) {
      songs.addAll(db.getSongsFromShow(show.getIdentifier()));
      show.setFullTitle(db.getShow(show.getIdentifier()).getArtistAndTitle());
      return;
    }

    HttpParams params = new BasicHttpParams();
    int timeout = (int) (15 * DateUtils.SECOND_IN_MILLIS);
    HttpConnectionParams.setConnectionTimeout(params, timeout);
    HttpConnectionParams.setSoTimeout(params, timeout);
    HttpClient client = new DefaultHttpClient(params);

    try {
      HttpGet page = new HttpGet(show.getShowURL().toString());
      HttpResponse pageResponse = client.execute(page);
      if (pageResponse.getStatusLine().getStatusCode() != HttpStatus.SC_OK) {
        return;
      }
      ResponseHandler<String> pageResponseHandler = new BasicResponseHandler();
      TagNode node = pageParser.clean(pageResponseHandler.handleResponse(pageResponse));

      // Pick the playlist variant: the preferred format first, the other one as fallback.
      String queryString = show.getLinkPrefix();
      // Constant-first comparison so an unset preference does not throw.
      boolean preferLbr = "LBR".equalsIgnoreCase(db.getPref("downloadFormat"));
      if (preferLbr) {
        if (show.hasLBR()) {
          queryString += "_64kb.m3u";
        } else if (show.hasVBR()) {
          queryString += "_vbr.m3u";
        }
      } else {
        if (show.hasVBR()) {
          queryString += "_vbr.m3u";
        } else if (show.hasLBR()) {
          queryString += "_64kb.m3u";
        }
      }

      HttpGet M3Urequest = new HttpGet(queryString);
      HttpResponse M3Uresponse = client.execute(M3Urequest);
      if (M3Uresponse.getStatusLine().getStatusCode() != HttpStatus.SC_OK) {
        return;
      }
      ResponseHandler<String> M3UresponseHandler = new BasicResponseHandler();
      String m3uString = M3UresponseHandler.handleResponse(M3Uresponse);

      // Each line of the .M3U file is one download link.
      for (String link : m3uString.split("\n")) {
        songLinks.add(link);
      }

      // Look through the page's scripts for the one starting with "Play(". Splitting it on
      // {"title" leaves one chunk per track, and the title sits between the first two quotes.
      for (Object scriptNode : node.evaluateXPath(scriptXPath)) {
        List<?> children = ((TagNode) scriptNode).getChildren();
        for (Object child : children) {
          if (!(child instanceof ContentNode)) {
            continue;
          }
          String script = ((ContentNode) child).toString().trim();
          if (!script.startsWith("Play(")) {
            continue;
          }
          String[] titles = script.split("\\{\"title\"");
          for (int i = 1; i < titles.length; i++) {
            try {
              String title =
                  titles[i].substring(
                      nthIndexOf(titles[i], '"', 1), nthIndexOf(titles[i], '"', 2));
              songTitles.add(title.substring(title.indexOf('.') + 2));
            } catch (StringIndexOutOfBoundsException ignored) {
              // A chunk without a well-formed title is skipped on purpose. The size check
              // below then prevents mismatched titles and links from being stored.
            }
          }
        }
      }

      if (show.getShowTitle().length() < 2) {
        // No usable title yet: derive it from the page <title>, dropping the leading '[' of
        // the list rendering and everything from ": Free" onwards.
        String s =
            ((TagNode) node.evaluateXPath(titlePath)[0])
                .getChildren()
                .toString()
                .replaceFirst(Pattern.quote("["), "");
        show.setFullTitle(s.substring(0, s.lastIndexOf(": Free") - 1));
        showTitle = show.getArtistAndTitle();
        db.updateShow(show);
      }

      if (processSongs) {
        if (!songLinks.isEmpty()) {
          // The show parsed successfully.
          db.insertShow(show);
        }
        // Titles and links are paired by position, so they are only stored when the counts
        // agree.
        if (songTitles.size() == songLinks.size()) {
          for (int i = 0; i < songTitles.size(); i++) {
            ArchiveSongObj song =
                new ArchiveSongObj(songTitles.get(i), songLinks.get(i), showTitle, showIdent);
            song.setID(db.insertSong(song));
            songs.add(song);
          }
          db.setShowExists(show);
          db.insertRecentShow(show);
        }
      }
    } catch (XPatherException e) {
      e.printStackTrace();
    } catch (IOException e) {
      e.printStackTrace();
    } finally {
      // Release the connections on every path, including failed requests.
      client.getConnectionManager().shutdown();
    }
  }
예제 #14
0
  /**
   * Downloads the archive.org artist listing and stores it in {@code db}.
   *
   * <p>Every {@code <li>} under a {@code <tr valign="top">} contributes a pair: the inner HTML of
   * its first child tag (with the basic XML entities decoded), and the trimmed remainder of the
   * list item after that child is removed. If at least one pair was collected, the pairs are
   * bulk-inserted and the {@code artistUpdate} preference is set to today's date ({@code
   * yyyy-MM-dd}).
   *
   * <p>The HTTP client is always shut down, including when the request or parsing fails. Failures
   * are reported via {@code printStackTrace()}.
   *
   * @param db data store that receives the artists
   * @return always {@code true}, including after a failure
   */
  public static Boolean updateArtists(StaticDataStore db) {

    ArrayList<ArrayList<String>> artists = new ArrayList<ArrayList<String>>();

    HtmlCleaner pageParser = new HtmlCleaner();
    CleanerProperties props = pageParser.getProperties();
    props.setAllowHtmlInsideAttributes(true);
    props.setAllowMultiWordAttributes(true);
    props.setRecognizeUnicodeChars(true);
    props.setOmitComments(true);

    HttpClient client = null;
    try {
      String url =
          "http://www.archive.org/browse.php?field=/metadata/bandWithMP3s&collection=etree";

      HttpParams params = new BasicHttpParams();
      int timeout = (int) (15 * DateUtils.SECOND_IN_MILLIS);
      HttpConnectionParams.setConnectionTimeout(params, timeout);
      HttpConnectionParams.setSoTimeout(params, timeout);
      client = new DefaultHttpClient(params);

      HttpGet request = new HttpGet(url);
      HttpResponse response = client.execute(request);
      StatusLine status = response.getStatusLine();
      if (status.getStatusCode() == HttpStatus.SC_OK) {
        ResponseHandler<String> responseHandler = new BasicResponseHandler();
        TagNode node = pageParser.clean(responseHandler.handleResponse(response));
        Object[] artistsNodes = node.evaluateXPath("//tr[@valign='top']//li");

        for (Object artistNode : artistsNodes) {
          TagNode artist = (TagNode) artistNode;
          TagNode[] childTags = artist.getChildTags();
          if (childTags.length == 0) {
            // A list item without a child tag has no title; skip it rather than abort the whole
            // import.
            continue;
          }
          // The first child tag's inner HTML is used as the title.
          TagNode artistTitleSubNode = childTags[0];
          // Remove it so that the list item's remaining inner HTML forms the second value.
          artist.removeChild(artistTitleSubNode);
          String artistTitle = pageParser.getInnerHtml(artistTitleSubNode);

          if (artistTitle != null) {
            ArrayList<String> artistPair = new ArrayList<String>();
            // "&amp;" is decoded last so already-decoded text is not decoded a second time.
            artistPair.add(
                artistTitle
                    .replace("&apos;", "'")
                    .replace("&gt;", ">")
                    .replace("&lt;", "<")
                    .replace("&quot;", "\"")
                    .replace("&amp;", "&"));
            artistPair.add(pageParser.getInnerHtml(artist).trim());
            artists.add(artistPair);
          }
        }

        if (!artists.isEmpty()) {
          db.insertArtistBulk(artists);
          String s = DateFormat.format("yyyy-MM-dd", new GregorianCalendar().getTime()).toString();
          db.updatePref("artistUpdate", s);
        }
      }
    } catch (Exception e) {
      e.printStackTrace();
    } finally {
      // Release the connections on every path, including failed requests.
      if (client != null) {
        client.getConnectionManager().shutdown();
      }
    }
    return true;
  }
  /**
   * Processes one category table: derives the category from the table caption, then turns every
   * {@code tbody/tr} row into a runner with its person, club, city, start number and time.
   *
   * <p>The age range is derived from the caption. A caption containing {@code 39} or {@code 34}
   * is open at the bottom (from 0). One containing {@code 70} or {@code 45} is open at the top
   * (to 1000). One containing {@code -} is read as {@code NN-NN}. The branch order matters and is
   * kept as is. Every entity is passed through {@code checkPossibleMatches}.
   *
   * @param table the category's {@code <table>} element
   * @throws XPatherException if an XPath query fails
   * @throws ParseException if a finish time cannot be parsed
   * @throws IllegalArgumentException if the caption matches none of the known category shapes
   */
  private void processCategory(TagNode table) throws XPatherException, ParseException {
    String categoryStr =
        ((TagNode) table.evaluateXPath("./caption")[0]).getText().toString().trim();
    CategoryEntity cat = new CategoryEntity();
    cat.setMan(man);
    cat.setName(categoryStr);
    cat.getRaces().add(raceVolume);

    if (categoryStr.contains("39")) {
      cat.setFromAge(0);
      cat.setToAge(39);
    } else if (categoryStr.contains("70")) {
      cat.setFromAge(70);
      cat.setToAge(1000);
    } else if (categoryStr.contains("-")) {
      int ind = categoryStr.indexOf("-");
      String from = categoryStr.substring(ind - 2, ind);
      // The upper bound starts AFTER the dash. Taking the substring from the dash itself made
      // "40-49" parse as -4.
      String to = categoryStr.substring(ind + 1, ind + 3);
      cat.setFromAge(Integer.valueOf(from));
      cat.setToAge(Integer.valueOf(to));
    } else if (categoryStr.contains("34")) {
      cat.setFromAge(0);
      cat.setToAge(34);
    } else if (categoryStr.contains("45")) {
      cat.setFromAge(45);
      cat.setToAge(1000);
    } else {
      throw new IllegalArgumentException("Category cannot be processed");
    }

    cat = (CategoryEntity) checkPossibleMatches(cat, cat.getClass());

    Object[] runners = table.evaluateXPath("/tbody/tr");

    for (int i = 0; i < runners.length; i++) {
      TagNode row = (TagNode) runners[i];

      // Person: the name cell is split at its first space into two parts.
      String name =
          ((TagNode) row.evaluateXPath("/td[@class='jmeno']")[0]).getText().toString().trim();

      int index = name.indexOf(" ");
      String sn = name.substring(0, index).trim();
      String fn = name.substring(index).trim();

      String bd = ((TagNode) row.evaluateXPath("/td[@class='rn']")[0]).getText().toString().trim();

      PersonEntity person = new PersonEntity(fn, sn, Integer.valueOf(bd), man);
      person = (PersonEntity) checkPossibleMatches(person, PersonEntity.class);

      // Club and city: one cell of the form "club (city)"; the parenthesised part is optional.
      String all =
          ((TagNode) row.evaluateXPath("/td[@class='klub']")[0]).getText().toString().trim();

      int delI = all.indexOf("(");

      String clubStr;
      String cityStr;
      if (delI >= 0) {
        clubStr = all.substring(0, delI).trim();
        cityStr = all.substring(delI);
        cityStr = cityStr.replace("(", "").replace(")", "").trim();
      } else {
        clubStr = all.trim();
        cityStr = "";
      }

      // Missing values are stored as "-".
      if (clubStr.isEmpty()) {
        clubStr = "-";
      }
      if (cityStr.isEmpty()) {
        cityStr = "-";
      }

      ClubEntity club = new ClubEntity(clubStr);
      club = (ClubEntity) checkPossibleMatches(club, club.getClass());

      CityEntity city = new CityEntity(cityStr);
      city = (CityEntity) checkPossibleMatches(city, city.getClass());

      // Start number
      Integer number =
          Integer.valueOf(
              ((TagNode) row.evaluateXPath("/td[@class='stCislo']")[0])
                  .getText()
                  .toString()
                  .trim());

      // Time: "NF" is stored as the sentinel 23:59:59.
      String timeStr =
          ((TagNode) row.evaluateXPath("/td[@class='cas']")[0]).getText().toString().trim();
      if (timeStr.equalsIgnoreCase("NF")) {
        timeStr = "23:59:59";
      }
      Date time = new SimpleDateFormat("H:mm:ss").parse(timeStr);

      // Runner
      RunnerEntity runner = new RunnerEntity(number, person, club, city, time);
      runner.setCategory(cat);
      runner.setRace(raceVolume);
      checkPossibleMatches(runner, runner.getClass());
    }
  }
예제 #16
0
  /**
   * Loads the MySQL driver, scrapes the first version entry of a page (version name, version
   * code, APK size, download link) and clears {@code IsNewest} when the scraped version differs
   * from {@code sql_version}.
   *
   * <p>NOTE(review): the statement that opens the database connection appears to be damaged in
   * this listing. The connection string runs straight into an XPath fragment, and the
   * declarations of {@code node} and {@code tagVersion} are missing. That text is kept verbatim;
   * restore it from the original source.
   *
   * @param args unused
   */
  public static void main(String[] args) {
    // Check the database.
    try {
      Class.forName("org.gjt.mm.mysql.Driver");
      System.out.println("Success loading Mysql Driver!");
      Connection connect = DriverManager.getConnection("jdbc:mysql://*****:*****@class='version-block']/div["
                  + "1"
                  + "]//i[@itemprop='softwareVersion']");
      app_verison = ((TagNode) tagVersion[0]).getText() + "";
      System.out.println(((TagNode) tagVersion[0]).getText() + "");

      Object[] tagVersionCode =
          node.evaluateXPath(
              "/body/div//div[@class='version-block']/div["
                  + "1"
                  + "]//span[@class='version-code']");
      String app_versioncode = ((TagNode) tagVersionCode[0]).getText() + "";
      System.out.println(((TagNode) tagVersionCode[0]).getText() + "");
      Object[] tagFileSize =
          node.evaluateXPath(
              "/body/div//div[@class='version-block']/div[" + "1" + "]//span[@class='apk-size']");
      String app_size = ((TagNode) tagFileSize[0]).getText() + "";
      System.out.println(((TagNode) tagFileSize[0]).getText() + "");
      Object[] tagDownload =
          node.evaluateXPath(
              "/body/div//div[@class='version-block']/div[" + "1" + "]//a[@download]");
      String app_url1 = ((TagNode) tagDownload[0]).getAttributeByName("href");
      // The attribute value still carries HTML-escaped ampersands.
      app_url = app_url1.replaceAll("&amp;", "&");
      System.out.println("下载地址: " + app_url + "\n");
      app_name = ((TagNode) tagDownload[0]).getAttributeByName("download");

    } catch (MalformedURLException e) {
      e.printStackTrace();
    } catch (IOException e) {
      e.printStackTrace();
    } catch (XPatherException e) {
      e.printStackTrace();
    }

    // Compare against the database to decide whether the stored app is the newest one.
    // Compare by value: "!=" compared references, and the scraped version is a freshly built
    // string, so it was always reported as different.
    boolean sameVersion =
        app_verison == null ? sql_version == null : app_verison.equals(sql_version);
    if (!sameVersion) {
      IsNewest = false;
    }
  }
  /**
   * Logs in to the NYT site.
   *
   * <p>Steps: fetch the login page, read the hidden {@code token} and {@code expires} inputs, then
   * post them with the configured credentials. The login counts as successful when the response
   * is a 302 whose {@code Location} matches the expected NYT home page URL and which sets at
   * least one cookie.
   *
   * @return {@code true} on a successful login; {@code false} when the credentials are blank, a
   *     request fails, the login page has an unexpected shape, or the response does not look like
   *     a successful login
   */
  @Override
  public boolean authenticate() {
    if (!super.authenticate()) {
      LOG.error(
          String.format(
              "blank username or password detected, no %s xword will be downloaded",
              this.getType()));
      return false;
    }

    final HttpUriRequest loginGet = RequestBuilder.get().setUri(NYT_LOGIN_URL).build();

    final String loginPage;
    try (final CloseableHttpResponse getResponse = this.getHttpClient().execute(loginGet)) {
      loginPage = EntityUtils.toString(getResponse.getEntity());
    } catch (final IOException e) {
      LOG.error("error while navigating to NYT login page", e);
      return false;
    }

    final String token;
    final String expires;

    try {
      final TagNode node = this.getCleaner().clean(loginPage);

      final Object[] foundNodes = node.evaluateXPath("//input[@name='token']");
      if (foundNodes.length != 1) {
        this.throwLoginException(
            "unexpected login page, found %d hidden token input elements, expected 1",
            foundNodes.length);
      }
      final TagNode hiddenTokenInput = (TagNode) foundNodes[0];
      token = hiddenTokenInput.getAttributeByName("value");
      LOG.debug("found hidden input token {}", token);

      final Object[] foundExpiresNodes = node.evaluateXPath("//input[@name='expires']");
      if (foundExpiresNodes.length != 1) {
        // Report the count of the expires inputs; this used to report the token input count.
        this.throwLoginException(
            "unexpected login page, found %d hidden token expiration input elements, expected 1",
            foundExpiresNodes.length);
      }
      final TagNode hiddenTokenExpiresInput = (TagNode) foundExpiresNodes[0];
      expires = hiddenTokenExpiresInput.getAttributeByName("value");
      LOG.debug("found hidden input token expiration {}", expires);
    } catch (LoginException | XPatherException e) {
      LOG.error("error while pulling login tokens from NYT login page", e);
      return false;
    }

    // @formatter:off
    final HttpUriRequest loginPost =
        RequestBuilder.post()
            .setUri("https://myaccount.nytimes.com/auth/login")
            .addParameter("is_continue", Boolean.FALSE.toString())
            .addParameter("token", token)
            .addParameter("expires", expires)
            .addParameter("userid", this.getLoginInfo().getUsername())
            .addParameter("password", this.getLoginInfo().getPassword())
            .addParameter("remember", Boolean.TRUE.toString())
            .build();
    // @formatter:on

    try (CloseableHttpResponse postResponse = this.getHttpClient().execute(loginPost)) {

      // successful NYT login should give 302 status
      final int responseStatus = postResponse.getStatusLine().getStatusCode();
      if (responseStatus != 302) {
        final String errorMessage =
            String.format("did not detect expected 302 redirect, got %d instead", responseStatus);
        throw new LoginException(errorMessage);
      }

      // successful NYT login redirects to the NYT homepage
      final Header location = postResponse.getFirstHeader("Location");
      if (location == null) {
        // A redirect without a target cannot be verified; fail the login instead of hitting a
        // NullPointerException below.
        throw new LoginException("redirect response carried no Location header");
      }
      // have seen this redirect both with and without the final portion
      final Pattern expectedRedirectLocation =
          Pattern.compile("http://www.nytimes.com(\\?login=email)*");
      final String actualRedirectLocation = location.getValue();
      final Matcher matcher = expectedRedirectLocation.matcher(actualRedirectLocation);
      if (!matcher.matches()) {
        final String errorMessage =
            String.format(
                "redirect to unexpected URL, expected %s, found Location=%s instead",
                expectedRedirectLocation, actualRedirectLocation);
        throw new LoginException(errorMessage);
      }

      // successful NYT login should set a few cookies
      final Header[] cookies = postResponse.getHeaders("Set-Cookie");
      if (cookies.length < 1) {
        throw new LoginException("no post login cookies set, login likely failed");
      }

    } catch (final IOException | LoginException e) {
      LOG.error("error while logging in, e={}", e.getMessage());
      return false;
    }

    LOG.info("successfully logged in to nyt");
    return true;
  }
예제 #18
0
  /**
   * Records the named {@code input} and {@code select} elements of a form and pushes the current
   * values from the data document into the form markup.
   *
   * <p>Every element that has both a name and a type is recorded as an {@link InputPojo} together
   * with its {@code rf.*} attributes; radio buttons sharing a name are recorded only once. The
   * value looked up for the field name is then applied to the element: a radio is checked when its
   * value matches, a checkbox is checked when the value is {@code "true"}, a select gets the
   * matching option marked as selected, and any other type receives a {@code value} attribute. The
   * recorded list is finally handed to {@code formFlow.setCurrentInputPojos}.
   *
   * @param formNode form element whose descendant inputs and selects are scanned
   * @param formFlow receives the list of recorded input fields
   * @param dataDocument document the field values are looked up in
   * @param docBase passed through unchanged to the value lookup
   * @throws XPathExpressionException presumably raised by the value lookup; declared by the
   *     original signature
   * @throws XPatherException kept in the signature for compatibility with existing callers
   */
  private void recordInputFields(
      TagNode formNode, FormFlow formFlow, Document dataDocument, String docBase)
      throws XPathExpressionException, XPatherException {
    List<InputPojo> inputPojos = new ArrayList<InputPojo>();
    Map<String, InputPojo> inputPojosMap = new HashMap<String, InputPojo>();

    @SuppressWarnings("unchecked")
    List<TagNode> inputs = formNode.getElementListByName("input", true);
    @SuppressWarnings("unchecked")
    List<TagNode> selects = formNode.getElementListByName("select", true);
    inputs.addAll(selects);

    for (TagNode inputTagNode : inputs) {
      String name = inputTagNode.getAttributeByName(Constants.NAME_ATTR);
      if (name == null) {
        // Unnamed fields cannot be bound to the data document.
        continue;
      }

      // Selects have no type attribute, so they get a synthetic "select" type.
      String type;
      if (inputTagNode.getName().equals("select")) {
        type = "select";
      } else {
        type = inputTagNode.getAttributeByName(Constants.TYPE_ATTR);
      }
      if (type == null) {
        logger.debug("Input name:{} has no type attribute!", name);
        continue;
      }

      // A radio group consists of several elements with one name; record it only once.
      if (!(type.equals("radio") && inputPojosMap.containsKey(name))) {
        // Collect all rf.xxx attributes
        Map<String, String> rfAttributes = new HashMap<String, String>();
        Map<String, String> attributes = inputTagNode.getAttributes();
        for (Map.Entry<String, String> attribute : attributes.entrySet()) {
          if (attribute.getKey().startsWith("rf.")) {
            rfAttributes.put(attribute.getKey(), attribute.getValue());
          }
        }

        InputPojo inputPojo = new InputPojo(name, type, rfAttributes);
        inputPojosMap.put(name, inputPojo);
        inputPojos.add(inputPojo);
      }

      // Push values from the dataDocument into the form html.
      String inputValue = lookupValueByFieldName(dataDocument, name, docBase);
      if (inputValue == null) {
        continue;
      }
      if (type.equals("radio")) {
        String value = inputTagNode.getAttributeByName(Constants.VALUE_ATTR);
        if (inputValue.equals(value)) {
          inputTagNode.setAttribute(Constants.CHECKED_ATTR, Constants.CHECKED_ATTR);
        }
      } else if (type.equals("checkbox")) {
        if (inputValue.equals("true")) {
          inputTagNode.setAttribute(Constants.CHECKED_ATTR, Constants.CHECKED_ATTR);
        }
      } else if (type.equals("select")) {
        selectMatchingOption(inputTagNode, inputValue);
      } else {
        inputTagNode.setAttribute("value", inputValue);
      }
    }
    formFlow.setCurrentInputPojos(inputPojos);
  }

  /**
   * Marks the first direct {@code option} child of a select that matches the given value as
   * selected. Options are matched on their {@code value} attribute first; only when none matches
   * is the option text compared.
   *
   * <p>The comparison is done in Java rather than through a concatenated XPath expression, so a
   * value containing quote characters cannot break or alter the query.
   */
  private void selectMatchingOption(TagNode selectNode, String wantedValue) {
    @SuppressWarnings("unchecked")
    List<TagNode> options = selectNode.getElementListByName("option", false);

    TagNode match = null;
    for (TagNode option : options) {
      if (wantedValue.equals(option.getAttributeByName("value"))) {
        match = option;
        break;
      }
    }
    if (match == null) {
      for (TagNode option : options) {
        if (wantedValue.equals(option.getText().toString())) {
          match = option;
          break;
        }
      }
    }
    if (match != null) {
      match.setAttribute(Constants.SELECTED_ATTR, "selected");
    }
  }
예제 #19
0
  /**
   * Cleans the HTML read from {@code formStream}, applies the form processing steps to it and
   * serializes the result to {@code writer}.
   *
   * <p>Includes, the optional debug bar, forEach statements and remaining curly-bracket
   * placeholders are handled on the whole document. The form-specific steps run only on the first
   * {@code form} element flagged with the Rhinoforms attribute; if there is none a warning is
   * logged and the document is written out without them.
   *
   * @param formStream source of the raw form HTML
   * @param formFlow flow supplying the id, data document, doc base, path and actions
   * @param writer destination of the processed document
   * @param masterScope script scope handed to the range-select processing
   * @param suppressDebugBar when {@code true} the debug bar is not added even if it is enabled
   */
  public void parseForm(
      InputStream formStream,
      FormFlow formFlow,
      PrintWriter writer,
      JSMasterScope masterScope,
      boolean suppressDebugBar)
      throws XPatherException, XPathExpressionException, IOException, ResourceLoaderException,
          FormParserException {

    final TagNode page = htmlCleaner.clean(formStream);

    // Read everything needed from the flow before any processing step runs.
    final String flowId = formFlow.getId();
    final Document data = formFlow.getDataDocument();
    final String base = formFlow.getCurrentDocBase();
    final String path = formFlow.getCurrentPath();
    final Map<String, FlowAction> actions = formFlow.getCurrentActions();

    // rf.include
    processIncludes(page, formFlow);

    // Debug bar, unless the caller asked for it to be left out
    final boolean debugBarWanted = showDebugBar && !suppressDebugBar;
    if (debugBarWanted) {
      addDebugBar(page);
    }

    // rf.forEach statements, then whatever curly-bracket placeholders remain
    valueInjector.processForEachStatements(page, data, base);
    valueInjector.processRemainingCurlyBrackets(page, data, base, flowId);

    // Only the first flagged form in the document is processed.
    final String flaggedFormQuery = "//form[@" + Constants.RHINOFORMS_FLAG + "='true']";
    final Object[] flaggedForms = page.evaluateXPath(flaggedFormQuery);
    if (flaggedForms.length == 0) {
      logger.warn("No forms found");
    } else {
      logger.debug("{} forms found.", flaggedForms.length);
      final TagNode form = (TagNode) flaggedForms[0];

      processSelectSource(form, formFlow); // dynamic select elements
      processSelectRange(form, masterScope); // range select elements
      recordInputFields(form, formFlow, data, base); // input fields
      processActions(actions, form); // actions
      processInputSourceFields(form, path, formFlow); // auto-complete sources -> proxy path
      addFlowId(flowId, form); // flow id as hidden field

      // Flag the form so it can be recognised as already parsed.
      form.setAttribute("parsed", "true");
    }

    // Serialize the processed document.
    final SimpleHtmlSerializer serializer = new SimpleHtmlSerializer(htmlCleaner.getProperties());
    serializer.write(page, writer, "utf-8");
  }
예제 #20
0
  /**
   * Expands every {@code select} under {@code formNode} that carries the range-start attribute
   * into a list of {@code option} children.
   *
   * <p>The range start and end attributes are evaluated as script expressions in a working scope
   * created from {@code masterScope}. The range is inclusive and counts upwards when the start is
   * less than the end, downwards otherwise. Unless the preselect-first-option attribute is {@code
   * "true"}, an empty "-- Please Select --" option is added before the range. The three range
   * attributes are removed from the element in all cases. Selects with an empty or missing start
   * or end are left without options and a warning is logged.
   *
   * @param formNode form element whose descendant selects are inspected
   * @param masterScope supplies the working scope and script context used for evaluation
   * @throws XPatherException if the XPath lookup of the range selects fails
   */
  private void processSelectRange(TagNode formNode, JSMasterScope masterScope)
      throws XPatherException {
    Object[] rangeSelectNodes =
        formNode.evaluateXPath("//select[@" + Constants.SELECT_RANGE_START_ATTR + "]");
    if (rangeSelectNodes.length > 0) {
      // Scope and context are only obtained when there is at least one range select.
      Scriptable workingScope = masterScope.createWorkingScope();
      Context context = masterScope.getCurrentContext();
      for (Object rangeSelectNodeO : rangeSelectNodes) {
        TagNode rangeSelectNode = (TagNode) rangeSelectNodeO;
        String name = rangeSelectNode.getAttributeByName(Constants.NAME_ATTR);
        String rangeStart = rangeSelectNode.getAttributeByName(Constants.SELECT_RANGE_START_ATTR);
        String rangeEnd = rangeSelectNode.getAttributeByName(Constants.SELECT_RANGE_END_ATTR);
        String preselectFirstOption =
            rangeSelectNode.getAttributeByName(Constants.SELECT_PRESELECT_FIRST_OPTION_ATTR);
        // The processing attributes are stripped so they do not reach the rendered markup.
        rangeSelectNode.removeAttribute(Constants.SELECT_RANGE_START_ATTR);
        rangeSelectNode.removeAttribute(Constants.SELECT_RANGE_END_ATTR);
        rangeSelectNode.removeAttribute(Constants.SELECT_PRESELECT_FIRST_OPTION_ATTR);

        logger.debug(
            "Found rangeSelectNode name:{}, rangeStart:{}, rangeEnd:{}",
            new String[] {name, rangeStart, rangeEnd});
        boolean rangeStartValid = rangeStart != null && !rangeStart.isEmpty();
        boolean rangeEndValid = rangeEnd != null && !rangeEnd.isEmpty();
        if (rangeStartValid && rangeEndValid) {
          Object rangeStartResult =
              context.evaluateString(
                  workingScope, "{" + rangeStart + "}", Constants.SELECT_RANGE_START_ATTR, 1, null);
          Object rangeEndResult =
              context.evaluateString(
                  workingScope, "{" + rangeEnd + "}", Constants.SELECT_RANGE_END_ATTR, 1, null);
          logger.debug(
              "RangeSelectNode name:{}, rangeStartResult:{}, rangeEndResult:{}",
              new Object[] {name, rangeStartResult, rangeEndResult});

          // Decide the direction of the generated loop from the numeric values of the bounds.
          double rangeStartResultNumber = Context.toNumber(rangeStartResult);
          double rangeEndResultNumber = Context.toNumber(rangeEndResult);
          String comparator;
          String incrementor;
          if (rangeStartResultNumber < rangeEndResultNumber) {
            comparator = "<=";
            incrementor = "++";
          } else {
            comparator = ">=";
            incrementor = "--";
          }

          // The script builds the inclusive range and yields it as a comma-separated string.
          String rangeStatement =
              "{ var range = []; for( var i = "
                  + rangeStartResult
                  + "; i "
                  + comparator
                  + " "
                  + rangeEndResult
                  + "; i"
                  + incrementor
                  + ") { range.push(i); }; '' + range; }";
          logger.debug("RangeSelectNode name:{}, rangeStatement:{}", name, rangeStatement);
          // Context.toString instead of a (String) cast: the script engine may hand back a
          // CharSequence that is not a java.lang.String for concatenation results.
          String rangeResult =
              Context.toString(
                  context.evaluateString(workingScope, rangeStatement, "Calculate range", 1, null));
          logger.debug("RangeSelectNode name:{}, rangeResult:{}", name, rangeResult);

          if (!"true".equals(preselectFirstOption)) {
            TagNode optionNode = new TagNode("option");
            optionNode.setAttribute("value", "");
            optionNode.addChild(new ContentNode("-- Please Select --"));
            rangeSelectNode.addChild(optionNode);
          }

          for (String item : rangeResult.split(",")) {
            TagNode optionNode = new TagNode("option");
            optionNode.addChild(new ContentNode(item));
            rangeSelectNode.addChild(optionNode);
          }

        } else {
          // Name the attribute that is actually empty: if the start is valid the end must be the
          // empty one; otherwise the start is empty (possibly along with the end).
          String emptyAttribute =
              rangeStartValid
                  ? Constants.SELECT_RANGE_END_ATTR
                  : Constants.SELECT_RANGE_START_ATTR;
          logger.warn(
              "Range select node '{}' not processed because {} is empty.", name, emptyAttribute);
        }
      }
    }
  }