Пример #1
0
  /**
   * Mines an agent-list page: locates the list entry that mentions the item's phone number,
   * resolves that entry's relative agent link against the page's domain and hands the resulting
   * URL to {@code mineAgent}.
   *
   * @param item search result to mine; its real URL must pass {@code isMinableAgentList}
   * @return whatever {@code mineAgent} returns for the linked agent page, or {@code null} when
   *     the item is not minable, has no phone, the page does not mention the phone, or no usable
   *     agent link can be built from the first matching entry
   */
  private PhoneResult mineAgentList(SearchResultItem item) {
    if (!isMinableAgentList(item)) return null;

    // A null phone would make String.contains throw, and an empty phone is contained in every
    // string (so the first list entry would always "match"). Bail out before fetching the page.
    String phone = item.getPhone();
    if (TextUtils.isEmpty(phone)) return null;

    String html = getHtml(item.getRealUrl());
    if (TextUtils.isEmpty(html) || !html.contains(phone)) return null;

    List<String> entries = TextUtils.getList(html, "<div id='list_", "<div class=\"clear\">");
    if (entries == null) return null;

    for (String entry : entries) {
      if (!entry.contains(phone)) continue;

      // Only the first entry containing the phone is considered; if it yields no usable link
      // the search stops rather than trying later entries.
      String agentHref = TextUtils.getSubString(entry, "<a href='", "'");
      if (TextUtils.isEmpty(agentHref) || !agentHref.startsWith("/")) return null;

      String domain = TextUtils.getMatchGroup(PATTERN_AGENT_LIST, item.getRealUrl());
      if (TextUtils.isEmpty(domain)) return null;

      return mineAgent(domain + agentHref);
    }

    return null;
  }
Пример #2
0
 /**
  * Tells whether the item's real URL matches {@code PATTERN_AGENT_LIST}.
  *
  * @param item search result whose real URL is tested
  * @return the outcome of {@code TextUtils.isMatchReg} for that pattern and URL
  */
 private boolean isMinableAgentList(SearchResultItem item) {
   String realUrl = item.getRealUrl();
   return TextUtils.isMatchReg(PATTERN_AGENT_LIST, realUrl);
 }
Пример #3
0
  /**
   * Mines a single agent page and collects company, name, address, industry and an image into a
   * {@code PhoneResult}.
   *
   * <p>The image is taken from the first source that yields one: the {@code rzren} block (only
   * looked at when the {@code Floating} block exists), then the {@code companylogo} list item,
   * then the {@code photo} block.
   *
   * @param url agent page URL; must pass {@code isMinableAgent}
   * @return the populated result when {@code PhoneResult.isFound()} reports true, otherwise
   *     {@code null} (also when the URL is not minable or no HTML could be obtained)
   */
  private PhoneResult mineAgent(String url) {
    if (!isMinableAgent(url)) return null;

    String html = getHtml(url);
    if (TextUtils.isEmpty(html)) return null;

    // Allocate the result only once there is a page to read from.
    PhoneResult phoneResult = new PhoneResult();

    String floatHtml = TextUtils.getSubString(html, "<div class=\"Floating\">", "</div>");
    if (!TextUtils.isEmpty(floatHtml)) {
      String companyTag = "<dd style=\"padding-top:2px\\9;*padding-top:3px;\" >";
      String company = TextUtils.getSubString(floatHtml, companyTag, "</dd>");
      phoneResult.setJigou(company);

      String name = TextUtils.getSubString(floatHtml, "<dd>", "&nbsp;");
      if (!TextUtils.isEmpty(name)) phoneResult.setChenghu(TextUtils.appendJob(name, "房产经纪"));

      // The avatar and the address are looked up in the whole page, not in the floating block.
      String rz = TextUtils.getSubString(html, "<div class=\"rzren\">", "</div>");
      if (!TextUtils.isEmpty(rz)) {
        String avatar = TextUtils.getSubString(rz, "<img src=\"", "\"");
        phoneResult.setImage(avatar);
      }

      String address = TextUtils.getSubString(html, "<li>地<span class=\"pl24\">址</span>:", "</li>");
      phoneResult.setAddress(address);

      List<String> hangyeList = new ArrayList<String>();
      hangyeList.add("房屋中介");
      phoneResult.setHangyeList(hangyeList);
    }

    // No personal avatar: try the company logo instead.
    if (TextUtils.isEmpty(phoneResult.getImage())) {
      String companyLogoHtml =
          TextUtils.getSubString(html, "<li class=\"companylogo\">", "</li>");
      // Guard the fragment like the other image branches do, so a missing logo block is never
      // passed on to getSubString.
      if (!TextUtils.isEmpty(companyLogoHtml)) {
        String companyLogo = TextUtils.getSubString(companyLogoHtml, "<img src=\"", "\"");
        phoneResult.setImage(companyLogo);
      }
    }

    // No company logo either: the page may use a different layout, so try the photo block.
    if (TextUtils.isEmpty(phoneResult.getImage())) {
      String photoHtml = TextUtils.getSubString(html, "<div class=\"photo\">", "</div>");
      if (!TextUtils.isEmpty(photoHtml)) {
        String avatar = TextUtils.getSubString(photoHtml, "<img src=\"", "\"");
        phoneResult.setImage(avatar);
      }
    }

    return phoneResult.isFound() ? phoneResult : null;
  }
Пример #4
0
 /**
  * Tells whether the item's domain matches {@code PATTERN_DOMAIN}.
  *
  * @param item search result whose domain is tested
  * @return the outcome of {@code TextUtils.isMatchReg} for that pattern and domain
  */
 @Override
 public boolean isMinableDomain(SearchResultItem item) {
   String domain = item.getDomain();
   return TextUtils.isMatchReg(PATTERN_DOMAIN, domain);
 }
Пример #5
0
 /**
  * Tells whether the item's real URL matches {@code PATTERN_FANG}.
  *
  * @param item search result whose real URL is tested
  * @return the outcome of {@code TextUtils.isMatchReg} for that pattern and URL
  */
 private boolean isMinableFang(SearchResultItem item) {
   String realUrl = item.getRealUrl();
   return TextUtils.isMatchReg(PATTERN_FANG, realUrl);
 }
Пример #6
0
  /**
   * Mines a listing page. If the page contains an agent link (per {@code PATTERN_AGENT_LINK}),
   * the agent page is mined via {@code mineAgent}; otherwise, when the page carries the
   * "100%个人房源" marker, the poster's name and a listing image are read from the page itself.
   *
   * @param item search result whose page is fetched with {@code getHtml(item)}
   * @return the {@code mineAgent} result when an agent link is present; for marked
   *     individual-owner pages the populated result if {@code PhoneResult.isFound()} reports
   *     true; {@code null} in every other case
   */
  private PhoneResult mineFang(SearchResultItem item) {
    String html = getHtml(item);
    if (TextUtils.isEmpty(html)) return null;

    // An agent link takes precedence: its result is returned even when it is null.
    String link = TextUtils.getMatchGroup(PATTERN_AGENT_LINK, html);
    if (!TextUtils.isEmpty(link)) {
      return mineAgent(link);
    }

    if (!html.contains("100%个人房源")) return null;

    PhoneResult phoneResult = new PhoneResult();

    String chenghu =
        TextUtils.getSubString(html, "<span class=\"name floatl\" id=\"Span2\">", "</span>");
    if (!TextUtils.isEmpty(chenghu)) {
      chenghu = TextUtils.clearHuanhang(chenghu.trim());
      if (!TextUtils.isEmpty(chenghu)) phoneResult.setChenghu(TextUtils.appendJob(chenghu, "房东"));

      // The industry list is set whenever the name span was present, even if the cleaned name
      // turned out empty.
      List<String> hangyeList = new ArrayList<String>();
      hangyeList.add("房屋出租或出售");
      phoneResult.setHangyeList(hangyeList);
    }

    // Property image
    String imageHtml =
        TextUtils.getSubString(html, "<div class=\"slider\" id=\"thumbnail\">", "</div>");
    if (!TextUtils.isEmpty(imageHtml)) {
      String avatar = TextUtils.getSubString(imageHtml, "<img src=\"", "\"");
      phoneResult.setImage(avatar);
    }

    return phoneResult.isFound() ? phoneResult : null;
  }
Пример #7
0
 /**
  * Tells whether the URL matches {@code PATTERN_AGENT} or, failing that,
  * {@code PATTERN_AGENT_SUBPAGE}.
  *
  * @param url URL to test
  * @return {@code true} if {@code TextUtils.isMatchReg} accepts the URL for either pattern
  */
 private boolean isMinableAgent(String url) {
   if (TextUtils.isMatchReg(PATTERN_AGENT, url)) {
     return true;
   }
   return TextUtils.isMatchReg(PATTERN_AGENT_SUBPAGE, url);
 }