예제 #1
0
  /**
   * Database example: builds a Couchbase-backed logger, attaches it as a handler to the pooled
   * logger "log1" and publishes one sample record through it.
   */
  static void DataBaseExample() {
    // One minute expressed in seconds, handed to setExpire below.
    int expirySeconds = (int) TimeUnit.MINUTES.toSeconds(1);

    // The logger receives the cluster client and the name of the bucket.
    CouchbaseLogger couchbaseLogger = new CouchbaseLogger(CouchbaseCluster.create(), "logger");

    // Name the fields as you like; per the original note, fields that are not named here
    // will not appear in the logs.
    couchbaseLogger
        .date("date")
        .method("method")
        .exception("exception")
        .file("file")
        .level("level")
        .line("line")
        .thread("thread")
        .message("message")
        .className("class")
        .setExpire(expirySeconds);

    // Wrap the configured logger in a handler and register it.
    Handler handler = new DatabaseHandler(couchbaseLogger);
    GlobLoggerPool.getLogger("log1").addHandler(handler);

    // Publish a sample record.
    GlobLoggerPool.getLogger("log1")
        .publish(new NullPointerException(), "This is gonna do alot of problems!", "WARNING");
  }
 /**
  * Creates the cluster connection, opens the bucket, runs {@code demo()} and always disconnects
  * the cluster afterwards.
  */
 protected AbstractDemo() {
   cluster = CouchbaseCluster.create(ENV, COUCHBASE_IP);
   try {
     bucket = cluster.openBucket(BUCKET, PASSWORD);
     // NOTE(review): demo() is invoked from inside the constructor; presumably it is implemented
     // by subclasses, whose own fields would not be initialized yet at this point - confirm.
     demo();
   } finally {
     // Executed whether openBucket/demo() complete normally or throw.
     cluster.disconnect();
   }
 }
  /**
   * Connects to the cluster, opens the {@code inventory_transactions} bucket and runs the
   * currently selected cleanup routine. The cluster is disconnected when the routine finishes,
   * whether it succeeds or throws.
   *
   * @param args unused
   */
  public static void main(String[] args) {
    Cluster cluster = CouchbaseCluster.create("10.242.182.84", "10.242.182.246", "10.242.183.228");
    try {
      Bucket bucket = cluster.openBucket("inventory_transactions", "password");

      long start = 0;
      // Only used by the alternative singleDocCleanup call below.
      String id = "__flush_marker_1121";
      // String id = "docId";

      docCleanup(bucket, start);
      // singleDocCleanup( bucket, id );
      // entireBucketFlush( bucket );
      // flushCleanup( bucket, start );

      System.out.println("Cleaned.");
    } finally {
      // Release the SDK connection even when the cleanup fails.
      cluster.disconnect();
    }
  }
예제 #4
0
/**
 * Scrapes HTML files and writes the extracted data into the Couchbase store.
 *
 * @author Administrator
 */
public class ExtractionHtml {
  // Couchbase cluster handle for node 192.168.1.13, created when the class is initialized.
  private static Cluster cluster2 = CouchbaseCluster.create("192.168.1.13");
  // Page boilerplate phrases (submit/publish/edit timestamps, font-size links, click counters,
  // ...) that replaceCourtName removes from a candidate court name.
  public static String[] DATESTATUS = {
    "提交时间", "提交日期", "发布时间", "发布日期", "关注时间", "编辑时间", "编辑日期", "发表时间", "录入时间", "更新时间", "作者点击", "点击时间",
    "【关闭窗口】", "大中小", "大小", "年月日编辑", "字体", "点击率", "浏览", "点击数", "∣法律社区", "发表于", "阅读", "点击", "日期",
    "作者", "时间", "今天是"
  };
  // Party-role keywords for the defendant side; getKeyName looks these keys up in the
  // extracted-person map whenever its status argument is not 1.
  public static String[] DEFENDANT = {
    "被上诉人", "被执行人", "被申诉人", "被申请人", "被申请执行人", "原审被告人", "原审原告", "罪犯", "被告", "赔偿义务机关", "一审被告",
    "二审被上诉人"
  };
  // Party-role keywords for the plaintiff side; getKeyName looks these keys up in the
  // extracted-person map when its status argument is 1.
  public static String[] PLAINTIFF = {
    "第三人",
    "诉讼代理人",
    "辩护人",
    "上诉人",
    "申诉人",
    "申请执行人",
    "申请人",
    "执行人",
    "原审被告",
    "赔偿请求人",
    "原公诉机关",
    "公诉机关",
    "执行机构",
    "原告",
    "复议机关",
    "申请复议人",
    "一审原告",
    "委托代理人",
    "法定代表人",
    "起诉人",
    "移送执行机构",
    "二审上诉人",
    "原审第三人",
    "负责人",
    "抗诉机关",
    "申请再审人",
    "委托代理",
    "四被上诉人委托代理人",
    "两上诉人的委托代理人"
  };
  // Combined list of party-role keywords (several entries appear more than once).
  // NOTE(review): not referenced in the visible part of this file; presumably consumed by the
  // person-name extractor - confirm before changing the order or removing duplicates.
  public static String[] KEYWORDKE = {
    "申请再审人",
    "被上诉人",
    "二审被上诉人",
    "原审被告人",
    "原审第三人",
    "二审上诉人",
    "一审被告",
    "一审原告",
    "被申请人",
    "赔偿请求人",
    "被告人",
    "原告",
    "执行机构",
    "被申请人",
    "申请执行人",
    "申请人",
    "辩护人",
    "被申请执行人",
    "赔偿请求人",
    "赔偿义务机关",
    "原公诉机关",
    "抗诉机关",
    "公诉机关",
    "复议机关",
    "委托代理人",
    "委托代理",
    "特别授权代理",
    "四被上诉人委托代理人",
    "两上诉人的委托代理人",
    "移送执行机构",
    "诉讼代理人",
    "法定代表人",
    "申请复议人",
    "被上诉人",
    "被申诉人",
    "被执行人",
    "反诉被告",
    "反诉原告",
    "原审被告",
    "原审原告",
    "执行人",
    "负责人",
    "上诉人",
    "起诉人",
    "申诉人",
    "被告人",
    "原告人",
    "被告",
    "原告",
    "罪犯",
    "第三人"
  };
  // Phrases that introduce the ruling text; getTheVerdictData tries them in this order and
  // uses the last occurrence of the first phrase it finds.
  public static String[] THEVERDICT = {
    "裁定如下", "决定如下", "判决如下", "协议如下", "处理意见如下", "调解协议", "如下协议", "决定"
  };
  // Keywords describing the cause of action (charges, dispute types, procedural phrases).
  // NOTE(review): not referenced in the visible part of this file; presumably used by
  // StringCause - confirm.
  public static String[] CAUSE = {
    "驳回申诉通知",
    "赔偿决定书",
    "提起公诉",
    "提出公诉",
    "提起上诉",
    "提出上诉",
    "提起诉讼",
    "提出诉讼",
    "提起行政诉讼",
    "提出行政诉讼",
    "争议一案",
    "纠纷一案",
    "通告一案",
    "违法一案",
    "执行一案",
    "赔偿一案",
    "劫罪一案",
    "确认一案",
    "涉嫌",
    "危险驾驶",
    "诈骗",
    "盗窃",
    "死亡",
    "强奸",
    "聚众斗殴",
    "寻衅滋事",
    "贩卖毒品",
    "运输毒品",
    "故意伤害",
    "涉嫌诽谤",
    "抢劫",
    "绑架",
    "勒索",
    "杀人",
    "纠纷",
    "非法拘禁",
    "运输毒品",
    "破坏电力设备",
    "一案",
    "违法",
    "非法",
    "犯罪",
    "未履行生效法律文书",
    "未履行法律文书",
    "申请强制执行"
  };

  // Document-type names. getCatalog returns the first entry contained in the text and
  // getSplitCaseNum2 scans the entries in the same order, so the order of this array decides
  // which type wins when several match (specific names are listed before generic ones).
  public static String[] BOOKCLASS = {
    "准许强制执行裁定书",
    "民事附带刑事判决书",
    "强制医疗决定书",
    "指定管辖决定书",
    "非诉行政执行裁定书",
    "行政审查裁定书",
    "不予受理案件决定书",
    "暂予监外执行决定书",
    "民事调解书判决书",
    "强制医疗决定书",
    "准予撤诉决定书",
    "刑事附带民事判决书",
    "刑事附带民事调解书",
    "案件执行结束通知书",
    "减刑假释文书",
    "口头撤诉裁定笔录",
    "普通民事文书",
    "普通刑事文书",
    "民事调解书",
    "民事裁定书",
    "民事判决书",
    "民事决定书",
    "刑事判决书",
    "刑事裁定书",
    "刑事决定书",
    "行政判决书",
    "行政决定书",
    "行政裁定书",
    "执行裁定书",
    "普通执行文书",
    "普通行政文书",
    "行政文书",
    "商事文书",
    "执行判决书",
    "执行决定书",
    "国家赔偿裁定书",
    "国家赔偿判决书",
    "国家赔偿决定书",
    "驳回申诉通知书",
    "调解书",
    "决定书",
    "通知书",
    "判决书",
    "裁定书",
    "民事",
    "刑事",
    "行政",
    "执行"
  }; // document types

  // Province-level region names; replaceCourtName cuts a candidate court name so that it
  // starts at the last occurrence of each of these that it contains.
  // NOTE(review): the last entry carries a trailing space inside the literal - confirm intent.
  public static String[] PROVINCE = {
    "北京市", "天津市", "上海市", "重庆市", "河北省", "河南省", "云南省", "辽宁省", "黑龙江省", "湖南省", "安徽省", "山东省", "新疆",
    "江苏省", "浙江省", "江西省", "湖北省", "广西", "甘肃省", "山西省", "内蒙古", "陕西省", "吉林省", "福建省", "贵州省", "广东省", "青海省",
    "西藏", "四川省", "宁夏", "海南省", "台湾省 "
  };
  // Opening bracket + year prefixes that mark the start of a case number; scanned in order by
  // getSplitCaseNum.
  public static String[] CAUSENUM = {
    "(20", "(20", "(20", "〔20", "[20", "【20", "(20", "(20", "〔20", "[2", "[20", "【20", "(19", "(19",
    "〔19", "[19", "【19"
  };
  // Labels that precede a case number; scanned in order by getSplitCaseNum3.
  public static String[] CAUSENUM2 = {"判决书字号", "字号"};
  // Characters a candidate must contain for getSplitCaseNum2 to accept it as a case number.
  public static String[] CAUSENUM3 = {"第", "字"};
  static long count = 0; // total number of records handed to updateJsonData (updated in show)
  static long ALL = 0; // failed records (not updated anywhere in the visible code)
  static long SUM = 0; // incremented once per record inside updateJsonData
  // File suffixes that show() accepts; files whose suffix is not a key here are skipped.
  static Map<String, String> MAPS = new HashMap<String, String>();

  static {
    MAPS.put("html", "html");
    MAPS.put("htm", "htm");
    MAPS.put("txt", "txt");
  }

  // Charsets tried, in this order, when parsing a file; show() keeps the first one whose
  // decoded body passes getErrorCode.
  public static String[] charset = {"utf-8", "gbk", "gb2312", "gb18030", "big5"};
  public static String[] ERCOEDING = {
    "й", "෨", "Ժ", "ۼ", "ҩ", "ල", "ɷ", "ص", "δ", "ġ", "Ϊ", "ط", "Ϣ", "ȡ", "Ӫ", "ã", "", "Դ", "ڲ",
    "Ѱ", "�"
  };
  private static Logger logger = LogManager.getLogger(ExtractionHtml.class.getName());
  /**
   * Batch entry point: configures log4j, connects to Couchbase, loads the administrative-region
   * data and processes every supported file below the hard-coded input directory.
   *
   * <p>The bucket and the class-level cluster are released in a {@code finally} block, so they
   * are closed even when loading the region data or processing the files fails.
   *
   * @param args unused
   * @throws IOException declared for callers; the visible code does not throw it directly
   */
  public static void main(String[] args) throws IOException {
    PropertyConfigurator.configure(
        "F:\\work\\WorkSpace_Eclipse\\WorkSpace_Eclipse\\MassPick\\WebContent\\WEB-INF\\log4j.properties");
    long startMillis = System.currentTimeMillis();
    File file = new File("G:\\Data\\十二月\\中国裁判文书网最新文书(每日下载)\\HTML\\HTML-zgcpwsw20151214-20151217");
    Bucket bucket = null;
    try {
      bucket = connectionBucket(bucket);
      AdministrationUtils util = new AdministrationUtils();
      util.initData(); // load the administrative regions (province / city / county)
      try {
        show(file, bucket, util);
      } catch (Exception e) {
        // Pass the exception itself so the stack trace reaches the log.
        logger.error(e.getMessage(), e);
      }
    } finally {
      if (bucket != null) {
        bucket.close();
      }
      if (cluster2 != null) {
        // Dropping the reference alone does not release the SDK's connections.
        cluster2.disconnect();
        cluster2 = null;
      }
    }
    logger.info(count + ":数量");
    logger.info("所有文件总耗时" + (((System.currentTimeMillis() - startMillis) / 1000) / 60) + "分钟");
  }
  /**
   * Connects to the local Couchbase bucket, retrying until a connection succeeds.
   *
   * <p>Every failed attempt is logged (including the first one) and followed by a one-second
   * pause so a persistent failure does not spin the CPU or flood the log.
   *
   * @param bucket ignored; kept so existing callers compile unchanged
   * @return the bucket returned by {@code connectionCouchBaseLocal()}
   * @throws IllegalStateException if the thread is interrupted while waiting to retry
   */
  private static Bucket connectionBucket(Bucket bucket) {
    while (true) {
      try {
        return connectionCouchBaseLocal(); // local Couchbase
      } catch (Exception e) {
        Log.error(e);
      }
      try {
        Thread.sleep(1000L);
      } catch (InterruptedException interrupted) {
        // Restore the flag and stop retrying instead of looping forever on an interrupted thread.
        Thread.currentThread().interrupt();
        throw new IllegalStateException(
            "Interrupted while waiting to reconnect to Couchbase", interrupted);
      }
    }
  }
  /**
   * Recursively walks {@code file}, extracts one record from every supported file and pushes
   * the records to Couchbase through {@code updateJsonData}.
   *
   * <p>A single file is parsed with detailed logging and written immediately. A directory is
   * processed entry by entry: records are written in batches of 1000, sub-directories are
   * handled recursively, and unsupported or unreadable files are skipped without affecting the
   * remaining entries.
   *
   * @param file file or directory to process
   * @param bucket bucket the records are written to
   * @param util administrative-region lookup passed on to {@code updateJsonData}
   * @throws Exception if parsing or updating fails
   */
  private static void show(File file, Bucket bucket, AdministrationUtils util) throws Exception {
    if (file.isFile()) {
      ArchivesVO arch = parseArchiveFile(file, true);
      if (arch == null) {
        return;
      }
      List<ArchivesVO> single = new ArrayList<ArchivesVO>();
      single.add(arch);
      flushBatch(single, bucket, util, file.getPath() + ":更新失败");
      return;
    }

    File[] children = file.listFiles();
    if (children == null) {
      // listFiles() yields null when the path is not a directory or cannot be read.
      logger.info("无法读取目录:" + file.getPath());
      return;
    }

    final int batchSize = 1000;
    List<ArchivesVO> batch = new ArrayList<ArchivesVO>();
    for (File child : children) {
      if (child.isDirectory()) {
        logger.info(child.getName());
        show(child, bucket, util);
      } else if (child.isFile()) {
        ArchivesVO arch = parseArchiveFile(child, false);
        if (arch == null) {
          // Skip only this file; the rest of the directory is still processed.
          continue;
        }
        batch.add(arch);
        if (batch.size() >= batchSize) {
          flushBatch(batch, bucket, util, child.getPath() + ":更新失败1");
          batch = new ArrayList<ArchivesVO>();
        }
      }
    }
    if (!batch.isEmpty()) {
      flushBatch(batch, bucket, util, ":更新失败2");
    }
  }

  /** Writes one batch, logs {@code failureMessage} when the update fails, and updates count. */
  private static void flushBatch(
      List<ArchivesVO> batch, Bucket bucket, AdministrationUtils util, String failureMessage)
      throws Exception {
    if (!updateJsonData(batch, bucket, util)) {
      logger.info(failureMessage);
    }
    count += batch.size();
    logger.info("<<------------------------count------------------------------>>" + count);
  }

  /**
   * Parses one file into a record, or returns null when the file must be skipped (unsupported
   * suffix, or no charset yields readable, non-empty content).
   *
   * @param source file to parse
   * @param verbose whether to log the extracted fields
   */
  private static ArchivesVO parseArchiveFile(File source, boolean verbose) throws Exception {
    String name = source.getName();
    // The suffix and the uuid are both split at the LAST dot so they always agree.
    int dot = name.lastIndexOf(".");
    if (dot < 0 || MAPS.get(name.substring(dot + 1)) == null) {
      return null;
    }
    if (verbose) {
      logger.info("网址:" + source.getPath());
    }

    ArchivesVO arch = new ArchivesVO();
    String html = null;
    for (String encoding : charset) { // try each charset until the text is not garbled
      Document doc = Jsoup.parse(source, encoding);
      String bodyText = doc.body().text(); // all text inside <body>
      if (!getErrorCode(bodyText)) {
        if (verbose) {
          logger.info(encoding + "编码错误!!!");
        }
        continue;
      }
      html = bodyText;
      String title = getReplaceAll(doc.title());
      if (title != null && !"".equals(title)) { // the title tag may be empty
        arch.setTitle(title.trim());
      }
      if (verbose) {
        logger.info("标题:" + arch.getTitle());
      }
      break;
    }
    if (html != null) {
      html = getReplaceAll(html); // strip special characters
    }
    if (html == null || "".equals(html)) {
      logger.info("内容为空的HTML页面:" + source.getPath());
      return null;
    }
    html = html.trim();

    Map<String, List<String>> people = ExtractthepeopleText.getPersonName(html);
    arch.setPlaintiff(getKeyName(people, 1)); // plaintiff-side parties
    arch.setDefendant(getKeyName(people, 2)); // defendant-side parties

    String court = getCourtName(html);
    if (court == null || "".equals(court)) {
      court = getAtherthe(html); // fallback extraction
    }
    arch.setCourtName(court);

    arch.setCaseCause(StringCause(html)); // cause of action
    arch.setApprovalDate(getConcludeDate(html)); // conclusion date
    arch.setApproval(getTheVerdictData(html)); // ruling
    arch.setCatalog(getCatalog(html)); // document type

    String caseNum = getCaseNum(html);
    if (caseNum == null || "".equals(caseNum)) {
      caseNum = getSentenceNo3(html); // fallback extraction
    }
    arch.setCaseNum(caseNum);

    arch.setUuid(name.substring(0, dot));

    if (verbose) {
      logger.info("所有内容:" + html);
      logger.info("<<------------------------------------------------------>>");
      logger.info("原告相关人:" + arch.getPlaintiff());
      logger.info("被告相关人:" + arch.getDefendant());
      logger.info("法院:" + arch.getCourtName());
      logger.info("案由:" + arch.getCaseCause());
      logger.info("审结日期:" + arch.getApprovalDate());
      logger.info("判决结果:" + arch.getApproval());
      logger.info("文书类型:" + arch.getCatalog());
      logger.info("案号:" + arch.getCaseNum());
      logger.info("<<------------------------------------------------------>>");
    }
    return arch;
  }

  /**
   * Logs every extracted field of one record at INFO level, one log line per field.
   *
   * @param arch record to print
   */
  public static void showData(ArchivesVO arch) {
    String[] labels = {
      "UUID:", "原告相关人:", "被告相关人:", "法院:", "审结日期:", "文书类型:", "案号:", "标题:", "案由:", "判决结果:"
    };
    Object[] values = {
      arch.getUuid(),
      arch.getPlaintiff(),
      arch.getDefendant(),
      arch.getCourtName(),
      arch.getApprovalDate(),
      arch.getCatalog(),
      arch.getCaseNum(),
      arch.getTitle(),
      arch.getCaseCause(),
      arch.getApproval()
    };
    for (int i = 0; i < labels.length; i++) {
      logger.info(labels[i] + values[i]);
    }
  }

  /**
   * Merges freshly extracted judgment fields into the documents already stored in the bucket.
   *
   * <p>For every record the stored document is loaded by uuid; records without a stored
   * document are skipped. For each field a non-empty stored value wins over the extracted one,
   * and the extracted value only fills a gap. Province/city/area are then refreshed from the
   * court name through {@code util.utils}, and the merged document is upserted.
   *
   * <p>The region lookup result is local to each record, so a record without a court name no
   * longer inherits the previous record's region nor aborts the batch.
   *
   * @param list records extracted from the files
   * @param bucket bucket holding the stored documents
   * @param util administrative-region lookup
   * @return false when the list is null/empty or any record fails; true otherwise
   * @throws Exception declared for callers; failures inside the loop are logged and reported
   *     through the return value
   */
  public static boolean updateJsonData(
      List<ArchivesVO> list, Bucket bucket, AdministrationUtils util) throws Exception {
    if (null == list || list.size() <= 0) {
      return false;
    }
    Gson gson = new Gson();
    try {
      for (ArchivesVO arch : list) {
        SUM++;
        // Load the stored document once (the id is the record's uuid).
        JsonDocument stored = bucket.get(JsonDocument.create(arch.getUuid()));
        JsonObject obj2 = stored == null ? null : stored.content();
        if (obj2 == null) {
          logger.info("匹配不到UUID:" + arch.getUuid());
          continue;
        }
        com.google.gson.JsonObject json =
            gson.fromJson(obj2.toString(), com.google.gson.JsonObject.class);
        ArchivesVO archs = gson.fromJson(json, ArchivesVO.class);

        String title = storedOrExtracted(obj2, "title", arch.getTitle());
        if (title != null) {
          archs.setTitle(title);
        }
        String caseNum = storedOrExtracted(obj2, "caseNum", arch.getCaseNum());
        if (caseNum != null) {
          archs.setCaseNum(caseNum);
        }
        String courtName = storedOrExtracted(obj2, "courtName", arch.getCourtName());
        if (courtName != null) {
          archs.setCourtName(courtName);
        }
        String catalog = storedOrExtracted(obj2, "catalog", arch.getCatalog());
        if (catalog != null) {
          archs.setCatalog(catalog);
        }
        String approval = storedOrExtracted(obj2, "approval", arch.getApproval());
        if (approval != null) {
          archs.setApproval(approval);
        }
        String caseCause = storedOrExtracted(obj2, "caseCause", arch.getCaseCause());
        if (caseCause != null) {
          archs.setCaseCause(caseCause);
        }
        String plaintiff = storedOrExtracted(obj2, "plaintiff", arch.getPlaintiff());
        if (plaintiff != null) {
          archs.setPlaintiff(plaintiff);
        }
        String defendant = storedOrExtracted(obj2, "defendant", arch.getDefendant());
        if (defendant != null) {
          archs.setDefendant(defendant);
        }
        String approvalDate = storedOrExtracted(obj2, "approvalDate", arch.getApprovalDate());
        if (approvalDate != null) {
          archs.setApprovalDate(approvalDate);
        }
        String summary = storedOrExtracted(obj2, "summary", arch.getSummary());
        if (summary != null) {
          archs.setSummary(summary);
        }

        // Fields that only ever come from the stored document.
        String detailLink = storedOrExtracted(obj2, "detailLink", null);
        if (detailLink != null) {
          archs.setDetailLink(detailLink);
        }
        String publishDate = storedOrExtracted(obj2, "publishDate", null);
        if (publishDate != null) {
          archs.setPublishDate(getReplaceAllDate(publishDate));
        }
        String storedProvince = storedOrExtracted(obj2, "province", null);
        if (storedProvince != null) {
          archs.setProvince(storedProvince);
        }
        String storedCity = storedOrExtracted(obj2, "city", null);
        if (storedCity != null) {
          archs.setCity(storedCity);
        }
        String storedArea = storedOrExtracted(obj2, "area", null);
        if (storedArea != null) {
          archs.setArea(storedArea);
        }

        // Region lookup from the merged court name; reset for every record.
        String[] region = null;
        String mergedCourt = archs.getCourtName();
        if (mergedCourt != null && !"".equals(mergedCourt)) {
          region = util.utils(mergedCourt);
        }
        String province = regionPart(region, 0);
        if (province != null && !"".equals(province)) {
          archs.setProvince(province);
        }
        String city = regionPart(region, 1);
        if (city != null && !"".equals(city)) {
          archs.setCity(city);
        }
        String area = regionPart(region, 2);
        if (area != null && !"".equals(area)) {
          archs.setArea(area);
        }

        String collectDate = storedOrExtracted(obj2, "collectDate", null);
        if (collectDate != null) {
          archs.setCollectDate(getReplaceAllDate(collectDate));
        }
        String suitDate = storedOrExtracted(obj2, "suitDate", null);
        if (suitDate != null) {
          archs.setSuitDate(suitDate);
        }

        JsonDocument merged =
            JsonDocument.create(arch.getUuid(), JsonObject.fromJson(gson.toJson(archs)));
        logger.info("更新条数:" + SUM + "---省:" + province + "---市:" + city + "---县/区:" + area);
        bucket.upsert(merged);
      }
    } catch (Exception e) {
      // Keep the stack trace; the message alone is often null or uninformative.
      logger.error(e.getMessage(), e);
      return false;
    }
    return true;
  }

  /**
   * Picks the value for one field: the stored value when it is present and not the empty
   * string, otherwise the extracted value when that is non-empty, otherwise null.
   */
  private static String storedOrExtracted(JsonObject stored, String key, String extracted) {
    Object storedValue = stored.get(key);
    if (storedValue != null && !"".equals(storedValue)) {
      return storedValue.toString();
    }
    if (extracted != null && !"".equals(extracted)) {
      return extracted;
    }
    return null;
  }

  /** Returns {@code region[index]}, or null when the array is null or too short. */
  private static String regionPart(String[] region, int index) {
    return region == null || region.length <= index ? null : region[index];
  }
  /**
   * Checks whether decoded text looks readable.
   *
   * <p>Despite the name, the result is true when the text is fine: it is non-empty and contains
   * none of the mojibake markers in {@code ERCOEDING}. A marker counts wherever it occurs,
   * including at the very first character.
   *
   * @param value decoded text
   * @return false when {@code value} is null, empty, or contains a marker; true otherwise
   */
  public static boolean getErrorCode(String value) {
    if (value == null || "".equals(value)) {
      return false;
    }
    for (String marker : ERCOEDING) {
      if (marker == null || marker.length() == 0) {
        // An empty marker is contained in every string and would flag all text as garbled.
        continue;
      }
      if (value.indexOf(marker) >= 0) {
        return false;
      }
    }
    return true;
  }
  /**
   * Extracts the document type.
   *
   * @param value document text
   * @return the first entry of {@code BOOKCLASS} (in array order) contained in {@code value},
   *     or null when the text is null/empty or no entry matches
   */
  public static String getCatalog(String value) {
    boolean hasText = value != null && !"".equals(value);
    if (hasText) {
      for (String docType : BOOKCLASS) {
        if (value.contains(docType)) {
          return docType;
        }
      }
    }
    return null;
  }

  /**
   * Fallback extraction of the document number.
   *
   * <p>Takes the first "。"-separated sentence for which {@code getDateIndex2} returns a
   * non-negative value, then returns the text between the last "书" and the first following
   * "号" (inclusive of "号").
   *
   * @param valthml document text
   * @return the number, or null when the input is null/empty, a required marker is missing, or
   *     an exception occurs (the exception message is logged)
   */
  public static String getSentenceNo3(String valthml) {
    try {
      if (valthml == null || "".equals(valthml)) {
        return null;
      }
      String sentence = valthml;
      int dateIndex = 0;
      String[] sentences = valthml.split("。");
      for (int i = 0; i < sentences.length; i++) {
        dateIndex = getDateIndex2(sentences[i]);
        if (dateIndex >= 0) {
          sentence = sentences[i];
          break;
        }
      }
      if (dateIndex == -1) {
        return null;
      }
      int titleEnd = sentence.lastIndexOf("书");
      if (titleEnd == -1) {
        return null;
      }
      String tail = sentence.substring(titleEnd + 1, sentence.length());
      int numberEnd = tail.indexOf("号");
      return numberEnd == -1 ? null : tail.substring(0, numberEnd + 1);
    } catch (Exception e) {
      logger.error(e.getMessage());
    }
    return null;
  }

  /** One or more ASCII digits; compiled once instead of on every call. */
  private static final Pattern ASCII_DIGITS = Pattern.compile("[0-9]{1,}");

  /**
   * Tells whether the whole string consists of ASCII digits.
   *
   * @param strNum text to test; may be null
   * @return true when {@code strNum} is non-null, non-empty and contains only 0-9
   */
  public static boolean isDigit(String strNum) {
    return strNum != null && ASCII_DIGITS.matcher(strNum).matches();
  }

  /**
   * Extracts a case number that starts with a bracket + year prefix from {@code CAUSENUM}.
   *
   * <p>The candidate starts at the last occurrence of the first prefix found. When it contains
   * "号", everything up to and including "号" is returned. Otherwise the result is the text up
   * to and including "第" (or "字" when "第" is absent) followed by the ASCII digits that come
   * directly after it.
   *
   * <p>The digits are read with {@code charAt}, because the result of {@code split("")}
   * differs between Java 7 and Java 8+ (a leading empty element is no longer produced).
   *
   * @param value one sentence of the document
   * @return the case number (possibly empty), or null when the input is null/empty or no
   *     prefix is present
   */
  public static String getSplitCaseNum(String value) {
    if (value == null || "".equals(value)) {
      return null;
    }
    for (String yearPrefix : CAUSENUM) {
      int start = value.lastIndexOf(yearPrefix);
      if (start == -1) {
        continue;
      }
      String candidate = value.substring(start);
      int haoIndex = candidate.indexOf("号");
      if (haoIndex > 0) {
        return candidate.substring(0, haoIndex + 1);
      }
      // No "号" in the candidate: keep the part up to "第"/"字" plus the digits after it.
      int marker = candidate.indexOf("第");
      if (marker <= 0) {
        marker = candidate.indexOf("字");
      }
      StringBuilder caseNum = new StringBuilder(candidate.substring(0, marker + 1));
      for (int i = marker + 1; i < candidate.length(); i++) {
        char c = candidate.charAt(i);
        if (c < '0' || c > '9') {
          break;
        }
        caseNum.append(c);
      }
      return caseNum.toString();
    }
    return null;
  }

  /**
   * Extracts the case number that follows a document-type name from {@code BOOKCLASS}.
   *
   * <p>For each type found, the text after its last occurrence is examined. Without a "号"
   * the result is the text up to and including "第" (or "字") plus the ASCII digits directly
   * after it. With a "号" the text up to and including "号" is returned, but only when it
   * contains one of the {@code CAUSENUM3} characters; otherwise the search continues with the
   * next type on that truncated text, as before.
   *
   * <p>The digits are read with {@code charAt}, because the result of {@code split("")}
   * differs between Java 7 and Java 8+.
   *
   * @param value one sentence of the document
   * @return the case number (possibly empty), or null when nothing matches
   */
  public static String getSplitCaseNum2(String value) {
    if (value == null || "".equals(value)) {
      return null;
    }
    String remaining = value;
    for (String docType : BOOKCLASS) {
      int typeIndex = remaining.lastIndexOf(docType);
      if (typeIndex == -1) {
        continue;
      }
      remaining = remaining.substring(typeIndex + docType.length());
      int haoIndex = remaining.indexOf("号");
      if (haoIndex <= 0) {
        // No usable "号": keep the part up to "第"/"字" plus the digits after it.
        int marker = remaining.indexOf("第");
        if (marker <= 0) {
          marker = remaining.indexOf("字");
        }
        StringBuilder caseNum = new StringBuilder(remaining.substring(0, marker + 1));
        for (int i = marker + 1; i < remaining.length(); i++) {
          char c = remaining.charAt(i);
          if (c < '0' || c > '9') {
            break;
          }
          caseNum.append(c);
        }
        return caseNum.toString();
      }
      remaining = remaining.substring(0, haoIndex + 1);
      for (String required : CAUSENUM3) {
        if (remaining.indexOf(required) != -1) {
          return remaining;
        }
      }
    }
    return null;
  }

  /**
   * Special-case extraction: reads the case number that follows one of the {@code CAUSENUM2}
   * labels and strips a trailing print-run note through {@code replaceCaseNum1}.
   *
   * <p>When the text after the label has no "号", the result is built from the part up to and
   * including "第" (or "字") plus the ASCII digits directly after it. That assembled value is
   * what gets cleaned and returned; previously it was discarded and the remainder of the
   * sentence was returned instead.
   *
   * <p>The digits are read with {@code charAt}, because the result of {@code split("")}
   * differs between Java 7 and Java 8+.
   *
   * @param value one sentence of the document
   * @return the cleaned case number, or null when no label is present or the result is empty
   */
  public static String getSplitCaseNum3(String value) {
    if (value == null || "".equals(value)) {
      return null;
    }
    for (String label : CAUSENUM2) {
      int labelIndex = value.lastIndexOf(label);
      if (labelIndex == -1) {
        continue;
      }
      String candidate = value.substring(labelIndex + label.length());
      int haoIndex = candidate.indexOf("号");
      if (haoIndex > 0) {
        return replaceCaseNum1(candidate.substring(0, haoIndex + 1));
      }
      // No usable "号": keep the part up to "第"/"字" plus the digits after it.
      int marker = candidate.indexOf("第");
      if (marker <= 0) {
        marker = candidate.indexOf("字");
      }
      StringBuilder caseNum = new StringBuilder(candidate.substring(0, marker + 1));
      for (int i = marker + 1; i < candidate.length(); i++) {
        char c = candidate.charAt(i);
        if (c < '0' || c > '9') {
          break;
        }
        caseNum.append(c);
      }
      return replaceCaseNum1(caseNum.toString());
    }
    return null;
  }

  /**
   * Removes a print-run note of the form "共印…份" from a case number.
   *
   * <p>The closing "份" is searched for after "共印", so an earlier "份" or a missing one no
   * longer causes a {@code StringIndexOutOfBoundsException}; in those cases the value is
   * returned unchanged. A note at the very start of the value is removed as well.
   *
   * @param value case number text
   * @return the cleaned text, or null when {@code value} is null or empty
   */
  public static String replaceCaseNum1(String value) {
    if (value == null || "".equals(value)) {
      return null;
    }
    int noteStart = value.indexOf("共印");
    if (noteStart < 0) {
      return value;
    }
    int noteEnd = value.indexOf("份", noteStart);
    if (noteEnd < 0) {
      return value;
    }
    String note = value.substring(noteStart, noteEnd + 1);
    return value.replace(note, "");
  }

  /**
   * Extracts the case number by splitting the text into "。"-separated sentences.
   *
   * <p>Each sentence is passed to getSplitCaseNum, then getSplitCaseNum2, then
   * getSplitCaseNum3; a later extractor is only consulted when the earlier ones produced null
   * or an empty string.
   *
   * @param value document text
   * @return the first non-empty case number found, or null
   */
  public static String getCaseNum(String value) {
    if (value == null || "".equals(value)) {
      return null;
    }
    for (String sentence : value.split("。")) {
      String caseNum = getSplitCaseNum(sentence);
      if (caseNum == null || "".equals(caseNum)) {
        caseNum = getSplitCaseNum2(sentence);
      }
      if (caseNum == null || "".equals(caseNum)) {
        caseNum = getSplitCaseNum3(sentence);
      }
      if (caseNum != null && !"".equals(caseNum)) {
        return caseNum;
      }
    }
    return null;
  }

  /**
   * Extracts the conclusion date.
   *
   * <p>For each year prefix, in order, the text starting at the prefix's last occurrence is
   * inspected; when the 9th, 10th, 11th or 12th character is "日", the text up to and
   * including that "日" is the date. A tail that is too short is simply not a match, and the
   * next prefix is tried.
   *
   * <p>Characters are read with {@code charAt}, because the result of {@code split("")}
   * differs between Java 7 and Java 8+ and shifted every index by one.
   *
   * @param date document text
   * @return the date text with "�" replaced by "0", or null when none is found
   */
  public static String getConcludeDate(String date) {
    if (date == null || "".equals(date)) {
      return null;
    }
    String[] yearPrefixes = {"二〇", "一九", "二○", "二0", "二0", "二O", "二0", "二O", "二�", "20", "19"};
    // A date including its closing "日" is between 9 and 12 characters long.
    final int minLength = 9;
    final int maxLength = 12;
    for (String prefix : yearPrefixes) {
      int start = date.lastIndexOf(prefix);
      if (start < 0) {
        continue;
      }
      String tail = date.substring(start);
      for (int length = minLength; length <= maxLength && length <= tail.length(); length++) {
        if (tail.charAt(length - 1) == '日') {
          return tail.substring(0, length).replaceAll("�", "0");
        }
      }
    }
    return null;
  }

  /**
   * Extracts the ruling text.
   *
   * <p>Uses the first phrase of {@code THEVERDICT} that occurs in the text. The ruling runs
   * from the phrase's last occurrence to the first "审判长" (or to the end of the text when
   * "审判长" is missing or lies before the phrase); it is then cut before the last "公告", if
   * any, and finally before the last "。".
   *
   * <p>When the remaining text has no "。", the final cut throws; the exception is caught and
   * logged and null is returned.
   *
   * @param value document text
   * @return the ruling text, or null when nothing is found or an exception occurs
   */
  public static String getTheVerdictData(String value) {
    if (value == null || "".equals(value)) {
      return null;
    }
    try {
      for (String phrase : THEVERDICT) {
        int start = value.lastIndexOf(phrase);
        if (start < 0) {
          continue;
        }
        int end = value.indexOf("审判长");
        if (end < start) {
          end = value.length();
        }
        String ruling = value.substring(start, end);
        int noticeIndex = ruling.lastIndexOf("公告");
        if (noticeIndex >= 0) {
          ruling = ruling.substring(0, noticeIndex);
        }
        ruling = ruling.substring(0, ruling.lastIndexOf("。"));
        if (ruling != null) {
          return ruling;
        }
      }
    } catch (Exception e) {
      logger.error("提取判决结果出错:" + e.getMessage());
    }
    return null;
  }

  /**
   * Strips characters that are useless for extraction.
   *
   * <p>Text that consists only of stripped characters now yields an empty string instead of a
   * {@code NullPointerException}.
   *
   * @param value raw text
   * @return null when {@code value} is null or empty; otherwise the cleaned, trimmed text
   *     (possibly empty)
   */
  public static String getReplaceAll(String value) {
    if (value == null || "".equals(value)) {
      return null;
    }
    String cleaned = value.replaceAll(",", ",");
    // NOTE(review): this pattern has no brackets, so it only matches the literal five-character
    // sequence; confirm whether a character class was intended.
    cleaned = cleaned.replaceAll("�,o,O", "〇");
    // NOTE(review): inside [...] each listed character is matched on its own, and that
    // includes the commas used as separators, so every "," also becomes "某" here.
    cleaned = cleaned.replaceAll("[×,X,X,x,╳,*,\\*]", "某");
    // NOTE(review): same character-class semantics: "&nbsp;" contributes the single
    // characters & n b s p ; (so these four letters are removed), while "-" only forms the
    // range ",-," and is therefore not removed. Other letters are kept.
    cleaned =
        cleaned.replaceAll(
            "[\n,\t,\r,\\s,&nbsp; ,:,“,”,|,:,<,/>,</,>,-,+,=,},{,#,\",',-,%,^,*]", "");
    cleaned = getSpecialStringALL(cleaned);
    if (cleaned == null) {
      // getSpecialStringALL returns null once nothing is left after the replacements.
      return "";
    }
    return cleaned.trim();
  }

  /**
   * Drops the two special space characters with code points 12288 and 160.
   *
   * @param value text to filter
   * @return null when {@code value} is null or empty; otherwise the text without those
   *     characters (possibly empty)
   */
  public static String getSpecialStringALL(String value) {
    if (value == null || "".equals(value)) {
      return null;
    }
    StringBuilder kept = new StringBuilder(value.length());
    for (int i = 0; i < value.length(); i++) {
      char c = value.charAt(i);
      boolean codePoint12288 = c == '\u3000';
      boolean codePoint160 = c == '\u00A0';
      if (!codePoint12288 && !codePoint160) {
        kept.append(c);
      }
    }
    return kept.toString();
  }

  /**
   * Normalizes a date string to a uniform format: removes brackets and commas, turns the
   * separators "-", "/", double quote, "年" and "月" into "-", removes square brackets and
   * trims the result.
   *
   * @param value raw date text
   * @return the normalized text, or null when {@code value} is null or empty
   */
  public static String getReplaceAllDate(String value) {
    if (value == null || "".equals(value)) {
      return null;
    }
    String normalized =
        value
            .replaceAll("[(,),(,),【,】,{,},<,>]", "")
            .replaceAll("[-,-,/,\",年,月]", "-")
            .replaceAll("[:,:]", ":")
            .replace("]", "")
            .replace("[", "");
    return normalized.trim();
  }

  /**
   * Collects the person names recorded under a set of party-role keywords.
   *
   * <p>The keywords are {@code PLAINTIFF} when {@code status} is 1 and {@code DEFENDANT}
   * otherwise. Entries containing "、" are split into separate names, duplicates are dropped
   * through a {@code HashSet}, and the names are joined with "、" in the set's iteration order.
   *
   * @param map role keyword to extracted names
   * @param status 1 for the plaintiff side, anything else for the defendant side
   * @return the joined names, or null when the map is null or no name is found
   */
  public static String getKeyName(Map<String, List<String>> map, int status) {
    if (map == null) {
      return null;
    }
    String[] roleKeys = (status == 1) ? PLAINTIFF : DEFENDANT;
    Set<String> names = new HashSet<String>();
    for (String role : roleKeys) {
      List<String> entries = map.get(role);
      if (entries == null) {
        continue;
      }
      for (String entry : entries) {
        if (entry.contains("、")) {
          for (String part : entry.split("、")) {
            names.add(part);
          }
        } else {
          names.add(entry);
        }
      }
    }
    if (names.isEmpty()) {
      return null;
    }
    StringBuilder joined = new StringBuilder();
    boolean first = true;
    for (String name : names) {
      if (!first) {
        joined.append("、");
      }
      joined.append(name);
      first = false;
    }
    return joined.toString();
  }

  /**
   * Cleans a candidate court name.
   *
   * <p>Removes the {@code DATESTATUS} phrases, then punctuation, ASCII digits and ASCII
   * letters, then a list of page-navigation phrases, and finally cuts the text so that it
   * starts at the last occurrence of each province name it contains.
   *
   * <p>The navigation phrases are removed as whole phrases. They used to be written inside a
   * regex character class, which deleted each of their characters individually and so also
   * removed those characters from legitimate court names.
   *
   * @param value candidate court name; may be null or empty
   * @return the cleaned name; null or empty input is returned unchanged
   */
  public static String replaceCourtName(String value) {
    if (value == null || "".equals(value)) {
      return value;
    }
    for (String phrase : DATESTATUS) {
      value = value.replaceAll(phrase, "");
    }
    // Character class: each listed character (commas included) and the ranges 0-9, a-z, A-Z.
    value =
        value.replaceAll("[(,),(,),【,】,{,},<,>,★,?,0-9,a-z,A-Z,!,!,#,$,%,&,*,/,\",/,|,、]", "");
    String[] pageNoise = {
      "欢迎", "登陆", "登录", "编辑", "录入", "年月日", "首页", "发表日期", "发布日期", "裁判文书", "次数次", "打印此页", "关闭",
      "下载", "查看次数", "字号", "双击", "屏幕滚动"
    };
    for (String phrase : pageNoise) {
      value = value.replace(phrase, "");
    }
    for (String province : PROVINCE) {
      int index = value.lastIndexOf(province);
      if (index > 0) {
        value = value.substring(index, value.length());
      }
    }
    return value;
  }

  /**
   * Court extraction, method 1: finds the court name in the document text.
   *
   * <p>The text is first reduced with {@code getDataAll} and split into sentences on
   * {@code 。}. For each sentence the prefix up to the last 书 (falling back to the last 号,
   * then the last 第) is taken, digits/commas/hyphens/colons/underscores are removed, the
   * result is cut after the last 法院 and cleaned with {@code replaceCourtName}. The first
   * candidate of at least four characters wins.
   *
   * @param value document text, may be {@code null}
   * @return the court name, or {@code null} when none is found or an error occurs (the error
   *     message is logged)
   */
  public static String getCourtName(String value) {
    if (value == null || "".equals(value)) {
      return null;
    }
    value = getDataAll(value);
    if (value == null || "".equals(value)) {
      return null;
    }
    String[] sentences = value.split("。");
    try {
      for (String sentence : sentences) {
        int index = sentence.lastIndexOf("书");
        if (index == -1) {
          index = sentence.lastIndexOf("号");
        }
        if (index == -1) {
          index = sentence.lastIndexOf("第");
        }
        // When none of the markers is present index is -1 and the prefix is empty.
        String courtName = sentence.substring(0, index + 1);
        courtName = courtName.replaceAll("[0-9,\\-,:,_,:]", "");
        // Fixed: the original joined these tests with &&, which can never be true.
        if (courtName == null || "".equals(courtName)) {
          continue;
        }
        index = courtName.lastIndexOf("法院");
        if (index == -1) {
          continue;
        }
        courtName = courtName.substring(0, index + 2);
        courtName = replaceCourtName(courtName);
        if (courtName == null || "".equals(courtName)) {
          continue;
        }
        if (courtName.length() >= 4) {
          return courtName;
        }
      }
    } catch (Exception e) {
      logger.error("提取法院名称出错:" + e.getMessage());
    }
    return null;
  }
  /**
   * Court extraction, method 2: derives the court name from the first sentence of the text.
   *
   * <p>The first sentence (text before the first {@code 。}) is cut after its first 书, or
   * after its first 号 when there is no 书. Digits, commas, hyphens, colons and underscores are
   * removed, anything up to and including the last {@code }} is dropped, and the remainder is
   * cut after the last 法院 before being passed through {@code getAthertheReplace} and
   * {@code replaceCourtName}.
   *
   * <p>Fixes over the previous version: the cut position was computed with
   * {@code indexOf(index)}, which searches for a character whose code equals the position
   * instead of using the position itself; and the final emptiness check compared the string
   * array instead of the result string.
   *
   * @param value document text, may be {@code null}
   * @return a court name of at least four characters, or {@code null} when none can be derived
   */
  public static String getAtherthe(String value) {
    if (value == null || "".equals(value)) {
      return null;
    }
    try {
      String[] sentences = value.split("。");
      // split() yields an empty array when the text consists only of separators.
      String first = sentences.length > 0 ? sentences[0] : value;
      int index = first.indexOf("书");
      if (index == -1) {
        index = first.indexOf("号");
      }
      if (index == -1) {
        return null;
      }
      String head = first.substring(0, index + 1).replaceAll("[0-9,\\-,:,_,:]", "");
      int brace = head.lastIndexOf("}");
      if (brace >= 0) {
        head = head.substring(brace + 1);
      }
      int court = head.lastIndexOf("法院");
      if (court == -1) {
        return null;
      }
      String name = replaceCourtName(getAthertheReplace(head.substring(0, court + 2)));
      if (name != null && name.length() >= 4) {
        return name;
      }
    } catch (Exception ignored) {
      // Best effort, as before: any failure in the helpers means "no court name found".
    }
    return null;
  }

  /**
   * Court extraction: filters a candidate that mentions 法院 more than once.
   *
   * <p>When the first and last occurrence of 法院 differ, only the text after the first
   * occurrence up to and including the last occurrence is kept; otherwise the input is
   * returned as is.
   *
   * @param value candidate text, may be {@code null}
   * @return the filtered text, or {@code null} when {@code value} is {@code null} or empty
   */
  public static String getAthertheReplace(String value) {
    if (value == null || "".equals(value)) {
      return null;
    }
    final String court = "法院";
    int first = value.indexOf(court);
    int last = value.lastIndexOf(court);
    return first == last ? value : value.substring(first + 2, last + 2);
  }

  /**
   * Court extraction: locates the end of the first date marker in the whole text.
   *
   * <p>The entries of {@code DATESTATUS} are tried in array order; the first one that occurs
   * anywhere in {@code value} decides the result.
   *
   * @param value text to search, may be {@code null}
   * @return the index just past the matched marker, or {@code -1} when {@code value} is
   *     {@code null}, empty, or contains no marker
   */
  public static int getDateIndex2(String value) {
    if (value != null && !"".equals(value)) {
      for (int i = 0; i < DATESTATUS.length; i++) {
        String marker = DATESTATUS[i];
        int at = value.indexOf(marker);
        if (at >= 0) {
          return at + marker.length();
        }
      }
    }
    return -1;
  }

  /**
   * Locates the end of the first date marker within the first sentence of the text.
   *
   * <p>Only the text before the first {@code 。} is searched. The entries of
   * {@code DATESTATUS} are tried in array order and the first one found decides the result.
   *
   * <p>Fixed: a text without {@code 。} used to raise a
   * {@link StringIndexOutOfBoundsException}; it now yields {@code -1}.
   *
   * @param value text to search, may be {@code null}
   * @return the index just past the matched marker, or {@code -1} when {@code value} is
   *     {@code null}, empty, has no {@code 。}, or its first sentence contains no marker
   */
  public static int getDateIndex(String value) {
    if (value == null || "".equals(value)) {
      return -1;
    }
    int end = value.indexOf("。");
    if (end == -1) {
      return -1;
    }
    String firstSentence = value.substring(0, end);
    for (String date : DATESTATUS) {
      int index = firstSentence.indexOf(date);
      if (index >= 0) {
        return index + date.length();
      }
    }
    return -1;
  }

  /**
   * Extracts the cause of action from the document text.
   *
   * <p>The text is reduced with {@code getDataAll} and truncated before the last 书记员. The
   * first entry of {@code CAUSE} that occurs in the text decides the result: the fragment of
   * the sentence preceding the keyword is concatenated with the text from the keyword up to
   * and including the next {@code 。}. If the last 公告 appears after that keyword the method
   * gives up and returns {@code null}. The keywords 驳回申诉通知 and 赔偿决定书 get special
   * handling: the text is first cut after the date marker and the following 号.
   *
   * <p>Fixes over the previous version: a {@code null} from {@code getDataAll} is handled
   * explicitly; the result of {@code getDateIndex} is the value that is checked against
   * {@code -1} (the original tested the wrong variable); and the fallback to the last 号 is
   * actually assigned (the original discarded it).
   *
   * @param value document text, may be {@code null}
   * @return the cause text, or {@code null} when none is found or an error occurs (the error
   *     message is logged)
   */
  public static String StringCause(String value) {
    if (value == null || "".equals(value)) {
      return null;
    }
    value = getDataAll(value);
    if (value == null) {
      return null;
    }
    try {
      int indx = value.lastIndexOf("书记员");
      if (indx > 0) {
        value = value.substring(0, indx);
      }
      for (String val : CAUSE) {
        int index = value.indexOf(val);
        if (index < 0) {
          continue;
        }
        indx = value.lastIndexOf("公告");
        if (indx > 0 && index < indx) {
          return null;
        }
        if (val.equals("驳回申诉通知") || val.equals("赔偿决定书")) {
          int index2 = getDateIndex(value);
          if (index2 == -1) {
            return null;
          }
          // Continue with the text after the date marker and the 号 that follows it.
          String rest = value.substring(index2);
          value = rest.substring(rest.indexOf("号") + 1);
          if (val.equals("赔偿决定书")) {
            val = "国家赔偿";
          }
        }
        index = value.indexOf(val);
        if (index < 0) {
          index = value.indexOf("国家赔偿");
        }
        String firTxt = value.substring(0, index);
        // Start of the sentence holding the keyword: after the last 。, else after the last 号.
        int count = firTxt.lastIndexOf("。");
        if (count == -1) {
          count = firTxt.lastIndexOf("号");
        }
        firTxt = firTxt.substring(count + 1);
        if (value.indexOf(val) < 0 && val.equals("驳回申诉通知")) {
          val = "国家赔偿";
        }
        String lastxt = value.substring(value.indexOf(val));
        lastxt = lastxt.substring(lastxt.indexOf(val), lastxt.indexOf("。") + 1);
        return firTxt + lastxt;
      }
    } catch (Exception e) {
      logger.error("提取案由出错:" + e.getMessage());
    }
    return null;
  }

  /**
   * Extracts the body text by dropping the header that precedes the first date marker.
   *
   * <p>Only the first sentence (text before the first {@code 。}) is scanned for entries of
   * {@code DATESTATUS}, in array order. When one is found, the text after that marker is
   * returned; when none is found the input is returned unchanged.
   *
   * @param value document text, may be {@code null}
   * @return the text after the first marker, the unchanged input when no marker is present,
   *     or {@code null} when {@code value} is {@code null}, empty, or has no {@code 。}
   */
  public static String getDataAll(String value) {
    if (value == null || "".equals(value)) {
      return null;
    }
    int stop = value.indexOf("。");
    if (stop == -1) {
      return null;
    }
    String firstSentence = value.substring(0, stop);
    for (String marker : DATESTATUS) {
      int at = firstSentence.indexOf(marker);
      if (at >= 0) {
        return value.substring(at + marker.length());
      }
    }
    return value;
  }

  /**
   * Reads the content of a file with the correct encoding (marked as no longer used).
   *
   * <p>The file is parsed once per entry of {@code charset}; the text of each parse is passed
   * through {@code getDataAll} and handed to {@code getErrorCode}. The first text for which
   * {@code getErrorCode} returns {@code true} is returned.
   *
   * <p>NOTE(review): presumably {@code getErrorCode} returns {@code true} when the decoded
   * text is not garbled; confirm against its implementation.
   *
   * @param fi the HTML file to read
   * @return the extracted text, or {@code null} when no encoding passes the check
   * @throws Exception if parsing the file fails
   */
  public static String getAllcharset(File fi) throws Exception {
    for (String encoding : charset) {
      Document parsed = Jsoup.parse(fi, encoding);
      String text = getDataAll(parsed.text());
      if (getErrorCode(text)) {
        return text;
      }
    }
    return null;
  }

  /**
   * Converts half-width characters to their full-width forms.
   *
   * <p>The ASCII space becomes the ideographic space U+3000 and the printable ASCII
   * characters {@code !} to {@code ~} are shifted by 65248 into U+FF01 to U+FF5E. All other
   * characters are left untouched.
   *
   * <p>Fixed: control characters below the space (tab, newline, ...) used to be shifted as
   * well, producing code points that are not full-width forms.
   *
   * @param input the string to convert; must not be {@code null}
   * @return the full-width string
   */
  public static String ToSBC(String input) {
    char[] c = input.toCharArray();
    for (int i = 0; i < c.length; i++) {
      if (c[i] == ' ') {
        c[i] = '\u3000';
      } else if (c[i] >= '!' && c[i] <= '~') {
        // 65248 == 0xFEE0, the distance between '!' and its full-width form.
        c[i] = (char) (c[i] + 65248);
      }
    }
    return new String(c);
  }
  /**
   * Converts full-width characters to their half-width forms.
   *
   * <p>The ideographic space U+3000 becomes an ASCII space and the characters strictly
   * between U+FF00 and U+FF5F are shifted down by 65248. All other characters are copied
   * unchanged.
   *
   * @param input the string to convert; must not be {@code null}
   * @return the half-width string
   */
  public static String ToDBC(String input) {
    StringBuilder out = new StringBuilder(input.length());
    for (char ch : input.toCharArray()) {
      if (ch == '\u3000') {
        out.append(' ');
      } else if (ch > '\uFF00' && ch < '\uFF5F') {
        out.append((char) (ch - 65248));
      } else {
        out.append(ch);
      }
    }
    return out.toString();
  }

  /**
   * Opens the {@code zwh_court} bucket on the shared {@code cluster2} connection.
   *
   * @return the bucket returned by {@code openBucket}, called with a timeout of one minute
   */
  public static Bucket connectionCouchBaseLocal() {
    // Connect to the designated bucket.
    Bucket courtBucket = cluster2.openBucket("zwh_court", 1, TimeUnit.MINUTES);
    return courtBucket;
  }
}
 /**
  * Bean factory method for the Couchbase cluster.
  *
  * @return the cluster created for {@code couchbaseHostname}
  */
 @Bean
 public Cluster cluster() {
   Cluster created = CouchbaseCluster.create(couchbaseHostname);
   return created;
 }
 /**
  * One-time fixture: creates the cluster connection and opens a bucket on it, storing both in
  * the static {@code cluster} and {@code bucket} fields.
  *
  * <p>NOTE(review): both calls take no arguments, so they presumably rely on the SDK defaults
  * for host and bucket; confirm against the Couchbase SDK version in use.
  */
 @BeforeClass
 public static void setup() {
   cluster = CouchbaseCluster.create();
   bucket = cluster.openBucket();
 }