コード例 #1
0
ファイル: Extractor.java プロジェクト: dalinhuang/concourse
  /**
   * Reads a text file, splits its content into tags and writes the distinct tags out.
   *
   * <p>All lines of {@code fileName} are concatenated without any separator (line breaks are
   * dropped). In the joined text every double space and every {@code "@@tag"} marker is turned
   * into a comma, the text is split on commas, and each non-blank piece is trimmed and collected
   * into a set. The distinct tags are handed, one per line, to
   * {@code FileHelp.writeFile(TAG_FILE_NAME + ".txt", ...)} and the number of tags is logged.
   *
   * <p>A read failure is not propagated: the stack trace is printed and processing continues
   * with whatever was read up to that point.
   *
   * @param fileName path of the file to read
   * @param newFileName not used by this method; the output name is derived from
   *     {@code TAG_FILE_NAME}
   */
  public void split(String fileName, String newFileName) {

    // Accumulate in a StringBuilder; repeated String += in the read loop is quadratic.
    StringBuilder buffer = new StringBuilder();
    BufferedReader reader = null;
    try {
      System.out.println("以行为单位读取文件内容,一次读一整行:");
      reader = new BufferedReader(new FileReader(new File(fileName)));
      String tempString;
      // Read one line at a time; readLine() returns null at end of file.
      while ((tempString = reader.readLine()) != null) {
        // Lines are joined with no separator, exactly as before.
        buffer.append(tempString);
      }
    } catch (IOException e) {
      // Best effort: report the failure and keep going with what was read so far.
      e.printStackTrace();
    } finally {
      // Single close point for the reader, on both the success and the failure path.
      if (reader != null) {
        try {
          reader.close();
        } catch (IOException ignored) {
          // Nothing useful can be done if closing the reader fails.
        }
      }
    }

    String content = buffer.toString();
    content = content.replace("  ", ",");
    // NOTE(review): as written this replaces "," with "," and changes nothing. It may have been
    // meant to normalise a different comma character -- confirm the intent.
    content = content.replace(",", ",");
    content = content.replace("@@tag", ",");

    Set<String> tagSet = new HashSet<String>();
    for (String piece : content.split(",")) {
      // Trim before the emptiness check so whitespace-only pieces are not stored as an empty tag.
      String tag = piece.trim();
      if (!tag.isEmpty()) {
        System.out.println("==" + tag);
        tagSet.add(tag);
      }
    }

    String lineSeparator = System.getProperty("line.separator");
    StringBuilder clearStr = new StringBuilder();
    for (String str : tagSet) {
      clearStr.append(str).append(lineSeparator);
    }
    FileHelp.writeFile(TAG_FILE_NAME + ".txt", clearStr.toString());
    logger.info(tagSet.size());
  }
コード例 #2
0
ファイル: Extractor.java プロジェクト: dalinhuang/concourse
  /**
   * Parses the files listed for {@code filePath} into {@code NewsItem} objects.
   *
   * <p>{@code FileHelp.refreshFileList} is asked to fill a list for {@code filePath} with the
   * {@code ".xml"} filter (presumably the XML files found under that path -- confirm against
   * {@code FileHelp}). Each listed file is parsed with a fresh {@code ItemParser}. A file whose
   * parsing throws any {@code Exception} is silently skipped.
   *
   * @param filePath location passed to {@code FileHelp.refreshFileList}
   * @return the items that were parsed without an exception, in list order; never {@code null}
   */
  public List<NewsItem> parse(String filePath) {

    List<String> xmlFiles = new ArrayList<String>();
    FileHelp.refreshFileList(filePath, xmlFiles, ".xml");

    List<NewsItem> parsedItems = new ArrayList<NewsItem>();
    for (int index = 0; index < xmlFiles.size(); index++) {
      String currentFile = xmlFiles.get(index);
      try {
        parsedItems.add(new ItemParser().parse(currentFile));
      } catch (Exception e) {
        // Deliberately ignored: a file that fails to parse is left out of the result.
      }
    }

    return parsedItems;
  }